共计 6797 个字符,预计需要花费 17 分钟才能阅读完成。
简介
profiling 工具是语言生态的重要组成部分。这篇 blog 对 erlang 的 vm 层 profiling 做一个总结。
profiling tools overview
可参考
https://www.erlang.org/doc/ef…
名字 | 性能 | 残余性能) |
---|---|---|
cprof | 统计模块的调用次数,可找出调用次数热点 | 0.67 |
eprof | 统计模块的调用次数和工夫,可找出调用工夫、次数热点 | 0.17 |
fprof | 统计模块调用次数和关系,可生成 call-graph | 0.008 |
eflame | 通过 erlang trace 采集调用栈,过程状态 | 0.003 |
looking_glass | 也是基于 erlang trace | 0.004 |
具体数据
压测代码如下:
defp do_sth(:cpu_intensive) do | |
start_time = :erlang.system_time(:microsecond) | |
end_time = start_time + 10_000_000 | |
do_until_time(0, start_time, end_time) | |
end | |
def do_until_time(count, start_time, end_time) do | |
now = :erlang.system_time(:microsecond) | |
if now >= end_time do | |
Logger.warn("\ntotal_time: #{div(end_time - start_time, 1000_000)} second\ncount: #{count}, avg_μs: #{div(end_time - start_time, count)}" | |
) | |
else | |
for v <- 1..100 do | |
a = %{v => Base.encode64(:crypto.strong_rand_bytes(20))} | |
b = Jason.encode!(a) | |
Jason.decode!(b) | |
end | |
do_until_time(count + 1, start_time, end_time) | |
end | |
end |
无压测
性能基准
total_time: 10 second | |
count: 24287, avg_μs: 411 | |
cpu: %100 |
cprof
示例代码
:cprof.start() | |
do_sth(:cpu_intensive) | |
:cprof.pause() | |
data = :cprof.analyse() | |
Logger.warn("cprof_result:\n#{inspect(data, pretty: true)}") |
输入后果
仅能取得模块的调用次数。
{243741614, | |
[ | |
{Jason.Encode, 75685344, | |
[{{Jason.Encode, :escape_json_chunk, 5}, 49314144}, | |
{{Jason.Encode, :value, 3}, 3296400}, | |
{{Jason.Encode, :escape_json, 4}, 3296400}, | |
{{Jason.Encode, :escape_json, 3}, 3296400}, | |
{{Jason.Encode, :"-fun.escape_json/3-", 3}, 3296400}, | |
{{Jason.Encode, :map_naive_loop, 3}, 1648200}, | |
{{Jason.Encode, :map_naive, 3}, 1648200}, | |
{{Jason.Encode, :key, 2}, 1648200}, | |
{{Jason.Encode, :escape_function, 1}, 1648200}, | |
{{Jason.Encode, :encode_string, 2}, 1648200}, | |
{{Jason.Encode, :encode_map_function, 1}, 1648200}, | |
{{Jason.Encode, :encode, 2}, 1648200}, | |
{{Jason.Encode, :"-fun.map_naive/3-", 3}, 1648200} | |
]}, | |
{Jason.Decoder, 75685344, | |
[{{Jason.Decoder, :string, 6}, 52610544}, | |
{{Jason.Decoder, :value, 5}, 3296400}, | |
{{Jason.Decoder, :"-string_decode_function/1-fun-0-", 1}, 3296400}, | |
{{Jason.Decoder, :terminate, 6}, 1648200}, | |
{{Jason.Decoder, :string_decode_function, 1}, 1648200}, | |
{{Jason.Decoder, :parse, 2}, 1648200}, | |
{{Jason.Decoder, :object_decode_function, 1}, 1648200}, | |
{{Jason.Decoder, :object, 6}, 1648200}, | |
{{Jason.Decoder, :key_decode_function, 1}, 1648200}, | |
{{Jason.Decoder, :key, 6}, 1648200}, | |
{{Jason.Decoder, :key, 5}, 1648200}, | |
{{Jason.Decoder, :float_decode_function, 1}, 1648200}, | |
{{Jason.Decoder, :"-key_decode_function/1-fun-0-", 1}, 1648200} | |
]} |
性能
total_time: 10 second | |
count: 16482, avg_μs: 606 | |
cpu: %100 |
eprof
示例代码
:eprof.start() | |
:eprof.start_profiling(:erlang.processes()) | |
do_sth(:cpu_intensive) | |
:eprof.stop_profiling() | |
data = :eprof.analyze(:total, sort: :time) | |
Logger.warn("eprof_result:\n#{inspect(data, pretty: true)}") |
输入后果
统计了调用工夫。按工夫占比排序是个较实用的选项。
FUNCTION CALLS % TIME [uS / CALLS] | |
-------- ----- ------- ---- [----------] | |
gen:call/4 1 0.00 0 [0.00] | |
gen:do_for_proc/2 1 0.00 0 [0.00] | |
... | |
maps:from_list/1 1249500 2.04 203667 [0.16] | |
'Elixir.Enum':into_map/2 833000 2.38 237244 [0.28] | |
'Elixir.ProfilingTest':'-do_until_time/3-fun-0-'/2 416500 2.39 238687 [0.57] | |
'Elixir.Jason.Decoder':parse/2 416500 2.41 240746 [0.58] | |
'Elixir.Base':do_encode64/2 416500 2.65 264394 [0.63] | |
'Elixir.Base':'-do_encode64/2-lbc$^0/2-0-'/2 1666000 7.30 728723 [0.44] | |
'Elixir.Base':enc64_pair/1 5831000 7.39 737817 [0.13] | |
crypto:strong_rand_bytes_nif/1 416500 7.56 754244 [1.81] | |
'Elixir.Jason.Encode':escape_json_chunk/5 12461680 12.19 1216586 [0.10] | |
'Elixir.Jason.Decoder':string/6 13294680 13.93 1390338 [0.10] | |
------------------------------------------------------------ -------- ------- ------- [----------] | |
Total: 61597835 100.00% 9981650 [0.16] |
性能
total_time: 10 second | |
count: 4165, avg_μs: 2400 | |
cpu: %100 |
fprof
示例代码
:fprof.trace([:start, {:procs, :erlang.processes()}]) | |
do_sth(:cpu_intensive) | |
# 默认写入 fprof.trace 文件 | |
:fprof.trace(:stop) | |
# 读取文件并剖析 | |
:fprof.profile() | |
# 输入后果,按函数本身调用工夫排序,不蕴含调用其余函数的工夫 | |
:fprof.analyse(sort: :own) | |
# 按累计工夫排序,包含调用其余函数的工夫 | |
:fprof.analyse(sort: :acc) |
输入后果
kcallgrind
call graph 生成见:https://segmentfault.com/a/11…
sort own
Processing data... | |
Creating output... | |
%% Analysis results: | |
{ analysis_options, | |
[{callers, true}, | |
{sort, own}, | |
{totals, false}, | |
{details, true}]}. | |
% CNT ACC OWN | |
[{totals, 5040222,10053.041, 9982.294}]. %%% | |
% CNT ACC OWN | |
[{"<0.212.0>", 5034485,undefined, 9948.017}]. %% | |
{[{{'Elixir.Jason.Decoder',string,6}, 1011296, 0.000, 1539.664}, | |
{{'Elixir.Jason.Decoder',key,5}, 33800, 2135.166, 47.418}, | |
{{'Elixir.Jason.Decoder',value,5}, 33800, 0.000, 46.437}], | |
{{'Elixir.Jason.Decoder',string,6}, 1078896, 2135.166, 1633.519}, % | |
[{{'Elixir.Jason.Decoder',string,6}, 1011296, 0.000, 1539.664}, | |
{{'Elixir.Jason.Decoder',object,6}, 33800, 303.359, 140.051}, | |
{{'Elixir.Jason.Decoder','-string_decode_function/1-fun-0-',1},67600, 85.381, 85.381}, | |
{{'Elixir.Jason.Decoder',key,6}, 33800, 1899.845, 46.730}, | |
{garbage_collect, 4858, 18.554, 18.554}]}. | |
{[{{'Elixir.Jason.Encode',escape_json_chunk,5},943696, 0.000, 1351.146}, | |
{{'Elixir.Jason.Encode',escape_json,4}, 67600, 1487.645, 94.579}], | |
{{'Elixir.Jason.Encode',escape_json_chunk,5},1011296, 1487.645, 1445.725}, % | |
[{{'Elixir.Jason.Encode',escape_json_chunk,5},943696, 0.000, 1351.146}, | |
{garbage_collect, 8224, 41.920, 41.920}]}. |
sort acc
Processing data... | |
Creating output... | |
%% Analysis results: | |
{ analysis_options, | |
[{callers, true}, | |
{sort, acc}, | |
{totals, false}, | |
{details, true}]}. | |
% CNT ACC OWN | |
[{totals, 5040222,10053.041, 9982.294}]. %%% | |
% CNT ACC OWN | |
[{"<0.212.0>", 5034485,undefined, 9948.017}]. %% | |
{[{{'Elixir.ProfilingTest','test fprof',1}, 0,10051.624, 0.002}, | |
{{fprof,call,1}, 1, 1.377, 0.004}, | |
{undefined, 0, 0.033, 0.028}], | |
{{fprof,just_call,2}, 1,10053.034, 0.034}, % | |
[{{'Elixir.ProfilingTest',do_sth,1}, 1,10051.622, 0.003}, | |
{suspend, 1, 1.368, 0.000}, | |
{{erlang,monitor,2}, 1, 0.005, 0.005}, | |
{{erlang,demonitor,1}, 1, 0.005, 0.005}]}. | |
{[{undefined, 0,10053.008, 0.001}], | |
{{'Elixir.ProfilingTest','test fprof',1}, 0,10053.008, 0.001}, % | |
[{{fprof,just_call,2}, 0,10051.624, 0.002}, | |
{{fprof,trace,1}, 1, 1.383, 0.002}]}. | |
{[{{fprof,just_call,2}, 1,10051.622, 0.003}], | |
{{'Elixir.ProfilingTest',do_sth,1}, 1,10051.622, 0.003}, % | |
[{{'Elixir.ProfilingTest',do_until_time,3}, 1,10051.617, 0.028}, | |
{{erlang,system_time,1}, 1, 0.002, 0.002}]}. |
性能
11:47:51.914 [warn] | |
total_time: 10 second | |
count: 341, avg_μs: 29325 | |
cpu: %160 |
eflame
示例代码
spawn(fn -> do_sth(:cpu_intensive) end) | |
:eflame2.write_trace(:global_and_local_calls, './eflame.test', :all, 15 * 1000) | |
# 上面的语句须要期待一段时间 | |
:eflame2.format_trace('./eflame.test', './eflame.test.out') |
实现输入:
Hello, world, I'm <0.821.0> and I'm running.... | |
<0.230.0> <0.228.0> iex(2)> | |
nil | |
<0.50.0> <0.10.0> <0.194.0> <0.64.0> | |
Writing to ./eflame.test.out for 8 processes... finished! |
输入
生成火焰图:
~/git/eflame/flamegraph.riak-color.pl eflame.test.out > eflame.svg
性能
total_time: 10 second | |
count: 91, avg_μs: 109890 | |
cpu: %120 |
looking_grass
代码
# 不限度采集范畴会间接爆内存 | |
:lg.trace([{:scope, [self()]}], | |
:lg_file_tracer, | |
'traces.lz4', | |
%{process_dump: true} | |
) | |
do_sth(:cpu_intensive) | |
:lg.stop() | |
:lg_flame.profile_many('traces.lz4.*', 'output') |
输入
显然失落了一些调用栈。暂未查明起因。
~/git/FlameGraph/flamegraph.pl output > looking_grass.svg
性能
total_time: 10 second | |
count: 127, avg_μs: 78740 |
总结
- erlang 产品中可用的 profiling 工具非常少,仅 cprof 耗费较少,eprof 也须要审慎开启。通过 trace pattern 缩小对性能的影响.
- erlang 多过程,火焰图不适宜作为剖析 cpu 耗费的工具,可用于剖析单个过程场景。
- 没有堪用的火焰图工具,实现太毛糙(没有采样,没有对两头后果做解决),内存,CPU 耗费都过大。
参考
https://www.erlang.org/doc/ef…
https://interscity.org/assets…
https://github.com/slfritchie…
https://github.com/rabbitmq/l…
正文完