乐趣区

关于erlang:erlang-nodename-phash-冲突坑

概述

在线上遇到了因节点名哈希值抵触导致的局部机器无负载问题。10 台机器中,抵触的机器达到了 4 台之多。假如哈希的概率是均匀的。10 台机器中,不存在抵触的概率靠近

>>> 1 - (1.0 / (2 ** 32)) * 10
0.9999999976716936

实际上,10 台中哈希值抵触了 6 台。于是看源码找答案。

过程

先从 phash2 api 动手

erlang 的 api 调用形式和 linux 有相似之处。通过函数指针数组。
erl_bif_list.h 中看到,咱们调用的 phash2 对应 phash2_2。

BIF_LIST(am_erlang,am_phash,2,phash_2,phash_2,37)
BIF_LIST(am_erlang,am_phash2,1,phash2_1,phash2_1,38)
BIF_LIST(am_erlang,am_phash2,2,phash2_2,phash2_2,39)

bif.c:4905

BIF_RETTYPE phash2_2(BIF_ALIST_2)
{
    Uint32 hash;
    Uint32 final_hash;
    Uint32 range;
    Eterm trap_state = THE_NON_VALUE;

    /* Check for special case 2^32 */
    if (term_equals_2pow32(BIF_ARG_2)) {range = 0;} else {
    Uint u;
    if (!term_to_Uint(BIF_ARG_2, &u) || ((u >> 16) >> 16) != 0 || !u) {BIF_ERROR(BIF_P, BADARG);
    }
    range = (Uint32) u;
    }
    hash = trapping_make_hash2(BIF_ARG_1, &trap_state, BIF_P);
    if (trap_state != THE_NON_VALUE) {BIF_TRAP2(&bif_trap_export[BIF_phash2_2], BIF_P, trap_state, BIF_ARG_2);
    }
    if (range) {final_hash = hash % range; /* [0..range-1] */
    } else {final_hash = hash;}
    /*
     * Return either a small or a big. Use the heap for bigs if there is room.
     */
#if defined(ARCH_64)
    BIF_RET(make_small(final_hash));
#else
    if (IS_USMALL(0, final_hash)) {BIF_RET(make_small(final_hash));
    } else {Eterm* hp = HAlloc(BIF_P, BIG_UINT_HEAP_SIZE);
    BIF_RET(uint_to_big(final_hash, hp));
    }
#endif
}

通过调试能够确认

1298    {(gdb) bt
#0  make_hash2_helper (term_param=203, can_trap=1, state_mref_write_back=0x7fd2b7bfc598, p=0x7fd2b9900b88) at beam/utils.c:1298
#1  0x0000558545a0a5eb in trapping_make_hash2 (term=203, state_mref_write_back=0x7fd2b7bfc598, p=0x7fd2b9900b88) at beam/utils.c:1983
#2  0x0000558545a284ad in phash2_1 (A__p=0x7fd2b9900b88, BIF__ARGS=0x7fd2ba200200, A__I=0x7fd2b7d5ef60) at beam/bif.c:4897
#3  0x00005585459455b2 in process_main (x_reg_array=0x7fd2ba200200, f_reg_array=0x7fd2ba202240) at x86_64-unknown-linux-gnu/opt/smp/beam_cold.h:121
#4  0x00005585459613ac in sched_thread_func (vesdp=0x7fd2b89440c0) at beam/erl_process.c:8498
#5  0x0000558545c47a63 in thr_wrapper (vtwd=0x7ffcd94d55a0) at pthread/ethread.c:122
#6  0x00007fd2fb2896db in start_thread (arg=0x7fd2b7bff700) at pthread_create.c:463
#7  0x00007fd2fadaa71f in clone () at ../sysdeps/unix/sysv/linux/x86_64/clone.S:95

phash2 对 atom 的解决

erts/emulator/beam/utils.c:1418
间接用 atom 的值找到 erts_atom_table 对应的哈希值。

    if (primary_tag(term) == TAG_PRIMARY_IMMED1) {switch (term & _TAG_IMMED1_MASK) {
    case _TAG_IMMED1_IMMED2:
        switch (term & _TAG_IMMED2_MASK) {
        case _TAG_IMMED2_ATOM:
            /* Fast, but the poor hash value should be mixed. */
            return atom_tab(atom_val(term))->slot.bucket.hvalue;
        }

能够看到,erts_atom_table 是全局的,下限为 1024*1024 哈希表。这也是为什么原子最大只能存储 1048576。
erts/emulator/beam/atom.h:atom_tab

atom_tab(Uint i)
{return (Atom *) erts_index_lookup(&erts_atom_table, i);
}

erts/emulator/beam/index.h:erts_index_lookup
seg_table 是一个 1024*1024 的二维数组

ERTS_GLB_INLINE IndexSlot*
erts_index_lookup(IndexTable* t, Uint ix)
{return t->seg_table[ix>>INDEX_PAGE_SHIFT][ix&INDEX_PAGE_MASK];
}

搜寻 erts_atom_table 从代码能够看到,是生成原子时,在批改 erts_atom_table 时,计算的哈希值。问题的关键在于原子的哈希值是如何计算的。

atom hash

从 atom.c:init_atom_table 中能够看到。原子的哈希值是通过 init 时赋值的函数指针计算的,具体逻辑如下:
atom.c:atom_hash

static HashValue
atom_hash(Atom *obj)
{
    byte *p = obj->name;
    int len = obj->len;
    HashValue h = 0, g;
    byte v;

    while (len--)
    {
        v = *p++;
        /* latin1 clutch for r16 */
        if (len && (v & 0xFE) == 0xC2 && (*p & 0xC0) == 0x80)
        {v = (v << 6) | (*p & 0x3F);
            p++;
            len--;
        }
        /* normal hashpjw follows for v */
        h = (h << 4) + v;
        if ((g = h & 0xf0000000))
        {h ^= (g >> 24);
            h ^= g;
        }
    }
    return h;
}

调试确认,节点名 atom 的计算,也是走这里,那么为什么会抵触呢?

Thread 6 "2_scheduler" hit Breakpoint 1, atom_hash (obj=0x7f39bfef92d0) at beam/atom.c:131
131         byte* p = obj->name;
(gdb) p obj->name
$19 = (byte *) 0x7f3a08580180 "test1@ubuntu"
(gdb) p obj->len
$20 = 12
(gdb) bt
#0  atom_hash (obj=0x7f39bfef92d0) at beam/atom.c:131
#1  0x000055c20bd115ab in hash_fetch (h=0x55c20c1d8040 <erts_atom_table>, tmpl=0x7f39bfef92d0, hash=0x55c20bd12a67 <atom_hash>, 
    cmp=0x55c20bd12b48 <atom_cmp>) at beam/hash.h:133
#2  0x000055c20bd11d27 in hash_get (h=0x55c20c1d8040 <erts_atom_table>, tmpl=0x7f39bfef92d0) at beam/hash.c:228
#3  0x000055c20bd124bc in index_get (t=0x55c20c1d8040 <erts_atom_table>, tmpl=0x7f39bfef92d0) at beam/index.c:109
#4  0x000055c20bd12fb7 in erts_atom_put_index (name=0x7f3a08580180 "test1@ubuntu", len=12, enc=ERTS_ATOM_ENC_UTF8, trunc=1) at beam/atom.c:299
#5  0x000055c20bd13115 in erts_atom_put (name=0x7f3a08580180 "test1@ubuntu", len=12, enc=ERTS_ATOM_ENC_UTF8, trunc=1) at beam/atom.c:350
#6  0x000055c20bc57596 in list_to_atom_1 (A__p=0x7f39bf203588, BIF__ARGS=0x7f39c67c4280, A__I=0x7f39be3d9590) at beam/bif.c:2913
#7  0x000055c20bb65087 in process_main (x_reg_array=0x7f39c67c4280, f_reg_array=0x7f39c67c6300) at x86_64-unknown-linux-gnu/opt/smp/beam_hot.h:331
#8  0x000055c20bb973ac in sched_thread_func (vesdp=0x7f39c4f4e480) at beam/erl_process.c:8498
#9  0x000055c20be7da63 in thr_wrapper (vtwd=0x7ffc7f688b60) at pthread/ethread.c:122
#10 0x00007f3a078816db in start_thread (arg=0x7f39bfefc700) at pthread_create.c:463
#11 0x00007f3a073a271f in clone () at ../sysdeps/unix/sysv/linux/x86_64/clone.S:95
        
(gdb) printf "%d,%s\n", h, obj->name
1385189,test1@ubuntu
(gdb) printf "%d,%s\n", h, obj->name
1385013,test4@ubuntu
(gdb) printf "%d,%s\n", h, obj->name
1384965,test7@ubuntu

用 python 写了同样的算法,发现抵触概率真的很高,参考论断里的链接。

def atom_hash(s):
    h = 0
    g = 0
    for v in s:
        h = (h << 4) + ord(v)
        g = h & 0xf0000000
        if g:
            h ^= (g >> 24)
            h ^= g
    return h


for num in range(10):
    value = "test{0}@ubuntu".format(num)
    hash_val = atom_hash(value)
    print(value, hash_val)

test0@ubuntu 1385205
test1@ubuntu 1385189
test2@ubuntu 1385173
test3@ubuntu 1385157
test4@ubuntu 1385013
test5@ubuntu 1384997
test6@ubuntu 1384981
test7@ubuntu 1384965
test8@ubuntu 1385077
test9@ubuntu 1385061

论断

  • 原子的哈希值和原子的字符串展现相干,即同样的原子,在不同的节点上(同样的 erlang vm 版本,同样的哈希算法),那么该原子的哈希值是一样的。
  • atom_hash 应用了 hashpjw 算法,该算法,即 erlang 原子哈希值的生成算法很容易抵触。没找到更权威的材料了:https://blog.csdn.net/iteye_1…
退出移动版