I. What is HashMap
In Java, Map is the collection type for storing key-value pairs <key, value>, and HashMap is a hash-table-based implementation of Map. It offers efficient storage and fast lookup, but it is not thread-safe. As the hash-table contract requires, keys in a Map must be unique.
II. Implementation Principle
HashMap is implemented as an array + linked lists + red-black trees. Each array slot stores key-value pairs as Node<key, value> nodes; under certain conditions (the treeify thresholds described below) a bin is converted to TreeNode<key, value> nodes instead.
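For reference, this is the (abridged) shape of the two node types in the JDK source; constructors and the Map.Entry methods are omitted:

static class Node<K, V> implements Map.Entry<K, V> {
    final int hash;     // cached hash of the key
    final K key;
    V value;
    Node<K, V> next;    // next node in the bucket's linked list
    // constructor, getKey/getValue/setValue, equals, hashCode omitted
}

static final class TreeNode<K, V> extends LinkedHashMap.Entry<K, V> {
    TreeNode<K, V> parent;  // red-black tree links
    TreeNode<K, V> left;
    TreeNode<K, V> right;
    TreeNode<K, V> prev;    // needed to unlink next upon deletion
    boolean red;
    // tree operations omitted
}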
III. Core Fields
// The default initial capacity - MUST be a power of two.
static final int DEFAULT_INITIAL_CAPACITY = 1 << 4; // 16

// The maximum capacity, used if a higher value is implicitly specified by
// either of the constructors with arguments. MUST be a power of two <= 1<<30.
static final int MAXIMUM_CAPACITY = 1 << 30;

// The load factor used when none is specified in the constructor.
static final float DEFAULT_LOAD_FACTOR = 0.75f;

// The bin count threshold for using a tree rather than a list for a bin.
// Bins are converted to trees when adding an element to a bin with at least
// this many nodes. The value must be greater than 2 and should be at least 8
// to mesh with assumptions in tree removal about conversion back to plain
// bins upon shrinkage.
static final int TREEIFY_THRESHOLD = 8;

// The bin count threshold for untreeifying a (split) bin during a resize
// operation. Should be less than TREEIFY_THRESHOLD, and at most 6 to mesh
// with shrinkage detection under removal.
static final int UNTREEIFY_THRESHOLD = 6;

// The smallest table capacity for which bins may be treeified: the table
// must hold at least 64 slots before a crowded bin is converted to a tree;
// below that, the table is resized instead.
static final int MIN_TREEIFY_CAPACITY = 64;

// The hash array that stores the nodes.
transient Node<K, V>[] table;

// The number of key-value mappings in the table.
transient int size;

// The number of structural modifications to the HashMap, used for fail-fast iteration.
transient int modCount;

// The next size value at which to resize (capacity * load factor).
int threshold;

// The load factor for the hash table; controls how full the table may get.
final float loadFactor;
IV. Constructors
public HashMap(int initialCapacity, float loadFactor) {
    if (initialCapacity < 0)
        throw new IllegalArgumentException("Illegal initial capacity: " + initialCapacity);
    if (initialCapacity > MAXIMUM_CAPACITY)
        initialCapacity = MAXIMUM_CAPACITY;
    if (loadFactor <= 0 || Float.isNaN(loadFactor))
        throw new IllegalArgumentException("Illegal load factor: " + loadFactor);
    this.loadFactor = loadFactor;
    // Stores the rounded capacity in threshold for now; this is not the real
    // threshold, which is recomputed later (capacity * load factor) in resize().
    this.threshold = tableSizeFor(initialCapacity);
}
V. Key Methods
/*
 * Computes the hash of the key, used to determine the storage position.
 * XORs the high 16 bits of hashCode() into the low 16 bits, which spreads
 * the hash distribution better and reduces collisions: XOR folds the
 * randomness of the high bits into the low bits, which are the only bits
 * used for indexing.
 * If & were used instead, results would skew toward 0; with |, toward 1;
 * neither distributes evenly, which is why XOR is chosen.
 *
 * Example:
 *   String s = "Hello World!";
 *   Integer.toBinaryString(s.hashCode());
 *   h              = 1100 0110 0011 1100 1011 0110 0001 1101
 *   h >>> 16       = 0000 0000 0000 0000 1100 0110 0011 1100
 *   h ^ (h >>> 16) = 1100 0110 0011 1100 0111 0000 0010 0001
 */
static final int hash(Object key) {
    int h;
    return (key == null) ? 0 : (h = key.hashCode()) ^ (h >>> 16);
}
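A minimal sketch of how the spread hash then maps to a bucket index, assuming a table of capacity 16 (the class and variable names here are illustrative, not from the JDK):

public class HashSpreadDemo {
    // Same spreading function as HashMap.hash, copied here for the demo.
    static int hash(Object key) {
        int h;
        return (key == null) ? 0 : (h = key.hashCode()) ^ (h >>> 16);
    }

    public static void main(String[] args) {
        int capacity = 16;              // table length, always a power of two
        int h = hash("Hello World!");
        int index = (capacity - 1) & h; // masks the low bits: index in [0, capacity)
        System.out.println("hash  = " + Integer.toBinaryString(h));
        System.out.println("index = " + index);
    }
}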
/*
 * Computes the table capacity. As the fields above require, the capacity is
 * always a power of two. The reasons:
 * 1. In binary, length is always 100...000, so length - 1 is 111...111.
 * 2. When length is a power of two, hash % length == hash & (length - 1),
 *    and the bitwise form is faster.
 * 3. It keeps computed positions well spread: if length were odd,
 *    length - 1 would be even, so the lowest bit of the mask would be 0 and
 *    every & result would be even, wasting half the slots.
 */
static final int tableSizeFor(int cap) {
    int n = cap - 1;
    n |= n >>> 1;
    n |= n >>> 2;
    n |= n >>> 4;
    n |= n >>> 8;
    n |= n >>> 16;
    return (n < 0) ? 1 : (n >= MAXIMUM_CAPACITY) ? MAXIMUM_CAPACITY : n + 1;
}
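A quick trace of the bit-smearing for cap = 17, wrapped in a runnable sketch (MAXIMUM_CAPACITY and the method body are copied from the JDK; the demo class is illustrative):

public class TableSizeForDemo {
    static final int MAXIMUM_CAPACITY = 1 << 30;

    static int tableSizeFor(int cap) {
        int n = cap - 1;
        n |= n >>> 1;
        n |= n >>> 2;
        n |= n >>> 4;
        n |= n >>> 8;
        n |= n >>> 16;
        return (n < 0) ? 1 : (n >= MAXIMUM_CAPACITY) ? MAXIMUM_CAPACITY : n + 1;
    }

    public static void main(String[] args) {
        // tableSizeFor(17):
        //   n = 16           -> 10000
        //   n |= n >>> 1     -> 11000
        //   n |= n >>> 2     -> 11110
        //   n |= n >>> 4     -> 11111
        //   n |= n >>> 8, 16 -> 11111 (no further change)
        //   n + 1            -> 100000 = 32, the next power of two >= 17
        System.out.println(tableSizeFor(17)); // 32
    }
}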
Looking up a node
final Node<K, V> getNode(int hash, Object key) {
    Node<K, V>[] tab; Node<K, V> first, e; int n; K k;
    if ((tab = table) != null && (n = tab.length) > 0 &&
        (first = tab[(n - 1) & hash]) != null) {
        // Always check the head node of the bucket first.
        if (first.hash == hash &&
            ((k = first.key) == key || (key != null && key.equals(k))))
            return first;
        if ((e = first.next) != null) {
            // If the head is a tree node, search the red-black tree.
            if (first instanceof TreeNode)
                return ((TreeNode<K, V>) first).getTreeNode(hash, key);
            do {
                // Otherwise walk the linked list looking for the key.
                if (e.hash == hash &&
                    ((k = e.key) == key || (key != null && key.equals(k))))
                    return e;
                // Move to the next node in the list.
            } while ((e = e.next) != null);
        }
    }
    return null;
}

// Look up a node in the tree, starting from the root.
final TreeNode<K, V> getTreeNode(int h, Object k) {
    return ((parent != null) ? root() : this).find(h, k, null);
}

// Walk up to the root of the tree; only the root has a null parent.
final TreeNode<K, V> root() {
    for (TreeNode<K, V> r = this, p; ; ) {
        if ((p = r.parent) == null)
            return r;
        r = p; // move up to the parent
    }
}

// Search the subtree rooted at this node.
final TreeNode<K, V> find(int h, Object k, Class<?> kc) {
    TreeNode<K, V> p = this; // current node
    do {
        int ph, dir;  // current node's hash; comparison direction
        K pk;         // current node's key
        TreeNode<K, V> pl = p.left, pr = p.right, q; // children; q holds a recursive result
        // Current hash is larger than the target: move into the left subtree.
        if ((ph = p.hash) > h)
            p = pl;
        // Current hash is smaller: move into the right subtree.
        else if (ph < h)
            p = pr;
        // Same hash and equal key: found, return it.
        else if ((pk = p.key) == k || (k != null && k.equals(pk)))
            return p;
        // Left subtree is empty: continue in the right subtree.
        else if (pl == null)
            p = pr;
        // Right subtree is empty: continue in the left subtree.
        else if (pr == null)
            p = pl;
        // Equal hashes, unequal keys: try ordering the current key pk against
        // the target key k via Comparable.
        else if ((kc != null ||
                  (kc = comparableClassFor(k)) != null) &&
                 (dir = compareComparables(kc, k, pk)) != 0)
            p = (dir < 0) ? pl : pr; // negative: go left; positive: go right
        // The keys cannot be compared: recurse into the right subtree...
        else if ((q = pr.find(h, k, kc)) != null)
            return q;
        else
            p = pl; // ...and if not found there, continue down the left subtree.
    } while (p != null);
    return null;
}

static int compareComparables(Class<?> kc, Object k, Object x) {
    return (x == null || x.getClass() != kc ? 0 :
            ((Comparable) k).compareTo(x));
}
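A minimal usage sketch tying the lookup path together; the public get() simply delegates to the getNode() shown above:

import java.util.HashMap;
import java.util.Map;

public class GetDemo {
    public static void main(String[] args) {
        Map<String, Integer> map = new HashMap<>();
        map.put("a", 1);
        // get(key) calls getNode(hash(key), key): it indexes tab[(n - 1) & hash],
        // checks the head node, then searches the tree or walks the list.
        Integer v = map.get("a");       // 1
        Integer missing = map.get("b"); // null: getNode found nothing
        System.out.println(v + ", " + missing);
    }
}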
Inserting a node
final V putVal(int hash, K key, V value, boolean onlyIfAbsent, boolean evict) {
    Node<K, V>[] tab; Node<K, V> p; int n, i;
    /*
     * 1. If the table is empty, create it via resize(): the table is
     *    initialized lazily, on the first call to put().
     */
    if ((tab = table) == null || (n = tab.length) == 0)
        n = (tab = resize()).length;
    /*
     * Check for a hash collision at the target bucket:
     * if the slot is empty, create a new node and insert it directly;
     * otherwise a node already occupies the slot, so check in order:
     *   a. the existing key equals the key being inserted
     *   b. the bucket holds a red-black tree
     *   c. the bucket holds a linked list
     */
    if ((p = tab[i = (n - 1) & hash]) == null)
        tab[i] = newNode(hash, key, value, null);
    else {
        Node<K, V> e; // the node to update, if any
        K k;
        /*
         * a. The key of table[i] equals the inserted key: the new value will
         *    overwrite the old one below.
         */
        if (p.hash == hash &&
            ((k = p.key) == key || (key != null && key.equals(k))))
            e = p;
        /*
         * b. The bucket is a red-black tree: insert or update the mapping
         *    directly within the tree.
         */
        else if (p instanceof TreeNode)
            e = ((TreeNode<K, V>) p).putTreeVal(this, tab, hash, key, value);
        /*
         * c. The bucket is a linked list:
         *    i.  walk table[i] comparing each node's key (via == / equals)
         *        with the inserted key; on a match, the new value overwrites
         *        the old one below;
         *    ii. otherwise append a new node at the tail of the list.
         *    Note: after appending, if the list length reaches
         *    TREEIFY_THRESHOLD (8), the bin must be treeified.
         */
        else {
            for (int binCount = 0; ; ++binCount) {
                if ((e = p.next) == null) {
                    // Append at the tail of the list.
                    p.next = newNode(hash, key, value, null);
                    // Treeify once the bin holds TREEIFY_THRESHOLD nodes.
                    if (binCount >= TREEIFY_THRESHOLD - 1) // -1 for 1st
                        treeifyBin(tab, hash);
                    break;
                }
                // Found a node with an equal key: stop and update it below.
                if (e.hash == hash &&
                    ((k = e.key) == key || (key != null && key.equals(k))))
                    break;
                p = e; // move down the list
            }
        }
        /*
         * An existing mapping was found: replace the old value with the new one.
         */
        if (e != null) { // existing mapping for key
            V oldValue = e.value;
            if (!onlyIfAbsent || oldValue == null)
                e.value = value;
            // Empty hook in HashMap; overridden by LinkedHashMap to maintain ordering.
            afterNodeAccess(e);
            return oldValue;
        }
    }
    ++modCount; // structural modification count, used for fail-fast
    // The node count exceeds the threshold: the table must grow.
    if (++size > threshold)
        resize();
    afterNodeInsertion(evict);
    return null;
}
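A short demo of the return values and the onlyIfAbsent path; put() calls putVal with onlyIfAbsent = false, putIfAbsent() with true:

import java.util.HashMap;
import java.util.Map;

public class PutDemo {
    public static void main(String[] args) {
        Map<String, Integer> map = new HashMap<>();
        System.out.println(map.put("k", 1));         // null: no previous mapping
        System.out.println(map.put("k", 2));         // 1: old value returned and replaced
        System.out.println(map.putIfAbsent("k", 3)); // 2: onlyIfAbsent, value kept
        System.out.println(map.get("k"));            // 2
    }
}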
Resizing

final Node<K, V>[] resize() {
    Node<K, V>[] oldTab = table; // the table before resizing
    int oldCap = (oldTab == null) ? 0 : oldTab.length; // old capacity
    int oldThr = threshold;      // old resize threshold
    int newCap, newThr = 0;
    if (oldCap > 0) {
        // The old capacity is already at the maximum: stop growing, just
        // raise the threshold and return the old table.
        if (oldCap >= MAXIMUM_CAPACITY) {
            threshold = Integer.MAX_VALUE;
            return oldTab;
        }
        // Doubling stays below the maximum and the old capacity is at least
        // the default: double the threshold along with the capacity.
        else if ((newCap = oldCap << 1) < MAXIMUM_CAPACITY &&
                 oldCap >= DEFAULT_INITIAL_CAPACITY)
            newThr = oldThr << 1; // double threshold
    }
    else if (oldThr > 0)
        // The constructor stored the initial capacity in threshold:
        // use it as the new capacity.
        newCap = oldThr;
    else {
        // Zero initial threshold signifies using defaults.
        newCap = DEFAULT_INITIAL_CAPACITY;
        // New threshold = default capacity * default load factor.
        newThr = (int) (DEFAULT_LOAD_FACTOR * DEFAULT_INITIAL_CAPACITY);
    }
    if (newThr == 0) {
        // Compute the new threshold = capacity * load factor; if either the
        // capacity or the product reaches MAXIMUM_CAPACITY, cap the
        // threshold at Integer.MAX_VALUE.
        float ft = (float) newCap * loadFactor;
        newThr = (newCap < MAXIMUM_CAPACITY && ft < (float) MAXIMUM_CAPACITY ?
                  (int) ft : Integer.MAX_VALUE);
    }
    threshold = newThr; // install the new threshold
    // Allocate and install the new table.
    @SuppressWarnings({"all"})
    Node<K, V>[] newTab = (Node<K, V>[]) new Node[newCap];
    table = newTab;
    if (oldTab != null) {
        // Move every bucket into the new table.
        for (int j = 0; j < oldCap; ++j) {
            Node<K, V> e;
            if ((e = oldTab[j]) != null) {
                oldTab[j] = null; // clear the old slot
                // A lone node: recompute its slot and place it directly.
                if (e.next == null)
                    newTab[e.hash & (newCap - 1)] = e;
                // A tree bin: split it between the two target slots,
                // rebalancing the tree as needed.
                else if (e instanceof TreeNode)
                    ((TreeNode<K, V>) e).split(this, newTab, j, oldCap);
                else { // a linked-list bin
                    Node<K, V> loHead = null, loTail = null; // stays at index j
                    Node<K, V> hiHead = null, hiTail = null; // moves to index j + oldCap
                    Node<K, V> next;
                    do {
                        next = e.next;
                        // The node keeps its original index.
                        if ((e.hash & oldCap) == 0) {
                            if (loTail == null)
                                loHead = e;
                            else
                                loTail.next = e; // tail insertion
                            loTail = e; // advance the tail
                        }
                        // The node moves to a recomputed index.
                        else {
                            if (hiTail == null)
                                hiHead = e;
                            else
                                hiTail.next = e;
                            hiTail = e; // advance the tail
                        }
                    } while ((e = next) != null);
                    // List that stays at the original index.
                    if (loTail != null) {
                        loTail.next = null;
                        newTab[j] = loHead;
                    }
                    // List that moves to original index + oldCap.
                    if (hiTail != null) {
                        hiTail.next = null;
                        newTab[j + oldCap] = hiHead;
                    }
                }
            }
        }
    }
    return newTab;
}
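A worked example of the split rule above, assuming oldCap = 16 (binary 10000); the demo class and hash values are illustrative:

public class SplitRuleDemo {
    public static void main(String[] args) {
        int oldCap = 16;   // binary 10000
        int h1 = 0b0_0101; // bit 16 clear
        int h2 = 0b1_0101; // bit 16 set
        // The new index differs from the old one only in the single bit that
        // doubling the capacity exposes, so no full rehash is needed.
        System.out.println((h1 & oldCap) == 0); // true  -> node stays at index j
        System.out.println((h2 & oldCap) == 0); // false -> node moves to index j + oldCap
    }
}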
VI. Comparison with Earlier Versions (JDK 1.7 vs 1.8)
1. Hash computation
// JDK 1.7 (simplified: the hashSeed / String special-casing is omitted)
final int hash(Object k) {
    int h = 0;
    h ^= k.hashCode();
    h ^= (h >>> 20) ^ (h >>> 12);
    return h ^ (h >>> 7) ^ (h >>> 4);
}

// JDK 1.8
static final int hash(Object key) {
    int h;
    return (key == null) ? 0 : (h = key.hashCode()) ^ (h >>> 16);
}
As shown above, 1.7 performs five XOR and four unsigned right-shift operations in total, while 1.8 performs just one shift and one XOR, so the 1.8 version is cheaper to compute.
2. Underlying data structure
1.7 resolves hash collisions with array + linked list, inserting at the head of the list. 1.8 uses array + linked list + red-black tree, inserting at the tail.
Moreover, 1.7's structure can produce a circular linked list.

When the problem occurs: when multiple threads put concurrently and both trigger resize(), a circular list can form, and any later get() on that bucket spins in an infinite loop.

Why it occurs: under concurrent puts, a node that should follow the current node via next has already been moved by another thread into its slot in the new array; because head insertion reverses list order during the transfer, the current node's next pointer is then set back to that just-placed node, closing a cycle.
3. Resize mechanics
1.7 recomputes the bucket index for every node against the new capacity. 1.8 avoids the full recomputation: depending on one bit (hash & oldCap), each node either keeps its current index or moves to current index + old capacity, as sketched below.
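For contrast, a sketch of the 1.7-style recomputation; indexFor mirrors the JDK 1.7 helper of the same name:

// JDK 1.7: every node's bucket index is recomputed against the new length.
static int indexFor(int h, int length) {
    return h & (length - 1);
}
// 1.7's transfer() calls indexFor(e.hash, newCapacity) once per node;
// 1.8 instead tests the single bit e.hash & oldCap, as shown in resize() above.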