I. What is HashMap
In Java, Map is the collection type for storing key-value pairs <key, value>, and HashMap is a hash-table-based implementation of Map. It offers efficient storage and fast lookup, but it is not thread-safe. As the hash-table contract requires, keys in a Map must be unique.
II. Implementation Principle
HashMap is implemented as an array + linked lists + red-black trees. Each array slot stores key-value pairs as Node<key, value> nodes; under certain conditions (the treeify thresholds described below) a bin is converted to TreeNode<key, value> nodes instead.
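For reference, this is the (abridged) shape of the two node types in the JDK source; constructors and the Map.Entry methods are omitted:

static class Node<K, V> implements Map.Entry<K, V> {
    final int hash;     // cached hash of the key
    final K key;
    V value;
    Node<K, V> next;    // next node in the bucket's linked list
    // constructor, getKey/getValue/setValue, equals, hashCode omitted
}

static final class TreeNode<K, V> extends LinkedHashMap.Entry<K, V> {
    TreeNode<K, V> parent;  // red-black tree links
    TreeNode<K, V> left;
    TreeNode<K, V> right;
    TreeNode<K, V> prev;    // needed to unlink next upon deletion
    boolean red;
    // tree operations omitted
}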
III. Core Fields
// The default initial capacity - MUST be a power of two.
static final int DEFAULT_INITIAL_CAPACITY = 1 << 4; // 16

// The maximum capacity, used if a higher value is implicitly specified by
// either of the constructors with arguments. MUST be a power of two <= 1<<30.
static final int MAXIMUM_CAPACITY = 1 << 30;

// The load factor used when none is specified in the constructor.
static final float DEFAULT_LOAD_FACTOR = 0.75f;

// The bin count threshold for using a tree rather than a list for a bin.
// Bins are converted to trees when adding an element to a bin with at least
// this many nodes. The value must be greater than 2 and should be at least 8
// to mesh with assumptions in tree removal about conversion back to plain
// bins upon shrinkage.
static final int TREEIFY_THRESHOLD = 8;

// The bin count threshold for untreeifying a (split) bin during a resize
// operation. Should be less than TREEIFY_THRESHOLD, and at most 6 to mesh
// with shrinkage detection under removal.
static final int UNTREEIFY_THRESHOLD = 6;

// The smallest table capacity for which bins may be treeified: the table
// must hold at least 64 slots before a crowded bin is converted to a tree;
// below that, the table is resized instead.
static final int MIN_TREEIFY_CAPACITY = 64;

// The hash array that stores the nodes.
transient Node<K, V>[] table;

// The number of key-value mappings in the table.
transient int size;

// The number of structural modifications to the HashMap, used for fail-fast iteration.
transient int modCount;

// The next size value at which to resize (capacity * load factor).
int threshold;

// The load factor for the hash table; controls how full the table may get.
final float loadFactor;
IV. Constructors
public HashMap(int initialCapacity, float loadFactor) {
    if (initialCapacity < 0)
        throw new IllegalArgumentException("Illegal initial capacity: " + initialCapacity);
    if (initialCapacity > MAXIMUM_CAPACITY)
        initialCapacity = MAXIMUM_CAPACITY;
    if (loadFactor <= 0 || Float.isNaN(loadFactor))
        throw new IllegalArgumentException("Illegal load factor: " + loadFactor);
    this.loadFactor = loadFactor;
    // Stores the rounded capacity in threshold for now; this is not the real
    // threshold, which is recomputed later (capacity * load factor) in resize().
    this.threshold = tableSizeFor(initialCapacity);
}
V. Key Methods
/*
 * Computes the hash of the key, used to determine the storage position.
 * XORs the high 16 bits of hashCode() into the low 16 bits, which spreads
 * the hash distribution better and reduces collisions: XOR folds the
 * randomness of the high bits into the low bits, which are the only bits
 * used for indexing.
 * If & were used instead, results would skew toward 0; with |, toward 1;
 * neither distributes evenly, which is why XOR is chosen.
 *
 * Example:
 *   String s = "Hello World!";
 *   Integer.toBinaryString(s.hashCode());
 *   h              = 1100 0110 0011 1100 1011 0110 0001 1101
 *   h >>> 16       = 0000 0000 0000 0000 1100 0110 0011 1100
 *   h ^ (h >>> 16) = 1100 0110 0011 1100 0111 0000 0010 0001
 */
static final int hash(Object key) {
    int h;
    return (key == null) ? 0 : (h = key.hashCode()) ^ (h >>> 16);
}
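A minimal sketch of how the spread hash then maps to a bucket index, assuming a table of capacity 16 (the class and variable names here are illustrative, not from the JDK):

public class HashSpreadDemo {
    // Same spreading function as HashMap.hash, copied here for the demo.
    static int hash(Object key) {
        int h;
        return (key == null) ? 0 : (h = key.hashCode()) ^ (h >>> 16);
    }

    public static void main(String[] args) {
        int capacity = 16;              // table length, always a power of two
        int h = hash("Hello World!");
        int index = (capacity - 1) & h; // masks the low bits: index in [0, capacity)
        System.out.println("hash  = " + Integer.toBinaryString(h));
        System.out.println("index = " + index);
    }
}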
/*
 * Computes the table capacity. As the fields above require, the capacity is
 * always a power of two. The reasons:
 * 1. In binary, length is always 100...000, so length - 1 is 111...111.
 * 2. When length is a power of two, hash % length == hash & (length - 1),
 *    and the bitwise form is faster.
 * 3. It keeps computed positions well spread: if length were odd,
 *    length - 1 would be even, so the lowest bit of the mask would be 0 and
 *    every & result would be even, wasting half the slots.
 */
static final int tableSizeFor(int cap) {
    int n = cap - 1;
    n |= n >>> 1;
    n |= n >>> 2;
    n |= n >>> 4;
    n |= n >>> 8;
    n |= n >>> 16;
    return (n < 0) ? 1 : (n >= MAXIMUM_CAPACITY) ? MAXIMUM_CAPACITY : n + 1;
}
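A quick trace of the bit-smearing for cap = 17, wrapped in a runnable sketch (MAXIMUM_CAPACITY and the method body are copied from the JDK; the demo class is illustrative):

public class TableSizeForDemo {
    static final int MAXIMUM_CAPACITY = 1 << 30;

    static int tableSizeFor(int cap) {
        int n = cap - 1;
        n |= n >>> 1;
        n |= n >>> 2;
        n |= n >>> 4;
        n |= n >>> 8;
        n |= n >>> 16;
        return (n < 0) ? 1 : (n >= MAXIMUM_CAPACITY) ? MAXIMUM_CAPACITY : n + 1;
    }

    public static void main(String[] args) {
        // tableSizeFor(17):
        //   n = 16           -> 10000
        //   n |= n >>> 1     -> 11000
        //   n |= n >>> 2     -> 11110
        //   n |= n >>> 4     -> 11111
        //   n |= n >>> 8, 16 -> 11111 (no further change)
        //   n + 1            -> 100000 = 32, the next power of two >= 17
        System.out.println(tableSizeFor(17)); // 32
    }
}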
Looking up a node
final Node<K, V> getNode(int hash, Object key) {
    Node<K, V>[] tab; Node<K, V> first, e; int n; K k;
    if ((tab = table) != null && (n = tab.length) > 0 &&
        (first = tab[(n - 1) & hash]) != null) {
        // Always check the head node of the bucket first.
        if (first.hash == hash &&
            ((k = first.key) == key || (key != null && key.equals(k))))
            return first;
        if ((e = first.next) != null) {
            // If the head is a tree node, search the red-black tree.
            if (first instanceof TreeNode)
                return ((TreeNode<K, V>) first).getTreeNode(hash, key);
            do {
                // Otherwise walk the linked list looking for the key.
                if (e.hash == hash &&
                    ((k = e.key) == key || (key != null && key.equals(k))))
                    return e;
                // Move to the next node in the list.
            } while ((e = e.next) != null);
        }
    }
    return null;
}

// Look up a node in the tree, starting from the root.
final TreeNode<K, V> getTreeNode(int h, Object k) {
    return ((parent != null) ? root() : this).find(h, k, null);
}

// Walk up to the root of the tree; only the root has a null parent.
final TreeNode<K, V> root() {
    for (TreeNode<K, V> r = this, p; ; ) {
        if ((p = r.parent) == null)
            return r;
        r = p; // move up to the parent
    }
}

// Search the subtree rooted at this node.
final TreeNode<K, V> find(int h, Object k, Class<?> kc) {
    TreeNode<K, V> p = this; // current node
    do {
        int ph, dir;  // current node's hash; comparison direction
        K pk;         // current node's key
        TreeNode<K, V> pl = p.left, pr = p.right, q; // children; q holds a recursive result
        // Current hash is larger than the target: move into the left subtree.
        if ((ph = p.hash) > h)
            p = pl;
        // Current hash is smaller: move into the right subtree.
        else if (ph < h)
            p = pr;
        // Same hash and equal key: found, return it.
        else if ((pk = p.key) == k || (k != null && k.equals(pk)))
            return p;
        // Left subtree is empty: continue in the right subtree.
        else if (pl == null)
            p = pr;
        // Right subtree is empty: continue in the left subtree.
        else if (pr == null)
            p = pl;
        // Equal hashes, unequal keys: try ordering the current key pk against
        // the target key k via Comparable.
        else if ((kc != null ||
                  (kc = comparableClassFor(k)) != null) &&
                 (dir = compareComparables(kc, k, pk)) != 0)
            p = (dir < 0) ? pl : pr; // negative: go left; positive: go right
        // The keys cannot be compared: recurse into the right subtree...
        else if ((q = pr.find(h, k, kc)) != null)
            return q;
        else
            p = pl; // ...and if not found there, continue down the left subtree.
    } while (p != null);
    return null;
}

static int compareComparables(Class<?> kc, Object k, Object x) {
    return (x == null || x.getClass() != kc ? 0 :
            ((Comparable) k).compareTo(x));
}
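A minimal usage sketch tying the lookup path together; the public get() simply delegates to the getNode() shown above:

import java.util.HashMap;
import java.util.Map;

public class GetDemo {
    public static void main(String[] args) {
        Map<String, Integer> map = new HashMap<>();
        map.put("a", 1);
        // get(key) calls getNode(hash(key), key): it indexes tab[(n - 1) & hash],
        // checks the head node, then searches the tree or walks the list.
        Integer v = map.get("a");       // 1
        Integer missing = map.get("b"); // null: getNode found nothing
        System.out.println(v + ", " + missing);
    }
}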
Inserting a node
final V putVal(int hash, K key, V value, boolean onlyIfAbsent, boolean evict) {
    Node<K, V>[] tab; Node<K, V> p; int n, i;
    /*
     * 1. If the table is empty, create it via resize(): the table is
     *    initialized lazily, on the first call to put().
     */
    if ((tab = table) == null || (n = tab.length) == 0)
        n = (tab = resize()).length;
    /*
     * Check for a hash collision at the target bucket:
     * if the slot is empty, create a new node and insert it directly;
     * otherwise a node already occupies the slot, so check in order:
     *   a. the existing key equals the key being inserted
     *   b. the bucket holds a red-black tree
     *   c. the bucket holds a linked list
     */
    if ((p = tab[i = (n - 1) & hash]) == null)
        tab[i] = newNode(hash, key, value, null);
    else {
        Node<K, V> e; // the node to update, if any
        K k;
        /*
         * a. The key of table[i] equals the inserted key: the new value will
         *    overwrite the old one below.
         */
        if (p.hash == hash &&
            ((k = p.key) == key || (key != null && key.equals(k))))
            e = p;
        /*
         * b. The bucket is a red-black tree: insert or update the mapping
         *    directly within the tree.
         */
        else if (p instanceof TreeNode)
            e = ((TreeNode<K, V>) p).putTreeVal(this, tab, hash, key, value);
        /*
         * c. The bucket is a linked list:
         *    i.  walk table[i] comparing each node's key (via == / equals)
         *        with the inserted key; on a match, the new value overwrites
         *        the old one below;
         *    ii. otherwise append a new node at the tail of the list.
         *    Note: after appending, if the list length reaches
         *    TREEIFY_THRESHOLD (8), the bin must be treeified.
         */
        else {
            for (int binCount = 0; ; ++binCount) {
                if ((e = p.next) == null) {
                    // Append at the tail of the list.
                    p.next = newNode(hash, key, value, null);
                    // Treeify once the bin holds TREEIFY_THRESHOLD nodes.
                    if (binCount >= TREEIFY_THRESHOLD - 1) // -1 for 1st
                        treeifyBin(tab, hash);
                    break;
                }
                // Found a node with an equal key: stop and update it below.
                if (e.hash == hash &&
                    ((k = e.key) == key || (key != null && key.equals(k))))
                    break;
                p = e; // move down the list
            }
        }
        /*
         * An existing mapping was found: replace the old value with the new one.
         */
        if (e != null) { // existing mapping for key
            V oldValue = e.value;
            if (!onlyIfAbsent || oldValue == null)
                e.value = value;
            // Empty hook in HashMap; overridden by LinkedHashMap to maintain ordering.
            afterNodeAccess(e);
            return oldValue;
        }
    }
    ++modCount; // structural modification count, used for fail-fast
    // The node count exceeds the threshold: the table must grow.
    if (++size > threshold)
        resize();
    afterNodeInsertion(evict);
    return null;
}
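A short demo of the return values and the onlyIfAbsent path; put() calls putVal with onlyIfAbsent = false, putIfAbsent() with true:

import java.util.HashMap;
import java.util.Map;

public class PutDemo {
    public static void main(String[] args) {
        Map<String, Integer> map = new HashMap<>();
        System.out.println(map.put("k", 1));         // null: no previous mapping
        System.out.println(map.put("k", 2));         // 1: old value returned and replaced
        System.out.println(map.putIfAbsent("k", 3)); // 2: onlyIfAbsent, value kept
        System.out.println(map.get("k"));            // 2
    }
}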
Resizing

final Node<K, V>[] resize() {
    Node<K, V>[] oldTab = table; // the table before resizing
    int oldCap = (oldTab == null) ? 0 : oldTab.length; // old capacity
    int oldThr = threshold;      // old resize threshold
    int newCap, newThr = 0;
    if (oldCap > 0) {
        // The old capacity is already at the maximum: stop growing, just
        // raise the threshold and return the old table.
        if (oldCap >= MAXIMUM_CAPACITY) {
            threshold = Integer.MAX_VALUE;
            return oldTab;
        }
        // Doubling stays below the maximum and the old capacity is at least
        // the default: double the threshold along with the capacity.
        else if ((newCap = oldCap << 1) < MAXIMUM_CAPACITY &&
                 oldCap >= DEFAULT_INITIAL_CAPACITY)
            newThr = oldThr << 1; // double threshold
    }
    else if (oldThr > 0)
        // The constructor stored the initial capacity in threshold:
        // use it as the new capacity.
        newCap = oldThr;
    else {
        // Zero initial threshold signifies using defaults.
        newCap = DEFAULT_INITIAL_CAPACITY;
        // New threshold = default capacity * default load factor.
        newThr = (int) (DEFAULT_LOAD_FACTOR * DEFAULT_INITIAL_CAPACITY);
    }
    if (newThr == 0) {
        // Compute the new threshold = capacity * load factor; if either the
        // capacity or the product reaches MAXIMUM_CAPACITY, cap the
        // threshold at Integer.MAX_VALUE.
        float ft = (float) newCap * loadFactor;
        newThr = (newCap < MAXIMUM_CAPACITY && ft < (float) MAXIMUM_CAPACITY ?
                  (int) ft : Integer.MAX_VALUE);
    }
    threshold = newThr; // install the new threshold
    // Allocate and install the new table.
    @SuppressWarnings({"all"})
    Node<K, V>[] newTab = (Node<K, V>[]) new Node[newCap];
    table = newTab;
    if (oldTab != null) {
        // Move every bucket into the new table.
        for (int j = 0; j < oldCap; ++j) {
            Node<K, V> e;
            if ((e = oldTab[j]) != null) {
                oldTab[j] = null; // clear the old slot
                // A lone node: recompute its slot and place it directly.
                if (e.next == null)
                    newTab[e.hash & (newCap - 1)] = e;
                // A tree bin: split it between the two target slots,
                // rebalancing the tree as needed.
                else if (e instanceof TreeNode)
                    ((TreeNode<K, V>) e).split(this, newTab, j, oldCap);
                else { // a linked-list bin
                    Node<K, V> loHead = null, loTail = null; // stays at index j
                    Node<K, V> hiHead = null, hiTail = null; // moves to index j + oldCap
                    Node<K, V> next;
                    do {
                        next = e.next;
                        // The node keeps its original index.
                        if ((e.hash & oldCap) == 0) {
                            if (loTail == null)
                                loHead = e;
                            else
                                loTail.next = e; // tail insertion
                            loTail = e; // advance the tail
                        }
                        // The node moves to a recomputed index.
                        else {
                            if (hiTail == null)
                                hiHead = e;
                            else
                                hiTail.next = e;
                            hiTail = e; // advance the tail
                        }
                    } while ((e = next) != null);
                    // List that stays at the original index.
                    if (loTail != null) {
                        loTail.next = null;
                        newTab[j] = loHead;
                    }
                    // List that moves to original index + oldCap.
                    if (hiTail != null) {
                        hiTail.next = null;
                        newTab[j + oldCap] = hiHead;
                    }
                }
            }
        }
    }
    return newTab;
}
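A worked example of the split rule above, assuming oldCap = 16 (binary 10000); the demo class and hash values are illustrative:

public class SplitRuleDemo {
    public static void main(String[] args) {
        int oldCap = 16;   // binary 10000
        int h1 = 0b0_0101; // bit 16 clear
        int h2 = 0b1_0101; // bit 16 set
        // The new index differs from the old one only in the single bit that
        // doubling the capacity exposes, so no full rehash is needed.
        System.out.println((h1 & oldCap) == 0); // true  -> node stays at index j
        System.out.println((h2 & oldCap) == 0); // false -> node moves to index j + oldCap
    }
}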
VI. Comparison with Earlier Versions (JDK 1.7 vs 1.8)
1. Hash computation
// JDK 1.7 (simplified: the hashSeed / String special-casing is omitted)
final int hash(Object k) {
    int h = 0;
    h ^= k.hashCode();
    h ^= (h >>> 20) ^ (h >>> 12);
    return h ^ (h >>> 7) ^ (h >>> 4);
}

// JDK 1.8
static final int hash(Object key) {
    int h;
    return (key == null) ? 0 : (h = key.hashCode()) ^ (h >>> 16);
}
As shown above, 1.7 performs five XOR and four unsigned right-shift operations in total, while 1.8 performs just one shift and one XOR, so the 1.8 version is cheaper to compute.
2. Underlying data structure
1.7 resolves hash collisions with array + linked list, inserting at the head of the list. 1.8 uses array + linked list + red-black tree, inserting at the tail.
Moreover, 1.7's structure can produce a circular linked list.

When the problem occurs: when multiple threads put concurrently and both trigger resize(), a circular list can form, and any later get() on that bucket spins in an infinite loop.

Why it occurs: under concurrent puts, a node that should follow the current node via next has already been moved by another thread into its slot in the new array; because head insertion reverses list order during the transfer, the current node's next pointer is then set back to that just-placed node, closing a cycle.
3. Resize mechanics
1.7 recomputes the bucket index for every node against the new capacity. 1.8 avoids the full recomputation: depending on one bit (hash & oldCap), each node either keeps its current index or moves to current index + old capacity, as sketched below.
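For contrast, a sketch of the 1.7-style recomputation; indexFor mirrors the JDK 1.7 helper of the same name:

// JDK 1.7: every node's bucket index is recomputed against the new length.
static int indexFor(int h, int length) {
    return h & (length - 1);
}
// 1.7's transfer() calls indexFor(e.hash, newCapacity) once per node;
// 1.8 instead tests the single bit e.hash & oldCap, as shown in resize() above.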