Skip to content

Commit 036d0fc

Browse files
author
xutao
committed
提交
1 parent 23ebc3c commit 036d0fc

3 files changed

Lines changed: 298 additions & 1 deletion

File tree

Week02/242.java

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
class Soltion {
2+
3+
public boolean isAnagram(String s, String t) {
4+
if (s.length() != t.length()) {
5+
return false;
6+
}
7+
int[] counter = new int[26];
8+
for (int i = 0; i < s.length(); i++) {
9+
counter[s.charAt(i) - 'a']++;
10+
counter[t.charAt(i) - 'a']--;
11+
}
12+
for (int count : counter) {
13+
if (count != 0) {
14+
return false;
15+
}
16+
}
17+
return true;
18+
}
19+
}

Week02/NOTE.md

Lines changed: 264 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1,264 @@
1-
学习笔记
1+
## 理论
2+
####hashmap我认为是对于散列表的工程性实现,散列表结构如下
3+
4+
![image-20200621231025363](/Users/sailength-work/Library/Application Support/typora-user-images/image-20200621231025363.png)
5+
6+
通过这个例子,我们可以总结出这样的规律:散列表用的就是数组支持按照下标随机访问的时候,时间复杂度是 O(1) 的特性。我们通过散列函数把元素的键值映射为下标,然后将数据存储在数组中对应下标的位置。当我们按照键值查询元素时,我们用同样的散列函数,将键值转化数组下标,从对应的数组下标的位置取数据。
7+
8+
由于散列函数,有时候会出现hash出现相同值的情况,然后就会出现散列冲突(开放寻址和链表法),hashmap使用了链表法,方法如下![image-20200621231514111](/Users/sailength-work/Library/Application Support/typora-user-images/image-20200621231514111.png)
9+
10+
主要是通过hash函数,计算bucket,然后放入slot中。
11+
12+
以上,就是从概念层面对于hashmap的理解
13+
14+
但是其中会有几个问题:
15+
16+
- 1.如何设计相对好的散列函数?
17+
18+
- 2.随着元素增多,bucket是有限的,如何防止散列冲突导致的性能下降?
19+
20+
- 3.上个问题的延伸,随着元素的增多,必然会扩容bucket,如何实现高效扩容和阈值判断呢?
21+
22+
23+
24+
25+
26+
##带着上面的三个问题,看下java的hashmap代码
27+
28+
#### 1、我们先看下hashmap的散列函数
29+
30+
```java
31+
/**
32+
* Computes key.hashCode() and spreads (XORs) higher bits of hash
33+
* to lower. Because the table uses power-of-two masking, sets of
34+
* hashes that vary only in bits above the current mask will
35+
* always collide. (Among known examples are sets of Float keys
36+
* holding consecutive whole numbers in small tables.) So we
37+
* apply a transform that spreads the impact of higher bits
38+
* downward. There is a tradeoff between speed, utility, and
39+
* quality of bit-spreading. Because many common sets of hashes
40+
* are already reasonably distributed (so don't benefit from
41+
* spreading), and because we use trees to handle large sets of
42+
* collisions in bins, we just XOR some shifted bits in the
43+
* cheapest possible way to reduce systematic lossage, as well as
44+
* to incorporate impact of the highest bits that would otherwise
45+
* never be used in index calculations because of table bounds.
46+
*/
47+
static final int hash(Object key) {
48+
int h;
49+
return (key == null) ? 0 : (h = key.hashCode()) ^ (h >>> 16);
50+
}
51+
```
52+
53+
首先取key的hashCode然后和hashcode右移16位(高位用0补全)然后进行异或
54+
55+
#### 2、put函数
56+
57+
58+
59+
60+
61+
```java
62+
final V putVal(int hash, K key, V value, boolean onlyIfAbsent,
63+
boolean evict) {
64+
Node<K,V>[] tab; Node<K,V> p; int n, i;
65+
//首先把table赋值给tab 检查是不是null 如果是null 那么初始化一下(resize)
66+
//饭后返回table的长度给n
67+
if ((tab = table) == null || (n = tab.length) == 0)
68+
n = (tab = resize()).length;
69+
//(n-1)&hash作为桶的标号
70+
//我们知道桶的数量是 2^n
71+
//2^n - 1 在二进制中表示就是全是1 按位与就比较平均的分配桶了
72+
//但是为什么不直接用hashcode作为hash呢?
73+
//由于使用桶数量-1作为掩码,那么注定高位的hashcode无法参与运算
74+
//我们把高16位与低16使用xor运算,让高位参与hash,减少一定的冲突
75+
//但是如果 桶的数量不是 2^n 那么这么做hash就不那么平均了
76+
//在这里判断是不是有节点,如果没有,那么证明这里没有Hash冲突直接创建一个节点放这里就好了
77+
if ((p = tab[i = (n - 1) & hash]) == null){
78+
//在这里创建一个节点赋值给第i个桶
79+
tab[i] = newNode(hash, key, value, null);
80+
}
81+
else {
82+
//这里证明有Hash冲突了
83+
Node<K,V> e; K k;
84+
//判断当前的hash是不是相等,然后判断key相等
85+
//因为key是个对象,相对来讲equals运算时间更长,先比较hash,如果不相等就不用equals比较了
86+
//如果是相同的键,那么赋值给一个临时引用,先不覆盖
87+
if (p.hash == hash &&
88+
((k = p.key) == key || (key != null && key.equals(k))))
89+
e = p;
90+
//这里判断当前桶里的头结点是不是TreeNode,如果是TreeNode,那就把这个节点插到红黑树里面
91+
else if (p instanceof TreeNode)
92+
e = ((TreeNode<K,V>)p).putTreeVal(this, tab, hash, key, value);
93+
else {
94+
//如果不是Tree节点,那么说明现在还是个链表
95+
//开始遍历
96+
for (int binCount = 0; ; ++binCount) {
97+
//判断是不是尾节点
98+
if ((e = p.next) == null) {
99+
//是尾节点那就直接插后面
100+
p.next = newNode(hash, key, value, null);
101+
//判断链表多长
102+
//如果超过阈值,那就把链表变成二叉树
103+
if (binCount >= TREEIFY_THRESHOLD - 1) // -1 for 1st
104+
treeifyBin(tab, hash);
105+
break;
106+
}
107+
//hash相等之后在进行equals判断如果相等,证明这个已经key已经存在了break
108+
if (e.hash == hash &&
109+
((k = e.key) == key || (key != null && key.equals(k))))
110+
break;
111+
p = e;
112+
}
113+
}
114+
//e是key所在的节点如果是null那么证明是新插的,否则覆盖引用
115+
if (e != null) { // existing mapping for key
116+
V oldValue = e.value;
117+
if (!onlyIfAbsent || oldValue == null)
118+
//如果旧值是null或onlyIfAbsent==0那么覆盖值然后直接返回就可以了
119+
//因为并没有改变table结构或者链表什么的
120+
e.value = value;
121+
//这个是空的,需要子类覆盖一下
122+
//但是绝不能改变table结构什么的
123+
afterNodeAccess(e);
124+
return oldValue;
125+
}
126+
}
127+
//更改迭代器
128+
++modCount;
129+
//大于阈值 扩容
130+
if (++size > threshold)
131+
resize();
132+
afterNodeInsertion(evict);
133+
return null;
134+
}
135+
```
136+
137+
138+
139+
140+
141+
#### 3.扩容
142+
143+
```java
144+
/**
145+
* Initializes or doubles table size. If null, allocates in
146+
* accord with initial capacity target held in field threshold.
147+
* Otherwise, because we are using power-of-two expansion, the
148+
* elements from each bin must either stay at same index, or move
149+
* with a power of two offset in the new table.
150+
*
151+
* @return the table
152+
*/
153+
final Node<K,V>[] resize() {
154+
Node<K,V>[] oldTab = table;
155+
//如果是null那么说明是新创建的
156+
//下面的几个变量,Cap桶容量,Thr阈值
157+
int oldCap = (oldTab == null) ? 0 : oldTab.length;
158+
int oldThr = threshold;
159+
int newCap, newThr = 0;
160+
//先判断旧桶有没有
161+
if (oldCap > 0) {
162+
//如果旧的长度大于最大阈值,那么把桶扩大到最大Integer.MAX,然后不会发生resize了
163+
if (oldCap >= MAXIMUM_CAPACITY) {
164+
threshold = Integer.MAX_VALUE;
165+
return oldTab;
166+
}
167+
//桶容量*2如果比最大容量小以及旧的大于默认桶容量
168+
else if ((newCap = oldCap << 1) < MAXIMUM_CAPACITY &&
169+
oldCap >= DEFAULT_INITIAL_CAPACITY)
170+
newThr = oldThr << 1; // double threshold
171+
}
172+
//旧的没桶数量 但是有阈值 说明第一次创建
173+
else if (oldThr > 0) // initial capacity was placed in threshold
174+
newCap = oldThr;
175+
else { // zero initial threshold signifies using defaults
176+
//没有阈值,没有旧桶容量,说明默认初始化的
177+
newCap = DEFAULT_INITIAL_CAPACITY;
178+
newThr = (int)(DEFAULT_LOAD_FACTOR * DEFAULT_INITIAL_CAPACITY);
179+
}
180+
if (newThr == 0) {
181+
//新阈值 为0 说明需要计算阈值
182+
float ft = (float)newCap * loadFactor;
183+
//cap*factor但是如果太大,那么就扩大到Integer.MAX_VALUE
184+
newThr = (newCap < MAXIMUM_CAPACITY && ft < (float)MAXIMUM_CAPACITY ?
185+
(int)ft : Integer.MAX_VALUE);
186+
}
187+
threshold = newThr;
188+
@SuppressWarnings({"rawtypes","unchecked"})
189+
//按照计算好的桶的数量扩容
190+
Node<K,V>[] newTab = (Node<K,V>[])new Node[newCap];
191+
table = newTab;
192+
if (oldTab != null) {
193+
//把旧的桶所有东西放在新桶里面
194+
for (int j = 0; j < oldCap; ++j) {
195+
Node<K,V> e;
196+
if ((e = oldTab[j]) != null) {
197+
//先清空旧的桶那个位置
198+
oldTab[j] = null;
199+
//如果只有一个元素 直接放在新桶对应的位置
200+
if (e.next == null)
201+
newTab[e.hash & (newCap - 1)] = e;
202+
//如果是树节点
203+
else if (e instanceof TreeNode)
204+
//对应的放在新桶里面
205+
((TreeNode<K,V>)e).split(this, newTab, j, oldCap);
206+
else { // preserve order
207+
//链表节点 遍历,然后对应的放新桶里面
208+
Node<K,V> loHead = null, loTail = null;
209+
Node<K,V> hiHead = null, hiTail = null;
210+
Node<K,V> next;
211+
//开始遍历链表
212+
do {
213+
next = e.next;
214+
//这里挺有意思的
215+
//判断 oldCap那个位置是0还是1
216+
//如果是0 那么扩容后不变
217+
//如果是1 那么扩容后桶是原来位置+原来桶的数量
218+
if ((e.hash & oldCap) == 0) {
219+
if (loTail == null)
220+
loHead = e;
221+
else
222+
loTail.next = e;
223+
loTail = e;
224+
}
225+
else {
226+
if (hiTail == null)
227+
hiHead = e;
228+
else
229+
hiTail.next = e;
230+
hiTail = e;
231+
}
232+
} while ((e = next) != null);
233+
//对应的链表放进去就行
234+
if (loTail != null) {
235+
loTail.next = null;
236+
newTab[j] = loHead;
237+
}
238+
if (hiTail != null) {
239+
hiTail.next = null;
240+
newTab[j + oldCap] = hiHead;
241+
}
242+
}
243+
}
244+
}
245+
}
246+
return newTab;
247+
}
248+
```
249+
250+
## 综上可以解答以上几个问题了
251+
252+
#### 1.hashmap采取了取模的方式进行hash
253+
254+
#### 2.如果单个entry的slot链表过长,超过阈值,默认是8,会进行树化操作,转成红黑树,使查询变为O(n)减少性能退化
255+
256+
#### 3.负载因子(非空闲的bucket/总bucket)大于0.75会进行扩容,从而减少hash冲突
257+
258+
#### 4.hashmap有几个问题
259+
260+
- a.多线程链表环导致cpu飙升
261+
262+
- b.非线程安全
263+
264+
- c.如果扩容会导致,单次的插入的性能升高,对于性能要求较高的系统是无法接受的=>redis对于hashmap有此类的解决、

Week02/Solution.java

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
class Solution {
2+
public int[] twoSum(int[] nums, int target) {
3+
Map<Integer, Integer> map = new HashMap<>();
4+
for (int i = 0; i < nums.length; i++) {
5+
map.put(nums[i], i);
6+
}
7+
for (int i = 0; i < nums.length; i++) {
8+
int val = target - nums[i];
9+
if (map.containsKey(val) && map.get(val) != i) {
10+
return new int[]{i, map.get(val)};
11+
}
12+
}
13+
throw new IllegalArgumentException("no mactch");
14+
}
15+
}

0 commit comments

Comments
 (0)