forked from algorithm010/algorithm010
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathBloomFilter.java
More file actions
132 lines (118 loc) · 3.75 KB
/
BloomFilter.java
File metadata and controls
132 lines (118 loc) · 3.75 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
package Week08;
import java.util.BitSet;
import java.util.Random;
import java.util.Iterator;
public class BloomFilter implements Cloneable {
private BitSet hashes;
private RandomInRange prng;
private int k; // Number of hash functions
private static final double LN2 = 0.6931471805599453; // ln(2)
/**
* Create a new bloom filter.
* @param n Expected number of elements
* @param m Desired size of the container in bits
**/
public BloomFilter(int n, int m) {
k = (int) Math.round(LN2 * m / n);
if (k <= 0) k = 1;
this.hashes = new BitSet(m);
this.prng = new RandomInRange(m, k);
}
/**
* Create a bloom filter of 1Mib.
* @param n Expected number of elements
**/
public BloomFilter(int n) {
this(n, 1024*1024*8);
}
/**
* Add an element to the container
**/
public void add(Object o) {
prng.init(o);
for (RandomInRange r : prng) hashes.set(r.value);
}
/**
* If the element is in the container, returns true.
* If the element is not in the container, returns true with a probability ≈ e^(-ln(2)² * m/n), otherwise false.
* So, when m is large enough, the return value can be interpreted as:
* - true : the element is probably in the container
* - false : the element is definitely not in the container
**/
public boolean contains(Object o) {
prng.init(o);
for (RandomInRange r : prng)
if (!hashes.get(r.value))
return false;
return true;
}
/**
* Removes all of the elements from this filter.
**/
public void clear() {
hashes.clear();
}
/**
* Create a copy of the current filter
**/
public BloomFilter clone() throws CloneNotSupportedException {
return (BloomFilter) super.clone();
}
/**
* Generate a unique hash representing the filter
**/
public int hashCode() {
return hashes.hashCode() ^ k;
}
/**
* Test if the filters have equal bitsets.
* WARNING: two filters may contain the same elements, but not be equal
* (if the filters have different size for example).
*/
public boolean equals(BloomFilter other) {
return this.hashes.equals(other.hashes) && this.k == other.k;
}
/**
* Merge another bloom filter into the current one.
* After this operation, the current bloom filter contains all elements in
* other.
**/
public void merge(BloomFilter other) {
if (other.k != this.k || other.hashes.size() != this.hashes.size()) {
throw new IllegalArgumentException("Incompatible bloom filters");
}
this.hashes.or(other.hashes);
}
private class RandomInRange
implements Iterable<RandomInRange>, Iterator<RandomInRange> {
private Random prng;
private int max; // Maximum value returned + 1
private int count; // Number of random elements to generate
private int i = 0; // Number of elements generated
public int value; // The current value
RandomInRange(int maximum, int k) {
max = maximum;
count = k;
prng = new Random();
}
public void init(Object o) {
prng.setSeed(o.hashCode());
}
public Iterator<RandomInRange> iterator() {
i = 0;
return this;
}
public RandomInRange next() {
i++;
value = prng.nextInt() % max;
if (value<0) value = -value;
return this;
}
public boolean hasNext() {
return i < count;
}
public void remove() {
throw new UnsupportedOperationException();
}
}
}