-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathbloom-test.c
More file actions
381 lines (318 loc) · 12.1 KB
/
bloom-test.c
File metadata and controls
381 lines (318 loc) · 12.1 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
/* test/bloom-test.c
*
* Run a bunch of tests on the Bloom filter implementation. Will print to
* standard output if run in a terminal, will print to the browser console if
* compiled using emscripten and loaded into the browser.
*
* Created by Jacob Strieb
* January 2021
*/
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "bloom.h"
/*******************************************************************************
* Constants (strings for testing)
******************************************************************************/
// Statically allocate some strings to add to the filter. The songs from
// which these lyrics were taken are some of my favorites. I've done my best
// to include variety, and I highly recommend you giving a listen to any you
// are unfamiliar with.
char *input1[] = { "This is the very first test!" };
// Layla (Acoustic Version) - Eric Clapton
char *input2[] = {
"See if you can spot this one?",
"What will you do when you get lonely",
"No one waiting by your side?",
"You've been running, hiding much too long",
"You know it's just your foolish pride"
};
// Blinding Lights - The Weeknd
char *input3[] = {
"I look around and",
"Sin City's cold and empty",
"No one's around to judge me ",
"I can't see clearly when you're gone"
};
// Gorgeous - Kanye West
char *input4[] = {
"Penitentiary chances, the devil dances",
"And eventually answers to the call of autumn",
"All them fallin' for the love of ballin'",
"Got caught with thirty rocks, the cop look like Alec Baldwin",
"Inter-century anthems based off inner-city tantrums",
"Based off the way we was branded",
"Face it, Jerome get more time than Brandon",
"And at the airport, they check all through my bag",
"And tell me that it's random",
"But we stay winning",
"This week has been a bad massage, I need a happy ending",
"And a new beginning and a new fitted",
"And some job opportunities that's lucrative",
"This the real world, homie, school finished",
"They done stole your dreams, you don't know who did it",
"I treat the cash the way the government treats AIDS",
"I won't be satisfied 'til all my n****s get it, get it?"
};
// Doses and Mimosas - Cherub
char *input5[] = {
"Ten in the morning",
"And I'm skipping breakfast",
"And drinking a beverage",
"To ignore it all",
"Guess ignorance is bliss and",
"I've come to embrace it",
"It's all overrated",
"Except drugs and alcohol"
};
// Vivir mi Vida - Marc Anthony
char *input6[] = {
"Voy a vivir el momento",
"Para entender el destino",
"Voy a escuchar en silencio",
"Para encontrar el camino"
};
// Oh Devil - Electric Guest
char *input7[] = {
"Oh, devil, I know you're afraid",
"Sometimes it's hard to learn from all your mistakes",
"Oh, devil, I'm glad that you came",
"Guess I should learn how to live because it won't go away"
};
/*******************************************************************************
* Helper functions
******************************************************************************/
/***
* Add several strings to the Bloom filter, ensuring that they are stil in
* there as the test proceeds.
*/
int test_in(byte *bloom, int bloom_size, char *strings[], int len) {
int sizes[len];
for (int i = 0; i < len; i++) {
sizes[i] = strlen(strings[i]);
}
for (int i = 0; i < len; i++) {
// Make sure the previous strings are still in the filter
for (int j = 0; j < i; j++) {
if (!in_bloom(bloom, bloom_size, (byte *)strings[j], sizes[j])) {
printf("False negative:\n%s\n", strings[j]);
return 0;
}
}
// Add the string
add_bloom(bloom, bloom_size, (byte *)strings[i], sizes[i]);
}
return 1;
}
/***
* Ensure strings that shouldn't be in the Bloom filter aren't, assuming no
* false positives, which are theoretically possible, but have a diminishingly
* low chance of happening for appropriately sized Bloom filters.
*/
int test_out(byte *bloom, int bloom_size, char *strings[], int len) {
int sizes[len];
for (int i = 0; i < len; i++) {
sizes[i] = strlen(strings[i]);
}
for (int i = 0; i < len; i++) {
if (in_bloom(bloom, bloom_size, (byte *)strings[i], sizes[i])) {
printf("False positive:\n%s\n", strings[i]);
return 0;
}
}
return 1;
}
/***
* Test a Bloom filter with stuff already added (in test_new_bloom)
*/
int test_old_bloom(byte *bloom, uint8_t size) {
int success = 1;
success = success && test_in(bloom, size, input7, 4);
success = success && test_in(bloom, size, input6, 4);
success = success && test_in(bloom, size, input5, 8);
success = success && test_in(bloom, size, input4, 17);
success = success && test_in(bloom, size, input3, 4);
success = success && test_in(bloom, size, input2, 5);
success = success && test_in(bloom, size, input1, 1);
return success;
}
/***
* Test the same entries for a bunch of different sized Bloom filters.
*/
int test_new_bloom(byte *bloom, uint8_t size) {
int success = 1;
// TODO: Should this still be here?
success = success && test_in(bloom, size, NULL, 0);
// Ensure inputs that haven't been added yet aren't in the filter, add inputs
// one-by-one, and check both that they are in there, and that they haven't
// changed the membership status of other inputs that are(n't) supposed to be
// in there
success = success && test_out(bloom, size, input1, 1);
success = success && test_out(bloom, size, input2, 5);
success = success && test_out(bloom, size, input3, 4);
success = success && test_out(bloom, size, input4, 17);
success = success && test_out(bloom, size, input5, 8);
success = success && test_out(bloom, size, input6, 4);
success = success && test_out(bloom, size, input7, 4);
success = success && test_in(bloom, size, input1, 1);
success = success && test_out(bloom, size, input2, 5);
success = success && test_out(bloom, size, input3, 4);
success = success && test_out(bloom, size, input4, 17);
success = success && test_out(bloom, size, input5, 8);
success = success && test_out(bloom, size, input6, 4);
success = success && test_out(bloom, size, input7, 4);
success = success && test_in(bloom, size, input2, 5);
success = success && test_in(bloom, size, input1, 1);
success = success && test_out(bloom, size, input3, 4);
success = success && test_out(bloom, size, input4, 17);
success = success && test_out(bloom, size, input5, 8);
success = success && test_out(bloom, size, input6, 4);
success = success && test_out(bloom, size, input7, 4);
success = success && test_in(bloom, size, input3, 4);
success = success && test_in(bloom, size, input2, 5);
success = success && test_in(bloom, size, input1, 1);
success = success && test_out(bloom, size, input4, 17);
success = success && test_out(bloom, size, input5, 8);
success = success && test_out(bloom, size, input6, 4);
success = success && test_out(bloom, size, input7, 4);
success = success && test_in(bloom, size, input4, 17);
success = success && test_in(bloom, size, input3, 4);
success = success && test_in(bloom, size, input2, 5);
success = success && test_in(bloom, size, input1, 1);
success = success && test_out(bloom, size, input5, 8);
success = success && test_out(bloom, size, input6, 4);
success = success && test_out(bloom, size, input7, 4);
success = success && test_in(bloom, size, input5, 8);
success = success && test_in(bloom, size, input4, 17);
success = success && test_in(bloom, size, input3, 4);
success = success && test_in(bloom, size, input2, 5);
success = success && test_in(bloom, size, input1, 1);
success = success && test_out(bloom, size, input6, 4);
success = success && test_out(bloom, size, input7, 4);
success = success && test_in(bloom, size, input6, 4);
success = success && test_in(bloom, size, input5, 8);
success = success && test_in(bloom, size, input4, 17);
success = success && test_in(bloom, size, input3, 4);
success = success && test_in(bloom, size, input2, 5);
success = success && test_in(bloom, size, input1, 1);
success = success && test_out(bloom, size, input7, 4);
success = success && test_in(bloom, size, input7, 4);
success = success && test_in(bloom, size, input6, 4);
success = success && test_in(bloom, size, input5, 8);
success = success && test_in(bloom, size, input4, 17);
success = success && test_in(bloom, size, input3, 4);
success = success && test_in(bloom, size, input2, 5);
success = success && test_in(bloom, size, input1, 1);
if (!success) {
printf("Bloom filter test failed for size %d!\n", (int)size);
}
return success;
}
/***
* Test writing compressed files and decompressing them in-memory
*/
int test_compression(byte **bloom, uint8_t size, size_t *new_size) {
int success = 1;
// Write the compressed Bloom filter out so we can load it back in and test
char *tempfilename = "/tmp/delete.bloom";
write_compressed_bloom(tempfilename, *bloom, size);
free_bloom(*bloom);
// Open the gzipped temporary file and read the entire thing into a buffer
FILE *tempfile;
if ((tempfile = fopen(tempfilename, "rb")) == NULL) {
puts("Failed to open compressed file!");
return 0;
}
// Seek to the end to get file length
if (fseek(tempfile, 0l, SEEK_END)) {
puts("Failed seek to the end of the compressed file!");
return 0;
}
long tempfile_length;
if ((tempfile_length = ftell(tempfile)) == -1) {
puts("Failed to get compressed file position!");
return 0;
}
rewind(tempfile);
// Read the temporary file
byte *compressed = (byte *)malloc(tempfile_length * sizeof(char));
if (fread(compressed, 1, tempfile_length, tempfile) == 0) {
puts("Could not read from compressed file!");
return 0;
}
if (fclose(tempfile) != 0) {
puts("Could not close compressed file!");
return 0;
}
byte *decompressed = NULL;
*new_size = decompress_bloom(compressed, tempfile_length, &decompressed);
if (new_size == 0) {
puts("Could not successfully decompress the Bloom filter!");
return 0;
}
*bloom = decompressed;
free(compressed);
return success;
}
/***
* Test combining Bloom filters
*
* TODO: Improve – test_in might not actually be confirming that it works...
*/
int test_combine() {
int success = 1;
uint8_t size = 15;
byte *bloom1 = new_bloom(size);
byte *bloom2 = new_bloom(size);
success = success && test_in(bloom1, size, input2, 5);
success = success && test_in(bloom2, size, input4, 17);
success = success && test_out(bloom1, size, input4, 17);
combine_bloom(bloom1, bloom2, size);
success = success && test_in(bloom1, size, input4, 17);
free(bloom1);
free(bloom2);
return success;
}
/*******************************************************************************
* Main function
******************************************************************************/
int main(int argc, char *argv[]) {
int success = 1;
puts("Testing Bloom filter library...\n");
// Test the same input on Bloom filters for each number of bits in the range
// 9 to 31
for (uint8_t i = 9; i < 32; i++) {
printf("Testing a Bloom filter of size %d...\n", (int)i);
// Make a new Bloom filter of the current size
byte *bloom = new_bloom(i);
// Test the Bloom filter
success = success && test_new_bloom(bloom, i);
// Write a compressed version, then read it back and decompress it
size_t new_size;
success = success && test_compression(&bloom, i, &new_size);
if (new_size != (size_t)(1 << (i - 3))) {
printf("New Bloom filter has size %d when size %d was expected!\n",
(int)new_size, (int)(1 << (i - 3)));
success = 0;
break;
}
// Ensure that the right values are still in the decompressed version
success = success && test_old_bloom(bloom, i);
// Clean up
free_bloom(bloom);
if (!success) {
break;
}
}
// Test combining Bloom filters
success = success && test_combine();
// TODO: Add tests that create new bloom filters and generate many strings
// over and over, confirming that over time the average converges to the
// expected theoretical number of collisions
puts(success ? "Success!" : "Failure!");
puts("");
return 0;
// TODO: Remove
(void)argc;
(void)argv;
}