-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathQueryEngine.Tests.fs
More file actions
395 lines (327 loc) · 12.8 KB
/
QueryEngine.Tests.fs
File metadata and controls
395 lines (327 loc) · 12.8 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
module OpenDiffix.Core.QueryEngineTests
open Xunit
open FsUnit.Xunit
open QueryEngine
type Tests(db: DBFixture) =
let tableSettings =
Map
[ //
"customers", { AidColumns = [ "id" ] }
"purchases", { AidColumns = [ "cid" ] }
"customers_small", { AidColumns = [ "id" ] }
]
let noiselessAnonParams =
{
TableSettings = tableSettings
Salt = [||]
AccessLevel = PublishTrusted
Strict = false
Suppression = { LowThreshold = 2; LowMeanGap = 0.0; LayerSD = 0. }
AdaptiveBuckets = AdaptiveBucketsParams.Default
OutlierCount = { Lower = 1; Upper = 1 }
TopCount = { Lower = 1; Upper = 1 }
LayerNoiseSD = 0.
RecoverOutliers = true
UseAdaptiveBuckets = false
}
let noisyAnonParams = { AnonymizationParams.Default with TableSettings = tableSettings }
let runQueryWithCustomAnonParams anonymizationParams query =
let queryContext = QueryContext.make anonymizationParams db.DataProvider
run queryContext query
let runQuery = runQueryWithCustomAnonParams noiselessAnonParams
[<Fact>]
let ``query 1`` () =
let expected =
{
Columns = [ { Name = "n"; Type = StringType } ]
Rows = [ [| String "Water" |] ]
}
let queryResult = runQuery "SELECT name AS n FROM products WHERE id = 1"
queryResult |> should equal expected
[<Fact>]
let ``query 2`` () =
let expected =
{
Columns = [ { Name = "c1"; Type = IntegerType }; { Name = "c2"; Type = IntegerType } ]
Rows = [ [| Integer 11L; Integer 4L |] ]
}
let queryResult = runQuery "SELECT count(*) AS c1, count(DISTINCT length(name)) AS c2 FROM products"
queryResult |> should equal expected
[<Fact>]
let ``query 3`` () =
let expected =
{
Columns = [ { Name = "name"; Type = StringType }; { Name = "sum"; Type = RealType } ]
Rows = [ [| String "Chicken"; Real 12.81 |] ]
}
let queryResult = runQuery "SELECT name, SUM(price) FROM products GROUP BY 1 HAVING length(name) = 7"
queryResult |> should equal expected
[<Fact>]
let ``query 4`` () =
let expected =
{
Columns =
[
{ Name = "city"; Type = StringType }
{ Name = "count"; Type = IntegerType }
{ Name = "sum"; Type = IntegerType }
{ Name = "count_noise"; Type = RealType }
{ Name = "count_noise"; Type = RealType }
{ Name = "count_noise"; Type = RealType }
{ Name = "sum_noise"; Type = RealType }
]
Rows =
[
[| String "Berlin"; Integer 10L; Integer 295L; Real 0.0; Real 0.0; Real 0.0; Real 0.0 |]
[| String "Rome"; Integer 10L; Integer 300L; Real 0.0; Real 0.0; Real 0.0; Real 0.0 |]
]
}
let queryResult =
runQuery
"SELECT city, count(distinct id), sum(age), count_noise(*), count_noise(id), count_noise(distinct id), sum_noise(age) FROM customers_small GROUP BY city"
queryResult |> should equal expected
[<Fact>]
let ``query 5 - bucket expansion`` () =
let queryResult = runQuery "SELECT city FROM customers_small"
let expectedRows =
List.collect (fun name -> [ for _i in 1..10 -> [| String name |] ]) [ "Berlin"; "Rome" ]
let expected = { Columns = [ { Name = "city"; Type = StringType } ]; Rows = expectedRows }
queryResult |> should equal expected
/// Returns the aggregate result of a query such as `SELECT count(*) FROM ...`
let runQueryToInteger query =
runQuery query
|> fun result ->
result.Rows
|> List.head
|> Array.head
|> function
| Integer i -> Integer i
| other -> failwith $"Unexpected return '%A{other}'"
// query 6 - query 9 removed (anonymizing JOINs, which are not supported now)
[<Fact>]
let ``query 10`` () =
let expected =
{
Columns = [ { Name = "n"; Type = StringType } ]
Rows = [ [| String "Water" |] ]
}
runQuery "SELECT p.name AS n FROM products AS p WHERE id = 1"
|> should equal expected
let equivalentQueries expectedQuery testQuery =
let testResult = runQueryWithCustomAnonParams noisyAnonParams testQuery
let expected = runQueryWithCustomAnonParams noisyAnonParams expectedQuery
testResult |> should equal expected
[<Fact>]
let ``query 11`` () =
let expected =
{
Columns = [ { Name = "n"; Type = StringType } ]
Rows = [ [| String "1Water" |] ]
}
let queryResult =
runQuery "SELECT CAST(id AS text) || name AS n FROM products WHERE id = 1 GROUP BY id, name"
queryResult |> should equal expected
[<Fact>]
let ``query 12 - limit`` () =
let expected =
{
Columns = [ { Name = "id"; Type = IntegerType } ]
Rows = [ [| Integer 1L |]; [| Integer 2L |]; [| Integer 3L |] ]
}
let queryResult = runQuery "SELECT id FROM products LIMIT 3"
queryResult |> should equal expected
[<Fact>]
let ``query 12 - group with rounding`` () =
let queryResult = runQuery "SELECT round_by(age, 5), count(*) FROM customers_small GROUP BY 1"
let expected =
{
Columns = [ { Name = "round_by"; Type = IntegerType }; { Name = "count"; Type = IntegerType } ]
Rows = [ [| Integer 25L; Integer 7L |]; [| Integer 30L; Integer 7L |]; [| Integer 35L; Integer 6L |] ]
}
queryResult |> should equal expected
[<Fact>]
let ``query 13 - order by`` () =
let queryResult = runQuery "SELECT age, count(*) FROM customers_small GROUP BY 1 ORDER BY 1"
let expected =
{
Columns = [ { Name = "age"; Type = IntegerType }; { Name = "count"; Type = IntegerType } ]
Rows = [ [| Integer 25L; Integer 7L |]; [| Integer 30L; Integer 7L |]; [| Integer 35L; Integer 6L |] ]
}
queryResult |> should equal expected
[<Fact>]
let ``query 14 - avg parity`` () =
let expected =
runQuery "SELECT sum(age) / cast(count(age), 'real') as avg FROM customers_small GROUP BY city"
let queryResult = runQuery "SELECT avg(age) FROM customers_small GROUP BY city"
queryResult |> should equal expected
[<Fact>]
let ``query 15 - avg_noise parity`` () =
let expected =
runQuery "SELECT sum_noise(age) / count(age) as avg_noise FROM customers_small GROUP BY city"
let queryResult = runQuery "SELECT avg_noise(age) FROM customers_small GROUP BY city"
queryResult |> should equal expected
let ``query 16 - count histogram`` () =
let queryResult =
runQuery
"""
SELECT floor_by(amount, 1.5) as amount, count(*), count_histogram(cid)
FROM purchases
GROUP BY 1
ORDER BY 1
"""
let expected =
{
Columns =
[
{ Name = "amount"; Type = RealType }
{ Name = "count"; Type = IntegerType }
{ Name = "count_histogram"; Type = ListType(ListType IntegerType) }
]
Rows =
[
[|
Real 0.0
Integer 337L
List
[
List [ Value.Null; Integer 3L ]
List [ Integer 1L; Integer 61L ]
List [ Integer 2L; Integer 52L ]
List [ Integer 3L; Integer 36L ]
List [ Integer 4L; Integer 11L ]
]
|]
[|
Real 1.5
Integer 132L
List
[
List [ Integer 1L; Integer 76L ]
List [ Integer 2L; Integer 25L ]
List [ Integer 3L; Integer 2L ]
]
|]
[|
Real 3.0
Integer 34L
List
[ //
List [ Integer 1L; Integer 26L ]
List [ Integer 2L; Integer 4L ]
]
|]
[|
Real 4.5
Integer 2L
List
[ //
List [ Integer 1L; Integer 2L ]
]
|]
]
}
queryResult |> should equal expected
[<Fact>]
let ``Subquery wrappers produce consistent results`` () =
equivalentQueries
"SELECT p.name AS n FROM products AS p WHERE id = 1"
"SELECT n FROM (SELECT p.name AS n FROM products AS p WHERE id = 1) x"
equivalentQueries
"SELECT count(*) AS c1, count(DISTINCT length(name)) AS c2 FROM products"
"SELECT count(*) AS c1, count(DISTINCT length(name)) AS c2 FROM (SELECT name FROM products) x"
equivalentQueries
"SELECT count(*) FROM products as a LEFT JOIN products as b ON a.id = b.id"
"SELECT count(*) FROM (SELECT a.id, b.id FROM products as a LEFT JOIN products as b ON a.id = b.id) x"
[<Fact>]
let ``Direct query can use diffix functions`` () =
let expected =
{
Columns = [ { Name = "dc"; Type = IntegerType }; { Name = "lc"; Type = BooleanType } ]
Rows = [ [| Integer 11L; Boolean false |] ]
}
let queryResult = runQuery "SELECT diffix_count(*, id) AS dc, diffix_low_count(id) AS lc FROM products"
queryResult |> should equal expected
[<Fact>]
let ``Grouping order doesn't change results`` () =
equivalentQueries
"SELECT count(*) FROM customers_small GROUP BY round_by(age, 10), city ORDER BY 1"
"SELECT count(*) FROM customers_small GROUP BY city, round_by(age, 10) ORDER BY 1"
[<Fact>]
let ``Duplicated grouping doesn't change results`` () =
equivalentQueries
"SELECT count(*) FROM customers_small GROUP BY city, city ORDER BY 1"
"SELECT count(*) FROM customers_small GROUP BY city ORDER BY 1"
[<Fact>]
let ``Equivalent filtering and grouping doesn't change seed`` () =
equivalentQueries
"SELECT count(*) FROM customers WHERE city = 'Berlin' GROUP BY city"
"SELECT count(*) FROM customers WHERE city = 'Berlin'"
equivalentQueries
"SELECT count(*) FROM customers WHERE city = 'Berlin' AND round_by(age, 10) = 20 GROUP BY city, round_by(age, 10)"
"SELECT count(*) FROM customers WHERE city = 'Berlin' AND round_by(age, 10) = 20"
let tsCast x = $"cast({x}, 'timestamp')"
equivalentQueries
$"""SELECT count(*) FROM customers WHERE date_trunc('year', {tsCast "last_seen"}) = {tsCast "'2017-01-01'"} GROUP BY date_trunc('year', {tsCast "last_seen"})"""
$"""SELECT count(*) FROM customers WHERE date_trunc('year', {tsCast "last_seen"}) = {tsCast "'2017-01-01'"}"""
[<Fact>]
let ``Anonymizing subquery`` () =
let queryResult =
runQuery
"""
SELECT count(city), sum(count) FROM
(SELECT city, count(*) FROM customers_small GROUP BY 1) t
WHERE length(city) > 3
"""
queryResult.Rows |> should equal [ [| Integer 2L; Integer 20L |] ]
[<Fact>]
let ``Joining anonymizing subqueries`` () =
let queryResult =
runQuery
"""
SELECT t1.city, t1.count, t2.count FROM
(SELECT city, count(*) FROM customers GROUP BY 1) t1
LEFT JOIN
(SELECT city, count(*) FROM customers_small GROUP BY 1) t2
ON t1.city = t2.city
"""
let expectedRows =
[
[| String "Paris"; Integer 26L; Value.Null |]
[| String "Berlin"; Integer 77L; Integer 10L |]
[| String "Rome"; Integer 50L; Integer 10L |]
[| String "Madrid"; Integer 25L; Value.Null |]
[| String "London"; Integer 25L; Value.Null |]
]
queryResult.Rows |> should equal expectedRows
[<Fact>]
let ``Join between personal tables`` () =
let queryResult = runQuery "SELECT count(*) FROM customers AS c JOIN purchases AS p ON c.id = p.cid"
let expectedRows = [ [| Integer 506L |] ]
queryResult.Rows |> should equal expectedRows
[<Fact>]
let ``Join between personal and public tables`` () =
let queryResult = runQuery "SELECT count(*) FROM purchases JOIN products ON pid = products.id"
let expectedRows = [ [| Integer 443L |] ]
queryResult.Rows |> should equal expectedRows
[<Fact>]
let ``Anonymizing non-aggregating select sub-query`` () =
let queryResult = runQuery "SELECT city, count(*) FROM (SELECT city FROM customers_small) t GROUP BY 1"
let expectedRows = [ [| String "Berlin"; Integer 10L |]; [| String "Rome"; Integer 10L |] ]
queryResult.Rows |> should equal expectedRows
[<Fact>]
let ``'Adaptive Buckets' sub-query`` () =
let anonParams = { noiselessAnonParams with UseAdaptiveBuckets = true }
let queryResult =
runQueryWithCustomAnonParams
anonParams
"SELECT city, sum(age) FROM (SELECT city, age FROM customers) t GROUP BY 1"
let expectedRows =
[
[| String "London"; Integer 1175L |]
[| String "Berlin"; Integer 3443L |]
[| String "Rome"; Integer 2590L |]
[| String "Paris"; Integer 1261L |]
[| String "Madrid"; Integer 1081L |]
]
queryResult.Rows |> should equal expectedRows
interface IClassFixture<DBFixture>