Skip to content

Commit 1e73e49

Browse files
committed
Stable WHERE parsing
- Minor fix: add default values and initializations in Query class - Add WHERE's operators parsing - Add WHERE's aggregates parsing - Add negation support
1 parent 7ba09dc commit 1e73e49

4 files changed

Lines changed: 147 additions & 55 deletions

File tree

Parser.py

Lines changed: 67 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -256,22 +256,57 @@ def get_column_name_with_alias_table(self,column, table_of_from):
256256
else:
257257
return str(one_table_of_column) + '.' + str(column)
258258

259+
def intersect(self, a, b):
260+
return list(set(a) & set(b))
261+
262+
def predict_operation_type(self, previous_column_offset, current_column_offset):
263+
interval_offset = range(previous_column_offset, current_column_offset)
264+
if(len(self.intersect(interval_offset, self.count_keyword_offset)) >= 1):
265+
return 0
266+
elif(len(self.intersect(interval_offset, self.sum_keyword_offset)) >= 1):
267+
return 1
268+
elif(len(self.intersect(interval_offset, self.average_keyword_offset)) >= 1):
269+
return 2
270+
elif(len(self.intersect(interval_offset, self.max_keyword_offset)) >= 1):
271+
return 3
272+
elif(len(self.intersect(interval_offset, self.min_keyword_offset)) >= 1):
273+
return 4
274+
else:
275+
return None
276+
277+
def predict_operator(self, current_column_offset, next_column_offset):
278+
interval_offset = range(current_column_offset, next_column_offset)
279+
if(len(self.intersect(interval_offset, self.negation_keyword_offset)) >= 1) and (len(self.intersect(interval_offset, self.greater_keyword_offset)) >= 1):
280+
return 0 # less
281+
elif(len(self.intersect(interval_offset, self.negation_keyword_offset)) >= 1) and (len(self.intersect(interval_offset, self.less_keyword_offset)) >= 1):
282+
return 1 # greater
283+
if(len(self.intersect(interval_offset, self.less_keyword_offset)) >= 1):
284+
return 0 # less
285+
elif(len(self.intersect(interval_offset, self.greater_keyword_offset)) >= 1):
286+
return 1 # greater
287+
elif(len(self.intersect(interval_offset, self.between_keyword_offset)) >= 1):
288+
return 2 # between
289+
elif(len(self.intersect(interval_offset, self.negation_keyword_offset)) >= 1):
290+
return 3 # not equal
291+
else:
292+
return 4 # equal
293+
259294
def run(self):
260295
number_of_where_columns = 0
261296
columns_of_where = []
262297
offset_of = {}
263298
column_offset = []
264-
count_keyword_offset = []
265-
sum_keyword_offset = []
266-
average_keyword_offset = []
267-
max_keyword_offset = []
268-
min_keyword_offset = []
269-
greater_keyword_offset = []
270-
less_keyword_offset = []
271-
between_keyword_offset = []
272-
junction_keyword_offset = []
273-
disjunction_keyword_offset = []
274-
negation_keyword_offset = []
299+
self.count_keyword_offset = []
300+
self.sum_keyword_offset = []
301+
self.average_keyword_offset = []
302+
self.max_keyword_offset = []
303+
self.min_keyword_offset = []
304+
self.greater_keyword_offset = []
305+
self.less_keyword_offset = []
306+
self.between_keyword_offset = []
307+
self.junction_keyword_offset = []
308+
self.disjunction_keyword_offset = []
309+
self.negation_keyword_offset = []
275310

276311
for phrase in self.phrases:
277312
for i in range(0, len(phrase)):
@@ -283,45 +318,49 @@ def run(self):
283318
column_offset.append(i)
284319
break
285320
if phrase[i] in self.count_keywords: # before the column
286-
count_keyword_offset.append(i)
321+
self.count_keyword_offset.append(i)
287322
if phrase[i] in self.sum_keywords: # before the column
288-
sum_keyword_offset.append(i)
323+
self.sum_keyword_offset.append(i)
289324
if phrase[i] in self.average_keywords: # before the column
290-
average_keyword_offset.append(i)
325+
self.average_keyword_offset.append(i)
291326
if phrase[i] in self.max_keywords: # before the column
292-
max_keyword_offset.append(i)
327+
self.max_keyword_offset.append(i)
293328
if phrase[i] in self.min_keywords: # before the column
294-
min_keyword_offset.append(i)
329+
self.min_keyword_offset.append(i)
295330
if phrase[i] in self.greater_keywords: # after the column
296-
greater_keyword_offset.append(i)
331+
self.greater_keyword_offset.append(i)
297332
if phrase[i] in self.less_keywords: # after the column
298-
less_keyword_offset.append(i)
333+
self.less_keyword_offset.append(i)
299334
if phrase[i] in self.between_keywords: # after the column
300-
between_keyword_offset.append(i)
301-
if phrase[i] in self.junction_keywords: # between the column and the equal, greater or less keyword
302-
junction_keyword_offset.append(i)
335+
self.between_keyword_offset.append(i)
336+
if phrase[i] in self.junction_keywords: # after the column
337+
self.junction_keyword_offset.append(i)
303338
if phrase[i] in self.disjunction_keywords: # after the column
304-
disjunction_keyword_offset.append(i)
339+
self.disjunction_keyword_offset.append(i)
305340
if phrase[i] in self.negation_keywords: # between the column and the equal, greater or less keyword
306-
negation_keyword_offset.append(i)
341+
self.negation_keyword_offset.append(i)
307342

308343
for table_of_from in self.tables_of_from:
309344
where_object = Where()
310345
for i in range(0, len(column_offset)):
346+
current = column_offset[i]
347+
311348
if i == 0:
312-
previous = 0
349+
previous = 0
313350
else:
314351
previous = column_offset[i-1]
315352

316353
if i == (len(column_offset) - 1):
317-
_next = -1
354+
_next = 100 # put max integer in python, here!
318355
else:
319356
_next = column_offset[i+1]
320357

321358
junction = None
322-
operator = None
323-
value = None
324-
where_object.add_condition(junction, Condition(None, operator, value))
359+
column = self.get_column_name_with_alias_table(columns_of_where[i], table_of_from)
360+
operation_type = self.predict_operation_type(previous, current)
361+
value = 'OOV' # Out Of Vocabulary: feature not implemented yet
362+
operator = self.predict_operator(current, _next)
363+
where_object.add_condition(junction, Condition(column, operation_type, operator, value))
325364
self.where_objects.append(where_object)
326365

327366
def join(self):

Query.py

Lines changed: 63 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -185,22 +185,21 @@ def print_json(self, output):
185185

186186
class Condition():
187187
column = ''
188+
column_type = ''
188189
operator = ''
189190
value = ''
190191

191-
def __init__(self, column, operator, value):
192+
def __init__(self, column, column_type, operator, value):
192193
self.column = column
194+
self.column_type = column_type
193195
self.operator = operator
194196
self.value = value
195197

196198
def get_column(self):
197199
return self.column
198200

199-
def get_just_column_name(self, column):
200-
if column != str(None):
201-
return column.rsplit('.', 1)[1]
202-
else:
203-
return column
201+
def get_column_type(self):
202+
return self.column_type
204203

205204
def get_operator(self):
206205
return self.operator
@@ -209,20 +208,54 @@ def get_value(self):
209208
return self.value
210209

211210
def get_in_list(self):
212-
return [self.column, self.operator, self.value]
211+
return [self.column, self.column_type, self.operator, self.value]
212+
213+
def get_just_column_name(self, column):
214+
if column != str(None):
215+
return column.rsplit('.', 1)[1]
216+
else:
217+
return column
218+
219+
def get_column_with_type_operation(self, column, column_type):
220+
if column_type == 0: # count
221+
return color.BOLD + 'COUNT(' + color.END + self.column + color.BOLD + ')' + color.END
222+
elif column_type == 1: # sum
223+
return color.BOLD + 'SUM(' + color.END + self.column + color.BOLD + ')' + color.END
224+
elif column_type == 2: # average
225+
return color.BOLD + 'AVG(' + color.END + self.column + color.BOLD + ')' + color.END
226+
elif column_type == 3: # max
227+
return color.BOLD + 'MAX(' + color.END + self.column + color.BOLD + ')' + color.END
228+
elif column_type == 4: # min
229+
return color.BOLD + 'MIN(' + color.END + self.column + color.BOLD + ')' + color.END
230+
else: # nothing
231+
return self.column
232+
233+
def get_pretty_operator(self, operator):
234+
if operator == 0: #
235+
return color.BOLD + '<' + color.END
236+
elif operator == 1: #
237+
return color.BOLD + '>' + color.END
238+
elif operator == 2: #
239+
return color.BOLD + 'BETWEEN' + color.END + ' OOV ' + color.BOLD + 'AND' + color.END
240+
elif operator == 3: #
241+
return color.BOLD + '!=' + color.END
242+
else: # 4 = nothing
243+
return color.BOLD + '==' + color.END
213244

214245
def __str__(self):
215-
return '' + '\n'
246+
return str(self.get_column_with_type_operation(self.column, self.column_type)) + ' ' + str(self.get_pretty_operator(self.operator)) + ' ' + str(self.value)
216247

217248
def print_json(self, output):
218-
output.write('\t\t\t{ "column": "' + self.get_just_column_name(str(self.column)) + '",\n\t\t\t "operator": "' + str(self.operator) + '",\n\t\t\t "value": "' + str(self.value) + '"\n\t\t\t}')
249+
output.write('\t\t\t{ "column": "' + self.get_just_column_name(str(self.column)) + '",\n\t\t\t "type": "' + str(self.column_type) + '",\n\t\t\t "operator": "' + str(self.operator) + '",\n\t\t\t "value": "' + str(self.value) + '"\n\t\t\t}')
219250

220251
class Where():
221252
conditions = []
222253

223254
def __init__(self, clause=None):
224255
if clause is not None:
225256
self.conditions.append([None, clause])
257+
else:
258+
self.conditions = []
226259

227260
def add_condition(self, junction, clause):
228261
self.conditions.append([junction, clause])
@@ -231,10 +264,16 @@ def get_conditions(self):
231264
return self.conditions
232265

233266
def __str__(self):
267+
string = ''
234268
if len(self.conditions) >= 1:
235-
return '' + '\n'
269+
for i in range(0, len(self.conditions)):
270+
if i == 0:
271+
string += '\n' + color.BOLD + 'WHERE' + color.END + ' ' + str(self.conditions[i][1])
272+
else:
273+
string += '\n' + color.BOLD + str(self.conditions[i][0]) + color.END + ' ' + str(self.conditions[i][1])
274+
return string
236275
else:
237-
return ''
276+
return string
238277

239278
def print_json(self, output):
240279
if len(self.conditions) >= 1:
@@ -306,6 +345,7 @@ def __init__(self, columns=None, order=None):
306345
self.columns = columns
307346
else:
308347
self.columns = []
348+
309349
if order is not None:
310350
self.order = order
311351
else:
@@ -379,16 +419,28 @@ class Query():
379419
def __init__(self, select=None, _from=None, join=None, where=None, group_by=None, order_by=None):
380420
if select is not None:
381421
self.select = select
422+
else:
423+
self.select = None
382424
if _from is not None:
383425
self._from = _from
426+
else:
427+
self._from = None
384428
if join is not None:
385429
self.join = join
430+
else:
431+
self.join = None
386432
if where is not None:
387433
self.where = where
434+
else:
435+
self.where = None
388436
if group_by is not None:
389437
self.group_by = group_by
438+
else:
439+
self.group_by = None
390440
if order_by is not None:
391441
self.order_by = order_by
442+
else:
443+
self.order_by = None
392444

393445
def set_select(self, select):
394446
self.select = select

README.md

Lines changed: 15 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -41,30 +41,31 @@ If you want a version using TreeTagger, a <a rel="wrapper" href="https://perso.l
4141
- [X] natural join
4242
- [X] union join
4343
- [ ] WHERE
44-
- [ ] one condition
45-
- [ ] multiple conditions
44+
- [X] one condition
45+
- [X] multiple conditions
4646
- [ ] junction
4747
- [ ] disjunction
4848
- [ ] cross-condition
49-
- [ ] operators
50-
- [ ] equal operator
51-
- [ ] not equal operator
52-
- [ ] greater-than operator
53-
- [ ] less-than operator
54-
- [ ] between operator
55-
- [ ] aggregate functions
56-
- [ ] sum in condition
57-
- [ ] avg in condition
58-
- [ ] min in condition
59-
- [ ] max in condition
49+
- [X] operators
50+
- [X] equal operator
51+
- [X] not equal operator
52+
- [X] greater-than operator
53+
- [X] less-than operator
54+
- [X] between operator
55+
- [X] aggregate functions
56+
- [X] sum in condition
57+
- [X] avg in condition
58+
- [X] min in condition
59+
- [X] max in condition
6060
- [X] ORDER BY
6161
- [X] ASC
6262
- [ ] DESC
6363
- [X] GROUP BY
6464
- [X] alias
6565
- [X] multiple queries
6666
- [X] exception and error handling
67-
- [ ] negation support
67+
- [X] negation support
68+
- [ ] detection of values (OOV)
6869

6970
## How to use it?
7071

lang/french.csv

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,8 +5,8 @@ MIN: minimum, minimal, plus petit
55
COUNT: combien, nombre
66
JUNCTION: et
77
DISJUNCTION: ou
8-
GREATER: plus grand
9-
LESS: plus petit
8+
GREATER: plus grand, supérieur
9+
LESS: plus petit, inférieur
1010
BETWEEN: entre
1111
ORDER: ordre, ordonné
1212
GROUP: groupe, groupé, rangé

0 commit comments

Comments
 (0)