-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathHW5.py
More file actions
32 lines (26 loc) · 870 Bytes
/
HW5.py
File metadata and controls
32 lines (26 loc) · 870 Bytes
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
import json
import re
class Word:
    """Attribute bag describing a single token.

    Every keyword argument passed to the constructor is stored on the
    instance as an attribute of the same name.
    """

    def __init__(self, **kwargs):
        # Copy the keyword arguments straight into the instance namespace.
        self.__dict__.update(kwargs)

    def __repr__(self):
        """Return the instance attributes rendered as a dict literal."""
        return repr(self.__dict__)
def make_list(words_dicts):
    """Build a ``Word`` object for every parsed token entry and print it.

    Each resulting ``Word`` gets these attributes:

    * ``wordform`` -- the entry's ``'text'`` value;
    * ``analyses`` -- the number of items in the entry's ``'analysis'`` list
      (0 when the key is absent or the list is empty);
    * ``lemma`` -- ``'lex'`` of the first analysis, only when one exists;
    * ``word_class`` -- the leading run of capital letters of the first
      analysis' ``'gr'`` string, only when that string starts with such a
      run followed by ``=`` or ``,``.

    Args:
        words_dicts: Iterable of dicts, each with a ``'text'`` key and
            optionally an ``'analysis'`` list of dicts carrying ``'lex'``
            and ``'gr'``.

    Returns:
        The list of created ``Word`` objects, in input order.

    Raises:
        KeyError: If an entry has no ``'text'`` key, or its first analysis
            lacks ``'lex'`` or ``'gr'``.
    """
    words = []
    for entry in words_dicts:
        # An entry without an 'analysis' key is treated the same as one
        # whose analysis list is empty instead of raising KeyError.
        analyses = entry.get('analysis') or []
        attrs = {'wordform': entry['text'], 'analyses': len(analyses)}
        if analyses:
            first = analyses[0]
            attrs['lemma'] = first['lex']
            # Leave word_class unset when the grammar tag does not have the
            # expected shape, rather than failing with IndexError.
            tag = re.match('^([A-Z]+)[=,]', first['gr'])
            if tag is not None:
                attrs['word_class'] = tag.group(1)
        # Build the object once and reuse it for both the list and the
        # printed output.
        word = Word(**attrs)
        words.append(word)
        print(word)
    # Hand the collected objects back so the caller can actually use them.
    return words
# Read 'python_mystem.json' (one JSON document per line), keep each distinct
# record whose text contains at least one word character, then hand the
# collected records to make_list().
words_dicts = []
# The context manager guarantees the file is closed, even if a line fails
# to parse.
with open('python_mystem.json', 'r', encoding='utf-8') as words_json:
    for line in words_json:
        if not line.strip():
            # Blank lines (e.g. a trailing newline at the end of the file)
            # are not JSON documents; skip them instead of crashing.
            continue
        record = json.loads(line)
        text = record['text']
        # The cheap text checks run first; the duplicate check scans the
        # whole list of records kept so far, so it goes last.
        if (text != '\\s'
                and re.search('\\w+', text) is not None
                and record not in words_dicts):
            words_dicts.append(record)
make_list(words_dicts)