|
4 | 4 | import os |
5 | 5 |
|
# Root directory holding the LTP v3.4.0 model files.
# Generalized: may be overridden through the LTP_DATA_DIR environment
# variable; falls back to the original hard-coded path, so existing
# behavior is unchanged when the variable is not set.
ltp_data_dir = os.environ.get('LTP_DATA_DIR', '/Users/xuming06/Codes/ltp_data_v3.4.0')

# segment: path to the word-segmentation (CWS) model
cws_model_path = os.path.join(ltp_data_dir, 'cws.model')
8 | 10 |
|
9 | 11 | from pyltp import Segmentor |
|
# Segment the sentence; `segmentor` and `text` are set up earlier in the file
# (that part of the script is outside this hunk).
words = segmentor.segment(text)
# Without a user dictionary "欧几里得" (Euclid) gets split into pieces:
print(" ".join(words))  # 我 是 中国 人 , 我 在 爱斯基摩 打雪仗 。 欧几 里 得 是 西元前 三 世纪 的 希腊 数学家
16 | 18 |
|
# segment with lexicon: reload the segmenter together with a user-supplied
# dictionary so entries such as "欧几里得" survive as single tokens.
segmentor = Segmentor()
# load model plus the self-maintained dictionary file
user_dict = './self_dict.txt'
segmentor.load_with_lexicon(cws_model_path, user_dict)
words = segmentor.segment(text)
print(" ".join(words))  # 我 是 中国 人 , 我 在 爱斯基摩 打雪仗 。 欧几里得 是 西元前 三 世纪 的 希腊 数学家
22 | 25 |
|
# pos: path to the part-of-speech tagging model
pos_model_path = os.path.join(ltp_data_dir, 'pos.model')
24 | 28 |
|
25 | 29 | from pyltp import Postagger |
|
# Render each (word, tag) pair from `zipped` as "word/tag".
# A list comprehension with tuple unpacking replaces the original
# list(generator) wrapper with positional indexing (i[0], i[1]).
word_pos = [word + '/' + tag for word, tag in zipped]
print(' '.join(word_pos))
36 | 40 |
|
| 41 | + |
# ner: path to the named-entity-recognition model
ner_model_path = os.path.join(ltp_data_dir, 'ner.model')
from pyltp import NamedEntityRecognizer
39 | 45 |
|
|
45 | 51 |
|
# Print the NE tag sequence (one tag per token); `nertags` is produced by the
# recognizer in the part of the script elided from this hunk.
print(' '.join(nertags))
47 | 53 |
|
| 54 | + |
# parser: path to the dependency-parsing model
par_model_path = os.path.join(ltp_data_dir, 'parser.model')
from pyltp import Parser
50 | 58 |
|
|
# Print one dependency arc per token as RELATION(dependent, head).
# `relation`, `words`, and `heads` are parallel sequences built by the parser
# above; iterating them with zip() replaces the range(len(...)) index loop.
for rel, word, head in zip(relation, words, heads):
    print(rel + '(' + word + ', ' + head + ')')
63 | 71 |
|
# SRL: semantic role labelling over the segmented, tagged, and parsed sentence.
srl_model_path = os.path.join(ltp_data_dir, 'pisrl.model')
# NOTE: "Sementic" is pyltp's actual class-name spelling, not a typo here.
from pyltp import SementicRoleLabeller

srl = SementicRoleLabeller()
srl.load(srl_model_path)
roles = srl.label(words, postags, arcs)
for role in roles:
    # One "NAME:(start,end)" span per argument of this predicate.
    spans = ["%s:(%d,%d)" % (arg.name, arg.range.start, arg.range.end)
             for arg in role.arguments]
    print(role.index, "".join(spans))

srl.release()
64 | 83 |
|
65 | 84 | # jieba |
66 | 85 | import jieba |
|
0 commit comments