-
Notifications
You must be signed in to change notification settings - Fork 2
Expand file tree
/
Copy pathget_entity.py
More file actions
57 lines (48 loc) · 1.79 KB
/
get_entity.py
File metadata and controls
57 lines (48 loc) · 1.79 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
import json
import requests
import logging
import tagme
from config import TAGME_TOKEN, KG_KEY, PROXIES
# Module-level logger; lookup failures in this file are reported through it
# under the fixed logger name 'Entity'.
logger = logging.getLogger('Entity')
# Hand the token from config to the tagme client at import time, so that
# tagme.annotate() can be called without passing a token explicitly.
tagme.GCUBE_TOKEN = TAGME_TOKEN
# Wikidata `wbsearchentities` URL template: `{}` is replaced with the search
# text; the query asks for a single English result in JSON format.
WIKI_URL = 'https://www.wikidata.org/w/api.php?action=wbsearchentities&search={}&language=en&limit=1&format=json'
# kgsearch.googleapis.com entity-search URL template: `{}` is replaced with
# the query text; the API key from config is baked in and results are
# limited to one.
KG_URL = 'https://kgsearch.googleapis.com/v1/entities:search?query={}&key=' + KG_KEY + '&limit=1'
def get_entities(sentence: str) -> list:
    """Extract entities from a sentence and return information about each.

    The sentence is annotated with TagMe. Every annotation becomes a dict
    holding its begin/end position, score, title and TagMe id, together with
    the ids obtained from ``get_wiki_id`` and ``get_kg_id`` for its title.

    Args:
        sentence: Text to annotate.

    Returns:
        A list of dicts with the keys ``start_pos``, ``end_pos``, ``score``,
        ``title``, ``tagme_id``, ``wiki_id`` and ``kg_id``. The list is empty
        when TagMe gives no response or finds no annotations.
    """
    response = tagme.annotate(sentence)
    # tagme.annotate() returns None when the service gives no usable reply;
    # treat that as "no entities" instead of failing on `.annotations`.
    if response is None:
        logger.debug('TagMe returned no response for sentence: %r', sentence)
        return []

    return [
        {
            'start_pos': ann.begin,
            'end_pos': ann.end,
            'score': ann.score,
            'title': ann.entity_title,
            'tagme_id': ann.entity_id,
            # Two extra network lookups per annotation, keyed by title.
            'wiki_id': get_wiki_id(ann.entity_title),
            'kg_id': get_kg_id(ann.entity_title),
        }
        for ann in response.annotations
    ]
def get_wiki_id(entity_title: str) -> str:
    """Return the Wikidata id of the first search hit for an entity title.

    Args:
        entity_title: Title to search for.

    Returns:
        The ``id`` field of the first search result, or ``''`` when the
        request fails, times out, or yields no result. Failures are logged
        at debug level and never raised to the caller.
    """
    # Function-scope import keeps this block self-contained.
    from urllib.parse import quote

    try:
        # Percent-encode the title: characters such as '&', '#', '+' or '='
        # would otherwise be read as URL syntax and corrupt the query.
        url = WIKI_URL.format(quote(entity_title, safe=''))
        # Without a timeout a stalled connection would block the caller
        # indefinitely; a timeout is handled like any other failed lookup.
        response = requests.get(url, timeout=10)
        entity_id = response.json()['search'][0]['id']
    except Exception as e:
        # Deliberate best-effort: any failure (network, bad JSON, empty
        # result list) simply means "no id found".
        logger.debug(e)
        entity_id = ''
    return entity_id
def get_kg_id(entity_title: str) -> str:
    """Return the ``@id`` of the first KG-search hit for an entity title.

    The request is sent through the proxies configured in ``PROXIES``.

    Args:
        entity_title: Title to search for.

    Returns:
        The ``@id`` of the first element's ``result``, or ``''`` when the
        request fails, times out, or yields no result. Failures are logged
        at debug level and never raised to the caller.
    """
    # Function-scope import keeps this block self-contained.
    from urllib.parse import quote

    try:
        # Percent-encode the title: an unescaped '&' or '#' would cut the
        # query short and could clobber the `key`/`limit` parameters that
        # follow it in the URL template.
        url = KG_URL.format(quote(entity_title, safe=''))
        # Without a timeout a stalled connection would block the caller
        # indefinitely; a timeout is handled like any other failed lookup.
        response = requests.get(url, proxies=PROXIES, timeout=10)
        entity_id = response.json()['itemListElement'][0]['result']['@id']
    except Exception as e:
        # Deliberate best-effort: any failure (network, bad JSON, empty
        # result list) simply means "no id found".
        logger.debug(e)
        entity_id = ''
    return entity_id
if __name__ == '__main__':
    # Manual smoke run: annotate one sample review sentence and print the
    # resulting list of entity dicts.
    sample_sentence = 'hampion cotton rocks ! not only they variety sizes colours , but also good quality . this make excellent gift'
    print(get_entities(sample_sentence))