-
Notifications
You must be signed in to change notification settings - Fork 37
Expand file tree
/
Copy pathrecord_similarity.py
More file actions
131 lines (122 loc) · 3.53 KB
/
record_similarity.py
File metadata and controls
131 lines (122 loc) · 3.53 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
# -*- coding: utf-8 -*-
"""
Example code that calls the Analytics API to get similarity scores between two lists of records ("left" and "right")
"""
import argparse
import json
import os
from rosette.api import API, RecordSimilarityParameters, RosetteException
def run(key, alt_url='https://analytics.babelstreet.com/rest/v1/'):
    """Send a sample record-similarity request and return the response.

    Builds a fixed request made of field definitions, request properties
    and two lists of sample records ("left" and "right"), then submits it
    through ``API.record_similarity``.

    Args:
        key: User key handed to the ``API`` constructor as ``user_key``.
        alt_url: Service URL handed to the ``API`` constructor as
            ``service_url``.

    Returns:
        Whatever ``api.record_similarity`` returns, or ``None`` when the
        call raises ``RosetteException`` (the exception is printed instead
        of being propagated).
    """
    client = API(user_key=key, service_url=alt_url)

    # Each field used in the records below is declared once with its
    # "type" and "weight".
    field_specs = {
        field_name: {"type": field_type, "weight": field_weight}
        for field_name, field_type, field_weight in (
            ("primaryName", "rni_name", 0.5),
            ("dob", "rni_date", 0.2),
            ("addr", "rni_address", 0.5),
            ("dob2", "rni_date", 0.1),
            ("jobTitle", "rni_string", 0.2),
            ("age", "rni_number", 0.4),
            ("isRetired", "rni_boolean", 0.05),
        )
    }

    request_properties = {"threshold": 0.7, "includeExplainInfo": True}

    # The sample records deliberately mix value shapes: the same field
    # appears as a bare string in one record and as an object in another.
    left_records = [
        {
            "primaryName": {
                "text": "Ethan R",
                "entityType": "PERSON",
                "language": "eng",
                "languageOfOrigin": "eng",
                "script": "Latn",
            },
            "dob": "1993-04-16",
            "addr": "123 Roadlane Ave",
            "dob2": {"date": "04161993", "format": "MMddyyyy"},
            "jobTitle": "software engineer",
        },
        {
            "dob": {"date": "1993-04-16"},
            "primaryName": {"text": "Evan R"},
            "age": 47,
            "isRetired": False,
        },
    ]
    right_records = [
        {
            "dob": {"date": "1993-04-16"},
            "primaryName": {"text": "Seth R", "language": "eng"},
            "jobTitle": "manager",
            "isRetired": True,
        },
        {
            "primaryName": "Ivan R",
            "dob": {"date": "1993-04-16"},
            "addr": {"houseNumber": "123", "road": "Roadlane Ave"},
            "dob2": {"date": "1993/04/16"},
            "age": 72,
            "isRetired": True,
        },
    ]

    request = RecordSimilarityParameters()
    request["fields"] = field_specs
    request["properties"] = request_properties
    request["records"] = {"left": left_records, "right": right_records}

    try:
        response = client.record_similarity(request)
    except RosetteException as error:
        # Best-effort example: report the failure and fall through to None.
        print(error)
        return None
    return response
# Command-line interface: the API key is required, the service URL is
# optional. The description is derived from this script's file name
# without its extension.
PARSER = argparse.ArgumentParser(
    description=('Calls the ' +
                 os.path.splitext(os.path.basename(__file__))[0] +
                 ' endpoint'),
    formatter_class=argparse.ArgumentDefaultsHelpFormatter,
)
PARSER.add_argument(
    '-k', '--key',
    required=True,
    help='Analytics API Key',
)
PARSER.add_argument(
    '-u', '--url',
    default='https://analytics.babelstreet.com/rest/v1/',
    help="Alternative API URL",
)

if __name__ == '__main__':
    # Script entry point: parse the arguments, run the example, print the result.
    ARGS = PARSER.parse_args()
    RESULT = run(key=ARGS.key, alt_url=ARGS.url)
    print(RESULT)