-
Notifications
You must be signed in to change notification settings - Fork 14
Expand file tree
/
Copy pathfindQualis.py
More file actions
29 lines (22 loc) · 999 Bytes
/
findQualis.py
File metadata and controls
29 lines (22 loc) · 999 Bytes
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
import pandas as pd
import re
from fuzzywuzzy import fuzz, process
# Qualis-CAPES table, loaded once at import time. The path is relative, so it
# is resolved against the current working directory of the running process.
# find_similar_journal reads the 'TíTULO' and 'ESTRATO' columns of this frame.
qualis_df = pd.read_csv('Data/qualis-capes.csv', encoding="ISO-8859-1")
# Module-level cache of earlier find_similar_journal results, so a repeated
# lookup can be answered without scanning qualis_df again.
similarity_memo = {}
def find_similar_journal(target_text, threshold=70):
    """Return the ESTRATO of the Qualis title most similar to *target_text*.

    Each title in the module-level ``qualis_df`` has its ``(PRINT)``,
    ``(ONLINE)`` or ``(IMPRESSO)`` tag removed and is then compared with the
    upper-cased ``target_text`` using ``fuzz.ratio``. The ``ESTRATO`` of the
    highest-scoring title is returned if that score reaches ``threshold``;
    when several titles share the top score, the one that appears first in
    the table wins.

    Results, including ``'NF'``, are cached in the module-level
    ``similarity_memo`` under the key ``(target_text, threshold)``, so a
    result computed for one threshold is never served for another and
    repeated misses do not rescan the table.

    Args:
        target_text: Journal name to look up. It is upper-cased before the
            comparison; the table titles are compared as stored.
        threshold: Minimum ``fuzz.ratio`` score a title must reach to count
            as a match.

    Returns:
        The ``ESTRATO`` value of the best-matching row, or ``'NF'`` when no
        title scores at least ``threshold``.
    """
    memo_key = (target_text, threshold)
    if memo_key in similarity_memo:
        return similarity_memo[memo_key]

    # Loop invariants: build the pattern and the upper-cased query once.
    edition_tag = re.compile(r'\s*\((PRINT|ONLINE|IMPRESSO)\)\s*')
    needle = target_text.upper()

    best_estrato = 'NF'
    best_score = -1
    # Walk only the two columns that are needed instead of materialising a
    # Series per row with iterrows().
    for title, estrato in zip(qualis_df['TíTULO'], qualis_df['ESTRATO']):
        if not isinstance(title, str):
            # Empty CSV cells are loaded by pandas as NaN (a float), which
            # re.sub cannot process; such rows can never match a title.
            continue
        score = fuzz.ratio(needle, edition_tag.sub('', title))
        # Strict '>' keeps the earliest row among equal scores, which is what
        # a stable descending sort of all candidates would put first.
        if score >= threshold and score > best_score:
            best_score = score
            best_estrato = estrato

    similarity_memo[memo_key] = best_estrato
    return best_estrato