Skip to content

Commit 6ec9993

Browse files
committed
Updated the publications proposal to note the current script path.
1 parent 0a568ba commit 6ec9993

File tree

4 files changed

+13999
-6
lines changed

4 files changed

+13999
-6
lines changed

Proposals/publications/add_bibtex.py

Lines changed: 49 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -11,28 +11,54 @@
1111
import re
1212
import subprocess
1313
import json
14+
from time import sleep
1415
from crossref.restful import Works, Etiquette
1516

1617
csv.field_size_limit(sys.maxsize)
1718

19+
1820
def clean_doi(doi_string:str):
21+
"""_Clean a DOI string_
22+
23+
Args:
24+
doi_string (str): _A text string that purportedly contains a DOI._
25+
26+
Raises:
27+
Exception: _Raises a TypeError if the object passed is not a string._
28+
29+
Returns:
30+
_str_: _A cleaned DOI string._
31+
"""
1932
if type(doi_string) is not str:
20-
raise Exception('TypeError', f'The doi passed -- {doi_string} -- is not a string.')
33+
raise Exception('TypeError', f'The doi passed is not a string.')
2134
outcome = re.match(r'.*(\b10\.\d{4,9}/[-.;()/:\w]+)', doi_string)
2235
if outcome is None:
2336
return None
2437
else:
2538
return outcome.group(1)
2639

40+
2741
def break_citation(citation: str):
    """_Break Citation String Apart_

    Writes the citation to a private temporary file, runs the external
    `anystyle` command-line tool on it with JSON output, and returns the
    decoded result. The temporary file is removed afterwards.

    Args:
        citation (str): _A citation string from the Neotoma Database._

    Raises:
        ValueError: _If anystyle produced no output for the citation._

    Returns:
        _The decoded JSON that anystyle wrote to stdout._
    """
    import os
    import tempfile

    # A unique temp file avoids clobbering a shared, fixed path when several
    # runs overlap. delete=False lets us close the file before anystyle reads it.
    with tempfile.NamedTemporaryFile('w', suffix='.txt', encoding='utf-8',
                                     delete=False) as wr:
        wr.write(citation)
        temp_path = wr.name
    try:
        outcome = subprocess.run(
            ['anystyle', '-f', 'json', 'parse', temp_path],
            capture_output=True)
    finally:
        # Always clean up, even if anystyle is missing or fails to start.
        os.remove(temp_path)
    if outcome.stdout == b'':
        raise ValueError(f'Could not perform extraction from: {citation}')
    return json.loads(outcome.stdout)
3560

61+
3662
def return_bibtex(doi_string:str):
3763
url = 'https://doi.org/' + urllib.request.quote(doi_string)
3864
header = {
@@ -42,6 +68,7 @@ def return_bibtex(doi_string:str):
4268
response = requests.get(url, headers=header)
4369
return response.text.strip()
4470

71+
4572
def check_crossref(cite_object:str):
4673
url = 'https://api.crossref.org/works'
4774
url_call = requests.get(url,
@@ -60,14 +87,26 @@ def check_crossref(cite_object:str):
6087
else:
6188
return None
6289

63-
with open('data/neotoma_publications_202410071440.csv') as file:
64-
db_data = list(csv.DictReader(file))
90+
91+
def call_publications():
    """_Get Publications from Neotoma_

    Requests the publications endpoint of the Neotoma API (the query string
    asks for up to 100000 records) and returns the `data.result` entry of
    the JSON response.

    Raises:
        requests.exceptions.HTTPError: _If the API does not answer with HTTP 200._

    Returns:
        _The `data.result` payload, or an empty list when it is absent._
        _Presumably a list of records that each hold a 'publication' key --_
        _confirm against the API response._
    """
    # (connect, read) timeouts so a stalled connection cannot hang the script.
    result = requests.get("https://api.neotomadb.org/v2.0/data/publications?limit=100000",
                          timeout=(10, 300))
    if result.status_code != 200:
        # Fail loudly rather than implicitly returning None, which a caller
        # iterating over the result would trip on with an opaque TypeError.
        result.raise_for_status()
        raise requests.exceptions.HTTPError(
            f'Unexpected status {result.status_code} from the Neotoma API.',
            response=result)
    payload = json.loads(result.content)
    # Tolerate a missing or null 'data' / 'result' entry.
    return (payload.get('data') or {}).get('result') or []
101+
102+
db_data = [i.get('publication') for i in call_publications()]
65103

66104
# For each row
67105
for i in db_data:
106+
print(f'publicationid: {i.get('publicationid')}')
68107
if any([j in ['bibtex', 'newdoi', 'json'] for j in i.keys()]):
69108
continue
70-
if i.get('doi', '') != '':
109+
if i.get('doi', '') or '' != '':
71110
try:
72111
outcome = clean_doi(i.get('doi'))
73112
if outcome != i.get('doi'):
@@ -93,8 +132,12 @@ def check_crossref(cite_object:str):
93132
print('New match found.')
94133
i['newdoi'] = outcome.get('DOI')
95134
i['json'] = json.dumps(outcome)
135+
bibtex = return_bibtex(outcome.get('DOI'))
136+
i['bibtex'] = i.get('bibtex', '') + bibtex
96137
else:
97138
print('No new match.')
139+
sleep(2)
140+
98141

99142
with open('output.csv', 'w') as file:
100143
writer = csv.DictWriter(file, fieldnames=['publicationid', 'citation', 'doi', 'notes', 'newdoi', 'json', 'bibtex'])

0 commit comments

Comments
 (0)