1616import dash_bootstrap_components as dbc
1717from dash_iconify import DashIconify
1818import seaborn as sns
19- sys .path .append (os .path .join (os .path .dirname (__file__ ), os .pardir , os .pardir , os .pardir ))
19+
20+ sys .path .append (
21+ os .path .join (os .path .dirname (__file__ ), os .pardir , os .pardir , os .pardir )
22+ )
2023
2124from src .logs import get_logger
2225
3538
3639logger = get_logger (__name__ )
3740
41+
3842def layout (gddid = None ):
3943 try :
4044 logger .info (f"Loading article { gddid } " )
4145 global original
4246 global results
43-
47+
4448 original = load_data (f"/entity_extraction/{ gddid } .json" )
4549 results = copy .deepcopy (original )
4650
4751 except FileNotFoundError :
48- return html .Div ([
49- html .H1 ("Error - gddid Not Found" ),
50- html .P ("The requested gddid does not exist in the files." ),
51- html .P ("Please check the article's gddid and try again." ),
52- dcc .Link ("Go back to Home" , href = "/" ),
53- ])
54-
52+ return html .Div (
53+ [
54+ html .H1 ("Error - gddid Not Found" ),
55+ html .P ("The requested gddid does not exist in the files." ),
56+ html .P ("Please check the article's gddid and try again." ),
57+ dcc .Link ("Go back to Home" , href = "/" ),
58+ ]
59+ )
60+
5561 relevance_score = round (original ["predict_proba" ], 2 ) * 100
56-
57- logger .info (
58- f"Relevance score for article { gddid } = { relevance_score } "
59- )
60-
62+
63+ logger .info (f"Relevance score for article { gddid } = { relevance_score } " )
6164
6265 sidebar = html .Div (
6366 [
@@ -202,16 +205,14 @@ def layout(gddid=None):
202205
203206 layout = dmc .NotificationsProvider (
204207 html .Div (
205- dbc .Row (
206- html .H2 (original ["title" ],
207- style = h2_style )),
208- dbc .Row (
209- html .H4 (original ["journal" ],
210- style = h4_style )),
211- dbc .Row (
212- [
213- dmc .Group ([
214- dbc .Col (
208+ [
209+ dbc .Row (html .H2 (original ["title" ], style = h2_style )),
210+ dbc .Row (html .H4 (original ["journal" ], style = h4_style )),
211+ dbc .Row (
212+ [
213+ dmc .Group (
214+ [
215+ dbc .Col (
215216 [
216217 dmc .Button (
217218 dmc .Text ("Home" , style = nav_text_style ),
@@ -331,7 +332,7 @@ def layout(gddid=None):
331332 ),
332333 variant = "filled" ,
333334 active = True ,
334- href = "http://doi.org/" + original ["doi " ],
335+ href = "http://doi.org/" + original ["DOI " ],
335336 target = "_blank" ,
336337 style = nav_button_style ,
337338 )
def save_submit(submit, save, relevant, data):
    """Persist the outcome of an article review and notify the user.

    Triggered by one of three buttons; the triggering prop id decides
    whether the review is submitted, the article is marked non-relevant,
    or in-progress work is saved. The outcome is written back via
    ``update_output``.

    Parameters
    ----------
    submit: int or None
        Click count of the confirm-submit button (truthy when clicked).
    save: int or None
        Click count of the save button (truthy when clicked).
    relevant: int or None
        Click count of the confirm-irrelevant button (truthy when clicked).
    data: dict
        Article store; must contain "gddid" and "entities".

    Returns
    -------
    dmc.Notification or None
        The notification to display, or None when no button fired.
    """

    def _persist(status, corrected_entities, timestamp_format):
        # Single place that writes the review outcome back to the
        # article-relevance parquet file (was triplicated inline).
        update_output(
            gddid=data["gddid"],
            last_updated=datetime.now().strftime(timestamp_format),
            corrected_entities=corrected_entities,
            status=status,
        )

    # NOTE: renamed from `callback_context`, which shadowed the
    # dash.callback_context attribute it is derived from.
    triggered = [p["prop_id"] for p in dash.callback_context.triggered][0]

    if triggered == "confirm-submit-button.n_clicks" and submit:
        _persist("Completed", json.dumps(data["entities"]), "%Y-%m-%d %H:%M:%S")
        logger.info("Entities saved!")
        return dmc.Notification(
            title="Review Complete!",
            id="submit-notification",
            action="show",
            color="green",
            message="Proceed to home page",
            icon=DashIconify(icon="ic:round-celebration"),
        )

    if triggered == "confirm-irrelevant-button.n_clicks" and relevant:
        # "None" is the stored sentinel for "no corrected entities".
        _persist("Non-relevant", "None", "%Y-%m-%d %H:%M:%S")
        logger.info("Article removed from queue")
        return dmc.Notification(
            title="Article Removed!",
            id="remove-notification",
            action="show",
            color="red",
            message="Proceed to home page",
            icon=DashIconify(icon="dashicons-remove"),
        )

    if triggered == "save-button.n_clicks" and save:
        # Save uses a date-only timestamp, unlike the two branches above.
        _persist("In Progress", json.dumps(data["entities"]), "%Y-%m-%d")
        logger.info("Article progress saved!")
        return dmc.Notification(
            title="Progress Saved!",
            id="save-notification",
            action="show",
            color="yellow",
            message="Don't forget to comeback and finish the review",
            icon=DashIconify(icon="dashicons-saved"),
        )

    return None
@@ -1076,6 +1077,7 @@ def tabs_control(n_clicks, site, region, taxa, geog, alti, age, email, accordian
10761077 tabs = defaultdict (list )
10771078 logger .debug (f"Accordian: { accordian } " )
10781079 relevant_sentences = pd .DataFrame (data ["relevant_sentences" ])
1080+ logger .debug (relevant_sentences )
10791081 positive_values = relevant_sentences ["sentid" ][relevant_sentences ["sentid" ] > 0 ]
10801082 # Get all the sentences and corresponding section names
10811083 for entity , values in data ["entities" ][accordian ].items ():
@@ -1245,6 +1247,7 @@ def toggle_confirmation_modal(n_clicks_close, n_clicks, submit, opened):
12451247 prevent_initial_call = True ,
12461248 )(toggle_confirmation_modal )
12471249
1250+
def load_data(file_path):
    """Fetches the extracted entities and metadata for an article.

    Parameter
    ---------
    file_path: str
        Path to the article's entity-extraction JSON file.

    Returns
    -------
    dict: entities and metadata for an article
    """
    # Context manager closes the handle promptly; the previous
    # json.load(open(file_path, "r")) leaked it.
    with open(file_path, "r") as json_file:
        entities = json.load(json_file)
    logger.info(f"Entities extracted from file: {file_path}")

    metadata, corrected_entities = get_article_metadata(entities["gddid"])
    logger.info("Metadata extracted for the article")

    # "None" is the sentinel meaning no reviewer corrections are stored yet.
    if corrected_entities != "None":
        entities["entities"] = json.loads(corrected_entities)
        logger.info("Fetched verified entities from stored output")

    return {**entities, **metadata[entities["gddid"]]}
def get_article_metadata(gddid):
    """Fetch the stored metadata record for a single article.

    Parameter
    ---------
    gddid: str
        xDD ID of the current selected article

    Returns
    -------
    dict: dictionary containing the current article's metadata
    str: dictionary of updated entities in string format
    """
    metadata_columns = [
        "DOI",
        "gddid",
        "predict_proba",
        "title",
        "subtitle",
        "journal",
        "status",
        "last_updated",
        "corrected_entities",
    ]
    # The whole batch file is loaded; filtering by gddid happens in memory.
    batch_path = os.path.join(
        "/MetaExtractor", "inputs", os.environ["ARTICLE_RELEVANCE_BATCH"]
    )
    article_metadata = pd.read_parquet(batch_path)

    matching_rows = article_metadata[article_metadata["gddid"] == gddid]
    filtered_metadata = (
        matching_rows[metadata_columns].set_index("gddid").to_dict(orient="index")
    )

    # Fall back to the "None" sentinel when the article has no stored record
    # or the record carries no corrected entities.
    corrected_entities = filtered_metadata.get(gddid, {}).get(
        "corrected_entities", "None"
    )

    return filtered_metadata, corrected_entities
13101318
13111319
def update_output(**args):
    """Update the article relevance parquet file with extracted and
    verified entities.

    Parameter
    ---------
    args: dict
        Various keys to update in the file

        gddid: str
            xDD ID of the article to update
        last_updated: str
            Timestamp of this update
        corrected_entities: str
            JSON string of verified entities (or the "None" sentinel)
        status: str
            Status of the reviewing process
    """
    # Read and write target the same batch file — build the path once.
    batch_path = os.path.join(
        "/MetaExtractor", "inputs", os.environ["ARTICLE_RELEVANCE_BATCH"]
    )
    article_metadata = pd.read_parquet(batch_path)

    # Compute the row mask once instead of three times.
    row_mask = article_metadata["gddid"] == args["gddid"]
    for column in ("status", "last_updated", "corrected_entities"):
        article_metadata.loc[row_mask, column] = args[column]

    article_metadata.to_parquet(batch_path)
0 commit comments