Skip to content

Commit 5fb25b0

Browse files
committed
Fixed Errors in merge
1 parent cfaadb5 commit 5fb25b0

1 file changed

Lines changed: 106 additions & 98 deletions

File tree

src/data_review_tool/pages/article_review.py

Lines changed: 106 additions & 98 deletions
Original file line number | Diff line number | Diff line change
@@ -16,7 +16,10 @@
1616
import dash_bootstrap_components as dbc
1717
from dash_iconify import DashIconify
1818
import seaborn as sns
19-
sys.path.append(os.path.join(os.path.dirname(__file__), os.pardir, os.pardir, os.pardir))
19+
20+
sys.path.append(
21+
os.path.join(os.path.dirname(__file__), os.pardir, os.pardir, os.pardir)
22+
)
2023

2124
from src.logs import get_logger
2225

@@ -35,29 +38,29 @@
3538

3639
logger = get_logger(__name__)
3740

41+
3842
def layout(gddid=None):
3943
try:
4044
logger.info(f"Loading article {gddid}")
4145
global original
4246
global results
43-
47+
4448
original = load_data(f"/entity_extraction/{gddid}.json")
4549
results = copy.deepcopy(original)
4650

4751
except FileNotFoundError:
48-
return html.Div([
49-
html.H1("Error - gddid Not Found"),
50-
html.P("The requested gddid does not exist in the files."),
51-
html.P("Please check the article's gddid and try again."),
52-
dcc.Link("Go back to Home", href="/"),
53-
])
54-
52+
return html.Div(
53+
[
54+
html.H1("Error - gddid Not Found"),
55+
html.P("The requested gddid does not exist in the files."),
56+
html.P("Please check the article's gddid and try again."),
57+
dcc.Link("Go back to Home", href="/"),
58+
]
59+
)
60+
5561
relevance_score = round(original["predict_proba"], 2) * 100
56-
57-
logger.info(
58-
f"Relevance score for article {gddid} = {relevance_score}"
59-
)
60-
62+
63+
logger.info(f"Relevance score for article {gddid} = {relevance_score}")
6164

6265
sidebar = html.Div(
6366
[
@@ -202,16 +205,14 @@ def layout(gddid=None):
202205

203206
layout = dmc.NotificationsProvider(
204207
html.Div(
205-
dbc.Row(
206-
html.H2(original["title"],
207-
style=h2_style)),
208-
dbc.Row(
209-
html.H4(original["journal"],
210-
style=h4_style)),
211-
dbc.Row(
212-
[
213-
dmc.Group([
214-
dbc.Col(
208+
[
209+
dbc.Row(html.H2(original["title"], style=h2_style)),
210+
dbc.Row(html.H4(original["journal"], style=h4_style)),
211+
dbc.Row(
212+
[
213+
dmc.Group(
214+
[
215+
dbc.Col(
215216
[
216217
dmc.Button(
217218
dmc.Text("Home", style=nav_text_style),
@@ -331,7 +332,7 @@ def layout(gddid=None):
331332
),
332333
variant="filled",
333334
active=True,
334-
href="http://doi.org/" + original["doi"],
335+
href="http://doi.org/" + original["DOI"],
335336
target="_blank",
336337
style=nav_button_style,
337338
)
@@ -966,60 +967,60 @@ def save_submit(submit, save, relevant, data):
966967
str: The notification to display
967968
"""
968969
callback_context = [p["prop_id"] for p in dash.callback_context.triggered][0]
969-
970+
970971
if callback_context == "confirm-submit-button.n_clicks" and submit:
971972
update_data = {
972973
"gddid": data["gddid"],
973974
"last_updated": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
974-
"corrected_entities": json.dumps(data['entities']),
975-
"status": "Completed"
975+
"corrected_entities": json.dumps(data["entities"]),
976+
"status": "Completed",
976977
}
977978
update_output(**update_data)
978979
logger.info("Entities saved!")
979-
return dmc.Notification(
980-
title="Review Complete!",
981-
id="submit-notification",
982-
action="show",
983-
color="green",
984-
message="Proceed to home page",
985-
icon=DashIconify(icon="ic:round-celebration"),
986-
)
987-
980+
return dmc.Notification(
981+
title="Review Complete!",
982+
id="submit-notification",
983+
action="show",
984+
color="green",
985+
message="Proceed to home page",
986+
icon=DashIconify(icon="ic:round-celebration"),
987+
)
988+
988989
elif callback_context == "confirm-irrelevant-button.n_clicks" and relevant:
989990
update_data = {
990991
"gddid": data["gddid"],
991992
"last_updated": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
992993
"corrected_entities": "None",
993-
"status": "Non-relevant"
994+
"status": "Non-relevant",
994995
}
995996
update_output(**update_data)
996997
logger.info("Article removed from queue")
997-
return dmc.Notification(
998-
title="Article Removed!",
999-
id="remove-notification",
1000-
action="show",
1001-
color="red",
1002-
message="Proceed to home page",
1003-
icon=DashIconify(icon="dashicons-remove"),
1004-
)
1005-
998+
return dmc.Notification(
999+
title="Article Removed!",
1000+
id="remove-notification",
1001+
action="show",
1002+
color="red",
1003+
message="Proceed to home page",
1004+
icon=DashIconify(icon="dashicons-remove"),
1005+
)
1006+
10061007
elif callback_context == "save-button.n_clicks" and save:
10071008
update_data = {
10081009
"gddid": data["gddid"],
10091010
"last_updated": datetime.now().strftime("%Y-%m-%d"),
1010-
"corrected_entities": json.dumps(data['entities']),
1011-
"status": "In Progress"
1011+
"corrected_entities": json.dumps(data["entities"]),
1012+
"status": "In Progress",
10121013
}
10131014
update_output(**update_data)
10141015
logger.info("Article progress saved!")
1015-
return dmc.Notification(
1016-
title="Progress Saved!",
1017-
id="save-notification",
1018-
action="show",
1019-
color="yellow",
1020-
message="Don't forget to comeback and finish the review",
1021-
icon=DashIconify(icon="dashicons-saved"),
1022-
)
1016+
return dmc.Notification(
1017+
title="Progress Saved!",
1018+
id="save-notification",
1019+
action="show",
1020+
color="yellow",
1021+
message="Don't forget to comeback and finish the review",
1022+
icon=DashIconify(icon="dashicons-saved"),
1023+
)
10231024

10241025
else:
10251026
return None
@@ -1076,6 +1077,7 @@ def tabs_control(n_clicks, site, region, taxa, geog, alti, age, email, accordian
10761077
tabs = defaultdict(list)
10771078
logger.debug(f"Accordian: {accordian}")
10781079
relevant_sentences = pd.DataFrame(data["relevant_sentences"])
1080+
logger.debug(relevant_sentences)
10791081
positive_values = relevant_sentences["sentid"][relevant_sentences["sentid"] > 0]
10801082
# Get all the sentences and corresponding section names
10811083
for entity, values in data["entities"][accordian].items():
@@ -1245,6 +1247,7 @@ def toggle_confirmation_modal(n_clicks_close, n_clicks, submit, opened):
12451247
prevent_initial_call=True,
12461248
)(toggle_confirmation_modal)
12471249

1250+
12481251
def load_data(file_path):
12491252
"""Fetches the extracted entities and metadata for an article
12501253
@@ -1256,69 +1259,74 @@ def load_data(file_path):
12561259
Returns
12571260
-------
12581261
dict: entities and metadata for an article
1259-
1262+
12601263
"""
12611264
entities = json.load(open(file_path, "r"))
12621265
logger.info(f"Entities extracted from file: {file_path}")
1263-
1264-
metadata, corrected_entities = get_article_metadata(entities['gddid'])
1266+
1267+
metadata, corrected_entities = get_article_metadata(entities["gddid"])
12651268
logger.info(f"Metadata extracted for the article")
1266-
1269+
12671270
if corrected_entities != "None":
12681271
entities["entities"] = json.loads(corrected_entities)
12691272
logger.info("Fetched verified entities from stored output")
1270-
1271-
return {**entities, **metadata[entities['gddid']]}
12721273

1273-
1274+
return {**entities, **metadata[entities["gddid"]]}
1275+
1276+
12741277
def get_article_metadata(gddid):
12751278
"""Fetch the article metadata
1276-
1279+
12771280
Parameter
12781281
---------
12791282
gddid: str
12801283
xDD ID of the current selected article
1281-
1284+
12821285
Returns
12831286
-------
12841287
dict: dictionary containing the current article's metadata
12851288
str: dictionary of updated entities in string format
12861289
"""
12871290
# Read the Parquet file with pushdown predicate
1288-
article_metadata = pd.read_parquet(os.path.join(
1289-
"/MetaExtractor",
1290-
"inputs",
1291-
os.environ["ARTICLE_RELEVANCE_BATCH"]
1292-
))
1291+
article_metadata = pd.read_parquet(
1292+
os.path.join("/MetaExtractor", "inputs", os.environ["ARTICLE_RELEVANCE_BATCH"])
1293+
)
12931294
filtered_metadata = (
1294-
article_metadata[article_metadata['gddid'] == gddid]
1295-
[[
1296-
'DOI', 'gddid', 'predict_proba', 'title',
1297-
'subtitle', 'journal', 'status', 'last_updated',
1298-
'corrected_entities'
1299-
]]
1295+
article_metadata[article_metadata["gddid"] == gddid][
1296+
[
1297+
"DOI",
1298+
"gddid",
1299+
"predict_proba",
1300+
"title",
1301+
"subtitle",
1302+
"journal",
1303+
"status",
1304+
"last_updated",
1305+
"corrected_entities",
1306+
]
1307+
]
13001308
.set_index("gddid")
1301-
.to_dict(orient='index')
1309+
.to_dict(orient="index")
13021310
)
1303-
1311+
13041312
if gddid in filtered_metadata:
13051313
corrected_entities = filtered_metadata[gddid].get("corrected_entities", "None")
13061314
else:
13071315
corrected_entities = "None"
1308-
1316+
13091317
return filtered_metadata, corrected_entities
13101318

13111319

13121320
def update_output(**args):
13131321
"""
13141322
Updates the article relevance parquet file
13151323
with extracted and verified entities
1316-
1324+
13171325
Parameter
13181326
---------
13191327
args: dict
13201328
Various keys to update in the file
1321-
1329+
13221330
gddid: str
13231331
xDD ID of the article to update
13241332
last_updated: datetime
@@ -1328,20 +1336,20 @@ def update_output(**args):
13281336
status: str
13291337
Status of the reviewing process
13301338
"""
1331-
1332-
article_metadata = pd.read_parquet(os.path.join(
1333-
"/MetaExtractor",
1334-
"inputs",
1335-
os.environ["ARTICLE_RELEVANCE_BATCH"]
1336-
))
1337-
article_metadata.loc[article_metadata['gddid'] == args['gddid'], 'status'] = args['status']
1338-
article_metadata.loc[article_metadata['gddid'] == args['gddid'], 'last_updated'] = args['last_updated']
1339-
article_metadata.loc[article_metadata['gddid'] == args['gddid'], 'corrected_entities'] = args['corrected_entities']
1340-
1341-
article_metadata.to_parquet(os.path.join(
1342-
"/MetaExtractor",
1343-
"inputs",
1344-
os.environ["ARTICLE_RELEVANCE_BATCH"]
1345-
))
1346-
13471339

1340+
article_metadata = pd.read_parquet(
1341+
os.path.join("/MetaExtractor", "inputs", os.environ["ARTICLE_RELEVANCE_BATCH"])
1342+
)
1343+
article_metadata.loc[article_metadata["gddid"] == args["gddid"], "status"] = args[
1344+
"status"
1345+
]
1346+
article_metadata.loc[
1347+
article_metadata["gddid"] == args["gddid"], "last_updated"
1348+
] = args["last_updated"]
1349+
article_metadata.loc[
1350+
article_metadata["gddid"] == args["gddid"], "corrected_entities"
1351+
] = args["corrected_entities"]
1352+
1353+
article_metadata.to_parquet(
1354+
os.path.join("/MetaExtractor", "inputs", os.environ["ARTICLE_RELEVANCE_BATCH"])
1355+
)

0 commit comments

Comments
 (0)