Skip to content

Commit 35a988e

Browse files
committed
bug: fixed new data paths
1 parent 097e6ff commit 35a988e

File tree

1 file changed

+92
-30
lines changed

1 file changed

+92
-30
lines changed

tests/entity_extraction/test_baseline_entity_extraction.py

Lines changed: 92 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -229,6 +229,7 @@ def test_extract_email(test_sentences, expected_results):
229229
for test_sentence, expected_result in zip(test_sentences, expected_results):
230230
assert extract_email(test_sentence) == expected_result
231231

232+
232233
@pytest.mark.parametrize(
233234
"test_sentences, expected_results",
234235
[
@@ -245,15 +246,15 @@ def test_extract_email(test_sentences, expected_results):
245246
"10°50'E",
246247
],
247248
[
248-
[{'start': 0, 'end': 13, 'labels': ['GEOG'], 'text': '40:26:46.302N'}],
249-
[{'start': 0, 'end': 14, 'labels': ['GEOG'], 'text': '079:58:55.903W'}],
250-
[{'start': 0, 'end': 10, 'labels': ['GEOG'], 'text': '40°26′46″N'}],
251-
[{'start': 0, 'end': 13, 'labels': ['GEOG'], 'text': '40d 26′ 46″ N'}],
252-
[{'start': 0, 'end': 13, 'labels': ['GEOG'], 'text': 'N40:26:46.302'}],
253-
[{'start': 0, 'end': 10, 'labels': ['GEOG'], 'text': 'N40°26′46″'}],
254-
[{'start': 0, 'end': 12, 'labels': ['GEOG'], 'text': 'N40d 26′ 46″'}],
255-
[{'start': 0, 'end': 11, 'labels': ['GEOG'], 'text': '52°05.75′ N'}],
256-
[{'start': 0, 'end': 7, 'labels': ['GEOG'], 'text': "10°50'E"}]
249+
[{"start": 0, "end": 13, "labels": ["GEOG"], "text": "40:26:46.302N"}],
250+
[{"start": 0, "end": 14, "labels": ["GEOG"], "text": "079:58:55.903W"}],
251+
[{"start": 0, "end": 10, "labels": ["GEOG"], "text": "40°26′46″N"}],
252+
[{"start": 0, "end": 13, "labels": ["GEOG"], "text": "40d 26′ 46″ N"}],
253+
[{"start": 0, "end": 13, "labels": ["GEOG"], "text": "N40:26:46.302"}],
254+
[{"start": 0, "end": 10, "labels": ["GEOG"], "text": "N40°26′46″"}],
255+
[{"start": 0, "end": 12, "labels": ["GEOG"], "text": "N40d 26′ 46″"}],
256+
[{"start": 0, "end": 11, "labels": ["GEOG"], "text": "52°05.75′ N"}],
257+
[{"start": 0, "end": 7, "labels": ["GEOG"], "text": "10°50'E"}],
257258
],
258259
)
259260
],
@@ -266,6 +267,7 @@ def test_extract_geographic_coordinates(test_sentences, expected_results):
266267
for test_sentence, expected_result in zip(test_sentences, expected_results):
267268
assert extract_geographic_coordinates(test_sentence) == expected_result
268269

270+
269271
@pytest.mark.parametrize(
270272
"test_sentences, expected_results",
271273
[
@@ -275,14 +277,43 @@ def test_extract_geographic_coordinates(test_sentences, expected_results):
275277
"The scenery around Garibaldi lake is pristine",
276278
"This movie was shot in the old towns of Europe",
277279
"Philosophical Transactions of and tbe pollen record in the British Isles, In : Birks HH, Birks HJb, Kaland PE, Moe D, eds.",
278-
"Holocene fluctuations of cold climate in the Swiss Alps ( H. ZOLLER -)"
280+
"Holocene fluctuations of cold climate in the Swiss Alps ( H. ZOLLER -)",
279281
],
280282
[
281-
[{'start': 30, 'end': 36, 'labels': ['REGION'], 'text': 'Europe'}, {'start': 131, 'end': 152, 'labels': ['REGION'], 'text': 'the Pacific Northwest'}],
282-
[{'start': 19, 'end': 33, 'labels': ['REGION'], 'text': 'Garibaldi lake'}],
283-
[{'start': 40, 'end': 46, 'labels': ['REGION'], 'text': 'Europe'}],
284-
[{'start': 55, 'end': 72, 'labels': ['REGION'], 'text': 'the British Isles'}],
285-
[{'start': 41, 'end': 55, 'labels': ['REGION'], 'text': 'the Swiss Alps'}]
283+
[
284+
{"start": 30, "end": 36, "labels": ["REGION"], "text": "Europe"},
285+
{
286+
"start": 131,
287+
"end": 152,
288+
"labels": ["REGION"],
289+
"text": "the Pacific Northwest",
290+
},
291+
],
292+
[
293+
{
294+
"start": 19,
295+
"end": 33,
296+
"labels": ["REGION"],
297+
"text": "Garibaldi lake",
298+
}
299+
],
300+
[{"start": 40, "end": 46, "labels": ["REGION"], "text": "Europe"}],
301+
[
302+
{
303+
"start": 55,
304+
"end": 72,
305+
"labels": ["REGION"],
306+
"text": "the British Isles",
307+
}
308+
],
309+
[
310+
{
311+
"start": 41,
312+
"end": 55,
313+
"labels": ["REGION"],
314+
"text": "the Swiss Alps",
315+
}
316+
],
286317
],
287318
)
288319
],
@@ -295,33 +326,58 @@ def test_extract_region_names(test_sentences, expected_results):
295326
for test_sentence, expected_result in zip(test_sentences, expected_results):
296327
assert extract_region_names(test_sentence) == expected_result
297328

329+
298330
@pytest.mark.parametrize(
299331
"test_sentences, expected_results",
300332
[
301333
(
302334
[
303335
"Percentage calculation is based on the terrestrial pollen sum from which Betula was excluded KM/1 KM/2 KM/3 NM/1 NM/2 NM/3 NM/4 NM/5 NM/6 NM/7 NM/8",
304-
"The palaeoecology of an Early Neolithic waterlogged site in northwestern England ( F. OLovmLo -)A pollen-analytical study of cores from the Outer Silver Pit", #False positive
336+
"The palaeoecology of an Early Neolithic waterlogged site in northwestern England ( F. OLovmLo -)A pollen-analytical study of cores from the Outer Silver Pit", # False positive
305337
"Description Salix 0.57 1.76 0.73 13.3 1.67 8.78 1.50 2.88 Solanum dulcamara 0 0 0.73 0 0 1.58 0 0 Lysimachia vulgaris 0 0 4.90 0 0.84 0.53 0 0 Mentha-type 00 0 1.04 0 0 00 Lemna 00 0 7.44 0 1.58 0 0",
306338
"The first major impacts upon the vegetation record become eident from about 3610 BP with sharp reductions in arboreal taxa, the appearance of cerealtype pollen in L.A.BI, and marked increases in Calluna, Foaceae and Cyperaceae.",
307339
"The overlying Sphagnum peat is devoid of clastic elements for a short period during which sediment inorganic content declines.",
308-
"Abstract ) ( A. T. CROSS, G. G. THOMPSON and J. B. ZAITZEFF ) 3 - 1 1 Gymnospermae, general The gymnospermous affinity of Eucommiidites ERDTMAN, 1948"
340+
"Abstract ) ( A. T. CROSS, G. G. THOMPSON and J. B. ZAITZEFF ) 3 - 1 1 Gymnospermae, general The gymnospermous affinity of Eucommiidites ERDTMAN, 1948",
309341
],
310342
[
311-
[{'start': 73, 'end': 79, 'labels': ['TAXA'], 'text': 'Betula'}],
312-
[{'start': 146, 'end': 152, 'labels': ['TAXA'], 'text': 'Silver'}], # False positive
343+
[{"start": 73, "end": 79, "labels": ["TAXA"], "text": "Betula"}],
313344
[
314-
{'start': 12, 'end': 17, 'labels': ['TAXA'], 'text': 'Salix'},
315-
{'start': 58, 'end': 75, 'labels': ['TAXA'], 'text': 'Solanum dulcamara'},
316-
{'start': 98, 'end': 117, 'labels': ['TAXA'], 'text': 'Lysimachia vulgaris'},
317-
{'start': 143, 'end': 154, 'labels': ['TAXA'], 'text': 'Mentha-type'},
318-
{'start': 143, 'end': 149, 'labels': ['TAXA'], 'text': 'Mentha'},
319-
{'start': 172, 'end': 177, 'labels': ['TAXA'], 'text': 'Lemna'}],
345+
{"start": 146, "end": 152, "labels": ["TAXA"], "text": "Silver"}
346+
], # False positive
347+
[
348+
{"start": 12, "end": 17, "labels": ["TAXA"], "text": "Salix"},
349+
{
350+
"start": 58,
351+
"end": 75,
352+
"labels": ["TAXA"],
353+
"text": "Solanum dulcamara",
354+
},
355+
{
356+
"start": 98,
357+
"end": 117,
358+
"labels": ["TAXA"],
359+
"text": "Lysimachia vulgaris",
360+
},
361+
{
362+
"start": 143,
363+
"end": 154,
364+
"labels": ["TAXA"],
365+
"text": "Mentha-type",
366+
},
367+
{"start": 143, "end": 149, "labels": ["TAXA"], "text": "Mentha"},
368+
{"start": 172, "end": 177, "labels": ["TAXA"], "text": "Lemna"},
369+
],
320370
[
321-
{'start': 195, 'end': 202, 'labels': ['TAXA'], 'text': 'Calluna'},
322-
{'start': 216, 'end': 226, 'labels': ['TAXA'], 'text': 'Cyperaceae'}],
323-
[{'start': 14, 'end': 22, 'labels': ['TAXA'], 'text': 'Sphagnum'}],
324-
[{'start': 70, 'end': 81, 'labels': ['TAXA'], 'text': 'Gymnosperma'}]
371+
{"start": 195, "end": 202, "labels": ["TAXA"], "text": "Calluna"},
372+
{
373+
"start": 216,
374+
"end": 226,
375+
"labels": ["TAXA"],
376+
"text": "Cyperaceae",
377+
},
378+
],
379+
[{"start": 14, "end": 22, "labels": ["TAXA"], "text": "Sphagnum"}],
380+
[{"start": 70, "end": 81, "labels": ["TAXA"], "text": "Gymnosperma"}],
325381
],
326382
)
327383
],
@@ -332,4 +388,10 @@ def test_extract_taxa(test_sentences, expected_results):
332388
"""
333389

334390
for test_sentence, expected_result in zip(test_sentences, expected_results):
335-
assert extract_taxa(test_sentence, os.path.join("data", "raw", "taxa.csv")) == expected_result
391+
assert (
392+
extract_taxa(
393+
test_sentence,
394+
os.path.join("data", "entity-extraction", "raw", "taxa.csv"),
395+
)
396+
== expected_result
397+
)

0 commit comments

Comments
 (0)