@@ -229,6 +229,7 @@ def test_extract_email(test_sentences, expected_results):
229229 for test_sentence , expected_result in zip (test_sentences , expected_results ):
230230 assert extract_email (test_sentence ) == expected_result
231231
232+
232233@pytest .mark .parametrize (
233234 "test_sentences, expected_results" ,
234235 [
@@ -245,15 +246,15 @@ def test_extract_email(test_sentences, expected_results):
245246 "10°50'E" ,
246247 ],
247248 [
248- [{' start' : 0 , ' end' : 13 , ' labels' : [' GEOG' ], ' text' : ' 40:26:46.302N' }],
249- [{' start' : 0 , ' end' : 14 , ' labels' : [' GEOG' ], ' text' : ' 079:58:55.903W' }],
250- [{' start' : 0 , ' end' : 10 , ' labels' : [' GEOG' ], ' text' : ' 40°26′46″N' }],
251- [{' start' : 0 , ' end' : 13 , ' labels' : [' GEOG' ], ' text' : ' 40d 26′ 46″ N' }],
252- [{' start' : 0 , ' end' : 13 , ' labels' : [' GEOG' ], ' text' : ' N40:26:46.302' }],
253- [{' start' : 0 , ' end' : 10 , ' labels' : [' GEOG' ], ' text' : ' N40°26′46″' }],
254- [{' start' : 0 , ' end' : 12 , ' labels' : [' GEOG' ], ' text' : ' N40d 26′ 46″' }],
255- [{' start' : 0 , ' end' : 11 , ' labels' : [' GEOG' ], ' text' : ' 52°05.75′ N' }],
256- [{' start' : 0 , ' end' : 7 , ' labels' : [' GEOG' ], ' text' : "10°50'E" }]
249+ [{" start" : 0 , " end" : 13 , " labels" : [" GEOG" ], " text" : " 40:26:46.302N" }],
250+ [{" start" : 0 , " end" : 14 , " labels" : [" GEOG" ], " text" : " 079:58:55.903W" }],
251+ [{" start" : 0 , " end" : 10 , " labels" : [" GEOG" ], " text" : " 40°26′46″N" }],
252+ [{" start" : 0 , " end" : 13 , " labels" : [" GEOG" ], " text" : " 40d 26′ 46″ N" }],
253+ [{" start" : 0 , " end" : 13 , " labels" : [" GEOG" ], " text" : " N40:26:46.302" }],
254+ [{" start" : 0 , " end" : 10 , " labels" : [" GEOG" ], " text" : " N40°26′46″" }],
255+ [{" start" : 0 , " end" : 12 , " labels" : [" GEOG" ], " text" : " N40d 26′ 46″" }],
256+ [{" start" : 0 , " end" : 11 , " labels" : [" GEOG" ], " text" : " 52°05.75′ N" }],
257+ [{" start" : 0 , " end" : 7 , " labels" : [" GEOG" ], " text" : "10°50'E" }],
257258 ],
258259 )
259260 ],
@@ -266,6 +267,7 @@ def test_extract_geographic_coordinates(test_sentences, expected_results):
266267 for test_sentence , expected_result in zip (test_sentences , expected_results ):
267268 assert extract_geographic_coordinates (test_sentence ) == expected_result
268269
270+
269271@pytest .mark .parametrize (
270272 "test_sentences, expected_results" ,
271273 [
@@ -275,14 +277,43 @@ def test_extract_geographic_coordinates(test_sentences, expected_results):
275277 "The scenery around Garibaldi lake is pristine" ,
276278 "This movie was shot in the old towns of Europe" ,
277279 "Philosophical Transactions of and tbe pollen record in the British Isles, In : Birks HH, Birks HJb, Kaland PE, Moe D, eds." ,
278- "Holocene fluctuations of cold climate in the Swiss Alps ( H. ZOLLER -)"
280+ "Holocene fluctuations of cold climate in the Swiss Alps ( H. ZOLLER -)" ,
279281 ],
280282 [
281- [{'start' : 30 , 'end' : 36 , 'labels' : ['REGION' ], 'text' : 'Europe' }, {'start' : 131 , 'end' : 152 , 'labels' : ['REGION' ], 'text' : 'the Pacific Northwest' }],
282- [{'start' : 19 , 'end' : 33 , 'labels' : ['REGION' ], 'text' : 'Garibaldi lake' }],
283- [{'start' : 40 , 'end' : 46 , 'labels' : ['REGION' ], 'text' : 'Europe' }],
284- [{'start' : 55 , 'end' : 72 , 'labels' : ['REGION' ], 'text' : 'the British Isles' }],
285- [{'start' : 41 , 'end' : 55 , 'labels' : ['REGION' ], 'text' : 'the Swiss Alps' }]
283+ [
284+ {"start" : 30 , "end" : 36 , "labels" : ["REGION" ], "text" : "Europe" },
285+ {
286+ "start" : 131 ,
287+ "end" : 152 ,
288+ "labels" : ["REGION" ],
289+ "text" : "the Pacific Northwest" ,
290+ },
291+ ],
292+ [
293+ {
294+ "start" : 19 ,
295+ "end" : 33 ,
296+ "labels" : ["REGION" ],
297+ "text" : "Garibaldi lake" ,
298+ }
299+ ],
300+ [{"start" : 40 , "end" : 46 , "labels" : ["REGION" ], "text" : "Europe" }],
301+ [
302+ {
303+ "start" : 55 ,
304+ "end" : 72 ,
305+ "labels" : ["REGION" ],
306+ "text" : "the British Isles" ,
307+ }
308+ ],
309+ [
310+ {
311+ "start" : 41 ,
312+ "end" : 55 ,
313+ "labels" : ["REGION" ],
314+ "text" : "the Swiss Alps" ,
315+ }
316+ ],
286317 ],
287318 )
288319 ],
@@ -295,33 +326,58 @@ def test_extract_region_names(test_sentences, expected_results):
295326 for test_sentence , expected_result in zip (test_sentences , expected_results ):
296327 assert extract_region_names (test_sentence ) == expected_result
297328
329+
298330@pytest .mark .parametrize (
299331 "test_sentences, expected_results" ,
300332 [
301333 (
302334 [
303335 "Percentage calculation is based on the terrestrial pollen sum from which Betula was excluded KM/1 KM/2 KM/3 NM/1 NM/2 NM/3 NM/4 NM/5 NM/6 NM/7 NM/8" ,
304- "The palaeoecology of an Early Neolithic waterlogged site in northwestern England ( F. OLovmLo -)A pollen-analytical study of cores from the Outer Silver Pit" , # False positive
336+ "The palaeoecology of an Early Neolithic waterlogged site in northwestern England ( F. OLovmLo -)A pollen-analytical study of cores from the Outer Silver Pit" , # False positive
305337 "Description Salix 0.57 1.76 0.73 13.3 1.67 8.78 1.50 2.88 Solanum dulcamara 0 0 0.73 0 0 1.58 0 0 Lysimachia vulgaris 0 0 4.90 0 0.84 0.53 0 0 Mentha-type 00 0 1.04 0 0 00 Lemna 00 0 7.44 0 1.58 0 0" ,
306338 "The first major impacts upon the vegetation record become eident from about 3610 BP with sharp reductions in arboreal taxa, the appearance of cerealtype pollen in L.A.BI, and marked increases in Calluna, Foaceae and Cyperaceae." ,
307339 "The overlying Sphagnum peat is devoid of clastic elements for a short period during which sediment inorganic content declines." ,
308- "Abstract ) ( A. T. CROSS, G. G. THOMPSON and J. B. ZAITZEFF ) 3 - 1 1 Gymnospermae, general The gymnospermous affinity of Eucommiidites ERDTMAN, 1948"
340+ "Abstract ) ( A. T. CROSS, G. G. THOMPSON and J. B. ZAITZEFF ) 3 - 1 1 Gymnospermae, general The gymnospermous affinity of Eucommiidites ERDTMAN, 1948" ,
309341 ],
310342 [
311- [{'start' : 73 , 'end' : 79 , 'labels' : ['TAXA' ], 'text' : 'Betula' }],
312- [{'start' : 146 , 'end' : 152 , 'labels' : ['TAXA' ], 'text' : 'Silver' }], # False positive
343+ [{"start" : 73 , "end" : 79 , "labels" : ["TAXA" ], "text" : "Betula" }],
313344 [
314- {'start' : 12 , 'end' : 17 , 'labels' : ['TAXA' ], 'text' : 'Salix' },
315- {'start' : 58 , 'end' : 75 , 'labels' : ['TAXA' ], 'text' : 'Solanum dulcamara' },
316- {'start' : 98 , 'end' : 117 , 'labels' : ['TAXA' ], 'text' : 'Lysimachia vulgaris' },
317- {'start' : 143 , 'end' : 154 , 'labels' : ['TAXA' ], 'text' : 'Mentha-type' },
318- {'start' : 143 , 'end' : 149 , 'labels' : ['TAXA' ], 'text' : 'Mentha' },
319- {'start' : 172 , 'end' : 177 , 'labels' : ['TAXA' ], 'text' : 'Lemna' }],
345+ {"start" : 146 , "end" : 152 , "labels" : ["TAXA" ], "text" : "Silver" }
346+ ], # False positive
347+ [
348+ {"start" : 12 , "end" : 17 , "labels" : ["TAXA" ], "text" : "Salix" },
349+ {
350+ "start" : 58 ,
351+ "end" : 75 ,
352+ "labels" : ["TAXA" ],
353+ "text" : "Solanum dulcamara" ,
354+ },
355+ {
356+ "start" : 98 ,
357+ "end" : 117 ,
358+ "labels" : ["TAXA" ],
359+ "text" : "Lysimachia vulgaris" ,
360+ },
361+ {
362+ "start" : 143 ,
363+ "end" : 154 ,
364+ "labels" : ["TAXA" ],
365+ "text" : "Mentha-type" ,
366+ },
367+ {"start" : 143 , "end" : 149 , "labels" : ["TAXA" ], "text" : "Mentha" },
368+ {"start" : 172 , "end" : 177 , "labels" : ["TAXA" ], "text" : "Lemna" },
369+ ],
320370 [
321- {'start' : 195 , 'end' : 202 , 'labels' : ['TAXA' ], 'text' : 'Calluna' },
322- {'start' : 216 , 'end' : 226 , 'labels' : ['TAXA' ], 'text' : 'Cyperaceae' }],
323- [{'start' : 14 , 'end' : 22 , 'labels' : ['TAXA' ], 'text' : 'Sphagnum' }],
324- [{'start' : 70 , 'end' : 81 , 'labels' : ['TAXA' ], 'text' : 'Gymnosperma' }]
371+ {"start" : 195 , "end" : 202 , "labels" : ["TAXA" ], "text" : "Calluna" },
372+ {
373+ "start" : 216 ,
374+ "end" : 226 ,
375+ "labels" : ["TAXA" ],
376+ "text" : "Cyperaceae" ,
377+ },
378+ ],
379+ [{"start" : 14 , "end" : 22 , "labels" : ["TAXA" ], "text" : "Sphagnum" }],
380+ [{"start" : 70 , "end" : 81 , "labels" : ["TAXA" ], "text" : "Gymnosperma" }],
325381 ],
326382 )
327383 ],
@@ -332,4 +388,10 @@ def test_extract_taxa(test_sentences, expected_results):
332388 """
333389
334390 for test_sentence , expected_result in zip (test_sentences , expected_results ):
335- assert extract_taxa (test_sentence , os .path .join ("data" , "raw" , "taxa.csv" )) == expected_result
391+ assert (
392+ extract_taxa (
393+ test_sentence ,
394+ os .path .join ("data" , "entity-extraction" , "raw" , "taxa.csv" ),
395+ )
396+ == expected_result
397+ )
0 commit comments