Skip to content

Commit c135b09

Browse files
committed
tests: initial basic tests
1 parent 31c2fc8 commit c135b09

1 file changed

Lines changed: 80 additions & 0 deletions

File tree

Lines changed: 80 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,80 @@
1+
# Author: Ty Andrews
2+
# Date: 2023-06-02
3+
4+
import os, sys
5+
6+
import json
7+
import pytest
8+
9+
# ensure that the directory two levels above this file is on the path so the `src` package import below resolves
10+
sys.path.append(os.path.join(os.path.dirname(__file__), "..", ".."))
11+
12+
from src.entity_extraction.training.hf_token_classification.labelstudio_preprocessing import (
13+
convert_labelled_data_to_hf_format,
14+
)
15+
16+
17+
# test that a nonexistent folder raises an error
18+
def test_nonexistant_folder_raises_error():
    """Check that converting a folder path that does not exist fails.

    The conversion is expected to raise ``FileNotFoundError`` when it is
    pointed at ``data/labelled/nonexistant_folder``.
    """
    missing_folder = "data/labelled/nonexistant_folder"

    with pytest.raises(FileNotFoundError):
        convert_labelled_data_to_hf_format(missing_folder)
21+
22+
23+
# test that a folder missing one of the train/test/val subfolders raises an error
24+
# create temporary folders for just train/val
25+
def test_folder_without_train_test_val_raises_error(tmp_path):
    """Check that an incomplete set of split sub-folders is rejected.

    Only ``train`` and ``val`` are created under a temporary folder (no
    ``test``), and the conversion is expected to raise
    ``FileNotFoundError``.

    Args:
        tmp_path: pytest's per-test temporary directory fixture.
    """
    folder = tmp_path / "folder"
    folder.mkdir()

    # "test" is deliberately left out so the folder layout is incomplete
    for split_name in ("train", "val"):
        (folder / split_name).mkdir()

    with pytest.raises(FileNotFoundError):
        convert_labelled_data_to_hf_format(folder)
34+
35+
36+
# test that the function processes the data correctly
37+
def test_process_labelled_data(tmp_path):
    """Check that a complete train/test/val layout produces the output files.

    One JSON file holding the same sample record is written into each of
    the ``train``, ``test`` and ``val`` sub-folders, the conversion is run
    on the parent folder, and the test then asserts that ``train.json``,
    ``test.json`` and ``val.json`` exist in that parent folder.

    Args:
        tmp_path: pytest's per-test temporary directory fixture.
    """
    folder_path = str(tmp_path)

    # single labelled span stored under the record's "result" list
    labelled_span = {
        "id": "OXpADMYGB3",
        "type": "labels",
        "value": {
            "end": 63,
            "text": "Neogene Mediterranean",
            "start": 42,
            "labels": ["REGION"],
        },
        "origin": "prediction",
        "to_name": "text",
        "from_name": "label",
    }
    sample_data = {
        "task": {"data": {"text": "Sample text", "gdd_id": "sample_id"}},
        "result": [labelled_span],
    }

    # split sub-folder -> name of the input file written into it
    input_files = {"train": "123.txt", "test": "456.txt", "val": "789.txt"}

    # create every split folder first, then populate them
    for split_name in input_files:
        os.makedirs(os.path.join(folder_path, split_name))

    for split_name, file_name in input_files.items():
        with open(os.path.join(folder_path, split_name, file_name), "w") as f:
            json.dump(sample_data, f)

    convert_labelled_data_to_hf_format(folder_path)

    # one output file per split is expected next to the split folders
    for split_name in input_files:
        assert os.path.exists(os.path.join(folder_path, f"{split_name}.json"))

0 commit comments

Comments
 (0)