-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathtest_validation.py
More file actions
101 lines (84 loc) · 3.84 KB
/
test_validation.py
File metadata and controls
101 lines (84 loc) · 3.84 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
import json
import pytest
from parse_document_model import Document, Page
from parse_document_model.attributes import PageAttributes, TextAttributes
from parse_document_model.marks import Mark, TextStyleMark, UrlMark
def test_read_from_json():
    """Parse each sample extraction file and validate the resulting tree.

    Every fixture is loaded from disk, turned into a ``Document`` and then
    walked page by page, text node by text node and mark by mark, asserting
    the category and type expected at each level.
    """
    filepaths = ["test/data/extract-text-1.json",
                 "test/data/extract-text-2.json",
                 "test/data/extract-text-empty.json"]
    for filepath in filepaths:
        # Open through a context manager so the handle is closed right away
        # instead of leaking until garbage collection; the encoding is pinned
        # so the result does not depend on the platform default.
        with open(filepath, "r", encoding="utf-8") as fixture:
            doc_json = json.load(fixture)
        doc = Document(**doc_json)
        # Check the Document
        assert doc.category == "doc"
        assert isinstance(doc.content, list)
        # Check the Page
        for page in doc.content:
            assert isinstance(page, Page)
            assert page.category == "page"
            assert isinstance(page.attributes, PageAttributes)
            assert isinstance(page.content, list)
            # Check Text
            for text in page.content:
                assert text.category in ["page-header", "title", "heading", "body", "footer"]
                assert isinstance(text.content, str)
                assert isinstance(text.attributes, TextAttributes)
                assert isinstance(text.marks, list)
                # Check Marks
                for mark in text.marks:
                    assert isinstance(mark, Mark)
def test_style_marks():
    """Exercise ``TextStyleMark`` construction with and without style keys.

    Payloads carrying a ``font`` and/or ``color`` entry must build a
    ``TextStyleMark``; every other payload must raise ``ValueError``.
    """
    payloads = [
        {"category": "textStyle", "font": {"id": "1", "name": "test-font", "size": 1}},
        {"category": "textStyle", "color": {"id": "1", "r": 0, "g": 0, "b": 0}},
        {
            "category": "textStyle",
            "font": {"id": "1", "name": "test-font", "size": 1},
            "color": {"id": "1", "r": 0, "g": 0, "b": 0},
        },
        {"category": "textStyle"},
        {"category": "textStyle", "url": "test-url"},
    ]
    for payload in payloads:
        carries_style = any(key in payload for key in ("font", "color"))
        if not carries_style:
            # Neither font nor color supplied: construction must be rejected.
            with pytest.raises(ValueError):
                TextStyleMark(**payload)
            continue
        built = TextStyleMark(**payload)
        assert isinstance(built, TextStyleMark)
def test_url_marks():
    """Exercise ``UrlMark`` construction with and without a ``url`` key.

    A payload that includes ``url`` must build a ``UrlMark``; a payload
    without it must raise ``ValueError``.
    """
    payloads = [
        {"category": "link", "url": "test-url"},
        {"category": "link"},
        {"category": "link", "font": {"id": "1", "name": "test-font", "size": 1}},
        {"category": "link", "color": {"id": "1", "r": 0, "g": 0, "b": 0}},
    ]
    for payload in payloads:
        if "url" not in payload:
            # No target supplied: the mark must refuse to build.
            with pytest.raises(ValueError):
                UrlMark(**payload)
        else:
            link = UrlMark(**payload)
            assert isinstance(link, UrlMark)
def test_text_attributes_level():
    """Check which ``level`` values ``TextAttributes`` accepts and rejects.

    The accepted payloads must build a ``TextAttributes`` whose ``level`` is
    either ``None`` or an int inside ``range(1, 5)`` equal to the input; the
    rejected payloads must raise ``ValueError``.
    """
    accepted = [
        {"bounding_box": [], "level": 1},
        {"bounding_box": [], "level": 2},
        {"bounding_box": [], "level": 3},
        {"bounding_box": [], "level": 4},
        {"bounding_box": [], "level": None},
        {"bounding_box": []},
        {},
    ]
    for kwargs in accepted:
        attrs = TextAttributes(**kwargs)
        assert isinstance(attrs, TextAttributes)
        assert isinstance(attrs.level, (int, type(None)))
        if attrs.level is None:
            # A missing level on the model is only valid when the input
            # omitted it or passed None explicitly.
            assert kwargs.get("level") is None
        else:
            assert attrs.level in range(1, 5)
            assert kwargs["level"] == attrs.level

    rejected = [
        {"bounding_box": [], "level": -1},
        {"bounding_box": [], "level": "invalid"},
        {"bounding_box": [], "level": 2.5},
        {"bounding_box": [], "level": 5},
    ]
    for kwargs in rejected:
        with pytest.raises(ValueError):
            TextAttributes(**kwargs)