|
1 | | -<?php |
2 | | - |
3 | | -namespace OneOffTech\Parse\Client\DocumentFormat; |
4 | | - |
5 | | -use Countable; |
6 | | -use OneOffTech\Parse\Client\Exceptions\EmptyDocumentException; |
7 | | -use OneOffTech\Parse\Client\Exceptions\InvalidDocumentFormatException; |
8 | | -use RecursiveArrayIterator; |
9 | | -use RecursiveIteratorIterator; |
10 | | - |
/**
 * Document node of a parsed document.
 *
 * Holds the raw `content` array (one entry per page, see count() and pages())
 * and an optional array of document-level attributes.
 */
class DocumentNode implements Countable
{
    /**
     * @param  array  $content  Raw page entries as nested arrays
     * @param  array  $attributes  Document-level attributes, empty when none are given
     */
    public function __construct(
        public readonly array $content,
        public readonly array $attributes = [],
    ) {}

    /**
     * The node type, always `doc`.
     */
    public function type(): string
    {
        return 'doc';
    }

    /**
     * The number of pages in this document as extracted by the parser.
     */
    public function count(): int
    {
        return count($this->content);
    }

    /**
     * Test if the document is empty, i.e. contains no pages or has no textual content on any of the pages
     */
    public function isEmpty(): bool
    {
        if ($this->count() === 0) {
            return true;
        }

        return ! $this->hasContent();
    }

    /**
     * Test if the document has discernible textual content on any of the pages
     */
    public function hasContent(): bool
    {
        // The generator is lazy: the walk stops at the first usable text leaf.
        foreach ($this->nonEmptyTextLeaves() as $_) {
            return true;
        }

        return false;
    }

    /**
     * The pages in this document
     *
     * @return \OneOffTech\Parse\Client\DocumentFormat\PageNode[]
     */
    public function pages(): array
    {
        return array_map(PageNode::fromArray(...), $this->content);
    }

    /**
     * All non-empty `text` leaves of the document, in traversal order,
     * joined with PHP_EOL. Returns an empty string when there is no text.
     */
    public function text(): string
    {
        // Every yielded key would otherwise collide, so keys are discarded.
        return implode(PHP_EOL, iterator_to_array($this->nonEmptyTextLeaves(), false));
    }

    /**
     * Throw exception if document has no textual content
     *
     * @throws EmptyDocumentException when document has no textual content
     */
    public function throwIfNoContent(): self
    {
        if ($this->hasContent()) {
            return $this;
        }

        throw new EmptyDocumentException('Document has no textual content.');
    }

    /**
     * Create a document node from associative array
     *
     * @param  array  $data  Must contain `category` equal to `doc` and an array `content`; `attributes` is optional
     *
     * @throws InvalidDocumentFormatException when a required key is missing, the category is not `doc`,
     *                                        or `content`/`attributes` is not an array
     */
    public static function fromArray(array $data): DocumentNode
    {
        if (! isset($data['category'], $data['content'])) {
            throw new InvalidDocumentFormatException('Unexpected document structure. Missing category or content.');
        }

        if ($data['category'] !== 'doc') {
            throw new InvalidDocumentFormatException("Unexpected node category. Expecting [doc] found [{$data['category']}].");
        }

        if (! is_array($data['content'])) {
            throw new InvalidDocumentFormatException('Unexpected content format. Expecting [array].');
        }

        $attributes = $data['attributes'] ?? [];

        // Report malformed attributes with the domain exception rather than
        // letting the constructor's array type raise a TypeError.
        if (! is_array($attributes)) {
            throw new InvalidDocumentFormatException('Unexpected attributes format. Expecting [array].');
        }

        return new DocumentNode($data['content'], $attributes);
    }

    /**
     * Lazily yield the value of every leaf stored under a `text` key, at any
     * depth of the content array, skipping empty values.
     *
     * Strings are compared against '' instead of being passed to empty(),
     * because empty('0') is true and would discard a legitimate "0" text.
     * Non-string leaves keep the empty() semantics.
     */
    private function nonEmptyTextLeaves(): iterable
    {
        $walker = new RecursiveIteratorIterator(
            new RecursiveArrayIterator($this->content),
            RecursiveIteratorIterator::LEAVES_ONLY,
        );

        foreach ($walker as $key => $leaf) {
            if ($key !== 'text') {
                continue;
            }

            $usable = is_string($leaf) ? $leaf !== '' : ! empty($leaf);

            if ($usable) {
                yield $leaf;
            }
        }
    }
}
| 1 | +<?php |
| 2 | + |
| 3 | +namespace OneOffTech\Parse\Client\DocumentFormat; |
| 4 | + |
| 5 | +use Countable; |
| 6 | +use OneOffTech\Parse\Client\Exceptions\EmptyDocumentException; |
| 7 | +use OneOffTech\Parse\Client\Exceptions\InvalidDocumentFormatException; |
| 8 | +use RecursiveArrayIterator; |
| 9 | +use RecursiveIteratorIterator; |
| 10 | + |
/**
 * A parsed document.
 *
 * Wraps the raw `content` array, whose entries are the document pages
 * (see count() and pages()), plus an optional `attributes` array.
 */
class DocumentNode implements Countable
{
    /**
     * @param  array  $content  Raw page entries, as nested arrays
     * @param  array  $attributes  Document-level attributes; defaults to an empty array
     */
    public function __construct(
        public readonly array $content,
        public readonly array $attributes = [],
    ) {}

    /**
     * The type of this node, which is always `doc`.
     */
    public function type(): string
    {
        return 'doc';
    }

    /**
     * The number of pages in this document as extracted by the parser.
     */
    public function count(): int
    {
        return count($this->content);
    }

    /**
     * Test if the document is empty, i.e. contains no pages or has no textual content on any of the pages
     */
    public function isEmpty(): bool
    {
        return $this->count() === 0 || ! $this->hasContent();
    }

    /**
     * Test if the document has discernible textual content on any of the pages
     */
    public function hasContent(): bool
    {
        // Stop at the first text fragment found; no need to walk the rest.
        foreach ($this->textFragments() as $_) {
            return true;
        }

        return false;
    }

    /**
     * The pages in this document
     *
     * @return \OneOffTech\Parse\Client\DocumentFormat\PageNode[]
     */
    public function pages(): array
    {
        return array_map(fn ($page) => PageNode::fromArray($page), $this->content);
    }

    /**
     * The textual content of the document: every non-empty `text` leaf, in
     * traversal order, separated by PHP_EOL. Empty string when there is none.
     */
    public function text(): string
    {
        $fragments = [];

        foreach ($this->textFragments() as $fragment) {
            $fragments[] = $fragment;
        }

        return implode(PHP_EOL, $fragments);
    }

    /**
     * Throw exception if document has no textual content
     *
     * @throws EmptyDocumentException when document has no textual content
     */
    public function throwIfNoContent(): self
    {
        if (! $this->hasContent()) {
            throw new EmptyDocumentException('Document has no textual content.');
        }

        return $this;
    }

    /**
     * Create a document node from associative array
     *
     * @param  array  $data  Requires `category` set to `doc` and an array under `content`; `attributes` is optional
     *
     * @throws InvalidDocumentFormatException when `category` or `content` is missing, the category is not `doc`,
     *                                        or `content`/`attributes` is not an array
     */
    public static function fromArray(array $data): DocumentNode
    {
        if (! (isset($data['category']) && isset($data['content']))) {
            throw new InvalidDocumentFormatException('Unexpected document structure. Missing category or content.');
        }

        if ($data['category'] !== 'doc') {
            throw new InvalidDocumentFormatException("Unexpected node category. Expecting [doc] found [{$data['category']}].");
        }

        if (! is_array($data['content'])) {
            throw new InvalidDocumentFormatException('Unexpected content format. Expecting [array].');
        }

        $attributes = $data['attributes'] ?? [];

        // Without this check a non-array value would surface as a TypeError
        // from the constructor instead of a document format error.
        if (! is_array($attributes)) {
            throw new InvalidDocumentFormatException('Unexpected attributes format. Expecting [array].');
        }

        return new DocumentNode($data['content'], $attributes);
    }

    /**
     * Walk the content array recursively and lazily yield each non-empty
     * leaf value stored under a `text` key.
     *
     * A string counts as non-empty when it differs from ''. empty() is not
     * used for strings because it also rejects "0", which is valid text.
     * Non-string leaves are still filtered with empty().
     */
    private function textFragments(): iterable
    {
        $leaves = new RecursiveIteratorIterator(
            new RecursiveArrayIterator($this->content),
            RecursiveIteratorIterator::LEAVES_ONLY,
        );

        foreach ($leaves as $key => $value) {
            if ($key !== 'text') {
                continue;
            }

            if (is_string($value) ? $value !== '' : ! empty($value)) {
                yield $value;
            }
        }
    }
}
0 commit comments