File tree Expand file tree Collapse file tree 2 files changed +38
-0
lines changed
Expand file tree Collapse file tree 2 files changed +38
-0
lines changed Original file line number Diff line number Diff line change @@ -24,4 +24,11 @@ enum DocumentProcessor: string
2424 * Uses LlamaCloud https://cloud.llamaindex.ai/ as document processor to extract text
2525 */
2626 case LLAMAPARSE = 'llama ' ;
27+
28+ /**
29+ * The Unstructured processor
30+ *
31+ * Uses Unstructured https://unstructured.io/ as document processor to extract text
32+ */
33+ case UNSTRUCTURED = 'unstructured ' ;
2734}
Original file line number Diff line number Diff line change 9999
100100 $ mockClient ->assertSentCount (1 );
101101});
102+
// Test: choosing DocumentProcessor::UNSTRUCTURED in the parse options must
// produce exactly one ExtractTextRequest whose body names the unstructured
// driver alongside the requested URL and a PDF mime type.
103+ test ('unstructured can be selected as processor ' , function () {
// Answer every ExtractTextRequest from the 'extract-text-empty' fixture.
// NOTE(review): MockClient/MockResponse look like Saloon's testing helpers,
// so presumably no real HTTP call is made — confirm against the imports.
104+ $ mockClient = MockClient::global ([
105+ ExtractTextRequest::class => MockResponse::fixture ('extract-text-empty ' ),
106+ ]);
107+
// Connector built with a placeholder first argument and a local base URL,
// then wired to the mock client created above.
108+ $ connector = new ParseConnector ('fake ' , 'http://localhost:5002 ' );
109+ $ connector ->withMockClient ($ mockClient );
110+
// Parse a PDF URL while explicitly selecting the unstructured processor.
111+ $ connector ->parse (
112+ url: 'http://localhost/empty.pdf ' ,
113+ options: new ParseOption (DocumentProcessor::UNSTRUCTURED ),
114+ );
115+
// First, a coarse check that a request of the expected class was sent.
116+ $ mockClient ->assertSent (ExtractTextRequest::class);
117+
// Then inspect the request body; the closure returns true only for a
// matching request.
118+ $ mockClient ->assertSent (function (Request $ request , Response $ response ) {
// Requests of any other class never match.
119+ if (! $ request instanceof ExtractTextRequest) {
120+ return false ;
121+ }
122+
123+ /** @var array */
124+ $ body = $ request ->body ()->all ();
125+
// All three body fields must match. mime_type is not passed to parse()
// above, so it is presumably derived by the connector — TODO confirm.
126+ return $ body ['url ' ] === 'http://localhost/empty.pdf '
127+ && $ body ['mime_type ' ] === 'application/pdf '
128+ && $ body ['driver ' ] === 'unstructured ' ;
129+ });
130+
// Exactly one request must have been recorded by the mock client.
131+ $ mockClient ->assertSentCount (1 );
132+ });
You can’t perform that action at this time.
0 commit comments