@@ -35,6 +35,7 @@ class LLMType(str, Enum):
3535 CUSTOM = "custom"
3636
3737class ParserType (str , Enum ):
38+ TEXT = "text"
3839 UNSTRUCTURED = "unstructured"
3940 PYMUPDF = "pymupdf"
4041 PYPDF = "pypdf"
@@ -54,6 +55,13 @@ class ChunkingStrategy(str, Enum):
5455 SEMANTIC = "SemanticChunker"
5556 CUSTOM = "custom"
5657
58+ NO_CHUNK_SIZE_STRATEGIES = [ # NOTE(review): presumably the strategies that take no chunk-size setting — inferred from the name only; confirm against callers
59+ ChunkingStrategy .MARKDOWN ,
60+ ChunkingStrategy .HTML ,
61+ ChunkingStrategy .SEMANTIC ,
62+ ChunkingStrategy .CUSTOM
63+ ]
64+
5765class EmbeddingType (str , Enum ):
5866 OPENAI = "openai"
5967 AZURE_OPENAI = "azure_openai"
@@ -123,7 +131,7 @@ def get_class():
123131LLM_MAP = {
124132 LLMType .OPENAI : lazy_load ("langchain_openai" , "ChatOpenAI" ),
125133 LLMType .AZURE_OPENAI : lazy_load ("langchain_openai" , "AzureChatOpenAI" ),
126- LLMType .HUGGINGFACE : lazy_load ("langchain_huggingface" , "HuggingFaceHub " ),
134+ LLMType .HUGGINGFACE : lazy_load ("langchain_huggingface" , "HuggingFaceEndpoint " ),
127135 LLMType .OLLAMA : lazy_load ("langchain_ollama" , "OllamaChat" ),
128136 LLMType .COHERE : lazy_load ("langchain_community.llms" , "Cohere" ),
129137 LLMType .VERTEXAI : lazy_load ("langchain_google_vertexai" , "VertexAI" ),
@@ -132,6 +140,8 @@ def get_class():
132140}
133141
134142LOADER_MAP = {
143+ # ParserType.UNSTRUCTURED: lazy_load("langchain_unstructured", "UnstructuredLoader"),
144+ ParserType .TEXT : lazy_load ("langchain.document_loaders" , "TextLoader" ),
135145 ParserType .UNSTRUCTURED : lazy_load ("langchain_community.document_loaders" , "UnstructuredFileLoader" ),
136146 ParserType .PYMUPDF : lazy_load ("langchain_community.document_loaders" , "PyMuPDFLoader" ),
137147 ParserType .PYPDF : lazy_load ("langchain_community.document_loaders" , "PyPDFLoader" ),
@@ -163,18 +173,18 @@ def get_class():
163173}
164174
165175VECTORDB_MAP = {
166- VectorDatabase .FAISS : lazy_load ("langchain .vectorstores" , "FAISS" ),
167- VectorDatabase .CHROMA : lazy_load ("langchain.vectorstores " , "Chroma" ),
168- VectorDatabase .PINECONE : lazy_load ("langchain.vectorstores " , "Pinecone " ),
169- VectorDatabase .WEAVIATE : lazy_load ("langchain .vectorstores" , "Weaviate " ),
170- VectorDatabase .QDRANT : lazy_load ("langchain.vectorstores " , "Qdrant " ),
171- VectorDatabase .MILVUS : lazy_load ("langchain.vectorstores " , "Milvus" ),
172- VectorDatabase .PGVECTOR : lazy_load ("langchain.vectorstores " , "PGVector" ),
173- VectorDatabase .ELASTICSEARCH : lazy_load ("langchain.vectorstores " , "ElasticsearchStore" ),
176+ VectorDatabase .FAISS : lazy_load ("langchain_community .vectorstores" , "FAISS" ),
177+ VectorDatabase .CHROMA : lazy_load ("langchain_chroma " , "Chroma" ),
178+ VectorDatabase .PINECONE : lazy_load ("langchain_pinecone " , "PineconeVectorStore " ),
179+ VectorDatabase .WEAVIATE : lazy_load ("langchain_weaviate .vectorstores" , "WeaviateVectorStore " ),
180+ VectorDatabase .QDRANT : lazy_load ("langchain_qdrant " , "QdrantVectorStore " ),
181+ VectorDatabase .MILVUS : lazy_load ("langchain_milvus " , "Milvus" ),
182+ VectorDatabase .PGVECTOR : lazy_load ("langchain_postgres " , "PGVector" ),
183+ VectorDatabase .ELASTICSEARCH : lazy_load ("langchain_elasticsearch " , "ElasticsearchStore" ), # module path must be the importable name (underscore); "langchain-elasticsearch" is only the pip distribution name
174184}
175185
176186RETRIEVER_MAP = {
177- RetrieverType .BM25 : lazy_load ("langchain .retrievers" , "BM25Retriever" ),
187+ RetrieverType .BM25 : lazy_load ("langchain_community .retrievers" , "BM25Retriever" ), # BM25Retriever is now looked up in langchain_community.retrievers instead of langchain.retrievers
178188}
179189
180190RERANKER_MAP = {
@@ -226,6 +236,12 @@ def get_class():
226236
227237# Environment variable requirements for components
228238COMPONENT_ENV_REQUIREMENTS = {
239+ # Unstructured
240+ ParserType .UNSTRUCTURED : {
241+ "required" : [],
242+ "optional" : [],
243+ "packages" : [_PkgSpec ("langchain-unstructured" )]
244+ },
229245 # Embedding Models
230246 EmbeddingType .AZURE_OPENAI : {
231247 "required" : ["AZURE_OPENAI_API_KEY" , "AZURE_OPENAI_ENDPOINT" ],
@@ -350,40 +366,59 @@ def get_class():
350366 VectorDatabase .PINECONE : {
351367 "required" : ["PINECONE_API_KEY" , "PINECONE_ENVIRONMENT" ],
352368 "optional" : [],
353- "packages" : [_PkgSpec ("pinecone-client" , "pinecone" )]
369+ "packages" : [
370+ _PkgSpec ("langchain-pinecone" ),
371+ _PkgSpec ("pinecone-client" , "pinecone" )
372+ ]
354373 },
355374 VectorDatabase .WEAVIATE : {
356375 "required" : ["WEAVIATE_URL" , "WEAVIATE_API_KEY" ],
357376 "optional" : [],
358- "packages" : [_PkgSpec ("weaviate-client" , "weaviate" )]
377+ "packages" : [
378+ _PkgSpec ("weaviate-client" , "weaviate" ),
379+ _PkgSpec ("langchain-weaviate" )
380+ ]
359381 },
360382 VectorDatabase .QDRANT : {
361383 "required" : ["QDRANT_URL" ],
362384 "optional" : ["QDRANT_API_KEY" ],
363- "packages" : [_PkgSpec ("qdrant-client" , "qdrant" )]
385+ "packages" : [
386+ _PkgSpec ("qdrant-client" , "qdrant" ),
387+ _PkgSpec ("langchain-qdrant" )
388+ ]
364389 },
365390 VectorDatabase .MILVUS : {
366391 "required" : ["MILVUS_HOST" , "MILVUS_PORT" ],
367392 "optional" : [],
368- "packages" : [_PkgSpec ("pymilvus" )]
393+ "packages" : [
394+ _PkgSpec ("pymilvus" ),
395+ _PkgSpec ("langchain-milvus" )
396+ ]
369397 },
370398 VectorDatabase .PGVECTOR : {
371399 "required" : ["PGVECTOR_CONNECTION_STRING" ],
372400 "optional" : [],
373401 "packages" : [
402+ _PkgSpec ("langchain-postgres" ),
374403 _PkgSpec ("psycopg2-binary" ),
375404 _PkgSpec ("pgvector" )
376405 ]
377406 },
378407 VectorDatabase .ELASTICSEARCH : {
379408 "required" : ["ELASTICSEARCH_URL" ],
380409 "optional" : ["ELASTICSEARCH_API_KEY" ],
381- "packages" : [_PkgSpec ("elasticsearch" )]
410+ "packages" : [
411+ _PkgSpec ("elasticsearch" ),
412+ _PkgSpec ("langchain-elasticsearch" )
413+ ]
382414 },
383415 VectorDatabase .CHROMA : {
384416 "required" : [],
385417 "optional" : [],
386- "packages" : [_PkgSpec ("chromadb" )]
418+ "packages" : [
419+ _PkgSpec ("langchain-chroma" ),
420+ _PkgSpec ("chromadb" )
421+ ]
387422 },
388423 VectorDatabase .FAISS : {
389424 "required" : [],
0 commit comments