Skip to content

Commit 163dfce

Browse files
committed
Merge remote-tracking branch 'origin/develop' into develop
2 parents c8b25ed + 6279b10 commit 163dfce

15 files changed

Lines changed: 3802 additions & 2595 deletions

File tree

BOOK_OF_FRUSTRATIONS.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,3 +10,4 @@ This file is intended for writing down all those TODOs/tech. deb. you know it ne
1010
- [ ] RAGs should not be loaded in shared celery memory, but in another service which celery calls.
1111
- [ ] RAGs need to be deleted and recreated if the retriever type is changed. This deletion and recreation shouldn't be necessary; the RAG should be able to handle the change directly.
1212
- [ ] Find the correct faiss-gpu version and pin it.
13+
- [ ] Default PDF parser doesn't handle images yet.

back/poetry.lock

Lines changed: 579 additions & 548 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

back/pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,7 @@ pgvector = "^0.2.3"
3333
django-filter = "^23.2"
3434
django-storages = "*"
3535
sqlalchemy = "^2.0.16"
36-
chat-rag = {version = "0.1.54"}
36+
chat-rag = {version = "0.1.56"}
3737
gevent = "23.9.0"
3838
torch = [
3939
{ version = "^2.0.1", source = "torch" },

chat_rag/chat_rag/data/splitters.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -241,7 +241,7 @@ class SmartSplitter:
241241
Splits the text into information meaningful chunks using the GPT-4 model.
242242
This can reach API rate limits very quickly.
243243
"""
244-
def __init__(self, model_name='gpt-4'):
244+
def __init__(self, model_name='gpt-4-0125-preview'):
245245
"""
246246
Parameters
247247
----------

chat_rag/chat_rag/llms/claude_client.py

Lines changed: 20 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -1,21 +1,20 @@
11
from typing import List, Dict
22
import os
33

4-
from anthropic import Anthropic, HUMAN_PROMPT, AI_PROMPT
4+
from anthropic import Anthropic
55

66
from chat_rag.llms import RAGLLM, CONTEXT_PREFIX
77

88

99
class ClaudeChatModel(RAGLLM):
1010
def __init__(self, llm_name, **kwargs) -> None:
1111
self.llm_name = llm_name
12-
self.anthropic = Anthropic(
13-
api_key=os.environ["ANTHROPIC_API_KEY"],
12+
self.client = Anthropic(
13+
api_key=os.environ.get("ANTHROPIC_API_KEY"),
1414
)
1515

1616
def format_prompt(
1717
self,
18-
messages: List[Dict[str, str]],
1918
contexts: List[str],
2019
system_prefix: str,
2120
n_contexts_to_use: int = 3,
@@ -26,8 +25,6 @@ def format_prompt(
2625
Formats the prompt to be used by the model.
2726
Parameters
2827
----------
29-
messages : List[Tuple[str, str]]
30-
The messages to use for the prompt. Pair of (role, message).
3128
contexts : list
3229
The context to use.
3330
system_prefix : str
@@ -49,20 +46,14 @@ def format_prompt(
4946
list
5047
The formatted prompt.
5148
"""
52-
prompt = self.format_system_prompt(
49+
system_prompt = self.format_system_prompt(
5350
contexts=contexts,
5451
system_prefix=system_prefix,
5552
n_contexts_to_use=n_contexts_to_use,
5653
lang=lang,
5754
)
5855

59-
for message in messages:
60-
if message['role'] == 'user':
61-
prompt += f"{HUMAN_PROMPT} {message['content']}{AI_PROMPT}"
62-
elif message['role'] == 'assistant':
63-
prompt += " " + message['content']
64-
65-
return prompt
56+
return system_prompt
6657

6758
def generate(
6859
self,
@@ -93,18 +84,19 @@ def generate(
9384
The generated text.
9485
"""
9586

96-
prompt = self.format_prompt(messages, contexts, **prompt_structure_dict, lang=lang)
87+
system_prompt = self.format_prompt(contexts, **prompt_structure_dict, lang=lang)
9788

98-
completion = self.anthropic.completions.create(
89+
message = self.client.messages.create(
9990
model=self.llm_name,
100-
max_tokens_to_sample=generation_config_dict['max_new_tokens'],
91+
system=system_prompt,
92+
messages=messages,
93+
max_tokens=generation_config_dict['max_new_tokens'],
10194
temperature=generation_config_dict['temperature'],
10295
top_p=generation_config_dict['top_p'],
10396
top_k=generation_config_dict['top_k'],
104-
prompt=prompt,
10597
)
10698

107-
return completion.completion
99+
return message.content[0].text
108100

109101
def stream(
110102
self,
@@ -114,7 +106,7 @@ def stream(
114106
generation_config_dict: dict = None,
115107
lang: str = "en",
116108
**kwargs,
117-
) -> str:
109+
):
118110
"""
119111
Generate text from a prompt using the model.
120112
Parameters
@@ -135,20 +127,22 @@ def stream(
135127
The generated text.
136128
"""
137129

138-
prompt = self.format_prompt(messages, contexts, **prompt_structure_dict, lang=lang)
130+
system_prompt = self.format_prompt(contexts, **prompt_structure_dict, lang=lang)
139131

140-
stream = self.anthropic.completions.create(
132+
stream = self.client.messages.create(
141133
model=self.llm_name,
142-
max_tokens_to_sample=generation_config_dict['max_new_tokens'],
134+
system=system_prompt,
135+
messages=messages,
136+
max_tokens=generation_config_dict['max_new_tokens'],
143137
temperature=generation_config_dict['temperature'],
144138
top_p=generation_config_dict['top_p'],
145139
top_k=generation_config_dict['top_k'],
146-
prompt=prompt,
147140
stream=True,
148141
)
149142

150-
for completion in stream:
151-
yield completion.completion
143+
for event in stream:
144+
if event.type == "content_block_delta":
145+
yield event.delta.text
152146

153147

154148

0 commit comments

Comments
 (0)