Bug fixing

Diegi97 · Diegi97 · commit 3313f102039f · 2025-02-16T10:32:38.000+01:00
diff --git a/back/back/apps/broker/serializers/messages/__init__.py b/back/back/apps/broker/serializers/messages/__init__.py
@@ -107,14 +107,14 @@ class Reference(serializers.Serializer):
     knowledge_base_id = serializers.CharField(required=False, allow_null=True, allow_blank=True)
 
 
-class ToolUse(serializers.Serializer):
+class ToolUsePayload(serializers.Serializer):
     id = serializers.CharField(required=True)
     name = serializers.CharField(required=True)
     args = serializers.JSONField(required=True)
     text = serializers.CharField(required=False, allow_null=True, allow_blank=True)
 
 
-class ToolResult(serializers.Serializer):
+class ToolResultPayload(serializers.Serializer):
     id = serializers.CharField(required=False, allow_null=True, allow_blank=True)
     name = serializers.CharField(required=False, allow_null=True, allow_blank=True)
     result = serializers.CharField(required=True)
@@ -124,19 +124,6 @@ class MessagePayload(serializers.Serializer):
     content = serializers.SerializerMethodField()
     references = Reference(required=False, allow_null=True)
 
-    @extend_schema_field(
-        PolymorphicProxySerializer(
-            component_name="MessageContent",
-            serializers=[
-                serializers.CharField,
-                ToolUse,
-                ToolResult
-            ],
-        )
-    )
-    def get_content(self, obj):
-        return obj
-
 
 class HTMLPayload(serializers.Serializer):
     @staticmethod
@@ -176,6 +163,8 @@ class QuickRepliesPayload(serializers.Serializer):
             "ImagePayload": ImagePayload,
             "SatisfactionPayload": SatisfactionPayload,
             "QuickRepliesPayload": QuickRepliesPayload,
+            "ToolUsePayload": ToolUsePayload,
+            "ToolResultPayload": ToolResultPayload,
         },
     )
 )
diff --git a/back/back/apps/language_model/consumers/__init__.py b/back/back/apps/language_model/consumers/__init__.py
@@ -25,8 +25,8 @@
 from back.config import settings
 from back.utils import WSStatusCodes
 from back.utils.custom_channels import CustomAsyncConsumer
-from chat_rag.llms.types import Content, Message, ToolResult, ToolUse
 from chat_rag.llms import load_llm
+from chat_rag.llms.types import Content, Message, ToolResult, ToolUse
 
 logger = getLogger(__name__)
 
@@ -59,29 +59,21 @@ def process_stack(stack) -> List[Content]:
         """
         contents = []
         payload = stack.get("payload", {})
+        type = stack.get("type")
 
         # Create a text content if available.
-        text = payload.get("content")
-        if text:
-            contents.append(Content(text=text, type="text"))
+        if type == "message":
+            contents.append(Content(text=payload.get("content"), type="text"))
 
         # Check if this stack represents a tool call (tool use).
-        if payload.get("tool_use"):
-            try:
-                tool_use_obj = ToolUse(**payload["tool_use"])
-                contents.append(Content(tool_use=tool_use_obj, type="tool_use"))
-            except Exception as e:
-                # If it fails to parse tool_use, we simply skip it.
-                pass
+        if type == "tool_use":
+            tool_use_obj = ToolUse(**payload)
+            contents.append(Content(tool_use=tool_use_obj, type="tool_use"))
 
         # Check if this stack represents a tool result.
-        if payload.get("tool_result"):
-            try:
-                tool_result_obj = ToolResult(**payload["tool_result"])
-                contents.append(Content(tool_result=tool_result_obj, type="tool_result"))
-            except Exception as e:
-                # If it fails to parse tool_result, we simply skip it.
-                pass
+        if type == "tool_result":
+            tool_result_obj = ToolResult(**payload)
+            contents.append(Content(tool_result=tool_result_obj, type="tool_result"))
 
         return contents
 
diff --git a/chat_rag/chat_rag/llms/format_tools.py b/chat_rag/chat_rag/llms/format_tools.py
@@ -135,6 +135,7 @@ def format_tools(
     tools_formatted = []
     if mode in {Mode.OPENAI_TOOLS, Mode.MISTRAL_TOOLS}:
         for tool in tools:
+            # As it is already in the openai format, we can just append it
             tools_formatted.append(tool)
 
     elif mode == Mode.ANTHROPIC_TOOLS:
diff --git a/chat_rag/chat_rag/llms/openai_client.py b/chat_rag/chat_rag/llms/openai_client.py
@@ -27,7 +27,6 @@ def _format_tools(self, tools: List[Union[Callable, Dict]], tool_choice: str = N
         tools_formatted = format_tools(tools, mode=Mode.OPENAI_TOOLS)
         tool_choice = self._check_tool_choice(tools_formatted, tool_choice)
 
-
         # If the tool_choice is a named tool, then apply correct formatting
         if tool_choice in [tool['title'] for tool in tools]:
             tool_choice = {
diff --git a/sdk/chatfaq_sdk/layers/__init__.py b/sdk/chatfaq_sdk/layers/__init__.py
@@ -356,14 +356,14 @@ def __init__(self, id: str = None, name: str = None, result: dict = None):
         super().__init__()
         self.id = id
         self.name = name
-        self.result = result
+        self.tool_result = result
 
     async def build_payloads(self, ctx, data):
         payload = {
             "payload": {
                 "id": self.id,
                 "name": self.name,
-                "result": self.result,
+                "result": self.tool_result,
             }
         }
         yield [payload], True
diff --git a/sdk/chatfaq_sdk/utils.py b/sdk/chatfaq_sdk/utils.py
@@ -1,24 +1,173 @@
-def convert_mml_to_llm_format(mml):
+import inspect
+from typing import Dict, List
+
+ROLES_MAP = {
+    "bot": "assistant",
+    "human": "user",
+}
+
+
+def convert_mml_to_llm_format(mml: List[Dict]) -> List[Dict]:
     """
     Converts the MML (Message Markup Language) format to the common LLM message format.
+    Analogous to format_msgs_chain_to_llm_context in back/back/apps/language_model/consumers/__init__.py
+    Consecutive messages with the same role are merged, and each 'content' is a list of typed parts.
 
     :param mml: List of messages in MML format
     :return: List of messages in LLM format {'role': 'user', 'content': '...'}
     """
-    roles_map = {
-        "bot": "assistant",
-        "human": "user",
+    aggregated_messages = []
+    current_role = None  # "user" for human and "assistant" for bot
+    aggregated_contents = []  # list of content dicts for the current group
+
+    def process_stack(stack: Dict) -> List[Dict]:
+        """
+        Convert a single stack item into a list of LLM content dicts.
+        """
+        contents = []
+        type = stack.get("type")
+
+        if type == "message":
+            contents.append(
+                {
+                    "type": "text",
+                    "text": stack["payload"]["content"],
+                }
+            )
+        elif type == "tool_use":
+            contents.append(
+                {
+                    "type": "tool_use",
+                    "tool_use": stack["payload"],
+                }
+            )
+        elif type == "tool_result":
+            contents.append(
+                {
+                    "type": "tool_result",
+                    "tool_result": stack["payload"],
+                }
+            )
+
+        return contents
+
+    def process_msg(msg: Dict) -> List[Dict]:
+        """
+        Convert a broker message into a list of LLM content dicts by iterating over its stacks.
+        """
+        contents = []
+        for stack in msg.get("stack", []):
+            contents.extend(process_stack(stack))
+        return contents
+
+    def merge_contents(existing: List[Dict], new: List[Dict]) -> List[Dict]:
+        """
+        Merge two lists of LLM content dicts.
+        If the last element of the existing list and the first element of the new list are both text,
+        their stripped texts are joined with a single space into one text element.
+        """
+        if not existing:
+            return new
+        if not new:
+            return existing
+
+        merged = existing.copy()
+        if merged and new and merged[-1]["type"] == "text" and new[0]["type"] == "text":
+            merged[-1]["text"] = (
+                merged[-1]["text"].strip() + " " + new[0]["text"].strip()
+            )
+            merged.extend(new[1:])
+        else:
+            merged.extend(new)
+        return merged
+
+    for msg in mml:
+        role = ROLES_MAP[msg["sender"]["type"]]
+        msg_contents = process_msg(msg)
+        if not msg_contents:
+            continue
+
+        if current_role is None:
+            current_role = role
+            aggregated_contents = msg_contents
+        elif current_role == role:
+            aggregated_contents = merge_contents(aggregated_contents, msg_contents)
+        else:
+            aggregated_messages.append(
+                {
+                    "role": current_role,
+                    "content": aggregated_contents,
+                }
+            )
+            current_role = role
+            aggregated_contents = msg_contents
+
+    if aggregated_contents:
+        aggregated_messages.append(
+            {
+                "role": current_role,
+                "content": aggregated_contents,
+            }
+        )
+
+    return aggregated_messages
+
+
+def function_to_json(func) -> dict:
+    """
+    Converts a Python function into a JSON-serializable dictionary
+    that describes the function's signature, including its name,
+    description, and parameters.
+    Function from https://github.com/openai/swarm
+
+    Args:
+        func: The function to be converted.
+
+    Returns:
+        A dictionary representing the function's signature in JSON format.
+    """
+    type_map = {
+        str: "string",
+        int: "integer",
+        float: "number",
+        bool: "boolean",
+        list: "array",
+        dict: "object",
+        type(None): "null",
+    }
+
+    try:
+        signature = inspect.signature(func)
+    except ValueError as e:
+        raise ValueError(
+            f"Failed to get signature for function {func.__name__}: {str(e)}"
+        )
+
+    parameters = {}
+    for param in signature.parameters.values():
+        try:
+            param_type = type_map.get(param.annotation, "string")
+        except KeyError as e:
+            raise KeyError(
+                f"Unknown type annotation {param.annotation} for parameter {param.name}: {str(e)}"
+            )
+        parameters[param.name] = {"type": param_type}
+
+    required = [
+        param.name
+        for param in signature.parameters.values()
+        if param.default == inspect._empty
+    ]
+
+    return {
+        "type": "function",
+        "function": {
+            "name": func.__name__,
+            "description": func.__doc__ or "",
+            "parameters": {
+                "type": "object",
+                "properties": parameters,
+                "required": required,
+            },
+        },
     }
-    messages = []
-
-    for message in mml:
-        for stack in message.get("stack", []):
-            content = stack["payload"].get("content")
-            if not content:
-                continue
-            messages.append({
-                "role": roles_map[message["sender"]["type"]],
-                "content": content,
-            })
-
-    return messages