Skip to content

Commit 9a09733

Browse files
committed
Enhance error handling and add cache configuration support in LLM request flow
1 parent 1362db2 commit 9a09733

4 files changed

Lines changed: 32 additions & 26 deletions

File tree

back/back/apps/broker/serializers/rpc.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -128,6 +128,7 @@ class RPCLLMRequestSerializer(serializers.Serializer):
128128
stream = serializers.BooleanField(default=False)
129129
use_conversation_context = serializers.BooleanField(default=True)
130130
response_schema = serializers.JSONField(default=dict, required=False, allow_null=True)
131+
cache_config = CacheConfigSerializer(required=False, allow_null=True)
131132

132133
def validate(self, attrs):
133134
if not attrs.get("messages") and not attrs.get("use_conversation_context"):

back/back/apps/language_model/consumers/__init__.py

Lines changed: 27 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
import json
22
import uuid
33
from logging import getLogger
4-
from typing import Dict, List, Optional
4+
from typing import Dict, List, Optional, Callable, Awaitable
55

66
from channels.db import database_sync_to_async
77
from channels.generic.websocket import AsyncJsonWebsocketConsumer
@@ -210,16 +210,19 @@ async def query_llm(
210210
cache_config: Optional[Dict] = None,
211211
response_schema: Optional[Dict] = None,
212212
stream: bool = False,
213+
error_handler: Callable[[dict], Awaitable[None]] = None,
213214
):
214215
try:
215216
llm_config = await database_sync_to_async(LLMConfig.enabled_objects.get)(
216217
name=llm_config_name
217218
)
218219
except LLMConfig.DoesNotExist:
219-
yield {
220-
"content": [{"type": "text", "text": f"LLM config with name: {llm_config_name} does not exist."}],
221-
"last_chunk": True,
222-
}
220+
await error_handler({
221+
"payload": {
222+
"errors": f"LLM config with name: {llm_config_name} does not exist.",
223+
"request_info": {"llm_config_name": llm_config_name},
224+
}
225+
})
223226
return
224227

225228
conv = await database_sync_to_async(Conversation.objects.get)(pk=conversation_id)
@@ -238,25 +241,24 @@ async def query_llm(
238241
# pop the system message
239242
messages = messages[1:]
240243
elif not prev_messages:
241-
yield {
242-
"content": [
243-
{
244-
"type": "text",
245-
"text": "Error: No previous messages and no messages provided.",
246-
}
247-
],
248-
"last_chunk": True,
249-
}
244+
await error_handler({
245+
"payload": {
246+
"errors": "Error: No previous messages and no messages provided.",
247+
"request_info": {"conversation_id": conversation_id},
248+
}
249+
})
250250
return
251251
if messages:
252252
new_messages.extend(messages)
253253
else:
254254
new_messages = messages
255255
if new_messages is None:
256-
yield {
257-
"content": [{"type": "text", "text": "Error: No messages provided."}],
258-
"last_chunk": True,
259-
}
256+
await error_handler({
257+
"payload": {
258+
"errors": "Error: No messages provided.",
259+
"request_info": {"conversation_id": conversation_id},
260+
}
261+
})
260262
return
261263

262264
try:
@@ -323,10 +325,12 @@ async def query_llm(
323325

324326
except Exception as e:
325327
logger.error("Error during LLM query", exc_info=e)
326-
yield {
327-
"content": [{"type": "text", "text": "There was an error generating the response. Please try again or contact the administrator."}],
328-
"last_chunk": True,
329-
}
328+
await error_handler({
329+
"payload": {
330+
"errors": "There was an error generating the response. Please try again or contact the administrator.",
331+
"request_info": {"conversation_id": conversation_id},
332+
}
333+
})
330334
return
331335

332336

@@ -425,6 +429,7 @@ async def process_llm_request(self, data):
425429
data.get("cache_config"),
426430
data.get("response_schema"),
427431
data.get("stream"),
432+
error_handler=self.error_response,
428433
):
429434
await self.send(
430435
json.dumps(

chat_rag/chat_rag/llms/gemini_client.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -402,7 +402,7 @@ async def astream(
402402

403403
if name: # If the cache exists, use it
404404
config_kwargs["cached_content"] = name
405-
response = self.client.aio.models.generate_content_stream(
405+
response = await self.client.aio.models.generate_content_stream(
406406
model=self.llm_name,
407407
contents=contents,
408408
config=GenerateContentConfig(**config_kwargs),
@@ -413,7 +413,7 @@ async def astream(
413413
cached_messages, system_prompt, cache_config
414414
)
415415
config_kwargs["cached_content"] = cache_name
416-
response = self.client.aio.models.generate_content_stream(
416+
response = await self.client.aio.models.generate_content_stream(
417417
model=self.llm_name,
418418
contents=contents,
419419
config=GenerateContentConfig(**config_kwargs),
@@ -422,7 +422,7 @@ async def astream(
422422
# Add system prompt to the config
423423
config_kwargs["system_instruction"] = system_prompt
424424
# No caching needed
425-
response = self.client.aio.models.generate_content_stream(
425+
response = await self.client.aio.models.generate_content_stream(
426426
model=self.llm_name,
427427
contents=contents,
428428
config=GenerateContentConfig(**config_kwargs),

sdk/examples/full_kb_rag_example/fsm_definition.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@
1313

1414

1515
async def send_greeting(sdk: ChatFAQSDK, ctx: dict):
16-
yield Message("How can I help you today?", allow_feedback=False)
16+
yield Message("How can I help you today?")
1717

1818

1919
async def send_rag_answer(sdk: ChatFAQSDK, ctx: dict):

0 commit comments

Comments (0)