Skip to content

Commit 37d3249

Browse files
authored
Merge pull request #172 from WithAgency/develop
Security and operational improvements
2 parents 5a6e7cc + 22348f4 commit 37d3249

File tree

24 files changed

+688
-85
lines changed

24 files changed

+688
-85
lines changed

.gitignore

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -250,4 +250,5 @@ back/models/
250250
*.ipynb
251251
chat_rag/data/
252252
chat_rag/examples/
253-
*/.ragatouille/*
253+
*/.ragatouille/*
254+
/back/dump.rdb

Dogefile

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -113,15 +113,15 @@ workers:
113113
envs: [ sdk ]
114114
instance_size: XS
115115

116-
# - name: ray
117-
# image:
118-
# host: docker.io
119-
# registry: chatfaq
120-
# repository: back-chatrag
121-
# tag: "develop"
122-
# envs: [ django, ray_worker ]
123-
# instance_size: L
124-
# command: /.venv/bin/ray start --address=back:6375 --resources='{"tasks":100,"ai_components":100}' --block
116+
- name: ray
117+
image:
118+
host: docker.io
119+
registry: chatfaq
120+
repository: back-chatrag
121+
tag: "develop"
122+
envs: [ django, ray_worker ]
123+
instance_size: XXL
124+
command: /.venv/bin/ray start --address=back:6375 --resources='{"tasks":100,"ai_components":100}' --block
125125

126126
jobs:
127127
- name: migrate

admin/nuxt.config.js

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,9 @@ export default envManager((env) => {
2323
});
2424
const viteNuxtConfig = defineNuxtConfig({
2525
ssr: true,
26+
devServer: {
27+
port: process.env.NUXT_PORT || 3000,
28+
},
2629
css: ["@/assets/styles/global.scss"],
2730
buildModules: [],
2831
modules: [...config.modules, "@pinia/nuxt", "@element-plus/nuxt"],

back/back/apps/broker/consumers/bots/custom_ws.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,11 @@ async def gather_fsm_def(self):
2727
return fsm, None if fsm else f"`No FSM found with name {name}`"
2828

2929
async def gather_user_id(self):
30-
return self.scope["url_route"]["kwargs"]["sender_id"]
30+
# If user is authenticated, use their sender_uuid
31+
if self.scope.get("user") and self.scope["user"].is_authenticated:
32+
return str(self.scope["user"].sender_uuid)
33+
# Otherwise, fall back to URL parameter
34+
return self.scope["url_route"]["kwargs"].get("sender_id")
3135

3236
async def gather_initial_conversation_metadata(self):
3337
params = parse_qs(self.scope["query_string"])

back/back/apps/broker/serializers/messages/custom_ws.py

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -29,10 +29,16 @@ def to_mml(self, ctx: BotConsumer) -> Union[bool, "Message"]:
2929
if not self.is_valid():
3030
return False
3131

32+
sender_data = self.data["sender"].copy()
33+
34+
# If user is authenticated, use their sender_uuid as the sender ID
35+
if ctx.scope.get("user") and ctx.scope["user"].is_authenticated:
36+
sender_data["id"] = str(ctx.scope["user"].sender_uuid)
37+
3238
s = MessageSerializer(
3339
data={
3440
"stack": self.data["stack"],
35-
"sender": self.data["sender"],
41+
"sender": sender_data,
3642
"send_time": int(time.time() * 1000),
3743
"conversation": ctx.conversation.pk,
3844
}

back/back/apps/broker/views/__init__.py

Lines changed: 12 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -100,13 +100,19 @@ def instance_permissions(self, request):
100100

101101
@action(methods=("get",), detail=False, permission_classes=[AllowAny])
102102
def from_sender(self, request, *args, **kwargs):
103-
if not request.query_params.get("sender"):
104-
return JsonResponse(
105-
{"error": "sender is required"},
106-
status=400,
107-
)
103+
# Use authenticated user's sender_uuid if available, otherwise fall back to query param
104+
if request.user.is_authenticated:
105+
sender_id = str(request.user.sender_uuid)
106+
else:
107+
sender_id = request.query_params.get("sender")
108+
if not sender_id:
109+
return JsonResponse(
110+
{"error": "sender is required"},
111+
status=400,
112+
)
113+
108114
results = []
109-
for c in Conversation.conversations_from_sender(request.query_params.get("sender")):
115+
for c in Conversation.conversations_from_sender(sender_id):
110116
if error := self._instance_permissions(c, request):
111117
return error
112118
results.append(ConversationSerializer(c).data)

back/back/apps/health/checks.py

Lines changed: 157 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -1,18 +1,23 @@
1-
from collections.abc import Mapping, Sequence
1+
import contextlib
2+
import io
3+
import logging
4+
from typing import Mapping, Sequence
25

36
from asgiref.sync import async_to_sync
47
from django.conf import settings
58
from health_check.cache.backends import CacheBackend
6-
from health_check.contrib.psutil.backends import MemoryUsage
79
from health_check.db.backends import (
10+
BaseHealthCheckBackend,
811
DatabaseBackend,
912
ServiceUnavailable,
10-
BaseHealthCheckBackend,
1113
)
1214

1315
from .base import DjangoHealthCheckWrapper, HealthCheck, Outcome, Status
1416
from .models import Event
1517

18+
# Get a logger instance
19+
logger = logging.getLogger(__name__)
20+
1621

1722
def disp_window(window: Mapping[str, int]) -> str:
1823
"""
@@ -104,41 +109,6 @@ def get_name(self) -> str:
104109
return "Database"
105110

106111

107-
class RamUsage(DjangoHealthCheckWrapper):
108-
"""
109-
Checks that we don't use too much RAM
110-
"""
111-
112-
base_class = MemoryUsage
113-
114-
def get_name(self) -> str:
115-
return "RAM Usage"
116-
117-
def get_resolving_actions(self, outcome: Outcome) -> str:
118-
return """# __CODE__ — RAM usage is too high
119-
120-
The memory usage in the container running the application is too high.
121-
122-
## Possible causes
123-
124-
- There is a memory leak in the application
125-
- The application just needs more RAM
126-
127-
## Possible solutions
128-
129-
- Short term, restart the container
130-
- Long term, identify if this issue comes from a leak (in which case you can
131-
fix the leak) or if the application just needs more RAM (in which case you
132-
can increase the RAM allocated to the container)
133-
"""
134-
135-
def suggest_reboot(self, outcome: Outcome) -> Sequence[str]:
136-
return ["api"]
137-
138-
139-
# :: IF api__redis
140-
141-
142112
class Cache(DjangoHealthCheckWrapper):
143113
"""
144114
Validates cache accessibility. Since the queue is also the cache, it will
@@ -172,9 +142,6 @@ def suggest_reboot(self, outcome: Outcome) -> Sequence[str]:
172142
return ["redis"]
173143

174144

175-
# :: ENDIF
176-
177-
178145
class ProcrastinateBuiltInHealthCheck(BaseHealthCheckBackend):
179146
"""
180147
Health check for Procrastinate task processor.
@@ -201,8 +168,8 @@ def check_status(self):
201168
Use the built-in healthchecks to check if the Procrastinate app is
202169
working.
203170
"""
204-
from procrastinate.contrib.django.healthchecks import healthchecks
205171
from procrastinate import exceptions
172+
from procrastinate.contrib.django.healthchecks import healthchecks
206173

207174
try:
208175
async_to_sync(healthchecks)(app=self.app)
@@ -252,3 +219,151 @@ def get_resolving_actions(self, outcome: Outcome) -> str:
252219

253220
def suggest_reboot(self, outcome: Outcome) -> Sequence[str]:
254221
return ["procrastinate_worker"]
222+
223+
224+
class ModuleSimulationBase(HealthCheck):
225+
"""
226+
Base class for module simulation health checks.
227+
Checks the status by looking at the results of the last periodic task run.
228+
"""
229+
230+
MODULE_NUMBER = None
231+
MODULE_NAME = None
232+
WINDOW = dict(hours=7)
233+
234+
def get_name(self) -> str:
235+
return f"{self.MODULE_NAME} Simulation"
236+
237+
238+
def get_status(self) -> Outcome:
239+
"""
240+
Checks the status of the module simulation based on the latest event
241+
recorded by the periodic Procrastinate task.
242+
"""
243+
event_type = f"module_{self.MODULE_NUMBER}_simulation"
244+
stats = Event.objects.type(event_type).within(**self.WINDOW).stats()
245+
stats_str = disp_stats(stats)
246+
247+
if stats["total"] == 0:
248+
# No events found, means the task likely didn't run
249+
outcome = dict(
250+
status=Status.ERROR,
251+
message=f"No simulation task events found in the last {disp_window(self.WINDOW)}",
252+
)
253+
elif stats["failure"]:
254+
outcome = dict(
255+
status=Status.ERROR,
256+
message=f"{stats_str} in the last {disp_window(self.WINDOW)}",
257+
)
258+
else:
259+
outcome = dict(
260+
status=Status.OK,
261+
message=f"{stats_str} in the last {disp_window(self.WINDOW)}",
262+
)
263+
264+
return Outcome(
265+
instance=self,
266+
**outcome,
267+
)
268+
269+
def get_resolving_actions(self, outcome: Outcome) -> str:
270+
# Adjust the explanation slightly
271+
return f"""# __CODE__ — {self.MODULE_NAME} Simulation Task Failed or Delayed
272+
273+
This check verifies the status of the last background task run for the {self.MODULE_NAME} simulation.
274+
The background task simulates a file generation via WebSocket to verify:
275+
- The WebSocket server is reachable.
276+
- The FSM works correctly.
277+
- The FastAPI modules server is reachable.
278+
- The file generation LLM is reachable.
279+
- The file storage is reachable.
280+
281+
## Possible Causes for ERROR/WARNING:
282+
283+
- **Network Connectivity:** Issues connecting to the WebSocket server, module server, LLM, or storage.
284+
- **Base File Missing:** The required input file (`health_check_files/...`) might be missing from storage.
285+
- **Module/FSM Logic Error:** An error within the specific module's logic or the FSM definition.
286+
- **Resource Exhaustion:** The simulation task might be timing out due to resource limits (CPU, RAM).
287+
- The LLM API keys might be invalid or the LLM provider is down.
288+
"""
289+
290+
def suggest_reboot(self, outcome: Outcome) -> Sequence[str]:
291+
return ["fsm", "module server"]
292+
293+
294+
class Module1Simulation(ModuleSimulationBase):
295+
"""
296+
Simulates a file generation with module 1 of the chatbot to check if
297+
WebSocket connection, message processing and file generation are working correctly.
298+
"""
299+
300+
MODULE_NUMBER = 1
301+
MODULE_NAME = "Info2ArticleXia"
302+
303+
304+
class Module2Simulation(ModuleSimulationBase):
305+
"""
306+
Simulates a file generation with module 2 of the chatbot to check if
307+
WebSocket connection, message processing and file generation are working correctly.
308+
"""
309+
310+
MODULE_NUMBER = 2
311+
MODULE_NAME = "TopicsIndexGenXia"
312+
313+
314+
class Module3Simulation(ModuleSimulationBase):
315+
"""
316+
Simulates a file generation with module 3 of the chatbot to check if
317+
WebSocket connection, message processing and file generation are working correctly.
318+
"""
319+
320+
MODULE_NUMBER = 3
321+
MODULE_NAME = "ColAgreeSumXia"
322+
323+
324+
class LLMCheck(HealthCheck):
325+
"""
326+
Validates that the enabled LLM are working correctly.
327+
"""
328+
329+
WINDOW = dict(hours=1)
330+
331+
def get_name(self) -> str:
332+
return "LLM Check"
333+
334+
def get_status(self) -> Outcome:
335+
events = Event.objects.types(["llm_call_complete", "llm_call_start"]).within(
336+
**self.WINDOW
337+
)
338+
stats = events.stats()
339+
stats_str = disp_stats(stats)
340+
341+
if stats["failure"]:
342+
errors = [e.data for e in events.filter(is_success=False)]
343+
return Outcome(
344+
instance=self,
345+
status=Status.ERROR,
346+
message=f"{stats_str} in the last {disp_window(self.WINDOW)}",
347+
extra={"errors": errors},
348+
)
349+
else:
350+
return Outcome(
351+
instance=self,
352+
status=Status.OK,
353+
message=f"{stats_str} in the last {disp_window(self.WINDOW)}",
354+
)
355+
356+
def get_resolving_actions(self, outcome: Outcome) -> str:
357+
return """# __CODE__ — LLM failed
358+
359+
This check validates that the enabled LLM are working correctly.
360+
361+
## Possible causes
362+
363+
- The API key is invalid.
364+
- The defined endpoint url is invalid.
365+
- The model provider is down.
366+
"""
367+
368+
def suggest_reboot(self, outcome: Outcome) -> Sequence[str]:
369+
return []

back/back/apps/health/migrations/0001_initial.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,9 @@
1-
# Generated by Django 5.1.6 on 2025-02-21 23:49
1+
# Generated by Django 4.1.7 on 2023-12-12 14:17
22

33
from django.db import migrations, models
44

55

66
class Migration(migrations.Migration):
7-
87
initial = True
98

109
dependencies = []

0 commit comments

Comments
 (0)