Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -183,11 +183,11 @@ class PhotoReasoningViewModel(
// to avoid re-executing already-executed commands
private var incrementalCommandCount = 0

// Mistral rate limiting per API key (1.1 seconds between requests with same key)
// Mistral rate limiting per API key (1.5 seconds between requests with same key)
private val mistralNextAllowedRequestAtMsByKey = mutableMapOf<String, Long>()
private var lastMistralTokenTimeMs = 0L
private var lastMistralTokenKey: String? = null
private val MISTRAL_MIN_INTERVAL_MS = 1100L
private val MISTRAL_MIN_INTERVAL_MS = 1500L

// Accumulated full text during streaming for incremental command parsing
private var streamingAccumulatedText = StringBuilder()
Expand Down Expand Up @@ -609,6 +609,7 @@ class PhotoReasoningViewModel(
val currentModel = com.google.ai.sample.GenerativeAiViewModelFactory.getCurrentModel()

clearStaleErrorState()
stopExecutionFlag.set(false)

// Check for Human Expert model
if (currentModel == ModelOption.HUMAN_EXPERT) {
Expand Down Expand Up @@ -1203,28 +1204,28 @@ private fun reasonWithMistral(
markKeyCooldown(selectedKey, requestEndMs)
blockedKeysThisRound.add(selectedKey)
consecutiveFailures++
if (consecutiveFailures >= 5) {
throw IOException("Mistral request failed after 5 attempts: ${e.message}", e)
if (consecutiveFailures >= maxAttempts) {
throw IOException("Mistral request failed after $maxAttempts attempts: ${e.message}", e)
}
withContext(Dispatchers.Main) {
replaceAiMessageText(
if (consecutiveFailures >= maxAttempts) {
throw IOException("Mistral request failed after $maxAttempts attempts: ${e.message}", e)
"Mistral Netzwerkfehler (Versuch $consecutiveFailures/$maxAttempts). Wiederhole...",
isPending = true
)
}
}
"Mistral Netzwerkfehler (Versuch $consecutiveFailures/$maxAttempts). Wiederhole...",
}

if (stopExecutionFlag.get()) {
throw IOException("Mistral request aborted.")
}

val finalResponse = response ?: throw IOException("Mistral request failed after 5 attempts.")
val finalResponse = response ?: throw IOException("Mistral request failed after $maxAttempts attempts.")

if (!finalResponse.isSuccessful) {
val errBody = finalResponse.body?.string()
finalResponse.close()
val finalResponse = response ?: throw IOException("Mistral request failed after $maxAttempts attempts.")
throw IOException("Mistral Error ${finalResponse.code}: $errBody")
}

val body = finalResponse.body ?: throw IOException("Empty response body from Mistral")
Expand Down
172 changes: 172 additions & 0 deletions scripts/mistral_cooldown_probe.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,172 @@
#!/usr/bin/env python3
import json
import os
import subprocess
import time
from typing import List, Tuple

# Read the API key from the environment so no credential is committed to
# version control (CWE-798: hard-coded credentials). An empty string is used
# when the variable is unset; requests will then fail with HTTP 401.
MISTRAL_API_KEY = os.getenv("MISTRAL_API_KEY", "")
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🛑 Security Vulnerability: Remove hardcoded API key before merge. Hardcoded credentials in source code create security risks and can lead to unauthorized access if the repository is compromised1.

Replace with environment variable or configuration file that's excluded from version control.

MISTRAL_API_KEY = os.getenv("MISTRAL_API_KEY", "")

Footnotes

  1. CWE-798: Use of Hard-coded Credentials - https://cwe.mitre.org/data/definitions/798.html

# Model and chat-completions endpoint hit by every probe request below.
MISTRAL_MODEL = "mistral-large-latest"
MISTRAL_ENDPOINT = "https://api.mistral.ai/v1/chat/completions"


def now_ms() -> int:
    """Current wall-clock time as whole milliseconds since the epoch."""
    seconds = time.time()
    return int(seconds * 1000)


def curl_chat(payload: dict, stream: bool) -> Tuple[int, int, int]:
    """
    Issue one chat-completion request through the curl binary.

    Returns: (http_code, request_started_ms, last_token_ms_or_response_end_ms)
    For non-stream requests, 3rd value is response-end timestamp.

    Raises RuntimeError when curl exits non-zero.
    """
    started_ms = now_ms()

    args = ["curl", "-sS"]
    if stream:
        # -N disables curl's output buffering so SSE tokens arrive live.
        args.append("-N")
    args += [
        "-X", "POST", MISTRAL_ENDPOINT,
        "-H", "Content-Type: application/json",
        "-H", f"Authorization: Bearer {MISTRAL_API_KEY}",
        "--data-binary", json.dumps(payload),
        # Trailing write-out lets us recover the HTTP status from stdout.
        "-w", "\nHTTP_STATUS:%{http_code}\n",
    ]

    proc = subprocess.Popen(
        args,
        stdout=subprocess.PIPE,
        stderr=subprocess.STDOUT,
        text=True,
        bufsize=1,
    )

    status_code = 0
    last_token_ms = started_ms
    assert proc.stdout is not None  # guaranteed by stdout=PIPE
    for raw_line in proc.stdout:
        stripped = raw_line.rstrip("\n")
        if stripped.startswith("HTTP_STATUS:"):
            try:
                status_code = int(stripped.split(":", 1)[1].strip())
            except ValueError:
                status_code = 0
        elif stripped.startswith("data:"):
            chunk = stripped[5:].strip()
            # Timestamp every real SSE payload; skip keep-alives and [DONE].
            if chunk and chunk != "[DONE]":
                last_token_ms = now_ms()

    rc = proc.wait()
    if rc != 0:
        raise RuntimeError(f"curl failed with exit code {rc}")

    if not stream:
        # No token stream: report the moment the whole response finished.
        last_token_ms = now_ms()
    return status_code, started_ms, last_token_ms


def sleep_until(target_ms: int) -> None:
    """Block until the wall clock reaches target_ms; no-op if already past."""
    delta_ms = target_ms - now_ms()
    if delta_ms > 0:
        time.sleep(delta_ms / 1000.0)


def probe_last_token_mode(delays: List[int]) -> None:
    """
    Sweep cooldown delays measured from the LAST streamed token of a baseline
    request, and report the smallest delay at which a follow-up probe gets 200.
    """
    print("=== PROBE: ab_letztem_token ===")
    first_success = None
    for wait_ms in delays:
        baseline = {
            "model": MISTRAL_MODEL,
            "messages": [{"role": "user", "content": "Sag nur OK."}],
            "max_tokens": 32,
            "stream": True,
        }
        status, _, last_token_ms = curl_chat(baseline, stream=True)
        if status != 200:
            # Baseline itself was throttled/failed; skip this delay value.
            print(f"baseline_stream_failed http={status}")
            continue

        sleep_until(last_token_ms + wait_ms)
        probe = {
            "model": MISTRAL_MODEL,
            "messages": [{"role": "user", "content": "OK?"}],
            "max_tokens": 1,
            "stream": False,
        }
        probe_status, _, _ = curl_chat(probe, stream=False)
        print(f"delay={wait_ms}ms http={probe_status}")
        if first_success is None and probe_status == 200:
            first_success = wait_ms
    print(f"min_success_delay_ms={first_success}")
    print()


def probe_request_start_mode(delays: List[int]) -> None:
    """
    Sweep cooldown delays measured from the START of a baseline streaming
    request (which keeps running in the background while the probe is sent),
    and report the smallest delay at which the probe gets HTTP 200.
    """
    print("=== PROBE: ab_request_start ===")
    first_success = None
    for wait_ms in delays:
        baseline = {
            "model": MISTRAL_MODEL,
            "messages": [{"role": "user", "content": "Sag nur OK."}],
            "max_tokens": 32,
            "stream": True,
        }
        started_ms = now_ms()
        # Launch the baseline stream without waiting for it to finish, so the
        # probe can overlap with an in-flight request.
        background_cmd = [
            "curl", "-sS", "-N", "-X", "POST", MISTRAL_ENDPOINT,
            "-H", "Content-Type: application/json",
            "-H", f"Authorization: Bearer {MISTRAL_API_KEY}",
            "--data-binary", json.dumps(baseline),
            "-w", "\nHTTP_STATUS:%{http_code}\n",
        ]
        background = subprocess.Popen(
            background_cmd,
            stdout=subprocess.PIPE,
            stderr=subprocess.STDOUT,
            text=True,
            bufsize=1,
        )

        sleep_until(started_ms + wait_ms)
        probe = {
            "model": MISTRAL_MODEL,
            "messages": [{"role": "user", "content": "OK?"}],
            "max_tokens": 1,
            "stream": False,
        }
        probe_status, _, _ = curl_chat(probe, stream=False)
        print(f"delay={wait_ms}ms http={probe_status}")
        if first_success is None and probe_status == 200:
            first_success = wait_ms

        # Drain the background stream and recover its HTTP status from the
        # curl write-out marker (last occurrence wins).
        output, _ = background.communicate()
        stream_status = 0
        for line in output.splitlines():
            if line.startswith("HTTP_STATUS:"):
                try:
                    stream_status = int(line.split(":", 1)[1].strip())
                except ValueError:
                    stream_status = 0
        if stream_status != 200:
            print(f"baseline_stream_failed http={stream_status}")
    print(f"min_success_delay_ms={first_success}")
    print()


if __name__ == "__main__":
    # Sweep 100 ms .. 3000 ms in 100 ms steps for both cooldown strategies.
    sweep = [ms for ms in range(100, 3001, 100)]
    probe_last_token_mode(sweep)
    probe_request_start_mode(sweep)
4 changes: 4 additions & 0 deletions scripts/mistral_cooldown_probe.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
#!/usr/bin/env bash
# Thin launcher: resolve the directory this script lives in, then hand off
# to the Python cooldown probe that sits next to it.
set -euo pipefail
probe_dir="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
exec python3 "$probe_dir/mistral_cooldown_probe.py"