Skip to content

Commit a772205

Browse files
Merge pull request #69 from Android-PowerUser/fix-compilation-errors-in-mainactivity.kt-esbisz
Improve Mistral rate-limit handling and add cooldown probe scripts
2 parents 8fe297e + fe8b023 commit a772205

1 file changed

Lines changed: 28 additions & 4 deletions

File tree

app/src/main/kotlin/com/google/ai/sample/feature/multimodal/PhotoReasoningViewModel.kt

Lines changed: 28 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -71,6 +71,7 @@ import com.google.ai.sample.webrtc.WebRTCSender
7171
import com.google.ai.sample.webrtc.SignalingClient
7272
import org.webrtc.IceCandidate
7373
import kotlin.math.max
74+
import kotlin.math.roundToLong
7475

7576
class PhotoReasoningViewModel(
7677
application: Application,
@@ -183,11 +184,11 @@ class PhotoReasoningViewModel(
183184
// to avoid re-executing already-executed commands
184185
private var incrementalCommandCount = 0
185186

186-
// Mistral rate limiting per API key (4 seconds between requests with same key)
187+
// Mistral rate limiting per API key (1.5 seconds between requests with same key)
187188
private val mistralNextAllowedRequestAtMsByKey = mutableMapOf<String, Long>()
188189
private var lastMistralTokenTimeMs = 0L
189190
private var lastMistralTokenKey: String? = null
190-
private val MISTRAL_MIN_INTERVAL_MS = 4000L
191+
private val MISTRAL_MIN_INTERVAL_MS = 1500L
191192

192193
// Accumulated full text during streaming for incremental command parsing
193194
private var streamingAccumulatedText = StringBuilder()
@@ -1140,11 +1141,31 @@ private fun reasonWithMistral(
11401141
mistralNextAllowedRequestAtMsByKey[key] = max(existing, nextAllowedAt)
11411142
}
11421143

1144+
/**
 * Records a cooldown for [key] so the next request with it is delayed.
 *
 * The cooldown length is the larger of `MISTRAL_MIN_INTERVAL_MS` and [extraDelayMs]
 * (a negative [extraDelayMs] counts as 0) and is measured from [referenceTimeMs].
 * A later deadline already stored for the key is kept; the deadline is never moved earlier.
 *
 * @param key API key the cooldown applies to.
 * @param referenceTimeMs timestamp in milliseconds the cooldown starts from.
 * @param extraDelayMs additional delay in milliseconds requested on top of the minimum interval.
 */
fun markKeyCooldown(key: String, referenceTimeMs: Long, extraDelayMs: Long) {
    // Clamp negatives first, then enforce the configured minimum spacing.
    val cooldownMs = extraDelayMs
        .coerceAtLeast(0L)
        .coerceAtLeast(MISTRAL_MIN_INTERVAL_MS)
    val candidateDeadline = referenceTimeMs + cooldownMs
    val currentDeadline = mistralNextAllowedRequestAtMsByKey[key] ?: 0L
    // Keep whichever deadline is later.
    mistralNextAllowedRequestAtMsByKey[key] = currentDeadline.coerceAtLeast(candidateDeadline)
}
1150+
11431151
/**
 * Returns how many milliseconds must still elapse after [nowMs] before [key] may be used again.
 *
 * A key with no recorded deadline, or whose deadline has already passed, yields 0.
 */
fun remainingWaitForKeyMs(key: String, nowMs: Long): Long {
    val deadlineMs = mistralNextAllowedRequestAtMsByKey[key] ?: 0L
    val remainingMs = deadlineMs - nowMs
    return if (remainingMs < 0L) 0L else remainingMs
}
11471155

1156+
/**
 * Converts a `Retry-After` header value given in (possibly fractional) seconds to milliseconds.
 *
 * @param headerValue raw header value; may be null when the header is absent.
 * @return the delay in milliseconds (negative inputs clamp to 0), or null when the value is
 *   missing, blank, not numeric, or not a finite number. The HTTP-date form of
 *   `Retry-After` is not parsed here and therefore also yields null.
 */
fun parseRetryAfterMs(headerValue: String?): Long? {
    if (headerValue.isNullOrBlank()) return null
    val seconds = headerValue.trim().toDoubleOrNull() ?: return null
    // toDoubleOrNull() accepts "NaN" and "Infinity": roundToLong() throws on NaN, and
    // Infinity saturates to Long.MAX_VALUE, which overflows once added to a timestamp.
    if (!seconds.isFinite()) return null
    return (seconds * 1000.0).roundToLong().coerceAtLeast(0L)
}
1161+
1162+
/**
 * Derives the remaining wait in milliseconds from the `x-ratelimit-reset` header of [response].
 *
 * The header value is treated as a whole number of seconds since the epoch, converted to
 * milliseconds and compared with [nowMs].
 * NOTE(review): this assumes the server sends an absolute epoch timestamp rather than a
 * relative delay — confirm against the API's documentation.
 *
 * @return the non-negative delay until the reset time, or null when the header is absent
 *   or is not a valid integer.
 */
fun parseRateLimitResetDelayMs(response: okhttp3.Response, nowMs: Long): Long? {
    val resetAtMs = response.header("x-ratelimit-reset")
        ?.trim()
        ?.toLongOrNull()
        ?.times(1000L)
        ?: return null
    // A reset time already in the past means no extra wait.
    return maxOf(0L, resetAtMs - nowMs)
}
1168+
11481169
/**
 * Tells whether an HTTP status [code] should trigger another attempt:
 * true for 429 and for every code of 500 or above, false otherwise.
 */
fun isRetryableMistralFailure(code: Int): Boolean = when {
    code >= 500 -> true
    else -> code == 429
}
@@ -1176,7 +1197,10 @@ private fun reasonWithMistral(
11761197
try {
11771198
val attemptResponse = client.newCall(buildRequest(selectedKey)).execute()
11781199
val requestEndMs = System.currentTimeMillis()
1179-
markKeyCooldown(selectedKey, requestEndMs)
1200+
val retryAfterMs = parseRetryAfterMs(attemptResponse.header("Retry-After"))
1201+
val resetDelayMs = parseRateLimitResetDelayMs(attemptResponse, requestEndMs)
1202+
val serverRequestedDelayMs = max(retryAfterMs ?: 0L, resetDelayMs ?: 0L)
1203+
markKeyCooldown(selectedKey, requestEndMs, serverRequestedDelayMs)
11801204

11811205
if (attemptResponse.isSuccessful) {
11821206
response = attemptResponse
@@ -1195,7 +1219,7 @@ private fun reasonWithMistral(
11951219
consecutiveFailures++
11961220
withContext(Dispatchers.Main) {
11971221
replaceAiMessageText(
1198-
"Mistral temporär nicht verfügbar (Versuch $consecutiveFailures/$maxAttempts). Wiederhole...",
1222+
"Mistral temporär nicht verfügbar (Versuch $consecutiveFailures/$maxAttempts). Warte auf Server-Rate-Limit und wiederhole...",
11991223
isPending = true
12001224
)
12011225
}

0 commit comments

Comments
 (0)