@@ -71,6 +71,7 @@ import com.google.ai.sample.webrtc.WebRTCSender
7171import com.google.ai.sample.webrtc.SignalingClient
7272import org.webrtc.IceCandidate
7373import kotlin.math.max
74+ import kotlin.math.roundToLong
7475
7576class PhotoReasoningViewModel (
7677 application : Application ,
@@ -183,11 +184,11 @@ class PhotoReasoningViewModel(
183184 // to avoid re-executing already-executed commands
184185 private var incrementalCommandCount = 0
185186
186- // Mistral rate limiting per API key (4 seconds between requests with same key)
187+ // Mistral rate limiting per API key (1.5 seconds between requests with same key)
187188 private val mistralNextAllowedRequestAtMsByKey = mutableMapOf<String , Long >()
188189 private var lastMistralTokenTimeMs = 0L
189190 private var lastMistralTokenKey: String? = null
190- private val MISTRAL_MIN_INTERVAL_MS = 4000L
191+ private val MISTRAL_MIN_INTERVAL_MS = 1500L
191192
192193 // Accumulated full text during streaming for incremental command parsing
193194 private var streamingAccumulatedText = StringBuilder ()
@@ -1140,11 +1141,31 @@ private fun reasonWithMistral(
11401141 mistralNextAllowedRequestAtMsByKey[key] = max(existing, nextAllowedAt)
11411142 }
11421143
/**
 * Records the earliest time at which [key] may be used for another request.
 *
 * The cooldown is the larger of the configured minimum interval
 * (MISTRAL_MIN_INTERVAL_MS) and [extraDelayMs], counted from [referenceTimeMs].
 * An already-stored later deadline for the same key is never shortened.
 *
 * @param key API key whose cooldown is updated.
 * @param referenceTimeMs timestamp in milliseconds the delay is counted from.
 * @param extraDelayMs additional delay in milliseconds; negative values are treated as 0.
 */
fun markKeyCooldown(key: String, referenceTimeMs: Long, extraDelayMs: Long) {
    val normalizedExtraDelay = extraDelayMs.coerceAtLeast(0L)
    val delayMs = max(MISTRAL_MIN_INTERVAL_MS, normalizedExtraDelay)
    val sum = referenceTimeMs + delayMs
    // delayMs is never negative here, so a sum below the reference time can only mean
    // the addition wrapped around. Saturate instead of storing a deadline in the past,
    // which would silently drop the cooldown altogether.
    val nextAllowedAt = if (sum < referenceTimeMs) Long.MAX_VALUE else sum
    val existing = mistralNextAllowedRequestAtMsByKey[key] ?: 0L
    mistralNextAllowedRequestAtMsByKey[key] = max(existing, nextAllowedAt)
}
1150+
/**
 * Returns how many milliseconds must still pass, as of [nowMs], before [key]
 * may be used again. Yields 0 when no deadline is stored for the key or the
 * stored deadline has already passed.
 */
fun remainingWaitForKeyMs(key: String, nowMs: Long): Long {
    val readyAtMs = mistralNextAllowedRequestAtMsByKey.getOrElse(key) { 0L }
    val remainingMs = readyAtMs - nowMs
    return if (remainingMs > 0L) remainingMs else 0L
}
11471155
/**
 * Converts a `Retry-After` header value expressed in (possibly fractional)
 * seconds into milliseconds.
 *
 * @param headerValue raw header value, or null when the header is absent.
 * @return the delay in milliseconds (never negative), or null when the value is
 *   missing, blank, not numeric, or not a finite number. The HTTP-date form of
 *   `Retry-After` is not parsed and also yields null.
 */
fun parseRetryAfterMs(headerValue: String?): Long? {
    if (headerValue.isNullOrBlank()) return null
    val seconds = headerValue.trim().toDoubleOrNull() ?: return null
    // "NaN" and "Infinity" parse successfully as Double. roundToLong() throws on NaN,
    // and an infinite delay is not a usable server hint, so treat both as "no value".
    if (!seconds.isFinite()) return null
    return (seconds * 1000.0).roundToLong().coerceAtLeast(0L)
}
1161+
/**
 * Derives how long to wait, relative to [nowMs], until the rate-limit window
 * reported in the `x-ratelimit-reset` response header has reset.
 *
 * NOTE(review): the value is interpreted as an absolute Unix timestamp in seconds.
 * Some APIs send seconds-until-reset instead, which would always produce 0 here.
 * Confirm the format against the provider's documentation.
 *
 * @param response HTTP response whose headers are inspected.
 * @param nowMs current time in milliseconds since the epoch.
 * @return the remaining delay in milliseconds (never negative), or null when the
 *   header is absent or not an integer.
 */
fun parseRateLimitResetDelayMs(response: okhttp3.Response, nowMs: Long): Long? {
    // The header name must not carry surrounding whitespace, or the lookup never matches.
    val resetHeader = response.header("x-ratelimit-reset") ?: return null
    val resetEpochSeconds = resetHeader.trim().toLongOrNull() ?: return null
    // A zero or negative timestamp is already in the past.
    if (resetEpochSeconds <= 0L) return 0L
    // Saturate rather than wrap when converting an absurdly large value to milliseconds.
    val resetMs = if (resetEpochSeconds > Long.MAX_VALUE / 1000L) {
        Long.MAX_VALUE
    } else {
        resetEpochSeconds * 1000L
    }
    return (resetMs - nowMs).coerceAtLeast(0L)
}
1168+
/**
 * Tells whether an HTTP status [code] is treated as retryable:
 * 429 or any status of 500 and above.
 */
fun isRetryableMistralFailure(code: Int): Boolean = when {
    code == 429 -> true
    else -> code >= 500
}
@@ -1176,7 +1197,10 @@ private fun reasonWithMistral(
11761197 try {
11771198 val attemptResponse = client.newCall(buildRequest(selectedKey)).execute()
11781199 val requestEndMs = System .currentTimeMillis()
1179- markKeyCooldown(selectedKey, requestEndMs)
1200+ val retryAfterMs = parseRetryAfterMs(attemptResponse.header(" Retry-After" ))
1201+ val resetDelayMs = parseRateLimitResetDelayMs(attemptResponse, requestEndMs)
1202+ val serverRequestedDelayMs = max(retryAfterMs ? : 0L , resetDelayMs ? : 0L )
1203+ markKeyCooldown(selectedKey, requestEndMs, serverRequestedDelayMs)
11801204
11811205 if (attemptResponse.isSuccessful) {
11821206 response = attemptResponse
@@ -1195,7 +1219,7 @@ private fun reasonWithMistral(
11951219 consecutiveFailures++
11961220 withContext(Dispatchers .Main ) {
11971221 replaceAiMessageText(
1198- " Mistral temporär nicht verfügbar (Versuch $consecutiveFailures /$maxAttempts ). Wiederhole ..." ,
1222+ " Mistral temporär nicht verfügbar (Versuch $consecutiveFailures /$maxAttempts ). Warte auf Server-Rate-Limit und wiederhole ..." ,
11991223 isPending = true
12001224 )
12011225 }
0 commit comments