forked from sanbuphy/learn-coding-agent
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathclaudeAiLimits.ts
More file actions
515 lines (458 loc) · 16.4 KB
/
claudeAiLimits.ts
File metadata and controls
515 lines (458 loc) · 16.4 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
import { APIError } from '@anthropic-ai/sdk'
import type { MessageParam } from '@anthropic-ai/sdk/resources/index.mjs'
import isEqual from 'lodash-es/isEqual.js'
import { getIsNonInteractiveSession } from '../bootstrap/state.js'
import { isClaudeAISubscriber } from '../utils/auth.js'
import { getModelBetas } from '../utils/betas.js'
import { getGlobalConfig, saveGlobalConfig } from '../utils/config.js'
import { logError } from '../utils/log.js'
import { getSmallFastModel } from '../utils/model/model.js'
import { isEssentialTrafficOnly } from '../utils/privacyLevel.js'
import type { AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS } from './analytics/index.js'
import { logEvent } from './analytics/index.js'
import { getAPIMetadata } from './api/claude.js'
import { getAnthropicClient } from './api/client.js'
import {
processRateLimitHeaders,
shouldProcessRateLimits,
} from './rateLimitMocking.js'
// Re-export message functions from centralized location
export {
getRateLimitErrorMessage,
getRateLimitWarning,
getUsingOverageText,
} from './rateLimitMessages.js'
/**
 * Quota state of a limit. In this file the value is taken from the
 * `anthropic-ratelimit-unified-status` and
 * `anthropic-ratelimit-unified-overage-status` response headers.
 */
type QuotaStatus = 'allowed' | 'allowed_warning' | 'rejected'
/**
 * Identifies which rate limit a limit state refers to. Every member has a
 * user-facing label in RATE_LIMIT_DISPLAY_NAMES.
 */
type RateLimitType =
| 'five_hour'
| 'seven_day'
| 'seven_day_opus'
| 'seven_day_sonnet'
| 'overage'
// Type-only export of RateLimitType.
export type { RateLimitType }
/**
 * One client-side early-warning rule. The rule matches when utilization is
 * at or above `utilization` while the elapsed fraction of the window is at
 * or below `timePct`.
 */
type EarlyWarningThreshold = {
utilization: number // 0-1 scale: trigger warning when usage >= this
timePct: number // 0-1 scale: trigger warning when time elapsed <= this
}
/**
 * Early-warning settings for a single rate-limit window.
 */
type EarlyWarningConfig = {
// Limit type reported on the warning that this config produces
rateLimitType: RateLimitType
// Abbreviation interpolated into the
// `anthropic-ratelimit-unified-<abbrev>-utilization` / `-reset` header names
claimAbbrev: '5h' | '7d'
// Length of the window in seconds
windowSeconds: number
// A warning is produced when any one of these thresholds matches
thresholds: EarlyWarningThreshold[]
}
/**
 * Client-side early-warning rules, listed in the order they are evaluated
 * (the first window that yields a warning wins).
 *
 * They are the fallback used when the response carries no
 * surpassed-threshold header, and flag quota that is being consumed faster
 * than the elapsed share of the window would suggest.
 */
const EARLY_WARNING_CONFIGS: EarlyWarningConfig[] = [
  // 5-hour window: warn at >= 90% usage while <= 72% of the window has elapsed.
  {
    rateLimitType: 'five_hour',
    claimAbbrev: '5h',
    windowSeconds: 5 * 3600,
    thresholds: [{ utilization: 0.9, timePct: 0.72 }],
  },
  // 7-day window: three tiers, each pairing a usage level with a time share.
  {
    rateLimitType: 'seven_day',
    claimAbbrev: '7d',
    windowSeconds: 7 * 86400,
    thresholds: [
      { utilization: 0.75, timePct: 0.6 },
      { utilization: 0.5, timePct: 0.35 },
      { utilization: 0.25, timePct: 0.15 },
    ],
  },
]
/**
 * Lookup from the claim abbreviation that appears in rate-limit header names
 * to the RateLimitType it stands for. Header-based early-warning detection
 * iterates these entries in the order written here.
 */
const EARLY_WARNING_CLAIM_MAP: Record<string, RateLimitType> = {
  '5h': 'five_hour',
  '7d': 'seven_day',
  'overage': 'overage',
}
/** User-facing label for each rate limit type. */
const RATE_LIMIT_DISPLAY_NAMES: Record<RateLimitType, string> = {
  five_hour: 'session limit',
  seven_day: 'weekly limit',
  seven_day_opus: 'Opus limit',
  seven_day_sonnet: 'Sonnet limit',
  overage: 'extra usage limit',
}

/**
 * Look up the user-facing label for a rate limit type.
 *
 * @param type - Rate limit type to describe
 * @returns The label from RATE_LIMIT_DISPLAY_NAMES, or `type` itself when the
 *   lookup yields nothing usable
 */
export function getRateLimitDisplayName(type: RateLimitType): string {
  const label = RATE_LIMIT_DISPLAY_NAMES[type]
  return label ? label : type
}
/**
 * Fraction of a rate-limit window that has already passed, measured against
 * the current wall-clock time. Feeds the time-relative early-warning fallback.
 *
 * @param resetsAt - Unix epoch timestamp, in seconds, at which the limit resets
 * @param windowSeconds - Length of the window in seconds
 * @returns Elapsed share of the window, clamped to the range 0-1
 */
function computeTimeProgress(resetsAt: number, windowSeconds: number): number {
  // The window is taken to begin `windowSeconds` before the reset time.
  const secondsIntoWindow = Date.now() / 1000 - (resetsAt - windowSeconds)
  const fraction = secondsIntoWindow / windowSeconds
  return Math.min(1, Math.max(0, fraction))
}
/**
 * Reason why overage is disabled/rejected.
 * These values come from the API's unified limiter; in this file the value is
 * read from the `anthropic-ratelimit-unified-overage-disabled-reason`
 * response header.
 */
export type OverageDisabledReason =
| 'overage_not_provisioned' // Overage is not provisioned for this org or seat tier
| 'org_level_disabled' // Organization doesn't have overage enabled
| 'org_level_disabled_until' // Organization overage temporarily disabled
| 'out_of_credits' // Organization has insufficient credits
| 'seat_tier_level_disabled' // Seat tier doesn't have overage enabled
| 'member_level_disabled' // Account specifically has overage disabled
| 'seat_tier_zero_credit_limit' // Seat tier has a zero credit limit
| 'group_zero_credit_limit' // Resolved group limit has a zero credit limit
| 'member_zero_credit_limit' // Account has a zero credit limit
| 'org_service_level_disabled' // Org service specifically has overage disabled
| 'org_service_zero_credit_limit' // Org service has a zero credit limit
| 'no_limits_configured' // No overage limits configured for account
| 'unknown' // Unknown reason, should not happen
/**
 * Limit state derived from the unified rate-limit response headers. This is
 * the value held in `currentLimits` and passed to status-change listeners.
 */
export type ClaudeAILimits = {
// Quota status for this state
status: QuotaStatus
// unifiedRateLimitFallbackAvailable is currently used to warn users that set
// their model to Opus whenever they are about to run out of quota. It does
// not change the actual model that is used.
unifiedRateLimitFallbackAvailable: boolean
// Unix epoch seconds at which the limit resets, when a reset header was present
resetsAt?: number
// Which rate limit this state refers to
rateLimitType?: RateLimitType
// Utilization parsed from the window's utilization header; only populated on
// early-warning states
utilization?: number
// Value of the overage-status header, when present
overageStatus?: QuotaStatus
// Numeric value of the overage-reset header, when present
overageResetsAt?: number
// Value of the overage-disabled-reason header, when present
overageDisabledReason?: OverageDisabledReason
// True when the status header is 'rejected' while the overage status is
// 'allowed' or 'allowed_warning'
isUsingOverage?: boolean
// Numeric value of the surpassed-threshold header; set only by header-based
// early warnings
surpassedThreshold?: number
}
/**
 * The limit state most recently published through emitStatusChange().
 * It starts out as "allowed", with no fallback available and no overage in
 * use. Exported for testing only.
 */
export let currentLimits: ClaudeAILimits = {
  status: 'allowed',
  unifiedRateLimitFallbackAvailable: false,
  isUsingOverage: false,
}
/** Utilization reading for one rate-limit window. */
type RawWindowUtilization = {
  utilization: number // 0-1 fraction
  resets_at: number // unix epoch seconds
}

/** Readings keyed by window; a window is absent when it has no reading. */
type RawUtilization = Partial<
  Record<'five_hour' | 'seven_day', RawWindowUtilization>
>

/**
 * Raw per-window utilization taken from response headers. It is refreshed
 * whenever rate-limit headers are processed, unlike currentLimits.utilization
 * which is only set when a warning threshold fires.
 */
let rawUtilization: RawUtilization = {}

/**
 * Accessor that exposes the raw utilization to statusline scripts.
 *
 * @returns The most recently stored per-window utilization object
 */
export function getRawUtilization(): RawUtilization {
  return rawUtilization
}
/**
 * Read the raw 5-hour and 7-day utilization figures from response headers.
 *
 * A window is included only when both its `-utilization` and `-reset` headers
 * are present and parse to finite numbers. Blank or malformed values are
 * treated as missing, so NaN (or a spurious 0 from an empty string) never
 * reaches consumers of the raw utilization.
 *
 * @param headers - Response headers to inspect
 * @returns Per-window utilization; windows whose headers are missing or
 *   invalid are omitted
 */
function extractRawUtilization(headers: globalThis.Headers): RawUtilization {
  // Parse a numeric header; absent, blank, and non-finite values all count as missing.
  const readNumber = (name: string): number | undefined => {
    const raw = headers.get(name)
    if (raw === null || raw.trim() === '') {
      return undefined
    }
    const value = Number(raw)
    return Number.isFinite(value) ? value : undefined
  }

  const result: RawUtilization = {}
  for (const [key, abbrev] of [
    ['five_hour', '5h'],
    ['seven_day', '7d'],
  ] as const) {
    const prefix = `anthropic-ratelimit-unified-${abbrev}`
    const utilization = readNumber(`${prefix}-utilization`)
    const resetsAt = readNumber(`${prefix}-reset`)
    if (utilization !== undefined && resetsAt !== undefined) {
      result[key] = { utilization, resets_at: resetsAt }
    }
  }
  return result
}
/** Callback that receives a newly published limit state. */
type StatusChangeListener = (limits: ClaudeAILimits) => void

/** Registered status-change callbacks; emitStatusChange() invokes each one. */
export const statusListeners = new Set<StatusChangeListener>()
/**
 * Publish a new limit state: store it as `currentLimits`, call every
 * registered listener with it, then log a status-changed analytics event.
 *
 * @param limits - The limit state to publish
 */
export function emitStatusChange(limits: ClaudeAILimits) {
  currentLimits = limits
  for (const notify of statusListeners) {
    notify(limits)
  }

  // Time left until reset; counted as 0 when no reset time is known.
  const secondsTillReset = limits.resetsAt
    ? limits.resetsAt - Date.now() / 1000
    : 0
  const hoursTillReset = Math.round(secondsTillReset / (60 * 60))

  logEvent('tengu_claudeai_limits_status_changed', {
    status:
      limits.status as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
    unifiedRateLimitFallbackAvailable: limits.unifiedRateLimitFallbackAvailable,
    hoursTillReset,
  })
}
/**
 * Send a minimal one-token request with the small/fast model and return the
 * raw HTTP response, so that the caller can inspect its headers.
 *
 * @returns The raw response obtained via `asResponse()`
 */
async function makeTestQuery() {
  const quotaModel = getSmallFastModel()
  const anthropic = await getAnthropicClient({
    maxRetries: 0,
    model: quotaModel,
    source: 'quota_check',
  })
  const modelBetas = getModelBetas(quotaModel)
  const probeMessages: MessageParam[] = [{ role: 'user', content: 'quota' }]
  // biome-ignore lint/plugin: quota check needs raw response access via asResponse()
  return anthropic.beta.messages
    .create({
      model: quotaModel,
      max_tokens: 1,
      messages: probeMessages,
      metadata: getAPIMetadata(),
      // Only send `betas` when the model actually has some.
      ...(modelBetas.length > 0 ? { betas: modelBetas } : {}),
    })
    .asResponse()
}
/**
 * Proactively refresh the limit state by issuing a minimal API request and
 * reading the rate-limit headers on its response.
 *
 * The check is skipped, in this order, when:
 *  1. nonessential traffic is disabled;
 *  2. rate limits should not be processed (not a real subscriber and no mock
 *     testing);
 *  3. the session is non-interactive (-p): the real query follows immediately
 *     and extractQuotaStatusFromHeaders() will update limits from its
 *     response headers (claude.ts), so the pre-check call is unnecessary.
 *
 * An APIError raised by the request is passed to
 * extractQuotaStatusFromError(); any other error is ignored here.
 */
export async function checkQuotaStatus(): Promise<void> {
  const shouldSkip =
    isEssentialTrafficOnly() ||
    !shouldProcessRateLimits(isClaudeAISubscriber()) ||
    getIsNonInteractiveSession()
  if (shouldSkip) {
    return
  }

  try {
    // Minimal request whose response headers carry the quota state.
    const { headers } = await makeTestQuery()
    extractQuotaStatusFromHeaders(headers)
  } catch (error) {
    if (!(error instanceof APIError)) {
      return
    }
    extractQuotaStatusFromError(error)
  }
}
/**
 * Server-driven early warning: look for a surpassed-threshold header on any
 * claim listed in EARLY_WARNING_CLAIM_MAP and build a warning state from the
 * first one found.
 *
 * @param headers - Response headers to inspect
 * @param unifiedRateLimitFallbackAvailable - Copied onto the returned state
 * @returns An 'allowed_warning' state when a threshold header is present,
 *   otherwise null
 */
function getHeaderBasedEarlyWarning(
  headers: globalThis.Headers,
  unifiedRateLimitFallbackAvailable: boolean,
): ClaudeAILimits | null {
  for (const [claimAbbrev, rateLimitType] of Object.entries(
    EARLY_WARNING_CLAIM_MAP,
  )) {
    const headerPrefix = `anthropic-ratelimit-unified-${claimAbbrev}`
    const surpassedThreshold = headers.get(
      `${headerPrefix}-surpassed-threshold`,
    )
    if (surpassedThreshold === null) {
      // No threshold crossed for this claim; try the next one.
      continue
    }

    const utilizationHeader = headers.get(`${headerPrefix}-utilization`)
    const resetHeader = headers.get(`${headerPrefix}-reset`)
    return {
      status: 'allowed_warning',
      // Absent (or empty) companion headers leave the field undefined.
      resetsAt: resetHeader ? Number(resetHeader) : undefined,
      rateLimitType,
      utilization: utilizationHeader ? Number(utilizationHeader) : undefined,
      unifiedRateLimitFallbackAvailable,
      isUsingOverage: false,
      surpassedThreshold: Number(surpassedThreshold),
    }
  }
  return null
}
/**
 * Client-side early warning for one rate-limit window, used as the fallback
 * when the server sends no surpassed-threshold header.
 *
 * A warning is produced when, for at least one configured threshold,
 * utilization is at or above the threshold's utilization while the elapsed
 * share of the window is at or below its time percentage.
 *
 * @param headers - Response headers to inspect
 * @param config - Window definition and thresholds to evaluate
 * @param unifiedRateLimitFallbackAvailable - Copied onto the returned state
 * @returns An 'allowed_warning' state when a threshold matches; null when none
 *   matches or when either the utilization or reset header is missing
 */
function getTimeRelativeEarlyWarning(
  headers: globalThis.Headers,
  config: EarlyWarningConfig,
  unifiedRateLimitFallbackAvailable: boolean,
): ClaudeAILimits | null {
  const headerPrefix = `anthropic-ratelimit-unified-${config.claimAbbrev}`
  const utilizationHeader = headers.get(`${headerPrefix}-utilization`)
  const resetHeader = headers.get(`${headerPrefix}-reset`)
  if (utilizationHeader === null || resetHeader === null) {
    return null
  }

  const utilization = Number(utilizationHeader)
  const resetsAt = Number(resetHeader)
  const timeProgress = computeTimeProgress(resetsAt, config.windowSeconds)

  for (const threshold of config.thresholds) {
    // High usage early in the window.
    const burningTooFast =
      utilization >= threshold.utilization && timeProgress <= threshold.timePct
    if (burningTooFast) {
      return {
        status: 'allowed_warning',
        resetsAt,
        rateLimitType: config.rateLimitType,
        utilization,
        unifiedRateLimitFallbackAvailable,
        isUsingOverage: false,
      }
    }
  }
  return null
}
/**
 * Resolve an early-warning state from response headers.
 *
 * The server-side surpassed-threshold header takes precedence. When it is
 * absent, each entry of EARLY_WARNING_CONFIGS is evaluated in order with the
 * client-side time-relative rule, and the first warning found is returned.
 *
 * @param headers - Response headers to inspect
 * @param unifiedRateLimitFallbackAvailable - Copied onto the returned state
 * @returns The warning state, or null when nothing warrants a warning
 */
function getEarlyWarningFromHeaders(
  headers: globalThis.Headers,
  unifiedRateLimitFallbackAvailable: boolean,
): ClaudeAILimits | null {
  const serverSignalled = getHeaderBasedEarlyWarning(
    headers,
    unifiedRateLimitFallbackAvailable,
  )
  if (serverSignalled !== null) {
    return serverSignalled
  }

  // Fallback: catches users burning quota faster than sustainable.
  let fallbackWarning: ClaudeAILimits | null = null
  for (const windowConfig of EARLY_WARNING_CONFIGS) {
    fallbackWarning = getTimeRelativeEarlyWarning(
      headers,
      windowConfig,
      unifiedRateLimitFallbackAvailable,
    )
    if (fallbackWarning !== null) {
      break
    }
  }
  return fallbackWarning
}
/**
 * Translate the unified rate-limit response headers into a ClaudeAILimits
 * state.
 *
 * When the status header is 'allowed' or 'allowed_warning' (or missing, which
 * defaults to 'allowed'), an early-warning state is returned if one applies;
 * otherwise the resulting status is reported as plain 'allowed'. Any other
 * status is passed through unchanged.
 *
 * @param headers - Response headers to read
 * @returns The limit state described by the headers
 */
function computeNewLimitsFromHeaders(
  headers: globalThis.Headers,
): ClaudeAILimits {
  const readHeader = (suffix: string): string | null =>
    headers.get(`anthropic-ratelimit-unified-${suffix}`)

  const status = (readHeader('status') as QuotaStatus) || 'allowed'
  const resetsAtHeader = readHeader('reset')
  const resetsAt = resetsAtHeader ? Number(resetsAtHeader) : undefined
  const unifiedRateLimitFallbackAvailable =
    readHeader('fallback') === 'available'

  // Rate limit type and overage details.
  const rateLimitType = readHeader(
    'representative-claim',
  ) as RateLimitType | null
  const overageStatus = readHeader('overage-status') as QuotaStatus | null
  const overageResetsAtHeader = readHeader('overage-reset')
  const overageResetsAt = overageResetsAtHeader
    ? Number(overageResetsAtHeader)
    : undefined
  // Reason why overage is disabled (spending cap or wallet empty).
  const overageDisabledReason = readHeader(
    'overage-disabled-reason',
  ) as OverageDisabledReason | null

  // Overage is in use when standard limits rejected but overage is allowed.
  const overagePermitted =
    overageStatus === 'allowed' || overageStatus === 'allowed_warning'
  const isUsingOverage = status === 'rejected' && overagePermitted

  const isPermitted = status === 'allowed' || status === 'allowed_warning'
  if (isPermitted) {
    const earlyWarning = getEarlyWarningFromHeaders(
      headers,
      unifiedRateLimitFallbackAvailable,
    )
    if (earlyWarning) {
      return earlyWarning
    }
  }

  // `resetsAt` is always written (possibly as undefined); the optional fields
  // below are added only when they hold a truthy value.
  const limits: ClaudeAILimits = {
    // No early warning threshold surpassed => report plain 'allowed'.
    status: isPermitted ? 'allowed' : status,
    resetsAt,
    unifiedRateLimitFallbackAvailable,
  }
  if (rateLimitType) {
    limits.rateLimitType = rateLimitType
  }
  if (overageStatus) {
    limits.overageStatus = overageStatus
  }
  if (overageResetsAt) {
    limits.overageResetsAt = overageResetsAt
  }
  if (overageDisabledReason) {
    limits.overageDisabledReason = overageDisabledReason
  }
  limits.isUsingOverage = isUsingOverage
  return limits
}
/**
 * Persist the extra-usage (overage) disabled reason from the response headers
 * into the global config, writing only when the value differs from what is
 * already cached.
 *
 * @param headers - Response headers to read the reason from
 */
function cacheExtraUsageDisabledReason(headers: globalThis.Headers): void {
  // No header => null, which means extra usage is enabled.
  const reason =
    headers.get('anthropic-ratelimit-unified-overage-disabled-reason') ?? null
  if (getGlobalConfig().cachedExtraUsageDisabledReason === reason) {
    return
  }
  saveGlobalConfig(current => ({
    ...current,
    cachedExtraUsageDisabledReason: reason,
  }))
}
/**
 * Update the tracked limit state from a response's headers.
 *
 * When rate limits should be processed, the headers (after any active
 * /mock-limits mocks are applied) refresh the raw utilization, the cached
 * extra-usage disabled reason, and - if the computed state differs from the
 * current one - the published limit state.
 *
 * When they should not be processed, the raw utilization is cleared and any
 * leftover limit state is reset to the default "allowed" state.
 *
 * @param headers - Response headers to read
 */
export function extractQuotaStatusFromHeaders(
  headers: globalThis.Headers,
): void {
  if (shouldProcessRateLimits(isClaudeAISubscriber())) {
    // Applies mocks from the /mock-limits command if active.
    const effectiveHeaders = processRateLimitHeaders(headers)
    rawUtilization = extractRawUtilization(effectiveHeaders)
    const nextLimits = computeNewLimitsFromHeaders(effectiveHeaders)
    // Cached value persists across sessions.
    cacheExtraUsageDisabledReason(effectiveHeaders)
    if (!isEqual(currentLimits, nextLimits)) {
      emitStatusChange(nextLimits)
    }
    return
  }

  // Rate limits do not apply: drop whatever state is still being held.
  rawUtilization = {}
  const hasLimitState =
    currentLimits.status !== 'allowed' || Boolean(currentLimits.resetsAt)
  if (hasLimitState) {
    emitStatusChange({
      status: 'allowed',
      unifiedRateLimitFallbackAvailable: false,
      isUsingOverage: false,
    })
  }
}
/**
 * Update the tracked limit state from a failed API call.
 *
 * Only HTTP 429 errors are handled, and only when rate limits should be
 * processed. The resulting state is built from the error's headers when
 * present (otherwise from a copy of the current state), and its status is
 * always forced to 'rejected'. Failures during processing are logged rather
 * than thrown.
 *
 * @param error - The API error to inspect
 */
export function extractQuotaStatusFromError(error: APIError): void {
  const isRelevant =
    shouldProcessRateLimits(isClaudeAISubscriber()) && error.status === 429
  if (!isRelevant) {
    return
  }

  try {
    let nextLimits: ClaudeAILimits
    if (error.headers) {
      // Applies mocks from the /mock-limits command if active.
      const effectiveHeaders = processRateLimitHeaders(error.headers)
      rawUtilization = extractRawUtilization(effectiveHeaders)
      nextLimits = computeNewLimitsFromHeaders(effectiveHeaders)
      // Cached value persists across sessions.
      cacheExtraUsageDisabledReason(effectiveHeaders)
    } else {
      nextLimits = { ...currentLimits }
    }

    // A 429 always means rejected, even when no headers came back.
    nextLimits.status = 'rejected'
    if (!isEqual(currentLimits, nextLimits)) {
      emitStatusChange(nextLimits)
    }
  } catch (e) {
    logError(e as Error)
  }
}