@@ -38,6 +38,7 @@ type RetryOptions = {
3838 excludeProviders : string [ ]
3939 retryCount : number
4040}
/**
 * Identifies how a request is paid for.
 *
 * Returned by `validateBilling` and passed to the usage-tracking, reload and
 * cost-reporting helpers, which branch on it:
 * - "anonymous": no auth info is present for the request.
 * - "free": the account is flagged free, or the model allows anonymous use.
 * - "byok": the caller supplied its own provider credentials.
 * - "subscription": NOTE(review): presumably covered by a subscription plan — confirm.
 * - "balance": the only source for which a non-zero cost is reported and a
 *   balance reload is considered.
 */
type BillingSource =
  | "anonymous"
  | "free"
  | "byok"
  | "subscription"
  | "balance"
4142
4243export async function handler (
4344 input : APIEvent ,
@@ -51,6 +52,7 @@ export async function handler(
5152 type AuthInfo = Awaited < ReturnType < typeof authenticate > >
5253 type ModelInfo = Awaited < ReturnType < typeof validateModel > >
5354 type ProviderInfo = Awaited < ReturnType < typeof selectProvider > >
55+ type CostInfo = ReturnType < typeof calculateCost >
5456
5557 const MAX_FAILOVER_RETRIES = 3
5658 const MAX_429_RETRIES = 3
@@ -139,21 +141,22 @@ export async function handler(
139141 "llm.error.code" : res . status ,
140142 "llm.error.message" : res . statusText ,
141143 } )
144+ }
142145
143- // Try another provider => stop retrying if using fallback provider
144- if (
145- // ie. openai 404 error: Item with id 'msg_0ead8b004a3b165d0069436a6b6834819896da85b63b196a3f' not found.
146- res . status !== 404 &&
147- // ie. cannot change codex model providers mid-session
148- modelInfo . stickyProvider !== "strict" &&
149- modelInfo . fallbackProvider &&
150- providerInfo . id !== modelInfo . fallbackProvider
151- ) {
152- return retriableRequest ( {
153- excludeProviders : [ ... retry . excludeProviders , providerInfo . id ] ,
154- retryCount : retry . retryCount + 1 ,
155- } )
156- }
146+ // Try another provider => stop retrying if using fallback provider
147+ if (
148+ res . status !== 200 &&
149+ // ie. openai 404 error: Item with id 'msg_0ead8b004a3b165d0069436a6b6834819896da85b63b196a3f' not found.
150+ res . status !== 404 &&
151+ // ie. cannot change codex model providers mid-session
152+ modelInfo . stickyProvider !== "strict" &&
153+ modelInfo . fallbackProvider &&
154+ providerInfo . id !== modelInfo . fallbackProvider
155+ ) {
156+ return retriableRequest ( {
157+ excludeProviders : [ ... retry . excludeProviders , providerInfo . id ] ,
158+ retryCount : retry . retryCount + 1 ,
159+ } )
157160 }
158161
159162 return { providerInfo, reqBody, res, startTimestamp }
@@ -183,18 +186,25 @@ export async function handler(
183186
184187 // Handle non-streaming response
185188 if ( ! isStream ) {
186- const responseConverter = createResponseConverter ( providerInfo . format , opts . format )
187189 const json = await res . json ( )
188- const body = JSON . stringify ( responseConverter ( json ) )
190+ const usageInfo = providerInfo . normalizeUsage ( json . usage )
191+ const costInfo = calculateCost ( modelInfo , usageInfo )
192+ await trialLimiter ?. track ( usageInfo )
193+ await rateLimiter ?. track ( )
194+ await trackUsage ( billingSource , authInfo , modelInfo , providerInfo , usageInfo , costInfo )
195+ await reload ( billingSource , authInfo , costInfo )
196+
197+ const responseConverter = createResponseConverter ( providerInfo . format , opts . format )
198+ const body = JSON . stringify (
199+ responseConverter ( {
200+ ...json ,
201+ cost : calculateOccuredCost ( billingSource , costInfo ) ,
202+ } ) ,
203+ )
189204 logger . metric ( { response_length : body . length } )
190205 logger . debug ( "RESPONSE: " + body )
191206 dataDumper ?. provideResponse ( body )
192207 dataDumper ?. flush ( )
193- const tokensInfo = providerInfo . normalizeUsage ( json . usage )
194- await trialLimiter ?. track ( tokensInfo )
195- await rateLimiter ?. track ( )
196- const costInfo = await trackUsage ( authInfo , modelInfo , providerInfo , billingSource , tokensInfo )
197- await reload ( authInfo , costInfo )
198208 return new Response ( body , {
199209 status : resStatus ,
200210 statusText : res . statusText ,
@@ -226,12 +236,16 @@ export async function handler(
226236 dataDumper ?. flush ( )
227237 await rateLimiter ?. track ( )
228238 const usage = usageParser . retrieve ( )
239+ let cost = "0"
229240 if ( usage ) {
230- const tokensInfo = providerInfo . normalizeUsage ( usage )
231- await trialLimiter ?. track ( tokensInfo )
232- const costInfo = await trackUsage ( authInfo , modelInfo , providerInfo , billingSource , tokensInfo )
233- await reload ( authInfo , costInfo )
241+ const usageInfo = providerInfo . normalizeUsage ( usage )
242+ const costInfo = calculateCost ( modelInfo , usageInfo )
243+ await trialLimiter ?. track ( usageInfo )
244+ await trackUsage ( billingSource , authInfo , modelInfo , providerInfo , usageInfo , costInfo )
245+ await reload ( billingSource , authInfo , costInfo )
246+ cost = calculateOccuredCost ( billingSource , costInfo )
234247 }
248+ c . enqueue ( encoder . encode ( usageParser . buidlCostChunk ( cost ) ) )
235249 c . close ( )
236250 return
237251 }
@@ -283,7 +297,6 @@ export async function handler(
283297 return pump ( )
284298 } ,
285299 } )
286-
287300 return new Response ( stream , {
288301 status : resStatus ,
289302 statusText : res . statusText ,
@@ -498,9 +511,9 @@ export async function handler(
498511 }
499512 }
500513
501- function validateBilling ( authInfo : AuthInfo , modelInfo : ModelInfo ) {
514+ function validateBilling ( authInfo : AuthInfo , modelInfo : ModelInfo ) : BillingSource {
502515 if ( ! authInfo ) return "anonymous"
503- if ( authInfo . provider ?. credentials ) return "free "
516+ if ( authInfo . provider ?. credentials ) return "byok "
504517 if ( authInfo . isFree ) return "free"
505518 if ( modelInfo . allowAnonymous ) return "free"
506519
@@ -613,13 +626,7 @@ export async function handler(
613626 return res
614627 }
615628
616- async function trackUsage (
617- authInfo : AuthInfo ,
618- modelInfo : ModelInfo ,
619- providerInfo : ProviderInfo ,
620- billingSource : ReturnType < typeof validateBilling > ,
621- usageInfo : UsageInfo ,
622- ) {
629+ function calculateCost ( modelInfo : ModelInfo , usageInfo : UsageInfo ) {
623630 const { inputTokens, outputTokens, reasoningTokens, cacheReadTokens, cacheWrite5mTokens, cacheWrite1hTokens } =
624631 usageInfo
625632
@@ -657,6 +664,33 @@ export async function handler(
657664 ( cacheReadCost ?? 0 ) +
658665 ( cacheWrite5mCost ?? 0 ) +
659666 ( cacheWrite1hCost ?? 0 )
667+ return {
668+ totalCostInCent,
669+ inputCost,
670+ outputCost,
671+ reasoningCost,
672+ cacheReadCost,
673+ cacheWrite5mCost,
674+ cacheWrite1hCost,
675+ }
676+ }
677+
/**
 * Formats the cost that is reported back to the caller for a single request.
 *
 * Only requests billed against the "balance" source report a non-zero amount:
 * `totalCostInCent` divided by 100, rendered with eight decimal places.
 * Every other billing source reports the literal string "0".
 *
 * @param billingSource - Billing source resolved for the request.
 * @param costInfo - Cost breakdown; only `totalCostInCent` is read here.
 * @returns The cost as a decimal string.
 */
function calculateOccuredCost(billingSource: BillingSource, costInfo: CostInfo) {
  // Anything not charged to the balance is surfaced as zero cost.
  if (billingSource !== "balance") return "0"

  const amount = costInfo.totalCostInCent / 100
  return amount.toFixed(8)
}
681+
682+ async function trackUsage (
683+ billingSource : BillingSource ,
684+ authInfo : AuthInfo ,
685+ modelInfo : ModelInfo ,
686+ providerInfo : ProviderInfo ,
687+ usageInfo : UsageInfo ,
688+ costInfo : CostInfo ,
689+ ) {
690+ const { inputTokens, outputTokens, reasoningTokens, cacheReadTokens, cacheWrite5mTokens, cacheWrite1hTokens } =
691+ usageInfo
692+ const { totalCostInCent, inputCost, outputCost, reasoningCost, cacheReadCost, cacheWrite5mCost, cacheWrite1hCost } =
693+ costInfo
660694
661695 logger . metric ( {
662696 "tokens.input" : inputTokens ,
@@ -677,7 +711,7 @@ export async function handler(
677711 if ( billingSource === "anonymous" ) return
678712 authInfo = authInfo !
679713
680- const cost = authInfo . provider ?. credentials ? 0 : centsToMicroCents ( totalCostInCent )
714+ const cost = centsToMicroCents ( totalCostInCent )
681715 await Database . use ( ( db ) =>
682716 Promise . all ( [
683717 db . insert ( UsageTable ) . values ( {
@@ -772,16 +806,12 @@ export async function handler(
772806 return { costInMicroCents : cost }
773807 }
774808
775- async function reload ( authInfo : AuthInfo , costInfo : Awaited < ReturnType < typeof trackUsage > > ) {
776- if ( ! authInfo ) return
777- if ( authInfo . isFree ) return
778- if ( authInfo . provider ?. credentials ) return
779- if ( authInfo . subscription ) return
780-
781- if ( ! costInfo ) return
809+ async function reload ( billingSource : BillingSource , authInfo : AuthInfo , costInfo : CostInfo ) {
810+ if ( billingSource !== "balance" ) return
811+ authInfo = authInfo !
782812
783813 const reloadTrigger = centsToMicroCents ( ( authInfo . billing . reloadTrigger ?? Billing . RELOAD_TRIGGER ) * 100 )
784- if ( authInfo . billing . balance - costInfo . costInMicroCents >= reloadTrigger ) return
814+ if ( authInfo . billing . balance - costInfo . totalCostInCent >= reloadTrigger ) return
785815 if ( authInfo . billing . timeReloadLockedTill && authInfo . billing . timeReloadLockedTill > new Date ( ) ) return
786816
787817 const lock = await Database . use ( ( tx ) =>
0 commit comments