@@ -112,6 +112,86 @@ describe("session.compaction.isOverflow", () => {
112112 } )
113113 } )
114114
115+ // ─── Bug reproduction tests ───────────────────────────────────────────
116+ // These tests demonstrate that when limit.input is set, isOverflow()
117+ // does not subtract any headroom for the next model response. This means
118+ // compaction only triggers AFTER we've already consumed the full input
119+ // budget, leaving zero room for the next API call's output tokens.
120+ //
121+ // Compare: without limit.input, usable = context - output (reserves space).
122+ // With limit.input, usable = limit.input (reserves nothing).
123+ //
124+ // Related issues: #10634, #8089, #11086, #12621
125+ // Open PRs: #6875, #12924
126+
127+ test ( "BUG: no headroom when limit.input is set — compaction should trigger near boundary but does not" , async ( ) => {
128+ await using tmp = await tmpdir ( )
129+ await Instance . provide ( {
130+ directory : tmp . path ,
131+ fn : async ( ) => {
132+ // Simulate Claude with prompt caching: input limit = 200K, output limit = 32K
133+ const model = createModel ( { context : 200_000 , input : 200_000 , output : 32_000 } )
134+
135+ // We've used 198K tokens total. Only 2K under the input limit.
136+ // On the next turn, the full conversation (198K) becomes input,
137+ // plus the model needs room to generate output — this WILL overflow.
138+ const tokens = { input : 180_000 , output : 15_000 , reasoning : 0 , cache : { read : 3_000 , write : 0 } }
139+ // count = 180K + 3K + 15K = 198K
140+ // usable = limit.input = 200K (no output subtracted!)
141+ // 198K > 200K = false → no compaction triggered
142+
143+ // WITHOUT limit.input: usable = 200K - 32K = 168K, and 198K > 168K = true ✓
144+ // WITH limit.input: usable = 200K, and 198K > 200K = false ✗
145+
146+ // With 198K used and only 2K headroom, the next turn will overflow.
147+ // Compaction MUST trigger here.
148+ expect ( await SessionCompaction . isOverflow ( { tokens, model } ) ) . toBe ( true )
149+ } ,
150+ } )
151+ } )
152+
153+ test ( "BUG: without limit.input, same token count correctly triggers compaction" , async ( ) => {
154+ await using tmp = await tmpdir ( )
155+ await Instance . provide ( {
156+ directory : tmp . path ,
157+ fn : async ( ) => {
158+ // Same model but without limit.input — uses context - output instead
159+ const model = createModel ( { context : 200_000 , output : 32_000 } )
160+
161+ // Same token usage as above
162+ const tokens = { input : 180_000 , output : 15_000 , reasoning : 0 , cache : { read : 3_000 , write : 0 } }
163+ // count = 198K
164+ // usable = context - output = 200K - 32K = 168K
165+ // 198K > 168K = true → compaction correctly triggered
166+
167+ const result = await SessionCompaction . isOverflow ( { tokens, model } )
168+ expect ( result ) . toBe ( true ) // ← Correct: headroom is reserved
169+ } ,
170+ } )
171+ } )
172+
173+ test ( "BUG: asymmetry — limit.input model allows 30K more usage before compaction than equivalent model without it" , async ( ) => {
174+ await using tmp = await tmpdir ( )
175+ await Instance . provide ( {
176+ directory : tmp . path ,
177+ fn : async ( ) => {
178+ // Two models with identical context/output limits, differing only in limit.input
179+ const withInputLimit = createModel ( { context : 200_000 , input : 200_000 , output : 32_000 } )
180+ const withoutInputLimit = createModel ( { context : 200_000 , output : 32_000 } )
181+
182+ // 170K total tokens — well above context-output (168K) but below input limit (200K)
183+ const tokens = { input : 155_000 , output : 10_000 , reasoning : 0 , cache : { read : 5_000 , write : 0 } }
184+
185+ const withLimit = await SessionCompaction . isOverflow ( { tokens, model : withInputLimit } )
186+ const withoutLimit = await SessionCompaction . isOverflow ( { tokens, model : withoutInputLimit } )
187+
188+ // Both models have identical real capacity — they should agree:
189+ expect ( withLimit ) . toBe ( true ) // should compact (170K leaves no room for 32K output)
190+ expect ( withoutLimit ) . toBe ( true ) // correctly compacts (170K > 168K)
191+ } ,
192+ } )
193+ } )
194+
115195 test ( "returns false when model context limit is 0" , async ( ) => {
116196 await using tmp = await tmpdir ( )
117197 await Instance . provide ( {
0 commit comments