Skip to content

Commit 5ecf1f0

Browse files
committed
Fix prompt cache test: remove mocks, add git status case
1 parent a7c73cf commit 5ecf1f0

File tree

1 file changed

+140
-83
lines changed

1 file changed

+140
-83
lines changed
Lines changed: 140 additions & 83 deletions
Original file line numberDiff line numberDiff line change
@@ -1,114 +1,171 @@
1-
import * as mainPromptModule from '@codebuff/agent-runtime/main-prompt'
2-
import { assistantMessage, userMessage } from '@codebuff/common/util/messages'
3-
import { afterEach, describe, expect, it, mock, spyOn } from 'bun:test'
1+
import fs from 'fs'
2+
import path from 'path'
43

5-
import { CodebuffClient } from '../client'
6-
import * as databaseModule from '../impl/database'
4+
import { describe, expect, it } from 'bun:test'
75

6+
import { CodebuffClient } from '../client'
7+
import { EventCollector, DEFAULT_TIMEOUT } from '../../e2e/utils'
8+
9+
import type { AgentOutput } from '@codebuff/common/types/session-state'
10+
11+
const apiKey = process.env.CODEBUFF_API_KEY
12+
13+
/**
 * Pulls the first piece of assistant-authored text out of an agent output.
 *
 * Only outputs whose `type` is `'lastMessage'` or `'allMessages'` are
 * inspected; every other output type yields an empty string.
 *
 * Messages are scanned in order and non-assistant messages are skipped.
 * For an assistant message:
 *  - string content is returned as-is;
 *  - array content is searched for the first object part with
 *    `type === 'text'` and a `text` property, whose `text` is returned
 *    coerced with `String(...)`.
 * If an assistant message has no usable text, scanning continues with the
 * next message.
 *
 * @param output - The agent output to inspect.
 * @returns The first assistant text found, or `''` when there is none.
 */
function extractOutputText(output: AgentOutput): string {
  if (!(output.type === 'lastMessage' || output.type === 'allMessages')) {
    return ''
  }

  const history = output.value as { role: string; content: unknown }[]

  for (const { role, content } of history) {
    if (role !== 'assistant') {
      continue
    }

    if (typeof content === 'string') {
      return content
    }

    if (!Array.isArray(content)) {
      continue
    }

    // First part that looks like `{ type: 'text', text: ... }`.
    const textPart = content.find(
      (piece) =>
        piece !== null &&
        typeof piece === 'object' &&
        'type' in piece &&
        piece.type === 'text' &&
        'text' in piece,
    )
    if (textPart !== undefined) {
      return String(textPart.text)
    }
  }

  return ''
}
835

936
describe('Prompt Caching', () => {
10-
afterEach(() => {
11-
mock.restore()
12-
})
13-
1437
it(
1538
'should be cheaper on second request',
1639
async () => {
17-
spyOn(databaseModule, 'getUserInfoFromApiKey').mockResolvedValue({
18-
id: 'user-123',
19-
} as Awaited<ReturnType<typeof databaseModule.getUserInfoFromApiKey>>)
20-
21-
spyOn(mainPromptModule, 'callMainPrompt').mockImplementation(
22-
async (params) => {
23-
const { sendAction, action: promptAction, promptId } = params
24-
const sessionState = promptAction.sessionState
25-
const hasHistory =
26-
sessionState.mainAgentState.messageHistory.length > 0
27-
const creditsUsed = hasHistory ? 10 : 100
28-
29-
sessionState.mainAgentState.creditsUsed = creditsUsed
30-
sessionState.mainAgentState.directCreditsUsed = creditsUsed
31-
32-
if (promptAction.prompt) {
33-
sessionState.mainAgentState.messageHistory.push(
34-
userMessage(promptAction.prompt),
35-
assistantMessage('hi'),
36-
)
37-
}
38-
39-
await sendAction({
40-
action: {
41-
type: 'response-chunk',
42-
userInputId: promptId,
43-
chunk: {
44-
type: 'finish',
45-
totalCost: creditsUsed,
46-
},
47-
},
48-
})
49-
50-
const output = {
51-
type: 'lastMessage' as const,
52-
value: sessionState.mainAgentState.messageHistory.slice(-1),
53-
}
54-
55-
await sendAction({
56-
action: {
57-
type: 'prompt-response',
58-
promptId,
59-
sessionState,
60-
output,
61-
},
62-
})
63-
64-
return {
65-
sessionState,
66-
output,
67-
}
68-
},
69-
)
40+
if (!apiKey) {
41+
console.log(
42+
'Skipping prompt caching integration test: set CODEBUFF_API_KEY to run.\n' +
43+
'Example: CODEBUFF_API_KEY=your-key bun test src/__tests__/run.integration.test.ts',
44+
)
45+
return
46+
}
47+
48+
const client = new CodebuffClient({ apiKey })
7049

7150
const filler =
7251
`Run UUID: ${crypto.randomUUID()} ` +
7352
'Ignore this text. This is just to make the prompt longer. '.repeat(500)
7453
const prompt = 'respond with "hi"'
7554

76-
const client = new CodebuffClient({
77-
apiKey: 'test-api-key',
78-
})
79-
let cost1 = -1
55+
const collector1 = new EventCollector()
8056
const run1 = await client.run({
81-
prompt: `${filler}\n\n${prompt}`,
8257
agent: 'base2',
83-
handleEvent: (event) => {
84-
if (event.type === 'finish') {
85-
cost1 = event.totalCost
86-
}
87-
},
58+
prompt: `${filler}\n\n${prompt}`,
59+
handleEvent: collector1.handleEvent,
8860
})
8961

9062
console.dir(run1.output, { depth: null })
91-
expect(run1.output.type).not.toEqual('error')
63+
expect(run1.output.type).not.toBe('error')
64+
65+
const cost1 = collector1.getLastEvent('finish')?.totalCost ?? -1
9266
expect(cost1).toBeGreaterThanOrEqual(0)
9367

94-
let cost2 = -1
68+
const collector2 = new EventCollector()
9569
const run2 = await client.run({
96-
prompt,
9770
agent: 'base2',
71+
prompt,
9872
previousRun: run1,
99-
handleEvent: (event) => {
100-
if (event.type === 'finish') {
101-
cost2 = event.totalCost
102-
}
103-
},
73+
handleEvent: collector2.handleEvent,
10474
})
10575

10676
console.dir(run2.output, { depth: null })
107-
expect(run2.output.type).not.toEqual('error')
77+
expect(run2.output.type).not.toBe('error')
78+
79+
const cost2 = collector2.getLastEvent('finish')?.totalCost ?? -1
10880
expect(cost2).toBeGreaterThanOrEqual(0)
10981

110-
expect(cost1).toBeGreaterThan(cost2)
82+
console.log(`First request cost: ${cost1}, Second request cost: ${cost2}`)
83+
expect(cost2).toBeLessThanOrEqual(cost1 * 0.5)
84+
},
85+
DEFAULT_TIMEOUT * 2,
86+
)
87+
88+
it(
89+
'should not invalidate cache when git status changes between requests',
90+
async () => {
91+
if (!apiKey) {
92+
console.log(
93+
'Skipping prompt caching integration test: set CODEBUFF_API_KEY to run.',
94+
)
95+
return
96+
}
97+
98+
const magic1 = Math.floor(10000 + Math.random() * 90000)
99+
const magic2 = Math.floor(10000 + Math.random() * 90000)
100+
const tempFile1 = path.join(
101+
__dirname,
102+
`cache-test-magic-${magic1}.tmp`,
103+
)
104+
const tempFile2 = path.join(
105+
__dirname,
106+
`cache-test-magic-${magic2}.tmp`,
107+
)
108+
109+
try {
110+
fs.writeFileSync(tempFile1, `MAGIC_NUMBER=${magic1}`)
111+
112+
const client = new CodebuffClient({ apiKey, cwd: process.cwd() })
113+
114+
const filler =
115+
`Run UUID: ${crypto.randomUUID()} ` +
116+
'Ignore this text. This is just to make the prompt longer. '.repeat(
117+
500,
118+
)
119+
120+
const collector1 = new EventCollector()
121+
const run1 = await client.run({
122+
agent: 'base2',
123+
prompt:
124+
`${filler}\n\n` +
125+
'Look at the Initial Git Changes section in your system prompt. ' +
126+
'There should be an untracked file in sdk/src/__tests__/ whose filename contains a 5-digit number. ' +
127+
'What is that 5-digit number? Respond with ONLY the number, nothing else.',
128+
handleEvent: collector1.handleEvent,
129+
})
130+
131+
console.dir(run1.output, { depth: null })
132+
expect(run1.output.type).not.toBe('error')
133+
134+
const responseText = extractOutputText(run1.output)
135+
console.log(
136+
`Magic number: ${magic1}, LLM response: "${responseText}"`,
137+
)
138+
expect(responseText).toContain(String(magic1))
139+
140+
const cost1 = collector1.getLastEvent('finish')?.totalCost ?? -1
141+
expect(cost1).toBeGreaterThanOrEqual(0)
142+
143+
fs.unlinkSync(tempFile1)
144+
fs.writeFileSync(tempFile2, `MAGIC_NUMBER=${magic2}`)
145+
146+
const collector2 = new EventCollector()
147+
const run2 = await client.run({
148+
agent: 'base2',
149+
prompt: 'respond with "hi"',
150+
previousRun: run1,
151+
handleEvent: collector2.handleEvent,
152+
})
153+
154+
console.dir(run2.output, { depth: null })
155+
expect(run2.output.type).not.toBe('error')
156+
157+
const cost2 = collector2.getLastEvent('finish')?.totalCost ?? -1
158+
expect(cost2).toBeGreaterThanOrEqual(0)
159+
160+
console.log(
161+
`Git status change test - Magic: ${magic1}${magic2}, First: ${cost1}, Second: ${cost2}`,
162+
)
163+
expect(cost2).toBeLessThanOrEqual(cost1 * 0.5)
164+
} finally {
165+
try { fs.unlinkSync(tempFile1) } catch {}
166+
try { fs.unlinkSync(tempFile2) } catch {}
167+
}
111168
},
112-
{ timeout: 20_000 },
169+
DEFAULT_TIMEOUT * 2,
113170
)
114171
})

0 commit comments

Comments
 (0)