Skip to content

Commit 560b7ad

Browse files
[codex] fix DeepSeek image attachments (#590)
Co-authored-by: James Grugett <jahooma@gmail.com>
1 parent a7101da commit 560b7ad

5 files changed

Lines changed: 293 additions & 59 deletions

File tree

web/src/app/api/v1/chat/completions/__tests__/completions.test.ts

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -779,6 +779,10 @@ describe('/api/v1/chat/completions POST endpoint', () => {
779779
const fetchedUrls: string[] = []
780780
const fetchViaDeepSeek = mock(
781781
async (url: string | URL | Request, init?: RequestInit) => {
782+
if (String(url).startsWith('https://api.ipinfo.io/lookup/')) {
783+
return Response.json({})
784+
}
785+
782786
fetchedUrls.push(String(url))
783787
fetchedBodies.push(JSON.parse(init?.body as string))
784788
return new Response(
Lines changed: 113 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,113 @@
1+
import { describe, expect, it } from 'bun:test'
2+
3+
import {
4+
buildDeepSeekRequestBody,
5+
normalizeDeepSeekRequestBody,
6+
} from '../deepseek-request-body'
7+
8+
import type { ChatCompletionRequestBody } from '../types'
9+
10+
// Covers normalizeDeepSeekRequestBody: flattening of multimodal content,
// model id mapping, and tolerance of bodies without a `messages` array.
describe('normalizeDeepSeekRequestBody', () => {
  it('converts multimodal user content into DeepSeek text content without mutating input', () => {
    const body: ChatCompletionRequestBody = {
      model: 'deepseek/deepseek-v4-pro',
      messages: [
        {
          role: 'user',
          content: [
            { type: 'text', text: 'What is in this image?' },
            {
              type: 'image_url',
              image_url: { url: 'data:image/png;base64,AAECAw==' },
            },
          ],
        },
      ],
    }

    const { messages: normalizedMessages } = normalizeDeepSeekRequestBody(body)

    // The normalized copy carries the text plus a notice in place of the image.
    const expectedText =
      'What is in this image?\n\n[1 image was omitted because the DeepSeek API does not support image input.]'
    expect(normalizedMessages[0].content).toBe(expectedText)

    // The caller's original content parts must be left intact.
    const [originalMessage] = body.messages
    expect(originalMessage.content).toEqual([
      { type: 'text', text: 'What is in this image?' },
      {
        type: 'image_url',
        image_url: { url: 'data:image/png;base64,AAECAw==' },
      },
    ])
  })

  it('keeps text-only messages unchanged', () => {
    const body: ChatCompletionRequestBody = {
      model: 'deepseek/deepseek-v4-pro',
      messages: [{ role: 'user', content: 'Hello' }],
    }

    const normalized = normalizeDeepSeekRequestBody(body)

    // Only the model id is expected to differ from the input.
    expect(normalized).toEqual({
      ...body,
      model: 'deepseek-v4-pro',
    })
  })

  it('does not throw on minimal provider-path bodies without messages', () => {
    const body = {
      model: 'deepseek/deepseek-v4-pro',
      stream: false,
    } as ChatCompletionRequestBody

    const normalized = normalizeDeepSeekRequestBody(body)

    expect(normalized).toEqual({
      ...body,
      model: 'deepseek-v4-pro',
    })
  })
})
66+
67+
// Covers buildDeepSeekRequestBody end to end for a streaming request that
// carries an image attachment and OpenRouter-specific fields.
describe('buildDeepSeekRequestBody', () => {
  it('builds DeepSeek-compatible JSON when the request contains an image attachment', () => {
    const body: ChatCompletionRequestBody = {
      model: 'deepseek/deepseek-v4-pro',
      messages: [
        { role: 'system', content: 'You are a coding assistant.' },
        {
          role: 'user',
          content: [
            { type: 'text', text: 'Please inspect this screenshot.' },
            {
              type: 'image_url',
              image_url: { url: 'data:image/jpeg;base64,/9j/4AAQSkZJRg==' },
            },
          ],
        },
      ],
      stream: true,
      reasoning: { enabled: true, effort: 'medium' },
      provider: { order: ['DeepSeek'] },
      transforms: ['middle-out'],
      codebuff_metadata: { run_id: 'run-1', cost_mode: 'free' },
      usage: { include: true },
    }

    const sentBody = buildDeepSeekRequestBody(body, body.model)

    // Fields translated into DeepSeek's own request shape.
    expect(sentBody).toMatchObject({
      model: 'deepseek-v4-pro',
      stream: true,
      stream_options: { include_usage: true },
      thinking: { type: 'enabled', reasoning_effort: 'high' },
    })

    // Fields that must never be forwarded to DeepSeek.
    const strippedFields = [
      'reasoning',
      'provider',
      'transforms',
      'codebuff_metadata',
      'usage',
    ]
    for (const strippedField of strippedFields) {
      expect(sentBody).not.toHaveProperty(strippedField)
    }

    const [, userMessage] = sentBody.messages as Array<{ content: string }>
    expect(userMessage.content).toBe(
      'Please inspect this screenshot.\n\n[1 image was omitted because the DeepSeek API does not support image input.]',
    )

    // The image is gone from the outgoing payload but still present on the input.
    const serializedSentBody = JSON.stringify(sentBody)
    expect(serializedSentBody).not.toContain('image_url')
    expect(JSON.stringify(body)).toContain('image_url')
  })
})
Lines changed: 139 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,139 @@
1+
import { deepseekModels } from '@codebuff/common/constants/model-config'
2+
3+
import type { ChatCompletionRequestBody } from './types'
4+
5+
/**
 * Maps every model name handled by the DeepSeek path to the model id that is
 * sent to the DeepSeek API. Both `deepseekV4ProDirect` and `deepseekV4Pro`
 * resolve to `deepseekV4ProDirect`.
 */
export const DEEPSEEK_MODEL_IDS: Record<string, string> = Object.fromEntries(
  [deepseekModels.deepseekV4ProDirect, deepseekModels.deepseekV4Pro].map(
    (routedModel): [string, string] => [
      routedModel,
      deepseekModels.deepseekV4ProDirect,
    ],
  ),
)
9+
10+
/**
 * Resolves the model id to send to the DeepSeek API.
 *
 * @param openrouterModel - Model name as it appears on the incoming request.
 * @returns The id registered in `DEEPSEEK_MODEL_IDS`, or `openrouterModel`
 *   unchanged when the map has no own entry for it.
 */
export function getDeepSeekModelId(openrouterModel: string): string {
  // Own-property check: a bare lookup on a plain object would resolve names
  // such as "constructor" or "toString" to members inherited from
  // Object.prototype and hand back a non-string value.
  const isRegistered = Object.prototype.hasOwnProperty.call(
    DEEPSEEK_MODEL_IDS,
    openrouterModel,
  )
  if (!isRegistered) {
    return openrouterModel
  }
  return DEEPSEEK_MODEL_IDS[openrouterModel] ?? openrouterModel
}
13+
14+
/**
 * Collapses an arbitrary reasoning-effort value onto the two levels used in
 * DeepSeek's `thinking.reasoning_effort` field.
 *
 * @param effort - Effort value taken from the request; may be anything.
 * @returns `'max'` for `'max'` or `'xhigh'`, and `'high'` for every other
 *   value (including `undefined`).
 */
function toDeepSeekReasoningEffort(effort: unknown): 'high' | 'max' {
  switch (effort) {
    case 'max':
    case 'xhigh':
      return 'max'
    default:
      return 'high'
  }
}
17+
18+
/**
 * Builds the bracketed placeholder that stands in for content parts dropped
 * from a message.
 *
 * @param kind - Singular name of the dropped part, e.g. `'image'`.
 * @param count - How many parts of that kind were dropped.
 * @returns A one-line notice. The noun takes a plain `s` plural and the verb
 *   agrees with it whenever `count` is not exactly 1.
 */
function unsupportedAttachmentNotice(kind: string, count: number): string {
  const isSingular = count === 1
  const subject = isSingular ? `${kind} was` : `${kind}s were`
  return `[${count} ${subject} omitted because the DeepSeek API does not support ${kind} input.]`
}
23+
24+
/**
 * Flattens a message's content into the plain-string form sent to DeepSeek.
 *
 * String content is returned unchanged. For an array of content parts, every
 * non-empty `text` part is kept in order; all other parts are dropped and
 * summarized by bracketed notices (images first, then files, then anything
 * else). The pieces are joined with a blank line.
 *
 * @param content - Non-null message content: a string or an array of parts.
 * @returns The text-only content. An array that yields neither text nor
 *   notices produces an empty string.
 */
function contentPartsToDeepSeekText(
  content: NonNullable<
    ChatCompletionRequestBody['messages'][number]['content']
  >,
): string {
  if (!Array.isArray(content)) {
    return content
  }

  const textParts: string[] = []
  let imageCount = 0
  let fileCount = 0
  let unsupportedCount = 0

  // Entries are inspected as `unknown` rather than trusted to match the
  // declared part types, so a `null` or primitive entry is counted as an
  // unsupported part instead of throwing when `.type` is read.
  for (const part of content as readonly unknown[]) {
    const fields =
      typeof part === 'object' && part !== null
        ? (part as { type?: unknown; text?: unknown })
        : undefined

    switch (fields?.type) {
      case 'text': {
        const text = fields?.text
        // Empty text parts are skipped so they do not add blank paragraphs.
        if (typeof text === 'string' && text.length > 0) {
          textParts.push(text)
        }
        break
      }
      case 'image_url': {
        imageCount += 1
        break
      }
      case 'file': {
        fileCount += 1
        break
      }
      default: {
        unsupportedCount += 1
        break
      }
    }
  }

  // Notices go after the text, one per kind of dropped part.
  if (imageCount > 0) {
    textParts.push(unsupportedAttachmentNotice('image', imageCount))
  }
  if (fileCount > 0) {
    textParts.push(unsupportedAttachmentNotice('file', fileCount))
  }
  if (unsupportedCount > 0) {
    textParts.push(
      unsupportedAttachmentNotice('unsupported content part', unsupportedCount),
    )
  }

  return textParts.join('\n\n')
}
75+
76+
/**
 * Returns a copy of the request body adjusted for DeepSeek: the model is
 * replaced by its DeepSeek id and every message's content is flattened to
 * text. The input body and its messages are not modified.
 *
 * @param body - Incoming chat completion request body.
 * @param originalModel - Model name to map; defaults to `body.model`.
 * @returns A new body object. When `body.messages` is not an array it is
 *   carried over as-is.
 */
export function normalizeDeepSeekRequestBody(
  body: ChatCompletionRequestBody,
  originalModel: string = body.model,
): ChatCompletionRequestBody {
  const sourceMessages = body.messages

  const messages = Array.isArray(sourceMessages)
    ? sourceMessages.map((message) => {
        const { content } = message
        // Missing or null content is passed through untouched.
        const textContent =
          content == null ? content : contentPartsToDeepSeekText(content)
        return { ...message, content: textContent }
      })
    : sourceMessages

  const model = getDeepSeekModelId(originalModel)
  return { ...body, model, messages }
}
96+
97+
/**
 * Produces the JSON payload for the DeepSeek API from an incoming request.
 *
 * Starting from the normalized body, it translates `reasoning` (or a
 * top-level `reasoning_effort`) into DeepSeek's `thinking` field, removes
 * OpenRouter-specific and internal fields, and asks for usage in the stream
 * when streaming is on.
 *
 * @param body - Incoming chat completion request body; left unmodified.
 * @param originalModel - Model name to map; defaults to `body.model`.
 * @returns A new plain object ready to be serialized.
 */
export function buildDeepSeekRequestBody(
  body: ChatCompletionRequestBody,
  originalModel: string = body.model,
): Record<string, unknown> {
  const deepseekBody = normalizeDeepSeekRequestBody(
    body,
    originalModel,
  ) as unknown as Record<string, unknown>

  const { reasoning, reasoning_effort: reasoningEffort } = deepseekBody

  // DeepSeek uses `thinking` instead of OpenRouter's `reasoning`.
  if (reasoning && typeof reasoning === 'object') {
    const { enabled, effort } = reasoning as {
      enabled?: boolean
      effort?: unknown
    }
    // Thinking stays on unless the caller explicitly disabled it.
    deepseekBody.thinking = {
      type: enabled === false ? 'disabled' : 'enabled',
      reasoning_effort: toDeepSeekReasoningEffort(effort),
    }
  } else if (reasoningEffort) {
    deepseekBody.thinking = {
      type: 'enabled',
      reasoning_effort: toDeepSeekReasoningEffort(reasoningEffort),
    }
  }

  // Drop the translated reasoning inputs along with the OpenRouter-specific
  // and internal fields.
  const strippedFields = [
    'reasoning',
    'reasoning_effort',
    'provider',
    'transforms',
    'codebuff_metadata',
    'usage',
  ]
  for (const field of strippedFields) {
    delete deepseekBody[field]
  }

  // For streaming, request usage in the final chunk.
  if (deepseekBody.stream) {
    deepseekBody.stream_options = { include_usage: true }
  }

  return deepseekBody
}

web/src/llm-api/deepseek.ts

Lines changed: 15 additions & 58 deletions
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,5 @@
11
import { Agent } from 'undici'
22

3-
import { deepseekModels } from '@codebuff/common/constants/model-config'
43
import { PROFIT_MARGIN } from '@codebuff/common/constants/limits'
54
import { getErrorObject } from '@codebuff/common/util/error'
65
import { env } from '@codebuff/internal/env'
@@ -10,6 +9,10 @@ import {
109
extractRequestMetadata,
1110
insertMessageToBigQuery,
1211
} from './helpers'
12+
import {
13+
buildDeepSeekRequestBody,
14+
DEEPSEEK_MODEL_IDS,
15+
} from './deepseek-request-body'
1316

1417
import type { UsageData } from './helpers'
1518
import type { InsertMessageBigqueryFn } from '@codebuff/common/types/contracts/bigquery'
@@ -40,32 +43,25 @@ const DEEPSEEK_V4_PRO_PRICING: DeepSeekPricing = {
4043
outputCostPerToken: 0.87 / 1_000_000,
4144
}
4245

43-
/** Single source of truth for DeepSeek model metadata and pricing.
44-
* Kept as one map so adding a model can't drift between routing and billing. */
4546
const DEEPSEEK_MODELS: Record<
4647
string,
4748
{ deepseekId: string; pricing: DeepSeekPricing }
48-
> = {
49-
[deepseekModels.deepseekV4ProDirect]: {
50-
deepseekId: deepseekModels.deepseekV4ProDirect,
51-
pricing: DEEPSEEK_V4_PRO_PRICING,
52-
},
53-
[deepseekModels.deepseekV4Pro]: {
54-
deepseekId: deepseekModels.deepseekV4ProDirect,
55-
pricing: DEEPSEEK_V4_PRO_PRICING,
56-
},
57-
}
49+
> = Object.fromEntries(
50+
Object.entries(DEEPSEEK_MODEL_IDS).map(([model, deepseekId]) => [
51+
model,
52+
{
53+
deepseekId,
54+
pricing: DEEPSEEK_V4_PRO_PRICING,
55+
},
56+
]),
57+
)
5858

5959
const DEEPSEEK_ROUTED_MODELS = new Set<string>(Object.keys(DEEPSEEK_MODELS))
6060

6161
export function isDeepSeekModel(model: string): boolean {
6262
return DEEPSEEK_ROUTED_MODELS.has(model)
6363
}
6464

65-
function getDeepSeekModelId(openrouterModel: string): string {
66-
return DEEPSEEK_MODELS[openrouterModel]?.deepseekId ?? openrouterModel
67-
}
68-
6965
function getDeepSeekPricing(model: string): DeepSeekPricing {
7066
const entry = DEEPSEEK_MODELS[model]
7167
if (!entry) {
@@ -87,52 +83,13 @@ type LineResult = {
8783
patchedLine: string
8884
}
8985

90-
function toDeepSeekReasoningEffort(effort: unknown): 'high' | 'max' {
91-
return effort === 'max' || effort === 'xhigh' ? 'max' : 'high'
92-
}
93-
94-
function createDeepSeekRequest(params: {
86+
export function createDeepSeekRequest(params: {
9587
body: ChatCompletionRequestBody
9688
originalModel: string
9789
fetch: typeof globalThis.fetch
9890
}) {
9991
const { body, originalModel, fetch } = params
100-
const deepseekBody: Record<string, unknown> = {
101-
...body,
102-
model: getDeepSeekModelId(originalModel),
103-
}
104-
105-
// DeepSeek uses `thinking` instead of OpenRouter's `reasoning`.
106-
if (deepseekBody.reasoning && typeof deepseekBody.reasoning === 'object') {
107-
const reasoning = deepseekBody.reasoning as {
108-
enabled?: boolean
109-
effort?: 'high' | 'medium' | 'low'
110-
}
111-
deepseekBody.thinking = {
112-
type: reasoning.enabled === false ? 'disabled' : 'enabled',
113-
reasoning_effort: toDeepSeekReasoningEffort(reasoning.effort),
114-
}
115-
} else if (deepseekBody.reasoning_effort) {
116-
deepseekBody.thinking = {
117-
type: 'enabled',
118-
reasoning_effort: toDeepSeekReasoningEffort(
119-
deepseekBody.reasoning_effort,
120-
),
121-
}
122-
}
123-
delete deepseekBody.reasoning
124-
delete deepseekBody.reasoning_effort
125-
126-
// Strip OpenRouter-specific / internal fields
127-
delete deepseekBody.provider
128-
delete deepseekBody.transforms
129-
delete deepseekBody.codebuff_metadata
130-
delete deepseekBody.usage
131-
132-
// For streaming, request usage in the final chunk
133-
if (deepseekBody.stream) {
134-
deepseekBody.stream_options = { include_usage: true }
135-
}
92+
const deepseekBody = buildDeepSeekRequestBody(body, originalModel)
13693

13794
if (!env.DEEPSEEK_API_KEY) {
13895
throw new Error('DEEPSEEK_API_KEY is not configured')

0 commit comments

Comments
 (0)