Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion package.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"name": "markpdfdown",
"version": "0.1.5-tone",
"version": "0.1.0-local",
"description": "A high-quality PDF to Markdown tool based on large language model visual recognition.",
"author": "MarkPDFdown",
"main": "dist/main/index.js",
Expand Down
110 changes: 105 additions & 5 deletions src/core/infrastructure/services/CloudService.ts
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,109 @@ class CloudService {

private constructor() {}

/**
 * Chooses the file name for a download from a Content-Disposition header value.
 *
 * Candidates are tried in this order: the extended `filename*` parameter, then
 * the plain `filename` parameter (after an attempted mojibake repair), then
 * `fallback`. Whichever candidate is chosen is passed through
 * `sanitizeDownloadFileName` before being returned.
 *
 * @param contentDisposition Raw header value; the caller passes an empty string when the header is absent.
 * @param fallback Name used when the header yields no usable file name.
 * @returns The sanitized file name.
 */
private extractDownloadFileName(contentDisposition: string, fallback: string): string {

This comment was marked as outdated.

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

[HIGH] Missing unit tests for complex Content-Disposition filename parsing and encoding fallbacks

A large, multi-branch parsing pipeline replaced a simple filename regex, but no tests are shown to validate behavior across valid/invalid headers, charsets, and fallback interactions.

Suggestion: Add focused unit tests for extractDownloadFileName via public method behavior (download result filename), covering: RFC5987 utf-8/latin1, malformed percent-encoding, quoted/unquoted plain filename, both filename* and filename precedence, empty/missing header fallback (task-${id}.pdf), mojibake repair positive/negative cases, and sanitization/path traversal/control chars.

Risk: Incorrect filenames in downloads, silent fallback to wrong names, and regressions on internationalized filenames (especially CJK/latin1 edge cases).

Confidence: 0.93

[From SubAgent: testing]

// The extended parameter takes precedence whenever it parses and decodes.
const rfc5987Name = this.parseRFC5987Filename(contentDisposition);
if (rfc5987Name) {
return this.sanitizeDownloadFileName(rfc5987Name, fallback);
}

// Otherwise look for the plain parameter; without it, only the fallback remains.
const plainName = this.parsePlainFilename(contentDisposition);
if (!plainName) {
return this.sanitizeDownloadFileName(fallback, fallback);
}

// A null repair result means the plain value is used as received.
const repairedName = this.tryRepairUtf8Mojibake(plainName);
return this.sanitizeDownloadFileName(repairedName || plainName, fallback);
}

/**
 * Reads and decodes the extended `filename*` parameter, whose value has the
 * form `charset'language'percent-encoded-text`.
 *
 * UTF-8 values are decoded with `decodeURIComponent`; ISO-8859-1 values are
 * percent-decoded to bytes and read as Latin-1; any other charset label is
 * percent-decoded and read as UTF-8.
 *
 * @param contentDisposition Raw Content-Disposition header value.
 * @returns The decoded name, or `null` when the parameter is absent, is not in
 *   the three-part form, or cannot be decoded.
 */
private parseRFC5987Filename(contentDisposition: string): string | null {
  const paramMatch = /filename\*\s*=\s*([^;]+)/i.exec(contentDisposition);
  const rawParam = paramMatch?.[1]?.trim();
  if (!rawParam) return null;

  // Accept a value that was wrapped in double quotes.
  const bare = rawParam.replace(/^"(.*)"$/, '$1');
  const pieces = /^([^']*)'[^']*'(.*)$/.exec(bare);
  if (pieces === null) return null;

  // An empty charset label is treated as UTF-8.
  const declaredCharset = (pieces[1] || 'utf-8').trim().toLowerCase();
  const payload = pieces[2] || '';
  const isUtf8 = declaredCharset === 'utf-8' || declaredCharset === 'utf8';
  const isLatin1 = declaredCharset === 'iso-8859-1' || declaredCharset === 'latin1';

  try {
    if (isUtf8) {
      // decodeURIComponent throws on malformed escapes; the catch maps that to null.
      return decodeURIComponent(payload);
    }

    const decodedBytes = Buffer.from(this.percentDecodeToBytes(payload));
    return decodedBytes.toString(isLatin1 ? 'latin1' : 'utf8');
  } catch {
    return null;
  }
}

/**
 * Reads the plain `filename` parameter from a Content-Disposition header.
 *
 * A double-quoted value has its surrounding quotes removed and `\"` escapes
 * turned back into `"`; an unquoted value runs up to the next `;`.
 *
 * @param contentDisposition Raw Content-Disposition header value.
 * @returns The parameter value, or `null` when it is absent or blank.
 */
private parsePlainFilename(contentDisposition: string): string | null {
  const found = /filename\s*=\s*("(?:\\.|[^"])*"|[^;]+)/i.exec(contentDisposition);
  const token = found?.[1]?.trim();
  if (!token) return null;

  const isQuoted = token.startsWith('"') && token.endsWith('"');
  return isQuoted ? token.slice(1, -1).replace(/\\"/g, '"') : token;
}

/**
 * Converts a percent-encoded string into a list of byte values.
 *
 * Each `%XX` escape, where `XX` is exactly two hexadecimal digits, becomes one
 * byte. Every other character, including a `%` that does not start a valid
 * escape, contributes its UTF-16 code unit unchanged.
 *
 * @param input Percent-encoded text.
 * @returns The decoded values, in order.
 */
private percentDecodeToBytes(input: string): number[] {
  // Both characters must be hex digits. Number.parseInt alone would accept
  // partially valid pairs such as "1Z" (parsed as 1), " 1" or "-1".
  const hexPair = /^[0-9a-f]{2}$/i;
  const bytes: number[] = [];

  for (let i = 0; i < input.length; i++) {
    if (input[i] === '%') {
      // Near the end of the input the slice is shorter than two characters and fails the check.
      const hex = input.slice(i + 1, i + 3);
      if (hexPair.test(hex)) {
        bytes.push(Number.parseInt(hex, 16));
        i += 2; // skip the two hex digits just consumed
        continue;
      }
    }
    bytes.push(input.charCodeAt(i));
  }

  return bytes;
}

/**
 * Attempts to undo a file name whose UTF-8 bytes were read as Latin-1.
 *
 * The repair is accepted only when all of the following hold:
 * - the input contains no CJK characters already,
 * - the input has at least two characters in U+00C0–U+00FF,
 * - re-reading the input's Latin-1 bytes as UTF-8 produces CJK characters,
 * - encoding that result back to UTF-8 and reading it as Latin-1 gives the input again.
 *
 * @param input Candidate file name.
 * @returns The repaired name, or `null` when any condition fails.
 */
private tryRepairUtf8Mojibake(input: string): string | null {
  const cjkPattern = /[\u4e00-\u9fff\u3040-\u30ff\uac00-\ud7af]/;

  // Text that already shows CJK characters is left alone.
  if (cjkPattern.test(input)) return null;

  let highLatinChars = 0;
  for (const ch of input) {
    const code = ch.charCodeAt(0);
    if (code >= 0x00c0 && code <= 0x00ff) highLatinChars += 1;
  }
  if (highLatinChars < 2) return null;

  const candidate = Buffer.from(input, 'latin1').toString('utf8');
  if (!candidate) return null;
  if (!cjkPattern.test(candidate)) return null;

  // The reinterpretation must be lossless in both directions.
  const survivesRoundTrip = Buffer.from(candidate, 'utf8').toString('latin1') === input;
  return survivesRoundTrip ? candidate : null;
}

/**
 * Reduces a candidate name to a bare file name.
 *
 * Only the last path segment is kept, and control characters plus the
 * reserved characters `< > : " | ? *` are replaced with `_`.
 *
 * @param input Candidate file name, possibly containing directory parts.
 * @param fallback Name returned when nothing usable remains.
 * @returns The cleaned name, or `fallback` when the result is empty or is the
 *   dot segment `.` or `..`.
 */
private sanitizeDownloadFileName(input: string, fallback: string): string {
  // Sanitize: extract basename and strip control/reserved characters
  // eslint-disable-next-line no-control-regex
  const cleaned = path.basename(input).replace(/[\u0000-\u001f<>:"|?*]/g, '_');

  // path.basename can return "." or "..", which name a directory rather than a file.
  if (cleaned === '' || cleaned === '.' || cleaned === '..') {
    return fallback;
  }
  return cleaned;
}

private normalizeCheckoutStatus(data: any): PaymentCheckoutStatusApiResponse | null {
if (!data || typeof data !== 'object') {
return null;
Expand Down Expand Up @@ -454,11 +557,8 @@ class CloudService {
}

const contentDisposition = res.headers.get('Content-Disposition') || '';
const match = contentDisposition.match(/filename="?([^";\n]+)"?/);
const rawName = match ? match[1] : `task-${id}.pdf`;
// Sanitize: extract basename and strip control/reserved characters
// eslint-disable-next-line no-control-regex
const fileName = path.basename(rawName).replace(/[\u0000-\u001f<>:"|?*]/g, '_') || `task-${id}.pdf`;
const fallbackName = `task-${id}.pdf`;
const fileName = this.extractDownloadFileName(contentDisposition, fallbackName);

const buffer = await res.arrayBuffer();
return { success: true, data: { buffer, fileName } };
Expand Down
59 changes: 59 additions & 0 deletions src/core/infrastructure/services/__tests__/CloudService.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -283,6 +283,65 @@ describe('CloudService', () => {
expect(result.data?.fileName).toBe('task-task-xyz.pdf')
})

it('downloadPdf decodes RFC5987 filename* for non-english names', async () => {
  // Extended `filename*` parameter carrying a percent-encoded UTF-8 name.
  const header =
    "attachment; filename*=UTF-8''%E4%B8%AD%E6%96%87%E6%8A%80%E6%9C%AF%E6%89%8B%E5%86%8C.pdf"
  const { default: service } = await import('../CloudService.js')

  const res = makeJsonResponse(200, {})
  res.headers.get.mockReturnValue(header)
  res.arrayBuffer.mockResolvedValue(new Uint8Array([1]).buffer)
  mockAuthManager.fetchWithAuth.mockResolvedValueOnce(res)

  const outcome = await service.downloadPdf('task-cn')

  expect(outcome.data?.fileName).toBe('中文技术手册.pdf')
})

it('downloadPdf decodes RFC5987 latin1 filename* values', async () => {
  // %E9 is a single ISO-8859-1 byte, expected to decode to "é".
  const header = "attachment; filename*=ISO-8859-1''caf%E9.pdf"
  const { default: service } = await import('../CloudService.js')

  const res = makeJsonResponse(200, {})
  res.headers.get.mockReturnValue(header)
  res.arrayBuffer.mockResolvedValue(new Uint8Array([1]).buffer)
  mockAuthManager.fetchWithAuth.mockResolvedValueOnce(res)

  const outcome = await service.downloadPdf('task-latin1')

  expect(outcome.data?.fileName).toBe('café.pdf')
})

it('downloadPdf falls back to task file name on malformed RFC5987 value', async () => {
  // "%ZZ" is not a valid escape, so the generated task name is expected instead.
  const header = "attachment; filename*=UTF-8''bad%ZZ.pdf"
  const { default: service } = await import('../CloudService.js')

  const res = makeJsonResponse(200, {})
  res.headers.get.mockReturnValue(header)
  res.arrayBuffer.mockResolvedValue(new Uint8Array([1]).buffer)
  mockAuthManager.fetchWithAuth.mockResolvedValueOnce(res)

  const outcome = await service.downloadPdf('task-malformed')

  expect(outcome.data?.fileName).toBe('task-task-malformed.pdf')
})

it('downloadPdf falls back to utf8 decode for unknown RFC5987 charset', async () => {
  // An unrecognised charset label with a payload that is valid UTF-8.
  const header = "attachment; filename*=X-UNKNOWN''%E4%B8%AD%E6%96%87.pdf"
  const { default: service } = await import('../CloudService.js')

  const res = makeJsonResponse(200, {})
  res.headers.get.mockReturnValue(header)
  res.arrayBuffer.mockResolvedValue(new Uint8Array([1]).buffer)
  mockAuthManager.fetchWithAuth.mockResolvedValueOnce(res)

  const outcome = await service.downloadPdf('task-unknown-charset')

  expect(outcome.data?.fileName).toBe('中文.pdf')
})

it('downloadPdf repairs common UTF-8 mojibake in filename', async () => {
  // Build the garbled header by reading the UTF-8 bytes of the real name as Latin-1.
  const original = '中文手册2.0.pdf'
  const garbled = Buffer.from(original, 'utf8').toString('latin1')
  const { default: service } = await import('../CloudService.js')

  const res = makeJsonResponse(200, {})
  res.headers.get.mockReturnValue(`attachment; filename="${garbled}"`)
  res.arrayBuffer.mockResolvedValue(new Uint8Array([1]).buffer)
  mockAuthManager.fetchWithAuth.mockResolvedValueOnce(res)

  const outcome = await service.downloadPdf('task-mojibake')

  expect(outcome.data?.fileName).toBe(original)
})

it('downloadPdf/getPageImage return error on non-OK response', async () => {
const cloudService = (await import('../CloudService.js')).default
mockAuthManager.fetchWithAuth
Expand Down
1 change: 1 addition & 0 deletions src/main/ipc/__tests__/handlers.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -152,6 +152,7 @@ vi.mock('../../../shared/ipc/channels.js', () => ({
FILE: {
GET_IMAGE_PATH: 'file:getImagePath',
DOWNLOAD_MARKDOWN: 'file:downloadMarkdown',
COPY_IMAGE_TO_CLIPBOARD: 'file:copyImageToClipboard',
SELECT_DIALOG: 'file:selectDialog',
UPLOAD: 'file:upload',
UPLOAD_FILE_CONTENT: 'file:uploadFileContent',
Expand Down
116 changes: 115 additions & 1 deletion src/main/ipc/handlers/__tests__/file.handler.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,16 @@ const mockDialog = {
showSaveDialog: vi.fn()
}

// Test double supplied as the `clipboard` export of the mocked 'electron' module.
const mockClipboard = {
writeImage: vi.fn(),
}

// Test double supplied as the `nativeImage` export of the mocked 'electron' module;
// one factory spy per way of constructing an image.
const mockNativeImage = {
createFromPath: vi.fn(),
createFromDataURL: vi.fn(),
createFromBuffer: vi.fn(),
}

const mockFs = {
existsSync: vi.fn(),
mkdirSync: vi.fn(),
Expand All @@ -36,7 +46,9 @@ const mockIpcMain = {
// Mock modules
vi.mock('electron', () => ({
ipcMain: mockIpcMain,
dialog: mockDialog
dialog: mockDialog,
clipboard: mockClipboard,
nativeImage: mockNativeImage,
}))

vi.mock('path', () => ({
Expand Down Expand Up @@ -66,6 +78,7 @@ vi.mock('../../../../shared/ipc/channels.js', () => ({
FILE: {
GET_IMAGE_PATH: 'file:getImagePath',
DOWNLOAD_MARKDOWN: 'file:downloadMarkdown',
COPY_IMAGE_TO_CLIPBOARD: 'file:copyImageToClipboard',
SELECT_DIALOG: 'file:selectDialog',
UPLOAD: 'file:upload',
UPLOAD_FILE_CONTENT: 'file:uploadFileContent'
Expand All @@ -87,11 +100,112 @@ describe('File Handler', () => {
mockFileLogic.getUploadDir.mockReturnValue('/uploads')
mockFs.statSync.mockReturnValue({ size: 1024 })
mockFs.existsSync.mockReturnValue(true)
const fakeImage = { isEmpty: vi.fn(() => false) }
mockNativeImage.createFromPath.mockReturnValue(fakeImage)
mockNativeImage.createFromDataURL.mockReturnValue(fakeImage)
mockNativeImage.createFromBuffer.mockReturnValue(fakeImage)

const { registerFileHandlers } = await import('../file.handler.js')
registerFileHandlers()
})

describe('file:copyImageToClipboard', () => {

This comment was marked as outdated.

// Happy path: a plain filesystem path is loaded through nativeImage.createFromPath
// and the handler reports { copied: true } after a single clipboard write.
it('should copy image from local path successfully', async () => {
const handler = handlers.get('file:copyImageToClipboard')
const result = await handler!({}, '/tmp/page.png')

expect(result).toEqual({
success: true,
data: { copied: true },
})
// The path must reach createFromPath unchanged.
expect(mockNativeImage.createFromPath).toHaveBeenCalledWith('/tmp/page.png')
expect(mockClipboard.writeImage).toHaveBeenCalledTimes(1)
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

[LOW] Clipboard tests do not verify the image object passed to writeImage

New success-path tests only check that writeImage was invoked, not that it received the created native image. A handler bug could call writeImage with the wrong object (or undefined) while these tests still pass.

Suggestion: Strengthen assertions with expect(mockClipboard.writeImage).toHaveBeenCalledWith(fakeImage) (or the specific instance for each path/dataURL case). Keep the count assertion as secondary.

Risk: Reduced test effectiveness; regressions in argument wiring may slip through CI.

Confidence: 0.94

[From SubAgent: general]

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

[LOW] Potential flakiness from missing mock reset expectations around clipboard/nativeImage call counts

Call-count assertions on shared mocks are vulnerable if cleanup/reset policy changes or if setup invokes these mocks unexpectedly.

Suggestion: Ensure beforeEach(() => vi.clearAllMocks()) (or resetAllMocks) is explicitly present in this suite and keep assertions robust with precondition checks.

Risk: Inter-test state leakage can produce non-deterministic failures and brittle CI behavior.

Confidence: 0.85

[From SubAgent: testing]

})

// Happy path: a data: URL is loaded through nativeImage.createFromDataURL
// and results in a single clipboard write.
it('should copy image from data URL successfully', async () => {
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

[MEDIUM] Missing assertion that only one nativeImage factory is used per source type

Tests validate successful path selection but not exclusivity, so a bug invoking multiple decoding branches could pass unnoticed.

Suggestion: For each input type test, add negative assertions such as expect(mockNativeImage.createFromPath).not.toHaveBeenCalled() (for data URL), and similarly for other factories.

Risk: Regression in branch routing may cause incorrect parsing, extra work, or surprising side effects without failing tests.

Confidence: 0.93

[From SubAgent: testing]

const handler = handlers.get('file:copyImageToClipboard')
const result = await handler!({}, 'data:image/png;base64,abcd')

expect(result.success).toBe(true)
// The data URL must reach createFromDataURL unchanged.
expect(mockNativeImage.createFromDataURL).toHaveBeenCalledWith('data:image/png;base64,abcd')
expect(mockClipboard.writeImage).toHaveBeenCalledTimes(1)
})

it('should copy image from file URL successfully', async () => {
  // A file:// source must be loaded through nativeImage.createFromPath.
  const fileUrl = 'file:///tmp/page.png'
  const copyImage = handlers.get('file:copyImageToClipboard')

  const outcome = await copyImage!({}, fileUrl)

  expect(outcome.success).toBe(true)
  expect(mockNativeImage.createFromPath).toHaveBeenCalledWith(expect.stringContaining('page.png'))
  expect(mockClipboard.writeImage).toHaveBeenCalledTimes(1)
})

it('should return error when image source is missing', async () => {
  // An empty source string must be rejected with a descriptive error.
  const expected = {
    success: false,
    error: 'Image source is required',
  }
  const copyImage = handlers.get('file:copyImageToClipboard')

  const outcome = await copyImage!({}, '')

  expect(outcome).toEqual(expected)
})

it('should reject remote image URLs', async () => {
  // http(s) sources are refused and must never reach the clipboard.
  const remoteUrl = 'https://cdn.example.com/page.png'
  const copyImage = handlers.get('file:copyImageToClipboard')

  const outcome = await copyImage!({}, remoteUrl)

  expect(outcome).toEqual({
    success: false,
    error: 'Remote image URLs are not allowed',
  })
  expect(mockClipboard.writeImage).not.toHaveBeenCalled()
})

it('should return error when image is empty', async () => {
  // Make the next createFromPath call yield an image that reports itself as empty.
  const emptyImage = { isEmpty: vi.fn(() => true) }
  mockNativeImage.createFromPath.mockReturnValueOnce(emptyImage)
  const copyImage = handlers.get('file:copyImageToClipboard')

  const outcome = await copyImage!({}, '/tmp/empty.png')

  expect(outcome).toEqual({
    success: false,
    error: 'Image data is empty or invalid',
  })
  expect(mockClipboard.writeImage).not.toHaveBeenCalled()
})

it('should return error when nativeImage creation throws', async () => {
  // A failure while building the image must be reported as an error result, not rethrown.
  const failOnCreate = () => {
    throw new Error('createFromPath failed')
  }
  mockNativeImage.createFromPath.mockImplementationOnce(failOnCreate)
  const copyImage = handlers.get('file:copyImageToClipboard')

  const outcome = await copyImage!({}, '/tmp/bad.png')

  expect(outcome).toEqual({
    success: false,
    error: 'createFromPath failed',
  })
  expect(mockClipboard.writeImage).not.toHaveBeenCalled()
})

it('should return error when clipboard write throws', async () => {
  // A failure in clipboard.writeImage must be reported as an error result.
  const failOnWrite = () => {
    throw new Error('clipboard failed')
  }
  mockClipboard.writeImage.mockImplementationOnce(failOnWrite)
  const copyImage = handlers.get('file:copyImageToClipboard')

  const outcome = await copyImage!({}, '/tmp/page.png')

  expect(outcome).toEqual({
    success: false,
    error: 'clipboard failed',
  })
})
})

describe('file:getImagePath', () => {
it('should return image path and exists status', async () => {
mockFs.existsSync.mockReturnValue(true)
Expand Down
Loading