MarkPDFdown · jorben · Mar 5, 2026 · Mar 5, 2026 · Mar 5, 2026 · github-actions
diff --git a/package.json b/package.json
@@ -1,6 +1,6 @@
 {
   "name": "markpdfdown",
-  "version": "0.1.5-tone",
+  "version": "0.1.0-local",
   "description": "A high-quality PDF to Markdown tool based on large language model visual recognition.",
   "author": "MarkPDFdown",
   "main": "dist/main/index.js",

diff --git a/src/core/infrastructure/services/CloudService.ts b/src/core/infrastructure/services/CloudService.ts
@@ -37,6 +37,109 @@ class CloudService {
 
   private constructor() {}
 
+  /**
+   * Derive a sanitized download file name from a Content-Disposition header value.
+   *
+   * The extended `filename*` parameter is preferred. Otherwise the plain `filename`
+   * parameter is used, after an attempt to undo UTF-8-read-as-latin1 mojibake.
+   *
+   * @param contentDisposition - Raw header value; may be an empty string.
+   * @param fallback - Name used when the header yields nothing usable.
+   * @returns The sanitized file name.
+   */
+  private extractDownloadFileName(contentDisposition: string, fallback: string): string {
+    const extendedName = this.parseRFC5987Filename(contentDisposition);
+    if (extendedName) {
+      return this.sanitizeDownloadFileName(extendedName, fallback);
+    }
+
+    const legacyName = this.parsePlainFilename(contentDisposition);
+    // No plain parameter either: sanitize the fallback itself.
+    const candidate = legacyName ? this.tryRepairUtf8Mojibake(legacyName) || legacyName : fallback;
+    return this.sanitizeDownloadFileName(candidate, fallback);
+  }
+
+  /**
+   * Extract and decode the extended `filename*=charset'lang'value` parameter.
+   *
+   * @param contentDisposition - Raw Content-Disposition header value.
+   * @returns The decoded name, or `null` when the parameter is absent, not in
+   *          `charset'lang'value` form, or fails to decode.
+   */
+  private parseRFC5987Filename(contentDisposition: string): string | null {
+    const paramMatch = /filename\*\s*=\s*([^;]+)/i.exec(contentDisposition);
+    const extValue = paramMatch?.[1]?.trim();
+    if (!extValue) return null;
+
+    // Tolerate a value that was wrapped in double quotes.
+    const withoutQuotes = extValue.replace(/^"(.*)"$/, '$1');
+    const pieces = /^([^']*)'[^']*'(.*)$/.exec(withoutQuotes);
+    if (!pieces) return null;
+
+    // An empty charset label is treated as UTF-8.
+    const charsetLabel = (pieces[1] || 'utf-8').trim().toLowerCase();
+    const payload = pieces[2] || '';
+
+    try {
+      switch (charsetLabel) {
+        case 'utf-8':
+        case 'utf8':
+          // Throws on malformed escapes; the catch below maps that to null.
+          return decodeURIComponent(payload);
+        case 'iso-8859-1':
+        case 'latin1':
+          return Buffer.from(this.percentDecodeToBytes(payload)).toString('latin1');
+        default:
+          // Unrecognized charset: interpret the decoded bytes as UTF-8.
+          return Buffer.from(this.percentDecodeToBytes(payload)).toString('utf8');
+      }
+    } catch {
+      return null;
+    }
+  }
+
+  /**
+   * Extract the plain `filename=` parameter from a Content-Disposition header value.
+   *
+   * @param contentDisposition - Raw header value.
+   * @returns The parameter value with surrounding double quotes removed and `\"`
+   *          unescaped, or `null` when the parameter is absent or blank.
+   */
+  private parsePlainFilename(contentDisposition: string): string | null {
+    const paramMatch = /filename\s*=\s*("(?:\\.|[^"])*"|[^;]+)/i.exec(contentDisposition);
+    const token = paramMatch?.[1]?.trim();
+    if (!token) return null;
+
+    const isQuoted = token.startsWith('"') && token.endsWith('"');
+    return isQuoted ? token.slice(1, -1).replace(/\\"/g, '"') : token;
+  }
+
+  /**
+   * Percent-decode a string into a list of byte values.
+   *
+   * A `%` followed by exactly two hexadecimal digits becomes one byte. Every other
+   * character, including a `%` that does not start a valid escape, contributes its
+   * UTF-16 code unit value.
+   *
+   * @param input - Percent-encoded text.
+   * @returns The decoded values, in order.
+   */
+  private percentDecodeToBytes(input: string): number[] {
+    // parseInt alone is too lenient: it accepts "4G" (-> 4), " 4" (-> 4) and "-1",
+    // so both digits are validated before decoding.
+    const hexPair = /^[0-9a-fA-F]{2}$/;
+    const bytes: number[] = [];
+    for (let i = 0; i < input.length; i++) {
+      if (input[i] === '%') {
+        const hex = input.slice(i + 1, i + 3);
+        if (hexPair.test(hex)) {
+          bytes.push(Number.parseInt(hex, 16));
+          i += 2;
+          continue;
+        }
+      }
+      bytes.push(input.charCodeAt(i));
+    }
+    return bytes;
+  }
+
+  /**
+   * Try to recover a name whose UTF-8 bytes were decoded as latin1.
+   *
+   * The repair is accepted only when the input has no CJK characters but at least
+   * two characters in U+00C0..U+00FF, the re-decoded text contains CJK characters,
+   * and re-encoding it reproduces the input exactly.
+   *
+   * @param input - Possibly garbled file name.
+   * @returns The repaired name, or `null` when no repair is applied.
+   */
+  private tryRepairUtf8Mojibake(input: string): string | null {
+    const cjkPattern = /[\u4e00-\u9fff\u3040-\u30ff\uac00-\ud7af]/;
+    // Already contains CJK text: nothing to repair.
+    if (cjkPattern.test(input)) return null;
+
+    let highLatinCount = 0;
+    for (const ch of input) {
+      const code = ch.charCodeAt(0);
+      if (code >= 0x00c0 && code <= 0x00ff) {
+        highLatinCount += 1;
+      }
+    }
+    if (highLatinCount < 2) return null;
+
+    const candidate = Buffer.from(input, 'latin1').toString('utf8');
+    if (!candidate) return null;
+
+    // The round trip must reproduce the input byte-for-byte.
+    const survivesRoundTrip = Buffer.from(candidate, 'utf8').toString('latin1') === input;
+    return cjkPattern.test(candidate) && survivesRoundTrip ? candidate : null;
+  }
+
+  /**
+   * Reduce an untrusted file name to a single safe path segment.
+   *
+   * @param input - Candidate name, e.g. parsed from a response header.
+   * @param fallback - Returned when nothing usable is left after sanitizing.
+   * @returns The base name with control/reserved characters replaced by `_`,
+   *          or `fallback`.
+   */
+  private sanitizeDownloadFileName(input: string, fallback: string): string {
+    // win32 semantics treat both `/` and `\` as separators on every platform, so a
+    // name like `..\..\x.pdf` cannot survive as one segment on POSIX hosts.
+    // eslint-disable-next-line no-control-regex
+    const cleaned = path.win32.basename(input).replace(/[\u0000-\u001f<>:"|?*]/g, '_');
+    // `.` and `..` are directory references, never usable as a download name.
+    if (cleaned === '.' || cleaned === '..') return fallback;
+    return cleaned || fallback;
+  }
+
   private normalizeCheckoutStatus(data: any): PaymentCheckoutStatusApiResponse | null {
     if (!data || typeof data !== 'object') {
       return null;
@@ -454,11 +557,8 @@ class CloudService {
       }
 
       const contentDisposition = res.headers.get('Content-Disposition') || '';
-      const match = contentDisposition.match(/filename="?([^";\n]+)"?/);
-      const rawName = match ? match[1] : `task-${id}.pdf`;
-      // Sanitize: extract basename and strip control/reserved characters
-      // eslint-disable-next-line no-control-regex
-      const fileName = path.basename(rawName).replace(/[\u0000-\u001f<>:"|?*]/g, '_') || `task-${id}.pdf`;
+      const fallbackName = `task-${id}.pdf`;
+      const fileName = this.extractDownloadFileName(contentDisposition, fallbackName);
 
       const buffer = await res.arrayBuffer();
       return { success: true, data: { buffer, fileName } };

diff --git a/src/core/infrastructure/services/__tests__/CloudService.test.ts b/src/core/infrastructure/services/__tests__/CloudService.test.ts
@@ -283,6 +283,65 @@ describe('CloudService', () => {
     expect(result.data?.fileName).toBe('task-task-xyz.pdf')
   })
 
+  // Extended parameter with a UTF-8 charset: the percent-encoded bytes must come
+  // back as the original non-ASCII name.
+  it('downloadPdf decodes RFC5987 filename* for non-english names', async () => {
+    const cloudService = (await import('../CloudService.js')).default
+    const response = makeJsonResponse(200, {})
+    response.headers.get.mockReturnValue(
+      "attachment; filename*=UTF-8''%E4%B8%AD%E6%96%87%E6%8A%80%E6%9C%AF%E6%89%8B%E5%86%8C.pdf",
+    )
+    response.arrayBuffer.mockResolvedValue(new Uint8Array([1]).buffer)
+    mockAuthManager.fetchWithAuth.mockResolvedValueOnce(response)
+
+    const result = await cloudService.downloadPdf('task-cn')
+    expect(result.data?.fileName).toBe('中文技术手册.pdf')
+  })
+
+  // ISO-8859-1 charset: the single byte %E9 must decode to "é".
+  it('downloadPdf decodes RFC5987 latin1 filename* values', async () => {
+    const cloudService = (await import('../CloudService.js')).default
+    const response = makeJsonResponse(200, {})
+    response.headers.get.mockReturnValue("attachment; filename*=ISO-8859-1''caf%E9.pdf")
+    response.arrayBuffer.mockResolvedValue(new Uint8Array([1]).buffer)
+    mockAuthManager.fetchWithAuth.mockResolvedValueOnce(response)
+
+    const result = await cloudService.downloadPdf('task-latin1')
+    expect(result.data?.fileName).toBe('café.pdf')
+  })
+
+  // "%ZZ" is not a valid percent escape, and the header carries no plain
+  // `filename=` parameter, so the task-based default name is expected.
+  it('downloadPdf falls back to task file name on malformed RFC5987 value', async () => {
+    const cloudService = (await import('../CloudService.js')).default
+    const response = makeJsonResponse(200, {})
+    response.headers.get.mockReturnValue("attachment; filename*=UTF-8''bad%ZZ.pdf")
+    response.arrayBuffer.mockResolvedValue(new Uint8Array([1]).buffer)
+    mockAuthManager.fetchWithAuth.mockResolvedValueOnce(response)
+
+    const result = await cloudService.downloadPdf('task-malformed')
+    expect(result.data?.fileName).toBe('task-task-malformed.pdf')
+  })
+
+  // An unrecognized charset label must not discard the value: the bytes are
+  // expected to be interpreted as UTF-8.
+  it('downloadPdf falls back to utf8 decode for unknown RFC5987 charset', async () => {
+    const cloudService = (await import('../CloudService.js')).default
+    const response = makeJsonResponse(200, {})
+    response.headers.get.mockReturnValue("attachment; filename*=X-UNKNOWN''%E4%B8%AD%E6%96%87.pdf")
+    response.arrayBuffer.mockResolvedValue(new Uint8Array([1]).buffer)
+    mockAuthManager.fetchWithAuth.mockResolvedValueOnce(response)
+
+    const result = await cloudService.downloadPdf('task-unknown-charset')
+    expect(result.data?.fileName).toBe('中文.pdf')
+  })
+
+  // Builds the garbled form by reading the UTF-8 bytes of the name as latin1,
+  // then checks that the plain `filename=` path restores the original.
+  it('downloadPdf repairs common UTF-8 mojibake in filename', async () => {
+    const cloudService = (await import('../CloudService.js')).default
+    const response = makeJsonResponse(200, {})
+    const original = '中文手册2.0.pdf'
+    const mojibake = Buffer.from(original, 'utf8').toString('latin1')
+    response.headers.get.mockReturnValue(`attachment; filename="${mojibake}"`)
+    response.arrayBuffer.mockResolvedValue(new Uint8Array([1]).buffer)
+    mockAuthManager.fetchWithAuth.mockResolvedValueOnce(response)
+
+    const result = await cloudService.downloadPdf('task-mojibake')
+    expect(result.data?.fileName).toBe(original)
+  })
+
   it('downloadPdf/getPageImage return error on non-OK response', async () => {
     const cloudService = (await import('../CloudService.js')).default
     mockAuthManager.fetchWithAuth

diff --git a/src/main/ipc/__tests__/handlers.test.ts b/src/main/ipc/__tests__/handlers.test.ts
@@ -152,6 +152,7 @@ vi.mock('../../../shared/ipc/channels.js', () => ({
     FILE: {
       GET_IMAGE_PATH: 'file:getImagePath',
       DOWNLOAD_MARKDOWN: 'file:downloadMarkdown',
+      COPY_IMAGE_TO_CLIPBOARD: 'file:copyImageToClipboard',
       SELECT_DIALOG: 'file:selectDialog',
       UPLOAD: 'file:upload',
       UPLOAD_FILE_CONTENT: 'file:uploadFileContent',

diff --git a/src/main/ipc/handlers/__tests__/file.handler.test.ts b/src/main/ipc/handlers/__tests__/file.handler.test.ts
@@ -14,6 +14,16 @@ const mockDialog = {
   showSaveDialog: vi.fn()
 }
 
+// Stand-in for the `clipboard` export of the mocked 'electron' module.
+const mockClipboard = {
+  writeImage: vi.fn(),
+}
+
+// Stand-in for the `nativeImage` export of the mocked 'electron' module; one spy
+// per factory so tests can assert which one was called.
+const mockNativeImage = {
+  createFromPath: vi.fn(),
+  createFromDataURL: vi.fn(),
+  createFromBuffer: vi.fn(),
+}
+
 const mockFs = {
   existsSync: vi.fn(),
   mkdirSync: vi.fn(),
@@ -36,7 +46,9 @@ const mockIpcMain = {
 // Mock modules
 vi.mock('electron', () => ({
   ipcMain: mockIpcMain,
-  dialog: mockDialog
+  dialog: mockDialog,
+  clipboard: mockClipboard,
+  nativeImage: mockNativeImage,
 }))
 
 vi.mock('path', () => ({
@@ -66,6 +78,7 @@ vi.mock('../../../../shared/ipc/channels.js', () => ({
     FILE: {
       GET_IMAGE_PATH: 'file:getImagePath',
       DOWNLOAD_MARKDOWN: 'file:downloadMarkdown',
+      COPY_IMAGE_TO_CLIPBOARD: 'file:copyImageToClipboard',
       SELECT_DIALOG: 'file:selectDialog',
       UPLOAD: 'file:upload',
       UPLOAD_FILE_CONTENT: 'file:uploadFileContent'
@@ -87,11 +100,112 @@ describe('File Handler', () => {
     mockFileLogic.getUploadDir.mockReturnValue('/uploads')
     mockFs.statSync.mockReturnValue({ size: 1024 })
     mockFs.existsSync.mockReturnValue(true)
+    const fakeImage = { isEmpty: vi.fn(() => false) }
+    mockNativeImage.createFromPath.mockReturnValue(fakeImage)
+    mockNativeImage.createFromDataURL.mockReturnValue(fakeImage)
+    mockNativeImage.createFromBuffer.mockReturnValue(fakeImage)
 
     const { registerFileHandlers } = await import('../file.handler.js')
     registerFileHandlers()
   })
 
+  // Tests for the 'file:copyImageToClipboard' IPC handler. Each test fetches the
+  // registered handler from `handlers` and calls it with a fake event and an image
+  // source string.
+  // NOTE(review): the toHaveBeenCalledTimes(1) / not.toHaveBeenCalled() assertions
+  // presumably rely on mocks being reset between tests — confirm in beforeEach.
+  describe('file:copyImageToClipboard', () => {
+    it('should copy image from local path successfully', async () => {
+      const handler = handlers.get('file:copyImageToClipboard')
+      const result = await handler!({}, '/tmp/page.png')
+
+      expect(result).toEqual({
+        success: true,
+        data: { copied: true },
+      })
+      expect(mockNativeImage.createFromPath).toHaveBeenCalledWith('/tmp/page.png')
+      expect(mockClipboard.writeImage).toHaveBeenCalledTimes(1)
+    })
+
+    // A data: URL is expected to go through createFromDataURL unchanged.
+    it('should copy image from data URL successfully', async () => {
+      const handler = handlers.get('file:copyImageToClipboard')
+      const result = await handler!({}, 'data:image/png;base64,abcd')
+
+      expect(result.success).toBe(true)
+      expect(mockNativeImage.createFromDataURL).toHaveBeenCalledWith('data:image/png;base64,abcd')
+      expect(mockClipboard.writeImage).toHaveBeenCalledTimes(1)
+    })
+
+    // A file: URL is expected to reach createFromPath; only the trailing file name
+    // is asserted, not the exact converted path.
+    it('should copy image from file URL successfully', async () => {
+      const handler = handlers.get('file:copyImageToClipboard')
+      const result = await handler!({}, 'file:///tmp/page.png')
+
+      expect(result.success).toBe(true)
+      expect(mockNativeImage.createFromPath).toHaveBeenCalledWith(expect.stringContaining('page.png'))
+      expect(mockClipboard.writeImage).toHaveBeenCalledTimes(1)
+    })
+
+    it('should return error when image source is missing', async () => {
+      const handler = handlers.get('file:copyImageToClipboard')
+      const result = await handler!({}, '')
+
+      expect(result).toEqual({
+        success: false,
+        error: 'Image source is required',
+      })
+    })
+
+    // Remote sources must be refused before anything is written to the clipboard.
+    it('should reject remote image URLs', async () => {
+      const handler = handlers.get('file:copyImageToClipboard')
+      const result = await handler!({}, 'https://cdn.example.com/page.png')
+
+      expect(result).toEqual({
+        success: false,
+        error: 'Remote image URLs are not allowed',
+      })
+      expect(mockClipboard.writeImage).not.toHaveBeenCalled()
+    })
+
+    // Overrides the default non-empty fake image for this one call.
+    it('should return error when image is empty', async () => {
+      mockNativeImage.createFromPath.mockReturnValueOnce({
+        isEmpty: vi.fn(() => true),
+      })
+
+      const handler = handlers.get('file:copyImageToClipboard')
+      const result = await handler!({}, '/tmp/empty.png')
+
+      expect(result).toEqual({
+        success: false,
+        error: 'Image data is empty or invalid',
+      })
+      expect(mockClipboard.writeImage).not.toHaveBeenCalled()
+    })
+
+    // A throw from image creation must surface as an error result carrying the
+    // thrown message.
+    it('should return error when nativeImage creation throws', async () => {
+      mockNativeImage.createFromPath.mockImplementationOnce(() => {
+        throw new Error('createFromPath failed')
+      })
+
+      const handler = handlers.get('file:copyImageToClipboard')
+      const result = await handler!({}, '/tmp/bad.png')
+
+      expect(result).toEqual({
+        success: false,
+        error: 'createFromPath failed',
+      })
+      expect(mockClipboard.writeImage).not.toHaveBeenCalled()
+    })
+
+    // Same contract when the clipboard write itself throws.
+    it('should return error when clipboard write throws', async () => {
+      mockClipboard.writeImage.mockImplementationOnce(() => {
+        throw new Error('clipboard failed')
+      })
+
+      const handler = handlers.get('file:copyImageToClipboard')
+      const result = await handler!({}, '/tmp/page.png')
+
+      expect(result).toEqual({
+        success: false,
+        error: 'clipboard failed',
+      })
+    })
+  })
+
   describe('file:getImagePath', () => {
     it('should return image path and exists status', async () => {
       mockFs.existsSync.mockReturnValue(true)