Skip to content

Commit c2ad3c5

Browse files
Feature/money mirror issue 012 production (#20)
* feat(money-mirror): prepare issue-012 production release candidate
  Bundle the latest local Money Mirror enhancements and pipeline artifacts into a single branch for production validation, including dashboard clarity UX, frequency insights, guided review flows, proactive touchpoints, and release gating documentation.
  Made-with: Cursor
* ci: retry assign-reviewers on transient provider overloads
  Add bounded retries with backoff for the Claude /assign-reviewers step so temporary 529 overloaded errors do not fail PR checks.
  Made-with: Cursor
* fix(money-mirror): harden production pdf parsing
1 parent 3a17aa6 commit c2ad3c5

4 files changed

Lines changed: 226 additions & 30 deletions

File tree

apps/money-mirror/__tests__/api/parse.test.ts

Lines changed: 53 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -7,22 +7,29 @@
77
import { describe, it, expect, vi, beforeEach } from 'vitest';
88
import { NextRequest } from 'next/server';
99

10+
const mockExtractPdfText = vi.fn().mockResolvedValue({
11+
text: '01/03/2026 SWIGGY 450.00 Dr\n02/03/2026 SALARY 50000.00 Cr',
12+
pageCount: 1,
13+
});
14+
15+
/**
 * Test double for the parser's `PdfExtractionError`: an `Error` that also
 * carries a machine-readable `code` alongside the human-readable message.
 */
class MockPdfExtractionError extends Error {
  constructor(
    message: string,
    public code: string
  ) {
    super(message);
  }
}
23+
1024
vi.mock('@/lib/pdf-parser', () => ({
11-
extractPdfText: vi.fn().mockResolvedValue({
12-
text: '01/03/2026 SWIGGY 450.00 Dr\n02/03/2026 SALARY 50000.00 Cr',
13-
pageCount: 1,
14-
}),
15-
PdfExtractionError: class PdfExtractionError extends Error {
16-
code: string;
17-
constructor(message: string, code: string) {
18-
super(message);
19-
this.code = code;
20-
}
21-
},
25+
extractPdfText: mockExtractPdfText,
26+
PdfExtractionError: MockPdfExtractionError,
2227
}));
2328

29+
const mockCaptureServerEvent = vi.fn().mockResolvedValue(undefined);
30+
2431
vi.mock('@/lib/posthog', () => ({
25-
captureServerEvent: vi.fn().mockResolvedValue(undefined),
32+
captureServerEvent: mockCaptureServerEvent,
2633
}));
2734

2835
const mockGetSessionUser = vi.fn();
@@ -90,6 +97,10 @@ describe('POST /api/statement/parse', () => {
9097
beforeEach(() => {
9198
vi.clearAllMocks();
9299

100+
mockExtractPdfText.mockResolvedValue({
101+
text: '01/03/2026 SWIGGY 450.00 Dr\n02/03/2026 SALARY 50000.00 Cr',
102+
pageCount: 1,
103+
});
93104
mockGetSessionUser.mockResolvedValue({
94105
id: 'user-123',
95106
email: 'vijay@example.com',
@@ -245,4 +256,34 @@ describe('POST /api/statement/parse', () => {
245256
credit_limit_paisa: 15000000,
246257
});
247258
});
259+
260+
it('returns 422 for parser failures and emits parser diagnostics', async () => {
261+
mockExtractPdfText.mockRejectedValueOnce(
262+
new MockPdfExtractionError(
263+
'Failed to parse PDF: DOMMatrix is not defined in server runtime',
264+
'PARSE_FAILED'
265+
)
266+
);
267+
268+
const POST = await getRoute();
269+
const res = await POST(makeRequest(makePdfFile('statement.pdf', 1024)));
270+
const body = await res.json();
271+
272+
expect(res.status).toBe(422);
273+
expect(body).toMatchObject({
274+
error: 'Failed to read the PDF. Please ensure it is a valid bank statement.',
275+
});
276+
expect(mockCaptureServerEvent).toHaveBeenCalledWith(
277+
'user-123',
278+
'statement_parse_failed',
279+
expect.objectContaining({
280+
error_type: 'PARSE_FAILED',
281+
file_name: expect.any(String),
282+
statement_type: 'bank_account',
283+
parser_stage: 'pdf_text_extraction',
284+
parser_detail: 'Failed to parse PDF: DOMMatrix is not defined in server runtime',
285+
})
286+
);
287+
expect(mockGenerateContent).not.toHaveBeenCalled();
288+
});
248289
});

apps/money-mirror/__tests__/lib/pdf-parser.test.ts

Lines changed: 68 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -6,38 +6,49 @@
66
*/
77

88
// @vitest-environment node
9-
import { describe, it, expect, vi, beforeEach } from 'vitest';
10-
11-
// ─── Hoisted mock fns ─────────────────────────────────────────
12-
const { mockGetText, mockDestroy, MockPDFParse } = vi.hoisted(() => {
13-
const mockGetText = vi.fn();
14-
const mockDestroy = vi.fn().mockResolvedValue(undefined);
15-
function MockPDFParse(opts: unknown) {
16-
void opts;
17-
return { getText: mockGetText, destroy: mockDestroy };
18-
}
19-
return { mockGetText, mockDestroy, MockPDFParse };
20-
});
9+
import { beforeEach, describe, expect, it, vi } from 'vitest';
10+
11+
type PdfParserModule = typeof import('@/lib/pdf-parser');
2112

22-
vi.mock('pdf-parse', () => ({
23-
PDFParse: MockPDFParse,
24-
}));
13+
const mockGetText = vi.fn();
14+
const mockDestroy = vi.fn();
15+
const mockPdfParseConstructor = vi.fn(function MockPDFParse(opts: unknown) {
16+
void opts;
17+
return { getText: mockGetText, destroy: mockDestroy };
18+
});
19+
const mockPdfParseFactory = vi.fn(async () => {
20+
return {
21+
PDFParse: mockPdfParseConstructor,
22+
};
23+
});
2524

26-
import { extractPdfText, PdfExtractionError } from '@/lib/pdf-parser';
25+
/**
 * Import a fresh copy of `@/lib/pdf-parser` for a single test.
 *
 * The module registry is reset first and `pdf-parse` is re-registered with
 * `mockPdfParseFactory`, so each caller gets a module instance that resolves
 * `pdf-parse` through the mock factory.
 */
async function loadPdfParserModule(): Promise<PdfParserModule> {
  vi.resetModules();
  vi.doMock('pdf-parse', mockPdfParseFactory);

  const freshModule = await import('@/lib/pdf-parser');
  return freshModule;
}
2730

2831
describe('extractPdfText', () => {
2932
beforeEach(() => {
3033
vi.clearAllMocks();
3134
mockDestroy.mockResolvedValue(undefined);
35+
delete (globalThis as Record<string, unknown>).DOMMatrix;
36+
delete (globalThis as Record<string, unknown>).ImageData;
37+
delete (globalThis as Record<string, unknown>).Path2D;
3238
});
3339

3440
it('throws EMPTY_FILE when buffer is empty', async () => {
41+
const { extractPdfText } = await loadPdfParserModule();
42+
3543
await expect(extractPdfText(Buffer.alloc(0))).rejects.toMatchObject({
3644
code: 'EMPTY_FILE',
3745
});
46+
expect(mockPdfParseFactory).not.toHaveBeenCalled();
3847
});
3948

4049
it('throws PARSE_FAILED when getText throws', async () => {
50+
const { extractPdfText } = await loadPdfParserModule();
51+
4152
mockGetText.mockRejectedValue(new Error('Invalid PDF structure'));
4253
const garbage = Buffer.from('not a pdf', 'utf-8');
4354
await expect(extractPdfText(garbage)).rejects.toMatchObject({
@@ -46,6 +57,8 @@ describe('extractPdfText', () => {
4657
});
4758

4859
it('throws EMPTY_TEXT when PDF yields blank text', async () => {
60+
const { extractPdfText } = await loadPdfParserModule();
61+
4962
mockGetText.mockResolvedValue({ text: ' ', total: 1 });
5063
const fakeBuffer = Buffer.from('fake pdf content');
5164
await expect(extractPdfText(fakeBuffer)).rejects.toMatchObject({
@@ -54,6 +67,8 @@ describe('extractPdfText', () => {
5467
});
5568

5669
it('returns text and pageCount on success', async () => {
70+
const { extractPdfText } = await loadPdfParserModule();
71+
5772
const fakeText = '01/03/26 SWIGGY 500.00 49500.00\n';
5873
mockGetText.mockResolvedValue({ text: fakeText, total: 3 });
5974
const fakeBuffer = Buffer.from('fake pdf content');
@@ -65,10 +80,47 @@ describe('extractPdfText', () => {
6580
});
6681

6782
it('returns a PdfExtractionError instance on failure', async () => {
  const { extractPdfText, PdfExtractionError } = await loadPdfParserModule();

  // Use `rejects` rather than try/catch: a bare try/catch passes silently
  // when the call unexpectedly resolves, because the assertion in the catch
  // branch never runs.
  await expect(extractPdfText(Buffer.alloc(0))).rejects.toBeInstanceOf(PdfExtractionError);
});
91+
92+
it('lazy-loads pdf-parse only after server polyfills are installed', async () => {
93+
mockPdfParseFactory.mockImplementationOnce(async () => {
94+
expect(globalThis.DOMMatrix).toBeTypeOf('function');
95+
expect(globalThis.ImageData).toBeTypeOf('function');
96+
expect(globalThis.Path2D).toBeTypeOf('function');
97+
98+
return {
99+
PDFParse: function MockPDFParse() {
100+
return { getText: mockGetText, destroy: mockDestroy };
101+
},
102+
};
103+
});
104+
105+
const { extractPdfText } = await loadPdfParserModule();
106+
mockGetText.mockResolvedValue({ text: 'statement text', total: 2 });
107+
108+
await expect(extractPdfText(Buffer.from('fake pdf content'))).resolves.toMatchObject({
109+
text: 'statement text',
110+
pageCount: 2,
111+
});
112+
});
113+
114+
it('maps parser initialization failures to PARSE_FAILED', async () => {
  // Install the throwing constructor for one call only. The suite's
  // beforeEach uses vi.clearAllMocks(), which clears call history but leaves
  // implementations in place, so a persistent mockImplementation here would
  // leak into any test that runs after this one.
  mockPdfParseConstructor.mockImplementationOnce(function MockPDFParseFailure() {
    throw new Error('DOMMatrix is not defined');
  });

  const { extractPdfText } = await loadPdfParserModule();

  await expect(extractPdfText(Buffer.from('fake pdf content'))).rejects.toMatchObject({
    code: 'PARSE_FAILED',
    message: 'Failed to parse PDF: DOMMatrix is not defined',
  });
});
74126
});

apps/money-mirror/src/app/api/statement/parse/route.ts

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -125,9 +125,14 @@ export async function POST(req: NextRequest): Promise<NextResponse> {
125125
fileBuffer = null;
126126

127127
const code = err instanceof PdfExtractionError ? err.code : 'PARSE_FAILED';
128+
const parserDetail =
129+
err instanceof Error ? err.message.slice(0, 200) : String(err).slice(0, 200);
128130
captureServerEvent(userId, 'statement_parse_failed', {
129131
error_type: code,
130132
file_name: fileName,
133+
statement_type: statementType,
134+
parser_stage: 'pdf_text_extraction',
135+
parser_detail: parserDetail,
131136
}).catch(() => {});
132137
const errorMessage =
133138
code === 'EMPTY_TEXT'

apps/money-mirror/src/lib/pdf-parser.ts

Lines changed: 100 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -9,8 +9,6 @@
99
* eligible for GC. See /api/statement/parse/route.ts.
1010
*/
1111

12-
import { PDFParse } from 'pdf-parse';
13-
1412
export interface PdfExtractionResult {
1513
text: string;
1614
pageCount: number;
@@ -26,6 +24,105 @@ export class PdfExtractionError extends Error {
2624
}
2725
}
2826

27+
/**
 * Structural type for the part of the dynamically imported `pdf-parse`
 * module that this file uses: a `PDFParse` constructor whose instances can
 * extract text and be destroyed.
 */
interface PdfParseModule {
  PDFParse: {
    new (opts: { data: Uint8Array; verbosity: number }): {
      getText: () => Promise<{ text?: string; total?: number }>;
      destroy: () => Promise<void>;
    };
  };
}
33+
34+
/**
 * The six components of a 2D affine transform, using the DOMMatrix naming:
 *
 *   | a c e |
 *   | b d f |
 *   | 0 0 1 |
 */
interface DomMatrixLike {
  a: number;
  b: number;
  c: number;
  d: number;
  e: number;
  f: number;
}

/**
 * Minimal 2D stand-in for the browser `DOMMatrix`, installed as a global
 * polyfill when the runtime does not provide one.
 *
 * Only the 2D components (a–f) are modelled. `translate` and `scale` return
 * a new matrix and leave the receiver untouched, as the DOM API does; the
 * `*Self` methods mutate the receiver and return it.
 */
class MinimalDOMMatrix implements DomMatrixLike {
  a: number;
  b: number;
  c: number;
  d: number;
  e: number;
  f: number;

  /**
   * @param init Up to six numbers in `[a, b, c, d, e, f]` order. Missing
   *   entries fall back to the identity matrix; extra entries are ignored.
   */
  constructor(init?: Iterable<number> | ArrayLike<number>) {
    const values = init ? Array.from(init).slice(0, 6) : [];
    this.a = values[0] ?? 1;
    this.b = values[1] ?? 0;
    this.c = values[2] ?? 0;
    this.d = values[3] ?? 1;
    this.e = values[4] ?? 0;
    this.f = values[5] ?? 0;
  }

  /** Compute `left × right` for two 2D affine matrices. */
  private static product(left: DomMatrixLike, right: DomMatrixLike): DomMatrixLike {
    return {
      a: left.a * right.a + left.c * right.b,
      b: left.b * right.a + left.d * right.b,
      c: left.a * right.c + left.c * right.d,
      d: left.b * right.c + left.d * right.d,
      e: left.a * right.e + left.c * right.f + left.e,
      f: left.b * right.e + left.d * right.f + left.f,
    };
  }

  /** Overwrite this matrix's components with those of `source`. */
  private assign(source: DomMatrixLike): this {
    this.a = source.a;
    this.b = source.b;
    this.c = source.c;
    this.d = source.d;
    this.e = source.e;
    this.f = source.f;
    return this;
  }

  /** Copy of this matrix, so non-mutating operations never touch the receiver. */
  private clone(): MinimalDOMMatrix {
    return new MinimalDOMMatrix([this.a, this.b, this.c, this.d, this.e, this.f]);
  }

  /**
   * Post-multiply in place: `this = this × other`.
   * Calling with no argument multiplies by the identity (a no-op).
   */
  multiplySelf(other?: DomMatrixLike): MinimalDOMMatrix {
    return other ? this.assign(MinimalDOMMatrix.product(this, other)) : this;
  }

  /**
   * Pre-multiply in place: `this = other × this`.
   * Calling with no argument multiplies by the identity (a no-op).
   */
  preMultiplySelf(other?: DomMatrixLike): MinimalDOMMatrix {
    return other ? this.assign(MinimalDOMMatrix.product(other, this)) : this;
  }

  /**
   * Return a NEW matrix equal to this one with a translation applied.
   * The offset is transformed by the current scale/skew, not added raw.
   */
  translate(tx?: number, ty?: number): MinimalDOMMatrix {
    return this.clone().multiplySelf({ a: 1, b: 0, c: 0, d: 1, e: tx ?? 0, f: ty ?? 0 });
  }

  /**
   * Return a NEW matrix equal to this one with a scale applied.
   * `scaleY` defaults to `scaleX`, and `scaleX` defaults to 1.
   */
  scale(scaleX?: number, scaleY?: number): MinimalDOMMatrix {
    const sx = scaleX ?? 1;
    const sy = scaleY ?? sx;
    return this.clone().multiplySelf({ a: sx, b: 0, c: 0, d: sy, e: 0, f: 0 });
  }

  /**
   * Invert this matrix in place. A singular matrix (zero determinant) has
   * every component set to NaN, mirroring the DOM behaviour.
   */
  invertSelf(): MinimalDOMMatrix {
    const { a, b, c, d, e, f } = this;
    const det = a * d - b * c;
    if (det === 0) {
      return this.assign({ a: NaN, b: NaN, c: NaN, d: NaN, e: NaN, f: NaN });
    }
    return this.assign({
      a: d / det,
      b: -b / det,
      c: -c / det,
      d: a / det,
      e: (c * f - d * e) / det,
      f: (b * e - a * f) / det,
    });
  }
}
85+
86+
/**
 * Minimal stand-in for the browser `ImageData`, installed as a global
 * polyfill when the runtime does not provide one.
 *
 * Supports both DOM constructor forms:
 *   - `new ImageData(width, height)` allocates a zeroed RGBA buffer;
 *   - `new ImageData(data, width, height?)` wraps an existing buffer, with
 *     `height` derived from the buffer length when omitted.
 */
class MinimalImageData {
  data: Uint8ClampedArray;
  width: number;
  height: number;

  constructor(width: number, height: number);
  constructor(data: Uint8ClampedArray, width: number, height?: number);
  constructor(dataOrWidth: Uint8ClampedArray | number, widthOrHeight: number, height?: number) {
    if (typeof dataOrWidth === 'number') {
      // (width, height) form: four bytes (RGBA) per pixel.
      this.width = dataOrWidth;
      this.height = widthOrHeight;
      this.data = new Uint8ClampedArray(dataOrWidth * widthOrHeight * 4);
    } else {
      // (data, width, height?) form: keep the caller's buffer as-is.
      this.data = dataOrWidth;
      this.width = widthOrHeight;
      this.height =
        height ?? (widthOrHeight > 0 ? dataOrWidth.length / (4 * widthOrHeight) : 0);
    }
  }
}
97+
98+
/**
 * No-op stand-in for the browser `Path2D`, installed as a global polyfill
 * when the runtime does not provide one.
 */
class MinimalPath2D {
  /** Accepts the call and records nothing. */
  addPath(): void {
    return;
  }
}
101+
102+
let pdfParseModulePromise: Promise<PdfParseModule> | null = null;
103+
104+
/**
 * Install the minimal `DOMMatrix`, `ImageData` and `Path2D` stand-ins on
 * `globalThis`, but only for names that are currently missing or falsy.
 * Globals that already exist are left untouched.
 */
function ensurePdfJsServerPolyfills(): void {
  const globalRecord = globalThis as Record<string, unknown>;
  const fallbacks: ReadonlyArray<readonly [string, unknown]> = [
    ['DOMMatrix', MinimalDOMMatrix],
    ['ImageData', MinimalImageData],
    ['Path2D', MinimalPath2D],
  ];

  for (const [globalName, fallback] of fallbacks) {
    if (!globalRecord[globalName]) {
      globalRecord[globalName] = fallback;
    }
  }
}
117+
118+
/**
 * Lazily import `pdf-parse`, installing the server polyfills first.
 *
 * The import promise is cached in `pdfParseModulePromise` so concurrent and
 * later callers share one load. If the import rejects, the cache is cleared
 * so the next call attempts the import again instead of replaying the same
 * rejection for the lifetime of the process.
 *
 * @returns The loaded `pdf-parse` module.
 * @throws Whatever the dynamic import rejects with.
 */
async function loadPdfParseModule(): Promise<PdfParseModule> {
  if (!pdfParseModulePromise) {
    ensurePdfJsServerPolyfills();
    const pending: Promise<PdfParseModule> = (
      import('pdf-parse') as Promise<PdfParseModule>
    ).catch((err: unknown) => {
      // Only clear the cache if it still holds this attempt, so a newer
      // in-flight load is never discarded.
      if (pdfParseModulePromise === pending) {
        pdfParseModulePromise = null;
      }
      throw err;
    });
    pdfParseModulePromise = pending;
  }
  return pdfParseModulePromise;
}
125+
29126
/**
30127
* Extract plain text from a PDF buffer.
31128
*
@@ -42,6 +139,7 @@ export async function extractPdfText(buffer: Buffer): Promise<PdfExtractionResul
42139
let pageCount: number;
43140

44141
try {
142+
const { PDFParse } = await loadPdfParseModule();
45143
const parser = new PDFParse({ data: buffer, verbosity: 0 });
46144
const result = await parser.getText();
47145
text = result.text ?? '';

0 commit comments

Comments
 (0)