From 9d49f71d8db5c522520e2200ab7f42c133b56653 Mon Sep 17 00:00:00 2001 From: Steven Gresh Date: Mon, 4 May 2026 21:16:14 -0700 Subject: [PATCH 1/2] fix(destination-google-sheets): sanitize double-quotes in cell values to prevent pasteData column misalignment MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The Google Sheets pasteData API applies RFC 4180-like quoting semantics even when a custom delimiter (U+001F) is used. An unescaped `"` mid-value causes the server-side parser to enter quoted-field mode and absorb subsequent \x1f delimiters as content rather than column separators, silently dropping the columns that follow. A customer whose description field contained embedded JSON (e.g. `{"custom_fields":null,"default_payment_method":null,...}`) triggered this: the 6 columns after the description were missing from the written row. Fix: add `"` to PASTE_SANITIZE_RE; replace with `'` to preserve readability. Adds two regression tests — a rowsToTsv unit test asserting no raw double-quotes in the output, and a full-destination integration test verifying row column count is preserved. Co-Authored-By: Claude Sonnet 4.6 Committed-By-Agent: claude --- .../src/index.test.ts | 68 ++++++++++++++++++- .../destination-google-sheets/src/writer.ts | 11 +-- 2 files changed, 74 insertions(+), 5 deletions(-) diff --git a/packages/destination-google-sheets/src/index.test.ts b/packages/destination-google-sheets/src/index.test.ts index 060b398ab..3b8d54100 100644 --- a/packages/destination-google-sheets/src/index.test.ts +++ b/packages/destination-google-sheets/src/index.test.ts @@ -11,8 +11,10 @@ import { import { applyBatch, MAX_CELLS_PER_SPREADSHEET, + PASTE_COL_DELIMITER, readEnumValidations, readSheet, + rowsToTsv, type StreamBatchOps, } from './writer.js' import { createMemorySheets } from '../__tests__/memory-sheets.js' @@ -371,7 +373,9 @@ describe('destination-google-sheets', () => { const id = getSpreadsheetIds()[0] const rows = stripUpdatedAt(getData(id, 'types')!) - expect(rows[1]).toEqual(['hello', '42', 'true', '', '{"nested":true}']) + // Double-quotes are replaced with single-quotes to prevent the pasteData + // CSV parser from treating them as quoting characters (see sanitizeForPaste). + expect(rows[1]).toEqual(['hello', '42', 'true', '', "{'nested':true}"]) }) it('readSheet helper — reads back data through the fake client', async () => { @@ -2044,4 +2048,66 @@ describe('enum constraints on any column', () => { out.find((m) => m.type === 'connection_status' && m.connection_status.status === 'failed') ).toBeUndefined() }) + + describe('character escaping — pasteData CSV parser safety', () => { + // The Google Sheets pasteData API applies RFC 4180-like quoting semantics even + // when using a custom delimiter (U+001F). An unescaped `"` inside a cell value + // causes the server-side parser to enter quoted-field mode and absorb subsequent + // \x1f delimiters as content, dropping the columns that follow from the row. + // Regression: a customer whose description contained JSON with double-quoted keys + // (e.g. `{"custom_fields":null,...}`) caused the 6 columns after description to + // be missing entirely from the written row. + + const PROBLEMATIC_DESCRIPTION = + '> krpandayashutosh39@gmail.com03D22BF{"custom_fields":null,"default_payment_method":null,"default_shared_payment_token":null,"footer":null,"rendering_options":null}' + + it('rowsToTsv strips double-quotes so the pasteData parser cannot enter quoted-field mode', () => { + const row = ['before', PROBLEMATIC_DESCRIPTION, 'after1', 'after2', 'after3', 'after4', 'after5', 'after6'] + const tsv = rowsToTsv([row]) + + // No raw double-quotes in the output — they would trigger CSV quoting mode. + expect(tsv).not.toContain('"') + + // All 8 cells must still be delimited correctly. + const cells = tsv.split(PASTE_COL_DELIMITER) + expect(cells).toHaveLength(8) + }) + + it('description with double-quoted JSON preserves all columns in the written row', async () => { + const { sheets, getData, getSpreadsheetIds } = createMemorySheets() + const dest = createDestination(sheets) + + await collect( + dest.write( + { config: cfg(), catalog }, + toAsyncIter([ + record('customers', { + id: 'cus_test', + name: 'Test Customer', + email: 'test@example.com', + description: PROBLEMATIC_DESCRIPTION, + balance: 0, + currency: 'usd', + delinquent: false, + }), + ]) + ) + ) + + const id = getSpreadsheetIds()[0] + const rows = stripUpdatedAt(getData(id, 'customers')!) + expect(rows).toHaveLength(2) + const [headers, dataRow] = rows + + // Data row must have the same cell count as the header row — the description + // value must not have caused adjacent columns to be dropped. + expect(dataRow).toHaveLength(headers.length) + + const descIdx = (headers as string[]).indexOf('description') + expect(descIdx).toBeGreaterThanOrEqual(0) + // Double-quotes are replaced with single-quotes to preserve readability. + expect(String(dataRow[descIdx])).toContain("'custom_fields'") + expect(String(dataRow[descIdx])).not.toContain('"') + }) + }) }) diff --git a/packages/destination-google-sheets/src/writer.ts b/packages/destination-google-sheets/src/writer.ts index 55ed43c09..2e8e4c0d8 100644 --- a/packages/destination-google-sheets/src/writer.ts +++ b/packages/destination-google-sheets/src/writer.ts @@ -737,13 +737,16 @@ export interface StreamBatchOps { // `pasteData` column delimiter. Unit Separator (U+001F) — a control char // that won't naturally appear in Stripe data. Row separator is always `\n` -// (not configurable), so any `\n`, `\r`, or U+001F inside cells must be -// sanitized or the paste parser misaligns columns. +// (not configurable), so any `\n`, `\r`, U+001F, or `"` inside cells must be +// sanitized or the paste parser misaligns columns. The Sheets API applies +// RFC 4180-like quoting semantics even with a custom delimiter: an unescaped +// `"` mid-value causes the parser to enter quoted-field mode and absorb +// subsequent \x1f delimiters as content, dropping columns from the row. export const PASTE_COL_DELIMITER = '\x1f' -const PASTE_SANITIZE_RE = /[\n\r\x1f]/g +const PASTE_SANITIZE_RE = /[\n\r\x1f"]/g function sanitizeForPaste(value: string): string { - return value.replace(PASTE_SANITIZE_RE, ' ') + return value.replace(PASTE_SANITIZE_RE, (ch) => (ch === '"' ? "'" : ' ')) } export function rowsToTsv(rows: string[][]): string { From a57709ddc9207f78da3f23e11ff8c979369f86cc Mon Sep 17 00:00:00 2001 From: sgresh-stripe <157746132+sgresh-stripe@users.noreply.github.com> Date: Mon, 4 May 2026 21:21:05 -0700 Subject: [PATCH 2/2] Update index.test.ts --- packages/destination-google-sheets/src/index.test.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/destination-google-sheets/src/index.test.ts b/packages/destination-google-sheets/src/index.test.ts index 3b8d54100..f328660c4 100644 --- a/packages/destination-google-sheets/src/index.test.ts +++ b/packages/destination-google-sheets/src/index.test.ts @@ -2059,7 +2059,7 @@ describe('enum constraints on any column', () => { // be missing entirely from the written row. const PROBLEMATIC_DESCRIPTION = - '> krpandayashutosh39@gmail.com03D22BF{"custom_fields":null,"default_payment_method":null,"default_shared_payment_token":null,"footer":null,"rendering_options":null}' + '> test@gmail.com03D22BF{"custom_fields":null,"default_payment_method":null,"default_shared_payment_token":null,"footer":null,"rendering_options":null}' it('rowsToTsv strips double-quotes so the pasteData parser cannot enter quoted-field mode', () => { const row = ['before', PROBLEMATIC_DESCRIPTION, 'after1', 'after2', 'after3', 'after4', 'after5', 'after6']