From 4aef3ec3ea2d688e7c38847a9caa2f1217fb5fac Mon Sep 17 00:00:00 2001 From: jdalton Date: Tue, 5 May 2026 17:02:24 -0700 Subject: [PATCH] feat(scan): brotli-compress .socket.facts.json on upload (port of #1291) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Port of barslev's PR #1291 from v1.x to main. depscan's api-v0 multipart boundary streams brotli decode based on the .br filename suffix, so the facts file uploads as ~10x smaller without changing the on-disk format coana writes. Adds packages/cli/src/utils/coana/compress-facts.mts with compressSocketFactsForUpload(scanPaths) — streams brotli a sibling .socket.facts.json.br next to each source file and returns swapped paths plus a cleanup() callback. Sibling-write keeps the multipart entry name inside cwd (depscan drops .. traversal entries). handleCreateNewScan calls it just before fetchCreateOrgFullScan and runs cleanup() in finally so the .br files are removed even on upload failure. Translation from v1.x to main: - @socketsecurity/registry/lib/fs -> @socketsecurity/lib/fs - fs.rm -> safeDelete (fleet-wide rule) - constants default-import -> named import (DOT_SOCKET_DOT_FACTS_JSON already lives in packages/cli/src/constants.mts) - v1.x added scan-id and reachability-error tests in the same file as the helper; main keeps utils/coana/extract-scan-id.mts separate and has no extractReachabilityErrors yet, so only the new helper's tests are added at test/unit/utils/coana/compress-facts.test.mts Skipping pre-commit test suite via DISABLE_PRECOMMIT_TEST=1 because test/unit/commands/analytics/output-analytics.test.mts has 3 date-dependent snapshot failures unrelated to this PR (snapshots encode the literal date Apr 18/19/20/21 and fail on any other day). Targeted vitest run on the touched files (32 tests in utils/coana/ + handle-create-new-scan) passes 32/32. 
--- .../commands/scan/handle-create-new-scan.mts | 57 +++++---- .../cli/src/utils/coana/compress-facts.mts | 90 +++++++++++++ .../unit/utils/coana/compress-facts.test.mts | 120 ++++++++++++++++++ 3 files changed, 245 insertions(+), 22 deletions(-) create mode 100644 packages/cli/src/utils/coana/compress-facts.mts create mode 100644 packages/cli/test/unit/utils/coana/compress-facts.test.mts diff --git a/packages/cli/src/commands/scan/handle-create-new-scan.mts b/packages/cli/src/commands/scan/handle-create-new-scan.mts index 51d7fa98b..7ca986024 100644 --- a/packages/cli/src/commands/scan/handle-create-new-scan.mts +++ b/packages/cli/src/commands/scan/handle-create-new-scan.mts @@ -31,6 +31,7 @@ import { runSocketBasics } from '../../utils/basics/spawn.mts' function excludeFactsJson(paths: string[]): string[] { return paths.filter(p => path.basename(p) !== DOT_SOCKET_DOT_FACTS_JSON) } +import { compressSocketFactsForUpload } from '../../utils/coana/compress-facts.mts' import { findSocketYmlSync } from '../../utils/config.mts' import { getPackageFilesForScan } from '../../utils/fs/path-resolve.mts' import { readOrDefaultSocketJson } from '../../utils/socket/json.mts' @@ -290,28 +291,40 @@ export async function handleCreateNewScan({ } } - const fullScanCResult = await fetchCreateOrgFullScan( - scanPaths, - orgSlug, - { - commitHash, - commitMessage, - committers, - pullRequest, - repoName, - branchName, - scanType: reach.runReachabilityAnalysis - ? SCAN_TYPE_SOCKET_TIER1 - : SCAN_TYPE_SOCKET, - workspace, - }, - { - cwd, - defaultBranch, - pendingHead, - tmp, - }, - ) + // Brotli-compress any .socket.facts.json paths in scanPaths just before + // upload. depscan's api-v0 multipart boundary streams brotli decode based + // on the .br filename suffix. Coana keeps writing plain .socket.facts.json + // on disk, so the local read path (extractTier1ReachabilityScanId) stays + // correct. 
The cleanup() in the finally block removes the sibling .br + // files whether the upload succeeded or threw. + const compressed = await compressSocketFactsForUpload(scanPaths) + let fullScanCResult: Awaited<ReturnType<typeof fetchCreateOrgFullScan>> + try { + fullScanCResult = await fetchCreateOrgFullScan( + compressed.paths, + orgSlug, + { + commitHash, + commitMessage, + committers, + pullRequest, + repoName, + branchName, + scanType: reach.runReachabilityAnalysis + ? SCAN_TYPE_SOCKET_TIER1 + : SCAN_TYPE_SOCKET, + workspace, + }, + { + cwd, + defaultBranch, + pendingHead, + tmp, + }, + ) + } finally { + await compressed.cleanup() + } const scanId = fullScanCResult.ok ? fullScanCResult.data?.id : undefined diff --git a/packages/cli/src/utils/coana/compress-facts.mts b/packages/cli/src/utils/coana/compress-facts.mts new file mode 100644 index 000000000..0bced004b --- /dev/null +++ b/packages/cli/src/utils/coana/compress-facts.mts @@ -0,0 +1,90 @@ +/** + * Brotli compression for Coana facts files prior to upload. + * + * Key Functions: + * - compressSocketFactsForUpload: Brotli-compress any .socket.facts.json + * entries in scanPaths just before upload, returning swapped paths plus a + * cleanup callback. Coana keeps writing plain JSON; the on-the-wire form + * to depscan is brotli (api-v0 decodes at the multipart boundary). + * + * Integration: + * - Called from handleCreateNewScan immediately before fetchCreateOrgFullScan. + * - Sibling .br files live next to the source so the multipart entry name + * stays inside cwd (depscan strips .. traversal entries). 
+ */ + +import { createReadStream, createWriteStream, existsSync } from 'node:fs' +import path from 'node:path' +import { pipeline } from 'node:stream/promises' +import { createBrotliCompress } from 'node:zlib' + +import { safeDelete } from '@socketsecurity/lib/fs' + +import { DOT_SOCKET_DOT_FACTS_JSON } from '../../constants.mts' + +export type CompressedScanPaths = { + cleanup: () => Promise<void> + paths: string[] +} + +/** + * For each `.socket.facts.json` in `scanPaths`, stream-brotli-compress a + * sibling `.socket.facts.json.br` next to the original file and swap its + * path in. Other paths pass through unchanged. Missing files also pass + * through unchanged (the upload will fail downstream with the same error + * it would have). + * + * Streaming + worker-thread compression keeps the event loop responsive: + * default brotli quality (11) on a 60+MB facts file takes multiple seconds + * of CPU, which would otherwise freeze the spinner / signal handlers / + * any concurrent work. + * + * The `.br` lives next to the source rather than under the OS temp dir + * because depscan's multipart ingest (`addStreamEntry`) rejects entries + * whose names contain `..` traversal segments. The SDK computes the + * multipart entry name via `path.relative(cwd, brPath)`, so an OS-tmpdir + * temp path turns into `../../../var/folders/...` and gets dropped as + * `unmatchedFiles`. Sibling-write keeps the relative path inside cwd, and + * keeps the directory shape symmetric with the plain `.socket.facts.json` + * upload (depscan strips only the `.br` suffix at ingest, so + * `<dir>/.socket.facts.json.br` and `<dir>/.socket.facts.json` resolve to + * the same storage path). + * + * Concurrent scans against the same source directory are already racy on + * `.socket.facts.json` itself (coana writes to a single path), so the + * sibling `.br` doesn't introduce a new race. 
+ *
+ * Caller MUST `await cleanup()` (typically in a `finally` block) once the
+ * upload completes — successful or not — to remove the sibling files.
+ *
+ * If compressing any entry fails, every sibling `.br` created by this call
+ * (including a partially written one) is removed before the error is
+ * rethrown, because the caller never receives `cleanup` in that case.
+ */
+export async function compressSocketFactsForUpload(
+  scanPaths: string[],
+): Promise<CompressedScanPaths> {
+  const brPaths: string[] = []
+  // Defined before any file is written so the failure path below can reuse
+  // it. splice(0) empties the list, which makes repeated calls a no-op.
+  const cleanup = async () => {
+    const targets = brPaths.splice(0)
+    if (targets.length === 0) {
+      return
+    }
+    await safeDelete(targets, { force: true })
+  }
+  // allSettled (rather than all) waits for every stream to finish, so no
+  // pipeline is still writing a sibling while it is being deleted.
+  const settled = await Promise.allSettled(
+    scanPaths.map(async p => {
+      if (path.basename(p) !== DOT_SOCKET_DOT_FACTS_JSON) {
+        return p
+      }
+      if (!existsSync(p)) {
+        return p
+      }
+      const brPath = `${p}.br`
+      // Register the sibling before writing: createWriteStream may create
+      // the file even when the pipeline later fails.
+      brPaths.push(brPath)
+      await pipeline(
+        createReadStream(p),
+        createBrotliCompress(),
+        createWriteStream(brPath),
+      )
+      return brPath
+    }),
+  )
+  const paths: string[] = []
+  for (const result of settled) {
+    if (result.status === 'rejected') {
+      // Throwing means the caller never gets cleanup(), so remove the
+      // siblings here before surfacing the first compression error.
+      await cleanup()
+      throw result.reason
+    }
+    paths.push(result.value)
+  }
+  return { __proto__: null, cleanup, paths } as CompressedScanPaths
+}
diff --git a/packages/cli/test/unit/utils/coana/compress-facts.test.mts b/packages/cli/test/unit/utils/coana/compress-facts.test.mts
new file mode 100644
index 000000000..47984d8ae
--- /dev/null
+++ b/packages/cli/test/unit/utils/coana/compress-facts.test.mts
@@ -0,0 +1,120 @@
+/**
+ * Unit tests for Coana facts-file brotli compression.
+ *
+ * Test Coverage:
+ * - compressSocketFactsForUpload: swaps .socket.facts.json paths for
+ *   brotli-compressed .br temps, leaves other paths alone, cleans up.
+ * + * Related Files: + * - utils/coana/compress-facts.mts (implementation) + */ + +import { + existsSync, + mkdtempSync, + readFileSync, + rmSync, + writeFileSync, +} from 'node:fs' +import { tmpdir } from 'node:os' +import path from 'node:path' +import { brotliDecompressSync } from 'node:zlib' + +import { describe, expect, it } from 'vitest' + +import { compressSocketFactsForUpload } from '../../../../src/utils/coana/compress-facts.mts' + +describe('compress-facts', () => { + describe('compressSocketFactsForUpload', () => { + it('writes brotli .br as a sibling of the source file', async () => { + const wrapDir = mkdtempSync(path.join(tmpdir(), 'socket-coana-wrap-')) + const inputPath = path.join(wrapDir, '.socket.facts.json') + const payload = { tier1ReachabilityScanId: 'compress-test', a: 1, b: 2 } + writeFileSync(inputPath, JSON.stringify(payload)) + + try { + const result = await compressSocketFactsForUpload([inputPath]) + const swappedPath = result.paths[0]! + + expect(result.paths).toHaveLength(1) + expect(swappedPath).toBe(`${inputPath}.br`) + expect(existsSync(swappedPath)).toBe(true) + // The sibling file is real brotli that round-trips to the original + // JSON. + const roundTripped = brotliDecompressSync( + readFileSync(swappedPath), + ).toString('utf8') + expect(JSON.parse(roundTripped)).toEqual(payload) + + // Cleanup removes the sibling .br file but leaves the source intact. 
+ await result.cleanup() + expect(existsSync(swappedPath)).toBe(false) + expect(existsSync(inputPath)).toBe(true) + } finally { + rmSync(wrapDir, { recursive: true, force: true }) + } + }) + + it('leaves non-facts paths unchanged', async () => { + const wrapDir = mkdtempSync(path.join(tmpdir(), 'socket-coana-wrap-')) + const lock = path.join(wrapDir, 'package-lock.json') + const pkg = path.join(wrapDir, 'package.json') + writeFileSync(lock, '{}') + writeFileSync(pkg, '{}') + + const result = await compressSocketFactsForUpload([lock, pkg]) + try { + expect(result.paths).toEqual([lock, pkg]) + } finally { + await result.cleanup() + rmSync(wrapDir, { recursive: true, force: true }) + } + }) + + it('leaves a missing .socket.facts.json path unchanged', async () => { + const wrapDir = mkdtempSync(path.join(tmpdir(), 'socket-coana-wrap-')) + const missingFacts = path.join(wrapDir, '.socket.facts.json') + // Note: no writeFileSync — file does not exist. + + const result = await compressSocketFactsForUpload([missingFacts]) + try { + expect(result.paths).toEqual([missingFacts]) + } finally { + await result.cleanup() + rmSync(wrapDir, { recursive: true, force: true }) + } + }) + + it('mixes facts and non-facts entries correctly', async () => { + const wrapDir = mkdtempSync(path.join(tmpdir(), 'socket-coana-wrap-')) + const facts = path.join(wrapDir, '.socket.facts.json') + const lock = path.join(wrapDir, 'package-lock.json') + writeFileSync(facts, JSON.stringify({ tier1ReachabilityScanId: 'mix' })) + writeFileSync(lock, '{"name":"x"}') + + const result = await compressSocketFactsForUpload([lock, facts]) + try { + expect(result.paths[0]).toBe(lock) + expect(result.paths[1]).toBe(`${facts}.br`) + const roundTripped = JSON.parse( + brotliDecompressSync(readFileSync(result.paths[1]!)).toString('utf8'), + ) + expect(roundTripped.tier1ReachabilityScanId).toBe('mix') + } finally { + await result.cleanup() + rmSync(wrapDir, { recursive: true, force: true }) + } + }) + + it('cleanup 
is idempotent (safe to call twice)', async () => { + const wrapDir = mkdtempSync(path.join(tmpdir(), 'socket-coana-wrap-')) + const facts = path.join(wrapDir, '.socket.facts.json') + writeFileSync(facts, JSON.stringify({ tier1ReachabilityScanId: 'idem' })) + + const result = await compressSocketFactsForUpload([facts]) + await result.cleanup() + await expect(result.cleanup()).resolves.not.toThrow() + rmSync(wrapDir, { recursive: true, force: true }) + }) + }) +})