Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
57 changes: 35 additions & 22 deletions packages/cli/src/commands/scan/handle-create-new-scan.mts
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ import { runSocketBasics } from '../../utils/basics/spawn.mts'
/**
 * Drop every `.socket.facts.json` entry from the given path list,
 * returning all other paths in their original order.
 */
function excludeFactsJson(paths: string[]): string[] {
  const kept: string[] = []
  for (const candidate of paths) {
    if (path.basename(candidate) !== DOT_SOCKET_DOT_FACTS_JSON) {
      kept.push(candidate)
    }
  }
  return kept
}
import { compressSocketFactsForUpload } from '../../utils/coana/compress-facts.mts'
import { findSocketYmlSync } from '../../utils/config.mts'
import { getPackageFilesForScan } from '../../utils/fs/path-resolve.mts'
import { readOrDefaultSocketJson } from '../../utils/socket/json.mts'
Expand Down Expand Up @@ -290,28 +291,40 @@ export async function handleCreateNewScan({
}
}

const fullScanCResult = await fetchCreateOrgFullScan(
scanPaths,
orgSlug,
{
commitHash,
commitMessage,
committers,
pullRequest,
repoName,
branchName,
scanType: reach.runReachabilityAnalysis
? SCAN_TYPE_SOCKET_TIER1
: SCAN_TYPE_SOCKET,
workspace,
},
{
cwd,
defaultBranch,
pendingHead,
tmp,
},
)
// Brotli-compress any .socket.facts.json paths in scanPaths just before
// upload. depscan's api-v0 multipart boundary streams brotli decode based
// on the .br filename suffix. Coana keeps writing plain .socket.facts.json
// on disk, so the local read path (extractTier1ReachabilityScanId) stays
// correct. The cleanup() in the finally block removes the sibling .br
// files whether the upload succeeded or threw.
const compressed = await compressSocketFactsForUpload(scanPaths)
let fullScanCResult: Awaited<ReturnType<typeof fetchCreateOrgFullScan>>
try {
fullScanCResult = await fetchCreateOrgFullScan(
compressed.paths,
orgSlug,
{
commitHash,
commitMessage,
committers,
pullRequest,
repoName,
branchName,
scanType: reach.runReachabilityAnalysis
? SCAN_TYPE_SOCKET_TIER1
: SCAN_TYPE_SOCKET,
workspace,
},
{
cwd,
defaultBranch,
pendingHead,
tmp,
},
)
} finally {
await compressed.cleanup()
}

const scanId = fullScanCResult.ok ? fullScanCResult.data?.id : undefined

Expand Down
90 changes: 90 additions & 0 deletions packages/cli/src/utils/coana/compress-facts.mts
Original file line number Diff line number Diff line change
@@ -0,0 +1,90 @@
/**
* Brotli compression for Coana facts files prior to upload.
*
* Key Functions:
* - compressSocketFactsForUpload: Brotli-compress any .socket.facts.json
* entries in scanPaths just before upload, returning swapped paths plus a
* cleanup callback. Coana keeps writing plain JSON; the on-the-wire form
* to depscan is brotli (api-v0 decodes at the multipart boundary).
*
* Integration:
* - Called from handleCreateNewScan immediately before fetchCreateOrgFullScan.
* - Sibling .br files live next to the source so the multipart entry name
* stays inside cwd (depscan strips .. traversal entries).
*/

import { createReadStream, createWriteStream, existsSync } from 'node:fs'
import path from 'node:path'
import { pipeline } from 'node:stream/promises'
import { createBrotliCompress } from 'node:zlib'

import { safeDelete } from '@socketsecurity/lib/fs'

import { DOT_SOCKET_DOT_FACTS_JSON } from '../../constants.mts'

/**
 * Result of {@link compressSocketFactsForUpload}: the upload path list with
 * `.br` swaps applied, plus a callback that removes the sibling `.br` files.
 */
export type CompressedScanPaths = {
  /** Deletes every sibling `.br` created for this upload; safe to call twice. */
  cleanup: () => Promise<void>
  /** Input paths with each facts file replaced by its `.br` sibling path. */
  paths: string[]
}

/**
* For each `.socket.facts.json` in `scanPaths`, stream-brotli-compress a
* sibling `.socket.facts.json.br` next to the original file and swap its
* path in. Other paths pass through unchanged. Missing files also pass
* through unchanged (the upload will fail downstream with the same error
* it would have).
*
* Streaming + worker-thread compression keeps the event loop responsive:
* default brotli quality (11) on a 60+MB facts file takes multiple seconds
* of CPU, which would otherwise freeze the spinner / signal handlers /
* any concurrent work.
*
* The `.br` lives next to the source rather than under the OS temp dir
* because depscan's multipart ingest (`addStreamEntry`) rejects entries
* whose names contain `..` traversal segments. The SDK computes the
* multipart entry name via `path.relative(cwd, brPath)`, so an OS-tmpdir
* temp path turns into `../../../var/folders/...` and gets dropped as
* `unmatchedFiles`. Sibling-write keeps the relative path inside cwd, and
* keeps the directory shape symmetric with the plain `.socket.facts.json`
* upload (depscan strips only the `.br` suffix at ingest, so
* `<dir>/.socket.facts.json.br` and `<dir>/.socket.facts.json` resolve to
* the same storage path).
*
* Concurrent scans against the same source directory are already racy on
* `.socket.facts.json` itself (coana writes to a single path), so the
* sibling `.br` doesn't introduce a new race.
*
* Caller MUST `await cleanup()` (typically in a `finally` block) once the
* upload completes — successful or not — to remove the sibling files.
*/
export async function compressSocketFactsForUpload(
scanPaths: string[],
): Promise<CompressedScanPaths> {
const brPaths: string[] = []
const paths = await Promise.all(
scanPaths.map(async p => {
if (path.basename(p) !== DOT_SOCKET_DOT_FACTS_JSON) {
return p
}
if (!existsSync(p)) {
return p
}
const brPath = `${p}.br`
await pipeline(
createReadStream(p),
createBrotliCompress(),
createWriteStream(brPath),
)
brPaths.push(brPath)
return brPath
}),
)
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Orphaned .br files on compression failure

Low Severity

If pipeline throws for any .socket.facts.json entry (e.g., disk-full, I/O error), the Promise.all rejects and compressSocketFactsForUpload throws before returning the cleanup callback. Any .br files already created by completed sibling pipelines, or partially written by the failing createWriteStream, are orphaned on disk with no cleanup path available to the caller.

Fix in Cursor Fix in Web

Reviewed by Cursor Bugbot for commit 4aef3ec. Configure here.

const cleanup = async () => {
const targets = brPaths.splice(0)
if (targets.length === 0) {
return
}
await safeDelete(targets, { force: true })
}
return { __proto__: null, cleanup, paths } as CompressedScanPaths
}
120 changes: 120 additions & 0 deletions packages/cli/test/unit/utils/coana/compress-facts.test.mts
Original file line number Diff line number Diff line change
@@ -0,0 +1,120 @@
/**
* Unit tests for Coana facts-file brotli compression.
*
* Test Coverage:
* - compressSocketFactsForUpload: swaps .socket.facts.json paths for
* brotli-compressed .br temps, leaves other paths alone, cleans up.
*
* Related Files:
* - utils/coana/compress-facts.mts (implementation)
*/

import {
  existsSync,
  mkdtempSync,
  readFileSync,
  rmSync,
  writeFileSync,
} from 'node:fs'
import { tmpdir } from 'node:os'
import path from 'node:path'
import { brotliDecompressSync } from 'node:zlib'

import { safeDelete } from '@socketsecurity/lib/fs'

import { describe, expect, it } from 'vitest'

import { compressSocketFactsForUpload } from '../../../../src/utils/coana/compress-facts.mts'

describe('compress-facts', () => {
describe('compressSocketFactsForUpload', () => {
it('writes brotli .br as a sibling of the source file', async () => {
const wrapDir = mkdtempSync(path.join(tmpdir(), 'socket-coana-wrap-'))
const inputPath = path.join(wrapDir, '.socket.facts.json')
const payload = { tier1ReachabilityScanId: 'compress-test', a: 1, b: 2 }
writeFileSync(inputPath, JSON.stringify(payload))

try {
const result = await compressSocketFactsForUpload([inputPath])
const swappedPath = result.paths[0]!

expect(result.paths).toHaveLength(1)
expect(swappedPath).toBe(`${inputPath}.br`)
expect(existsSync(swappedPath)).toBe(true)
// The sibling file is real brotli that round-trips to the original
// JSON.
const roundTripped = brotliDecompressSync(
readFileSync(swappedPath),
).toString('utf8')
expect(JSON.parse(roundTripped)).toEqual(payload)

// Cleanup removes the sibling .br file but leaves the source intact.
await result.cleanup()
expect(existsSync(swappedPath)).toBe(false)
expect(existsSync(inputPath)).toBe(true)
} finally {
rmSync(wrapDir, { recursive: true, force: true })
}
})

it('leaves non-facts paths unchanged', async () => {
const wrapDir = mkdtempSync(path.join(tmpdir(), 'socket-coana-wrap-'))
const lock = path.join(wrapDir, 'package-lock.json')
const pkg = path.join(wrapDir, 'package.json')
writeFileSync(lock, '{}')
writeFileSync(pkg, '{}')

const result = await compressSocketFactsForUpload([lock, pkg])
try {
expect(result.paths).toEqual([lock, pkg])
} finally {
await result.cleanup()
rmSync(wrapDir, { recursive: true, force: true })
}
})

it('leaves a missing .socket.facts.json path unchanged', async () => {
const wrapDir = mkdtempSync(path.join(tmpdir(), 'socket-coana-wrap-'))
const missingFacts = path.join(wrapDir, '.socket.facts.json')
// Note: no writeFileSync — file does not exist.

const result = await compressSocketFactsForUpload([missingFacts])
try {
expect(result.paths).toEqual([missingFacts])
} finally {
await result.cleanup()
rmSync(wrapDir, { recursive: true, force: true })
}
})

it('mixes facts and non-facts entries correctly', async () => {
const wrapDir = mkdtempSync(path.join(tmpdir(), 'socket-coana-wrap-'))
const facts = path.join(wrapDir, '.socket.facts.json')
const lock = path.join(wrapDir, 'package-lock.json')
writeFileSync(facts, JSON.stringify({ tier1ReachabilityScanId: 'mix' }))
writeFileSync(lock, '{"name":"x"}')

const result = await compressSocketFactsForUpload([lock, facts])
try {
expect(result.paths[0]).toBe(lock)
expect(result.paths[1]).toBe(`${facts}.br`)
const roundTripped = JSON.parse(
brotliDecompressSync(readFileSync(result.paths[1]!)).toString('utf8'),
)
expect(roundTripped.tier1ReachabilityScanId).toBe('mix')
} finally {
await result.cleanup()
rmSync(wrapDir, { recursive: true, force: true })
}
})

it('cleanup is idempotent (safe to call twice)', async () => {
const wrapDir = mkdtempSync(path.join(tmpdir(), 'socket-coana-wrap-'))
const facts = path.join(wrapDir, '.socket.facts.json')
writeFileSync(facts, JSON.stringify({ tier1ReachabilityScanId: 'idem' }))

const result = await compressSocketFactsForUpload([facts])
await result.cleanup()
await expect(result.cleanup()).resolves.not.toThrow()
rmSync(wrapDir, { recursive: true, force: true })
})
})
})
Loading