From e12cb4df4a0c404ec08cd1847b37e659dfe963ba Mon Sep 17 00:00:00 2001 From: indexzero Date: Sun, 1 Feb 2026 02:18:02 -0500 Subject: [PATCH 1/2] feat(cli) add --format option to view query command Add support for multiple output formats in view query: - ndjson: One JSON object per line (default, streaming) - lines: Plain text values for shell piping - json: Complete JSON array for programmatic use The lines format outputs strings as-is and other types as JSON, with tab-separated values for multi-field records. This enables shell pipeline integration like sort, uniq, wc, and xargs. Co-Authored-By: Claude Opus 4.5 --- cli/cli/src/cmd/view/query.js | 55 +++++++-- cli/cli/src/cmd/view/query.test.js | 186 +++++++++++++++++++++++++++++ cli/cli/src/jack.js | 9 ++ doc/cli-reference.md | 73 ++++++++++- 4 files changed, 313 insertions(+), 10 deletions(-) create mode 100644 cli/cli/src/cmd/view/query.test.js diff --git a/cli/cli/src/cmd/view/query.js b/cli/cli/src/cmd/view/query.js index e5a803f..4363dd6 100644 --- a/cli/cli/src/cmd/view/query.js +++ b/cli/cli/src/cmd/view/query.js @@ -8,16 +8,19 @@ Query a defined view, outputting matching records. 
Options: --limit Maximum records to return --count Only output the count of matching records - --collect Collect all results into a JSON array --filter Filter expression (e.g., "name=lodash", "versions|length>10") - --json Output as ndjson (default) + --format Output format: ndjson (default), lines, json + - ndjson: One JSON object per line (streaming) + - lines: Plain text, one value per line + - json: Complete JSON array Examples: _all_docs view query npm-packages _all_docs view query npm-versions --limit 100 _all_docs view query npm-packages --count _all_docs view query npm-packages --filter "name=lodash" - _all_docs view query npm-versions --collect > all-versions.json + _all_docs view query npm-packages --format json > packages.json + _all_docs view query npm-packages --select 'name' --format lines | wc -l `; export const command = async (cli) => { @@ -62,15 +65,49 @@ export const command = async (cli) => { return; } - if (cli.values.collect) { - const results = await collectView(view, cache, options); - console.log(JSON.stringify(results, null, 2)); - return; + // Determine format (--collect is alias for --format json for backwards compat) + const format = cli.values.collect ? 'json' : (cli.values.format || 'ndjson'); + + // Validate format + if (!['ndjson', 'lines', 'json'].includes(format)) { + console.error(`Unknown format: ${format}`); + console.error('Valid formats: ndjson, lines, json'); + process.exit(1); } - // Stream ndjson output + // Collect results for json format + const results = []; + for await (const record of queryView(view, cache, options)) { - console.log(JSON.stringify(record)); + switch (format) { + case 'ndjson': + console.log(JSON.stringify(record)); + break; + + case 'lines': { + const values = Object.values(record); + if (values.length === 1) { + // Single field: output as-is (string) or JSON (other types) + const val = values[0]; + console.log(typeof val === 'string' ? 
val : JSON.stringify(val)); + } else { + // Multiple fields: tab-separated + console.log(values.map(v => + typeof v === 'string' ? v : JSON.stringify(v) + ).join('\t')); + } + break; + } + + case 'json': + results.push(record); + break; + } + } + + // Output collected results for json format + if (format === 'json') { + console.log(JSON.stringify(results, null, 2)); } } catch (err) { console.error(`Error querying view: ${err.message}`); diff --git a/cli/cli/src/cmd/view/query.test.js b/cli/cli/src/cmd/view/query.test.js new file mode 100644 index 0000000..a8f0541 --- /dev/null +++ b/cli/cli/src/cmd/view/query.test.js @@ -0,0 +1,186 @@ +import { describe, it } from 'node:test'; +import { strict as assert } from 'node:assert'; + +/** + * Test the format output logic for view query --format option + */ + +/** + * Format a record for 'lines' output mode + * @param {object} record - The record to format + * @returns {string} The formatted line + */ +function formatLinesOutput(record) { + const values = Object.values(record); + if (values.length === 1) { + // Single field: output as-is (string) or JSON (other types) + const val = values[0]; + return typeof val === 'string' ? val : JSON.stringify(val); + } else { + // Multiple fields: tab-separated + return values.map(v => + typeof v === 'string' ? 
v : JSON.stringify(v) + ).join('\t'); + } +} + +describe('view query --format', () => { + describe('ndjson format', () => { + it('outputs valid JSON per line', () => { + const record = { name: 'lodash', count: 114 }; + const output = JSON.stringify(record); + + assert.equal(output, '{"name":"lodash","count":114}'); + assert.doesNotThrow(() => JSON.parse(output)); + }); + + it('handles nested objects', () => { + const record = { name: 'react', meta: { versions: ['18.0.0', '18.1.0'] } }; + const output = JSON.stringify(record); + + const parsed = JSON.parse(output); + assert.deepEqual(parsed.meta.versions, ['18.0.0', '18.1.0']); + }); + }); + + describe('lines format', () => { + it('outputs plain string for single string field', () => { + const record = { name: 'lodash' }; + const output = formatLinesOutput(record); + + assert.equal(output, 'lodash'); + // Should NOT be quoted + assert.ok(!output.startsWith('"')); + assert.ok(!output.startsWith('{')); + }); + + it('outputs JSON for single non-string field (array)', () => { + const record = { versions: ['1.0.0', '2.0.0'] }; + const output = formatLinesOutput(record); + + assert.equal(output, '["1.0.0","2.0.0"]'); + assert.doesNotThrow(() => JSON.parse(output)); + }); + + it('outputs JSON for single non-string field (number)', () => { + const record = { count: 42 }; + const output = formatLinesOutput(record); + + assert.equal(output, '42'); + }); + + it('outputs tab-separated for multiple fields', () => { + const record = { name: 'lodash', count: 114 }; + const output = formatLinesOutput(record); + + assert.equal(output, 'lodash\t114'); + assert.ok(output.includes('\t')); + }); + + it('handles mixed string and non-string in multi-field', () => { + const record = { name: 'react', versions: ['18.0.0', '18.1.0'], count: 2 }; + const output = formatLinesOutput(record); + + const parts = output.split('\t'); + assert.equal(parts.length, 3); + assert.equal(parts[0], 'react'); + assert.equal(parts[1], '["18.0.0","18.1.0"]'); + 
assert.equal(parts[2], '2'); + }); + + it('handles empty string values', () => { + const record = { name: '' }; + const output = formatLinesOutput(record); + + assert.equal(output, ''); + }); + + it('handles null values in multi-field', () => { + const record = { name: 'test', value: null }; + const output = formatLinesOutput(record); + + assert.equal(output, 'test\tnull'); + }); + }); + + describe('json format', () => { + it('outputs valid JSON array', () => { + const results = [ + { name: 'lodash', count: 114 }, + { name: 'express', count: 50 } + ]; + const output = JSON.stringify(results, null, 2); + + const parsed = JSON.parse(output); + assert.ok(Array.isArray(parsed)); + assert.equal(parsed.length, 2); + }); + + it('handles empty results', () => { + const results = []; + const output = JSON.stringify(results, null, 2); + + const parsed = JSON.parse(output); + assert.deepEqual(parsed, []); + }); + + it('handles single result', () => { + const results = [{ name: 'lodash' }]; + const output = JSON.stringify(results, null, 2); + + const parsed = JSON.parse(output); + assert.equal(parsed.length, 1); + assert.equal(parsed[0].name, 'lodash'); + }); + }); + + describe('format validation', () => { + it('recognizes valid formats', () => { + const validFormats = ['ndjson', 'lines', 'json']; + + for (const format of validFormats) { + assert.ok( + ['ndjson', 'lines', 'json'].includes(format), + `${format} should be valid` + ); + } + }); + + it('rejects invalid formats', () => { + const invalidFormats = ['csv', 'xml', 'yaml', 'tsv', '']; + + for (const format of invalidFormats) { + assert.ok( + !['ndjson', 'lines', 'json'].includes(format), + `${format} should be invalid` + ); + } + }); + }); + + describe('backwards compatibility', () => { + it('--collect maps to json format', () => { + const collect = true; + const explicitFormat = undefined; + + const format = collect ? 
'json' : (explicitFormat || 'ndjson'); + assert.equal(format, 'json'); + }); + + it('defaults to ndjson when no flags set', () => { + const collect = false; + const explicitFormat = undefined; + + const format = collect ? 'json' : (explicitFormat || 'ndjson'); + assert.equal(format, 'ndjson'); + }); + + it('explicit --format overrides when --collect not set', () => { + const collect = false; + const explicitFormat = 'lines'; + + const format = collect ? 'json' : (explicitFormat || 'ndjson'); + assert.equal(format, 'lines'); + }); + }); +}); diff --git a/cli/cli/src/jack.js b/cli/cli/src/jack.js index 728eae4..ba27027 100644 --- a/cli/cli/src/jack.js +++ b/cli/cli/src/jack.js @@ -270,6 +270,15 @@ const cli = ack limit: { hint: 'n', description: `Maximum records to return` + }, + format: { + hint: 'fmt', + description: `Output format: ndjson (default), lines, json + + - ndjson: One JSON object per line (streaming) + - lines: Plain text values (for shell piping) + - json: Complete JSON array + ` } }) diff --git a/doc/cli-reference.md b/doc/cli-reference.md index 89ef742..bbeee0b 100644 --- a/doc/cli-reference.md +++ b/doc/cli-reference.md @@ -23,6 +23,7 @@ Available commands: - `partition` - Work with registry partitions - `packument` - Fetch and manage package documents - `cache` - Manage local cache +- `view` - Define and query views over cached data --- @@ -671,8 +672,42 @@ npx _all_docs view query [options] **Options:** - `--limit ` - Maximum records to return -- `--filter ` - Filter expression +- `--filter ` - Filter expression (e.g., `name=lodash`, `versions|length>10`) - `--count` - Only output the count of matching records +- `--format ` - Output format: ndjson (default), lines, json + +**Output Formats:** + +| Format | Description | Use Case | +|--------|-------------|----------| +| `ndjson` | Newline-delimited JSON (default) | Streaming processing with jq | +| `lines` | Plain text values | Shell piping, xargs, sort, uniq | +| `json` | JSON array | Small 
datasets, programmatic use | + +**Examples:** + +```bash +# Query with default ndjson output +npx _all_docs view query npm-pkgs + +# Query with limit +npx _all_docs view query npm-pkgs --limit 100 + +# Get count only +npx _all_docs view query npm-pkgs --count + +# Output as JSON array +npx _all_docs view query npm-pkgs --format json > packages.json + +# Output as plain text for shell piping +npx _all_docs view query npm-pkgs --select 'name' --format lines | wc -l + +# Find packages with many versions +npx _all_docs view query npm-pkgs --filter 'count > 100' --format lines + +# Count scoped packages +npx _all_docs view query npm-pkgs --select 'name' --format lines | grep '^@' | wc -l +``` ### view join @@ -682,27 +717,63 @@ Join two views on a common key. npx _all_docs view join [options] ``` +**Options:** +- `--on ` - Join key field (default: name) +- `--left` - Left join (include all from left view) +- `--inner` - Inner join (only records in both views) +- `--right` - Right join (include all from right view) +- `--full` - Full join (all records from both views) +- `--diff` - Set difference (records in left but not in right) +- `--limit ` - Maximum records to return +- `--select ` - Output field selection + This enables comparing packages across different sources: +**Examples:** + ```bash # Compare npm cache against local snapshot npx _all_docs view define npm --origin npm npx _all_docs view define snapshot --origin ./snapshot/ npx _all_docs view join npm snapshot --diff --select 'name' + +# Find packages in npm but not in CGR +npx _all_docs view join npm-pkgs cgr-pkgs --diff --select 'name' + +# Inner join - packages in both registries +npx _all_docs view join npm-pkgs cgr-pkgs --inner ``` ### view list List all defined views. +```bash +npx _all_docs view list [options] +``` + +**Options:** +- `--json` - Output as JSON + ### view show Show details of a defined view. +```bash +npx _all_docs view show +``` + ### view delete Delete a defined view. 
+```bash +npx _all_docs view delete [options] +``` + +**Options:** +- `--force`, `-f` - Delete without confirmation + --- ## Troubleshooting From cd1ddddbff2794be6852a1d39dec42d12adfbeb5 Mon Sep 17 00:00:00 2001 From: indexzero Date: Sun, 1 Feb 2026 23:27:36 -0500 Subject: [PATCH 2/2] feat(cli) add jsonl format alias for view query command Add jsonl as an accepted value for the --format option. Internally, jsonl is the canonical format and ndjson is normalized to it as an alias. Both formats produce identical output (one JSON object per line). Co-Authored-By: Claude Opus 4.5 --- cli/cli/src/cmd/view/query.js | 17 ++++++++++------- cli/cli/src/cmd/view/query.test.js | 14 +++++++++++--- cli/cli/src/jack.js | 4 ++-- doc/cli-reference.md | 3 ++- 4 files changed, 25 insertions(+), 13 deletions(-) diff --git a/cli/cli/src/cmd/view/query.js b/cli/cli/src/cmd/view/query.js index 4363dd6..d9b9ed1 100644 --- a/cli/cli/src/cmd/view/query.js +++ b/cli/cli/src/cmd/view/query.js @@ -9,8 +9,8 @@ Options: --limit Maximum records to return --count Only output the count of matching records --filter Filter expression (e.g., "name=lodash", "versions|length>10") - --format Output format: ndjson (default), lines, json - - ndjson: One JSON object per line (streaming) + --format Output format: ndjson (default), jsonl, lines, json + - ndjson/jsonl: One JSON object per line (streaming) - lines: Plain text, one value per line - json: Complete JSON array @@ -68,10 +68,13 @@ export const command = async (cli) => { // Determine format (--collect is alias for --format json for backwards compat) const format = cli.values.collect ? 'json' : (cli.values.format || 'ndjson'); + // Normalize format (ndjson is alias for jsonl) + const normalizedFormat = format === 'ndjson' ? 
'jsonl' : format; + // Validate format - if (!['ndjson', 'lines', 'json'].includes(format)) { + if (!['jsonl', 'lines', 'json'].includes(normalizedFormat)) { console.error(`Unknown format: ${format}`); - console.error('Valid formats: ndjson, lines, json'); + console.error('Valid formats: ndjson, jsonl, lines, json'); process.exit(1); } @@ -79,8 +82,8 @@ export const command = async (cli) => { const results = []; for await (const record of queryView(view, cache, options)) { - switch (format) { - case 'ndjson': + switch (normalizedFormat) { + case 'jsonl': console.log(JSON.stringify(record)); break; @@ -106,7 +109,7 @@ export const command = async (cli) => { } // Output collected results for json format - if (format === 'json') { + if (normalizedFormat === 'json') { console.log(JSON.stringify(results, null, 2)); } } catch (err) { diff --git a/cli/cli/src/cmd/view/query.test.js b/cli/cli/src/cmd/view/query.test.js index a8f0541..327d402 100644 --- a/cli/cli/src/cmd/view/query.test.js +++ b/cli/cli/src/cmd/view/query.test.js @@ -136,22 +136,30 @@ describe('view query --format', () => { describe('format validation', () => { it('recognizes valid formats', () => { - const validFormats = ['ndjson', 'lines', 'json']; + const validFormats = ['ndjson', 'jsonl', 'lines', 'json']; for (const format of validFormats) { + const normalized = format === 'ndjson' ? 'jsonl' : format; assert.ok( - ['ndjson', 'lines', 'json'].includes(format), + ['jsonl', 'lines', 'json'].includes(normalized), `${format} should be valid` ); } }); + it('normalizes ndjson to jsonl', () => { + const format = 'ndjson'; + const normalized = format === 'ndjson' ? 'jsonl' : format; + assert.equal(normalized, 'jsonl'); + }); + it('rejects invalid formats', () => { const invalidFormats = ['csv', 'xml', 'yaml', 'tsv', '']; for (const format of invalidFormats) { + const normalized = format === 'ndjson' ? 
'jsonl' : format; assert.ok( - !['ndjson', 'lines', 'json'].includes(format), + !['jsonl', 'lines', 'json'].includes(normalized), `${format} should be invalid` ); } diff --git a/cli/cli/src/jack.js b/cli/cli/src/jack.js index ba27027..a44294e 100644 --- a/cli/cli/src/jack.js +++ b/cli/cli/src/jack.js @@ -273,9 +273,9 @@ const cli = ack }, format: { hint: 'fmt', - description: `Output format: ndjson (default), lines, json + description: `Output format: ndjson (default), jsonl, lines, json - - ndjson: One JSON object per line (streaming) + - ndjson/jsonl: One JSON object per line (streaming) - lines: Plain text values (for shell piping) - json: Complete JSON array ` diff --git a/doc/cli-reference.md b/doc/cli-reference.md index bbeee0b..68b397b 100644 --- a/doc/cli-reference.md +++ b/doc/cli-reference.md @@ -674,13 +674,14 @@ npx _all_docs view query [options] - `--limit ` - Maximum records to return - `--filter ` - Filter expression (e.g., `name=lodash`, `versions|length>10`) - `--count` - Only output the count of matching records -- `--format ` - Output format: ndjson (default), lines, json +- `--format ` - Output format: ndjson (default), jsonl, lines, json **Output Formats:** | Format | Description | Use Case | |--------|-------------|----------| | `ndjson` | Newline-delimited JSON (default) | Streaming processing with jq | +| `jsonl` | JSON Lines (alias for ndjson) | ML/data science tooling | | `lines` | Plain text values | Shell piping, xargs, sort, uniq | | `json` | JSON array | Small datasets, programmatic use |