diff --git a/README.md b/README.md index b20c411..1c00f30 100644 --- a/README.md +++ b/README.md @@ -7,6 +7,7 @@ A tool for visualizing ClickHouse RowBinary and Native format data. Features an ## Features - **Format support**: RowBinary and Native, modular system allows adding more +- **Native protocol version**: Select the Native `client_protocol_version` to inspect revision-specific wire layouts - **Hex Viewer**: Virtual-scrolling hex display with ASCII column - **AST Tree**: Collapsible tree view showing decoded structure - **Interactive Highlighting**: Selecting a node in the tree highlights corresponding bytes in the hex view (and vice versa) @@ -73,6 +74,7 @@ Open http://localhost:5173 - Click nodes in the AST tree to highlight bytes - Click bytes in the hex viewer to select the corresponding node - Use "Expand All" / "Collapse All" to navigate complex structures +4. When using `Native`, choose a protocol preset to compare legacy HTTP output against newer revisions that add custom serialization, Dynamic/JSON v2, replicated, and nullable sparse encodings ## Example Queries @@ -91,6 +93,18 @@ SELECT 42::Dynamic SELECT '{"user": {"id": 123}}'::JSON(`user.id` UInt32) ``` +## Native Protocol Versions + +The `Native` format toolbar exposes upstream protocol milestones from `0` through `54483`. The selected version controls both the `client_protocol_version` request parameter and the local decoder behavior, so the explorer can parse: + +- legacy HTTP Native blocks without `BlockInfo` (`0`) +- per-column serialization metadata (`54454+`) +- sparse and replicated serialization kinds (`54465+`, `54482+`) +- Dynamic/JSON v2 Native layouts (`54473+`) +- nullable sparse serialization (`54483`) + +See [docs/native-protocol-versions.md](docs/native-protocol-versions.md) for the revision-by-revision reference, and [docs/nativespec.md](docs/nativespec.md) for the Native layout details. 
+ ## Tech Stack - React + TypeScript + Vite @@ -98,4 +112,5 @@ SELECT '{"user": {"id": 123}}'::JSON(`user.id` UInt32) - react-window (virtualized hex viewer) - react-resizable-panels (split pane layout) - Electron (desktop app, optional) +- Vitest + testcontainers (integration testing) - Playwright (e2e testing) diff --git a/src/components/AstTree/AstTree.tsx b/src/components/AstTree/AstTree.tsx index 7b88868..98ecf81 100644 --- a/src/components/AstTree/AstTree.tsx +++ b/src/components/AstTree/AstTree.tsx @@ -384,15 +384,9 @@ export function AstTree() { {/* Block-based display (Native) */} {parsedData.blocks?.map((block, blockIndex) => { const blockId = `block-${blockIndex}`; - const blockHeaderId = `block-${blockIndex}-header`; const isBlockExpanded = expandedNodes.has(blockId); - const isHeaderExpanded = expandedNodes.has(blockHeaderId); const byteCount = block.byteRange.end - block.byteRange.start; - // IDs for header metadata items (for hover highlighting) - const numColsId = `block-${blockIndex}-numcols`; - const numRowsId = `block-${blockIndex}-numrows`; - return (
{isBlockExpanded && (
- {/* Block Header Metadata */} -
-
{ - setActiveNode(blockHeaderId, 'Block metadata (Header)'); - toggleExpanded(blockHeaderId); - }} - onDoubleClick={() => scrollToHex(block.header.byteRange.start)} - onMouseEnter={() => setHoveredNode(blockHeaderId)} - onMouseLeave={() => setHoveredNode(null)} - style={{ '--depth': 1 } as React.CSSProperties} - > - {isHeaderExpanded ? '▼' : '▶'} - Header - Block metadata - - [{block.header.byteRange.start}:{block.header.byteRange.end}] ( - {block.header.byteRange.end - block.header.byteRange.start}B) - -
- {isHeaderExpanded && ( -
- {block.header.blockInfo && ( -
-
setActiveNode(`block-${blockIndex}-blockinfo`, 'BlockInfo')} - onDoubleClick={() => scrollToHex(block.header.blockInfo!.byteRange.start)} - onMouseEnter={() => setHoveredNode(`block-${blockIndex}-blockinfo`)} - onMouseLeave={() => setHoveredNode(null)} - style={{ '--depth': 2 } as React.CSSProperties} - > - BlockInfo - Field-based metadata - - [{block.header.blockInfo.byteRange.start}:{block.header.blockInfo.byteRange.end}] ( - {block.header.blockInfo.byteRange.end - block.header.blockInfo.byteRange.start}B) - -
- {block.header.blockInfo.fields.map((field) => { - const fieldId = `block-${blockIndex}-blockinfo-field-${field.fieldNumber}`; - return ( -
setActiveNode(fieldId, `${field.fieldName}: ${field.displayValue}`)} - onDoubleClick={() => scrollToHex(field.byteRange.start)} - onMouseEnter={() => setHoveredNode(fieldId)} - onMouseLeave={() => setHoveredNode(null)} - > - Field {field.fieldNumber} - {field.fieldName}: - {field.displayValue} - - [{field.byteRange.start}:{field.byteRange.end}] ( - {field.byteRange.end - field.byteRange.start}B) - -
- ); - })} -
- )} -
setActiveNode(numColsId, `numColumns: ${block.header.numColumns} (LEB128)`)} - onDoubleClick={() => scrollToHex(block.header.numColumnsRange.start)} - onMouseEnter={() => setHoveredNode(numColsId)} - onMouseLeave={() => setHoveredNode(null)} - > - LEB128 - numColumns: - {block.header.numColumns} - - [{block.header.numColumnsRange.start}:{block.header.numColumnsRange.end}] ( - {block.header.numColumnsRange.end - block.header.numColumnsRange.start}B) - -
-
setActiveNode(numRowsId, `numRows: ${block.header.numRows} (LEB128)`)} - onDoubleClick={() => scrollToHex(block.header.numRowsRange.start)} - onMouseEnter={() => setHoveredNode(numRowsId)} - onMouseLeave={() => setHoveredNode(null)} - > - LEB128 - numRows: - {block.header.numRows} - - [{block.header.numRowsRange.start}:{block.header.numRowsRange.end}] ( - {block.header.numRowsRange.end - block.header.numRowsRange.start}B) - -
-
- )} -
+ {/* Columns */} {block.columns.map((col) => { @@ -518,12 +414,6 @@ export function AstTree() { const isColHovered = col.id === hoveredNodeId; const colByteCount = col.dataByteRange.end - col.dataByteRange.start; - // IDs for column metadata (name and type) - const colNameId = `${col.id}-name`; - const colTypeId = `${col.id}-type`; - const colMetaId = `${col.id}-meta`; - const isColMetaExpanded = expandedNodes.has(colMetaId); - return (
{isColExpanded && (
- {/* Column Metadata (name + type definition) */} -
-
{ - setActiveNode(colMetaId, 'Column definition (Meta)'); - toggleExpanded(colMetaId); - }} - onDoubleClick={() => scrollToHex(col.nameByteRange.start)} - onMouseEnter={() => setHoveredNode(colMetaId)} - onMouseLeave={() => setHoveredNode(null)} - style={{ '--depth': 2 } as React.CSSProperties} - > - {isColMetaExpanded ? '▼' : '▶'} - Meta - Column definition - - [{col.metadataByteRange.start}:{col.metadataByteRange.end}] ( - {col.metadataByteRange.end - col.metadataByteRange.start}B) - -
- {isColMetaExpanded && ( -
-
setActiveNode(colNameId, `name: "${col.name}" (String)`)} - onDoubleClick={() => scrollToHex(col.nameByteRange.start)} - onMouseEnter={() => setHoveredNode(colNameId)} - onMouseLeave={() => setHoveredNode(null)} - > - String - name: - "{col.name}" - - [{col.nameByteRange.start}:{col.nameByteRange.end}] ( - {col.nameByteRange.end - col.nameByteRange.start}B) - -
-
setActiveNode(colTypeId, `type: "${col.typeString}" (String)`)} - onDoubleClick={() => scrollToHex(col.typeByteRange.start)} - onMouseEnter={() => setHoveredNode(colTypeId)} - onMouseLeave={() => setHoveredNode(null)} - > - String - type: - "{col.typeString}" - - [{col.typeByteRange.start}:{col.typeByteRange.end}] ( - {col.typeByteRange.end - col.typeByteRange.start}B) - -
- {col.serializationInfo && ( - <> -
setActiveNode(`${col.id}-serialization`, 'Serialization info')} - onDoubleClick={() => scrollToHex(col.serializationInfo!.byteRange.start)} - onMouseEnter={() => setHoveredNode(`${col.id}-serialization`)} - onMouseLeave={() => setHoveredNode(null)} - > - Meta - serialization: - - {col.serializationInfo.hasCustomSerialization ? 'custom' : 'default'} - - - [{col.serializationInfo.byteRange.start}:{col.serializationInfo.byteRange.end}] ( - {col.serializationInfo.byteRange.end - col.serializationInfo.byteRange.start}B) - -
-
setActiveNode(`${col.id}-serialization-has-custom`, `has_custom: ${col.serializationInfo!.hasCustomSerialization}`)} - onDoubleClick={() => scrollToHex(col.serializationInfo!.hasCustomRange.start)} - onMouseEnter={() => setHoveredNode(`${col.id}-serialization-has-custom`)} - onMouseLeave={() => setHoveredNode(null)} - > - UInt8 - has_custom: - - {col.serializationInfo.hasCustomSerialization ? '1' : '0'} - - - [{col.serializationInfo.hasCustomRange.start}:{col.serializationInfo.hasCustomRange.end}] ( - {col.serializationInfo.hasCustomRange.end - col.serializationInfo.hasCustomRange.start}B) - -
- {col.serializationInfo.kindStackRange && ( -
setActiveNode(`${col.id}-serialization-kinds`, `kind stack: ${col.serializationInfo!.kindStack.join(' -> ')}`)} - onDoubleClick={() => scrollToHex(col.serializationInfo!.kindStackRange!.start)} - onMouseEnter={() => setHoveredNode(`${col.id}-serialization-kinds`)} - onMouseLeave={() => setHoveredNode(null)} - > - Kinds - kindStack: - - {col.serializationInfo.kindStack.join(' -> ')} - - - [{col.serializationInfo.kindStackRange.start}:{col.serializationInfo.kindStackRange.end}] ( - {col.serializationInfo.kindStackRange.end - col.serializationInfo.kindStackRange.start}B) - -
- )} - - )} -
- )} -
+ + {col.dataPrefixNodes.map((node) => ( + + ))} {/* Column Values */} {col.values.map((node, valueIndex) => ( @@ -687,6 +463,10 @@ export function AstTree() {
); })} + + {parsedData.trailingNodes?.map((node) => ( + + ))}
); } diff --git a/src/components/HexViewer/HexViewer.tsx b/src/components/HexViewer/HexViewer.tsx index 66b1907..91fdc71 100644 --- a/src/components/HexViewer/HexViewer.tsx +++ b/src/components/HexViewer/HexViewer.tsx @@ -118,69 +118,8 @@ function buildHighlightMap( } // Handle blocks for Native format - parsedData.blocks?.forEach((block, blockIndex) => { - const metadataColor = '#ce93d8'; // Purple for metadata - - // Check for block header metadata section (the parent "Header" item) - const blockHeaderId = `block-${blockIndex}-header`; - if (activeNodeId === blockHeaderId || hoveredNodeId === blockHeaderId) { - const isActive = activeNodeId === blockHeaderId; - for (let i = block.header.byteRange.start; i < block.header.byteRange.end; i++) { - const existing = map.get(i); - if (!existing || isActive || !existing.isActive) { - map.set(i, { color: metadataColor, isActive, isHovered: !isActive }); - } - } - } - - // Check for individual block header items (numColumns, numRows) - const numColsId = `block-${blockIndex}-numcols`; - const numRowsId = `block-${blockIndex}-numrows`; - - if (activeNodeId === numColsId || hoveredNodeId === numColsId) { - const isActive = activeNodeId === numColsId; - for (let i = block.header.numColumnsRange.start; i < block.header.numColumnsRange.end; i++) { - const existing = map.get(i); - if (!existing || isActive || !existing.isActive) { - map.set(i, { color: metadataColor, isActive, isHovered: !isActive }); - } - } - } - - if (activeNodeId === numRowsId || hoveredNodeId === numRowsId) { - const isActive = activeNodeId === numRowsId; - for (let i = block.header.numRowsRange.start; i < block.header.numRowsRange.end; i++) { - const existing = map.get(i); - if (!existing || isActive || !existing.isActive) { - map.set(i, { color: metadataColor, isActive, isHovered: !isActive }); - } - } - } - - const blockInfoId = `block-${blockIndex}-blockinfo`; - if (block.header.blockInfo && (activeNodeId === blockInfoId || hoveredNodeId === 
blockInfoId)) { - const isActive = activeNodeId === blockInfoId; - for (let i = block.header.blockInfo.byteRange.start; i < block.header.blockInfo.byteRange.end; i++) { - const existing = map.get(i); - if (!existing || isActive || !existing.isActive) { - map.set(i, { color: metadataColor, isActive, isHovered: !isActive }); - } - } - } - - block.header.blockInfo?.fields.forEach((field) => { - const fieldId = `block-${blockIndex}-blockinfo-field-${field.fieldNumber}`; - if (activeNodeId === fieldId || hoveredNodeId === fieldId) { - const isActive = activeNodeId === fieldId; - for (let i = field.byteRange.start; i < field.byteRange.end; i++) { - const existing = map.get(i); - if (!existing || isActive || !existing.isActive) { - map.set(i, { color: metadataColor, isActive, isHovered: !isActive }); - } - } - } - }); - + parsedData.blocks?.forEach((block) => { + visitNode(block.header.astNode, 0); block.columns.forEach((col) => { // Check if the column itself is active/hovered const isColActive = col.id === activeNodeId; @@ -196,82 +135,15 @@ function buildHighlightMap( } } } - - // Check for column metadata section (name + type together) - const colMetaId = `${col.id}-meta`; - const colNameId = `${col.id}-name`; - const colTypeId = `${col.id}-type`; - - if (activeNodeId === colMetaId || hoveredNodeId === colMetaId) { - const isActive = activeNodeId === colMetaId; - for (let i = col.metadataByteRange.start; i < col.metadataByteRange.end; i++) { - const existing = map.get(i); - if (!existing || isActive || !existing.isActive) { - map.set(i, { color: metadataColor, isActive, isHovered: !isActive }); - } - } - } - - if (activeNodeId === colNameId || hoveredNodeId === colNameId) { - const isActive = activeNodeId === colNameId; - for (let i = col.nameByteRange.start; i < col.nameByteRange.end; i++) { - const existing = map.get(i); - if (!existing || isActive || !existing.isActive) { - map.set(i, { color: metadataColor, isActive, isHovered: !isActive }); - } - } - } - - if 
(activeNodeId === colTypeId || hoveredNodeId === colTypeId) { - const isActive = activeNodeId === colTypeId; - for (let i = col.typeByteRange.start; i < col.typeByteRange.end; i++) { - const existing = map.get(i); - if (!existing || isActive || !existing.isActive) { - map.set(i, { color: metadataColor, isActive, isHovered: !isActive }); - } - } - } - - if (col.serializationInfo) { - const serializationId = `${col.id}-serialization`; - if (activeNodeId === serializationId || hoveredNodeId === serializationId) { - const isActive = activeNodeId === serializationId; - for (let i = col.serializationInfo.byteRange.start; i < col.serializationInfo.byteRange.end; i++) { - const existing = map.get(i); - if (!existing || isActive || !existing.isActive) { - map.set(i, { color: metadataColor, isActive, isHovered: !isActive }); - } - } - } - - const hasCustomId = `${col.id}-serialization-has-custom`; - if (activeNodeId === hasCustomId || hoveredNodeId === hasCustomId) { - const isActive = activeNodeId === hasCustomId; - for (let i = col.serializationInfo.hasCustomRange.start; i < col.serializationInfo.hasCustomRange.end; i++) { - const existing = map.get(i); - if (!existing || isActive || !existing.isActive) { - map.set(i, { color: metadataColor, isActive, isHovered: !isActive }); - } - } - } - - const kindsId = `${col.id}-serialization-kinds`; - if (col.serializationInfo.kindStackRange && (activeNodeId === kindsId || hoveredNodeId === kindsId)) { - const isActive = activeNodeId === kindsId; - for (let i = col.serializationInfo.kindStackRange.start; i < col.serializationInfo.kindStackRange.end; i++) { - const existing = map.get(i); - if (!existing || isActive || !existing.isActive) { - map.set(i, { color: metadataColor, isActive, isHovered: !isActive }); - } - } - } - } - + visitNode(col.metadataNode, 0); + col.dataPrefixNodes.forEach((node) => visitNode(node, 0)); // Also visit individual values col.values.forEach((node) => visitNode(node, 0)); }); }); + 
parsedData.trailingNodes?.forEach((node) => visitNode(node, 0)); + return map; } @@ -430,12 +302,15 @@ export function HexViewer() { parsedData.rows?.forEach((row) => { row.values.forEach((node) => visitNode(node, 0)); }); - // TODO: Handle blocks for Native format parsedData.blocks?.forEach((block) => { + visitNode(block.header.astNode, 0); block.columns.forEach((col) => { + visitNode(col.metadataNode, 0); + col.dataPrefixNodes.forEach((node) => visitNode(node, 0)); col.values.forEach((node) => visitNode(node, 0)); }); }); + parsedData.trailingNodes?.forEach((node) => visitNode(node, 0)); if (deepestNode) { setActiveNode((deepestNode as AstNode).id); diff --git a/src/core/decoder/coverage.integration.test.ts b/src/core/decoder/coverage.integration.test.ts index 3eac391..7f14546 100644 --- a/src/core/decoder/coverage.integration.test.ts +++ b/src/core/decoder/coverage.integration.test.ts @@ -7,6 +7,63 @@ import { formatUncoveredRanges, } from './test-helpers'; import { SMOKE_TEST_CASES } from './smoke-cases'; +import { NATIVE_PROTOCOL_PRESETS } from '../types/native-protocol'; + +interface NativeCoverageMatrixCase { + name: string; + query: string; + settings?: Record; +} + +const NATIVE_COVERAGE_MATRIX_CASES: NativeCoverageMatrixCase[] = [ + { + name: 'simple UInt8 column', + query: 'SELECT number::UInt8 AS val FROM numbers(3)', + }, + { + name: 'multiple columns baseline', + query: "SELECT 42::UInt32 as int_col, 'hello'::String as str_col, true::Bool as bool_col, 3.14::Float64 as float_col", + }, + { + name: 'Array integers', + query: 'SELECT [1, 2, 3]::Array(UInt32) as val', + }, + { + name: 'Tuple simple', + query: "SELECT (42, 'hello')::Tuple(UInt32, String) as val", + }, + { + name: 'Map with entries', + query: "SELECT map('a', 1, 'b', 2)::Map(String, UInt32) as val", + }, + { + name: 'LowCardinality compatibility', + query: 'SELECT toLowCardinality(toString(number % 2)) AS val FROM numbers(4)', + settings: { allow_suspicious_low_cardinality_types: 1 }, + }, 
+ { + name: 'AggregateFunction compatibility', + query: 'SELECT avgState(number) AS val FROM numbers(10)', + }, + { + name: 'serialization metadata gate', + query: 'SELECT if(number = 5, 1, 0)::UInt8 AS sparse_val FROM numbers(10)', + }, + { + name: 'Nullable serialization metadata gate', + query: 'SELECT if(number = 5, 42, NULL)::Nullable(UInt8) AS sparse_nullable FROM numbers(10)', + }, + { + name: 'Dynamic serialization version gate', + query: 'SELECT 42::Dynamic AS val', + settings: { allow_experimental_dynamic_type: 1 }, + }, + { + name: 'JSON dynamic-path serialization version gate', + query: `SELECT '{"ip":"127.0.0.1","name":"test"}'::JSON(ip IPv4) AS val`, + settings: { allow_experimental_json_type: 1 }, + }, +]; /** * Byte coverage tests - verify that the AST leaf nodes cover all bytes in the data @@ -25,19 +82,6 @@ describe('Byte Coverage Tests', () => { await ctx.stop(); }); - // Test a representative subset of cases for coverage (used by Native format) - const coverageTestCases = SMOKE_TEST_CASES.filter(c => - // Focus on diverse type categories - c.name.includes('UInt8') || - c.name.includes('String basic') || - c.name.includes('Array integers') || - c.name.includes('Tuple simple') || - c.name.includes('Map with entries') || - c.name.includes('Nullable non-null') || - c.name.includes('Multiple columns') || - c.name.includes('IntervalSecond') - ); - describe('RowBinary Format', () => { it.each(SMOKE_TEST_CASES)( '$name - byte coverage', @@ -59,53 +103,27 @@ describe('Byte Coverage Tests', () => { }); describe('Native Format', () => { - it.each(coverageTestCases)( - '$name - byte coverage', - async ({ query, settings, skipNative }) => { - if (skipNative) return; - - const data = await ctx.queryNative(query, settings); - const parsed = decodeNative(data); - const coverage = analyzeByteRange(parsed, data.length); - - if (!coverage.isComplete) { - const details = formatUncoveredRanges(coverage, data); - console.log(`[Native] ${query}\n${details}`); - } - - 
// Native format has block headers that may not be fully covered - expect(coverage.coveragePercent).toBeGreaterThan(70); - }, - ); - }); - - describe('Full Coverage Sanity Checks', () => { - it('simple UInt8 value has reasonable coverage (RowBinary)', async () => { - const data = await ctx.queryRowBinary('SELECT 42::UInt8 as val'); - const parsed = decodeRowBinary(data); - const coverage = analyzeByteRange(parsed, data.length); - - // Should cover most of the data - expect(coverage.coveragePercent).toBeGreaterThan(50); - - // Log uncovered if any - if (!coverage.isComplete) { - console.log('Uncovered ranges:', coverage.uncoveredRanges); - } - }); - - it('simple UInt8 value has reasonable coverage (Native)', async () => { - const data = await ctx.queryNative('SELECT 42::UInt8 as val'); - const parsed = decodeNative(data); - const coverage = analyzeByteRange(parsed, data.length); - - // Should cover most of the data - expect(coverage.coveragePercent).toBeGreaterThan(50); - - // Log uncovered if any - if (!coverage.isComplete) { - console.log('Uncovered ranges:', coverage.uncoveredRanges); - } - }); + for (const testCase of NATIVE_COVERAGE_MATRIX_CASES) { + describe(testCase.name, () => { + it.each(NATIVE_PROTOCOL_PRESETS.map((preset) => preset.value))( + 'revision %s - byte coverage', + async (revision) => { + const data = await ctx.queryNative(testCase.query, { + ...(testCase.settings ?? 
{}), + client_protocol_version: revision, + }); + const parsed = decodeNative(data, revision); + const coverage = analyzeByteRange(parsed, data.length); + + if (!coverage.isComplete) { + const details = formatUncoveredRanges(coverage, data); + console.log(`[Native r${revision}] ${testCase.query}\n${details}`); + } + + expect(coverage.isComplete).toBe(true); + }, + ); + }); + } }); }, 300000); diff --git a/src/core/decoder/native-decoder.ts b/src/core/decoder/native-decoder.ts index 3ce137d..01a3dd0 100644 --- a/src/core/decoder/native-decoder.ts +++ b/src/core/decoder/native-decoder.ts @@ -20,6 +20,11 @@ import { ClickHouseFormat } from '../types/formats'; const SPARSE_END_OF_GRANULE_FLAG = 1n << 62n; const TEXT_DECODER = new TextDecoder(); +interface DecodedColumnData { + values: AstNode[]; + prefixNodes: AstNode[]; +} + /** * Native format decoder (column-oriented with blocks) * @@ -42,50 +47,84 @@ export class NativeDecoder extends FormatDecoder { this.protocolVersion = protocolVersion; } - decode(): ParsedData { - const blocks = this.decodeBlocks(); - const header = this.buildHeaderFromBlocks(blocks); - - return { - format: this.format, - header, - blocks, - totalBytes: this.reader.length, - }; - } - - private decodeBlocks(): BlockNode[] { - const blocks: BlockNode[] = []; - let blockIndex = 0; - - while (this.reader.remaining > 0) { - const block = this.decodeBlock(blockIndex); - - // Empty block (0 columns or 0 rows) signals end - if (block.columns.length === 0 || block.rowCount === 0) { - break; - } - - blocks.push(block); - blockIndex++; - } - - return blocks; - } + decode(): ParsedData { + const { blocks, trailingNodes } = this.decodeBlocks(); + const header = this.buildHeaderFromBlocks(blocks); + + return { + format: this.format, + header, + blocks, + trailingNodes, + totalBytes: this.reader.length, + }; + } + + private decodeBlocks(): { blocks: BlockNode[]; trailingNodes: AstNode[] } { + const blocks: BlockNode[] = []; + const trailingNodes: AstNode[] = 
[]; + let blockIndex = 0; + + while (this.reader.remaining > 0) { + const block = this.decodeBlock(blockIndex); + + // Empty block (0 columns or 0 rows) signals end + if (block.columns.length === 0 || block.rowCount === 0) { + trailingNodes.push({ + id: `native-terminal-block-${blockIndex}`, + type: 'Native.EndBlock', + byteRange: block.byteRange, + value: { numColumns: block.header.numColumns, numRows: block.header.numRows }, + displayValue: `terminal block (${block.header.numColumns} columns, ${block.header.numRows} rows)`, + label: 'terminal_block', + children: [block.header.astNode], + }); + break; + } + + blocks.push(block); + blockIndex++; + } + + return { blocks, trailingNodes }; + } private decodeBlock(index: number): BlockNode { const startOffset = this.reader.offset; - const blockInfo = this.protocolVersion > 0 ? this.decodeBlockInfo() : undefined; + const blockInfoResult = this.protocolVersion > 0 ? this.decodeBlockInfo(index) : undefined; + const blockInfo = blockInfoResult?.blockInfo; // Read numColumns with byte range tracking const numColumnsStart = this.reader.offset; - const { value: numColumns } = decodeLEB128(this.reader); - const numColumnsRange: ByteRange = { start: numColumnsStart, end: this.reader.offset }; - - // Read numRows with byte range tracking - const numRowsStart = this.reader.offset; - const { value: numRows } = decodeLEB128(this.reader); - const numRowsRange: ByteRange = { start: numRowsStart, end: this.reader.offset }; + const { value: numColumns } = decodeLEB128(this.reader); + const numColumnsRange: ByteRange = { start: numColumnsStart, end: this.reader.offset }; + const numColumnsNode: AstNode = { + id: `block-${index}-numcols`, + type: 'VarUInt', + byteRange: numColumnsRange, + value: numColumns, + displayValue: String(numColumns), + label: 'numColumns', + }; + + // Read numRows with byte range tracking + const numRowsStart = this.reader.offset; + const { value: numRows } = decodeLEB128(this.reader); + const numRowsRange: 
ByteRange = { start: numRowsStart, end: this.reader.offset }; + const numRowsNode: AstNode = { + id: `block-${index}-numrows`, + type: 'VarUInt', + byteRange: numRowsRange, + value: numRows, + displayValue: String(numRows), + label: 'numRows', + }; + + const headerChildren: AstNode[] = []; + if (blockInfoResult?.astNode) { + headerChildren.push(blockInfoResult.astNode); + } + headerChildren.push(numColumnsNode, numRowsNode); const header: BlockHeaderNode = { byteRange: { @@ -94,9 +133,24 @@ export class NativeDecoder extends FormatDecoder { }, numColumns, numColumnsRange, + numColumnsNode, numRows, numRowsRange, + numRowsNode, blockInfo, + blockInfoNode: blockInfoResult?.astNode, + astNode: { + id: `block-${index}-header`, + type: 'Native.BlockHeader', + byteRange: { + start: blockInfo?.byteRange.start ?? numColumnsRange.start, + end: numRowsRange.end, + }, + value: { numColumns, numRows }, + displayValue: `${numColumns} columns, ${numRows} rows`, + label: 'header', + children: headerChildren, + }, }; // Empty block check @@ -127,42 +181,78 @@ export class NativeDecoder extends FormatDecoder { } private decodeBlockColumn(blockIndex: number, columnIndex: number, rowCount: number): BlockColumnNode { - // Read column name + const columnId = `block-${blockIndex}-col-${columnIndex}`; + // Read column name const nameStart = this.reader.offset; const { value: nameLen } = decodeLEB128(this.reader); const { value: nameBytes } = this.reader.readBytes(nameLen); const name = TEXT_DECODER.decode(nameBytes); - const nameByteRange: ByteRange = { start: nameStart, end: this.reader.offset }; - - // Read column type + const nameByteRange: ByteRange = { start: nameStart, end: this.reader.offset }; + const nameNode: AstNode = { + id: `${columnId}-name`, + type: 'String', + byteRange: nameByteRange, + value: name, + displayValue: `"${name}"`, + label: 'name', + }; + + // Read column type const typeStart = this.reader.offset; const { value: typeLen } = decodeLEB128(this.reader); const { 
value: typeBytes } = this.reader.readBytes(typeLen); const typeString = TEXT_DECODER.decode(typeBytes); const type = parseType(typeString); const typeByteRange: ByteRange = { start: typeStart, end: this.reader.offset }; + const typeNode: AstNode = { + id: `${columnId}-type`, + type: 'String', + byteRange: typeByteRange, + value: typeString, + displayValue: `"${typeString}"`, + label: 'type', + }; - const serializationInfo = this.decodeSerializationInfo(type); + const serializationResult = this.decodeSerializationInfo(type, columnId); + const serializationInfo = serializationResult?.info; const metadataByteRange: ByteRange = { start: nameByteRange.start, end: serializationInfo?.byteRange.end ?? typeByteRange.end, }; + const metadataChildren = [nameNode, typeNode]; + if (serializationResult?.astNode) { + metadataChildren.push(serializationResult.astNode); + } + const metadataNode: AstNode = { + id: `${columnId}-meta`, + type: 'Native.ColumnMeta', + byteRange: metadataByteRange, + value: { name, type: typeString }, + displayValue: `${name}: ${typeString}`, + label: 'meta', + children: metadataChildren, + }; // Read column data const dataStart = this.reader.offset; - const values = this.decodeColumnData(type, rowCount, serializationInfo); + const { values, prefixNodes } = this.decodeColumnData(type, rowCount, serializationInfo); const dataByteRange: ByteRange = { start: dataStart, end: this.reader.offset }; return { - id: `block-${blockIndex}-col-${columnIndex}`, - name, - nameByteRange, + id: columnId, + name, + nameByteRange, + nameNode, type, typeString, typeByteRange, + typeNode, metadataByteRange, + metadataNode, dataByteRange, + dataPrefixNodes: prefixNodes, serializationInfo, + serializationNode: serializationResult?.astNode, values, }; } @@ -171,63 +261,63 @@ export class NativeDecoder extends FormatDecoder { type: ClickHouseType, rowCount: number, serializationInfo?: NativeSerializationInfo, - ): AstNode[] { + ): DecodedColumnData { const kindStack = 
serializationInfo?.kindStack ?? ['DEFAULT']; return this.decodeColumnDataWithKinds(type, rowCount, kindStack.slice(1)); } - private decodeColumnDataDefault(type: ClickHouseType, rowCount: number): AstNode[] { + private decodeColumnDataDefault(type: ClickHouseType, rowCount: number): DecodedColumnData { // Handle complex types that have different columnar encoding switch (type.kind) { case 'Nullable': - return this.decodeNullableColumn(type.inner, rowCount); - case 'Array': - return this.decodeArrayColumn(type.element, rowCount); - case 'Map': - return this.decodeMapColumn(type.key, type.value, rowCount); - case 'LowCardinality': - return this.decodeLowCardinalityColumn(type.inner, rowCount); - case 'Variant': - return this.decodeVariantColumn(type.variants, rowCount); - case 'Dynamic': - return this.decodeDynamicColumn(rowCount); - case 'JSON': - return this.decodeJSONColumn(type, rowCount); - case 'Tuple': - return this.decodeTupleColumn(type.elements, type.names, rowCount); - case 'Nested': - throw new Error(`Native format: ${typeToString(type)} not yet implemented`); - // Geometry - Variant of geo types - case 'Geometry': - return this.decodeGeometryColumn(rowCount); - // Geo types - Array-based - case 'Ring': - return this.decodeRingColumn(rowCount); - case 'Polygon': - return this.decodePolygonColumn(rowCount); - case 'MultiPolygon': - return this.decodeMultiPolygonColumn(rowCount); - case 'LineString': - return this.decodeLineStringColumn(rowCount); - case 'MultiLineString': - return this.decodeMultiLineStringColumn(rowCount); - case 'QBit': - return this.decodeQBitColumn(type.element, type.dimension, rowCount); - case 'AggregateFunction': - return this.decodeAggregateFunctionColumn(type.functionName, type.argTypes, rowCount); - } + return { values: this.decodeNullableColumn(type.inner, rowCount), prefixNodes: [] }; + case 'Array': + return { values: this.decodeArrayColumn(type.element, rowCount), prefixNodes: [] }; + case 'Map': + return { values: 
this.decodeMapColumn(type.key, type.value, rowCount), prefixNodes: [] }; + case 'LowCardinality': + return { values: this.decodeLowCardinalityColumn(type.inner, rowCount), prefixNodes: [] }; + case 'Variant': + return { values: this.decodeVariantColumn(type.variants, rowCount), prefixNodes: [] }; + case 'Dynamic': + return { values: this.decodeDynamicColumn(rowCount), prefixNodes: [] }; + case 'JSON': + return { values: this.decodeJSONColumn(type, rowCount), prefixNodes: [] }; + case 'Tuple': + return { values: this.decodeTupleColumn(type.elements, type.names, rowCount), prefixNodes: [] }; + case 'Nested': + throw new Error(`Native format: ${typeToString(type)} not yet implemented`); + // Geometry - Variant of geo types + case 'Geometry': + return { values: this.decodeGeometryColumn(rowCount), prefixNodes: [] }; + // Geo types - Array-based + case 'Ring': + return { values: this.decodeRingColumn(rowCount), prefixNodes: [] }; + case 'Polygon': + return { values: this.decodePolygonColumn(rowCount), prefixNodes: [] }; + case 'MultiPolygon': + return { values: this.decodeMultiPolygonColumn(rowCount), prefixNodes: [] }; + case 'LineString': + return { values: this.decodeLineStringColumn(rowCount), prefixNodes: [] }; + case 'MultiLineString': + return { values: this.decodeMultiLineStringColumn(rowCount), prefixNodes: [] }; + case 'QBit': + return { values: this.decodeQBitColumn(type.element, type.dimension, rowCount), prefixNodes: [] }; + case 'AggregateFunction': + return { values: this.decodeAggregateFunctionColumn(type.functionName, type.argTypes, rowCount), prefixNodes: [] }; + } // Simple types: decode rowCount values sequentially const values: AstNode[] = []; - for (let i = 0; i < rowCount; i++) { - const node = this.decodeValue(type); - node.label = `[${i}]`; - values.push(node); + for (let i = 0; i < rowCount; i++) { + const node = this.decodeValue(type); + node.label = `[${i}]`; + values.push(node); } - return values; + return { values, prefixNodes: [] }; } - 
private decodeColumnDataWithKinds(type: ClickHouseType, rowCount: number, kinds: string[]): AstNode[] { + private decodeColumnDataWithKinds(type: ClickHouseType, rowCount: number, kinds: string[]): DecodedColumnData { if (kinds.length === 0) { return this.decodeColumnDataDefault(type, rowCount); } @@ -248,9 +338,10 @@ export class NativeDecoder extends FormatDecoder { } } - private decodeBlockInfo(): NativeBlockInfo { + private decodeBlockInfo(index: number): { blockInfo: NativeBlockInfo; astNode: AstNode } { const start = this.reader.offset; const fields: NativeBlockInfoField[] = []; + const children: AstNode[] = []; while (true) { const fieldNumberStart = this.reader.offset; @@ -258,17 +349,40 @@ export class NativeDecoder extends FormatDecoder { const fieldNumberRange: ByteRange = { start: fieldNumberStart, end: this.reader.offset }; if (fieldNumber === 0) { - return { + const blockInfo: NativeBlockInfo = { byteRange: { start, end: this.reader.offset }, terminatorRange: fieldNumberRange, fields, }; + children.push({ + id: `block-${index}-blockinfo-terminator`, + type: 'VarUInt', + byteRange: fieldNumberRange, + value: 0, + displayValue: '0', + label: 'terminator', + }); + return { + blockInfo, + astNode: { + id: `block-${index}-blockinfo`, + type: 'Native.BlockInfo', + byteRange: blockInfo.byteRange, + value: fields.reduce>((acc, field) => { + acc[field.fieldName] = field.value; + return acc; + }, {}), + displayValue: `${fields.length} fields`, + label: 'blockInfo', + children, + }, + }; } switch (fieldNumber) { case 1: { const { value, range } = this.reader.readUInt8(); - fields.push({ + const field = { fieldNumber, fieldName: 'is_overflows', value: value !== 0, @@ -276,12 +390,39 @@ export class NativeDecoder extends FormatDecoder { fieldNumberRange, valueRange: range, byteRange: { start: fieldNumberRange.start, end: range.end }, - }); + }; + fields.push(field); + children.push(this.createNativeMetadataNode( + `block-${index}-blockinfo-field-${fieldNumber}`, + 
`Native.BlockInfo.Field.${field.fieldName}`, + field.byteRange, + field.value, + field.displayValue, + field.fieldName, + [ + this.createNativeMetadataNode( + `block-${index}-blockinfo-field-${fieldNumber}-number`, + 'VarUInt', + fieldNumberRange, + fieldNumber, + String(fieldNumber), + 'field_number', + ), + this.createNativeMetadataNode( + `block-${index}-blockinfo-field-${fieldNumber}-value`, + 'UInt8', + range, + value, + String(value), + 'value', + ), + ], + )); break; } case 2: { const { value, range } = this.reader.readInt32LE(); - fields.push({ + const field = { fieldNumber, fieldName: 'bucket_num', value, @@ -289,7 +430,34 @@ export class NativeDecoder extends FormatDecoder { fieldNumberRange, valueRange: range, byteRange: { start: fieldNumberRange.start, end: range.end }, - }); + }; + fields.push(field); + children.push(this.createNativeMetadataNode( + `block-${index}-blockinfo-field-${fieldNumber}`, + `Native.BlockInfo.Field.${field.fieldName}`, + field.byteRange, + field.value, + field.displayValue, + field.fieldName, + [ + this.createNativeMetadataNode( + `block-${index}-blockinfo-field-${fieldNumber}-number`, + 'VarUInt', + fieldNumberRange, + fieldNumber, + String(fieldNumber), + 'field_number', + ), + this.createNativeMetadataNode( + `block-${index}-blockinfo-field-${fieldNumber}-value`, + 'Int32', + range, + value, + String(value), + 'value', + ), + ], + )); break; } case 3: { @@ -306,7 +474,7 @@ export class NativeDecoder extends FormatDecoder { values.push(value); } const valueRange: ByteRange = { start: valueStart, end: this.reader.offset }; - fields.push({ + const field = { fieldNumber, fieldName: 'out_of_order_buckets', value: values, @@ -314,7 +482,34 @@ export class NativeDecoder extends FormatDecoder { fieldNumberRange, valueRange, byteRange: { start: fieldNumberRange.start, end: valueRange.end }, - }); + }; + fields.push(field); + children.push(this.createNativeMetadataNode( + `block-${index}-blockinfo-field-${fieldNumber}`, + 
`Native.BlockInfo.Field.${field.fieldName}`, + field.byteRange, + values, + field.displayValue, + field.fieldName, + [ + this.createNativeMetadataNode( + `block-${index}-blockinfo-field-${fieldNumber}-number`, + 'VarUInt', + fieldNumberRange, + fieldNumber, + String(fieldNumber), + 'field_number', + ), + this.createNativeMetadataNode( + `block-${index}-blockinfo-field-${fieldNumber}-value`, + 'Array(Int32)', + valueRange, + values, + field.displayValue, + 'value', + ), + ], + )); break; } default: @@ -323,7 +518,10 @@ export class NativeDecoder extends FormatDecoder { } } - private decodeSerializationInfo(type: ClickHouseType): NativeSerializationInfo | undefined { + private decodeSerializationInfo( + type: ClickHouseType, + columnId: string, + ): { info: NativeSerializationInfo; astNode: AstNode } | undefined { if (this.protocolVersion < 54454) { return undefined; } @@ -340,13 +538,52 @@ export class NativeDecoder extends FormatDecoder { kindStackRange = { start: kindStackStart, end: this.reader.offset }; } - return { + const info: NativeSerializationInfo = { byteRange: { start, end: this.reader.offset }, hasCustomSerialization, hasCustomRange, kindStack, kindStackRange, }; + + const children = [ + this.createNativeMetadataNode( + `${columnId}-serialization-has-custom`, + 'UInt8', + hasCustomRange, + hasCustomValue, + String(hasCustomValue), + 'has_custom', + ), + ]; + if (kindStackRange) { + children.push( + this.createNativeMetadataNode( + `${columnId}-serialization-kinds`, + 'Native.SerializationKinds', + kindStackRange, + kindStack, + kindStack.join(' -> '), + 'kindStack', + ), + ); + } + + return { + info, + astNode: { + id: `${columnId}-serialization`, + type: 'Native.Serialization', + byteRange: info.byteRange, + value: { + hasCustomSerialization, + kindStack, + }, + displayValue: hasCustomSerialization ? 
'custom' : 'default', + label: 'serialization', + children, + }, + }; } private decodeSerializationKindStack(type: ClickHouseType): string[] { @@ -406,8 +643,17 @@ export class NativeDecoder extends FormatDecoder { } } - private decodeSparseColumn(type: ClickHouseType, rowCount: number, nestedKinds: string[]): AstNode[] { + private decodeSparseColumn(type: ClickHouseType, rowCount: number, nestedKinds: string[]): DecodedColumnData { + const sparseStart = this.reader.offset; const positions = this.decodeSparsePositions(rowCount); + const sparseNode = this.createNativeMetadataNode( + this.generateId(), + 'Native.SparsePositions', + { start: sparseStart, end: this.reader.offset }, + positions, + `[${positions.join(', ')}]`, + 'sparse_positions', + ); if (type.kind === 'Nullable') { if (nestedKinds.length > 0) { @@ -417,16 +663,16 @@ export class NativeDecoder extends FormatDecoder { ); } - return this.decodeSparseNullableColumn(type.inner, rowCount, positions); + return this.decodeSparseNullableColumn(type.inner, rowCount, positions, sparseNode); } - const nonDefaultValues = this.decodeColumnDataWithKinds(type, positions.length, nestedKinds); + const nested = this.decodeColumnDataWithKinds(type, positions.length, nestedKinds); const values: AstNode[] = []; let valueIndex = 0; for (let rowIndex = 0; rowIndex < rowCount; rowIndex++) { if (valueIndex < positions.length && positions[valueIndex] === rowIndex) { - const node = this.cloneAstNode(nonDefaultValues[valueIndex], `[${rowIndex}]`); + const node = this.cloneAstNode(nested.values[valueIndex], `[${rowIndex}]`); node.label = `[${rowIndex}]`; values.push(node); valueIndex++; @@ -436,21 +682,22 @@ export class NativeDecoder extends FormatDecoder { values.push(this.createDefaultNode(type, rowIndex)); } - return values; + return { values, prefixNodes: [sparseNode, ...nested.prefixNodes] }; } private decodeSparseNullableColumn( innerType: ClickHouseType, rowCount: number, nonNullPositions: number[], - ): AstNode[] { - 
const nonNullValues = this.decodeColumnDataDefault(innerType, nonNullPositions.length); + sparseNode: AstNode, + ): DecodedColumnData { + const nonNullData = this.decodeColumnDataDefault(innerType, nonNullPositions.length); const values: AstNode[] = []; let valueIndex = 0; for (let rowIndex = 0; rowIndex < rowCount; rowIndex++) { if (valueIndex < nonNullPositions.length && nonNullPositions[valueIndex] === rowIndex) { - const innerNode = this.cloneAstNode(nonNullValues[valueIndex], 'value'); + const innerNode = this.cloneAstNode(nonNullData.values[valueIndex], 'value'); values.push({ id: this.generateId(), type: `Nullable(${typeToString(innerType)})`, @@ -476,30 +723,66 @@ export class NativeDecoder extends FormatDecoder { }); } - return values; + return { values, prefixNodes: [sparseNode, ...nonNullData.prefixNodes] }; } - private decodeReplicatedColumn(type: ClickHouseType, rowCount: number, nestedKinds: string[]): AstNode[] { + private decodeReplicatedColumn(type: ClickHouseType, rowCount: number, nestedKinds: string[]): DecodedColumnData { + const serializedRowCountStart = this.reader.offset; const { value: serializedRowCount } = decodeLEB128(this.reader); + const serializedRowCountNode = this.createNativeMetadataNode( + this.generateId(), + 'VarUInt', + { start: serializedRowCountStart, end: this.reader.offset }, + serializedRowCount, + String(serializedRowCount), + 'serialized_row_count', + ); if (serializedRowCount !== rowCount) { throw new Error( `Native format: replicated row count ${serializedRowCount} does not match block row count ${rowCount}`, ); } + const indexSizeStart = this.reader.offset; const { value: indexSize } = this.reader.readUInt8(); + const indexSizeNode = this.createNativeMetadataNode( + this.generateId(), + 'UInt8', + { start: indexSizeStart, end: this.reader.offset }, + indexSize, + String(indexSize), + 'index_size', + ); + const indexesStart = this.reader.offset; const indexes: number[] = []; for (let i = 0; i < rowCount; i++) { 
indexes.push(this.readReplicatedIndex(indexSize)); } + const indexesNode = this.createNativeMetadataNode( + this.generateId(), + `Array(UInt${indexSize * 8})`, + { start: indexesStart, end: this.reader.offset }, + indexes, + `[${indexes.join(', ')}]`, + 'indexes', + ); + const nestedRowCountStart = this.reader.offset; const { value: nestedRowCount } = decodeLEB128(this.reader); - const nestedValues = this.decodeColumnDataWithKinds(type, nestedRowCount, nestedKinds); + const nestedRowCountNode = this.createNativeMetadataNode( + this.generateId(), + 'VarUInt', + { start: nestedRowCountStart, end: this.reader.offset }, + nestedRowCount, + String(nestedRowCount), + 'nested_row_count', + ); + const nested = this.decodeColumnDataWithKinds(type, nestedRowCount, nestedKinds); - return indexes.map((index, rowIndex) => { - const sourceNode = nestedValues[index]; + const values = indexes.map((index, rowIndex) => { + const sourceNode = nested.values[index]; if (!sourceNode) { - throw new Error(`Native format: replicated index ${index} out of bounds for ${nestedValues.length} nested values`); + throw new Error(`Native format: replicated index ${index} out of bounds for ${nested.values.length} nested values`); } const cloned = this.cloneAstNode(sourceNode, `[${rowIndex}]`); @@ -509,6 +792,17 @@ export class NativeDecoder extends FormatDecoder { }; return cloned; }); + + return { + values, + prefixNodes: [ + serializedRowCountNode, + indexSizeNode, + indexesNode, + nestedRowCountNode, + ...nested.prefixNodes, + ], + }; } private readReplicatedIndex(indexSize: number): number { @@ -903,6 +1197,26 @@ export class NativeDecoder extends FormatDecoder { }; } + private createNativeMetadataNode( + id: string, + type: string, + byteRange: ByteRange, + value: unknown, + displayValue: string, + label?: string, + children?: AstNode[], + ): AstNode { + return { + id, + type, + byteRange, + value, + displayValue, + label, + children, + }; + } + private cloneAstNode(node: AstNode, label?: 
string): AstNode { return { ...node, @@ -1427,7 +1741,7 @@ export class NativeDecoder extends FormatDecoder { // Read all values for each element type const elementColumns: AstNode[][] = []; for (let i = 0; i < elements.length; i++) { - elementColumns.push(this.decodeColumnData(elements[i], rowCount)); + elementColumns.push(this.decodeColumnData(elements[i], rowCount).values); } // Assemble tuples @@ -1480,7 +1794,7 @@ export class NativeDecoder extends FormatDecoder { } // Read all values (even for NULLs) - const innerValues = this.decodeColumnData(innerType, rowCount); + const innerValues = this.decodeColumnData(innerType, rowCount).values; // Combine null map with values const values: AstNode[] = []; @@ -1559,7 +1873,7 @@ export class NativeDecoder extends FormatDecoder { } // Read all elements - const allElements = this.decodeColumnData(elementType, totalElements); + const allElements = this.decodeColumnData(elementType, totalElements).values; const elementsEnd = this.reader.offset; // Distribute elements to arrays @@ -1606,15 +1920,24 @@ export class NativeDecoder extends FormatDecoder { * 2. Keys stream: all keys * 3. 
Values stream: all values */ - private decodeMapColumn(keyType: ClickHouseType, valueType: ClickHouseType, rowCount: number): AstNode[] { - const typeStr = `Map(${typeToString(keyType)}, ${typeToString(valueType)})`; - - // Read cumulative offsets - const offsets: bigint[] = []; - for (let i = 0; i < rowCount; i++) { - const { value } = this.reader.readUInt64LE(); - offsets.push(value); - } + private decodeMapColumn(keyType: ClickHouseType, valueType: ClickHouseType, rowCount: number): AstNode[] { + const typeStr = `Map(${typeToString(keyType)}, ${typeToString(valueType)})`; + + // Read cumulative offsets + const offsets: bigint[] = []; + const offsetNodes: AstNode[] = []; + for (let i = 0; i < rowCount; i++) { + const { value, range } = this.reader.readUInt64LE(); + offsets.push(value); + offsetNodes.push({ + id: this.generateId(), + type: 'ArraySizes', + byteRange: range, + value, + displayValue: `${value} (cumulative)`, + label: `[${i}]`, + }); + } // Calculate sizes const totalEntries = rowCount > 0 ? 
Number(offsets[rowCount - 1]) : 0; @@ -1626,17 +1949,17 @@ export class NativeDecoder extends FormatDecoder { } // Read all keys - const allKeys = this.decodeColumnData(keyType, totalEntries); + const allKeys = this.decodeColumnData(keyType, totalEntries).values; // Read all values - const allValues = this.decodeColumnData(valueType, totalEntries); + const allValues = this.decodeColumnData(valueType, totalEntries).values; // Distribute to maps const values: AstNode[] = []; let entryIndex = 0; - for (let i = 0; i < rowCount; i++) { - const size = sizes[i]; - const entries: AstNode[] = []; + for (let i = 0; i < rowCount; i++) { + const size = sizes[i]; + const entries: AstNode[] = []; for (let j = 0; j < size; j++) { const key = allKeys[entryIndex + j]; @@ -1655,20 +1978,29 @@ export class NativeDecoder extends FormatDecoder { children: [key, value], }); } - - entryIndex += size; - - values.push({ - id: this.generateId(), - type: typeStr, - byteRange: { start: entries[0]?.byteRange.start ?? this.reader.offset, end: this.reader.offset }, - value: Object.fromEntries(entries.map(e => [e.children![0].value, e.children![1].value])), - displayValue: `{${entries.map(e => e.displayValue).join(', ')}}`, - label: `[${i}]`, - children: entries, - metadata: { size }, - }); - } + + entryIndex += size; + const entriesEnd = entries[entries.length - 1]?.byteRange.end ?? 
offsetNodes[i].byteRange.end; + const lengthNode: AstNode = { + id: this.generateId(), + type: 'UInt64', + byteRange: offsetNodes[i].byteRange, + value: BigInt(size), + displayValue: `${size} (cumulative: ${offsets[i]})`, + label: 'length', + }; + + values.push({ + id: this.generateId(), + type: typeStr, + byteRange: { start: offsetNodes[i].byteRange.start, end: entriesEnd }, + value: Object.fromEntries(entries.map(e => [e.children![0].value, e.children![1].value])), + displayValue: `{${entries.map(e => e.displayValue).join(', ')}}`, + label: `[${i}]`, + children: [lengthNode, ...entries], + metadata: { size }, + }); + } return values; } @@ -1701,7 +2033,7 @@ export class NativeDecoder extends FormatDecoder { // Determine the actual inner type for decoding (unwrap Nullable if present) const dictType = innerType.kind === 'Nullable' ? innerType.inner : innerType; - dictionary = this.decodeColumnData(dictType, Number(numKeys)); + dictionary = this.decodeColumnData(dictType, Number(numKeys)).values; } // Read row count @@ -1813,7 +2145,7 @@ export class NativeDecoder extends FormatDecoder { for (let v = 0; v < variants.length; v++) { const count = countPerVariant[v]; if (count > 0) { - variantData[v] = this.decodeColumnData(variants[v], count); + variantData[v] = this.decodeColumnData(variants[v], count).values; } else { variantData[v] = []; } @@ -2104,7 +2436,7 @@ export class NativeDecoder extends FormatDecoder { variantDataByDisc[disc] = sharedVariantData; } else if (typeIdx !== undefined) { // Declared type - variantDataByDisc[disc] = this.decodeColumnData(variants[typeIdx], count); + variantDataByDisc[disc] = this.decodeColumnData(variants[typeIdx], count).values; } } @@ -2691,7 +3023,7 @@ export class NativeDecoder extends FormatDecoder { return this.decodeArrayColumnWithPrefix(type.element, rowCount, prefix); } // No prefix - use normal decoding - return this.decodeColumnData(type, rowCount); + return this.decodeColumnData(type, rowCount).values; } /** @@ 
-2985,7 +3317,7 @@ export class NativeDecoder extends FormatDecoder { a[0].localeCompare(b[0]) ); for (const [pathName, pathType] of sortedTypedPaths) { - const nodes = this.decodeColumnData(pathType, rowCount); + const nodes = this.decodeColumnData(pathType, rowCount).values; typedPathData.set(pathName, nodes); } } @@ -3124,7 +3456,7 @@ export class NativeDecoder extends FormatDecoder { let value: unknown = null; if (entryType) { - const valueNodes = this.decodeColumnData(entryType, 1); + const valueNodes = this.decodeColumnData(entryType, 1).values; value = valueNodes[0]?.value; } diff --git a/src/core/decoder/native-protocol.test.ts b/src/core/decoder/native-protocol.test.ts index 8e488ed..1694b06 100644 --- a/src/core/decoder/native-protocol.test.ts +++ b/src/core/decoder/native-protocol.test.ts @@ -1,5 +1,6 @@ import { describe, expect, it } from 'vitest'; import { NativeDecoder } from './native-decoder'; +import { analyzeByteRange } from './test-helpers'; function encodeLeb128(value: number | bigint): number[] { let current = BigInt(value); @@ -31,7 +32,6 @@ function encodeUInt64LE(value: number | bigint): number[] { } return bytes; } - function encodeSparseOffsets(nonDefaultRows: number[], rowCount: number): number[] { const END_OF_GRANULE_FLAG = 1n << 62n; const bytes: number[] = []; @@ -61,7 +61,6 @@ function collectNodes(node: unknown): Array<{ type?: string; label?: string; val } return nodes; } - describe('NativeDecoder protocol-aware parsing', () => { it('parses legacy HTTP Native blocks without protocol metadata', () => { const bytes = new Uint8Array([ @@ -113,6 +112,9 @@ describe('NativeDecoder protocol-aware parsing', () => { expect(column?.values.map((node) => node.value)).toEqual([0, 7, 0]); expect(column?.values[0].metadata?.isDefaultValue).toBe(true); expect(column?.values[1].metadata?.isDefaultValue).toBeUndefined(); + + const coverage = analyzeByteRange(parsed, bytes.length); + expect(coverage.isComplete).toBe(true); }); it('rejects BlockInfo 
field 3 before protocol version 54480', () => { @@ -162,6 +164,9 @@ describe('NativeDecoder protocol-aware parsing', () => { expect(parsed.blocks?.[0].columns[0].values.map((node) => node.value)).toEqual([7, 9]); expect(parsed.blocks?.[0].columns[0].values[0].metadata?.replicatedIndex).toBe(0); expect(parsed.blocks?.[0].columns[0].values[1].metadata?.replicatedIndex).toBe(1); + + const coverage = analyzeByteRange(parsed, bytes.length); + expect(coverage.isComplete).toBe(true); }); it('parses nullable sparse serialization', () => { @@ -228,4 +233,24 @@ describe('NativeDecoder protocol-aware parsing', () => { expect(nodes.some((node) => node.label === 'max_dynamic_paths')).toBe(false); expect(nodes.some((node) => node.label === 'max_dynamic_types')).toBe(false); }); + + it('covers terminal empty block bytes in the AST traversal', () => { + const bytes = new Uint8Array([ + 0x01, + 0x02, + ...encodeString('n'), + ...encodeString('UInt8'), + 0x01, + 0x02, + 0x00, + 0x00, + ]); + + const parsed = new NativeDecoder(bytes, 0).decode(); + const coverage = analyzeByteRange(parsed, bytes.length); + + expect(parsed.trailingNodes).toHaveLength(1); + expect(parsed.trailingNodes?.[0].type).toBe('Native.EndBlock'); + expect(coverage.isComplete).toBe(true); + }); }); diff --git a/src/core/decoder/test-helpers.ts b/src/core/decoder/test-helpers.ts index 1a57dc6..6a8e441 100644 --- a/src/core/decoder/test-helpers.ts +++ b/src/core/decoder/test-helpers.ts @@ -341,7 +341,12 @@ export function analyzeByteRange(data: ParsedData, dataLength: number): ByteCove // From blocks (Native format) if (data.blocks) { for (const block of data.blocks) { + leafNodes.push(...collectLeafNodes(block.header.astNode)); for (const col of block.columns) { + leafNodes.push(...collectLeafNodes(col.metadataNode)); + for (const node of col.dataPrefixNodes) { + leafNodes.push(...collectLeafNodes(node)); + } for (const node of col.values) { leafNodes.push(...collectLeafNodes(node)); } @@ -349,6 +354,10 @@ export 
function analyzeByteRange(data: ParsedData, dataLength: number): ByteCove } } + for (const node of data.trailingNodes ?? []) { + leafNodes.push(...collectLeafNodes(node)); + } + // Sort by start position and merge overlapping ranges const sortedRanges = leafNodes .map(n => ({ start: n.byteRange.start, end: n.byteRange.end })) diff --git a/src/core/types/ast.ts b/src/core/types/ast.ts index 9cc41a7..28dd8fb 100644 --- a/src/core/types/ast.ts +++ b/src/core/types/ast.ts @@ -57,9 +57,13 @@ export interface BlockHeaderNode { byteRange: ByteRange; numColumns: number; numColumnsRange: ByteRange; + numColumnsNode: AstNode; numRows: number; numRowsRange: ByteRange; + numRowsNode: AstNode; blockInfo?: NativeBlockInfo; + blockInfoNode?: AstNode; + astNode: AstNode; } /** @@ -80,12 +84,17 @@ export interface BlockColumnNode { id: string; name: string; nameByteRange: ByteRange; + nameNode: AstNode; type: import('./clickhouse-types').ClickHouseType; typeString: string; typeByteRange: ByteRange; + typeNode: AstNode; metadataByteRange: ByteRange; + metadataNode: AstNode; dataByteRange: ByteRange; + dataPrefixNodes: AstNode[]; serializationInfo?: NativeSerializationInfo; + serializationNode?: AstNode; values: AstNode[]; } @@ -124,4 +133,6 @@ export interface ParsedData { rows?: RowNode[]; /** Block-based formats (Native) */ blocks?: BlockNode[]; + /** Trailing protocol nodes not attached to data rows/blocks (for example terminal Native blocks) */ + trailingNodes?: AstNode[]; } diff --git a/src/store/store.ts b/src/store/store.ts index 2f270cf..ec1c79d 100644 --- a/src/store/store.ts +++ b/src/store/store.ts @@ -64,12 +64,17 @@ function collectAllNodeIds(parsedData: ParsedData): string[] { // Block-based formats (Native) parsedData.blocks?.forEach((block, i) => { ids.push(`block-${i}`); + visitNode(block.header.astNode); block.columns.forEach((col, j) => { ids.push(`block-${i}-col-${j}`); + visitNode(col.metadataNode); + col.dataPrefixNodes.forEach(visitNode); 
col.values.forEach(visitNode); }); }); + parsedData.trailingNodes?.forEach(visitNode); + return ids; }