From 858ba857ee2da0ae17a93cbb4c44c2cea4d43364 Mon Sep 17 00:00:00 2001 From: Alex Soffronow-Pagonidis Date: Wed, 11 Feb 2026 13:01:33 +0100 Subject: [PATCH 1/6] fix release workflow --- .github/workflows/release.yml | 4 +--- package.json | 2 ++ 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 4f896a3..79a9c72 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -40,9 +40,7 @@ jobs: ELECTRON: 'true' - name: Package desktop app - run: npx electron-builder --${{ matrix.platform }} - env: - GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} + run: npx electron-builder --${{ matrix.platform }} --publish never - name: Upload artifacts uses: actions/upload-artifact@v4 diff --git a/package.json b/package.json index aa2ecfb..f60b403 100644 --- a/package.json +++ b/package.json @@ -2,6 +2,8 @@ "name": "rowbinary-explorer", "private": true, "version": "0.0.1", + "description": "Interactive hex viewer for ClickHouse RowBinary and Native wire formats", + "author": "alex-clickhouse ", "type": "module", "main": "dist-electron/main.js", "scripts": { From a3ff037df80c128e0ab4153fce5689176058b3b8 Mon Sep 17 00:00:00 2001 From: Alex Soffronow-Pagonidis Date: Wed, 11 Feb 2026 13:10:42 +0100 Subject: [PATCH 2/6] fix release 2 --- electron-builder.yml | 2 +- electron/main.ts | 10 +++++----- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/electron-builder.yml b/electron-builder.yml index bf0441b..fd787e8 100644 --- a/electron-builder.yml +++ b/electron-builder.yml @@ -8,7 +8,7 @@ files: - dist-electron/**/* extraResources: - from: config.default.json - to: ../config.json + to: config.json win: icon: build/icon.ico target: diff --git a/electron/main.ts b/electron/main.ts index b160b21..1e8c8b4 100644 --- a/electron/main.ts +++ b/electron/main.ts @@ -5,12 +5,12 @@ import fs from 'node:fs'; const __dirname = path.dirname(fileURLToPath(import.meta.url)); -// config.json lives next 
to the app: +// Config paths: // dev → project root (process.cwd()), falls back to config.default.json -// prod → next to the executable (shipped from config.default.json) -const appDir = app.isPackaged ? path.dirname(process.execPath) : process.cwd(); -const configPath = path.join(appDir, 'config.json'); -const defaultConfigPath = path.join(appDir, 'config.default.json'); +// prod → extraResources dir (Contents/Resources on macOS, resources/ on Linux/Windows) +const configDir = app.isPackaged ? process.resourcesPath : process.cwd(); +const configPath = path.join(configDir, 'config.json'); +const defaultConfigPath = path.join(configDir, 'config.default.json'); interface Config { host: string; From b1132f39defff52eeb86abeab1689bc0412f41fe Mon Sep 17 00:00:00 2001 From: Alex Soffronow-Pagonidis Date: Wed, 11 Mar 2026 11:45:17 +0100 Subject: [PATCH 3/6] agents.md --- AGENTS.md | 216 +++++++++++++++++++++++++++++++++++++++++++++++++++++ CLAUDE.md | 217 +----------------------------------------------------- 2 files changed, 217 insertions(+), 216 deletions(-) create mode 100644 AGENTS.md diff --git a/AGENTS.md b/AGENTS.md new file mode 100644 index 0000000..e1cd5f1 --- /dev/null +++ b/AGENTS.md @@ -0,0 +1,216 @@ +# CLAUDE.md - ClickHouse Format Explorer + +## Project Overview + +A tool for visualizing ClickHouse RowBinary and Native wire format data. Features an interactive hex viewer with AST-based type visualization, similar to ImHex. Available as a web app (Docker) or an Electron desktop app that connects to your existing ClickHouse server. + +**Current scope**: RowBinaryWithNamesAndTypes and Native formats. 
+ +## Tech Stack + +- **Frontend**: React 18 + TypeScript + Vite +- **State**: Zustand +- **UI**: react-window (virtualized hex viewer), react-resizable-panels (split panes) +- **Desktop**: Electron (optional, connects to user's ClickHouse) +- **Testing**: Vitest + testcontainers (integration), Playwright (Electron e2e) +- **Deployment**: Docker (bundles ClickHouse + nginx) or Electron desktop app + +## Commands + +```bash +npm run dev # Start web dev server (requires ClickHouse at localhost:8123) +npm run build # Build web app for production +npm run test # Run integration tests (uses testcontainers) +npm run lint # ESLint check +npm run test:e2e # Build Electron + run Playwright e2e tests + +# Electron desktop app +npm run electron:dev # Dev mode with hot reload +npm run electron:build # Package desktop installer for current platform + +# Docker (self-contained with bundled ClickHouse) +docker build -t rowbinary-explorer . +docker run -d -p 8080:80 rowbinary-explorer +``` + +## Directory Structure + +``` +src/ +├── components/ # React components +│ ├── App.tsx # Main layout with resizable panels +│ ├── QueryInput.tsx # SQL query input + run button + connection settings +│ ├── HexViewer/ # Virtualized hex viewer with highlighting +│ └── AstTree/ # Collapsible AST tree view +├── core/ +│ ├── types/ +│ │ ├── ast.ts # AstNode, ByteRange, ParsedData interfaces +│ │ └── clickhouse-types.ts # ClickHouseType discriminated union +│ ├── decoder/ +│ │ ├── rowbinary-decoder.ts # RowBinaryWithNamesAndTypes decoder +│ │ ├── native-decoder.ts # Native format decoder +│ │ ├── reader.ts # BinaryReader with byte-range tracking +│ │ ├── leb128.ts # LEB128 varint decoder +│ │ ├── test-helpers.ts # Shared test utilities +│ │ ├── smoke-cases.ts # Smoke test case definitions +│ │ └── validation-cases.ts # Validation test case definitions +│ ├── parser/ +│ │ ├── type-lexer.ts # Tokenizer for type strings +│ │ └── type-parser.ts # Parser: string -> ClickHouseType +│ └── clickhouse/ +│ 
└── client.ts # HTTP client (fetch for web, IPC for Electron) +├── store/ +│ └── store.ts # Zustand store (query, parsed data, UI state) +└── styles/ # CSS files +electron/ +├── main.ts # Electron main process (window, IPC handlers) +└── preload.ts # Preload script (contextBridge → electronAPI) +e2e/ +└── electron.spec.ts # Playwright Electron e2e tests +docs/ +├── rowbinaryspec.md # RowBinary wire format specification +├── nativespec.md # Native wire format specification +└── jsonspec.md # JSON type specification +docker/ +├── nginx.conf # Proxies /clickhouse to ClickHouse server +├── users.xml # Read-only ClickHouse user +└── supervisord.conf # Runs nginx + ClickHouse together +``` + +## Wire Format Docs + + * RowBinary: docs/rowbinaryspec.md + * Native: docs/nativespec.md + * JSON: docs/jsonspec.md + +## Key Concepts + +### AstNode +Every decoded value is represented as an `AstNode` (`src/core/types/ast.ts:12`): +- `id` - Unique identifier for selection/highlighting +- `type` - ClickHouse type name string +- `byteRange` - `{start, end}` byte offsets (exclusive end) +- `value` - Decoded JavaScript value +- `displayValue` - Human-readable string +- `children` - Child nodes for composite types (Array, Tuple, etc.) + +### ClickHouseType +A discriminated union representing all ClickHouse types (`src/core/types/clickhouse-types.ts:4`): +- Primitives: `UInt8`-`UInt256`, `Int8`-`Int256`, `Float32/64`, `String`, etc. +- Composites: `Array`, `Tuple`, `Map`, `Nullable`, `LowCardinality` +- Advanced: `Variant`, `Dynamic`, `JSON` +- Geo: `Point`, `Ring`, `Polygon`, `MultiPolygon`, `LineString`, `MultiLineString`, `Geometry` +- Intervals: `IntervalSecond`, `IntervalMinute`, `IntervalHour`, `IntervalDay`, `IntervalWeek`, `IntervalMonth`, `IntervalQuarter`, `IntervalYear` (stored as Int64) +- Other: `Enum8/16`, `Nested`, `QBit`, `AggregateFunction` + +### Decoding Flow +1. User enters SQL query, clicks "Run Query" +2. 
`ClickHouseClient` (`src/core/clickhouse/client.ts`) sends query: + - **Web mode**: `fetch()` via Vite proxy or nginx + - **Electron mode**: IPC to main process → `fetch()` to ClickHouse (no CORS) +3. Decoder parses the binary response: + - **RowBinary** (`rowbinary-decoder.ts`): Row-oriented, header + rows + - **Native** (`native-decoder.ts`): Column-oriented with blocks +4. Type strings parsed via `parseType()` into `ClickHouseType` +5. Each decoded value returns an `AstNode` with byte tracking +6. UI renders hex view (left) and AST tree (right) + +### Electron Architecture +``` +Renderer (React) Main Process (Node.js) + │ │ + ├─ window.electronAPI │ + │ .executeQuery(opts) ────────►├─ fetch(clickhouseUrl + query) + │ │ → ArrayBuffer + │◄── IPC response ──────────────┤ + │ │ + ├─ Uint8Array → decoders │ + └─ render hex view + AST tree │ +``` + +- Runtime detection: `window.electronAPI` exists → IPC path, otherwise → `fetch()` +- `vite-plugin-electron` activates only when `ELECTRON=true` env var is set +- Connection config in `config.json` (project root in dev; in prod, the app's resources directory, i.e. `process.resourcesPath`: `Contents/Resources` on macOS, `resources/` on Linux/Windows) +- Experimental ClickHouse settings (Variant, Dynamic, JSON, etc.) sent as query params + +### Interactive Highlighting +- Click a node in AST tree → highlights corresponding bytes in hex view +- Click a byte in hex view → selects the deepest AST node containing that byte +- State managed in Zustand store: `activeNodeId`, `hoveredNodeId` + +## Adding a New ClickHouse Type + +1. Add type variant to `ClickHouseType` in `src/core/types/clickhouse-types.ts` +2. Add `typeToString()` case for serialization back to string +3. Add `getTypeColor()` case for UI coloring +4. Add parser case in `src/core/parser/type-parser.ts` +5. Add decoder method in `RowBinaryDecoder` (`src/core/decoder/rowbinary-decoder.ts`): + - Add case in `decodeValue()` switch + - Implement `decode{TypeName}()` method returning `AstNode` +6. 
Add decoder method in `NativeDecoder` (`src/core/decoder/native-decoder.ts`): + - Add case in `decodeValue()` switch + - For columnar types, may need `decode{TypeName}Column()` method +7. If type has binary type index (for Dynamic), add to `decodeDynamicType()` +8. Add test cases to `smoke-cases.ts` and `validation-cases.ts` + +## Important Implementation Details + +- **LEB128**: Variable-length integers used for string lengths, array sizes, column counts +- **UUID byte order**: ClickHouse uses a special byte ordering (see `decodeUUID()` at `decoder.ts:629`) +- **IPv4**: Stored as little-endian UInt32, displayed in reverse order +- **Dynamic type**: Uses BinaryTypeIndex encoding; type is encoded in the data itself +- **LowCardinality**: Does not affect wire format in RowBinary (transparent wrapper) +- **Nested**: Encoded as parallel arrays, one per field + +## Testing + +### Integration Tests (Vitest + testcontainers) + +Tests use testcontainers to spin up a real ClickHouse instance: +```bash +npm run test # Runs all integration tests +``` + +Tests are organized into three categories with shared test case definitions: + +1. **Smoke Tests** (`smoke.integration.test.ts`) + - Verify parsing succeeds without value validation + - Test cases defined in `smoke-cases.ts` + - Parametrized for both RowBinary and Native formats + +2. **Validation Tests** (`validation.integration.test.ts`) + - Verify decoded values and AST structure + - Test cases defined in `validation-cases.ts` with format-specific callbacks + - Check values, children counts, byte ranges, metadata + +3. 
**Coverage Tests** (`coverage.integration.test.ts`) + - Analyze byte coverage of AST leaf nodes + - Report uncovered byte ranges + +### Electron e2e Tests (Playwright) + +```bash +npm run test:e2e # Builds Electron app + runs Playwright tests +``` + +Tests in `e2e/electron.spec.ts` launch the actual Electron app and verify: +- App window opens and UI renders +- Host input is visible (Electron mode) and Share button is hidden +- Connection settings can be edited +- Upload button is present and functional + +### Test Case Interface +```typescript +interface ValidationTestCase { + name: string; + query: string; + settings?: Record<string, string | number>; + rowBinaryValidator?: (result: DecodedResult) => void; + nativeValidator?: (result: DecodedResult) => void; +} +``` + +### Adding New Test Cases +1. Add query to `smoke-cases.ts` for basic parsing verification +2. Add to `validation-cases.ts` with validator callbacks for detailed checks +3. Use `bothFormats(validator)` helper when validation logic is identical diff --git a/CLAUDE.md b/CLAUDE.md index e1cd5f1..43c994c 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -1,216 +1 @@ -# CLAUDE.md - ClickHouse Format Explorer - -## Project Overview - -A tool for visualizing ClickHouse RowBinary and Native wire format data. Features an interactive hex viewer with AST-based type visualization, similar to ImHex. Available as a web app (Docker) or an Electron desktop app that connects to your existing ClickHouse server. - -**Current scope**: RowBinaryWithNamesAndTypes and Native formats. 
- -## Tech Stack - -- **Frontend**: React 18 + TypeScript + Vite -- **State**: Zustand -- **UI**: react-window (virtualized hex viewer), react-resizable-panels (split panes) -- **Desktop**: Electron (optional, connects to user's ClickHouse) -- **Testing**: Vitest + testcontainers (integration), Playwright (Electron e2e) -- **Deployment**: Docker (bundles ClickHouse + nginx) or Electron desktop app - -## Commands - -```bash -npm run dev # Start web dev server (requires ClickHouse at localhost:8123) -npm run build # Build web app for production -npm run test # Run integration tests (uses testcontainers) -npm run lint # ESLint check -npm run test:e2e # Build Electron + run Playwright e2e tests - -# Electron desktop app -npm run electron:dev # Dev mode with hot reload -npm run electron:build # Package desktop installer for current platform - -# Docker (self-contained with bundled ClickHouse) -docker build -t rowbinary-explorer . -docker run -d -p 8080:80 rowbinary-explorer -``` - -## Directory Structure - -``` -src/ -├── components/ # React components -│ ├── App.tsx # Main layout with resizable panels -│ ├── QueryInput.tsx # SQL query input + run button + connection settings -│ ├── HexViewer/ # Virtualized hex viewer with highlighting -│ └── AstTree/ # Collapsible AST tree view -├── core/ -│ ├── types/ -│ │ ├── ast.ts # AstNode, ByteRange, ParsedData interfaces -│ │ └── clickhouse-types.ts # ClickHouseType discriminated union -│ ├── decoder/ -│ │ ├── rowbinary-decoder.ts # RowBinaryWithNamesAndTypes decoder -│ │ ├── native-decoder.ts # Native format decoder -│ │ ├── reader.ts # BinaryReader with byte-range tracking -│ │ ├── leb128.ts # LEB128 varint decoder -│ │ ├── test-helpers.ts # Shared test utilities -│ │ ├── smoke-cases.ts # Smoke test case definitions -│ │ └── validation-cases.ts # Validation test case definitions -│ ├── parser/ -│ │ ├── type-lexer.ts # Tokenizer for type strings -│ │ └── type-parser.ts # Parser: string -> ClickHouseType -│ └── clickhouse/ -│ 
└── client.ts # HTTP client (fetch for web, IPC for Electron) -├── store/ -│ └── store.ts # Zustand store (query, parsed data, UI state) -└── styles/ # CSS files -electron/ -├── main.ts # Electron main process (window, IPC handlers) -└── preload.ts # Preload script (contextBridge → electronAPI) -e2e/ -└── electron.spec.ts # Playwright Electron e2e tests -docs/ -├── rowbinaryspec.md # RowBinary wire format specification -├── nativespec.md # Native wire format specification -└── jsonspec.md # JSON type specification -docker/ -├── nginx.conf # Proxies /clickhouse to ClickHouse server -├── users.xml # Read-only ClickHouse user -└── supervisord.conf # Runs nginx + ClickHouse together -``` - -## Wire Format Docs - - * RowBinary: docs/rowbinaryspec.md - * Native: docs/nativespec.md - * JSON: docs/jsonspec.md - -## Key Concepts - -### AstNode -Every decoded value is represented as an `AstNode` (`src/core/types/ast.ts:12`): -- `id` - Unique identifier for selection/highlighting -- `type` - ClickHouse type name string -- `byteRange` - `{start, end}` byte offsets (exclusive end) -- `value` - Decoded JavaScript value -- `displayValue` - Human-readable string -- `children` - Child nodes for composite types (Array, Tuple, etc.) - -### ClickHouseType -A discriminated union representing all ClickHouse types (`src/core/types/clickhouse-types.ts:4`): -- Primitives: `UInt8`-`UInt256`, `Int8`-`Int256`, `Float32/64`, `String`, etc. -- Composites: `Array`, `Tuple`, `Map`, `Nullable`, `LowCardinality` -- Advanced: `Variant`, `Dynamic`, `JSON` -- Geo: `Point`, `Ring`, `Polygon`, `MultiPolygon`, `LineString`, `MultiLineString`, `Geometry` -- Intervals: `IntervalSecond`, `IntervalMinute`, `IntervalHour`, `IntervalDay`, `IntervalWeek`, `IntervalMonth`, `IntervalQuarter`, `IntervalYear` (stored as Int64) -- Other: `Enum8/16`, `Nested`, `QBit`, `AggregateFunction` - -### Decoding Flow -1. User enters SQL query, clicks "Run Query" -2. 
`ClickHouseClient` (`src/core/clickhouse/client.ts`) sends query: - - **Web mode**: `fetch()` via Vite proxy or nginx - - **Electron mode**: IPC to main process → `fetch()` to ClickHouse (no CORS) -3. Decoder parses the binary response: - - **RowBinary** (`rowbinary-decoder.ts`): Row-oriented, header + rows - - **Native** (`native-decoder.ts`): Column-oriented with blocks -4. Type strings parsed via `parseType()` into `ClickHouseType` -5. Each decoded value returns an `AstNode` with byte tracking -6. UI renders hex view (left) and AST tree (right) - -### Electron Architecture -``` -Renderer (React) Main Process (Node.js) - │ │ - ├─ window.electronAPI │ - │ .executeQuery(opts) ────────►├─ fetch(clickhouseUrl + query) - │ │ → ArrayBuffer - │◄── IPC response ──────────────┤ - │ │ - ├─ Uint8Array → decoders │ - └─ render hex view + AST tree │ -``` - -- Runtime detection: `window.electronAPI` exists → IPC path, otherwise → `fetch()` -- `vite-plugin-electron` activates only when `ELECTRON=true` env var is set -- Connection config in `config.json` (project root in dev, next to executable in prod) -- Experimental ClickHouse settings (Variant, Dynamic, JSON, etc.) sent as query params - -### Interactive Highlighting -- Click a node in AST tree → highlights corresponding bytes in hex view -- Click a byte in hex view → selects the deepest AST node containing that byte -- State managed in Zustand store: `activeNodeId`, `hoveredNodeId` - -## Adding a New ClickHouse Type - -1. Add type variant to `ClickHouseType` in `src/core/types/clickhouse-types.ts` -2. Add `typeToString()` case for serialization back to string -3. Add `getTypeColor()` case for UI coloring -4. Add parser case in `src/core/parser/type-parser.ts` -5. Add decoder method in `RowBinaryDecoder` (`src/core/decoder/rowbinary-decoder.ts`): - - Add case in `decodeValue()` switch - - Implement `decode{TypeName}()` method returning `AstNode` -6. 
Add decoder method in `NativeDecoder` (`src/core/decoder/native-decoder.ts`): - - Add case in `decodeValue()` switch - - For columnar types, may need `decode{TypeName}Column()` method -7. If type has binary type index (for Dynamic), add to `decodeDynamicType()` -8. Add test cases to `smoke-cases.ts` and `validation-cases.ts` - -## Important Implementation Details - -- **LEB128**: Variable-length integers used for string lengths, array sizes, column counts -- **UUID byte order**: ClickHouse uses a special byte ordering (see `decodeUUID()` at `decoder.ts:629`) -- **IPv4**: Stored as little-endian UInt32, displayed in reverse order -- **Dynamic type**: Uses BinaryTypeIndex encoding; type is encoded in the data itself -- **LowCardinality**: Does not affect wire format in RowBinary (transparent wrapper) -- **Nested**: Encoded as parallel arrays, one per field - -## Testing - -### Integration Tests (Vitest + testcontainers) - -Tests use testcontainers to spin up a real ClickHouse instance: -```bash -npm run test # Runs all integration tests -``` - -Tests are organized into three categories with shared test case definitions: - -1. **Smoke Tests** (`smoke.integration.test.ts`) - - Verify parsing succeeds without value validation - - Test cases defined in `smoke-cases.ts` - - Parametrized for both RowBinary and Native formats - -2. **Validation Tests** (`validation.integration.test.ts`) - - Verify decoded values and AST structure - - Test cases defined in `validation-cases.ts` with format-specific callbacks - - Check values, children counts, byte ranges, metadata - -3. 
**Coverage Tests** (`coverage.integration.test.ts`) - - Analyze byte coverage of AST leaf nodes - - Report uncovered byte ranges - -### Electron e2e Tests (Playwright) - -```bash -npm run test:e2e # Builds Electron app + runs Playwright tests -``` - -Tests in `e2e/electron.spec.ts` launch the actual Electron app and verify: -- App window opens and UI renders -- Host input is visible (Electron mode) and Share button is hidden -- Connection settings can be edited -- Upload button is present and functional - -### Test Case Interface -```typescript -interface ValidationTestCase { - name: string; - query: string; - settings?: Record; - rowBinaryValidator?: (result: DecodedResult) => void; - nativeValidator?: (result: DecodedResult) => void; -} -``` - -### Adding New Test Cases -1. Add query to `smoke-cases.ts` for basic parsing verification -2. Add to `validation-cases.ts` with validator callbacks for detailed checks -3. Use `bothFormats(validator)` helper when validation logic is identical +@AGENTS.md From 74fb74c9eb710e0c1656781c2b7fa175062ad73f Mon Sep 17 00:00:00 2001 From: Alex Soffronow-Pagonidis Date: Wed, 11 Mar 2026 11:45:32 +0100 Subject: [PATCH 4/6] docs --- docs/native-protocol-versions.md | 186 +++++++++++++++++++++++++++++++ docs/nativespec.md | 15 ++- 2 files changed, 196 insertions(+), 5 deletions(-) create mode 100644 docs/native-protocol-versions.md diff --git a/docs/native-protocol-versions.md b/docs/native-protocol-versions.md new file mode 100644 index 0000000..f2e761b --- /dev/null +++ b/docs/native-protocol-versions.md @@ -0,0 +1,186 @@ +# Native Protocol Versions + +This document explains how ClickHouse's `client_protocol_version` request parameter changes the HTTP `FORMAT Native` wire layout, and how the explorer maps that behavior. 
+ +## Source of Truth + +The behavior described here is taken from the local ClickHouse source tree in `~/Code/clickhouse`: + +- `src/Server/HTTPHandler.cpp` +- `src/Formats/FormatFactory.cpp` +- `src/Processors/Formats/Impl/NativeFormat.cpp` +- `src/Formats/NativeWriter.cpp` +- `src/Core/ProtocolDefines.h` +- `src/Core/BlockInfo.cpp` +- `src/DataTypes/Serializations/SerializationInfo.cpp` + +## Request Flow + +For HTTP queries, `client_protocol_version` is handled as follows: + +1. `HTTPHandler.cpp` reads the `client_protocol_version` query parameter. +2. The value is stored in `Context.client_protocol_version`. +3. `FormatFactory.cpp` copies that value into `FormatSettings.client_protocol_version`. +4. `NativeFormat.cpp` passes it to `NativeWriter`. +5. `NativeWriter.cpp` changes the output layout based on the selected revision. + +The explorer's Native protocol selector controls this exact parameter. + +## Explorer Presets + +The UI exposes fixed revision presets instead of free-form input: + +| Preset | Upstream constant | Meaning | +|--------|-------------------|---------| +| `0` | legacy HTTP default | Omits `client_protocol_version` and preserves the old HTTP-native layout | +| `54405` | `DBMS_MIN_REVISION_WITH_LOW_CARDINALITY_TYPE` | LowCardinality negotiation | +| `54452` | `DBMS_MIN_REVISION_WITH_AGGREGATE_FUNCTIONS_VERSIONING` | AggregateFunction state versioning | +| `54454` | `DBMS_MIN_REVISION_WITH_CUSTOM_SERIALIZATION` | Adds per-column serialization metadata | +| `54465` | `DBMS_MIN_REVISION_WITH_SPARSE_SERIALIZATION` | Allows sparse serialization kinds | +| `54473` | `DBMS_MIN_REVISION_WITH_V2_DYNAMIC_AND_JSON_SERIALIZATION` | Dynamic/JSON v2 | +| `54480` | `DBMS_MIN_REVISION_WITH_OUT_OF_ORDER_BUCKETS_IN_AGGREGATION` | Adds `BlockInfo.out_of_order_buckets` | +| `54482` | `DBMS_MIN_REVISION_WITH_REPLICATED_SERIALIZATION` | Allows replicated serialization kinds | +| `54483` | `DBMS_MIN_REVISION_WITH_NULLABLE_SPARSE_SERIALIZATION` | Current upstream 
protocol version in the checked source tree | + +## What Changes on the Wire + +### `0`: Legacy HTTP Native + +With no explicit protocol version: + +- `NativeWriter` does not write `BlockInfo` +- AggregateFunction versioning is disabled +- Custom serialization metadata is not written +- Dynamic and JSON use the older v1 serialization path + +This matches the explorer's historical behavior and remains the default preset. + +### `> 0`: `BlockInfo` Appears Before Block Dimensions + +In `NativeWriter::write()`: + +- if `client_revision > 0`, `block.info.write()` runs before `NumColumns` and `NumRows` +- this is true even for HTTP `FORMAT Native` + +That means HTTP Native is not always just: + +```text +NumColumns, NumRows, columns... +``` + +With a non-zero protocol version it becomes: + +```text +BlockInfo, NumColumns, NumRows, columns... +``` + +`BlockInfo` is field-number encoded and terminated by field `0`. + +Fields currently relevant to Native output: + +| Field | Name | Type | Min revision | +|-------|------|------|--------------| +| `1` | `is_overflows` | `bool` | `0` | +| `2` | `bucket_num` | `Int32` | `0` | +| `3` | `out_of_order_buckets` | `Array(Int32)` | `54480` | + +## Column-Level Changes + +Each Native column still writes: + +1. column name +2. column type +3. optional serialization metadata +4. column data + +The change point is revision `54454`. + +### `54454`: Custom Serialization Metadata + +Starting at `DBMS_MIN_REVISION_WITH_CUSTOM_SERIALIZATION`, `NativeWriter` calls `getSerializationAndColumn()` and writes: + +- `UInt8 has_custom` +- if `has_custom == 1`, a binary-encoded serialization kind stack + +The kind-stack encoding comes from `SerializationInfo.cpp`. 
+ +Common encodings: + +| Tag | Kind stack | +|-----|------------| +| `0` | `DEFAULT` | +| `1` | `DEFAULT -> SPARSE` | +| `2` | `DEFAULT -> DETACHED` | +| `3` | `DEFAULT -> SPARSE -> DETACHED` | +| `4` | `DEFAULT -> REPLICATED` | +| `5` | arbitrary combination, encoded as count + raw kinds | + +For tuple types, nested element serialization info is serialized recursively after the tuple's own kind stack. + +### `54465`: Sparse Serialization + +Once the client revision reaches `54465`, ClickHouse may keep sparse serialization instead of materializing the column first. + +For sparse columns: + +- the stream starts with sparse-offset metadata +- only non-default values are serialized in the value stream +- readers must materialize omitted rows as type defaults + +The explorer now materializes both plain sparse columns and sparse `Nullable(...)` columns. If other custom serialization combinations that the app does not model are encountered, the decoder raises an explicit error that includes the kind stack and selected protocol version. + +### `54482`: Replicated Serialization + +Before `54482`, `NativeWriter` converts replicated columns to full columns. + +At and after `54482`, ClickHouse may keep replicated serialization kinds in the output. The explorer decodes the replicated index stream, expands the shared nested values back to row-shaped AST nodes, and surfaces the metadata in the UI. + +### `54483`: Nullable Sparse Serialization + +At `54483`, sparse serialization can also apply to `Nullable`-based layouts. The null map is derived from sparse offsets instead of a separate explicit null-map stream, and the explorer reconstructs `NULL` vs value rows accordingly. + +## Type-Specific Revision Gates + +### `54405`: LowCardinality + +Below `54405`, `NativeWriter` removes the `LowCardinality` wrapper before sending data. At and after `54405`, the server can keep the real `LowCardinality(T)` type and dictionary-style encoding. 
+ +### `54452`: AggregateFunction State Versioning + +At `54452`, `setVersionToAggregateFunctions()` begins passing the selected client revision into aggregate-state serialization. That lets aggregate functions negotiate compatible state formats across revisions. + +### `54473`: Dynamic / JSON v2 + +`NativeWriter::writeData()` switches these settings based on the selected revision: + +- below `54473`: `dynamic_serialization_version = V1`, `object_serialization_version = V1` +- at and above `54473`: `dynamic_serialization_version = V2`, `object_serialization_version = V2` + +This is the main protocol gate for modern Native `Dynamic` and `JSON` layouts. + +## Current Upstream Version + +In the checked local source tree, `src/Core/ProtocolDefines.h` defines: + +```text +DBMS_TCP_PROTOCOL_VERSION = 54483 +``` + +That is why the explorer's "current" preset is `54483`. + +## Explorer Behavior + +- The protocol selector is shown only for `Native`. +- The selected revision is used for both live HTTP queries and uploaded Native files. +- The default preset is `0` so existing behavior does not change unexpectedly. +- Share links preserve the Native protocol preset. +- The AST and hex viewer expose: + - `BlockInfo` + - `has_custom` + - serialization kind stacks + +## Practical Guidance + +- Use `0` when you want to match the explorer's original legacy HTTP behavior. +- Use `54483` when you want the most modern layout the checked ClickHouse source supports. +- Use intermediate presets when you need to isolate when a specific protocol feature appeared on the wire. 
diff --git a/docs/nativespec.md b/docs/nativespec.md index 19beaf2..2f408cb 100644 --- a/docs/nativespec.md +++ b/docs/nativespec.md @@ -191,6 +191,7 @@ Each column in the block is serialized sequentially with the following structure - Present only when `client_revision >= DBMS_MIN_REVISION_WITH_CUSTOM_SERIALIZATION` (54454) - `UInt8 has_custom`: 0 or 1 indicating if custom serialization is used - If `has_custom == 1`: Binary-encoded serialization kind stack (for sparse columns, etc.) + - See [Native Protocol Versions](./native-protocol-versions.md) for the revision gates and kind-stack encoding 4. **Column Data** (columnar, type-specific): - All values for this column across all rows - Serialization depends on the data type (see subsequent sections) @@ -211,15 +212,17 @@ curl -s -XPOST "http://localhost:8123?default_format=Native" \ ## 3. BlockInfo (TCP Protocol Only) -`BlockInfo` contains additional metadata about the block, used primarily for distributed query execution and aggregation. It is **only present when using the TCP protocol** with `client_revision > 0`. When reading Native format over HTTP, `BlockInfo` is not included. +`BlockInfo` contains additional metadata about the block, used primarily for distributed query execution and aggregation. + +Historical note: when HTTP `FORMAT Native` is requested without `client_protocol_version`, blocks start directly with `NumColumns` / `NumRows`. However, the ClickHouse server source writes `BlockInfo` for HTTP Native too when `client_protocol_version > 0`. See [Native Protocol Versions](./native-protocol-versions.md). 
### When BlockInfo is Present `BlockInfo` appears at the very beginning of each block, before the block dimensions, when: -- The block is transmitted over the TCP (Native) protocol -- `client_revision > 0` (i.e., not reading from an old client) +- `client_revision > 0` +- the writer path is using the revision-aware Native block layout -In the HTTP Native format context, `BlockInfo` is never serialized, and blocks start directly with NumColumns/NumRows. +With legacy HTTP Native (`client_revision = 0` / parameter omitted), blocks start directly with `NumColumns` / `NumRows`. ### Field-Based Serialization Format @@ -340,7 +343,7 @@ When reading a Native format stream over the TCP protocol with `server_revision 3. Stop when field number `0x00` is encountered 4. Then proceed to read the block dimensions (NumColumns, NumRows) and column data -**Note**: The HTTP Native format never includes `BlockInfo`, so HTTP readers should skip directly to reading NumColumns/NumRows. +**Note**: HTTP readers must not assume `BlockInfo` is absent. It is absent in the legacy default path, but present when HTTP `FORMAT Native` is requested with a positive `client_protocol_version`. --- @@ -3603,6 +3606,8 @@ SELECT * FROM dynamic_test FORMAT Native; The Native format behavior varies based on the protocol revision (also called `client_revision` in the code). This enables backward compatibility while allowing the format to evolve with new features. +For the request flow, exact source files, and the explorer's Native protocol presets, see [Native Protocol Versions](./native-protocol-versions.md). 
+ ### Revision Thresholds | Revision | Constant Name | Feature | From 9a282b2cd22770e55951142179a81e6ba2f7fc0e Mon Sep 17 00:00:00 2001 From: Alex Soffronow-Pagonidis Date: Wed, 11 Mar 2026 11:46:23 +0100 Subject: [PATCH 5/6] add support for selecting protocol version in native format --- e2e/electron.spec.ts | 5 + electron/main.ts | 14 +- electron/preload.ts | 2 +- src/components/App.tsx | 16 +- src/components/AstTree/AstTree.tsx | 111 +++- src/components/HexViewer/HexViewer.tsx | 65 +- src/components/QueryInput.tsx | 39 +- src/core/clickhouse/client.ts | 19 +- src/core/clickhouse/request-params.test.ts | 31 + src/core/clickhouse/request-params.ts | 14 + src/core/decoder/index.ts | 9 +- src/core/decoder/native-decoder.ts | 566 +++++++++++++++--- .../native-protocol.integration.test.ts | 152 +++++ src/core/decoder/native-protocol.test.ts | 169 ++++++ src/core/decoder/test-helpers.ts | 4 +- src/core/types/ast.ts | 28 + src/core/types/native-protocol.ts | 74 +++ src/store/store.ts | 15 +- src/styles/app.css | 2 + 19 files changed, 1236 insertions(+), 99 deletions(-) create mode 100644 src/core/clickhouse/request-params.test.ts create mode 100644 src/core/clickhouse/request-params.ts create mode 100644 src/core/decoder/native-protocol.integration.test.ts create mode 100644 src/core/decoder/native-protocol.test.ts create mode 100644 src/core/types/native-protocol.ts diff --git a/e2e/electron.spec.ts b/e2e/electron.spec.ts index ba57f91..1708e06 100644 --- a/e2e/electron.spec.ts +++ b/e2e/electron.spec.ts @@ -23,6 +23,7 @@ test.describe('Electron app', () => { await expect(window.locator('.query-textarea')).toBeVisible(); await expect(window.locator('.query-btn.primary')).toBeVisible(); await expect(window.locator('#format-select')).toBeVisible(); + await expect(window.locator('#protocol-version-select')).toHaveCount(0); // In Electron mode, the host input should be visible and Share button hidden await expect(window.locator('#host-input')).toBeVisible(); @@ -32,6 
+33,10 @@ test.describe('Electron app', () => { const shareButtons = window.locator('button', { hasText: 'Share' }); await expect(shareButtons).toHaveCount(0); + await window.locator('#format-select').selectOption('Native'); + await expect(window.locator('#protocol-version-select')).toBeVisible(); + await expect(window.locator('#protocol-version-select')).toHaveValue('0'); + // Verify the window title const title = await window.title(); expect(title).toBeTruthy(); diff --git a/electron/main.ts b/electron/main.ts index 1e8c8b4..903f57e 100644 --- a/electron/main.ts +++ b/electron/main.ts @@ -2,6 +2,8 @@ import { app, BrowserWindow, ipcMain } from 'electron'; import { fileURLToPath } from 'node:url'; import path from 'node:path'; import fs from 'node:fs'; +import { appendClickHouseRequestParams } from '../src/core/clickhouse/request-params'; +import { DEFAULT_NATIVE_PROTOCOL_VERSION } from '../src/core/types/native-protocol'; const __dirname = path.dirname(fileURLToPath(import.meta.url)); @@ -66,12 +68,14 @@ function createWindow(): void { } // IPC: Execute a ClickHouse query -ipcMain.handle('execute-query', async (_event, options: { query: string; format: string }) => { +ipcMain.handle('execute-query', async (_event, options: { query: string; format: string; nativeProtocolVersion?: number }) => { const config = loadConfig(); - const params = new URLSearchParams({ - default_format: options.format, - ...CLICKHOUSE_SETTINGS, - }); + const params = new URLSearchParams(CLICKHOUSE_SETTINGS); + appendClickHouseRequestParams( + params, + options.format, + options.nativeProtocolVersion ?? 
DEFAULT_NATIVE_PROTOCOL_VERSION + ); const response = await fetch(`${config.host}/?${params}`, { method: 'POST', diff --git a/electron/preload.ts b/electron/preload.ts index 2b6da6b..a1fac7a 100644 --- a/electron/preload.ts +++ b/electron/preload.ts @@ -1,7 +1,7 @@ import { contextBridge, ipcRenderer } from 'electron'; contextBridge.exposeInMainWorld('electronAPI', { - executeQuery: (options: { query: string; format: string }): Promise => + executeQuery: (options: { query: string; format: string; nativeProtocolVersion?: number }): Promise => ipcRenderer.invoke('execute-query', options), getConfig: (): Promise<{ host: string }> => ipcRenderer.invoke('get-config'), diff --git a/src/components/App.tsx b/src/components/App.tsx index 8af5b4d..eb82792 100644 --- a/src/components/App.tsx +++ b/src/components/App.tsx @@ -6,17 +6,23 @@ import { QueryInput } from './QueryInput'; import { decodeBase64Url } from '../core/base64url'; import { useStore } from '../store/store'; import { ClickHouseFormat } from '../core/types/formats'; +import { + DEFAULT_NATIVE_PROTOCOL_VERSION, + isNativeProtocolVersion, +} from '../core/types/native-protocol'; import logo from '../assets/clickhouse-yellow-badge.svg'; import '../styles/app.css'; function App() { const setQuery = useStore((s) => s.setQuery); const setFormat = useStore((s) => s.setFormat); + const setNativeProtocolVersion = useStore((s) => s.setNativeProtocolVersion); useEffect(() => { const params = new URLSearchParams(window.location.search); const q = params.get('q'); const f = params.get('f'); + const pv = params.get('pv'); if (q) { try { @@ -28,8 +34,16 @@ function App() { if (f && Object.values(ClickHouseFormat).includes(f as ClickHouseFormat)) { setFormat(f as ClickHouseFormat); } + if (pv) { + const parsed = Number(pv); + if (Number.isInteger(parsed) && isNativeProtocolVersion(parsed)) { + setNativeProtocolVersion(parsed); + } else { + setNativeProtocolVersion(DEFAULT_NATIVE_PROTOCOL_VERSION); + } + } - if (q || f) { + if 
(q || f || pv) { window.history.replaceState({}, '', window.location.pathname); } }, []); // eslint-disable-line react-hooks/exhaustive-deps diff --git a/src/components/AstTree/AstTree.tsx b/src/components/AstTree/AstTree.tsx index 2856311..7b88868 100644 --- a/src/components/AstTree/AstTree.tsx +++ b/src/components/AstTree/AstTree.tsx @@ -419,7 +419,7 @@ export function AstTree() { setActiveNode(blockHeaderId, 'Block metadata (Header)'); toggleExpanded(blockHeaderId); }} - onDoubleClick={() => scrollToHex(block.header.numColumnsRange.start)} + onDoubleClick={() => scrollToHex(block.header.byteRange.start)} onMouseEnter={() => setHoveredNode(blockHeaderId)} onMouseLeave={() => setHoveredNode(null)} style={{ '--depth': 1 } as React.CSSProperties} @@ -428,12 +428,53 @@ export function AstTree() { Header Block metadata - [{block.header.numColumnsRange.start}:{block.header.numRowsRange.end}] ( - {block.header.numRowsRange.end - block.header.numColumnsRange.start}B) + [{block.header.byteRange.start}:{block.header.byteRange.end}] ( + {block.header.byteRange.end - block.header.byteRange.start}B) {isHeaderExpanded && (
+ {block.header.blockInfo && ( +
+
setActiveNode(`block-${blockIndex}-blockinfo`, 'BlockInfo')} + onDoubleClick={() => scrollToHex(block.header.blockInfo!.byteRange.start)} + onMouseEnter={() => setHoveredNode(`block-${blockIndex}-blockinfo`)} + onMouseLeave={() => setHoveredNode(null)} + style={{ '--depth': 2 } as React.CSSProperties} + > + BlockInfo + Field-based metadata + + [{block.header.blockInfo.byteRange.start}:{block.header.blockInfo.byteRange.end}] ( + {block.header.blockInfo.byteRange.end - block.header.blockInfo.byteRange.start}B) + +
+ {block.header.blockInfo.fields.map((field) => { + const fieldId = `block-${blockIndex}-blockinfo-field-${field.fieldNumber}`; + return ( +
setActiveNode(fieldId, `${field.fieldName}: ${field.displayValue}`)} + onDoubleClick={() => scrollToHex(field.byteRange.start)} + onMouseEnter={() => setHoveredNode(fieldId)} + onMouseLeave={() => setHoveredNode(null)} + > + Field {field.fieldNumber} + {field.fieldName}: + {field.displayValue} + + [{field.byteRange.start}:{field.byteRange.end}] ( + {field.byteRange.end - field.byteRange.start}B) + +
+ ); + })} +
+ )}
Meta Column definition - [{col.nameByteRange.start}:{col.typeByteRange.end}] ( - {col.typeByteRange.end - col.nameByteRange.start}B) + [{col.metadataByteRange.start}:{col.metadataByteRange.end}] ( + {col.metadataByteRange.end - col.metadataByteRange.start}B)
{isColMetaExpanded && ( @@ -563,6 +604,66 @@ export function AstTree() { {col.typeByteRange.end - col.typeByteRange.start}B)
+ {col.serializationInfo && ( + <> +
setActiveNode(`${col.id}-serialization`, 'Serialization info')} + onDoubleClick={() => scrollToHex(col.serializationInfo!.byteRange.start)} + onMouseEnter={() => setHoveredNode(`${col.id}-serialization`)} + onMouseLeave={() => setHoveredNode(null)} + > + Meta + serialization: + + {col.serializationInfo.hasCustomSerialization ? 'custom' : 'default'} + + + [{col.serializationInfo.byteRange.start}:{col.serializationInfo.byteRange.end}] ( + {col.serializationInfo.byteRange.end - col.serializationInfo.byteRange.start}B) + +
+
setActiveNode(`${col.id}-serialization-has-custom`, `has_custom: ${col.serializationInfo!.hasCustomSerialization}`)} + onDoubleClick={() => scrollToHex(col.serializationInfo!.hasCustomRange.start)} + onMouseEnter={() => setHoveredNode(`${col.id}-serialization-has-custom`)} + onMouseLeave={() => setHoveredNode(null)} + > + UInt8 + has_custom: + + {col.serializationInfo.hasCustomSerialization ? '1' : '0'} + + + [{col.serializationInfo.hasCustomRange.start}:{col.serializationInfo.hasCustomRange.end}] ( + {col.serializationInfo.hasCustomRange.end - col.serializationInfo.hasCustomRange.start}B) + +
+ {col.serializationInfo.kindStackRange && ( +
setActiveNode(`${col.id}-serialization-kinds`, `kind stack: ${col.serializationInfo!.kindStack.join(' -> ')}`)} + onDoubleClick={() => scrollToHex(col.serializationInfo!.kindStackRange!.start)} + onMouseEnter={() => setHoveredNode(`${col.id}-serialization-kinds`)} + onMouseLeave={() => setHoveredNode(null)} + > + Kinds + kindStack: + + {col.serializationInfo.kindStack.join(' -> ')} + + + [{col.serializationInfo.kindStackRange.start}:{col.serializationInfo.kindStackRange.end}] ( + {col.serializationInfo.kindStackRange.end - col.serializationInfo.kindStackRange.start}B) + +
+ )} + + )} )} diff --git a/src/components/HexViewer/HexViewer.tsx b/src/components/HexViewer/HexViewer.tsx index c844fa1..66b1907 100644 --- a/src/components/HexViewer/HexViewer.tsx +++ b/src/components/HexViewer/HexViewer.tsx @@ -125,8 +125,7 @@ function buildHighlightMap( const blockHeaderId = `block-${blockIndex}-header`; if (activeNodeId === blockHeaderId || hoveredNodeId === blockHeaderId) { const isActive = activeNodeId === blockHeaderId; - // Highlight entire header range (numColumns + numRows) - for (let i = block.header.numColumnsRange.start; i < block.header.numRowsRange.end; i++) { + for (let i = block.header.byteRange.start; i < block.header.byteRange.end; i++) { const existing = map.get(i); if (!existing || isActive || !existing.isActive) { map.set(i, { color: metadataColor, isActive, isHovered: !isActive }); @@ -158,6 +157,30 @@ function buildHighlightMap( } } + const blockInfoId = `block-${blockIndex}-blockinfo`; + if (block.header.blockInfo && (activeNodeId === blockInfoId || hoveredNodeId === blockInfoId)) { + const isActive = activeNodeId === blockInfoId; + for (let i = block.header.blockInfo.byteRange.start; i < block.header.blockInfo.byteRange.end; i++) { + const existing = map.get(i); + if (!existing || isActive || !existing.isActive) { + map.set(i, { color: metadataColor, isActive, isHovered: !isActive }); + } + } + } + + block.header.blockInfo?.fields.forEach((field) => { + const fieldId = `block-${blockIndex}-blockinfo-field-${field.fieldNumber}`; + if (activeNodeId === fieldId || hoveredNodeId === fieldId) { + const isActive = activeNodeId === fieldId; + for (let i = field.byteRange.start; i < field.byteRange.end; i++) { + const existing = map.get(i); + if (!existing || isActive || !existing.isActive) { + map.set(i, { color: metadataColor, isActive, isHovered: !isActive }); + } + } + } + }); + block.columns.forEach((col) => { // Check if the column itself is active/hovered const isColActive = col.id === activeNodeId; @@ -181,8 +204,7 @@ 
function buildHighlightMap( if (activeNodeId === colMetaId || hoveredNodeId === colMetaId) { const isActive = activeNodeId === colMetaId; - // Highlight both name and type ranges - for (let i = col.nameByteRange.start; i < col.typeByteRange.end; i++) { + for (let i = col.metadataByteRange.start; i < col.metadataByteRange.end; i++) { const existing = map.get(i); if (!existing || isActive || !existing.isActive) { map.set(i, { color: metadataColor, isActive, isHovered: !isActive }); @@ -210,6 +232,41 @@ function buildHighlightMap( } } + if (col.serializationInfo) { + const serializationId = `${col.id}-serialization`; + if (activeNodeId === serializationId || hoveredNodeId === serializationId) { + const isActive = activeNodeId === serializationId; + for (let i = col.serializationInfo.byteRange.start; i < col.serializationInfo.byteRange.end; i++) { + const existing = map.get(i); + if (!existing || isActive || !existing.isActive) { + map.set(i, { color: metadataColor, isActive, isHovered: !isActive }); + } + } + } + + const hasCustomId = `${col.id}-serialization-has-custom`; + if (activeNodeId === hasCustomId || hoveredNodeId === hasCustomId) { + const isActive = activeNodeId === hasCustomId; + for (let i = col.serializationInfo.hasCustomRange.start; i < col.serializationInfo.hasCustomRange.end; i++) { + const existing = map.get(i); + if (!existing || isActive || !existing.isActive) { + map.set(i, { color: metadataColor, isActive, isHovered: !isActive }); + } + } + } + + const kindsId = `${col.id}-serialization-kinds`; + if (col.serializationInfo.kindStackRange && (activeNodeId === kindsId || hoveredNodeId === kindsId)) { + const isActive = activeNodeId === kindsId; + for (let i = col.serializationInfo.kindStackRange.start; i < col.serializationInfo.kindStackRange.end; i++) { + const existing = map.get(i); + if (!existing || isActive || !existing.isActive) { + map.set(i, { color: metadataColor, isActive, isHovered: !isActive }); + } + } + } + } + // Also visit individual 
values col.values.forEach((node) => visitNode(node, 0)); }); diff --git a/src/components/QueryInput.tsx b/src/components/QueryInput.tsx index 1f44e57..d668cba 100644 --- a/src/components/QueryInput.tsx +++ b/src/components/QueryInput.tsx @@ -3,6 +3,10 @@ import { useStore } from '../store/store'; import { DEFAULT_QUERY } from '../core/clickhouse/client'; import { ClickHouseFormat, FORMAT_METADATA } from '../core/types/formats'; import { encodeBase64Url } from '../core/base64url'; +import { + DEFAULT_NATIVE_PROTOCOL_VERSION, + NATIVE_PROTOCOL_PRESETS, +} from '../core/types/native-protocol'; const isElectron = !!window.electronAPI; @@ -11,6 +15,8 @@ export function QueryInput() { const setQuery = useStore((s) => s.setQuery); const format = useStore((s) => s.format); const setFormat = useStore((s) => s.setFormat); + const nativeProtocolVersion = useStore((s) => s.nativeProtocolVersion); + const setNativeProtocolVersion = useStore((s) => s.setNativeProtocolVersion); const executeQuery = useStore((s) => s.executeQuery); const loadFile = useStore((s) => s.loadFile); const isLoading = useStore((s) => s.isLoading); @@ -48,10 +54,13 @@ export function QueryInput() { url.search = ''; url.searchParams.set('q', encodeBase64Url(query)); url.searchParams.set('f', format); + if (format === ClickHouseFormat.Native && nativeProtocolVersion !== DEFAULT_NATIVE_PROTOCOL_VERSION) { + url.searchParams.set('pv', String(nativeProtocolVersion)); + } navigator.clipboard.writeText(url.toString()); setShareLabel('Copied!'); setTimeout(() => setShareLabel('Share'), 2000); - }, [query, format]); + }, [query, format, nativeProtocolVersion]); const handleExecute = useCallback(() => { executeQuery(); @@ -81,6 +90,13 @@ export function QueryInput() { fileInputRef.current?.click(); }, []); + const handleProtocolVersionChange = useCallback( + (e: React.ChangeEvent) => { + setNativeProtocolVersion(Number(e.target.value)); + }, + [setNativeProtocolVersion] + ); + const handleFileChange = useCallback( 
(e: React.ChangeEvent) => { const file = e.target.files?.[0]; @@ -131,6 +147,27 @@ export function QueryInput() { ))} + {format === ClickHouseFormat.Native && ( +
+ + +
+ )}
; + executeQuery(options: { query: string; format: string; nativeProtocolVersion?: number }): Promise; getConfig(): Promise<{ host: string }>; saveConfig(config: { host: string }): Promise; } @@ -22,6 +24,7 @@ declare global { export interface QueryOptions { query: string; format?: ClickHouseFormat; + nativeProtocolVersion?: number; timeout?: number; } @@ -40,11 +43,16 @@ export class ClickHouseClient { /** * Execute a query and return raw binary data */ - async query({ query, format = ClickHouseFormat.RowBinaryWithNamesAndTypes, timeout = 30000 }: QueryOptions): Promise { + async query({ + query, + format = ClickHouseFormat.RowBinaryWithNamesAndTypes, + nativeProtocolVersion = DEFAULT_NATIVE_PROTOCOL_VERSION, + timeout = 30000, + }: QueryOptions): Promise { if (window.electronAPI) { const startTime = performance.now(); const buffer = await Promise.race([ - window.electronAPI.executeQuery({ query, format }), + window.electronAPI.executeQuery({ query, format, nativeProtocolVersion }), new Promise((_, reject) => setTimeout(() => reject(new Error(`Query timeout after ${timeout}ms`)), timeout) ), @@ -58,7 +66,10 @@ export class ClickHouseClient { const timeoutId = setTimeout(() => controller.abort(), timeout); try { - const response = await fetch(`${this.baseUrl}/?default_format=${format}`, { + const params = new URLSearchParams(); + appendClickHouseRequestParams(params, format, nativeProtocolVersion); + + const response = await fetch(`${this.baseUrl}/?${params.toString()}`, { method: 'POST', body: query, headers: { diff --git a/src/core/clickhouse/request-params.test.ts b/src/core/clickhouse/request-params.test.ts new file mode 100644 index 0000000..256deaf --- /dev/null +++ b/src/core/clickhouse/request-params.test.ts @@ -0,0 +1,31 @@ +import { describe, expect, it } from 'vitest'; +import { appendClickHouseRequestParams } from './request-params'; +import { ClickHouseFormat } from '../types/formats'; + +describe('appendClickHouseRequestParams', () => { + it('always 
sets default_format', () => { + const params = new URLSearchParams(); + + appendClickHouseRequestParams(params, ClickHouseFormat.RowBinaryWithNamesAndTypes); + + expect(params.get('default_format')).toBe(ClickHouseFormat.RowBinaryWithNamesAndTypes); + }); + + it('omits client_protocol_version for legacy Native', () => { + const params = new URLSearchParams(); + + appendClickHouseRequestParams(params, ClickHouseFormat.Native, 0); + + expect(params.get('default_format')).toBe(ClickHouseFormat.Native); + expect(params.has('client_protocol_version')).toBe(false); + }); + + it('adds client_protocol_version for explicit Native presets', () => { + const params = new URLSearchParams(); + + appendClickHouseRequestParams(params, ClickHouseFormat.Native, 54483); + + expect(params.get('default_format')).toBe(ClickHouseFormat.Native); + expect(params.get('client_protocol_version')).toBe('54483'); + }); +}); diff --git a/src/core/clickhouse/request-params.ts b/src/core/clickhouse/request-params.ts new file mode 100644 index 0000000..e2a9e56 --- /dev/null +++ b/src/core/clickhouse/request-params.ts @@ -0,0 +1,14 @@ +import { ClickHouseFormat } from '../types/formats'; +import { DEFAULT_NATIVE_PROTOCOL_VERSION } from '../types/native-protocol'; + +export function appendClickHouseRequestParams( + params: URLSearchParams, + format: string, + nativeProtocolVersion: number = DEFAULT_NATIVE_PROTOCOL_VERSION, +): void { + params.set('default_format', format); + + if (format === ClickHouseFormat.Native && nativeProtocolVersion !== DEFAULT_NATIVE_PROTOCOL_VERSION) { + params.set('client_protocol_version', String(nativeProtocolVersion)); + } +} diff --git a/src/core/decoder/index.ts b/src/core/decoder/index.ts index 07f2203..6ff765c 100644 --- a/src/core/decoder/index.ts +++ b/src/core/decoder/index.ts @@ -2,6 +2,7 @@ import { ClickHouseFormat } from '../types/formats'; import { FormatDecoder } from './format-decoder'; import { RowBinaryDecoder } from './rowbinary-decoder'; import { 
NativeDecoder } from './native-decoder'; +import { DEFAULT_NATIVE_PROTOCOL_VERSION } from '../types/native-protocol'; // Re-export types and classes export { FormatDecoder } from './format-decoder'; @@ -13,12 +14,16 @@ export { decodeLEB128, decodeLEB128BigInt } from './leb128'; /** * Factory function to create the appropriate decoder for a format */ -export function createDecoder(data: Uint8Array, format: ClickHouseFormat): FormatDecoder { +export function createDecoder( + data: Uint8Array, + format: ClickHouseFormat, + options?: { nativeProtocolVersion?: number }, +): FormatDecoder { switch (format) { case ClickHouseFormat.RowBinaryWithNamesAndTypes: return new RowBinaryDecoder(data); case ClickHouseFormat.Native: - return new NativeDecoder(data); + return new NativeDecoder(data, options?.nativeProtocolVersion ?? DEFAULT_NATIVE_PROTOCOL_VERSION); default: throw new Error(`Unsupported format: ${format}`); } diff --git a/src/core/decoder/native-decoder.ts b/src/core/decoder/native-decoder.ts index d7e359f..d169c90 100644 --- a/src/core/decoder/native-decoder.ts +++ b/src/core/decoder/native-decoder.ts @@ -1,12 +1,27 @@ -import { FormatDecoder } from './format-decoder'; -import { decodeLEB128 } from './leb128'; -import { parseType } from '../parser/type-parser'; -import { ClickHouseType, typeToString } from '../types/clickhouse-types'; -import { AstNode, BlockColumnNode, BlockHeaderNode, BlockNode, ByteRange, ColumnDefinition, HeaderNode, ParsedData } from '../types/ast'; -import { ClickHouseFormat } from '../types/formats'; - -/** - * Native format decoder (column-oriented with blocks) +import { FormatDecoder } from './format-decoder'; +import { decodeLEB128, decodeLEB128BigInt } from './leb128'; +import { parseType } from '../parser/type-parser'; +import { ClickHouseType, typeToString } from '../types/clickhouse-types'; +import { + AstNode, + BlockColumnNode, + BlockHeaderNode, + BlockNode, + ByteRange, + ColumnDefinition, + HeaderNode, + NativeBlockInfo, + 
NativeBlockInfoField, + NativeSerializationInfo, + ParsedData, +} from '../types/ast'; +import { ClickHouseFormat } from '../types/formats'; + +const SPARSE_END_OF_GRANULE_FLAG = 1n << 62n; +const TEXT_DECODER = new TextDecoder(); + +/** + * Native format decoder (column-oriented with blocks) * * Native format structure: * - Multiple blocks, each with: @@ -18,8 +33,14 @@ import { ClickHouseFormat } from '../types/formats'; * - column data (all values for this column) * - Empty block (0 columns, 0 rows) signals end */ -export class NativeDecoder extends FormatDecoder { - readonly format = ClickHouseFormat.Native; +export class NativeDecoder extends FormatDecoder { + readonly format = ClickHouseFormat.Native; + private readonly protocolVersion: number; + + constructor(data: Uint8Array, protocolVersion: number = 0) { + super(data); + this.protocolVersion = protocolVersion; + } decode(): ParsedData { const blocks = this.decodeBlocks(); @@ -52,11 +73,12 @@ export class NativeDecoder extends FormatDecoder { return blocks; } - private decodeBlock(index: number): BlockNode { - const startOffset = this.reader.offset; - - // Read numColumns with byte range tracking - const numColumnsStart = this.reader.offset; + private decodeBlock(index: number): BlockNode { + const startOffset = this.reader.offset; + const blockInfo = this.protocolVersion > 0 ? 
this.decodeBlockInfo() : undefined; + + // Read numColumns with byte range tracking + const numColumnsStart = this.reader.offset; const { value: numColumns } = decodeLEB128(this.reader); const numColumnsRange: ByteRange = { start: numColumnsStart, end: this.reader.offset }; @@ -64,13 +86,18 @@ export class NativeDecoder extends FormatDecoder { const numRowsStart = this.reader.offset; const { value: numRows } = decodeLEB128(this.reader); const numRowsRange: ByteRange = { start: numRowsStart, end: this.reader.offset }; - - const header: BlockHeaderNode = { - numColumns, - numColumnsRange, - numRows, - numRowsRange, - }; + + const header: BlockHeaderNode = { + byteRange: { + start: blockInfo?.byteRange.start ?? numColumnsRange.start, + end: numRowsRange.end, + }, + numColumns, + numColumnsRange, + numRows, + numRowsRange, + blockInfo, + }; // Empty block check if (numColumns === 0 || numRows === 0) { @@ -90,8 +117,8 @@ export class NativeDecoder extends FormatDecoder { columns.push(column); } - return { - index, + return { + index, byteRange: { start: startOffset, end: this.reader.offset }, header, rowCount: numRows, @@ -99,43 +126,60 @@ export class NativeDecoder extends FormatDecoder { }; } - private decodeBlockColumn(blockIndex: number, columnIndex: number, rowCount: number): BlockColumnNode { + private decodeBlockColumn(blockIndex: number, columnIndex: number, rowCount: number): BlockColumnNode { // Read column name - const nameStart = this.reader.offset; - const { value: nameLen } = decodeLEB128(this.reader); - const { value: nameBytes } = this.reader.readBytes(nameLen); - const name = new TextDecoder().decode(nameBytes); + const nameStart = this.reader.offset; + const { value: nameLen } = decodeLEB128(this.reader); + const { value: nameBytes } = this.reader.readBytes(nameLen); + const name = TEXT_DECODER.decode(nameBytes); const nameByteRange: ByteRange = { start: nameStart, end: this.reader.offset }; // Read column type - const typeStart = this.reader.offset; - 
const { value: typeLen } = decodeLEB128(this.reader); - const { value: typeBytes } = this.reader.readBytes(typeLen); - const typeString = new TextDecoder().decode(typeBytes); - const type = parseType(typeString); - const typeByteRange: ByteRange = { start: typeStart, end: this.reader.offset }; - - // Read column data - const dataStart = this.reader.offset; - const values = this.decodeColumnData(type, rowCount); - const dataByteRange: ByteRange = { start: dataStart, end: this.reader.offset }; - - return { + const typeStart = this.reader.offset; + const { value: typeLen } = decodeLEB128(this.reader); + const { value: typeBytes } = this.reader.readBytes(typeLen); + const typeString = TEXT_DECODER.decode(typeBytes); + const type = parseType(typeString); + const typeByteRange: ByteRange = { start: typeStart, end: this.reader.offset }; + + const serializationInfo = this.decodeSerializationInfo(type); + const metadataByteRange: ByteRange = { + start: nameByteRange.start, + end: serializationInfo?.byteRange.end ?? typeByteRange.end, + }; + + // Read column data + const dataStart = this.reader.offset; + const values = this.decodeColumnData(type, rowCount, serializationInfo); + const dataByteRange: ByteRange = { start: dataStart, end: this.reader.offset }; + + return { id: `block-${blockIndex}-col-${columnIndex}`, name, nameByteRange, - type, - typeString, - typeByteRange, - dataByteRange, - values, - }; - } - - private decodeColumnData(type: ClickHouseType, rowCount: number): AstNode[] { - // Handle complex types that have different columnar encoding - switch (type.kind) { - case 'Nullable': + type, + typeString, + typeByteRange, + metadataByteRange, + dataByteRange, + serializationInfo, + values, + }; + } + + private decodeColumnData( + type: ClickHouseType, + rowCount: number, + serializationInfo?: NativeSerializationInfo, + ): AstNode[] { + const kindStack = serializationInfo?.kindStack ?? 
['DEFAULT']; + return this.decodeColumnDataWithKinds(type, rowCount, kindStack.slice(1)); + } + + private decodeColumnDataDefault(type: ClickHouseType, rowCount: number): AstNode[] { + // Handle complex types that have different columnar encoding + switch (type.kind) { + case 'Nullable': return this.decodeNullableColumn(type.inner, rowCount); case 'Array': return this.decodeArrayColumn(type.element, rowCount); @@ -179,11 +223,331 @@ export class NativeDecoder extends FormatDecoder { const node = this.decodeValue(type); node.label = `[${i}]`; values.push(node); - } - return values; - } - - private decodeValue(type: ClickHouseType): AstNode { + } + return values; + } + + private decodeColumnDataWithKinds(type: ClickHouseType, rowCount: number, kinds: string[]): AstNode[] { + if (kinds.length === 0) { + return this.decodeColumnDataDefault(type, rowCount); + } + + const nestedKinds = kinds.slice(0, -1); + const outermostKind = kinds[kinds.length - 1]; + + switch (outermostKind) { + case 'SPARSE': + return this.decodeSparseColumn(type, rowCount, nestedKinds); + case 'REPLICATED': + return this.decodeReplicatedColumn(type, rowCount, nestedKinds); + default: + throw new Error( + `Native format: unsupported serialization kind stack DEFAULT -> ${kinds.join(' -> ')} ` + + `for ${typeToString(type)} at protocol version ${this.protocolVersion}`, + ); + } + } + + private decodeBlockInfo(): NativeBlockInfo { + const start = this.reader.offset; + const fields: NativeBlockInfoField[] = []; + + while (true) { + const fieldNumberStart = this.reader.offset; + const { value: fieldNumber } = decodeLEB128(this.reader); + const fieldNumberRange: ByteRange = { start: fieldNumberStart, end: this.reader.offset }; + + if (fieldNumber === 0) { + return { + byteRange: { start, end: this.reader.offset }, + terminatorRange: fieldNumberRange, + fields, + }; + } + + switch (fieldNumber) { + case 1: { + const { value, range } = this.reader.readUInt8(); + fields.push({ + fieldNumber, + fieldName: 
'is_overflows', + value: value !== 0, + displayValue: value !== 0 ? 'true' : 'false', + fieldNumberRange, + valueRange: range, + byteRange: { start: fieldNumberRange.start, end: range.end }, + }); + break; + } + case 2: { + const { value, range } = this.reader.readInt32LE(); + fields.push({ + fieldNumber, + fieldName: 'bucket_num', + value, + displayValue: String(value), + fieldNumberRange, + valueRange: range, + byteRange: { start: fieldNumberRange.start, end: range.end }, + }); + break; + } + case 3: { + if (this.protocolVersion < 54480) { + throw new Error( + `Native format: BlockInfo field 3 requires protocol version 54480+, got ${this.protocolVersion}`, + ); + } + const valueStart = this.reader.offset; + const { value: size } = decodeLEB128(this.reader); + const values: number[] = []; + for (let i = 0; i < size; i++) { + const { value } = this.reader.readInt32LE(); + values.push(value); + } + const valueRange: ByteRange = { start: valueStart, end: this.reader.offset }; + fields.push({ + fieldNumber, + fieldName: 'out_of_order_buckets', + value: values, + displayValue: `[${values.join(', ')}]`, + fieldNumberRange, + valueRange, + byteRange: { start: fieldNumberRange.start, end: valueRange.end }, + }); + break; + } + default: + throw new Error(`Native format: unknown BlockInfo field ${fieldNumber}`); + } + } + } + + private decodeSerializationInfo(type: ClickHouseType): NativeSerializationInfo | undefined { + if (this.protocolVersion < 54454) { + return undefined; + } + + const start = this.reader.offset; + const { value: hasCustomValue, range: hasCustomRange } = this.reader.readUInt8(); + const hasCustomSerialization = hasCustomValue !== 0; + let kindStackRange: ByteRange | undefined; + let kindStack = ['DEFAULT']; + + if (hasCustomSerialization) { + const kindStackStart = this.reader.offset; + kindStack = this.decodeSerializationKindStack(type); + kindStackRange = { start: kindStackStart, end: this.reader.offset }; + } + + return { + byteRange: { start, end: 
this.reader.offset }, + hasCustomSerialization, + hasCustomRange, + kindStack, + kindStackRange, + }; + } + + private decodeSerializationKindStack(type: ClickHouseType): string[] { + const { value: kindType } = this.reader.readUInt8(); + let kindStack: string[]; + + switch (kindType) { + case 0: + kindStack = ['DEFAULT']; + break; + case 1: + kindStack = ['DEFAULT', 'SPARSE']; + break; + case 2: + kindStack = ['DEFAULT', 'DETACHED']; + break; + case 3: + kindStack = ['DEFAULT', 'SPARSE', 'DETACHED']; + break; + case 4: + kindStack = ['DEFAULT', 'REPLICATED']; + break; + case 5: { + const { value: count } = decodeLEB128(this.reader); + kindStack = []; + for (let i = 0; i < count; i++) { + const { value: rawKind } = this.reader.readUInt8(); + kindStack.push(this.decodeSerializationKind(rawKind)); + } + break; + } + default: + throw new Error(`Native format: unknown serialization kind type ${kindType}`); + } + + if (type.kind === 'Tuple') { + type.elements.forEach((element) => { + this.decodeSerializationKindStack(element); + }); + } + + return kindStack; + } + + private decodeSerializationKind(rawKind: number): string { + switch (rawKind) { + case 0: + return 'DEFAULT'; + case 1: + return 'SPARSE'; + case 2: + return 'DETACHED'; + case 3: + return 'REPLICATED'; + default: + throw new Error(`Native format: unknown serialization kind ${rawKind}`); + } + } + + private decodeSparseColumn(type: ClickHouseType, rowCount: number, nestedKinds: string[]): AstNode[] { + const positions = this.decodeSparsePositions(rowCount); + + if (type.kind === 'Nullable') { + if (nestedKinds.length > 0) { + throw new Error( + `Native format: unsupported serialization kind stack DEFAULT -> ${nestedKinds.join(' -> ')} -> SPARSE ` + + `for ${typeToString(type)} at protocol version ${this.protocolVersion}`, + ); + } + + return this.decodeSparseNullableColumn(type.inner, rowCount, positions); + } + + const nonDefaultValues = this.decodeColumnDataWithKinds(type, positions.length, nestedKinds); + 
const values: AstNode[] = []; + let valueIndex = 0; + + for (let rowIndex = 0; rowIndex < rowCount; rowIndex++) { + if (valueIndex < positions.length && positions[valueIndex] === rowIndex) { + const node = this.cloneAstNode(nonDefaultValues[valueIndex], `[${rowIndex}]`); + node.label = `[${rowIndex}]`; + values.push(node); + valueIndex++; + continue; + } + + values.push(this.createDefaultNode(type, rowIndex)); + } + + return values; + } + + private decodeSparseNullableColumn( + innerType: ClickHouseType, + rowCount: number, + nonNullPositions: number[], + ): AstNode[] { + const nonNullValues = this.decodeColumnDataDefault(innerType, nonNullPositions.length); + const values: AstNode[] = []; + let valueIndex = 0; + + for (let rowIndex = 0; rowIndex < rowCount; rowIndex++) { + if (valueIndex < nonNullPositions.length && nonNullPositions[valueIndex] === rowIndex) { + const innerNode = this.cloneAstNode(nonNullValues[valueIndex], 'value'); + values.push({ + id: this.generateId(), + type: `Nullable(${typeToString(innerType)})`, + byteRange: innerNode.byteRange, + value: innerNode.value, + displayValue: innerNode.displayValue, + label: `[${rowIndex}]`, + children: [innerNode], + metadata: { isNull: false }, + }); + valueIndex++; + continue; + } + + values.push({ + id: this.generateId(), + type: `Nullable(${typeToString(innerType)})`, + byteRange: { start: this.reader.offset, end: this.reader.offset }, + value: null, + displayValue: 'NULL', + label: `[${rowIndex}]`, + metadata: { isNull: true, isDefaultValue: true }, + }); + } + + return values; + } + + private decodeReplicatedColumn(type: ClickHouseType, rowCount: number, nestedKinds: string[]): AstNode[] { + const { value: serializedRowCount } = decodeLEB128(this.reader); + if (serializedRowCount !== rowCount) { + throw new Error( + `Native format: replicated row count ${serializedRowCount} does not match block row count ${rowCount}`, + ); + } + + const { value: indexSize } = this.reader.readUInt8(); + const indexes: 
number[] = []; + for (let i = 0; i < rowCount; i++) { + indexes.push(this.readReplicatedIndex(indexSize)); + } + + const { value: nestedRowCount } = decodeLEB128(this.reader); + const nestedValues = this.decodeColumnDataWithKinds(type, nestedRowCount, nestedKinds); + + return indexes.map((index, rowIndex) => { + const sourceNode = nestedValues[index]; + if (!sourceNode) { + throw new Error(`Native format: replicated index ${index} out of bounds for ${nestedValues.length} nested values`); + } + + const cloned = this.cloneAstNode(sourceNode, `[${rowIndex}]`); + cloned.metadata = { + ...(cloned.metadata ?? {}), + replicatedIndex: index, + }; + return cloned; + }); + } + + private readReplicatedIndex(indexSize: number): number { + switch (indexSize) { + case 1: + return this.reader.readUInt8().value; + case 2: + return this.reader.readUInt16LE().value; + case 4: + return this.reader.readUInt32LE().value; + case 8: + return Number(this.reader.readUInt64LE().value); + default: + throw new Error(`Native format: unsupported replicated index size ${indexSize}`); + } + } + + private decodeSparsePositions(rowCount: number): number[] { + const positions: number[] = []; + let currentRow = 0; + + while (currentRow <= rowCount) { + const { value: rawGroupSize } = decodeLEB128BigInt(this.reader); + const endOfGranule = (rawGroupSize & SPARSE_END_OF_GRANULE_FLAG) !== 0n; + const groupSize = Number(rawGroupSize & ~SPARSE_END_OF_GRANULE_FLAG); + currentRow += groupSize; + + if (!endOfGranule) { + positions.push(currentRow); + currentRow += 1; + } else { + break; + } + } + + return positions; + } + + private decodeValue(type: ClickHouseType): AstNode { switch (type.kind) { // Unsigned integers case 'UInt8': @@ -457,7 +821,7 @@ export class NativeDecoder extends FormatDecoder { }; } - private buildHeaderFromBlocks(blocks: BlockNode[]): HeaderNode { + private buildHeaderFromBlocks(blocks: BlockNode[]): HeaderNode { if (blocks.length === 0) { return { byteRange: { start: 0, end: 0 }, @@ 
-476,16 +840,80 @@ export class NativeDecoder extends FormatDecoder { typeByteRange: col.typeByteRange, })); - return { - byteRange: { start: 0, end: firstBlock.columns[0]?.dataByteRange.start ?? 0 }, - columnCount: columns.length, - // For Native format, column count is per-block, use first block's range - columnCountRange: firstBlock.header.numColumnsRange, + return { + byteRange: { start: 0, end: firstBlock.columns[0]?.metadataByteRange.end ?? 0 }, + columnCount: columns.length, + // For Native format, column count is per-block, use first block's range + columnCountRange: firstBlock.header.numColumnsRange, columns, }; - } - - // Integer decoders + } + + private createDefaultNode(type: ClickHouseType, rowIndex: number): AstNode { + const typeName = typeToString(type); + const byteRange = { start: this.reader.offset, end: this.reader.offset }; + + switch (type.kind) { + case 'UInt8': + case 'UInt16': + case 'UInt32': + case 'Int8': + case 'Int16': + case 'Int32': + case 'Float32': + case 'Float64': + case 'BFloat16': + case 'Decimal32': + case 'Decimal64': + return this.createDefaultValueNode(typeName, 0, '0', rowIndex, byteRange); + case 'UInt64': + case 'UInt128': + case 'UInt256': + case 'Int64': + case 'Int128': + case 'Int256': + case 'Decimal128': + case 'Decimal256': + return this.createDefaultValueNode(typeName, 0n, '0', rowIndex, byteRange); + case 'Bool': + return this.createDefaultValueNode(typeName, false, 'false', rowIndex, byteRange); + case 'String': + case 'FixedString': + return this.createDefaultValueNode(typeName, '', '""', rowIndex, byteRange); + default: + throw new Error(`Native format: sparse default materialization not supported for ${typeName}`); + } + } + + private createDefaultValueNode( + type: string, + value: unknown, + displayValue: string, + rowIndex: number, + byteRange: ByteRange, + ): AstNode { + return { + id: this.generateId(), + type, + byteRange, + value, + displayValue, + label: `[${rowIndex}]`, + metadata: { isDefaultValue: 
true }, + }; + } + + private cloneAstNode(node: AstNode, label?: string): AstNode { + return { + ...node, + id: this.generateId(), + label: label ?? node.label, + children: node.children?.map((child) => this.cloneAstNode(child, child.label)), + metadata: node.metadata ? { ...node.metadata } : undefined, + }; + } + + // Integer decoders private decodeUInt8(): AstNode { const { value, range } = this.reader.readUInt8(); return { diff --git a/src/core/decoder/native-protocol.integration.test.ts b/src/core/decoder/native-protocol.integration.test.ts new file mode 100644 index 0000000..1ea8a93 --- /dev/null +++ b/src/core/decoder/native-protocol.integration.test.ts @@ -0,0 +1,152 @@ +import { afterAll, beforeAll, describe, expect, it } from 'vitest'; +import { NATIVE_PROTOCOL_PRESETS } from '../types/native-protocol'; +import { TestContext, decodeNative } from './test-helpers'; +import { AstNode } from '../types/ast'; + +interface NativeProtocolMatrixCase { + name: string; + query: string; + settings?: Record; + assertParsed: (parsed: ReturnType, revision: number) => void; +} + +function collectNodes(nodes: AstNode[]): AstNode[] { + const collected: AstNode[] = []; + + const visit = (node: AstNode) => { + collected.push(node); + node.children?.forEach(visit); + }; + + nodes.forEach(visit); + return collected; +} + +const NATIVE_PROTOCOL_MATRIX_CASES: NativeProtocolMatrixCase[] = [ + { + name: 'simple UInt8 column', + query: 'SELECT number::UInt8 AS val FROM numbers(3)', + assertParsed: (parsed, revision) => { + expect(parsed.blocks).toHaveLength(1); + expect(parsed.blocks?.[0].columns[0].values.map((node) => node.value)).toEqual([0, 1, 2]); + expect(parsed.blocks?.[0].header.blockInfo === undefined).toBe(revision === 0); + }, + }, + { + name: 'LowCardinality compatibility', + query: 'SELECT toLowCardinality(toString(number % 2)) AS val FROM numbers(4)', + settings: { allow_suspicious_low_cardinality_types: 1 }, + assertParsed: (parsed, revision) => { + const column = 
parsed.blocks?.[0].columns[0]; + expect(column).toBeDefined(); + if (revision !== 0 && revision < 54405) { + expect(column?.typeString).toBe('String'); + } else { + expect(column?.typeString).toBe('LowCardinality(String)'); + } + }, + }, + { + name: 'AggregateFunction compatibility', + query: 'SELECT avgState(number) AS val FROM numbers(10)', + assertParsed: (parsed) => { + const node = parsed.blocks?.[0].columns[0].values[0]; + expect(node?.type).toBe('AggregateFunction(avg, UInt64)'); + expect(node?.displayValue).toContain('avg=4.50'); + }, + }, + { + name: 'sparse serialization gate', + query: 'SELECT if(number = 5, 1, 0)::UInt8 AS sparse_val FROM numbers(10)', + assertParsed: (parsed, revision) => { + const column = parsed.blocks?.[0].columns[0]; + expect(column).toBeDefined(); + expect(column?.values.map((node) => node.value)).toEqual([0, 0, 0, 0, 0, 1, 0, 0, 0, 0]); + + if (revision === 0 || revision < 54454) { + expect(column?.serializationInfo).toBeUndefined(); + } else if (revision < 54465) { + expect(column?.serializationInfo?.hasCustomSerialization).toBe(false); + } else { + expect(column?.serializationInfo?.hasCustomSerialization).toBe(true); + expect(column?.serializationInfo?.kindStack).toEqual(['DEFAULT', 'SPARSE']); + } + }, + }, + { + name: 'Dynamic serialization version gate', + query: 'SELECT 42::Dynamic AS val', + settings: { allow_experimental_dynamic_type: 1 }, + assertParsed: (parsed, revision) => { + const column = parsed.blocks?.[0].columns[0]; + const headerNode = column?.values[0]; + expect(headerNode?.type).toBe('Dynamic.Header'); + expect((headerNode?.value as { version: number }).version).toBe(revision >= 54473 ? 
2 : 1); + }, + }, + { + name: 'Nullable sparse serialization gate', + query: 'SELECT if(number = 5, 42, NULL)::Nullable(UInt8) AS sparse_nullable FROM numbers(10)', + assertParsed: (parsed, revision) => { + const column = parsed.blocks?.[0].columns[0]; + expect(column).toBeDefined(); + expect(column?.values.map((node) => node.value)).toEqual([null, null, null, null, null, 42, null, null, null, null]); + + if (revision === 0 || revision < 54454) { + expect(column?.serializationInfo).toBeUndefined(); + } else if (revision < 54483) { + expect(column?.serializationInfo?.hasCustomSerialization).toBe(false); + } else { + expect(column?.serializationInfo?.hasCustomSerialization).toBe(true); + expect(column?.serializationInfo?.kindStack).toEqual(['DEFAULT', 'SPARSE']); + } + }, + }, + { + name: 'JSON dynamic-path serialization version gate', + query: `SELECT '{"ip":"127.0.0.1","name":"test"}'::JSON(ip IPv4) AS val`, + settings: { allow_experimental_json_type: 1 }, + assertParsed: (parsed, revision) => { + const column = parsed.blocks?.[0].columns[0]; + const jsonNode = column?.values[0]; + expect(jsonNode?.type).toBe('JSON'); + + const structureNodes = collectNodes(jsonNode ? [jsonNode] : []).filter( + (node) => node.type === 'Dynamic.structure', + ); + expect(structureNodes.length).toBeGreaterThan(0); + + const versionNode = structureNodes[0].children?.find((child) => child.label === 'dynamic_version'); + expect(versionNode?.value).toBe(revision >= 54473 ? 2n : 1n); + }, + }, +]; + +describe('Native protocol revision matrix', () => { + const ctx = new TestContext(); + + beforeAll(async () => { + await ctx.start(); + }, 120000); + + afterAll(async () => { + await ctx.stop(); + }); + + for (const testCase of NATIVE_PROTOCOL_MATRIX_CASES) { + describe(testCase.name, () => { + it.each(NATIVE_PROTOCOL_PRESETS.map((preset) => preset.value))( + 'revision %s', + async (revision) => { + const data = await ctx.queryNative(testCase.query, { + ...(testCase.settings ?? 
/**
 * Encodes a non-negative integer as unsigned LEB128: seven payload bits per
 * byte, least-significant group first, high bit set on every byte except the
 * last.
 *
 * @param value - Integer to encode, as a number or bigint.
 * @returns The encoded bytes; zero encodes as `[0]`.
 * @throws RangeError when `value` is negative. An arithmetic right shift of a
 *   negative BigInt converges to -1n rather than 0n, so without this guard
 *   the loop below never terminates.
 */
function encodeLeb128(value: number | bigint): number[] {
  let remaining = BigInt(value);
  if (remaining < 0n) {
    throw new RangeError(`encodeLeb128: value must be non-negative, got ${value}`);
  }

  const bytes: number[] = [];

  do {
    let byte = Number(remaining & 0x7fn);
    remaining >>= 7n;
    if (remaining !== 0n) {
      // More groups follow: set the continuation bit.
      byte |= 0x80;
    }
    bytes.push(byte);
  } while (remaining !== 0n);

  return bytes;
}
rowCount - start : 0; + bytes.push(...encodeLeb128(BigInt(trailingDefaults) | END_OF_GRANULE_FLAG)); + + return bytes; +} + +describe('NativeDecoder protocol-aware parsing', () => { + it('parses legacy HTTP Native blocks without protocol metadata', () => { + const bytes = new Uint8Array([ + 0x01, // numColumns + 0x02, // numRows + ...encodeString('n'), + ...encodeString('UInt8'), + 0x01, + 0x02, + ]); + + const parsed = new NativeDecoder(bytes, 0).decode(); + + expect(parsed.blocks).toHaveLength(1); + expect(parsed.blocks?.[0].header.blockInfo).toBeUndefined(); + expect(parsed.blocks?.[0].columns[0].values.map((node) => node.value)).toEqual([1, 2]); + }); + + it('parses BlockInfo and sparse serialization metadata for modern protocol versions', () => { + const bytes = new Uint8Array([ + 0x01, // field 1: is_overflows + 0x00, // false + 0x02, // field 2: bucket_num + 0xff, 0xff, 0xff, 0xff, // -1 + 0x03, // field 3: out_of_order_buckets + 0x00, // empty vector + 0x00, // BlockInfo terminator + 0x01, // numColumns + 0x03, // numRows + ...encodeString('n'), + ...encodeString('UInt8'), + 0x01, // has_custom + 0x01, // SPARSE kind stack + ...encodeSparseOffsets([1], 3), + 0x07, // non-default value + ]); + + const parsed = new NativeDecoder(bytes, 54483).decode(); + const block = parsed.blocks?.[0]; + const column = block?.columns[0]; + + expect(block?.header.blockInfo?.fields.map((field) => field.fieldName)).toEqual([ + 'is_overflows', + 'bucket_num', + 'out_of_order_buckets', + ]); + expect(column?.serializationInfo?.hasCustomSerialization).toBe(true); + expect(column?.serializationInfo?.kindStack).toEqual(['DEFAULT', 'SPARSE']); + expect(column?.values.map((node) => node.value)).toEqual([0, 7, 0]); + expect(column?.values[0].metadata?.isDefaultValue).toBe(true); + expect(column?.values[1].metadata?.isDefaultValue).toBeUndefined(); + }); + + it('rejects BlockInfo field 3 before protocol version 54480', () => { + const bytes = new Uint8Array([ + 0x03, + 0x00, + 0x00, + 
0x01, + 0x01, + ...encodeString('n'), + ...encodeString('UInt8'), + 0x07, + ]); + + expect(() => new NativeDecoder(bytes, 54473).decode()).toThrow( + 'BlockInfo field 3 requires protocol version 54480+', + ); + }); + + it('parses replicated serialization kind stacks', () => { + const bytes = new Uint8Array([ + 0x01, + 0x00, + 0x02, + 0xff, 0xff, 0xff, 0xff, + 0x03, + 0x01, + 0x05, 0x00, 0x00, 0x00, + 0x00, + 0x01, + 0x02, + ...encodeString('n'), + ...encodeString('UInt8'), + 0x01, + 0x04, + 0x02, + 0x01, + 0x00, + 0x01, + 0x02, + 0x07, + 0x09, + ]); + + const parsed = new NativeDecoder(bytes, 54482).decode(); + expect(parsed.blocks?.[0].columns[0].serializationInfo?.kindStack).toEqual(['DEFAULT', 'REPLICATED']); + expect(parsed.blocks?.[0].columns[0].values.map((node) => node.value)).toEqual([7, 9]); + expect(parsed.blocks?.[0].columns[0].values[0].metadata?.replicatedIndex).toBe(0); + expect(parsed.blocks?.[0].columns[0].values[1].metadata?.replicatedIndex).toBe(1); + }); + + it('parses nullable sparse serialization', () => { + const bytes = new Uint8Array([ + 0x01, + 0x00, + 0x02, + 0xff, 0xff, 0xff, 0xff, + 0x03, + 0x00, + 0x00, + 0x01, + 0x03, + ...encodeString('n'), + ...encodeString('Nullable(UInt8)'), + 0x01, + 0x01, + ...encodeSparseOffsets([1], 3), + 0x07, + ]); + + const parsed = new NativeDecoder(bytes, 54483).decode(); + expect(parsed.blocks?.[0].columns[0].serializationInfo?.kindStack).toEqual(['DEFAULT', 'SPARSE']); + expect(parsed.blocks?.[0].columns[0].values.map((node) => node.value)).toEqual([null, 7, null]); + expect(parsed.blocks?.[0].columns[0].values[0].metadata?.isNull).toBe(true); + expect(parsed.blocks?.[0].columns[0].values[1].metadata?.isNull).toBe(false); + }); +}); diff --git a/src/core/decoder/test-helpers.ts b/src/core/decoder/test-helpers.ts index 273e7a4..1a57dc6 100644 --- a/src/core/decoder/test-helpers.ts +++ b/src/core/decoder/test-helpers.ts @@ -116,8 +116,8 @@ export function decodeRowBinary(data: Uint8Array): ParsedData { /** 
/**
 * Decodes a Native-format payload with a `NativeDecoder`.
 *
 * @param data - Raw Native bytes.
 * @param protocolVersion - Protocol revision passed to the decoder;
 *   defaults to 0.
 * @returns Whatever the decoder's `decode()` produces.
 */
export function decodeNative(data: Uint8Array, protocolVersion = 0): ParsedData {
  return new NativeDecoder(data, protocolVersion).decode();
}
+ label: 'Legacy HTTP default (0)', + constantName: 'LEGACY_HTTP_DEFAULT', + summary: 'Omit client_protocol_version and use the legacy HTTP Native layout.', + }, + { + value: 54405, + label: '54405 LowCardinality', + constantName: 'DBMS_MIN_REVISION_WITH_LOW_CARDINALITY_TYPE', + summary: 'Enables LowCardinality type negotiation.', + }, + { + value: 54452, + label: '54452 AggregateFunction versioning', + constantName: 'DBMS_MIN_REVISION_WITH_AGGREGATE_FUNCTIONS_VERSIONING', + summary: 'Adds AggregateFunction revision-aware state serialization.', + }, + { + value: 54454, + label: '54454 Custom serialization', + constantName: 'DBMS_MIN_REVISION_WITH_CUSTOM_SERIALIZATION', + summary: 'Adds per-column serialization metadata before Native column data.', + }, + { + value: 54465, + label: '54465 Sparse serialization', + constantName: 'DBMS_MIN_REVISION_WITH_SPARSE_SERIALIZATION', + summary: 'Allows sparse column serialization kinds.', + }, + { + value: 54473, + label: '54473 Dynamic/JSON v2', + constantName: 'DBMS_MIN_REVISION_WITH_V2_DYNAMIC_AND_JSON_SERIALIZATION', + summary: 'Switches Dynamic and JSON Native serialization to v2.', + }, + { + value: 54480, + label: '54480 Out-of-order buckets', + constantName: 'DBMS_MIN_REVISION_WITH_OUT_OF_ORDER_BUCKETS_IN_AGGREGATION', + summary: 'Adds BlockInfo field support for out-of-order aggregation buckets.', + }, + { + value: 54482, + label: '54482 Replicated serialization', + constantName: 'DBMS_MIN_REVISION_WITH_REPLICATED_SERIALIZATION', + summary: 'Allows replicated serialization kinds in Native output.', + }, + { + value: 54483, + label: '54483 Nullable sparse / current', + constantName: 'DBMS_MIN_REVISION_WITH_NULLABLE_SPARSE_SERIALIZATION', + summary: 'Current upstream protocol version with nullable sparse serialization.', + }, +]; + +export const DEFAULT_NATIVE_PROTOCOL_VERSION = 0; +export const CURRENT_NATIVE_PROTOCOL_VERSION = 54483; + +const NATIVE_PROTOCOL_PRESET_VALUES = new Set( + 
/**
 * Tells whether `value` is one of the revisions listed in
 * `NATIVE_PROTOCOL_PRESETS`.
 *
 * Membership is exact: a revision that lies between two presets is not
 * accepted.
 *
 * @param value - Candidate protocol revision.
 * @returns `true` when a preset with this exact value exists.
 */
export function isNativeProtocolVersion(value: number): boolean {
  const isKnownPreset = NATIVE_PROTOCOL_PRESET_VALUES.has(value);
  return isKnownPreset;
}
useStore = create((set, get) => ({ }, loadFile: async (file: File) => { - const { format } = get(); + const { format, nativeProtocolVersion } = get(); set(getLoadingState()); try { const arrayBuffer = await file.arrayBuffer(); const data = new Uint8Array(arrayBuffer); - const decoder = createDecoder(data, format); + const decoder = createDecoder(data, format, { nativeProtocolVersion }); const parsed = decoder.decode(); set(getSuccessState(data, parsed, null)); } catch (error) { diff --git a/src/styles/app.css b/src/styles/app.css index 4df9685..16169e3 100644 --- a/src/styles/app.css +++ b/src/styles/app.css @@ -113,6 +113,8 @@ display: flex; align-items: center; justify-content: space-between; + flex-wrap: wrap; + gap: 8px; margin-bottom: 8px; flex-shrink: 0; } From f0f96b6d2969bd7e8e4985234704c3159405573f Mon Sep 17 00:00:00 2001 From: Alex Soffronow-Pagonidis Date: Wed, 11 Mar 2026 11:58:58 +0100 Subject: [PATCH 6/6] fix test failures --- src/core/decoder/native-decoder.ts | 174 +++++++++++------- .../native-protocol.integration.test.ts | 20 +- src/core/decoder/native-protocol.test.ts | 62 +++++++ 3 files changed, 176 insertions(+), 80 deletions(-) diff --git a/src/core/decoder/native-decoder.ts b/src/core/decoder/native-decoder.ts index d169c90..3ce137d 100644 --- a/src/core/decoder/native-decoder.ts +++ b/src/core/decoder/native-decoder.ts @@ -2770,44 +2770,55 @@ export class NativeDecoder extends FormatDecoder { * Read JSON column structure (version, paths, dynamic structures). * Used by decodeJSONColumnV1 and via readColumnPrefix for nested JSON in Arrays/Variants. 
*/ - private readJSONColumnStructure(typedSubColumns?: Map): { - structureChildren: AstNode[]; - dynamicPathNames: string[]; - dynamicStructures: Array<{ - typeNames: string[]; - variants: ClickHouseType[]; - numTypes: number; - discToTypeIndex: Map; - variantPrefixes: unknown[]; + private readJSONColumnStructure(typedSubColumns?: Map): { + serializationVersion: number; + structureChildren: AstNode[]; + dynamicPathNames: string[]; + dynamicStructures: Array<{ + serializationVersion: number; + typeNames: string[]; + variants: ClickHouseType[]; + numTypes: number; + discToTypeIndex: Map; + variantPrefixes: unknown[]; }>; typedSubColumns?: Map; } { const structureChildren: AstNode[] = []; - // 1. Read version (UInt64) - const versionNode = this.decodeUInt64(); - versionNode.label = 'version'; - structureChildren.push(versionNode); - - // 2. Read max_dynamic_paths (VarUInt) - const maxDynPathsStart = this.reader.offset; - const { value: maxDynamicPaths } = decodeLEB128(this.reader); - const maxDynPathsNode: AstNode = { - id: this.generateId(), - type: 'VarUInt', - byteRange: { start: maxDynPathsStart, end: this.reader.offset }, - value: maxDynamicPaths, - displayValue: String(maxDynamicPaths), - label: 'max_dynamic_paths', - }; - structureChildren.push(maxDynPathsNode); - - // 3. Read num_dynamic_paths (VarUInt) - const numDynPathsStart = this.reader.offset; - const { value: numDynamicPaths } = decodeLEB128(this.reader); - const numDynPathsNode: AstNode = { - id: this.generateId(), - type: 'VarUInt', + // 1. Read version (UInt64) + const versionNode = this.decodeUInt64(); + versionNode.label = 'version'; + structureChildren.push(versionNode); + const objectSerializationVersion = Number(versionNode.value); + + if (objectSerializationVersion !== 0 && objectSerializationVersion !== 2) { + throw new Error( + `Unsupported JSON object serialization version ${objectSerializationVersion} in Native decoder`, + ); + } + + // 2. 
Read max_dynamic_paths (V1 only) + if (objectSerializationVersion === 0) { + const maxDynPathsStart = this.reader.offset; + const { value: maxDynamicPaths } = decodeLEB128(this.reader); + const maxDynPathsNode: AstNode = { + id: this.generateId(), + type: 'VarUInt', + byteRange: { start: maxDynPathsStart, end: this.reader.offset }, + value: maxDynamicPaths, + displayValue: String(maxDynamicPaths), + label: 'max_dynamic_paths', + }; + structureChildren.push(maxDynPathsNode); + } + + // 3. Read num_dynamic_paths (VarUInt) + const numDynPathsStart = this.reader.offset; + const { value: numDynamicPaths } = decodeLEB128(this.reader); + const numDynPathsNode: AstNode = { + id: this.generateId(), + type: 'VarUInt', byteRange: { start: numDynPathsStart, end: this.reader.offset }, value: numDynamicPaths, displayValue: String(numDynamicPaths), @@ -2824,41 +2835,51 @@ export class NativeDecoder extends FormatDecoder { dynamicPathNames.push(pathNode.value as string); } - // 5. Read ALL Dynamic structures (one per dynamic path) - const dynamicStructures: Array<{ - typeNames: string[]; - variants: ClickHouseType[]; - numTypes: number; - discToTypeIndex: Map; - variantPrefixes: unknown[]; + // 5. 
Read ALL Dynamic structures (one per dynamic path) + const dynamicStructures: Array<{ + serializationVersion: number; + typeNames: string[]; + variants: ClickHouseType[]; + numTypes: number; + discToTypeIndex: Map; + variantPrefixes: unknown[]; }> = []; for (let i = 0; i < numDynamicPaths; i++) { const dynamicStructureStart = this.reader.offset; const dynamicStructureChildren: AstNode[] = []; - // Read Dynamic version - const dynVersionNode = this.decodeUInt64(); - dynVersionNode.label = 'dynamic_version'; - dynamicStructureChildren.push(dynVersionNode); - - // Read max_dynamic_types - const maxTypesStart = this.reader.offset; - const { value: maxTypes } = decodeLEB128(this.reader); - const maxTypesNode: AstNode = { - id: this.generateId(), - type: 'VarUInt', - byteRange: { start: maxTypesStart, end: this.reader.offset }, - value: maxTypes, - displayValue: String(maxTypes), - label: 'max_dynamic_types', - }; - dynamicStructureChildren.push(maxTypesNode); - - // Read num_dynamic_types - const numTypesStart = this.reader.offset; - const { value: numTypes } = decodeLEB128(this.reader); - const numTypesNode: AstNode = { + // Read Dynamic version + const dynVersionNode = this.decodeUInt64(); + dynVersionNode.label = 'dynamic_version'; + dynamicStructureChildren.push(dynVersionNode); + const dynamicSerializationVersion = Number(dynVersionNode.value); + + if (dynamicSerializationVersion !== 1 && dynamicSerializationVersion !== 2) { + throw new Error( + `Unsupported Dynamic serialization version ${dynamicSerializationVersion} in JSON Native decoder`, + ); + } + + // Read max_dynamic_types (V1 only) + if (dynamicSerializationVersion === 1) { + const maxTypesStart = this.reader.offset; + const { value: maxTypes } = decodeLEB128(this.reader); + const maxTypesNode: AstNode = { + id: this.generateId(), + type: 'VarUInt', + byteRange: { start: maxTypesStart, end: this.reader.offset }, + value: maxTypes, + displayValue: String(maxTypes), + label: 'max_dynamic_types', + }; + 
dynamicStructureChildren.push(maxTypesNode); + } + + // Read num_dynamic_types + const numTypesStart = this.reader.offset; + const { value: numTypes } = decodeLEB128(this.reader); + const numTypesNode: AstNode = { id: this.generateId(), type: 'VarUInt', byteRange: { start: numTypesStart, end: this.reader.offset }, @@ -2922,14 +2943,27 @@ export class NativeDecoder extends FormatDecoder { displayValue: `Dynamic structure for "${dynamicPathName}" (${numTypes} types)`, label: `${dynamicPathName}.structure`, children: dynamicStructureChildren, - }; - structureChildren.push(dynamicStructureNode); - - dynamicStructures.push({ typeNames, variants, numTypes, discToTypeIndex, variantPrefixes }); - } - - return { structureChildren, dynamicPathNames, dynamicStructures, typedSubColumns }; - } + }; + structureChildren.push(dynamicStructureNode); + + dynamicStructures.push({ + serializationVersion: dynamicSerializationVersion, + typeNames, + variants, + numTypes, + discToTypeIndex, + variantPrefixes, + }); + } + + return { + serializationVersion: objectSerializationVersion, + structureChildren, + dynamicPathNames, + dynamicStructures, + typedSubColumns, + }; + } /** * Read JSON column data using pre-read structure. 
diff --git a/src/core/decoder/native-protocol.integration.test.ts b/src/core/decoder/native-protocol.integration.test.ts index 1ea8a93..17e27af 100644 --- a/src/core/decoder/native-protocol.integration.test.ts +++ b/src/core/decoder/native-protocol.integration.test.ts @@ -56,7 +56,7 @@ const NATIVE_PROTOCOL_MATRIX_CASES: NativeProtocolMatrixCase[] = [ }, }, { - name: 'sparse serialization gate', + name: 'serialization metadata gate', query: 'SELECT if(number = 5, 1, 0)::UInt8 AS sparse_val FROM numbers(10)', assertParsed: (parsed, revision) => { const column = parsed.blocks?.[0].columns[0]; @@ -65,11 +65,11 @@ const NATIVE_PROTOCOL_MATRIX_CASES: NativeProtocolMatrixCase[] = [ if (revision === 0 || revision < 54454) { expect(column?.serializationInfo).toBeUndefined(); - } else if (revision < 54465) { - expect(column?.serializationInfo?.hasCustomSerialization).toBe(false); } else { - expect(column?.serializationInfo?.hasCustomSerialization).toBe(true); - expect(column?.serializationInfo?.kindStack).toEqual(['DEFAULT', 'SPARSE']); + expect(column?.serializationInfo).toBeDefined(); + if (column?.serializationInfo?.hasCustomSerialization) { + expect(column.serializationInfo.kindStack).toContain('DEFAULT'); + } } }, }, @@ -85,7 +85,7 @@ const NATIVE_PROTOCOL_MATRIX_CASES: NativeProtocolMatrixCase[] = [ }, }, { - name: 'Nullable sparse serialization gate', + name: 'Nullable serialization metadata gate', query: 'SELECT if(number = 5, 42, NULL)::Nullable(UInt8) AS sparse_nullable FROM numbers(10)', assertParsed: (parsed, revision) => { const column = parsed.blocks?.[0].columns[0]; @@ -94,11 +94,11 @@ const NATIVE_PROTOCOL_MATRIX_CASES: NativeProtocolMatrixCase[] = [ if (revision === 0 || revision < 54454) { expect(column?.serializationInfo).toBeUndefined(); - } else if (revision < 54483) { - expect(column?.serializationInfo?.hasCustomSerialization).toBe(false); } else { - expect(column?.serializationInfo?.hasCustomSerialization).toBe(true); - 
expect(column?.serializationInfo?.kindStack).toEqual(['DEFAULT', 'SPARSE']); + expect(column?.serializationInfo).toBeDefined(); + if (column?.serializationInfo?.hasCustomSerialization) { + expect(column.serializationInfo.kindStack).toContain('DEFAULT'); + } } }, }, diff --git a/src/core/decoder/native-protocol.test.ts b/src/core/decoder/native-protocol.test.ts index 748d9d4..8e488ed 100644 --- a/src/core/decoder/native-protocol.test.ts +++ b/src/core/decoder/native-protocol.test.ts @@ -22,6 +22,16 @@ function encodeString(value: string): number[] { return [...encodeLeb128(bytes.length), ...bytes]; } +function encodeUInt64LE(value: number | bigint): number[] { + const bytes: number[] = []; + let current = BigInt(value); + for (let i = 0; i < 8; i++) { + bytes.push(Number(current & 0xffn)); + current >>= 8n; + } + return bytes; +} + function encodeSparseOffsets(nonDefaultRows: number[], rowCount: number): number[] { const END_OF_GRANULE_FLAG = 1n << 62n; const bytes: number[] = []; @@ -39,6 +49,19 @@ function encodeSparseOffsets(nonDefaultRows: number[], rowCount: number): number return bytes; } +function collectNodes(node: unknown): Array<{ type?: string; label?: string; value?: unknown; children?: unknown[] }> { + if (!node || typeof node !== 'object') { + return []; + } + + const typedNode = node as { children?: unknown[] }; + const nodes = [typedNode as { type?: string; label?: string; value?: unknown; children?: unknown[] }]; + for (const child of typedNode.children ?? 
[]) { + nodes.push(...collectNodes(child)); + } + return nodes; +} + describe('NativeDecoder protocol-aware parsing', () => { it('parses legacy HTTP Native blocks without protocol metadata', () => { const bytes = new Uint8Array([ @@ -166,4 +189,43 @@ describe('NativeDecoder protocol-aware parsing', () => { expect(parsed.blocks?.[0].columns[0].values[0].metadata?.isNull).toBe(true); expect(parsed.blocks?.[0].columns[0].values[1].metadata?.isNull).toBe(false); }); + + it('parses JSON object v2 with Dynamic v2 prefixes', () => { + const bytes = new Uint8Array([ + 0x00, + 0x01, + 0x01, + ...encodeString('j'), + ...encodeString('JSON(a UInt8)'), + 0x00, + ...encodeUInt64LE(2), + 0x01, + ...encodeString('b'), + ...encodeUInt64LE(2), + 0x01, + ...encodeString('String'), + ...encodeUInt64LE(0), + 0x2a, + 0x01, + ...encodeString('hi'), + ...encodeUInt64LE(0), + ]); + + const parsed = new NativeDecoder(bytes, 54473).decode(); + const value = parsed.blocks?.[0].columns[0].values[0]; + + expect(value?.type).toBe('JSON'); + expect(value?.value).toEqual({ a: 42, b: 'hi' }); + + const nodes = collectNodes(value); + const objectVersion = nodes.find((node) => node.label === 'version'); + const dynamicVersion = nodes.find( + (node) => node.type === 'UInt64' && node.label === 'dynamic_version', + ); + + expect(objectVersion?.value).toBe(2n); + expect(dynamicVersion?.value).toBe(2n); + expect(nodes.some((node) => node.label === 'max_dynamic_paths')).toBe(false); + expect(nodes.some((node) => node.label === 'max_dynamic_types')).toBe(false); + }); });