From ef19d18b92002e2c66741284c94dcc29183f8046 Mon Sep 17 00:00:00 2001 From: Mikhail Filimonov Date: Wed, 7 Jan 2026 23:40:48 +0100 Subject: [PATCH] Add support for ALIAS columns in segments for engine=Hybrid --- src/Storages/StorageDistributed.cpp | 19 ++-- .../03645_hybrid_alias_columns.reference | 9 ++ .../03645_hybrid_alias_columns.sql | 97 +++++++++++++++++++ 3 files changed, 118 insertions(+), 7 deletions(-) create mode 100644 tests/queries/0_stateless/03645_hybrid_alias_columns.reference create mode 100644 tests/queries/0_stateless/03645_hybrid_alias_columns.sql diff --git a/src/Storages/StorageDistributed.cpp b/src/Storages/StorageDistributed.cpp index d518887b3f6d..57bd7e753075 100644 --- a/src/Storages/StorageDistributed.cpp +++ b/src/Storages/StorageDistributed.cpp @@ -2247,8 +2247,9 @@ void StorageDistributed::setCachedColumnsToCast(ColumnsDescription columns) if (!cached_columns_to_cast.empty() && log) { Names columns_with_types; - columns_with_types.reserve(cached_columns_to_cast.getAllPhysical().size()); - for (const auto & col : cached_columns_to_cast.getAllPhysical()) + const auto cached_columns = cached_columns_to_cast.getAllPhysical(); + columns_with_types.reserve(cached_columns.size()); + for (const auto & col : cached_columns) columns_with_types.emplace_back(col.name + " " + col.type->getName()); LOG_DEBUG(log, "Hybrid auto-cast will apply to: [{}]", fmt::join(columns_with_types, ", ")); } @@ -2420,12 +2421,15 @@ void registerStorageHybrid(StorageFactory & factory) if (columns_to_use.empty()) columns_to_use = first_segment_columns; + const auto physical_columns = columns_to_use.getAllPhysical(); + NameSet columns_to_cast_names; auto validate_segment_schema = [&](const ColumnsDescription & segment_columns, const String & segment_name) { - for (const auto & column : columns_to_use.getAllPhysical()) + for (const auto & column : physical_columns) { - auto found = segment_columns.tryGetPhysical(column.name); + // all columns defined as physical in 
hybrid should exist in segments (but can be aliases there) + auto found = segment_columns.tryGetColumn(GetColumnsOptions(GetColumnsOptions::AllPhysicalAndAliases), column.name); if (!found) { throw Exception( @@ -2434,6 +2438,7 @@ void registerStorageHybrid(StorageFactory & factory) segment_name, column.name); } + // if the type of the column in the segment differs - we need to add it to the list of columns which require casts if (!found->type->equals(*column.type)) columns_to_cast_names.emplace(column.name); } @@ -2467,8 +2472,6 @@ void registerStorageHybrid(StorageFactory & factory) "TableFunctionRemote did not return a StorageDistributed or StorageProxy, got: {}", actual_type); } - const auto physical_columns = columns_to_use.getAllPhysical(); - auto validate_predicate = [&](ASTPtr & predicate, size_t argument_index) { try @@ -2619,7 +2622,9 @@ void registerStorageHybrid(StorageFactory & factory) if (!columns_to_cast_names.empty()) { NamesAndTypesList cast_cols; - for (const auto & col : columns_to_use.getAllPhysical()) + + // 'physical' columns of Hybrid will be read from segments, and may need CASTS + for (const auto & col : physical_columns) { if (columns_to_cast_names.contains(col.name)) cast_cols.emplace_back(col.name, col.type); diff --git a/tests/queries/0_stateless/03645_hybrid_alias_columns.reference b/tests/queries/0_stateless/03645_hybrid_alias_columns.reference new file mode 100644 index 000000000000..d20eba96c358 --- /dev/null +++ b/tests/queries/0_stateless/03645_hybrid_alias_columns.reference @@ -0,0 +1,9 @@ +test1 +1 ['foo1','bar1_before'] foo1 +2 ['foo2','bar2_after'] foo2 +Insert into Hybrid with EPHEMERAL column +2 0A0B0C0D +Select from Hybrid with EPHEMERAL column +1 5A90B714 +2 0A0B0C0D +10 01020304 diff --git a/tests/queries/0_stateless/03645_hybrid_alias_columns.sql b/tests/queries/0_stateless/03645_hybrid_alias_columns.sql new file mode 100644 index 000000000000..ab9dde1eae0d --- /dev/null +++
b/tests/queries/0_stateless/03645_hybrid_alias_columns.sql @@ -0,0 +1,97 @@ +SET allow_experimental_hybrid_table = 1, + prefer_localhost_replica = 0; + +DROP TABLE IF EXISTS test_hybrid_alias_cast; +DROP TABLE IF EXISTS test_hybrid_alias_after; +DROP TABLE IF EXISTS test_hybrid_alias_before; + +CREATE TABLE test_hybrid_alias_after +( + a UInt32, + arr Array(String), + arr_1 ALIAS arr[1] +) +ENGINE = MergeTree() +ORDER BY (a, arr[1]) +SETTINGS index_granularity = 1; + +CREATE TABLE test_hybrid_alias_before +( + a UInt32, + arr Array(String), + arr_1 MATERIALIZED arr[1] +) +ENGINE = MergeTree() +ORDER BY (a, arr_1) +SETTINGS index_granularity = 1; + +INSERT INTO test_hybrid_alias_after VALUES (1, ['foo1', 'bar1_after']), (2, ['foo2', 'bar2_after']); +INSERT INTO test_hybrid_alias_before VALUES (1, ['foo1', 'bar1_before']), (2, ['foo2', 'bar2_before']); + +CREATE TABLE test_hybrid_alias_cast +( + a UInt32, + arr Array(String), + arr_1 String +) +ENGINE = Hybrid( + remote('127.0.0.1:9000', currentDatabase(), 'test_hybrid_alias_after'), + a >= 2, + remote('127.0.0.1:9000', currentDatabase(), 'test_hybrid_alias_before'), + a < 2 +); + +SELECT 'test1'; +SELECT * FROM test_hybrid_alias_cast WHERE arr_1 like 'foo%' ORDER BY a; + +DROP TABLE test_hybrid_alias_cast; +DROP TABLE test_hybrid_alias_after; +DROP TABLE test_hybrid_alias_before; + +DROP TABLE IF EXISTS test_hybrid_ephem; +DROP TABLE IF EXISTS test_hybrid_ephem_after; +DROP TABLE IF EXISTS test_hybrid_ephem_before; + +CREATE TABLE test_hybrid_ephem_after +( + id UInt64, + unhexed String EPHEMERAL, + hexed FixedString(4) DEFAULT unhex(unhexed) +) +ENGINE = MergeTree() +ORDER BY id; + +CREATE TABLE test_hybrid_ephem_before +( + id UInt64, + hexed FixedString(4) +) +ENGINE = MergeTree() +ORDER BY id; + +INSERT INTO test_hybrid_ephem_after (id, unhexed) VALUES (1, '5a90b714'); +INSERT INTO test_hybrid_ephem_before (id, hexed) VALUES (10, unhex('01020304')); + +CREATE TABLE test_hybrid_ephem +( + id UInt64, + unhexed 
String EPHEMERAL, + hexed FixedString(4) DEFAULT unhex(unhexed) +) +ENGINE = Hybrid( + remote('127.0.0.1:9000', currentDatabase(), 'test_hybrid_ephem_after'), + id < 10, + remote('127.0.0.1:9000', currentDatabase(), 'test_hybrid_ephem_before'), + id >= 10 +); + +SELECT 'Insert into Hybrid with EPHEMERAL column'; +INSERT INTO test_hybrid_ephem (id, unhexed) VALUES (2, '0a0b0c0d'); +SELECT id, hex(hexed) FROM test_hybrid_ephem_after WHERE id = 2; + +SELECT 'Select from Hybrid with EPHEMERAL column'; +SELECT id, hex(hexed) FROM test_hybrid_ephem ORDER BY id; + +DROP TABLE test_hybrid_ephem; +DROP TABLE test_hybrid_ephem_after; +DROP TABLE test_hybrid_ephem_before;