From 67c5920fb4dfb7d0b8e2fb6830c0f316f5a36880 Mon Sep 17 00:00:00 2001 From: lihangyu-x Date: Wed, 3 Jun 2026 20:24:21 +0800 Subject: [PATCH] [fix](be) Materialize variant defaults when copying ranges ### What problem does this PR solve? Issue Number: close #0 Related PR: #0 Problem Summary: Copying a VARIANT subcolumn range could skip the source subcolumn's pending default suffix. The destination variant kept the requested logical row count while its finalized root column could remain physically shorter than the copied range. Exchange and join paths that copy blocks containing such VARIANT columns could then read missing rows, fail, or return unstable results. The fix appends the remaining default rows after copied physical parts so the logical and physical row counts stay aligned. ### Release note Fix an issue where queries using VARIANT columns through exchange or join paths could fail or return unstable results when copied VARIANT subcolumns contained pending default rows. ### Check List (For Author) - Test - [x] Unit Test: ./run-be-ut.sh --run --filter='ColumnVariantTest.insert_range_from_materializes_pending_default_suffix' - [x] Build: ./build.sh --be - [x] Manual test: attached local-shuffle LEFT ANTI query loop, 8 workers x 100 iterations, all 800 results were 0 with no ColumnVector or insert_range_from errors. - [x] Manual test: constructed complex VARIANT hash join with local shuffle enabled and disabled returned identical hashes; local shuffle enabled loop was stable for 100 iterations. - Behavior changed: - [x] Yes. VARIANT range copies now materialize pending default rows so copied columns remain physically aligned with logical row counts. - Does this need documentation? - [x] No. 
--- be/src/core/column/column_variant.cpp | 4 +++ be/test/core/column/column_variant_test.cpp | 28 +++++++++++++++++++++ 2 files changed, 32 insertions(+) diff --git a/be/src/core/column/column_variant.cpp b/be/src/core/column/column_variant.cpp index 88cd501acb3bf6..f9e7282de229c0 100644 --- a/be/src/core/column/column_variant.cpp +++ b/be/src/core/column/column_variant.cpp @@ -458,6 +458,10 @@ void ColumnVariant::Subcolumn::insert_range_from(const Subcolumn& src, size_t st if (pos < src.data.size() && processed_rows < end) { size_t part_end = end - processed_rows; insert_from_part(src.data[pos], src.data_types[pos], 0, part_end); + processed_rows = end; + } + if (processed_rows < end) { + data.back()->insert_many_defaults(end - processed_rows); } } diff --git a/be/test/core/column/column_variant_test.cpp b/be/test/core/column/column_variant_test.cpp index dff9e2c0ae5a77..1cf0d71a7c1427 100644 --- a/be/test/core/column/column_variant_test.cpp +++ b/be/test/core/column/column_variant_test.cpp @@ -34,6 +34,7 @@ #include "core/column/subcolumn_tree.h" #include "core/data_type/data_type_array.h" #include "core/data_type/data_type_factory.hpp" +#include "core/data_type/data_type_number.h" #include "core/data_type/define_primitive_type.h" #include "core/field.h" #include "core/string_ref.h" @@ -1061,6 +1062,33 @@ TEST_F(ColumnVariantTest, test_insert_indices_from) { } } +TEST_F(ColumnVariantTest, insert_range_from_materializes_pending_default_suffix) { + auto nested = ColumnInt64::create(); + nested->insert_value(7); + auto null_map = ColumnUInt8::create(); + null_map->insert_value(0); + + auto root_type = make_nullable(std::make_shared<DataTypeInt64>()); + auto root_column = ColumnNullable::create(std::move(nested), std::move(null_map)); + ColumnVariant::Subcolumn root(std::move(root_column), root_type, true, true); + root.increment_default_counter(); + + ColumnVariant::Subcolumns subcolumns; + subcolumns.create_root(std::move(root)); + auto src = ColumnVariant::create(0, false, 
std::move(subcolumns)); + EXPECT_EQ(src->size(), 2); + + auto dst = ColumnVariant::create(0, false); + dst->insert_range_from(*src, 0, 2); + dst->finalize(); + + const auto& copied_root = + assert_cast<const ColumnNullable&>(*static_cast<const ColumnVariant&>(*dst).get_root()); + EXPECT_EQ(copied_root.size(), 2); + EXPECT_EQ(copied_root.get_null_map_data()[0], 0); + EXPECT_EQ(copied_root.get_null_map_data()[1], 1); +} + TEST_F(ColumnVariantTest, is_variable_length) { EXPECT_TRUE(column_variant->is_variable_length()); }