diff --git a/be/src/core/data_type_serde/data_type_variant_serde.cpp b/be/src/core/data_type_serde/data_type_variant_serde.cpp index 8692136cbca65d..efd35971dcbbb9 100644 --- a/be/src/core/data_type_serde/data_type_variant_serde.cpp +++ b/be/src/core/data_type_serde/data_type_variant_serde.cpp @@ -17,6 +17,8 @@ #include "core/data_type_serde/data_type_variant_serde.h" +#include + #include #include @@ -37,6 +39,32 @@ #include "util/jsonb_writer.h" namespace doris { +namespace { + +template +Status write_variant_column_to_arrow_impl(const IColumn& column, const ColumnVariant& var, + const NullMap* null_map, BuilderType& builder, + int64_t start, int64_t end, const cctz::time_zone& ctz) { + DataTypeSerDe::FormatOptions options; + options.timezone = &ctz; + for (int64_t i = start; i < end; ++i) { + if (null_map && (*null_map)[cast_set(i)]) { + RETURN_IF_ERROR(checkArrowStatus(builder.AppendNull(), column.get_name(), + builder.type()->name())); + continue; + } + + std::string serialized_value; + var.serialize_one_row_to_string(i, &serialized_value, options); + const auto serialized_size = + cast_set(serialized_value.size()); + RETURN_IF_ERROR(checkArrowStatus(builder.Append(serialized_value.data(), serialized_size), + column.get_name(), builder.type()->name())); + } + return Status::OK(); +} + +} // namespace #include "common/compile_check_begin.h" @@ -130,23 +158,16 @@ Status DataTypeVariantSerDe::write_column_to_arrow(const IColumn& column, const int64_t start, int64_t end, const cctz::time_zone& ctz) const { const auto* var = check_and_get_column(column); - auto& builder = assert_cast(*array_builder); - FormatOptions options; - options.timezone = &ctz; - for (size_t i = start; i < end; ++i) { - if (null_map && (*null_map)[i]) { - RETURN_IF_ERROR(checkArrowStatus(builder.AppendNull(), column.get_name(), - array_builder->type()->name())); - } else { - std::string serialized_value; - var->serialize_one_row_to_string(i, &serialized_value, options); - RETURN_IF_ERROR( - checkArrowStatus(builder.Append(serialized_value.data(), - static_cast(serialized_value.size())), - column.get_name(), array_builder->type()->name())); - } + if (array_builder->type()->id() == arrow::Type::LARGE_STRING) { + auto& builder = assert_cast(*array_builder); + return write_variant_column_to_arrow_impl(column, *var, null_map, builder, start, end, ctz); + } else if (array_builder->type()->id() == arrow::Type::STRING) { + auto& builder = assert_cast(*array_builder); + return write_variant_column_to_arrow_impl(column, *var, null_map, builder, start, end, ctz); + } else { + return Status::InvalidArgument("Unsupported arrow type for variant column: {}", + array_builder->type()->name()); } - return Status::OK(); } void DataTypeVariantSerDe::to_string(const IColumn& column, size_t row_num, BufferWritable& bw, diff --git a/be/test/core/data_type_serde/data_type_serde_test.cpp b/be/test/core/data_type_serde/data_type_serde_test.cpp index 9e402179a63f04..6cfb0cb4d10144 100644 --- a/be/test/core/data_type_serde/data_type_serde_test.cpp +++ b/be/test/core/data_type_serde/data_type_serde_test.cpp @@ -18,6 +18,7 @@ #include "core/data_type_serde/data_type_serde.h" +#include #include #include #include @@ -48,6 +49,7 @@ #include "core/data_type/data_type_number.h" #include "core/data_type/data_type_quantilestate.h" #include "core/data_type/data_type_string.h" +#include "core/data_type/data_type_variant.h" #include "core/types.h" #include "core/value/bitmap_value.h" #include "core/value/hll.h" @@ -600,4 +602,27 @@ TEST(DataTypeSerDeTest, DeserializeFromSparseColumnTest) { EXPECT_EQ(subcolumn.get_least_common_base_type_id(), PrimitiveType::TYPE_JSONB); } } + +TEST(DataTypeSerDeTest, VariantWriteColumnToArrowSupportsLargeString) { + auto variant_column = ColumnVariant::create(0, false); + VariantMap root; + root.try_emplace(PathInData(), FieldWithDataType {.field = Field::create_field( + String("variant value", 13))}); + variant_column->try_insert(Field::create_field(std::move(root))); + + auto data_type = std::make_shared(); + auto serde = data_type->get_serde(0); + arrow::LargeStringBuilder builder; + auto ctz = cctz::utc_time_zone(); + auto st = serde->write_column_to_arrow(*variant_column, nullptr, &builder, 0, + variant_column->size(), ctz); + EXPECT_TRUE(st.ok()) << st.to_string(); + + std::shared_ptr array; + ASSERT_TRUE(builder.Finish(&array).ok()); + auto* string_array = dynamic_cast(array.get()); + ASSERT_NE(string_array, nullptr); + ASSERT_EQ(string_array->length(), 1); + EXPECT_EQ(string_array->GetString(0), "variant value"); +} } // namespace doris