Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
53 changes: 37 additions & 16 deletions be/src/core/data_type_serde/data_type_variant_serde.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,8 @@

#include "core/data_type_serde/data_type_variant_serde.h"

#include <arrow/array/builder_binary.h>

#include <cstdint>
#include <string>

Expand All @@ -37,6 +39,32 @@
#include "util/jsonb_writer.h"

namespace doris {
namespace {

template <typename BuilderType>
Status write_variant_column_to_arrow_impl(const IColumn& column, const ColumnVariant& var,
const NullMap* null_map, BuilderType& builder,
int64_t start, int64_t end, const cctz::time_zone& ctz) {
DataTypeSerDe::FormatOptions options;
options.timezone = &ctz;
for (int64_t i = start; i < end; ++i) {
if (null_map && (*null_map)[cast_set<size_t>(i)]) {
RETURN_IF_ERROR(checkArrowStatus(builder.AppendNull(), column.get_name(),
builder.type()->name()));
continue;
}

std::string serialized_value;
var.serialize_one_row_to_string(i, &serialized_value, options);
const auto serialized_size =
cast_set<typename BuilderType::offset_type>(serialized_value.size());
RETURN_IF_ERROR(checkArrowStatus(builder.Append(serialized_value.data(), serialized_size),
column.get_name(), builder.type()->name()));
}
return Status::OK();
}

} // namespace

#include "common/compile_check_begin.h"

Expand Down Expand Up @@ -130,23 +158,16 @@ Status DataTypeVariantSerDe::write_column_to_arrow(const IColumn& column, const
int64_t start, int64_t end,
const cctz::time_zone& ctz) const {
const auto* var = check_and_get_column<ColumnVariant>(column);
auto& builder = assert_cast<arrow::StringBuilder&>(*array_builder);
FormatOptions options;
options.timezone = &ctz;
for (size_t i = start; i < end; ++i) {
if (null_map && (*null_map)[i]) {
RETURN_IF_ERROR(checkArrowStatus(builder.AppendNull(), column.get_name(),
array_builder->type()->name()));
} else {
std::string serialized_value;
var->serialize_one_row_to_string(i, &serialized_value, options);
RETURN_IF_ERROR(
checkArrowStatus(builder.Append(serialized_value.data(),
static_cast<int>(serialized_value.size())),
column.get_name(), array_builder->type()->name()));
}
if (array_builder->type()->id() == arrow::Type::LARGE_STRING) {
auto& builder = assert_cast<arrow::LargeStringBuilder&>(*array_builder);
return write_variant_column_to_arrow_impl(column, *var, null_map, builder, start, end, ctz);
} else if (array_builder->type()->id() == arrow::Type::STRING) {
auto& builder = assert_cast<arrow::StringBuilder&>(*array_builder);
return write_variant_column_to_arrow_impl(column, *var, null_map, builder, start, end, ctz);
} else {
return Status::InvalidArgument("Unsupported arrow type for variant column: {}",
array_builder->type()->name());
}
return Status::OK();
}

void DataTypeVariantSerDe::to_string(const IColumn& column, size_t row_num, BufferWritable& bw,
Expand Down
25 changes: 25 additions & 0 deletions be/test/core/data_type_serde/data_type_serde_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@

#include "core/data_type_serde/data_type_serde.h"

#include <arrow/api.h>
#include <gen_cpp/types.pb.h>
#include <gtest/gtest-message.h>
#include <gtest/gtest-test-part.h>
Expand Down Expand Up @@ -48,6 +49,7 @@
#include "core/data_type/data_type_number.h"
#include "core/data_type/data_type_quantilestate.h"
#include "core/data_type/data_type_string.h"
#include "core/data_type/data_type_variant.h"
#include "core/types.h"
#include "core/value/bitmap_value.h"
#include "core/value/hll.h"
Expand Down Expand Up @@ -600,4 +602,27 @@ TEST(DataTypeSerDeTest, DeserializeFromSparseColumnTest) {
EXPECT_EQ(subcolumn.get_least_common_base_type_id(), PrimitiveType::TYPE_JSONB);
}
}

TEST(DataTypeSerDeTest, VariantWriteColumnToArrowSupportsLargeString) {
auto variant_column = ColumnVariant::create(0, false);
VariantMap root;
root.try_emplace(PathInData(), FieldWithDataType {.field = Field::create_field<TYPE_STRING>(
String("variant value", 13))});
variant_column->try_insert(Field::create_field<TYPE_VARIANT>(std::move(root)));

auto data_type = std::make_shared<DataTypeVariant>();
auto serde = data_type->get_serde(0);
arrow::LargeStringBuilder builder;
auto ctz = cctz::utc_time_zone();
auto st = serde->write_column_to_arrow(*variant_column, nullptr, &builder, 0,
variant_column->size(), ctz);
EXPECT_TRUE(st.ok()) << st.to_string();

std::shared_ptr<arrow::Array> array;
ASSERT_TRUE(builder.Finish(&array).ok());
auto* string_array = dynamic_cast<arrow::LargeStringArray*>(array.get());
ASSERT_NE(string_array, nullptr);
ASSERT_EQ(string_array->length(), 1);
EXPECT_EQ(string_array->GetString(0), "variant value");
}
} // namespace doris
Loading