Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
30 changes: 30 additions & 0 deletions clickhouse/types/type_parser.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -173,6 +173,7 @@ bool TypeParser::Parse(TypeAst* type) {
type_->code = Type::String;
break;
}
case Token::QuotedIdentifier:
case Token::Name:
if (!type_->name.empty()) {
// A second Name token on the same element means the
Expand Down Expand Up @@ -260,6 +261,35 @@ TypeParser::Token TypeParser::NextToken() {
}
return Token{Token::QuotedString, StringView(cur_++, 1)};
}
case '"':
case '`':
{
const auto quote = *cur_;
++cur_;
// Two escape forms are recognised, both quote-specific (e.g.
// inside a backtick-quoted identifier only backtick escapes
// apply; a doubled double-quote is treated as two literals):
// \q – backslash followed by the opening quote character
// qq – two consecutive opening quote characters
scratch_.clear();
for (; cur_ < end_; ++cur_) {
if (*cur_ == '\\' && cur_ + 1 < end_ && *(cur_ + 1) == quote) {
scratch_ += quote;
++cur_;
} else if (*cur_ == quote) {
if (cur_ + 1 < end_ && *(cur_ + 1) == quote) {
scratch_ += quote;
++cur_;
} else {
++cur_;
return Token{Token::QuotedIdentifier, StringView{scratch_}};
}
} else {
scratch_ += *cur_;
}
}
return Token{Token::Invalid, StringView()};
}

default: {
const char* st = cur_;
Expand Down
6 changes: 6 additions & 0 deletions clickhouse/types/type_parser.h
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,7 @@ class TypeParser {
RPar,
Comma,
QuotedString, // string with quotation marks included
QuotedIdentifier,
EOS,
};

Expand All @@ -84,6 +85,11 @@ class TypeParser {

TypeAst* type_;
std::stack<TypeAst*> open_elements_;
// Backing storage for QuotedIdentifier token values. The content of every
// quoted identifier, with any escape sequences already resolved, is written
// here and the returned StringView points into this string.
// Valid only until the next NextToken() call.
std::string scratch_;
};


Expand Down
46 changes: 34 additions & 12 deletions clickhouse/types/types.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -473,24 +473,46 @@ LowCardinalityType::LowCardinalityType(TypeRef nested_type) : Type(LowCardinalit
LowCardinalityType::~LowCardinalityType() {
}

// Tells whether `name` can be emitted as-is, i.e. without quoting.
// A plain identifier is non-empty, starts with an ASCII letter or '_' and
// continues with ASCII letters, digits or '_' only, which is what the
// pattern `^[a-zA-Z_][0-9a-zA-Z_]*$` describes.
static bool IsPlainIdentifier(const std::string& name) {
    const auto is_letter = [](char ch) {
        return ('a' <= ch && ch <= 'z') || ('A' <= ch && ch <= 'Z');
    };
    const auto is_digit = [](char ch) { return '0' <= ch && ch <= '9'; };

    bool at_start = true;
    for (const char ch : name) {
        // Digits are acceptable everywhere except in the leading position.
        const bool accepted = ch == '_' || is_letter(ch) || (!at_start && is_digit(ch));
        if (!accepted) return false;
        at_start = false;
    }
    // `at_start` is still set only when the loop never ran, i.e. `name` is empty.
    return !at_start;
}

// Writes the tuple field `name` to the end of `out`.
// Names that IsPlainIdentifier() accepts are copied verbatim. Any other name
// is wrapped in backticks, with each backtick inside it doubled.
static void AppendFieldname(const std::string& name, std::string& out) {
    if (!IsPlainIdentifier(name)) {
        out.push_back('`');
        for (const char ch : name) {
            out.push_back(ch);
            // A second copy of the backtick marks it as escaped.
            if (ch == '`') out.push_back('`');
        }
        out.push_back('`');
        return;
    }
    out.append(name);
}

std::string TupleType::GetName() const {
std::string result("Tuple(");
bool has_complete_names = !item_names_.empty();

if (!item_types_.empty()) {
if (has_complete_names) {
result += item_names_[0] + " " + item_types_[0]->GetName();
} else {
result += item_types_[0]->GetName();
}
}

for (size_t i = 1; i < item_types_.size(); ++i) {
for (size_t i = 0; i < item_types_.size(); ++i) {
if (i > 0)
result += ", ";
if (has_complete_names) {
result += ", " + item_names_[i] + " " + item_types_[i]->GetName();
} else {
result += ", " + item_types_[i]->GetName();
AppendFieldname(item_names_[i], result);
result += ' ';
}
result += item_types_[i]->GetName();
}

result += ")";
Expand Down
16 changes: 14 additions & 2 deletions ut/abnormal_column_names_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -74,8 +74,20 @@ INSTANTIATE_TEST_SUITE_P(ClientColumnNames, AbnormalColumnNamesClientTest,
.SetSendRetries(1)
.SetPingBeforeQuery(true)
.SetCompressionMethod(CompressionMethod::None),
{"select 123,231,113", "select 'ABC','AAA','BBB','CCC'"},
{"123,231,113", "'ABC','AAA','BBB','CCC'"},
/* queries = */ {
"select 123,231,113",
"select 'ABC','AAA','BBB','CCC'",
"select 'A.B','C.D'",
"select 'A`B','C``D'",
"select 'A\\`B','C\\`\\`D'"
},
/* expected column names = */ {
"123,231,113",
"'ABC','AAA','BBB','CCC'",
"'A.B','C.D'",
"'A`B','C``D'",
"'A`B','C``D'"
},
}
));

9 changes: 9 additions & 0 deletions ut/columns_ut.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -298,6 +298,15 @@ TEST(ColumnsCase, TupleSlice){
ASSERT_EQ((*tuple2)[1]->As<ColumnString>()->At(0), "3");
}

// A column created from a tuple type with backtick-quoted field names must
// expose those names with the quotes removed.
TEST(ColumnsCase, TupleWithQuotedFieldNames) {
    const auto column = CreateColumnByType("Tuple(`a.b` Int8, `c.d` String)");
    ASSERT_NE(column, nullptr);

    const auto tuple_column = column->AsStrict<ColumnTuple>();
    const auto& field_names = tuple_column->Type()->As<TupleType>()->GetItemNames();
    ASSERT_EQ(field_names.size(), 2u);
    EXPECT_EQ(field_names[0], "a.b");
    EXPECT_EQ(field_names[1], "c.d");
}

TEST(ColumnsCase, TimeAppend) {
auto col = std::make_shared<ColumnTime>();
col->Append(1);
Expand Down
59 changes: 59 additions & 0 deletions ut/roundtrip_tests.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -255,6 +255,65 @@ TEST_P(RoundtripCase, TupleTNullableString) {
EXPECT_TRUE(CompareRecursive(*col, *result_typed));
}

// Sends a named tuple whose field names need quoting (a dot, one backtick,
// two backticks) through the server and checks that both the values and the
// field names come back unchanged.
TEST_P(RoundtripCase, TupleWithQuotedFieldNames) {
    auto int8_field = std::make_shared<ColumnInt8>(std::vector<int8_t>{1});
    auto int16_field = std::make_shared<ColumnInt16>(std::vector<int16_t>{2});
    auto int32_field = std::make_shared<ColumnInt32>(std::vector<int32_t>{3});
    auto col = std::make_shared<ColumnTuple>(
        std::vector<ColumnRef>({int8_field, int16_field, int32_field}),
        std::vector<std::string>{"a.a", "b`b", "c``c"}
    );

    auto result = RoundtripColumnValues(*client_, col)->AsStrict<ColumnTuple>();

    // Values: every tuple element must match its counterpart.
    for (size_t i = 0; i < 3; ++i) {
        EXPECT_TRUE(CompareRecursive(*col->At(i), *result->At(i)));
    }

    // Names: the unescaped originals are expected back.
    const auto& received_names = result->Type()->As<TupleType>()->GetItemNames();
    ASSERT_EQ(received_names.size(), 3u);
    EXPECT_EQ(received_names[0], "a.a");
    EXPECT_EQ(received_names[1], "b`b");
    EXPECT_EQ(received_names[2], "c``c");
}

// Stores a named tuple whose field names need quoting, then reads the
// individual fields back by name using both supported escape spellings.
TEST_P(RoundtripCase, SelectTupleByFieldNames) {
    auto int8_field = std::make_shared<ColumnInt8>(std::vector<int8_t>{1});
    auto int16_field = std::make_shared<ColumnInt16>(std::vector<int16_t>{2});
    auto int32_field = std::make_shared<ColumnInt32>(std::vector<int32_t>{3});
    auto col = std::make_shared<ColumnTuple>(
        std::vector<ColumnRef>({int8_field, int16_field, int32_field}),
        std::vector<std::string>{"a.a", "b`b", "c``c"}
    );

    // The returned column is deliberately ignored: the data is fetched again
    // below with a hand-written SELECT.
    RoundtripColumnValues(*client_, col)->AsStrict<ColumnTuple>();

    // NOTE: A backtick inside a quoted name has to be escaped in the query text,
    // either as "\\`" (the C++ literal "\\" yields a single "\") or as a doubled
    // backtick "``". The type parser handles this for columns we create or
    // receive, but in a query we write ourselves it has to be done by hand.
    const char* const select_by_name =
        "SELECT "
        " col.`a.a`, "
        " col.`b``b`, col.`b\\`b`, "
        " col.`c````c`, col.`c\\`\\`c` "
        "FROM temporary_roundtrip_table "
        "ORDER BY id";
    client_->BeginSelect(select_by_name);

    // Keep the most recent block that actually carries rows.
    Block final_block;
    for (;;) {
        auto block = client_->NextBlock();
        if (!block) {
            break;
        }
        if (block->GetRowCount() > 0) {
            final_block = *block;
        }
    }

    // Result columns 1/2 and 3/4 are two spellings of the same tuple field.
    EXPECT_TRUE(CompareRecursive(*col->At(0), *final_block.At(0)));
    EXPECT_TRUE(CompareRecursive(*col->At(1), *final_block.At(1)));
    EXPECT_TRUE(CompareRecursive(*col->At(1), *final_block.At(2)));
    EXPECT_TRUE(CompareRecursive(*col->At(2), *final_block.At(3)));
    EXPECT_TRUE(CompareRecursive(*col->At(2), *final_block.At(4)));
}

TEST_P(RoundtripCase, Map_TString_TNullableString) {
using Key = ColumnString;
using Value = ColumnNullableT<ColumnString>;
Expand Down
71 changes: 71 additions & 0 deletions ut/type_parser_ut.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -133,6 +133,77 @@ TEST(TypeParserCase, ParseNamedTuple) {
ASSERT_EQ(ast.elements[1].code, Type::String);
}

// Backtick-quoted element names are accepted and stored without the quotes.
TEST(TypeParserCase, ParseNamedTuple_BacktickQuotedFieldNames) {
    TypeAst ast;
    TypeParser parser("Tuple(`a.b` Int8, `c.d` String)");
    ASSERT_TRUE(parser.Parse(&ast));
    ASSERT_EQ(ast.meta, TypeAst::Tuple);
    ASSERT_EQ(ast.elements.size(), 2u);

    const auto& first = ast.elements[0];
    ASSERT_EQ(first.element_name, "a.b");
    ASSERT_EQ(first.name, "Int8");
    ASSERT_EQ(first.code, Type::Int8);

    const auto& second = ast.elements[1];
    ASSERT_EQ(second.element_name, "c.d");
    ASSERT_EQ(second.name, "String");
    ASSERT_EQ(second.code, Type::String);
}

// Double-quoted element names are accepted and stored without the quotes.
TEST(TypeParserCase, ParseNamedTuple_DoubleQuotedFieldNames) {
    TypeAst ast;
    TypeParser parser("Tuple(\"a.b\" Int8, \"c.d\" String)");
    ASSERT_TRUE(parser.Parse(&ast));
    ASSERT_EQ(ast.meta, TypeAst::Tuple);
    ASSERT_EQ(ast.elements.size(), 2u);

    const auto& first = ast.elements[0];
    ASSERT_EQ(first.element_name, "a.b");
    ASSERT_EQ(first.name, "Int8");
    ASSERT_EQ(first.code, Type::Int8);

    const auto& second = ast.elements[1];
    ASSERT_EQ(second.element_name, "c.d");
    ASSERT_EQ(second.name, "String");
    ASSERT_EQ(second.code, Type::String);
}

// A quote that is opened but never closed, or closed but never opened,
// makes parsing fail.
TEST(TypeParserCase, ParseNamedTuple_UnterminatedQuote) {
    TypeAst ast;

    TypeParser missing_closing_quote("Tuple(`a.b Int8)");
    EXPECT_FALSE(missing_closing_quote.Parse(&ast));

    TypeParser missing_opening_quote("Tuple(a.b` Int8)");
    EXPECT_FALSE(missing_opening_quote.Parse(&ast));
}

// Inside a backtick-quoted name, "``" stands for one literal backtick.
TEST(TypeParserCase, ParseNamedTuple_DoubledBacktickEscape) {
    TypeAst ast;
    TypeParser parser("Tuple(`a``b` UInt8)");
    ASSERT_TRUE(parser.Parse(&ast));

    const auto& element = ast.elements[0];
    ASSERT_EQ(element.element_name, "a`b");
    ASSERT_EQ(element.code, Type::UInt8);
}

// Inside a backtick-quoted name, a backslash followed by a backtick stands
// for one literal backtick.
TEST(TypeParserCase, ParseNamedTuple_BackslashBacktickEscape) {
    TypeAst ast;
    TypeParser parser("Tuple(`a\\`b` UInt8)");
    ASSERT_TRUE(parser.Parse(&ast));

    const auto& element = ast.elements[0];
    ASSERT_EQ(element.element_name, "a`b");
    ASSERT_EQ(element.code, Type::UInt8);
}

// Inside a backtick-quoted name, two consecutive double quotes are not an
// escape: both characters are kept.
TEST(TypeParserCase, ParseNamedTuple_DoubleQuoteNotEscape) {
    TypeAst ast;
    TypeParser parser("Tuple(`a\"\"b` UInt8)");
    ASSERT_TRUE(parser.Parse(&ast));

    const auto& element = ast.elements[0];
    ASSERT_EQ(element.element_name, "a\"\"b");
    ASSERT_EQ(element.code, Type::UInt8);
}

// Inside a double-quoted name, two consecutive double quotes stand for one
// literal double quote.
TEST(TypeParserCase, ParseNamedTuple_DoubledDoubleQuoteEscape) {
    TypeAst ast;
    TypeParser parser("Tuple(\"a\"\"b\" UInt8)");
    ASSERT_TRUE(parser.Parse(&ast));

    const auto& element = ast.elements[0];
    ASSERT_EQ(element.element_name, "a\"b");
    ASSERT_EQ(element.code, Type::UInt8);
}

// Inside a double-quoted name, two consecutive backticks are not an escape:
// both characters are kept.
TEST(TypeParserCase, ParseNamedTuple_BacktickNotEscape) {
    TypeAst ast;
    TypeParser parser("Tuple(\"a``b\" UInt8)");
    ASSERT_TRUE(parser.Parse(&ast));

    const auto& element = ast.elements[0];
    ASSERT_EQ(element.element_name, "a``b");
    ASSERT_EQ(element.code, Type::UInt8);
}

TEST(TypeParserCase, ParseDecimal) {
TypeAst ast;
TypeParser("Decimal(12, 5)").Parse(&ast);
Expand Down
Loading