diff --git a/be/src/vec/core/sort_cursor.h b/be/src/vec/core/sort_cursor.h index 68abe7101102804..72aa4cbb4dcffdb 100644 --- a/be/src/vec/core/sort_cursor.h +++ b/be/src/vec/core/sort_cursor.h @@ -206,7 +206,7 @@ struct BlockSupplierSortCursorImpl : public MergeSortCursorImpl { } block->clear(); THROW_IF_ERROR(_block_supplier(block.get(), &_is_eof)); - DCHECK(!block->empty() || _is_eof); + DCHECK(!block->empty() xor _is_eof); if (!block->empty()) { DCHECK_EQ(_ordering_expr.size(), desc.size()); for (int i = 0; i < desc.size(); ++i) { diff --git a/be/src/vec/data_types/data_type.h b/be/src/vec/data_types/data_type.h index 7f1ee0cd85032f6..3cc32156d20f945 100644 --- a/be/src/vec/data_types/data_type.h +++ b/be/src/vec/data_types/data_type.h @@ -272,6 +272,7 @@ struct WhichDataType { bool is_struct() const { return idx == TypeIndex::Struct; } bool is_map() const { return idx == TypeIndex::Map; } bool is_set() const { return idx == TypeIndex::Set; } + bool is_fixed_length_object() const { return idx == TypeIndex::FixedLengthObject; } bool is_nothing() const { return idx == TypeIndex::Nothing; } bool is_nullable() const { return idx == TypeIndex::Nullable; } @@ -371,6 +372,11 @@ bool is_string_or_fixed_string(const T& data_type) { return WhichDataType(data_type).is_string_or_fixed_string(); } +template +bool is_fixed_length_object(const T& data_type) { + return WhichDataType(data_type).is_fixed_length_object(); +} + inline bool is_not_decimal_but_comparable_to_decimal(const DataTypePtr& data_type) { WhichDataType which(data_type); return which.is_int() || which.is_uint(); diff --git a/be/test/testutil/column_helper.h b/be/test/testutil/column_helper.h index 8454a73f8469138..09e83c882bbf7f6 100644 --- a/be/test/testutil/column_helper.h +++ b/be/test/testutil/column_helper.h @@ -21,6 +21,7 @@ #include #include +#include "vec/columns/column_nullable.h" #include "vec/core/block.h" #include "vec/data_types/data_type_string.h" @@ -28,20 +29,32 @@ namespace doris::vectorized { struct ColumnHelper { public: template - static ColumnPtr create_column(const std::vector& datas) { + static ColumnPtr create_column(const std::vector& data) { auto column = DataType::ColumnType::create(); if constexpr (std::is_same_v) { - for (const auto& data : datas) { - column->insert_data(data.data(), data.size()); + for (const auto& datum : data) { + column->insert_data(datum.data(), datum.size()); } } else { - for (const auto& data : datas) { - column->insert_value(data); + for (const auto& datum : data) { + column->insert_value(datum); } } return std::move(column); } + template + static ColumnPtr create_nullable_column( + const std::vector& data, + const std::vector& null_map) { + auto null_col = ColumnUInt8::create(); + for (const auto& datum : null_map) { + null_col->insert_value(datum); + } + auto ptr = create_column(data); + return ColumnNullable::create(std::move(ptr), std::move(null_col)); + } + static bool column_equal(const ColumnPtr& column1, const ColumnPtr& column2) { if (column1->size() != column2->size()) { return false; @@ -67,13 +80,22 @@ struct ColumnHelper { } template - static Block create_block(const std::vector& datas) { - auto column = create_column(datas); + static Block create_block(const std::vector& data) { + auto column = create_column(data); auto data_type = std::make_shared(); Block block({ColumnWithTypeAndName(column, data_type, "column")}); return block; } + template + static Block create_nullable_block(const std::vector& data, + const std::vector& null_map) { + auto column = create_nullable_column(data, null_map); + auto data_type = std::make_shared(std::make_shared()); + Block block({ColumnWithTypeAndName(column, data_type, "column")}); + return block; + } + template static ColumnWithTypeAndName create_column_with_name( const std::vector& datas) { diff --git a/be/test/vec/data_types/common_data_type_test.h b/be/test/vec/data_types/common_data_type_test.h index 643f8669fbec3ad..0b85e3977c21b6c 100644 --- a/be/test/vec/data_types/common_data_type_test.h +++ b/be/test/vec/data_types/common_data_type_test.h @@ -22,6 +22,8 @@ #include #include +#include "agent/be_exec_version_manager.h" +#include "olap/schema.h" #include "vec/columns/column.h" #include "vec/core/field.h" #include "vec/core/types.h" @@ -52,6 +54,10 @@ namespace doris::vectorized { static bool gen_check_data_in_assert = true; class CommonDataTypeTest : public ::testing::Test { +public: + CommonDataTypeTest() = default; + void TestBody() override {} + protected: // Helper function to load data from CSV, with index which splited by spliter and load to columns void load_data_from_csv(const DataTypeSerDeSPtrs serders, MutableColumns& columns, @@ -164,7 +170,8 @@ class CommonDataTypeTest : public ::testing::Test { ASSERT_EQ(const_col->operator[](i), default_const_col->operator[](i)); } // get_uncompressed_serialized_bytes - ASSERT_EQ(data_type->get_uncompressed_serialized_bytes(*column, 0), + ASSERT_EQ(data_type->get_uncompressed_serialized_bytes( + *column, BeExecVersionManager::get_newest_version()), uncompressed_serialized_bytes); } @@ -249,7 +256,7 @@ class CommonDataTypeTest : public ::testing::Test { } // nt be_exec_version, PBlock* pblock, size_t* uncompressed_bytes, // size_t* compressed_bytes, segment_v2::CompressionTypePB compression_type, - size_t be_exec_version = 2; + size_t be_exec_version = BeExecVersionManager::get_newest_version(); auto pblock = std::make_unique(); size_t uncompressed_bytes = 0; size_t compressed_bytes = 0; @@ -262,9 +269,9 @@ class CommonDataTypeTest : public ::testing::Test { st = block_1->deserialize(*pblock); ASSERT_EQ(st.ok(), true); // check block_1 and block is same - for (int i = 0; i < block->rows(); ++i) { - auto& col = block->get_by_position(i); - auto& col_1 = block_1->get_by_position(i); + for (auto col_idx = 0; col_idx < block->columns(); ++col_idx) { + auto& col = block->get_by_position(col_idx); + auto& col_1 = block_1->get_by_position(col_idx); ASSERT_EQ(col.column->size(), col_1.column->size()); for (int j = 0; j < col.column->size(); ++j) { ASSERT_EQ(col.column->operator[](j), col_1.column->operator[](j)); diff --git a/be/test/vec/data_types/data_type_agg_state_test.cpp b/be/test/vec/data_types/data_type_agg_state_test.cpp new file mode 100644 index 000000000000000..192573de4f34c09 --- /dev/null +++ b/be/test/vec/data_types/data_type_agg_state_test.cpp @@ -0,0 +1,257 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include "vec/data_types/data_type_agg_state.h" + +#include +#include +#include + +#include +#include + +#include "agent/be_exec_version_manager.h" +#include "vec/columns/column.h" +#include "vec/columns/column_fixed_length_object.h" +#include "vec/columns/columns_number.h" +#include "vec/common/assert_cast.h" +#include "vec/common/schema_util.h" +#include "vec/core/field.h" +#include "vec/core/types.h" +#include "vec/data_types/common_data_type_serder_test.h" +#include "vec/data_types/common_data_type_test.h" +#include "vec/data_types/data_type.h" +#include "vec/data_types/data_type_factory.hpp" +#include "vec/data_types/data_type_nullable.h" + +// 1. datatype meta info: +// get_type_id, get_type_as_type_descriptor, get_storage_field_type, have_subtypes, get_pdata_type (const IDataType *data_type), to_pb_column_meta (PColumnMeta *col_meta) +// get_family_name, get_is_parametric, should_align_right_in_pretty_formats +// text_can_contain_only_valid_utf8 +// have_maximum_size_of_value, get_maximum_size_of_value_in_memory, get_size_of_value_in_memory +// get_precision, get_scale +// is_null_literal, is_value_represented_by_number, is_value_unambiguously_represented_in_contiguous_memory_region +// 2. datatype creation with column : create_column, create_column_const (size_t size, const Field &field), create_column_const_with_default_value (size_t size), get_uncompressed_serialized_bytes (const IColumn &column, int be_exec_version) +// 3. serde related: get_serde (int nesting_level=1) +// to_string (const IColumn &column, size_t row_num, BufferWritable &ostr), to_string (const IColumn &column, size_t row_num), to_string_batch (const IColumn &column, ColumnString &column_to), from_string (ReadBuffer &rb, IColumn *column) +// 4. serialize/serialize_as_stream/deserialize/deserialize_as_stream +// serialize (const IColumn &column, char *buf, int be_exec_version), deserialize (const char *buf, MutableColumnPtr *column, int be_exec_version) + +namespace doris::vectorized { + +class DataTypeAggStateTest : public ::testing::TestWithParam { +public: + void SetUp() override { + rows_value = GetParam(); + helper = std::make_unique(); + } + std::unique_ptr helper; + DataTypePtr sub_type = std::make_shared(); + DataTypes sub_types = {sub_type}; + // DataTypeAggState---> column_fixed_length_object + DataTypePtr datatype_agg_state_count = std::make_shared( + sub_types, false, "count", BeExecVersionManager::get_newest_version()); + // DataTypeAggState---> column_string + DataTypePtr datatype_agg_state_hll_union = std::make_shared( + sub_types, false, "hll_union", BeExecVersionManager::get_newest_version()); + int rows_value; +}; + +TEST_P(DataTypeAggStateTest, MetaInfoTest) { + TypeDescriptor agg_state_type_descriptor = {PrimitiveType::TYPE_AGG_STATE}; + auto col_meta = std::make_shared(); + col_meta->set_type(PGenericType_TypeId_AGG_STATE); + CommonDataTypeTest::DataTypeMetaInfo agg_state_meta_info_to_assert = { + .type_id = TypeIndex::AggState, + .type_as_type_descriptor = &agg_state_type_descriptor, + .family_name = "AggState", + .has_subtypes = false, + .storage_field_type = doris::FieldType::OLAP_FIELD_TYPE_AGG_STATE, + .should_align_right_in_pretty_formats = false, + .text_can_contain_only_valid_utf8 = false, + .have_maximum_size_of_value = false, + .size_of_value_in_memory = size_t(-1), + .precision = size_t(-1), + .scale = size_t(-1), + .is_null_literal = false, + .is_value_represented_by_number = false, + .pColumnMeta = col_meta.get(), + .is_value_unambiguously_represented_in_contiguous_memory_region = true, + .default_field = Field(String()), + }; + helper->meta_info_assert(datatype_agg_state_count, agg_state_meta_info_to_assert); +} + +TEST_P(DataTypeAggStateTest, CreateColumnTest) { + Field default_field = Field(String()); + std::cout << "create_column_assert: " << datatype_agg_state_count->get_name() << std::endl; + auto column = (datatype_agg_state_count)->create_column(); + ASSERT_EQ(column->size(), 0); + column->insert_default(); + auto fixed_length_column = ColumnFixedLengthObject::create(8); + fixed_length_column->insert(default_field); + ASSERT_EQ(fixed_length_column->size(), 1); + + for (int i = 0; i < 1; ++i) { + ASSERT_EQ(fixed_length_column->operator[](i), column->operator[](i)); + } + // get_uncompressed_serialized_bytes + ASSERT_EQ(datatype_agg_state_count->get_uncompressed_serialized_bytes( + *column, BeExecVersionManager::get_newest_version()), + 25); +} + +void insert_data_agg_state(MutableColumns* agg_state_cols, DataTypePtr datatype_agg_state, + int rows_value, std::vector* data_strs = nullptr) { + auto column_fixed = datatype_agg_state->create_column(); + agg_state_cols->push_back(column_fixed->get_ptr()); + std::cout << "insert_data_agg_state: " << datatype_agg_state->get_name() << " " + << column_fixed->get_name() << std::endl; + if (column_fixed->is_column_string()) { + ASSERT_TRUE(is_string(assert_cast(datatype_agg_state.get()) + ->get_serialized_type())); + auto* column = assert_cast((*agg_state_cols)[0].get()); + for (size_t i = 0; i != rows_value; ++i) { + auto val = std::to_string(i); + column->insert_data(val.c_str(), val.size()); + if (data_strs) { + data_strs->push_back(val); + } + // std::cout<<"insert_data_agg_state: "<get_data_at(i).to_string()<((*agg_state_cols)[0].get())->set_item_size(8); + column_fixed->resize(rows_value); + ASSERT_TRUE(is_fixed_length_object( + assert_cast(datatype_agg_state.get()) + ->get_serialized_type())); + auto& data = assert_cast((*agg_state_cols)[0].get())->get_data(); + for (size_t i = 0; i != rows_value; ++i) { + data[i] = i; + } + } + std::cout << "finish insert data" << std::endl; +} + +// // not support function: get_filed + +// test to_string | to_string_batch | from_string +TEST_P(DataTypeAggStateTest, FromAndToStringTest) { + MutableColumns agg_state_cols; + std::vector data_strs; + insert_data_agg_state(&agg_state_cols, datatype_agg_state_hll_union, rows_value, &data_strs); + + { + // to_string_batch | from_string + auto col_to = ColumnString::create(); + datatype_agg_state_hll_union->to_string_batch(*agg_state_cols[0]->get_ptr(), *col_to); + ASSERT_EQ(col_to->size(), agg_state_cols[0]->get_ptr()->size()); + // from_string assert col_to to assert_column and check same with mutableColumn + auto assert_column = datatype_agg_state_hll_union->create_column(); + for (int i = 0; i < col_to->size(); ++i) { + std::string s = col_to->get_data_at(i).to_string(); + std::cout << "s: " << s << std::endl; + ReadBuffer rb(s.data(), s.size()); + ASSERT_EQ(Status::OK(), + datatype_agg_state_hll_union->from_string(rb, assert_column.get())); + ASSERT_EQ(assert_column->operator[](i), agg_state_cols[0]->get_ptr()->operator[](i)) + << "i: " << i << " s: " << s + << " datatype: " << datatype_agg_state_hll_union->get_name() + << " assert_column: " << assert_column->get_name() + << " mutableColumn:" << agg_state_cols[0]->get_ptr()->get_name() << std::endl; + } + std::cout << "finish to_string_batch | from_string test" << std::endl; + } + + { + // to_string | from_string + auto ser_col = ColumnString::create(); + ser_col->reserve(agg_state_cols[0]->get_ptr()->size()); + VectorBufferWriter buffer_writer(*ser_col.get()); + for (int i = 0; i < agg_state_cols[0]->get_ptr()->size(); ++i) { + datatype_agg_state_hll_union->to_string(*agg_state_cols[0]->get_ptr(), i, + buffer_writer); + std::string res = + datatype_agg_state_hll_union->to_string(*agg_state_cols[0]->get_ptr(), i); + buffer_writer.commit(); + EXPECT_EQ(data_strs[i], ser_col->get_data_at(i).to_string()); + } + // check ser_col to assert_column and check same with mutableColumn + auto assert_column_1 = datatype_agg_state_hll_union->create_column(); + for (int i = 0; i < ser_col->size(); ++i) { + std::string s = ser_col->get_data_at(i).to_string(); + ReadBuffer rb(s.data(), s.size()); + ASSERT_EQ(Status::OK(), + datatype_agg_state_hll_union->from_string(rb, assert_column_1.get())); + auto aaa = assert_column_1->operator[](i); + ASSERT_EQ(assert_column_1->operator[](i), agg_state_cols[0]->get_ptr()->operator[](i)); + } + std::cout << "finish to_string | from_string test" << std::endl; + } +} + +// // serialize / deserialize +TEST_P(DataTypeAggStateTest, SerializeDeserializeTest) { + MutableColumns agg_state_cols; + insert_data_agg_state(&agg_state_cols, datatype_agg_state_hll_union, rows_value); + + auto* column = assert_cast(agg_state_cols[0].get()); + auto size = datatype_agg_state_hll_union->get_uncompressed_serialized_bytes( + *column, BeExecVersionManager::get_newest_version()); + std::unique_ptr buf = std::make_unique(size); + auto* result = datatype_agg_state_hll_union->serialize( + *column, buf.get(), BeExecVersionManager::get_newest_version()); + ASSERT_EQ(result, buf.get() + size); + + auto column2 = datatype_agg_state_hll_union->create_column(); + datatype_agg_state_hll_union->deserialize(buf.get(), &column2, + BeExecVersionManager::get_newest_version()); + for (size_t i = 0; i != rows_value; ++i) { + auto* column_res = assert_cast(column2.get()); + ASSERT_EQ(column->get_data_at(i).to_string(), column_res->get_data_at(i).to_string()); + } + helper->serialize_deserialize_assert(agg_state_cols, {datatype_agg_state_hll_union}); + std::cout << "finish serialize deserialize test" << std::endl; +} + +// // serialize / deserialize +TEST_P(DataTypeAggStateTest, SerializeDeserializeTest2) { + MutableColumns agg_state_cols; + insert_data_agg_state(&agg_state_cols, datatype_agg_state_count, rows_value); + + auto* column = assert_cast(agg_state_cols[0].get()); + auto size = datatype_agg_state_count->get_uncompressed_serialized_bytes( + *column, BeExecVersionManager::get_newest_version()); + std::unique_ptr buf = std::make_unique(size); + auto* result = datatype_agg_state_count->serialize(*column, buf.get(), + BeExecVersionManager::get_newest_version()); + ASSERT_EQ(result, buf.get() + size); + + auto column2 = datatype_agg_state_count->create_column(); + datatype_agg_state_count->deserialize(buf.get(), &column2, + BeExecVersionManager::get_newest_version()); + for (size_t i = 0; i != rows_value; ++i) { + auto* column_res = assert_cast(column2.get()); + ASSERT_EQ(column->get_data_at(i).to_string(), column_res->get_data_at(i).to_string()); + } + helper->serialize_deserialize_assert(agg_state_cols, {datatype_agg_state_count}); + std::cout << "finish serialize deserialize test2" << std::endl; +} + +INSTANTIATE_TEST_SUITE_P(Params, DataTypeAggStateTest, ::testing::Values(0, 1, 31)); + +} // namespace doris::vectorized \ No newline at end of file diff --git a/be/test/vec/data_types/data_type_array_test.cpp b/be/test/vec/data_types/data_type_array_test.cpp index d50ae0be26b9a08..1c0ee6f97efb4a3 100644 --- a/be/test/vec/data_types/data_type_array_test.cpp +++ b/be/test/vec/data_types/data_type_array_test.cpp @@ -363,59 +363,59 @@ TEST_F(DataTypeArrayTest, CreateColumnTest) { auto type = remove_nullable(array_types[i]); // any different nested type in arr with same default array ? Field default_field_array = Array(); - create_column_assert(type, default_field_array, 16); + create_column_assert(type, default_field_array, 51); // 17 * 3 } { auto type = remove_nullable(array_types[13]); Field default_field_array = Array(); - create_column_assert(type, default_field_array, 24); + create_column_assert(type, default_field_array, 59); // add addtional sizeof(8) } // for decimal32/64/128/256 here uncompressed size is 16 // one scalar type for (int i = 14; i < 18; i++) { auto type = remove_nullable(array_types[i]); Field default_field_array = Array(); - create_column_assert(type, default_field_array, 16); + create_column_assert(type, default_field_array, 51); } // for array-array-scala for (int i = 18; i < 31; i++) { auto type = remove_nullable(array_types[i]); Field default_field_array = Array(); - create_column_assert(type, default_field_array, 28); + create_column_assert(type, default_field_array, 85); // 17 * 5 } { // string type auto type = remove_nullable(array_types[31]); Field default_field_array = Array(); - create_column_assert(type, default_field_array, 36); + create_column_assert(type, default_field_array, 93); // add addtional sizeof(8) } for (int i = 32; i < 36; i++) { auto type = remove_nullable(array_types[i]); Field default_field_array = Array(); - create_column_assert(type, default_field_array, 28); + create_column_assert(type, default_field_array, 85); // 17 * 5 } // for array-map { auto type = remove_nullable(array_types[36]); Field default_field_array = Array(); - create_column_assert(type, default_field_array, 44); + create_column_assert(type, default_field_array, 127); // 17 * 7 + 8 add addtional sizeof(8) type = remove_nullable(array_types[39]); default_field_array = Array(); - create_column_assert(type, default_field_array, 44); + create_column_assert(type, default_field_array, 127); } { auto type = remove_nullable(array_types[37]); Field default_field_array = Array(); - create_column_assert(type, default_field_array, 36); + create_column_assert(type, default_field_array, 119); type = remove_nullable(array_types[38]); default_field_array = Array(); - create_column_assert(type, default_field_array, 36); + create_column_assert(type, default_field_array, 119); // 17 * 7 } // for array-struct { auto type = remove_nullable(array_types[40]); Field default_field_array = Array(); - create_column_assert(type, default_field_array, 76); + create_column_assert(type, default_field_array, 297); // 17 * 17 } } diff --git a/be/test/vec/data_types/data_type_bitmap_test.cpp b/be/test/vec/data_types/data_type_bitmap_test.cpp new file mode 100644 index 000000000000000..58291f06a79d41d --- /dev/null +++ b/be/test/vec/data_types/data_type_bitmap_test.cpp @@ -0,0 +1,218 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include "vec/data_types/data_type_bitmap.h" + +#include +#include +#include + +#include + +#include "agent/be_exec_version_manager.h" +#include "util/bitmap_value.h" +#include "vec/columns/column.h" +#include "vec/common/assert_cast.h" +#include "vec/core/field.h" +#include "vec/core/types.h" +#include "vec/data_types/common_data_type_serder_test.h" +#include "vec/data_types/common_data_type_test.h" +#include "vec/data_types/data_type.h" +#include "vec/data_types/data_type_factory.hpp" +#include "vec/data_types/data_type_nullable.h" + +// 1. datatype meta info: +// get_type_id, get_type_as_type_descriptor, get_storage_field_type, have_subtypes, get_pdata_type (const IDataType *data_type), to_pb_column_meta (PColumnMeta *col_meta) +// get_family_name, get_is_parametric, should_align_right_in_pretty_formats +// text_can_contain_only_valid_utf8 +// have_maximum_size_of_value, get_maximum_size_of_value_in_memory, get_size_of_value_in_memory +// get_precision, get_scale +// is_null_literal, is_value_represented_by_number, is_value_unambiguously_represented_in_contiguous_memory_region +// 2. datatype creation with column : create_column, create_column_const (size_t size, const Field &field), create_column_const_with_default_value (size_t size), get_uncompressed_serialized_bytes (const IColumn &column, int be_exec_version) +// 3. serde related: get_serde (int nesting_level=1) +// to_string (const IColumn &column, size_t row_num, BufferWritable &ostr), to_string (const IColumn &column, size_t row_num), to_string_batch (const IColumn &column, ColumnString &column_to), from_string (ReadBuffer &rb, IColumn *column) +// 4. serialize/serialize_as_stream/deserialize/deserialize_as_stream +// serialize (const IColumn &column, char *buf, int be_exec_version), deserialize (const char *buf, MutableColumnPtr *column, int be_exec_version) + +namespace doris::vectorized { + +class DataTypeBitMapTest : public ::testing::TestWithParam { +public: + void SetUp() override { + rows_value = GetParam(); + helper = std::make_unique(); + } + std::unique_ptr helper; + DataTypePtr dt_bitmap = + DataTypeFactory::instance().create_data_type(FieldType::OLAP_FIELD_TYPE_OBJECT, 0, 0); + int rows_value; +}; + +TEST_P(DataTypeBitMapTest, MetaInfoTest) { + TypeDescriptor bitmap_type_descriptor = {PrimitiveType::TYPE_OBJECT}; + auto col_meta = std::make_shared(); + col_meta->set_type(PGenericType_TypeId_BITMAP); + CommonDataTypeTest::DataTypeMetaInfo bitmap_meta_info_to_assert = { + .type_id = TypeIndex::BitMap, + .type_as_type_descriptor = &bitmap_type_descriptor, + .family_name = "BitMap", + .has_subtypes = false, + .storage_field_type = doris::FieldType::OLAP_FIELD_TYPE_OBJECT, + .should_align_right_in_pretty_formats = false, + .text_can_contain_only_valid_utf8 = true, + .have_maximum_size_of_value = false, + .size_of_value_in_memory = size_t(-1), + .precision = size_t(-1), + .scale = size_t(-1), + .is_null_literal = false, + .is_value_represented_by_number = false, + .pColumnMeta = col_meta.get(), + .is_value_unambiguously_represented_in_contiguous_memory_region = true, + .default_field = BitmapValue::empty_bitmap(), + }; + helper->meta_info_assert(dt_bitmap, bitmap_meta_info_to_assert); +} + +TEST_P(DataTypeBitMapTest, CreateColumnTest) { + Field default_field_bitmap = BitmapValue::empty_bitmap(); + helper->create_column_assert(dt_bitmap, default_field_bitmap, 17); +} + +void insert_data_bitmap(MutableColumns* bitmap_cols, DataTypePtr dt_bitmap, int rows_value, + std::vector* data_strs = nullptr) { + auto serde_bitmap = dt_bitmap->get_serde(1); + auto column_bitmap = dt_bitmap->create_column(); + + bitmap_cols->push_back(column_bitmap->get_ptr()); + DataTypeSerDeSPtrs serde = {dt_bitmap->get_serde()}; + auto& data = assert_cast((*bitmap_cols)[0].get())->get_data(); + for (size_t i = 0; i != rows_value; ++i) { + BitmapValue bitmap_value; + for (size_t j = 0; j <= i; ++j) { + bitmap_value.add(j); + } + if (data_strs) { + data_strs->push_back(bitmap_value.to_string()); + } + std::string memory_buffer(bitmap_value.getSizeInBytes(), '0'); + bitmap_value.write_to(memory_buffer.data()); + data.emplace_back(std::move(bitmap_value)); + } + std::cout << "finish insert data" << std::endl; +} + +// not support function: get_filed + +// test to_string | to_string_batch | from_string +TEST_P(DataTypeBitMapTest, FromAndToStringTest) { + MutableColumns bitmap_cols; + std::vector data_strs; + insert_data_bitmap(&bitmap_cols, dt_bitmap, rows_value, &data_strs); + + { + // to_string_batch | from_string + auto col_to = ColumnString::create(); + dt_bitmap->to_string_batch(*bitmap_cols[0]->get_ptr(), *col_to); + ASSERT_EQ(col_to->size(), bitmap_cols[0]->get_ptr()->size()); + // from_string assert col_to to assert_column and check same with mutableColumn + auto assert_column = dt_bitmap->create_column(); + for (int i = 0; i < col_to->size(); ++i) { + std::string s = col_to->get_data_at(i).to_string(); + ReadBuffer rb(s.data(), s.size()); + ASSERT_EQ(Status::OK(), dt_bitmap->from_string(rb, assert_column.get())); + ASSERT_EQ(assert_column->operator[](i), bitmap_cols[0]->get_ptr()->operator[](i)) + << "i: " << i << " s: " << s << " datatype: " << dt_bitmap->get_name() + << " assert_column: " << assert_column->get_name() + << " mutableColumn:" << bitmap_cols[0]->get_ptr()->get_name() << std::endl; + } + std::cout << "finish to_string_batch | from_string test" << std::endl; + } + + { + // to_string | from_string + auto ser_col = ColumnString::create(); + ser_col->reserve(bitmap_cols[0]->get_ptr()->size()); + VectorBufferWriter buffer_writer(*ser_col.get()); + for (int i = 0; i < bitmap_cols[0]->get_ptr()->size(); ++i) { + dt_bitmap->to_string(*bitmap_cols[0]->get_ptr(), i, buffer_writer); + std::string res = dt_bitmap->to_string(*bitmap_cols[0]->get_ptr(), i); + buffer_writer.commit(); + EXPECT_EQ(res, data_strs[i]); + } + // check ser_col to assert_column and check same with mutableColumn + auto assert_column_1 = dt_bitmap->create_column(); + for (int i = 0; i < ser_col->size(); ++i) { + std::string s = ser_col->get_data_at(i).to_string(); + ReadBuffer rb(s.data(), s.size()); + ASSERT_EQ(Status::OK(), dt_bitmap->from_string(rb, assert_column_1.get())); + auto aaa = assert_column_1->operator[](i); + ASSERT_EQ(assert_column_1->operator[](i), bitmap_cols[0]->get_ptr()->operator[](i)); + } + std::cout << "finish to_string | from_string test" << std::endl; + } +} + +// serialize / deserialize +TEST_P(DataTypeBitMapTest, SerializeDeserializeTest) { + MutableColumns bitmap_cols; + insert_data_bitmap(&bitmap_cols, dt_bitmap, rows_value); + + auto* column = assert_cast(bitmap_cols[0].get()); + auto size = dt_bitmap->get_uncompressed_serialized_bytes( + *column, BeExecVersionManager::get_newest_version()); + std::unique_ptr buf = std::make_unique(size); + auto* result = + dt_bitmap->serialize(*column, buf.get(), BeExecVersionManager::get_newest_version()); + ASSERT_EQ(result, buf.get() + size); + + auto column2 = dt_bitmap->create_column(); + dt_bitmap->deserialize(buf.get(), &column2, BeExecVersionManager::get_newest_version()); + for (size_t i = 0; i != rows_value; ++i) { + auto* column_res = assert_cast(column2.get()); + ASSERT_EQ(column->get_data()[i].to_string(), column_res->get_data()[i].to_string()); + } + helper->serialize_deserialize_assert(bitmap_cols, {dt_bitmap}); + std::cout << "finish serialize deserialize test" << std::endl; +} + +// serialize / deserialize as stream +TEST_P(DataTypeBitMapTest, SerializeDeserializeAsStreamTest) { + MutableColumns bitmap_cols; + insert_data_bitmap(&bitmap_cols, dt_bitmap, rows_value); + + auto ser_col = ColumnString::create(); + VectorBufferWriter buffer_writer(*ser_col.get()); + auto* column_data = assert_cast(bitmap_cols[0].get()); + auto c = dt_bitmap->create_column(); + auto* column_res = assert_cast(c.get()); + column_res->resize(rows_value); + for (size_t i = 0; i != rows_value; ++i) { + doris::vectorized::DataTypeBitMap::serialize_as_stream(column_data->get_element(i), + buffer_writer); + buffer_writer.commit(); + BufferReadable buffer_readable(ser_col->get_data_at(i)); + doris::vectorized::DataTypeBitMap::deserialize_as_stream(column_res->get_element(i), + buffer_readable); + ASSERT_EQ(column_data->get_data()[i].to_string(), column_res->get_data()[i].to_string()); + } + std::cout << "finish serialize deserialize as stream test" << std::endl; +} +// sh run-be-ut.sh --run --filter=Params/DataTypeBitMapTest.* +// need rows_value to cover bitmap all type: empty/single/set/bitmap +INSTANTIATE_TEST_SUITE_P(Params, DataTypeBitMapTest, ::testing::Values(0, 1, 31, 1024)); + +} // namespace doris::vectorized \ No newline at end of file diff --git a/be/test/vec/data_types/data_type_fixed_length_object_test.cpp b/be/test/vec/data_types/data_type_fixed_length_object_test.cpp new file mode 100644 index 000000000000000..09762819eb75de3 --- /dev/null +++ b/be/test/vec/data_types/data_type_fixed_length_object_test.cpp @@ -0,0 +1,153 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include "vec/data_types/data_type_fixed_length_object.h" + +#include +#include +#include + +#include + +#include "agent/be_exec_version_manager.h" +#include "util/bitmap_value.h" +#include "vec/columns/column.h" +#include "vec/columns/column_fixed_length_object.h" +#include "vec/common/assert_cast.h" +#include "vec/core/field.h" +#include "vec/core/types.h" +#include "vec/data_types/common_data_type_serder_test.h" +#include "vec/data_types/common_data_type_test.h" +#include "vec/data_types/data_type.h" +#include "vec/data_types/data_type_factory.hpp" +#include "vec/data_types/data_type_nullable.h" + +// 1. datatype meta info: +// get_type_id, get_type_as_type_descriptor, get_storage_field_type, have_subtypes, get_pdata_type (const IDataType *data_type), to_pb_column_meta (PColumnMeta *col_meta) +// get_family_name, get_is_parametric, should_align_right_in_pretty_formats +// text_can_contain_only_valid_utf8 +// have_maximum_size_of_value, get_maximum_size_of_value_in_memory, get_size_of_value_in_memory +// get_precision, get_scale +// is_null_literal, is_value_represented_by_number, is_value_unambiguously_represented_in_contiguous_memory_region +// 2. datatype creation with column : create_column, create_column_const (size_t size, const Field &field), create_column_const_with_default_value (size_t size), get_uncompressed_serialized_bytes (const IColumn &column, int be_exec_version) +// 3. serde related: get_serde (int nesting_level=1) +// to_string (const IColumn &column, size_t row_num, BufferWritable &ostr), to_string (const IColumn &column, size_t row_num), to_string_batch (const IColumn &column, ColumnString &column_to), from_string (ReadBuffer &rb, IColumn *column) +// 4. serialize/serialize_as_stream/deserialize/deserialize_as_stream +// serialize (const IColumn &column, char *buf, int be_exec_version), deserialize (const char *buf, MutableColumnPtr *column, int be_exec_version) + +namespace doris::vectorized { + +class DataTypeFixedLengthObjectTest : public ::testing::TestWithParam { +public: + void SetUp() override { + rows_value = GetParam(); + helper = std::make_unique(); + } + std::unique_ptr helper; + int rows_value; + DataTypePtr datatype_fixed_length = std::make_shared(); +}; + +TEST_P(DataTypeFixedLengthObjectTest, MetaInfoTest) { + TypeDescriptor bitmap_type_descriptor = {PrimitiveType::INVALID_TYPE}; + auto col_meta = std::make_shared(); + col_meta->set_type(PGenericType_TypeId_FIXEDLENGTHOBJECT); + CommonDataTypeTest::DataTypeMetaInfo bitmap_meta_info_to_assert = { + .type_id = TypeIndex::FixedLengthObject, + .type_as_type_descriptor = &bitmap_type_descriptor, + .family_name = "DataTypeFixedLengthObject", + .has_subtypes = false, + .storage_field_type = doris::FieldType::OLAP_FIELD_TYPE_NONE, + .should_align_right_in_pretty_formats = false, + .text_can_contain_only_valid_utf8 = false, + .have_maximum_size_of_value = false, + .size_of_value_in_memory = size_t(-1), + .precision = size_t(-1), + .scale = size_t(-1), + .is_null_literal = false, + .is_value_represented_by_number = false, + .pColumnMeta = col_meta.get(), + .is_value_unambiguously_represented_in_contiguous_memory_region = false, + .default_field = Field(String()), + }; + helper->meta_info_assert(datatype_fixed_length, bitmap_meta_info_to_assert); +} + +TEST_P(DataTypeFixedLengthObjectTest, CreateColumnTest) { + Field default_field = Field(String()); + std::cout << "create_column_assert: " << datatype_fixed_length->get_name() << std::endl; + auto column = (datatype_fixed_length)->create_column(); + ASSERT_EQ(column->size(), 0); + auto fixed_length_column = ColumnFixedLengthObject::create(8); + fixed_length_column->insert(default_field); + ASSERT_EQ(fixed_length_column->size(), 1); + auto default_const_col = ColumnFixedLengthObject::create(8); + auto data = fixed_length_column->get_data_at(0); + default_const_col->insert_data(data.data, data.size); + for (int i = 0; i < 1; ++i) { + ASSERT_EQ(fixed_length_column->operator[](i), default_const_col->operator[](i)); + } + // get_uncompressed_serialized_bytes + ASSERT_EQ(datatype_fixed_length->get_uncompressed_serialized_bytes( + *column, BeExecVersionManager::get_newest_version()), + 17); +} + +void insert_data_fixed_length_data(MutableColumns* fixed_length_cols, + DataTypePtr datatype_fixed_length, int rows_value, + std::vector* data_strs = nullptr) { + auto serde_fixed_length = datatype_fixed_length->get_serde(1); + auto column_fixed = ColumnFixedLengthObject::create(sizeof(size_t)); + column_fixed->resize(rows_value); + fixed_length_cols->push_back(column_fixed->get_ptr()); + DataTypeSerDeSPtrs serde = {datatype_fixed_length->get_serde()}; + auto& data = assert_cast((*fixed_length_cols)[0].get())->get_data(); + for (size_t i = 0; i != rows_value; ++i) { + data[i] = i; + } + std::cout << "finish insert data" << std::endl; +} + +// not support function: get_filed to_string | to_string_batch | from_string + +// serialize / deserialize +TEST_P(DataTypeFixedLengthObjectTest, SerializeDeserializeTest) { + MutableColumns fixed_length_cols; + insert_data_fixed_length_data(&fixed_length_cols, datatype_fixed_length, rows_value); + + auto* column = assert_cast(fixed_length_cols[0].get()); + auto size = datatype_fixed_length->get_uncompressed_serialized_bytes( + *column, BeExecVersionManager::get_newest_version()); + std::unique_ptr buf = std::make_unique(size); + auto* result = datatype_fixed_length->serialize(*column, buf.get(), + BeExecVersionManager::get_newest_version()); + ASSERT_EQ(result, buf.get() + size); + + auto column2 = datatype_fixed_length->create_column(); + datatype_fixed_length->deserialize(buf.get(), &column2, + BeExecVersionManager::get_newest_version()); + for (size_t i = 0; i != rows_value; ++i) { + auto* column_res = assert_cast(column2.get()); + ASSERT_EQ(column->get_data()[i], column_res->get_data()[i]); + } + helper->serialize_deserialize_assert(fixed_length_cols, {datatype_fixed_length}); + std::cout << "finish serialize deserialize test" << std::endl; +} + +INSTANTIATE_TEST_SUITE_P(Params, DataTypeFixedLengthObjectTest, ::testing::Values(0, 1, 31, 1024)); + +} // namespace doris::vectorized \ No newline at end of file diff --git a/be/test/vec/data_types/data_type_hll_test.cpp b/be/test/vec/data_types/data_type_hll_test.cpp new file mode 100644 index 000000000000000..e16f60452175f69 --- /dev/null +++ b/be/test/vec/data_types/data_type_hll_test.cpp @@ -0,0 +1,216 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include "vec/data_types/data_type_hll.h" + +#include +#include +#include + +#include + +#include "agent/be_exec_version_manager.h" +#include "vec/columns/column.h" +#include "vec/core/field.h" +#include "vec/core/types.h" +#include "vec/data_types/common_data_type_serder_test.h" +#include "vec/data_types/common_data_type_test.h" +#include "vec/data_types/data_type.h" +#include "vec/data_types/data_type_factory.hpp" +#include "vec/data_types/data_type_nullable.h" + +// this test is gonna to be a data type test template for all DataType which should make ut test to coverage the function defined +// for example DataTypeHLL should test this function: +// 1. datatype meta info: +// get_type_id, get_type_as_type_descriptor, get_storage_field_type, have_subtypes, get_pdata_type (const IDataType *data_type), to_pb_column_meta (PColumnMeta *col_meta) +// get_family_name, get_is_parametric, should_align_right_in_pretty_formats +// text_can_contain_only_valid_utf8 +// have_maximum_size_of_value, get_maximum_size_of_value_in_memory, get_size_of_value_in_memory +// get_precision, get_scale +// is_null_literal, is_value_represented_by_number, is_value_unambiguously_represented_in_contiguous_memory_region +// 2. datatype creation with column : create_column, create_column_const (size_t size, const Field &field), create_column_const_with_default_value (size_t size), get_uncompressed_serialized_bytes (const IColumn &column, int be_exec_version) +// 3. serde related: get_serde (int nesting_level=1) +// to_string (const IColumn &column, size_t row_num, BufferWritable &ostr), to_string (const IColumn &column, size_t row_num), to_string_batch (const IColumn &column, ColumnString &column_to), from_string (ReadBuffer &rb, IColumn *column) +// serialize (const IColumn &column, char *buf, int be_exec_version), deserialize (const char *buf, MutableColumnPtr *column, int be_exec_version) + +namespace doris::vectorized { + +class DataTypeHLLTest : public ::testing::TestWithParam { +protected: + void SetUp() override { + rows_value = GetParam(); + helper = std::make_unique(); + } + +public: + std::unique_ptr helper; + int rows_value; + DataTypePtr dt_hll = + DataTypeFactory::instance().create_data_type(FieldType::OLAP_FIELD_TYPE_HLL, 0, 0); +}; + +TEST_P(DataTypeHLLTest, MetaInfoTest) { + TypeDescriptor hll_type_descriptor = {PrimitiveType::TYPE_HLL}; + auto col_meta = std::make_shared(); + col_meta->set_type(PGenericType_TypeId_HLL); + CommonDataTypeTest::DataTypeMetaInfo hll_meta_info_to_assert = { + .type_id = TypeIndex::HLL, + .type_as_type_descriptor = &hll_type_descriptor, + .family_name = "HLL", + .has_subtypes = false, + .storage_field_type = doris::FieldType::OLAP_FIELD_TYPE_HLL, + .should_align_right_in_pretty_formats = false, + .text_can_contain_only_valid_utf8 = true, + .have_maximum_size_of_value = false, + .size_of_value_in_memory = size_t(-1), + .precision = size_t(-1), + .scale = size_t(-1), + .is_null_literal = false, + .is_value_represented_by_number = false, + .pColumnMeta = col_meta.get(), + .is_value_unambiguously_represented_in_contiguous_memory_region = true, + .default_field = HyperLogLog::empty(), + }; + helper->meta_info_assert(dt_hll, hll_meta_info_to_assert); +} + +TEST_P(DataTypeHLLTest, CreateColumnTest) { + Field default_field_hll = HyperLogLog::empty(); + helper->create_column_assert(dt_hll, default_field_hll, 17); +} + +void insert_data_hll(MutableColumns* hll_cols, DataTypePtr datetype_hll, int rows_value, + std::vector* data_strs = nullptr) { + auto serde_hll = datetype_hll->get_serde(1); + auto column_hll = datetype_hll->create_column(); + + hll_cols->push_back(column_hll->get_ptr()); + DataTypeSerDeSPtrs serde = {datetype_hll->get_serde()}; + auto& data = assert_cast((*hll_cols)[0].get())->get_data(); + for (size_t i = 0; i != rows_value; ++i) { + HyperLogLog hll_value; + for (size_t j = 0; j <= i; ++j) { + hll_value.update(j); + } + if (data_strs) { + data_strs->push_back(hll_value.to_string()); + } + std::string memory_buffer(hll_value.max_serialized_size(), '0'); + hll_value.serialize(reinterpret_cast(memory_buffer.data())); + data.emplace_back(std::move(hll_value)); + } + std::cout << "finish insert data" << std::endl; +} + +// test to_string | to_string_batch | from_string +TEST_P(DataTypeHLLTest, FromAndToStringTest) { + MutableColumns hll_cols; + std::vector data_strs; + insert_data_hll(&hll_cols, dt_hll, rows_value, &data_strs); + + { + // to_string_batch | from_string + auto col_to = ColumnString::create(); + dt_hll->to_string_batch(*hll_cols[0]->get_ptr(), *col_to); + ASSERT_EQ(col_to->size(), hll_cols[0]->get_ptr()->size()); + // from_string assert col_to to assert_column and check same with mutableColumn + auto assert_column = dt_hll->create_column(); + for (int i = 0; i < col_to->size(); ++i) { + std::string s = col_to->get_data_at(i).to_string(); + ReadBuffer rb(s.data(), s.size()); + ASSERT_EQ(Status::OK(), dt_hll->from_string(rb, assert_column.get())); + ASSERT_EQ(assert_column->operator[](i), hll_cols[0]->get_ptr()->operator[](i)) + << "i: " << i << " s: " << s << " datatype: " << dt_hll->get_name() + << " assert_column: " << assert_column->get_name() + << " mutableColumn:" << hll_cols[0]->get_ptr()->get_name() << std::endl; + } + std::cout << "finish to_string_batch | from_string test" << std::endl; + } + + { + // to_string | from_string + auto ser_col = ColumnString::create(); + ser_col->reserve(hll_cols[0]->get_ptr()->size()); + VectorBufferWriter buffer_writer(*ser_col.get()); + for (int i = 0; i < hll_cols[0]->get_ptr()->size(); ++i) { + dt_hll->to_string(*hll_cols[0]->get_ptr(), i, buffer_writer); + std::string res = dt_hll->to_string(*hll_cols[0]->get_ptr(), i); + buffer_writer.commit(); + EXPECT_EQ(res, "HLL()"); // HLL to_string is not implemented + } + // check ser_col to assert_column and check same with mutableColumn + auto assert_column_1 = dt_hll->create_column(); + for (int i = 0; i < ser_col->size(); ++i) { + std::string s = ser_col->get_data_at(i).to_string(); + ReadBuffer rb(s.data(), s.size()); + ASSERT_EQ(Status::OK(), dt_hll->from_string(rb, assert_column_1.get())); + auto aaa = assert_column_1->operator[](i); + ASSERT_EQ(assert_column_1->operator[](i), hll_cols[0]->get_ptr()->operator[](i)); + } + std::cout << "finish to_string | from_string test" << std::endl; + } +} + +// serialize / deserialize +TEST_P(DataTypeHLLTest, SerializeDeserializeTest) { + MutableColumns hll_cols; + insert_data_hll(&hll_cols, dt_hll, rows_value); + + auto* column = assert_cast(hll_cols[0].get()); + auto size = dt_hll->get_uncompressed_serialized_bytes( + *column, BeExecVersionManager::get_newest_version()); + std::unique_ptr buf = std::make_unique(size); + auto* result = + dt_hll->serialize(*column, buf.get(), BeExecVersionManager::get_newest_version()); + ASSERT_EQ(result, buf.get() + size); + + auto column2 = dt_hll->create_column(); + dt_hll->deserialize(buf.get(), &column2, BeExecVersionManager::get_newest_version()); + for (size_t i = 0; i != rows_value; ++i) { + auto* column_res = assert_cast(column2.get()); + ASSERT_EQ(column->get_data()[i].to_string(), column_res->get_data()[i].to_string()); + } + helper->serialize_deserialize_assert(hll_cols, {dt_hll}); + std::cout << "finish serialize deserialize test" << std::endl; +} + +// serialize / deserialize as stream +TEST_P(DataTypeHLLTest, SerializeDeserializeAsStreamTest) { + MutableColumns hll_cols; + insert_data_hll(&hll_cols, dt_hll, rows_value); + + auto ser_col = ColumnString::create(); + VectorBufferWriter buffer_writer(*ser_col.get()); + auto* column_data = assert_cast(hll_cols[0].get()); + auto c = dt_hll->create_column(); + auto* column_res = assert_cast(c.get()); + column_res->resize(rows_value); + for (size_t i = 0; i != rows_value; ++i) { + doris::vectorized::DataTypeHLL::serialize_as_stream(column_data->get_element(i), + buffer_writer); + buffer_writer.commit(); + BufferReadable buffer_readable(ser_col->get_data_at(i)); + doris::vectorized::DataTypeHLL::deserialize_as_stream(column_res->get_element(i), + buffer_readable); + ASSERT_EQ(column_data->get_data()[i].to_string(), column_res->get_data()[i].to_string()); + } + std::cout << "finish serialize deserialize as stream test" << std::endl; +} + +INSTANTIATE_TEST_SUITE_P(Params, DataTypeHLLTest, ::testing::Values(0, 1, 10, 100)); + +} // namespace doris::vectorized \ No newline at end of file diff --git a/be/test/vec/data_types/data_type_ip_test.cpp b/be/test/vec/data_types/data_type_ip_test.cpp index c500c7cf2dd416a..d26806b7dcb434c 100644 --- a/be/test/vec/data_types/data_type_ip_test.cpp +++ b/be/test/vec/data_types/data_type_ip_test.cpp @@ -126,8 +126,8 @@ TEST_F(DataTypeIPTest, MetaInfoTest) { TEST_F(DataTypeIPTest, CreateColumnTest) { Field default_field_ipv4 = IPv4(0); Field default_field_ipv6 = IPv6(0); - create_column_assert(dt_ipv4, default_field_ipv4, 4); - create_column_assert(dt_ipv6, default_field_ipv6, 4); + create_column_assert(dt_ipv4, default_field_ipv4, 17); + create_column_assert(dt_ipv6, default_field_ipv6, 17); } TEST_F(DataTypeIPTest, GetFieldTest) { diff --git a/be/test/vec/data_types/data_type_quantile_state_test.cpp b/be/test/vec/data_types/data_type_quantile_state_test.cpp new file mode 100644 index 000000000000000..dcd8d58503c1783 --- /dev/null +++ b/be/test/vec/data_types/data_type_quantile_state_test.cpp @@ -0,0 +1,198 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include +#include +#include + +#include + +#include "agent/be_exec_version_manager.h" +#include "vec/columns/column.h" +#include "vec/core/field.h" +#include "vec/core/types.h" +#include "vec/data_types/common_data_type_serder_test.h" +#include "vec/data_types/common_data_type_test.h" +#include "vec/data_types/data_type.h" +#include "vec/data_types/data_type_factory.hpp" +#include "vec/data_types/data_type_nullable.h" +#include "vec/data_types/data_type_quantilestate.h" + +// this test is gonna to be a data type test template for all DataType which should make ut test to coverage the function defined +// for example DataTypeQuantileState should test this function: +// 1. datatype meta info: +// get_type_id, get_type_as_type_descriptor, get_storage_field_type, have_subtypes, get_pdata_type (const IDataType *data_type), to_pb_column_meta (PColumnMeta *col_meta) +// get_family_name, get_is_parametric, should_align_right_in_pretty_formats +// text_can_contain_only_valid_utf8 +// have_maximum_size_of_value, get_maximum_size_of_value_in_memory, get_size_of_value_in_memory +// get_precision, get_scale +// is_null_literal, is_value_represented_by_number, is_value_unambiguously_represented_in_contiguous_memory_region +// 2. datatype creation with column : create_column, create_column_const (size_t size, const Field &field), create_column_const_with_default_value (size_t size), get_uncompressed_serialized_bytes (const IColumn &column, int be_exec_version) +// 3. serde related: get_serde (int nesting_level=1) +// to_string (const IColumn &column, size_t row_num, BufferWritable &ostr), to_string (const IColumn &column, size_t row_num), to_string_batch (const IColumn &column, ColumnString &column_to) +// serialize (const IColumn &column, char *buf, int be_exec_version), deserialize (const char *buf, MutableColumnPtr *column, int be_exec_version) + +namespace doris::vectorized { + +class DataTypeQuantileStateTest : public ::testing::TestWithParam { +protected: + void SetUp() override { + rows_value = GetParam(); + helper = std::make_unique(); + } + +public: + std::unique_ptr helper; + int rows_value; + DataTypePtr datatype_quantile_state = DataTypeFactory::instance().create_data_type( + FieldType::OLAP_FIELD_TYPE_QUANTILE_STATE, 0, 0); +}; + +TEST_P(DataTypeQuantileStateTest, MetaInfoTest) { + TypeDescriptor quantile_state_type_descriptor = {PrimitiveType::TYPE_QUANTILE_STATE}; + auto col_meta = std::make_shared(); + col_meta->set_type(PGenericType_TypeId_QUANTILE_STATE); + CommonDataTypeTest::DataTypeMetaInfo quantile_state_meta_info_to_assert = { + .type_id = TypeIndex::QuantileState, + .type_as_type_descriptor = &quantile_state_type_descriptor, + .family_name = "QuantileState", + .has_subtypes = false, + .storage_field_type = doris::FieldType::OLAP_FIELD_TYPE_QUANTILE_STATE, + .should_align_right_in_pretty_formats = false, + .text_can_contain_only_valid_utf8 = true, + .have_maximum_size_of_value = false, + .size_of_value_in_memory = size_t(-1), + .precision = size_t(-1), + .scale = size_t(-1), + .is_null_literal = false, + .is_value_represented_by_number = false, + .pColumnMeta = col_meta.get(), + .is_value_unambiguously_represented_in_contiguous_memory_region = true, + .default_field = QuantileState(), + }; + helper->meta_info_assert(datatype_quantile_state, quantile_state_meta_info_to_assert); +} + +TEST_P(DataTypeQuantileStateTest, CreateColumnTest) { + Field default_field_quantile_state = QuantileState(); + helper->create_column_assert(datatype_quantile_state, default_field_quantile_state, 17); +} + +void insert_data_quantile_state(MutableColumns* quantile_state_cols, + DataTypePtr datetype_quantile_state, int rows_value, + std::vector* data_strs = nullptr) { + auto serde_quantile_state = datetype_quantile_state->get_serde(1); + auto column_quantile_state = datetype_quantile_state->create_column(); + + quantile_state_cols->push_back(column_quantile_state->get_ptr()); + DataTypeSerDeSPtrs serde = {datetype_quantile_state->get_serde()}; + auto& data = assert_cast((*quantile_state_cols)[0].get())->get_data(); + for (size_t i = 0; i != rows_value; ++i) { + QuantileState quantile_state_value; + for (size_t j = 0; j <= i; ++j) { + quantile_state_value.add_value(j); + } + std::string memory_buffer(quantile_state_value.get_serialized_size(), '0'); + quantile_state_value.serialize(reinterpret_cast(memory_buffer.data())); + data.emplace_back(std::move(quantile_state_value)); + } + std::cout << "finish insert data" << std::endl; +} + +// test to_string | to_string_batch | from_string +TEST_P(DataTypeQuantileStateTest, FromAndToStringTest) { + MutableColumns quantile_state_cols; + std::vector data_strs; + insert_data_quantile_state(&quantile_state_cols, datatype_quantile_state, rows_value, + &data_strs); + + { + // to_string_batch | from_string + auto col_to = ColumnString::create(); + datatype_quantile_state->to_string_batch(*quantile_state_cols[0]->get_ptr(), *col_to); + ASSERT_EQ(col_to->size(), quantile_state_cols[0]->get_ptr()->size()); + std::cout << "finish to_string_batch | from_string not support test" << std::endl; + } + + { + // to_string | from_string + auto ser_col = ColumnString::create(); + ser_col->reserve(quantile_state_cols[0]->get_ptr()->size()); + VectorBufferWriter buffer_writer(*ser_col.get()); + for (int i = 0; i < quantile_state_cols[0]->get_ptr()->size(); ++i) { + datatype_quantile_state->to_string(*quantile_state_cols[0]->get_ptr(), i, + buffer_writer); + std::string res = + datatype_quantile_state->to_string(*quantile_state_cols[0]->get_ptr(), i); + buffer_writer.commit(); + EXPECT_EQ(res, "QuantileState()"); // QuantileState to_string is not implemented + } + std::cout << "finish to_string | from_string not support test" << std::endl; + } +} + +// serialize / deserialize +TEST_P(DataTypeQuantileStateTest, SerializeDeserializeTest) { + MutableColumns quantile_state_cols; + insert_data_quantile_state(&quantile_state_cols, datatype_quantile_state, rows_value); + + auto* column = assert_cast(quantile_state_cols[0].get()); + auto size = datatype_quantile_state->get_uncompressed_serialized_bytes( + *column, BeExecVersionManager::get_newest_version()); + std::unique_ptr buf = std::make_unique(size); + auto* result = datatype_quantile_state->serialize(*column, buf.get(), + BeExecVersionManager::get_newest_version()); + ASSERT_EQ(result, buf.get() + size); + + auto column2 = datatype_quantile_state->create_column(); + datatype_quantile_state->deserialize(buf.get(), &column2, + BeExecVersionManager::get_newest_version()); + for (size_t i = 0; i != rows_value; ++i) { + auto* column_res = assert_cast(column2.get()); + ASSERT_EQ(column->get_data()[i].get_serialized_size(), + column_res->get_data()[i].get_serialized_size()); + } + helper->serialize_deserialize_assert(quantile_state_cols, {datatype_quantile_state}); + std::cout << "finish serialize deserialize test" << std::endl; +} + +// serialize / deserialize as stream +TEST_P(DataTypeQuantileStateTest, SerializeDeserializeAsStreamTest) { + MutableColumns quantile_state_cols; + insert_data_quantile_state(&quantile_state_cols, datatype_quantile_state, rows_value); + + auto ser_col = ColumnString::create(); + VectorBufferWriter buffer_writer(*ser_col.get()); + auto* column_data = assert_cast(quantile_state_cols[0].get()); + auto c = datatype_quantile_state->create_column(); + auto* column_res = assert_cast(c.get()); + column_res->resize(rows_value); + for (size_t i = 0; i != rows_value; ++i) { + doris::vectorized::DataTypeQuantileState::serialize_as_stream(column_data->get_element(i), + buffer_writer); + buffer_writer.commit(); + BufferReadable buffer_readable(ser_col->get_data_at(i)); + doris::vectorized::DataTypeQuantileState::deserialize_as_stream(column_res->get_element(i), + buffer_readable); + ASSERT_EQ(column_data->get_data()[i].get_serialized_size(), + column_res->get_data()[i].get_serialized_size()); + } + std::cout << "finish serialize deserialize as stream test" << std::endl; +} + +INSTANTIATE_TEST_SUITE_P(Params, DataTypeQuantileStateTest, ::testing::Values(0, 1, 100, 1000)); +} // namespace doris::vectorized \ No newline at end of file diff --git a/be/test/vec/runtime/sort_merger_test.cpp b/be/test/vec/runtime/sort_merger_test.cpp new file mode 100644 index 000000000000000..a36b436deecef22 --- /dev/null +++ b/be/test/vec/runtime/sort_merger_test.cpp @@ -0,0 +1,252 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include + +#include "testutil/column_helper.h" +#include "testutil/mock/mock_slot_ref.h" +#include "vec/data_types/data_type_nullable.h" +#include "vec/data_types/data_type_number.h" +#include "vec/runtime/vsorted_run_merger.h" + +namespace doris::vectorized { + +class SortMergerTest : public testing::Test { +public: + SortMergerTest() = default; + ~SortMergerTest() override = default; + void SetUp() override {} + void TearDown() override {} +}; + +TEST(SortMergerTest, NULL_FIRST_ASC) { + /** + * in: [NULL, 1, 2, 3, 4], [NULL, 1, 2, 3, 4], [NULL, 1, 2, 3, 4], [NULL, 1, 2, 3, 4], [NULL, 1, 2, 3, 4] + * out: [NULL, NULL, NULL, NULL, NULL], [1, 1, 1, 1, 1], [2, 2, 2, 2, 2], [3, 3, 3, 3, 3], [4], [4], [4], [4], [4] + */ + const int num_children = 5; + const int batch_size = 5; + std::vector round; + round.resize(num_children, 0); + const int num_round = 2; + + std::unique_ptr merger; + auto profile = std::make_shared(""); + auto ordering_expr = MockSlotRef::create_mock_contexts( + std::make_shared(std::make_shared())); + { + std::vector is_asc_order = {true}; + std::vector nulls_first = {true}; + const int limit = -1; + const int offset = 0; + merger.reset(new VSortedRunMerger(ordering_expr, is_asc_order, nulls_first, batch_size, + limit, offset, profile.get())); + } + { + std::vector child_block_suppliers; + for (int child_idx = 0; child_idx < num_children; child_idx++) { + vectorized::BlockSupplier block_supplier = + [&, round_vec = &round, num_round = num_round, id = child_idx]( + vectorized::Block* block, bool* eos) { + *eos = ++((*round_vec)[id]) == num_round; + if (*eos) { + return Status::OK(); + } + *block = ColumnHelper::create_nullable_block( + {0, (*round_vec)[id] + 0, (*round_vec)[id] + 1, + (*round_vec)[id] + 2, (*round_vec)[id] + 3}, + {1, 0, 0, 0, 0}); + + return Status::OK(); + }; + child_block_suppliers.push_back(block_supplier); + } + EXPECT_TRUE(merger->prepare(child_block_suppliers).ok()); + } + { + for (int block_idx = 0; block_idx < num_children * (num_round - 1) - 1; block_idx++) { + vectorized::Block block; + bool eos = false; + EXPECT_TRUE(merger->get_next(&block, &eos).ok()); + auto expect_block = + block_idx == 0 + ? ColumnHelper::create_nullable_column({0, 0, 0, 0, 0}, + {1, 1, 1, 1, 1}) + : ColumnHelper::create_nullable_column( + {block_idx, block_idx, block_idx, block_idx, block_idx}, + {0, 0, 0, 0, 0}); + EXPECT_TRUE(ColumnHelper::column_equal(block.get_by_position(0).column, expect_block)) + << block_idx; + EXPECT_EQ(block.rows(), batch_size); + EXPECT_FALSE(eos); + } + for (int block_idx = 0; block_idx < num_children; block_idx++) { + vectorized::Block block; + bool eos = false; + EXPECT_TRUE(merger->get_next(&block, &eos).ok()); + auto expect_block = ColumnHelper::create_nullable_column({4}, {0}); + EXPECT_TRUE(ColumnHelper::column_equal(block.get_by_position(0).column, expect_block)) + << ((ColumnInt64*)((ColumnNullable*)block.get_by_position(0).column.get()) + ->get_nested_column_ptr() + .get()) + ->get_data()[0]; + EXPECT_EQ(block.rows(), 1); + EXPECT_FALSE(eos); + } + vectorized::Block block; + bool eos = false; + EXPECT_TRUE(merger->get_next(&block, &eos).ok()); + EXPECT_EQ(block.rows(), 0); + EXPECT_TRUE(eos); + } +} + +TEST(SortMergerTest, NULL_LAST_DESC) { + /** + * in: [4, 3, 2, 1, NULL], [4, 3, 2, 1, NULL], [4, 3, 2, 1, NULL], [4, 3, 2, 1, NULL], [4, 3, 2, 1, NULL] + * out: [4, 4, 4, 4, 4], [3, 3, 3, 3, 3], [2, 2, 2, 2, 2], [1, 1, 1, 1, 1], [NULL], [NULL], [NULL], [NULL], [NULL] + */ + const int num_children = 5; + const int batch_size = 5; + std::vector round; + round.resize(num_children, 0); + const int num_round = 2; + + std::unique_ptr merger; + auto profile = std::make_shared(""); + auto ordering_expr = MockSlotRef::create_mock_contexts( + std::make_shared(std::make_shared())); + { + std::vector is_asc_order = {false}; + std::vector nulls_first = {false}; + const int limit = -1; + const int offset = 0; + merger.reset(new VSortedRunMerger(ordering_expr, is_asc_order, nulls_first, batch_size, + limit, offset, profile.get())); + } + { + std::vector child_block_suppliers; + for (int child_idx = 0; child_idx < num_children; child_idx++) { + vectorized::BlockSupplier block_supplier = + [&, round_vec = &round, num_round = num_round, id = child_idx]( + vectorized::Block* block, bool* eos) { + *eos = ++((*round_vec)[id]) == num_round; + if (*eos) { + return Status::OK(); + } + *block = ColumnHelper::create_nullable_block( + {(*round_vec)[id] + 3, (*round_vec)[id] + 2, (*round_vec)[id] + 1, + (*round_vec)[id] + 0, 0}, + {0, 0, 0, 0, 1}); + + return Status::OK(); + }; + child_block_suppliers.push_back(block_supplier); + } + EXPECT_TRUE(merger->prepare(child_block_suppliers).ok()); + } + { + for (int block_idx = 0; block_idx < num_children * (num_round - 1) - 1; block_idx++) { + vectorized::Block block; + bool eos = false; + EXPECT_TRUE(merger->get_next(&block, &eos).ok()); + auto expect_block = ColumnHelper::create_nullable_column( + {4 - block_idx, 4 - block_idx, 4 - block_idx, 4 - block_idx, 4 - block_idx}, + {0, 0, 0, 0, 0}); + EXPECT_TRUE(ColumnHelper::column_equal(block.get_by_position(0).column, expect_block)) + << block_idx; + EXPECT_EQ(block.rows(), batch_size); + EXPECT_FALSE(eos); + } + for (int block_idx = 0; block_idx < num_children; block_idx++) { + vectorized::Block block; + bool eos = false; + EXPECT_TRUE(merger->get_next(&block, &eos).ok()); + auto expect_block = ColumnHelper::create_nullable_column({0}, {1}); + EXPECT_TRUE(ColumnHelper::column_equal(block.get_by_position(0).column, expect_block)) + << ((ColumnInt64*)((ColumnNullable*)block.get_by_position(0).column.get()) + ->get_nested_column_ptr() + .get()) + ->get_data()[0]; + EXPECT_EQ(block.rows(), 1); + EXPECT_FALSE(eos); + } + vectorized::Block block; + bool eos = false; + EXPECT_TRUE(merger->get_next(&block, &eos).ok()); + EXPECT_EQ(block.rows(), 0); + EXPECT_TRUE(eos); + } +} + +TEST(SortMergerTest, TEST_LIMIT) { + /** + * in: [NULL, 1, 2, 3, 4], [NULL, 1, 2, 3, 4], [NULL, 1, 2, 3, 4], [NULL, 1, 2, 3, 4], [NULL, 1, 2, 3, 4] + * offset = 20, limit = 1 + * out: [4] + */ + const int num_children = 5; + const int batch_size = 5; + std::vector round; + round.resize(num_children, 0); + const int num_round = 2; + + std::unique_ptr merger; + auto profile = std::make_shared(""); + auto ordering_expr = MockSlotRef::create_mock_contexts( + std::make_shared(std::make_shared())); + { + std::vector is_asc_order = {true}; + std::vector nulls_first = {true}; + const int limit = 1; + const int offset = 20; + merger.reset(new VSortedRunMerger(ordering_expr, is_asc_order, nulls_first, batch_size, + limit, offset, profile.get())); + } + { + std::vector child_block_suppliers; + for (int child_idx = 0; child_idx < num_children; child_idx++) { + vectorized::BlockSupplier block_supplier = + [&, round_vec = &round, num_round = num_round, id = child_idx]( + vectorized::Block* block, bool* eos) { + *eos = ++((*round_vec)[id]) == num_round; + if (*eos) { + return Status::OK(); + } + *block = ColumnHelper::create_nullable_block( + {0, (*round_vec)[id] + 0, (*round_vec)[id] + 1, + (*round_vec)[id] + 2, (*round_vec)[id] + 3}, + {1, 0, 0, 0, 0}); + + return Status::OK(); + }; + child_block_suppliers.push_back(block_supplier); + } + EXPECT_TRUE(merger->prepare(child_block_suppliers).ok()); + } + { + vectorized::Block block; + bool eos = false; + EXPECT_TRUE(merger->get_next(&block, &eos).ok()); + auto expect_block = ColumnHelper::create_nullable_column({4}, {0}); + EXPECT_TRUE(ColumnHelper::column_equal(block.get_by_position(0).column, expect_block)); + EXPECT_EQ(block.rows(), 1); + EXPECT_TRUE(eos); + } +} + +} // namespace doris::vectorized \ No newline at end of file diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/window/Lag.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/window/Lag.java index 7f7c9511e8167a0..fc74cadebfce041 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/window/Lag.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/window/Lag.java @@ -21,7 +21,9 @@ import org.apache.doris.nereids.exceptions.AnalysisException; import org.apache.doris.nereids.trees.expressions.Expression; import org.apache.doris.nereids.trees.expressions.functions.ExplicitlyCastableSignature; +import org.apache.doris.nereids.trees.expressions.literal.BigIntLiteral; import org.apache.doris.nereids.trees.expressions.literal.Literal; +import org.apache.doris.nereids.trees.expressions.literal.NullLiteral; import org.apache.doris.nereids.trees.expressions.shape.TernaryExpression; import org.apache.doris.nereids.trees.expressions.visitor.ExpressionVisitor; import org.apache.doris.nereids.types.BigIntType; @@ -39,9 +41,11 @@ public class Lag extends WindowFunction implements TernaryExpression, Explicitly static { List signatures = Lists.newArrayList(); - trivialTypes.forEach(t -> - signatures.add(FunctionSignature.ret(t).args(t, BigIntType.INSTANCE, t)) - ); + trivialTypes.forEach(t -> { + signatures.add(FunctionSignature.ret(t).args(t, BigIntType.INSTANCE, t)); + signatures.add(FunctionSignature.ret(t).args(t, BigIntType.INSTANCE)); + signatures.add(FunctionSignature.ret(t).args(t)); + }); SIGNATURES = ImmutableList.copyOf(signatures); } @@ -51,21 +55,21 @@ public Lag(Expression child, Expression offset, Expression defaultValue) { super("lag", child, offset, defaultValue); } - private Lag(List children) { - super("lag", children); + public Lag(Expression child, Expression offset) { + this(child, offset, new NullLiteral(child.getDataType())); + } + + public Lag(Expression child) { + this(child, new BigIntLiteral(1L), new NullLiteral(child.getDataType())); } public Expression getOffset() { - if (children().size() <= 1) { - throw new AnalysisException("Not set offset of Lead(): " + this.toSql()); - } + Preconditions.checkArgument(children.size() == 3); return child(1); } public Expression getDefaultValue() { - if (children.size() <= 2) { - throw new AnalysisException("Not set default value of Lead(): " + this.toSql()); - } + Preconditions.checkArgument(children.size() == 3); return child(2); } @@ -80,7 +84,13 @@ public boolean nullable() { @Override public Lag withChildren(List children) { Preconditions.checkArgument(children.size() >= 1 && children.size() <= 3); - return new Lag(children); + if (children.size() == 1) { + return new Lag(children.get(0)); + } else if (children.size() == 2) { + return new Lag(children.get(0), children.get(1)); + } else { + return new Lag(children.get(0), children.get(1), children.get(2)); + } } @Override diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/window/Lead.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/window/Lead.java index ec6a4b7b85c2003..251141a68cb2220 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/window/Lead.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/window/Lead.java @@ -21,7 +21,9 @@ import org.apache.doris.nereids.exceptions.AnalysisException; import org.apache.doris.nereids.trees.expressions.Expression; import org.apache.doris.nereids.trees.expressions.functions.ExplicitlyCastableSignature; +import org.apache.doris.nereids.trees.expressions.literal.BigIntLiteral; import org.apache.doris.nereids.trees.expressions.literal.Literal; +import org.apache.doris.nereids.trees.expressions.literal.NullLiteral; import org.apache.doris.nereids.trees.expressions.shape.TernaryExpression; import org.apache.doris.nereids.trees.expressions.visitor.ExpressionVisitor; import org.apache.doris.nereids.types.BigIntType; @@ -41,9 +43,11 @@ public class Lead extends WindowFunction implements TernaryExpression, Explicitl static { List signatures = Lists.newArrayList(); - trivialTypes.forEach(t -> - signatures.add(FunctionSignature.ret(t).args(t, BigIntType.INSTANCE, t)) - ); + trivialTypes.forEach(t -> { + signatures.add(FunctionSignature.ret(t).args(t, BigIntType.INSTANCE, t)); + signatures.add(FunctionSignature.ret(t).args(t, BigIntType.INSTANCE)); + signatures.add(FunctionSignature.ret(t).args(t)); + }); SIGNATURES = ImmutableList.copyOf(signatures); } @@ -53,21 +57,21 @@ public Lead(Expression child, Expression offset, Expression defaultValue) { super("lead", child, offset, defaultValue); } - private Lead(List children) { - super("lead", children); + public Lead(Expression child, Expression offset) { + this(child, offset, new NullLiteral(child.getDataType())); + } + + public Lead(Expression child) { + this(child, new BigIntLiteral(1L), new NullLiteral(child.getDataType())); } public Expression getOffset() { - if (children().size() <= 1) { - throw new AnalysisException("Not set offset of Lead(): " + this.toSql()); - } + Preconditions.checkArgument(children.size() == 3); return child(1); } public Expression getDefaultValue() { - if (children.size() <= 2) { - throw new AnalysisException("Not set default value of Lead(): " + this.toSql()); - } + Preconditions.checkArgument(children.size() == 3); return child(2); } @@ -114,7 +118,13 @@ public List getSignatures() { @Override public Lead withChildren(List children) { Preconditions.checkArgument(children.size() >= 1 && children.size() <= 3); - return new Lead(children); + if (children.size() == 1) { + return new Lead(children.get(0)); + } else if (children.size() == 2) { + return new Lead(children.get(0), children.get(1)); + } else { + return new Lead(children.get(0), children.get(1), children.get(2)); + } } @Override diff --git a/regression-test/data/correctness_p0/test_lag_lead_window.out b/regression-test/data/correctness_p0/test_lag_lead_window.out index 0f1a9112dd749ae..041314a1c6535df 100644 --- a/regression-test/data/correctness_p0/test_lag_lead_window.out +++ b/regression-test/data/correctness_p0/test_lag_lead_window.out @@ -19,3 +19,33 @@ c 2022-09-06T00:00:02 2022-09-06T00:00:01 b 2022-09-06T00:00:01 2022-09-06T00:00 a 2022-09-06T00:00 2022-08-30T00:00 +-- !select_lag_1 -- +a 2022-09-06T00:00 \N +b 2022-09-06T00:00:01 \N +c 2022-09-06T00:00:02 \N + +-- !select_lag_2 -- +a 2022-09-06T00:00 \N +b 2022-09-06T00:00:01 \N +c 2022-09-06T00:00:02 \N + +-- !select_lag_3 -- +a 2022-09-06T00:00 \N +b 2022-09-06T00:00:01 \N +c 2022-09-06T00:00:02 \N + +-- !select_lag_4 -- +a 2022-09-06T00:00 \N +b 2022-09-06T00:00:01 \N +c 2022-09-06T00:00:02 \N + +-- !select_lag_5 -- +a 2022-09-06T00:00 \N +b 2022-09-06T00:00:01 \N +c 2022-09-06T00:00:02 \N + +-- !select_lag_6 -- +a 2022-09-06T00:00 \N +b 2022-09-06T00:00:01 \N +c 2022-09-06T00:00:02 \N + diff --git a/regression-test/data/nereids_p0/sql_functions/window_functions/test_window_function.out b/regression-test/data/nereids_p0/sql_functions/window_functions/test_window_function.out index 5cfe5a4280b628f..c68c93691d3ce37 100644 --- a/regression-test/data/nereids_p0/sql_functions/window_functions/test_window_function.out +++ b/regression-test/data/nereids_p0/sql_functions/window_functions/test_window_function.out @@ -432,6 +432,78 @@ USA Pete Hello 9223372036854775807 1 9223372036854775807 2 +-- !lag_1 -- +\N \N \N +-9223372036854775807 false \N +-9223372036854775807 true false +-11011907 false \N +-11011903 true \N +123456 true \N +7210457 false \N +11011902 false \N +11011902 true false +11011902 true true +11011903 false \N +11011905 false \N +11011920 true \N +11011920 true true +9223372036854775807 false \N +9223372036854775807 false false + +-- !lag_2 -- +\N \N \N +-9223372036854775807 false \N +-9223372036854775807 true false +-11011907 false \N +-11011903 true \N +123456 true \N +7210457 false \N +11011902 false \N +11011902 true false +11011902 true true +11011903 false \N +11011905 false \N +11011920 true \N +11011920 true true +9223372036854775807 false \N +9223372036854775807 false false + +-- !lead_1 -- +\N \N \N +-9223372036854775807 false true +-9223372036854775807 true \N +-11011907 false \N +-11011903 true \N +123456 true \N +7210457 false \N +11011902 false true +11011902 true true +11011902 true \N +11011903 false \N +11011905 false \N +11011920 true true +11011920 true \N +9223372036854775807 false false +9223372036854775807 false \N + +-- !lead_2 -- +\N \N \N +-9223372036854775807 false true +-9223372036854775807 true \N +-11011907 false \N +-11011903 true \N +123456 true \N +7210457 false \N +11011902 false true +11011902 true true +11011902 true \N +11011903 false \N +11011905 false \N +11011920 true true +11011920 true \N +9223372036854775807 false false +9223372036854775807 false \N + -- !window_error1 -- \N \N -9223372036854775807 true diff --git a/regression-test/data/query_p0/sql_functions/window_functions/test_window_function.out b/regression-test/data/query_p0/sql_functions/window_functions/test_window_function.out index 5cfe5a4280b628f..c68c93691d3ce37 100644 --- a/regression-test/data/query_p0/sql_functions/window_functions/test_window_function.out +++ b/regression-test/data/query_p0/sql_functions/window_functions/test_window_function.out @@ -432,6 +432,78 @@ USA Pete Hello 9223372036854775807 1 9223372036854775807 2 +-- !lag_1 -- +\N \N \N +-9223372036854775807 false \N +-9223372036854775807 true false +-11011907 false \N +-11011903 true \N +123456 true \N +7210457 false \N +11011902 false \N +11011902 true false +11011902 true true +11011903 false \N +11011905 false \N +11011920 true \N +11011920 true true +9223372036854775807 false \N +9223372036854775807 false false + +-- !lag_2 -- +\N \N \N +-9223372036854775807 false \N +-9223372036854775807 true false +-11011907 false \N +-11011903 true \N +123456 true \N +7210457 false \N +11011902 false \N +11011902 true false +11011902 true true +11011903 false \N +11011905 false \N +11011920 true \N +11011920 true true +9223372036854775807 false \N +9223372036854775807 false false + +-- !lead_1 -- +\N \N \N +-9223372036854775807 false true +-9223372036854775807 true \N +-11011907 false \N +-11011903 true \N +123456 true \N +7210457 false \N +11011902 false true +11011902 true true +11011902 true \N +11011903 false \N +11011905 false \N +11011920 true true +11011920 true \N +9223372036854775807 false false +9223372036854775807 false \N + +-- !lead_2 -- +\N \N \N +-9223372036854775807 false true +-9223372036854775807 true \N +-11011907 false \N +-11011903 true \N +123456 true \N +7210457 false \N +11011902 false true +11011902 true true +11011902 true \N +11011903 false \N +11011905 false \N +11011920 true true +11011920 true \N +9223372036854775807 false false +9223372036854775807 false \N + -- !window_error1 -- \N \N -9223372036854775807 true diff --git a/regression-test/suites/correctness_p0/test_lag_lead_window.groovy b/regression-test/suites/correctness_p0/test_lag_lead_window.groovy index cf76d541e6e2878..1dfccca58ee6f94 100644 --- a/regression-test/suites/correctness_p0/test_lag_lead_window.groovy +++ b/regression-test/suites/correctness_p0/test_lag_lead_window.groovy @@ -51,5 +51,14 @@ suite("test_lag_lead_window") { qt_select_default """ select id, create_time, lead(create_time, 1, '2022-09-06 00:00:00') over (order by create_time desc) as "prev_time" from test1; """ qt_select_default """ select id, create_time, lead(create_time, 1, date_sub('2022-09-06 00:00:00', interval 7 day)) over (order by create_time desc) as "prev_time" from test1; """ + + qt_select_lag_1 """ select id, create_time, lag(create_time) over(partition by id) as "prev_time" from test1 order by 1 ,2 ; """ + qt_select_lag_2 """ select id, create_time, lag(create_time,1) over(partition by id) as "prev_time" from test1 order by 1 ,2 ; """ + qt_select_lag_3 """ select id, create_time, lag(create_time,1,NULL) over(partition by id) as "prev_time" from test1 order by 1 ,2 ; """ + qt_select_lag_4 """ select id, create_time, lead(create_time) over(partition by id) as "prev_time" from test1 order by 1 ,2 ; """ + qt_select_lag_5 """ select id, create_time, lead(create_time,1) over(partition by id) as "prev_time" from test1 order by 1 ,2 ; """ + qt_select_lag_6 """ select id, create_time, lead(create_time,1,NULL) over(partition by id) as "prev_time" from test1 order by 1 ,2 ; """ + sql """ DROP TABLE IF EXISTS test1 """ + } diff --git a/regression-test/suites/external_table_p0/trino_connector/hive/test_trino_hive_serde_prop.groovy b/regression-test/suites/external_table_p0/trino_connector/hive/test_trino_hive_serde_prop.groovy index b996d94f95d6df7..af13461c6257bd8 100644 --- a/regression-test/suites/external_table_p0/trino_connector/hive/test_trino_hive_serde_prop.groovy +++ b/regression-test/suites/external_table_p0/trino_connector/hive/test_trino_hive_serde_prop.groovy @@ -19,6 +19,7 @@ suite("test_trino_hive_serde_prop", "external_docker,hive,external_docker_hive,p String enabled = context.config.otherConfigs.get("enableHiveTest") if (enabled != null && enabled.equalsIgnoreCase("true")) { + sql """ set nereids_timeout_second=60; """ def host_ips = new ArrayList() String[][] backends = sql """ show backends """ for (def b in backends) { diff --git a/regression-test/suites/nereids_p0/sql_functions/window_functions/test_window_function.groovy b/regression-test/suites/nereids_p0/sql_functions/window_functions/test_window_function.groovy index 8ec7518255d88b8..100a3142b0f946b 100644 --- a/regression-test/suites/nereids_p0/sql_functions/window_functions/test_window_function.groovy +++ b/regression-test/suites/nereids_p0/sql_functions/window_functions/test_window_function.groovy @@ -366,31 +366,24 @@ suite("test_window_function") { rows between unbounded preceding and current row) as wj from baseall order by ${k1}, wj""" - // test error - test { - sql("select /*+SET_VAR(parallel_pipeline_task_num=1) */ ${k1}, lag(${k2}) over (partition by ${k1} order by ${k3}) from baseall") - exception "" - } + qt_lag_1 "select /*+SET_VAR(parallel_pipeline_task_num=1) */ ${k1},${k2}, lag(${k2}) over (partition by ${k1} order by ${k3},${k2}) from baseall order by 1,2;" + test { sql"select /*+SET_VAR(parallel_pipeline_task_num=1) */ ${k1}, lag(${k2}, -1, 1) over (partition by ${k1} order by ${k3}) from baseall" exception "" } - test { - sql"select /*+SET_VAR(parallel_pipeline_task_num=1) */ ${k1}, lag(${k2}, 1) over (partition by ${k1} order by ${k3}) from baseall" - exception "" - } - test { - sql"select /*+SET_VAR(parallel_pipeline_task_num=1) */ ${k1}, lead(${k2}) over (partition by ${k1} order by ${k3}) from baseall" - exception "" - } + + qt_lag_2 "select /*+SET_VAR(parallel_pipeline_task_num=1) */ ${k1}, ${k2}, lag(${k2}, 1) over (partition by ${k1} order by ${k3},${k2}) from baseall order by 1,2;" + + qt_lead_1 "select /*+SET_VAR(parallel_pipeline_task_num=1) */ ${k1}, ${k2}, lead(${k2}) over (partition by ${k1} order by ${k3},${k2}) from baseall order by 1,2;" + test { sql"select /*+SET_VAR(parallel_pipeline_task_num=1) */ ${k1}, lead(${k2}, -1, 1) over (partition by ${k1} order by ${k3}) from baseall" exception "" } - test { - sql"select /*+SET_VAR(parallel_pipeline_task_num=1) */ ${k1}, lead(${k2}, 1) over (partition by ${k1} order by ${k3}) from baseall" - exception "" - } + + qt_lead_2 "select /*+SET_VAR(parallel_pipeline_task_num=1) */ ${k1}, ${k2}, lead(${k2}, 1) over (partition by ${k1} order by ${k3},${k2}) from baseall order by 1,2;" + qt_window_error1"""select /*+SET_VAR(parallel_pipeline_task_num=1) */ ${k1}, first_value(${k2}) over (partition by ${k1}) from baseall order by ${k1}""" qt_window_error2"""select /*+SET_VAR(parallel_pipeline_task_num=1) */ ${k1}, first_value(${k2}) over (order by ${k3}) from baseall""" qt_window_error3"""select /*+SET_VAR(parallel_pipeline_task_num=1) */ ${k1}, max(${k2}) over (order by ${k3}) from baseall""" diff --git a/regression-test/suites/query_p0/sql_functions/window_functions/test_window_function.groovy b/regression-test/suites/query_p0/sql_functions/window_functions/test_window_function.groovy index c27a6bdbbdd3e7f..45dc7b0fdba8b06 100644 --- a/regression-test/suites/query_p0/sql_functions/window_functions/test_window_function.groovy +++ b/regression-test/suites/query_p0/sql_functions/window_functions/test_window_function.groovy @@ -364,31 +364,25 @@ suite("test_window_function") { rows between unbounded preceding and current row) as wj from baseall order by ${k1}, wj""" - // test error - test { - sql("select /*+SET_VAR(parallel_pipeline_task_num=1) */ ${k1}, lag(${k2}) over (partition by ${k1} order by ${k3}) from baseall") - exception "" - } + qt_lag_1 "select /*+SET_VAR(parallel_pipeline_task_num=1) */ ${k1}, ${k2}, lag(${k2}) over (partition by ${k1} order by ${k3},${k2}) from baseall order by 1,2;" + test { sql"select /*+SET_VAR(parallel_pipeline_task_num=1) */ ${k1}, lag(${k2}, -1, 1) over (partition by ${k1} order by ${k3}) from baseall" exception "" } - test { - sql"select /*+SET_VAR(parallel_pipeline_task_num=1) */ ${k1}, lag(${k2}, 1) over (partition by ${k1} order by ${k3}) from baseall" - exception "" - } - test { - sql"select /*+SET_VAR(parallel_pipeline_task_num=1) */ ${k1}, lead(${k2}) over (partition by ${k1} order by ${k3}) from baseall" - exception "" - } + + qt_lag_2 "select /*+SET_VAR(parallel_pipeline_task_num=1) */ ${k1}, ${k2},lag(${k2}, 1) over (partition by ${k1} order by ${k3}, ${k2}) from baseall order by 1,2;" + + + qt_lead_1 "select /*+SET_VAR(parallel_pipeline_task_num=1) */ ${k1}, ${k2}, lead(${k2}) over (partition by ${k1} order by ${k3}, ${k2}) from baseall order by 1,2;" + test { sql"select /*+SET_VAR(parallel_pipeline_task_num=1) */ ${k1}, lead(${k2}, -1, 1) over (partition by ${k1} order by ${k3}) from baseall" exception "" } - test { - sql"select /*+SET_VAR(parallel_pipeline_task_num=1) */ ${k1}, lead(${k2}, 1) over (partition by ${k1} order by ${k3}) from baseall" - exception "" - } + + qt_lead_2 "select /*+SET_VAR(parallel_pipeline_task_num=1) */ ${k1}, ${k2}, lead(${k2}, 1) over (partition by ${k1} order by ${k3},${k2}) from baseall order by 1,2;" + qt_window_error1"""select /*+SET_VAR(parallel_pipeline_task_num=1) */ ${k1}, first_value(${k2}) over (partition by ${k1}) from baseall order by ${k1}""" qt_window_error2"""select /*+SET_VAR(parallel_pipeline_task_num=1) */ ${k1}, first_value(${k2}) over (order by ${k3}) from baseall""" qt_window_error3"""select /*+SET_VAR(parallel_pipeline_task_num=1) */ ${k1}, max(${k2}) over (order by ${k3}) from baseall"""