From 3b1db376bc2e21bc7799c6b1011cd56aa8dd60cf Mon Sep 17 00:00:00 2001 From: Mryange Date: Fri, 26 Jun 2026 11:13:36 +0800 Subject: [PATCH] [refactor](be) Use ColumnArrayView in array functions ### What problem does this PR solve? Issue Number: close #xxx Related PR: #xxx Problem Summary: Refactor array_contains, array_position, countequal, array_remove, and arrays_overlap to read array arguments through ColumnArrayView and ColumnView instead of expanding const columns or manually unpacking nullable array internals. This keeps const and nullable handling in the shared view abstraction and reduces duplicated array access code. Add BE unit coverage for const array and const target argument cases. ### Release note None ### Check List (For Author) - Test: Not run (per request); only ran clang-format-16 and git diff --check. - Behavior changed: No - Does this need documentation: No --- be/src/core/column/column_array_view.h | 2 + .../function/array/function_array_index.h | 183 ++--------- .../function/array/function_array_remove.h | 310 ++---------------- .../function/array/function_arrays_overlap.h | 141 ++++---- .../function/function_array_index_test.cpp | 25 ++ .../function/function_array_remove_test.cpp | 115 +++++++ .../function/function_arrays_overlap_test.cpp | 22 ++ 7 files changed, 285 insertions(+), 513 deletions(-) create mode 100644 be/test/exprs/function/function_array_remove_test.cpp diff --git a/be/src/core/column/column_array_view.h b/be/src/core/column/column_array_view.h index cc74d6e3c7088d..841a339a5abfca 100644 --- a/be/src/core/column/column_array_view.h +++ b/be/src/core/column/column_array_view.h @@ -120,6 +120,8 @@ struct ColumnArrayView { return false; } + bool is_nullable() const { return outer_null_map != nullptr; } + // Index-based access: uses offsets[actual - 1] (PaddedPODArray sentinel guarantees [-1] is valid) ArrayDataView operator[](size_t idx) const { size_t actual = is_const ? 0 : idx; diff --git a/be/src/exprs/function/array/function_array_index.h b/be/src/exprs/function/array/function_array_index.h index 742bb944494510..0984e803792859 100644 --- a/be/src/exprs/function/array/function_array_index.h +++ b/be/src/exprs/function/array/function_array_index.h @@ -32,6 +32,7 @@ #include "core/call_on_type_index.h" #include "core/column/column.h" #include "core/column/column_array.h" +#include "core/column/column_array_view.h" #include "core/column/column_nullable.h" #include "core/column/column_string.h" #include "core/column/column_vector.h" @@ -218,121 +219,40 @@ class FunctionArrayIndex : public IFunction { } private: - ColumnPtr _execute_string(const ColumnArray::Offsets64& offsets, const UInt8* nested_null_map, - const IColumn& nested_column, const IColumn& right_column, - const UInt8* right_nested_null_map, - const UInt8* outer_null_map) const { - // check array nested column type and get data - const auto& str_offs = reinterpret_cast(nested_column).get_offsets(); - const auto& str_chars = reinterpret_cast(nested_column).get_chars(); - - // check right column type and get data - const auto& right_offs = reinterpret_cast(right_column).get_offsets(); - const auto& right_chars = reinterpret_cast(right_column).get_chars(); - - // prepare return data - auto dst = PrimitiveTypeTraits::ColumnType::create(offsets.size(), 0); - auto& dst_data = dst->get_data(); - auto dst_null_column = ColumnUInt8::create(offsets.size(), 0); - auto& dst_null_data = dst_null_column->get_data(); - - // process - for (size_t row = 0; row < offsets.size(); ++row) { - if (outer_null_map && outer_null_map[row]) { - dst_null_data[row] = true; - continue; - } - dst_null_data[row] = false; - typename PrimitiveTypeTraits::CppType res = 0; - size_t off = offsets[row - 1]; - size_t len = offsets[row] - off; - - size_t right_off = right_offs[row - 1]; - size_t right_len = right_offs[row] - right_off; - for (size_t pos = 0; pos < len; ++pos) { - // match null value - if (right_nested_null_map && right_nested_null_map[row] && nested_null_map && - nested_null_map[pos + off]) { - ConcreteAction::apply(res, pos); - if constexpr (!ConcreteAction::resume_execution) { - break; - } - } - // some is null while another is not - if (right_nested_null_map && nested_null_map && - right_nested_null_map[row] != nested_null_map[pos + off]) { - continue; - } - if (nested_null_map && nested_null_map[pos + off]) { - continue; - } - size_t str_pos = str_offs[pos + off - 1]; - size_t str_len = str_offs[pos + off] - str_pos; - const char* left_raw_v = reinterpret_cast(&str_chars[str_pos]); - const char* right_raw_v = reinterpret_cast(&right_chars[right_off]); - // StringRef operator == using vec impl - if (StringRef(left_raw_v, str_len) == StringRef(right_raw_v, right_len)) { - ConcreteAction::apply(res, pos); - if constexpr (!ConcreteAction::resume_execution) { - break; - } - } - } - dst_data[row] = res; - } - - if (outer_null_map == nullptr) { - return dst; - } - return ColumnNullable::create(std::move(dst), std::move(dst_null_column)); - } - - template - ColumnPtr _execute_number(const ColumnArray::Offsets64& offsets, const UInt8* nested_null_map, - const IColumn& nested_column, const IColumn& right_column, - const UInt8* right_nested_null_map, - const UInt8* outer_null_map) const { - // check array nested column type and get data - const auto& nested_data = - reinterpret_cast(nested_column).get_data(); - - // check right column type and get data - const auto& right_data = reinterpret_cast(right_column).get_data(); - + template + ColumnPtr _execute_view(const ColumnArrayView& array_view, + const ColumnView& right_view) const { // prepare return data - auto dst = PrimitiveTypeTraits::ColumnType::create(offsets.size(), 0); + auto dst = PrimitiveTypeTraits::ColumnType::create(array_view.size(), 0); auto& dst_data = dst->get_data(); - auto dst_null_column = ColumnUInt8::create(offsets.size(), 0); + auto dst_null_column = ColumnUInt8::create(array_view.size(), 0); auto& dst_null_data = dst_null_column->get_data(); // process - for (size_t row = 0; row < offsets.size(); ++row) { - if (outer_null_map && outer_null_map[row]) { + for (size_t row = 0; row < array_view.size(); ++row) { + if (array_view.is_null_at(row)) { dst_null_data[row] = true; continue; } dst_null_data[row] = false; typename PrimitiveTypeTraits::CppType res = 0; - size_t off = offsets[row - 1]; - size_t len = offsets[row] - off; - for (size_t pos = 0; pos < len; ++pos) { + const auto array_data = array_view[row]; + for (size_t pos = 0; pos < array_data.size(); ++pos) { // match null value - if (right_nested_null_map && right_nested_null_map[row] && nested_null_map && - nested_null_map[pos + off]) { + if (right_view.is_null_at(row) && array_data.is_null_at(pos)) { ConcreteAction::apply(res, pos); if constexpr (!ConcreteAction::resume_execution) { break; } } // some is null while another is not - if (right_nested_null_map && nested_null_map && - right_nested_null_map[row] != nested_null_map[pos + off]) { + if (right_view.is_null_at(row) != array_data.is_null_at(pos)) { continue; } - if (nested_null_map && nested_null_map[pos + off]) { + if (array_data.is_null_at(pos)) { continue; } - if (nested_data[pos + off] == right_data[row]) { + if (array_data.value_at(pos) == right_view.value_at(row)) { ConcreteAction::apply(res, pos); if constexpr (!ConcreteAction::resume_execution) { break; @@ -342,89 +262,40 @@ class FunctionArrayIndex : public IFunction { dst_data[row] = res; } - if (outer_null_map == nullptr) { + if (!array_view.is_nullable()) { return dst; } return ColumnNullable::create(std::move(dst), std::move(dst_null_column)); } - template - ColumnPtr _execute_number_expanded(const ColumnArray::Offsets64& offsets, - const UInt8* nested_null_map, const IColumn& nested_column, - const IColumn& right_column, - const UInt8* right_nested_null_map, - const UInt8* outer_null_map) const { - if (is_column(right_column)) { - return _execute_number( - offsets, nested_null_map, nested_column, right_column, right_nested_null_map, - outer_null_map); - } - return nullptr; - } - Status _execute_dispatch(Block& block, const ColumnNumbers& arguments, uint32_t result, size_t input_rows_count) const { - // extract array offsets and nested data - auto left_column = - block.get_by_position(arguments[0]).column->convert_to_full_column_if_const(); if (block.get_by_position(arguments[0]).type->get_primitive_type() != TYPE_ARRAY) { return Status::InvalidArgument(get_name() + " first argument must be array, but got " + block.get_by_position(arguments[0]).type->get_name()); } - const ColumnArray* array_column = nullptr; - const UInt8* array_null_map = nullptr; - if (const auto* nullable_array = check_and_get_column(left_column.get())) { - array_column = - reinterpret_cast(&nullable_array->get_nested_column()); - array_null_map = nullable_array->get_null_map_column().get_data().data(); - } else { - array_column = reinterpret_cast(left_column.get()); - } - const auto& offsets = array_column->get_offsets(); - const UInt8* nested_null_map = nullptr; - ColumnPtr nested_column = nullptr; - if (const auto* nested_null_column = - check_and_get_column(&array_column->get_data())) { - nested_null_map = nested_null_column->get_null_map_column().get_data().data(); - nested_column = nested_null_column->get_nested_column_ptr(); - } else { - nested_column = array_column->get_data_ptr(); - } - - // get right column - ColumnPtr right_full_column = - block.get_by_position(arguments[1]).column->convert_to_full_column_if_const(); - ColumnPtr right_column = right_full_column; - const UInt8* right_nested_null_map = nullptr; - if (const auto* nested_null_column = - check_and_get_column(right_column.get())) { - right_column = nested_null_column->get_nested_column_ptr(); - right_nested_null_map = nested_null_column->get_null_map_column().get_data().data(); - } // execute auto array_type = remove_nullable(block.get_by_position(arguments[0]).type); auto left_element_type = remove_nullable( assert_cast(array_type.get())->get_nested_type()); auto right_type = remove_nullable(block.get_by_position(arguments[1]).type); + auto left_element_primitive_type = left_element_type->get_primitive_type(); + auto right_primitive_type = right_type->get_primitive_type(); ColumnPtr return_column = nullptr; - if (is_string_type(right_type->get_primitive_type()) && - is_string_type(left_element_type->get_primitive_type())) { - return_column = _execute_string(offsets, nested_null_map, *nested_column, *right_column, - right_nested_null_map, array_null_map); - } else if (right_type->get_primitive_type() == left_element_type->get_primitive_type()) { + if (right_primitive_type == left_element_primitive_type || + (is_string_type(right_primitive_type) && is_string_type(left_element_primitive_type))) { auto call = [&](const auto& type) -> bool { using DispatchType = std::decay_t; - auto col = _execute_number_expanded( - offsets, nested_null_map, *nested_column, *right_column, - right_nested_null_map, array_null_map); - if (col) { - return_column = std::move(col); - return true; - } - return false; + constexpr PrimitiveType PType = DispatchType::PType; + auto array_view = + ColumnArrayView::create(block.get_by_position(arguments[0]).column); + auto right_view = + ColumnView::create(block.get_by_position(arguments[1]).column); + return_column = _execute_view(array_view, right_view); + return true; }; - dispatch_switch_scalar(right_type->get_primitive_type(), call); + dispatch_switch_all(left_element_primitive_type, call); } if (return_column) { diff --git a/be/src/exprs/function/array/function_array_remove.h b/be/src/exprs/function/array/function_array_remove.h index fe05b64d0b2222..5f471da86e9947 100644 --- a/be/src/exprs/function/array/function_array_remove.h +++ b/be/src/exprs/function/array/function_array_remove.h @@ -19,12 +19,9 @@ #include #include -#include #include #include -#include -#include #include #include @@ -37,6 +34,7 @@ #include "core/call_on_type_index.h" #include "core/column/column.h" #include "core/column/column_array.h" +#include "core/column/column_array_view.h" #include "core/column/column_decimal.h" #include "core/column/column_nullable.h" #include "core/column/column_string.h" @@ -92,207 +90,30 @@ class FunctionArrayRemove : public IFunction { } private: - template - ColumnPtr _execute_number(const ColumnArray::Offsets64& offsets, const IColumn& nested_column, - const IColumn& right_column, const UInt8* nested_null_map, - const UInt8* right_nested_null_map, - const ColumnUInt8* array_null_map) const { - // check array nested column type and get data - const auto& src_data = reinterpret_cast(nested_column).get_data(); - - // check target column type and get data - const auto& target_data = reinterpret_cast(right_column).get_data(); - - PaddedPODArray* dst_null_map = nullptr; - MutableColumnPtr array_nested_column = nullptr; - IColumn* dst_column = nullptr; - if (nested_null_map) { - auto dst_nested_column = - ColumnNullable::create(nested_column.clone_empty(), ColumnUInt8::create()); - array_nested_column = dst_nested_column->get_ptr(); - dst_column = dst_nested_column->get_nested_column_ptr().get(); - dst_null_map = &dst_nested_column->get_null_map_data(); - dst_null_map->reserve(offsets.back()); - } else { - auto dst_nested_column = nested_column.clone_empty(); - array_nested_column = dst_nested_column->get_ptr(); - dst_column = dst_nested_column.get(); - } - - auto& dst_data = reinterpret_cast(*dst_column).get_data(); - dst_data.reserve(offsets.back()); - - auto dst_offsets_column = ColumnArray::ColumnOffsets::create(); - auto& dst_offsets = dst_offsets_column->get_data(); - dst_offsets.reserve(offsets.size()); - - size_t cur = 0; - for (size_t row = 0; row < offsets.size(); ++row) { - size_t off = offsets[row - 1]; - size_t len = offsets[row] - off; - - if (len == 0) { - // case: array:[], target:1 ==> [] - dst_offsets.push_back(cur); - continue; - } - - size_t cur_count = 0; - for (size_t pos = 0; pos < len; ++pos) { - // left is null, right is null - if (nested_null_map && nested_null_map[off + pos] && right_nested_null_map && - right_nested_null_map[row]) { - continue; - } - - // left is null, right is not null - if (nested_null_map && nested_null_map[off + pos]) { - // case: array:[Null], target:1 ==> [Null] - dst_data.push_back(typename NestedColumnType::value_type()); - ++cur_count; - dst_null_map->push_back(1); - continue; - } - - // left is not null, right is null - if (right_nested_null_map && right_nested_null_map[row]) { - dst_data.push_back(src_data[off + pos]); - ++cur_count; - if (nested_null_map) { - dst_null_map->push_back(0); - } - continue; - } - - // left is not null, right is not null - if (src_data[off + pos] == target_data[row]) { - continue; - } else { - dst_data.push_back(src_data[off + pos]); - ++cur_count; - if (nested_null_map) { - dst_null_map->push_back(0); - } - } - } - - cur += cur_count; - dst_offsets.push_back(cur); - } - - auto dst = - ColumnArray::create(std::move(array_nested_column), std::move(dst_offsets_column)); - if (array_null_map) { - auto dst_null_column = ColumnUInt8::create(); - dst_null_column->insert_range_from(*array_null_map, 0, offsets.size()); - return ColumnNullable::create(std::move(dst), std::move(dst_null_column)); - } else { - return dst; - } - } - - ColumnPtr _execute_string(const ColumnArray::Offsets64& offsets, const IColumn& nested_column, - const IColumn& right_column, const UInt8* nested_null_map, - const UInt8* right_nested_null_map, - const ColumnUInt8* array_null_map) const { - // check array nested column type and get data - const auto& src_offs = reinterpret_cast(nested_column).get_offsets(); - const auto& src_chars = reinterpret_cast(nested_column).get_chars(); - - // check right column type and get data - const auto& target_offs = reinterpret_cast(right_column).get_offsets(); - const auto& target_chars = reinterpret_cast(right_column).get_chars(); - - PaddedPODArray* dst_null_map = nullptr; - MutableColumnPtr array_nested_column = nullptr; - IColumn* dst_column = nullptr; - if (nested_null_map) { - auto dst_nested_column = - ColumnNullable::create(nested_column.clone_empty(), ColumnUInt8::create()); - array_nested_column = dst_nested_column->get_ptr(); - dst_column = dst_nested_column->get_nested_column_ptr().get(); - dst_null_map = &dst_nested_column->get_null_map_data(); - dst_null_map->reserve(offsets.back()); - } else { - auto dst_nested_column = nested_column.clone_empty(); - array_nested_column = dst_nested_column->get_ptr(); - dst_column = dst_nested_column.get(); - } - - auto& dst_offs = reinterpret_cast(*dst_column).get_offsets(); - auto& dst_chars = reinterpret_cast(*dst_column).get_chars(); - dst_offs.reserve(src_offs.size()); - dst_chars.reserve(src_offs.back()); + template + ColumnPtr _execute_view(const IColumn& array_data_column, + const ColumnArrayView& array_view, + const ColumnView& right_view) const { + auto array_nested_column = array_data_column.clone_empty(); + array_nested_column->reserve(array_data_column.size()); auto dst_offsets_column = ColumnArray::ColumnOffsets::create(); auto& dst_offsets = dst_offsets_column->get_data(); - dst_offsets.reserve(offsets.size()); + dst_offsets.reserve(array_view.size()); size_t cur = 0; - for (size_t row = 0; row < offsets.size(); ++row) { - size_t off = offsets[row - 1]; - size_t len = offsets[row] - off; - - if (len == 0) { - // case: array:[], target:'str' ==> [] - dst_offsets.push_back(cur); - continue; - } - - size_t target_off = target_offs[row - 1]; - size_t target_len = target_offs[row] - target_off; - + for (size_t row = 0; row < array_view.size(); ++row) { + const auto array_data = array_view[row]; size_t cur_count = 0; - for (size_t pos = 0; pos < len; ++pos) { - // left is null, right is null - if (nested_null_map && nested_null_map[off + pos] && right_nested_null_map && - right_nested_null_map[row]) { - continue; - } - - // left is null, right is not null - if (nested_null_map && nested_null_map[off + pos]) { - // case: array:[Null], target:'str' ==> [Null] - // dst_chars.push_back(0); - dst_offs.push_back(dst_offs.back()); - ++cur_count; - dst_null_map->push_back(1); + for (size_t pos = 0; pos < array_data.size(); ++pos) { + // Keep null values unless the remove target is also null. + if (array_data.is_null_at(pos) && right_view.is_null_at(row)) { continue; } - size_t src_pos = src_offs[pos + off - 1]; - size_t src_len = src_offs[pos + off] - src_pos; - - // left is not null, right is null - if (right_nested_null_map && right_nested_null_map[row]) { - const size_t old_size = dst_chars.size(); - const size_t new_size = old_size + src_len; - dst_chars.resize(new_size); - memcpy(&dst_chars[old_size], &src_chars[src_pos], src_len); - dst_offs.push_back(new_size); - ++cur_count; - if (nested_null_map) { - dst_null_map->push_back(0); - } - continue; - } - - // left is not null, right is not null - const char* src_raw_v = reinterpret_cast(&src_chars[src_pos]); - const char* target_raw_v = reinterpret_cast(&target_chars[target_off]); - - if (std::string_view(src_raw_v, src_len) == - std::string_view(target_raw_v, target_len)) { - continue; - } else { - const size_t old_size = dst_chars.size(); - const size_t new_size = old_size + src_len; - dst_chars.resize(new_size); - memcpy(&dst_chars[old_size], &src_chars[src_pos], src_len); - dst_offs.push_back(new_size); + if (array_data.is_null_at(pos) || right_view.is_null_at(row) || + !(array_data.value_at(pos) == right_view.value_at(row))) { + array_nested_column->insert_from(array_data_column, array_data.offset + pos); ++cur_count; - if (nested_null_map) { - dst_null_map->push_back(0); - } } } @@ -302,116 +123,53 @@ class FunctionArrayRemove : public IFunction { auto dst = ColumnArray::create(std::move(array_nested_column), std::move(dst_offsets_column)); - if (array_null_map) { - auto dst_null_column = ColumnUInt8::create(); - dst_null_column->insert_range_from(*array_null_map, 0, offsets.size()); - return ColumnNullable::create(std::move(dst), std::move(dst_null_column)); - } else { + if (!array_view.is_nullable()) { return dst; } - } - template - ColumnPtr _execute_number_expanded(const ColumnArray::Offsets64& offsets, - const IColumn& nested_column, const IColumn& right_column, - const UInt8* nested_null_map, - const UInt8* right_nested_null_map, - const ColumnUInt8* array_null_map) const { - if (is_column(right_column)) { - return _execute_number( - offsets, nested_column, right_column, nested_null_map, right_nested_null_map, - array_null_map); + auto dst_null_column = ColumnUInt8::create(array_view.size(), 0); + auto& dst_null_map = dst_null_column->get_data(); + for (size_t row = 0; row < array_view.size(); ++row) { + dst_null_map[row] = array_view.is_null_at(row); } - return nullptr; + return ColumnNullable::create(std::move(dst), std::move(dst_null_column)); } ColumnPtr _execute_dispatch(const ColumnsWithTypeAndName& arguments, size_t input_rows_count) const { // check array nested column type and get data - auto left_column = arguments[0].column->convert_to_full_column_if_const(); + const auto& [left_column, is_const] = unpack_if_const(arguments[0].column); const ColumnArray* array_column = nullptr; - const ColumnUInt8* array_null_map = nullptr; if (const auto* nullable_array = check_and_get_column(left_column.get())) { array_column = reinterpret_cast(&nullable_array->get_nested_column()); - array_null_map = &nullable_array->get_null_map_column(); nullable_array->sanity_check(); } else { array_column = reinterpret_cast(left_column.get()); } - const auto& offsets = array_column->get_offsets(); - const UInt8* nested_null_map = nullptr; - ColumnPtr nested_column = nullptr; - if (const auto* nested_null_column = - check_and_get_column(&array_column->get_data())) { - nested_null_column->sanity_check(); - nested_null_map = nested_null_column->get_null_map_column().get_data().data(); - nested_column = nested_null_column->get_nested_column_ptr(); - } else { - nested_column = array_column->get_data_ptr(); - } - - // get right column - ColumnPtr right_full_column = arguments[1].column->convert_to_full_column_if_const(); - ColumnPtr right_column = right_full_column; - const UInt8* right_nested_null_map = nullptr; - if (const auto* nested_null_column = - check_and_get_column(right_column.get())) { - right_column = nested_null_column->get_nested_column_ptr(); - right_nested_null_map = nested_null_column->get_null_map_column().get_data().data(); - } + DCHECK(is_const ? array_column->get_offsets().size() == 1 + : array_column->get_offsets().size() == input_rows_count); // execute auto array_type = remove_nullable(arguments[0].type); auto left_element_type = remove_nullable( assert_cast(array_type.get())->get_nested_type()); auto right_type = remove_nullable(arguments[1].type); + auto left_element_primitive_type = left_element_type->get_primitive_type(); + auto right_primitive_type = right_type->get_primitive_type(); ColumnPtr res = nullptr; - if (is_string_type(right_type->get_primitive_type()) && - is_string_type(left_element_type->get_primitive_type())) { - res = _execute_string(offsets, *nested_column, *right_column, nested_null_map, - right_nested_null_map, array_null_map); - } else if (is_number(right_type->get_primitive_type()) && - is_number(left_element_type->get_primitive_type())) { + if (right_primitive_type == left_element_primitive_type || + (is_string_type(right_primitive_type) && is_string_type(left_element_primitive_type))) { auto call = [&](const auto& type) -> bool { using DispatchType = std::decay_t; - res = _execute_number_expanded( - offsets, *nested_column, *right_column, nested_null_map, - right_nested_null_map, array_null_map); + constexpr PrimitiveType PType = DispatchType::PType; + auto array_view = ColumnArrayView::create(arguments[0].column); + auto right_view = ColumnView::create(arguments[1].column); + res = _execute_view(array_column->get_data(), array_view, right_view); return true; }; - if (!dispatch_switch_number(left_element_type->get_primitive_type(), call)) { - throw doris::Exception(doris::ErrorCode::INTERNAL_ERROR, - "not support left type " + left_element_type->get_name()); - } - } else if (is_date_v2_or_datetime_v2(right_type->get_primitive_type()) && - is_date_v2_or_datetime_v2(left_element_type->get_primitive_type())) { - if (left_element_type->get_primitive_type() == PrimitiveType::TYPE_DATEV2) { - res = _execute_number_expanded(offsets, *nested_column, *right_column, - nested_null_map, right_nested_null_map, - array_null_map); - } else if (left_element_type->get_primitive_type() == PrimitiveType::TYPE_DATETIMEV2) { - res = _execute_number_expanded( - offsets, *nested_column, *right_column, nested_null_map, - right_nested_null_map, array_null_map); - } - } else if (is_timestamptz_type(right_type->get_primitive_type()) && - is_timestamptz_type(left_element_type->get_primitive_type())) { - res = _execute_number_expanded( - offsets, *nested_column, *right_column, nested_null_map, right_nested_null_map, - array_null_map); - } else if (is_ip(right_type->get_primitive_type()) && - is_ip(left_element_type->get_primitive_type())) { - if (left_element_type->get_primitive_type() == TYPE_IPV4) { - res = _execute_number_expanded(offsets, *nested_column, *right_column, - nested_null_map, right_nested_null_map, - array_null_map); - } else if (left_element_type->get_primitive_type() == TYPE_IPV6) { - res = _execute_number_expanded(offsets, *nested_column, *right_column, - nested_null_map, right_nested_null_map, - array_null_map); - } + dispatch_switch_all(left_element_primitive_type, call); } return res; } diff --git a/be/src/exprs/function/array/function_arrays_overlap.h b/be/src/exprs/function/array/function_arrays_overlap.h index d71643945ca796..c515c08a1ff50f 100644 --- a/be/src/exprs/function/array/function_arrays_overlap.h +++ b/be/src/exprs/function/array/function_arrays_overlap.h @@ -31,6 +31,7 @@ #include "core/block/column_numbers.h" #include "core/block/column_with_type_and_name.h" #include "core/column/column.h" +#include "core/column/column_array_view.h" #include "core/column/column_nullable.h" #include "core/column/column_vector.h" #include "core/data_type/data_type.h" @@ -57,28 +58,28 @@ template class ColumnStr; using ColumnString = ColumnStr; -template +template struct OverlapSetImpl { - using ElementNativeType = typename NativeType::Type; + using ArrayView = ArrayDataView; + using ElementNativeType = + typename NativeType::ColumnType::value_type>::Type; using Set = phmap::flat_hash_set>; Set set; bool has_null = false; - void insert_array(const IColumn* column, const UInt8* nullmap, size_t start, size_t size) { - const auto& vec = assert_cast(*column).get_data(); - for (size_t i = start; i < start + size; ++i) { - if (nullmap[i]) { + void insert_array(const ArrayView& array) { + for (size_t i = 0; i < array.size(); ++i) { + if (array.is_null_at(i)) { has_null = true; continue; } - set.insert(vec[i]); + set.insert(array.value_at(i)); } } - bool find_any(const IColumn* column, const UInt8* nullmap, size_t start, size_t size) { - const auto& vec = assert_cast(*column).get_data(); - for (size_t i = start; i < start + size; ++i) { - if (nullmap[i]) { + bool find_any(const ArrayView& array) { + for (size_t i = 0; i < array.size(); ++i) { + if (array.is_null_at(i)) { if (has_null) { return true; } else { @@ -86,7 +87,7 @@ struct OverlapSetImpl { } } - if (set.contains(vec[i])) { + if (set.contains(array.value_at(i))) { return true; } } @@ -95,27 +96,26 @@ struct OverlapSetImpl { }; template <> -struct OverlapSetImpl { +struct OverlapSetImpl { + using ArrayView = ArrayDataView; using ElementNativeType = Int128; using Set = phmap::flat_hash_set>; Set set; bool has_null = false; - void insert_array(const IColumn* column, const UInt8* nullmap, size_t start, size_t size) { - const auto& vec = assert_cast(*column).get_data(); - for (size_t i = start; i < start + size; ++i) { - if (nullmap[i]) { + void insert_array(const ArrayView& array) { + for (size_t i = 0; i < array.size(); ++i) { + if (array.is_null_at(i)) { has_null = true; continue; } - set.insert(vec[i].value()); + set.insert(array.value_at(i).value()); } } - bool find_any(const IColumn* column, const UInt8* nullmap, size_t start, size_t size) { - const auto& vec = assert_cast(*column).get_data(); - for (size_t i = start; i < start + size; ++i) { - if (nullmap[i]) { + bool find_any(const ArrayView& array) { + for (size_t i = 0; i < array.size(); ++i) { + if (array.is_null_at(i)) { if (has_null) { return true; } else { @@ -123,7 +123,7 @@ struct OverlapSetImpl { } } - if (set.contains(vec[i].value())) { + if (set.contains(array.value_at(i).value())) { return true; } } @@ -132,24 +132,25 @@ struct OverlapSetImpl { }; template <> -struct OverlapSetImpl { +struct OverlapSetImpl { + using ArrayView = ArrayDataView; using Set = phmap::flat_hash_set>; Set set; bool has_null = false; - void insert_array(const IColumn* column, const UInt8* nullmap, size_t start, size_t size) { - for (size_t i = start; i < start + size; ++i) { - if (nullmap[i]) { + void insert_array(const ArrayView& array) { + for (size_t i = 0; i < array.size(); ++i) { + if (array.is_null_at(i)) { has_null = true; continue; } - set.insert(column->get_data_at(i)); + set.insert(array.value_at(i)); } } - bool find_any(const IColumn* column, const UInt8* nullmap, size_t start, size_t size) { - for (size_t i = start; i < start + size; ++i) { - if (nullmap[i]) { + bool find_any(const ArrayView& array) { + for (size_t i = 0; i < array.size(); ++i) { + if (array.is_null_at(i)) { if (has_null) { return true; } else { @@ -157,7 +158,7 @@ struct OverlapSetImpl { } } - if (set.contains(column->get_data_at(i))) { + if (set.contains(array.value_at(i))) { return true; } } @@ -279,31 +280,15 @@ class FunctionArraysOverlap : public IFunction { "with rows: {}", get_name(), req_id, input_rows_count); }); - auto left_column = - block.get_by_position(arguments[0]).column->convert_to_full_column_if_const(); - auto right_column = - block.get_by_position(arguments[1]).column->convert_to_full_column_if_const(); - ColumnArrayExecutionData left_exec_data; - ColumnArrayExecutionData right_exec_data; - Status ret = Status::InvalidArgument( "execute failed, unsupported types for function {}({}, {})", get_name(), block.get_by_position(arguments[0]).type->get_name(), block.get_by_position(arguments[1]).type->get_name()); - - // extract array column - if (!extract_column_array_info(*left_column, left_exec_data) || - !extract_column_array_info(*right_column, right_exec_data)) { - return ret; - } // prepare return column auto dst_nested_col = ColumnUInt8::create(input_rows_count, 0); auto dst_null_map = ColumnUInt8::create(input_rows_count, 0); UInt8* dst_null_map_data = dst_null_map->get_data().data(); - RETURN_IF_ERROR(_execute_nullable(left_exec_data, dst_null_map_data)); - RETURN_IF_ERROR(_execute_nullable(right_exec_data, dst_null_map_data)); - // execute overlap check auto array_type = remove_nullable(block.get_by_position(arguments[0]).type); auto left_element_type = remove_nullable( @@ -311,12 +296,15 @@ class FunctionArraysOverlap : public IFunction { auto call = [&](const auto& type) -> bool { using DispatchType = std::decay_t; - ret = _execute_internal( - left_exec_data, right_exec_data, dst_null_map_data, - dst_nested_col->get_data().data()); + constexpr PrimitiveType PType = DispatchType::PType; + auto left_view = + ColumnArrayView::create(block.get_by_position(arguments[0]).column); + auto right_view = + ColumnArrayView::create(block.get_by_position(arguments[1]).column); + ret = _execute_internal(left_view, right_view, dst_null_map_data, + dst_nested_col->get_data().data()); return true; }; - if (!dispatch_switch_all(left_element_type->get_primitive_type(), call)) { ret = Status::InvalidArgument("execute failed, not support type {} in function {}", left_element_type->get_name(), get_name()); @@ -331,13 +319,14 @@ class FunctionArraysOverlap : public IFunction { } private: - static Status _execute_nullable(const ColumnArrayExecutionData& data, UInt8* dst_nullmap_data) { - for (ssize_t row = 0; row < data.offsets_ptr->size(); ++row) { + template + static Status _execute_nullable(const ColumnArrayView& data, UInt8* dst_nullmap_data) { + for (ssize_t row = 0; row < data.size(); ++row) { if (dst_nullmap_data[row]) { continue; } - if (data.array_nullmap_data && data.array_nullmap_data[row]) { + if (data.is_null_at(row)) { dst_nullmap_data[row] = 1; continue; } @@ -345,44 +334,34 @@ class FunctionArraysOverlap : public IFunction { return Status::OK(); } - template - Status _execute_internal(const ColumnArrayExecutionData& left_data, - const ColumnArrayExecutionData& right_data, UInt8* dst_nullmap_data, + template + Status _execute_internal(const ColumnArrayView& left_data, + const ColumnArrayView& right_data, UInt8* dst_nullmap_data, UInt8* dst_data) const { - using ExecutorImpl = OverlapSetImpl; - for (ssize_t row = 0; row < left_data.offsets_ptr->size(); ++row) { + using ExecutorImpl = OverlapSetImpl; + RETURN_IF_ERROR(_execute_nullable(left_data, dst_nullmap_data)); + RETURN_IF_ERROR(_execute_nullable(right_data, dst_nullmap_data)); + for (ssize_t row = 0; row < left_data.size(); ++row) { // arrays_overlap(null, null) -> null if (dst_nullmap_data[row]) { continue; } dst_nullmap_data[row] = 0; - ssize_t left_start = (*left_data.offsets_ptr)[row - 1]; - ssize_t left_size = (*left_data.offsets_ptr)[row] - left_start; - ssize_t right_start = (*right_data.offsets_ptr)[row - 1]; - ssize_t right_size = (*right_data.offsets_ptr)[row] - right_start; - if (left_size == 0 || right_size == 0) { + const auto left_array = left_data[row]; + const auto right_array = right_data[row]; + if (left_array.size() == 0 || right_array.size() == 0) { dst_data[row] = 0; continue; } - const auto* small_data = &left_data; - const auto* large_data = &right_data; - - ssize_t small_start = left_start; - ssize_t large_start = right_start; - ssize_t small_size = left_size; - ssize_t large_size = right_size; - if (right_size < left_size) { - std::swap(small_data, large_data); - std::swap(small_start, large_start); - std::swap(small_size, large_size); - } + const auto& small_data = + right_array.size() < left_array.size() ? right_array : left_array; + const auto& large_data = + right_array.size() < left_array.size() ? left_array : right_array; ExecutorImpl impl; - impl.insert_array(small_data->nested_col.get(), small_data->nested_nullmap_data, - small_start, small_size); - dst_data[row] = impl.find_any(large_data->nested_col.get(), - large_data->nested_nullmap_data, large_start, large_size); + impl.insert_array(small_data); + dst_data[row] = impl.find_any(large_data); } return Status::OK(); } diff --git a/be/test/exprs/function/function_array_index_test.cpp b/be/test/exprs/function/function_array_index_test.cpp index 806c1b3a727e13..a39570c58ae7b0 100644 --- a/be/test/exprs/function/function_array_index_test.cpp +++ b/be/test/exprs/function/function_array_index_test.cpp @@ -221,4 +221,29 @@ TEST(function_array_index_test, array_position) { } } +TEST(function_array_index_test, const_arguments) { + TestArray vec = {Int32(1), Null(), Int32(3), Int32(1)}; + + { + InputTypeSet input_types = {Consted {PrimitiveType::TYPE_ARRAY}, PrimitiveType::TYPE_INT, + PrimitiveType::TYPE_INT}; + DataSet data_set = {{{vec, Int32(1)}, UInt8(1)}}; + static_cast( + check_function("array_contains", input_types, data_set)); + } + { + InputTypeSet input_types = {PrimitiveType::TYPE_ARRAY, PrimitiveType::TYPE_INT, + Consted {PrimitiveType::TYPE_INT}}; + DataSet data_set = {{{vec, Null()}, Int64(2)}}; + static_cast( + check_function("array_position", input_types, data_set)); + } + { + InputTypeSet input_types = {Consted {PrimitiveType::TYPE_ARRAY}, PrimitiveType::TYPE_INT, + Consted {PrimitiveType::TYPE_INT}}; + DataSet data_set = {{{vec, Int32(1)}, Int64(2)}}; + static_cast(check_function("countequal", input_types, data_set)); + } +} + } // namespace doris diff --git a/be/test/exprs/function/function_array_remove_test.cpp b/be/test/exprs/function/function_array_remove_test.cpp new file mode 100644 index 00000000000000..772a93943df529 --- /dev/null +++ b/be/test/exprs/function/function_array_remove_test.cpp @@ -0,0 +1,115 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include +#include +#include + +#include "core/block/block.h" +#include "core/column/column_const.h" +#include "core/data_type/data_type_array.h" +#include "core/data_type/data_type_nullable.h" +#include "exprs/function/function_test_util.h" +#include "exprs/function/simple_function_factory.h" + +namespace doris { + +static void check_array_remove_case(DataTypePtr array_type, DataTypePtr element_type, + const TestArray& array, const AnyType& target, + const TestArray& expected, bool const_array, + bool const_target) { + MutableColumnPtr array_column = array_type->create_column(); + ASSERT_TRUE(insert_cell(array_column, array_type, array)); + MutableColumnPtr target_column = element_type->create_column(); + ASSERT_TRUE(insert_cell(target_column, element_type, target)); + + constexpr size_t row_size = 1; + if (const_array) { + array_column = ColumnConst::create(std::move(array_column), row_size); + } + if (const_target) { + target_column = ColumnConst::create(std::move(target_column), row_size); + } + + Block block; + block.insert({std::move(array_column), array_type, "array"}); + block.insert({std::move(target_column), element_type, "target"}); + + DataTypePtr return_type = array_type; + FunctionBasePtr func = SimpleFunctionFactory::instance().get_function( + "array_remove", block.get_columns_with_type_and_name(), return_type); + ASSERT_NE(func, nullptr); + + ColumnNumbers arguments = {0, 1}; + std::vector arg_types = {array_type, element_type}; + std::vector> constant_cols = {nullptr, nullptr}; + if (const_array) { + constant_cols[0] = std::make_shared(block.get_by_position(0).column); + } + if (const_target) { + constant_cols[1] = std::make_shared(block.get_by_position(1).column); + } + + FunctionUtils fn_utils(return_type, arg_types, false); + auto* fn_ctx = fn_utils.get_fn_ctx(); + fn_ctx->set_constant_cols(constant_cols); + ASSERT_TRUE(func->open(fn_ctx, FunctionContext::FRAGMENT_LOCAL).ok()); + ASSERT_TRUE(func->open(fn_ctx, FunctionContext::THREAD_LOCAL).ok()); + + block.insert({nullptr, return_type, "result"}); + auto result_idx = block.columns() - 1; + ASSERT_TRUE(func->execute(fn_ctx, block, arguments, result_idx, row_size).ok()); + static_cast(func->close(fn_ctx, FunctionContext::THREAD_LOCAL)); + static_cast(func->close(fn_ctx, FunctionContext::FRAGMENT_LOCAL)); + + MutableColumnPtr expected_column = return_type->create_column(); + ASSERT_TRUE(insert_cell(expected_column, return_type, expected)); + Field result_value; + block.get_by_position(result_idx).column->get(0, result_value); + Field expected_value; + expected_column->get(0, expected_value); + EXPECT_EQ(result_value, expected_value) + << "result: " << return_type->to_string(*block.get_by_position(result_idx).column, 0) + << ", expected: " << return_type->to_string(*expected_column, 0); +} + +TEST(function_array_remove_test, const_arguments) { + { + auto element_type = make_nullable(std::make_shared()); + auto array_type = std::make_shared(element_type); + TestArray array = {Int32(1), Null(), Int32(2), Int32(1)}; + TestArray expected = {Null(), Int32(2)}; + check_array_remove_case(array_type, element_type, array, Int32(1), expected, true, false); + } + { + auto element_type = make_nullable(std::make_shared()); + auto array_type = std::make_shared(element_type); + TestArray array = {std::string("abc"), Null(), std::string("def"), std::string("abc")}; + TestArray expected = {Null(), std::string("def")}; + check_array_remove_case(array_type, element_type, array, std::string("abc"), expected, + false, true); + } + { + auto element_type = make_nullable(std::make_shared()); + auto array_type = std::make_shared(element_type); + TestArray array = {Int32(1), Null(), Int32(2), Null()}; + TestArray expected = {Int32(1), Int32(2)}; + check_array_remove_case(array_type, element_type, array, Null(), expected, true, true); + } +} + +} // namespace doris diff --git a/be/test/exprs/function/function_arrays_overlap_test.cpp b/be/test/exprs/function/function_arrays_overlap_test.cpp index e3e3599b2eb840..9aa0dd1dd730fe 100644 --- a/be/test/exprs/function/function_arrays_overlap_test.cpp +++ b/be/test/exprs/function/function_arrays_overlap_test.cpp @@ -152,4 +152,26 @@ TEST(function_arrays_overlap_test, arrays_overlap) { } } +TEST(function_arrays_overlap_test, const_arguments) { + { + InputTypeSet input_types = {Consted {PrimitiveType::TYPE_ARRAY}, PrimitiveType::TYPE_INT, + PrimitiveType::TYPE_ARRAY, PrimitiveType::TYPE_INT}; + TestArray left = {Int32(1), Null(), Int32(3)}; + TestArray right = {Int32(4), Null()}; + DataSet data_set = {{{left, right}, UInt8(1)}}; + static_cast( + check_function("arrays_overlap", input_types, data_set)); + } + { + InputTypeSet input_types = {PrimitiveType::TYPE_ARRAY, PrimitiveType::TYPE_VARCHAR, + Consted {PrimitiveType::TYPE_ARRAY}, + PrimitiveType::TYPE_VARCHAR}; + TestArray left = {std::string("abc"), std::string("def")}; + TestArray right = {std::string("xyz"), std::string("abc")}; + DataSet data_set = {{{left, right}, UInt8(1)}}; + static_cast( + check_function("arrays_overlap", input_types, data_set)); + } +} + } // namespace doris