diff --git a/include/neug/compiler/gopt/g_expr_converter.h b/include/neug/compiler/gopt/g_expr_converter.h index 87ede2ea5..2c88ead80 100644 --- a/include/neug/compiler/gopt/g_expr_converter.h +++ b/include/neug/compiler/gopt/g_expr_converter.h @@ -113,6 +113,10 @@ class GExprConverter { const binder::Expression& expr, const std::vector& schemaAlias); + std::unique_ptr<::common::Expression> convertToListFunc( + const binder::Expression& expr, + const std::vector& schemaAlias); + std::unique_ptr<::common::Expression> convertCaseExpression( const binder::CaseExpression& expr, const std::vector& schemaAlias); diff --git a/include/neug/compiler/gopt/g_scalar_type.h b/include/neug/compiler/gopt/g_scalar_type.h index 5f6ac2dfe..fb4555f7f 100644 --- a/include/neug/compiler/gopt/g_scalar_type.h +++ b/include/neug/compiler/gopt/g_scalar_type.h @@ -26,6 +26,7 @@ #include "neug/compiler/function/schema/vector_node_rel_functions.h" #include "neug/compiler/function/string/vector_string_functions.h" #include "neug/compiler/function/struct/vector_struct_functions.h" +#include "neug/utils/exception/exception.h" namespace neug { namespace gopt { @@ -46,7 +47,8 @@ enum ScalarType { LABEL, PATTERN_EXTRACT, // startNode, endNode, nodes, rels PROPERTIES, // properties(nodes(), 'name') - TO_ARRAY, + TO_LIST, // variable-length list; all elements share the same type + TO_TUPLE, // tuple; element types may differ UPPER, LOWER, REVERSE, @@ -113,7 +115,14 @@ class GScalarType { } else if (func.name == function::PropertiesFunction::name) { return ScalarType::PROPERTIES; } else if (func.name == function::ListCreationFunction::name) { - return ScalarType::TO_ARRAY; + const auto& type = expr.getDataType(); + if (type.getLogicalTypeID() == common::LogicalTypeID::LIST) { + return ScalarType::TO_LIST; + } else if (type.getLogicalTypeID() == common::LogicalTypeID::STRUCT) { + return ScalarType::TO_TUPLE; + } + THROW_EXCEPTION_WITH_FILE_LINE("Invalid data type: " + type.toString() + + " for function: " + func.name); } else if (func.name == 
function::UpperFunction::name) { return ScalarType::UPPER; } else if (func.name == function::LowerFunction::name) { diff --git a/include/neug/execution/common/types/value.h b/include/neug/execution/common/types/value.h index c8465e823..8e09a8eb8 100644 --- a/include/neug/execution/common/types/value.h +++ b/include/neug/execution/common/types/value.h @@ -26,6 +26,7 @@ #include "neug/common/types.h" #include "neug/execution/common/types/graph_types.h" #include "neug/execution/utils/numeric_cast.h" +#include "neug/utils/property/list_view.h" namespace neug { class Property; @@ -721,5 +722,10 @@ Value performCastToString(const Value& input); void encode_value(const Value& val, Encoder& encoder); +// Convert a storage-layer ListView into an execution-layer Value::LIST. +// The ListView's underlying buffer must remain valid for the duration of +// this call (the resulting Value owns its data independently). +Value ListViewToValue(const neug::ListView& lv); + } // namespace execution } // namespace neug \ No newline at end of file diff --git a/include/neug/execution/expression/exprs/struct_expr.h b/include/neug/execution/expression/exprs/struct_expr.h index 09bd0256b..54e339984 100644 --- a/include/neug/execution/expression/exprs/struct_expr.h +++ b/include/neug/execution/expression/exprs/struct_expr.h @@ -37,5 +37,18 @@ class TupleExpr : public ExprBase { std::vector> exprs_; DataType type_; }; + +class ListExpr : public ExprBase { + public: + ListExpr(std::vector>&& exprs, DataType list_type); + ~ListExpr() override = default; + const DataType& type() const override { return type_; } + std::unique_ptr bind(const IStorageInterface* storage, + const ParamsMap& params) const override; + + private: + std::vector> exprs_; + DataType type_; +}; } // namespace execution } // namespace neug \ No newline at end of file diff --git a/include/neug/utils/property/column.h b/include/neug/utils/property/column.h index 762c7ad2f..b96a10406 100644 --- 
a/include/neug/utils/property/column.h +++ b/include/neug/utils/property/column.h @@ -33,6 +33,7 @@ #include "neug/utils/file_utils.h" #include "neug/utils/likely.h" #include "neug/utils/mmap_array.h" +#include "neug/utils/property/list_view.h" #include "neug/utils/property/property.h" #include "neug/utils/property/types.h" #include "neug/utils/serialization/out_archive.h" @@ -427,6 +428,204 @@ class TypedColumn : public ColumnBase { using StringColumn = TypedColumn; +// --------------------------------------------------------------------------- +// list_storage_item +// --------------------------------------------------------------------------- +// Index entry used by ListColumn. Wider than string_item (which has a 16-bit +// length field) to accommodate large list blobs. +struct list_storage_item { + uint64_t offset; // byte offset in the ListColumn data buffer + uint32_t length; // byte length of the serialized blob + uint32_t padding{0}; +}; +static_assert(sizeof(list_storage_item) == 16, + "list_storage_item size must be 16 bytes"); + +// --------------------------------------------------------------------------- +// ListColumn +// --------------------------------------------------------------------------- +// Stores a column of list-typed property values. Each entry is a serialized +// binary blob produced by ListViewBuilder::finish_pod() or +// ListViewBuilder::finish_varlen(). +// +// Storage layout on disk (prefix = column name): +// .items -- mmap_array: offset+length per entry +// .data -- mmap_array: packed blob storage +// .pos -- uint64_t: committed write frontier in data buffer +// +// Reading: +// ListView lv = col.get_view(idx); +// // access via lv.GetElem() / lv.GetChildStringView() etc. 
+// +// Writing: +// ListViewBuilder b; +// b.append_pod(val); // or b.append_blob(sv); +// Property p = Property::from_list_data(b.finish_pod()); +// col.set_any(idx, p, /*insert_safe=*/true); +class ListColumn : public ColumnBase { + public: + explicit ListColumn(const DataType& list_type) + : list_type_(list_type), size_(0), pos_(0) {} + ~ListColumn() override { close(); } + + void open(const std::string& name, const std::string& snapshot_dir, + const std::string& work_dir) override { + std::string basic = snapshot_dir + "/" + name; + if (std::filesystem::exists(basic + ".items")) { + items_.open(basic + ".items", false, false); + data_.open(basic + ".data", false, false); + size_ = items_.size(); + init_pos(basic + ".pos"); + } else if (!work_dir.empty()) { + std::string work = work_dir + "/" + name; + items_.open(work + ".items", true); + data_.open(work + ".data", true); + size_ = items_.size(); + init_pos(work + ".pos"); + } else { + size_ = 0; + pos_.store(0); + } + } + + void open_in_memory(const std::string& prefix) override { + if (!prefix.empty()) { + items_.open(prefix + ".items", false); + data_.open(prefix + ".data", false); + size_ = items_.size(); + init_pos(prefix + ".pos"); + } else { + size_ = 0; + pos_.store(0); + } + } + + void open_with_hugepages(const std::string& prefix) override { + if (!prefix.empty()) { + items_.open_with_hugepages(prefix + ".items"); + data_.open_with_hugepages(prefix + ".data"); + size_ = items_.size(); + init_pos(prefix + ".pos"); + } else { + size_ = 0; + pos_.store(0); + } + } + + void close() override { + items_.reset(); + data_.reset(); + } + + void dump(const std::string& filename) override { + size_t pos_val = pos_.load(); + write_file(filename + ".pos", &pos_val, sizeof(pos_val), 1); + items_.dump(filename + ".items"); + data_.dump(filename + ".data"); + } + + size_t size() const override { return size_; } + + void resize(size_t size) override { + std::unique_lock lk(rw_mutex_); + items_.resize(size); + // Keep 
at least as much data space as already committed. + size_t needed = std::max(data_.size(), pos_.load()); + data_.resize(std::max(needed, size * 64)); // 64B heuristic per list + size_ = size; + } + + void resize(size_t size, const Property& default_value) override { + if (default_value.type() != DataTypeId::kList && + default_value.type() != DataTypeId::kEmpty) { + THROW_RUNTIME_ERROR("Default value type does not match list column"); + } + resize(size); + // Leave entries zero-initialized (empty lists) for new slots. + } + + DataTypeId type() const override { return DataTypeId::kList; } + + // Return the full DataType::List(...) of this column. + const DataType& list_type() const { return list_type_; } + + // Store a pre-built blob (from ListViewBuilder::finish_*) at index idx. + // The blob bytes are copied into the internal data buffer. + void set_value(size_t idx, std::string_view blob) { + if (idx >= size_) { + THROW_RUNTIME_ERROR("Index out of range in ListColumn::set_value"); + } + size_t offset = pos_.fetch_add(blob.size()); + if (offset + blob.size() > data_.size()) { + std::unique_lock lk(rw_mutex_); + if (offset + blob.size() > data_.size()) { + data_.resize( + std::max(data_.size() * 2, offset + blob.size() + blob.size())); + } + } + if (!blob.empty()) { + std::memcpy(data_.data() + offset, blob.data(), blob.size()); + } + items_.set(idx, {static_cast(offset), + static_cast(blob.size())}); + } + + void set_any(size_t idx, const Property& value, bool insert_safe) override { + set_value(idx, value.as_list_data()); + } + + ListView get_view(size_t idx) const { + assert(idx < size_); + const auto& item = items_.get(idx); + return ListView(list_type_, + std::string_view(data_.data() + item.offset, item.length)); + } + + Property get_prop(size_t idx) const override { + const auto& item = items_.get(idx); + return Property::from_list_data( + std::string_view(data_.data() + item.offset, item.length)); + } + + void set_prop(size_t idx, const Property& prop) 
override { + set_value(idx, prop.as_list_data()); + } + + void ingest(uint32_t idx, OutArchive& arc) override { + std::string_view sv; + arc >> sv; + set_value(idx, sv); + } + + void ensure_writable(const std::string& work_dir) override { + items_.ensure_writable(work_dir); + data_.ensure_writable(work_dir); + } + + private: + void init_pos(const std::string& pos_path) { + if (std::filesystem::exists(pos_path)) { + size_t v = 0; + read_file(pos_path, &v, sizeof(v), 1); + pos_.store(v); + } else { + size_t total = 0; + for (size_t i = 0; i < items_.size(); ++i) { + const auto& it = items_.get(i); + total = std::max(total, static_cast(it.offset) + it.length); + } + pos_.store(total); + } + } + + DataType list_type_; + mmap_array items_; + mmap_array data_; + size_t size_; + std::atomic pos_; + mutable std::shared_mutex rw_mutex_; +}; + std::shared_ptr CreateColumn(DataType type); /// Create RefColumn for ease of usage for hqps diff --git a/include/neug/utils/property/list_view.h b/include/neug/utils/property/list_view.h new file mode 100644 index 000000000..e7f0b0495 --- /dev/null +++ b/include/neug/utils/property/list_view.h @@ -0,0 +1,241 @@ +/** Copyright 2020 Alibaba Group Holding Limited. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#pragma once + +#include +#include +#include +#include +#include +#include + +#include "neug/common/extra_type_info.h" +#include "neug/common/types.h" + +namespace neug { + +// --------------------------------------------------------------------------- +// is_pod_type +// --------------------------------------------------------------------------- +// Returns true when a DataTypeId corresponds to a fixed-width, trivially- +// copyable element type. POD lists use a dense packed encoding; non-POD lists +// (varchar, nested list) use an offset-array encoding. +inline bool is_pod_type(DataTypeId id) { + switch (id) { + case DataTypeId::kBoolean: + case DataTypeId::kInt8: + case DataTypeId::kInt16: + case DataTypeId::kInt32: + case DataTypeId::kInt64: + case DataTypeId::kUInt8: + case DataTypeId::kUInt16: + case DataTypeId::kUInt32: + case DataTypeId::kUInt64: + case DataTypeId::kFloat: + case DataTypeId::kDouble: + case DataTypeId::kDate: + case DataTypeId::kTimestampMs: + case DataTypeId::kInterval: + return true; + default: + return false; + } +} + +// --------------------------------------------------------------------------- +// Binary encoding +// --------------------------------------------------------------------------- +// +// POD list (child type is fixed-size) +// ┌─────────────────────────┬──────────────────────────────────────────┐ +// │ count : uint32_t │ T[0] T[1] … T[count-1] │ +// └─────────────────────────┴──────────────────────────────────────────┘ +// Total bytes: 4 + count * sizeof(T) +// +// Non-POD list (varchar or nested list — variable-width elements) +// ┌──────────┬────────────────────────────────────────┬──────────────────────┐ +// │ count │ off[0] off[1] … off[count-1] off[count] (sentinel) │ +// │ uint32_t │ uint32_t × (count+1) │ data[0] data[1] … │ +// └──────────┴────────────────────────────────────────┴──────────────────────┘ +// off[] are relative to the start of the data region (first byte after the +// last offset entry). 
The sentinel off[count] equals the total data size. +// Length of element i = off[i+1] - off[i]. + +// --------------------------------------------------------------------------- +// ListView +// --------------------------------------------------------------------------- +// Zero-copy, read-only view over a serialized list stored in a contiguous +// string_view buffer. The caller must ensure that the underlying memory +// outlives all ListView instances derived from it. +// +// type_ holds the *enclosing* List DataType; the child element type is +// obtained via ListType::GetChildType(type_). +struct ListView { + using length_t = uint32_t; + using offset_t = uint32_t; + + ListView(const DataType& type, std::string_view data) + : type_(type), data_(data) {} + ~ListView() = default; + + // Number of elements stored in this list. + size_t size() const { + if (data_.size() < sizeof(uint32_t)) { + return 0; + } + return static_cast( + *reinterpret_cast(data_.data())); + } + + // Direct element access for POD child types. + // The template argument T must match the actual element C++ type. + template + const T& GetElem(size_t idx) const { + assert(is_pod_type(ListType::GetChildType(type_).id())); + assert(idx < size()); + return reinterpret_cast(data_.data() + sizeof(uint32_t))[idx]; + } + + // Element access for varchar (string) child type. + std::string_view GetChildStringView(size_t idx) const { + return child_span(idx); + } + + // Element access for a nested List child type. + // The returned ListView borrows from the same underlying buffer. + ListView GetChildListView(size_t idx) const { + const DataType& child = ListType::GetChildType(type_); + return ListView(child, child_span(idx)); + } + + const DataType& type_; + std::string_view data_; + + private: + // Returns the raw byte span for the variable-length element at index idx. + // Must only be called for non-POD encodings. 
+ std::string_view child_span(size_t idx) const { + assert(!data_.empty()); + const uint32_t count = *reinterpret_cast(data_.data()); + assert(idx < static_cast(count)); + const uint32_t* offsets = + reinterpret_cast(data_.data() + sizeof(uint32_t)); + // data region starts right after the (count+1) offsets + const char* data_start = + data_.data() + sizeof(uint32_t) + (count + 1) * sizeof(uint32_t); + return std::string_view(data_start + offsets[idx], + offsets[idx + 1] - offsets[idx]); + } +}; + +// --------------------------------------------------------------------------- +// ListViewBuilder +// --------------------------------------------------------------------------- +// Assembles a binary blob for a single list value to be stored in a +// ListColumn. The builder accumulates elements and produces an owned +// std::string via finish_pod() or finish_varlen(). +// +// The two finish methods correspond to the two encodings above. Callers are +// expected to choose the right one based on is_pod_type(child_type.id()): +// +// if (is_pod_type(child_type.id())) { +// ListViewBuilder b; +// for (auto v : values) b.append_pod(v); +// std::string blob = b.finish_pod(); +// } else { +// ListViewBuilder b; +// for (auto sv : strings) b.append_blob(sv); +// std::string blob = b.finish_varlen(); +// } +class ListViewBuilder { + public: + ListViewBuilder() = default; + + // Append a single POD element (trivially copyable, fixed size). + template + void append_pod(const T& val) { + static_assert(std::is_trivially_copyable_v, + "append_pod requires a trivially copyable type"); + const char* bytes = reinterpret_cast(&val); + pod_data_.insert(pod_data_.end(), bytes, bytes + sizeof(T)); + count_++; + } + + // Append a variable-length element: either a raw string or a nested list + // blob (the result of another ListViewBuilder::finish_*() call). 
+ void append_blob(std::string_view sv) { + var_offsets_.push_back(static_cast(var_data_.size())); + var_data_.insert(var_data_.end(), sv.data(), sv.data() + sv.size()); + count_++; + } + + // Produce the final blob for POD lists. + // Layout: [count: uint32][T[0]]...[T[count-1]] + template + std::string finish_pod() const { + std::string result(sizeof(uint32_t) + pod_data_.size(), '\0'); + char* p = result.data(); + *reinterpret_cast(p) = count_; + p += sizeof(uint32_t); + if (!pod_data_.empty()) { + std::memcpy(p, pod_data_.data(), pod_data_.size()); + } + return result; + } + + // Produce the final blob for non-POD lists. + // Layout: [count: uint32][off[0]: uint32]...[off[count]: uint32][data...] + std::string finish_varlen() const { + const uint32_t sentinel = static_cast(var_data_.size()); + const size_t offset_bytes = (count_ + 1) * sizeof(uint32_t); + std::string result(sizeof(uint32_t) + offset_bytes + var_data_.size(), + '\0'); + char* p = result.data(); + *reinterpret_cast(p) = count_; + p += sizeof(uint32_t); + if (count_ > 0) { + std::memcpy(p, var_offsets_.data(), count_ * sizeof(uint32_t)); + } + p += count_ * sizeof(uint32_t); + *reinterpret_cast(p) = sentinel; + p += sizeof(uint32_t); + if (!var_data_.empty()) { + std::memcpy(p, var_data_.data(), var_data_.size()); + } + return result; + } + + // Reset for reuse. + void reset() { + count_ = 0; + pod_data_.clear(); + var_offsets_.clear(); + var_data_.clear(); + } + + uint32_t count() const { return count_; } + + private: + uint32_t count_ = 0; + + // Accumulates raw bytes for POD lists. + std::vector pod_data_; + + // Accumulate offset + data for non-POD lists. 
+ std::vector var_offsets_; + std::vector var_data_; +}; + +} // namespace neug diff --git a/include/neug/utils/property/property.h b/include/neug/utils/property/property.h index 04fe6ff7a..86b84e0c4 100644 --- a/include/neug/utils/property/property.h +++ b/include/neug/utils/property/property.h @@ -21,6 +21,7 @@ #include #include +#include "neug/utils/property/list_view.h" #include "neug/utils/property/types.h" #include "neug/utils/serialization/in_archive.h" #include "neug/utils/serialization/out_archive.h" @@ -142,6 +143,28 @@ class Property { value_.s = v; } + // Store a raw serialized list blob (output of ListViewBuilder::finish_*). + // The pointed-to memory must outlive this Property (same rule as + // set_string_view). + void set_list_data(std::string_view v) { + type_ = DataTypeId::kList; + value_.s = v; // value_.s and value_.lv share the same union slot + } + + // Retrieve the raw list blob. + std::string_view as_list_data() const { + assert(type() == DataTypeId::kList); + return value_.s; + } + + // Convenience wrapper: build a zero-copy ListView directly from this + // property. The caller must supply the full DataType (with child-type + // info) because Property only stores the raw blob. + ListView as_list_view(const DataType& type) const { + assert(this->type() == DataTypeId::kList); + return ListView(type, value_.s); + } + void set_float(float v) { type_ = DataTypeId::kFloat; value_.f = v; @@ -258,6 +281,8 @@ class Property { return as_bool() ? 
"true" : "false"; } else if (type == DataTypeId::kEmpty) { return "EMPTY"; + } else if (type == DataTypeId::kList) { + return "LIST[" + std::to_string(as_list_data().size()) + "B]"; } else { return "UNKNOWN"; } @@ -301,6 +326,12 @@ class Property { return ret; } + static Property from_list_data(std::string_view v) { + Property ret; + ret.set_list_data(v); + return ret; + } + static Property from_float(float v) { Property ret; ret.set_float(v); diff --git a/proto/expr.proto b/proto/expr.proto index 8049fd8ce..6a4b5d306 100644 --- a/proto/expr.proto +++ b/proto/expr.proto @@ -164,6 +164,10 @@ message ToTuple { repeated Expression fields = 1; } +message ToList { + repeated Expression fields = 1; +} + message VariableKeyValue { common.Value key = 1; oneof value { @@ -305,6 +309,7 @@ message ExprOpr { ToInterval to_interval = 21; ToTuple to_tuple = 22; ScalarFunction scalar_func = 23; + ToList to_list = 24; } // The data of type of ExprOpr common.IrDataType node_type = 12; diff --git a/src/compiler/gopt/g_ddl_converter.cpp b/src/compiler/gopt/g_ddl_converter.cpp index 882ecd0f4..c10be5310 100644 --- a/src/compiler/gopt/g_ddl_converter.cpp +++ b/src/compiler/gopt/g_ddl_converter.cpp @@ -171,7 +171,7 @@ GDDLConverter::convertToCreateVertexSchema( } auto* propertyDef = create_vertex->add_properties(); propertyDef->set_name(prop.getName()); - auto irType = typeConverter.convertSimpleLogicalType(prop.getType()); + auto irType = typeConverter.convertLogicalType(prop.getType()); *propertyDef->mutable_type() = std::move(*irType->mutable_data_type()); propertyDef->set_allocated_default_value( exprConverter.convertDefaultValue(prop).release()); @@ -265,7 +265,7 @@ GDDLConverter::convertToCreateEdgeGroupSchema( } auto* propertyDef = create_edge->add_properties(); propertyDef->set_name(prop.getName()); - auto irType = typeConverter.convertSimpleLogicalType(prop.getType()); + auto irType = typeConverter.convertLogicalType(prop.getType()); *propertyDef->mutable_type() = 
std::move(*irType->mutable_data_type()); propertyDef->set_allocated_default_value( exprConverter.convertDefaultValue(prop).release()); @@ -310,7 +310,7 @@ GDDLConverter::convertToCreateEdgeSchema( } auto* propertyDef = create_edge->add_properties(); propertyDef->set_name(prop.getName()); - auto irType = typeConverter.convertSimpleLogicalType(prop.getType()); + auto irType = typeConverter.convertLogicalType(prop.getType()); *propertyDef->mutable_type() = std::move(*irType->mutable_data_type()); propertyDef->set_allocated_default_value( exprConverter.convertDefaultValue(prop).release()); @@ -404,7 +404,7 @@ GDDLConverter::convertToAddVertexPropertySchema( // Add property definition auto* property = add_property->add_properties(); property->set_name(propertyDef.getName()); - auto irType = typeConverter.convertSimpleLogicalType(propertyDef.getType()); + auto irType = typeConverter.convertLogicalType(propertyDef.getType()); *property->mutable_type() = std::move(*irType->mutable_data_type()); property->set_allocated_default_value( exprConverter.convertDefaultValue(propertyDef).release()); @@ -447,7 +447,7 @@ GDDLConverter::convertToAddEdgePropertySchema(const planner::LogicalAlter& op) { // Add property definition auto* property = add_property->add_properties(); property->set_name(propertyDef.getName()); - auto irType = typeConverter.convertSimpleLogicalType(propertyDef.getType()); + auto irType = typeConverter.convertLogicalType(propertyDef.getType()); *property->mutable_type() = std::move(*irType->mutable_data_type()); property->set_allocated_default_value( exprConverter.convertDefaultValue(propertyDef).release()); diff --git a/src/compiler/gopt/g_expr_converter.cpp b/src/compiler/gopt/g_expr_converter.cpp index 315c04d5c..adf0127fe 100644 --- a/src/compiler/gopt/g_expr_converter.cpp +++ b/src/compiler/gopt/g_expr_converter.cpp @@ -424,10 +424,8 @@ std::unique_ptr<::common::Expression> GExprConverter::convertRegexFunc( std::unique_ptr<::common::Value> 
GExprConverter::convertToLiteralArray( const common::Value& value, const common::LogicalType& childType) { - if (value.children.empty()) { - THROW_EXCEPTION_WITH_FILE_LINE( - "Array function should have at least one child"); - } + // Empty list is valid (e.g. the implicit default for a list column with no + // DEFAULT clause). The proto repeated field simply stays empty. auto valuePB = std::make_unique<::common::Value>(); switch (childType.getLogicalTypeID()) { case common::LogicalTypeID::INT32: { @@ -720,6 +718,31 @@ std::unique_ptr<::common::Expression> GExprConverter::convertToTupleFunc( return exprPB; } +std::unique_ptr<::common::Expression> GExprConverter::convertToListFunc( + const binder::Expression& expr, + const std::vector& schemaAlias) { + if (expr.getChildren().empty()) { + THROW_EXCEPTION_WITH_FILE_LINE( + "Array function should have at least one child"); + } + auto listPB = std::make_unique<::common::ToList>(); + for (auto child : expr.getChildren()) { + auto exprPB = convert(*child, schemaAlias); + if (exprPB->operators_size() == 0) { + THROW_EXCEPTION_WITH_FILE_LINE( + "convert child of array function failed, empty expression"); + } + auto fieldPB = listPB->add_fields(); + *fieldPB = std::move(*exprPB); + } + auto exprPB = std::make_unique<::common::Expression>(); + auto opr = exprPB->add_operators(); + opr->set_allocated_to_list(listPB.release()); + opr->set_allocated_node_type( + typeConverter.convertLogicalType(expr.getDataType().copy()).release()); + return exprPB; +} + std::unique_ptr<::common::Expression> GExprConverter::convertCaseExpression( const binder::CaseExpression& expr, const std::vector& schemaAlias) { @@ -763,7 +786,9 @@ std::unique_ptr<::common::Expression> GExprConverter::convertScalarFunc( return convertPatternExtractFunc(expr, schemaAlias); } else if (scalarType.getType() == PROPERTIES) { return convertPropertiesFunc(expr, schemaAlias); - } else if (scalarType.getType() == TO_ARRAY) { + } else if (scalarType.getType() == 
TO_LIST) { + return convertToListFunc(expr, schemaAlias); + } else if (scalarType.getType() == TO_TUPLE) { return convertToTupleFunc(expr, schemaAlias); } else if (scalarType.getType() == STARTS_WITH || scalarType.getType() == ENDS_WITH || diff --git a/src/execution/common/types/value.cc b/src/execution/common/types/value.cc index b81455914..e1728f923 100644 --- a/src/execution/common/types/value.cc +++ b/src/execution/common/types/value.cc @@ -22,6 +22,7 @@ #include "neug/execution/common/types/value.h" #include "neug/utils/encoder.h" #include "neug/utils/exception/exception.h" +#include "neug/utils/property/list_view.h" #include "neug/utils/property/property.h" namespace neug { @@ -741,6 +742,54 @@ rapidjson::Value Value::ToJson(const Value& value, return rapidjson::Value(); // unreachable } +// Recursively serialize an execution Value::LIST back into the binary blob +// format understood by ListView / ListColumn. +static std::string ValueToListBlob(const Value& list_val) { + assert(list_val.type().id() == DataTypeId::kList); + const DataType& child_type = ListType::GetChildType(list_val.type()); + const auto& children = ListValue::GetChildren(list_val); + + ListViewBuilder builder; + if (is_pod_type(child_type.id())) { + switch (child_type.id()) { +#define APPEND_POD(type_enum, cpp_type) \ + case DataTypeId::type_enum: \ + for (const auto& c : children) { \ + builder.append_pod(c.GetValue()); \ + } \ + return builder.finish_pod(); + APPEND_POD(kBoolean, bool) + APPEND_POD(kInt32, int32_t) + APPEND_POD(kInt64, int64_t) + APPEND_POD(kUInt32, uint32_t) + APPEND_POD(kUInt64, uint64_t) + APPEND_POD(kFloat, float) + APPEND_POD(kDouble, double) + APPEND_POD(kDate, date_t) + APPEND_POD(kTimestampMs, timestamp_ms_t) + APPEND_POD(kInterval, interval_t) +#undef APPEND_POD + default: + break; + } + } + // Non-POD: varchar or nested list + if (child_type.id() == DataTypeId::kVarchar) { + for (const auto& c : children) { + builder.append_blob(StringValue::Get(c)); + } + } 
else if (child_type.id() == DataTypeId::kList) { + for (const auto& c : children) { + builder.append_blob(ValueToListBlob(c)); + } + } else { + THROW_NOT_SUPPORTED_EXCEPTION( + "Unsupported list child type in ValueToListBlob: " + + std::to_string(static_cast(child_type.id()))); + } + return builder.finish_varlen(); +} + Property value_to_property(const Value& value) { switch (value.type().id()) { case DataTypeId::kBoolean: @@ -765,6 +814,10 @@ Property value_to_property(const Value& value) { return Property::from_datetime(value.GetValue()); case DataTypeId::kInterval: return Property::from_interval(value.GetValue()); + case DataTypeId::kList: { + std::string blob = ValueToListBlob(value); + return Property::from_list_data(blob); /* FIXME(review): blob is a local std::string and Property::from_list_data keeps only a non-owning string_view of it, so the returned Property refers to freed memory once this function returns; the serialized blob needs an owner that outlives the Property. */ + } default: THROW_NOT_SUPPORTED_EXCEPTION( "Unexpected type: " + @@ -801,6 +854,10 @@ Value property_to_value(const Property& property, const DataType& type) { return Value::TIMESTAMPMS(property.as_datetime()); case DataTypeId::kInterval: return Value::INTERVAL(property.as_interval()); + case DataTypeId::kList: { + ListView lv(type, property.as_list_data()); + return ListViewToValue(lv); + } default: THROW_NOT_SUPPORTED_EXCEPTION( "Unexpected property type: " + std::to_string(property.type()) + @@ -877,6 +934,71 @@ void encode_value(const Value& val, Encoder& encoder) { } } +Value ListViewToValue(const neug::ListView& lv) { + const DataType& child_type = ListType::GetChildType(lv.type_); + const size_t n = lv.size(); + std::vector children; + children.reserve(n); + + switch (child_type.id()) { + case DataTypeId::kBoolean: + for (size_t i = 0; i < n; ++i) + children.push_back(Value::BOOLEAN(lv.GetElem(i))); + break; + case DataTypeId::kInt32: + for (size_t i = 0; i < n; ++i) + children.push_back(Value::INT32(lv.GetElem(i))); + break; + case DataTypeId::kInt64: + for (size_t i = 0; i < n; ++i) + children.push_back(Value::INT64(lv.GetElem(i))); + break; + case DataTypeId::kUInt32: + for (size_t i = 0; i < n; ++i) + 
children.push_back(Value::UINT32(lv.GetElem(i))); + break; + case DataTypeId::kUInt64: + for (size_t i = 0; i < n; ++i) + children.push_back(Value::UINT64(lv.GetElem(i))); + break; + case DataTypeId::kFloat: + for (size_t i = 0; i < n; ++i) + children.push_back(Value::FLOAT(lv.GetElem(i))); + break; + case DataTypeId::kDouble: + for (size_t i = 0; i < n; ++i) + children.push_back(Value::DOUBLE(lv.GetElem(i))); + break; + case DataTypeId::kDate: + for (size_t i = 0; i < n; ++i) + children.push_back(Value::DATE(lv.GetElem(i))); + break; + case DataTypeId::kTimestampMs: + for (size_t i = 0; i < n; ++i) + children.push_back(Value::TIMESTAMPMS(lv.GetElem(i))); + break; + case DataTypeId::kInterval: + for (size_t i = 0; i < n; ++i) + children.push_back(Value::INTERVAL(lv.GetElem(i))); + break; + case DataTypeId::kVarchar: + for (size_t i = 0; i < n; ++i) { + auto sv = lv.GetChildStringView(i); + children.push_back(Value::STRING(std::string(sv))); + } + break; + case DataTypeId::kList: + for (size_t i = 0; i < n; ++i) + children.push_back(ListViewToValue(lv.GetChildListView(i))); + break; + default: + THROW_NOT_SUPPORTED_EXCEPTION("ListViewToValue: unsupported child type " + + child_type.ToString()); + } + + return Value::LIST(child_type, std::move(children)); +} + Value performCastToString(const Value& input) { std::string ret{}; switch (input.type().id()) { diff --git a/src/execution/expression/expr.cc b/src/execution/expression/expr.cc index 93d71f374..db851dd27 100644 --- a/src/execution/expression/expr.cc +++ b/src/execution/expression/expr.cc @@ -119,6 +119,20 @@ static std::unique_ptr build_expr( return std::make_unique(std::move(exprs_vec)); } + case ::common::ExprOpr::kToList: { + const auto& list_fields = opr.to_list().fields(); + std::vector> exprs_vec; + for (int i = 0; i < list_fields.size(); ++i) { + exprs_vec.emplace_back( + parse_expression(list_fields[i], ctx_meta, var_type)); + } + DataType list_type = opr.has_node_type() + ? 
parse_from_ir_data_type(opr.node_type()) + : DataType::List(exprs_vec[0]->type()); + return std::make_unique(std::move(exprs_vec), + std::move(list_type)); + } + case ::common::ExprOpr::kToDate: { Date date(opr.to_date().date_str()); return std::make_unique(Value::DATE(date)); @@ -317,6 +331,7 @@ std::unique_ptr parse_expression(const ::common::Expression& expr, case ::common::ExprOpr::kToDate: case ::common::ExprOpr::kToDatetime: case ::common::ExprOpr::kToTuple: + case ::common::ExprOpr::kToList: case ::common::ExprOpr::kScalarFunc: case ::common::ExprOpr::kPathFunc: { opr_stack2.push(*it); diff --git a/src/execution/expression/exprs/struct_expr.cc b/src/execution/expression/exprs/struct_expr.cc index 0bec6233a..ca8ce586e 100644 --- a/src/execution/expression/exprs/struct_expr.cc +++ b/src/execution/expression/exprs/struct_expr.cc @@ -15,6 +15,8 @@ #include "neug/execution/expression/exprs/struct_expr.h" +#include "neug/common/types.h" + namespace neug { namespace execution { class BindedTupleExpr : public VertexExprBase, @@ -64,5 +66,60 @@ std::unique_ptr TupleExpr::bind( } return std::make_unique(std::move(bound_exprs), type_); } + +ListExpr::ListExpr(std::vector>&& exprs, + DataType list_type) + : exprs_(std::move(exprs)), type_(std::move(list_type)) {} + +class BindedListExpr : public VertexExprBase, + public EdgeExprBase, + public RecordExprBase { + public: + BindedListExpr(std::vector>&& exprs, + const DataType& type) + : exprs_(std::move(exprs)), type_(type) {} + const DataType& type() const override { return type_; } + + Value eval_record(const Context& ctx, size_t idx) const override { + std::vector values; + for (const auto& expr : exprs_) { + values.push_back(expr->Cast().eval_record(ctx, idx)); + } + const DataType& child = ListType::GetChildType(type_); + return Value::LIST(child, std::move(values)); + } + + Value eval_vertex(label_t v_label, vid_t v_id) const override { + std::vector values; + for (const auto& expr : exprs_) { + 
values.push_back(expr->Cast().eval_vertex(v_label, v_id)); + } + const DataType& child = ListType::GetChildType(type_); + return Value::LIST(child, std::move(values)); + } + Value eval_edge(const LabelTriplet& label, vid_t src, vid_t dst, + const void* data_ptr) const override { + std::vector values; + for (const auto& expr : exprs_) { + values.push_back( + expr->Cast().eval_edge(label, src, dst, data_ptr)); + } + const DataType& child = ListType::GetChildType(type_); + return Value::LIST(child, std::move(values)); + } + + private: + std::vector> exprs_; + DataType type_; +}; + +std::unique_ptr ListExpr::bind(const IStorageInterface* storage, + const ParamsMap& params) const { + std::vector> bound_exprs; + for (const auto& expr : exprs_) { + bound_exprs.push_back(expr->bind(storage, params)); + } + return std::make_unique(std::move(bound_exprs), type_); +} } // namespace execution } // namespace neug \ No newline at end of file diff --git a/src/utils/pb_utils.cc b/src/utils/pb_utils.cc index 3122cd9f1..b73145395 100644 --- a/src/utils/pb_utils.cc +++ b/src/utils/pb_utils.cc @@ -209,8 +209,19 @@ bool data_type_to_property_type(const common::DataType& data_type, return temporal_type_to_property_type(data_type.temporal(), out_type); } case common::DataType::kArray: { - LOG(ERROR) << "Array type is not supported"; - return false; + // A List/Array property: recursively resolve the element type. 
+ const auto& array_type = data_type.array(); + if (!array_type.has_component_type()) { + LOG(ERROR) << "Array type missing component type: " + << data_type.DebugString(); + return false; + } + DataType child_type; + if (!data_type_to_property_type(array_type.component_type(), child_type)) { + return false; + } + out_type = DataType::List(child_type); + break; } case common::DataType::kMap: { LOG(ERROR) << "Map type is not supported"; @@ -224,6 +235,7 @@ bool data_type_to_property_type(const common::DataType& data_type, LOG(ERROR) << "Unknown data type: " << data_type.DebugString(); return false; } + return true; } bool common_value_to_value(const DataType& type, const common::Value& value, @@ -279,6 +291,58 @@ bool common_value_to_value(const DataType& type, const common::Value& value, case common::Value::kDate: out_value = execution::Value::DATE(Date(value.date().item())); break; + case common::Value::kI32Array: { + // INT32[] default value + DataType child_type = ListType::GetChildType(type); + std::vector items; + for (auto v : value.i32_array().item()) { + items.emplace_back(execution::Value::INT32(v)); + } + out_value = execution::Value::LIST(child_type, std::move(items)); + break; + } + case common::Value::kI64Array: { + // INT64[] default value + DataType child_type = ListType::GetChildType(type); + std::vector items; + for (auto v : value.i64_array().item()) { + items.emplace_back(execution::Value::INT64(v)); + } + out_value = execution::Value::LIST(child_type, std::move(items)); + break; + } + case common::Value::kF64Array: { + // FLOAT[] / DOUBLE[] default value + DataType child_type = ListType::GetChildType(type); + std::vector items; + if (child_type.id() == DataTypeId::kFloat) { + for (auto v : value.f64_array().item()) { + items.emplace_back(execution::Value::FLOAT(static_cast(v))); + } + } else { + for (auto v : value.f64_array().item()) { + items.emplace_back(execution::Value::DOUBLE(v)); + } + } + out_value = execution::Value::LIST(child_type, 
std::move(items)); + break; + } + case common::Value::kStrArray: { + // STRING[] default value + DataType child_type = ListType::GetChildType(type); + std::vector items; + uint16_t max_len = STRING_DEFAULT_MAX_LENGTH; + if (child_type.RawExtraTypeInfo()) { + max_len = child_type.RawExtraTypeInfo() + ->Cast() + .max_length; + } + for (const auto& s : value.str_array().item()) { + items.emplace_back(execution::Value::VARCHAR(s, max_len)); + } + out_value = execution::Value::LIST(child_type, std::move(items)); + break; + } default: LOG(ERROR) << "Unknown value type: " << value.DebugString(); return false; diff --git a/src/utils/property/column.cc b/src/utils/property/column.cc index c1c6a8097..6b821b089 100644 --- a/src/utils/property/column.cc +++ b/src/utils/property/column.cc @@ -74,6 +74,9 @@ std::shared_ptr CreateColumn(DataType type) { case DataTypeId::kEmpty: { return std::make_shared>(); } + case DataTypeId::kList: { + return std::make_shared(type); + } default: { THROW_NOT_SUPPORTED_EXCEPTION("Unsupported type for column: " + type.ToString()); diff --git a/src/utils/property/property.cc b/src/utils/property/property.cc index b3946e43c..20b90d7a1 100644 --- a/src/utils/property/property.cc +++ b/src/utils/property/property.cc @@ -57,6 +57,12 @@ Property get_default_value(const DataTypeId& type) { case DataTypeId::kInterval: default_value.set_interval(Interval()); break; + case DataTypeId::kList: + // An empty list blob (no elements) serves as the default value for list + // properties. ListView::size() returns 0 when the blob is shorter than + // 4 bytes, so an empty string_view is a valid representation. 
+ default_value.set_list_data(std::string_view{}); + break; default: THROW_NOT_SUPPORTED_EXCEPTION( "Unsupported property type for default value: " + std::to_string(type) + diff --git a/src/utils/yaml_utils.cc b/src/utils/yaml_utils.cc index e6ca90f3f..6ced4564c 100644 --- a/src/utils/yaml_utils.cc +++ b/src/utils/yaml_utils.cc @@ -76,6 +76,11 @@ YAML::Node property_type_to_yaml(const DataType& type) { case DataTypeId::kInterval: node["temporal"] = config_parsing::TemporalTypeToYAML(type.id()); break; + case DataTypeId::kList: { + auto child_type = ListType::GetChildType(type); + node["array"]["component_type"] = property_type_to_yaml(child_type); + break; + } default: THROW_INVALID_ARGUMENT_EXCEPTION( "Unrecognized property type for YAML encoding: " + type.ToString()); diff --git a/tests/storage/CMakeLists.txt b/tests/storage/CMakeLists.txt index aba1f79c0..0fd879c5d 100644 --- a/tests/storage/CMakeLists.txt +++ b/tests/storage/CMakeLists.txt @@ -20,4 +20,6 @@ add_neug_test(vertex_table_benchmark vertex_table_benchmark.cc) add_neug_test(test_vertex_table test_vertex_table.cc) -add_neug_test(edge_table_test test_edge_table.cc) \ No newline at end of file +add_neug_test(edge_table_test test_edge_table.cc) + +add_neug_test(test_list_column test_list_column.cc) \ No newline at end of file diff --git a/tests/storage/test_list_column.cc b/tests/storage/test_list_column.cc new file mode 100644 index 000000000..fb3718289 --- /dev/null +++ b/tests/storage/test_list_column.cc @@ -0,0 +1,527 @@ +/** Copyright 2020 Alibaba Group Holding Limited. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include + +#include +#include +#include +#include + +#include "neug/common/types.h" +#include "neug/execution/common/types/value.h" +#include "neug/utils/property/column.h" +#include "neug/utils/property/list_view.h" +#include "neug/utils/property/property.h" + +using namespace neug; +using namespace neug::execution; + +// --------------------------------------------------------------------------- +// is_pod_type +// --------------------------------------------------------------------------- +TEST(IsPodTypeTest, PodIds) { + EXPECT_TRUE(is_pod_type(DataTypeId::kBoolean)); + EXPECT_TRUE(is_pod_type(DataTypeId::kInt8)); + EXPECT_TRUE(is_pod_type(DataTypeId::kInt16)); + EXPECT_TRUE(is_pod_type(DataTypeId::kInt32)); + EXPECT_TRUE(is_pod_type(DataTypeId::kInt64)); + EXPECT_TRUE(is_pod_type(DataTypeId::kUInt8)); + EXPECT_TRUE(is_pod_type(DataTypeId::kUInt16)); + EXPECT_TRUE(is_pod_type(DataTypeId::kUInt32)); + EXPECT_TRUE(is_pod_type(DataTypeId::kUInt64)); + EXPECT_TRUE(is_pod_type(DataTypeId::kFloat)); + EXPECT_TRUE(is_pod_type(DataTypeId::kDouble)); + EXPECT_TRUE(is_pod_type(DataTypeId::kDate)); + EXPECT_TRUE(is_pod_type(DataTypeId::kTimestampMs)); + EXPECT_TRUE(is_pod_type(DataTypeId::kInterval)); +} + +TEST(IsPodTypeTest, NonPodIds) { + EXPECT_FALSE(is_pod_type(DataTypeId::kVarchar)); + EXPECT_FALSE(is_pod_type(DataTypeId::kList)); + EXPECT_FALSE(is_pod_type(DataTypeId::kStruct)); +} + +// --------------------------------------------------------------------------- +// ListViewBuilder + ListView — POD element types +// 
--------------------------------------------------------------------------- +TEST(ListViewBuilderTest, Int32Roundtrip) { + DataType list_type = DataType::List(DataType(DataTypeId::kInt32)); + + ListViewBuilder b; + b.append_pod(10); + b.append_pod(20); + b.append_pod(30); + EXPECT_EQ(b.count(), 3u); + + std::string blob = b.finish_pod(); + + // Expected size: 4 (count) + 3*4 (ints) = 16 + EXPECT_EQ(blob.size(), 4u + 3u * sizeof(int32_t)); + + ListView lv(list_type, blob); + ASSERT_EQ(lv.size(), 3u); + EXPECT_EQ(lv.GetElem(0), 10); + EXPECT_EQ(lv.GetElem(1), 20); + EXPECT_EQ(lv.GetElem(2), 30); +} + +TEST(ListViewBuilderTest, DoubleRoundtrip) { + DataType list_type = DataType::List(DataType(DataTypeId::kDouble)); + + ListViewBuilder b; + b.append_pod(1.5); + b.append_pod(2.5); + std::string blob = b.finish_pod(); + + ListView lv(list_type, blob); + ASSERT_EQ(lv.size(), 2u); + EXPECT_DOUBLE_EQ(lv.GetElem(0), 1.5); + EXPECT_DOUBLE_EQ(lv.GetElem(1), 2.5); +} + +TEST(ListViewBuilderTest, BoolRoundtrip) { + DataType list_type = DataType::List(DataType(DataTypeId::kBoolean)); + + ListViewBuilder b; + b.append_pod(true); + b.append_pod(false); + b.append_pod(true); + std::string blob = b.finish_pod(); + + ListView lv(list_type, blob); + ASSERT_EQ(lv.size(), 3u); + EXPECT_EQ(lv.GetElem(0), true); + EXPECT_EQ(lv.GetElem(1), false); + EXPECT_EQ(lv.GetElem(2), true); +} + +TEST(ListViewBuilderTest, EmptyPodList) { + DataType list_type = DataType::List(DataType(DataTypeId::kInt64)); + + ListViewBuilder b; + std::string blob = b.finish_pod(); + + // size: 4 (count=0) + 0 elements = 4 + EXPECT_EQ(blob.size(), 4u); + + ListView lv(list_type, blob); + EXPECT_EQ(lv.size(), 0u); +} + +// --------------------------------------------------------------------------- +// ListViewBuilder + ListView — non-POD (varchar) elements +// --------------------------------------------------------------------------- +TEST(ListViewBuilderTest, VarcharRoundtrip) { + DataType list_type = 
DataType::List(DataType::Varchar(256)); + + ListViewBuilder b; + b.append_blob("hello"); + b.append_blob("world"); + b.append_blob("foo"); + EXPECT_EQ(b.count(), 3u); + + std::string blob = b.finish_varlen(); + + ListView lv(list_type, blob); + ASSERT_EQ(lv.size(), 3u); + EXPECT_EQ(lv.GetChildStringView(0), "hello"); + EXPECT_EQ(lv.GetChildStringView(1), "world"); + EXPECT_EQ(lv.GetChildStringView(2), "foo"); +} + +TEST(ListViewBuilderTest, VarcharEmptyList) { + DataType list_type = DataType::List(DataType::Varchar(256)); + + ListViewBuilder b; + std::string blob = b.finish_varlen(); + + ListView lv(list_type, blob); + EXPECT_EQ(lv.size(), 0u); +} + +TEST(ListViewBuilderTest, VarcharWithEmptyElement) { + DataType list_type = DataType::List(DataType::Varchar(256)); + + ListViewBuilder b; + b.append_blob(""); + b.append_blob("abc"); + b.append_blob(""); + std::string blob = b.finish_varlen(); + + ListView lv(list_type, blob); + ASSERT_EQ(lv.size(), 3u); + EXPECT_EQ(lv.GetChildStringView(0), ""); + EXPECT_EQ(lv.GetChildStringView(1), "abc"); + EXPECT_EQ(lv.GetChildStringView(2), ""); +} + +// --------------------------------------------------------------------------- +// Nested List> +// --------------------------------------------------------------------------- +TEST(ListViewBuilderTest, NestedIntList) { + DataType inner_type = DataType::List(DataType(DataTypeId::kInt32)); + DataType outer_type = DataType::List(inner_type); + + // Build inner blobs + ListViewBuilder inner_b0; + inner_b0.append_pod(1); + inner_b0.append_pod(2); + std::string inner0 = inner_b0.finish_pod(); + + ListViewBuilder inner_b1; + inner_b1.append_pod(3); + inner_b1.append_pod(4); + inner_b1.append_pod(5); + std::string inner1 = inner_b1.finish_pod(); + + // Build outer blob + ListViewBuilder outer_b; + outer_b.append_blob(inner0); + outer_b.append_blob(inner1); + std::string outer_blob = outer_b.finish_varlen(); + + ListView outer(outer_type, outer_blob); + ASSERT_EQ(outer.size(), 2u); + + 
ListView child0 = outer.GetChildListView(0); + ASSERT_EQ(child0.size(), 2u); + EXPECT_EQ(child0.GetElem(0), 1); + EXPECT_EQ(child0.GetElem(1), 2); + + ListView child1 = outer.GetChildListView(1); + ASSERT_EQ(child1.size(), 3u); + EXPECT_EQ(child1.GetElem(0), 3); + EXPECT_EQ(child1.GetElem(1), 4); + EXPECT_EQ(child1.GetElem(2), 5); +} + +// --------------------------------------------------------------------------- +// ListViewBuilder reset() reuse +// --------------------------------------------------------------------------- +TEST(ListViewBuilderTest, ResetReuse) { + DataType list_type = DataType::List(DataType(DataTypeId::kInt32)); + ListViewBuilder b; + + b.append_pod(100); + b.append_pod(200); + EXPECT_EQ(b.count(), 2u); + + b.reset(); + EXPECT_EQ(b.count(), 0u); + + b.append_pod(42); + std::string blob = b.finish_pod(); + ListView lv(list_type, blob); + ASSERT_EQ(lv.size(), 1u); + EXPECT_EQ(lv.GetElem(0), 42); +} + +// --------------------------------------------------------------------------- +// ListColumn — set / get +// --------------------------------------------------------------------------- +TEST(ListColumnTest, SetAndGetIntList) { + std::string tmp_dir = "/tmp/test_list_column_int"; + if (std::filesystem::exists(tmp_dir)) { + std::filesystem::remove_all(tmp_dir); + } + std::filesystem::create_directories(tmp_dir); + + DataType list_type = DataType::List(DataType(DataTypeId::kInt32)); + ListColumn col(list_type); + col.open("col", tmp_dir, tmp_dir); + col.resize(4); + + // Row 0: [10, 20, 30] + { + ListViewBuilder b; + b.append_pod(10); + b.append_pod(20); + b.append_pod(30); + col.set_value(0, b.finish_pod()); + } + // Row 1: [] + { + ListViewBuilder b; + col.set_value(1, b.finish_pod()); + } + // Row 2: [99] + { + ListViewBuilder b; + b.append_pod(99); + col.set_value(2, b.finish_pod()); + } + // Row 3: [-1, -2] + { + ListViewBuilder b; + b.append_pod(-1); + b.append_pod(-2); + col.set_value(3, b.finish_pod()); + } + + // Verify + { + ListView lv = 
col.get_view(0); + ASSERT_EQ(lv.size(), 3u); + EXPECT_EQ(lv.GetElem(0), 10); + EXPECT_EQ(lv.GetElem(1), 20); + EXPECT_EQ(lv.GetElem(2), 30); + } + { + ListView lv = col.get_view(1); + EXPECT_EQ(lv.size(), 0u); + } + { + ListView lv = col.get_view(2); + ASSERT_EQ(lv.size(), 1u); + EXPECT_EQ(lv.GetElem(0), 99); + } + { + ListView lv = col.get_view(3); + ASSERT_EQ(lv.size(), 2u); + EXPECT_EQ(lv.GetElem(0), -1); + EXPECT_EQ(lv.GetElem(1), -2); + } +} + +TEST(ListColumnTest, SetAndGetVarcharList) { + std::string tmp_dir = "/tmp/test_list_column_varchar"; + if (std::filesystem::exists(tmp_dir)) { + std::filesystem::remove_all(tmp_dir); + } + std::filesystem::create_directories(tmp_dir); + + DataType list_type = DataType::List(DataType::Varchar(256)); + ListColumn col(list_type); + col.open("col", tmp_dir, tmp_dir); + col.resize(2); + + // Row 0: ["alice", "bob"] + { + ListViewBuilder b; + b.append_blob("alice"); + b.append_blob("bob"); + col.set_value(0, b.finish_varlen()); + } + // Row 1: ["single"] + { + ListViewBuilder b; + b.append_blob("single"); + col.set_value(1, b.finish_varlen()); + } + + { + ListView lv = col.get_view(0); + ASSERT_EQ(lv.size(), 2u); + EXPECT_EQ(lv.GetChildStringView(0), "alice"); + EXPECT_EQ(lv.GetChildStringView(1), "bob"); + } + { + ListView lv = col.get_view(1); + ASSERT_EQ(lv.size(), 1u); + EXPECT_EQ(lv.GetChildStringView(0), "single"); + } +} + +// --------------------------------------------------------------------------- +// ListColumn — dump and reload from disk +// --------------------------------------------------------------------------- +TEST(ListColumnTest, DumpAndReload) { + std::string work_dir = "/tmp/test_list_column_dump_work"; + std::string snap_dir = "/tmp/test_list_column_dump_snap"; + if (std::filesystem::exists(work_dir)) { + std::filesystem::remove_all(work_dir); + } + if (std::filesystem::exists(snap_dir)) { + std::filesystem::remove_all(snap_dir); + } + std::filesystem::create_directories(work_dir); + 
std::filesystem::create_directories(snap_dir); + + DataType list_type = DataType::List(DataType(DataTypeId::kInt64)); + + // Write phase: open with work_dir, then dump to snap_dir + { + ListColumn col(list_type); + col.open("scores", work_dir, work_dir); + col.resize(3); + + ListViewBuilder b; + b.append_pod(100L); + b.append_pod(200L); + col.set_value(0, b.finish_pod()); + + b.reset(); + b.append_pod(300L); + col.set_value(1, b.finish_pod()); + + b.reset(); + col.set_value(2, b.finish_pod()); // empty + + col.dump(snap_dir + "/scores"); + } + + // Reload phase: open with snap_dir as snapshot + { + ListColumn col(list_type); + col.open("scores", snap_dir, ""); + + { + ListView lv = col.get_view(0); + ASSERT_EQ(lv.size(), 2u); + EXPECT_EQ(lv.GetElem(0), 100L); + EXPECT_EQ(lv.GetElem(1), 200L); + } + { + ListView lv = col.get_view(1); + ASSERT_EQ(lv.size(), 1u); + EXPECT_EQ(lv.GetElem(0), 300L); + } + { + ListView lv = col.get_view(2); + EXPECT_EQ(lv.size(), 0u); + } + } +} + +// --------------------------------------------------------------------------- +// ListViewToValue bridge +// --------------------------------------------------------------------------- +TEST(ListViewToValueTest, Int32ToValue) { + DataType list_type = DataType::List(DataType(DataTypeId::kInt32)); + + ListViewBuilder b; + b.append_pod(5); + b.append_pod(6); + b.append_pod(7); + std::string blob = b.finish_pod(); + + ListView lv(list_type, blob); + Value val = ListViewToValue(lv); + + ASSERT_EQ(val.type().id(), DataTypeId::kList); + const auto& children = ListValue::GetChildren(val); + ASSERT_EQ(children.size(), 3u); + EXPECT_EQ(children[0].GetValue(), 5); + EXPECT_EQ(children[1].GetValue(), 6); + EXPECT_EQ(children[2].GetValue(), 7); +} + +TEST(ListViewToValueTest, DoubleToValue) { + DataType list_type = DataType::List(DataType(DataTypeId::kDouble)); + + ListViewBuilder b; + b.append_pod(3.14); + b.append_pod(2.72); + std::string blob = b.finish_pod(); + + ListView lv(list_type, blob); + Value 
val = ListViewToValue(lv); + + ASSERT_EQ(val.type().id(), DataTypeId::kList); + const auto& children = ListValue::GetChildren(val); + ASSERT_EQ(children.size(), 2u); + EXPECT_DOUBLE_EQ(children[0].GetValue(), 3.14); + EXPECT_DOUBLE_EQ(children[1].GetValue(), 2.72); +} + +TEST(ListViewToValueTest, VarcharToValue) { + DataType list_type = DataType::List(DataType::Varchar(256)); + + ListViewBuilder b; + b.append_blob("foo"); + b.append_blob("bar"); + std::string blob = b.finish_varlen(); + + ListView lv(list_type, blob); + Value val = ListViewToValue(lv); + + ASSERT_EQ(val.type().id(), DataTypeId::kList); + const auto& children = ListValue::GetChildren(val); + ASSERT_EQ(children.size(), 2u); + EXPECT_EQ(StringValue::Get(children[0]), "foo"); + EXPECT_EQ(StringValue::Get(children[1]), "bar"); +} + +TEST(ListViewToValueTest, EmptyListToValue) { + DataType list_type = DataType::List(DataType(DataTypeId::kInt32)); + + ListViewBuilder b; + std::string blob = b.finish_pod(); + + ListView lv(list_type, blob); + Value val = ListViewToValue(lv); + + ASSERT_EQ(val.type().id(), DataTypeId::kList); + const auto& children = ListValue::GetChildren(val); + EXPECT_EQ(children.size(), 0u); +} + +TEST(ListViewToValueTest, NestedListToValue) { + DataType inner_type = DataType::List(DataType(DataTypeId::kInt32)); + DataType outer_type = DataType::List(inner_type); + + ListViewBuilder inner_b; + inner_b.append_pod(1); + inner_b.append_pod(2); + std::string inner_blob = inner_b.finish_pod(); + + ListViewBuilder outer_b; + outer_b.append_blob(inner_blob); + std::string outer_blob = outer_b.finish_varlen(); + + ListView outer_lv(outer_type, outer_blob); + Value outer_val = ListViewToValue(outer_lv); + + ASSERT_EQ(outer_val.type().id(), DataTypeId::kList); + const auto& outer_children = ListValue::GetChildren(outer_val); + ASSERT_EQ(outer_children.size(), 1u); + + const Value& inner_val = outer_children[0]; + ASSERT_EQ(inner_val.type().id(), DataTypeId::kList); + const auto& inner_children = 
ListValue::GetChildren(inner_val); + ASSERT_EQ(inner_children.size(), 2u); + EXPECT_EQ(inner_children[0].GetValue(), 1); + EXPECT_EQ(inner_children[1].GetValue(), 2); +} + +// --------------------------------------------------------------------------- +// Property extensions — from_list_data / as_list_data / set_list_data +// --------------------------------------------------------------------------- +TEST(PropertyListTest, FromAndAsListData) { + DataType list_type = DataType::List(DataType(DataTypeId::kInt32)); + ListViewBuilder b; + b.append_pod(1); + b.append_pod(2); + std::string blob = b.finish_pod(); + + Property p = Property::from_list_data(blob); + EXPECT_EQ(p.type(), DataTypeId::kList); + EXPECT_EQ(p.as_list_data(), blob); +} + +TEST(PropertyListTest, SetListData) { + DataType list_type = DataType::List(DataType::Varchar(64)); + ListViewBuilder b; + b.append_blob("x"); + b.append_blob("y"); + std::string blob = b.finish_varlen(); + + Property p; + p.set_list_data(blob); + EXPECT_EQ(p.type(), DataTypeId::kList); + EXPECT_EQ(p.as_list_data(), blob); +} diff --git a/tools/python_bind/neug/proto/basic_type_pb2.py b/tools/python_bind/neug/proto/basic_type_pb2.py index ead7caed0..769b31c01 100644 --- a/tools/python_bind/neug/proto/basic_type_pb2.py +++ b/tools/python_bind/neug/proto/basic_type_pb2.py @@ -19,7 +19,6 @@ _builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, globals()) _builder.BuildTopDescriptorsAndMessages(DESCRIPTOR, "basic_type_pb2", globals()) if _descriptor._USE_C_DESCRIPTORS == False: - DESCRIPTOR._options = None DESCRIPTOR._serialized_options = b"\n!com.alibaba.graphscope.proto.typeB\006Common" _PRIMITIVETYPE._serialized_start = 1613 diff --git a/tools/python_bind/neug/proto/common_pb2.py b/tools/python_bind/neug/proto/common_pb2.py index 77eabbf7c..0725ff5d4 100644 --- a/tools/python_bind/neug/proto/common_pb2.py +++ b/tools/python_bind/neug/proto/common_pb2.py @@ -19,7 +19,6 @@ _builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, globals()) 
_builder.BuildTopDescriptorsAndMessages(DESCRIPTOR, "common_pb2", globals()) if _descriptor._USE_C_DESCRIPTORS == False: - DESCRIPTOR._options = None DESCRIPTOR._serialized_options = b"\n!com.alibaba.graphscope.gaia.protoB\006Common" _NONE._serialized_start = 24 diff --git a/tools/python_bind/neug/proto/error_pb2.py b/tools/python_bind/neug/proto/error_pb2.py index fd5fe2167..83c784676 100644 --- a/tools/python_bind/neug/proto/error_pb2.py +++ b/tools/python_bind/neug/proto/error_pb2.py @@ -19,7 +19,6 @@ _builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, globals()) _builder.BuildTopDescriptorsAndMessages(DESCRIPTOR, "error_pb2", globals()) if _descriptor._USE_C_DESCRIPTORS == False: - DESCRIPTOR._options = None DESCRIPTOR._serialized_options = b"\n(com.alibaba.graphscope.interactive.protoP\001" _CODE._serialized_start = 37 diff --git a/tools/python_bind/neug/proto/expr_pb2.py b/tools/python_bind/neug/proto/expr_pb2.py index 8fe0692a2..16233ea1b 100644 --- a/tools/python_bind/neug/proto/expr_pb2.py +++ b/tools/python_bind/neug/proto/expr_pb2.py @@ -23,7 +23,6 @@ _builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, globals()) _builder.BuildTopDescriptorsAndMessages(DESCRIPTOR, "expr_pb2", globals()) if _descriptor._USE_C_DESCRIPTORS == False: - DESCRIPTOR._options = None DESCRIPTOR._serialized_options = ( b"\n!com.alibaba.graphscope.gaia.protoB\017OuterExpression" diff --git a/tools/python_bind/neug/proto/results_pb2.py b/tools/python_bind/neug/proto/results_pb2.py index 476406a73..529b4447a 100644 --- a/tools/python_bind/neug/proto/results_pb2.py +++ b/tools/python_bind/neug/proto/results_pb2.py @@ -23,7 +23,6 @@ _builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, globals()) _builder.BuildTopDescriptorsAndMessages(DESCRIPTOR, "results_pb2", globals()) if _descriptor._USE_C_DESCRIPTORS == False: - DESCRIPTOR._options = None DESCRIPTOR._serialized_options = ( b"\n!com.alibaba.graphscope.gaia.protoB\010IrResult" diff --git a/tools/python_bind/neug/proto/type_pb2.py 
b/tools/python_bind/neug/proto/type_pb2.py index eb418904c..092e984ee 100644 --- a/tools/python_bind/neug/proto/type_pb2.py +++ b/tools/python_bind/neug/proto/type_pb2.py @@ -24,7 +24,6 @@ _builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, globals()) _builder.BuildTopDescriptorsAndMessages(DESCRIPTOR, "type_pb2", globals()) if _descriptor._USE_C_DESCRIPTORS == False: - DESCRIPTOR._options = None DESCRIPTOR._serialized_options = ( b"\n!com.alibaba.graphscope.gaia.protoB\010DataType" diff --git a/tools/python_bind/setup.py b/tools/python_bind/setup.py index 30c104868..470ab42da 100644 --- a/tools/python_bind/setup.py +++ b/tools/python_bind/setup.py @@ -80,7 +80,6 @@ def __init__(self, name: str, sourcedir: str = "") -> None: class CMakeBuild(build_ext): - def initialize_options(self): super().initialize_options() # We set the build_temp to the local build/ directory diff --git a/tools/python_bind/tests/test_ddl.py b/tools/python_bind/tests/test_ddl.py index 9765039fb..8c047ddcb 100644 --- a/tools/python_bind/tests/test_ddl.py +++ b/tools/python_bind/tests/test_ddl.py @@ -487,3 +487,20 @@ def test_drop_add_edge_table_column(): # assert list(ret) == [["unknown"], ["unknown"], ["unknown"], ["unknown"], ["test"]] conn2.close() db2.close() + + +def test_list_type(): + db_dir = "/tmp/test_list_type" + shutil.rmtree(db_dir, ignore_errors=True) + db = Database(db_dir, "w") + conn = db.connect() + res = conn.execute("Return ['tag1', 'tag2'];") + assert list(res) == [[["tag1", "tag2"]]] + conn.execute( + "CREATE NODE TABLE TestNode(id INT64, tags STRING[], PRIMARY KEY(id));" + ) + conn.execute("CREATE (:TestNode {id: 1, tags: ['tag1', 'tag2']});") + res = conn.execute("Match (n:TestNode) Return n.tags;") + assert list(res) == [[["tag1", "tag2"]]] + conn.close() + db.close() diff --git a/tools/python_bind/tests/test_lsqb.py b/tools/python_bind/tests/test_lsqb.py index 3335a8333..00d578869 100644 --- a/tools/python_bind/tests/test_lsqb.py +++ 
b/tools/python_bind/tests/test_lsqb.py @@ -60,7 +60,6 @@ def tearDown(self): self.db.close() def test_queries(self): - submit_cypher_query( conn=self.conn, query="MATCH (n:Country) return n limit 10;",