diff --git a/include/my_base.h b/include/my_base.h index 413a4cf6e594..c224cf80a02b 100644 --- a/include/my_base.h +++ b/include/my_base.h @@ -105,10 +105,11 @@ enum ha_key_alg { SEs default algorithm for keys in mysql_prepare_create_table(). */ HA_KEY_ALG_SE_SPECIFIC = 0, - HA_KEY_ALG_BTREE = 1, /* B-tree. */ - HA_KEY_ALG_RTREE = 2, /* R-tree, for spatial searches */ - HA_KEY_ALG_HASH = 3, /* HASH keys (HEAP, NDB). */ - HA_KEY_ALG_FULLTEXT = 4 /* FULLTEXT. */ + HA_KEY_ALG_BTREE = 1, /* B-tree. */ + HA_KEY_ALG_RTREE = 2, /* R-tree, for spatial searches */ + HA_KEY_ALG_HASH = 3, /* HASH keys (HEAP, NDB). */ + HA_KEY_ALG_FULLTEXT = 4, /* FULLTEXT. */ + HA_KEY_ALG_VECTOR = 5 /* VECTOR. */ }; /* Storage media types */ @@ -521,6 +522,8 @@ enum ha_base_keytype { #define HA_USES_COMMENT (1 << 12) /** Key was automatically created to support Foreign Key constraint. */ #define HA_GENERATED_KEY (1 << 13) +/** Vector key (Percona). */ +#define HA_VECTOR (1 << 30) /** TokuDB CLUSTERING key */ #define HA_CLUSTERING (1 << 31) @@ -528,7 +531,7 @@ enum ha_base_keytype { #define HA_KEYFLAG_MASK \ (HA_NOSAME | HA_PACK_KEY | HA_AUTO_KEY | HA_BINARY_PACK_KEY | HA_FULLTEXT | \ HA_UNIQUE_CHECK | HA_SPATIAL | HA_NULL_ARE_EQUAL | HA_GENERATED_KEY | \ - HA_CLUSTERING) + HA_VECTOR | HA_CLUSTERING) /** Fulltext index uses [pre]parser */ #define HA_USES_PARSER (1 << 14) @@ -845,7 +848,7 @@ is the global server default. */ have been disabled. The most important parameters set here is records per key on - all indexes. block_size and primary key ref_length. + all indexes. block_size and primary key ref_length. For each index there is an array of rec_per_key. 
As an example if we have an index with three attributes a,b and c diff --git a/mysql-test/suite/percona/r/vector_index_syntax.result b/mysql-test/suite/percona/r/vector_index_syntax.result new file mode 100644 index 000000000000..2671ef1fbcff --- /dev/null +++ b/mysql-test/suite/percona/r/vector_index_syntax.result @@ -0,0 +1,136 @@ +CREATE TABLE t1 ( +id BIGINT UNSIGNED PRIMARY KEY, +v1 VECTOR( 1234 ), +VECTOR KEY( v1 ) SECONDARY_ENGINE_ATTRIBUTE '{ "type" : "hnsw", "M" : 6 }' +); +SHOW CREATE TABLE t1; +Table Create Table +t1 CREATE TABLE `t1` ( + `id` bigint unsigned NOT NULL, + `v1` vector(1234) DEFAULT NULL, + PRIMARY KEY (`id`), + VECTOR KEY `v1` (`v1`) /*!80021 SECONDARY_ENGINE_ATTRIBUTE '{"M": 6, "type": "hnsw"}' */ +) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_0900_ai_ci +SHOW INDEXES FROM t1; +Table Non_unique Key_name Seq_in_index Column_name Collation Cardinality Sub_part Packed Null Index_type Comment Index_comment Visible Expression +t1 0 PRIMARY 1 id A 0 NULL NULL BTREE YES NULL +t1 1 v1 1 v1 A 0 1 NULL YES VECTOR YES NULL +ALTER TABLE t1 DROP PRIMARY KEY; +ERROR HY000: Vector index can only be created in tables with a BIGINT UNSIGNED primary key. 
+ALTER TABLE t1 DROP INDEX v1; +SHOW CREATE TABLE t1; +Table Create Table +t1 CREATE TABLE `t1` ( + `id` bigint unsigned NOT NULL, + `v1` vector(1234) DEFAULT NULL, + PRIMARY KEY (`id`) +) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_0900_ai_ci +SHOW INDEXES FROM t1; +Table Non_unique Key_name Seq_in_index Column_name Collation Cardinality Sub_part Packed Null Index_type Comment Index_comment Visible Expression +t1 0 PRIMARY 1 id A 0 NULL NULL BTREE YES NULL +DROP TABLE t1; +CREATE TABLE t1 ( +id BIGINT UNSIGNED PRIMARY KEY, +v1 VECTOR( 1234 ) +); +CREATE VECTOR INDEX v1 ON t1( v1 ) SECONDARY_ENGINE_ATTRIBUTE '{ "type" : "hnsw", "M" : 6 }'; +SHOW CREATE TABLE t1; +Table Create Table +t1 CREATE TABLE `t1` ( + `id` bigint unsigned NOT NULL, + `v1` vector(1234) DEFAULT NULL, + PRIMARY KEY (`id`), + VECTOR KEY `v1` (`v1`) /*!80021 SECONDARY_ENGINE_ATTRIBUTE '{"M": 6, "type": "hnsw"}' */ +) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_0900_ai_ci +SHOW INDEXES FROM t1; +Table Non_unique Key_name Seq_in_index Column_name Collation Cardinality Sub_part Packed Null Index_type Comment Index_comment Visible Expression +t1 0 PRIMARY 1 id A 0 NULL NULL BTREE YES NULL +t1 1 v1 1 v1 A 0 1 NULL YES VECTOR YES NULL +ALTER TABLE t1 DROP INDEX v1; +SHOW CREATE TABLE t1; +Table Create Table +t1 CREATE TABLE `t1` ( + `id` bigint unsigned NOT NULL, + `v1` vector(1234) DEFAULT NULL, + PRIMARY KEY (`id`) +) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_0900_ai_ci +SHOW INDEXES FROM t1; +Table Non_unique Key_name Seq_in_index Column_name Collation Cardinality Sub_part Packed Null Index_type Comment Index_comment Visible Expression +t1 0 PRIMARY 1 id A 0 NULL NULL BTREE YES NULL +DROP TABLE t1; +CREATE TABLE t1 ( +id BIGINT UNSIGNED PRIMARY KEY, +v1 VECTOR( 1234 ), +VECTOR KEY( v1 ) +); +ERROR HY000: Invalid json attribute, error: "Validation failed for vector index options: type" at pos 1: '#' +CREATE TABLE t1 ( +id BIGINT UNSIGNED PRIMARY KEY, +v1 VECTOR( 1234 ), 
+VECTOR KEY( v1 ) SECONDARY_ENGINE_ATTRIBUTE '{ "type" : "hnsw", "M" : "6" }' +); +ERROR HY000: Invalid json attribute, error: "Validation failed for vector index options: type" at pos 3: '#/M' +CREATE TABLE t1 ( +id BIGINT UNSIGNED PRIMARY KEY, +v1 VECTOR( 1234 ), +VECTOR KEY( v1 ) SECONDARY_ENGINE_ATTRIBUTE '{ "type" : "hnsw", "M" : 6.0 }' +); +ERROR HY000: Invalid json attribute, error: "Validation failed for vector index options: type" at pos 3: '#/M' +CREATE TABLE t1 ( +id BIGINT UNSIGNED PRIMARY KEY, +v1 VECTOR( 1234 ), +VECTOR KEY( v1 ) SECONDARY_ENGINE_ATTRIBUTE '{ "type" : "hnsw", "M" : [] }' +); +ERROR HY000: Invalid json attribute, error: "Validation failed for vector index options: type" at pos 3: '#/M' +CREATE TABLE t1 ( +id BIGINT UNSIGNED PRIMARY KEY, +v1 VECTOR( 1234 ), +VECTOR KEY( v1 ) SECONDARY_ENGINE_ATTRIBUTE '{ "type" : "hnsw", "no such" : "key" }' +); +ERROR HY000: Invalid json attribute, error: "Validation failed for vector index options: additionalProperties" at pos 11: '#/no%20such' +CREATE TABLE t1 ( +id BIGINT UNSIGNED PRIMARY KEY, +v1 VECTOR( 1234 ), +VECTOR KEY( v1 ) SECONDARY_ENGINE_ATTRIBUTE '{ "type" : "hnsw", "M" : 6, "no such" : "key" }' +); +ERROR HY000: Invalid json attribute, error: "Validation failed for vector index options: additionalProperties" at pos 11: '#/no%20such' +CREATE TABLE t1 ( +id BIGINT UNSIGNED PRIMARY KEY, +v1 VECTOR( 1234 ) +); +SHOW CREATE TABLE t1; +Table Create Table +t1 CREATE TABLE `t1` ( + `id` bigint unsigned NOT NULL, + `v1` vector(1234) DEFAULT NULL, + PRIMARY KEY (`id`) +) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_0900_ai_ci +CREATE VECTOR INDEX idx_v1 ON t1 (v1) SECONDARY_ENGINE_ATTRIBUTE '{ "type" : "hnsw", "M" : "6" }'; +ERROR HY000: Invalid json attribute, error: "Validation failed for vector index options: type" at pos 3: '#/M' +SHOW CREATE TABLE t1; +Table Create Table +t1 CREATE TABLE `t1` ( + `id` bigint unsigned NOT NULL, + `v1` vector(1234) DEFAULT NULL, + PRIMARY KEY (`id`) +) 
ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_0900_ai_ci +SHOW INDEXES FROM t1; +Table Non_unique Key_name Seq_in_index Column_name Collation Cardinality Sub_part Packed Null Index_type Comment Index_comment Visible Expression +t1 0 PRIMARY 1 id A 0 NULL NULL BTREE YES NULL +CREATE VECTOR INDEX idx_v1 ON t1 (v1) +SECONDARY_ENGINE_ATTRIBUTE '{ "type" : "hnsw", "M" : 1, "ef_construction" : 2, "max_elements" : 3 }'; +SHOW CREATE TABLE t1; +Table Create Table +t1 CREATE TABLE `t1` ( + `id` bigint unsigned NOT NULL, + `v1` vector(1234) DEFAULT NULL, + PRIMARY KEY (`id`), + VECTOR KEY `idx_v1` (`v1`) /*!80021 SECONDARY_ENGINE_ATTRIBUTE '{"M": 1, "type": "hnsw", "max_elements": 3, "ef_construction": 2}' */ +) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_0900_ai_ci +SHOW INDEXES FROM t1; +Table Non_unique Key_name Seq_in_index Column_name Collation Cardinality Sub_part Packed Null Index_type Comment Index_comment Visible Expression +t1 0 PRIMARY 1 id A 0 NULL NULL BTREE YES NULL +t1 1 idx_v1 1 v1 A 0 1 NULL YES VECTOR YES NULL +ALTER TABLE t1 DROP PRIMARY KEY; +ERROR HY000: Vector index can only be created in tables with a BIGINT UNSIGNED primary key. 
+DROP TABLE t1; diff --git a/mysql-test/suite/percona/t/vector_index_syntax.test b/mysql-test/suite/percona/t/vector_index_syntax.test new file mode 100644 index 000000000000..84d15c450fb0 --- /dev/null +++ b/mysql-test/suite/percona/t/vector_index_syntax.test @@ -0,0 +1,96 @@ +CREATE TABLE t1 ( + id BIGINT UNSIGNED PRIMARY KEY, + v1 VECTOR( 1234 ), + VECTOR KEY( v1 ) SECONDARY_ENGINE_ATTRIBUTE '{ "type" : "hnsw", "M" : 6 }' +); +SHOW CREATE TABLE t1; +SHOW INDEXES FROM t1; + +--error ER_VECTOR_INDEX_NEEDS_PK +ALTER TABLE t1 DROP PRIMARY KEY; + +ALTER TABLE t1 DROP INDEX v1; +SHOW CREATE TABLE t1; +SHOW INDEXES FROM t1; + +DROP TABLE t1; + + +CREATE TABLE t1 ( + id BIGINT UNSIGNED PRIMARY KEY, + v1 VECTOR( 1234 ) +); + +CREATE VECTOR INDEX v1 ON t1( v1 ) SECONDARY_ENGINE_ATTRIBUTE '{ "type" : "hnsw", "M" : 6 }'; +SHOW CREATE TABLE t1; +SHOW INDEXES FROM t1; + +ALTER TABLE t1 DROP INDEX v1; +SHOW CREATE TABLE t1; +SHOW INDEXES FROM t1; + +DROP TABLE t1; + +--error ER_INVALID_JSON_ATTRIBUTE +CREATE TABLE t1 ( + id BIGINT UNSIGNED PRIMARY KEY, + v1 VECTOR( 1234 ), + VECTOR KEY( v1 ) +); + + +--error ER_INVALID_JSON_ATTRIBUTE +CREATE TABLE t1 ( + id BIGINT UNSIGNED PRIMARY KEY, + v1 VECTOR( 1234 ), + VECTOR KEY( v1 ) SECONDARY_ENGINE_ATTRIBUTE '{ "type" : "hnsw", "M" : "6" }' +); + +--error ER_INVALID_JSON_ATTRIBUTE +CREATE TABLE t1 ( + id BIGINT UNSIGNED PRIMARY KEY, + v1 VECTOR( 1234 ), + VECTOR KEY( v1 ) SECONDARY_ENGINE_ATTRIBUTE '{ "type" : "hnsw", "M" : 6.0 }' +); + +--error ER_INVALID_JSON_ATTRIBUTE +CREATE TABLE t1 ( + id BIGINT UNSIGNED PRIMARY KEY, + v1 VECTOR( 1234 ), + VECTOR KEY( v1 ) SECONDARY_ENGINE_ATTRIBUTE '{ "type" : "hnsw", "M" : [] }' +); + +--error ER_INVALID_JSON_ATTRIBUTE +CREATE TABLE t1 ( + id BIGINT UNSIGNED PRIMARY KEY, + v1 VECTOR( 1234 ), + VECTOR KEY( v1 ) SECONDARY_ENGINE_ATTRIBUTE '{ "type" : "hnsw", "no such" : "key" }' +); + +--error ER_INVALID_JSON_ATTRIBUTE +CREATE TABLE t1 ( + id BIGINT UNSIGNED PRIMARY KEY, + v1 VECTOR( 1234 ), + 
VECTOR KEY( v1 ) SECONDARY_ENGINE_ATTRIBUTE '{ "type" : "hnsw", "M" : 6, "no such" : "key" }' +); + +CREATE TABLE t1 ( + id BIGINT UNSIGNED PRIMARY KEY, + v1 VECTOR( 1234 ) +); +SHOW CREATE TABLE t1; + +--error ER_INVALID_JSON_ATTRIBUTE +CREATE VECTOR INDEX idx_v1 ON t1 (v1) SECONDARY_ENGINE_ATTRIBUTE '{ "type" : "hnsw", "M" : "6" }'; +SHOW CREATE TABLE t1; +SHOW INDEXES FROM t1; + +CREATE VECTOR INDEX idx_v1 ON t1 (v1) +SECONDARY_ENGINE_ATTRIBUTE '{ "type" : "hnsw", "M" : 1, "ef_construction" : 2, "max_elements" : 3 }'; +SHOW CREATE TABLE t1; +SHOW INDEXES FROM t1; + +--error ER_VECTOR_INDEX_NEEDS_PK +ALTER TABLE t1 DROP PRIMARY KEY; + +DROP TABLE t1; diff --git a/share/messages_to_clients.txt b/share/messages_to_clients.txt index 7e1ac246d6c1..9e254d0b8fec 100644 --- a/share/messages_to_clients.txt +++ b/share/messages_to_clients.txt @@ -11043,6 +11043,12 @@ ER_LOG_NAME_NOT_MATCHING_SEC_LOG_PATH_CLIENT # Start of Percona Server 8.4/9.7 error messages to be sent to client # +ER_VECTOR_INDEX_NEEDS_PK + eng "Vector index can only be created in tables with a BIGINT UNSIGNED primary key." + +ER_ONLY_SINGLE_VECTOR_INDEX_ALLOWED + eng "A table can have at most one vector index." + start-error-number 7100 # diff --git a/sql/create_field.cc b/sql/create_field.cc index 6bce8626d3a5..59aa1fd7b571 100644 --- a/sql/create_field.cc +++ b/sql/create_field.cc @@ -23,6 +23,7 @@ #include "sql/create_field.h" +#include "field_types.h" #include "m_string.h" #include "mysql/strings/dtoa.h" #include "sql-common/my_decimal.h" @@ -774,7 +775,8 @@ size_t Create_field::key_length() const { case MYSQL_TYPE_JSON: case MYSQL_TYPE_VAR_STRING: case MYSQL_TYPE_STRING: - case MYSQL_TYPE_VARCHAR: { + case MYSQL_TYPE_VARCHAR: + case MYSQL_TYPE_VECTOR: { return std::min(max_display_width_in_bytes(), static_cast(MAX_FIELD_BLOBLENGTH)); } @@ -788,10 +790,6 @@ size_t Create_field::key_length() const { } return pack_length() + (max_display_width_in_bytes() & 7 ? 
1 : 0); } - /* LCOV_EXCL_START */ - case MYSQL_TYPE_VECTOR: - assert(false); // Key on VECTOR type column is not supported. - /* LCOV_EXCL_STOP */ default: { return pack_length(is_array); } diff --git a/sql/dd/dd_table.cc b/sql/dd/dd_table.cc index 41eba2091944..5ee9fca44348 100644 --- a/sql/dd/dd_table.cc +++ b/sql/dd/dd_table.cc @@ -812,6 +812,9 @@ static dd::Index::enum_index_algorithm dd_get_new_index_algorithm_type( case HA_KEY_ALG_FULLTEXT: return dd::Index::IA_FULLTEXT; + + case HA_KEY_ALG_VECTOR: + return dd::Index::IA_VECTOR; } /* purecov: begin deadcode */ @@ -823,6 +826,8 @@ static dd::Index::enum_index_algorithm dd_get_new_index_algorithm_type( } static dd::Index::enum_index_type dd_get_new_index_type(const KEY *key) { + if (key->flags & HA_VECTOR) return dd::Index::IT_VECTOR; + if (key->flags & HA_FULLTEXT) return dd::Index::IT_FULLTEXT; if (key->flags & HA_SPATIAL) return dd::Index::IT_SPATIAL; @@ -907,6 +912,7 @@ static void fill_dd_index_elements_from_key_parts( case dd::Index::IT_MULTIPLE: case dd::Index::IT_FULLTEXT: case dd::Index::IT_SPATIAL: + case dd::Index::IT_VECTOR: if (key_part == key_parts) const_cast(key_col_obj) ->set_column_key(dd::Column::CK_MULTIPLE); diff --git a/sql/dd/impl/tables/indexes.cc b/sql/dd/impl/tables/indexes.cc index 28cb7cc09ace..ca9ae3b9645c 100644 --- a/sql/dd/impl/tables/indexes.cc +++ b/sql/dd/impl/tables/indexes.cc @@ -62,7 +62,8 @@ Indexes::Indexes() { " 'UNIQUE',\n" " 'MULTIPLE',\n" " 'FULLTEXT',\n" - " 'SPATIAL'\n" + " 'SPATIAL',\n" + " 'VECTOR'\n" ") NOT NULL"); m_target_def.add_field(FIELD_ALGORITHM, "FIELD_ALGORITHM", "algorithm ENUM(\n" @@ -70,7 +71,8 @@ Indexes::Indexes() { " 'BTREE',\n" " 'RTREE',\n" " 'HASH',\n" - " 'FULLTEXT'\n" + " 'FULLTEXT',\n" + " 'VECTOR'\n" ") NOT NULL"); m_target_def.add_field(FIELD_IS_ALGORITHM_EXPLICIT, "FIELD_IS_ALGORITHM_EXPLICIT", diff --git a/sql/dd/types/index.h b/sql/dd/types/index.h index 58b50119ef10..38196897bd42 100644 --- a/sql/dd/types/index.h +++ 
b/sql/dd/types/index.h @@ -61,7 +61,8 @@ class Index : virtual public Entity_object { IT_UNIQUE, IT_MULTIPLE, IT_FULLTEXT, - IT_SPATIAL + IT_SPATIAL, + IT_VECTOR }; enum enum_index_algorithm // similar to ha_key_alg @@ -70,7 +71,8 @@ class Index : virtual public Entity_object { IA_BTREE, IA_RTREE, IA_HASH, - IA_FULLTEXT + IA_FULLTEXT, + IA_VECTOR }; public: diff --git a/sql/dd_table_share.cc b/sql/dd_table_share.cc index f595eb26b0f3..266d519cc647 100644 --- a/sql/dd_table_share.cc +++ b/sql/dd_table_share.cc @@ -228,6 +228,9 @@ static enum ha_key_alg dd_get_old_index_algorithm_type( case dd::Index::IA_FULLTEXT: return HA_KEY_ALG_FULLTEXT; + case dd::Index::IA_VECTOR: + return HA_KEY_ALG_VECTOR; + default: assert(!"Should not hit here"); /* purecov: deadcode */ } @@ -347,6 +350,8 @@ static bool prepare_share(THD *thd, TABLE_SHARE *share, share->key_info[key].algorithm == HA_KEY_ALG_FULLTEXT); assert(!(share->key_info[key].flags & HA_SPATIAL) || share->key_info[key].algorithm == HA_KEY_ALG_RTREE); + assert(!(share->key_info[key].flags & HA_VECTOR) || + share->key_info[key].algorithm == HA_KEY_ALG_VECTOR); if (primary_key >= MAX_KEY && (keyinfo->flags & HA_NOSAME)) { /* @@ -1395,6 +1400,9 @@ static bool fill_index_from_dd(THD *thd, TABLE_SHARE *share, case dd::Index::IT_SPATIAL: keyinfo->flags = HA_SPATIAL; break; + case dd::Index::IT_VECTOR: + keyinfo->flags = HA_VECTOR; + break; case dd::Index::IT_PRIMARY: case dd::Index::IT_UNIQUE: keyinfo->flags = HA_NOSAME; diff --git a/sql/field.cc b/sql/field.cc index 755ded932f5f..81a18c06ff99 100644 --- a/sql/field.cc +++ b/sql/field.cc @@ -34,6 +34,7 @@ #include #include "decimal.h" +#include "field_types.h" #include "my_alloc.h" #include "my_byteorder.h" #include "my_compare.h" @@ -75,7 +76,7 @@ #include "sql/mysqld_cs.h" #include "sql/protocol.h" #include "sql/psi_memory_key.h" -#include "sql/spatial.h" // Geometry +#include "sql/spatial.h" // Geometry #include "sql/sql_base.h" #include "sql/sql_class.h" // THD #include 
"sql/sql_exception_handler.h" // handle_std_exception @@ -1656,6 +1657,7 @@ bool Field::type_can_have_key_part(enum enum_field_types type) { case MYSQL_TYPE_VAR_STRING: case MYSQL_TYPE_STRING: case MYSQL_TYPE_GEOMETRY: + case MYSQL_TYPE_VECTOR: return true; default: return false; diff --git a/sql/handler.h b/sql/handler.h index f8dcfd5d831b..9c31bbb7b1ce 100644 --- a/sql/handler.h +++ b/sql/handler.h @@ -535,6 +535,7 @@ enum class SelectExecutedIn : bool { kPrimaryEngine, kSecondaryEngine }; ANALYZE TABLE on it */ #define HA_ONLINE_ANALYZE (1LL << 56) +#define HA_CAN_VECTOR (1LL << 57) /* Bits in index_flags(index_number) for what you can do with index. @@ -2160,6 +2161,10 @@ using fix_default_table_encryption_t = bool (*)(ulong value, bool is_starting); */ typedef bool (*redo_log_set_state_t)(THD *thd, bool enable); +struct HA_CREATE_INFO; +using validate_engine_attributes_t = bool (*)(THD *thd, const char *db_name, + HA_CREATE_INFO *create_info, + const Alter_info *alter_info); /** @brief Retrieve ha_statistics from SE. @@ -3038,6 +3043,7 @@ struct handlerton { fix_tablespaces_empty_uuid_t fix_tablespaces_empty_uuid; fix_default_table_encryption_t fix_default_table_encryption; redo_log_set_state_t redo_log_set_state; + validate_engine_attributes_t validate_engine_attributes{nullptr}; get_table_statistics_t get_table_statistics; get_column_statistics_t get_column_statistics; @@ -3299,7 +3305,6 @@ inline constexpr const decltype(handlerton::flags) inline constexpr const decltype(handlerton::flags) HTON_SECONDARY_SUPPORTS_TEMPORARY_TABLE(1 << 25); - /** Start of Percona specific HTON_* defines */ /** @@ -3322,7 +3327,6 @@ inline constexpr const decltype(handlerton::flags) /** End of Percona specific HTON_* defines */ - /* Whether the handlerton is a secondary engine. */ inline bool hton_is_secondary_engine(const handlerton *hton) { return hton != nullptr && (hton->flags & HTON_IS_SECONDARY_ENGINE) != 0U; @@ -7050,16 +7054,16 @@ class handler { for details. 
*/ [[nodiscard]] int ha_fast_update(THD *thd, - mem_root_deque &update_fields, - mem_root_deque &update_values, - Item *conds); + mem_root_deque &update_fields, + mem_root_deque &update_values, + Item *conds); /** @brief Offload an upsert to the storage engine. See handler::upsert() for details. */ [[nodiscard]] int ha_upsert(THD *thd, mem_root_deque &update_fields, - mem_root_deque &update_values); + mem_root_deque &update_values); private: /** @@ -7082,11 +7086,11 @@ class handler { handler::ha_update_row(...) does not accept conditions. */ [[nodiscard]] virtual int fast_update(THD *thd [[maybe_unused]], - mem_root_deque &update_fields - [[maybe_unused]], - mem_root_deque &update_values - [[maybe_unused]], - Item *conds [[maybe_unused]]) { + mem_root_deque &update_fields + [[maybe_unused]], + mem_root_deque &update_values + [[maybe_unused]], + Item *conds [[maybe_unused]]) { return ENOTSUP; } @@ -7107,10 +7111,10 @@ class handler { @return an error if the insert should be terminated. */ [[nodiscard]] virtual int upsert(THD *thd [[maybe_unused]], - mem_root_deque &update_fields - [[maybe_unused]], - mem_root_deque &update_values - [[maybe_unused]]) { + mem_root_deque &update_fields + [[maybe_unused]], + mem_root_deque &update_values + [[maybe_unused]]) { return ENOTSUP; } @@ -7619,7 +7623,6 @@ class handler { int get_lock_type() const { return m_lock_type; } - public: /* Read-free replication interface */ diff --git a/sql/key_spec.h b/sql/key_spec.h index ea8abdabe621..45bb045a505c 100644 --- a/sql/key_spec.h +++ b/sql/key_spec.h @@ -44,7 +44,8 @@ enum keytype { KEYTYPE_FULLTEXT = 4, KEYTYPE_SPATIAL = 8, KEYTYPE_FOREIGN = 16, - KEYTYPE_CLUSTERING = 32 + KEYTYPE_CLUSTERING = 32, + KEYTYPE_VECTOR = 64, }; enum fk_option { diff --git a/sql/sql_show.cc b/sql/sql_show.cc index d76ae77859db..5b6fd2a4f621 100644 --- a/sql/sql_show.cc +++ b/sql/sql_show.cc @@ -2739,6 +2739,8 @@ bool store_create_info(THD *thd, Table_ref *table_list, String *packet, 
packet->append(STRING_WITH_LEN("FULLTEXT KEY ")); else if (key_info->flags & HA_SPATIAL) packet->append(STRING_WITH_LEN("SPATIAL KEY ")); + else if (key_info->flags & HA_VECTOR) + packet->append(STRING_WITH_LEN("VECTOR KEY ")); else if (key_info->flags & HA_CLUSTERING) packet->append(STRING_WITH_LEN("CLUSTERING KEY ")); else @@ -2773,7 +2775,7 @@ bool store_create_info(THD *thd, Table_ref *table_list, String *packet, if (key_part->field && (key_part->length != table->field[key_part->fieldnr - 1]->key_length() && - !(key_info->flags & (HA_FULLTEXT | HA_SPATIAL)))) { + !(key_info->flags & (HA_FULLTEXT | HA_SPATIAL | HA_VECTOR)))) { packet->append_parenthesized((long)key_part->length / key_part->field->charset()->mbmaxlen); } @@ -5521,6 +5523,8 @@ static int get_schema_tmp_table_keys_record(THD *thd, Table_ref *tables, // INDEX_TYPE if (key_info->flags & HA_SPATIAL) str = "SPATIAL"; + else if (key_info->flags & HA_VECTOR) + str = "VECTOR"; else { const ha_key_alg key_alg = key_info->algorithm; /* If index algorithm is implicit get SE default. */ diff --git a/sql/sql_table.cc b/sql/sql_table.cc index 7545e2612530..2062d97643f1 100644 --- a/sql/sql_table.cc +++ b/sql/sql_table.cc @@ -5167,8 +5167,8 @@ static bool prepare_key_column(THD *thd, HA_CREATE_INFO *create_info, return true; } - // VECTOR columns cannot be used as keys - if (sql_field->sql_type == MYSQL_TYPE_VECTOR) { + if (sql_field->sql_type == MYSQL_TYPE_VECTOR && + false /* !(key_info->flags & HA_VECTOR) */) { my_error(ER_NON_SCALAR_USED_AS_KEY, MYF(0), column->get_field_name()); return true; } @@ -5219,6 +5219,13 @@ static bool prepare_key_column(THD *thd, HA_CREATE_INFO *create_info, data prefix, ignoring column->length). */ column_length = is_blob(sql_field->sql_type); + } else if (key->type == KEYTYPE_VECTOR) { + // VECTOR indexes are only allowed on VECTOR columns. 
+ if (sql_field->sql_type != MYSQL_TYPE_VECTOR) { + my_error(ER_UNKNOWN_ERROR, MYF(0)); + return true; + } + column_length = 1; // Dummy value. } else { switch (sql_field->sql_type) { case MYSQL_TYPE_GEOMETRY: @@ -5825,7 +5832,7 @@ static bool prepare_self_ref_fk_parent_key( for (const KEY *key = key_info_buffer; key < key_info_buffer + key_count; key++) { // We can't use FULLTEXT or SPATIAL indexes. - if (key->flags & (HA_FULLTEXT | HA_SPATIAL)) continue; + if (key->flags & (HA_FULLTEXT | HA_SPATIAL | HA_VECTOR)) continue; if (hton->foreign_keys_flags & HTON_FKS_NEED_DIFFERENT_PARENT_AND_SUPPORTING_KEYS) { @@ -6028,7 +6035,7 @@ static const KEY *find_fk_supporting_key(handlerton *hton, for (const KEY *key = key_info_buffer; key < key_info_buffer + key_count; key++) { // We can't use FULLTEXT or SPATIAL indexes. - if (key->flags & (HA_FULLTEXT | HA_SPATIAL)) continue; + if (key->flags & (HA_FULLTEXT | HA_SPATIAL | HA_VECTOR)) continue; if (key->algorithm == HA_KEY_ALG_HASH) { if (hton->foreign_keys_flags & HTON_FKS_WITH_SUPPORTING_HASH_KEYS) { @@ -7678,6 +7685,17 @@ static bool prepare_key( switch (static_cast(key->type)) { case KEYTYPE_MULTIPLE: break; + case KEYTYPE_VECTOR: + if (!(file->ha_table_flags() & HA_CAN_VECTOR)) { + my_error(ER_UNKNOWN_ERROR, MYF(0)); + return true; + } + if (key->columns.size() != 1) { + my_error(ER_TOO_MANY_KEY_PARTS, MYF(0), 1); + return true; + } + key_info->flags |= HA_VECTOR; + break; case KEYTYPE_FULLTEXT: if (!(file->ha_table_flags() & HA_CAN_FULLTEXT)) { my_error(ER_TABLE_CANT_HANDLE_FT, MYF(0)); @@ -7780,6 +7798,9 @@ static bool prepare_key( } else if (key_info->flags & HA_FULLTEXT) { assert(!key->key_create_info.is_algorithm_explicit); key_info->algorithm = HA_KEY_ALG_FULLTEXT; + } else if (key_info->flags & HA_VECTOR) { + assert(!key->key_create_info.is_algorithm_explicit); + key_info->algorithm = HA_KEY_ALG_VECTOR; } else { if (key->key_create_info.is_algorithm_explicit) { if (key->key_create_info.algorithm != 
HA_KEY_ALG_RTREE) { @@ -8665,6 +8686,7 @@ bool mysql_prepare_create_table( uint key_number = 0; bool primary_key = false; + uint vector_key_number = 0; // First prepare non-foreign keys so that they are ready when // we prepare foreign keys. @@ -8681,6 +8703,14 @@ bool mysql_prepare_create_table( primary_key = true; } + if (key->type == KEYTYPE_VECTOR) { + if (vector_key_number) { + my_error(ER_ONLY_SINGLE_VECTOR_INDEX_ALLOWED, MYF(0)); + return true; + } + ++vector_key_number; + } + if (key->type != KEYTYPE_FOREIGN) { if (prepare_key(thd, error_schema_name, error_table_name, create_info, &alter_info->create_list, key, key_info_buffer, key_info, @@ -8713,6 +8743,12 @@ bool mysql_prepare_create_table( return true; } + // We allow VECTOR keys only with tables with PK + if (!primary_key && vector_key_number) { + my_error(ER_VECTOR_INDEX_NEEDS_PK, MYF(0)); + return true; + } + /* At this point all KEY objects are for indexes are fully constructed. So we can check for duplicate indexes for keys for which it was requested. @@ -8740,6 +8776,26 @@ bool mysql_prepare_create_table( /* Sort keys in optimized order */ std::sort(*key_info_buffer, *key_info_buffer + *key_count, sort_keys()); + // We allow VECTOR indexes only on tables with BIGINT UNSIGNED PKs. + if (vector_key_number) { + assert(primary_key); + const KEY &primary_info = *key_info_buffer[0]; + + if (primary_info.actual_key_parts > 1) { + my_error(ER_UNKNOWN_ERROR, MYF(0)); + return true; + } + + for (it.rewind(), field_no = 0; (sql_field = it++); field_no++) { + if (field_no >= primary_info.key_part[0].fieldnr) break; + } + assert(sql_field); + if (sql_field->sql_type != MYSQL_TYPE_LONGLONG || !sql_field->is_unsigned) { + my_error(ER_VECTOR_INDEX_NEEDS_PK, MYF(0)); + return true; + } + } + /* Normal keys are done, now prepare foreign keys. 
@@ -16327,6 +16383,8 @@ bool prepare_fields_and_keys(THD *thd, const dd::Table *src_table, TABLE *table, key_type = KEYTYPE_UNIQUE; } else if (key_info->flags & HA_FULLTEXT) key_type = KEYTYPE_FULLTEXT; + else if (key_info->flags & HA_VECTOR) + key_type = KEYTYPE_VECTOR; else key_type = KEYTYPE_MULTIPLE; if (key_info->flags & HA_CLUSTERING) @@ -20607,6 +20665,11 @@ static bool check_engine(THD *thd, const char *db_name, const char *table_name, } } + if (auto vea = (*new_engine)->validate_engine_attributes; + vea != nullptr && vea(thd, db_name, create_info, alter_info)) { + return true; + } + return false; } diff --git a/sql/sql_yacc.yy b/sql/sql_yacc.yy index 36166da79d13..52e2c1abe38c 100644 --- a/sql/sql_yacc.yy +++ b/sql/sql_yacc.yy @@ -2141,6 +2141,7 @@ CHARSET_INFO *warn_on_deprecated_user_defined_collation( %type opt_index_options index_options opt_fulltext_index_options fulltext_index_options opt_spatial_index_options spatial_index_options + opt_vector_index_options vector_index_options %type opt_index_lock_and_algorithm @@ -2148,6 +2149,7 @@ CHARSET_INFO *warn_on_deprecated_user_defined_collation( spatial_index_option index_type_clause opt_index_type_clause + vector_index_option %type alter_algorithm_option_value alter_algorithm_option @@ -3586,7 +3588,16 @@ create_index_stmt: $11.algo.get_or_default(), $11.lock.get_or_default()); } + | CREATE VECTOR_SYM INDEX_SYM ident ON_SYM table_ident + '(' key_list_with_expression ')' opt_index_lock_and_algorithm opt_index_options + { + $$= NEW_PTN PT_create_index_stmt(@$, YYMEM_ROOT, KEYTYPE_VECTOR, $4, + nullptr, $6, $8, $11, + $10.algo.get_or_default(), + $10.lock.get_or_default()); + } ; + /* Only a limited subset of are allowed in CREATE COMPRESSION_DICTIONARY. 
@@ -7081,6 +7092,12 @@ table_constraint_def: { $$= NEW_PTN PT_inline_index_definition(@$, KEYTYPE_SPATIAL, $3, nullptr, $5, $7); } + | VECTOR_SYM opt_key_or_index opt_ident '(' key_list_with_expression ')' + opt_vector_index_options + { + $$= NEW_PTN PT_inline_index_definition(@$, KEYTYPE_VECTOR, $3, + nullptr, $5, $7); + } | opt_constraint_name constraint_key_type opt_index_name_and_type '(' key_list_with_expression ')' opt_index_options { @@ -8102,6 +8119,30 @@ spatial_index_option: common_index_option ; +opt_vector_index_options: + %empty { $$.init(YYMEM_ROOT); } + | vector_index_options + ; + +vector_index_options: + vector_index_option + { + $$.init(YYMEM_ROOT); + if ($$.push_back($1)) + MYSQL_YYABORT; // OOM + } + | vector_index_options vector_index_option + { + if ($1.push_back($2)) + MYSQL_YYABORT; // OOM + $$= $1; + } + ; + +vector_index_option: + common_index_option + ; + opt_index_options: %empty { $$.init(YYMEM_ROOT); } | index_options @@ -16540,7 +16581,6 @@ ident_keywords_unambiguous: | XML_SYM | YEAR_SYM | ZONE_SYM - | VECTOR_SYM ; /* diff --git a/storage/innobase/CMakeLists.txt b/storage/innobase/CMakeLists.txt index a2ada22aedcf..f325c6c7a698 100644 --- a/storage/innobase/CMakeLists.txt +++ b/storage/innobase/CMakeLists.txt @@ -159,6 +159,7 @@ SET(INNOBASE_SOURCES handler/handler0alter.cc handler/i_s.cc handler/p_s.cc + handler/vec0vec.cc handler/xtradb_i_s.cc ibuf/ibuf0ibuf.cc lob/lob0bulk.cc diff --git a/storage/innobase/btr/btr0btr.cc b/storage/innobase/btr/btr0btr.cc index 2b32c3406c19..eb9f8f2556e1 100644 --- a/storage/innobase/btr/btr0btr.cc +++ b/storage/innobase/btr/btr0btr.cc @@ -4655,7 +4655,8 @@ bool btr_validate_index( /* Full Text index are implemented by auxiliary tables, not the B-tree */ - if (dict_index_is_online_ddl(index) || (index->type & DICT_FTS)) { + if (dict_index_is_online_ddl(index) || + (index->type & (DICT_FTS | DICT_VECTOR))) { return (true); } diff --git a/storage/innobase/dict/dict0crea.cc 
b/storage/innobase/dict/dict0crea.cc index f9c399510334..9fc20ea1815c 100644 --- a/storage/innobase/dict/dict0crea.cc +++ b/storage/innobase/dict/dict0crea.cc @@ -438,7 +438,7 @@ dberr_t dict_create_index_tree_in_mem(dict_index_t *index, trx_t *trx) { DBUG_EXECUTE_IF("ib_dict_create_index_tree_fail", return (DB_OUT_OF_MEMORY);); - if (index->type == DICT_FTS) { + if (index->type & (DICT_FTS | DICT_VECTOR)) { /* FTS index does not need an index tree */ return (DB_SUCCESS); } diff --git a/storage/innobase/dict/dict0dd.cc b/storage/innobase/dict/dict0dd.cc index cc73c4fe5a42..f843e5a2c273 100644 --- a/storage/innobase/dict/dict0dd.cc +++ b/storage/innobase/dict/dict0dd.cc @@ -1979,7 +1979,7 @@ void dd_visit_keys_with_too_long_parts( std::function visitor) { for (uint key_num = 0; key_num < table->s->keys; key_num++) { const KEY &key = table->key_info[key_num]; - if (!(key.flags & (HA_SPATIAL | HA_FULLTEXT))) { + if (!(key.flags & (HA_SPATIAL | HA_FULLTEXT | HA_VECTOR))) { for (unsigned i = 0; i < key.user_defined_key_parts; i++) { const KEY_PART_INFO *key_part = &key.key_part[i]; if (max_part_len < key_part->length) { @@ -2909,7 +2909,7 @@ MY_COMPILER_DIAGNOSTIC_POP() */ static inline uint16_t get_index_prefix_len(const KEY &key, const KEY_PART_INFO *key_part) { - if (key.flags & (HA_SPATIAL | HA_FULLTEXT)) { + if (key.flags & (HA_SPATIAL | HA_FULLTEXT | HA_VECTOR)) { return 0; } @@ -2969,6 +2969,10 @@ template const dict_index_t *dd_find_index( ut_ad(!table->is_intrinsic()); type = DICT_FTS; n_uniq = 0; + } else if (key.flags & HA_VECTOR) { + ut_ad(!table->is_intrinsic()); + type = DICT_VECTOR; + n_uniq = 0; } else if (key_num == form->primary_key) { ut_ad(key.flags & HA_NOSAME); ut_ad(n_uniq > 0); @@ -2977,7 +2981,7 @@ template const dict_index_t *dd_find_index( type = (key.flags & HA_NOSAME) ? 
DICT_UNIQUE : 0; } - ut_ad(!!(type & DICT_FTS) == (n_uniq == 0)); + ut_ad(!!(type & (DICT_FTS | DICT_VECTOR)) == (n_uniq == 0)); dict_index_t *index = dict_mem_index_create(table->name.m_name, key.name, 0, type, n_fields); @@ -5206,7 +5210,7 @@ dict_table_t *dd_open_table_one(dd::cache::Dictionary_client *client, } ut_ad(root > 1); - ut_ad(index->type & DICT_FTS || root != FIL_NULL || + ut_ad(index->type & (DICT_FTS | DICT_VECTOR) || root != FIL_NULL || dict_table_is_discarded(m_table)); ut_ad(id != 0); index->page = root; diff --git a/storage/innobase/dict/dict0dict.cc b/storage/innobase/dict/dict0dict.cc index b71913703d5e..56ca82c39324 100644 --- a/storage/innobase/dict/dict0dict.cc +++ b/storage/innobase/dict/dict0dict.cc @@ -218,6 +218,9 @@ static dict_index_t *dict_index_build_internal_fts( dict_table_t *table, /*!< in: table */ dict_index_t *index); /*!< in: user representation of an FTS index */ +static dict_index_t *dict_index_build_internal_vec(dict_table_t *table, + dict_index_t *index); + /** Removes an index from the dictionary cache. 
*/ static void dict_index_remove_from_cache_low( dict_table_t *table, /*!< in/out: table */ @@ -2216,7 +2219,7 @@ static bool dict_index_too_big_for_tree(const dict_table_t *table, const dict_index_t *new_index) { /* FTS index consists of auxiliary tables, they shall be excluded from index row size check */ - if (new_index->type & DICT_FTS) { + if (new_index->type & (DICT_FTS | DICT_VECTOR)) { return (false); } @@ -2445,6 +2448,8 @@ dberr_t dict_index_add_to_cache_w_vcol(dict_table_t *table, dict_index_t *index, if (index->type == DICT_FTS) { new_index = dict_index_build_internal_fts(table, index); + } else if (index->type == DICT_VECTOR) { + new_index = dict_index_build_internal_vec(table, index); } else if (index->is_clustered()) { new_index = dict_index_build_internal_clust(table, index); } else { @@ -3283,6 +3288,35 @@ static dict_index_t *dict_index_build_internal_fts( return (new_index); } + +static dict_index_t *dict_index_build_internal_vec( + dict_table_t *table, /*!< in: table */ + dict_index_t *index) /*!< in: user representation of a vector index */ +{ + ut_ad(table && index); + ut_ad(index->type == DICT_VECTOR); + ut_ad(!dict_sys_mutex_own()); + ut_ad(table->magic_n == DICT_TABLE_MAGIC_N); + + /* Create a new index */ + auto new_index = dict_mem_index_create(table->name.m_name, index->name, + index->space, index->type, index->n_fields); + + /* Copy other relevant data from the old index struct to the new + struct: it inherits the values */ + + new_index->n_user_defined_cols = index->n_fields; + + new_index->id = index->id; + + /* Copy fields from index to new_index */ + dict_index_copy(new_index, index, table, 0, index->n_fields); + + new_index->n_uniq = 0; + new_index->cached = true; + + return (new_index); +} /*====================== FOREIGN KEY PROCESSING ========================*/ /** Checks if a table is referenced by foreign keys. 
@@ -3369,7 +3403,7 @@ NOT NULL */ index = table->first_index(); while (index != nullptr) { - if (types_idx != index && !(index->type & DICT_FTS) && + if (types_idx != index && !(index->type & (DICT_FTS | DICT_VECTOR)) && !dict_index_is_spatial(index) && !index->to_be_dropped && (!(index->uncommitted && ((index->online_status == ONLINE_INDEX_ABORTED_DROPPED) || @@ -3627,6 +3661,7 @@ bool dict_index_check_search_tuple( ut_ad(index->page >= FSP_FIRST_INODE_PAGE_NO); ut_ad(dtuple_check_typed(tuple)); ut_ad(!(index->type & DICT_FTS)); + ut_ad(!(index->type & DICT_VECTOR)); return true; } #endif /* UNIV_DEBUG */ diff --git a/storage/innobase/handler/ha_innodb.cc b/storage/innobase/handler/ha_innodb.cc index 69c632abe36a..19402f85b40f 100644 --- a/storage/innobase/handler/ha_innodb.cc +++ b/storage/innobase/handler/ha_innodb.cc @@ -1,3 +1,5 @@ +// clang-format off + /***************************************************************************** Copyright (c) 2000, 2025, Oracle and/or its affiliates. 
@@ -44,6 +46,7 @@ this program; if not, write to the Free Software Foundation, Inc., /** @file ha_innodb.cc */ #include +#include #ifndef UNIV_HOTBACKUP #include "my_config.h" #endif /* !UNIV_HOTBACKUP */ @@ -210,6 +213,8 @@ this program; if not, write to the Free Software Foundation, Inc., #include "sql-common/json_binary.h" #include "sql-common/json_dom.h" +#include "vec0vec.h" + #include "os0enc.h" #include "os0file.h" @@ -3231,7 +3236,7 @@ ha_innobase::ha_innobase(handlerton *hton, TABLE_SHARE *table_arg) HA_ATTACHABLE_TRX_COMPATIBLE | HA_CAN_INDEX_VIRTUAL_GENERATED_COLUMN | HA_DESCENDING_INDEX | HA_MULTI_VALUED_KEY_SUPPORT | HA_BLOB_PARTIAL_UPDATE | HA_SUPPORTS_GEOGRAPHIC_GEOMETRY_COLUMN | - HA_SUPPORTS_DEFAULT_EXPRESSION | HA_ONLINE_ANALYZE), + HA_SUPPORTS_DEFAULT_EXPRESSION | HA_ONLINE_ANALYZE | HA_CAN_VECTOR), m_start_of_scan(), m_stored_select_lock_type(LOCK_NONE_UNSET), m_mysql_has_locked() {} @@ -4847,6 +4852,29 @@ static bool innobase_redo_set_state(THD *thd, bool enable) { return (false); } +// clang-format on + +static bool innobase_validate_engine_attributes(THD *thd, const char *db_name, + HA_CREATE_INFO *create_info, + const Alter_info *alter_info) { + for (const auto *key : alter_info->key_list) { + auto opts = + to_string_view(key->key_create_info.m_secondary_engine_attribute); + switch (key->type) { + case KEYTYPE_VECTOR: + return storage::innobase::vec0vec::validate_options(opts); + break; + default: + break; + } + } + + return false; +} + +// clang-format off + + /** Return partitioning flags. 
*/ static uint innobase_partition_flags() { return (HA_CAN_EXCHANGE_PARTITION | HA_CANNOT_PARTITION_FK | @@ -5726,8 +5754,6 @@ static PSI_metric_info_v1 data_metrics[] = { export_vars.innodb_data_written) }; -// clang-format on - static PSI_meter_info_v1 inno_meter[] = { {"mysql.inno", "MySql InnoDB metrics", 10, 0, 0, inno_metrics, std::size(inno_metrics)}, @@ -5793,13 +5819,13 @@ static int innodb_init(void *p) { innobase_hton->lock_hton_log = innobase_lock_hton_log; innobase_hton->unlock_hton_log = innobase_unlock_hton_log; innobase_hton->collect_hton_log_info = innobase_collect_hton_log_info; - innobase_hton->flags = HTON_SUPPORTS_EXTENDED_KEYS | - HTON_SUPPORTS_FOREIGN_KEYS | HTON_SUPPORTS_ATOMIC_DDL | - HTON_CAN_RECREATE | HTON_SUPPORTS_SECONDARY_ENGINE | - HTON_SUPPORTS_TABLE_ENCRYPTION | - HTON_SUPPORTS_GENERATED_INVISIBLE_PK | - HTON_SUPPORTS_BULK_LOAD | HTON_SUPPORTS_SQL_FK | - HTON_SUPPORTS_ONLINE_BACKUPS | HTON_SUPPORTS_COMPRESSED_COLUMNS; + innobase_hton->flags = + HTON_SUPPORTS_EXTENDED_KEYS | HTON_SUPPORTS_FOREIGN_KEYS | + HTON_SUPPORTS_ATOMIC_DDL | HTON_CAN_RECREATE | + HTON_SUPPORTS_SECONDARY_ENGINE | HTON_SUPPORTS_TABLE_ENCRYPTION | + HTON_SUPPORTS_GENERATED_INVISIBLE_PK | HTON_SUPPORTS_BULK_LOAD | + HTON_SUPPORTS_SQL_FK | HTON_SUPPORTS_ONLINE_BACKUPS | + HTON_SUPPORTS_COMPRESSED_COLUMNS; // TODO(WL9440): to be enabled when distance scan is implemented in innodb. //| HTON_SUPPORTS_DISTANCE_SCAN; @@ -5851,6 +5877,7 @@ static int innodb_init(void *p) { innobase_fix_default_table_encryption; innobase_hton->redo_log_set_state = innobase_redo_set_state; + innobase_hton->validate_engine_attributes = innobase_validate_engine_attributes; innobase_hton->post_ddl = innobase_post_ddl; @@ -8346,7 +8373,7 @@ int ha_innobase::open(const char *name, int, uint open_flags, dict_table_autoinc_unlock(ib_table); } - /* Set plugin parser for fulltext index */ + /* Set plugin parser for fulltext index / handle vector index. 
*/ for (uint i = 0; i < table->s->keys; i++) { if (table->key_info[i].flags & HA_USES_PARSER) { dict_index_t *index = innobase_get_index(i); @@ -11057,7 +11084,7 @@ int ha_innobase::index_read( : HA_ERR_TABLE_DEF_CHANGED; } - if (index->type & DICT_FTS) { + if (index->type & (DICT_FTS | DICT_VECTOR)) { return HA_ERR_KEY_NOT_FOUND; } @@ -12841,6 +12868,8 @@ inline int create_index( ind_type = DICT_SPATIAL; } else if (key->flags & HA_FULLTEXT) { ind_type = DICT_FTS; + } else if (key->flags & HA_VECTOR) { + ind_type = DICT_VECTOR; } if (ind_type == DICT_SPATIAL) { @@ -13002,6 +13031,7 @@ inline int create_index( } ut_ad(key->flags & HA_FULLTEXT || !(index->type & DICT_FTS)); + ut_ad(key->flags & HA_VECTOR || !(index->type & DICT_VECTOR)); multi_val_idx = ((index->type & DICT_MULTI_VALUE) == DICT_MULTI_VALUE); @@ -14097,7 +14127,7 @@ bool create_table_info_t::innobase_table_flags() { if (fts_doc_id_index_bad) { goto index_bad; } - } else if (key->flags & HA_SPATIAL) { + } else if (key->flags & (HA_SPATIAL | HA_VECTOR)) { assert(~m_create_info->options & (HA_LEX_CREATE_TMP_TABLE | HA_LEX_CREATE_INTERNAL_TMP_TABLE)); } @@ -15763,6 +15793,7 @@ int ha_innobase::get_extra_columns_and_keys(const HA_CREATE_INFO *, continue; case dd::Index::IT_FULLTEXT: case dd::Index::IT_SPATIAL: + case dd::Index::IT_VECTOR: ut_d(ut_error); } break; @@ -15773,6 +15804,12 @@ int ha_innobase::get_extra_columns_and_keys(const HA_CREATE_INFO *, } ut_d(ut_error); ut_o(break); + case dd::Index::IA_VECTOR: + if (i->type() == dd::Index::IT_VECTOR) { + continue; + } + ut_d(ut_error); + ut_o(break); } my_error(ER_UNSUPPORTED_INDEX_ALGORITHM, MYF(0), i->name().c_str()); @@ -15800,6 +15837,7 @@ int ha_innobase::get_extra_columns_and_keys(const HA_CREATE_INFO *, case dd::Index::IT_MULTIPLE: case dd::Index::IT_FULLTEXT: case dd::Index::IT_SPATIAL: + case dd::Index::IT_VECTOR: my_error(ER_INNODB_FT_WRONG_DOCID_INDEX, MYF(0), fts_doc_id_index->name().c_str()); push_warning(thd, Sql_condition::SL_WARNING, 
ER_WRONG_NAME_FOR_INDEX, @@ -18372,7 +18410,8 @@ void ha_innobase::info_low_key(uint flag, const dict_table_t *ib_table) { /* We do not maintain stats for fulltext or spatial indexes. Thus, we can't calculate pct_cached below because we need dict_index_t::stat_n_leaf_pages for that. See dict_stats_should_ignore_index(). */ - if ((key->flags & HA_FULLTEXT) || (key->flags & HA_SPATIAL)) { + if ((key->flags & HA_FULLTEXT) || (key->flags & HA_SPATIAL) || + (key->flags & HA_VECTOR)) { pct_cached = IN_MEMORY_ESTIMATE_UNKNOWN; } else { pct_cached = index_pct_cached(index); @@ -18390,7 +18429,8 @@ void ha_innobase::info_low_key(uint flag, const dict_table_t *ib_table) { } for (ulong j = 0; j < key->actual_key_parts; j++) { - if ((key->flags & HA_FULLTEXT) || (key->flags & HA_SPATIAL)) { + if ((key->flags & HA_FULLTEXT) || (key->flags & HA_SPATIAL) || + (key->flags & HA_VECTOR)) { /* The record per key does not apply to FTS or Spatial indexes. */ key->set_records_per_key(j, 1.0f); continue; @@ -18716,7 +18756,7 @@ static bool innobase_get_index_column_cardinality( } DEBUG_SYNC(thd, "innodb.after_init_check"); - if (index->type & (DICT_FTS | DICT_SPATIAL)) { + if (index->type & (DICT_FTS | DICT_SPATIAL | DICT_VECTOR)) { /* For these indexes innodb_rec_per_key is fixed as 1.0 */ *cardinality = ib_table->stat_n_rows; diff --git a/storage/innobase/handler/vec0vec.cc b/storage/innobase/handler/vec0vec.cc new file mode 100644 index 000000000000..bcf676a00d2b --- /dev/null +++ b/storage/innobase/handler/vec0vec.cc @@ -0,0 +1,116 @@ +/***************************************************************************** + +Copyright (c) 2025, Percona Inc. + +This program is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License, version 2.0, as published by the +Free Software Foundation. 
+ +This program is designed to work with certain software (including +but not limited to OpenSSL) that is licensed under separate terms, +as designated in a particular file or component or in included license +documentation. The authors of MySQL hereby grant you an additional +permission to link the program and your derivative works with the +separately licensed software that they have either included with +the program or referenced in the documentation. + +This program is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +FOR A PARTICULAR PURPOSE. See the GNU General Public License, version 2.0, +for more details. + +You should have received a copy of the GNU General Public License along with +this program; if not, write to the Free Software Foundation, Inc., +51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + +*****************************************************************************/ + +#include "vec0vec.h" + +#include +#include +#include + +// ut0ut.h isn't self-contained. 
+#include "ut0mem.h" +#include "ut0test.h" +#include "ut0ut.h" + +#include +#include +#include +#include + +#include "my_sys.h" +#include "mysqld_error.h" + +using namespace std; +using rapidjson::Document; +using rapidjson::SchemaDocument; +using rapidjson::SchemaValidator; + +namespace storage::innobase::vec0vec { + +bool validate_options(string_view opts) { + Document schema_doc; + schema_doc.Parse(schema_json); + assert(!schema_doc.HasParseError()); + + SchemaDocument schema(schema_doc); + SchemaValidator validator(schema); + + Document doc; + doc.Parse(opts.data(), opts.length()); + + if (!doc.Accept(validator)) { + rapidjson::StringBuffer sb; + validator.GetInvalidDocumentPointer().StringifyUriFragment(sb); + my_error(ER_INVALID_JSON_ATTRIBUTE, MYF(0), + ("Validation failed for vector index options: " + + string(validator.GetInvalidSchemaKeyword())) + .c_str(), + sb.GetSize(), string(sb.GetString(), sb.GetSize()).c_str()); + return true; + } + + return false; +} + +/** Parse the JSON options of a vector index into index build parameters. +@param[in] opts JSON text of the index options, may be empty +@return std::monostate when opts is empty, otherwise the HNSW parameters; +members that are absent, or all of them if validation fails, keep defaults */ +VectorIndexParam parse_options(string_view opts) { + if (opts.empty()) { + return std::monostate{}; + } + + /* Only HNSW exists for now, so any non-empty option set maps to it. */ + HnswParam param; + + /* A document that failed validation may be malformed or not even a JSON + object, in which case member lookups on it are invalid: do not read it. */ + if (validate_options(opts)) { + ib::warn(ER_IB_MSG_466) << "Vector table index options validation failed, " + "using default values. Options: " + << opts; + return param; + } + + Document doc; + auto &gendoc = doc.Parse(opts.data(), opts.length()); + + auto get_or_default = [&gendoc](const char *key, auto &member) { + /* The schema's "integer" also admits values outside the int range. */ + if (gendoc.HasMember(key) && gendoc[key].IsInt()) { + member = gendoc[key].GetInt(); + } + }; + + get_or_default("M", param.M); + get_or_default("max_elements", param.max_elements); + get_or_default("ef_construction", param.ef_construction); + + return param; +} + +} // namespace storage::innobase::vec0vec
diff --git a/storage/innobase/include/dict0mem.h b/storage/innobase/include/dict0mem.h index 599f550996b8..7400492d2ae7 100644 --- a/storage/innobase/include/dict0mem.h +++ b/storage/innobase/include/dict0mem.h @@ -111,9 +111,10 @@ constexpr uint32_t DICT_VIRTUAL = 128; constexpr uint32_t DICT_SDI = 256; /** Multi-value index */ constexpr uint32_t DICT_MULTI_VALUE = 512; +constexpr uint32_t DICT_VECTOR = 1024; /**< Vector index */ /** number of bits used for SYS_INDEXES.TYPE */ -constexpr uint32_t DICT_IT_BITS = 10; +constexpr uint32_t DICT_IT_BITS = 11; /** @} */ #if 0 /* not implemented, retained for history */ diff --git a/storage/innobase/include/vec0vec.h b/storage/innobase/include/vec0vec.h new file mode 100644 index 000000000000..b78e48c872aa --- /dev/null +++ b/storage/innobase/include/vec0vec.h @@ -0,0 +1,61 @@ +/***************************************************************************** + +Copyright (c) 2025, Percona Inc. + +This program is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License, version 2.0, as published by the +Free Software Foundation. + +This program is designed to work with certain software (including +but not limited to OpenSSL) that is licensed under separate terms, +as designated in a particular file or component or in included license +documentation. The authors of MySQL hereby grant you an additional +permission to link the program and your derivative works with the +separately licensed software that they have either included with +the program or referenced in the documentation. + +This program is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +FOR A PARTICULAR PURPOSE.
See the GNU General Public License, version 2.0, +for more details. + +You should have received a copy of the GNU General Public License along with +this program; if not, write to the Free Software Foundation, Inc., +51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + +*****************************************************************************/ + +#pragma once + +#include <string_view> +#include <variant> + +namespace storage::innobase::vec0vec { + +inline constexpr auto schema_json = R"( +{ + "type": "object", + "properties": { + "type": { "type": "string", "enum": ["hnsw"] }, + "M": { "type": "integer", "minimum": 1 }, + "max_elements": { "type": "integer", "minimum": 1 }, + "ef_construction": { "type": "integer", "minimum": 1 } + }, + "required": ["type"], + "additionalProperties": false +} +)"; /* JSON Schema that vector index options are validated against */ + +bool validate_options(std::string_view opts); /* declared here, defined in vec0vec.cc */ + +struct HnswParam { /* member initializers are the defaults for absent options */ + int M{25}; + int max_elements{10000}; + int ef_construction{200}; +}; + +using VectorIndexParam = std::variant<std::monostate, HnswParam>; + +VectorIndexParam parse_options(std::string_view opts); /* declared here, defined in vec0vec.cc */ + +} // namespace storage::innobase::vec0vec diff --git a/storage/temptable/src/handler.cc b/storage/temptable/src/handler.cc index e5b577920fbf..aa1c9bd5c8a6 100644 --- a/storage/temptable/src/handler.cc +++ b/storage/temptable/src/handler.cc @@ -877,6 +877,7 @@ ulong Handler::index_flags(uint index_no, uint, bool) const { case HA_KEY_ALG_SE_SPECIFIC: case HA_KEY_ALG_RTREE: case HA_KEY_ALG_FULLTEXT: + case HA_KEY_ALG_VECTOR: flags = 0; break; } diff --git a/storage/temptable/src/table.cc b/storage/temptable/src/table.cc index 16847e1773d2..1463e1145f05 100644 --- a/storage/temptable/src/table.cc +++ b/storage/temptable/src/table.cc @@ -290,6 +290,7 @@ void Table::indexes_create() { case HA_KEY_ALG_SE_SPECIFIC: case HA_KEY_ALG_RTREE: case HA_KEY_ALG_FULLTEXT: + case HA_KEY_ALG_VECTOR: DBUG_ABORT(); } }