diff --git a/crates/ruvector-postgres/Dockerfile b/crates/ruvector-postgres/Dockerfile
index 3dd7a1dfb..413ed6549 100644
--- a/crates/ruvector-postgres/Dockerfile
+++ b/crates/ruvector-postgres/Dockerfile
@@ -69,17 +69,20 @@ RUN mkdir -p /opt/ruvector/models && \
     echo "Model cache size: $(du -sh /opt/ruvector/models)" && \
     ls -la /opt/ruvector/models/
 
-# Copy the pre-built SQL schema file (with sparse functions removed)
-# cargo pgrx schema doesn't work reliably in Docker, so we use the hand-crafted file
+# Copy the pre-built SQL schema files (with sparse functions removed)
+# cargo pgrx schema doesn't work reliably in Docker, so we use the hand-crafted files
+# Both versions are needed: 0.1.0 for legacy and 2.0.0 to match ruvector.control default_version
 RUN cp /build/sql/ruvector--0.1.0.sql /build/target/release/ruvector-pg17/usr/share/postgresql/17/extension/ruvector--0.1.0.sql && \
-    echo "SQL schema copied with $(grep -c 'CREATE FUNCTION\|CREATE OR REPLACE FUNCTION' /build/target/release/ruvector-pg17/usr/share/postgresql/17/extension/ruvector--0.1.0.sql) functions"
+    cp /build/sql/ruvector--2.0.0.sql /build/target/release/ruvector-pg17/usr/share/postgresql/17/extension/ruvector--2.0.0.sql && \
+    echo "SQL schema (0.1.0) copied with $(grep -c 'CREATE FUNCTION\|CREATE OR REPLACE FUNCTION' /build/target/release/ruvector-pg17/usr/share/postgresql/17/extension/ruvector--0.1.0.sql) functions" && \
+    echo "SQL schema (2.0.0) copied with $(grep -c 'CREATE FUNCTION\|CREATE OR REPLACE FUNCTION' /build/target/release/ruvector-pg17/usr/share/postgresql/17/extension/ruvector--2.0.0.sql) functions"
 
 # Verify the extension files are complete
 RUN ls -la /build/target/release/ruvector-pg17/usr/share/postgresql/17/extension/ && \
-    echo "=== First 20 lines of SQL ===" && \
-    head -20 /build/target/release/ruvector-pg17/usr/share/postgresql/17/extension/ruvector--0.1.0.sql && \
-    echo "=== CREATE FUNCTION count ===" && \
-    grep -c "CREATE FUNCTION\|CREATE OR REPLACE FUNCTION" /build/target/release/ruvector-pg17/usr/share/postgresql/17/extension/ruvector--0.1.0.sql
+    echo "=== ruvector--2.0.0.sql CREATE FUNCTION count ===" && \
+    grep -c "CREATE FUNCTION\|CREATE OR REPLACE FUNCTION" /build/target/release/ruvector-pg17/usr/share/postgresql/17/extension/ruvector--2.0.0.sql && \
+    echo "=== Verify embedding functions present in 2.0.0 ===" && \
+    grep -c "ruvector_embed" /build/target/release/ruvector-pg17/usr/share/postgresql/17/extension/ruvector--2.0.0.sql
 
 # Runtime stage
 FROM postgres:17-bookworm
diff --git a/crates/ruvector-postgres/docker/init.sql b/crates/ruvector-postgres/docker/init.sql
index e549dbf34..e16c05583 100644
--- a/crates/ruvector-postgres/docker/init.sql
+++ b/crates/ruvector-postgres/docker/init.sql
@@ -50,5 +50,10 @@ BEGIN
     RAISE NOTICE 'Inner product: %', inner_product_arr(ARRAY[1.0, 2.0, 3.0]::real[], ARRAY[1.0, 2.0, 3.0]::real[]);
     RAISE NOTICE 'Cosine distance: %', cosine_distance_arr(ARRAY[1.0, 0.0, 0.0]::real[], ARRAY[0.0, 1.0, 0.0]::real[]);
 
+    -- Test embedding functions
+    RAISE NOTICE 'Testing embedding functions...';
+    RAISE NOTICE 'Default model: %', ruvector_default_model();
+    RAISE NOTICE 'MiniLM dims: %', ruvector_embedding_dims('all-MiniLM-L6-v2');
+
     RAISE NOTICE 'All basic tests passed!';
 END $$;
diff --git a/crates/ruvector-postgres/sql/embeddings.sql b/crates/ruvector-postgres/sql/embeddings.sql
index 739514ede..fccd660d2 100644
--- a/crates/ruvector-postgres/sql/embeddings.sql
+++ b/crates/ruvector-postgres/sql/embeddings.sql
@@ -1,18 +1,21 @@
 -- ============================================================================
 -- Embedding Generation Functions
 -- ============================================================================
+-- These functions require the 'embeddings' feature flag at compile time.
+-- The Docker image builds with --features embeddings, so they are available.
+-- pgrx generates C symbols with _wrapper suffix.
 
 -- Generate embedding from text using default or specified model
 CREATE OR REPLACE FUNCTION ruvector_embed(text text, model_name text DEFAULT 'all-MiniLM-L6-v2')
 RETURNS real[]
 AS 'MODULE_PATHNAME', 'ruvector_embed_wrapper'
-LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE;
+LANGUAGE C VOLATILE STRICT PARALLEL SAFE;
 
 -- Generate embeddings for multiple texts in batch
 CREATE OR REPLACE FUNCTION ruvector_embed_batch(texts text[], model_name text DEFAULT 'all-MiniLM-L6-v2')
 RETURNS real[][]
 AS 'MODULE_PATHNAME', 'ruvector_embed_batch_wrapper'
-LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE;
+LANGUAGE C VOLATILE STRICT PARALLEL SAFE;
 
 -- List all available embedding models
 CREATE OR REPLACE FUNCTION ruvector_embedding_models()
@@ -23,46 +26,53 @@ RETURNS TABLE (
     is_loaded boolean
 )
 AS 'MODULE_PATHNAME', 'ruvector_embedding_models_wrapper'
-LANGUAGE C IMMUTABLE STRICT;
+LANGUAGE C VOLATILE PARALLEL SAFE;
 
--- Load embedding model into memory
+-- Load embedding model into memory (state-changing: PARALLEL UNSAFE)
 CREATE OR REPLACE FUNCTION ruvector_load_model(model_name text)
 RETURNS boolean
 AS 'MODULE_PATHNAME', 'ruvector_load_model_wrapper'
-LANGUAGE C STRICT;
+LANGUAGE C VOLATILE STRICT PARALLEL UNSAFE;
 
--- Unload embedding model from memory
+-- Unload embedding model from memory (state-changing: PARALLEL UNSAFE)
 CREATE OR REPLACE FUNCTION ruvector_unload_model(model_name text)
 RETURNS boolean
 AS 'MODULE_PATHNAME', 'ruvector_unload_model_wrapper'
-LANGUAGE C STRICT;
+LANGUAGE C VOLATILE STRICT PARALLEL UNSAFE;
 
 -- Get information about a specific model
 CREATE OR REPLACE FUNCTION ruvector_model_info(model_name text)
 RETURNS jsonb
 AS 'MODULE_PATHNAME', 'ruvector_model_info_wrapper'
-LANGUAGE C IMMUTABLE STRICT;
+LANGUAGE C VOLATILE STRICT PARALLEL SAFE;
 
--- Set default embedding model
+-- Set default embedding model (state-changing: PARALLEL UNSAFE)
 CREATE OR REPLACE FUNCTION ruvector_set_default_model(model_name text)
 RETURNS boolean
 AS 'MODULE_PATHNAME', 'ruvector_set_default_model_wrapper'
-LANGUAGE C STRICT;
+LANGUAGE C VOLATILE STRICT PARALLEL UNSAFE;
 
 -- Get current default embedding model
 CREATE OR REPLACE FUNCTION ruvector_default_model()
 RETURNS text
 AS 'MODULE_PATHNAME', 'ruvector_default_model_wrapper'
-LANGUAGE C IMMUTABLE STRICT;
+LANGUAGE C VOLATILE PARALLEL SAFE;
 
 -- Get embedding generation statistics
 CREATE OR REPLACE FUNCTION ruvector_embedding_stats()
 RETURNS jsonb
 AS 'MODULE_PATHNAME', 'ruvector_embedding_stats_wrapper'
-LANGUAGE C IMMUTABLE STRICT;
+LANGUAGE C VOLATILE PARALLEL SAFE;
 
 -- Get dimensions for a specific model
 CREATE OR REPLACE FUNCTION ruvector_embedding_dims(model_name text)
 RETURNS integer
 AS 'MODULE_PATHNAME', 'ruvector_embedding_dims_wrapper'
 LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE;
+
+-- Convenience: text → ruvector type in one call
+CREATE OR REPLACE FUNCTION ruvector_embed_vec(text_input text, model_name text DEFAULT 'all-MiniLM-L6-v2')
+RETURNS ruvector
+AS $$
+    SELECT replace(replace(ruvector_embed(text_input, model_name)::text, '{', '['), '}', ']')::ruvector;
+$$ LANGUAGE SQL VOLATILE STRICT PARALLEL SAFE;
diff --git a/crates/ruvector-postgres/sql/ruvector--0.1.0.sql b/crates/ruvector-postgres/sql/ruvector--0.1.0.sql
index dca520c32..b41ffecc1 100644
--- a/crates/ruvector-postgres/sql/ruvector--0.1.0.sql
+++ b/crates/ruvector-postgres/sql/ruvector--0.1.0.sql
@@ -780,9 +780,80 @@ COMMENT ON FUNCTION graph_bipartite_score(real[], real[], real) IS 'Compute bipa
 -- ============================================================================
 -- Embedding Generation Functions
 -- ============================================================================
--- Note: Embedding functions require the 'embeddings' feature flag to be enabled
--- during compilation. These functions are not available in the default build.
--- To enable, build with: cargo pgrx package --features embeddings
+-- These functions require the 'embeddings' feature flag at compile time.
+-- The Docker image builds with --features embeddings, so they are available.
+
+-- Generate embedding from text using default or specified model
+CREATE OR REPLACE FUNCTION ruvector_embed(text text, model_name text DEFAULT 'all-MiniLM-L6-v2')
+RETURNS real[]
+AS 'MODULE_PATHNAME', 'ruvector_embed_wrapper'
+LANGUAGE C VOLATILE STRICT PARALLEL SAFE;
+
+-- Generate embeddings for multiple texts in batch
+CREATE OR REPLACE FUNCTION ruvector_embed_batch(texts text[], model_name text DEFAULT 'all-MiniLM-L6-v2')
+RETURNS real[][]
+AS 'MODULE_PATHNAME', 'ruvector_embed_batch_wrapper'
+LANGUAGE C VOLATILE STRICT PARALLEL SAFE;
+
+-- List all available embedding models
+CREATE OR REPLACE FUNCTION ruvector_embedding_models()
+RETURNS TABLE (
+    model_name text,
+    dimensions integer,
+    description text,
+    is_loaded boolean
+)
+AS 'MODULE_PATHNAME', 'ruvector_embedding_models_wrapper'
+LANGUAGE C VOLATILE PARALLEL SAFE;
+
+-- Load embedding model into memory (state-changing: PARALLEL UNSAFE)
+CREATE OR REPLACE FUNCTION ruvector_load_model(model_name text)
+RETURNS boolean
+AS 'MODULE_PATHNAME', 'ruvector_load_model_wrapper'
+LANGUAGE C VOLATILE STRICT PARALLEL UNSAFE;
+
+-- Unload embedding model from memory (state-changing: PARALLEL UNSAFE)
+CREATE OR REPLACE FUNCTION ruvector_unload_model(model_name text)
+RETURNS boolean
+AS 'MODULE_PATHNAME', 'ruvector_unload_model_wrapper'
+LANGUAGE C VOLATILE STRICT PARALLEL UNSAFE;
+
+-- Get information about a specific model
+CREATE OR REPLACE FUNCTION ruvector_model_info(model_name text)
+RETURNS jsonb
+AS 'MODULE_PATHNAME', 'ruvector_model_info_wrapper'
+LANGUAGE C VOLATILE STRICT PARALLEL SAFE;
+
+-- Set default embedding model (state-changing: PARALLEL UNSAFE)
+CREATE OR REPLACE FUNCTION ruvector_set_default_model(model_name text)
+RETURNS boolean
+AS 'MODULE_PATHNAME', 'ruvector_set_default_model_wrapper'
+LANGUAGE C VOLATILE STRICT PARALLEL UNSAFE;
+
+-- Get current default embedding model
+CREATE OR REPLACE FUNCTION ruvector_default_model()
+RETURNS text
+AS 'MODULE_PATHNAME', 'ruvector_default_model_wrapper'
+LANGUAGE C VOLATILE PARALLEL SAFE;
+
+-- Get embedding generation statistics
+CREATE OR REPLACE FUNCTION ruvector_embedding_stats()
+RETURNS jsonb
+AS 'MODULE_PATHNAME', 'ruvector_embedding_stats_wrapper'
+LANGUAGE C VOLATILE PARALLEL SAFE;
+
+-- Get dimensions for a specific model
+CREATE OR REPLACE FUNCTION ruvector_embedding_dims(model_name text)
+RETURNS integer
+AS 'MODULE_PATHNAME', 'ruvector_embedding_dims_wrapper'
+LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE;
+
+-- Convenience: text → ruvector type in one call
+CREATE OR REPLACE FUNCTION ruvector_embed_vec(text_input text, model_name text DEFAULT 'all-MiniLM-L6-v2')
+RETURNS ruvector
+AS $$
+    SELECT replace(replace(ruvector_embed(text_input, model_name)::text, '{', '['), '}', ']')::ruvector;
+$$ LANGUAGE SQL VOLATILE STRICT PARALLEL SAFE;
 
 -- ============================================================================
 -- HNSW Access Method
diff --git a/crates/ruvector-postgres/sql/ruvector--2.0.0.sql b/crates/ruvector-postgres/sql/ruvector--2.0.0.sql
index c62b692df..d83a967b0 100644
--- a/crates/ruvector-postgres/sql/ruvector--2.0.0.sql
+++ b/crates/ruvector-postgres/sql/ruvector--2.0.0.sql
@@ -781,9 +781,80 @@ COMMENT ON FUNCTION graph_bipartite_score(real[], real[], real) IS 'Compute bipa
 -- ============================================================================
 -- Embedding Generation Functions
 -- ============================================================================
--- Note: Embedding functions require the 'embeddings' feature flag to be enabled
--- during compilation. These functions are not available in the default build.
--- To enable, build with: cargo pgrx package --features embeddings
+-- These functions require the 'embeddings' feature flag at compile time.
+-- The Docker image builds with --features embeddings, so they are available.
+
+-- Generate embedding from text using default or specified model
+CREATE OR REPLACE FUNCTION ruvector_embed(text text, model_name text DEFAULT 'all-MiniLM-L6-v2')
+RETURNS real[]
+AS 'MODULE_PATHNAME', 'ruvector_embed_wrapper'
+LANGUAGE C VOLATILE STRICT PARALLEL SAFE;
+
+-- Generate embeddings for multiple texts in batch
+CREATE OR REPLACE FUNCTION ruvector_embed_batch(texts text[], model_name text DEFAULT 'all-MiniLM-L6-v2')
+RETURNS real[][]
+AS 'MODULE_PATHNAME', 'ruvector_embed_batch_wrapper'
+LANGUAGE C VOLATILE STRICT PARALLEL SAFE;
+
+-- List all available embedding models
+CREATE OR REPLACE FUNCTION ruvector_embedding_models()
+RETURNS TABLE (
+    model_name text,
+    dimensions integer,
+    description text,
+    is_loaded boolean
+)
+AS 'MODULE_PATHNAME', 'ruvector_embedding_models_wrapper'
+LANGUAGE C VOLATILE PARALLEL SAFE;
+
+-- Load embedding model into memory (state-changing: PARALLEL UNSAFE)
+CREATE OR REPLACE FUNCTION ruvector_load_model(model_name text)
+RETURNS boolean
+AS 'MODULE_PATHNAME', 'ruvector_load_model_wrapper'
+LANGUAGE C VOLATILE STRICT PARALLEL UNSAFE;
+
+-- Unload embedding model from memory (state-changing: PARALLEL UNSAFE)
+CREATE OR REPLACE FUNCTION ruvector_unload_model(model_name text)
+RETURNS boolean
+AS 'MODULE_PATHNAME', 'ruvector_unload_model_wrapper'
+LANGUAGE C VOLATILE STRICT PARALLEL UNSAFE;
+
+-- Get information about a specific model
+CREATE OR REPLACE FUNCTION ruvector_model_info(model_name text)
+RETURNS jsonb
+AS 'MODULE_PATHNAME', 'ruvector_model_info_wrapper'
+LANGUAGE C VOLATILE STRICT PARALLEL SAFE;
+
+-- Set default embedding model (state-changing: PARALLEL UNSAFE)
+CREATE OR REPLACE FUNCTION ruvector_set_default_model(model_name text)
+RETURNS boolean
+AS 'MODULE_PATHNAME', 'ruvector_set_default_model_wrapper'
+LANGUAGE C VOLATILE STRICT PARALLEL UNSAFE;
+
+-- Get current default embedding model
+CREATE OR REPLACE FUNCTION ruvector_default_model()
+RETURNS text
+AS 'MODULE_PATHNAME', 'ruvector_default_model_wrapper'
+LANGUAGE C VOLATILE PARALLEL SAFE;
+
+-- Get embedding generation statistics
+CREATE OR REPLACE FUNCTION ruvector_embedding_stats()
+RETURNS jsonb
+AS 'MODULE_PATHNAME', 'ruvector_embedding_stats_wrapper'
+LANGUAGE C VOLATILE PARALLEL SAFE;
+
+-- Get dimensions for a specific model
+CREATE OR REPLACE FUNCTION ruvector_embedding_dims(model_name text)
+RETURNS integer
+AS 'MODULE_PATHNAME', 'ruvector_embedding_dims_wrapper'
+LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE;
+
+-- Convenience: text → ruvector type in one call
+CREATE OR REPLACE FUNCTION ruvector_embed_vec(text_input text, model_name text DEFAULT 'all-MiniLM-L6-v2')
+RETURNS ruvector
+AS $$
+    SELECT replace(replace(ruvector_embed(text_input, model_name)::text, '{', '['), '}', ']')::ruvector;
+$$ LANGUAGE SQL VOLATILE STRICT PARALLEL SAFE;
 
 -- ============================================================================
 -- HNSW Access Method