Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 10 additions & 7 deletions crates/ruvector-postgres/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -69,17 +69,20 @@ RUN mkdir -p /opt/ruvector/models && \
echo "Model cache size: $(du -sh /opt/ruvector/models)" && \
ls -la /opt/ruvector/models/

# Copy the pre-built SQL schema file (with sparse functions removed)
# cargo pgrx schema doesn't work reliably in Docker, so we use the hand-crafted file
# Copy the pre-built SQL schema files (with sparse functions removed)
# cargo pgrx schema doesn't work reliably in Docker, so we use the hand-crafted files
# Both versions are needed: 0.1.0 for legacy and 2.0.0 to match ruvector.control default_version
# Each schema is copied from /build/sql into the packaged pg17 extension directory,
# then the number of CREATE [OR REPLACE] FUNCTION statements in the copy is echoed
# to the build log.
# NOTE(review): this is a rendered diff hunk; the first two comment lines and the
# "SQL schema copied with" echo appear to be the removed lines.
RUN cp /build/sql/ruvector--0.1.0.sql /build/target/release/ruvector-pg17/usr/share/postgresql/17/extension/ruvector--0.1.0.sql && \
echo "SQL schema copied with $(grep -c 'CREATE FUNCTION\|CREATE OR REPLACE FUNCTION' /build/target/release/ruvector-pg17/usr/share/postgresql/17/extension/ruvector--0.1.0.sql) functions"
cp /build/sql/ruvector--2.0.0.sql /build/target/release/ruvector-pg17/usr/share/postgresql/17/extension/ruvector--2.0.0.sql && \
echo "SQL schema (0.1.0) copied with $(grep -c 'CREATE FUNCTION\|CREATE OR REPLACE FUNCTION' /build/target/release/ruvector-pg17/usr/share/postgresql/17/extension/ruvector--0.1.0.sql) functions" && \
echo "SQL schema (2.0.0) copied with $(grep -c 'CREATE FUNCTION\|CREATE OR REPLACE FUNCTION' /build/target/release/ruvector-pg17/usr/share/postgresql/17/extension/ruvector--2.0.0.sql) functions"

# Verify the extension files are complete
# Lists the packaged extension directory, then prints function counts for the
# schema files. grep -c exits non-zero when it counts zero matches, so in an
# &&-chain a schema with no matching lines fails the build step.
# NOTE(review): this is a rendered diff hunk; the 0.1.0 "First 20 lines" / count
# commands appear to be the removed lines and the 2.0.0 checks their replacement.
RUN ls -la /build/target/release/ruvector-pg17/usr/share/postgresql/17/extension/ && \
echo "=== First 20 lines of SQL ===" && \
head -20 /build/target/release/ruvector-pg17/usr/share/postgresql/17/extension/ruvector--0.1.0.sql && \
echo "=== CREATE FUNCTION count ===" && \
grep -c "CREATE FUNCTION\|CREATE OR REPLACE FUNCTION" /build/target/release/ruvector-pg17/usr/share/postgresql/17/extension/ruvector--0.1.0.sql
echo "=== ruvector--2.0.0.sql CREATE FUNCTION count ===" && \
grep -c "CREATE FUNCTION\|CREATE OR REPLACE FUNCTION" /build/target/release/ruvector-pg17/usr/share/postgresql/17/extension/ruvector--2.0.0.sql && \
echo "=== Verify embedding functions present in 2.0.0 ===" && \
grep -c "ruvector_embed" /build/target/release/ruvector-pg17/usr/share/postgresql/17/extension/ruvector--2.0.0.sql

# Runtime stage
FROM postgres:17-bookworm
Expand Down
5 changes: 5 additions & 0 deletions crates/ruvector-postgres/docker/init.sql
Original file line number Diff line number Diff line change
Expand Up @@ -50,5 +50,10 @@ BEGIN
RAISE NOTICE 'Inner product: %', inner_product_arr(ARRAY[1.0, 2.0, 3.0]::real[], ARRAY[1.0, 2.0, 3.0]::real[]);
RAISE NOTICE 'Cosine distance: %', cosine_distance_arr(ARRAY[1.0, 0.0, 0.0]::real[], ARRAY[0.0, 1.0, 0.0]::real[]);

-- Test embedding functions
RAISE NOTICE 'Testing embedding functions...';
RAISE NOTICE 'Default model: %', ruvector_default_model();
RAISE NOTICE 'MiniLM dims: %', ruvector_embedding_dims('all-MiniLM-L6-v2');

RAISE NOTICE 'All basic tests passed!';
END $$;
28 changes: 19 additions & 9 deletions crates/ruvector-postgres/sql/embeddings.sql
Original file line number Diff line number Diff line change
@@ -1,18 +1,21 @@
-- ============================================================================
-- Embedding Generation Functions
-- ============================================================================
-- These functions require the 'embeddings' feature flag at compile time.
-- The Docker image builds with --features embeddings, so they are available.
-- pgrx generates C symbols with _wrapper suffix.

-- Generate an embedding (real[]) from text using the default or a specified model.
-- model_name defaults to 'all-MiniLM-L6-v2'; the body is the C symbol
-- ruvector_embed_wrapper in the extension library.
-- NOTE(review): rendered diff hunk - the first LANGUAGE line appears to be the
-- removed version and the second its replacement (IMMUTABLE -> VOLATILE).
CREATE OR REPLACE FUNCTION ruvector_embed(text text, model_name text DEFAULT 'all-MiniLM-L6-v2')
RETURNS real[]
AS 'MODULE_PATHNAME', 'ruvector_embed_wrapper'
LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE;
LANGUAGE C VOLATILE STRICT PARALLEL SAFE;

-- Generate embeddings for multiple texts in one call.
-- Takes a text[] plus an optional model_name (default 'all-MiniLM-L6-v2') and
-- is declared to return real[][]; the body is the C symbol
-- ruvector_embed_batch_wrapper.
-- NOTE(review): rendered diff hunk - the first LANGUAGE line appears to be the
-- removed version and the second its replacement (IMMUTABLE -> VOLATILE).
CREATE OR REPLACE FUNCTION ruvector_embed_batch(texts text[], model_name text DEFAULT 'all-MiniLM-L6-v2')
RETURNS real[][]
AS 'MODULE_PATHNAME', 'ruvector_embed_batch_wrapper'
LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE;
LANGUAGE C VOLATILE STRICT PARALLEL SAFE;

-- List all available embedding models
CREATE OR REPLACE FUNCTION ruvector_embedding_models()
Expand All @@ -23,46 +26,53 @@ RETURNS TABLE (
is_loaded boolean
)
AS 'MODULE_PATHNAME', 'ruvector_embedding_models_wrapper'
LANGUAGE C IMMUTABLE STRICT;
LANGUAGE C VOLATILE PARALLEL SAFE;

-- Load an embedding model into memory; returns boolean.
-- The body is the C symbol ruvector_load_model_wrapper.
-- NOTE(review): rendered diff hunk - the first LANGUAGE line appears to be the
-- removed version; the second adds explicit VOLATILE and PARALLEL SAFE.
-- NOTE(review): PARALLEL SAFE looks questionable for a function that loads a
-- model into memory - if that state is per-backend, PostgreSQL's labeling rules
-- call for PARALLEL RESTRICTED. Confirm against the Rust implementation.
CREATE OR REPLACE FUNCTION ruvector_load_model(model_name text)
RETURNS boolean
AS 'MODULE_PATHNAME', 'ruvector_load_model_wrapper'
LANGUAGE C STRICT;
LANGUAGE C VOLATILE STRICT PARALLEL SAFE;

-- Unload an embedding model from memory; returns boolean.
-- The body is the C symbol ruvector_unload_model_wrapper.
-- NOTE(review): rendered diff hunk - the first LANGUAGE line appears to be the
-- removed version; the second adds explicit VOLATILE and PARALLEL SAFE.
-- NOTE(review): PARALLEL SAFE looks questionable for a function that unloads a
-- model from memory - if that state is per-backend, PostgreSQL's labeling rules
-- call for PARALLEL RESTRICTED. Confirm against the Rust implementation.
CREATE OR REPLACE FUNCTION ruvector_unload_model(model_name text)
RETURNS boolean
AS 'MODULE_PATHNAME', 'ruvector_unload_model_wrapper'
LANGUAGE C STRICT;
LANGUAGE C VOLATILE STRICT PARALLEL SAFE;

-- Get information about a specific model, returned as jsonb.
-- The body is the C symbol ruvector_model_info_wrapper.
-- NOTE(review): rendered diff hunk - the first LANGUAGE line appears to be the
-- removed version and the second its replacement (IMMUTABLE -> VOLATILE, plus
-- PARALLEL SAFE).
CREATE OR REPLACE FUNCTION ruvector_model_info(model_name text)
RETURNS jsonb
AS 'MODULE_PATHNAME', 'ruvector_model_info_wrapper'
LANGUAGE C IMMUTABLE STRICT;
LANGUAGE C VOLATILE STRICT PARALLEL SAFE;

-- Set the default embedding model; returns boolean.
-- The body is the C symbol ruvector_set_default_model_wrapper.
-- NOTE(review): rendered diff hunk - the first LANGUAGE line appears to be the
-- removed version; the second adds explicit VOLATILE and PARALLEL SAFE.
-- NOTE(review): PARALLEL SAFE looks questionable for a setter - if the default
-- is per-backend state, PostgreSQL's labeling rules call for PARALLEL
-- RESTRICTED. Confirm against the Rust implementation.
CREATE OR REPLACE FUNCTION ruvector_set_default_model(model_name text)
RETURNS boolean
AS 'MODULE_PATHNAME', 'ruvector_set_default_model_wrapper'
LANGUAGE C STRICT;
LANGUAGE C VOLATILE STRICT PARALLEL SAFE;

-- Get the current default embedding model name as text.
-- The body is the C symbol ruvector_default_model_wrapper.
-- NOTE(review): rendered diff hunk - the first LANGUAGE line appears to be the
-- removed version and the second its replacement (IMMUTABLE STRICT ->
-- VOLATILE PARALLEL SAFE).
CREATE OR REPLACE FUNCTION ruvector_default_model()
RETURNS text
AS 'MODULE_PATHNAME', 'ruvector_default_model_wrapper'
LANGUAGE C IMMUTABLE STRICT;
LANGUAGE C VOLATILE PARALLEL SAFE;

-- Get embedding generation statistics as jsonb.
-- The body is the C symbol ruvector_embedding_stats_wrapper.
-- NOTE(review): rendered diff hunk - the first LANGUAGE line appears to be the
-- removed version and the second its replacement (IMMUTABLE STRICT ->
-- VOLATILE PARALLEL SAFE).
CREATE OR REPLACE FUNCTION ruvector_embedding_stats()
RETURNS jsonb
AS 'MODULE_PATHNAME', 'ruvector_embedding_stats_wrapper'
LANGUAGE C IMMUTABLE STRICT;
LANGUAGE C VOLATILE PARALLEL SAFE;

-- Look up the vector dimension count of a named embedding model.
-- A NULL model_name yields NULL without calling the C symbol.
CREATE OR REPLACE FUNCTION ruvector_embedding_dims(
    model_name text
)
RETURNS integer
LANGUAGE C
IMMUTABLE
RETURNS NULL ON NULL INPUT
PARALLEL SAFE
AS 'MODULE_PATHNAME', 'ruvector_embedding_dims_wrapper';

-- Convenience wrapper: embed text and return it as a ruvector in one call.
-- The real[] from ruvector_embed is rendered as text ("{...}"), its braces are
-- swapped for square brackets ("[...]"), and the result is cast to ruvector.
-- A NULL argument yields NULL.
CREATE OR REPLACE FUNCTION ruvector_embed_vec(
    text_input text,
    model_name text DEFAULT 'all-MiniLM-L6-v2'
)
RETURNS ruvector
LANGUAGE SQL
VOLATILE
RETURNS NULL ON NULL INPUT
PARALLEL SAFE
AS $$
    -- translate() maps '{' -> '[' and '}' -> ']' character by character.
    SELECT translate(
        ruvector_embed(text_input, model_name)::text,
        '{}',
        '[]'
    )::ruvector;
$$;
77 changes: 74 additions & 3 deletions crates/ruvector-postgres/sql/ruvector--0.1.0.sql
Original file line number Diff line number Diff line change
Expand Up @@ -780,9 +780,80 @@ COMMENT ON FUNCTION graph_bipartite_score(real[], real[], real) IS 'Compute bipa
-- ============================================================================
-- Embedding Generation Functions
-- ============================================================================
-- Note: Embedding functions require the 'embeddings' feature flag to be enabled
-- during compilation. These functions are not available in the default build.
-- To enable, build with: cargo pgrx package --features embeddings
-- These functions require the 'embeddings' feature flag at compile time.
-- The Docker image builds with --features embeddings, so they are available.

-- Embed a single text as a real[] vector.
-- model_name selects the embedding model and falls back to
-- 'all-MiniLM-L6-v2' when omitted. A NULL argument yields NULL without
-- calling the C symbol.
CREATE OR REPLACE FUNCTION ruvector_embed(
    text text,
    model_name text DEFAULT 'all-MiniLM-L6-v2'
)
RETURNS real[]
LANGUAGE C
VOLATILE
RETURNS NULL ON NULL INPUT
PARALLEL SAFE
AS 'MODULE_PATHNAME', 'ruvector_embed_wrapper';

-- Embed several texts in one call.
-- texts is the batch; model_name falls back to 'all-MiniLM-L6-v2' when
-- omitted. Declared as real[][] (PostgreSQL does not enforce the declared
-- number of array dimensions). A NULL argument yields NULL.
CREATE OR REPLACE FUNCTION ruvector_embed_batch(
    texts text[],
    model_name text DEFAULT 'all-MiniLM-L6-v2'
)
RETURNS real[][]
LANGUAGE C
VOLATILE
RETURNS NULL ON NULL INPUT
PARALLEL SAFE
AS 'MODULE_PATHNAME', 'ruvector_embed_batch_wrapper';

-- Enumerate the available embedding models, one row per model:
-- its name, vector dimensions, a description, and whether it is loaded.
CREATE OR REPLACE FUNCTION ruvector_embedding_models()
RETURNS TABLE (
    model_name  text,
    dimensions  integer,
    description text,
    is_loaded   boolean
)
LANGUAGE C
VOLATILE
PARALLEL SAFE
AS 'MODULE_PATHNAME', 'ruvector_embedding_models_wrapper';

-- Load an embedding model into memory.
--   model_name: the model to load; NULL returns NULL without a call (STRICT).
--   returns:    boolean (its exact meaning is defined by the C implementation).
-- Labeled PARALLEL RESTRICTED rather than SAFE: loading a model changes
-- in-memory state - presumably per-backend; confirm against the Rust
-- implementation - and a change made inside a parallel worker would not be
-- reflected in the leader. PostgreSQL labels setseed() restricted for the
-- same reason.
CREATE OR REPLACE FUNCTION ruvector_load_model(model_name text)
RETURNS boolean
AS 'MODULE_PATHNAME', 'ruvector_load_model_wrapper'
LANGUAGE C VOLATILE STRICT PARALLEL RESTRICTED;

-- Unload an embedding model from memory.
--   model_name: the model to unload; NULL returns NULL without a call (STRICT).
--   returns:    boolean (its exact meaning is defined by the C implementation).
-- Labeled PARALLEL RESTRICTED rather than SAFE: unloading changes in-memory
-- state - presumably per-backend; confirm against the Rust implementation -
-- and a change made inside a parallel worker would not be reflected in the
-- leader. PostgreSQL labels setseed() restricted for the same reason.
CREATE OR REPLACE FUNCTION ruvector_unload_model(model_name text)
RETURNS boolean
AS 'MODULE_PATHNAME', 'ruvector_unload_model_wrapper'
LANGUAGE C VOLATILE STRICT PARALLEL RESTRICTED;

-- Describe one embedding model as a jsonb document.
-- A NULL model_name yields NULL without calling the C symbol.
CREATE OR REPLACE FUNCTION ruvector_model_info(
    model_name text
)
RETURNS jsonb
LANGUAGE C
VOLATILE
RETURNS NULL ON NULL INPUT
PARALLEL SAFE
AS 'MODULE_PATHNAME', 'ruvector_model_info_wrapper';

-- Set the default embedding model.
--   model_name: the model to make the default; NULL returns NULL without a
--               call (STRICT).
--   returns:    boolean (its exact meaning is defined by the C implementation).
-- Labeled PARALLEL RESTRICTED rather than SAFE: a setter changes state that
-- is presumably per-backend (confirm against the Rust implementation), and a
-- change made inside a parallel worker would not be reflected in the leader.
-- PostgreSQL labels setseed() restricted for the same reason.
CREATE OR REPLACE FUNCTION ruvector_set_default_model(model_name text)
RETURNS boolean
AS 'MODULE_PATHNAME', 'ruvector_set_default_model_wrapper'
LANGUAGE C VOLATILE STRICT PARALLEL RESTRICTED;

-- Report the name of the current default embedding model.
CREATE OR REPLACE FUNCTION ruvector_default_model()
RETURNS text
LANGUAGE C
VOLATILE
PARALLEL SAFE
AS 'MODULE_PATHNAME', 'ruvector_default_model_wrapper';

-- Report embedding generation statistics as a jsonb document.
CREATE OR REPLACE FUNCTION ruvector_embedding_stats()
RETURNS jsonb
LANGUAGE C
VOLATILE
PARALLEL SAFE
AS 'MODULE_PATHNAME', 'ruvector_embedding_stats_wrapper';

-- Look up the vector dimension count of a named embedding model.
-- A NULL model_name yields NULL without calling the C symbol.
CREATE OR REPLACE FUNCTION ruvector_embedding_dims(
    model_name text
)
RETURNS integer
LANGUAGE C
IMMUTABLE
RETURNS NULL ON NULL INPUT
PARALLEL SAFE
AS 'MODULE_PATHNAME', 'ruvector_embedding_dims_wrapper';

-- Convenience wrapper: embed text and return it as a ruvector in one call.
-- The real[] from ruvector_embed is rendered as text ("{...}"), its braces are
-- swapped for square brackets ("[...]"), and the result is cast to ruvector.
-- A NULL argument yields NULL.
CREATE OR REPLACE FUNCTION ruvector_embed_vec(
    text_input text,
    model_name text DEFAULT 'all-MiniLM-L6-v2'
)
RETURNS ruvector
LANGUAGE SQL
VOLATILE
RETURNS NULL ON NULL INPUT
PARALLEL SAFE
AS $$
    -- translate() maps '{' -> '[' and '}' -> ']' character by character.
    SELECT translate(
        ruvector_embed(text_input, model_name)::text,
        '{}',
        '[]'
    )::ruvector;
$$;

-- ============================================================================
-- HNSW Access Method
Expand Down
77 changes: 74 additions & 3 deletions crates/ruvector-postgres/sql/ruvector--2.0.0.sql
Original file line number Diff line number Diff line change
Expand Up @@ -781,9 +781,80 @@ COMMENT ON FUNCTION graph_bipartite_score(real[], real[], real) IS 'Compute bipa
-- ============================================================================
-- Embedding Generation Functions
-- ============================================================================
-- Note: Embedding functions require the 'embeddings' feature flag to be enabled
-- during compilation. These functions are not available in the default build.
-- To enable, build with: cargo pgrx package --features embeddings
-- These functions require the 'embeddings' feature flag at compile time.
-- The Docker image builds with --features embeddings, so they are available.

-- Embed a single text as a real[] vector.
-- model_name selects the embedding model and falls back to
-- 'all-MiniLM-L6-v2' when omitted. A NULL argument yields NULL without
-- calling the C symbol.
CREATE OR REPLACE FUNCTION ruvector_embed(
    text text,
    model_name text DEFAULT 'all-MiniLM-L6-v2'
)
RETURNS real[]
LANGUAGE C
VOLATILE
RETURNS NULL ON NULL INPUT
PARALLEL SAFE
AS 'MODULE_PATHNAME', 'ruvector_embed_wrapper';

-- Embed several texts in one call.
-- texts is the batch; model_name falls back to 'all-MiniLM-L6-v2' when
-- omitted. Declared as real[][] (PostgreSQL does not enforce the declared
-- number of array dimensions). A NULL argument yields NULL.
CREATE OR REPLACE FUNCTION ruvector_embed_batch(
    texts text[],
    model_name text DEFAULT 'all-MiniLM-L6-v2'
)
RETURNS real[][]
LANGUAGE C
VOLATILE
RETURNS NULL ON NULL INPUT
PARALLEL SAFE
AS 'MODULE_PATHNAME', 'ruvector_embed_batch_wrapper';

-- Enumerate the available embedding models, one row per model:
-- its name, vector dimensions, a description, and whether it is loaded.
CREATE OR REPLACE FUNCTION ruvector_embedding_models()
RETURNS TABLE (
    model_name  text,
    dimensions  integer,
    description text,
    is_loaded   boolean
)
LANGUAGE C
VOLATILE
PARALLEL SAFE
AS 'MODULE_PATHNAME', 'ruvector_embedding_models_wrapper';

-- Load an embedding model into memory.
--   model_name: the model to load; NULL returns NULL without a call (STRICT).
--   returns:    boolean (its exact meaning is defined by the C implementation).
-- Labeled PARALLEL RESTRICTED rather than SAFE: loading a model changes
-- in-memory state - presumably per-backend; confirm against the Rust
-- implementation - and a change made inside a parallel worker would not be
-- reflected in the leader. PostgreSQL labels setseed() restricted for the
-- same reason.
CREATE OR REPLACE FUNCTION ruvector_load_model(model_name text)
RETURNS boolean
AS 'MODULE_PATHNAME', 'ruvector_load_model_wrapper'
LANGUAGE C VOLATILE STRICT PARALLEL RESTRICTED;

-- Unload an embedding model from memory.
--   model_name: the model to unload; NULL returns NULL without a call (STRICT).
--   returns:    boolean (its exact meaning is defined by the C implementation).
-- Labeled PARALLEL RESTRICTED rather than SAFE: unloading changes in-memory
-- state - presumably per-backend; confirm against the Rust implementation -
-- and a change made inside a parallel worker would not be reflected in the
-- leader. PostgreSQL labels setseed() restricted for the same reason.
CREATE OR REPLACE FUNCTION ruvector_unload_model(model_name text)
RETURNS boolean
AS 'MODULE_PATHNAME', 'ruvector_unload_model_wrapper'
LANGUAGE C VOLATILE STRICT PARALLEL RESTRICTED;

-- Describe one embedding model as a jsonb document.
-- A NULL model_name yields NULL without calling the C symbol.
CREATE OR REPLACE FUNCTION ruvector_model_info(
    model_name text
)
RETURNS jsonb
LANGUAGE C
VOLATILE
RETURNS NULL ON NULL INPUT
PARALLEL SAFE
AS 'MODULE_PATHNAME', 'ruvector_model_info_wrapper';

-- Set the default embedding model.
--   model_name: the model to make the default; NULL returns NULL without a
--               call (STRICT).
--   returns:    boolean (its exact meaning is defined by the C implementation).
-- Labeled PARALLEL RESTRICTED rather than SAFE: a setter changes state that
-- is presumably per-backend (confirm against the Rust implementation), and a
-- change made inside a parallel worker would not be reflected in the leader.
-- PostgreSQL labels setseed() restricted for the same reason.
CREATE OR REPLACE FUNCTION ruvector_set_default_model(model_name text)
RETURNS boolean
AS 'MODULE_PATHNAME', 'ruvector_set_default_model_wrapper'
LANGUAGE C VOLATILE STRICT PARALLEL RESTRICTED;

-- Report the name of the current default embedding model.
CREATE OR REPLACE FUNCTION ruvector_default_model()
RETURNS text
LANGUAGE C
VOLATILE
PARALLEL SAFE
AS 'MODULE_PATHNAME', 'ruvector_default_model_wrapper';

-- Report embedding generation statistics as a jsonb document.
CREATE OR REPLACE FUNCTION ruvector_embedding_stats()
RETURNS jsonb
LANGUAGE C
VOLATILE
PARALLEL SAFE
AS 'MODULE_PATHNAME', 'ruvector_embedding_stats_wrapper';

-- Look up the vector dimension count of a named embedding model.
-- A NULL model_name yields NULL without calling the C symbol.
CREATE OR REPLACE FUNCTION ruvector_embedding_dims(
    model_name text
)
RETURNS integer
LANGUAGE C
IMMUTABLE
RETURNS NULL ON NULL INPUT
PARALLEL SAFE
AS 'MODULE_PATHNAME', 'ruvector_embedding_dims_wrapper';

-- Convenience wrapper: embed text and return it as a ruvector in one call.
-- The real[] from ruvector_embed is rendered as text ("{...}"), its braces are
-- swapped for square brackets ("[...]"), and the result is cast to ruvector.
-- A NULL argument yields NULL.
CREATE OR REPLACE FUNCTION ruvector_embed_vec(
    text_input text,
    model_name text DEFAULT 'all-MiniLM-L6-v2'
)
RETURNS ruvector
LANGUAGE SQL
VOLATILE
RETURNS NULL ON NULL INPUT
PARALLEL SAFE
AS $$
    -- translate() maps '{' -> '[' and '}' -> ']' character by character.
    SELECT translate(
        ruvector_embed(text_input, model_name)::text,
        '{}',
        '[]'
    )::ruvector;
$$;

-- ============================================================================
-- HNSW Access Method
Expand Down