From 213331f4c9ba840b8f1158e4e24e1edb5ec00a73 Mon Sep 17 00:00:00 2001 From: Cedric Hurst Date: Sun, 1 Feb 2026 15:53:09 -0600 Subject: [PATCH 1/3] feat: unify database_url config for Postgres and SQLite Extend BASIC_MEMORY_DATABASE_URL to support: - SQLite URLs with custom paths for project-local databases - Postgres URLs with ?search_path= for schema isolation Changes: - config.py: Parse SQLite URLs in app_database_path property - db.py: Extract search_path from Postgres URL, pass to server_settings - db.py: Add ensure_schema_exists() for auto-creating schemas - alembic/env.py: Pass version_table_schema for migration tracking - cli/commands/db.py: Handle Postgres case in reset command Closes #539 Co-Authored-By: Claude Opus 4.5 Signed-off-by: Cedric Hurst --- src/basic_memory/alembic/env.py | 36 ++++ src/basic_memory/cli/commands/db.py | 41 ++-- src/basic_memory/config.py | 33 ++- src/basic_memory/db.py | 104 +++++++++- test-int/test_database_url_integration.py | 238 ++++++++++++++++++++++ tests/test_database_url.py | 224 ++++++++++++++++++++ 6 files changed, 654 insertions(+), 22 deletions(-) create mode 100644 test-int/test_database_url_integration.py create mode 100644 tests/test_database_url.py diff --git a/src/basic_memory/alembic/env.py b/src/basic_memory/alembic/env.py index 94b15bdf..61010d06 100644 --- a/src/basic_memory/alembic/env.py +++ b/src/basic_memory/alembic/env.py @@ -98,14 +98,50 @@ def run_migrations_offline() -> None: context.run_migrations() +def get_version_table_schema(connection) -> str | None: + """Determine the schema for Alembic's version table. + + Args: + connection: Database connection + + Returns: + Schema name if Postgres with non-public search_path, else None + + Why: When using schema isolation, Alembic's alembic_version table should + be in the same schema as the application tables. + """ + if connection.dialect.name != "postgresql": + return None + + # Check if database_url has search_path parameter + if not app_config.database_url or "search_path=" not in app_config.database_url: + return None + + from urllib.parse import urlparse, parse_qs + + parsed = urlparse(app_config.database_url) + query_params = parse_qs(parsed.query) + search_path = query_params.get("search_path", ["public"])[0] + + # Only set version_table_schema for non-public schemas + return search_path if search_path != "public" else None + + def do_run_migrations(connection): """Execute migrations with the given connection.""" + # --- Schema-Aware Migration Tracking --- + # Trigger: Postgres with non-public search_path in database_url + # Why: Alembic version table should be in same schema as application tables + # Outcome: version_table_schema passed to context.configure() + version_table_schema = get_version_table_schema(connection) + context.configure( connection=connection, target_metadata=target_metadata, include_object=include_object, render_as_batch=True, compare_type=True, + version_table_schema=version_table_schema, ) with context.begin_transaction(): context.run_migrations() diff --git a/src/basic_memory/cli/commands/db.py b/src/basic_memory/cli/commands/db.py index 17b8130b..1f9dbc2e 100644 --- a/src/basic_memory/cli/commands/db.py +++ b/src/basic_memory/cli/commands/db.py @@ -61,23 +61,34 @@ def reset( logger.info("Resetting database...") config_manager = ConfigManager() app_config = config_manager.config - # Get database path + # Get database path (None for Postgres) db_path = app_config.app_database_path - # Delete the database file and WAL files if they exist - for suffix in ["", "-shm", "-wal"]: - path = db_path.parent / f"{db_path.name}{suffix}" - if path.exists(): - try: - path.unlink() - logger.info(f"Deleted: {path}") - except OSError as e: - console.print( - f"[red]Error:[/red] Cannot delete {path.name}: {e}\n" - "The database may be in use by another process (e.g., MCP server).\n" - "Please close Claude Desktop or any other Basic Memory clients and try again." - ) - raise typer.Exit(1) + # --- SQLite vs Postgres Handling --- + # Trigger: db_path is None when using Postgres backend + # Why: Postgres doesn't use a local file, so file deletion doesn't apply + # Outcome: skip file deletion for Postgres, only run migrations + if db_path is not None: + # Delete the database file and WAL files if they exist (SQLite only) + for suffix in ["", "-shm", "-wal"]: + path = db_path.parent / f"{db_path.name}{suffix}" + if path.exists(): + try: + path.unlink() + logger.info(f"Deleted: {path}") + except OSError as e: + console.print( + f"[red]Error:[/red] Cannot delete {path.name}: {e}\n" + "The database may be in use by another process (e.g., MCP server).\n" + "Please close Claude Desktop or any other Basic Memory clients and try again." + ) + raise typer.Exit(1) + else: + console.print( + "[yellow]Note:[/yellow] Using Postgres backend. " + "Database files cannot be deleted directly.\n" + "Running migrations to recreate tables..." + ) # Create a new empty database (preserves project configuration) try: diff --git a/src/basic_memory/config.py b/src/basic_memory/config.py index bd70cde5..1d6e1354 100644 --- a/src/basic_memory/config.py +++ b/src/basic_memory/config.py @@ -316,12 +316,37 @@ def model_post_init(self, __context: Any) -> None: self.default_project = next(iter(self.projects.keys())) @property - def app_database_path(self) -> Path: + def app_database_path(self) -> Optional[Path]: """Get the path to the app-level database. This is the single database that will store all knowledge data across all projects. + + Returns: + Path to SQLite database file, or None for Postgres backends. """ + # --- SQLite URL Handling --- + # Trigger: database_url is set and starts with "sqlite" + # Why: allows project-local SQLite databases via URL configuration + # Outcome: extracts path from URL (e.g., sqlite+aiosqlite:///.basic-memory/memory.db) + if self.database_url: + if self.database_url.startswith("sqlite"): + from urllib.parse import urlparse + + parsed = urlparse(self.database_url) + # parsed.path will be "/.basic-memory/memory.db" - strip leading / + path = parsed.path[1:] if parsed.path.startswith("/") else parsed.path + resolved_path = Path(path).resolve() + # Ensure parent directory exists + if not resolved_path.parent.exists(): # pragma: no cover + resolved_path.parent.mkdir(parents=True, exist_ok=True) + return resolved_path + else: + # Postgres or other backend - no file path + return None + + # --- Default SQLite Path --- + # No database_url set - use default ~/.basic-memory/memory.db database_path = Path.home() / DATA_DIR_NAME / APP_DATABASE_NAME if not database_path.exists(): # pragma: no cover database_path.parent.mkdir(parents=True, exist_ok=True) @@ -329,11 +354,11 @@ def app_database_path(self) -> Path: return database_path @property - def database_path(self) -> Path: + def database_path(self) -> Optional[Path]: """Get SQLite database path. - Rreturns the app-level database path - for backward compatibility in the codebase. + Returns the app-level database path for backward compatibility. + Returns None when using Postgres backend. """ # Load the app-level database path from the global config diff --git a/src/basic_memory/db.py b/src/basic_memory/db.py index c440e37d..79c34319 100644 --- a/src/basic_memory/db.py +++ b/src/basic_memory/db.py @@ -83,7 +83,15 @@ def get_db_url( logger.info(f"Using Postgres database: {config.database_url}") return config.database_url - # SQLite databases + # --- SQLite URL Handling --- + # Trigger: database_url is set with a SQLite URL + # Why: allows custom SQLite paths via URL configuration + # Outcome: use the provided URL instead of constructing from db_path + if config.database_url and config.database_url.startswith("sqlite"): + logger.info(f"Using SQLite database from URL: {config.database_url}") + return config.database_url + + # SQLite databases (default behavior) if db_type == cls.MEMORY: logger.info("Using in-memory SQLite database") return "sqlite+aiosqlite://" @@ -206,6 +214,34 @@ def enable_wal_mode(dbapi_conn, connection_record): return engine +def extract_search_path_from_url(db_url: str) -> tuple[str, str]: + """Extract search_path from Postgres URL and return clean URL. + + Args: + db_url: Postgres connection URL, possibly with ?search_path=schema + + Returns: + Tuple of (clean_url without search_path, search_path value) + + Why: asyncpg rejects search_path as a URL query parameter, so we extract it + and pass it via server_settings instead. + """ + from urllib.parse import urlparse, parse_qs, urlencode, urlunparse + + parsed = urlparse(db_url) + query_params = parse_qs(parsed.query) + + # Extract search_path, default to "public" + search_path_list = query_params.pop("search_path", ["public"]) + search_path = search_path_list[0] if search_path_list else "public" + + # Rebuild URL without search_path + new_query = urlencode(query_params, doseq=True) + clean_url = urlunparse(parsed._replace(query=new_query)) + + return clean_url, search_path + + def _create_postgres_engine(db_url: str, config: BasicMemoryConfig) -> AsyncEngine: """Create Postgres async engine with appropriate configuration. @@ -216,10 +252,16 @@ def _create_postgres_engine(db_url: str, config: BasicMemoryConfig) -> AsyncEngi Returns: Configured async engine for Postgres """ + # --- Extract search_path from URL --- + # Trigger: URL contains ?search_path=schema parameter + # Why: asyncpg rejects search_path as URL param, must pass via server_settings + # Outcome: clean URL for asyncpg, search_path passed to server_settings + clean_url, search_path = extract_search_path_from_url(db_url) + # Use NullPool connection issues. # Assume connection pooler like PgBouncer handles connection pooling. engine = create_async_engine( - db_url, + clean_url, echo=False, poolclass=NullPool, # No pooling - fresh connection per request connect_args={ @@ -233,10 +275,12 @@ def _create_postgres_engine(db_url: str, config: BasicMemoryConfig) -> AsyncEngi "application_name": "basic-memory", # Statement timeout for queries (30s to allow for cold start) "statement_timeout": "30s", + # Schema isolation via search_path (extracted from URL or default "public") + "search_path": search_path, }, }, ) - logger.debug("Created Postgres engine with NullPool (no connection pooling)") + logger.debug(f"Created Postgres engine with search_path={search_path}") return engine @@ -365,6 +409,44 @@ async def engine_session_factory( _session_maker = None +def get_search_path_from_config(app_config: BasicMemoryConfig) -> Optional[str]: + """Extract search_path from config's database_url if present. + + Args: + app_config: BasicMemoryConfig with database_url + + Returns: + search_path value if present and not "public", else None + """ + if not app_config.database_url: + return None + + if not app_config.database_url.startswith("postgresql"): + return None + + _, search_path = extract_search_path_from_url(app_config.database_url) + return search_path if search_path != "public" else None + + +async def ensure_schema_exists(engine: AsyncEngine, schema: str) -> None: + """Create schema if it doesn't exist (Postgres only). + + Args: + engine: AsyncEngine connected to Postgres + schema: Schema name to create + + Why: When using search_path for schema isolation, the schema must exist + before migrations can create tables in it. + """ + if not schema or schema == "public": + return + + async with engine.begin() as conn: + # Use text() to execute raw SQL - schema names are trusted config values + await conn.execute(text(f'CREATE SCHEMA IF NOT EXISTS "{schema}"')) + logger.info(f"Ensured schema exists: {schema}") + + async def run_migrations( app_config: BasicMemoryConfig, database_type=DatabaseType.FILESYSTEM ): # pragma: no cover @@ -393,6 +475,22 @@ async def run_migrations( db_url = DatabaseType.get_db_url(app_config.database_path, database_type, app_config) config.set_main_option("sqlalchemy.url", db_url) + # --- Schema Creation for Postgres --- + # Trigger: Postgres backend with non-public search_path in URL + # Why: schema must exist before Alembic can create tables in it + # Outcome: CREATE SCHEMA IF NOT EXISTS runs before migrations + search_path = get_search_path_from_config(app_config) + if search_path and ( + database_type == DatabaseType.POSTGRES + or app_config.database_backend == DatabaseBackend.POSTGRES + ): + # Create a temporary engine just for schema creation + temp_engine = _create_postgres_engine(db_url, app_config) + try: + await ensure_schema_exists(temp_engine, search_path) + finally: + await temp_engine.dispose() + command.upgrade(config, "head") logger.info("Migrations completed successfully") diff --git a/test-int/test_database_url_integration.py b/test-int/test_database_url_integration.py new file mode 100644 index 00000000..88ae13fd --- /dev/null +++ b/test-int/test_database_url_integration.py @@ -0,0 +1,238 @@ +"""Integration tests for database_url configuration flexibility. + +Tests: +1. SQLite with custom URL creates database in expected location +2. Postgres with search_path creates tables in isolated schema +3. Alembic migrations track in the correct schema +""" + +import os +import pytest +import pytest_asyncio +from pathlib import Path +from sqlalchemy import text +from sqlalchemy.ext.asyncio import create_async_engine +from sqlalchemy.pool import NullPool +from testcontainers.postgres import PostgresContainer + +from basic_memory.config import BasicMemoryConfig, DatabaseBackend +from basic_memory.db import ( + _create_postgres_engine, + extract_search_path_from_url, + ensure_schema_exists, +) + + +class TestPostgresSchemaIsolation: + """Integration tests for Postgres schema isolation via search_path.""" + + @pytest.fixture(scope="class") + def postgres_container(self): + """Start a Postgres container for schema isolation tests.""" + # Skip if not running Postgres tests + if os.environ.get("BASIC_MEMORY_TEST_POSTGRES", "").lower() not in ("1", "true", "yes"): + pytest.skip("Postgres tests disabled - set BASIC_MEMORY_TEST_POSTGRES=1") + + with PostgresContainer("postgres:16-alpine") as postgres: + yield postgres + + @pytest.fixture + def postgres_url(self, postgres_container): + """Get asyncpg URL from testcontainer.""" + sync_url = postgres_container.get_connection_url() + return sync_url.replace("postgresql+psycopg2", "postgresql+asyncpg") + + @pytest_asyncio.fixture + async def clean_schema(self, postgres_url): + """Create and clean up a test schema.""" + schema_name = "test_schema" + + # Create a basic engine to set up/tear down schema + engine = create_async_engine(postgres_url, poolclass=NullPool) + + async with engine.begin() as conn: + # Drop schema if exists from previous test + await conn.execute(text(f'DROP SCHEMA IF EXISTS "{schema_name}" CASCADE')) + + yield schema_name + + # Cleanup + async with engine.begin() as conn: + await conn.execute(text(f'DROP SCHEMA IF EXISTS "{schema_name}" CASCADE')) + + await engine.dispose() + + @pytest.mark.asyncio + async def test_ensure_schema_exists_creates_schema(self, postgres_url, clean_schema): + """ensure_schema_exists() creates the schema if it doesn't exist.""" + schema_name = clean_schema + + # Create engine with search_path pointing to our test schema + url_with_schema = f"{postgres_url}?search_path={schema_name}" + config = BasicMemoryConfig( + database_backend=DatabaseBackend.POSTGRES, + database_url=url_with_schema, + ) + + engine = _create_postgres_engine(url_with_schema, config) + + try: + # Verify schema doesn't exist yet + async with engine.begin() as conn: + result = await conn.execute( + text( + "SELECT schema_name FROM information_schema.schemata " + f"WHERE schema_name = '{schema_name}'" + ) + ) + assert result.fetchone() is None, "Schema should not exist yet" + + # Call ensure_schema_exists + await ensure_schema_exists(engine, schema_name) + + # Verify schema now exists + async with engine.begin() as conn: + result = await conn.execute( + text( + "SELECT schema_name FROM information_schema.schemata " + f"WHERE schema_name = '{schema_name}'" + ) + ) + row = result.fetchone() + assert row is not None, "Schema should exist after ensure_schema_exists" + assert row[0] == schema_name + finally: + await engine.dispose() + + @pytest.mark.asyncio + async def test_ensure_schema_exists_idempotent(self, postgres_url, clean_schema): + """ensure_schema_exists() can be called multiple times safely.""" + schema_name = clean_schema + url_with_schema = f"{postgres_url}?search_path={schema_name}" + config = BasicMemoryConfig( + database_backend=DatabaseBackend.POSTGRES, + database_url=url_with_schema, + ) + + engine = _create_postgres_engine(url_with_schema, config) + + try: + # Call twice - should not raise + await ensure_schema_exists(engine, schema_name) + await ensure_schema_exists(engine, schema_name) + + # Verify schema exists + async with engine.begin() as conn: + result = await conn.execute( + text( + "SELECT schema_name FROM information_schema.schemata " + f"WHERE schema_name = '{schema_name}'" + ) + ) + assert result.fetchone() is not None + finally: + await engine.dispose() + + @pytest.mark.asyncio + async def test_ensure_schema_exists_skips_public(self, postgres_url): + """ensure_schema_exists() does nothing for public schema.""" + config = BasicMemoryConfig( + database_backend=DatabaseBackend.POSTGRES, + database_url=postgres_url, + ) + + engine = _create_postgres_engine(postgres_url, config) + + try: + # Should not raise and should not try to create public + await ensure_schema_exists(engine, "public") + await ensure_schema_exists(engine, "") + await ensure_schema_exists(engine, None) # type: ignore + finally: + await engine.dispose() + + @pytest.mark.asyncio + async def test_engine_uses_search_path(self, postgres_url, clean_schema): + """Engine created with search_path URL uses that schema for queries.""" + schema_name = clean_schema + url_with_schema = f"{postgres_url}?search_path={schema_name}" + config = BasicMemoryConfig( + database_backend=DatabaseBackend.POSTGRES, + database_url=url_with_schema, + ) + + engine = _create_postgres_engine(url_with_schema, config) + + try: + # Create the schema first + await ensure_schema_exists(engine, schema_name) + + # Create a test table (should go into our schema) + async with engine.begin() as conn: + await conn.execute(text("CREATE TABLE test_table (id INTEGER PRIMARY KEY)")) + + # Verify table is in our schema, not public + async with engine.begin() as conn: + result = await conn.execute( + text( + "SELECT table_schema FROM information_schema.tables " + "WHERE table_name = 'test_table'" + ) + ) + row = result.fetchone() + assert row is not None, "Table should exist" + assert row[0] == schema_name, f"Table should be in {schema_name}, not {row[0]}" + finally: + await engine.dispose() + + @pytest.mark.asyncio + async def test_search_path_with_multiple_schemas(self, postgres_url): + """search_path with multiple schemas uses first for table creation.""" + # Test that comma-separated schemas are handled + url = f"{postgres_url}?search_path=custom_schema,public" + clean_url, search_path = extract_search_path_from_url(url) + + assert search_path == "custom_schema,public" + assert "search_path" not in clean_url + + +class TestSQLiteUrlIntegration: + """Integration tests for SQLite URL configuration.""" + + def test_relative_path_creates_database(self, tmp_path, monkeypatch): + """SQLite URL with relative path creates database in expected location.""" + # Change to tmp_path + monkeypatch.chdir(tmp_path) + + # Create config with relative SQLite URL + config = BasicMemoryConfig( + database_url="sqlite+aiosqlite:///.basic-memory/memory.db" + ) + + # Get the path + db_path = config.app_database_path + assert db_path is not None + + # Verify it resolves to tmp_path + assert db_path.parent.name == ".basic-memory" + assert db_path.name == "memory.db" + + # The path should be absolute after resolve() + assert db_path.is_absolute() + + def test_project_local_database(self, tmp_path, monkeypatch): + """Project can have its own local database via URL configuration.""" + project_dir = tmp_path / "my-project" + project_dir.mkdir() + monkeypatch.chdir(project_dir) + + # Configure project-local database + config = BasicMemoryConfig( + database_url="sqlite+aiosqlite:///.basic-memory/memory.db" + ) + + db_path = config.app_database_path + assert db_path is not None + + # Should be inside project directory + assert str(project_dir) in str(db_path) diff --git a/tests/test_database_url.py b/tests/test_database_url.py new file mode 100644 index 00000000..73776a5e --- /dev/null +++ b/tests/test_database_url.py @@ -0,0 +1,224 @@ +"""Tests for database_url configuration flexibility. + +Tests SQLite URL path extraction and Postgres search_path handling. +""" + +import pytest +from pathlib import Path +from unittest.mock import patch + +from basic_memory.config import BasicMemoryConfig +from basic_memory.db import extract_search_path_from_url, get_search_path_from_config + + +class TestSQLiteUrlParsing: + """Test SQLite URL path extraction in app_database_path.""" + + def test_relative_path_extracts_correctly(self, tmp_path, monkeypatch): + """sqlite+aiosqlite:///.basic-memory/memory.db -> .basic-memory/memory.db""" + # Change to tmp_path so relative path resolves there + monkeypatch.chdir(tmp_path) + + config = BasicMemoryConfig( + database_url="sqlite+aiosqlite:///.basic-memory/memory.db" + ) + + # Should resolve relative to cwd + assert config.app_database_path is not None + assert config.app_database_path.name == "memory.db" + assert ".basic-memory" in str(config.app_database_path) + + def test_no_database_url_uses_default(self, config_home): + """No database_url = default ~/.basic-memory/memory.db""" + config = BasicMemoryConfig(database_url=None) + + assert config.app_database_path is not None + assert str(config.app_database_path).endswith("memory.db") + assert ".basic-memory" in str(config.app_database_path) + + def test_postgres_url_returns_none_for_path(self): + """Postgres URL should return None for app_database_path.""" + config = BasicMemoryConfig( + database_url="postgresql+asyncpg://user:pass@host/db" + ) + + assert config.app_database_path is None + + def test_sqlite_url_with_absolute_path(self, tmp_path): + """SQLite URL with absolute path extracts correctly.""" + db_path = tmp_path / "custom" / "database.db" + # Create parent directory + db_path.parent.mkdir(parents=True, exist_ok=True) + + config = BasicMemoryConfig( + database_url=f"sqlite+aiosqlite:///{db_path}" + ) + + assert config.app_database_path is not None + # The resolved path should match (accounting for symlink resolution) + assert config.app_database_path.name == "database.db" + + def test_sqlite_url_with_dot_relative_path(self, tmp_path, monkeypatch): + """sqlite+aiosqlite:///./.basic-memory/memory.db works with dot prefix.""" + monkeypatch.chdir(tmp_path) + + config = BasicMemoryConfig( + database_url="sqlite+aiosqlite:///./.basic-memory/memory.db" + ) + + assert config.app_database_path is not None + assert config.app_database_path.name == "memory.db" + + +class TestPostgresSearchPath: + """Test Postgres search_path extraction.""" + + def test_extract_search_path_from_url_with_schema(self): + """search_path should be extracted from URL.""" + url = "postgresql+asyncpg://user:pass@host/db?search_path=myschema" + clean_url, search_path = extract_search_path_from_url(url) + + assert search_path == "myschema" + assert "search_path" not in clean_url + assert clean_url == "postgresql+asyncpg://user:pass@host/db" + + def test_extract_search_path_defaults_to_public(self): + """No search_path param = default to public.""" + url = "postgresql+asyncpg://user:pass@host/db" + clean_url, search_path = extract_search_path_from_url(url) + + assert search_path == "public" + assert clean_url == url + + def test_extract_search_path_preserves_other_params(self): + """Other URL parameters should be preserved.""" + url = "postgresql+asyncpg://user:pass@host/db?sslmode=require&search_path=myschema" + clean_url, search_path = extract_search_path_from_url(url) + + assert search_path == "myschema" + assert "sslmode=require" in clean_url + assert "search_path" not in clean_url + + def test_extract_search_path_with_comma_separated_schemas(self): + """Multiple schemas in search_path should use first one.""" + url = "postgresql+asyncpg://user:pass@host/db?search_path=myschema,public" + clean_url, search_path = extract_search_path_from_url(url) + + # First schema in the list + assert search_path == "myschema,public" + + def test_get_search_path_from_config_with_schema(self): + """get_search_path_from_config returns schema when non-public.""" + config = BasicMemoryConfig( + database_url="postgresql+asyncpg://user:pass@host/db?search_path=myschema" + ) + + search_path = get_search_path_from_config(config) + assert search_path == "myschema" + + def test_get_search_path_from_config_returns_none_for_public(self): + """get_search_path_from_config returns None for public schema.""" + config = BasicMemoryConfig( + database_url="postgresql+asyncpg://user:pass@host/db?search_path=public" + ) + + search_path = get_search_path_from_config(config) + assert search_path is None + + def test_get_search_path_from_config_returns_none_for_sqlite(self): + """get_search_path_from_config returns None for SQLite URLs.""" + config = BasicMemoryConfig( + database_url="sqlite+aiosqlite:///.basic-memory/memory.db" + ) + + search_path = get_search_path_from_config(config) + assert search_path is None + + def test_get_search_path_from_config_returns_none_for_no_url(self): + """get_search_path_from_config returns None when no database_url.""" + config = BasicMemoryConfig(database_url=None) + + search_path = get_search_path_from_config(config) + assert search_path is None + + +class TestBackwardsCompatibility: + """Test that existing setups work unchanged.""" + + def test_no_config_uses_default_sqlite_path(self, config_home): + """No database_url configuration uses default SQLite path.""" + config = BasicMemoryConfig() + + # Should still get the default path + assert config.app_database_path is not None + assert config.app_database_path.name == "memory.db" + + def test_postgres_backend_without_search_path_works(self): + """Postgres without search_path uses public schema.""" + config = BasicMemoryConfig( + database_url="postgresql+asyncpg://user:pass@host/db" + ) + + # app_database_path should be None for Postgres + assert config.app_database_path is None + + # search_path should default to public + search_path = get_search_path_from_config(config) + assert search_path is None # None means "use default public" + + def test_database_url_field_description_unchanged(self): + """Verify database_url field exists with expected properties.""" + field_info = BasicMemoryConfig.model_fields["database_url"] + + assert field_info.default is None + assert "Database connection URL" in (field_info.description or "") + + +class TestDatabaseTypeGetDbUrl: + """Test DatabaseType.get_db_url() handles custom URLs.""" + + def test_sqlite_url_in_database_url_is_used(self, tmp_path, monkeypatch): + """get_db_url() should return SQLite URL from database_url config.""" + from basic_memory.db import DatabaseType + + monkeypatch.chdir(tmp_path) + custom_url = "sqlite+aiosqlite:///.custom/test.db" + config = BasicMemoryConfig(database_url=custom_url) + + result = DatabaseType.get_db_url( + db_path=tmp_path / "ignored.db", + db_type=DatabaseType.FILESYSTEM, + config=config, + ) + + assert result == custom_url + + def test_no_database_url_uses_db_path(self, tmp_path): + """get_db_url() constructs URL from db_path when no database_url.""" + from basic_memory.db import DatabaseType + + config = BasicMemoryConfig(database_url=None) + db_path = tmp_path / "test.db" + + result = DatabaseType.get_db_url( + db_path=db_path, + db_type=DatabaseType.FILESYSTEM, + config=config, + ) + + assert result == f"sqlite+aiosqlite:///{db_path}" + + def test_postgres_url_returned_for_postgres_type(self): + """get_db_url() returns Postgres URL for POSTGRES type.""" + from basic_memory.db import DatabaseType + + pg_url = "postgresql+asyncpg://user:pass@host/db" + config = BasicMemoryConfig(database_url=pg_url) + + result = DatabaseType.get_db_url( + db_path=None, # type: ignore + db_type=DatabaseType.POSTGRES, + config=config, + ) + + assert result == pg_url From 9750f8a45539a219f94f7329e037c33f4f98e0b4 Mon Sep 17 00:00:00 2001 From: Cedric Hurst Date: Mon, 2 Feb 2026 12:35:04 -0600 Subject: [PATCH 2/3] fix: address code review findings for database_url feature - Use SQLAlchemy DDL (CreateSchema/DropSchema) instead of raw SQL to eliminate SQL injection risk in schema operations - DRY up alembic/env.py by reusing extract_search_path_from_url - Fix type errors by propagating Optional[Path] through call chain - Implement reset_postgres_database() for actual Postgres reset - Handle None db_path in project_service.get_system_status() Co-Authored-By: Claude Opus 4.5 Signed-off-by: Cedric Hurst --- src/basic_memory/alembic/env.py | 11 +--- src/basic_memory/cli/commands/db.py | 5 +- src/basic_memory/db.py | 65 +++++++++++++++++--- src/basic_memory/services/project_service.py | 11 ++-- 4 files changed, 68 insertions(+), 24 deletions(-) diff --git a/src/basic_memory/alembic/env.py b/src/basic_memory/alembic/env.py index 61010d06..9969fb40 100644 --- a/src/basic_memory/alembic/env.py +++ b/src/basic_memory/alembic/env.py @@ -113,17 +113,12 @@ def get_version_table_schema(connection) -> str | None: if connection.dialect.name != "postgresql": return None - # Check if database_url has search_path parameter - if not app_config.database_url or "search_path=" not in app_config.database_url: + if not app_config.database_url: return None - from urllib.parse import urlparse, parse_qs + from basic_memory.db import extract_search_path_from_url - parsed = urlparse(app_config.database_url) - query_params = parse_qs(parsed.query) - search_path = query_params.get("search_path", ["public"])[0] - - # Only set version_table_schema for non-public schemas + _, search_path = extract_search_path_from_url(app_config.database_url) return search_path if search_path != "public" else None diff --git a/src/basic_memory/cli/commands/db.py b/src/basic_memory/cli/commands/db.py index 1f9dbc2e..006923c4 100644 --- a/src/basic_memory/cli/commands/db.py +++ b/src/basic_memory/cli/commands/db.py @@ -84,11 +84,12 @@ def reset( ) raise typer.Exit(1) else: + # Postgres: drop and recreate schema/tables console.print( "[yellow]Note:[/yellow] Using Postgres backend. " - "Database files cannot be deleted directly.\n" - "Running migrations to recreate tables..." + "Dropping and recreating tables..." ) + run_with_cleanup(db.reset_postgres_database(app_config)) # Create a new empty database (preserves project configuration) try: diff --git a/src/basic_memory/db.py b/src/basic_memory/db.py index 79c34319..180a92d8 100644 --- a/src/basic_memory/db.py +++ b/src/basic_memory/db.py @@ -53,12 +53,15 @@ class DatabaseType(Enum): @classmethod def get_db_url( - cls, db_path: Path, db_type: "DatabaseType", config: Optional[BasicMemoryConfig] = None + cls, + db_path: Optional[Path], + db_type: "DatabaseType", + config: Optional[BasicMemoryConfig] = None, ) -> str: """Get SQLAlchemy URL for database path. Args: - db_path: Path to SQLite database file (ignored for Postgres) + db_path: Path to SQLite database file (None for Postgres, ignored if database_url set) db_type: Type of database (MEMORY, FILESYSTEM, or POSTGRES) config: Optional config to check for database backend and URL @@ -286,14 +289,14 @@ def _create_postgres_engine(db_url: str, config: BasicMemoryConfig) -> AsyncEngi def _create_engine_and_session( - db_path: Path, + db_path: Optional[Path], db_type: DatabaseType = DatabaseType.FILESYSTEM, config: Optional[BasicMemoryConfig] = None, ) -> tuple[AsyncEngine, async_sessionmaker[AsyncSession]]: """Internal helper to create engine and session maker. Args: - db_path: Path to database file (used for SQLite, ignored for Postgres) + db_path: Path to database file (None for Postgres, ignored if database_url set) db_type: Type of database (MEMORY, FILESYSTEM, or POSTGRES) config: Optional explicit config. If not provided, reads from ConfigManager. Prefer passing explicitly from composition roots. @@ -319,7 +322,7 @@ def _create_engine_and_session( async def get_or_create_db( - db_path: Path, + db_path: Optional[Path], db_type: DatabaseType = DatabaseType.FILESYSTEM, ensure_migrations: bool = True, config: Optional[BasicMemoryConfig] = None, @@ -327,7 +330,7 @@ async def get_or_create_db( """Get or create database engine and session maker. Args: - db_path: Path to database file + db_path: Path to database file (None for Postgres, ignored if database_url set) db_type: Type of database ensure_migrations: Whether to run migrations config: Optional explicit config. If not provided, reads from ConfigManager. @@ -371,7 +374,7 @@ async def shutdown_db() -> None: # pragma: no cover @asynccontextmanager async def engine_session_factory( - db_path: Path, + db_path: Optional[Path], db_type: DatabaseType = DatabaseType.MEMORY, config: Optional[BasicMemoryConfig] = None, ) -> AsyncGenerator[tuple[AsyncEngine, async_sessionmaker[AsyncSession]], None]: @@ -381,7 +384,7 @@ async def engine_session_factory( for each test. For production use, use get_or_create_db() instead. Args: - db_path: Path to database file + db_path: Path to database file (None for Postgres, ignored if database_url set) db_type: Type of database config: Optional explicit config. If not provided, reads from ConfigManager. """ @@ -441,12 +444,54 @@ async def ensure_schema_exists(engine: AsyncEngine, schema: str) -> None: if not schema or schema == "public": return + from sqlalchemy.schema import CreateSchema + async with engine.begin() as conn: - # Use text() to execute raw SQL - schema names are trusted config values - await conn.execute(text(f'CREATE SCHEMA IF NOT EXISTS "{schema}"')) + # Use SQLAlchemy's CreateSchema DDL for proper identifier quoting + await conn.execute(CreateSchema(schema, if_not_exists=True)) logger.info(f"Ensured schema exists: {schema}") +async def reset_postgres_database(app_config: BasicMemoryConfig) -> None: + """Reset Postgres database by dropping and recreating schema/tables. + + Args: + app_config: Configuration with database_url + + Why: For Postgres, we can't just delete a file like SQLite. We need to + drop tables or schema to reset the database. + """ + if not app_config.database_url: + raise ValueError("database_url must be set for Postgres reset") + + from sqlalchemy.schema import DropSchema, CreateSchema + + # Get search_path from config + search_path = get_search_path_from_config(app_config) + db_url = app_config.database_url + engine = _create_postgres_engine(db_url, app_config) + + try: + async with engine.begin() as conn: + if search_path and search_path != "public": + # Custom schema: drop and recreate the entire schema + logger.info(f"Dropping schema: {search_path}") + await conn.execute(DropSchema(search_path, cascade=True, if_exists=True)) + await conn.execute(CreateSchema(search_path)) + logger.info(f"Recreated schema: {search_path}") + else: + # Public schema: drop only Basic Memory tables + # Order matters due to foreign key constraints + # SECURITY: table_names is a hardcoded constant list - not user input + table_names = ["relation", "observation", "entity", "project", "alembic_version"] + for table_name in table_names: + logger.info(f"Dropping table: {table_name}") + await conn.execute(text(f'DROP TABLE IF EXISTS "{table_name}" CASCADE')) + logger.info("Dropped all Basic Memory tables") + finally: + await engine.dispose() + + async def run_migrations( app_config: BasicMemoryConfig, database_type=DatabaseType.FILESYSTEM ): # pragma: no cover diff --git a/src/basic_memory/services/project_service.py b/src/basic_memory/services/project_service.py index 924d20b2..f866f19f 100644 --- a/src/basic_memory/services/project_service.py +++ b/src/basic_memory/services/project_service.py @@ -868,10 +868,13 @@ def get_system_status(self) -> SystemStatus: """Get system status information.""" import basic_memory - # Get database information + # Get database information (None for Postgres backend) db_path = self.config_manager.config.database_path - db_size = db_path.stat().st_size if db_path.exists() else 0 - db_size_readable = f"{db_size / (1024 * 1024):.2f} MB" + if db_path and db_path.exists(): + db_size = db_path.stat().st_size + db_size_readable = f"{db_size / (1024 * 1024):.2f} MB" + else: + db_size_readable = "N/A (Postgres)" # Get watch service status if available watch_status = None @@ -886,7 +889,7 @@ def get_system_status(self) -> SystemStatus: return SystemStatus( version=basic_memory.__version__, - database_path=str(db_path), + database_path=str(db_path) if db_path else "Postgres (external)", database_size=db_size_readable, watch_status=watch_status, timestamp=datetime.now(), From 9c7ff1488b03d1f29fcd54ec4a77c4d0725ce493 Mon Sep 17 00:00:00 2001 From: Cedric Hurst Date: Mon, 2 Feb 2026 12:47:07 -0600 Subject: [PATCH 3/3] fix: don't override database default search_path Only set search_path in server_settings when explicitly provided in the database URL. This preserves database/role-level default schemas for deployments that rely on them. Addresses review feedback from Codex. Co-Authored-By: Claude Opus 4.5 Signed-off-by: Cedric Hurst --- src/basic_memory/alembic/env.py | 4 ++-- src/basic_memory/db.py | 40 +++++++++++++++++++-------------- tests/test_database_url.py | 14 +++++++----- 3 files changed, 33 insertions(+), 25 deletions(-) diff --git a/src/basic_memory/alembic/env.py b/src/basic_memory/alembic/env.py index 9969fb40..55c6708c 100644 --- a/src/basic_memory/alembic/env.py +++ b/src/basic_memory/alembic/env.py @@ -105,7 +105,7 @@ def get_version_table_schema(connection) -> str | None: connection: Database connection Returns: - Schema name if Postgres with non-public search_path, else None + Schema name if Postgres with explicit search_path, else None Why: When using schema isolation, Alembic's alembic_version table should be in the same schema as the application tables. @@ -119,7 +119,7 @@ def get_version_table_schema(connection) -> str | None: from basic_memory.db import extract_search_path_from_url _, search_path = extract_search_path_from_url(app_config.database_url) - return search_path if search_path != "public" else None + return search_path def do_run_migrations(connection): diff --git a/src/basic_memory/db.py b/src/basic_memory/db.py index 180a92d8..23ad3e67 100644 --- a/src/basic_memory/db.py +++ b/src/basic_memory/db.py @@ -217,26 +217,28 @@ def enable_wal_mode(dbapi_conn, connection_record): return engine -def extract_search_path_from_url(db_url: str) -> tuple[str, str]: +def extract_search_path_from_url(db_url: str) -> tuple[str, Optional[str]]: """Extract search_path from Postgres URL and return clean URL. Args: db_url: Postgres connection URL, possibly with ?search_path=schema Returns: - Tuple of (clean_url without search_path, search_path value) + Tuple of (clean_url without search_path, search_path value or None) Why: asyncpg rejects search_path as a URL query parameter, so we extract it - and pass it via server_settings instead. + and pass it via server_settings instead. Returns None if no search_path was + explicitly provided, allowing the database/role default to be used. """ from urllib.parse import urlparse, parse_qs, urlencode, urlunparse parsed = urlparse(db_url) query_params = parse_qs(parsed.query) - # Extract search_path, default to "public" - search_path_list = query_params.pop("search_path", ["public"]) - search_path = search_path_list[0] if search_path_list else "public" + # Extract search_path if explicitly provided, otherwise None + # Why: Don't override database/role-level default search_path + search_path_list = query_params.pop("search_path", None) + search_path = search_path_list[0] if search_path_list else None # Rebuild URL without search_path new_query = urlencode(query_params, doseq=True) @@ -258,9 +260,19 @@ def _create_postgres_engine(db_url: str, config: BasicMemoryConfig) -> AsyncEngi # --- Extract search_path from URL --- # Trigger: URL contains ?search_path=schema parameter # Why: asyncpg rejects search_path as URL param, must pass via server_settings - # Outcome: clean URL for asyncpg, search_path passed to server_settings + # Outcome: clean URL for asyncpg, search_path passed to server_settings (if provided) clean_url, search_path = extract_search_path_from_url(db_url) + # Build server_settings - only include search_path if explicitly provided + # Why: Don't override database/role-level default search_path when not specified + server_settings: dict[str, str] = { + "application_name": "basic-memory", + # Statement timeout for queries (30s to allow for cold start) + "statement_timeout": "30s", + } + if search_path: + server_settings["search_path"] = search_path + # Use NullPool connection issues. # Assume connection pooler like PgBouncer handles connection pooling. engine = create_async_engine( @@ -274,16 +286,10 @@ def _create_postgres_engine(db_url: str, config: BasicMemoryConfig) -> AsyncEngi "command_timeout": 30, # Allow 30s for initial connection (Neon wake-up time) "timeout": 30, - "server_settings": { - "application_name": "basic-memory", - # Statement timeout for queries (30s to allow for cold start) - "statement_timeout": "30s", - # Schema isolation via search_path (extracted from URL or default "public") - "search_path": search_path, - }, + "server_settings": server_settings, }, ) - logger.debug(f"Created Postgres engine with search_path={search_path}") + logger.debug(f"Created Postgres engine with search_path={search_path or '(database default)'}") return engine @@ -419,7 +425,7 @@ def get_search_path_from_config(app_config: BasicMemoryConfig) -> Optional[str]: app_config: BasicMemoryConfig with database_url Returns: - search_path value if present and not "public", else None + search_path value if explicitly provided, else None """ if not app_config.database_url: return None @@ -428,7 +434,7 @@ def get_search_path_from_config(app_config: BasicMemoryConfig) -> Optional[str]: return None _, search_path = extract_search_path_from_url(app_config.database_url) - return search_path if search_path != "public" else None + return search_path async def ensure_schema_exists(engine: AsyncEngine, schema: str) -> None: diff --git a/tests/test_database_url.py b/tests/test_database_url.py index 73776a5e..7e849bd3 100644 --- a/tests/test_database_url.py +++ b/tests/test_database_url.py @@ -82,12 +82,13 @@ def test_extract_search_path_from_url_with_schema(self): assert "search_path" not in clean_url assert clean_url == "postgresql+asyncpg://user:pass@host/db" - def test_extract_search_path_defaults_to_public(self): - """No search_path param = default to public.""" + def test_extract_search_path_returns_none_when_not_provided(self): + """No search_path param = None (use database default).""" url = "postgresql+asyncpg://user:pass@host/db" clean_url, search_path = extract_search_path_from_url(url) - assert search_path == "public" + # None means "don't override database/role default" + assert search_path is None assert clean_url == url def test_extract_search_path_preserves_other_params(self): @@ -116,14 +117,15 @@ def test_get_search_path_from_config_with_schema(self): search_path = get_search_path_from_config(config) assert search_path == "myschema" - def test_get_search_path_from_config_returns_none_for_public(self): - """get_search_path_from_config returns None for public schema.""" + def test_get_search_path_from_config_returns_public_when_explicit(self): + """get_search_path_from_config returns 'public' when explicitly set.""" config = BasicMemoryConfig( database_url="postgresql+asyncpg://user:pass@host/db?search_path=public" ) + # Explicit "public" should be returned (user intentionally set it) search_path = get_search_path_from_config(config) - assert search_path is None + assert search_path == "public" def test_get_search_path_from_config_returns_none_for_sqlite(self): """get_search_path_from_config returns None for SQLite URLs."""