From 317217dd930afb79b2386c6d0e0ebef884e51791 Mon Sep 17 00:00:00 2001 From: Vatche Isahagian Date: Wed, 1 Apr 2026 16:35:12 -0400 Subject: [PATCH 1/5] fix(tests): add postgres dependencies to dev group for testing Added psycopg[binary]>=3.1 and pgvector>=0.3 to dev dependency group to ensure all unit tests can run during development and CI. This fixes the test collection error for test_postgres_backend.py while keeping postgres support optional for end users (via the pgvector optional dependency group). --- pyproject.toml | 2 ++ uv.lock | 4 ++++ 2 files changed, 6 insertions(+) diff --git a/pyproject.toml b/pyproject.toml index 7cb9539..9d4d060 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -48,7 +48,9 @@ dev = [ "anyio", "detect-secrets", "mypy", + "pgvector>=0.3", "pre-commit", + "psycopg[binary]>=3.1", "pytest", "pytest-cov", "pytest-retry", diff --git a/uv.lock b/uv.lock index 3ec0f26..114c320 100644 --- a/uv.lock +++ b/uv.lock @@ -1050,7 +1050,9 @@ dev = [ { name = "anyio" }, { name = "detect-secrets" }, { name = "mypy" }, + { name = "pgvector" }, { name = "pre-commit" }, + { name = "psycopg", extra = ["binary"] }, { name = "pytest" }, { name = "pytest-cov" }, { name = "pytest-retry" }, @@ -1103,7 +1105,9 @@ dev = [ { name = "anyio" }, { name = "detect-secrets", git = "https://github.com/ibm/detect-secrets?branch=master" }, { name = "mypy" }, + { name = "pgvector", specifier = ">=0.3" }, { name = "pre-commit" }, + { name = "psycopg", extras = ["binary"], specifier = ">=3.1" }, { name = "pytest" }, { name = "pytest-cov" }, { name = "pytest-retry" }, From 89b7d63fc3e2bda62f4b16f909493f75b46df732 Mon Sep 17 00:00:00 2001 From: Vatche Isahagian Date: Wed, 1 Apr 2026 21:45:32 -0400 Subject: [PATCH 2/5] test: restructure test suite into four distinct execution tiers --- README.md | 51 +++++----- docs/LOW_CODE_TRACING.md | 10 +- pyproject.toml | 6 +- tests/conftest.py | 23 ++--- tests/e2e/test_e2e_pipeline.py | 123 ++++++++++++++++++++---- tests/unit/test_cli.py | 1 - tests/unit/test_extract_trajectories.py | 5 +- tests/unit/test_mcp_server.py | 2 + tests/unit/test_phoenix_sync.py | 4 +- tests/unit/test_tracing.py | 4 +- 10 files changed, 155 insertions(+), 74 deletions(-) diff --git a/README.md b/README.md index c189314..46a8ce5 100644 --- a/README.md +++ b/README.md @@ -116,28 +116,29 @@ See the [Low-Code Tracing Guide](docs/LOW_CODE_TRACING.md#6-understanding-tip-pr ### Running Tests -```bash -uv run pytest -``` - -#### Phoenix Sync Tests - -Tests for the Phoenix trajectory sync functionality are **skipped by default** since they require familiarity with the Phoenix integration. To include them: - -```bash -# Run all tests including Phoenix tests -uv run pytest --run-phoenix - -# Run only Phoenix tests -uv run pytest -m phoenix -``` - -#### End-to-End (E2E) Low-Code Verification - -To run the full end-to-end verification pipeline (Agent -> Trace -> Tip): - -```bash -EVOLVE_E2E=true uv run pytest tests/e2e/test_e2e_pipeline.py -s -``` - -See [docs/LOW_CODE_TRACING.md](docs/LOW_CODE_TRACING.md#end-to-end-verification) for more details. +The test suite is organized into 4 cleanly isolated tiers depending on infrastructure requirements: + +1. **Unit Tests (Default)** + Fast, fully-mocked tests verifying core logic and offline pipeline schemas. + ```bash + uv run pytest + ``` + +2. **Platform Integration Tests** + Fast filesystem-level integration tests verifying local tool installation and idempotency. + ```bash + uv run pytest -m platform_integrations + ``` + +3. **End-to-End Infrastructure Tests** + Heavy tests that autonomously spin up a background Phoenix server and simulate full agent workflows. + ```bash + uv run pytest -m e2e --run-e2e + ``` + *(See [docs/LOW_CODE_TRACING.md](docs/LOW_CODE_TRACING.md#end-to-end-verification) for more details).* + +4. **LLM Evaluation Tests** + Tests needing active LLM inference to test resolution pipelines (requires LLM API keys). + ```bash + uv run pytest -m llm + ``` diff --git a/docs/LOW_CODE_TRACING.md b/docs/LOW_CODE_TRACING.md index a6f6122..18c43a6 100644 --- a/docs/LOW_CODE_TRACING.md +++ b/docs/LOW_CODE_TRACING.md @@ -200,7 +200,7 @@ curl "http://localhost:6006/v1/projects/test-agent/spans?limit=5" cd evolve_repo EVOLVE_BACKEND=filesystem \ EVOLVE_TIPS_MODEL="gpt-4" \ -uv run python -m evolve.frontend.cli.cli sync phoenix \ +uv run python -m evolve.cli sync phoenix \ --project test-agent \ --include-errors ``` @@ -209,7 +209,7 @@ uv run python -m evolve.frontend.cli.cli sync phoenix \ ```bash EVOLVE_BACKEND=filesystem \ -uv run python -m evolve.frontend.cli.cli entities list evolve --type guideline +uv run python -m evolve.cli entities list evolve --type guideline ``` ### 6. Understanding Tip Provenance (Metadata) @@ -246,7 +246,7 @@ Evolve includes a comprehensive E2E verification suite to ensure that tracing an You can run the full regression suite using `pytest`: ```bash -EVOLVE_E2E=true uv run pytest tests/e2e/test_e2e_pipeline.py -s +uv run pytest -m e2e --run-e2e -s ``` ### Running Specific Tests @@ -255,10 +255,10 @@ To test a specific agent framework: ```bash # Test smolagents -EVOLVE_E2E=true uv run pytest tests/e2e/test_e2e_pipeline.py -k smolagents -s +uv run pytest tests/e2e/test_e2e_pipeline.py -k smolagents -m e2e --run-e2e -s # Test OpenAI Agents -EVOLVE_E2E=true uv run pytest tests/e2e/test_e2e_pipeline.py -k openai_agents -s +uv run pytest tests/e2e/test_e2e_pipeline.py -k openai_agents -m e2e --run-e2e -s ``` ### What It Tests diff --git a/pyproject.toml b/pyproject.toml index 9d4d060..cbaae11 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -87,12 +87,12 @@ evolve = ["**/*.jinja2"] package = true [tool.pytest.ini_options] -addopts = "--ignore=explorations -m 'not phoenix and not llm'" +addopts = "--ignore=explorations -m 'not llm and not e2e'" markers = [ "e2e", "unit", - "phoenix", - "llm" + "llm", + "platform_integrations" ] anyio_mode = "auto" diff --git a/tests/conftest.py b/tests/conftest.py index 38a8919..1c48fc2 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -31,22 +31,19 @@ def mock_sentence_transformer(request): def pytest_addoption(parser): """Add custom command line options.""" parser.addoption( - "--run-phoenix", + "--run-e2e", action="store_true", default=False, - help="Run Phoenix sync tests (skipped by default)", + help="Run End-to-End infrastructure tests (skipped by default)", ) def pytest_configure(config): - """Override marker filter when --run-phoenix is passed.""" - if config.getoption("--run-phoenix"): - # Remove the default marker filter to include phoenix tests - # Get current markexpr and modify it - markexpr = config.getoption("markexpr", default="") - if markexpr == "not phoenix": - config.option.markexpr = "" - elif "not phoenix" in markexpr: - # Remove "not phoenix" from the expression - new_expr = markexpr.replace("not phoenix and ", "").replace(" and not phoenix", "").replace("not phoenix", "") - config.option.markexpr = new_expr.strip() + """Override marker filter when relevant flags are passed.""" + new_expr = config.getoption("markexpr", default="") + + if config.getoption("--run-e2e"): + # Remove "not e2e" from the expression + new_expr = new_expr.replace("not e2e and ", "").replace(" and not e2e", "").replace("not e2e", "") + + config.option.markexpr = new_expr.strip() diff --git a/tests/e2e/test_e2e_pipeline.py b/tests/e2e/test_e2e_pipeline.py index ab74094..b974a44 100644 --- a/tests/e2e/test_e2e_pipeline.py +++ b/tests/e2e/test_e2e_pipeline.py @@ -18,10 +18,74 @@ {"name": "manual_phoenix", "script": "examples/low_code/manual_phoenix_demo.py", "project_prefix": "verify-manual"}, {"name": "simple_openai", "script": "examples/low_code/simple_openai.py", "project_prefix": "verify-simple-openai"}, ] +import urllib.request +import urllib.error + +@pytest.fixture(scope="session", autouse=True) +def phoenix_server(): + """Ensure a Phoenix server is running before executing E2E tests, and shut it down afterward.""" + # 1. Check if it's already running locally + try: + urllib.request.urlopen("http://localhost:6006/status", timeout=2) + print("\nPhoenix is already running on port 6006.") + yield "http://localhost:6006" + return + except (urllib.error.URLError, ConnectionError): + pass + + import sys + print("\nStarting local Phoenix server for E2E tests...") + + env = os.environ.copy() + env["PHOENIX_PORT"] = "6006" + + # Start it using the current python executable to avoid 'uv run' overhead + # We use run_in_thread=True and a sleepy while loop because run_in_thread=False + # can crash the fastAPI uvicorn startup in some MacOS environments. + script = ( + "import phoenix as px; import time; px.launch_app(run_in_thread=True); " + "import sys; sys.stdout.flush(); time.sleep(86400)" + ) + + proc = subprocess.Popen( + [sys.executable, "-c", script], + env=env, + stdout=subprocess.DEVNULL, + stderr=subprocess.PIPE, + text=True + ) + + # Poll until the server is responsive + max_retries = 30 + for _ in range(max_retries): + try: + # specifically hit the status endpoint + urllib.request.urlopen("http://localhost:6006/status", timeout=2) + print("Phoenix server is up and running.") + break + except Exception: + # Also check if process crashed early + if proc.poll() is not None: + stderr_output = proc.stderr.read() if proc.stderr else "Unknown error" + pytest.fail(f"Phoenix server process crashed unexpectedly: {stderr_output}") + time.sleep(1) + else: + proc.terminate() + stderr_output = proc.stderr.read() if proc.stderr else "Unknown error" + pytest.fail(f"Failed to start local Phoenix server within 30 seconds. Stderr: {stderr_output}") + + yield "http://localhost:6006" + + # Cleanup: shut down Phoenix when tests are done + print("\nShutting down local Phoenix server...") + proc.terminate() + try: + proc.wait(timeout=5) + except subprocess.TimeoutExpired: + proc.kill() @pytest.mark.e2e -@pytest.mark.phoenix @pytest.mark.parametrize("agent_config", AGENTS_TO_TEST, ids=[a["name"] for a in AGENTS_TO_TEST]) def test_e2e_pipeline_agent(agent_config): """ @@ -58,7 +122,16 @@ def test_e2e_pipeline_agent(agent_config): if not os.path.exists(script_path): pytest.fail(f"Script not found: {script_path}") - result = subprocess.run(["uv", "run", "python", script_path], env=env, capture_output=True, text=True) + try: + result = subprocess.run(["uv", "run", "python", script_path], env=env, capture_output=True, text=True, timeout=90) + except subprocess.TimeoutExpired as e: + print(f"❌ Agent execution timed out after 90s") + # Still try to capture what we can from stdout/stderr if possible + stdout = e.stdout if e.stdout else "" + stderr = e.stderr if e.stderr else "" + print("STDOUT:", stdout) + print("STDERR:", stderr) + pytest.fail(f"Agent execution timed out for {agent_name}") if result.returncode != 0: print(f"❌ Agent failed with exit code {result.returncode}") @@ -87,7 +160,11 @@ def test_e2e_pipeline_agent(agent_config): except Exception as e: print(f"ERROR:{{e}}") """ - result = subprocess.run(["uv", "run", "python", "-c", check_script], capture_output=True, text=True) + try: + result = subprocess.run(["uv", "run", "python", "-c", check_script], capture_output=True, text=True, timeout=30) + except subprocess.TimeoutExpired: + print("❌ Phoenix trace verification script timed out") + pytest.fail(f"Trace verification timed out for {project_name}") output = result.stdout + result.stderr if "FOUND_TRACES" in output: @@ -103,9 +180,7 @@ def test_e2e_pipeline_agent(agent_config): sync_command = [ "uv", "run", - "python", - "-m", - "evolve.frontend.cli.cli", + "evolve", "sync", "phoenix", "--project", @@ -128,36 +203,42 @@ def test_e2e_pipeline_agent(agent_config): tips_found = False sync_start = time.time() timeout = 120 # 2 minute timeout for sync + output_lines = [] try: while True: if time.time() - sync_start > timeout: - print("❌ Timeout waiting for tips generation") + print(f"❌ Timeout waiting for tips generation ({timeout}s)") break line = process.stdout.readline() - if not line and process.poll() is not None: - break - - if line: - line_stripped = line.strip() - # print(f"[Sync] {line_stripped}") # Optional: verbose logging - - # Check target log pattern - match = re.search(r"generated (\d+) tips", line_stripped) - if match: - count = match.group(1) - print(f"\n✅ SUCCESS: Generated {count} tips!") - tips_found = True + if not line: + if process.poll() is not None: break + time.sleep(0.1) # Avoid tight loop if no output but process alive + continue + + output_lines.append(line) + line_stripped = line.strip() + # print(f"[Sync] {line_stripped}") # Optional: verbose logging + + # Check target log pattern + match = re.search(r"generated (\d+) tips", line_stripped) + if match: + count = match.group(1) + print(f"\n✅ SUCCESS: Generated {count} tips!") + tips_found = True + break finally: if process.poll() is None: print("Stopping sync process...") process.terminate() try: - process.wait(timeout=5) + process.wait(timeout=10) except subprocess.TimeoutExpired: process.kill() if not tips_found: + full_output = "".join(output_lines) + print(f"Final Sync Output:\n{full_output}") pytest.fail(f"Failed to detect tip generation for {agent_name} within {timeout}s.") diff --git a/tests/unit/test_cli.py b/tests/unit/test_cli.py index 6520591..08ab136 100644 --- a/tests/unit/test_cli.py +++ b/tests/unit/test_cli.py @@ -559,7 +559,6 @@ def test_sync_help(self): @pytest.mark.unit -@pytest.mark.phoenix class TestSyncPhoenix: """Tests for 'evolve sync phoenix' command.""" diff --git a/tests/unit/test_extract_trajectories.py b/tests/unit/test_extract_trajectories.py index 9edd394..87c62ab 100644 --- a/tests/unit/test_extract_trajectories.py +++ b/tests/unit/test_extract_trajectories.py @@ -21,9 +21,8 @@ get_trajectories, ) -# Mark all tests in this module as phoenix tests (skipped by default) -pytestmark = pytest.mark.phoenix - +# Mark all tests in this module as unit tests +pytestmark = pytest.mark.unit # ============================================================================= # parse_content() Tests diff --git a/tests/unit/test_mcp_server.py b/tests/unit/test_mcp_server.py index 458136d..d5ad714 100644 --- a/tests/unit/test_mcp_server.py +++ b/tests/unit/test_mcp_server.py @@ -3,6 +3,8 @@ import pytest from unittest.mock import patch, MagicMock +pytestmark = pytest.mark.unit + from evolve.frontend.mcp.mcp_server import save_trajectory, create_entity from evolve.schema.conflict_resolution import EntityUpdate diff --git a/tests/unit/test_phoenix_sync.py b/tests/unit/test_phoenix_sync.py index 59018ca..4e4621f 100644 --- a/tests/unit/test_phoenix_sync.py +++ b/tests/unit/test_phoenix_sync.py @@ -8,8 +8,8 @@ from evolve.sync.phoenix_sync import PhoenixSync, SyncResult from evolve.schema.tips import TipGenerationResult -# Mark all tests in this module as phoenix tests (skipped by default) -pytestmark = pytest.mark.phoenix +# Mark all tests in this module as unit tests +pytestmark = pytest.mark.unit @pytest.fixture diff --git a/tests/unit/test_tracing.py b/tests/unit/test_tracing.py index a1adbba..0d168f7 100644 --- a/tests/unit/test_tracing.py +++ b/tests/unit/test_tracing.py @@ -12,6 +12,9 @@ import pytest from unittest.mock import patch, MagicMock +# Mark all tests in this module as unit tests +pytestmark = pytest.mark.unit + class TestFrameworkDetection: """Tests for detect_installed_frameworks()""" @@ -197,7 +200,6 @@ def test_returns_provider_after_setup(self): auto._tracer_provider = original_provider -@pytest.mark.unit class TestTracingIntegration: """Integration-style tests for the tracing module.""" From 52f1ed3cb54f0579314246668d66ed4e1fce57b9 Mon Sep 17 00:00:00 2001 From: Vatche Isahagian Date: Wed, 1 Apr 2026 22:11:05 -0400 Subject: [PATCH 3/5] fix(tests): resolve linting errors and force isolated filesystem config in unit test fixture --- tests/e2e/test_e2e_pipeline.py | 27 ++++++++++----------------- tests/unit/test_client.py | 6 +++++- tests/unit/test_mcp_server.py | 4 ++-- 3 files changed, 17 insertions(+), 20 deletions(-) diff --git a/tests/e2e/test_e2e_pipeline.py b/tests/e2e/test_e2e_pipeline.py index b974a44..f4dd65c 100644 --- a/tests/e2e/test_e2e_pipeline.py +++ b/tests/e2e/test_e2e_pipeline.py @@ -4,6 +4,8 @@ import os import datetime import pytest +import urllib.request +import urllib.error from evolve.config.phoenix import phoenix_settings # Configuration @@ -18,8 +20,7 @@ {"name": "manual_phoenix", "script": "examples/low_code/manual_phoenix_demo.py", "project_prefix": "verify-manual"}, {"name": "simple_openai", "script": "examples/low_code/simple_openai.py", "project_prefix": "verify-simple-openai"}, ] -import urllib.request -import urllib.error + @pytest.fixture(scope="session", autouse=True) def phoenix_server(): @@ -34,26 +35,18 @@ def phoenix_server(): pass import sys + print("\nStarting local Phoenix server for E2E tests...") - + env = os.environ.copy() env["PHOENIX_PORT"] = "6006" - + # Start it using the current python executable to avoid 'uv run' overhead # We use run_in_thread=True and a sleepy while loop because run_in_thread=False # can crash the fastAPI uvicorn startup in some MacOS environments. - script = ( - "import phoenix as px; import time; px.launch_app(run_in_thread=True); " - "import sys; sys.stdout.flush(); time.sleep(86400)" - ) + script = "import phoenix as px; import time; px.launch_app(run_in_thread=True); import sys; sys.stdout.flush(); time.sleep(86400)" - proc = subprocess.Popen( - [sys.executable, "-c", script], - env=env, - stdout=subprocess.DEVNULL, - stderr=subprocess.PIPE, - text=True - ) + proc = subprocess.Popen([sys.executable, "-c", script], env=env, stdout=subprocess.DEVNULL, stderr=subprocess.PIPE, text=True) # Poll until the server is responsive max_retries = 30 @@ -125,7 +118,7 @@ def test_e2e_pipeline_agent(agent_config): try: result = subprocess.run(["uv", "run", "python", script_path], env=env, capture_output=True, text=True, timeout=90) except subprocess.TimeoutExpired as e: - print(f"❌ Agent execution timed out after 90s") + print("❌ Agent execution timed out after 90s") # Still try to capture what we can from stdout/stderr if possible stdout = e.stdout if e.stdout else "" stderr = e.stderr if e.stderr else "" @@ -217,7 +210,7 @@ def test_e2e_pipeline_agent(agent_config): break time.sleep(0.1) # Avoid tight loop if no output but process alive continue - + output_lines.append(line) line_stripped = line.strip() # print(f"[Sync] {line_stripped}") # Optional: verbose logging diff --git a/tests/unit/test_client.py b/tests/unit/test_client.py index 89324fa..c2ec8d4 100644 --- a/tests/unit/test_client.py +++ b/tests/unit/test_client.py @@ -12,9 +12,13 @@ from evolve.frontend.client.evolve_client import EvolveClient +from evolve.config.evolve import EvolveConfig + + @pytest.fixture(scope="module") def evolve_client() -> EvolveClient: - evolve_client = EvolveClient() + config = EvolveConfig(backend="filesystem") + evolve_client = EvolveClient(config=config) return evolve_client diff --git a/tests/unit/test_mcp_server.py b/tests/unit/test_mcp_server.py index d5ad714..6e2d0f9 100644 --- a/tests/unit/test_mcp_server.py +++ b/tests/unit/test_mcp_server.py @@ -3,11 +3,11 @@ import pytest from unittest.mock import patch, MagicMock -pytestmark = pytest.mark.unit - from evolve.frontend.mcp.mcp_server import save_trajectory, create_entity from evolve.schema.conflict_resolution import EntityUpdate +pytestmark = pytest.mark.unit + @pytest.fixture def mock_get_client(): From 187ef5dc9e3f455f67aca78853a84deae754ad32 Mon Sep 17 00:00:00 2001 From: Vatche Isahagian Date: Wed, 1 Apr 2026 22:15:35 -0400 Subject: [PATCH 4/5] docs: standardize cli commands to use evolve entrypoint across all readmes --- README.md | 16 +++++++++++----- README_phoenix_sync.md | 12 ++++++------ docs/LOW_CODE_TRACING.md | 4 ++-- 3 files changed, 19 insertions(+), 13 deletions(-) diff --git a/README.md b/README.md index 46a8ce5..47ceeba 100644 --- a/README.md +++ b/README.md @@ -118,26 +118,32 @@ See the [Low-Code Tracing Guide](docs/LOW_CODE_TRACING.md#6-understanding-tip-pr The test suite is organized into 4 cleanly isolated tiers depending on infrastructure requirements: -1. **Unit Tests (Default)** - Fast, fully-mocked tests verifying core logic and offline pipeline schemas. +1. **Default Local Suite** + Runs both fast logic tests (`unit`) and filesystem script verifications (`platform_integrations`). ```bash uv run pytest ``` -2. **Platform Integration Tests** +2. **Unit Tests (Only)** + Fast, fully-mocked tests verifying core logic and offline pipeline schemas. + ```bash + uv run pytest -m unit + ``` + +3. **Platform Integration Tests** Fast filesystem-level integration tests verifying local tool installation and idempotency. ```bash uv run pytest -m platform_integrations ``` -3. **End-to-End Infrastructure Tests** +4. **End-to-End Infrastructure Tests** Heavy tests that autonomously spin up a background Phoenix server and simulate full agent workflows. ```bash uv run pytest -m e2e --run-e2e ``` *(See [docs/LOW_CODE_TRACING.md](docs/LOW_CODE_TRACING.md#end-to-end-verification) for more details).* -4. **LLM Evaluation Tests** +5. **LLM Evaluation Tests** Tests needing active LLM inference to test resolution pipelines (requires LLM API keys). ```bash uv run pytest -m llm diff --git a/README_phoenix_sync.md b/README_phoenix_sync.md index 8049af4..cf4953f 100644 --- a/README_phoenix_sync.md +++ b/README_phoenix_sync.md @@ -32,20 +32,20 @@ No additional dependencies required - uses only stdlib for Phoenix API calls. ```bash # Basic sync with defaults -uv run python -m evolve.cli.cli sync phoenix +uv run evolve sync phoenix # Custom Phoenix URL and namespace -uv run python -m evolve.cli.cli sync phoenix \ +uv run evolve sync phoenix \ --url http://phoenix.example.com:6006 \ --namespace my_namespace # Fetch more spans and include errors -uv run python -m evolve.cli.cli sync phoenix \ +uv run evolve sync phoenix \ --limit 500 \ --include-errors # Full options -uv run python -m evolve.cli.cli sync phoenix \ +uv run evolve sync phoenix \ --url http://localhost:6006 \ --namespace production \ --project my_project \ @@ -145,7 +145,7 @@ Two entity types are stored: ```bash # Sync every hour -0 * * * * cd /path/to/evolve && uv run python -m evolve.cli.cli sync phoenix --limit 100 +0 * * * * cd /path/to/evolve && uv run evolve sync phoenix --limit 100 ``` ### Systemd Timer @@ -158,7 +158,7 @@ Description=Evolve Phoenix Sync [Service] Type=oneshot WorkingDirectory=/path/to/evolve -ExecStart=/path/to/uv run python -m evolve.cli.cli sync phoenix +ExecStart=/path/to/uv run evolve sync phoenix Environment=PHOENIX_URL=http://localhost:6006 Environment=EVOLVE_NAMESPACE_ID=production ``` diff --git a/docs/LOW_CODE_TRACING.md b/docs/LOW_CODE_TRACING.md index 18c43a6..8294f10 100644 --- a/docs/LOW_CODE_TRACING.md +++ b/docs/LOW_CODE_TRACING.md @@ -200,7 +200,7 @@ curl "http://localhost:6006/v1/projects/test-agent/spans?limit=5" cd evolve_repo EVOLVE_BACKEND=filesystem \ EVOLVE_TIPS_MODEL="gpt-4" \ -uv run python -m evolve.cli sync phoenix \ +uv run evolve sync phoenix \ --project test-agent \ --include-errors ``` @@ -209,7 +209,7 @@ uv run python -m evolve.cli sync phoenix \ ```bash EVOLVE_BACKEND=filesystem \ -uv run python -m evolve.cli entities list evolve --type guideline +uv run evolve entities list evolve --type guideline ``` ### 6. Understanding Tip Provenance (Metadata) From 079fbbd2c9e5379e28b9e1d28b9fae75eea60bd7 Mon Sep 17 00:00:00 2001 From: Vatche Isahagian Date: Wed, 1 Apr 2026 22:16:33 -0400 Subject: [PATCH 5/5] test: dynamically inject E2E phoenix fixture URL --- tests/e2e/test_e2e_pipeline.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/tests/e2e/test_e2e_pipeline.py b/tests/e2e/test_e2e_pipeline.py index f4dd65c..898756e 100644 --- a/tests/e2e/test_e2e_pipeline.py +++ b/tests/e2e/test_e2e_pipeline.py @@ -6,10 +6,8 @@ import pytest import urllib.request import urllib.error -from evolve.config.phoenix import phoenix_settings # Configuration -PHOENIX_URL = phoenix_settings.url # Use a session-scope timestamp or generate per test? # Per-test ensures no collisions even if run in parallel (though these should satisfy sequential) TIMESTAMP = datetime.datetime.now().strftime("%Y%m%d_%H%M%S") @@ -80,7 +78,7 @@ def phoenix_server(): @pytest.mark.e2e @pytest.mark.parametrize("agent_config", AGENTS_TO_TEST, ids=[a["name"] for a in AGENTS_TO_TEST]) -def test_e2e_pipeline_agent(agent_config): +def test_e2e_pipeline_agent(agent_config, phoenix_server): """ Runs the full E2E pipeline for a specific agent configuration: 1. Executing the agent script @@ -144,7 +142,7 @@ def test_e2e_pipeline_agent(agent_config): import phoenix as px import sys try: - c = px.Client(endpoint='{PHOENIX_URL}') + c = px.Client(endpoint='{phoenix_server}') df = c.get_spans_dataframe(project_name='{project_name}') if df is not None and not df.empty: print(f"FOUND_TRACES:{{len(df)}}")