diff --git a/.github/workflows/adk-py-test.yaml b/.github/workflows/adk-py-test.yaml index 093ee05e..f3e3d8ca 100644 --- a/.github/workflows/adk-py-test.yaml +++ b/.github/workflows/adk-py-test.yaml @@ -15,40 +15,30 @@ jobs: steps: - uses: actions/checkout@v4 - - name: Set up Python - uses: actions/setup-python@v5 + - name: Set up mise + uses: jdx/mise-action@v3 with: - python-version: ${{ inputs.python-version }} - - - name: Set up uv - uses: astral-sh/setup-uv@v3 - with: - enable-cache: true - cache-dependency-glob: | - py/requirements-build.txt - py/requirements-dev.txt - py/setup.py - py/noxfile.py - integrations/adk-py/pyproject.toml - integrations/adk-py/uv.lock + cache: true + experimental: true + install_args: python@${{ inputs.python-version }} uv - name: Install deprecated package compatibility dependencies working-directory: integrations/adk-py run: | - uv sync + mise exec python@${{ inputs.python-version }} -- uv sync - name: Lint deprecated compatibility package working-directory: integrations/adk-py run: | - uv run ruff check src + mise exec python@${{ inputs.python-version }} -- uv run ruff check $(git ls-files '*.py' | grep -v 'examples/') - name: Run deprecated compatibility tests working-directory: integrations/adk-py run: | - uv run pytest src/tests/test_reexports.py + mise exec python@${{ inputs.python-version }} -- uv run pytest src/tests/test_reexports.py - name: Test deprecated package import working-directory: integrations/adk-py run: | - uv run python -c "import braintrust_adk; print('braintrust_adk imported successfully')" - uv run python -c "from braintrust_adk import setup_braintrust; print('setup_braintrust imported successfully')" + mise exec python@${{ inputs.python-version }} -- uv run python -c "import braintrust_adk; print('braintrust_adk imported successfully')" + mise exec python@${{ inputs.python-version }} -- uv run python -c "from braintrust_adk import setup_braintrust; print('setup_braintrust imported successfully')" diff 
--git a/.github/workflows/langchain-py-test.yaml b/.github/workflows/langchain-py-test.yaml index 127905a5..2ab767a8 100644 --- a/.github/workflows/langchain-py-test.yaml +++ b/.github/workflows/langchain-py-test.yaml @@ -22,29 +22,27 @@ jobs: steps: - uses: actions/checkout@v4 - - name: Set up uv - uses: astral-sh/setup-uv@v3 + - name: Set up mise + uses: jdx/mise-action@v3 with: - enable-cache: true - cache-dependency-glob: | - integrations/langchain-py/pyproject.toml - integrations/langchain-py/uv.lock + cache: true + experimental: true + install_args: python@${{ inputs.python-version }} uv - name: Install dependencies run: | - uv python install ${{ inputs.python-version }} - uv sync + mise exec python@${{ inputs.python-version }} -- uv sync - name: Lint with ruff if: ${{ inputs.os == 'ubuntu-latest' }} run: | - uv run ruff check $(git ls-files '*.py' | grep -v 'examples/') + mise exec python@${{ inputs.python-version }} -- uv run ruff check $(git ls-files '*.py' | grep -v 'examples/') - name: Run tests run: | - uv run pytest src + mise exec python@${{ inputs.python-version }} -- uv run pytest src - name: Test import run: | - uv run python -c "import braintrust_langchain; print('braintrust_langchain imported successfully')" - uv run python -c "from braintrust_langchain import BraintrustCallbackHandler; print('BraintrustCallbackHandler imported successfully')" + mise exec python@${{ inputs.python-version }} -- uv run python -c "import braintrust_langchain; print('braintrust_langchain imported successfully')" + mise exec python@${{ inputs.python-version }} -- uv run python -c "from braintrust_langchain import BraintrustCallbackHandler; print('BraintrustCallbackHandler imported successfully')" diff --git a/.github/workflows/lint.yaml b/.github/workflows/lint.yaml index 27cd1e2c..4ea347a8 100644 --- a/.github/workflows/lint.yaml +++ b/.github/workflows/lint.yaml @@ -11,10 +11,14 @@ jobs: runs-on: ubuntu-latest timeout-minutes: 10 steps: - - uses: actions/checkout@v3 
+ - uses: actions/checkout@v4 with: fetch-depth: 0 # Fetch full history for proper diff - - uses: actions/setup-python@v3 - - uses: pre-commit/action@v3.0.0 + - name: Set up mise + uses: jdx/mise-action@v3 with: - extra_args: --from-ref origin/${{ github.base_ref || 'main' }} --to-ref HEAD + cache: true + experimental: true + - name: Run pre-commit + run: | + mise exec -- pre-commit run --from-ref origin/${{ github.base_ref || 'main' }} --to-ref HEAD diff --git a/.github/workflows/publish-py-sdk.yaml b/.github/workflows/publish-py-sdk.yaml index c500dd06..8158f46c 100644 --- a/.github/workflows/publish-py-sdk.yaml +++ b/.github/workflows/publish-py-sdk.yaml @@ -46,15 +46,15 @@ jobs: - uses: actions/checkout@v4 with: fetch-depth: 0 # Fetch all history for changelog generation - - name: Set up Python - uses: actions/setup-python@v5 + - name: Set up mise + uses: jdx/mise-action@v3 with: - python-version: "3.13" + cache: true + experimental: true + install_args: python@3.13 - name: Build and verify - working-directory: py run: | - make install-dev - make verify-build + mise exec python@3.13 -- make -C py install-dev verify-build - name: Upload build artifacts uses: actions/upload-artifact@v4 with: diff --git a/.github/workflows/py.yaml b/.github/workflows/py.yaml index 6e098fd1..eaefedd7 100644 --- a/.github/workflows/py.yaml +++ b/.github/workflows/py.yaml @@ -26,25 +26,27 @@ jobs: steps: - uses: actions/checkout@v4 - - name: Set up Python - uses: actions/setup-python@v5 + - name: Set up mise + uses: jdx/mise-action@v3 with: - python-version: ${{ matrix.python-version }} + cache: true + experimental: true + install_args: python@${{ matrix.python-version }} - name: Install dependencies run: | - cd py && make install-dev + mise exec python@${{ matrix.python-version }} -- make -C py install-dev - name: Test whether the Python SDK can be installed run: | # This is already done by make install-dev, but we're keeping this as a separate step # to explicitly verify that 
installation works - python -m uv pip install -e ./py[all] + mise exec python@${{ matrix.python-version }} -- python -m uv pip install -e ./py[all] - name: Test whether the Python SDK can be imported run: | - python -c 'import braintrust' + mise exec python@${{ matrix.python-version }} -- python -c 'import braintrust' - name: Run nox tests (shard ${{ matrix.shard }}/2) shell: bash run: | - cd py && ./scripts/nox-matrix.sh ${{ matrix.shard }} 2 + mise exec python@${{ matrix.python-version }} -- bash ./py/scripts/nox-matrix.sh ${{ matrix.shard }} 2 adk-py: uses: ./.github/workflows/adk-py-test.yaml @@ -68,13 +70,15 @@ jobs: timeout-minutes: 10 steps: - uses: actions/checkout@v4 - - name: Set up Python - uses: actions/setup-python@v5 + - name: Set up mise + uses: jdx/mise-action@v3 with: - python-version: "3.13" + cache: true + experimental: true + install_args: python@3.13 - name: Install build dependencies and build wheel run: | - cd py && make install-build-deps && make build + mise exec python@3.13 -- make -C py install-build-deps build - name: Upload wheel as artifact uses: actions/upload-artifact@v4 with: diff --git a/.github/workflows/test-publish-py-sdk.yaml b/.github/workflows/test-publish-py-sdk.yaml index d7427dd7..6eee653d 100644 --- a/.github/workflows/test-publish-py-sdk.yaml +++ b/.github/workflows/test-publish-py-sdk.yaml @@ -33,18 +33,18 @@ jobs: - uses: actions/checkout@v4 with: ref: ${{ github.event.inputs.ref }} - - name: Set up Python - uses: actions/setup-python@v5 + - name: Set up mise + uses: jdx/mise-action@v3 with: - python-version: "3.13" + cache: true + experimental: true + install_args: python@3.13 - name: Install build dependencies - working-directory: py run: | - make install-dev + mise exec python@3.13 -- make -C py install-dev - name: Build and verify - working-directory: py run: | - make verify-build + mise exec python@3.13 -- make -C py verify-build - name: Get version from built wheel id: get_version run: | diff --git 
a/.tool-versions b/.tool-versions index 814a2d00..aabd9423 100644 --- a/.tool-versions +++ b/.tool-versions @@ -1,2 +1,3 @@ python 3.13.3 pre-commit 4.2.0 +uv 0.7.8 diff --git a/AGENTS.md b/AGENTS.md new file mode 100644 index 00000000..ab9d11e8 --- /dev/null +++ b/AGENTS.md @@ -0,0 +1,131 @@ +# Braintrust SDK Agent Guide + +Guide for contributing to the Braintrust Python SDK repository. + +## Defaults + +- Use `mise` as the source of truth for tools and environment. +- Prefer `py/` commands over root `make` targets when working on the SDK itself. +- Keep changes narrow and run the smallest relevant test session first. +- Do not rely on optional provider packages being installed unless the active nox session installs them. + +## Repo Map + +- `py/`: main Python package, tests, examples, nox sessions, release build +- `integrations/`: separate integration packages +- `internal/golden/`: compatibility and golden projects +- `docs/`: supporting docs + +Important code areas in `py/src/braintrust/`: + +- core SDK modules: top-level package files +- wrappers/integrations: `wrappers/` +- temporal: `contrib/temporal/` +- CLI/devserver: `cli/`, `devserver/` +- tests: colocated `test_*.py` + +## Setup + +Preferred repo bootstrap: + +```bash +mise install +make develop +``` + +Package-focused setup: + +```bash +cd py +make install-dev +``` + +Install optional provider dependencies only if needed: + +```bash +cd py +make install-optional +``` + +## Commands + +Preferred SDK workflow: + +```bash +cd py +make lint +make test-core +nox -l +``` + +Targeted wrapper/session runs: + +```bash +cd py +nox -s "test_openai(latest)" +nox -s "test_openai(latest)" -- -k "test_chat_metrics" +``` + +Root `Makefile` exists as a convenience wrapper. The authoritative SDK workflow is in `py/Makefile` and `py/noxfile.py`. + +## Tests + +`py/noxfile.py` is the source of truth for compatibility coverage. + +Key facts: + +- `test_core` runs without optional vendor packages. 
+- wrapper coverage is split across dedicated nox sessions by provider/version. +- `pylint` installs the broad dependency surface before checking files. +- `test-wheel` is a wheel sanity check and requires a built wheel first. + +When changing behavior, run the narrowest affected session first, then expand only if needed. + +## VCR + +VCR cassette directories: + +- `py/src/braintrust/cassettes/` +- `py/src/braintrust/wrappers/cassettes/` +- `py/src/braintrust/devserver/cassettes/` + +Behavior from `py/src/braintrust/conftest.py`: + +- local default: `record_mode="once"` +- CI default: `record_mode="none"` +- wheel-mode skips VCR-marked tests +- test fixtures inject dummy API keys and reset global state + +Common commands: + +```bash +cd py +nox -s "test_openai(latest)" +nox -s "test_openai(latest)" -- --disable-vcr +nox -s "test_openai(latest)" -- --vcr-record=all -k "test_openai_chat_metrics" +``` + +Only re-record cassettes when the behavior change is intentional. If in doubt, ask the user. + +## Build Notes + +Build from `py/`: + +```bash +cd py +make build +``` + +Important caveat: + +- `py/scripts/template-version.sh` rewrites `py/src/braintrust/version.py` during build. +- `py/Makefile` restores that file afterward with `git checkout`. + +Avoid editing `py/src/braintrust/version.py` while also running build commands. + +## Editing Guidance + +- Keep tests near the code they cover. +- Reuse existing fixtures and cassette patterns. +- If a change affects examples or integrations, update the nearest example or focused test. +- For CLI/devserver changes, consider whether wheel-mode behavior also needs coverage. diff --git a/CLAUDE.md b/CLAUDE.md deleted file mode 100644 index 549dff99..00000000 --- a/CLAUDE.md +++ /dev/null @@ -1,27 +0,0 @@ -# Braintrust SDK - -Python client for Braintrust, plus wrapper libraries for OpenAI, Anthropic, and other AI providers. 
- -## Structure - -``` -├── py/ # Python SDK (see py/CLAUDE.md) -├── integrations/ # Python integrations (adk-py, langchain-py) -├── internal/ # Golden tests -└── scripts/ # Dev scripts -``` - -## Quick Reference - -| Task | Command | -| ------------- | ------------- | -| Run all tests | `make test` | -| Lint/format | `make fixup` | -| Python lint | `make pylint` | - -## Setup - -```bash -make develop # Create venv and install deps -source env.sh # Activate environment -``` diff --git a/CLAUDE.md b/CLAUDE.md new file mode 120000 index 00000000..47dc3e3d --- /dev/null +++ b/CLAUDE.md @@ -0,0 +1 @@ +AGENTS.md \ No newline at end of file diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index f36a3d0f..f795ceb9 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -2,173 +2,149 @@ Guide for contributing to the Braintrust Python SDK. -## Repository Structure - -``` -braintrust-sdk-python/ -├── py/ # Python SDK -│ ├── src/braintrust/ # Source code -│ │ ├── wrappers/ # Provider wrappers (OpenAI, Anthropic, Google, etc.) -│ │ ├── contrib/ # Community integrations (Temporal, etc.) 
-│ │ ├── devserver/ # Local dev server / CLI -│ │ └── conftest.py # Shared test fixtures -│ ├── noxfile.py # Test session definitions -│ └── Makefile # Build/test commands -├── integrations/ -│ ├── langchain-py/ # LangChain integration -│ └── adk-py/ # Google ADK integration -├── internal/ # Golden tests -├── scripts/ # Dev scripts -├── docs/ # Documentation -└── Makefile # Top-level commands -``` - ## Setup ### Prerequisites -- Python 3.10+ (3.9 supported but some test sessions are skipped) -- [uv](https://github.com/astral-sh/uv) (installed automatically by `make install-dev`) +- Python 3.10+ +- [mise](https://mise.jdx.dev/) for tool installation and repo-local environment management ### Getting Started ```bash -# Clone the repo git clone https://github.com/braintrustdata/braintrust-sdk-python.git cd braintrust-sdk-python - -# Create venv and install all dependencies +mise install make develop - -# Activate the environment -source env.sh ``` -### Python SDK Development +If you use `mise activate` in your shell, entering the repo will automatically expose the configured tools. If you do not, you can still run commands explicitly with `mise exec -- ...`. + +## Repo Layout + +- `py/`: main Python SDK +- `integrations/`: separate integration packages such as LangChain and ADK +- `internal/golden/`: golden and compatibility projects +- `docs/`: supporting docs + +Most SDK changes should happen under `py/`. + +## Common Workflows + +### Python SDK ```bash cd py - -# Install dev dependencies make install-dev - -# Install optional provider packages (for wrapper development) -make install-optional +make test-core +make lint +nox -l ``` -## Running Tests +Run a focused session: -### Python SDK +```bash +cd py +nox -s "test_openai(latest)" +``` -Tests use [nox](https://nox.thea.codes/) to run across different dependency versions. 
+Run a single test subset: ```bash cd py +nox -s "test_openai(latest)" -- -k "test_chat_metrics" +``` -# Run all test sessions -make test +Install optional provider packages only when you need them: -# Run core tests only (no optional dependencies) -make test-core +```bash +cd py +make install-optional +``` -# List all available sessions -nox -l +### Repo-Level Commands -# Run a specific session -nox -s "test_openai(latest)" -nox -s "test_anthropic(latest)" -nox -s "test_temporal(latest)" +The root `Makefile` is a convenience wrapper around `py/Makefile`. -# Run a single test within a session -nox -s "test_openai(latest)" -- -k "test_chat_metrics" +Useful root commands: + +```bash +make fixup +make test-core +make lint ``` -### Integration Tests +`make test-wheel` requires a built wheel first. + +### Integration Packages + +LangChain: ```bash -# LangChain cd integrations/langchain-py uv sync uv run pytest src - -# ADK -cd integrations/adk-py -uv sync -uv run pytest ``` -### Linting +ADK: ```bash -# From repo root — runs pre-commit hooks (formatting, etc.) -make fixup - -# Python-specific lint (pylint) -cd py && make lint +cd integrations/adk-py +uv sync +uv run pytest ``` -## VCR Cassette Testing - -Tests for API provider wrappers use VCR.py to record and replay HTTP interactions. This means most tests run without real API keys. +## Testing Notes -See [docs/vcr-testing.md](docs/vcr-testing.md) for full details. Key points: +The SDK uses [nox](https://nox.thea.codes/) for compatibility testing across optional providers and versions. `py/noxfile.py` is the source of truth for available sessions. -- **Locally:** VCR records new cassettes on first run (`record_mode="once"`). You need a real API key to record. -- **In CI:** VCR only replays existing cassettes (`record_mode="none"`). No API keys needed. -- **Modifying tests:** If your change alters the HTTP request a test makes, you must re-record the cassette locally with a real API key and commit it. 
-- **New tests:** Add `@pytest.mark.vcr`, record the cassette locally, and commit the cassette file. +### VCR Tests -## CI Overview +Many wrapper and devserver tests use VCR cassettes. -CI runs on GitHub Actions. All workflows are in `.github/workflows/`. +- Locally, missing cassettes can be recorded with `record_mode="once"`. +- In CI, missing cassettes fail because `record_mode="none"` is used. +- If your change intentionally changes HTTP behavior, re-record the affected cassettes and commit them. -### Workflows +Useful example: -| Workflow | File | Trigger | What it does | -|---|---|---|---| -| **py** | `py.yaml` | PR (py/integrations changes), push to main | Runs nox test matrix across Python 3.10–3.13 on Ubuntu + Windows, plus integration tests | -| **langchain-py** | `langchain-py-test.yaml` | Called by `py.yaml` | Lint + tests for the LangChain integration | -| **adk-py** | `adk-py-test.yaml` | Called by `py.yaml` | Lint + tests for the Google ADK integration | -| **lint** | `lint.yaml` | PR | Pre-commit hooks and formatting checks | -| **publish** | `publish-py-sdk.yaml` | Tag push (`py-sdk-v*.*.*`) | Build, test wheel, publish to PyPI, create GitHub release | -| **test-publish** | `test-publish-py-sdk.yaml` | Manual dispatch | Publish to TestPyPI for pre-release validation | +```bash +cd py +nox -s "test_openai(latest)" -- --vcr-record=all -k "test_openai_chat_metrics" +``` -### No API Key Secrets Required +### Fixtures -CI workflows do **not** use real API key secrets (`ANTHROPIC_API_KEY`, `OPENAI_API_KEY`, `GEMINI_API_KEY`). Tests rely on VCR cassettes with dummy API keys provided by test fixtures. This means: +Shared test fixtures live in `py/src/braintrust/conftest.py`. -- Forks can run CI without configuring any secrets. -- The `test_latest_wrappers_novcr` nox session (which disables VCR) is automatically skipped in CI. 
+Common ones include: -### Test Sharding +- dummy API key setup for VCR-backed tests +- Braintrust global state reset between tests +- wheel-mode skipping for VCR tests -The main `py.yaml` workflow shards nox sessions across 2 parallel jobs per Python version/OS combination using `scripts/nox-matrix.sh`. +The `memory_logger` fixture from `braintrust.test_helpers` is useful for asserting on logged spans without a real Braintrust backend. -## Test Fixtures +## CI -Key auto-applied fixtures defined in `py/src/braintrust/conftest.py`: +GitHub Actions workflows live in `.github/workflows/`. -| Fixture | Purpose | -|---|---| -| `setup_braintrust` | Sets dummy API keys (OpenAI, Google, Anthropic) for VCR tests | -| `override_app_url_for_tests` | Points `BRAINTRUST_APP_URL` to production for consistent behavior | -| `reset_braintrust_state` | Resets global SDK state after each test | -| `skip_vcr_tests_in_wheel_mode` | Skips VCR tests when testing from an installed wheel | +Main workflows: -The `memory_logger` fixture (from `braintrust.test_helpers`) lets you capture logged spans in-memory without a real Braintrust connection: +- `py.yaml`: SDK test matrix +- `langchain-py-test.yaml`: LangChain integration tests +- `adk-py-test.yaml`: ADK integration tests +- `lint.yaml`: pre-commit and formatting checks +- `publish-py-sdk.yaml`: PyPI release +- `test-publish-py-sdk.yaml`: TestPyPI release validation -```python -def test_something(memory_logger): - # ... exercise code that logs spans ... - spans = memory_logger.pop() - assert len(spans) == 1 -``` +CI uses VCR cassettes and dummy credentials, so forks do not need provider API secrets for normal test runs. ## Submitting Changes -1. Create a branch for your changes. -2. Make your changes and add/update tests. -3. If you modified VCR tests, re-record cassettes and commit them. -4. Run `make fixup` to format and lint. -5. Run relevant test sessions to verify (e.g. `nox -s "test_openai(latest)"`). +1. 
Make your change in the narrowest relevant area. +2. Add or update tests. +3. Re-record cassettes if the HTTP behavior change is intentional. +4. Run the smallest relevant local checks first, then broader ones if needed. +5. Run `make fixup` before opening a PR. 6. Open a pull request against `main`. diff --git a/Makefile b/Makefile index 7709d09a..9185d785 100644 --- a/Makefile +++ b/Makefile @@ -1,59 +1,45 @@ SHELL := /bin/bash -ROOT_DIR:=$(shell dirname $(realpath $(firstword $(MAKEFILE_LIST)))) -VENV_PRE_COMMIT := ${ROOT_DIR}/venv/.pre_commit -VENV_DOCS_REBUILD := ${ROOT_DIR}/venv/.docs_rebuild -.PHONY: all -all: ${VENV_PRE_COMMIT} +.PHONY: help develop install-dev install-deps fixup test test-core test-wheel lint pylint nox -.PHONY: py -py: ${VENV_PYTHON_PACKAGES} - bash -c 'source venv/bin/activate' +develop: install-dev + mise exec -- pre-commit install + @echo "Use 'mise activate' in your shell for automatic tool and env activation." -VENV_INITIALIZED := venv/.initialized - -${VENV_INITIALIZED}: - rm -rf venv && python -m venv venv - @touch ${VENV_INITIALIZED} - -VENV_PYTHON_PACKAGES := venv/.python_packages - -${VENV_PYTHON_PACKAGES}: ${VENV_INITIALIZED} - bash -c 'source venv/bin/activate && python -m pip install --upgrade pip setuptools' - bash -c 'source venv/bin/activate && python -m pip install -e py[all]' - @touch $@ - -${VENV_PRE_COMMIT}: ${VENV_PYTHON_PACKAGES} - bash -c 'source venv/bin/activate && pre-commit install' - @touch $@ - -develop: ${VENV_PRE_COMMIT} - @echo "--\nRun "source env.sh" to enter development mode!" 
- -.PHONY: install-dev install-dev: mise install -.PHONY: install-deps install-deps: - cd py && make install-dev + mise exec -- $(MAKE) -C py install-dev fixup: - source env.sh && pre-commit run --all-files + mise exec -- pre-commit run --all-files -.PHONY: test test-py nox pylint +test: + mise exec -- $(MAKE) -C py test -test: test-py-core test-py-sdk +test-core: + mise exec -- $(MAKE) -C py test-core -test-py-core: - source env.sh && python -m unittest discover ./core/py/src +test-wheel: + mise exec -- $(MAKE) -C py test-wheel -test-py-sdk: nox - source env.sh && cd py && pytest +lint pylint: + mise exec -- $(MAKE) -C py lint +nox: test -nox: - cd py && make test +help: + @echo "Available targets:" + @echo " develop - Install tools with mise, install py/ deps, and install pre-commit hooks" + @echo " fixup - Run pre-commit hooks across the repo" + @echo " install-deps - Install Python SDK dependencies via py/Makefile" + @echo " install-dev - Install pinned tools and create/update the repo env via mise" + @echo " lint - Run Python SDK lint checks via py/Makefile" + @echo " pylint - Alias for lint" + @echo " nox - Alias for test" + @echo " test - Run the Python SDK nox matrix via py/Makefile" + @echo " test-core - Run Python SDK core tests via py/Makefile" + @echo " test-wheel - Run Python SDK wheel sanity tests via py/Makefile (requires a built wheel)" -pylint: - cd py && make lint +.DEFAULT_GOAL := help diff --git a/integrations/adk-py/README.md b/integrations/adk-py/README.md index e1552283..4ada8177 100644 --- a/integrations/adk-py/README.md +++ b/integrations/adk-py/README.md @@ -136,6 +136,30 @@ Once you've set up the integration, you can view your traces in the Braintrust d - Token usage and latency metrics - Any errors or warnings +## Development + +To contribute to this integration: + +```bash +# Clone the repository +git clone https://github.com/braintrustdata/braintrust-sdk-python.git +cd braintrust-sdk-python/integrations/adk-py + +mise install +uv sync + +# Run examples 
+cd examples + +# simple programmatic agent call +uv run manual.py + +# or use the adk web UI +uv run adk web --port 8888 +``` + +If your shell is not configured with `mise activate`, prefix commands with `mise exec --`. + ## Documentation - [Braintrust Documentation](https://www.braintrust.dev/docs) diff --git a/integrations/adk-py/examples/Makefile b/integrations/adk-py/examples/Makefile index 6de2f5bf..5e089f8c 100644 --- a/integrations/adk-py/examples/Makefile +++ b/integrations/adk-py/examples/Makefile @@ -1,44 +1,33 @@ -# Variables PORT ?= 8888 -UV := uv run +UV ?= uv +RUN := $(UV) run -# Phony targets .PHONY: dev help clean install test lint format check - -# Default target .DEFAULT_GOAL := help -# Development server dev: - $(UV) adk web --port $(PORT) + $(RUN) adk web --port $(PORT) -# Install dependencies install: - uv sync + $(UV) sync -# Run tests test: - $(UV) pytest + $(RUN) pytest -# Lint code lint: - $(UV) ruff check . + $(RUN) ruff check . -# Format code format: - $(UV) ruff format . + $(RUN) ruff format . -# Type check check: - $(UV) mypy . + $(RUN) mypy . -# Clean up temporary files clean: find . -type f -name "*.pyc" -delete find . -type d -name "__pycache__" -delete find . 
-type d -name "*.egg-info" -exec rm -rf {} + -# Show help help: @echo "Available targets:" @echo " dev - Start development server (default port: $(PORT))" diff --git a/integrations/langchain-py/Makefile b/integrations/langchain-py/Makefile index d515c98b..67cc3125 100644 --- a/integrations/langchain-py/Makefile +++ b/integrations/langchain-py/Makefile @@ -1,38 +1,28 @@ -SHELL := /bin/bash -ROOT_DIR:=$(shell dirname $(realpath $(firstword $(MAKEFILE_LIST)))) -VENV_PRE_COMMIT := ${ROOT_DIR}/venv/.pre_commit +UV ?= uv +RUN := $(UV) run -.PHONY: all -all: ${VENV_PRE_COMMIT} +.PHONY: help install test lint format fixup -.PHONY: py -py: ${VENV_PYTHON_PACKAGES} - bash -c 'source venv/bin/activate' +install: + $(UV) sync -VENV_INITIALIZED := venv/.initialized - -${VENV_INITIALIZED}: - rm -rf venv && python -m venv venv - @touch ${VENV_INITIALIZED} - -VENV_PYTHON_PACKAGES := venv/.python_packages - -${VENV_PYTHON_PACKAGES}: ${VENV_INITIALIZED} - bash -c 'source venv/bin/activate && python -m pip install --upgrade pip setuptools' - bash -c 'source venv/bin/activate && python -m pip install -e .[all]' - @touch $@ +test: + $(RUN) pytest src -${VENV_PRE_COMMIT}: ${VENV_PYTHON_PACKAGES} - bash -c 'source venv/bin/activate && pre-commit install' - @touch $@ +lint: + $(RUN) ruff check . -develop: ${VENV_PRE_COMMIT} - @echo "--\nRun "source env.sh" to enter development mode!" +format: + $(RUN) ruff format . 
-fixup: - source env.sh && pre-commit run --all-files +fixup: format -.PHONY: test +help: + @echo "Available targets:" + @echo " fixup - Format the integration sources with ruff" + @echo " format - Format the integration sources with ruff" + @echo " install - Install dependencies with uv sync" + @echo " lint - Run ruff checks" + @echo " test - Run pytest against src/" -test: - source env.sh && pytest +.DEFAULT_GOAL := help diff --git a/mise.toml b/mise.toml index 313028d1..f92d09ea 100644 --- a/mise.toml +++ b/mise.toml @@ -12,7 +12,6 @@ _.file = ".env" [tools] ruff = "0.12.7" -uv = "latest" [hooks] postinstall = "make install-deps" diff --git a/py/CLAUDE.md b/py/CLAUDE.md deleted file mode 100644 index b61382a1..00000000 --- a/py/CLAUDE.md +++ /dev/null @@ -1,101 +0,0 @@ -# Python SDK - -## Setup - -To run examples or use optional integrations, install the extra dependencies: - -```bash -make install-dev # Development dependencies -make install-optional # Optional integration dependencies -``` - -## Running Tests - -```bash -make test # All tests via nox -make test-core # Core tests only -make lint # Pylint + formatting -make clean # Remove build artifacts -``` - -**Run a single test:** - -```bash -nox -s "test_openai(latest)" -- -k "test_chat_metrics" -``` - -**Common test sessions:** - -```bash -nox -l # List all sessions -nox -s "test_openai(latest)" # OpenAI wrapper (latest version) -nox -s "test_anthropic(latest)" # Anthropic wrapper -nox -s "test_temporal(latest)" # Temporal integration -nox -s test_openai # All OpenAI versions -``` - -- we use pytest, so you don't need to add extra messages to assert. `assert x -== 1` is usually enough. - -## VCR Cassettes - -Tests use VCR to record HTTP interactions so they run without live API calls. 
- -**Cassette location:** `src/braintrust/wrappers/cassettes/` - -**Using in tests:** - -```python -@pytest.mark.vcr -def test_openai_chat_metrics(memory_logger): - client = wrap_openai(openai.OpenAI()) - response = client.chat.completions.create(...) -``` - -**VCR commands:** - -```bash -# Run tests normally (play back from cassettes) -nox -s "test_openai(latest)" - -# Run with real API calls (no VCR) -export OPENAI_API_KEY="sk-..." -nox -s "test_openai(latest)" -- --disable-vcr - -# Record new cassettes (overwrites existing) -export OPENAI_API_KEY="sk-..." -nox -s "test_openai(latest)" -- --vcr-record=all - -# Record only missing cassettes -nox -s "test_openai(latest)" -- --vcr-record=once - -# Record a single test's cassette -nox -s "test_openai(latest)" -- --vcr-record=all -k "test_openai_chat_metrics" - -# Fail if cassette is missing (CI mode) -nox -s "test_openai(latest)" -- --vcr-record=none -``` - -**Recording modes:** - -- `once` (default) - record if cassette missing, play back otherwise -- `new_episodes` - record new interactions, play back existing -- `all` - always record, overwrite cassettes -- `none` - only play back, fail if missing - -## Test Fixtures - -**Memory logger** - test span recording without real logging: - -```python -def test_something(memory_logger): - # ... do work ... 
- spans = memory_logger.pop() - assert len(spans) == 1 -``` - -**Auto-applied fixtures** (conftest.py): - -- `override_app_url_for_tests` - sets BRAINTRUST_APP_URL -- `setup_braintrust` - sets dummy API key env vars (OpenAI, Google, Anthropic) for VCR tests -- `reset_braintrust_state` - resets global state after each test diff --git a/py/Makefile b/py/Makefile index 3267bcd6..bfdae330 100644 --- a/py/Makefile +++ b/py/Makefile @@ -1,4 +1,8 @@ -.PHONY: lint test test-wheel _template-version clean fixup build verify-build verify help install-build-deps install-dev _check-git-clean +PYTHON ?= python +UV := $(PYTHON) -m uv +UV_VERSION := $(shell awk '$$1=="uv" { print $$2 }' ../.tool-versions) + +.PHONY: lint test test-wheel _template-version clean fixup build verify-build verify help install-build-deps install-dev install-optional test-core _check-git-clean clean: rm -rf build dist @@ -29,7 +33,7 @@ _template-version: @bash scripts/template-version.sh build: clean _template-version - python -m build + $(PYTHON) -m build # Restore the original version file after the build git checkout src/braintrust/version.py @@ -44,16 +48,17 @@ verify-build: _check-git-clean build test-wheel verify: lint test install-build-deps: - python -m pip install uv==0.7.8 - python -m uv pip install -e . - python -m uv pip install -r requirements-build.txt + $(if $(UV_VERSION),,$(error Failed to read uv version from ../.tool-versions)) + $(PYTHON) -m pip install uv==$(UV_VERSION) + $(UV) pip install -e . 
+ $(UV) pip install -r requirements-build.txt install-dev: install-build-deps - python -m uv pip install -r requirements-dev.txt + $(UV) pip install -r requirements-dev.txt install-optional: install-dev - python -m uv pip install anthropic openai pydantic_ai litellm agno google-genai google-adk dspy langsmith - python -m uv pip install -e .[temporal,otel] + $(UV) pip install anthropic openai pydantic_ai litellm agno google-genai google-adk dspy langsmith + $(UV) pip install -e .[temporal,otel] .DEFAULT_GOAL := help help: