diff --git a/.github/workflows/adk-py-test.yaml b/.github/workflows/adk-py-test.yaml
index 093ee05e..f3e3d8ca 100644
--- a/.github/workflows/adk-py-test.yaml
+++ b/.github/workflows/adk-py-test.yaml
@@ -15,40 +15,30 @@ jobs:
     steps:
       - uses: actions/checkout@v4
 
-      - name: Set up Python
-        uses: actions/setup-python@v5
+      - name: Set up mise
+        uses: jdx/mise-action@v3
         with:
-          python-version: ${{ inputs.python-version }}
-
-      - name: Set up uv
-        uses: astral-sh/setup-uv@v3
-        with:
-          enable-cache: true
-          cache-dependency-glob: |
-            py/requirements-build.txt
-            py/requirements-dev.txt
-            py/setup.py
-            py/noxfile.py
-            integrations/adk-py/pyproject.toml
-            integrations/adk-py/uv.lock
+          cache: true
+          experimental: true
+          install_args: python@${{ inputs.python-version }} uv
 
       - name: Install deprecated package compatibility dependencies
         working-directory: integrations/adk-py
         run: |
-          uv sync
+          mise exec python@${{ inputs.python-version }} -- uv sync
 
       - name: Lint deprecated compatibility package
         working-directory: integrations/adk-py
         run: |
-          uv run ruff check src
+          mise exec python@${{ inputs.python-version }} -- uv run ruff check $(git ls-files '*.py' | grep -v 'examples/')
 
       - name: Run deprecated compatibility tests
         working-directory: integrations/adk-py
         run: |
-          uv run pytest src/tests/test_reexports.py
+          mise exec python@${{ inputs.python-version }} -- uv run pytest src/tests/test_reexports.py
 
       - name: Test deprecated package import
         working-directory: integrations/adk-py
         run: |
-          uv run python -c "import braintrust_adk; print('braintrust_adk imported successfully')"
-          uv run python -c "from braintrust_adk import setup_braintrust; print('setup_braintrust imported successfully')"
+          mise exec python@${{ inputs.python-version }} -- uv run python -c "import braintrust_adk; print('braintrust_adk imported successfully')"
+          mise exec python@${{ inputs.python-version }} -- uv run python -c "from braintrust_adk import setup_braintrust; print('setup_braintrust imported successfully')"
diff --git a/.github/workflows/langchain-py-test.yaml b/.github/workflows/langchain-py-test.yaml
index 127905a5..2ab767a8 100644
--- a/.github/workflows/langchain-py-test.yaml
+++ b/.github/workflows/langchain-py-test.yaml
@@ -22,29 +22,27 @@ jobs:
     steps:
       - uses: actions/checkout@v4
 
-      - name: Set up uv
-        uses: astral-sh/setup-uv@v3
+      - name: Set up mise
+        uses: jdx/mise-action@v3
         with:
-          enable-cache: true
-          cache-dependency-glob: |
-            integrations/langchain-py/pyproject.toml
-            integrations/langchain-py/uv.lock
+          cache: true
+          experimental: true
+          install_args: python@${{ inputs.python-version }} uv
 
       - name: Install dependencies
         run: |
-          uv python install ${{ inputs.python-version }}
-          uv sync
+          mise exec python@${{ inputs.python-version }} -- uv sync
 
       - name: Lint with ruff
         if: ${{ inputs.os == 'ubuntu-latest' }}
         run: |
-          uv run ruff check $(git ls-files '*.py' | grep -v 'examples/')
+          mise exec python@${{ inputs.python-version }} -- uv run ruff check $(git ls-files '*.py' | grep -v 'examples/')
 
       - name: Run tests
         run: |
-          uv run pytest src
+          mise exec python@${{ inputs.python-version }} -- uv run pytest src
 
       - name: Test import
         run: |
-          uv run python -c "import braintrust_langchain; print('braintrust_langchain imported successfully')"
-          uv run python -c "from braintrust_langchain import BraintrustCallbackHandler; print('BraintrustCallbackHandler imported successfully')"
+          mise exec python@${{ inputs.python-version }} -- uv run python -c "import braintrust_langchain; print('braintrust_langchain imported successfully')"
+          mise exec python@${{ inputs.python-version }} -- uv run python -c "from braintrust_langchain import BraintrustCallbackHandler; print('BraintrustCallbackHandler imported successfully')"
diff --git a/.github/workflows/lint.yaml b/.github/workflows/lint.yaml
index 27cd1e2c..4ea347a8 100644
--- a/.github/workflows/lint.yaml
+++ b/.github/workflows/lint.yaml
@@ -11,10 +11,14 @@ jobs:
     runs-on: ubuntu-latest
     timeout-minutes: 10
     steps:
-      - uses: actions/checkout@v3
+      - uses: actions/checkout@v4
         with:
           fetch-depth: 0 # Fetch full history for proper diff
-      - uses: actions/setup-python@v3
-      - uses: pre-commit/action@v3.0.0
+      - name: Set up mise
+        uses: jdx/mise-action@v3
         with:
-          extra_args: --from-ref origin/${{ github.base_ref || 'main' }} --to-ref HEAD
+          cache: true
+          experimental: true
+      - name: Run pre-commit
+        run: |
+          mise exec -- pre-commit run --from-ref origin/${{ github.base_ref || 'main' }} --to-ref HEAD
diff --git a/.github/workflows/publish-py-sdk.yaml b/.github/workflows/publish-py-sdk.yaml
index c500dd06..8158f46c 100644
--- a/.github/workflows/publish-py-sdk.yaml
+++ b/.github/workflows/publish-py-sdk.yaml
@@ -46,15 +46,15 @@ jobs:
       - uses: actions/checkout@v4
         with:
           fetch-depth: 0 # Fetch all history for changelog generation
-      - name: Set up Python
-        uses: actions/setup-python@v5
+      - name: Set up mise
+        uses: jdx/mise-action@v3
         with:
-          python-version: "3.13"
+          cache: true
+          experimental: true
+          install_args: python@3.13
       - name: Build and verify
-        working-directory: py
         run: |
-          make install-dev
-          make verify-build
+          mise exec python@3.13 -- make -C py install-dev verify-build
       - name: Upload build artifacts
         uses: actions/upload-artifact@v4
         with:
diff --git a/.github/workflows/py.yaml b/.github/workflows/py.yaml
index 6e098fd1..eaefedd7 100644
--- a/.github/workflows/py.yaml
+++ b/.github/workflows/py.yaml
@@ -26,25 +26,27 @@ jobs:
 
     steps:
       - uses: actions/checkout@v4
-      - name: Set up Python
-        uses: actions/setup-python@v5
+      - name: Set up mise
+        uses: jdx/mise-action@v3
         with:
-          python-version: ${{ matrix.python-version }}
+          cache: true
+          experimental: true
+          install_args: python@${{ matrix.python-version }}
       - name: Install dependencies
         run: |
-          cd py && make install-dev
+          mise exec python@${{ matrix.python-version }} -- make -C py install-dev
       - name: Test whether the Python SDK can be installed
         run: |
           # This is already done by make install-dev, but we're keeping this as a separate step
           # to explicitly verify that installation works
-          python -m uv pip install -e ./py[all]
+          mise exec python@${{ matrix.python-version }} -- python -m uv pip install -e './py[all]'
       - name: Test whether the Python SDK can be imported
         run: |
-          python -c 'import braintrust'
+          mise exec python@${{ matrix.python-version }} -- python -c 'import braintrust'
       - name: Run nox tests (shard ${{ matrix.shard }}/2)
         shell: bash
         run: |
-          cd py && ./scripts/nox-matrix.sh ${{ matrix.shard }} 2
+          mise exec python@${{ matrix.python-version }} -- bash ./py/scripts/nox-matrix.sh ${{ matrix.shard }} 2
 
   adk-py:
     uses: ./.github/workflows/adk-py-test.yaml
@@ -68,13 +70,15 @@ jobs:
     timeout-minutes: 10
     steps:
       - uses: actions/checkout@v4
-      - name: Set up Python
-        uses: actions/setup-python@v5
+      - name: Set up mise
+        uses: jdx/mise-action@v3
         with:
-          python-version: "3.13"
+          cache: true
+          experimental: true
+          install_args: python@3.13
       - name: Install build dependencies and build wheel
         run: |
-          cd py && make install-build-deps && make build
+          mise exec python@3.13 -- make -C py install-build-deps build
       - name: Upload wheel as artifact
         uses: actions/upload-artifact@v4
         with:
diff --git a/.github/workflows/test-publish-py-sdk.yaml b/.github/workflows/test-publish-py-sdk.yaml
index d7427dd7..6eee653d 100644
--- a/.github/workflows/test-publish-py-sdk.yaml
+++ b/.github/workflows/test-publish-py-sdk.yaml
@@ -33,18 +33,18 @@ jobs:
       - uses: actions/checkout@v4
         with:
           ref: ${{ github.event.inputs.ref }}
-      - name: Set up Python
-        uses: actions/setup-python@v5
+      - name: Set up mise
+        uses: jdx/mise-action@v3
         with:
-          python-version: "3.13"
+          cache: true
+          experimental: true
+          install_args: python@3.13
       - name: Install build dependencies
-        working-directory: py
         run: |
-          make install-dev
+          mise exec python@3.13 -- make -C py install-dev
       - name: Build and verify
-        working-directory: py
         run: |
-          make verify-build
+          mise exec python@3.13 -- make -C py verify-build
       - name: Get version from built wheel
         id: get_version
         run: |
diff --git a/.tool-versions b/.tool-versions
index 814a2d00..aabd9423 100644
--- a/.tool-versions
+++ b/.tool-versions
@@ -1,2 +1,3 @@
 python 3.13.3
 pre-commit 4.2.0
+uv 0.7.8
diff --git a/AGENTS.md b/AGENTS.md
new file mode 100644
index 00000000..ab9d11e8
--- /dev/null
+++ b/AGENTS.md
@@ -0,0 +1,131 @@
+# Braintrust SDK Agent Guide
+
+Guide for contributing to the Braintrust Python SDK repository.
+
+## Defaults
+
+- Use `mise` as the source of truth for tools and environment.
+- Prefer `py/` commands over root `make` targets when working on the SDK itself.
+- Keep changes narrow and run the smallest relevant test session first.
+- Do not rely on optional provider packages being installed unless the active nox session installs them.
+
+## Repo Map
+
+- `py/`: main Python package, tests, examples, nox sessions, release build
+- `integrations/`: separate integration packages
+- `internal/golden/`: compatibility and golden projects
+- `docs/`: supporting docs
+
+Important code areas in `py/src/braintrust/`:
+
+- core SDK modules: top-level package files
+- wrappers/integrations: `wrappers/`
+- temporal: `contrib/temporal/`
+- CLI/devserver: `cli/`, `devserver/`
+- tests: colocated `test_*.py`
+
+## Setup
+
+Preferred repo bootstrap:
+
+```bash
+mise install
+make develop
+```
+
+Package-focused setup:
+
+```bash
+cd py
+make install-dev
+```
+
+Install optional provider dependencies only if needed:
+
+```bash
+cd py
+make install-optional
+```
+
+## Commands
+
+Preferred SDK workflow:
+
+```bash
+cd py
+make lint
+make test-core
+nox -l
+```
+
+Targeted wrapper/session runs:
+
+```bash
+cd py
+nox -s "test_openai(latest)"
+nox -s "test_openai(latest)" -- -k "test_chat_metrics"
+```
+
+Root `Makefile` exists as a convenience wrapper. The authoritative SDK workflow is in `py/Makefile` and `py/noxfile.py`.
+
+## Tests
+
+`py/noxfile.py` is the source of truth for compatibility coverage.
+
+Key facts:
+
+- `test_core` runs without optional vendor packages.
+- wrapper coverage is split across dedicated nox sessions by provider/version.
+- `pylint` installs the broad dependency surface before checking files.
+- `test-wheel` is a wheel sanity check and requires a built wheel first.
+
+When changing behavior, run the narrowest affected session first, then expand only if needed.
+
+## VCR
+
+VCR cassette directories:
+
+- `py/src/braintrust/cassettes/`
+- `py/src/braintrust/wrappers/cassettes/`
+- `py/src/braintrust/devserver/cassettes/`
+
+Behavior from `py/src/braintrust/conftest.py`:
+
+- local default: `record_mode="once"`
+- CI default: `record_mode="none"`
+- wheel-mode skips VCR-marked tests
+- test fixtures inject dummy API keys and reset global state
+
+Common commands:
+
+```bash
+cd py
+nox -s "test_openai(latest)"
+nox -s "test_openai(latest)" -- --disable-vcr
+nox -s "test_openai(latest)" -- --vcr-record=all -k "test_openai_chat_metrics"
+```
+
+Only re-record cassettes when the behavior change is intentional. If in doubt, ask the user.
+
+## Build Notes
+
+Build from `py/`:
+
+```bash
+cd py
+make build
+```
+
+Important caveat:
+
+- `py/scripts/template-version.sh` rewrites `py/src/braintrust/version.py` during build.
+- `py/Makefile` restores that file afterward with `git checkout`.
+
+Avoid editing `py/src/braintrust/version.py` while also running build commands.
+
+## Editing Guidance
+
+- Keep tests near the code they cover.
+- Reuse existing fixtures and cassette patterns.
+- If a change affects examples or integrations, update the nearest example or focused test.
+- For CLI/devserver changes, consider whether wheel-mode behavior also needs coverage.
diff --git a/CLAUDE.md b/CLAUDE.md
deleted file mode 100644
index 549dff99..00000000
--- a/CLAUDE.md
+++ /dev/null
@@ -1,27 +0,0 @@
-# Braintrust SDK
-
-Python client for Braintrust, plus wrapper libraries for OpenAI, Anthropic, and other AI providers.
-
-## Structure
-
-```
-├── py/             # Python SDK (see py/CLAUDE.md)
-├── integrations/   # Python integrations (adk-py, langchain-py)
-├── internal/       # Golden tests
-└── scripts/        # Dev scripts
-```
-
-## Quick Reference
-
-| Task          | Command       |
-| ------------- | ------------- |
-| Run all tests | `make test`   |
-| Lint/format   | `make fixup`  |
-| Python lint   | `make pylint` |
-
-## Setup
-
-```bash
-make develop      # Create venv and install deps
-source env.sh     # Activate environment
-```
diff --git a/CLAUDE.md b/CLAUDE.md
new file mode 120000
index 00000000..47dc3e3d
--- /dev/null
+++ b/CLAUDE.md
@@ -0,0 +1 @@
+AGENTS.md
\ No newline at end of file
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index f36a3d0f..f795ceb9 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -2,173 +2,149 @@
 
 Guide for contributing to the Braintrust Python SDK.
 
-## Repository Structure
-
-```
-braintrust-sdk-python/
-├── py/                  # Python SDK
-│   ├── src/braintrust/  # Source code
-│   │   ├── wrappers/    # Provider wrappers (OpenAI, Anthropic, Google, etc.)
-│   │   ├── contrib/     # Community integrations (Temporal, etc.)
-│   │   ├── devserver/   # Local dev server / CLI
-│   │   └── conftest.py  # Shared test fixtures
-│   ├── noxfile.py       # Test session definitions
-│   └── Makefile         # Build/test commands
-├── integrations/
-│   ├── langchain-py/    # LangChain integration
-│   └── adk-py/          # Google ADK integration
-├── internal/            # Golden tests
-├── scripts/             # Dev scripts
-├── docs/                # Documentation
-└── Makefile             # Top-level commands
-```
-
 ## Setup
 
 ### Prerequisites
 
-- Python 3.10+ (3.9 supported but some test sessions are skipped)
-- [uv](https://github.com/astral-sh/uv) (installed automatically by `make install-dev`)
+- Python 3.10+
+- [mise](https://mise.jdx.dev/) for tool installation and repo-local environment management
 
 ### Getting Started
 
 ```bash
-# Clone the repo
 git clone https://github.com/braintrustdata/braintrust-sdk-python.git
 cd braintrust-sdk-python
-
-# Create venv and install all dependencies
+mise install
 make develop
-
-# Activate the environment
-source env.sh
 ```
 
-### Python SDK Development
+If you use `mise activate` in your shell, entering the repo will automatically expose the configured tools. If you do not, you can still run commands explicitly with `mise exec -- ...`.
+
+## Repo Layout
+
+- `py/`: main Python SDK
+- `integrations/`: separate integration packages such as LangChain and ADK
+- `internal/golden/`: golden and compatibility projects
+- `docs/`: supporting docs
+
+Most SDK changes should happen under `py/`.
+
+## Common Workflows
+
+### Python SDK
 
 ```bash
 cd py
-
-# Install dev dependencies
 make install-dev
-
-# Install optional provider packages (for wrapper development)
-make install-optional
+make test-core
+make lint
+nox -l
 ```
 
-## Running Tests
+Run a focused session:
 
-### Python SDK
+```bash
+cd py
+nox -s "test_openai(latest)"
+```
 
-Tests use [nox](https://nox.thea.codes/) to run across different dependency versions.
+Run a single test subset:
 
 ```bash
 cd py
+nox -s "test_openai(latest)" -- -k "test_chat_metrics"
+```
 
-# Run all test sessions
-make test
+Install optional provider packages only when you need them:
 
-# Run core tests only (no optional dependencies)
-make test-core
+```bash
+cd py
+make install-optional
+```
 
-# List all available sessions
-nox -l
+### Repo-Level Commands
 
-# Run a specific session
-nox -s "test_openai(latest)"
-nox -s "test_anthropic(latest)"
-nox -s "test_temporal(latest)"
+The root `Makefile` is a convenience wrapper around `py/Makefile`.
 
-# Run a single test within a session
-nox -s "test_openai(latest)" -- -k "test_chat_metrics"
+Useful root commands:
+
+```bash
+make fixup
+make test-core
+make lint
 ```
 
-### Integration Tests
+`make test-wheel` requires a built wheel first.
+
+### Integration Packages
+
+LangChain:
 
 ```bash
-# LangChain
 cd integrations/langchain-py
 uv sync
 uv run pytest src
-
-# ADK
-cd integrations/adk-py
-uv sync
-uv run pytest
 ```
 
-### Linting
+ADK:
 
 ```bash
-# From repo root — runs pre-commit hooks (formatting, etc.)
-make fixup
-
-# Python-specific lint (pylint)
-cd py && make lint
+cd integrations/adk-py
+uv sync
+uv run pytest
 ```
 
-## VCR Cassette Testing
-
-Tests for API provider wrappers use VCR.py to record and replay HTTP interactions. This means most tests run without real API keys.
+## Testing Notes
 
-See [docs/vcr-testing.md](docs/vcr-testing.md) for full details. Key points:
+The SDK uses [nox](https://nox.thea.codes/) for compatibility testing across optional providers and versions. `py/noxfile.py` is the source of truth for available sessions.
 
-- **Locally:** VCR records new cassettes on first run (`record_mode="once"`). You need a real API key to record.
-- **In CI:** VCR only replays existing cassettes (`record_mode="none"`). No API keys needed.
-- **Modifying tests:** If your change alters the HTTP request a test makes, you must re-record the cassette locally with a real API key and commit it.
-- **New tests:** Add `@pytest.mark.vcr`, record the cassette locally, and commit the cassette file.
+### VCR Tests
 
-## CI Overview
+Many wrapper and devserver tests use VCR cassettes.
 
-CI runs on GitHub Actions. All workflows are in `.github/workflows/`.
+- Locally, missing cassettes can be recorded with `record_mode="once"`.
+- In CI, missing cassettes fail because `record_mode="none"` is used.
+- If your change intentionally changes HTTP behavior, re-record the affected cassettes and commit them.
 
-### Workflows
+Useful example:
 
-| Workflow | File | Trigger | What it does |
-|---|---|---|---|
-| **py** | `py.yaml` | PR (py/integrations changes), push to main | Runs nox test matrix across Python 3.10–3.13 on Ubuntu + Windows, plus integration tests |
-| **langchain-py** | `langchain-py-test.yaml` | Called by `py.yaml` | Lint + tests for the LangChain integration |
-| **adk-py** | `adk-py-test.yaml` | Called by `py.yaml` | Lint + tests for the Google ADK integration |
-| **lint** | `lint.yaml` | PR | Pre-commit hooks and formatting checks |
-| **publish** | `publish-py-sdk.yaml` | Tag push (`py-sdk-v*.*.*`) | Build, test wheel, publish to PyPI, create GitHub release |
-| **test-publish** | `test-publish-py-sdk.yaml` | Manual dispatch | Publish to TestPyPI for pre-release validation |
+```bash
+cd py
+nox -s "test_openai(latest)" -- --vcr-record=all -k "test_openai_chat_metrics"
+```
 
-### No API Key Secrets Required
+### Fixtures
 
-CI workflows do **not** use real API key secrets (`ANTHROPIC_API_KEY`, `OPENAI_API_KEY`, `GEMINI_API_KEY`). Tests rely on VCR cassettes with dummy API keys provided by test fixtures. This means:
+Shared test fixtures live in `py/src/braintrust/conftest.py`.
 
-- Forks can run CI without configuring any secrets.
-- The `test_latest_wrappers_novcr` nox session (which disables VCR) is automatically skipped in CI.
+Common ones include:
 
-### Test Sharding
+- dummy API key setup for VCR-backed tests
+- Braintrust global state reset between tests
+- wheel-mode skipping for VCR tests
 
-The main `py.yaml` workflow shards nox sessions across 2 parallel jobs per Python version/OS combination using `scripts/nox-matrix.sh`.
+The `memory_logger` fixture from `braintrust.test_helpers` is useful for asserting on logged spans without a real Braintrust backend.
 
-## Test Fixtures
+## CI
 
-Key auto-applied fixtures defined in `py/src/braintrust/conftest.py`:
+GitHub Actions workflows live in `.github/workflows/`.
 
-| Fixture | Purpose |
-|---|---|
-| `setup_braintrust` | Sets dummy API keys (OpenAI, Google, Anthropic) for VCR tests |
-| `override_app_url_for_tests` | Points `BRAINTRUST_APP_URL` to production for consistent behavior |
-| `reset_braintrust_state` | Resets global SDK state after each test |
-| `skip_vcr_tests_in_wheel_mode` | Skips VCR tests when testing from an installed wheel |
+Main workflows:
 
-The `memory_logger` fixture (from `braintrust.test_helpers`) lets you capture logged spans in-memory without a real Braintrust connection:
+- `py.yaml`: SDK test matrix
+- `langchain-py-test.yaml`: LangChain integration tests
+- `adk-py-test.yaml`: ADK integration tests
+- `lint.yaml`: pre-commit and formatting checks
+- `publish-py-sdk.yaml`: PyPI release
+- `test-publish-py-sdk.yaml`: TestPyPI release validation
 
-```python
-def test_something(memory_logger):
-    # ... exercise code that logs spans ...
-    spans = memory_logger.pop()
-    assert len(spans) == 1
-```
+CI uses VCR cassettes and dummy credentials, so forks do not need provider API secrets for normal test runs.
 
 ## Submitting Changes
 
-1. Create a branch for your changes.
-2. Make your changes and add/update tests.
-3. If you modified VCR tests, re-record cassettes and commit them.
-4. Run `make fixup` to format and lint.
-5. Run relevant test sessions to verify (e.g. `nox -s "test_openai(latest)"`).
+1. Make your change in the narrowest relevant area.
+2. Add or update tests.
+3. Re-record cassettes if the HTTP behavior change is intentional.
+4. Run the smallest relevant local checks first, then broader ones if needed.
+5. Run `make fixup` before opening a PR.
 6. Open a pull request against `main`.
diff --git a/Makefile b/Makefile
index 7709d09a..9185d785 100644
--- a/Makefile
+++ b/Makefile
@@ -1,59 +1,45 @@
 SHELL := /bin/bash
-ROOT_DIR:=$(shell dirname $(realpath $(firstword $(MAKEFILE_LIST))))
-VENV_PRE_COMMIT := ${ROOT_DIR}/venv/.pre_commit
-VENV_DOCS_REBUILD := ${ROOT_DIR}/venv/.docs_rebuild
 
-.PHONY: all
-all: ${VENV_PRE_COMMIT}
+.PHONY: help develop install-dev install-deps fixup test test-core test-wheel lint pylint nox
 
-.PHONY: py
-py: ${VENV_PYTHON_PACKAGES}
-	bash -c 'source venv/bin/activate'
+develop: install-dev
+	mise exec -- pre-commit install
+	@echo "Use 'mise activate' in your shell for automatic tool and env activation."
 
-VENV_INITIALIZED := venv/.initialized
-
-${VENV_INITIALIZED}:
-	rm -rf venv && python -m venv venv
-	@touch ${VENV_INITIALIZED}
-
-VENV_PYTHON_PACKAGES := venv/.python_packages
-
-${VENV_PYTHON_PACKAGES}: ${VENV_INITIALIZED}
-	bash -c 'source venv/bin/activate && python -m pip install --upgrade pip setuptools'
-	bash -c 'source venv/bin/activate && python -m pip install -e py[all]'
-	@touch $@
-
-${VENV_PRE_COMMIT}: ${VENV_PYTHON_PACKAGES}
-	bash -c 'source venv/bin/activate && pre-commit install'
-	@touch $@
-
-develop: ${VENV_PRE_COMMIT}
-	@echo "--\nRun "source env.sh" to enter development mode!"
-
-.PHONY: install-dev
 install-dev:
 	mise install
 
-.PHONY: install-deps
 install-deps:
-	cd py && make install-dev
+	mise exec -- $(MAKE) -C py install-dev
 
 fixup:
-	source env.sh && pre-commit run --all-files
+	mise exec -- pre-commit run --all-files
 
-.PHONY: test test-py nox pylint
+test:
+	mise exec -- $(MAKE) -C py test
 
-test: test-py-core test-py-sdk
+test-core:
+	mise exec -- $(MAKE) -C py test-core
 
-test-py-core:
-	source env.sh && python -m unittest discover ./core/py/src
+test-wheel:
+	mise exec -- $(MAKE) -C py test-wheel
 
-test-py-sdk: nox
-	source env.sh && cd py && pytest
+lint pylint:
+	mise exec -- $(MAKE) -C py lint
 
+nox: test
 
-nox:
-	cd py && make test
+help:
+	@echo "Available targets:"
+	@echo "  develop      - Install tools with mise, install py/ deps, and install pre-commit hooks"
+	@echo "  fixup        - Run pre-commit hooks across the repo"
+	@echo "  install-deps - Install Python SDK dependencies via py/Makefile"
+	@echo "  install-dev  - Install pinned tools and create/update the repo env via mise"
+	@echo "  lint         - Run Python SDK lint checks via py/Makefile"
+	@echo "  pylint       - Alias for lint"
+	@echo "  nox          - Alias for test"
+	@echo "  test         - Run the Python SDK nox matrix via py/Makefile"
+	@echo "  test-core    - Run Python SDK core tests via py/Makefile"
+	@echo "  test-wheel   - Run Python SDK wheel sanity tests via py/Makefile (requires a built wheel)"
 
-pylint:
-	cd py && make lint
+.DEFAULT_GOAL := help
diff --git a/integrations/adk-py/README.md b/integrations/adk-py/README.md
index e1552283..4ada8177 100644
--- a/integrations/adk-py/README.md
+++ b/integrations/adk-py/README.md
@@ -136,6 +136,30 @@ Once you've set up the integration, you can view your traces in the Braintrust d
    - Token usage and latency metrics
    - Any errors or warnings
 
+## Development
+
+To contribute to this integration:
+
+```bash
+# Clone the repository
+git clone https://github.com/braintrustdata/braintrust-sdk-python.git
+cd braintrust-sdk-python/integrations/adk-py
+
+mise install
+uv sync
+
+# Run examples
+cd examples
+
+# simple programmatic agent call
+uv run manual.py
+
+# or use the adk web UI
+uv run adk web --port 8888
+```
+
+If your shell is not configured with `mise activate`, prefix commands with `mise exec --`.
+
 ## Documentation
 
 - [Braintrust Documentation](https://www.braintrust.dev/docs)
diff --git a/integrations/adk-py/examples/Makefile b/integrations/adk-py/examples/Makefile
index 6de2f5bf..5e089f8c 100644
--- a/integrations/adk-py/examples/Makefile
+++ b/integrations/adk-py/examples/Makefile
@@ -1,44 +1,33 @@
-# Variables
 PORT ?= 8888
-UV := uv run
+UV ?= uv
+RUN := $(UV) run
 
-# Phony targets
 .PHONY: dev help clean install test lint format check
-
-# Default target
 .DEFAULT_GOAL := help
 
-# Development server
 dev:
-	$(UV) adk web --port $(PORT)
+	$(RUN) adk web --port $(PORT)
 
-# Install dependencies
 install:
-	uv sync
+	$(UV) sync
 
-# Run tests
 test:
-	$(UV) pytest
+	$(RUN) pytest
 
-# Lint code
 lint:
-	$(UV) ruff check .
+	$(RUN) ruff check .
 
-# Format code
 format:
-	$(UV) ruff format .
+	$(RUN) ruff format .
 
-# Type check
 check:
-	$(UV) mypy .
+	$(RUN) mypy .
 
-# Clean up temporary files
 clean:
 	find . -type f -name "*.pyc" -delete
 	find . -type d -name "__pycache__" -delete
 	find . -type d -name "*.egg-info" -exec rm -rf {} +
 
-# Show help
 help:
 	@echo "Available targets:"
 	@echo "  dev      - Start development server (default port: $(PORT))"
diff --git a/integrations/langchain-py/Makefile b/integrations/langchain-py/Makefile
index d515c98b..67cc3125 100644
--- a/integrations/langchain-py/Makefile
+++ b/integrations/langchain-py/Makefile
@@ -1,38 +1,28 @@
-SHELL := /bin/bash
-ROOT_DIR:=$(shell dirname $(realpath $(firstword $(MAKEFILE_LIST))))
-VENV_PRE_COMMIT := ${ROOT_DIR}/venv/.pre_commit
+UV ?= uv
+RUN := $(UV) run
 
-.PHONY: all
-all: ${VENV_PRE_COMMIT}
+.PHONY: help install test lint format fixup
 
-.PHONY: py
-py: ${VENV_PYTHON_PACKAGES}
-	bash -c 'source venv/bin/activate'
+install:
+	$(UV) sync
 
-VENV_INITIALIZED := venv/.initialized
-
-${VENV_INITIALIZED}:
-	rm -rf venv && python -m venv venv
-	@touch ${VENV_INITIALIZED}
-
-VENV_PYTHON_PACKAGES := venv/.python_packages
-
-${VENV_PYTHON_PACKAGES}: ${VENV_INITIALIZED}
-	bash -c 'source venv/bin/activate && python -m pip install --upgrade pip setuptools'
-	bash -c 'source venv/bin/activate && python -m pip install -e .[all]'
-	@touch $@
+test:
+	$(RUN) pytest src
 
-${VENV_PRE_COMMIT}: ${VENV_PYTHON_PACKAGES}
-	bash -c 'source venv/bin/activate && pre-commit install'
-	@touch $@
+lint:
+	$(RUN) ruff check .
 
-develop: ${VENV_PRE_COMMIT}
-	@echo "--\nRun "source env.sh" to enter development mode!"
+format:
+	$(RUN) ruff format .
 
-fixup:
-	source env.sh && pre-commit run --all-files
+fixup: format
 
-.PHONY: test
+help:
+	@echo "Available targets:"
+	@echo "  fixup   - Format the integration sources with ruff"
+	@echo "  format  - Format the integration sources with ruff"
+	@echo "  install - Install dependencies with uv sync"
+	@echo "  lint    - Run ruff checks"
+	@echo "  test    - Run pytest against src/"
 
-test:
-	source env.sh && pytest
+.DEFAULT_GOAL := help
diff --git a/mise.toml b/mise.toml
index 313028d1..f92d09ea 100644
--- a/mise.toml
+++ b/mise.toml
@@ -12,7 +12,6 @@ _.file = ".env"
 
 [tools]
 ruff = "0.12.7"
-uv = "latest"
 
 [hooks]
 postinstall = "make install-deps"
diff --git a/py/CLAUDE.md b/py/CLAUDE.md
deleted file mode 100644
index b61382a1..00000000
--- a/py/CLAUDE.md
+++ /dev/null
@@ -1,101 +0,0 @@
-# Python SDK
-
-## Setup
-
-To run examples or use optional integrations, install the extra dependencies:
-
-```bash
-make install-dev        # Development dependencies
-make install-optional   # Optional integration dependencies
-```
-
-## Running Tests
-
-```bash
-make test                    # All tests via nox
-make test-core               # Core tests only
-make lint                    # Pylint + formatting
-make clean                   # Remove build artifacts
-```
-
-**Run a single test:**
-
-```bash
-nox -s "test_openai(latest)" -- -k "test_chat_metrics"
-```
-
-**Common test sessions:**
-
-```bash
-nox -l                           # List all sessions
-nox -s "test_openai(latest)"     # OpenAI wrapper (latest version)
-nox -s "test_anthropic(latest)"  # Anthropic wrapper
-nox -s "test_temporal(latest)"   # Temporal integration
-nox -s test_openai               # All OpenAI versions
-```
-
-- we use pytest, so you don't need to add extra messages to assert. `assert x
-== 1` is usually enough.
-
-## VCR Cassettes
-
-Tests use VCR to record HTTP interactions so they run without live API calls.
-
-**Cassette location:** `src/braintrust/wrappers/cassettes/`
-
-**Using in tests:**
-
-```python
-@pytest.mark.vcr
-def test_openai_chat_metrics(memory_logger):
-    client = wrap_openai(openai.OpenAI())
-    response = client.chat.completions.create(...)
-```
-
-**VCR commands:**
-
-```bash
-# Run tests normally (play back from cassettes)
-nox -s "test_openai(latest)"
-
-# Run with real API calls (no VCR)
-export OPENAI_API_KEY="sk-..."
-nox -s "test_openai(latest)" -- --disable-vcr
-
-# Record new cassettes (overwrites existing)
-export OPENAI_API_KEY="sk-..."
-nox -s "test_openai(latest)" -- --vcr-record=all
-
-# Record only missing cassettes
-nox -s "test_openai(latest)" -- --vcr-record=once
-
-# Record a single test's cassette
-nox -s "test_openai(latest)" -- --vcr-record=all -k "test_openai_chat_metrics"
-
-# Fail if cassette is missing (CI mode)
-nox -s "test_openai(latest)" -- --vcr-record=none
-```
-
-**Recording modes:**
-
-- `once` (default) - record if cassette missing, play back otherwise
-- `new_episodes` - record new interactions, play back existing
-- `all` - always record, overwrite cassettes
-- `none` - only play back, fail if missing
-
-## Test Fixtures
-
-**Memory logger** - test span recording without real logging:
-
-```python
-def test_something(memory_logger):
-    # ... do work ...
-    spans = memory_logger.pop()
-    assert len(spans) == 1
-```
-
-**Auto-applied fixtures** (conftest.py):
-
-- `override_app_url_for_tests` - sets BRAINTRUST_APP_URL
-- `setup_braintrust` - sets dummy API key env vars (OpenAI, Google, Anthropic) for VCR tests
-- `reset_braintrust_state` - resets global state after each test
diff --git a/py/Makefile b/py/Makefile
index 3267bcd6..bfdae330 100644
--- a/py/Makefile
+++ b/py/Makefile
@@ -1,4 +1,8 @@
-.PHONY: lint test test-wheel _template-version clean fixup build verify-build verify help install-build-deps install-dev _check-git-clean
+PYTHON ?= python
+UV := $(PYTHON) -m uv
+UV_VERSION := $(shell awk '$$1=="uv" { print $$2 }' ../.tool-versions)
+
+.PHONY: lint test test-wheel _template-version clean fixup build verify-build verify help install-build-deps install-dev install-optional test-core _check-git-clean
 
 clean:
 	rm -rf build dist
@@ -29,7 +33,7 @@ _template-version:
 	@bash scripts/template-version.sh
 
 build: clean _template-version
-	python -m build
+	$(PYTHON) -m build
 	# Restore the original version file after the build
 	git checkout src/braintrust/version.py
 
@@ -44,16 +48,17 @@ verify-build: _check-git-clean build test-wheel
 verify: lint test
 
 install-build-deps:
-	python -m pip install uv==0.7.8
-	python -m uv pip install -e .
-	python -m uv pip install -r requirements-build.txt
+	$(if $(UV_VERSION),,$(error Failed to read uv version from ../.tool-versions))
+	$(PYTHON) -m pip install uv==$(UV_VERSION)
+	$(UV) pip install -e .
+	$(UV) pip install -r requirements-build.txt
 
 install-dev: install-build-deps
-	python -m uv pip install -r requirements-dev.txt
+	$(UV) pip install -r requirements-dev.txt
 
 install-optional: install-dev
-	python -m uv pip install anthropic openai pydantic_ai litellm agno google-genai google-adk dspy langsmith
-	python -m uv pip install -e .[temporal,otel]
+	$(UV) pip install anthropic openai pydantic_ai litellm agno google-genai google-adk dspy langsmith
+	$(UV) pip install -e .[temporal,otel]
 
 .DEFAULT_GOAL := help
 help: