Skip to content

Commit 7c04444

Browse files
willfrey and AbhiPrasad authored
fix(devserver): honor evaluator.project_id when request omits it (#372)
## Summary

The dev-server's `run_eval` builds `EvalAsync(...)` kwargs with:

```python
{**eval_kwargs, ..., "project_id": eval_data.get("project_id")}
```

The trailing key always wins in dict-spread merging, so a request body that omits `project_id` silently overrides the registered evaluator's `project_id` with `None`. `EvalAsync(name=..., project_id=None)` then falls back to using the eval name as the project name (per the `Eval(...)` docstring: *"If specified, uses the given project ID instead of the evaluator's name to identify the project."*), so experiments route into a per-evaluator-name auto-created project instead of the project the evaluator was registered against.

This bites consumers who mount the dev-server behind a custom auth layer and trigger evals from anything other than the Braintrust playground UI: every triggered run lands in a fresh eval-name-keyed project rather than the canonical project named by the registered `Evaluator(project_id=...)`.

## Fix

Fall back to `evaluator.project_id` when the request omits `project_id` or sends it empty. An explicit request-level `project_id` still takes precedence (no behavior change for the playground UI flow).

```python
project_id = eval_data.get("project_id") or evaluator.project_id
```

## Test plan

- [x] `test_eval_falls_back_to_evaluator_project_id_when_request_omits_or_empty_it` — registers an evaluator with a known `project_id`, POSTs `/eval` with `project_id` omitted or empty, and asserts `EvalAsync` receives the registered id. (Fails on `main`, passes with this fix.)
- [x] `test_eval_request_project_id_overrides_evaluator` — confirms an explicit request-level `project_id` still wins.
- [x] Full `py/src/braintrust/devserver/` test suite green (21 passed, 2 pre-existing skips).
- [x] `nox -s pylint` passes.
- [x] Pre-commit hooks (ruff format, ruff check, codespell) pass.

---------

Co-authored-by: Abhijeet Prasad <abhijeet@braintrustdata.com>
1 parent 25ec266 commit 7c04444

2 files changed

Lines changed: 139 additions & 1 deletion

File tree

py/src/braintrust/devserver/server.py

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -225,6 +225,14 @@ def stream_fn(event: SSEProgressEvent):
225225
if validated_parameters is not None and not isinstance(evaluator.parameters, RemoteEvalParameters):
226226
eval_kwargs["parameters"] = validated_parameters
227227

228+
# Honor an explicit project_id from the request when present; otherwise
229+
# fall back to the registered evaluator's project_id. Without this
230+
# fallback, requests that omit project_id silently route into a
231+
# per-evaluator-name auto-created project (Eval(project_id=None) uses
232+
# name as the project name) instead of the project the evaluator was
233+
# registered against.
234+
project_id = eval_data.get("project_id") or evaluator.project_id
235+
228236
try:
229237
eval_task = asyncio.create_task(
230238
EvalAsync(
@@ -243,7 +251,7 @@ def stream_fn(event: SSEProgressEvent):
243251
"task": task,
244252
"experiment_name": eval_data.get("experiment_name"),
245253
"parent": parent,
246-
"project_id": eval_data.get("project_id"),
254+
"project_id": project_id,
247255
},
248256
)
249257
)

py/src/braintrust/devserver/test_server_integration.py

Lines changed: 130 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -298,3 +298,133 @@ async def fake_eval_async(*, task, data, parameters, **_kwargs):
298298
)
299299

300300
assert response.status_code == 200
301+
302+
303+
@pytest.mark.parametrize(
    "request_project_id",
    [
        pytest.param("", id="empty"),
        pytest.param("__omit__", id="omitted"),
    ],
)
def test_eval_falls_back_to_evaluator_project_id_when_request_omits_or_empty_it(
    api_key, org_name, monkeypatch, request_project_id
):
    """A request without a usable ``project_id`` must run under the evaluator's own id.

    The test is parametrized over two request bodies: one that sends
    ``project_id`` as an empty string and one that leaves the key out
    entirely (selected by the ``"__omit__"`` marker value).

    ``cached_login`` and ``EvalAsync`` on the devserver module are replaced
    with stubs, so the only thing observed is the ``project_id`` keyword
    that ``run_eval`` hands to ``EvalAsync``.

    Regression guard: ``run_eval`` used to pass
    ``eval_data.get("project_id")`` as the final ``"project_id"`` entry of
    the ``EvalAsync`` kwargs, so a request lacking the field replaced the
    registered evaluator's ``project_id`` with ``None``.
    """
    from braintrust import Evaluator
    from braintrust.devserver import server as devserver_module
    from braintrust.devserver.server import create_app
    from braintrust.logger import BraintrustState
    from starlette.testclient import TestClient

    evaluator = Evaluator(
        project_name="ignored-project-name",
        eval_name="project-id-fallback-eval",
        data=lambda: [{"input": "ping", "expected": "pong"}],
        task=lambda input, _hooks: "pong",
        scores=[],
        experiment_name=None,
        metadata=None,
        project_id="evaluator-registered-project-id",
    )

    # Filled in by the EvalAsync stub below.
    seen: dict[str, Any] = {}

    async def stub_cached_login(**_ignored):
        return BraintrustState()

    class StubSummary:
        def as_dict(self):
            return {"experiment_name": evaluator.eval_name, "project_name": "", "scores": {}}

    class StubResult:
        summary = StubSummary()

    async def stub_eval_async(*, project_id, **_ignored):
        # Record what run_eval resolved instead of running a real evaluation.
        seen["project_id"] = project_id
        return StubResult()

    monkeypatch.setattr(devserver_module, "cached_login", stub_cached_login)
    monkeypatch.setattr(devserver_module, "EvalAsync", stub_eval_async)

    # "__omit__" means "do not send the key at all"; any other value is sent as-is.
    project_id_field = {} if request_project_id == "__omit__" else {"project_id": request_project_id}
    body = {
        "name": "project-id-fallback-eval",
        "stream": False,
        "data": [{"input": "ping", "expected": "pong"}],
        **project_id_field,
    }
    request_headers = {
        "x-bt-auth-token": api_key,
        "x-bt-org-name": org_name,
        "Content-Type": "application/json",
    }

    client = TestClient(create_app([evaluator]))
    response = client.post("/eval", headers=request_headers, json=body)

    assert response.status_code == 200
    assert seen["project_id"] == "evaluator-registered-project-id"
375+
376+
def test_eval_request_project_id_overrides_evaluator(api_key, org_name, monkeypatch):
    """A ``project_id`` supplied in the request body wins over the evaluator's.

    The evaluator is registered with one ``project_id`` and the ``/eval``
    request carries a different one. ``cached_login`` and ``EvalAsync`` on
    the devserver module are replaced with stubs, and the test asserts that
    the stubbed ``EvalAsync`` receives the request's value.
    """
    from braintrust import Evaluator
    from braintrust.devserver import server as devserver_module
    from braintrust.devserver.server import create_app
    from braintrust.logger import BraintrustState
    from starlette.testclient import TestClient

    evaluator = Evaluator(
        project_name="ignored-project-name",
        eval_name="project-id-override-eval",
        data=lambda: [{"input": "ping", "expected": "pong"}],
        task=lambda input, _hooks: "pong",
        scores=[],
        experiment_name=None,
        metadata=None,
        project_id="evaluator-registered-project-id",
    )

    # Filled in by the EvalAsync stub below.
    seen: dict[str, Any] = {}

    async def stub_cached_login(**_ignored):
        return BraintrustState()

    class StubSummary:
        def as_dict(self):
            return {"experiment_name": evaluator.eval_name, "project_name": "", "scores": {}}

    class StubResult:
        summary = StubSummary()

    async def stub_eval_async(*, project_id, **_ignored):
        # Record what run_eval resolved instead of running a real evaluation.
        seen["project_id"] = project_id
        return StubResult()

    monkeypatch.setattr(devserver_module, "cached_login", stub_cached_login)
    monkeypatch.setattr(devserver_module, "EvalAsync", stub_eval_async)

    body = {
        "name": "project-id-override-eval",
        "stream": False,
        "data": [{"input": "ping", "expected": "pong"}],
        "project_id": "request-explicit-project-id",
    }
    request_headers = {
        "x-bt-auth-token": api_key,
        "x-bt-org-name": org_name,
        "Content-Type": "application/json",
    }

    client = TestClient(create_app([evaluator]))
    response = client.post("/eval", headers=request_headers, json=body)

    assert response.status_code == 200
    assert seen["project_id"] == "request-explicit-project-id"

0 commit comments

Comments
 (0)