From 37b99cc0db8cb8cf50e079d48ca70ccdf3e228b0 Mon Sep 17 00:00:00 2001 From: Andrew Kent Date: Tue, 10 Feb 2026 19:12:59 -0700 Subject: [PATCH] support top-level tags and metadata for experiments --- .../braintrust/api/BraintrustApiClient.java | 59 +++++++++++++++---- src/main/java/dev/braintrust/eval/Eval.java | 28 ++++++++- .../java/dev/braintrust/eval/EvalTest.java | 45 ++++++++++++++ 3 files changed, 119 insertions(+), 13 deletions(-) diff --git a/src/main/java/dev/braintrust/api/BraintrustApiClient.java b/src/main/java/dev/braintrust/api/BraintrustApiClient.java index 3c5e567..59d17ca 100644 --- a/src/main/java/dev/braintrust/api/BraintrustApiClient.java +++ b/src/main/java/dev/braintrust/api/BraintrustApiClient.java @@ -40,6 +40,9 @@ public interface BraintrustApiClient { /** Creates an experiment. */ Experiment getOrCreateExperiment(CreateExperimentRequest request); + /** Lists experiments for a project. */ + List listExperiments(String projectId); + /** Get project and org info for the default project ID */ Optional getProjectAndOrgInfo(); @@ -157,6 +160,17 @@ public Experiment getOrCreateExperiment(CreateExperimentRequest request) { } } + @Override + public List listExperiments(String projectId) { + try { + return getAsync("/v1/experiment?project_id=" + projectId, ExperimentList.class) + .get() + .objects(); + } catch (InterruptedException | ExecutionException e) { + throw new ApiException(e); + } + } + @Override public LoginResponse login() throws LoginException { try { @@ -546,15 +560,26 @@ public Experiment getOrCreateExperiment(CreateExperimentRequest request) { experiments.stream() .filter(exp -> exp.name().equals(request.name())) .findFirst(); - return existing.orElseGet( - () -> - new Experiment( - request.name().hashCode() + "", - request.projectId(), - request.name(), - request.description(), - "notused", - "notused")); + if (existing.isPresent()) { + return existing.get(); + } + var newExperiment = + new Experiment( + request.name().hashCode() + "", + request.projectId(), + request.name(), + request.description(), + request.tags().orElse(List.of()), + request.metadata().orElse(Map.of()), + "notused", + "notused"); + experiments.add(newExperiment); + return newExperiment; + } + + @Override + public List listExperiments(String projectId) { + return experiments.stream().filter(exp -> exp.projectId().equals(projectId)).toList(); } @Override @@ -701,16 +726,24 @@ record Project(String id, String name, String orgId, String createdAt, String up record ProjectList(List projects) {} - record ExperimentList(List experiments) {} + record ExperimentList(List objects) {} record CreateExperimentRequest( String projectId, String name, Optional description, - Optional baseExperimentId) { + Optional baseExperimentId, + Optional> tags, + Optional> metadata) { public CreateExperimentRequest(String projectId, String name) { - this(projectId, name, Optional.empty(), Optional.empty()); + this( + projectId, + name, + Optional.empty(), + Optional.empty(), + Optional.empty(), + Optional.empty()); } } @@ -719,6 +752,8 @@ record Experiment( String projectId, String name, Optional description, + List tags, + Map metadata, String createdAt, String updatedAt) {} diff --git a/src/main/java/dev/braintrust/eval/Eval.java b/src/main/java/dev/braintrust/eval/Eval.java index adc60c8..64fc7bc 100644 --- a/src/main/java/dev/braintrust/eval/Eval.java +++ b/src/main/java/dev/braintrust/eval/Eval.java @@ -33,6 +33,8 @@ public final class Eval { private final @Nonnull Dataset dataset; private final @Nonnull Task task; private final @Nonnull List> scorers; + private final @Nonnull List tags; + private final @Nonnull Map metadata; private Eval(Builder builder) { this.experimentName = builder.experimentName; @@ -52,6 +54,8 @@ private Eval(Builder builder) { this.dataset = builder.dataset; this.task = Objects.requireNonNull(builder.task); this.scorers = List.copyOf(builder.scorers); + this.tags = List.copyOf(builder.tags); + this.metadata = Map.copyOf(builder.metadata); } /** Runs the evaluation and returns results. */ @@ -62,7 +66,9 @@ public EvalResult run() { orgAndProject.project().id(), experimentName, Optional.empty(), - Optional.empty())); + Optional.empty(), + tags.isEmpty() ? Optional.empty() : Optional.of(tags), + metadata.isEmpty() ? Optional.empty() : Optional.of(metadata))); dataset.forEach(datasetCase -> evalOne(experiment.id(), datasetCase)); var experimentUrl = "%s/experiments/%s" @@ -163,6 +169,8 @@ public static final class Builder { private @Nullable Tracer tracer = null; private @Nullable Task task; private @Nonnull List> scorers = List.of(); + private @Nonnull List tags = List.of(); + private @Nonnull Map metadata = Map.of(); public Eval build() { if (config == null) { @@ -256,5 +264,23 @@ public final Builder scorers(Scorer... scorers) { this.scorers = List.of(scorers); return this; } + + /** Sets tags for the experiment. */ + public Builder tags(List tags) { + this.tags = List.copyOf(tags); + return this; + } + + /** Sets tags for the experiment (varargs convenience method). */ + public Builder tags(String... tags) { + this.tags = List.of(tags); + return this; + } + + /** Sets metadata for the experiment. */ + public Builder metadata(Map metadata) { + this.metadata = Map.copyOf(metadata); + return this; + } } } diff --git a/src/test/java/dev/braintrust/eval/EvalTest.java b/src/test/java/dev/braintrust/eval/EvalTest.java index 0134795..29687ab 100644 --- a/src/test/java/dev/braintrust/eval/EvalTest.java +++ b/src/test/java/dev/braintrust/eval/EvalTest.java @@ -6,6 +6,7 @@ import dev.braintrust.Origin; import dev.braintrust.TestHarness; +import dev.braintrust.VCR; import dev.braintrust.api.BraintrustApiClient; import dev.braintrust.trace.BraintrustTracing; import io.opentelemetry.api.common.AttributeKey; @@ -262,4 +263,48 @@ public void evalRootSpanPassesOriginIfPresent() { } assertEquals(2, numRootSpans.get(), "should test for origin presence and absence"); } + + @Test + @SneakyThrows + void evalWithExperimentTagsAndMetadata() { + // This test requires real API calls - skip in replay mode + if (TestHarness.getVcrMode() == VCR.VcrMode.REPLAY) { + // TODO: need a vcr solution for dynamically created objects + return; + } + + var experimentName = "unit-test-eval-experiment-tags-metadata"; + var expectedTags = List.of("java-sdk", "unit-test"); + var expectedMetadata = Map.of("model", "gpt-4o", "version", "1.0"); + + var eval = + testHarness + .braintrust() + .evalBuilder() + .name(experimentName) + .cases(DatasetCase.of("strawberry", "fruit")) + .taskFunction(food -> "fruit") + .scorers(Scorer.of("scorer", result -> 1.0)) + .tags(expectedTags) + .metadata(expectedMetadata) + .build(); + + eval.run(); + testHarness.awaitExportedSpans(); + + // Query the experiment from Braintrust API to verify tags and metadata + var experiments = + testHarness + .braintrust() + .apiClient() + .listExperiments(TestHarness.defaultProjectId()); + var experiment = + experiments.stream() + .filter(e -> e.name().equals(experimentName)) + .findFirst() + .orElseThrow(() -> new AssertionError("Experiment not found")); + + assertEquals(expectedTags, experiment.tags(), "Experiment should have tags"); + assertEquals(expectedMetadata, experiment.metadata(), "Experiment should have metadata"); + } }