Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
59 changes: 47 additions & 12 deletions src/main/java/dev/braintrust/api/BraintrustApiClient.java
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,9 @@ public interface BraintrustApiClient {
/**
 * Gets or creates an experiment for the given request.
 *
 * <p>NOTE(review): the in-memory implementation in this file returns an already-known
 * experiment whose name equals {@code request.name()} and only creates a new one otherwise;
 * presumably the HTTP implementation behaves the same way server-side — confirm.
 *
 * @param request describes the experiment (project ID, name and optional attributes)
 * @return the existing or newly created experiment
 */
Experiment getOrCreateExperiment(CreateExperimentRequest request);

/**
 * Lists experiments for a project.
 *
 * @param projectId ID of the project whose experiments should be returned
 * @return the experiments that belong to that project
 */
List<Experiment> listExperiments(String projectId);

/**
 * Gets project and organization info for the default project ID.
 *
 * @return the organization and project info wrapped in an {@link Optional}
 *     (NOTE(review): presumably empty when the default project cannot be resolved — confirm
 *     against the implementations)
 */
Optional<OrganizationAndProjectInfo> getProjectAndOrgInfo();

Expand Down Expand Up @@ -157,6 +160,17 @@ public Experiment getOrCreateExperiment(CreateExperimentRequest request) {
}
}

/**
 * Lists the experiments that belong to the given project.
 *
 * @param projectId ID of the project to query; it is URL-encoded before being placed in the
 *     query string
 * @return the experiments found in the {@code objects} field of the list response
 * @throws ApiException if the request fails or the calling thread is interrupted while waiting
 *     for the response
 */
@Override
public List<Experiment> listExperiments(String projectId) {
    // Encode the ID so reserved characters (&, =, #, spaces, ...) cannot corrupt the query.
    var encodedProjectId =
            java.net.URLEncoder.encode(projectId, java.nio.charset.StandardCharsets.UTF_8);
    try {
        return getAsync("/v1/experiment?project_id=" + encodedProjectId, ExperimentList.class)
                .get()
                .objects();
    } catch (InterruptedException e) {
        // Restore the interrupt flag so code further up the stack can still observe it.
        Thread.currentThread().interrupt();
        throw new ApiException(e);
    } catch (ExecutionException e) {
        throw new ApiException(e);
    }
}

@Override
public LoginResponse login() throws LoginException {
try {
Expand Down Expand Up @@ -546,15 +560,26 @@ public Experiment getOrCreateExperiment(CreateExperimentRequest request) {
experiments.stream()
.filter(exp -> exp.name().equals(request.name()))
.findFirst();
return existing.orElseGet(
() ->
new Experiment(
request.name().hashCode() + "",
request.projectId(),
request.name(),
request.description(),
"notused",
"notused"));
if (existing.isPresent()) {
return existing.get();
}
var newExperiment =
new Experiment(
request.name().hashCode() + "",
request.projectId(),
request.name(),
request.description(),
request.tags().orElse(List.of()),
request.metadata().orElse(Map.of()),
"notused",
"notused");
experiments.add(newExperiment);
return newExperiment;
}

/**
 * In-memory listing: returns the stored experiments whose project ID equals
 * {@code projectId}, as an unmodifiable list.
 */
@Override
public List<Experiment> listExperiments(String projectId) {
    var matchingExperiments =
            experiments.stream()
                    .filter(candidate -> candidate.projectId().equals(projectId));
    return matchingExperiments.toList();
}

@Override
Expand Down Expand Up @@ -701,16 +726,24 @@ record Project(String id, String name, String orgId, String createdAt, String up

record ProjectList(List<Project> projects) {}

record ExperimentList(List<Experiment> experiments) {}
record ExperimentList(List<Experiment> objects) {}

record CreateExperimentRequest(
String projectId,
String name,
Optional<String> description,
Optional<String> baseExperimentId) {
Optional<String> baseExperimentId,
Optional<List<String>> tags,
Optional<Map<String, Object>> metadata) {

public CreateExperimentRequest(String projectId, String name) {
this(projectId, name, Optional.empty(), Optional.empty());
this(
projectId,
name,
Optional.empty(),
Optional.empty(),
Optional.empty(),
Optional.empty());
}
}

Expand All @@ -719,6 +752,8 @@ record Experiment(
String projectId,
String name,
Optional<String> description,
List<String> tags,
Map<String, Object> metadata,
String createdAt,
String updatedAt) {}

Expand Down
28 changes: 27 additions & 1 deletion src/main/java/dev/braintrust/eval/Eval.java
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,8 @@ public final class Eval<INPUT, OUTPUT> {
private final @Nonnull Dataset<INPUT, OUTPUT> dataset;
private final @Nonnull Task<INPUT, OUTPUT> task;
private final @Nonnull List<Scorer<INPUT, OUTPUT>> scorers;
private final @Nonnull List<String> tags;
private final @Nonnull Map<String, Object> metadata;

private Eval(Builder<INPUT, OUTPUT> builder) {
this.experimentName = builder.experimentName;
Expand All @@ -52,6 +54,8 @@ private Eval(Builder<INPUT, OUTPUT> builder) {
this.dataset = builder.dataset;
this.task = Objects.requireNonNull(builder.task);
this.scorers = List.copyOf(builder.scorers);
this.tags = List.copyOf(builder.tags);
this.metadata = Map.copyOf(builder.metadata);
}

/** Runs the evaluation and returns results. */
Expand All @@ -62,7 +66,9 @@ public EvalResult run() {
orgAndProject.project().id(),
experimentName,
Optional.empty(),
Optional.empty()));
Optional.empty(),
tags.isEmpty() ? Optional.empty() : Optional.of(tags),
metadata.isEmpty() ? Optional.empty() : Optional.of(metadata)));
dataset.forEach(datasetCase -> evalOne(experiment.id(), datasetCase));
var experimentUrl =
"%s/experiments/%s"
Expand Down Expand Up @@ -163,6 +169,8 @@ public static final class Builder<INPUT, OUTPUT> {
private @Nullable Tracer tracer = null;
private @Nullable Task<INPUT, OUTPUT> task;
private @Nonnull List<Scorer<INPUT, OUTPUT>> scorers = List.of();
private @Nonnull List<String> tags = List.of();
private @Nonnull Map<String, Object> metadata = Map.of();

public Eval<INPUT, OUTPUT> build() {
if (config == null) {
Expand Down Expand Up @@ -256,5 +264,23 @@ public final Builder<INPUT, OUTPUT> scorers(Scorer<INPUT, OUTPUT>... scorers) {
this.scorers = List.of(scorers);
return this;
}

/**
 * Sets the tags for the experiment, replacing any tags set earlier on this builder.
 *
 * @param tags tags to attach; an unmodifiable copy is stored, so later changes to the
 *     argument are not seen by the builder
 * @return this builder
 */
public Builder<INPUT, OUTPUT> tags(List<String> tags) {
    var tagSnapshot = List.copyOf(tags);
    this.tags = tagSnapshot;
    return this;
}

/**
 * Varargs convenience overload that sets the tags for the experiment, replacing any tags set
 * earlier on this builder.
 *
 * @param tags tags to attach; they are stored as an unmodifiable list
 * @return this builder
 */
public Builder<INPUT, OUTPUT> tags(String... tags) {
    var tagList = List.of(tags);
    this.tags = tagList;
    return this;
}

/**
 * Sets the metadata for the experiment, replacing any metadata set earlier on this builder.
 *
 * @param metadata key/value pairs to attach; an unmodifiable copy is stored, so later
 *     changes to the argument are not seen by the builder
 * @return this builder
 */
public Builder<INPUT, OUTPUT> metadata(Map<String, Object> metadata) {
    var metadataSnapshot = Map.copyOf(metadata);
    this.metadata = metadataSnapshot;
    return this;
}
}
}
45 changes: 45 additions & 0 deletions src/test/java/dev/braintrust/eval/EvalTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@

import dev.braintrust.Origin;
import dev.braintrust.TestHarness;
import dev.braintrust.VCR;
import dev.braintrust.api.BraintrustApiClient;
import dev.braintrust.trace.BraintrustTracing;
import io.opentelemetry.api.common.AttributeKey;
Expand Down Expand Up @@ -262,4 +263,48 @@ public void evalRootSpanPassesOriginIfPresent() {
}
assertEquals(2, numRootSpans.get(), "should test for origin presence and absence");
}

/**
 * Runs an eval configured with tags and metadata, then lists the default project's experiments
 * through the API client and checks that the experiment carries the same tags and metadata.
 */
@Test
@SneakyThrows
void evalWithExperimentTagsAndMetadata() {
    // The verification below needs real API calls, so there is nothing to check in replay mode.
    if (VCR.VcrMode.REPLAY == TestHarness.getVcrMode()) {
        // TODO: need a vcr solution for dynamically created objects
        return;
    }

    var experimentName = "unit-test-eval-experiment-tags-metadata";
    var expectedTags = List.of("java-sdk", "unit-test");
    var expectedMetadata = Map.<String, Object>of("model", "gpt-4o", "version", "1.0");

    // Build and run the eval, then wait for its spans to be exported.
    testHarness
            .braintrust()
            .<String, String>evalBuilder()
            .name(experimentName)
            .cases(DatasetCase.of("strawberry", "fruit"))
            .taskFunction(input -> "fruit")
            .scorers(Scorer.of("scorer", taskResult -> 1.0))
            .tags(expectedTags)
            .metadata(expectedMetadata)
            .build()
            .run();
    testHarness.awaitExportedSpans();

    // Look the experiment up by name among the default project's experiments.
    var createdExperiment =
            testHarness
                    .braintrust()
                    .apiClient()
                    .listExperiments(TestHarness.defaultProjectId())
                    .stream()
                    .filter(candidate -> candidate.name().equals(experimentName))
                    .findFirst()
                    .orElseThrow(() -> new AssertionError("Experiment not found"));

    assertEquals(expectedTags, createdExperiment.tags(), "Experiment should have tags");
    assertEquals(
            expectedMetadata, createdExperiment.metadata(), "Experiment should have metadata");
}
}