diff --git a/internal-api/src/jmh/java/datadog/trace/util/ImmutableSetBenchmark.java b/internal-api/src/jmh/java/datadog/trace/util/ImmutableSetBenchmark.java
new file mode 100644
index 00000000000..13312c6c576
--- /dev/null
+++ b/internal-api/src/jmh/java/datadog/trace/util/ImmutableSetBenchmark.java
@@ -0,0 +1,186 @@
+package datadog.trace.util;
+
+import java.util.Arrays;
+import java.util.HashSet;
+import java.util.Set;
+import java.util.TreeSet;
+import org.openjdk.jmh.annotations.Benchmark;
+import org.openjdk.jmh.annotations.Fork;
+import org.openjdk.jmh.annotations.Level;
+import org.openjdk.jmh.annotations.Measurement;
+import org.openjdk.jmh.annotations.Scope;
+import org.openjdk.jmh.annotations.Setup;
+import org.openjdk.jmh.annotations.State;
+import org.openjdk.jmh.annotations.Threads;
+import org.openjdk.jmh.annotations.Warmup;
+
+/**
+ * Membership over a small, fixed, read-only string set shared across threads — split into hit and
+ * miss lookups (different cost shapes per structure).
+ *
+ * <p>The set is built once and only read, so a single shared instance ({@link Scope#Benchmark})
+ * read by all {@code @Threads} is realistic and contention-free. This is the read-mostly
+ * counterpart to the per-thread mutable {@link SingleThreadedSetBenchmark}, and mirrors {@link
+ * ImmutableMapBenchmark} on the set side. Sets in the tracer skew strongly toward this fixed,
+ * read-only shape.
+ *
+ * <p>Strategies compared:
+ *
+ * <ul>
+ *   <li>{@code array} / {@code sortedArray} — linear scan / binary search; slow on miss.
+ *   <li>{@link HashSet} — idiomatic, fast; node-based, allocates per element.
+ *   <li>{@link TreeSet} — comparator-ordered; worth it only for a custom comparator, not speed.
+ *   <li>{@link java.util.Set#copyOf} (via {@link CollectionUtils#tryMakeImmutableSet}) — the JDK's
+ *       compact, array-backed immutable set ({@code ImmutableCollections.SetN}), which is what the
+ *       agent actually uses for fixed config sets. Java 10+; falls back to {@code HashSet} pre-10.
+ *       The realistic baseline for any flat/immutable set comparison.
+ * </ul>
+ *
+ * <p>Lookups are interned (the {@code ==} fast path where a structure has one); misses are short
+ * and never present.
+ *
+ * <p>Java 17 results (Apple M1, {@code @Fork(2)}, {@code @Threads(8)}; M ops/s = millions):
+ *
+ * <pre>{@code
+ * Structure        hit   miss
+ * hashSet         2159   1751   (fastest)
+ * copyOf (SetN)   1946   1633
+ * array            926    584
+ * sortedArray      664    588
+ * treeSet          642    593
+ * }</pre>
+ *
+ * <p>Key findings:
+ *
+ * <ul>
+ *   <li>{@code HashSet} is fastest; {@link java.util.Set#copyOf} ({@code SetN}) trails by only ~10%
+ *       on hit and ~7% on miss — and it's the compact, array-backed form the agent already uses for
+ *       fixed config sets, so it's a strong default when the set is immutable.
+ *   <li>{@code array} / {@code sortedArray} / {@code treeSet} cluster at ~0.6–0.9B — they scan,
+ *       binary-search, or tree-walk per lookup, so they trail the hashed structures, most visibly
+ *       on the miss path.
+ * </ul>
+ */
+@Fork(2)
+@Warmup(iterations = 2)
+@Measurement(iterations = 3)
+@Threads(8)
+@State(Scope.Benchmark)
+public class ImmutableSetBenchmark {
+  /** Elements stored in every structure under test; also the keys used for hit lookups. */
+  static final String[] STRINGS = {
+    "foo", "bar", "baz", "quux", "hello", "world",
+    "service", "queryString", "lorem", "ipsum", "dolem", "sit"
+  };
+
+  /** Distinct String instances that are never present, for the miss path. */
+  static final String[] MISSES = newMisses();
+
+  /**
+   * Builds the miss keys: {@code STRINGS.length * 4} strings of the form {@code "dne-" + i}, none
+   * of which equals an entry of {@link #STRINGS}.
+   *
+   * @return a freshly allocated array of miss keys
+   */
+  static String[] newMisses() {
+    String[] misses = new String[STRINGS.length * 4];
+    for (int i = 0; i < misses.length; ++i) {
+      misses[i] = "dne-" + i;
+    }
+    return misses;
+  }
+
+  // Built once, never mutated -- safe to share across the reader threads.
+  // Concrete, parameterized types are kept on purpose: each benchmark names the structure it
+  // measures, and the element type is always String.
+  String[] array;
+  String[] sortedArray;
+  HashSet<String> hashSet;
+  TreeSet<String> treeSet;
+  Set<String> copyOfSet;
+
+  /** Populates every structure from {@link #STRINGS}; runs once per trial. */
+  @Setup(Level.Trial)
+  public void setUp() {
+    array = STRINGS;
+    // Sort a copy so the shared STRINGS constant keeps its declaration order.
+    sortedArray = Arrays.copyOf(STRINGS, STRINGS.length);
+    Arrays.sort(sortedArray);
+    hashSet = new HashSet<>(Arrays.asList(STRINGS));
+    treeSet = new TreeSet<>(Arrays.asList(STRINGS));
+    copyOfSet = CollectionUtils.tryMakeImmutableSet(Arrays.asList(STRINGS));
+  }
+
+  /** Per-thread lookup cursor so each reader thread cycles keys independently. */
+  @State(Scope.Thread)
+  public static class Cursor {
+    int hitIndex = 0;
+    int missIndex = 0;
+
+    /**
+     * Advances the hit cursor, wrapping to 0 past the end, and returns the key at the new position.
+     * The cursor moves before the read, so the first key returned is {@code STRINGS[1]}.
+     */
+    String nextHit() {
+      int i = hitIndex + 1;
+      if (i >= STRINGS.length) {
+        i = 0;
+      }
+      hitIndex = i;
+      return STRINGS[i];
+    }
+
+    /**
+     * Advances the miss cursor, wrapping to 0 past the end, and returns the key at the new
+     * position. The cursor moves before the read, so the first key returned is {@code MISSES[1]}.
+     */
+    String nextMiss() {
+      int i = missIndex + 1;
+      if (i >= MISSES.length) {
+        i = 0;
+      }
+      missIndex = i;
+      return MISSES[i];
+    }
+  }
+
+  /**
+   * Linear membership scan using {@code needle.equals(element)}.
+   *
+   * @param array elements to scan
+   * @param needle key to look for; must be non-null because {@code equals} is invoked on it
+   * @return {@code true} if some element equals {@code needle}
+   */
+  static boolean arrayContains(String[] array, String needle) {
+    for (String s : array) {
+      if (needle.equals(s)) {
+        return true;
+      }
+    }
+    return false;
+  }
+
+  // Each benchmark returns the lookup result so JMH consumes it and the call is not eliminated.
+
+  @Benchmark
+  public boolean array_hit(Cursor cursor) {
+    return arrayContains(array, cursor.nextHit());
+  }
+
+  @Benchmark
+  public boolean array_miss(Cursor cursor) {
+    return arrayContains(array, cursor.nextMiss());
+  }
+
+  @Benchmark
+  public boolean sortedArray_hit(Cursor cursor) {
+    // binarySearch returns a negative insertion-point encoding when the key is absent.
+    return Arrays.binarySearch(sortedArray, cursor.nextHit()) >= 0;
+  }
+
+  @Benchmark
+  public boolean sortedArray_miss(Cursor cursor) {
+    return Arrays.binarySearch(sortedArray, cursor.nextMiss()) >= 0;
+  }
+
+  @Benchmark
+  public boolean hashSet_hit(Cursor cursor) {
+    return hashSet.contains(cursor.nextHit());
+  }
+
+  @Benchmark
+  public boolean hashSet_miss(Cursor cursor) {
+    return hashSet.contains(cursor.nextMiss());
+  }
+
+  @Benchmark
+  public boolean treeSet_hit(Cursor cursor) {
+    return treeSet.contains(cursor.nextHit());
+  }
+
+  @Benchmark
+  public boolean treeSet_miss(Cursor cursor) {
+    return treeSet.contains(cursor.nextMiss());
+  }
+
+  @Benchmark
+  public boolean copyOf_hit(Cursor cursor) {
+    return copyOfSet.contains(cursor.nextHit());
+  }
+
+  @Benchmark
+  public boolean copyOf_miss(Cursor cursor) {
+    return copyOfSet.contains(cursor.nextMiss());
+  }
+}
diff --git a/internal-api/src/jmh/java/datadog/trace/util/SetBenchmark.java b/internal-api/src/jmh/java/datadog/trace/util/SetBenchmark.java
deleted file mode 100644
index 144e4748400..00000000000
--- a/internal-api/src/jmh/java/datadog/trace/util/SetBenchmark.java
+++ /dev/null
@@ -1,128 +0,0 @@
-package datadog.trace.util;
-
-import java.util.Arrays;
-import java.util.Collections;
-import java.util.HashSet;
-import java.util.TreeSet;
-import java.util.concurrent.ThreadLocalRandom;
-import java.util.function.Supplier;
-import org.openjdk.jmh.annotations.Benchmark;
-import org.openjdk.jmh.annotations.Fork;
-import org.openjdk.jmh.annotations.Measurement;
-import org.openjdk.jmh.annotations.Threads;
-import org.openjdk.jmh.annotations.Warmup;
-
-/**
- *
- *
- *
- * Benchmark showing possible ways to represent and check if a set includes an elememt...
- * - (RECOMMENDED) HashSet - on par with TreeSet - idiomatic
- *
- (RECOMMENDED) TreeMap - on par with HashSet - better solution if custom comparator is
- * needed (see CaseInsensitiveMapBenchmark)
- *
- array - slower than HashSet
- *
- sortedArray - slowest - slower than array for common case of small arrays
- *
- *
- *
- * MacBook M1 - 8 threads - Java 21
- * 1/3 not found rate
- *
- * Benchmark Mode Cnt Score Error Units
- * SetBenchmark.contains_array thrpt 6 645561886.327 ± 100781717.494 ops/s
- * SetBenchmark.contains_hashSet thrpt 6 1536236680.235 ± 114966961.506 ops/s
- * SetBenchmark.contains_sortedArray thrpt 6 571476939.441 ± 21334620.460 ops/s
- * SetBenchmark.contains_treeSet thrpt 6 1557663759.411 ± 95343683.124 ops/s
- *
- */
-@Fork(2)
-@Warmup(iterations = 2)
-@Measurement(iterations = 3)
-@Threads(8)
-public class SetBenchmark {
- static final String[] STRINGS =
- new String[] {
- "foo",
- "bar",
- "baz",
- "quux",
- "hello",
- "world",
- "service",
- "queryString",
- "lorem",
- "ipsum",
- "dolem",
- "sit"
- };
-
- static T init(Supplier supplier) {
- return supplier.get();
- }
-
- static final String[] LOOKUPS =
- init(
- () -> {
- String[] lookups = Arrays.copyOf(STRINGS, STRINGS.length * 10);
-
- for (int i = 0; i < STRINGS.length; ++i) {
- lookups[STRINGS.length + i] = new String(STRINGS[i]);
- }
-
- // 2 / 3 of the key look-ups miss the set
- for (int i = STRINGS.length * 2; i < lookups.length; ++i) {
- lookups[i] = "dne-" + ThreadLocalRandom.current().nextInt();
- }
-
- Collections.shuffle(Arrays.asList(lookups));
- return lookups;
- });
-
- static int sharedLookupIndex = 0;
-
- static String nextString() {
- int localIndex = ++sharedLookupIndex;
- if (localIndex >= LOOKUPS.length) {
- sharedLookupIndex = localIndex = 0;
- }
- return LOOKUPS[localIndex];
- }
-
- static final String[] ARRAY = STRINGS;
-
- @Benchmark
- public boolean contains_array() {
- String needle = nextString();
- for (String str : ARRAY) {
- if (needle.equals(str)) return true;
- }
- return false;
- }
-
- static final String[] SORTED_ARRAY =
- init(
- () -> {
- String[] sorted = Arrays.copyOf(STRINGS, STRINGS.length);
- Arrays.sort(sorted);
- return sorted;
- });
-
- @Benchmark
- public boolean contains_sortedArray() {
- return (Arrays.binarySearch(SORTED_ARRAY, nextString()) != -1);
- }
-
- static final HashSet HASH_SET = new HashSet<>(Arrays.asList(STRINGS));
-
- @Benchmark
- public boolean contains_hashSet() {
- return HASH_SET.contains(nextString());
- }
-
- static final TreeSet TREE_SET = new TreeSet<>(Arrays.asList(STRINGS));
-
- @Benchmark
- public boolean contains_treeSet() {
- return HASH_SET.contains(nextString());
- }
-}
diff --git a/internal-api/src/jmh/java/datadog/trace/util/SingleThreadedSetBenchmark.java b/internal-api/src/jmh/java/datadog/trace/util/SingleThreadedSetBenchmark.java
new file mode 100644
index 00000000000..f9e9b69179a
--- /dev/null
+++ b/internal-api/src/jmh/java/datadog/trace/util/SingleThreadedSetBenchmark.java
@@ -0,0 +1,198 @@
+package datadog.trace.util;
+
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.HashSet;
+import java.util.LinkedHashSet;
+import java.util.Set;
+import java.util.TreeSet;
+import org.openjdk.jmh.annotations.Benchmark;
+import org.openjdk.jmh.annotations.Fork;
+import org.openjdk.jmh.annotations.Level;
+import org.openjdk.jmh.annotations.Measurement;
+import org.openjdk.jmh.annotations.Scope;
+import org.openjdk.jmh.annotations.Setup;
+import org.openjdk.jmh.annotations.State;
+import org.openjdk.jmh.annotations.Threads;
+import org.openjdk.jmh.annotations.Warmup;
+import org.openjdk.jmh.infra.Blackhole;
+
+/**
+ * Single-threaded (uncontended) set usage: each thread builds, reads, and discards its own
+ * sets. Per-thread state ({@link Scope#Thread}); mirrors {@link SingleThreadedMapBenchmark} on the
+ * set side. Running at {@code @Threads(8)} keeps allocation / GC interactions visible without lock
+ * contention.
+ *
+ * <p>Sets in the tracer skew read-only/fixed (see {@link ImmutableSetBenchmark}); this covers the
+ * mutable-lifecycle case for completeness and — via {@link Collections#synchronizedSet} — the
+ * uncontended synchronization tax. Because each thread owns its synchronized set, the
+ * monitor is only ever locked by one thread: biased locking ≈ free on Java ≤ 11, full uncontended
+ * CAS on Java 15+ (biased locking disabled by default, JEP 374). The unsynchronized {@code hashSet}
+ * {@code contains}/{@code iterate} methods are the in-harness baseline; the tax is the delta.
+ *
+ * <p>Java 17 results (Apple M1, {@code @Fork(2)}, {@code @Threads(8)}; M ops/s = millions):
+ *
+ * <pre>{@code
+ * contains_hashSet           1291
+ * contains_synchronizedSet    808   (~37% slower — the uncontended sync tax)
+ * iterate_hashSet              91
+ * iterate_synchronizedSet      90   (one monitor acquire amortized over the walk)
+ *
+ * create_hashSet           81    clone_hashSet           48
+ * create_hashSet_sized     78    clone_synchronizedSet   47
+ * create_linkedHashSet     61    clone_linkedHashSet     59
+ * create_synchronizedSet   41    clone_treeSet           83
+ * create_treeSet           36
+ * }</pre>
+ *
+ * <p>Key findings:
+ *
+ * <ul>
+ *   <li>Uncontended synchronization tax on {@code contains} is ~37% (1291 → 808M ops/s) even
+ *       with no contention and biased locking disabled (Java 17, JEP 374) — the full per-lock CAS
+ *       cost. On {@code iterate} it nearly vanishes: a single monitor acquire amortized over the
+ *       traversal.
+ *   <li>Construction: {@code TreeSet} is the slowest to build (~36M); the {@code synchronizedSet}
+ *       wrapper adds a modest cost over plain {@code HashSet}. (Allocation-path numbers carry more
+ *       run-to-run variance than the read paths.)
+ * </ul>
+ */
+@Fork(2)
+@Warmup(iterations = 2)
+@Measurement(iterations = 3)
+@Threads(8)
+@State(Scope.Thread)
+public class SingleThreadedSetBenchmark {
+  /** Elements inserted by the construction benchmarks and held by the prebuilt sets. */
+  static final String[] ELEMENTS = {
+    "foo", "bar", "baz", "quux", "hello", "world",
+    "service", "queryString", "lorem", "ipsum", "dolem", "sit"
+  };
+
+  // Distinct String instances so lookups exercise equals(), not identity.
+  static final String[] EQUAL_ELEMENTS = newEqualElements();
+
+  /**
+   * Copies every entry of {@link #ELEMENTS} into a new {@code String} instance that is equal to,
+   * but not the same object as, the original.
+   *
+   * @return an array parallel to {@link #ELEMENTS} holding the copies
+   */
+  static String[] newEqualElements() {
+    String[] copies = new String[ELEMENTS.length];
+    for (int i = 0; i < ELEMENTS.length; ++i) {
+      // new String(...) is deliberate: it forces a separate instance from the literal.
+      copies[i] = new String(ELEMENTS[i]);
+    }
+    return copies;
+  }
+
+  /**
+   * Adds every entry of {@link #ELEMENTS} to {@code set}, one {@code add} call per element.
+   *
+   * @param set destination set; mutated in place
+   */
+  static void fill(Set<String> set) {
+    for (String s : ELEMENTS) {
+      set.add(s);
+    }
+  }
+
+  // Per-thread prebuilt sets for the read + clone benchmarks (built once per trial, per thread).
+  HashSet<String> hashSet;
+  Set<String> synchronizedSet;
+  TreeSet<String> treeSet;
+  LinkedHashSet<String> linkedHashSet;
+  // Position of the most recently returned lookup key in EQUAL_ELEMENTS.
+  int index = 0;
+
+  /** Builds this thread's sets from {@link #ELEMENTS}; runs once per trial. */
+  @Setup(Level.Trial)
+  public void setUp() {
+    hashSet = new HashSet<>(Arrays.asList(ELEMENTS));
+    // Wrap a separate copy so the synchronized view does not share storage with hashSet.
+    synchronizedSet = Collections.synchronizedSet(new HashSet<>(hashSet));
+    treeSet = new TreeSet<>(Arrays.asList(ELEMENTS));
+    linkedHashSet = new LinkedHashSet<>(Arrays.asList(ELEMENTS));
+  }
+
+  /**
+   * Advances the lookup cursor, wrapping to 0 past the end, and returns the key at the new
+   * position. Every key returned is equal to an element of the prebuilt sets.
+   */
+  String nextLookup() {
+    if (++index >= EQUAL_ELEMENTS.length) {
+      index = 0;
+    }
+    return EQUAL_ELEMENTS[index];
+  }
+
+  // ---- construction: build cost + allocation ----
+  // Each benchmark returns the new set so JMH consumes it and the allocation is not eliminated.
+
+  @Benchmark
+  public Set<String> create_hashSet() {
+    HashSet<String> set = new HashSet<>();
+    fill(set);
+    return set;
+  }
+
+  @Benchmark
+  public Set<String> create_hashSet_sized() {
+    // The element count is passed as the initial capacity argument.
+    HashSet<String> set = new HashSet<>(ELEMENTS.length);
+    fill(set);
+    return set;
+  }
+
+  @Benchmark
+  public Set<String> create_synchronizedSet() {
+    Set<String> set = Collections.synchronizedSet(new HashSet<>());
+    fill(set);
+    return set;
+  }
+
+  @Benchmark
+  public Set<String> create_treeSet() {
+    TreeSet<String> set = new TreeSet<>();
+    fill(set);
+    return set;
+  }
+
+  @Benchmark
+  public Set<String> create_linkedHashSet() {
+    LinkedHashSet<String> set = new LinkedHashSet<>();
+    fill(set);
+    return set;
+  }
+
+  // ---- copy ----
+
+  @Benchmark
+  public Set<String> clone_hashSet() {
+    return new HashSet<>(hashSet);
+  }
+
+  @Benchmark
+  public Set<String> clone_synchronizedSet() {
+    return Collections.synchronizedSet(new HashSet<>(hashSet));
+  }
+
+  @Benchmark
+  public Set<String> clone_treeSet() {
+    return new TreeSet<>(treeSet);
+  }
+
+  @Benchmark
+  public Set<String> clone_linkedHashSet() {
+    return new LinkedHashSet<>(linkedHashSet);
+  }
+
+  // ---- read: unsynchronized baseline vs uncontended synchronized (biased-locking story) ----
+
+  @Benchmark
+  public boolean contains_hashSet() {
+    return hashSet.contains(nextLookup());
+  }
+
+  @Benchmark
+  public boolean contains_synchronizedSet() {
+    return synchronizedSet.contains(nextLookup());
+  }
+
+  @Benchmark
+  public void iterate_hashSet(Blackhole blackhole) {
+    for (String s : hashSet) {
+      blackhole.consume(s);
+    }
+  }
+
+  @Benchmark
+  public void iterate_synchronizedSet(Blackhole blackhole) {
+    // Collections.synchronizedSet requires the caller to synchronize during iteration; this is the
+    // correct usage and measures one (uncontended) monitor acquire around the traversal.
+    synchronized (synchronizedSet) {
+      for (String s : synchronizedSet) {
+        blackhole.consume(s);
+      }
+    }
+  }
+}