From d00281667dca7855ee355aa11900b0766c725b3c Mon Sep 17 00:00:00 2001
From: Luca Forstner <luca.forstner@gmail.com>
Date: Mon, 4 May 2026 16:41:58 -0700
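Subject: [PATCH 1/2] chore: Fix e2e test setup

Fall back to `mise exec -- deno` when spawning the Deno binary fails with
ENOENT, and pass `--ignore-scripts=false` when installing scenario
dependencies.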

---
 e2e/helpers/scenario-harness.ts   | 35 +++++++++++++++++++++++++++++--
 e2e/helpers/scenario-installer.ts |  1 +
 2 files changed, 34 insertions(+), 2 deletions(-)
diff --git a/e2e/helpers/scenario-harness.ts b/e2e/helpers/scenario-harness.ts
index 8148bf6b7..248ab257e 100644
--- a/e2e/helpers/scenario-harness.ts
+++ b/e2e/helpers/scenario-harness.ts
@@ -32,6 +32,7 @@ interface ScenarioResult {
 
 const tsxCliPath = createRequire(import.meta.url).resolve("tsx/cli");
 const DENO_COMMAND = process.platform === "win32" ? "deno.exe" : "deno";
+const MISE_COMMAND = process.platform === "win32" ? "mise.exe" : "mise";
 const DEFAULT_SCENARIO_TIMEOUT_MS = 15_000;
 const HELPERS_DIR = path.dirname(fileURLToPath(import.meta.url));
 const REPO_ROOT = path.resolve(HELPERS_DIR, "../..");
@@ -285,6 +286,37 @@ async function runProcess(
   });
 }
 
+function isSpawnEnoent(error: unknown): boolean {
+  return (
+    error instanceof Error &&
+    "code" in error &&
+    (error as NodeJS.ErrnoException).code === "ENOENT"
+  );
+}
+
+async function runDenoProcess(
+  args: string[],
+  cwd: string,
+  env: Record<string, string>,
+  timeoutMs: number,
+): Promise<ScenarioResult> {
+  try {
+    return await runProcess(DENO_COMMAND, args, cwd, env, timeoutMs);
+  } catch (error) {
+    if (!isSpawnEnoent(error)) {
+      throw error;
+    }
+
+    return await runProcess(
+      MISE_COMMAND,
+      ["exec", "--", "deno", ...args],
+      cwd,
+      env,
+      timeoutMs,
+    );
+  }
+}
+
 function resolveEntryPath(scenarioDir: string, entry: string): string {
   return path.join(scenarioDir, entry);
 }
@@ -365,8 +397,7 @@ export async function runDenoScenarioDir(options: {
   timeoutMs?: number;
 }): Promise<ScenarioResult> {
   const entry = options.entry ?? "runner.case.ts";
-  const result = await runProcess(
-    DENO_COMMAND,
+  const result = await runDenoProcess(
     [
       "test",
       "--no-check",
diff --git a/e2e/helpers/scenario-installer.ts b/e2e/helpers/scenario-installer.ts
index 9eb7b19c2..aafe71483 100644
--- a/e2e/helpers/scenario-installer.ts
+++ b/e2e/helpers/scenario-installer.ts
@@ -297,6 +297,7 @@ export async function installScenarioDependencies({
     scenarioDir,
     "--ignore-workspace",
     "--frozen-lockfile",
+    "--ignore-scripts=false",
     "--strict-peer-dependencies=false",
   ];
   if (preferOffline) {

From dcb59a0c244087d5fa285e155d35278e9efc9099 Mon Sep 17 00:00:00 2001
From: Luca Forstner <luca.forstner@gmail.com>
Date: Mon, 4 May 2026 17:08:07 -0700
Subject: [PATCH 2/2] test(e2e): normalize model names in HuggingFace snapshots (too flaky)

---
 .../huggingface-v281.log-payloads.json        | 16 ++++----
 .../huggingface-v281.span-events.json         | 10 ++---
 .../huggingface-v3150.log-payloads.json       | 18 ++++----
 .../huggingface-v3150.span-events.json        | 12 +++---
 .../huggingface-v41315.log-payloads.json      | 18 ++++----
 .../huggingface-v41315.span-events.json       | 12 +++---
 .../huggingface-instrumentation/assertions.ts | 41 ++++++++++++++-----
 7 files changed, 73 insertions(+), 54 deletions(-)

diff --git a/e2e/scenarios/huggingface-instrumentation/__snapshots__/huggingface-v281.log-payloads.json b/e2e/scenarios/huggingface-instrumentation/__snapshots__/huggingface-v281.log-payloads.json
index 72d0b4046..1b38f452b 100644
--- a/e2e/scenarios/huggingface-instrumentation/__snapshots__/huggingface-v281.log-payloads.json
+++ b/e2e/scenarios/huggingface-instrumentation/__snapshots__/huggingface-v281.log-payloads.json
@@ -92,7 +92,7 @@
     "metadata": {
       "endpointUrl": "https://router.huggingface.co",
       "max_tokens": 16,
-      "model": "meta-llama/Llama-3.1-8B-Instruct",
+      "model": "<model>",
       "provider": "huggingface",
       "temperature": 0
     },
@@ -137,7 +137,7 @@
     "metadata": {
       "created": 0,
       "id": "<span:8>",
-      "model": "llama3.1-8b",
+      "model": "<model>",
       "object": "chat.completion"
     },
     "output": [
@@ -225,7 +225,7 @@
     "metadata": {
       "endpointUrl": "https://router.huggingface.co",
       "max_tokens": 16,
-      "model": "meta-llama/Llama-3.1-8B-Instruct",
+      "model": "<model>",
       "provider": "huggingface",
       "temperature": 0
     },
@@ -271,7 +271,7 @@
     "metadata": {
       "created": 0,
       "id": "<span:13>",
-      "model": "llama3.1-8b",
+      "model": "<model>",
       "object": "chat.completion.chunk"
     },
     "output": {
@@ -361,7 +361,7 @@
     "metadata": {
       "endpointUrl": "https://router.huggingface.co",
       "max_tokens": 64,
-      "model": "meta-llama/Llama-3.1-8B-Instruct",
+      "model": "<model>",
       "provider": "featherless-ai",
       "temperature": 0,
       "tool_choice": "required",
@@ -429,7 +429,7 @@
     "metadata": {
       "created": 0,
       "id": "<span:18>",
-      "model": "llama3.1-8b",
+      "model": "<model>",
       "object": "chat.completion.chunk"
     },
     "output": {
@@ -514,7 +514,7 @@
     "metadata": {
       "endpointUrl": "https://router.huggingface.co/featherless-ai/v1/completions",
       "max_tokens": 4,
-      "model": "arcee-ai/Trinity-Large-Thinking",
+      "model": "<model>",
       "provider": "huggingface"
     },
     "metrics": {
@@ -631,7 +631,7 @@
     "log_id": "g",
     "metadata": {
       "endpointUrl": "https://router.huggingface.co/hf-inference/models/thenlper/gte-large/pipeline/feature-extraction",
-      "model": "thenlper/gte-large",
+      "model": "<model>",
       "provider": "huggingface"
     },
     "metrics": {
diff --git a/e2e/scenarios/huggingface-instrumentation/__snapshots__/huggingface-v281.span-events.json b/e2e/scenarios/huggingface-instrumentation/__snapshots__/huggingface-v281.span-events.json
index ad0793623..320b17790 100644
--- a/e2e/scenarios/huggingface-instrumentation/__snapshots__/huggingface-v281.span-events.json
+++ b/e2e/scenarios/huggingface-instrumentation/__snapshots__/huggingface-v281.span-events.json
@@ -32,7 +32,7 @@
     "has_output": true,
     "metadata": {
       "endpointUrl": "https://router.huggingface.co",
-      "model": "llama3.1-8b",
+      "model": "<model>",
       "provider": "huggingface"
     },
     "metric_keys": [
@@ -80,7 +80,7 @@
     "has_output": true,
     "metadata": {
       "endpointUrl": "https://router.huggingface.co",
-      "model": "llama3.1-8b",
+      "model": "<model>",
       "provider": "huggingface"
     },
     "metric_keys": [
@@ -122,7 +122,7 @@
     "has_output": true,
     "metadata": {
       "endpointUrl": "https://router.huggingface.co",
-      "model": "llama3.1-8b",
+      "model": "<model>",
       "provider": "featherless-ai"
     },
     "metric_keys": [
@@ -167,7 +167,7 @@
     "metadata": {
       "endpointUrl": "https://router.huggingface.co/featherless-ai/v1/completions",
       "finish_reason": "length",
-      "model": "arcee-ai/Trinity-Large-Thinking",
+      "model": "<model>",
       "provider": "huggingface"
     },
     "metric_keys": [
@@ -208,7 +208,7 @@
     "has_output": true,
     "metadata": {
       "endpointUrl": "https://router.huggingface.co/hf-inference/models/thenlper/gte-large/pipeline/feature-extraction",
-      "model": "thenlper/gte-large",
+      "model": "<model>",
       "provider": "huggingface"
     },
     "metric_keys": [],
diff --git a/e2e/scenarios/huggingface-instrumentation/__snapshots__/huggingface-v3150.log-payloads.json b/e2e/scenarios/huggingface-instrumentation/__snapshots__/huggingface-v3150.log-payloads.json
index 8c75b57ba..0070b1685 100644
--- a/e2e/scenarios/huggingface-instrumentation/__snapshots__/huggingface-v3150.log-payloads.json
+++ b/e2e/scenarios/huggingface-instrumentation/__snapshots__/huggingface-v3150.log-payloads.json
@@ -91,7 +91,7 @@
     "log_id": "g",
     "metadata": {
       "max_tokens": 16,
-      "model": "meta-llama/Llama-3.1-8B-Instruct",
+      "model": "<model>",
       "provider": "featherless-ai",
       "temperature": 0
     },
@@ -133,7 +133,7 @@
     "metadata": {
       "created": 0,
       "id": "<span:8>",
-      "model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
+      "model": "<model>",
       "object": "chat.completion"
     },
     "output": [
@@ -220,7 +220,7 @@
     "log_id": "g",
     "metadata": {
       "max_tokens": 16,
-      "model": "meta-llama/Llama-3.1-8B-Instruct",
+      "model": "<model>",
       "provider": "featherless-ai",
       "temperature": 0
     },
@@ -263,7 +263,7 @@
     "metadata": {
       "created": 0,
       "id": "<span:13>",
-      "model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
+      "model": "<model>",
       "object": "chat.completion.chunk"
     },
     "output": {
@@ -352,7 +352,7 @@
     "log_id": "g",
     "metadata": {
       "max_tokens": 64,
-      "model": "meta-llama/Llama-3.1-8B-Instruct",
+      "model": "<model>",
       "provider": "featherless-ai",
       "temperature": 0,
       "tool_choice": "required",
@@ -417,7 +417,7 @@
     "metadata": {
       "created": 0,
       "id": "<span:18>",
-      "model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
+      "model": "<model>",
       "object": "chat.completion.chunk"
     },
     "output": {
@@ -500,7 +500,7 @@
     "input": "The capital of France is",
     "log_id": "g",
     "metadata": {
-      "model": "meta-llama/Llama-3.1-8B",
+      "model": "<model>",
       "parameters": {
         "do_sample": false,
         "max_new_tokens": 4,
@@ -601,7 +601,7 @@
     "input": "The capital of France is",
     "log_id": "g",
     "metadata": {
-      "model": "meta-llama/Llama-3.1-8B",
+      "model": "<model>",
       "parameters": {
         "do_sample": false,
         "max_new_tokens": 4,
@@ -722,7 +722,7 @@
     "input": "Paris France",
     "log_id": "g",
     "metadata": {
-      "model": "thenlper/gte-large",
+      "model": "<model>",
       "provider": "hf-inference"
     },
     "metrics": {
diff --git a/e2e/scenarios/huggingface-instrumentation/__snapshots__/huggingface-v3150.span-events.json b/e2e/scenarios/huggingface-instrumentation/__snapshots__/huggingface-v3150.span-events.json
index 1fb8555b4..f8c10443e 100644
--- a/e2e/scenarios/huggingface-instrumentation/__snapshots__/huggingface-v3150.span-events.json
+++ b/e2e/scenarios/huggingface-instrumentation/__snapshots__/huggingface-v3150.span-events.json
@@ -31,7 +31,7 @@
     "has_input": true,
     "has_output": true,
     "metadata": {
-      "model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
+      "model": "<model>",
       "provider": "featherless-ai"
     },
     "metric_keys": [
@@ -74,7 +74,7 @@
     "has_input": true,
     "has_output": true,
     "metadata": {
-      "model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
+      "model": "<model>",
       "provider": "featherless-ai"
     },
     "metric_keys": [
@@ -111,7 +111,7 @@
     "has_input": true,
     "has_output": true,
     "metadata": {
-      "model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
+      "model": "<model>",
       "provider": "featherless-ai"
     },
     "metric_keys": [
@@ -148,7 +148,7 @@
     "has_input": true,
     "has_output": true,
     "metadata": {
-      "model": "meta-llama/Llama-3.1-8B",
+      "model": "<model>",
       "provider": "featherless-ai"
     },
     "metric_keys": [],
@@ -183,7 +183,7 @@
     "has_output": true,
     "metadata": {
       "finish_reason": "length",
-      "model": "meta-llama/Llama-3.1-8B",
+      "model": "<model>",
       "provider": "featherless-ai"
     },
     "metric_keys": [
@@ -223,7 +223,7 @@
     "has_input": true,
     "has_output": true,
     "metadata": {
-      "model": "thenlper/gte-large",
+      "model": "<model>",
       "provider": "hf-inference"
     },
     "metric_keys": [],
diff --git a/e2e/scenarios/huggingface-instrumentation/__snapshots__/huggingface-v41315.log-payloads.json b/e2e/scenarios/huggingface-instrumentation/__snapshots__/huggingface-v41315.log-payloads.json
index 8c75b57ba..0070b1685 100644
--- a/e2e/scenarios/huggingface-instrumentation/__snapshots__/huggingface-v41315.log-payloads.json
+++ b/e2e/scenarios/huggingface-instrumentation/__snapshots__/huggingface-v41315.log-payloads.json
@@ -91,7 +91,7 @@
     "log_id": "g",
     "metadata": {
       "max_tokens": 16,
-      "model": "meta-llama/Llama-3.1-8B-Instruct",
+      "model": "<model>",
       "provider": "featherless-ai",
       "temperature": 0
     },
@@ -133,7 +133,7 @@
     "metadata": {
       "created": 0,
       "id": "<span:8>",
-      "model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
+      "model": "<model>",
       "object": "chat.completion"
     },
     "output": [
@@ -220,7 +220,7 @@
     "log_id": "g",
     "metadata": {
       "max_tokens": 16,
-      "model": "meta-llama/Llama-3.1-8B-Instruct",
+      "model": "<model>",
       "provider": "featherless-ai",
       "temperature": 0
     },
@@ -263,7 +263,7 @@
     "metadata": {
       "created": 0,
       "id": "<span:13>",
-      "model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
+      "model": "<model>",
       "object": "chat.completion.chunk"
     },
     "output": {
@@ -352,7 +352,7 @@
     "log_id": "g",
     "metadata": {
       "max_tokens": 64,
-      "model": "meta-llama/Llama-3.1-8B-Instruct",
+      "model": "<model>",
       "provider": "featherless-ai",
       "temperature": 0,
       "tool_choice": "required",
@@ -417,7 +417,7 @@
     "metadata": {
       "created": 0,
       "id": "<span:18>",
-      "model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
+      "model": "<model>",
       "object": "chat.completion.chunk"
     },
     "output": {
@@ -500,7 +500,7 @@
     "input": "The capital of France is",
     "log_id": "g",
     "metadata": {
-      "model": "meta-llama/Llama-3.1-8B",
+      "model": "<model>",
       "parameters": {
         "do_sample": false,
         "max_new_tokens": 4,
@@ -601,7 +601,7 @@
     "input": "The capital of France is",
     "log_id": "g",
     "metadata": {
-      "model": "meta-llama/Llama-3.1-8B",
+      "model": "<model>",
       "parameters": {
         "do_sample": false,
         "max_new_tokens": 4,
@@ -722,7 +722,7 @@
     "input": "Paris France",
     "log_id": "g",
     "metadata": {
-      "model": "thenlper/gte-large",
+      "model": "<model>",
       "provider": "hf-inference"
     },
     "metrics": {
diff --git a/e2e/scenarios/huggingface-instrumentation/__snapshots__/huggingface-v41315.span-events.json b/e2e/scenarios/huggingface-instrumentation/__snapshots__/huggingface-v41315.span-events.json
index 1fb8555b4..f8c10443e 100644
--- a/e2e/scenarios/huggingface-instrumentation/__snapshots__/huggingface-v41315.span-events.json
+++ b/e2e/scenarios/huggingface-instrumentation/__snapshots__/huggingface-v41315.span-events.json
@@ -31,7 +31,7 @@
     "has_input": true,
     "has_output": true,
     "metadata": {
-      "model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
+      "model": "<model>",
       "provider": "featherless-ai"
     },
     "metric_keys": [
@@ -74,7 +74,7 @@
     "has_input": true,
     "has_output": true,
     "metadata": {
-      "model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
+      "model": "<model>",
       "provider": "featherless-ai"
     },
     "metric_keys": [
@@ -111,7 +111,7 @@
     "has_input": true,
     "has_output": true,
     "metadata": {
-      "model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
+      "model": "<model>",
       "provider": "featherless-ai"
     },
     "metric_keys": [
@@ -148,7 +148,7 @@
     "has_input": true,
     "has_output": true,
     "metadata": {
-      "model": "meta-llama/Llama-3.1-8B",
+      "model": "<model>",
       "provider": "featherless-ai"
     },
     "metric_keys": [],
@@ -183,7 +183,7 @@
     "has_output": true,
     "metadata": {
       "finish_reason": "length",
-      "model": "meta-llama/Llama-3.1-8B",
+      "model": "<model>",
       "provider": "featherless-ai"
     },
     "metric_keys": [
@@ -223,7 +223,7 @@
     "has_input": true,
     "has_output": true,
     "metadata": {
-      "model": "thenlper/gte-large",
+      "model": "<model>",
       "provider": "hf-inference"
     },
     "metric_keys": [],
diff --git a/e2e/scenarios/huggingface-instrumentation/assertions.ts b/e2e/scenarios/huggingface-instrumentation/assertions.ts
index b28787953..991ba108a 100644
--- a/e2e/scenarios/huggingface-instrumentation/assertions.ts
+++ b/e2e/scenarios/huggingface-instrumentation/assertions.ts
@@ -181,20 +181,39 @@ function normalizeMetrics(value: Json): Json {
   return normalized;
 }
 
+function normalizeModelNames(value: Json): Json {
+  if (Array.isArray(value)) {
+    return value.map((entry) => normalizeModelNames(entry as Json));
+  }
+
+  if (!isRecord(value)) {
+    return value;
+  }
+
+  const normalized: Record<string, Json> = {};
+  for (const [key, entry] of Object.entries(value)) {
+    normalized[key] =
+      key === "model" ? "<model>" : normalizeModelNames(entry as Json);
+  }
+  return normalized;
+}
+
 function normalizePayloadOutput(row: Json): Json {
   if (!isRecord(row)) {
     return row;
   }
 
-  return "output" in row
-    ? {
-        ...row,
-        output: normalizeLoggedOutput(row.output, {
-          normalizeFinishReason: true,
-          omitToolCalls: true,
-        }),
-      }
-    : row;
+  const normalized =
+    "output" in row
+      ? {
+          ...row,
+          output: normalizeLoggedOutput(row.output, {
+            normalizeFinishReason: true,
+            omitToolCalls: true,
+          }),
+        }
+      : row;
+  return normalizeModelNames(normalized);
 }
 
 function normalizeLoggedOutput(
@@ -250,7 +269,7 @@ function buildSpanSummary(events: CapturedLogEvent[]): Json {
     "huggingface-feature-extraction-operation",
   );
 
-  return [
+  return normalizeModelNames([
     root ? summarizeWrapperContract(root, ["scenario"]) : null,
     chatOperation
       ? summarizeWrapperContract(chatOperation, ["operation"])
@@ -324,7 +343,7 @@ function buildSpanSummary(events: CapturedLogEvent[]): Json {
           )!,
         )
       : null,
-  ] satisfies Json;
+  ] satisfies Json);
 }
 
 export function defineHuggingFaceInstrumentationAssertions(options: {