From d00281667dca7855ee355aa11900b0766c725b3c Mon Sep 17 00:00:00 2001 From: Luca Forstner Date: Mon, 4 May 2026 16:41:58 -0700 Subject: [PATCH 1/2] chore: Fix e2e test setup --- e2e/helpers/scenario-harness.ts | 35 +++++++++++++++++++++++++++++-- e2e/helpers/scenario-installer.ts | 1 + 2 files changed, 34 insertions(+), 2 deletions(-) diff --git a/e2e/helpers/scenario-harness.ts b/e2e/helpers/scenario-harness.ts index 8148bf6b7..248ab257e 100644 --- a/e2e/helpers/scenario-harness.ts +++ b/e2e/helpers/scenario-harness.ts @@ -32,6 +32,7 @@ interface ScenarioResult { const tsxCliPath = createRequire(import.meta.url).resolve("tsx/cli"); const DENO_COMMAND = process.platform === "win32" ? "deno.exe" : "deno"; +const MISE_COMMAND = process.platform === "win32" ? "mise.exe" : "mise"; const DEFAULT_SCENARIO_TIMEOUT_MS = 15_000; const HELPERS_DIR = path.dirname(fileURLToPath(import.meta.url)); const REPO_ROOT = path.resolve(HELPERS_DIR, "../.."); @@ -285,6 +286,37 @@ async function runProcess( }); } +function isSpawnEnoent(error: unknown): boolean { + return ( + error instanceof Error && + "code" in error && + (error as NodeJS.ErrnoException).code === "ENOENT" + ); +} + +async function runDenoProcess( + args: string[], + cwd: string, + env: Record, + timeoutMs: number, +): Promise { + try { + return await runProcess(DENO_COMMAND, args, cwd, env, timeoutMs); + } catch (error) { + if (!isSpawnEnoent(error)) { + throw error; + } + + return await runProcess( + MISE_COMMAND, + ["exec", "--", "deno", ...args], + cwd, + env, + timeoutMs, + ); + } +} + function resolveEntryPath(scenarioDir: string, entry: string): string { return path.join(scenarioDir, entry); } @@ -365,8 +397,7 @@ export async function runDenoScenarioDir(options: { timeoutMs?: number; }): Promise { const entry = options.entry ?? "runner.case.ts"; - const result = await runProcess( - DENO_COMMAND, + const result = await runDenoProcess( [ "test", "--no-check", diff --git a/e2e/helpers/scenario-installer.ts b/e2e/helpers/scenario-installer.ts index 9eb7b19c2..aafe71483 100644 --- a/e2e/helpers/scenario-installer.ts +++ b/e2e/helpers/scenario-installer.ts @@ -297,6 +297,7 @@ export async function installScenarioDependencies({ scenarioDir, "--ignore-workspace", "--frozen-lockfile", + "--ignore-scripts=false", "--strict-peer-dependencies=false", ]; if (preferOffline) { From dcb59a0c244087d5fa285e155d35278e9efc9099 Mon Sep 17 00:00:00 2001 From: Luca Forstner Date: Mon, 4 May 2026 17:08:07 -0700 Subject: [PATCH 2/2] normalize models - too flakey --- .../huggingface-v281.log-payloads.json | 16 ++++---- .../huggingface-v281.span-events.json | 10 ++--- .../huggingface-v3150.log-payloads.json | 18 ++++---- .../huggingface-v3150.span-events.json | 12 +++--- .../huggingface-v41315.log-payloads.json | 18 ++++---- .../huggingface-v41315.span-events.json | 12 +++--- .../huggingface-instrumentation/assertions.ts | 41 ++++++++++++++----- 7 files changed, 73 insertions(+), 54 deletions(-) diff --git a/e2e/scenarios/huggingface-instrumentation/__snapshots__/huggingface-v281.log-payloads.json b/e2e/scenarios/huggingface-instrumentation/__snapshots__/huggingface-v281.log-payloads.json index 72d0b4046..1b38f452b 100644 --- a/e2e/scenarios/huggingface-instrumentation/__snapshots__/huggingface-v281.log-payloads.json +++ b/e2e/scenarios/huggingface-instrumentation/__snapshots__/huggingface-v281.log-payloads.json @@ -92,7 +92,7 @@ "metadata": { "endpointUrl": "https://router.huggingface.co", "max_tokens": 16, - "model": "meta-llama/Llama-3.1-8B-Instruct", + "model": "", "provider": "huggingface", "temperature": 0 }, @@ -137,7 +137,7 @@ "metadata": { "created": 0, "id": "", - "model": "llama3.1-8b", + "model": "", "object": "chat.completion" }, "output": [ @@ -225,7 +225,7 @@ "metadata": { "endpointUrl": "https://router.huggingface.co", "max_tokens": 16, - "model": "meta-llama/Llama-3.1-8B-Instruct", + "model": "", "provider": "huggingface", "temperature": 0 }, @@ -271,7 +271,7 @@ "metadata": { "created": 0, "id": "", - "model": "llama3.1-8b", + "model": "", "object": "chat.completion.chunk" }, "output": { @@ -361,7 +361,7 @@ "metadata": { "endpointUrl": "https://router.huggingface.co", "max_tokens": 64, - "model": "meta-llama/Llama-3.1-8B-Instruct", + "model": "", "provider": "featherless-ai", "temperature": 0, "tool_choice": "required", @@ -429,7 +429,7 @@ "metadata": { "created": 0, "id": "", - "model": "llama3.1-8b", + "model": "", "object": "chat.completion.chunk" }, "output": { @@ -514,7 +514,7 @@ "metadata": { "endpointUrl": "https://router.huggingface.co/featherless-ai/v1/completions", "max_tokens": 4, - "model": "arcee-ai/Trinity-Large-Thinking", + "model": "", "provider": "huggingface" }, "metrics": { @@ -631,7 +631,7 @@ "log_id": "g", "metadata": { "endpointUrl": "https://router.huggingface.co/hf-inference/models/thenlper/gte-large/pipeline/feature-extraction", - "model": "thenlper/gte-large", + "model": "", "provider": "huggingface" }, "metrics": { diff --git a/e2e/scenarios/huggingface-instrumentation/__snapshots__/huggingface-v281.span-events.json b/e2e/scenarios/huggingface-instrumentation/__snapshots__/huggingface-v281.span-events.json index ad0793623..320b17790 100644 --- a/e2e/scenarios/huggingface-instrumentation/__snapshots__/huggingface-v281.span-events.json +++ b/e2e/scenarios/huggingface-instrumentation/__snapshots__/huggingface-v281.span-events.json @@ -32,7 +32,7 @@ "has_output": true, "metadata": { "endpointUrl": "https://router.huggingface.co", - "model": "llama3.1-8b", + "model": "", "provider": "huggingface" }, "metric_keys": [ @@ -80,7 +80,7 @@ "has_output": true, "metadata": { "endpointUrl": "https://router.huggingface.co", - "model": "llama3.1-8b", + "model": "", "provider": "huggingface" }, "metric_keys": [ @@ -122,7 +122,7 @@ "has_output": true, "metadata": { "endpointUrl": "https://router.huggingface.co", - "model": "llama3.1-8b", + "model": "", "provider": "featherless-ai" }, "metric_keys": [ @@ -167,7 +167,7 @@ "metadata": { "endpointUrl": "https://router.huggingface.co/featherless-ai/v1/completions", "finish_reason": "length", - "model": "arcee-ai/Trinity-Large-Thinking", + "model": "", "provider": "huggingface" }, "metric_keys": [ @@ -208,7 +208,7 @@ "has_output": true, "metadata": { "endpointUrl": "https://router.huggingface.co/hf-inference/models/thenlper/gte-large/pipeline/feature-extraction", - "model": "thenlper/gte-large", + "model": "", "provider": "huggingface" }, "metric_keys": [], diff --git a/e2e/scenarios/huggingface-instrumentation/__snapshots__/huggingface-v3150.log-payloads.json b/e2e/scenarios/huggingface-instrumentation/__snapshots__/huggingface-v3150.log-payloads.json index 8c75b57ba..0070b1685 100644 --- a/e2e/scenarios/huggingface-instrumentation/__snapshots__/huggingface-v3150.log-payloads.json +++ b/e2e/scenarios/huggingface-instrumentation/__snapshots__/huggingface-v3150.log-payloads.json @@ -91,7 +91,7 @@ "log_id": "g", "metadata": { "max_tokens": 16, - "model": "meta-llama/Llama-3.1-8B-Instruct", + "model": "", "provider": "featherless-ai", "temperature": 0 }, @@ -133,7 +133,7 @@ "metadata": { "created": 0, "id": "", - "model": "meta-llama/Meta-Llama-3.1-8B-Instruct", + "model": "", "object": "chat.completion" }, "output": [ @@ -220,7 +220,7 @@ "log_id": "g", "metadata": { "max_tokens": 16, - "model": "meta-llama/Llama-3.1-8B-Instruct", + "model": "", "provider": "featherless-ai", "temperature": 0 }, @@ -263,7 +263,7 @@ "metadata": { "created": 0, "id": "", - "model": "meta-llama/Meta-Llama-3.1-8B-Instruct", + "model": "", "object": "chat.completion.chunk" }, "output": { @@ -352,7 +352,7 @@ "log_id": "g", "metadata": { "max_tokens": 64, - "model": "meta-llama/Llama-3.1-8B-Instruct", + "model": "", "provider": "featherless-ai", "temperature": 0, "tool_choice": "required", @@ -417,7 +417,7 @@ "metadata": { "created": 0, "id": "", - "model": "meta-llama/Meta-Llama-3.1-8B-Instruct", + "model": "", "object": "chat.completion.chunk" }, "output": { @@ -500,7 +500,7 @@ "input": "The capital of France is", "log_id": "g", "metadata": { - "model": "meta-llama/Llama-3.1-8B", + "model": "", "parameters": { "do_sample": false, "max_new_tokens": 4, @@ -601,7 +601,7 @@ "input": "The capital of France is", "log_id": "g", "metadata": { - "model": "meta-llama/Llama-3.1-8B", + "model": "", "parameters": { "do_sample": false, "max_new_tokens": 4, @@ -722,7 +722,7 @@ "input": "Paris France", "log_id": "g", "metadata": { - "model": "thenlper/gte-large", + "model": "", "provider": "hf-inference" }, "metrics": { diff --git a/e2e/scenarios/huggingface-instrumentation/__snapshots__/huggingface-v3150.span-events.json b/e2e/scenarios/huggingface-instrumentation/__snapshots__/huggingface-v3150.span-events.json index 1fb8555b4..f8c10443e 100644 --- a/e2e/scenarios/huggingface-instrumentation/__snapshots__/huggingface-v3150.span-events.json +++ b/e2e/scenarios/huggingface-instrumentation/__snapshots__/huggingface-v3150.span-events.json @@ -31,7 +31,7 @@ "has_input": true, "has_output": true, "metadata": { - "model": "meta-llama/Meta-Llama-3.1-8B-Instruct", + "model": "", "provider": "featherless-ai" }, "metric_keys": [ @@ -74,7 +74,7 @@ "has_input": true, "has_output": true, "metadata": { - "model": "meta-llama/Meta-Llama-3.1-8B-Instruct", + "model": "", "provider": "featherless-ai" }, "metric_keys": [ @@ -111,7 +111,7 @@ "has_input": true, "has_output": true, "metadata": { - "model": "meta-llama/Meta-Llama-3.1-8B-Instruct", + "model": "", "provider": "featherless-ai" }, "metric_keys": [ @@ -148,7 +148,7 @@ "has_input": true, "has_output": true, "metadata": { - "model": "meta-llama/Llama-3.1-8B", + "model": "", "provider": "featherless-ai" }, "metric_keys": [], @@ -183,7 +183,7 @@ "has_output": true, "metadata": { "finish_reason": "length", - "model": "meta-llama/Llama-3.1-8B", + "model": "", "provider": "featherless-ai" }, "metric_keys": [ @@ -223,7 +223,7 @@ "has_input": true, "has_output": true, "metadata": { - "model": "thenlper/gte-large", + "model": "", "provider": "hf-inference" }, "metric_keys": [], diff --git a/e2e/scenarios/huggingface-instrumentation/__snapshots__/huggingface-v41315.log-payloads.json b/e2e/scenarios/huggingface-instrumentation/__snapshots__/huggingface-v41315.log-payloads.json index 8c75b57ba..0070b1685 100644 --- a/e2e/scenarios/huggingface-instrumentation/__snapshots__/huggingface-v41315.log-payloads.json +++ b/e2e/scenarios/huggingface-instrumentation/__snapshots__/huggingface-v41315.log-payloads.json @@ -91,7 +91,7 @@ "log_id": "g", "metadata": { "max_tokens": 16, - "model": "meta-llama/Llama-3.1-8B-Instruct", + "model": "", "provider": "featherless-ai", "temperature": 0 }, @@ -133,7 +133,7 @@ "metadata": { "created": 0, "id": "", - "model": "meta-llama/Meta-Llama-3.1-8B-Instruct", + "model": "", "object": "chat.completion" }, "output": [ @@ -220,7 +220,7 @@ "log_id": "g", "metadata": { "max_tokens": 16, - "model": "meta-llama/Llama-3.1-8B-Instruct", + "model": "", "provider": "featherless-ai", "temperature": 0 }, @@ -263,7 +263,7 @@ "metadata": { "created": 0, "id": "", - "model": "meta-llama/Meta-Llama-3.1-8B-Instruct", + "model": "", "object": "chat.completion.chunk" }, "output": { @@ -352,7 +352,7 @@ "log_id": "g", "metadata": { "max_tokens": 64, - "model": "meta-llama/Llama-3.1-8B-Instruct", + "model": "", "provider": "featherless-ai", "temperature": 0, "tool_choice": "required", @@ -417,7 +417,7 @@ "metadata": { "created": 0, "id": "", - "model": "meta-llama/Meta-Llama-3.1-8B-Instruct", + "model": "", "object": "chat.completion.chunk" }, "output": { @@ -500,7 +500,7 @@ "input": "The capital of France is", "log_id": "g", "metadata": { - "model": "meta-llama/Llama-3.1-8B", + "model": "", "parameters": { "do_sample": false, "max_new_tokens": 4, @@ -601,7 +601,7 @@ "input": "The capital of France is", "log_id": "g", "metadata": { - "model": "meta-llama/Llama-3.1-8B", + "model": "", "parameters": { "do_sample": false, "max_new_tokens": 4, @@ -722,7 +722,7 @@ "input": "Paris France", "log_id": "g", "metadata": { - "model": "thenlper/gte-large", + "model": "", "provider": "hf-inference" }, "metrics": { diff --git a/e2e/scenarios/huggingface-instrumentation/__snapshots__/huggingface-v41315.span-events.json b/e2e/scenarios/huggingface-instrumentation/__snapshots__/huggingface-v41315.span-events.json index 1fb8555b4..f8c10443e 100644 --- a/e2e/scenarios/huggingface-instrumentation/__snapshots__/huggingface-v41315.span-events.json +++ b/e2e/scenarios/huggingface-instrumentation/__snapshots__/huggingface-v41315.span-events.json @@ -31,7 +31,7 @@ "has_input": true, "has_output": true, "metadata": { - "model": "meta-llama/Meta-Llama-3.1-8B-Instruct", + "model": "", "provider": "featherless-ai" }, "metric_keys": [ @@ -74,7 +74,7 @@ "has_input": true, "has_output": true, "metadata": { - "model": "meta-llama/Meta-Llama-3.1-8B-Instruct", + "model": "", "provider": "featherless-ai" }, "metric_keys": [ @@ -111,7 +111,7 @@ "has_input": true, "has_output": true, "metadata": { - "model": "meta-llama/Meta-Llama-3.1-8B-Instruct", + "model": "", "provider": "featherless-ai" }, "metric_keys": [ @@ -148,7 +148,7 @@ "has_input": true, "has_output": true, "metadata": { - "model": "meta-llama/Llama-3.1-8B", + "model": "", "provider": "featherless-ai" }, "metric_keys": [], @@ -183,7 +183,7 @@ "has_output": true, "metadata": { "finish_reason": "length", - "model": "meta-llama/Llama-3.1-8B", + "model": "", "provider": "featherless-ai" }, "metric_keys": [ @@ -223,7 +223,7 @@ "has_input": true, "has_output": true, "metadata": { - "model": "thenlper/gte-large", + "model": "", "provider": "hf-inference" }, "metric_keys": [], diff --git a/e2e/scenarios/huggingface-instrumentation/assertions.ts b/e2e/scenarios/huggingface-instrumentation/assertions.ts index b28787953..991ba108a 100644 --- a/e2e/scenarios/huggingface-instrumentation/assertions.ts +++ b/e2e/scenarios/huggingface-instrumentation/assertions.ts @@ -181,20 +181,39 @@ function normalizeMetrics(value: Json): Json { return normalized; } +function normalizeModelNames(value: Json): Json { + if (Array.isArray(value)) { + return value.map((entry) => normalizeModelNames(entry as Json)); + } + + if (!isRecord(value)) { + return value; + } + + const normalized: Record = {}; + for (const [key, entry] of Object.entries(value)) { + normalized[key] = + key === "model" ? "" : normalizeModelNames(entry as Json); + } + return normalized; +} + function normalizePayloadOutput(row: Json): Json { if (!isRecord(row)) { return row; } - return "output" in row - ? { - ...row, - output: normalizeLoggedOutput(row.output, { - normalizeFinishReason: true, - omitToolCalls: true, - }), - } - : row; + const normalized = + "output" in row + ? { + ...row, + output: normalizeLoggedOutput(row.output, { + normalizeFinishReason: true, + omitToolCalls: true, + }), + } + : row; + return normalizeModelNames(normalized); } function normalizeLoggedOutput( @@ -250,7 +269,7 @@ function buildSpanSummary(events: CapturedLogEvent[]): Json { "huggingface-feature-extraction-operation", ); - return [ + return normalizeModelNames([ root ? summarizeWrapperContract(root, ["scenario"]) : null, chatOperation ? summarizeWrapperContract(chatOperation, ["operation"]) @@ -324,7 +343,7 @@ function buildSpanSummary(events: CapturedLogEvent[]): Json { )!, ) : null, - ] satisfies Json; + ] satisfies Json); } export function defineHuggingFaceInstrumentationAssertions(options: {