diff --git a/.changeset/structured-error-grouping.md b/.changeset/structured-error-grouping.md new file mode 100644 index 00000000000..578ab3d8688 --- /dev/null +++ b/.changeset/structured-error-grouping.md @@ -0,0 +1,7 @@ +--- +'@shopify/cli-kit': patch +--- + +Group CLI crash reports on structured error signals instead of one catch-all bucket + +Bugsnag error grouping now derives a stable `slice:category:signature` hash from the original typed error's HTTP status and GraphQL `extensions.code` (falling back to the shared keyword categorizer, and to Bugsnag's stack-trace grouping for genuinely unknown errors). The same signals are emitted as `error_grouping` metadata so backend routing works regardless of CLI version. Known-transient API errors (HTTP 401/429 and `THROTTLED`) are now treated as expected and kept out of crash reporting. diff --git a/packages/cli-kit/src/private/node/analytics/error-grouping.DESIGN.md b/packages/cli-kit/src/private/node/analytics/error-grouping.DESIGN.md new file mode 100644 index 00000000000..27f85ab76b8 --- /dev/null +++ b/packages/cli-kit/src/private/node/analytics/error-grouping.DESIGN.md @@ -0,0 +1,55 @@ +# Design: Structured error grouping for crash reporting + +## Problem + +A single Bugsnag bucket (`groupingHash 16864652937831232783`) is a catch-all for ~1,170 +unrelated GraphQL/API errors across `app`/`theme`/`store`/`hydrogen` (issue #7891). It is +un-routable (five different owning teams), mixes expected/transient noise with genuine +regressions, and manufactures false P1s. Root cause: in `sendErrorToBugsnag` every error is +flattened to `new Error(error.message)` (losing its class/code), no `groupingHash` is set, and +stack traces are uniform — so the backend groups everything together. + +## Users + +- **Resiliency owners / on-call** routing CLI crash buckets to the right team. +- **Dashboards/alerting** (Bugsnag, Observe) that escalate severity on a bucket. + +## Success criteria + +1. Distinct failure families land in distinct buckets, split by product slice and a stable + semantic category (authentication / permission / rate_limit / server / …). +2. Categories are derived from **structured signals** (HTTP status, GraphQL `extensions.code`, + error class) read from the original typed error — not by regex-reparsing a stringified message. +3. Expected/handled and known-transient errors (AbortErrors, THROTTLED, 5xx, raw 401) do **not** + reach crash reporting. +4. Genuinely unknown errors keep Bugsnag's stack-trace grouping (we do not merge distinct bugs). +5. Backend routing works regardless of CLI version (structured metadata tags, not just the hash). + +## Constraints + +- **Reporter-only scope.** No changes to API throw sites or cross-team code. The original typed + error is already in scope in `sendErrorToBugsnag`; `GraphQLClientError` already carries + `statusCode` + `errors[]`, and graphql-request's `ClientError` carries `response.status`/`errors`. +- **Do not mutate `error-categorizer.ts`.** Its precedence quirks are pinned by + `error-categorizer.test.ts` / `storage.test.ts` because it is shared with Monorail analytics + (`storage.ts` emits `error:${category}:${signature}`). It is reused here only as a fallback. +- **No import cycles.** `error.ts → headers.ts → error.ts` would cycle, so transient suppression + in `error.ts` uses only the external `ClientError` type (the raw-401 path). + +## Key decisions + +1. **Group on the original error, structured-first.** A new `error-grouping.ts` extracts + `{httpStatus, code, errorClass}` and maps them to a category via an explicit decision table. + `403` and `ACCESS_DENIED` → `permission` (deliberately, fixing the prior 403→authentication + confusion). Keyword `categorizeError` is the fallback only. +2. **Suppress + drop transient.** Restore the `expected_error` skip in `sendErrorToBugsnag`; + extend `shouldReportErrorAsUnexpected` so raw `ClientError` 401/429/THROTTLED is expected. +3. **Hash + tags.** Set `event.groupingHash` (when a real category resolves) AND emit + `error_grouping` metadata (`slice_name`, `http_status`, `error_code`, `error_class`, `category`) + so dashboards/routing don't depend on the hash or on CLI upgrade adoption. + +## Out of scope (follow-ups) + +- At-source `category`/`code` field on the `FatalError` hierarchy + API wrappers. +- Folding status/code grouping into `error-categorizer.ts` (needs coordinated `storage.ts` update). +- Alerting on `*:unknown:*` / `cli:*` bucket growth; tying severity to SLO burn not volume. diff --git a/packages/cli-kit/src/private/node/analytics/error-grouping.test.ts b/packages/cli-kit/src/private/node/analytics/error-grouping.test.ts new file mode 100644 index 00000000000..fb7827dbced --- /dev/null +++ b/packages/cli-kit/src/private/node/analytics/error-grouping.test.ts @@ -0,0 +1,100 @@ +import {errorGroupingHash, errorGroupingSignals} from './error-grouping.js' +import {GraphQLClientError} from '../api/headers.js' +import {AbortError} from '../../../public/node/error.js' +import {ClientError} from 'graphql-request' +import {describe, expect, test} from 'vitest' + +function clientError(status: number, code?: string): ClientError { + const errors = code ? [{message: 'boom', extensions: {code}}] : undefined + return new ClientError({status, errors, headers: {}} as any, {query: 'q'} as any) +} + +describe('errorGroupingSignals', () => { + test('reads status, code, and class from a GraphQLClientError', () => { + const error = new GraphQLClientError('Forbidden', 403, [{extensions: {code: 'ACCESS_DENIED'}}]) + + expect(errorGroupingSignals(error)).toEqual({ + httpStatus: 403, + code: 'ACCESS_DENIED', + errorClass: 'GraphQLClientError', + }) + }) + + test('reads status and code from a raw graphql-request ClientError', () => { + const error = clientError(429, 'THROTTLED') + + expect(errorGroupingSignals(error)).toEqual({ + httpStatus: 429, + code: 'THROTTLED', + errorClass: 'ClientError', + }) + }) + + test('returns only the class name for a generic Error', () => { + expect(errorGroupingSignals(new Error('boom'))).toEqual({errorClass: 'Error'}) + }) + + test('returns an empty object for a non-Error value', () => { + expect(errorGroupingSignals('boom')).toEqual({}) + }) + + test('ignores a non-array errors value', () => { + const error = new GraphQLClientError('weird', 400, 'not-an-array' as any) + + expect(errorGroupingSignals(error)).toEqual({httpStatus: 400, errorClass: 'GraphQLClientError'}) + }) +}) + +describe('errorGroupingHash — structured decision table', () => { + test.each<[string, ClientError | GraphQLClientError, string]>([ + ['THROTTLED code -> rate_limit', clientError(400, 'THROTTLED'), 'theme:rate_limit:http-400-throttled'], + ['HTTP 429 -> rate_limit', clientError(429), 'theme:rate_limit:http-429'], + ['HTTP 401 -> authentication', clientError(401), 'theme:authentication:http-401'], + ['HTTP 403 -> permission', clientError(403), 'theme:permission:http-403'], + ['ACCESS_DENIED code -> permission', clientError(400, 'ACCESS_DENIED'), 'theme:permission:http-400-access-denied'], + ['HTTP 500 -> server', clientError(500), 'theme:server:http-500'], + ['HTTP 503 -> server', clientError(503), 'theme:server:http-503'], + ])('%s', (_, error, expected) => { + expect(errorGroupingHash(error, 'theme')).toEqual(expected) + }) + + test('403 wins over a 401-looking authentication category (permission, not authentication)', () => { + const error = new GraphQLClientError('Forbidden', 403, [{extensions: {code: 'ACCESS_DENIED'}}]) + + expect(errorGroupingHash(error, 'store')).toEqual('store:permission:http-403-access-denied') + }) + + test('prefixes the hash with the provided slice name', () => { + expect(errorGroupingHash(clientError(429), 'app')).toEqual('app:rate_limit:http-429') + expect(errorGroupingHash(clientError(429), 'cli')).toEqual('cli:rate_limit:http-429') + }) +}) + +describe('errorGroupingHash — message fallback', () => { + test('recovers an HTTP status flattened into an AbortError message', () => { + const error = new AbortError('The request responded unsuccessfully with the HTTP status 500') + + expect(errorGroupingHash(error, 'theme')).toEqual('theme:server:http-500') + }) + + test('recovers a graphql-request style "(Code: NNN)" status from a message', () => { + const error = new Error('GraphQL Error (Code: 401): {"response":{"status":401}}') + + expect(errorGroupingHash(error, 'store')).toEqual('store:authentication:http-401') + }) + + test('falls back to the keyword categorizer for an untyped, signal-less error', () => { + const error = new Error('connect ETIMEDOUT 1.2.3.4:443') + const hash = errorGroupingHash(error, 'cli') + + expect(hash).toMatch(/^cli:network:/) + }) + + test('returns undefined for an unknown error so stack-trace grouping is preserved', () => { + expect(errorGroupingHash(new Error('something nobody has categorized'), 'cli')).toBeUndefined() + }) + + test('returns undefined for a non-Error value', () => { + expect(errorGroupingHash(undefined, 'cli')).toBeUndefined() + }) +}) diff --git a/packages/cli-kit/src/private/node/analytics/error-grouping.ts b/packages/cli-kit/src/private/node/analytics/error-grouping.ts new file mode 100644 index 00000000000..fb92841faa4 --- /dev/null +++ b/packages/cli-kit/src/private/node/analytics/error-grouping.ts @@ -0,0 +1,179 @@ +import {categorizeError, ErrorCategory, formatErrorMessage} from './error-categorizer.js' +import {GraphQLClientError} from '../api/headers.js' +import {ClientError} from 'graphql-request' + +/** + * Structured signals extracted from an error, used to group it into a meaningful Bugsnag bucket. + * + * These are read from the *original* typed error (before it is flattened to a generic `Error` + * for reporting), so we group on facts the error already carries rather than by re-parsing a + * stringified message. + */ +interface ErrorGroupingSignals { + httpStatus?: number + code?: string + errorClass?: string +} + +/** + * Extracts structured grouping signals from an error object. + * + * @param error - The original error (any type). + * @returns The HTTP status, GraphQL error code, and error class name when available. + */ +export function errorGroupingSignals(error: unknown): ErrorGroupingSignals { + const signals: ErrorGroupingSignals = {} + + if (error instanceof Error) { + signals.errorClass = error.constructor.name + } + + // GraphQLClientError (handleErrors: true, status < 500) preserves the status and errors array. + if (error instanceof GraphQLClientError) { + signals.httpStatus = error.statusCode + signals.code = firstExtensionCode(error.errors) + return signals + } + + // Raw graphql-request ClientError (handleErrors: false) exposes the full response. + if (error instanceof ClientError) { + signals.httpStatus = error.response?.status + signals.code = firstExtensionCode(error.response?.errors) + } + + return signals +} + +/** + * Builds a Bugsnag grouping hash of the form `${slice}:${category}:${signature}`. + * + * Categories are resolved structured-first (HTTP status / GraphQL code), falling back to the + * shared keyword categorizer only for untyped errors. When no meaningful category can be + * resolved, returns `undefined` so the caller leaves `event.groupingHash` unset and Bugsnag's + * default stack-trace grouping applies — this avoids merging genuinely distinct unknown bugs. + * + * @param error - The original error (any type). + * @param sliceName - The product slice (`app`, `theme`, `store`, `hydrogen`, or `cli`). + * @returns The grouping hash, or `undefined` to fall back to stack-trace grouping. + */ +export function errorGroupingHash(error: unknown, sliceName: string): string | undefined { + const message = errorMessage(error) + // Object signals are authoritative; message-derived signals fill gaps (e.g. 5xx errors that the + // API layer flattens into an AbortError, dropping the structured status field). + const signals: ErrorGroupingSignals = {...signalsFromMessage(message), ...errorGroupingSignals(error)} + + const structuredCategory = categoryFromSignals(signals) + if (structuredCategory) { + return `${sliceName}:${structuredCategory}:${structuredSignature(signals)}` + } + + const groupingError = new Error(stripJsonDump(message)) + const category = categorizeError(groupingError) + if (category === ErrorCategory.Unknown) return undefined + + return `${sliceName}:${category.toLowerCase()}:${formatErrorMessage(groupingError, category)}` +} + +/** + * Resolves a semantic category from structured signals using an explicit decision table. + * + * 403 and ACCESS_DENIED map to `permission` (a forbidden request is a permission problem, not an + * authentication one). Returns `undefined` when no structured signal is present. + */ +function categoryFromSignals(signals: ErrorGroupingSignals): string | undefined { + const {httpStatus, code} = signals + + if (code === 'THROTTLED' || httpStatus === 429) return 'rate_limit' + if (httpStatus === 401) return 'authentication' + if (httpStatus === 403 || code === 'ACCESS_DENIED') return 'permission' + if (httpStatus !== undefined && httpStatus >= 500) return 'server' + + return undefined +} + +/** + * Builds a stable, low-cardinality signature slug from structured signals. + */ +function structuredSignature(signals: ErrorGroupingSignals): string { + const parts: string[] = [] + if (signals.httpStatus !== undefined) parts.push(`http-${signals.httpStatus}`) + if (signals.code) parts.push(signals.code) + if (parts.length === 0 && signals.errorClass) parts.push(signals.errorClass) + + return slugify(parts.join('-')) +} + +/** + * Recovers structured signals from an error message string. Handles both error shapes seen from + * the API layer: graphql-request's `GraphQL Error (Code: NNN): {json}` and the cli-kit wrapper's + * `... responded unsuccessfully with the HTTP status NNN ...`. + */ +function signalsFromMessage(message: string): ErrorGroupingSignals { + const signals: ErrorGroupingSignals = {} + + const statusMatch = message.match(/HTTP status (\d{3})/i) ?? message.match(/\(Code: (\d{3})\)/i) + if (statusMatch?.[1]) signals.httpStatus = Number(statusMatch[1]) + + const payload = parseEmbeddedJson(message) + if (payload?.response?.status !== undefined) { + signals.httpStatus = signals.httpStatus ?? payload.response.status + } + const code = firstExtensionCode(payload?.response?.errors) + if (code) signals.code = code + + return signals +} + +interface EmbeddedGraphQLPayload { + response?: { + status?: number + errors?: unknown + } +} + +/** + * Parses the JSON blob embedded in a graphql-request ClientError message, if present. + */ +function parseEmbeddedJson(message: string): EmbeddedGraphQLPayload | undefined { + const jsonStart = message.indexOf('{') + if (jsonStart === -1) return undefined + + try { + return JSON.parse(message.slice(jsonStart)) as EmbeddedGraphQLPayload + // eslint-disable-next-line no-catch-all/no-catch-all + } catch { + return undefined + } +} + +/** + * Returns the GraphQL `extensions.code` of the first error, when the errors value is an array. + * The API can return `errors` as a string, hence the array guard. + */ +function firstExtensionCode(errors: unknown): string | undefined { + if (!Array.isArray(errors)) return undefined + const code = (errors[0] as {extensions?: {code?: unknown}} | undefined)?.extensions?.code + return typeof code === 'string' ? code : undefined +} + +/** + * Strips a trailing JSON dump (`...: {json}`) from a message so the keyword categorizer sees the + * human-readable prefix rather than the serialized request/response (which contains the literal + * `request`, mis-routing to the network category). + */ +function stripJsonDump(message: string): string { + return message.split(': {')[0] ?? message +} + +function errorMessage(error: unknown): string { + if (error instanceof Error) return error.message + return typeof error === 'string' ? error : '' +} + +function slugify(value: string): string { + return value + .toLowerCase() + .replace(/[^a-z0-9]+/g, '-') + .replace(/^-|-$/g, '') + .slice(0, 50) +} diff --git a/packages/cli-kit/src/public/node/error-handler.test.ts b/packages/cli-kit/src/public/node/error-handler.test.ts index 8f101ab55d4..5fdfabfd180 100644 --- a/packages/cli-kit/src/public/node/error-handler.test.ts +++ b/packages/cli-kit/src/public/node/error-handler.test.ts @@ -6,13 +6,14 @@ import * as error from './error.js' import {hashString} from './crypto.js' import {isLocalEnvironment} from '../../private/node/context/service.js' import {getLastSeenUserIdAfterAuth} from '../../private/node/session.js' +import {GraphQLClientError} from '../../private/node/api/headers.js' import {settings} from '@oclif/core' import {beforeEach, describe, expect, test, vi} from 'vitest' const onNotify = vi.fn() const capturedEventHandler = vi.fn() -let lastBugsnagEvent: {addMetadata: ReturnType} | undefined +let lastBugsnagEvent: {addMetadata: ReturnType; groupingHash?: string} | undefined vi.mock('process') vi.mock('@bugsnag/js', () => { @@ -303,4 +304,39 @@ describe('sends errors to Bugsnag', () => { expect(lastBugsnagEvent).toBeDefined() expect(lastBugsnagEvent!.addMetadata).toHaveBeenCalledWith('custom', {slice_name: 'cli'}) }) + + test('sets a structured groupingHash and error_grouping metadata for a typed API error', async () => { + await metadata.addSensitiveMetadata(() => ({ + commandStartOptions: {startTime: Date.now(), startCommand: 'theme dev', startArgs: []}, + })) + const apiError = new GraphQLClientError('Forbidden', 403, [{extensions: {code: 'ACCESS_DENIED'}}]) + + await sendErrorToBugsnag(apiError, 'unexpected_error') + + expect(lastBugsnagEvent).toBeDefined() + expect(lastBugsnagEvent!.groupingHash).toEqual('theme:permission:http-403-access-denied') + expect(lastBugsnagEvent!.addMetadata).toHaveBeenCalledWith('error_grouping', { + slice_name: 'theme', + http_status: 403, + error_code: 'ACCESS_DENIED', + error_class: 'GraphQLClientError', + }) + }) + + test('leaves groupingHash unset for an unknown error but still tags error_grouping metadata', async () => { + await metadata.addSensitiveMetadata(() => ({ + commandStartOptions: {startTime: Date.now(), startCommand: 'app dev', startArgs: []}, + })) + + await sendErrorToBugsnag(new Error('something nobody has categorized'), 'unexpected_error') + + expect(lastBugsnagEvent).toBeDefined() + expect(lastBugsnagEvent!.groupingHash).toBeUndefined() + expect(lastBugsnagEvent!.addMetadata).toHaveBeenCalledWith('error_grouping', { + slice_name: 'app', + http_status: undefined, + error_code: undefined, + error_class: 'Error', + }) + }) }) diff --git a/packages/cli-kit/src/public/node/error-handler.ts b/packages/cli-kit/src/public/node/error-handler.ts index d639c9d3d33..06fd96cfa54 100644 --- a/packages/cli-kit/src/public/node/error-handler.ts +++ b/packages/cli-kit/src/public/node/error-handler.ts @@ -12,6 +12,7 @@ import { } from './error.js' import {outputDebug, outputInfo} from './output.js' import {getEnvironmentData} from '../../private/node/analytics.js' +import {errorGroupingHash, errorGroupingSignals} from '../../private/node/analytics/error-grouping.js' import {isLocalEnvironment} from '../../private/node/context/service.js' import {bugsnagApiKey, reportingRateLimit} from '../../private/node/constants.js' import {CLI_KIT_VERSION} from '../common/version.js' @@ -28,6 +29,14 @@ import {realpath} from 'fs/promises' // Hardcoded list per product slices to keep analytics consistent. const ALLOWED_SLICE_NAMES = new Set(['app', 'theme', 'hydrogen', 'store']) +// Derives the product slice from the command that was running (e.g. `theme dev` -> `theme`), +// defaulting to `cli` when the command is unknown or not in the allow-list. +function sliceNameFromStartCommand(startCommand: string | undefined): string { + if (!startCommand) return 'cli' + const firstWord = startCommand.trim().split(/\s+/)[0] ?? 'cli' + return ALLOWED_SLICE_NAMES.has(firstWord) ? firstWord : 'cli' +} + export async function errorHandler( error: Error & {exitCode?: number | undefined}, config?: Interfaces.Config, @@ -147,11 +156,27 @@ export async function sendErrorToBugsnag( // Attach command metadata so we know which CLI command triggered the error const {commandStartOptions} = metadata.getAllSensitiveMetadata() const {startCommand} = commandStartOptions ?? {} + const sliceName = sliceNameFromStartCommand(startCommand) if (startCommand) { - const firstWord = startCommand.trim().split(/\s+/)[0] ?? 'cli' - const sliceName = ALLOWED_SLICE_NAMES.has(firstWord) ? firstWord : 'cli' event.addMetadata('custom', {slice_name: sliceName}) } + + // Group on structured signals from the *original* error (not the flattened + // `reportableError`), and emit those signals as metadata so backend dashboards and + // routing work without depending on the grouping hash or on CLI version adoption. + // When no meaningful category resolves we leave `groupingHash` unset so Bugsnag's + // stack-trace grouping applies and distinct unknown bugs are not merged. + const groupingHash = errorGroupingHash(error, sliceName) + if (groupingHash) { + event.groupingHash = groupingHash + } + const {httpStatus, code, errorClass} = errorGroupingSignals(error) + event.addMetadata('error_grouping', { + slice_name: sliceName, + http_status: httpStatus, + error_code: code, + error_class: errorClass, + }) } const errorHandler = (error: unknown) => { if (error) { diff --git a/packages/cli-kit/src/public/node/error.test.ts b/packages/cli-kit/src/public/node/error.test.ts index 1bba30c0633..2ad406d6e86 100644 --- a/packages/cli-kit/src/public/node/error.test.ts +++ b/packages/cli-kit/src/public/node/error.test.ts @@ -1,7 +1,13 @@ import {AbortError, BugError, handler, cleanSingleStackTracePath, shouldReportErrorAsUnexpected} from './error.js' import {renderFatalError} from './ui.js' +import {ClientError} from 'graphql-request' import {describe, expect, test, vi} from 'vitest' +function clientError(status: number, code?: string): ClientError { + const errors = code ? [{message: 'boom', extensions: {code}}] : undefined + return new ClientError({status, errors, headers: {}} as any, {query: 'q'} as any) +} + vi.mock('./ui.js') describe('handler', () => { @@ -82,4 +88,20 @@ describe('shouldReportErrorAsUnexpected helper', () => { test('returns false for unsupported platform errors', () => { expect(shouldReportErrorAsUnexpected(new Error('Unsupported platform: win32 arm64 LE'))).toBe(false) }) + + test('returns false for a raw ClientError that is rate limited (HTTP 429)', () => { + expect(shouldReportErrorAsUnexpected(clientError(429))).toBe(false) + }) + + test('returns false for a raw ClientError that is unauthenticated (HTTP 401)', () => { + expect(shouldReportErrorAsUnexpected(clientError(401))).toBe(false) + }) + + test('returns false for a raw ClientError with a THROTTLED code', () => { + expect(shouldReportErrorAsUnexpected(clientError(400, 'THROTTLED'))).toBe(false) + }) + + test('returns true for a raw ClientError that is a genuine failure (HTTP 500)', () => { + expect(shouldReportErrorAsUnexpected(clientError(500))).toBe(true) + }) }) diff --git a/packages/cli-kit/src/public/node/error.ts b/packages/cli-kit/src/public/node/error.ts index c2a7852c65c..723aa24c7a5 100644 --- a/packages/cli-kit/src/public/node/error.ts +++ b/packages/cli-kit/src/public/node/error.ts @@ -3,6 +3,7 @@ import {OutputMessage, stringifyMessage, TokenizedString} from './output.js' import {InlineToken, TokenItem, tokenItemToString} from '../../private/node/ui/components/TokenizedText.js' import {Errors} from '@oclif/core' +import {ClientError} from 'graphql-request' import type {AlertCustomSection} from './ui.js' @@ -193,6 +194,12 @@ export function shouldReportErrorAsUnexpected(error: unknown): boolean { if (!isFatal(error)) { // this means its not one of the CLI wrapped errors if (error instanceof Error) { + // Raw API errors that slip through unwrapped (e.g. the handleErrors:false path) are + // transient/environmental, not CLI bugs: rate limiting (429/THROTTLED) and unauthenticated + // requests (401). Treat them as expected so they don't pollute crash reporting. + if (isTransientApiError(error)) { + return false + } const message = error.message return !errorMessageImpliesEnvironmentIssue(message) } @@ -204,6 +211,33 @@ export function shouldReportErrorAsUnexpected(error: unknown): boolean { return false } +/** + * Detects raw graphql-request `ClientError`s that are transient/environmental rather than CLI + * bugs: HTTP 401 (unauthenticated), 429 (rate limited), or a GraphQL `THROTTLED` code. These reach + * the reporter as plain `Error`s (not `FatalError`s) and would otherwise be reported as unexpected. + * + * Scoped to the external `ClientError` type only — importing the cli-kit `GraphQLClientError` + * wrapper here would create an `error.ts → headers.ts → error.ts` import cycle. + * + * @param error - Error to be checked. + * @returns A boolean indicating if the error is a known-transient API error. + */ +function isTransientApiError(error: Error): boolean { + if (!(error instanceof ClientError)) { + return false + } + const status = error.response?.status + if (status === 401 || status === 429) { + return true + } + const errors = error.response?.errors + if (Array.isArray(errors)) { + const code = (errors[0] as {extensions?: {code?: unknown}} | undefined)?.extensions?.code + return code === 'THROTTLED' + } + return false +} + /** * Stack traces usually have file:// - we strip that and also remove the Windows drive designation. *