diff --git a/keeperhub/lib/max-retries-reconciler.ts b/keeperhub/lib/max-retries-reconciler.ts index 39c2f05f5..de7845bb7 100644 --- a/keeperhub/lib/max-retries-reconciler.ts +++ b/keeperhub/lib/max-retries-reconciler.ts @@ -1,6 +1,6 @@ /** * KEEP-1541: The Workflow DevKit's "use step" durability layer can throw - * "exceeded max retries" even when the step itself succeeded. This happens + * retry-related errors even when the step itself succeeded. This happens * because the SDK's internal state tracking encounters a conflict (e.g., * step already completed, state replay mismatch) AFTER withStepLogging has * already recorded a success log in workflow_execution_logs. The error is @@ -8,6 +8,10 @@ * then causes finalSuccess to be false -- marking the entire workflow as * "error" despite all steps completing successfully. * + * The SDK produces two error formats: + * 1. Pre-check path: 'Step "X" exceeded max retries (N retries)' + * 2. Catch path: 'Step "X" failed after N retries: {error}' + * * Fix: after all nodes finish, cross-reference failed results against the * in-memory success tracker. If a failed node has a recorded success in the * tracker, the SDK error was spurious and we override the result to success. @@ -39,17 +43,27 @@ type ReconcileOutput = { overriddenNodeIds: string[]; }; -// SDK error message substring used to identify spurious max-retries failures. -// If the SDK changes this wording, update this constant. +// SDK error message substrings used to identify spurious retry failures. +// The SDK produces two distinct formats depending on the code path. +// If the SDK changes this wording, update these constants. export const MAX_RETRIES_ERROR_MARKER = "exceeded max retries"; +export const FAILED_AFTER_RETRIES_MARKER = "failed after"; + +export function isSdkRetryError(error: string | undefined): boolean { + if (!error) { + return false; + } + return ( + error.includes(MAX_RETRIES_ERROR_MARKER) || + error.includes(FAILED_AFTER_RETRIES_MARKER) + ); +} export function getFailedMaxRetriesNodeIds( results: Record ): string[] { return Object.entries(results) - .filter( - ([, r]) => !r.success && r.error?.includes(MAX_RETRIES_ERROR_MARKER) - ) + .filter(([, r]) => !r.success && isSdkRetryError(r.error)) .map(([nodeId]) => nodeId); } diff --git a/keeperhub/lib/steps/enrich-explorer-links.ts b/keeperhub/lib/steps/enrich-explorer-links.ts index b82262645..d214fb0ab 100644 --- a/keeperhub/lib/steps/enrich-explorer-links.ts +++ b/keeperhub/lib/steps/enrich-explorer-links.ts @@ -38,3 +38,5 @@ export async function enrichExplorerLinks( ); } } + +enrichExplorerLinks.maxRetries = 0; diff --git a/keeperhub/plugins/math/steps/aggregate.ts b/keeperhub/plugins/math/steps/aggregate.ts index 42abada7a..c2e4d119a 100644 --- a/keeperhub/plugins/math/steps/aggregate.ts +++ b/keeperhub/plugins/math/steps/aggregate.ts @@ -626,4 +626,6 @@ export async function aggregateStep( ); } +aggregateStep.maxRetries = 0; + export const _integrationType = PLUGIN_NAME; diff --git a/keeperhub/plugins/protocol/steps/protocol-read.ts b/keeperhub/plugins/protocol/steps/protocol-read.ts index 21f2f8992..89bdb7e91 100644 --- a/keeperhub/plugins/protocol/steps/protocol-read.ts +++ b/keeperhub/plugins/protocol/steps/protocol-read.ts @@ -123,4 +123,6 @@ export async function protocolReadStep( return await withStepLogging(input, () => readContractCore(coreInput)); } +protocolReadStep.maxRetries = 0; + export const _integrationType = "protocol"; diff --git a/keeperhub/plugins/protocol/steps/protocol-write.ts b/keeperhub/plugins/protocol/steps/protocol-write.ts index 5fdbe74e9..552f7ace6 100644 --- a/keeperhub/plugins/protocol/steps/protocol-write.ts +++ b/keeperhub/plugins/protocol/steps/protocol-write.ts @@ -132,4 +132,6 @@ export async function protocolWriteStep( return await withStepLogging(input, () => writeContractCore(coreInput)); } +protocolWriteStep.maxRetries = 0; + export const _integrationType = "protocol"; diff --git a/keeperhub/plugins/web3/steps/batch-read-contract.ts b/keeperhub/plugins/web3/steps/batch-read-contract.ts index d7d2681c9..c6177cab7 100644 --- a/keeperhub/plugins/web3/steps/batch-read-contract.ts +++ b/keeperhub/plugins/web3/steps/batch-read-contract.ts @@ -818,4 +818,6 @@ export async function batchReadContractStep( ); } +batchReadContractStep.maxRetries = 0; + export const _integrationType = "web3"; diff --git a/keeperhub/plugins/web3/steps/check-allowance.ts b/keeperhub/plugins/web3/steps/check-allowance.ts index 8dbde1f5a..d5aeb2d6e 100644 --- a/keeperhub/plugins/web3/steps/check-allowance.ts +++ b/keeperhub/plugins/web3/steps/check-allowance.ts @@ -169,4 +169,6 @@ export async function checkAllowanceStep( return withStepLogging(input, () => stepHandler(input)); } +checkAllowanceStep.maxRetries = 0; + export const _integrationType = "web3"; diff --git a/keeperhub/plugins/web3/steps/check-balance.ts b/keeperhub/plugins/web3/steps/check-balance.ts index 8d2c8195f..ba567d9f7 100644 --- a/keeperhub/plugins/web3/steps/check-balance.ts +++ b/keeperhub/plugins/web3/steps/check-balance.ts @@ -217,4 +217,6 @@ export async function checkBalanceStep( ); } +checkBalanceStep.maxRetries = 0; + export const _integrationType = "web3"; diff --git a/keeperhub/plugins/web3/steps/check-token-balance.ts b/keeperhub/plugins/web3/steps/check-token-balance.ts index 4ce764d49..0eaf85d16 100644 --- a/keeperhub/plugins/web3/steps/check-token-balance.ts +++ b/keeperhub/plugins/web3/steps/check-token-balance.ts @@ -487,4 +487,6 @@ export async function checkTokenBalanceStep( return withStepLogging(input, () => stepHandler(input)); } +checkTokenBalanceStep.maxRetries = 0; + export const _integrationType = "web3"; diff --git a/keeperhub/plugins/web3/steps/get-transaction.ts b/keeperhub/plugins/web3/steps/get-transaction.ts index b2c29dcc5..5da6e0523 100644 --- a/keeperhub/plugins/web3/steps/get-transaction.ts +++ b/keeperhub/plugins/web3/steps/get-transaction.ts @@ -184,4 +184,6 @@ export async function getTransactionStep( ); } +getTransactionStep.maxRetries = 0; + export const _integrationType = "web3"; diff --git a/keeperhub/plugins/web3/steps/query-events.ts b/keeperhub/plugins/web3/steps/query-events.ts index 353bb6d7d..f727be6ca 100644 --- a/keeperhub/plugins/web3/steps/query-events.ts +++ b/keeperhub/plugins/web3/steps/query-events.ts @@ -399,4 +399,6 @@ export async function queryEventsStep( ); } +queryEventsStep.maxRetries = 0; + export const _integrationType = "web3"; diff --git a/keeperhub/plugins/web3/steps/query-transactions.ts b/keeperhub/plugins/web3/steps/query-transactions.ts index 706b37b6a..e4f3426ef 100644 --- a/keeperhub/plugins/web3/steps/query-transactions.ts +++ b/keeperhub/plugins/web3/steps/query-transactions.ts @@ -25,4 +25,6 @@ export async function queryTransactionsStep( ); } +queryTransactionsStep.maxRetries = 0; + export const _integrationType = "web3"; diff --git a/keeperhub/plugins/web3/steps/read-contract.ts b/keeperhub/plugins/web3/steps/read-contract.ts index 3af92308f..3ccf0d3c2 100644 --- a/keeperhub/plugins/web3/steps/read-contract.ts +++ b/keeperhub/plugins/web3/steps/read-contract.ts @@ -55,4 +55,6 @@ export async function readContractStep( ); } +readContractStep.maxRetries = 0; + export const _integrationType = "web3"; diff --git a/keeperhub/plugins/web3/steps/transfer-funds.ts b/keeperhub/plugins/web3/steps/transfer-funds.ts index 3b677e739..be94bef0b 100644 --- a/keeperhub/plugins/web3/steps/transfer-funds.ts +++ b/keeperhub/plugins/web3/steps/transfer-funds.ts @@ -60,4 +60,6 @@ export async function transferFundsStep( ); } +transferFundsStep.maxRetries = 0; + export const _integrationType = "web3"; diff --git a/keeperhub/plugins/web3/steps/transfer-token.ts b/keeperhub/plugins/web3/steps/transfer-token.ts index 49c897421..fc5f37493 100644 --- a/keeperhub/plugins/web3/steps/transfer-token.ts +++ b/keeperhub/plugins/web3/steps/transfer-token.ts @@ -51,4 +51,6 @@ export async function transferTokenStep( return withStepLogging(enrichedInput, () => transferTokenCore(input)); } +transferTokenStep.maxRetries = 0; + export const _integrationType = "web3"; diff --git a/keeperhub/plugins/web3/steps/write-contract.ts b/keeperhub/plugins/web3/steps/write-contract.ts index e05432542..4b3548fef 100644 --- a/keeperhub/plugins/web3/steps/write-contract.ts +++ b/keeperhub/plugins/web3/steps/write-contract.ts @@ -55,4 +55,6 @@ export async function writeContractStep( ); } +writeContractStep.maxRetries = 0; + export const _integrationType = "web3"; diff --git a/tests/unit/max-retries-reconciler.test.ts b/tests/unit/max-retries-reconciler.test.ts index bd0b41074..f300a05f3 100644 --- a/tests/unit/max-retries-reconciler.test.ts +++ b/tests/unit/max-retries-reconciler.test.ts @@ -2,12 +2,37 @@ import { describe, expect, it } from "vitest"; import { getFailedMaxRetriesNodeIds, + isSdkRetryError, reconcileMaxRetriesFailures, reconcileSdkFailures, } from "@/keeperhub/lib/max-retries-reconciler"; +describe("isSdkRetryError", () => { + it("should match 'exceeded max retries' error format", () => { + expect( + isSdkRetryError( + 'Step "step//abc//sendWebhook" exceeded max retries (0 retries)' + ) + ).toBe(true); + }); + + it("should match 'failed after N retries' error format", () => { + expect( + isSdkRetryError( + 'Step "step//abc//checkBalance" failed after 3 retries: Error: step_completed event failed' + ) + ).toBe(true); + }); + + it("should not match unrelated errors", () => { + expect(isSdkRetryError("HTTP 500 Internal Server Error")).toBe(false); + expect(isSdkRetryError("Connection refused")).toBe(false); + expect(isSdkRetryError(undefined)).toBe(false); + }); +}); + describe("getFailedMaxRetriesNodeIds", () => { - it("should return node IDs with max-retries errors", () => { + it("should return node IDs with exceeded-max-retries errors", () => { const results = { "node-1": { success: true }, "node-2": { @@ -20,7 +45,20 @@ describe("getFailedMaxRetriesNodeIds", () => { expect(getFailedMaxRetriesNodeIds(results)).toEqual(["node-2"]); }); - it("should return empty array when no max-retries errors", () => { + it("should return node IDs with failed-after-retries errors", () => { + const results = { + "node-1": { success: true }, + "node-2": { + success: false, + error: + 'Step "step//abc//checkBalance" failed after 3 retries: step_completed event failed', + }, + }; + + expect(getFailedMaxRetriesNodeIds(results)).toEqual(["node-2"]); + }); + + it("should return empty array when no retry errors", () => { const results = { "node-1": { success: true }, "node-2": { success: false, error: "Connection refused" }, @@ -31,7 +69,7 @@ describe("getFailedMaxRetriesNodeIds", () => { }); describe("reconcileMaxRetriesFailures", () => { - it("should return empty overrides when no max-retries failures exist", () => { + it("should return empty overrides when no retry failures exist", () => { const results: Record = { "node-1": { success: true }, "node-2": { success: false, error: "HTTP 500 Internal Server Error" }, @@ -47,7 +85,7 @@ describe("reconcileMaxRetriesFailures", () => { expect(overriddenNodeIds).toEqual([]); }); - it("should override to success when node has a tracked success", () => { + it("should override exceeded-max-retries failure with tracked success", () => { const results: Record< string, { success: boolean; error?: string; data?: unknown } @@ -77,6 +115,36 @@ describe("reconcileMaxRetriesFailures", () => { }); }); + it("should override failed-after-retries failure with tracked success", () => { + const results: Record< + string, + { success: boolean; error?: string; data?: unknown } + > = { + "node-1": { + success: false, + error: + 'Step "step//abc//checkBalance" failed after 0 retries: step_completed event failed', + }, + }; + + const successfulSteps = new Map([ + ["node-1", { balance: "1.5" }], + ]); + + const { overriddenNodeIds } = reconcileMaxRetriesFailures({ + results, + successfulSteps, + executionId: "exec-1", + workflowId: "wf-1", + }); + + expect(overriddenNodeIds).toEqual(["node-1"]); + expect(results["node-1"]).toEqual({ + success: true, + data: { balance: "1.5" }, + }); + }); + it("should override to success when node output is undefined", () => { const results: Record< string, @@ -133,7 +201,8 @@ describe("reconcileMaxRetriesFailures", () => { }, "node-2": { success: false, - error: 'Step "step//b//condition" exceeded max retries (0 retries)', + error: + 'Step "step//b//checkBalance" failed after 3 retries: state conflict', }, "node-3": { success: true }, }; @@ -156,7 +225,7 @@ describe("reconcileMaxRetriesFailures", () => { }); }); - it("should not produce overrides for non-max-retries failures", () => { + it("should not produce overrides for non-retry failures", () => { const results: Record< string, { success: boolean; error?: string; data?: unknown }