Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 20 additions & 6 deletions keeperhub/lib/max-retries-reconciler.ts
Original file line number Diff line number Diff line change
@@ -1,13 +1,17 @@
/**
* KEEP-1541: The Workflow DevKit's "use step" durability layer can throw
* "exceeded max retries" even when the step itself succeeded. This happens
* retry-related errors even when the step itself succeeded. This happens
* because the SDK's internal state tracking encounters a conflict (e.g.,
* step already completed, state replay mismatch) AFTER withStepLogging has
* already recorded a success log in workflow_execution_logs. The error is
* caught by executeNode's catch block and stored as a failed result, which
* then causes finalSuccess to be false -- marking the entire workflow as
* "error" despite all steps completing successfully.
*
* The SDK produces two error formats:
* 1. Pre-check path: 'Step "X" exceeded max retries (N retries)'
* 2. Catch path: 'Step "X" failed after N retries: {error}'
*
* Fix: after all nodes finish, cross-reference failed results against the
* in-memory success tracker. If a failed node has a recorded success in the
* tracker, the SDK error was spurious and we override the result to success.
Expand Down Expand Up @@ -39,17 +43,27 @@ type ReconcileOutput = {
overriddenNodeIds: string[];
};

// SDK error message substring used to identify spurious max-retries failures.
// If the SDK changes this wording, update this constant.
// SDK error message substrings used to identify spurious retry failures.
// The SDK produces two distinct formats depending on the code path.
// If the SDK changes this wording, update these constants.
export const MAX_RETRIES_ERROR_MARKER = "exceeded max retries";
export const FAILED_AFTER_RETRIES_MARKER = "failed after";

export function isSdkRetryError(error: string | undefined): boolean {
if (!error) {
return false;
}
return (
error.includes(MAX_RETRIES_ERROR_MARKER) ||
error.includes(FAILED_AFTER_RETRIES_MARKER)
);
}

export function getFailedMaxRetriesNodeIds(
results: Record<string, ExecutionResult>
): string[] {
return Object.entries(results)
.filter(
([, r]) => !r.success && r.error?.includes(MAX_RETRIES_ERROR_MARKER)
)
.filter(([, r]) => !r.success && isSdkRetryError(r.error))
.map(([nodeId]) => nodeId);
}

Expand Down
2 changes: 2 additions & 0 deletions keeperhub/lib/steps/enrich-explorer-links.ts
Original file line number Diff line number Diff line change
Expand Up @@ -38,3 +38,5 @@ export async function enrichExplorerLinks(
);
}
}

enrichExplorerLinks.maxRetries = 0;
2 changes: 2 additions & 0 deletions keeperhub/plugins/math/steps/aggregate.ts
Original file line number Diff line number Diff line change
Expand Up @@ -626,4 +626,6 @@ export async function aggregateStep(
);
}

aggregateStep.maxRetries = 0;

export const _integrationType = PLUGIN_NAME;
2 changes: 2 additions & 0 deletions keeperhub/plugins/protocol/steps/protocol-read.ts
Original file line number Diff line number Diff line change
Expand Up @@ -123,4 +123,6 @@ export async function protocolReadStep(
return await withStepLogging(input, () => readContractCore(coreInput));
}

protocolReadStep.maxRetries = 0;

export const _integrationType = "protocol";
2 changes: 2 additions & 0 deletions keeperhub/plugins/protocol/steps/protocol-write.ts
Original file line number Diff line number Diff line change
Expand Up @@ -132,4 +132,6 @@ export async function protocolWriteStep(
return await withStepLogging(input, () => writeContractCore(coreInput));
}

protocolWriteStep.maxRetries = 0;

export const _integrationType = "protocol";
2 changes: 2 additions & 0 deletions keeperhub/plugins/web3/steps/batch-read-contract.ts
Original file line number Diff line number Diff line change
Expand Up @@ -818,4 +818,6 @@ export async function batchReadContractStep(
);
}

batchReadContractStep.maxRetries = 0;

export const _integrationType = "web3";
2 changes: 2 additions & 0 deletions keeperhub/plugins/web3/steps/check-allowance.ts
Original file line number Diff line number Diff line change
Expand Up @@ -169,4 +169,6 @@ export async function checkAllowanceStep(
return withStepLogging(input, () => stepHandler(input));
}

checkAllowanceStep.maxRetries = 0;

export const _integrationType = "web3";
2 changes: 2 additions & 0 deletions keeperhub/plugins/web3/steps/check-balance.ts
Original file line number Diff line number Diff line change
Expand Up @@ -217,4 +217,6 @@ export async function checkBalanceStep(
);
}

checkBalanceStep.maxRetries = 0;

export const _integrationType = "web3";
2 changes: 2 additions & 0 deletions keeperhub/plugins/web3/steps/check-token-balance.ts
Original file line number Diff line number Diff line change
Expand Up @@ -487,4 +487,6 @@ export async function checkTokenBalanceStep(
return withStepLogging(input, () => stepHandler(input));
}

checkTokenBalanceStep.maxRetries = 0;

export const _integrationType = "web3";
2 changes: 2 additions & 0 deletions keeperhub/plugins/web3/steps/get-transaction.ts
Original file line number Diff line number Diff line change
Expand Up @@ -184,4 +184,6 @@ export async function getTransactionStep(
);
}

getTransactionStep.maxRetries = 0;

export const _integrationType = "web3";
2 changes: 2 additions & 0 deletions keeperhub/plugins/web3/steps/query-events.ts
Original file line number Diff line number Diff line change
Expand Up @@ -399,4 +399,6 @@ export async function queryEventsStep(
);
}

queryEventsStep.maxRetries = 0;

export const _integrationType = "web3";
2 changes: 2 additions & 0 deletions keeperhub/plugins/web3/steps/query-transactions.ts
Original file line number Diff line number Diff line change
Expand Up @@ -25,4 +25,6 @@ export async function queryTransactionsStep(
);
}

queryTransactionsStep.maxRetries = 0;

export const _integrationType = "web3";
2 changes: 2 additions & 0 deletions keeperhub/plugins/web3/steps/read-contract.ts
Original file line number Diff line number Diff line change
Expand Up @@ -55,4 +55,6 @@ export async function readContractStep(
);
}

readContractStep.maxRetries = 0;

export const _integrationType = "web3";
2 changes: 2 additions & 0 deletions keeperhub/plugins/web3/steps/transfer-funds.ts
Original file line number Diff line number Diff line change
Expand Up @@ -60,4 +60,6 @@ export async function transferFundsStep(
);
}

transferFundsStep.maxRetries = 0;

export const _integrationType = "web3";
2 changes: 2 additions & 0 deletions keeperhub/plugins/web3/steps/transfer-token.ts
Original file line number Diff line number Diff line change
Expand Up @@ -51,4 +51,6 @@ export async function transferTokenStep(
return withStepLogging(enrichedInput, () => transferTokenCore(input));
}

transferTokenStep.maxRetries = 0;

export const _integrationType = "web3";
2 changes: 2 additions & 0 deletions keeperhub/plugins/web3/steps/write-contract.ts
Original file line number Diff line number Diff line change
Expand Up @@ -55,4 +55,6 @@ export async function writeContractStep(
);
}

writeContractStep.maxRetries = 0;

export const _integrationType = "web3";
81 changes: 75 additions & 6 deletions tests/unit/max-retries-reconciler.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,37 @@ import { describe, expect, it } from "vitest";

import {
getFailedMaxRetriesNodeIds,
isSdkRetryError,
reconcileMaxRetriesFailures,
reconcileSdkFailures,
} from "@/keeperhub/lib/max-retries-reconciler";

describe("isSdkRetryError", () => {
it("should match 'exceeded max retries' error format", () => {
expect(
isSdkRetryError(
'Step "step//abc//sendWebhook" exceeded max retries (0 retries)'
)
).toBe(true);
});

it("should match 'failed after N retries' error format", () => {
expect(
isSdkRetryError(
'Step "step//abc//checkBalance" failed after 3 retries: Error: step_completed event failed'
)
).toBe(true);
});

it("should not match unrelated errors", () => {
expect(isSdkRetryError("HTTP 500 Internal Server Error")).toBe(false);
expect(isSdkRetryError("Connection refused")).toBe(false);
expect(isSdkRetryError(undefined)).toBe(false);
});
});

describe("getFailedMaxRetriesNodeIds", () => {
it("should return node IDs with max-retries errors", () => {
it("should return node IDs with exceeded-max-retries errors", () => {
const results = {
"node-1": { success: true },
"node-2": {
Expand All @@ -20,7 +45,20 @@ describe("getFailedMaxRetriesNodeIds", () => {
expect(getFailedMaxRetriesNodeIds(results)).toEqual(["node-2"]);
});

it("should return empty array when no max-retries errors", () => {
it("should return node IDs with failed-after-retries errors", () => {
const results = {
"node-1": { success: true },
"node-2": {
success: false,
error:
'Step "step//abc//checkBalance" failed after 3 retries: step_completed event failed',
},
};

expect(getFailedMaxRetriesNodeIds(results)).toEqual(["node-2"]);
});

it("should return empty array when no retry errors", () => {
const results = {
"node-1": { success: true },
"node-2": { success: false, error: "Connection refused" },
Expand All @@ -31,7 +69,7 @@ describe("getFailedMaxRetriesNodeIds", () => {
});

describe("reconcileMaxRetriesFailures", () => {
it("should return empty overrides when no max-retries failures exist", () => {
it("should return empty overrides when no retry failures exist", () => {
const results: Record<string, { success: boolean; error?: string }> = {
"node-1": { success: true },
"node-2": { success: false, error: "HTTP 500 Internal Server Error" },
Expand All @@ -47,7 +85,7 @@ describe("reconcileMaxRetriesFailures", () => {
expect(overriddenNodeIds).toEqual([]);
});

it("should override to success when node has a tracked success", () => {
it("should override exceeded-max-retries failure with tracked success", () => {
const results: Record<
string,
{ success: boolean; error?: string; data?: unknown }
Expand Down Expand Up @@ -77,6 +115,36 @@ describe("reconcileMaxRetriesFailures", () => {
});
});

it("should override failed-after-retries failure with tracked success", () => {
const results: Record<
string,
{ success: boolean; error?: string; data?: unknown }
> = {
"node-1": {
success: false,
error:
'Step "step//abc//checkBalance" failed after 0 retries: step_completed event failed',
},
};

const successfulSteps = new Map<string, unknown>([
["node-1", { balance: "1.5" }],
]);

const { overriddenNodeIds } = reconcileMaxRetriesFailures({
results,
successfulSteps,
executionId: "exec-1",
workflowId: "wf-1",
});

expect(overriddenNodeIds).toEqual(["node-1"]);
expect(results["node-1"]).toEqual({
success: true,
data: { balance: "1.5" },
});
});

it("should override to success when node output is undefined", () => {
const results: Record<
string,
Expand Down Expand Up @@ -133,7 +201,8 @@ describe("reconcileMaxRetriesFailures", () => {
},
"node-2": {
success: false,
error: 'Step "step//b//condition" exceeded max retries (0 retries)',
error:
'Step "step//b//checkBalance" failed after 3 retries: state conflict',
},
"node-3": { success: true },
};
Expand All @@ -156,7 +225,7 @@ describe("reconcileMaxRetriesFailures", () => {
});
});

it("should not produce overrides for non-max-retries failures", () => {
it("should not produce overrides for non-retry failures", () => {
const results: Record<
string,
{ success: boolean; error?: string; data?: unknown }
Expand Down
Loading