From af340ef5a479c21debb281b8004b815c986700d0 Mon Sep 17 00:00:00 2001
From: Eric Charles
Date: Thu, 14 May 2026 12:52:15 +0200
Subject: [PATCH 01/49] orgs
---
datalayer_core/cli/commands/authn.py | 4 +-
datalayer_core/cli/commands/subscription.py | 20 +-
datalayer_core/mixins/usage.py | 6 +-
src/components/checkout/StripeCheckout.tsx | 798 ++++++++++++--------
src/hooks/useCache.ts | 14 +-
src/models/Profile.ts | 2 +-
src/models/User.ts | 2 +-
7 files changed, 499 insertions(+), 347 deletions(-)
diff --git a/datalayer_core/cli/commands/authn.py b/datalayer_core/cli/commands/authn.py
index ccbf25d0..2ba70461 100644
--- a/datalayer_core/cli/commands/authn.py
+++ b/datalayer_core/cli/commands/authn.py
@@ -429,9 +429,9 @@ def whoami(
console.print(f" š {provider_name.capitalize()}")
# Customer UID
- if user.get("credits_customer_uid"):
+ if user.get("stripe_customer_id_s"):
console.print(
- f"\nš³ Credits Customer: {user.get('credits_customer_uid')}"
+ f"\nš³ Credits Customer: {user.get('stripe_customer_id_s')}"
)
else:
console.print("[yellow]Not authenticated[/yellow]")
diff --git a/datalayer_core/cli/commands/subscription.py b/datalayer_core/cli/commands/subscription.py
index c4d85ce7..be73efe9 100644
--- a/datalayer_core/cli/commands/subscription.py
+++ b/datalayer_core/cli/commands/subscription.py
@@ -21,7 +21,7 @@
def _extract_subscription(payload: dict[str, Any]) -> dict[str, Any]:
- return payload.get("subscription") or {}
+ return payload.get("plan") or {}
def _normalize_value(value: Any, fallback: str = "Not available") -> str:
@@ -71,12 +71,8 @@ def _as_plan_list(value: Any) -> list[dict[str, Any]]:
def _extract_available_plans(payload: dict[str, Any]) -> list[dict[str, Any]]:
subscription = _extract_subscription(payload)
candidates = [
- payload.get("available_subscriptions"),
payload.get("available_plans"),
payload.get("plans"),
- subscription.get("available_subscriptions")
- if isinstance(subscription, dict)
- else None,
subscription.get("available_plans") if isinstance(subscription, dict) else None,
subscription.get("plans") if isinstance(subscription, dict) else None,
]
@@ -572,8 +568,8 @@ def subscription_stats(
paid_count = 0
for user in users:
- status = str(user.get("subscription_status_s") or "none").lower()
- plan = str(user.get("subscription_plan_s") or "none")
+ status = str(user.get("plan_status_s") or "none").lower()
+ plan = str(user.get("plan_name_s") or "none")
status_counter[status] += 1
plan_counter[plan] += 1
@@ -663,9 +659,9 @@ def subscription_admin_users(
for user in users:
table.add_row(
_normalize_value(user.get("handle_s")),
- _normalize_value(user.get("subscription_plan_s"), fallback="none"),
- _normalize_value(user.get("subscription_status_s"), fallback="none"),
- _normalize_value(user.get("credits_customer_uid"), fallback="none"),
+ _normalize_value(user.get("plan_name_s"), fallback="none"),
+ _normalize_value(user.get("plan_status_s"), fallback="none"),
+ _normalize_value(user.get("stripe_customer_id_s"), fallback="none"),
)
console.print(table)
@@ -740,13 +736,13 @@ def subscription_dry_run(
if sub_resp.get("success", True):
sub = _extract_subscription(sub_resp)
console.print(
- "[green]OK[/green] /api/iam/v1/subscription "
+ "[green]OK[/green] /api/iam/v1/plans "
f"plan={_normalize_value(sub.get('plan_name'), 'unknown')} "
f"status={_normalize_value(sub.get('status'), 'unknown')}"
)
else:
console.print(
- "[red]FAILED[/red] /api/iam/v1/subscription "
+ "[red]FAILED[/red] /api/iam/v1/plans "
f"{sub_resp.get('message', 'Unknown error')}"
)
diff --git a/datalayer_core/mixins/usage.py b/datalayer_core/mixins/usage.py
index 80bc8f43..ae5856f3 100644
--- a/datalayer_core/mixins/usage.py
+++ b/datalayer_core/mixins/usage.py
@@ -37,7 +37,7 @@ def _get_subscription(self) -> dict[str, Any]:
"""
try:
response = self._fetch( # type: ignore
- "{}/api/iam/v1/subscription".format(self.urls.iam_url), # type: ignore
+ "{}/api/iam/v1/plans".format(self.urls.iam_url), # type: ignore
)
return response.json()
except RuntimeError as e:
@@ -54,7 +54,7 @@ def _cancel_subscription(self) -> dict[str, Any]:
"""
try:
response = self._fetch( # type: ignore
- "{}/api/iam/v1/subscription/cancel".format(self.urls.iam_url), # type: ignore
+ "{}/api/iam/v1/plans/cancel".format(self.urls.iam_url), # type: ignore
method="POST",
)
return response.json()
@@ -72,7 +72,7 @@ def _get_subscription_plans(self) -> dict[str, Any]:
"""
try:
response = self._fetch( # type: ignore
- "{}/api/iam/v1/subscription/plans".format(self.urls.iam_url), # type: ignore
+ "{}/api/iam/v1/plans/catalog".format(self.urls.iam_url), # type: ignore
)
return response.json()
except RuntimeError as e:
diff --git a/src/components/checkout/StripeCheckout.tsx b/src/components/checkout/StripeCheckout.tsx
index 7006f81b..2afa540b 100644
--- a/src/components/checkout/StripeCheckout.tsx
+++ b/src/components/checkout/StripeCheckout.tsx
@@ -311,12 +311,11 @@ export function StripeCheckout({
checkoutPortal,
appearance,
accountUid,
- showStatusUsageSummary = true,
+ showStatusUsageSummary = false,
}: StripeCheckoutProps) {
const {
useCreateTopUpPaymentIntent,
useCreateSubscriptionPaymentIntent,
- useCreateResumeSetupIntent,
useSubscriptionPlans,
useTopUpPrices,
useSubscriptionStatus,
@@ -368,7 +367,6 @@ export function StripeCheckout({
const subscriptionPaymentIntentMutation = useCreateSubscriptionPaymentIntent({
accountUid,
});
- const resumeSetupIntentMutation = useCreateResumeSetupIntent({ accountUid });
// Load stripe API
useEffect(() => {
@@ -445,7 +443,7 @@ export function StripeCheckout({
}
}, [checkoutType, refetchSubscriptionStatus, resumeSubscriptionMutation]);
- const subscription = subscriptionResp?.subscription || null;
+ const subscription = subscriptionResp?.plan || null;
const availablePlans = useMemo(() => {
const byId = new Map();
const add = (plan: any) => {
@@ -466,9 +464,9 @@ export function StripeCheckout({
});
};
plans.forEach(add);
- (subscriptionResp?.available_subscriptions || []).forEach(add);
+ (subscriptionResp?.available_plans || []).forEach(add);
return Array.from(byId.values());
- }, [plans, subscriptionResp?.available_subscriptions]);
+ }, [plans, subscriptionResp?.available_plans]);
const subscriptionStatus = subscription?.status || 'unknown';
const normalizedSubscriptionStatus = String(subscriptionStatus).toLowerCase();
@@ -894,27 +892,35 @@ export function StripeCheckout({
const onResumeSubscription = useCallback(async () => {
setPaymentMessage(null);
try {
- const clientSecret = await resumeSetupIntentMutation.mutateAsync();
- if (!clientSecret) {
- setCheckout(false);
- setPaymentClientSecret(null);
- setPaymentMessage(
- 'Unable to initialize Stripe checkout. Please try again.',
+ const resp = await resumeSubscriptionMutation.mutateAsync();
+ if (resp?.success === false) {
+ throw new Error(
+ resp?.message || 'Unable to resume your plan right now.',
);
- return;
}
- setCheckoutType('resume');
- setPaymentClientSecret(clientSecret);
- setCheckout(true);
- setPaymentMessage(null);
+
+ for (let attempt = 0; attempt < 5; attempt += 1) {
+ try {
+ await refetchSubscriptionStatus();
+ } catch {
+ // Ignore transient refetch errors and keep trying.
+ }
+ if (attempt < 4) {
+ await new Promise(resolve => setTimeout(resolve, 800));
+ }
+ }
+
+ setCheckout(false);
+ setPaymentClientSecret(null);
+ setPaymentMessage(resp?.message || 'Plan resumed successfully.');
} catch (error) {
setPaymentMessage(
error instanceof Error
? error.message
- : 'Unable to initialize resume checkout right now.',
+ : 'Unable to resume your plan right now.',
);
}
- }, [resumeSetupIntentMutation]);
+ }, [refetchSubscriptionStatus, resumeSubscriptionMutation]);
const onRefreshSubscriptionStatus = useCallback(async () => {
setPaymentMessage(null);
@@ -947,10 +953,6 @@ export function StripeCheckout({
return `${product.name} (${amount}, ${product.credits} credits)`;
}
- if (checkoutType === 'resume') {
- return 'Plan resume (card update required)';
- }
-
return null;
}, [checkoutType, product, subscriptionPlan]);
@@ -960,7 +962,10 @@ export function StripeCheckout({
marginBottom: 'var(--stack-gap-normal)',
} as const;
- const monthlySubscriptionSection = (
+ const shouldShowMonthlySubscriptionSection =
+ !isPaidSubscription || isIncompleteSubscription;
+
+ const monthlySubscriptionSection = shouldShowMonthlySubscriptionSection ? (
>
- ) : (
-
- {isCancellationScheduled
- ? `Your monthly plan will cancel on ${subscriptionPeriodEndLabel}.`
- : 'Your monthly plan is active. You can manage plan details from plan controls.'}
-
- )}
+ ) : null}
- );
+ ) : null;
const topUpSection = (
@@ -1133,341 +1132,494 @@ export function StripeCheckout({
);
- const topCards = showStatusUsageSummary ? (
-
+ const topCards =
+ showStatusUsageSummary && !isPaidSubscription ? (
-
-
- Plan status
-
- Plan: {String(currentSubscriptionPlan)}
- {isPendingSubscriptionCheckout && (
-
+
+
- Upgrade pending payment. Your Team plan is not active until card
- payment succeeds.
-
- )}
- {currentPlanPriceLabel !== 'N/A' && (
- Price: {currentPlanPriceLabel}
- )}
- {displaySubscriptionStatus && (
-
- Status: {displaySubscriptionStatus}
+ Plan status
- )}
-
+ Plan: {String(currentSubscriptionPlan)}
+ {isPendingSubscriptionCheckout && (
+
+ Upgrade pending payment. Your Team plan is not active until card
+ payment succeeds.
+
+ )}
+ {currentPlanPriceLabel !== 'N/A' && (
+ Price: {currentPlanPriceLabel}
+ )}
+ {displaySubscriptionStatus && (
+
+ Status: {displaySubscriptionStatus}
+
+ )}
-
- Current usage
-
-
-
-
-
-
- Runs: {usedRuns.toLocaleString()} / {runsTotal.toLocaleString()}
-
-
-
-
-
-
-
-
- Used in quota
-
-
-
- Remaining
-
-
-
- Over quota
-
+
+ Current usage
+
+
-
- {periodProgress ? (
- Usage period days: {periodProgress.elapsedDays} /{' '}
- {periodProgress.totalDays}
+ Runs: {usedRuns.toLocaleString()} /{' '}
+ {runsTotal.toLocaleString()}
+
-
- {periodProgress.remainingDays} day(s) remaining in current
- period
-
+
+
+ Used in quota
+
+
+
+ Remaining
+
+
+
+ Over quota
+
+
- ) : null}
-
-
- Wallet balance: {walletBalance.toLocaleString()}
-
-
- Spent credits in current period:{' '}
- {usedCredits.toLocaleString(undefined, {
- minimumFractionDigits: 2,
- maximumFractionDigits: 2,
- })}
-
-
- Wallet credits are additive on renewal and top-ups.
-
+ {periodProgress ? (
+
+
+ Usage period days: {periodProgress.elapsedDays} /{' '}
+ {periodProgress.totalDays}
+
+
+
+
+
+
+ {periodProgress.remainingDays} day(s) remaining in current
+ period
+
+
+ ) : null}
+
+
+
+ Wallet balance: {walletBalance.toLocaleString()}
+
+
+ Spent credits in current period:{' '}
+ {usedCredits.toLocaleString(undefined, {
+ minimumFractionDigits: 2,
+ maximumFractionDigits: 2,
+ })}
+
+
+ Wallet credits are additive on renewal and top-ups.
+
+
-
- {isCancellationScheduled && (
-
+ Plan will switch to Free at the end of the current period on{' '}
+ {subscriptionPeriodEndLabel}.
+
+ )}
+
- Plan will switch to Free at the end of the current period on{' '}
- {subscriptionPeriodEndLabel}.
-
- )}
-
- {subscriptionPortalUrl && (
+ {subscriptionPortalUrl && (
+
+ )}
- )}
-
- {canCancelSubscription && !cancelViewOpen && (
-
- )}
- {isIncompleteSubscription && !cancelViewOpen && (
- <>
+ {canCancelSubscription && !cancelViewOpen && (
+
+ )}
+ {isIncompleteSubscription && !cancelViewOpen && (
+ <>
+
+
+ >
+ )}
+ {isCancellationScheduled && (
-
- >
- )}
- {isCancellationScheduled && (
-
+
+ Next step:{' '}
+ {isCancellationScheduled
+ ? 'Your plan is already scheduled to switch at period end. You can keep using it until then.'
+ : isIncompleteSubscription
+ ? 'Your payment is pending. Open the in-app cancel view below to cancel this plan change or continue with payment.'
+ : isPaidSubscription
+ ? 'Keep your plan active. You can top-up credits any time.'
+ : 'Top-up credits are available on Free and Team plans.'}
+
+ {cancelViewOpen && (
+
- {resumeSubscriptionMutation.isPending
- ? 'Resuming...'
- : 'Resume plan'}
-
+
+ {isIncompleteSubscription
+ ? 'Cancel pending plan change'
+ : 'Downgrade to Free Plan'}
+
+
+ {isIncompleteSubscription
+ ? 'This pending plan change will be canceled immediately.'
+ : 'Your plan will switch at the end of the current usage period.'}
+
+
+
+
+
+
)}
-
- Next step:{' '}
- {isCancellationScheduled
- ? 'Your plan is already scheduled to switch at period end. You can keep using it until then.'
- : isIncompleteSubscription
- ? 'Your payment is pending. Open the in-app cancel view below to cancel this plan change or continue with payment.'
- : isPaidSubscription
- ? 'Keep your plan active. You can top-up credits any time.'
- : 'Top-up credits are available on Free and Team plans.'}
+
+
+ ) : null;
+
+ const currentPlanSection = isPaidSubscription ? (
+
+
+
+ Current plan
+
+
+ {String(currentSubscriptionPlan)}
+
+
+ You are currently on {String(currentSubscriptionPlan)}.
+
+ {currentPlanPriceLabel !== 'N/A' && (
+
+ {currentPlanPriceLabel}
- {cancelViewOpen && (
+ )}
+ {displaySubscriptionStatus && (
+
+
+
+ )}
+
+ {isCancellationScheduled ? (
+
+ Your downgrade to Free Plan is scheduled at period end on{' '}
+ {subscriptionPeriodEndLabel}.
+
+ ) : null}
+
+
+ {isCancellationScheduled
+ ? 'Possible action: Resume Team Plan.'
+ : 'Possible action: Downgrade to Free Plan.'}
+
+
+
+ {canCancelSubscription && !cancelViewOpen && (
+
+ )}
+ {isCancellationScheduled && (
+
+ )}
+
+
+ {cancelViewOpen && (
+
+
+ Downgrade to Free Plan
+
+
+ Your plan will switch at the end of the current usage period.
+
-
- {isIncompleteSubscription
- ? 'Cancel pending plan change'
- : 'Downgrade to Free Plan'}
-
-
- {isIncompleteSubscription
- ? 'This pending plan change will be canceled immediately.'
- : 'Your plan will switch at the end of the current usage period.'}
-
- void onConfirmCancelSubscription()}
+ disabled={cancelSubscriptionMutation.isPending}
>
-
-
-
+ {cancelSubscriptionMutation.isPending
+ ? 'Downgrading...'
+ : 'Confirm downgrade'}
+
+
- )}
-
+
+ )}
) : null;
@@ -1540,13 +1692,7 @@ export function StripeCheckout({
'Cancel',
),
),
- checkoutType === 'resume'
- ? createElement(
- Flash,
- { variant: 'warning' },
- 'Enter a new payment card to resume your plan.',
- )
- : null,
+ null,
createElement(
Elements,
{
@@ -1623,22 +1769,32 @@ export function StripeCheckout({
padding: 'var(--stack-padding-normal)',
display: 'grid',
gap: 'var(--stack-gap-normal)',
- gridTemplateColumns: ['1fr', 'minmax(0, 1fr) minmax(0, 1fr)'],
- alignItems: 'start',
+ gridTemplateColumns:
+ shouldShowMonthlySubscriptionSection || currentPlanSection
+ ? ['1fr', 'minmax(0, 1fr) minmax(0, 1fr)']
+ : ['1fr'],
+ alignItems: 'stretch',
}}
>
+ {shouldShowMonthlySubscriptionSection ? (
+
+ {monthlySubscriptionSection}
+
+ ) : null}
+ {currentPlanSection}
- {monthlySubscriptionSection}
-
-
{topUpSection}
diff --git a/src/hooks/useCache.ts b/src/hooks/useCache.ts
index 4573bed0..0e966bf3 100644
--- a/src/hooks/useCache.ts
+++ b/src/hooks/useCache.ts
@@ -5496,7 +5496,7 @@ export const useCache = ({ loginRoute = '/login' }: CacheProps = {}) => {
queryFn: async () => {
const resp = await requestDatalayer({
url: withAccountUidQuery(
- `${configuration.iamRunUrl}/api/iam/v1/subscription/plans`,
+ `${configuration.iamRunUrl}/api/iam/v1/plans/catalog`,
scope?.accountUid,
),
method: 'GET',
@@ -5561,7 +5561,7 @@ export const useCache = ({ loginRoute = '/login' }: CacheProps = {}) => {
queryFn: async () => {
return requestDatalayer({
url: withAccountUidQuery(
- `${configuration.iamRunUrl}/api/iam/v1/subscription`,
+ `${configuration.iamRunUrl}/api/iam/v1/plans`,
scope?.accountUid,
),
method: 'GET',
@@ -5581,7 +5581,7 @@ export const useCache = ({ loginRoute = '/login' }: CacheProps = {}) => {
queryKey: ['subscription', 'eligible-accounts'],
queryFn: async () => {
const resp = await requestDatalayer({
- url: `${configuration.iamRunUrl}/api/iam/v1/subscription/eligible-accounts`,
+ url: `${configuration.iamRunUrl}/api/iam/v1/plans/eligible-accounts`,
method: 'GET',
});
return resp.accounts || [];
@@ -5600,7 +5600,7 @@ export const useCache = ({ loginRoute = '/login' }: CacheProps = {}) => {
mutationFn: async () => {
return requestDatalayer({
url: withAccountUidQuery(
- `${configuration.iamRunUrl}/api/iam/v1/subscription/cancel`,
+ `${configuration.iamRunUrl}/api/iam/v1/plans/cancel`,
scope?.accountUid,
),
method: 'POST',
@@ -5624,7 +5624,7 @@ export const useCache = ({ loginRoute = '/login' }: CacheProps = {}) => {
mutationFn: async () => {
return requestDatalayer({
url: withAccountUidQuery(
- `${configuration.iamRunUrl}/api/iam/v1/subscription/resume`,
+ `${configuration.iamRunUrl}/api/iam/v1/plans/resume`,
scope?.accountUid,
),
method: 'POST',
@@ -5649,7 +5649,7 @@ export const useCache = ({ loginRoute = '/login' }: CacheProps = {}) => {
queryKey: ['subscription', 'admin', userId],
queryFn: async () => {
return requestDatalayer({
- url: `${configuration.iamRunUrl}/api/iam/v1/subscription/admin/${userId}`,
+ url: `${configuration.iamRunUrl}/api/iam/v1/plans/admin/${userId}`,
method: 'GET',
});
},
@@ -5667,7 +5667,7 @@ export const useCache = ({ loginRoute = '/login' }: CacheProps = {}) => {
return useMutation({
mutationFn: async (userId: string) => {
return requestDatalayer({
- url: `${configuration.iamRunUrl}/api/iam/v1/subscription/admin/${userId}/reset`,
+ url: `${configuration.iamRunUrl}/api/iam/v1/plans/admin/${userId}/reset`,
method: 'POST',
});
},
diff --git a/src/models/Profile.ts b/src/models/Profile.ts
index fbe41d5f..1353a898 100644
--- a/src/models/Profile.ts
+++ b/src/models/Profile.ts
@@ -48,7 +48,7 @@ export interface Profile {
/** Customer UID */
customer_uid?: string | null;
/** Credits customer UID for billing */
- credits_customer_uid?: string | null;
+ stripe_customer_id_s?: string | null;
/** Email unsubscription status */
unsubscribed_from_outbounds_b?: boolean;
/** Linked contact UID */
diff --git a/src/models/User.ts b/src/models/User.ts
index 56ee8b9f..1ca8a3fc 100644
--- a/src/models/User.ts
+++ b/src/models/User.ts
@@ -77,7 +77,7 @@ export class User implements IUser {
this.origin = u.origin_s;
this.joinDate = u.join_ts_dt ? new Date(u.join_ts_dt) : undefined;
this.credits = u.credits_i ? Number(u.credits_i) : 0;
- this.creditsCustomerId = u.credits_customer_uid;
+ this.creditsCustomerId = u.stripe_customer_id_s;
this.roles = u.roles_ss ?? [];
let iamProviders = [];
try {
From 8f90afa33356452347e00613075303e6d624eb31 Mon Sep 17 00:00:00 2001
From: Eric Charles
Date: Thu, 14 May 2026 18:44:13 +0200
Subject: [PATCH 02/49] feat: stripe
---
src/components/checkout/StripeCheckout.tsx | 391 +++++++++++++++++----
src/hooks/useCache.ts | 94 ++++-
2 files changed, 415 insertions(+), 70 deletions(-)
diff --git a/src/components/checkout/StripeCheckout.tsx b/src/components/checkout/StripeCheckout.tsx
index 2afa540b..e647dc07 100644
--- a/src/components/checkout/StripeCheckout.tsx
+++ b/src/components/checkout/StripeCheckout.tsx
@@ -58,6 +58,10 @@ export interface IPrice {
* Computational credits to receive
*/
credits: number;
+ /**
+ * Whether this price is the server-selected default option
+ */
+ default?: boolean;
}
export interface ISubscriptionPlan {
@@ -69,11 +73,23 @@ export interface ISubscriptionPlan {
included_runs?: number;
}
+type TopUpConfirmation = {
+ purchasedCredits: number;
+ oldWalletBalance: number;
+ newWalletBalance: number;
+ oldAvailableCredits: number;
+ newAvailableCredits: number;
+};
+
export type StripeCheckoutProps = {
checkoutPortal: ICheckoutPortal | null;
appearance?: StripeElementsOptions['appearance'];
accountUid?: string;
showStatusUsageSummary?: boolean;
+ onCheckoutSuccess?: (event: {
+ checkoutType: 'topup' | 'subscription' | 'resume';
+ purchasedCredits?: number;
+ }) => void;
};
const PLAN_INCLUDED_RUNS_DEFAULTS: Record = {
@@ -312,6 +328,7 @@ export function StripeCheckout({
appearance,
accountUid,
showStatusUsageSummary = false,
+ onCheckoutSuccess,
}: StripeCheckoutProps) {
const {
useCreateTopUpPaymentIntent,
@@ -334,11 +351,37 @@ export function StripeCheckout({
'topup' | 'subscription' | 'resume'
>('topup');
const [cancelViewOpen, setCancelViewOpen] = useState(false);
+ const [isConfirmingCancel, setIsConfirmingCancel] = useState(false);
+ const [isResumingTransition, setIsResumingTransition] = useState(false);
const [paymentMessage, setPaymentMessage] = useState(null);
+ const [resumeConfirmationMessage, setResumeConfirmationMessage] = useState<
+ string | null
+ >(null);
+ const [isReturningFromCheckout, setIsReturningFromCheckout] = useState(false);
+ const [topUpConfirmation, setTopUpConfirmation] =
+ useState(null);
+ const [pendingTopUpTarget, setPendingTopUpTarget] = useState<{
+ targetWalletBalance: number;
+ } | null>(null);
+ const topUpPurchaseRef = useRef<{
+ purchasedCredits: number;
+ oldWalletBalance: number;
+ oldAvailableCredits: number;
+ } | null>(null);
// Get Stripe prices using TanStack Query hook
- const { data: pricesData } = useTopUpPrices();
- const items = (pricesData as IPrice[] | undefined) ?? null;
+ const {
+ data: pricesData,
+ isPending: isTopUpPricesPending,
+ isError: isTopUpPricesError,
+ error: topUpPricesError,
+ } = useTopUpPrices();
+ const items = useMemo(() => {
+ if (Array.isArray(pricesData)) {
+ return pricesData as IPrice[];
+ }
+ return [];
+ }, [pricesData]);
const sortedTopUpItems = useMemo(
() =>
[...(items ?? [])].sort(
@@ -405,12 +448,14 @@ export function StripeCheckout({
setProduct(null);
setSubscriptionPlan(null);
setPaymentMessage(null);
+ setIsReturningFromCheckout(true);
if (checkoutType === 'resume') {
try {
const resp = await resumeSubscriptionMutation.mutateAsync();
setPaymentMessage(
resp?.message || 'Payment confirmed and plan resumed successfully.',
);
+ onCheckoutSuccess?.({ checkoutType: 'resume' });
} catch (error) {
setPaymentMessage(
error instanceof Error
@@ -418,6 +463,7 @@ export function StripeCheckout({
: 'Payment confirmed, but unable to resume your plan right now.',
);
}
+ setIsReturningFromCheckout(false);
return;
}
if (checkoutType === 'subscription') {
@@ -436,12 +482,53 @@ export function StripeCheckout({
setPaymentMessage(
'Plan payment confirmed. Your plan status may take a few seconds to refresh.',
);
+ onCheckoutSuccess?.({ checkoutType: 'subscription' });
} else {
+ const topUpPurchase = topUpPurchaseRef.current;
+ const purchasedCredits = topUpPurchase?.purchasedCredits || 0;
+ if (topUpPurchase && topUpPurchase.purchasedCredits > 0) {
+ const targetWalletBalance =
+ topUpPurchase.oldWalletBalance + topUpPurchase.purchasedCredits;
+ setTopUpConfirmation({
+ purchasedCredits: topUpPurchase.purchasedCredits,
+ oldWalletBalance: topUpPurchase.oldWalletBalance,
+ newWalletBalance: targetWalletBalance,
+ oldAvailableCredits: topUpPurchase.oldAvailableCredits,
+ newAvailableCredits:
+ topUpPurchase.oldAvailableCredits + topUpPurchase.purchasedCredits,
+ });
+ setPendingTopUpTarget({
+ targetWalletBalance,
+ });
+ }
+
+ for (let attempt = 0; attempt < 5; attempt += 1) {
+ try {
+ await refetchSubscriptionStatus();
+ } catch {
+ // Keep confirmation visible even if refresh fails transiently.
+ }
+ if (attempt < 4) {
+ await new Promise(resolve => setTimeout(resolve, 800));
+ }
+ }
+
setPaymentMessage(
'Payment confirmed. Credits update may take a few seconds.',
);
+ onCheckoutSuccess?.({
+ checkoutType: 'topup',
+ purchasedCredits,
+ });
+ topUpPurchaseRef.current = null;
}
- }, [checkoutType, refetchSubscriptionStatus, resumeSubscriptionMutation]);
+ setIsReturningFromCheckout(false);
+ }, [
+ checkoutType,
+ onCheckoutSuccess,
+ refetchSubscriptionStatus,
+ resumeSubscriptionMutation,
+ ]);
const subscription = subscriptionResp?.plan || null;
const availablePlans = useMemo(() => {
@@ -636,6 +723,12 @@ export function StripeCheckout({
const walletBalance = walletIsQuota
? Math.max(0, remainingCredits)
: Math.max(0, walletBalanceRaw);
+ const displayedWalletBalance = pendingTopUpTarget
+ ? Math.max(walletBalance, pendingTopUpTarget.targetWalletBalance)
+ : walletBalance;
+ const displayedAvailableCredits = pendingTopUpTarget
+ ? Math.max(remainingCredits, pendingTopUpTarget.targetWalletBalance)
+ : remainingCredits;
const isRunsOverQuota = runsTotal > 0 && usedRuns > runsTotal;
const hasBillablePlan = useMemo(() => {
@@ -684,6 +777,18 @@ export function StripeCheckout({
return !nonCancelable;
}, [hasBillablePlan, subscriptionStatus, isCancellationScheduled]);
+ const isCancelActionPending =
+ cancelSubscriptionMutation.isPending || isConfirmingCancel;
+ const isResumeActionPending =
+ resumeSubscriptionMutation.isPending || isResumingTransition;
+ const showResumeAction = isCancellationScheduled && !isCancelActionPending;
+
+ useEffect(() => {
+ if (isResumingTransition && !isCancellationScheduled) {
+ setIsResumingTransition(false);
+ }
+ }, [isCancellationScheduled, isResumingTransition]);
+
useEffect(() => {
if (isPaidSubscription && paymentMessage) {
setPaymentMessage(null);
@@ -698,10 +803,21 @@ export function StripeCheckout({
useEffect(() => {
if (!product && sortedTopUpItems.length > 0) {
- setProduct(sortedTopUpItems[sortedTopUpItems.length - 1]);
+ const secondCard =
+ sortedTopUpItems.length > 1 ? sortedTopUpItems[1] : sortedTopUpItems[0];
+ setProduct(secondCard);
}
}, [product, sortedTopUpItems]);
+ useEffect(() => {
+ if (!pendingTopUpTarget) {
+ return;
+ }
+ if (walletBalance >= pendingTopUpTarget.targetWalletBalance) {
+ setPendingTopUpTarget(null);
+ }
+ }, [pendingTopUpTarget, walletBalance]);
+
// Auto-open the in-app cancel/downgrade view when the page is opened with
// `?action=downgrade` (e.g. from the Plan Overview "Downgrade" CTA).
// When opened with `?action=resume`, immediately trigger the resume flow.
@@ -732,6 +848,12 @@ export function StripeCheckout({
if (!product) {
return;
}
+ topUpPurchaseRef.current = {
+ purchasedCredits: Math.max(0, Number(product.credits || 0)),
+ oldWalletBalance: displayedWalletBalance,
+ oldAvailableCredits: displayedAvailableCredits,
+ };
+ setTopUpConfirmation(null);
setPaymentMessage(null);
setCheckoutType('topup');
setCheckout(true);
@@ -753,11 +875,17 @@ export function StripeCheckout({
error instanceof Error
? error.message
: 'Unable to initialize Stripe checkout. Please try again.';
+ topUpPurchaseRef.current = null;
setPaymentClientSecret(null);
setCheckout(false);
setPaymentMessage(detail);
}
- }, [topUpPaymentIntentMutation, product]);
+ }, [
+ displayedAvailableCredits,
+ displayedWalletBalance,
+ topUpPaymentIntentMutation,
+ product,
+ ]);
const startSubscriptionCheckout = useCallback(
async (planOverride?: ISubscriptionPlan | null) => {
@@ -829,6 +957,7 @@ export function StripeCheckout({
const onCancelSubscription = useCallback(() => {
setPaymentMessage(null);
+ setResumeConfirmationMessage(null);
setCancelViewOpen(true);
}, []);
@@ -838,6 +967,7 @@ export function StripeCheckout({
const onConfirmCancelSubscription = useCallback(async () => {
setPaymentMessage(null);
+ setIsConfirmingCancel(true);
try {
const resp = await cancelSubscriptionMutation.mutateAsync();
if (resp?.success === false) {
@@ -846,19 +976,6 @@ export function StripeCheckout({
);
}
- // Refresh plan status so stale "incomplete" snapshots disappear
- // as soon as cancellation is applied upstream.
- for (let attempt = 0; attempt < 5; attempt += 1) {
- try {
- await refetchSubscriptionStatus();
- } catch {
- // Ignore transient refetch errors and keep trying.
- }
- if (attempt < 4) {
- await new Promise(resolve => setTimeout(resolve, 800));
- }
- }
-
const responseStatus = String(resp?.status || '').toLowerCase();
const responseCancelAtPeriodEnd = Boolean(resp?.cancel_at_period_end);
const isNowCanceled =
@@ -876,7 +993,23 @@ export function StripeCheckout({
'Plan change requested successfully.',
);
setCancelViewOpen(false);
+ setIsConfirmingCancel(false);
+
+ // Refresh plan status in the background so UI feedback is immediate.
+ void (async () => {
+ for (let attempt = 0; attempt < 5; attempt += 1) {
+ try {
+ await refetchSubscriptionStatus();
+ } catch {
+ // Ignore transient refetch errors and keep trying.
+ }
+ if (attempt < 4) {
+ await new Promise(resolve => setTimeout(resolve, 800));
+ }
+ }
+ })();
} catch (error) {
+ setIsConfirmingCancel(false);
setPaymentMessage(
error instanceof Error
? error.message
@@ -891,6 +1024,8 @@ export function StripeCheckout({
const onResumeSubscription = useCallback(async () => {
setPaymentMessage(null);
+ setResumeConfirmationMessage(null);
+ setIsResumingTransition(true);
try {
const resp = await resumeSubscriptionMutation.mutateAsync();
if (resp?.success === false) {
@@ -899,28 +1034,44 @@ export function StripeCheckout({
);
}
- for (let attempt = 0; attempt < 5; attempt += 1) {
- try {
- await refetchSubscriptionStatus();
- } catch {
- // Ignore transient refetch errors and keep trying.
- }
- if (attempt < 4) {
- await new Promise(resolve => setTimeout(resolve, 800));
- }
- }
-
setCheckout(false);
setPaymentClientSecret(null);
- setPaymentMessage(resp?.message || 'Plan resumed successfully.');
+ setPaymentMessage(null);
+ const periodEndText =
+ subscriptionPeriodEndLabel && subscriptionPeriodEndLabel !== 'N/A'
+ ? ` through ${subscriptionPeriodEndLabel}`
+ : '';
+ setResumeConfirmationMessage(
+ `Resume complete. Your plan remains active${periodEndText} and will renew automatically after that date.`,
+ );
+ setIsResumingTransition(false);
+
+ // Refresh plan status in the background so success feedback appears fast.
+ void (async () => {
+ for (let attempt = 0; attempt < 5; attempt += 1) {
+ try {
+ await refetchSubscriptionStatus();
+ } catch {
+ // Ignore transient refetch errors and keep trying.
+ }
+ if (attempt < 4) {
+ await new Promise(resolve => setTimeout(resolve, 800));
+ }
+ }
+ })();
} catch (error) {
+ setIsResumingTransition(false);
setPaymentMessage(
error instanceof Error
? error.message
: 'Unable to resume your plan right now.',
);
}
- }, [refetchSubscriptionStatus, resumeSubscriptionMutation]);
+ }, [
+ refetchSubscriptionStatus,
+ resumeSubscriptionMutation,
+ subscriptionPeriodEndLabel,
+ ]);
const onRefreshSubscriptionStatus = useCallback(async () => {
setPaymentMessage(null);
@@ -976,10 +1127,37 @@ export function StripeCheckout({
Choose a monthly plan
{isIncompleteSubscription ? (
-
- A pending plan change already exists. Complete payment or cancel it
- from the billing portal before creating a new one.
-
+ <>
+
+ A pending plan change already exists. Complete payment or cancel it
+ from the billing portal before creating a new one.
+
+
+
+
+
+ >
) : !isPaidSubscription ? (
<>
+ {topUpConfirmation ? (
+
+
+ Top-up confirmed: +
+ {topUpConfirmation.purchasedCredits.toLocaleString()} credits
+
+
+ {`Wallet balance: ${topUpConfirmation.oldWalletBalance.toLocaleString()} to ${topUpConfirmation.newWalletBalance.toLocaleString()}`}
+
+
+ {`Available credits: ${topUpConfirmation.oldAvailableCredits.toLocaleString()} to ${topUpConfirmation.newAvailableCredits.toLocaleString()}`}
+
+
+ ) : null}
);
@@ -1337,7 +1529,7 @@ export function StripeCheckout({
as="p"
sx={{ marginBottom: 'var(--stack-gap-condensed)' }}
>
- Wallet balance: {walletBalance.toLocaleString()}
+ Wallet balance: {displayedWalletBalance.toLocaleString()}
Spent credits in current period:{' '}
@@ -1409,15 +1601,16 @@ export function StripeCheckout({
>
)}
- {isCancellationScheduled && (
+ {showResumeAction && (
)}
@@ -1467,12 +1660,17 @@ export function StripeCheckout({
@@ -217,10 +217,10 @@ Key platform features accessible through this Client and CLI:
## Documentation
-- **Command Line Interface (CLI)**: [https://docs.datalayer.app/cli/](https://docs.datalayer.app/cli/)
+- **Command Line Interface (CLI)**: [https://datalayer.ai/docs/cli/](https://datalayer.ai/docs/cli/)
- **Core Python Client**: [core.datalayer.tech/python/](https://core.datalayer.tech/python/)
-- **Platform Documentation**: [docs.datalayer.app](https://docs.datalayer.app/)
-- **API Reference**: [API documentation](https://docs.datalayer.app/api/)
+- **Platform Documentation**: [datalayer.ai/docs](https://datalayer.ai/docs/)
+- **API Reference**: [API documentation](https://datalayer.ai/docs/api/)
## Development
@@ -317,7 +317,7 @@ This project is licensed under the [BSD 3-Clause License](https://github.com/dat
## Support
-- **Documentation**: [Datalayer Platform Documentation](https://docs.datalayer.app/)
+- **Documentation**: [Datalayer Platform Documentation](https://datalayer.ai/docs/)
- **Issues**: [GitHub Issues](https://github.com/datalayer/core/issues)
- **Community**: [Datalayer Platform](https://datalayer.app/)
diff --git a/datalayer_core/assets/about.md b/datalayer_core/assets/about.md
index 46eafccc..5cf726bd 100644
--- a/datalayer_core/assets/about.md
+++ b/datalayer_core/assets/about.md
@@ -1,5 +1,5 @@
## About
-Datalayer provides a command line tool allowing to list, create, terminate and open a console with runtimes.
+Datalayer is a managed AI agents platform for collaborative data analysis, designed to eliminate vendor lock-in.
-Read more on https://docs.datalayer.app
+Read more on https://datalayer.ai/docs
diff --git a/datalayer_core/base/serverapplication.py b/datalayer_core/base/serverapplication.py
index 2d88c0bc..0a00ee97 100644
--- a/datalayer_core/base/serverapplication.py
+++ b/datalayer_core/base/serverapplication.py
@@ -129,7 +129,7 @@ class Brand(Configurable):
)
docs_url = Unicode(
- "https://docs.datalayer.app",
+ "https://datalayer.ai/docs",
config=True,
help=("Documentation URL."),
)
diff --git a/datalayer_core/cli/commands/about.py b/datalayer_core/cli/commands/about.py
index 823ef578..a6a47c2e 100644
--- a/datalayer_core/cli/commands/about.py
+++ b/datalayer_core/cli/commands/about.py
@@ -8,10 +8,17 @@
import typer
from rich.console import Console
from rich.markdown import Markdown
+from rich.text import Text
# Create a Typer app for the about command
app = typer.Typer()
+FOOTER_ANSI = (
+ "\n"
+ "\033[0;32mā·ā·ā·ā·ā·ā·ā·ā·ā·ā·ā·ā·ā·ā·ā·ā·ā·ā·ā·ā·ā·ā·ā·ā·ā·ā·ā·ā·ā·ā·ā·ā·ā·ā·ā·ā·ā·ā·\033[0m "
+ "\033[1;93mā° DATA\033[0m\033[1;92mLAYER\033[0m"
+)
+
@app.command()
def about() -> None:
@@ -24,6 +31,7 @@ def about() -> None:
with open(about_file_path) as readme:
markdown = Markdown(readme.read())
console.print(markdown)
+ console.print(Text.from_ansi(FOOTER_ANSI))
except FileNotFoundError:
console.print(f"[red]Error: Could not find about.md at {about_file_path}[/red]")
raise typer.Exit(1)
diff --git a/datalayer_core/cli/commands/ray.py b/datalayer_core/cli/commands/ray.py
index 643b975c..d467e7c9 100644
--- a/datalayer_core/cli/commands/ray.py
+++ b/datalayer_core/cli/commands/ray.py
@@ -147,6 +147,17 @@ def _normalize_logs_text(value: Any) -> str:
return text
+def _format_scope_label(kind: str, handle: str, uid: str, fallback_kind: str) -> str:
+    """Build a ``"<kind>: <identifier>"`` label for a principal or billable scope.
+
+    The handle (rendered with a leading ``@``) wins over the uid. ``fallback_kind``
+    is used when ``kind`` is empty. An empty string is returned when neither a
+    handle nor a uid is available.
+    """
+    label_kind = (kind or fallback_kind).strip()
+    cleaned_handle, cleaned_uid = ((raw or "").strip() for raw in (handle, uid))
+    identifier = f"@{cleaned_handle}" if cleaned_handle else cleaned_uid
+    return f"{label_kind}: {identifier}" if identifier else ""
+
+
@clusters_app.command(name="list")
@clusters_app.command(name="ls")
def clusters_list(
@@ -166,18 +177,35 @@ def clusters_list(
table.add_column("Namespace")
table.add_column("State")
table.add_column("Workers")
+ table.add_column("Principal")
+ table.add_column("Billable")
for item in items:
metadata = item.get("metadata") or {}
status = item.get("status") or {}
+ ownership = item.get("ownership") or {}
desired = status.get("desiredWorkerReplicas")
available = status.get("availableWorkerReplicas")
workers = f"{available}/{desired}" if desired is not None else str(available or "")
+ principal = _format_scope_label(
+ str(item.get("principal_kind") or ownership.get("principal_kind") or ""),
+ str(item.get("principal_handle") or ownership.get("principal_handle") or ""),
+ str(item.get("principal_uid") or ownership.get("principal_uid") or ""),
+ "principal",
+ )
+ billable = _format_scope_label(
+ str(item.get("billable_account_kind") or ownership.get("billable_account_kind") or ""),
+ str(item.get("billable_account_handle") or ownership.get("billable_account_handle") or ""),
+ str(item.get("billable_account_uid") or ownership.get("billable_account_uid") or ""),
+ "account",
+ )
table.add_row(
str(metadata.get("name", "")),
str(metadata.get("namespace", namespace)),
str(status.get("state", "")),
workers,
+ principal,
+ billable,
)
console.print(table)
diff --git a/docs/docusaurus.config.js b/docs/docusaurus.config.js
index ccc3b224..e9dad98f 100644
--- a/docs/docusaurus.config.js
+++ b/docs/docusaurus.config.js
@@ -160,7 +160,7 @@ module.exports = {
},
{
label: 'Datalayer Docs',
- href: 'https://docs.datalayer.app',
+ href: 'https://datalayer.ai/docs',
},
{
label: 'Datalayer Blog',
diff --git a/examples/README.md b/examples/README.md
index 44d60e6e..7c300469 100644
--- a/examples/README.md
+++ b/examples/README.md
@@ -87,7 +87,7 @@ This project is licensed under the MIT License - see the [LICENSE](../../LICENSE
## Support
-- **Documentation**: [Datalayer Platform Documentation](https://docs.datalayer.app/)
+- **Documentation**: [Datalayer Platform Documentation](https://datalayer.ai/docs/)
- **Issues**: [GitHub Issues](https://github.com/datalayer/core/issues)
- **Community**: [Datalayer Platform](https://datalayer.app/)
diff --git a/examples/decorator/README.md b/examples/decorator/README.md
index 7396249e..a82ff012 100644
--- a/examples/decorator/README.md
+++ b/examples/decorator/README.md
@@ -210,7 +210,7 @@ This project is licensed under the MIT License - see the [LICENSE](../../LICENSE
## Support
-- **Documentation**: [Datalayer Platform Documentation](https://docs.datalayer.app/)
+- **Documentation**: [Datalayer Platform Documentation](https://datalayer.ai/docs/)
- **Issues**: [GitHub Issues](https://github.com/datalayer/core/issues)
- **Community**: [Datalayer Platform](https://datalayer.app/)
diff --git a/examples/fastapi/README.md b/examples/fastapi/README.md
index 66bb5b38..332da6f1 100644
--- a/examples/fastapi/README.md
+++ b/examples/fastapi/README.md
@@ -154,7 +154,7 @@ This project is licensed under the MIT License - see the [LICENSE](../../LICENSE
## Support
-- **Documentation**: [Datalayer Platform Documentation](https://docs.datalayer.app/)
+- **Documentation**: [Datalayer Platform Documentation](https://datalayer.ai/docs/)
- **Issues**: [GitHub Issues](https://github.com/datalayer/core/issues)
- **Community**: [Datalayer Platform](https://datalayer.app/)
diff --git a/examples/nextjs/README.md b/examples/nextjs/README.md
index b0fe1dd7..d6a8f7f3 100644
--- a/examples/nextjs/README.md
+++ b/examples/nextjs/README.md
@@ -278,7 +278,7 @@ This project is licensed under the Modified BSD License - see the [LICENSE](../.
## Support
-- **Documentation**: [Datalayer Platform Documentation](https://docs.datalayer.app/)
+- **Documentation**: [Datalayer Platform Documentation](https://datalayer.ai/docs/)
- **Issues**: [GitHub Issues](https://github.com/datalayer/core/issues)
- **Community**: [Datalayer Platform](https://datalayer.app/)
diff --git a/examples/nextjs/src/components/Footer.tsx b/examples/nextjs/src/components/Footer.tsx
index d55f5c25..349383e3 100644
--- a/examples/nextjs/src/components/Footer.tsx
+++ b/examples/nextjs/src/components/Footer.tsx
@@ -61,7 +61,7 @@ export default function Footer() {
{
};
},
...DEFAULT_QUERY_OPTIONS,
+ refetchOnMount: true,
enabled: Boolean(configuration.runtimesRunUrl),
});
};
+ /**
+ * Query hook for a single Ray cluster.
+ *
+ * Issues a GET on `/api/runtimes/v1/ray/clusters/{clusterName}` with the
+ * namespace as a query parameter, both URL-encoded. Resolves to
+ * `resp.cluster`, or `null` when the response carries none, and throws when
+ * the response is not successful. The query stays disabled until the
+ * runtimes URL, the namespace and the cluster name are all non-empty.
+ */
+ const useRayCluster = (namespace = 'default', clusterName = '') => {
+ return useQuery({
+ queryKey: queryKeys.ray.cluster(namespace, clusterName),
+ queryFn: async () => {
+ const resp = await requestDatalayer({
+ url: `${configuration.runtimesRunUrl}/api/runtimes/v1/ray/clusters/${encodeURIComponent(clusterName)}?namespace=${encodeURIComponent(namespace)}`,
+ method: 'GET',
+ });
+ if (!resp.success) {
+ throw new Error(resp.message || 'Failed to fetch Ray cluster');
+ }
+ return resp.cluster || null;
+ },
+ ...DEFAULT_QUERY_OPTIONS,
+ // The default clusterName is '', so the hook is inert until a name is supplied.
+ enabled:
+ Boolean(configuration.runtimesRunUrl) &&
+ Boolean(namespace) &&
+ Boolean(clusterName),
+ });
+ };
+
const useCreateRayCluster = () => {
return useMutation({
mutationFn: async (payload: Record) => {
@@ -8974,6 +8996,7 @@ export const useCache = ({ loginRoute = '/login' }: CacheProps = {}) => {
// Ray (Runtimes)
useRayClusters,
+ useRayCluster,
useCreateRayCluster,
useDeleteRayCluster,
useRayJobs,
diff --git a/src/state/substates/CoreState.ts b/src/state/substates/CoreState.ts
index 0583f66b..3d95dc54 100644
--- a/src/state/substates/CoreState.ts
+++ b/src/state/substates/CoreState.ts
@@ -44,7 +44,7 @@ let initialConfiguration: IDatalayerCoreConfig = {
logoUrl: 'https://assets.datalayer.tech/datalayer-25.svg',
logoSquareUrl: 'https://assets.datalayer.tech/datalayer-square.png',
copyright: 'Ā© 2025 Datalayer, Inc',
- docsUrl: 'https://docs.datalayer.ai',
+ docsUrl: 'https://datalayer.ai/docs',
supportUrl: 'https://datalayer.ai/support',
termsUrl: 'https://datalayer.ai/terms',
pricingUrl: 'https://datalayer.ai/pricing',
From c9247135102177105b7593e7acb7a0dc14f3fee6 Mon Sep 17 00:00:00 2001
From: Eric Charles
Date: Sat, 6 Jun 2026 12:04:54 +0200
Subject: [PATCH 45/49] utils
---
datalayer_core/cli/commands/ray.py | 5 +++++
src/hooks/useCache.ts | 8 +++++++-
src/utils/Name.ts | 15 +++++++++++++++
src/utils/Snapshot.ts | 4 ++--
4 files changed, 29 insertions(+), 3 deletions(-)
diff --git a/datalayer_core/cli/commands/ray.py b/datalayer_core/cli/commands/ray.py
index d467e7c9..b9060c9e 100644
--- a/datalayer_core/cli/commands/ray.py
+++ b/datalayer_core/cli/commands/ray.py
@@ -242,6 +242,11 @@ def clusters_create(
client = _make_client(token=token)
result = client.ray_create_cluster(payload)
+ if result.get("success") is False:
+ reason = str(result.get("message") or result.get("reason") or "Unable to create cluster")
+ console.print(f"[red]Cluster creation failed:[/red] {reason}")
+ raise typer.Exit(code=1)
+
cluster = result.get("cluster") or {}
metadata = cluster.get("metadata") or {}
console.print(
diff --git a/src/hooks/useCache.ts b/src/hooks/useCache.ts
index 96e1110b..13c17ef6 100644
--- a/src/hooks/useCache.ts
+++ b/src/hooks/useCache.ts
@@ -8570,11 +8570,17 @@ export const useCache = ({ loginRoute = '/login' }: CacheProps = {}) => {
const useCreateRayCluster = () => {
return useMutation({
mutationFn: async (payload: Record) => {
- return requestDatalayer({
+ const resp = await requestDatalayer({
url: `${configuration.runtimesRunUrl}/api/runtimes/v1/ray/clusters`,
method: 'POST',
body: payload,
});
+ if (resp?.success === false) {
+ throw new Error(
+ resp?.message || resp?.reason || 'Failed to create Ray cluster',
+ );
+ }
+ return resp;
},
onSuccess: () => {
queryClient.invalidateQueries({ queryKey: queryKeys.ray.all() });
diff --git a/src/utils/Name.ts b/src/utils/Name.ts
index 999c0dd8..084fac53 100644
--- a/src/utils/Name.ts
+++ b/src/utils/Name.ts
@@ -3,6 +3,17 @@
* Distributed under the terms of the Modified BSD License.
*/
+import {
+ adjectives,
+ animals,
+ uniqueNamesGenerator,
+} from 'unique-names-generator';
+
+const RANDOM_TIMESTAMP_NAME_CONFIGURATION = {
+ dictionaries: [adjectives, animals],
+ separator: '-',
+};
+
export const asDisplayName = (givenName: string, familyName: string) => {
return givenName
? familyName
@@ -38,3 +49,7 @@ export const toFriendlyName = (firstName?: string, lastName?: string) => {
}
return '';
};
+
+/**
+ * Create a random name suffixed with the current epoch time in milliseconds.
+ *
+ * The random part comes from `uniqueNamesGenerator` using
+ * `RANDOM_TIMESTAMP_NAME_CONFIGURATION`.
+ *
+ * @returns The generated name, `<random part>-<Date.now()>`
+ */
+export const createRandomTimestampName = () => {
+  const randomPart = uniqueNamesGenerator(RANDOM_TIMESTAMP_NAME_CONFIGURATION);
+  const timestamp = Date.now();
+  return `${randomPart}-${timestamp}`;
+};
diff --git a/src/utils/Snapshot.ts b/src/utils/Snapshot.ts
index 0bdb1b69..64496ea9 100644
--- a/src/utils/Snapshot.ts
+++ b/src/utils/Snapshot.ts
@@ -12,7 +12,7 @@ import {
/**
* Kernel snapshot description configuration.
*/
-const KERNEL_SNAPSHOT_DESCRIPTION_CONFIGURATION = {
+const CODE_SANDBOX_SNAPSHOT_DESCRIPTION_CONFIGURATION = {
dictionaries: [adjectives, animals],
separator: '-',
};
@@ -24,5 +24,5 @@ const KERNEL_SNAPSHOT_DESCRIPTION_CONFIGURATION = {
* @returns The kernel snapshot name
*/
export function createSandboxSnapshotName(suffix: string): string {
- return `${uniqueNamesGenerator(KERNEL_SNAPSHOT_DESCRIPTION_CONFIGURATION)}-${suffix}`;
+ return `${uniqueNamesGenerator(CODE_SANDBOX_SNAPSHOT_DESCRIPTION_CONFIGURATION)}-${suffix}`;
}
From eba8eff1d8eb12c22138c696a651c1f09ec94329 Mon Sep 17 00:00:00 2001
From: Eric Charles
Date: Sat, 6 Jun 2026 13:33:19 +0200
Subject: [PATCH 46/49] cache
---
src/hooks/useCache.ts | 19 ++++++++++++++++++-
1 file changed, 18 insertions(+), 1 deletion(-)
diff --git a/src/hooks/useCache.ts b/src/hooks/useCache.ts
index 13c17ef6..4aced5dc 100644
--- a/src/hooks/useCache.ts
+++ b/src/hooks/useCache.ts
@@ -8658,6 +8658,7 @@ export const useCache = ({ loginRoute = '/login' }: CacheProps = {}) => {
jobName: string,
namespace = 'default',
tailLines = 200,
+ enabled = true,
) => {
return useQuery({
queryKey: queryKeys.ray.logs(namespace, jobName),
@@ -8668,6 +8669,8 @@ export const useCache = ({ loginRoute = '/login' }: CacheProps = {}) => {
const resp = await requestDatalayer({
url: `${configuration.runtimesRunUrl}/api/runtimes/v1/ray/jobs/${encodeURIComponent(jobName)}/logs?${params.toString()}`,
method: 'GET',
+ // Logs naturally 404 once a job is deleted; never surface a toast.
+ notifyOnError: false,
});
if (!resp.success) {
throw new Error(resp.message || 'Failed to fetch Ray job logs');
@@ -8678,7 +8681,21 @@ export const useCache = ({ loginRoute = '/login' }: CacheProps = {}) => {
};
},
...DEFAULT_QUERY_OPTIONS,
- enabled: Boolean(configuration.runtimesRunUrl) && Boolean(jobName),
+ enabled:
+ Boolean(configuration.runtimesRunUrl) && Boolean(jobName) && enabled,
+ refetchOnMount: 'always',
+ refetchInterval: query => {
+ const status = String(
+ (query.state.data as { status?: string } | undefined)?.status || '',
+ ).toLowerCase();
+ const terminal = [
+ 'succeeded',
+ 'failed',
+ 'stopped',
+ 'completed',
+ ].includes(status);
+ return terminal ? false : 3000;
+ },
});
};
From 390098261093fe0e590c7c5d67846e110fbf7e22 Mon Sep 17 00:00:00 2001
From: Eric Charles
Date: Sat, 6 Jun 2026 20:25:56 +0200
Subject: [PATCH 47/49] evals
---
datalayer_core/cli/commands/evals.py | 1408 ++++++++++++++++++++++----
1 file changed, 1231 insertions(+), 177 deletions(-)
diff --git a/datalayer_core/cli/commands/evals.py b/datalayer_core/cli/commands/evals.py
index c6037afa..b7ace7f2 100644
--- a/datalayer_core/cli/commands/evals.py
+++ b/datalayer_core/cli/commands/evals.py
@@ -6,7 +6,9 @@
from __future__ import annotations
from datetime import datetime, timezone
+import csv
import json
+import math
import time
from pathlib import Path
from typing import Any, Optional
@@ -14,6 +16,7 @@
import typer
from rich.console import Console
from rich.table import Table
+from rich.tree import Tree
from datalayer_core.client.client import DatalayerClient
from datalayer_core.utils.urls import DatalayerURLs
@@ -24,7 +27,7 @@
invoke_without_command=True,
)
-evals_app = typer.Typer(name="evals", help="Manage evalsets.")
+evals_app = typer.Typer(name="evalsets", help="Manage evalsets.")
experiments_app = typer.Typer(name="experiments", help="Manage evalset experiments.")
runs_app = typer.Typer(name="runs", help="Launch and monitor evalset runs.")
live_app = typer.Typer(name="live", help="Inspect live evalset monitoring.")
@@ -36,6 +39,15 @@ def _now_iso() -> str:
return datetime.now(timezone.utc).isoformat().replace("+00:00", "Z")
+def _timestamp_slug(raw_iso: str) -> str:
+    """Collapse an ISO-8601 timestamp into a compact slug that ends in ``Z``.
+
+    Dashes, colons and dots are removed, a ``+00:00`` / ``+0000`` offset is
+    rewritten to ``Z``, and a ``Z`` is appended when the result does not
+    already end with one.
+    """
+    cleaned = raw_iso.replace("-", "").replace(":", "").replace(".", "")
+    # Colons are already stripped, so a "+00:00" offset can only show up as "+0000".
+    cleaned = cleaned.replace("+0000", "Z")
+    return cleaned if cleaned.endswith("Z") else f"{cleaned}Z"
+
+
def _parse_json_value(raw: Optional[str], flag_name: str) -> dict[str, Any]:
if not raw:
return {}
@@ -103,6 +115,12 @@ def _fmt_pct(raw: float | None) -> str:
return f"{raw * 100:.1f}%"
+def _style_text(value: str, style: str | None, colorize: bool) -> str:
+    """Wrap ``value`` in ``[style]...[/style]`` markup tags.
+
+    The text is returned untouched when ``colorize`` is false or no style is given.
+    """
+    if colorize and style:
+        return f"[{style}]{value}[/{style}]"
+    return value
+
+
def _compute_baseline_and_drift(runs: list[dict[str, Any]]) -> tuple[float | None, float | None, float | None]:
pass_rates = [rate for rate in (_run_pass_rate(run) for run in runs) if rate is not None]
if not pass_rates:
@@ -115,6 +133,117 @@ def _compute_baseline_and_drift(runs: list[dict[str, Any]]) -> tuple[float | Non
return baseline, latest, drift
+def _extract_failure_cause(run: dict[str, Any]) -> dict[str, Any] | None:
+    """Return the failure cause recorded on a run, or ``None`` when there is none.
+
+    A non-empty ``failure_cause`` dict is looked up in ``run["report"]`` first and
+    ``run["summary"]`` second. Failing that, a cause is synthesized from the legacy
+    ``failure_reason`` / ``execution_error`` (summary) or ``error`` (report) text.
+    """
+    payloads = {
+        name: (candidate if isinstance(candidate, dict) else {})
+        for name, candidate in ((key, run.get(key)) for key in ("report", "summary"))
+    }
+
+    # Structured causes take precedence; the report is consulted before the summary.
+    for payload in payloads.values():
+        structured = payload.get("failure_cause")
+        if isinstance(structured, dict) and structured:
+            return structured
+
+    # Legacy runs only carry a free-text error message.
+    legacy = (
+        payloads["summary"].get("failure_reason")
+        or payloads["summary"].get("execution_error")
+        or payloads["report"].get("error")
+    )
+    if not isinstance(legacy, str):
+        return None
+    text = legacy.strip()
+    if not text:
+        return None
+    return {
+        "stage": "unknown",
+        "type": "legacy_error",
+        "message": text,
+    }
+
+
+def _format_failure_cause(cause: dict[str, Any] | None) -> str:
+    """Summarize a failure cause on one line as ``[type] message``.
+
+    Either part is omitted when empty; a missing or empty cause yields ``""``.
+    """
+    if not (isinstance(cause, dict) and cause):
+        return ""
+    kind = str(cause.get("type") or "").strip()
+    text = str(cause.get("message") or "").strip()
+    tag = f"[{kind}]" if kind else ""
+    return " ".join(piece for piece in (tag, text) if piece).strip()
+
+
+def _failure_cause_detail_lines(cause: dict[str, Any]) -> list[str]:
+ """Render the full failure cause (message, context, diagnostics, attempts) as markdown lines.
+
+ Emits one bullet per non-empty field of ``cause``, a fenced ``text`` block for
+ ``detail_excerpt``, bullets for the URLs and ID lists found under
+ ``cause["diagnostics"]``, and a table of its ``attempts`` built with
+ ``_markdown_table``. The returned list is empty when none of these fields
+ is populated.
+ """
+ lines: list[str] = []
+ message = str(cause.get("message") or "").strip()
+ if message:
+ lines.append(f"- Message: {message}")
+ # Scalar context fields, rendered as inline code in a fixed order.
+ for key, label in (
+ ("stage", "Stage"),
+ ("type", "Type"),
+ ("runtime_pod_name", "Runtime pod"),
+ ("environment_name", "Environment"),
+ ("execution_url", "Execution URL"),
+ ):
+ value = str(cause.get(key) or "").strip()
+ if value:
+ lines.append(f"- {label}: `{value}`")
+
+ detail = str(cause.get("detail_excerpt") or "").strip()
+ if detail:
+ lines.append("- Detail excerpt:")
+ lines.append("")
+ lines.append("```text")
+ # detail is non-empty after strip(), so splitlines() always yields lines; the fallback is defensive.
+ lines.extend(detail.splitlines() or [detail])
+ lines.append("```")
+
+ diagnostics = cause.get("diagnostics")
+ if isinstance(diagnostics, dict) and diagnostics:
+ for key, label in (
+ ("agent_runtimes_url", "Agent runtimes URL"),
+ ("run_url", "Run URL"),
+ ):
+ value = diagnostics.get(key)
+ if value:
+ lines.append(f"- {label}: `{value}`")
+ # List-valued diagnostics are joined into one comma-separated bullet.
+ for key, label in (
+ ("route_ids", "Route IDs tried"),
+ ("discovered_agent_ids", "Discovered agent IDs"),
+ ("candidate_urls", "Candidate URLs"),
+ ):
+ value = diagnostics.get(key)
+ if isinstance(value, list) and value:
+ rendered = ", ".join(f"`{item}`" for item in value)
+ lines.append(f"- {label}: {rendered}")
+
+ attempts = diagnostics.get("attempts")
+ if isinstance(attempts, list) and attempts:
+ lines.append("- Connection attempts:")
+ attempt_rows: list[list[str]] = []
+ for attempt in attempts:
+ # Entries that are not dicts are skipped rather than rendered.
+ if not isinstance(attempt, dict):
+ continue
+ status_code = attempt.get("status_code")
+ attempt_rows.append(
+ [
+ str(attempt.get("url") or "-"),
+ "ok" if attempt.get("ok") else "failed",
+ # Only a missing status code renders as "-"; a literal 0 is kept.
+ "-" if status_code is None else str(status_code),
+ str(attempt.get("error") or "-"),
+ ]
+ )
+ if attempt_rows:
+ lines.append("")
+ lines.extend(
+ _markdown_table(
+ ["URL", "Result", "HTTP", "Error"],
+ attempt_rows,
+ ["left", "left", "right", "left"],
+ )
+ )
+ return lines
+
+
def _run_detail_record(run: dict[str, Any]) -> dict[str, Any]:
metrics = run.get("metrics") if isinstance(run.get("metrics"), dict) else {}
summary = run.get("summary") if isinstance(run.get("summary"), dict) else {}
@@ -128,8 +257,935 @@ def _run_detail_record(run: dict[str, Any]) -> dict[str, Any]:
"metrics": metrics,
"summary": summary,
"report": report,
+ "failure_cause": _extract_failure_cause(run),
+ }
+
+
+def _report_data(
+ client: DatalayerClient,
+ evalset_id: str,
+ run_limit: int,
+ account_uid: Optional[str],
+) -> dict[str, Any]:
+ """Collect per-experiment run statistics for an evalset into a report dict.
+
+ For each experiment returned by ``client.evals_list_experiments`` this fetches
+ up to ``run_limit`` runs and records baseline/latest/drift pass rates (from
+ ``_compute_baseline_and_drift``), a comparison of the first two listed runs,
+ deltas between each adjacent pair of runs, and the mean and standard
+ deviation of the available pass rates.
+
+ Returns a dict with ``evalset_id``, ``generated_at`` and an ``experiments``
+ list holding one entry per experiment.
+ """
+ # NOTE(review): only the first 200 experiments are requested; no pagination is visible here.
+ experiments_payload = client.evals_list_experiments(
+ evalset_id=evalset_id,
+ limit=200,
+ offset=0,
+ account_uid=account_uid,
+ )
+ experiments = experiments_payload.get("experiments") or []
+
+ report: dict[str, Any] = {
+ "evalset_id": evalset_id,
+ "generated_at": _now_iso(),
+ "experiments": [],
}
+ for experiment in experiments:
+ experiment_id = str(experiment.get("id", ""))
+ experiment_name = str(experiment.get("name", experiment_id))
+
+ runs_payload = client.evals_list_runs(
+ experiment_id,
+ limit=run_limit,
+ offset=0,
+ account_uid=account_uid,
+ )
+ runs = runs_payload.get("runs") or []
+ # Fall back to the fetched count when the payload has no (or a zero) total.
+ total_runs = int(runs_payload.get("total") or len(runs))
+ baseline, latest, drift = _compute_baseline_and_drift(runs)
+
+ latest_two_delta: float | None = None
+ latest_two_run_ids: list[str] = []
+ latest_two_compare: dict[str, Any] | None = None
+ # Compare the first two listed runs (presumably the most recent - confirm the API ordering).
+ if len(runs) >= 2:
+ latest_two_run_ids = [str(runs[0].get("id", "")), str(runs[1].get("id", ""))]
+ compare_payload = client.evals_compare_runs(
+ latest_two_run_ids,
+ account_uid=account_uid,
+ )
+ compared_runs = compare_payload.get("runs") or []
+ compared_by_id = {
+ str(run.get("id", "")): run
+ for run in compared_runs
+ if isinstance(run, dict)
+ }
+ # Prefer the compare endpoint's payloads; fall back to the listed runs when an id is missing.
+ run_a = compared_by_id.get(latest_two_run_ids[0], runs[0])
+ run_b = compared_by_id.get(latest_two_run_ids[1], runs[1])
+ pass_a = _run_pass_rate(run_a)
+ pass_b = _run_pass_rate(run_b)
+ if pass_a is not None and pass_b is not None:
+ latest_two_delta = pass_a - pass_b
+ latest_two_compare = {
+ "run_ids": latest_two_run_ids,
+ "run_a": _run_detail_record(run_a),
+ "run_b": _run_detail_record(run_b),
+ "delta_pass_rate": latest_two_delta,
+ }
+
+ # Pass-rate delta between each adjacent pair of runs, in listing order.
+ consecutive_comparisons: list[dict[str, Any]] = []
+ for idx in range(max(0, len(runs) - 1)):
+ run_a = runs[idx]
+ run_b = runs[idx + 1]
+ pass_a = _run_pass_rate(run_a)
+ pass_b = _run_pass_rate(run_b)
+ delta = None
+ if pass_a is not None and pass_b is not None:
+ delta = pass_a - pass_b
+ consecutive_comparisons.append(
+ {
+ "run_a_id": str(run_a.get("id", "")),
+ "run_b_id": str(run_b.get("id", "")),
+ "run_a_status": str(run_a.get("status", "")),
+ "run_b_status": str(run_b.get("status", "")),
+ "run_a_pass_rate": pass_a,
+ "run_b_pass_rate": pass_b,
+ "delta_pass_rate": delta,
+ }
+ )
+
+ # Runs without a numeric pass rate are left out of the statistics.
+ pass_rates = [
+ _run_pass_rate(run)
+ for run in runs
+ if isinstance(_run_pass_rate(run), (int, float))
+ ]
+ numeric_pass_rates = [float(value) for value in pass_rates if isinstance(value, (int, float))]
+ mean_pass = sum(numeric_pass_rates) / len(numeric_pass_rates) if numeric_pass_rates else None
+ stddev_pass = None
+ if numeric_pass_rates:
+ # Population standard deviation: the variance divides by N, not N - 1.
+ variance = sum((value - mean_pass) ** 2 for value in numeric_pass_rates) / len(numeric_pass_rates)
+ stddev_pass = math.sqrt(variance)
+
+ report["experiments"].append(
+ {
+ "id": experiment_id,
+ "name": experiment_name,
+ "runs_total": total_runs,
+ "runs_fetched": len(runs),
+ "latest_pass_rate": latest,
+ "baseline_pass_rate": baseline,
+ "drift_delta": drift,
+ "latest_two_run_ids": latest_two_run_ids,
+ "latest_two_delta": latest_two_delta,
+ "latest_two_comparison": latest_two_compare,
+ "mean_pass_rate": mean_pass,
+ "stddev_pass_rate": stddev_pass,
+ "runs": [_run_detail_record(run) for run in runs],
+ "consecutive_comparisons": consecutive_comparisons,
+ }
+ )
+ return report
+
+
+def _ascii_bar(
+    value: float | None,
+    width: int = 28,
+    *,
+    full_blocks: bool = True,
+    colorize: bool = False,
+) -> str:
+    """Render a ratio as a fixed-width horizontal bar.
+
+    ``value`` is clamped to ``[0, 1]`` and scaled to ``width`` cells; ``None``
+    renders as ``"-"``. With ``full_blocks`` the bar uses block glyphs,
+    otherwise ``#`` and ``.``. When ``colorize`` is true the filled part is
+    styled green (>= 0.85), yellow (>= 0.75) or red, and the empty part grey.
+    """
+    if value is None:
+        return "-"
+    bounded = max(0.0, min(1.0, float(value)))
+    filled = int(round(bounded * width))
+    # Escapes keep the glyphs intact whatever encoding the file is read with:
+    # U+2588 FULL BLOCK for filled cells, U+2591 LIGHT SHADE for empty ones.
+    fill_char = "\u2588" if full_blocks else "#"
+    empty_char = "\u2591" if full_blocks else "."
+    filled_part = fill_char * filled
+    empty_part = empty_char * (width - filled)
+    if not colorize:
+        return filled_part + empty_part
+    if bounded >= 0.85:
+        style = "green"
+    elif bounded >= 0.75:
+        style = "yellow"
+    else:
+        style = "red"
+    return _style_text(filled_part, style, True) + _style_text(empty_part, "grey39", True)
+
+
+def _fmt_pts(value: float) -> str:
+    """Express a ratio in percentage points with one decimal and no unit suffix."""
+    points = value * 100
+    return format(points, ".1f")
+
+
+def _ascii_histogram(
+    values: list[float],
+    *,
+    bins: int = 8,
+    width: int = 22,
+    min_value: float | None = None,
+    max_value: float | None = None,
+    full_blocks: bool = True,
+    colorize: bool = False,
+    drift_palette: bool = False,
+) -> list[str]:
+    """Render ``values`` as a text histogram, one line per bin.
+
+    Each line reads ``<left> to <right> pts |<bar>| <count>``, with the bin edges
+    formatted by ``_fmt_pts`` and the bar length scaled to the fullest bin. The
+    range defaults to the min/max of ``values`` unless ``min_value`` /
+    ``max_value`` are numbers; at least two bins are always used. An empty
+    input yields ``["n/a"]``.
+
+    With ``colorize``, bars are styled either by the sign of the bin range
+    (``drift_palette``: red below zero, green above, yellow when the bin spans
+    zero) or by how full the bin is relative to the peak.
+    """
+    if not values:
+        return ["n/a"]
+
+    lo = min_value if isinstance(min_value, (int, float)) else min(values)
+    hi = max_value if isinstance(max_value, (int, float)) else max(values)
+    if hi <= lo:
+        # Degenerate range: widen it slightly so the bin arithmetic never divides by zero.
+        hi = lo + 1e-9
+
+    bins = max(2, bins)
+    counts = [0] * bins
+    span = hi - lo
+    for value in values:
+        ratio = (value - lo) / span
+        # Clamp so values on or beyond the edges land in the first or last bin.
+        idx = max(0, min(bins - 1, int(ratio * bins)))
+        counts[idx] += 1
+
+    peak = max(counts) if counts else 1
+    # Escapes keep the glyphs intact whatever encoding the file is read with:
+    # U+2588 FULL BLOCK for filled cells, U+2591 LIGHT SHADE for empty ones.
+    fill_char = "\u2588" if full_blocks else "#"
+    empty_char = "\u2591" if full_blocks else "."
+    lines: list[str] = []
+    for idx, count in enumerate(counts):
+        left = lo + (span * idx / bins)
+        right = lo + (span * (idx + 1) / bins)
+        filled = int(round((count / peak) * width)) if peak > 0 else 0
+        filled_part = fill_char * filled
+        empty_part = empty_char * (width - filled)
+        if colorize:
+            if drift_palette:
+                if right <= 0:
+                    bar_style = "red"
+                elif left >= 0:
+                    bar_style = "green"
+                else:
+                    bar_style = "yellow"
+            elif peak > 0 and count / peak >= 0.67:
+                bar_style = "cyan"
+            elif peak > 0 and count / peak >= 0.34:
+                bar_style = "blue"
+            else:
+                bar_style = "magenta"
+            bar = _style_text(filled_part, bar_style, True) + _style_text(empty_part, "grey39", True)
+        else:
+            bar = filled_part + empty_part
+        lines.append(
+            f"{_fmt_pts(left):>6} to {_fmt_pts(right):>6} pts |{bar}| {count}"
+        )
+    return lines
+
+
+def _fmt_delta(value: float | None, *, colorize: bool = False) -> str:
+    """Format a ratio delta as signed percentage points, e.g. ``+2.5 pts``.
+
+    ``None`` renders as ``"n/a"``. The text is passed through ``_style_text``
+    with green for positive, red for negative and yellow otherwise.
+    """
+    if value is None:
+        return "n/a"
+    rendered = f"{value * 100:+.1f} pts"
+    style = "green" if value > 0 else "red" if value < 0 else "yellow"
+    return _style_text(rendered, style, colorize)
+
+
+def _sparkline(values: list[float], *, colorize: bool = False) -> str:
+    """Render ``values`` as a one-line sparkline of eight-level block glyphs.
+
+    Values are scaled between their own minimum and maximum; a flat series is
+    drawn at the second-highest level. An empty input yields ``"n/a"``. With
+    ``colorize`` the whole line is styled from the last value: green (>= 0.85),
+    yellow (>= 0.75) or red.
+    """
+    if not values:
+        return "n/a"
+    # U+2581..U+2588 (lower one-eighth block up to full block), written as escapes
+    # so the glyphs survive whatever encoding the file is read with.
+    ticks = "\u2581\u2582\u2583\u2584\u2585\u2586\u2587\u2588"
+    lo = min(values)
+    hi = max(values)
+    if hi <= lo:
+        base = ticks[-2] * len(values)
+    else:
+        span = hi - lo
+        top = len(ticks) - 1
+        chars = []
+        for value in values:
+            idx = int(round(((value - lo) / span) * top))
+            chars.append(ticks[max(0, min(top, idx))])
+        base = "".join(chars)
+    if not colorize:
+        return base
+    if values[-1] >= 0.85:
+        style = "green"
+    elif values[-1] >= 0.75:
+        style = "yellow"
+    else:
+        style = "red"
+    return _style_text(base, style, True)
+
+
+def _pairwise_latest_deltas(experiments: list[dict[str, Any]]) -> list[dict[str, Any]]:
+    """Compare the latest pass rate of every pair of experiments.
+
+    Experiments without a numeric ``latest_pass_rate`` are ignored. Each result
+    holds both names, both rates and ``delta`` (left minus right); results are
+    ordered by the largest absolute delta first.
+    """
+    rated = [
+        (str(experiment.get("name", "")), float(experiment.get("latest_pass_rate")))
+        for experiment in experiments
+        if isinstance(experiment.get("latest_pass_rate"), (int, float))
+    ]
+    combos = [
+        {
+            "left": left_name,
+            "right": right_name,
+            "left_latest": left_rate,
+            "right_latest": right_rate,
+            "delta": left_rate - right_rate,
+        }
+        for position, (left_name, left_rate) in enumerate(rated)
+        for right_name, right_rate in rated[position + 1 :]
+    ]
+    return sorted(combos, key=lambda combo: abs(combo["delta"]), reverse=True)
+
+
+def _markdown_table(headers: list[str], rows: list[list[str]], aligns: list[str]) -> list[str]:
+    """Render a padded, pipe-delimited markdown table as a list of lines.
+
+    Columns are sized to their widest cell, with a minimum of three characters
+    so that header, separator and body rows always line up. ``aligns`` holds
+    ``"right"`` or anything else (treated as left) per column and controls both
+    the separator colon and the body padding; header cells are left-justified.
+
+    Returns the header line, the separator line, then one line per row.
+    """
+    # A separator cell needs three characters, so every column is at least that wide.
+    widths = [max(3, len(header)) for header in headers]
+    for row in rows:
+        for idx, cell in enumerate(row):
+            widths[idx] = max(widths[idx], len(cell))
+
+    def _pad(cell: str, width: int, align: str) -> str:
+        return cell.rjust(width) if align == "right" else cell.ljust(width)
+
+    def _join(cells: list[str]) -> str:
+        return "| " + " | ".join(cells) + " |"
+
+    columns = range(len(headers))
+    header_line = _join([headers[idx].ljust(widths[idx]) for idx in columns])
+
+    sep_parts: list[str] = []
+    for idx, align in enumerate(aligns):
+        dashes = "-" * (widths[idx] - 1)
+        sep_parts.append(dashes + ":" if align == "right" else ":" + dashes)
+    sep_line = _join(sep_parts)
+
+    body_lines = [
+        _join([_pad(row[idx], widths[idx], aligns[idx]) for idx in columns])
+        for row in rows
+    ]
+    return [header_line, sep_line, *body_lines]
+
+
+def _report_markdown(report: dict[str, Any], run_limit: int, *, colorize: bool = False) -> str:
+ evalset_id = str(report.get("evalset_id", ""))
+ generated_at = str(report.get("generated_at", ""))
+ experiments = [item for item in (report.get("experiments") or []) if isinstance(item, dict)]
+
+ lines: list[str] = []
+ lines.append(f"# Evals Report: {evalset_id}")
+ lines.append("")
+ lines.append(f"- Generated at: {generated_at}")
+ lines.append(f"- Experiments: {len(experiments)}")
+ lines.append(f"- Run window per experiment: {run_limit}")
+ lines.append("")
+
+ lines.append("## Experiment Overview")
+ lines.append("")
+ overview_rows: list[list[str]] = []
+ for experiment in experiments:
+ runs_fetched = int(experiment.get("runs_fetched") or 0)
+ runs_total = int(experiment.get("runs_total") or 0)
+ overview_rows.append(
+ [
+ f"{experiment.get('name', '')}",
+ f"{runs_fetched}/{runs_total}",
+ _fmt_pct(experiment.get('latest_pass_rate') if isinstance(experiment.get('latest_pass_rate'), (int, float)) else None),
+ _fmt_pct(experiment.get('baseline_pass_rate') if isinstance(experiment.get('baseline_pass_rate'), (int, float)) else None),
+ _fmt_delta(experiment.get('drift_delta') if isinstance(experiment.get('drift_delta'), (int, float)) else None, colorize=colorize),
+ _fmt_delta(experiment.get('latest_two_delta') if isinstance(experiment.get('latest_two_delta'), (int, float)) else None, colorize=colorize),
+ ]
+ )
+ lines.extend(
+ _markdown_table(
+ ["Experiment", "Runs (fetched/total)", "Latest", "Baseline", "Drift", "Latest-2 Delta"],
+ overview_rows,
+ ["left", "right", "right", "right", "right", "right"],
+ )
+ )
+ lines.append("")
+
+ lines.append("## Comparison Combinations")
+ lines.append("")
+
+ ranked_latest = sorted(
+ [item for item in experiments if isinstance(item.get("latest_pass_rate"), (int, float))],
+ key=lambda item: float(item.get("latest_pass_rate") or 0.0),
+ reverse=True,
+ )
+ lines.append("### By Latest Pass Rate")
+ lines.append("")
+ latest_rows: list[list[str]] = []
+ for idx, item in enumerate(ranked_latest, start=1):
+ latest_rows.append([str(idx), f"{item.get('name', '')}", _fmt_pct(float(item.get('latest_pass_rate') or 0.0))])
+ lines.extend(_markdown_table(["Rank", "Experiment", "Latest"], latest_rows, ["right", "left", "right"]))
+ latest_values = [
+ float(item.get("latest_pass_rate"))
+ for item in ranked_latest
+ if isinstance(item.get("latest_pass_rate"), (int, float))
+ ]
+ lines.append("")
+ lines.append("Latest pass-rate histogram (pts):")
+ for hist_line in _ascii_histogram(
+ latest_values,
+ bins=8,
+ width=20,
+ min_value=0.0,
+ max_value=1.0,
+ full_blocks=True,
+ colorize=colorize,
+ ):
+ lines.append(f"`{hist_line}`")
+ lines.append("")
+
+ ranked_drift = sorted(
+ [item for item in experiments if isinstance(item.get("drift_delta"), (int, float))],
+ key=lambda item: float(item.get("drift_delta") or 0.0),
+ )
+ lines.append("### By Drift (Most Negative To Most Positive)")
+ lines.append("")
+ drift_rows: list[list[str]] = []
+ for idx, item in enumerate(ranked_drift, start=1):
+ drift_rows.append([str(idx), f"{item.get('name', '')}", _fmt_delta(float(item.get('drift_delta') or 0.0), colorize=colorize)])
+ lines.extend(_markdown_table(["Rank", "Experiment", "Drift"], drift_rows, ["right", "left", "right"]))
+ drift_values = [
+ float(item.get("drift_delta"))
+ for item in ranked_drift
+ if isinstance(item.get("drift_delta"), (int, float))
+ ]
+ lines.append("")
+ lines.append("Drift histogram (delta pts):")
+ for hist_line in _ascii_histogram(
+ drift_values,
+ bins=8,
+ width=20,
+ full_blocks=True,
+ colorize=colorize,
+ drift_palette=True,
+ ):
+ lines.append(f"`{hist_line}`")
+ lines.append("")
+
+ ranked_stability = sorted(
+ [item for item in experiments if isinstance(item.get("stddev_pass_rate"), (int, float))],
+ key=lambda item: float(item.get("stddev_pass_rate") or 0.0),
+ )
+ lines.append("### By Stability (Lowest Pass-Rate StdDev)")
+ lines.append("")
+ stability_rows: list[list[str]] = []
+ for idx, item in enumerate(ranked_stability, start=1):
+ stddev = item.get("stddev_pass_rate")
+ mean = item.get("mean_pass_rate")
+ stability_rows.append(
+ [
+ str(idx),
+ f"{item.get('name', '')}",
+ (f"{float(stddev) * 100:.2f} pts" if isinstance(stddev, (int, float)) else "n/a"),
+ (_fmt_pct(float(mean)) if isinstance(mean, (int, float)) else "n/a"),
+ ]
+ )
+ lines.extend(_markdown_table(["Rank", "Experiment", "StdDev", "Mean"], stability_rows, ["right", "left", "right", "right"]))
+ lines.append("")
+
+ pairwise = _pairwise_latest_deltas(experiments)
+ lines.append("### Pairwise Latest-Pass Deltas")
+ lines.append("")
+ pair_rows: list[list[str]] = []
+ for pair in pairwise:
+ pair_rows.append(
+ [
+ f"{pair['left']} vs {pair['right']}",
+ _fmt_pct(pair['left_latest']),
+ _fmt_pct(pair['right_latest']),
+ _fmt_delta(pair['delta'], colorize=colorize),
+ ]
+ )
+ if not pairwise:
+ pair_rows.append(["n/a", "n/a", "n/a", "n/a"])
+ lines.extend(
+ _markdown_table(
+ ["Pair", "Left Latest", "Right Latest", "Delta (Left-Right)"],
+ pair_rows,
+ ["left", "right", "right", "right"],
+ )
+ )
+ pair_deltas = [float(pair["delta"]) for pair in pairwise if isinstance(pair.get("delta"), (int, float))]
+ lines.append("")
+ lines.append("Pairwise latest-delta histogram (pts):")
+ for hist_line in _ascii_histogram(
+ pair_deltas,
+ bins=8,
+ width=20,
+ full_blocks=True,
+ colorize=colorize,
+ drift_palette=True,
+ ):
+ lines.append(f"`{hist_line}`")
+ lines.append("")
+
+ lines.append("### Insight Highlights")
+ lines.append("")
+ best_latest = ranked_latest[0] if ranked_latest else None
+ worst_latest = ranked_latest[-1] if ranked_latest else None
+ most_negative = ranked_drift[0] if ranked_drift else None
+ most_positive = ranked_drift[-1] if ranked_drift else None
+ most_stable = ranked_stability[0] if ranked_stability else None
+ if best_latest:
+ lines.append(
+ "- Top latest pass-rate: "
+ + f"{best_latest.get('name', '')} ({_fmt_pct(float(best_latest.get('latest_pass_rate') or 0.0))})."
+ )
+ if worst_latest:
+ lines.append(
+ "- Lowest latest pass-rate: "
+ + f"{worst_latest.get('name', '')} ({_fmt_pct(float(worst_latest.get('latest_pass_rate') or 0.0))})."
+ )
+ if most_positive:
+ drift_pos = float(most_positive.get("drift_delta") or 0.0)
+ lines.append(
+ "- Strongest positive drift: "
+ + f"{most_positive.get('name', '')} ({_fmt_delta(drift_pos, colorize=colorize)})."
+ )
+ if most_negative:
+ drift_neg = float(most_negative.get("drift_delta") or 0.0)
+ lines.append(
+ "- Strongest negative drift: "
+ + f"{most_negative.get('name', '')} ({_fmt_delta(drift_neg, colorize=colorize)})."
+ )
+ if most_stable:
+ std = most_stable.get("stddev_pass_rate")
+ mean = most_stable.get("mean_pass_rate")
+ lines.append(
+ "- Stability leader: "
+ + f"{most_stable.get('name', '')} "
+ + f"(stddev={(float(std) * 100):.2f} pts, mean={_fmt_pct(float(mean)) if isinstance(mean, (int, float)) else 'n/a'})."
+ )
+
+ drift_neg_count = len([value for value in drift_values if value < 0])
+ drift_flat_count = len([value for value in drift_values if value == 0])
+ drift_pos_count = len([value for value in drift_values if value > 0])
+ total = max(1, drift_neg_count + drift_flat_count + drift_pos_count)
+ neg_meter = "ā" * int(round((drift_neg_count / total) * 14))
+ flat_meter = "ā" * int(round((drift_flat_count / total) * 14))
+ pos_meter = "ā" * int(round((drift_pos_count / total) * 14))
+ neg_meter = neg_meter or "Ā·"
+ flat_meter = flat_meter or "Ā·"
+ pos_meter = pos_meter or "Ā·"
+ lines.append("")
+ lines.append("Drift balance meter:")
+ lines.append(
+ "`NEG "
+ + _style_text(neg_meter, "red", colorize)
+ + f" ({drift_neg_count}) | FLAT "
+ + _style_text(flat_meter, "yellow", colorize)
+ + f" ({drift_flat_count}) | POS "
+ + _style_text(pos_meter, "green", colorize)
+ + f" ({drift_pos_count})`"
+ )
+ lines.append("")
+
+ lines.append("## Per-Experiment Details")
+ lines.append("")
+ for experiment in experiments:
+ lines.append(f"### {experiment.get('name', '')}")
+ lines.append("")
+ lines.append("#### Run Timeline")
+ lines.append("")
+ run_rows: list[list[str]] = []
+ runs = [run for run in (experiment.get("runs") or []) if isinstance(run, dict)]
+ for idx, run in enumerate(runs, start=1):
+ pass_rate = run.get("pass_rate") if isinstance(run.get("pass_rate"), (int, float)) else None
+ cause_text = _format_failure_cause(run.get("failure_cause"))
+ run_rows.append(
+ [
+ str(idx),
+ str(run.get('id', '')),
+ str(run.get('status', '')),
+ _fmt_pct(float(pass_rate)) if isinstance(pass_rate, (int, float)) else 'n/a',
+ f"`{_ascii_bar(float(pass_rate), full_blocks=True, colorize=colorize) if isinstance(pass_rate, (int, float)) else '-'}`",
+ cause_text or "-",
+ ]
+ )
+ if not runs:
+ run_rows.append(["1", "n/a", "n/a", "n/a", "`-`", "-"])
+ lines.extend(_markdown_table(["#", "Run ID", "Status", "Pass Rate", "ASCII Trend", "Failure Cause"], run_rows, ["right", "left", "left", "right", "left", "left"]))
+ lines.append("")
+ failure_rows: list[list[str]] = []
+ for idx, run in enumerate(runs, start=1):
+ cause = run.get("failure_cause")
+ if not isinstance(cause, dict) or not cause:
+ continue
+ detail = str(cause.get("detail_excerpt") or "").strip()
+ detail_single = " ".join(detail.split())
+ if len(detail_single) > 240:
+ detail_single = detail_single[:237] + "..."
+ failure_rows.append(
+ [
+ str(idx),
+ str(run.get("id", "")),
+ str(cause.get("stage") or "-"),
+ str(cause.get("type") or "-"),
+ str(cause.get("message") or "-"),
+ detail_single or "-",
+ ]
+ )
+ if failure_rows:
+ lines.append("#### Failure Causes")
+ lines.append("")
+ lines.extend(
+ _markdown_table(
+ ["#", "Run ID", "Stage", "Type", "Message", "Detail Excerpt"],
+ failure_rows,
+ ["right", "left", "left", "left", "left", "left"],
+ )
+ )
+ lines.append("")
+ for idx, run in enumerate(runs, start=1):
+ cause = run.get("failure_cause")
+ if not isinstance(cause, dict) or not cause:
+ continue
+ detail_lines = _failure_cause_detail_lines(cause)
+ if not detail_lines:
+ continue
+ lines.append(f"Run {idx} failure detail ({run.get('id', '')})
")
+ lines.append("")
+ lines.extend(detail_lines)
+ lines.append("")
+ lines.append(" ")
+ lines.append("")
+ timeline_values = [
+ float(run.get("pass_rate"))
+ for run in runs
+ if isinstance(run.get("pass_rate"), (int, float))
+ ]
+ lines.append(
+ "Pass-rate sparkline: "
+ + f"`{_sparkline(timeline_values, colorize=colorize) if timeline_values else 'n/a'}`"
+ )
+ lines.append("")
+
+ comparisons = [
+ item for item in (experiment.get("consecutive_comparisons") or [])
+ if isinstance(item, dict)
+ ]
+ lines.append("#### Consecutive Run Deltas (A-B)")
+ lines.append("")
+ comparison_rows: list[list[str]] = []
+ for item in comparisons:
+ run_a = item.get("run_a_pass_rate") if isinstance(item.get("run_a_pass_rate"), (int, float)) else None
+ run_b = item.get("run_b_pass_rate") if isinstance(item.get("run_b_pass_rate"), (int, float)) else None
+ delta = item.get("delta_pass_rate") if isinstance(item.get("delta_pass_rate"), (int, float)) else None
+ comparison_rows.append(
+ [
+ str(item.get('run_a_id', '')),
+ str(item.get('run_b_id', '')),
+ _fmt_pct(float(run_a)) if isinstance(run_a, (int, float)) else 'n/a',
+ _fmt_pct(float(run_b)) if isinstance(run_b, (int, float)) else 'n/a',
+ _fmt_delta(float(delta), colorize=colorize) if isinstance(delta, (int, float)) else 'n/a',
+ ]
+ )
+ if not comparisons:
+ comparison_rows.append(["n/a", "n/a", "n/a", "n/a", "n/a"])
+ lines.extend(_markdown_table(["Run A", "Run B", "A Pass", "B Pass", "Delta"], comparison_rows, ["left", "left", "right", "right", "right"]))
+ lines.append("")
+
+ lines.append("## Notes")
+ lines.append("")
+ lines.append("- Drift is computed as latest - baseline.")
+ lines.append("- Baseline uses the first half of fetched runs (minimum 1, maximum 3).")
+ lines.append("- Latest-2 delta uses the latest two runs returned in the fetched window.")
+ lines.append("")
+
+ return "\n".join(lines)
+
+
+def _write_report_csv(report: dict[str, Any], output_path: Path) -> None:
+    """Write *report* as a flat CSV: one summary row per experiment, one row per run.
+
+    Each experiment produces a ``row_type="experiment"`` row followed by a
+    ``row_type="run"`` row for every run it lists. Run rows repeat the
+    experiment-level metrics so that every row is self-describing.
+
+    Args:
+        report: Report mapping. Reads ``evalset_id``, ``generated_at`` and
+            ``experiments``; experiments and runs that are not dicts are skipped.
+        output_path: Destination file. Missing parent directories are created
+            and any existing file is overwritten.
+    """
+
+    def _text(value: Any) -> str:
+        # A key that is present with an explicit ``None`` must become an empty
+        # cell; ``str(None)`` would put the literal text "None" in the CSV.
+        return "" if value is None else str(value)
+
+    fieldnames = [
+        "row_type",
+        "evalset_id",
+        "experiment_id",
+        "experiment_name",
+        "run_index",
+        "run_id",
+        "run_status",
+        "run_pass_rate",
+        "runs_fetched",
+        "runs_total",
+        "baseline_pass_rate",
+        "latest_pass_rate",
+        "drift_delta",
+        "latest_two_delta",
+        "mean_pass_rate",
+        "stddev_pass_rate",
+        "failure_stage",
+        "failure_type",
+        "failure_message",
+        "generated_at",
+    ]
+    experiments = [item for item in (report.get("experiments") or []) if isinstance(item, dict)]
+    evalset_id = _text(report.get("evalset_id"))
+    generated_at = _text(report.get("generated_at"))
+
+    output_path.parent.mkdir(parents=True, exist_ok=True)
+    # newline="" lets the csv module control line endings itself.
+    with output_path.open("w", encoding="utf-8", newline="") as stream:
+        writer = csv.DictWriter(stream, fieldnames=fieldnames)
+        writer.writeheader()
+        for experiment in experiments:
+            # Columns shared by the experiment row and by each of its run rows.
+            shared = {
+                "evalset_id": evalset_id,
+                "experiment_id": _text(experiment.get("id")),
+                "experiment_name": _text(experiment.get("name")),
+                "runs_fetched": int(experiment.get("runs_fetched") or 0),
+                "runs_total": int(experiment.get("runs_total") or 0),
+                "baseline_pass_rate": experiment.get("baseline_pass_rate"),
+                "latest_pass_rate": experiment.get("latest_pass_rate"),
+                "drift_delta": experiment.get("drift_delta"),
+                "latest_two_delta": experiment.get("latest_two_delta"),
+                "mean_pass_rate": experiment.get("mean_pass_rate"),
+                "stddev_pass_rate": experiment.get("stddev_pass_rate"),
+                "generated_at": generated_at,
+            }
+            writer.writerow(
+                {
+                    **shared,
+                    "row_type": "experiment",
+                    "run_index": "",
+                    "run_id": "",
+                    "run_status": "",
+                    "run_pass_rate": "",
+                    "failure_stage": "",
+                    "failure_type": "",
+                    "failure_message": "",
+                }
+            )
+            runs = [run for run in (experiment.get("runs") or []) if isinstance(run, dict)]
+            for idx, run in enumerate(runs, start=1):
+                cause = run.get("failure_cause")
+                if not isinstance(cause, dict):
+                    cause = {}
+                writer.writerow(
+                    {
+                        **shared,
+                        "row_type": "run",
+                        "run_index": idx,
+                        "run_id": _text(run.get("id")),
+                        "run_status": _text(run.get("status")),
+                        "run_pass_rate": run.get("pass_rate"),
+                        "failure_stage": _text(cause.get("stage")),
+                        "failure_type": _text(cause.get("type")),
+                        "failure_message": _text(cause.get("message")),
+                    }
+                )
+
+
+def _print_report_console(report: dict[str, Any], run_limit: int) -> None:
+    """Print the evalset report to ``console`` as tables and ASCII plots.
+
+    Output order: header, experiment overview, ranking by latest pass rate
+    (with histogram), ranking by drift (with histogram), pairwise latest-pass
+    deltas, two insight lines, then for each experiment a run timeline,
+    failure details and consecutive run deltas. Every helper that takes a
+    ``colorize`` flag is called with ``colorize=True``.
+
+    Args:
+        report: Report mapping. Reads ``evalset_id``, ``generated_at`` and
+            ``experiments``; entries that are not dicts are ignored.
+        run_limit: Only echoed in the header line as the run window size.
+    """
+    evalset_id = str(report.get("evalset_id", ""))
+    generated_at = str(report.get("generated_at", ""))
+    experiments = [item for item in (report.get("experiments") or []) if isinstance(item, dict)]
+
+    console.rule(f"[bold cyan]Evals Report[/bold cyan] {evalset_id}")
+    console.print(f"Generated at: {generated_at}")
+    console.print(f"Experiments: {len(experiments)} | Run window per experiment: {run_limit}")
+    console.print("")
+
+    # --- Overview: one row per experiment; non-numeric metrics are passed as None.
+    overview = Table(title="Experiment Overview")
+    overview.add_column("Experiment", style="white")
+    overview.add_column("Runs", justify="right")
+    overview.add_column("Latest", justify="right")
+    overview.add_column("Baseline", justify="right")
+    overview.add_column("Drift", justify="right")
+    overview.add_column("Latest-2", justify="right")
+    for experiment in experiments:
+        overview.add_row(
+            str(experiment.get("name", "")),
+            f"{int(experiment.get('runs_fetched') or 0)}/{int(experiment.get('runs_total') or 0)}",
+            _fmt_pct(experiment.get("latest_pass_rate") if isinstance(experiment.get("latest_pass_rate"), (int, float)) else None),
+            _fmt_pct(experiment.get("baseline_pass_rate") if isinstance(experiment.get("baseline_pass_rate"), (int, float)) else None),
+            _fmt_delta(experiment.get("drift_delta") if isinstance(experiment.get("drift_delta"), (int, float)) else None, colorize=True),
+            _fmt_delta(experiment.get("latest_two_delta") if isinstance(experiment.get("latest_two_delta"), (int, float)) else None, colorize=True),
+        )
+    console.print(overview)
+
+    # --- Ranking by latest pass rate, highest first; experiments without a
+    # numeric latest_pass_rate are left out.
+    ranked_latest = sorted(
+        [item for item in experiments if isinstance(item.get("latest_pass_rate"), (int, float))],
+        key=lambda item: float(item.get("latest_pass_rate") or 0.0),
+        reverse=True,
+    )
+    latest_table = Table(title="By Latest Pass Rate")
+    latest_table.add_column("Rank", justify="right", no_wrap=True)
+    latest_table.add_column("Experiment", style="white")
+    latest_table.add_column("Latest", justify="right", no_wrap=True)
+    for idx, item in enumerate(ranked_latest, start=1):
+        latest_table.add_row(str(idx), str(item.get("name", "")), _fmt_pct(float(item.get("latest_pass_rate") or 0.0)))
+    console.print(latest_table)
+    latest_values = [
+        float(item.get("latest_pass_rate"))
+        for item in ranked_latest
+        if isinstance(item.get("latest_pass_rate"), (int, float))
+    ]
+    console.print("Latest histogram:")
+    # Histogram range is pinned to 0.0..1.0 for pass rates.
+    for hist_line in _ascii_histogram(
+        latest_values,
+        bins=8,
+        width=20,
+        min_value=0.0,
+        max_value=1.0,
+        full_blocks=True,
+        colorize=True,
+    ):
+        console.print(hist_line)
+
+    # --- Ranking by drift, ascending (lowest drift_delta first).
+    ranked_drift = sorted(
+        [item for item in experiments if isinstance(item.get("drift_delta"), (int, float))],
+        key=lambda item: float(item.get("drift_delta") or 0.0),
+    )
+    drift_table = Table(title="By Drift (Negative To Positive)")
+    drift_table.add_column("Rank", justify="right", no_wrap=True)
+    drift_table.add_column("Experiment", style="white")
+    drift_table.add_column("Drift", justify="right", no_wrap=True)
+    for idx, item in enumerate(ranked_drift, start=1):
+        drift_table.add_row(
+            str(idx),
+            str(item.get("name", "")),
+            _fmt_delta(float(item.get("drift_delta") or 0.0), colorize=True),
+        )
+    console.print(drift_table)
+    drift_values = [
+        float(item.get("drift_delta"))
+        for item in ranked_drift
+        if isinstance(item.get("drift_delta"), (int, float))
+    ]
+    console.print("Drift histogram:")
+    # No min/max is passed here, unlike the pass-rate histogram above.
+    for hist_line in _ascii_histogram(
+        drift_values,
+        bins=8,
+        width=20,
+        full_blocks=True,
+        colorize=True,
+        drift_palette=True,
+    ):
+        console.print(hist_line)
+
+    # --- Pairwise comparison of latest pass rates across experiments.
+    pairwise = _pairwise_latest_deltas(experiments)
+    pairwise_table = Table(title="Pairwise Latest-Pass Deltas")
+    pairwise_table.add_column("Pair", style="white")
+    pairwise_table.add_column("Left", justify="right", no_wrap=True)
+    pairwise_table.add_column("Right", justify="right", no_wrap=True)
+    pairwise_table.add_column("Delta", justify="right", no_wrap=True)
+    for pair in pairwise:
+        pairwise_table.add_row(
+            f"{pair['left']} vs {pair['right']}",
+            _fmt_pct(pair["left_latest"]),
+            _fmt_pct(pair["right_latest"]),
+            _fmt_delta(pair["delta"], colorize=True),
+        )
+    if not pairwise:
+        # Placeholder row so the table is never printed empty.
+        pairwise_table.add_row("n/a", "n/a", "n/a", "n/a")
+    console.print(pairwise_table)
+
+    # --- Insight lines.
+    if ranked_latest:
+        console.print(
+            "[bold]Insight:[/bold] top latest "
+            f"[green]{ranked_latest[0].get('name', '')}[/green] "
+            f"({_fmt_pct(float(ranked_latest[0].get('latest_pass_rate') or 0.0))})"
+        )
+    if ranked_drift:
+        # ranked_drift is ascending, so index -1 is the largest drift_delta.
+        console.print(
+            "[bold]Insight:[/bold] strongest drift "
+            f"{ranked_drift[-1].get('name', '')} "
+            f"({_fmt_delta(float(ranked_drift[-1].get('drift_delta') or 0.0), colorize=True)})"
+        )
+    console.print("")
+
+    # --- Per-experiment sections.
+    for experiment in experiments:
+        console.print("")
+        console.print(f"[bold]Run Timeline:[/bold] {experiment.get('name', '')}")
+        run_table = Table()
+        run_table.add_column("#", justify="right", style="cyan", no_wrap=True)
+        run_table.add_column("Run ID", style="white", no_wrap=True)
+        run_table.add_column("Status", no_wrap=True)
+        run_table.add_column("Pass Rate", justify="right", no_wrap=True)
+        run_table.add_column("Trend", style="white", no_wrap=True)
+        run_table.add_column("Failure Cause", style="red", overflow="fold")
+
+        runs = [run for run in (experiment.get("runs") or []) if isinstance(run, dict)]
+        for idx, run in enumerate(runs, start=1):
+            status_value = str(run.get("status", ""))
+            pass_rate = float(run.get("pass_rate")) if isinstance(run.get("pass_rate"), (int, float)) else None
+            cause_text = _format_failure_cause(run.get("failure_cause"))
+            run_table.add_row(
+                str(idx),
+                str(run.get("id", "")),
+                f"[{_status_style(status_value)}]{status_value}[/{_status_style(status_value)}]",
+                _fmt_pct(pass_rate),
+                _ascii_bar(pass_rate, width=28, full_blocks=True, colorize=True) if pass_rate is not None else "-",
+                cause_text or "-",
+            )
+        if not runs:
+            # Placeholder row so the table is never printed empty.
+            run_table.add_row("1", "n/a", "n/a", "n/a", "-", "-")
+        console.print(run_table)
+
+        # Expanded failure details; runs without a non-empty dict cause are skipped.
+        # The run number matches the "#" column of the timeline table above.
+        for idx, run in enumerate(runs, start=1):
+            cause = run.get("failure_cause")
+            if not isinstance(cause, dict) or not cause:
+                continue
+            console.print(
+                f"[red bold]Run {idx} failure:[/red bold] "
+                f"[red]{str(cause.get('message') or 'Unknown failure.')}[/red]"
+            )
+            # Top-level cause fields are printed only when non-blank.
+            for key, label in (
+                ("stage", "stage"),
+                ("type", "type"),
+                ("execution_url", "execution url"),
+            ):
+                value = str(cause.get(key) or "").strip()
+                if value:
+                    console.print(f" {label}: {value}")
+            diagnostics = cause.get("diagnostics")
+            if isinstance(diagnostics, dict):
+                for key, label in (
+                    ("agent_runtimes_url", "agent runtimes url"),
+                    ("run_url", "run url"),
+                ):
+                    value = diagnostics.get(key)
+                    if value:
+                        console.print(f" {label}: {value}")
+                candidate_urls = diagnostics.get("candidate_urls")
+                if isinstance(candidate_urls, list) and candidate_urls:
+                    console.print(f" candidate urls: {', '.join(str(u) for u in candidate_urls)}")
+                attempts = diagnostics.get("attempts")
+                if isinstance(attempts, list) and attempts:
+                    for attempt in attempts:
+                        if not isinstance(attempt, dict):
+                            continue
+                        outcome = "ok" if attempt.get("ok") else "failed"
+                        # rstrip() drops the trailing space when there is no error text.
+                        console.print(
+                            f" attempt: {attempt.get('url', '')} -> {outcome} "
+                            f"{attempt.get('error') or ''}".rstrip()
+                        )
+            detail = str(cause.get("detail_excerpt") or "").strip()
+            if detail:
+                console.print(f" detail: {detail}")
+
+        # Consecutive run deltas, read from the precomputed "consecutive_comparisons".
+        deltas_table = Table(title="Consecutive Run Deltas")
+        deltas_table.add_column("Run A", style="white", no_wrap=True)
+        deltas_table.add_column("Run B", style="white", no_wrap=True)
+        deltas_table.add_column("A Pass", justify="right", no_wrap=True)
+        deltas_table.add_column("B Pass", justify="right", no_wrap=True)
+        deltas_table.add_column("Delta", justify="right", no_wrap=True)
+        comparisons = [
+            item for item in (experiment.get("consecutive_comparisons") or [])
+            if isinstance(item, dict)
+        ]
+        for item in comparisons:
+            run_a = item.get("run_a_pass_rate") if isinstance(item.get("run_a_pass_rate"), (int, float)) else None
+            run_b = item.get("run_b_pass_rate") if isinstance(item.get("run_b_pass_rate"), (int, float)) else None
+            delta = item.get("delta_pass_rate") if isinstance(item.get("delta_pass_rate"), (int, float)) else None
+            deltas_table.add_row(
+                str(item.get("run_a_id", "")),
+                str(item.get("run_b_id", "")),
+                _fmt_pct(float(run_a)) if isinstance(run_a, (int, float)) else "n/a",
+                _fmt_pct(float(run_b)) if isinstance(run_b, (int, float)) else "n/a",
+                _fmt_delta(float(delta), colorize=True) if isinstance(delta, (int, float)) else "n/a",
+            )
+        if not comparisons:
+            # Placeholder row so the table is never printed empty.
+            deltas_table.add_row("n/a", "n/a", "n/a", "n/a", "n/a")
+        console.print(deltas_table)
+
@app.callback()
def evals_callback(ctx: typer.Context) -> None:
@@ -138,7 +1194,109 @@ def evals_callback(ctx: typer.Context) -> None:
typer.echo(ctx.get_help())
-@evals_app.command(name="list")
+@app.command(name="ls")
+def evals_ls(
+    token: Optional[str] = typer.Option(None, "--token", help="API token."),
+    ai_agents_url: Optional[str] = typer.Option(None, "--ai-agents-url", help="AI Agents base URL."),
+    account_uid: Optional[str] = typer.Option(None, "--account-uid", help="Organization/account UID context."),
+    run_environment: Optional[str] = typer.Option(None, "--run-environment", help="Filter by run environment (ui/sdk)."),
+    kind: Optional[str] = typer.Option(None, "--kind", help="Filter by kind (batch/interactive)."),
+    q: Optional[str] = typer.Option(None, "--q", help="Search query."),
+    limit: int = typer.Option(50, "--limit", min=1, max=200),
+    offset: int = typer.Option(0, "--offset", min=0),
+    raw: bool = typer.Option(False, "--raw", help="Print raw JSON output."),
+) -> None:
+    """List all evalsets and their experiments."""
+    api = _make_client(token=token, ai_agents_url=ai_agents_url)
+    listing = api.evals_list_evals(
+        run_environment=run_environment,
+        kind=kind,
+        q=q,
+        limit=limit,
+        offset=offset,
+        account_uid=account_uid,
+    )
+    evalsets = [entry for entry in (listing.get("evalsets") or []) if isinstance(entry, dict)]
+
+    # One experiments request per evalset; entries without an id are not queried.
+    experiments_by_evalset: dict[str, list[dict[str, Any]]] = {}
+    for entry in evalsets:
+        uid = str(entry.get("id", ""))
+        if uid:
+            page = api.evals_list_experiments(
+                evalset_id=uid,
+                limit=200,
+                offset=0,
+                account_uid=account_uid,
+            )
+            experiments_by_evalset[uid] = [
+                row for row in (page.get("experiments") or []) if isinstance(row, dict)
+            ]
+
+    # --raw prints the collected payloads and skips the tree rendering.
+    if raw:
+        console.print({"evalsets": evalsets, "experiments": experiments_by_evalset})
+        return
+
+    experiment_count = sum(map(len, experiments_by_evalset.values()))
+    root = Tree(
+        f"[bold]Evals[/bold] ([cyan]{len(evalsets)}[/cyan] evalsets, "
+        f"[cyan]{experiment_count}[/cyan] experiments)"
+    )
+    for entry in evalsets:
+        uid = str(entry.get("id", ""))
+        name = entry.get("name", "")
+        environment = entry.get("run_environment", "")
+        kind_label = entry.get("kind", "")
+        case_count = len(entry.get("cases") or [])
+        branch = root.add(
+            f"[cyan]{uid}[/cyan] [white]{name}[/white] "
+            f"(env={environment}, kind={kind_label}, cases={case_count})"
+        )
+        children = experiments_by_evalset.get(uid, [])
+        if children:
+            for child in children:
+                status_value = str(child.get("status", ""))
+                style = _status_style(status_value)
+                branch.add(
+                    f"[cyan]{child.get('id', '')}[/cyan] "
+                    f"[white]{child.get('name', '')}[/white] "
+                    f"[{style}]{status_value}[/{style}]"
+                )
+        else:
+            branch.add("[dim]no experiments[/dim]")
+    console.print(root)
+
+
+@app.command(name="delete")
+def evals_delete_top(
+    evalset_id: str = typer.Argument(..., help="Evalset UID to delete."),
+    yes: bool = typer.Option(False, "--yes", "-y", help="Skip the confirmation prompt."),
+    token: Optional[str] = typer.Option(None, "--token", help="API token."),
+    ai_agents_url: Optional[str] = typer.Option(None, "--ai-agents-url", help="AI Agents base URL."),
+    account_uid: Optional[str] = typer.Option(None, "--account-uid", help="Organization/account UID context."),
+) -> None:
+    """Delete an evalset and its associated experiments, runs, and cases."""
+    # Destructive operation: ask first unless --yes was given (abort=True).
+    if not yes:
+        prompt = f"Delete evalset {evalset_id} and all associated experiments, runs, and cases?"
+        typer.confirm(prompt, abort=True)
+
+    api = _make_client(token=token, ai_agents_url=ai_agents_url)
+    response = api.evals_delete_eval(evalset_id, account_uid=account_uid)
+
+    # Counts come from the response's "cascade" mapping; missing counts print as 0.
+    counts = response.get("cascade") or {}
+    experiments_deleted = counts.get("experiments_deleted", 0)
+    runs_deleted = counts.get("runs_deleted", 0)
+    cases_deleted = counts.get("cases_deleted", 0)
+    console.print(
+        f"[green]Eval deleted:[/green] {evalset_id} "
+        f"(experiments={experiments_deleted}, runs={runs_deleted}, cases={cases_deleted})"
+    )
+
+
+@evals_app.command(name="ls")
def evals_list(
token: Optional[str] = typer.Option(None, "--token", help="API token."),
ai_agents_url: Optional[str] = typer.Option(None, "--ai-agents-url", help="AI Agents base URL."),
@@ -244,203 +1402,99 @@ def evals_delete(
)
-@evals_app.command(name="compare-report")
-def evals_compare_report(
+def _render_report(
evalset_id: str = typer.Argument(..., help="Evalset ID to compare."),
run_limit: int = typer.Option(50, "--run-limit", min=2, max=200, help="Runs fetched per experiment."),
token: Optional[str] = typer.Option(None, "--token", help="API token."),
ai_agents_url: Optional[str] = typer.Option(None, "--ai-agents-url", help="AI Agents base URL."),
account_uid: Optional[str] = typer.Option(None, "--account-uid", help="Organization/account UID context."),
+ output_file: Optional[str] = typer.Option(None, "--output", help="Write markdown report to file."),
+ export: bool = typer.Option(False, "--export", help="Export timestamped report files report-.md and report-.csv."),
raw: bool = typer.Option(False, "--raw", help="Print raw JSON report output."),
) -> None:
- """Generate a full comparison report for a specific evalset.
-
- The report includes:
- - Experiment-level summary (run count, latest pass rate, baseline, drift)
- - Full fetched run details per experiment
- - Per-experiment run comparisons (latest-two and consecutive run deltas)
- """
+ """Generate a full evalset report with cross-experiment comparisons."""
client = _make_client(token=token, ai_agents_url=ai_agents_url)
- experiments_payload = client.evals_list_experiments(
+ report = _report_data(
+ client=client,
evalset_id=evalset_id,
- limit=200,
- offset=0,
+ run_limit=run_limit,
account_uid=account_uid,
)
- experiments = experiments_payload.get("experiments") or []
+ experiments = report.get("experiments") or []
if not experiments:
console.print(f"[yellow]No experiments found for evalset[/yellow] {evalset_id}")
raise typer.Exit(0)
- report: dict[str, Any] = {
- "evalset_id": evalset_id,
- "generated_at": _now_iso(),
- "experiments": [],
- }
-
- summary_table = Table(title=f"Evalset Comparison Report ({evalset_id})")
- summary_table.add_column("Experiment", style="cyan")
- summary_table.add_column("Runs", style="white")
- summary_table.add_column("Latest", style="white")
- summary_table.add_column("Baseline", style="white")
- summary_table.add_column("Drift", style="white")
- summary_table.add_column("Latest 2 Delta (A-B)", style="white")
-
- for experiment in experiments:
- experiment_id = str(experiment.get("id", ""))
- experiment_name = str(experiment.get("name", experiment_id))
-
- runs_payload = client.evals_list_runs(
- experiment_id,
- limit=run_limit,
- offset=0,
- account_uid=account_uid,
- )
- runs = runs_payload.get("runs") or []
- total_runs = int(runs_payload.get("total") or len(runs))
- baseline, latest, drift = _compute_baseline_and_drift(runs)
-
- latest_two_delta: float | None = None
- latest_two_run_ids: list[str] = []
- latest_two_compare: dict[str, Any] | None = None
- if len(runs) >= 2:
- latest_two_run_ids = [str(runs[0].get("id", "")), str(runs[1].get("id", ""))]
- compare_payload = client.evals_compare_runs(
- latest_two_run_ids,
- account_uid=account_uid,
- )
- compared_runs = compare_payload.get("runs") or []
- compared_by_id = {
- str(run.get("id", "")): run
- for run in compared_runs
- if isinstance(run, dict)
- }
- run_a = compared_by_id.get(latest_two_run_ids[0], runs[0])
- run_b = compared_by_id.get(latest_two_run_ids[1], runs[1])
- pass_a = _run_pass_rate(run_a)
- pass_b = _run_pass_rate(run_b)
- if pass_a is not None and pass_b is not None:
- latest_two_delta = pass_a - pass_b
- latest_two_compare = {
- "run_ids": latest_two_run_ids,
- "run_a": _run_detail_record(run_a),
- "run_b": _run_detail_record(run_b),
- "delta_pass_rate": latest_two_delta,
- }
-
- consecutive_comparisons: list[dict[str, Any]] = []
- for idx in range(max(0, len(runs) - 1)):
- run_a = runs[idx]
- run_b = runs[idx + 1]
- pass_a = _run_pass_rate(run_a)
- pass_b = _run_pass_rate(run_b)
- delta = None
- if pass_a is not None and pass_b is not None:
- delta = pass_a - pass_b
- consecutive_comparisons.append(
- {
- "run_a_id": str(run_a.get("id", "")),
- "run_b_id": str(run_b.get("id", "")),
- "run_a_status": str(run_a.get("status", "")),
- "run_b_status": str(run_b.get("status", "")),
- "run_a_pass_rate": pass_a,
- "run_b_pass_rate": pass_b,
- "delta_pass_rate": delta,
- }
- )
-
- drift_text = "n/a" if drift is None else f"{drift * 100:+.1f} pts"
- latest_two_text = "n/a" if latest_two_delta is None else f"{latest_two_delta * 100:+.1f} pts"
-
- summary_table.add_row(
- experiment_name,
- str(total_runs),
- _fmt_pct(latest),
- _fmt_pct(baseline),
- drift_text,
- latest_two_text,
- )
-
- report["experiments"].append(
- {
- "id": experiment_id,
- "name": experiment_name,
- "runs_total": total_runs,
- "runs_fetched": len(runs),
- "latest_pass_rate": latest,
- "baseline_pass_rate": baseline,
- "drift_delta": drift,
- "latest_two_run_ids": latest_two_run_ids,
- "latest_two_delta": latest_two_delta,
- "latest_two_comparison": latest_two_compare,
- "runs": [_run_detail_record(run) for run in runs],
- "consecutive_comparisons": consecutive_comparisons,
- }
- )
-
if raw:
console.print(report)
return
- console.print(summary_table)
- for experiment_report in report.get("experiments", []):
- experiment_name = str(experiment_report.get("name", ""))
- runs_fetched = int(experiment_report.get("runs_fetched") or 0)
- runs_total = int(experiment_report.get("runs_total") or 0)
+ markdown_report = _report_markdown(report, run_limit=run_limit, colorize=False)
+ if export:
+ timestamp = _timestamp_slug(str(report.get("generated_at", _now_iso())))
+ export_markdown_path = Path(f"report-{timestamp}.md")
+ export_csv_path = Path(f"report-{timestamp}.csv")
+ export_markdown_path.write_text(markdown_report + "\n", encoding="utf-8")
+ _write_report_csv(report, export_csv_path)
+ console.print(f"[green]Markdown export written:[/green] {export_markdown_path}")
+ console.print(f"[green]CSV export written:[/green] {export_csv_path}")
+ if output_file:
+ output_path = Path(output_file)
+ output_path.write_text(markdown_report + "\n", encoding="utf-8")
+ console.print(f"[green]Report written:[/green] {output_path}")
+ _print_report_console(report, run_limit=run_limit)
+
+
+@app.command(name="report")
+def evals_report(
+    evalset_id: str = typer.Argument(..., help="Evalset ID to report."),
+    run_limit: int = typer.Option(50, "--run-limit", min=2, max=200, help="Runs fetched per experiment."),
+    token: Optional[str] = typer.Option(None, "--token", help="API token."),
+    ai_agents_url: Optional[str] = typer.Option(None, "--ai-agents-url", help="AI Agents base URL."),
+    account_uid: Optional[str] = typer.Option(None, "--account-uid", help="Organization/account UID context."),
+    output_file: Optional[str] = typer.Option(None, "--output", help="Write markdown report to file."),
+    # The help text names the files as they are written: report-<timestamp>.md / .csv.
+    export: bool = typer.Option(False, "--export", help="Export timestamped report files report-<timestamp>.md and report-<timestamp>.csv."),
+    raw: bool = typer.Option(False, "--raw", help="Print raw JSON report output."),
+) -> None:
+    """Generate an evalset report in markdown with comparison combinations and ASCII plots."""
+    # Thin CLI entry point: every option is forwarded unchanged to _render_report.
+    _render_report(
+        evalset_id=evalset_id,
+        run_limit=run_limit,
+        token=token,
+        ai_agents_url=ai_agents_url,
+        account_uid=account_uid,
+        output_file=output_file,
+        export=export,
+        raw=raw,
+    )
- run_details_table = Table(
- title=(
- f"Run Details - {experiment_name} "
- f"(fetched {runs_fetched} of {runs_total})"
- )
- )
- run_details_table.add_column("Run", style="cyan")
- run_details_table.add_column("Status", style="white")
- run_details_table.add_column("Pass Rate", style="white")
- run_details_table.add_column("Launch Source", style="white")
- run_details_table.add_column("Execution Target", style="white")
- run_details_table.add_column("Created", style="white")
-
- for run in experiment_report.get("runs") or []:
- summary = run.get("summary") or {}
- status_value = str(run.get("status", ""))
- run_details_table.add_row(
- str(run.get("id", "")),
- f"[{_status_style(status_value)}]{status_value}[/{_status_style(status_value)}]",
- _fmt_pct(run.get("pass_rate") if isinstance(run.get("pass_rate"), (int, float)) else None),
- str(summary.get("launch_source") or ""),
- str(summary.get("execution_target") or ""),
- str(run.get("created_at") or ""),
- )
- console.print(run_details_table)
-
- comparisons = experiment_report.get("consecutive_comparisons") or []
- if comparisons:
- compare_table = Table(title=f"Run Comparisons - {experiment_name} (A-B, consecutive)")
- compare_table.add_column("Run A", style="cyan")
- compare_table.add_column("Run B", style="cyan")
- compare_table.add_column("A Status", style="white")
- compare_table.add_column("B Status", style="white")
- compare_table.add_column("A Pass", style="white")
- compare_table.add_column("B Pass", style="white")
- compare_table.add_column("Delta", style="white")
- for item in comparisons:
- delta = item.get("delta_pass_rate")
- compare_table.add_row(
- str(item.get("run_a_id", "")),
- str(item.get("run_b_id", "")),
- str(item.get("run_a_status", "")),
- str(item.get("run_b_status", "")),
- _fmt_pct(item.get("run_a_pass_rate") if isinstance(item.get("run_a_pass_rate"), (int, float)) else None),
- _fmt_pct(item.get("run_b_pass_rate") if isinstance(item.get("run_b_pass_rate"), (int, float)) else None),
- "n/a" if not isinstance(delta, (int, float)) else f"{float(delta) * 100:+.1f} pts",
- )
- console.print(compare_table)
- console.print("[dim]Notes: drift = latest - baseline (baseline is avg of first runs in fetched window); latest-2 delta = A - B.[/dim]")
+@evals_app.command(name="compare-report")
+def evals_compare_report_compat(
+    evalset_id: str = typer.Argument(..., help="Evalset ID to report."),
+    run_limit: int = typer.Option(50, "--run-limit", min=2, max=200, help="Runs fetched per experiment."),
+    token: Optional[str] = typer.Option(None, "--token", help="API token."),
+    ai_agents_url: Optional[str] = typer.Option(None, "--ai-agents-url", help="AI Agents base URL."),
+    account_uid: Optional[str] = typer.Option(None, "--account-uid", help="Organization/account UID context."),
+    output_file: Optional[str] = typer.Option(None, "--output", help="Write markdown report to file."),
+    # The help text names the files as they are written: report-<timestamp>.md / .csv.
+    export: bool = typer.Option(False, "--export", help="Export timestamped report files report-<timestamp>.md and report-<timestamp>.csv."),
+    raw: bool = typer.Option(False, "--raw", help="Print raw JSON report output."),
+) -> None:
+    """Compatibility alias for report. Prefer: datalayer evals report <evalset_id>."""
+    # Warn on every invocation, then behave exactly like the `report` command.
+    console.print("[yellow]Deprecated:[/yellow] use [bold]datalayer evals report <evalset_id>[/bold].")
+    _render_report(
+        evalset_id=evalset_id,
+        run_limit=run_limit,
+        token=token,
+        ai_agents_url=ai_agents_url,
+        account_uid=account_uid,
+        output_file=output_file,
+        export=export,
+        raw=raw,
+    )
-@experiments_app.command(name="list")
+@experiments_app.command(name="ls")
def experiments_list(
evalset_id: Optional[str] = typer.Option(None, "--evalset-id", help="Filter by evalset ID."),
status: Optional[str] = typer.Option(None, "--status", help="Filter by status."),
@@ -511,7 +1565,7 @@ def experiments_create(
console.print(f"[green]Experiment created:[/green] {experiment.get('id', '')} ({experiment.get('name', '')})")
-@runs_app.command(name="list")
+@runs_app.command(name="ls")
def runs_list(
experiment_id: str = typer.Option(..., "--experiment-id", help="Experiment ID."),
limit: int = typer.Option(50, "--limit", min=1, max=200),
From a37c38a326fcc8dbd750d58c291aa89db36a0911 Mon Sep 17 00:00:00 2001
From: Eric Charles
Date: Sun, 7 Jun 2026 07:07:41 +0200
Subject: [PATCH 48/49] cli: agents
---
datalayer_core/cli/__main__.py | 4 +
datalayer_core/cli/commands/agents.py | 667 +++++++++++++++++++++++
datalayer_core/cli/commands/evals.py | 176 ++++--
datalayer_core/client/client.py | 106 +++-
datalayer_core/mixins/runtimes.py | 125 +++++
datalayer_core/runtimes/agent_runtime.py | 194 +++++++
datalayer_core/runtimes/local.py | 628 +++++++++++++++++++++
7 files changed, 1867 insertions(+), 33 deletions(-)
create mode 100644 datalayer_core/cli/commands/agents.py
create mode 100644 datalayer_core/runtimes/agent_runtime.py
create mode 100644 datalayer_core/runtimes/local.py
diff --git a/datalayer_core/cli/__main__.py b/datalayer_core/cli/__main__.py
index 71e4142d..8413fcd8 100644
--- a/datalayer_core/cli/__main__.py
+++ b/datalayer_core/cli/__main__.py
@@ -10,6 +10,8 @@
from datalayer_core.__version__ import __version__
from datalayer_core.cli.commands.about import app as about_app
+from datalayer_core.cli.commands.agents import agents_ls
+from datalayer_core.cli.commands.agents import app as agents_app
from datalayer_core.cli.commands.agent_nodes import app as agent_nodes_app
from datalayer_core.cli.commands.agent_nodes import agent_nodes_ls
from datalayer_core.cli.commands.authn import (
@@ -175,6 +177,7 @@ def main_callback(
# Register commands (without name to add them at the top level)
app.add_typer(about_app)
+app.add_typer(agents_app)
app.add_typer(agent_nodes_app)
app.add_typer(auth_app)
app.add_typer(benchmarks_app)
@@ -217,6 +220,7 @@ def main_callback(
app.command(name="checkpoints-ls")(checkpoints_ls)
app.command(name="tokens-ls")(tokens_ls)
app.command(name="agent-nodes-ls")(agent_nodes_ls)
+app.command(name="agents-ls")(agents_ls)
_GLOBAL_OPTIONS_WITH_VALUES = {
diff --git a/datalayer_core/cli/commands/agents.py b/datalayer_core/cli/commands/agents.py
new file mode 100644
index 00000000..83798aef
--- /dev/null
+++ b/datalayer_core/cli/commands/agents.py
@@ -0,0 +1,667 @@
+# Copyright (c) 2023-2026 Datalayer, Inc.
+# Distributed under the terms of the Modified BSD License.
+
+"""Agent runtime commands for Datalayer CLI."""
+
+from __future__ import annotations
+
+import json
+from pathlib import Path
+from typing import Any, Optional
+
+import requests
+import typer
+import yaml
+from rich.console import Console
+
+from datalayer_core.client.client import DatalayerClient
+from datalayer_core.displays.runtimes import display_runtimes
+from datalayer_core.runtimes.local import (
+ DEFAULT_LOCAL_AGENT_NAME,
+ DEFAULT_LOCAL_HOST,
+ DEFAULT_LOCAL_LOG_LEVEL,
+ DEFAULT_LOCAL_PROTOCOL,
+ ensure_local_agent,
+ start_local_agent_runtime,
+ terminate_local_agent_runtime,
+)
+from datalayer_core.utils.urls import DatalayerURLs
+
+# Fallback agent spec id used by the create command when no --agentspec-id
+# value is supplied (and no --agentspec source is given).
+DEFAULT_AGENT_SPEC_ID = "example-simple"
+
+# Typer sub-application for the `agents` command group.
+# invoke_without_command=True lets the group callback run when the group is
+# invoked with no subcommand.
+app = typer.Typer(
+ name="agents",
+ help="Agent runtime management commands.",
+ invoke_without_command=True,
+)
+
+# Module-wide Rich console used by the commands below for their output.
+console = Console()
+
+
+@app.callback()
+def agents_callback(ctx: typer.Context) -> None:
+    """Agent runtime management commands."""
+    # A subcommand was selected: nothing to do at the group level.
+    if ctx.invoked_subcommand is not None:
+        return
+    # Bare `agents` invocation: show the group help text.
+    typer.echo(ctx.get_help())
+
+
+def _make_client(
+    token: Optional[str] = None,
+    iam_url: Optional[str] = None,
+    runtimes_url: Optional[str] = None,
+) -> DatalayerClient:
+    """Build a ``DatalayerClient`` from environment URLs plus optional overrides.
+
+    Args:
+        token: Token handed to the client as-is.
+        iam_url: IAM URL passed to ``DatalayerURLs.from_environment``.
+        runtimes_url: Runtimes URL passed to ``DatalayerURLs.from_environment``.
+
+    Returns:
+        A client constructed with the resolved URLs and the given token.
+    """
+    return DatalayerClient(
+        urls=DatalayerURLs.from_environment(
+            iam_url=iam_url,
+            runtimes_url=runtimes_url,
+        ),
+        token=token,
+    )
+
+
+def _is_url(value: str) -> bool:
+    """Return True when *value* begins with ``http://`` or ``https://`` (any case)."""
+    return value.lower().startswith(("http://", "https://"))
+
+
+def _load_agent_spec(spec_source: str) -> dict[str, Any]:
+    """Load an agent spec mapping from an HTTP(S) URL or a local file.
+
+    The source text is parsed with ``yaml.safe_load`` and must decode to a
+    non-empty mapping.
+
+    Args:
+        spec_source: URL or file path; surrounding whitespace is ignored.
+
+    Returns:
+        The decoded spec as a dict.
+
+    Raises:
+        typer.BadParameter: If ``spec_source`` is blank.
+        RuntimeError: If the source cannot be fetched, the file is missing or
+            not a regular file, parsing fails, or the result is not a
+            non-empty mapping.
+    """
+    location = spec_source.strip()
+    if location == "":
+        raise typer.BadParameter("--agentspec must be a non-empty URL or file path.")
+
+    if not _is_url(location):
+        # Local file: validate before reading so the error names the problem.
+        spec_path = Path(location)
+        if not spec_path.exists():
+            raise RuntimeError(f"--agentspec file does not exist: {spec_path}")
+        if not spec_path.is_file():
+            raise RuntimeError(f"--agentspec path is not a file: {spec_path}")
+        document = spec_path.read_text(encoding="utf-8")
+    else:
+        try:
+            reply = requests.get(location, timeout=30)
+        except Exception as exc:
+            raise RuntimeError(
+                f"Failed to fetch --agentspec URL '{location}': {exc}"
+            ) from exc
+        if reply.status_code >= 400:
+            # Include only the first 500 characters of the body in the error.
+            preview = (reply.text or "")[:500]
+            raise RuntimeError(
+                f"--agentspec URL returned HTTP {reply.status_code}: {location}\n{preview}"
+            )
+        document = reply.text or ""
+
+    try:
+        spec = yaml.safe_load(document)
+    except Exception as exc:
+        raise RuntimeError(f"Failed to parse --agentspec as YAML/JSON: {exc}") from exc
+
+    if not isinstance(spec, dict):
+        raise RuntimeError("--agentspec must decode to an object (mapping).")
+    if len(spec) == 0:
+        raise RuntimeError("--agentspec decoded to an empty object.")
+    return spec
+
+
+def _create_local_agent_runtime(
+    *,
+    agent_spec_id: str,
+    agent_name: str,
+    host: str,
+    port: Optional[int],
+    protocol: str,
+    log_level: str,
+    token: Optional[str],
+    raw: bool,
+) -> None:
+    """Launch a local agent-runtimes server and serve until interrupted.
+
+    Starts the runtime with ``start_local_agent_runtime``, registers the agent
+    through ``ensure_local_agent`` when a non-blank token is given, prints the
+    runtime details, then blocks on the runtime process when it has one. The
+    runtime is always passed to ``terminate_local_agent_runtime`` on the way
+    out, including when Ctrl+C arrives during registration.
+
+    Args:
+        agent_spec_id: Agent spec id forwarded to the runtime and registration.
+        agent_name: Agent name forwarded to the runtime and registration.
+        host: Host forwarded to the local runtime.
+        port: Port forwarded to the local runtime (``None`` is passed through).
+        protocol: Runtime protocol; also used as the registration transport.
+        log_level: Log level forwarded to the local runtime.
+        token: Registration token; registration is skipped when blank.
+        raw: Emit one JSON document instead of human-readable lines.
+
+    Raises:
+        RuntimeError: If registering the local agent fails.
+    """
+    runtime = start_local_agent_runtime(
+        agent_spec_id=agent_spec_id,
+        agent_name=agent_name,
+        host=host,
+        port=port,
+        protocol=protocol,
+        log_level=log_level,
+    )
+
+    # Everything after start-up sits under one try/finally so the runtime is
+    # terminated exactly once no matter what interrupts us (previously a
+    # Ctrl+C during registration skipped the cleanup).
+    try:
+        resolved_token = (token or "").strip()
+        if resolved_token:
+            try:
+                ensure_local_agent(
+                    base_url=runtime.base_url,
+                    agent_name=agent_name,
+                    token=resolved_token,
+                    agent_spec_id=agent_spec_id,
+                    transport=protocol,
+                )
+            except Exception as exc:
+                raise RuntimeError(f"Failed to register local agent: {exc}") from exc
+
+        if raw:
+            payload = {
+                "success": True,
+                "local": True,
+                "runtime": {
+                    "base_url": runtime.base_url,
+                    "agent_name": runtime.agent_name,
+                    "agent_spec_id": runtime.agent_spec_id,
+                    "chat_endpoint": runtime.chat_endpoint,
+                },
+            }
+            # Plain echo for machine-readable output: Rich's console.print
+            # interprets markup and may re-wrap long lines.
+            typer.echo(json.dumps(payload, ensure_ascii=False))
+        else:
+            console.print(
+                f"[green]Local agent runtime '{agent_name}' started![/green]"
+            )
+            console.print(f"Base URL: {runtime.base_url}")
+            console.print(f"Agent spec id: {agent_spec_id}")
+            console.print(f"Chat endpoint: {runtime.chat_endpoint}")
+            console.print("[dim]Press Ctrl+C to stop the local runtime.[/dim]")
+
+        process = runtime.process
+        if process is not None:
+            # Block until the runtime process exits or the user interrupts.
+            process.wait()
+    except KeyboardInterrupt:
+        console.print("\n[yellow]Stopping local agent runtime...[/yellow]")
+    finally:
+        terminate_local_agent_runtime(runtime)
+
+
+@app.command(name="ls")
+def list_agents(
+    token: Optional[str] = typer.Option(
+        None,
+        "--token",
+        help="Authentication token (Bearer token for API requests).",
+    ),
+    iam_url: Optional[str] = typer.Option(
+        None,
+        "--iam-url",
+        help="Datalayer IAM server URL",
+    ),
+    runtimes_url: Optional[str] = typer.Option(
+        None,
+        "--runtimes-url",
+        help="Datalayer Runtimes server URL",
+    ),
+) -> None:
+    """List running agent runtimes."""
+    try:
+        client = _make_client(token=token, iam_url=iam_url, runtimes_url=runtimes_url)
+        # Flatten each runtime object into the dict shape handed to
+        # display_runtimes.
+        rows: list[dict[str, Any]] = [
+            {
+                "given_name": item.name,
+                "environment_name": item.environment,
+                "pod_name": item.pod_name,
+                "ingress": item.ingress,
+                "reservation_id": item.reservation_id,
+                "uid": item.uid,
+                "burning_rate": item.burning_rate,
+                "token": item.jupyter_token,
+                "started_at": item.started_at,
+                "expired_at": item.expired_at,
+            }
+            for item in client.list_runtimes()
+        ]
+        display_runtimes(rows)
+    except Exception as exc:
+        # Any failure is reported on the console and mapped to exit code 1.
+        console.print(f"[red]Error listing agent runtimes: {exc}[/red]")
+        raise typer.Exit(1)
+
+
+@app.command(name="create")
+def create_agent_runtime(
+    environment: Optional[str] = typer.Argument(None, help="Environment name."),
+    given_name: Optional[str] = typer.Option(
+        None,
+        "--given-name",
+        help="Custom name for the runtime.",
+    ),
+    spec_id: Optional[str] = typer.Option(
+        None,
+        "--agentspec-id",
+        help=(
+            "Agent spec id for runtime bootstrap. "
+            f"Defaults to {DEFAULT_AGENT_SPEC_ID} when --agentspec is omitted."
+        ),
+    ),
+    spec: Optional[str] = typer.Option(
+        None,
+        "--agentspec",
+        help="Agent spec source as YAML/JSON URL or local file path.",
+    ),
+    time_reservation: Optional[float] = typer.Option(
+        10.0,
+        "--time-reservation",
+        help="Time reservation in minutes for the runtime.",
+    ),
+    billable_account_uid: Optional[str] = typer.Option(
+        None,
+        "--billable-account-uid",
+        help="Account UID to bill the runtime to (org/team).",
+    ),
+    billable_account_type: Optional[str] = typer.Option(
+        None,
+        "--billable-account-type",
+        help="Billable account type: user, organization, or team.",
+    ),
+    billable_account_handle: Optional[str] = typer.Option(
+        None,
+        "--billable-account-handle",
+        help="Billable account handle (informational).",
+    ),
+    raw: bool = typer.Option(
+        False,
+        "--raw",
+        help="Print machine-readable JSON payload.",
+    ),
+    token: Optional[str] = typer.Option(
+        None,
+        "--token",
+        help="Authentication token (Bearer token for API requests).",
+    ),
+    iam_url: Optional[str] = typer.Option(
+        None,
+        "--iam-url",
+        help="Datalayer IAM server URL",
+    ),
+    runtimes_url: Optional[str] = typer.Option(
+        None,
+        "--runtimes-url",
+        help="Datalayer Runtimes server URL",
+    ),
+    local: bool = typer.Option(
+        False,
+        "--local",
+        help="Launch the agent as a local agent-runtimes server instead of a cloud runtime.",
+    ),
+    host: str = typer.Option(
+        DEFAULT_LOCAL_HOST,
+        "--host",
+        help="Host interface for the local runtime (only with --local).",
+    ),
+    port: Optional[int] = typer.Option(
+        None,
+        "--port",
+        help="Port for the local runtime (random free port when omitted, only with --local).",
+    ),
+    protocol: str = typer.Option(
+        DEFAULT_LOCAL_PROTOCOL,
+        "--protocol",
+        help="Transport protocol for the local runtime (only with --local).",
+    ),
+    log_level: str = typer.Option(
+        DEFAULT_LOCAL_LOG_LEVEL,
+        "--log-level",
+        help="Log level for the local runtime process (only with --local).",
+    ),
+) -> None:
+    """Create a new runtime preloaded with an agent spec.
+
+    By default creates a cloud runtime. With ``--local`` it launches a local
+    ``agent-runtimes`` server and serves until interrupted (Ctrl+C).
+    """
+    import questionary
+
+    try:
+        # The two spec inputs are mutually exclusive.
+        if spec and spec_id:
+            raise typer.BadParameter(
+                "Use either --agentspec-id or --agentspec, not both."
+            )
+
+        if local:
+            if spec:
+                raise typer.BadParameter(
+                    "--agentspec is not supported with --local; use --agentspec-id."
+                )
+            # Local mode blocks inside the helper until the runtime stops.
+            _create_local_agent_runtime(
+                agent_spec_id=(spec_id or "").strip() or DEFAULT_AGENT_SPEC_ID,
+                agent_name=(given_name or "").strip() or DEFAULT_LOCAL_AGENT_NAME,
+                host=host,
+                port=port,
+                protocol=protocol,
+                log_level=log_level,
+                token=token,
+                raw=raw,
+            )
+            return
+
+        client = _make_client(token=token, iam_url=iam_url, runtimes_url=runtimes_url)
+
+        if environment is None:
+            # No environment argument: let the user pick one interactively.
+            environments = client.list_environments()
+            if not environments:
+                console.print("[yellow]No environments available.[/yellow]")
+                raise typer.Exit(0)
+            choices = []
+            for env in environments:
+                label = env.name
+                if env.title:
+                    label += f" ({env.title})"
+                choices.append(questionary.Choice(title=label, value=env.name))
+
+            selected = questionary.select(
+                "Select the environment for the new agent runtime:",
+                choices=choices,
+            ).ask()
+            if selected is None:
+                # Prompt was dismissed without a selection.
+                raise typer.Exit(0)
+            environment = selected
+
+        # Exactly one of (inline spec payload, spec id) is sent to the API.
+        agent_spec_payload: dict[str, Any] | None = None
+        resolved_spec_id: str | None = None
+        if spec:
+            agent_spec_payload = _load_agent_spec(spec)
+        else:
+            resolved_spec_id = (spec_id or "").strip() or DEFAULT_AGENT_SPEC_ID
+
+        # A missing or zero reservation falls back to 10 minutes.
+        final_time_reservation = time_reservation or 10.0
+        runtime = client.create_runtime(
+            name=given_name,
+            environment=environment,
+            time_reservation=final_time_reservation,
+            agent_spec_id=resolved_spec_id,
+            agent_spec=agent_spec_payload,
+            billable_account_uid=billable_account_uid,
+            billable_account_type=billable_account_type,
+            billable_account_handle=billable_account_handle,
+        )
+
+        if raw:
+            payload = {
+                "success": True,
+                "runtime": {
+                    "given_name": runtime.name,
+                    "environment_name": runtime.environment,
+                    "pod_name": runtime.pod_name,
+                    "uid": runtime.uid,
+                    "ingress": runtime.ingress,
+                    "reservation_id": runtime.reservation_id,
+                    "burning_rate": runtime.burning_rate,
+                    "started_at": runtime.started_at,
+                    "expired_at": runtime.expired_at,
+                },
+                "agent_spec_id": resolved_spec_id,
+                "agent_spec_source": spec or "",
+            }
+            # Plain echo for machine-readable output: Rich's console.print
+            # interprets markup and may re-wrap long lines.
+            typer.echo(json.dumps(payload, ensure_ascii=False))
+            return
+
+        console.print(f"[green]Agent runtime '{runtime.name}' created successfully![/green]")
+        if runtime.pod_name:
+            console.print(f"Pod: {runtime.pod_name}")
+        if runtime.ingress:
+            console.print(f"Ingress: {runtime.ingress}")
+        if resolved_spec_id:
+            console.print(f"Agent spec id: {resolved_spec_id}")
+        elif spec:
+            console.print(f"Agent spec source: {spec}")
+
+    except typer.Exit:
+        # Deliberate exits (including exit code 0) must not be reported as errors.
+        raise
+    except Exception as exc:
+        console.print("[red]Error creating agent runtime.[/red]")
+        console.print(f"[red]{exc}[/red]")
+        raise typer.Exit(1)
+
+
+@app.command(name="get")
+def get_agent_runtime(
+    pod_name: Optional[str] = typer.Argument(
+        None,
+        help="Pod name of the agent runtime to read.",
+    ),
+    raw: bool = typer.Option(
+        False,
+        "--raw",
+        help="Print machine-readable JSON payload.",
+    ),
+    token: Optional[str] = typer.Option(
+        None,
+        "--token",
+        help="Authentication token (Bearer token for API requests).",
+    ),
+    iam_url: Optional[str] = typer.Option(
+        None,
+        "--iam-url",
+        help="Datalayer IAM server URL",
+    ),
+    runtimes_url: Optional[str] = typer.Option(
+        None,
+        "--runtimes-url",
+        help="Datalayer Runtimes server URL",
+    ),
+) -> None:
+    """Read a single agent runtime by pod name."""
+    import questionary
+
+    try:
+        client = _make_client(token=token, iam_url=iam_url, runtimes_url=runtimes_url)
+
+        if pod_name is None:
+            # No pod name given: offer the listed runtimes as an interactive menu.
+            runtimes = client.list_runtimes()
+            if not runtimes:
+                console.print("[yellow]No running runtimes found.[/yellow]")
+                raise typer.Exit(0)
+            choices = []
+            for runtime in runtimes:
+                label = runtime.pod_name or ""
+                if runtime.name:
+                    label = f"{runtime.pod_name} ({runtime.name})"
+                if runtime.environment:
+                    label += f" [{runtime.environment}]"
+                choices.append(questionary.Choice(title=label, value=runtime.pod_name))
+
+            selected = questionary.select(
+                "Select the agent runtime to read:",
+                choices=choices,
+            ).ask()
+            if selected is None:
+                # Prompt was dismissed without a selection.
+                raise typer.Exit(0)
+            pod_name = selected
+
+        runtime = client.get_runtime(pod_name)
+        runtime_dict = {
+            "given_name": runtime.name,
+            "environment_name": runtime.environment,
+            "pod_name": runtime.pod_name,
+            "ingress": runtime.ingress,
+            "reservation_id": runtime.reservation_id,
+            "uid": runtime.uid,
+            "burning_rate": runtime.burning_rate,
+            "token": runtime.jupyter_token,
+            "started_at": runtime.started_at,
+            "expired_at": runtime.expired_at,
+        }
+
+        if raw:
+            # Plain echo for machine-readable output: Rich's console.print
+            # interprets markup and may re-wrap long lines.
+            typer.echo(
+                json.dumps(
+                    {"success": True, "runtime": runtime_dict}, ensure_ascii=False
+                )
+            )
+            return
+
+        display_runtimes([runtime_dict])
+
+    except typer.Exit:
+        # Deliberate exits (including exit code 0) must not be reported as errors.
+        raise
+    except Exception as exc:
+        console.print(f"[red]Error reading agent runtime: {exc}[/red]")
+        raise typer.Exit(1)
+
+
+@app.command(name="update")
+def update_agent_runtime(
+    pod_name: Optional[str] = typer.Argument(
+        None,
+        help="Pod name of the agent runtime to update.",
+    ),
+    capability: list[str] = typer.Option(
+        [],
+        "--capability",
+        help="Capability to apply (repeatable). Replaces existing capabilities.",
+    ),
+    raw: bool = typer.Option(
+        False,
+        "--raw",
+        help="Print machine-readable JSON payload.",
+    ),
+    token: Optional[str] = typer.Option(
+        None,
+        "--token",
+        help="Authentication token (Bearer token for API requests).",
+    ),
+    iam_url: Optional[str] = typer.Option(
+        None,
+        "--iam-url",
+        help="Datalayer IAM server URL",
+    ),
+    runtimes_url: Optional[str] = typer.Option(
+        None,
+        "--runtimes-url",
+        help="Datalayer Runtimes server URL",
+    ),
+) -> None:
+    """Update an agent runtime's capabilities."""
+    import questionary
+
+    try:
+        client = _make_client(token=token, iam_url=iam_url, runtimes_url=runtimes_url)
+
+        if pod_name is None:
+            # No pod name given: offer the listed runtimes as an interactive menu.
+            runtimes = client.list_runtimes()
+            if not runtimes:
+                console.print("[yellow]No running runtimes found.[/yellow]")
+                raise typer.Exit(0)
+            choices = []
+            for runtime in runtimes:
+                label = runtime.pod_name or ""
+                if runtime.name:
+                    label = f"{runtime.pod_name} ({runtime.name})"
+                if runtime.environment:
+                    label += f" [{runtime.environment}]"
+                choices.append(questionary.Choice(title=label, value=runtime.pod_name))
+
+            selected = questionary.select(
+                "Select the agent runtime to update:",
+                choices=choices,
+            ).ask()
+            if selected is None:
+                # Prompt was dismissed without a selection.
+                raise typer.Exit(0)
+            pod_name = selected
+
+        # The full capability list is sent, even when it is empty.
+        client.update_runtime(pod_name, list(capability))
+
+        if raw:
+            # Plain echo for machine-readable output: Rich's console.print
+            # interprets markup and may re-wrap long lines.
+            typer.echo(
+                json.dumps(
+                    {
+                        "success": True,
+                        "pod_name": pod_name,
+                        "capabilities": list(capability),
+                    },
+                    ensure_ascii=False,
+                )
+            )
+            return
+
+        console.print(
+            f"[green]Agent runtime '{pod_name}' updated successfully![/green]"
+        )
+        if capability:
+            console.print(f"Capabilities: {', '.join(capability)}")
+
+    except typer.Exit:
+        # Deliberate exits (including exit code 0) must not be reported as errors.
+        raise
+    except Exception as exc:
+        console.print(f"[red]Error updating agent runtime: {exc}[/red]")
+        raise typer.Exit(1)
+
+
+@app.command(name="delete")
+@app.command(name="terminate")
+def terminate_agent_runtime(
+    pod_name: Optional[str] = typer.Argument(
+        None,
+        help="Pod name of the runtime to terminate.",
+    ),
+    token: Optional[str] = typer.Option(
+        None,
+        "--token",
+        help="Authentication token (Bearer token for API requests).",
+    ),
+    iam_url: Optional[str] = typer.Option(
+        None,
+        "--iam-url",
+        help="Datalayer IAM server URL",
+    ),
+    runtimes_url: Optional[str] = typer.Option(
+        None,
+        "--runtimes-url",
+        help="Datalayer Runtimes server URL",
+    ),
+) -> None:
+    """Terminate a running agent runtime."""
+    import questionary
+
+    try:
+        client = _make_client(token=token, iam_url=iam_url, runtimes_url=runtimes_url)
+
+        if pod_name is None:
+            # Interactive fallback: pick the target from the listed runtimes.
+            candidates = client.list_runtimes()
+            if not candidates:
+                console.print("[yellow]No running runtimes found.[/yellow]")
+                raise typer.Exit(0)
+
+            menu = []
+            for rt in candidates:
+                if rt.name:
+                    title = f"{rt.pod_name} ({rt.name})"
+                else:
+                    title = rt.pod_name or ""
+                if rt.environment:
+                    title = title + f" [{rt.environment}]"
+                menu.append(questionary.Choice(title=title, value=rt.pod_name))
+
+            picked = questionary.select(
+                "Select the agent runtime to terminate:",
+                choices=menu,
+            ).ask()
+            if picked is None:
+                # Prompt was dismissed without a selection.
+                raise typer.Exit(0)
+            pod_name = picked
+
+        # A falsy result from the client is reported as a failure (exit code 1).
+        if not client.terminate_runtime(pod_name):
+            console.print(f"[red]Failed to terminate agent runtime '{pod_name}'[/red]")
+            raise typer.Exit(1)
+        console.print(
+            f"[green]Agent runtime '{pod_name}' terminated successfully![/green]"
+        )
+
+    except typer.Exit:
+        # Deliberate exits pass through untouched.
+        raise
+    except Exception as exc:
+        console.print(f"[red]Error terminating agent runtime: {exc}[/red]")
+        raise typer.Exit(1)
+
+
+def agents_ls(
+    token: Optional[str] = typer.Option(
+        None,
+        "--token",
+        help="Authentication token (Bearer token for API requests).",
+    ),
+    iam_url: Optional[str] = typer.Option(
+        None,
+        "--iam-url",
+        help="Datalayer IAM server URL",
+    ),
+    runtimes_url: Optional[str] = typer.Option(
+        None,
+        "--runtimes-url",
+        help="Datalayer Runtimes server URL",
+    ),
+) -> None:
+    """List running agent runtimes (root command alias)."""
+    # Pure delegation: every option is forwarded explicitly so list_agents
+    # never sees its own typer.Option defaults.
+    forwarded = {"token": token, "iam_url": iam_url, "runtimes_url": runtimes_url}
+    list_agents(**forwarded)
\ No newline at end of file
diff --git a/datalayer_core/cli/commands/evals.py b/datalayer_core/cli/commands/evals.py
index b7ace7f2..72f27732 100644
--- a/datalayer_core/cli/commands/evals.py
+++ b/datalayer_core/cli/commands/evals.py
@@ -9,6 +9,7 @@
import csv
import json
import math
+import re
import time
from pathlib import Path
from typing import Any, Optional
@@ -133,6 +134,49 @@ def _compute_baseline_and_drift(runs: list[dict[str, Any]]) -> tuple[float | Non
return baseline, latest, drift
+def _classify_legacy_failure(message: str) -> dict[str, Any]:
+    """Infer a structured stage/type/url from a free-form legacy error message.
+
+    Older runs (and any path that only persisted a plain error string) lack a
+    structured ``failure_cause``. Rather than rendering ``unknown`` /
+    ``legacy_error`` with an empty detail excerpt, classify the most common
+    error shapes so the report stays actionable.
+
+    Args:
+        message: Raw error text; surrounding whitespace is stripped.
+
+    Returns:
+        A dict with ``stage``, ``type``, ``message`` and ``detail_excerpt``
+        keys, plus ``execution_url`` when an http(s) URL appears in the text.
+        Unrecognised messages keep ``stage="unknown"`` / ``type="legacy_error"``.
+    """
+    text = message.strip()
+    lowered = text.lower()
+
+    # Take the first URL in the message and drop punctuation that belongs to
+    # the surrounding sentence (closing quotes, colons, brackets, ...), e.g.
+    # "... for url 'http://host/run': boom".
+    url_match = re.search(r"https?://[^\s]+", text)
+    execution_url = url_match.group(0).rstrip(".,;:!?)]}>'\"") if url_match else ""
+
+    # First matching rule wins; the order below is significant.
+    stage = "unknown"
+    failure_type = "legacy_error"
+    if "all connection attempts failed" in lowered or "connection refused" in lowered or "request failed" in lowered:
+        stage = "runtime_execution"
+        failure_type = "runtime_unreachable"
+    elif "returned http" in lowered or re.search(r"\bhttp\s*[45]\d\d\b", lowered):
+        stage = "runtime_execution"
+        failure_type = "runtime_http_error"
+    elif "traceback" in lowered:
+        stage = "runtime_execution"
+        failure_type = "runtime_traceback"
+    elif "no submitted code" in lowered or ("missing" in lowered and "code" in lowered):
+        # Parentheses only spell out the existing precedence (and before or).
+        stage = "run_preparation"
+        failure_type = "missing_submitted_code"
+    elif "no interactive runtime url" in lowered or "not configured" in lowered:
+        stage = "runtime_resolution"
+        failure_type = "no_runtime_url"
+
+    cause: dict[str, Any] = {
+        "stage": stage,
+        "type": failure_type,
+        "message": text,
+        "detail_excerpt": text,
+    }
+    if execution_url:
+        cause["execution_url"] = execution_url
+    return cause
+
+
def _extract_failure_cause(run: dict[str, Any]) -> dict[str, Any] | None:
"""Extract a structured failure cause from a run's report/summary payload."""
for container_key in ("report", "summary"):
@@ -141,7 +185,7 @@ def _extract_failure_cause(run: dict[str, Any]) -> dict[str, Any] | None:
cause = container.get("failure_cause")
if isinstance(cause, dict) and cause:
return cause
- # Fallback: synthesize a cause from legacy error fields.
+ # Fallback: synthesize a structured cause from legacy error fields.
summary = run.get("summary") if isinstance(run.get("summary"), dict) else {}
report = run.get("report") if isinstance(run.get("report"), dict) else {}
message = (
@@ -150,11 +194,7 @@ def _extract_failure_cause(run: dict[str, Any]) -> dict[str, Any] | None:
or report.get("error")
)
if isinstance(message, str) and message.strip():
- return {
- "stage": "unknown",
- "type": "legacy_error",
- "message": message.strip(),
- }
+ return _classify_legacy_failure(message)
return None
@@ -1344,10 +1384,11 @@ def evals_list(
@evals_app.command(name="create")
def evals_create(
- name: str = typer.Argument(..., help="Evalset name."),
- description: str = typer.Option("", "--description", help="Evalset description."),
- run_environment: str = typer.Option("sdk", "--run-environment", help="Evalset run environment (ui/sdk)."),
- kind: str = typer.Option("batch", "--kind", help="Evalset kind (batch/interactive)."),
+ name: Optional[str] = typer.Argument(None, help="Evalset name."),
+ description: Optional[str] = typer.Option(None, "--description", help="Evalset description."),
+ run_environment: Optional[str] = typer.Option(None, "--run-environment", help="Evalset run environment (ui/sdk)."),
+ kind: Optional[str] = typer.Option(None, "--kind", help="Evalset kind (batch/interactive)."),
+ spec_file: Optional[str] = typer.Option(None, "--spec-file", help="Path to evalset spec JSON file."),
schema_json: Optional[str] = typer.Option(None, "--schema-json", help="Schema JSON object."),
metadata_json: Optional[str] = typer.Option(None, "--metadata-json", help="Metadata JSON object."),
cases_file: Optional[str] = typer.Option(None, "--cases-file", help="Path to JSON array of cases."),
@@ -1355,11 +1396,22 @@ def evals_create(
token: Optional[str] = typer.Option(None, "--token", help="API token."),
ai_agents_url: Optional[str] = typer.Option(None, "--ai-agents-url", help="AI Agents base URL."),
account_uid: Optional[str] = typer.Option(None, "--account-uid", help="Organization/account UID context."),
+ raw: bool = typer.Option(False, "--raw", help="Print raw JSON output."),
) -> None:
"""Create an evalset."""
- schema = _parse_json_value(schema_json, "--schema-json")
- metadata = _parse_json_value(metadata_json, "--metadata-json")
+ spec = _parse_json_file(spec_file, "--spec-file")
+ schema = _merge_dicts(
+ spec.get("schema") if isinstance(spec.get("schema"), dict) else {},
+ _parse_json_value(schema_json, "--schema-json"),
+ )
+ metadata = _merge_dicts(
+ spec.get("metadata") if isinstance(spec.get("metadata"), dict) else {},
+ _parse_json_value(metadata_json, "--metadata-json"),
+ )
+
cases: list[dict[str, Any]] = []
+ if isinstance(spec.get("cases"), list):
+ cases = [case for case in spec.get("cases") if isinstance(case, dict)]
if cases_file:
text = Path(cases_file).read_text(encoding="utf-8")
decoded = json.loads(text)
@@ -1367,18 +1419,31 @@ def evals_create(
raise typer.BadParameter("--cases-file must contain a JSON array")
cases = [case for case in decoded if isinstance(case, dict)]
+ resolved_name = str(name or spec.get("name") or "").strip()
+ if not resolved_name:
+ raise typer.BadParameter("name argument is required unless provided in --spec-file")
+ resolved_description = str(description if description is not None else spec.get("description") or "")
+ resolved_run_environment = str(run_environment if run_environment is not None else spec.get("run_environment") or "sdk")
+ resolved_kind = str(kind if kind is not None else spec.get("kind") or "batch")
+
+ spec_tags = spec.get("tags") if isinstance(spec.get("tags"), list) else []
+ resolved_tags = tags if tags else [str(tag) for tag in spec_tags if str(tag).strip()]
+
client = _make_client(token=token, ai_agents_url=ai_agents_url)
payload = client.evals_create_eval(
- name=name,
- description=description,
- run_environment=run_environment,
- kind=kind,
+ name=resolved_name,
+ description=resolved_description,
+ run_environment=resolved_run_environment,
+ kind=resolved_kind,
schema=schema,
metadata=metadata,
- tags=tags,
+ tags=resolved_tags,
cases=cases,
account_uid=account_uid,
)
+ if raw:
+ typer.echo(json.dumps(payload))
+ return
eval_record = payload.get("evalset") or {}
console.print(f"[green]Eval created:[/green] {eval_record.get('id', '')} ({eval_record.get('name', '')})")
@@ -1403,7 +1468,7 @@ def evals_delete(
def _render_report(
- evalset_id: str = typer.Argument(..., help="Evalset ID to compare."),
+ evalset_id: Optional[str],
run_limit: int = typer.Option(50, "--run-limit", min=2, max=200, help="Runs fetched per experiment."),
token: Optional[str] = typer.Option(None, "--token", help="API token."),
ai_agents_url: Optional[str] = typer.Option(None, "--ai-agents-url", help="AI Agents base URL."),
@@ -1414,15 +1479,38 @@ def _render_report(
) -> None:
"""Generate a full evalset report with cross-experiment comparisons."""
client = _make_client(token=token, ai_agents_url=ai_agents_url)
+ resolved_evalset_id = (evalset_id or "").strip()
+ if not resolved_evalset_id:
+ payload = client.evals_list_evals(
+ limit=200,
+ offset=0,
+ account_uid=account_uid,
+ )
+ evalsets = [item for item in (payload.get("evalsets") or []) if isinstance(item, dict)]
+ if not evalsets:
+ raise typer.BadParameter("No evalsets found. Provide explicitly.")
+
+ def _updated_key(item: dict[str, Any]) -> str:
+ return str(item.get("updated_at") or item.get("created_at") or "")
+
+ latest_evalset = max(evalsets, key=_updated_key)
+ resolved_evalset_id = str(latest_evalset.get("id") or "").strip()
+ if not resolved_evalset_id:
+ raise typer.BadParameter("Latest evalset does not contain an id.")
+ console.print(
+ f"[yellow]No evalset id provided.[/yellow] Using latest evalset: "
+ f"[cyan]{resolved_evalset_id}[/cyan]"
+ )
+
report = _report_data(
client=client,
- evalset_id=evalset_id,
+ evalset_id=resolved_evalset_id,
run_limit=run_limit,
account_uid=account_uid,
)
experiments = report.get("experiments") or []
if not experiments:
- console.print(f"[yellow]No experiments found for evalset[/yellow] {evalset_id}")
+ console.print(f"[yellow]No experiments found for evalset[/yellow] {resolved_evalset_id}")
raise typer.Exit(0)
if raw:
@@ -1447,7 +1535,7 @@ def _render_report(
@app.command(name="report")
def evals_report(
- evalset_id: str = typer.Argument(..., help="Evalset ID to report."),
+ evalset_id: Optional[str] = typer.Argument(None, help="Evalset ID to report. Defaults to latest updated evalset."),
run_limit: int = typer.Option(50, "--run-limit", min=2, max=200, help="Runs fetched per experiment."),
token: Optional[str] = typer.Option(None, "--token", help="API token."),
ai_agents_url: Optional[str] = typer.Option(None, "--ai-agents-url", help="AI Agents base URL."),
@@ -1471,7 +1559,7 @@ def evals_report(
@evals_app.command(name="compare-report")
def evals_compare_report_compat(
- evalset_id: str = typer.Argument(..., help="Evalset ID to report."),
+ evalset_id: Optional[str] = typer.Argument(None, help="Evalset ID to report. Defaults to latest updated evalset."),
run_limit: int = typer.Option(50, "--run-limit", min=2, max=200, help="Runs fetched per experiment."),
token: Optional[str] = typer.Option(None, "--token", help="API token."),
ai_agents_url: Optional[str] = typer.Option(None, "--ai-agents-url", help="AI Agents base URL."),
@@ -1538,29 +1626,53 @@ def experiments_list(
@experiments_app.command(name="create")
def experiments_create(
- name: str = typer.Argument(..., help="Experiment name."),
+ name: Optional[str] = typer.Argument(None, help="Experiment name."),
evalset_id: Optional[str] = typer.Option(None, "--evalset-id", help="Evalset ID."),
- description: str = typer.Option("", "--description", help="Description."),
- status: str = typer.Option("draft", "--status", help="Initial status."),
+ description: Optional[str] = typer.Option(None, "--description", help="Description."),
+ status: Optional[str] = typer.Option(None, "--status", help="Initial status."),
+ spec_file: Optional[str] = typer.Option(None, "--spec-file", help="Path to experiment spec JSON file."),
config_json: Optional[str] = typer.Option(None, "--config-json", help="Config JSON object."),
summary_json: Optional[str] = typer.Option(None, "--summary-json", help="Summary JSON object."),
tags: list[str] = typer.Option([], "--tag", help="Repeatable tag."),
token: Optional[str] = typer.Option(None, "--token", help="API token."),
ai_agents_url: Optional[str] = typer.Option(None, "--ai-agents-url", help="AI Agents base URL."),
account_uid: Optional[str] = typer.Option(None, "--account-uid", help="Organization/account UID context."),
+ raw: bool = typer.Option(False, "--raw", help="Print raw JSON output."),
) -> None:
"""Create an evalset experiment."""
+ spec = _parse_json_file(spec_file, "--spec-file")
+
+ resolved_name = str(name or spec.get("name") or "").strip()
+ if not resolved_name:
+ raise typer.BadParameter("name argument is required unless provided in --spec-file")
+ resolved_evalset_id = str(evalset_id or spec.get("evalset_id") or "").strip() or None
+ resolved_description = str(description if description is not None else spec.get("description") or "")
+ resolved_status = str(status if status is not None else spec.get("status") or "draft")
+ resolved_config = _merge_dicts(
+ spec.get("config") if isinstance(spec.get("config"), dict) else {},
+ _parse_json_value(config_json, "--config-json"),
+ )
+ resolved_summary = _merge_dicts(
+ spec.get("summary") if isinstance(spec.get("summary"), dict) else {},
+ _parse_json_value(summary_json, "--summary-json"),
+ )
+ spec_tags = spec.get("tags") if isinstance(spec.get("tags"), list) else []
+ resolved_tags = tags if tags else [str(tag) for tag in spec_tags if str(tag).strip()]
+
client = _make_client(token=token, ai_agents_url=ai_agents_url)
payload = client.evals_create_experiment(
- name=name,
- evalset_id=evalset_id,
- description=description,
- status=status,
- config=_parse_json_value(config_json, "--config-json"),
- summary=_parse_json_value(summary_json, "--summary-json"),
- tags=tags,
+ name=resolved_name,
+ evalset_id=resolved_evalset_id,
+ description=resolved_description,
+ status=resolved_status,
+ config=resolved_config,
+ summary=resolved_summary,
+ tags=resolved_tags,
account_uid=account_uid,
)
+ if raw:
+ typer.echo(json.dumps(payload))
+ return
experiment = payload.get("experiment") or {}
console.print(f"[green]Experiment created:[/green] {experiment.get('id', '')} ({experiment.get('name', '')})")
diff --git a/datalayer_core/client/client.py b/datalayer_core/client/client.py
index da2ea0dd..8bd226fa 100644
--- a/datalayer_core/client/client.py
+++ b/datalayer_core/client/client.py
@@ -264,6 +264,8 @@ def create_runtime(
environment: str = DEFAULT_ENVIRONMENT,
time_reservation: Minutes = DEFAULT_TIME_RESERVATION,
snapshot_name: Optional[str] = None,
+ agent_spec_id: Optional[str] = None,
+ agent_spec: Optional[dict[str, Any]] = None,
billable_account_uid: Optional[str] = None,
billable_account_type: Optional[str] = None,
billable_account_handle: Optional[str] = None,
@@ -327,6 +329,8 @@ def create_runtime(
given_name=name,
environment_name=environment,
from_snapshot_uid=snapshot_uid,
+ agent_spec_id=agent_spec_id,
+ agent_spec=agent_spec,
credits_limit=credits_limit,
billable_account_uid=billable_account_uid,
billable_account_type=billable_account_type,
@@ -337,6 +341,8 @@ def create_runtime(
response = self._create_runtime(
given_name=name,
environment_name=environment,
+ agent_spec_id=agent_spec_id,
+ agent_spec=agent_spec,
credits_limit=credits_limit,
billable_account_uid=billable_account_uid,
billable_account_type=billable_account_type,
@@ -345,8 +351,21 @@ def create_runtime(
# Process the response and create RuntimesService object
if not response.get("success", True):
+ message = response.get("message", "Unknown error")
+ context_parts = [f"environment='{environment}'"]
+ if agent_spec_id:
+ context_parts.append(f"agent_spec_id='{agent_spec_id}'")
+ if agent_spec:
+ context_parts.append("agent_spec=")
+ reason = response.get("reason")
+ if reason:
+ context_parts.append(f"reason='{reason}'")
+ retry_after = response.get("retry_after_seconds")
+ if retry_after:
+ context_parts.append(f"retry_after_seconds={retry_after}")
+ context = ", ".join(context_parts)
raise RuntimeError(
- f"Runtime creation failed: {response.get('message', 'Unknown error')}"
+ f"Runtime creation failed ({context}): {message}"
)
runtime_data = response["runtime"]
@@ -435,6 +454,91 @@ def terminate_runtime(self, runtime: Union[RuntimeService, str]) -> bool:
else:
return False
+ def get_runtime(self, runtime: Union[RuntimeService, str]) -> RuntimeService:
+ """
+ Get a single running Runtime by pod name.
+
+ Parameters
+ ----------
+ runtime : Union[Runtime, str]
+ Runtime object or pod name string to fetch.
+
+ Returns
+ -------
+ Runtime
+ The Runtime object matching the pod name.
+
+ Raises
+ ------
+ RuntimeError
+ If the runtime cannot be retrieved.
+ """
+ pod_name = runtime.pod_name if isinstance(runtime, RuntimeService) else runtime
+ if not pod_name:
+ raise RuntimeError("A pod name is required to get a runtime.")
+
+ response = self._get_runtime(pod_name)
+ if not response.get("success", True):
+ message = response.get("message", "Unknown error")
+ raise RuntimeError(f"Failed to get runtime '{pod_name}': {message}")
+
+ runtime_data = response.get("runtime")
+ if not isinstance(runtime_data, dict):
+ raise RuntimeError(
+ f"Failed to get runtime '{pod_name}': missing 'runtime' field in response"
+ )
+
+ return RuntimeService(
+ name=runtime_data.get("given_name", pod_name),
+ environment=runtime_data.get("environment_name", ""),
+ pod_name=runtime_data.get("pod_name", pod_name),
+ token=self._token,
+ ingress=runtime_data.get("ingress"),
+ reservation_id=runtime_data.get("reservation_id"),
+ uid=runtime_data.get("uid"),
+ burning_rate=runtime_data.get("burning_rate"),
+ jupyter_token=runtime_data.get("token"),
+ run_url=self._urls.run_url,
+ iam_url=self._urls.iam_url,
+ started_at=runtime_data.get("started_at"),
+ expired_at=runtime_data.get("expired_at"),
+ )
+
+ def update_runtime(
+ self,
+ runtime: Union[RuntimeService, str],
+ capabilities: list[str],
+ ) -> bool:
+ """
+ Update a running Runtime's capabilities.
+
+ Parameters
+ ----------
+ runtime : Union[Runtime, str]
+ Runtime object or pod name string to update.
+ capabilities : list[str]
+ New capabilities to apply to the runtime.
+
+ Returns
+ -------
+ bool
+ True if the update succeeded.
+
+ Raises
+ ------
+ RuntimeError
+ If the update fails.
+ """
+ pod_name = runtime.pod_name if isinstance(runtime, RuntimeService) else runtime
+ if not pod_name:
+ raise RuntimeError("A pod name is required to update a runtime.")
+
+ response = self._update_runtime(pod_name, capabilities)
+ if not response.get("success", True):
+ message = response.get("message", "Unknown error")
+ raise RuntimeError(f"Failed to update runtime '{pod_name}': {message}")
+ return True
+
def list_secrets(self) -> list[SecretModel]:
"""
List all secrets available in the Datalayer environment.
diff --git a/datalayer_core/mixins/runtimes.py b/datalayer_core/mixins/runtimes.py
index 36d52363..e721f3e0 100644
--- a/datalayer_core/mixins/runtimes.py
+++ b/datalayer_core/mixins/runtimes.py
@@ -39,6 +39,8 @@ def _create_runtime(
given_name: Optional[str] = None,
credits_limit: Optional[float] = None,
from_snapshot_uid: Optional[str] = None,
+ agent_spec_id: Optional[str] = None,
+ agent_spec: Optional[dict[str, Any]] = None,
billable_account_uid: Optional[str] = None,
billable_account_type: Optional[str] = None,
billable_account_handle: Optional[str] = None,
@@ -111,6 +113,11 @@ def _create_runtime(
if from_snapshot_uid:
body["from"] = from_snapshot_uid
+ if agent_spec_id:
+ body["agent_spec_id"] = agent_spec_id
+ if agent_spec:
+ body["agent_spec"] = agent_spec
+
if billable_account_uid:
body["billable_account_uid"] = billable_account_uid
if billable_account_type:
@@ -287,9 +294,127 @@ def _terminate_runtime(self: Any, pod_name: str) -> dict[str, Any]:
return {"success": False, "message": error_msg}
+class RuntimesGetMixin:
+ """Mixin for reading a single Datalayer runtime."""
+
+ def _get_runtime(self: Any, pod_name: str) -> dict[str, Any]:
+ """
+ Get a single Runtime by pod name.
+
+ Parameters
+ ----------
+ pod_name : str
+ The pod name of the runtime to fetch.
+
+ Returns
+ -------
+ dict[str, Any]
+ Response containing the runtime payload.
+ """
+ try:
+ response = self._fetch(
+ "{}/api/runtimes/v1/runtimes/{}".format(
+ self.urls.runtimes_url, pod_name
+ ),
+ )
+
+ if response.status_code != 200:
+ error_msg = f"Failed to get runtime: HTTP {response.status_code}"
+ logger.error(error_msg)
+ try:
+ error_details = response.json()
+ if "message" in error_details:
+ error_msg += f" - {error_details['message']}"
+ except Exception:
+ pass
+ return {"success": False, "message": error_msg}
+
+ try:
+ result = response.json()
+ if "success" in result and not result["success"]:
+ error_msg = f"Get runtime failed: {result.get('message', 'Unknown error')}"
+ logger.error(error_msg)
+ return {"success": False, "message": error_msg}
+ return result
+ except Exception as e:
+ error_msg = f"Failed to parse runtime response: {str(e)}"
+ logger.error(error_msg)
+ return {"success": False, "message": error_msg}
+
+ except Exception as e:
+ error_msg = f"Unexpected error getting runtime {pod_name}: {str(e)}"
+ logger.error(error_msg)
+ return {"success": False, "message": error_msg}
+
+
+class RuntimesUpdateMixin:
+ """Mixin for updating a Datalayer runtime."""
+
+ def _update_runtime(
+ self: Any,
+ pod_name: str,
+ capabilities: list[str],
+ ) -> dict[str, Any]:
+ """
+ Update a Runtime's capabilities.
+
+ Parameters
+ ----------
+ pod_name : str
+ The pod name of the runtime to update.
+ capabilities : list[str]
+ New capabilities to apply to the runtime.
+
+ Returns
+ -------
+ dict[str, Any]
+ Response containing the update status.
+ """
+ try:
+ response = self._fetch(
+ "{}/api/runtimes/v1/runtimes/{}".format(
+ self.urls.runtimes_url, pod_name
+ ),
+ method="PUT",
+ json={"capabilities": capabilities},
+ )
+
+ if response.status_code not in [200, 201, 202]:
+ error_msg = f"Failed to update runtime: HTTP {response.status_code}"
+ logger.error(error_msg)
+ try:
+ error_details = response.json()
+ if "message" in error_details:
+ error_msg += f" - {error_details['message']}"
+ elif "detail" in error_details:
+ error_msg += f" - {error_details['detail']}"
+ except Exception:
+ pass
+ return {"success": False, "message": error_msg}
+
+ try:
+ result = response.json()
+ if "success" in result and not result["success"]:
+ error_msg = f"Update runtime failed: {result.get('message', 'Unknown error')}"
+ logger.error(error_msg)
+ return {"success": False, "message": error_msg}
+ return result
+ except Exception as e:
+ error_msg = f"Failed to parse runtime update response: {str(e)}"
+ logger.error(error_msg)
+ return {"success": False, "message": error_msg}
+
+ except Exception as e:
+ error_msg = f"Unexpected error updating runtime {pod_name}: {str(e)}"
+ logger.error(error_msg)
+ return {"success": False, "message": error_msg}
+
+
class RuntimesMixin(
RuntimesCreateMixin,
RuntimesListMixin,
+ RuntimesGetMixin,
+ RuntimesUpdateMixin,
RuntimesTerminateMixin,
):
"""
diff --git a/datalayer_core/runtimes/agent_runtime.py b/datalayer_core/runtimes/agent_runtime.py
new file mode 100644
index 00000000..0c25205d
--- /dev/null
+++ b/datalayer_core/runtimes/agent_runtime.py
@@ -0,0 +1,194 @@
+# Copyright (c) 2023-2026 Datalayer, Inc.
+# Distributed under the terms of the Modified BSD License.
+
+"""Cloud agent runtime provisioning helpers.
+
+Reusable logic for launching cloud ``agent-runtimes`` from a
+:class:`~datalayer_core.client.client.DatalayerClient`. Shared by the eval
+examples and the GitHub Actions integration so credit/time-reservation math,
+environment burning-rate lookup, and ``create_runtime`` error handling are not
+duplicated across consumers.
+"""
+
+from __future__ import annotations
+
+import math
+from typing import Any, Optional
+
+
+def resolve_environment_burning_rate(
+ client: Any,
+ environment_name: str,
+) -> float:
+ """Return the positive burning rate for an environment.
+
+ Parameters
+ ----------
+ client : DatalayerClient
+ An authenticated client able to list environments.
+ environment_name : str
+ The environment to look up.
+
+ Returns
+ -------
+ float
+ The environment's positive burning rate.
+
+ Raises
+ ------
+ RuntimeError
+ If the environment cannot be listed, is not found, or has no positive
+ burning rate.
+ """
+
+ def _to_float(value: Any) -> Optional[float]:
+ try:
+ if value is None:
+ return None
+ parsed = float(value)
+ if parsed > 0:
+ return parsed
+ except (TypeError, ValueError):
+ return None
+ return None
+
+ response = client._list_environments()
+ if not response.get("success", True):
+ raise RuntimeError(
+ f"Failed to list environments: {response.get('message', 'Unknown error')}"
+ )
+ environments = response.get("environments")
+ if not isinstance(environments, list):
+ raise RuntimeError(
+ "Failed to list environments: invalid environments payload."
+ )
+
+ matched_environment: Optional[dict[str, Any]] = None
+ for raw_env in environments:
+ if (
+ isinstance(raw_env, dict)
+ and str(raw_env.get("name") or "") == environment_name
+ ):
+ matched_environment = raw_env
+ break
+
+ if matched_environment is None:
+ available = [
+ str(env.get("name") or "")
+ for env in environments
+ if isinstance(env, dict)
+ ]
+ raise RuntimeError(
+ f"Environment '{environment_name}' not found for cloud runtime launch. "
+ f"Available environments: {available}"
+ )
+
+ parsed = _to_float(matched_environment.get("burning_rate"))
+ if parsed is not None:
+ return parsed
+
+ available_keys = sorted(matched_environment.keys())
+ raise RuntimeError(
+ f"Environment '{environment_name}' is missing a positive burning rate "
+ "in backend payload. Checked key: burning_rate. "
+ f"Environment keys: {available_keys}"
+ )
+
+
+def compute_time_reservation_minutes(
+ *,
+ credits_limit: float,
+ burning_rate: float,
+) -> int:
+ """Compute a time reservation (minutes) from a credits budget.
+
+ ``create_runtime`` charges ``burning_rate * 60 * time_reservation`` credits,
+ so this returns the smallest whole-minute reservation whose cost is at least
+ ``credits_limit`` (minimum 1 minute).
+
+ Raises
+ ------
+ ValueError
+ If ``burning_rate`` is not positive.
+ """
+ if burning_rate <= 0:
+ raise ValueError("burning_rate must be positive.")
+ return max(1, int(math.ceil(float(credits_limit) / (burning_rate * 60.0))))
+
+
+def create_cloud_agent_runtime(
+ client: Any,
+ *,
+ environment_name: str,
+ name: Optional[str] = None,
+ agent_spec_id: Optional[str] = None,
+ agent_spec: Optional[dict[str, Any]] = None,
+ credits_limit: Optional[float] = None,
+ time_reservation: Optional[int] = None,
+) -> Any:
+ """Create a cloud agent runtime via the core client.
+
+ Either ``time_reservation`` (in minutes) or ``credits_limit`` must be
+ provided. When only ``credits_limit`` is given, the time reservation is
+ derived from the environment's burning rate.
+
+ Parameters
+ ----------
+ client : DatalayerClient
+ An authenticated client.
+ environment_name : str
+ The runtime environment to launch in.
+ name : Optional[str]
+ Optional runtime name.
+ agent_spec_id : Optional[str]
+ Registered agent spec id (ignored when ``agent_spec`` is provided).
+ agent_spec : Optional[dict[str, Any]]
+ Inline agent spec payload (takes precedence over ``agent_spec_id``).
+ credits_limit : Optional[float]
+ Target credits budget used to derive ``time_reservation`` when the
+ latter is not supplied.
+ time_reservation : Optional[int]
+ Explicit time reservation in minutes.
+
+ Returns
+ -------
+ Any
+ The created runtime object (exposes ``pod_name`` and ``ingress``).
+
+ Raises
+ ------
+ ValueError
+ If neither ``time_reservation`` nor ``credits_limit`` is provided.
+ RuntimeError
+ If runtime creation fails or returns no ``pod_name``.
+ """
+ if time_reservation is None:
+ if credits_limit is None:
+ raise ValueError(
+ "Provide either time_reservation or credits_limit."
+ )
+ burning_rate = resolve_environment_burning_rate(client, environment_name)
+ time_reservation = compute_time_reservation_minutes(
+ credits_limit=credits_limit,
+ burning_rate=burning_rate,
+ )
+
+ try:
+ runtime = client.create_runtime(
+ name=name,
+ environment=environment_name,
+ time_reservation=int(time_reservation),
+ agent_spec_id=None if agent_spec else agent_spec_id,
+ agent_spec=agent_spec,
+ )
+ except Exception as exc:
+ spec_hint = "inline spec payload" if agent_spec else (agent_spec_id or "")
+ raise RuntimeError(
+ "Cloud runtime creation failed. "
+ f"environment={environment_name}, agent_spec={spec_hint}, error={exc}"
+ ) from exc
+
+ pod_name = str(getattr(runtime, "pod_name", "") or "").strip()
+ if not pod_name:
+ raise RuntimeError("Runtime creation succeeded but pod_name is missing.")
+ return runtime
diff --git a/datalayer_core/runtimes/local.py b/datalayer_core/runtimes/local.py
new file mode 100644
index 00000000..4fdf692e
--- /dev/null
+++ b/datalayer_core/runtimes/local.py
@@ -0,0 +1,628 @@
+# Copyright (c) 2023-2026 Datalayer, Inc.
+# Distributed under the terms of the Modified BSD License.
+
+"""Local agent runtime lifecycle helpers.
+
+Provides a reusable API to launch, register, interact with, and tear down a
+local ``agent-runtimes`` server. Shared by the ``datalayer agents`` CLI
+(``--local`` flag) and by examples so the same logic is not duplicated.
+"""
+
+from __future__ import annotations
+
+import json
+import logging
+import os
+import socket
+import subprocess
+import time
+from dataclasses import dataclass, field
+from typing import Any, Optional
+from urllib.parse import urlparse
+
+import requests
+
+logger = logging.getLogger(__name__)
+
+DEFAULT_LOCAL_HOST = "127.0.0.1"
+DEFAULT_LOCAL_AGENT_NAME = "default"
+DEFAULT_LOCAL_PROTOCOL = "vercel-ai"
+DEFAULT_LOCAL_LOG_LEVEL = "info"
+
+# Map Datalayer Bedrock credentials onto the AWS variables the local
+# agent-runtimes server expects.
+_BEDROCK_ENV_MAPPINGS = {
+ "DATALAYER_BEDROCK_AWS_ACCESS_KEY_ID": "AWS_ACCESS_KEY_ID",
+ "DATALAYER_BEDROCK_AWS_SECRET_ACCESS_KEY": "AWS_SECRET_ACCESS_KEY",
+ "DATALAYER_BEDROCK_AWS_DEFAULT_REGION": "AWS_DEFAULT_REGION",
+}
+
+
+@dataclass
+class LocalAgentRuntime:
+ """Handle to a running local ``agent-runtimes`` server."""
+
+ base_url: str
+ agent_name: str
+ agent_spec_id: str
+ process: Optional[subprocess.Popen[Any]] = field(default=None, repr=False)
+
+ @property
+ def chat_endpoint(self) -> str:
+ """Vercel AI chat endpoint for this runtime's agent."""
+ return f"{self.base_url.rstrip('/')}/api/v1/vercel-ai/{self.agent_name}"
+
+ def terminate(self) -> None:
+ """Terminate the underlying server process (if any)."""
+ terminate_local_agent_runtime(self)
+
+
+def find_free_port(host: str = DEFAULT_LOCAL_HOST) -> int:
+ """Return a free TCP port bound on ``host``."""
+ with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as sock:
+ sock.bind((host, 0))
+ return int(sock.getsockname()[1])
+
+
+def build_agent_runtime_env() -> tuple[dict[str, str], list[str]]:
+ """Build the subprocess environment with Bedrock -> AWS variable mapping.
+
+ Returns
+ -------
+ tuple[dict[str, str], list[str]]
+ The environment mapping and the list of AWS targets that were mapped.
+ """
+ runtime_env = os.environ.copy()
+ mapped_targets: list[str] = []
+ for source, target in _BEDROCK_ENV_MAPPINGS.items():
+ value = (runtime_env.get(source) or "").strip()
+ if value:
+ runtime_env[target] = value
+ mapped_targets.append(target)
+ return runtime_env, mapped_targets
+
+
+def wait_for_local_runtime(base_url: str, timeout_seconds: int = 25) -> None:
+ """Block until the local runtime ``/health`` endpoint responds.
+
+ Parameters
+ ----------
+ base_url : str
+ Base URL of the local agent-runtimes server.
+ timeout_seconds : int
+ Maximum number of seconds to wait.
+
+ Raises
+ ------
+ RuntimeError
+ If the server does not become ready before the timeout.
+ """
+ endpoint = f"{base_url.rstrip('/')}/health"
+ deadline = time.time() + timeout_seconds
+ while time.time() < deadline:
+ try:
+ response = requests.get(endpoint, timeout=2)
+ if response.status_code < 500:
+ return
+ except Exception:
+ pass
+ time.sleep(0.5)
+ raise RuntimeError(
+ f"Local agent-runtimes server did not become ready at {endpoint} "
+ f"within {timeout_seconds}s."
+ )
+
+
+def start_local_agent_runtime(
+ *,
+ agent_spec_id: str,
+ agent_name: str = DEFAULT_LOCAL_AGENT_NAME,
+ host: str = DEFAULT_LOCAL_HOST,
+ port: Optional[int] = None,
+ protocol: str = DEFAULT_LOCAL_PROTOCOL,
+ log_level: str = DEFAULT_LOCAL_LOG_LEVEL,
+ wait: bool = True,
+) -> LocalAgentRuntime:
+ """Launch a local ``agent-runtimes`` server as a subprocess.
+
+ Parameters
+ ----------
+ agent_spec_id : str
+ Agent spec id to boot the runtime with.
+ agent_name : str
+ Registered agent name/id served by the runtime.
+ host : str
+ Host interface to bind to.
+ port : Optional[int]
+ Port to bind to. A free port is selected when omitted.
+ protocol : str
+ Transport protocol exposed by the runtime (e.g. ``vercel-ai``).
+ log_level : str
+ Log level for the runtime process.
+ wait : bool
+ Whether to block until the runtime reports healthy.
+
+ Returns
+ -------
+ LocalAgentRuntime
+ Handle pointing at the running server.
+
+ Raises
+ ------
+ RuntimeError
+ If the runtime cannot be started or does not become ready.
+ """
+ resolved_port = port or find_free_port(host)
+ scheme = "http"
+ base_url = f"{scheme}://{host}:{resolved_port}"
+
+ command = [
+ "agent-runtimes",
+ "serve",
+ "--host",
+ host,
+ "--port",
+ str(resolved_port),
+ "--protocol",
+ protocol,
+ "--agent-id",
+ agent_spec_id,
+ "--agent-name",
+ agent_name,
+ "--log-level",
+ log_level,
+ ]
+
+ runtime_env, mapped_targets = build_agent_runtime_env()
+ if mapped_targets:
+ logger.info(
+ "Launching local agent-runtimes with Bedrock env mapping: "
+ "DATALAYER_BEDROCK_* -> %s",
+ ", ".join(mapped_targets),
+ )
+ else:
+ logger.info(
+ "Launching local agent-runtimes without DATALAYER_BEDROCK_* mapping "
+ "(no DATALAYER_BEDROCK_AWS_* variables detected)."
+ )
+
+ try:
+ process = subprocess.Popen(command, env=runtime_env)
+ except FileNotFoundError as exc:
+ raise RuntimeError(
+ "Could not start local agent runtime: the 'agent-runtimes' command "
+ "was not found on PATH. Install the agent-runtimes package first."
+ ) from exc
+ except Exception as exc:
+ raise RuntimeError(
+ f"Failed to start local agent runtime: {exc}"
+ ) from exc
+
+ runtime = LocalAgentRuntime(
+ base_url=base_url,
+ agent_name=agent_name,
+ agent_spec_id=agent_spec_id,
+ process=process,
+ )
+
+ if wait:
+ try:
+ wait_for_local_runtime(base_url)
+ except Exception:
+ terminate_local_agent_runtime(runtime)
+ raise
+
+ return runtime
+
+
+def terminate_local_agent_runtime(runtime: LocalAgentRuntime) -> None:
+ """Terminate a local runtime process, escalating to kill if needed."""
+ process = runtime.process
+ if process is None or process.poll() is not None:
+ return
+ process.terminate()
+ try:
+ process.wait(timeout=5)
+ except subprocess.TimeoutExpired:
+ process.kill()
+
+
+def ensure_local_agent(
+ *,
+ base_url: str,
+ agent_name: str,
+ token: str,
+ agent_spec_id: str,
+ agent_library: str = "pydantic-ai",
+ transport: str = DEFAULT_LOCAL_PROTOCOL,
+ enable_skills: bool = True,
+ description: Optional[str] = None,
+ timeout: int = 120,
+) -> None:
+ """Ensure a local agent with the expected transport is registered.
+
+ Lists existing agents, replaces a mismatched-transport registration when
+ needed, and creates the agent if it is missing.
+
+ Raises
+ ------
+ RuntimeError
+ If the agent cannot be registered.
+ """
+ base = base_url.rstrip("/")
+ headers = {"Authorization": f"Bearer {token}"}
+
+ try:
+ response = requests.get(f"{base}/api/v1/agents", headers=headers, timeout=30)
+ payload = response.json() if response.content else {}
+ except Exception:
+ payload = {}
+
+ existing_agents = payload.get("agents") if isinstance(payload, dict) else []
+ if not isinstance(existing_agents, list):
+ existing_agents = []
+
+ for agent in existing_agents:
+ if not isinstance(agent, dict):
+ continue
+ existing_id = str(agent.get("id") or "").strip()
+ existing_name = str(agent.get("name") or "").strip()
+ if agent_name and (existing_id == agent_name or existing_name == agent_name):
+ existing_transport = str(agent.get("transport") or "").strip().lower()
+ if existing_transport in {"vercel-ai", "vercel_ai"}:
+ return
+
+ # Replace mismatched transport registration so local interactions
+ # use the Vercel AI chat endpoint.
+ delete_target = existing_id or agent_name
+ try:
+ requests.delete(
+ f"{base}/api/v1/agents/{delete_target}",
+ headers=headers,
+ timeout=30,
+ )
+ except Exception as exc:
+ raise RuntimeError(
+ "Local agent exists with incompatible transport "
+ f"'{existing_transport or 'unknown'}' and could not be "
+ f"replaced: {exc}"
+ ) from exc
+ break
+
+ body = {
+ "name": agent_name,
+ "description": description
+ or f"Local agent '{agent_name}' registered by datalayer-core.",
+ "agent_library": agent_library,
+ "transport": transport,
+ "agent_spec_id": agent_spec_id,
+ "enable_skills": enable_skills,
+ "tools": [],
+ }
+ try:
+ response = requests.post(
+ f"{base}/api/v1/agents",
+ json=body,
+ headers=headers,
+ timeout=timeout,
+ )
+ except requests.exceptions.RequestException as exc:
+ parsed = urlparse(base_url)
+ host = parsed.hostname or DEFAULT_LOCAL_HOST
+ port = parsed.port or 8000
+ scheme = parsed.scheme or "http"
+ raise RuntimeError(
+ "Local agent bootstrap request failed: "
+ f"{exc}. Start agent-runtimes first, for example: "
+ f"agent-runtimes serve --host {host} --port {port} "
+ f"--agent-id {agent_spec_id} --agent-name {agent_name} "
+ f"(base URL: {scheme}://{host}:{port})."
+ ) from exc
+
+ if response.status_code < 400:
+ return
+ body_text = response.text or ""
+ if response.status_code == 409 and "already exists" in body_text.lower():
+ return
+ raise RuntimeError(
+ f"Local agent bootstrap failed ({response.status_code}): "
+ f"{body_text or 'unknown error'}"
+ )
+
+
+def delete_local_agents(*, base_url: str, token: str) -> tuple[int, int]:
+ """Delete all locally-registered agents.
+
+ Returns
+ -------
+ tuple[int, int]
+ ``(total_agents, deleted_agents)``.
+ """
+ base = base_url.rstrip("/")
+ headers = {"Authorization": f"Bearer {token}"}
+ try:
+ response = requests.get(f"{base}/api/v1/agents", headers=headers, timeout=30)
+ payload = response.json() if response.content else {}
+ except Exception as exc:
+ logger.warning("Unable to list local agents for cleanup: %s", exc)
+ return (0, 0)
+
+ agents = payload.get("agents") if isinstance(payload, dict) else []
+ if not isinstance(agents, list):
+ agents = []
+
+ deleted = 0
+ for agent in agents:
+ if not isinstance(agent, dict):
+ continue
+ agent_id = str(agent.get("id") or "").strip()
+ if not agent_id:
+ continue
+ try:
+ requests.delete(
+ f"{base}/api/v1/agents/{agent_id}",
+ headers=headers,
+ timeout=30,
+ )
+ deleted += 1
+ except Exception as exc:
+ logger.warning("Unable to delete local agent %s: %s", agent_id, exc)
+
+ return (len(agents), deleted)
+
+
+def extract_vercel_stream_text(raw: str) -> str:
+ """Extract concatenated text deltas from a Vercel AI SSE stream."""
+ text_parts: list[str] = []
+ for line in raw.splitlines():
+ if not line.startswith("data: "):
+ continue
+ payload = line[6:].strip()
+ if not payload or payload == "[DONE]":
+ continue
+ try:
+ event = json.loads(payload)
+ except json.JSONDecodeError:
+ continue
+
+ if isinstance(event, str):
+ if event.strip():
+ text_parts.append(event)
+ continue
+ if not isinstance(event, dict):
+ continue
+
+ for key in ("delta", "text", "content", "outputText", "textDelta"):
+ value = event.get(key)
+ if isinstance(value, str) and value:
+ text_parts.append(value)
+
+ return "".join(text_parts).strip()
+
+
+def _post_vercel_ai_chat(
+ *,
+ endpoint: str,
+ token: str,
+ prompt: str,
+ timeout: int,
+ source_label: str,
+) -> dict[str, Any]:
+ """POST a single prompt to a Vercel AI chat endpoint.
+
+ Shared by local and cloud chat helpers. Failures are captured into a
+ structured ``failure_cause`` (matching the eval report schema) instead of
+ raising.
+
+ Returns
+ -------
+ dict[str, Any]
+ On success: ``{"status": "completed", "output": {...}}``.
+ On failure: ``{"status": "failed", "output": {...},
+ "failure_cause": {"stage", "type", "message", "detail_excerpt",
+ "execution_url"}}``.
+ """
+ message_id = f"chat-{int(time.time() * 1000)}"
+ parts = [{"type": "text", "text": prompt}]
+ message = {"id": message_id, "role": "user", "parts": parts}
+ body = {
+ "trigger": "submit-message",
+ "id": f"chat-{message_id}",
+ "message": message,
+ "messages": [message],
+ }
+ headers = {
+ "Content-Type": "application/json",
+ "Authorization": f"Bearer {token}",
+ }
+ try:
+ response = requests.post(
+ endpoint,
+ json=body,
+ headers=headers,
+ timeout=timeout,
+ )
+ except requests.exceptions.RequestException as exc:
+ message_text = f"{source_label} chat request failed: {exc}"
+ return {
+ "status": "failed",
+ "output": {"text": "", "raw_stream_excerpt": ""},
+ "failure_cause": {
+ "stage": "runtime_execution",
+ "type": "runtime_unreachable",
+ "message": message_text,
+ "detail_excerpt": message_text,
+ "execution_url": endpoint,
+ },
+ }
+
+ raw = response.text or ""
+ if response.status_code >= 400:
+ message_text = f"{source_label} chat failed (HTTP {response.status_code})"
+ return {
+ "status": "failed",
+ "output": {"text": "", "raw_stream_excerpt": raw[:2000]},
+ "failure_cause": {
+ "stage": "runtime_execution",
+ "type": "runtime_http_error",
+ "message": message_text,
+ "detail_excerpt": raw[:2000] or message_text,
+ "execution_url": endpoint,
+ },
+ }
+
+ output_text = extract_vercel_stream_text(raw)
+ return {
+ "status": "completed",
+ "output": {
+ "text": output_text,
+ "raw_stream_excerpt": raw[:2000],
+ },
+ }
+
+
+def run_local_agent_chat(
+ *,
+ base_url: str,
+ agent_name: str,
+ token: str,
+ prompt: str,
+ timeout: int = 300,
+) -> dict[str, Any]:
+ """Send a single prompt to a local agent via the Vercel AI endpoint.
+
+ Failures are captured into a structured ``failure_cause`` (matching the
+ eval report schema) instead of raising, so callers can persist failed runs
+ and have them surfaced in reports.
+
+ Returns
+ -------
+ dict[str, Any]
+ On success: ``{"status": "completed", "output": {...}}``.
+ On failure: ``{"status": "failed", "output": {...},
+ "failure_cause": {"stage", "type", "message", "detail_excerpt",
+ "execution_url"}}``.
+ """
+ endpoint = f"{base_url.rstrip('/')}/api/v1/vercel-ai/{agent_name}"
+ return _post_vercel_ai_chat(
+ endpoint=endpoint,
+ token=token,
+ prompt=prompt,
+ timeout=timeout,
+ source_label="Local agent",
+ )
+
+
+def build_agent_runtimes_base_url(ingress: str) -> str:
+ """Derive the cloud ``agent-runtimes`` base URL from a runtime ingress.
+
+ A runtime's ``ingress`` (returned by :meth:`DatalayerClient.create_runtime`)
+ points at the Jupyter server path on the runtimes host, e.g.
+ ``https://r1.datalayer.run/jupyter/server//``. The
+ ``agent-runtimes`` container is exposed under the sibling path
+ ``/agent-runtimes//`` on the **same** host. Using the
+ runtime's own ingress guarantees the correct runtimes host (e.g. ``r1``)
+ rather than the IAM/control-plane host (e.g. ``prod1``).
+
+ Parameters
+ ----------
+ ingress : str
+ The runtime ingress URL.
+
+ Returns
+ -------
+ str
+ The agent-runtimes base URL (without a trailing slash).
+ """
+ base = (ingress or "").rstrip("/")
+ if "/jupyter/server/" in base:
+ base = base.replace("/jupyter/server/", "/agent-runtimes/", 1)
+ return base
+
+
+def runtime_route_candidates(
+ *,
+ agent_name: Optional[str] = None,
+ agent_spec_id: Optional[str] = None,
+ pod_name: Optional[str] = None,
+) -> list[str]:
+ """Build an ordered, de-duplicated list of Vercel AI route candidates.
+
+ The ``agent-runtimes`` server inside a cloud runtime may register its agent
+ under different names depending on how it was launched. Trying a few known
+ candidates (explicit agent name, agent spec id, pod name, then the default
+ route) makes cloud execution resilient.
+ """
+ candidates: list[str] = []
+ for value in (agent_name, agent_spec_id, pod_name, DEFAULT_LOCAL_AGENT_NAME):
+ token = str(value or "").strip()
+ if token and token not in candidates:
+ candidates.append(token)
+ return candidates
+
+
+def run_cloud_agent_chat(
+ *,
+ ingress: str,
+ token: str,
+ prompt: str,
+ route_candidates: list[str],
+ timeout: int = 300,
+) -> dict[str, Any]:
+ """Send a single prompt to a cloud runtime agent via the Vercel AI endpoint.
+
+ The execution URL is derived from the runtime's ``ingress`` (via
+ :func:`build_agent_runtimes_base_url`) so the request targets the correct
+ runtimes host (e.g. ``r1.datalayer.run``). Each route candidate is tried in
+ order until one succeeds; if all fail, the last structured failure is
+ returned with every attempted URL recorded in ``detail_excerpt``.
+
+ Returns
+ -------
+ dict[str, Any]
+ Same contract as :func:`run_local_agent_chat`.
+ """
+ base_url = build_agent_runtimes_base_url(ingress)
+ candidates = [c for c in route_candidates if str(c or "").strip()]
+ if not candidates:
+ candidates = [DEFAULT_LOCAL_AGENT_NAME]
+
+ attempted: list[str] = []
+ last_result: dict[str, Any] | None = None
+ for route in candidates:
+ endpoint = f"{base_url}/api/v1/vercel-ai/{route}"
+ attempted.append(endpoint)
+ result = _post_vercel_ai_chat(
+ endpoint=endpoint,
+ token=token,
+ prompt=prompt,
+ timeout=timeout,
+ source_label="Cloud agent",
+ )
+ if str(result.get("status") or "").strip().lower() == "completed":
+ return result
+ last_result = result
+
+ if last_result is None:
+ last_result = {
+ "status": "failed",
+ "output": {"text": "", "raw_stream_excerpt": ""},
+ "failure_cause": {
+ "stage": "runtime_execution",
+ "type": "runtime_unreachable",
+ "message": "No cloud agent route candidates available.",
+ "detail_excerpt": "No cloud agent route candidates available.",
+ "execution_url": base_url,
+ },
+ }
+ elif len(attempted) > 1:
+ failure_cause = last_result.get("failure_cause")
+ if isinstance(failure_cause, dict):
+ tried = "; ".join(attempted)
+ base_detail = str(failure_cause.get("detail_excerpt") or "")
+ failure_cause["detail_excerpt"] = (
+ f"{base_detail}\nAttempted routes: {tried}"
+ ).strip()
+ failure_cause["attempted_urls"] = attempted
+ return last_result
+
From cccbab65a767b3cdce920ea4380968c83e5d0fb3 Mon Sep 17 00:00:00 2001
From: Eric Charles
Date: Sun, 7 Jun 2026 10:02:11 +0200
Subject: [PATCH 49/49] bump: version
---
datalayer_core/__version__.py | 2 +-
datalayer_core/runtimes/agent_runtime.py | 106 +++++++++++++++++++++++
datalayer_core/runtimes/local.py | 56 ++++++++++++
3 files changed, 163 insertions(+), 1 deletion(-)
diff --git a/datalayer_core/__version__.py b/datalayer_core/__version__.py
index 388a47f3..0bad1d00 100644
--- a/datalayer_core/__version__.py
+++ b/datalayer_core/__version__.py
@@ -3,4 +3,4 @@
"""Datalayer Core version information."""
-__version__ = "1.1.23"
+__version__ = "1.1.24"
diff --git a/datalayer_core/runtimes/agent_runtime.py b/datalayer_core/runtimes/agent_runtime.py
index 0c25205d..27856a57 100644
--- a/datalayer_core/runtimes/agent_runtime.py
+++ b/datalayer_core/runtimes/agent_runtime.py
@@ -125,6 +125,9 @@ def create_cloud_agent_runtime(
agent_spec: Optional[dict[str, Any]] = None,
credits_limit: Optional[float] = None,
time_reservation: Optional[int] = None,
+ billable_account_uid: Optional[str] = None,
+ billable_account_type: Optional[str] = None,
+ billable_account_handle: Optional[str] = None,
) -> Any:
"""Create a cloud agent runtime via the core client.
@@ -149,6 +152,12 @@ def create_cloud_agent_runtime(
latter is not supplied.
time_reservation : Optional[int]
Explicit time reservation in minutes.
+ billable_account_uid : Optional[str]
+ Optional billable account UID used for runtime billing attribution.
+ billable_account_type : Optional[str]
+ Optional billable account type (user, organization, team).
+ billable_account_handle : Optional[str]
+ Optional billable account handle.
Returns
-------
@@ -180,6 +189,9 @@ def create_cloud_agent_runtime(
time_reservation=int(time_reservation),
agent_spec_id=None if agent_spec else agent_spec_id,
agent_spec=agent_spec,
+ billable_account_uid=billable_account_uid,
+ billable_account_type=billable_account_type,
+ billable_account_handle=billable_account_handle,
)
except Exception as exc:
spec_hint = "inline spec payload" if agent_spec else (agent_spec_id or "")
@@ -192,3 +204,97 @@ def create_cloud_agent_runtime(
if not pod_name:
raise RuntimeError("Runtime creation succeeded but pod_name is missing.")
return runtime
+
+
+def terminate_cloud_agent_runtime(
+    client: Any,
+    runtime_or_pod_name: Any,
+    *,
+    raise_on_error: bool = False,
+) -> bool:
+    """Stop the cloud runtime that was provisioned for an agent run.
+
+    Parameters
+    ----------
+    client : DatalayerClient
+        Client object whose ``terminate_runtime`` method is called.
+    runtime_or_pod_name : Any
+        Either a pod-name string or an object carrying ``pod_name``.
+    raise_on_error : bool
+        If ``True``, every failure path raises :class:`RuntimeError`.
+
+    Returns
+    -------
+    bool
+        ``True`` only if ``terminate_runtime`` returned a truthy value.
+    """
+    source = runtime_or_pod_name
+    if not isinstance(source, str):
+        source = str(getattr(source, "pod_name", "") or "")
+    pod_name = source.strip()
+    if not pod_name and raise_on_error:
+        raise RuntimeError("Cannot terminate cloud runtime: pod_name is missing.")
+    if not pod_name:
+        return False
+
+    try:
+        terminated = bool(client.terminate_runtime(pod_name))
+    except Exception as exc:  # any client error counts as a failed termination
+        if not raise_on_error:
+            return False
+        message = f"Cloud runtime termination failed for pod {pod_name}: {exc}"
+        raise RuntimeError(message) from exc
+
+    if raise_on_error and not terminated:
+        raise RuntimeError(
+            f"Cloud runtime termination returned unsuccessful for pod {pod_name}."
+        )
+    return terminated
+
+
+def teardown_agent_execution_resources(
+    client: Any,
+    *,
+    execution_target: str,
+    cloud_runtime_or_pod_name: Any = None,
+    local_base_url: Optional[str] = None,
+    local_agent_name: Optional[str] = None,
+    token: Optional[str] = None,
+    local_runtime: Any = None,
+) -> dict[str, bool]:
+    """Release whatever an agent execution left running, cloud or local.
+
+    ``execution_target`` is matched case-insensitively after trimming:
+    ``"cloud"`` terminates ``cloud_runtime_or_pod_name`` when truthy;
+    ``"local"`` deletes the agent when URL, token and name are all set and
+    stops ``local_runtime`` when given. Other targets are a no-op.
+
+    Returns
+    -------
+    dict[str, bool]
+        Flags ``cloud_runtime_terminated``, ``local_agent_deleted`` and
+        ``local_runtime_terminated``.
+    """
+    target = str(execution_target or "").strip().lower()
+    cloud_done = local_deleted = local_stopped = False
+
+    if target == "cloud" and cloud_runtime_or_pod_name:
+        cloud_done = terminate_cloud_agent_runtime(client, cloud_runtime_or_pod_name)
+    elif target == "local":
+        if local_base_url and token and local_agent_name:
+            from datalayer_core.runtimes.local import delete_local_agent
+
+            local_deleted = delete_local_agent(
+                base_url=local_base_url, token=token, agent_name=local_agent_name
+            )
+        if local_runtime is not None:
+            from datalayer_core.runtimes.local import terminate_local_agent_runtime
+
+            terminate_local_agent_runtime(local_runtime)
+            local_stopped = True
+
+    return {
+        "cloud_runtime_terminated": cloud_done,
+        "local_agent_deleted": local_deleted,
+        "local_runtime_terminated": local_stopped,
+    }
diff --git a/datalayer_core/runtimes/local.py b/datalayer_core/runtimes/local.py
index 4fdf692e..3ab44ca4 100644
--- a/datalayer_core/runtimes/local.py
+++ b/datalayer_core/runtimes/local.py
@@ -371,6 +371,62 @@ def delete_local_agents(*, base_url: str, token: str) -> tuple[int, int]:
return (len(agents), deleted)
+def delete_local_agent(*, base_url: str, token: str, agent_name: str) -> bool:
+    """Remove one agent from the local agent-runtimes server.
+
+    Parameters
+    ----------
+    base_url : str
+        Root URL of the local server; trailing slashes are ignored.
+    token : str
+        Value sent as the ``Authorization: Bearer`` credential.
+    agent_name : str
+        Identifier compared against each listed agent's id and name.
+
+    Returns
+    -------
+    bool
+        ``True`` only when a listed agent matched and the DELETE call
+        answered with a status below 400; ``False`` in every other case.
+    """
+    wanted = str(agent_name or "").strip()
+    if not wanted:
+        return False
+
+    agents_url = f"{base_url.rstrip('/')}/api/v1/agents"
+    auth = {"Authorization": f"Bearer {token}"}
+    try:
+        listing = requests.get(agents_url, headers=auth, timeout=30)
+        payload = listing.json() if listing.content else {}
+    except Exception as exc:
+        logger.warning("Unable to list local agents for cleanup: %s", exc)
+        return False
+
+    agents = payload.get("agents") if isinstance(payload, dict) else []
+    if not isinstance(agents, list):
+        return False
+
+    # Only the first dict entry whose id or name equals the target is tried.
+    for entry in agents:
+        if not isinstance(entry, dict):
+            continue
+        entry_id = str(entry.get("id") or "").strip()
+        entry_name = str(entry.get("name") or "").strip()
+        if wanted != entry_id and wanted != entry_name:
+            continue
+        delete_target = entry_id or wanted
+        try:
+            reply = requests.delete(
+                f"{agents_url}/{delete_target}", headers=auth, timeout=30
+            )
+            return reply.status_code < 400
+        except Exception as exc:
+            logger.warning("Unable to delete local agent %s: %s", delete_target, exc)
+            return False
+
+    return False
+
+
def extract_vercel_stream_text(raw: str) -> str:
"""Extract concatenated text deltas from a Vercel AI SSE stream."""
text_parts: list[str] = []