From 4244282e7f7123eb063690d1baa8a7b978c416a1 Mon Sep 17 00:00:00 2001 From: Oleksii Dolhov Date: Thu, 11 Jun 2026 14:02:01 +0300 Subject: [PATCH] feat(ui): per-schedule performance scorecards on Agent Detail (#1115) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Surface per-schedule performance on the Overview tab and the Schedules tab, both from a SINGLE compact aggregate (no N per-schedule round-trips) — extends #1107 (Overview) and generalises #868 (per-schedule deep analytics). Backend: - db `get_agent_schedules_summary(agent, hours)` — one rollup row per non-deleted schedule (zero-run schedules included): terminal success_rate (success / (success + failed[incl. error]); None when zero terminal), NULL-skipping avg_duration_ms, cost_total, context_avg, tool_call_total (parsed over newest 5,000 rows agent-wide, tool_calls_sampled flag), and last-run outcome. Cheap grouped SQL; iso_cutoff window (Invariant #16). - GET /api/agents/{name}/schedules/analytics-summary?window=7d|14d|30d (AuthorizedAgent). Declared BEFORE /{schedule_id} in routers/schedules.py so the literal segment isn't captured as a schedule_id (Invariant #4) — putting it in analytics.py would be shadowed (schedules_router mounts first). - models: ScheduleSummaryRow + AgentSchedulesSummaryResponse (Invariant #14). Frontend (single fetch, two consumers — Invariant #7): - executions.js fetchSchedulesSummary, cached per ${name}:${window} like fetchAgentAnalytics. - OverviewPanel: "Schedules performance" section, honors the existing 7/14/30d window selector, each row deep-links to the Schedules tab; hidden at zero. - SchedulesPanel: inline mini-stats per row (success rate, avg duration, runs, last-run dot) — badge style, no new chart/modal — from the same call. Tests: tests/unit/test_1115_schedules_summary.py (6) — terminal success rate, NULL-skip avg, tool-call total, zero-run-still-appears, soft-delete excluded, out-of-window excluded. 
Full analytics suites green (30 passed). Frontend prod build clean; endpoint verified live across 7/14/30d windows. Related to #1115 Co-Authored-By: Claude Opus 4.8 (1M context) --- docs/memory/architecture.md | 3 +- .../feature-flows/agent-overview-dashboard.md | 18 ++ src/backend/database.py | 3 + src/backend/db/schedules.py | 179 +++++++++++++ src/backend/models.py | 42 ++++ src/backend/routers/schedules.py | 35 ++- src/frontend/src/components/OverviewPanel.vue | 73 +++++- .../src/components/SchedulesPanel.vue | 72 ++++++ src/frontend/src/stores/executions.js | 30 +++ tests/unit/test_1115_schedules_summary.py | 237 ++++++++++++++++++ 10 files changed, 686 insertions(+), 6 deletions(-) create mode 100644 tests/unit/test_1115_schedules_summary.py diff --git a/docs/memory/architecture.md b/docs/memory/architecture.md index 2e2d4c59..deccdb15 100644 --- a/docs/memory/architecture.md +++ b/docs/memory/architecture.md @@ -223,7 +223,7 @@ Channel DB modules: `db/slack_channels.py` (workspace connections, channel-agent - `stores/auth.js` - Email/admin authentication + JWT - `stores/collaborations.js` - Collaboration graph state, WebSocket integration - `stores/loops.js` - Sequential agent loops UI state, agent-scoped, WebSocket-driven (#1106) -- `stores/executions.js` - Fleet execution list/stats + agent Overview analytics (`fetchAgentAnalytics`, cached per `${name}:${window}`, never polled) (#1107) +- `stores/executions.js` - Fleet execution list/stats + agent Overview analytics (`fetchAgentAnalytics`, cached per `${name}:${window}`, never polled) (#1107) + per-schedule performance rollups (`fetchSchedulesSummary`, same `${name}:${window}` cache; one fetch shared by the Overview "Schedules performance" section and the Schedules-tab inline stats) (#1115) - `stores/sessions.js` - Session tab state **Real-time:** WebSocket client at `utils/websocket.js` with auto-reconnect; tracks `_eid` and replays via `last-event-id` — see [Real-time 
Delivery](#real-time-delivery-reliability-003-306). @@ -558,6 +558,7 @@ Lookup keys: S-01/E-02/L-03 shipped via #653; S-02/E-01/E-05/B-01 (Phase 2) and | GET/PUT/DELETE | `/api/agents/{name}/schedules/{id}` | Get / update (same 400 on timeout) / soft-delete | | POST | `/api/agents/{name}/schedules/{id}/enable` · `/disable` · `/trigger` | Enable / disable / manual trigger | | GET | `/api/agents/{name}/schedules/{id}/executions` | Execution history | +| GET | `/api/agents/{name}/schedules/analytics-summary` | **Per-schedule performance rollups for the whole agent** in one compact call (#1115). `?window=` ∈ {7d,14d,30d}→168/336/720h (422 otherwise). One row per **non-deleted** schedule (zero-run schedules included): terminal `success_rate` (`None`→`—` when zero terminal), `avg_duration_ms` (NULL-skip), `cost_total`, `context_avg`, `tool_call_total`, last-run outcome. Backs BOTH the Overview "Schedules performance" section AND the Schedules-tab inline stats from a single fetch (no N round-trips). **Declared before `/{id}` in `routers/schedules.py`** so the literal `analytics-summary` isn't captured as a `schedule_id` (Invariant #4). DB: `get_agent_schedules_summary` (generalises #868). Tool-call totals parsed over the newest 5,000 rows agent-wide (`tool_calls_sampled` flag) | | GET | `/api/agents/{name}/schedules/{id}/analytics` | Per-schedule analytics: counts, success rate, duration p50/p95/p99, cost, tool-call top-5, daily timeline. `?window_hours=` ∈ {24,168,720}, default 168 (#868). Percentiles Python-side over the newest 5,000 success rows (`sampled:true` reported when capped); counts + timeline full-set; UTC day buckets gap-filled. Tenant boundary in the DB layer (`agent_name` passed through) — `AuthorizedAgent` validates only the path agent name, NOT that `schedule_id` belongs to it, so the DB-layer filter is the actual boundary. 
Soft-deleted schedules 404 | | POST/GET/DELETE | `/api/agents/{name}/schedules/{id}/webhook` | Generate/rotate token · status + URL · revoke (WEBHOOK-001) | diff --git a/docs/memory/feature-flows/agent-overview-dashboard.md b/docs/memory/feature-flows/agent-overview-dashboard.md index 51ebc797..7f222cc1 100644 --- a/docs/memory/feature-flows/agent-overview-dashboard.md +++ b/docs/memory/feature-flows/agent-overview-dashboard.md @@ -153,11 +153,29 @@ capabilities, platforms, tools) is tucked behind a native collapsible - The three tab-validity sites were deduped into one `DEEP_LINK_TABS` constant (includes `'overview'`); the invalid-tab fallback resets to `'overview'`. +## Schedules performance section (#1115) +Below the trend charts, a **Schedules performance** section lists the agent's +non-deleted schedules — one compact scorecard each (command/name + cron, +terminal success rate, avg duration, runs-in-window, tool calls), honoring the +same 7/14/30d window selector. Each row deep-links to the Schedules tab (the +#868 per-schedule deep view stays the drill-in target). Hidden when the agent +has no schedules. The Schedules tab itself (`SchedulesPanel.vue`) renders the +**same** rollups as inline mini-stats per row. Both consume one +`executions.js` `fetchSchedulesSummary(name, window)` call +(`GET /api/agents/{name}/schedules/analytics-summary`, DB +`get_agent_schedules_summary`) — cached per `${name}:${window}`, no +N per-schedule round-trips. + ## Testing - `tests/unit/test_agent_analytics.py` — bucketing (incl. `Other` fallback), terminal-based success rate, **full-set avg vs sampled p95** correctness, NULL-skipping context avg, window boundary, empty agent, gap-filled timeline. 12 tests; mirrors the #868 `test_schedule_analytics.py` fixture machinery. +- `tests/unit/test_1115_schedules_summary.py` — per-schedule rollups: + terminal success rate (incl. 
`error`, non-terminal excluded), NULL-skipping + avg duration, tool-call total (malformed JSON skipped), zero-run schedule + still appears (rate `None`), soft-deleted excluded, out-of-window excluded. + 6 tests. ## Related Flows - [executions-dashboard.md](executions-dashboard.md) — fleet-level sibling; shares diff --git a/src/backend/database.py b/src/backend/database.py index 9ff797bb..571027f1 100644 --- a/src/backend/database.py +++ b/src/backend/database.py @@ -1603,6 +1603,9 @@ def get_schedule_analytics(self, schedule_id: str, hours: int, def get_agent_analytics(self, agent_name: str, hours: int): return self._schedule_ops.get_agent_analytics(agent_name, hours) + def get_agent_schedules_summary(self, agent_name: str, hours: int): + return self._schedule_ops.get_agent_schedules_summary(agent_name, hours) + def get_agent_token_stats(self, agent_name: str): return self._schedule_ops.get_agent_token_stats(agent_name) diff --git a/src/backend/db/schedules.py b/src/backend/db/schedules.py index ac631cb0..d5f88b36 100644 --- a/src/backend/db/schedules.py +++ b/src/backend/db/schedules.py @@ -58,6 +58,31 @@ def _bucket_for_trigger(trigger: Optional[str]) -> str: return _TRIGGER_BUCKETS.get(trigger or "", _OTHER_BUCKET) +# #1115: max chars for the per-schedule "command" label derived from a +# schedule's message (the Overview/Schedules-tab scorecard headline). +_SCHEDULE_LABEL_MAX = 80 + + +def _schedule_command_label(message: Optional[str]) -> str: + """Short headline for a schedule's scorecard, derived from its message. + + Uses the first non-empty line (the command/intent, e.g. ``/do-something``), + collapsed and truncated. Empty when the message is blank — the frontend + falls back to the schedule name. 
+ """ + if not message: + return "" + for line in message.splitlines(): + stripped = line.strip() + if stripped: + return ( + stripped[: _SCHEDULE_LABEL_MAX - 1] + "…" + if len(stripped) > _SCHEDULE_LABEL_MAX + else stripped + ) + return "" + + # #73: chunk size for scoped `IN (...)` lookups. SQLite caps host parameters at # SQLITE_MAX_VARIABLE_NUMBER (999 before SQLite 3.32). Keep a safe margin below # that so a large accessible-agent set can't blow the limit. Read as a module @@ -1779,6 +1804,160 @@ def get_schedule_analytics( "sample_size": sample_size, } + def get_agent_schedules_summary(self, agent_name: str, hours: int) -> Dict: + """Per-schedule performance rollups for an agent over a window (#1115). + + ONE row per non-deleted schedule (zero-run schedules included, with + zeros), so both the Overview "Schedules performance" section and the + Schedules-tab inline stats render from a single call — no N per-schedule + round-trips. The #868 deep view stays the drill-in target. + + Per schedule: terminal **success_rate** (success / (success + failed + [incl. ``error``]); ``None`` when zero terminal runs so the UI shows + ``—`` not a false 0%), **avg_duration_ms** (NULL-skipping AVG), + **cost_total**, **context_avg** (NULL-skipping), **tool_call_total**, + and last-run outcome. Read-only / DB-sourced (renders when stopped). + + Tool-call totals are parsed from the newest ``_PERCENTILE_ROWSET_CAP`` + rows agent-wide (matches the #868 sampling discipline); + ``tool_calls_sampled`` flags when that cap was hit. Window uses + ``iso_cutoff`` (Invariant #16). + """ + cutoff = iso_cutoff(hours) + cap = _PERCENTILE_ROWSET_CAP + FAILED_STATES = (TaskExecutionStatus.FAILED, "error") + + with get_db_connection() as conn: + cursor = conn.cursor() + + # Schedules (non-deleted) — the authoritative row set so a + # zero-run schedule still appears. + cursor.execute( + """ + SELECT id, name, message, cron_expression, enabled + FROM agent_schedules + WHERE agent_name = ?
AND deleted_at IS NULL + ORDER BY created_at ASC + """, + (agent_name,), + ) + schedule_rows = cursor.fetchall() + + # One grouped aggregate for every schedule's executions in window. + cursor.execute( + """ + SELECT + schedule_id, + COUNT(*) AS total, + SUM(CASE WHEN status = 'success' THEN 1 ELSE 0 END) AS success_count, + SUM(CASE WHEN status IN ('failed', 'error') THEN 1 ELSE 0 END) AS failed_count, + SUM(CASE WHEN status = 'cancelled' THEN 1 ELSE 0 END) AS cancelled_count, + SUM(COALESCE(cost, 0)) AS cost_total, + AVG(CASE WHEN duration_ms IS NOT NULL THEN duration_ms END) AS avg_duration_ms, + AVG(CASE WHEN context_used IS NOT NULL THEN context_used END) AS context_avg + FROM schedule_executions + WHERE agent_name = ? AND started_at > ? + GROUP BY schedule_id + """, + (agent_name, cutoff), + ) + agg_by_sched = {r["schedule_id"]: r for r in cursor.fetchall()} + + # Last-run outcome per schedule. SQLite's bare-column-with-MAX + # rule returns the row holding the max started_at. + cursor.execute( + """ + SELECT schedule_id, MAX(started_at) AS last_run_at, status AS last_status + FROM schedule_executions + WHERE agent_name = ? AND started_at > ? + GROUP BY schedule_id + """, + (agent_name, cutoff), + ) + last_by_sched = {r["schedule_id"]: r for r in cursor.fetchall()} + + # Tool-call totals — bounded JSON parse over newest rows agent-wide + # (cap + 1 to detect sampling), attributed back per schedule. + cursor.execute( + """ + SELECT schedule_id, tool_calls + FROM schedule_executions + WHERE agent_name = ? AND started_at > ? AND tool_calls IS NOT NULL + ORDER BY started_at DESC + LIMIT ? 
+ """, + (agent_name, cutoff, cap + 1), + ) + tool_rows = cursor.fetchall() + + tool_calls_sampled = len(tool_rows) > cap + tool_total_by_sched: Dict[str, int] = defaultdict(int) + for row in tool_rows[:cap]: + raw = row["tool_calls"] + if not raw: + continue + try: + parsed = json.loads(raw) + except (json.JSONDecodeError, TypeError): + continue + if not isinstance(parsed, list): + continue + for entry in parsed: + if isinstance(entry, dict) and (entry.get("name") or entry.get("tool")): + tool_total_by_sched[row["schedule_id"]] += 1 + + schedules: List[Dict] = [] + for s in schedule_rows: + sid = s["id"] + agg = agg_by_sched.get(sid) + last = last_by_sched.get(sid) + + if agg: + total = int(agg["total"] or 0) + success_count = int(agg["success_count"] or 0) + failed_count = int(agg["failed_count"] or 0) + cancelled_count = int(agg["cancelled_count"] or 0) + cost_total = round(float(agg["cost_total"] or 0.0), 4) + avg_duration_ms = ( + int(agg["avg_duration_ms"]) if agg["avg_duration_ms"] is not None else None + ) + context_avg = ( + int(agg["context_avg"]) if agg["context_avg"] is not None else None + ) + else: + total = success_count = failed_count = cancelled_count = 0 + cost_total = 0.0 + avg_duration_ms = context_avg = None + + terminal = success_count + failed_count + success_rate = round(success_count / terminal, 4) if terminal else None + + schedules.append({ + "schedule_id": sid, + "name": s["name"], + "command": _schedule_command_label(s["message"]), + "cron_expression": s["cron_expression"], + "enabled": bool(s["enabled"]), + "total_executions": total, + "success_count": success_count, + "failed_count": failed_count, + "cancelled_count": cancelled_count, + "success_rate": success_rate, + "avg_duration_ms": avg_duration_ms, + "cost_total": cost_total, + "context_avg": context_avg, + "tool_call_total": tool_total_by_sched.get(sid, 0), + "last_run_at": last["last_run_at"] if last else None, + "last_run_status": last["last_status"] if last else None, + }) + 
+ return { + "window_hours": hours, + "schedule_count": len(schedules), + "tool_calls_sampled": tool_calls_sampled, + "schedules": schedules, + } + def get_agent_analytics(self, agent_name: str, hours: int) -> Dict: """Compute agent-scoped execution analytics over a rolling window (#1107). diff --git a/src/backend/models.py b/src/backend/models.py index 2baba783..f6884a78 100644 --- a/src/backend/models.py +++ b/src/backend/models.py @@ -708,3 +708,45 @@ class AgentAnalyticsResponse(BaseModel): timeline: List[AgentAnalyticsTimelinePoint] = [] sampled: bool = False sample_size: int = 0 + + +class ScheduleSummaryRow(BaseModel): + """One per-schedule performance rollup (#1115). + + `success_rate` is terminal-based (success / (success + failed [incl. + `error`])) and `None` when there were zero terminal runs in the window — + the UI renders `—`, not a false 0%. `avg_duration_ms` / `context_avg` are + `None` when nothing measurable ran. A zero-run schedule still appears + (all counts 0, rates `None`). + """ + schedule_id: str + name: str + command: str = "" + cron_expression: str + enabled: bool + total_executions: int + success_count: int + failed_count: int + cancelled_count: int + success_rate: Optional[float] = None + avg_duration_ms: Optional[int] = None + cost_total: float + context_avg: Optional[int] = None + tool_call_total: int + last_run_at: Optional[str] = None + last_run_status: Optional[str] = None + + +class AgentSchedulesSummaryResponse(BaseModel): + """Response envelope for GET /api/agents/{name}/schedules/analytics-summary (#1115). + + One compact rollup row per non-deleted schedule for the window — consumed + by BOTH the Overview "Schedules performance" section and the Schedules-tab + inline stats from a single call (no N per-schedule round-trips). + `tool_calls_sampled` flags when the agent-wide tool-call parse pool was + capped. UTC window via `iso_cutoff`. 
+ """ + window_hours: int + schedule_count: int + tool_calls_sampled: bool = False + schedules: List[ScheduleSummaryRow] = [] diff --git a/src/backend/routers/schedules.py b/src/backend/routers/schedules.py index 9b12f3d7..b3c19b71 100644 --- a/src/backend/routers/schedules.py +++ b/src/backend/routers/schedules.py @@ -8,7 +8,7 @@ "scheduler" as an agent name. """ -from fastapi import APIRouter, Depends, HTTPException, Request, status +from fastapi import APIRouter, Depends, HTTPException, Query, Request, status from typing import List, Optional from pydantic import BaseModel from datetime import datetime @@ -17,12 +17,15 @@ import logging import httpx -from models import User, ScheduleAnalyticsResponse +from models import User, ScheduleAnalyticsResponse, AgentSchedulesSummaryResponse from dependencies import get_current_user, get_authorized_agent, AuthorizedAgent, CurrentUser from database import db, Schedule, ScheduleCreate, ScheduleExecution from services.platform_audit_service import platform_audit_service, AuditEventType _ANALYTICS_VALID_WINDOWS = frozenset({24, 168, 720}) # #868 +# #1115: Overview/Schedules-tab scorecard windows → hours (matches the #1107 +# Overview selector: 7 / 14 / 30 days). +_SUMMARY_WINDOW_HOURS = {"7d": 168, "14d": 336, "30d": 720} logger = logging.getLogger(__name__) @@ -341,6 +344,34 @@ async def get_schedule_analytics( return ScheduleAnalyticsResponse(**analytics) +@router.get( + "/{name}/schedules/analytics-summary", + response_model=AgentSchedulesSummaryResponse, +) +async def get_agent_schedules_summary( + name: AuthorizedAgent, + window: str = Query("7d", description="One of 7d, 14d, 30d"), +): + """Per-schedule performance rollups for the agent over a window (#1115). + + ONE compact row per non-deleted schedule — consumed by BOTH the Overview + "Schedules performance" section and the Schedules-tab inline stats from a + single call (no N per-schedule round-trips). Zero-run schedules are + included. 
Read-only / DB-sourced (renders when the agent is stopped). The + #868 per-schedule deep view stays the drill-in target. + + NOTE: declared BEFORE `/{name}/schedules/{schedule_id}` so the literal + `analytics-summary` segment isn't captured as a schedule_id (Invariant #4). + """ + hours = _SUMMARY_WINDOW_HOURS.get(window) + if hours is None: + raise HTTPException( + status_code=status.HTTP_422_UNPROCESSABLE_ENTITY, + detail=f"window must be one of {sorted(_SUMMARY_WINDOW_HOURS)}", + ) + return AgentSchedulesSummaryResponse(**db.get_agent_schedules_summary(name, hours)) + + @router.get("/{name}/schedules/{schedule_id}", response_model=ScheduleResponse) async def get_schedule( name: AuthorizedAgent, diff --git a/src/frontend/src/components/OverviewPanel.vue b/src/frontend/src/components/OverviewPanel.vue index f450caea..a33d427b 100644 --- a/src/frontend/src/components/OverviewPanel.vue +++ b/src/frontend/src/components/OverviewPanel.vue @@ -73,6 +73,7 @@ const syncFailures = ref(0) const health = ref(null) // AgentHealthDetail const healthTrend = ref(null) // { dates, uptime, latency } const schedulesCount = ref(null) +const schedulesPerf = ref(null) // #1115 per-schedule rollups (window-keyed) const skillsCount = ref(null) const recent = ref([]) @@ -168,14 +169,29 @@ async function loadAnalytics() { if (!agentName.value) return analyticsLoading.value = true try { - analytics.value = await executionsStore.fetchAgentAnalytics(agentName.value, window.value) - } catch { - analytics.value = null + const [a, s] = await Promise.allSettled([ + executionsStore.fetchAgentAnalytics(agentName.value, window.value), + executionsStore.fetchSchedulesSummary(agentName.value, window.value), + ]) + analytics.value = a.status === 'fulfilled' ? a.value : null + schedulesPerf.value = s.status === 'fulfilled' ? s.value : null } finally { analyticsLoading.value = false } } +// #1115 per-schedule scorecard formatters (shared style with the Schedules tab). 
+function fmtSuccessRate(rate) { + return rate == null ? '—' : `${Math.round(rate * 100)}%` +} +function successRateClass(rate) { + if (rate == null) return 'text-gray-400 dark:text-gray-500' + if (rate >= 0.9) return 'text-status-success-600 dark:text-status-success-400' + if (rate >= 0.5) return 'text-status-warning-600 dark:text-status-warning-400' + return 'text-status-danger-600 dark:text-status-danger-400' +} +// Reuses the existing `fmtDuration` defined for the Duration chart below. + async function loadSidecars() { const name = agentName.value if (!name) return @@ -356,6 +372,57 @@ onMounted(() => {

+ +
+
+

Schedules performance

+ last {{ window }} +
+
+ +
+

+ Tool counts sampled over the newest runs. +

+
+

Health & reliability

diff --git a/src/frontend/src/components/SchedulesPanel.vue b/src/frontend/src/components/SchedulesPanel.vue index f807e206..a1e43646 100644 --- a/src/frontend/src/components/SchedulesPanel.vue +++ b/src/frontend/src/components/SchedulesPanel.vue @@ -328,6 +328,39 @@
+ +
+ + 7d success + + {{ fmtSuccessRate(perfBySchedule[schedule.id].success_rate) }} + + + + avg + {{ fmtPerfDuration(perfBySchedule[schedule.id].avg_duration_ms) }} + + + {{ perfBySchedule[schedule.id].total_executions }} + runs + + + + {{ perfBySchedule[schedule.id].last_run_status }} + +
+
{{ schedule.message.substring(0, 150) }}{{ schedule.message.length > 150 ? '...' : '' }}
@@ -619,6 +652,7 @@ import ConfirmDialog from './ConfirmDialog.vue' import ModelSelector from './ModelSelector.vue' import ScheduleAnalyticsCard from './ScheduleAnalyticsCard.vue' import { useAuthStore } from '../stores/auth' +import { useExecutionsStore } from '../stores/executions' // Platform default model fetched from /api/settings/feature-flags (#831) const platformDefaultModel = ref('') @@ -635,6 +669,13 @@ const props = defineProps({ }) const authStore = useAuthStore() +const executionsStore = useExecutionsStore() + +// #1115: inline per-schedule stats. Same single aggregate the Overview uses +// (cached in the executions store) — no per-row fetches. 7d window matches the +// Overview default so the cache is shared. +const PERF_WINDOW = '7d' +const perfBySchedule = ref({}) // schedule_id -> rollup row // State const schedules = ref([]) @@ -766,6 +807,37 @@ async function loadSchedules() { } finally { loading.value = false } + loadPerf() +} + +// #1115: fetch the per-schedule rollups (one call, store-cached) and index +// by schedule_id for inline row stats. Best-effort — failure leaves rows +// without stats, never blocks the list. +async function loadPerf() { + try { + const summary = await executionsStore.fetchSchedulesSummary(props.agentName, PERF_WINDOW) + const map = {} + for (const row of summary?.schedules || []) map[row.schedule_id] = row + perfBySchedule.value = map + } catch (e) { + console.error('Failed to load schedule performance:', e) + } +} + +function fmtSuccessRate(rate) { + return rate == null ? 
'—' : `${Math.round(rate * 100)}%` +} +function successRateClass(rate) { + if (rate == null) return 'text-gray-400 dark:text-gray-500' + if (rate >= 0.9) return 'text-status-success-600 dark:text-status-success-400' + if (rate >= 0.5) return 'text-status-warning-600 dark:text-status-warning-400' + return 'text-status-danger-600 dark:text-status-danger-400' +} +function fmtPerfDuration(ms) { + if (ms == null) return '—' + if (ms < 1000) return `${ms}ms` + if (ms < 60000) return `${(ms / 1000).toFixed(1)}s` + return `${Math.round(ms / 60000)}m` } // Save schedule (create or update) diff --git a/src/frontend/src/stores/executions.js b/src/frontend/src/stores/executions.js index 22b255e2..54755105 100644 --- a/src/frontend/src/stores/executions.js +++ b/src/frontend/src/stores/executions.js @@ -32,6 +32,14 @@ export const useExecutionsStore = defineStore('executions', () => { const analyticsCache = ref({}) const analyticsLoading = ref(false) + // --- per-schedule performance rollups (#1115) --- + // One compact call per (agent, window), cached + shared by the Overview + // "Schedules performance" section AND the Schedules-tab inline stats — so + // neither surface issues N per-schedule round-trips. Same fetch-once + // discipline as analytics above. Keyed `${name}:${window}`. + const schedulesSummaryCache = ref({}) + const schedulesSummaryLoading = ref(false) + // --- getters --- const hasActiveFilters = computed(() => filters.value.agent || @@ -127,6 +135,25 @@ export const useExecutionsStore = defineStore('executions', () => { } } + // Per-schedule performance summary (#1115). Cached per (agent, window); + // returns the cached payload unless `force` or first fetch. One call backs + // both the Overview section and the Schedules-tab inline stats. 
+ async function fetchSchedulesSummary(name, window = '7d', { force = false } = {}) { + const key = `${name}:${window}` + if (!force && schedulesSummaryCache.value[key]) return schedulesSummaryCache.value[key] + schedulesSummaryLoading.value = true + try { + const res = await axios.get( + `/api/agents/${encodeURIComponent(name)}/schedules/analytics-summary`, + { params: { window }, headers: authStore.authHeader } + ) + schedulesSummaryCache.value = { ...schedulesSummaryCache.value, [key]: res.data } + return res.data + } finally { + schedulesSummaryLoading.value = false + } + } + function setFilter(key, value) { filters.value[key] = value refresh() @@ -178,6 +205,9 @@ export const useExecutionsStore = defineStore('executions', () => { analyticsCache, analyticsLoading, fetchAgentAnalytics, + schedulesSummaryCache, + schedulesSummaryLoading, + fetchSchedulesSummary, fetchExecutions, fetchStats, loadMore, diff --git a/tests/unit/test_1115_schedules_summary.py b/tests/unit/test_1115_schedules_summary.py new file mode 100644 index 00000000..919f5256 --- /dev/null +++ b/tests/unit/test_1115_schedules_summary.py @@ -0,0 +1,237 @@ +"""Tests for per-schedule performance rollups (#1115). + +Exercises `ScheduleOperations.get_agent_schedules_summary` against an +ephemeral SQLite. Same fixture machinery as `test_agent_analytics.py` (#1107) +and `test_schedule_analytics.py` (#868) — `db.connection.DB_PATH` monkeypatched +at a tmp file, stale sibling `db.*` stubs popped from `sys.modules`. + +Locked behaviour (from the issue's AC): + * ONE rollup row per non-deleted schedule; a zero-run schedule still appears. + * `success_rate` terminal-based: success / (success + failed [incl. `error`]); + `None` when zero terminal runs (UI shows `—`, not a false 0%). + * `avg_duration_ms` NULL-skipping; `tool_call_total` parsed from tool_calls JSON. + * Soft-deleted schedules (`deleted_at` set) are excluded. + * Window honored via iso_cutoff — rows outside the window don't count. 
+""" +from __future__ import annotations + +import json +import sqlite3 +import sys +from datetime import datetime, timedelta, timezone +from pathlib import Path + +import pytest + + +_BACKEND = Path(__file__).resolve().parent.parent.parent / "src" / "backend" +_BACKEND_STR = str(_BACKEND) +while _BACKEND_STR in sys.path: + sys.path.remove(_BACKEND_STR) +sys.path.insert(0, _BACKEND_STR) + + +_STUBBED_MODULE_NAMES = ["db.schedules", "db.users", "db.agents", "db.monitoring"] + + +@pytest.fixture(autouse=True) +def _restore_sys_modules(): + saved = {n: sys.modules.get(n) for n in _STUBBED_MODULE_NAMES} + for name in _STUBBED_MODULE_NAMES: + sys.modules.pop(name, None) + try: + yield + finally: + for name, value in saved.items(): + if value is None: + sys.modules.pop(name, None) + else: + sys.modules[name] = value + + +def _make_db_schema(conn: sqlite3.Connection) -> None: + cur = conn.cursor() + cur.execute( + """ + CREATE TABLE users ( + id INTEGER PRIMARY KEY, username TEXT UNIQUE NOT NULL, + password_hash TEXT, role TEXT DEFAULT 'user', + auth0_sub TEXT, name TEXT, picture TEXT, email TEXT, + created_at TEXT, updated_at TEXT, last_login TEXT + ) + """ + ) + cur.execute( + """ + CREATE TABLE agent_schedules ( + id TEXT PRIMARY KEY, + agent_name TEXT NOT NULL, + name TEXT NOT NULL, + cron_expression TEXT NOT NULL, + message TEXT NOT NULL, + enabled INTEGER DEFAULT 1, + created_at TEXT NOT NULL, + deleted_at TEXT + ) + """ + ) + cur.execute( + """ + CREATE TABLE schedule_executions ( + id TEXT PRIMARY KEY, + schedule_id TEXT NOT NULL, + agent_name TEXT NOT NULL, + status TEXT NOT NULL, + started_at TEXT NOT NULL, + completed_at TEXT, + duration_ms INTEGER, + cost REAL, + context_used INTEGER, + tool_calls TEXT, + triggered_by TEXT NOT NULL DEFAULT 'schedule', + message TEXT NOT NULL DEFAULT '' + ) + """ + ) + cur.execute("INSERT INTO users(id, username, role) VALUES (1, 'owner', 'user')") + conn.commit() + + +@pytest.fixture +def tmp_db(tmp_path, monkeypatch): + 
db_path = tmp_path / "trinity.db" + conn = sqlite3.connect(str(db_path)) + _make_db_schema(conn) + conn.close() + monkeypatch.setenv("TRINITY_DB_PATH", str(db_path)) + monkeypatch.delitem(sys.modules, "db.connection", raising=False) + try: + import db.connection as connection_mod + except ImportError: + pytest.skip("backend venv required") + monkeypatch.setattr(connection_mod, "DB_PATH", str(db_path)) + return str(db_path) + + +@pytest.fixture +def ops(tmp_db): + try: + from db.schedules import ScheduleOperations + from db.users import UserOperations + from db.agents import AgentOperations + except ImportError: + pytest.skip("backend venv required") + user_ops = UserOperations() + agent_ops = AgentOperations(user_ops) + return ScheduleOperations(user_ops, agent_ops) + + +def _iso_ago(minutes: int = 0, hours: int = 0, days: int = 0) -> str: + when = datetime.now(timezone.utc) - timedelta(minutes=minutes, hours=hours, days=days) + return when.strftime("%Y-%m-%dT%H:%M:%S.%fZ") + + +def _add_schedule(db_path, sid, agent="agent-1", name="S", message="/do-it", + cron="*/5 * * * *", enabled=1, deleted_at=None): + conn = sqlite3.connect(db_path) + conn.execute( + "INSERT INTO agent_schedules(id, agent_name, name, cron_expression, message, " + "enabled, created_at, deleted_at) VALUES (?,?,?,?,?,?,?,?)", + (sid, agent, name, cron, message, enabled, _iso_ago(days=1), deleted_at), + ) + conn.commit() + conn.close() + + +_seq = [0] + + +def _add_exec(db_path, sid, agent="agent-1", *, status="success", started_at=None, + duration_ms=1000, cost=0.0, context_used=None, tool_calls=None): + if started_at is None: + started_at = _iso_ago(minutes=5) + _seq[0] += 1 + conn = sqlite3.connect(db_path) + conn.execute( + "INSERT INTO schedule_executions(id, schedule_id, agent_name, status, started_at, " + "duration_ms, cost, context_used, tool_calls) VALUES (?,?,?,?,?,?,?,?,?)", + (f"x-{_seq[0]}", sid, agent, status, started_at, duration_ms, cost, + context_used, tool_calls), + ) + 
conn.commit() + conn.close() + + +# ---------------------------------------------------------------------- + +def test_success_rate_is_terminal_based(tmp_db, ops): + _add_schedule(tmp_db, "s1") + # 3 success, 1 failed, 1 error → terminal=5, rate = 3/5 = 0.6. + # 1 running is NON-terminal: counted in total but NOT in the rate. + for _ in range(3): + _add_exec(tmp_db, "s1", status="success", duration_ms=1000) + _add_exec(tmp_db, "s1", status="failed", duration_ms=500) + _add_exec(tmp_db, "s1", status="error", duration_ms=None) + _add_exec(tmp_db, "s1", status="running", duration_ms=None) + + out = ops.get_agent_schedules_summary("agent-1", 168) + row = out["schedules"][0] + assert out["schedule_count"] == 1 + assert row["total_executions"] == 6 + assert row["success_count"] == 3 + assert row["failed_count"] == 2 # failed + error + assert row["success_rate"] == 0.6 # 3 / (3 + 2) + assert row["command"] == "/do-it" + + +def test_avg_duration_skips_nulls(tmp_db, ops): + _add_schedule(tmp_db, "s1") + _add_exec(tmp_db, "s1", status="success", duration_ms=200) + _add_exec(tmp_db, "s1", status="success", duration_ms=400) + _add_exec(tmp_db, "s1", status="failed", duration_ms=None) # NULL skipped + out = ops.get_agent_schedules_summary("agent-1", 168) + assert out["schedules"][0]["avg_duration_ms"] == 300 # (200+400)/2 + + +def test_tool_call_total_parsed_from_json(tmp_db, ops): + _add_schedule(tmp_db, "s1") + _add_exec(tmp_db, "s1", tool_calls=json.dumps([{"name": "Bash"}, {"name": "Read"}])) + _add_exec(tmp_db, "s1", tool_calls=json.dumps([{"tool": "Grep"}])) + _add_exec(tmp_db, "s1", tool_calls="not json") # malformed → skipped + _add_exec(tmp_db, "s1", tool_calls=None) # none → skipped + out = ops.get_agent_schedules_summary("agent-1", 168) + assert out["schedules"][0]["tool_call_total"] == 3 + + +def test_zero_run_schedule_still_appears(tmp_db, ops): + _add_schedule(tmp_db, "s1", name="busy") + _add_schedule(tmp_db, "s2", name="idle") + _add_exec(tmp_db, "s1", 
status="success") + out = ops.get_agent_schedules_summary("agent-1", 168) + by_id = {r["schedule_id"]: r for r in out["schedules"]} + assert set(by_id) == {"s1", "s2"} + idle = by_id["s2"] + assert idle["total_executions"] == 0 + assert idle["success_rate"] is None # zero terminal → None, not 0.0 + assert idle["avg_duration_ms"] is None + assert idle["tool_call_total"] == 0 + assert idle["last_run_status"] is None + + +def test_soft_deleted_schedule_excluded(tmp_db, ops): + _add_schedule(tmp_db, "live") + _add_schedule(tmp_db, "gone", deleted_at=_iso_ago(hours=1)) + _add_exec(tmp_db, "live", status="success") + _add_exec(tmp_db, "gone", status="success") + out = ops.get_agent_schedules_summary("agent-1", 168) + assert {r["schedule_id"] for r in out["schedules"]} == {"live"} + + +def test_out_of_window_runs_excluded(tmp_db, ops): + _add_schedule(tmp_db, "s1") + _add_exec(tmp_db, "s1", status="success", started_at=_iso_ago(minutes=30)) + _add_exec(tmp_db, "s1", status="failed", started_at=_iso_ago(days=10)) # outside 7d + out = ops.get_agent_schedules_summary("agent-1", 168) + row = out["schedules"][0] + assert row["total_executions"] == 1 + assert row["success_rate"] == 1.0