diff --git a/client/src/Hooks/useMonitorForm.ts b/client/src/Hooks/useMonitorForm.ts index 963409fc8a..c96b1bc7a9 100644 --- a/client/src/Hooks/useMonitorForm.ts +++ b/client/src/Hooks/useMonitorForm.ts @@ -14,6 +14,11 @@ const getBaseDefaults = (data?: Monitor | null) => ({ notifications: data?.notifications || [], statusWindowSize: data?.statusWindowSize || 5, statusWindowThreshold: data?.statusWindowThreshold || 60, + + escalationEnabled: data?.escalationEnabled ?? false, + escalationDelay: data?.escalationDelay ?? 60000, + escalationNotifications: data?.escalationNotifications || [], + geoCheckEnabled: data?.geoCheckEnabled ?? false, geoCheckLocations: data?.geoCheckLocations || [], geoCheckInterval: data?.geoCheckInterval || 300000, diff --git a/client/src/Pages/CreateMonitor/index.tsx b/client/src/Pages/CreateMonitor/index.tsx index 15b76eab36..7508685a5c 100644 --- a/client/src/Pages/CreateMonitor/index.tsx +++ b/client/src/Pages/CreateMonitor/index.tsx @@ -211,12 +211,25 @@ const CreateMonitorPage = () => { const watchedType = watch("type") as MonitorType; const watchedUseAdvancedMatching = watch("useAdvancedMatching") as boolean; + const watchedEscalationEnabled = watch("escalationEnabled") as boolean; + const watchedEscalationDelay = watch("escalationDelay") as number | undefined; const watchGeoCheckEnabled = watch("geoCheckEnabled") as boolean; + const [escalationDelayInput, setEscalationDelayInput] = useState("1"); + const [isEscalationDelayFocused, setIsEscalationDelayFocused] = useState(false); useEffect(() => { clearErrors(); }, [watchedType, clearErrors]); + useEffect(() => { + if (isEscalationDelayFocused) { + return; + } + + const minutes = Math.max(1, Math.round((watchedEscalationDelay ?? 
60000) / 60000)); + setEscalationDelayInput(String(minutes)); + }, [watchedEscalationDelay, isEscalationDelayFocused]); + const generalSettingsConfig = useMemo( () => getGeneralSettingsConfig(watchedType, t), [watchedType, t] @@ -765,6 +778,157 @@ const CreateMonitorPage = () => { } /> + + ( + + field.onChange(e.target.checked)} + /> + + {t("pages.createMonitor.form.escalation.option.enabled.label")} + + + )} + /> + + {watchedEscalationEnabled && ( + <> + ( + setIsEscalationDelayFocused(true)} + onBlur={() => { + setIsEscalationDelayFocused(false); + const parsedMinutes = Number(escalationDelayInput); + + if ( + escalationDelayInput.trim() === "" || + Number.isNaN(parsedMinutes) || + parsedMinutes <= 0 + ) { + setEscalationDelayInput("1"); + field.onChange(60000); + return; + } + + const minutes = Math.max(1, Math.round(parsedMinutes)); + setEscalationDelayInput(String(minutes)); + field.onChange(minutes * 60000); + }} + onChange={(e) => { + const rawValue = e.target.value; + setEscalationDelayInput(rawValue); + + if (rawValue.trim() === "") { + return; + } + + const parsedMinutes = Number(rawValue); + + if (Number.isNaN(parsedMinutes)) { + return; + } + + if (parsedMinutes <= 0) { + field.onChange(60000); + return; + } + + const minutes = Math.max(1, Math.round(parsedMinutes)); + field.onChange(minutes * 60000); + }} + inputProps={{ min: 0, step: 1 }} + fullWidth + error={!!fieldState.error} + helperText={fieldState.error?.message ?? ""} + /> + )} + /> + + { + const notificationOptions = (notifications ?? []).map((n) => ({ + ...n, + name: n.notificationName, + })); + const selectedNotifications = notificationOptions.filter((n) => + (field.value ?? 
[]).includes(n.id) + ); + + return ( + + option.name} + onChange={(_: unknown, newValue: typeof notificationOptions) => { + field.onChange(newValue.map((n) => n.id)); + }} + isOptionEqualToValue={(option, value) => option.id === value.id} + /> + {selectedNotifications.length > 0 && ( + + {selectedNotifications.map((notification, index) => ( + + + {notification.notificationName} + + { + field.onChange( + (field.value ?? []).filter( + (id: string) => id !== notification.id + ) + ); + }} + aria-label="Remove escalation notification" + > + + + {index < selectedNotifications.length - 1 && } + + ))} + + )} + + ); + }} + /> + + )} + + } + /> + {(watchedType === "http" || watchedType === "grpc" || watchedType === "websocket") && ( diff --git a/client/src/Types/Monitor.ts b/client/src/Types/Monitor.ts index 053b517d1d..de5352e2bc 100644 --- a/client/src/Types/Monitor.ts +++ b/client/src/Types/Monitor.ts @@ -79,6 +79,10 @@ export interface Monitor { recentChecks: CheckSnapshot[]; createdAt: string; updatedAt: string; + + escalationEnabled: boolean; + escalationDelay: number; + escalationNotifications: string[]; } export type MonitorWithChecks = Monitor; diff --git a/client/src/Validation/monitor.ts b/client/src/Validation/monitor.ts index 9acffe6fed..c7029a21d7 100644 --- a/client/src/Validation/monitor.ts +++ b/client/src/Validation/monitor.ts @@ -21,6 +21,14 @@ const baseSchema = z.object({ .number({ message: "Threshold percentage is required" }) .min(1, "Incident percentage must be at least 1") .max(100, "Incident percentage must be at most 100"), + + escalationEnabled: z.boolean().optional(), + escalationDelay: z + .number() + .min(60000, "Escalation delay must be at least 1 minute") + .optional(), + escalationNotifications: z.array(z.string()).optional(), + geoCheckEnabled: z.boolean().optional(), geoCheckLocations: z.array(z.enum(GeoContinents)).optional(), geoCheckInterval: z diff --git a/client/src/locales/en.json b/client/src/locales/en.json index 
92a21939f3..88fff85149 100644 --- a/client/src/locales/en.json +++ b/client/src/locales/en.json @@ -543,6 +543,26 @@ "description": "Select the notification channels you want to use", "title": "Notifications" }, + "escalation": { + "title": "Escalation Rules", + "description": "Configure an escalation notification if a monitor stays down for a defined amount of time.", + "option": { + "enabled": { + "label": "Enable escalation" + }, + "delay": { + "label": "Escalate after", + "value": { + "oneMinute": "1 minute", + "fiveMinutes": "5 minutes", + "tenMinutes": "10 minutes", + "fifteenMinutes": "15 minutes", + "thirtyMinutes": "30 minutes", + "sixtyMinutes": "60 minutes" + } + } + } + }, "type": { "description": "Select the type of check to perform", "optionDockerDescription": "Use Docker to monitor if a container is running.", diff --git a/server/src/db/models/Incident.ts b/server/src/db/models/Incident.ts index 82e2b5eb2b..812e55e11a 100644 --- a/server/src/db/models/Incident.ts +++ b/server/src/db/models/Incident.ts @@ -9,6 +9,8 @@ type IncidentDocumentBase = Omit( type: String, default: null, }, + + escalatedAt: { + type: Date, + default: null, + }, + }, { timestamps: true } ); diff --git a/server/src/db/models/Monitor.ts b/server/src/db/models/Monitor.ts index 036aeadad6..4b9390f43e 100644 --- a/server/src/db/models/Monitor.ts +++ b/server/src/db/models/Monitor.ts @@ -18,11 +18,14 @@ type CheckSnapshotDocument = Omit & { createdAt: Dat type MonitorDocumentBase = Omit< Monitor, - "id" | "userId" | "teamId" | "notifications" | "selectedDisks" | "statusWindow" | "recentChecks" | "createdAt" | "updatedAt" + "id" | "userId" | "teamId" | "notifications" | "escalationNotifications" | "selectedDisks" | "statusWindow" | "recentChecks" | "createdAt" | "updatedAt" > & { statusWindow: boolean[]; recentChecks: CheckSnapshotDocument[]; notifications: Types.ObjectId[]; + + escalationNotifications: Types.ObjectId[]; + selectedDisks: string[]; matchMethod?: MonitorMatchMethod; }; 
@@ -284,6 +287,23 @@ const MonitorSchema = new Schema( ref: "Notification", }, ], + + escalationEnabled: { + type: Boolean, + default: false, + }, + escalationDelay: { + type: Number, + default: 60000, + }, + escalationNotifications: [ + { + type: Schema.Types.ObjectId, + ref: "Notification", + }, + ], + + secret: { type: String, }, diff --git a/server/src/repositories/monitors/MongoMonitorsRepository.ts b/server/src/repositories/monitors/MongoMonitorsRepository.ts index b2d7594483..0854229073 100644 --- a/server/src/repositories/monitors/MongoMonitorsRepository.ts +++ b/server/src/repositories/monitors/MongoMonitorsRepository.ts @@ -351,6 +351,7 @@ class MongoMonitorsRepository implements IMonitorsRepository { }; const notificationIds = (doc.notifications ?? []).map((notification) => toStringId(notification)); + const escalationNotificationIds = (doc.escalationNotifications ?? []).map((notification) => toStringId(notification)); return { id: toStringId(doc._id), @@ -374,6 +375,9 @@ class MongoMonitorsRepository implements IMonitorsRepository { interval: doc.interval, uptimePercentage: doc.uptimePercentage ?? undefined, notifications: notificationIds, + escalationEnabled: doc.escalationEnabled ?? false, + escalationDelay: doc.escalationDelay ?? 60000, + escalationNotifications: escalationNotificationIds, secret: doc.secret ?? undefined, cpuAlertThreshold: doc.cpuAlertThreshold, cpuAlertCounter: doc.cpuAlertCounter, @@ -410,6 +414,7 @@ class MongoMonitorsRepository implements IMonitorsRepository { }; const notificationIds = (doc.notifications ?? []).map((notification: unknown) => toStringId(notification)); + const escalationNotificationIds = (doc.escalationNotifications ?? []).map((notification: unknown) => toStringId(notification)); return { id: toStringId(doc._id), @@ -433,6 +438,9 @@ class MongoMonitorsRepository implements IMonitorsRepository { interval: doc.interval, uptimePercentage: doc.uptimePercentage ?? 
undefined, notifications: notificationIds, + escalationEnabled: doc.escalationEnabled ?? false, + escalationDelay: doc.escalationDelay ?? 60000, + escalationNotifications: escalationNotificationIds, secret: doc.secret ?? undefined, cpuAlertThreshold: doc.cpuAlertThreshold, cpuAlertCounter: doc.cpuAlertCounter, diff --git a/server/src/service/business/monitorService.ts b/server/src/service/business/monitorService.ts index 71c9d9d906..b9215095f9 100644 --- a/server/src/service/business/monitorService.ts +++ b/server/src/service/business/monitorService.ts @@ -567,6 +567,9 @@ export class MonitorService implements IMonitorService { id: "", teamId, userId, + escalationEnabled: (monitor as Partial).escalationEnabled ?? false, + escalationDelay: (monitor as Partial).escalationDelay ?? 60000, + escalationNotifications: (monitor as Partial).escalationNotifications ?? [], recentChecks: [], createdAt: "", updatedAt: "", diff --git a/server/src/service/infrastructure/SuperSimpleQueue/SuperSimpleQueueHelper.ts b/server/src/service/infrastructure/SuperSimpleQueue/SuperSimpleQueueHelper.ts index b6908127b2..a0a5c7effc 100644 --- a/server/src/service/infrastructure/SuperSimpleQueue/SuperSimpleQueueHelper.ts +++ b/server/src/service/infrastructure/SuperSimpleQueue/SuperSimpleQueueHelper.ts @@ -177,6 +177,46 @@ export class SuperSimpleQueueHelper implements ISuperSimpleQueueHelper { stack: error instanceof Error ? error.stack : undefined, }); }); + + // Step 8. 
Handle escalation notifications for active incidents (best effort, don't wait) + if (statusChangeResult.monitor.escalationEnabled && statusChangeResult.monitor.status === "down") { + this.incidentsRepository + .findActiveByMonitorId(statusChangeResult.monitor.id, statusChangeResult.monitor.teamId) + .then(async (activeIncident) => { + if (!activeIncident || activeIncident.escalatedAt) { + return; + } + + const incidentStartTime = new Date(activeIncident.startTime).getTime(); + const elapsedTimeMs = Date.now() - incidentStartTime; + const escalationDelayMs = statusChangeResult.monitor.escalationDelay ?? 60000; + + if (elapsedTimeMs < escalationDelayMs) { + return; + } + + const escalationSent = await this.notificationsService.handleEscalation( + statusChangeResult.monitor, + status, + activeIncident, + elapsedTimeMs + ); + + if (escalationSent) { + await this.incidentsRepository.updateById(activeIncident.id, activeIncident.teamId, { + escalatedAt: new Date().toISOString(), /* ISO-8601 string; an epoch-millisecond string such as Date.now().toString() does not parse with new Date(string) */ + }); + } + }) + .catch((error: unknown) => { + this.logger.warn({ + message: `Error sending escalation for job ${monitor.id}: ${error instanceof Error ? error.message : "Unknown error"}`, + service: SERVICE_NAME, + method: "getMonitorJob", + stack: error instanceof Error ? error.stack : undefined, + }); + }); + } } catch (error: unknown) { this.logger.warn({ message: error instanceof Error ? 
error.message : "Unknown error", diff --git a/server/src/service/infrastructure/notificationsService.ts b/server/src/service/infrastructure/notificationsService.ts index c75477c88c..06685a1042 100644 --- a/server/src/service/infrastructure/notificationsService.ts +++ b/server/src/service/infrastructure/notificationsService.ts @@ -7,6 +7,8 @@ import type { ISettingsService } from "@/service/system/settingsService.js"; import { ILogger } from "@/utils/logger.js"; import type { INotificationMessageBuilder } from "@/service/infrastructure/notificationMessageBuilder.js"; +import type { Incident } from "@/types/incident.js"; + export interface INotificationsService { createNotification: (notificationData: Partial, userId: string, teamId: string) => Promise; findById: (id: string, teamId: string) => Promise; @@ -14,6 +16,7 @@ export interface INotificationsService { updateById(id: string, teamId: string, updateData: Partial): Promise; deleteById: (id: string, teamId: string) => Promise; handleNotifications: (monitor: Monitor, monitorStatusResponse: MonitorStatusResponse, decision: MonitorActionDecision) => Promise; + handleEscalation: (monitor: Monitor, monitorStatusResponse: MonitorStatusResponse, incident: Incident, elapsedTimeMs: number) => Promise; sendTestNotification: (notification: Partial) => Promise; testAllNotifications: (notificationIds: string[]) => Promise; @@ -129,7 +132,8 @@ export class NotificationsService implements INotificationsService { }); } // Return true if all notifications succeeded - return succeeded === notifications.length; + // Mark escalation as sent when at least one channel succeeds to avoid duplicate repeats. 
+ return succeeded > 0; }; handleNotifications = async (monitor: Monitor, monitorStatusResponse: MonitorStatusResponse, decision: MonitorActionDecision) => { @@ -141,6 +145,88 @@ export class NotificationsService implements INotificationsService { return await this.sendNotifications(monitor, monitorStatusResponse, decision); }; + createEscalationMessage = (monitor: Monitor, monitorStatusResponse: MonitorStatusResponse, incident: Incident, elapsedTimeMs: number): NotificationMessage => { + const settings = this.settingsService.getSettings(); + const clientHost = settings.clientHost || "Host not defined"; + + const details = [ + `URL: ${monitor.url}`, + "Status: Down", + `Monitor Type: ${monitor.type}`, + `Incident Start Time: ${incident.startTime}`, + `Incident Duration: ${Math.floor(elapsedTimeMs / 60000)} minutes`, + ]; + + if (monitorStatusResponse.message) { + details.push(`Error: ${monitorStatusResponse.message}`); + } + + return { + type: "monitor_down", + severity: "critical", + monitor: { + id: monitor.id, + name: monitor.name, + url: monitor.url, + type: monitor.type, + status: "down", + }, + content: { + title: `Escalation: Monitor "${monitor.name}" is down`, + summary: `Monitor "${monitor.name}" has been down for ${Math.floor(elapsedTimeMs / 60000)} minutes. 
This is an escalation alert.`, + details, + timestamp: new Date(), + }, + clientHost, + metadata: { + teamId: monitor.teamId, + notificationReason: "escalation", + }, + }; + }; + + handleEscalation = async (monitor: Monitor, monitorStatusResponse: MonitorStatusResponse, incident: Incident, elapsedTimeMs: number) => { + if (!monitor.escalationEnabled || !monitor.escalationNotifications || monitor.escalationNotifications.length === 0) { + return false; + } + + return await this.sendEscalations(monitor, monitorStatusResponse, incident, elapsedTimeMs); + }; + + sendEscalations = async (monitor: Monitor, monitorStatusResponse: MonitorStatusResponse, incident: Incident, elapsedTimeMs: number) => { + const escalationIds = monitor.escalationNotifications ?? []; + const notifications = await this.notificationsRepository.findNotificationsByIds(escalationIds); + + if (notifications.length === 0) { + return false; + } + + const notificationMessage = this.createEscalationMessage( + monitor, + monitorStatusResponse, + incident, + elapsedTimeMs + ); + + const tasks = notifications.map((notification) => + this.send(notification, monitor, monitorStatusResponse, {} as MonitorActionDecision, notificationMessage) + ); + + const outcomes = await Promise.all(tasks); + const succeeded = outcomes.filter(Boolean).length; + const failed = outcomes.length - succeeded; + + if (failed > 0) { + this.logger.warn({ + message: `Escalation send completed with ${succeeded} success, ${failed} failure(s)`, + service: SERVICE_NAME, + method: "sendEscalations", + }); + } + + /* Report success once any channel delivered: the caller only stamps escalatedAt on a truthy result, so requiring every channel would re-send to the ones that already succeeded on the next check. */ return succeeded > 0; + }; + sendTestNotification = async (notification: Partial) => { switch (notification.type) { case "email": diff --git a/server/src/types/incident.ts b/server/src/types/incident.ts index 6b076ff835..7e5073512e 100644 --- a/server/src/types/incident.ts +++ b/server/src/types/incident.ts @@ -16,6 +16,7 @@ export interface Incident { resolvedBy?: string | null; resolvedByEmail?: string | null; 
comment?: string | null; + escalatedAt?: string | null; createdAt: string; updatedAt: string; } diff --git a/server/src/types/monitor.ts b/server/src/types/monitor.ts index f29ce75d78..80093e59f3 100644 --- a/server/src/types/monitor.ts +++ b/server/src/types/monitor.ts @@ -56,6 +56,10 @@ export interface Monitor { recentChecks: CheckSnapshot[]; createdAt: string; updatedAt: string; + + escalationEnabled: boolean; + escalationDelay: number; + escalationNotifications: string[]; } export interface MonitorsSummary { diff --git a/server/src/validation/monitorValidation.ts b/server/src/validation/monitorValidation.ts index df000ecef2..49b14c576f 100644 --- a/server/src/validation/monitorValidation.ts +++ b/server/src/validation/monitorValidation.ts @@ -2,6 +2,7 @@ import { z } from "zod"; import { booleanCoercion } from "./shared.js"; import { GeoContinents } from "@/types/geoCheck.js"; import { MonitorMatchMethods, MonitorTypes } from "@/types/monitor.js"; +import { es } from "zod/v4/locales"; export const getMonitorByIdParamValidation = z.object({ monitorId: z.string().min(1, "Monitor ID is required"), @@ -78,6 +79,10 @@ export const createMonitorBodyValidation = z.object({ geoCheckEnabled: z.boolean().optional(), geoCheckLocations: z.array(z.enum(GeoContinents)).optional(), geoCheckInterval: z.number().min(300000).optional(), + + escalationEnabled: z.boolean().optional(), + escalationDelay: z.number().min(60000).optional(), + escalationNotifications: z.array(z.string()).optional(), }); export const editMonitorBodyValidation = z.object({ @@ -107,6 +112,10 @@ export const editMonitorBodyValidation = z.object({ geoCheckEnabled: z.boolean().optional(), geoCheckLocations: z.array(z.enum(GeoContinents)).optional(), geoCheckInterval: z.number().min(300000).optional(), + + escalationEnabled: z.boolean().optional(), + escalationDelay: z.number().min(60000).optional(), + escalationNotifications: z.array(z.string()).optional(), }); export const pauseMonitorParamValidation = 
z.object({ @@ -162,6 +171,10 @@ const importedMonitorSchema = z.object({ geoCheckInterval: z.number().min(300000).default(300000), createdAt: z.string().optional(), updatedAt: z.string().optional(), + + escalationEnabled: z.boolean().default(false), + escalationDelay: z.number().min(60000).default(60000), + escalationNotifications: z.array(z.string()).default([]), }); export const importMonitorsBodyValidation = z.object({