diff --git a/client/src/Hooks/useMonitorForm.ts b/client/src/Hooks/useMonitorForm.ts index 963409fc8a..d3cd0621ad 100644 --- a/client/src/Hooks/useMonitorForm.ts +++ b/client/src/Hooks/useMonitorForm.ts @@ -12,6 +12,7 @@ const getBaseDefaults = (data?: Monitor | null) => ({ description: data?.description || "", interval: data?.interval || 60000, notifications: data?.notifications || [], + escalationRules: data?.escalationRules || [], statusWindowSize: data?.statusWindowSize || 5, statusWindowThreshold: data?.statusWindowThreshold || 60, geoCheckEnabled: data?.geoCheckEnabled ?? false, diff --git a/client/src/Pages/CreateMonitor/index.tsx b/client/src/Pages/CreateMonitor/index.tsx index 15b76eab36..7e4f1cc9df 100644 --- a/client/src/Pages/CreateMonitor/index.tsx +++ b/client/src/Pages/CreateMonitor/index.tsx @@ -36,6 +36,7 @@ import { type Monitor, type MonitorType, type GamesMap, + type EscalationRule, supportsGeoCheck, } from "@/Types/Monitor"; import type { Notification } from "@/Types/Notification"; @@ -765,6 +766,93 @@ const CreateMonitorPage = () => { } /> + { + const escalationRules = field.value ?? []; + return ( + + {escalationRules.map((rule: EscalationRule, index: number) => ( + + { + const newRules = [...escalationRules]; + newRules[index] = { + ...rule, + delayMinutes: parseInt(e.target.value) || 1, + }; + field.onChange(newRules); + }} + size="small" + sx={{ width: 120 }} + /> + ({ + ...n, + name: n.notificationName, + }))} + value={ + notifications?.find((n) => n.id === rule.notificationId) ? 
{ + ...notifications.find((n) => n.id === rule.notificationId)!, + name: notifications.find((n) => n.id === rule.notificationId)!.notificationName, + } : null + } + getOptionLabel={(option) => option.name} + onChange={(_, newValue) => { + const newRules = [...escalationRules]; + newRules[index] = { + ...rule, + notificationId: newValue?.id || "", + }; + field.onChange(newRules); + }} + isOptionEqualToValue={(option, value) => option.id === value.id} + sx={{ flexGrow: 1 }} + size="small" + /> + { + field.onChange(escalationRules.filter((_, i) => i !== index)); + }} + aria-label="Remove escalation rule" + > + + + + ))} + + + ); + }} + /> + } + /> + {(watchedType === "http" || watchedType === "grpc" || watchedType === "websocket") && ( diff --git a/client/src/Types/Monitor.ts b/client/src/Types/Monitor.ts index 053b517d1d..8c6fe430fc 100644 --- a/client/src/Types/Monitor.ts +++ b/client/src/Types/Monitor.ts @@ -38,6 +38,11 @@ export type MonitorStatus = (typeof MonitorStatuses)[number]; export type MonitorMatchMethod = "equal" | "include" | "regex" | ""; +export interface EscalationRule { + delayMinutes: number; + notificationId: string; +} + export interface Monitor { id: string; userId: string; @@ -60,6 +65,7 @@ export interface Monitor { interval: number; uptimePercentage?: number; notifications: string[]; + escalationRules: EscalationRule[]; secret?: string; cpuAlertThreshold: number; cpuAlertCounter: number; diff --git a/client/src/Validation/monitor.ts b/client/src/Validation/monitor.ts index 9acffe6fed..eae1e90d55 100644 --- a/client/src/Validation/monitor.ts +++ b/client/src/Validation/monitor.ts @@ -1,6 +1,12 @@ import { z } from "zod"; import { GeoContinents } from "@/Types/GeoCheck"; +// Escalation rule schema +const escalationRuleSchema = z.object({ + delayMinutes: z.number().min(1, "Delay must be at least 1 minute"), + notificationId: z.string().min(1, "Notification is required"), +}); + // URL schema with custom error message const urlSchema = z.url({ 
message: "Please enter a valid URL" }); @@ -13,6 +19,7 @@ const baseSchema = z.object({ description: z.string().optional(), interval: z.number().min(15000, "Interval must be at least 15 seconds"), notifications: z.array(z.string()), + escalationRules: z.array(escalationRuleSchema), statusWindowSize: z .number({ message: "Status window size is required" }) .min(1, "Status window size must be at least 1") diff --git a/server/src/config/services.ts b/server/src/config/services.ts index b31c8a5e91..d5fb886512 100644 --- a/server/src/config/services.ts +++ b/server/src/config/services.ts @@ -9,6 +9,7 @@ import { SuperSimpleQueue, SuperSimpleQueueHelper, NotificationsService, + EscalationService, StatusService, NotificationMessageBuilder, MonitorService, @@ -34,6 +35,7 @@ import { IBufferService, ISuperSimpleQueue, INotificationsService, + IEscalationService, IStatusService, IMonitorService, IUserService, @@ -129,6 +131,7 @@ export type InitializedServices = { incidentService: IIncidentService; logger: ILogger; notificationsService: INotificationsService; + escalationService: IEscalationService; statusPageService: IStatusPageService; notificationMessageBuilder: INotificationMessageBuilder; @@ -205,8 +208,6 @@ export const initializeServices = async ({ const notificationMessageBuilder = new NotificationMessageBuilder(); - const incidentService = new IncidentService(logger, incidentsRepository, monitorsRepository, usersRepository, notificationMessageBuilder); - const checkService = new CheckService(monitorsRepository, logger, checksRepository); const globalPingService = new GlobalPingService(logger); @@ -246,6 +247,10 @@ export const initializeServices = async ({ notificationMessageBuilder ); + const escalationService = new EscalationService(logger, notificationsService); + + const incidentService = new IncidentService(logger, incidentsRepository, monitorsRepository, usersRepository, notificationMessageBuilder, escalationService); + const superSimpleQueueHelper = new 
SuperSimpleQueueHelper( logger, networkService, @@ -326,6 +331,7 @@ export const initializeServices = async ({ incidentService, logger, notificationsService, + escalationService, statusPageService, notificationMessageBuilder, diff --git a/server/src/db/models/Monitor.ts b/server/src/db/models/Monitor.ts index 036aeadad6..9f28e5e531 100644 --- a/server/src/db/models/Monitor.ts +++ b/server/src/db/models/Monitor.ts @@ -18,11 +18,12 @@ type CheckSnapshotDocument = Omit & { createdAt: Dat type MonitorDocumentBase = Omit< Monitor, - "id" | "userId" | "teamId" | "notifications" | "selectedDisks" | "statusWindow" | "recentChecks" | "createdAt" | "updatedAt" + "id" | "userId" | "teamId" | "notifications" | "escalationRules" | "selectedDisks" | "statusWindow" | "recentChecks" | "createdAt" | "updatedAt" > & { statusWindow: boolean[]; recentChecks: CheckSnapshotDocument[]; notifications: Types.ObjectId[]; + escalationRules: { delayMinutes: number; notificationId: Types.ObjectId }[]; selectedDisks: string[]; matchMethod?: MonitorMatchMethod; }; @@ -198,6 +199,14 @@ const checkSnapshotSchema = new Schema( { _id: false } ); +const escalationRuleSchema = new Schema( + { + delayMinutes: { type: Number, required: true }, + notificationId: { type: Schema.Types.ObjectId, ref: "Notification", required: true }, + }, + { _id: false } +); + const MonitorSchema = new Schema( { userId: { @@ -284,6 +293,7 @@ const MonitorSchema = new Schema( ref: "Notification", }, ], + escalationRules: [escalationRuleSchema], secret: { type: String, }, diff --git a/server/src/repositories/monitors/MongoMonitorsRepository.ts b/server/src/repositories/monitors/MongoMonitorsRepository.ts index b2d7594483..37a29bd583 100644 --- a/server/src/repositories/monitors/MongoMonitorsRepository.ts +++ b/server/src/repositories/monitors/MongoMonitorsRepository.ts @@ -374,6 +374,10 @@ class MongoMonitorsRepository implements IMonitorsRepository { interval: doc.interval, uptimePercentage: doc.uptimePercentage ?? 
undefined, notifications: notificationIds, + escalationRules: (doc.escalationRules ?? []).map((rule) => ({ + delayMinutes: rule.delayMinutes, + notificationId: toStringId(rule.notificationId), + })), secret: doc.secret ?? undefined, cpuAlertThreshold: doc.cpuAlertThreshold, cpuAlertCounter: doc.cpuAlertCounter, @@ -433,6 +437,10 @@ class MongoMonitorsRepository implements IMonitorsRepository { interval: doc.interval, uptimePercentage: doc.uptimePercentage ?? undefined, notifications: notificationIds, + escalationRules: (doc.escalationRules ?? []).map((rule) => ({ + delayMinutes: rule.delayMinutes, + notificationId: toStringId(rule.notificationId), + })), secret: doc.secret ?? undefined, cpuAlertThreshold: doc.cpuAlertThreshold, cpuAlertCounter: doc.cpuAlertCounter, diff --git a/server/src/service/business/incidentService.ts b/server/src/service/business/incidentService.ts index 4790f9aacc..d36542821b 100644 --- a/server/src/service/business/incidentService.ts +++ b/server/src/service/business/incidentService.ts @@ -7,6 +7,7 @@ import type { IIncidentsRepository, IMonitorsRepository, IUsersRepository } from import type { Incident, IncidentSummary, User } from "@/types/index.js"; import type { MonitorActionDecision } from "@/service/infrastructure/SuperSimpleQueue/SuperSimpleQueueHelper.js"; import type { INotificationMessageBuilder } from "@/service/infrastructure/notificationMessageBuilder.js"; +import type { IEscalationService } from "@/service/infrastructure/escalationService.js"; import type { ILogger } from "@/utils/logger.js"; export interface IIncidentService { @@ -39,19 +40,22 @@ export class IncidentService implements IIncidentService { private monitorsRepository: IMonitorsRepository; private usersRepository: IUsersRepository; private notificationMessageBuilder: INotificationMessageBuilder; + private escalationService: IEscalationService; constructor( logger: ILogger, incidentsRepository: IIncidentsRepository, monitorsRepository: IMonitorsRepository, 
usersRepository: IUsersRepository, - notificationMessageBuilder: INotificationMessageBuilder + notificationMessageBuilder: INotificationMessageBuilder, + escalationService: IEscalationService ) { this.logger = logger; this.incidentsRepository = incidentsRepository; this.monitorsRepository = monitorsRepository; this.usersRepository = usersRepository; this.notificationMessageBuilder = notificationMessageBuilder; + this.escalationService = escalationService; } get serviceName() { @@ -91,7 +95,12 @@ export class IncidentService implements IIncidentService { statusCode, message, }; - return await this.incidentsRepository.create(incident); + const createdIncident = await this.incidentsRepository.create(incident); + + // Schedule escalation notifications + await this.escalationService.scheduleEscalation(monitor, createdIncident.id); + + return createdIncident; } } @@ -99,6 +108,10 @@ export class IncidentService implements IIncidentService { if (!activeIncident) { return null; } + + // Cancel any scheduled escalations + await this.escalationService.cancelEscalation(monitor.id); + activeIncident.status = false; activeIncident.endTime = Date.now().toString(); activeIncident.resolutionType = "automatic"; @@ -153,6 +166,9 @@ export class IncidentService implements IIncidentService { incident.comment = comment || null; incident.endTime = Date.now().toString(); + // Cancel any scheduled escalations + await this.escalationService.cancelEscalation(incident.monitorId); + const resolvedIncident = await this.incidentsRepository.updateById(incident.id, teamId, incident); this.logger.debug({ diff --git a/server/src/service/index.ts b/server/src/service/index.ts index 9686d2a135..2eb90ec690 100644 --- a/server/src/service/index.ts +++ b/server/src/service/index.ts @@ -15,6 +15,7 @@ export * from "@/service/infrastructure/SuperSimpleQueue/SuperSimpleQueueHelper. 
export * from "@/service/infrastructure/notificationMessageBuilder.js"; export * from "@/service/infrastructure/bufferService.js"; export * from "@/service/infrastructure/emailService.js"; +export * from "@/service/infrastructure/escalationService.js"; export * from "@/service/infrastructure/globalPingService.js"; export * from "@/service/infrastructure/networkService.js"; export * from "@/service/infrastructure/notificationsService.js"; diff --git a/server/src/service/infrastructure/emailService.ts b/server/src/service/infrastructure/emailService.ts index f536f3f188..3405b7d75d 100755 --- a/server/src/service/infrastructure/emailService.ts +++ b/server/src/service/infrastructure/emailService.ts @@ -112,6 +112,16 @@ export class EmailService implements IEmailService { config = transportConfig; } else { config = await this.settingsService.getDBSettings(); + if (!config.systemEmailHost) { + config = { + ...config, + systemEmailHost: process.env.SYSTEM_EMAIL_HOST, + systemEmailPort: process.env.SYSTEM_EMAIL_PORT ? 
Number(process.env.SYSTEM_EMAIL_PORT) : undefined, + systemEmailAddress: process.env.SYSTEM_EMAIL_ADDRESS, + systemEmailPassword: process.env.SYSTEM_EMAIL_PASSWORD, + systemEmailSecure: process.env.SYSTEM_EMAIL_SECURE === "true", + }; + } } const { systemEmailHost,
diff --git a/server/src/service/infrastructure/escalationService.ts b/server/src/service/infrastructure/escalationService.ts
new file mode 100644
index 0000000000..67746efdfd
--- /dev/null
+++ b/server/src/service/infrastructure/escalationService.ts
@@ -0,0 +1,154 @@
+import type { Monitor, EscalationRule } from "@/types/index.js";
+import type { INotificationsService } from "@/service/infrastructure/notificationsService.js";
+import type { ILogger } from "@/utils/logger.js";
+
+const SERVICE_NAME = "EscalationService";
+
+/**
+ * Largest delay `setTimeout` accepts. Node.js resets anything larger to 1 ms,
+ * which would fire a long escalation almost immediately instead of late.
+ */
+const MAX_TIMEOUT_MS = 2 ** 31 - 1;
+
+type TimerHandle = ReturnType<typeof setTimeout>;
+
+/** Schedules and cancels delayed follow-up notifications for a monitor's incident. */
+export interface IEscalationService {
+	readonly serviceName: string;
+	/** Replaces the monitor's pending escalations with one timer per escalation rule. */
+	scheduleEscalation(monitor: Monitor, incidentId: string): Promise<void>;
+	/** Clears every pending escalation timer for the monitor; does nothing when none exist. */
+	cancelEscalation(monitorId: string): Promise<void>;
+}
+
+/**
+ * Escalation scheduler backed by in-process `setTimeout` timers.
+ *
+ * Timers are held in memory only, so pending escalations do not survive a
+ * process restart.
+ */
+export class EscalationService implements IEscalationService {
+	static SERVICE_NAME = SERVICE_NAME;
+
+	readonly serviceName = SERVICE_NAME;
+
+	private logger: ILogger;
+	private notificationsService: INotificationsService;
+	/**
+	 * Pending timers grouped by exact monitor id. Holding a set per monitor lets
+	 * several rules share the same delay without overwriting each other, and
+	 * cancellation never has to match ids by string prefix.
+	 */
+	private scheduledTimeouts: Map<string, Set<TimerHandle>> = new Map();
+
+	constructor(logger: ILogger, notificationsService: INotificationsService) {
+		this.logger = logger;
+		this.notificationsService = notificationsService;
+	}
+
+	/**
+	 * Starts one timer per escalation rule of `monitor`.
+	 *
+	 * @param monitor - Monitor whose `escalationRules` are scheduled.
+	 * @param incidentId - Incident the escalation belongs to; used for logging only.
+	 */
+	scheduleEscalation = async (monitor: Monitor, incidentId: string): Promise<void> => {
+		// Cancel any existing escalation for this monitor
+		await this.cancelEscalation(monitor.id);
+
+		const rules = monitor.escalationRules ?? [];
+		if (rules.length === 0) {
+			return;
+		}
+
+		const timers = new Set<TimerHandle>();
+		this.scheduledTimeouts.set(monitor.id, timers);
+
+		for (const rule of rules) {
+			const delayMs = Math.min(rule.delayMinutes * 60 * 1000, MAX_TIMEOUT_MS);
+			const timeoutId: TimerHandle = setTimeout(async () => {
+				try {
+					// NOTE(review): the incident is not re-checked here; this relies on
+					// cancelEscalation having cleared the timer once it was resolved.
+					await this.sendEscalationNotification(monitor, rule);
+				} catch (error: unknown) {
+					this.logger.error({
+						message: `Failed to send escalation notification`,
+						service: SERVICE_NAME,
+						method: "scheduleEscalation",
+						details: {
+							monitorId: monitor.id,
+							incidentId,
+							notificationId: rule.notificationId,
+							reason: error instanceof Error ? error.message : String(error),
+						},
+						stack: error instanceof Error ? error.stack : undefined,
+					});
+				} finally {
+					// Drop only this timer, and only unregister the set if a newer
+					// schedule for the same monitor has not replaced it meanwhile.
+					timers.delete(timeoutId);
+					if (timers.size === 0 && this.scheduledTimeouts.get(monitor.id) === timers) {
+						this.scheduledTimeouts.delete(monitor.id);
+					}
+				}
+			}, delayMs);
+
+			timers.add(timeoutId);
+		}
+
+		this.logger.info({
+			message: `Scheduled ${rules.length} escalation notifications for monitor ${monitor.id}`,
+			service: SERVICE_NAME,
+			method: "scheduleEscalation",
+			details: { monitorId: monitor.id, incidentId },
+		});
+	};
+
+	/**
+	 * Clears all pending escalation timers of one monitor.
+	 *
+	 * @param monitorId - Exact id of the monitor whose timers are cleared.
+	 */
+	cancelEscalation = async (monitorId: string): Promise<void> => {
+		const timers = this.scheduledTimeouts.get(monitorId);
+		if (!timers) {
+			return;
+		}
+
+		this.scheduledTimeouts.delete(monitorId);
+		for (const timeoutId of timers) {
+			clearTimeout(timeoutId);
+		}
+
+		if (timers.size > 0) {
+			this.logger.info({
+				message: `Cancelled ${timers.size} escalation notifications for monitor ${monitorId}`,
+				service: SERVICE_NAME,
+				method: "cancelEscalation",
+				details: { monitorId },
+			});
+		}
+	};
+
+	/**
+	 * Delivers one escalation to the channel named by `rule.notificationId`.
+	 *
+	 * @throws Error when that channel is not among the team's notifications, so
+	 * the caller logs the skipped escalation instead of dropping it silently.
+	 */
+	private sendEscalationNotification = async (monitor: Monitor, rule: EscalationRule): Promise<void> => {
+		// Send only to the specific notification channel
+		const notifications = await this.notificationsService.findNotificationsByTeamId(monitor.teamId);
+		const targetNotification = notifications.find((n) => n.id === rule.notificationId);
+
+		if (!targetNotification) {
+			throw new Error(`Escalation notification ${rule.notificationId} not found for monitor ${monitor.id}`);
+		}
+
+		// NOTE(review): sendTestNotification presumably delivers the generic test
+		// payload rather than an incident alert - confirm, and switch to the real
+		// alert path of INotificationsService if one accepts a single channel.
+		await this.notificationsService.sendTestNotification(targetNotification);
+	};
+}
\ No newline at end of file
diff --git a/server/src/types/monitor.ts b/server/src/types/monitor.ts index f29ce75d78..c06c8fdab8 100644 --- a/server/src/types/monitor.ts +++ b/server/src/types/monitor.ts @@ -15,6 +15,11 @@ export type MonitorStatus = (typeof MonitorStatuses)[number]; export const MonitorMatchMethods = ["equal", "include", "regex"] as const; export type MonitorMatchMethod = (typeof MonitorMatchMethods)[number] | ""; +export interface EscalationRule { + delayMinutes: number; + notificationId: string; +} + export interface Monitor { id: string; userId: string; @@ -37,6 +42,7 @@ export interface Monitor { interval: number; uptimePercentage?: number; notifications: string[]; + escalationRules: EscalationRule[]; secret?: string; cpuAlertThreshold: number; cpuAlertCounter: number; diff --git a/server/src/validation/monitorValidation.ts b/server/src/validation/monitorValidation.ts index df000ecef2..ec29a94a8b 100644 --- a/server/src/validation/monitorValidation.ts +++ b/server/src/validation/monitorValidation.ts @@ -3,6 +3,11 @@ import { booleanCoercion } from "./shared.js"; import { GeoContinents } from "@/types/geoCheck.js"; import { MonitorMatchMethods, MonitorTypes } from "@/types/monitor.js"; +const escalationRuleSchema = z.object({ + delayMinutes: z.number().min(1, "Delay must be at least 1 minute"), + notificationId: z.string().min(1, "Notification ID is required"), +}); + export const getMonitorByIdParamValidation = z.object({ monitorId: z.string().min(1, "Monitor ID is required"), }); @@ -67,6 +72,7 @@ export const createMonitorBodyValidation = z.object({ diskAlertThreshold: z.number().optional(), tempAlertThreshold: z.number().optional(), notifications: z.array(z.string()).optional(), + escalationRules: z.array(escalationRuleSchema).optional(), secret: z.string().optional(), jsonPath: z.union([z.string(), 
z.literal("")]).optional(), expectedValue: z.union([z.string(), z.literal("")]).optional(), @@ -89,6 +95,7 @@ export const editMonitorBodyValidation = z.object({ description: z.union([z.string(), z.literal("")]).optional(), interval: z.number().optional(), notifications: z.array(z.string()).optional(), + escalationRules: z.array(escalationRuleSchema).optional(), secret: z.string().optional(), ignoreTlsErrors: z.boolean().optional(), useAdvancedMatching: z.boolean().optional(), @@ -144,6 +151,7 @@ const importedMonitorSchema = z.object({ interval: z.number().default(60000), uptimePercentage: z.number().optional(), notifications: z.array(z.string()).default([]), + escalationRules: z.array(escalationRuleSchema).default([]), secret: z.string().optional(), cpuAlertThreshold: z.number().default(100), cpuAlertCounter: z.number().default(5),