From cb01a5d7f7c84b8780a3a08f06273a62369de9d3 Mon Sep 17 00:00:00 2001 From: Daniel Carrillo Date: Fri, 17 Apr 2026 17:14:37 +0200 Subject: [PATCH] feat: maintenance mode for monitors --- README.md | 45 ++++++++++++++++++ src/api/status.test.ts | 31 ++++++++++++ src/api/status.ts | 12 +++-- src/config/config.ts | 31 ++++++++++++ src/config/types.ts | 6 +++ src/processor/maintenance-import.ts | 2 + src/processor/processor.ts | 18 +++++-- src/processor/types.ts | 4 +- src/types.ts | 16 ++++++- src/utils/maintenance.test.ts | 50 ++++++++++++++++++++ src/utils/maintenance.ts | 27 +++++++++++ status-page/src/components/MonitorCard.astro | 50 +++++++++++++++++--- status-page/src/pages/index.astro | 2 +- wrangler.example.toml | 7 +++ 14 files changed, 279 insertions(+), 22 deletions(-) create mode 100644 src/processor/maintenance-import.ts create mode 100644 src/utils/maintenance.test.ts create mode 100644 src/utils/maintenance.ts diff --git a/README.md b/README.md index ad9b5cc..b8e49a3 100644 --- a/README.md +++ b/README.md @@ -16,6 +16,7 @@ Live [example](https://uptime.ifconfig.es/). - [Configuration](#configuration) - [Settings](#settings) - [Monitor Types](#monitor-types) + - [Maintenance Windows](#maintenance-windows) - [Regional Monitoring](#regional-monitoring) - [Alerts](#alerts) - [Status Page](#status-page) @@ -151,6 +152,49 @@ Each monitor can override the global default\_\* settings: ### Monitor Types +### Maintenance Windows + +You can configure scheduled maintenance windows for individual monitors, suspending alerts while downtime is still tracked for metrics. + +**Syntax** +Each monitor supports a `maintenance` array, with one or more objects specifying a `start` and `end` timestamp in UTC ISO8601 format. 
+ +```yaml +- name: 'web-api' + type: http + target: 'https://example.com/health' + maintenance: + - start: '2026-05-10T23:00:00Z' + end: '2026-05-11T01:00:00Z' + - start: '2026-06-01T02:00:00Z' + end: '2026-06-01T03:30:00Z' + alerts: ['default'] +``` + +**Behavior** + +- At any time when `now` (UTC) is within a window, the monitor is shown as "maintenance": + - Status page and API both clearly show "maintenance" (distinct from "up" or "down"). + - Downtime during maintenance _is still counted_ for metrics and reporting. + - All alerts are suppressed—no notifications are sent for failures during maintenance. +- Malformed/invalid windows are logged with a warning and ignored. +- Windows are strictly parsed as UTC: both `start` and `end` must be present, valid, and end in `Z`. `start` is inclusive, `end` is exclusive and must be later than `start`. +- If any part of maintenance is ongoing upon startup, status immediately reflects "maintenance". +- Overlapping or adjacent windows are allowed; the monitor is in maintenance whenever `now` falls inside at least one of them. + +**Example Config** + +```yaml +monitors: + - name: 'api-maintenance' + type: http + target: 'https://api.example.com/health' + maintenance: + - start: '2026-05-10T23:00:00Z' + end: '2026-05-11T01:00:00Z' + alerts: ['default'] +``` + **HTTP** ```yaml @@ -367,6 +411,7 @@ npm run check:pages # pages (astro check + tsc) - [ ] Add support for TLS checks (certificate validity, expiration). Apparently, the Workers API does not support certificate data access, even at the socket level. An external service may be required. - [ ] Refine the status page to look... well... less IA generated. +- [x] Per-monitor maintenance windows (docs and config example added) - [ ] Initial support for incident management (manual status overrides, incident timeline). - [x] Branded status page (simple custom banner). - [ ] Add support for notifications other than webhooks. 
diff --git a/src/api/status.test.ts b/src/api/status.test.ts index 79500fe..7924ea5 100644 --- a/src/api/status.test.ts +++ b/src/api/status.test.ts @@ -108,6 +108,37 @@ describe('getStatusApiData', () => { expect(result.title).toBe('Test Status Page'); }); + it('surfaces maintenance status and excludes from up/down counts', async () => { + const now = Math.floor(Date.now() / 1000); + const db = mockD1Database({ + states: [ + { monitor_name: 'up-monitor', current_status: 'up', last_checked: now }, + { monitor_name: 'maint', current_status: 'maintenance', last_checked: now }, + { monitor_name: 'down-monitor', current_status: 'down', last_checked: now }, + ], + hourly: [], + recent: [ + { + monitor_name: 'maint', + checked_at: now - 10, + status: 'maintenance', + response_time_ms: 0, + }, + ], + }); + const result = await getStatusApiData(db, testConfig); + const maint = result.monitors.find(m => m.name === 'maint'); + expect(maint).toBeDefined(); + expect(maint!.status).toBe('maintenance'); + expect(maint!.recentChecks[0]).toEqual({ + timestamp: now - 10, + status: 'maintenance', + responseTimeMs: 0, + }); + // Only up and down counted in summary + expect(result.summary).toEqual({ total: 3, operational: 1, down: 1 }); + }); + it('does not count unknown status monitors as down', async () => { const now = Math.floor(Date.now() / 1000); const db = mockD1Database({ diff --git a/src/api/status.ts b/src/api/status.ts index 7460a3f..8a7ec60 100644 --- a/src/api/status.ts +++ b/src/api/status.ts @@ -70,15 +70,17 @@ export async function getStatusApiData( const dailyHistory = computeDailyHistory(hourly); const uptimePercent = computeOverallUptime(hourly); - const status: 'up' | 'down' | 'unknown' = - state.current_status === 'up' || state.current_status === 'down' - ? state.current_status - : 'unknown'; + const status: 'up' | 'down' | 'unknown' | 'maintenance' = + state.current_status === 'maintenance' + ? 
'maintenance' + : state.current_status === 'up' || state.current_status === 'down' + ? state.current_status + : 'unknown'; const rawChecks = checksByMonitor.get(state.monitor_name) ?? []; const apiRecentChecks: ApiRecentCheck[] = rawChecks.map(c => ({ timestamp: c.checked_at, - status: c.status === 'up' ? ('up' as const) : ('down' as const), + status: c.status === 'maintenance' ? 'maintenance' : c.status === 'up' ? 'up' : 'down', responseTimeMs: c.response_time_ms ?? 0, })); diff --git a/src/config/config.ts b/src/config/config.ts index 2b72163..d05c93b 100644 --- a/src/config/config.ts +++ b/src/config/config.ts @@ -68,6 +68,37 @@ function applyDefaults(raw: RawYamlConfig): Config { failureThreshold: m.failure_threshold ?? settings.defaultFailureThreshold, alerts: m.alerts ?? [], region: m.region && isValidRegion(m.region) ? m.region : undefined, + maintenance: Array.isArray(m.maintenance) + ? m.maintenance.filter((w: any) => { + const shapeOk = + !!w && + typeof w === 'object' && + typeof w.start === 'string' && + typeof w.end === 'string'; + // Wrongly-shaped entries (missing or non-string timestamps) are warned about too, not dropped silently + const startMs = shapeOk ? Date.parse(w.start) : NaN; + const endMs = shapeOk ? Date.parse(w.end) : NaN; + if ( + !shapeOk || + isNaN(startMs) || + isNaN(endMs) || + !w.start.endsWith('Z') || + !w.end.endsWith('Z') || + endMs <= startMs + ) { + console.warn( + JSON.stringify({ + event: 'invalid_maintenance_window', + start: w?.start, + end: w?.end, + monitor: m.name, + }) + ); + return false; + } + return true; + }) + : undefined, }; const type = (m.type as 'http' | 'tcp' | 'dns') ?? 'http'; diff --git a/src/config/types.ts b/src/config/types.ts index c2574c6..002253d 100644 --- a/src/config/types.ts +++ b/src/config/types.ts @@ -19,6 +19,11 @@ export type WebhookAlert = AlertBase & { export type Alert = WebhookAlert; // | EmailAlert | ... interface MonitorBase { + /** + * List of maintenance windows. If now is >= start and < end, + * monitor is treated as "maintenance". Times must be ISO8601 UTC (with 'Z'). 
+ */ + maintenance?: { start: string; end: string }[]; name: string; target: string; timeoutMs: number; @@ -87,5 +92,6 @@ export type RawYamlConfig = { failure_threshold?: number; alerts?: string[]; region?: string; // Cloudflare region code for regional checks + maintenance?: { start: string; end: string }[]; }>; }; diff --git a/src/processor/maintenance-import.ts b/src/processor/maintenance-import.ts new file mode 100644 index 0000000..ff0eb60 --- /dev/null +++ b/src/processor/maintenance-import.ts @@ -0,0 +1,2 @@ +// Temporary import for next edit +import { isInMaintenance } from '../utils/maintenance.js'; diff --git a/src/processor/processor.ts b/src/processor/processor.ts index 0acfaf3..6df0a05 100644 --- a/src/processor/processor.ts +++ b/src/processor/processor.ts @@ -7,6 +7,7 @@ import type { AlertCall, StateUpdate, } from './types.js'; +import { isInMaintenance } from '../utils/maintenance.js'; export function processResults( results: CheckResult[], @@ -35,6 +36,8 @@ export function processResults( if (!monitor) { continue; } + // Maintenance check + const inMaintenance = isInMaintenance(monitor.maintenance, new Date()); const state = stateMap.get(result.name) ?? { monitor_name: result.name, @@ -56,15 +59,23 @@ export function processResults( const newState: StateUpdate = { monitorName: result.name, - currentStatus: state.current_status, + currentStatus: inMaintenance ? 
'maintenance' : state.current_status, consecutiveFailures: state.consecutive_failures, lastStatusChange: state.last_status_change, lastChecked: now, }; + // While in maintenance: persist the 'maintenance' status and skip all transition/alert logic + if (inMaintenance) { + // Alerts are suppressed; the check itself is still recorded (dbWrite above) + actions.stateUpdates.push(newState); + continue; + } + // Window over: a stored 'maintenance' status matches neither transition guard below, so resume from 'up' + if (state.current_status === 'maintenance') state.current_status = newState.currentStatus = 'up'; + if (result.status === 'down') { newState.consecutiveFailures = state.consecutive_failures + 1; - if ( newState.consecutiveFailures >= monitor.failureThreshold && state.current_status === 'up' @@ -86,10 +97,8 @@ export function processResults( } else { newState.consecutiveFailures = 0; newState.currentStatus = 'up'; - if (state.current_status === 'down') { newState.lastStatusChange = now; - for (const alertName of monitor.alerts) { const alert: AlertCall = { alertName, @@ -104,7 +113,6 @@ export function processResults( newState.lastStatusChange = state.last_status_change; } } - actions.stateUpdates.push(newState); } diff --git a/src/processor/types.ts b/src/processor/types.ts index cafe6ce..0312119 100644 --- a/src/processor/types.ts +++ b/src/processor/types.ts @@ -8,7 +8,7 @@ export type CheckResult = { export type MonitorState = { monitor_name: string; - current_status: 'up' | 'down'; + current_status: 'up' | 'down' | 'maintenance'; consecutive_failures: number; last_status_change: number; last_checked: number; @@ -33,7 +33,7 @@ export type AlertCall = { export type StateUpdate = { monitorName: string; - currentStatus: string; + currentStatus: 'up' | 'down' | 'maintenance'; consecutiveFailures: number; lastStatusChange: number; lastChecked: number; diff --git a/src/types.ts b/src/types.ts index 9b1eab2..ad0ff98 100644 --- a/src/types.ts +++ b/src/types.ts @@ -108,7 +108,14 @@ export type StatusApiResponse = { export type ApiMonitorStatus = { name: string; - status: 'up' | 'down' | 'unknown'; + /** + * 
Current status of the monitor. + * 'up' - healthy + * 'down' - failing + * 'unknown' - initial/undefined + * 'maintenance' - within a configured maintenance window (alerts suppressed, shown as maintenance in UI) + */ + status: 'up' | 'down' | 'unknown' | 'maintenance'; lastChecked: number | undefined; uptimePercent: number; dailyHistory: ApiDayStatus[]; @@ -122,6 +129,11 @@ export type ApiDayStatus = { export type ApiRecentCheck = { timestamp: number; - status: 'up' | 'down'; + /** + * Status for a single check event. + * Usually 'up' or 'down', + * but 'maintenance' if check occurred during a maintenance window. + */ + status: 'up' | 'down' | 'maintenance'; responseTimeMs: number; }; diff --git a/src/utils/maintenance.test.ts b/src/utils/maintenance.test.ts new file mode 100644 index 0000000..ed7bed1 --- /dev/null +++ b/src/utils/maintenance.test.ts @@ -0,0 +1,50 @@ +import { describe, it, expect } from 'vitest'; +import { isInMaintenance, MaintenanceWindow } from './maintenance'; + +function utc(date: string) { + // Shortcut for Date creation + return new Date(date); +} + +describe('isInMaintenance', () => { + it('returns false when maintenance undefined or empty', () => { + expect(isInMaintenance(undefined, utc('2026-05-01T10:00:00Z'))).toBe(false); + expect(isInMaintenance([], utc('2026-05-01T10:00:00Z'))).toBe(false); + }); + + it('includes and excludes at precise boundaries', () => { + const mw: MaintenanceWindow[] = [ + { start: '2026-05-01T10:00:00Z', end: '2026-05-01T12:00:00Z' }, + ]; + expect(isInMaintenance(mw, utc('2026-05-01T09:59:59Z'))).toBe(false); + expect(isInMaintenance(mw, utc('2026-05-01T10:00:00Z'))).toBe(true); // start boundary, inclusive + expect(isInMaintenance(mw, utc('2026-05-01T11:59:59Z'))).toBe(true); + expect(isInMaintenance(mw, utc('2026-05-01T12:00:00Z'))).toBe(false); // end boundary, exclusive + }); + + it('handles overlapping windows', () => { + const mw: MaintenanceWindow[] = [ + { start: '2026-05-01T10:00:00Z', end: 
'2026-05-01T11:00:00Z' }, + { start: '2026-05-01T10:30:00Z', end: '2026-05-01T11:30:00Z' }, + ]; + expect(isInMaintenance(mw, utc('2026-05-01T10:45:00Z'))).toBe(true); + expect(isInMaintenance(mw, utc('2026-05-01T11:15:00Z'))).toBe(true); + expect(isInMaintenance(mw, utc('2026-05-01T11:30:00Z'))).toBe(false); + }); + + it('ignores malformed windows (should not reach here)', () => { + // A test for the future if parser passes bad data. Should stay false. + const mw = [{ start: 'bad', end: 'also-bad' }] as any; + expect(isInMaintenance(mw, utc('2026-05-01T10:00:00Z'))).toBe(false); + }); + + it('prefers the first valid match if multiple windows overlap', () => { + const mw: MaintenanceWindow[] = [ + { start: '2026-05-01T08:00:00Z', end: '2026-05-01T11:00:00Z' }, + { start: '2026-05-01T10:00:00Z', end: '2026-05-01T12:00:00Z' }, + ]; + expect(isInMaintenance(mw, utc('2026-05-01T09:00:00Z'))).toBe(true); + expect(isInMaintenance(mw, utc('2026-05-01T11:00:00Z'))).toBe(true); + expect(isInMaintenance(mw, utc('2026-05-01T12:01:00Z'))).toBe(false); + }); +}); diff --git a/src/utils/maintenance.ts b/src/utils/maintenance.ts new file mode 100644 index 0000000..71867bf --- /dev/null +++ b/src/utils/maintenance.ts @@ -0,0 +1,27 @@ +// Utility to determine if a monitor is in maintenance based on maintenance windows and current time +// All times must be strict ISO8601 with 'Z' (UTC). End is exclusive. Windows must be validated beforehand. + +export interface MaintenanceWindow { + start: string; // ISO8601 UTC + end: string; // ISO8601 UTC +} + +/** + * Returns true if now is within any valid maintenance window. + * start is inclusive, end is exclusive (UTC). + * Malformed windows should have been filtered out by config parser. + * Overlapping windows are fine. 
+ */ +export function isInMaintenance(maintenance: MaintenanceWindow[] | undefined, now: Date): boolean { + if (!maintenance || maintenance.length === 0) return false; + const nowMs = now.getTime(); + for (const w of maintenance) { + const startMs = Date.parse(w.start); + const endMs = Date.parse(w.end); + if (isNaN(startMs) || isNaN(endMs) || endMs <= startMs) continue; // skip malformed + if (nowMs >= startMs && nowMs < endMs) { + return true; + } + } + return false; +} diff --git a/status-page/src/components/MonitorCard.astro b/status-page/src/components/MonitorCard.astro index 280bcf4..bf010b7 100644 --- a/status-page/src/components/MonitorCard.astro +++ b/status-page/src/components/MonitorCard.astro @@ -1,5 +1,7 @@ --- import type { ApiMonitorStatus } from '@worker/types'; +// Allow maintenance as a valid runtime state: +type MonitorStatus = ApiMonitorStatus['status'] | 'maintenance'; import UptimeBars from './UptimeBars.astro'; interface Props { @@ -41,13 +43,19 @@ const chartData = JSON.stringify({
-

{monitor.name}

- {uptimeFormatted}% - {lastCheckedText} + role="status" + aria-label={`Status: ${monitor.status === 'up' ? 'Operational' : monitor.status === 'down' ? 'Down' : monitor.status === 'maintenance' ? 'Maintenance' : 'Unknown'}`} + title={`${monitor.status === 'up' ? 'Operational' : monitor.status === 'down' ? 'Down' : monitor.status === 'maintenance' ? 'Maintenance' : 'Unknown'}`} + + >
+

+ {monitor.name} + {monitor.status === 'maintenance' && ( + Maintenance + )} +

+ {uptimeFormatted}% + {lastCheckedText} @@ -166,6 +174,34 @@ const chartData = JSON.stringify({ animation: pulse-glow 3s ease-in-out infinite; } + .status-dot-maintenance { + background: var(--maintenance, #bfa21a); + box-shadow: 0 0 16px #ffe066cc, 0 0 32px #bfa21ab0; + border-color: #f8e16c; + } + .status-dot-maintenance::after { + background: var(--maintenance, #f8e16c); + animation: pulse-glow 1.5s ease-in-out infinite; + opacity: 0.4; + } + + .uptime-maintenance { + color: var(--maintenance, #bfa21a); + } + + .maintenance-badge { + display: inline-block; + margin-left: 0.5em; + padding: 0.1em 0.5em; + font-size: var(--text-2xs, 12px); + background: var(--maintenance-bg, #fff5bf); + color: var(--maintenance, #bfa21a); + border-radius: 4px; + font-weight: 600; + letter-spacing: 0.03em; + vertical-align: middle; + } + @keyframes pulse-glow { 0%, 100% { opacity: 0.3; diff --git a/status-page/src/pages/index.astro b/status-page/src/pages/index.astro index bde7bc4..fd59a2e 100644 --- a/status-page/src/pages/index.astro +++ b/status-page/src/pages/index.astro @@ -32,7 +32,7 @@ try { } -// Sort: down monitors first, then unknown, then up -const sortOrder = { down: 0, unknown: 1, up: 2 } as const; +// Sort: down monitors first, then maintenance, then unknown, then up +const sortOrder = { down: 0, maintenance: 1, unknown: 2, up: 3 } as const; const sortedMonitors = data ? [...data.monitors].sort( (a, b) => (sortOrder[a.status] ?? 
1) ) : []; diff --git a/wrangler.example.toml b/wrangler.example.toml index ffb7e9d..6598d0e 100644 --- a/wrangler.example.toml +++ b/wrangler.example.toml @@ -88,6 +88,13 @@ monitors: headers: Authorization: "Basic ${BASIC_AUTH}" # BASIC_AUTH must be defined as secret in Cloudflare alerts: ["default"] + # Optional maintenance windows - ISO 8601 UTC format + maintenance: + - start: "2026-05-10T23:00:00Z" # Begin maintenance, UTC + end: "2026-05-11T01:00:00Z" # End maintenance (exclusive), UTC + - start: "2026-06-01T02:00:00Z" + end: "2026-06-01T03:30:00Z" + # Monitors in maintenance do NOT send alerts, but still accumulate downtime for reporting # Regional monitoring examples # Run checks from specific Cloudflare regions