feat: maintenance mode for monitors (#7)

This commit is contained in:
2026-04-17 18:42:23 +02:00
committed by GitHub
parent 0ab0221276
commit 3b074977ed
14 changed files with 279 additions and 22 deletions

View File

@@ -16,6 +16,7 @@ Live [example](https://uptime.ifconfig.es/).
- [Configuration](#configuration) - [Configuration](#configuration)
- [Settings](#settings) - [Settings](#settings)
- [Monitor Types](#monitor-types) - [Monitor Types](#monitor-types)
- [Maintenance Windows](#maintenance-windows)
- [Regional Monitoring](#regional-monitoring) - [Regional Monitoring](#regional-monitoring)
- [Alerts](#alerts) - [Alerts](#alerts)
- [Status Page](#status-page) - [Status Page](#status-page)
@@ -151,6 +152,49 @@ Each monitor can override the global default\_\* settings:
### Monitor Types ### Monitor Types
### Maintenance Windows
You can configure scheduled maintenance windows for individual monitors, suspending alerts while downtime is still tracked for metrics.
**Syntax**
Each monitor supports a `maintenance` array, with one or more objects specifying a `start` and `end` timestamp in UTC ISO8601 format.
```yaml
- name: 'web-api'
type: http
target: 'https://example.com/health'
maintenance:
- start: '2026-05-10T23:00:00Z'
end: '2026-05-11T01:00:00Z'
- start: '2026-06-01T02:00:00Z'
end: '2026-06-01T03:30:00Z'
alerts: ['default']
```
**Behavior**
- At any time when `now` (UTC) is within a window, the monitor is shown as "maintenance":
- Status page and API both clearly show "maintenance" (distinct from "up" or "down").
- Downtime during maintenance _is still counted_ for metrics and reporting.
- All alerts are suppressed—no notifications are sent for failures during maintenance.
- Malformed/invalid windows are logged with a warning and ignored.
- Windows are strictly parsed as UTC. Both `start` and `end` must be present, must be valid ISO 8601 timestamps ending in `Z`, and `end` must be later than `start`.
- If a maintenance window is already in progress at startup, the status immediately reflects "maintenance".
- Overlapping or adjacent windows are allowed: the monitor is in maintenance whenever the current time falls inside at least one of them.
**Example Config**
```yaml
monitors:
- name: 'api-maintenance'
type: http
target: 'https://api.example.com/health'
maintenance:
- start: '2026-05-10T23:00:00Z'
end: '2026-05-11T01:00:00Z'
alerts: ['default']
```
**HTTP** **HTTP**
```yaml ```yaml
@@ -367,6 +411,7 @@ npm run check:pages # pages (astro check + tsc)
- [ ] Add support for TLS checks (certificate validity, expiration). Apparently, the Workers API does not support certificate data access, even at the socket level. An external service may be required. - [ ] Add support for TLS checks (certificate validity, expiration). Apparently, the Workers API does not support certificate data access, even at the socket level. An external service may be required.
- [ ] Refine the status page to look... well... less IA generated. - [ ] Refine the status page to look... well... less IA generated.
- [x] Per-monitor maintenance windows (docs and config example added)
- [ ] Initial support for incident management (manual status overrides, incident timeline). - [ ] Initial support for incident management (manual status overrides, incident timeline).
- [x] Branded status page (simple custom banner). - [x] Branded status page (simple custom banner).
- [ ] Add support for notifications other than webhooks. - [ ] Add support for notifications other than webhooks.

View File

@@ -108,6 +108,37 @@ describe('getStatusApiData', () => {
expect(result.title).toBe('Test Status Page'); expect(result.title).toBe('Test Status Page');
}); });
it('surfaces maintenance status and excludes from up/down counts', async () => {
  // One shared epoch-seconds timestamp keeps the fixture rows and the expectations in sync.
  const checkedAt = Math.floor(Date.now() / 1000);
  const maintenanceCheckAt = checkedAt - 10;

  const database = mockD1Database({
    states: [
      { monitor_name: 'up-monitor', current_status: 'up', last_checked: checkedAt },
      { monitor_name: 'maint', current_status: 'maintenance', last_checked: checkedAt },
      { monitor_name: 'down-monitor', current_status: 'down', last_checked: checkedAt },
    ],
    hourly: [],
    recent: [
      {
        monitor_name: 'maint',
        checked_at: maintenanceCheckAt,
        status: 'maintenance',
        response_time_ms: 0,
      },
    ],
  });

  const { monitors, summary } = await getStatusApiData(database, testConfig);

  // The monitor stored as 'maintenance' must be reported as such, including its recent check.
  const maintMonitor = monitors.find(({ name }) => name === 'maint');
  expect(maintMonitor).toBeDefined();
  expect(maintMonitor!.status).toBe('maintenance');
  expect(maintMonitor!.recentChecks[0]).toEqual({
    timestamp: maintenanceCheckAt,
    status: 'maintenance',
    responseTimeMs: 0,
  });

  // The maintenance monitor is part of `total` but adds to neither `operational` nor `down`.
  expect(summary).toEqual({ total: 3, operational: 1, down: 1 });
});
it('does not count unknown status monitors as down', async () => { it('does not count unknown status monitors as down', async () => {
const now = Math.floor(Date.now() / 1000); const now = Math.floor(Date.now() / 1000);
const db = mockD1Database({ const db = mockD1Database({

View File

@@ -70,15 +70,17 @@ export async function getStatusApiData(
const dailyHistory = computeDailyHistory(hourly); const dailyHistory = computeDailyHistory(hourly);
const uptimePercent = computeOverallUptime(hourly); const uptimePercent = computeOverallUptime(hourly);
const status: 'up' | 'down' | 'unknown' = const status: 'up' | 'down' | 'unknown' | 'maintenance' =
state.current_status === 'up' || state.current_status === 'down' state.current_status === 'maintenance'
? state.current_status ? 'maintenance'
: 'unknown'; : state.current_status === 'up' || state.current_status === 'down'
? state.current_status
: 'unknown';
const rawChecks = checksByMonitor.get(state.monitor_name) ?? []; const rawChecks = checksByMonitor.get(state.monitor_name) ?? [];
const apiRecentChecks: ApiRecentCheck[] = rawChecks.map(c => ({ const apiRecentChecks: ApiRecentCheck[] = rawChecks.map(c => ({
timestamp: c.checked_at, timestamp: c.checked_at,
status: c.status === 'up' ? ('up' as const) : ('down' as const), status: c.status === 'maintenance' ? 'maintenance' : c.status === 'up' ? 'up' : 'down',
responseTimeMs: c.response_time_ms ?? 0, responseTimeMs: c.response_time_ms ?? 0,
})); }));

View File

@@ -68,6 +68,37 @@ function applyDefaults(raw: RawYamlConfig): Config {
failureThreshold: m.failure_threshold ?? settings.defaultFailureThreshold, failureThreshold: m.failure_threshold ?? settings.defaultFailureThreshold,
alerts: m.alerts ?? [], alerts: m.alerts ?? [],
region: m.region && isValidRegion(m.region) ? m.region : undefined, region: m.region && isValidRegion(m.region) ? m.region : undefined,
maintenance: Array.isArray(m.maintenance)
? m.maintenance.filter((w: any) => {
if (
!w ||
typeof w !== 'object' ||
typeof w.start !== 'string' ||
typeof w.end !== 'string'
)
return false;
const startMs = Date.parse(w.start);
const endMs = Date.parse(w.end);
if (
isNaN(startMs) ||
isNaN(endMs) ||
!w.start.endsWith('Z') ||
!w.end.endsWith('Z') ||
endMs <= startMs
) {
console.warn(
JSON.stringify({
event: 'invalid_maintenance_window',
start: w.start,
end: w.end,
monitor: m.name,
})
);
return false;
}
return true;
})
: undefined,
}; };
const type = (m.type as 'http' | 'tcp' | 'dns') ?? 'http'; const type = (m.type as 'http' | 'tcp' | 'dns') ?? 'http';

View File

@@ -19,6 +19,11 @@ export type WebhookAlert = AlertBase & {
export type Alert = WebhookAlert; // | EmailAlert | ... export type Alert = WebhookAlert; // | EmailAlert | ...
interface MonitorBase { interface MonitorBase {
/**
* List of maintenance windows. If now is >= start and < end,
* monitor is treated as "maintenance". Times must be ISO8601 UTC (with 'Z').
*/
maintenance?: { start: string; end: string }[];
name: string; name: string;
target: string; target: string;
timeoutMs: number; timeoutMs: number;
@@ -87,5 +92,6 @@ export type RawYamlConfig = {
failure_threshold?: number; failure_threshold?: number;
alerts?: string[]; alerts?: string[];
region?: string; // Cloudflare region code for regional checks region?: string; // Cloudflare region code for regional checks
maintenance?: { start: string; end: string }[];
}>; }>;
}; };

View File

@@ -0,0 +1,2 @@
// Temporary import for next edit
import { isInMaintenance } from '../utils/maintenance.js';

View File

@@ -7,6 +7,7 @@ import type {
AlertCall, AlertCall,
StateUpdate, StateUpdate,
} from './types.js'; } from './types.js';
import { isInMaintenance } from '../utils/maintenance.js';
export function processResults( export function processResults(
results: CheckResult[], results: CheckResult[],
@@ -35,6 +36,8 @@ export function processResults(
if (!monitor) { if (!monitor) {
continue; continue;
} }
// Maintenance check
const inMaintenance = isInMaintenance(monitor.maintenance, new Date());
const state = stateMap.get(result.name) ?? { const state = stateMap.get(result.name) ?? {
monitor_name: result.name, monitor_name: result.name,
@@ -56,15 +59,23 @@ export function processResults(
const newState: StateUpdate = { const newState: StateUpdate = {
monitorName: result.name, monitorName: result.name,
currentStatus: state.current_status, currentStatus: inMaintenance ? 'maintenance' : state.current_status,
consecutiveFailures: state.consecutive_failures, consecutiveFailures: state.consecutive_failures,
lastStatusChange: state.last_status_change, lastStatusChange: state.last_status_change,
lastChecked: now, lastChecked: now,
}; };
// Only update downtime/failure/recovery/alerts logic if not in maintenance
if (inMaintenance) {
// Alert suppression: no alerts for down or recovery
// But downtime is recorded (dbWrite above)
// State persists in 'maintenance', reset nothing
actions.stateUpdates.push(newState);
continue;
}
if (result.status === 'down') { if (result.status === 'down') {
newState.consecutiveFailures = state.consecutive_failures + 1; newState.consecutiveFailures = state.consecutive_failures + 1;
if ( if (
newState.consecutiveFailures >= monitor.failureThreshold && newState.consecutiveFailures >= monitor.failureThreshold &&
state.current_status === 'up' state.current_status === 'up'
@@ -86,10 +97,8 @@ export function processResults(
} else { } else {
newState.consecutiveFailures = 0; newState.consecutiveFailures = 0;
newState.currentStatus = 'up'; newState.currentStatus = 'up';
if (state.current_status === 'down') { if (state.current_status === 'down') {
newState.lastStatusChange = now; newState.lastStatusChange = now;
for (const alertName of monitor.alerts) { for (const alertName of monitor.alerts) {
const alert: AlertCall = { const alert: AlertCall = {
alertName, alertName,
@@ -104,7 +113,6 @@ export function processResults(
newState.lastStatusChange = state.last_status_change; newState.lastStatusChange = state.last_status_change;
} }
} }
actions.stateUpdates.push(newState); actions.stateUpdates.push(newState);
} }

View File

@@ -8,7 +8,7 @@ export type CheckResult = {
export type MonitorState = { export type MonitorState = {
monitor_name: string; monitor_name: string;
current_status: 'up' | 'down'; current_status: 'up' | 'down' | 'maintenance';
consecutive_failures: number; consecutive_failures: number;
last_status_change: number; last_status_change: number;
last_checked: number; last_checked: number;
@@ -33,7 +33,7 @@ export type AlertCall = {
export type StateUpdate = { export type StateUpdate = {
monitorName: string; monitorName: string;
currentStatus: string; currentStatus: 'up' | 'down' | 'maintenance';
consecutiveFailures: number; consecutiveFailures: number;
lastStatusChange: number; lastStatusChange: number;
lastChecked: number; lastChecked: number;

View File

@@ -108,7 +108,14 @@ export type StatusApiResponse = {
export type ApiMonitorStatus = { export type ApiMonitorStatus = {
name: string; name: string;
status: 'up' | 'down' | 'unknown'; /**
* Current status of the monitor.
* 'up' - healthy
* 'down' - failing
* 'unknown' - initial/undefined
* 'maintenance' - within a configured maintenance window (alerts suppressed, shown as maintenance in UI)
*/
status: 'up' | 'down' | 'unknown' | 'maintenance';
lastChecked: number | undefined; lastChecked: number | undefined;
uptimePercent: number; uptimePercent: number;
dailyHistory: ApiDayStatus[]; dailyHistory: ApiDayStatus[];
@@ -122,6 +129,11 @@ export type ApiDayStatus = {
export type ApiRecentCheck = { export type ApiRecentCheck = {
timestamp: number; timestamp: number;
status: 'up' | 'down'; /**
* Status for a single check event.
* Usually 'up' or 'down',
* but 'maintenance' if check occurred during a maintenance window.
*/
status: 'up' | 'down' | 'maintenance';
responseTimeMs: number; responseTimeMs: number;
}; };

View File

@@ -0,0 +1,50 @@
import { describe, it, expect } from 'vitest';
import { isInMaintenance, MaintenanceWindow } from './maintenance';
/** Builds a `Date` from a timestamp string; keeps the assertions below compact. */
function utc(date: string): Date {
  const parsed = new Date(date);
  return parsed;
}
// Unit tests for isInMaintenance: empty input, boundary semantics (start inclusive,
// end exclusive), overlapping windows, and windows that must never match.
describe('isInMaintenance', () => {
  it('returns false when maintenance undefined or empty', () => {
    expect(isInMaintenance(undefined, utc('2026-05-01T10:00:00Z'))).toBe(false);
    expect(isInMaintenance([], utc('2026-05-01T10:00:00Z'))).toBe(false);
  });

  it('includes and excludes at precise boundaries', () => {
    const mw: MaintenanceWindow[] = [
      { start: '2026-05-01T10:00:00Z', end: '2026-05-01T12:00:00Z' },
    ];
    expect(isInMaintenance(mw, utc('2026-05-01T09:59:59Z'))).toBe(false);
    expect(isInMaintenance(mw, utc('2026-05-01T10:00:00Z'))).toBe(true); // start boundary, inclusive
    expect(isInMaintenance(mw, utc('2026-05-01T11:59:59Z'))).toBe(true);
    expect(isInMaintenance(mw, utc('2026-05-01T12:00:00Z'))).toBe(false); // end boundary, exclusive
  });

  it('handles overlapping windows', () => {
    const mw: MaintenanceWindow[] = [
      { start: '2026-05-01T10:00:00Z', end: '2026-05-01T11:00:00Z' },
      { start: '2026-05-01T10:30:00Z', end: '2026-05-01T11:30:00Z' },
    ];
    expect(isInMaintenance(mw, utc('2026-05-01T10:45:00Z'))).toBe(true);
    expect(isInMaintenance(mw, utc('2026-05-01T11:15:00Z'))).toBe(true);
    expect(isInMaintenance(mw, utc('2026-05-01T11:30:00Z'))).toBe(false);
  });

  it('ignores malformed windows (should not reach here)', () => {
    // The config parser is expected to filter these out, but if bad data slips through
    // it must never be reported as an active window.
    // The literals already satisfy MaintenanceWindow (both fields are strings), so no cast is needed.
    const unparseable: MaintenanceWindow[] = [{ start: 'bad', end: 'also-bad' }];
    expect(isInMaintenance(unparseable, utc('2026-05-01T10:00:00Z'))).toBe(false);

    // A window whose end is not after its start is skipped as well.
    const inverted: MaintenanceWindow[] = [
      { start: '2026-05-01T12:00:00Z', end: '2026-05-01T10:00:00Z' },
    ];
    expect(isInMaintenance(inverted, utc('2026-05-01T11:00:00Z'))).toBe(false);
  });

  it('reports maintenance when any of several overlapping windows covers the time', () => {
    const mw: MaintenanceWindow[] = [
      { start: '2026-05-01T08:00:00Z', end: '2026-05-01T11:00:00Z' },
      { start: '2026-05-01T10:00:00Z', end: '2026-05-01T12:00:00Z' },
    ];
    expect(isInMaintenance(mw, utc('2026-05-01T09:00:00Z'))).toBe(true); // only the first window
    expect(isInMaintenance(mw, utc('2026-05-01T11:00:00Z'))).toBe(true); // first has ended, second still covers
    expect(isInMaintenance(mw, utc('2026-05-01T12:01:00Z'))).toBe(false); // past both windows
  });
});

27
src/utils/maintenance.ts Normal file
View File

@@ -0,0 +1,27 @@
// Utility to determine if a monitor is in maintenance based on maintenance windows and current time
// All times must be strict ISO8601 with 'Z' (UTC). End is exclusive. Windows must be validated beforehand.
/**
 * A single scheduled maintenance window for a monitor.
 * Both bounds are timestamp strings that `isInMaintenance` converts with `Date.parse`.
 */
export interface MaintenanceWindow {
  /** Window start, ISO8601 UTC. Inclusive when matched by `isInMaintenance`. */
  start: string;
  /** Window end, ISO8601 UTC. Exclusive when matched by `isInMaintenance`. */
  end: string;
}
/**
 * Decides whether `now` lies inside at least one of the given maintenance windows.
 *
 * A window matches when `start <= now < end` (start inclusive, end exclusive), comparing
 * epoch milliseconds obtained via `Date.parse`. Windows whose timestamps do not parse, or
 * whose end is not strictly after their start, are skipped; the config parser is expected
 * to have filtered those out already. Overlapping windows need no special handling.
 *
 * @param maintenance - Windows configured for the monitor; may be undefined or empty.
 * @param now - The instant to test.
 * @returns `true` if any usable window contains `now`, otherwise `false`.
 */
export function isInMaintenance(maintenance: MaintenanceWindow[] | undefined, now: Date): boolean {
  const windows = maintenance ?? [];
  if (windows.length === 0) return false;

  const instant = now.getTime();
  return windows.some(({ start, end }) => {
    const opensAt = Date.parse(start);
    const closesAt = Date.parse(end);
    // Skip windows that are unparseable or not strictly forward in time.
    const usable = !Number.isNaN(opensAt) && !Number.isNaN(closesAt) && closesAt > opensAt;
    return usable && opensAt <= instant && instant < closesAt;
  });
}

View File

@@ -1,5 +1,7 @@
--- ---
import type { ApiMonitorStatus } from '@worker/types'; import type { ApiMonitorStatus } from '@worker/types';
// Status values a monitor card can render. ApiMonitorStatus['status'] already
// includes 'maintenance', so no extra union member is needed here.
type MonitorStatus = ApiMonitorStatus['status'];
import UptimeBars from './UptimeBars.astro'; import UptimeBars from './UptimeBars.astro';
interface Props { interface Props {
@@ -41,13 +43,19 @@ const chartData = JSON.stringify({
<div class="monitor-head"> <div class="monitor-head">
<div <div
class:list={['status-dot', `status-dot-${monitor.status}`]} class:list={['status-dot', `status-dot-${monitor.status}`]}
role="status" role="status"
aria-label={`Status: ${monitor.status === 'up' ? 'Operational' : monitor.status === 'down' ? 'Down' : 'Unknown'}`} aria-label={`Status: ${(monitor.status as any) === 'up' ? 'Operational' : (monitor.status as any) === 'down' ? 'Down' : (monitor.status as any) === 'maintenance' ? 'Maintenance' : 'Unknown'}`}
title={`${monitor.status === 'up' ? 'Operational' : monitor.status === 'down' ? 'Down' : 'Unknown'}`} title={`${(monitor.status as any) === 'up' ? 'Operational' : (monitor.status as any) === 'down' ? 'Down' : (monitor.status as any) === 'maintenance' ? 'Maintenance' : 'Unknown'}`}
></div>
<h3 class="monitor-name" id={`monitor-${monitor.name.replace(/\s+/g, '-').toLowerCase()}-title`} title={monitor.name}>{monitor.name}</h3> ></div>
<span class:list={['monitor-uptime', `uptime-${monitor.status}`]}>{uptimeFormatted}%</span> <h3 class="monitor-name" id={`monitor-${monitor.name.replace(/\s+/g, '-').toLowerCase()}-title`} title={monitor.name}>
<span class="monitor-meta">{lastCheckedText}</span> {monitor.name}
{(monitor.status as any) === 'maintenance' && (
<span class="maintenance-badge" title="Scheduled Maintenance">Maintenance</span>
)}
</h3>
<span class:list={['monitor-uptime', `uptime-${monitor.status}`]} aria-label={(monitor.status as any) === 'maintenance' ? 'Scheduled Maintenance - uptime value reflects unmonitored state' : undefined}>{uptimeFormatted}%</span>
<span class="monitor-meta">{lastCheckedText}</span>
</div> </div>
<UptimeBars dailyHistory={monitor.dailyHistory} /> <UptimeBars dailyHistory={monitor.dailyHistory} />
@@ -166,6 +174,34 @@ const chartData = JSON.stringify({
animation: pulse-glow 3s ease-in-out infinite; animation: pulse-glow 3s ease-in-out infinite;
} }
.status-dot-maintenance {
background: var(--maintenance, #bfa21a);
box-shadow: 0 0 16px #ffe066cc, 0 0 32px #bfa21ab0;
border-color: #f8e16c;
}
.status-dot-maintenance::after {
background: var(--maintenance, #f8e16c);
animation: pulse-glow 1.5s ease-in-out infinite;
opacity: 0.4;
}
.uptime-maintenance {
color: var(--maintenance, #bfa21a);
}
.maintenance-badge {
display: inline-block;
margin-left: 0.5em;
padding: 0.1em 0.5em;
font-size: var(--text-2xs, 12px);
background: var(--maintenance, #fff5bf);
color: var(--maintenance, #bfa21a);
border-radius: 4px;
font-weight: 600;
letter-spacing: 0.03em;
vertical-align: middle;
}
@keyframes pulse-glow { @keyframes pulse-glow {
0%, 100% { 0%, 100% {
opacity: 0.3; opacity: 0.3;

View File

@@ -32,7 +32,7 @@ try {
} }
// Sort: down monitors first, then unknown, then up // Sort: down monitors first, then unknown, then up
const sortOrder = { down: 0, unknown: 1, up: 2 } as const; const sortOrder = { down: 0, maintenance: 1, unknown: 2, up: 3 } as const;
const sortedMonitors = data ? [...data.monitors].sort( const sortedMonitors = data ? [...data.monitors].sort(
(a, b) => (sortOrder[a.status] ?? 1) - (sortOrder[b.status] ?? 1) (a, b) => (sortOrder[a.status] ?? 1) - (sortOrder[b.status] ?? 1)
) : []; ) : [];

View File

@@ -88,6 +88,13 @@ monitors:
headers: headers:
Authorization: "Basic ${BASIC_AUTH}" # BASIC_AUTH must be defined as secret in Cloudflare Authorization: "Basic ${BASIC_AUTH}" # BASIC_AUTH must be defined as secret in Cloudflare
alerts: ["default"] alerts: ["default"]
# Optional maintenance windows - ISO 8601 UTC format
maintenance:
- start: "2026-05-10T23:00:00Z" # Begin maintenance, UTC
end: "2026-05-11T01:00:00Z" # End maintenance (exclusive), UTC
- start: "2026-06-01T02:00:00Z"
end: "2026-06-01T03:30:00Z"
# Monitors in maintenance do NOT send alerts, but still accumulate downtime for reporting
# Regional monitoring examples # Regional monitoring examples
# Run checks from specific Cloudflare regions # Run checks from specific Cloudflare regions