feat(api): improve health endpoint with verbose mode and better naming

This commit is contained in:
Sam Chau
2026-01-25 23:36:06 +10:30
parent bef616e595
commit 317e66b5fc
3 changed files with 175 additions and 125 deletions

View File

@@ -9,8 +9,17 @@ health:
- `healthy`: All components functioning normally
- `degraded`: Core functionality works but some components have issues
- `unhealthy`: Core functionality is broken
Use `?verbose=true` for detailed component information.
tags:
- System
parameters:
- name: verbose
in: query
description: Include detailed component information
schema:
type: boolean
default: false
responses:
'200':
description: Health check response

View File

@@ -1,44 +1,45 @@
DatabaseHealth:
SqliteHealth:
type: object
required:
- status
- responseTimeMs
- migration
properties:
status:
$ref: './common.yaml#/ComponentStatus'
responseTimeMs:
type: number
description: Database query response time in milliseconds
migration:
type: integer
description: Current migration version
message:
type: string
description: Error message if unhealthy
DatabasesHealth:
ReposHealth:
type: object
required:
- status
- total
- enabled
- cached
- disabled
properties:
status:
$ref: './common.yaml#/ComponentStatus'
total:
type: integer
description: Total number of PCD databases configured
enabled:
type: integer
description: Number of enabled databases
cached:
type: integer
description: Number of databases with compiled cache
disabled:
type: integer
description: Number of disabled databases (compilation errors)
message:
type: string
description: Additional status information
# Verbose fields
total:
type: integer
description: Total number of PCD repos configured (verbose only)
enabled:
type: integer
description: Number of enabled repos (verbose only)
cached:
type: integer
description: Number of repos with compiled cache (verbose only)
disabled:
type: integer
description: Number of disabled repos (verbose only)
JobsHealth:
type: object
@@ -47,16 +48,17 @@ JobsHealth:
properties:
status:
$ref: './common.yaml#/ComponentStatus'
message:
type: string
description: Additional status information
# Verbose fields
lastRun:
type: object
additionalProperties:
type: string
format: date-time
nullable: true
description: Last run time for each job
message:
type: string
description: Additional status information
description: Last run time for each job (verbose only)
BackupsHealth:
type: object
@@ -69,23 +71,24 @@ BackupsHealth:
enabled:
type: boolean
description: Whether backups are enabled
message:
type: string
description: Additional status information
# Verbose fields
lastBackup:
type: string
format: date-time
nullable: true
description: Timestamp of last backup
description: Timestamp of last backup (verbose only)
count:
type: integer
description: Number of backup files
totalSizeBytes:
type: integer
description: Total size of all backups in bytes
description: Number of backup files (verbose only)
totalSize:
type: string
description: Human-readable total size of all backups (verbose only)
retentionDays:
type: integer
description: Configured retention period in days
message:
type: string
description: Additional status information
description: Configured retention period in days (verbose only)
LogsHealth:
type: object
@@ -94,25 +97,26 @@ LogsHealth:
properties:
status:
$ref: './common.yaml#/ComponentStatus'
totalSizeBytes:
type: integer
description: Total size of log files in bytes
message:
type: string
description: Additional status information
# Verbose fields
totalSize:
type: string
description: Human-readable total size of log files (verbose only)
fileCount:
type: integer
description: Number of log files
description: Number of log files (verbose only)
oldestLog:
type: string
format: date
nullable: true
description: Date of oldest log file
description: Date of oldest log file (verbose only)
newestLog:
type: string
format: date
nullable: true
description: Date of newest log file
message:
type: string
description: Additional status information
description: Date of newest log file (verbose only)
HealthResponse:
type: object
@@ -138,16 +142,16 @@ HealthResponse:
components:
type: object
required:
- database
- databases
- sqlite
- repos
- jobs
- backups
- logs
properties:
database:
$ref: '#/DatabaseHealth'
databases:
$ref: '#/DatabasesHealth'
sqlite:
$ref: '#/SqliteHealth'
repos:
$ref: '#/ReposHealth'
jobs:
$ref: '#/JobsHealth'
backups:

View File

@@ -1,16 +1,15 @@
import { json } from '@sveltejs/kit';
import type { RequestHandler } from '@sveltejs/kit';
import { db } from '$db/db.ts';
import { migrationRunner } from '$db/migrations.ts';
import { databaseInstancesQueries } from '$db/queries/databaseInstances.ts';
import { jobsQueries } from '$db/queries/jobs.ts';
import { backupSettingsQueries } from '$db/queries/backupSettings.ts';
import { appInfoQueries } from '$db/queries/appInfo.ts';
import { getCache } from '$pcd/cache.ts';
import { config } from '$config';
import type { components } from '$api/v1.d.ts';
type HealthResponse = components['schemas']['HealthResponse'];
type ComponentStatus = components['schemas']['ComponentStatus'];
type ComponentStatus = 'healthy' | 'degraded' | 'unhealthy';
// Track startup time for uptime calculation
const startupTime = Date.now();
@@ -19,19 +18,32 @@ const startupTime = Date.now();
const LOG_SIZE_WARN_BYTES = 100 * 1024 * 1024; // 100MB
const LOG_SIZE_CRITICAL_BYTES = 500 * 1024 * 1024; // 500MB
export const GET: RequestHandler = async () => {
const response: HealthResponse = {
status: 'healthy',
/**
 * Format a byte count as a human-readable string using 1024-based units.
 *
 * Plain bytes are printed without decimals ("512 B"); every larger unit is
 * printed with one decimal place ("1.5 KB", "100.0 MB").
 *
 * @param bytes - Size in bytes.
 * @returns The formatted size. Zero, negative and non-finite inputs are
 *          reported as "0 B" rather than producing "NaN"/"undefined" output.
 */
function formatBytes(bytes: number): string {
  // Guard inputs that have no meaningful unit (0, negatives, NaN, Infinity).
  if (!Number.isFinite(bytes) || bytes <= 0) return '0 B';

  const units = ['B', 'KB', 'MB', 'GB', 'TB', 'PB'];

  // Divide step by step instead of using Math.log: this avoids floating-point
  // error in the logarithm ratio and lets us clamp to the largest known unit
  // so the index can never run past the end of `units`.
  let value = bytes;
  let unitIndex = 0;
  while (value >= 1024 && unitIndex < units.length - 1) {
    value /= 1024;
    unitIndex++;
  }

  return `${value.toFixed(unitIndex > 0 ? 1 : 0)} ${units[unitIndex]}`;
}
export const GET: RequestHandler = async ({ url }) => {
const verbose = url.searchParams.get('verbose') === 'true';
const sqlite = checkSqlite();
const repos = checkRepos(verbose);
const jobs = checkJobs(verbose);
const backups = await checkBackups(verbose);
const logs = await checkLogs(verbose);
const response = {
status: 'healthy' as ComponentStatus,
timestamp: new Date().toISOString(),
version: appInfoQueries.getVersion(),
uptime: Math.floor((Date.now() - startupTime) / 1000),
components: {
database: checkDatabase(),
databases: checkDatabases(),
jobs: checkJobs(),
backups: await checkBackups(),
logs: await checkLogs()
}
components: { sqlite, repos, jobs, backups, logs }
};
response.status = determineOverallStatus(response.components);
@@ -40,22 +52,30 @@ export const GET: RequestHandler = async () => {
return json(response, { status: httpStatus });
};
function determineOverallStatus(components: HealthResponse['components']): ComponentStatus {
/**
 * Minimal view of the health components needed to derive the overall status:
 * each of the five components only has to expose its own `status`.
 */
type Components = Record<
  'sqlite' | 'repos' | 'jobs' | 'backups' | 'logs',
  { status: ComponentStatus }
>;
function determineOverallStatus(components: Components): ComponentStatus {
const statuses = [
components.database.status,
components.databases.status,
components.sqlite.status,
components.repos.status,
components.jobs.status,
components.backups.status,
components.logs.status
];
// If database is unhealthy, everything is unhealthy
if (components.database.status === 'unhealthy') {
// If sqlite is unhealthy, everything is unhealthy
if (components.sqlite.status === 'unhealthy') {
return 'unhealthy';
}
// If all PCD databases are unhealthy, system is unhealthy
if (components.databases.status === 'unhealthy') {
// If all PCD repos are unhealthy, system is unhealthy
if (components.repos.status === 'unhealthy') {
return 'unhealthy';
}
@@ -67,27 +87,30 @@ function determineOverallStatus(components: HealthResponse['components']): Compo
return 'healthy';
}
function checkDatabase(): HealthResponse['components']['database'] {
function checkSqlite() {
const start = performance.now();
try {
db.queryFirst('SELECT 1');
const responseTimeMs = Math.round((performance.now() - start) * 100) / 100;
const migration = migrationRunner.getCurrentVersion();
return {
status: 'healthy',
responseTimeMs
status: 'healthy' as ComponentStatus,
responseTimeMs,
migration
};
} catch (error) {
return {
status: 'unhealthy',
status: 'unhealthy' as ComponentStatus,
responseTimeMs: -1,
migration: 0,
message: error instanceof Error ? error.message : 'Database query failed'
};
}
}
function checkDatabases(): HealthResponse['components']['databases'] {
function checkRepos(verbose: boolean) {
try {
const allDatabases = databaseInstancesQueries.getAll();
const enabledDatabases = allDatabases.filter((d) => d.enabled === 1);
@@ -111,46 +134,42 @@ function checkDatabases(): HealthResponse['components']['databases'] {
if (total === 0) {
status = 'healthy';
message = 'No databases configured';
message = 'No repos configured';
} else if (enabled === 0) {
status = 'unhealthy';
message = 'All databases are disabled';
message = 'All repos are disabled';
} else if (disabled > 0) {
status = 'degraded';
message = `${disabled} database(s) disabled due to errors`;
message = `${disabled} repo(s) disabled due to errors`;
} else if (cachedCount < enabled) {
status = 'degraded';
message = `${enabled - cachedCount} database(s) not cached`;
message = `${enabled - cachedCount} repo(s) not cached`;
}
return {
status,
total,
enabled,
cached: cachedCount,
disabled,
message
};
// Minimal response
const result: Record<string, unknown> = { status };
if (message) result.message = message;
// Verbose adds counts
if (verbose) {
result.total = total;
result.enabled = enabled;
result.cached = cachedCount;
result.disabled = disabled;
}
return result as { status: ComponentStatus; message?: string };
} catch (error) {
return {
status: 'unhealthy',
total: 0,
enabled: 0,
cached: 0,
disabled: 0,
message: error instanceof Error ? error.message : 'Failed to check databases'
status: 'unhealthy' as ComponentStatus,
message: error instanceof Error ? error.message : 'Failed to check repos'
};
}
}
function checkJobs(): HealthResponse['components']['jobs'] {
function checkJobs(verbose: boolean) {
try {
const jobs = jobsQueries.getAll();
const lastRun: Record<string, string | null> = {};
for (const job of jobs) {
lastRun[job.name] = job.last_run_at ?? null;
}
// Check if sync_arr job is stale (hasn't run in 5+ minutes when it should run every minute)
const syncArrJob = jobs.find((j) => j.name === 'sync_arr');
@@ -167,20 +186,29 @@ function checkJobs(): HealthResponse['components']['jobs'] {
}
}
return {
status,
lastRun,
message
};
// Minimal response
const result: Record<string, unknown> = { status };
if (message) result.message = message;
// Verbose adds lastRun for all jobs
if (verbose) {
const lastRun: Record<string, string | null> = {};
for (const job of jobs) {
lastRun[job.name] = job.last_run_at ?? null;
}
result.lastRun = lastRun;
}
return result as { status: ComponentStatus; message?: string };
} catch (error) {
return {
status: 'unhealthy',
status: 'unhealthy' as ComponentStatus,
message: error instanceof Error ? error.message : 'Failed to check jobs'
};
}
}
async function checkBackups(): Promise<HealthResponse['components']['backups']> {
async function checkBackups(verbose: boolean) {
try {
const settings = backupSettingsQueries.get();
const enabled = settings?.enabled === 1;
@@ -188,7 +216,7 @@ async function checkBackups(): Promise<HealthResponse['components']['backups']>
if (!enabled) {
return {
status: 'healthy',
status: 'healthy' as ComponentStatus,
enabled: false,
message: 'Backups disabled'
};
@@ -233,25 +261,29 @@ async function checkBackups(): Promise<HealthResponse['components']['backups']>
}
}
return {
status,
enabled,
lastBackup,
count,
totalSizeBytes,
retentionDays,
message
};
// Minimal response
const result: Record<string, unknown> = { status, enabled };
if (message) result.message = message;
// Verbose adds details
if (verbose) {
result.lastBackup = lastBackup;
result.count = count;
result.totalSize = formatBytes(totalSizeBytes);
result.retentionDays = retentionDays;
}
return result as { status: ComponentStatus; enabled: boolean; message?: string };
} catch (error) {
return {
status: 'unhealthy',
status: 'unhealthy' as ComponentStatus,
enabled: false,
message: error instanceof Error ? error.message : 'Failed to check backups'
};
}
}
async function checkLogs(): Promise<HealthResponse['components']['logs']> {
async function checkLogs(verbose: boolean) {
try {
const logPath = config.paths.logs;
let totalSizeBytes = 0;
@@ -289,23 +321,28 @@ async function checkLogs(): Promise<HealthResponse['components']['logs']> {
if (totalSizeBytes > LOG_SIZE_CRITICAL_BYTES) {
status = 'degraded';
message = `Log directory is very large (${Math.round(totalSizeBytes / 1024 / 1024)}MB)`;
message = `Log directory is very large (${formatBytes(totalSizeBytes)})`;
} else if (totalSizeBytes > LOG_SIZE_WARN_BYTES) {
status = 'degraded';
message = `Log directory is getting large (${Math.round(totalSizeBytes / 1024 / 1024)}MB)`;
message = `Log directory is getting large (${formatBytes(totalSizeBytes)})`;
}
return {
status,
totalSizeBytes,
fileCount,
oldestLog,
newestLog,
message
};
// Minimal response
const result: Record<string, unknown> = { status };
if (message) result.message = message;
// Verbose adds details
if (verbose) {
result.totalSize = formatBytes(totalSizeBytes);
result.fileCount = fileCount;
result.oldestLog = oldestLog;
result.newestLog = newestLog;
}
return result as { status: ComponentStatus; message?: string };
} catch (error) {
return {
status: 'unhealthy',
status: 'unhealthy' as ComponentStatus,
message: error instanceof Error ? error.message : 'Failed to check logs'
};
}