import { useCallback, useEffect, useMemo, useState } from "react" import { Link } from "react-router" import { Activity, AlertTriangle, Cpu, Database, Globe, HardDrive, Loader2, RefreshCw, RotateCw, Server, Users, Zap, } from "lucide-react" import { ArcadiaError, useArcadiaClient } from "@crema/arcadia-client" import { AlertBanner } from "@crema/feedback-ui" import { BarChart, Donut, Heatmap, LineChart, Sparkline, type ChartDatum, type SeriesPoint, } from "@crema/chart-ui" import { KpiTile, formatCompact, formatPercent } from "@crema/dashboard-ui" import { ComponentRow, IncidentTimeline, OverallStatus, type ComponentState, type StatusComponent, type StatusIncident, } from "@crema/status-ui" import { ChoroplethMap, WorldMapSvg, } from "@crema/map-ui" import { formatBytes } from "@crema/file-ui" import { AppShell } from "~/components/layout/app-shell" import { Badge } from "~/components/ui/badge" import { Button } from "~/components/ui/button" import { Card, CardContent, CardDescription, CardHeader, CardTitle, } from "~/components/ui/card" import { Tabs, TabsContent, TabsList, TabsTrigger } from "~/components/ui/tabs" import { getActiveSessions, getAuditStats, getInfrastructureSummary, getJobStats, getRateLimits, getRecentJobs, getSpaces, listDroplets, retryJob, type ActiveSession, type AuditStats, type Droplet, type InfrastructureSummary, type JobStats, type ObanJob, type RateLimit, type Space, } from "~/lib/arcadia/monitoring" import { getHealth, getHostStats, SUBSYSTEMS, type HealthSubsystem, type HostStats, type OverallHealth, type SubsystemHealth, } from "~/lib/arcadia/health" import { pageTitle } from "~/lib/page-meta" import { useSession } from "~/lib/session" import { useRegisterAdminContext } from "~/lib/admin-context" export const meta = () => pageTitle("Monitoring") interface DashboardData { jobStats: JobStats | null recentJobs: ObanJob[] sessions: { sessions: ActiveSession[]; count: number } | null rateLimits: RateLimit[] infraSummary: 
InfrastructureSummary | null spaces: Space[] droplets: Droplet[] auditStats: AuditStats | null health: OverallHealth | null host: HostStats | null } const EMPTY: DashboardData = { jobStats: null, recentJobs: [], sessions: null, rateLimits: [], infraSummary: null, spaces: [], droplets: [], auditStats: null, health: null, host: null, } export default function MonitoringRoute() { const session = useSession() const arcadia = useArcadiaClient() const [data, setData] = useState(EMPTY) const [loading, setLoading] = useState(true) const [error, setError] = useState(null) const [autoRefresh, setAutoRefresh] = useState(true) const refresh = useCallback(async () => { setError(null) setLoading(true) try { const [ jobStats, recentJobs, sessions, rateLimits, infraSummary, spaces, droplets, auditStats, health, host, ] = await Promise.all([ getJobStats(arcadia).catch(() => null), getRecentJobs(arcadia, { limit: 50 }).catch(() => []), getActiveSessions(arcadia).catch(() => null), getRateLimits(arcadia).catch(() => []), getInfrastructureSummary(arcadia), getSpaces(arcadia), listDroplets(arcadia), getAuditStats(arcadia, { from: new Date(Date.now() - 7 * 24 * 60 * 60 * 1000).toISOString(), }).catch(() => null), getHealth(arcadia).catch(() => null), getHostStats(arcadia).catch(() => null), ]) setData({ jobStats, recentJobs, sessions, rateLimits, infraSummary, spaces, droplets, auditStats, health, host, }) } catch (err) { setError(err instanceof ArcadiaError ? err.message : "Failed to load monitoring data.") } finally { setLoading(false) } }, [arcadia]) useEffect(() => { if (session) refresh() }, [session, refresh]) // Auto-refresh every 30s for the live feel. useEffect(() => { if (!session || !autoRefresh) return const t = setInterval(refresh, 30000) return () => clearInterval(t) }, [session, autoRefresh, refresh]) const components = useMemo(() => buildStatusComponents(data), [data]) const summary = useMemo( () => ({ jobs: data.jobStats?.counts ?? 
{}, jobs_executing: data.jobStats?.counts?.executing ?? 0, jobs_retryable: data.jobStats?.counts?.retryable ?? 0, sessions_24h: data.sessions?.count ?? 0, droplets: data.droplets.length, spaces: data.spaces.length, audit_total_7d: data.auditStats?.total ?? 0, }), [data], ) useRegisterAdminContext("monitoring", summary) if (!session) { return (
Sign in required Monitoring requires an admin session.
) } return (

Server stats & health

Live view of background jobs, active sessions, infrastructure, and audit activity. Refreshes every 30s.

{error ? ( setError(null)}> {error} ) : null} {/* Service status board */}
Service health {data.health ? `Live probes from /api/v1/health · checked ${new Date( data.health.checked_at, ).toLocaleTimeString()}` : "Live probes from /api/v1/health (unavailable — backend may be down or older than the per-subsystem probe rollout)."}
{components.map((c) => ( ))}
{/* KPI tiles */}
} /> } tone={ (data.jobStats?.counts?.executing ?? 0) > 0 ? "info" : "neutral" } /> } tone={ (data.jobStats?.counts?.retryable ?? 0) > 0 ? "warning" : "neutral" } /> } />
{data.host ? (
} tone={ (data.host.cpu.util_pct ?? 0) > 90 ? "negative" : (data.host.cpu.util_pct ?? 0) > 70 ? "warning" : "neutral" } /> } tone={ data.host.cpu.load_avg_1 != null && data.host.cpu.num_cpus && data.host.cpu.load_avg_1 > data.host.cpu.num_cpus ? "warning" : "neutral" } /> } tone={ memoryUsedPct(data.host.memory) > 90 ? "negative" : memoryUsedPct(data.host.memory) > 75 ? "warning" : "neutral" } /> } tone={ busiestDiskPct(data.host.disks) > 90 ? "negative" : busiestDiskPct(data.host.disks) > 75 ? "warning" : "neutral" } />
) : null} Host Background jobs Sessions Audit activity Infrastructure Rate limits { try { await retryJob(arcadia, id) await refresh() } catch (err) { setError( err instanceof ArcadiaError ? err.message : "Retry failed.", ) } }} />
) }

/**
 * Maps the arcadia /health probe results onto the status-ui component model,
 * producing one row per id in SUBSYSTEMS (in SUBSYSTEMS order).
 *
 * State mapping (implemented by mapHealthState):
 *   "ok"           → operational
 *   "unconfigured" → operational (storage with no configured backend is ok)
 *   "degraded"     → degraded
 *   "error"        → major-outage
 *   anything else, or no probe result for the subsystem → partial-outage
 *
 * @param d Dashboard snapshot; `d.health` may be null, in which case every
 *          subsystem is reported as partial-outage with its static description.
 * @returns One StatusComponent per subsystem. The probe's `message`, when
 *          present, replaces the static description.
 */
function buildStatusComponents(d: DashboardData): StatusComponent[] {
  const probes = d.health?.subsystems
  // Called `labels` (not `meta`) so it does not shadow the route's exported `meta`.
  const labels: Record<HealthSubsystem, { name: string; description: string }> = {
    api: { name: "API", description: "/api/v1 — auth, REST endpoints" },
    db: { name: "Database", description: "Postgres — sessions, audit log" },
    workers: {
      name: "Background workers",
      description: "Oban — webhook delivery, scheduled tasks",
    },
    storage: {
      name: "Storage",
      description: "S3-compatible object storage (per platform default)",
    },
  }
  return SUBSYSTEMS.map((id) => {
    const probe = probes?.[id]
    const { name, description } = labels[id]
    return {
      id,
      name,
      description: probe?.message ?? description,
      // A subsystem without a probe result is shown as a partial outage.
      state: probe ? mapHealthState(probe) : "partial-outage",
    } satisfies StatusComponent
  })
}

/**
 * Converts a single subsystem probe status into a status-ui ComponentState.
 *
 * @param probe Probe result for one subsystem; only `probe.status` is read.
 * @returns "operational" for "ok" and "unconfigured", "degraded" for
 *          "degraded", "major-outage" for "error", and "partial-outage" for
 *          any status this function does not recognise.
 */
function mapHealthState(probe: SubsystemHealth): ComponentState {
  switch (probe.status) {
    case "ok":
    case "unconfigured":
      return "operational"
    case "degraded":
      return "degraded"
    case "error":
      return "major-outage"
    default:
      return "partial-outage"
  }
}

// --- Host panel --------------------------------------------------------
function HostPanel({ host }: { host: HostStats | null }) { if (!host) { return ( } text="Host stats unavailable. The /api/v1/health/host endpoint may not be deployed yet, or os_mon daemons aren't reachable." /> ) } const memUsed = memoryUsedBytes(host.memory) const memTotal = host.memory.total_bytes ?? null const memPct = memoryUsedPct(host.memory) const swapTotal = host.memory.swap_total_bytes ?? null const swapUsed = swapTotal != null && host.memory.swap_free_bytes != null ? swapTotal - host.memory.swap_free_bytes : null return (
{/* CPU + load */}
CPU {host.cpu.num_cpus ? `${host.cpu.num_cpus} cores · ${host.cpu.schedulers_online} BEAM schedulers online` : `${host.cpu.schedulers_online} BEAM schedulers online`} {host.cpu.per_cpu_pct.length > 0 ? (
Per core
{host.cpu.per_cpu_pct.map((p, i) => (
50 ? "var(--primary-foreground)" : "var(--foreground)", }} title={`Core ${i}: ${p.toFixed(1)}%`} > {p.toFixed(0)}
))}
) : null}
Load average Unix-style load average. A value above the core count means the run-queue is saturated.
{/* Memory */} Memory {memTotal != null ? `${formatBytes(memTotal)} total` : "Total memory unknown"} {host.memory.available_bytes != null ? ` · ${formatBytes(host.memory.available_bytes)} available` : ""} {(host.memory.buffered_bytes != null || host.memory.cached_bytes != null) && (
{host.memory.buffered_bytes != null && ( Buffered: {formatBytes(host.memory.buffered_bytes)} )} {host.memory.cached_bytes != null && ( Cached: {formatBytes(host.memory.cached_bytes)} )}
)} {swapTotal != null && swapTotal > 0 ? ( ) : null}
{/* Disks */} Disks One row per mount point. {host.disks.length === 0 ? (

No disks reported.

) : (
{host.disks.map((d) => ( ))}
)}
) } function UsageBar({ label, pct, valueText, }: { label: string pct: number | null valueText: string }) { const clamped = pct == null ? 0 : Math.max(0, Math.min(100, pct)) const tone = pct == null ? "var(--muted-foreground)" : barColor(pct) return (
{label} {valueText}
) } function LoadAvgCell({ label, value, cores, }: { label: string value: number | null cores: number | null }) { const saturated = value != null && cores != null && value > cores return (
{label} {value != null ? value.toFixed(2) : "—"} {cores ? ( / {cores} cores ) : null}
) }

/**
 * Bytes of memory in use, derived as total minus available.
 *
 * "available" is preferred over "free": on Linux, free excludes reclaimable
 * buffer/cache memory and therefore overstates pressure. "free" is only the
 * fallback when "available" is absent.
 *
 * @returns Used bytes (never negative), or null when the total or both of
 *          available/free are missing.
 */
function memoryUsedBytes(m: HostStats["memory"]): number | null {
  const total = m.total_bytes
  if (total == null) return null
  const headroom = m.available_bytes ?? m.free_bytes
  return headroom == null ? null : Math.max(0, total - headroom)
}

/**
 * Memory utilisation as a percentage of total.
 *
 * @returns A percentage, or 0 when usage cannot be computed or the reported
 *          total is zero.
 */
function memoryUsedPct(m: HostStats["memory"]): number {
  const used = memoryUsedBytes(m)
  const total = m.total_bytes
  if (used != null && total != null && total !== 0) {
    return (used / total) * 100
  }
  return 0
}

/**
 * Human-readable "used / total" memory label built with formatBytes.
 *
 * @returns The formatted label, or "—" when usage or total is unknown.
 */
function memoryUsedLabel(m: HostStats["memory"]): string {
  const total = m.total_bytes
  const used = memoryUsedBytes(m)
  if (used == null || total == null) return "—"
  return `${formatBytes(used)} / ${formatBytes(total)}`
}

/**
 * Highest `used_pct` across all reported disks.
 *
 * @returns The maximum percentage, with a floor of 0 (also the result for an
 *          empty list).
 */
function busiestDiskPct(disks: HostStats["disks"]): number {
  let peak = 0
  for (const disk of disks) {
    peak = Math.max(peak, disk.used_pct)
  }
  return peak
}

/**
 * Label for the fullest disk, formatted as "<used_pct>% (<mount>)".
 * When several disks tie, the earliest one in the list is reported.
 *
 * @returns The label, or "—" when no disks are reported.
 */
function busiestDiskLabel(disks: HostStats["disks"]): string {
  let fullest: HostStats["disks"][number] | undefined
  for (const disk of disks) {
    // Strictly-greater comparison keeps the first disk on ties.
    if (fullest === undefined || disk.used_pct > fullest.used_pct) {
      fullest = disk
    }
  }
  if (fullest === undefined) return "—"
  return `${fullest.used_pct}% (${fullest.mount})`
}

/**
 * CSS colour for a usage bar: destructive at 90% and above, amber at 75% and
 * above, primary otherwise.
 */
function barColor(pct: number): string {
  return pct >= 90
    ? "var(--destructive)"
    : pct >= 75
      ? "#f59e0b"
      : "var(--primary)"
}

// --- Jobs panel --------------------------------------------------------
function JobsPanel({ stats, recent, onRetry, }: { stats: JobStats | null recent: ObanJob[] onRetry: (id: number) => Promise }) { if (!stats) { return } text="No job stats available." /> } const stateData: ChartDatum[] = (Object.entries(stats.counts) as [string, number][]) .filter(([, n]) => n > 0) .map(([state, n]) => ({ label: state, value: n, color: jobStateColor(state) })) const queueData: ChartDatum[] = stats.queues.map((q) => { const totals = stats.by_queue[q] ?? {} const sum = Object.values(totals).reduce((a, n) => a + (n ?? 0), 0) return { label: q, value: sum } }) return (
Jobs by state {stateData.length === 0 ? (

No active jobs.

) : ( )}
    {stateData.map((d) => (
  • {d.label} {d.value}
  • ))}
Active jobs by queue {queueData.length === 0 ? (

No queued or executing jobs.

) : ( )}
Recent jobs Latest 50 — newest first. {recent.length === 0 ? (

No recent jobs.

) : (
    {recent.map((j) => (
  • {j.state} {j.worker} queue: {j.queue} attempt {j.attempt}/{j.max_attempts} · inserted{" "} {new Date(j.inserted_at).toLocaleString()} {j.completed_at ? ` · completed ${new Date(j.completed_at).toLocaleString()}` : ""} {j.errors && j.errors.length > 0 ? ( {j.errors[j.errors.length - 1]?.error ?? "(error)"} ) : null}
    {j.state === "retryable" || j.state === "discarded" ? ( ) : null}
  • ))}
)}
) } // --- Sessions panel ---------------------------------------------------- function SessionsPanel({ sessions, }: { sessions: { sessions: ActiveSession[]; count: number } | null }) { if (!sessions) { return } text="No session data available." /> } // Bucket sign-ins by hour for a 24h sparkline. const hourly = useMemo(() => { const now = Date.now() const buckets = Array.from({ length: 24 }, (_, i) => ({ x: i, y: 0, })) for (const s of sessions.sessions) { const t = new Date(s.last_sign_in_at).getTime() const ago = (now - t) / (60 * 60 * 1000) const idx = 23 - Math.floor(ago) if (idx >= 0 && idx < 24) buckets[idx].y++ } return buckets }, [sessions]) return (
Sign-ins over the last 24h One bar per hour, latest on the right.
{sessions.count} recent sessions {sessions.sessions.length === 0 ? (

No sign-ins in the last 24 hours.

) : (
    {sessions.sessions.map((s) => (
  • {s.email} {s.first_name || s.last_name ? `${s.first_name ?? ""} ${s.last_name ?? ""}`.trim() + " · " : ""} {s.user_type ?? "user"} · status: {s.status} {s.two_factor_enabled ? " · 2FA" : ""}
    {new Date(s.last_sign_in_at).toLocaleString()}
  • ))}
)}
) } // --- Audit panel ------------------------------------------------------- function AuditPanel({ stats }: { stats: AuditStats | null }) { if (!stats) { return } text="No audit stats available." /> } const bySeverity: ChartDatum[] = Object.entries(stats.by_severity ?? {}).map( ([k, v]) => ({ label: k, value: v, color: severityColor(k) }), ) const byResource: ChartDatum[] = Object.entries(stats.by_resource_type ?? {}) .sort((a, b) => b[1] - a[1]) .slice(0, 10) .map(([k, v]) => ({ label: k, value: v })) // Time series — only render if backend supplied it. const series: SeriesPoint[] | null = stats.over_time ? stats.over_time.map((p, i) => ({ x: i, y: p.total })) : null return (
Events by severity (7d) {bySeverity.length === 0 ? (

No events.

) : ( )}
Top resource types {byResource.length === 0 ? (

No events.

) : ( )}
{series ? ( Events over time ) : null}
) } // --- Infrastructure panel ---------------------------------------------- function InfraPanel({ summary, spaces, droplets, }: { summary: InfrastructureSummary | null spaces: Space[] droplets: Droplet[] }) { const dropletsByRegion = useMemo(() => { const out: Record = {} for (const d of droplets) { const r = typeof d.region === "string" ? d.region : d.region?.slug ?? d.region?.name ?? "unknown" out[r] = (out[r] ?? 0) + 1 } return out }, [droplets]) if (!summary && spaces.length === 0 && droplets.length === 0) { return ( } text="No infrastructure connected. Wire a DigitalOcean token in arcadia's .env to see this section populate." /> ) } return (
{summary ? ( DigitalOcean summary
              {JSON.stringify(summary, null, 2)}
            
) : null}
} /> } /> } />
{droplets.length > 0 ? (
Droplet regions Coloured continents indicate any droplets in that hemisphere. Droplets ({droplets.length})
    {droplets.slice(0, 20).map((d) => (
  • {d.name} {typeof d.region === "string" ? d.region : d.region?.slug ?? "—"} {d.size_slug ? ` · ${d.size_slug}` : ""} {d.vcpus ? ` · ${d.vcpus} vCPU` : ""} {d.memory ? ` · ${formatBytes(d.memory * 1024 * 1024)}` : ""}
    {d.status}
  • ))} {droplets.length > 20 ? (
  • + {droplets.length - 20} more
  • ) : null}
) : null} {spaces.length > 0 ? ( Spaces ({spaces.length})
              {JSON.stringify(spaces, null, 2)}
            
) : null}
) }

/**
 * Best-effort table from DigitalOcean region-slug prefixes to the continent
 * IDs the WorldMapSvg knows ("north-america", "europe", "asia", "oceania").
 *
 * Entries are tried in order and the first match wins, so the north-america
 * row (which contains the legacy `ams_tor` alternative) must stay ahead of the
 * europe row that matches plain `ams`.
 */
const CONTINENT_BY_REGION_PREFIX: ReadonlyArray<readonly [RegExp, string]> = [
  [/^(nyc|sfo|tor|ams_tor|atl)/, "north-america"],
  [/^(lon|ams|fra)/, "europe"],
  [/^(blr|sgp)/, "asia"],
  [/^(syd)/, "oceania"],
]

/**
 * Builds the continent → fill-colour map for the droplet world map.
 *
 * Only the keys of `byRegion` are used; the counts do not affect the result.
 * Region slugs are matched case-insensitively by prefix, and slugs that match
 * no known prefix (e.g. "unknown") are ignored.
 *
 * @param byRegion Droplet count keyed by region slug.
 * @returns A record with one "var(--primary)" entry per continent that hosts
 *          at least one recognised region; empty when nothing matches.
 */
function regionColorsFor(byRegion: Record<string, number>): Record<string, string> {
  const colors: Record<string, string> = {}
  for (const slug of Object.keys(byRegion)) {
    const lc = slug.toLowerCase()
    const match = CONTINENT_BY_REGION_PREFIX.find(([prefix]) => prefix.test(lc))
    if (match) colors[match[1]] = "var(--primary)"
  }
  return colors
}

// --- Rate limits panel -------------------------------------------------
function RateLimitsPanel({ limits }: { limits: RateLimit[] }) { if (limits.length === 0) { return ( } text="No rate-limit configuration available." /> ) } return ( Configured rate limits The maximum requests allowed per window for each authenticated bucket.
    {limits.map((l) => (
  • {l.type} Window: {l.window_seconds}s
    {l.max_requests.toLocaleString()} req
  • ))}
) }

// --- helpers ----------------------------------------------------------
function PanelStub({ icon, text }: { icon: React.ReactNode; text: string }) { return ( {icon} {text} ) }

/** Chart colours for the job states that have a dedicated colour. */
const JOB_STATE_COLORS: ReadonlyMap<string, string> = new Map([
  ["executing", "#3b82f6"],
  ["available", "#a3a3a3"],
  ["scheduled", "#a3a3a3"],
  ["retryable", "#f59e0b"],
  ["discarded", "#ef4444"],
  ["cancelled", "#737373"],
  ["completed", "#10b981"],
])

/**
 * Hex colour used to draw a job state in charts.
 *
 * @returns The state's colour, or "#9ca3af" for any state without one.
 */
function jobStateColor(state: string): string {
  return JOB_STATE_COLORS.get(state) ?? "#9ca3af"
}

/**
 * Badge variant for a job state: "default" for executing/completed,
 * "destructive" for discarded, "secondary" for retryable/scheduled, and
 * "outline" for everything else.
 */
function jobStateVariant(
  state: string,
): "default" | "secondary" | "destructive" | "outline" {
  switch (state) {
    case "executing":
    case "completed":
      return "default"
    case "discarded":
      return "destructive"
    case "retryable":
    case "scheduled":
      return "secondary"
    default:
      return "outline"
  }
}

/**
 * Hex colour for an audit severity: red for "critical"/"error", amber for
 * "warning", slate for anything else.
 */
function severityColor(s: string): string {
  switch (s) {
    case "critical":
    case "error":
      return "#ef4444"
    case "warning":
      return "#f59e0b"
    default:
      return "#94a3b8"
  }
}