Wire health probes, host stats, and LLM proxy round-trip

Three things from the latest arcadia-app pull:

- health.ts: client for /api/v1/health{,/:service,/detailed,/host}.
  monitoring.tsx now reads real per-subsystem probe state instead of
  synthesizing it from indirect signals (rate limits, sessions, jobs).
- New Host tab on Monitoring with KPI tiles + per-core CPU bars,
  load-avg cards, memory + swap usage, and per-mount disk bars,
  backed by /api/v1/health/host.
- llm-proxy.ts: typed errors (secret_disabled, ip_not_allowed, etc.)
  and a probeProxy() that round-trips a 1-token chat. settings.tsx's
  "Test connection" in proxy mode now exercises the real endpoint
  instead of just confirming the adapter built. Contract doc flipped
  from "not yet implemented" to "implemented".

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
jules
2026-05-02 17:05:22 +10:00
parent 0fcb9e40f1
commit 29030c9e72
5 changed files with 661 additions and 46 deletions

94
app/lib/arcadia/health.ts Normal file
View File

@@ -0,0 +1,94 @@
// Arcadia health probes.
//
// Backed by /api/v1/health* (public — no auth). Each subsystem is probed
// independently; the overall endpoint aggregates and returns 503 if any
// subsystem is not "ok". See arcadia-app commit f427892.
import type { ArcadiaClient } from "@crema/arcadia-client"
/**
 * Names of the subsystems that have their own health probe. Each value is
 * also used verbatim as the path segment of the per-service endpoint.
 */
export type HealthSubsystem = "api" | "db" | "workers" | "storage"
/** Status values carried by both individual subsystem probes and the aggregate report. */
export type HealthStatus = "ok" | "degraded" | "error" | "unconfigured"
/** Probe result for a single subsystem. */
export interface SubsystemHealth {
/** Outcome of the probe. */
status: HealthStatus
/** Optional human-readable detail. */
message?: string
/** Free-form metrics — shape is subsystem-specific. */
details?: Record<string, unknown>
}
/** Aggregated health report covering every subsystem. */
export interface OverallHealth {
/** Aggregate status for the whole report. */
status: HealthStatus
/** When the report was produced — presumably an ISO-8601 timestamp; TODO confirm against the API. */
checked_at: string
/** One probe result per subsystem name. */
subsystems: Record<HealthSubsystem, SubsystemHealth>
}
/** Overall health report extended with optional runtime information. */
export interface DetailedHealth extends OverallHealth {
/** BEAM info — present on /health/detailed only. */
system?: {
otp_release?: string
elixir_version?: string
process_count?: number
memory_total_bytes?: number
// Any additional keys are accepted but left untyped.
[k: string]: unknown
}
}
/**
 * Host-level CPU, memory and disk statistics.
 *
 * Many numeric fields are nullable — presumably `null` means the host could
 * not report that metric; TODO confirm against the API.
 */
export interface HostStats {
/** CPU utilisation, load averages and core counts. */
cpu: {
// NOTE(review): `_pct` fields look like percentages — confirm the 0–100 vs 0–1 range.
util_pct: number | null
/** One entry per CPU, going by the field name. */
per_cpu_pct: number[]
load_avg_1: number | null
load_avg_5: number | null
load_avg_15: number | null
schedulers_online: number
num_cpus: number | null
}
/** Memory and swap figures; field names indicate bytes. */
memory: {
total_bytes: number | null
free_bytes: number | null
available_bytes: number | null
buffered_bytes: number | null
cached_bytes: number | null
swap_total_bytes: number | null
swap_free_bytes: number | null
}
/** One entry per mount. Note the size field is named in KB, unlike the byte-named memory fields. */
disks: Array<{ mount: string; total_kb: number; used_pct: number }>
/** When the stats were sampled — presumably an ISO-8601 timestamp; TODO confirm. */
checked_at: string
}
/** Path prefix shared by every health endpoint requested from this module. */
const BASE = "/api/v1/health"
/**
 * Fetches the aggregated health report from the base health endpoint.
 *
 * The response is accepted either bare or wrapped in a `{ data: ... }`
 * envelope.
 *
 * NOTE(review): the file header states this endpoint answers 503 when any
 * subsystem is not "ok". Confirm whether `arcadia.GET` rejects on non-2xx —
 * if so, callers must catch to observe a degraded report.
 *
 * @param arcadia - Client used to issue the GET request.
 * @returns The overall health payload.
 */
export async function getHealth(arcadia: ArcadiaClient): Promise<OverallHealth> {
  type Payload = { data: OverallHealth } | OverallHealth
  return unwrap(await arcadia.GET<Payload>(BASE))
}
/**
 * Fetches the probe result for one subsystem.
 *
 * The subsystem name is appended to the health base path as a single path
 * segment. The response is accepted either bare or wrapped in a
 * `{ data: ... }` envelope.
 *
 * @param arcadia - Client used to issue the GET request.
 * @param service - Subsystem whose probe should be queried.
 * @returns The probe result for that subsystem.
 */
export async function getServiceHealth(
  arcadia: ArcadiaClient,
  service: HealthSubsystem,
): Promise<SubsystemHealth> {
  type Payload = { data: SubsystemHealth } | SubsystemHealth
  return unwrap(await arcadia.GET<Payload>(`${BASE}/${service}`))
}
/**
 * Fetches the detailed health report from the `/detailed` endpoint.
 *
 * The response is accepted either bare or wrapped in a `{ data: ... }`
 * envelope.
 *
 * @param arcadia - Client used to issue the GET request.
 * @returns The overall report plus optional runtime information.
 */
export async function getHealthDetailed(arcadia: ArcadiaClient): Promise<DetailedHealth> {
  type Payload = { data: DetailedHealth } | DetailedHealth
  return unwrap(await arcadia.GET<Payload>(`${BASE}/detailed`))
}
/**
 * Fetches host CPU, memory and disk statistics from the `/host` endpoint.
 *
 * The response is accepted either bare or wrapped in a `{ data: ... }`
 * envelope.
 *
 * @param arcadia - Client used to issue the GET request.
 * @returns The host statistics payload.
 */
export async function getHostStats(arcadia: ArcadiaClient): Promise<HostStats> {
  type Payload = { data: HostStats } | HostStats
  return unwrap(await arcadia.GET<Payload>(`${BASE}/host`))
}
/**
 * Runtime array listing the same four names as the `HealthSubsystem` union.
 * Nothing ties the two together at compile time, so update this list by hand
 * whenever the union changes.
 */
export const SUBSYSTEMS: HealthSubsystem[] = ["api", "db", "workers", "storage"]
/**
 * Strips an optional `{ data: ... }` envelope from a response.
 *
 * Any non-null object that has a `data` property (own or inherited) is
 * treated as an envelope and its `data` value is returned. Everything else —
 * primitives, `null`, `undefined`, and objects without `data` — is returned
 * unchanged.
 *
 * @param res - Either the bare payload or the payload wrapped under `data`.
 * @returns The payload itself.
 */
function unwrap<T>(res: { data: T } | T): T {
  const isObject = typeof res === "object" && res !== null
  if (isObject && "data" in (res as object)) {
    return (res as { data: T }).data
  }
  return res as T
}