Wire health probes, host stats, and LLM proxy round-trip

Three things from the latest arcadia-app pull:

- health.ts: client for /api/v1/health{,/:service,/detailed,/host}.
  monitoring.tsx now reads real per-subsystem probe state instead of
  synthesizing it from indirect signals (rate limits, sessions, jobs).
- New Host tab on Monitoring with KPI tiles + per-core CPU bars,
  load-avg cards, memory + swap usage, and per-mount disk bars,
  backed by /api/v1/health/host.
- llm-proxy.ts: typed errors (secret_disabled, ip_not_allowed, etc.)
  and a probeProxy() that round-trips a 1-token chat. settings.tsx's
  "Test connection" in proxy mode now exercises the real endpoint
  instead of just confirming the adapter built. Contract doc flipped
  from "not yet implemented" to "implemented".

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
jules
2026-05-02 17:05:22 +10:00
parent 0fcb9e40f1
commit 29030c9e72
5 changed files with 661 additions and 46 deletions

94
app/lib/arcadia/health.ts Normal file
View File

@@ -0,0 +1,94 @@
// Arcadia health probes.
//
// Backed by /api/v1/health* (public — no auth). Each subsystem is probed
// independently; the overall endpoint aggregates and returns 503 if any
// subsystem is not "ok". See arcadia-app commit f427892.
import type { ArcadiaClient } from "@crema/arcadia-client"
/** Names of the subsystems the health API probes independently. */
export type HealthSubsystem =
  | "api"
  | "db"
  | "workers"
  | "storage"

/** Probe outcome reported for one subsystem or for the aggregate. */
export type HealthStatus =
  | "ok"
  | "degraded"
  | "error"
  | "unconfigured"
/** Probe result for a single subsystem. */
export interface SubsystemHealth {
  /** Outcome of the probe. */
  status: HealthStatus
  /** Human-readable detail, when the server supplies one. */
  message?: string
  /** Free-form metrics; the shape depends on the subsystem. */
  details?: { [metric: string]: unknown }
}
/** Aggregate report: one overall status plus an entry for every subsystem. */
export interface OverallHealth {
  /** Aggregate status across all subsystems. */
  status: HealthStatus
  /** Time of the check as sent by the server (string; format not specified here). */
  checked_at: string
  /** Individual probe results, keyed by subsystem name. */
  subsystems: { [K in HealthSubsystem]: SubsystemHealth }
}
/** Aggregate report extended with runtime information from /health/detailed. */
export interface DetailedHealth extends OverallHealth {
  /**
   * BEAM runtime info; only the /health/detailed endpoint includes it.
   * Every known field is optional and additional keys are tolerated.
   */
  system?: {
    otp_release?: string
    elixir_version?: string
    process_count?: number
    memory_total_bytes?: number
    [extra: string]: unknown
  }
}
/**
 * Host-level statistics returned by /api/v1/health/host.
 *
 * Units follow the field-name suffixes (`_pct`, `_bytes`, `_kb`). Many
 * fields are nullable; presumably `null` means the host could not report
 * the value — confirm against the API.
 */
export interface HostStats {
  /** CPU utilisation, load averages and core counts. */
  cpu: {
    util_pct: number | null
    /** One entry per CPU. */
    per_cpu_pct: number[]
    load_avg_1: number | null
    load_avg_5: number | null
    load_avg_15: number | null
    schedulers_online: number
    num_cpus: number | null
  }
  /** Memory and swap counters. */
  memory: {
    total_bytes: number | null
    free_bytes: number | null
    available_bytes: number | null
    buffered_bytes: number | null
    cached_bytes: number | null
    swap_total_bytes: number | null
    swap_free_bytes: number | null
  }
  /** One entry per mount point. */
  disks: { mount: string; total_kb: number; used_pct: number }[]
  /** Time of the check as sent by the server (string; format not specified here). */
  checked_at: string
}
// Common path prefix of every health endpoint this module calls.
const BASE = "/api/v1/health"
/**
 * Fetch the aggregate health report from /api/v1/health.
 *
 * The aggregate endpoint answers 503 whenever any subsystem is not "ok".
 * If the client rejects non-2xx responses, that is exactly the case where
 * the per-subsystem report matters most, so a 503 whose body still looks
 * like a health report is returned instead of rethrown.
 *
 * NOTE(review): recovery assumes the thrown error exposes `status` and
 * `body` — confirm against @crema/arcadia-client. When the shape does not
 * match, the original error is rethrown unchanged.
 *
 * @param arcadia Client used to issue the request.
 * @returns The aggregate report, unwrapped from a `{ data }` envelope if present.
 * @throws Whatever the client throws for failures that carry no health report.
 */
export async function getHealth(arcadia: ArcadiaClient): Promise<OverallHealth> {
  try {
    return unwrap<OverallHealth>(
      await arcadia.GET<{ data: OverallHealth } | OverallHealth>(BASE),
    )
  } catch (e) {
    if (e && typeof e === "object") {
      const { status, body } = e as { status?: unknown; body?: unknown }
      if (status === 503) {
        // The body may or may not be wrapped in { data }, like success responses.
        const report = unwrap<unknown>(body)
        if (
          report &&
          typeof report === "object" &&
          "status" in report &&
          "subsystems" in report
        ) {
          return report as OverallHealth
        }
      }
    }
    throw e
  }
}
/**
 * Fetch the probe result for one subsystem from /api/v1/health/:service.
 *
 * @param arcadia Client used to issue the request.
 * @param service Subsystem whose probe should be read.
 * @returns The subsystem's result, unwrapped from a `{ data }` envelope if present.
 */
export async function getServiceHealth(
  arcadia: ArcadiaClient,
  service: HealthSubsystem,
): Promise<SubsystemHealth> {
  return unwrap<SubsystemHealth>(
    await arcadia.GET<{ data: SubsystemHealth } | SubsystemHealth>(`${BASE}/${service}`),
  )
}
/**
 * Fetch the detailed health report from /api/v1/health/detailed.
 *
 * NOTE(review): if this endpoint also answers 503 when a subsystem is
 * unhealthy and the client rejects non-2xx responses, this call throws
 * instead of returning the report — confirm the intended behaviour.
 *
 * @param arcadia Client used to issue the request.
 * @returns The detailed report, unwrapped from a `{ data }` envelope if present.
 */
export async function getHealthDetailed(arcadia: ArcadiaClient): Promise<DetailedHealth> {
  const payload = await arcadia.GET<{ data: DetailedHealth } | DetailedHealth>(
    `${BASE}/detailed`,
  )
  return unwrap<DetailedHealth>(payload)
}
/**
 * Fetch host statistics (CPU, memory, disks) from /api/v1/health/host.
 *
 * @param arcadia Client used to issue the request.
 * @returns The host stats, unwrapped from a `{ data }` envelope if present.
 */
export async function getHostStats(arcadia: ArcadiaClient): Promise<HostStats> {
  return unwrap<HostStats>(
    await arcadia.GET<{ data: HostStats } | HostStats>(`${BASE}/host`),
  )
}
// Exhaustive registry of subsystem names. Typing it as a Record over
// HealthSubsystem makes the compiler reject a missing or misspelled member,
// so the exported list cannot silently drift from the union.
const SUBSYSTEM_REGISTRY: Record<HealthSubsystem, true> = {
  api: true,
  db: true,
  workers: true,
  storage: true,
}

/** Every probed subsystem, in display order: api, db, workers, storage. */
export const SUBSYSTEMS: HealthSubsystem[] = Object.keys(SUBSYSTEM_REGISTRY) as HealthSubsystem[]
/**
 * Strip an optional `{ data: ... }` envelope from a response.
 *
 * Non-object and falsy values are returned untouched. An object is treated
 * as an envelope only when it has a `data` key; otherwise it is returned as is.
 */
function unwrap<T>(res: { data: T } | T): T {
  if (!res || typeof res !== "object") {
    return res as T
  }
  if ("data" in (res as object)) {
    return (res as { data: T }).data
  }
  return res as T
}

View File

@@ -0,0 +1,182 @@
// Arcadia LLM proxy client.
//
// Implements the spec in docs/LLM_PROXY_CONTRACT.md against arcadia-app's
// POST /api/v1/ai/llm/chat. The lib (@crema/llm-providers-ui buildAdapter)
// owns the streaming chat path itself; this module exposes a lightweight
// non-streaming probe so the Settings "Test connection" button can verify
// the proxy round-trips end-to-end (auth → secret resolution → upstream
// dispatch → response shape).
import type { ArcadiaClient } from "@crema/arcadia-client"
/** Provider ids accepted in the `provider` field of a proxy chat request. */
export type LLMProxyProvider = "openai" | "anthropic" | "deepseek" | "qwen" | "lmstudio"
/**
 * Failure categories carried by proxy errors. "unknown" is the fallback
 * for failures that fit no other category.
 */
export type LLMProxyErrorCode =
  | "unauthorized"
  | "secret_disabled" | "secret_expired" | "secret_consumed"
  | "ip_not_allowed"
  | "unknown_provider"
  | "upstream_unavailable"
  | "rate_limited"
  | "unknown"
/** Request body for the proxy chat endpoint. */
export interface LLMProxyChatRequest {
  /** Upstream provider the proxy should dispatch to. */
  provider: LLMProxyProvider
  /** Name of the secret to use; required for every provider except `lmstudio`. */
  secret_name?: string
  /** Model identifier to request. */
  model: string
  /** Conversation so far, in order. */
  messages: { role: "system" | "user" | "assistant"; content: string }[]
  stream?: boolean
  max_tokens?: number
  temperature?: number
}
/** Non-streaming chat completion body returned by the proxy. */
export interface LLMProxyChatResponse {
  id: string
  object: "chat.completion"
  created: number
  /** Model reported in the response. */
  model: string
  /** Completion candidates. */
  choices: {
    index: number
    finish_reason: string | null
    message: { role: "assistant"; content: string; tool_calls: unknown }
  }[]
  /** Token accounting, when the response includes it. */
  usage?: { prompt_tokens: number; completion_tokens: number; total_tokens: number }
}
/**
 * Typed error for failed proxy calls. chat() converts whatever the client
 * throws into this class (via asProxyError), so callers can branch on
 * `code` instead of inspecting raw responses.
 */
export class LLMProxyError extends Error {
/** Failure category. */
readonly code: LLMProxyErrorCode
/** HTTP status of the failure; asProxyError passes 0 when none is available. */
readonly status: number
/** Delay read from the Retry-After header, if any; friendly() renders it as seconds. */
readonly retryAfter?: number
/**
 * @param code Failure category.
 * @param message Human-readable description, forwarded to Error.
 * @param status HTTP status associated with the failure.
 * @param retryAfter Optional retry delay.
 */
constructor(code: LLMProxyErrorCode, message: string, status: number, retryAfter?: number) {
super(message)
// Replace the inherited "Error" name so the class shows up by name.
this.name = "LLMProxyError"
this.code = code
this.status = status
this.retryAfter = retryAfter
}
}
/**
 * One-shot, non-streaming chat completion through the proxy.
 *
 * Streaming belongs to @crema/llm-providers-ui's buildAdapter; this helper
 * is for probes and single calls where SSE would be overkill. `stream` is
 * always sent as `false`, whatever the caller put in `req`.
 *
 * @param arcadia Client used to POST to /api/v1/ai/llm/chat.
 * @param req Chat request; its `stream` field is overridden.
 * @returns The completion body as returned by the client.
 * @throws LLMProxyError for any failure raised while sending the request.
 */
export async function chat(
  arcadia: ArcadiaClient,
  req: LLMProxyChatRequest,
): Promise<LLMProxyChatResponse> {
  try {
    const payload = { ...req, stream: false }
    return await arcadia.POST<LLMProxyChatResponse>("/api/v1/ai/llm/chat", { body: payload })
  } catch (cause) {
    throw asProxyError(cause)
  }
}
/**
 * Cheap end-to-end probe behind the Settings "Test connection" button in
 * proxy mode. Sends a single "ping" message capped at one token.
 *
 * Any reply that chat() resolves with counts as success — the content is
 * not inspected, so an answer cut short by the one-token budget still
 * passes. Failures never throw; they come back as `ok: false` with a
 * readable message.
 *
 * @param arcadia Client used for the round-trip.
 * @param opts Provider, model and (optionally) the secret name to test.
 * @returns `ok` plus a message suitable for display.
 */
export async function probeProxy(
  arcadia: ArcadiaClient,
  opts: { provider: LLMProxyProvider; model: string; secretName?: string },
): Promise<{ ok: boolean; message: string }> {
  const { provider, model, secretName } = opts
  try {
    const reply = await chat(arcadia, {
      provider,
      secret_name: secretName,
      model,
      messages: [{ role: "user", content: "ping" }],
      max_tokens: 1,
      stream: false,
    })
    const tokens = reply.usage?.total_tokens
    const suffix = tokens != null ? ` · ${tokens} tokens` : ""
    return { ok: true, message: `Proxy OK — ${reply.model}${suffix}.` }
  } catch (err) {
    // Typed proxy failures get the curated wording; anything else is shown raw.
    const message =
      err instanceof LLMProxyError
        ? friendly(err)
        : err instanceof Error
          ? err.message
          : String(err)
    return { ok: false, message }
  }
}
/**
 * Convert an arbitrary thrown value into an LLMProxyError.
 *
 * ArcadiaClient is expected to throw an ArcadiaError carrying the HTTP
 * status and a wrapped `{ error: { code, message } }` body. The fields are
 * read structurally so this module is not coupled to that class (it lives
 * in a sibling lib).
 *
 * A `code` is only trusted when it is one of the known proxy codes. Anything
 * else — a network errno such as "ECONNREFUSED", or a server code this
 * client does not know — falls back to a code inferred from the status, so
 * `LLMProxyError.code` never holds a value outside its declared union.
 *
 * @param e The caught value.
 * @returns An LLMProxyError; an incoming LLMProxyError is returned as is.
 */
function asProxyError(e: unknown): LLMProxyError {
  // Already converted: keep it intact (re-wrapping would drop retryAfter).
  if (e instanceof LLMProxyError) return e

  if (e && typeof e === "object") {
    const anyE = e as {
      status?: number
      code?: string
      message?: string
      body?: { error?: { code?: string; message?: string } }
      headers?: Headers | Record<string, string>
    }

    const knownCodes: readonly LLMProxyErrorCode[] = [
      "unauthorized",
      "secret_disabled",
      "secret_expired",
      "secret_consumed",
      "ip_not_allowed",
      "unknown_provider",
      "upstream_unavailable",
      "rate_limited",
      "unknown",
    ]
    const asKnownCode = (candidate: unknown): LLMProxyErrorCode | undefined =>
      knownCodes.find((known) => known === candidate)

    // Guard against a non-numeric status leaking through the structural cast.
    const status = typeof anyE.status === "number" ? anyE.status : 0
    // Prefer the code in the response body, then one on the error itself.
    const code =
      asKnownCode(anyE.body?.error?.code) ??
      asKnownCode(anyE.code) ??
      inferCodeFromStatus(status)
    const message = anyE.body?.error?.message ?? anyE.message ?? "Proxy request failed."
    const retryAfter = readRetryAfter(anyE.headers)
    return new LLMProxyError(code, message, status, retryAfter)
  }

  return new LLMProxyError("unknown", String(e), 0)
}
/**
 * Best-guess error code for a failure that carried no explicit code.
 *
 * @param status HTTP status of the failed response.
 * @returns The code mapped to that status, or "unknown" when none is mapped.
 */
function inferCodeFromStatus(status: number): LLMProxyErrorCode {
  switch (status) {
    case 401:
      return "unauthorized"
    case 403:
      return "ip_not_allowed"
    case 404:
      return "unknown_provider"
    case 410:
      return "secret_expired"
    case 429:
      return "rate_limited"
    case 502:
    case 503:
    case 504:
      return "upstream_unavailable"
    default:
      return "unknown"
  }
}
/**
 * Extract the Retry-After delay, in seconds, from response headers.
 *
 * Both header forms are understood: a number of seconds, or an HTTP-date,
 * which is converted to the seconds remaining from now (never below 0).
 * Plain-object header bags are matched case-insensitively.
 *
 * @param h A `Headers` instance, a plain header record, or undefined.
 * @returns Non-negative seconds to wait, or undefined when the header is
 *          absent, empty, negative, or unparseable.
 */
function readRetryAfter(h: Headers | Record<string, string> | undefined): number | undefined {
  if (!h) return undefined

  let raw: string | null | undefined
  // `typeof` guard: referencing a missing global `Headers` would throw.
  if (typeof Headers !== "undefined" && h instanceof Headers) {
    raw = h.get("retry-after")
  } else {
    const bag = h as Record<string, string | undefined>
    raw = bag["retry-after"] ?? bag["Retry-After"]
    if (raw == null) {
      // Fall back to any other casing, e.g. "RETRY-AFTER".
      const key = Object.keys(bag).find((name) => name.toLowerCase() === "retry-after")
      raw = key === undefined ? undefined : bag[key]
    }
  }

  const text = raw == null ? "" : String(raw).trim()
  if (!text) return undefined

  const seconds = Number(text)
  if (Number.isFinite(seconds)) {
    // A negative delay is meaningless; treat it as absent.
    return seconds >= 0 ? seconds : undefined
  }

  // HTTP-dates always contain an hh:mm:ss part; this keeps Date.parse's
  // lenient parsing away from arbitrary text.
  if (!/\d{1,2}:\d{2}:\d{2}/.test(text)) return undefined
  const at = Date.parse(text)
  if (Number.isNaN(at)) return undefined
  return Math.max(0, Math.ceil((at - Date.now()) / 1000))
}
// Fixed user-facing wording for every code whose message does not depend
// on the error's own fields.
const FIXED_PROXY_MESSAGES: ReadonlyMap<string, string> = new Map<string, string>([
  ["unauthorized", "Sign in expired — refresh and try again."],
  ["secret_disabled", "The vault secret is disabled. Re-enable it under /secrets."],
  ["secret_expired", "The vault secret has expired. Rotate it under /secrets."],
  ["secret_consumed", "Read-once secret already used. Rotate it under /secrets."],
  ["ip_not_allowed", "This client's IP is blocked by the secret's allowlist."],
  ["unknown_provider", "The proxy doesn't recognise this provider. Check the provider id."],
  ["upstream_unavailable", "The upstream LLM provider returned an error or timed out."],
])

/**
 * Turn a proxy error into a message suitable for showing to the user.
 *
 * Rate-limit errors mention the retry delay when one is set (a delay of 0
 * is treated as absent). Codes without curated wording fall back to the
 * error's own message.
 *
 * @param err The proxy error to describe.
 * @returns A user-facing sentence.
 */
export function friendly(err: LLMProxyError): string {
  if (err.code === "rate_limited") {
    if (err.retryAfter) {
      return `Rate limited. Retry in ${err.retryAfter}s.`
    }
    return "Rate limited — slow down and try again."
  }
  return FIXED_PROXY_MESSAGES.get(err.code) ?? err.message
}