Wire health probes, host stats, and LLM proxy round-trip
Three things from the latest arcadia-app pull:
- health.ts: client for /api/v1/health{,/:service,/detailed,/host}.
monitoring.tsx now reads real per-subsystem probe state instead of
synthesizing it from indirect signals (rate limits, sessions, jobs).
- New Host tab on Monitoring with KPI tiles + per-core CPU bars,
load-avg cards, memory + swap usage, and per-mount disk bars,
backed by /api/v1/health/host.
- llm-proxy.ts: typed errors (secret_disabled, ip_not_allowed, etc.)
and a probeProxy() that round-trips a 1-token chat. settings.tsx's
"Test connection" in proxy mode now exercises the real endpoint
instead of just confirming the adapter built. Contract doc flipped
from "not yet implemented" to "implemented".
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
94
app/lib/arcadia/health.ts
Normal file
94
app/lib/arcadia/health.ts
Normal file
@@ -0,0 +1,94 @@
|
||||
// Arcadia health probes.
|
||||
//
|
||||
// Backed by /api/v1/health* (public — no auth). Each subsystem is probed
|
||||
// independently; the overall endpoint aggregates and returns 503 if any
|
||||
// subsystem is not "ok". See arcadia-app commit f427892.
|
||||
|
||||
import type { ArcadiaClient } from "@crema/arcadia-client"
|
||||
|
||||
/** Names of the subsystems that can be probed individually under /api/v1/health/:service. */
export type HealthSubsystem =
  | "api"
  | "db"
  | "workers"
  | "storage"
|
||||
|
||||
/** Probe outcome reported for a single subsystem or for the aggregate. */
export type HealthStatus =
  | "ok"
  | "degraded"
  | "error"
  | "unconfigured"
|
||||
|
||||
/** Result of probing one subsystem. */
export interface SubsystemHealth {
  /** Outcome of the probe. */
  status: HealthStatus
  /** Human-readable detail; may be absent. */
  message?: string
  /** Free-form metrics whose shape depends on the subsystem. */
  details?: Record<string, unknown>
}
|
||||
|
||||
/** Aggregate health report: one overall status plus one entry per subsystem. */
export interface OverallHealth {
  /** Aggregated status across all subsystems. */
  status: HealthStatus
  /** When the probes ran (string as sent by the server — format not visible here). */
  checked_at: string
  /** Individual probe results keyed by subsystem name. */
  subsystems: Record<HealthSubsystem, SubsystemHealth>
}
|
||||
|
||||
/** Overall health plus optional runtime information from /health/detailed. */
export interface DetailedHealth extends OverallHealth {
  /**
   * BEAM runtime info. Only the /health/detailed endpoint supplies it.
   * Every known field is optional, and additional keys are allowed.
   */
  system?: {
    otp_release?: string
    elixir_version?: string
    process_count?: number
    memory_total_bytes?: number
    /** Any further keys the server chooses to send. */
    [k: string]: unknown
  }
}
|
||||
|
||||
/**
 * Host-level resource snapshot: CPU, memory/swap, and per-mount disk usage.
 * Fields typed `number | null` may be null when the server has no value.
 */
export interface HostStats {
  /** CPU utilisation and load averages. */
  cpu: {
    util_pct: number | null
    /** One entry per CPU. */
    per_cpu_pct: number[]
    load_avg_1: number | null
    load_avg_5: number | null
    load_avg_15: number | null
    schedulers_online: number
    num_cpus: number | null
  }
  /** Memory and swap counters, in bytes as the field names indicate. */
  memory: {
    total_bytes: number | null
    free_bytes: number | null
    available_bytes: number | null
    buffered_bytes: number | null
    cached_bytes: number | null
    swap_total_bytes: number | null
    swap_free_bytes: number | null
  }
  /** One entry per mount point. */
  disks: Array<{
    mount: string
    total_kb: number
    used_pct: number
  }>
  /** When the snapshot was taken (string as sent by the server). */
  checked_at: string
}
|
||||
|
||||
/** Common path prefix shared by every health endpoint in this module. */
const BASE = "/api/v1/health"
|
||||
|
||||
/**
 * Fetches the aggregate health report.
 *
 * The response may arrive bare or wrapped in a `{ data }` envelope; either
 * form is accepted.
 *
 * @param arcadia Client used to issue the GET request.
 * @returns The overall health report.
 */
export async function getHealth(arcadia: ArcadiaClient): Promise<OverallHealth> {
  type Payload = { data: OverallHealth } | OverallHealth
  const payload = await arcadia.GET<Payload>(BASE)
  return unwrap(payload)
}
|
||||
|
||||
/**
 * Fetches the probe result for one subsystem.
 *
 * @param arcadia Client used to issue the GET request.
 * @param service Subsystem name, appended to the health base path.
 * @returns The subsystem's health, unwrapped from a `{ data }` envelope if present.
 */
export async function getServiceHealth(
  arcadia: ArcadiaClient,
  service: HealthSubsystem,
): Promise<SubsystemHealth> {
  type Payload = { data: SubsystemHealth } | SubsystemHealth
  const path = `${BASE}/${service}`
  const payload = await arcadia.GET<Payload>(path)
  return unwrap(payload)
}
|
||||
|
||||
/**
 * Fetches the detailed health report from the `/detailed` endpoint.
 *
 * @param arcadia Client used to issue the GET request.
 * @returns The detailed report, unwrapped from a `{ data }` envelope if present.
 */
export async function getHealthDetailed(arcadia: ArcadiaClient): Promise<DetailedHealth> {
  type Payload = { data: DetailedHealth } | DetailedHealth
  const path = `${BASE}/detailed`
  const payload = await arcadia.GET<Payload>(path)
  return unwrap(payload)
}
|
||||
|
||||
/**
 * Fetches host resource statistics from the `/host` endpoint.
 *
 * @param arcadia Client used to issue the GET request.
 * @returns The host snapshot, unwrapped from a `{ data }` envelope if present.
 */
export async function getHostStats(arcadia: ArcadiaClient): Promise<HostStats> {
  type Payload = { data: HostStats } | HostStats
  const path = `${BASE}/host`
  const payload = await arcadia.GET<Payload>(path)
  return unwrap(payload)
}
|
||||
|
||||
/** Every subsystem name, in display order, for callers that iterate over them. */
export const SUBSYSTEMS: HealthSubsystem[] = [
  "api",
  "db",
  "workers",
  "storage",
]
|
||||
|
||||
/**
 * Strips an optional `{ data: ... }` envelope from a response.
 *
 * Any non-null object that has a `data` key is treated as an envelope and
 * its `data` value is returned; everything else is returned untouched.
 * Note that a bare payload which itself has a `data` key would therefore be
 * mistaken for an envelope.
 */
function unwrap<T>(res: { data: T } | T): T {
  const isObject = Boolean(res) && typeof res === "object"
  if (isObject && "data" in (res as object)) {
    return (res as { data: T }).data
  }
  return res as T
}
|
||||
182
app/lib/arcadia/llm-proxy.ts
Normal file
182
app/lib/arcadia/llm-proxy.ts
Normal file
@@ -0,0 +1,182 @@
|
||||
// Arcadia LLM proxy client.
|
||||
//
|
||||
// Implements the spec in docs/LLM_PROXY_CONTRACT.md against arcadia-app's
|
||||
// POST /api/v1/ai/llm/chat. The lib (@crema/llm-providers-ui buildAdapter)
|
||||
// owns the streaming chat path itself; this module exposes a lightweight
|
||||
// non-streaming probe so the Settings "Test connection" button can verify
|
||||
// the proxy round-trips end-to-end (auth → secret resolution → upstream
|
||||
// dispatch → response shape).
|
||||
|
||||
import type { ArcadiaClient } from "@crema/arcadia-client"
|
||||
|
||||
/** Provider ids accepted in the `provider` field of a proxy chat request. */
export type LLMProxyProvider = "openai" | "anthropic" | "deepseek" | "qwen" | "lmstudio"
|
||||
|
||||
/** Machine-readable failure categories carried by LLMProxyError. */
export type LLMProxyErrorCode =
  // Authentication
  | "unauthorized"
  // Vault secret problems
  | "secret_disabled"
  | "secret_expired"
  | "secret_consumed"
  | "ip_not_allowed"
  // Request routing / upstream
  | "unknown_provider"
  | "upstream_unavailable"
  | "rate_limited"
  // Anything that does not fit the categories above
  | "unknown"
|
||||
|
||||
/** Request body for a proxy chat completion. */
export interface LLMProxyChatRequest {
  /** Which upstream provider should handle the request. */
  provider: LLMProxyProvider
  /** Name of the secret to use. Required for every provider except `lmstudio`. */
  secret_name?: string
  /** Model identifier. */
  model: string
  /** Conversation so far, in order. */
  messages: Array<{
    role: "system" | "user" | "assistant"
    content: string
  }>
  stream?: boolean
  max_tokens?: number
  temperature?: number
}
|
||||
|
||||
/** Body of a non-streaming chat completion as returned by `chat`. */
export interface LLMProxyChatResponse {
  id: string
  object: "chat.completion"
  created: number
  /** Model name reported in the response. */
  model: string
  choices: Array<{
    index: number
    finish_reason: string | null
    message: {
      role: "assistant"
      content: string
      tool_calls: unknown
    }
  }>
  /** Token accounting; may be absent. */
  usage?: {
    prompt_tokens: number
    completion_tokens: number
    total_tokens: number
  }
}
|
||||
|
||||
/**
 * Error thrown for any failed proxy call, carrying a machine-readable code
 * alongside the HTTP status.
 */
export class LLMProxyError extends Error {
  /** Failure category. */
  readonly code: LLMProxyErrorCode
  /** HTTP status of the failed response; callers in this file pass 0 when none is known. */
  readonly status: number
  /** Retry delay hint, when the caller supplied one. */
  readonly retryAfter?: number

  /**
   * @param code Failure category.
   * @param message Human-readable description; becomes `Error.message`.
   * @param status HTTP status of the failed response.
   * @param retryAfter Optional retry delay hint.
   */
  constructor(code: LLMProxyErrorCode, message: string, status: number, retryAfter?: number) {
    super(message)
    // When compiled to ES5, calling `super()` on a built-in like Error returns
    // a plain Error and drops this subclass's prototype, which would make
    // `e instanceof LLMProxyError` false. Restoring the prototype is a no-op on
    // ES2015+ targets and fixes the down-levelled case.
    Object.setPrototypeOf(this, new.target.prototype)
    this.name = "LLMProxyError"
    this.code = code
    this.status = status
    this.retryAfter = retryAfter
  }
}
|
||||
|
||||
/**
 * Sends one non-streaming chat completion through the proxy.
 *
 * `stream` is always forced to `false`, whatever the caller put in `req`.
 * Streaming is handled by @crema/llm-providers-ui's buildAdapter; this
 * function is for probes and other one-shot calls.
 *
 * @param arcadia Client used to issue the POST request.
 * @param req Chat request; its fields are copied into the request body.
 * @returns The response as returned by the client.
 * @throws LLMProxyError for any failure raised while posting.
 */
export async function chat(
  arcadia: ArcadiaClient,
  req: LLMProxyChatRequest,
): Promise<LLMProxyChatResponse> {
  try {
    const payload = { ...req, stream: false }
    return await arcadia.POST<LLMProxyChatResponse>("/api/v1/ai/llm/chat", { body: payload })
  } catch (cause) {
    throw asProxyError(cause)
  }
}
|
||||
|
||||
/**
 * End-to-end probe for the Settings "Test connection" flow in proxy mode.
 *
 * Sends a single user message ("ping") capped at one output token through
 * `chat` and converts the outcome into a result object. It never throws:
 * every failure becomes `{ ok: false, message }`.
 *
 * NOTE(review): the previous comment said token-budget rejections are
 * tolerated as proof of a working round-trip, but no such branch is visible
 * here — any error, of any kind, yields `ok: false`. Confirm the intent.
 *
 * @param arcadia Client passed through to `chat`.
 * @param opts Provider, model, and optional secret name to probe with.
 * @returns `ok: true` with a summary on success; otherwise `ok: false` with a
 *   user-facing message for LLMProxyError, or the raw error text for anything else.
 */
export async function probeProxy(
  arcadia: ArcadiaClient,
  opts: { provider: LLMProxyProvider; model: string; secretName?: string },
): Promise<{ ok: boolean; message: string }> {
  try {
    const request: LLMProxyChatRequest = {
      provider: opts.provider,
      secret_name: opts.secretName,
      model: opts.model,
      messages: [{ role: "user", content: "ping" }],
      max_tokens: 1,
      stream: false,
    }
    const res = await chat(arcadia, request)

    // Token usage is optional in the response; only mention it when present.
    const used = res.usage?.total_tokens
    const usageSuffix = used != null ? ` · ${used} tokens` : ""
    return { ok: true, message: `Proxy OK — ${res.model}${usageSuffix}.` }
  } catch (failure) {
    let message: string
    if (failure instanceof LLMProxyError) {
      message = friendly(failure)
    } else if (failure instanceof Error) {
      message = failure.message
    } else {
      message = String(failure)
    }
    return { ok: false, message }
  }
}
|
||||
|
||||
/** Codes that LLMProxyErrorCode allows; used to validate untrusted code strings. */
const KNOWN_PROXY_ERROR_CODES: ReadonlySet<string> = new Set<LLMProxyErrorCode>([
  "unauthorized",
  "secret_disabled",
  "secret_expired",
  "secret_consumed",
  "ip_not_allowed",
  "unknown_provider",
  "upstream_unavailable",
  "rate_limited",
  "unknown",
])

/** True when `value` is one of the codes LLMProxyErrorCode allows. */
function isProxyErrorCode(value: unknown): value is LLMProxyErrorCode {
  return typeof value === "string" && KNOWN_PROXY_ERROR_CODES.has(value)
}

/** Records the original failure on `wrapped.cause`, non-enumerable like the native property. */
function attachCause(wrapped: LLMProxyError, cause: unknown): LLMProxyError {
  Object.defineProperty(wrapped, "cause", {
    value: cause,
    writable: true,
    configurable: true,
    enumerable: false,
  })
  return wrapped
}

/**
 * Normalises anything thrown by the client into an LLMProxyError.
 *
 * ArcadiaClient throws ArcadiaError with a wrapped { error: { code, message } }
 * body and HTTP status. The fields are read best-effort, without coupling to
 * the class shape (it lives in a sibling lib).
 *
 * Code resolution:
 * - A code in the response body is used if it is a known LLMProxyErrorCode.
 *   An unrecognised body code becomes "unknown" rather than being guessed
 *   from the status, so the server's own message is what the user sees.
 * - With no body code, a known top-level `code` is used; otherwise the code
 *   is inferred from the HTTP status.
 *
 * @param e The caught value; may be of any type.
 * @returns `e` itself if it is already an LLMProxyError, else a new one with
 *   the original value attached as `cause`.
 */
function asProxyError(e: unknown): LLMProxyError {
  // Already normalised: re-wrapping would lose retryAfter.
  if (e instanceof LLMProxyError) return e

  if (e && typeof e === "object") {
    const anyE = e as {
      status?: unknown
      code?: unknown
      message?: string
      body?: { error?: { code?: unknown; message?: string } }
      headers?: Headers | Record<string, string>
    }
    // Only trust a real finite number; anything else means "no status".
    const status =
      typeof anyE.status === "number" && Number.isFinite(anyE.status) ? anyE.status : 0
    const bodyCode = anyE.body?.error?.code
    let code: LLMProxyErrorCode
    if (bodyCode != null) {
      code = isProxyErrorCode(bodyCode) ? bodyCode : "unknown"
    } else if (isProxyErrorCode(anyE.code)) {
      code = anyE.code
    } else {
      // Top-level codes such as Node's "ECONNRESET" are not proxy codes.
      code = inferCodeFromStatus(status)
    }
    const message = anyE.body?.error?.message ?? anyE.message ?? "Proxy request failed."
    const retryAfter = readRetryAfter(anyE.headers)
    return attachCause(new LLMProxyError(code, message, status, retryAfter), e)
  }

  return attachCause(new LLMProxyError("unknown", String(e), 0), e)
}
|
||||
|
||||
/**
 * Fallback mapping from an HTTP status to an error code, for responses that
 * carry no explicit code. Any status without a mapping yields "unknown".
 */
function inferCodeFromStatus(status: number): LLMProxyErrorCode {
  switch (status) {
    case 401:
      return "unauthorized"
    case 403:
      return "ip_not_allowed"
    case 404:
      return "unknown_provider"
    case 410:
      return "secret_expired"
    case 429:
      return "rate_limited"
    // Gateway-style failures all mean the upstream could not be reached.
    case 502:
    case 503:
    case 504:
      return "upstream_unavailable"
    default:
      return "unknown"
  }
}
|
||||
|
||||
/**
 * Extracts a retry delay, in seconds, from a Retry-After header.
 *
 * Both forms of the header are understood:
 * - delay-seconds (e.g. "120") is returned as a number;
 * - an HTTP-date is converted to the whole seconds remaining until that
 *   moment, clamped to 0 when the date is already in the past.
 *
 * @param h A `Headers` instance, a plain header record (name matched
 *   case-insensitively), or undefined.
 * @returns The delay in seconds, or undefined when the header is missing,
 *   empty, negative, or unparseable.
 */
function readRetryAfter(h: Headers | Record<string, string> | undefined): number | undefined {
  if (!h) return undefined

  let raw: string | null | undefined
  // `Headers` is not a global in every runtime; avoid a ReferenceError there.
  if (typeof Headers !== "undefined" && h instanceof Headers) {
    raw = h.get("retry-after")
  } else {
    const record = h as Record<string, string | undefined>
    raw = record["retry-after"] ?? record["Retry-After"]
    if (raw == null) {
      // Plain records keep whatever casing the producer used.
      const key = Object.keys(record).find((k) => k.toLowerCase() === "retry-after")
      raw = key === undefined ? undefined : record[key]
    }
  }
  if (raw == null) return undefined

  // String() tolerates a numeric value sneaking in through an untyped record.
  const text = String(raw).trim()
  if (text === "") return undefined

  const seconds = Number(text)
  if (Number.isFinite(seconds)) {
    // A negative delay is meaningless; treat it as absent.
    return seconds >= 0 ? seconds : undefined
  }

  // Not a number, so try the HTTP-date form.
  const at = Date.parse(text)
  if (Number.isNaN(at)) return undefined
  return Math.max(0, Math.ceil((at - Date.now()) / 1000))
}
|
||||
|
||||
/** User-facing text for every code whose message does not depend on the error itself. */
const STATIC_PROXY_MESSAGES: ReadonlyMap<string, string> = new Map([
  ["unauthorized", "Sign in expired — refresh and try again."],
  ["secret_disabled", "The vault secret is disabled. Re-enable it under /secrets."],
  ["secret_expired", "The vault secret has expired. Rotate it under /secrets."],
  ["secret_consumed", "Read-once secret already used. Rotate it under /secrets."],
  ["ip_not_allowed", "This client's IP is blocked by the secret's allowlist."],
  ["unknown_provider", "The proxy doesn't recognise this provider. Check the provider id."],
  ["upstream_unavailable", "The upstream LLM provider returned an error or timed out."],
])

/**
 * Turns a proxy error into a message suitable for showing to the user.
 *
 * Rate-limit errors mention the retry delay when a non-zero one is known.
 * Codes without a dedicated message (including "unknown") fall back to the
 * error's own `message`.
 *
 * @param err The proxy error to describe.
 * @returns The user-facing message.
 */
export function friendly(err: LLMProxyError): string {
  if (err.code === "rate_limited") {
    if (err.retryAfter) {
      return `Rate limited. Retry in ${err.retryAfter}s.`
    }
    return "Rate limited — slow down and try again."
  }
  const fixed = STATIC_PROXY_MESSAGES.get(err.code)
  return fixed !== undefined ? fixed : err.message
}
|
||||
Reference in New Issue
Block a user