Files
arcadia-admin/app/lib/arcadia/llm-proxy.ts
jules 938143f3f5 refactor: rename service references arcadia-app → arcadia-core
The Phoenix auth/identity/tenancy backend repo is being renamed
arcadia-app → arcadia-core (its primary OTP app is already arcadia_core).
Updates prose, doc paths, and git.sky-ai.com repo URLs. Deliberately
leaves the Rust crate arcadia-app-client and host arcadia-app.internal
(handled separately), and the kept namespace (issuer/release "arcadia").

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
2026-06-11 13:40:25 +10:00

183 lines
5.9 KiB
TypeScript

// Arcadia LLM proxy client.
//
// Implements the spec in docs/LLM_PROXY_CONTRACT.md against arcadia-core's
// POST /api/v1/ai/llm/chat. The lib (@crema/llm-providers-ui buildAdapter)
// owns the streaming chat path itself; this module exposes a lightweight
// non-streaming probe so the Settings "Test connection" button can verify
// the proxy round-trips end-to-end (auth → secret resolution → upstream
// dispatch → response shape).
import type { ArcadiaClient } from "@crema/arcadia-core-client"
/**
 * Provider identifiers accepted in the `provider` field of a proxy chat
 * request. Per the `secret_name` note on LLMProxyChatRequest, `lmstudio` is
 * the only provider that does not require a vault secret name.
 */
export type LLMProxyProvider =
| "openai"
| "anthropic"
| "deepseek"
| "qwen"
| "lmstudio"
/**
 * Failure categories carried by `LLMProxyError.code`.
 *
 * `friendly` turns each code into user-facing text, and `inferCodeFromStatus`
 * chooses one from the HTTP status when the error carries no code of its own.
 */
export type LLMProxyErrorCode =
| "unauthorized" // HTTP 401 fallback; surfaced as a sign-in-expired message
| "secret_disabled" // the vault secret is disabled
| "secret_expired" // the vault secret has expired; also the HTTP 410 fallback
| "secret_consumed" // read-once secret was already used
| "ip_not_allowed" // caller IP blocked by the secret's allowlist; also the HTTP 403 fallback
| "unknown_provider" // proxy does not recognise the provider id; also the HTTP 404 fallback
| "upstream_unavailable" // upstream provider errored or timed out; also the HTTP 502/503/504 fallback
| "rate_limited" // HTTP 429 fallback; may be accompanied by a retryAfter value
| "unknown" // anything else; `friendly` falls back to the raw error message
/**
 * Request accepted by `chat`. The object is spread as-is into the POST body,
 * so the snake_case field names are the names that go over the wire.
 */
export interface LLMProxyChatRequest {
  /** Which provider the proxy should dispatch to. */
  provider: LLMProxyProvider
  /** Required for every provider except `lmstudio`. */
  secret_name?: string
  /** Model identifier, passed through unchanged. */
  model: string
  /** Conversation turns, in order. */
  messages: { role: "system" | "user" | "assistant"; content: string }[]
  /** `chat` always overrides this with `false` before sending. */
  stream?: boolean
  max_tokens?: number
  temperature?: number
}
/**
 * Shape of a non-streaming chat completion as returned by `chat`.
 * `probeProxy` reads `model` and, when present, `usage.total_tokens`.
 */
export interface LLMProxyChatResponse {
  id: string
  object: "chat.completion"
  created: number
  model: string
  choices: {
    index: number
    finish_reason: string | null
    message: {
      role: "assistant"
      content: string
      tool_calls: unknown
    }
  }[]
  /** Token accounting; optional, so callers must handle its absence. */
  usage?: {
    prompt_tokens: number
    completion_tokens: number
    total_tokens: number
  }
}
/**
 * Error type thrown by `chat` for every failed proxy call. Carries a
 * machine-readable category alongside the human-readable message.
 */
export class LLMProxyError extends Error {
  /** Failure category; drives the text chosen by `friendly`. */
  readonly code: LLMProxyErrorCode
  /** HTTP status of the failed call; `asProxyError` uses 0 when none is known. */
  readonly status: number
  /** Value taken from a Retry-After header, when one was readable. */
  readonly retryAfter?: number

  constructor(
    code: LLMProxyErrorCode,
    message: string,
    status: number,
    retryAfter?: number,
  ) {
    super(message)
    // Override the inherited "Error" so logs and stack traces identify the class.
    this.name = "LLMProxyError"
    this.code = code
    this.status = status
    this.retryAfter = retryAfter
  }
}
/**
 * One-shot, non-streaming chat completion through the proxy.
 *
 * Streaming is handled elsewhere (@crema/llm-providers-ui's buildAdapter);
 * this entry point is meant for probes and single calls where SSE is overkill.
 * Whatever `req.stream` says, the request is sent with `stream: false`.
 *
 * @param arcadia Client used to issue the POST.
 * @param req Chat request; spread into the POST body.
 * @returns The value resolved by the client's POST call.
 * @throws LLMProxyError Any failure from the POST is converted via `asProxyError`.
 */
export async function chat(
  arcadia: ArcadiaClient,
  req: LLMProxyChatRequest,
): Promise<LLMProxyChatResponse> {
  try {
    const body = { ...req, stream: false }
    // `await` must stay inside the try so rejections reach the catch below.
    return await arcadia.POST<LLMProxyChatResponse>("/api/v1/ai/llm/chat", { body })
  } catch (cause) {
    throw asProxyError(cause)
  }
}
/**
 * Cheap end-to-end probe behind the Settings "Test connection" flow in proxy
 * mode. Sends a single "ping" user message capped at `max_tokens: 1` and
 * reports the outcome as a result object.
 *
 * Success only requires that `chat` resolves: the reply's `choices` are never
 * inspected, so an answer cut short by the one-token budget still counts as a
 * working round-trip. Any thrown error is reported as `ok: false`.
 *
 * @param arcadia Client forwarded to `chat`.
 * @param opts Provider, model and optional vault secret name to probe with.
 * @returns `ok` plus a message suitable for display.
 */
export async function probeProxy(
  arcadia: ArcadiaClient,
  opts: { provider: LLMProxyProvider; model: string; secretName?: string },
): Promise<{ ok: boolean; message: string }> {
  try {
    const reply = await chat(arcadia, {
      provider: opts.provider,
      secret_name: opts.secretName,
      model: opts.model,
      messages: [{ role: "user", content: "ping" }],
      max_tokens: 1,
      stream: false,
    })
    const totalTokens = reply.usage?.total_tokens
    // Usage is optional in the response, so the suffix is dropped when absent.
    const usageSuffix = totalTokens != null ? ` · ${totalTokens} tokens` : ""
    return { ok: true, message: `Proxy OK — ${reply.model}${usageSuffix}.` }
  } catch (failure) {
    let message: string
    if (failure instanceof LLMProxyError) {
      message = friendly(failure)
    } else if (failure instanceof Error) {
      message = failure.message
    } else {
      message = String(failure)
    }
    return { ok: false, message }
  }
}
/**
 * Every member of LLMProxyErrorCode, keyed for O(1) membership checks. Typing
 * this as a Record over the union makes the compiler reject the table if a
 * code is ever added to the union but not here.
 */
const KNOWN_PROXY_ERROR_CODES: Record<LLMProxyErrorCode, true> = {
  unauthorized: true,
  secret_disabled: true,
  secret_expired: true,
  secret_consumed: true,
  ip_not_allowed: true,
  unknown_provider: true,
  upstream_unavailable: true,
  rate_limited: true,
  unknown: true,
}

/** Runtime check that an arbitrary value is one of the declared error codes. */
function isProxyErrorCode(value: unknown): value is LLMProxyErrorCode {
  return (
    typeof value === "string" &&
    Object.prototype.hasOwnProperty.call(KNOWN_PROXY_ERROR_CODES, value)
  )
}

/**
 * Normalises anything caught from the client call into an LLMProxyError.
 *
 * ArcadiaClient throws ArcadiaError with a wrapped { error: { code, message } }
 * body and HTTP status. This is a best-effort destructure that avoids coupling
 * to the class shape (it lives in a sibling lib).
 *
 * Code resolution:
 *  - no code on the error       → inferred from the HTTP status;
 *  - a recognised code          → used as-is;
 *  - an unrecognised code       → "unknown", so the `code` field never holds a
 *    value outside LLMProxyErrorCode. The original message is kept, which is
 *    what `friendly` shows for "unknown".
 *
 * @param e The caught value, of any type.
 * @returns An LLMProxyError; an existing LLMProxyError is returned unchanged.
 */
function asProxyError(e: unknown): LLMProxyError {
  // Already normalised: re-wrapping would drop retryAfter, which lives on the
  // error itself rather than in a headers bag.
  if (e instanceof LLMProxyError) return e

  if (e && typeof e === "object") {
    const anyE = e as {
      status?: unknown
      code?: unknown
      message?: string
      body?: { error?: { code?: unknown; message?: string } }
      headers?: Headers | Record<string, string>
    }
    // Anything that is not a real number is treated as "no status".
    const status = typeof anyE.status === "number" ? anyE.status : 0
    // The wrapped body code wins over a top-level code on the error object.
    const rawCode = anyE.body?.error?.code ?? anyE.code
    let code: LLMProxyErrorCode
    if (rawCode == null) {
      code = inferCodeFromStatus(status)
    } else {
      code = isProxyErrorCode(rawCode) ? rawCode : "unknown"
    }
    const message = anyE.body?.error?.message ?? anyE.message ?? "Proxy request failed."
    const retryAfter = readRetryAfter(anyE.headers)
    return new LLMProxyError(code, message, status, retryAfter)
  }
  // Thrown primitives (strings, numbers, null, undefined) carry no structure.
  return new LLMProxyError("unknown", String(e), 0)
}
/**
 * Fallback mapping from an HTTP status to an error code, used when the error
 * itself supplies no code. Statuses not listed here map to "unknown".
 */
function inferCodeFromStatus(status: number): LLMProxyErrorCode {
  switch (status) {
    case 401:
      return "unauthorized"
    case 403:
      return "ip_not_allowed"
    case 404:
      return "unknown_provider"
    case 410:
      return "secret_expired"
    case 429:
      return "rate_limited"
    case 502:
    case 503:
    case 504:
      return "upstream_unavailable"
    default:
      return "unknown"
  }
}
/**
 * Extracts a Retry-After delay, in seconds, from a headers bag.
 *
 * Accepts either a Fetch `Headers` instance or a plain record. Both forms of
 * the header are understood: a number of seconds, or an HTTP-date, which is
 * converted to the whole seconds remaining from now (never negative).
 *
 * @param h Headers to inspect; may be undefined.
 * @returns The delay in seconds, or undefined when the header is missing,
 *   empty, or unparseable.
 */
function readRetryAfter(h: Headers | Record<string, string> | undefined): number | undefined {
  if (!h) return undefined

  let raw: string | null | undefined
  // Guard the global: referencing an undeclared `Headers` in `instanceof`
  // would throw on runtimes that do not provide the Fetch API.
  if (typeof Headers !== "undefined" && h instanceof Headers) {
    raw = h.get("retry-after")
  } else {
    const record = h as Record<string, string>
    raw = record["retry-after"] ?? record["Retry-After"]
    if (raw == null) {
      // Header names are case-insensitive; plain records keep whatever casing
      // the producer used, so fall back to a case-insensitive scan.
      const key = Object.keys(record).find((k) => k.toLowerCase() === "retry-after")
      if (key !== undefined) raw = record[key]
    }
  }
  if (!raw) return undefined

  const seconds = Number(raw)
  if (Number.isFinite(seconds)) return seconds

  // Not numeric: try the HTTP-date form and report the time still to wait.
  const when = Date.parse(raw)
  if (Number.isNaN(when)) return undefined
  return Math.max(0, Math.ceil((when - Date.now()) / 1000))
}
/**
 * Fixed user-facing text per error code. A Map is used rather than an object
 * literal so that codes which happen to match Object.prototype keys still
 * miss the lookup and fall back to the raw message.
 */
const FRIENDLY_TEXT_BY_CODE: ReadonlyMap<string, string> = new Map<string, string>([
  ["unauthorized", "Sign in expired — refresh and try again."],
  ["secret_disabled", "The vault secret is disabled. Re-enable it under /secrets."],
  ["secret_expired", "The vault secret has expired. Rotate it under /secrets."],
  ["secret_consumed", "Read-once secret already used. Rotate it under /secrets."],
  ["ip_not_allowed", "This client's IP is blocked by the secret's allowlist."],
  ["unknown_provider", "The proxy doesn't recognise this provider. Check the provider id."],
  ["upstream_unavailable", "The upstream LLM provider returned an error or timed out."],
])

/**
 * Translates a proxy error into text suitable for showing to the user.
 *
 * Rate-limit errors mention the wait time when a non-zero `retryAfter` is
 * available. Codes without dedicated text (including "unknown") return the
 * error's own message.
 *
 * @param err The proxy error to describe.
 * @returns A display string.
 */
export function friendly(err: LLMProxyError): string {
  if (err.code === "rate_limited") {
    if (err.retryAfter) {
      return `Rate limited. Retry in ${err.retryAfter}s.`
    }
    return "Rate limited — slow down and try again."
  }
  return FRIENDLY_TEXT_BY_CODE.get(err.code) ?? err.message
}