The Phoenix auth/identity/tenancy backend repo is being renamed arcadia-app → arcadia-core (its primary OTP app is already arcadia_core). Updates prose, doc paths, and git.sky-ai.com repo URLs. Deliberately leaves the Rust crate arcadia-app-client and host arcadia-app.internal (handled separately), and the kept namespace (issuer/release "arcadia"). Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
183 lines
5.9 KiB
TypeScript
183 lines
5.9 KiB
TypeScript
// Arcadia LLM proxy client.
//
// Implements the spec in docs/LLM_PROXY_CONTRACT.md against arcadia-core's
// POST /api/v1/ai/llm/chat. The lib (@crema/llm-providers-ui buildAdapter)
// owns the streaming chat path itself; this module exposes a lightweight
// non-streaming probe so the Settings "Test connection" button can verify
// the proxy round-trips end-to-end (auth → secret resolution → upstream
// dispatch → response shape).
|
|
|
|
import type { ArcadiaClient } from "@crema/arcadia-core-client"
|
|
|
|
/**
 * Provider identifiers accepted in the `provider` field of a proxy chat
 * request.
 */
export type LLMProxyProvider =
  "openai" | "anthropic" | "deepseek" | "qwen" | "lmstudio"
|
|
|
|
/**
 * Machine-readable failure categories carried by `LLMProxyError.code`.
 * `"unknown"` is the fallback used when nothing more specific applies.
 */
export type LLMProxyErrorCode =
  // Authentication
  | "unauthorized"
  // Secret state
  | "secret_disabled" | "secret_expired" | "secret_consumed"
  // Request rejected
  | "ip_not_allowed" | "unknown_provider"
  // Upstream / throttling
  | "upstream_unavailable" | "rate_limited"
  // Fallback
  | "unknown"
|
|
|
|
/** Request body accepted by the proxy's chat endpoint. */
export interface LLMProxyChatRequest {
  /** Which upstream provider the proxy should dispatch to. */
  provider: LLMProxyProvider

  /** Required for every provider except `lmstudio`. */
  secret_name?: string

  /** Model identifier to request. */
  model: string

  /** Conversation so far, in order. */
  messages: {
    role: "system" | "user" | "assistant"
    content: string
  }[]

  /** Optional streaming flag; `chat` in this module always sends `false`. */
  stream?: boolean

  /** Optional cap on generated tokens. */
  max_tokens?: number

  /** Optional sampling temperature. */
  temperature?: number
}
|
|
|
|
/** Response body that `chat` resolves with for a non-streaming completion. */
export interface LLMProxyChatResponse {
  id: string
  /** Always the literal `"chat.completion"`. */
  object: "chat.completion"
  created: number
  /** Model name as reported in the response. */
  model: string
  choices: {
    index: number
    finish_reason: string | null
    message: {
      role: "assistant"
      content: string
      /** Left untyped; this module never inspects it. */
      tool_calls: unknown
    }
  }[]
  /** Token accounting; may be omitted. */
  usage?: {
    prompt_tokens: number
    completion_tokens: number
    total_tokens: number
  }
}
|
|
|
|
/**
 * Error type used for every failure surfaced by this module's proxy calls.
 *
 * Carries a machine-readable `code` next to the human-readable `message` so
 * callers can branch on the failure category instead of parsing text.
 */
export class LLMProxyError extends Error {
  /** Failure category. */
  readonly code: LLMProxyErrorCode

  /** HTTP status associated with the failure; `0` is used when none is known. */
  readonly status: number

  /** Delay hint taken from the failed response, when one was supplied. */
  readonly retryAfter?: number

  /**
   * @param code - Failure category.
   * @param message - Human-readable description, forwarded to `Error`.
   * @param status - HTTP status of the failed response (`0` when unknown).
   * @param retryAfter - Optional retry delay hint.
   */
  constructor(code: LLMProxyErrorCode, message: string, status: number, retryAfter?: number) {
    super(message)
    // When this class is compiled down to ES5, `super(...)` returns a plain
    // `Error` and the subclass prototype is lost, which makes
    // `err instanceof LLMProxyError` false. Re-attaching the prototype keeps
    // `instanceof` checks reliable on every target; it is a no-op on ES2015+.
    Object.setPrototypeOf(this, new.target.prototype)
    this.name = "LLMProxyError"
    this.code = code
    this.status = status
    this.retryAfter = retryAfter
  }
}
|
|
|
|
/**
 * One-shot, non-streaming chat completion through the proxy.
 *
 * Streaming is handled by @crema/llm-providers-ui's buildAdapter; reach for
 * this function for probes and single calls where SSE would be overkill.
 *
 * @param arcadia - Client used to issue the POST.
 * @param req - Chat request; its `stream` field is always overridden to `false`.
 * @returns The completion body returned by the proxy.
 * @throws LLMProxyError for any failure raised while sending the request.
 */
export async function chat(
  arcadia: ArcadiaClient,
  req: LLMProxyChatRequest,
): Promise<LLMProxyChatResponse> {
  try {
    // Force non-streaming regardless of what the caller asked for.
    const body = { ...req, stream: false }
    return await arcadia.POST<LLMProxyChatResponse>("/api/v1/ai/llm/chat", { body })
  } catch (cause) {
    throw asProxyError(cause)
  }
}
|
|
|
|
/**
 * Inexpensive end-to-end check behind the Settings "Test connection" flow in
 * proxy mode.
 *
 * Sends a single "ping" user message capped at one output token and reports
 * whether the call went through. The reply's content is never inspected, so a
 * completion cut short by the token cap still counts as success — getting any
 * response back is what proves the round-trip works.
 *
 * @param arcadia - Client used to reach the proxy.
 * @param opts - Provider, model and (optionally) the secret name to probe with.
 * @returns `ok` plus a message suitable for showing to the user. Failures are
 *   reported through the return value rather than thrown.
 */
export async function probeProxy(
  arcadia: ArcadiaClient,
  opts: { provider: LLMProxyProvider; model: string; secretName?: string },
): Promise<{ ok: boolean; message: string }> {
  try {
    const reply = await chat(arcadia, {
      provider: opts.provider,
      secret_name: opts.secretName,
      model: opts.model,
      messages: [{ role: "user", content: "ping" }],
      max_tokens: 1,
      stream: false,
    })

    // Usage is optional in the response; only mention it when present.
    const tokenCount = reply.usage?.total_tokens
    const usageSuffix = tokenCount == null ? "" : ` · ${tokenCount} tokens`
    return { ok: true, message: `Proxy OK — ${reply.model}${usageSuffix}.` }
  } catch (failure) {
    let message: string
    if (failure instanceof LLMProxyError) {
      message = friendly(failure)
    } else if (failure instanceof Error) {
      message = failure.message
    } else {
      message = String(failure)
    }
    return { ok: false, message }
  }
}
|
|
|
|
/**
 * Converts whatever was thrown by the client into an `LLMProxyError`.
 *
 * ArcadiaClient throws ArcadiaError carrying a wrapped
 * `{ error: { code, message } }` body plus the HTTP status. The fields are
 * read structurally, on a best-effort basis, so this module stays decoupled
 * from that class (it lives in a sibling lib).
 *
 * @param e - The caught value.
 * @returns An `LLMProxyError`; values that are not objects become an
 *   `"unknown"` error with status `0`.
 */
function asProxyError(e: unknown): LLMProxyError {
  // Primitives, null and functions carry no structured detail.
  if (!e || typeof e !== "object") {
    return new LLMProxyError("unknown", String(e), 0)
  }

  const thrown = e as {
    status?: number
    code?: string
    message?: string
    body?: { error?: { code?: string; message?: string } }
    headers?: Headers | Record<string, string>
  }

  const httpStatus = thrown.status ?? 0
  // The wrapped body takes precedence over fields on the error object itself.
  const reportedCode = (thrown.body?.error?.code ?? thrown.code) as LLMProxyErrorCode | undefined
  const text = thrown.body?.error?.message ?? thrown.message ?? "Proxy request failed."
  const retryAfter = readRetryAfter(thrown.headers)

  // Without an explicit code, fall back to a guess based on the HTTP status.
  const resolvedCode = reportedCode ?? inferCodeFromStatus(httpStatus)
  return new LLMProxyError(resolvedCode, text, httpStatus, retryAfter)
}
|
|
|
|
/**
 * Picks an error code from an HTTP status alone, for failures that did not
 * include an explicit code.
 *
 * @param status - HTTP status of the failed response.
 * @returns The mapped code, or `"unknown"` for any status not listed here.
 */
function inferCodeFromStatus(status: number): LLMProxyErrorCode {
  switch (status) {
    case 401:
      return "unauthorized"
    case 403:
      return "ip_not_allowed"
    case 404:
      return "unknown_provider"
    case 410:
      return "secret_expired"
    case 429:
      return "rate_limited"
    case 502:
    case 503:
    case 504:
      return "upstream_unavailable"
    default:
      return "unknown"
  }
}
|
|
|
|
/**
 * Extracts the `Retry-After` delay, in seconds, from response headers.
 *
 * Accepts either a `Headers`-like object (anything exposing a `get` method)
 * or a plain header record. Record keys are matched case-insensitively, with
 * the exact spellings `retry-after` and `Retry-After` tried first.
 *
 * Both header forms are understood: a number of seconds, and an HTTP-date,
 * which is converted to the number of seconds from now (never below `0`).
 *
 * @param h - Headers of the failed response, if any.
 * @returns Seconds to wait, or `undefined` when the header is absent, blank,
 *   negative, or not parseable.
 */
function readRetryAfter(h: Headers | Record<string, string> | undefined): number | undefined {
  if (!h) return undefined

  let raw: string | null | undefined
  if (typeof (h as { get?: unknown }).get === "function") {
    // Duck-typed so polyfilled Headers work and so runtimes without a global
    // `Headers` do not hit a ReferenceError from `instanceof`.
    raw = (h as Headers).get("retry-after")
  } else {
    const record = h as Record<string, string>
    raw = record["retry-after"] ?? record["Retry-After"]
    if (raw == null) {
      const match = Object.keys(record).find((key) => key.toLowerCase() === "retry-after")
      raw = match === undefined ? undefined : record[match]
    }
  }

  const value = typeof raw === "string" ? raw.trim() : ""
  // Checked explicitly because Number("") and Number("  ") are 0, not NaN.
  if (value === "") return undefined

  // Form 1: delay in seconds.
  const seconds = Number(value)
  if (Number.isFinite(seconds)) return seconds >= 0 ? seconds : undefined

  // Form 2: HTTP-date. A date already in the past means "retry now".
  const retryAt = Date.parse(value)
  if (Number.isNaN(retryAt)) return undefined
  return Math.max(0, Math.ceil((retryAt - Date.now()) / 1000))
}
|
|
|
|
/** User-facing copy for the error codes whose message has no dynamic part. */
const STATIC_PROXY_ERROR_COPY: ReadonlyMap<string, string> = new Map([
  ["unauthorized", "Sign in expired — refresh and try again."],
  ["secret_disabled", "The vault secret is disabled. Re-enable it under /secrets."],
  ["secret_expired", "The vault secret has expired. Rotate it under /secrets."],
  ["secret_consumed", "Read-once secret already used. Rotate it under /secrets."],
  ["ip_not_allowed", "This client's IP is blocked by the secret's allowlist."],
  ["unknown_provider", "The proxy doesn't recognise this provider. Check the provider id."],
  ["upstream_unavailable", "The upstream LLM provider returned an error or timed out."],
])

/**
 * Turns a proxy error into a message suitable for showing to the user.
 *
 * @param err - The error to describe.
 * @returns Fixed copy for recognised codes; for `rate_limited` the retry delay
 *   is included when it is known and non-zero. Any other code falls back to
 *   the error's own `message`.
 */
export function friendly(err: LLMProxyError): string {
  if (err.code === "rate_limited") {
    if (err.retryAfter) {
      return `Rate limited. Retry in ${err.retryAfter}s.`
    }
    return "Rate limited — slow down and try again."
  }
  return STATIC_PROXY_ERROR_COPY.get(err.code) ?? err.message
}
|