// arcadia-admin/app/lib/arcadia/llm-proxy.ts

// Arcadia LLM proxy client.
//
// Implements the spec in docs/LLM_PROXY_CONTRACT.md against arcadia-core's
// POST /api/v1/ai/llm/chat. The lib (@crema/llm-providers-ui buildAdapter)
// owns the streaming chat path itself; this module exposes a lightweight
// non-streaming probe so the Settings "Test connection" button can verify
// the proxy round-trips end-to-end (auth → secret resolution → upstream
// dispatch → response shape).

import type { ArcadiaClient } from "@crema/arcadia-core-client"

/**
 * Provider ids accepted in the `provider` field of a proxy chat request.
 * `lmstudio` is the one provider for which `secret_name` may be omitted.
 */
export type LLMProxyProvider = "openai" | "anthropic" | "deepseek" | "qwen" | "lmstudio"

/**
 * Failure categories carried by `LLMProxyError.code`. Every member except
 * `unknown` has dedicated user-facing copy in `friendly()`; `unknown` falls
 * back to the raw error message.
 */
export type LLMProxyErrorCode =
  // Caller identity
  | "unauthorized"
  // State of the referenced secret
  | "secret_disabled" | "secret_expired" | "secret_consumed"
  // Secret allowlist rejected the caller's IP
  | "ip_not_allowed"
  // Request named a provider the proxy does not know
  | "unknown_provider"
  // Upstream / throttling
  | "upstream_unavailable" | "rate_limited"
  // Anything else
  | "unknown"

/** Request payload sent to the proxy's chat endpoint by `chat()`. */
export interface LLMProxyChatRequest {
  /** Provider id the request is addressed to. */
  provider: LLMProxyProvider

  /** Required for every provider except `lmstudio`. */
  secret_name?: string

  /** Model identifier, sent as-is. */
  model: string

  /** Conversation turns, each tagged with the role that produced it. */
  messages: { role: "system" | "user" | "assistant"; content: string }[]

  /** `chat()` in this module always sends `false`, whatever is set here. */
  stream?: boolean

  /** Optional completion-length cap; `probeProxy()` sends 1. */
  max_tokens?: number

  /** Optional; presumably a sampling temperature — confirm against the contract doc. */
  temperature?: number
}

/**
 * Non-streaming response body returned by the proxy's chat endpoint.
 * NOTE(review): the field set looks like an OpenAI-style chat completion —
 * confirm against docs/LLM_PROXY_CONTRACT.md.
 */
export interface LLMProxyChatResponse {
  id: string
  /** Fixed discriminator for this response shape. */
  object: "chat.completion"
  created: number
  /** Model name as reported in the response; `probeProxy()` echoes it to the user. */
  model: string
  choices: {
    index: number
    finish_reason: string | null
    message: { role: "assistant"; content: string; tool_calls: unknown }
  }[]
  /** Optional token accounting; `probeProxy()` shows `total_tokens` when present. */
  usage?: {
    prompt_tokens: number
    completion_tokens: number
    total_tokens: number
  }
}

/**
 * Error type for every failure surfaced by this module's proxy calls.
 *
 * Carries a machine-readable `code` alongside the human-readable `message`
 * so callers can branch on the category (see `friendly()`).
 */
export class LLMProxyError extends Error {
  /** Failure category. */
  readonly code: LLMProxyErrorCode

  /** HTTP status of the failed call; `asProxyError` uses 0 when none is available. */
  readonly status: number

  /** Suggested wait in seconds before retrying, when one was supplied. */
  readonly retryAfter?: number

  /**
   * @param code       Failure category.
   * @param message    Human-readable description (becomes `Error.message`).
   * @param status     HTTP status, or 0 when there is none.
   * @param retryAfter Optional retry delay in seconds.
   */
  constructor(
    code: LLMProxyErrorCode,
    message: string,
    status: number,
    retryAfter?: number,
  ) {
    super(message)
    // Override the inherited "Error" so logs identify the subclass.
    this.name = "LLMProxyError"
    this.code = code
    this.status = status
    this.retryAfter = retryAfter
  }
}

/**
 * Non-streaming chat completion via the proxy. The streaming path is owned
 * by @crema/llm-providers-ui's buildAdapter; use this for probes and
 * one-shot calls where SSE is overkill.
 */
/**
 * One-shot (non-streaming) chat completion through the proxy.
 *
 * Intended for probes and other single calls; the streaming path lives in
 * @crema/llm-providers-ui's buildAdapter, not here.
 *
 * @param arcadia Client used to issue the POST.
 * @param req     Request payload; any `stream` value is overridden with `false`.
 * @returns The response as resolved by `arcadia.POST`.
 * @throws {LLMProxyError} Every failure is normalised through `asProxyError`.
 */
export async function chat(
  arcadia: ArcadiaClient,
  req: LLMProxyChatRequest,
): Promise<LLMProxyChatResponse> {
  try {
    // Spread first, then pin `stream` so a caller-supplied `true` cannot win.
    const payload = { ...req, stream: false }
    return await arcadia.POST<LLMProxyChatResponse>("/api/v1/ai/llm/chat", { body: payload })
  } catch (cause) {
    throw asProxyError(cause)
  }
}

/**
 * Cheap end-to-end probe for the Settings "Test connection" flow in proxy
 * mode. Sends a 1-token "ping" and reports whether the proxy is wired,
 * the secret resolves, and the upstream answered. Intentionally tolerant
 * of token-budget rejections — those still prove the round-trip works.
 */
/**
 * End-to-end check behind the Settings "Test connection" button in proxy
 * mode. Sends a single "ping" user message capped at `max_tokens: 1`.
 *
 * Any response that resolves counts as success: only `model` and
 * `usage.total_tokens` are read, so `choices` / `finish_reason` are never
 * inspected and a truncated reply still passes.
 *
 * Never rejects for a failed call — errors are folded into
 * `{ ok: false, message }`, using `friendly()` copy for `LLMProxyError`.
 *
 * @param arcadia Client handed through to `chat()`.
 * @param opts    Provider, model and (optionally) the secret name to probe.
 * @returns `ok` plus a message suitable for direct display.
 */
export async function probeProxy(
  arcadia: ArcadiaClient,
  opts: { provider: LLMProxyProvider; model: string; secretName?: string },
): Promise<{ ok: boolean; message: string }> {
  try {
    const reply = await chat(arcadia, {
      provider: opts.provider,
      secret_name: opts.secretName,
      model: opts.model,
      messages: [{ role: "user", content: "ping" }],
      max_tokens: 1,
      stream: false,
    })

    // Token usage is optional in the response; omit the suffix when absent.
    const totalTokens = reply.usage?.total_tokens
    const tokenNote = totalTokens != null ? ` · ${totalTokens} tokens` : ""
    return { ok: true, message: `Proxy OK — ${reply.model}${tokenNote}.` }
  } catch (failure) {
    let message: string
    if (failure instanceof LLMProxyError) {
      message = friendly(failure)
    } else if (failure instanceof Error) {
      message = failure.message
    } else {
      message = String(failure)
    }
    return { ok: false, message }
  }
}

/**
 * Every member of `LLMProxyErrorCode`, as a lookup table. Typing it as a
 * `Record` over the union makes the compiler reject a missing or extra key,
 * so the runtime check below cannot drift from the type.
 */
const KNOWN_PROXY_ERROR_CODES: Record<LLMProxyErrorCode, true> = {
  unauthorized: true,
  secret_disabled: true,
  secret_expired: true,
  secret_consumed: true,
  ip_not_allowed: true,
  unknown_provider: true,
  upstream_unavailable: true,
  rate_limited: true,
  unknown: true,
}

/** Runtime guard: is `value` one of the codes this module knows about? */
function isKnownProxyErrorCode(value: unknown): value is LLMProxyErrorCode {
  return (
    typeof value === "string" &&
    Object.prototype.hasOwnProperty.call(KNOWN_PROXY_ERROR_CODES, value)
  )
}

/**
 * Normalises anything thrown by the client into an `LLMProxyError`.
 *
 * ArcadiaClient throws ArcadiaError with a wrapped
 * `{ error: { code, message } }` body and an HTTP status. The fields are
 * read structurally, without importing the class (it lives in a sibling
 * lib), and each one is validated because nothing here guarantees the
 * thrown value really has that shape.
 *
 * Code resolution:
 *  - no code supplied           → inferred from the HTTP status;
 *  - a code this module knows   → used as-is;
 *  - any other supplied code    → `"unknown"`, so `friendly()` shows the
 *    original message rather than guessing a category from the status.
 *
 * @param e The caught value (any type).
 * @returns An `LLMProxyError`; `e` itself when it already is one.
 */
function asProxyError(e: unknown): LLMProxyError {
  // Already normalised — keep the instance (and its retryAfter) intact.
  if (e instanceof LLMProxyError) return e

  if (e && typeof e === "object") {
    const loose = e as {
      status?: unknown
      code?: unknown
      message?: unknown
      body?: { error?: { code?: unknown; message?: unknown } | null } | null
      headers?: Headers | Record<string, string>
    }

    const status = typeof loose.status === "number" ? loose.status : 0

    // Body-level code wins over a top-level one. The value may be a
    // non-string (e.g. the legacy numeric `code` on a DOMException), so it
    // is checked rather than cast.
    const rawCode = loose.body?.error?.code ?? loose.code
    let code: LLMProxyErrorCode
    if (rawCode == null) {
      code = inferCodeFromStatus(status)
    } else if (isKnownProxyErrorCode(rawCode)) {
      code = rawCode
    } else {
      code = "unknown"
    }

    const bodyMessage = loose.body?.error?.message
    let message = "Proxy request failed."
    if (typeof bodyMessage === "string") {
      message = bodyMessage
    } else if (typeof loose.message === "string") {
      message = loose.message
    }

    return new LLMProxyError(code, message, status, readRetryAfter(loose.headers))
  }

  // Primitives (thrown strings, numbers, null, …).
  return new LLMProxyError("unknown", String(e), 0)
}

/**
 * Fallback mapping from an HTTP status to an error code, used when the
 * failure did not carry an explicit code. Statuses without an entry
 * (including 0) map to `"unknown"`.
 */
function inferCodeFromStatus(status: number): LLMProxyErrorCode {
  switch (status) {
    case 401:
      return "unauthorized"
    case 403:
      return "ip_not_allowed"
    case 404:
      return "unknown_provider"
    case 410:
      return "secret_expired"
    case 429:
      return "rate_limited"
    // Gateway-class failures all collapse into one category.
    case 502:
    case 503:
    case 504:
      return "upstream_unavailable"
    default:
      return "unknown"
  }
}

/**
 * Extracts the Retry-After delay, in seconds, from response headers.
 *
 * Accepts either a `Headers` instance or a plain header record (matched
 * case-insensitively). Both forms of the header value are understood:
 *  - delay-seconds (`"30"`)  → returned as-is when non-negative;
 *  - HTTP-date               → converted to whole seconds from now, rounded
 *    up and clamped at 0 for dates already in the past.
 *
 * @param h Headers of the failed response, if any.
 * @returns Seconds to wait, or `undefined` when the header is absent,
 *          negative, or unparseable.
 */
function readRetryAfter(h: Headers | Record<string, string> | undefined): number | undefined {
  if (!h) return undefined

  let raw: string | null | undefined
  // `typeof` guard: `Headers` is not a global in every runtime, and a bare
  // `instanceof` against a missing global would throw.
  if (typeof Headers !== "undefined" && h instanceof Headers) {
    raw = h.get("retry-after")
  } else {
    // Header names are case-insensitive, but plain objects are not.
    for (const [name, value] of Object.entries(h)) {
      if (typeof value === "string" && name.toLowerCase() === "retry-after") {
        raw = value
        break
      }
    }
  }
  if (!raw) return undefined

  const seconds = Number(raw)
  if (Number.isFinite(seconds)) {
    // A negative delay is meaningless; treat it as "no hint".
    return seconds >= 0 ? seconds : undefined
  }

  // Not numeric — try the HTTP-date form.
  const retryAt = Date.parse(raw)
  if (Number.isNaN(retryAt)) return undefined
  return Math.max(0, Math.ceil((retryAt - Date.now()) / 1000))
}

/** User-facing copy for every error code whose message never varies. */
const FIXED_PROXY_ERROR_COPY: ReadonlyMap<string, string> = new Map([
  ["unauthorized", "Sign in expired — refresh and try again."],
  ["secret_disabled", "The vault secret is disabled. Re-enable it under /secrets."],
  ["secret_expired", "The vault secret has expired. Rotate it under /secrets."],
  ["secret_consumed", "Read-once secret already used. Rotate it under /secrets."],
  ["ip_not_allowed", "This client's IP is blocked by the secret's allowlist."],
  ["unknown_provider", "The proxy doesn't recognise this provider. Check the provider id."],
  ["upstream_unavailable", "The upstream LLM provider returned an error or timed out."],
])

/**
 * Turns a proxy error into a sentence suitable for showing to the user.
 *
 * `rate_limited` includes the retry delay when a non-zero one is known;
 * codes without dedicated copy (including `unknown`) fall back to the
 * error's own message.
 *
 * @param err The normalised proxy error.
 * @returns Display text for `err`.
 */
export function friendly(err: LLMProxyError): string {
  if (err.code === "rate_limited") {
    // A missing or zero delay gets the generic wording.
    if (err.retryAfter) {
      return `Rate limited. Retry in ${err.retryAfter}s.`
    }
    return "Rate limited — slow down and try again."
  }

  return FIXED_PROXY_ERROR_COPY.get(err.code) ?? err.message
}