Wire @crema/llm-providers-ui: multi-provider picker + AI persistence

Replaces the single-base-URL LLM settings with the new providers lib (OpenAI, Anthropic, DeepSeek, Qwen, LM Studio). Settings/LLM hosts the catalog-aware card; the /ai route builds adapters via buildAdapter() and resolves API keys from the arcadia vault per-call (direct mode). Anthropic skips the /v1/models probe (no such endpoint) and uses catalog defaults; failed probes for keyed providers fall back to the catalog instead of dropping to mock. AI conversation now persists across navigation and refresh via a new crema.ai.live localStorage key (separate from the compact-snapshot key). useChat hydrates from initialMessages on mount, saves on every change, and "Clear conversation" wipes both state and storage. Vite needs explicit resolve.alias for @crema/llm-ui and @crema/llm-providers-ui — when a sibling lib imports another @crema/*, tsconfigPaths can't resolve it (the importing file isn't in this project's tsconfig scope). Adds docs/LLM_PROXY_CONTRACT.md describing the POST /api/v1/ai/llm/chat endpoint the backend needs for proxy mode (keys never leave the server). Direct mode works against today's arcadia; proxy mode unblocks once that endpoint ships. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-01 22:50:23 +10:00
parent a907e25a7c
commit 7ba415d78e
6 changed files with 439 additions and 221 deletions
--- a/app/app.css
+++ b/app/app.css
@@ -18,6 +18,7 @@
@source "../../lib-feedback-ui/src";
@source "../../lib-auth-ui/src";
@source "../../lib-agent-ui/src";
+@source "../../lib-llm-providers-ui/src";
 /* CREMA:SOURCES */

@custom-variant dark (&:is(.dark *));
--- a/app/routes/ai.tsx
+++ b/app/routes/ai.tsx
@@ -1,7 +1,6 @@
 import {
  useCallback,
  useEffect,
-  useMemo,
  useRef,
  useState,
 } from "react"
@@ -29,12 +28,16 @@ import {
 import {
  LLMProvider,
  MockLLM,
-  OpenAICompatibleAdapter,
  listModels,
  useChat,
  useCompletion,
  type LLMAdapter,
 } from "@crema/llm-ui"
+import {
+  buildAdapter,
+  getProvider,
+  useSettings as useProviderSettings,
+} from "@crema/llm-providers-ui"
 import { TypingIndicator } from "@crema/chat-ui"

 import { AppShell } from "~/components/layout/app-shell"
@@ -51,7 +54,6 @@ import {
  PopoverContent,
  PopoverTrigger,
 } from "~/components/ui/popover"
-import { useLLMSettings } from "~/lib/llm-settings"
 import {
  loadActiveAgentId,
  saveActiveAgentId,
@@ -87,6 +89,37 @@ function ToolResultBlock({ name, result }: { name: string; result: unknown }) {
 }

 const SNAPSHOT_KEY = "crema.ai.snapshot"
+// Separate key for the live conversation that survives navigation. The
+// compact snapshot is reserved for the user-triggered Compact/Restore flow.
+const LIVE_KEY = "crema.ai.live"
+
+function loadLive(): LLMMessage[] | null {
+  if (typeof window === "undefined") return null
+  try {
+    const raw = localStorage.getItem(LIVE_KEY)
+    if (!raw) return null
+    const parsed = JSON.parse(raw)
+    if (Array.isArray(parsed)) return parsed as LLMMessage[]
+  } catch {}
+  return null
+}
+function saveLive(msgs: LLMMessage[]) {
+  if (typeof window === "undefined") return
+  if (msgs.length === 0) {
+    localStorage.removeItem(LIVE_KEY)
+    return
+  }
+  try {
+    localStorage.setItem(LIVE_KEY, JSON.stringify(msgs))
+  } catch {
+    // Quota exceeded or similar — silently drop persistence.
+  }
+}
+function clearLive() {
+  if (typeof window === "undefined") return
+  localStorage.removeItem(LIVE_KEY)
+}
+
 type StoredMessage = { role: "user" | "assistant"; content: string }
 function loadAISnapshot(): StoredMessage[] | null {
  if (typeof window === "undefined") return null
@@ -146,13 +179,16 @@ function withTimeout<T>(p: Promise<T>, ms: number, signal: AbortSignal) {
 }

 export default function AIRoute() {
-  const settings = useLLMSettings()
+  const settings = useProviderSettings()
+  const arcadia = useArcadiaClient()
+  const provider = getProvider(settings.providerId)
  const agents = useAgents()
  const [status, setStatus] = useState<Status>({ kind: "probing" })
  const [model, setModel] = useState<string>(() => {
    if (typeof window === "undefined") return ""
    return localStorage.getItem(MODEL_KEY) ?? ""
  })
+  const [adapter, setAdapter] = useState<LLMAdapter>(mockAdapter)
  const [activeAgentId, setActiveAgentIdState] = useState<string>(() =>
    loadActiveAgentId(),
  )
@@ -163,28 +199,110 @@ export default function AIRoute() {
  const activeAgent =
    agents.find((a) => a.id === activeAgentId) ?? agents[0]

+  // When the user changes provider/model in Settings, follow along.
+  useEffect(() => {
+    if (settings.model) setModel(settings.model)
+  }, [settings.providerId, settings.model])
+
+  // Resolve the API key from the vault (direct mode) or build the proxy
+  // adapter (proxy mode), then refresh the model list.
  const probe = useCallback(() => {
    const ac = new AbortController()
    setStatus({ kind: "probing" })
-    withTimeout(
-      listModels({ baseURL: settings.baseURL, signal: ac.signal }),
+
+    const resolveSecret = async (name: string): Promise<string> => {
+      const res = await arcadia.GET<{ data: { value: string } }>(
+        `/api/v1/secrets/${encodeURIComponent(name)}`,
+      )
+      return res.data.value
+    }
+
+    const arcadiaBaseURL =
+      (import.meta.env.VITE_ARCADIA_URL as string | undefined) ?? "http://localhost:4000"
+    const arcadiaTenantId =
+      (import.meta.env.VITE_ARCADIA_TENANT as string | undefined) ?? "default"
+    const arcadiaAuthToken =
+      typeof window !== "undefined"
+        ? sessionStorage.getItem("arcadia_access_token") ?? undefined
+        : undefined
+    ;(async () => {
+      // Build the adapter first so chat works even if the model probe fails.
+      try {
+        const a = await buildAdapter({
+          settings,
+          resolveSecret,
+          arcadiaBaseURL,
+          arcadiaAuthToken,
+          arcadiaTenantId,
+        })
+        setAdapter(a)
+      } catch {
+        setAdapter(mockAdapter)
+      }
+
+      // Probe for a live model list. Anthropic has no /models endpoint, so
+      // fall back to the provider catalog's default models.
+      if (provider.transport === "anthropic") {
+        const ids = provider.defaultModels.length
+          ? provider.defaultModels
+          : ["claude-opus-4-7"]
+        setStatus({ kind: "live", models: ids })
+        setModel((cur) => (cur && ids.includes(cur) ? cur : settings.model || ids[0]))
+        return
+      }
+
+      const baseURL = settings.baseURL || provider.baseURL
+      let apiKey: string | undefined
+      if (provider.requiresKey && settings.secretName) {
+        try {
+          apiKey = await resolveSecret(settings.secretName)
+        } catch {
+          // Fall through; listModels may still work for some providers without a key.
+        }
+      }
+
+      try {
+        const rows = await withTimeout(
+          listModels({ baseURL, apiKey, signal: ac.signal }),
          PROBE_TIMEOUT_MS,
          ac.signal,
        )
-      .then((rows) => {
        const ids = rows.map((m) => m.id)
        if (ids.length === 0) {
          setStatus({ kind: "mock", reason: "endpoint returned no models" })
          return
        }
        setStatus({ kind: "live", models: ids })
-        setModel((cur) => (cur && ids.includes(cur) ? cur : ids[0]))
-      })
-      .catch(() => {
+        setModel((cur) => (cur && ids.includes(cur) ? cur : settings.model || ids[0]))
+      } catch {
+        // Probe failed but adapter may still be usable; show the catalog default
+        // models so the user can pick one and just try sending.
+        if (provider.defaultModels.length) {
+          setStatus({ kind: "live", models: provider.defaultModels })
+          setModel((cur) =>
+            cur && provider.defaultModels.includes(cur)
+              ? cur
+              : settings.model || provider.defaultModels[0],
+          )
+        } else {
          setStatus({ kind: "mock", reason: "endpoint unreachable" })
-      })
+        }
+      }
+    })()
+
    return () => ac.abort()
-  }, [settings.baseURL])
+    // eslint-disable-next-line react-hooks/exhaustive-deps
+  }, [
+    arcadia,
+    settings.providerId,
+    settings.baseURL,
+    settings.secretName,
+    settings.mode,
+    settings.model,
+    provider.transport,
+    provider.baseURL,
+    provider.requiresKey,
+  ])

  useEffect(() => probe(), [probe])

@@ -192,16 +310,6 @@ export default function AIRoute() {
    if (model) localStorage.setItem(MODEL_KEY, model)
  }, [model])

-  const adapter: LLMAdapter = useMemo(() => {
-    if (status.kind === "live") {
-      return new OpenAICompatibleAdapter({
-        baseURL: settings.baseURL,
-        apiKey: settings.apiKey || "lm-studio",
-      })
-    }
-    return mockAdapter
-  }, [status.kind, settings.baseURL, settings.apiKey])
-
  const activeModel =
    status.kind === "live" ? model || status.models[0] : "mock"

@@ -256,10 +364,29 @@ function ChatSurface({
    .filter(Boolean)
    .join("\n\n")
  const arcadia = useArcadiaClient()
+  // Hydrate from the persisted live conversation so navigating away and
+  // back doesn't reset the chat. Read once on mount.
+  const initialLive = useRef<LLMMessage[] | null>(null)
+  if (initialLive.current === null) {
+    initialLive.current = loadLive() ?? []
+  }
  const { messages, setMessages, send, continueChat, abort, isStreaming, reset } = useChat({
    system: systemPrompt,
+    initialMessages: initialLive.current,
  })

+  // Persist on every change. Streaming partials get saved too, which is what
+  // we want — refreshing mid-stream restores the partial assistant message.
+  useEffect(() => {
+    saveLive(messages)
+  }, [messages])
+
+  // Wrap reset so "Clear conversation" also drops the persisted live conversation.
+  const resetAndClear = useCallback(() => {
+    reset()
+    clearLive()
+  }, [reset])
+
  // Auto tool-loop using native function calls. Reads run automatically;
  // writes are held in `pendingConfirm` until the operator clicks Confirm
  // or Deny in the inline ConfirmCard.
@@ -642,7 +769,7 @@ function ChatSurface({
            onSaveToLibrary={saveToLibrary}
            onShowPrompt={() => setShowPromptOpen(true)}
            onRetryProbe={onRetryProbe}
-            onClear={reset}
+            onClear={resetAndClear}
            hasMessages={messages.length > 0}
            hasUserMessage={messages.some((m) => m.role === "user")}
            hasCompactSnapshot={hasCompactSnapshot}
--- a/app/routes/settings.tsx
+++ b/app/routes/settings.tsx
@@ -1,8 +1,5 @@
 import { useEffect, useState } from "react"
 import {
-  Check,
-  X,
-  Loader2,
  Cpu,
  Palette,
  User as UserIcon,
@@ -12,6 +9,14 @@ import {
  Trash2,
 } from "lucide-react"
 import { listModels } from "@crema/llm-ui"
+import {
+  buildAdapter,
+  LLMProvidersSettingsCard,
+  resetSettings as resetProviderSettings,
+  useSettings as useProviderSettings,
+  type LLMProvidersSettings,
+} from "@crema/llm-providers-ui"
+import { useArcadiaClient } from "@crema/arcadia-client"

 import { AppShell } from "~/components/layout/app-shell"
 import { Button } from "~/components/ui/button"
@@ -22,15 +27,6 @@ import {
  CardHeader,
  CardTitle,
 } from "~/components/ui/card"
-import { Input } from "~/components/ui/input"
-import { Textarea } from "~/components/ui/textarea"
-import {
-  DEFAULT_SETTINGS,
-  DEFAULT_SYSTEM_PROMPT,
-  saveLLMSettings,
-  useLLMSettings,
-  type LLMSettings,
-} from "~/lib/llm-settings"
 import {
  loadActiveAgentId,
  newAgentId,
@@ -71,51 +67,92 @@ const sections: {
  { id: "about", label: "About", icon: Info, description: "Version & credits" },
 ]

-type TestState =
-  | { kind: "idle" }
-  | { kind: "running" }
-  | { kind: "ok"; count: number }
-  | { kind: "fail"; reason: string }
-
 export default function SettingsRoute() {
-  const settings = useLLMSettings()
-  const [draft, setDraft] = useState<LLMSettings>(settings)
-  const [savedAt, setSavedAt] = useState<number | null>(null)
-  const [test, setTest] = useState<TestState>({ kind: "idle" })
+  const arcadia = useArcadiaClient()

-  useEffect(() => {
-    setDraft(settings)
-  }, [settings])
-
-  const runTest = async () => {
-    setTest({ kind: "running" })
-    const ac = new AbortController()
-    const timeout = setTimeout(() => ac.abort(), 4000)
+  const testConnection = async (
+    s: LLMProvidersSettings,
+  ): Promise<{ ok: boolean; message: string }> => {
    try {
-      const rows = await listModels({ baseURL: draft.baseURL, signal: ac.signal })
-      setTest({ kind: "ok", count: rows.length })
-    } catch (e) {
-      setTest({
-        kind: "fail",
-        reason: e instanceof Error ? e.message : String(e),
+      const arcadiaBaseURL =
+        (import.meta.env.VITE_ARCADIA_URL as string | undefined) ?? "http://localhost:4000"
+      const arcadiaTenantId =
+        (import.meta.env.VITE_ARCADIA_TENANT as string | undefined) ?? "default"
+      const arcadiaAuthToken =
+        typeof window !== "undefined"
+          ? sessionStorage.getItem("arcadia_access_token") ?? undefined
+          : undefined
+
+      const adapter = await buildAdapter({
+        settings: s,
+        // Direct-mode resolver — fetches the API key from the vault.
+        resolveSecret: async (name) => {
+          const res = await arcadia.GET<{ data: { value: string } }>(
+            `/api/v1/secrets/${encodeURIComponent(name)}`,
+          )
+          return res.data.value
+        },
+        // Proxy-mode coordinates.
+        arcadiaBaseURL,
+        arcadiaAuthToken,
+        arcadiaTenantId,
      })
+
+      // In proxy mode the adapter just being built is the strongest signal we
+      // can get without actually firing a chat request — the proxy endpoint
+      // doesn't exist on the backend yet, so any /models probe would 404.
+      if (s.mode === "proxy") {
+        return {
+          ok: true,
+          message:
+            "Adapter built. Note: the backend proxy (/api/v1/ai/llm/chat) isn't deployed yet — see docs/LLM_PROXY_CONTRACT.md.",
+        }
+      }
+
+      // Direct mode — for OpenAI-compatible endpoints, /models is a cheap probe.
+      if (s.providerId !== "anthropic") {
+        const baseURL =
+          s.baseURL ||
+          (s.providerId === "lmstudio"
+            ? "http://localhost:1234/v1"
+            : s.providerId === "openai"
+              ? "https://api.openai.com/v1"
+              : s.providerId === "deepseek"
+                ? "https://api.deepseek.com/v1"
+                : "https://dashscope-intl.aliyuncs.com/compatible-mode/v1")
+        // Resolve key for the probe (lmstudio doesn't need one).
+        let apiKey: string | undefined
+        if (s.providerId !== "lmstudio" && s.secretName) {
+          try {
+            const res = await arcadia.GET<{ data: { value: string } }>(
+              `/api/v1/secrets/${encodeURIComponent(s.secretName)}`,
+            )
+            apiKey = res.data.value
+          } catch (err) {
+            const msg = err instanceof Error ? err.message : String(err)
+            if (/404|not[_ ]found/i.test(msg)) {
+              return {
+                ok: false,
+                message: `No vault secret named "${s.secretName}". Create it under /secrets first (paste the API key as the Value), then enter the secret's name here.`,
+              }
+            }
+            throw err
+          }
+        }
+        const ac = new AbortController()
+        const t = setTimeout(() => ac.abort(), 5000)
+        try {
+          const rows = await listModels({ baseURL, apiKey, signal: ac.signal })
+          return { ok: true, message: `Connected. ${rows.length} model(s) reachable.` }
        } finally {
-      clearTimeout(timeout)
+          clearTimeout(t)
        }
      }
-
-  const dirty =
-    draft.baseURL !== settings.baseURL ||
-    draft.contextTokens !== settings.contextTokens ||
-    draft.responseBudget !== settings.responseBudget
-
-  const save = () => {
-    saveLLMSettings(draft)
-    setSavedAt(Date.now())
+      // Anthropic doesn't expose a /models list; we just confirm adapter built.
+      return { ok: true, message: `Adapter ready (${adapter.label ?? adapter.id}).` }
+    } catch (e) {
+      return { ok: false, message: e instanceof Error ? e.message : String(e) }
    }
-
-  const reset = () => {
-    setDraft(DEFAULT_SETTINGS)
  }

  const [section, setSection] = useState<SectionId>(() => {
@@ -173,151 +210,36 @@ export default function SettingsRoute() {

        <div className="min-w-0">
          {section === "llm" && (
+            <div className="flex flex-col gap-4">
              <Card>
                <CardHeader>
                  <CardTitle>LLM</CardTitle>
                  <CardDescription>
-                  Configure the local model endpoint and context budgets used
-                  by the Assistant.
+                    Pick a provider, model, and the arcadia-vault secret holding the API key. Settings
+                    auto-save as you type. The Assistant picks them up on the next message.
                  </CardDescription>
                </CardHeader>
-              <CardContent className="flex flex-col gap-5">
-                <Field
-                  label="Base URL"
-                  hint="OpenAI-compatible endpoint. LM Studio defaults to http://localhost:1234/v1."
-                >
-                  <Input
-                    data-action="settings-base-url"
-                    value={draft.baseURL}
-                    onChange={(e) =>
-                      setDraft((d) => ({ ...d, baseURL: e.target.value }))
-                    }
-                    placeholder="http://localhost:1234/v1"
-                    spellCheck={false}
-                    autoComplete="off"
+                <CardContent>
+                  <LLMProvidersSettingsCard
+                    onTest={testConnection}
+                    hideTransportToggle={false}
                  />
-                </Field>
+                </CardContent>
+              </Card>

-                <Field
-                  label="Context window (tokens)"
-                  hint="Match this to the context length you've loaded in LM Studio."
-                >
-                  <Input
-                    data-action="settings-context-tokens"
-                    type="number"
-                    min={1024}
-                    step={512}
-                    value={draft.contextTokens}
-                    onChange={(e) =>
-                      setDraft((d) => ({
-                        ...d,
-                        contextTokens:
-                          Number(e.target.value) || d.contextTokens,
-                      }))
-                    }
-                  />
-                </Field>
-
-                <Field
-                  label="System prompt"
-                  hint="Sent at the start of every conversation. Shapes the assistant's persona and scope. UI Control adds an action-driving preface on top of this when enabled."
-                >
-                  <Textarea
-                    data-action="settings-system-prompt"
-                    value={draft.systemPrompt}
-                    onChange={(e) =>
-                      setDraft((d) => ({ ...d, systemPrompt: e.target.value }))
-                    }
-                    rows={5}
-                    spellCheck={false}
-                    className="min-h-24 font-mono text-xs"
-                  />
-                  <button
-                    type="button"
-                    data-action="settings-system-prompt-reset"
-                    onClick={() =>
-                      setDraft((d) => ({
-                        ...d,
-                        systemPrompt: DEFAULT_SYSTEM_PROMPT,
-                      }))
-                    }
-                    className="self-start text-xs text-muted-foreground underline-offset-2 hover:text-foreground hover:underline"
-                  >
-                    Reset to default prompt
-                  </button>
-                </Field>
-
-                <Field
-                  label="Response cap (max tokens)"
-                  hint="Upper bound on each model reply. Smaller = faster, less rambling."
-                >
-                  <Input
-                    data-action="settings-response-budget"
-                    type="number"
-                    min={64}
-                    step={64}
-                    value={draft.responseBudget}
-                    onChange={(e) =>
-                      setDraft((d) => ({
-                        ...d,
-                        responseBudget:
-                          Number(e.target.value) || d.responseBudget,
-                      }))
-                    }
-                  />
-                </Field>
-
-                <div className="flex flex-wrap items-center gap-2">
+              <div className="flex items-center gap-2">
                <Button
-                    data-action="settings-save"
-                    onClick={save}
-                    disabled={!dirty}
-                  >
-                    Save
-                  </Button>
-                  <Button
-                    data-action="settings-test"
                  variant="outline"
-                    onClick={runTest}
-                    disabled={test.kind === "running"}
-                  >
-                    {test.kind === "running" ? (
-                      <Loader2 className="size-4 animate-spin" />
-                    ) : test.kind === "ok" ? (
-                      <Check className="size-4 text-emerald-600" />
-                    ) : test.kind === "fail" ? (
-                      <X className="size-4 text-destructive" />
-                    ) : null}
-                    Test connection
-                  </Button>
-                  <Button
+                  onClick={() => resetProviderSettings()}
                  data-action="settings-reset"
-                    variant="outline"
-                    onClick={reset}
                >
                  Reset to defaults
                </Button>
-                  {savedAt && !dirty && (
-                    <span className="text-sm text-muted-foreground">
-                      Saved.
+                <span className="text-xs text-muted-foreground">
+                  Need to manage stored keys? See <a href="/secrets" className="underline">Secrets</a>.
                </span>
-                  )}
-                  {test.kind === "ok" && (
-                    <span className="text-sm text-emerald-700 dark:text-emerald-400">
-                      {test.count} model{test.count === 1 ? "" : "s"} available.
-                    </span>
-                  )}
-                  {test.kind === "fail" && (
-                    <span
-                      className="text-sm text-destructive"
-                      title={test.reason}
-                    >
-                      Failed: {test.reason.slice(0, 60)}
-                    </span>
-                  )}
              </div>
-              </CardContent>
-            </Card>
+            </div>
          )}

          {section === "agents" && <AgentsPanel />}
--- a/docs/LLM_PROXY_CONTRACT.md
+++ b/docs/LLM_PROXY_CONTRACT.md
@@ -0,0 +1,158 @@
+# LLM Proxy Contract
+
+> **Status: not yet implemented on the backend.** This document is the contract that `lib-llm-providers-ui` expects from arcadia. Implement `POST /api/v1/ai/llm/chat` server-side to make `mode: "proxy"` work in the client.
+
+## Why a proxy?
+
+The Settings UI ships in two transport modes:
+
+- **`direct`** — the browser fetches the API key from arcadia's vault (`GET /api/v1/secrets/:name`), then calls OpenAI/Anthropic/DeepSeek/Qwen directly. Works today, but the key briefly lives in browser memory and the prompt contents go straight to the upstream provider with no opportunity for arcadia to log, meter, or rewrite them.
+- **`proxy`** — the browser sends the chat request to arcadia, which reads the secret server-side and calls the upstream provider. Keys never leave arcadia. This is what production should use.
+
+This contract only covers the proxy mode.
+
+## Endpoint
+
+```
+POST /api/v1/ai/llm/chat
+Authorization: Bearer <arcadia session token>
+X-Tenant-ID:   <tenant id>
+Content-Type:  application/json
+```
+
+The path is `/api/v1/ai/llm/chat` so it lives under the existing `/api/v1/ai/*` scope (next to `embeddings`, `tools`, `llm/usage`).
+
+## Request body
+
+The shape is OpenAI's chat-completion request, **plus** two arcadia-specific fields:
+
+```json
+{
+  "provider":    "openai",
+  "secret_name": "llm-openai-api-key",
+  "model":       "gpt-4o-mini",
+  "messages": [
+    { "role": "system", "content": "You are a helpful assistant." },
+    { "role": "user",   "content": "Hello!" }
+  ],
+  "stream":      true,
+  "max_tokens":  1024,
+  "temperature": 0.7,
+  "tools": [
+    {
+      "type": "function",
+      "function": {
+        "name": "search_docs",
+        "description": "...",
+        "parameters": { "type": "object", "properties": {} }
+      }
+    }
+  ],
+  "tool_choice": "auto"
+}
+```
+
+### Arcadia-specific fields
+
+| Field         | Type                                            | Notes |
+|---------------|-------------------------------------------------|-------|
+| `provider`    | `"openai" \| "anthropic" \| "deepseek" \| "qwen" \| "lmstudio"` | Selects the upstream backend. |
+| `secret_name` | `string` (optional for `lmstudio`)              | Name of the vault secret holding the upstream API key. The proxy resolves it via the same `Secrets.get/3` used for tenant-facing reads. |
+
+The proxy must:
+1. Authenticate the arcadia session.
+2. Resolve `secret_name` for the current tenant (or fall back to platform-level). Refuse the call if the secret is disabled, expired, or IP-blocked. The existing `Arcadia.Secrets.get/3` already returns the right error codes.
+3. Map the request to the upstream's native shape (Anthropic's `/v1/messages` differs from OpenAI's `/v1/chat/completions`).
+4. Forward it with the resolved key as the upstream's expected auth header (`Authorization: Bearer <key>` for OpenAI/DeepSeek/Qwen, `x-api-key: <key>` + `anthropic-version: 2023-06-01` for Anthropic).
+5. Stream the response back as **OpenAI-shape SSE** regardless of upstream. (See "Response — streaming" below.)
+6. Record a usage row via the existing `POST /api/v1/ai/llm/usage` after each completion (see "Usage tracking" below).
+
+## Response — non-streaming (`stream: false`)
+
+OpenAI chat-completion shape, returned as a single JSON document:
+
+```json
+{
+  "id": "chatcmpl-...",
+  "object": "chat.completion",
+  "created": 1714512000,
+  "model": "gpt-4o-mini",
+  "choices": [
+    {
+      "index": 0,
+      "finish_reason": "stop",
+      "message": {
+        "role": "assistant",
+        "content": "Hi there!",
+        "tool_calls": null
+      }
+    }
+  ],
+  "usage": {
+    "prompt_tokens": 12,
+    "completion_tokens": 4,
+    "total_tokens": 16
+  }
+}
+```
+
+For Anthropic upstream, translate `usage.input_tokens` / `output_tokens` → `prompt_tokens` / `completion_tokens` and combine `content` blocks into a single string (or surface `tool_use` blocks via `tool_calls`).
+
+## Response — streaming (`stream: true`)
+
+Server-Sent Events, one event per delta, terminated with `data: [DONE]`. Each `data:` line is JSON of OpenAI's chat-completion *delta* shape:
+
+```
+data: {"id":"chatcmpl-...","object":"chat.completion.chunk","created":1714512000,"model":"gpt-4o-mini","choices":[{"index":0,"delta":{"role":"assistant","content":""},"finish_reason":null}]}
+
+data: {"id":"chatcmpl-...","object":"chat.completion.chunk","created":1714512000,"model":"gpt-4o-mini","choices":[{"index":0,"delta":{"content":"Hi"},"finish_reason":null}]}
+
+data: {"id":"chatcmpl-...","object":"chat.completion.chunk","created":1714512000,"model":"gpt-4o-mini","choices":[{"index":0,"delta":{"content":" there"},"finish_reason":null}]}
+
+data: {"id":"chatcmpl-...","object":"chat.completion.chunk","created":1714512000,"model":"gpt-4o-mini","choices":[{"index":0,"delta":{},"finish_reason":"stop"}]}
+
+data: [DONE]
+```
+
+For Anthropic upstream, translate `content_block_delta` events of type `text_delta` into delta `content` strings, and `message_stop` into the `finish_reason: "stop"` event. For tool calls, translate each `content_block_start` of type `tool_use` (which carries the id and name) plus the streamed JSON argument fragments that follow it into OpenAI-shape `delta.tool_calls` entries.
+
+The client uses the OpenAI parser in `@crema/llm-ui` (`OpenAICompatibleAdapter.stream()`), so any deviation from this shape will manifest as missing tokens or hung streams.
+
+## Errors
+
+Use the existing `ArcadiaWeb.FallbackController` envelope:
+
+```json
+{ "error": { "code": "secret_disabled", "message": "Secret is disabled" } }
+```
+
+Specific codes the client distinguishes:
+
+| HTTP | code                    | When |
+|------|-------------------------|------|
+| 401  | `unauthorized`          | Missing / invalid arcadia session. |
+| 403  | `secret_disabled`       | Vault returned `:disabled`. |
+| 410  | `secret_expired`        | Vault returned `:expired`. |
+| 410  | `secret_consumed`       | Read-once secret already consumed. |
+| 403  | `ip_not_allowed`        | Caller IP blocked by the vault allowlist. |
+| 404  | `unknown_provider`      | `provider` field not in the supported set. |
+| 502  | `upstream_unavailable`  | Upstream returned 5xx or timed out. |
+| 429  | `rate_limited`          | Either arcadia or upstream returned 429. Pass through `Retry-After` if present. |
+
+## Auth
+
+The proxy must verify the arcadia session bearer the same way the rest of `/api/v1/*` does. The vault read uses the **caller's tenant context**, so platform-admin sessions can use platform-level secrets and tenant sessions can use their own — no special privilege required beyond what `/api/v1/secrets/:name` already enforces.
+
+## Usage tracking
+
+After each completion (success or failure), write a row via the existing `POST /api/v1/ai/llm/usage` (or call the equivalent context module directly inside the proxy). Required fields on that endpoint already include model, prompt_tokens, completion_tokens, latency_ms — the proxy can fill them from the upstream response.
+
+## Test fixture
+
+A minimal Mix test in `apps/arcadia_core/test/arcadia_web/controllers/api/ai_controller_test.exs` should cover:
+
+- 200 with stream off, OpenAI upstream stubbed via Bypass.
+- 200 with stream on, Anthropic upstream stubbed; assert SSE chunks carry OpenAI-shape JSON.
+- 403 when the named secret is disabled.
+- 404 when `provider: "unknown"`.
+- Usage row written on the success cases.
--- a/tsconfig.json
+++ b/tsconfig.json
@@ -40,6 +40,8 @@
      "@crema/auth-ui/*": ["../lib-auth-ui/src/*"],
      "@crema/agent-ui": ["../lib-agent-ui/src/index.tsx"],
      "@crema/agent-ui/*": ["../lib-agent-ui/src/*"],
+      "@crema/llm-providers-ui": ["../lib-llm-providers-ui/src/index.tsx"],
+      "@crema/llm-providers-ui/*": ["../lib-llm-providers-ui/src/*"],
      "// CREMA:PATHS": [""],
      "react": ["./node_modules/@types/react"],
      "react/*": ["./node_modules/@types/react/*"],
--- a/vite.config.ts
+++ b/vite.config.ts
@@ -65,6 +65,12 @@ const arcadiaClientSrc = fileURLToPath(
 const arcadiaAuthUiSrc = fileURLToPath(
  new URL("../lib-arcadia-auth-ui/src", import.meta.url),
 )
+const llmUiSrc = fileURLToPath(
+  new URL("../lib-llm-ui/src", import.meta.url),
+)
+const llmProvidersUiSrc = fileURLToPath(
+  new URL("../lib-llm-providers-ui/src", import.meta.url),
+)

 // Sibling lib packages (lib-content-ui, lib-content-editor-ui) import bare
 // deps like clsx and @tiptap/* but have no node_modules of their own. Pin
@@ -118,6 +124,8 @@ export default defineConfig({
      "@crema/search-ui": `${searchUiSrc}/index.tsx`,
      "@crema/arcadia-client": `${arcadiaClientSrc}/index.tsx`,
      "@crema/arcadia-auth-ui": `${arcadiaAuthUiSrc}/index.tsx`,
+      "@crema/llm-ui": `${llmUiSrc}/index.tsx`,
+      "@crema/llm-providers-ui": `${llmProvidersUiSrc}/index.tsx`,
      ...sharedDepAliases,
    },
    dedupe: dedupeDeps,