From 7ba415d78edbed2b82930d0faac8cb576e0adf5f Mon Sep 17 00:00:00 2001
From: jules <cloudtech@juleslive.net>
Date: Fri, 1 May 2026 22:50:23 +1000
Subject: [PATCH] Wire @crema/llm-providers-ui: multi-provider picker + AI
 persistence
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Replaces the single-base-URL LLM settings with the new providers lib
(OpenAI, Anthropic, DeepSeek, Qwen, LM Studio). Settings/LLM hosts the
catalog-aware card; the /ai route builds adapters via buildAdapter()
and resolves API keys from the arcadia vault per-call (direct mode).
Anthropic skips the /v1/models probe (no such endpoint) and uses
catalog defaults; failed probes for keyed providers fall back to the
catalog instead of dropping to mock.

AI conversation now persists across navigation and refresh via a new
crema.ai.live localStorage key (separate from the compact-snapshot
key). useChat hydrates from initialMessages on mount, saves on every
change, and "Clear conversation" wipes both state and storage.

Vite needs explicit resolve.alias for @crema/llm-ui and
@crema/llm-providers-ui — when a sibling lib imports another @crema/*,
tsconfigPaths can't resolve it (the importing file isn't in this
project's tsconfig scope).

Adds docs/LLM_PROXY_CONTRACT.md describing the
POST /api/v1/ai/llm/chat endpoint the backend needs for proxy mode
(keys never leave the server). Direct mode works against today's
arcadia; proxy mode unblocks once that endpoint ships.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 app/app.css                |   1 +
 app/routes/ai.tsx          | 181 ++++++++++++++++++----
 app/routes/settings.tsx    | 310 ++++++++++++++-----------------------
 docs/LLM_PROXY_CONTRACT.md | 158 +++++++++++++++++++
 tsconfig.json              |   2 +
 vite.config.ts             |   8 +
 6 files changed, 439 insertions(+), 221 deletions(-)
 create mode 100644 docs/LLM_PROXY_CONTRACT.md

diff --git a/app/app.css b/app/app.css
index 83d11e1..38dba09 100644
--- a/app/app.css
+++ b/app/app.css
@@ -18,6 +18,7 @@
 @source "../../lib-feedback-ui/src";
 @source "../../lib-auth-ui/src";
 @source "../../lib-agent-ui/src";
+@source "../../lib-llm-providers-ui/src";
 /* CREMA:SOURCES */
 
 @custom-variant dark (&:is(.dark *));
diff --git a/app/routes/ai.tsx b/app/routes/ai.tsx
index 08be413..127f165 100644
--- a/app/routes/ai.tsx
+++ b/app/routes/ai.tsx
@@ -1,7 +1,6 @@
 import {
   useCallback,
   useEffect,
-  useMemo,
   useRef,
   useState,
 } from "react"
@@ -29,12 +28,16 @@ import {
 import {
   LLMProvider,
   MockLLM,
-  OpenAICompatibleAdapter,
   listModels,
   useChat,
   useCompletion,
   type LLMAdapter,
 } from "@crema/llm-ui"
+import {
+  buildAdapter,
+  getProvider,
+  useSettings as useProviderSettings,
+} from "@crema/llm-providers-ui"
 import { TypingIndicator } from "@crema/chat-ui"
 
 import { AppShell } from "~/components/layout/app-shell"
@@ -51,7 +54,6 @@ import {
   PopoverContent,
   PopoverTrigger,
 } from "~/components/ui/popover"
-import { useLLMSettings } from "~/lib/llm-settings"
 import {
   loadActiveAgentId,
   saveActiveAgentId,
@@ -87,6 +89,37 @@ function ToolResultBlock({ name, result }: { name: string; result: unknown }) {
 }
 
 const SNAPSHOT_KEY = "crema.ai.snapshot"
+// Separate key for the live conversation that survives navigation. The
+// compact snapshot is reserved for the user-triggered Compact/Restore flow.
+const LIVE_KEY = "crema.ai.live"
+
+function loadLive(): LLMMessage[] | null {
+  if (typeof window === "undefined") return null
+  try {
+    const raw = localStorage.getItem(LIVE_KEY)
+    if (!raw) return null
+    const parsed = JSON.parse(raw)
+    if (Array.isArray(parsed)) return parsed as LLMMessage[]
+  } catch {}
+  return null
+}
+function saveLive(msgs: LLMMessage[]) {
+  if (typeof window === "undefined") return
+  if (msgs.length === 0) {
+    localStorage.removeItem(LIVE_KEY)
+    return
+  }
+  try {
+    localStorage.setItem(LIVE_KEY, JSON.stringify(msgs))
+  } catch {
+    // Quota exceeded or similar — silently drop persistence.
+  }
+}
+function clearLive() {
+  if (typeof window === "undefined") return
+  localStorage.removeItem(LIVE_KEY)
+}
+
 type StoredMessage = { role: "user" | "assistant"; content: string }
 function loadAISnapshot(): StoredMessage[] | null {
   if (typeof window === "undefined") return null
@@ -146,13 +179,16 @@ function withTimeout<T>(p: Promise<T>, ms: number, signal: AbortSignal) {
 }
 
 export default function AIRoute() {
-  const settings = useLLMSettings()
+  const settings = useProviderSettings()
+  const arcadia = useArcadiaClient()
+  const provider = getProvider(settings.providerId)
   const agents = useAgents()
   const [status, setStatus] = useState<Status>({ kind: "probing" })
   const [model, setModel] = useState<string>(() => {
     if (typeof window === "undefined") return ""
     return localStorage.getItem(MODEL_KEY) ?? ""
   })
+  const [adapter, setAdapter] = useState<LLMAdapter>(mockAdapter)
   const [activeAgentId, setActiveAgentIdState] = useState<string>(() =>
     loadActiveAgentId(),
   )
@@ -163,28 +199,110 @@ export default function AIRoute() {
   const activeAgent =
     agents.find((a) => a.id === activeAgentId) ?? agents[0]
 
+  // When the user changes provider/model in Settings, follow along.
+  useEffect(() => {
+    if (settings.model) setModel(settings.model)
+  }, [settings.providerId, settings.model])
+
+  // Resolve the API key from the vault (direct mode) or build the proxy
+  // adapter (proxy mode), then refresh the model list.
   const probe = useCallback(() => {
     const ac = new AbortController()
     setStatus({ kind: "probing" })
-    withTimeout(
-      listModels({ baseURL: settings.baseURL, signal: ac.signal }),
-      PROBE_TIMEOUT_MS,
-      ac.signal,
-    )
-      .then((rows) => {
+
+    const resolveSecret = async (name: string): Promise<string> => {
+      const res = await arcadia.GET<{ data: { value: string } }>(
+        `/api/v1/secrets/${encodeURIComponent(name)}`,
+      )
+      return res.data.value
+    }
+
+    const arcadiaBaseURL =
+      (import.meta.env.VITE_ARCADIA_URL as string | undefined) ?? "http://localhost:4000"
+    const arcadiaTenantId =
+      (import.meta.env.VITE_ARCADIA_TENANT as string | undefined) ?? "default"
+    const arcadiaAuthToken =
+      typeof window !== "undefined"
+        ? sessionStorage.getItem("arcadia_access_token") ?? undefined
+        : undefined
+    ;(async () => {
+      // Build the adapter first so chat works even if the model probe fails.
+      try {
+        const a = await buildAdapter({
+          settings,
+          resolveSecret,
+          arcadiaBaseURL,
+          arcadiaAuthToken,
+          arcadiaTenantId,
+        })
+        setAdapter(a)
+      } catch {
+        setAdapter(mockAdapter)
+      }
+
+      // Probe for a live model list. The OpenAI-style /models probe is skipped
+      // for the Anthropic transport; use the provider catalog's default models.
+      if (provider.transport === "anthropic") {
+        const ids = provider.defaultModels.length
+          ? provider.defaultModels
+          : ["claude-opus-4-7"]
+        setStatus({ kind: "live", models: ids })
+        setModel((cur) => (cur && ids.includes(cur) ? cur : settings.model || ids[0]))
+        return
+      }
+
+      const baseURL = settings.baseURL || provider.baseURL
+      let apiKey: string | undefined
+      if (provider.requiresKey && settings.secretName) {
+        try {
+          apiKey = await resolveSecret(settings.secretName)
+        } catch {
+          // Fall through; listModels may still work for some providers without a key.
+        }
+      }
+
+      try {
+        const rows = await withTimeout(
+          listModels({ baseURL, apiKey, signal: ac.signal }),
+          PROBE_TIMEOUT_MS,
+          ac.signal,
+        )
         const ids = rows.map((m) => m.id)
         if (ids.length === 0) {
           setStatus({ kind: "mock", reason: "endpoint returned no models" })
           return
         }
         setStatus({ kind: "live", models: ids })
-        setModel((cur) => (cur && ids.includes(cur) ? cur : ids[0]))
-      })
-      .catch(() => {
-        setStatus({ kind: "mock", reason: "endpoint unreachable" })
-      })
+        setModel((cur) => (cur && ids.includes(cur) ? cur : settings.model || ids[0]))
+      } catch {
+        // Probe failed but adapter may still be usable; show the catalog default
+        // models so the user can pick one and just try sending.
+        if (provider.defaultModels.length) {
+          setStatus({ kind: "live", models: provider.defaultModels })
+          setModel((cur) =>
+            cur && provider.defaultModels.includes(cur)
+              ? cur
+              : settings.model || provider.defaultModels[0],
+          )
+        } else {
+          setStatus({ kind: "mock", reason: "endpoint unreachable" })
+        }
+      }
+    })()
+
     return () => ac.abort()
-  }, [settings.baseURL])
+    // eslint-disable-next-line react-hooks/exhaustive-deps
+  }, [
+    arcadia,
+    settings.providerId,
+    settings.baseURL,
+    settings.secretName,
+    settings.mode,
+    settings.model,
+    provider.transport,
+    provider.baseURL,
+    provider.requiresKey,
+  ])
 
   useEffect(() => probe(), [probe])
 
@@ -192,16 +310,6 @@ export default function AIRoute() {
     if (model) localStorage.setItem(MODEL_KEY, model)
   }, [model])
 
-  const adapter: LLMAdapter = useMemo(() => {
-    if (status.kind === "live") {
-      return new OpenAICompatibleAdapter({
-        baseURL: settings.baseURL,
-        apiKey: settings.apiKey || "lm-studio",
-      })
-    }
-    return mockAdapter
-  }, [status.kind, settings.baseURL, settings.apiKey])
-
   const activeModel =
     status.kind === "live" ? model || status.models[0] : "mock"
 
@@ -256,10 +364,29 @@ function ChatSurface({
     .filter(Boolean)
     .join("\n\n")
   const arcadia = useArcadiaClient()
+  // Hydrate from the persisted live conversation so navigating away and
+  // back doesn't reset the chat. Read once on mount.
+  const initialLive = useRef<LLMMessage[] | null>(null)
+  if (initialLive.current === null) {
+    initialLive.current = loadLive() ?? []
+  }
   const { messages, setMessages, send, continueChat, abort, isStreaming, reset } = useChat({
     system: systemPrompt,
+    initialMessages: initialLive.current,
   })
 
+  // Persist on every change. Streaming partials get saved too, which is what
+  // we want — refreshing mid-stream restores the partial assistant message.
+  useEffect(() => {
+    saveLive(messages)
+  }, [messages])
+
+  // Wrap reset so "Clear conversation" also drops the persisted live conversation.
+  const resetAndClear = useCallback(() => {
+    reset()
+    clearLive()
+  }, [reset])
+
   // Auto tool-loop using native function calls. Reads run automatically;
   // writes are held in `pendingConfirm` until the operator clicks Confirm
   // or Deny in the inline ConfirmCard.
@@ -642,7 +769,7 @@ function ChatSurface({
             onSaveToLibrary={saveToLibrary}
             onShowPrompt={() => setShowPromptOpen(true)}
             onRetryProbe={onRetryProbe}
-            onClear={reset}
+            onClear={resetAndClear}
             hasMessages={messages.length > 0}
             hasUserMessage={messages.some((m) => m.role === "user")}
             hasCompactSnapshot={hasCompactSnapshot}
diff --git a/app/routes/settings.tsx b/app/routes/settings.tsx
index 6fec2e3..256997b 100644
--- a/app/routes/settings.tsx
+++ b/app/routes/settings.tsx
@@ -1,8 +1,5 @@
 import { useEffect, useState } from "react"
 import {
-  Check,
-  X,
-  Loader2,
   Cpu,
   Palette,
   User as UserIcon,
@@ -12,6 +9,14 @@ import {
   Trash2,
 } from "lucide-react"
 import { listModels } from "@crema/llm-ui"
+import {
+  buildAdapter,
+  LLMProvidersSettingsCard,
+  resetSettings as resetProviderSettings,
+  useSettings as useProviderSettings,
+  type LLMProvidersSettings,
+} from "@crema/llm-providers-ui"
+import { useArcadiaClient } from "@crema/arcadia-client"
 
 import { AppShell } from "~/components/layout/app-shell"
 import { Button } from "~/components/ui/button"
@@ -22,15 +27,6 @@ import {
   CardHeader,
   CardTitle,
 } from "~/components/ui/card"
-import { Input } from "~/components/ui/input"
-import { Textarea } from "~/components/ui/textarea"
-import {
-  DEFAULT_SETTINGS,
-  DEFAULT_SYSTEM_PROMPT,
-  saveLLMSettings,
-  useLLMSettings,
-  type LLMSettings,
-} from "~/lib/llm-settings"
 import {
   loadActiveAgentId,
   newAgentId,
@@ -71,53 +67,94 @@ const sections: {
   { id: "about", label: "About", icon: Info, description: "Version & credits" },
 ]
 
-type TestState =
-  | { kind: "idle" }
-  | { kind: "running" }
-  | { kind: "ok"; count: number }
-  | { kind: "fail"; reason: string }
-
 export default function SettingsRoute() {
-  const settings = useLLMSettings()
-  const [draft, setDraft] = useState<LLMSettings>(settings)
-  const [savedAt, setSavedAt] = useState<number | null>(null)
-  const [test, setTest] = useState<TestState>({ kind: "idle" })
+  const arcadia = useArcadiaClient()
 
-  useEffect(() => {
-    setDraft(settings)
-  }, [settings])
-
-  const runTest = async () => {
-    setTest({ kind: "running" })
-    const ac = new AbortController()
-    const timeout = setTimeout(() => ac.abort(), 4000)
+  const testConnection = async (
+    s: LLMProvidersSettings,
+  ): Promise<{ ok: boolean; message: string }> => {
     try {
-      const rows = await listModels({ baseURL: draft.baseURL, signal: ac.signal })
-      setTest({ kind: "ok", count: rows.length })
-    } catch (e) {
-      setTest({
-        kind: "fail",
-        reason: e instanceof Error ? e.message : String(e),
+      const arcadiaBaseURL =
+        (import.meta.env.VITE_ARCADIA_URL as string | undefined) ?? "http://localhost:4000"
+      const arcadiaTenantId =
+        (import.meta.env.VITE_ARCADIA_TENANT as string | undefined) ?? "default"
+      const arcadiaAuthToken =
+        typeof window !== "undefined"
+          ? sessionStorage.getItem("arcadia_access_token") ?? undefined
+          : undefined
+
+      const adapter = await buildAdapter({
+        settings: s,
+        // Direct-mode resolver — fetches the API key from the vault.
+        resolveSecret: async (name) => {
+          const res = await arcadia.GET<{ data: { value: string } }>(
+            `/api/v1/secrets/${encodeURIComponent(name)}`,
+          )
+          return res.data.value
+        },
+        // Proxy-mode coordinates.
+        arcadiaBaseURL,
+        arcadiaAuthToken,
+        arcadiaTenantId,
       })
-    } finally {
-      clearTimeout(timeout)
+
+      // In proxy mode the adapter just being built is the strongest signal we
+      // can get without actually firing a chat request — the proxy endpoint
+      // doesn't exist on the backend yet, so any /models probe would 404.
+      if (s.mode === "proxy") {
+        return {
+          ok: true,
+          message:
+            "Adapter built. Note: the backend proxy (/api/v1/ai/llm/chat) isn't deployed yet — see docs/LLM_PROXY_CONTRACT.md.",
+        }
+      }
+
+      // Direct mode — for OpenAI-compatible endpoints, /models is a cheap probe.
+      if (s.providerId !== "anthropic") {
+        const baseURL =
+          s.baseURL ||
+          (s.providerId === "lmstudio"
+            ? "http://localhost:1234/v1"
+            : s.providerId === "openai"
+              ? "https://api.openai.com/v1"
+              : s.providerId === "deepseek"
+                ? "https://api.deepseek.com/v1"
+                : "https://dashscope-intl.aliyuncs.com/compatible-mode/v1")
+        // Resolve key for the probe (lmstudio doesn't need one).
+        let apiKey: string | undefined
+        if (s.providerId !== "lmstudio" && s.secretName) {
+          try {
+            const res = await arcadia.GET<{ data: { value: string } }>(
+              `/api/v1/secrets/${encodeURIComponent(s.secretName)}`,
+            )
+            apiKey = res.data.value
+          } catch (err) {
+            const msg = err instanceof Error ? err.message : String(err)
+            if (/404|not[_ ]found/i.test(msg)) {
+              return {
+                ok: false,
+                message: `No vault secret named "${s.secretName}". Create it under /secrets first (paste the API key as the Value), then enter the secret's name here.`,
+              }
+            }
+            throw err
+          }
+        }
+        const ac = new AbortController()
+        const t = setTimeout(() => ac.abort(), 5000)
+        try {
+          const rows = await listModels({ baseURL, apiKey, signal: ac.signal })
+          return { ok: true, message: `Connected. ${rows.length} model(s) reachable.` }
+        } finally {
+          clearTimeout(t)
+        }
+      }
+      // Anthropic is not probed via /models here; we just confirm the adapter built.
+      return { ok: true, message: `Adapter ready (${adapter.label ?? adapter.id}).` }
+    } catch (e) {
+      return { ok: false, message: e instanceof Error ? e.message : String(e) }
     }
   }
 
-  const dirty =
-    draft.baseURL !== settings.baseURL ||
-    draft.contextTokens !== settings.contextTokens ||
-    draft.responseBudget !== settings.responseBudget
-
-  const save = () => {
-    saveLLMSettings(draft)
-    setSavedAt(Date.now())
-  }
-
-  const reset = () => {
-    setDraft(DEFAULT_SETTINGS)
-  }
-
   const [section, setSection] = useState<SectionId>(() => {
     if (typeof window === "undefined") return "llm"
     const stored = localStorage.getItem(SECTION_KEY)
@@ -173,151 +210,36 @@ export default function SettingsRoute() {
 
         <div className="min-w-0">
           {section === "llm" && (
-            <Card>
-              <CardHeader>
-                <CardTitle>LLM</CardTitle>
-                <CardDescription>
-                  Configure the local model endpoint and context budgets used
-                  by the Assistant.
-                </CardDescription>
-              </CardHeader>
-              <CardContent className="flex flex-col gap-5">
-                <Field
-                  label="Base URL"
-                  hint="OpenAI-compatible endpoint. LM Studio defaults to http://localhost:1234/v1."
-                >
-                  <Input
-                    data-action="settings-base-url"
-                    value={draft.baseURL}
-                    onChange={(e) =>
-                      setDraft((d) => ({ ...d, baseURL: e.target.value }))
-                    }
-                    placeholder="http://localhost:1234/v1"
-                    spellCheck={false}
-                    autoComplete="off"
+            <div className="flex flex-col gap-4">
+              <Card>
+                <CardHeader>
+                  <CardTitle>LLM</CardTitle>
+                  <CardDescription>
+                    Pick a provider, model, and the arcadia-vault secret holding the API key. Settings
+                    auto-save as you type. The Assistant picks them up on the next message.
+                  </CardDescription>
+                </CardHeader>
+                <CardContent>
+                  <LLMProvidersSettingsCard
+                    onTest={testConnection}
+                    hideTransportToggle={false}
                   />
-                </Field>
+                </CardContent>
+              </Card>
 
-                <Field
-                  label="Context window (tokens)"
-                  hint="Match this to the context length you've loaded in LM Studio."
+              <div className="flex items-center gap-2">
+                <Button
+                  variant="outline"
+                  onClick={() => resetProviderSettings()}
+                  data-action="settings-reset"
                 >
-                  <Input
-                    data-action="settings-context-tokens"
-                    type="number"
-                    min={1024}
-                    step={512}
-                    value={draft.contextTokens}
-                    onChange={(e) =>
-                      setDraft((d) => ({
-                        ...d,
-                        contextTokens:
-                          Number(e.target.value) || d.contextTokens,
-                      }))
-                    }
-                  />
-                </Field>
-
-                <Field
-                  label="System prompt"
-                  hint="Sent at the start of every conversation. Shapes the assistant's persona and scope. UI Control adds an action-driving preface on top of this when enabled."
-                >
-                  <Textarea
-                    data-action="settings-system-prompt"
-                    value={draft.systemPrompt}
-                    onChange={(e) =>
-                      setDraft((d) => ({ ...d, systemPrompt: e.target.value }))
-                    }
-                    rows={5}
-                    spellCheck={false}
-                    className="min-h-24 font-mono text-xs"
-                  />
-                  <button
-                    type="button"
-                    data-action="settings-system-prompt-reset"
-                    onClick={() =>
-                      setDraft((d) => ({
-                        ...d,
-                        systemPrompt: DEFAULT_SYSTEM_PROMPT,
-                      }))
-                    }
-                    className="self-start text-xs text-muted-foreground underline-offset-2 hover:text-foreground hover:underline"
-                  >
-                    Reset to default prompt
-                  </button>
-                </Field>
-
-                <Field
-                  label="Response cap (max tokens)"
-                  hint="Upper bound on each model reply. Smaller = faster, less rambling."
-                >
-                  <Input
-                    data-action="settings-response-budget"
-                    type="number"
-                    min={64}
-                    step={64}
-                    value={draft.responseBudget}
-                    onChange={(e) =>
-                      setDraft((d) => ({
-                        ...d,
-                        responseBudget:
-                          Number(e.target.value) || d.responseBudget,
-                      }))
-                    }
-                  />
-                </Field>
-
-                <div className="flex flex-wrap items-center gap-2">
-                  <Button
-                    data-action="settings-save"
-                    onClick={save}
-                    disabled={!dirty}
-                  >
-                    Save
-                  </Button>
-                  <Button
-                    data-action="settings-test"
-                    variant="outline"
-                    onClick={runTest}
-                    disabled={test.kind === "running"}
-                  >
-                    {test.kind === "running" ? (
-                      <Loader2 className="size-4 animate-spin" />
-                    ) : test.kind === "ok" ? (
-                      <Check className="size-4 text-emerald-600" />
-                    ) : test.kind === "fail" ? (
-                      <X className="size-4 text-destructive" />
-                    ) : null}
-                    Test connection
-                  </Button>
-                  <Button
-                    data-action="settings-reset"
-                    variant="outline"
-                    onClick={reset}
-                  >
-                    Reset to defaults
-                  </Button>
-                  {savedAt && !dirty && (
-                    <span className="text-sm text-muted-foreground">
-                      Saved.
-                    </span>
-                  )}
-                  {test.kind === "ok" && (
-                    <span className="text-sm text-emerald-700 dark:text-emerald-400">
-                      {test.count} model{test.count === 1 ? "" : "s"} available.
-                    </span>
-                  )}
-                  {test.kind === "fail" && (
-                    <span
-                      className="text-sm text-destructive"
-                      title={test.reason}
-                    >
-                      Failed: {test.reason.slice(0, 60)}
-                    </span>
-                  )}
-                </div>
-              </CardContent>
-            </Card>
+                  Reset to defaults
+                </Button>
+                <span className="text-xs text-muted-foreground">
+                  Need to manage stored keys? See <a href="/secrets" className="underline">Secrets</a>.
+                </span>
+              </div>
+            </div>
           )}
 
           {section === "agents" && <AgentsPanel />}
diff --git a/docs/LLM_PROXY_CONTRACT.md b/docs/LLM_PROXY_CONTRACT.md
new file mode 100644
index 0000000..5a0c9d0
--- /dev/null
+++ b/docs/LLM_PROXY_CONTRACT.md
@@ -0,0 +1,158 @@
+# LLM Proxy Contract
+
+> **Status: not yet implemented on the backend.** This document is the contract that `lib-llm-providers-ui` expects from arcadia. Implement `POST /api/v1/ai/llm/chat` server-side to make `mode: "proxy"` work in the client.
+
+## Why a proxy?
+
+The Settings UI ships in two transport modes:
+
+- **`direct`** — the browser fetches the API key from arcadia's vault (`GET /api/v1/secrets/:name`), then calls OpenAI/Anthropic/DeepSeek/Qwen directly. Works today, but the key briefly lives in browser memory and the prompt contents go straight to the upstream provider with no opportunity for arcadia to log, meter, or rewrite them.
+- **`proxy`** — the browser sends the chat request to arcadia, which reads the secret server-side and calls the upstream provider. Keys never leave arcadia. This is what production should use.
+
+This contract only covers the proxy mode.
+
+## Endpoint
+
+```
+POST /api/v1/ai/llm/chat
+Authorization: Bearer <arcadia session token>
+X-Tenant-ID:   <tenant id>
+Content-Type:  application/json
+```
+
+The path is `/api/v1/ai/llm/chat` so it lives under the existing `/api/v1/ai/*` scope (next to `embeddings`, `tools`, `llm/usage`).
+
+## Request body
+
+The shape is OpenAI's chat-completion request, **plus** two arcadia-specific fields:
+
+```json
+{
+  "provider":    "openai",
+  "secret_name": "llm-openai-api-key",
+  "model":       "gpt-4o-mini",
+  "messages": [
+    { "role": "system", "content": "You are a helpful assistant." },
+    { "role": "user",   "content": "Hello!" }
+  ],
+  "stream":      true,
+  "max_tokens":  1024,
+  "temperature": 0.7,
+  "tools": [
+    {
+      "type": "function",
+      "function": {
+        "name": "search_docs",
+        "description": "...",
+        "parameters": { "type": "object", "properties": {} }
+      }
+    }
+  ],
+  "tool_choice": "auto"
+}
+```
+
+### Provider-specific fields
+
+| Field         | Type                                            | Notes |
+|---------------|-------------------------------------------------|-------|
+| `provider`    | `"openai" \| "anthropic" \| "deepseek" \| "qwen" \| "lmstudio"` | Selects the upstream backend. |
+| `secret_name` | `string` (optional for `lmstudio`)              | Name of the vault secret holding the upstream API key. The proxy resolves it via the same `Arcadia.Secrets.get/3` used for tenant-facing reads. |
+
+The proxy must:
+1. Authenticate the arcadia session.
+2. Resolve `secret_name` for the current tenant (or fall back to platform-level). Refuse the call if the secret is disabled, expired, or IP-blocked. The existing `Arcadia.Secrets.get/3` already returns the right error codes.
+3. Map the request to the upstream's native shape (Anthropic's `/v1/messages` differs from OpenAI's `/v1/chat/completions`).
+4. Forward it with the resolved key as the upstream's expected auth header (`Authorization: Bearer <key>` for OpenAI/DeepSeek/Qwen, `x-api-key: <key>` + `anthropic-version: 2023-06-01` for Anthropic).
+5. Return the response in **OpenAI shape** regardless of upstream: SSE chunks when `stream: true`, a single JSON document otherwise. (See the two "Response" sections below.)
+6. Record a usage row via the existing `POST /api/v1/ai/llm/usage` after each completion.
+
+## Response — non-streaming (`stream: false`)
+
+OpenAI chat-completion shape, returned as a single JSON document:
+
+```json
+{
+  "id": "chatcmpl-...",
+  "object": "chat.completion",
+  "created": 1714512000,
+  "model": "gpt-4o-mini",
+  "choices": [
+    {
+      "index": 0,
+      "finish_reason": "stop",
+      "message": {
+        "role": "assistant",
+        "content": "Hi there!",
+        "tool_calls": null
+      }
+    }
+  ],
+  "usage": {
+    "prompt_tokens": 12,
+    "completion_tokens": 4,
+    "total_tokens": 16
+  }
+}
+```
+
+For Anthropic upstream, translate `usage.input_tokens` / `output_tokens` → `prompt_tokens` / `completion_tokens` (setting `total_tokens` to their sum) and combine text `content` blocks into a single string (or surface `tool_use` blocks via `tool_calls`).
+
+## Response — streaming (`stream: true`)
+
+Server-Sent Events, one event per delta, terminated with `data: [DONE]`. Each `data:` line is JSON of OpenAI's chat-completion *delta* shape:
+
+```
+data: {"id":"chatcmpl-...","object":"chat.completion.chunk","created":1714512000,"model":"gpt-4o-mini","choices":[{"index":0,"delta":{"role":"assistant","content":""},"finish_reason":null}]}
+
+data: {"id":"chatcmpl-...","object":"chat.completion.chunk","created":1714512000,"model":"gpt-4o-mini","choices":[{"index":0,"delta":{"content":"Hi"},"finish_reason":null}]}
+
+data: {"id":"chatcmpl-...","object":"chat.completion.chunk","created":1714512000,"model":"gpt-4o-mini","choices":[{"index":0,"delta":{"content":" there"},"finish_reason":null}]}
+
+data: {"id":"chatcmpl-...","object":"chat.completion.chunk","created":1714512000,"model":"gpt-4o-mini","choices":[{"index":0,"delta":{},"finish_reason":"stop"}]}
+
+data: [DONE]
+```
+
+For Anthropic upstream, translate `content_block_delta` events of type `text_delta` into delta `content` strings, and `message_stop` into the `finish_reason: "stop"` event. Tool calls translate `content_block_start` of type `tool_use` (with id + name) and the streaming JSON arguments into OpenAI-shape `delta.tool_calls` entries.
+
+The client uses the OpenAI parser in `@crema/llm-ui` (`OpenAICompatibleAdapter.stream()`), so any deviation from this shape will manifest as missing tokens or hung streams.
+
+## Errors
+
+Use the existing `ArcadiaWeb.FallbackController` envelope:
+
+```json
+{ "error": { "code": "secret_disabled", "message": "Secret is disabled" } }
+```
+
+Specific codes the client distinguishes:
+
+| HTTP | code                    | When |
+|------|-------------------------|------|
+| 401  | `unauthorized`          | Missing / invalid arcadia session. |
+| 403  | `secret_disabled`       | Vault returned `:disabled`. |
+| 410  | `secret_expired`        | Vault returned `:expired`. |
+| 410  | `secret_consumed`       | Read-once secret already consumed. |
+| 403  | `ip_not_allowed`        | Caller IP blocked by the vault allowlist. |
+| 404  | `unknown_provider`      | `provider` field not in the supported set. |
+| 502  | `upstream_unavailable`  | Upstream returned 5xx or timed out. |
+| 429  | `rate_limited`          | Either arcadia or upstream returned 429. Pass through `Retry-After` if present. |
+
+## Auth
+
+The proxy must verify the arcadia session bearer the same way the rest of `/api/v1/*` does. The vault read uses the **caller's tenant context**, so platform-admin sessions can use platform-level secrets and tenant sessions can use their own — no special privilege required beyond what `/api/v1/secrets/:name` already enforces.
+
+## Usage tracking
+
+After each completion (success or failure), write a row via the existing `POST /api/v1/ai/llm/usage` (or call the equivalent context module directly inside the proxy). Required fields on that endpoint already include model, prompt_tokens, completion_tokens, latency_ms — the proxy can fill them from the upstream response.
+
+## Test fixture
+
+A minimal Mix test in `apps/arcadia_core/test/arcadia_web/controllers/api/ai_controller_test.exs` should cover:
+
+- 200 with stream off, OpenAI upstream stubbed via Bypass.
+- 200 with stream on, Anthropic upstream stubbed; assert SSE chunks carry OpenAI-shape JSON.
+- 403 when the named secret is disabled.
+- 404 when `provider: "unknown"`.
+- Usage row written on the success cases.
diff --git a/tsconfig.json b/tsconfig.json
index 13eb69c..9623a2a 100644
--- a/tsconfig.json
+++ b/tsconfig.json
@@ -40,6 +40,8 @@
       "@crema/auth-ui/*": ["../lib-auth-ui/src/*"],
       "@crema/agent-ui": ["../lib-agent-ui/src/index.tsx"],
       "@crema/agent-ui/*": ["../lib-agent-ui/src/*"],
+      "@crema/llm-providers-ui": ["../lib-llm-providers-ui/src/index.tsx"],
+      "@crema/llm-providers-ui/*": ["../lib-llm-providers-ui/src/*"],
       "// CREMA:PATHS": [""],
       "react": ["./node_modules/@types/react"],
       "react/*": ["./node_modules/@types/react/*"],
diff --git a/vite.config.ts b/vite.config.ts
index 89190c4..067ea3d 100644
--- a/vite.config.ts
+++ b/vite.config.ts
@@ -65,6 +65,12 @@ const arcadiaClientSrc = fileURLToPath(
 const arcadiaAuthUiSrc = fileURLToPath(
   new URL("../lib-arcadia-auth-ui/src", import.meta.url),
 )
+const llmUiSrc = fileURLToPath(
+  new URL("../lib-llm-ui/src", import.meta.url),
+)
+const llmProvidersUiSrc = fileURLToPath(
+  new URL("../lib-llm-providers-ui/src", import.meta.url),
+)
 
 // Sibling lib packages (lib-content-ui, lib-content-editor-ui) import bare
 // deps like clsx and @tiptap/* but have no node_modules of their own. Pin
@@ -118,6 +124,8 @@ export default defineConfig({
       "@crema/search-ui": `${searchUiSrc}/index.tsx`,
       "@crema/arcadia-client": `${arcadiaClientSrc}/index.tsx`,
       "@crema/arcadia-auth-ui": `${arcadiaAuthUiSrc}/index.tsx`,
+      "@crema/llm-ui": `${llmUiSrc}/index.tsx`,
+      "@crema/llm-providers-ui": `${llmProvidersUiSrc}/index.tsx`,
       ...sharedDepAliases,
     },
     dedupe: dedupeDeps,