Wire @crema/llm-providers-ui: multi-provider picker + AI persistence

Replaces the single-base-URL LLM settings with the new providers lib (OpenAI, Anthropic, DeepSeek, Qwen, LM Studio). Settings/LLM hosts the catalog-aware card; the /ai route builds adapters via buildAdapter() and resolves API keys from the arcadia vault per-call (direct mode). Anthropic skips the /v1/models probe (no such endpoint) and uses catalog defaults; failed probes for keyed providers fall back to the catalog instead of dropping to mock. AI conversation now persists across navigation and refresh via a new crema.ai.live localStorage key (separate from the compact-snapshot key). useChat hydrates from initialMessages on mount, saves on every change, and "Clear conversation" wipes both state and storage. Vite needs explicit resolve.alias for @crema/llm-ui and @crema/llm-providers-ui — when a sibling lib imports another @crema/*, tsconfigPaths can't resolve it (the importing file isn't in this project's tsconfig scope). Adds docs/LLM_PROXY_CONTRACT.md describing the POST /api/v1/ai/llm/chat endpoint the backend needs for proxy mode (keys never leave the server). Direct mode works against today's arcadia; proxy mode unblocks once that endpoint ships. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-01 22:50:23 +10:00
parent a907e25a7c
commit 7ba415d78e
6 changed files with 439 additions and 221 deletions
--- a/app/routes/ai.tsx
+++ b/app/routes/ai.tsx
@@ -1,7 +1,6 @@
 import {
  useCallback,
  useEffect,
-  useMemo,
  useRef,
  useState,
 } from "react"
@@ -29,12 +28,16 @@ import {
 import {
  LLMProvider,
  MockLLM,
-  OpenAICompatibleAdapter,
  listModels,
  useChat,
  useCompletion,
  type LLMAdapter,
 } from "@crema/llm-ui"
+import {
+  buildAdapter,
+  getProvider,
+  useSettings as useProviderSettings,
+} from "@crema/llm-providers-ui"
 import { TypingIndicator } from "@crema/chat-ui"

 import { AppShell } from "~/components/layout/app-shell"
@@ -51,7 +54,6 @@ import {
  PopoverContent,
  PopoverTrigger,
 } from "~/components/ui/popover"
-import { useLLMSettings } from "~/lib/llm-settings"
 import {
  loadActiveAgentId,
  saveActiveAgentId,
@@ -87,6 +89,37 @@ function ToolResultBlock({ name, result }: { name: string; result: unknown }) {
 }

 const SNAPSHOT_KEY = "crema.ai.snapshot"
+// Separate key for the live conversation that survives navigation. The
+// compact snapshot is reserved for the user-triggered Compact/Restore flow.
+const LIVE_KEY = "crema.ai.live"
+
+function loadLive(): LLMMessage[] | null {
+  if (typeof window === "undefined") return null
+  try {
+    const raw = localStorage.getItem(LIVE_KEY)
+    if (!raw) return null
+    const parsed = JSON.parse(raw)
+    if (Array.isArray(parsed)) return parsed as LLMMessage[]
+  } catch {}
+  return null
+}
+function saveLive(msgs: LLMMessage[]) {
+  if (typeof window === "undefined") return
+  if (msgs.length === 0) {
+    localStorage.removeItem(LIVE_KEY)
+    return
+  }
+  try {
+    localStorage.setItem(LIVE_KEY, JSON.stringify(msgs))
+  } catch {
+    // Quota exceeded or similar — silently drop persistence.
+  }
+}
+function clearLive() {
+  if (typeof window === "undefined") return
+  localStorage.removeItem(LIVE_KEY)
+}
+
 type StoredMessage = { role: "user" | "assistant"; content: string }
 function loadAISnapshot(): StoredMessage[] | null {
  if (typeof window === "undefined") return null
@@ -146,13 +179,16 @@ function withTimeout<T>(p: Promise<T>, ms: number, signal: AbortSignal) {
 }

 export default function AIRoute() {
-  const settings = useLLMSettings()
+  const settings = useProviderSettings()
+  const arcadia = useArcadiaClient()
+  const provider = getProvider(settings.providerId)
  const agents = useAgents()
  const [status, setStatus] = useState<Status>({ kind: "probing" })
  const [model, setModel] = useState<string>(() => {
    if (typeof window === "undefined") return ""
    return localStorage.getItem(MODEL_KEY) ?? ""
  })
+  const [adapter, setAdapter] = useState<LLMAdapter>(mockAdapter)
  const [activeAgentId, setActiveAgentIdState] = useState<string>(() =>
    loadActiveAgentId(),
  )
@@ -163,28 +199,110 @@ export default function AIRoute() {
  const activeAgent =
    agents.find((a) => a.id === activeAgentId) ?? agents[0]

+  // When the user changes provider/model in Settings, follow along.
+  useEffect(() => {
+    if (settings.model) setModel(settings.model)
+  }, [settings.providerId, settings.model])
+
+  // Resolve the API key from the vault (direct mode) or build the proxy
+  // adapter (proxy mode), then refresh the model list.
  const probe = useCallback(() => {
    const ac = new AbortController()
    setStatus({ kind: "probing" })
-    withTimeout(
-      listModels({ baseURL: settings.baseURL, signal: ac.signal }),
-      PROBE_TIMEOUT_MS,
-      ac.signal,
-    )
-      .then((rows) => {
+
+    const resolveSecret = async (name: string): Promise<string> => {
+      const res = await arcadia.GET<{ data: { value: string } }>(
+        `/api/v1/secrets/${encodeURIComponent(name)}`,
+      )
+      return res.data.value
+    }
+
+    const arcadiaBaseURL =
+      (import.meta.env.VITE_ARCADIA_URL as string | undefined) ?? "http://localhost:4000"
+    const arcadiaTenantId =
+      (import.meta.env.VITE_ARCADIA_TENANT as string | undefined) ?? "default"
+    const arcadiaAuthToken =
+      typeof window !== "undefined"
+        ? sessionStorage.getItem("arcadia_access_token") ?? undefined
+        : undefined
+    ;(async () => {
+      // Build the adapter first so chat works even if the model probe fails.
+      try {
+        const a = await buildAdapter({
+          settings,
+          resolveSecret,
+          arcadiaBaseURL,
+          arcadiaAuthToken,
+          arcadiaTenantId,
+        })
+        setAdapter(a)
+      } catch {
+        setAdapter(mockAdapter)
+      }
+
+      // Probe for a live model list. The Anthropic transport skips the /models
+      // probe here and uses the catalog's default models (or a fallback id).
+      if (provider.transport === "anthropic") {
+        const ids = provider.defaultModels.length
+          ? provider.defaultModels
+          : ["claude-opus-4-7"]
+        setStatus({ kind: "live", models: ids })
+        setModel((cur) => (cur && ids.includes(cur) ? cur : settings.model || ids[0]))
+        return
+      }
+
+      const baseURL = settings.baseURL || provider.baseURL
+      let apiKey: string | undefined
+      if (provider.requiresKey && settings.secretName) {
+        try {
+          apiKey = await resolveSecret(settings.secretName)
+        } catch {
+          // Fall through; listModels may still work for some providers without a key.
+        }
+      }
+
+      try {
+        const rows = await withTimeout(
+          listModels({ baseURL, apiKey, signal: ac.signal }),
+          PROBE_TIMEOUT_MS,
+          ac.signal,
+        )
        const ids = rows.map((m) => m.id)
        if (ids.length === 0) {
          setStatus({ kind: "mock", reason: "endpoint returned no models" })
          return
        }
        setStatus({ kind: "live", models: ids })
-        setModel((cur) => (cur && ids.includes(cur) ? cur : ids[0]))
-      })
-      .catch(() => {
-        setStatus({ kind: "mock", reason: "endpoint unreachable" })
-      })
+        setModel((cur) => (cur && ids.includes(cur) ? cur : settings.model || ids[0]))
+      } catch {
+        // Probe failed but adapter may still be usable; show the catalog default
+        // models so the user can pick one and just try sending.
+        if (provider.defaultModels.length) {
+          setStatus({ kind: "live", models: provider.defaultModels })
+          setModel((cur) =>
+            cur && provider.defaultModels.includes(cur)
+              ? cur
+              : settings.model || provider.defaultModels[0],
+          )
+        } else {
+          setStatus({ kind: "mock", reason: "endpoint unreachable" })
+        }
+      }
+    })()
+
    return () => ac.abort()
-  }, [settings.baseURL])
+    // eslint-disable-next-line react-hooks/exhaustive-deps
+  }, [
+    arcadia,
+    settings.providerId,
+    settings.baseURL,
+    settings.secretName,
+    settings.mode,
+    settings.model,
+    provider.transport,
+    provider.baseURL,
+    provider.requiresKey,
+  ])

  useEffect(() => probe(), [probe])

@@ -192,16 +310,6 @@ export default function AIRoute() {
    if (model) localStorage.setItem(MODEL_KEY, model)
  }, [model])

-  const adapter: LLMAdapter = useMemo(() => {
-    if (status.kind === "live") {
-      return new OpenAICompatibleAdapter({
-        baseURL: settings.baseURL,
-        apiKey: settings.apiKey || "lm-studio",
-      })
-    }
-    return mockAdapter
-  }, [status.kind, settings.baseURL, settings.apiKey])
-
  const activeModel =
    status.kind === "live" ? model || status.models[0] : "mock"

@@ -256,10 +364,29 @@ function ChatSurface({
    .filter(Boolean)
    .join("\n\n")
  const arcadia = useArcadiaClient()
+  // Hydrate from the persisted live conversation so navigating away and
+  // back doesn't reset the chat. Read once on mount.
+  const initialLive = useRef<LLMMessage[] | null>(null)
+  if (initialLive.current === null) {
+    initialLive.current = loadLive() ?? []
+  }
  const { messages, setMessages, send, continueChat, abort, isStreaming, reset } = useChat({
    system: systemPrompt,
+    initialMessages: initialLive.current,
  })

+  // Persist on every change. Streaming partials get saved too, which is what
+  // we want — refreshing mid-stream restores the partial assistant message.
+  useEffect(() => {
+    saveLive(messages)
+  }, [messages])
+
+  // Wrap reset so "Clear conversation" also drops the persisted live conversation.
+  const resetAndClear = useCallback(() => {
+    reset()
+    clearLive()
+  }, [reset])
+
  // Auto tool-loop using native function calls. Reads run automatically;
  // writes are held in `pendingConfirm` until the operator clicks Confirm
  // or Deny in the inline ConfirmCard.
@@ -642,7 +769,7 @@ function ChatSurface({
            onSaveToLibrary={saveToLibrary}
            onShowPrompt={() => setShowPromptOpen(true)}
            onRetryProbe={onRetryProbe}
-            onClear={reset}
+            onClear={resetAndClear}
            hasMessages={messages.length > 0}
            hasUserMessage={messages.some((m) => m.role === "user")}
            hasCompactSnapshot={hasCompactSnapshot}
--- a/app/routes/settings.tsx
+++ b/app/routes/settings.tsx
@@ -1,8 +1,5 @@
 import { useEffect, useState } from "react"
 import {
-  Check,
-  X,
-  Loader2,
  Cpu,
  Palette,
  User as UserIcon,
@@ -12,6 +9,14 @@ import {
  Trash2,
 } from "lucide-react"
 import { listModels } from "@crema/llm-ui"
+import {
+  buildAdapter,
+  LLMProvidersSettingsCard,
+  resetSettings as resetProviderSettings,
+  useSettings as useProviderSettings,
+  type LLMProvidersSettings,
+} from "@crema/llm-providers-ui"
+import { useArcadiaClient } from "@crema/arcadia-client"

 import { AppShell } from "~/components/layout/app-shell"
 import { Button } from "~/components/ui/button"
@@ -22,15 +27,6 @@ import {
  CardHeader,
  CardTitle,
 } from "~/components/ui/card"
-import { Input } from "~/components/ui/input"
-import { Textarea } from "~/components/ui/textarea"
-import {
-  DEFAULT_SETTINGS,
-  DEFAULT_SYSTEM_PROMPT,
-  saveLLMSettings,
-  useLLMSettings,
-  type LLMSettings,
-} from "~/lib/llm-settings"
 import {
  loadActiveAgentId,
  newAgentId,
@@ -71,53 +67,94 @@ const sections: {
  { id: "about", label: "About", icon: Info, description: "Version & credits" },
 ]

-type TestState =
-  | { kind: "idle" }
-  | { kind: "running" }
-  | { kind: "ok"; count: number }
-  | { kind: "fail"; reason: string }
-
 export default function SettingsRoute() {
-  const settings = useLLMSettings()
-  const [draft, setDraft] = useState<LLMSettings>(settings)
-  const [savedAt, setSavedAt] = useState<number | null>(null)
-  const [test, setTest] = useState<TestState>({ kind: "idle" })
+  const arcadia = useArcadiaClient()

-  useEffect(() => {
-    setDraft(settings)
-  }, [settings])
-
-  const runTest = async () => {
-    setTest({ kind: "running" })
-    const ac = new AbortController()
-    const timeout = setTimeout(() => ac.abort(), 4000)
+  const testConnection = async (
+    s: LLMProvidersSettings,
+  ): Promise<{ ok: boolean; message: string }> => {
    try {
-      const rows = await listModels({ baseURL: draft.baseURL, signal: ac.signal })
-      setTest({ kind: "ok", count: rows.length })
-    } catch (e) {
-      setTest({
-        kind: "fail",
-        reason: e instanceof Error ? e.message : String(e),
+      const arcadiaBaseURL =
+        (import.meta.env.VITE_ARCADIA_URL as string | undefined) ?? "http://localhost:4000"
+      const arcadiaTenantId =
+        (import.meta.env.VITE_ARCADIA_TENANT as string | undefined) ?? "default"
+      const arcadiaAuthToken =
+        typeof window !== "undefined"
+          ? sessionStorage.getItem("arcadia_access_token") ?? undefined
+          : undefined
+
+      const adapter = await buildAdapter({
+        settings: s,
+        // Direct-mode resolver — fetches the API key from the vault.
+        resolveSecret: async (name) => {
+          const res = await arcadia.GET<{ data: { value: string } }>(
+            `/api/v1/secrets/${encodeURIComponent(name)}`,
+          )
+          return res.data.value
+        },
+        // Proxy-mode coordinates.
+        arcadiaBaseURL,
+        arcadiaAuthToken,
+        arcadiaTenantId,
      })
-    } finally {
-      clearTimeout(timeout)
+
+      // In proxy mode the adapter just being built is the strongest signal we
+      // can get without actually firing a chat request — the proxy endpoint
+      // doesn't exist on the backend yet, so any /models probe would 404.
+      if (s.mode === "proxy") {
+        return {
+          ok: true,
+          message:
+            "Adapter built. Note: the backend proxy (/api/v1/ai/llm/chat) isn't deployed yet — see docs/LLM_PROXY_CONTRACT.md.",
+        }
+      }
+
+      // Direct mode — for OpenAI-compatible endpoints, /models is a cheap probe.
+      if (s.providerId !== "anthropic") {
+        const baseURL =
+          s.baseURL ||
+          (s.providerId === "lmstudio"
+            ? "http://localhost:1234/v1"
+            : s.providerId === "openai"
+              ? "https://api.openai.com/v1"
+              : s.providerId === "deepseek"
+                ? "https://api.deepseek.com/v1"
+                : "https://dashscope-intl.aliyuncs.com/compatible-mode/v1")
+        // Resolve key for the probe (lmstudio doesn't need one).
+        let apiKey: string | undefined
+        if (s.providerId !== "lmstudio" && s.secretName) {
+          try {
+            const res = await arcadia.GET<{ data: { value: string } }>(
+              `/api/v1/secrets/${encodeURIComponent(s.secretName)}`,
+            )
+            apiKey = res.data.value
+          } catch (err) {
+            const msg = err instanceof Error ? err.message : String(err)
+            if (/404|not[_ ]found/i.test(msg)) {
+              return {
+                ok: false,
+                message: `No vault secret named "${s.secretName}". Create it under /secrets first (paste the API key as the Value), then enter the secret's name here.`,
+              }
+            }
+            throw err
+          }
+        }
+        const ac = new AbortController()
+        const t = setTimeout(() => ac.abort(), 5000)
+        try {
+          const rows = await listModels({ baseURL, apiKey, signal: ac.signal })
+          return { ok: true, message: `Connected. ${rows.length} model(s) reachable.` }
+        } finally {
+          clearTimeout(t)
+        }
+      }
+      // Anthropic is not probed via /models here; we just confirm the adapter built.
+      return { ok: true, message: `Adapter ready (${adapter.label ?? adapter.id}).` }
+    } catch (e) {
+      return { ok: false, message: e instanceof Error ? e.message : String(e) }
    }
  }

-  const dirty =
-    draft.baseURL !== settings.baseURL ||
-    draft.contextTokens !== settings.contextTokens ||
-    draft.responseBudget !== settings.responseBudget
-
-  const save = () => {
-    saveLLMSettings(draft)
-    setSavedAt(Date.now())
-  }
-
-  const reset = () => {
-    setDraft(DEFAULT_SETTINGS)
-  }
-
  const [section, setSection] = useState<SectionId>(() => {
    if (typeof window === "undefined") return "llm"
    const stored = localStorage.getItem(SECTION_KEY)
@@ -173,151 +210,36 @@ export default function SettingsRoute() {

        <div className="min-w-0">
          {section === "llm" && (
-            <Card>
-              <CardHeader>
-                <CardTitle>LLM</CardTitle>
-                <CardDescription>
-                  Configure the local model endpoint and context budgets used
-                  by the Assistant.
-                </CardDescription>
-              </CardHeader>
-              <CardContent className="flex flex-col gap-5">
-                <Field
-                  label="Base URL"
-                  hint="OpenAI-compatible endpoint. LM Studio defaults to http://localhost:1234/v1."
-                >
-                  <Input
-                    data-action="settings-base-url"
-                    value={draft.baseURL}
-                    onChange={(e) =>
-                      setDraft((d) => ({ ...d, baseURL: e.target.value }))
-                    }
-                    placeholder="http://localhost:1234/v1"
-                    spellCheck={false}
-                    autoComplete="off"
+            <div className="flex flex-col gap-4">
+              <Card>
+                <CardHeader>
+                  <CardTitle>LLM</CardTitle>
+                  <CardDescription>
+                    Pick a provider, model, and the arcadia-vault secret holding the API key. Settings
+                    auto-save as you type. The Assistant picks them up on the next message.
+                  </CardDescription>
+                </CardHeader>
+                <CardContent>
+                  <LLMProvidersSettingsCard
+                    onTest={testConnection}
+                    hideTransportToggle={false}
                  />
-                </Field>
+                </CardContent>
+              </Card>

-                <Field
-                  label="Context window (tokens)"
-                  hint="Match this to the context length you've loaded in LM Studio."
+              <div className="flex items-center gap-2">
+                <Button
+                  variant="outline"
+                  onClick={() => resetProviderSettings()}
+                  data-action="settings-reset"
                >
-                  <Input
-                    data-action="settings-context-tokens"
-                    type="number"
-                    min={1024}
-                    step={512}
-                    value={draft.contextTokens}
-                    onChange={(e) =>
-                      setDraft((d) => ({
-                        ...d,
-                        contextTokens:
-                          Number(e.target.value) || d.contextTokens,
-                      }))
-                    }
-                  />
-                </Field>
-
-                <Field
-                  label="System prompt"
-                  hint="Sent at the start of every conversation. Shapes the assistant's persona and scope. UI Control adds an action-driving preface on top of this when enabled."
-                >
-                  <Textarea
-                    data-action="settings-system-prompt"
-                    value={draft.systemPrompt}
-                    onChange={(e) =>
-                      setDraft((d) => ({ ...d, systemPrompt: e.target.value }))
-                    }
-                    rows={5}
-                    spellCheck={false}
-                    className="min-h-24 font-mono text-xs"
-                  />
-                  <button
-                    type="button"
-                    data-action="settings-system-prompt-reset"
-                    onClick={() =>
-                      setDraft((d) => ({
-                        ...d,
-                        systemPrompt: DEFAULT_SYSTEM_PROMPT,
-                      }))
-                    }
-                    className="self-start text-xs text-muted-foreground underline-offset-2 hover:text-foreground hover:underline"
-                  >
-                    Reset to default prompt
-                  </button>
-                </Field>
-
-                <Field
-                  label="Response cap (max tokens)"
-                  hint="Upper bound on each model reply. Smaller = faster, less rambling."
-                >
-                  <Input
-                    data-action="settings-response-budget"
-                    type="number"
-                    min={64}
-                    step={64}
-                    value={draft.responseBudget}
-                    onChange={(e) =>
-                      setDraft((d) => ({
-                        ...d,
-                        responseBudget:
-                          Number(e.target.value) || d.responseBudget,
-                      }))
-                    }
-                  />
-                </Field>
-
-                <div className="flex flex-wrap items-center gap-2">
-                  <Button
-                    data-action="settings-save"
-                    onClick={save}
-                    disabled={!dirty}
-                  >
-                    Save
-                  </Button>
-                  <Button
-                    data-action="settings-test"
-                    variant="outline"
-                    onClick={runTest}
-                    disabled={test.kind === "running"}
-                  >
-                    {test.kind === "running" ? (
-                      <Loader2 className="size-4 animate-spin" />
-                    ) : test.kind === "ok" ? (
-                      <Check className="size-4 text-emerald-600" />
-                    ) : test.kind === "fail" ? (
-                      <X className="size-4 text-destructive" />
-                    ) : null}
-                    Test connection
-                  </Button>
-                  <Button
-                    data-action="settings-reset"
-                    variant="outline"
-                    onClick={reset}
-                  >
-                    Reset to defaults
-                  </Button>
-                  {savedAt && !dirty && (
-                    <span className="text-sm text-muted-foreground">
-                      Saved.
-                    </span>
-                  )}
-                  {test.kind === "ok" && (
-                    <span className="text-sm text-emerald-700 dark:text-emerald-400">
-                      {test.count} model{test.count === 1 ? "" : "s"} available.
-                    </span>
-                  )}
-                  {test.kind === "fail" && (
-                    <span
-                      className="text-sm text-destructive"
-                      title={test.reason}
-                    >
-                      Failed: {test.reason.slice(0, 60)}
-                    </span>
-                  )}
-                </div>
-              </CardContent>
-            </Card>
+                  Reset to defaults
+                </Button>
+                <span className="text-xs text-muted-foreground">
+                  Need to manage stored keys? See <a href="/secrets" className="underline">Secrets</a>.
+                </span>
+              </div>
+            </div>
          )}

          {section === "agents" && <AgentsPanel />}