Wire @crema/llm-providers-ui: multi-provider picker + AI persistence

Replaces the single-base-URL LLM settings with the new providers lib
(OpenAI, Anthropic, DeepSeek, Qwen, LM Studio). Settings/LLM hosts the
catalog-aware card; the /ai route builds adapters via buildAdapter()
and resolves API keys from the arcadia vault per-call (direct mode).
Anthropic skips the /v1/models probe (no such endpoint) and uses
catalog defaults; failed probes for keyed providers fall back to the
catalog instead of dropping to mock.

AI conversation now persists across navigation and refresh via a new
crema.ai.live localStorage key (separate from the compact-snapshot
key). useChat hydrates from initialMessages on mount, saves on every
change, and "Clear conversation" wipes both state and storage.

Vite needs explicit resolve.alias for @crema/llm-ui and
@crema/llm-providers-ui — when a sibling lib imports another @crema/*,
tsconfigPaths can't resolve it (the importing file isn't in this
project's tsconfig scope).

Adds docs/LLM_PROXY_CONTRACT.md describing the
POST /api/v1/ai/llm/chat endpoint the backend needs for proxy mode
(keys never leave the server). Direct mode works against today's
arcadia; proxy mode unblocks once that endpoint ships.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
jules
2026-05-01 22:50:23 +10:00
parent a907e25a7c
commit 7ba415d78e
6 changed files with 439 additions and 221 deletions

View File

@@ -1,7 +1,6 @@
import {
useCallback,
useEffect,
useMemo,
useRef,
useState,
} from "react"
@@ -29,12 +28,16 @@ import {
import {
LLMProvider,
MockLLM,
OpenAICompatibleAdapter,
listModels,
useChat,
useCompletion,
type LLMAdapter,
} from "@crema/llm-ui"
import {
buildAdapter,
getProvider,
useSettings as useProviderSettings,
} from "@crema/llm-providers-ui"
import { TypingIndicator } from "@crema/chat-ui"
import { AppShell } from "~/components/layout/app-shell"
@@ -51,7 +54,6 @@ import {
PopoverContent,
PopoverTrigger,
} from "~/components/ui/popover"
import { useLLMSettings } from "~/lib/llm-settings"
import {
loadActiveAgentId,
saveActiveAgentId,
@@ -87,6 +89,37 @@ function ToolResultBlock({ name, result }: { name: string; result: unknown }) {
}
const SNAPSHOT_KEY = "crema.ai.snapshot"
// Separate key for the live conversation that survives navigation. The
// compact snapshot is reserved for the user-triggered Compact/Restore flow.
const LIVE_KEY = "crema.ai.live"
/**
 * Read the persisted live conversation from localStorage (key LIVE_KEY).
 *
 * Returns null when there is no `window`, when nothing (or an empty string)
 * is stored, when reading or JSON-parsing fails, or when the parsed value is
 * not an array. Array elements are not validated; they are returned as-is
 * under an LLMMessage[] cast.
 */
function loadLive(): LLMMessage[] | null {
  if (typeof window === "undefined") return null
  let decoded: unknown = null
  try {
    const stored = localStorage.getItem(LIVE_KEY)
    if (stored) decoded = JSON.parse(stored)
  } catch {
    // Unreadable storage or malformed JSON: behave as if nothing was saved.
    return null
  }
  return Array.isArray(decoded) ? (decoded as LLMMessage[]) : null
}
/**
 * Persist the live conversation to localStorage under LIVE_KEY.
 *
 * An empty conversation removes the key instead of storing "[]". Persistence
 * is best-effort: both the remove and the write are guarded, so a storage
 * failure never propagates to the caller.
 *
 * @param msgs The current conversation; serialized with JSON.stringify.
 */
function saveLive(msgs: LLMMessage[]) {
  if (typeof window === "undefined") return
  try {
    if (msgs.length === 0) {
      localStorage.removeItem(LIVE_KEY)
    } else {
      localStorage.setItem(LIVE_KEY, JSON.stringify(msgs))
    }
  } catch {
    // Storage blocked, quota exceeded or similar — silently drop persistence.
  }
}
/**
 * Remove the persisted live conversation (LIVE_KEY) from localStorage.
 *
 * Does nothing without a `window`. Removal is best-effort: a storage failure
 * is swallowed so the caller's own state reset is never interrupted by it.
 */
function clearLive() {
  if (typeof window === "undefined") return
  try {
    localStorage.removeItem(LIVE_KEY)
  } catch {
    // Storage blocked or unavailable — nothing persisted that we can reach.
  }
}
type StoredMessage = { role: "user" | "assistant"; content: string }
function loadAISnapshot(): StoredMessage[] | null {
if (typeof window === "undefined") return null
@@ -146,13 +179,16 @@ function withTimeout<T>(p: Promise<T>, ms: number, signal: AbortSignal) {
}
export default function AIRoute() {
const settings = useLLMSettings()
const settings = useProviderSettings()
const arcadia = useArcadiaClient()
const provider = getProvider(settings.providerId)
const agents = useAgents()
const [status, setStatus] = useState<Status>({ kind: "probing" })
const [model, setModel] = useState<string>(() => {
if (typeof window === "undefined") return ""
return localStorage.getItem(MODEL_KEY) ?? ""
})
const [adapter, setAdapter] = useState<LLMAdapter>(mockAdapter)
const [activeAgentId, setActiveAgentIdState] = useState<string>(() =>
loadActiveAgentId(),
)
@@ -163,28 +199,110 @@ export default function AIRoute() {
const activeAgent =
agents.find((a) => a.id === activeAgentId) ?? agents[0]
// When the user changes provider/model in Settings, follow along.
useEffect(() => {
if (settings.model) setModel(settings.model)
}, [settings.providerId, settings.model])
// Resolve the API key from the vault (direct mode) or build the proxy
// adapter (proxy mode), then refresh the model list.
const probe = useCallback(() => {
const ac = new AbortController()
setStatus({ kind: "probing" })
withTimeout(
listModels({ baseURL: settings.baseURL, signal: ac.signal }),
PROBE_TIMEOUT_MS,
ac.signal,
)
.then((rows) => {
const resolveSecret = async (name: string): Promise<string> => {
const res = await arcadia.GET<{ data: { value: string } }>(
`/api/v1/secrets/${encodeURIComponent(name)}`,
)
return res.data.value
}
const arcadiaBaseURL =
(import.meta.env.VITE_ARCADIA_URL as string | undefined) ?? "http://localhost:4000"
const arcadiaTenantId =
(import.meta.env.VITE_ARCADIA_TENANT as string | undefined) ?? "default"
const arcadiaAuthToken =
typeof window !== "undefined"
? sessionStorage.getItem("arcadia_access_token") ?? undefined
: undefined
;(async () => {
// Build the adapter first so chat works even if the model probe fails.
try {
const a = await buildAdapter({
settings,
resolveSecret,
arcadiaBaseURL,
arcadiaAuthToken,
arcadiaTenantId,
})
setAdapter(a)
} catch {
setAdapter(mockAdapter)
}
// Probe for a live model list. Anthropic has no /models endpoint, so
// fall back to the provider catalog's default models.
if (provider.transport === "anthropic") {
const ids = provider.defaultModels.length
? provider.defaultModels
: ["claude-opus-4-7"]
setStatus({ kind: "live", models: ids })
setModel((cur) => (cur && ids.includes(cur) ? cur : settings.model || ids[0]))
return
}
const baseURL = settings.baseURL || provider.baseURL
let apiKey: string | undefined
if (provider.requiresKey && settings.secretName) {
try {
apiKey = await resolveSecret(settings.secretName)
} catch {
// Fall through; listModels may still work for some providers without a key.
}
}
try {
const rows = await withTimeout(
listModels({ baseURL, apiKey, signal: ac.signal }),
PROBE_TIMEOUT_MS,
ac.signal,
)
const ids = rows.map((m) => m.id)
if (ids.length === 0) {
setStatus({ kind: "mock", reason: "endpoint returned no models" })
return
}
setStatus({ kind: "live", models: ids })
setModel((cur) => (cur && ids.includes(cur) ? cur : ids[0]))
})
.catch(() => {
setStatus({ kind: "mock", reason: "endpoint unreachable" })
})
setModel((cur) => (cur && ids.includes(cur) ? cur : settings.model || ids[0]))
} catch {
// Probe failed but adapter may still be usable; show the catalog default
// models so the user can pick one and just try sending.
if (provider.defaultModels.length) {
setStatus({ kind: "live", models: provider.defaultModels })
setModel((cur) =>
cur && provider.defaultModels.includes(cur)
? cur
: settings.model || provider.defaultModels[0],
)
} else {
setStatus({ kind: "mock", reason: "endpoint unreachable" })
}
}
})()
return () => ac.abort()
}, [settings.baseURL])
// eslint-disable-next-line react-hooks/exhaustive-deps
}, [
arcadia,
settings.providerId,
settings.baseURL,
settings.secretName,
settings.mode,
settings.model,
provider.transport,
provider.baseURL,
provider.requiresKey,
])
useEffect(() => probe(), [probe])
@@ -192,16 +310,6 @@ export default function AIRoute() {
if (model) localStorage.setItem(MODEL_KEY, model)
}, [model])
const adapter: LLMAdapter = useMemo(() => {
if (status.kind === "live") {
return new OpenAICompatibleAdapter({
baseURL: settings.baseURL,
apiKey: settings.apiKey || "lm-studio",
})
}
return mockAdapter
}, [status.kind, settings.baseURL, settings.apiKey])
const activeModel =
status.kind === "live" ? model || status.models[0] : "mock"
@@ -256,10 +364,29 @@ function ChatSurface({
.filter(Boolean)
.join("\n\n")
const arcadia = useArcadiaClient()
// Hydrate from the persisted live conversation so navigating away and
// back doesn't reset the chat. Read once on mount.
const initialLive = useRef<LLMMessage[] | null>(null)
if (initialLive.current === null) {
initialLive.current = loadLive() ?? []
}
const { messages, setMessages, send, continueChat, abort, isStreaming, reset } = useChat({
system: systemPrompt,
initialMessages: initialLive.current,
})
// Persist on every change. Streaming partials get saved too, which is what
// we want — refreshing mid-stream restores the partial assistant message.
useEffect(() => {
saveLive(messages)
}, [messages])
// Wrap reset so "Clear conversation" also drops the persisted live conversation.
const resetAndClear = useCallback(() => {
reset()
clearLive()
}, [reset])
// Auto tool-loop using native function calls. Reads run automatically;
// writes are held in `pendingConfirm` until the operator clicks Confirm
// or Deny in the inline ConfirmCard.
@@ -642,7 +769,7 @@ function ChatSurface({
onSaveToLibrary={saveToLibrary}
onShowPrompt={() => setShowPromptOpen(true)}
onRetryProbe={onRetryProbe}
onClear={reset}
onClear={resetAndClear}
hasMessages={messages.length > 0}
hasUserMessage={messages.some((m) => m.role === "user")}
hasCompactSnapshot={hasCompactSnapshot}

View File

@@ -1,8 +1,5 @@
import { useEffect, useState } from "react"
import {
Check,
X,
Loader2,
Cpu,
Palette,
User as UserIcon,
@@ -12,6 +9,14 @@ import {
Trash2,
} from "lucide-react"
import { listModels } from "@crema/llm-ui"
import {
buildAdapter,
LLMProvidersSettingsCard,
resetSettings as resetProviderSettings,
useSettings as useProviderSettings,
type LLMProvidersSettings,
} from "@crema/llm-providers-ui"
import { useArcadiaClient } from "@crema/arcadia-client"
import { AppShell } from "~/components/layout/app-shell"
import { Button } from "~/components/ui/button"
@@ -22,15 +27,6 @@ import {
CardHeader,
CardTitle,
} from "~/components/ui/card"
import { Input } from "~/components/ui/input"
import { Textarea } from "~/components/ui/textarea"
import {
DEFAULT_SETTINGS,
DEFAULT_SYSTEM_PROMPT,
saveLLMSettings,
useLLMSettings,
type LLMSettings,
} from "~/lib/llm-settings"
import {
loadActiveAgentId,
newAgentId,
@@ -71,53 +67,94 @@ const sections: {
{ id: "about", label: "About", icon: Info, description: "Version & credits" },
]
type TestState =
| { kind: "idle" }
| { kind: "running" }
| { kind: "ok"; count: number }
| { kind: "fail"; reason: string }
export default function SettingsRoute() {
const settings = useLLMSettings()
const [draft, setDraft] = useState<LLMSettings>(settings)
const [savedAt, setSavedAt] = useState<number | null>(null)
const [test, setTest] = useState<TestState>({ kind: "idle" })
const arcadia = useArcadiaClient()
useEffect(() => {
setDraft(settings)
}, [settings])
const runTest = async () => {
setTest({ kind: "running" })
const ac = new AbortController()
const timeout = setTimeout(() => ac.abort(), 4000)
const testConnection = async (
s: LLMProvidersSettings,
): Promise<{ ok: boolean; message: string }> => {
try {
const rows = await listModels({ baseURL: draft.baseURL, signal: ac.signal })
setTest({ kind: "ok", count: rows.length })
} catch (e) {
setTest({
kind: "fail",
reason: e instanceof Error ? e.message : String(e),
const arcadiaBaseURL =
(import.meta.env.VITE_ARCADIA_URL as string | undefined) ?? "http://localhost:4000"
const arcadiaTenantId =
(import.meta.env.VITE_ARCADIA_TENANT as string | undefined) ?? "default"
const arcadiaAuthToken =
typeof window !== "undefined"
? sessionStorage.getItem("arcadia_access_token") ?? undefined
: undefined
const adapter = await buildAdapter({
settings: s,
// Direct-mode resolver — fetches the API key from the vault.
resolveSecret: async (name) => {
const res = await arcadia.GET<{ data: { value: string } }>(
`/api/v1/secrets/${encodeURIComponent(name)}`,
)
return res.data.value
},
// Proxy-mode coordinates.
arcadiaBaseURL,
arcadiaAuthToken,
arcadiaTenantId,
})
} finally {
clearTimeout(timeout)
// In proxy mode the adapter just being built is the strongest signal we
// can get without actually firing a chat request — the proxy endpoint
// doesn't exist on the backend yet, so any /models probe would 404.
if (s.mode === "proxy") {
return {
ok: true,
message:
"Adapter built. Note: the backend proxy (/api/v1/ai/llm/chat) isn't deployed yet — see docs/LLM_PROXY_CONTRACT.md.",
}
}
// Direct mode — for OpenAI-compatible endpoints, /models is a cheap probe.
if (s.providerId !== "anthropic") {
const baseURL =
s.baseURL ||
(s.providerId === "lmstudio"
? "http://localhost:1234/v1"
: s.providerId === "openai"
? "https://api.openai.com/v1"
: s.providerId === "deepseek"
? "https://api.deepseek.com/v1"
: "https://dashscope-intl.aliyuncs.com/compatible-mode/v1")
// Resolve key for the probe (lmstudio doesn't need one).
let apiKey: string | undefined
if (s.providerId !== "lmstudio" && s.secretName) {
try {
const res = await arcadia.GET<{ data: { value: string } }>(
`/api/v1/secrets/${encodeURIComponent(s.secretName)}`,
)
apiKey = res.data.value
} catch (err) {
const msg = err instanceof Error ? err.message : String(err)
if (/404|not[_ ]found/i.test(msg)) {
return {
ok: false,
message: `No vault secret named "${s.secretName}". Create it under /secrets first (paste the API key as the Value), then enter the secret's name here.`,
}
}
throw err
}
}
const ac = new AbortController()
const t = setTimeout(() => ac.abort(), 5000)
try {
const rows = await listModels({ baseURL, apiKey, signal: ac.signal })
return { ok: true, message: `Connected. ${rows.length} model(s) reachable.` }
} finally {
clearTimeout(t)
}
}
// Anthropic doesn't expose a /models list; we just confirm adapter built.
return { ok: true, message: `Adapter ready (${adapter.label ?? adapter.id}).` }
} catch (e) {
return { ok: false, message: e instanceof Error ? e.message : String(e) }
}
}
const dirty =
draft.baseURL !== settings.baseURL ||
draft.contextTokens !== settings.contextTokens ||
draft.responseBudget !== settings.responseBudget
const save = () => {
saveLLMSettings(draft)
setSavedAt(Date.now())
}
const reset = () => {
setDraft(DEFAULT_SETTINGS)
}
const [section, setSection] = useState<SectionId>(() => {
if (typeof window === "undefined") return "llm"
const stored = localStorage.getItem(SECTION_KEY)
@@ -173,151 +210,36 @@ export default function SettingsRoute() {
<div className="min-w-0">
{section === "llm" && (
<Card>
<CardHeader>
<CardTitle>LLM</CardTitle>
<CardDescription>
Configure the local model endpoint and context budgets used
by the Assistant.
</CardDescription>
</CardHeader>
<CardContent className="flex flex-col gap-5">
<Field
label="Base URL"
hint="OpenAI-compatible endpoint. LM Studio defaults to http://localhost:1234/v1."
>
<Input
data-action="settings-base-url"
value={draft.baseURL}
onChange={(e) =>
setDraft((d) => ({ ...d, baseURL: e.target.value }))
}
placeholder="http://localhost:1234/v1"
spellCheck={false}
autoComplete="off"
<div className="flex flex-col gap-4">
<Card>
<CardHeader>
<CardTitle>LLM</CardTitle>
<CardDescription>
Pick a provider, model, and the arcadia-vault secret holding the API key. Settings
auto-save as you type. The Assistant picks them up on the next message.
</CardDescription>
</CardHeader>
<CardContent>
<LLMProvidersSettingsCard
onTest={testConnection}
hideTransportToggle={false}
/>
</Field>
</CardContent>
</Card>
<Field
label="Context window (tokens)"
hint="Match this to the context length you've loaded in LM Studio."
<div className="flex items-center gap-2">
<Button
variant="outline"
onClick={() => resetProviderSettings()}
data-action="settings-reset"
>
<Input
data-action="settings-context-tokens"
type="number"
min={1024}
step={512}
value={draft.contextTokens}
onChange={(e) =>
setDraft((d) => ({
...d,
contextTokens:
Number(e.target.value) || d.contextTokens,
}))
}
/>
</Field>
<Field
label="System prompt"
hint="Sent at the start of every conversation. Shapes the assistant's persona and scope. UI Control adds an action-driving preface on top of this when enabled."
>
<Textarea
data-action="settings-system-prompt"
value={draft.systemPrompt}
onChange={(e) =>
setDraft((d) => ({ ...d, systemPrompt: e.target.value }))
}
rows={5}
spellCheck={false}
className="min-h-24 font-mono text-xs"
/>
<button
type="button"
data-action="settings-system-prompt-reset"
onClick={() =>
setDraft((d) => ({
...d,
systemPrompt: DEFAULT_SYSTEM_PROMPT,
}))
}
className="self-start text-xs text-muted-foreground underline-offset-2 hover:text-foreground hover:underline"
>
Reset to default prompt
</button>
</Field>
<Field
label="Response cap (max tokens)"
hint="Upper bound on each model reply. Smaller = faster, less rambling."
>
<Input
data-action="settings-response-budget"
type="number"
min={64}
step={64}
value={draft.responseBudget}
onChange={(e) =>
setDraft((d) => ({
...d,
responseBudget:
Number(e.target.value) || d.responseBudget,
}))
}
/>
</Field>
<div className="flex flex-wrap items-center gap-2">
<Button
data-action="settings-save"
onClick={save}
disabled={!dirty}
>
Save
</Button>
<Button
data-action="settings-test"
variant="outline"
onClick={runTest}
disabled={test.kind === "running"}
>
{test.kind === "running" ? (
<Loader2 className="size-4 animate-spin" />
) : test.kind === "ok" ? (
<Check className="size-4 text-emerald-600" />
) : test.kind === "fail" ? (
<X className="size-4 text-destructive" />
) : null}
Test connection
</Button>
<Button
data-action="settings-reset"
variant="outline"
onClick={reset}
>
Reset to defaults
</Button>
{savedAt && !dirty && (
<span className="text-sm text-muted-foreground">
Saved.
</span>
)}
{test.kind === "ok" && (
<span className="text-sm text-emerald-700 dark:text-emerald-400">
{test.count} model{test.count === 1 ? "" : "s"} available.
</span>
)}
{test.kind === "fail" && (
<span
className="text-sm text-destructive"
title={test.reason}
>
Failed: {test.reason.slice(0, 60)}
</span>
)}
</div>
</CardContent>
</Card>
Reset to defaults
</Button>
<span className="text-xs text-muted-foreground">
Need to manage stored keys? See <a href="/secrets" className="underline">Secrets</a>.
</span>
</div>
</div>
)}
{section === "agents" && <AgentsPanel />}