Wire @crema/llm-providers-ui: multi-provider picker + AI persistence

Replaces the single-base-URL LLM settings with the new providers lib
(OpenAI, Anthropic, DeepSeek, Qwen, LM Studio). Settings/LLM hosts the
catalog-aware card; the /ai route builds adapters via buildAdapter()
and resolves API keys from the arcadia vault per-call (direct mode).
Anthropic skips the /v1/models probe (no such endpoint) and uses
catalog defaults; failed probes for keyed providers fall back to the
catalog instead of dropping to mock.

AI conversation now persists across navigation and refresh via a new
crema.ai.live localStorage key (separate from the compact-snapshot
key). useChat hydrates from initialMessages on mount, saves on every
change, and "Clear conversation" wipes both state and storage.

Vite needs explicit resolve.alias for @crema/llm-ui and
@crema/llm-providers-ui — when a sibling lib imports another @crema/*,
tsconfigPaths can't resolve it (the importing file isn't in this
project's tsconfig scope).

Adds docs/LLM_PROXY_CONTRACT.md describing the
POST /api/v1/ai/llm/chat endpoint the backend needs for proxy mode
(keys never leave the server). Direct mode works against today's
arcadia; proxy mode unblocks once that endpoint ships.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
jules
2026-05-01 22:50:23 +10:00
parent a907e25a7c
commit 7ba415d78e
6 changed files with 439 additions and 221 deletions

View File

@@ -18,6 +18,7 @@
@source "../../lib-feedback-ui/src";
@source "../../lib-auth-ui/src";
@source "../../lib-agent-ui/src";
@source "../../lib-llm-providers-ui/src";
/* CREMA:SOURCES */
@custom-variant dark (&:is(.dark *));

View File

@@ -1,7 +1,6 @@
import {
useCallback,
useEffect,
useMemo,
useRef,
useState,
} from "react"
@@ -29,12 +28,16 @@ import {
import {
LLMProvider,
MockLLM,
OpenAICompatibleAdapter,
listModels,
useChat,
useCompletion,
type LLMAdapter,
} from "@crema/llm-ui"
import {
buildAdapter,
getProvider,
useSettings as useProviderSettings,
} from "@crema/llm-providers-ui"
import { TypingIndicator } from "@crema/chat-ui"
import { AppShell } from "~/components/layout/app-shell"
@@ -51,7 +54,6 @@ import {
PopoverContent,
PopoverTrigger,
} from "~/components/ui/popover"
import { useLLMSettings } from "~/lib/llm-settings"
import {
loadActiveAgentId,
saveActiveAgentId,
@@ -87,6 +89,37 @@ function ToolResultBlock({ name, result }: { name: string; result: unknown }) {
}
const SNAPSHOT_KEY = "crema.ai.snapshot"
// Separate key for the live conversation that survives navigation. The
// compact snapshot is reserved for the user-triggered Compact/Restore flow.
const LIVE_KEY = "crema.ai.live"
/**
 * Reads the persisted live conversation from localStorage.
 *
 * @returns The stored messages, or `null` when there is no `window`
 *   (server render), nothing is stored under `LIVE_KEY`, storage access or
 *   JSON parsing throws, or the stored value is not an array of
 *   message-like objects.
 */
function loadLive(): LLMMessage[] | null {
  if (typeof window === "undefined") return null
  try {
    const raw = localStorage.getItem(LIVE_KEY)
    if (!raw) return null
    const parsed: unknown = JSON.parse(raw)
    if (!Array.isArray(parsed)) return null
    // Reject corrupt or foreign payloads (e.g. `[1, null]`) instead of
    // hydrating the chat with entries that are not messages at all.
    const wellFormed = parsed.every(
      (entry) =>
        typeof entry === "object" &&
        entry !== null &&
        typeof (entry as { role?: unknown }).role === "string",
    )
    return wellFormed ? (parsed as LLMMessage[]) : null
  } catch {
    // Best-effort: blocked storage or malformed JSON means "nothing to restore".
    return null
  }
}
/**
 * Persists the live conversation under `LIVE_KEY`.
 *
 * An empty list removes the key instead of storing `[]`. Persistence is
 * best-effort: every storage failure is swallowed so that saving can never
 * throw into the caller.
 *
 * @param msgs The current conversation to persist.
 */
function saveLive(msgs: LLMMessage[]) {
  if (typeof window === "undefined") return
  try {
    if (msgs.length === 0) {
      // Guarded like the write below: touching localStorage can itself throw
      // when storage is blocked by the browser.
      localStorage.removeItem(LIVE_KEY)
      return
    }
    localStorage.setItem(LIVE_KEY, JSON.stringify(msgs))
  } catch {
    // Storage unavailable, quota exceeded or similar — silently drop persistence.
  }
}
/**
 * Removes the persisted live conversation stored under `LIVE_KEY`.
 *
 * No-op when there is no `window`. Best-effort like `saveLive`: a storage
 * failure is swallowed so clearing the chat can never throw into the caller.
 */
function clearLive() {
  if (typeof window === "undefined") return
  try {
    localStorage.removeItem(LIVE_KEY)
  } catch {
    // Storage unavailable — nothing we can remove, and nothing to report.
  }
}
type StoredMessage = { role: "user" | "assistant"; content: string }
function loadAISnapshot(): StoredMessage[] | null {
if (typeof window === "undefined") return null
@@ -146,13 +179,16 @@ function withTimeout<T>(p: Promise<T>, ms: number, signal: AbortSignal) {
}
export default function AIRoute() {
const settings = useLLMSettings()
const settings = useProviderSettings()
const arcadia = useArcadiaClient()
const provider = getProvider(settings.providerId)
const agents = useAgents()
const [status, setStatus] = useState<Status>({ kind: "probing" })
const [model, setModel] = useState<string>(() => {
if (typeof window === "undefined") return ""
return localStorage.getItem(MODEL_KEY) ?? ""
})
const [adapter, setAdapter] = useState<LLMAdapter>(mockAdapter)
const [activeAgentId, setActiveAgentIdState] = useState<string>(() =>
loadActiveAgentId(),
)
@@ -163,28 +199,110 @@ export default function AIRoute() {
const activeAgent =
agents.find((a) => a.id === activeAgentId) ?? agents[0]
// When the user changes provider/model in Settings, follow along.
useEffect(() => {
if (settings.model) setModel(settings.model)
}, [settings.providerId, settings.model])
// Resolve the API key from the vault (direct mode) or build the proxy
// adapter (proxy mode), then refresh the model list.
const probe = useCallback(() => {
const ac = new AbortController()
setStatus({ kind: "probing" })
withTimeout(
listModels({ baseURL: settings.baseURL, signal: ac.signal }),
const resolveSecret = async (name: string): Promise<string> => {
const res = await arcadia.GET<{ data: { value: string } }>(
`/api/v1/secrets/${encodeURIComponent(name)}`,
)
return res.data.value
}
const arcadiaBaseURL =
(import.meta.env.VITE_ARCADIA_URL as string | undefined) ?? "http://localhost:4000"
const arcadiaTenantId =
(import.meta.env.VITE_ARCADIA_TENANT as string | undefined) ?? "default"
const arcadiaAuthToken =
typeof window !== "undefined"
? sessionStorage.getItem("arcadia_access_token") ?? undefined
: undefined
;(async () => {
// Build the adapter first so chat works even if the model probe fails.
try {
const a = await buildAdapter({
settings,
resolveSecret,
arcadiaBaseURL,
arcadiaAuthToken,
arcadiaTenantId,
})
setAdapter(a)
} catch {
setAdapter(mockAdapter)
}
// Probe for a live model list. Anthropic has no /models endpoint, so
// fall back to the provider catalog's default models.
if (provider.transport === "anthropic") {
const ids = provider.defaultModels.length
? provider.defaultModels
: ["claude-opus-4-7"]
setStatus({ kind: "live", models: ids })
setModel((cur) => (cur && ids.includes(cur) ? cur : settings.model || ids[0]))
return
}
const baseURL = settings.baseURL || provider.baseURL
let apiKey: string | undefined
if (provider.requiresKey && settings.secretName) {
try {
apiKey = await resolveSecret(settings.secretName)
} catch {
// Fall through; listModels may still work for some providers without a key.
}
}
try {
const rows = await withTimeout(
listModels({ baseURL, apiKey, signal: ac.signal }),
PROBE_TIMEOUT_MS,
ac.signal,
)
.then((rows) => {
const ids = rows.map((m) => m.id)
if (ids.length === 0) {
setStatus({ kind: "mock", reason: "endpoint returned no models" })
return
}
setStatus({ kind: "live", models: ids })
setModel((cur) => (cur && ids.includes(cur) ? cur : ids[0]))
})
.catch(() => {
setModel((cur) => (cur && ids.includes(cur) ? cur : settings.model || ids[0]))
} catch {
// Probe failed but adapter may still be usable; show the catalog default
// models so the user can pick one and just try sending.
if (provider.defaultModels.length) {
setStatus({ kind: "live", models: provider.defaultModels })
setModel((cur) =>
cur && provider.defaultModels.includes(cur)
? cur
: settings.model || provider.defaultModels[0],
)
} else {
setStatus({ kind: "mock", reason: "endpoint unreachable" })
})
}
}
})()
return () => ac.abort()
}, [settings.baseURL])
// eslint-disable-next-line react-hooks/exhaustive-deps
}, [
arcadia,
settings.providerId,
settings.baseURL,
settings.secretName,
settings.mode,
settings.model,
provider.transport,
provider.baseURL,
provider.requiresKey,
])
useEffect(() => probe(), [probe])
@@ -192,16 +310,6 @@ export default function AIRoute() {
if (model) localStorage.setItem(MODEL_KEY, model)
}, [model])
const adapter: LLMAdapter = useMemo(() => {
if (status.kind === "live") {
return new OpenAICompatibleAdapter({
baseURL: settings.baseURL,
apiKey: settings.apiKey || "lm-studio",
})
}
return mockAdapter
}, [status.kind, settings.baseURL, settings.apiKey])
const activeModel =
status.kind === "live" ? model || status.models[0] : "mock"
@@ -256,10 +364,29 @@ function ChatSurface({
.filter(Boolean)
.join("\n\n")
const arcadia = useArcadiaClient()
// Hydrate from the persisted live conversation so navigating away and
// back doesn't reset the chat. Read once on mount.
const initialLive = useRef<LLMMessage[] | null>(null)
if (initialLive.current === null) {
initialLive.current = loadLive() ?? []
}
const { messages, setMessages, send, continueChat, abort, isStreaming, reset } = useChat({
system: systemPrompt,
initialMessages: initialLive.current,
})
// Persist on every change. Streaming partials get saved too, which is what
// we want — refreshing mid-stream restores the partial assistant message.
useEffect(() => {
saveLive(messages)
}, [messages])
// Wrap reset so "Clear conversation" also drops the persisted live conversation.
const resetAndClear = useCallback(() => {
reset()
clearLive()
}, [reset])
// Auto tool-loop using native function calls. Reads run automatically;
// writes are held in `pendingConfirm` until the operator clicks Confirm
// or Deny in the inline ConfirmCard.
@@ -642,7 +769,7 @@ function ChatSurface({
onSaveToLibrary={saveToLibrary}
onShowPrompt={() => setShowPromptOpen(true)}
onRetryProbe={onRetryProbe}
onClear={reset}
onClear={resetAndClear}
hasMessages={messages.length > 0}
hasUserMessage={messages.some((m) => m.role === "user")}
hasCompactSnapshot={hasCompactSnapshot}

View File

@@ -1,8 +1,5 @@
import { useEffect, useState } from "react"
import {
Check,
X,
Loader2,
Cpu,
Palette,
User as UserIcon,
@@ -12,6 +9,14 @@ import {
Trash2,
} from "lucide-react"
import { listModels } from "@crema/llm-ui"
import {
buildAdapter,
LLMProvidersSettingsCard,
resetSettings as resetProviderSettings,
useSettings as useProviderSettings,
type LLMProvidersSettings,
} from "@crema/llm-providers-ui"
import { useArcadiaClient } from "@crema/arcadia-client"
import { AppShell } from "~/components/layout/app-shell"
import { Button } from "~/components/ui/button"
@@ -22,15 +27,6 @@ import {
CardHeader,
CardTitle,
} from "~/components/ui/card"
import { Input } from "~/components/ui/input"
import { Textarea } from "~/components/ui/textarea"
import {
DEFAULT_SETTINGS,
DEFAULT_SYSTEM_PROMPT,
saveLLMSettings,
useLLMSettings,
type LLMSettings,
} from "~/lib/llm-settings"
import {
loadActiveAgentId,
newAgentId,
@@ -71,51 +67,92 @@ const sections: {
{ id: "about", label: "About", icon: Info, description: "Version & credits" },
]
type TestState =
| { kind: "idle" }
| { kind: "running" }
| { kind: "ok"; count: number }
| { kind: "fail"; reason: string }
export default function SettingsRoute() {
const settings = useLLMSettings()
const [draft, setDraft] = useState<LLMSettings>(settings)
const [savedAt, setSavedAt] = useState<number | null>(null)
const [test, setTest] = useState<TestState>({ kind: "idle" })
const arcadia = useArcadiaClient()
useEffect(() => {
setDraft(settings)
}, [settings])
const runTest = async () => {
setTest({ kind: "running" })
const ac = new AbortController()
const timeout = setTimeout(() => ac.abort(), 4000)
const testConnection = async (
s: LLMProvidersSettings,
): Promise<{ ok: boolean; message: string }> => {
try {
const rows = await listModels({ baseURL: draft.baseURL, signal: ac.signal })
setTest({ kind: "ok", count: rows.length })
} catch (e) {
setTest({
kind: "fail",
reason: e instanceof Error ? e.message : String(e),
const arcadiaBaseURL =
(import.meta.env.VITE_ARCADIA_URL as string | undefined) ?? "http://localhost:4000"
const arcadiaTenantId =
(import.meta.env.VITE_ARCADIA_TENANT as string | undefined) ?? "default"
const arcadiaAuthToken =
typeof window !== "undefined"
? sessionStorage.getItem("arcadia_access_token") ?? undefined
: undefined
const adapter = await buildAdapter({
settings: s,
// Direct-mode resolver — fetches the API key from the vault.
resolveSecret: async (name) => {
const res = await arcadia.GET<{ data: { value: string } }>(
`/api/v1/secrets/${encodeURIComponent(name)}`,
)
return res.data.value
},
// Proxy-mode coordinates.
arcadiaBaseURL,
arcadiaAuthToken,
arcadiaTenantId,
})
// In proxy mode the adapter just being built is the strongest signal we
// can get without actually firing a chat request — the proxy endpoint
// doesn't exist on the backend yet, so any /models probe would 404.
if (s.mode === "proxy") {
return {
ok: true,
message:
"Adapter built. Note: the backend proxy (/api/v1/ai/llm/chat) isn't deployed yet — see docs/LLM_PROXY_CONTRACT.md.",
}
}
// Direct mode — for OpenAI-compatible endpoints, /models is a cheap probe.
if (s.providerId !== "anthropic") {
const baseURL =
s.baseURL ||
(s.providerId === "lmstudio"
? "http://localhost:1234/v1"
: s.providerId === "openai"
? "https://api.openai.com/v1"
: s.providerId === "deepseek"
? "https://api.deepseek.com/v1"
: "https://dashscope-intl.aliyuncs.com/compatible-mode/v1")
// Resolve key for the probe (lmstudio doesn't need one).
let apiKey: string | undefined
if (s.providerId !== "lmstudio" && s.secretName) {
try {
const res = await arcadia.GET<{ data: { value: string } }>(
`/api/v1/secrets/${encodeURIComponent(s.secretName)}`,
)
apiKey = res.data.value
} catch (err) {
const msg = err instanceof Error ? err.message : String(err)
if (/404|not[_ ]found/i.test(msg)) {
return {
ok: false,
message: `No vault secret named "${s.secretName}". Create it under /secrets first (paste the API key as the Value), then enter the secret's name here.`,
}
}
throw err
}
}
const ac = new AbortController()
const t = setTimeout(() => ac.abort(), 5000)
try {
const rows = await listModels({ baseURL, apiKey, signal: ac.signal })
return { ok: true, message: `Connected. ${rows.length} model(s) reachable.` }
} finally {
clearTimeout(timeout)
clearTimeout(t)
}
}
const dirty =
draft.baseURL !== settings.baseURL ||
draft.contextTokens !== settings.contextTokens ||
draft.responseBudget !== settings.responseBudget
const save = () => {
saveLLMSettings(draft)
setSavedAt(Date.now())
// Anthropic doesn't expose a /models list; we just confirm adapter built.
return { ok: true, message: `Adapter ready (${adapter.label ?? adapter.id}).` }
} catch (e) {
return { ok: false, message: e instanceof Error ? e.message : String(e) }
}
const reset = () => {
setDraft(DEFAULT_SETTINGS)
}
const [section, setSection] = useState<SectionId>(() => {
@@ -173,151 +210,36 @@ export default function SettingsRoute() {
<div className="min-w-0">
{section === "llm" && (
<div className="flex flex-col gap-4">
<Card>
<CardHeader>
<CardTitle>LLM</CardTitle>
<CardDescription>
Configure the local model endpoint and context budgets used
by the Assistant.
Pick a provider, model, and the arcadia-vault secret holding the API key. Settings
auto-save as you type. The Assistant picks them up on the next message.
</CardDescription>
</CardHeader>
<CardContent className="flex flex-col gap-5">
<Field
label="Base URL"
hint="OpenAI-compatible endpoint. LM Studio defaults to http://localhost:1234/v1."
>
<Input
data-action="settings-base-url"
value={draft.baseURL}
onChange={(e) =>
setDraft((d) => ({ ...d, baseURL: e.target.value }))
}
placeholder="http://localhost:1234/v1"
spellCheck={false}
autoComplete="off"
<CardContent>
<LLMProvidersSettingsCard
onTest={testConnection}
hideTransportToggle={false}
/>
</Field>
</CardContent>
</Card>
<Field
label="Context window (tokens)"
hint="Match this to the context length you've loaded in LM Studio."
>
<Input
data-action="settings-context-tokens"
type="number"
min={1024}
step={512}
value={draft.contextTokens}
onChange={(e) =>
setDraft((d) => ({
...d,
contextTokens:
Number(e.target.value) || d.contextTokens,
}))
}
/>
</Field>
<Field
label="System prompt"
hint="Sent at the start of every conversation. Shapes the assistant's persona and scope. UI Control adds an action-driving preface on top of this when enabled."
>
<Textarea
data-action="settings-system-prompt"
value={draft.systemPrompt}
onChange={(e) =>
setDraft((d) => ({ ...d, systemPrompt: e.target.value }))
}
rows={5}
spellCheck={false}
className="min-h-24 font-mono text-xs"
/>
<button
type="button"
data-action="settings-system-prompt-reset"
onClick={() =>
setDraft((d) => ({
...d,
systemPrompt: DEFAULT_SYSTEM_PROMPT,
}))
}
className="self-start text-xs text-muted-foreground underline-offset-2 hover:text-foreground hover:underline"
>
Reset to default prompt
</button>
</Field>
<Field
label="Response cap (max tokens)"
hint="Upper bound on each model reply. Smaller = faster, less rambling."
>
<Input
data-action="settings-response-budget"
type="number"
min={64}
step={64}
value={draft.responseBudget}
onChange={(e) =>
setDraft((d) => ({
...d,
responseBudget:
Number(e.target.value) || d.responseBudget,
}))
}
/>
</Field>
<div className="flex flex-wrap items-center gap-2">
<div className="flex items-center gap-2">
<Button
data-action="settings-save"
onClick={save}
disabled={!dirty}
>
Save
</Button>
<Button
data-action="settings-test"
variant="outline"
onClick={runTest}
disabled={test.kind === "running"}
>
{test.kind === "running" ? (
<Loader2 className="size-4 animate-spin" />
) : test.kind === "ok" ? (
<Check className="size-4 text-emerald-600" />
) : test.kind === "fail" ? (
<X className="size-4 text-destructive" />
) : null}
Test connection
</Button>
<Button
onClick={() => resetProviderSettings()}
data-action="settings-reset"
variant="outline"
onClick={reset}
>
Reset to defaults
</Button>
{savedAt && !dirty && (
<span className="text-sm text-muted-foreground">
Saved.
<span className="text-xs text-muted-foreground">
Need to manage stored keys? See <a href="/secrets" className="underline">Secrets</a>.
</span>
)}
{test.kind === "ok" && (
<span className="text-sm text-emerald-700 dark:text-emerald-400">
{test.count} model{test.count === 1 ? "" : "s"} available.
</span>
)}
{test.kind === "fail" && (
<span
className="text-sm text-destructive"
title={test.reason}
>
Failed: {test.reason.slice(0, 60)}
</span>
)}
</div>
</CardContent>
</Card>
</div>
)}
{section === "agents" && <AgentsPanel />}

158
docs/LLM_PROXY_CONTRACT.md Normal file
View File

@@ -0,0 +1,158 @@
# LLM Proxy Contract
> **Status: not yet implemented on the backend.** This document is the contract that `lib-llm-providers-ui` expects from arcadia. Implement `POST /api/v1/ai/llm/chat` server-side to make `mode: "proxy"` work in the client.
## Why a proxy?
The Settings UI ships in two transport modes:
- **`direct`** — the browser fetches the API key from arcadia's vault (`GET /api/v1/secrets/:name`), then calls OpenAI/Anthropic/DeepSeek/Qwen directly. Works today, but the key briefly lives in browser memory and the prompt contents go straight to the upstream provider with no opportunity for arcadia to log, meter, or rewrite them.
- **`proxy`** — the browser sends the chat request to arcadia, which reads the secret server-side and calls the upstream provider. Keys never leave arcadia. This is what production should use.
This contract only covers the proxy mode.
## Endpoint
```
POST /api/v1/ai/llm/chat
Authorization: Bearer <arcadia session token>
X-Tenant-ID: <tenant id>
Content-Type: application/json
```
The path is `/api/v1/ai/llm/chat` so it lives under the existing `/api/v1/ai/*` scope (next to `embeddings`, `tools`, `llm/usage`).
## Request body
The shape is OpenAI's chat-completion request, **plus** two arcadia-specific fields:
```json
{
"provider": "openai",
"secret_name": "llm-openai-api-key",
"model": "gpt-4o-mini",
"messages": [
{ "role": "system", "content": "You are a helpful assistant." },
{ "role": "user", "content": "Hello!" }
],
"stream": true,
"max_tokens": 1024,
"temperature": 0.7,
"tools": [
{
"type": "function",
"function": {
"name": "search_docs",
"description": "...",
"parameters": { "type": "object", "properties": {} }
}
}
],
"tool_choice": "auto"
}
```
### Arcadia-specific fields
| Field | Type | Notes |
|---------------|-------------------------------------------------|-------|
| `provider` | `"openai" \| "anthropic" \| "deepseek" \| "qwen" \| "lmstudio"` | Selects the upstream backend. |
| `secret_name` | `string` (optional for `lmstudio`) | Name of the vault secret holding the upstream API key. The proxy resolves it via the same `Secrets.get/3` used for tenant-facing reads. |
The proxy must:
1. Authenticate the arcadia session.
2. Resolve `secret_name` for the current tenant (or fall back to platform-level). Refuse the call if the secret is disabled, expired, or IP-blocked. The existing `Arcadia.Secrets.get/3` already returns the right error codes.
3. Map the request to the upstream's native shape (Anthropic's `/v1/messages` differs from OpenAI's `/v1/chat/completions`).
4. Forward it with the resolved key as the upstream's expected auth header (`Authorization: Bearer <key>` for OpenAI/DeepSeek/Qwen, `x-api-key: <key>` + `anthropic-version: 2023-06-01` for Anthropic).
5. Stream the response back as **OpenAI-shape SSE** regardless of upstream. (See "Response — streaming" below.)
6. Record a usage row via the existing `POST /api/v1/ai/llm/usage` after each completion. (See "Usage tracking" below.)
## Response — non-streaming (`stream: false`)
OpenAI chat-completion shape, returned as a single JSON document:
```json
{
"id": "chatcmpl-...",
"object": "chat.completion",
"created": 1714512000,
"model": "gpt-4o-mini",
"choices": [
{
"index": 0,
"finish_reason": "stop",
"message": {
"role": "assistant",
"content": "Hi there!",
"tool_calls": null
}
}
],
"usage": {
"prompt_tokens": 12,
"completion_tokens": 4,
"total_tokens": 16
}
}
```
For Anthropic upstream, translate `usage.input_tokens` / `output_tokens` → `prompt_tokens` / `completion_tokens`, and combine `content` blocks into a single string (or surface `tool_use` blocks via `tool_calls`).
## Response — streaming (`stream: true`)
Server-Sent Events, one event per delta, terminated with `data: [DONE]`. Every `data:` payload except the final `[DONE]` sentinel is a JSON object in OpenAI's chat-completion *chunk* (delta) shape:
```
data: {"id":"chatcmpl-...","object":"chat.completion.chunk","created":1714512000,"model":"gpt-4o-mini","choices":[{"index":0,"delta":{"role":"assistant","content":""},"finish_reason":null}]}
data: {"id":"chatcmpl-...","object":"chat.completion.chunk","created":1714512000,"model":"gpt-4o-mini","choices":[{"index":0,"delta":{"content":"Hi"},"finish_reason":null}]}
data: {"id":"chatcmpl-...","object":"chat.completion.chunk","created":1714512000,"model":"gpt-4o-mini","choices":[{"index":0,"delta":{"content":" there"},"finish_reason":null}]}
data: {"id":"chatcmpl-...","object":"chat.completion.chunk","created":1714512000,"model":"gpt-4o-mini","choices":[{"index":0,"delta":{},"finish_reason":"stop"}]}
data: [DONE]
```
For Anthropic upstream, translate `content_block_delta` events of type `text_delta` into delta `content` strings, and `message_stop` into the `finish_reason: "stop"` event. For tool calls, translate each `content_block_start` of type `tool_use` (which carries the id + name) and the `input_json_delta` events that stream its JSON arguments into OpenAI-shape `delta.tool_calls` entries.
The client uses the OpenAI parser in `@crema/llm-ui` (`OpenAICompatibleAdapter.stream()`), so any deviation from this shape will manifest as missing tokens or hung streams.
## Errors
Use the existing `ArcadiaWeb.FallbackController` envelope:
```json
{ "error": { "code": "secret_disabled", "message": "Secret is disabled" } }
```
Specific codes the client distinguishes:
| HTTP | code | When |
|------|-------------------------|------|
| 401 | `unauthorized` | Missing / invalid arcadia session. |
| 403 | `secret_disabled` | Vault returned `:disabled`. |
| 410 | `secret_expired` | Vault returned `:expired`. |
| 410 | `secret_consumed` | Read-once secret already consumed. |
| 403 | `ip_not_allowed` | Caller IP blocked by the vault allowlist. |
| 404 | `unknown_provider` | `provider` field not in the supported set. |
| 502 | `upstream_unavailable` | Upstream returned 5xx or timed out. |
| 429 | `rate_limited` | Either arcadia or upstream returned 429. Pass through `Retry-After` if present. |
## Auth
The proxy must verify the arcadia session bearer the same way the rest of `/api/v1/*` does. The vault read uses the **caller's tenant context**, so platform-admin sessions can use platform-level secrets and tenant sessions can use their own — no special privilege required beyond what `/api/v1/secrets/:name` already enforces.
## Usage tracking
After each completion (success or failure), write a row via the existing `POST /api/v1/ai/llm/usage` (or call the equivalent context module directly inside the proxy). Required fields on that endpoint already include model, prompt_tokens, completion_tokens, latency_ms — the proxy can fill them from the upstream response.
## Test fixture
A minimal Mix test in `apps/arcadia_core/test/arcadia_web/controllers/api/ai_controller_test.exs` should cover:
- 200 with stream off, OpenAI upstream stubbed via Bypass.
- 200 with stream on, Anthropic upstream stubbed; assert SSE chunks carry OpenAI-shape JSON.
- 403 when the named secret is disabled.
- 404 when `provider: "unknown"`.
- Usage row written on the success cases.

View File

@@ -40,6 +40,8 @@
"@crema/auth-ui/*": ["../lib-auth-ui/src/*"],
"@crema/agent-ui": ["../lib-agent-ui/src/index.tsx"],
"@crema/agent-ui/*": ["../lib-agent-ui/src/*"],
"@crema/llm-providers-ui": ["../lib-llm-providers-ui/src/index.tsx"],
"@crema/llm-providers-ui/*": ["../lib-llm-providers-ui/src/*"],
"// CREMA:PATHS": [""],
"react": ["./node_modules/@types/react"],
"react/*": ["./node_modules/@types/react/*"],

View File

@@ -65,6 +65,12 @@ const arcadiaClientSrc = fileURLToPath(
const arcadiaAuthUiSrc = fileURLToPath(
new URL("../lib-arcadia-auth-ui/src", import.meta.url),
)
const llmUiSrc = fileURLToPath(
new URL("../lib-llm-ui/src", import.meta.url),
)
const llmProvidersUiSrc = fileURLToPath(
new URL("../lib-llm-providers-ui/src", import.meta.url),
)
// Sibling lib packages (lib-content-ui, lib-content-editor-ui) import bare
// deps like clsx and @tiptap/* but have no node_modules of their own. Pin
@@ -118,6 +124,8 @@ export default defineConfig({
"@crema/search-ui": `${searchUiSrc}/index.tsx`,
"@crema/arcadia-client": `${arcadiaClientSrc}/index.tsx`,
"@crema/arcadia-auth-ui": `${arcadiaAuthUiSrc}/index.tsx`,
"@crema/llm-ui": `${llmUiSrc}/index.tsx`,
"@crema/llm-providers-ui": `${llmProvidersUiSrc}/index.tsx`,
...sharedDepAliases,
},
dedupe: dedupeDeps,