// Arcadia LLM configurations API.
//
// Backed by /api/v1/admin/llm-configurations — server-side persisted
// provider/model/secret/cost settings. Replaces the localStorage-driven
// settings the admin UI used previously, so configurations and costs
// survive across browsers and operators.
//
// `tenant_id: null` configurations are platform-defaults visible to
// every tenant. Names are unique within (tenant, name).

import type { ArcadiaClient } from "@crema/arcadia-core-client"

export type LlmProvider = "openai" | "anthropic" | "deepseek" | "qwen" | "lmstudio"

/**
 * Reasoning effort. Sent verbatim to OpenAI / DeepSeek (which take
 * `reasoning_effort` natively). Translated server-side into Anthropic's
 * thinking block. `off` (or null) skips the field entirely.
 */
export type ReasoningEffort = "off" | "low" | "medium" | "high" | "max"

/** Every valid reasoning effort, in ascending order of effort. */
export const REASONING_EFFORTS: ReasoningEffort[] = [
  "off",
  "low",
  "medium",
  "high",
  "max",
]

/** A persisted configuration as returned by the API. */
export interface LlmConfiguration {
  id: string
  tenant_id: string | null
  name: string
  provider: LlmProvider
  model: string
  base_url: string | null
  secret_name: string | null
  input_cost_per_million: number | null
  output_cost_per_million: number | null
  enabled: boolean
  reasoning_effort: ReasoningEffort | null
  metadata: Record<string, unknown>
  inserted_at: string
  updated_at: string
}

/** Payload accepted when creating (or, as a `Partial`, updating) a configuration. */
export interface LlmConfigurationInput {
  tenant_id?: string | null
  name: string
  provider: LlmProvider
  model: string
  base_url?: string | null
  secret_name?: string | null
  /** USD per 1M tokens. Omit to auto-fill from the catalog. */
  input_cost_per_million?: number | null
  output_cost_per_million?: number | null
  enabled?: boolean
  reasoning_effort?: ReasoningEffort | null
  metadata?: Record<string, unknown>
}

/** One entry of the provider/model catalog served by `${BASE}/catalog`. */
export interface CatalogEntry {
  provider: LlmProvider
  model: string
  input_cost_per_million: number
  output_cost_per_million: number
  context_window: number | null
  notes: string | null
}

const BASE = "/api/v1/admin/llm-configurations"

/**
 * Path of a single configuration resource. The id is percent-encoded so
 * that characters such as `/`, `?` or `#` cannot change the route.
 */
function configurationPath(id: string): string {
  return `${BASE}/${encodeURIComponent(id)}`
}

/**
 * Return the payload of a response that is typed as either `{ data: T }`
 * or a bare `T`: the `data` member when the response has one, otherwise
 * the response itself.
 */
function unwrapData<T>(res: { data: T } | T): T {
  return "data" in (res as object) ? (res as { data: T }).data : (res as T)
}

/**
 * List configurations.
 *
 * @param opts.enabled - when set (including `false`), sent as the `enabled` query parameter.
 * @param opts.tenant_id - when non-empty, sent as the `tenant_id` query parameter.
 */
export async function listConfigurations(
  arcadia: ArcadiaClient,
  opts: { enabled?: boolean; tenant_id?: string } = {},
): Promise<LlmConfiguration[]> {
  const params: Record<string, string> = {}
  // `!= null` keeps an explicit `enabled: false` filter.
  if (opts.enabled != null) params.enabled = String(opts.enabled)
  if (opts.tenant_id) params.tenant_id = opts.tenant_id
  const res = await arcadia.GET<{ data: LlmConfiguration[] }>(BASE, { params })
  return res.data
}

/** Fetch a single configuration by id. */
export async function getConfiguration(
  arcadia: ArcadiaClient,
  id: string,
): Promise<LlmConfiguration> {
  const res = await arcadia.GET<{ data: LlmConfiguration }>(configurationPath(id))
  return res.data
}

/** Create a configuration; the input is sent under the `configuration` body key. */
export async function createConfiguration(
  arcadia: ArcadiaClient,
  input: LlmConfigurationInput,
): Promise<LlmConfiguration> {
  const res = await arcadia.POST<{ data: LlmConfiguration }>(BASE, {
    body: { configuration: input },
  })
  return res.data
}

/** Patch a configuration with the given subset of fields. */
export async function updateConfiguration(
  arcadia: ArcadiaClient,
  id: string,
  input: Partial<LlmConfigurationInput>,
): Promise<LlmConfiguration> {
  const res = await arcadia.PATCH<{ data: LlmConfiguration }>(configurationPath(id), {
    body: { configuration: input },
  })
  return res.data
}

/** Delete a configuration by id. */
export async function deleteConfiguration(
  arcadia: ArcadiaClient,
  id: string,
): Promise<void> {
  await arcadia.DELETE(configurationPath(id))
}

/** Fetch the provider/model catalog. */
export async function getCatalog(arcadia: ArcadiaClient): Promise<CatalogEntry[]> {
  const res = await arcadia.GET<{ data: CatalogEntry[] }>(`${BASE}/catalog`)
  return res.data
}

/**
 * Compute cost in cents for a given input/output token count using a
 * configuration's published rates. Mirrors `LlmConfiguration.compute_cost_cents/3`
 * in arcadia-app — keep in sync.
 *
 * A `null` rate counts as 0. The result is rounded to whole cents.
 */
export function computeCostCents(
  config: Pick<LlmConfiguration, "input_cost_per_million" | "output_cost_per_million">,
  inputTokens: number,
  outputTokens: number,
): number {
  const inRate = config.input_cost_per_million ?? 0
  const outRate = config.output_cost_per_million ?? 0
  // Rates are USD per 1M tokens: scale to dollars, then to cents.
  const cents = ((inputTokens * inRate + outputTokens * outRate) / 1_000_000) * 100
  return Math.round(cents)
}

/**
 * Format a cost in cents as a dollar string: `"$0"` for exactly zero,
 * `"$0.XX"` (two decimals) below one dollar, and a two-decimal amount
 * formatted with the runtime's default locale otherwise.
 */
export function formatCost(cents: number): string {
  if (cents === 0) return "$0"
  const dollars = cents / 100
  if (cents < 100) return `$${dollars.toFixed(2)}`
  return `$${dollars.toLocaleString(undefined, { minimumFractionDigits: 2, maximumFractionDigits: 2 })}`
}

// ---------------------------------------------------------------------------
// LLM usage summary (cost roll-up)
// ---------------------------------------------------------------------------

export interface LlmUsageSummary {
  total_requests: number | null
  total_input_tokens: number | null
  total_output_tokens: number | null
  total_tokens: number | null
  total_cost_cents: number | null
  avg_latency_ms: number | null
}

/**
 * Fetch the usage roll-up.
 *
 * @param opts.days - when set, sent as the `days` query parameter.
 */
export async function getUsageSummary(
  arcadia: ArcadiaClient,
  opts: { days?: number } = {},
): Promise<LlmUsageSummary> {
  const params: Record<string, number> = {}
  if (opts.days != null) params.days = opts.days
  const res = await arcadia.GET<{ data: LlmUsageSummary } | LlmUsageSummary>(
    "/api/v1/ai/llm/usage/summary",
    { params },
  )
  return unwrapData<LlmUsageSummary>(res)
}

export interface UsageByModelRow {
  provider: string
  model: string
  requests: number
  total_tokens: number
  cost_cents: number
}

/**
 * Fetch usage broken down by (provider, model).
 *
 * @param opts.days - when set, sent as the `days` query parameter.
 */
export async function getUsageByModel(
  arcadia: ArcadiaClient,
  opts: { days?: number } = {},
): Promise<UsageByModelRow[]> {
  const params: Record<string, number> = {}
  if (opts.days != null) params.days = opts.days
  const res = await arcadia.GET<{ data: UsageByModelRow[] } | UsageByModelRow[]>(
    "/api/v1/ai/llm/usage/by-model",
    { params },
  )
  return unwrapData<UsageByModelRow[]>(res)
}

/** Find the spend row matching a given config's (provider, model). */
export function findSpend(
  rows: UsageByModelRow[],
  config: Pick<LlmConfiguration, "provider" | "model">,
): UsageByModelRow | undefined {
  return rows.find((r) => r.provider === config.provider && r.model === config.model)
}

// ---------------------------------------------------------------------------
// Active reasoning_effort (shared between settings panel and /ai composer)
//
// Stored under crema.ai.reasoning. Written when the operator stars a config
// in the settings panel (so the chip on /ai inherits that config's default
// on next mount) and when the operator cycles the THINK chip on /ai (per-
// conversation override). Wiped on Clear conversation.
// ---------------------------------------------------------------------------

const ACTIVE_REASONING_KEY = "crema.ai.reasoning"
const ACTIVE_REASONING_EVENT = "crema:ai-reasoning-change"

/** Runtime check that an arbitrary value is one of `REASONING_EFFORTS`. */
function isReasoningEffort(value: unknown): value is ReasoningEffort {
  return typeof value === "string" && (REASONING_EFFORTS as readonly string[]).includes(value)
}

/**
 * Read the active reasoning effort from localStorage.
 *
 * Returns `"off"` when there is no `window`, when storage cannot be read,
 * or when the stored value is missing or not a known effort.
 */
export function loadActiveReasoning(): ReasoningEffort {
  if (typeof window === "undefined") return "off"
  let stored: string | null
  try {
    stored = window.localStorage.getItem(ACTIVE_REASONING_KEY)
  } catch {
    // Storage access can throw (e.g. when blocked by browser settings).
    return "off"
  }
  return isReasoningEffort(stored) ? stored : "off"
}

/**
 * Persist the active reasoning effort and notify same-tab subscribers.
 *
 * `"off"` removes the key rather than storing it. A failing storage write
 * is ignored so the change event is still dispatched. No-op without a
 * `window`.
 */
export function saveActiveReasoning(v: ReasoningEffort): void {
  if (typeof window === "undefined") return
  try {
    if (v === "off") window.localStorage.removeItem(ACTIVE_REASONING_KEY)
    else window.localStorage.setItem(ACTIVE_REASONING_KEY, v)
  } catch {
    // Best effort: storage may be unavailable or full; subscribers in this
    // tab are still told about the new value below.
  }
  window.dispatchEvent(new CustomEvent(ACTIVE_REASONING_EVENT, { detail: v }))
}

/**
 * Subscribe to changes of the active reasoning effort.
 *
 * @param listener - called with the new effort on every change.
 * @returns a function that removes both event listeners; a no-op when
 *   there is no `window`.
 */
export function subscribeActiveReasoning(
  listener: (v: ReasoningEffort) => void,
): () => void {
  if (typeof window === "undefined") return () => {}
  const onChange = (e: Event) => {
    const detail = (e as CustomEvent<ReasoningEffort>).detail
    // Fall back to the stored value when the event carries no detail.
    if (detail) listener(detail)
    else listener(loadActiveReasoning())
  }
  // Same-tab via the custom event; cross-tab via the storage event.
  const onStorage = (e: StorageEvent) => {
    if (e.key === ACTIVE_REASONING_KEY) listener(loadActiveReasoning())
  }
  window.addEventListener(ACTIVE_REASONING_EVENT, onChange)
  window.addEventListener("storage", onStorage)
  return () => {
    window.removeEventListener(ACTIVE_REASONING_EVENT, onChange)
    window.removeEventListener("storage", onStorage)
  }
}