Files
arcadia-admin/app/lib/arcadia/llm-configs.ts
jules c379ebc37a ai: per-config reasoning_effort + composer THINK chip
Two layers for thinking-mode control:

1. Per-config default (Settings → LLM)
   New "Reasoning effort" Select in the Add/Edit dialog with
   off/low/medium/high/max + a budget hint per option (~2k, ~8k,
   ~24k, ~64k thinking tokens). Saved row meta line surfaces the
   level inline so it's visible without opening the editor.

2. Per-message override (composer chip)
   New ReasoningChip next to the model picker. Click cycles through
   the same five levels. Hidden chrome when off (muted "think" pill);
   sodium-amber active style with the level label when set.

   Persisted to crema.ai.reasoning so a refresh keeps the operator's
   intent, wiped together with the conversation on Clear.

When sending, withReasoning() merges reasoning_effort into the request
body as a top-level field. The proxy forwards it untouched to OpenAI /
DeepSeek (native field) and translates to Anthropic's thinking block
server-side.

reasoningEffortRef sidesteps a useCallback ordering issue —
regenerateLast/continueLast are declared before the state hook, so
they read the ref instead of a stale closure.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-02 20:15:13 +10:00

201 lines
6.3 KiB
TypeScript

// Arcadia LLM configurations API.
//
// Backed by /api/v1/admin/llm-configurations — server-side persisted
// provider/model/secret/cost settings. Replaces the localStorage-driven
// settings the admin UI used previously, so configurations and costs
// survive across browsers and operators.
//
// Configurations with `tenant_id: null` are platform defaults visible to
// every tenant. A configuration's name is unique within its tenant.
import type { ArcadiaClient } from "@crema/arcadia-client"
/** Provider identifiers accepted in the `provider` field of a configuration or catalog entry. */
export type LlmProvider = "openai" | "anthropic" | "deepseek" | "qwen" | "lmstudio"
/**
 * Thinking-effort level attached to a configuration or request.
 *
 * OpenAI and DeepSeek accept `reasoning_effort` natively, so the value is
 * passed through verbatim; for Anthropic it is translated server-side into
 * a thinking block. `off` (like `null`) means the field is not sent at all.
 */
export type ReasoningEffort =
  | "off"
  | "low"
  | "medium"
  | "high"
  | "max"

/** Every effort level, ordered from `off` up to `max`. */
export const REASONING_EFFORTS: ReasoningEffort[] = ["off", "low", "medium", "high", "max"]
/**
 * A persisted LLM configuration in the shape the admin API returns it
 * (the list/get/create/update helpers in this module all resolve to it).
 */
export interface LlmConfiguration {
id: string
/** Owning tenant; `null` marks a platform default visible to every tenant. */
tenant_id: string | null
/** Unique in combination with the tenant. */
name: string
provider: LlmProvider
model: string
// NOTE(review): presumably an endpoint override for the provider; what `null` selects is not shown here — confirm.
base_url: string | null
// NOTE(review): presumably the name of a stored secret holding the provider credential — confirm.
secret_name: string | null
/** Rate per one million input tokens; `computeCostCents` treats `null` as 0. */
input_cost_per_million: number | null
/** Rate per one million output tokens; `computeCostCents` treats `null` as 0. */
output_cost_per_million: number | null
enabled: boolean
/** Reasoning effort stored on the configuration; `off` or `null` means no reasoning field is sent. */
reasoning_effort: ReasoningEffort | null
/** Free-form key/value data; its schema is not defined in this module. */
metadata: Record<string, unknown>
// NOTE(review): presumably server-assigned timestamps serialized as strings — confirm the format.
inserted_at: string
updated_at: string
}
/**
 * Writable fields of a configuration. `createConfiguration` sends the full
 * shape; `updateConfiguration` accepts any subset (`Partial`). Only `name`,
 * `provider` and `model` are required.
 */
export interface LlmConfigurationInput {
/** `null` targets a platform default visible to every tenant. */
tenant_id?: string | null
/** Unique in combination with the tenant. */
name: string
provider: LlmProvider
model: string
base_url?: string | null
secret_name?: string | null
/** USD per 1M tokens. Omit to auto-fill from the catalog. */
input_cost_per_million?: number | null
// NOTE(review): presumably the same unit and auto-fill rule as the input rate — confirm.
output_cost_per_million?: number | null
enabled?: boolean
/** Reasoning effort for this configuration; `off` or `null` means no reasoning field is sent. */
reasoning_effort?: ReasoningEffort | null
metadata?: Record<string, unknown>
}
/** One entry of the model catalog returned by `getCatalog`: a (provider, model) pair with its rates. */
export interface CatalogEntry {
provider: LlmProvider
model: string
/** Rate per one million input tokens. */
input_cost_per_million: number
/** Rate per one million output tokens. */
output_cost_per_million: number
// NOTE(review): presumably measured in tokens; `null` presumably means unknown — confirm.
context_window: number | null
notes: string | null
}
// Root path of the admin LLM-configurations resource; the configuration
// CRUD helpers and the catalog lookup all build their URLs from it.
const BASE = "/api/v1/admin/llm-configurations"
/**
 * Fetch the list of LLM configurations, optionally filtered.
 *
 * @param arcadia - API client used to issue the GET request.
 * @param opts - Optional filters. `enabled` is sent as the string
 *   `"true"`/`"false"` when it is set; `tenant_id` is sent only when it is a
 *   non-empty string.
 * @returns The `data` array of the response envelope.
 */
export async function listConfigurations(
  arcadia: ArcadiaClient,
  opts: { enabled?: boolean; tenant_id?: string } = {},
): Promise<LlmConfiguration[]> {
  const { enabled, tenant_id: tenantId } = opts
  const query: Record<string, string | number | boolean | null | undefined> = {}

  if (enabled != null) {
    query.enabled = String(enabled)
  }
  if (tenantId) {
    query.tenant_id = tenantId
  }

  const { data: configurations } = await arcadia.GET<{ data: LlmConfiguration[] }>(BASE, {
    params: query,
  })
  return configurations
}
/**
 * Fetch a single LLM configuration by id.
 *
 * @param arcadia - API client used to issue the GET request.
 * @param id - Configuration id; interpolated into the URL path as-is.
 * @returns The `data` member of the response envelope.
 */
export async function getConfiguration(
  arcadia: ArcadiaClient,
  id: string,
): Promise<LlmConfiguration> {
  const path = `${BASE}/${id}`
  const { data: configuration } = await arcadia.GET<{ data: LlmConfiguration }>(path)
  return configuration
}
/**
 * Create a new LLM configuration.
 *
 * @param arcadia - API client used to issue the POST request.
 * @param input - Fields of the new configuration; sent wrapped under a
 *   `configuration` key in the request body.
 * @returns The `data` member of the response envelope.
 */
export async function createConfiguration(
  arcadia: ArcadiaClient,
  input: LlmConfigurationInput,
): Promise<LlmConfiguration> {
  const body = { configuration: input }
  const { data: created } = await arcadia.POST<{ data: LlmConfiguration }>(BASE, { body })
  return created
}
/**
 * Patch an existing LLM configuration.
 *
 * @param arcadia - API client used to issue the PATCH request.
 * @param id - Configuration id; interpolated into the URL path as-is.
 * @param input - Subset of fields to change; sent wrapped under a
 *   `configuration` key in the request body.
 * @returns The `data` member of the response envelope.
 */
export async function updateConfiguration(
  arcadia: ArcadiaClient,
  id: string,
  input: Partial<LlmConfigurationInput>,
): Promise<LlmConfiguration> {
  const path = `${BASE}/${id}`
  const body = { configuration: input }
  const { data: updated } = await arcadia.PATCH<{ data: LlmConfiguration }>(path, { body })
  return updated
}
/**
 * Delete an LLM configuration.
 *
 * @param arcadia - API client used to issue the DELETE request.
 * @param id - Configuration id; interpolated into the URL path as-is.
 * @returns Resolves once the request has completed; any response body is ignored.
 */
export async function deleteConfiguration(
  arcadia: ArcadiaClient,
  id: string,
): Promise<void> {
  const path = `${BASE}/${id}`
  await arcadia.DELETE(path)
}
/**
 * Fetch the model catalog from the `catalog` sub-resource.
 *
 * @param arcadia - API client used to issue the GET request.
 * @returns The `data` array of the response envelope.
 */
export async function getCatalog(arcadia: ArcadiaClient): Promise<CatalogEntry[]> {
  const path = `${BASE}/catalog`
  const { data: entries } = await arcadia.GET<{ data: CatalogEntry[] }>(path)
  return entries
}
/**
 * Price a request in whole cents from its token counts and a
 * configuration's per-million-token rates.
 *
 * Mirrors `LlmConfiguration.compute_cost_cents/3` in arcadia-app — the two
 * implementations must be kept in sync.
 *
 * @param config - Source of the input/output rates; a `null` (or missing) rate counts as 0.
 * @param inputTokens - Number of input tokens.
 * @param outputTokens - Number of output tokens.
 * @returns The cost in cents, rounded with `Math.round`.
 */
export function computeCostCents(
  config: Pick<LlmConfiguration, "input_cost_per_million" | "output_cost_per_million">,
  inputTokens: number,
  outputTokens: number,
): number {
  const inputRate = config.input_cost_per_million ?? 0
  const outputRate = config.output_cost_per_million ?? 0

  // Same operation order as the reference implementation: sum the
  // rate-weighted token counts, scale per-million down, then convert to cents.
  const weightedTokens = inputTokens * inputRate + outputTokens * outputRate
  const dollars = weightedTokens / 1_000_000
  return Math.round(dollars * 100)
}
/**
 * Format an amount given in cents as a US-dollar string.
 *
 * - exactly zero renders as `"$0"`
 * - a magnitude below one dollar renders with two decimals, e.g. `"$0.05"`
 * - one dollar and above renders with two decimals and thousands
 *   separators, e.g. `"$1,234.56"`
 *
 * Negative amounts carry a leading minus sign (`"-$1,234.56"`) and go
 * through the same magnitude rules, so large negative values keep their
 * grouping. Number formatting is pinned to `en-US` so both branches use the
 * same decimal separator regardless of the runtime locale.
 *
 * @param cents - Amount in cents; may be fractional.
 * @returns The formatted dollar string.
 */
export function formatCost(cents: number): string {
  if (cents === 0) return "$0"

  // Format the magnitude and re-attach the sign in front of the currency
  // symbol, instead of letting "-" land between "$" and the digits.
  const sign = cents < 0 ? "-" : ""
  const dollars = Math.abs(cents) / 100

  if (dollars < 1) return `${sign}$${dollars.toFixed(2)}`

  const grouped = dollars.toLocaleString("en-US", {
    minimumFractionDigits: 2,
    maximumFractionDigits: 2,
  })
  return `${sign}$${grouped}`
}
// ---------------------------------------------------------------------------
// LLM usage summary (cost roll-up)
// ---------------------------------------------------------------------------
/**
 * Aggregate LLM usage figures as returned by `getUsageSummary`.
 *
 * Every field is nullable.
 * NOTE(review): presumably `null` when the window contains no usage — confirm against the endpoint.
 */
export interface LlmUsageSummary {
total_requests: number | null
total_input_tokens: number | null
total_output_tokens: number | null
total_tokens: number | null
/** Total cost expressed in cents. */
total_cost_cents: number | null
/** Average latency expressed in milliseconds. */
avg_latency_ms: number | null
}
/**
 * Fetch the aggregate LLM usage summary.
 *
 * The response is accepted in either of two shapes: wrapped in a
 * `{ data: ... }` envelope, or as the bare summary object.
 *
 * @param arcadia - API client used to issue the GET request.
 * @param opts - `days` is forwarded as a query parameter when it is set.
 * @returns The usage summary, unwrapped from its envelope if there was one.
 */
export async function getUsageSummary(
  arcadia: ArcadiaClient,
  opts: { days?: number } = {},
): Promise<LlmUsageSummary> {
  const query: Record<string, string | number | boolean | null | undefined> = {}
  if (opts.days != null) {
    query.days = opts.days
  }

  const payload = await arcadia.GET<{ data: LlmUsageSummary } | LlmUsageSummary>(
    "/api/v1/ai/llm/usage/summary",
    { params: query },
  )

  // The summary itself has no `data` key, so its presence identifies the envelope.
  if ("data" in payload) {
    return payload.data
  }
  return payload
}
/**
 * One row of the per-model usage breakdown returned by `getUsageByModel`.
 * `findSpend` matches rows to configurations on the (provider, model) pair.
 */
export interface UsageByModelRow {
/** Typed as a plain string here, not `LlmProvider`. */
provider: string
model: string
requests: number
total_tokens: number
/** Cost for this (provider, model) expressed in cents. */
cost_cents: number
}
/**
 * Fetch the per-model usage breakdown.
 *
 * The response is accepted in either of two shapes: wrapped in a
 * `{ data: [...] }` envelope, or as the bare array of rows.
 *
 * @param arcadia - API client used to issue the GET request.
 * @param opts - `days` is forwarded as a query parameter when it is set.
 * @returns The usage rows, unwrapped from their envelope if there was one.
 */
export async function getUsageByModel(
  arcadia: ArcadiaClient,
  opts: { days?: number } = {},
): Promise<UsageByModelRow[]> {
  const query: Record<string, string | number | boolean | null | undefined> = {}
  if (opts.days != null) {
    query.days = opts.days
  }

  const payload = await arcadia.GET<{ data: UsageByModelRow[] } | UsageByModelRow[]>(
    "/api/v1/ai/llm/usage/by-model",
    { params: query },
  )

  // A bare array has no `data` key, so its presence identifies the envelope.
  if ("data" in payload) {
    return payload.data
  }
  return payload
}
/**
 * Look up the usage row belonging to a configuration.
 *
 * @param rows - Usage rows to search, in order.
 * @param config - Configuration whose `provider` and `model` identify the row.
 * @returns The first row whose provider and model both match exactly, or
 *   `undefined` when no row matches.
 */
export function findSpend(
  rows: UsageByModelRow[],
  config: Pick<LlmConfiguration, "provider" | "model">,
): UsageByModelRow | undefined {
  for (const row of rows) {
    const sameProvider = row.provider === config.provider
    if (sameProvider && row.model === config.model) {
      return row
    }
  }
  return undefined
}