From c379ebc37abde1fbb5895ae1f3cdb233ea62ca7f Mon Sep 17 00:00:00 2001 From: jules Date: Sat, 2 May 2026 20:15:13 +1000 Subject: [PATCH] ai: per-config reasoning_effort + composer THINK chip MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Two layers for thinking-mode control: 1. Per-config default (Settings → LLM) New "Reasoning effort" Select in the Add/Edit dialog with off/low/medium/high/max + a budget hint per option (~2k, ~8k, ~24k, ~64k thinking tokens). Saved row meta line surfaces the level inline so it's visible without opening the editor. 2. Per-message override (composer chip) New ReasoningChip next to the model picker. Click cycles through the same five levels. Hidden chrome when off (muted "think" pill); sodium-amber active style with the level label when set. Persisted to crema.ai.reasoning so a refresh keeps the operator's intent, wiped together with the conversation on Clear. When sending, withReasoning() merges reasoning_effort into the request body as a top-level field. The proxy forwards it untouched to OpenAI / DeepSeek (native field) and translates to Anthropic's thinking block server-side. reasoningEffortRef sidesteps a useCallback ordering issue — regenerateLast/continueLast are declared before the state hook, so they read the ref instead of a stale closure. Co-Authored-By: Claude Opus 4.7 (1M context) --- .../settings/llm-configurations-panel.tsx | 55 +++++++++ app/lib/arcadia/llm-configs.ts | 16 +++ app/routes/ai.tsx | 112 +++++++++++++++++- 3 files changed, 179 insertions(+), 4 deletions(-) diff --git a/app/components/settings/llm-configurations-panel.tsx b/app/components/settings/llm-configurations-panel.tsx index 3c85310..cc85200 100644 --- a/app/components/settings/llm-configurations-panel.tsx +++ b/app/components/settings/llm-configurations-panel.tsx @@ -51,12 +51,14 @@ import { getUsageByModel, getUsageSummary, listConfigurations, + REASONING_EFFORTS, updateConfiguration, type CatalogEntry, type LlmConfiguration, type LlmConfigurationInput, type LlmProvider, type LlmUsageSummary, + type ReasoningEffort, type UsageByModelRow, } from "~/lib/arcadia/llm-configs" import { listSecrets, type Secret } from "~/lib/arcadia/secrets" @@ -401,6 +403,15 @@ function ConfigRow({ {formatRate(c.input_cost_per_million)}/1M in ·{" "} {formatRate(c.output_cost_per_million)}/1M out + {c.reasoning_effort && c.reasoning_effort !== "off" ? ( + <> + {" "} + · think{" "} + + {c.reasoning_effort} + + + ) : null} @@ -480,6 +491,7 @@ function ConfigDialog({ input_cost_per_million: existing.input_cost_per_million, output_cost_per_million: existing.output_cost_per_million, enabled: existing.enabled, + reasoning_effort: existing.reasoning_effort, } : emptyDraft(), ) @@ -612,6 +624,34 @@ function ConfigDialog({ placeholder="0.60" /> + + + + {err ? ( @@ -843,3 +883,18 @@ function formatRate(rate: number | null): string { if (rate === 0) return "free" return `$${rate.toFixed(2)}` } + +function reasoningHint(e: ReasoningEffort): string { + switch (e) { + case "off": + return "no thinking" + case "low": + return "~2k thinking tokens" + case "medium": + return "~8k thinking tokens" + case "high": + return "~24k thinking tokens" + case "max": + return "~64k — slowest, most thorough" + } +} diff --git a/app/lib/arcadia/llm-configs.ts b/app/lib/arcadia/llm-configs.ts index 4f7f98b..9fd8e14 100644 --- a/app/lib/arcadia/llm-configs.ts +++ b/app/lib/arcadia/llm-configs.ts @@ -12,6 +12,20 @@ import type { ArcadiaClient } from "@crema/arcadia-client" export type LlmProvider = "openai" | "anthropic" | "deepseek" | "qwen" | "lmstudio" +/** + * Reasoning effort. Sent verbatim to OpenAI / DeepSeek (which take + * `reasoning_effort` natively). Translated server-side into Anthropic's + * thinking block. `off` (or null) skips the field entirely. + */ +export type ReasoningEffort = "off" | "low" | "medium" | "high" | "max" +export const REASONING_EFFORTS: ReasoningEffort[] = [ + "off", + "low", + "medium", + "high", + "max", +] + export interface LlmConfiguration { id: string tenant_id: string | null @@ -23,6 +37,7 @@ export interface LlmConfiguration { input_cost_per_million: number | null output_cost_per_million: number | null enabled: boolean + reasoning_effort: ReasoningEffort | null metadata: Record inserted_at: string updated_at: string @@ -39,6 +54,7 @@ export interface LlmConfigurationInput { input_cost_per_million?: number | null output_cost_per_million?: number | null enabled?: boolean + reasoning_effort?: ReasoningEffort | null metadata?: Record } diff --git a/app/routes/ai.tsx b/app/routes/ai.tsx index f1db217..7965a22 100644 --- a/app/routes/ai.tsx +++ b/app/routes/ai.tsx @@ -19,6 +19,7 @@ import { Plus, RefreshCw, RotateCcw, + Sparkles, Square, Trash2, Undo2, @@ -179,6 +180,30 @@ function clearLive() { localStorage.removeItem(LIVE_KEY) } +/* Per-conversation reasoning override. Cycle order matters — the composer + * chip walks this array. */ +type ReasoningEffort = "off" | "low" | "medium" | "high" | "max" +const REASONING_LEVELS: ReasoningEffort[] = ["off", "low", "medium", "high", "max"] +const REASONING_KEY = "crema.ai.reasoning" + +function loadReasoning(): ReasoningEffort { + if (typeof window === "undefined") return "off" + const v = localStorage.getItem(REASONING_KEY) as ReasoningEffort | null + return v && REASONING_LEVELS.includes(v) ? v : "off" +} +function saveReasoning(v: ReasoningEffort) { + if (typeof window === "undefined") return + if (v === "off") localStorage.removeItem(REASONING_KEY) + else localStorage.setItem(REASONING_KEY, v) +} +function withReasoning>( + extras: T, + effort: ReasoningEffort, +): T & { reasoning_effort?: string } { + if (effort === "off") return extras + return { ...extras, reasoning_effort: effort } +} + type StoredMessage = { role: "user" | "assistant"; content: string } function loadAISnapshot(): StoredMessage[] | null { if (typeof window === "undefined") return null @@ -512,6 +537,7 @@ function ChatSurface({ setMessages([]) setAgentHistory(new Map()) setMessageAgents(new Map()) + setReasoningEffort("off") }, [setMessages]) // Auto tool-loop using native function calls. Reads run automatically; @@ -520,6 +546,10 @@ function ChatSurface({ const toolIterationsRef = useRef(0) const processedTurnRef = useRef(-1) const prevStreamingRef = useRef(isStreaming) + // Mirror of reasoningEffort state, kept current via the effect below so + // regenerate/continue callbacks (declared before the state hook) can + // read the latest value without becoming reasoningEffort dependents. + const reasoningEffortRef = useRef("off") // Maintain agent-history. Two triggers: // 1. When a turn finishes streaming and at least one user/assistant @@ -747,12 +777,18 @@ function ChatSurface({ const text = messages[lastUserIdx].content setMessages(messages.slice(0, lastUserIdx)) // Defer so the state flush completes before send() reads `messages`. - setTimeout(() => void send(text, { tools: getOpenAITools() }), 0) + setTimeout( + () => void send(text, withReasoning({ tools: getOpenAITools() }, reasoningEffortRef.current)), + 0, + ) }, [messages, setMessages, send, isStreaming]) const continueLast = useCallback(() => { if (isStreaming || messages.length === 0) return - void send("Please continue your previous reply.", { tools: getOpenAITools() }) + void send( + "Please continue your previous reply.", + withReasoning({ tools: getOpenAITools() }, reasoningEffortRef.current), + ) }, [isStreaming, messages.length, send]) const compactConversation = useCallback(async () => { @@ -834,13 +870,31 @@ function ChatSurface({ endRef.current?.scrollIntoView({ block: "end" }) }, [messages.length, lastContent, isStreaming]) + // Per-conversation reasoning override. Persists across page reloads via + // localStorage so the operator's chosen level survives a refresh, but + // resets when they clear the conversation. "off" = pass nothing through. + const [reasoningEffort, setReasoningEffort] = useState( + () => loadReasoning(), + ) + useEffect(() => { + saveReasoning(reasoningEffort) + reasoningEffortRef.current = reasoningEffort + }, [reasoningEffort]) + + const cycleReasoning = useCallback(() => { + setReasoningEffort((cur) => { + const idx = REASONING_LEVELS.indexOf(cur) + return REASONING_LEVELS[(idx + 1) % REASONING_LEVELS.length] + }) + }, []) + const submit = useCallback(() => { const text = input.trim() if (!text || isStreaming) return setInput("") stickRef.current = true - void send(text, { tools: getOpenAITools() }) - }, [input, isStreaming, send]) + void send(text, withReasoning({ tools: getOpenAITools() }, reasoningEffort)) + }, [input, isStreaming, send, reasoningEffort]) const isEmpty = messages.length === 0 @@ -1052,6 +1106,8 @@ function ChatSurface({ isMock={isMock} isCompacting={compacting} placeholder={isEmpty ? "Ask anything…" : "Reply…"} + reasoning={reasoningEffort} + onCycleReasoning={cycleReasoning} /> {showPromptOpen && ( void @@ -1331,6 +1389,8 @@ function Composer({ isMock: boolean isCompacting: boolean placeholder: string + reasoning: ReasoningEffort + onCycleReasoning: () => void }) { const taRef = useRef(null) @@ -1410,6 +1470,7 @@ function Composer({ model={model} onModelChange={onModelChange} /> + onChange((value ? value + " " : "") + t)} /> @@ -1470,6 +1531,49 @@ function ModelSelector({ ) } +/** + * Reasoning-effort chip for the composer. Click cycles off → low → medium → + * high → max → off. When non-off, the next send includes + * `reasoning_effort: ` which the proxy passes to OpenAI/DeepSeek + * natively and translates to Anthropic's thinking block server-side. + * + * Visually: hidden when off (no chrome clutter for the common case), + * surfaces as a sodium-amber pill when set. + */ +function ReasoningChip({ + value, + onCycle, +}: { + value: ReasoningEffort + onCycle: () => void +}) { + const active = value !== "off" + return ( + + ) +} + function AgentChip({ agents, activeAgent,