ai: per-config reasoning_effort + composer THINK chip

Two layers of thinking-mode control: (1) Per-config default (Settings → LLM): a new "Reasoning effort" Select in the Add/Edit dialog with off/low/medium/high/max, plus a hint per option ("no thinking" for off; ~2k, ~8k, ~24k, ~64k thinking tokens for low through max). The saved row's meta line surfaces the level inline (when it is not off) so it's visible without opening the editor. (2) Per-message override (composer chip): a new ReasoningChip next to the model picker. Clicking cycles through the same five levels. Minimal chrome when off (a muted "think" pill); sodium-amber active style with the level label when set. Persisted to crema.ai.reasoning so a refresh keeps the operator's intent; reset to off together with the conversation on Clear. When sending, withReasoning() merges reasoning_effort into the request body as a top-level field. The proxy forwards it untouched to OpenAI / DeepSeek (native field) and translates it to Anthropic's thinking block server-side. reasoningEffortRef sidesteps a useCallback ordering issue — regenerateLast/continueLast are declared before the state hook, so they read the ref instead of a stale closure. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-02 20:15:13 +10:00
parent 20494d1620
commit c379ebc37a
3 changed files with 179 additions and 4 deletions
--- a/app/components/settings/llm-configurations-panel.tsx
+++ b/app/components/settings/llm-configurations-panel.tsx
@@ -51,12 +51,14 @@ import {
  getUsageByModel,
  getUsageSummary,
  listConfigurations,
+  REASONING_EFFORTS,
  updateConfiguration,
  type CatalogEntry,
  type LlmConfiguration,
  type LlmConfigurationInput,
  type LlmProvider,
  type LlmUsageSummary,
+  type ReasoningEffort,
  type UsageByModelRow,
 } from "~/lib/arcadia/llm-configs"
 import { listSecrets, type Secret } from "~/lib/arcadia/secrets"
@@ -401,6 +403,15 @@ function ConfigRow({
          <span className="text-[11px] text-muted-foreground">
            {formatRate(c.input_cost_per_million)}/1M in ·{" "}
            {formatRate(c.output_cost_per_million)}/1M out
+            {c.reasoning_effort && c.reasoning_effort !== "off" ? (
+              <>
+                {" "}
+                · <span className="uppercase tracking-wider">think</span>{" "}
+                <span className="text-[var(--console-amber,oklch(0.78_0.15_60))]">
+                  {c.reasoning_effort}
+                </span>
+              </>
+            ) : null}
          </span>
        </div>
      </div>
@@ -480,6 +491,7 @@ function ConfigDialog({
          input_cost_per_million: existing.input_cost_per_million,
          output_cost_per_million: existing.output_cost_per_million,
          enabled: existing.enabled,
+          reasoning_effort: existing.reasoning_effort,
        }
      : emptyDraft(),
  )
@@ -612,6 +624,34 @@ function ConfigDialog({
              placeholder="0.60"
            />
          </Field>
+
+          <Field label="Reasoning effort (thinking models)" className="sm:col-span-2">
+            <Select
+              value={draft.reasoning_effort ?? "off"}
+              onValueChange={(v) =>
+                setDraft({
+                  ...draft,
+                  reasoning_effort: (v === "off" ? null : v) as ReasoningEffort | null,
+                })
+              }
+            >
+              <SelectTrigger>
+                <SelectValue />
+              </SelectTrigger>
+              <SelectContent>
+                {REASONING_EFFORTS.map((e) => (
+                  <SelectItem key={e} value={e}>
+                    <span className="flex items-center justify-between gap-3">
+                      <span className="capitalize">{e}</span>
+                      <span className="text-[10px] text-muted-foreground">
+                        {reasoningHint(e)}
+                      </span>
+                    </span>
+                  </SelectItem>
+                ))}
+              </SelectContent>
+            </Select>
+          </Field>
        </div>

        {err ? (
@@ -843,3 +883,18 @@ function formatRate(rate: number | null): string {
  if (rate === 0) return "free"
  return `$${rate.toFixed(2)}`
 }
+
+function reasoningHint(e: ReasoningEffort): string {
+  switch (e) {
+    case "off":
+      return "no thinking"
+    case "low":
+      return "~2k thinking tokens"
+    case "medium":
+      return "~8k thinking tokens"
+    case "high":
+      return "~24k thinking tokens"
+    case "max":
+      return "~64k — slowest, most thorough"
+  }
+}
--- a/app/lib/arcadia/llm-configs.ts
+++ b/app/lib/arcadia/llm-configs.ts
@@ -12,6 +12,20 @@ import type { ArcadiaClient } from "@crema/arcadia-client"

 export type LlmProvider = "openai" | "anthropic" | "deepseek" | "qwen" | "lmstudio"

+/**
+ * Reasoning effort. Sent verbatim to OpenAI / DeepSeek (which take
+ * `reasoning_effort` natively). Translated server-side into Anthropic's
+ * thinking block. `off` (or null) skips the field entirely.
+ */
+export type ReasoningEffort = "off" | "low" | "medium" | "high" | "max"
+export const REASONING_EFFORTS: ReasoningEffort[] = [
+  "off",
+  "low",
+  "medium",
+  "high",
+  "max",
+]
+
 export interface LlmConfiguration {
  id: string
  tenant_id: string | null
@@ -23,6 +37,7 @@ export interface LlmConfiguration {
  input_cost_per_million: number | null
  output_cost_per_million: number | null
  enabled: boolean
+  reasoning_effort: ReasoningEffort | null
  metadata: Record<string, unknown>
  inserted_at: string
  updated_at: string
@@ -39,6 +54,7 @@ export interface LlmConfigurationInput {
  input_cost_per_million?: number | null
  output_cost_per_million?: number | null
  enabled?: boolean
+  reasoning_effort?: ReasoningEffort | null
  metadata?: Record<string, unknown>
 }

--- a/app/routes/ai.tsx
+++ b/app/routes/ai.tsx
@@ -19,6 +19,7 @@ import {
  Plus,
  RefreshCw,
  RotateCcw,
+  Sparkles,
  Square,
  Trash2,
  Undo2,
@@ -179,6 +180,30 @@ function clearLive() {
  localStorage.removeItem(LIVE_KEY)
 }

+/* Per-conversation reasoning override. Cycle order matters — the composer
+ * chip walks this array. */
+type ReasoningEffort = "off" | "low" | "medium" | "high" | "max"
+const REASONING_LEVELS: ReasoningEffort[] = ["off", "low", "medium", "high", "max"]
+const REASONING_KEY = "crema.ai.reasoning"
+
+function loadReasoning(): ReasoningEffort {
+  if (typeof window === "undefined") return "off"
+  const v = localStorage.getItem(REASONING_KEY) as ReasoningEffort | null
+  return v && REASONING_LEVELS.includes(v) ? v : "off"
+}
+function saveReasoning(v: ReasoningEffort) {
+  if (typeof window === "undefined") return
+  if (v === "off") localStorage.removeItem(REASONING_KEY)
+  else localStorage.setItem(REASONING_KEY, v)
+}
+function withReasoning<T extends Record<string, unknown>>(
+  extras: T,
+  effort: ReasoningEffort,
+): T & { reasoning_effort?: string } {
+  if (effort === "off") return extras
+  return { ...extras, reasoning_effort: effort }
+}
+
 type StoredMessage = { role: "user" | "assistant"; content: string }
 function loadAISnapshot(): StoredMessage[] | null {
  if (typeof window === "undefined") return null
@@ -512,6 +537,7 @@ function ChatSurface({
    setMessages([])
    setAgentHistory(new Map())
    setMessageAgents(new Map())
+    setReasoningEffort("off")
  }, [setMessages])

  // Auto tool-loop using native function calls. Reads run automatically;
@@ -520,6 +546,10 @@ function ChatSurface({
  const toolIterationsRef = useRef(0)
  const processedTurnRef = useRef(-1)
  const prevStreamingRef = useRef(isStreaming)
+  // Mirror of reasoningEffort state, kept current via the effect below so
+  // regenerate/continue callbacks (declared before the state hook) can
+  // read the latest value without becoming reasoningEffort dependents.
+  const reasoningEffortRef = useRef<ReasoningEffort>("off")

  // Maintain agent-history. Two triggers:
  //   1. When a turn finishes streaming and at least one user/assistant
@@ -747,12 +777,18 @@ function ChatSurface({
    const text = messages[lastUserIdx].content
    setMessages(messages.slice(0, lastUserIdx))
    // Defer so the state flush completes before send() reads `messages`.
-    setTimeout(() => void send(text, { tools: getOpenAITools() }), 0)
+    setTimeout(
+      () => void send(text, withReasoning({ tools: getOpenAITools() }, reasoningEffortRef.current)),
+      0,
+    )
  }, [messages, setMessages, send, isStreaming])

  const continueLast = useCallback(() => {
    if (isStreaming || messages.length === 0) return
-    void send("Please continue your previous reply.", { tools: getOpenAITools() })
+    void send(
+      "Please continue your previous reply.",
+      withReasoning({ tools: getOpenAITools() }, reasoningEffortRef.current),
+    )
  }, [isStreaming, messages.length, send])

  const compactConversation = useCallback(async () => {
@@ -834,13 +870,31 @@ function ChatSurface({
    endRef.current?.scrollIntoView({ block: "end" })
  }, [messages.length, lastContent, isStreaming])

+  // Per-conversation reasoning override. Persists across page reloads via
+  // localStorage so the operator's chosen level survives a refresh, but
+  // resets when they clear the conversation. "off" = pass nothing through.
+  const [reasoningEffort, setReasoningEffort] = useState<ReasoningEffort>(
+    () => loadReasoning(),
+  )
+  useEffect(() => {
+    saveReasoning(reasoningEffort)
+    reasoningEffortRef.current = reasoningEffort
+  }, [reasoningEffort])
+
+  const cycleReasoning = useCallback(() => {
+    setReasoningEffort((cur) => {
+      const idx = REASONING_LEVELS.indexOf(cur)
+      return REASONING_LEVELS[(idx + 1) % REASONING_LEVELS.length]
+    })
+  }, [])
+
  const submit = useCallback(() => {
    const text = input.trim()
    if (!text || isStreaming) return
    setInput("")
    stickRef.current = true
-    void send(text, { tools: getOpenAITools() })
-  }, [input, isStreaming, send])
+    void send(text, withReasoning({ tools: getOpenAITools() }, reasoningEffort))
+  }, [input, isStreaming, send, reasoningEffort])

  const isEmpty = messages.length === 0

@@ -1052,6 +1106,8 @@ function ChatSurface({
            isMock={isMock}
            isCompacting={compacting}
            placeholder={isEmpty ? "Ask anything…" : "Reply…"}
+            reasoning={reasoningEffort}
+            onCycleReasoning={cycleReasoning}
          />
          {showPromptOpen && (
            <SystemPromptDialog
@@ -1303,6 +1359,8 @@ function Composer({
  isMock,
  isCompacting,
  placeholder,
+  reasoning,
+  onCycleReasoning,
 }: {
  value: string
  onChange: (v: string) => void
@@ -1331,6 +1389,8 @@ function Composer({
  isMock: boolean
  isCompacting: boolean
  placeholder: string
+  reasoning: ReasoningEffort
+  onCycleReasoning: () => void
 }) {
  const taRef = useRef<HTMLTextAreaElement | null>(null)

@@ -1410,6 +1470,7 @@ function Composer({
              model={model}
              onModelChange={onModelChange}
            />
+            <ReasoningChip value={reasoning} onCycle={onCycleReasoning} />
            <VoiceInputButton
              onTranscript={(t) => onChange((value ? value + " " : "") + t)}
            />
@@ -1470,6 +1531,49 @@ function ModelSelector({
  )
 }

+/**
+ * Reasoning-effort chip for the composer. Click cycles off → low → medium →
+ * high → max → off. When non-off, the next send includes
+ * `reasoning_effort: <level>` which the proxy passes to OpenAI/DeepSeek
+ * natively and translates to Anthropic's thinking block server-side.
+ *
+ * Visually: a muted "think" pill when off (minimal chrome for the common
+ * case); a sodium-amber pill showing the level when set.
+ */
+function ReasoningChip({
+  value,
+  onCycle,
+}: {
+  value: ReasoningEffort
+  onCycle: () => void
+}) {
+  const active = value !== "off"
+  return (
+    <button
+      type="button"
+      onClick={onCycle}
+      data-action="ai-reasoning"
+      title={
+        active
+          ? `Reasoning: ${value}. Click to cycle.`
+          : "Reasoning: off. Click to enable thinking mode."
+      }
+      className={[
+        "inline-flex items-center gap-1.5 rounded-full px-2.5 py-1 text-[11px] font-mono uppercase tracking-[0.12em] transition-colors",
+        active
+          ? "bg-amber-500/15 text-amber-500 hover:bg-amber-500/25 dark:text-amber-300"
+          : "text-muted-foreground hover:bg-accent hover:text-foreground",
+      ].join(" ")}
+    >
+      <Sparkles className="size-3" />
+      <span className="select-none">
+        think
+        {active ? <span className="ml-1 font-semibold">{value}</span> : null}
+      </span>
+    </button>
+  )
+}
+
 function AgentChip({
  agents,
  activeAgent,