ai: per-config reasoning_effort + composer THINK chip

Two layers for thinking-mode control:

1. Per-config default (Settings → LLM)
   New "Reasoning effort" Select in the Add/Edit dialog with off/low/medium/high/max + a budget hint per option (~2k, ~8k, ~24k, ~64k thinking tokens). Saved row meta line surfaces the level inline so it's visible without opening the editor.

2. Per-message override (composer chip)
   New ReasoningChip next to the model picker. Click cycles through the same five levels. Hidden chrome when off (muted "think" pill); sodium-amber active style with the level label when set. Persisted to crema.ai.reasoning so a refresh keeps the operator's intent, wiped together with the conversation on Clear.

When sending, withReasoning() merges reasoning_effort into the request body as a top-level field. The proxy forwards it untouched to OpenAI / DeepSeek (native field) and translates to Anthropic's thinking block server-side.

reasoningEffortRef sidesteps a useCallback ordering issue — regenerateLast/continueLast are declared before the state hook, so they read the ref instead of a stale closure.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-02 20:15:13 +10:00
parent 20494d1620
commit c379ebc37a
3 changed files with 179 additions and 4 deletions
--- a/app/routes/ai.tsx
+++ b/app/routes/ai.tsx
@@ -19,6 +19,7 @@ import {
  Plus,
  RefreshCw,
  RotateCcw,
+  Sparkles,
  Square,
  Trash2,
  Undo2,
@@ -179,6 +180,30 @@ function clearLive() {
  localStorage.removeItem(LIVE_KEY)
 }

+/* Per-conversation reasoning override. The order of REASONING_LEVELS is
+ * significant — the composer chip walks this array front to back — so the
+ * array is readonly to keep the cycle order from being mutated at runtime. */
+type ReasoningEffort = "off" | "low" | "medium" | "high" | "max"
+const REASONING_LEVELS: readonly ReasoningEffort[] = ["off", "low", "medium", "high", "max"]
+/* localStorage key under which the selected level is persisted. */
+const REASONING_KEY = "crema.ai.reasoning"
+
+/**
+ * Read the persisted reasoning level from localStorage.
+ *
+ * @returns The stored level when it is one of REASONING_LEVELS; "off" when
+ *   there is no `window`, when nothing valid is stored, or when storage
+ *   cannot be read.
+ */
+function loadReasoning(): ReasoningEffort {
+  if (typeof window === "undefined") return "off"
+  try {
+    const stored = localStorage.getItem(REASONING_KEY)
+    // Match against the known levels instead of casting, so a stale or
+    // hand-edited value is never treated as a valid level.
+    return REASONING_LEVELS.find((level) => level === stored) ?? "off"
+  } catch {
+    // Storage access can throw (e.g. a SecurityError when the browser blocks
+    // it); treat that the same as "nothing stored" rather than propagating.
+    return "off"
+  }
+}
+/**
+ * Persist the reasoning level to localStorage. "off" is represented by
+ * removing the key rather than storing a value.
+ *
+ * Best-effort: a storage failure is swallowed so the in-memory selection
+ * keeps working even when it cannot be persisted.
+ *
+ * @param v - Level to persist.
+ */
+function saveReasoning(v: ReasoningEffort) {
+  if (typeof window === "undefined") return
+  try {
+    if (v === "off") localStorage.removeItem(REASONING_KEY)
+    else localStorage.setItem(REASONING_KEY, v)
+  } catch {
+    // Writes can throw (e.g. QuotaExceededError, or storage blocked by the
+    // browser). Persistence is a convenience, so ignore the failure.
+  }
+}
+/**
+ * Attach the selected reasoning level to a set of request extras.
+ *
+ * @param extras - Extra fields to pass along with a request.
+ * @param effort - Selected reasoning level.
+ * @returns `extras` itself (same reference) when `effort` is "off";
+ *   otherwise a shallow copy of `extras` with `reasoning_effort` set to
+ *   `effort`.
+ */
+function withReasoning<T extends Record<string, unknown>>(
+  extras: T,
+  effort: ReasoningEffort,
+): T & { reasoning_effort?: string } {
+  if (effort !== "off") {
+    return { ...extras, reasoning_effort: effort }
+  }
+  // "off" means the field is omitted entirely, not sent as a value.
+  return extras
+}
+
 type StoredMessage = { role: "user" | "assistant"; content: string }
 function loadAISnapshot(): StoredMessage[] | null {
  if (typeof window === "undefined") return null
@@ -512,6 +537,7 @@ function ChatSurface({
    setMessages([])
    setAgentHistory(new Map())
    setMessageAgents(new Map())
+    setReasoningEffort("off")
  }, [setMessages])

  // Auto tool-loop using native function calls. Reads run automatically;
@@ -520,6 +546,10 @@ function ChatSurface({
  const toolIterationsRef = useRef(0)
  const processedTurnRef = useRef(-1)
  const prevStreamingRef = useRef(isStreaming)
+  // Mirror of reasoningEffort state, kept current via the effect below so
+  // regenerate/continue callbacks (declared before the state hook) can
+  // read the latest value without becoming reasoningEffort dependents.
+  const reasoningEffortRef = useRef<ReasoningEffort>("off")

  // Maintain agent-history. Two triggers:
  //   1. When a turn finishes streaming and at least one user/assistant
@@ -747,12 +777,18 @@ function ChatSurface({
    const text = messages[lastUserIdx].content
    setMessages(messages.slice(0, lastUserIdx))
    // Defer so the state flush completes before send() reads `messages`.
-    setTimeout(() => void send(text, { tools: getOpenAITools() }), 0)
+    setTimeout(
+      () => void send(text, withReasoning({ tools: getOpenAITools() }, reasoningEffortRef.current)),
+      0,
+    )
  }, [messages, setMessages, send, isStreaming])

  const continueLast = useCallback(() => {
    if (isStreaming || messages.length === 0) return
-    void send("Please continue your previous reply.", { tools: getOpenAITools() })
+    void send(
+      "Please continue your previous reply.",
+      withReasoning({ tools: getOpenAITools() }, reasoningEffortRef.current),
+    )
  }, [isStreaming, messages.length, send])

  const compactConversation = useCallback(async () => {
@@ -834,13 +870,31 @@ function ChatSurface({
    endRef.current?.scrollIntoView({ block: "end" })
  }, [messages.length, lastContent, isStreaming])

+  // Per-conversation reasoning override. Persists across page reloads via
+  // localStorage so the operator's chosen level survives a refresh, but
+  // resets when they clear the conversation. "off" = pass nothing through.
+  const [reasoningEffort, setReasoningEffort] = useState<ReasoningEffort>(
+    () => loadReasoning(),
+  )
+  useEffect(() => {
+    saveReasoning(reasoningEffort)
+    reasoningEffortRef.current = reasoningEffort
+  }, [reasoningEffort])
+
+  const cycleReasoning = useCallback(() => {
+    setReasoningEffort((cur) => {
+      const idx = REASONING_LEVELS.indexOf(cur)
+      return REASONING_LEVELS[(idx + 1) % REASONING_LEVELS.length]
+    })
+  }, [])
+
  const submit = useCallback(() => {
    const text = input.trim()
    if (!text || isStreaming) return
    setInput("")
    stickRef.current = true
-    void send(text, { tools: getOpenAITools() })
-  }, [input, isStreaming, send])
+    void send(text, withReasoning({ tools: getOpenAITools() }, reasoningEffort))
+  }, [input, isStreaming, send, reasoningEffort])

  const isEmpty = messages.length === 0

@@ -1052,6 +1106,8 @@ function ChatSurface({
            isMock={isMock}
            isCompacting={compacting}
            placeholder={isEmpty ? "Ask anything…" : "Reply…"}
+            reasoning={reasoningEffort}
+            onCycleReasoning={cycleReasoning}
          />
          {showPromptOpen && (
            <SystemPromptDialog
@@ -1303,6 +1359,8 @@ function Composer({
  isMock,
  isCompacting,
  placeholder,
+  reasoning,
+  onCycleReasoning,
 }: {
  value: string
  onChange: (v: string) => void
@@ -1331,6 +1389,8 @@ function Composer({
  isMock: boolean
  isCompacting: boolean
  placeholder: string
+  reasoning: ReasoningEffort
+  onCycleReasoning: () => void
 }) {
  const taRef = useRef<HTMLTextAreaElement | null>(null)

@@ -1410,6 +1470,7 @@ function Composer({
              model={model}
              onModelChange={onModelChange}
            />
+            <ReasoningChip value={reasoning} onCycle={onCycleReasoning} />
            <VoiceInputButton
              onTranscript={(t) => onChange((value ? value + " " : "") + t)}
            />
@@ -1470,6 +1531,49 @@ function ModelSelector({
  )
 }

+/**
+ * Reasoning-effort chip for the composer. Each click calls `onCycle`; the
+ * owner steps the level through off → low → medium → high → max → off.
+ * When the level is not "off", sends include `reasoning_effort: <level>`
+ * (how the proxy forwards or translates that field is not visible here).
+ *
+ * Visually: the button is always rendered. While the value is "off" it is
+ * a muted "think" pill; for any other value it switches to the
+ * sodium-amber style and shows the level next to the label.
+ */
+function ReasoningChip({
+  value,
+  onCycle,
+}: {
+  value: ReasoningEffort
+  onCycle: () => void
+}) {
+  const isOn = value !== "off"
+  const tooltip = isOn
+    ? `Reasoning: ${value}. Click to cycle.`
+    : "Reasoning: off. Click to enable thinking mode."
+  // Shared pill geometry/typography, followed by the state-specific colours.
+  const baseClasses =
+    "inline-flex items-center gap-1.5 rounded-full px-2.5 py-1 text-[11px] font-mono uppercase tracking-[0.12em] transition-colors"
+  const toneClasses = isOn
+    ? "bg-amber-500/15 text-amber-500 hover:bg-amber-500/25 dark:text-amber-300"
+    : "text-muted-foreground hover:bg-accent hover:text-foreground"
+
+  return (
+    <button
+      type="button"
+      onClick={onCycle}
+      data-action="ai-reasoning"
+      title={tooltip}
+      className={`${baseClasses} ${toneClasses}`}
+    >
+      <Sparkles className="size-3" />
+      <span className="select-none">
+        think
+        {isOn && <span className="ml-1 font-semibold">{value}</span>}
+      </span>
+    </button>
+  )
+}
+
 function AgentChip({
  agents,
  activeAgent,