From c379ebc37abde1fbb5895ae1f3cdb233ea62ca7f Mon Sep 17 00:00:00 2001
From: jules <cloudtech@juleslive.net>
Date: Sat, 2 May 2026 20:15:13 +1000
Subject: [PATCH] ai: per-config reasoning_effort + composer THINK chip
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Two layers for thinking-mode control:

1. Per-config default (Settings → LLM)
   New "Reasoning effort" Select in the Add/Edit dialog with
   off/low/medium/high/max, each with a hint (off = no thinking; the
   rest ~2k, ~8k, ~24k, ~64k thinking tokens). The saved row's meta
   line shows any non-off level inline, so it's visible without
   opening the editor.

2. Per-message override (composer chip)
   New ReasoningChip next to the model picker. Click cycles through
   the same five levels. Low-key chrome when off (muted "think" pill);
   sodium-amber active style with the level label when set.

   Persisted to localStorage under crema.ai.reasoning so a refresh keeps
   the operator's intent; reset to off together with the conversation on
   Clear.

When sending, withReasoning() merges reasoning_effort into the request
body as a top-level field. The proxy forwards it untouched to OpenAI /
DeepSeek (native field) and translates to Anthropic's thinking block
server-side.

reasoningEffortRef sidesteps a useCallback ordering issue —
regenerateLast/continueLast are declared before the state hook, so
they read the ref instead of a stale closure.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 .../settings/llm-configurations-panel.tsx     |  55 +++++++++
 app/lib/arcadia/llm-configs.ts                |  16 +++
 app/routes/ai.tsx                             | 112 +++++++++++++++++-
 3 files changed, 179 insertions(+), 4 deletions(-)
diff --git a/app/components/settings/llm-configurations-panel.tsx b/app/components/settings/llm-configurations-panel.tsx
index 3c85310..cc85200 100644
--- a/app/components/settings/llm-configurations-panel.tsx
+++ b/app/components/settings/llm-configurations-panel.tsx
@@ -51,12 +51,14 @@ import {
   getUsageByModel,
   getUsageSummary,
   listConfigurations,
+  REASONING_EFFORTS,
   updateConfiguration,
   type CatalogEntry,
   type LlmConfiguration,
   type LlmConfigurationInput,
   type LlmProvider,
   type LlmUsageSummary,
+  type ReasoningEffort,
   type UsageByModelRow,
 } from "~/lib/arcadia/llm-configs"
 import { listSecrets, type Secret } from "~/lib/arcadia/secrets"
@@ -401,6 +403,15 @@ function ConfigRow({
           <span className="text-[11px] text-muted-foreground">
             {formatRate(c.input_cost_per_million)}/1M in ·{" "}
             {formatRate(c.output_cost_per_million)}/1M out
+            {c.reasoning_effort && c.reasoning_effort !== "off" ? (
+              <>
+                {" "}
+                · <span className="uppercase tracking-wider">think</span>{" "}
+                <span className="text-[var(--console-amber,oklch(0.78_0.15_60))]">
+                  {c.reasoning_effort}
+                </span>
+              </>
+            ) : null}
           </span>
         </div>
       </div>
@@ -480,6 +491,7 @@ function ConfigDialog({
           input_cost_per_million: existing.input_cost_per_million,
           output_cost_per_million: existing.output_cost_per_million,
           enabled: existing.enabled,
+          reasoning_effort: existing.reasoning_effort,
         }
       : emptyDraft(),
   )
@@ -612,6 +624,34 @@ function ConfigDialog({
               placeholder="0.60"
             />
           </Field>
+
+          <Field label="Reasoning effort (thinking models)" className="sm:col-span-2">
+            <Select
+              value={draft.reasoning_effort ?? "off"}
+              onValueChange={(v) =>
+                setDraft({
+                  ...draft,
+                  reasoning_effort: (v === "off" ? null : v) as ReasoningEffort | null,
+                })
+              }
+            >
+              <SelectTrigger>
+                <SelectValue />
+              </SelectTrigger>
+              <SelectContent>
+                {REASONING_EFFORTS.map((e) => (
+                  <SelectItem key={e} value={e}>
+                    <span className="flex items-center justify-between gap-3">
+                      <span className="capitalize">{e}</span>
+                      <span className="text-[10px] text-muted-foreground">
+                        {reasoningHint(e)}
+                      </span>
+                    </span>
+                  </SelectItem>
+                ))}
+              </SelectContent>
+            </Select>
+          </Field>
         </div>
 
         {err ? (
@@ -843,3 +883,18 @@ function formatRate(rate: number | null): string {
   if (rate === 0) return "free"
   return `$${rate.toFixed(2)}`
 }
+
+function reasoningHint(e: ReasoningEffort): string { // Hint text shown beside each option in the Select.
+  switch (e) { // one case per ReasoningEffort member, so there is no default branch
+    case "off":
+      return "no thinking"
+    case "low":
+      return "~2k thinking tokens" // display text only; no budget is enforced in this function
+    case "medium":
+      return "~8k thinking tokens"
+    case "high":
+      return "~24k thinking tokens"
+    case "max":
+      return "~64k — slowest, most thorough"
+  }
+}
diff --git a/app/lib/arcadia/llm-configs.ts b/app/lib/arcadia/llm-configs.ts
index 4f7f98b..9fd8e14 100644
--- a/app/lib/arcadia/llm-configs.ts
+++ b/app/lib/arcadia/llm-configs.ts
@@ -12,6 +12,20 @@ import type { ArcadiaClient } from "@crema/arcadia-client"
 
 export type LlmProvider = "openai" | "anthropic" | "deepseek" | "qwen" | "lmstudio"
 
+/**
+ * Reasoning effort level. Intended to go as-is to OpenAI / DeepSeek as
+ * `reasoning_effort` and be translated server-side for Anthropic; `off`
+ * (or null) omits it. NOTE(review): confirm providers accept "max" as-is.
+ */
+export type ReasoningEffort = "off" | "low" | "medium" | "high" | "max"
+export const REASONING_EFFORTS: ReasoningEffort[] = [ // option order used by the settings Select
+  "off",
+  "low",
+  "medium",
+  "high",
+  "max",
+]
+
 export interface LlmConfiguration {
   id: string
   tenant_id: string | null
@@ -23,6 +37,7 @@ export interface LlmConfiguration {
   input_cost_per_million: number | null
   output_cost_per_million: number | null
   enabled: boolean
+  reasoning_effort: ReasoningEffort | null
   metadata: Record<string, unknown>
   inserted_at: string
   updated_at: string
@@ -39,6 +54,7 @@ export interface LlmConfigurationInput {
   input_cost_per_million?: number | null
   output_cost_per_million?: number | null
   enabled?: boolean
+  reasoning_effort?: ReasoningEffort | null
   metadata?: Record<string, unknown>
 }
 
diff --git a/app/routes/ai.tsx b/app/routes/ai.tsx
index f1db217..7965a22 100644
--- a/app/routes/ai.tsx
+++ b/app/routes/ai.tsx
@@ -19,6 +19,7 @@ import {
   Plus,
   RefreshCw,
   RotateCcw,
+  Sparkles,
   Square,
   Trash2,
   Undo2,
@@ -179,6 +180,30 @@ function clearLive() {
   localStorage.removeItem(LIVE_KEY)
 }
 
+/* Composer-level reasoning override. REASONING_LEVELS order is the cycle
+ * order: the composer chip steps through it and wraps back to "off". */
+type ReasoningEffort = "off" | "low" | "medium" | "high" | "max" // NOTE(review): same union is exported by ~/lib/arcadia/llm-configs
+const REASONING_LEVELS: ReasoningEffort[] = ["off", "low", "medium", "high", "max"]
+const REASONING_KEY = "crema.ai.reasoning" // localStorage key holding the chosen level
+
+function loadReasoning(): ReasoningEffort { // Reads the persisted level; "off" when absent or unrecognised.
+  if (typeof window === "undefined") return "off" // no `window` global, so no localStorage to read
+  const v = localStorage.getItem(REASONING_KEY) as ReasoningEffort | null // NOTE(review): uncaught if storage access throws
+  return v && REASONING_LEVELS.includes(v) ? v : "off" // validates the cast above; unknown values fall back to "off"
+}
+function saveReasoning(v: ReasoningEffort) { // Persists the level; "off" is represented by the key being absent.
+  if (typeof window === "undefined") return // no `window` global: nothing to write to
+  if (v === "off") localStorage.removeItem(REASONING_KEY) // loadReasoning() reads a missing key back as "off"
+  else localStorage.setItem(REASONING_KEY, v)
+}
+function withReasoning<T extends Record<string, unknown>>( // Adds `reasoning_effort` to the extras passed to send().
+  extras: T,
+  effort: ReasoningEffort,
+): T & { reasoning_effort?: string } {
+  if (effort === "off") return extras // same object back, untouched: no reasoning field is added
+  return { ...extras, reasoning_effort: effort } // shallow copy; the caller's `extras` is not mutated
+}
+
 type StoredMessage = { role: "user" | "assistant"; content: string }
 function loadAISnapshot(): StoredMessage[] | null {
   if (typeof window === "undefined") return null
@@ -512,6 +537,7 @@ function ChatSurface({
     setMessages([])
     setAgentHistory(new Map())
     setMessageAgents(new Map())
+    setReasoningEffort("off")
   }, [setMessages])
 
   // Auto tool-loop using native function calls. Reads run automatically;
@@ -520,6 +546,10 @@ function ChatSurface({
   const toolIterationsRef = useRef(0)
   const processedTurnRef = useRef(-1)
   const prevStreamingRef = useRef(isStreaming)
+  // Mirror of reasoningEffort state, kept current via the effect below so
+  // regenerate/continue callbacks (declared before the state hook) can
+  // read the latest value without becoming reasoningEffort dependents.
+  const reasoningEffortRef = useRef<ReasoningEffort>("off")
 
   // Maintain agent-history. Two triggers:
   //   1. When a turn finishes streaming and at least one user/assistant
@@ -747,12 +777,18 @@ function ChatSurface({
     const text = messages[lastUserIdx].content
     setMessages(messages.slice(0, lastUserIdx))
     // Defer so the state flush completes before send() reads `messages`.
-    setTimeout(() => void send(text, { tools: getOpenAITools() }), 0)
+    setTimeout(
+      () => void send(text, withReasoning({ tools: getOpenAITools() }, reasoningEffortRef.current)),
+      0,
+    )
   }, [messages, setMessages, send, isStreaming])
 
   const continueLast = useCallback(() => {
     if (isStreaming || messages.length === 0) return
-    void send("Please continue your previous reply.", { tools: getOpenAITools() })
+    void send(
+      "Please continue your previous reply.",
+      withReasoning({ tools: getOpenAITools() }, reasoningEffortRef.current),
+    )
   }, [isStreaming, messages.length, send])
 
   const compactConversation = useCallback(async () => {
@@ -834,13 +870,31 @@ function ChatSurface({
     endRef.current?.scrollIntoView({ block: "end" })
   }, [messages.length, lastContent, isStreaming])
 
+  // Per-conversation reasoning override. Persists across page reloads via
+  // localStorage so the operator's chosen level survives a refresh, but
+  // resets when they clear the conversation. "off" = pass nothing through.
+  const [reasoningEffort, setReasoningEffort] = useState<ReasoningEffort>(
+    () => loadReasoning(),
+  )
+  useEffect(() => {
+    saveReasoning(reasoningEffort)
+    reasoningEffortRef.current = reasoningEffort
+  }, [reasoningEffort])
+
+  const cycleReasoning = useCallback(() => {
+    setReasoningEffort((cur) => {
+      const idx = REASONING_LEVELS.indexOf(cur)
+      return REASONING_LEVELS[(idx + 1) % REASONING_LEVELS.length]
+    })
+  }, [])
+
   const submit = useCallback(() => {
     const text = input.trim()
     if (!text || isStreaming) return
     setInput("")
     stickRef.current = true
-    void send(text, { tools: getOpenAITools() })
-  }, [input, isStreaming, send])
+    void send(text, withReasoning({ tools: getOpenAITools() }, reasoningEffort))
+  }, [input, isStreaming, send, reasoningEffort])
 
   const isEmpty = messages.length === 0
 
@@ -1052,6 +1106,8 @@ function ChatSurface({
             isMock={isMock}
             isCompacting={compacting}
             placeholder={isEmpty ? "Ask anything…" : "Reply…"}
+            reasoning={reasoningEffort}
+            onCycleReasoning={cycleReasoning}
           />
           {showPromptOpen && (
             <SystemPromptDialog
@@ -1303,6 +1359,8 @@ function Composer({
   isMock,
   isCompacting,
   placeholder,
+  reasoning,
+  onCycleReasoning,
 }: {
   value: string
   onChange: (v: string) => void
@@ -1331,6 +1389,8 @@ function Composer({
   isMock: boolean
   isCompacting: boolean
   placeholder: string
+  reasoning: ReasoningEffort
+  onCycleReasoning: () => void
 }) {
   const taRef = useRef<HTMLTextAreaElement | null>(null)
 
@@ -1410,6 +1470,7 @@ function Composer({
               model={model}
               onModelChange={onModelChange}
             />
+            <ReasoningChip value={reasoning} onCycle={onCycleReasoning} />
             <VoiceInputButton
               onTranscript={(t) => onChange((value ? value + " " : "") + t)}
             />
@@ -1470,6 +1531,49 @@ function ModelSelector({
   )
 }
 
+/**
+ * Reasoning-effort chip for the composer. Stateless: renders `value` and
+ * calls `onCycle` on click; the parent owns the level and the cycle order
+ * (off → low → medium → high → max → off). For non-off levels the parent's
+ * send path adds `reasoning_effort: <level>` to the request extras.
+ *
+ * Visually: always rendered. Off is a muted "think" pill with no level;
+ * any other level is an amber pill reading "think <level>".
+ */
+function ReasoningChip({
+  value,
+  onCycle,
+}: {
+  value: ReasoningEffort
+  onCycle: () => void
+}) {
+  const active = value !== "off"
+  return (
+    <button
+      type="button"
+      onClick={onCycle}
+      data-action="ai-reasoning"
+      title={
+        active
+          ? `Reasoning: ${value}. Click to cycle.`
+          : "Reasoning: off. Click to enable thinking mode."
+      }
+      className={[
+        "inline-flex items-center gap-1.5 rounded-full px-2.5 py-1 text-[11px] font-mono uppercase tracking-[0.12em] transition-colors",
+        active
+          ? "bg-amber-500/15 text-amber-500 hover:bg-amber-500/25 dark:text-amber-300"
+          : "text-muted-foreground hover:bg-accent hover:text-foreground",
+      ].join(" ")}
+    >
+      <Sparkles className="size-3" />
+      <span className="select-none">
+        think
+        {active ? <span className="ml-1 font-semibold">{value}</span> : null}
+      </span>
+    </button>
+  )
+}
+
 function AgentChip({
   agents,
   activeAgent,