ai: per-config reasoning_effort + composer THINK chip

Two layers of thinking-mode control:

1. Per-config default (Settings → LLM). New "Reasoning effort" Select in the Add/Edit dialog with off/low/medium/high/max, plus a budget hint per option (~2k, ~8k, ~24k, ~64k thinking tokens). The saved row's meta line surfaces the level inline so it's visible without opening the editor.

2. Per-message override (composer chip). New ReasoningChip next to the model picker. Clicking cycles through the same five levels. Unobtrusive chrome when off (a muted "think" pill); sodium-amber active style with the level label when set. Persisted to crema.ai.reasoning so a refresh keeps the operator's intent; wiped together with the conversation on Clear.

When sending, withReasoning() merges reasoning_effort into the request body as a top-level field. The proxy forwards it untouched to OpenAI / DeepSeek (native field) and translates it to Anthropic's thinking block server-side.

reasoningEffortRef sidesteps a useCallback ordering issue — regenerateLast/continueLast are declared before the state hook, so they read the ref instead of a stale closure.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-02 20:15:13 +10:00
parent 20494d1620
commit c379ebc37a
3 changed files with 179 additions and 4 deletions
--- a/app/components/settings/llm-configurations-panel.tsx
+++ b/app/components/settings/llm-configurations-panel.tsx
@@ -51,12 +51,14 @@ import {
  getUsageByModel,
  getUsageSummary,
  listConfigurations,
  REASONING_EFFORTS,
  updateConfiguration,
  type CatalogEntry,
  type LlmConfiguration,
  type LlmConfigurationInput,
  type LlmProvider,
  type LlmUsageSummary,
  type ReasoningEffort,
  type UsageByModelRow,
 } from "~/lib/arcadia/llm-configs"
 import { listSecrets, type Secret } from "~/lib/arcadia/secrets"
@@ -401,6 +403,15 @@ function ConfigRow({
          <span className="text-[11px] text-muted-foreground">
            {formatRate(c.input_cost_per_million)}/1M in ·{" "}
            {formatRate(c.output_cost_per_million)}/1M out
            {c.reasoning_effort && c.reasoning_effort !== "off" ? (
              <>
                {" "}
                · <span className="uppercase tracking-wider">think</span>{" "}
                <span className="text-[var(--console-amber,oklch(0.78_0.15_60))]">
                  {c.reasoning_effort}
                </span>
              </>
            ) : null}
          </span>
        </div>
      </div>
@@ -480,6 +491,7 @@ function ConfigDialog({
          input_cost_per_million: existing.input_cost_per_million,
          output_cost_per_million: existing.output_cost_per_million,
          enabled: existing.enabled,
          reasoning_effort: existing.reasoning_effort,
        }
      : emptyDraft(),
  )
@@ -612,6 +624,34 @@ function ConfigDialog({
              placeholder="0.60"
            />
          </Field>
          <Field label="Reasoning effort (thinking models)" className="sm:col-span-2">
            <Select
              value={draft.reasoning_effort ?? "off"}
              onValueChange={(v) =>
                setDraft({
                  ...draft,
                  reasoning_effort: (v === "off" ? null : v) as ReasoningEffort | null,
                })
              }
            >
              <SelectTrigger>
                <SelectValue />
              </SelectTrigger>
              <SelectContent>
                {REASONING_EFFORTS.map((e) => (
                  <SelectItem key={e} value={e}>
                    <span className="flex items-center justify-between gap-3">
                      <span className="capitalize">{e}</span>
                      <span className="text-[10px] text-muted-foreground">
                        {reasoningHint(e)}
                      </span>
                    </span>
                  </SelectItem>
                ))}
              </SelectContent>
            </Select>
          </Field>
        </div>
        {err ? (
@@ -843,3 +883,18 @@ function formatRate(rate: number | null): string {
  if (rate === 0) return "free"
  return `$${rate.toFixed(2)}`
 }
 /**
  * Short helper text shown beside each option of the "Reasoning effort"
  * select.
  *
  * @param e - The reasoning-effort level being rendered.
  * @returns A human-readable hint describing that level.
  */
 function reasoningHint(e: ReasoningEffort): string {
  // Keyed by level; the Record type makes the compiler reject a missing or
  // misspelled level.
  const hintByLevel: Record<ReasoningEffort, string> = {
    off: "no thinking",
    low: "~2k thinking tokens",
    medium: "~8k thinking tokens",
    high: "~24k thinking tokens",
    max: "~64k — slowest, most thorough",
  }
  return hintByLevel[e]
 }
--- a/app/lib/arcadia/llm-configs.ts
+++ b/app/lib/arcadia/llm-configs.ts
@@ -12,6 +12,20 @@ import type { ArcadiaClient } from "@crema/arcadia-client"
 export type LlmProvider = "openai" | "anthropic" | "deepseek" | "qwen" | "lmstudio"
 /**
 * Thinking-effort level attached to an LLM configuration.
 *
 * The value is sent verbatim to OpenAI / DeepSeek, which accept
 * `reasoning_effort` natively, and is translated server-side into
 * Anthropic's thinking block. `off` (or null) skips the field entirely.
 */
 export type ReasoningEffort =
  | "off"
  | "low"
  | "medium"
  | "high"
  | "max"
 /** Every reasoning-effort level, in the order the settings select lists them. */
 export const REASONING_EFFORTS: ReasoningEffort[] = ["off", "low", "medium", "high", "max"]
 export interface LlmConfiguration {
  id: string
  tenant_id: string | null
@@ -23,6 +37,7 @@ export interface LlmConfiguration {
  input_cost_per_million: number | null
  output_cost_per_million: number | null
  enabled: boolean
  reasoning_effort: ReasoningEffort | null
  metadata: Record<string, unknown>
  inserted_at: string
  updated_at: string
@@ -39,6 +54,7 @@ export interface LlmConfigurationInput {
  input_cost_per_million?: number | null
  output_cost_per_million?: number | null
  enabled?: boolean
  reasoning_effort?: ReasoningEffort | null
  metadata?: Record<string, unknown>
 }
--- a/app/routes/ai.tsx
+++ b/app/routes/ai.tsx
@@ -19,6 +19,7 @@ import {
  Plus,
  RefreshCw,
  RotateCcw,
  Sparkles,
  Square,
  Trash2,
  Undo2,
@@ -179,6 +180,30 @@ function clearLive() {
  localStorage.removeItem(LIVE_KEY)
 }
 /* Per-conversation reasoning override.
 *
 * REASONING_LEVELS doubles as the cycle order: the composer chip advances
 * one entry per click and wraps back to the start, so reordering this array
 * changes what a click does.
 *
 * NOTE(review): the type and the level list repeat `ReasoningEffort` /
 * `REASONING_EFFORTS` from ~/lib/arcadia/llm-configs — consider importing
 * them so the two cannot drift apart. */
 type ReasoningEffort =
  | "off"
  | "low"
  | "medium"
  | "high"
  | "max"
 const REASONING_LEVELS: ReasoningEffort[] = [
  "off",
  "low",
  "medium",
  "high",
  "max",
 ]
 /* localStorage key under which the chosen level is persisted. */
 const REASONING_KEY = "crema.ai.reasoning"
 /**
  * Reads the persisted reasoning level from localStorage.
  *
  * @returns The stored level when it is one of REASONING_LEVELS; otherwise
  *   "off". Also "off" when there is no `window` (e.g. outside a browser).
  */
 function loadReasoning(): ReasoningEffort {
  if (typeof window === "undefined") return "off"
  const stored = localStorage.getItem(REASONING_KEY)
  // Match against the known levels so a missing, empty, or unrecognised
  // stored value falls back to "off".
  const known = REASONING_LEVELS.find((level) => level === stored)
  return known ?? "off"
 }
 /**
  * Persists the reasoning level to localStorage.
  *
  * "off" is stored as the absence of the key rather than as a value. Does
  * nothing when there is no `window` (e.g. outside a browser).
  *
  * @param v - The level to persist.
  */
 function saveReasoning(v: ReasoningEffort) {
  if (typeof window === "undefined") return
  if (v !== "off") {
    localStorage.setItem(REASONING_KEY, v)
    return
  }
  // "off" is the default, so drop the key instead of writing it.
  localStorage.removeItem(REASONING_KEY)
 }
 /**
  * Adds the reasoning level to a set of request extras.
  *
  * @param extras - Extra request fields to send (left unmodified).
  * @param effort - The reasoning level currently selected.
  * @returns `extras` itself when `effort` is "off"; otherwise a shallow copy
  *   of `extras` with a top-level `reasoning_effort` field set to `effort`.
  */
 function withReasoning<T extends Record<string, unknown>>(
  extras: T,
  effort: ReasoningEffort,
 ): T & { reasoning_effort?: string } {
  if (effort !== "off") {
    return { ...extras, reasoning_effort: effort }
  }
  // "off" means the field is not sent at all, so hand back the input as-is.
  return extras
 }
 type StoredMessage = { role: "user" | "assistant"; content: string }
 function loadAISnapshot(): StoredMessage[] | null {
  if (typeof window === "undefined") return null
@@ -512,6 +537,7 @@ function ChatSurface({
    setMessages([])
    setAgentHistory(new Map())
    setMessageAgents(new Map())
    setReasoningEffort("off")
  }, [setMessages])
  // Auto tool-loop using native function calls. Reads run automatically;
@@ -520,6 +546,10 @@ function ChatSurface({
  const toolIterationsRef = useRef(0)
  const processedTurnRef = useRef(-1)
  const prevStreamingRef = useRef(isStreaming)
  // Mirror of reasoningEffort state, kept current via the effect below so
  // regenerate/continue callbacks (declared before the state hook) can
  // read the latest value without becoming reasoningEffort dependents.
  const reasoningEffortRef = useRef<ReasoningEffort>("off")
  // Maintain agent-history. Two triggers:
  //   1. When a turn finishes streaming and at least one user/assistant
@@ -747,12 +777,18 @@ function ChatSurface({
    const text = messages[lastUserIdx].content
    setMessages(messages.slice(0, lastUserIdx))
    // Defer so the state flush completes before send() reads `messages`.
-    setTimeout(() => void send(text, { tools: getOpenAITools() }), 0)
+    setTimeout(
      () => void send(text, withReasoning({ tools: getOpenAITools() }, reasoningEffortRef.current)),
      0,
    )
  }, [messages, setMessages, send, isStreaming])
  const continueLast = useCallback(() => {
    if (isStreaming || messages.length === 0) return
-    void send("Please continue your previous reply.", { tools: getOpenAITools() })
+    void send(
      "Please continue your previous reply.",
      withReasoning({ tools: getOpenAITools() }, reasoningEffortRef.current),
    )
  }, [isStreaming, messages.length, send])
  const compactConversation = useCallback(async () => {
@@ -834,13 +870,31 @@ function ChatSurface({
    endRef.current?.scrollIntoView({ block: "end" })
  }, [messages.length, lastContent, isStreaming])
  // Per-conversation reasoning override. Persists across page reloads via
  // localStorage so the operator's chosen level survives a refresh, but
  // resets when they clear the conversation. "off" = pass nothing through.
  const [reasoningEffort, setReasoningEffort] = useState<ReasoningEffort>(
    () => loadReasoning(),
  )
  useEffect(() => {
    saveReasoning(reasoningEffort)
    reasoningEffortRef.current = reasoningEffort
  }, [reasoningEffort])
  const cycleReasoning = useCallback(() => {
    setReasoningEffort((cur) => {
      const idx = REASONING_LEVELS.indexOf(cur)
      return REASONING_LEVELS[(idx + 1) % REASONING_LEVELS.length]
    })
  }, [])
  const submit = useCallback(() => {
    const text = input.trim()
    if (!text || isStreaming) return
    setInput("")
    stickRef.current = true
-    void send(text, { tools: getOpenAITools() })
+    void send(text, withReasoning({ tools: getOpenAITools() }, reasoningEffort))
-  }, [input, isStreaming, send])
+  }, [input, isStreaming, send, reasoningEffort])
  const isEmpty = messages.length === 0
@@ -1052,6 +1106,8 @@ function ChatSurface({
            isMock={isMock}
            isCompacting={compacting}
            placeholder={isEmpty ? "Ask anything…" : "Reply…"}
            reasoning={reasoningEffort}
            onCycleReasoning={cycleReasoning}
          />
          {showPromptOpen && (
            <SystemPromptDialog
@@ -1303,6 +1359,8 @@ function Composer({
  isMock,
  isCompacting,
  placeholder,
  reasoning,
  onCycleReasoning,
 }: {
  value: string
  onChange: (v: string) => void
@@ -1331,6 +1389,8 @@ function Composer({
  isMock: boolean
  isCompacting: boolean
  placeholder: string
  reasoning: ReasoningEffort
  onCycleReasoning: () => void
 }) {
  const taRef = useRef<HTMLTextAreaElement | null>(null)
@@ -1410,6 +1470,7 @@ function Composer({
              model={model}
              onModelChange={onModelChange}
            />
            <ReasoningChip value={reasoning} onCycle={onCycleReasoning} />
            <VoiceInputButton
              onTranscript={(t) => onChange((value ? value + " " : "") + t)}
            />
@@ -1470,6 +1531,49 @@ function ModelSelector({
  )
 }
 /**
 * Reasoning-effort chip for the composer.
 *
 * Renders a pill-shaped button labelled "think" that invokes `onCycle` on
 * click; the component itself holds no state, so the cycle order is whatever
 * the parent's handler implements.
 *
 * Visually: the button is always rendered. When the level is "off" it uses
 * muted text with no level label; for any other level it switches to the
 * amber style and shows the level next to "think". The tooltip reflects the
 * current level.
 */
 function ReasoningChip({
  value,
  onCycle,
 }: {
  value: ReasoningEffort
  onCycle: () => void
 }) {
  const isOn = value !== "off"
  const tooltip = isOn
    ? `Reasoning: ${value}. Click to cycle.`
    : "Reasoning: off. Click to enable thinking mode."
  // Shared pill layout, followed by the tone for the current state.
  const baseClasses =
    "inline-flex items-center gap-1.5 rounded-full px-2.5 py-1 text-[11px] font-mono uppercase tracking-[0.12em] transition-colors"
  const toneClasses = isOn
    ? "bg-amber-500/15 text-amber-500 hover:bg-amber-500/25 dark:text-amber-300"
    : "text-muted-foreground hover:bg-accent hover:text-foreground"
  return (
    <button
      type="button"
      onClick={onCycle}
      data-action="ai-reasoning"
      title={tooltip}
      className={`${baseClasses} ${toneClasses}`}
    >
      <Sparkles className="size-3" />
      <span className="select-none">
        think
        {isOn && <span className="ml-1 font-semibold">{value}</span>}
      </span>
    </button>
  )
 }
 function AgentChip({
  agents,
  activeAgent,