ai: per-config reasoning_effort + composer THINK chip

Two layers for thinking-mode control: 1. Per-config default (Settings → LLM): new "Reasoning effort" Select in the Add/Edit dialog with off/low/medium/high/max plus a budget hint per option (~2k, ~8k, ~24k, ~64k thinking tokens). The saved row's meta line surfaces the level inline so it's visible without opening the editor. 2. Per-message override (composer chip): new ReasoningChip next to the model picker. Clicking it cycles through the same five levels. Subdued chrome when off (muted "think" pill); sodium-amber active style with the level label when set. Persisted to crema.ai.reasoning so a refresh keeps the operator's intent, and wiped together with the conversation on Clear. When sending, withReasoning() merges reasoning_effort into the request body as a top-level field. The proxy forwards it untouched to OpenAI / DeepSeek (native field) and translates it to Anthropic's thinking block server-side. reasoningEffortRef sidesteps a useCallback ordering issue — regenerateLast/continueLast are declared before the state hook, so they read the ref instead of a stale closure. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-02 20:15:13 +10:00
parent 20494d1620
commit c379ebc37a
3 changed files with 179 additions and 4 deletions
--- a/app/components/settings/llm-configurations-panel.tsx
+++ b/app/components/settings/llm-configurations-panel.tsx
@@ -51,12 +51,14 @@ import {
  getUsageByModel,
  getUsageSummary,
  listConfigurations,
+  REASONING_EFFORTS,
  updateConfiguration,
  type CatalogEntry,
  type LlmConfiguration,
  type LlmConfigurationInput,
  type LlmProvider,
  type LlmUsageSummary,
+  type ReasoningEffort,
  type UsageByModelRow,
 } from "~/lib/arcadia/llm-configs"
 import { listSecrets, type Secret } from "~/lib/arcadia/secrets"
@@ -401,6 +403,15 @@ function ConfigRow({
          <span className="text-[11px] text-muted-foreground">
            {formatRate(c.input_cost_per_million)}/1M in ·{" "}
            {formatRate(c.output_cost_per_million)}/1M out
+            {c.reasoning_effort && c.reasoning_effort !== "off" ? (
+              <>
+                {" "}
+                · <span className="uppercase tracking-wider">think</span>{" "}
+                <span className="text-[var(--console-amber,oklch(0.78_0.15_60))]">
+                  {c.reasoning_effort}
+                </span>
+              </>
+            ) : null}
          </span>
        </div>
      </div>
@@ -480,6 +491,7 @@ function ConfigDialog({
          input_cost_per_million: existing.input_cost_per_million,
          output_cost_per_million: existing.output_cost_per_million,
          enabled: existing.enabled,
+          reasoning_effort: existing.reasoning_effort,
        }
      : emptyDraft(),
  )
@@ -612,6 +624,34 @@ function ConfigDialog({
              placeholder="0.60"
            />
          </Field>
+
+          <Field label="Reasoning effort (thinking models)" className="sm:col-span-2">
+            <Select
+              value={draft.reasoning_effort ?? "off"}
+              onValueChange={(v) =>
+                setDraft({
+                  ...draft,
+                  reasoning_effort: (v === "off" ? null : v) as ReasoningEffort | null,
+                })
+              }
+            >
+              <SelectTrigger>
+                <SelectValue />
+              </SelectTrigger>
+              <SelectContent>
+                {REASONING_EFFORTS.map((e) => (
+                  <SelectItem key={e} value={e}>
+                    <span className="flex items-center justify-between gap-3">
+                      <span className="capitalize">{e}</span>
+                      <span className="text-[10px] text-muted-foreground">
+                        {reasoningHint(e)}
+                      </span>
+                    </span>
+                  </SelectItem>
+                ))}
+              </SelectContent>
+            </Select>
+          </Field>
        </div>

        {err ? (
@@ -843,3 +883,18 @@ function formatRate(rate: number | null): string {
  if (rate === 0) return "free"
  return `$${rate.toFixed(2)}`
 }
+
+function reasoningHint(e: ReasoningEffort): string { // Returns the short hint text rendered beside each effort level in the "Reasoning effort" Select.
+  switch (e) { // No default branch: relies on the cases below covering every ReasoningEffort member (type is declared in llm-configs — TODO confirm).
+    case "off":
+      return "no thinking"
+    case "low":
+      return "~2k thinking tokens"
+    case "medium":
+      return "~8k thinking tokens"
+    case "high":
+      return "~24k thinking tokens"
+    case "max":
+      return "~64k — slowest, most thorough" // Only hint without the "thinking tokens" unit; the commit message lists ~64k as a thinking-token budget too.
+  }
+}