ai: per-config reasoning_effort + composer THINK chip

Two layers for thinking-mode control:

1. Per-config default (Settings → LLM)
   New "Reasoning effort" Select in the Add/Edit dialog with
   off/low/medium/high/max + a budget hint per option (~2k, ~8k,
   ~24k, ~64k thinking tokens). Saved row meta line surfaces the
   level inline so it's visible without opening the editor.

2. Per-message override (composer chip)
   New ReasoningChip next to the model picker. Click cycles through
   the same five levels. Minimal chrome when off (a muted "think" pill
   that stays visible); sodium-amber active style with the level label
   when set.

   Persisted to crema.ai.reasoning so a refresh keeps the operator's
   intent, wiped together with the conversation on Clear.

When sending, withReasoning() merges reasoning_effort into the request
body as a top-level field. The proxy forwards it untouched to OpenAI /
DeepSeek (native field) and translates to Anthropic's thinking block
server-side.

reasoningEffortRef sidesteps a useCallback ordering issue —
regenerateLast/continueLast are declared before the reasoningEffort
state hook, so they cannot take the state as a dependency; they read
the latest value from the ref (kept current by an effect) instead.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
jules
2026-05-02 20:15:13 +10:00
parent 20494d1620
commit c379ebc37a
3 changed files with 179 additions and 4 deletions

View File

@@ -51,12 +51,14 @@ import {
getUsageByModel,
getUsageSummary,
listConfigurations,
REASONING_EFFORTS,
updateConfiguration,
type CatalogEntry,
type LlmConfiguration,
type LlmConfigurationInput,
type LlmProvider,
type LlmUsageSummary,
type ReasoningEffort,
type UsageByModelRow,
} from "~/lib/arcadia/llm-configs"
import { listSecrets, type Secret } from "~/lib/arcadia/secrets"
@@ -401,6 +403,15 @@ function ConfigRow({
<span className="text-[11px] text-muted-foreground">
{formatRate(c.input_cost_per_million)}/1M in ·{" "}
{formatRate(c.output_cost_per_million)}/1M out
{c.reasoning_effort && c.reasoning_effort !== "off" ? (
<>
{" "}
· <span className="uppercase tracking-wider">think</span>{" "}
<span className="text-[var(--console-amber,oklch(0.78_0.15_60))]">
{c.reasoning_effort}
</span>
</>
) : null}
</span>
</div>
</div>
@@ -480,6 +491,7 @@ function ConfigDialog({
input_cost_per_million: existing.input_cost_per_million,
output_cost_per_million: existing.output_cost_per_million,
enabled: existing.enabled,
reasoning_effort: existing.reasoning_effort,
}
: emptyDraft(),
)
@@ -612,6 +624,34 @@ function ConfigDialog({
placeholder="0.60"
/>
</Field>
<Field label="Reasoning effort (thinking models)" className="sm:col-span-2">
<Select
value={draft.reasoning_effort ?? "off"}
onValueChange={(v) =>
setDraft({
...draft,
reasoning_effort: (v === "off" ? null : v) as ReasoningEffort | null,
})
}
>
<SelectTrigger>
<SelectValue />
</SelectTrigger>
<SelectContent>
{REASONING_EFFORTS.map((e) => (
<SelectItem key={e} value={e}>
<span className="flex items-center justify-between gap-3">
<span className="capitalize">{e}</span>
<span className="text-[10px] text-muted-foreground">
{reasoningHint(e)}
</span>
</span>
</SelectItem>
))}
</SelectContent>
</Select>
</Field>
</div>
{err ? (
@@ -843,3 +883,18 @@ function formatRate(rate: number | null): string {
if (rate === 0) return "free"
return `$${rate.toFixed(2)}`
}
/**
 * Short budget hint rendered next to each reasoning-effort option in the
 * configuration dialog's Select.
 *
 * @param e - The reasoning-effort level being described.
 * @returns Human-readable hint text for that level.
 */
function reasoningHint(e: ReasoningEffort): string {
  // One entry per level; the Record type makes the compiler flag a
  // missing entry if the ReasoningEffort union ever grows.
  const hints: Record<ReasoningEffort, string> = {
    off: "no thinking",
    low: "~2k thinking tokens",
    medium: "~8k thinking tokens",
    high: "~24k thinking tokens",
    max: "~64k — slowest, most thorough",
  }
  return hints[e]
}

View File

@@ -12,6 +12,20 @@ import type { ArcadiaClient } from "@crema/arcadia-client"
export type LlmProvider = "openai" | "anthropic" | "deepseek" | "qwen" | "lmstudio"
/**
 * Reasoning-effort level attached to an LLM configuration.
 *
 * OpenAI and DeepSeek accept `reasoning_effort` natively, so the value is
 * sent to them verbatim; for Anthropic it is translated server-side into a
 * thinking block. `off` (or a null value) means the field is not sent at all.
 */
export type ReasoningEffort = "off" | "low" | "medium" | "high" | "max"

/** Every reasoning-effort level, in the order the settings Select lists them. */
export const REASONING_EFFORTS: ReasoningEffort[] = ["off", "low", "medium", "high", "max"]
export interface LlmConfiguration {
id: string
tenant_id: string | null
@@ -23,6 +37,7 @@ export interface LlmConfiguration {
input_cost_per_million: number | null
output_cost_per_million: number | null
enabled: boolean
reasoning_effort: ReasoningEffort | null
metadata: Record<string, unknown>
inserted_at: string
updated_at: string
@@ -39,6 +54,7 @@ export interface LlmConfigurationInput {
input_cost_per_million?: number | null
output_cost_per_million?: number | null
enabled?: boolean
reasoning_effort?: ReasoningEffort | null
metadata?: Record<string, unknown>
}

View File

@@ -19,6 +19,7 @@ import {
Plus,
RefreshCw,
RotateCcw,
Sparkles,
Square,
Trash2,
Undo2,
@@ -179,6 +180,30 @@ function clearLive() {
localStorage.removeItem(LIVE_KEY)
}
// Reasoning levels for the per-conversation override. The order is
// significant: the composer chip steps through this array on each click.
// NOTE(review): this union duplicates the ReasoningEffort type exported by
// ~/lib/arcadia/llm-configs — keep the two in sync.
type ReasoningEffort = "off" | "low" | "medium" | "high" | "max"
const REASONING_LEVELS: ReasoningEffort[] = [
  "off",
  "low",
  "medium",
  "high",
  "max",
]
// localStorage key under which the chosen level is persisted.
const REASONING_KEY = "crema.ai.reasoning"
/**
 * Reads the persisted reasoning level from localStorage.
 *
 * @returns The stored level when it is one of REASONING_LEVELS; otherwise
 *   "off" (nothing stored, unrecognised value, or no `window` available).
 */
function loadReasoning(): ReasoningEffort {
  // No window means there is no localStorage to read from.
  if (typeof window === "undefined") return "off"
  const stored = localStorage.getItem(REASONING_KEY)
  // Matching against the known levels both validates and narrows the raw
  // string, so anything unexpected falls through to the default.
  const known = REASONING_LEVELS.find((level) => level === stored)
  return known ?? "off"
}
/**
 * Persists the reasoning level to localStorage.
 *
 * "off" is stored as the absence of the key rather than as a value.
 * Does nothing when no `window` is available.
 *
 * @param v - The level to persist.
 */
function saveReasoning(v: ReasoningEffort): void {
  if (typeof window === "undefined") return
  if (v !== "off") {
    localStorage.setItem(REASONING_KEY, v)
    return
  }
  // "off" is the default, so drop the key instead of writing it.
  localStorage.removeItem(REASONING_KEY)
}
/**
 * Adds `reasoning_effort` to a set of send options.
 *
 * @param extras - Options to pass along with the message; never mutated.
 * @param effort - The reasoning level selected for this send.
 * @returns A shallow copy of `extras` with `reasoning_effort` set (replacing
 *   any existing value), or `extras` itself, untouched, when the level is "off".
 */
function withReasoning<T extends Record<string, unknown>>(
  extras: T,
  effort: ReasoningEffort,
): T & { reasoning_effort?: string } {
  if (effort !== "off") {
    return { ...extras, reasoning_effort: effort }
  }
  // "off" adds nothing, so the caller's object is returned as-is.
  return extras
}
type StoredMessage = { role: "user" | "assistant"; content: string }
function loadAISnapshot(): StoredMessage[] | null {
if (typeof window === "undefined") return null
@@ -512,6 +537,7 @@ function ChatSurface({
setMessages([])
setAgentHistory(new Map())
setMessageAgents(new Map())
setReasoningEffort("off")
}, [setMessages])
// Auto tool-loop using native function calls. Reads run automatically;
@@ -520,6 +546,10 @@ function ChatSurface({
const toolIterationsRef = useRef(0)
const processedTurnRef = useRef(-1)
const prevStreamingRef = useRef(isStreaming)
// Mirror of reasoningEffort state, kept current via the effect below so
// regenerate/continue callbacks (declared before the state hook) can
// read the latest value without becoming reasoningEffort dependents.
const reasoningEffortRef = useRef<ReasoningEffort>("off")
// Maintain agent-history. Two triggers:
// 1. When a turn finishes streaming and at least one user/assistant
@@ -747,12 +777,18 @@ function ChatSurface({
const text = messages[lastUserIdx].content
setMessages(messages.slice(0, lastUserIdx))
// Defer so the state flush completes before send() reads `messages`.
setTimeout(() => void send(text, { tools: getOpenAITools() }), 0)
setTimeout(
() => void send(text, withReasoning({ tools: getOpenAITools() }, reasoningEffortRef.current)),
0,
)
}, [messages, setMessages, send, isStreaming])
const continueLast = useCallback(() => {
if (isStreaming || messages.length === 0) return
void send("Please continue your previous reply.", { tools: getOpenAITools() })
void send(
"Please continue your previous reply.",
withReasoning({ tools: getOpenAITools() }, reasoningEffortRef.current),
)
}, [isStreaming, messages.length, send])
const compactConversation = useCallback(async () => {
@@ -834,13 +870,31 @@ function ChatSurface({
endRef.current?.scrollIntoView({ block: "end" })
}, [messages.length, lastContent, isStreaming])
// Per-conversation reasoning override. Persists across page reloads via
// localStorage so the operator's chosen level survives a refresh, but
// resets when they clear the conversation. "off" = pass nothing through.
const [reasoningEffort, setReasoningEffort] = useState<ReasoningEffort>(
() => loadReasoning(),
)
useEffect(() => {
saveReasoning(reasoningEffort)
reasoningEffortRef.current = reasoningEffort
}, [reasoningEffort])
const cycleReasoning = useCallback(() => {
setReasoningEffort((cur) => {
const idx = REASONING_LEVELS.indexOf(cur)
return REASONING_LEVELS[(idx + 1) % REASONING_LEVELS.length]
})
}, [])
const submit = useCallback(() => {
const text = input.trim()
if (!text || isStreaming) return
setInput("")
stickRef.current = true
void send(text, { tools: getOpenAITools() })
}, [input, isStreaming, send])
void send(text, withReasoning({ tools: getOpenAITools() }, reasoningEffort))
}, [input, isStreaming, send, reasoningEffort])
const isEmpty = messages.length === 0
@@ -1052,6 +1106,8 @@ function ChatSurface({
isMock={isMock}
isCompacting={compacting}
placeholder={isEmpty ? "Ask anything…" : "Reply…"}
reasoning={reasoningEffort}
onCycleReasoning={cycleReasoning}
/>
{showPromptOpen && (
<SystemPromptDialog
@@ -1303,6 +1359,8 @@ function Composer({
isMock,
isCompacting,
placeholder,
reasoning,
onCycleReasoning,
}: {
value: string
onChange: (v: string) => void
@@ -1331,6 +1389,8 @@ function Composer({
isMock: boolean
isCompacting: boolean
placeholder: string
reasoning: ReasoningEffort
onCycleReasoning: () => void
}) {
const taRef = useRef<HTMLTextAreaElement | null>(null)
@@ -1410,6 +1470,7 @@ function Composer({
model={model}
onModelChange={onModelChange}
/>
<ReasoningChip value={reasoning} onCycle={onCycleReasoning} />
<VoiceInputButton
onTranscript={(t) => onChange((value ? value + " " : "") + t)}
/>
@@ -1470,6 +1531,49 @@ function ModelSelector({
)
}
/**
 * Composer chip showing the current reasoning-effort level.
 *
 * The chip is purely presentational: a click only invokes `onCycle`, and
 * the parent decides the next level (ChatSurface walks off → low → medium →
 * high → max → off).
 *
 * Appearance: the button is always rendered. When the level is "off" it is
 * a muted "think" pill; otherwise it switches to the amber style and shows
 * the level name after the "think" label.
 */
function ReasoningChip({
  value,
  onCycle,
}: {
  value: ReasoningEffort
  onCycle: () => void
}) {
  const isOn = value !== "off"
  const tooltip = isOn
    ? `Reasoning: ${value}. Click to cycle.`
    : "Reasoning: off. Click to enable thinking mode."
  // Layout and typography shared by both states.
  const baseClasses =
    "inline-flex items-center gap-1.5 rounded-full px-2.5 py-1 text-[11px] font-mono uppercase tracking-[0.12em] transition-colors"
  // Colour treatment: amber when a level is set, muted otherwise.
  const toneClasses = isOn
    ? "bg-amber-500/15 text-amber-500 hover:bg-amber-500/25 dark:text-amber-300"
    : "text-muted-foreground hover:bg-accent hover:text-foreground"
  return (
    <button
      type="button"
      onClick={onCycle}
      data-action="ai-reasoning"
      title={tooltip}
      className={`${baseClasses} ${toneClasses}`}
    >
      <Sparkles className="size-3" />
      <span className="select-none">
        think
        {isOn ? <span className="ml-1 font-semibold">{value}</span> : null}
      </span>
    </button>
  )
}
function AgentChip({
agents,
activeAgent,