ai: per-config reasoning_effort + composer THINK chip

Two layers for thinking-mode control:

1. Per-config default (Settings → LLM)
   New "Reasoning effort" Select in the Add/Edit dialog with
   off/low/medium/high/max + a budget hint per option (~2k, ~8k,
   ~24k, ~64k thinking tokens). Saved row meta line surfaces the
   level inline so it's visible without opening the editor.

2. Per-message override (composer chip)
   New ReasoningChip next to the model picker. Each click cycles through
   the same five levels. Minimal chrome when off (a muted "think" pill);
   sodium-amber active style with the level label when set.

   Persisted to crema.ai.reasoning so a refresh keeps the operator's
   intent, wiped together with the conversation on Clear.

When sending, withReasoning() merges reasoning_effort into the request
body as a top-level field. The proxy forwards it untouched to OpenAI /
DeepSeek (native field) and translates to Anthropic's thinking block
server-side.

reasoningEffortRef sidesteps a useCallback ordering issue —
regenerateLast/continueLast are declared before the reasoningEffort state
hook, so they read the current level from the ref at call time instead of
closing over a stale value.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
jules
2026-05-02 20:15:13 +10:00
parent 20494d1620
commit c379ebc37a
3 changed files with 179 additions and 4 deletions

View File

@@ -19,6 +19,7 @@ import {
Plus,
RefreshCw,
RotateCcw,
Sparkles,
Square,
Trash2,
Undo2,
@@ -179,6 +180,30 @@ function clearLive() {
localStorage.removeItem(LIVE_KEY)
}
/* Per-conversation reasoning override. "off" means no reasoning_effort
 * field is added to the request (see withReasoning). */
type ReasoningEffort = "off" | "low" | "medium" | "high" | "max"
/* Cycle order matters — the composer chip walks this array front to back
 * and wraps from the last entry to the first. */
const REASONING_LEVELS: ReasoningEffort[] = ["off", "low", "medium", "high", "max"]
/* localStorage key under which the selected level is persisted. */
const REASONING_KEY = "crema.ai.reasoning"
/**
 * Read the persisted reasoning level from localStorage.
 *
 * @returns The stored level when it is one of REASONING_LEVELS; "off" when
 *   there is no `window` (server render), when storage cannot be read, or
 *   when the stored value is missing or not a known level.
 */
function loadReasoning(): ReasoningEffort {
  if (typeof window === "undefined") return "off"
  let stored: string | null
  try {
    stored = localStorage.getItem(REASONING_KEY)
  } catch {
    // Storage access can throw (e.g. blocked by browser policy). This runs
    // inside a useState initializer, so fall back instead of breaking render.
    return "off"
  }
  // Validate against the known levels rather than asserting the type.
  return REASONING_LEVELS.find((level) => level === stored) ?? "off"
}
/**
 * Persist the reasoning level to localStorage (best-effort).
 *
 * "off" removes the key instead of storing it. Does nothing when there is
 * no `window`. Storage failures are swallowed: the level still lives in
 * component state, it just will not survive a reload.
 *
 * @param v Level to persist.
 */
function saveReasoning(v: ReasoningEffort) {
  if (typeof window === "undefined") return
  try {
    if (v === "off") localStorage.removeItem(REASONING_KEY)
    else localStorage.setItem(REASONING_KEY, v)
  } catch {
    // setItem/removeItem can throw (quota exceeded, storage blocked). This is
    // called from an effect, so an uncaught throw would take the UI down.
  }
}
/**
 * Attach the reasoning level to a set of request extras.
 *
 * @param extras Extra request fields to send along with the message.
 * @param effort Selected reasoning level.
 * @returns `extras` itself (same reference) when `effort` is "off";
 *   otherwise a shallow copy with `reasoning_effort` set to `effort`.
 */
function withReasoning<T extends Record<string, unknown>>(
  extras: T,
  effort: ReasoningEffort,
): T & { reasoning_effort?: string } {
  if (effort !== "off") {
    const merged = { ...extras, reasoning_effort: effort }
    return merged
  }
  return extras
}
type StoredMessage = { role: "user" | "assistant"; content: string }
function loadAISnapshot(): StoredMessage[] | null {
if (typeof window === "undefined") return null
@@ -512,6 +537,7 @@ function ChatSurface({
setMessages([])
setAgentHistory(new Map())
setMessageAgents(new Map())
setReasoningEffort("off")
}, [setMessages])
// Auto tool-loop using native function calls. Reads run automatically;
@@ -520,6 +546,10 @@ function ChatSurface({
const toolIterationsRef = useRef(0)
const processedTurnRef = useRef(-1)
const prevStreamingRef = useRef(isStreaming)
// Mirror of the reasoningEffort state. An effect further down copies the
// state into this ref on every change, so the regenerate/continue callbacks
// (declared before the state hook) can read the latest value at call time
// without becoming reasoningEffort dependents. Holds "off" until that
// effect first runs.
const reasoningEffortRef = useRef<ReasoningEffort>("off")
// Maintain agent-history. Two triggers:
// 1. When a turn finishes streaming and at least one user/assistant
@@ -747,12 +777,18 @@ function ChatSurface({
const text = messages[lastUserIdx].content
setMessages(messages.slice(0, lastUserIdx))
// Defer so the state flush completes before send() reads `messages`.
setTimeout(() => void send(text, { tools: getOpenAITools() }), 0)
setTimeout(
() => void send(text, withReasoning({ tools: getOpenAITools() }, reasoningEffortRef.current)),
0,
)
}, [messages, setMessages, send, isStreaming])
const continueLast = useCallback(() => {
if (isStreaming || messages.length === 0) return
void send("Please continue your previous reply.", { tools: getOpenAITools() })
void send(
"Please continue your previous reply.",
withReasoning({ tools: getOpenAITools() }, reasoningEffortRef.current),
)
}, [isStreaming, messages.length, send])
const compactConversation = useCallback(async () => {
@@ -834,13 +870,31 @@ function ChatSurface({
endRef.current?.scrollIntoView({ block: "end" })
}, [messages.length, lastContent, isStreaming])
// Per-conversation reasoning override. The initial value comes from
// localStorage via loadReasoning(), so the operator's chosen level survives
// a refresh; clearing the conversation sets it back to "off".
// "off" = pass nothing through.
const [reasoningEffort, setReasoningEffort] = useState<ReasoningEffort>(
() => loadReasoning(),
)
// On every change: persist the level, then refresh reasoningEffortRef so
// callbacks that read the ref see the current value.
useEffect(() => {
saveReasoning(reasoningEffort)
reasoningEffortRef.current = reasoningEffort
}, [reasoningEffort])
// Advance to the next entry of REASONING_LEVELS, wrapping from the last
// level back to the first. Uses the functional updater so the callback
// needs no dependencies and stays referentially stable.
const cycleReasoning = useCallback(() => {
  setReasoningEffort((current) => {
    const nextIndex = (REASONING_LEVELS.indexOf(current) + 1) % REASONING_LEVELS.length
    return REASONING_LEVELS[nextIndex]
  })
}, [])
const submit = useCallback(() => {
const text = input.trim()
if (!text || isStreaming) return
setInput("")
stickRef.current = true
void send(text, { tools: getOpenAITools() })
}, [input, isStreaming, send])
void send(text, withReasoning({ tools: getOpenAITools() }, reasoningEffort))
}, [input, isStreaming, send, reasoningEffort])
const isEmpty = messages.length === 0
@@ -1052,6 +1106,8 @@ function ChatSurface({
isMock={isMock}
isCompacting={compacting}
placeholder={isEmpty ? "Ask anything…" : "Reply…"}
reasoning={reasoningEffort}
onCycleReasoning={cycleReasoning}
/>
{showPromptOpen && (
<SystemPromptDialog
@@ -1303,6 +1359,8 @@ function Composer({
isMock,
isCompacting,
placeholder,
reasoning,
onCycleReasoning,
}: {
value: string
onChange: (v: string) => void
@@ -1331,6 +1389,8 @@ function Composer({
isMock: boolean
isCompacting: boolean
placeholder: string
reasoning: ReasoningEffort
onCycleReasoning: () => void
}) {
const taRef = useRef<HTMLTextAreaElement | null>(null)
@@ -1410,6 +1470,7 @@ function Composer({
model={model}
onModelChange={onModelChange}
/>
<ReasoningChip value={reasoning} onCycle={onCycleReasoning} />
<VoiceInputButton
onTranscript={(t) => onChange((value ? value + " " : "") + t)}
/>
@@ -1470,6 +1531,49 @@ function ModelSelector({
)
}
/**
 * Composer chip that shows the current reasoning-effort level and asks the
 * parent to advance it on click.
 *
 * The button is always rendered. When `value` is "off" it is a muted
 * "think" pill; for any other level it switches to the amber active style
 * and appends the level after "think". The chip keeps no state of its own:
 * every click just invokes `onCycle`.
 */
function ReasoningChip({
  value,
  onCycle,
}: {
  value: ReasoningEffort
  onCycle: () => void
}) {
  const active = value !== "off"
  const tooltip = active
    ? `Reasoning: ${value}. Click to cycle.`
    : "Reasoning: off. Click to enable thinking mode."
  const baseClasses =
    "inline-flex items-center gap-1.5 rounded-full px-2.5 py-1 text-[11px] font-mono uppercase tracking-[0.12em] transition-colors"
  const toneClasses = active
    ? "bg-amber-500/15 text-amber-500 hover:bg-amber-500/25 dark:text-amber-300"
    : "text-muted-foreground hover:bg-accent hover:text-foreground"
  // Level suffix is only shown while a level is selected.
  const levelLabel = active ? <span className="ml-1 font-semibold">{value}</span> : null

  return (
    <button
      type="button"
      onClick={onCycle}
      data-action="ai-reasoning"
      title={tooltip}
      className={`${baseClasses} ${toneClasses}`}
    >
      <Sparkles className="size-3" />
      <span className="select-none">
        think
        {levelLabel}
      </span>
    </button>
  )
}
function AgentChip({
agents,
activeAgent,