ai: per-config reasoning_effort + composer THINK chip

Two layers for thinking-mode control:

1. Per-config default (Settings → LLM)
   New "Reasoning effort" Select in the Add/Edit dialog with
   off/low/medium/high/max + a budget hint per option (~2k, ~8k,
   ~24k, ~64k thinking tokens). Saved row meta line surfaces the
   level inline so it's visible without opening the editor.

2. Per-message override (composer chip)
   New ReasoningChip next to the model picker. Click cycles through
   the same five levels. Unobtrusive when off (muted "think" pill);
   sodium-amber active style with the level label when set.

   Persisted to crema.ai.reasoning so a refresh keeps the operator's
   intent, wiped together with the conversation on Clear.

When sending, withReasoning() merges reasoning_effort into the request
body as a top-level field. The proxy forwards it untouched to OpenAI /
DeepSeek (native field) and translates to Anthropic's thinking block
server-side.

reasoningEffortRef sidesteps a useCallback ordering issue —
regenerateLast/continueLast are declared before the state hook, so
they read the ref instead of a stale closure.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
jules
2026-05-02 20:15:13 +10:00
parent 20494d1620
commit c379ebc37a
3 changed files with 179 additions and 4 deletions

View File

@@ -51,12 +51,14 @@ import {
getUsageByModel, getUsageByModel,
getUsageSummary, getUsageSummary,
listConfigurations, listConfigurations,
REASONING_EFFORTS,
updateConfiguration, updateConfiguration,
type CatalogEntry, type CatalogEntry,
type LlmConfiguration, type LlmConfiguration,
type LlmConfigurationInput, type LlmConfigurationInput,
type LlmProvider, type LlmProvider,
type LlmUsageSummary, type LlmUsageSummary,
type ReasoningEffort,
type UsageByModelRow, type UsageByModelRow,
} from "~/lib/arcadia/llm-configs" } from "~/lib/arcadia/llm-configs"
import { listSecrets, type Secret } from "~/lib/arcadia/secrets" import { listSecrets, type Secret } from "~/lib/arcadia/secrets"
@@ -401,6 +403,15 @@ function ConfigRow({
<span className="text-[11px] text-muted-foreground"> <span className="text-[11px] text-muted-foreground">
{formatRate(c.input_cost_per_million)}/1M in ·{" "} {formatRate(c.input_cost_per_million)}/1M in ·{" "}
{formatRate(c.output_cost_per_million)}/1M out {formatRate(c.output_cost_per_million)}/1M out
{c.reasoning_effort && c.reasoning_effort !== "off" ? (
<>
{" "}
· <span className="uppercase tracking-wider">think</span>{" "}
<span className="text-[var(--console-amber,oklch(0.78_0.15_60))]">
{c.reasoning_effort}
</span>
</>
) : null}
</span> </span>
</div> </div>
</div> </div>
@@ -480,6 +491,7 @@ function ConfigDialog({
input_cost_per_million: existing.input_cost_per_million, input_cost_per_million: existing.input_cost_per_million,
output_cost_per_million: existing.output_cost_per_million, output_cost_per_million: existing.output_cost_per_million,
enabled: existing.enabled, enabled: existing.enabled,
reasoning_effort: existing.reasoning_effort,
} }
: emptyDraft(), : emptyDraft(),
) )
@@ -612,6 +624,34 @@ function ConfigDialog({
placeholder="0.60" placeholder="0.60"
/> />
</Field> </Field>
<Field label="Reasoning effort (thinking models)" className="sm:col-span-2">
<Select
value={draft.reasoning_effort ?? "off"}
onValueChange={(v) =>
setDraft({
...draft,
reasoning_effort: (v === "off" ? null : v) as ReasoningEffort | null,
})
}
>
<SelectTrigger>
<SelectValue />
</SelectTrigger>
<SelectContent>
{REASONING_EFFORTS.map((e) => (
<SelectItem key={e} value={e}>
<span className="flex items-center justify-between gap-3">
<span className="capitalize">{e}</span>
<span className="text-[10px] text-muted-foreground">
{reasoningHint(e)}
</span>
</span>
</SelectItem>
))}
</SelectContent>
</Select>
</Field>
</div> </div>
{err ? ( {err ? (
@@ -843,3 +883,18 @@ function formatRate(rate: number | null): string {
if (rate === 0) return "free" if (rate === 0) return "free"
return `$${rate.toFixed(2)}` return `$${rate.toFixed(2)}`
} }
/** Short budget hint rendered next to each reasoning-effort option. */
const REASONING_HINTS: Record<ReasoningEffort, string> = {
  off: "no thinking",
  low: "~2k thinking tokens",
  medium: "~8k thinking tokens",
  high: "~24k thinking tokens",
  max: "~64k — slowest, most thorough",
}

/**
 * Returns the human-readable hint for a reasoning-effort level.
 *
 * @param e - The reasoning-effort level to describe.
 * @returns The hint text shown beside the level name in the Select.
 */
function reasoningHint(e: ReasoningEffort): string {
  return REASONING_HINTS[e]
}

View File

@@ -12,6 +12,20 @@ import type { ArcadiaClient } from "@crema/arcadia-client"
export type LlmProvider = "openai" | "anthropic" | "deepseek" | "qwen" | "lmstudio" export type LlmProvider = "openai" | "anthropic" | "deepseek" | "qwen" | "lmstudio"
/**
 * Thinking-mode level attached to an LLM configuration.
 *
 * OpenAI and DeepSeek accept `reasoning_effort` natively, so the value is
 * forwarded as-is; for Anthropic it is translated server-side into a
 * thinking block. `off` (or a null value) means the field is not sent.
 */
export type ReasoningEffort = "off" | "low" | "medium" | "high" | "max"

/** Every reasoning-effort level, ordered from `off` up to `max`. */
export const REASONING_EFFORTS: ReasoningEffort[] = ["off", "low", "medium", "high", "max"]
export interface LlmConfiguration { export interface LlmConfiguration {
id: string id: string
tenant_id: string | null tenant_id: string | null
@@ -23,6 +37,7 @@ export interface LlmConfiguration {
input_cost_per_million: number | null input_cost_per_million: number | null
output_cost_per_million: number | null output_cost_per_million: number | null
enabled: boolean enabled: boolean
reasoning_effort: ReasoningEffort | null
metadata: Record<string, unknown> metadata: Record<string, unknown>
inserted_at: string inserted_at: string
updated_at: string updated_at: string
@@ -39,6 +54,7 @@ export interface LlmConfigurationInput {
input_cost_per_million?: number | null input_cost_per_million?: number | null
output_cost_per_million?: number | null output_cost_per_million?: number | null
enabled?: boolean enabled?: boolean
reasoning_effort?: ReasoningEffort | null
metadata?: Record<string, unknown> metadata?: Record<string, unknown>
} }

View File

@@ -19,6 +19,7 @@ import {
Plus, Plus,
RefreshCw, RefreshCw,
RotateCcw, RotateCcw,
Sparkles,
Square, Square,
Trash2, Trash2,
Undo2, Undo2,
@@ -179,6 +180,30 @@ function clearLive() {
localStorage.removeItem(LIVE_KEY) localStorage.removeItem(LIVE_KEY)
} }
/* Reasoning override chosen in the composer. */
type ReasoningEffort = "off" | "low" | "medium" | "high" | "max"

/* The composer chip steps through this array in order and wraps around,
 * so the ordering here is the cycle order. */
const REASONING_LEVELS: ReasoningEffort[] = [
  "off",
  "low",
  "medium",
  "high",
  "max",
]

/* localStorage key under which the current override is persisted. */
const REASONING_KEY = "crema.ai.reasoning"
/**
 * Reads the persisted reasoning override from localStorage.
 *
 * Falls back to "off" when there is no `window` (non-browser execution),
 * when nothing is stored, when the stored string is not one of
 * REASONING_LEVELS, or when storage access throws (for example because the
 * browser blocks storage for this origin). The fallback on a throw matters
 * because this runs inside a `useState` initializer, where an exception
 * would break the render.
 *
 * @returns The stored level, or "off" if none can be read.
 */
function loadReasoning(): ReasoningEffort {
  if (typeof window === "undefined") return "off"
  try {
    const stored = localStorage.getItem(REASONING_KEY)
    // Match against the known levels instead of casting the raw string, so
    // the return value is a checked ReasoningEffort.
    const known = REASONING_LEVELS.find((level) => level === stored)
    return known ?? "off"
  } catch {
    // Storage is unavailable; behave as if nothing was persisted.
    return "off"
  }
}
/**
 * Persists the reasoning override to localStorage.
 *
 * "off" removes the key rather than storing the literal, so an absent key
 * and "off" are the same state. Does nothing when there is no `window`.
 * Persistence is best-effort: if storage throws (quota exceeded, storage
 * blocked) the error is swallowed so the calling effect can continue.
 *
 * @param v - The level to persist.
 */
function saveReasoning(v: ReasoningEffort) {
  if (typeof window === "undefined") return
  try {
    if (v === "off") localStorage.removeItem(REASONING_KEY)
    else localStorage.setItem(REASONING_KEY, v)
  } catch {
    // Losing the persisted preference is acceptable; the in-memory state
    // held by the caller is unaffected.
  }
}
/**
 * Adds a top-level `reasoning_effort` field to a set of request extras.
 *
 * @param extras - Extra request fields to send along with the message.
 * @param effort - The reasoning level in effect for this send.
 * @returns `extras` itself (same reference) when `effort` is "off";
 *   otherwise a shallow copy of `extras` with `reasoning_effort` set.
 */
function withReasoning<T extends Record<string, unknown>>(
  extras: T,
  effort: ReasoningEffort,
): T & { reasoning_effort?: string } {
  if (effort !== "off") {
    const merged = { ...extras, reasoning_effort: effort }
    return merged
  }
  // "off" sends nothing extra, so hand the input back untouched.
  return extras
}
type StoredMessage = { role: "user" | "assistant"; content: string } type StoredMessage = { role: "user" | "assistant"; content: string }
function loadAISnapshot(): StoredMessage[] | null { function loadAISnapshot(): StoredMessage[] | null {
if (typeof window === "undefined") return null if (typeof window === "undefined") return null
@@ -512,6 +537,7 @@ function ChatSurface({
setMessages([]) setMessages([])
setAgentHistory(new Map()) setAgentHistory(new Map())
setMessageAgents(new Map()) setMessageAgents(new Map())
setReasoningEffort("off")
}, [setMessages]) }, [setMessages])
// Auto tool-loop using native function calls. Reads run automatically; // Auto tool-loop using native function calls. Reads run automatically;
@@ -520,6 +546,10 @@ function ChatSurface({
const toolIterationsRef = useRef(0) const toolIterationsRef = useRef(0)
const processedTurnRef = useRef(-1) const processedTurnRef = useRef(-1)
const prevStreamingRef = useRef(isStreaming) const prevStreamingRef = useRef(isStreaming)
// Mirror of reasoningEffort state, kept current via the effect below so
// regenerate/continue callbacks (declared before the state hook) can
// read the latest value without becoming reasoningEffort dependents.
const reasoningEffortRef = useRef<ReasoningEffort>("off")
// Maintain agent-history. Two triggers: // Maintain agent-history. Two triggers:
// 1. When a turn finishes streaming and at least one user/assistant // 1. When a turn finishes streaming and at least one user/assistant
@@ -747,12 +777,18 @@ function ChatSurface({
const text = messages[lastUserIdx].content const text = messages[lastUserIdx].content
setMessages(messages.slice(0, lastUserIdx)) setMessages(messages.slice(0, lastUserIdx))
// Defer so the state flush completes before send() reads `messages`. // Defer so the state flush completes before send() reads `messages`.
setTimeout(() => void send(text, { tools: getOpenAITools() }), 0) setTimeout(
() => void send(text, withReasoning({ tools: getOpenAITools() }, reasoningEffortRef.current)),
0,
)
}, [messages, setMessages, send, isStreaming]) }, [messages, setMessages, send, isStreaming])
const continueLast = useCallback(() => { const continueLast = useCallback(() => {
if (isStreaming || messages.length === 0) return if (isStreaming || messages.length === 0) return
void send("Please continue your previous reply.", { tools: getOpenAITools() }) void send(
"Please continue your previous reply.",
withReasoning({ tools: getOpenAITools() }, reasoningEffortRef.current),
)
}, [isStreaming, messages.length, send]) }, [isStreaming, messages.length, send])
const compactConversation = useCallback(async () => { const compactConversation = useCallback(async () => {
@@ -834,13 +870,31 @@ function ChatSurface({
endRef.current?.scrollIntoView({ block: "end" }) endRef.current?.scrollIntoView({ block: "end" })
}, [messages.length, lastContent, isStreaming]) }, [messages.length, lastContent, isStreaming])
// Per-conversation reasoning override. Persists across page reloads via
// localStorage so the operator's chosen level survives a refresh, but
// resets when they clear the conversation. "off" = pass nothing through.
const [reasoningEffort, setReasoningEffort] = useState<ReasoningEffort>(
() => loadReasoning(),
)
useEffect(() => {
saveReasoning(reasoningEffort)
reasoningEffortRef.current = reasoningEffort
}, [reasoningEffort])
const cycleReasoning = useCallback(() => {
setReasoningEffort((cur) => {
const idx = REASONING_LEVELS.indexOf(cur)
return REASONING_LEVELS[(idx + 1) % REASONING_LEVELS.length]
})
}, [])
const submit = useCallback(() => { const submit = useCallback(() => {
const text = input.trim() const text = input.trim()
if (!text || isStreaming) return if (!text || isStreaming) return
setInput("") setInput("")
stickRef.current = true stickRef.current = true
void send(text, { tools: getOpenAITools() }) void send(text, withReasoning({ tools: getOpenAITools() }, reasoningEffort))
}, [input, isStreaming, send]) }, [input, isStreaming, send, reasoningEffort])
const isEmpty = messages.length === 0 const isEmpty = messages.length === 0
@@ -1052,6 +1106,8 @@ function ChatSurface({
isMock={isMock} isMock={isMock}
isCompacting={compacting} isCompacting={compacting}
placeholder={isEmpty ? "Ask anything…" : "Reply…"} placeholder={isEmpty ? "Ask anything…" : "Reply…"}
reasoning={reasoningEffort}
onCycleReasoning={cycleReasoning}
/> />
{showPromptOpen && ( {showPromptOpen && (
<SystemPromptDialog <SystemPromptDialog
@@ -1303,6 +1359,8 @@ function Composer({
isMock, isMock,
isCompacting, isCompacting,
placeholder, placeholder,
reasoning,
onCycleReasoning,
}: { }: {
value: string value: string
onChange: (v: string) => void onChange: (v: string) => void
@@ -1331,6 +1389,8 @@ function Composer({
isMock: boolean isMock: boolean
isCompacting: boolean isCompacting: boolean
placeholder: string placeholder: string
reasoning: ReasoningEffort
onCycleReasoning: () => void
}) { }) {
const taRef = useRef<HTMLTextAreaElement | null>(null) const taRef = useRef<HTMLTextAreaElement | null>(null)
@@ -1410,6 +1470,7 @@ function Composer({
model={model} model={model}
onModelChange={onModelChange} onModelChange={onModelChange}
/> />
<ReasoningChip value={reasoning} onCycle={onCycleReasoning} />
<VoiceInputButton <VoiceInputButton
onTranscript={(t) => onChange((value ? value + " " : "") + t)} onTranscript={(t) => onChange((value ? value + " " : "") + t)}
/> />
@@ -1470,6 +1531,49 @@ function ModelSelector({
) )
} }
/**
 * Reasoning-effort chip for the composer.
 *
 * Always rendered as a button. When `value` is "off" it shows a muted
 * "think" label; for any other level it switches to an amber pill and
 * appends the level name. Clicking calls `onCycle`; choosing the next
 * level is the caller's responsibility.
 *
 * @param value - The reasoning level currently in effect.
 * @param onCycle - Invoked on click to advance to the next level.
 */
function ReasoningChip({
  value,
  onCycle,
}: {
  value: ReasoningEffort
  onCycle: () => void
}) {
  const active = value !== "off"

  // Tooltip spells out the current state and what a click will do.
  const tooltip = active
    ? `Reasoning: ${value}. Click to cycle.`
    : "Reasoning: off. Click to enable thinking mode."

  // Amber treatment when a level is set, muted text otherwise.
  const tone = active
    ? "bg-amber-500/15 text-amber-500 hover:bg-amber-500/25 dark:text-amber-300"
    : "text-muted-foreground hover:bg-accent hover:text-foreground"
  const chipClass = `inline-flex items-center gap-1.5 rounded-full px-2.5 py-1 text-[11px] font-mono uppercase tracking-[0.12em] transition-colors ${tone}`

  return (
    <button
      type="button"
      onClick={onCycle}
      data-action="ai-reasoning"
      title={tooltip}
      className={chipClass}
    >
      <Sparkles className="size-3" />
      <span className="select-none">
        think
        {active && <span className="ml-1 font-semibold">{value}</span>}
      </span>
    </button>
  )
}
function AgentChip({ function AgentChip({
agents, agents,
activeAgent, activeAgent,