ai: per-config reasoning_effort + composer THINK chip
Two layers for thinking-mode control: 1. Per-config default (Settings → LLM) New "Reasoning effort" Select in the Add/Edit dialog with off/low/medium/high/max + a budget hint per option (~2k, ~8k, ~24k, ~64k thinking tokens). Saved row meta line surfaces the level inline so it's visible without opening the editor. 2. Per-message override (composer chip) New ReasoningChip next to the model picker. Click cycles through the same five levels. Hidden chrome when off (muted "think" pill); sodium-amber active style with the level label when set. Persisted to crema.ai.reasoning so a refresh keeps the operator's intent, wiped together with the conversation on Clear. When sending, withReasoning() merges reasoning_effort into the request body as a top-level field. The proxy forwards it untouched to OpenAI / DeepSeek (native field) and translates to Anthropic's thinking block server-side. reasoningEffortRef sidesteps a useCallback ordering issue — regenerateLast/continueLast are declared before the state hook, so they read the ref instead of a stale closure. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -51,12 +51,14 @@ import {
|
||||
getUsageByModel,
|
||||
getUsageSummary,
|
||||
listConfigurations,
|
||||
REASONING_EFFORTS,
|
||||
updateConfiguration,
|
||||
type CatalogEntry,
|
||||
type LlmConfiguration,
|
||||
type LlmConfigurationInput,
|
||||
type LlmProvider,
|
||||
type LlmUsageSummary,
|
||||
type ReasoningEffort,
|
||||
type UsageByModelRow,
|
||||
} from "~/lib/arcadia/llm-configs"
|
||||
import { listSecrets, type Secret } from "~/lib/arcadia/secrets"
|
||||
@@ -401,6 +403,15 @@ function ConfigRow({
|
||||
<span className="text-[11px] text-muted-foreground">
|
||||
{formatRate(c.input_cost_per_million)}/1M in ·{" "}
|
||||
{formatRate(c.output_cost_per_million)}/1M out
|
||||
{c.reasoning_effort && c.reasoning_effort !== "off" ? (
|
||||
<>
|
||||
{" "}
|
||||
· <span className="uppercase tracking-wider">think</span>{" "}
|
||||
<span className="text-[var(--console-amber,oklch(0.78_0.15_60))]">
|
||||
{c.reasoning_effort}
|
||||
</span>
|
||||
</>
|
||||
) : null}
|
||||
</span>
|
||||
</div>
|
||||
</div>
|
||||
@@ -480,6 +491,7 @@ function ConfigDialog({
|
||||
input_cost_per_million: existing.input_cost_per_million,
|
||||
output_cost_per_million: existing.output_cost_per_million,
|
||||
enabled: existing.enabled,
|
||||
reasoning_effort: existing.reasoning_effort,
|
||||
}
|
||||
: emptyDraft(),
|
||||
)
|
||||
@@ -612,6 +624,34 @@ function ConfigDialog({
|
||||
placeholder="0.60"
|
||||
/>
|
||||
</Field>
|
||||
|
||||
<Field label="Reasoning effort (thinking models)" className="sm:col-span-2">
|
||||
<Select
|
||||
value={draft.reasoning_effort ?? "off"}
|
||||
onValueChange={(v) =>
|
||||
setDraft({
|
||||
...draft,
|
||||
reasoning_effort: (v === "off" ? null : v) as ReasoningEffort | null,
|
||||
})
|
||||
}
|
||||
>
|
||||
<SelectTrigger>
|
||||
<SelectValue />
|
||||
</SelectTrigger>
|
||||
<SelectContent>
|
||||
{REASONING_EFFORTS.map((e) => (
|
||||
<SelectItem key={e} value={e}>
|
||||
<span className="flex items-center justify-between gap-3">
|
||||
<span className="capitalize">{e}</span>
|
||||
<span className="text-[10px] text-muted-foreground">
|
||||
{reasoningHint(e)}
|
||||
</span>
|
||||
</span>
|
||||
</SelectItem>
|
||||
))}
|
||||
</SelectContent>
|
||||
</Select>
|
||||
</Field>
|
||||
</div>
|
||||
|
||||
{err ? (
|
||||
@@ -843,3 +883,18 @@ function formatRate(rate: number | null): string {
|
||||
if (rate === 0) return "free"
|
||||
return `$${rate.toFixed(2)}`
|
||||
}
|
||||
|
||||
/**
 * Short hint text shown beside a reasoning-effort level in the config
 * dialog's Select.
 *
 * @param e - The reasoning-effort level being rendered.
 * @returns The human-readable hint for that level.
 */
function reasoningHint(e: ReasoningEffort): string {
  // One entry per level; the Record type makes a missing level a compile error.
  const hintByLevel: Record<ReasoningEffort, string> = {
    off: "no thinking",
    low: "~2k thinking tokens",
    medium: "~8k thinking tokens",
    high: "~24k thinking tokens",
    max: "~64k — slowest, most thorough",
  }
  return hintByLevel[e]
}
|
||||
|
||||
@@ -12,6 +12,20 @@ import type { ArcadiaClient } from "@crema/arcadia-client"
|
||||
|
||||
export type LlmProvider = "openai" | "anthropic" | "deepseek" | "qwen" | "lmstudio"
|
||||
|
||||
/**
 * Level of reasoning ("thinking") effort a configuration can request.
 * OpenAI and DeepSeek accept `reasoning_effort` natively, so the value is
 * sent to them verbatim; for Anthropic it is translated server-side into a
 * thinking block. `off` (like null) means the field is omitted entirely.
 */
export type ReasoningEffort = "off" | "low" | "medium" | "high" | "max"

/** Every reasoning-effort level, listed from `off` up to `max`. */
export const REASONING_EFFORTS: ReasoningEffort[] = ["off", "low", "medium", "high", "max"]
|
||||
|
||||
export interface LlmConfiguration {
|
||||
id: string
|
||||
tenant_id: string | null
|
||||
@@ -23,6 +37,7 @@ export interface LlmConfiguration {
|
||||
input_cost_per_million: number | null
|
||||
output_cost_per_million: number | null
|
||||
enabled: boolean
|
||||
reasoning_effort: ReasoningEffort | null
|
||||
metadata: Record<string, unknown>
|
||||
inserted_at: string
|
||||
updated_at: string
|
||||
@@ -39,6 +54,7 @@ export interface LlmConfigurationInput {
|
||||
input_cost_per_million?: number | null
|
||||
output_cost_per_million?: number | null
|
||||
enabled?: boolean
|
||||
reasoning_effort?: ReasoningEffort | null
|
||||
metadata?: Record<string, unknown>
|
||||
}
|
||||
|
||||
|
||||
@@ -19,6 +19,7 @@ import {
|
||||
Plus,
|
||||
RefreshCw,
|
||||
RotateCcw,
|
||||
Sparkles,
|
||||
Square,
|
||||
Trash2,
|
||||
Undo2,
|
||||
@@ -179,6 +180,30 @@ function clearLive() {
|
||||
localStorage.removeItem(LIVE_KEY)
|
||||
}
|
||||
|
||||
/* localStorage key under which the per-conversation reasoning override
 * is persisted. */
const REASONING_KEY = "crema.ai.reasoning"

/* Levels available to the per-conversation override. */
type ReasoningEffort = "off" | "low" | "medium" | "high" | "max"

/* Order is significant: the composer chip steps through this array in
 * sequence when cycling. */
const REASONING_LEVELS: ReasoningEffort[] = [
  "off",
  "low",
  "medium",
  "high",
  "max",
]
|
||||
|
||||
/**
 * Read the persisted reasoning override from localStorage.
 *
 * Falls back to "off" when there is no `window` global, when storage
 * cannot be read, or when the stored value is not one of
 * REASONING_LEVELS.
 *
 * @returns The stored level if it is valid, otherwise "off".
 */
function loadReasoning(): ReasoningEffort {
  if (typeof window === "undefined") return "off"
  let stored: string | null
  try {
    stored = localStorage.getItem(REASONING_KEY)
  } catch {
    // Storage access can throw (e.g. when the browser blocks it). This runs
    // while initialising state, so degrade to the default instead of
    // letting the exception escape.
    return "off"
  }
  // Validate by membership rather than asserting the type of untrusted text.
  return REASONING_LEVELS.find((level) => level === stored) ?? "off"
}
|
||||
/**
 * Persist the reasoning override to localStorage under REASONING_KEY.
 *
 * "off" removes the key instead of storing it. Does nothing when there is
 * no `window` global. Persistence is best-effort: a storage failure is
 * swallowed so it cannot interrupt the caller.
 *
 * @param v - The level to persist.
 */
function saveReasoning(v: ReasoningEffort): void {
  if (typeof window === "undefined") return
  try {
    if (v === "off") localStorage.removeItem(REASONING_KEY)
    else localStorage.setItem(REASONING_KEY, v)
  } catch {
    // Writes can throw (quota exceeded, storage blocked). Losing the
    // persisted preference is acceptable; the in-memory value still applies.
  }
}
|
||||
/**
 * Merge a reasoning-effort level into a set of request extras as a
 * top-level `reasoning_effort` field.
 *
 * "off" means "send nothing": the extras are returned unchanged (same
 * object). A null or undefined effort is treated the same way, so a
 * nullable level can be passed without emitting `reasoning_effort: null`.
 *
 * @param extras - Extra request fields to send alongside the message.
 * @param effort - Level to attach; "off", null, or undefined attaches nothing.
 * @returns `extras` itself when nothing is attached, otherwise a shallow
 *   copy of `extras` with `reasoning_effort` set to `effort`.
 */
function withReasoning<T extends Record<string, unknown>>(
  extras: T,
  effort: ReasoningEffort | null | undefined,
): T & { reasoning_effort?: string } {
  if (effort == null || effort === "off") return extras
  return { ...extras, reasoning_effort: effort }
}
|
||||
|
||||
type StoredMessage = { role: "user" | "assistant"; content: string }
|
||||
function loadAISnapshot(): StoredMessage[] | null {
|
||||
if (typeof window === "undefined") return null
|
||||
@@ -512,6 +537,7 @@ function ChatSurface({
|
||||
setMessages([])
|
||||
setAgentHistory(new Map())
|
||||
setMessageAgents(new Map())
|
||||
setReasoningEffort("off")
|
||||
}, [setMessages])
|
||||
|
||||
// Auto tool-loop using native function calls. Reads run automatically;
|
||||
@@ -520,6 +546,10 @@ function ChatSurface({
|
||||
const toolIterationsRef = useRef(0)
|
||||
const processedTurnRef = useRef(-1)
|
||||
const prevStreamingRef = useRef(isStreaming)
|
||||
// Mirror of reasoningEffort state, kept current via the effect below so
|
||||
// regenerate/continue callbacks (declared before the state hook) can
|
||||
// read the latest value without becoming reasoningEffort dependents.
|
||||
const reasoningEffortRef = useRef<ReasoningEffort>("off")
|
||||
|
||||
// Maintain agent-history. Two triggers:
|
||||
// 1. When a turn finishes streaming and at least one user/assistant
|
||||
@@ -747,12 +777,18 @@ function ChatSurface({
|
||||
const text = messages[lastUserIdx].content
|
||||
setMessages(messages.slice(0, lastUserIdx))
|
||||
// Defer so the state flush completes before send() reads `messages`.
|
||||
setTimeout(() => void send(text, { tools: getOpenAITools() }), 0)
|
||||
setTimeout(
|
||||
() => void send(text, withReasoning({ tools: getOpenAITools() }, reasoningEffortRef.current)),
|
||||
0,
|
||||
)
|
||||
}, [messages, setMessages, send, isStreaming])
|
||||
|
||||
const continueLast = useCallback(() => {
|
||||
if (isStreaming || messages.length === 0) return
|
||||
void send("Please continue your previous reply.", { tools: getOpenAITools() })
|
||||
void send(
|
||||
"Please continue your previous reply.",
|
||||
withReasoning({ tools: getOpenAITools() }, reasoningEffortRef.current),
|
||||
)
|
||||
}, [isStreaming, messages.length, send])
|
||||
|
||||
const compactConversation = useCallback(async () => {
|
||||
@@ -834,13 +870,31 @@ function ChatSurface({
|
||||
endRef.current?.scrollIntoView({ block: "end" })
|
||||
}, [messages.length, lastContent, isStreaming])
|
||||
|
||||
// Per-conversation reasoning override. The initial value is read from
// localStorage so a chosen level survives a page reload. "off" = pass
// nothing through.
const [reasoningEffort, setReasoningEffort] = useState<ReasoningEffort>(
  () => loadReasoning(),
)
useEffect(() => {
  // Update the ref before persisting: callbacks that read
  // reasoningEffortRef must see the new level even if the storage write
  // below throws.
  reasoningEffortRef.current = reasoningEffort
  saveReasoning(reasoningEffort)
}, [reasoningEffort])
|
||||
|
||||
// Step the override to the next entry of REASONING_LEVELS, wrapping from
// the last entry back to the first. An unrecognised current value
// (indexOf === -1) lands on the first entry.
const cycleReasoning = useCallback(() => {
  setReasoningEffort((current) => {
    const nextIndex = (REASONING_LEVELS.indexOf(current) + 1) % REASONING_LEVELS.length
    return REASONING_LEVELS[nextIndex]
  })
}, [])
|
||||
|
||||
const submit = useCallback(() => {
|
||||
const text = input.trim()
|
||||
if (!text || isStreaming) return
|
||||
setInput("")
|
||||
stickRef.current = true
|
||||
void send(text, { tools: getOpenAITools() })
|
||||
}, [input, isStreaming, send])
|
||||
void send(text, withReasoning({ tools: getOpenAITools() }, reasoningEffort))
|
||||
}, [input, isStreaming, send, reasoningEffort])
|
||||
|
||||
const isEmpty = messages.length === 0
|
||||
|
||||
@@ -1052,6 +1106,8 @@ function ChatSurface({
|
||||
isMock={isMock}
|
||||
isCompacting={compacting}
|
||||
placeholder={isEmpty ? "Ask anything…" : "Reply…"}
|
||||
reasoning={reasoningEffort}
|
||||
onCycleReasoning={cycleReasoning}
|
||||
/>
|
||||
{showPromptOpen && (
|
||||
<SystemPromptDialog
|
||||
@@ -1303,6 +1359,8 @@ function Composer({
|
||||
isMock,
|
||||
isCompacting,
|
||||
placeholder,
|
||||
reasoning,
|
||||
onCycleReasoning,
|
||||
}: {
|
||||
value: string
|
||||
onChange: (v: string) => void
|
||||
@@ -1331,6 +1389,8 @@ function Composer({
|
||||
isMock: boolean
|
||||
isCompacting: boolean
|
||||
placeholder: string
|
||||
reasoning: ReasoningEffort
|
||||
onCycleReasoning: () => void
|
||||
}) {
|
||||
const taRef = useRef<HTMLTextAreaElement | null>(null)
|
||||
|
||||
@@ -1410,6 +1470,7 @@ function Composer({
|
||||
model={model}
|
||||
onModelChange={onModelChange}
|
||||
/>
|
||||
<ReasoningChip value={reasoning} onCycle={onCycleReasoning} />
|
||||
<VoiceInputButton
|
||||
onTranscript={(t) => onChange((value ? value + " " : "") + t)}
|
||||
/>
|
||||
@@ -1470,6 +1531,49 @@ function ModelSelector({
|
||||
)
|
||||
}
|
||||
|
||||
/**
 * Composer chip that displays the reasoning-effort override and lets the
 * operator change it.
 *
 * Each click calls `onCycle`; the chip itself holds no state. It is always
 * rendered: when `value` is "off" it is a muted "think" pill, and for any
 * other level it switches to the amber style and appends the level name.
 * The tooltip text changes with the state in the same way.
 *
 * @param value - Currently selected reasoning-effort level.
 * @param onCycle - Called on click to advance to the next level.
 */
function ReasoningChip({
  value,
  onCycle,
}: {
  value: ReasoningEffort
  onCycle: () => void
}) {
  const isOn = value !== "off"

  const tooltip = isOn
    ? `Reasoning: ${value}. Click to cycle.`
    : "Reasoning: off. Click to enable thinking mode."

  // Shared pill geometry and typography, followed by the state-dependent colours.
  const baseClasses =
    "inline-flex items-center gap-1.5 rounded-full px-2.5 py-1 text-[11px] font-mono uppercase tracking-[0.12em] transition-colors"
  const toneClasses = isOn
    ? "bg-amber-500/15 text-amber-500 hover:bg-amber-500/25 dark:text-amber-300"
    : "text-muted-foreground hover:bg-accent hover:text-foreground"

  return (
    <button
      type="button"
      onClick={onCycle}
      data-action="ai-reasoning"
      title={tooltip}
      className={`${baseClasses} ${toneClasses}`}
    >
      <Sparkles className="size-3" />
      <span className="select-none">
        think
        {isOn ? <span className="ml-1 font-semibold">{value}</span> : null}
      </span>
    </button>
  )
}
|
||||
|
||||
function AgentChip({
|
||||
agents,
|
||||
activeAgent,
|
||||
|
||||
Reference in New Issue
Block a user