ai: per-config reasoning_effort + composer THINK chip
Two layers of thinking-mode control: 1. Per-config default (Settings → LLM): a new "Reasoning effort" Select in the Add/Edit dialog with off/low/medium/high/max plus a budget hint per option (~2k, ~8k, ~24k, ~64k thinking tokens). The saved row's meta line surfaces the level inline so it is visible without opening the editor. 2. Per-message override (composer chip): a new ReasoningChip next to the model picker. Clicking cycles through the same five levels. Minimal chrome when off (a muted "think" pill); sodium-amber active style with the level label when set. Persisted to crema.ai.reasoning so a refresh keeps the operator's intent, and wiped together with the conversation on Clear. When sending, withReasoning() merges reasoning_effort into the request body as a top-level field. The proxy forwards it untouched to OpenAI / DeepSeek (native field) and translates it to Anthropic's thinking block server-side. reasoningEffortRef sidesteps a useCallback ordering issue — regenerateLast/continueLast are declared before the state hook, so they read the ref instead of a stale closure. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -51,12 +51,14 @@ import {
|
|||||||
getUsageByModel,
|
getUsageByModel,
|
||||||
getUsageSummary,
|
getUsageSummary,
|
||||||
listConfigurations,
|
listConfigurations,
|
||||||
|
REASONING_EFFORTS,
|
||||||
updateConfiguration,
|
updateConfiguration,
|
||||||
type CatalogEntry,
|
type CatalogEntry,
|
||||||
type LlmConfiguration,
|
type LlmConfiguration,
|
||||||
type LlmConfigurationInput,
|
type LlmConfigurationInput,
|
||||||
type LlmProvider,
|
type LlmProvider,
|
||||||
type LlmUsageSummary,
|
type LlmUsageSummary,
|
||||||
|
type ReasoningEffort,
|
||||||
type UsageByModelRow,
|
type UsageByModelRow,
|
||||||
} from "~/lib/arcadia/llm-configs"
|
} from "~/lib/arcadia/llm-configs"
|
||||||
import { listSecrets, type Secret } from "~/lib/arcadia/secrets"
|
import { listSecrets, type Secret } from "~/lib/arcadia/secrets"
|
||||||
@@ -401,6 +403,15 @@ function ConfigRow({
|
|||||||
<span className="text-[11px] text-muted-foreground">
|
<span className="text-[11px] text-muted-foreground">
|
||||||
{formatRate(c.input_cost_per_million)}/1M in ·{" "}
|
{formatRate(c.input_cost_per_million)}/1M in ·{" "}
|
||||||
{formatRate(c.output_cost_per_million)}/1M out
|
{formatRate(c.output_cost_per_million)}/1M out
|
||||||
|
{c.reasoning_effort && c.reasoning_effort !== "off" ? (
|
||||||
|
<>
|
||||||
|
{" "}
|
||||||
|
· <span className="uppercase tracking-wider">think</span>{" "}
|
||||||
|
<span className="text-[var(--console-amber,oklch(0.78_0.15_60))]">
|
||||||
|
{c.reasoning_effort}
|
||||||
|
</span>
|
||||||
|
</>
|
||||||
|
) : null}
|
||||||
</span>
|
</span>
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
@@ -480,6 +491,7 @@ function ConfigDialog({
|
|||||||
input_cost_per_million: existing.input_cost_per_million,
|
input_cost_per_million: existing.input_cost_per_million,
|
||||||
output_cost_per_million: existing.output_cost_per_million,
|
output_cost_per_million: existing.output_cost_per_million,
|
||||||
enabled: existing.enabled,
|
enabled: existing.enabled,
|
||||||
|
reasoning_effort: existing.reasoning_effort,
|
||||||
}
|
}
|
||||||
: emptyDraft(),
|
: emptyDraft(),
|
||||||
)
|
)
|
||||||
@@ -612,6 +624,34 @@ function ConfigDialog({
|
|||||||
placeholder="0.60"
|
placeholder="0.60"
|
||||||
/>
|
/>
|
||||||
</Field>
|
</Field>
|
||||||
|
|
||||||
|
<Field label="Reasoning effort (thinking models)" className="sm:col-span-2">
|
||||||
|
<Select
|
||||||
|
value={draft.reasoning_effort ?? "off"}
|
||||||
|
onValueChange={(v) =>
|
||||||
|
setDraft({
|
||||||
|
...draft,
|
||||||
|
reasoning_effort: (v === "off" ? null : v) as ReasoningEffort | null,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
>
|
||||||
|
<SelectTrigger>
|
||||||
|
<SelectValue />
|
||||||
|
</SelectTrigger>
|
||||||
|
<SelectContent>
|
||||||
|
{REASONING_EFFORTS.map((e) => (
|
||||||
|
<SelectItem key={e} value={e}>
|
||||||
|
<span className="flex items-center justify-between gap-3">
|
||||||
|
<span className="capitalize">{e}</span>
|
||||||
|
<span className="text-[10px] text-muted-foreground">
|
||||||
|
{reasoningHint(e)}
|
||||||
|
</span>
|
||||||
|
</span>
|
||||||
|
</SelectItem>
|
||||||
|
))}
|
||||||
|
</SelectContent>
|
||||||
|
</Select>
|
||||||
|
</Field>
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
{err ? (
|
{err ? (
|
||||||
@@ -843,3 +883,18 @@ function formatRate(rate: number | null): string {
|
|||||||
if (rate === 0) return "free"
|
if (rate === 0) return "free"
|
||||||
return `$${rate.toFixed(2)}`
|
return `$${rate.toFixed(2)}`
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Short budget hint displayed beside a reasoning-effort option.
 *
 * @param e - Reasoning-effort level to describe.
 * @returns The hint text for the level, or an empty string if `e` is not one
 *   of the known levels.
 */
function reasoningHint(e: ReasoningEffort): string {
  switch (e) {
    case "off":
      return "no thinking"
    case "low":
      return "~2k thinking tokens"
    case "medium":
      return "~8k thinking tokens"
    case "high":
      return "~24k thinking tokens"
    case "max":
      return "~64k — slowest, most thorough"
    default: {
      // Compile-time exhaustiveness: adding a level to ReasoningEffort without
      // a matching case above makes this assignment fail to type-check.
      const unhandled: never = e
      void unhandled
      // Runtime guard: never fall off the end and hand back `undefined` from
      // a function typed to return a string.
      return ""
    }
  }
}
|
||||||
|
|||||||
@@ -12,6 +12,20 @@ import type { ArcadiaClient } from "@crema/arcadia-client"
|
|||||||
|
|
||||||
export type LlmProvider = "openai" | "anthropic" | "deepseek" | "qwen" | "lmstudio"
|
export type LlmProvider = "openai" | "anthropic" | "deepseek" | "qwen" | "lmstudio"
|
||||||
|
|
||||||
|
/**
 * Reasoning effort. Sent verbatim to OpenAI / DeepSeek (which take
 * `reasoning_effort` natively). Translated server-side into Anthropic's
 * thinking block. `off` (or null) skips the field entirely.
 */
|
||||||
|
export type ReasoningEffort = "off" | "low" | "medium" | "high" | "max"
|
||||||
|
/** Every reasoning-effort level, listed from `off` up to `max`. */
export const REASONING_EFFORTS: ReasoningEffort[] = [
|
||||||
|
"off",
|
||||||
|
"low",
|
||||||
|
"medium",
|
||||||
|
"high",
|
||||||
|
"max",
|
||||||
|
]
|
||||||
|
|
||||||
export interface LlmConfiguration {
|
export interface LlmConfiguration {
|
||||||
id: string
|
id: string
|
||||||
tenant_id: string | null
|
tenant_id: string | null
|
||||||
@@ -23,6 +37,7 @@ export interface LlmConfiguration {
|
|||||||
input_cost_per_million: number | null
|
input_cost_per_million: number | null
|
||||||
output_cost_per_million: number | null
|
output_cost_per_million: number | null
|
||||||
enabled: boolean
|
enabled: boolean
|
||||||
|
reasoning_effort: ReasoningEffort | null
|
||||||
metadata: Record<string, unknown>
|
metadata: Record<string, unknown>
|
||||||
inserted_at: string
|
inserted_at: string
|
||||||
updated_at: string
|
updated_at: string
|
||||||
@@ -39,6 +54,7 @@ export interface LlmConfigurationInput {
|
|||||||
input_cost_per_million?: number | null
|
input_cost_per_million?: number | null
|
||||||
output_cost_per_million?: number | null
|
output_cost_per_million?: number | null
|
||||||
enabled?: boolean
|
enabled?: boolean
|
||||||
|
reasoning_effort?: ReasoningEffort | null
|
||||||
metadata?: Record<string, unknown>
|
metadata?: Record<string, unknown>
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -19,6 +19,7 @@ import {
|
|||||||
Plus,
|
Plus,
|
||||||
RefreshCw,
|
RefreshCw,
|
||||||
RotateCcw,
|
RotateCcw,
|
||||||
|
Sparkles,
|
||||||
Square,
|
Square,
|
||||||
Trash2,
|
Trash2,
|
||||||
Undo2,
|
Undo2,
|
||||||
@@ -179,6 +180,30 @@ function clearLive() {
|
|||||||
localStorage.removeItem(LIVE_KEY)
|
localStorage.removeItem(LIVE_KEY)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Per-conversation reasoning override. Cycle order matters — the composer
 * chip walks this array. */
|
||||||
|
// NOTE(review): same five literals as the ReasoningEffort type exported from
// ~/lib/arcadia/llm-configs — presumably duplicated here on purpose; consider
// importing it so the two definitions cannot drift. TODO confirm.
type ReasoningEffort = "off" | "low" | "medium" | "high" | "max"
|
||||||
|
// Ordered from "off" to "max"; cycleReasoning() steps one index per click and
// wraps from the last entry back to the first.
const REASONING_LEVELS: ReasoningEffort[] = ["off", "low", "medium", "high", "max"]
|
||||||
|
// localStorage key read by loadReasoning() and written by saveReasoning().
const REASONING_KEY = "crema.ai.reasoning"
|
||||||
|
|
||||||
|
/**
 * Read the persisted reasoning level from localStorage.
 *
 * @returns The stored level when it is one of REASONING_LEVELS; otherwise
 *   "off" — which covers: no `window` (non-browser render), nothing stored,
 *   an unrecognised stored value, or storage access throwing.
 */
function loadReasoning(): ReasoningEffort {
  if (typeof window === "undefined") return "off"
  let stored: string | null
  try {
    stored = localStorage.getItem(REASONING_KEY)
  } catch {
    // Storage access can be denied by browser policy (SecurityError). This
    // function is used as a useState initializer, so degrade to the default
    // instead of throwing during render.
    return "off"
  }
  // Membership lookup doubles as validation of the untrusted stored string,
  // so no type assertion is needed.
  return REASONING_LEVELS.find((level) => level === stored) ?? "off"
}
|
||||||
|
/**
 * Persist the reasoning level to localStorage (best-effort).
 *
 * "off" is stored as the absence of the key; any other level is written
 * under REASONING_KEY. Does nothing when there is no `window`.
 *
 * @param v - Level to persist.
 */
function saveReasoning(v: ReasoningEffort): void {
  if (typeof window === "undefined") return
  try {
    if (v === "off") localStorage.removeItem(REASONING_KEY)
    else localStorage.setItem(REASONING_KEY, v)
  } catch {
    // Writes can fail (quota exceeded, storage blocked by policy). Persistence
    // is a convenience only, so a failed write must not break the caller —
    // this is invoked from a React effect.
  }
}
|
||||||
|
/**
 * Attach the reasoning level to a set of request extras.
 *
 * @param extras - Extra fields for the outgoing request; never mutated.
 * @param effort - Level to attach.
 * @returns `extras` itself (same reference) when `effort` is "off"; otherwise
 *   a shallow copy of `extras` with `reasoning_effort` set to `effort`.
 */
function withReasoning<T extends Record<string, unknown>>(
  extras: T,
  effort: ReasoningEffort,
): T & { reasoning_effort?: string } {
  if (effort !== "off") {
    const merged = { ...extras, reasoning_effort: effort }
    return merged
  }
  return extras
}
|
||||||
|
|
||||||
type StoredMessage = { role: "user" | "assistant"; content: string }
|
type StoredMessage = { role: "user" | "assistant"; content: string }
|
||||||
function loadAISnapshot(): StoredMessage[] | null {
|
function loadAISnapshot(): StoredMessage[] | null {
|
||||||
if (typeof window === "undefined") return null
|
if (typeof window === "undefined") return null
|
||||||
@@ -512,6 +537,7 @@ function ChatSurface({
|
|||||||
setMessages([])
|
setMessages([])
|
||||||
setAgentHistory(new Map())
|
setAgentHistory(new Map())
|
||||||
setMessageAgents(new Map())
|
setMessageAgents(new Map())
|
||||||
|
setReasoningEffort("off")
|
||||||
}, [setMessages])
|
}, [setMessages])
|
||||||
|
|
||||||
// Auto tool-loop using native function calls. Reads run automatically;
|
// Auto tool-loop using native function calls. Reads run automatically;
|
||||||
@@ -520,6 +546,10 @@ function ChatSurface({
|
|||||||
const toolIterationsRef = useRef(0)
|
const toolIterationsRef = useRef(0)
|
||||||
const processedTurnRef = useRef(-1)
|
const processedTurnRef = useRef(-1)
|
||||||
const prevStreamingRef = useRef(isStreaming)
|
const prevStreamingRef = useRef(isStreaming)
|
||||||
|
// Mirror of reasoningEffort state, kept current via the effect below so
|
||||||
|
// regenerate/continue callbacks (declared before the state hook) can
|
||||||
|
// read the latest value without becoming reasoningEffort dependents.
|
||||||
|
const reasoningEffortRef = useRef<ReasoningEffort>("off")
|
||||||
|
|
||||||
// Maintain agent-history. Two triggers:
|
// Maintain agent-history. Two triggers:
|
||||||
// 1. When a turn finishes streaming and at least one user/assistant
|
// 1. When a turn finishes streaming and at least one user/assistant
|
||||||
@@ -747,12 +777,18 @@ function ChatSurface({
|
|||||||
const text = messages[lastUserIdx].content
|
const text = messages[lastUserIdx].content
|
||||||
setMessages(messages.slice(0, lastUserIdx))
|
setMessages(messages.slice(0, lastUserIdx))
|
||||||
// Defer so the state flush completes before send() reads `messages`.
|
// Defer so the state flush completes before send() reads `messages`.
|
||||||
setTimeout(() => void send(text, { tools: getOpenAITools() }), 0)
|
setTimeout(
|
||||||
|
() => void send(text, withReasoning({ tools: getOpenAITools() }, reasoningEffortRef.current)),
|
||||||
|
0,
|
||||||
|
)
|
||||||
}, [messages, setMessages, send, isStreaming])
|
}, [messages, setMessages, send, isStreaming])
|
||||||
|
|
||||||
const continueLast = useCallback(() => {
|
const continueLast = useCallback(() => {
|
||||||
if (isStreaming || messages.length === 0) return
|
if (isStreaming || messages.length === 0) return
|
||||||
void send("Please continue your previous reply.", { tools: getOpenAITools() })
|
void send(
|
||||||
|
"Please continue your previous reply.",
|
||||||
|
withReasoning({ tools: getOpenAITools() }, reasoningEffortRef.current),
|
||||||
|
)
|
||||||
}, [isStreaming, messages.length, send])
|
}, [isStreaming, messages.length, send])
|
||||||
|
|
||||||
const compactConversation = useCallback(async () => {
|
const compactConversation = useCallback(async () => {
|
||||||
@@ -834,13 +870,31 @@ function ChatSurface({
|
|||||||
endRef.current?.scrollIntoView({ block: "end" })
|
endRef.current?.scrollIntoView({ block: "end" })
|
||||||
}, [messages.length, lastContent, isStreaming])
|
}, [messages.length, lastContent, isStreaming])
|
||||||
|
|
||||||
|
// Per-conversation reasoning override. Persists across page reloads via
|
||||||
|
// localStorage so the operator's chosen level survives a refresh, but
|
||||||
|
// resets when they clear the conversation. "off" = pass nothing through.
|
||||||
|
const [reasoningEffort, setReasoningEffort] = useState<ReasoningEffort>(
|
||||||
|
() => loadReasoning(),
|
||||||
|
)
|
||||||
|
useEffect(() => {
|
||||||
|
saveReasoning(reasoningEffort)
|
||||||
|
reasoningEffortRef.current = reasoningEffort
|
||||||
|
}, [reasoningEffort])
|
||||||
|
|
||||||
|
const cycleReasoning = useCallback(() => {
|
||||||
|
setReasoningEffort((cur) => {
|
||||||
|
const idx = REASONING_LEVELS.indexOf(cur)
|
||||||
|
return REASONING_LEVELS[(idx + 1) % REASONING_LEVELS.length]
|
||||||
|
})
|
||||||
|
}, [])
|
||||||
|
|
||||||
const submit = useCallback(() => {
|
const submit = useCallback(() => {
|
||||||
const text = input.trim()
|
const text = input.trim()
|
||||||
if (!text || isStreaming) return
|
if (!text || isStreaming) return
|
||||||
setInput("")
|
setInput("")
|
||||||
stickRef.current = true
|
stickRef.current = true
|
||||||
void send(text, { tools: getOpenAITools() })
|
void send(text, withReasoning({ tools: getOpenAITools() }, reasoningEffort))
|
||||||
}, [input, isStreaming, send])
|
}, [input, isStreaming, send, reasoningEffort])
|
||||||
|
|
||||||
const isEmpty = messages.length === 0
|
const isEmpty = messages.length === 0
|
||||||
|
|
||||||
@@ -1052,6 +1106,8 @@ function ChatSurface({
|
|||||||
isMock={isMock}
|
isMock={isMock}
|
||||||
isCompacting={compacting}
|
isCompacting={compacting}
|
||||||
placeholder={isEmpty ? "Ask anything…" : "Reply…"}
|
placeholder={isEmpty ? "Ask anything…" : "Reply…"}
|
||||||
|
reasoning={reasoningEffort}
|
||||||
|
onCycleReasoning={cycleReasoning}
|
||||||
/>
|
/>
|
||||||
{showPromptOpen && (
|
{showPromptOpen && (
|
||||||
<SystemPromptDialog
|
<SystemPromptDialog
|
||||||
@@ -1303,6 +1359,8 @@ function Composer({
|
|||||||
isMock,
|
isMock,
|
||||||
isCompacting,
|
isCompacting,
|
||||||
placeholder,
|
placeholder,
|
||||||
|
reasoning,
|
||||||
|
onCycleReasoning,
|
||||||
}: {
|
}: {
|
||||||
value: string
|
value: string
|
||||||
onChange: (v: string) => void
|
onChange: (v: string) => void
|
||||||
@@ -1331,6 +1389,8 @@ function Composer({
|
|||||||
isMock: boolean
|
isMock: boolean
|
||||||
isCompacting: boolean
|
isCompacting: boolean
|
||||||
placeholder: string
|
placeholder: string
|
||||||
|
reasoning: ReasoningEffort
|
||||||
|
onCycleReasoning: () => void
|
||||||
}) {
|
}) {
|
||||||
const taRef = useRef<HTMLTextAreaElement | null>(null)
|
const taRef = useRef<HTMLTextAreaElement | null>(null)
|
||||||
|
|
||||||
@@ -1410,6 +1470,7 @@ function Composer({
|
|||||||
model={model}
|
model={model}
|
||||||
onModelChange={onModelChange}
|
onModelChange={onModelChange}
|
||||||
/>
|
/>
|
||||||
|
<ReasoningChip value={reasoning} onCycle={onCycleReasoning} />
|
||||||
<VoiceInputButton
|
<VoiceInputButton
|
||||||
onTranscript={(t) => onChange((value ? value + " " : "") + t)}
|
onTranscript={(t) => onChange((value ? value + " " : "") + t)}
|
||||||
/>
|
/>
|
||||||
@@ -1470,6 +1531,49 @@ function ModelSelector({
|
|||||||
)
|
)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Composer chip that displays the current reasoning-effort level and asks
 * its owner to advance it.
 *
 * The chip is always rendered as a pill button labelled "think". While the
 * level is "off" it uses muted styling; for any other level it switches to
 * an amber style and appends the level name. The tooltip reports the current
 * level. The chip holds no state: each click only calls `onCycle`.
 *
 * @param value - Level to display.
 * @param onCycle - Called on every click; the owner decides the next level.
 */
function ReasoningChip({
  value,
  onCycle,
}: {
  value: ReasoningEffort
  onCycle: () => void
}) {
  const active = value !== "off"

  const tooltip = active
    ? `Reasoning: ${value}. Click to cycle.`
    : "Reasoning: off. Click to enable thinking mode."

  // Shared pill geometry/typography, followed by the state-dependent colours.
  const shape =
    "inline-flex items-center gap-1.5 rounded-full px-2.5 py-1 text-[11px] font-mono uppercase tracking-[0.12em] transition-colors"
  const tone = active
    ? "bg-amber-500/15 text-amber-500 hover:bg-amber-500/25 dark:text-amber-300"
    : "text-muted-foreground hover:bg-accent hover:text-foreground"

  return (
    <button
      type="button"
      data-action="ai-reasoning"
      title={tooltip}
      className={`${shape} ${tone}`}
      onClick={onCycle}
    >
      <Sparkles className="size-3" />
      <span className="select-none">
        think
        {active && <span className="ml-1 font-semibold">{value}</span>}
      </span>
    </button>
  )
}
|
||||||
|
|
||||||
function AgentChip({
|
function AgentChip({
|
||||||
agents,
|
agents,
|
||||||
activeAgent,
|
activeAgent,
|
||||||
|
|||||||
Reference in New Issue
Block a user