diff --git a/app/components/settings/llm-configurations-panel.tsx b/app/components/settings/llm-configurations-panel.tsx
index 3c85310..cc85200 100644
--- a/app/components/settings/llm-configurations-panel.tsx
+++ b/app/components/settings/llm-configurations-panel.tsx
@@ -51,12 +51,14 @@ import {
getUsageByModel,
getUsageSummary,
listConfigurations,
+ REASONING_EFFORTS,
updateConfiguration,
type CatalogEntry,
type LlmConfiguration,
type LlmConfigurationInput,
type LlmProvider,
type LlmUsageSummary,
+ type ReasoningEffort,
type UsageByModelRow,
} from "~/lib/arcadia/llm-configs"
import { listSecrets, type Secret } from "~/lib/arcadia/secrets"
@@ -401,6 +403,15 @@ function ConfigRow({
{formatRate(c.input_cost_per_million)}/1M in ·{" "}
{formatRate(c.output_cost_per_million)}/1M out
+ {c.reasoning_effort && c.reasoning_effort !== "off" ? (
+ <>
+ {" "}
+ · think{" "}
+
+ {c.reasoning_effort}
+
+ >
+ ) : null}
@@ -480,6 +491,7 @@ function ConfigDialog({
input_cost_per_million: existing.input_cost_per_million,
output_cost_per_million: existing.output_cost_per_million,
enabled: existing.enabled,
+ reasoning_effort: existing.reasoning_effort,
}
: emptyDraft(),
)
@@ -612,6 +624,34 @@ function ConfigDialog({
placeholder="0.60"
/>
+
+
+
+
{err ? (
@@ -843,3 +883,18 @@ function formatRate(rate: number | null): string {
if (rate === 0) return "free"
return `$${rate.toFixed(2)}`
}
+
+/** Hint text for every reasoning-effort level, keyed by the level itself. */
+const REASONING_HINTS: Record<ReasoningEffort, string> = {
+  off: "no thinking",
+  low: "~2k thinking tokens",
+  medium: "~8k thinking tokens",
+  high: "~24k thinking tokens",
+  max: "~64k — slowest, most thorough",
+}
+
+/**
+ * Describe a reasoning-effort level in a few words.
+ *
+ * @param e - The level to describe.
+ * @returns The hint string associated with that level.
+ */
+function reasoningHint(e: ReasoningEffort): string {
+  return REASONING_HINTS[e]
+}
diff --git a/app/lib/arcadia/llm-configs.ts b/app/lib/arcadia/llm-configs.ts
index 4f7f98b..9fd8e14 100644
--- a/app/lib/arcadia/llm-configs.ts
+++ b/app/lib/arcadia/llm-configs.ts
@@ -12,6 +12,20 @@ import type { ArcadiaClient } from "@crema/arcadia-client"
export type LlmProvider = "openai" | "anthropic" | "deepseek" | "qwen" | "lmstudio"
+/**
+ * Reasoning effort. Sent verbatim to OpenAI / DeepSeek (which take
+ * `reasoning_effort` natively). Translated server-side into Anthropic's
+ * thinking block. `off` (or null) skips the field entirely.
+ *
+ * NOTE(review): confirm that each provider accepts every value listed here
+ * (e.g. `max`) when it is forwarded verbatim.
+ */
+export type ReasoningEffort = "off" | "low" | "medium" | "high" | "max"
+/** Every `ReasoningEffort` value, listed from `off` up to `max`. */
+export const REASONING_EFFORTS: ReasoningEffort[] = [
+ "off",
+ "low",
+ "medium",
+ "high",
+ "max",
+]
+
export interface LlmConfiguration {
id: string
tenant_id: string | null
@@ -23,6 +37,7 @@ export interface LlmConfiguration {
input_cost_per_million: number | null
output_cost_per_million: number | null
enabled: boolean
+ reasoning_effort: ReasoningEffort | null
metadata: Record
inserted_at: string
updated_at: string
@@ -39,6 +54,7 @@ export interface LlmConfigurationInput {
input_cost_per_million?: number | null
output_cost_per_million?: number | null
enabled?: boolean
+ reasoning_effort?: ReasoningEffort | null
metadata?: Record
}
diff --git a/app/routes/ai.tsx b/app/routes/ai.tsx
index f1db217..7965a22 100644
--- a/app/routes/ai.tsx
+++ b/app/routes/ai.tsx
@@ -19,6 +19,7 @@ import {
Plus,
RefreshCw,
RotateCcw,
+ Sparkles,
Square,
Trash2,
Undo2,
@@ -179,6 +180,30 @@ function clearLive() {
localStorage.removeItem(LIVE_KEY)
}
+/* Per-conversation reasoning override. Cycle order matters — the composer
+ * chip walks this array.
+ *
+ * NOTE(review): this type and list repeat `ReasoningEffort` /
+ * `REASONING_EFFORTS` from app/lib/arcadia/llm-configs.ts — keep the two in
+ * sync, or import them from there. */
+type ReasoningEffort = "off" | "low" | "medium" | "high" | "max"
+const REASONING_LEVELS: ReasoningEffort[] = ["off", "low", "medium", "high", "max"]
+/* localStorage key under which the selected level is persisted. */
+const REASONING_KEY = "crema.ai.reasoning"
+
+/**
+ * Read the persisted reasoning-effort level from localStorage.
+ *
+ * Falls back to "off" when there is no `window`, when nothing (or an
+ * unrecognised value) is stored, or when storage access throws.
+ *
+ * @returns The stored level if it is one of REASONING_LEVELS, else "off".
+ */
+function loadReasoning(): ReasoningEffort {
+  if (typeof window === "undefined") return "off"
+  try {
+    const stored = localStorage.getItem(REASONING_KEY)
+    // Match against the known levels instead of asserting the type, so the
+    // returned value is always a real ReasoningEffort.
+    return REASONING_LEVELS.find((level) => level === stored) ?? "off"
+  } catch {
+    // Storage can be unavailable (e.g. blocked by browser privacy settings).
+    // This runs inside a useState initializer, so never let it throw.
+    return "off"
+  }
+}
+/**
+ * Persist the reasoning-effort level to localStorage.
+ *
+ * "off" is stored as the absence of the key. Persistence is best-effort: a
+ * storage failure is ignored so the in-memory selection keeps working.
+ *
+ * @param v - The level to remember across reloads.
+ */
+function saveReasoning(v: ReasoningEffort) {
+  if (typeof window === "undefined") return
+  try {
+    if (v === "off") localStorage.removeItem(REASONING_KEY)
+    else localStorage.setItem(REASONING_KEY, v)
+  } catch {
+    // setItem can throw when storage is full or blocked. This is called from
+    // an effect, so swallow the failure rather than break the chat surface.
+  }
+}
+/**
+ * Attach the reasoning override to the options object handed to send().
+ *
+ * @param extras - Options to pass along; never mutated.
+ * @param effort - The selected reasoning level.
+ * @returns `extras` itself when `effort` is "off" (no `reasoning_effort` key
+ *   is added), otherwise a shallow copy with `reasoning_effort` set.
+ */
+function withReasoning<T extends Record<string, unknown>>(
+  extras: T,
+  effort: ReasoningEffort,
+): T & { reasoning_effort?: string } {
+  if (effort === "off") return extras
+  return { ...extras, reasoning_effort: effort }
+}
+
type StoredMessage = { role: "user" | "assistant"; content: string }
function loadAISnapshot(): StoredMessage[] | null {
if (typeof window === "undefined") return null
@@ -512,6 +537,7 @@ function ChatSurface({
setMessages([])
setAgentHistory(new Map())
setMessageAgents(new Map())
+ setReasoningEffort("off")
}, [setMessages])
// Auto tool-loop using native function calls. Reads run automatically;
@@ -520,6 +546,10 @@ function ChatSurface({
const toolIterationsRef = useRef(0)
const processedTurnRef = useRef(-1)
const prevStreamingRef = useRef(isStreaming)
+ // Mirror of reasoningEffort state, kept current via the effect below so
+ // regenerate/continue callbacks (declared before the state hook) can
+ // read the latest value without becoming reasoningEffort dependents.
+ const reasoningEffortRef = useRef("off")
// Maintain agent-history. Two triggers:
// 1. When a turn finishes streaming and at least one user/assistant
@@ -747,12 +777,18 @@ function ChatSurface({
const text = messages[lastUserIdx].content
setMessages(messages.slice(0, lastUserIdx))
// Defer so the state flush completes before send() reads `messages`.
- setTimeout(() => void send(text, { tools: getOpenAITools() }), 0)
+ setTimeout(
+ () => void send(text, withReasoning({ tools: getOpenAITools() }, reasoningEffortRef.current)),
+ 0,
+ )
}, [messages, setMessages, send, isStreaming])
const continueLast = useCallback(() => {
if (isStreaming || messages.length === 0) return
- void send("Please continue your previous reply.", { tools: getOpenAITools() })
+ void send(
+ "Please continue your previous reply.",
+ withReasoning({ tools: getOpenAITools() }, reasoningEffortRef.current),
+ )
}, [isStreaming, messages.length, send])
const compactConversation = useCallback(async () => {
@@ -834,13 +870,31 @@ function ChatSurface({
endRef.current?.scrollIntoView({ block: "end" })
}, [messages.length, lastContent, isStreaming])
+ // Per-conversation reasoning override. Persists across page reloads via
+ // localStorage so the operator's chosen level survives a refresh, but
+ // resets when they clear the conversation. "off" = pass nothing through.
+ const [reasoningEffort, setReasoningEffort] = useState(
+ () => loadReasoning(),
+ )
+ useEffect(() => {
+ saveReasoning(reasoningEffort)
+ reasoningEffortRef.current = reasoningEffort
+ }, [reasoningEffort])
+
+ const cycleReasoning = useCallback(() => {
+ setReasoningEffort((cur) => {
+ const idx = REASONING_LEVELS.indexOf(cur)
+ return REASONING_LEVELS[(idx + 1) % REASONING_LEVELS.length]
+ })
+ }, [])
+
const submit = useCallback(() => {
const text = input.trim()
if (!text || isStreaming) return
setInput("")
stickRef.current = true
- void send(text, { tools: getOpenAITools() })
- }, [input, isStreaming, send])
+ void send(text, withReasoning({ tools: getOpenAITools() }, reasoningEffort))
+ }, [input, isStreaming, send, reasoningEffort])
const isEmpty = messages.length === 0
@@ -1052,6 +1106,8 @@ function ChatSurface({
isMock={isMock}
isCompacting={compacting}
placeholder={isEmpty ? "Ask anything…" : "Reply…"}
+ reasoning={reasoningEffort}
+ onCycleReasoning={cycleReasoning}
/>
{showPromptOpen && (
void
@@ -1331,6 +1389,8 @@ function Composer({
isMock: boolean
isCompacting: boolean
placeholder: string
+ reasoning: ReasoningEffort
+ onCycleReasoning: () => void
}) {
const taRef = useRef(null)
@@ -1410,6 +1470,7 @@ function Composer({
model={model}
onModelChange={onModelChange}
/>
+
onChange((value ? value + " " : "") + t)}
/>
@@ -1470,6 +1531,49 @@ function ModelSelector({
)
}
+/**
+ * Reasoning-effort chip for the composer. Click cycles off → low → medium →
+ * high → max → off. When non-off, the next send includes
+ * `reasoning_effort: <level>` which the proxy passes to OpenAI/DeepSeek
+ * natively and translates to Anthropic's thinking block server-side.
+ *
+ * Visually: hidden when off (no chrome clutter for the common case),
+ * surfaces as a sodium-amber pill when set.
+ *
+ * @param value - The currently selected reasoning level.
+ * @param onCycle - Called to advance to the next level.
+ */
+function ReasoningChip({
+ value,
+ onCycle,
+}: {
+ value: ReasoningEffort
+ onCycle: () => void
+}) {
+ // Any level other than "off" counts as an active override.
+ const active = value !== "off"
+ return (
+
+ )
+}
+
function AgentChip({
agents,
activeAgent,