ai: wire arcadia-search backend (search_kb + read_chunk + reindex button)
Adds the agent-facing surface for the new Tantivy lexical search service (arcadia-search). Sits alongside the existing search_docs (browser MiniSearch) — agent picks based on tool description. - admin-tools.ts: new search_kb(query, corpus, limit?, tags?) and read_chunk(chunk_id, corpus) tools. KB_BASE_URL honors window.__ARCADIA_SEARCH_URL runtime override + VITE_ARCADIA_SEARCH_URL build env, defaults to localhost:7800. Token resolved per-call from sessionStorage.arcadia_access_token (matching lib-arcadia-client's storage convention) with "dev" fallback for unauthenticated dev. - assistant.tsx: system-prompt section telling the agent when to pick search_docs (browser, bundled) vs search_kb (server, dynamic + expandable via read_chunk). - ai.tsx: reindexKB() helper + "reindex kb (docs)" button on the empty state, next to the existing block-preview button. Toasts on start/success/failure. Wired with data-action="kb-reindex-docs" so the agent can also trigger via the command bus. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -22,6 +22,77 @@ import { BLOCK_INDEX, getBlockSchema } from "~/lib/block-schemas"
|
|||||||
// calls reuse the parsed MiniSearch instance.
|
// calls reuse the parsed MiniSearch instance.
|
||||||
const docsClient = createRAGClient("/docs-index.json")
|
const docsClient = createRAGClient("/docs-index.json")
|
||||||
|
|
||||||
|
// Server-side Tantivy backend (arcadia-search).
|
||||||
|
//
|
||||||
|
// URL: comes from window.__ARCADIA_SEARCH_URL (override hook) or
|
||||||
|
// VITE_ARCADIA_SEARCH_URL build-time env, defaulting to localhost.
|
||||||
|
//
|
||||||
|
// Token: prefer the real arcadia access token (sessionStorage —
|
||||||
|
// matches lib-arcadia-client's storage convention). Fall back to "dev"
|
||||||
|
// when missing, which only works against AUTH_MODE=dev backends. In
|
||||||
|
// production, arcadia-search runs in JWT mode and the dev fallback
|
||||||
|
// gets rejected with 401 — surfacing the missing-login as a clear
|
||||||
|
// error rather than silently using the wrong identity.
|
||||||
|
/**
 * Base URL of the arcadia-search service.
 *
 * Evaluated once, when this module is first loaded. The first truthy
 * candidate wins:
 *   1. `window.__ARCADIA_SEARCH_URL`
 *   2. `import.meta.env.VITE_ARCADIA_SEARCH_URL`
 *   3. the local default `http://127.0.0.1:7800`
 */
const KB_BASE_URL: string = (() => {
  const fromWindow =
    typeof window !== "undefined"
      ? (window as unknown as { __ARCADIA_SEARCH_URL?: string }).__ARCADIA_SEARCH_URL
      : undefined
  if (fromWindow) return fromWindow

  const fromBuildEnv =
    typeof import.meta !== "undefined"
      ? (import.meta as unknown as { env?: { VITE_ARCADIA_SEARCH_URL?: string } }).env
          ?.VITE_ARCADIA_SEARCH_URL
      : undefined
  if (fromBuildEnv) return fromBuildEnv

  return "http://127.0.0.1:7800"
})()
|
||||||
|
|
||||||
|
/**
 * Resolve the bearer token sent to arcadia-search.
 *
 * Reads `arcadia_access_token` from `sessionStorage` on every call rather
 * than caching it.
 *
 * @returns The stored token, or `"dev"` when there is no `window`, the key
 *   is missing or blank, or reading storage throws.
 */
function kbAuthToken(): string {
  if (typeof window === "undefined") return "dev"
  try {
    const stored = window.sessionStorage.getItem("arcadia_access_token")
    // `??` alone would let an empty/blank string through and produce a
    // malformed `Bearer ` header; treat blank the same as missing.
    return stored !== null && stored.trim() !== "" ? stored : "dev"
  } catch {
    return "dev"
  }
}
|
||||||
|
|
||||||
|
/**
 * One entry of the `hits` array in an arcadia-search `/search` response,
 * as this client types it. The response is cast, not validated, so these
 * fields describe the expected shape rather than a checked one.
 */
type KBHit = {
  // --- identity ---
  /** Chunk identifier; the `read_chunk` tool takes this to fetch the full body. */
  chunk_id: string
  title: string

  // --- location (presumably within the source document; confirm against the service) ---
  source_path: string
  heading_path: string

  // --- content ---
  /** Short excerpt of the chunk returned with the hit. */
  snippet: string
  tags: string[]

  // --- ranking / freshness ---
  score: number
  /** NOTE(review): looks like a serialized modification time; format not visible here. */
  mtime: string
}
|
||||||
|
|
||||||
|
/**
 * Run a search against arcadia-search by POSTing to `/search`.
 *
 * @param query  Search text, sent as-is.
 * @param corpus Name of the corpus to search.
 * @param limit  Maximum number of hits requested.
 * @param tags   Optional tag filter; left out of the JSON body when undefined.
 * @returns The parsed response body, typed (not validated) as `{ count, hits }`.
 * @throws Error carrying the HTTP status and response text on any non-2xx reply.
 */
async function kbSearch(
  query: string,
  corpus: string,
  limit: number,
  tags?: string[],
): Promise<{ count: number; hits: KBHit[] }> {
  const endpoint = `${KB_BASE_URL}/search`
  const headers = {
    "Content-Type": "application/json",
    Authorization: `Bearer ${kbAuthToken()}`,
  }
  const payload = JSON.stringify({ query, corpus, limit, tags })

  const response = await fetch(endpoint, { method: "POST", headers, body: payload })

  if (response.ok) {
    return (await response.json()) as { count: number; hits: KBHit[] }
  }

  const detail = await response.text()
  throw new Error(`arcadia-search ${response.status}: ${detail}`)
}
|
||||||
|
|
||||||
|
/**
 * Fetch a single chunk from arcadia-search via `GET /chunks/{id}?corpus=…`.
 *
 * Both the chunk id and the corpus name are URI-encoded before being placed
 * in the URL.
 *
 * @param chunkId Identifier of the chunk to fetch.
 * @param corpus  Corpus the chunk belongs to.
 * @returns The parsed JSON body, or `null` when the service answers 404.
 * @throws Error carrying the HTTP status and response text on any other non-2xx reply.
 */
async function kbRead(chunkId: string, corpus: string): Promise<unknown> {
  const idPart = encodeURIComponent(chunkId)
  const corpusPart = encodeURIComponent(corpus)
  const target = `${KB_BASE_URL}/chunks/${idPart}?corpus=${corpusPart}`

  const response = await fetch(target, {
    headers: { Authorization: `Bearer ${kbAuthToken()}` },
  })

  // A missing chunk is an expected outcome, not an error.
  if (response.status === 404) {
    return null
  }

  if (response.ok) {
    return await response.json()
  }

  const detail = await response.text()
  throw new Error(`arcadia-search ${response.status}: ${detail}`)
}
|
||||||
|
|
||||||
export type ToolCall = {
|
export type ToolCall = {
|
||||||
name: string
|
name: string
|
||||||
args: Record<string, unknown>
|
args: Record<string, unknown>
|
||||||
@@ -275,6 +346,73 @@ const TOOLS: ToolDef[] = [
|
|||||||
}
|
}
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
|
{
  // Read-only tool: lexical search against the server-side arcadia-search
  // service. Hits carry chunk_ids that the read_chunk tool can expand.
  name: "search_kb",
  description:
    "Lexical (BM25) search over the arcadia-search Tantivy backend. Use for the LARGER, server-hosted knowledge corpora — the same arcadia docs the browser RAG serves are indexed here as `corpus=docs` for parity, and additional corpora (uploaded files, runbooks, etc.) will land here as they're added. Returns chunks with snippets + chunk_ids that can be passed to `read_chunk` to expand. Prefer this over `search_docs` (browser) when you need richer hits or when the user is asking about content that wouldn't be in the bundled docs (e.g. uploaded files).",
  parameters: {
    type: "object",
    properties: {
      query: { type: "string", description: "Lexical search query." },
      corpus: {
        type: "string",
        description:
          "Which indexed corpus to search. `docs` is the parity corpus (arcadia documentation). New corpora are added by the operator.",
      },
      limit: {
        type: "integer",
        description: "Max hits. Default 5, cap 20.",
        minimum: 1,
        maximum: 20,
      },
      tags: {
        type: "array",
        items: { type: "string" },
        description:
          "Optional tag filter — return only hits whose chunk has at least one matching tag.",
      },
    },
    required: ["query", "corpus"],
    additionalProperties: false,
  },
  isWrite: false,
  // Validates the caller-supplied args, then delegates to kbSearch.
  // Throws when query or corpus is missing/blank; kbSearch errors propagate.
  run: async (args) => {
    const query = typeof args.query === "string" ? args.query.trim() : ""
    const corpus = typeof args.corpus === "string" ? args.corpus.trim() : ""
    if (!query) throw new Error("search_kb requires a non-empty { query }")
    if (!corpus) throw new Error("search_kb requires a { corpus } name")

    // The schema declares an integer in [1, 20], but the args are not
    // guaranteed to honor it. NaN passes a bare `typeof === "number"` check,
    // survives Math.min/Math.max, and would serialize as `null`; fractions
    // would reach the backend unrounded. Default on NaN, truncate the rest.
    const requested =
      typeof args.limit === "number" && !Number.isNaN(args.limit) ? Math.trunc(args.limit) : 5
    const limit = Math.min(20, Math.max(1, requested))

    // Forward only string elements instead of asserting the array's type.
    const tags = Array.isArray(args.tags)
      ? args.tags.filter((tag): tag is string => typeof tag === "string")
      : undefined

    return await kbSearch(query, corpus, limit, tags)
  },
},
|
||||||
|
{
  // Read-only tool: expand one search_kb hit into its full chunk body.
  name: "read_chunk",
  description:
    "Fetch the full body of one chunk by id from the arcadia-search backend, after `search_kb` returned it as a snippet. Use this to expand a hit when the snippet looked promising but you need more context to answer.",
  parameters: {
    type: "object",
    properties: {
      chunk_id: { type: "string", description: "The chunk_id from a prior search_kb hit." },
      corpus: { type: "string", description: "Same corpus the chunk came from." },
    },
    required: ["chunk_id", "corpus"],
    additionalProperties: false,
  },
  isWrite: false,
  // Validates the args, then delegates to kbRead. A missing chunk (kbRead
  // returning null) is reported as a structured `{ error, ... }` result
  // rather than thrown; other kbRead errors propagate.
  run: async (args) => {
    const chunkId = typeof args.chunk_id === "string" ? args.chunk_id : ""
    // Trim the corpus name the same way search_kb does, so a name that
    // worked for the search also works for the follow-up read.
    const corpus = typeof args.corpus === "string" ? args.corpus.trim() : ""
    // The id itself is forwarded untouched, but a whitespace-only id is
    // rejected up front instead of being sent to the backend.
    if (chunkId.trim() === "" || !corpus) {
      throw new Error("read_chunk requires { chunk_id, corpus }")
    }
    const result = await kbRead(chunkId, corpus)
    if (result === null) {
      return { error: "chunk not found", chunk_id: chunkId, corpus }
    }
    return result
  },
},
|
||||||
{
|
{
|
||||||
name: "get_block_schema",
|
name: "get_block_schema",
|
||||||
description: `Fetch the full JSON schema + example for a rich-output block kind so you can emit it correctly in your reply. Call this the first time in a thread that you intend to render a particular kind. Available kinds: ${Object.entries(
|
description: `Fetch the full JSON schema + example for a rich-output block kind so you can emit it correctly in your reply. Call this the first time in a thread that you intend to render a particular kind. Available kinds: ${Object.entries(
|
||||||
|
|||||||
@@ -111,6 +111,48 @@ function ToolResultBlock({ name, result }: { name: string; result: unknown }) {
|
|||||||
return <div className="px-1">{rich}</div>
|
return <div className="px-1">{rich}</div>
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Trigger a server-side rebuild of an arcadia-search corpus by POSTing to
// `/index/{corpus}/build`, reporting start, success and failure through the
// toast provider. Never rejects: every failure ends in an error toast.
//
// URL and token are resolved at call time, in the same order the search_kb
// tool uses (see admin-tools.ts):
//   URL   — window.__ARCADIA_SEARCH_URL, then VITE_ARCADIA_SEARCH_URL,
//           then http://127.0.0.1:7800
//   token — sessionStorage.arcadia_access_token, else "dev"
async function reindexKB(
  corpus: string,
  toast: ReturnType<typeof useToast>,
): Promise<void> {
  const windowUrl =
    typeof window !== "undefined"
      ? (window as unknown as { __ARCADIA_SEARCH_URL?: string }).__ARCADIA_SEARCH_URL
      : undefined
  // Honor the build-time env as well, so a deployment configured only through
  // VITE_ARCADIA_SEARCH_URL does not send reindex requests to localhost.
  const envUrl =
    typeof import.meta !== "undefined"
      ? (import.meta as unknown as { env?: { VITE_ARCADIA_SEARCH_URL?: string } }).env
          ?.VITE_ARCADIA_SEARCH_URL
      : undefined
  const baseUrl = windowUrl || envUrl || "http://127.0.0.1:7800"

  // Reading sessionStorage can throw (e.g. when storage access is blocked).
  // Fall back to "dev" rather than letting the fire-and-forget click handler
  // reject without showing any toast.
  let token = "dev"
  if (typeof window !== "undefined") {
    try {
      token = window.sessionStorage.getItem("arcadia_access_token") || "dev"
    } catch {
      token = "dev"
    }
  }

  const url = `${baseUrl}/index/${encodeURIComponent(corpus)}/build`
  toast.show?.({
    title: "Reindexing…",
    description: `Rebuilding corpus '${corpus}'.`,
  })
  try {
    const res = await fetch(url, {
      method: "POST",
      headers: { Authorization: `Bearer ${token}` },
    })
    if (!res.ok) {
      throw new Error(`HTTP ${res.status}: ${await res.text()}`)
    }
    const out = (await res.json()) as { chunk_count: number; built_at: string }
    toast.show?.({
      title: "Reindex complete",
      description: `${out.chunk_count} chunks indexed for '${corpus}'.`,
    })
  } catch (err) {
    // Covers network errors, non-2xx replies and unparsable response bodies.
    toast.show?.({
      title: "Reindex failed",
      description: err instanceof Error ? err.message : String(err),
      tone: "error",
    })
  }
}
|
||||||
|
|
||||||
// Synthetic assistant message that exercises every typed rich-output block.
|
// Synthetic assistant message that exercises every typed rich-output block.
|
||||||
// Wired to the "preview rich-output blocks" button in the empty state — used
|
// Wired to the "preview rich-output blocks" button in the empty state — used
|
||||||
// to eyeball renderer + theme without driving a live model. Safe to delete
|
// to eyeball renderer + theme without driving a live model. Safe to delete
|
||||||
@@ -1179,7 +1221,7 @@ function ChatSurface({
|
|||||||
Issue an instruction. Read tools run automatically. Writes pause for
|
Issue an instruction. Read tools run automatically. Writes pause for
|
||||||
confirmation. Tab ⇥ for command palette.
|
confirmation. Tab ⇥ for command palette.
|
||||||
</p>
|
</p>
|
||||||
<div className="console-empty-line pointer-events-auto">
|
<div className="console-empty-line pointer-events-auto flex flex-wrap gap-2">
|
||||||
<button
|
<button
|
||||||
type="button"
|
type="button"
|
||||||
onClick={() =>
|
onClick={() =>
|
||||||
@@ -1191,6 +1233,14 @@ function ChatSurface({
|
|||||||
>
|
>
|
||||||
› preview rich-output blocks
|
› preview rich-output blocks
|
||||||
</button>
|
</button>
|
||||||
|
<button
|
||||||
|
type="button"
|
||||||
|
onClick={() => void reindexKB("docs", toast)}
|
||||||
|
className="console-mono inline-flex items-center gap-1.5 rounded-md border border-[var(--console-rule-soft)] bg-transparent px-2.5 py-1 text-[10.5px] uppercase tracking-[0.18em] text-[var(--console-muted)] transition-colors hover:border-[var(--console-amber)] hover:text-[var(--console-amber)]"
|
||||||
|
data-action="kb-reindex-docs"
|
||||||
|
>
|
||||||
|
› reindex kb (docs)
|
||||||
|
</button>
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
|
|||||||
@@ -113,6 +113,7 @@ function buildAdminPreface(activeAgent: Agent | undefined, uiControl: boolean):
|
|||||||
const ctx = formatAdminContextForPrompt()
|
const ctx = formatAdminContextForPrompt()
|
||||||
const parts = [
|
const parts = [
|
||||||
"You are the operator's assistant inside Arcadia Admin. Be precise and direct. You have native function tools attached to this conversation — call them whenever the user asks about live platform state (counts, statuses, listings, lookups). Never invent tenant slugs, user counts, or statuses; if you need data, call a tool.",
|
"You are the operator's assistant inside Arcadia Admin. Be precise and direct. You have native function tools attached to this conversation — call them whenever the user asks about live platform state (counts, statuses, listings, lookups). Never invent tenant slugs, user counts, or statuses; if you need data, call a tool.",
|
||||||
|
"Two retrieval surfaces exist for documentation/knowledge: `search_docs` (browser-side, BM25 over the bundled arcadia docs — fast, always available, small corpus) and `search_kb` (server-side, BM25 over arcadia-search — same docs as `corpus=docs` for parity, plus larger and additional corpora as the operator adds them). For questions about the bundled arcadia docs either is fine; prefer `search_kb` when you want richer hits or when the user is asking about content that wouldn't be in the bundled docs (uploaded files, tenant-specific knowledge). When `search_kb` returns a chunk_id you want to expand, call `read_chunk(chunk_id, corpus)`.",
|
||||||
RICH_OUTPUT_PREFACE,
|
RICH_OUTPUT_PREFACE,
|
||||||
ARCADIA_KNOWLEDGE,
|
ARCADIA_KNOWLEDGE,
|
||||||
persona,
|
persona,
|
||||||
|
|||||||
Reference in New Issue
Block a user