From d1469059d8e22966dda91b65132b94f1e340d330 Mon Sep 17 00:00:00 2001
From: jules
Date: Mon, 4 May 2026 19:17:12 +1000
Subject: [PATCH] assistant: teach the agent about Search admin

Bring the LLM agent's prompts and tools current with the new /search
section and arcadia-search admin sidecar:

- New tools in admin-tools.ts:
  - list_search_corpora: enumerate tenants + corpora with build
    status, so the agent can pick a real corpus instead of guessing.
  - rebuild_search_corpus(tenant, corpus): isWrite=true, surfaces a
    confirm card. Use after uploads or when results look stale.
- search_kb description updated: names docs / operator-tools / files
  explicitly, and points at list_search_corpora when unsure.
- ARCADIA_KNOWLEDGE: adds search-corpus terminology, /search route,
  and a one-liner pointer to the search tools (the two new ones plus
  search_kb / read_chunk).
- assistant.tsx UI_CONTROL_PREFACE: nav-search added, full Search
  page action catalog (search-refresh / -restart / -new-tenant /
  -new-corpus, corpora-search, per-row
  corpus-{t}-{c}-{rebuild,edit,delete,actions}, tenant-{id}-delete,
  dialog form fields). Recipe for the manual rebuild path, plus a
  note steering the agent to the rebuild_search_corpus tool by
  default.
- search.tsx publishes a "search" surface to admin-context with
  tenants + corpora summary, so the agent gets live state without
  needing a tool call when /search is mounted.

Co-Authored-By: Claude Opus 4.7 (1M context) --- app/lib/admin-tools.ts | 83 +++++++++++++++++++++++++++++++++++- app/lib/arcadia-knowledge.ts | 2 + app/routes/assistant.tsx | 5 ++- app/routes/search.tsx | 19 +++++++++ 4 files changed, 105 insertions(+), 4 deletions(-) diff --git a/app/lib/admin-tools.ts b/app/lib/admin-tools.ts index 83b955e..f31a229 100644 --- a/app/lib/admin-tools.ts +++ b/app/lib/admin-tools.ts @@ -30,6 +30,7 @@ import { listRoles } from "~/lib/arcadia/roles" import { revokeUserApiKey } from "~/lib/arcadia/api-keys" import { createRAGClient } from "@crema/lexical-rag-ui" import { BLOCK_INDEX, getBlockSchema } from "~/lib/block-schemas" +import { searchAdmin, SearchAdminError } from "~/lib/search-admin" // Lazy singleton — first tool call fetches /docs-index.json, subsequent // calls reuse the parsed MiniSearch instance. @@ -732,7 +733,7 @@ const TOOLS: ToolDef[] = [ { name: "search_kb", description: - "Lexical (BM25) search over the arcadia-search Tantivy backend. Use for the LARGER, server-hosted knowledge corpora — the same arcadia docs the browser RAG serves are indexed here as `corpus=docs` for parity, and additional corpora (uploaded files, runbooks, etc.) will land here as they're added. Returns chunks with snippets + chunk_ids that can be passed to `read_chunk` to expand. Prefer this over `search_docs` (browser) when you need richer hits or when the user is asking about content that wouldn't be in the bundled docs (e.g. uploaded files).", + "Lexical (BM25) search over the arcadia-search Tantivy backend. Returns chunks with snippets + chunk_ids that can be passed to `read_chunk` to expand. 
Prefer this over `search_docs` (browser) when you need richer hits or when the content wouldn't be in the bundled docs.\n\nKnown corpora on the platform-admin tenant:\n- `docs` — arcadia-app architecture/ops docs (same as the browser RAG, server-hosted for parity).\n- `operator-tools` — arcadia-search + arcadia-admin documentation (admin sidecar, deploy script, search admin UI, MULTI_TENANT, RAG, AI_FIRST, LIBS, LLM_PROXY_CONTRACT).\n- `files` — markdown/text files uploaded by tenant users via arcadia-app.\n\nIf you're not sure what's available, call `list_search_corpora` first. Operators can add new corpora via the `/search` route.", parameters: { type: "object", properties: { @@ -740,7 +741,7 @@ const TOOLS: ToolDef[] = [ corpus: { type: "string", description: - "Which indexed corpus to search. `docs` is the parity corpus (arcadia documentation). New corpora are added by the operator.", + "Which indexed corpus to search. See list_search_corpora for the live set; common values: `docs`, `operator-tools`, `files`.", }, limit: { type: "integer", @@ -796,6 +797,84 @@ const TOOLS: ToolDef[] = [ return result }, }, + { /* Read-only tool: lists every search tenant with its corpora. A SearchAdminError from the tenant listing becomes an { error, hint } result; any other error is rethrown. */ + name: "list_search_corpora", + description: + "Enumerate the corpora currently configured on the arcadia-search admin sidecar. Returns each tenant's corpora with build status (indexed?, num_docs). Call this when you don't know what corpora exist before invoking `search_kb`, or when the user asks what knowledge is available. 
Requires the search admin token to be configured. A tenant whose corpus listing fails is still returned, with an empty `corpora` array and an `error` message.", + parameters: { + type: "object", + properties: {}, + additionalProperties: false, + }, + isWrite: false, + run: async () => { + try { + const tenantsRes = await searchAdmin.listTenants() + const tenants = await Promise.all( + tenantsRes.tenants.map(async (t) => { + try { + const c = await searchAdmin.listCorpora(t.id) + return { + tenant: t.id, + corpora: c.corpora.map((cc) => ({ + corpus: cc.corpus, + indexed: cc.indexed, + num_docs: cc.num_docs, + })), + } + } catch (err) { + /* Best-effort per tenant: keep listing the others, but report the failure so an empty list is not read as "this tenant has no corpora". */ return { tenant: t.id, corpora: [], error: err instanceof Error ? err.message : String(err) } + } + }), + ) + return { tenants } + } catch (err) { + if (err instanceof SearchAdminError) { + return { + error: `search-admin ${err.status}: ${err.message}`, + hint: "VITE_ARCADIA_SEARCH_ADMIN_TOKEN may be unset, or the sidecar (default :7801) may be down.", + } + } + throw err + } + }, + }, + { + name: "rebuild_search_corpus", + description: + "Trigger a synchronous rebuild of one corpus on arcadia-search. Use when the operator says the index is stale, after they've uploaded new files, or when search_kb returned suspiciously few/old hits. Returns chunk_count and built_at on success. The operator confirms before the rebuild runs (rebuilds can take seconds–minutes depending on corpus size).", + parameters: { + type: "object", + properties: { + tenant: { + type: "string", + description: "Search tenant id (e.g. `platform-admin`). See list_search_corpora for available tenants.", + }, + corpus: { + type: "string", + description: "Corpus name within that tenant (e.g. `docs`, `operator-tools`, `files`).", + }, + }, + required: ["tenant", "corpus"], + additionalProperties: false, + }, + isWrite: true, + run: async (args) => { + const tenant = typeof args.tenant === "string" ? args.tenant.trim() : "" + const corpus = typeof args.corpus === "string" ? 
args.corpus.trim() : "" + if (!tenant || !corpus) { + throw new Error("rebuild_search_corpus requires { tenant, corpus }") + } + try { + return await searchAdmin.rebuild(tenant, corpus) + } catch (err) { + if (err instanceof SearchAdminError) { + return { error: `search-admin ${err.status}: ${err.message}` } + } + throw err + } + }, + }, { name: "get_block_schema", description: `Fetch the full JSON schema + example for a rich-output block kind so you can emit it correctly in your reply. Call this the first time in a thread that you intend to render a particular kind. Available kinds: ${Object.entries( diff --git a/app/lib/arcadia-knowledge.ts b/app/lib/arcadia-knowledge.ts index a4a35c3..e193cb7 100644 --- a/app/lib/arcadia-knowledge.ts +++ b/app/lib/arcadia-knowledge.ts @@ -16,6 +16,7 @@ Core entities and how they relate: - **Audit log entry** — append-only record of who did what. \`actor_type\` is one of: \`user\`, \`platform_admin\`, \`api_key\`, \`system\`. Per-tenant and platform-wide entries coexist. - **Feature flag** — boolean / variant gate. Platform-wide default + per-tenant override. - **Storage / billing config / SSO IdP / inbound webhook / API quota / data retention policy / approval workflow / announcement** — per-tenant or platform-level configurations the operator can manage. +- **Search corpus** — a Tantivy index over a set of source documents, served by the arcadia-search service. Each corpus belongs to a search tenant (a separate id space from platform tenants — typically \`platform-admin\` for the operator's own knowledge). The operator manages corpora at \`/search\`: create/edit configuration JSON, rebuild on demand, restart the service. Built-ins on \`platform-admin\`: \`docs\` (arcadia architecture), \`operator-tools\` (arcadia-search + arcadia-admin docs), \`files\` (uploaded markdown/text files). 
Tenant lifecycle (status field): @@ -31,6 +32,7 @@ Things to keep in mind when assisting: - The operator can impersonate tenant users for debugging (POST /api/v1/admin/impersonate/:user_id) — surface this when they ask "why can't user X log in". - Quotas / rate cards / billing config errors usually surface as 402/403 from /api/v1 endpoints — diagnose by checking the tenant's billing-config and api-metering quotas. - The reference Phoenix app lives at \`reference/arcadia-app/\` in the workspace; its OpenAPI spec is at /api/openapi (sync via \`node ../lib-arcadia-client/scripts/sync-spec.mjs\`). +- Search admin (arcadia-search) is a separate service. Manage tenants/corpora at \`/search\`. Use \`list_search_corpora\` if you don't know what's indexed; \`rebuild_search_corpus\` after uploads or when results look stale; \`search_kb\` / \`read_chunk\` to query. When the user asks something that maps to a tool, call it. When they ask about a concept, explain it from this primer in plain language. Write tools (suspend_tenant, activate_tenant) prompt the operator with an inline confirm card before they actually run — you do not need to ask in prose first; just call the tool and the user will see the confirmation UI. If the user denies a write, do not retry it; ask what they'd like to do differently. 
diff --git a/app/routes/assistant.tsx b/app/routes/assistant.tsx index 2e49d39..9d2ef4f 100644 --- a/app/routes/assistant.tsx +++ b/app/routes/assistant.tsx @@ -53,7 +53,7 @@ Rules: Known action ids across the app (use these even if not in "Available actions" — the page may not be mounted yet): -Sidebar / nav: nav-overview, nav-tenants, nav-resources, nav-activity, nav-assistant, nav-library, nav-settings, sidebar-toggle, mobile-nav-toggle +Sidebar / nav: nav-overview, nav-tenants, nav-resources, nav-activity, nav-search, nav-assistant, nav-library, nav-settings, sidebar-toggle, mobile-nav-toggle Appbar: appbar-search (input), appbar-scripts, appbar-font-size, appbar-surface, appbar-background, theme-toggle, appbar-notifications, appbar-avatar Account menu (after click appbar-avatar): avatar-profile (→ /profile), avatar-settings, avatar-help, avatar-signout Profile page: profile-avatar-upload, profile-avatar-remove, profile-name, profile-email, profile-title, profile-bio, profile-signature, profile-default-agent, profile-save, profile-revert, profile-reset @@ -66,6 +66,7 @@ Assistant page: assistant-model, assistant-agent, assistant-thread, assistant-th Library page: library-search, library-open-, library-copy-, library-download-, library-delete- Resources page: resources-search, resources-new-name, resources-create, resources-status-, resources-delete- Tenants page: tenants-refresh, tenants-search (input), tenants-create. Per-row (use the tenant's slug — see the "tenants" surface in Admin context for available slugs): tenant--actions (open the kebab first), tenant--suspend, tenant--activate, tenant--deactivate. Recipe to suspend a tenant: click nav-tenants, wait_for tenants-refresh, click tenant--actions, wait_for tenant--suspend, click tenant--suspend. +Search page (/search — manage arcadia-search tenants and corpora): search-refresh, search-restart (with confirm), search-new-tenant, search-new-corpus, corpora-search (input). Per-tenant chip: tenant--delete. 
Per-corpus row (id is "-"): corpus---actions (kebab), corpus---rebuild, corpus---edit, corpus---delete. New-tenant dialog: tenant-form-id (input), tenant-form-cancel, tenant-form-save. New/edit-corpus dialog: corpus-form-tenant (select, only when creating), corpus-form-config (textarea, JSON), corpus-form-cancel, corpus-form-save. Recipe to rebuild a corpus: click nav-search, wait_for search-refresh, click corpus---actions, wait_for corpus---rebuild, click corpus---rebuild. (NOTE: prefer the \`rebuild_search_corpus\` tool over UI-driving for rebuilds — it's atomic and gives a structured result; UI-drive only when the user explicitly wants to see it happen.) Login page: login-email, login-password, login-submit Notifications popover: appbar-notifications (open), notif-mark-all-read, notif-clear, notif-open-, notif-dismiss- Create a notification (hidden bridge — always available, even when not visible): fill the four hidden inputs, then click the submit button. Recipe: @@ -113,7 +114,7 @@ function buildAdminPreface(activeAgent: Agent | undefined, uiControl: boolean): const ctx = formatAdminContextForPrompt() const parts = [ "You are the operator's assistant inside Arcadia Admin. Be precise and direct. You have native function tools attached to this conversation — call them whenever the user asks about live platform state (counts, statuses, listings, lookups). Never invent tenant slugs, user counts, or statuses; if you need data, call a tool.", - "Two retrieval surfaces exist for documentation/knowledge: `search_docs` (browser-side, BM25 over the bundled arcadia docs — fast, always available, small corpus) and `search_kb` (server-side, BM25 over arcadia-search — same docs as `corpus=docs` for parity, plus larger and additional corpora as the operator adds them). 
For questions about the bundled arcadia docs either is fine; prefer `search_kb` when you want richer hits or when the user is asking about content that wouldn't be in the bundled docs (uploaded files, tenant-specific knowledge). When `search_kb` returns a chunk_id you want to expand, call `read_chunk(chunk_id, corpus)`.", + "Two retrieval surfaces exist for documentation/knowledge: `search_docs` (browser-side, BM25 over the bundled arcadia docs — fast, always available, small corpus) and `search_kb` (server-side, BM25 over arcadia-search — `docs` (arcadia parity), `operator-tools` (arcadia-search + arcadia-admin admin docs), `files` (uploaded files), plus any custom corpora the operator adds via /search). For questions about the bundled arcadia docs either is fine; prefer `search_kb` for richer hits or for content outside the bundled docs (uploaded files, the admin tooling itself, tenant-specific knowledge). If unsure what corpora exist, call `list_search_corpora`. When `search_kb` returns a chunk_id you want to expand, call `read_chunk(chunk_id, corpus)`. When the operator says results look stale or after they've uploaded new files, call `rebuild_search_corpus(tenant, corpus)`.", RICH_OUTPUT_PREFACE, ARCADIA_KNOWLEDGE, persona, diff --git a/app/routes/search.tsx b/app/routes/search.tsx index ff44c45..2974f85 100644 --- a/app/routes/search.tsx +++ b/app/routes/search.tsx @@ -60,6 +60,7 @@ import { } from "~/lib/search-admin" import { pageTitle } from "~/lib/page-meta" import { useSession } from "~/lib/session" +import { useRegisterAdminContext } from "~/lib/admin-context" export const meta = () => pageTitle("Search") @@ -140,6 +141,24 @@ export default function SearchRoute() { return { indexed, docs } }, [corpora]) + // Publish a snapshot to the assistant's admin context so the agent + // can answer "what corpora exist?" / "is the docs corpus indexed?" + // without having to call list_search_corpora. 
+ const adminSurface = useMemo( /* snapshot object: the search-admin base URL, plus only id + corpus_count per tenant and only tenant/corpus/indexed/num_docs per corpus */ + () => ({ + endpoint: searchAdmin.baseUrl, + tenants: tenants.map((t) => ({ id: t.id, corpus_count: t.corpus_count })), + corpora: corpora.map((c) => ({ + tenant: c.tenant, + corpus: c.corpus, + indexed: c.indexed, + num_docs: c.num_docs, + })), + }), + [tenants, corpora], /* recomputed when either list changes */ + ) + useRegisterAdminContext("search", adminSurface) /* hands the snapshot to the admin-context hook under the name "search" */ + const rebuild = useCallback( async (tenant: string, corpus: string) => { const id = `${tenant}/${corpus}`