// lib-action-bus/src/llm-bridge.ts

// LLM ↔ command bus glue.
// - buildSystemPrompt() teaches the model the DSL and lists the actions
//   currently visible on screen.
// - extractActionBlocks() pulls ```action ... ``` blocks from assistant text.
// - runActionBlocks() executes them through the script runner.

import { commandBus } from "./bus"
import { runScriptText } from "./script"

// Prompt text that teaches the model the action DSL: the fence format, the
// command list, the usage rules and one worked example. buildSystemPrompt()
// inserts it verbatim into every system prompt. The fence it describes
// (```action ... ```) is the one ACTION_BLOCK_RE looks for, so keep the two
// in sync when editing either.
const DSL_REFERENCE = `To act on the UI, emit a fenced \`\`\`action ... \`\`\` block. One command per line.

Commands: navigate <path> | click <target> | fill <target> "<value>" | submit <target> | select <target> <value> | wait <ms> | wait_for <target> | scroll [<target>] | read [<target>] | expect <target> to_contain "<text>" | expect <target> to_be_visible | expect <target> to_have_value "<text>"

Rules: only emit a block when asked to do something. Use only target ids from "Available actions". Short sentence + block. Quote values with spaces. Comments start with #.

Example — User: "go to resources" → "On it.\n\n\`\`\`action\nnavigate /resources\n\`\`\`"`

/** Options accepted by buildSystemPrompt(); every field is optional. */
export type SystemPromptContext = {
  /**
   * Optional preface specific to the app/persona. When omitted,
   * buildSystemPrompt() substitutes its built-in default preface.
   */
  preface?: string
  /**
   * Current route pathname. When omitted, buildSystemPrompt() falls back to
   * window.location.pathname if a `window` global exists.
   */
  path?: string
  /**
   * Whether to inject the live action snapshot. Only an explicit `false`
   * disables it; leaving the field unset includes the snapshot.
   */
  includeActions?: boolean
}

/**
 * Assemble the system prompt sent to the model.
 *
 * Sections, separated by a blank line, in this order:
 *   1. `ctx.preface`, or the default persona sentence when it is absent;
 *   2. the DSL reference text;
 *   3. unless `ctx.includeActions` is exactly `false`, the current route
 *      followed by one "- id: label" line per action the command bus
 *      reports as visible ("(none)" when there are none).
 *
 * @param ctx - Optional overrides; see SystemPromptContext.
 * @returns The full prompt text.
 */
export function buildSystemPrompt(ctx: SystemPromptContext = {}): string {
  const preface =
    ctx.preface ??
    "You are the assistant in Comfy Cloud. Answer concisely and drive the UI when asked."
  const sections: string[] = [preface, DSL_REFERENCE]

  // Caller opted out of the live snapshot: the two static sections are all we send.
  if (ctx.includeActions === false) {
    return sections.join("\n\n")
  }

  const visibleActions = commandBus.listActions().filter((action) => action.visible)

  // Prefer the caller-supplied route; only touch `window` when none was given
  // and we are actually running in a browser-like environment.
  let route = ctx.path ?? null
  if (route === null) {
    route = typeof window === "undefined" ? "" : window.location.pathname
  }

  const actionLines = visibleActions.map((action) => {
    const labelSuffix = action.label ? `: ${action.label}` : ""
    return `- ${action.id}${labelSuffix}`
  })
  const listing = actionLines.length === 0 ? "(none)" : actionLines.join("\n")

  sections.push(`Route: ${route || "?"}\nAvailable actions:\n${listing}`)
  return sections.join("\n\n")
}

/**
 * Approximate how many tokens `text` occupies, assuming a flat four
 * characters (`text.length` units) per token and rounding up. A cheap
 * heuristic meant for budgeting, not an exact tokenizer count.
 */
export function estimateTokens(text: string): number {
  const charsPerToken = 4
  const fractionalTokens = text.length / charsPerToken
  return Math.ceil(fractionalTokens)
}

/**
 * Trim a message list to fit a token budget, preserving the most recent turns.
 *
 * Walks backwards from the last message, adding up estimateTokens() of each
 * `content`, and stops at the first message that would push the running
 * total over `budgetTokens`. Nothing older than that message is kept even if
 * it would fit, so the result is always a contiguous tail of `messages` in
 * its original order.
 *
 * @param messages - Messages ordered oldest to newest; the array is not mutated.
 * @param budgetTokens - Maximum estimated token total allowed for the result.
 * @returns A new array holding the kept tail. It is empty when even the
 *   newest message exceeds the budget.
 */
export function trimMessages<T extends { content: string }>(
  messages: T[],
  budgetTokens: number,
): T[] {
  let used = 0
  // Index of the oldest message that still fits; everything from here on is kept.
  let start = messages.length
  while (start > 0) {
    const candidate = messages[start - 1]
    // Narrow the indexed access (required under noUncheckedIndexedAccess);
    // a hole in the array ends the walk instead of crashing on `.content`.
    if (candidate === undefined) break
    const cost = estimateTokens(candidate.content)
    if (used + cost > budgetTokens) break
    used += cost
    start -= 1
  }
  // A single slice replaces per-message unshift(), which re-shifted the whole
  // kept array on every insert and made long histories quadratic.
  return messages.slice(start)
}

// One fenced ```action block. Capture group 1 is the raw body between the
// opening fence line and the closing fence (lazy, so adjacent blocks stay
// separate). Global so every block in a message can be visited.
const ACTION_BLOCK_RE = /```action\s*\n([\s\S]*?)```/g

/**
 * Collect the body of every ```action fence found in `text`.
 *
 * @param text - Assistant output to scan.
 * @returns The trimmed block bodies in order of appearance; empty when the
 *   text contains no complete action fence.
 */
export function extractActionBlocks(text: string): string[] {
  // matchAll() begins at the source regex's lastIndex, so pin it to the start
  // of the string before scanning.
  ACTION_BLOCK_RE.lastIndex = 0
  return Array.from(text.matchAll(ACTION_BLOCK_RE), (match) => match[1].trim())
}

/** Summary returned by runActionBlocks(). */
export type RunActionBlocksResult = {
  /** Number of action blocks whose script finished without throwing. */
  ran: number
  /** One message per block whose script threw, in execution order. */
  errors: string[]
}

/**
 * Extract every ```action block from `text` and run them one at a time, in
 * order, through runScriptText().
 *
 * A block that throws does not end the run: its message is collected in
 * `errors` and the next block is still attempted. Cancellation is different:
 * once `opts.signal` is aborted, no further block is started. Blocks skipped
 * this way are neither counted in `ran` nor reported in `errors`; the caller
 * owns the signal and can inspect `signal.aborted` to tell the run was cut
 * short.
 *
 * @param text - Assistant output that may contain ```action fences.
 * @param opts - `signal` is forwarded to each runScriptText() call and is
 *   also checked before each block starts.
 * @returns How many blocks completed and the messages of the ones that threw.
 */
export async function runActionBlocks(
  text: string,
  opts: { signal?: AbortSignal } = {},
): Promise<RunActionBlocksResult> {
  const { signal } = opts
  const errors: string[] = []
  let ran = 0
  for (const block of extractActionBlocks(text)) {
    // Do not keep driving the UI after the caller cancelled: without this
    // check every remaining block would still be handed to the runner.
    if (signal?.aborted) break
    try {
      await runScriptText(block, { signal })
      ran++
    } catch (e) {
      // Thrown values are not guaranteed to be Error instances.
      errors.push(e instanceof Error ? e.message : String(e))
    }
  }
  return { ran, errors }
}