Receive llm_usage_recorded events from arcadia-llm-gateway

POST /api/v1/integrations/llm-usage stores priced LLM usage events
(idempotent on gateway_request_id) in llm_usage_events. The gateway is
the LLM-pricing authority — arcadia-cloud trusts the charge it sends
rather than re-pricing.

The monthly invoice rollup now appends an llm_usage line per deployment
alongside the infra quote lines; the exact decimal charges are summed
and rounded to cents once. Closes the gateway→cloud billing loop.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-05-21 08:29:36 +10:00
parent 29f4ad97d6
commit e1f0aedcf7
6 changed files with 269 additions and 1 deletions

View File

@@ -0,0 +1,43 @@
defmodule ArcadiaCloud.Billing.LlmUsageEvent do
  @moduledoc """
  Ecto schema for a single priced LLM usage event, persisted in the
  `llm_usage_events` table.

  Each row carries the identifiers of the request (`gateway_request_id`,
  `tenant_id`, optional `deployment_id`), token counters, and the cost /
  charge figures as they were supplied in the incoming attributes. Rows are
  insert-only from this schema's point of view: only `inserted_at` is
  tracked, there is no `updated_at`.
  """

  use Ecto.Schema

  import Ecto.Changeset

  @primary_key {:id, :binary_id, autogenerate: true}
  @foreign_key_type :binary_id

  # Fields that must be present for an event to be accepted.
  @required_fields [:gateway_request_id, :tenant_id, :occurred_at]

  # Fields that are cast when supplied but may be omitted.
  @optional_fields [
    :deployment_id,
    :provider,
    :model,
    :request_kind,
    :input_tokens,
    :output_tokens,
    :cached_input_tokens,
    :total_tokens,
    :upstream_cost,
    :customer_charge,
    :customer_charge_cents,
    :markup_mode
  ]

  @cast_fields @required_fields ++ @optional_fields

  schema "llm_usage_events" do
    # Identity of the request and who it belongs to.
    field :gateway_request_id, :string
    field :tenant_id, :string
    field :deployment_id, :string

    # What was called.
    field :provider, :string
    field :model, :string
    field :request_kind, :string

    # Token counters; all default to zero when not supplied.
    field :input_tokens, :integer, default: 0
    field :output_tokens, :integer, default: 0
    field :cached_input_tokens, :integer, default: 0
    field :total_tokens, :integer, default: 0

    # Pricing: exact decimal amounts plus an integer cents figure.
    field :upstream_cost, :decimal
    field :customer_charge, :decimal
    field :customer_charge_cents, :integer, default: 0
    field :markup_mode, :string

    field :occurred_at, :utc_datetime_usec

    timestamps(type: :utc_datetime, updated_at: false)
  end

  @doc """
  Builds a changeset for `event` from `attrs`.

  Casts every known field, requires `gateway_request_id`, `tenant_id` and
  `occurred_at`, and registers a unique constraint on `gateway_request_id`
  so that a database uniqueness violation is turned into a changeset error
  instead of raising. (The constraint relies on a matching unique index in
  the database; the migration is not part of this module.)
  """
  def changeset(event, attrs) do
    casted = cast(event, attrs, @cast_fields)
    validated = validate_required(casted, @required_fields)

    unique_constraint(validated, :gateway_request_id)
  end
end

View File

@@ -77,7 +77,12 @@ defmodule ArcadiaCloud.Invoicing do
})
|> Repo.insert()
lines = Enum.flat_map(subscriptions, &subscription_lines(&1, period_start, period_end))
# Infra lines from the quote engine + LLM-token lines from metered
# gateway usage (arcadia-llm-gateway pushes priced usage events).
lines =
Enum.flat_map(subscriptions, &subscription_lines(&1, period_start, period_end)) ++
ArcadiaCloud.LlmUsage.period_lines(tenant_id, period_start, period_end)
Enum.each(lines, &insert_line(invoice.id, &1))
subtotal = Enum.reduce(lines, 0, &(&1.amount_cents + &2))

View File

@@ -0,0 +1,110 @@
defmodule ArcadiaCloud.LlmUsage do
  @moduledoc """
  Stores `llm_usage_recorded` events pushed by arcadia-llm-gateway and
  rolls them into tenant invoices.

  The gateway is the LLM-pricing authority — it sends the already-priced
  charge per request. arcadia-cloud does not re-price; it sums the exact
  decimal `customer_charge` of the received events per deployment and
  rounds to cents once per invoice line.

  Note on `plan_inclusive` LLM pricing: the per-request charge the
  gateway sends is the *marginal* (overage-rate) price. For
  `flat_percent` / `fixed_per_token` policies the sum equals the bill
  exactly. If a tenant is put on a `plan_inclusive` LLM policy, a
  period true-up against the gateway's authoritative `period_charge`
  would be needed — none are in use today.
  """

  import Ecto.Query, warn: false

  alias ArcadiaCloud.Billing.LlmUsageEvent
  alias ArcadiaCloud.Repo

  @doc """
  Idempotently records an event.

  `attrs` may use atom or string keys. A repeated `gateway_request_id`
  (a gateway retry) is a no-op.

  Returns:

    * `{:ok, :recorded}` — the event was inserted;
    * `{:ok, :duplicate}` — an event with the same `gateway_request_id`
      already exists (found up front, or detected through the unique
      constraint when a concurrent insert won the race);
    * `{:error, changeset}` — the attributes are invalid, including a
      missing or uncastable `gateway_request_id`.
  """
  @spec record_event(map()) :: {:ok, :recorded | :duplicate} | {:error, Ecto.Changeset.t()}
  def record_event(attrs) do
    request_id = attrs[:gateway_request_id] || attrs["gateway_request_id"]

    # Cheap pre-check for the common retry case; the unique constraint in
    # insert_event/1 remains the real guard against concurrent duplicates.
    if request_id && Repo.exists?(seen(request_id)) do
      {:ok, :duplicate}
    else
      insert_event(attrs)
    end
  end

  # Inserts the event, mapping a unique-constraint violation on
  # `gateway_request_id` to `{:ok, :duplicate}`.
  defp insert_event(attrs) do
    changeset = LlmUsageEvent.changeset(%LlmUsageEvent{}, attrs)

    case Repo.insert(changeset) do
      {:ok, _event} ->
        {:ok, :recorded}

      {:error, %{errors: errors} = failed} ->
        if duplicate_request?(errors), do: {:ok, :duplicate}, else: {:error, failed}
    end
  end

  # True only when `gateway_request_id` failed because of the unique
  # constraint (a concurrent insert of the same event). Validation errors on
  # the same field — e.g. a missing id ("can't be blank") or an uncastable
  # value — must NOT be reported as duplicates, otherwise the gateway is told
  # the event was handled while nothing was stored.
  defp duplicate_request?(errors) do
    errors
    |> Keyword.get_values(:gateway_request_id)
    |> Enum.any?(fn {_message, opts} -> Keyword.get(opts, :constraint) == :unique end)
  end

  # Query matching stored events with the given gateway request id.
  defp seen(gateway_request_id) do
    from(e in LlmUsageEvent, where: e.gateway_request_id == ^gateway_request_id)
  end

  @doc """
  Invoice lines for a tenant's LLM usage in `[period_start, period_end]`,
  one per deployment.

  Both dates are inclusive and interpreted as UTC days: events are selected
  when `occurred_at` is at or after midnight UTC of `period_start` and
  strictly before midnight UTC of the day after `period_end`.

  Lines whose charge rounds to zero cents are dropped, so the result is
  `[]` when there was no (billable) usage.
  """
  @spec period_lines(String.t(), Date.t(), Date.t()) :: [map()]
  def period_lines(tenant_id, %Date{} = period_start, %Date{} = period_end) do
    # Half-open window [window_start, window_end) covering the inclusive
    # date range.
    window_start = DateTime.new!(period_start, ~T[00:00:00], "Etc/UTC")
    window_end = DateTime.new!(Date.add(period_end, 1), ~T[00:00:00], "Etc/UTC")

    query =
      from(e in LlmUsageEvent,
        where:
          e.tenant_id == ^tenant_id and e.occurred_at >= ^window_start and
            e.occurred_at < ^window_end,
        group_by: e.deployment_id,
        select: %{
          deployment_id: e.deployment_id,
          requests: count(e.id),
          input_tokens: coalesce(sum(e.input_tokens), 0),
          output_tokens: coalesce(sum(e.output_tokens), 0),
          # Sum the exact decimal charge, round to cents once — summing the
          # gateway's pre-rounded per-request cents would drift on high
          # request counts.
          charge: coalesce(sum(e.customer_charge), 0)
        }
      )

    query
    |> Repo.all()
    |> Enum.map(&to_line/1)
    |> Enum.reject(&(&1.amount_cents == 0))
  end

  # Converts one aggregated per-deployment row into an invoice line map.
  defp to_line(row) do
    # Decimal currency amount -> integer cents, rounded half-up exactly once.
    amount_cents =
      row.charge
      |> to_decimal()
      |> Decimal.mult(Decimal.new(100))
      |> Decimal.round(0, :half_up)
      |> Decimal.to_integer()

    %{
      deployment_id: row.deployment_id,
      kind: "llm_usage",
      resource_kind: "llm_tokens",
      description:
        "LLM tokens — #{row.input_tokens} in / #{row.output_tokens} out (#{row.requests} requests)",
      qty: row.input_tokens + row.output_tokens,
      unit: "token",
      # The charge is an aggregate of individually priced requests, so there
      # is no single per-token unit price to report.
      unit_price_cents: nil,
      amount_cents: amount_cents,
      meta: %{
        "input_tokens" => row.input_tokens,
        "output_tokens" => row.output_tokens,
        "requests" => row.requests
      }
    }
  end

  # Normalises the aggregated charge to a Decimal. The integer clause covers
  # the literal `0` fallback from `coalesce/2`; anything else is treated as
  # zero.
  defp to_decimal(%Decimal{} = d), do: d
  defp to_decimal(n) when is_integer(n), do: Decimal.new(n)
  defp to_decimal(_), do: Decimal.new(0)
end