Receive llm_usage_recorded events from arcadia-llm-gateway
POST /api/v1/integrations/llm-usage stores priced LLM usage events (idempotent on gateway_request_id) in llm_usage_events. The gateway is the LLM-pricing authority — arcadia-cloud trusts the charge it sends rather than re-pricing. The monthly invoice rollup now appends an llm_usage line per deployment alongside the infra quote lines; the exact decimal charges are summed and rounded to cents once. Closes the gateway→cloud billing loop. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
43
lib/arcadia_cloud/billing/llm_usage_event.ex
Normal file
43
lib/arcadia_cloud/billing/llm_usage_event.ex
Normal file
@@ -0,0 +1,43 @@
|
||||
defmodule ArcadiaCloud.Billing.LlmUsageEvent do
  @moduledoc """
  Ecto schema for a row of the `llm_usage_events` table: one priced LLM
  usage event, identified by its `gateway_request_id`.

  Rows are insert-only from this schema's point of view: `inserted_at` is
  tracked, `updated_at` is disabled.
  """

  use Ecto.Schema

  import Ecto.Changeset

  @primary_key {:id, :binary_id, autogenerate: true}
  @foreign_key_type :binary_id

  schema "llm_usage_events" do
    # Identity / ownership.
    field :gateway_request_id, :string
    field :tenant_id, :string
    field :deployment_id, :string

    # What was called.
    field :provider, :string
    field :model, :string
    field :request_kind, :string

    # Token counters; each defaults to 0 when not supplied.
    field :input_tokens, :integer, default: 0
    field :output_tokens, :integer, default: 0
    field :cached_input_tokens, :integer, default: 0
    field :total_tokens, :integer, default: 0

    # Pricing: exact decimal amounts plus an integer cents value.
    field :upstream_cost, :decimal
    field :customer_charge, :decimal
    field :customer_charge_cents, :integer, default: 0
    field :markup_mode, :string

    field :occurred_at, :utc_datetime_usec

    timestamps(type: :utc_datetime, updated_at: false)
  end

  # Fields that must be present for a changeset to be valid.
  @required_fields [:gateway_request_id, :tenant_id, :occurred_at]

  # Fields that are cast when given but may be omitted.
  @optional_fields [
    :deployment_id,
    :provider,
    :model,
    :request_kind,
    :input_tokens,
    :output_tokens,
    :cached_input_tokens,
    :total_tokens,
    :upstream_cost,
    :customer_charge,
    :customer_charge_cents,
    :markup_mode
  ]

  @castable_fields @required_fields ++ @optional_fields

  @doc """
  Builds a changeset for `event` from `attrs`.

  Casts every known field, requires `gateway_request_id`, `tenant_id` and
  `occurred_at`, and registers a unique constraint on `gateway_request_id`
  so that a violation of the corresponding unique index is reported as a
  changeset error instead of raising.
  """
  def changeset(event, attrs) do
    casted = cast(event, attrs, @castable_fields)
    checked = validate_required(casted, @required_fields)

    unique_constraint(checked, :gateway_request_id)
  end
end
|
||||
@@ -77,7 +77,12 @@ defmodule ArcadiaCloud.Invoicing do
|
||||
})
|
||||
|> Repo.insert()
|
||||
|
||||
lines = Enum.flat_map(subscriptions, &subscription_lines(&1, period_start, period_end))
|
||||
# Infra lines from the quote engine + LLM-token lines from metered
|
||||
# gateway usage (arcadia-llm-gateway pushes priced usage events).
|
||||
lines =
|
||||
Enum.flat_map(subscriptions, &subscription_lines(&1, period_start, period_end)) ++
|
||||
ArcadiaCloud.LlmUsage.period_lines(tenant_id, period_start, period_end)
|
||||
|
||||
Enum.each(lines, &insert_line(invoice.id, &1))
|
||||
|
||||
subtotal = Enum.reduce(lines, 0, &(&1.amount_cents + &2))
|
||||
|
||||
110
lib/arcadia_cloud/llm_usage.ex
Normal file
110
lib/arcadia_cloud/llm_usage.ex
Normal file
@@ -0,0 +1,110 @@
|
||||
defmodule ArcadiaCloud.LlmUsage do
  @moduledoc """
  Stores `llm_usage_recorded` events pushed by arcadia-llm-gateway and
  rolls them into tenant invoices.

  The gateway is the LLM-pricing authority — it sends the already-priced
  charge per request. arcadia-cloud does not re-price; it sums the exact
  decimal `customer_charge` of the received events per deployment and
  rounds each sum to cents once when building the invoice line.

  Note on `plan_inclusive` LLM pricing: the per-request charge the
  gateway sends is the *marginal* (overage-rate) price. For
  `flat_percent` / `fixed_per_token` policies the sum equals the bill
  exactly. If a tenant is put on a `plan_inclusive` LLM policy, a
  period true-up against the gateway's authoritative `period_charge`
  would be needed — none are in use today.
  """

  import Ecto.Query, warn: false

  alias ArcadiaCloud.Repo
  alias ArcadiaCloud.Billing.LlmUsageEvent

  @doc """
  Idempotently records an event.

  A repeated `gateway_request_id` (a gateway retry) is a no-op.

  Returns:

    * `{:ok, :recorded}` — the event was inserted.
    * `{:ok, :duplicate}` — an event with this `gateway_request_id` already
      exists (found up front, or detected through the unique index when a
      concurrent insert won the race).
    * `{:error, changeset}` — the attributes are invalid, including a
      missing or blank `gateway_request_id`.
  """
  @spec record_event(map()) :: {:ok, :recorded | :duplicate} | {:error, Ecto.Changeset.t()}
  def record_event(attrs) do
    gateway_request_id = attrs[:gateway_request_id] || attrs["gateway_request_id"]

    if gateway_request_id && Repo.exists?(seen(gateway_request_id)) do
      {:ok, :duplicate}
    else
      insert_event(attrs)
    end
  end

  # Inserts the event, mapping a unique-index violation to `{:ok, :duplicate}`.
  defp insert_event(attrs) do
    case %LlmUsageEvent{} |> LlmUsageEvent.changeset(attrs) |> Repo.insert() do
      {:ok, _event} ->
        {:ok, :recorded}

      {:error, %{errors: errors} = changeset} ->
        # Only a unique-constraint error means a concurrent insert of the
        # same event got there first. Any other error on the field (e.g.
        # "can't be blank") is a real validation failure and must not be
        # acknowledged as a duplicate, or the event would be silently lost.
        if unique_violation?(errors), do: {:ok, :duplicate}, else: {:error, changeset}
    end
  end

  # True when `errors` holds a unique-constraint error on `:gateway_request_id`.
  defp unique_violation?(errors) do
    Enum.any?(errors, fn
      {:gateway_request_id, {_message, opts}} -> Keyword.get(opts, :constraint) == :unique
      _other -> false
    end)
  end

  # Query matching events already stored under `gateway_request_id`.
  defp seen(gateway_request_id) do
    from(e in LlmUsageEvent, where: e.gateway_request_id == ^gateway_request_id)
  end

  @doc """
  Invoice lines for a tenant's LLM usage in `[period_start, period_end]`,
  one per deployment. Returns `[]` when there was no usage.

  Both dates are inclusive and interpreted as UTC days: events are selected
  with `occurred_at` from midnight of `period_start` up to (but excluding)
  midnight of the day after `period_end`. Lines whose summed charge rounds
  to zero cents are omitted.
  """
  @spec period_lines(String.t(), Date.t(), Date.t()) :: [map()]
  def period_lines(tenant_id, %Date{} = period_start, %Date{} = period_end) do
    # Half-open UTC range [range_start, range_end) covering both dates.
    range_start = DateTime.new!(period_start, ~T[00:00:00], "Etc/UTC")
    range_end = DateTime.new!(Date.add(period_end, 1), ~T[00:00:00], "Etc/UTC")

    from(e in LlmUsageEvent,
      where:
        e.tenant_id == ^tenant_id and e.occurred_at >= ^range_start and
          e.occurred_at < ^range_end,
      group_by: e.deployment_id,
      select: %{
        deployment_id: e.deployment_id,
        requests: count(e.id),
        input_tokens: coalesce(sum(e.input_tokens), 0),
        output_tokens: coalesce(sum(e.output_tokens), 0),
        # Sum the exact decimal charge, round to cents once — summing the
        # gateway's pre-rounded per-request cents would drift on high
        # request counts.
        # NOTE(review): `customer_charge` is optional in the changeset; rows
        # where it is NULL are skipped by SQL `sum` and add nothing to the
        # line — confirm the gateway always sends it.
        charge: coalesce(sum(e.customer_charge), 0)
      }
    )
    |> Repo.all()
    |> Enum.map(&to_line/1)
    |> Enum.reject(&(&1.amount_cents == 0))
  end

  # Turns one aggregated per-deployment row into an invoice-line map.
  defp to_line(row) do
    # Convert the summed charge to whole cents, rounding half up exactly once.
    amount_cents =
      row.charge
      |> to_decimal()
      |> Decimal.mult(Decimal.new(100))
      |> Decimal.round(0, :half_up)
      |> Decimal.to_integer()

    %{
      deployment_id: row.deployment_id,
      kind: "llm_usage",
      resource_kind: "llm_tokens",
      description:
        "LLM tokens — #{row.input_tokens} in / #{row.output_tokens} out (#{row.requests} requests)",
      qty: row.input_tokens + row.output_tokens,
      unit: "token",
      unit_price_cents: nil,
      amount_cents: amount_cents,
      meta: %{
        "input_tokens" => row.input_tokens,
        "output_tokens" => row.output_tokens,
        "requests" => row.requests
      }
    }
  end

  # Normalises the aggregate to a `Decimal`; anything that is neither a
  # `Decimal` nor an integer is treated as zero.
  defp to_decimal(%Decimal{} = d), do: d
  defp to_decimal(n) when is_integer(n), do: Decimal.new(n)
  defp to_decimal(_), do: Decimal.new(0)
end
|
||||
Reference in New Issue
Block a user