Phase 1 cost ingestion: balance + invoices + CSV parse + resource match

Three new schemas:
- cloud_balance_snapshots — hourly MTD balance/usage poll for live-accrual.
- cloud_invoices — header per provider invoice, with ingest status flags.
- cloud_cost_lines — per-line-item COGS, FK to cloud_resources where matched.

Three new Oban workers (queue: cloud_billing):
- BalanceWorker (hourly) records a snapshot.
- BillingHistoryWorker (daily) discovers invoices via /v2/customers/my/
  billing_history, upserts headers, enqueues an InvoiceIngestWorker for
  each not-yet-ingested invoice.
- InvoiceIngestWorker (per-invoice) fetches /invoices/:uuid/csv, parses
  with NimbleCSV (header-keyed so column order shifts don't break us),
  replaces the invoice's line set, then matches lines to cloud_resources
  by (kind, name) — case-insensitive, name extracted from "name (size)"
  description format.

DigitalOcean.Client gains get_balance / list_billing_history /
get_invoice_summary / fetch_invoice_csv. The CSV endpoint returns text/csv
so we bypass Req's body decoder.

Cron additions: BalanceWorker hourly at :07, BillingHistoryWorker daily
at 02:23.

API:
- GET /api/v1/billing/balance — latest snapshot, platform_admin only.
- GET /api/v1/billing/cost-lines?period=YYYY-MM-DD&kind&limit — per-line
  COGS, platform_admin only.

A live smoke test against the real DO billing API surfaced three CSV-format
gotchas, all now fixed: column headers use underscores, not spaces
(group_description, project_name); the USD column has a $ prefix; and dates
use the "YYYY-MM-DD HH:MM:SS +0000" format (space separator + RFC822 offset).

Verified: 137 historical invoices discovered going back to 2014;
April 2026 invoice (33 lines, $86.92 total) ingested with 6/33 lines
matched to current cloud_resources. The unmatched lines are expected:
historic droplets, Spaces buckets (not yet synced), and GST.

NimbleCSV ~> 1.2 added as a dep.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-05-19 22:20:50 +10:00
parent 53b664558d
commit 0079f98bb5
14 changed files with 826 additions and 2 deletions

View File

@@ -0,0 +1,47 @@
defmodule ArcadiaCloud.Sync.BalanceWorker do
  @moduledoc """
  Hourly poll of `/v2/customers/my/balance`. Records a snapshot row so
  the cost dashboard can show MTD usage in real time.

  The job reports `:ok` only when both the API call and the snapshot write
  succeed; any non-`{:ok, _}` result from either step is returned to Oban
  unchanged.
  """
  use Oban.Worker, queue: :cloud_billing, max_attempts: 3

  alias ArcadiaCloud.Billing
  alias ArcadiaCloud.DigitalOcean.Client

  @impl Oban.Worker
  def perform(_job) do
    # NOTE(review): assumes Billing.record_balance/1 returns {:ok, _} | {:error, _}
    # like the other Billing write functions used by the sibling workers — confirm.
    with {:ok, body} <- Client.get_balance(),
         {:ok, _snapshot} <- Billing.record_balance(snapshot_attrs(body)) do
      :ok
    end
  end

  # Maps the decoded balance response onto snapshot attributes. Dollar
  # amounts are converted to integer cents; the full response is kept in `raw`.
  defp snapshot_attrs(body) do
    %{
      provider: "digitalocean",
      month_to_date_balance_cents: dollars_to_cents(body["month_to_date_balance"]),
      account_balance_cents: dollars_to_cents(body["account_balance"]),
      month_to_date_usage_cents: dollars_to_cents(body["month_to_date_usage"]),
      generated_at: parse_iso(body["generated_at"]),
      raw: body
    }
  end

  # Converts a dollar amount (string or number) to integer cents.
  # Returns nil for a missing or unparseable value.
  defp dollars_to_cents(nil), do: nil

  defp dollars_to_cents(value) when is_binary(value) do
    case Float.parse(value) do
      {float, _rest} -> round(float * 100)
      :error -> nil
    end
  end

  defp dollars_to_cents(value) when is_number(value), do: round(value * 100)

  # Parses an ISO 8601 timestamp, truncated to seconds. Falls back to the
  # current UTC time when the value is missing or cannot be parsed.
  defp parse_iso(nil), do: now_utc()

  defp parse_iso(str) when is_binary(str) do
    case DateTime.from_iso8601(str) do
      {:ok, dt, _offset} -> DateTime.truncate(dt, :second)
      _ -> now_utc()
    end
  end

  defp now_utc, do: DateTime.truncate(DateTime.utc_now(), :second)
end

View File

@@ -0,0 +1,97 @@
defmodule ArcadiaCloud.Sync.BillingHistoryWorker do
  @moduledoc """
  Daily discovery of new invoices via `/v2/customers/my/billing_history`.
  Upserts an invoice header per row and enqueues an InvoiceIngestWorker
  job for any invoice not yet ingested (CSV parse + line-item write +
  resource match). Only invoices whose `lines_ingested_at` is still nil
  are enqueued.
  """
  use Oban.Worker, queue: :cloud_billing, max_attempts: 3

  alias ArcadiaCloud.Billing
  alias ArcadiaCloud.DigitalOcean.Client
  alias ArcadiaCloud.Sync.InvoiceIngestWorker

  @impl Oban.Worker
  def perform(_job) do
    with {:ok, history} <- Client.list_billing_history() do
      Enum.each(history, &sync_item/1)
      :ok
    end
  end

  # Upserts the invoice header for one billing-history row and enqueues an
  # ingest job when its lines have not been ingested yet. Non-invoice rows
  # are ignored. A failed upsert raises (assertive match) and fails the job.
  defp sync_item(item) do
    case extract_invoice_attrs(item) do
      {:ok, attrs} ->
        {:ok, invoice} = Billing.upsert_invoice(attrs)

        if is_nil(invoice.lines_ingested_at) do
          %{invoice_id: invoice.id}
          |> InvoiceIngestWorker.new()
          |> Oban.insert!()
        end

        :ok

      :skip ->
        :ok
    end
  end

  # DO billing_history items have shape:
  # {"type" => "Invoice", "description" => "Invoice for September 2024",
  # "amount" => "-100.00", "date" => "2024-10-01T00:00:00Z",
  # "invoice_id" => "uuid-here", "invoice_uuid" => "uuid-here"}
  # We only care about Invoice rows (skip Payment, Credit, etc).
  #
  # Returns {:ok, attrs} for an Invoice row carrying an id, :skip otherwise.
  defp extract_invoice_attrs(%{"type" => "Invoice"} = item) do
    case item["invoice_uuid"] || item["invoice_id"] do
      nil ->
        :skip

      uuid ->
        {:ok,
         %{
           provider: "digitalocean",
           provider_invoice_id: uuid,
           invoice_period: derive_period(item["date"]),
           amount_cents: dollars_to_cents_abs(item["amount"]),
           status: "issued",
           issued_at: parse_iso(item["date"]),
           raw: item
         }}
    end
  end

  defp extract_invoice_attrs(_), do: :skip

  # Invoice dated YYYY-MM-01 covers the previous month. Derive period as
  # first-of-the-previous-month. A missing or unparseable date falls back
  # to the first of the month before the current UTC month.
  defp derive_period(nil), do: fallback_period()

  defp derive_period(iso) when is_binary(iso) do
    case DateTime.from_iso8601(iso) do
      {:ok, dt, _offset} -> dt |> DateTime.to_date() |> Date.add(-1) |> first_of_month()
      _ -> fallback_period()
    end
  end

  # Stepping back one day from the first of the current month always lands
  # in the previous month, whatever the month lengths are (a fixed "-30 days"
  # does not: 1 March - 30 days is in January).
  defp fallback_period do
    Date.utc_today()
    |> first_of_month()
    |> Date.add(-1)
    |> first_of_month()
  end

  defp first_of_month(%Date{year: y, month: m}), do: Date.new!(y, m, 1)

  # Converts a dollar amount (string or number) to a non-negative integer
  # number of cents; the sample above shows invoice amounts arriving negative.
  # Returns nil for a missing or unparseable value.
  defp dollars_to_cents_abs(nil), do: nil

  defp dollars_to_cents_abs(value) when is_binary(value) do
    case Float.parse(value) do
      {float, _rest} -> abs(round(float * 100))
      :error -> nil
    end
  end

  defp dollars_to_cents_abs(value) when is_number(value), do: abs(round(value * 100))

  # Parses an ISO 8601 timestamp truncated to seconds; nil when missing or
  # unparseable.
  defp parse_iso(nil), do: nil

  defp parse_iso(str) when is_binary(str) do
    case DateTime.from_iso8601(str) do
      {:ok, dt, _offset} -> DateTime.truncate(dt, :second)
      _ -> nil
    end
  end
end

View File

@@ -0,0 +1,194 @@
defmodule ArcadiaCloud.Sync.InvoiceIngestWorker do
  @moduledoc """
  Fetch a single invoice's CSV, parse line items, replace cost_lines,
  then match each line to a cloud_resource by (kind, name).

  Enqueued per invoice by BillingHistoryWorker. Per-invoice idempotency —
  re-runs replace the line set in one transaction. Marks invoice
  `lines_ingested_at` on success.
  """
  use Oban.Worker, queue: :cloud_billing, max_attempts: 3

  alias ArcadiaCloud.Billing
  alias ArcadiaCloud.Billing.CloudInvoice
  alias ArcadiaCloud.DigitalOcean.Client
  alias ArcadiaCloud.Repo

  NimbleCSV.define(InvoiceCsv, separator: ",", escape: "\"")

  # Ordered {substring, kind} pairs used by derive_kind/2. The first pair
  # whose substring occurs in the lower-cased product string wins, so the
  # order is significant.
  @kind_markers [
    {"droplet", "droplet"},
    {"volume", "volume"},
    {"snapshot", "snapshot"},
    {"load balancer", "load_balancer"},
    {"load_balancer", "load_balancer"},
    {"floating ip", "floating_ip"},
    {"spaces", "spaces_bucket"},
    {"dns", "dns_zone"},
    {"managed database", "managed_db"},
    {"kubernetes", "k8s_cluster"}
  ]

  @impl Oban.Worker
  def perform(%Oban.Job{args: %{"invoice_id" => invoice_id}}) do
    # Repo.get!/2 raises when the invoice row no longer exists.
    CloudInvoice
    |> Repo.get!(invoice_id)
    |> do_ingest()
  end

  # Fetches the CSV, replaces the invoice's cost lines, runs resource
  # matching and finally marks the invoice as ingested. A failed fetch is
  # returned as-is; a failed write raises via the assertive matches.
  defp do_ingest(%CloudInvoice{provider_invoice_id: uuid} = invoice) do
    with {:ok, csv} <- Client.fetch_invoice_csv(uuid) do
      lines = parse_csv(csv, invoice.invoice_period)
      {:ok, _} = Billing.replace_cost_lines(invoice, lines)
      matched = Billing.match_cost_lines_to_resources(invoice)
      {:ok, _} = Billing.mark_invoice_ingested(invoice)
      {:ok, %{lines: length(lines), matched: matched}}
    end
  end

  # ---- CSV parsing ----------------------------------------------------------

  # DO invoice CSV columns looked up below (header names are matched
  # case-insensitively and use underscores, not spaces):
  #   product, group_description, description, hours, start, end, USD,
  #   project_name, category
  #
  # Header is on the first line; we use it to find columns rather than
  # rely on order (DO occasionally adds columns).
  defp parse_csv(csv, period) do
    case InvoiceCsv.parse_string(csv, skip_headers: false) do
      [headers | data] ->
        index = build_index(headers)
        Enum.map(data, &row_to_line_attrs(&1, index, period))

      _ ->
        []
    end
  end

  # Maps each trimmed, lower-cased header name to its column position.
  defp build_index(headers) do
    headers
    |> Enum.with_index()
    |> Map.new(fn {header, i} -> {String.downcase(String.trim(header)), i} end)
  end

  # Builds the cost-line attributes for one CSV data row. Missing or blank
  # cells become nil, except the amount, which parse_cents/1 defaults to 0.
  defp row_to_line_attrs(row, index, period) do
    product = at(row, index, "product")
    description = at(row, index, "description") || at(row, index, "group_description")
    hours = at(row, index, "hours")
    usd = at(row, index, "usd")
    start_at = at(row, index, "start")
    end_at = at(row, index, "end")
    project_name = at(row, index, "project_name")
    category = at(row, index, "category")

    %{
      invoice_period: period,
      kind: derive_kind(product, category),
      description: description,
      qty: parse_decimal(hours),
      unit: if(hours, do: "hours", else: nil),
      amount_cents: parse_cents(usd),
      unit_cost_cents: nil,
      start_at: parse_datetime(start_at),
      end_at: parse_datetime(end_at),
      project_name: project_name,
      category: category,
      raw: %{
        "product" => product,
        "category" => category,
        "row" => row
      }
    }
  end

  # Value of the column named `key` in `row`; nil when the column is absent
  # from the header, the row is too short, or the cell is empty.
  defp at(row, index, key) do
    case Map.get(index, key) do
      nil -> nil
      i -> row |> Enum.at(i) |> blank_to_nil()
    end
  end

  defp blank_to_nil(""), do: nil
  defp blank_to_nil(other), do: other

  # Best-effort mapping from DO product/category strings to our cloud_resources.kind.
  # Returns nil when the product is missing or matches no known marker.
  defp derive_kind(product, _category) when is_binary(product) do
    lowered = String.downcase(product)

    Enum.find_value(@kind_markers, fn {marker, kind} ->
      if String.contains?(lowered, marker), do: kind
    end)
  end

  defp derive_kind(_, _), do: nil

  # Parses a USD cell such as "$1,234.56" into integer cents. Missing or
  # unparseable values count as 0.
  defp parse_cents(nil), do: 0

  defp parse_cents(value) when is_binary(value) do
    cleaned = String.replace(value, ["$", ",", " "], "")

    case Float.parse(cleaned) do
      {f, _rest} -> round(f * 100)
      :error -> 0
    end
  end

  # Parses a quantity cell into a Decimal; nil when missing or unparseable.
  defp parse_decimal(nil), do: nil

  defp parse_decimal(value) when is_binary(value) do
    case Decimal.parse(value) do
      {dec, _rest} -> dec
      :error -> nil
    end
  end

  # DO CSV uses "2026-04-01 00:00:00 +0000" (space separator, RFC822 offset).
  # Also handle "2026-04-01T00:00:00Z" (ISO) and plain "YYYY-MM-DD".
  # Returns a DateTime truncated to seconds, or nil when nothing parses.
  defp parse_datetime(nil), do: nil
  defp parse_datetime(""), do: nil

  defp parse_datetime(str) when is_binary(str) do
    cond do
      String.contains?(str, "T") -> parse_iso_datetime(str)
      String.contains?(str, " ") -> parse_space_datetime(str)
      true -> parse_date_only(str)
    end
  end

  defp parse_iso_datetime(str) do
    case DateTime.from_iso8601(str) do
      {:ok, dt, _offset} -> DateTime.truncate(dt, :second)
      _ -> nil
    end
  end

  # "2026-04-01 00:00:00 +0000" → ISO equivalent
  # Only called for strings containing a space, so the first split always
  # yields exactly two parts.
  defp parse_space_datetime(str) do
    [date_part, rest] = String.split(str, " ", parts: 2)
    [time_part | maybe_offset] = String.split(rest, " ", parts: 2)
    iso = date_part <> "T" <> time_part <> normalize_offset(maybe_offset)
    parse_iso_datetime(iso)
  end

  defp normalize_offset([]), do: "Z"
  defp normalize_offset([off]) when is_binary(off), do: normalize_offset_str(off)

  # Rewrites an offset into the form DateTime.from_iso8601/1 is given here:
  #   * any zero offset → "Z" (negative-zero spellings are folded in too)
  #   * "+HHMM" / "-HHMM" → "+HH:MM" / "-HH:MM"
  #   * "+HH:MM" / "-HH:MM" is passed through unchanged, so a non-UTC
  #     offset is no longer silently read as UTC
  #   * anything else is treated as UTC
  defp normalize_offset_str(off) when off in ["+0000", "-0000", "+00:00", "-00:00"], do: "Z"

  defp normalize_offset_str(<<sign::binary-1, hh::binary-2, mm::binary-2>>)
       when sign in ["+", "-"] do
    sign <> hh <> ":" <> mm
  end

  defp normalize_offset_str(<<sign::binary-1, _hh::binary-2, ":", _mm::binary-2>> = off)
       when sign in ["+", "-"] do
    off
  end

  defp normalize_offset_str(_), do: "Z"

  # Plain "YYYY-MM-DD" is read as midnight UTC of that date.
  defp parse_date_only(str) do
    case Date.from_iso8601(str) do
      {:ok, date} -> DateTime.new!(date, ~T[00:00:00], "Etc/UTC")
      _ -> nil
    end
  end
end