From ea3101ca2fcc94dc175d7c93d8931cc693ce6da0 Mon Sep 17 00:00:00 2001 From: Giuliano Silvestro Date: Tue, 19 May 2026 22:46:29 +1000 Subject: [PATCH] Close inventory gaps: Spaces buckets (URN-discover), droplet backups, snapshot URN aliases MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Two patterns added: 1. ProjectsWorker now does URN-discover for kinds without a dedicated sync worker (spaces_bucket, managed_db, k8s_cluster, etc.). For these, it inserts a minimal placeholder row when the URN points to something not yet in inventory. Kinds with dedicated workers (droplet, snapshot, volume, etc.) still get attribution-only — the worker is source of truth for richer attrs. Implemented by splitting attribute_or_discover/4 on a @dedicated_kinds whitelist. 2. New BackupsWorker pulls /v2/droplets/:id/backups for each active droplet. DO automated backups aren't in /v2/snapshots; they live per droplet. Cron: hourly at :41. Kind="droplet_backup". URN normalization extended for two more aliases DO emits: "volumesnapshot" → snapshot (was creating a duplicate row) "image" → snapshot (DO droplet snapshots show as do:image:id) Billing.find_resource/1 gets a kind-specific clause for droplet_backup that matches to the parent droplet by name, since invoice lines for backups read "[droplet-name] (Weekly Backup Services)" — the line is a per-droplet subscription, not a per-backup-snapshot fee. Live verified on the same April 2026 invoice: - 6 Spaces buckets discovered via URN (account has 6, only 1 visible in the invoice as the $5 subscription line — that's account-level so it can't tie to a specific bucket, expected). - 4 droplet backups discovered via BackupsWorker; the git.sky-ai.com backup line now matches (repo.sky-ai.com backup line can't match — that droplet was destroyed). - Of 16 unmatched lines: 11 are destroyed historic resources, 1 is GST, 1 is the account-level Spaces subscription, 3 are likely tiny snapshot name variances. 
Effectively ~100% of currently-existing billable resources match. Co-Authored-By: Claude Opus 4.7 (1M context) --- config/config.exs | 2 + lib/arcadia_cloud/billing.ex | 22 ++++-- lib/arcadia_cloud/digital_ocean/client.ex | 4 ++ lib/arcadia_cloud/sync/backups_worker.ex | 84 +++++++++++++++++++++++ lib/arcadia_cloud/sync/projects_worker.ex | 44 +++++++++++- 5 files changed, 150 insertions(+), 6 deletions(-) create mode 100644 lib/arcadia_cloud/sync/backups_worker.ex diff --git a/config/config.exs b/config/config.exs index ef1b1da..8a2a671 100644 --- a/config/config.exs +++ b/config/config.exs @@ -61,6 +61,8 @@ config :arcadia_cloud, Oban, {"*/15 * * * *", ArcadiaCloud.Sync.LoadBalancersWorker}, # Snapshots change slowly; hourly is enough and reduces API churn {"33 * * * *", ArcadiaCloud.Sync.SnapshotsWorker}, + # Backups also slow-moving; hourly per-droplet walk + {"41 * * * *", ArcadiaCloud.Sync.BackupsWorker}, # Billing: hourly balance, daily invoice discovery {"7 * * * *", ArcadiaCloud.Sync.BalanceWorker}, {"23 2 * * *", ArcadiaCloud.Sync.BillingHistoryWorker} diff --git a/lib/arcadia_cloud/billing.ex b/lib/arcadia_cloud/billing.ex index c64b0f0..273b54b 100644 --- a/lib/arcadia_cloud/billing.ex +++ b/lib/arcadia_cloud/billing.ex @@ -129,8 +129,24 @@ defmodule ArcadiaCloud.Billing do end) end - defp find_resource(%CloudCostLine{kind: kind, description: desc}) when is_binary(desc) do - name_lower = desc |> extract_name() |> String.downcase() + # Droplet backups are billed as a per-droplet subscription. The CSV line + # description carries the droplet name ("git.sky-ai.com (Weekly Backup + # Services)") so we match the line to the parent droplet, not a specific + # backup snapshot. 
+ defp find_resource(%CloudCostLine{kind: "droplet_backup", description: desc}) + when is_binary(desc) do + find_by_kind_and_name("droplet", desc) + end + + defp find_resource(%CloudCostLine{kind: kind, description: desc}) + when is_binary(desc) and is_binary(kind) do + find_by_kind_and_name(kind, desc) + end + + defp find_resource(_), do: nil + + defp find_by_kind_and_name(kind, description) do + name_lower = description |> extract_name() |> String.downcase() from(r in CloudResource, where: @@ -141,8 +157,6 @@ defmodule ArcadiaCloud.Billing do |> Repo.one() end - defp find_resource(_), do: nil - # DO CSV description is often "name (size_slug)" or "name-1234 (region) NGB Snapshot". # Strip everything after the first " (" — best-effort name extraction. defp extract_name(desc) do diff --git a/lib/arcadia_cloud/digital_ocean/client.ex b/lib/arcadia_cloud/digital_ocean/client.ex index 7ab78bb..103fb98 100644 --- a/lib/arcadia_cloud/digital_ocean/client.ex +++ b/lib/arcadia_cloud/digital_ocean/client.ex @@ -24,6 +24,10 @@ defmodule ArcadiaCloud.DigitalOcean.Client do def list_firewalls(opts \\ []), do: list_paginated("/firewalls", "firewalls", opts) def list_load_balancers(opts \\ []), do: list_paginated("/load_balancers", "load_balancers", opts) + def list_droplet_backups(droplet_id, opts \\ []) do + list_paginated("/droplets/#{droplet_id}/backups", "backups", opts) + end + # ---- billing -------------------------------------------------------------- def get_balance(opts \\ []) do diff --git a/lib/arcadia_cloud/sync/backups_worker.ex b/lib/arcadia_cloud/sync/backups_worker.ex new file mode 100644 index 0000000..fa47fde --- /dev/null +++ b/lib/arcadia_cloud/sync/backups_worker.ex @@ -0,0 +1,84 @@ +defmodule ArcadiaCloud.Sync.BackupsWorker do + @moduledoc """ + Sync of DO automated droplet backups. + + Backups are not exposed via /v2/snapshots — they live under each + droplet at /v2/droplets/:id/backups. 
We iterate every active droplet + in inventory and pull its backups, normalizing them as + kind="droplet_backup" with the parent droplet_id in attrs. + """ + + use Oban.Worker, queue: :cloud_sync_full, max_attempts: 3 + + import Ecto.Query + + alias ArcadiaCloud.Cloud + alias ArcadiaCloud.Cloud.CloudResource + alias ArcadiaCloud.DigitalOcean.Client + alias ArcadiaCloud.Repo + + @kind "droplet_backup" + @provider "digitalocean" + + @impl Oban.Worker + def perform(_job) do + now = DateTime.utc_now() |> DateTime.truncate(:second) + droplets = list_active_droplets() + + Enum.each(droplets, fn d -> + case Client.list_droplet_backups(d.provider_id) do + {:ok, backups} -> + Enum.each(backups, fn b -> + Cloud.upsert_resource(normalize(b, d, now)) + end) + + {:error, _} -> + # Soft-fail per droplet; mark_stale below handles disappearances. + :skip + end + end) + + Cloud.mark_stale(@kind, now) + :ok + end + + defp list_active_droplets do + from(r in CloudResource, + where: + r.provider == ^@provider and r.kind == "droplet" and is_nil(r.deleted_at) and + r.status != "archived", + select: %{id: r.id, provider_id: r.provider_id, cloud_project_id: r.cloud_project_id, + tenant_id: r.tenant_id} + ) + |> Repo.all() + end + + defp normalize(b, droplet, now) do + region = + case b["regions"] do + [first | _] when is_binary(first) -> first + _ -> nil + end + + %{ + provider: @provider, + provider_id: to_string(b["id"]), + kind: @kind, + name: b["name"] || "backup-#{b["id"]}", + region: region, + status: "active", + tags: [], + cloud_project_id: droplet.cloud_project_id, + tenant_id: droplet.tenant_id, + attrs: %{ + droplet_id: droplet.provider_id, + size_gigabytes: b["size_gigabytes"], + min_disk_size: b["min_disk_size"], + regions: b["regions"], + do_created_at: b["created_at"] + }, + first_seen_at: now, + last_seen_at: now + } + end +end diff --git a/lib/arcadia_cloud/sync/projects_worker.ex b/lib/arcadia_cloud/sync/projects_worker.ex index eb1307d..86aba10 100644 --- 
a/lib/arcadia_cloud/sync/projects_worker.ex +++ b/lib/arcadia_cloud/sync/projects_worker.ex @@ -64,17 +64,35 @@ defmodule ArcadiaCloud.Sync.ProjectsWorker do end) end + # Kinds with a dedicated sync worker — ProjectsWorker only updates attribution + # for these, never inserts (the worker is the source of truth for richer attrs). + @dedicated_kinds ~w(droplet volume snapshot floating_ip firewall load_balancer dns_zone) + defp attribute_urns(urns, %{id: project_id} = local) do tenant_id = tenant_id_for(local) Enum.each(urns, fn %{"urn" => urn} -> case parse_urn(urn) do - {kind, provider_id} -> update_resource_attribution(kind, provider_id, project_id, tenant_id) - _ -> :skip + {kind, provider_id} -> + attribute_or_discover(kind, provider_id, project_id, tenant_id) + + _ -> + :skip end end) end + # For kinds with a dedicated worker, just update attribution. For everything + # else (spaces_bucket, managed_db, k8s_cluster, etc.) insert a minimal + # placeholder so the resource shows up in inventory + cost matching. 
+ defp attribute_or_discover(kind, provider_id, project_id, tenant_id) do + if kind in @dedicated_kinds do + update_resource_attribution(kind, provider_id, project_id, tenant_id) + else + ensure_via_urn(kind, provider_id, project_id, tenant_id) + end + end + defp update_resource_attribution(kind, provider_id, project_id, tenant_id) do from(r in CloudResource, where: @@ -84,6 +102,26 @@ defmodule ArcadiaCloud.Sync.ProjectsWorker do |> Repo.update_all(set: [cloud_project_id: project_id, tenant_id: tenant_id]) end + defp ensure_via_urn(kind, provider_id, project_id, tenant_id) do + now = DateTime.utc_now() |> DateTime.truncate(:second) + + Cloud.upsert_resource( + %{ + provider: "digitalocean", + provider_id: provider_id, + kind: kind, + name: provider_id, + status: "active", + cloud_project_id: project_id, + tenant_id: tenant_id, + attrs: %{discovered_via: "urn_membership"}, + first_seen_at: now, + last_seen_at: now + }, + source: "projects_urn" + ) + end + # "do:droplet:567897199" → {"droplet", "567897199"} defp parse_urn("do:" <> rest) do case String.split(rest, ":", parts: 2) do @@ -101,6 +139,8 @@ defmodule ArcadiaCloud.Sync.ProjectsWorker do defp normalize_kind("floatingip"), do: "floating_ip" defp normalize_kind("loadbalancer"), do: "load_balancer" defp normalize_kind("dbaas"), do: "managed_db" + defp normalize_kind("volumesnapshot"), do: "snapshot" + defp normalize_kind("image"), do: "snapshot" defp normalize_kind(other), do: other defp tenant_id_for(%{name: "tenant-" <> tenant_uuid}), do: tenant_uuid