Files
arcadia-cloud/lib/arcadia_cloud/sync/projects_worker.ex
Giuliano Silvestro 8bdf500214 Round out DO sync workers: volumes, snapshots, floating IPs, firewalls, LBs
Five new Oban workers, all on cloud_sync_full following the established
droplet/domains pattern (list → normalize → upsert → mark_stale):

- VolumesWorker         — block storage
- SnapshotsWorker       — both droplet and volume snapshots (kind="snapshot"
                          with attrs.resource_type to differentiate)
- FloatingIpsWorker     — provider_id is the IP; status assigned/unassigned
- FirewallsWorker       — inbound/outbound rules + droplet_ids in attrs
- LoadBalancersWorker   — name + region + algorithm + forwarding rules

DigitalOcean.Client gains list_snapshots / list_firewalls / list_load_balancers.

ProjectsWorker URN normalization extended: "floatingip" → floating_ip,
"loadbalancer" → load_balancer, "dbaas" → managed_db. URNs DO emits don't
have underscores for these.

Cron updated: new workers run every 15min on cloud_sync_full; snapshots
moved to hourly (at :33) since they change slowly and listing them is
the most-paginated call we make.

InvoiceIngestWorker.derive_kind/2 reordered to check specific phrases
before generic products — "Droplet Snapshots"/"Droplet Backups" no longer
get bucketed as kind=droplet ahead of the actual snapshot check. Also
adds kind="droplet_backup" for DO's automated backup billing (separate
from the snapshot kind because backups aren't exposed via /v2/snapshots).

Live verified: 12 snapshots discovered + 1 firewall (account has no
volumes / floating IPs / LBs at the moment, so those workers ran clean).
April 2026 invoice match rate jumped from 18.2% → 51.5%. Of the
unmatched: 10 historic droplets that no longer exist on DO, 2 backups
(separate API surface), 1 Spaces bucket (S3 API, deferred), 1 GST
(correctly no kind). Effectively ~95% of currently-extant resources match.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-19 22:41:12 +10:00

109 lines
3.4 KiB
Elixir

defmodule ArcadiaCloud.Sync.ProjectsWorker do
  @moduledoc """
  Sync of DigitalOcean Projects → cloud_projects table.

  Runs as an Oban worker on the `:cloud_sync_full` queue, in two passes:

    1. Upsert every DO project locally (purpose derives from name pattern).
    2. For each known project, fetch its resource memberships and update
       `cloud_resources.cloud_project_id` + `tenant_id` accordingly.

  Tenant attribution: a DO project named `tenant-<uuid>` maps to that
  tenant. `skyai-internal` is the platform tenant (tenant_id = nil).
  Everything else has tenant_id = nil and is operator-classified later.
  A `tenant-` prefix followed by anything that is not a canonical textual
  UUID counts as "everything else".

  Attribution is best-effort per project: if the resources of one project
  cannot be listed, a warning is logged and the remaining projects are
  still processed.
  """

  use Oban.Worker, queue: :cloud_sync_full, max_attempts: 3

  import Ecto.Query

  alias ArcadiaCloud.Cloud
  alias ArcadiaCloud.Cloud.CloudResource
  alias ArcadiaCloud.DigitalOcean.Client
  alias ArcadiaCloud.Repo

  require Logger

  # Entry point invoked by Oban. The job arguments are not used.
  #
  # Returns `:ok` once both passes have run. Any result of
  # `Client.list_projects/0` that is not `{:ok, projects}` falls through the
  # `with` and is returned unchanged.
  @impl Oban.Worker
  def perform(_job) do
    with {:ok, do_projects} <- Client.list_projects() do
      Enum.each(do_projects, &sync_project/1)
      attribute_resources(do_projects)
      :ok
    end
  end

  # Pass 1: upserts a single DO project (a string-keyed map from the API).
  #
  # NOTE(review): the result of `Cloud.ensure_project/1` is discarded by the
  # caller — confirm that upsert failures are surfaced elsewhere.
  defp sync_project(do_project) do
    name = do_project["name"]

    Cloud.ensure_project(%{
      provider: "digitalocean",
      provider_id: do_project["id"],
      name: name,
      purpose: derive_purpose(name),
      metadata: %{
        do_purpose: do_project["purpose"],
        environment: do_project["environment"],
        description: do_project["description"],
        is_default: do_project["is_default"]
      }
    })
  end

  # Maps a DO project name onto our local purpose classification.
  defp derive_purpose("skyai-internal"), do: "skyai-infra"
  defp derive_purpose("tenant-" <> _rest), do: "tenant-workload"
  defp derive_purpose(_), do: "shared-services"

  # ---- attribution ----------------------------------------------------------

  # Pass 2: for every DO project that has a local row, attributes the
  # project's resources to it. Projects without a local row are skipped.
  defp attribute_resources(do_projects) do
    Enum.each(do_projects, fn do_project ->
      do_project_id = do_project["id"]
      local = Cloud.get_project_by_provider("digitalocean", do_project_id)

      if local, do: attribute_project(do_project_id, local)
    end)
  end

  # Lists the resources of one DO project and attributes them to `local`.
  #
  # A failed listing is deliberately non-fatal (the other projects should
  # still be attributed), but it is logged instead of being dropped silently.
  defp attribute_project(do_project_id, local) do
    case Client.list_project_resources(do_project_id) do
      {:ok, resources} ->
        attribute_urns(resources, local)

      other ->
        # NOTE(review): `other` is logged via inspect/1 — confirm the client's
        # error terms never embed credentials.
        Logger.warning(
          "ProjectsWorker: could not list resources for DO project " <>
            "#{inspect(do_project_id)}; skipping attribution: #{inspect(other)}"
        )
    end
  end

  # Points every resource referenced by `resources` at the local project and
  # its tenant. Entries without a "urn" key, and URNs that `parse_urn/1`
  # cannot parse, are skipped rather than raising, so one malformed entry
  # cannot abort attribution for the rest of the run.
  defp attribute_urns(resources, %{id: project_id} = local) do
    tenant_id = tenant_id_for(local)

    Enum.each(resources, fn
      %{"urn" => urn} ->
        case parse_urn(urn) do
          {kind, provider_id} ->
            update_resource_attribution(kind, provider_id, project_id, tenant_id)

          _ ->
            :skip
        end

      _no_urn ->
        :skip
    end)
  end

  # Sets cloud_project_id and tenant_id on the non-deleted DigitalOcean
  # resource rows matching `kind` and `provider_id`.
  defp update_resource_attribution(kind, provider_id, project_id, tenant_id) do
    from(r in CloudResource,
      where:
        r.provider == "digitalocean" and r.kind == ^kind and r.provider_id == ^provider_id and
          is_nil(r.deleted_at)
    )
    |> Repo.update_all(set: [cloud_project_id: project_id, tenant_id: tenant_id])
  end

  # "do:droplet:567897199" → {"droplet", "567897199"}
  #
  # Returns nil for anything that is not a "do:<kind>:<id>" string. Only the
  # first ":" after the kind splits, so an id may itself contain ":".
  defp parse_urn("do:" <> rest) do
    case String.split(rest, ":", parts: 2) do
      [kind, id] -> {normalize_kind(kind), id}
      _ -> nil
    end
  end

  defp parse_urn(_), do: nil

  # DO URN uses singular common nouns; our cloud_resources.kind names some
  # things more explicitly to disambiguate (e.g. dns_zone vs dns_record).
  # Kinds without a special case pass through unchanged.
  defp normalize_kind("domain"), do: "dns_zone"
  defp normalize_kind("space"), do: "spaces_bucket"
  defp normalize_kind("floatingip"), do: "floating_ip"
  defp normalize_kind("loadbalancer"), do: "load_balancer"
  defp normalize_kind("dbaas"), do: "managed_db"
  defp normalize_kind(other), do: other

  # Extracts the tenant id from a local project named `tenant-<uuid>`.
  #
  # Returns the suffix exactly as it appears in the name when it is a valid
  # textual UUID, and nil otherwise (e.g. `tenant-staging`), so a stray
  # project name can never be written as a tenant id.
  #
  # The byte_size guard matters: Ecto.UUID.cast/1 also accepts raw 16-byte
  # binaries, so a 16-character suffix would otherwise pass as a "UUID".
  defp tenant_id_for(%{name: "tenant-" <> suffix}) when byte_size(suffix) == 36 do
    case Ecto.UUID.cast(suffix) do
      {:ok, _canonical} -> suffix
      :error -> nil
    end
  end

  defp tenant_id_for(_), do: nil
end