arcadia-cloud/lib/arcadia_cloud/sync/invoice_ingest_worker.ex

defmodule ArcadiaCloud.Sync.InvoiceIngestWorker do
  @moduledoc """
  Fetch a single invoice's CSV, parse line items, replace cost_lines,
  then match each line to a cloud_resource by (kind, name).

  Enqueued per invoice by BillingHistoryWorker. Per-invoice idempotency —
  re-runs replace the line set in one transaction. Marks invoice
  `lines_ingested_at` on success.
  """

  use Oban.Worker, queue: :cloud_billing, max_attempts: 3

  alias ArcadiaCloud.Billing
  alias ArcadiaCloud.Billing.CloudInvoice
  alias ArcadiaCloud.DigitalOcean.Client
  alias ArcadiaCloud.Repo

  NimbleCSV.define(InvoiceCsv, separator: ",", escape: "\"")

  # Oban entry point. Expects the job args to carry an "invoice_id"; the
  # invoice is fetched with Repo.get!/2 and passed on to the ingest step,
  # whose result becomes the job result.
  @impl Oban.Worker
  def perform(%Oban.Job{args: %{"invoice_id" => invoice_id}}) do
    CloudInvoice
    |> Repo.get!(invoice_id)
    |> do_ingest()
  end

  # Downloads the invoice CSV and, when the fetch succeeds, rebuilds the
  # invoice's cost lines from it and marks the invoice as ingested.
  #
  # Returns `{:ok, %{lines: n, matched: m}}` on success. Any fetch result
  # other than `{:ok, csv}` is handed back to the caller unchanged.
  defp do_ingest(%CloudInvoice{provider_invoice_id: provider_uuid} = invoice) do
    case Client.fetch_invoice_csv(provider_uuid) do
      {:ok, body} ->
        parsed_lines = parse_csv(body, invoice.invoice_period)

        # Assertive matches: a non-ok result from Billing raises here rather
        # than being silently ignored.
        {:ok, _replaced} = Billing.replace_cost_lines(invoice, parsed_lines)
        match_result = Billing.match_cost_lines_to_resources(invoice)
        {:ok, _marked} = Billing.mark_invoice_ingested(invoice)

        {:ok, %{lines: length(parsed_lines), matched: match_result}}

      fetch_failure ->
        fetch_failure
    end
  end

  # ---- CSV parsing ----------------------------------------------------------

  # Columns in a DO invoice CSV (as of the v2 API):
  #   product, group description, description, hours, start, end, USD,
  #   project name, category
  #
  # The first row is the header. Columns are located by header name rather
  # than by position, because DO occasionally adds columns. Input that
  # yields no rows at all produces an empty list.
  defp parse_csv(csv, period) do
    case InvoiceCsv.parse_string(csv, skip_headers: false) do
      [header_row | data_rows] ->
        column_index = build_index(header_row)
        for data_row <- data_rows, do: row_to_line_attrs(data_row, column_index, period)

      _no_rows ->
        []
    end
  end

  # Builds a lookup from normalized header name to zero-based column position.
  #
  # Each header is stripped of a leading UTF-8 byte-order mark, trimmed,
  # lowercased, and has every internal run of whitespace replaced by "_".
  # This lets a header written as "Project Name" resolve to the same
  # "project_name" key as one written "project_name"; without it, a header
  # containing spaces can never match an underscored lookup key and the
  # column silently reads as nil. When a header repeats, the last
  # occurrence wins.
  defp build_index(headers) do
    headers
    |> Enum.with_index()
    |> Map.new(fn {header, position} -> {normalize_header(header), position} end)
  end

  # Canonical form of a single header cell (see build_index/1).
  defp normalize_header(header) do
    header
    |> String.replace_prefix("\uFEFF", "")
    |> String.trim()
    |> String.downcase()
    |> String.replace(~r/\s+/, "_")
  end

  # Converts one parsed CSV row into the attribute map for a cost line.
  #
  # Cells are looked up by header name through `index`; a column that is
  # absent or empty reads as nil. The untouched row is kept under "raw".
  defp row_to_line_attrs(row, index, period) do
    cell = fn column -> at(row, index, column) end

    product = cell.("product")
    category = cell.("category")
    hours = cell.("hours")

    %{
      invoice_period: period,
      kind: derive_kind(product, category),
      # Fall back to the group description when the row has no description.
      description: cell.("description") || cell.("group_description"),
      qty: parse_decimal(hours),
      # if/2 without an else branch yields nil when the hours cell is absent.
      unit: if(hours, do: "hours"),
      amount_cents: parse_cents(cell.("usd")),
      unit_cost_cents: nil,
      start_at: parse_datetime(cell.("start")),
      end_at: parse_datetime(cell.("end")),
      project_name: cell.("project_name"),
      category: category,
      raw: %{"product" => product, "category" => category, "row" => row}
    }
  end

  # Returns the cell of `row` under the header `key`, or nil when the header
  # is not in `index`, the row has no cell at that position, or the cell is
  # the empty string.
  defp at(row, index, key) do
    with position when not is_nil(position) <- Map.get(index, key) do
      row
      |> Enum.at(position)
      |> blank_to_nil()
    end
  end

  # Maps the empty string to nil; every other value passes through untouched.
  defp blank_to_nil(value) do
    case value do
      "" -> nil
      present -> present
    end
  end

  # Best-effort mapping from DO product strings to our cloud_resources.kind.
  #
  # The product is lowercased and tested against the fragments below in
  # order; the first fragment it contains decides the kind. Order matters:
  #
  #   * "snapshot" is tested before "droplet" and "volume" so that a product
  #     such as "Volume Snapshots" is classified as a snapshot instead of as
  #     the resource the snapshot was taken from.
  #   * "reserved ip" is DigitalOcean's current name for floating IPs and maps
  #     to the same kind as "floating ip".
  #
  # Returns nil when the product is not a string or contains none of the
  # fragments. The category argument is accepted but not consulted.
  @product_kind_fragments [
    {"snapshot", "snapshot"},
    {"droplet", "droplet"},
    {"volume", "volume"},
    {"load balancer", "load_balancer"},
    {"load_balancer", "load_balancer"},
    {"floating ip", "floating_ip"},
    {"reserved ip", "floating_ip"},
    {"spaces", "spaces_bucket"},
    {"dns", "dns_zone"},
    {"managed database", "managed_db"},
    {"kubernetes", "k8s_cluster"}
  ]

  defp derive_kind(product, _category) when is_binary(product) do
    p = String.downcase(product)

    Enum.find_value(@product_kind_fragments, fn {fragment, kind} ->
      if String.contains?(p, fragment), do: kind
    end)
  end

  defp derive_kind(_, _), do: nil

  # Converts a dollar amount such as "$1,234.56" into integer cents.
  #
  # "$", "," and spaces are removed before parsing. A missing value, or one
  # that does not start with a number, counts as 0 cents.
  defp parse_cents(nil), do: 0

  defp parse_cents(value) when is_binary(value) do
    value
    |> String.replace(["$", ",", " "], "")
    |> Float.parse()
    |> case do
      :error -> 0
      {dollars, _rest} -> round(dollars * 100)
    end
  end

  # Parses a quantity cell with Decimal.parse/1. Returns nil for a missing
  # value or when Decimal.parse/1 reports :error; any unparsed remainder
  # after the number is discarded.
  defp parse_decimal(nil), do: nil

  defp parse_decimal(value) when is_binary(value) do
    value
    |> Decimal.parse()
    |> case do
      :error -> nil
      {parsed, _remainder} -> parsed
    end
  end

  # Parses a timestamp cell into a DateTime, or nil when it is missing,
  # blank, or unparseable.
  #
  # DO CSV uses "2026-04-01 00:00:00 +0000" (space separator, RFC822 offset).
  # Also handled: "2026-04-01T00:00:00Z" (ISO) and plain "YYYY-MM-DD".
  #
  # Surrounding whitespace is ignored. The space check runs before the "T"
  # check on purpose: a space-separated value whose zone token contains a
  # "T" (e.g. "... UTC" or "... GMT") must go to the space-separated parser,
  # not to the strict ISO parser that would reject it.
  defp parse_datetime(nil), do: nil

  defp parse_datetime(str) when is_binary(str) do
    trimmed = String.trim(str)

    cond do
      trimmed == "" -> nil
      String.contains?(trimmed, " ") -> parse_space_datetime(trimmed)
      String.contains?(trimmed, "T") -> parse_iso_datetime(trimmed)
      true -> parse_date_only(trimmed)
    end
  end

  # Parses an ISO 8601 datetime string and drops sub-second precision.
  # Returns nil when DateTime.from_iso8601/1 does not accept the string.
  defp parse_iso_datetime(str) do
    with {:ok, datetime, _utc_offset} <- DateTime.from_iso8601(str) do
      DateTime.truncate(datetime, :second)
    else
      _ -> nil
    end
  end

  # "2026-04-01 00:00:00 +0000" → ISO equivalent
  # Splits on spaces (ignoring repeated or surrounding ones) into date, time
  # and an optional offset token, then rebuilds an ISO 8601 string.
  #
  #   * date + time: the time may already carry its own zone ("00:00:00Z",
  #     "00:00:00+00:00"), so it is tried as-is first; UTC is assumed only
  #     when that fails, which covers a bare "00:00:00".
  #   * date + time + offset: the offset token is normalized; any further
  #     tokens after it are ignored.
  #
  # Returns nil when the pieces do not form a valid datetime.
  defp parse_space_datetime(str) do
    case String.split(str, " ", trim: true) do
      [date_part, time_part] ->
        iso = date_part <> "T" <> time_part
        parse_iso_datetime(iso) || parse_iso_datetime(iso <> "Z")

      [date_part, time_part, offset | _ignored] ->
        parse_iso_datetime(date_part <> "T" <> time_part <> normalize_offset([offset]))

      _ ->
        nil
    end
  end

  # Takes the list of tokens that followed the time: an empty list means no
  # offset was given and "Z" is used; a single string token is normalized.
  defp normalize_offset([]), do: "Z"

  defp normalize_offset([offset]) when is_binary(offset) do
    normalize_offset_str(offset)
  end

  # Rewrites a UTC-offset token into the form used when building the ISO 8601
  # string.
  #
  #   "+0000" / "-0000" / "+00:00" / "-00:00"  -> "Z"
  #   "+HHMM"                                  -> "+HH:MM"
  #   "+HH:MM"                                 -> unchanged
  #   "+HH"                                    -> "+HH:00"
  #   anything else (e.g. "UTC", "GMT")        -> "Z" (best-effort default)
  #
  # The colon and hour-only forms are handled explicitly so a real offset is
  # never replaced by the UTC default, which would silently shift the
  # timestamp. Zero offsets are all mapped to "Z" so that a negative-zero
  # spelling never reaches the ISO parser.
  defp normalize_offset_str(zero) when zero in ["+0000", "-0000", "+00:00", "-00:00"], do: "Z"

  defp normalize_offset_str(<<sign::binary-1, hh::binary-2, mm::binary-2>>)
       when sign in ["+", "-"] do
    sign <> hh <> ":" <> mm
  end

  defp normalize_offset_str(<<sign::binary-1, _hh::binary-2, ":", _mm::binary-2>> = offset)
       when sign in ["+", "-"] do
    offset
  end

  defp normalize_offset_str(<<sign::binary-1, hh::binary-2>>) when sign in ["+", "-"] do
    sign <> hh <> ":00"
  end

  defp normalize_offset_str(_), do: "Z"

  # Interprets a plain "YYYY-MM-DD" string as midnight UTC of that day.
  # Returns nil when the string is not a valid ISO 8601 date.
  defp parse_date_only(str) do
    with {:ok, day} <- Date.from_iso8601(str) do
      DateTime.new!(day, ~T[00:00:00], "Etc/UTC")
    else
      _ -> nil
    end
  end
end