"""LEGAL DISCLAIMER:

This script is provided "as is" as a code sample for demonstration purposes only.
Modigie, Inc. makes no representations or warranties of any kind, express or implied,
about the completeness, accuracy, reliability, suitability, or availability with respect
to this script for any purpose. Any reliance you place on such information is therefore
strictly at your own risk.

In no event will we be liable for any loss or damage including without limitation,
indirect or consequential loss or damage, or any loss or damage whatsoever arising
from loss of data or profits arising out of, or in connection with, the use of this script.

This script is not intended for production use.

===============================================================================

Publish enrichMobile job requests to Pub/Sub from a CSV file.

Reads rows from a CSV and publishes one Pub/Sub message per row to the
enrichMobile request topic. Each message carries:
  - body: JSON describing the person/employment/email/url to enrich from
  - attributes:
      modigieJobRequestId   (required) - customer-chosen unique ID per job
      modigieJobExpireAfter (optional) - ISO-8601 duration, e.g. "PT10M"

enrichMobile requires either a valid deliverable business email address or a
LinkedIn user URL as input. A request may include just an email, just a URL,
or both. Validation is performed by the service; this script does not
pre-validate beyond requiring a requestId.

CSV columns (header row required):
  requestId,firstName,lastName,jobTitle,company,email,linkedinUrl,expireAfter

  - requestId    : required, unique per row, echoed back on the response
  - firstName    : required by the service
  - lastName     : required by the service
  - jobTitle     : optional
  - company      : optional
  - email        : business email used as lookup key (required if no linkedinUrl)
  - linkedinUrl  : LinkedIn user URL (required if no email)
  - expireAfter  : optional, ISO-8601 duration (e.g. "PT10M"); omit for no TTL
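
Example CSV (illustrative values only):
  requestId,firstName,lastName,jobTitle,company,email,linkedinUrl,expireAfter
  req-001,Ada,Lovelace,Engineer,Acme Corp,ada@acme.example,,PT10M
  req-002,Alan,Turing,,,,https://www.linkedin.com/in/alan-turing,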

Usage:
  python publisher.py path/to/input.csv
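
Requires the google-cloud-pubsub client library:
  pip install google-cloud-pubsub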

"""

import csv
import json
import sys
import time

from google.cloud import pubsub


# --8<-- [start:configuration]
# TODO: Replace with GCP project that owns the Pub/Sub topic.
PROJECT_ID = "modigie-c-q8irqgvocgg8lt4dwa3e"  # (1)!

# TODO: Replace with Pub/Sub topic that receives enrichMobile JobV2 request messages.
TOPIC_ID = "inpubsub-job-request-enrichmobile-2678a46bd60d84c"  # (2)!

# --8<-- [end:configuration]

# Sleep between publishes so jobs get distinct createTime values.
# Set to 0 for max throughput.
PUBLISH_INTERVAL_SECONDS = 1
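
# The Pub/Sub client picks up Application Default Credentials; for local runs,
# `gcloud auth application-default login` is one way to provide them. Adjust
# to however credentials are provisioned in your environment.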


def clean_row(row: dict) -> dict:
    """Strip whitespace from CSV cell values."""
    return {k: v.strip() if isinstance(v, str) else v for k, v in row.items()}


def build_body(row: dict) -> dict:
    """Build an enrichMobile request body from a CSV row.

    The request body is JSON and follows the JobV2-style request shape expected
    by enrichMobile. It always includes:
      - payload.person

    It conditionally adds:
      - payload.emailAddresses  (when email is provided)
      - payload.urls            (when linkedinUrl is provided)
      - payload.employment      (when company or jobTitle is provided)

    A valid request must include at least one of emailAddresses or urls; the
    service rejects requests with neither.

    """
    # Use requestId as the source object identifier so downstream responses
    # can be correlated back to the original CSV row.
    requestId = row["requestId"]

    # Base payload always includes the person section.
    payload = {
        "person": {
            "firstName": row.get("firstName", ""),
            "lastName": row.get("lastName", ""),
            "source": {
                "objectId": requestId,
                "propertyName": "name",
            },
        },
    }

    # Add business email if provided.
    if row.get("email"):
        payload["emailAddresses"] = [{
            "emailAddress": {"userFormat": row["email"]},
            "rels": ["business"],
            "source": {
                "objectId": requestId,
                "propertyName": "companyEmail",
            },
        }]

    # Add LinkedIn URL if provided. A request may carry an email, a URL, or
    # both; the service uses whatever input is available to find the matching
    # person.
    if row.get("linkedinUrl"):
        payload["urls"] = [{
            "url": {"userFormat": row["linkedinUrl"]},
            "source": {
                "objectId": requestId,
                "propertyName": "linkedInUrl",
            },
        }]

    # Add employment data only when at least one related CSV field is provided.
    if row.get("company") or row.get("jobTitle"):
        employment = {}

        # Optional job title input.
        if row.get("jobTitle"):
            employment["position"] = {
                "title": row["jobTitle"],
                "source": {"objectId": requestId, "propertyName": "title"},
            }

        # Optional company input.
        if row.get("company"):
            employment["company"] = {
                "title": row["company"],
                "source": {"objectId": requestId, "propertyName": "company"},
            }

        payload["employment"] = employment

    return {
        "payload": payload,
        "priority": 50,  # (1) optional - between 0 and 100, default 50, higher value will be dispatched before others
    }
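

# For illustration (hypothetical values), a CSV row like
#   req-001,Ada,Lovelace,Engineer,Acme Corp,ada@acme.example,,PT10M
# would produce a request body along these lines:
#   {
#     "payload": {
#       "person": {"firstName": "Ada", "lastName": "Lovelace",
#                  "source": {"objectId": "req-001", "propertyName": "name"}},
#       "emailAddresses": [{"emailAddress": {"userFormat": "ada@acme.example"},
#                           "rels": ["business"],
#                           "source": {"objectId": "req-001",
#                                      "propertyName": "companyEmail"}}],
#       "employment": {
#         "position": {"title": "Engineer",
#                      "source": {"objectId": "req-001", "propertyName": "title"}},
#         "company": {"title": "Acme Corp",
#                     "source": {"objectId": "req-001", "propertyName": "company"}}
#       }
#     },
#     "priority": 50
#   }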


def publish_from_csv(csvPath: str) -> None:
    """Read a CSV file and publish one Pub/Sub message per row."""
    # Create Pub/Sub publisher client and fully qualified topic path.
    publisher = pubsub.PublisherClient()
    topicPath = publisher.topic_path(PROJECT_ID, TOPIC_ID)

    print(f"Publishing to {topicPath}")

    publishedCount = 0

    # Stream CSV rows so we don't hold the whole file in memory.
    # Row numbering starts at 2 because row 1 is the header.
    with open(csvPath, "r", encoding="utf-8") as f:
        reader = csv.DictReader(f)

        for rowNum, rawRow in enumerate(reader, start=2):
            row = clean_row(rawRow)

            # requestId is required
            if not row.get("requestId"):
                print(f"Row {rowNum}: missing requestId, skipping")
                continue

            # Pause between publishes when configured. Pause before the publish
            # (skipped until the first publish) so the loop ends without a
            # trailing idle wait.
            if publishedCount > 0 and PUBLISH_INTERVAL_SECONDS > 0:
                time.sleep(PUBLISH_INTERVAL_SECONDS)

            # Build JSON request body from the CSV row.
            body = build_body(row)

            # Pub/Sub message body must be bytes.
            data = json.dumps(body).encode("utf-8")

            # Pub/Sub attributes carry metadata used by downstream consumers.
            attributes = {"modigieJobRequestId": row["requestId"]}

            # Optional TTL / expiration window, passed through as message attribute.
            if row.get("expireAfter"):
                attributes["modigieJobExpireAfter"] = row["expireAfter"]

            # Publish message and wait for Pub/Sub to confirm message creation.
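            # Note: the client library batches publishes internally; blocking
            # on each future.result() serializes them, which keeps this sample
            # simple at the cost of throughput.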
            future = publisher.publish(topicPath, data, **attributes)
            messageId = future.result()

            print(f"Published {row['requestId']} (expire={row.get('expireAfter') or 'none'}) -> messageId={messageId}")

            publishedCount += 1

    print(f"Done. Published {publishedCount} job(s).")


if __name__ == "__main__":
    # Expect exactly one argument: the CSV path.
    if len(sys.argv) != 2:
        print("Usage: python publisher.py path/to/input.csv")
        sys.exit(1)
    publish_from_csv(sys.argv[1])
