"""GDELT Project kolektorius.

Naudojam DOC 2.0 API: https://api.gdeltproject.org/api/v2/doc/doc
Filtruojam pagal `sourcecountry` ir grąžinam top straipsnių antraštes
kaip "hourly" tendencijas.

Pavyzdys:
  https://api.gdeltproject.org/api/v2/doc/doc?query=sourcecountry:LT&mode=ArtList&maxrecords=50&format=json
"""

from __future__ import annotations

from typing import List

from .base import BaseCollector, TrendItem

# GDELT uses FIPS country codes, which in some cases differ from ISO 3166.
# Keys are ISO 3166 alpha-2 codes, values are the FIPS codes sent to GDELT.
# This map is sufficient for most cases; a country missing from it is treated
# as unsupported (a `.get()` lookup yields None).
GDELT_COUNTRY = {
    "LT": "LH",  # Lithuania
    "LV": "LG",  # Latvia
    "EE": "EN",  # Estonia
    "US": "US",
    "GB": "UK",  # United Kingdom
    "JP": "JA",  # Japan
    "DE": "GM",  # Germany
    "FR": "FR",
    "IT": "IT",
    "ES": "SP",  # Spain
    "PL": "PL",
    "FI": "FI",
    "SE": "SW",  # Sweden
    "NO": "NO",
    "DK": "DA",  # Denmark
    "NL": "NL",
    "BE": "BE",
    "CH": "SZ",  # Switzerland
    "AT": "AU",  # Austria
    "GR": "GR",
    "IE": "EI",  # Ireland
    "PT": "PO",  # Portugal
    "CZ": "EZ",  # Czechia
    "HU": "HU",
    "UA": "UP",  # Ukraine
    "RU": "RS",  # Russia
    "TR": "TU",  # Turkey
    "BR": "BR",
    "AR": "AR",
    "MX": "MX",
    "CA": "CA",
    "AU": "AS",  # Australia
    "NZ": "NZ",
    "IN": "IN",
    "ID": "ID",
    "MY": "MY",
    "SG": "SN",  # Singapore
    "TH": "TH",
    "VN": "VM",  # Vietnam
    "PH": "RP",  # Philippines
    "KR": "KS",  # South Korea
    "TW": "TW",
    "HK": "HK",
    "CN": "CH",  # China
    "IL": "IS",  # Israel
    "SA": "SA",
    "AE": "AE",
    "EG": "EG",
    "NG": "NI",  # Nigeria
    "KE": "KE",
    "ZA": "SF",  # South Africa
    "RO": "RO",
    "BG": "BU",  # Bulgaria
}


class GdeltCollector(BaseCollector):
    """Collect top article titles per country from the GDELT DOC 2.0 API.

    Each article title returned by GDELT for a country becomes one
    ``TrendItem`` with ``trend_type="hourly"``.

    NOTE(review): ``self.log`` and ``self.http`` are used but not defined in
    this class; presumably ``BaseCollector`` provides them.
    """

    name = "gdelt"
    source_type = "gdelt"

    BASE = "https://api.gdeltproject.org/api/v2/doc/doc"

    # Number of articles requested per call. The same value normalises the
    # rank-based score, so it is defined once to keep both in sync.
    MAX_RECORDS = 50

    def collect_for_country(self, country) -> List[TrendItem]:
        """Fetch the last day's top articles for *country* as trend items.

        Args:
            country: Object exposing an ``iso_code`` attribute (ISO 3166
                alpha-2), which is translated to a FIPS code through
                ``GDELT_COUNTRY``.

        Returns:
            One ``TrendItem`` per article that has a non-empty title. The
            list is empty when the country has no FIPS mapping or when the
            response carries no usable data.
        """
        code = GDELT_COUNTRY.get(country.iso_code)
        if not code:
            self.log.debug("GDELT nepalaiko %s", country.iso_code)
            return []

        params = {
            "query": f"sourcecountry:{code}",
            "mode": "ArtList",
            "maxrecords": self.MAX_RECORDS,
            "format": "json",
            "timespan": "1d",
            "sort": "hybridrel",
        }
        data = self.http.get_json(self.BASE, params=params, respect_robots=False)
        # Anything other than a non-empty JSON object is treated as "no data";
        # what get_json returns on failure is not visible from this file.
        if not data or not isinstance(data, dict):
            return []

        articles = data.get("articles") or []
        if not isinstance(articles, list):
            # Unexpected payload shape: nothing can be extracted from it.
            articles = []

        items: List[TrendItem] = []
        # rank is the article's position in the GDELT response, so entries
        # skipped below leave gaps in the rank sequence.
        for rank, art in enumerate(articles, start=1):
            if not isinstance(art, dict):
                continue
            raw_title = art.get("title")
            if not isinstance(raw_title, str):
                continue
            title = raw_title.strip()
            if not title:
                continue
            items.append(TrendItem(
                keyword=title,
                trend_type="hourly",
                rank=rank,
                # Linear decay: 1.0 for the first article, floored at 0.0.
                score=max(0.0, 1.0 - (rank - 1) / self.MAX_RECORDS),
                language=art.get("language"),
                description=art.get("seendate"),
                url=art.get("url"),
            ))
        self.log.info("[%s] GDELT: %d straipsnių", country.iso_code, len(items))
        return items
