"""NewsAPI.org kolektorius.

Nemokamas planas: 100 užklausų / dieną, ne komerciniam naudojimui.
Endpoint /v2/top-headlines?country=XX – `country` 2 raidžių kodas
(ne visos šalys palaikomos – netinkamoms tiesiog tuščia).
"""

from __future__ import annotations

from typing import List

from .base import BaseCollector, TrendItem

# Lower-case 2-letter country codes for which the collector queries
# /v2/top-headlines; any other country is skipped without a request.
NEWSAPI_COUNTRIES = set(
    """
    ae ar at au be bg br ca ch cn co cu cz
    de eg fr gb gr hk hu id ie il in it jp
    kr lt lv ma mx my ng nl no nz ph pl pt
    ro rs ru sa se sg si sk th tr tw ua us
    ve za
    """.split()
)


class NewsApiCollector(BaseCollector):
    """Collect top headlines for a country from NewsAPI.org.

    For every supported country one request is issued per entry in
    ``CATEGORIES`` (8 requests per country), so a full run over many
    countries can use up the free plan's 100 requests/day quickly.
    """

    name = "newsapi"
    source_type = "news_api"

    BASE = "https://newsapi.org/v2/top-headlines"

    # Articles requested per call. The rank-based score uses the same value
    # as its denominator, so the two cannot drift apart.
    PAGE_SIZE = 20

    # ``None`` means "send the request without a ``category`` parameter".
    CATEGORIES = (None, "business", "entertainment", "general", "health",
                  "science", "sports", "technology")

    def __init__(self, *, db, http, cfg):
        """Initialise the base collector and read the API key.

        The key is looked up at ``cfg["api_keys"]["newsapi"]``; a missing
        or empty value results in ``self.api_key == ""``, which disables
        collection.
        """
        super().__init__(db=db, http=http, cfg=cfg)
        self.api_key = (cfg.get("api_keys", {}) or {}).get("newsapi") or ""

    def collect_for_country(self, country) -> List[TrendItem]:
        """Return headline items for ``country`` across all categories.

        Args:
            country: Object exposing ``iso_code`` and ``primary_language``.

        Returns:
            One ``TrendItem`` per usable article. The list is empty when no
            API key is configured or the country's lower-cased ISO code is
            not in ``NEWSAPI_COUNTRIES``. Categories whose request yields
            no data or a non-"ok" status are skipped.
        """
        if not self.api_key:
            return []
        iso = country.iso_code.lower()
        if iso not in NEWSAPI_COUNTRIES:
            return []

        items: List[TrendItem] = []

        for cat in self.CATEGORIES:
            params = {
                "country": iso,
                "pageSize": self.PAGE_SIZE,
                "apiKey": self.api_key,
            }
            if cat:
                params["category"] = cat
            data = self.http.get_json(self.BASE, params=params, respect_robots=False)
            if not data:
                continue
            if data.get("status") != "ok":
                # Surface API-side failures (e.g. quota or key problems)
                # instead of dropping them silently.
                self.log.warning(
                    "[%s] NewsAPI klaida (category=%s): %s: %s",
                    country.iso_code, cat, data.get("code"), data.get("message"),
                )
                continue
            # `articles` may be present but null; treat that as "no articles".
            articles = data.get("articles") or []
            # Rank is the 1-based position in the response, counted before
            # unusable articles are filtered out.
            for rank, art in enumerate(articles, start=1):
                if not art:
                    continue
                title = (art.get("title") or "").strip()
                if not title or title == "[Removed]":
                    continue
                items.append(TrendItem(
                    keyword=title,
                    trend_type="hourly",
                    rank=rank,
                    # 1.0 for the first article, falling linearly, never < 0.
                    score=max(0.0, 1.0 - (rank - 1) / self.PAGE_SIZE),
                    language=country.primary_language,
                    # Only these two categories are recorded on the item.
                    category=(cat if cat in ("sports", "entertainment") else None),
                    description=(art.get("description") or "")[:1000],
                    url=art.get("url"),
                ))
        self.log.info("[%s] NewsAPI: %d straipsnių", country.iso_code, len(items))
        return items
