"""RSS srautų kolektorius.

Kiekvienai šaliai turime sąrašą RSS šaltinių. Iš įrašų antraščių darom
"hourly" tipo tendencijas (kategorijuojam pagal feed-ą, jei žinoma).
"""

from __future__ import annotations

from typing import Dict, List

import feedparser

from .base import BaseCollector, TrendItem


# Feed table keyed by country ISO code; it can be extended freely.
# Each feed is a dict with a "url" and an optional "category" hint.
RSS_FEEDS: Dict[str, List[dict]] = {
    "LT": [
        {"url": "https://www.delfi.lt/rss/feeds/daily.xml", "category": "news"},
        {"url": "https://www.lrt.lt/?rss", "category": "news"},
        {"url": "https://www.15min.lt/rss", "category": "news"},
    ],
    "LV": [
        {"url": "https://www.delfi.lv/rss/?channel=delfi&category=news", "category": "news"},
        {"url": "https://eng.lsm.lv/rss/", "category": "news"},
    ],
    "EE": [
        {"url": "https://news.err.ee/rss", "category": "news"},
        {"url": "https://www.delfi.ee/rss", "category": "news"},
    ],
    "US": [
        {"url": "https://feeds.npr.org/1001/rss.xml", "category": "news"},
        {"url": "https://rss.cnn.com/rss/edition.rss", "category": "news"},
        {"url": "https://www.espn.com/espn/rss/news", "category": "sports"},
        {"url": "https://variety.com/feed/", "category": "entertainment"},
    ],
    "GB": [
        {"url": "https://feeds.bbci.co.uk/news/rss.xml", "category": "news"},
        {"url": "https://feeds.bbci.co.uk/sport/rss.xml", "category": "sports"},
        {"url": "https://www.theguardian.com/world/rss", "category": "news"},
    ],
    "JP": [
        {"url": "https://www.japantimes.co.jp/feed/", "category": "news"},
        {"url": "https://www3.nhk.or.jp/rss/news/cat0.xml", "category": "news"},
    ],
    "DE": [
        {"url": "https://www.tagesschau.de/xml/rss2", "category": "news"},
        {"url": "https://rss.sueddeutsche.de/rss/Topthemen", "category": "news"},
    ],
    "FR": [
        {"url": "https://www.lemonde.fr/rss/une.xml", "category": "news"},
        {"url": "https://www.france24.com/fr/rss", "category": "news"},
    ],
    "ES": [
        {"url": "https://e00-elmundo.uecdn.es/elmundo/rss/portada.xml", "category": "news"},
        {"url": "https://www.marca.com/rss/portada.xml", "category": "sports"},
    ],
    "BR": [
        {"url": "https://g1.globo.com/rss/g1/", "category": "news"},
    ],
    "IN": [
        {"url": "https://www.thehindu.com/news/feeder/default.rss", "category": "news"},
        {"url": "https://feeds.feedburner.com/ndtvnews-top-stories", "category": "news"},
    ],
    "RU": [
        {"url": "https://meduza.io/rss/all", "category": "news"},
    ],
    "UA": [
        {"url": "https://www.pravda.com.ua/rss/", "category": "news"},
        {"url": "https://www.ukrinform.net/rss/block-lastnews", "category": "news"},
    ],
    # Extra global feeds. RssCollector appends these on top of the country's
    # own feeds, but only when the country's primary language is "en".
    "_GLOBAL": [
        {"url": "https://feeds.reuters.com/reuters/worldNews", "category": "news"},
        {"url": "https://feeds.bbci.co.uk/news/world/rss.xml", "category": "news"},
    ],
}


class RssCollector(BaseCollector):
    """Collect headline-based trend items from per-country RSS feeds.

    Feed definitions come from the module-level ``RSS_FEEDS`` mapping, keyed
    by ``country.iso_code``. Every feed entry with a non-empty title becomes
    one ``TrendItem`` with ``trend_type="hourly"``.

    ``self.http`` and ``self.log`` are used but not defined here; presumably
    they are provided by ``BaseCollector`` (confirm against the base class).
    """

    name = "rss"
    source_type = "rss"

    # Only the first N entries of each feed are used. N is also the
    # denominator of the rank-based score, so both must use the same value.
    max_entries_per_feed = 25
    # Entry summaries are truncated to this many characters.
    max_description_length = 1000

    def collect_for_country(self, country) -> List[TrendItem]:
        """Return trend items built from all RSS feeds for ``country``.

        Args:
            country: Object exposing ``iso_code`` (key into ``RSS_FEEDS``)
                and ``primary_language``.

        Returns:
            Items from every feed that could be fetched, in feed order.
            Feeds that are disallowed, empty or failing are skipped, so the
            result may be an empty list.
        """
        items: List[TrendItem] = []
        for feed in self._feeds_for(country):
            url = feed["url"]
            entries = self._fetch_entries(country, url)
            if entries is None:
                # Skipped or failed; already logged where appropriate.
                continue
            collected = self._items_from_entries(entries, feed.get("category"), country)
            items.extend(collected)
            # Report what was actually collected (after the per-feed cap and
            # the empty-title filter), not the raw size of the feed.
            self.log.info("[%s] RSS %s: %d įrašų", country.iso_code, url, len(collected))
        return items

    def _feeds_for(self, country) -> List[dict]:
        """Return the feed definitions to poll for ``country``."""
        # Copy so that extending the list never mutates RSS_FEEDS itself.
        feeds = list(RSS_FEEDS.get(country.iso_code, []))
        # Global feeds are added only for English-language countries so that
        # other countries are not polluted with foreign-language headlines.
        if country.primary_language == "en":
            feeds.extend(RSS_FEEDS.get("_GLOBAL", []))
        return feeds

    def _fetch_entries(self, country, url: str) -> list | None:
        """Download and parse one feed; return its entries or ``None`` if skipped."""
        try:
            # The robots check lives inside the ``try`` so that a failure
            # while checking one feed cannot abort the remaining feeds.
            if not self.http.can_fetch(url):
                self.log.info("[%s] praleidžiam (robots): %s", country.iso_code, url)
                return None
            text = self.http.get_text(url, respect_robots=True)
            if not text:
                return None
            return feedparser.parse(text).entries
        except Exception as exc:
            # Deliberate best-effort: one broken feed must not stop the rest.
            self.log.warning("[%s] RSS klaida %s: %s", country.iso_code, url, exc)
            return None

    def _items_from_entries(self, entries, category, country) -> List[TrendItem]:
        """Convert the leading feed entries into ``TrendItem`` objects."""
        # A non-positive limit yields an empty slice, so the division below
        # is never reached with a zero denominator.
        limit = max(int(self.max_entries_per_feed), 0)
        result: List[TrendItem] = []
        for rank, entry in enumerate(entries[:limit], start=1):
            title = (entry.get("title") or "").strip()
            if not title:
                # Untitled entries are dropped, but later entries keep their
                # feed position as rank.
                continue
            result.append(TrendItem(
                keyword=title,
                trend_type="hourly",
                category=category,
                rank=rank,
                # Linear decay: 1.0 for the first entry, approaching 0 at the cap.
                score=max(0.0, 1.0 - (rank - 1) / limit),
                language=country.primary_language,
                description=(entry.get("summary") or "")[: self.max_description_length],
                url=entry.get("link"),
            ))
        return result