"""Wikipedia / Wikinews current-events collector.

Uses the Wikimedia REST API: the per-country "most viewed" pageviews list
and, separately, the summary of the English "Current events" portal page.
Intended to be called infrequently (every few hours).
"""

from __future__ import annotations

import datetime as dt
from typing import List

from .base import BaseCollector, TrendItem


# Wikipedia language editions keyed by a country's primary language code.
# Every listed code maps to itself, so the table is built from a flat tuple
# of codes instead of spelling out each key/value pair.
WIKI_LANG = {
    code: code
    for code in (
        "lt", "lv", "et", "en", "ja",
        "de", "fr", "es", "pt", "it",
        "pl", "fi", "sv", "no", "da",
        "nl", "el", "cs", "hu", "ru",
        "uk", "tr", "ko", "zh", "id",
        "ms", "th", "vi", "ar", "he",
    )
}


class WikipediaCollector(BaseCollector):
    """Collect trending topics from Wikipedia for a single country.

    Two sources are combined: the Wikimedia Pageviews "top per country"
    list for the previous UTC day, and sentences taken from the summary of
    the English ``Portal:Current_events`` page.

    NOTE(review): ``self.http`` and ``self.log`` are not defined in this
    class; presumably they are provided by ``BaseCollector`` -- confirm.
    """

    name = "wikipedia"
    source_type = "wikipedia"

    # Number of top-viewed articles kept per country; also the denominator
    # of the linear rank-to-score mapping in ``_topviews``.
    TOP_ARTICLES_LIMIT = 30
    # Maximum number of current-events sentences turned into items.
    MAX_EVENT_SENTENCES = 15
    # A sentence must be strictly longer than this to count as an event.
    MIN_EVENT_SENTENCE_LEN = 20

    def _topviews(self, country_iso: str) -> List[TrendItem]:
        """Return yesterday's most viewed articles for one country.

        Args:
            country_iso: ISO 3166-1 alpha-2 country code. It is stripped and
                upper-cased before being placed in the request URL.

        Returns:
            Up to ``TOP_ARTICLES_LIMIT`` items with ``trend_type="daily"``
            and ``category="news"``. An empty list is returned when the
            code is blank or the response does not have the expected
            ``items[0]["articles"]`` list.
        """
        # The API documents upper-case alpha-2 codes; normalise so callers
        # that store lower-case codes still build a valid URL.
        code = (country_iso or "").strip().upper()
        if not code:
            return []

        # Timezone-aware replacement for the deprecated datetime.utcnow().
        today_utc = dt.datetime.now(dt.timezone.utc).date()
        day = (today_utc - dt.timedelta(days=1)).strftime("%Y/%m/%d")
        url = (
            "https://wikimedia.org/api/rest_v1/metrics/pageviews/top-per-country/"
            f"{code}/all-access/{day}"
        )
        data = self.http.get_json(url, respect_robots=False)
        if not isinstance(data, dict):
            return []

        # Validate the payload shape step by step instead of relying on
        # exceptions, so a null/odd field cannot raise TypeError or
        # AttributeError.
        buckets = data.get("items")
        if not isinstance(buckets, list) or not buckets:
            return []
        first = buckets[0]
        articles = first.get("articles") if isinstance(first, dict) else None
        if not isinstance(articles, list):
            return []

        limit = self.TOP_ARTICLES_LIMIT
        items: List[TrendItem] = []
        # ``rank`` is the position in the API list, so it may have gaps in
        # the output when an entry is skipped.
        for rank, article in enumerate(articles[:limit], start=1):
            if not isinstance(article, dict):
                continue
            title = str(article.get("article") or "").replace("_", " ").strip()
            if not title:
                continue
            try:
                views = int(article.get("views_ceil") or article.get("views") or 0)
            except (TypeError, ValueError):
                # Unparseable view count: keep the article, report 0 views.
                views = 0
            items.append(TrendItem(
                keyword=title,
                trend_type="daily",
                rank=rank,
                # Linear decay: 1.0 for rank 1, approaching 0 at the limit.
                score=max(0.0, 1.0 - (rank - 1) / limit),
                volume=views,
                category="news",
            ))
        return items

    def _current_events(self, lang: str) -> List[TrendItem]:
        """Return items built from the English "Current events" portal.

        Only the English portal is queried; any other ``lang`` yields an
        empty list without making a request.

        Args:
            lang: Language code; must equal ``"en"`` for any work to happen.

        Returns:
            Up to ``MAX_EVENT_SENTENCES`` items with ``category="events"``,
            one per sufficiently long sentence of the page summary extract.
        """
        if lang != "en":
            return []
        url = "https://en.wikipedia.org/api/rest_v1/page/summary/Portal:Current_events"
        data = self.http.get_json(url, respect_robots=False)
        if not isinstance(data, dict):
            return []

        extract = data.get("extract")
        if not isinstance(extract, str) or not extract.strip():
            return []

        # Resolve the page URL once; tolerate missing or null nested objects.
        content_urls = data.get("content_urls")
        desktop = content_urls.get("desktop") if isinstance(content_urls, dict) else None
        page_url = desktop.get("page") if isinstance(desktop, dict) else None

        # Split the extract into sentences; each long-enough sentence is
        # treated as a potential event.
        min_len = self.MIN_EVENT_SENTENCE_LEN
        sentences = [
            sentence
            for sentence in (part.strip() for part in extract.strip().split(". "))
            if len(sentence) > min_len
        ]
        return [
            TrendItem(
                keyword=sentence.rstrip("."),
                trend_type="daily",
                rank=rank,
                category="events",
                language="en",
                url=page_url,
            )
            for rank, sentence in enumerate(
                sentences[:self.MAX_EVENT_SENTENCES], start=1
            )
        ]

    def collect_for_country(self, country) -> List[TrendItem]:
        """Collect top-viewed articles and current events for ``country``.

        Args:
            country: Object exposing ``iso_code`` and ``primary_language``
                (the latter falls back to ``"en"`` when falsy).

        Returns:
            Top-views items followed by current-events items.
        """
        items: List[TrendItem] = []
        items += self._topviews(country.iso_code)
        items += self._current_events(country.primary_language or "en")
        self.log.info("[%s] Wikipedia: %d įrašų", country.iso_code, len(items))
        return items
