"""Pipeline: nuo TrendItem -> apdorojimas -> DB įrašas.

Šis modulis yra centrinė vieta, kur sujungiami visi kolektoriai, procesoriai
(tekstų valymas, kategorija, sentimentas) ir DB rašymas.
"""

from __future__ import annotations

import time
from typing import Iterable, List

from .collectors.base import BaseCollector, TrendItem
from .database import Country, DatabaseHandler
from .logger import get_logger
from .processors.categorizer import categorize
from .processors.deduplicator import best_match
from .processors.sentiment import score_sentiment
from .processors.text_cleaner import clean_text, detect_language, normalize_for_dedup

log = get_logger(__name__)


class Pipeline:
    """Run collectors for a country and write the resulting trends to the DB.

    Each collected ``TrendItem`` is cleaned, optionally categorised and
    sentiment-scored, optionally compared against recently stored trends, and
    then upserted through the ``DatabaseHandler``.

    Recognised ``processing_cfg`` keys (all optional):

    * ``enable_sentiment`` (default ``True``)
    * ``enable_dedup`` (default ``True``)
    * ``dedup_similarity_threshold`` (default ``0.86``)
    * ``enable_auto_category`` (default ``True``)
    * ``collector_delay_seconds`` (default ``0.5``) - pause after each
      successfully processed collector; negative values are treated as ``0``.
    """

    def __init__(self, db: DatabaseHandler, processing_cfg: dict):
        self.db = db
        self.enable_sentiment = processing_cfg.get("enable_sentiment", True)
        self.enable_dedup = processing_cfg.get("enable_dedup", True)
        self.threshold = float(processing_cfg.get("dedup_similarity_threshold", 0.86))
        self.enable_auto_cat = processing_cfg.get("enable_auto_category", True)
        # Clamp at zero because time.sleep() rejects negative durations.
        self.collector_delay = max(
            0.0, float(processing_cfg.get("collector_delay_seconds", 0.5))
        )

    # ----------------------------------------------------------------------
    def process_country(self, country: Country,
                        collectors: Iterable[BaseCollector]) -> int:
        """Run every collector for *country* and persist what it returns.

        A failure in one collector (collection, source bookkeeping or a single
        item) is logged and never prevents the remaining collectors from
        running.

        Returns:
            The number of items actually written to the database. Items
            skipped because their cleaned keyword is empty, and items whose
            persistence raised, are not counted.
        """
        total = 0
        for col in collectors:
            try:
                items = col.collect_for_country(country)
            except Exception as exc:
                log.exception("[%s] kolektorius %s žlugo: %s",
                              country.iso_code, col.name, exc)
                self._mark_source_error(col, country, str(exc))
                continue

            try:
                source_id = col.ensure_source(country)
            except Exception as exc:
                # Without a source id nothing can be attributed; skip only
                # this collector instead of aborting the whole country run.
                log.exception("[%s] %s -> ensure_source klaida: %s",
                              country.iso_code, col.name, exc)
                continue

            # ``or ()`` tolerates a collector that returns None instead of [].
            for it in items or ():
                try:
                    if self._persist(country, source_id, it):
                        total += 1
                except Exception as exc:
                    log.warning("[%s] %s -> persist klaida (%s): %s",
                                country.iso_code, col.name, it.keyword, exc)

            try:
                self.db.mark_source_fetch(source_id, status="ok")
            except Exception as exc:
                # Status bookkeeping is best-effort; the trends are already stored.
                log.warning("[%s] %s -> mark_source_fetch klaida: %s",
                            country.iso_code, col.name, exc)
            time.sleep(self.collector_delay)
        return total

    # ----------------------------------------------------------------------
    def _persist(self, country: Country, source_id: int, it: TrendItem) -> bool:
        """Clean, enrich and upsert a single trend item.

        When deduplication is enabled and a similar (but not
        normalisation-identical) recent trend exists, the new trend is still
        stored as its own row and additionally linked to the existing one.

        Returns:
            ``True`` if the item was written, ``False`` if it was skipped
            because its cleaned keyword is empty.
        """
        kw_clean = clean_text(it.keyword)
        if not kw_clean:
            return False
        # Guard against overflowing the MySQL VARCHAR(255) keyword column.
        kw_norm = kw_clean[:255]
        kw_raw = it.keyword[:500]

        language = it.language or detect_language(kw_clean) or country.primary_language
        if self.enable_auto_cat:
            category = categorize(kw_clean, hint=it.category)
        else:
            category = it.category or "news"

        sentiment = None
        if self.enable_sentiment:
            sentiment = score_sentiment(kw_clean, language=language)

        # Look for a similar trend BEFORE writing, so the lookup cannot see
        # the row this call is about to upsert.
        similar = None
        if self.enable_dedup:
            similar = self._find_similar(country, it.trend_type, kw_norm)

        new_trend = self.db.upsert_trend(
            country_id=country.id,
            source_id=source_id,
            category=category,
            keyword=kw_norm,
            keyword_raw=kw_raw,
            language=language,
            score=it.score,
            volume=it.volume,
            sentiment=sentiment,
            trend_type=it.trend_type,
            rank=it.rank,
            description=it.description,
            url=it.url,
        )

        if similar is not None:
            # Similar but not identical: kept as a separate trend, but linked.
            existing, sim = similar
            self.db.add_trend_link(existing.id, new_trend.id,
                                   similarity=sim, method="rapidfuzz")
        return True

    # ----------------------------------------------------------------------
    def _find_similar(self, country: Country, trend_type, keyword: str) -> tuple | None:
        """Return ``(existing_trend, similarity)`` for a near-duplicate, else ``None``.

        Searches the trends stored for this country and trend type in the last
        48 hours (at most 200 rows). A match whose dedup-normalised keyword
        equals that of *keyword* is treated as the same trend and yields
        ``None``, so the caller performs a plain upsert without a link.
        """
        recent = self.db.recent_trends(country.id, trend_type, hours=48, limit=200)
        if not recent:
            return None
        match = best_match(keyword, [t.keyword for t in recent],
                           threshold=self.threshold)
        if match is None:
            return None
        idx, sim = match
        existing = recent[idx]
        if normalize_for_dedup(existing.keyword) == normalize_for_dedup(keyword):
            return None
        return existing, sim

    # ----------------------------------------------------------------------
    def _mark_source_error(self, col: BaseCollector, country: Country, err: str) -> None:
        """Record a failed fetch for the collector's source (best-effort).

        The error text is truncated to 1000 characters. Any failure while
        recording the status is logged and swallowed so that it cannot mask
        the original collector error or stop the run.
        """
        try:
            sid = col.ensure_source(country)
            self.db.mark_source_fetch(sid, status="error", error=err[:1000])
        except Exception as exc:
            log.warning("[%s] %s -> mark_source_error klaida: %s",
                        country.iso_code, col.name, exc)
