"""Planuoklė, kuri pagal config.yaml intervalus paleidžia kolektorius.

Naudojam BackgroundScheduler – paleidimas viename procese su pagrindiniu
įvykių ciklu. Kiekvienas kolektoriaus tipas turi savo intervalą.
"""

from __future__ import annotations

import datetime as dt
from typing import Dict, List

from apscheduler.schedulers.background import BackgroundScheduler
from apscheduler.triggers.interval import IntervalTrigger

from .collectors.base import BaseCollector
from .collectors.gdelt import GdeltCollector
from .collectors.gnews import GNewsCollector
from .collectors.google_trends import GoogleTrendsCollector
from .collectors.newsapi import NewsApiCollector
from .collectors.reddit import RedditCollector
from .collectors.rss import RssCollector
from .collectors.wikipedia import WikipediaCollector
from .config import Config
from .database import DatabaseHandler
from .exporters.csv_json import export_all
from .logger import get_logger
from .pipeline import Pipeline
from .utils.http import HttpClient

# Module-level logger obtained from the project's logging helper, keyed by
# this module's name.
log = get_logger(__name__)


# Registry of available collectors: maps a collector key to the class that
# implements it. The same key is used to look up that collector's settings in
# ``cfg.collectors`` and to build the scheduler job id.
COLLECTOR_CLASSES = {
    "google_trends": GoogleTrendsCollector,
    "rss":           RssCollector,
    "reddit":        RedditCollector,
    "gdelt":         GdeltCollector,
    "wikipedia":     WikipediaCollector,
    "newsapi":       NewsApiCollector,
    "gnews":         GNewsCollector,
}


def _build_http(cfg: Config) -> HttpClient:
    """Create the HTTP client from the ``http`` section of the configuration.

    Every setting is optional; a missing key falls back to the default given
    below. Numeric settings are coerced to ``int``/``float`` before they are
    handed to ``HttpClient``.
    """
    settings = cfg.http
    client_kwargs = {
        "user_agent": settings.get("user_agent", "trends_collector/0.1"),
        "timeout": int(settings.get("timeout", 20)),
        "retries": int(settings.get("retries", 3)),
        "backoff_factor": float(settings.get("backoff_factor", 1.5)),
        # Note the config key differs from the HttpClient keyword name.
        "default_sleep": float(
            settings.get("default_sleep_between_requests", 1.5)
        ),
    }
    return HttpClient(**client_kwargs)


def _instantiate_collectors(cfg: Config, db: DatabaseHandler,
                            http: HttpClient) -> Dict[str, BaseCollector]:
    """Instantiate every collector that is enabled in the configuration.

    Walks ``COLLECTOR_CLASSES`` in registry order and builds an instance for
    each key whose config section has a truthy ``enabled`` flag. A collector
    whose construction raises is logged as a warning and left out, so one
    broken collector does not prevent the others from being created.

    Returns:
        A dict mapping collector key to its instance, containing only the
        collectors that were enabled and constructed successfully.
    """
    active: Dict[str, BaseCollector] = {}
    for name, collector_cls in COLLECTOR_CLASSES.items():
        settings = cfg.collectors.get(name, {})
        if settings.get("enabled", False):
            try:
                instance = collector_cls(db=db, http=http, cfg=cfg.raw)
                active[name] = instance
                log.info("Kolektorius įjungtas: %s", name)
            except Exception as err:
                log.warning("Kolektoriaus %s instancijuoti nepavyko: %s", name, err)
    return active


def _run_collector_for_all(cfg: Config, db: DatabaseHandler,
                           collector: BaseCollector,
                           pipeline: Pipeline) -> None:
    """Run a single collector through the pipeline for each enabled country.

    The country list comes from ``db.get_enabled_countries(cfg.countries)``.
    Countries are processed one at a time; an exception raised while
    processing one country is logged with its traceback and the loop moves on
    to the next country.
    """
    enabled_countries = db.get_enabled_countries(cfg.countries)
    log.info("[%s] paleidžiam %d šalims", collector.name, len(enabled_countries))
    for country in enabled_countries:
        try:
            # The pipeline takes a list of collectors; pass just this one.
            pipeline.process_country(country, [collector])
        except Exception as err:
            log.exception(
                "[%s] %s žlugo: %s", country.iso_code, collector.name, err
            )


def _export_kwargs(cfg: Config, db: DatabaseHandler) -> dict:
    """Build the keyword arguments for ``export_all`` from the export config."""
    return {
        "db": db,
        "directory": cfg.export.get("directory", "exports"),
        "formats": cfg.export.get("formats", ["json", "csv"]),
    }


def build_and_start(cfg: Config, db: DatabaseHandler,
                    run_once: bool = False) -> BackgroundScheduler | None:
    """Create the collectors and either run them once or schedule them.

    Args:
        cfg: Application configuration. The ``http``, ``collectors``,
            ``processing``, ``countries`` and ``export`` sections are used.
        db: Database handler shared by collectors, pipeline and exporter.
        run_once: When true, every enabled collector is run once
            synchronously, the export is performed if enabled, and no
            scheduler is created.

    Returns:
        The started ``BackgroundScheduler``, or ``None`` when ``run_once``
        is true.
    """
    http = _build_http(cfg)
    collectors = _instantiate_collectors(cfg, db, http)
    pipeline = Pipeline(db, cfg.processing)

    if run_once:
        for collector in collectors.values():
            _run_collector_for_all(cfg, db, collector, pipeline)
        if cfg.export.get("enabled", False):
            export_all(**_export_kwargs(cfg, db))
        return None

    tz_name = "UTC"
    sched = BackgroundScheduler(timezone=tz_name)

    # First run shortly after startup. The datetime must be timezone-aware:
    # a naive value would be interpreted in the trigger's timezone (the
    # machine's local zone by default), shifting the first run by the local
    # UTC offset on any host that is not running in UTC.
    first_run = dt.datetime.now(dt.timezone.utc) + dt.timedelta(seconds=10)

    for name, collector in collectors.items():
        ccfg = cfg.collectors.get(name, {})
        if name == "google_trends":
            # google_trends has separate realtime/daily intervals, but the
            # same collector returns both kinds of data, so a single job at
            # the more frequent of the two intervals is sufficient.
            iv_rt = int(ccfg.get("interval_minutes_realtime", 30))
            iv_d = int(ccfg.get("interval_minutes_daily", 720))
            interval = min(iv_rt, iv_d)
        else:
            interval = int(ccfg.get("interval_minutes", 60))

        sched.add_job(
            _run_collector_for_all,
            trigger=IntervalTrigger(
                minutes=interval,
                start_date=first_run,
                timezone=tz_name,
            ),
            args=[cfg, db, collector, pipeline],
            id=f"collector_{name}",
            max_instances=1,
            coalesce=True,
            replace_existing=True,
        )
        log.info("Suplanuotas %s kas %d min.", name, interval)

    # Periodic export job.
    if cfg.export.get("enabled", False):
        # Coerce once so the trigger and the log line agree, and so a string
        # value in the config does not break the %d log formatting.
        export_interval = int(cfg.export.get("interval_minutes", 60))
        sched.add_job(
            export_all,
            trigger=IntervalTrigger(minutes=export_interval, timezone=tz_name),
            kwargs=_export_kwargs(cfg, db),
            id="exporter",
            max_instances=1,
            coalesce=True,
            replace_existing=True,
        )
        log.info("Suplanuotas eksportas kas %d min.", export_interval)

    sched.start()
    return sched
