"""yt-dlp download service.

Runs yt-dlp as a subprocess in a temporary directory,
validates the output, then hands the file path to the caller.
"""
from __future__ import annotations

import asyncio
import mimetypes
import os
import re
import shutil
import tempfile
from pathlib import Path

import structlog
from pathvalidate import sanitize_filename

from app.config import Settings
from app.exceptions import DownloadError

# Module-level structured logger, named after this module.
log = structlog.get_logger(__name__)

# Smallest downloaded file size, in bytes, that is accepted as valid (10 KiB);
# anything smaller is rejected as likely corrupt.
_MIN_FILE_BYTES = 10_240
# Upper bound, in seconds, on a single yt-dlp run before the subprocess is
# killed and the download is reported as timed out.
_TIMEOUT_SECONDS = 300  # 5 minutes

# Platforms known to need cookies for full access.
# NOTE(review): not referenced by any code visible in this file — confirm
# whether cookie handling is still planned or whether this can be removed.
_COOKIE_PLATFORMS: frozenset[str] = frozenset({"twitter", "x"})


class DownloadResult:
    """Outcome of one successful download.

    A plain attribute holder. ``__slots__`` fixes the attribute set, so
    instances carry no per-instance ``__dict__`` and cannot grow new fields.

    Attributes:
        file_path: Location of the downloaded media file.
        thumbnail_path: Location of the thumbnail image, or ``None``.
        title: Title to present for the media.
        duration: Media length as an integer, or ``None`` when unknown.
        mime_type: MIME type string for the media file.
        platform: Name of the platform the media came from.
        metadata: Dictionary of additional metadata about the media.
    """

    __slots__ = (
        "file_path",
        "thumbnail_path",
        "title",
        "duration",
        "mime_type",
        "platform",
        "metadata",
    )

    def __init__(
        self,
        file_path: Path,
        thumbnail_path: Path | None,
        title: str,
        duration: int | None,
        mime_type: str,
        platform: str,
        metadata: dict,
    ) -> None:
        # Files on disk.
        self.file_path, self.thumbnail_path = file_path, thumbnail_path
        # Descriptive fields.
        self.title, self.duration = title, duration
        self.mime_type, self.platform = mime_type, platform
        # Free-form extra information.
        self.metadata = metadata


class DownloaderService:
    """Download media by running the ``yt-dlp`` command-line tool.

    Each download runs in its own temporary directory.  On success the
    directory is handed to the caller inside a ``DownloadResult`` and must be
    released with :meth:`cleanup`; on any failure (including cancellation)
    it is removed before the exception propagates.
    """

    # Thumbnail suffixes, in order of preference (yt-dlp is asked to convert
    # thumbnails to jpg, so that is tried first).
    _THUMBNAIL_SUFFIXES = (".jpg", ".png")
    # Files that are neither media nor a usable thumbnail: unconverted webp
    # thumbnails and the bookkeeping files of an unfinished download.
    _IGNORED_SUFFIXES = (".webp", ".part", ".ytdl")
    # stderr fragments that mark a failure as permanent (retrying won't help).
    _PERMANENT_ERROR_PHRASES = (
        "login required",
        "private video",
        "video unavailable",
        "has been removed",
        "paywall",
        "geo restriction",
    )
    # Allow-list of yt-dlp info keys that are passed on to callers; everything
    # else (cookies, tokens, internal state) is dropped.
    _SAFE_METADATA_KEYS = frozenset({
        "id", "title", "description", "uploader", "upload_date",
        "view_count", "like_count", "duration", "width", "height",
        "fps", "webpage_url", "extractor",
    })
    # How much of the end of stderr is quoted in error messages.
    _ERROR_TAIL_CHARS = 500
    # How long to wait for a killed subprocess to be reaped.
    _KILL_WAIT_SECONDS = 5

    def __init__(self, settings: Settings) -> None:
        self._settings = settings

    async def download(self, url: str, platform: str = "web") -> DownloadResult:
        """
        Download media from `url` into a temp directory.
        Returns a DownloadResult; the caller is responsible for cleanup.
        Raises DownloadError on failure.

        `platform` is used for logging and as the fallback platform name when
        yt-dlp does not report an extractor.
        """
        tmpdir = tempfile.mkdtemp(prefix="mymedia_dl_")
        try:
            return await self._run_ytdlp(url, tmpdir, platform)
        except DownloadError:
            shutil.rmtree(tmpdir, ignore_errors=True)
            raise
        except Exception as exc:
            shutil.rmtree(tmpdir, ignore_errors=True)
            raise DownloadError(f"Unexpected download error: {exc}") from exc
        except BaseException:
            # Task cancellation (CancelledError is a BaseException) and
            # interpreter shutdown must not leak the temp directory either.
            shutil.rmtree(tmpdir, ignore_errors=True)
            raise

    def _build_command(self, url: str, tmpdir: str) -> list[str]:
        """Return the yt-dlp argument vector for downloading `url` into `tmpdir`."""
        output_tmpl = os.path.join(tmpdir, "%(id)s.%(ext)s")
        return [
            "yt-dlp",
            "--no-playlist",
            "--max-filesize", str(self._settings.max_download_bytes),
            "--format", "bestvideo[height<=1080]+bestaudio/best[height<=1080]/best",
            "--write-thumbnail",
            "--convert-thumbnails", "jpg",
            "--print-json",
            "--no-progress",
            "--output", output_tmpl,
            "--socket-timeout", "30",
            # End of options: a URL starting with "-" must never be parsed
            # as a yt-dlp flag.
            "--",
            url,
        ]

    @classmethod
    async def _terminate(cls, proc: asyncio.subprocess.Process) -> None:
        """Kill `proc` if it is still running and wait (bounded) for it to be reaped."""
        if proc.returncode is not None:
            return
        try:
            proc.kill()
        except ProcessLookupError:
            # Exited between the returncode check and the kill.
            return
        try:
            await asyncio.wait_for(proc.wait(), timeout=cls._KILL_WAIT_SECONDS)
        except asyncio.TimeoutError:
            log.warning("downloader.kill_timeout", pid=proc.pid)

    @classmethod
    def _is_permanent_error(cls, stderr_text: str) -> bool:
        """Return True if `stderr_text` contains a phrase marking a permanent failure."""
        lowered = stderr_text.lower()
        return any(phrase in lowered for phrase in cls._PERMANENT_ERROR_PHRASES)

    @staticmethod
    def _parse_info(stdout_text: str) -> dict:
        """Return the last line of `stdout_text` that parses as a JSON object, or {}."""
        import json

        for line in reversed(stdout_text.splitlines()):
            line = line.strip()
            if not line.startswith("{"):
                continue
            try:
                return json.loads(line)
            except json.JSONDecodeError:
                continue
        return {}

    @classmethod
    def _find_outputs(cls, tmpdir: str) -> tuple[Path | None, Path | None]:
        """Locate the media file and thumbnail in `tmpdir`.

        Returns ``(media_file, thumbnail_path)``; either may be ``None``.
        Entries are scanned in sorted order so the choice does not depend on
        directory listing order.  The largest non-image, non-partial file is
        taken as the media file.
        """
        base = Path(tmpdir)
        media: list[Path] = []
        thumbs: list[Path] = []
        for name in sorted(os.listdir(tmpdir)):
            entry = base / name
            if not entry.is_file():
                continue
            suffix = entry.suffix.lower()
            if suffix in cls._IGNORED_SUFFIXES:
                continue
            if suffix in cls._THUMBNAIL_SUFFIXES:
                thumbs.append(entry)
            else:
                media.append(entry)

        media_file = max(media, key=lambda p: p.stat().st_size) if media else None
        thumbnail = (
            min(thumbs, key=lambda p: cls._THUMBNAIL_SUFFIXES.index(p.suffix.lower()))
            if thumbs
            else None
        )
        return media_file, thumbnail

    async def _run_ytdlp(self, url: str, tmpdir: str, platform: str) -> DownloadResult:
        """Run yt-dlp for `url` inside `tmpdir` and build a DownloadResult.

        Raises DownloadError when yt-dlp times out, exits non-zero, writes no
        media file, or writes one smaller than the minimum accepted size.
        """
        cmd = self._build_command(url, tmpdir)

        log.info("downloader.start", url=url, platform=platform)

        proc = await asyncio.create_subprocess_exec(
            *cmd,
            # No stdin: yt-dlp must fail rather than block on a prompt.
            stdin=asyncio.subprocess.DEVNULL,
            stdout=asyncio.subprocess.PIPE,
            stderr=asyncio.subprocess.PIPE,
        )

        try:
            stdout, stderr = await asyncio.wait_for(
                proc.communicate(), timeout=_TIMEOUT_SECONDS
            )
        except asyncio.TimeoutError:
            await self._terminate(proc)
            raise DownloadError(
                f"Download timed out after {_TIMEOUT_SECONDS}s for URL: {url}",
                is_permanent=False,
            )
        except asyncio.CancelledError:
            # Don't leave yt-dlp running after the awaiting task is cancelled.
            await self._terminate(proc)
            raise

        if proc.returncode != 0:
            full_err = stderr.decode(errors="replace")
            # yt-dlp prints warnings first and the actual error last, so
            # classify on the whole text and quote only the tail.
            err_text = full_err.strip()[-self._ERROR_TAIL_CHARS:]
            raise DownloadError(
                f"yt-dlp failed (exit {proc.returncode}): {err_text}",
                is_permanent=self._is_permanent_error(full_err),
            )

        # Parse JSON metadata from stdout (last valid JSON line)
        info = self._parse_info(stdout.decode(errors="replace"))

        media_file, thumbnail_path = self._find_outputs(tmpdir)
        if media_file is None:
            raise DownloadError("yt-dlp succeeded but no media file was written")

        # Validate output
        file_size = media_file.stat().st_size
        if file_size < _MIN_FILE_BYTES:
            raise DownloadError(
                f"Downloaded file is too small ({file_size} bytes) — likely corrupt"
            )

        mime_type, _ = mimetypes.guess_type(str(media_file))
        mime_type = mime_type or "application/octet-stream"

        # Extract safe title; fall back to the file stem if sanitising
        # leaves nothing.
        raw_title = info.get("title") or info.get("fulltitle") or media_file.stem
        title = sanitize_filename(str(raw_title), max_len=200) or media_file.stem

        duration = info.get("duration")

        # Sanitize metadata (remove cookies, tokens, internal state)
        safe_meta = {k: v for k, v in info.items() if k in self._SAFE_METADATA_KEYS}

        log.info(
            "downloader.complete",
            url=url,
            file=str(media_file),
            size=file_size,
            title=title,
        )

        return DownloadResult(
            file_path=media_file,
            thumbnail_path=thumbnail_path,
            title=title,
            duration=int(duration) if duration else None,
            mime_type=mime_type,
            # `or` rather than a .get default: the key may be present but null.
            platform=info.get("extractor") or platform,
            metadata=safe_meta,
        )

    @staticmethod
    def cleanup(result: DownloadResult) -> None:
        """Remove the temporary directory created during download."""
        tmpdir = result.file_path.parent
        shutil.rmtree(tmpdir, ignore_errors=True)