"""AI analysis worker — consumes ai:analyze stream."""
from __future__ import annotations

import html
import re
import uuid
from typing import TYPE_CHECKING

import httpx
import redis.asyncio as aioredis
import structlog
import structlog.contextvars

from app.config import get_settings
from app.db.models.job import JobStatus, JobType
from app.db.models.media import LinkStatus, MediaStatus
from app.db.repositories.job import JobRepository
from app.db.repositories.media import MediaRepository
from app.db.session import AsyncSessionLocal
from app.exceptions import AIServiceError
from app.services.ai import AIService
from app.services.queue import STREAM_EMBED, QueueService
from app.workers.base import BaseWorker

if TYPE_CHECKING:
    from sqlalchemy.ext.asyncio import AsyncSession

# Module-level logger and cached settings, shared by every worker instance.
log = structlog.get_logger(__name__)
settings = get_settings()


class AnalyzeWorker(BaseWorker):
    """Worker for the ``ai:analyze`` stream.

    For each message it claims the referenced job, runs AI analysis on the
    target media item or link, persists summary/keywords/entities/tags, and
    enqueues a follow-up ``AI_EMBED`` job for the embedding pipeline.
    """

    stream = "ai:analyze"
    group = "ai:analyze-workers"

    def __init__(
        self,
        redis_client: aioredis.Redis,
        queue: QueueService,
        ai: AIService,
        worker_id: int = 0,
    ) -> None:
        super().__init__(redis_client, queue)
        self._ai = ai
        # Unique consumer name within the stream group, one per worker instance.
        self.consumer_name = f"ai-worker-{worker_id}"

    async def _process(self, fields: dict[str, str]) -> None:
        """Handle one stream message; ``fields['job_id']`` is the job UUID."""
        job_id = uuid.UUID(fields["job_id"])
        structlog.contextvars.bind_contextvars(job_id=str(job_id), worker=self.consumer_name)
        try:
            await self._run_job(job_id)
        finally:
            # Unbind unconditionally. The original only unbound on the happy
            # path, so early returns and re-raised exceptions leaked this
            # job's context into every subsequent message on this worker.
            structlog.contextvars.unbind_contextvars("job_id", "worker")

    async def _run_job(self, job_id: uuid.UUID) -> None:
        """Load, lock, claim and execute a single analyze job."""
        async with AsyncSessionLocal() as session:
            job_repo = JobRepository(session)
            media_repo = MediaRepository(session)

            job = await job_repo.get(job_id)
            if not job:
                log.error("analyze.job_not_found", job_id=str(job_id))
                return

            # Per-job Redis lock so a redelivered message is never processed
            # concurrently; the 600 s TTL frees the lock if this worker dies.
            lock_key = f"job:{job_id}:lock"
            acquired = await self._r.set(lock_key, self.consumer_name, nx=True, ex=600)
            if not acquired:
                log.warning("analyze.lock_not_acquired", job_id=str(job_id))
                return

            try:
                claimed = await job_repo.transition(job_id, JobStatus.PENDING, JobStatus.RUNNING)
                if not claimed:
                    # Another consumer already moved the job past PENDING.
                    return
                await job_repo.append_history_event(job_id, JobStatus.RUNNING)
                await session.commit()

                await self._analyze(job, job_repo, media_repo, session)
            except AIServiceError as exc:
                # Expected AI failure: record it (retry or DLQ) without
                # crashing the consumer loop, so the message is acked.
                await self._handle_failure(job_id, job_repo, session, exc)
            except Exception as exc:
                # Unexpected failure: record it, then re-raise so the base
                # worker's error handling can see it.
                await self._handle_failure(job_id, job_repo, session, exc)
                raise
            finally:
                # NOTE(review): an unconditional delete could drop a lock
                # another worker re-acquired after our TTL expired; accepted
                # here since jobs should finish well inside 600 s.
                await self._r.delete(lock_key)

    async def _analyze(
        self,
        job,
        job_repo: JobRepository,
        media_repo: MediaRepository,
        session: AsyncSession,
    ) -> None:
        """Run the AI pipeline for a claimed *job* and enqueue the embed job.

        Persists the analysis, attaches tags, marks the entity AI_COMPLETE,
        then creates and enqueues the follow-up ``AI_EMBED`` job and marks
        this job DONE. Commits happen at each durable milestone.
        """
        entity_id = job.entity_id
        entity_type = job.entity_type or "media"

        url, title, content = await self._fetch_entity_content(
            entity_id, entity_type, media_repo
        )

        analysis = await self._ai.analyze(content=content, url=url, title=title)

        await media_repo.save_analysis(
            entity_id=entity_id,
            entity_type=entity_type,
            summary=analysis["summary"],
            keywords=analysis["keywords"],
            entities=analysis["entities"],
            llm_model=analysis["llm_model"],
            pipeline_version=analysis["pipeline_version"],
            prompt_tokens=analysis["prompt_tokens"],
            completion_tokens=analysis["completion_tokens"],
        )

        if analysis["tags"]:
            await media_repo.attach_tags(
                entity_id=entity_id,
                entity_type=entity_type,
                user_id=job.user_id,
                tag_names=analysis["tags"],
            )

        if entity_type == "media":
            await media_repo.update_media_status(entity_id, MediaStatus.AI_COMPLETE)
        else:
            await media_repo.update_link_status(entity_id, LinkStatus.AI_COMPLETE)

        await session.commit()

        embed_job = await job_repo.create(
            entity_id=entity_id,
            entity_type=entity_type,
            user_id=job.user_id,
            canonical_url_hash=job.canonical_url_hash or "",
            job_type=JobType.AI_EMBED,
            payload={
                # Guard against jobs created without a payload (None would
                # make the original ``**job.payload`` raise TypeError).
                **(job.payload or {}),
                "title": title,
                "summary": analysis["summary"],
                "keywords": analysis["keywords"],
            },
        )
        await self._queue.enqueue(STREAM_EMBED, job_id=str(embed_job.id))
        await job_repo.transition(job.id, JobStatus.RUNNING, JobStatus.DONE)
        await session.commit()

        log.info("analyze.complete", entity_id=str(entity_id), entity_type=entity_type)

    async def _fetch_entity_content(
        self,
        entity_id: uuid.UUID,
        entity_type: str,
        media_repo: MediaRepository,
    ) -> tuple[str, str, str]:
        """Return ``(url, title, content)`` for the entity.

        Returns empty strings if the entity no longer exists. For links,
        tries a live Open Graph fetch first and falls back to stored fields.
        """
        if entity_type == "media":
            item = await media_repo.get_media_item(entity_id)
            if not item:
                return "", "", ""
            return (
                item.source_url,
                item.page_title or "",
                f"{item.page_title or ''}\n{item.source_url}",
            )
        else:
            link = await media_repo.get_link(entity_id)
            if not link:
                return "", "", ""
            # Try to fetch Open Graph metadata for richer link content.
            og_content = await self._fetch_og_content(link.url)
            return (
                link.url,
                link.page_title or "",
                og_content or f"{link.page_title or ''}\n{link.og_description or ''}\n{link.url}",
            )

    @staticmethod
    async def _fetch_og_content(url: str) -> str | None:
        """Fetch *url* and extract Open Graph metadata for richer AI analysis.

        Returns site name, title and description joined by newlines (only the
        non-empty parts), or ``None`` when the page cannot be fetched or
        yields no metadata. Best-effort by design: any error returns ``None``.
        """
        try:
            async with httpx.AsyncClient(
                timeout=10,
                follow_redirects=True,
                headers={"User-Agent": "Mozilla/5.0 (compatible; MyMediaBot/1.0)"},
            ) as client:
                response = await client.get(url)
                if response.status_code != 200:
                    return None
                # Cap the scanned HTML so huge pages stay cheap to regex over.
                page = response.text[:50_000]

                def _og(prop: str) -> str:
                    # Accept both attribute orders; the original only matched
                    # property-before-content and missed content-first tags.
                    patterns = (
                        rf'<meta[^>]+property=["\']og:{prop}["\'][^>]+content=["\']([^"\']+)["\']',
                        rf'<meta[^>]+content=["\']([^"\']+)["\'][^>]+property=["\']og:{prop}["\']',
                    )
                    for pattern in patterns:
                        m = re.search(pattern, page, re.IGNORECASE)
                        if m:
                            # Decode entities (&amp; etc.) for cleaner AI input.
                            return html.unescape(m.group(1))
                    return ""

                title = _og("title")
                description = _og("description")
                site_name = _og("site_name")

                # Fall back to the <title> tag when og:title is absent.
                if not title:
                    m = re.search(r"<title[^>]*>([^<]+)</title>", page, re.IGNORECASE)
                    title = html.unescape(m.group(1).strip()) if m else ""

                parts = [p for p in (site_name, title, description) if p]
                return "\n".join(parts) if parts else None
        except Exception:
            # Best-effort enrichment: never let a fetch error fail the job.
            return None

    async def _handle_failure(
        self,
        job_id: uuid.UUID,
        job_repo: JobRepository,
        session: AsyncSession,
        exc: Exception,
    ) -> None:
        """Record a failed attempt and either leave the job retryable or park it.

        Permanent errors (``exc.is_permanent``) and jobs that exhausted
        ``settings.job_max_attempts`` transition to FAILED with the error
        message persisted and are sent to the dead-letter queue; otherwise
        the job is marked FAILED so stream redelivery can retry it.
        """
        # Typed via the lazy annotation above instead of the original
        # runtime ``assert isinstance`` (asserts are stripped under -O).
        attempts = await job_repo.increment_attempts(job_id)
        # Use the enum for consistency with the other history events
        # (the original passed the bare string "FAILED").
        await job_repo.append_history_event(job_id, JobStatus.FAILED, str(exc))
        is_permanent = getattr(exc, "is_permanent", False)
        if is_permanent or attempts >= settings.job_max_attempts:
            await job_repo.transition(
                job_id, JobStatus.RUNNING, JobStatus.FAILED, error_message=str(exc)
            )
            await self._queue.to_dlq(self.stream, str(job_id), str(exc), attempts)
        else:
            await job_repo.transition(job_id, JobStatus.RUNNING, JobStatus.FAILED)
        await session.commit()
