import os
import asyncio
import logging
from datetime import datetime, timezone
from http import HTTPMethod
import httpx

# Module-level logger, named after this module.
logger = logging.getLogger(__name__)

# Default value of the `timeout` parameter of http_request(), which forwards it
# to httpx unchanged. NOTE(review): httpx treats a bare number as seconds —
# confirm against the httpx timeout documentation.
REQUEST_TIMEOUT = 60


def parse_ts(v) -> datetime | None:
    """수집기마다 다른 timestamp 포맷을 통일된 datetime으로 변환.
    파싱 실패 시 None.
    """
    # 숫자면 epoch (Unix timestamp) — apify가 가끔 epoch로 줌
    if isinstance(v, (int, float)):
        return datetime.fromtimestamp(v, tz=timezone.utc)
    if isinstance(v, str):
        # 1순위: ISO 8601 (대부분 apify/firecrawl 출력)
        try:
            return datetime.fromisoformat(v.replace("Z", "+00:00"))
        except ValueError:
            pass
        # 2순위: RFC 2822 (네이버 블로그 RSS 등 — 표준 라이브러리 파서로)
        try:
            from email.utils import parsedate_to_datetime
            return parsedate_to_datetime(v)
        except (TypeError, ValueError):
            return None
    return None


def get_env(key: str) -> str:
    """Return the value of the environment variable ``key``.

    Raises:
        EnvironmentError: if the variable is unset or set to an empty string.
    """
    value = os.getenv(key, "")
    if value:
        return value
    raise EnvironmentError(f"Missing env: {key}")

async def http_request(
    method: HTTPMethod,
    url: str,
    *,
    label: str,
    headers: dict | None = None,
    params: dict | None = None,
    json_body: dict | None = None,
    timeout: int = REQUEST_TIMEOUT,
    max_retries: int = 0,
) -> httpx.Response | None:
    """Send one HTTP request with httpx, retrying on transport-level errors.

    A fresh ``httpx.AsyncClient`` is opened for the call and closed on exit.
    Only ``httpx.RequestError`` triggers a retry; any response that is
    received is returned as-is, whatever its status code.

    Args:
        method: HTTP method handed to ``client.request``.
        url: Target URL.
        label: Short tag included in the retry/error messages printed to stdout.
        headers: Optional request headers.
        params: Optional query parameters.
        json_body: Optional payload sent through httpx's ``json=`` argument.
        timeout: Timeout forwarded to httpx for each attempt.
        max_retries: Extra attempts after the first one. The pause before
            retry k (1-based) is ``k * 2`` seconds.

    Returns:
        The ``httpx.Response``, or None once every attempt has failed with a
        ``httpx.RequestError`` (also None if ``max_retries`` is negative, in
        which case no request is made).
    """
    total_attempts = max_retries + 1
    async with httpx.AsyncClient() as client:
        for attempt_no in range(1, total_attempts + 1):
            try:
                return await client.request(
                    method,
                    url,
                    headers=headers,
                    params=params,
                    json=json_body,
                    timeout=timeout,
                )
            except httpx.RequestError as exc:
                if attempt_no >= total_attempts:
                    # Retry budget exhausted: report and give up.
                    print(f"  [error] {label} → {exc}")
                    return None
                print(f"  [retry] {label} → {exc}, attempt {attempt_no}")
                # Linear back-off: 2s, 4s, 6s, ...
                await asyncio.sleep(attempt_no * 2)
    return None


async def _run_optional_step(coro, label: str) -> None:
    """Await a non-critical step without letting its failure stop the caller.

    Any ``Exception`` raised while awaiting ``coro`` is swallowed and reported
    as a warning log entry tagged with ``label``. Exceptions that do not derive
    from ``Exception`` are not caught here.
    """
    try:
        await coro
    except Exception as exc:
        logger.warning("%s 실패 (무시하고 진행): %s", label, exc)


def _normalize_homepage(url: str) -> str:
    """Reduce a URL to a canonical key for homepage matching.

    The value is trimmed and lower-cased, then a leading ``https://``, a
    leading ``http://`` and a leading ``www.`` are removed (in that order,
    each at most once), and finally all trailing slashes are dropped.
    A falsy ``url`` yields an empty string.
    """
    cleaned = (url or "").strip().lower()
    for prefix in ("https://", "http://", "www."):
        cleaned = cleaned.removeprefix(prefix)
    return cleaned.rstrip("/")


# SSL 인증서가 www.* 에만 유효한 도메인 — bare 도메인이면 사용자 클릭 시 브라우저 SSL warning 뜸.
_WWW_REQUIRED = ("gangnamunni.com", "facebook.com", "instagram.com", "toxnfill.com")


def _with_scheme(u: str | None) -> str | None:
    """scheme 없는 URL에 https:// 보정 (수집기/링크 표시용). 빈 값은 None.
    + 중첩된 https://가 끼어있으면 마지막 URL만 추출 (LLM이 가끔 'https://www.X/https://Y' 같이 만듦).
    + SSL 엄격 도메인(gangnamunni/facebook/instagram)은 www. 자동 보강."""
    if not u:
        return None
    u = u.strip()
    # 'https://www.facebook.com/https://facebook.com/X' 같은 중첩 → 마지막 'http(s)://' 부터 잘라 사용
    last = max(u.rfind("https://"), u.rfind("http://"))
    if last > 0:
        u = u[last:]
    if "://" not in u:
        u = "https://" + u
    # scheme 뒤가 www. 없이 SSL 엄격 도메인이면 www. 추가
    for dom in _WWW_REQUIRED:
        for scheme in ("https://", "http://"):
            if u.startswith(scheme + dom):
                u = scheme + "www." + u[len(scheme):]
                break
    return u