import os
import re
import asyncio
from http import HTTPMethod

import httpx

# Default per-request timeout passed to httpx, in seconds.
REQUEST_TIMEOUT = 60


def get_env(key: str) -> str:
    """Return the value of the environment variable *key*.

    Raises:
        EnvironmentError: If the variable is unset or set to an empty string.
    """
    v = os.environ.get(key, "")
    if not v:
        raise EnvironmentError(f"Missing env: {key}")
    return v


async def http_request(
    method: HTTPMethod,
    url: str,
    *,
    label: str,
    headers: dict | None = None,
    params: dict | None = None,
    json_body: dict | None = None,
    timeout: int = REQUEST_TIMEOUT,
    max_retries: int = 0,
) -> httpx.Response | None:
    """Send one HTTP request, retrying on transport-level failures.

    Args:
        method: HTTP method to use.
        url: Target URL.
        label: Short tag included in the retry/error messages that are printed.
        headers: Optional request headers.
        params: Optional query-string parameters.
        json_body: Optional payload sent as the JSON request body.
        timeout: Timeout forwarded to httpx for each attempt.
        max_retries: Number of extra attempts after the first one fails.

    Returns:
        The response of the first attempt that completes. The HTTP status is
        not inspected, so error statuses are returned as-is. ``None`` when
        every attempt raised ``httpx.RequestError`` (or when ``max_retries``
        is negative, in which case no request is issued at all).
    """
    async with httpx.AsyncClient() as client:
        for attempt in range(max_retries + 1):
            try:
                resp = await client.request(
                    method,
                    url,
                    headers=headers,
                    params=params,
                    json=json_body,
                    timeout=timeout,
                )
                return resp
            except httpx.RequestError as e:
                if attempt < max_retries:
                    print(f" [retry] {label} → {e}, attempt {attempt + 1}")
                    # Linear back-off: 2s, 4s, 6s, ...
                    await asyncio.sleep((attempt + 1) * 2)
                else:
                    print(f" [error] {label} → {e}")
                    return None
    # Only reached when the loop body never ran (negative max_retries).
    return None


# First path segments that follow the domain but are not account handles.
_SKIP_IG = {"p", "reel", "stories", "explore", "accounts", "about", "directory"}
_SKIP_FB = {"sharer", "share", "dialog", "plugins", "groups", "events", "watch", "help"}

# Compiled once at import time instead of being looked up for every URL.
_IG_RE = re.compile(r"instagram\.com/([a-zA-Z0-9._]+)")
_YT_RE = re.compile(
    r"youtube\.com/(?:@([a-zA-Z0-9._-]+)|channel/(UC[a-zA-Z0-9_-]+)|c/([a-zA-Z0-9._-]+))"
)
_FB_RE = re.compile(r"facebook\.com/([a-zA-Z0-9._-]+)")
_NAVER_BLOG_RE = re.compile(r"blog\.naver\.com/([a-zA-Z0-9_-]+)")
_TIKTOK_RE = re.compile(r"tiktok\.com/@([a-zA-Z0-9._-]+)")


def extract_social_handles(urls: list[str]) -> dict[str, list[str]]:
    """Collect social-media handles found in *urls*, grouped by platform.

    Each URL is searched once per platform and contributes at most one handle
    per platform. Empty/``None`` entries are skipped.

    Returns:
        A dict with the keys ``instagram``, ``youtube``, ``facebook``,
        ``naver_blog`` and ``tiktok``. Every value is a list of unique handles
        in first-seen order. YouTube ``@name`` URLs keep the leading ``@``;
        ``channel/`` and ``c/`` URLs yield the bare identifier.
    """
    result: dict[str, list[str]] = {
        "instagram": [],
        "youtube": [],
        "facebook": [],
        "naver_blog": [],
        "tiktok": [],
    }
    for url in urls:
        if not url:
            continue

        m = _IG_RE.search(url)
        if m and m.group(1).lower() not in _SKIP_IG:
            result["instagram"].append(m.group(1))

        m = _YT_RE.search(url)
        if m:
            # Group 1 is the @handle form; groups 2/3 are channel id / custom name.
            result["youtube"].append(
                f"@{m.group(1)}" if m.group(1) else (m.group(2) or m.group(3) or "")
            )

        m = _FB_RE.search(url)
        if m and m.group(1).lower() not in _SKIP_FB:
            result["facebook"].append(m.group(1))

        m = _NAVER_BLOG_RE.search(url)
        if m:
            result["naver_blog"].append(m.group(1))

        m = _TIKTOK_RE.search(url)
        if m:
            result["tiktok"].append(m.group(1))

    # dict.fromkeys drops duplicates while keeping first-seen order; a set
    # would return the handles in an arbitrary, run-dependent order.
    return {k: list(dict.fromkeys(v)) for k, v in result.items()}


def normalize_handle(platform: str, value: str) -> str:
    """Extract the handle when *value* is a URL; return it as-is when it already is one.

    Args:
        platform: One of the keys produced by ``extract_social_handles``.
        value: A profile URL (with or without scheme) or a bare handle.

    Returns:
        The handle for *platform*. A leading ``@`` is stripped for every
        platform except ``youtube``. Falsy input is returned unchanged, and a
        URL from which no handle can be extracted is returned as given.
    """
    if not value:
        return value
    # Every platform pattern needs a "domain/handle" substring, so any value
    # containing "/" may be a URL, including scheme-less ones such as
    # "instagram.com/name" or "m.facebook.com/name".
    if "/" in value:
        handles = extract_social_handles([value]).get(platform, [])
        if handles:
            value = handles[0]
    return value if platform == "youtube" else value.lstrip("@")