83 lines
2.8 KiB
Python
83 lines
2.8 KiB
Python
import os
|
|
import re
|
|
import asyncio
|
|
from http import HTTPMethod
|
|
import httpx
|
|
|
|
# Default value of http_request's `timeout` parameter; it is forwarded
# unchanged as the `timeout` argument of the httpx request call.
REQUEST_TIMEOUT = 60
|
|
|
|
|
|
def get_env(key: str) -> str:
    """Return the value of environment variable *key*.

    Raises:
        EnvironmentError: If the variable is unset or set to an empty string.
    """
    value = os.environ.get(key, "")
    if value:
        return value
    # An empty string is treated the same as a missing variable.
    raise EnvironmentError(f"Missing env: {key}")
|
|
|
|
async def http_request(
    method: HTTPMethod,
    url: str,
    *,
    label: str,
    headers: dict | None = None,
    params: dict | None = None,
    json_body: dict | None = None,
    timeout: int = REQUEST_TIMEOUT,
    max_retries: int = 0,
) -> httpx.Response | None:
    """Send a single HTTP request, retrying on transport-level failures.

    A new ``httpx.AsyncClient`` is opened for the call and closed on exit.
    The response is returned as-is; its status code is not inspected here.

    Args:
        method: HTTP method to use.
        url: Target URL.
        label: Short name used only in the printed retry/error messages.
        headers: Optional request headers.
        params: Optional query parameters.
        json_body: Optional payload sent as the request's JSON body.
        timeout: Timeout value handed to httpx for each attempt.
        max_retries: Number of extra attempts after the first one fails
            with ``httpx.RequestError``.

    Returns:
        The ``httpx.Response`` of the first attempt that completes, or
        ``None`` when every attempt raised ``httpx.RequestError``.
        Exceptions of any other type are not caught here.
    """
    async with httpx.AsyncClient() as client:
        # attempt_no is 1-based: 1 .. max_retries + 1.
        for attempt_no in range(1, max_retries + 2):
            try:
                return await client.request(
                    method,
                    url,
                    headers=headers,
                    params=params,
                    json=json_body,
                    timeout=timeout,
                )
            except httpx.RequestError as exc:
                if attempt_no > max_retries:
                    # Retry budget exhausted: report and give up.
                    print(f" [error] {label} → {exc}")
                    return None
                print(f" [retry] {label} → {exc}, attempt {attempt_no}")
                # Linear backoff: 2s, 4s, 6s, ...
                await asyncio.sleep(attempt_no * 2)
    # Only reached when the loop body never ran (negative max_retries).
    return None
|
|
|
|
|
|
# First path segments after "instagram.com/" that are never reported as a handle.
_SKIP_IG = {"p", "reel", "stories", "explore", "accounts", "about", "directory"}
# First path segments after "facebook.com/" that are never reported as a handle.
_SKIP_FB = {"sharer", "share", "dialog", "plugins", "groups", "events", "watch", "help"}

# Patterns are compiled once at import time instead of being looked up per URL.
_INSTAGRAM_HANDLE_RE = re.compile(r"instagram\.com/([a-zA-Z0-9._]+)")
_YOUTUBE_HANDLE_RE = re.compile(
    r"youtube\.com/(?:@([a-zA-Z0-9._-]+)|channel/(UC[a-zA-Z0-9_-]+)|c/([a-zA-Z0-9._-]+))"
)
_FACEBOOK_HANDLE_RE = re.compile(r"facebook\.com/([a-zA-Z0-9._-]+)")
_NAVER_BLOG_HANDLE_RE = re.compile(r"blog\.naver\.com/([a-zA-Z0-9_-]+)")
_TIKTOK_HANDLE_RE = re.compile(r"tiktok\.com/@([a-zA-Z0-9._-]+)")


def extract_social_handles(urls: list[str]) -> dict[str, list[str]]:
    """Collect social-media handles found in *urls*, grouped by platform.

    Each URL is checked against every platform pattern, and at most one
    handle per platform is taken from a single URL (the first match).

    Args:
        urls: URLs to scan. Falsy entries (``""``, ``None``) are skipped.

    Returns:
        A dict with the keys ``"instagram"``, ``"youtube"``, ``"facebook"``,
        ``"naver_blog"`` and ``"tiktok"``. Each value is a list of unique
        handles in first-seen order.

        * instagram / facebook: the first path segment, unless its lowercase
          form is in ``_SKIP_IG`` / ``_SKIP_FB``.
        * youtube: ``"@name"`` for ``/@name`` URLs, the ``UC...`` id for
          ``/channel/`` URLs, or the name for ``/c/`` URLs.
        * naver_blog: the first path segment after ``blog.naver.com/``.
        * tiktok: the name after ``/@`` (without the ``@``).
    """
    found: dict[str, list[str]] = {
        "instagram": [],
        "youtube": [],
        "facebook": [],
        "naver_blog": [],
        "tiktok": [],
    }

    for url in urls:
        if not url:
            continue

        m = _INSTAGRAM_HANDLE_RE.search(url)
        if m and m.group(1).lower() not in _SKIP_IG:
            found["instagram"].append(m.group(1))

        m = _YOUTUBE_HANDLE_RE.search(url)
        if m:
            at_name, channel_id, custom_name = m.groups()
            # Only "@name" URLs keep the "@" prefix; the other two forms are
            # returned exactly as captured.
            found["youtube"].append(f"@{at_name}" if at_name else (channel_id or custom_name or ""))

        m = _FACEBOOK_HANDLE_RE.search(url)
        if m and m.group(1).lower() not in _SKIP_FB:
            found["facebook"].append(m.group(1))

        m = _NAVER_BLOG_HANDLE_RE.search(url)
        if m:
            found["naver_blog"].append(m.group(1))

        m = _TIKTOK_HANDLE_RE.search(url)
        if m:
            found["tiktok"].append(m.group(1))

    # dict.fromkeys drops duplicates while keeping first-seen order, so the
    # result is the same on every run (list(set(...)) order depends on string
    # hashing and can differ between interpreter runs).
    return {platform: list(dict.fromkeys(handles)) for platform, handles in found.items()}
|
|
|
|
|
|
def normalize_handle(platform: str, value: str) -> str:
    """Extract the handle when given a URL; return an existing handle as-is.

    A value counts as a URL when it contains ``"://"`` or starts with
    ``"www."``. In that case the handle for *platform* is taken from
    ``extract_social_handles``; if nothing is found the value is kept
    unchanged. Leading ``"@"`` characters are stripped for every platform
    except ``"youtube"``. Falsy values are returned untouched.
    """
    if not value:
        return value

    looks_like_url = value.startswith("www.") or "://" in value
    if looks_like_url:
        extracted = extract_social_handles([value]).get(platform, [])
        if extracted:
            value = extracted[0]

    if platform == "youtube":
        # YouTube values keep any leading "@".
        return value
    return value.lstrip("@")
|