67 lines
2.0 KiB
Python
67 lines
2.0 KiB
Python
import os
|
|
import asyncio
|
|
import logging
|
|
from http import HTTPMethod
|
|
import httpx
|
|
|
|
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
|
|
|
|
# Default `timeout` for http_request; httpx treats a bare number as seconds.
REQUEST_TIMEOUT = 60
|
|
|
|
|
|
|
|
def get_env(key: str) -> str:
    """Return the value of the environment variable *key*.

    Args:
        key: Name of the environment variable to read.

    Returns:
        The variable's non-empty value.

    Raises:
        EnvironmentError: If the variable is unset or set to an empty string.
    """
    value = os.environ.get(key, "")
    if value:
        return value
    # Unset and empty are treated the same: both count as missing.
    raise EnvironmentError(f"Missing env: {key}")
|
|
|
|
async def http_request(
    method: HTTPMethod,
    url: str,
    *,
    label: str,
    headers: dict | None = None,
    params: dict | None = None,
    json_body: dict | None = None,
    timeout: int = REQUEST_TIMEOUT,
    max_retries: int = 0,
) -> httpx.Response | None:
    """Send one HTTP request, retrying on transport-level failures.

    Only ``httpx.RequestError`` triggers a retry. Any response that arrives
    is returned as-is; its status code is not inspected here.

    Args:
        method: HTTP method passed to ``httpx.AsyncClient.request``.
        url: Target URL.
        label: Short name for this call, used only in the printed messages.
        headers: Optional request headers.
        params: Optional query parameters.
        json_body: Optional payload sent through httpx's ``json=`` argument.
        timeout: Timeout handed to httpx for each attempt.
        max_retries: Number of extra attempts after the first one fails.
            Negative values are treated as 0 so the request is always sent
            at least once.

    Returns:
        The response of the first attempt that completes, or ``None`` when
        every attempt raised ``httpx.RequestError``.
    """
    # Clamp: a negative value would make the loop below empty and return
    # None without ever sending the request.
    retries = max(max_retries, 0)

    async with httpx.AsyncClient() as client:
        for attempt in range(retries + 1):
            try:
                return await client.request(
                    method,
                    url,
                    headers=headers,
                    params=params,
                    json=json_body,
                    timeout=timeout,
                )
            except httpx.RequestError as e:
                if attempt >= retries:
                    # Out of attempts: report and give up.
                    print(f" [error] {label} → {e}")
                    return None
                print(f" [retry] {label} → {e}, attempt {attempt + 1}")
                # Linear backoff: 2s, 4s, 6s, ... before the next attempt.
                await asyncio.sleep((attempt + 1) * 2)

    # Not reachable (the loop always runs and its last pass returns); kept so
    # the declared return type is explicit.
    return None
|
|
|
|
|
|
async def _run_optional_step(coro, label: str) -> None:
|
|
"""부가 단계 실행 헬퍼: 예외를 삼키고 경고 로그만 남겨 호출측 흐름이 멈추지 않게 격리."""
|
|
try:
|
|
await coro
|
|
except Exception as e:
|
|
logger.warning("%s 실패 (무시하고 진행): %s", label, e)
|
|
|
|
|
|
def _normalize_homepage(url: str) -> str:
|
|
"""URL을 scheme/www/끝슬래시 제거 + 소문자로 정규화 (homepage 매칭용)."""
|
|
u = (url or "").strip().lower()
|
|
for p in ("https://", "http://"):
|
|
if u.startswith(p):
|
|
u = u[len(p):]
|
|
if u.startswith("www."):
|
|
u = u[4:]
|
|
return u.rstrip("/")
|
|
|
|
|
|
def _with_scheme(u: str | None) -> str | None:
|
|
"""scheme 없는 URL에 https:// 보정 (수집기 파싱용). 빈 값은 None."""
|
|
return (u if "://" in u else "https://" + u) if u else None
|