89 lines
4.0 KiB
Python
89 lines
4.0 KiB
Python
"""report 단계 - Gemini Vision 으로 로고 묘사 + 채널 로고 매칭."""
|
|
import logging
|
|
import os
|
|
from urllib.parse import urlparse
|
|
from common.db.source import (
|
|
select_run_raw_data, update_raw_info_merge,
|
|
select_branding_info_id, select_branding_logo_url,
|
|
)
|
|
from common.utils import _run_optional_step
|
|
from integrations.llm.gemini_vision import VisionClient
|
|
|
|
# Module-level logger named after this module; used by every step below.
logger = logging.getLogger(__name__)
|
|
|
|
|
|
async def _describe_logo(analysis_run_id: str, info_id: int, vc: VisionClient) -> None:
    """Describe the official logo with Gemini Vision and merge the result.

    Candidate URLs are tried in priority order until the Vision client
    returns a non-empty result:

    1. the URL returned by ``select_branding_logo_url`` (noted by the original
       author as the canonical value from the HTML parser),
    2. ``mainpage["branding"]["logoUrl"]`` from the run's raw data
       (firecrawl metadata fallback, per the original note),
    3. ``mainpage["branding"]["faviconUrl"]`` from the run's raw data,
    4. ``<scheme>://<host>/favicon.ico`` derived from ``mainpage["sourceUrl"]``.

    The first non-empty result, minus its ``logo_images`` key, is merged into
    the branding row under ``brandAssets``. Nothing is written when there are
    no candidates or every candidate yields an empty result.

    Args:
        analysis_run_id: Run whose raw data and logo URL are read.
        info_id: Row id handed to ``update_raw_info_merge``.
        vc: Vision client providing ``analyze_brand_assets``.
    """
    # Defensive: treat a missing raw-data record as empty so the column logo
    # can still be used.
    raw = (await select_run_raw_data(analysis_run_id)) or {}
    mainpage = raw.get("mainpage") or {}
    homepage_url = mainpage.get("sourceUrl") or ""
    branding_meta = mainpage.get("branding") or {}
    column_logo = await select_branding_logo_url(analysis_run_id)

    candidates = [
        url
        for url in (
            column_logo,
            branding_meta.get("logoUrl"),
            branding_meta.get("faviconUrl"),
        )
        if url
    ]
    if homepage_url:
        parsed = urlparse(homepage_url)
        if parsed.scheme and parsed.netloc:
            candidates.append(f"{parsed.scheme}://{parsed.netloc}/favicon.ico")

    # The sources above frequently point at the same URL; drop repeats (keeping
    # priority order) so a failing URL is not sent to the Vision API twice.
    candidates = list(dict.fromkeys(candidates))

    if not candidates:
        logger.info("[brand_logo] skip — no candidates")
        return

    logger.info("[brand_logo] start run=%s candidates=%d", analysis_run_id, len(candidates))
    result: dict = {}
    for cand in candidates:
        # Normalize a None/empty response to {} so the .pop() below is always safe.
        result = (await vc.analyze_brand_assets(logo_url=cand, homepage_url=homepage_url)) or {}
        if result:
            break

    # logo_images now lives in its own column, so drop it from the JSON payload.
    result.pop("logo_images", None)
    if result:
        await update_raw_info_merge(info_id, {"brandAssets": result})
    logger.info("[brand_logo] done keys=%s", list(result.keys()) if result else None)
|
|
|
|
|
|
async def _describe_channel_logos(analysis_run_id: str, info_id: int, vc: VisionClient) -> None:
    """Compare channel profile logos against the official logo.

    Collects the ``_logo_url`` of each known channel found in the run's raw
    data, passes them to ``vc.describe_channel_logos`` together with the
    official logo URL, and, when a non-empty result comes back, merges it
    into the branding row under ``channelLogos``.

    Args:
        analysis_run_id: Run whose raw data and official logo URL are read.
        info_id: Row id handed to ``update_raw_info_merge``.
        vc: Vision client providing ``describe_channel_logos``.
    """
    raw = await select_run_raw_data(analysis_run_id)
    official = await select_branding_logo_url(analysis_run_id)

    # (raw-data key, display label) pairs; order determines the order of `logos`.
    channel_labels = (
        ("instagram", "Instagram"),
        ("facebook", "Facebook"),
        ("youtube", "YouTube"),
        ("instagram_en", "Instagram EN"),
        ("facebook_en", "Facebook EN"),
        ("tiktok", "TikTok"),
    )

    logos = []
    for key, label in channel_labels:
        channel_data = raw.get(key) or {}
        img = channel_data.get("_logo_url")
        if img:
            logos.append({"channel": label, "url": img})

    if not logos:
        logger.info("[channel_logos] skip — no channel profileImages")
        return

    channel_names = [entry["channel"] for entry in logos]
    logger.info("[channel_logos] start run=%s channels=%s official=%s",
                analysis_run_id, channel_names, bool(official))

    result = await vc.describe_channel_logos(official, logos)
    if result:
        # Keep every channel URL for front-end display, including channels
        # the Vision call could not assess.
        result["logos"] = logos
        await update_raw_info_merge(info_id, {"channelLogos": result})

    done_keys = list(result.keys()) if result else None
    logger.info("[channel_logos] done keys=%s", done_keys)
|
|
|
|
|
|
async def analyze_branding(analysis_run_id: str) -> None:
    """Run the Gemini branding steps; called right before the report build.

    Skips (with an info log) when ``GEMINI_API_KEY`` is not set or when
    ``select_branding_info_id`` returns ``None`` for the run. Otherwise runs
    the logo description step and then the channel-logo matching step, each
    wrapped in ``_run_optional_step`` (per the original note, so the two steps
    are isolated from each other — confirm against that helper).

    Args:
        analysis_run_id: Run to analyze.
    """
    gemini_key = os.environ.get("GEMINI_API_KEY")
    if not gemini_key:
        logger.info("[branding] skip — GEMINI_API_KEY 없음")
        return

    info_id = await select_branding_info_id(analysis_run_id)
    if info_id is None:
        logger.info("[branding] skip — branding source 없음 run=%s", analysis_run_id)
        return

    vision = VisionClient(gemini_key)
    logger.info("[branding] start run=%s", analysis_run_id)

    # Steps run sequentially; each coroutine is created only when its turn comes.
    steps = (
        (_describe_logo, "brand_logo"),
        (_describe_channel_logos, "channel_logos"),
    )
    for step, tag in steps:
        await _run_optional_step(step(analysis_run_id, info_id, vision), tag)

    logger.info("[branding] done run=%s", analysis_run_id)
|