diff --git a/app/common/db/__init__.py b/app/common/db/__init__.py
index 7ac14e3..8f20d0d 100644
--- a/app/common/db/__init__.py
+++ b/app/common/db/__init__.py
@@ -3,6 +3,7 @@ from common.db.hospital import select_hospital, update_hospital_status, insert_h
 from common.db.source import (
     insert_source, select_source_mainpage, select_source_by_type,
     insert_raw_info, update_raw_info_status, update_raw_info, update_raw_info_merge,
+    update_raw_info_logo_url, select_branding_logo_url,
     select_raw_info_data,
     select_run_sources, select_run_raw_data,
     select_run_source_raw, select_run_mainpage_url,
diff --git a/app/common/db/source.py b/app/common/db/source.py
index 1977791..3084685 100644
--- a/app/common/db/source.py
+++ b/app/common/db/source.py
@@ -98,6 +98,33 @@ async def select_run_source_raw(
     return json.loads(row["raw_data"]) if isinstance(row["raw_data"], str) else row["raw_data"]
 
 
+async def update_raw_info_logo_url(info_id: int, logo_url: str) -> None:
+    """Store the logo URL in the raw_info.logo_url column.
+
+    The URL is kept in its own column, separate from the JSON raw_data, so it is
+    easy to index and query.
+    """
+    await execute(
+        "UPDATE raw_info SET logo_url = %s WHERE info_id = %s",
+        (logo_url, info_id),
+    )
+
+
+async def select_branding_logo_url(analysis_run_id: str) -> str | None:
+    """Return the logo URL stored for the run's 'branding' source, or None.
+
+    Rows whose logo_url is NULL are skipped so that the unordered LIMIT 1 cannot
+    land on an empty row while another branding row of the same run holds a URL.
+    """
+    row = await fetchone(
+        "SELECT ri.logo_url FROM raw_info ri JOIN remote_source rs USING (source_id)"
+        " WHERE ri.analysis_run_id = %s AND rs.source_type = 'branding'"
+        " AND ri.logo_url IS NOT NULL LIMIT 1",
+        (analysis_run_id,),
+    )
+    return (row or {}).get("logo_url")
+
+
 async def update_raw_info_merge(info_id: int, patch: dict) -> None:
     """raw_info.raw_data 를 read-modify-write 로 top-level 머지.
     한 source 가 단계별로 (예: branding 의 brandAssets → channelLogos) 키를 덧붙일 때 사용."""
diff --git a/app/services/analysis.py b/app/services/analysis.py
index 8672c50..4dee2a6 100644
--- a/app/services/analysis.py
+++ b/app/services/analysis.py
@@ -4,7 +4,7 @@ import re
 from datetime import datetime
 from urllib.parse import urlparse
 from common.db.run import select_run, update_run_report, update_run_plan
-from common.db.source import select_run_raw_data
+from common.db.source import select_run_raw_data, select_branding_logo_url
 from common.db.market import select_market
 from integrations.llm.llm_service import LLMService
 from integrations.llm.prompt import report_prompt, plan_prompt, youtube_diagnosis_prompt
@@ -100,7 +100,7 @@ async def generate_plan(analysis_run_id: str) -> PlanOutput:
     return await LLMService(provider="perplexity").generate(plan_prompt, input_data)
 
 
-def _build_clinic_snapshot(gangnam_unni: dict, mainpage: dict, brand_assets: dict) -> dict:
+def _build_clinic_snapshot(gangnam_unni: dict, mainpage: dict, brand_assets: dict, logo_url: str | None = None) -> dict:
     snapshot: dict = {}
     doctors = gangnam_unni.get("doctors", [])
     lead = max(doctors, key=lambda d: d.get("reviews", 0)) if doctors else None
@@ -121,8 +121,9 @@ def _build_clinic_snapshot(gangnam_unni: dict, mainpage: dict, brand_assets: dic
             "rating": lead.get("rating"),
             "review_count": lead.get("reviews"),
         }
-    # branding.brandAssets 에서 logo_images / brand_colors 강제 주입. LLM 이 프롬프트 가드 무시하고 null 로 두는 케이스 차단.
-    if brand_assets.get("logo_images"): snapshot["logo_images"] = brand_assets["logo_images"]
+    # Force-inject the logo URL (raw_info.logo_url column) and brand_colors (JSON) so an LLM null cannot win.
+    if logo_url:
+        snapshot["logo_images"] = {"circle": None, "horizontal": logo_url, "korean": None}
     if brand_assets.get("brand_colors"): snapshot["brand_colors"] = brand_assets["brand_colors"]
     return ClinicSnapshot.model_validate(snapshot).model_dump()
 
@@ -268,8 +269,9 @@ async def _build_overrides(analysis_run_id: str) -> dict:
     naver_cafe = raw.get("naver_cafe", {}) or {}
     brand_assets = branding.get("brandAssets") or {}
     channel_logos = branding.get("channelLogos") or {}
+    logo_url = await select_branding_logo_url(analysis_run_id)
 
-    snapshot: dict = _build_clinic_snapshot(gangnam_unni, mainpage, brand_assets)
+    snapshot: dict = _build_clinic_snapshot(gangnam_unni, mainpage, brand_assets, logo_url)
     yt_patch: dict = await _build_youtube_audit(youtube)
 
     # ── instagram (KR·EN 계정을 코드에서 구성 → LLM 출력 무시하고 교체) ──────────────
diff --git a/app/services/collect_extras.py b/app/services/collect_extras.py
index 3cec4d6..2f05dca 100644
--- a/app/services/collect_extras.py
+++ b/app/services/collect_extras.py
@@ -1,7 +1,7 @@
 import logging
 import os
 from urllib.parse import urlparse
-from common.db.source import select_run_raw_data, update_raw_info_merge
+from common.db.source import select_run_raw_data, update_raw_info_merge, update_raw_info_logo_url
 from integrations.vision import VisionClient
 from integrations.color_extractor import extract_brand_assets_from_site
 
@@ -57,9 +57,6 @@ async def collect_brand_assets(analysis_run_id: str, info_id: int) -> None:
                 if result:
                     used_kind = kind
                     break
-        # favicon 으로만 분석된 경우 진짜 로고가 아니므로 logo URL 은 박지 않음 (묘사는 OK)
-        if result and used_kind == "favicon" and result.get("logo_images"):
-            result["logo_images"] = {"circle": None, "horizontal": None, "korean": None}
     elif not api_key:
         logger.info("[brand_assets] GEMINI_API_KEY not set — 색상만 저장, Vision 묘사 skip")
 
@@ -71,10 +68,21 @@ async def collect_brand_assets(analysis_run_id: str, info_id: int) -> None:
     elif result:
         result["color_source"] = "vision"
 
+    # 5. Store the logo URL in the raw_info.logo_url column instead of the JSON
+    #    (keeps the raw URL separate from the analysis text).
+    #    A favicon-only match is not a real logo, so the column is not written.
+    column_logo_url = logo_url if used_kind in ("logo", "og") and logo_url else None
+    if column_logo_url:
+        await update_raw_info_logo_url(info_id, column_logo_url)
+
     if result:
+        # Drop logo_images only when there is a result: the surrounding code treats
+        # `result` as possibly empty/None, and an unguarded pop would raise on None.
+        result.pop("logo_images", None)
         result["logo_source"] = used_kind or "none"
         await update_raw_info_merge(info_id, {"brandAssets": result})
-    logger.info("[brand_assets] done keys=%s", list(result.keys()) if result else None)
+    logger.info("[brand_assets] done logo_url=%s keys=%s",
+                bool(column_logo_url), list(result.keys()) if result else None)
 
 
 async def collect_channel_logos(analysis_run_id: str, info_id: int) -> None: