diff --git a/app/integrations/llm/schemas/report.py b/app/integrations/llm/schemas/report.py
index 8f175b7..e49d557 100644
--- a/app/integrations/llm/schemas/report.py
+++ b/app/integrations/llm/schemas/report.py
@@ -145,24 +145,25 @@ class YouTubeAudit(BaseModel):
 # --- Instagram ---
 
 class InstagramAccount(BaseModel):
-    handle: str
-    language: Language
-    label: str
-    posts: int
-    followers: int
-    following: int
-    category: str
-    profile_link: str
-    highlights: list[str]
-    reels_count: int
-    content_format: str
-    profile_photo: str
-    bio: str
+    # The LLM may omit any of these; accept them as Optional so a gap does not raise ValidationError.
+    handle: str | None = None
+    language: Language | None = None
+    label: str | None = None
+    posts: int | None = None
+    followers: int | None = None
+    following: int | None = None
+    category: str | None = None
+    profile_link: str | None = None
+    highlights: list[str] = []
+    reels_count: int | None = None
+    content_format: str | None = None
+    profile_photo: str | None = None
+    bio: str | None = None
 
 
 class InstagramAudit(BaseModel):
-    accounts: list[InstagramAccount]
-    diagnosis: list[DiagnosisItem]
+    accounts: list[InstagramAccount] = []
+    diagnosis: list[DiagnosisItem] = []
 
 
 # --- Facebook ---
@@ -181,31 +182,32 @@ class BrandInconsistency(BaseModel):
 
 
 class FacebookPage(BaseModel):
-    url: str
-    page_name: str
-    language: Language
-    label: str
-    followers: int
-    following: int
-    category: str
-    bio: str
-    logo: str
-    logo_description: str
-    link: str
-    linked_domain: str
-    reviews: int
-    recent_post_age: str
-    has_whatsapp: bool
+    # The LLM may omit these (observed dropping language/label/logo/has_whatsapp with both one and two pages).
+    url: str | None = None
+    page_name: str | None = None
+    language: Language | None = None
+    label: str | None = None
+    followers: int | None = None
+    following: int | None = None
+    category: str | None = None
+    bio: str | None = None
+    logo: str | None = None
+    logo_description: str | None = None
+    link: str | None = None
+    linked_domain: str | None = None
+    reviews: int | None = None
+    recent_post_age: str | None = None
+    has_whatsapp: bool | None = None
     post_frequency: str | None = None
     top_content_type: str | None = None
     engagement: str | None = None
 
 
 class FacebookAudit(BaseModel):
-    pages: list[FacebookPage]
-    diagnosis: list[DiagnosisItem]
-    brand_inconsistencies: list[BrandInconsistency]
-    consolidation_recommendation: str
+    pages: list[FacebookPage] = []
+    diagnosis: list[DiagnosisItem] = []
+    brand_inconsistencies: list[BrandInconsistency] = []
+    consolidation_recommendation: str | None = None
 
 
 # --- 기타 채널 / 웹사이트 ---
diff --git a/app/services/analysis.py b/app/services/analysis.py
index a692d98..96dcaa1 100644
--- a/app/services/analysis.py
+++ b/app/services/analysis.py
@@ -1,6 +1,5 @@
 import json
 import logging
-import os
 import re
 from datetime import datetime
 from common.db import fetchone, execute, fetch_raw, get_analysis_raw_data, save_analysis_report, get_market_analysis
@@ -125,6 +124,12 @@ def _build_clinic_snapshot(gangnam_unni: dict, hospital: dict) -> dict:
         "rating": lead.get("rating"),
         "review_count": lead.get("reviews"),
     }
+    # Force-inject logo_images / brand_colors from brand_assets; covers the LLM ignoring the prompt guard and leaving them null.
+    ba = hospital.get("brandAssets") or {}
+    if ba.get("logo_images"):
+        snapshot["logo_images"] = ba["logo_images"]
+    if ba.get("brand_colors"):
+        snapshot["brand_colors"] = ba["brand_colors"]
     return ClinicSnapshot.model_validate(snapshot).model_dump()
 
 
@@ -313,49 +318,25 @@ def _patch_report(result: ReportOutput, overrides: dict) -> ReportOutput:
     merged = _deep_merge(result.model_dump(), overrides)
     # 인스타 계정은 프롬프트에서 LLM이 []로 두게 했고, 코드가 수집 데이터로 채운다 (데이터 없으면 빈 리스트)
     merged.setdefault("instagram_audit", {})["accounts"] = (overrides.get("instagram_audit") or {}).get("accounts") or []
-    # 페북 페이지(KR+EN)도 코드가 결정적으로 만든다. LLM이 KR 1개만 만들면 _deep_merge가 index 0만 머지하고
-    # EN(index 1)을 드랍하는 버그가 있어 — overrides의 코드 빌드 리스트를 통째 강제 치환.
-    fb_pages = (overrides.get("facebook_audit") or {}).get("pages")
+    # Facebook pages (KR+EN): the code-built page patches carry only some fields, so they are layered on top
+    # of the LLM pages by index rather than replacing them. A page the LLM did not produce is cloned from its
+    # first page (usually KR), so descriptive fields keep an LLM default while collected figures come from code.
+    # If the LLM produced no page at all, start from an empty dict (every FacebookPage field is optional) so
+    # the collected pages are kept instead of being silently dropped.
+    fb_pages = (overrides.get("facebook_audit") or {}).get("pages") or []
     if fb_pages:
-        merged.setdefault("facebook_audit", {})["pages"] = fb_pages
+        base_pages = merged.setdefault("facebook_audit", {}).setdefault("pages", [])
+        template = base_pages[0] if base_pages else {}
+        for i, patch in enumerate(fb_pages):
+            if i >= len(base_pages):
+                base_pages.append({**template})
+            base_pages[i].update(patch)
     return ReportOutput(**merged)
 
 
-_MOCK_DOMAINS: set[str] = set()  # viewclinic도 real LLM 거치게 — raw_data가 충분해 mock 의존 불필요
-_MOCK_REPORT_PATH = os.path.join(os.path.dirname(__file__), "../mock/report_viewclinic.json")
-
-
-async def _is_mock(analysis_run_id: str) -> bool:
-    row = await fetchone(
-        "SELECT h.url FROM analysis_runs ar JOIN hospital_baseinfo h USING (hospital_id)"
-        " WHERE ar.analysis_run_id = %s",
-        (analysis_run_id,),
-    )
-    url = (row or {}).get("url") or ""
-    return any(domain in url for domain in _MOCK_DOMAINS)
-
-
-def _load_mock_report() -> ReportOutput:
-    with open(_MOCK_REPORT_PATH, encoding="utf-8") as f:
-        return ReportOutput(**json.load(f))
-
-
-_MOCK_PLAN_PATH = os.path.join(os.path.dirname(__file__), "../mock/plan_viewclinic.json")
-
-
-def _load_mock_plan() -> PlanOutput:
-    with open(_MOCK_PLAN_PATH, encoding="utf-8") as f:
-        return PlanOutput(**json.load(f))
-
-
 async def run_report_task(analysis_run_id: str) -> None:
     logger.info("[report] start run=%s", analysis_run_id)
-    if await _is_mock(analysis_run_id):
-        logger.info("[report] mock mode run=%s", analysis_run_id)
-        result = _load_mock_report()
-        result.youtube_audit.linked_urls = []
-    else:
-        result = await generate_report(analysis_run_id)
+    result = await generate_report(analysis_run_id)
     result = _patch_report(result, await _build_overrides(analysis_run_id))
     await save_analysis_report(analysis_run_id, result.model_dump())
     logger.info("[report] done run=%s", analysis_run_id)
@@ -372,18 +353,14 @@ def _patch_plan(result: PlanOutput, logo_desc: str) -> PlanOutput:
 
 async def run_plan_task(analysis_run_id: str) -> None:
     logger.info("[plan] start run=%s", analysis_run_id)
-    if await _is_mock(analysis_run_id):
-        logger.info("[plan] mock mode run=%s", analysis_run_id)
-        result = _load_mock_plan()
-    else:
-        result = await generate_plan(analysis_run_id)
-        # profile_photo 는 brand_assets.logo_description 으로 코드가 박음 (LLM "(가이드 미보유)" 같은 hallucination 차단)
-        run = await fetchone("SELECT hospital_id FROM analysis_runs WHERE analysis_run_id = %s", (analysis_run_id,))
-        if run:
-            hr = await fetchone("SELECT raw_data FROM hospital_baseinfo WHERE hospital_id = %s", (run["hospital_id"],))
-            h = json.loads(hr["raw_data"]) if hr and isinstance(hr.get("raw_data"), str) else (hr or {}).get("raw_data") or {}
-            logo_desc = ((h.get("brandAssets") or {}).get("logo_description")) or ""
-            result = _patch_plan(result, logo_desc)
+    result = await generate_plan(analysis_run_id)
+    # profile_photo is filled in by code from brand_assets.logo_description (blocks LLM hallucinations such as "(가이드 미보유)", i.e. "(no guide available)")
+    run = await fetchone("SELECT hospital_id FROM analysis_runs WHERE analysis_run_id = %s", (analysis_run_id,))
+    if run:
+        hr = await fetchone("SELECT raw_data FROM hospital_baseinfo WHERE hospital_id = %s", (run["hospital_id"],))
+        h = json.loads(hr["raw_data"]) if hr and isinstance(hr.get("raw_data"), str) else (hr or {}).get("raw_data") or {}
+        logo_desc = ((h.get("brandAssets") or {}).get("logo_description")) or ""
+        result = _patch_plan(result, logo_desc)
     await execute(
         "UPDATE analysis_runs SET plan_data = %s WHERE analysis_run_id = %s",
         (json.dumps(result.model_dump(), ensure_ascii=False), analysis_run_id),