fix(report+analysis): Instagram/Facebook Optional 완화 + viewclinic mock 제거 + brand_assets 강제주입

- schemas/report.py: InstagramAccount/InstagramAudit/FacebookPage/FacebookAudit 필드 Optional 완화 (LLM이 page 1·2개 모두 language/label/logo/has_whatsapp 등 빼먹는 케이스 차단)
- analysis.py: viewclinic mock 분기(_is_mock, _load_mock_report, _load_mock_plan) 제거 — raw_data 충분
- analysis.py: _build_clinic_snapshot에 brandAssets.logo_images/brand_colors 강제 주입 (LLM 프롬프트 가드 무시하고 null 두는 케이스 차단)
- analysis.py: facebook_audit.pages 머지 방식 변경 — LLM 첫 페이지 템플릿 복제 후 코드 patch로 인덱스별 덮어쓰기 (EN(index 1) 드랍 + label/logo 누락 검증 실패 동시 회피)

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-06-01 08:50:35 +09:00 · 2026-06-01 08:50:35 +09:00 · e5a9036e47
parent 5dbc7d7ffe
commit e5a9036e47
2 changed files with 60 additions and 84 deletions
--- a/app/integrations/llm/schemas/report.py
+++ b/app/integrations/llm/schemas/report.py
@@ -145,24 +145,25 @@ class YouTubeAudit(BaseModel):
 # --- Instagram ---
 class InstagramAccount(BaseModel):
-    handle: str
+    # LLM이 누락 가능 — Optional로 받아 ValidationError 차단.
-    language: Language
+    handle: str | None = None
-    label: str
+    language: Language | None = None
-    posts: int
+    label: str | None = None
-    followers: int
+    posts: int | None = None
-    following: int
+    followers: int | None = None
-    category: str
+    following: int | None = None
-    profile_link: str
+    category: str | None = None
-    highlights: list[str]
+    profile_link: str | None = None
-    reels_count: int
+    highlights: list[str] = []
-    content_format: str
+    reels_count: int | None = None
-    profile_photo: str
+    content_format: str | None = None
-    bio: str
+    profile_photo: str | None = None
    bio: str | None = None
 class InstagramAudit(BaseModel):
-    accounts: list[InstagramAccount]
+    accounts: list[InstagramAccount] = []
-    diagnosis: list[DiagnosisItem]
+    diagnosis: list[DiagnosisItem] = []
 # --- Facebook ---
@@ -181,31 +182,32 @@ class BrandInconsistency(BaseModel):
 class FacebookPage(BaseModel):
-    url: str
+    # LLM이 누락 가능 (page 1·2개 모두 language/label/logo/has_whatsapp 빼먹는 경우 관찰됨).
-    page_name: str
+    url: str | None = None
-    language: Language
+    page_name: str | None = None
-    label: str
+    language: Language | None = None
-    followers: int
+    label: str | None = None
-    following: int
+    followers: int | None = None
-    category: str
+    following: int | None = None
-    bio: str
+    category: str | None = None
-    logo: str
+    bio: str | None = None
-    logo_description: str
+    logo: str | None = None
-    link: str
+    logo_description: str | None = None
-    linked_domain: str
+    link: str | None = None
-    reviews: int
+    linked_domain: str | None = None
-    recent_post_age: str
+    reviews: int | None = None
-    has_whatsapp: bool
+    recent_post_age: str | None = None
    has_whatsapp: bool | None = None
    post_frequency: str | None = None
    top_content_type: str | None = None
    engagement: str | None = None
 class FacebookAudit(BaseModel):
-    pages: list[FacebookPage]
+    pages: list[FacebookPage] = []
-    diagnosis: list[DiagnosisItem]
+    diagnosis: list[DiagnosisItem] = []
-    brand_inconsistencies: list[BrandInconsistency]
+    brand_inconsistencies: list[BrandInconsistency] = []
-    consolidation_recommendation: str
+    consolidation_recommendation: str | None = None
 # --- 기타 채널 / 웹사이트 ---
--- a/app/services/analysis.py
+++ b/app/services/analysis.py
@@ -1,6 +1,5 @@
 import json
 import logging
 import os
 import re
 from datetime import datetime
 from common.db import fetchone, execute, fetch_raw, get_analysis_raw_data, save_analysis_report, get_market_analysis
@@ -125,6 +124,10 @@ def _build_clinic_snapshot(gangnam_unni: dict, hospital: dict) -> dict:
            "rating":       lead.get("rating"),
            "review_count": lead.get("reviews"),
        }
    # brand_assets에서 logo_images / brand_colors 강제 주입. LLM이 프롬프트 가드 무시하고 null로 두는 케이스 차단.
    ba = hospital.get("brandAssets") or {}
    if ba.get("logo_images"):  snapshot["logo_images"]  = ba["logo_images"]
    if ba.get("brand_colors"): snapshot["brand_colors"] = ba["brand_colors"]
    return ClinicSnapshot.model_validate(snapshot).model_dump()
@@ -313,49 +316,24 @@ def _patch_report(result: ReportOutput, overrides: dict) -> ReportOutput:
    merged = _deep_merge(result.model_dump(), overrides)
    # 인스타 계정은 프롬프트에서 LLM이 []로 두게 했고, 코드가 수집 데이터로 채운다 (데이터 없으면 빈 리스트)
    merged.setdefault("instagram_audit", {})["accounts"] = (overrides.get("instagram_audit") or {}).get("accounts") or []
-    # 페북 페이지(KR+EN)도 코드가 결정적으로 만든다. LLM이 KR 1개만 만들면 _deep_merge가 index 0만 머지하고
+    # 페북 페이지(KR+EN): _page_patch가 부분 필드만 만들어 그대로 박으면 검증 실패(label/logo 등 누락).
-    # EN(index 1)을 드랍하는 버그가 있어 — overrides의 코드 빌드 리스트를 통째 강제 치환.
+    # LLM이 만든 첫 페이지(보통 KR)를 템플릿으로 복사한 뒤 코드 patch로 인덱스별 덮어쓰기 →
-    fb_pages = (overrides.get("facebook_audit") or {}).get("pages")
+    # 필수 필드는 LLM 디폴트 받고, 수집 수치는 코드 값. EN 누락 버그 회피.
    fb_pages = (overrides.get("facebook_audit") or {}).get("pages") or []
    if fb_pages:
-        merged.setdefault("facebook_audit", {})["pages"] = fb_pages
+        base_pages = merged.setdefault("facebook_audit", {}).setdefault("pages", [])
        template = base_pages[0] if base_pages else None
        while len(base_pages) < len(fb_pages) and template:
            base_pages.append({**template})
        for i, patch in enumerate(fb_pages):
            if i < len(base_pages):
                base_pages[i].update(patch)
    return ReportOutput(**merged)
 _MOCK_DOMAINS: set[str] = set()  # viewclinic도 real LLM 거치게 — raw_data가 충분해 mock 의존 불필요
 _MOCK_REPORT_PATH = os.path.join(os.path.dirname(__file__), "../mock/report_viewclinic.json")
 async def _is_mock(analysis_run_id: str) -> bool:
    row = await fetchone(
        "SELECT h.url FROM analysis_runs ar JOIN hospital_baseinfo h USING (hospital_id)"
        " WHERE ar.analysis_run_id = %s",
        (analysis_run_id,),
    )
    url = (row or {}).get("url") or ""
    return any(domain in url for domain in _MOCK_DOMAINS)
 def _load_mock_report() -> ReportOutput:
    with open(_MOCK_REPORT_PATH, encoding="utf-8") as f:
        return ReportOutput(**json.load(f))
 _MOCK_PLAN_PATH = os.path.join(os.path.dirname(__file__), "../mock/plan_viewclinic.json")
 def _load_mock_plan() -> PlanOutput:
    with open(_MOCK_PLAN_PATH, encoding="utf-8") as f:
        return PlanOutput(**json.load(f))
 async def run_report_task(analysis_run_id: str) -> None:
    logger.info("[report] start run=%s", analysis_run_id)
-    if await _is_mock(analysis_run_id):
+    result = await generate_report(analysis_run_id)
        logger.info("[report] mock mode run=%s", analysis_run_id)
        result = _load_mock_report()
        result.youtube_audit.linked_urls = []
    else:
        result = await generate_report(analysis_run_id)
    result = _patch_report(result, await _build_overrides(analysis_run_id))
    await save_analysis_report(analysis_run_id, result.model_dump())
    logger.info("[report] done run=%s", analysis_run_id)
@@ -372,18 +350,14 @@ def _patch_plan(result: PlanOutput, logo_desc: str) -> PlanOutput:
 async def run_plan_task(analysis_run_id: str) -> None:
    logger.info("[plan] start run=%s", analysis_run_id)
-    if await _is_mock(analysis_run_id):
+    result = await generate_plan(analysis_run_id)
-        logger.info("[plan] mock mode run=%s", analysis_run_id)
+    # profile_photo 는 brand_assets.logo_description 으로 코드가 박음 (LLM "(가이드 미보유)" 같은 hallucination 차단)
-        result = _load_mock_plan()
+    run = await fetchone("SELECT hospital_id FROM analysis_runs WHERE analysis_run_id = %s", (analysis_run_id,))
-    else:
+    if run:
-        result = await generate_plan(analysis_run_id)
+        hr = await fetchone("SELECT raw_data FROM hospital_baseinfo WHERE hospital_id = %s", (run["hospital_id"],))
-        # profile_photo 는 brand_assets.logo_description 으로 코드가 박음 (LLM "(가이드 미보유)" 같은 hallucination 차단)
+        h = json.loads(hr["raw_data"]) if hr and isinstance(hr.get("raw_data"), str) else (hr or {}).get("raw_data") or {}
-        run = await fetchone("SELECT hospital_id FROM analysis_runs WHERE analysis_run_id = %s", (analysis_run_id,))
+        logo_desc = ((h.get("brandAssets") or {}).get("logo_description")) or ""
-        if run:
+        result = _patch_plan(result, logo_desc)
            hr = await fetchone("SELECT raw_data FROM hospital_baseinfo WHERE hospital_id = %s", (run["hospital_id"],))
            h = json.loads(hr["raw_data"]) if hr and isinstance(hr.get("raw_data"), str) else (hr or {}).get("raw_data") or {}
            logo_desc = ((h.get("brandAssets") or {}).get("logo_description")) or ""
            result = _patch_plan(result, logo_desc)
    await execute(
        "UPDATE analysis_runs SET plan_data = %s WHERE analysis_run_id = %s",
        (json.dumps(result.model_dump(), ensure_ascii=False), analysis_run_id),