fix(report+analysis): Instagram/Facebook Optional 완화 + viewclinic mock 제거 + brand_assets 강제주입

- schemas/report.py: InstagramAccount/InstagramAudit/FacebookPage/FacebookAudit 필드 Optional 완화 (LLM이 page 1·2개 모두 language/label/logo/has_whatsapp 등 빼먹는 케이스 차단)
- analysis.py: viewclinic mock 분기(_is_mock, _load_mock_report, _load_mock_plan) 제거 — raw_data 충분
- analysis.py: _build_clinic_snapshot에 brandAssets.logo_images/brand_colors 강제 주입 (LLM 프롬프트 가드 무시하고 null 두는 케이스 차단)
- analysis.py: facebook_audit.pages 머지 방식 변경 — LLM 첫 페이지 템플릿 복제 후 코드 patch로 인덱스별 덮어쓰기 (EN(index 1) 드랍 + label/logo 누락 검증 실패 동시 회피)

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-06-01 08:50:35 +09:00 · 2026-06-01 08:50:35 +09:00 · e5a9036e47
parent 5dbc7d7ffe
commit e5a9036e47
2 changed files with 60 additions and 84 deletions
--- a/app/integrations/llm/schemas/report.py
+++ b/app/integrations/llm/schemas/report.py
@@ -145,24 +145,25 @@ class YouTubeAudit(BaseModel):
 # --- Instagram ---

 class InstagramAccount(BaseModel):
-    handle: str
-    language: Language
-    label: str
-    posts: int
-    followers: int
-    following: int
-    category: str
-    profile_link: str
-    highlights: list[str]
-    reels_count: int
-    content_format: str
-    profile_photo: str
-    bio: str
+    # LLM이 누락 가능 — Optional로 받아 ValidationError 차단.
+    handle: str | None = None
+    language: Language | None = None
+    label: str | None = None
+    posts: int | None = None
+    followers: int | None = None
+    following: int | None = None
+    category: str | None = None
+    profile_link: str | None = None
+    highlights: list[str] = []
+    reels_count: int | None = None
+    content_format: str | None = None
+    profile_photo: str | None = None
+    bio: str | None = None


 class InstagramAudit(BaseModel):
-    accounts: list[InstagramAccount]
-    diagnosis: list[DiagnosisItem]
+    accounts: list[InstagramAccount] = []
+    diagnosis: list[DiagnosisItem] = []


 # --- Facebook ---
@@ -181,31 +182,32 @@ class BrandInconsistency(BaseModel):


 class FacebookPage(BaseModel):
-    url: str
-    page_name: str
-    language: Language
-    label: str
-    followers: int
-    following: int
-    category: str
-    bio: str
-    logo: str
-    logo_description: str
-    link: str
-    linked_domain: str
-    reviews: int
-    recent_post_age: str
-    has_whatsapp: bool
+    # LLM이 누락 가능 (page 1·2개 모두 language/label/logo/has_whatsapp 빼먹는 경우 관찰됨).
+    url: str | None = None
+    page_name: str | None = None
+    language: Language | None = None
+    label: str | None = None
+    followers: int | None = None
+    following: int | None = None
+    category: str | None = None
+    bio: str | None = None
+    logo: str | None = None
+    logo_description: str | None = None
+    link: str | None = None
+    linked_domain: str | None = None
+    reviews: int | None = None
+    recent_post_age: str | None = None
+    has_whatsapp: bool | None = None
    post_frequency: str | None = None
    top_content_type: str | None = None
    engagement: str | None = None


 class FacebookAudit(BaseModel):
-    pages: list[FacebookPage]
-    diagnosis: list[DiagnosisItem]
-    brand_inconsistencies: list[BrandInconsistency]
-    consolidation_recommendation: str
+    pages: list[FacebookPage] = []
+    diagnosis: list[DiagnosisItem] = []
+    brand_inconsistencies: list[BrandInconsistency] = []
+    consolidation_recommendation: str | None = None


 # --- 기타 채널 / 웹사이트 ---
--- a/app/services/analysis.py
+++ b/app/services/analysis.py
@@ -1,6 +1,5 @@
 import json
 import logging
-import os
 import re
 from datetime import datetime
 from common.db import fetchone, execute, fetch_raw, get_analysis_raw_data, save_analysis_report, get_market_analysis
@@ -125,6 +124,10 @@ def _build_clinic_snapshot(gangnam_unni: dict, hospital: dict) -> dict:
            "rating":       lead.get("rating"),
            "review_count": lead.get("reviews"),
        }
+    # brand_assets에서 logo_images / brand_colors 강제 주입. LLM이 프롬프트 가드 무시하고 null로 두는 케이스 차단.
+    ba = hospital.get("brandAssets") or {}
+    if ba.get("logo_images"):  snapshot["logo_images"]  = ba["logo_images"]
+    if ba.get("brand_colors"): snapshot["brand_colors"] = ba["brand_colors"]
    return ClinicSnapshot.model_validate(snapshot).model_dump()


@@ -313,48 +316,23 @@ def _patch_report(result: ReportOutput, overrides: dict) -> ReportOutput:
    merged = _deep_merge(result.model_dump(), overrides)
    # 인스타 계정은 프롬프트에서 LLM이 []로 두게 했고, 코드가 수집 데이터로 채운다 (데이터 없으면 빈 리스트)
    merged.setdefault("instagram_audit", {})["accounts"] = (overrides.get("instagram_audit") or {}).get("accounts") or []
-    # 페북 페이지(KR+EN)도 코드가 결정적으로 만든다. LLM이 KR 1개만 만들면 _deep_merge가 index 0만 머지하고
-    # EN(index 1)을 드랍하는 버그가 있어 — overrides의 코드 빌드 리스트를 통째 강제 치환.
-    fb_pages = (overrides.get("facebook_audit") or {}).get("pages")
+    # 페북 페이지(KR+EN): _page_patch가 부분 필드만 만들어 그대로 박으면 검증 실패(label/logo 등 누락).
+    # LLM이 만든 첫 페이지(보통 KR)를 템플릿으로 복사한 뒤 코드 patch로 인덱스별 덮어쓰기 →
+    # 필수 필드는 LLM 디폴트 받고, 수집 수치는 코드 값. EN 누락 버그 회피.
+    fb_pages = (overrides.get("facebook_audit") or {}).get("pages") or []
    if fb_pages:
-        merged.setdefault("facebook_audit", {})["pages"] = fb_pages
+        base_pages = merged.setdefault("facebook_audit", {}).setdefault("pages", [])
+        template = base_pages[0] if base_pages else None
+        while len(base_pages) < len(fb_pages) and template:
+            base_pages.append({**template})
+        for i, patch in enumerate(fb_pages):
+            if i < len(base_pages):
+                base_pages[i].update(patch)
    return ReportOutput(**merged)


-_MOCK_DOMAINS: set[str] = set()  # viewclinic도 real LLM 거치게 — raw_data가 충분해 mock 의존 불필요
-_MOCK_REPORT_PATH = os.path.join(os.path.dirname(__file__), "../mock/report_viewclinic.json")
-
-
-async def _is_mock(analysis_run_id: str) -> bool:
-    row = await fetchone(
-        "SELECT h.url FROM analysis_runs ar JOIN hospital_baseinfo h USING (hospital_id)"
-        " WHERE ar.analysis_run_id = %s",
-        (analysis_run_id,),
-    )
-    url = (row or {}).get("url") or ""
-    return any(domain in url for domain in _MOCK_DOMAINS)
-
-
-def _load_mock_report() -> ReportOutput:
-    with open(_MOCK_REPORT_PATH, encoding="utf-8") as f:
-        return ReportOutput(**json.load(f))
-
-
-_MOCK_PLAN_PATH = os.path.join(os.path.dirname(__file__), "../mock/plan_viewclinic.json")
-
-
-def _load_mock_plan() -> PlanOutput:
-    with open(_MOCK_PLAN_PATH, encoding="utf-8") as f:
-        return PlanOutput(**json.load(f))
-
-
 async def run_report_task(analysis_run_id: str) -> None:
    logger.info("[report] start run=%s", analysis_run_id)
-    if await _is_mock(analysis_run_id):
-        logger.info("[report] mock mode run=%s", analysis_run_id)
-        result = _load_mock_report()
-        result.youtube_audit.linked_urls = []
-    else:
    result = await generate_report(analysis_run_id)
    result = _patch_report(result, await _build_overrides(analysis_run_id))
    await save_analysis_report(analysis_run_id, result.model_dump())
@@ -372,10 +350,6 @@ def _patch_plan(result: PlanOutput, logo_desc: str) -> PlanOutput:

 async def run_plan_task(analysis_run_id: str) -> None:
    logger.info("[plan] start run=%s", analysis_run_id)
-    if await _is_mock(analysis_run_id):
-        logger.info("[plan] mock mode run=%s", analysis_run_id)
-        result = _load_mock_plan()
-    else:
    result = await generate_plan(analysis_run_id)
    # profile_photo 는 brand_assets.logo_description 으로 코드가 박음 (LLM "(가이드 미보유)" 같은 hallucination 차단)
    run = await fetchone("SELECT hospital_id FROM analysis_runs WHERE analysis_run_id = %s", (analysis_run_id,))