fix(report+analysis): Instagram/Facebook Optional 완화 + viewclinic mock 제거 + brand_assets 강제주입

- schemas/report.py: InstagramAccount/InstagramAudit/FacebookPage/FacebookAudit 필드를 Optional(기본값 None 또는 [])로 완화
  (페이지가 1개든 2개든 LLM이 language/label/logo/has_whatsapp 등을 빼먹어 ValidationError가 나는 케이스 차단)
- analysis.py: viewclinic mock 분기(_is_mock, _load_mock_report, _load_mock_plan) 제거 — raw_data가 충분해 mock 의존 불필요
- analysis.py: _build_clinic_snapshot에 brandAssets.logo_images/brand_colors 강제 주입
  (LLM이 프롬프트 가드를 무시하고 null로 두는 케이스 차단)
- analysis.py: facebook_audit.pages 머지 방식 변경 — LLM이 만든 첫 페이지를 템플릿으로 복제한 뒤 코드 patch로 인덱스별 덮어쓰기
  (EN(index 1) 페이지 드랍과 label/logo 누락으로 인한 검증 실패를 동시에 회피)

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
main
Mina Choi 2026-06-01 08:50:35 +09:00
parent 5dbc7d7ffe
commit e5a9036e47
2 changed files with 60 additions and 84 deletions

View File

@ -145,24 +145,25 @@ class YouTubeAudit(BaseModel):
# --- Instagram ---
class InstagramAccount(BaseModel):
handle: str
language: Language
label: str
posts: int
followers: int
following: int
category: str
profile_link: str
highlights: list[str]
reels_count: int
content_format: str
profile_photo: str
bio: str
# LLM이 누락 가능 — Optional로 받아 ValidationError 차단.
handle: str | None = None
language: Language | None = None
label: str | None = None
posts: int | None = None
followers: int | None = None
following: int | None = None
category: str | None = None
profile_link: str | None = None
highlights: list[str] = []
reels_count: int | None = None
content_format: str | None = None
profile_photo: str | None = None
bio: str | None = None
class InstagramAudit(BaseModel):
accounts: list[InstagramAccount]
diagnosis: list[DiagnosisItem]
accounts: list[InstagramAccount] = []
diagnosis: list[DiagnosisItem] = []
# --- Facebook ---
@ -181,31 +182,32 @@ class BrandInconsistency(BaseModel):
class FacebookPage(BaseModel):
url: str
page_name: str
language: Language
label: str
followers: int
following: int
category: str
bio: str
logo: str
logo_description: str
link: str
linked_domain: str
reviews: int
recent_post_age: str
has_whatsapp: bool
# LLM이 누락 가능 (page 1·2개 모두 language/label/logo/has_whatsapp 빼먹는 경우 관찰됨).
url: str | None = None
page_name: str | None = None
language: Language | None = None
label: str | None = None
followers: int | None = None
following: int | None = None
category: str | None = None
bio: str | None = None
logo: str | None = None
logo_description: str | None = None
link: str | None = None
linked_domain: str | None = None
reviews: int | None = None
recent_post_age: str | None = None
has_whatsapp: bool | None = None
post_frequency: str | None = None
top_content_type: str | None = None
engagement: str | None = None
class FacebookAudit(BaseModel):
pages: list[FacebookPage]
diagnosis: list[DiagnosisItem]
brand_inconsistencies: list[BrandInconsistency]
consolidation_recommendation: str
pages: list[FacebookPage] = []
diagnosis: list[DiagnosisItem] = []
brand_inconsistencies: list[BrandInconsistency] = []
consolidation_recommendation: str | None = None
# --- 기타 채널 / 웹사이트 ---

View File

@ -1,6 +1,5 @@
import json
import logging
import os
import re
from datetime import datetime
from common.db import fetchone, execute, fetch_raw, get_analysis_raw_data, save_analysis_report, get_market_analysis
@ -125,6 +124,10 @@ def _build_clinic_snapshot(gangnam_unni: dict, hospital: dict) -> dict:
"rating": lead.get("rating"),
"review_count": lead.get("reviews"),
}
# brand_assets에서 logo_images / brand_colors 강제 주입. LLM이 프롬프트 가드 무시하고 null로 두는 케이스 차단.
ba = hospital.get("brandAssets") or {}
if ba.get("logo_images"): snapshot["logo_images"] = ba["logo_images"]
if ba.get("brand_colors"): snapshot["brand_colors"] = ba["brand_colors"]
return ClinicSnapshot.model_validate(snapshot).model_dump()
@ -313,48 +316,23 @@ def _patch_report(result: ReportOutput, overrides: dict) -> ReportOutput:
merged = _deep_merge(result.model_dump(), overrides)
# 인스타 계정은 프롬프트에서 LLM이 []로 두게 했고, 코드가 수집 데이터로 채운다 (데이터 없으면 빈 리스트)
merged.setdefault("instagram_audit", {})["accounts"] = (overrides.get("instagram_audit") or {}).get("accounts") or []
# 페북 페이지(KR+EN)도 코드가 결정적으로 만든다. LLM이 KR 1개만 만들면 _deep_merge가 index 0만 머지하고
# EN(index 1)을 드랍하는 버그가 있어 — overrides의 코드 빌드 리스트를 통째 강제 치환.
fb_pages = (overrides.get("facebook_audit") or {}).get("pages")
# 페북 페이지(KR+EN): _page_patch가 부분 필드만 만들어 그대로 박으면 검증 실패(label/logo 등 누락).
# LLM이 만든 첫 페이지(보통 KR)를 템플릿으로 복사한 뒤 코드 patch로 인덱스별 덮어쓰기 →
# 필수 필드는 LLM 디폴트 받고, 수집 수치는 코드 값. EN 누락 버그 회피.
fb_pages = (overrides.get("facebook_audit") or {}).get("pages") or []
if fb_pages:
merged.setdefault("facebook_audit", {})["pages"] = fb_pages
base_pages = merged.setdefault("facebook_audit", {}).setdefault("pages", [])
template = base_pages[0] if base_pages else None
while len(base_pages) < len(fb_pages) and template:
base_pages.append({**template})
for i, patch in enumerate(fb_pages):
if i < len(base_pages):
base_pages[i].update(patch)
return ReportOutput(**merged)
_MOCK_DOMAINS: set[str] = set() # viewclinic도 real LLM 거치게 — raw_data가 충분해 mock 의존 불필요
_MOCK_REPORT_PATH = os.path.join(os.path.dirname(__file__), "../mock/report_viewclinic.json")
async def _is_mock(analysis_run_id: str) -> bool:
row = await fetchone(
"SELECT h.url FROM analysis_runs ar JOIN hospital_baseinfo h USING (hospital_id)"
" WHERE ar.analysis_run_id = %s",
(analysis_run_id,),
)
url = (row or {}).get("url") or ""
return any(domain in url for domain in _MOCK_DOMAINS)
def _load_mock_report() -> ReportOutput:
with open(_MOCK_REPORT_PATH, encoding="utf-8") as f:
return ReportOutput(**json.load(f))
_MOCK_PLAN_PATH = os.path.join(os.path.dirname(__file__), "../mock/plan_viewclinic.json")
def _load_mock_plan() -> PlanOutput:
with open(_MOCK_PLAN_PATH, encoding="utf-8") as f:
return PlanOutput(**json.load(f))
async def run_report_task(analysis_run_id: str) -> None:
logger.info("[report] start run=%s", analysis_run_id)
if await _is_mock(analysis_run_id):
logger.info("[report] mock mode run=%s", analysis_run_id)
result = _load_mock_report()
result.youtube_audit.linked_urls = []
else:
result = await generate_report(analysis_run_id)
result = _patch_report(result, await _build_overrides(analysis_run_id))
await save_analysis_report(analysis_run_id, result.model_dump())
@ -372,10 +350,6 @@ def _patch_plan(result: PlanOutput, logo_desc: str) -> PlanOutput:
async def run_plan_task(analysis_run_id: str) -> None:
logger.info("[plan] start run=%s", analysis_run_id)
if await _is_mock(analysis_run_id):
logger.info("[plan] mock mode run=%s", analysis_run_id)
result = _load_mock_plan()
else:
result = await generate_plan(analysis_run_id)
# profile_photo 는 brand_assets.logo_description 으로 코드가 박음 (LLM "(가이드 미보유)" 같은 hallucination 차단)
run = await fetchone("SELECT hospital_id FROM analysis_runs WHERE analysis_run_id = %s", (analysis_run_id,))