fix(report+analysis): Instagram/Facebook Optional 완화 + viewclinic mock 제거 + brand_assets 강제주입

- schemas/report.py: InstagramAccount/InstagramAudit/FacebookPage/FacebookAudit 필드 Optional 완화
  (LLM이 page 1·2개 모두 language/label/logo/has_whatsapp 등 빼먹는 케이스 차단)
- analysis.py: viewclinic mock 분기(_is_mock, _load_mock_report, _load_mock_plan) 제거 — raw_data가 충분해 mock 의존 불필요 (viewclinic도 real LLM 경로 사용)
- analysis.py: _build_clinic_snapshot에 brandAssets.logo_images/brand_colors 강제 주입 (값이 있을 때만)
  (LLM이 프롬프트 가드를 무시하고 null로 두는 케이스 차단)
- analysis.py: facebook_audit.pages 머지 방식 변경 — LLM 첫 페이지 템플릿 복제 후 코드 patch로 인덱스별 덮어쓰기
  (EN(index 1) 드랍 + label/logo 누락 검증 실패 동시 회피)

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
main
Mina Choi 2026-06-01 08:50:35 +09:00
parent 5dbc7d7ffe
commit e5a9036e47
2 changed files with 60 additions and 84 deletions

View File

@ -145,24 +145,25 @@ class YouTubeAudit(BaseModel):
# --- Instagram --- # --- Instagram ---
class InstagramAccount(BaseModel): class InstagramAccount(BaseModel):
handle: str # LLM이 누락 가능 — Optional로 받아 ValidationError 차단.
language: Language handle: str | None = None
label: str language: Language | None = None
posts: int label: str | None = None
followers: int posts: int | None = None
following: int followers: int | None = None
category: str following: int | None = None
profile_link: str category: str | None = None
highlights: list[str] profile_link: str | None = None
reels_count: int highlights: list[str] = []
content_format: str reels_count: int | None = None
profile_photo: str content_format: str | None = None
bio: str profile_photo: str | None = None
bio: str | None = None
class InstagramAudit(BaseModel): class InstagramAudit(BaseModel):
accounts: list[InstagramAccount] accounts: list[InstagramAccount] = []
diagnosis: list[DiagnosisItem] diagnosis: list[DiagnosisItem] = []
# --- Facebook --- # --- Facebook ---
@ -181,31 +182,32 @@ class BrandInconsistency(BaseModel):
class FacebookPage(BaseModel): class FacebookPage(BaseModel):
url: str # LLM이 누락 가능 (page 1·2개 모두 language/label/logo/has_whatsapp 빼먹는 경우 관찰됨).
page_name: str url: str | None = None
language: Language page_name: str | None = None
label: str language: Language | None = None
followers: int label: str | None = None
following: int followers: int | None = None
category: str following: int | None = None
bio: str category: str | None = None
logo: str bio: str | None = None
logo_description: str logo: str | None = None
link: str logo_description: str | None = None
linked_domain: str link: str | None = None
reviews: int linked_domain: str | None = None
recent_post_age: str reviews: int | None = None
has_whatsapp: bool recent_post_age: str | None = None
has_whatsapp: bool | None = None
post_frequency: str | None = None post_frequency: str | None = None
top_content_type: str | None = None top_content_type: str | None = None
engagement: str | None = None engagement: str | None = None
class FacebookAudit(BaseModel): class FacebookAudit(BaseModel):
pages: list[FacebookPage] pages: list[FacebookPage] = []
diagnosis: list[DiagnosisItem] diagnosis: list[DiagnosisItem] = []
brand_inconsistencies: list[BrandInconsistency] brand_inconsistencies: list[BrandInconsistency] = []
consolidation_recommendation: str consolidation_recommendation: str | None = None
# --- 기타 채널 / 웹사이트 --- # --- 기타 채널 / 웹사이트 ---

View File

@ -1,6 +1,5 @@
import json import json
import logging import logging
import os
import re import re
from datetime import datetime from datetime import datetime
from common.db import fetchone, execute, fetch_raw, get_analysis_raw_data, save_analysis_report, get_market_analysis from common.db import fetchone, execute, fetch_raw, get_analysis_raw_data, save_analysis_report, get_market_analysis
@ -125,6 +124,10 @@ def _build_clinic_snapshot(gangnam_unni: dict, hospital: dict) -> dict:
"rating": lead.get("rating"), "rating": lead.get("rating"),
"review_count": lead.get("reviews"), "review_count": lead.get("reviews"),
} }
# brand_assets에서 logo_images / brand_colors 강제 주입. LLM이 프롬프트 가드 무시하고 null로 두는 케이스 차단.
ba = hospital.get("brandAssets") or {}
if ba.get("logo_images"): snapshot["logo_images"] = ba["logo_images"]
if ba.get("brand_colors"): snapshot["brand_colors"] = ba["brand_colors"]
return ClinicSnapshot.model_validate(snapshot).model_dump() return ClinicSnapshot.model_validate(snapshot).model_dump()
@ -313,49 +316,24 @@ def _patch_report(result: ReportOutput, overrides: dict) -> ReportOutput:
merged = _deep_merge(result.model_dump(), overrides) merged = _deep_merge(result.model_dump(), overrides)
# 인스타 계정은 프롬프트에서 LLM이 []로 두게 했고, 코드가 수집 데이터로 채운다 (데이터 없으면 빈 리스트) # 인스타 계정은 프롬프트에서 LLM이 []로 두게 했고, 코드가 수집 데이터로 채운다 (데이터 없으면 빈 리스트)
merged.setdefault("instagram_audit", {})["accounts"] = (overrides.get("instagram_audit") or {}).get("accounts") or [] merged.setdefault("instagram_audit", {})["accounts"] = (overrides.get("instagram_audit") or {}).get("accounts") or []
# 페북 페이지(KR+EN)도 코드가 결정적으로 만든다. LLM이 KR 1개만 만들면 _deep_merge가 index 0만 머지하고 # 페북 페이지(KR+EN): _page_patch가 부분 필드만 만들어 그대로 박으면 검증 실패(label/logo 등 누락).
# EN(index 1)을 드랍하는 버그가 있어 — overrides의 코드 빌드 리스트를 통째 강제 치환. # LLM이 만든 첫 페이지(보통 KR)를 템플릿으로 복사한 뒤 코드 patch로 인덱스별 덮어쓰기 →
fb_pages = (overrides.get("facebook_audit") or {}).get("pages") # 필수 필드는 LLM 디폴트 받고, 수집 수치는 코드 값. EN 누락 버그 회피.
fb_pages = (overrides.get("facebook_audit") or {}).get("pages") or []
if fb_pages: if fb_pages:
merged.setdefault("facebook_audit", {})["pages"] = fb_pages base_pages = merged.setdefault("facebook_audit", {}).setdefault("pages", [])
template = base_pages[0] if base_pages else None
while len(base_pages) < len(fb_pages) and template:
base_pages.append({**template})
for i, patch in enumerate(fb_pages):
if i < len(base_pages):
base_pages[i].update(patch)
return ReportOutput(**merged) return ReportOutput(**merged)
_MOCK_DOMAINS: set[str] = set() # viewclinic도 real LLM 거치게 — raw_data가 충분해 mock 의존 불필요
_MOCK_REPORT_PATH = os.path.join(os.path.dirname(__file__), "../mock/report_viewclinic.json")
async def _is_mock(analysis_run_id: str) -> bool:
row = await fetchone(
"SELECT h.url FROM analysis_runs ar JOIN hospital_baseinfo h USING (hospital_id)"
" WHERE ar.analysis_run_id = %s",
(analysis_run_id,),
)
url = (row or {}).get("url") or ""
return any(domain in url for domain in _MOCK_DOMAINS)
def _load_mock_report() -> ReportOutput:
with open(_MOCK_REPORT_PATH, encoding="utf-8") as f:
return ReportOutput(**json.load(f))
_MOCK_PLAN_PATH = os.path.join(os.path.dirname(__file__), "../mock/plan_viewclinic.json")
def _load_mock_plan() -> PlanOutput:
with open(_MOCK_PLAN_PATH, encoding="utf-8") as f:
return PlanOutput(**json.load(f))
async def run_report_task(analysis_run_id: str) -> None: async def run_report_task(analysis_run_id: str) -> None:
logger.info("[report] start run=%s", analysis_run_id) logger.info("[report] start run=%s", analysis_run_id)
if await _is_mock(analysis_run_id): result = await generate_report(analysis_run_id)
logger.info("[report] mock mode run=%s", analysis_run_id)
result = _load_mock_report()
result.youtube_audit.linked_urls = []
else:
result = await generate_report(analysis_run_id)
result = _patch_report(result, await _build_overrides(analysis_run_id)) result = _patch_report(result, await _build_overrides(analysis_run_id))
await save_analysis_report(analysis_run_id, result.model_dump()) await save_analysis_report(analysis_run_id, result.model_dump())
logger.info("[report] done run=%s", analysis_run_id) logger.info("[report] done run=%s", analysis_run_id)
@ -372,18 +350,14 @@ def _patch_plan(result: PlanOutput, logo_desc: str) -> PlanOutput:
async def run_plan_task(analysis_run_id: str) -> None: async def run_plan_task(analysis_run_id: str) -> None:
logger.info("[plan] start run=%s", analysis_run_id) logger.info("[plan] start run=%s", analysis_run_id)
if await _is_mock(analysis_run_id): result = await generate_plan(analysis_run_id)
logger.info("[plan] mock mode run=%s", analysis_run_id) # profile_photo 는 brand_assets.logo_description 으로 코드가 박음 (LLM "(가이드 미보유)" 같은 hallucination 차단)
result = _load_mock_plan() run = await fetchone("SELECT hospital_id FROM analysis_runs WHERE analysis_run_id = %s", (analysis_run_id,))
else: if run:
result = await generate_plan(analysis_run_id) hr = await fetchone("SELECT raw_data FROM hospital_baseinfo WHERE hospital_id = %s", (run["hospital_id"],))
# profile_photo 는 brand_assets.logo_description 으로 코드가 박음 (LLM "(가이드 미보유)" 같은 hallucination 차단) h = json.loads(hr["raw_data"]) if hr and isinstance(hr.get("raw_data"), str) else (hr or {}).get("raw_data") or {}
run = await fetchone("SELECT hospital_id FROM analysis_runs WHERE analysis_run_id = %s", (analysis_run_id,)) logo_desc = ((h.get("brandAssets") or {}).get("logo_description")) or ""
if run: result = _patch_plan(result, logo_desc)
hr = await fetchone("SELECT raw_data FROM hospital_baseinfo WHERE hospital_id = %s", (run["hospital_id"],))
h = json.loads(hr["raw_data"]) if hr and isinstance(hr.get("raw_data"), str) else (hr or {}).get("raw_data") or {}
logo_desc = ((h.get("brandAssets") or {}).get("logo_description")) or ""
result = _patch_plan(result, logo_desc)
await execute( await execute(
"UPDATE analysis_runs SET plan_data = %s WHERE analysis_run_id = %s", "UPDATE analysis_runs SET plan_data = %s WHERE analysis_run_id = %s",
(json.dumps(result.model_dump(), ensure_ascii=False), analysis_run_id), (json.dumps(result.model_dump(), ensure_ascii=False), analysis_run_id),