From 5504f79a9d20a53caa978e45b24e11f41a645123 Mon Sep 17 00:00:00 2001 From: Mina Choi Date: Tue, 2 Jun 2026 17:04:33 +0900 Subject: [PATCH] =?UTF-8?q?refactor(report):=20build=5Foverrides=20+=20pat?= =?UTF-8?q?ch=5Freport=20=ED=86=B5=ED=95=A9=20/=20list=20wholesale=20merge?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - _build_overrides 가 result 받아 deep_merge 까지 처리, _patch_report 제거 - _deep_merge: list by-index → wholesale 치환 (EN 슬롯 누락/라벨 섞임 차단) - build_facebook_audit: template-copy 대신 LLM logo/logo_description 만 두 페이지에 공통 적용 - _page_patch: language/label 명시 박음 (KR/EN 교차 오염 방지) - FacebookPage/InstagramAccount/YouTubeAudit: 불필요한 Optional 제거, has_whatsapp/top_content_type 만 Optional 유지 - build_instagram_audit/build_facebook_audit: dict 반환 (overrides[k] = patch 단순 박기) --- app/api/analysis.py | 4 +- app/common/db/__init__.py | 2 +- app/common/db/source.py | 2 +- app/integrations/llm/schemas/report.py | 92 ++++++++++++-------------- app/models/report.py | 92 ++++++++++++-------------- app/services/analysis.py | 85 ++++++++---------------- app/services/branding.py | 8 +-- app/services/facebook_audit.py | 22 ++++-- app/services/instagram_audit.py | 6 +- app/services/kpi_dashboard.py | 4 +- 10 files changed, 144 insertions(+), 173 deletions(-) diff --git a/app/api/analysis.py b/app/api/analysis.py index d2e464c..c2d99d7 100644 --- a/app/api/analysis.py +++ b/app/api/analysis.py @@ -59,7 +59,7 @@ async def start_analysis(body: AnalysisCreate, background_tasks: BackgroundTasks if mainpage: await insert_raw_info(mainpage["source_id"], analysis_run_id, data_tag=SourceType.MAINPAGE) # branding (HTML/CSS + Vision 로고 매칭) — mainpage 와 같은 homepage URL 을 source 로 사용. - branding_id = await insert_source(hospital_id, SourceType.BRANDING, mainpage["url"]) + branding_id = await insert_source(hospital_id, SourceType.BRANDING, mainpage["url"], language="KR") await insert_raw_info(branding_id, analysis_run_id, data_tag=SourceType.BRANDING) # 클라가 안 보낸 채널은 mock_urls 에서 homepage 매칭으로 보충 (main + extra 동일 규칙). @@ -75,7 +75,7 @@ async def start_analysis(body: AnalysisCreate, background_tasks: BackgroundTasks ] for source_type, url in main_channels: if url: - source_id = await insert_source(hospital_id, source_type, url) + source_id = await insert_source(hospital_id, source_type, url, language="KR") await insert_raw_info(source_id, analysis_run_id, data_tag=source_type) # 부가 채널 — instagram_en/facebook_en 은 동일 source_type 에 language='EN' 으로 구분, 나머지는 자체 source_type. diff --git a/app/common/db/__init__.py b/app/common/db/__init__.py index 6ea6cf7..67048a8 100644 --- a/app/common/db/__init__.py +++ b/app/common/db/__init__.py @@ -3,7 +3,7 @@ from common.db.hospital import select_hospital, update_hospital_status, insert_h from common.db.source import ( insert_source, select_source_mainpage, select_source_by_type, insert_raw_info, update_raw_info_status, update_raw_info, update_raw_info_merge, - update_raw_info_logo_url, select_branding_logo_url, select_branding_info_id, + update_raw_info_logo_url, select_mainpage_logo_url, select_branding_info_id, select_raw_info_data, select_run_sources, select_run_raw_data, select_run_source_raw, select_run_mainpage_url, diff --git a/app/common/db/source.py b/app/common/db/source.py index de91c98..886f92b 100644 --- a/app/common/db/source.py +++ b/app/common/db/source.py @@ -117,7 +117,7 @@ async def select_branding_info_id(analysis_run_id: str) -> int | None: return (row or {}).get("info_id") -async def select_branding_logo_url(analysis_run_id: str) -> str | None: +async def select_mainpage_logo_url(analysis_run_id: str) -> str | None: row = await fetchone( "SELECT ri.logo_url FROM raw_info ri JOIN remote_source rs USING (source_id)" " WHERE ri.analysis_run_id = %s AND rs.source_type = 'mainpage' LIMIT 1", diff --git a/app/integrations/llm/schemas/report.py b/app/integrations/llm/schemas/report.py index e49d557..ed5f1db 100644 --- a/app/integrations/llm/schemas/report.py +++ b/app/integrations/llm/schemas/report.py @@ -123,42 +123,39 @@ class TopVideo(BaseModel): class YouTubeAudit(BaseModel): - # YouTube 미수집 병원에서 _build_youtube_audit가 채울 수 없는 필드 빔. - # required면 ValidationError로 리포트 실패 → Optional로 받아 부분 응답 허용. - channel_name: str | None = None - handle: str | None = None - subscribers: int | None = None - total_videos: int | None = None - total_views: int | None = None - weekly_view_growth: WeeklyViewGrowth | None = None - estimated_monthly_revenue: EstimatedRevenue | None = None - avg_video_length: str | None = None - upload_frequency: str | None = None - channel_created_date: str | None = None - channel_description: str | None = None - linked_urls: list[LinkedUrl] = [] - playlists: list[str] = [] - top_videos: list[TopVideo] = [] - diagnosis: list[DiagnosisItem] = [] + channel_name: str + handle: str + subscribers: int + total_videos: int + total_views: int + weekly_view_growth: WeeklyViewGrowth + estimated_monthly_revenue: EstimatedRevenue + avg_video_length: str + upload_frequency: str + channel_created_date: str + channel_description: str + linked_urls: list[LinkedUrl] + playlists: list[str] + top_videos: list[TopVideo] + diagnosis: list[DiagnosisItem] # --- Instagram --- class InstagramAccount(BaseModel): - # LLM이 누락 가능 — Optional로 받아 ValidationError 차단. - handle: str | None = None - language: Language | None = None - label: str | None = None - posts: int | None = None - followers: int | None = None - following: int | None = None - category: str | None = None - profile_link: str | None = None - highlights: list[str] = [] - reels_count: int | None = None - content_format: str | None = None - profile_photo: str | None = None - bio: str | None = None + handle: str + language: Language + label: str + posts: int + followers: int + following: int + category: str + profile_link: str + highlights: list[str] + reels_count: int + content_format: str + profile_photo: str + bio: str class InstagramAudit(BaseModel): @@ -182,25 +179,24 @@ class BrandInconsistency(BaseModel): class FacebookPage(BaseModel): - # LLM이 누락 가능 (page 1·2개 모두 language/label/logo/has_whatsapp 빼먹는 경우 관찰됨). - url: str | None = None - page_name: str | None = None - language: Language | None = None - label: str | None = None - followers: int | None = None - following: int | None = None - category: str | None = None - bio: str | None = None - logo: str | None = None - logo_description: str | None = None - link: str | None = None - linked_domain: str | None = None - reviews: int | None = None - recent_post_age: str | None = None + url: str + page_name: str + language: Language + label: str + followers: int + following: int + category: str + bio: str + logo: str + logo_description: str + link: str + linked_domain: str + reviews: int + recent_post_age: str has_whatsapp: bool | None = None - post_frequency: str | None = None + post_frequency: str top_content_type: str | None = None - engagement: str | None = None + engagement: str class FacebookAudit(BaseModel): diff --git a/app/models/report.py b/app/models/report.py index 67fa655..98b515c 100644 --- a/app/models/report.py +++ b/app/models/report.py @@ -117,40 +117,37 @@ class TopVideo(CamelModel): class YouTubeAudit(CamelModel): - # YouTube 채널 없는 병원이면 _build_youtube_audit가 채울 수 없는 필드들 (channel_name 등)이 빔. - # required면 ValidationError로 리포트 실패 → Optional로 받아 부분 응답 허용. - channel_name: str | None = None - handle: str | None = None - subscribers: int | None = None - total_videos: int | None = None - total_views: int | None = None - weekly_view_growth: WeeklyViewGrowth | None = None - estimated_monthly_revenue: EstimatedRevenue | None = None - avg_video_length: str | None = None - upload_frequency: str | None = None - channel_created_date: str | None = None - channel_description: str | None = None - linked_urls: list[LinkedUrl] = [] - playlists: list[str] = [] - top_videos: list[TopVideo] = [] - diagnosis: list[DiagnosisItem] = [] + channel_name: str + handle: str + subscribers: int + total_videos: int + total_views: int + weekly_view_growth: WeeklyViewGrowth + estimated_monthly_revenue: EstimatedRevenue + avg_video_length: str + upload_frequency: str + channel_created_date: str + channel_description: str + linked_urls: list[LinkedUrl] + playlists: list[str] + top_videos: list[TopVideo] + diagnosis: list[DiagnosisItem] class InstagramAccount(CamelModel): - # 인스타 계정(KR/EN) 미수집 시 빈 필드 가능 — Optional. - handle: str | None = None - language: Language | None = None - label: str | None = None - posts: int | None = None - followers: int | None = None - following: int | None = None - category: str | None = None - profile_link: str | None = None - highlights: list[str] = [] - reels_count: int | None = None - content_format: str | None = None - profile_photo: str | None = None - bio: str | None = None + handle: str + language: Language + label: str + posts: int + followers: int + following: int + category: str + profile_link: str + highlights: list[str] + reels_count: int + content_format: str + profile_photo: str + bio: str class InstagramAudit(CamelModel): @@ -172,25 +169,24 @@ class BrandInconsistency(CamelModel): class FacebookPage(CamelModel): - # 페북 페이지(KR/EN) 미수집 시 빈 필드 가능 — Optional. - url: str | None = None - page_name: str | None = None - language: Language | None = None - label: str | None = None - followers: int | None = None - following: int | None = None - category: str | None = None - bio: str | None = None - logo: str | None = None - logo_description: str | None = None - link: str | None = None - linked_domain: str | None = None - reviews: int | None = None - recent_post_age: str | None = None + url: str + page_name: str + language: Language + label: str + followers: int + following: int + category: str + bio: str + logo: str + logo_description: str + link: str + linked_domain: str + reviews: int + recent_post_age: str has_whatsapp: bool | None = None - post_frequency: str | None = None + post_frequency: str top_content_type: str | None = None - engagement: str | None = None + engagement: str class FacebookAudit(CamelModel): diff --git a/app/services/analysis.py b/app/services/analysis.py index 94adcde..aecaed2 100644 --- a/app/services/analysis.py +++ b/app/services/analysis.py @@ -4,14 +4,14 @@ import re from datetime import datetime from urllib.parse import urlparse from common.db.run import update_run_report, update_run_plan, select_run_report_data -from common.db.source import select_run_raw_data, select_branding_logo_url +from common.db.source import select_run_raw_data, select_mainpage_logo_url from common.db.market import select_market from integrations.llm.llm_service import LLMService from integrations.llm.prompt import report_prompt, plan_prompt, youtube_diagnosis_prompt from integrations.llm.schemas.report import ReportOutput, ClinicSnapshot, YouTubeAudit from services.branding import analyze_branding -from services.instagram_audit import build_instagram_accounts -from services.facebook_audit import build_facebook_pages +from services.instagram_audit import build_instagram_audit +from services.facebook_audit import build_facebook_audit from services.kpi_dashboard import build_kpi_dashboard from integrations.llm.schemas.plan import PlanOutput @@ -250,10 +250,20 @@ async def _build_youtube_audit(youtube: dict) -> dict: return YouTubeAudit.model_validate(yt_patch).model_dump() -async def _build_overrides(analysis_run_id: str) -> dict: +def _deep_merge(base: dict, overrides: dict) -> dict: + """dict 끼리 만나면 재귀로 안쪽까지 합치고, 그 외(list/scalar/None) 는 override 값으로 통째 치환.""" + for k, v in overrides.items(): + if isinstance(v, dict) and isinstance(base.get(k), dict): + _deep_merge(base[k], v) + else: + base[k] = v + return base + + +async def _build_overrides(analysis_run_id: str, result: ReportOutput) -> ReportOutput: raw = await select_run_raw_data(analysis_run_id) if not raw: - return {} + return result mainpage = raw.get("mainpage", {}) or {} branding = raw.get("branding", {}) or {} @@ -268,19 +278,14 @@ async def _build_overrides(analysis_run_id: str) -> dict: naver_cafe = raw.get("naver_cafe", {}) or {} brand_assets = branding.get("brandAssets") or {} channel_logos = branding.get("channelLogos") or {} - logo_url = await select_branding_logo_url(analysis_run_id) + logo_url = await select_mainpage_logo_url(analysis_run_id) + + llm_fb_pages = result.model_dump().get("facebook_audit", {}).get("pages", []) snapshot: dict = _build_clinic_snapshot(gangnam_unni, mainpage, brand_assets, logo_url) yt_patch: dict = await _build_youtube_audit(youtube) - - # ── instagram (KR·EN 계정을 코드에서 구성 → LLM 출력 무시하고 교체) ────────────── - ig_patch = build_instagram_accounts(instagram, instagram_en, channel_logos) - - # ── facebook (KR=raw.facebook, EN=raw.facebook_en 둘 다 코드 산출, [KR, EN] 순서) ── - fb_pages = build_facebook_pages(facebook, facebook_en) - - # ── KPI dashboard: 7개 mockup 라이프사이클 공식으로 코드가 결정. LLM 출력은 무시. ────── - # build_kpi_dashboard 의 hospital 인자에 부가 채널 dict 모아서 넘김 (instagramEn/facebookEn/tiktok/naverCafe 키 기대). + ig_patch = build_instagram_audit(instagram, instagram_en, channel_logos) + fb_patch = build_facebook_audit(facebook, facebook_en, llm_fb_pages) kpi_extras = { "instagramEn": instagram_en, "facebookEn": facebook_en, @@ -290,51 +295,13 @@ async def _build_overrides(analysis_run_id: str) -> dict: kpi = build_kpi_dashboard(instagram, facebook, youtube, gangnam_unni, kpi_extras, naver_blog) overrides: dict = {} - if snapshot: - overrides["clinic_snapshot"] = snapshot - if ig_patch: - overrides["instagram_audit"] = {"accounts": ig_patch} - if fb_pages: - overrides["facebook_audit"] = {"pages": fb_pages} - if yt_patch: - overrides["youtube_audit"] = yt_patch - if kpi: - overrides["kpi_dashboard"] = kpi - return overrides + if snapshot: overrides["clinic_snapshot"] = snapshot + if ig_patch: overrides["instagram_audit"] = ig_patch + if fb_patch: overrides["facebook_audit"] = fb_patch + if yt_patch: overrides["youtube_audit"] = yt_patch + if kpi: overrides["kpi_dashboard"] = kpi - -def _deep_merge(base: dict, overrides: dict) -> dict: - for k, v in overrides.items(): - if isinstance(v, dict) and isinstance(base.get(k), dict): - _deep_merge(base[k], v) - elif isinstance(v, list) and isinstance(base.get(k), list): - for i, item in enumerate(v): - if i < len(base[k]) and isinstance(item, dict) and isinstance(base[k][i], dict): - _deep_merge(base[k][i], item) - else: - base[k] = v - return base - - -def _patch_report(result: ReportOutput, overrides: dict) -> ReportOutput: merged = _deep_merge(result.model_dump(), overrides) - # 인스타 계정은 프롬프트에서 LLM 이 [] 로 두게 했고, 코드가 수집 데이터로 채운다 (데이터 없으면 빈 리스트) - merged.setdefault("instagram_audit", {})["accounts"] = (overrides.get("instagram_audit") or {}).get("accounts") or [] - # 페북 페이지(KR+EN): _page_patch 가 부분 필드만 만들어 그대로 박으면 검증 실패(label/logo 등 누락). - # LLM 이 만든 첫 페이지(보통 KR)를 템플릿으로 복사한 뒤 코드 patch 로 인덱스별 덮어쓰기 → - # 필수 필드는 LLM 디폴트 받고, 수집 수치는 코드 값. EN 누락 버그 회피. - fb_pages = (overrides.get("facebook_audit") or {}).get("pages") or [] - if fb_pages: - base_pages = merged.setdefault("facebook_audit", {}).setdefault("pages", []) - template = base_pages[0] if base_pages else None - while len(base_pages) < len(fb_pages) and template: - base_pages.append({**template}) - for i, patch in enumerate(fb_pages): - if i < len(base_pages): - base_pages[i].update(patch) - # KPI dashboard 강제 치환 — 코드가 계산한 라이프사이클 공식 그대로. - if overrides.get("kpi_dashboard"): - merged["kpi_dashboard"] = overrides["kpi_dashboard"] return ReportOutput(**merged) @@ -342,7 +309,7 @@ async def run_report_task(analysis_run_id: str) -> None: logger.info("[report] start run=%s", analysis_run_id) await analyze_branding(analysis_run_id) result = await generate_report(analysis_run_id) - result = _patch_report(result, await _build_overrides(analysis_run_id)) + result = await _build_overrides(analysis_run_id, result) await update_run_report(analysis_run_id, result.model_dump()) logger.info("[report] done run=%s", analysis_run_id) diff --git a/app/services/branding.py b/app/services/branding.py index c50fab7..8fc7081 100644 --- a/app/services/branding.py +++ b/app/services/branding.py @@ -4,7 +4,7 @@ import os from urllib.parse import urlparse from common.db.source import ( select_run_raw_data, update_raw_info_merge, - select_branding_info_id, select_branding_logo_url, + select_branding_info_id, select_mainpage_logo_url, ) from common.utils import _run_optional_step from integrations.llm.gemini_vision import VisionClient @@ -19,7 +19,7 @@ async def _describe_logo(analysis_run_id: str, info_id: int, vc: VisionClient) - mainpage = raw.get("mainpage") or {} homepage_url = mainpage.get("sourceUrl") or "" branding_meta = mainpage.get("branding") or {} - column_logo = await select_branding_logo_url(analysis_run_id) + column_logo = await select_mainpage_logo_url(analysis_run_id) candidates = [u for u in [ column_logo, branding_meta.get("logoUrl"), @@ -38,7 +38,6 @@ async def _describe_logo(analysis_run_id: str, info_id: int, vc: VisionClient) - result = await vc.analyze_brand_assets(logo_url=cand, homepage_url=homepage_url) if result: break - result.pop("logo_images", None) # logo_images 는 컬럼으로 옮겼으니 JSON 에서 제거 if result: await update_raw_info_merge(info_id, {"brandAssets": result}) logger.info("[brand_logo] done keys=%s", list(result.keys()) if result else None) @@ -47,7 +46,7 @@ async def _describe_logo(analysis_run_id: str, info_id: int, vc: VisionClient) - async def _describe_channel_logos(analysis_run_id: str, info_id: int, vc: VisionClient) -> None: """채널 프로필 로고를 공식 로고와 비교. branding raw_info["channelLogos"] 머지.""" raw = await select_run_raw_data(analysis_run_id) - official = await select_branding_logo_url(analysis_run_id) + official = await select_mainpage_logo_url(analysis_run_id) _label = { "instagram": "Instagram", "facebook": "Facebook", @@ -66,7 +65,6 @@ async def _describe_channel_logos(analysis_run_id: str, info_id: int, vc: Vision analysis_run_id, [l["channel"] for l in logos], bool(official)) result = await vc.describe_channel_logos(official, logos) if result: - result["logos"] = logos # Vision 못 본 채널도 url 은 프론트 표시용으로 보관 await update_raw_info_merge(info_id, {"channelLogos": result}) logger.info("[channel_logos] done keys=%s", list(result.keys()) if result else None) diff --git a/app/services/facebook_audit.py b/app/services/facebook_audit.py index ed872e3..ba1b8b8 100644 --- a/app/services/facebook_audit.py +++ b/app/services/facebook_audit.py @@ -5,6 +5,7 @@ from datetime import datetime, timezone from common.utils import parse_ts +from integrations.llm.schemas.report import FacebookAudit def _humanize_age(days: int) -> str: @@ -74,8 +75,9 @@ def transform_for_storage(fb: dict | None) -> dict | None: return out -def _page_patch(fb: dict) -> dict: - """저장된 페북 페이지 → FacebookPage 스키마 필드 패치. 수치 지표는 수집 시점에 박혀있어 그대로 복사.""" +def _page_patch(fb: dict, language: str, label: str) -> dict: + """저장된 페북 페이지 → FacebookPage 스키마 필드 패치. 수치 지표는 수집 시점에 박혀있어 그대로 복사. + language/label 은 데이터 있을 때만 명시적으로 박음 — template-copy 가 KR 값을 EN 슬롯에 잘못 상속시키는 것 방지.""" p: dict = {} if fb.get("pageUrl"): p["url"] = p["link"] = fb["pageUrl"] if fb.get("pageName"): p["page_name"] = fb["pageName"] @@ -87,10 +89,18 @@ def _page_patch(fb: dict) -> dict: if fb.get("following") is not None: p["following"] = fb["following"] for key in ("recent_post_age", "post_frequency", "engagement"): if fb.get(key): p[key] = fb[key] + if p: + p["language"] = language + p["label"] = label return p -def build_facebook_pages(facebook: dict, facebook_en: dict) -> list[dict]: - """KR·EN 페북 페이지 패치 리스트 구성. 프롬프트가 pages를 [KR, EN] 순서로 만들므로 동일 순서 유지. - 빈 패치는 제외 (해당 채널 데이터 없음 → LLM도 페이지 안 만듦 → 인덱스 정렬 유지).""" - return [pp for pp in (_page_patch(facebook), _page_patch(facebook_en)) if pp] +def build_facebook_audit(facebook: dict, facebook_en: dict, llm_pages: list[dict] | None = None) -> dict: + """KR·EN 페북 페이지 구성. logo/logo_description 은 LLM Vision 결과(첫 페이지) 모든 페이지에 공통 적용, + 나머지 필드는 코드가 수집 데이터로 계산.""" + llm_logo = {k: v for k, v in ((llm_pages or [{}])[0]).items() if k in {"logo", "logo_description"} and v} + pages = [{**llm_logo, **p} for p in ( + _page_patch(facebook, "KR", "페이스북 KR"), + _page_patch(facebook_en, "EN", "페이스북 EN"), + ) if p] + return FacebookAudit.model_validate({"pages": pages}).model_dump(exclude_unset=True) diff --git a/app/services/instagram_audit.py b/app/services/instagram_audit.py index 9e02c31..dfdcafd 100644 --- a/app/services/instagram_audit.py +++ b/app/services/instagram_audit.py @@ -1,6 +1,8 @@ """Instagram audit 계정(KR·EN)을 수집 데이터로 구성. fix 값(handle/followers/highlights/content_format 등)은 전부 코드에서 박는다 — LLM 출력 무시.""" +from integrations.llm.schemas.report import InstagramAudit + _MEDIA = {"GraphImage": "이미지", "GraphSidecar": "카드뉴스", "GraphVideo": "영상/릴스"} @@ -38,11 +40,11 @@ def _account(data: dict, language: str, label: str, channel: str, channel_logos: } -def build_instagram_accounts(instagram: dict, instagram_en: dict, channel_logos: dict) -> list[dict]: +def build_instagram_audit(instagram: dict, instagram_en: dict, channel_logos: dict) -> dict: """KR·EN 인스타 계정 리스트 구성 (username 있는 것만).""" accounts: list[dict] = [] if instagram.get("username"): accounts.append(_account(instagram, "KR", "인스타그램 KR", "Instagram", channel_logos)) if instagram_en.get("username"): accounts.append(_account(instagram_en, "EN", "인스타그램 EN", "Instagram EN", channel_logos)) - return accounts + return InstagramAudit.model_validate({"accounts": accounts}).model_dump() diff --git a/app/services/kpi_dashboard.py b/app/services/kpi_dashboard.py index c29450c..3586729 100644 --- a/app/services/kpi_dashboard.py +++ b/app/services/kpi_dashboard.py @@ -1,5 +1,7 @@ """mockup 7개 역분석 — 채널 규모별 3개월/12개월 target 성장률 공식.""" +from integrations.llm.schemas.report import KPIMetric + def _round_clean(n: int) -> int: if n < 100: return n @@ -91,4 +93,4 @@ def build_kpi_dashboard( "target_12_month": f"{_round_clean(int(gu_reviews * rm12)):,}개", }) - return kpis + return [KPIMetric.model_validate(k).model_dump() for k in kpis]