Compare commits

..

2 Commits

Author SHA1 Message Date
Mina Choi 5504f79a9d refactor(report): build_overrides + patch_report 통합 / list wholesale merge
- _build_overrides 가 result 받아 deep_merge 까지 처리, _patch_report 제거
- _deep_merge: list by-index → wholesale 치환 (EN 슬롯 누락/라벨 섞임 차단)
- build_facebook_audit: template-copy 대신 LLM logo/logo_description 만 두 페이지에 공통 적용
- _page_patch: language/label 명시 박음 (KR/EN 교차 오염 방지)
- FacebookPage/InstagramAccount/YouTubeAudit: 불필요한 Optional 제거, has_whatsapp/top_content_type 만 Optional 유지
- build_instagram_audit/build_facebook_audit: dict 반환 (overrides[k] = patch 단순 박기)
2026-06-02 17:04:33 +09:00
Mina Choi 9a9ce1319f fix(branding): logo URL 컬럼 일관성 + 잘못된 로고 묘사 회피
- 채널 collectors (instagram/facebook/youtube/tiktok) 가 profileImage 를 raw_info.logo_url 컬럼에도 저장
- collect_brand_basics 가 공식 로고 URL 을 branding row 가 아니라 mainpage row 의 logo_url 컬럼에 저장
- select_branding_logo_url 가 mainpage row 의 logo_url 조회하도록 SQL 수정
- select_run_raw_data 가 logo_url 컬럼도 반환 (_logo_url 합성키) → branding._describe_channel_logos 가 컬럼에서 통일된 이름으로 읽음
- _describe_logo candidates 에서 firecrawl ogImage 제거 (이벤트 배너 잘못 잡히던 케이스)
- extra_channels (tiktok/kakaotalk/naver_cafe) language='KR' 박음

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-06-02 13:12:58 +09:00
11 changed files with 172 additions and 189 deletions

View File

@ -59,7 +59,7 @@ async def start_analysis(body: AnalysisCreate, background_tasks: BackgroundTasks
if mainpage: if mainpage:
await insert_raw_info(mainpage["source_id"], analysis_run_id, data_tag=SourceType.MAINPAGE) await insert_raw_info(mainpage["source_id"], analysis_run_id, data_tag=SourceType.MAINPAGE)
# branding (HTML/CSS + Vision 로고 매칭) — mainpage 와 같은 homepage URL 을 source 로 사용. # branding (HTML/CSS + Vision 로고 매칭) — mainpage 와 같은 homepage URL 을 source 로 사용.
branding_id = await insert_source(hospital_id, SourceType.BRANDING, mainpage["url"]) branding_id = await insert_source(hospital_id, SourceType.BRANDING, mainpage["url"], language="KR")
await insert_raw_info(branding_id, analysis_run_id, data_tag=SourceType.BRANDING) await insert_raw_info(branding_id, analysis_run_id, data_tag=SourceType.BRANDING)
# 클라가 안 보낸 채널은 mock_urls 에서 homepage 매칭으로 보충 (main + extra 동일 규칙). # 클라가 안 보낸 채널은 mock_urls 에서 homepage 매칭으로 보충 (main + extra 동일 규칙).
@ -75,16 +75,16 @@ async def start_analysis(body: AnalysisCreate, background_tasks: BackgroundTasks
] ]
for source_type, url in main_channels: for source_type, url in main_channels:
if url: if url:
source_id = await insert_source(hospital_id, source_type, url) source_id = await insert_source(hospital_id, source_type, url, language="KR")
await insert_raw_info(source_id, analysis_run_id, data_tag=source_type) await insert_raw_info(source_id, analysis_run_id, data_tag=source_type)
# 부가 채널 — instagram_en/facebook_en 은 동일 source_type 에 language='EN' 으로 구분, 나머지는 자체 source_type. # 부가 채널 — instagram_en/facebook_en 은 동일 source_type 에 language='EN' 으로 구분, 나머지는 자체 source_type.
extra_channels = [ extra_channels = [
(SourceType.INSTAGRAM, "EN", _with_scheme(body.channels.instagram_en) or mock.get("instagram_en")), (SourceType.INSTAGRAM, "EN", _with_scheme(body.channels.instagram_en) or mock.get("instagram_en")),
(SourceType.FACEBOOK, "EN", _with_scheme(body.channels.facebook_en) or mock.get("facebook_en")), (SourceType.FACEBOOK, "EN", _with_scheme(body.channels.facebook_en) or mock.get("facebook_en")),
(SourceType.TIKTOK, None, _with_scheme(body.channels.tiktok) or mock.get("tiktok")), (SourceType.TIKTOK, "KR", _with_scheme(body.channels.tiktok) or mock.get("tiktok")),
(SourceType.KAKAOTALK, None, _with_scheme(body.channels.kakao_talk) or mock.get("kakao_talk")), (SourceType.KAKAOTALK, "KR", _with_scheme(body.channels.kakao_talk) or mock.get("kakao_talk")),
(SourceType.NAVER_CAFE, None, _with_scheme(body.channels.naver_cafe) or mock.get("naver_cafe")), (SourceType.NAVER_CAFE, "KR", _with_scheme(body.channels.naver_cafe) or mock.get("naver_cafe")),
] ]
for source_type, language, url in extra_channels: for source_type, language, url in extra_channels:
if url: if url:

View File

@ -3,7 +3,7 @@ from common.db.hospital import select_hospital, update_hospital_status, insert_h
from common.db.source import ( from common.db.source import (
insert_source, select_source_mainpage, select_source_by_type, insert_source, select_source_mainpage, select_source_by_type,
insert_raw_info, update_raw_info_status, update_raw_info, update_raw_info_merge, insert_raw_info, update_raw_info_status, update_raw_info, update_raw_info_merge,
update_raw_info_logo_url, select_branding_logo_url, select_branding_info_id, update_raw_info_logo_url, select_mainpage_logo_url, select_branding_info_id,
select_raw_info_data, select_raw_info_data,
select_run_sources, select_run_raw_data, select_run_source_raw, select_run_sources, select_run_raw_data, select_run_source_raw,
select_run_mainpage_url, select_run_mainpage_url,

View File

@ -63,9 +63,8 @@ async def select_run_sources(analysis_run_id: str) -> list[dict]:
async def select_run_raw_data(analysis_run_id: str) -> dict: async def select_run_raw_data(analysis_run_id: str) -> dict:
# language='EN' 인 row 는 dict key 를 "<source_type>_en" 으로 합성 (KR/EN 동시 수집 시 키 충돌 방지).
rows = await fetchall( rows = await fetchall(
"SELECT rs.source_type, rs.language, ri.raw_data" "SELECT rs.source_type, rs.language, ri.raw_data, ri.logo_url"
" FROM raw_info ri JOIN remote_source rs USING (source_id)" " FROM raw_info ri JOIN remote_source rs USING (source_id)"
" WHERE ri.analysis_run_id = %s", " WHERE ri.analysis_run_id = %s",
(analysis_run_id,), (analysis_run_id,),
@ -76,7 +75,10 @@ async def select_run_raw_data(analysis_run_id: str) -> dict:
key = row["source_type"] key = row["source_type"]
if (row.get("language") or "").upper() == "EN": if (row.get("language") or "").upper() == "EN":
key = f"{key}_en" key = f"{key}_en"
result[key] = json.loads(raw) if isinstance(raw, str) else raw data = json.loads(raw) if isinstance(raw, str) else (raw or {})
if isinstance(data, dict) and row.get("logo_url"):
data["_logo_url"] = row["logo_url"]
result[key] = data
return result return result
@ -115,10 +117,10 @@ async def select_branding_info_id(analysis_run_id: str) -> int | None:
return (row or {}).get("info_id") return (row or {}).get("info_id")
async def select_branding_logo_url(analysis_run_id: str) -> str | None: async def select_mainpage_logo_url(analysis_run_id: str) -> str | None:
row = await fetchone( row = await fetchone(
"SELECT ri.logo_url FROM raw_info ri JOIN remote_source rs USING (source_id)" "SELECT ri.logo_url FROM raw_info ri JOIN remote_source rs USING (source_id)"
" WHERE ri.analysis_run_id = %s AND rs.source_type = 'branding' LIMIT 1", " WHERE ri.analysis_run_id = %s AND rs.source_type = 'mainpage' LIMIT 1",
(analysis_run_id,), (analysis_run_id,),
) )
return (row or {}).get("logo_url") return (row or {}).get("logo_url")

View File

@ -123,42 +123,39 @@ class TopVideo(BaseModel):
class YouTubeAudit(BaseModel): class YouTubeAudit(BaseModel):
# YouTube 미수집 병원에서 _build_youtube_audit가 채울 수 없는 필드 빔. channel_name: str
# required면 ValidationError로 리포트 실패 → Optional로 받아 부분 응답 허용. handle: str
channel_name: str | None = None subscribers: int
handle: str | None = None total_videos: int
subscribers: int | None = None total_views: int
total_videos: int | None = None weekly_view_growth: WeeklyViewGrowth
total_views: int | None = None estimated_monthly_revenue: EstimatedRevenue
weekly_view_growth: WeeklyViewGrowth | None = None avg_video_length: str
estimated_monthly_revenue: EstimatedRevenue | None = None upload_frequency: str
avg_video_length: str | None = None channel_created_date: str
upload_frequency: str | None = None channel_description: str
channel_created_date: str | None = None linked_urls: list[LinkedUrl]
channel_description: str | None = None playlists: list[str]
linked_urls: list[LinkedUrl] = [] top_videos: list[TopVideo]
playlists: list[str] = [] diagnosis: list[DiagnosisItem]
top_videos: list[TopVideo] = []
diagnosis: list[DiagnosisItem] = []
# --- Instagram --- # --- Instagram ---
class InstagramAccount(BaseModel): class InstagramAccount(BaseModel):
# LLM이 누락 가능 — Optional로 받아 ValidationError 차단. handle: str
handle: str | None = None language: Language
language: Language | None = None label: str
label: str | None = None posts: int
posts: int | None = None followers: int
followers: int | None = None following: int
following: int | None = None category: str
category: str | None = None profile_link: str
profile_link: str | None = None highlights: list[str]
highlights: list[str] = [] reels_count: int
reels_count: int | None = None content_format: str
content_format: str | None = None profile_photo: str
profile_photo: str | None = None bio: str
bio: str | None = None
class InstagramAudit(BaseModel): class InstagramAudit(BaseModel):
@ -182,25 +179,24 @@ class BrandInconsistency(BaseModel):
class FacebookPage(BaseModel): class FacebookPage(BaseModel):
# LLM이 누락 가능 (page 1·2개 모두 language/label/logo/has_whatsapp 빼먹는 경우 관찰됨). url: str
url: str | None = None page_name: str
page_name: str | None = None language: Language
language: Language | None = None label: str
label: str | None = None followers: int
followers: int | None = None following: int
following: int | None = None category: str
category: str | None = None bio: str
bio: str | None = None logo: str
logo: str | None = None logo_description: str
logo_description: str | None = None link: str
link: str | None = None linked_domain: str
linked_domain: str | None = None reviews: int
reviews: int | None = None recent_post_age: str
recent_post_age: str | None = None
has_whatsapp: bool | None = None has_whatsapp: bool | None = None
post_frequency: str | None = None post_frequency: str
top_content_type: str | None = None top_content_type: str | None = None
engagement: str | None = None engagement: str
class FacebookAudit(BaseModel): class FacebookAudit(BaseModel):

View File

@ -117,40 +117,37 @@ class TopVideo(CamelModel):
class YouTubeAudit(CamelModel): class YouTubeAudit(CamelModel):
# YouTube 채널 없는 병원이면 _build_youtube_audit가 채울 수 없는 필드들 (channel_name 등)이 빔. channel_name: str
# required면 ValidationError로 리포트 실패 → Optional로 받아 부분 응답 허용. handle: str
channel_name: str | None = None subscribers: int
handle: str | None = None total_videos: int
subscribers: int | None = None total_views: int
total_videos: int | None = None weekly_view_growth: WeeklyViewGrowth
total_views: int | None = None estimated_monthly_revenue: EstimatedRevenue
weekly_view_growth: WeeklyViewGrowth | None = None avg_video_length: str
estimated_monthly_revenue: EstimatedRevenue | None = None upload_frequency: str
avg_video_length: str | None = None channel_created_date: str
upload_frequency: str | None = None channel_description: str
channel_created_date: str | None = None linked_urls: list[LinkedUrl]
channel_description: str | None = None playlists: list[str]
linked_urls: list[LinkedUrl] = [] top_videos: list[TopVideo]
playlists: list[str] = [] diagnosis: list[DiagnosisItem]
top_videos: list[TopVideo] = []
diagnosis: list[DiagnosisItem] = []
class InstagramAccount(CamelModel): class InstagramAccount(CamelModel):
# 인스타 계정(KR/EN) 미수집 시 빈 필드 가능 — Optional. handle: str
handle: str | None = None language: Language
language: Language | None = None label: str
label: str | None = None posts: int
posts: int | None = None followers: int
followers: int | None = None following: int
following: int | None = None category: str
category: str | None = None profile_link: str
profile_link: str | None = None highlights: list[str]
highlights: list[str] = [] reels_count: int
reels_count: int | None = None content_format: str
content_format: str | None = None profile_photo: str
profile_photo: str | None = None bio: str
bio: str | None = None
class InstagramAudit(CamelModel): class InstagramAudit(CamelModel):
@ -172,25 +169,24 @@ class BrandInconsistency(CamelModel):
class FacebookPage(CamelModel): class FacebookPage(CamelModel):
# 페북 페이지(KR/EN) 미수집 시 빈 필드 가능 — Optional. url: str
url: str | None = None page_name: str
page_name: str | None = None language: Language
language: Language | None = None label: str
label: str | None = None followers: int
followers: int | None = None following: int
following: int | None = None category: str
category: str | None = None bio: str
bio: str | None = None logo: str
logo: str | None = None logo_description: str
logo_description: str | None = None link: str
link: str | None = None linked_domain: str
linked_domain: str | None = None reviews: int
reviews: int | None = None recent_post_age: str
recent_post_age: str | None = None
has_whatsapp: bool | None = None has_whatsapp: bool | None = None
post_frequency: str | None = None post_frequency: str
top_content_type: str | None = None top_content_type: str | None = None
engagement: str | None = None engagement: str
class FacebookAudit(CamelModel): class FacebookAudit(CamelModel):

View File

@ -4,14 +4,14 @@ import re
from datetime import datetime from datetime import datetime
from urllib.parse import urlparse from urllib.parse import urlparse
from common.db.run import update_run_report, update_run_plan, select_run_report_data from common.db.run import update_run_report, update_run_plan, select_run_report_data
from common.db.source import select_run_raw_data, select_branding_logo_url from common.db.source import select_run_raw_data, select_mainpage_logo_url
from common.db.market import select_market from common.db.market import select_market
from integrations.llm.llm_service import LLMService from integrations.llm.llm_service import LLMService
from integrations.llm.prompt import report_prompt, plan_prompt, youtube_diagnosis_prompt from integrations.llm.prompt import report_prompt, plan_prompt, youtube_diagnosis_prompt
from integrations.llm.schemas.report import ReportOutput, ClinicSnapshot, YouTubeAudit from integrations.llm.schemas.report import ReportOutput, ClinicSnapshot, YouTubeAudit
from services.branding import analyze_branding from services.branding import analyze_branding
from services.instagram_audit import build_instagram_accounts from services.instagram_audit import build_instagram_audit
from services.facebook_audit import build_facebook_pages from services.facebook_audit import build_facebook_audit
from services.kpi_dashboard import build_kpi_dashboard from services.kpi_dashboard import build_kpi_dashboard
from integrations.llm.schemas.plan import PlanOutput from integrations.llm.schemas.plan import PlanOutput
@ -250,10 +250,20 @@ async def _build_youtube_audit(youtube: dict) -> dict:
return YouTubeAudit.model_validate(yt_patch).model_dump() return YouTubeAudit.model_validate(yt_patch).model_dump()
async def _build_overrides(analysis_run_id: str) -> dict: def _deep_merge(base: dict, overrides: dict) -> dict:
"""dict 끼리 만나면 재귀로 안쪽까지 합치고, 그 외(list/scalar/None) 는 override 값으로 통째 치환."""
for k, v in overrides.items():
if isinstance(v, dict) and isinstance(base.get(k), dict):
_deep_merge(base[k], v)
else:
base[k] = v
return base
async def _build_overrides(analysis_run_id: str, result: ReportOutput) -> ReportOutput:
raw = await select_run_raw_data(analysis_run_id) raw = await select_run_raw_data(analysis_run_id)
if not raw: if not raw:
return {} return result
mainpage = raw.get("mainpage", {}) or {} mainpage = raw.get("mainpage", {}) or {}
branding = raw.get("branding", {}) or {} branding = raw.get("branding", {}) or {}
@ -268,19 +278,14 @@ async def _build_overrides(analysis_run_id: str) -> dict:
naver_cafe = raw.get("naver_cafe", {}) or {} naver_cafe = raw.get("naver_cafe", {}) or {}
brand_assets = branding.get("brandAssets") or {} brand_assets = branding.get("brandAssets") or {}
channel_logos = branding.get("channelLogos") or {} channel_logos = branding.get("channelLogos") or {}
logo_url = await select_branding_logo_url(analysis_run_id) logo_url = await select_mainpage_logo_url(analysis_run_id)
llm_fb_pages = result.model_dump().get("facebook_audit", {}).get("pages", [])
snapshot: dict = _build_clinic_snapshot(gangnam_unni, mainpage, brand_assets, logo_url) snapshot: dict = _build_clinic_snapshot(gangnam_unni, mainpage, brand_assets, logo_url)
yt_patch: dict = await _build_youtube_audit(youtube) yt_patch: dict = await _build_youtube_audit(youtube)
ig_patch = build_instagram_audit(instagram, instagram_en, channel_logos)
# ── instagram (KR·EN 계정을 코드에서 구성 → LLM 출력 무시하고 교체) ────────────── fb_patch = build_facebook_audit(facebook, facebook_en, llm_fb_pages)
ig_patch = build_instagram_accounts(instagram, instagram_en, channel_logos)
# ── facebook (KR=raw.facebook, EN=raw.facebook_en 둘 다 코드 산출, [KR, EN] 순서) ──
fb_pages = build_facebook_pages(facebook, facebook_en)
# ── KPI dashboard: 7개 mockup 라이프사이클 공식으로 코드가 결정. LLM 출력은 무시. ──────
# build_kpi_dashboard 의 hospital 인자에 부가 채널 dict 모아서 넘김 (instagramEn/facebookEn/tiktok/naverCafe 키 기대).
kpi_extras = { kpi_extras = {
"instagramEn": instagram_en, "instagramEn": instagram_en,
"facebookEn": facebook_en, "facebookEn": facebook_en,
@ -290,51 +295,13 @@ async def _build_overrides(analysis_run_id: str) -> dict:
kpi = build_kpi_dashboard(instagram, facebook, youtube, gangnam_unni, kpi_extras, naver_blog) kpi = build_kpi_dashboard(instagram, facebook, youtube, gangnam_unni, kpi_extras, naver_blog)
overrides: dict = {} overrides: dict = {}
if snapshot: if snapshot: overrides["clinic_snapshot"] = snapshot
overrides["clinic_snapshot"] = snapshot if ig_patch: overrides["instagram_audit"] = ig_patch
if ig_patch: if fb_patch: overrides["facebook_audit"] = fb_patch
overrides["instagram_audit"] = {"accounts": ig_patch} if yt_patch: overrides["youtube_audit"] = yt_patch
if fb_pages: if kpi: overrides["kpi_dashboard"] = kpi
overrides["facebook_audit"] = {"pages": fb_pages}
if yt_patch:
overrides["youtube_audit"] = yt_patch
if kpi:
overrides["kpi_dashboard"] = kpi
return overrides
def _deep_merge(base: dict, overrides: dict) -> dict:
for k, v in overrides.items():
if isinstance(v, dict) and isinstance(base.get(k), dict):
_deep_merge(base[k], v)
elif isinstance(v, list) and isinstance(base.get(k), list):
for i, item in enumerate(v):
if i < len(base[k]) and isinstance(item, dict) and isinstance(base[k][i], dict):
_deep_merge(base[k][i], item)
else:
base[k] = v
return base
def _patch_report(result: ReportOutput, overrides: dict) -> ReportOutput:
merged = _deep_merge(result.model_dump(), overrides) merged = _deep_merge(result.model_dump(), overrides)
# 인스타 계정은 프롬프트에서 LLM 이 [] 로 두게 했고, 코드가 수집 데이터로 채운다 (데이터 없으면 빈 리스트)
merged.setdefault("instagram_audit", {})["accounts"] = (overrides.get("instagram_audit") or {}).get("accounts") or []
# 페북 페이지(KR+EN): _page_patch 가 부분 필드만 만들어 그대로 박으면 검증 실패(label/logo 등 누락).
# LLM 이 만든 첫 페이지(보통 KR)를 템플릿으로 복사한 뒤 코드 patch 로 인덱스별 덮어쓰기 →
# 필수 필드는 LLM 디폴트 받고, 수집 수치는 코드 값. EN 누락 버그 회피.
fb_pages = (overrides.get("facebook_audit") or {}).get("pages") or []
if fb_pages:
base_pages = merged.setdefault("facebook_audit", {}).setdefault("pages", [])
template = base_pages[0] if base_pages else None
while len(base_pages) < len(fb_pages) and template:
base_pages.append({**template})
for i, patch in enumerate(fb_pages):
if i < len(base_pages):
base_pages[i].update(patch)
# KPI dashboard 강제 치환 — 코드가 계산한 라이프사이클 공식 그대로.
if overrides.get("kpi_dashboard"):
merged["kpi_dashboard"] = overrides["kpi_dashboard"]
return ReportOutput(**merged) return ReportOutput(**merged)
@ -342,7 +309,7 @@ async def run_report_task(analysis_run_id: str) -> None:
logger.info("[report] start run=%s", analysis_run_id) logger.info("[report] start run=%s", analysis_run_id)
await analyze_branding(analysis_run_id) await analyze_branding(analysis_run_id)
result = await generate_report(analysis_run_id) result = await generate_report(analysis_run_id)
result = _patch_report(result, await _build_overrides(analysis_run_id)) result = await _build_overrides(analysis_run_id, result)
await update_run_report(analysis_run_id, result.model_dump()) await update_run_report(analysis_run_id, result.model_dump())
logger.info("[report] done run=%s", analysis_run_id) logger.info("[report] done run=%s", analysis_run_id)

View File

@ -4,7 +4,7 @@ import os
from urllib.parse import urlparse from urllib.parse import urlparse
from common.db.source import ( from common.db.source import (
select_run_raw_data, update_raw_info_merge, select_run_raw_data, update_raw_info_merge,
select_branding_info_id, select_branding_logo_url, select_branding_info_id, select_mainpage_logo_url,
) )
from common.utils import _run_optional_step from common.utils import _run_optional_step
from integrations.llm.gemini_vision import VisionClient from integrations.llm.gemini_vision import VisionClient
@ -19,11 +19,10 @@ async def _describe_logo(analysis_run_id: str, info_id: int, vc: VisionClient) -
mainpage = raw.get("mainpage") or {} mainpage = raw.get("mainpage") or {}
homepage_url = mainpage.get("sourceUrl") or "" homepage_url = mainpage.get("sourceUrl") or ""
branding_meta = mainpage.get("branding") or {} branding_meta = mainpage.get("branding") or {}
column_logo = await select_branding_logo_url(analysis_run_id) column_logo = await select_mainpage_logo_url(analysis_run_id)
candidates = [u for u in [ candidates = [u for u in [
column_logo, column_logo,
branding_meta.get("logoUrl"), branding_meta.get("logoUrl"),
branding_meta.get("ogImage"),
branding_meta.get("faviconUrl"), branding_meta.get("faviconUrl"),
] if u] ] if u]
if homepage_url: if homepage_url:
@ -39,7 +38,6 @@ async def _describe_logo(analysis_run_id: str, info_id: int, vc: VisionClient) -
result = await vc.analyze_brand_assets(logo_url=cand, homepage_url=homepage_url) result = await vc.analyze_brand_assets(logo_url=cand, homepage_url=homepage_url)
if result: if result:
break break
result.pop("logo_images", None) # logo_images 는 컬럼으로 옮겼으니 JSON 에서 제거
if result: if result:
await update_raw_info_merge(info_id, {"brandAssets": result}) await update_raw_info_merge(info_id, {"brandAssets": result})
logger.info("[brand_logo] done keys=%s", list(result.keys()) if result else None) logger.info("[brand_logo] done keys=%s", list(result.keys()) if result else None)
@ -48,7 +46,7 @@ async def _describe_logo(analysis_run_id: str, info_id: int, vc: VisionClient) -
async def _describe_channel_logos(analysis_run_id: str, info_id: int, vc: VisionClient) -> None: async def _describe_channel_logos(analysis_run_id: str, info_id: int, vc: VisionClient) -> None:
"""채널 프로필 로고를 공식 로고와 비교. branding raw_info["channelLogos"] 머지.""" """채널 프로필 로고를 공식 로고와 비교. branding raw_info["channelLogos"] 머지."""
raw = await select_run_raw_data(analysis_run_id) raw = await select_run_raw_data(analysis_run_id)
official = await select_branding_logo_url(analysis_run_id) official = await select_mainpage_logo_url(analysis_run_id)
_label = { _label = {
"instagram": "Instagram", "instagram": "Instagram",
"facebook": "Facebook", "facebook": "Facebook",
@ -59,7 +57,7 @@ async def _describe_channel_logos(analysis_run_id: str, info_id: int, vc: Vision
} }
logos = [{"channel": label, "url": img} logos = [{"channel": label, "url": img}
for key, label in _label.items() for key, label in _label.items()
if (img := (raw.get(key) or {}).get("profileImage"))] if (img := (raw.get(key) or {}).get("_logo_url"))]
if not logos: if not logos:
logger.info("[channel_logos] skip — no channel profileImages") logger.info("[channel_logos] skip — no channel profileImages")
return return
@ -67,7 +65,6 @@ async def _describe_channel_logos(analysis_run_id: str, info_id: int, vc: Vision
analysis_run_id, [l["channel"] for l in logos], bool(official)) analysis_run_id, [l["channel"] for l in logos], bool(official))
result = await vc.describe_channel_logos(official, logos) result = await vc.describe_channel_logos(official, logos)
if result: if result:
result["logos"] = logos # Vision 못 본 채널도 url 은 프론트 표시용으로 보관
await update_raw_info_merge(info_id, {"channelLogos": result}) await update_raw_info_merge(info_id, {"channelLogos": result})
logger.info("[channel_logos] done keys=%s", list(result.keys()) if result else None) logger.info("[channel_logos] done keys=%s", list(result.keys()) if result else None)

View File

@ -11,11 +11,18 @@ from models.status import SourceType
from integrations.site_fetcher import fetch_html_and_css from integrations.site_fetcher import fetch_html_and_css
from services.brand_parser import find_logo_url_in_html, extract_brand_colors_from_text from services.brand_parser import find_logo_url_in_html, extract_brand_colors_from_text
from common.db.source import update_raw_info_merge, update_raw_info_logo_url, select_run_raw_data from common.db.source import update_raw_info_merge, update_raw_info_logo_url, select_run_raw_data
from common.db.base import fetchone
from services.facebook_audit import transform_for_storage as transform_facebook from services.facebook_audit import transform_for_storage as transform_facebook
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
async def _save_with_logo(info_id: int, data: dict) -> None:
await update_raw_info(info_id, data)
if data.get("profileImage"):
await update_raw_info_logo_url(info_id, data["profileImage"])
async def collect_instagram(analysis_run_id: str, info_id: int, url: str) -> None: async def collect_instagram(analysis_run_id: str, info_id: int, url: str) -> None:
logger.info("[instagram] start run=%s url=%s", analysis_run_id, url) logger.info("[instagram] start run=%s url=%s", analysis_run_id, url)
await update_raw_info_status(info_id, "processing") await update_raw_info_status(info_id, "processing")
@ -24,7 +31,7 @@ async def collect_instagram(analysis_run_id: str, info_id: int, url: str) -> Non
await update_raw_info_status(info_id, "failed") await update_raw_info_status(info_id, "failed")
logger.warning("[instagram] failed run=%s", analysis_run_id) logger.warning("[instagram] failed run=%s", analysis_run_id)
return return
await update_raw_info(info_id, data) await _save_with_logo(info_id, data)
logger.info("[instagram] done run=%s", analysis_run_id) logger.info("[instagram] done run=%s", analysis_run_id)
@ -37,7 +44,7 @@ async def collect_facebook(analysis_run_id: str, info_id: int, url: str) -> None
logger.warning("[facebook] failed run=%s", analysis_run_id) logger.warning("[facebook] failed run=%s", analysis_run_id)
return return
data = transform_facebook(data) data = transform_facebook(data)
await update_raw_info(info_id, data) await _save_with_logo(info_id, data)
logger.info("[facebook] done run=%s", analysis_run_id) logger.info("[facebook] done run=%s", analysis_run_id)
@ -61,7 +68,7 @@ async def collect_youtube(analysis_run_id: str, info_id: int, url: str) -> None:
await update_raw_info_status(info_id, "failed") await update_raw_info_status(info_id, "failed")
logger.warning("[youtube] failed run=%s", analysis_run_id) logger.warning("[youtube] failed run=%s", analysis_run_id)
return return
await update_raw_info(info_id, data) await _save_with_logo(info_id, data)
logger.info("[youtube] done run=%s", analysis_run_id) logger.info("[youtube] done run=%s", analysis_run_id)
@ -101,7 +108,7 @@ async def collect_tiktok(analysis_run_id: str, info_id: int, url: str) -> None:
await update_raw_info_status(info_id, "failed") await update_raw_info_status(info_id, "failed")
logger.warning("[tiktok] failed run=%s", analysis_run_id) logger.warning("[tiktok] failed run=%s", analysis_run_id)
return return
await update_raw_info(info_id, data) await _save_with_logo(info_id, data)
logger.info("[tiktok] done run=%s", analysis_run_id) logger.info("[tiktok] done run=%s", analysis_run_id)
@ -125,8 +132,6 @@ async def collect_kakaotalk(analysis_run_id: str, info_id: int, url: str) -> Non
async def collect_brand_basics(analysis_run_id: str, info_id: int) -> None: async def collect_brand_basics(analysis_run_id: str, info_id: int) -> None:
"""branding 단계 collect — HTML/CSS 한 번 fetch → logo URL(컬럼) + brand 색상(JSON).
mainpage 수집 결과 의존이라 main wave gather 끝난 호출."""
logger.info("[brand_basics] start run=%s info=%s", analysis_run_id, info_id) logger.info("[brand_basics] start run=%s info=%s", analysis_run_id, info_id)
raw = await select_run_raw_data(analysis_run_id) raw = await select_run_raw_data(analysis_run_id)
mainpage = raw.get("mainpage") or {} mainpage = raw.get("mainpage") or {}
@ -139,7 +144,13 @@ async def collect_brand_basics(analysis_run_id: str, info_id: int) -> None:
logo_url = html_logo_url or branding_meta.get("logoUrl") or branding_meta.get("ogImage") logo_url = html_logo_url or branding_meta.get("logoUrl") or branding_meta.get("ogImage")
if logo_url: if logo_url:
await update_raw_info_logo_url(info_id, logo_url) mainpage_row = await fetchone(
"SELECT ri.info_id FROM raw_info ri JOIN remote_source rs USING (source_id)"
" WHERE ri.analysis_run_id = %s AND rs.source_type = 'mainpage' LIMIT 1",
(analysis_run_id,),
)
if mainpage_row:
await update_raw_info_logo_url(mainpage_row["info_id"], logo_url)
payload: dict = {} payload: dict = {}
if css_colors: if css_colors:

View File

@ -5,6 +5,7 @@
from datetime import datetime, timezone from datetime import datetime, timezone
from common.utils import parse_ts from common.utils import parse_ts
from integrations.llm.schemas.report import FacebookAudit
def _humanize_age(days: int) -> str: def _humanize_age(days: int) -> str:
@ -74,8 +75,9 @@ def transform_for_storage(fb: dict | None) -> dict | None:
return out return out
def _page_patch(fb: dict) -> dict: def _page_patch(fb: dict, language: str, label: str) -> dict:
"""저장된 페북 페이지 → FacebookPage 스키마 필드 패치. 수치 지표는 수집 시점에 박혀있어 그대로 복사.""" """저장된 페북 페이지 → FacebookPage 스키마 필드 패치. 수치 지표는 수집 시점에 박혀있어 그대로 복사.
language/label 데이터 있을 때만 명시적으로 박음 template-copy KR 값을 EN 슬롯에 잘못 상속시키는 방지."""
p: dict = {} p: dict = {}
if fb.get("pageUrl"): p["url"] = p["link"] = fb["pageUrl"] if fb.get("pageUrl"): p["url"] = p["link"] = fb["pageUrl"]
if fb.get("pageName"): p["page_name"] = fb["pageName"] if fb.get("pageName"): p["page_name"] = fb["pageName"]
@ -87,10 +89,18 @@ def _page_patch(fb: dict) -> dict:
if fb.get("following") is not None: p["following"] = fb["following"] if fb.get("following") is not None: p["following"] = fb["following"]
for key in ("recent_post_age", "post_frequency", "engagement"): for key in ("recent_post_age", "post_frequency", "engagement"):
if fb.get(key): p[key] = fb[key] if fb.get(key): p[key] = fb[key]
if p:
p["language"] = language
p["label"] = label
return p return p
def build_facebook_pages(facebook: dict, facebook_en: dict) -> list[dict]: def build_facebook_audit(facebook: dict, facebook_en: dict, llm_pages: list[dict] | None = None) -> dict:
"""KR·EN 페북 페이지 패치 리스트 구성. 프롬프트가 pages를 [KR, EN] 순서로 만들므로 동일 순서 유지. """KR·EN 페북 페이지 구성. logo/logo_description 은 LLM Vision 결과(첫 페이지) 모든 페이지에 공통 적용,
패치는 제외 (해당 채널 데이터 없음 LLM도 페이지 만듦 인덱스 정렬 유지).""" 나머지 필드는 코드가 수집 데이터로 계산."""
return [pp for pp in (_page_patch(facebook), _page_patch(facebook_en)) if pp] llm_logo = {k: v for k, v in ((llm_pages or [{}])[0]).items() if k in {"logo", "logo_description"} and v}
pages = [{**llm_logo, **p} for p in (
_page_patch(facebook, "KR", "페이스북 KR"),
_page_patch(facebook_en, "EN", "페이스북 EN"),
) if p]
return FacebookAudit.model_validate({"pages": pages}).model_dump(exclude_unset=True)

View File

@ -1,6 +1,8 @@
"""Instagram audit 계정(KR·EN)을 수집 데이터로 구성. """Instagram audit 계정(KR·EN)을 수집 데이터로 구성.
fix (handle/followers/highlights/content_format ) 전부 코드에서 박는다 LLM 출력 무시.""" fix (handle/followers/highlights/content_format ) 전부 코드에서 박는다 LLM 출력 무시."""
from integrations.llm.schemas.report import InstagramAudit
_MEDIA = {"GraphImage": "이미지", "GraphSidecar": "카드뉴스", "GraphVideo": "영상/릴스"} _MEDIA = {"GraphImage": "이미지", "GraphSidecar": "카드뉴스", "GraphVideo": "영상/릴스"}
@ -38,11 +40,11 @@ def _account(data: dict, language: str, label: str, channel: str, channel_logos:
} }
def build_instagram_accounts(instagram: dict, instagram_en: dict, channel_logos: dict) -> list[dict]: def build_instagram_audit(instagram: dict, instagram_en: dict, channel_logos: dict) -> dict:
"""KR·EN 인스타 계정 리스트 구성 (username 있는 것만).""" """KR·EN 인스타 계정 리스트 구성 (username 있는 것만)."""
accounts: list[dict] = [] accounts: list[dict] = []
if instagram.get("username"): if instagram.get("username"):
accounts.append(_account(instagram, "KR", "인스타그램 KR", "Instagram", channel_logos)) accounts.append(_account(instagram, "KR", "인스타그램 KR", "Instagram", channel_logos))
if instagram_en.get("username"): if instagram_en.get("username"):
accounts.append(_account(instagram_en, "EN", "인스타그램 EN", "Instagram EN", channel_logos)) accounts.append(_account(instagram_en, "EN", "인스타그램 EN", "Instagram EN", channel_logos))
return accounts return InstagramAudit.model_validate({"accounts": accounts}).model_dump()

View File

@ -1,5 +1,7 @@
"""mockup 7개 역분석 — 채널 규모별 3개월/12개월 target 성장률 공식.""" """mockup 7개 역분석 — 채널 규모별 3개월/12개월 target 성장률 공식."""
from integrations.llm.schemas.report import KPIMetric
def _round_clean(n: int) -> int: def _round_clean(n: int) -> int:
if n < 100: return n if n < 100: return n
@ -91,4 +93,4 @@ def build_kpi_dashboard(
"target_12_month": f"{_round_clean(int(gu_reviews * rm12)):,}", "target_12_month": f"{_round_clean(int(gu_reviews * rm12)):,}",
}) })
return kpis return [KPIMetric.model_validate(k).model_dump() for k in kpis]