From 9a9ce1319f2926c0a627d10acc0966e4031356d4 Mon Sep 17 00:00:00 2001 From: Mina Choi Date: Tue, 2 Jun 2026 13:12:58 +0900 Subject: [PATCH] =?UTF-8?q?fix(branding):=20logo=20URL=20=EC=BB=AC?= =?UTF-8?q?=EB=9F=BC=20=EC=9D=BC=EA=B4=80=EC=84=B1=20+=20=EC=9E=98?= =?UTF-8?q?=EB=AA=BB=EB=90=9C=20=EB=A1=9C=EA=B3=A0=20=EB=AC=98=EC=82=AC=20?= =?UTF-8?q?=ED=9A=8C=ED=94=BC?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - 채널 collectors (instagram/facebook/youtube/tiktok) 가 profileImage 를 raw_info.logo_url 컬럼에도 저장 - collect_brand_basics 가 공식 로고 URL 을 branding row 가 아니라 mainpage row 의 logo_url 컬럼에 저장 - select_branding_logo_url 가 mainpage row 의 logo_url 조회하도록 SQL 수정 - select_run_raw_data 가 logo_url 컬럼도 반환 (_logo_url 합성키) → branding._describe_channel_logos 가 컬럼에서 통일된 이름으로 읽음 - _describe_logo candidates 에서 firecrawl ogImage 제거 (이벤트 배너 잘못 잡히던 케이스) - extra_channels (tiktok/kakaotalk/naver_cafe) language='KR' 박음 Co-Authored-By: Claude Opus 4.7 (1M context) --- app/api/analysis.py | 6 +++--- app/common/db/source.py | 10 ++++++---- app/services/branding.py | 3 +-- app/services/collect.py | 25 ++++++++++++++++++------- 4 files changed, 28 insertions(+), 16 deletions(-) diff --git a/app/api/analysis.py b/app/api/analysis.py index 8a77af1..d2e464c 100644 --- a/app/api/analysis.py +++ b/app/api/analysis.py @@ -82,9 +82,9 @@ async def start_analysis(body: AnalysisCreate, background_tasks: BackgroundTasks extra_channels = [ (SourceType.INSTAGRAM, "EN", _with_scheme(body.channels.instagram_en) or mock.get("instagram_en")), (SourceType.FACEBOOK, "EN", _with_scheme(body.channels.facebook_en) or mock.get("facebook_en")), - (SourceType.TIKTOK, None, _with_scheme(body.channels.tiktok) or mock.get("tiktok")), - (SourceType.KAKAOTALK, None, _with_scheme(body.channels.kakao_talk) or mock.get("kakao_talk")), - (SourceType.NAVER_CAFE, None, _with_scheme(body.channels.naver_cafe) or mock.get("naver_cafe")), + (SourceType.TIKTOK, "KR", _with_scheme(body.channels.tiktok) or mock.get("tiktok")), + (SourceType.KAKAOTALK, "KR", _with_scheme(body.channels.kakao_talk) or mock.get("kakao_talk")), + (SourceType.NAVER_CAFE, "KR", _with_scheme(body.channels.naver_cafe) or mock.get("naver_cafe")), ] for source_type, language, url in extra_channels: if url: diff --git a/app/common/db/source.py b/app/common/db/source.py index 871778f..de91c98 100644 --- a/app/common/db/source.py +++ b/app/common/db/source.py @@ -63,9 +63,8 @@ async def select_run_sources(analysis_run_id: str) -> list[dict]: async def select_run_raw_data(analysis_run_id: str) -> dict: - # language='EN' 인 row 는 dict key 를 "_en" 으로 합성 (KR/EN 동시 수집 시 키 충돌 방지). rows = await fetchall( - "SELECT rs.source_type, rs.language, ri.raw_data" + "SELECT rs.source_type, rs.language, ri.raw_data, ri.logo_url" " FROM raw_info ri JOIN remote_source rs USING (source_id)" " WHERE ri.analysis_run_id = %s", (analysis_run_id,), @@ -76,7 +75,10 @@ async def select_run_raw_data(analysis_run_id: str) -> dict: key = row["source_type"] if (row.get("language") or "").upper() == "EN": key = f"{key}_en" - result[key] = json.loads(raw) if isinstance(raw, str) else raw + data = json.loads(raw) if isinstance(raw, str) else (raw or {}) + if isinstance(data, dict) and row.get("logo_url"): + data["_logo_url"] = row["logo_url"] + result[key] = data return result @@ -118,7 +120,7 @@ async def select_branding_info_id(analysis_run_id: str) -> int | None: async def select_branding_logo_url(analysis_run_id: str) -> str | None: row = await fetchone( "SELECT ri.logo_url FROM raw_info ri JOIN remote_source rs USING (source_id)" - " WHERE ri.analysis_run_id = %s AND rs.source_type = 'branding' LIMIT 1", + " WHERE ri.analysis_run_id = %s AND rs.source_type = 'mainpage' LIMIT 1", (analysis_run_id,), ) return (row or {}).get("logo_url") diff --git a/app/services/branding.py b/app/services/branding.py index e85dd7d..c50fab7 100644 --- a/app/services/branding.py +++ b/app/services/branding.py @@ -23,7 +23,6 @@ async def _describe_logo(analysis_run_id: str, info_id: int, vc: VisionClient) - candidates = [u for u in [ column_logo, branding_meta.get("logoUrl"), - branding_meta.get("ogImage"), branding_meta.get("faviconUrl"), ] if u] if homepage_url: @@ -59,7 +58,7 @@ async def _describe_channel_logos(analysis_run_id: str, info_id: int, vc: Vision } logos = [{"channel": label, "url": img} for key, label in _label.items() - if (img := (raw.get(key) or {}).get("profileImage"))] + if (img := (raw.get(key) or {}).get("_logo_url"))] if not logos: logger.info("[channel_logos] skip — no channel profileImages") return diff --git a/app/services/collect.py b/app/services/collect.py index fca9728..a7a5ee6 100644 --- a/app/services/collect.py +++ b/app/services/collect.py @@ -11,11 +11,18 @@ from models.status import SourceType from integrations.site_fetcher import fetch_html_and_css from services.brand_parser import find_logo_url_in_html, extract_brand_colors_from_text from common.db.source import update_raw_info_merge, update_raw_info_logo_url, select_run_raw_data +from common.db.base import fetchone from services.facebook_audit import transform_for_storage as transform_facebook logger = logging.getLogger(__name__) +async def _save_with_logo(info_id: int, data: dict) -> None: + await update_raw_info(info_id, data) + if data.get("profileImage"): + await update_raw_info_logo_url(info_id, data["profileImage"]) + + async def collect_instagram(analysis_run_id: str, info_id: int, url: str) -> None: logger.info("[instagram] start run=%s url=%s", analysis_run_id, url) await update_raw_info_status(info_id, "processing") @@ -24,7 +31,7 @@ async def collect_instagram(analysis_run_id: str, info_id: int, url: str) -> Non await update_raw_info_status(info_id, "failed") logger.warning("[instagram] failed run=%s", analysis_run_id) return - await update_raw_info(info_id, data) + await _save_with_logo(info_id, data) logger.info("[instagram] done run=%s", analysis_run_id) @@ -37,7 +44,7 @@ async def collect_facebook(analysis_run_id: str, info_id: int, url: str) -> None logger.warning("[facebook] failed run=%s", analysis_run_id) return data = transform_facebook(data) - await update_raw_info(info_id, data) + await _save_with_logo(info_id, data) logger.info("[facebook] done run=%s", analysis_run_id) @@ -61,7 +68,7 @@ async def collect_youtube(analysis_run_id: str, info_id: int, url: str) -> None: await update_raw_info_status(info_id, "failed") logger.warning("[youtube] failed run=%s", analysis_run_id) return - await update_raw_info(info_id, data) + await _save_with_logo(info_id, data) logger.info("[youtube] done run=%s", analysis_run_id) @@ -101,7 +108,7 @@ async def collect_tiktok(analysis_run_id: str, info_id: int, url: str) -> None: await update_raw_info_status(info_id, "failed") logger.warning("[tiktok] failed run=%s", analysis_run_id) return - await update_raw_info(info_id, data) + await _save_with_logo(info_id, data) logger.info("[tiktok] done run=%s", analysis_run_id) @@ -125,8 +132,6 @@ async def collect_kakaotalk(analysis_run_id: str, info_id: int, url: str) -> Non async def collect_brand_basics(analysis_run_id: str, info_id: int) -> None: - """branding 단계 collect — HTML/CSS 한 번 fetch → logo URL(컬럼) + brand 색상(JSON). - mainpage 수집 결과 의존이라 main wave gather 끝난 뒤 호출.""" logger.info("[brand_basics] start run=%s info=%s", analysis_run_id, info_id) raw = await select_run_raw_data(analysis_run_id) mainpage = raw.get("mainpage") or {} @@ -139,7 +144,13 @@ async def collect_brand_basics(analysis_run_id: str, info_id: int) -> None: logo_url = html_logo_url or branding_meta.get("logoUrl") or branding_meta.get("ogImage") if logo_url: - await update_raw_info_logo_url(info_id, logo_url) + mainpage_row = await fetchone( + "SELECT ri.info_id FROM raw_info ri JOIN remote_source rs USING (source_id)" + " WHERE ri.analysis_run_id = %s AND rs.source_type = 'mainpage' LIMIT 1", + (analysis_run_id,), + ) + if mainpage_row: + await update_raw_info_logo_url(mainpage_row["info_id"], logo_url) payload: dict = {} if css_colors: