fix(branding): logo URL 컬럼 일관성 + 잘못된 로고 묘사 회피

- 채널 collectors (instagram/facebook/youtube/tiktok) 가 profileImage 를 raw_info.logo_url 컬럼에도 저장
- collect_brand_basics 가 공식 로고 URL 을 branding row 가 아니라 mainpage row 의 logo_url 컬럼에 저장
- select_branding_logo_url 가 mainpage row 의 logo_url 을 조회하도록 SQL 수정
- select_run_raw_data 가 logo_url 컬럼도 반환 (_logo_url 합성키) → branding._describe_channel_logos 가 컬럼에서 통일된 이름으로 읽음
- _describe_logo candidates 에서 firecrawl ogImage 제거 (이벤트 배너가 로고로 잘못 잡히던 케이스)
- extra_channels (tiktok/kakaotalk/naver_cafe) 의 language 를 None 대신 'KR' 로 고정

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
main
Mina Choi 2026-06-02 13:12:58 +09:00
parent af61713697
commit 9a9ce1319f
4 changed files with 28 additions and 16 deletions

View File

@ -82,9 +82,9 @@ async def start_analysis(body: AnalysisCreate, background_tasks: BackgroundTasks
extra_channels = [
(SourceType.INSTAGRAM, "EN", _with_scheme(body.channels.instagram_en) or mock.get("instagram_en")),
(SourceType.FACEBOOK, "EN", _with_scheme(body.channels.facebook_en) or mock.get("facebook_en")),
(SourceType.TIKTOK, None, _with_scheme(body.channels.tiktok) or mock.get("tiktok")),
(SourceType.KAKAOTALK, None, _with_scheme(body.channels.kakao_talk) or mock.get("kakao_talk")),
(SourceType.NAVER_CAFE, None, _with_scheme(body.channels.naver_cafe) or mock.get("naver_cafe")),
(SourceType.TIKTOK, "KR", _with_scheme(body.channels.tiktok) or mock.get("tiktok")),
(SourceType.KAKAOTALK, "KR", _with_scheme(body.channels.kakao_talk) or mock.get("kakao_talk")),
(SourceType.NAVER_CAFE, "KR", _with_scheme(body.channels.naver_cafe) or mock.get("naver_cafe")),
]
for source_type, language, url in extra_channels:
if url:

View File

@ -63,9 +63,8 @@ async def select_run_sources(analysis_run_id: str) -> list[dict]:
async def select_run_raw_data(analysis_run_id: str) -> dict:
# language='EN' 인 row 는 dict key 를 "<source_type>_en" 으로 합성 (KR/EN 동시 수집 시 키 충돌 방지).
rows = await fetchall(
"SELECT rs.source_type, rs.language, ri.raw_data"
"SELECT rs.source_type, rs.language, ri.raw_data, ri.logo_url"
" FROM raw_info ri JOIN remote_source rs USING (source_id)"
" WHERE ri.analysis_run_id = %s",
(analysis_run_id,),
@ -76,7 +75,10 @@ async def select_run_raw_data(analysis_run_id: str) -> dict:
key = row["source_type"]
if (row.get("language") or "").upper() == "EN":
key = f"{key}_en"
result[key] = json.loads(raw) if isinstance(raw, str) else raw
data = json.loads(raw) if isinstance(raw, str) else (raw or {})
if isinstance(data, dict) and row.get("logo_url"):
data["_logo_url"] = row["logo_url"]
result[key] = data
return result
@ -118,7 +120,7 @@ async def select_branding_info_id(analysis_run_id: str) -> int | None:
async def select_branding_logo_url(analysis_run_id: str) -> str | None:
    """Return the logo URL stored on a ``mainpage`` raw_info row of the run.

    Args:
        analysis_run_id: Identifier of the analysis run to look up.

    Returns:
        The ``logo_url`` column value of a matching row, or ``None`` when no
        mainpage row with a logo URL exists for the run.
    """
    # Only the 'mainpage' predicate is kept: leaving the superseded 'branding'
    # fragment next to it would concatenate into a query with two WHERE/LIMIT
    # clauses. Rows without a logo are filtered out because LIMIT 1 has no
    # ORDER BY, so with several mainpage rows (e.g. KR and EN) the pick is
    # arbitrary and could otherwise land on a row whose logo_url is NULL.
    row = await fetchone(
        "SELECT ri.logo_url FROM raw_info ri JOIN remote_source rs USING (source_id)"
        " WHERE ri.analysis_run_id = %s AND rs.source_type = 'mainpage'"
        " AND ri.logo_url IS NOT NULL LIMIT 1",
        (analysis_run_id,),
    )
    # fetchone may yield nothing when no row matches; fall back to an empty
    # mapping so the lookup returns None instead of raising.
    return (row or {}).get("logo_url")

View File

@ -23,7 +23,6 @@ async def _describe_logo(analysis_run_id: str, info_id: int, vc: VisionClient) -
candidates = [u for u in [
column_logo,
branding_meta.get("logoUrl"),
branding_meta.get("ogImage"),
branding_meta.get("faviconUrl"),
] if u]
if homepage_url:
@ -59,7 +58,7 @@ async def _describe_channel_logos(analysis_run_id: str, info_id: int, vc: Vision
}
logos = [{"channel": label, "url": img}
for key, label in _label.items()
if (img := (raw.get(key) or {}).get("profileImage"))]
if (img := (raw.get(key) or {}).get("_logo_url"))]
if not logos:
logger.info("[channel_logos] skip — no channel profileImages")
return

View File

@ -11,11 +11,18 @@ from models.status import SourceType
from integrations.site_fetcher import fetch_html_and_css
from services.brand_parser import find_logo_url_in_html, extract_brand_colors_from_text
from common.db.source import update_raw_info_merge, update_raw_info_logo_url, select_run_raw_data
from common.db.base import fetchone
from services.facebook_audit import transform_for_storage as transform_facebook
logger = logging.getLogger(__name__)
async def _save_with_logo(info_id: int, data: dict) -> None:
    """Store a collected payload and copy its profile image to the logo column.

    Args:
        info_id: Identifier of the raw_info row being updated.
        data: Collected payload; its ``profileImage`` entry, when truthy, is
            also written through ``update_raw_info_logo_url``.
    """
    await update_raw_info(info_id, data)

    profile_image = data.get("profileImage")
    if not profile_image:
        # Nothing usable as a logo: leave the logo column untouched.
        return
    await update_raw_info_logo_url(info_id, profile_image)
async def collect_instagram(analysis_run_id: str, info_id: int, url: str) -> None:
logger.info("[instagram] start run=%s url=%s", analysis_run_id, url)
await update_raw_info_status(info_id, "processing")
@ -24,7 +31,7 @@ async def collect_instagram(analysis_run_id: str, info_id: int, url: str) -> Non
await update_raw_info_status(info_id, "failed")
logger.warning("[instagram] failed run=%s", analysis_run_id)
return
await update_raw_info(info_id, data)
await _save_with_logo(info_id, data)
logger.info("[instagram] done run=%s", analysis_run_id)
@ -37,7 +44,7 @@ async def collect_facebook(analysis_run_id: str, info_id: int, url: str) -> None
logger.warning("[facebook] failed run=%s", analysis_run_id)
return
data = transform_facebook(data)
await update_raw_info(info_id, data)
await _save_with_logo(info_id, data)
logger.info("[facebook] done run=%s", analysis_run_id)
@ -61,7 +68,7 @@ async def collect_youtube(analysis_run_id: str, info_id: int, url: str) -> None:
await update_raw_info_status(info_id, "failed")
logger.warning("[youtube] failed run=%s", analysis_run_id)
return
await update_raw_info(info_id, data)
await _save_with_logo(info_id, data)
logger.info("[youtube] done run=%s", analysis_run_id)
@ -101,7 +108,7 @@ async def collect_tiktok(analysis_run_id: str, info_id: int, url: str) -> None:
await update_raw_info_status(info_id, "failed")
logger.warning("[tiktok] failed run=%s", analysis_run_id)
return
await update_raw_info(info_id, data)
await _save_with_logo(info_id, data)
logger.info("[tiktok] done run=%s", analysis_run_id)
@ -125,8 +132,6 @@ async def collect_kakaotalk(analysis_run_id: str, info_id: int, url: str) -> Non
async def collect_brand_basics(analysis_run_id: str, info_id: int) -> None:
"""branding 단계 collect — HTML/CSS 한 번 fetch → logo URL(컬럼) + brand 색상(JSON).
mainpage 수집 결과 의존이라 main wave gather 끝난 호출."""
logger.info("[brand_basics] start run=%s info=%s", analysis_run_id, info_id)
raw = await select_run_raw_data(analysis_run_id)
mainpage = raw.get("mainpage") or {}
@ -139,7 +144,13 @@ async def collect_brand_basics(analysis_run_id: str, info_id: int) -> None:
logo_url = html_logo_url or branding_meta.get("logoUrl") or branding_meta.get("ogImage")
if logo_url:
await update_raw_info_logo_url(info_id, logo_url)
mainpage_row = await fetchone(
"SELECT ri.info_id FROM raw_info ri JOIN remote_source rs USING (source_id)"
" WHERE ri.analysis_run_id = %s AND rs.source_type = 'mainpage' LIMIT 1",
(analysis_run_id,),
)
if mainpage_row:
await update_raw_info_logo_url(mainpage_row["info_id"], logo_url)
payload: dict = {}
if css_colors: