fix(branding): logo URL 컬럼 일관성 + 잘못된 로고 묘사 회피
- 채널 collectors (instagram/facebook/youtube/tiktok) 가 profileImage 를 raw_info.logo_url 컬럼에도 저장
- collect_brand_basics 가 공식 로고 URL 을 branding row 가 아니라 mainpage row 의 logo_url 컬럼에 저장
- select_branding_logo_url 가 mainpage row 의 logo_url 을 조회하도록 SQL 수정
- select_run_raw_data 가 logo_url 컬럼도 반환 (_logo_url 합성키) → branding._describe_channel_logos 가 컬럼에서 통일된 이름으로 읽음
- _describe_logo candidates 에서 firecrawl ogImage 제거 (이벤트 배너 잘못 잡히던 케이스)
- extra_channels (tiktok/kakaotalk/naver_cafe) 에 language='KR' 고정

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
main
parent
af61713697
commit
9a9ce1319f
|
|
@ -82,9 +82,9 @@ async def start_analysis(body: AnalysisCreate, background_tasks: BackgroundTasks
|
|||
extra_channels = [
|
||||
(SourceType.INSTAGRAM, "EN", _with_scheme(body.channels.instagram_en) or mock.get("instagram_en")),
|
||||
(SourceType.FACEBOOK, "EN", _with_scheme(body.channels.facebook_en) or mock.get("facebook_en")),
|
||||
(SourceType.TIKTOK, None, _with_scheme(body.channels.tiktok) or mock.get("tiktok")),
|
||||
(SourceType.KAKAOTALK, None, _with_scheme(body.channels.kakao_talk) or mock.get("kakao_talk")),
|
||||
(SourceType.NAVER_CAFE, None, _with_scheme(body.channels.naver_cafe) or mock.get("naver_cafe")),
|
||||
(SourceType.TIKTOK, "KR", _with_scheme(body.channels.tiktok) or mock.get("tiktok")),
|
||||
(SourceType.KAKAOTALK, "KR", _with_scheme(body.channels.kakao_talk) or mock.get("kakao_talk")),
|
||||
(SourceType.NAVER_CAFE, "KR", _with_scheme(body.channels.naver_cafe) or mock.get("naver_cafe")),
|
||||
]
|
||||
for source_type, language, url in extra_channels:
|
||||
if url:
|
||||
|
|
|
|||
|
|
@ -63,9 +63,8 @@ async def select_run_sources(analysis_run_id: str) -> list[dict]:
|
|||
|
||||
|
||||
async def select_run_raw_data(analysis_run_id: str) -> dict:
|
||||
# language='EN' 인 row 는 dict key 를 "<source_type>_en" 으로 합성 (KR/EN 동시 수집 시 키 충돌 방지).
|
||||
rows = await fetchall(
|
||||
"SELECT rs.source_type, rs.language, ri.raw_data"
|
||||
"SELECT rs.source_type, rs.language, ri.raw_data, ri.logo_url"
|
||||
" FROM raw_info ri JOIN remote_source rs USING (source_id)"
|
||||
" WHERE ri.analysis_run_id = %s",
|
||||
(analysis_run_id,),
|
||||
|
|
@ -76,7 +75,10 @@ async def select_run_raw_data(analysis_run_id: str) -> dict:
|
|||
key = row["source_type"]
|
||||
if (row.get("language") or "").upper() == "EN":
|
||||
key = f"{key}_en"
|
||||
result[key] = json.loads(raw) if isinstance(raw, str) else raw
|
||||
data = json.loads(raw) if isinstance(raw, str) else (raw or {})
|
||||
if isinstance(data, dict) and row.get("logo_url"):
|
||||
data["_logo_url"] = row["logo_url"]
|
||||
result[key] = data
|
||||
return result
|
||||
|
||||
|
||||
|
|
@ -118,7 +120,7 @@ async def select_branding_info_id(analysis_run_id: str) -> int | None:
|
|||
async def select_branding_logo_url(analysis_run_id: str) -> str | None:
    """Return the ``logo_url`` column stored on the run's ``mainpage`` row.

    The query joins ``raw_info`` to ``remote_source`` and picks at most one
    row whose ``source_type`` is ``'mainpage'`` for the given run.

    Args:
        analysis_run_id: Identifier of the analysis run to look up.

    Returns:
        The stored logo URL, or ``None`` when no row matches or the column
        holds no value.
    """
    # Only the 'mainpage' filter is kept; the superseded 'branding' filter
    # that appeared next to it would have been passed to fetchone as an
    # extra positional argument.
    row = await fetchone(
        "SELECT ri.logo_url FROM raw_info ri JOIN remote_source rs USING (source_id)"
        " WHERE ri.analysis_run_id = %s AND rs.source_type = 'mainpage' LIMIT 1",
        (analysis_run_id,),
    )
    # `row or {}` lets a falsy "no row" result fall through to None.
    # NOTE(review): assumes fetchone yields a dict-like row — confirm.
    return (row or {}).get("logo_url")
|
||||
|
|
|
|||
|
|
@ -23,7 +23,6 @@ async def _describe_logo(analysis_run_id: str, info_id: int, vc: VisionClient) -
|
|||
candidates = [u for u in [
|
||||
column_logo,
|
||||
branding_meta.get("logoUrl"),
|
||||
branding_meta.get("ogImage"),
|
||||
branding_meta.get("faviconUrl"),
|
||||
] if u]
|
||||
if homepage_url:
|
||||
|
|
@ -59,7 +58,7 @@ async def _describe_channel_logos(analysis_run_id: str, info_id: int, vc: Vision
|
|||
}
|
||||
logos = [{"channel": label, "url": img}
|
||||
for key, label in _label.items()
|
||||
if (img := (raw.get(key) or {}).get("profileImage"))]
|
||||
if (img := (raw.get(key) or {}).get("_logo_url"))]
|
||||
if not logos:
|
||||
logger.info("[channel_logos] skip — no channel profileImages")
|
||||
return
|
||||
|
|
|
|||
|
|
@ -11,11 +11,18 @@ from models.status import SourceType
|
|||
from integrations.site_fetcher import fetch_html_and_css
|
||||
from services.brand_parser import find_logo_url_in_html, extract_brand_colors_from_text
|
||||
from common.db.source import update_raw_info_merge, update_raw_info_logo_url, select_run_raw_data
|
||||
from common.db.base import fetchone
|
||||
from services.facebook_audit import transform_for_storage as transform_facebook
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
async def _save_with_logo(info_id: int, data: dict) -> None:
    """Store a collector payload and copy its profile image to the logo column.

    The payload is always written through ``update_raw_info``. When it has a
    truthy ``profileImage`` entry, that value is also passed to
    ``update_raw_info_logo_url`` for the same ``info_id``.

    Args:
        info_id: Identifier of the raw_info row being written.
        data: Collected channel payload.
    """
    await update_raw_info(info_id, data)

    profile_image = data.get("profileImage")
    if not profile_image:
        # No usable profile image: leave the logo column untouched.
        return
    await update_raw_info_logo_url(info_id, profile_image)
|
||||
|
||||
|
||||
async def collect_instagram(analysis_run_id: str, info_id: int, url: str) -> None:
|
||||
logger.info("[instagram] start run=%s url=%s", analysis_run_id, url)
|
||||
await update_raw_info_status(info_id, "processing")
|
||||
|
|
@ -24,7 +31,7 @@ async def collect_instagram(analysis_run_id: str, info_id: int, url: str) -> Non
|
|||
await update_raw_info_status(info_id, "failed")
|
||||
logger.warning("[instagram] failed run=%s", analysis_run_id)
|
||||
return
|
||||
await update_raw_info(info_id, data)
|
||||
await _save_with_logo(info_id, data)
|
||||
logger.info("[instagram] done run=%s", analysis_run_id)
|
||||
|
||||
|
||||
|
|
@ -37,7 +44,7 @@ async def collect_facebook(analysis_run_id: str, info_id: int, url: str) -> None
|
|||
logger.warning("[facebook] failed run=%s", analysis_run_id)
|
||||
return
|
||||
data = transform_facebook(data)
|
||||
await update_raw_info(info_id, data)
|
||||
await _save_with_logo(info_id, data)
|
||||
logger.info("[facebook] done run=%s", analysis_run_id)
|
||||
|
||||
|
||||
|
|
@ -61,7 +68,7 @@ async def collect_youtube(analysis_run_id: str, info_id: int, url: str) -> None:
|
|||
await update_raw_info_status(info_id, "failed")
|
||||
logger.warning("[youtube] failed run=%s", analysis_run_id)
|
||||
return
|
||||
await update_raw_info(info_id, data)
|
||||
await _save_with_logo(info_id, data)
|
||||
logger.info("[youtube] done run=%s", analysis_run_id)
|
||||
|
||||
|
||||
|
|
@ -101,7 +108,7 @@ async def collect_tiktok(analysis_run_id: str, info_id: int, url: str) -> None:
|
|||
await update_raw_info_status(info_id, "failed")
|
||||
logger.warning("[tiktok] failed run=%s", analysis_run_id)
|
||||
return
|
||||
await update_raw_info(info_id, data)
|
||||
await _save_with_logo(info_id, data)
|
||||
logger.info("[tiktok] done run=%s", analysis_run_id)
|
||||
|
||||
|
||||
|
|
@ -125,8 +132,6 @@ async def collect_kakaotalk(analysis_run_id: str, info_id: int, url: str) -> Non
|
|||
|
||||
|
||||
async def collect_brand_basics(analysis_run_id: str, info_id: int) -> None:
|
||||
"""branding 단계 collect — HTML/CSS 한 번 fetch → logo URL(컬럼) + brand 색상(JSON).
|
||||
mainpage 수집 결과 의존이라 main wave gather 끝난 뒤 호출."""
|
||||
logger.info("[brand_basics] start run=%s info=%s", analysis_run_id, info_id)
|
||||
raw = await select_run_raw_data(analysis_run_id)
|
||||
mainpage = raw.get("mainpage") or {}
|
||||
|
|
@ -139,7 +144,13 @@ async def collect_brand_basics(analysis_run_id: str, info_id: int) -> None:
|
|||
|
||||
logo_url = html_logo_url or branding_meta.get("logoUrl") or branding_meta.get("ogImage")
|
||||
if logo_url:
|
||||
await update_raw_info_logo_url(info_id, logo_url)
|
||||
mainpage_row = await fetchone(
|
||||
"SELECT ri.info_id FROM raw_info ri JOIN remote_source rs USING (source_id)"
|
||||
" WHERE ri.analysis_run_id = %s AND rs.source_type = 'mainpage' LIMIT 1",
|
||||
(analysis_run_id,),
|
||||
)
|
||||
if mainpage_row:
|
||||
await update_raw_info_logo_url(mainpage_row["info_id"], logo_url)
|
||||
|
||||
payload: dict = {}
|
||||
if css_colors:
|
||||
|
|
|
|||
Loading…
Reference in New Issue