fix(branding): logo URL 컬럼 일관성 + 잘못된 로고 묘사 회피

- 채널 collectors (instagram/facebook/youtube/tiktok) 가 profileImage 를 raw_info.logo_url 컬럼에도 저장
- collect_brand_basics 가 공식 로고 URL 을 branding row 가 아니라 mainpage row 의 logo_url 컬럼에 저장
- select_branding_logo_url 가 mainpage row 의 logo_url 조회하도록 SQL 수정
- select_run_raw_data 가 logo_url 컬럼도 반환 (_logo_url 합성키) → branding._describe_channel_logos 가 컬럼에서 통일된 이름으로 읽음
- _describe_logo candidates 에서 firecrawl ogImage 제거 (이벤트 배너 잘못 잡히던 케이스)
- extra_channels (tiktok/kakaotalk/naver_cafe) language='KR' 박음

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

main
parent
af61713697
commit
9a9ce1319f
|
|
@ -82,9 +82,9 @@ async def start_analysis(body: AnalysisCreate, background_tasks: BackgroundTasks
|
||||||
extra_channels = [
|
extra_channels = [
|
||||||
(SourceType.INSTAGRAM, "EN", _with_scheme(body.channels.instagram_en) or mock.get("instagram_en")),
|
(SourceType.INSTAGRAM, "EN", _with_scheme(body.channels.instagram_en) or mock.get("instagram_en")),
|
||||||
(SourceType.FACEBOOK, "EN", _with_scheme(body.channels.facebook_en) or mock.get("facebook_en")),
|
(SourceType.FACEBOOK, "EN", _with_scheme(body.channels.facebook_en) or mock.get("facebook_en")),
|
||||||
(SourceType.TIKTOK, None, _with_scheme(body.channels.tiktok) or mock.get("tiktok")),
|
(SourceType.TIKTOK, "KR", _with_scheme(body.channels.tiktok) or mock.get("tiktok")),
|
||||||
(SourceType.KAKAOTALK, None, _with_scheme(body.channels.kakao_talk) or mock.get("kakao_talk")),
|
(SourceType.KAKAOTALK, "KR", _with_scheme(body.channels.kakao_talk) or mock.get("kakao_talk")),
|
||||||
(SourceType.NAVER_CAFE, None, _with_scheme(body.channels.naver_cafe) or mock.get("naver_cafe")),
|
(SourceType.NAVER_CAFE, "KR", _with_scheme(body.channels.naver_cafe) or mock.get("naver_cafe")),
|
||||||
]
|
]
|
||||||
for source_type, language, url in extra_channels:
|
for source_type, language, url in extra_channels:
|
||||||
if url:
|
if url:
|
||||||
|
|
|
||||||
|
|
@ -63,9 +63,8 @@ async def select_run_sources(analysis_run_id: str) -> list[dict]:
|
||||||
|
|
||||||
|
|
||||||
async def select_run_raw_data(analysis_run_id: str) -> dict:
|
async def select_run_raw_data(analysis_run_id: str) -> dict:
|
||||||
# language='EN' 인 row 는 dict key 를 "<source_type>_en" 으로 합성 (KR/EN 동시 수집 시 키 충돌 방지).
|
|
||||||
rows = await fetchall(
|
rows = await fetchall(
|
||||||
"SELECT rs.source_type, rs.language, ri.raw_data"
|
"SELECT rs.source_type, rs.language, ri.raw_data, ri.logo_url"
|
||||||
" FROM raw_info ri JOIN remote_source rs USING (source_id)"
|
" FROM raw_info ri JOIN remote_source rs USING (source_id)"
|
||||||
" WHERE ri.analysis_run_id = %s",
|
" WHERE ri.analysis_run_id = %s",
|
||||||
(analysis_run_id,),
|
(analysis_run_id,),
|
||||||
|
|
@ -76,7 +75,10 @@ async def select_run_raw_data(analysis_run_id: str) -> dict:
|
||||||
key = row["source_type"]
|
key = row["source_type"]
|
||||||
if (row.get("language") or "").upper() == "EN":
|
if (row.get("language") or "").upper() == "EN":
|
||||||
key = f"{key}_en"
|
key = f"{key}_en"
|
||||||
result[key] = json.loads(raw) if isinstance(raw, str) else raw
|
data = json.loads(raw) if isinstance(raw, str) else (raw or {})
|
||||||
|
if isinstance(data, dict) and row.get("logo_url"):
|
||||||
|
data["_logo_url"] = row["logo_url"]
|
||||||
|
result[key] = data
|
||||||
return result
|
return result
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -118,7 +120,7 @@ async def select_branding_info_id(analysis_run_id: str) -> int | None:
|
||||||
async def select_branding_logo_url(analysis_run_id: str) -> str | None:
|
async def select_branding_logo_url(analysis_run_id: str) -> str | None:
|
||||||
row = await fetchone(
|
row = await fetchone(
|
||||||
"SELECT ri.logo_url FROM raw_info ri JOIN remote_source rs USING (source_id)"
|
"SELECT ri.logo_url FROM raw_info ri JOIN remote_source rs USING (source_id)"
|
||||||
" WHERE ri.analysis_run_id = %s AND rs.source_type = 'branding' LIMIT 1",
|
" WHERE ri.analysis_run_id = %s AND rs.source_type = 'mainpage' LIMIT 1",
|
||||||
(analysis_run_id,),
|
(analysis_run_id,),
|
||||||
)
|
)
|
||||||
return (row or {}).get("logo_url")
|
return (row or {}).get("logo_url")
|
||||||
|
|
|
||||||
|
|
@ -23,7 +23,6 @@ async def _describe_logo(analysis_run_id: str, info_id: int, vc: VisionClient) -
|
||||||
candidates = [u for u in [
|
candidates = [u for u in [
|
||||||
column_logo,
|
column_logo,
|
||||||
branding_meta.get("logoUrl"),
|
branding_meta.get("logoUrl"),
|
||||||
branding_meta.get("ogImage"),
|
|
||||||
branding_meta.get("faviconUrl"),
|
branding_meta.get("faviconUrl"),
|
||||||
] if u]
|
] if u]
|
||||||
if homepage_url:
|
if homepage_url:
|
||||||
|
|
@ -59,7 +58,7 @@ async def _describe_channel_logos(analysis_run_id: str, info_id: int, vc: Vision
|
||||||
}
|
}
|
||||||
logos = [{"channel": label, "url": img}
|
logos = [{"channel": label, "url": img}
|
||||||
for key, label in _label.items()
|
for key, label in _label.items()
|
||||||
if (img := (raw.get(key) or {}).get("profileImage"))]
|
if (img := (raw.get(key) or {}).get("_logo_url"))]
|
||||||
if not logos:
|
if not logos:
|
||||||
logger.info("[channel_logos] skip — no channel profileImages")
|
logger.info("[channel_logos] skip — no channel profileImages")
|
||||||
return
|
return
|
||||||
|
|
|
||||||
|
|
@ -11,11 +11,18 @@ from models.status import SourceType
|
||||||
from integrations.site_fetcher import fetch_html_and_css
|
from integrations.site_fetcher import fetch_html_and_css
|
||||||
from services.brand_parser import find_logo_url_in_html, extract_brand_colors_from_text
|
from services.brand_parser import find_logo_url_in_html, extract_brand_colors_from_text
|
||||||
from common.db.source import update_raw_info_merge, update_raw_info_logo_url, select_run_raw_data
|
from common.db.source import update_raw_info_merge, update_raw_info_logo_url, select_run_raw_data
|
||||||
|
from common.db.base import fetchone
|
||||||
from services.facebook_audit import transform_for_storage as transform_facebook
|
from services.facebook_audit import transform_for_storage as transform_facebook
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
async def _save_with_logo(info_id: int, data: dict) -> None:
|
||||||
|
await update_raw_info(info_id, data)
|
||||||
|
if data.get("profileImage"):
|
||||||
|
await update_raw_info_logo_url(info_id, data["profileImage"])
|
||||||
|
|
||||||
|
|
||||||
async def collect_instagram(analysis_run_id: str, info_id: int, url: str) -> None:
|
async def collect_instagram(analysis_run_id: str, info_id: int, url: str) -> None:
|
||||||
logger.info("[instagram] start run=%s url=%s", analysis_run_id, url)
|
logger.info("[instagram] start run=%s url=%s", analysis_run_id, url)
|
||||||
await update_raw_info_status(info_id, "processing")
|
await update_raw_info_status(info_id, "processing")
|
||||||
|
|
@ -24,7 +31,7 @@ async def collect_instagram(analysis_run_id: str, info_id: int, url: str) -> Non
|
||||||
await update_raw_info_status(info_id, "failed")
|
await update_raw_info_status(info_id, "failed")
|
||||||
logger.warning("[instagram] failed run=%s", analysis_run_id)
|
logger.warning("[instagram] failed run=%s", analysis_run_id)
|
||||||
return
|
return
|
||||||
await update_raw_info(info_id, data)
|
await _save_with_logo(info_id, data)
|
||||||
logger.info("[instagram] done run=%s", analysis_run_id)
|
logger.info("[instagram] done run=%s", analysis_run_id)
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -37,7 +44,7 @@ async def collect_facebook(analysis_run_id: str, info_id: int, url: str) -> None
|
||||||
logger.warning("[facebook] failed run=%s", analysis_run_id)
|
logger.warning("[facebook] failed run=%s", analysis_run_id)
|
||||||
return
|
return
|
||||||
data = transform_facebook(data)
|
data = transform_facebook(data)
|
||||||
await update_raw_info(info_id, data)
|
await _save_with_logo(info_id, data)
|
||||||
logger.info("[facebook] done run=%s", analysis_run_id)
|
logger.info("[facebook] done run=%s", analysis_run_id)
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -61,7 +68,7 @@ async def collect_youtube(analysis_run_id: str, info_id: int, url: str) -> None:
|
||||||
await update_raw_info_status(info_id, "failed")
|
await update_raw_info_status(info_id, "failed")
|
||||||
logger.warning("[youtube] failed run=%s", analysis_run_id)
|
logger.warning("[youtube] failed run=%s", analysis_run_id)
|
||||||
return
|
return
|
||||||
await update_raw_info(info_id, data)
|
await _save_with_logo(info_id, data)
|
||||||
logger.info("[youtube] done run=%s", analysis_run_id)
|
logger.info("[youtube] done run=%s", analysis_run_id)
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -101,7 +108,7 @@ async def collect_tiktok(analysis_run_id: str, info_id: int, url: str) -> None:
|
||||||
await update_raw_info_status(info_id, "failed")
|
await update_raw_info_status(info_id, "failed")
|
||||||
logger.warning("[tiktok] failed run=%s", analysis_run_id)
|
logger.warning("[tiktok] failed run=%s", analysis_run_id)
|
||||||
return
|
return
|
||||||
await update_raw_info(info_id, data)
|
await _save_with_logo(info_id, data)
|
||||||
logger.info("[tiktok] done run=%s", analysis_run_id)
|
logger.info("[tiktok] done run=%s", analysis_run_id)
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -125,8 +132,6 @@ async def collect_kakaotalk(analysis_run_id: str, info_id: int, url: str) -> Non
|
||||||
|
|
||||||
|
|
||||||
async def collect_brand_basics(analysis_run_id: str, info_id: int) -> None:
|
async def collect_brand_basics(analysis_run_id: str, info_id: int) -> None:
|
||||||
"""branding 단계 collect — HTML/CSS 한 번 fetch → logo URL(컬럼) + brand 색상(JSON).
|
|
||||||
mainpage 수집 결과 의존이라 main wave gather 끝난 뒤 호출."""
|
|
||||||
logger.info("[brand_basics] start run=%s info=%s", analysis_run_id, info_id)
|
logger.info("[brand_basics] start run=%s info=%s", analysis_run_id, info_id)
|
||||||
raw = await select_run_raw_data(analysis_run_id)
|
raw = await select_run_raw_data(analysis_run_id)
|
||||||
mainpage = raw.get("mainpage") or {}
|
mainpage = raw.get("mainpage") or {}
|
||||||
|
|
@ -139,7 +144,13 @@ async def collect_brand_basics(analysis_run_id: str, info_id: int) -> None:
|
||||||
|
|
||||||
logo_url = html_logo_url or branding_meta.get("logoUrl") or branding_meta.get("ogImage")
|
logo_url = html_logo_url or branding_meta.get("logoUrl") or branding_meta.get("ogImage")
|
||||||
if logo_url:
|
if logo_url:
|
||||||
await update_raw_info_logo_url(info_id, logo_url)
|
mainpage_row = await fetchone(
|
||||||
|
"SELECT ri.info_id FROM raw_info ri JOIN remote_source rs USING (source_id)"
|
||||||
|
" WHERE ri.analysis_run_id = %s AND rs.source_type = 'mainpage' LIMIT 1",
|
||||||
|
(analysis_run_id,),
|
||||||
|
)
|
||||||
|
if mainpage_row:
|
||||||
|
await update_raw_info_logo_url(mainpage_row["info_id"], logo_url)
|
||||||
|
|
||||||
payload: dict = {}
|
payload: dict = {}
|
||||||
if css_colors:
|
if css_colors:
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue