refactor(branding): logo URL 을 raw_info.logo_url 컬럼으로 분리
- collect_brand_assets: Vision 결과의 logo_images 를 JSON 에서 제거하고 진짜 로고(logo/og 매칭) 인 경우만 raw_info.logo_url 컬럼에 저장. favicon-only 매칭은 컬럼 저장 X (옛 logic 동일).
- analysis._build_overrides: select_branding_logo_url 로 컬럼 읽어 ClinicSnapshot.logo_images 를 horizontal=logo_url 로 재구성.
- branding raw_data 가 "사실 데이터(URL/hex)" vs "Vision 분석 텍스트(묘사)" 섞이던 문제 일부 해소 — URL 은 컬럼, 텍스트만 JSON 에 잔존.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
main
parent
009d95377a
commit
b844951ad8
|
|
@ -3,6 +3,7 @@ from common.db.hospital import select_hospital, update_hospital_status, insert_h
|
|||
from common.db.source import (
|
||||
insert_source, select_source_mainpage, select_source_by_type,
|
||||
insert_raw_info, update_raw_info_status, update_raw_info, update_raw_info_merge,
|
||||
update_raw_info_logo_url, select_branding_logo_url,
|
||||
select_raw_info_data,
|
||||
select_run_sources, select_run_raw_data, select_run_source_raw,
|
||||
select_run_mainpage_url,
|
||||
|
|
|
|||
|
|
@ -98,6 +98,23 @@ async def select_run_source_raw(
|
|||
return json.loads(row["raw_data"]) if isinstance(row["raw_data"], str) else row["raw_data"]
|
||||
|
||||
|
||||
async def update_raw_info_logo_url(info_id: int, logo_url: str) -> None:
    """Store a logo URL in the ``raw_info.logo_url`` column.

    The URL is kept in its own column, separate from the JSON ``raw_data``,
    so it is easy to index and query as a plain column.

    Args:
        info_id: Value matched against ``raw_info.info_id`` in the WHERE clause.
        logo_url: Value written to ``raw_info.logo_url``.
    """
    statement = "UPDATE raw_info SET logo_url = %s WHERE info_id = %s"
    # Placeholder order: first the new column value, then the row key.
    params = (logo_url, info_id)
    await execute(statement, params)
|
||||
|
||||
|
||||
async def select_branding_logo_url(analysis_run_id: str) -> str | None:
    """Fetch the ``logo_url`` column of a run's branding ``raw_info`` row.

    Joins ``raw_info`` to ``remote_source`` on ``source_id`` and keeps only
    sources whose ``source_type`` is ``'branding'``.

    Args:
        analysis_run_id: Value matched against ``raw_info.analysis_run_id``.

    Returns:
        The row's ``logo_url`` value, or ``None`` when no row matches (or the
        fetched row carries no ``logo_url``).
    """
    # NOTE(review): LIMIT 1 has no ORDER BY, so if a run can ever have more
    # than one branding row the row returned is whatever the database picks.
    # Confirm that at most one such row exists per run.
    query = (
        "SELECT ri.logo_url FROM raw_info ri JOIN remote_source rs USING (source_id)"
        " WHERE ri.analysis_run_id = %s AND rs.source_type = 'branding' LIMIT 1"
    )
    record = await fetchone(query, (analysis_run_id,))
    if not record:
        # A falsy fetch result is treated as "no logo stored".
        return None
    return record.get("logo_url")
|
||||
|
||||
|
||||
async def update_raw_info_merge(info_id: int, patch: dict) -> None:
|
||||
"""raw_info.raw_data 를 read-modify-write 로 top-level 머지.
|
||||
한 source 가 단계별로 (예: branding 의 brandAssets → channelLogos) 키를 덧붙일 때 사용."""
|
||||
|
|
|
|||
|
|
@ -4,7 +4,7 @@ import re
|
|||
from datetime import datetime
|
||||
from urllib.parse import urlparse
|
||||
from common.db.run import select_run, update_run_report, update_run_plan
|
||||
from common.db.source import select_run_raw_data
|
||||
from common.db.source import select_run_raw_data, select_branding_logo_url
|
||||
from common.db.market import select_market
|
||||
from integrations.llm.llm_service import LLMService
|
||||
from integrations.llm.prompt import report_prompt, plan_prompt, youtube_diagnosis_prompt
|
||||
|
|
@ -100,7 +100,7 @@ async def generate_plan(analysis_run_id: str) -> PlanOutput:
|
|||
return await LLMService(provider="perplexity").generate(plan_prompt, input_data)
|
||||
|
||||
|
||||
def _build_clinic_snapshot(gangnam_unni: dict, mainpage: dict, brand_assets: dict) -> dict:
|
||||
def _build_clinic_snapshot(gangnam_unni: dict, mainpage: dict, brand_assets: dict, logo_url: str | None) -> dict:
|
||||
snapshot: dict = {}
|
||||
doctors = gangnam_unni.get("doctors", [])
|
||||
lead = max(doctors, key=lambda d: d.get("reviews", 0)) if doctors else None
|
||||
|
|
@ -121,8 +121,9 @@ def _build_clinic_snapshot(gangnam_unni: dict, mainpage: dict, brand_assets: dic
|
|||
"rating": lead.get("rating"),
|
||||
"review_count": lead.get("reviews"),
|
||||
}
|
||||
# branding.brandAssets 에서 logo_images / brand_colors 강제 주입. LLM 이 프롬프트 가드 무시하고 null 로 두는 케이스 차단.
|
||||
if brand_assets.get("logo_images"): snapshot["logo_images"] = brand_assets["logo_images"]
|
||||
# logo URL 은 raw_info.logo_url 컬럼에서, brand_colors 는 JSON 에서 강제 주입. LLM 의 null 처리 차단.
|
||||
if logo_url:
|
||||
snapshot["logo_images"] = {"circle": None, "horizontal": logo_url, "korean": None}
|
||||
if brand_assets.get("brand_colors"): snapshot["brand_colors"] = brand_assets["brand_colors"]
|
||||
return ClinicSnapshot.model_validate(snapshot).model_dump()
|
||||
|
||||
|
|
@ -268,8 +269,9 @@ async def _build_overrides(analysis_run_id: str) -> dict:
|
|||
naver_cafe = raw.get("naver_cafe", {}) or {}
|
||||
brand_assets = branding.get("brandAssets") or {}
|
||||
channel_logos = branding.get("channelLogos") or {}
|
||||
logo_url = await select_branding_logo_url(analysis_run_id)
|
||||
|
||||
snapshot: dict = _build_clinic_snapshot(gangnam_unni, mainpage, brand_assets)
|
||||
snapshot: dict = _build_clinic_snapshot(gangnam_unni, mainpage, brand_assets, logo_url)
|
||||
yt_patch: dict = await _build_youtube_audit(youtube)
|
||||
|
||||
# ── instagram (KR·EN 계정을 코드에서 구성 → LLM 출력 무시하고 교체) ──────────────
|
||||
|
|
|
|||
|
|
@ -1,7 +1,7 @@
|
|||
import logging
|
||||
import os
|
||||
from urllib.parse import urlparse
|
||||
from common.db.source import select_run_raw_data, update_raw_info_merge
|
||||
from common.db.source import select_run_raw_data, update_raw_info_merge, update_raw_info_logo_url
|
||||
from integrations.vision import VisionClient
|
||||
from integrations.color_extractor import extract_brand_assets_from_site
|
||||
|
||||
|
|
@ -57,9 +57,6 @@ async def collect_brand_assets(analysis_run_id: str, info_id: int) -> None:
|
|||
if result:
|
||||
used_kind = kind
|
||||
break
|
||||
# favicon 으로만 분석된 경우 진짜 로고가 아니므로 logo URL 은 박지 않음 (묘사는 OK)
|
||||
if result and used_kind == "favicon" and result.get("logo_images"):
|
||||
result["logo_images"] = {"circle": None, "horizontal": None, "korean": None}
|
||||
elif not api_key:
|
||||
logger.info("[brand_assets] GEMINI_API_KEY not set — 색상만 저장, Vision 묘사 skip")
|
||||
|
||||
|
|
@ -71,10 +68,18 @@ async def collect_brand_assets(analysis_run_id: str, info_id: int) -> None:
|
|||
elif result:
|
||||
result["color_source"] = "vision"
|
||||
|
||||
# 5. logo URL 은 JSON 이 아니라 raw_info.logo_url 컬럼에 분리 저장 (raw vs 분석 텍스트 분리).
|
||||
# favicon 으로만 매칭된 경우 진짜 로고 아니라 컬럼 저장 X.
|
||||
result.pop("logo_images", None)
|
||||
column_logo_url = logo_url if used_kind in ("logo", "og") and logo_url else None
|
||||
if column_logo_url:
|
||||
await update_raw_info_logo_url(info_id, column_logo_url)
|
||||
|
||||
if result:
|
||||
result["logo_source"] = used_kind or "none"
|
||||
await update_raw_info_merge(info_id, {"brandAssets": result})
|
||||
logger.info("[brand_assets] done keys=%s", list(result.keys()) if result else None)
|
||||
logger.info("[brand_assets] done logo_url=%s keys=%s",
|
||||
bool(column_logo_url), list(result.keys()) if result else None)
|
||||
|
||||
|
||||
async def collect_channel_logos(analysis_run_id: str, info_id: int) -> None:
|
||||
|
|
|
|||
Loading…
Reference in New Issue