refactor(branding): logo URL 을 raw_info.logo_url 컬럼으로 분리

- collect_brand_assets: Vision 결과의 logo_images 를 JSON 에서 제거하고
  진짜 로고(logo/og 매칭) 인 경우만 raw_info.logo_url 컬럼에 저장.
  favicon-only 매칭은 컬럼에 저장하지 않음 (기존 로직과 동일).
- analysis._build_overrides: select_branding_logo_url 로 컬럼 읽어
  ClinicSnapshot.logo_images 를 horizontal=logo_url 로 재구성.
- branding raw_data 가 "사실 데이터(URL/hex)" vs "Vision 분석 텍스트(묘사)"
  섞이던 문제 일부 해소 — URL 은 컬럼, 텍스트만 JSON 에 잔존.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
main
Mina Choi 2026-06-01 17:01:19 +09:00
parent 009d95377a
commit b844951ad8
4 changed files with 35 additions and 10 deletions

View File

@ -3,6 +3,7 @@ from common.db.hospital import select_hospital, update_hospital_status, insert_h
from common.db.source import ( from common.db.source import (
insert_source, select_source_mainpage, select_source_by_type, insert_source, select_source_mainpage, select_source_by_type,
insert_raw_info, update_raw_info_status, update_raw_info, update_raw_info_merge, insert_raw_info, update_raw_info_status, update_raw_info, update_raw_info_merge,
update_raw_info_logo_url, select_branding_logo_url,
select_raw_info_data, select_raw_info_data,
select_run_sources, select_run_raw_data, select_run_source_raw, select_run_sources, select_run_raw_data, select_run_source_raw,
select_run_mainpage_url, select_run_mainpage_url,

View File

@ -98,6 +98,23 @@ async def select_run_source_raw(
return json.loads(row["raw_data"]) if isinstance(row["raw_data"], str) else row["raw_data"] return json.loads(row["raw_data"]) if isinstance(row["raw_data"], str) else row["raw_data"]
async def update_raw_info_logo_url(info_id: int, logo_url: str) -> None:
    """Write a logo URL into the ``raw_info.logo_url`` column.

    The URL is kept in its own column, separate from the JSON ``raw_data``,
    so that it is easy to index and query.

    Args:
        info_id: ``raw_info.info_id`` of the row to update.
        logo_url: Value to store in the ``logo_url`` column.
    """
    statement = "UPDATE raw_info SET logo_url = %s WHERE info_id = %s"
    # Parameter order follows the placeholders: SET value first, then the key.
    params = (logo_url, info_id)
    await execute(statement, params)
async def select_branding_logo_url(analysis_run_id: str) -> str | None:
    """Read the ``logo_url`` column of a run's branding ``raw_info`` row.

    Joins ``raw_info`` to ``remote_source`` on ``source_id`` and keeps only
    rows whose ``source_type`` is ``'branding'``.

    Args:
        analysis_run_id: Value matched against ``raw_info.analysis_run_id``.

    Returns:
        The ``logo_url`` value of the matched row, or ``None`` when no row
        matches (or the fetched row carries no ``logo_url``).
    """
    # NOTE(review): LIMIT 1 has no ORDER BY, so if a run can ever have more
    # than one branding row the row returned is unspecified — confirm that a
    # run has at most one branding source.
    query = (
        "SELECT ri.logo_url FROM raw_info ri JOIN remote_source rs USING (source_id)"
        " WHERE ri.analysis_run_id = %s AND rs.source_type = 'branding' LIMIT 1"
    )
    record = await fetchone(query, (analysis_run_id,))
    if not record:
        return None
    return record.get("logo_url")
async def update_raw_info_merge(info_id: int, patch: dict) -> None: async def update_raw_info_merge(info_id: int, patch: dict) -> None:
"""raw_info.raw_data 를 read-modify-write 로 top-level 머지. """raw_info.raw_data 를 read-modify-write 로 top-level 머지.
source 단계별로 (: branding brandAssets channelLogos) 키를 덧붙일 사용.""" source 단계별로 (: branding brandAssets channelLogos) 키를 덧붙일 사용."""

View File

@ -4,7 +4,7 @@ import re
from datetime import datetime from datetime import datetime
from urllib.parse import urlparse from urllib.parse import urlparse
from common.db.run import select_run, update_run_report, update_run_plan from common.db.run import select_run, update_run_report, update_run_plan
from common.db.source import select_run_raw_data from common.db.source import select_run_raw_data, select_branding_logo_url
from common.db.market import select_market from common.db.market import select_market
from integrations.llm.llm_service import LLMService from integrations.llm.llm_service import LLMService
from integrations.llm.prompt import report_prompt, plan_prompt, youtube_diagnosis_prompt from integrations.llm.prompt import report_prompt, plan_prompt, youtube_diagnosis_prompt
@ -100,7 +100,7 @@ async def generate_plan(analysis_run_id: str) -> PlanOutput:
return await LLMService(provider="perplexity").generate(plan_prompt, input_data) return await LLMService(provider="perplexity").generate(plan_prompt, input_data)
def _build_clinic_snapshot(gangnam_unni: dict, mainpage: dict, brand_assets: dict) -> dict: def _build_clinic_snapshot(gangnam_unni: dict, mainpage: dict, brand_assets: dict, logo_url: str | None) -> dict:
snapshot: dict = {} snapshot: dict = {}
doctors = gangnam_unni.get("doctors", []) doctors = gangnam_unni.get("doctors", [])
lead = max(doctors, key=lambda d: d.get("reviews", 0)) if doctors else None lead = max(doctors, key=lambda d: d.get("reviews", 0)) if doctors else None
@ -121,8 +121,9 @@ def _build_clinic_snapshot(gangnam_unni: dict, mainpage: dict, brand_assets: dic
"rating": lead.get("rating"), "rating": lead.get("rating"),
"review_count": lead.get("reviews"), "review_count": lead.get("reviews"),
} }
# branding.brandAssets 에서 logo_images / brand_colors 강제 주입. LLM 이 프롬프트 가드 무시하고 null 로 두는 케이스 차단. # logo URL 은 raw_info.logo_url 컬럼에서, brand_colors 는 JSON 에서 강제 주입. LLM 의 null 처리 차단.
if brand_assets.get("logo_images"): snapshot["logo_images"] = brand_assets["logo_images"] if logo_url:
snapshot["logo_images"] = {"circle": None, "horizontal": logo_url, "korean": None}
if brand_assets.get("brand_colors"): snapshot["brand_colors"] = brand_assets["brand_colors"] if brand_assets.get("brand_colors"): snapshot["brand_colors"] = brand_assets["brand_colors"]
return ClinicSnapshot.model_validate(snapshot).model_dump() return ClinicSnapshot.model_validate(snapshot).model_dump()
@ -268,8 +269,9 @@ async def _build_overrides(analysis_run_id: str) -> dict:
naver_cafe = raw.get("naver_cafe", {}) or {} naver_cafe = raw.get("naver_cafe", {}) or {}
brand_assets = branding.get("brandAssets") or {} brand_assets = branding.get("brandAssets") or {}
channel_logos = branding.get("channelLogos") or {} channel_logos = branding.get("channelLogos") or {}
logo_url = await select_branding_logo_url(analysis_run_id)
snapshot: dict = _build_clinic_snapshot(gangnam_unni, mainpage, brand_assets) snapshot: dict = _build_clinic_snapshot(gangnam_unni, mainpage, brand_assets, logo_url)
yt_patch: dict = await _build_youtube_audit(youtube) yt_patch: dict = await _build_youtube_audit(youtube)
# ── instagram (KR·EN 계정을 코드에서 구성 → LLM 출력 무시하고 교체) ────────────── # ── instagram (KR·EN 계정을 코드에서 구성 → LLM 출력 무시하고 교체) ──────────────

View File

@ -1,7 +1,7 @@
import logging import logging
import os import os
from urllib.parse import urlparse from urllib.parse import urlparse
from common.db.source import select_run_raw_data, update_raw_info_merge from common.db.source import select_run_raw_data, update_raw_info_merge, update_raw_info_logo_url
from integrations.vision import VisionClient from integrations.vision import VisionClient
from integrations.color_extractor import extract_brand_assets_from_site from integrations.color_extractor import extract_brand_assets_from_site
@ -57,9 +57,6 @@ async def collect_brand_assets(analysis_run_id: str, info_id: int) -> None:
if result: if result:
used_kind = kind used_kind = kind
break break
# favicon 으로만 분석된 경우 진짜 로고가 아니므로 logo URL 은 박지 않음 (묘사는 OK)
if result and used_kind == "favicon" and result.get("logo_images"):
result["logo_images"] = {"circle": None, "horizontal": None, "korean": None}
elif not api_key: elif not api_key:
logger.info("[brand_assets] GEMINI_API_KEY not set — 색상만 저장, Vision 묘사 skip") logger.info("[brand_assets] GEMINI_API_KEY not set — 색상만 저장, Vision 묘사 skip")
@ -71,10 +68,18 @@ async def collect_brand_assets(analysis_run_id: str, info_id: int) -> None:
elif result: elif result:
result["color_source"] = "vision" result["color_source"] = "vision"
# 5. logo URL 은 JSON 이 아니라 raw_info.logo_url 컬럼에 분리 저장 (raw vs 분석 텍스트 분리).
# favicon 으로만 매칭된 경우 진짜 로고 아니라 컬럼 저장 X.
result.pop("logo_images", None)
column_logo_url = logo_url if used_kind in ("logo", "og") and logo_url else None
if column_logo_url:
await update_raw_info_logo_url(info_id, column_logo_url)
if result: if result:
result["logo_source"] = used_kind or "none" result["logo_source"] = used_kind or "none"
await update_raw_info_merge(info_id, {"brandAssets": result}) await update_raw_info_merge(info_id, {"brandAssets": result})
logger.info("[brand_assets] done keys=%s", list(result.keys()) if result else None) logger.info("[brand_assets] done logo_url=%s keys=%s",
bool(column_logo_url), list(result.keys()) if result else None)
async def collect_channel_logos(analysis_run_id: str, info_id: int) -> None: async def collect_channel_logos(analysis_run_id: str, info_id: int) -> None: