diff --git a/app/common/db/__init__.py b/app/common/db/__init__.py
index 8f20d0d..6ea6cf7 100644
--- a/app/common/db/__init__.py
+++ b/app/common/db/__init__.py
@@ -3,14 +3,14 @@ from common.db.hospital import select_hospital, update_hospital_status, insert_h
from common.db.source import (
insert_source, select_source_mainpage, select_source_by_type,
insert_raw_info, update_raw_info_status, update_raw_info, update_raw_info_merge,
- update_raw_info_logo_url, select_branding_logo_url,
+ update_raw_info_logo_url, select_branding_logo_url, select_branding_info_id,
select_raw_info_data,
select_run_sources, select_run_raw_data, select_run_source_raw,
select_run_mainpage_url,
)
from common.db.run import (
insert_run, select_run, select_run_status, update_run_status,
- update_run_report, update_run_plan, select_run_with_clinic,
+ update_run_report, update_run_plan, select_run_with_clinic, select_run_report_data,
)
from common.db.market import upsert_market_status, upsert_market_result, select_market
from common.db.file_data import insert_file, select_run_files, select_file, delete_file
diff --git a/app/common/db/run.py b/app/common/db/run.py
index cbf188c..4169f5f 100644
--- a/app/common/db/run.py
+++ b/app/common/db/run.py
@@ -22,6 +22,18 @@ async def select_run(analysis_run_id: str) -> dict | None:
)
+async def select_run_report_data(analysis_run_id: str) -> dict | None:
+    """Fetch report_data on demand: JSON text/bytes is parsed; None if row or value is missing/empty."""
+    import json  # kept function-local, as this hunk does not touch the module's import block
+    row = await fetchone(
+        "SELECT report_data FROM analysis_runs WHERE analysis_run_id = %s", (analysis_run_id,)
+    )
+    if not row or not (payload := row["report_data"]):
+        return None
+    # Decode text and byte payloads alike; a value that is neither (already decoded) passes through.
+    return json.loads(payload) if isinstance(payload, (str, bytes, bytearray)) else payload
+
+
async def select_run_status(analysis_run_id: str) -> str | None:
row = await fetchone(
"SELECT status FROM analysis_runs WHERE analysis_run_id = %s",
diff --git a/app/common/db/source.py b/app/common/db/source.py
index 3084685..871778f 100644
--- a/app/common/db/source.py
+++ b/app/common/db/source.py
@@ -106,6 +106,15 @@ async def update_raw_info_logo_url(info_id: int, logo_url: str) -> None:
)
+async def select_branding_info_id(analysis_run_id: str) -> int | None:
+    """Return info_id of one raw_info row joined to a 'branding' source for the run, else None."""
+    # LIMIT 1 has no ORDER BY, so if several rows match, which one is returned is unspecified.
+    query = ("SELECT ri.info_id FROM raw_info ri JOIN remote_source rs USING (source_id)"
+             " WHERE ri.analysis_run_id = %s AND rs.source_type = 'branding' LIMIT 1")
+    branding_row = await fetchone(query, (analysis_run_id,))
+    return branding_row.get("info_id") if branding_row else None
+
+
async def select_branding_logo_url(analysis_run_id: str) -> str | None:
row = await fetchone(
"SELECT ri.logo_url FROM raw_info ri JOIN remote_source rs USING (source_id)"
diff --git a/app/integrations/color_extractor.py b/app/integrations/color_extractor.py
deleted file mode 100644
index 6419061..0000000
--- a/app/integrations/color_extractor.py
+++ /dev/null
@@ -1,275 +0,0 @@
-"""홈페이지 HTML/CSS에서 hex 색상 직접 추출 + 빈도 기반 brand palette 산출.
-
-Vision LLM에 의존하지 않고 페이지의 실제 CSS 값을 정규식으로 잡음.
-로고만 분석하는 Vision보다 사이트 전체 컬러 시스템 (primary/secondary/background/text)을 더 정확히 추출.
-"""
-import logging
-import re
-import ssl
-from collections import Counter
-from urllib.parse import urljoin, urlparse
-import httpx
-
-logger = logging.getLogger(__name__)
-
-
-def _make_ssl_context() -> ssl.SSLContext:
- """오래된 한국 의료 사이트들이 SSL DH_KEY_TOO_SMALL / cipher 약함 등으로 차단되는 문제 우회.
- 보안 등급 1로 낮춤 + cert 검증 유지."""
- ctx = ssl.create_default_context()
- try:
- ctx.set_ciphers("DEFAULT@SECLEVEL=1")
- except ssl.SSLError:
- pass
- return ctx
-
-
-async def _fetch_html(url: str, timeout: float = 20.0) -> tuple[int, str]:
- """SSL/검증 단계별 fallback으로 HTML 받기. 그랜드/톡스앤필 같은 oldsite 대응."""
- headers = {"User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36"}
- # 1차: 표준 검증
- try:
- async with httpx.AsyncClient(timeout=timeout, follow_redirects=True, headers=headers) as c:
- r = await c.get(url)
- return r.status_code, r.text
- except (httpx.ConnectError, httpx.ReadError, ssl.SSLError) as e:
- logger.info("[fetch] %s standard SSL failed: %s — fallback to weak cipher", url, e)
- # 2차: 약한 cipher 허용
- try:
- async with httpx.AsyncClient(timeout=timeout, follow_redirects=True, headers=headers, verify=_make_ssl_context()) as c:
- r = await c.get(url)
- return r.status_code, r.text
- except (httpx.ConnectError, httpx.ReadError, ssl.SSLError) as e:
- logger.info("[fetch] %s weak cipher failed: %s — fallback to verify=False", url, e)
- # 3차: SSL 검증 끔 (host mismatch 등)
- try:
- async with httpx.AsyncClient(timeout=timeout, follow_redirects=True, headers=headers, verify=False) as c:
- r = await c.get(url)
- return r.status_code, r.text
- except Exception as e:
- logger.warning("[fetch] %s all fallbacks failed: %s", url, e)
- return 0, ""
-
-LOGO_IMG_PATTERNS = [
- # 1)
- re.compile(r'
]*\bclass=["\'][^"\']*\blogo\b[^"\']*["\'][^>]*\bsrc=["\']([^"\']+)["\']', re.IGNORECASE),
- # 2)
- re.compile(r'
]*\bsrc=["\']([^"\']+)["\'][^>]*\bclass=["\'][^"\']*\blogo\b[^"\']*["\']', re.IGNORECASE),
- # 3)
- re.compile(r'
]*\bid=["\'][^"\']*\blogo\b[^"\']*["\'][^>]*\bsrc=["\']([^"\']+)["\']', re.IGNORECASE),
- # 4)
- re.compile(r'
]*\balt=["\'][^"\']*\blogo\b[^"\']*["\'][^>]*\bsrc=["\']([^"\']+)["\']', re.IGNORECASE),
- # 5) <...nested...>
- re.compile(r'<(?:a|h[1-6]|div|span)[^>]*\b(?:class|id)=["\'][^"\']*\blogo\b[^"\']*["\'][^>]*>(?:[^<]|<(?!img))*
]*\bsrc=["\']([^"\']+)["\']', re.IGNORECASE | re.DOTALL),
- # 6) inline background-image:
- re.compile(r'<(?:a|div|span|h[1-6])[^>]*\b(?:class|id)=["\'][^"\']*\blogo\b[^"\']*["\'][^>]*\bstyle=["\'][^"\']*background(?:-image)?\s*:\s*url\(\s*["\']?([^"\')\s]+)', re.IGNORECASE),
- # 7) inline background-image: (속성 순서 반대)
- re.compile(r'<(?:a|div|span|h[1-6])[^>]*\bstyle=["\'][^"\']*background(?:-image)?\s*:\s*url\(\s*["\']?([^"\')\s]+)[^"\']*["\'][^>]*\b(?:class|id)=["\'][^"\']*\blogo\b', re.IGNORECASE),
- # 8) src 자체에 "logo" 포함 (header_logo.png, brand-logo.svg 등)
- re.compile(r'
]*\bsrc=["\']([^"\']*\blogo\b[^"\']*\.(?:png|svg|jpe?g|webp)[^"\']*)["\']', re.IGNORECASE),
- # 9) ...
(헤더 영역 첫 img)
- re.compile(r']*>(?:[^<]|<(?!img))*
]*\bsrc=["\']([^"\']+\.(?:png|svg|jpe?g|webp)[^"\']*)["\']', re.IGNORECASE | re.DOTALL),
- # 10)