60 changed files with 1576 additions and 6523 deletions
--- a/.gitignore
+++ b/.gitignore
@ -46,5 +46,3 @@ alembic/versions/*.pyc
 test_results/
 app/test*
 docker-compose.yml
--- a/SQL/db_create.sql
+++ b/SQL/db_create.sql
@ -1,4 +1,85 @@
-- user_info
+-- 테이블 순서는 관계를 고려하여 한 번에 실행해도 에러가 발생하지 않게 정렬되었습니다.
 -- instagram_data Table Create SQL
 -- 테이블 생성 SQL - instagram_data
 CREATE TABLE instagram_data
 (
    `id`           INT             NOT NULL    AUTO_INCREMENT,
    `hospital_id`  CHAR(36)        NOT NULL,
    `url`          VARCHAR(500)    NOT NULL,
    `status`       VARCHAR(20)     NOT NULL    DEFAULT 'start',
    `raw_data`     JSON            NULL,
    `created_at`   TIMESTAMP       NOT NULL    DEFAULT CURRENT_TIMESTAMP,
     PRIMARY KEY (id)
 );
 -- Index 설정 SQL - instagram_data(hospital_id)
 CREATE INDEX IX_instagram_data_1
    ON instagram_data(hospital_id);
 -- facebook_data Table Create SQL
 -- 테이블 생성 SQL - facebook_data
 CREATE TABLE facebook_data
 (
    `id`           INT             NOT NULL    AUTO_INCREMENT,
    `hospital_id`  CHAR(36)        NOT NULL,
    `url`          VARCHAR(500)    NOT NULL,
    `status`       VARCHAR(20)     NOT NULL    DEFAULT 'start',
    `raw_data`     JSON            NULL,
    `created_at`   TIMESTAMP       NOT NULL    DEFAULT CURRENT_TIMESTAMP,
     PRIMARY KEY (id)
 );
 -- Index 설정 SQL - facebook_data(hospital_id)
 CREATE INDEX IX_facebook_data_1
    ON facebook_data(hospital_id);
 -- naver_blog_data Table Create SQL
 -- 테이블 생성 SQL - naver_blog_data
 CREATE TABLE naver_blog_data
 (
    `id`           INT             NOT NULL    AUTO_INCREMENT,
    `hospital_id`  CHAR(36)        NOT NULL,
    `url`          VARCHAR(500)    NOT NULL,
    `status`       VARCHAR(20)     NOT NULL    DEFAULT 'start',
    `raw_data`     JSON            NULL,
    `created_at`   TIMESTAMP       NOT NULL    DEFAULT CURRENT_TIMESTAMP,
     PRIMARY KEY (id)
 );
 -- Index 설정 SQL - naver_blog_data(hospital_id)
 CREATE INDEX IX_naver_blog_data_1
    ON naver_blog_data(hospital_id);
 -- hospital_baseinfo Table Create SQL
 -- 테이블 생성 SQL - hospital_baseinfo
 CREATE TABLE hospital_baseinfo
 (
    `hospital_id`    CHAR(36)        NOT NULL,
    `owner_user_id`  INT             NOT NULL,
    `hospital_name`  VARCHAR(50)     NOT NULL,
    `hospital_name_en` VARCHAR(50)   NULL,
    `brn`            VARCHAR(50)     NOT NULL,
    `road_address`   VARCHAR(100)    NULL,
    `site_address`   VARCHAR(100)    NULL,
    `url`            VARCHAR(500)    NULL,
    `status`         VARCHAR(20)     NOT NULL    DEFAULT 'start',
    `raw_data`       JSON            NULL,
    `created_at`     TIMESTAMP       NOT NULL    DEFAULT CURRENT_TIMESTAMP,
    `updated_at`     TIMESTAMP       NOT NULL    DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP,
     PRIMARY KEY (hospital_id)
 );
 -- Index 설정 SQL - hospital_baseinfo(owner_user_id)
 CREATE INDEX IX_hospital_baseinfo_1
    ON hospital_baseinfo(owner_user_id);
 -- user_info Table Create SQL
 -- 테이블 생성 SQL - user_info
 CREATE TABLE user_info
 (
    `user_id`     INT            NOT NULL    AUTO_INCREMENT, 
@ -9,49 +90,52 @@ CREATE TABLE user_info
     PRIMARY KEY (user_id)
 );
-
+-- youtube_data Table Create SQL
-- hospital_baseinfo
+CREATE TABLE youtube_data
 CREATE TABLE hospital_baseinfo
 (
    `id`           INT             NOT NULL    AUTO_INCREMENT,
    `hospital_id`  CHAR(36)        NOT NULL,
    `owner_user_id`    INT          NOT NULL,
    `hospital_name`    VARCHAR(50)  NOT NULL,
    `hospital_name_en` VARCHAR(50)  NULL,
    `brn`              VARCHAR(50)  NOT NULL,
    `road_address`     VARCHAR(100) NULL,
    `site_address`     VARCHAR(100) NULL,
    `status`           VARCHAR(20)  NOT NULL    DEFAULT 'start',
    `created_at`       TIMESTAMP    NOT NULL    DEFAULT CURRENT_TIMESTAMP,
    `updated_at`       TIMESTAMP    NOT NULL    DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP,
     PRIMARY KEY (hospital_id)
 );
 CREATE INDEX IX_hospital_baseinfo_1 ON hospital_baseinfo (owner_user_id);
 -- remote_source: 병원별 채널 소스 정보 (instagram/facebook/naver_blog/youtube/gangnam_unni 등)
 CREATE TABLE remote_source
 (
    `source_id`   INT          NOT NULL    AUTO_INCREMENT,
    `hospital_id` CHAR(36)     NOT NULL,
    `source_type` VARCHAR(50)  NOT NULL,
    `language`    CHAR(2)      NULL,
    `url`          VARCHAR(500)    NOT NULL,
    `status`       VARCHAR(20)     NOT NULL    DEFAULT 'start',
    `raw_data`     JSON            NULL,
    `created_at`   TIMESTAMP       NOT NULL    DEFAULT CURRENT_TIMESTAMP,
-     PRIMARY KEY (source_id)
+     PRIMARY KEY (id)
 );
-CREATE INDEX IX_remote_source_1 ON remote_source (hospital_id);
+-- Index 설정 SQL - youtube_data(hospital_id)
-CREATE INDEX IX_remote_source_2 ON remote_source (hospital_id, source_type);
+CREATE INDEX IX_youtube_data_1
    ON youtube_data(hospital_id);
-- analysis_runs
+-- gangnam_unni_data Table Create SQL
 CREATE TABLE gangnam_unni_data
 (
    `id`           INT             NOT NULL    AUTO_INCREMENT,
    `hospital_id`  CHAR(36)        NOT NULL,
    `url`          VARCHAR(500)    NOT NULL,
    `status`       VARCHAR(20)     NOT NULL    DEFAULT 'start',
    `raw_data`     JSON            NULL,
    `created_at`   TIMESTAMP       NOT NULL    DEFAULT CURRENT_TIMESTAMP,
     PRIMARY KEY (id)
 );
 -- Index 설정 SQL - gangnam_unni_data(hospital_id)
 CREATE INDEX IX_gangnam_unni_data_1
    ON gangnam_unni_data(hospital_id);
 -- analysis_runs Table Create SQL
 CREATE TABLE analysis_runs
 (
    `analysis_run_id`      CHAR(36)     NOT NULL,
    `hospital_id`          CHAR(36)     NOT NULL,
    `owner_user_id`        INT          NOT NULL    DEFAULT 0,
    `status`               VARCHAR(50)  NOT NULL    DEFAULT 'discovering',
    `instagram_data_id`    INT          NULL,
    `facebook_data_id`     INT          NULL,
    `naver_blog_data_id`   INT          NULL,
    `youtube_data_id`      INT          NULL,
    `gangnam_unni_data_id` INT          NULL,
    `report_data`          JSON         NULL,
    `plan_data`            JSON         NULL,
    `created_at`           TIMESTAMP    NOT NULL    DEFAULT CURRENT_TIMESTAMP,
@ -59,30 +143,16 @@ CREATE TABLE analysis_runs
     PRIMARY KEY (analysis_run_id)
 );
-CREATE INDEX IX_analysis_runs_1 ON analysis_runs (hospital_id);
+-- Index 설정 SQL - analysis_runs(hospital_id)
-CREATE INDEX IX_analysis_runs_2 ON analysis_runs (owner_user_id);
+CREATE INDEX IX_analysis_runs_1
    ON analysis_runs(hospital_id);
 -- Index 설정 SQL - analysis_runs(owner_user_id)
 CREATE INDEX IX_analysis_runs_2
    ON analysis_runs(owner_user_id);
-- raw_info: 분석 실행별 수집 원시 데이터
+-- file_data Table Create SQL
 CREATE TABLE raw_info
 (
    `info_id`         INT          NOT NULL    AUTO_INCREMENT,
    `source_id`       INT          NOT NULL,
    `analysis_run_id` CHAR(36)     NOT NULL,
    `data_tag`        VARCHAR(50)  NOT NULL    DEFAULT 'default',
    `status`          VARCHAR(20)  NOT NULL    DEFAULT 'start',
    `raw_data`        JSON         NULL,
    `logo_url`        VARCHAR(500) NULL,
    `created_at`      TIMESTAMP    NOT NULL    DEFAULT CURRENT_TIMESTAMP,
    `updated_at`      TIMESTAMP    NOT NULL    DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP,
     PRIMARY KEY (info_id)
 );
 CREATE INDEX IX_raw_info_1 ON raw_info (analysis_run_id);
 CREATE INDEX IX_raw_info_2 ON raw_info (source_id);
 -- file_data
 CREATE TABLE file_data
 (
    `id`              INT             NOT NULL    AUTO_INCREMENT,
@ -99,7 +169,7 @@ CREATE TABLE file_data
 );
-- hospital_history
+-- hospital_history Table Create SQL
 CREATE TABLE hospital_history
 (
    `id`               INT             NOT NULL    AUTO_INCREMENT,
@ -110,17 +180,24 @@ CREATE TABLE hospital_history
    `brn`              VARCHAR(50)     NOT NULL,
    `road_address`     VARCHAR(100)    NULL,
    `site_address`     VARCHAR(100)    NULL,
    `url`              VARCHAR(500)    NULL,
    `status`           VARCHAR(20)     NOT NULL,
    `raw_data`         JSON            NULL,
    `analysis_run_id`  CHAR(36)        NULL,
    `created_at`       TIMESTAMP       NOT NULL    DEFAULT CURRENT_TIMESTAMP,
     PRIMARY KEY (id)
 );
-CREATE INDEX IX_hospital_history_1 ON hospital_history (hospital_id);
+-- Index 설정 SQL - hospital_history(hospital_id)
-CREATE INDEX IX_hospital_history_2 ON hospital_history (analysis_run_id);
+CREATE INDEX IX_hospital_history_1
    ON hospital_history(hospital_id);
 -- Index 설정 SQL - hospital_history(analysis_run_id)
 CREATE INDEX IX_hospital_history_2
    ON hospital_history(analysis_run_id);
-- market_analysis
+-- market_analysis Table Create SQL
 CREATE TABLE market_analysis
 (
    `id`               INT          NOT NULL    AUTO_INCREMENT,
@ -133,4 +210,7 @@ CREATE TABLE market_analysis
     UNIQUE KEY UQ_market_analysis (analysis_run_id, analysis_type)
 );
-CREATE INDEX IX_market_analysis_1 ON market_analysis (analysis_run_id);
+-- Index 설정 SQL - market_analysis(analysis_run_id)
 CREATE INDEX IX_market_analysis_1
    ON market_analysis(analysis_run_id);
--- a/app/api/analysis.py
+++ b/app/api/analysis.py
@ -2,23 +2,21 @@ import logging
 import uuid6
 from fastapi import APIRouter, BackgroundTasks, Depends, File, Form, HTTPException, UploadFile, status
 from common.deps import verify_api_key
-from common.db.hospital import select_hospital
+from common.db import fetchone, insert_instagram_row, insert_facebook_row, insert_naver_blog_row, insert_youtube_row, insert_gangnam_unni_row, insert_analysis_run
 from common.db.source import select_source_mainpage, insert_source, insert_raw_info
 from common.db.run import insert_run, select_run_status
 from common.utils import _normalize_homepage, _with_scheme
 from models.analysis import AnalysisCreate, AnalysisStartResponse, AnalysisStatusResponse
 from models.file import FileListItem, FileType, FileUploadResponse
-from models.status import AnalysisStatus, SourceType
+from models.status import AnalysisStatus
 from services.pipeline import run_pipeline
-from services.file_data import get_analysis_files_response, handle_analysis_file_upload, soft_delete_analysis_file
+from services.file import get_analysis_files_response, handle_analysis_file_upload, soft_delete_analysis_file
 from mock_urls import MOCK_CLINICS
 from common.utils import _normalize_homepage, _with_scheme
 router = APIRouter(prefix="/api/analysis", tags=["analysis"], dependencies=[Depends(verify_api_key)])
 logger = logging.getLogger(__name__)
-
+# 추후 DB에 클리닉별로 매핑할 채널(틱톡/영문 인스타·페북). 지금은 mock_urls에서 homepage 매칭으로 보충.
-# 클라가 일부만 보내거나 빈 값이면 mock_urls 의 동일 homepage 매칭으로 채워줌 (메인 + 부가 채널 동일 규칙).
+def _extra_channels_from_mockurls(homepage_url: str) -> dict:
-def _channels_from_mockurls(homepage_url: str) -> dict:
+    """homepage로 mock_urls에서 클리닉을 찾아 틱톡/영문 인스타·페북 URL 반환 (없으면 {})."""
    target = _normalize_homepage(homepage_url)
    if not target:
        return {}
@ -26,18 +24,9 @@ def _channels_from_mockurls(homepage_url: str) -> dict:
        urls = c["urls"]
        if _normalize_homepage(urls.get("homepage", "")) == target:
            return {
                # main
                "instagram":    _with_scheme(urls.get("instagram")),
                "facebook":     _with_scheme(urls.get("facebook")),
                "naver_blog":   _with_scheme(urls.get("naverBlog")),
                "youtube":      _with_scheme(urls.get("youtube")),
                "gangnam_unni": _with_scheme(urls.get("gangnamUnni")),
                # extra
                "tiktok": _with_scheme(urls.get("tiktok")),
                "instagram_en": _with_scheme(urls.get("instagramEn")),
                "facebook_en": _with_scheme(urls.get("facebookEn")),
                "kakao_talk":   _with_scheme(urls.get("kakaoTalk")),
                "naver_cafe":   _with_scheme(urls.get("naverCafe")),
            }
    return {}
@ -48,51 +37,34 @@ async def start_analysis(body: AnalysisCreate, background_tasks: BackgroundTasks
    analysis_run_id = str(uuid6.uuid7())
    hospital_id = body.clinic_id
-    # 사실 hospital 과 owner_user_id 비교 후 검증이 필요한 거지만 일단 PoC 니까. 나중에 바꿉니다.
+    # 사실 hospital과 owner_user_id 비교 후 검증이 필요한 거지만 일단 PoC 니까. 나중에 바꿉니다.
-    hospital = await select_hospital(hospital_id)
+    hospital = await fetchone(
        "SELECT owner_user_id, url FROM hospital_baseinfo WHERE hospital_id = %s",
        (hospital_id,),
    )
    if not hospital:
        raise HTTPException(status_code=409, detail="Clinic not found")
-    analysis_run_id = await insert_run(analysis_run_id, hospital_id, hospital["owner_user_id"])
+    ig_id = await insert_instagram_row(hospital_id, body.channels.instagram) if body.channels.instagram else None
    fb_id = await insert_facebook_row(hospital_id, body.channels.facebook) if body.channels.facebook else None
    nb_id = await insert_naver_blog_row(hospital_id, body.channels.naver_blog) if body.channels.naver_blog else None
    yt_id = await insert_youtube_row(hospital_id, body.channels.youtube) if body.channels.youtube else None
    gu_id = await insert_gangnam_unni_row(hospital_id, body.channels.gangnam_unni) if body.channels.gangnam_unni else None
-    mainpage = await select_source_mainpage(hospital_id)
+    analysis_run_id = await insert_analysis_run(
-    if mainpage:
+        analysis_run_id, hospital_id, hospital["owner_user_id"],
-        await insert_raw_info(mainpage["source_id"], analysis_run_id, data_tag=SourceType.MAINPAGE)
+        ig_id, fb_id, nb_id, yt_id, gu_id,
-        # branding (HTML/CSS + Vision 로고 매칭) — mainpage 와 같은 homepage URL 을 source 로 사용.
+    )
        branding_id = await insert_source(hospital_id, SourceType.BRANDING, mainpage["url"], language="KR")
        await insert_raw_info(branding_id, analysis_run_id, data_tag=SourceType.BRANDING)
-    # 클라가 안 보낸 채널은 mock_urls 에서 homepage 매칭으로 보충 (main + extra 동일 규칙).
+    # 클라 값 우선, 없으면 보충 (추후 DB에서 클리닉별로 가져올 값)
-    mock = _channels_from_mockurls((mainpage or {}).get("url") or "")
+    mock_extra = _extra_channels_from_mockurls(hospital["url"])
-
+    extra_channels = {
-    # 메인 5채널 (KR). _with_scheme 으로 'gangnamunni.com/...' 같이 scheme/www 없이 와도 보강.
+        "tiktok":       body.channels.tiktok or mock_extra.get("tiktok"),
-    main_channels = [
+        "instagram_en": body.channels.instagram_en or mock_extra.get("instagram_en"),
-        (SourceType.INSTAGRAM,    _with_scheme(body.channels.instagram)    or mock.get("instagram")),
+        "facebook_en":  body.channels.facebook_en or mock_extra.get("facebook_en"),
-        (SourceType.FACEBOOK,     _with_scheme(body.channels.facebook)     or mock.get("facebook")),
+    }
-        (SourceType.NAVER_BLOG,   _with_scheme(body.channels.naver_blog)   or mock.get("naver_blog")),
+    logger.info("[analysis] extra_channels=%s (mock_matched=%s)", extra_channels, bool(mock_extra))
-        (SourceType.YOUTUBE,      _with_scheme(body.channels.youtube)      or mock.get("youtube")),
+    background_tasks.add_task(run_pipeline, analysis_run_id, extra_channels)
        (SourceType.GANGNAM_UNNI, _with_scheme(body.channels.gangnam_unni) or mock.get("gangnam_unni")),
    ]
    for source_type, url in main_channels:
        if url:
            source_id = await insert_source(hospital_id, source_type, url, language="KR")
            await insert_raw_info(source_id, analysis_run_id, data_tag=source_type)
    # 부가 채널 — instagram_en/facebook_en 은 동일 source_type 에 language='EN' 으로 구분, 나머지는 자체 source_type.
    extra_channels = [
        (SourceType.INSTAGRAM, "EN", _with_scheme(body.channels.instagram_en) or mock.get("instagram_en")),
        (SourceType.FACEBOOK,  "EN", _with_scheme(body.channels.facebook_en)  or mock.get("facebook_en")),
        (SourceType.TIKTOK,    "KR", _with_scheme(body.channels.tiktok)       or mock.get("tiktok")),
        (SourceType.KAKAOTALK, "KR", _with_scheme(body.channels.kakao_talk)   or mock.get("kakao_talk")),
        (SourceType.NAVER_CAFE, "KR", _with_scheme(body.channels.naver_cafe)  or mock.get("naver_cafe")),
    ]
    for source_type, language, url in extra_channels:
        if url:
            source_id = await insert_source(hospital_id, source_type, url, language=language)
            await insert_raw_info(source_id, analysis_run_id, data_tag=source_type)
    logger.info("[analysis] main+extra channels resolved (mock_matched=%s)", bool(mock))
    background_tasks.add_task(run_pipeline, analysis_run_id)
    return AnalysisStartResponse(
        analysis_run_id=analysis_run_id,
@ -129,12 +101,12 @@ async def delete_analysis_run_file(run_id: str, file_id: int) -> None:
@router.get("/{run_id}/status", response_model=AnalysisStatusResponse)
 async def get_analysis_status(run_id: str):
    logger.info("GET /api/analysis/%s/status", run_id)
-    run_status = await select_run_status(run_id)
+    row = await fetchone("SELECT status FROM analysis_runs WHERE analysis_run_id = %s", (run_id,))
-    if run_status is None:
+    if not row:
        raise HTTPException(status_code=404, detail="Run not found")
    return AnalysisStatusResponse(
        analysis_run_id=run_id,
-        status=AnalysisStatus(run_status),
+        status=AnalysisStatus(row["status"]),
        progress=50.0,
        current_step="",
        channel_errors={},
--- a/app/api/clinics.py
+++ b/app/api/clinics.py
@ -2,8 +2,7 @@ import logging
 import uuid6
 from fastapi import APIRouter, Depends, HTTPException, status
 from common.deps import verify_api_key
-from common.db.hospital import select_hospital, insert_hospital
+from common.db import insert_hospital, fetchone
 from common.db.source import insert_source
 from common.utils import get_env
 from integrations.firecrawl import FirecrawlClient
 from models.clinic import ClinicCreate, ClinicCreateResponse, ClinicResponse, ClinicHistoryResponse, RunSummary
@ -31,8 +30,9 @@ async def create_clinic(body: ClinicCreate):
        name=info["clinicName"],
        name_en=info.get("clinicNameEn"),
        road_address=info.get("address"),
        url=body.url,
        raw_data=info,
    )
    await insert_source(hospital_id, "mainpage", body.url)
    return ClinicCreateResponse(
        id=hospital_id,
        url=body.url,
@ -44,7 +44,11 @@ async def create_clinic(body: ClinicCreate):
@router.get("/{hospital_id}", response_model=ClinicResponse)
 async def get_clinic(hospital_id: str):
    logger.info("GET /api/clinics/%s", hospital_id)
-    row = await select_hospital(hospital_id)
+    row = await fetchone(
        "SELECT hospital_id, hospital_name, hospital_name_en, road_address, url, status, raw_data, created_at, updated_at"
        " FROM hospital_baseinfo WHERE hospital_id = %s",
        (hospital_id,),
    )
    if not row:
        raise HTTPException(status_code=404, detail="Clinic not found")
    return ClinicResponse(**{**row, "created_at": str(row["created_at"]), "updated_at": str(row["updated_at"])})
--- a/app/api/plan.py
+++ b/app/api/plan.py
@ -1,13 +1,10 @@
 import json
 import logging
 from fastapi import APIRouter, Depends, HTTPException, Response
-from common.db.run import select_run_with_clinic
+from common.db import fetchone
 from common.db.source import select_run_source_raw
 from common.deps import verify_api_key
 from common.utils import _with_scheme
 from integrations.llm.schemas.plan import PlanOutput
 from models.plan import PlanApiResponse
 from models.status import SourceType
 router = APIRouter(prefix="/api/plan", tags=["plan"], dependencies=[Depends(verify_api_key)])
 logger = logging.getLogger(__name__)
@ -16,21 +13,24 @@ logger = logging.getLogger(__name__)
@router.get("/{run_id}", response_model=PlanApiResponse, response_model_by_alias=True)
 async def get_plan(run_id: str):
    logger.info("GET /api/plan/%s", run_id)
-    row = await select_run_with_clinic(run_id)
+    row = await fetchone(
        "SELECT ar.plan_data, ar.created_at, h.hospital_name, h.hospital_name_en, h.url"
        " FROM analysis_runs ar"
        " JOIN hospital_baseinfo h ON ar.hospital_id = h.hospital_id"
        " WHERE ar.analysis_run_id = %s",
        (run_id,),
    )
    if row is None:
        raise HTTPException(status_code=404, detail="Run not found")
    if row["plan_data"] is None:
        return Response(status_code=204)
    data = json.loads(row["plan_data"]) if isinstance(row["plan_data"], str) else row["plan_data"]
    plan = PlanOutput(**data)
    # 강남언니에서 긁어온 이름이 있으면 우선 (hospital_baseinfo 의 정식 이름보다 강남언니가 더 광고용 표기).
    gu = await select_run_source_raw(run_id, SourceType.GANGNAM_UNNI) or {}
    clinic_name = gu.get("name") or row["hospital_name"]
    return PlanApiResponse(
        id=run_id,
-        clinic_name=clinic_name,
+        clinic_name=row["hospital_name"],
        clinic_name_en=row["hospital_name_en"],
        created_at=str(row["created_at"]),
-        target_url=_with_scheme(row["target_url"]),
+        target_url=row["url"],
        **plan.model_dump(),
    )
--- a/app/api/report.py
+++ b/app/api/report.py
@ -1,9 +1,8 @@
 import json
 import logging
 from fastapi import APIRouter, Depends, HTTPException, Response
-from common.db.run import select_run_with_clinic
+from common.db import fetchone
 from common.deps import verify_api_key
 from common.utils import _with_scheme
 from integrations.llm.schemas.report import ReportOutput
 from models.report import MarketingReportResponse
@ -14,7 +13,13 @@ logger = logging.getLogger(__name__)
@router.get("/{run_id}", response_model=MarketingReportResponse, response_model_by_alias=True)
 async def get_report(run_id: str):
    logger.info("GET /api/report/%s", run_id)
-    row = await select_run_with_clinic(run_id)
+    row = await fetchone(
        "SELECT ar.report_data, ar.created_at, h.hospital_name, h.hospital_name_en, h.url"
        " FROM analysis_runs ar"
        " JOIN hospital_baseinfo h ON ar.hospital_id = h.hospital_id"
        " WHERE ar.analysis_run_id = %s",
        (run_id,),
    )
    if row is None:
        raise HTTPException(status_code=404, detail="Run not found")
    if row["report_data"] is None:
@ -26,6 +31,6 @@ async def get_report(run_id: str):
        clinic_name=row["hospital_name"],
        clinic_name_en=row["hospital_name_en"],
        created_at=str(row["created_at"]),
-        target_url=_with_scheme(row["target_url"]),
+        target_url=row["url"],
        **llm_output.model_dump(exclude={"id", "created_at", "target_url"}),
    )
--- a/app/common/db.py
+++ b/app/common/db.py
@ -0,0 +1,287 @@
 import json
 import os
 import aiomysql
 from common.utils import get_env
 _pool: aiomysql.Pool | None = None
 async def get_pool() -> aiomysql.Pool:
    """Return the module-wide aiomysql pool, creating it on the first call.

    Connection settings come from the MYSQL_* environment variables; only
    MYSQL_PORT has a fallback ("3306").
    """
    global _pool
    if _pool is not None:
        return _pool
    # Keyword order mirrors the order in which the environment is read.
    settings = dict(
        host=get_env("MYSQL_HOST"),
        port=int(os.getenv("MYSQL_PORT", "3306")),
        user=get_env("MYSQL_USER"),
        password=get_env("MYSQL_PASSWORD"),
        db=get_env("MYSQL_DB"),
        charset="utf8mb4",
        minsize=0,
        maxsize=30,
        connect_timeout=10,
    )
    _pool = await aiomysql.create_pool(**settings)
    return _pool
 # Write helper (INSERT/UPDATE/DELETE)
 async def execute(sql: str, args: tuple = ()) -> int:
    """Run one write statement, commit it, and return the cursor's lastrowid.

    Args:
        sql: Statement text with %s placeholders.
        args: Values bound to the placeholders.
    """
    db_pool = await get_pool()
    async with db_pool.acquire() as connection:
        try:
            async with connection.cursor() as cursor:
                await cursor.execute(sql, args)
                await connection.commit()
                last_id = cursor.lastrowid
            return last_id
        finally:
            # NOTE(review): the connection is closed explicitly on every call,
            # so it is presumably never reused by the pool -- confirm intended.
            connection.close()
 # Read helper (SELECT)
 async def fetchone(sql: str, args: tuple = ()) -> dict | None:
    """Run a query with a DictCursor and return its first row, or None.

    Args:
        sql: Query text with %s placeholders.
        args: Values bound to the placeholders.
    """
    db_pool = await get_pool()
    async with db_pool.acquire() as connection:
        try:
            async with connection.cursor(aiomysql.DictCursor) as cursor:
                await cursor.execute(sql, args)
                first_row = await cursor.fetchone()
            return first_row
        finally:
            # The connection is closed explicitly after every query.
            connection.close()
 async def fetchall(sql: str, args: tuple = ()) -> list[dict]:
    """Run a query with a DictCursor and return every row it yields.

    Args:
        sql: Query text with %s placeholders.
        args: Values bound to the placeholders.
    """
    db_pool = await get_pool()
    async with db_pool.acquire() as connection:
        try:
            async with connection.cursor(aiomysql.DictCursor) as cursor:
                await cursor.execute(sql, args)
                all_rows = await cursor.fetchall()
            return all_rows
        finally:
            # The connection is closed explicitly after every query.
            connection.close()
 async def insert_instagram_row(hospital_id: str, url: str) -> int:
    """Insert an instagram_data row for the hospital/URL; return execute()'s result."""
    params = (hospital_id, url)
    return await execute("INSERT INTO instagram_data (hospital_id, url) VALUES (%s, %s)", params)
 async def insert_facebook_row(hospital_id: str, url: str) -> int:
    """Insert a facebook_data row for the hospital/URL; return execute()'s result."""
    params = (hospital_id, url)
    return await execute("INSERT INTO facebook_data (hospital_id, url) VALUES (%s, %s)", params)
 async def insert_naver_blog_row(hospital_id: str, url: str) -> int:
    """Insert a naver_blog_data row for the hospital/URL; return execute()'s result."""
    params = (hospital_id, url)
    return await execute("INSERT INTO naver_blog_data (hospital_id, url) VALUES (%s, %s)", params)
 async def insert_youtube_row(hospital_id: str, url: str) -> int:
    """Insert a youtube_data row for the hospital/URL; return execute()'s result."""
    params = (hospital_id, url)
    return await execute("INSERT INTO youtube_data (hospital_id, url) VALUES (%s, %s)", params)
 async def insert_gangnam_unni_row(hospital_id: str, url: str) -> int:
    """Insert a gangnam_unni_data row for the hospital/URL; return execute()'s result."""
    params = (hospital_id, url)
    return await execute("INSERT INTO gangnam_unni_data (hospital_id, url) VALUES (%s, %s)", params)
 async def insert_file_row(
    analysis_run_id: str,
    file_type: str,
    file_name: str,
    file_url: str,
    size_bytes: int | None = None,
    hospital_id: str | None = None,
 ) -> int:
    """Insert one file_data row and return execute()'s result.

    Note that the column order in the statement (analysis_run_id, hospital_id,
    file_type, ...) differs from this function's parameter order.
    """
    params = (analysis_run_id, hospital_id, file_type, file_name, file_url, size_bytes)
    sql = (
        "INSERT INTO file_data (analysis_run_id, hospital_id, file_type, file_name, file_url, size_bytes)"
        " VALUES (%s, %s, %s, %s, %s, %s)"
    )
    return await execute(sql, params)
 async def insert_analysis_run(
    analysis_run_id: str,
    hospital_id: str,
    owner_user_id: int,
    instagram_data_id: int | None,
    facebook_data_id: int | None,
    naver_blog_data_id: int | None,
    youtube_data_id: int | None,
    gangnam_unni_data_id: int | None,
 ) -> str:
    """Insert an analysis_runs row linking the run to its per-channel data rows.

    Each *_data_id may be None when no row exists for that channel.

    Returns:
        The analysis_run_id that was passed in.
    """
    params = (
        analysis_run_id,
        hospital_id,
        owner_user_id,
        instagram_data_id,
        facebook_data_id,
        naver_blog_data_id,
        youtube_data_id,
        gangnam_unni_data_id,
    )
    sql = (
        "INSERT INTO analysis_runs"
        " (analysis_run_id, hospital_id, owner_user_id, instagram_data_id, facebook_data_id, naver_blog_data_id, youtube_data_id, gangnam_unni_data_id)"
        " VALUES (%s, %s, %s, %s, %s, %s, %s, %s)"
    )
    await execute(sql, params)
    return analysis_run_id
 async def save_analysis_report(analysis_run_id: str, data: dict) -> None:
    """Store `data` as JSON in analysis_runs.report_data for the given run."""
    # ensure_ascii=False keeps non-ASCII text readable in the stored JSON.
    report_json = json.dumps(data, ensure_ascii=False)
    sql = "UPDATE analysis_runs SET report_data = %s WHERE analysis_run_id = %s"
    await execute(sql, (report_json, analysis_run_id))
 async def is_done(table: str, row_id: int | None) -> bool:
    if row_id is None:
        return True
    r = await fetchone(f"SELECT status FROM {table} WHERE id = %s", (row_id,))
    return r["status"] == "done"
 async def fetch_raw(table: str, row_id: int | None) -> dict | None:
    if row_id is None:
        return None
    row = await fetchone(f"SELECT raw_data FROM {table} WHERE id = %s", (row_id,))
    if not row or not row["raw_data"]:
        return None
    return json.loads(row["raw_data"]) if isinstance(row["raw_data"], str) else row["raw_data"]
 async def get_analysis_raw_data(analysis_run_id: str) -> dict:
    """Collect the raw_data of every channel row linked to an analysis run.

    Args:
        analysis_run_id: Primary key of the analysis_runs row.

    Returns:
        A dict keyed by channel name ("instagram", "facebook", "naver_blog",
        "youtube", "gangnam_unni"); each value is what fetch_raw returns for
        that channel's linked row (None when the run has no such row).

    Raises:
        LookupError: If no analysis_runs row matches `analysis_run_id`.
            Previously this surfaced as a TypeError from subscripting None.
    """
    run = await fetchone(
        "SELECT instagram_data_id, facebook_data_id, naver_blog_data_id, youtube_data_id, gangnam_unni_data_id"
        " FROM analysis_runs WHERE analysis_run_id = %s",
        (analysis_run_id,),
    )
    if not run:
        raise LookupError(f"analysis run not found: {analysis_run_id}")
    # (result key, table); the analysis_runs column is always "<table>_id".
    channel_tables = (
        ("instagram", "instagram_data"),
        ("facebook", "facebook_data"),
        ("naver_blog", "naver_blog_data"),
        ("youtube", "youtube_data"),
        ("gangnam_unni", "gangnam_unni_data"),
    )
    return {
        channel: await fetch_raw(table, run[f"{table}_id"])
        for channel, table in channel_tables
    }
 async def set_instagram_status(row_id: int, status: str) -> None:
    """Set the status column of the instagram_data row with id `row_id`."""
    sql = "UPDATE instagram_data SET status = %s WHERE id = %s"
    await execute(sql, (status, row_id))
 async def set_facebook_status(row_id: int, status: str) -> None:
    """Set the status column of the facebook_data row with id `row_id`."""
    sql = "UPDATE facebook_data SET status = %s WHERE id = %s"
    await execute(sql, (status, row_id))
 async def set_naver_blog_status(row_id: int, status: str) -> None:
    """Set the status column of the naver_blog_data row with id `row_id`."""
    sql = "UPDATE naver_blog_data SET status = %s WHERE id = %s"
    await execute(sql, (status, row_id))
 async def set_youtube_status(row_id: int, status: str) -> None:
    """Set the status column of the youtube_data row with id `row_id`."""
    sql = "UPDATE youtube_data SET status = %s WHERE id = %s"
    await execute(sql, (status, row_id))
 async def set_gangnam_unni_status(row_id: int, status: str) -> None:
    """Set the status column of the gangnam_unni_data row with id `row_id`."""
    sql = "UPDATE gangnam_unni_data SET status = %s WHERE id = %s"
    await execute(sql, (status, row_id))
 async def save_instagram_raw_data(row_id: int, data: dict) -> None:
    """Store `data` as JSON in instagram_data.raw_data and mark the row 'done'."""
    payload = json.dumps(data, ensure_ascii=False)
    sql = "UPDATE instagram_data SET raw_data = %s, status = 'done' WHERE id = %s"
    await execute(sql, (payload, row_id))
 async def save_facebook_raw_data(row_id: int, data: dict) -> None:
    """Store `data` as JSON in facebook_data.raw_data and mark the row 'done'."""
    payload = json.dumps(data, ensure_ascii=False)
    sql = "UPDATE facebook_data SET raw_data = %s, status = 'done' WHERE id = %s"
    await execute(sql, (payload, row_id))
 async def save_naver_blog_raw_data(row_id: int, data: dict) -> None:
    """Store `data` as JSON in naver_blog_data.raw_data and mark the row 'done'."""
    payload = json.dumps(data, ensure_ascii=False)
    sql = "UPDATE naver_blog_data SET raw_data = %s, status = 'done' WHERE id = %s"
    await execute(sql, (payload, row_id))
 async def save_youtube_raw_data(row_id: int, data: dict) -> None:
    """Store `data` as JSON in youtube_data.raw_data and mark the row 'done'."""
    payload = json.dumps(data, ensure_ascii=False)
    sql = "UPDATE youtube_data SET raw_data = %s, status = 'done' WHERE id = %s"
    await execute(sql, (payload, row_id))
 async def save_gangnam_unni_raw_data(row_id: int, data: dict) -> None:
    """Store `data` as JSON in gangnam_unni_data.raw_data and mark the row 'done'."""
    payload = json.dumps(data, ensure_ascii=False)
    sql = "UPDATE gangnam_unni_data SET raw_data = %s, status = 'done' WHERE id = %s"
    await execute(sql, (payload, row_id))
 async def _insert_hospital_history(hospital_id: str, analysis_run_id: str | None) -> None:
    """Copy the current hospital_baseinfo row into hospital_history.

    Does nothing when the hospital does not exist.

    Args:
        hospital_id: Key of the hospital_baseinfo row to snapshot.
        analysis_run_id: Run recorded on the history row, or None.
    """
    current = await fetchone(
        "SELECT owner_user_id, hospital_name, hospital_name_en, brn, road_address, site_address, url, status, raw_data"
        " FROM hospital_baseinfo WHERE hospital_id = %s",
        (hospital_id,),
    )
    if not current:
        return

    # raw_data is passed through when already a string, serialized when it is
    # a non-empty decoded value, and stored as NULL otherwise.
    raw = current["raw_data"]
    if isinstance(raw, str):
        raw_json = raw
    elif raw:
        raw_json = json.dumps(raw, ensure_ascii=False)
    else:
        raw_json = None

    params = (
        hospital_id,
        current["owner_user_id"],
        current["hospital_name"],
        current["hospital_name_en"],
        current["brn"],
        current["road_address"],
        current["site_address"],
        current["url"],
        current["status"],
        raw_json,
        analysis_run_id,
    )
    await execute(
        "INSERT INTO hospital_history"
        " (hospital_id, owner_user_id, hospital_name, hospital_name_en, brn, road_address, site_address, url, status, raw_data, analysis_run_id)"
        " VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)",
        params,
    )
 async def insert_hospital(
    hospital_id: str,
    name: str,
    name_en: str | None = None,
    road_address: str | None = None,
    site_address: str | None = None,
    url: str | None = None,
    raw_data: dict | None = None,
    owner_user_id: int = 0,
    brn: str = "",
 ) -> dict:
    """Create a hospital_baseinfo row with status 'done' and snapshot it to history.

    Returns:
        The result of selecting `created_at` for the new row.
    """
    # A falsy raw_data (None or empty dict) is stored as NULL.
    raw_json = json.dumps(raw_data, ensure_ascii=False) if raw_data else None
    params = (
        hospital_id,
        name,
        name_en,
        road_address,
        site_address,
        url,
        raw_json,
        owner_user_id,
        brn,
    )
    await execute(
        "INSERT INTO hospital_baseinfo (hospital_id, hospital_name, hospital_name_en, road_address, site_address, url, raw_data, status, owner_user_id, brn)"
        " VALUES (%s, %s, %s, %s, %s, %s, %s, 'done', %s, %s)",
        params,
    )
    await _insert_hospital_history(hospital_id, analysis_run_id=None)
    created = await fetchone(
        "SELECT created_at FROM hospital_baseinfo WHERE hospital_id = %s",
        (hospital_id,),
    )
    return created
 async def save_hospital_raw_data(hospital_id: str, data: dict, analysis_run_id: str | None = None) -> None:
    """Overwrite a hospital's raw_data, mark it 'done', and snapshot it to history.

    The name, English name and road address columns are replaced only when
    `data` carries a non-NULL "clinicName", "clinicNameEn" or "address"
    (COALESCE keeps the stored value otherwise).
    """
    params = (
        json.dumps(data, ensure_ascii=False),
        data.get("clinicName"),
        data.get("clinicNameEn"),
        data.get("address"),
        hospital_id,
    )
    sql = (
        "UPDATE hospital_baseinfo"
        " SET raw_data = %s, status = 'done',"
        "     hospital_name = COALESCE(%s, hospital_name),"
        "     hospital_name_en = COALESCE(%s, hospital_name_en),"
        "     road_address = COALESCE(%s, road_address)"
        " WHERE hospital_id = %s"
    )
    await execute(sql, params)
    await _insert_hospital_history(hospital_id, analysis_run_id)
 async def merge_hospital_raw_data(hospital_id: str, patch: dict) -> None:
    """Merge `patch` into hospital_baseinfo.raw_data at the top level and save it.

    This is a read-modify-write: the stored value is read, updated with
    `patch`, and written back. Used when follow-up collection steps append
    keys to raw_data one after another.
    """
    current = await fetchone("SELECT raw_data FROM hospital_baseinfo WHERE hospital_id = %s", (hospital_id,))
    stored = current["raw_data"] if current else None
    # The column may arrive as JSON text or as an already-decoded value.
    if isinstance(stored, str):
        merged = json.loads(stored)
    else:
        merged = stored or {}
    merged.update(patch)
    merged_json = json.dumps(merged, ensure_ascii=False)
    await execute(
        "UPDATE hospital_baseinfo SET raw_data = %s WHERE hospital_id = %s",
        (merged_json, hospital_id),
    )
 async def get_market_analysis(analysis_run_id: str) -> dict:
    """Return the 'done' market_analysis results of a run, keyed by analysis_type.

    Each value is the row's `data` column, JSON-decoded when it is a string.
    """
    done_rows = await fetchall(
        "SELECT analysis_type, data FROM market_analysis WHERE analysis_run_id = %s AND status = 'done'",
        (analysis_run_id,),
    )
    results = {}
    for entry in done_rows:
        data = entry["data"]
        if isinstance(data, str):
            data = json.loads(data)
        results[entry["analysis_type"]] = data
    return results
--- a/app/common/db/init.py
+++ b/app/common/db/init.py
@ -1,16 +0,0 @@
 from common.db.base import execute, fetchone, fetchall
 from common.db.hospital import select_hospital, update_hospital_status, insert_hospital, update_hospital
 from common.db.source import (
    insert_source, select_source_mainpage, select_source_by_type,
    insert_raw_info, update_raw_info_status, update_raw_info, update_raw_info_merge,
    update_raw_info_logo_url, select_mainpage_logo_url, select_branding_info_id,
    select_raw_info_data,
    select_run_sources, select_run_raw_data, select_run_source_raw,
    select_run_mainpage_url,
 )
 from common.db.run import (
    insert_run, select_run, select_run_status, update_run_status,
    update_run_report, update_run_plan, select_run_with_clinic, select_run_report_data,
 )
 from common.db.market import upsert_market_status, upsert_market_result, select_market
 from common.db.file_data import insert_file, select_run_files, select_file, delete_file
--- a/app/common/db/base.py
+++ b/app/common/db/base.py
@ -1,56 +0,0 @@
 import os
 import aiomysql
 from common.utils import get_env
 _pool: aiomysql.Pool | None = None
 async def get_pool() -> aiomysql.Pool:
    """Return the module-wide aiomysql pool, creating it on first use.

    Connection settings come from the MYSQL_* environment variables;
    MYSQL_PORT falls back to 3306 when unset.
    """
    global _pool
    if _pool is not None:
        return _pool
    settings = {
        "host": get_env("MYSQL_HOST"),
        "port": int(os.getenv("MYSQL_PORT", "3306")),
        "user": get_env("MYSQL_USER"),
        "password": get_env("MYSQL_PASSWORD"),
        "db": get_env("MYSQL_DB"),
        "charset": "utf8mb4",
        "minsize": 0,
        "maxsize": 30,
        "connect_timeout": 10,
    }
    _pool = await aiomysql.create_pool(**settings)
    return _pool
 async def execute(sql: str, args: tuple = ()) -> int:
    """Run one write statement, commit it, and return the cursor's ``lastrowid``."""
    pool = await get_pool()
    async with pool.acquire() as connection:
        try:
            async with connection.cursor() as cursor:
                await cursor.execute(sql, args)
                await connection.commit()
                last_id = cursor.lastrowid
                return last_id
        finally:
            # NOTE(review): the connection is closed even though it came from the
            # pool; presumably intentional (avoid stale connections) — confirm.
            connection.close()
 async def fetchone(sql: str, args: tuple = ()) -> dict | None:
    """Run a query with a DictCursor and return the first row (None when there is none)."""
    pool = await get_pool()
    async with pool.acquire() as connection:
        try:
            async with connection.cursor(aiomysql.DictCursor) as cursor:
                await cursor.execute(sql, args)
                first_row = await cursor.fetchone()
                return first_row
        finally:
            # NOTE(review): closes the pooled connection after every call — confirm intent.
            connection.close()
 async def fetchall(sql: str, args: tuple = ()) -> list[dict]:
    """Run a query with a DictCursor and return every row."""
    pool = await get_pool()
    async with pool.acquire() as connection:
        try:
            async with connection.cursor(aiomysql.DictCursor) as cursor:
                await cursor.execute(sql, args)
                all_rows = await cursor.fetchall()
                return all_rows
        finally:
            # NOTE(review): closes the pooled connection after every call — confirm intent.
            connection.close()
--- a/app/common/db/file_data.py
+++ b/app/common/db/file_data.py
@ -1,39 +0,0 @@
 from common.db.base import execute, fetchone, fetchall
 async def insert_file(
    analysis_run_id: str,
    file_type: str,
    file_name: str,
    file_url: str,
    size_bytes: int | None = None,
    hospital_id: str | None = None,
 ) -> int:
    """Insert a file_data row for a run and return whatever ``execute`` returns."""
    sql = (
        "INSERT INTO file_data (analysis_run_id, hospital_id, file_type, file_name, file_url, size_bytes)"
        " VALUES (%s, %s, %s, %s, %s, %s)"
    )
    # Parameter order follows the column list above, not the signature.
    params = (analysis_run_id, hospital_id, file_type, file_name, file_url, size_bytes)
    return await execute(sql, params)
 async def select_run_files(analysis_run_id: str) -> list[dict]:
    """List the non-deleted files of a run, newest first."""
    sql = (
        "SELECT id, file_type, file_name, file_url, size_bytes, created_at"
        " FROM file_data WHERE analysis_run_id = %s AND is_deleted = FALSE"
        " ORDER BY created_at DESC"
    )
    files = await fetchall(sql, (analysis_run_id,))
    return files
 async def select_file(file_id: int, analysis_run_id: str) -> dict | None:
    """Look up a file by id within a given run; returns a row holding only ``id``, or None."""
    sql = "SELECT id FROM file_data WHERE id = %s AND analysis_run_id = %s"
    found = await fetchone(sql, (file_id, analysis_run_id))
    return found
 async def delete_file(file_id: int) -> None:
    """Soft-delete a file: flip ``is_deleted`` to TRUE on a row that is not yet deleted."""
    sql = "UPDATE file_data SET is_deleted = TRUE WHERE id = %s AND is_deleted = FALSE"
    await execute(sql, (file_id,))
--- a/app/common/db/hospital.py
+++ b/app/common/db/hospital.py
@ -1,78 +0,0 @@
 from common.db.base import execute, fetchone
 async def select_hospital(hospital_id: str) -> dict | None:
    """Fetch the descriptive columns of one hospital_baseinfo row, or None if missing."""
    sql = (
        "SELECT hospital_id, owner_user_id, hospital_name, hospital_name_en,"
        " brn, road_address, site_address, status, created_at, updated_at"
        " FROM hospital_baseinfo WHERE hospital_id = %s"
    )
    hospital = await fetchone(sql, (hospital_id,))
    return hospital
 async def update_hospital_status(hospital_id: str, status: str) -> None:
    """Overwrite the ``status`` column of one hospital."""
    sql = "UPDATE hospital_baseinfo SET status = %s WHERE hospital_id = %s"
    await execute(sql, (status, hospital_id))
 async def _insert_hospital_history(hospital_id: str, analysis_run_id: str | None) -> None:
    """Copy the hospital's current baseinfo columns into hospital_history.

    Does nothing when the hospital row does not exist.
    """
    current = await fetchone(
        "SELECT owner_user_id, hospital_name, hospital_name_en, brn, road_address, site_address, status"
        " FROM hospital_baseinfo WHERE hospital_id = %s",
        (hospital_id,),
    )
    if not current:
        return
    # Same order as the INSERT column list between hospital_id and analysis_run_id.
    copied_columns = (
        "owner_user_id",
        "hospital_name",
        "hospital_name_en",
        "brn",
        "road_address",
        "site_address",
        "status",
    )
    snapshot = tuple(current[column] for column in copied_columns)
    await execute(
        "INSERT INTO hospital_history"
        " (hospital_id, owner_user_id, hospital_name, hospital_name_en, brn, road_address, site_address, status, analysis_run_id)"
        " VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s)",
        (hospital_id, *snapshot, analysis_run_id),
    )
 async def insert_hospital(
    hospital_id: str,
    name: str,
    name_en: str | None = None,
    road_address: str | None = None,
    site_address: str | None = None,
    owner_user_id: int = 0,
    brn: str = "",
 ) -> dict:
    """Create a hospital_baseinfo row (status 'done'), log a history row, return its created_at row."""
    insert_sql = (
        "INSERT INTO hospital_baseinfo"
        " (hospital_id, hospital_name, hospital_name_en, road_address, site_address, status, owner_user_id, brn)"
        " VALUES (%s, %s, %s, %s, %s, 'done', %s, %s)"
    )
    values = (hospital_id, name, name_en, road_address, site_address, owner_user_id, brn)
    await execute(insert_sql, values)
    # The initial history entry is not tied to any analysis run.
    await _insert_hospital_history(hospital_id, analysis_run_id=None)
    created = await fetchone(
        "SELECT created_at FROM hospital_baseinfo WHERE hospital_id = %s",
        (hospital_id,),
    )
    return created
 async def update_hospital(hospital_id: str, data: dict, analysis_run_id: str | None = None) -> None:
    """Mark a hospital 'done', refresh name/address from ``data``, and log a history row.

    ``clinicName`` / ``clinicNameEn`` / ``address`` replace the stored columns
    only when present; COALESCE keeps the old value for NULL parameters.
    """
    name, name_en, address = (data.get(key) for key in ("clinicName", "clinicNameEn", "address"))
    sql = (
        "UPDATE hospital_baseinfo"
        " SET status = 'done',"
        "     hospital_name = COALESCE(%s, hospital_name),"
        "     hospital_name_en = COALESCE(%s, hospital_name_en),"
        "     road_address = COALESCE(%s, road_address)"
        " WHERE hospital_id = %s"
    )
    await execute(sql, (name, name_en, address, hospital_id))
    await _insert_hospital_history(hospital_id, analysis_run_id)
--- a/app/common/db/market.py
+++ b/app/common/db/market.py
@ -1,31 +0,0 @@
 import json
 from common.db.base import execute, fetchall
 async def upsert_market_status(analysis_run_id: str, analysis_type: str, status: str) -> None:
    """Insert a market_analysis row, or update only its status on a duplicate key."""
    sql = (
        "INSERT INTO market_analysis (analysis_run_id, analysis_type, status)"
        " VALUES (%s, %s, %s)"
        " ON DUPLICATE KEY UPDATE status = VALUES(status)"
    )
    await execute(sql, (analysis_run_id, analysis_type, status))
 async def upsert_market_result(analysis_run_id: str, analysis_type: str, data: dict) -> None:
    """Store a finished analysis result: insert as 'done', or overwrite status and data on a duplicate key."""
    serialized = json.dumps(data, ensure_ascii=False)
    sql = (
        "INSERT INTO market_analysis (analysis_run_id, analysis_type, status, data)"
        " VALUES (%s, %s, 'done', %s)"
        " ON DUPLICATE KEY UPDATE status = 'done', data = VALUES(data)"
    )
    await execute(sql, (analysis_run_id, analysis_type, serialized))
 async def select_market(analysis_run_id: str) -> dict:
    """Return the run's finished ('done') market analyses keyed by analysis_type.

    String payloads are JSON-decoded; other values are returned unchanged.
    """
    rows = await fetchall(
        "SELECT analysis_type, data FROM market_analysis WHERE analysis_run_id = %s AND status = 'done'",
        (analysis_run_id,),
    )
    results = {}
    for row in rows:
        payload = row["data"]
        if isinstance(payload, str):
            payload = json.loads(payload)
        results[row["analysis_type"]] = payload
    return results
--- a/app/common/db/run.py
+++ b/app/common/db/run.py
@ -1,76 +0,0 @@
 import json
 from common.db.base import execute, fetchone
 async def insert_run(
    analysis_run_id: str,
    hospital_id: str,
    owner_user_id: int,
 ) -> str:
    """Create an analysis_runs row and hand the given run id back to the caller."""
    sql = "INSERT INTO analysis_runs (analysis_run_id, hospital_id, owner_user_id) VALUES (%s, %s, %s)"
    params = (analysis_run_id, hospital_id, owner_user_id)
    await execute(sql, params)
    return analysis_run_id
 async def select_run(analysis_run_id: str) -> dict | None:
    """Fetch the identifying and status columns of one analysis run, or None if missing."""
    sql = (
        "SELECT analysis_run_id, hospital_id, owner_user_id, status, created_at, updated_at"
        " FROM analysis_runs WHERE analysis_run_id = %s"
    )
    run = await fetchone(sql, (analysis_run_id,))
    return run
 async def select_run_report_data(analysis_run_id: str) -> dict | None:
    """Return the run's ``report_data`` as a dict; call only when the report is needed.

    Returns None when the run does not exist or has no report yet. A string
    value is JSON-decoded; an already-decoded value is returned unchanged.
    """
    row = await fetchone(
        "SELECT report_data FROM analysis_runs WHERE analysis_run_id = %s",
        (analysis_run_id,),
    )
    # Uses the module-level json import; the function-local import was redundant.
    report = row["report_data"] if row else None
    if not report:
        return None
    return json.loads(report) if isinstance(report, str) else report
 async def select_run_status(analysis_run_id: str) -> str | None:
    """Return the run's status, or None when the run does not exist."""
    row = await fetchone(
        "SELECT status FROM analysis_runs WHERE analysis_run_id = %s",
        (analysis_run_id,),
    )
    if not row:
        return None
    return row["status"]
 async def update_run_status(analysis_run_id: str, status: str) -> None:
    """Overwrite the ``status`` column of one analysis run."""
    sql = "UPDATE analysis_runs SET status = %s WHERE analysis_run_id = %s"
    await execute(sql, (status, analysis_run_id))
 async def update_run_report(analysis_run_id: str, data: dict) -> None:
    """Serialize ``data`` to JSON and store it as the run's ``report_data``."""
    serialized = json.dumps(data, ensure_ascii=False)
    sql = "UPDATE analysis_runs SET report_data = %s WHERE analysis_run_id = %s"
    await execute(sql, (serialized, analysis_run_id))
 async def update_run_plan(analysis_run_id: str, data: dict) -> None:
    """Serialize ``data`` to JSON and store it as the run's ``plan_data``."""
    serialized = json.dumps(data, ensure_ascii=False)
    sql = "UPDATE analysis_runs SET plan_data = %s WHERE analysis_run_id = %s"
    await execute(sql, (serialized, analysis_run_id))
 async def select_run_with_clinic(analysis_run_id: str) -> dict | None:
    """Fetch a run's report/plan together with the hospital names and its mainpage URL.

    The mainpage source is LEFT JOINed, so ``target_url`` is NULL when the
    hospital has no 'mainpage' remote_source row.
    """
    sql = (
        "SELECT ar.report_data, ar.plan_data, ar.created_at,"
        " h.hospital_name, h.hospital_name_en,"
        " rs.url AS target_url"
        " FROM analysis_runs ar"
        " JOIN hospital_baseinfo h ON ar.hospital_id = h.hospital_id"
        " LEFT JOIN remote_source rs ON rs.hospital_id = h.hospital_id AND rs.source_type = 'mainpage'"
        " WHERE ar.analysis_run_id = %s"
    )
    joined = await fetchone(sql, (analysis_run_id,))
    return joined
--- a/app/common/db/source.py
+++ b/app/common/db/source.py
@ -1,166 +0,0 @@
 import json
 from common.db.base import execute, fetchone, fetchall
 from models.status import SourceType
 async def insert_source(
    hospital_id: str,
    source_type: SourceType,
    url: str,
    language: str | None = None,
 ) -> int:
    """Register a remote_source row for a hospital and return whatever ``execute`` returns."""
    sql = "INSERT INTO remote_source (hospital_id, source_type, language, url) VALUES (%s, %s, %s, %s)"
    # Parameter order follows the column list, not the signature.
    params = (hospital_id, source_type, language, url)
    return await execute(sql, params)
 async def select_source_mainpage(hospital_id: str) -> dict | None:
    """Return ``source_id`` and ``url`` of the hospital's 'mainpage' source, or None."""
    sql = "SELECT source_id, url FROM remote_source WHERE hospital_id = %s AND source_type = 'mainpage'"
    mainpage = await fetchone(sql, (hospital_id,))
    return mainpage
 async def insert_raw_info(
    source_id: int,
    analysis_run_id: str,
    data_tag: SourceType,
 ) -> int:
    """Create a raw_info row linking a source to a run; returns whatever ``execute`` returns."""
    sql = "INSERT INTO raw_info (source_id, analysis_run_id, data_tag) VALUES (%s, %s, %s)"
    params = (source_id, analysis_run_id, data_tag)
    return await execute(sql, params)
 async def update_raw_info_status(info_id: int, status: str) -> None:
    """Overwrite the ``status`` column of one raw_info row."""
    sql = "UPDATE raw_info SET status = %s WHERE info_id = %s"
    await execute(sql, (status, info_id))
 async def update_raw_info(info_id: int, data: dict) -> None:
    """Replace a raw_info row's ``raw_data`` with ``data`` (as JSON) and mark it 'done'."""
    serialized = json.dumps(data, ensure_ascii=False)
    sql = "UPDATE raw_info SET raw_data = %s, status = 'done' WHERE info_id = %s"
    await execute(sql, (serialized, info_id))
 async def select_raw_info_data(info_id: int | None) -> dict | None:
    """Return the decoded ``raw_data`` of a raw_info row.

    Returns None when ``info_id`` is None, the row is missing, or raw_data is empty.
    """
    if info_id is None:
        return None
    row = await fetchone("SELECT raw_data FROM raw_info WHERE info_id = %s", (info_id,))
    stored = row["raw_data"] if row else None
    if not stored:
        return None
    if isinstance(stored, str):
        return json.loads(stored)
    return stored
 async def select_run_sources(analysis_run_id: str) -> list[dict]:
    """List ``info_id``, ``source_type`` and ``url`` for every raw_info row of a run."""
    sql = (
        "SELECT ri.info_id, rs.source_type, rs.url"
        " FROM raw_info ri JOIN remote_source rs USING (source_id)"
        " WHERE ri.analysis_run_id = %s"
    )
    sources = await fetchall(sql, (analysis_run_id,))
    return sources
 async def select_run_raw_data(analysis_run_id: str) -> dict:
    """Group the run's finished ('done') raw_info rows by source type.

    Returns a dict mapping each ``source_type`` to a list of items with the
    keys ``raw_data``, ``logo_url``, ``source_type`` and ``language``.

    ``raw_data`` is JSON-decoded only when it arrives as a string; an
    already-decoded object or NULL is passed through, matching the other
    raw_data readers in this module instead of raising TypeError.
    """
    rows = await fetchall(
        "SELECT rs.source_type, rs.language, ri.raw_data, ri.logo_url"
        " FROM raw_info ri JOIN remote_source rs USING (source_id)"
        " WHERE ri.analysis_run_id = %s AND ri.status = 'done'",
        (analysis_run_id,),
    )
    result: dict = {}
    for row in rows:
        raw = row["raw_data"]
        item = {
            "raw_data": json.loads(raw) if isinstance(raw, str) else raw,
            "logo_url": row["logo_url"],
            "source_type": row["source_type"],
            "language": row["language"],
        }
        result.setdefault(row["source_type"], []).append(item)
    return result
 async def select_run_source_raw(
    analysis_run_id: str, source_type: str, language: str | None = None,
 ) -> dict | None:
    """Return the decoded ``raw_data`` of one raw_info row of a run for a source type.

    A truthy ``language`` adds a filter on the source's language. Returns
    None when no row matches or raw_data is empty.
    """
    clauses = [
        "SELECT ri.raw_data FROM raw_info ri JOIN remote_source rs USING (source_id)"
        " WHERE ri.analysis_run_id = %s AND rs.source_type = %s"
    ]
    params = [analysis_run_id, source_type]
    if language:
        clauses.append(" AND rs.language = %s")
        params.append(language)
    clauses.append(" LIMIT 1")
    row = await fetchone("".join(clauses), tuple(params))
    stored = row["raw_data"] if row else None
    if not stored:
        return None
    if isinstance(stored, str):
        return json.loads(stored)
    return stored
 async def update_raw_info_logo_url(info_id: int, logo_url: str) -> None:
    """Store the logo URL in raw_info.logo_url (a column of its own, separate from the JSON raw_data)."""
    sql = "UPDATE raw_info SET logo_url = %s WHERE info_id = %s"
    await execute(sql, (logo_url, info_id))
 async def select_branding_info_id(analysis_run_id: str) -> int | None:
    """Return the ``info_id`` of the run's 'branding' raw_info row, or None if there is none."""
    row = await fetchone(
        "SELECT ri.info_id FROM raw_info ri JOIN remote_source rs USING (source_id)"
        " WHERE ri.analysis_run_id = %s AND rs.source_type = 'branding' LIMIT 1",
        (analysis_run_id,),
    )
    if not row:
        return None
    return row.get("info_id")
 async def select_mainpage_logo_url(analysis_run_id: str) -> str | None:
    """Return the ``logo_url`` stored on the run's 'mainpage' raw_info row, or None."""
    row = await fetchone(
        "SELECT ri.logo_url FROM raw_info ri JOIN remote_source rs USING (source_id)"
        " WHERE ri.analysis_run_id = %s AND rs.source_type = 'mainpage' LIMIT 1",
        (analysis_run_id,),
    )
    if not row:
        return None
    return row.get("logo_url")
 async def update_raw_info_merge(info_id: int, patch: dict) -> None:
    """Top-level merge of ``patch`` into raw_info.raw_data (read-modify-write), marking the row 'done'.

    Used when one source appends keys in stages (e.g. branding: brandAssets,
    then channelLogos). Does nothing when the row does not exist.
    """
    row = await fetchone("SELECT raw_data FROM raw_info WHERE info_id = %s", (info_id,))
    if not row:
        return
    stored = row["raw_data"]
    # The stored value may be a JSON string or an already-decoded object.
    if isinstance(stored, str):
        merged = json.loads(stored)
    else:
        merged = stored or {}
    merged.update(patch)
    serialized = json.dumps(merged, ensure_ascii=False)
    await execute(
        "UPDATE raw_info SET raw_data = %s, status = 'done' WHERE info_id = %s",
        (serialized, info_id),
    )
 async def select_source_by_type(
    hospital_id: str, source_type: str, language: str | None = None,
 ) -> dict | None:
    """Return ``source_id`` and ``url`` of one source of the given type for a hospital.

    A truthy ``language`` adds a language filter. Returns None when nothing matches.
    """
    clauses = ["SELECT source_id, url FROM remote_source WHERE hospital_id = %s AND source_type = %s"]
    params = [hospital_id, source_type]
    if language:
        clauses.append(" AND language = %s")
        params.append(language)
    clauses.append(" LIMIT 1")
    return await fetchone("".join(clauses), tuple(params))
 async def select_run_mainpage_url(analysis_run_id: str) -> str:
    """Return the URL of the run's 'mainpage' source, or an empty string when unavailable."""
    row = await fetchone(
        "SELECT rs.url FROM raw_info ri JOIN remote_source rs USING (source_id)"
        " WHERE ri.analysis_run_id = %s AND rs.source_type = 'mainpage'",
        (analysis_run_id,),
    )
    if not row:
        return ""
    return row.get("url") or ""
--- a/app/common/utils.py
+++ b/app/common/utils.py
@ -1,8 +1,6 @@
 import os
 import re
 import asyncio
 import logging
 from datetime import datetime, timezone
 from http import HTTPMethod
 import httpx
@ -11,91 +9,6 @@ logger = logging.getLogger(__name__)
 REQUEST_TIMEOUT = 60
 def parse_iso_duration_seconds(iso: str) -> int:
    """Convert a ``PT#H#M#S`` duration string to whole seconds.

    Each component is optional. Returns 0 when the value is empty or does not
    start with ``PT``.
    """
    match = re.match(r"PT(?:(\d+)H)?(?:(\d+)M)?(?:(\d+)S)?", iso or "")
    if match is None:
        return 0
    hours, minutes, seconds = (int(part) if part else 0 for part in match.groups())
    return hours * 3600 + minutes * 60 + seconds
 def format_seconds(seconds: int) -> str:
    """Format a duration as Korean text: hours+minutes when at least one hour, else minutes+seconds."""
    total_minutes, secs = divmod(seconds, 60)
    hours, minutes = divmod(total_minutes, 60)
    if hours:
        return f"{hours}시간 {minutes}분"
    return f"{minutes}분 {secs}초"
 def format_clock(seconds: int) -> str:
    """Format a duration as ``H:MM:SS`` when at least one hour, otherwise ``M:SS``."""
    total_minutes, secs = divmod(seconds, 60)
    hours, minutes = divmod(total_minutes, 60)
    if hours:
        return f"{hours}:{minutes:02d}:{secs:02d}"
    return f"{minutes}:{secs:02d}"
 def calc_avg_video_length(videos: list[dict]) -> str:
    """Return the mean video duration as formatted text, or "" when no video has a positive duration."""
    lengths = []
    for video in videos:
        seconds = parse_iso_duration_seconds(video.get("duration", ""))
        if seconds > 0:
            lengths.append(seconds)
    if not lengths:
        return ""
    # Integer mean: fractional seconds are dropped.
    return format_seconds(sum(lengths) // len(lengths))
 def relative_date(date_str: str) -> str:
    """Describe how long ago a date was, in Korean ("오늘", N일/개월/년 전).

    Only the first 10 characters (``YYYY-MM-DD``) are parsed. Returns "" for
    an empty or unparseable value.
    """
    if not date_str:
        return ""
    try:
        past = datetime.fromisoformat(date_str[:10])
    except ValueError:
        return ""
    elapsed = (datetime.now() - past).days
    # Check the coarsest unit first; months and years are 30- and 365-day buckets.
    if elapsed >= 365:
        return f"{elapsed // 365}년 전"
    if elapsed >= 30:
        return f"{elapsed // 30}개월 전"
    if elapsed >= 1:
        return f"{elapsed}일 전"
    return "오늘"
 def calc_upload_frequency(videos: list[dict]) -> str:
    """Describe the average gap between uploads as Korean text.

    Uses the first 10 characters (``YYYY-MM-DD``) of each video's ``date``.
    Videos with a missing or unparseable date are skipped rather than raising,
    so one malformed entry does not discard the whole result. Returns "" when
    fewer than two usable dates remain.

    Average gap of 7 days or less -> "주 N회", 30 days or less -> "월 N회",
    otherwise "{days}일에 1회".
    """
    uploaded: list[datetime] = []
    for video in videos:
        raw = video.get("date")
        if not raw:
            continue
        try:
            uploaded.append(datetime.fromisoformat(raw[:10]))
        except (TypeError, ValueError):
            continue
    if len(uploaded) < 2:
        return ""
    uploaded.sort(reverse=True)
    gaps = [(newer - older).days for newer, older in zip(uploaded, uploaded[1:])]
    avg_days = sum(gaps) // len(gaps)
    if avg_days <= 7:
        # max(..., 1) avoids dividing by zero when uploads share a day.
        return f"주 {7 // max(avg_days, 1)}회"
    if avg_days <= 30:
        return f"월 {30 // avg_days}회"
    return f"{avg_days}일에 1회"
 def parse_ts(v) -> datetime | None:
    """수집기마다 다른 timestamp 포맷을 통일된 datetime으로 변환.
    파싱 실패 시 None.
    """
    # 숫자면 epoch (Unix timestamp) — apify가 가끔 epoch로 줌
    if isinstance(v, (int, float)):
        return datetime.fromtimestamp(v, tz=timezone.utc)
    if isinstance(v, str):
        # 1순위: ISO 8601 (대부분 apify/firecrawl 출력)
        try:
            return datetime.fromisoformat(v.replace("Z", "+00:00"))
        except ValueError:
            pass
        # 2순위: RFC 2822 (네이버 블로그 RSS 등 — 표준 라이브러리 파서로)
        try:
            from email.utils import parsedate_to_datetime
            return parsedate_to_datetime(v)
        except (TypeError, ValueError):
            return None
    return None
 def get_env(key: str) -> str:
    v = os.environ.get(key, "")
@ -148,27 +61,6 @@ def _normalize_homepage(url: str) -> str:
    return u.rstrip("/")
 # SSL 인증서가 www.* 에만 유효한 도메인 — bare 도메인이면 사용자 클릭 시 브라우저 SSL warning 뜸.
 _WWW_REQUIRED = ("gangnamunni.com", "facebook.com", "instagram.com", "toxnfill.com")
 def _with_scheme(u: str | None) -> str | None:
-    """scheme 없는 URL에 https:// 보정 (수집기/링크 표시용). 빈 값은 None.
+    """scheme 없는 URL에 https:// 보정 (수집기 파싱용). 빈 값은 None."""
-    + 중첩된 https://가 끼어있으면 마지막 URL만 추출 (LLM이 가끔 'https://www.X/https://Y' 같이 만듦).
+    return (u if "://" in u else "https://" + u) if u else None
    + SSL 엄격 도메인(gangnamunni/facebook/instagram)은 www. 자동 보강."""
    if not u:
        return None
    u = u.strip()
    # 'https://www.facebook.com/https://facebook.com/X' 같은 중첩 → 마지막 'http(s)://' 부터 잘라 사용
    last = max(u.rfind("https://"), u.rfind("http://"))
    if last > 0:
        u = u[last:]
    if "://" not in u:
        u = "https://" + u
    # scheme 뒤가 www. 없이 SSL 엄격 도메인이면 www. 추가
    for dom in _WWW_REQUIRED:
        for scheme in ("https://", "http://"):
            if u.startswith(scheme + dom):
                u = scheme + "www." + u[len(scheme):]
                break
    return u
--- a/app/integrations/apify.py
+++ b/app/integrations/apify.py
@ -9,13 +9,6 @@ APIFY_BASE = "https://api.apify.com/v2"
 IG_PROFILE_ACTOR = "coderx~instagram-profile-scraper-bio-posts"
 IG_HIGHLIGHTS_ACTOR = "igview-owner~instagram-highlights-scraper"
 # Facebook: pages + posts 두 actor 직접 호출.
 FB_PAGES_ACTOR = "apify~facebook-pages-scraper"
 FB_POSTS_ACTOR = "apify~facebook-posts-scraper"
 # TikTok
 TIKTOK_ACTOR = "clockworks~tiktok-scraper"
 def _ig_username(url: str) -> str:
    return urlparse(url).path.strip("/").split("/")[0] if "://" in url else url.lstrip("@")
@ -26,7 +19,7 @@ class ApifyClient:
        self.token = token
        self.wait_for_finish = wait_for_finish
-    async def _run_actor(self, actor_id: str, input_data: dict, limit: int = 20) -> list[dict]:
+    async def _run_actor(self, actor_id: str, input_data: dict) -> list[dict]:
        resp = await http_request(
            HTTPMethod.POST,
            url=f"{APIFY_BASE}/acts/{actor_id}/runs",
@ -42,7 +35,7 @@ class ApifyClient:
        items_resp = await http_request(
            HTTPMethod.GET,
            url=f"{APIFY_BASE}/datasets/{dataset_id}/items",
-            params={"token": self.token, "limit": limit},
+            params={"token": self.token, "limit": 20},
            label=f"apify-dataset-{dataset_id}",
        )
        if not items_resp or not items_resp.is_success:
@ -68,13 +61,6 @@ class ApifyClient:
            return None
        if isinstance(highlights, Exception):
            highlights = []
        # 프로필상 하이라이트가 있다고 하면(highlight_reel_count>0) 빈 결과일 때 최대 2회 재시도.
        if not highlights and (profile.get("highlight_reel_count", 0) or profile.get("highlightReelCount", 0)) > 0:
            for _ in range(2):
                retry = await self.fetch_instagram_highlights(username)
                if retry:
                    highlights = retry
                    break
        return {
            "username": profile["username"],
            "profileImage": profile.get("hdProfilePicUrl") or profile.get("profilePicUrl"),
@ -130,52 +116,31 @@ class ApifyClient:
    #     }
    async def fetch_facebook_page(self, page_url: str) -> dict | None:
-        items = await self._run_actor(FB_PAGES_ACTOR, {"startUrls": [{"url": page_url}]})
+        items = await self._run_actor("apify~facebook-pages-scraper", {"startUrls": [{"url": page_url}]})
        return items[0] if items else None
    async def fetch_facebook_posts(self, page_url: str, limit: int = 20) -> list[dict]:
        return await self._run_actor(
            FB_POSTS_ACTOR, {"startUrls": [{"url": page_url}], "resultsLimit": limit}, limit=limit,
        )
    async def get_facebook_page(self, page_url: str) -> dict | None:
-        # pages·posts 두 task 병렬 호출 (posts 실패해도 page만 있으면 진행)
+        page = await self.fetch_facebook_page(page_url)
-        page, posts = await asyncio.gather(
+        if not page:
            self.fetch_facebook_page(page_url),
            self.fetch_facebook_posts(page_url),
            return_exceptions=True,
        )
        if isinstance(page, Exception) or not page:
            return None
        if isinstance(posts, Exception):
            posts = []
        return {
            "pageName": page.get("title") or page.get("name"),
            "profileImage": page.get("profilePictureUrl") or page.get("profilePhoto") or page.get("profilePic"),
            "pageUrl": page.get("pageUrl", page_url),
            "followers": page.get("followers", 0),
-            "following": page.get("followings", 0),
+            "likes": page.get("likes", 0),
            "reviews": page.get("ratingCount", 0),
            "categories": page.get("categories", []),
-            "website": page.get("website") or page.get("websites"),
+            "email": page.get("email"),
            "phone": page.get("phone"),
            "website": page.get("website"),
            "address": page.get("address"),
            "intro": page.get("intro"),
-            "latestPosts": [
+            "rating": page.get("rating"),
                {
                    "text": (p.get("text") or "")[:160],
                    "likes": p.get("likes", 0),
                    "reactions": p.get("topReactionsCount", 0),
                    "shares": p.get("shares", 0),
                    "views": p.get("viewsCount") or 0,
                    "isVideo": p.get("isVideo", False),
                    "timestamp": p.get("time") or p.get("timestamp"),
                }
                for p in (posts or []) if isinstance(p, dict)
            ],
        }
    async def fetch_tiktok_profile(self, url: str) -> list[dict]:
        user = urlparse(url).path.strip("/").lstrip("@").split("/")[0] if "://" in url else url.lstrip("@")
-        return await self._run_actor(TIKTOK_ACTOR, {
+        return await self._run_actor("clockworks~tiktok-scraper", {
            "profiles": [user],
            "resultsPerPage": 10,
            "profileScrapeSections": ["videos"],
--- a/app/integrations/color_extractor.py
+++ b/app/integrations/color_extractor.py
@ -0,0 +1,250 @@
 """홈페이지 HTML/CSS에서 hex 색상 직접 추출 + 빈도 기반 brand palette 산출.
 Vision LLM에 의존하지 않고 페이지의 실제 CSS 값을 정규식으로 잡음.
 로고만 분석하는 Vision보다 사이트 전체 컬러 시스템 (primary/secondary/background/text)을 더 정확히 추출.
 """
 import logging
 import re
 import ssl
 from collections import Counter
 from urllib.parse import urljoin, urlparse
 import httpx
 logger = logging.getLogger(__name__)
 def _make_ssl_context() -> ssl.SSLContext:
    """Build an SSL context that tolerates weak server crypto.

    Works around older sites rejected for DH_KEY_TOO_SMALL / weak ciphers by
    lowering the OpenSSL security level to 1. It starts from the default
    context, so certificate verification stays enabled.
    """
    context = ssl.create_default_context()
    try:
        context.set_ciphers("DEFAULT@SECLEVEL=1")
    except ssl.SSLError:
        # Cipher string rejected: fall back to the unmodified default context.
        return context
    return context
 async def _fetch_html(url: str, timeout: float = 20.0) -> tuple[int, str]:
    """GET ``url`` with step-by-step SSL fallbacks for legacy sites.

    Attempts, in order: standard verification, weak ciphers allowed
    (``_make_ssl_context``), verification disabled. A connect/read/SSL error
    moves on to the next attempt.

    Returns ``(status_code, text)`` from the first attempt that gets a
    response, or ``(0, "")`` on failure. Errors unrelated to SSL (timeouts,
    invalid URLs, ...) also yield ``(0, "")`` instead of escaping from the
    first two attempts, so the function never raises an ordinary exception.
    """
    headers = {"User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36"}
    retryable = (httpx.ConnectError, httpx.ReadError, ssl.SSLError)
    # (factory for the `verify` argument, log format used when falling back; None = last attempt)
    attempts = (
        (lambda: True, "[fetch] %s standard SSL failed: %s — fallback to weak cipher"),
        (_make_ssl_context, "[fetch] %s weak cipher failed: %s — fallback to verify=False"),
        (lambda: False, None),
    )
    for make_verify, fallback_msg in attempts:
        try:
            async with httpx.AsyncClient(
                timeout=timeout, follow_redirects=True, headers=headers, verify=make_verify(),
            ) as c:
                r = await c.get(url)
                return r.status_code, r.text
        except Exception as e:
            if fallback_msg is not None and isinstance(e, retryable):
                logger.info(fallback_msg, url, e)
                continue
            if fallback_msg is None:
                logger.warning("[fetch] %s all fallbacks failed: %s", url, e)
            else:
                # Not an SSL/connection problem: a weaker SSL setup will not help.
                logger.warning("[fetch] %s failed without fallback: %s", url, e)
            return 0, ""
    return 0, ""
 LOGO_IMG_PATTERNS = [
    # Logo-URL regexes. Each captures the candidate URL in group 1.
    # 1) <img class="...logo..." src="...">
    re.compile(r'<img[^>]*\bclass=["\'][^"\']*\blogo\b[^"\']*["\'][^>]*\bsrc=["\']([^"\']+)["\']', re.IGNORECASE),
    # 2) <img src="..." class="...logo...">
    re.compile(r'<img[^>]*\bsrc=["\']([^"\']+)["\'][^>]*\bclass=["\'][^"\']*\blogo\b[^"\']*["\']', re.IGNORECASE),
    # 3) <img id="...logo..." src="...">
    re.compile(r'<img[^>]*\bid=["\'][^"\']*\blogo\b[^"\']*["\'][^>]*\bsrc=["\']([^"\']+)["\']', re.IGNORECASE),
    # 4) <img alt="...logo..." src="...">
    re.compile(r'<img[^>]*\balt=["\'][^"\']*\blogo\b[^"\']*["\'][^>]*\bsrc=["\']([^"\']+)["\']', re.IGNORECASE),
    # 5) <a/h1 class="logo"><...nested...><img src="...">
    re.compile(r'<(?:a|h[1-6]|div|span)[^>]*\b(?:class|id)=["\'][^"\']*\blogo\b[^"\']*["\'][^>]*>(?:[^<]|<(?!img))*<img[^>]*\bsrc=["\']([^"\']+)["\']', re.IGNORECASE | re.DOTALL),
    # 6) inline background-image: <a/div class="logo" style="background-image: url(...)">
    re.compile(r'<(?:a|div|span|h[1-6])[^>]*\b(?:class|id)=["\'][^"\']*\blogo\b[^"\']*["\'][^>]*\bstyle=["\'][^"\']*background(?:-image)?\s*:\s*url\(\s*["\']?([^"\')\s]+)', re.IGNORECASE),
    # 7) inline background-image: <a/div style="background-image: url(...)" class="logo">  (attributes in the opposite order)
    re.compile(r'<(?:a|div|span|h[1-6])[^>]*\bstyle=["\'][^"\']*background(?:-image)?\s*:\s*url\(\s*["\']?([^"\')\s]+)[^"\']*["\'][^>]*\b(?:class|id)=["\'][^"\']*\blogo\b', re.IGNORECASE),
    # 8) src itself contains "logo" (header_logo.png, brand-logo.svg, ...)
    # NOTE(review): `\b` does not match between `_` and a letter, so a name such as
    # header_logo.png presumably does NOT match `\blogo\b` (here and above) — confirm.
    re.compile(r'<img[^>]*\bsrc=["\']([^"\']*\blogo\b[^"\']*\.(?:png|svg|jpe?g|webp)[^"\']*)["\']', re.IGNORECASE),
    # 9) <header>...<img src="..."> (first img inside the header)
    re.compile(r'<header\b[^>]*>(?:[^<]|<(?!img))*<img[^>]*\bsrc=["\']([^"\']+\.(?:png|svg|jpe?g|webp)[^"\']*)["\']', re.IGNORECASE | re.DOTALL),
    # 10) <nav>...<img src="..."> (first img inside the nav)
    re.compile(r'<nav\b[^>]*>(?:[^<]|<(?!img))*<img[^>]*\bsrc=["\']([^"\']+\.(?:png|svg|jpe?g|webp)[^"\']*)["\']', re.IGNORECASE | re.DOTALL),
    # 11) Open Graph image (representative image) - last-resort fallback, both attribute orders
    re.compile(r'<meta[^>]*\bproperty=["\']og:image["\'][^>]*\bcontent=["\']([^"\']+)["\']', re.IGNORECASE),
    re.compile(r'<meta[^>]*\bcontent=["\']([^"\']+)["\'][^>]*\bproperty=["\']og:image["\']', re.IGNORECASE),
 ]
 # Extracts the url(...) of a `.logo { background-image: url(...) }` rule from CSS text.
 # Group 1 is the URL; `background:` shorthand is accepted as well as `background-image:`.
 LOGO_CSS_PATTERN = re.compile(
    r'\.[\w-]*\blogo\b[\w-]*\s*(?:,\s*\.[\w-]+\s*)*\{[^}]*background(?:-image)?\s*:\s*url\(\s*["\']?([^"\')\s]+)',
    re.IGNORECASE | re.DOTALL,
 )
 def find_logo_url_in_html(html: str, base_url: str, css_texts: list[str] | None = None) -> str | None:
    """Find a logo URL in a page and return it resolved against ``base_url``.

    ``LOGO_IMG_PATTERNS`` are tried in list order against ``html``; if none
    yields a usable candidate, every ``LOGO_CSS_PATTERN`` match in each text
    of ``css_texts`` is tried. Returns None when nothing usable is found.

    A candidate is skipped when it is empty, a ``data:`` URI, or looks like a
    placeholder image (blank/spacer/pixel/transparent/1x1). The same filter
    applies to HTML and CSS candidates, and all CSS matches are examined, so an
    unusable first rule in a stylesheet no longer hides a later valid one.
    """
    def _usable(src: str) -> bool:
        if not src or src.startswith("data:"):
            return False
        return not re.search(r"(blank|spacer|pixel|transparent|1x1)\b", src, re.IGNORECASE)

    for pat in LOGO_IMG_PATTERNS:
        for m in pat.finditer(html):
            src = m.group(1)
            if _usable(src):
                return urljoin(base_url, src)
    # Fall back to `.logo { background-image: url(...) }` rules in external CSS.
    for css in css_texts or []:
        for m in LOGO_CSS_PATTERN.finditer(css):
            src = m.group(1)
            if _usable(src):
                return urljoin(base_url, src)
    return None
 HEX6 = re.compile(r"#([0-9a-fA-F]{6})\b")  # 6-digit hex colour (#rrggbb)
 HEX3 = re.compile(r"#([0-9a-fA-F]{3})\b(?![0-9a-fA-F])")  # 3-digit shorthand (#rgb), not followed by another hex digit
 RGB  = re.compile(r"rgba?\(\s*(\d{1,3})\s*,\s*(\d{1,3})\s*,\s*(\d{1,3})\s*(?:,\s*[\d.]+\s*)?\)")  # rgb()/rgba() with comma-separated integer channels
 CSS_VAR_HEX = re.compile(r"--[\w-]+\s*:\s*(#[0-9a-fA-F]{3,8})", re.IGNORECASE)  # CSS custom property whose value is a hex colour
 CSS_LINK = re.compile(r'<link[^>]+rel=["\']stylesheet["\'][^>]+href=["\']([^"\']+)["\']', re.IGNORECASE)  # <link rel="stylesheet" href="..."> with rel before href
 STYLE_BLOCK = re.compile(r"<style[^>]*>(.*?)</style>", re.IGNORECASE | re.DOTALL)  # contents of inline <style> blocks
 # Achromatic / very common noise colours (never treated as a brand colour).
 NOISE = {
    "#ffffff", "#000000", "#fff", "#000",
    "#333", "#222", "#111", "#444", "#555", "#666", "#777", "#888", "#999",
    "#aaa", "#bbb", "#ccc", "#ddd", "#eee", "#f0f0f0", "#f5f5f5", "#fafafa",
 }
 def _normalize(hex_str: str) -> str:
    """Canonicalize a hex colour to lowercase ``#rrggbb``.

    3-digit shorthand is expanded by doubling each digit; an 8-digit value
    keeps only its first six digits.
    """
    digits = hex_str.lstrip("#").lower()
    if len(digits) == 3:
        digits = "".join(ch + ch for ch in digits)
    elif len(digits) == 8:
        digits = digits[:6]
    return "#" + digits
 def _rgb_to_hex(r: int, g: int, b: int) -> str:
    """Format three integer channels as a lowercase ``#rrggbb`` string."""
    return "#" + "".join(f"{channel:02x}" for channel in (r, g, b))
 def _hex_to_rgb(h: str) -> tuple[int, int, int]:
    """Split a 6-digit hex colour (leading ``#`` optional) into integer (r, g, b)."""
    digits = h.lstrip("#")
    red, green, blue = (int(digits[start:start + 2], 16) for start in (0, 2, 4))
    return red, green, blue
 def _distance(a: str, b: str) -> float:
    """Euclidean distance between two hex colours in RGB space."""
    squared = sum((p - q) ** 2 for p, q in zip(_hex_to_rgb(a), _hex_to_rgb(b)))
    return squared ** 0.5
 def _is_grayscale(h: str, tol: int = 12) -> bool:
    """Report whether the gap between the largest and smallest RGB channel of `h` is below `tol`."""
    channels = _hex_to_rgb(h)
    spread = max(channels) - min(channels)
    return spread < tol
 def _extract_hex(text: str) -> list[str]:
    """Collect every color in `text` as a normalised hex string.
    Results are ordered as: all HEX6 matches, then all HEX3 matches, then rgb()/rgba() matches
    whose three channels each lie in 0..255.
    """
    found: list[str] = [_normalize(hit.group(0)) for hit in HEX6.finditer(text)]
    found += [_normalize(hit.group(0)) for hit in HEX3.finditer(text)]
    for hit in RGB.finditer(text):
        channels = tuple(int(part) for part in hit.group(1, 2, 3))
        # The regex admits up to three digits per channel, so values above 255 must be dropped here.
        if all(0 <= value <= 255 for value in channels):
            found.append(_rgb_to_hex(*channels))
    return found
 def _cluster(colors: Counter, threshold: float = 25.0) -> list[tuple[str, int]]:
    """비슷한 색은 묶음. 가장 빈도 높은 색을 대표로."""
    ranked = colors.most_common()
    clusters: list[tuple[str, int]] = []
    for color, count in ranked:
        merged = False
        for i, (rep, rep_count) in enumerate(clusters):
            if _distance(color, rep) < threshold:
                clusters[i] = (rep, rep_count + count)
                merged = True
                break
        if not merged:
            clusters.append((color, count))
    return clusters
 async def _fetch_html_and_css(homepage_url: str, max_css_files: int = 8) -> tuple[str, list[str]]:
    """Fetch the homepage HTML and its first `max_css_files` linked stylesheets in one pass.
    Shared by logo and color extraction so the site is not scraped twice. Per the original note,
    `_fetch_html` is expected to handle weak-SSL / host-mismatch fallbacks itself.
    Returns ("", []) when the homepage does not come back as a non-empty 200 response; stylesheets
    that fail to load are simply left out.
    """
    status, html = await _fetch_html(homepage_url)
    if status == 200 and html:
        stylesheets: list[str] = []
        hrefs = CSS_LINK.findall(html)
        for href in hrefs[:max_css_files]:
            css_status, css_body = await _fetch_html(urljoin(homepage_url, href), timeout=15.0)
            if css_status != 200 or not css_body:
                continue
            stylesheets.append(css_body)
        return html, stylesheets
    logger.warning("[color_extractor] homepage fetch failed status=%s url=%s", status, homepage_url)
    return "", []
 def _colors_from_text(html: str, css_texts: list[str], source_url: str = "") -> dict:
    """Rank the colors found in already-fetched HTML/CSS text and pick brand colors (pure computation, no fetch).
    Args:
        html: Page markup; its <style> blocks and inline style attributes are scanned as part of this text.
        css_texts: Bodies of external stylesheets.
        source_url: Only used in the log message when nothing is found.
    Returns:
        {} when no non-noise color is found. Otherwise a dict with "brand_colors" (primary / accent are the
        first and second chromatic clusters, text is the first grayscale cluster, each None when absent),
        "color_palette" (the first 8 clusters) and "extracted_from".
    """
    # 1. `html` already contains its own <style> blocks, so scanning it once covers both <style> rules
    #    and inline style="..." attributes. Adding the blocks as separate chunks would count every
    #    <style> color twice and skew the frequency ranking.
    all_text_chunks: list[str] = [html, *css_texts]
    # 2. Count every extracted color except the known noise colors.
    counter: Counter = Counter()
    for text in all_text_chunks:
        for color in _extract_hex(text):
            if color in NOISE:
                continue
            counter[color] += 1
    if not counter:
        logger.info("[color_extractor] no colors extracted from %s", source_url)
        return {}
    # 3. Merge near-identical colors.
    clustered = _cluster(counter)
    # 4. Split clusters into chromatic and grayscale, keeping cluster order.
    chromatic = [c for c, _ in clustered if not _is_grayscale(c)]
    grayscale = [c for c, _ in clustered if _is_grayscale(c)]
    palette_top = clustered[:8]
    palette = [{"name": f"색상 {i+1}", "hex": h, "usage": f"빈도 {n}"} for i, (h, n) in enumerate(palette_top)]
    return {
        "brand_colors": {
            "primary": chromatic[0] if chromatic else None,
            "accent": chromatic[1] if len(chromatic) > 1 else None,
            "text": grayscale[0] if grayscale else None,
        },
        "color_palette": palette,
        "extracted_from": "html+css",
    }
 async def extract_brand_colors_from_site(homepage_url: str, max_css_files: int = 8) -> dict:
    """Fetch the homepage HTML plus external CSS and return the color analysis from `_colors_from_text`.
    Returns {} when the homepage could not be fetched.
    """
    html, css_texts = await _fetch_html_and_css(homepage_url, max_css_files)
    return _colors_from_text(html, css_texts, homepage_url) if html else {}
 async def extract_brand_assets_from_site(homepage_url: str, max_css_files: int = 8) -> dict:
    """Fetch the site once and extract both the logo URL and the brand colors from that single download.
    Returns {"logo_url": str | None, "colors": dict}; when the homepage fetch fails the result is
    {"logo_url": None, "colors": {}}.
    """
    html, css_texts = await _fetch_html_and_css(homepage_url, max_css_files)
    assets: dict = {"logo_url": None, "colors": {}}
    if html:
        assets["logo_url"] = find_logo_url_in_html(html, homepage_url, css_texts=css_texts)
        assets["colors"] = _colors_from_text(html, css_texts, homepage_url)
    return assets
--- a/app/integrations/firecrawl.py
+++ b/app/integrations/firecrawl.py
@ -16,7 +16,7 @@ class FirecrawlClient:
            HTTPMethod.POST,
            url=f"{FIRECRAWL_BASE}/scrape",
            headers=self._headers(),
-            json_body={"url": url, "formats": ["json", "links"], "jsonOptions": json_options, "waitFor": wait_for, "maxAge": 0},
+            json_body={"url": url, "formats": ["json", "links"], "jsonOptions": json_options, "waitFor": wait_for},
            label="firecrawl-scrape",
        )
        if not resp or not resp.is_success:
@ -74,9 +74,9 @@ class FirecrawlClient:
            headers=self._headers(),
            json_body={
                "url": url,
-                "formats": ["json", "links", "rawHtml"],
+                "formats": ["json", "links"],
                "jsonOptions": {
-                    "prompt": "Extract: 클리닉 이름 - clinicName (Korean), clinic name (English), address, phone with dash format, business hours, slogan, services offered, doctors with name/title/specialty, brand identity (primary/accent/background/text colors in hex, heading/body fonts, logo URL from the actual header/main <img> src, og:image from <meta property='og:image'> content, favicon URL)",
+                    "prompt": "Extract: clinic name (Korean), clinic name (English), address, phone with dash format, business hours, slogan, services offered, doctors with name/title/specialty, brand identity (primary/accent/background/text colors in hex, heading/body fonts, logo URL from the actual header/main <img> src, og:image from <meta property='og:image'> content, favicon URL)",
                    "schema": {
                        "type": "object",
                        "properties": {
@ -127,11 +127,8 @@ class FirecrawlClient:
                    },
                },
                "waitFor": 5000,
                "maxAge": 0,
                "proxy": "auto",     # 기본 엔진이 차단되는 사이트(예: viewclinic.com)는 자동으로 stealth 프록시로 재시도.
                "timeout": 120000,   # proxy:auto면 60s로도 충분했지만(실측), 혹시 모르니 여유있게 120s.
            },
-            timeout=150,  # 위 Firecrawl 잡 타임아웃보다 길어야 우리 쪽 HTTP 클라이언트가 먼저 끊지 않음.
+            timeout=60,
            label="firecrawl-clinic-info",
        )
        if not resp or not resp.is_success:
@ -150,7 +147,6 @@ class FirecrawlClient:
            # "socialMedia": info.get("socialMedia", {}),
            "branding":      info.get("branding", {}),
            "siteLinks":     data.get("links", []),
            "html":          data.get("rawHtml", "") or data.get("html", ""),  # rawHtml = 가공 전 원본 — <script> 등 보존.
            "sourceUrl":     url,
        }
@ -190,7 +186,6 @@ class FirecrawlClient:
                    },
                },
                "waitFor": 5000,
                "maxAge": 0,
            },
            timeout=60,
            label="firecrawl-gangnamunni",
--- a/app/integrations/llm/gemini_vision.py
+++ b/app/integrations/llm/gemini_vision.py
@ -1,329 +0,0 @@
 """Gemini Vision — 로고/브랜드 비주얼 자동 분석 (OpenAI 호환 모드).
 정확한 hex 색상은 color_extractor가 CSS에서 직접 뽑음 (Vision은 근사값밖에 못 냄).
 Vision은 사람이 봐야 알 수 있는 정성 정보 — 심볼 형태/워드마크/톤 — 를 담당.
 """
 import asyncio
 import base64
 import json
 import logging
 import re
 import ssl
 import httpx
 import resvg_py
 from openai import AsyncOpenAI
 logger = logging.getLogger(__name__)
 DEFAULT_MODEL = "gemini-2.5-flash"
 class VisionClient:
    """Calls Gemini Vision through its OpenAI-compatible endpoint. Only a Gemini API key is required."""
    def __init__(self, api_key: str, model: str = DEFAULT_MODEL, timeout: float = 30.0, max_retries: int = 2):
        """Build the AsyncOpenAI client against the Gemini OpenAI-compatible base URL and remember the model name."""
        self.client = AsyncOpenAI(
            api_key=api_key,
            base_url="https://generativelanguage.googleapis.com/v1beta/openai/",
            timeout=timeout,
            max_retries=max_retries,
        )
        self.model = model
    @staticmethod
    def _extract_json(text: str) -> dict | None:
        """Pull a JSON object out of a model reply.
        Tries a ``` / ```json fenced object first, then the widest {...} span in the text.
        Returns None for empty input or when no candidate parses.
        """
        if not text:
            return None
        # Preferred form: an object wrapped in a code fence.
        m = re.search(r"```(?:json)?\s*(\{.*?\})\s*```", text, re.DOTALL)
        if m:
            try:
                return json.loads(m.group(1))
            except json.JSONDecodeError:
                # Fall through to the greedy match below.
                pass
        # Fallback: from the first "{" to the last "}" in the reply.
        m = re.search(r"\{.*\}", text, re.DOTALL)
        if m:
            try:
                return json.loads(m.group(0))
            except json.JSONDecodeError:
                return None
        return None
    @staticmethod
    async def _fetch_as_data_url(url: str) -> str | None:
        """Download an image and return it as a base64 data URL, or None when it is unusable.
        Per the original note, Gemini is blocked from fetching many hosts directly, so images are inlined.
        Placeholder-like responses are rejected (non-200 status, non-image content type, fewer than 500 bytes).
        Sites with weak SSL are handled by a three-step fallback: default verification, then a
        SECLEVEL=1 context, then verification disabled.
        """
        headers = {"User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36"}
        def _weak_ctx() -> ssl.SSLContext:
            # Default context with the cipher security level lowered; left unchanged if that is unsupported.
            ctx = ssl.create_default_context()
            try:
                ctx.set_ciphers("DEFAULT@SECLEVEL=1")
            except ssl.SSLError:
                pass
            return ctx
        last_err: Exception | None = None
        for verify in (True, _weak_ctx(), False):
            try:
                async with httpx.AsyncClient(
                    timeout=15.0, follow_redirects=True, headers=headers, verify=verify,
                ) as c:
                    resp = await c.get(url)
                if resp.status_code != 200:
                    logger.warning("[vision] fetch %s status=%s", url, resp.status_code)
                    return None
                mime = resp.headers.get("content-type", "").split(";")[0].strip()
                # Reject non-image responses (e.g. an HTML page served with 200 instead of a 404).
                if not mime.startswith("image/"):
                    logger.warning("[vision] %s not an image (content-type=%s)", url, mime)
                    return None
                # Gemini cannot read SVG, so rasterize it to PNG right away (resvg, in memory; ~1ms per the original note).
                content = resp.content
                if mime == "image/svg+xml" or url.lower().split("?")[0].endswith(".svg"):
                    try:
                        content = bytes(resvg_py.svg_to_bytes(svg_string=resp.text))
                        mime = "image/png"
                    except Exception as e:
                        logger.warning("[vision] svg rasterize failed %s: %s", url, e)
                        return None
                size = len(content)
                if size < 500:
                    logger.warning("[vision] %s too small (%d bytes) — likely placeholder", url, size)
                    return None
                b64 = base64.b64encode(content).decode("ascii")
                return f"data:{mime};base64,{b64}"
            except (httpx.ConnectError, httpx.ReadError, ssl.SSLError) as e:
                # Connection/SSL-level failure: retry with the next, more permissive verify setting.
                last_err = e
                continue
            except Exception as e:
                logger.warning("[vision] fetch error %s: %s", url, e)
                return None
        logger.warning("[vision] fetch %s SSL fallback all failed: %s", url, last_err)
        return None
    async def _ask(self, image_urls: list[str], prompt: str, max_tokens: int = 4000) -> dict | None:
        """Send the fetchable images plus `prompt` to the model and return the parsed JSON reply.
        Images that cannot be fetched are skipped silently. Returns None when no image could be
        fetched, when the API call raises, or when the reply contains no parsable JSON object.
        """
        content: list[dict] = []
        for u in image_urls:
            if not u:
                continue
            data_url = await self._fetch_as_data_url(u)
            if not data_url:
                continue
            content.append({"type": "image_url", "image_url": {"url": data_url}})
        if not any(c.get("type") == "image_url" for c in content):
            logger.warning("[vision] no images could be fetched")
            return None
        content.append({"type": "text", "text": prompt})
        try:
            resp = await self.client.chat.completions.create(
                model=self.model,
                messages=[{"role": "user", "content": content}],
                max_tokens=max_tokens,
            )
            choice = resp.choices[0]
            # A non-"stop" finish is only logged; the partial reply is still parsed.
            if choice.finish_reason != "stop":
                logger.warning("[vision] unexpected finish_reason=%s", choice.finish_reason)
            return self._extract_json(choice.message.content or "")
        except Exception as e:
            logger.warning("[vision] error: %s", e)
            return None
    async def describe_svg_text(self, svg_url: str) -> dict | None:
        """Describe an SVG logo by sending its XML source to the model as plain text.
        Per the original note, Gemini Vision cannot view SVG images but an LLM can read the markup,
        so the source is passed to the text endpoint to infer colors, symbol and text. The reply uses
        the same keys as analyze_brand_assets (logo_description / logo_style / has_symbol / ...), and
        "logo_images" is added here. Returns None when the SVG cannot be fetched or the reply cannot be parsed.
        """
        headers = {"User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36"}
        def _weak_ctx() -> ssl.SSLContext:
            # Same lowered-security-level context as in _fetch_as_data_url.
            ctx = ssl.create_default_context()
            try:
                ctx.set_ciphers("DEFAULT@SECLEVEL=1")
            except ssl.SSLError:
                pass
            return ctx
        svg_text: str | None = None
        for verify in (True, _weak_ctx(), False):
            try:
                async with httpx.AsyncClient(
                    timeout=15.0, follow_redirects=True, headers=headers, verify=verify,
                ) as c:
                    resp = await c.get(svg_url)
                if resp.status_code == 200:
                    svg_text = resp.text
                # Any HTTP response (200 or not) ends the fallback loop; only connection/SSL errors retry.
                break
            except (httpx.ConnectError, httpx.ReadError, ssl.SSLError):
                continue
            except Exception as e:
                logger.warning("[vision] svg fetch error %s: %s", svg_url, e)
                return None
        if not svg_text:
            logger.warning("[vision] svg fetch failed %s", svg_url)
            return None
        # Cap the payload size — an ordinary logo SVG is only a few KB.
        if len(svg_text) > 60000:
            svg_text = svg_text[:60000]
        prompt = (
            "아래는 병원 로고 SVG 소스 코드입니다. SVG 마크업(path/circle/text/fill/stroke 등)을 "
            "읽고 로고의 시각적 특징을 추론해 아래 JSON 스키마로만 응답하세요. 코드펜스 없이 순수 JSON.\n"
            "{\n"
            '  "logo_description": "심볼 형태 + 워드마크 + 톤을 1~2문장 한국어로",\n'
            '  "logo_style": "minimal | illustrative | typographic | abstract 중 하나",\n'
            '  "has_symbol": "심볼/아이콘이 있으면 true, 글자만 있으면 false (boolean)",\n'
            '  "logo_symbol": "심볼 묘사 (예: \'잎사귀\'). 없으면 빈 문자열",\n'
            '  "logo_text": "워드마크 텍스트 그대로. <text> 태그 내용 우선",\n'
            '  "logo_colors_desc": "쓰인 색감을 사람이 부르는 이름으로 (예: \'딥네이비 + 골드\'). hex 출력 금지"\n'
            "}\n"
            "주의: hex 값이나 URL은 출력하지 마세요 (별도 추출 로직 처리). 모든 텍스트는 한국어로.\n\n"
            "SVG 소스:\n"
            f"{svg_text}"
        )
        try:
            resp = await self.client.chat.completions.create(
                model=self.model,
                messages=[{"role": "user", "content": prompt}],
                max_tokens=8000,  # Gemini 2.5 deducts thinking tokens from max_tokens, so leave headroom (original note)
            )
            choice = resp.choices[0]
            if choice.finish_reason != "stop":
                logger.warning("[vision] svg describe finish_reason=%s", choice.finish_reason)
            result = self._extract_json(choice.message.content or "")
        except Exception as e:
            logger.warning("[vision] svg describe error: %s", e)
            return None
        if not result:
            return None
        result["logo_images"] = {"circle": None, "horizontal": svg_url, "korean": None}
        return result
    async def analyze_brand_assets(
        self,
        logo_url: str | None,
        homepage_url: str | None,
        additional_images: list[str] | None = None,
    ) -> dict:
        """Qualitatively analyse the logo image(s).
        Exact hex colors are handled separately by color_extractor (per the original note); the
        logo_colors_hex values requested here are visual approximations. Returns {} when there is
        no image URL or the model gives no usable reply.
        NOTE(review): `homepage_url` is not used anywhere in this method body.
        """
        urls = [u for u in [logo_url] + list(additional_images or []) if u]
        if not urls:
            return {}
        prompt = (
            "당신은 브랜드 로고 시각 분석가입니다. 첨부된 이미지(첫 번째가 병원의 대표 로고)를 보고 "
            "아래 JSON 스키마로만 응답하세요. 코드펜스 없이 순수 JSON만 출력.\n"
            "{\n"
            '  "logo_description": "로고를 1~2문장으로 설명 (심볼 형태 + 워드마크 + 전반적 톤). 예: \'둥근 잎사귀를 감싼 추상 심볼에 세리프 한글 워드마크, 차분하고 고급스러운 톤\'",\n'
            '  "logo_style": "minimal | illustrative | typographic | abstract 중 하나",\n'
            '  "has_symbol": "심볼/아이콘이 있으면 true, 글자만 있으면 false (boolean)",\n'
            '  "logo_symbol": "심볼이 묘사하는 대상 (예: \'잎사귀\', \'추상 곡선\'). 없으면 빈 문자열",\n'
            '  "logo_text": "로고에 보이는 워드마크 텍스트 그대로 (한글/영문). 없으면 빈 문자열",\n'
            '  "logo_colors_desc": "로고에 쓰인 색감을 사람이 부르는 이름으로 서술 (예: \'딥네이비 + 골드\')",\n'
            '  "logo_colors_hex": ["로고에서 시각적으로 가장 두드러진 색 최대 2개의 hex 근사값 배열. 예: [\'#1A2B3C\', \'#D4A017\']. 색이 1개면 1개만, 강한 색이 1개도 없으면 빈 배열."]\n'
            "}\n"
            "주의: logo_colors_hex 는 시각 추정이라 정확도 떨어질 수 있음. CSS 추출이 우선이고 이건 fallback/보완 용.\n"
            "모든 설명/텍스트 값은 반드시 한국어로 작성하세요 (영어 금지)."
        )
        result = await self._ask(urls, prompt)
        if not result:
            return {}
        # logo_images is filled in here; the model only supplies descriptions.
        result["logo_images"] = {"circle": None, "horizontal": logo_url, "korean": None}
        # Keep at most two logo_colors_hex entries, and only strings that start with "#".
        hex_list = [h for h in (result.get("logo_colors_hex") or []) if isinstance(h, str) and h.startswith("#")]
        result["logo_colors_hex"] = hex_list[:2]
        return result
    async def describe_channel_logos(
        self,
        official_logo_url: str | None,
        channel_logos: list[dict],
    ) -> dict | None:
        """Describe each channel's profile image (logo) and judge whether it matches the official logo.
        channel_logos: [{"channel": "Instagram", "url": "..."}, ...]
        Returns: {"channel_logos": [{"channel","logo_description","is_official"}], "inconsistency_summary", "recommendation"},
        or None when no channel has a URL or no chunk produced a result.
        Channels are sent in groups of 3, run concurrently. Per the original note, sending everything in
        one call made the model confuse channel/image pairing and copy the same description to several
        channels (a case where two different channel profiles were both described as the official logo),
        while batching by 3 is cheaper than one call per channel.
        """
        items = [c for c in channel_logos if c.get("url")]
        if not items:
            return None
        CHUNK = 3
        async def _chunk(batch: list[dict]) -> list[dict]:
            """Ask the model about one batch of channels; returns [] when the model gives no usable reply."""
            urls = [official_logo_url] + [c["url"] for c in batch] if official_logo_url else [c["url"] for c in batch]
            n = len(batch)
            # Spell out the image-number <-> channel mapping in the prompt.
            # NOTE(review): _ask silently drops images it cannot fetch, so if any image (e.g. the official
            # logo) fails to download, the numbering stated below no longer matches the images sent — confirm.
            if official_logo_url:
                mapping = "이미지 1 = 공식 로고\n" + "\n".join(
                    f"이미지 {i+2} = {c.get('channel','?')} 채널 프로필" for i, c in enumerate(batch)
                )
                instruction = (
                    f"{mapping}\n\n"
                    f"이미지 2~{n+1}(채널 프로필 {n}개)을 각각 **그 이미지에 실제로 보이는 그대로** "
                    "한국어 1문장으로 묘사하세요 (색·형태·텍스트·배경 그대로).\n"
                    "❗ 공식 로고(이미지 1) 묘사를 절대 복사하지 마세요. 각 채널 이미지에 보이는 실제 특징만.\n"
                    "각 채널이 공식 로고와 시각적으로 거의 동일하면 is_official=true, "
                    "심볼/색/배경/텍스트가 다르거나 모델 사진이면 false.\n"
                )
            else:
                mapping = "\n".join(f"이미지 {i+1} = {c.get('channel','?')} 채널 프로필" for i, c in enumerate(batch))
                instruction = (
                    f"{mapping}\n\n"
                    f"각 이미지를 보이는 그대로 한국어 1문장으로 묘사 (색·형태·텍스트·배경).\n"
                )
            schema_lines = ",\n".join(
                f'    {{"channel": "{c.get("channel","?")}", "logo_description": "...", "is_official": true}}'
                for c in batch
            )
            p = (
                instruction
                + "\n아래 JSON으로만 응답 (코드펜스 없이, 순수 JSON):\n{\n"
                + f'  "channel_logos": [\n{schema_lines}\n  ]\n'
                + "}\n"
                + f"channel 필드는 위 매핑 그대로 ({', '.join(c.get('channel','?') for c in batch)}). "
                + "logo_description은 반드시 한국어 (영어 금지)."
            )
            r = await self._ask(urls, p)
            if not r:
                return []
            out = []
            for c in r.get("channel_logos", []):
                out.append({
                    "channel": c.get("channel", ""),
                    "logo_description": c.get("logo_description", ""),
                    # Without an official logo there is nothing to compare against, so the flag is None.
                    "is_official": bool(c.get("is_official", False)) if official_logo_url else None,
                })
            return out
        # Split into chunks of 3 and run them concurrently.
        chunks = [items[i:i+CHUNK] for i in range(0, len(items), CHUNK)]
        results = await asyncio.gather(*[_chunk(b) for b in chunks], return_exceptions=True)
        channel_logos_out: list[dict] = []
        for r in results:
            # A failed chunk is logged and skipped; the other chunks still contribute.
            if isinstance(r, Exception):
                logger.warning("[vision] channel_logo chunk error: %s", r)
                continue
            channel_logos_out.extend(r)
        if not channel_logos_out:
            return None
        # The consistency summary and recommendation are computed deterministically (no extra LLM call).
        if official_logo_url:
            mismatches = [c["channel"] for c in channel_logos_out if not c.get("is_official")]
            if not mismatches:
                summary = "모든 채널이 공식 로고를 일관되게 사용하고 있습니다."
                rec = "현재 일관성 유지."
            else:
                summary = f"{len(mismatches)}개 채널({', '.join(mismatches)})이 공식 로고와 다른 이미지를 사용해 브랜드 일관성이 부족합니다."
                rec = "비공식 채널 프로필을 공식 로고로 통일 권고."
        else:
            summary, rec = "", ""
        return {
            "channel_logos": channel_logos_out,
            "inconsistency_summary": summary,
            "recommendation": rec,
        }
--- a/app/integrations/llm/prompt.py
+++ b/app/integrations/llm/prompt.py
@ -1,19 +1,8 @@
 import os
 from pydantic import BaseModel
 from common.utils import get_env
-from integrations.llm.schemas.report import (
+from integrations.llm.schemas.report import ReportInput, ReportOutput
-    ReportInput, ReportOutput,
+from integrations.llm.schemas.plan import PlanInput, PlanOutput
    CriticalIssuesInput, CriticalIssuesOutput,
    YouTubeDiagnosisInput, YouTubeDiagnosisOutput,
    InstagramDiagnosisInput, InstagramDiagnosisOutput,
    FacebookDiagnosisInput, FacebookDiagnosisOutput,
    BrandConsistencyInput, BrandConsistencyOutput,
    TransformationInput, TransformationProposal,
    RoadmapInput, RoadmapOutput,
    ScoresInput, ScoresOutput,
    OtherChannelsInput, OtherChannelsOutput
 )
 from integrations.llm.schemas.plan import PlanInput, PlanOutput, SummarizeInput, SummarizeOutput
 from integrations.llm.schemas.market import (
    MarketCompetitorsInput, MarketCompetitorsOutput,
    MarketKeywordsInput, MarketKeywordsOutput,
@ -64,13 +53,6 @@ plan_prompt = Prompt(
    output_class=PlanOutput,
 )
 summarize_prompt = Prompt(
    file_name="summarize_prompt.txt",
    prompt_model="PLAN_MODEL",
    input_class=SummarizeInput,
    output_class=SummarizeOutput,
 )
 market_competitors_prompt = Prompt(
    file_name="market_competitors_prompt.txt",
    prompt_model="MARKET_MODEL",
@ -98,66 +80,3 @@ market_target_audience_prompt = Prompt(
    input_class=MarketTargetAudienceInput,
    output_class=MarketTargetAudienceOutput,
 )
 facebook_diagnosis_prompt = Prompt(
    file_name="facebook_diagnosis_prompt.txt",
    prompt_model="REPORT_MODEL",
    input_class=FacebookDiagnosisInput,
    output_class=FacebookDiagnosisOutput,
 )
 instagram_diagnosis_prompt = Prompt(
    file_name="instagram_diagnosis_prompt.txt",
    prompt_model="REPORT_MODEL",
    input_class=InstagramDiagnosisInput,
    output_class=InstagramDiagnosisOutput,
 )
 youtube_diagnosis_prompt = Prompt(
    file_name="youtube_diagnosis_prompt.txt",
    prompt_model="REPORT_MODEL",
    input_class=YouTubeDiagnosisInput,
    output_class=YouTubeDiagnosisOutput,
 )
 brand_consistency_prompt = Prompt(
    file_name="brand_consistency_prompt.txt",
    prompt_model="REPORT_MODEL",
    input_class=BrandConsistencyInput,
    output_class=BrandConsistencyOutput,
 )
 critical_issues_prompt = Prompt(
    file_name="critical_issues_prompt.txt",
    prompt_model="REPORT_MODEL",
    input_class=CriticalIssuesInput,
    output_class=CriticalIssuesOutput,
 )
 transformation_prompt = Prompt(
    file_name="transformation_prompt.txt",
    prompt_model="REPORT_MODEL",
    input_class=TransformationInput,
    output_class=TransformationProposal,
 )
 scores_prompt = Prompt(
    file_name="scores_prompt.txt",
    prompt_model="REPORT_MODEL",
    input_class=ScoresInput,
    output_class=ScoresOutput,
 )
 roadmap_prompt = Prompt(
    file_name="roadmap_prompt.txt",
    prompt_model="REPORT_MODEL",
    input_class=RoadmapInput,
    output_class=RoadmapOutput,
 )
 other_channels_prompt = Prompt(
    file_name="other_channels_prompt.txt",
    prompt_model="REPORT_MODEL",
    input_class=OtherChannelsInput,
    output_class=OtherChannelsOutput,
 )
--- a/app/integrations/llm/schemas/plan.py
+++ b/app/integrations/llm/schemas/plan.py
@ -2,15 +2,6 @@ from typing import Literal
 from pydantic import BaseModel
 class SummarizeInput(BaseModel):
    label: str
    data: str
 class SummarizeOutput(BaseModel):
    summary: str
 class PlanInput(BaseModel):
    clinic_name: str | None = None
    clinic_name_en: str | None = None
@ -25,11 +16,8 @@ class PlanInput(BaseModel):
    market_trend: str | None = None
    market_target_audience: str | None = None
    tiktok: str | None = None
-    instagram: str | None = None
+    instagram_en: str | None = None
-    facebook: str | None = None
+    facebook_en: str | None = None
    naver_blog: str | None = None
    naver_cafe: str | None = None
    kakao_talk: str | None = None
    channel_logos: str | None = None
    brand_assets: str | None = None
@ -68,7 +56,7 @@ class ChannelBrandingRule(BaseModel):
    profile_photo: str
    banner_spec: str
    bio_template: str
-    current_status: Literal["correct", "incorrect", "N/A"]
+    current_status: Literal["correct", "incorrect", "missing"]
 class BrandPlanInconsistencyValue(BaseModel):
--- a/app/integrations/llm/schemas/report.py
+++ b/app/integrations/llm/schemas/report.py
@ -70,14 +70,20 @@ class RegistryData(BaseModel):
 class ClinicSnapshot(BaseModel):
    name: str
    name_en: str
-    staff_count: int | None = None
+    established: str
-    lead_doctor: LeadDoctor | None = None
+    years_in_business: int
-    overall_rating: float | None = None
+    staff_count: int
-    total_reviews: int | None = None
+    lead_doctor: LeadDoctor
-    certifications: list[str] = []
+    overall_rating: float
    total_reviews: int
    price_range: PriceRange
    certifications: list[str]
    media_appearances: list[str]
    medical_tourism: list[str]
    location: str
    nearest_station: str
    phone: str
-    domain: str | None = None
+    domain: str
    logo_images: LogoImages | None = None
    brand_colors: BrandColors | None = None
    source: DataSource | None = None
@ -131,10 +137,11 @@ class YouTubeAudit(BaseModel):
    avg_video_length: str
    upload_frequency: str
    channel_created_date: str
    subscriber_rank: str
    channel_description: str
    linked_urls: list[LinkedUrl]
    playlists: list[str]
-    top_videos: list[TopVideo] = []
+    top_videos: list[TopVideo]
    diagnosis: list[DiagnosisItem]
@ -184,24 +191,24 @@ class FacebookPage(BaseModel):
    followers: int
    following: int
    category: str
-    bio: str = ""
+    bio: str
    logo: str
    logo_description: str
    link: str
-    linked_domain: str = ""
+    linked_domain: str
    reviews: int
    recent_post_age: str
-    has_whatsapp: bool | None = None
+    has_whatsapp: bool
-    post_frequency: str
+    post_frequency: str | None = None
    top_content_type: str | None = None
-    engagement: str
+    engagement: str | None = None
 class FacebookAudit(BaseModel):
-    pages: list[FacebookPage] = []
+    pages: list[FacebookPage]
-    diagnosis: list[DiagnosisItem] = []
+    diagnosis: list[DiagnosisItem]
-    brand_inconsistencies: list[BrandInconsistency] = []
+    brand_inconsistencies: list[BrandInconsistency]
-    consolidation_recommendation: str | None = None
+    consolidation_recommendation: str
 # --- 기타 채널 / 웹사이트 ---
@ -213,19 +220,6 @@ class OtherChannel(BaseModel):
    url: str | None = None
 class OtherChannelsInput(BaseModel):
    clinic_name:  str
    tiktok:       str | None = None
    kakao_talk:   str | None = None
    naver_cafe:   str | None = None
    naver_blog:   str | None = None
    gangnam_unni: str | None = None
 class OtherChannelsOutput(BaseModel):
    other_channels: list[OtherChannel]
 class TrackingPixel(BaseModel):
    name: str
    installed: bool
@ -245,7 +239,7 @@ class AdditionalDomain(BaseModel):
 class WebsiteAudit(BaseModel):
    primary_domain: str
-    additional_domains: list[AdditionalDomain] = []
+    additional_domains: list[AdditionalDomain]
    sns_links_on_site: bool
    sns_links_detail: list[SnsLink] | None = None
    tracking_pixels: list[TrackingPixel]
@ -332,8 +326,6 @@ class ReportInput(BaseModel):
    tiktok: str | None = None
    instagram_en: str | None = None
    facebook_en: str | None = None
    kakao_talk: str | None = None
    naver_cafe: str | None = None
    channel_logos: str | None = None
@ -359,92 +351,3 @@ class MarketingReport(BaseModel):
 ReportOutput = MarketingReport
 # --- YouTubeDiagnosis ---
 class YouTubeDiagnosisInput(BaseModel):
    channel_name: str | None = None
    subscribers: int | None = None
    total_videos: int | None = None
    total_views: int | None = None
    avg_video_length: str | None = None
    upload_frequency: str | None = None
    top_videos: str | None = None
    playlists: str | None = None
 class YouTubeDiagnosisOutput(BaseModel):
    diagnosis: list[DiagnosisItem]
 class InstagramDiagnosisInput(BaseModel):
    accounts: str | None = None
 class InstagramDiagnosisOutput(BaseModel):
    diagnosis: list[DiagnosisItem]
 class FacebookDiagnosisInput(BaseModel):
    pages: str | None = None
 class FacebookDiagnosisOutput(BaseModel):
    diagnosis: list[DiagnosisItem]
 # --- Scores ---
 class ScoresInput(BaseModel):
    clinic_name: str | None = None
    data: str | None = None
 class ScoresOutput(BaseModel):
    overall_score: int
    channel_scores: list[ChannelScore]
 # --- Diagnosis ---
 class CriticalIssuesInput(BaseModel):
    clinic_name: str | None = None
    data: str | None = None
 class CriticalIssuesOutput(BaseModel):
    diagnosis: list[DiagnosisItem]
 # --- Roadmap ---
 class RoadmapInput(BaseModel):
    clinic_name: str | None = None
    data: str | None = None
 class RoadmapOutput(BaseModel):
    roadmap: list[RoadmapMonth]
 # --- Transformation ---
 class TransformationInput(BaseModel):
    clinic_name: str | None = None
    data: str | None = None
 # --- BrandConsistency ---
 class BrandConsistencyInput(BaseModel):
    clinic_name: str | None = None
    mainpage: str | None = None
    instagram: str | None = None
    facebook: str | None = None
    youtube: str | None = None
    gangnam_unni: str | None = None
 class BrandConsistencyOutput(BaseModel):
    brand_inconsistencies: list[BrandInconsistency]
--- a/app/integrations/llm/temp-prompt/brand_consistency_prompt.txt
+++ b/app/integrations/llm/temp-prompt/brand_consistency_prompt.txt
@ -1,18 +0,0 @@
 다음은 성형외과/피부과 {clinic_name} 의 채널별 브랜드 데이터입니다.
 공식 홈페이지: {mainpage}
 인스타그램: {instagram}
 페이스북: {facebook}
 유튜브: {youtube}
 강남언니: {gangnam_unni}
 위 채널들 간의 브랜드 불일치 항목을 분석해줘.
 비교 대상 필드 예시: 병원명(한글/영문), 전화번호, 주소, 로고, 슬로건, 소개 문구 등.
 각 항목은 다음 JSON 형식의 배열로 출력해줘:
 - field: 불일치 필드명
 - values: 채널별 실제 값 목록 (channel, value, is_correct)
 - impact: 불일치가 브랜드에 미치는 영향
 - recommendation: 개선 권고사항
 출처 번호([1], [2] 등)는 포함하지 마.
--- a/app/integrations/llm/temp-prompt/critical_issues_prompt.txt
+++ b/app/integrations/llm/temp-prompt/critical_issues_prompt.txt
@ -1,13 +0,0 @@
 다음은 성형외과/피부과 {clinic_name} 의 전 채널 수집 데이터입니다.
 {data}
 위 데이터를 바탕으로 이 병원의 마케팅 전반에 걸친 핵심 문제점과 개선사항을 진단해줘.
 각 항목은 category(진단 카테고리), detail(상세 설명), severity(critical/warning/info) 형식의 JSON 배열로 출력해줘.
 현재 주요 진단 카테고리는 3개야.
 브랜드 아이덴티티 파편화
 콘텐츠 전략 부재
 플랫폼 간 유입 단절
 출처 번호([1], [2] 등)는 포함하지 마.
--- a/app/integrations/llm/temp-prompt/facebook_diagnosis_prompt.txt
+++ b/app/integrations/llm/temp-prompt/facebook_diagnosis_prompt.txt
@ -1,8 +0,0 @@
 다음은 성형외과/피부과의 페이스북 페이지 데이터입니다.
 {pages}
 위 데이터를 바탕으로 이 병원의 페이스북 마케팅 현황을 진단해줘.
 각 항목은 category(진단 카테고리), detail(상세 설명), severity(critical/warning/info) 형식의 JSON 배열로 출력해줘.
 출처 번호([1], [2] 등)는 포함하지 마.
--- a/app/integrations/llm/temp-prompt/instagram_diagnosis_prompt.txt
+++ b/app/integrations/llm/temp-prompt/instagram_diagnosis_prompt.txt
@ -1,8 +0,0 @@
 다음은 성형외과/피부과의 인스타그램 계정 데이터입니다.
 {accounts}
 위 데이터를 바탕으로 이 병원의 인스타그램 마케팅 현황을 진단해줘.
 각 항목은 category(진단 카테고리), detail(상세 설명), severity(critical/warning/info) 형식의 JSON 배열로 출력해줘.
 출처 번호([1], [2] 등)는 포함하지 마.
--- a/app/integrations/llm/temp-prompt/other_channels_prompt.txt
+++ b/app/integrations/llm/temp-prompt/other_channels_prompt.txt
@ -1,45 +0,0 @@
 당신은 의료 마케팅 분석가입니다. 아래 부가 채널 데이터를 보고 `other_channels` 리스트만 JSON 으로 생성하세요.
 결과물은 한국어로 작성하세요.
 ## 병원
 - 병원명: {clinic_name}
 ## 부가 채널 raw 데이터
 ### TikTok
 {tiktok}
 ### KakaoTalk
 {kakao_talk}
 ### Naver Cafe
 {naver_cafe}
 ### Naver Blog
 {naver_blog}
 ### Gangnam Unni
 {gangnam_unni}
 ## 작성 지침
 - 메인 audit(YouTube/Instagram KR/Facebook KR/Website)에 **포함되지 않은** 채널만 넣으세요.
 - 위 채널 데이터에 **실제 값이 있는 채널만** status=active 와 실제 URL 로 일관되게 포함:
 - **카카오톡·네이버 카페**: {kakao_talk} 또는 {naver_cafe} 에 url 이 있으면 각각 "KakaoTalk" / "Naver Cafe" 로 status=active + 해당 url 로 포함. 수집된 콘텐츠 데이터는 없으므로 URL 존재 자체가 활성 채널 신호. **둘 다 null/빈 값이면 절대 만들지 마세요.**
 - **그 외 데이터 없는 채널(네이버플레이스/Threads 등)은 절대 임의로 만들지 마세요.** 데이터 없으면 그 채널은 생략 (랜덤 생성·추측 금지).
 - url 은 위 raw 데이터의 **실제 URL 만** 사용. 없으면 빈 문자열.
 - **URL 에 'https://www.facebook.com/' 같은 prefix 를 절대 직접 만들지 마세요.** 받은 데이터 URL = 출력 URL. 이미 'https://...' 가 붙은 URL 에 또 prefix 붙이면 깨집니다.
 - `details` 는 해당 채널의 짧은 1줄 묘사 (예: "팔로워 1.2만, 주 2회 게시", "오픈채팅 상담", "환자 후기 게시판"). 데이터 없으면 빈 문자열.
 ## 출력 형식
 ```json
 {{
  "other_channels": [
    {{"name": "Naver Blog", "status": "active", "details": "...", "url": "https://..."}},
    ...
  ]
 }}
 ```
 `status` 는 active / inactive / unknown / not_found 중 하나. 데이터·URL 모두 없으면 그 채널 자체를 생략하세요 (null 항목 만들지 말 것).
--- a/app/integrations/llm/temp-prompt/plan_prompt.txt
+++ b/app/integrations/llm/temp-prompt/plan_prompt.txt
@ -32,40 +32,22 @@
 ## 분석 리포트
 {report}
-## 추가 채널 데이터 (네이버 블로그 / 틱톡 / 인스타그램 EN / 페이스북 EN / 네이버 카페 / 카카오톡)
+## 추가 채널 데이터 (틱톡 / 인스타그램 EN / 페이스북 EN)
-아래에 데이터가 있는 채널은 channelStrategies에 **반드시 포함**하세요 (네이버 블로그, 틱톡, 인스타그램, 페이스북, 네이버 카페, 카카오톡). channelBranding은 SNS·블로그·카페까지만 포함(카카오톡은 메신저라 제외). null이면 제외.
+아래에 데이터가 있는 채널은 channelStrategies와 channelBranding에 **반드시 포함**하세요 (틱톡, 영문 인스타그램, 영문 페이스북). null이면 제외.
 ### 네이버 블로그 (Naver Blog)
 {naver_blog}
 ### 틱톡 (TikTok)
 {tiktok}
-### 인스타그램
+### 인스타그램 (영문 계정)
-{instagram}
+{instagram_en}
-### 페이스북
+### 페이스북 (영문 페이지)
-{facebook}
+{facebook_en}
 ### 네이버 카페 (공식 카페 운영 신호)
 {naver_cafe}
 - naver_cafe.cafeName: 카페명, naver_cafe.memberCount: 회원수
 - currentStatus는 "회원 N명" 형태로 간단하게. 게시글 수·최근 활동은 수집 불가 (추측 금지).
 - targetGoal은 회원 확보 목표 수치 + 운영 권장 (예: "회원 5,000명, 주 1~2회 공지 발행").
 ### 카카오톡 채널 (URL only — 콘텐츠 수집 X, 존재 여부만)
 {kakao_talk}
 - channelStrategies 카드 하나로 포함. currentStatus는 "공식 카카오톡 채널 운영 중" 정도, targetGoal은 친구 추가 유도·상담 전환·자동응답 시나리오 구체화 등.
 ## 채널별 로고 분석 (Gemini Vision) — 채널룰/일관성의 근거
 {channel_logos}
 - 위 channel_logos[]의 각 항목: channel(채널명), logo_description(프로필이 어떻게 생겼는지), is_official(공식 로고와 일치 여부).
- **channelBranding[]은 "어떻게 해야 하는지 권장 가이드라인" 섹션입니다.** 채널 통일 전략 기준으로 권장값 박을 것:
+- **channelBranding[]를 이 데이터로 채우세요**: 채널별로 profilePhoto=해당 채널의 logo_description, currentStatus=is_official이 true면 "correct" / false면 "incorrect" (데이터 없는 채널은 "missing"). bannerSpec은 권장 배너 규격(크기/디자인)을 작성.
  - profilePhoto: **빈 문자열 ""로 두세요.** 시스템이 brand_assets.logo_description으로 직접 채우므로 LLM은 만들지 마세요.
  - bannerSpec: 권장 배너 규격 (크기·디자인 가이드)
  - bioTemplate: 권장 bio 템플릿 (구조·필수 요소·예약 링크 포함 여부)
  - currentStatus: is_official=true면 "correct" / false면 "incorrect" (데이터 없는 채널은 "N/A") — 현재 상태 마커는 이 필드 하나로만.
 - 현재 채널 프로필 이미지의 실제 묘사(channel_logos.channel_logos[].logo_description)는 brandInconsistencies에서만 사용. channelBranding에서 채널별로 다른 묘사를 박지 마세요.
 - **brandInconsistencies[]에 "로고" 항목을 반드시 만드세요**: values[]에 채널마다 channel(채널명) / value(logo_description 그대로) / is_correct(is_official 값) 세 필드를 넣고, impact는 inconsistency_summary, recommendation은 channel_logos.recommendation 기반으로 작성 (공식 로고로 통일 권고 포함).
 ## 브랜드 자산 (홈페이지 CSS에서 추출 — 결정적 데이터)
@ -87,10 +69,10 @@
 - brandInconsistencies: 채널 간 브랜딩 불일치 항목 및 개선 권고
 ### Section 2: channelStrategies
- 메인 SNS 채널(Instagram, Facebook, YouTube, TikTok, 네이버 블로그) + 영문 계정(Instagram EN, Facebook EN) + **네이버 카페 / 카카오톡** (URL 있을 때) 카드를 **모두 포함**. 데이터 없는 채널도 빠뜨리지 말 것.
+- 리포트에 데이터가 있는 채널만 포함
- **currentStatus**: 데이터 있는 채널은 실제 수치로 서술 (예: "14,047 팔로워, Reels 0개", "104K 구독자, 주 2~3회 업로드"). **데이터 없는 채널은 "계정 없음"** 으로 표시. `excellent`/`warning`/`good` 같은 등급·평가어 금지.
+- **currentStatus는 현재 채널 상태를 실제 수치로 서술** (예: "14,047 팔로워, Reels 0개", "104K 구독자, 주 2~3회 업로드"). `excellent`/`warning`/`good` 같은 등급·평가어를 절대 쓰지 마세요.
- **targetGoal은 모든 채널에 반드시 채울 것** — 구체적 목표 수치(예: "50K 팔로워, Reels 주 5개"). 데이터 없는 채널도 시작 시 권장 목표를 작성하고 비우지 말 것.
+- targetGoal은 구체적 목표 수치로 작성 (예: "50K 팔로워, Reels 주 5개")
- 각 채널의 우선순위(P0/P1/P2), 콘텐츠 유형, 게시 빈도, 포맷 가이드라인 모두 권장값으로 작성 — 데이터 없어도 시작 권장값으로 채울 것.
+- 각 채널의 우선순위(P0/P1/P2), 콘텐츠 유형, 게시 빈도, 포맷 가이드라인 작성
 - customerJourneyStage는 해당 채널의 주요 기여 단계로 설정
 ### Section 3: contentStrategy
--- a/app/integrations/llm/temp-prompt/report_prompt.txt
+++ b/app/integrations/llm/temp-prompt/report_prompt.txt
@ -63,43 +63,26 @@
 ### 페이스북 (영문 페이지)
 {facebook_en}
 ### 카카오톡 채널 (URL only — 수집 데이터 없음, 존재 여부만 확인)
 {kakao_talk}
 ### 네이버 카페 (공식 카페 운영 신호)
 {naver_cafe}
 - naver_cafe.cafeName: 카페명
 - naver_cafe.memberCount: 회원수
 - 게시글 총 수·최근 게시일은 로그인 필요라 수집 불가. 추측 금지. 위 두 값만 사용.
 ### 채널별 로고 분석 (Gemini Vision)
 {channel_logos}
 - channel_logos.channel_logos[]에 각 채널의 로고 설명(logo_description)과 공식 로고 일치 여부(is_official)가 있습니다.
 - **facebook_audit.pages[].logo** 는 짧은 판정 타이틀로: is_official=true면 `"일치 (공식 로고)"`, false면 `"불일치 (비공식 변형)"`. 그리고 **facebook_audit.pages[].logo_description** 에 해당 채널의 logo_description(설명문)을 넣으세요.
 - 위 값들은 channel_logos 데이터 기반으로만 작성하고 추측하지 마세요.
 - 채널 간 로고 불일치(is_official=false)는 brand 일관성 진단(problem_diagnosis/weaknesses)에 반영하세요.
 - **brand_inconsistencies[]에 "로고" 항목을 반드시 만드세요**: values[]에 channel_logos.channel_logos[] 각 채널마다 다음 3필드를 **그대로** 박을 것 — channel(채널명 그대로), value(해당 채널의 logo_description 문자열 그대로 복붙), is_correct(해당 채널의 is_official 값 그대로). ❗ **채널-묘사 매핑을 절대 swap·재해석·임의 변형 금지**. channel_logos에 적힌 그대로 사용. impact는 channel_logos.inconsistency_summary 사용, recommendation은 channel_logos.recommendation 사용.
 ## clinic_snapshot / 채널 audit 작성 지침 (수집 데이터 그대로, 추측 금지)
 - clinic_snapshot.name 은 {clinic_name} 을 **그대로** 사용 (강남언니 표기명 '-본원' 등으로 바꾸지 말 것).
 - clinic_snapshot 의 overall_rating/total_reviews/staff_count/location/certifications/lead_doctor 는 강남언니({gangnam_unni}) 데이터의 값을 그대로 사용.
 - **instagram_audit.accounts 는 반드시 빈 배열 []로 두세요.** 계정 정보는 시스템이 수집 데이터로 직접 채우니 LLM은 만들지 말고, instagram_audit.diagnosis(진단)만 작성하세요.
 - facebook_audit.pages: KR 페북({facebook})·영문 페북({facebook_en}) 데이터가 있으면 **각각 별도 페이지**로 넣고, url/page_name/followers 등은 그 데이터 그대로. language/label 동일 규칙.
 - facebook_audit.pages[].top_content_type 은 해당 페이지 latestPosts의 **캡션·미디어를 읽고** 주로 올리는 콘텐츠를 의미 기반으로 짧게 묘사하세요 (예: "Before/After 사진 + 환자 여정 Reels", "이벤트·프로모션 카드뉴스", "다국어 시술 소개"). 단순 "동영상/이미지 위주"가 아니라 **무슨 주제**인지 쓰세요. (recent_post_age·post_frequency·engagement 수치는 시스템이 덮어쓰니 대략 적어도 됩니다.)
 - 위 수치·URL·이름은 제공된 데이터에서 그대로 쓰고 절대 지어내지 마세요.
 ## 기타 채널 현황 (other_channels) 작성 지침
 - other_channels에는 메인 audit(YouTube/Instagram/Facebook/Website)에 **포함되지 않은** 채널만 넣으세요.
 - 위 '채널 데이터'에 **실제 수집된 데이터가 있는 채널만** status=active와 실제 url로 일관되게 포함: 네이버 블로그, 강남언니, 틱톡, 영문 인스타그램({instagram_en}), 영문 페이스북({facebook_en}).
 - **영문 인스타그램·영문 페이스북은 KR 메인 audit(Instagram/Facebook)과 별개 계정이므로, 데이터가 있으면 반드시 other_channels에 "Instagram EN" / "Facebook EN"으로 각각 포함하세요 (절대 누락 금지).**
- **카카오톡·네이버 카페**: {kakao_talk} 또는 {naver_cafe}에 url이 있으면 other_channels에 각각 "KakaoTalk" / "Naver Cafe"로 status=active + 해당 url로 포함. 수집된 콘텐츠 데이터는 없으므로 URL 존재 자체가 활성 채널 신호. **둘 다 null/빈 값이면 절대 만들지 마세요.**
+- **수집 데이터에 없는 채널(카카오톡/네이버플레이스/네이버카페/Threads 등)은 절대 임의로 만들지 마세요.** 데이터 없으면 그 채널은 생략 (랜덤 생성·추측 금지).
 - **그 외 데이터 없는 채널(네이버플레이스/Threads 등)은 절대 임의로 만들지 마세요.** 데이터 없으면 그 채널은 생략 (랜덤 생성·추측 금지).
 - url은 수집 데이터의 실제 URL만 사용. 없으면 빈 문자열.
 - **URL에 'https://www.facebook.com/' 같은 prefix를 절대 직접 만들지 마세요.** 수집 데이터의 URL을 그대로 사용. 이미 'https://...' 가 붙은 URL에 또 prefix 붙이면 'https://www.facebook.com/https://facebook.com/X' 같이 깨집니다. 받은 URL = 출력 URL.
 ## registry_data 작성 지침 (clinic_snapshot 안)
 - **registry_data.website_en / district / branches / brand_group / naver_place_url / gangnam_unni_url / google_maps_url 모두 제공된 데이터에 명시되지 않으면 반드시 null로 두세요.**
 - 영문 사이트 URL, 영문명, 지점 정보 같은 거 데이터에 없으면 **절대 추측하거나 그럴듯해 보이는 도메인을 지어내지 마세요** (예: 'thepsclinic.com', '*-eng.com' 같은 거).
 ## 분석 지침
@ -111,5 +94,5 @@
  - 데이터가 null인 계정은 항목을 만들지 마세요. icon은 instagram/facebook/video 등 플랫폼에 맞게 설정.
 - strengths와 weaknesses는 각 3개 이상 작성하세요.
 - roadmap은 우선순위 순으로 실행 가능한 액션으로 작성하세요.
- kpi_dashboard는 코드가 결정적으로 산출해 후처리 강제 치환하므로 LLM 출력 무시됩니다. 빈 배열 또는 placeholder로 두세요.
+- kpis는 실제 수집된 수치 기반으로 현실적인 측정 가능 지표로 작성하세요.
 - conversion_strategy의 actions는 구체적인 실행 방안으로 작성하세요.
--- a/app/integrations/llm/temp-prompt/roadmap_prompt.txt
+++ b/app/integrations/llm/temp-prompt/roadmap_prompt.txt
@ -1,14 +0,0 @@
 다음은 성형외과/피부과 {clinic_name} 의 전 채널 수집 데이터입니다.
 {data}
 위 데이터를 바탕으로 이 병원의 3개월 마케팅 실행 로드맵을 수립해줘.
 month 1, 2, 3 각각 하나씩, 총 3개 항목을 포함한 roadmap JSON 배열로 출력해줘.
 각 항목은 아래 형식을 따라줘:
 - month: 월 번호 (1, 2, 3)
 - title: 해당 월의 핵심 테마 (예: "브랜드 정비")
 - subtitle: 한 줄 부제 (예: "기반 구축 — 로고·계정 통일")
 - tasks: 실행 과제 목록, 각 과제는 task(string)와 completed(false)로 구성
 출처 번호([1], [2] 등)는 포함하지 마.
--- a/app/integrations/llm/temp-prompt/scores_prompt.txt
+++ b/app/integrations/llm/temp-prompt/scores_prompt.txt
@ -1,16 +0,0 @@
 다음은 성형외과/피부과 {clinic_name} 의 전 채널 수집 데이터입니다.
 {data}
 위 데이터를 바탕으로 이 병원의 마케팅 종합 점수를 평가해줘.
 1. overall_score: 전체 마케팅 종합 점수 (0~100 정수)
 2. channel_scores: 채널별 점수 목록. 각 항목은 아래 형식:
   - channel: 채널명 (예: YouTube, Instagram, Facebook, 웹사이트 등)
   - icon: 채널 아이콘 식별자 (예: youtube, instagram, facebook, website)
   - score: 해당 채널 점수 (20~100 정수)
   - max_score: 해당 채널 최대 점수 (정수)
   - status: 심각도 (critical / warning / info)
   - headline: 한 줄 평가 요약
 출처 번호([1], [2] 등)는 포함하지 마.
--- a/app/integrations/llm/temp-prompt/summarize_prompt.txt
+++ b/app/integrations/llm/temp-prompt/summarize_prompt.txt
@ -1,7 +0,0 @@
 다음은 "{label}" 원본 데이터입니다.
 {data}
 위 데이터를 마케팅 플랜 생성에 필요한 핵심 정보만 남기고 간결하게 요약하세요.
 구체적인 수치·날짜·고유명사(채널명, 게시물 제목 등)는 그대로 보존하고, 중복되거나 플랜 작성에 불필요한 메타데이터는 제거하세요.
 요약문 하나의 문자열로만 출력하세요.
--- a/app/integrations/llm/temp-prompt/transformation_prompt.txt
+++ b/app/integrations/llm/temp-prompt/transformation_prompt.txt
@ -1,14 +0,0 @@
 다음은 성형외과/피부과 {clinic_name} 의 전 채널 수집 데이터입니다.
 {data}
 위 데이터를 바탕으로 이 병원의 마케팅 전환 전략을 수립해줘.
 아래 5개 항목을 포함한 JSON을 출력해줘.
 1. brand_identity: 브랜드 아이덴티티 개선 항목 (area, as_is, to_be)
 2. content_strategy: 콘텐츠 전략 개선 항목 (area, as_is, to_be)
 3. platform_strategies: 플랫폼별 전략 (platform, icon, current_metric, target_metric, strategies: 각 항목은 strategy와 detail 포함)
 4. website_improvements: 웹사이트 개선 항목 (area, as_is, to_be)
 5. new_channel_proposals: 신규 채널 제안 (channel, priority, rationale)
 출처 번호([1], [2] 등)는 포함하지 마.
--- a/app/integrations/llm/temp-prompt/youtube_diagnosis_prompt.txt
+++ b/app/integrations/llm/temp-prompt/youtube_diagnosis_prompt.txt
@ -1,24 +0,0 @@
 다음은 성형외과/피부과 유튜브 채널 데이터입니다.
 채널명: {channel_name}
 구독자 수: {subscribers}
 총 영상 수: {total_videos}
 총 조회수: {total_views}
 평균 영상 길이: {avg_video_length}
 업로드 주기: {upload_frequency}
 인기 영상 목록: {top_videos}
 플레이리스트: {playlists}
 위 데이터를 바탕으로 이 채널의 마케팅 문제점과 개선사항을 진단해줘.
 각 항목은 category(진단 카테고리), detail(상세 설명), severity(critical/warning/info) 형식의 JSON 배열로 출력해줘.
 진단 카테고리는 다음과 같아:
 구독자 대비 조회수 비율,
 최근 롱폼 조회수,
 Shorts 조회수,
 업로드 빈도,
 콘텐츠 톤앤매너,
 썸네일 디자인,
 최고 성과 Shorts
 출처 번호([1], [2] 등)는 포함하지 마.
--- a/app/integrations/naver.py
+++ b/app/integrations/naver.py
@ -1,5 +1,4 @@
 import re
 import httpx
 from http import HTTPMethod
 from urllib.parse import urlparse
 from common.utils import http_request
@ -65,20 +64,6 @@ class NaverClient:
            return None
        return resp.text
    async def fetch_blog_total_count(self, handle: str) -> int | None:
        """Return the blog's total post count scraped from its PostList page.
        The RSS feed does not carry the total, so the count is read from the
        "<N>개의 글" text of the PostList HTML, e.g.
        <h4 class="category_title pcol2">... 554개의 글</h4>.
        Args:
            handle: Naver blog id, interpolated into the PostList URL as blogId.
        Returns:
            The post count, or None when the request fails or the pattern is
            not found in the page.
        """
        resp = await http_request(
            HTTPMethod.GET,
            url=f"https://blog.naver.com/PostList.naver?blogId={handle}&from=postList&directAccess=true",
            timeout=15,
            label="naver-blog-postlist",
        )
        if not resp or not resp.is_success:
            return None
        # Allow thousands separators ("1,234개의 글"); a plain \d+ would only
        # capture the digits after the last comma and report 234.
        m = re.search(r"(\d[\d,]*)개의 글", resp.text)
        if not m:
            return None
        return int(m.group(1).replace(",", ""))
    async def get_blog_rss(self, url: str) -> dict | None:
        blog_handle = urlparse(url).path.strip("/").split("/")[0] if "://" in url else url
        xml = await self.fetch_blog_rss(blog_handle)
@ -97,71 +82,10 @@ class NaverClient:
                "postDate": date.group(1) if date else "",
                "description": re.sub(r"<[^>]*>", "", desc.group(1) if desc else "").strip()[:150],
            })
        # RSS의 totalCount 우선, 없으면 블로그 PostList 페이지에서 "N개의 글" 파싱, 그것도 없으면 RSS 글수
        total_match = re.search(r"<totalCount>(\d+)</totalCount>", xml)
        if total_match:
            total = int(total_match.group(1))
        else:
            total = await self.fetch_blog_total_count(blog_handle) or len(posts)
        return {
            "officialBlogUrl": f"https://blog.naver.com/{blog_handle}",
            "officialBlogHandle": blog_handle,
-            "totalResults": total,
+            "totalResults": int(total_match.group(1)) if total_match else len(posts),
            "posts": posts[:10],
        }
    async def get_cafe_info(self, cafe_url: str, *_args, **_kwargs) -> dict | None:
        """Collect basic operating signals for a Naver Cafe.
        Two-step fetch:
        1) https://cafe.naver.com/{handle} -> extract cafeId
        2) ArticleList.nhn?search.clubid={cafeId} -> extract memberCount and cafeName
        Article bodies need a login and are not fetched; member count and cafe
        name are all this method tries to obtain. httpx is used directly with
        follow_redirects=True (per the original author's note, common.http_request
        does not follow redirects, which the cafe pages need).
        Args:
            cafe_url: Full cafe URL, or a bare handle / path ending in the handle.
            *_args, **_kwargs: Accepted and ignored.
        Returns:
            None when no handle can be derived. Otherwise a dict with url,
            cafeHandle and accessible; when the landing page loads it also
            has cafeId, cafeName and memberCount (each None if not found).
        """
        import html  # local import: keeps this method independent of the file's import block
        handle = urlparse(cafe_url).path.strip("/").split("/")[0] if "://" in cafe_url else cafe_url.split("/")[-1]
        if not handle:
            return None
        cafe_home = f"https://cafe.naver.com/{handle}"
        unreachable = {"url": cafe_home, "cafeHandle": handle, "accessible": False}
        async with httpx.AsyncClient(
            timeout=10, follow_redirects=True,
            headers={"User-Agent": "Mozilla/5.0"},
        ) as c:
            # 1. Landing page -> cafeId. Any failure here is reported as "not accessible"
            #    rather than raised, because this is a best-effort scrape.
            try:
                main = await c.get(cafe_home)
            except Exception:
                return unreachable
            if main.status_code != 200:
                return unreachable
            cid_match = re.search(r'cafeId["\']?\s*[:=]\s*["\']?(\d+)', main.text)
            cafe_id = cid_match.group(1) if cid_match else None
            result: dict = {
                "url": cafe_home,
                "cafeHandle": handle,
                "cafeId": cafe_id,
                "accessible": True,
                "cafeName": None,
                "memberCount": None,
            }
            if not cafe_id:
                return result
            # 2. ArticleList page -> member count + cafe name. On failure the partial
            #    result (cafeId only) is returned.
            try:
                listing = await c.get(
                    f"https://cafe.naver.com/ArticleList.nhn?search.clubid={cafe_id}&search.menuid=&search.boardtype=L",
                    headers={"Referer": cafe_home},
                )
            except Exception:
                return result
            if listing.status_code != 200:
                return result
            mc = re.search(r'memberCount[^0-9]+(\d[\d,]*)', listing.text)
            if mc:
                result["memberCount"] = int(mc.group(1).replace(",", ""))
            tm = re.search(r"<title>(.+?)\s*:\s*네이버 카페</title>", listing.text)
            if tm:
                # Decode every HTML entity (&amp;, &#39;, &quot;, ...), not just &amp;.
                name = html.unescape(tm.group(1)).strip()
                # When the title contains a comma, keep only the part after the first one.
                if "," in name:
                    name = name.split(",", 1)[1].strip()
                result["cafeName"] = name
            return result
--- a/app/integrations/site_fetcher.py
+++ b/app/integrations/site_fetcher.py
@ -1,66 +0,0 @@
 """홈페이지 HTML + 외부 CSS 를 가져오는 fetch 전용 모듈.
 오래된 한국 의료 사이트들이 SSL DH_KEY_TOO_SMALL / cipher 약함 / host mismatch 등으로
 표준 fetch 에 차단되는 케이스가 많아 단계별 SSL fallback 으로 받는다.
 파싱·도메인 로직은 들어가지 않음 — 순수 HTTP 응답 본문 반환.
 """
 import logging
 import re
 import ssl
 from urllib.parse import urljoin
 import httpx
 logger = logging.getLogger(__name__)
 # Captures the href of every <link rel="stylesheet"> tag, whatever order the
 # attributes appear in. The lookahead checks for rel="stylesheet" anywhere in
 # the tag, so both `<link rel=… href=…>` and `<link href=… rel=…>` match.
 # Exactly one capture group, so findall() still returns a list of href strings.
 CSS_LINK = re.compile(
    r'<link\b(?=[^>]*\srel=["\']stylesheet["\'])[^>]*\shref=["\']([^"\']+)["\']',
    re.IGNORECASE,
 )
 def _make_ssl_context() -> ssl.SSLContext:
    """보안 등급 1로 낮춤 + cert 검증 유지 (옛 한국 의료 사이트 cipher 약함 회피)."""
    ctx = ssl.create_default_context()
    try:
        ctx.set_ciphers("DEFAULT@SECLEVEL=1")
    except ssl.SSLError:
        pass
    return ctx
 async def fetch_html(url: str, timeout: float = 20.0) -> tuple[int, str]:
    """Fetch a response body, relaxing TLS checks step by step until one attempt works.
    Attempts, in order: default verification, the context from
    _make_ssl_context() (lowered cipher level), and finally verify=False.
    Only connect/read/SSL errors move on to the next attempt. Any other error
    (timeout, unsupported URL scheme, too many redirects, ...) ends the call at
    once, so this function never raises for a failed fetch.
    Args:
        url: Absolute URL to GET (redirects are followed).
        timeout: Per-attempt timeout in seconds.
    Returns:
        (status_code, text) of the first response received — whatever its
        status — or (0, "") when no attempt produced a response.
    """
    headers = {"User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36"}
    # (label, factory for the `verify` argument), strictest first. Factories keep
    # the weak-cipher context from being built unless that attempt is reached.
    attempts = (
        ("standard SSL", lambda: True),
        ("weak cipher", _make_ssl_context),
        ("verify=False", lambda: False),
    )
    last = len(attempts) - 1
    for step, (label, make_verify) in enumerate(attempts):
        try:
            async with httpx.AsyncClient(timeout=timeout, follow_redirects=True, headers=headers, verify=make_verify()) as c:
                r = await c.get(url)
                return r.status_code, r.text
        except Exception as e:
            if step == last:
                logger.warning("[fetch] %s all fallbacks failed: %s", url, e)
                return 0, ""
            if not isinstance(e, (httpx.ConnectError, httpx.ReadError, ssl.SSLError)):
                # Loosening TLS cannot fix this kind of failure; honour the
                # "(0, "") on failure" contract instead of letting it propagate.
                logger.warning("[fetch] %s %s failed with a non-TLS error, giving up: %s", url, label, e)
                return 0, ""
            logger.info("[fetch] %s %s failed: %s — fallback to %s", url, label, e, attempts[step + 1][0])
    return 0, ""
 async def fetch_html_and_css(homepage_url: str, max_css_files: int = 8) -> tuple[str, list[str]]:
    """Fetch the homepage HTML and the first N external stylesheets it links.
    Args:
        homepage_url: Page to fetch; stylesheet hrefs are resolved against it.
        max_css_files: Upper bound on how many linked stylesheets are requested.
    Returns:
        (html, css_texts). ("", []) when the homepage does not come back as a
        non-empty 200 response. Stylesheets that fail to load are skipped.
    """
    status, html = await fetch_html(homepage_url)
    if not (status == 200 and html):
        logger.warning("[fetch] homepage fetch failed status=%s url=%s", status, homepage_url)
        return "", []
    stylesheets: list[str] = []
    hrefs = CSS_LINK.findall(html)
    for href in hrefs[:max_css_files]:
        sheet_url = urljoin(homepage_url, href)
        code, body = await fetch_html(sheet_url, timeout=15.0)
        if code != 200 or not body:
            continue
        stylesheets.append(body)
    return html, stylesheets
--- a/app/integrations/vision.py
+++ b/app/integrations/vision.py
@ -0,0 +1,173 @@
 """Gemini Vision — 로고/브랜드 비주얼 자동 분석 (OpenAI 호환 모드).
 정확한 hex 색상은 color_extractor가 CSS에서 직접 뽑음 (Vision은 근사값밖에 못 냄).
 Vision은 사람이 봐야 알 수 있는 정성 정보 — 심볼 형태/워드마크/톤 — 를 담당.
 """
 import base64
 import json
 import logging
 import re
 import httpx
 from openai import AsyncOpenAI
 logger = logging.getLogger(__name__)
 DEFAULT_MODEL = "gemini-2.5-flash"
 class VisionClient:
    """Call Gemini Vision through an OpenAI-compatible endpoint; only a Gemini API key is needed."""
    def __init__(self, api_key: str, model: str = DEFAULT_MODEL, timeout: float = 30.0, max_retries: int = 2):
        # AsyncOpenAI client pointed at Google's OpenAI-compatible base URL.
        self.client = AsyncOpenAI(
            api_key=api_key,
            base_url="https://generativelanguage.googleapis.com/v1beta/openai/",
            timeout=timeout,
            max_retries=max_retries,
        )
        self.model = model
    @staticmethod
    def _extract_json(text: str) -> dict | None:
        """Pull a JSON object out of a model reply.
        Tries a fenced ```json block first, then the widest {...} span in the
        text. Returns None for empty input or when nothing parses.
        """
        if not text:
            return None
        m = re.search(r"```(?:json)?\s*(\{.*?\})\s*```", text, re.DOTALL)
        if m:
            try:
                return json.loads(m.group(1))
            except json.JSONDecodeError:
                # Fall through to the greedy match below.
                pass
        m = re.search(r"\{.*\}", text, re.DOTALL)
        if m:
            try:
                return json.loads(m.group(0))
            except json.JSONDecodeError:
                return None
        return None
    @staticmethod
    async def _fetch_as_data_url(url: str) -> str | None:
        """Download an image and return it as a base64 data: URL, or None if unusable.
        The original author notes that Gemini cannot fetch many hosts directly,
        hence the inlining. Rejected: non-200 responses, non-image content
        types (e.g. an HTML page served with 200), and bodies under 500 bytes
        (likely placeholders). A URL that is already a data:image/ URL is
        returned unchanged so callers may pre-fetch and pass the result on.
        """
        if url.startswith("data:image/"):
            return url
        try:
            async with httpx.AsyncClient(timeout=15.0, follow_redirects=True) as c:
                resp = await c.get(url)
                if resp.status_code != 200:
                    logger.warning("[vision] fetch %s status=%s", url, resp.status_code)
                    return None
                mime = resp.headers.get("content-type", "").split(";")[0].strip()
                # Reject non-images (an HTML page answering 200 instead of 404).
                if not mime.startswith("image/"):
                    logger.warning("[vision] %s not an image (content-type=%s)", url, mime)
                    return None
                size = len(resp.content)
                if size < 500:
                    logger.warning("[vision] %s too small (%d bytes) — likely placeholder", url, size)
                    return None
                b64 = base64.b64encode(resp.content).decode("ascii")
                return f"data:{mime};base64,{b64}"
        except Exception as e:
            logger.warning("[vision] fetch error %s: %s", url, e)
            return None
    async def _ask(self, image_urls: list[str], prompt: str, max_tokens: int = 4000) -> dict | None:
        """Send the images plus a text prompt and return the parsed JSON reply.
        Images that cannot be fetched are skipped, so callers whose prompt
        depends on image order must pre-fetch (see describe_channel_logos).
        Returns None when no image is usable, the API call fails, or the
        reply holds no parseable JSON object.
        """
        content: list[dict] = []
        for u in image_urls:
            if not u:
                continue
            data_url = await self._fetch_as_data_url(u)
            if not data_url:
                continue
            content.append({"type": "image_url", "image_url": {"url": data_url}})
        if not content:
            logger.warning("[vision] no images could be fetched")
            return None
        content.append({"type": "text", "text": prompt})
        try:
            resp = await self.client.chat.completions.create(
                model=self.model,
                messages=[{"role": "user", "content": content}],
                max_tokens=max_tokens,
            )
            choice = resp.choices[0]
            if choice.finish_reason != "stop":
                logger.warning("[vision] unexpected finish_reason=%s", choice.finish_reason)
            return self._extract_json(choice.message.content or "")
        except Exception as e:
            logger.warning("[vision] error: %s", e)
            return None
    async def analyze_brand_assets(
        self,
        logo_url: str | None,
        homepage_url: str | None,
        additional_images: list[str] | None = None,
    ) -> dict:
        """Ask the model for a qualitative description of the clinic logo.
        Exact hex colours are deliberately not requested (the module header
        says a separate colour extractor handles them).
        Args:
            logo_url: Main logo image; sent first.
            homepage_url: Not used by this method; kept for the call signature.
            additional_images: Extra images appended after the logo.
        Returns:
            The model's JSON dict with a logo_images entry added, or {} when
            there is nothing to send or no usable reply.
        """
        # NOTE(review): the prompt says the first image is the main logo; that is
        # untrue when logo_url is None or fails to download — confirm with callers.
        urls = [u for u in [logo_url] + list(additional_images or []) if u]
        if not urls:
            return {}
        prompt = (
            "당신은 브랜드 로고 시각 분석가입니다. 첨부된 이미지(첫 번째가 병원의 대표 로고)를 보고 "
            "아래 JSON 스키마로만 응답하세요. 코드펜스 없이 순수 JSON만 출력.\n"
            "{\n"
            '  "logo_description": "로고를 1~2문장으로 설명 (심볼 형태 + 워드마크 + 전반적 톤). 예: \'둥근 잎사귀를 감싼 추상 심볼에 세리프 한글 워드마크, 차분하고 고급스러운 톤\'",\n'
            '  "logo_style": "minimal | illustrative | typographic | abstract 중 하나",\n'
            '  "has_symbol": "심볼/아이콘이 있으면 true, 글자만 있으면 false (boolean)",\n'
            '  "logo_symbol": "심볼이 묘사하는 대상 (예: \'잎사귀\', \'추상 곡선\'). 없으면 빈 문자열",\n'
            '  "logo_text": "로고에 보이는 워드마크 텍스트 그대로 (한글/영문). 없으면 빈 문자열",\n'
            '  "logo_colors_desc": "로고에 쓰인 색감을 사람이 부르는 이름으로 서술 (예: \'딥네이비 + 골드\'). 정확한 hex는 출력하지 말 것"\n'
            "}\n"
            "주의: 색상 hex 값이나 logo URL 같은 필드는 출력하지 마세요 (별도 추출 로직이 처리).\n"
            "모든 설명/텍스트 값은 반드시 한국어로 작성하세요 (영어 금지)."
        )
        result = await self._ask(urls, prompt)
        if not result:
            return {}
        # logo_images is filled in here; the model only describes.
        result["logo_images"] = {"circle": None, "horizontal": logo_url, "korean": None}
        return result
    async def describe_channel_logos(
        self,
        official_logo_url: str | None,
        channel_logos: list[dict],
    ) -> dict | None:
        """Describe each channel's profile image and judge whether it matches the official logo.
        Images are downloaded before the prompt is built, so the channel order
        named in the prompt always matches the images actually attached. A
        channel whose image cannot be fetched is left out; if the official
        logo cannot be fetched, the prompt without an official reference is used.
        Args:
            official_logo_url: Reference logo, attached first when available.
            channel_logos: [{"channel": "Instagram", "url": "..."}, ...]
        Returns:
            {"channel_logos": [{"channel", "logo_description", "is_official"}],
            "inconsistency_summary", "recommendation"} as parsed from the
            model, or None when no channel image is usable or the call fails.
        """
        items = [c for c in channel_logos if c.get("url")]
        if not items:
            return None
        official_data = await self._fetch_as_data_url(official_logo_url) if official_logo_url else None
        fetched: list[tuple[dict, str]] = []
        for c in items:
            data_url = await self._fetch_as_data_url(c["url"])
            if data_url:
                fetched.append((c, data_url))
        if not fetched:
            logger.warning("[vision] no channel logos could be fetched")
            return None
        # Official logo (when present) goes first and serves as the reference.
        urls: list[str] = []
        if official_data:
            urls.append(official_data)
        urls.extend(data_url for _, data_url in fetched)
        channel_order = ", ".join(c.get("channel", "?") for c, _ in fetched)
        if official_data:
            header = (
                "첨부 이미지 중 **첫 번째가 이 병원의 공식 로고**입니다. "
                f"이어지는 이미지들은 채널별 프로필 이미지이며 순서는: {channel_order}.\n"
                "각 채널 로고를 1문장으로 설명하고, 공식 로고(첫 번째)와 일치하면 is_official=true, "
                "비공식 변형/모델사진/다른 이미지면 false로 평가하세요.\n"
            )
        else:
            header = (
                f"첨부 이미지는 한 병원의 채널별 프로필 이미지입니다. 순서: {channel_order}.\n"
                "각 채널 로고를 1문장으로 설명하세요 (공식 로고 기준이 없으므로 is_official은 판단 가능하면만).\n"
            )
        prompt = (
            header
            + "아래 JSON으로만 응답 (코드펜스 없이 순수 JSON):\n"
            "{\n"
            '  "channel_logos": [{"channel": "...", "logo_description": "...", "is_official": true}],\n'
            '  "inconsistency_summary": "채널 간 로고 일관성 1~2문장 요약",\n'
            '  "recommendation": "통합 권고 1문장"\n'
            "}\n"
            "모든 logo_description·inconsistency_summary·recommendation은 반드시 한국어로 작성하세요 (영어 금지)."
        )
        # The data: URLs pass straight through _fetch_as_data_url inside _ask.
        return await self._ask(urls, prompt)
--- a/app/integrations/youtube.py
+++ b/app/integrations/youtube.py
@ -48,7 +48,7 @@ class YouTubeClient:
        resp = await http_request(
            HTTPMethod.GET,
            url=f"{YT}/channels",
-            params={"part": "snippet,statistics,contentDetails", "id": channel_id, "key": self.api_key},
+            params={"part": "snippet,statistics", "id": channel_id, "key": self.api_key},
            label="yt-channel",
        )
        if not resp or not resp.is_success:
@ -58,76 +58,28 @@ class YouTubeClient:
            return None
        channel = items[0]
-        async def _video_details(video_ids: list[str]) -> list[dict]:
+        video_ids: list[str] = []
-            """video_ids 순서를 보존한 채 snippet/statistics/contentDetails 채워서 반환."""
+        resp = await http_request(
-            if not video_ids:
+            HTTPMethod.GET,
-                return []
+            url=f"{YT}/search",
            params={"part": "snippet", "channelId": channel_id, "order": "viewCount", "type": "video", "maxResults": 10, "key": self.api_key},
            label="yt-search",
        )
        if resp and resp.is_success:
            video_ids = [i["id"]["videoId"] for i in resp.json().get("items", []) if i.get("id", {}).get("videoId")]
        videos: list[dict] = []
        if video_ids:
            resp = await http_request(
                HTTPMethod.GET,
                url=f"{YT}/videos",
                params={"part": "snippet,statistics,contentDetails", "id": ",".join(video_ids), "key": self.api_key},
                label="yt-videos",
            )
            if not resp or not resp.is_success:
                return []
            video_map = {v["id"]: v for v in resp.json().get("items", [])}
            return [video_map[vid] for vid in video_ids if vid in video_map]
        # 인기 영상 top 10 (조회수순) — search index 기반, 정확한 정렬 보장.
        resp = await http_request(
            HTTPMethod.GET,
            url=f"{YT}/search",
            params={"part": "snippet", "channelId": channel_id, "order": "viewCount", "type": "video", "maxResults": 10, "key": self.api_key},
            label="yt-search-top",
        )
        top_ids: list[str] = []
            if resp and resp.is_success:
-            top_ids = [i["id"]["videoId"] for i in resp.json().get("items", []) if i.get("id", {}).get("videoId")]
+                videos = resp.json().get("items", [])[:10]
        videos = await _video_details(top_ids)
-        # 최근 영상 10개 — search index는 누락이 흔해 채널의 실제 uploads 재생목록에서 직접 읽고
+        return {"channelId": channel_id, "channel": channel, "videos": videos}
        # publishedAt 기준으로 코드에서 직접 정렬 (재생목록 순서 자체는 보장되지 않으므로).
        recents: list[dict] = []
        uploads_id = (channel.get("contentDetails") or {}).get("relatedPlaylists", {}).get("uploads")
        if uploads_id:
            resp = await http_request(
                HTTPMethod.GET,
                url=f"{YT}/playlistItems",
                params={"part": "snippet", "playlistId": uploads_id, "maxResults": 10, "key": self.api_key},
                label="yt-uploads",
            )
            if resp and resp.is_success:
                entries = resp.json().get("items", [])
                entries.sort(key=lambda i: i.get("snippet", {}).get("publishedAt") or "", reverse=True)
                recent_ids = [
                    vid for e in entries
                    if (vid := e.get("snippet", {}).get("resourceId", {}).get("videoId"))
                ]
                recents = await _video_details(recent_ids)
        playlists: list[dict] = []
        resp = await http_request(
            HTTPMethod.GET,
            url=f"{YT}/playlists",
            params={"part": "snippet", "channelId": channel_id, "maxResults": 50, "key": self.api_key},
            label="yt-playlists",
        )
        if resp and resp.is_success:
            playlists = resp.json().get("items", [])
        return {"channelId": channel_id, "channel": channel, "videos": videos, "recents": recents, "playlists": playlists}
    @staticmethod
    def _video_item(v: dict) -> dict:
        return {
            "title": v.get("snippet", {}).get("title"),
            "views": int(v.get("statistics", {}).get("viewCount", 0)),
            "likes": int(v.get("statistics", {}).get("likeCount", 0)),
            "comments": int(v.get("statistics", {}).get("commentCount", 0)),
            "date": v.get("snippet", {}).get("publishedAt"),
            "duration": v.get("contentDetails", {}).get("duration"),
            "url": f"https://www.youtube.com/watch?v={v['id']}",
        }
    async def get_channel(self, url: str) -> dict | None:
        raw = await self.fetch_channel(url)
@ -137,7 +89,6 @@ class YouTubeClient:
        stats = ch.get("statistics", {})
        snippet = ch.get("snippet", {})
        thumbs = snippet.get("thumbnails", {})
        print(snippet)
        return {
            "channelId": raw["channelId"],
            "channelName": snippet.get("title"),
@ -148,12 +99,17 @@ class YouTubeClient:
            "subscribers": int(stats.get("subscriberCount", 0)),
            "totalViews": int(stats.get("viewCount", 0)),
            "totalVideos": int(stats.get("videoCount", 0)),
-            "videos": [self._video_item(v) for v in raw["videos"]],
+            "videos": [
-            "recents": [self._video_item(v) for v in raw["recents"]],
+                {
-            "playlists": [
+                    "title": v.get("snippet", {}).get("title"),
-                p.get("snippet", {}).get("title")
+                    "views": int(v.get("statistics", {}).get("viewCount", 0)),
-                for p in raw["playlists"]
+                    "likes": int(v.get("statistics", {}).get("likeCount", 0)),
-                if p.get("snippet", {}).get("title")
+                    "comments": int(v.get("statistics", {}).get("commentCount", 0)),
                    "date": v.get("snippet", {}).get("publishedAt"),
                    "duration": v.get("contentDetails", {}).get("duration"),
                    "url": f"https://www.youtube.com/watch?v={v['id']}",
                }
                for v in raw["videos"]
            ],
        }
--- a/app/mock/plan_viewclinic.json
+++ b/app/mock/plan_viewclinic.json
--- a/app/mock/report_viewclinic.json
+++ b/app/mock/report_viewclinic.json
--- a/app/mock_urls.py
+++ b/app/mock_urls.py
--- a/app/models/analysis.py
+++ b/app/models/analysis.py
@ -11,8 +11,6 @@ class Channels(BaseModel):
    tiktok: str | None = None
    instagram_en: str | None = None
    facebook_en: str | None = None
    kakao_talk: str | None = None
    naver_cafe: str | None = None
 class AnalysisOptions(BaseModel):
--- a/app/models/clinic.py
+++ b/app/models/clinic.py
@ -10,7 +10,9 @@ class ClinicResponse(BaseModel):
    hospital_name: str
    hospital_name_en: str | None
    road_address: str | None
    url: str | None
    status: str
    raw_data: dict | None
    created_at: str
    updated_at: str
--- a/app/models/plan.py
+++ b/app/models/plan.py
@ -49,7 +49,7 @@ class ChannelBrandingRule(CamelModel):
    profile_photo: str
    banner_spec: str
    bio_template: str
-    current_status: Literal["correct", "incorrect", "N/A"]
+    current_status: Literal["correct", "incorrect", "missing"]
 class BrandGuide(CamelModel):
--- a/app/models/report.py
+++ b/app/models/report.py
@ -66,18 +66,22 @@ class RegistryData(CamelModel):
 class ClinicSnapshot(CamelModel):
-    # _build_clinic_snapshot은 source 데이터 있을 때만 필드 추가 (`if x:` 가드).
+    name: str
-    # 강남언니/홈페이지 수집 누락된 병원에서 required면 ValidationError로 리포트 전체 실패.
+    name_en: str
-    name: str | None = None
+    established: str
-    name_en: str | None = None
+    years_in_business: int
-    staff_count: int | None = None
+    staff_count: int
-    lead_doctor: LeadDoctor | None = None
+    lead_doctor: LeadDoctor
-    overall_rating: float | None = None
+    overall_rating: float
-    total_reviews: int | None = None
+    total_reviews: int
-    certifications: list[str] = []
+    price_range: PriceRange
-    location: str | None = None
+    certifications: list[str]
-    phone: str | None = None
+    media_appearances: list[str]
-    domain: str | None = None
+    medical_tourism: list[str]
    location: str
    nearest_station: str
    phone: str
    domain: str
    logo_images: LogoImages | None = None
    brand_colors: BrandColors | None = None
    source: DataSource | None = None
@ -88,7 +92,7 @@ class ChannelScore(CamelModel):
    channel: str
    icon: str
    score: int
-    max_score: int = 100
+    max_score: int
    status: Severity
    headline: str
@ -127,10 +131,11 @@ class YouTubeAudit(CamelModel):
    avg_video_length: str
    upload_frequency: str
    channel_created_date: str
    subscriber_rank: str
    channel_description: str
    linked_urls: list[LinkedUrl]
    playlists: list[str]
-    top_videos: list[TopVideo] = []
+    top_videos: list[TopVideo]
    diagnosis: list[DiagnosisItem]
@ -151,8 +156,8 @@ class InstagramAccount(CamelModel):
 class InstagramAudit(CamelModel):
-    accounts: list[InstagramAccount] = []
+    accounts: list[InstagramAccount]
-    diagnosis: list[DiagnosisItem] = []
+    diagnosis: list[DiagnosisItem]
 class BrandInconsistencyValue(CamelModel):
@ -176,24 +181,24 @@ class FacebookPage(CamelModel):
    followers: int
    following: int
    category: str
-    bio: str = ""
+    bio: str
    logo: str
    logo_description: str
    link: str
-    linked_domain: str = ""
+    linked_domain: str
    reviews: int
    recent_post_age: str
-    has_whatsapp: bool | None = None
+    has_whatsapp: bool
-    post_frequency: str
+    post_frequency: str | None = None
    top_content_type: str | None = None
-    engagement: str
+    engagement: str | None = None
 class FacebookAudit(CamelModel):
-    pages: list[FacebookPage] = []
+    pages: list[FacebookPage]
-    diagnosis: list[DiagnosisItem] = []
+    diagnosis: list[DiagnosisItem]
-    brand_inconsistencies: list[BrandInconsistency] = []
+    brand_inconsistencies: list[BrandInconsistency]
-    consolidation_recommendation: str | None = None
+    consolidation_recommendation: str
 class OtherChannel(CamelModel):
@ -222,7 +227,7 @@ class AdditionalDomain(CamelModel):
 class WebsiteAudit(CamelModel):
    primary_domain: str
-    additional_domains: list[AdditionalDomain] = []
+    additional_domains: list[AdditionalDomain]
    sns_links_on_site: bool
    sns_links_detail: list[SnsLink] | None = None
    tracking_pixels: list[TrackingPixel]
--- a/app/models/status.py
+++ b/app/models/status.py
@ -36,24 +36,9 @@ class DataSource(StrEnum):
    SCRAPE = "scrape"
 class SourceType(StrEnum):
    """String identifiers for the data sources collected for a clinic.

    Members are used as keys into the per-run raw-data mapping
    (e.g. ``raw.get(SourceType.MAINPAGE)`` in the analysis service).
    """
    MAINPAGE = "mainpage"
    INSTAGRAM = "instagram"
    FACEBOOK = "facebook"
    NAVER_BLOG = "naver_blog"
    YOUTUBE = "youtube"
    TIKTOK = "tiktok"
    GANGNAM_UNNI = "gangnam_unni"
    KAKAOTALK = "kakaotalk"
    NAVER_CAFE = "naver_cafe"
    # Supplementary collection/analysis (HTML/CSS re-crawl + Vision logo matching) — brandAssets and channelLogos are stored together in a single raw_info entry.
    BRANDING = "branding"
 class Language(StrEnum):
    """Upper-case language codes represented as strings."""
    KR = "KR"
    EN = "EN"
    # NOTE(review): presumably "worldwide" / language-agnostic — confirm against callers.
    WW = "WW"
 class VideoType(StrEnum):
--- a/app/services/analysis.py
+++ b/app/services/analysis.py
@ -1,75 +1,33 @@
 import asyncio
 import json
 import logging
-from urllib.parse import urlparse
+from common.db import fetchone, execute, fetch_raw, get_analysis_raw_data, save_analysis_report, get_market_analysis
 from models.status import SourceType
 from common.utils import parse_iso_duration_seconds, format_seconds, format_clock, calc_avg_video_length, relative_date, calc_upload_frequency
 from common.db.run import update_run_report, update_run_plan, select_run_report_data, select_run
 from common.db.source import select_run_raw_data, select_mainpage_logo_url
 from common.db.market import select_market
 from integrations.llm.llm_service import LLMService
-from integrations.llm.prompt import report_prompt, plan_prompt, summarize_prompt, youtube_diagnosis_prompt, brand_consistency_prompt, critical_issues_prompt, transformation_prompt, roadmap_prompt, scores_prompt, other_channels_prompt
+from integrations.llm.prompt import report_prompt, plan_prompt
-from integrations.llm.schemas.report import ReportOutput, ClinicSnapshot, YouTubeAudit, BrandConsistencyOutput, CriticalIssuesOutput, DiagnosisItem, TransformationProposal, RoadmapOutput, RoadmapMonth, ScoresOutput, ChannelScore, WebsiteAudit, OtherChannelsOutput, OtherChannel
+from integrations.llm.schemas.report import ReportOutput
-from services.branding import analyze_branding
+from services.instagram_audit import build_instagram_accounts
 from services.instagram_audit import build_instagram_audit
 from services.facebook_audit import build_facebook_audit
 from services.kpi_dashboard import build_kpi_dashboard
 from integrations.llm.schemas.plan import PlanOutput
 from models.status import AnalysisStatus
 logger = logging.getLogger(__name__)
 async def generate_plan(analysis_run_id: str) -> PlanOutput:
    raw = await select_run_raw_data(analysis_run_id)
    clinic   = raw.get(SourceType.MAINPAGE) or []
    clinic   = clinic[0]["raw_data"] # if not exist, must error
    branding = raw.get(SourceType.BRANDING) or []
    branding = branding[0]["raw_data"] # if not exist, must error
    report = await select_run_report_data(analysis_run_id)
    market = await select_market(analysis_run_id)
-
+async def generate_report(analysis_run_id: str) -> ReportOutput:
-    mainpage     = raw.get(SourceType.MAINPAGE)     or []
+    run = await fetchone(
-    mainpage     = mainpage[0]["raw_data"] # 유일
+        "SELECT hospital_id FROM analysis_runs WHERE analysis_run_id = %s",
-    branding     = raw.get(SourceType.BRANDING)     or []
+        (analysis_run_id,),
-    branding     = branding[0]["raw_data"] # 유일
+    )
-    instagram    = raw.get(SourceType.INSTAGRAM)    or []
+    clinic_row = await fetchone(
-    facebook     = raw.get(SourceType.FACEBOOK)     or []
+        "SELECT raw_data FROM hospital_baseinfo WHERE hospital_id = %s",
-    youtube      = raw.get(SourceType.YOUTUBE)      or []
+        (run["hospital_id"],),
-    youtube      = youtube[0]["raw_data"] if youtube else None # 유일 (기획상)
+    )
-    gangnam_unni = raw.get(SourceType.GANGNAM_UNNI) or []
+    raw_data = clinic_row["raw_data"] if clinic_row else None
-    gangnam_unni = gangnam_unni[0]["raw_data"] if gangnam_unni else None# 유일 (기획상)
+    clinic = json.loads(raw_data) if isinstance(raw_data, str) else (raw_data or {})
-    naver_blog   = raw.get(SourceType.NAVER_BLOG)   or []
+    raw = await get_analysis_raw_data(analysis_run_id)
-    naver_blog   = naver_blog[0]["raw_data"] if naver_blog else None# 유일 (기획상)
+    market = await get_market_analysis(analysis_run_id)
    tiktok       = raw.get(SourceType.TIKTOK)       or []
    tiktok       = tiktok[0]["raw_data"] if tiktok else None# 유일 (기획상)
    naver_cafe   = raw.get(SourceType.NAVER_CAFE)   or []
    naver_cafe   = naver_cafe[0]["raw_data"] if naver_cafe else None# 유일 (기획상)
    kakaotalk   = raw.get(SourceType.KAKAOTALK)   or []
    kakaotalk   = kakaotalk[0]["raw_data"] if kakaotalk else None# 유일 (기획상)
    def _json(v) -> str | None:
        return json.dumps(v, ensure_ascii=False) if v else None
    # map: 큰 입력은 LLM으로 압축 요약해 100KB 초과 에러를 방지하고, 작은 입력은 그대로 둔다.
    large_fields = {
        "report":                 _json(report),
        "market_competitors":     _json(market.get("competitors")),
        "market_keywords":        _json(market.get("keywords")),
        "market_trend":           _json(market.get("trend")),
        "market_target_audience": _json(market.get("target_audience")),
        "tiktok":                 _json(tiktok),
        "instagram":              _json(instagram),
        "facebook":               _json(facebook),
        "naver_cafe":             _json(naver_cafe),
        "channel_logos":          _json(branding.get("channelLogos")),
        "brand_assets":           _json(branding.get("brandAssets")),
    }
    summarized = dict(zip(
        large_fields.keys(),
        await asyncio.gather(*(_summarize(label, data) for label, data in large_fields.items())),
    ))
    # reduce: 요약된 입력을 모아 최종 플랜 생성.
    input_data = {
        "clinic_name":            clinic.get("clinicName"),
        "clinic_name_en":         clinic.get("clinicNameEn"),
@ -78,35 +36,95 @@ async def generate_plan(analysis_run_id: str) -> PlanOutput:
        "slogan":                 clinic.get("slogan"),
        "services":               json.dumps(clinic.get("services", []), ensure_ascii=False),
        "doctors":                json.dumps(clinic.get("doctors", []), ensure_ascii=False),
-        "naver_blog":     _json(_naver_blog_summary(naver_blog)),
+        "market_competitors":     _json(market.get("competitors")),
-        "kakao_talk":     _json(kakaotalk),
+        "market_keywords":        _json(market.get("keywords")),
-        **summarized,
+        "market_trend":           _json(market.get("trend")),
        "market_target_audience": _json(market.get("target_audience")),
        "branding":               _json(clinic.get("branding")),
        "brand_assets":           _json(clinic.get("brandAssets")),
        "tiktok":                 _json(clinic.get("tiktok")),
        "instagram_en":           _json(clinic.get("instagramEn")),
        "facebook_en":            _json(clinic.get("facebookEn")),
        "channel_logos":          _json(clinic.get("channelLogos")),
        **{
            channel: _json(data)
            for channel, data in raw.items()
        },
    }
    return await LLMService(provider="perplexity").generate(report_prompt, input_data)
 async def generate_plan(analysis_run_id: str) -> PlanOutput:
    """Generate the plan for one analysis run through the LLM.

    Reads ``hospital_id`` and ``report_data`` from ``analysis_runs``, the
    clinic's ``raw_data`` from ``hospital_baseinfo`` and the market analysis
    for the run, flattens them into the prompt input and calls the
    ``perplexity`` provider with ``plan_prompt``.

    Args:
        analysis_run_id: Identifier of the analysis run to plan for.

    Returns:
        Whatever ``LLMService.generate`` yields for ``plan_prompt``
        (annotated as ``PlanOutput``).

    NOTE(review): the ``analysis_runs`` row is indexed without a None check,
    so a missing run presumably fails with a TypeError — confirm ``fetchone``
    semantics.
    """
    def _dump_or_none(value) -> str | None:
        # Falsy values (None, empty containers, 0, "") reach the prompt as None.
        if not value:
            return None
        return json.dumps(value, ensure_ascii=False)

    run_row = await fetchone(
        "SELECT hospital_id, report_data FROM analysis_runs WHERE analysis_run_id = %s",
        (analysis_run_id,),
    )
    hospital_row = await fetchone(
        "SELECT raw_data FROM hospital_baseinfo WHERE hospital_id = %s",
        (run_row["hospital_id"],),
    )

    # raw_data may arrive as a JSON string or as an already-decoded object.
    clinic_raw = hospital_row["raw_data"] if hospital_row else None
    if isinstance(clinic_raw, str):
        clinic = json.loads(clinic_raw)
    else:
        clinic = clinic_raw or {}

    report = run_row["report_data"]
    if isinstance(report, str):
        report = json.loads(report)

    market = await get_market_analysis(analysis_run_id)

    # Plain clinic attributes are passed through untouched.
    payload: dict = {}
    for prompt_key, clinic_key in (
        ("clinic_name", "clinicName"),
        ("clinic_name_en", "clinicNameEn"),
        ("address", "address"),
        ("phone", "phone"),
        ("slogan", "slogan"),
    ):
        payload[prompt_key] = clinic.get(clinic_key)

    # Services/doctors are always serialized, defaulting to an empty JSON list.
    for list_key in ("services", "doctors"):
        payload[list_key] = json.dumps(clinic.get(list_key, []), ensure_ascii=False)

    payload["report"] = _dump_or_none(report)

    for prompt_key, market_key in (
        ("market_competitors", "competitors"),
        ("market_keywords", "keywords"),
        ("market_trend", "trend"),
        ("market_target_audience", "target_audience"),
    ):
        payload[prompt_key] = _dump_or_none(market.get(market_key))

    for prompt_key, clinic_key in (
        ("tiktok", "tiktok"),
        ("instagram_en", "instagramEn"),
        ("facebook_en", "facebookEn"),
        ("channel_logos", "channelLogos"),
        ("brand_assets", "brandAssets"),
    ):
        payload[prompt_key] = _dump_or_none(clinic.get(clinic_key))

    llm = LLMService(provider="perplexity")
    return await llm.generate(plan_prompt, payload)
-_SUMMARIZE_THRESHOLD = 4000  # 이 길이(문자 수)를 넘는 입력만 요약 LLM 호출 (불필요한 호출 방지)
+async def _build_overrides(analysis_run_id: str) -> dict:
    run = await fetchone(
        "SELECT hospital_id, instagram_data_id, facebook_data_id,"
        " naver_blog_data_id, youtube_data_id, gangnam_unni_data_id"
        " FROM analysis_runs WHERE analysis_run_id = %s",
        (analysis_run_id,),
    )
    if not run:
        return {}
    hospital_row = await fetchone(
        "SELECT raw_data FROM hospital_baseinfo WHERE hospital_id = %s",
        (run["hospital_id"],),
    )
    hospital     = json.loads(hospital_row["raw_data"]) if hospital_row and isinstance(hospital_row.get("raw_data"), str) else (hospital_row or {}).get("raw_data") or {}
    instagram    = await fetch_raw("instagram_data",    run["instagram_data_id"])    or {}
    facebook     = await fetch_raw("facebook_data",     run["facebook_data_id"])     or {}
    naver_blog   = await fetch_raw("naver_blog_data",   run["naver_blog_data_id"])   or {}
    youtube      = await fetch_raw("youtube_data",      run["youtube_data_id"])      or {}
    gangnam_unni = await fetch_raw("gangnam_unni_data", run["gangnam_unni_data_id"]) or {}
 async def _summarize(label: str, data: str | None) -> str | None:
    if not data or len(data) <= _SUMMARIZE_THRESHOLD:
        return data
    result = await LLMService(provider="perplexity").generate(summarize_prompt, {"label": label, "data": data})
    return result.summary
 def _build_clinic_snapshot(mainpage: dict, gangnam_unni: dict, brand_assets: dict, logo_url: str | None) -> dict:
    snapshot: dict = {}
    # ── gangnam_unni ──────────────────────────────────────────────────────────
    doctors = gangnam_unni.get("doctors", [])
    lead = max(doctors, key=lambda d: d.get("reviews", 0)) if doctors else None
-    snapshot["name"]     = mainpage["clinicName"]
+    if gangnam_unni.get("name"):         snapshot["name"]           = gangnam_unni["name"]
    snapshot["name_en"]  = mainpage["clinicNameEn"]
    snapshot["phone"]    = mainpage["phone"]
    snapshot["location"] = mainpage["address"]
    snapshot["domain"]   = mainpage.get("domain") or urlparse(mainpage.get("sourceUrl") or "").netloc
    if gangnam_unni.get("rating"):       snapshot["overall_rating"] = gangnam_unni["rating"]
    if gangnam_unni.get("totalReviews"): snapshot["total_reviews"]  = gangnam_unni["totalReviews"]
    if gangnam_unni.get("address"):      snapshot["location"]       = gangnam_unni["address"]
    if gangnam_unni.get("badges"):       snapshot["certifications"] = gangnam_unni["badges"]
    if gangnam_unni.get("totalMajorStaffs"): snapshot["staff_count"]    = gangnam_unni["totalMajorStaffs"]
    if lead:
@ -116,256 +134,87 @@ def _build_clinic_snapshot(mainpage: dict, gangnam_unni: dict, brand_assets: dic
            "rating":       lead.get("rating"),
            "review_count": lead.get("reviews"),
        }
    # logo URL 은 raw_info.logo_url 컬럼에서, brand_colors 는 JSON 에서 강제 주입. LLM 의 null 처리 차단.
    if logo_url:
        snapshot["logo_images"] = {"circle": None, "horizontal": logo_url, "korean": None}
    if brand_assets.get("brand_colors"): snapshot["brand_colors"] = brand_assets["brand_colors"]
    return ClinicSnapshot.model_validate(snapshot).model_dump()
-def _naver_blog_summary(blog: dict | None) -> dict | None:
+    # ── instagram (KR·EN 계정을 코드에서 구성 → LLM 출력 무시하고 교체) ──────────────
-    """plan 카드 한 장에 들어가는 건 전체 포스트 수와 최근 활동 시점뿐. 그 외(본문·링크·제목)는
+    ig_patch = build_instagram_accounts(
-    던져봐야 토큰만 늘고 LLM 이 무관 정보로 hallucinate 함."""
+        instagram, hospital.get("instagramEn") or {}, hospital.get("channelLogos") or {},
-    if not blog:
+    )
        return None
    posts = blog.get("posts") or []
    return {
        "totalPosts":     blog.get("totalResults"),
        "latestPostDate": posts[0].get("postDate") if posts else None,
    }
-async def _build_youtube_audit(youtube: dict) -> dict: # 기획상 1개의 input channel, 다중 채널은 기획에 없음.
+    # ── facebook ──────────────────────────────────────────────────────────────
-    videos  = youtube.get("videos", [])
+    fb_patch: dict = {}
-    recents = youtube.get("recents", [])
+    if facebook.get("pageUrl"):    fb_patch["url"]           = facebook["pageUrl"]
-    yt_patch: dict = {
+    if facebook.get("pageUrl"):    fb_patch["link"]          = facebook["pageUrl"]
-        "weekly_view_growth":        {"absolute": 0, "percentage": 0.0},
+    if facebook.get("pageName"):   fb_patch["page_name"]     = facebook["pageName"]
-        "estimated_monthly_revenue": {"min": 0, "max": 0},
+    if facebook.get("followers"):  fb_patch["followers"]     = facebook["followers"]
-        "linked_urls":               [],
+    if facebook.get("intro"):      fb_patch["bio"]           = facebook["intro"]
-        "avg_video_length":          calc_avg_video_length(videos),
+    if facebook.get("categories"): fb_patch["category"]     = ", ".join(facebook["categories"])
-        "upload_frequency":          calc_upload_frequency(recents),
+    if facebook.get("website"):    fb_patch["linked_domain"] = facebook["website"]
-    }
+
    # ── youtube ───────────────────────────────────────────────────────────────
    yt_patch: dict = {}
    if youtube.get("channelName"):  yt_patch["channel_name"]        = youtube["channelName"]
    if youtube.get("handle"):       yt_patch["handle"]              = youtube["handle"]
    if youtube.get("subscribers"):  yt_patch["subscribers"]         = youtube["subscribers"]
    if youtube.get("totalVideos"):  yt_patch["total_videos"]        = youtube["totalVideos"]
    if youtube.get("totalViews"):   yt_patch["total_views"]         = youtube["totalViews"]
    if youtube.get("publishedAt"):  yt_patch["channel_created_date"] = youtube["publishedAt"][:10]
-    yt_patch["channel_description"] = youtube.get("description") or ""
+    if youtube.get("description"):  yt_patch["channel_description"] = youtube["description"]
-    if youtube.get("playlists"):    yt_patch["playlists"]            = youtube["playlists"]
+    if youtube.get("videos"):
    if videos:
        yt_patch["top_videos"] = [
            {
                "title":        v["title"],
                "views":        v["views"],
-                "duration":     format_clock(parse_iso_duration_seconds(v.get("duration", ""))),
+                "duration":     v.get("duration"),
                "type":         "Short" if "M" not in v.get("duration", "") else "Long",
-                "uploaded_ago": relative_date(v.get("date", "")),
+                "uploaded_ago": v.get("date", "")[:10],
            }
-            for v in videos
+            for v in youtube["videos"]
        ]
-    diagnosis_result = await LLMService(provider="perplexity").generate(
+    overrides: dict = {}
-        youtube_diagnosis_prompt,
+    if snapshot:
-        {
+        overrides["clinic_snapshot"] = snapshot
-            "channel_name":    yt_patch.get("channel_name"),
+    if ig_patch:
-            "subscribers":     yt_patch.get("subscribers"),
+        overrides["instagram_audit"] = {"accounts": ig_patch}
-            "total_videos":    yt_patch.get("total_videos"),
+    if fb_patch:
-            "total_views":     yt_patch.get("total_views"),
+        overrides["facebook_audit"] = {"pages": [fb_patch]}
-            "avg_video_length": yt_patch.get("avg_video_length"),
+    if yt_patch:
-            "upload_frequency": yt_patch.get("upload_frequency"),
+        overrides["youtube_audit"] = yt_patch
-            "top_videos":      json.dumps(yt_patch.get("top_videos", []), ensure_ascii=False),
+    return overrides
            "playlists":       json.dumps(yt_patch.get("playlists", []), ensure_ascii=False),
        },
    )
    yt_patch["diagnosis"] = [item.model_dump() for item in diagnosis_result.diagnosis]
    return YouTubeAudit.model_validate(yt_patch).model_dump()
 async def _build_roadmap(analysis_run_id: str, raw: dict) -> list[dict]:
    """Ask the LLM for the roadmap section and return it as plain dicts.

    Args:
        analysis_run_id: Run identifier; kept for signature parity with the
            sibling ``_build_*`` helpers, not read here.
        raw: Collected raw data keyed by source type; serialized whole into
            the prompt.

    Returns:
        One dict per entry of ``result.roadmap``, each validated through
        ``RoadmapMonth``.
    """
    # The previous `(... or [{}])[0]["raw_data"]` raised KeyError exactly when
    # the `[{}]` fallback was used; read the mainpage payload defensively.
    mainpage_rows = raw.get(SourceType.MAINPAGE) or [{}]
    mainpage = mainpage_rows[0].get("raw_data") or {}
    result: RoadmapOutput = await LLMService(provider="perplexity").generate(
        roadmap_prompt,
        {
            "clinic_name": mainpage.get("clinicName"),
            "data":        json.dumps(raw, ensure_ascii=False),
        },
    )
    return [RoadmapMonth.model_validate(item).model_dump() for item in result.roadmap]
 async def _build_transformation(analysis_run_id: str, raw: dict) -> dict:
    """Ask the LLM for the transformation proposal and return it as a dict.

    Args:
        analysis_run_id: Run identifier; kept for signature parity with the
            sibling ``_build_*`` helpers, not read here.
        raw: Collected raw data keyed by source type; serialized whole into
            the prompt.

    Returns:
        ``model_dump()`` of the LLM result.
    """
    # The previous `(... or [{}])[0]["raw_data"]` raised KeyError exactly when
    # the `[{}]` fallback was used; read the mainpage payload defensively.
    mainpage_rows = raw.get(SourceType.MAINPAGE) or [{}]
    mainpage = mainpage_rows[0].get("raw_data") or {}
    result: TransformationProposal = await LLMService(provider="perplexity").generate(
        transformation_prompt,
        {
            "clinic_name": mainpage.get("clinicName"),
            "data":        json.dumps(raw, ensure_ascii=False),
        },
    )
    return result.model_dump()
 async def _build_critical_issues(analysis_run_id: str, raw: dict) -> list[dict]:
    """Ask the LLM for the critical-issue diagnosis and return it as plain dicts.

    Args:
        analysis_run_id: Run identifier; kept for signature parity with the
            sibling ``_build_*`` helpers, not read here.
        raw: Collected raw data keyed by source type; serialized whole into
            the prompt.

    Returns:
        One dict per entry of ``result.diagnosis``, each validated through
        ``DiagnosisItem``.
    """
    # The previous `(... or [{}])[0]["raw_data"]` raised KeyError exactly when
    # the `[{}]` fallback was used; read the mainpage payload defensively.
    mainpage_rows = raw.get(SourceType.MAINPAGE) or [{}]
    mainpage = mainpage_rows[0].get("raw_data") or {}
    result: CriticalIssuesOutput = await LLMService(provider="perplexity").generate(
        critical_issues_prompt,
        {
            "clinic_name": mainpage.get("clinicName"),
            "data":        json.dumps(raw, ensure_ascii=False),
        },
    )
    return [DiagnosisItem.model_validate(item).model_dump() for item in result.diagnosis]
 async def _build_scores(analysis_run_id: str, raw: dict) -> ScoresOutput:
    """Ask the LLM to score the clinic and return the LLM result unchanged.

    Args:
        analysis_run_id: Run identifier; kept for signature parity with the
            sibling ``_build_*`` helpers, not read here.
        raw: Collected raw data keyed by source type; serialized whole into
            the prompt.

    Returns:
        The object produced by ``LLMService.generate`` for ``scores_prompt``
        (annotated as ``ScoresOutput``).
    """
    # The previous `(... or [{}])[0]["raw_data"]` raised KeyError exactly when
    # the `[{}]` fallback was used; read the mainpage payload defensively.
    mainpage_rows = raw.get(SourceType.MAINPAGE) or [{}]
    mainpage = mainpage_rows[0].get("raw_data") or {}
    return await LLMService(provider="perplexity").generate(
        scores_prompt,
        {
            "clinic_name": mainpage.get("clinicName"),
            "data":        json.dumps(raw, ensure_ascii=False),
        },
    )
 def _build_website_audit(mainpage: dict) -> dict:
    """Build the website-audit section straight from mainpage raw data (no LLM).

    Per the original author's note, ``collect_mainpage`` regex-parses the raw
    HTML returned by Firecrawl and stores tracking/SNS/domain details on the
    mainpage record, so everything needed is already present here.

    Args:
        mainpage: The mainpage ``raw_data`` dict.

    Returns:
        The audit validated through ``WebsiteAudit`` and dumped back to a dict.
    """
    # Prefer the stored domain; otherwise derive it from the crawled URL.
    primary = mainpage.get("domain")
    if not primary:
        primary = urlparse(mainpage.get("sourceUrl") or "").netloc

    links = mainpage.get("snsLinks") or []
    has_links = True if links else False

    fields = {}
    fields["primary_domain"] = primary
    fields["additional_domains"] = mainpage.get("additionalDomains") or []
    fields["sns_links_on_site"] = has_links
    # An empty link list is reported as None rather than [].
    fields["sns_links_detail"] = links if links else None
    fields["tracking_pixels"] = mainpage.get("trackingPixels") or []
    fields["main_cta"] = mainpage.get("mainCta") or ""

    validated = WebsiteAudit.model_validate(fields)
    return validated.model_dump()
 async def _build_other_channels(raw: dict) -> list[dict]:
    """Ask the LLM to describe the secondary channels and return plain dicts.

    Args:
        raw: Collected raw data keyed by source type. The TikTok, KakaoTalk,
            Naver Cafe, Naver Blog and Gangnam Unni entries are each
            serialized into the prompt (a missing entry serializes as ``null``).

    Returns:
        One dict per entry of ``result.other_channels``, each validated
        through ``OtherChannel``.
    """
    # The previous `(... or [{}])[0]["raw_data"]` raised KeyError exactly when
    # the `[{}]` fallback was used; read the mainpage payload defensively.
    mainpage_rows = raw.get(SourceType.MAINPAGE) or [{}]
    mainpage = mainpage_rows[0].get("raw_data") or {}
    result: OtherChannelsOutput = await LLMService(provider="perplexity").generate(
        other_channels_prompt,
        {
            "clinic_name":  mainpage.get("clinicName"),
            "tiktok":       json.dumps(raw.get(SourceType.TIKTOK), ensure_ascii=False),
            "kakao_talk":   json.dumps(raw.get(SourceType.KAKAOTALK), ensure_ascii=False),
            "naver_cafe":   json.dumps(raw.get(SourceType.NAVER_CAFE), ensure_ascii=False),
            "naver_blog":   json.dumps(raw.get(SourceType.NAVER_BLOG), ensure_ascii=False),
            "gangnam_unni": json.dumps(raw.get(SourceType.GANGNAM_UNNI), ensure_ascii=False),
        },
    )
    return [OtherChannel.model_validate(item).model_dump() for item in result.other_channels]
 async def _build_report(analysis_run_id: str) -> dict:
    """Assemble the full report for one analysis run, section by section.

    Loads the run's raw data and run row, unpacks the per-source payloads,
    then builds each report section (some via LLM-backed helpers, some by
    direct mapping) and wraps the result in ``ReportOutput``.

    Args:
        analysis_run_id: Identifier of the analysis run to report on.

    Returns:
        ``{}`` when no raw data exists for the run, otherwise a
        ``ReportOutput`` instance.
        NOTE(review): the annotation says ``dict`` but the success path
        returns ``ReportOutput``, and ``run_report_task`` calls
        ``.model_dump()`` on the result — the ``{}`` path would fail there.

    Raises:
        IndexError: if the MAINPAGE or BRANDING source has no rows (both are
            indexed with ``[0]`` unconditionally).
    """
    raw = await select_run_raw_data(analysis_run_id)
    run = await select_run(analysis_run_id)
    if not raw:
        return {}
    # MAINPAGE and BRANDING are mandatory: an empty list fails on [0] below.
    mainpage     = raw.get(SourceType.MAINPAGE)     or []
    mainpage     = mainpage[0]["raw_data"] # single entry
    branding     = raw.get(SourceType.BRANDING)     or []
    branding     = branding[0]["raw_data"] # single entry
    # Instagram and Facebook stay as lists of rows; the remaining sources are
    # reduced to the first row's raw_data, or None when nothing was collected.
    instagram    = raw.get(SourceType.INSTAGRAM)    or []
    facebook     = raw.get(SourceType.FACEBOOK)     or []
    youtube      = raw.get(SourceType.YOUTUBE)      or []
    youtube      = youtube[0]["raw_data"] if youtube else None # single entry (per product spec)
    gangnam_unni = raw.get(SourceType.GANGNAM_UNNI) or []
    gangnam_unni = gangnam_unni[0]["raw_data"] if gangnam_unni else None# single entry (per product spec)
    naver_blog   = raw.get(SourceType.NAVER_BLOG)   or []
    naver_blog   = naver_blog[0]["raw_data"] if naver_blog else None# single entry (per product spec)
    tiktok       = raw.get(SourceType.TIKTOK)       or []
    tiktok       = tiktok[0]["raw_data"] if tiktok else None# single entry (per product spec)
    naver_cafe   = raw.get(SourceType.NAVER_CAFE)   or []
    naver_cafe   = naver_cafe[0]["raw_data"] if naver_cafe else None# single entry (per product spec)
    brand_assets  = branding.get("brandAssets") or {}
    channel_logos = branding.get("channelLogos") or {}
    logo_url      = await select_mainpage_logo_url(analysis_run_id)
    # Brand inconsistencies come from a separate LLM call and are handed to the Facebook audit.
    brand = await generate_brand_consistency(analysis_run_id)
    brand_patch : list[dict] = brand.model_dump()["brand_inconsistencies"]
    kpi_extras = {
        "tiktok":      tiktok,
        "naverCafe":   naver_cafe,
    }
    scores = (await _build_scores(analysis_run_id, raw)).model_dump()
    # NOTE(review): youtube and gangnam_unni may be None here, and the
    # builders they are passed to appear to call `.get` on them — confirm
    # those helpers tolerate None.
    # The awaited section builders below run one after another, in dict order.
    report = {
        "id" : analysis_run_id,
        "created_at" : str(run["created_at"]) if run.get("created_at") else None,
        # Fall back to the host of the crawled URL when no domain was stored.
        "target_url" : mainpage.get("domain") or urlparse(mainpage.get("sourceUrl") or "").netloc,
        "overall_score" : scores.get("overall_score"),
        "channel_scores" : scores.get("channel_scores"),
        "clinic_snapshot":   _build_clinic_snapshot(mainpage, gangnam_unni, brand_assets, logo_url),
        "instagram_audit":   await build_instagram_audit(instagram, channel_logos),
        "facebook_audit":    await build_facebook_audit(facebook, brand_patch, channel_logos),
        "youtube_audit":     await _build_youtube_audit(youtube),
        "other_channels":    await _build_other_channels(raw),
        "website_audit":     _build_website_audit(mainpage),
        "problem_diagnosis": await _build_critical_issues(analysis_run_id, raw),
        "transformation" :   await _build_transformation(analysis_run_id, raw),
        "roadmap":           await _build_roadmap(analysis_run_id, raw),
        "kpi_dashboard":     build_kpi_dashboard(instagram, facebook, youtube, gangnam_unni, kpi_extras, naver_blog),
    }
    return ReportOutput(**report)
 def _deep_merge(base: dict, overrides: dict) -> dict:
    """dict 끼리 만나면 재귀로 안쪽까지 합치고, 그 외(list/scalar/None) 는 override 값으로 통째 치환."""
    for k, v in overrides.items():
        if isinstance(v, dict) and isinstance(base.get(k), dict):
            _deep_merge(base[k], v)
        elif isinstance(v, list) and isinstance(base.get(k), list):
            for i, item in enumerate(v):
                if i < len(base[k]) and isinstance(item, dict) and isinstance(base[k][i], dict):
                    _deep_merge(base[k][i], item)
        else:
            base[k] = v
    return base
-async def generate_brand_consistency(analysis_run_id: str) -> BrandConsistencyOutput:
+def _patch_report(result: ReportOutput, overrides: dict) -> ReportOutput:
-    raw = await select_run_raw_data(analysis_run_id)
+    merged = _deep_merge(result.model_dump(), overrides)
    # 인스타 계정은 프롬프트에서 LLM이 []로 두게 했고, 코드가 수집 데이터로 채운다 (데이터 없으면 빈 리스트)
    merged.setdefault("instagram_audit", {})["accounts"] = (overrides.get("instagram_audit") or {}).get("accounts") or []
    return ReportOutput(**merged)
    def _json(v) -> str | None:
        return json.dumps(v, ensure_ascii=False) if v else None
    mainpage = raw.get(SourceType.MAINPAGE) or []
    input_data = {
        "clinic_name":  (mainpage[0].get("clinicName") if mainpage else None),
        "mainpage":     _json(mainpage),
        "instagram":    _json(raw.get(SourceType.INSTAGRAM)),
        "facebook":     _json(raw.get(SourceType.FACEBOOK)),
        "youtube":      _json(raw.get(SourceType.YOUTUBE)),
        "gangnam_unni": _json(raw.get(SourceType.GANGNAM_UNNI)),
    }
    return await LLMService(provider="perplexity").generate(brand_consistency_prompt, input_data)
 async def run_report_task(analysis_run_id: str) -> None:
    logger.info("[report] start run=%s", analysis_run_id)
-    await analyze_branding(analysis_run_id)
+    result = await generate_report(analysis_run_id)
-    # result = await generate_report(analysis_run_id)
+    result = _patch_report(result, await _build_overrides(analysis_run_id))
-    result = await _build_report(analysis_run_id)
+    await save_analysis_report(analysis_run_id, result.model_dump())
    await update_run_report(analysis_run_id, result.model_dump())
    logger.info("[report] done run=%s", analysis_run_id)
 def _patch_plan(result: PlanOutput, logo_desc: str) -> PlanOutput:
    """Overwrite every channel's ``profile_photo`` in the plan with ``logo_desc``.

    ``brand_guide.channel_branding[].profile_photo`` is filled by code rather
    than left to the LLM: every channel receives the same value (the caller
    passes ``brand_assets.logo_description``), which keeps the LLM from
    inventing fallback wording.

    Args:
        result: The plan produced by the LLM.
        logo_desc: Text to store as ``profile_photo`` on every channel entry.

    Returns:
        A new ``PlanOutput`` rebuilt from the patched dump.
    """
    plan = result.model_dump()
    brand_guide = plan.get("brand_guide") or {}
    channels = brand_guide.get("channel_branding") or []
    for channel in channels:
        channel["profile_photo"] = logo_desc
    return PlanOutput(**plan)
 async def run_plan_task(analysis_run_id: str) -> None:
    logger.info("[plan] start run=%s", analysis_run_id)
    result = await generate_plan(analysis_run_id)
-    # profile_photo 는 brand_assets.logo_description 으로 코드가 박음 (LLM "(가이드 미보유)" 같은 hallucination 차단).
+    await execute(
-    raw = await select_run_raw_data(analysis_run_id)
+        "UPDATE analysis_runs SET plan_data = %s WHERE analysis_run_id = %s",
-    branding_list = raw.get(SourceType.BRANDING) or []
+        (json.dumps(result.model_dump(), ensure_ascii=False), analysis_run_id),
-    branding_data = branding_list[0]["raw_data"] if branding_list else {}
+    )
    logo_desc = (branding_data.get("brandAssets") or {}).get("logo_description") or ""
    result = _patch_plan(result, logo_desc)
    await update_run_plan(analysis_run_id, result.model_dump())
    logger.info("[plan] done run=%s", analysis_run_id)
--- a/app/services/brand_parser.py
+++ b/app/services/brand_parser.py
@ -1,172 +0,0 @@
 """collect 단계 - HTML/CSS 텍스트에서 brand 로고 URL + 색상 추출"""
 import logging
 import re
 from collections import Counter
 from urllib.parse import urljoin
 logger = logging.getLogger(__name__)
 # ── Logo URL extraction ──────────────────────────────────────────────────────
 # Ordered HTML patterns; group(1) of every pattern is the candidate image URL.
 # find_logo_url_in_html slices this list at index 8, so the order matters:
 # entries 0-7 are tried before the stylesheet pattern, entries 8+ after it.
 LOGO_IMG_PATTERNS = [
    # 0: <img> whose class contains the word "logo" (class before src).
    re.compile(r'<img[^>]*\bclass=["\'][^"\']*\blogo\b[^"\']*["\'][^>]*\bsrc=["\']([^"\']+)["\']', re.IGNORECASE),
    # 1: same, with src before class.
    re.compile(r'<img[^>]*\bsrc=["\']([^"\']+)["\'][^>]*\bclass=["\'][^"\']*\blogo\b[^"\']*["\']', re.IGNORECASE),
    # 2: <img> whose id contains the word "logo".
    re.compile(r'<img[^>]*\bid=["\'][^"\']*\blogo\b[^"\']*["\'][^>]*\bsrc=["\']([^"\']+)["\']', re.IGNORECASE),
    # 3: <img> whose alt text contains the word "logo".
    re.compile(r'<img[^>]*\balt=["\'][^"\']*\blogo\b[^"\']*["\'][^>]*\bsrc=["\']([^"\']+)["\']', re.IGNORECASE),
    # 4: first <img> following the opening tag of an a/h1-h6/div/span whose class or id contains "logo".
    re.compile(r'<(?:a|h[1-6]|div|span)[^>]*\b(?:class|id)=["\'][^"\']*\blogo\b[^"\']*["\'][^>]*>(?:[^<]|<(?!img))*<img[^>]*\bsrc=["\']([^"\']+)["\']', re.IGNORECASE | re.DOTALL),
    # 5: inline-style background url() on a "logo" class/id element (class/id before style).
    re.compile(r'<(?:a|div|span|h[1-6])[^>]*\b(?:class|id)=["\'][^"\']*\blogo\b[^"\']*["\'][^>]*\bstyle=["\'][^"\']*background(?:-image)?\s*:\s*url\(\s*["\']?([^"\')\s]+)', re.IGNORECASE),
    # 6: same, with style before class/id.
    re.compile(r'<(?:a|div|span|h[1-6])[^>]*\bstyle=["\'][^"\']*background(?:-image)?\s*:\s*url\(\s*["\']?([^"\')\s]+)[^"\']*["\'][^>]*\b(?:class|id)=["\'][^"\']*\blogo\b', re.IGNORECASE),
    # 7: <img> whose src itself contains the word "logo" and a png/svg/jpg/jpeg/webp extension.
    re.compile(r'<img[^>]*\bsrc=["\']([^"\']*\blogo\b[^"\']*\.(?:png|svg|jpe?g|webp)[^"\']*)["\']', re.IGNORECASE),
    # 8: first <img> with an image extension after a <header> opening tag.
    re.compile(r'<header\b[^>]*>(?:[^<]|<(?!img))*<img[^>]*\bsrc=["\']([^"\']+\.(?:png|svg|jpe?g|webp)[^"\']*)["\']', re.IGNORECASE | re.DOTALL),
    # 9: first <img> with an image extension after a <nav> opening tag.
    re.compile(r'<nav\b[^>]*>(?:[^<]|<(?!img))*<img[^>]*\bsrc=["\']([^"\']+\.(?:png|svg|jpe?g|webp)[^"\']*)["\']', re.IGNORECASE | re.DOTALL),
    # 10: og:image meta tag (property before content).
    re.compile(r'<meta[^>]*\bproperty=["\']og:image["\'][^>]*\bcontent=["\']([^"\']+)["\']', re.IGNORECASE),
    # 11: og:image meta tag (content before property).
    re.compile(r'<meta[^>]*\bcontent=["\']([^"\']+)["\'][^>]*\bproperty=["\']og:image["\']', re.IGNORECASE),
 ]
 # Stylesheet rule whose first class selector contains the word "logo" and whose
 # declaration block sets background / background-image to url(...); group(1) is the URL.
 LOGO_CSS_PATTERN = re.compile(
    r'\.[\w-]*\blogo\b[\w-]*\s*(?:,\s*\.[\w-]+\s*)*\{[^}]*background(?:-image)?\s*:\s*url\(\s*["\']?([^"\')\s]+)',
    re.IGNORECASE | re.DOTALL,
 )
 def find_logo_url_in_html(html: str, base_url: str, css_texts: list[str] | None = None) -> str | None:
    """Return the absolute URL of the most likely logo image, or None if nothing matches.

    Sources are tried in priority order and the first non-noise hit wins:
      1. LOGO_IMG_PATTERNS[:8] against the HTML,
      2. LOGO_CSS_PATTERN against each stylesheet text in ``css_texts``,
      3. LOGO_IMG_PATTERNS[8:] against the HTML.

    Every match of a pattern is examined, including in stylesheets, so a
    noise URL in an early rule no longer hides a valid logo rule further
    down the same stylesheet.

    Args:
        html: Page markup to scan.
        base_url: URL that matched (possibly relative) URLs are joined against.
        css_texts: Optional stylesheet texts to scan for logo backgrounds.

    Returns:
        The resolved URL of the first acceptable candidate, or None.
    """
    def _is_noise(src: str) -> bool:
        # Empty values and inline data URIs are never usable logo URLs.
        if not src or src.startswith("data:"):
            return True
        # Placeholder-looking file names.
        if re.search(r"(blank|spacer|pixel|transparent|1x1)\b", src, re.IGNORECASE):
            return True
        # Names that look like language switchers, flags or UI controls.
        if re.search(r"(lang[-_]?(kor|eng|chn|jpn|rus|jp|en|ko|cn|ar|in)|flag|country|icon-|btn-|arrow|prev|next|search)\b", src, re.IGNORECASE):
            return True
        return False

    def _first_hit(pattern: re.Pattern, text: str) -> str | None:
        # First non-noise capture of ``pattern`` in ``text``, resolved against base_url.
        for match in pattern.finditer(text):
            src = match.group(1)
            if not _is_noise(src):
                return urljoin(base_url, src)
        return None

    for pattern in LOGO_IMG_PATTERNS[:8]:
        hit = _first_hit(pattern, html)
        if hit is not None:
            return hit

    # NOTE(review): url() values found in stylesheets are joined against base_url,
    # not against the stylesheet's own URL — confirm this is fine for relative paths.
    for css in (css_texts or []):
        hit = _first_hit(LOGO_CSS_PATTERN, css)
        if hit is not None:
            return hit

    for pattern in LOGO_IMG_PATTERNS[8:]:
        hit = _first_hit(pattern, html)
        if hit is not None:
            return hit
    return None
 # ── Colour extraction ────────────────────────────────────────────────────────
 # "#rrggbb": six hex digits ending at a word boundary.
 HEX6 = re.compile(r"#([0-9a-fA-F]{6})\b")
 # "#rgb": three hex digits that are not the start of a longer hex run.
 HEX3 = re.compile(r"#([0-9a-fA-F]{3})\b(?![0-9a-fA-F])")
 # rgb(r, g, b) / rgba(r, g, b, a): groups 1-3 are the channels, alpha is not captured.
 RGB  = re.compile(r"rgba?\(\s*(\d{1,3})\s*,\s*(\d{1,3})\s*,\s*(\d{1,3})\s*(?:,\s*[\d.]+\s*)?\)")
 # Contents of inline <style> elements.
 STYLE_BLOCK = re.compile(r"<style[^>]*>(.*?)</style>", re.IGNORECASE | re.DOTALL)
 # White, black and common greys that are excluded from the frequency count.
 # NOTE(review): extracted colours are expanded to 6 digits by _normalize, so the
 # 3-digit entries only match if this set is normalized the same way before comparison.
 NOISE = {
    "#ffffff", "#000000", "#fff", "#000",
    "#333", "#222", "#111", "#444", "#555", "#666", "#777", "#888", "#999",
    "#aaa", "#bbb", "#ccc", "#ddd", "#eee", "#f0f0f0", "#f5f5f5", "#fafafa",
 }
 def _normalize(hex_str: str) -> str:
    h = hex_str.lstrip("#").lower()
    if len(h) == 3:
        h = "".join(c * 2 for c in h)
    if len(h) == 8:
        h = h[:6]
    return f"#{h}"
 def _rgb_to_hex(r: int, g: int, b: int) -> str:
    return f"#{r:02x}{g:02x}{b:02x}"
 def _hex_to_rgb(h: str) -> tuple[int, int, int]:
    h = h.lstrip("#")
    return int(h[0:2], 16), int(h[2:4], 16), int(h[4:6], 16)
 def _distance(a: str, b: str) -> float:
    """Return the Euclidean distance between two hex colours in RGB space."""
    first = _hex_to_rgb(a)
    second = _hex_to_rgb(b)
    squared = sum((x - y) ** 2 for x, y in zip(first, second))
    return squared ** 0.5
 def _is_grayscale(h: str, tol: int = 12) -> bool:
    """Tell whether a hex colour is grey: its channel spread (max - min) is below ``tol``."""
    channels = _hex_to_rgb(h)
    spread = max(channels) - min(channels)
    return spread < tol
 def _extract_hex(text: str) -> list[str]:
    """Collect every colour found in ``text`` as a normalized ``#rrggbb`` string.

    The result lists all six-digit hex matches first, then all three-digit
    matches, then rgb()/rgba() matches; duplicates are kept. rgb()/rgba()
    matches with a channel above 255 are dropped.
    """
    found: list[str] = [_normalize(hit.group(0)) for hit in HEX6.finditer(text)]
    found += [_normalize(hit.group(0)) for hit in HEX3.finditer(text)]
    for hit in RGB.finditer(text):
        # The pattern has exactly three capturing groups: red, green, blue.
        red, green, blue = (int(part) for part in hit.groups())
        if all(0 <= value <= 255 for value in (red, green, blue)):
            found.append(_rgb_to_hex(red, green, blue))
    return found
 def _cluster(colors: Counter, threshold: float = 25.0) -> list[tuple[str, int]]:
    """Greedily merge near-identical colours, most frequent first.

    Colours are visited in ``most_common()`` order. Each one is added to the
    first existing group whose representative is closer than ``threshold``
    (RGB distance); otherwise it starts a new group and becomes its
    representative.

    Returns:
        ``(representative, total_count)`` pairs in the order the groups were
        created; the list is not re-sorted after counts are merged.
    """
    groups: list[tuple[str, int]] = []
    for color, count in colors.most_common():
        for index, (representative, total) in enumerate(groups):
            if _distance(color, representative) < threshold:
                groups[index] = (representative, total + count)
                break
        else:
            # No existing group was close enough.
            groups.append((color, count))
    return groups
 def extract_brand_colors_from_text(html: str, css_texts: list[str], source_url: str = "") -> dict:
    """Derive primary/accent/text colours and a palette from colour frequency. No fetching.

    Colours are counted across the inline <style> blocks, the full HTML and
    the given stylesheet texts, noise colours are dropped, and near-identical
    colours are merged with _cluster.

    Args:
        html: Page markup.
        css_texts: Stylesheet texts that belong to the page.
        source_url: Only used in the log line emitted when nothing is found.

    Returns:
        ``{}`` when no colour survives filtering; otherwise a dict with
        ``brand_colors`` (primary / accent = first two non-grey clusters,
        text = first grey cluster, each possibly None), ``color_palette``
        (up to eight clusters) and ``extracted_from``.
    """
    # Extracted colours are always 6-digit (see _extract_hex / _normalize), so the
    # noise set has to be normalized too or its 3-digit entries can never match.
    noise = {_normalize(entry) for entry in NOISE}

    # Inline <style> contents are scanned on their own and again as part of ``html``,
    # exactly as before — NOTE(review): this double-weights them; confirm it is intended.
    all_text_chunks: list[str] = list(STYLE_BLOCK.findall(html))
    all_text_chunks.append(html)
    all_text_chunks.extend(css_texts or [])

    counter: Counter = Counter()
    for text in all_text_chunks:
        for color in _extract_hex(text):
            if color in noise:
                continue
            counter[color] += 1
    if not counter:
        logger.info("[brand_parser] no colors extracted from %s", source_url)
        return {}

    clustered = _cluster(counter)
    chromatic: list[str] = []
    grayscale: list[str] = []
    for color, _ in clustered:
        (grayscale if _is_grayscale(color) else chromatic).append(color)

    palette_top = clustered[:8]
    palette = [{"name": f"색상 {i+1}", "hex": h, "usage": f"빈도 {n}"} for i, (h, n) in enumerate(palette_top)]
    return {
        "brand_colors": {
            "primary": chromatic[0] if chromatic else None,
            "accent": chromatic[1] if len(chromatic) > 1 else None,
            "text": grayscale[0] if grayscale else None,
        },
        "color_palette": palette,
        "extracted_from": "html+css",
    }
--- a/app/services/branding.py
+++ b/app/services/branding.py
@ -1,103 +0,0 @@
 """report 단계 - Gemini Vision 으로 로고 묘사 + 채널 로고 매칭."""
 import logging
 import os
 from urllib.parse import urlparse
 from common.db.source import (
    select_run_raw_data, update_raw_info_merge,
    select_branding_info_id, select_mainpage_logo_url,
 )
 from common.utils import _run_optional_step
 from integrations.llm.gemini_vision import VisionClient
 logger = logging.getLogger(__name__)
 async def _describe_logo(analysis_run_id: str, info_id: int, vc: VisionClient) -> None:
    """Describe the official logo and merge the result into branding raw_info["brandAssets"].

    Candidate URLs are tried in this order until the vision client returns a
    truthy result: the mainpage logo_url column, then the mainpage branding
    metadata (logoUrl, faviconUrl), then ``/favicon.ico`` on the homepage host.
    """
    run_raw = await select_run_raw_data(analysis_run_id)
    mainpage_rows = run_raw.get("mainpage") or [{}]
    mainpage = mainpage_rows[0].get("raw_data") or {}
    homepage_url = mainpage.get("sourceUrl") or ""
    meta = mainpage.get("branding") or {}

    column_logo = await select_mainpage_logo_url(analysis_run_id)
    candidates = []
    for url in (column_logo, meta.get("logoUrl"), meta.get("faviconUrl")):
        if url:
            candidates.append(url)
    if homepage_url:
        parts = urlparse(homepage_url)
        if parts.scheme and parts.netloc:
            candidates.append(f"{parts.scheme}://{parts.netloc}/favicon.ico")

    if not candidates:
        logger.info("[brand_logo] skip — no candidates")
        return
    logger.info("[brand_logo] start run=%s candidates=%d", analysis_run_id, len(candidates))

    result: dict = {}
    for candidate in candidates:
        result = await vc.analyze_brand_assets(logo_url=candidate, homepage_url=homepage_url)
        if result:
            break

    if result:
        # Re-read the run so colour fields already stored under brandAssets
        # are kept and the logo description is layered on top of them.
        run_raw = await select_run_raw_data(analysis_run_id)
        branding_rows = run_raw.get("branding") or [{}]
        branding_raw = branding_rows[0].get("raw_data") or {}
        existing = branding_raw.get("brandAssets") or {}
        await update_raw_info_merge(info_id, {"brandAssets": {**existing, **result}})
    logger.info("[brand_logo] done keys=%s", list(result.keys()) if result else None)
 async def _describe_channel_logos(analysis_run_id: str, info_id: int, vc: VisionClient) -> None:
    """Compare channel profile logos with the official logo; merge into branding raw_info["channelLogos"].

    For every supported channel present in the run's raw data, each row that
    has a ``logo_url`` contributes one entry. Rows whose language is not
    "KR" get the language appended to the channel label.
    """
    raw = await select_run_raw_data(analysis_run_id)
    official = await select_mainpage_logo_url(analysis_run_id)
    labels = {
        "instagram":    "Instagram",
        "facebook":     "Facebook",
        "youtube":      "YouTube",
        "tiktok":       "TikTok",
    }
    logos = []
    for key, label_prefix in labels.items():
        # Iterate only this channel's own rows. Previously the inner loop ran even
        # when the key was absent, which raised NameError for a missing first channel
        # or re-read the previous channel's rows under the wrong label.
        for item in raw.get(key) or []:
            language = item["language"]
            label = label_prefix if language == "KR" else label_prefix + " " + language
            img = item.get("logo_url")
            if img:
                logos.append({"channel": label, "url": img})
    if not logos:
        logger.info("[channel_logos] skip — no channel profileImages")
        return
    logger.info("[channel_logos] start run=%s channels=%s official=%s",
                analysis_run_id, [logo["channel"] for logo in logos], bool(official))
    result = await vc.describe_channel_logos(official, logos)
    if result:
        await update_raw_info_merge(info_id, {"channelLogos": result})
    logger.info("[channel_logos] done keys=%s", list(result.keys()) if result else None)
 async def analyze_branding(analysis_run_id: str) -> None:
    """Run the two Gemini branding steps: logo description, then channel-logo matching.

    Does nothing when GEMINI_API_KEY is unset or the run has no branding
    source. Each step is passed through _run_optional_step with its own label.
    """
    gemini_key = os.getenv("GEMINI_API_KEY")
    if not gemini_key:
        logger.info("[branding] skip — GEMINI_API_KEY 없음")
        return

    info_id = await select_branding_info_id(analysis_run_id)
    if info_id is None:
        logger.info("[branding] skip — branding source 없음 run=%s", analysis_run_id)
        return

    client = VisionClient(gemini_key)
    logger.info("[branding] start run=%s", analysis_run_id)
    logo_step = _describe_logo(analysis_run_id, info_id, client)
    await _run_optional_step(logo_step, "brand_logo")
    channel_step = _describe_channel_logos(analysis_run_id, info_id, client)
    await _run_optional_step(channel_step, "channel_logos")
    logger.info("[branding] done run=%s", analysis_run_id)
--- a/app/services/collect.py
+++ b/app/services/collect.py
@ -1,221 +1,115 @@
 import asyncio
 import logging
-from urllib.parse import urlparse
+from common.db import (
-from common.db.hospital import update_hospital_status, update_hospital
+    fetchone,
-from common.db.source import select_run_sources, update_raw_info_status, update_raw_info
+    set_instagram_status, save_instagram_raw_data,
    set_facebook_status, save_facebook_raw_data,
    set_naver_blog_status, save_naver_blog_raw_data,
    set_youtube_status, save_youtube_raw_data,
    set_gangnam_unni_status, save_gangnam_unni_raw_data,
    execute, save_hospital_raw_data,
 )
 from common.utils import get_env, _run_optional_step
 from integrations.apify import ApifyClient
 from integrations.naver import NaverClient
 from integrations.youtube import YouTubeClient
 from integrations.firecrawl import FirecrawlClient
-from models.status import SourceType
+from services.enrichment import collect_brand_assets, collect_extra_channels, collect_channel_logos
 from integrations.site_fetcher import fetch_html_and_css
 from services.brand_parser import find_logo_url_in_html, extract_brand_colors_from_text
 from services.website_parser import extract_tracking_pixels, extract_sns_links, extract_additional_domains, extract_main_cta
 from common.db.source import update_raw_info_merge, update_raw_info_logo_url, select_run_raw_data
 from common.db.base import fetchone
 from services.facebook_audit import transform_for_storage as transform_facebook
 logger = logging.getLogger(__name__)
-async def _save_with_logo(info_id: int, data: dict) -> None:
+async def collect_instagram(analysis_run_id: str, row_id: int, url: str) -> None:
    await update_raw_info(info_id, data)
    if data.get("profileImage"):
        await update_raw_info_logo_url(info_id, data["profileImage"])
 async def collect_instagram(analysis_run_id: str, info_id: int, url: str) -> None:
    logger.info("[instagram] start run=%s url=%s", analysis_run_id, url)
-    await update_raw_info_status(info_id, "processing")
+    await set_instagram_status(row_id, "processing")
    data = await ApifyClient(get_env("APIFY_API_TOKEN")).get_instagram_profile(url)
-    if data is None:
+    await save_instagram_raw_data(row_id, data)
        await update_raw_info_status(info_id, "failed")
        logger.warning("[instagram] failed run=%s", analysis_run_id)
        return
    await _save_with_logo(info_id, data)
    logger.info("[instagram] done run=%s", analysis_run_id)
-async def collect_facebook(analysis_run_id: str, info_id: int, url: str) -> None:
+async def collect_facebook(analysis_run_id: str, row_id: int, url: str) -> None:
    logger.info("[facebook] start run=%s url=%s", analysis_run_id, url)
-    await update_raw_info_status(info_id, "processing")
+    await set_facebook_status(row_id, "processing")
    data = await ApifyClient(get_env("APIFY_API_TOKEN")).get_facebook_page(url)
-    if data is None:
+    await save_facebook_raw_data(row_id, data)
        await update_raw_info_status(info_id, "failed")
        logger.warning("[facebook] failed run=%s", analysis_run_id)
        return
    data = transform_facebook(data)
    await _save_with_logo(info_id, data)
    logger.info("[facebook] done run=%s", analysis_run_id)
-async def collect_naver_blog(analysis_run_id: str, info_id: int, url: str) -> None:
+async def collect_naver_blog(analysis_run_id: str, row_id: int, url: str) -> None:
    logger.info("[naver_blog] start run=%s url=%s", analysis_run_id, url)
-    await update_raw_info_status(info_id, "processing")
+    await set_naver_blog_status(row_id, "processing")
    data = await NaverClient(get_env("NAVER_CLIENT_ID"), get_env("NAVER_CLIENT_SECRET")).get_blog_rss(url)
-    if data is None:
+    await save_naver_blog_raw_data(row_id, data)
        await update_raw_info_status(info_id, "failed")
        logger.warning("[naver_blog] failed run=%s", analysis_run_id)
        return
    await update_raw_info(info_id, data)
    logger.info("[naver_blog] done run=%s", analysis_run_id)
-async def collect_youtube(analysis_run_id: str, info_id: int, url: str) -> None:
+async def collect_youtube(analysis_run_id: str, row_id: int, url: str) -> None:
    logger.info("[youtube] start run=%s url=%s", analysis_run_id, url)
-    await update_raw_info_status(info_id, "processing")
+    await set_youtube_status(row_id, "processing")
    data = await YouTubeClient(get_env("YOUTUBE_API_KEY")).get_channel(url)
-    if data is None:
+    await save_youtube_raw_data(row_id, data)
        await update_raw_info_status(info_id, "failed")
        logger.warning("[youtube] failed run=%s", analysis_run_id)
        return
    await _save_with_logo(info_id, data)
    logger.info("[youtube] done run=%s", analysis_run_id)
-async def collect_gangnam_unni(analysis_run_id: str, info_id: int, url: str) -> None:
+async def collect_gangnam_unni(analysis_run_id: str, row_id: int, url: str) -> None:
    logger.info("[gangnam_unni] start run=%s url=%s", analysis_run_id, url)
-    await update_raw_info_status(info_id, "processing")
+    await set_gangnam_unni_status(row_id, "processing")
    data = await FirecrawlClient(get_env("FIRECRAWL_API_KEY")).get_gangnam_unni(url)
-    if data is None:
+    await save_gangnam_unni_raw_data(row_id, data)
        await update_raw_info_status(info_id, "failed")
        logger.warning("[gangnam_unni] failed run=%s", analysis_run_id)
        return
    await update_raw_info(info_id, data)
    logger.info("[gangnam_unni] done run=%s", analysis_run_id)
-def _extract_website_audit(html: str, url: str) -> dict:
+async def collect_clinic_info(analysis_run_id: str, hospital_id: str, url: str) -> None:
-    """raw HTML 에서 main CTA / tracking pixels / SNS / additional domains 정규식 추출."""
+    logger.info("[clinic] start run=%s url=%s", analysis_run_id, url)
-    if not html:
+    await execute("UPDATE hospital_baseinfo SET status = 'processing' WHERE hospital_id = %s", (hospital_id,))
        return {}
    primary_host = urlparse(url).netloc
    result: dict = {
        "trackingPixels":    extract_tracking_pixels(html),
        "snsLinks":          extract_sns_links(html),
        "additionalDomains": extract_additional_domains(html, primary_host),
    }
    html_cta = extract_main_cta(html)
    if html_cta:
        result["mainCta"] = html_cta
    return result
 async def collect_mainpage(analysis_run_id: str, info_id: int, hospital_id: str, url: str) -> None:
    logger.info("[mainpage] start run=%s url=%s", analysis_run_id, url)
    await update_raw_info_status(info_id, "processing")
    await update_hospital_status(hospital_id, "processing")
    data = await FirecrawlClient(get_env("FIRECRAWL_API_KEY")).fetch_clinic_info(url)
-    if data is None:
+    await save_hospital_raw_data(hospital_id, data, analysis_run_id=analysis_run_id)
-        await update_raw_info_status(info_id, "failed")
+    logger.info("[clinic] done run=%s", analysis_run_id)
        logger.warning("[mainpage] failed run=%s", analysis_run_id)
        return
    html = data.pop("html", "") or ""  # raw_data 에는 저장 안 함 — 여기서만 사용하고 버림.
    # 홈페이지 URL 자체도 raw_data 에 박아둬야 brand_assets / 분석 단계에서 mainpage URL 재조회 없이 사용 가능.
    data = {**data, "sourceUrl": url}
    data.update(_extract_website_audit(html, url))
    await update_raw_info(info_id, data)
    await update_hospital(hospital_id, data, analysis_run_id=analysis_run_id)
    logger.info("[mainpage] done run=%s", analysis_run_id)
-async def collect_tiktok(analysis_run_id: str, info_id: int, url: str) -> None:
+async def collect_all(
-    logger.info("[tiktok] start run=%s url=%s", analysis_run_id, url)
+    analysis_run_id: str,
-    await update_raw_info_status(info_id, "processing")
+    hospital_id: str,
-    data = await ApifyClient(get_env("APIFY_API_TOKEN")).get_tiktok_profile(url)
+    instagram_id: int | None = None,
-    if data is None:
+    facebook_id: int | None = None,
-        await update_raw_info_status(info_id, "failed")
+    naver_blog_id: int | None = None,
-        logger.warning("[tiktok] failed run=%s", analysis_run_id)
+    youtube_id: int | None = None,
-        return
+    gangnam_unni_id: int | None = None,
-    await _save_with_logo(info_id, data)
+    tiktok_url: str | None = None,
-    logger.info("[tiktok] done run=%s", analysis_run_id)
+    instagram_en_url: str | None = None,
    facebook_en_url: str | None = None,
 ) -> None:
    async def _url(table: str, row_id: int) -> str:
        row = await fetchone(f"SELECT url FROM {table} WHERE id = %s", (row_id,))
        return row["url"] if row else ""
    hospital = await fetchone("SELECT url FROM hospital_baseinfo WHERE hospital_id = %s", (hospital_id,))
    tasks = [collect_clinic_info(analysis_run_id, hospital_id, hospital["url"])]
-async def collect_naver_cafe(analysis_run_id: str, info_id: int, url: str) -> None:
+    if instagram_id:
-    """카페는 로그인 필요라 본문 못 봄. URL 활성·cafeId·이름 언급수만 신호로 수집."""
+        tasks.append(collect_instagram(analysis_run_id, instagram_id, await _url("instagram_data", instagram_id)))
-    logger.info("[naver_cafe] start run=%s url=%s", analysis_run_id, url)
+    if facebook_id:
-    await update_raw_info_status(info_id, "processing")
+        tasks.append(collect_facebook(analysis_run_id, facebook_id, await _url("facebook_data", facebook_id)))
-    data = await NaverClient(get_env("NAVER_CLIENT_ID"), get_env("NAVER_CLIENT_SECRET")).get_cafe_info(url)
+    if naver_blog_id:
-    if data is None:
+        tasks.append(collect_naver_blog(analysis_run_id, naver_blog_id, await _url("naver_blog_data", naver_blog_id)))
-        await update_raw_info_status(info_id, "failed")
+    if youtube_id:
-        logger.warning("[naver_cafe] failed run=%s", analysis_run_id)
+        tasks.append(collect_youtube(analysis_run_id, youtube_id, await _url("youtube_data", youtube_id)))
-        return
+    if gangnam_unni_id:
-    await update_raw_info(info_id, data)
+        tasks.append(collect_gangnam_unni(analysis_run_id, gangnam_unni_id, await _url("gangnam_unni_data", gangnam_unni_id)))
    logger.info("[naver_cafe] done run=%s", analysis_run_id)
 async def collect_kakaotalk(analysis_run_id: str, info_id: int, url: str) -> None:
    """Store the KakaoTalk URL as-is; nothing is fetched for this channel.

    The stored URL serves only as a signal that the channel exists.
    """
    logger.info("[kakaotalk] url-only run=%s url=%s", analysis_run_id, url)
    stored = {"url": url}
    await update_raw_info(info_id, stored)
 async def collect_brand_basics(analysis_run_id: str, info_id: int) -> None:
    """Extract the homepage logo URL and colours and store them for the run.

    The logo URL (HTML/CSS match first, then the mainpage branding metadata
    logoUrl, then ogImage) is written to the mainpage raw_info row. Colours
    extracted from HTML/CSS are merged into ``brandAssets`` of the raw_info
    row identified by ``info_id``.
    """
    logger.info("[brand_basics] start run=%s info=%s", analysis_run_id, info_id)
    run_raw = await select_run_raw_data(analysis_run_id)
    mainpage_rows = run_raw.get("mainpage") or [{}]
    mainpage = mainpage_rows[0].get("raw_data") or {}
    homepage_url = mainpage.get("sourceUrl") or ""
    meta = mainpage.get("branding") or {}

    # Fetch only when a homepage URL is known; parse only when HTML came back.
    html, css_texts = "", []
    if homepage_url:
        html, css_texts = await fetch_html_and_css(homepage_url)
    html_logo_url, css_colors = None, {}
    if html:
        html_logo_url = find_logo_url_in_html(html, homepage_url, css_texts)
        css_colors = extract_brand_colors_from_text(html, css_texts, homepage_url)

    logo_url = html_logo_url or meta.get("logoUrl") or meta.get("ogImage")
    if logo_url:
        mainpage_row = await fetchone(
            "SELECT ri.info_id FROM raw_info ri JOIN remote_source rs USING (source_id)"
            " WHERE ri.analysis_run_id = %s AND rs.source_type = 'mainpage' LIMIT 1",
            (analysis_run_id,),
        )
        if mainpage_row:
            await update_raw_info_logo_url(mainpage_row["info_id"], logo_url)

    payload: dict = {}
    if css_colors:
        for field in ("brand_colors", "color_palette"):
            if css_colors.get(field):
                payload[field] = css_colors[field]
        payload["color_source"] = "html+css"
    if payload:
        await update_raw_info_merge(info_id, {"brandAssets": payload})
    logger.info("[brand_basics] done logo_url=%s colors=%s", bool(logo_url), bool(payload))
 async def collect_all(analysis_run_id: str, hospital_id: str) -> None:
    rows = await select_run_sources(analysis_run_id)
    # source_type → collector. KR/EN 구분은 collector 입장에서 동일, language 컬럼만 다름.
    _collectors = {
        SourceType.INSTAGRAM:    collect_instagram,
        SourceType.FACEBOOK:     collect_facebook,
        SourceType.NAVER_BLOG:   collect_naver_blog,
        SourceType.YOUTUBE:      collect_youtube,
        SourceType.GANGNAM_UNNI: collect_gangnam_unni,
        SourceType.TIKTOK:       collect_tiktok,
        SourceType.NAVER_CAFE:   collect_naver_cafe,
        SourceType.KAKAOTALK:    collect_kakaotalk,
    }
    tasks = []
    branding_info_id: int | None = None
    for row in rows:
        info_id     = row["info_id"]
        source_type = row["source_type"]
        url         = row["url"]
        if source_type == SourceType.BRANDING:
            branding_info_id = info_id  # mainpage·채널 수집 끝난 뒤 2단계에서 사용
            continue
        if source_type == SourceType.MAINPAGE:
            tasks.append(collect_mainpage(analysis_run_id, info_id, hospital_id, url))
        elif source_type in _collectors:
            tasks.append(_collectors[source_type](analysis_run_id, info_id, url))
    await asyncio.gather(*tasks, return_exceptions=True)
-    # 2단계: branding (brandAssets → channelLogos 한 raw_info 안에 머지). mainpage·채널 raw_data 의존이라 순차.
+    # 아래 3단계는 모두 hospital raw_data를 read-modify-write 하므로 race 방지 위해 순차.
-    # 부가 기능이라 실패해도 리포트는 나와야 하므로 _run_optional_step 으로 격리.
+    #   brand_assets  : clinic_info가 채운 branding.logoUrl로 공식 로고/hex 추출
-    if branding_info_id is not None:
+    #   extra_channels: 틱톡/인스타EN/페북EN 수집
-        await _run_optional_step(collect_brand_basics(analysis_run_id, branding_info_id), "brand_basics")
+    #   channel_logos : 공식 로고(brand_assets)+채널 profileImage(extra_channels) 채워진 뒤 Vision 비교
    # 부가 기능이라 실패해도 리포트는 나와야 하므로 _run_optional_step으로 각각 격리.
    await _run_optional_step(collect_brand_assets(analysis_run_id, hospital_id), "brand_assets")
    await _run_optional_step(
        collect_extra_channels(
            analysis_run_id, hospital_id,
            tiktok_url=tiktok_url, instagram_en_url=instagram_en_url, facebook_en_url=facebook_en_url,
        ),
        "extra_channels",
    )
    await _run_optional_step(collect_channel_logos(analysis_run_id, hospital_id), "channel_logos")
--- a/app/services/enrichment.py
+++ b/app/services/enrichment.py
@ -0,0 +1,175 @@
 import asyncio
 import json
 import logging
 import os
 from urllib.parse import urlparse
 from common.db import fetchone, fetch_raw, merge_hospital_raw_data
 from common.utils import get_env
 from integrations.apify import ApifyClient
 from integrations.vision import VisionClient
 from integrations.color_extractor import extract_brand_assets_from_site
 logger = logging.getLogger(__name__)
 async def collect_brand_assets(analysis_run_id: str, hospital_id: str) -> None:
    """Extract the homepage logo URL and brand colours and store them in raw_data["brandAssets"].

    - Logo URL and hex colours come from extract_brand_assets_from_site
      (HTML/CSS based, no Vision involved).
    - The qualitative logo description comes from the Gemini Vision client;
      without GEMINI_API_KEY only the colours are stored.

    Args:
        analysis_run_id: Run identifier, used for logging only.
        hospital_id: Row in hospital_baseinfo that is read and updated.
    """
    logger.info("[brand_assets] start run=%s", analysis_run_id)
    row = await fetchone(
        "SELECT raw_data, url FROM hospital_baseinfo WHERE hospital_id = %s",
        (hospital_id,),
    )
    if not row:
        return
    raw = row["raw_data"]
    raw_data = json.loads(raw) if isinstance(raw, str) else (raw or {})
    branding = raw_data.get("branding") or {}
    homepage_url = row["url"]

    # 0-1. One site fetch yields both the logo URL and the brand colours.
    site = await extract_brand_assets_from_site(homepage_url) if homepage_url else {}
    html_logo_url = site.get("logo_url")
    css_colors = site.get("colors") or {}
    if html_logo_url:
        logger.info("[brand_assets] HTML logo found: %s", html_logo_url)
    if css_colors:
        logger.info("[brand_assets] css colors: %s", css_colors.get("brand_colors"))

    # 2. Logo / representative image candidates, in order: logo, og:image, favicon.
    logo_url = html_logo_url or branding.get("logoUrl")
    og_image = branding.get("ogImage")
    favicon = branding.get("faviconUrl")
    candidates: list[tuple[str, str]] = []
    if logo_url:
        candidates.append(("logo", logo_url))
    if og_image:
        candidates.append(("og", og_image))
    if favicon:
        candidates.append(("favicon", favicon))
    if homepage_url:
        parsed = urlparse(homepage_url)
        if parsed.scheme and parsed.netloc:
            candidates.append(("favicon", f"{parsed.scheme}://{parsed.netloc}/favicon.ico"))
    if not candidates and not css_colors:
        logger.info("[brand_assets] skip — no logo/og/favicon candidates and no CSS colors")
        return

    # 3. Vision only supplies the qualitative description; without a key only colours are stored.
    result: dict = {}
    used_kind: str | None = None
    api_key = os.getenv("GEMINI_API_KEY")
    if api_key and candidates:
        vc = VisionClient(api_key)
        for kind, cand in candidates:
            # Coerce a falsy answer (e.g. None) to {} so that step 4 can still
            # assign the CSS colours when every candidate fails.
            result = (await vc.analyze_brand_assets(logo_url=cand, homepage_url=homepage_url)) or {}
            if result:
                used_kind = kind
                break
        # A favicon is not the real logo: keep the description but blank the logo image URLs.
        if result and used_kind == "favicon" and result.get("logo_images"):
            result["logo_images"] = {"circle": None, "horizontal": None, "korean": None}
    elif not api_key:
        logger.info("[brand_assets] GEMINI_API_KEY not set — 색상만 저장, Vision 묘사 skip")

    # 4. Colours extracted from CSS take precedence over anything Vision returned.
    if css_colors:
        if css_colors.get("brand_colors"):
            result["brand_colors"] = css_colors["brand_colors"]
        if css_colors.get("color_palette"):
            result["color_palette"] = css_colors["color_palette"]
        result["color_source"] = "html+css"
    elif result:
        result["color_source"] = "vision"

    if result:
        result["logo_source"] = used_kind or "none"
        await merge_hospital_raw_data(hospital_id, {"brandAssets": result})
    logger.info("[brand_assets] done keys=%s", list(result.keys()) if result else None)
 async def collect_extra_channels(
    analysis_run_id: str,
    hospital_id: str,
    tiktok_url: str | None = None,
    instagram_en_url: str | None = None,
    facebook_en_url: str | None = None,
 ) -> None:
    """Collect TikTok / Instagram EN / Facebook EN and merge them into the hospital raw_data.

    Only channels whose URL is given are fetched, all concurrently. A channel
    that raises is logged and skipped; empty results are skipped too. The
    rest is stored under the keys ``instagramEn``, ``facebookEn``, ``tiktok``.
    """
    apify = ApifyClient(get_env("APIFY_API_TOKEN"))
    pending: list[tuple] = []
    if instagram_en_url:
        pending.append(("instagramEn", apify.get_instagram_profile(instagram_en_url)))
    if facebook_en_url:
        pending.append(("facebookEn", apify.get_facebook_page(facebook_en_url)))
    if tiktok_url:
        pending.append(("tiktok", apify.get_tiktok_profile(tiktok_url)))
    if not pending:
        return

    keys = [key for key, _ in pending]
    logger.info("[extra_channels] start run=%s channels=%s", analysis_run_id, keys)
    outcomes = await asyncio.gather(*(job for _, job in pending), return_exceptions=True)

    results: dict = {}
    for key, outcome in zip(keys, outcomes):
        if isinstance(outcome, Exception):
            logger.warning("[extra_channels] %s 수집 실패: %s", key, outcome)
            continue
        if outcome:
            results[key] = outcome

    if not results:
        logger.info("[extra_channels] 수집 결과 없음 run=%s", analysis_run_id)
        return
    await merge_hospital_raw_data(hospital_id, results)
    logger.info("[extra_channels] done run=%s keys=%s", analysis_run_id, list(results))
 async def collect_channel_logos(analysis_run_id: str, hospital_id: str) -> None:
    """Gather channel profile images and have Gemini Vision compare them with the official logo.

    The result is merged into hospital raw_data["channelLogos"]. Skipped when
    GEMINI_API_KEY is unset or no channel has a profile image. It reads
    ``brandAssets`` and the extra-channel entries from the hospital raw_data,
    so it has to run after the steps that write them.
    """
    api_key = os.getenv("GEMINI_API_KEY")
    if not api_key:
        logger.info("[channel_logos] skip — GEMINI_API_KEY 없음")
        return

    hospital_row = await fetchone("SELECT raw_data FROM hospital_baseinfo WHERE hospital_id = %s", (hospital_id,))
    stored = hospital_row["raw_data"] if hospital_row else None
    raw_data = json.loads(stored) if isinstance(stored, str) else (stored or {})
    brand_assets = raw_data.get("brandAssets") or {}
    official = (brand_assets.get("logo_images") or {}).get("horizontal")

    run = await fetchone(
        "SELECT instagram_data_id, facebook_data_id, youtube_data_id"
        " FROM analysis_runs WHERE analysis_run_id = %s",
        (analysis_run_id,),
    )
    run_ids = run or {}

    logos: list[dict] = []
    # Channels that have their own table.
    dedicated = (
        ("Instagram", "instagram_data", "instagram_data_id"),
        ("Facebook", "facebook_data", "facebook_data_id"),
        ("YouTube", "youtube_data", "youtube_data_id"),
    )
    for channel, table, column in dedicated:
        row_id = run_ids.get(column)
        if not row_id:
            continue
        channel_raw = await fetch_raw(table, row_id) or {}
        if channel_raw.get("profileImage"):
            logos.append({"channel": channel, "url": channel_raw["profileImage"]})

    # Extra channels stored inside the hospital raw_data.
    extras = (("Instagram EN", "instagramEn"), ("Facebook EN", "facebookEn"), ("TikTok", "tiktok"))
    for channel, key in extras:
        image = (raw_data.get(key) or {}).get("profileImage")
        if image:
            logos.append({"channel": channel, "url": image})

    if not logos:
        logger.info("[channel_logos] skip — 채널 프로필 이미지 없음")
        return

    channel_names = [entry["channel"] for entry in logos]
    logger.info("[channel_logos] start run=%s channels=%s official=%s", analysis_run_id,
                channel_names, bool(official))
    result = await VisionClient(api_key).describe_channel_logos(official, logos)
    if result:
        # Keep every collected URL in the result, including channels Vision did not describe.
        result["logos"] = logos
        await merge_hospital_raw_data(hospital_id, {"channelLogos": result})
    logger.info("[channel_logos] done run=%s keys=%s", analysis_run_id, list(result.keys()) if result else None)
--- a/app/services/facebook_audit.py
+++ b/app/services/facebook_audit.py
@ -1,130 +0,0 @@
 """Facebook audit 페이지(KR·EN)를 수집 데이터로 구성.
 수치 지표(최근 게시일·게시 빈도·참여율)는 **수집 시점에** 결정적으로 산출해 DB에 박는다 (transform_for_storage).
 콘텐츠 주제(top_content_type)는 캡션 본문 이해가 필요해 LLM이 채운다 (리포트 프롬프트 지시)."""
 import json
 from datetime import datetime, timezone
 from common.utils import parse_ts
 from integrations.llm.llm_service import LLMService
 from integrations.llm.prompt import facebook_diagnosis_prompt
 from integrations.llm.schemas.report import FacebookAudit, DiagnosisItem
 def _humanize_age(days: int) -> str:
    days = max(days, 0)
    if days == 0:  return "오늘"
    if days < 7:   return f"{days}일 전"
    if days < 30:  return f"{days // 7}주 전"
    if days < 365: return f"{days // 30}개월 전"
    return f"{days // 365}년 전"
 def _frequency_label(avg_gap_days: float) -> str:
    """게시물 사이 평균 간격(일) → 빈도 라벨."""
    if avg_gap_days <= 1.5: return "거의 매일"
    if avg_gap_days <= 10:  return f"주 {7 / avg_gap_days:.1f}회"
    if avg_gap_days <= 45:  return f"월 {30 / avg_gap_days:.1f}회"
    return "비정기 (분기 이상 간격)"
 def _engagement_text(posts: list[dict]) -> str:
    """게시물당 좋아요/반응/공유/조회를 min~max 범위로. 전부 0인 지표는 제외.
    댓글은 posts actor가 안 줘서 '댓글 거의 없음' 고정 부가 (FB 페이지는 댓글 희박이 일반적)."""
    def _rng(vals: list[int], label: str, unit: str) -> str | None:
        lo, hi = min(vals), max(vals)
        if hi == 0:
            return None
        return f"{label} {lo}{unit}" if lo == hi else f"{label} {lo}~{hi}{unit}"
    parts = [
        _rng([p.get("likes", 0)     for p in posts], "좋아요", "개"),
        _rng([p.get("reactions", 0) for p in posts], "반응",   "개"),
        _rng([p.get("shares", 0)    for p in posts], "공유",   "개"),
    ]
    vid_views = [p.get("views", 0) for p in posts if p.get("isVideo")]
    if vid_views:
        parts.append(_rng(vid_views, "영상 조회", "회"))
    parts = [x for x in parts if x]
    if not parts:
        return "게시물당 참여 거의 없음"
    return "게시물당 " + " · ".join(parts) + " · 댓글 거의 없음"
 def transform_for_storage(fb: dict | None) -> dict | None:
    """apify 원본 → DB에 저장할 최종 형태.
    - 수치 지표(recent_post_age·post_frequency·engagement)를 그 자리에서 계산해 박음.
    - 게시물은 캡션·타입만 남김 (raw 숫자/timestamp는 어차피 재계산 안 하므로 버림).
    수집 시점에 한 번 계산 → 리포트 생성 때는 그대로 갖다 박기만 함."""
    if not isinstance(fb, dict):
        return fb
    posts = fb.get("latestPosts") or []
    out = {k: v for k, v in fb.items() if k != "latestPosts"}
    if posts:
        dts = sorted((d for d in (parse_ts(p.get("timestamp")) for p in posts) if d), reverse=True)
        if dts:
            out["recent_post_age"] = _humanize_age((datetime.now(timezone.utc) - dts[0]).days)
            if len(dts) > 1:
                avg_gap = ((dts[0] - dts[-1]).days or 1) / (len(dts) - 1)
                out["post_frequency"] = _frequency_label(avg_gap)
        out["engagement"] = _engagement_text(posts)
        out["latestPosts"] = [
            {"caption": (p.get("text") or "")[:160],
             "type": "video" if p.get("isVideo") else "image"}
            for p in posts
        ]
    else:
        out["latestPosts"] = []
    return out
 def _logo_data(channel_logos: dict, channel: str) -> dict:
    """channelLogos(비전 결과)에서 해당 채널 가져온다."""
    for c in (channel_logos or {}).get("channel_logos", []):
        if c.get("channel") == channel:
            return c
    print("channel_logos NOT FOUND : ",  channel_logos)
    return {
        "is_official" : False,
        "logo_description" : "로고 찾지 못함"
    }
 def _page_patch(item: dict, channel_logos) -> dict:
    """Build one Facebook audit page entry from a collected item.

    ``item`` is read for ``raw_data`` (the stored page payload) and an optional
    ``language``; a missing or empty language is treated as "KR", and that same
    value is used for the label, the logo lookup and the ``language`` field.
    Fields absent from the payload keep the defaults set below. Logo fields come
    from the ``channel_logos`` entry returned by ``_logo_data``.
    """
    fb = item.get("raw_data") or {}
    language = item.get("language") or "KR"
    # Logo lookup key: "Facebook" for KR, "Facebook <LANG>" for any other language.
    channel = "Facebook" if language == "KR" else "Facebook " + language
    logo_data = _logo_data(channel_logos, channel)

    p: dict = {"page_name": "", "followers": 0, "category": "", "reviews": 0,
               "following": 0, "recent_post_age": "", "post_frequency": "", "engagement": ""}
    if fb.get("pageUrl"):
        p["url"] = p["link"] = fb["pageUrl"]
    if fb.get("pageName"):
        p["page_name"] = fb["pageName"]
    if fb.get("followers"):
        p["followers"] = fb["followers"]
    if fb.get("intro"):
        p["bio"] = fb["intro"]
    if fb.get("categories"):
        p["category"] = ", ".join(fb["categories"])
    if fb.get("website"):
        p["linked_domain"] = fb["website"]
    # reviews/following may legitimately be 0, so only None is treated as missing.
    if fb.get("reviews") is not None:
        p["reviews"] = fb["reviews"]
    if fb.get("following") is not None:
        p["following"] = fb["following"]
    for key in ("recent_post_age", "post_frequency", "engagement"):
        if fb.get(key):
            p[key] = fb[key]

    # Use the defaulted language here; indexing item["language"] raised KeyError
    # for items without the key even though a default had already been chosen.
    p["language"] = language
    p["label"] = "페이스북 " + language
    # .get tolerates a logo entry that lacks one of the two fields.
    p["logo"] = "일치 (공식 로고)" if logo_data.get("is_official") else "불일치 (비공식 변형)"
    p["logo_description"] = logo_data.get("logo_description", "")
    return p
 async def build_facebook_audit(facebook: list[dict], brand_patch: list[dict], channel_logos) -> dict:
    """Assemble the Facebook audit section.

    Page entries are built from the collected items, the LLM service is asked
    for a diagnosis of those pages, and the combined payload is validated
    against ``FacebookAudit`` before being returned as a plain dict.
    """
    pages = []
    for item in facebook:
        pages.append(_page_patch(item, channel_logos))

    llm = LLMService(provider="perplexity")
    diagnosis_result = await llm.generate(
        facebook_diagnosis_prompt,
        {"pages": json.dumps(pages, ensure_ascii=False)},
    )

    diagnosis = []
    for raw_item in diagnosis_result.diagnosis:
        diagnosis.append(DiagnosisItem.model_validate(raw_item).model_dump())

    audit = FacebookAudit.model_validate(
        {"pages": pages, "diagnosis": diagnosis, "brand_inconsistencies": brand_patch}
    )
    return audit.model_dump()
--- a/app/services/file_data.py
+++ b/app/services/file_data.py
@ -2,8 +2,7 @@ import logging
 from fastapi import HTTPException, UploadFile
-from common.db.run import select_run
+from common.db import execute, fetchall, fetchone, insert_file_row
 from common.db.file_data import insert_file, select_run_files, select_file, delete_file
 from integrations.azure_blob import AzureBlobUploader
 from models.file import FileListItem, FileType, FileUploadResponse
@ -32,7 +31,10 @@ async def upload_analysis_file(
    content_type: str | None = None,
 ) -> tuple[int, str]:
    """analysis_run에 딸린 파일 업로드. Blob 업로드 + file_data row 생성. (file_id, url) 반환."""
-    run = await select_run(analysis_run_id)
+    run = await fetchone(
        "SELECT hospital_id FROM analysis_runs WHERE analysis_run_id = %s",
        (analysis_run_id,),
    )
    if not run:
        raise HTTPException(status_code=404, detail="analysis_run not found")
    hospital_id = run["hospital_id"]
@ -45,7 +47,7 @@ async def upload_analysis_file(
        content_type=content_type,
    )
-    file_id = await insert_file(
+    file_id = await insert_file_row(
        analysis_run_id=analysis_run_id,
        hospital_id=hospital_id,
        file_type=file_type,
@ -59,7 +61,12 @@ async def upload_analysis_file(
 async def list_analysis_files(analysis_run_id: str) -> list[dict]:
    """analysis_run에 딸린 (삭제 안 된) 파일 목록."""
-    return await select_run_files(analysis_run_id)
+    return await fetchall(
        "SELECT id, file_type, file_name, file_url, size_bytes, created_at FROM file_data"
        " WHERE analysis_run_id = %s AND is_deleted = FALSE"
        " ORDER BY created_at DESC",
        (analysis_run_id,),
    )
 async def handle_analysis_file_upload(
@ -95,7 +102,7 @@ async def handle_analysis_file_upload(
 async def get_analysis_files_response(analysis_run_id: str) -> list[FileListItem]:
    """run 존재 확인 + 응답 모델 생성."""
-    if not await select_run(analysis_run_id):
+    if not await fetchone("SELECT 1 FROM analysis_runs WHERE analysis_run_id = %s", (analysis_run_id,)):
        raise HTTPException(status_code=404, detail="analysis_run not found")
    rows = await list_analysis_files(analysis_run_id)
    return [FileListItem(**{**r, "created_at": str(r["created_at"])}) for r in rows]
@ -103,8 +110,14 @@ async def get_analysis_files_response(analysis_run_id: str) -> list[FileListItem
 async def soft_delete_analysis_file(analysis_run_id: str, file_id: int) -> None:
    """analysis_run에 딸린 파일을 소프트 삭제. 멱등성 보장."""
-    row = await select_file(file_id, analysis_run_id)
+    row = await fetchone(
        "SELECT id FROM file_data WHERE id = %s AND analysis_run_id = %s",
        (file_id, analysis_run_id),
    )
    if not row:
        raise HTTPException(status_code=404, detail="file not found")
-    await delete_file(file_id)
+    await execute(
        "UPDATE file_data SET is_deleted = TRUE WHERE id = %s AND is_deleted = FALSE",
        (file_id,),
    )
    logger.info("soft-deleted analysis file run=%s file_id=%s", analysis_run_id, file_id)
--- a/app/services/instagram_audit.py
+++ b/app/services/instagram_audit.py
@ -1,11 +1,6 @@
 """Instagram audit 계정(KR·EN)을 수집 데이터로 구성.
 fix 값(handle/followers/highlights/content_format 등)은 전부 코드에서 박는다 — LLM 출력 무시."""
 import json
 from integrations.llm.llm_service import LLMService
 from integrations.llm.prompt import instagram_diagnosis_prompt
 from integrations.llm.schemas.report import InstagramAudit, DiagnosisItem
 _MEDIA = {"GraphImage": "이미지", "GraphSidecar": "카드뉴스", "GraphVideo": "영상/릴스"}
@ -23,16 +18,8 @@ def _logo_desc(channel_logos: dict, channel: str) -> str:
    return ""
-def _account(item: dict, channel_logos: dict) -> dict:
+def _account(data: dict, language: str, label: str, channel: str, channel_logos: dict) -> dict:
    """스크래퍼 수집값으로 InstagramAccount 전 필드를 구성."""
    language = item.get("language") if item.get("language") else "KR"
    label = "인스타그램 " + language
    channel = "Instagram"
    if language != "KR":
        channel = channel + " " + language
    data = item.get("raw_data")
    handle = data.get("username") or ""
    return {
        "handle":         handle,
@ -51,14 +38,11 @@ def _account(item: dict, channel_logos: dict) -> dict:
    }
-async def build_instagram_audit(instagram: list[dict], channel_logos: dict) -> dict:
+def build_instagram_accounts(instagram: dict, instagram_en: dict, channel_logos: dict) -> list[dict]:
    """KR·EN 인스타 계정 리스트 구성 (username 있는 것만)."""
-    accounts = [_account(item, channel_logos) for item in instagram if item.get("raw_data").get("username")]
+    accounts: list[dict] = []
-    diagnosis_result = await LLMService(provider="perplexity").generate(
+    if instagram.get("username"):
-        instagram_diagnosis_prompt,
+        accounts.append(_account(instagram, "KR", "인스타그램 KR", "Instagram", channel_logos))
-        {"accounts": json.dumps(accounts, ensure_ascii=False)},
+    if instagram_en.get("username"):
-    )
+        accounts.append(_account(instagram_en, "EN", "인스타그램 EN", "Instagram EN", channel_logos))
-    return InstagramAudit.model_validate({
+    return accounts
        "accounts":  accounts,
        "diagnosis": [DiagnosisItem.model_validate(item).model_dump() for item in diagnosis_result.diagnosis],
    }).model_dump()
--- a/app/services/kpi_dashboard.py
+++ b/app/services/kpi_dashboard.py
@ -1,99 +0,0 @@
 """mockup 7개 역분석 — 채널 규모별 3개월/12개월 target 성장률 공식."""
 from integrations.llm.schemas.report import KPIMetric
 def _round_clean(n: int) -> int:
    if n < 100:        return n
    if n < 1000:       return round(n / 100) * 100
    if n < 10_000:     return round(n / 500) * 500
    if n < 100_000:    return round(n / 1000) * 1000
    if n < 1_000_000:  return round(n / 5000) * 5000
    return round(n / 50_000) * 50_000
 def _target_multiplier(current: int) -> tuple[float, float]:
    if current < 1_000:     return (2.5, 9.0)
    if current < 5_000:     return (1.7, 4.0)
    if current < 25_000:    return (1.5, 2.5)
    if current < 50_000:    return (1.3, 2.2)
    return (1.1, 1.9)
 def _follower_kpi(metric: str, val: int | None, unit: str = "명") -> dict | None:
    if not val: return None
    m3, m12 = _target_multiplier(val)
    return {
        "metric":          metric,
        "current":         f"{val:,}{unit}",
        "target_3_month":  f"{_round_clean(int(val * m3)):,}{unit}",
        "target_12_month": f"{_round_clean(int(val * m12)):,}{unit}",
    }
 def _blog_frequency(posts: list) -> tuple[str, str, str] | None:
    """Derive (current, 3-month target, 12-month target) posting-frequency labels.

    The average gap in days is computed from the ``postDate`` of the posts that
    parse. Returns None when fewer than two posts have a usable date. When the
    average gap is at most one day, both targets are "keep the current pace".
    """
    from common.utils import parse_ts

    parsed = (parse_ts(post.get("postDate")) for post in posts)
    stamps = sorted((ts for ts in parsed if ts), reverse=True)
    if len(stamps) < 2:
        return None
    avg_gap = (stamps[0] - stamps[-1]).days / (len(stamps) - 1)

    # Label describing the current pace.
    if avg_gap > 90:
        current = f"방치 ({stamps[0].strftime('%Y-%m')})"
    elif avg_gap <= 1:
        current = f"주 {7 // max(int(avg_gap), 1)}회"
    elif avg_gap <= 3:
        current = "주 2~3회"
    elif avg_gap <= 14:
        current = "주 1~2회"
    elif avg_gap <= 30:
        current = f"월 {max(30 // int(avg_gap), 1)}회"
    else:
        current = "월 1회 미만"

    # Targets step up from the current pace.
    if avg_gap > 3:
        targets = ("주 2회", "주 3회")
    elif avg_gap > 2:
        targets = ("주 3회", "주 5회")
    elif avg_gap > 1:
        targets = ("주 5회", "주 7회")
    else:
        keep = f"{current} 유지"
        targets = (keep, keep)
    return (current, *targets)
 def build_kpi_dashboard(
    instagram: list[dict], facebook: list[dict], youtube: dict, gangnam_unni: dict, hospital: dict,
    naver_blog: dict | None = None,
 ) -> list[dict]:
    """Build the KPI dashboard rows from the collected channel data.

    ``instagram`` and ``facebook`` are iterated as lists of items carrying
    ``language`` and ``raw_data``; each account with a follower count yields one
    row. YouTube subscribers, TikTok followers and Naver Cafe members (the last
    two read from ``hospital``) follow, then the Naver blog posting frequency and
    the Gangnam Unni review count when available. Every row is validated against
    ``KPIMetric`` and returned as a plain dict.

    Missing inputs (None) are treated as empty, and an item without a language
    is labelled "KR".
    """
    hospital = hospital or {}
    tiktok = hospital.get("tiktok") or {}
    cafe = hospital.get("naverCafe") or {}

    def _account_kpis(platform: str, accounts) -> list[dict]:
        # One follower row per account; accounts without a follower count are skipped.
        found = []
        for account in accounts or []:
            language = account.get("language") or "KR"
            followers = (account.get("raw_data") or {}).get("followers")
            kpi = _follower_kpi(f"{platform} {language} 팔로워", followers)
            if kpi:
                found.append(kpi)
        return found

    kpis: list[dict] = [
        *_account_kpis("Facebook", facebook),
        *_account_kpis("Instagram", instagram),
    ]

    for kpi in (
        _follower_kpi("YouTube 구독자", (youtube or {}).get("subscribers")),
        _follower_kpi("TikTok 팔로워", tiktok.get("followers")),
        _follower_kpi("Naver Cafe 회원 수", cafe.get("memberCount")),
    ):
        if kpi:
            kpis.append(kpi)

    if naver_blog:
        freq = _blog_frequency(naver_blog.get("posts") or [])
        if freq:
            cur, t3, t12 = freq
            kpis.append({
                "metric": "네이버 블로그 포스팅 빈도",
                "current": cur,
                "target_3_month": t3,
                "target_12_month": t12,
            })

    gu_reviews = (gangnam_unni or {}).get("totalReviews")
    if gu_reviews:
        # Review targets use their own tiers rather than the follower multipliers.
        if gu_reviews < 1000:
            rm3, rm12 = 2.0, 6.0
        elif gu_reviews < 5000:
            rm3, rm12 = 1.10, 1.50
        else:
            rm3, rm12 = 1.07, 1.27
        kpis.append({
            "metric": "강남언니 리뷰",
            "current": f"{gu_reviews:,}개",
            "target_3_month": f"{_round_clean(int(gu_reviews * rm3)):,}개",
            "target_12_month": f"{_round_clean(int(gu_reviews * rm12)):,}개",
        })

    return [KPIMetric.model_validate(k).model_dump() for k in kpis]
--- a/app/services/market.py
+++ b/app/services/market.py
@ -1,9 +1,7 @@
 import asyncio
 import json
 import logging
-from common.db.run import select_run
+from common.db import fetchone, execute
 from common.db.hospital import select_hospital
 from common.db.market import upsert_market_status, upsert_market_result
 from common.db.source import select_run_raw_data
 from integrations.llm.llm_service import LLMService
 from integrations.llm.prompt import (
    market_competitors_prompt,
@ -20,27 +18,49 @@ _TYPES = ["competitors", "keywords", "trend", "target_audience"]
 async def _save(analysis_run_id: str, analysis_type: str, result, exc: Exception | None) -> None:
    if exc:
        logger.warning("[market] %s failed run=%s: %s", analysis_type, analysis_run_id, exc)
-        await upsert_market_status(analysis_run_id, analysis_type, "failed")
+        await execute(
            "INSERT INTO market_analysis (analysis_run_id, analysis_type, status)"
            " VALUES (%s, %s, 'failed')"
            " ON DUPLICATE KEY UPDATE status = 'failed'",
            (analysis_run_id, analysis_type),
        )
    else:
-        await upsert_market_result(analysis_run_id, analysis_type, result.model_dump())
+        await execute(
            "INSERT INTO market_analysis (analysis_run_id, analysis_type, status, data)"
            " VALUES (%s, %s, 'done', %s)"
            " ON DUPLICATE KEY UPDATE status = 'done', data = VALUES(data)",
            (analysis_run_id, analysis_type, json.dumps(result.model_dump(), ensure_ascii=False)),
        )
 async def run_market_analysis(analysis_run_id: str) -> None:
    logger.info("[market] start run=%s", analysis_run_id)
-    run = await select_run(analysis_run_id)
+    run = await fetchone(
-    clinic = await select_hospital(run["hospital_id"])
+        "SELECT hospital_id FROM analysis_runs WHERE analysis_run_id = %s",
-    raw = await select_run_raw_data(analysis_run_id)
+        (analysis_run_id,),
-    mainpage = ((raw.get("mainpage") or [{}])[0].get("raw_data")) or {}
+    )
    clinic = await fetchone(
        "SELECT hospital_name, road_address, raw_data FROM hospital_baseinfo WHERE hospital_id = %s",
        (run["hospital_id"],),
    )
-    clinic_name = (clinic or {}).get("hospital_name") or ""
+    raw_data = clinic["raw_data"]
-    address = (clinic or {}).get("road_address") or ""
+    clinic_data = json.loads(raw_data) if isinstance(raw_data, str) else (raw_data or {})
-    services = mainpage.get("services", [])
+
    clinic_name = clinic["hospital_name"] or ""
    address = clinic["road_address"] or ""
    services = clinic_data.get("services", [])
    services_str = ", ".join(services[:3])
    primary_service = services[0] if services else ""
    for analysis_type in _TYPES:
-        await upsert_market_status(analysis_run_id, analysis_type, "processing")
+        await execute(
            "INSERT INTO market_analysis (analysis_run_id, analysis_type, status)"
            " VALUES (%s, %s, 'processing')"
            " ON DUPLICATE KEY UPDATE status = 'processing'",
            (analysis_run_id, analysis_type),
        )
    llm = LLMService(provider="perplexity")
    results = await asyncio.gather(
--- a/app/services/pipeline.py
+++ b/app/services/pipeline.py
@ -1,5 +1,5 @@
 import logging
-from common.db.run import select_run, update_run_status
+from common.db import fetchone, execute
 from models.status import AnalysisStatus
 from services.collect import collect_all
 from services.market import run_market_analysis
@ -8,23 +8,49 @@ from services.analysis import run_report_task, run_plan_task
 logger = logging.getLogger(__name__)
-async def run_pipeline(analysis_run_id: str) -> None:
+async def run_pipeline(analysis_run_id: str, extra_channels: dict | None = None) -> None:
    logger.info("[pipeline] start run=%s", analysis_run_id)
    extra_channels = extra_channels or {}
    # ── 1. Collect ──────────────────────────────────────────────────────────
-    run = await select_run(analysis_run_id)
+    run = await fetchone(
-    await collect_all(analysis_run_id, hospital_id=run["hospital_id"])
+        "SELECT hospital_id, instagram_data_id, facebook_data_id,"
        " naver_blog_data_id, youtube_data_id, gangnam_unni_data_id"
        " FROM analysis_runs WHERE analysis_run_id = %s",
        (analysis_run_id,),
    )
    await collect_all(
        analysis_run_id,
        hospital_id=run["hospital_id"],
        instagram_id=run["instagram_data_id"],
        facebook_id=run["facebook_data_id"],
        naver_blog_id=run["naver_blog_data_id"],
        youtube_id=run["youtube_data_id"],
        gangnam_unni_id=run["gangnam_unni_data_id"],
        tiktok_url=extra_channels.get("tiktok"),
        instagram_en_url=extra_channels.get("instagram_en"),
        facebook_en_url=extra_channels.get("facebook_en"),
    )
    # ── 2. Market ────────────────────────────────────────────────────────────
-    await update_run_status(analysis_run_id, AnalysisStatus.ANALYZING)
+    await execute(
        "UPDATE analysis_runs SET status = %s WHERE analysis_run_id = %s",
        (AnalysisStatus.ANALYZING, analysis_run_id),
    )
    await run_market_analysis(analysis_run_id)
    # ── 3. Report ────────────────────────────────────────────────────────────
    await run_report_task(analysis_run_id)
    # ── 4. Plan ──────────────────────────────────────────────────────────────
-    await update_run_status(analysis_run_id, AnalysisStatus.PLANNING)
+    await execute(
        "UPDATE analysis_runs SET status = %s WHERE analysis_run_id = %s",
        (AnalysisStatus.PLANNING, analysis_run_id),
    )
    await run_plan_task(analysis_run_id)
-    await update_run_status(analysis_run_id, AnalysisStatus.COMPLETED)
+    await execute(
        "UPDATE analysis_runs SET status = %s WHERE analysis_run_id = %s",
        (AnalysisStatus.COMPLETED, analysis_run_id),
    )
    logger.info("[pipeline] done run=%s", analysis_run_id)
--- a/app/services/website_parser.py
+++ b/app/services/website_parser.py
@ -1,248 +0,0 @@
 """HTML 정규식 기반 deterministic 추출 — tracking pixels / SNS / additional domains / main CTA."""
 import re
 from urllib.parse import urlparse
 # ── tracking pixels ──────────────────────────────────────────────────────────
 # Per-pixel signatures: a pixel counts as installed when any of its patterns
 # matches; the reported ID is the first non-empty capture group.
 _TRACKING_PIXEL_PATTERNS: dict[str, list[re.Pattern]] = {
    "Google Analytics": [
        re.compile(r"googletagmanager\.com/gtag/js\?id=(G-[A-Z0-9]+)", re.IGNORECASE),
        re.compile(r"google-analytics\.com/analytics\.js", re.IGNORECASE),
        re.compile(r"\bgtag\(\s*['\"]config['\"]\s*,\s*['\"](G-[A-Z0-9]+|UA-\d+-\d+)['\"]", re.IGNORECASE),
        re.compile(r"\b(UA-\d+-\d+)\b"),
    ],
    "Google Tag Manager": [
        re.compile(r"googletagmanager\.com/gtm\.js\?id=(GTM-[A-Z0-9]+)", re.IGNORECASE),
        re.compile(r"\b(GTM-[A-Z0-9]+)\b"),
    ],
    "Facebook Pixel": [
        re.compile(r"connect\.facebook\.net/[^/]+/fbevents\.js", re.IGNORECASE),
        re.compile(r"fbq\(\s*['\"]init['\"]\s*,\s*['\"](\d{10,20})['\"]", re.IGNORECASE),
        re.compile(r"facebook\.com/tr/?\?id=(\d{10,20})", re.IGNORECASE),  # noscript image pixel fallback
        re.compile(r"_fbq\.push\(\s*\[\s*['\"]init['\"]\s*,\s*['\"](\d{10,20})['\"]", re.IGNORECASE),  # legacy
    ],
    "Naver Analytics": [
        re.compile(r"wcs\.naver\.net/wcslog\.js", re.IGNORECASE),
        re.compile(r"\bwcs_add\b", re.IGNORECASE),
        re.compile(r"\bwcs\.inflow\(", re.IGNORECASE),
        re.compile(r"wcs_add\s*\[\s*['\"]wa['\"]\s*\]\s*=\s*['\"]([a-zA-Z0-9_]+)['\"]", re.IGNORECASE),  # wa ID
    ],
    "Kakao Pixel": [
        re.compile(r"t1\.daumcdn\.net/kas/static/kp\.js", re.IGNORECASE),
        re.compile(r"kakaoPixel\s*\(\s*['\"]?(\d+)['\"]?", re.IGNORECASE),
    ],
 }
 def extract_tracking_pixels(html: str) -> list[dict]:
    """Detect which tracking pixels appear in ``html`` and capture an ID where a pattern provides one.

    Returns one ``{"name", "installed", "details"}`` dict per detected pixel;
    ``details`` is None when no matching pattern captured an ID.
    """
    if not html:
        return []
    detected: list[dict] = []
    for pixel_name, signatures in _TRACKING_PIXEL_PATTERNS.items():
        hits = [hit for hit in (sig.search(html) for sig in signatures) if hit]
        if not hits:
            continue
        # First non-empty capture group, following the order the patterns are declared in.
        pixel_id = next(
            (hit.group(1) for hit in hits if hit.groups() and hit.group(1)),
            None,
        )
        detected.append({
            "name": pixel_name,
            "installed": True,
            "details": pixel_id,
        })
    return detected
 # ── main CTA ─────────────────────────────────────────────────────────────────
 # Keywords (Korean and English) that mark a button/anchor label as a call to action.
 _CTA_KEYWORDS = re.compile(
    r"(상담|예약|문의|신청|등록|Book\s*Now|Consult|Reservation|Contact|Apply)",
    re.IGNORECASE,
 )
 # CTA categories in priority order (phone > KakaoTalk > reservation).
 # Labels that match no category are left out of the result.
 _CTA_CATEGORIES: list[tuple[str, re.Pattern]] = [
    ("전화 상담",     re.compile(r"전화|\bCall\b|\bPhone\b", re.IGNORECASE)),
    ("카카오톡 상담", re.compile(r"카(카오)?톡|Kakao", re.IGNORECASE)),
    ("온라인 예약",   re.compile(r"예약|Reserv|Book", re.IGNORECASE)),
 ]
 # Capture the inner HTML of <button> and <a> elements (non-greedy, across newlines).
 _BUTTON_TAG = re.compile(r"<button\b[^>]*>(.*?)</button>", re.IGNORECASE | re.DOTALL)
 _ANCHOR_TAG = re.compile(r"<a\b[^>]*>(.*?)</a>", re.IGNORECASE | re.DOTALL)
 def _clean_text(inner: str) -> str:
    text = re.sub(r"<[^>]+>", "", inner)
    return re.sub(r"\s+", " ", text).strip()
 # Maximum number of CTA category labels joined into the main-CTA string.
 _MAIN_CTA_LIMIT = 3
 def _classify_cta(text: str) -> str | None:
    """Return the first CTA category whose pattern matches ``text``, or None when none does."""
    return next(
        (label for label, pattern in _CTA_CATEGORIES if pattern.search(text)),
        None,
    )
 def extract_main_cta(html: str) -> str:
    """Summarise the page's main calls to action as a " + "-joined list of category labels.

    Labels of ``<button>`` and ``<a>`` elements that are 1-20 characters long
    and contain a CTA keyword are classified; the distinct categories found are
    emitted in priority order, capped at ``_MAIN_CTA_LIMIT``.
    """
    if not html:
        return ""
    matched: set[str] = set()
    for tag_pattern in (_BUTTON_TAG, _ANCHOR_TAG):
        for tag in tag_pattern.finditer(html):
            label_text = _clean_text(tag.group(1))
            # Skip empty labels and anything too long to be a button caption.
            if not label_text or len(label_text) > 20:
                continue
            if not _CTA_KEYWORDS.search(label_text):
                continue
            category = _classify_cta(label_text)
            if category:
                matched.add(category)
    prioritized = [label for label, _ in _CTA_CATEGORIES if label in matched]
    return " + ".join(prioritized[:_MAIN_CTA_LIMIT])
 # ── SNS links ────────────────────────────────────────────────────────────────
 # SNS domain -> canonical platform name.
 _SNS_DOMAINS: dict[str, str] = {
    "facebook.com":   "Facebook",
    "instagram.com":  "Instagram",
    "youtube.com":    "YouTube",
    "youtu.be":       "YouTube",
    "tiktok.com":     "TikTok",
    "pf.kakao.com":   "KakaoTalk",
    "open.kakao.com": "KakaoTalk",
    "blog.naver.com": "Naver Blog",
    "cafe.naver.com": "Naver Cafe",
    "x.com":          "X",
    "twitter.com":    "X",
 }
 # Capture the inner HTML of the first <footer> / <header> element found by search().
 _FOOTER_BLOCK = re.compile(r"<footer\b[^>]*>(.*?)</footer>", re.IGNORECASE | re.DOTALL)
 _HEADER_BLOCK = re.compile(r"<header\b[^>]*>(.*?)</header>", re.IGNORECASE | re.DOTALL)
 # Match only the href inside <a> tags, so resource references such as
 # <link>/<script> are excluded (keeps CDN noise out).
 _ANCHOR_HREF_PATTERN = re.compile(
    r"""<a\b[^>]*\bhref\s*=\s*['"](https?://[^'"\s]+)['"]""",
    re.IGNORECASE,
 )
 def _platform_for(url: str) -> str | None:
    try:
        host = urlparse(url).netloc.lower().removeprefix("www.")
    except Exception:
        return None
    for domain, name in _SNS_DOMAINS.items():
        if host == domain or host.endswith("." + domain):
            return name
    return None
 def _location_for(url: str, html: str) -> str:
    """url 이 <header>/<footer> 블록 안에 있는지 판정 — 둘 다 아니면 'body'."""
    if not html:
        return ""
    needle = re.escape(url)
    footer = _FOOTER_BLOCK.search(html)
    if footer and re.search(needle, footer.group(1)):
        return "footer"
    header = _HEADER_BLOCK.search(html)
    if header and re.search(needle, header.group(1)):
        return "header"
    return "body"
 def _extract_anchor_hrefs(html: str) -> list[str]:
    """<a href="http..."> 만 추출 — 순서 보존, 중복 제거."""
    if not html:
        return []
    seen: dict[str, None] = {}
    for url in _ANCHOR_HREF_PATTERN.findall(html):
        if url not in seen:
            seen[url] = None
    return list(seen.keys())
 def extract_sns_links(html: str) -> list[dict]:
    """List the SNS links found in anchor hrefs, keeping only the first URL per platform.

    Each entry holds the ``platform`` name, the ``url``, and the ``location``
    of that URL within the page.
    """
    by_platform: dict[str, dict] = {}
    for url in _extract_anchor_hrefs(html):
        platform = _platform_for(url)
        if platform and platform not in by_platform:
            by_platform[platform] = {
                "platform": platform,
                "url": url,
                "location": _location_for(url, html),
            }
    return list(by_platform.values())
 # ── additional domains (global / multilingual sites only) ────────────────────
 # Language code -> Korean label for that language.
 _LANG_LABEL: dict[str, str] = {
    "en": "영어", "eng": "영어",
    "zh": "중국어", "cn": "중국어", "chn": "중국어",
    "ja": "일본어", "jp": "일본어",
    "ko": "한국어", "kor": "한국어", "kr": "한국어",
    "vi": "베트남어", "vn": "베트남어",
    "th": "태국어", "thai": "태국어",
    "ru": "러시아어",
    "es": "스페인어",
    "mn": "몽골어",
    "ar": "아랍어", "arab": "아랍어",
    "id": "인도네시아어",
    "de": "독일어",
    "fr": "프랑스어",
    "pt": "포르투갈어",
 }
 # <li data-lang="xx">...</li>: group 1 is the language code, group 2 the item's inner HTML.
 _LANG_LI_PATTERN = re.compile(
    r'<li\b[^>]*\bdata-lang\s*=\s*[\'"]([^\'"]+)[\'"][^>]*>(.*?)</li>',
    re.IGNORECASE | re.DOTALL,
 )
 def extract_additional_domains(html: str, primary_host: str) -> list[dict]:
    """Collect language-specific sibling sites linked from the page.

    Two signals are used: anchors inside ``<li data-lang="xx">`` items, and
    anchor hosts with at least three labels whose first label is a known
    language code. Each distinct host other than ``primary_host`` is reported
    once as ``{"domain", "purpose"}``, where purpose is the Korean language label.
    """
    if not html:
        return []
    primary = (primary_host or "").lower().removeprefix("www.")
    collected: dict[str, dict] = {}

    def _register(raw_host: str, language_label: str) -> None:
        normalized = raw_host.lower().removeprefix("www.")
        if normalized and normalized != primary and normalized not in collected:
            collected[normalized] = {"domain": normalized, "purpose": language_label}

    # 1) anchors inside <li data-lang="xx"> items
    for lang_item in _LANG_LI_PATTERN.finditer(html):
        language_label = _LANG_LABEL.get(lang_item.group(1).lower())
        if language_label:
            link = re.search(
                r'<a[^>]*href\s*=\s*[\'"](https?://[^\'"\s]+)[\'"]',
                lang_item.group(2),
                re.IGNORECASE,
            )
            if link:
                _register(urlparse(link.group(1)).netloc, language_label)

    # 2) hosts whose leading subdomain label is a language code
    for url in _extract_anchor_hrefs(html):
        host = urlparse(url).netloc.lower().removeprefix("www.")
        labels = host.split(".")
        if len(labels) >= 3:
            language_label = _LANG_LABEL.get(labels[0])
            if language_label:
                _register(host, language_label)
    return list(collected.values())
--- a/docker-compose-template.yml
+++ b/docker-compose-template.yml
@ -12,8 +12,3 @@ services:
    volumes:
      - ./app:/app
    restart: unless-stopped
    networks:
       - o2o-net
 networks:
  o2o-net:
    external: true
--- a/requirements.txt
+++ b/requirements.txt
@ -10,4 +10,3 @@ passlib[bcrypt]==1.7.4
 python-multipart==0.0.26
 uuid6==2025.0.1
 aiomysql==0.3.2
 resvg-py==0.3.2