Compare commits
18 Commits
channel-br
...
main
| Author | SHA1 | Date |
|---|---|---|
|
|
5504f79a9d | |
|
|
9a9ce1319f | |
|
|
af61713697 | |
|
|
b844951ad8 | |
|
|
009d95377a | |
|
|
c23e620fb4 | |
|
|
b6a0134ba7 | |
|
|
86af23b56d | |
|
|
3b4c154fb2 | |
|
|
e5a9036e47 | |
|
|
5dbc7d7ffe | |
|
|
71b605eaa6 | |
|
|
c9c5ee9177 | |
|
|
aff2b2720d | |
|
|
eed57729d9 | |
|
|
ab215395c6 | |
|
|
d1293f9188 | |
|
|
0e68cbe71b |
|
|
@ -1,85 +1,4 @@
|
|||
-- 테이블 순서는 관계를 고려하여 한 번에 실행해도 에러가 발생하지 않게 정렬되었습니다.
|
||||
|
||||
-- instagram_data Table Create SQL
|
||||
-- 테이블 생성 SQL - instagram_data
|
||||
CREATE TABLE instagram_data
|
||||
(
|
||||
`id` INT NOT NULL AUTO_INCREMENT,
|
||||
`hospital_id` CHAR(36) NOT NULL,
|
||||
`url` VARCHAR(500) NOT NULL,
|
||||
`status` VARCHAR(20) NOT NULL DEFAULT 'start',
|
||||
`raw_data` JSON NULL,
|
||||
`created_at` TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP,
|
||||
PRIMARY KEY (id)
|
||||
);
|
||||
|
||||
-- Index 설정 SQL - instagram_data(hospital_id)
|
||||
CREATE INDEX IX_instagram_data_1
|
||||
ON instagram_data(hospital_id);
|
||||
|
||||
|
||||
-- facebook_data Table Create SQL
|
||||
-- 테이블 생성 SQL - facebook_data
|
||||
CREATE TABLE facebook_data
|
||||
(
|
||||
`id` INT NOT NULL AUTO_INCREMENT,
|
||||
`hospital_id` CHAR(36) NOT NULL,
|
||||
`url` VARCHAR(500) NOT NULL,
|
||||
`status` VARCHAR(20) NOT NULL DEFAULT 'start',
|
||||
`raw_data` JSON NULL,
|
||||
`created_at` TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP,
|
||||
PRIMARY KEY (id)
|
||||
);
|
||||
|
||||
-- Index 설정 SQL - facebook_data(hospital_id)
|
||||
CREATE INDEX IX_facebook_data_1
|
||||
ON facebook_data(hospital_id);
|
||||
|
||||
|
||||
-- naver_blog_data Table Create SQL
|
||||
-- 테이블 생성 SQL - naver_blog_data
|
||||
CREATE TABLE naver_blog_data
|
||||
(
|
||||
`id` INT NOT NULL AUTO_INCREMENT,
|
||||
`hospital_id` CHAR(36) NOT NULL,
|
||||
`url` VARCHAR(500) NOT NULL,
|
||||
`status` VARCHAR(20) NOT NULL DEFAULT 'start',
|
||||
`raw_data` JSON NULL,
|
||||
`created_at` TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP,
|
||||
PRIMARY KEY (id)
|
||||
);
|
||||
|
||||
-- Index 설정 SQL - naver_blog_data(hospital_id)
|
||||
CREATE INDEX IX_naver_blog_data_1
|
||||
ON naver_blog_data(hospital_id);
|
||||
|
||||
|
||||
-- hospital_baseinfo Table Create SQL
|
||||
-- 테이블 생성 SQL - hospital_baseinfo
|
||||
CREATE TABLE hospital_baseinfo
|
||||
(
|
||||
`hospital_id` CHAR(36) NOT NULL,
|
||||
`owner_user_id` INT NOT NULL,
|
||||
`hospital_name` VARCHAR(50) NOT NULL,
|
||||
`hospital_name_en` VARCHAR(50) NULL,
|
||||
`brn` VARCHAR(50) NOT NULL,
|
||||
`road_address` VARCHAR(100) NULL,
|
||||
`site_address` VARCHAR(100) NULL,
|
||||
`url` VARCHAR(500) NULL,
|
||||
`status` VARCHAR(20) NOT NULL DEFAULT 'start',
|
||||
`raw_data` JSON NULL,
|
||||
`created_at` TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP,
|
||||
`updated_at` TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP,
|
||||
PRIMARY KEY (hospital_id)
|
||||
);
|
||||
|
||||
-- Index 설정 SQL - hospital_baseinfo(owner_user_id)
|
||||
CREATE INDEX IX_hospital_baseinfo_1
|
||||
ON hospital_baseinfo(owner_user_id);
|
||||
|
||||
|
||||
-- user_info Table Create SQL
|
||||
-- 테이블 생성 SQL - user_info
|
||||
-- user_info
|
||||
CREATE TABLE user_info
|
||||
(
|
||||
`user_id` INT NOT NULL AUTO_INCREMENT,
|
||||
|
|
@ -90,52 +9,49 @@ CREATE TABLE user_info
|
|||
PRIMARY KEY (user_id)
|
||||
);
|
||||
|
||||
-- youtube_data Table Create SQL
|
||||
CREATE TABLE youtube_data
|
||||
|
||||
-- hospital_baseinfo
|
||||
CREATE TABLE hospital_baseinfo
|
||||
(
|
||||
`id` INT NOT NULL AUTO_INCREMENT,
|
||||
`hospital_id` CHAR(36) NOT NULL,
|
||||
`url` VARCHAR(500) NOT NULL,
|
||||
`owner_user_id` INT NOT NULL,
|
||||
`hospital_name` VARCHAR(50) NOT NULL,
|
||||
`hospital_name_en` VARCHAR(50) NULL,
|
||||
`brn` VARCHAR(50) NOT NULL,
|
||||
`road_address` VARCHAR(100) NULL,
|
||||
`site_address` VARCHAR(100) NULL,
|
||||
`status` VARCHAR(20) NOT NULL DEFAULT 'start',
|
||||
`raw_data` JSON NULL,
|
||||
`created_at` TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP,
|
||||
PRIMARY KEY (id)
|
||||
`updated_at` TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP,
|
||||
PRIMARY KEY (hospital_id)
|
||||
);
|
||||
|
||||
-- Index 설정 SQL - youtube_data(hospital_id)
|
||||
CREATE INDEX IX_youtube_data_1
|
||||
ON youtube_data(hospital_id);
|
||||
CREATE INDEX IX_hospital_baseinfo_1 ON hospital_baseinfo (owner_user_id);
|
||||
|
||||
|
||||
-- gangnam_unni_data Table Create SQL
|
||||
CREATE TABLE gangnam_unni_data
|
||||
-- remote_source: 병원별 채널 소스 정보 (instagram/facebook/naver_blog/youtube/gangnam_unni 등)
|
||||
CREATE TABLE remote_source
|
||||
(
|
||||
`id` INT NOT NULL AUTO_INCREMENT,
|
||||
`source_id` INT NOT NULL AUTO_INCREMENT,
|
||||
`hospital_id` CHAR(36) NOT NULL,
|
||||
`source_type` VARCHAR(50) NOT NULL,
|
||||
`language` CHAR(2) NULL,
|
||||
`url` VARCHAR(500) NOT NULL,
|
||||
`status` VARCHAR(20) NOT NULL DEFAULT 'start',
|
||||
`raw_data` JSON NULL,
|
||||
`created_at` TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP,
|
||||
PRIMARY KEY (id)
|
||||
PRIMARY KEY (source_id)
|
||||
);
|
||||
|
||||
-- Index 설정 SQL - gangnam_unni_data(hospital_id)
|
||||
CREATE INDEX IX_gangnam_unni_data_1
|
||||
ON gangnam_unni_data(hospital_id);
|
||||
CREATE INDEX IX_remote_source_1 ON remote_source (hospital_id);
|
||||
CREATE INDEX IX_remote_source_2 ON remote_source (hospital_id, source_type);
|
||||
|
||||
|
||||
-- analysis_runs Table Create SQL
|
||||
-- analysis_runs
|
||||
CREATE TABLE analysis_runs
|
||||
(
|
||||
`analysis_run_id` CHAR(36) NOT NULL,
|
||||
`hospital_id` CHAR(36) NOT NULL,
|
||||
`owner_user_id` INT NOT NULL DEFAULT 0,
|
||||
`status` VARCHAR(50) NOT NULL DEFAULT 'discovering',
|
||||
`instagram_data_id` INT NULL,
|
||||
`facebook_data_id` INT NULL,
|
||||
`naver_blog_data_id` INT NULL,
|
||||
`youtube_data_id` INT NULL,
|
||||
`gangnam_unni_data_id` INT NULL,
|
||||
`report_data` JSON NULL,
|
||||
`plan_data` JSON NULL,
|
||||
`created_at` TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP,
|
||||
|
|
@ -143,16 +59,30 @@ CREATE TABLE analysis_runs
|
|||
PRIMARY KEY (analysis_run_id)
|
||||
);
|
||||
|
||||
-- Index 설정 SQL - analysis_runs(hospital_id)
|
||||
CREATE INDEX IX_analysis_runs_1
|
||||
ON analysis_runs(hospital_id);
|
||||
|
||||
-- Index 설정 SQL - analysis_runs(owner_user_id)
|
||||
CREATE INDEX IX_analysis_runs_2
|
||||
ON analysis_runs(owner_user_id);
|
||||
CREATE INDEX IX_analysis_runs_1 ON analysis_runs (hospital_id);
|
||||
CREATE INDEX IX_analysis_runs_2 ON analysis_runs (owner_user_id);
|
||||
|
||||
|
||||
-- file_data Table Create SQL
|
||||
-- raw_info: 분석 실행별 수집 원시 데이터
|
||||
CREATE TABLE raw_info
|
||||
(
|
||||
`info_id` INT NOT NULL AUTO_INCREMENT,
|
||||
`source_id` INT NOT NULL,
|
||||
`analysis_run_id` CHAR(36) NOT NULL,
|
||||
`data_tag` VARCHAR(50) NOT NULL DEFAULT 'default',
|
||||
`status` VARCHAR(20) NOT NULL DEFAULT 'start',
|
||||
`raw_data` JSON NULL,
|
||||
`logo_url` VARCHAR(500) NULL,
|
||||
`created_at` TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP,
|
||||
`updated_at` TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP,
|
||||
PRIMARY KEY (info_id)
|
||||
);
|
||||
|
||||
CREATE INDEX IX_raw_info_1 ON raw_info (analysis_run_id);
|
||||
CREATE INDEX IX_raw_info_2 ON raw_info (source_id);
|
||||
|
||||
|
||||
-- file_data
|
||||
CREATE TABLE file_data
|
||||
(
|
||||
`id` INT NOT NULL AUTO_INCREMENT,
|
||||
|
|
@ -169,7 +99,7 @@ CREATE TABLE file_data
|
|||
);
|
||||
|
||||
|
||||
-- hospital_history Table Create SQL
|
||||
-- hospital_history
|
||||
CREATE TABLE hospital_history
|
||||
(
|
||||
`id` INT NOT NULL AUTO_INCREMENT,
|
||||
|
|
@ -180,24 +110,17 @@ CREATE TABLE hospital_history
|
|||
`brn` VARCHAR(50) NOT NULL,
|
||||
`road_address` VARCHAR(100) NULL,
|
||||
`site_address` VARCHAR(100) NULL,
|
||||
`url` VARCHAR(500) NULL,
|
||||
`status` VARCHAR(20) NOT NULL,
|
||||
`raw_data` JSON NULL,
|
||||
`analysis_run_id` CHAR(36) NULL,
|
||||
`created_at` TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP,
|
||||
PRIMARY KEY (id)
|
||||
);
|
||||
|
||||
-- Index 설정 SQL - hospital_history(hospital_id)
|
||||
CREATE INDEX IX_hospital_history_1
|
||||
ON hospital_history(hospital_id);
|
||||
|
||||
-- Index 설정 SQL - hospital_history(analysis_run_id)
|
||||
CREATE INDEX IX_hospital_history_2
|
||||
ON hospital_history(analysis_run_id);
|
||||
CREATE INDEX IX_hospital_history_1 ON hospital_history (hospital_id);
|
||||
CREATE INDEX IX_hospital_history_2 ON hospital_history (analysis_run_id);
|
||||
|
||||
|
||||
-- market_analysis Table Create SQL
|
||||
-- market_analysis
|
||||
CREATE TABLE market_analysis
|
||||
(
|
||||
`id` INT NOT NULL AUTO_INCREMENT,
|
||||
|
|
@ -210,7 +133,4 @@ CREATE TABLE market_analysis
|
|||
UNIQUE KEY UQ_market_analysis (analysis_run_id, analysis_type)
|
||||
);
|
||||
|
||||
-- Index 설정 SQL - market_analysis(analysis_run_id)
|
||||
CREATE INDEX IX_market_analysis_1
|
||||
ON market_analysis(analysis_run_id);
|
||||
|
||||
CREATE INDEX IX_market_analysis_1 ON market_analysis (analysis_run_id);
|
||||
|
|
|
|||
|
|
@ -2,21 +2,23 @@ import logging
|
|||
import uuid6
|
||||
from fastapi import APIRouter, BackgroundTasks, Depends, File, Form, HTTPException, UploadFile, status
|
||||
from common.deps import verify_api_key
|
||||
from common.db import fetchone, insert_instagram_row, insert_facebook_row, insert_naver_blog_row, insert_youtube_row, insert_gangnam_unni_row, insert_analysis_run
|
||||
from common.db.hospital import select_hospital
|
||||
from common.db.source import select_source_mainpage, insert_source, insert_raw_info
|
||||
from common.db.run import insert_run, select_run_status
|
||||
from common.utils import _normalize_homepage, _with_scheme
|
||||
from models.analysis import AnalysisCreate, AnalysisStartResponse, AnalysisStatusResponse
|
||||
from models.file import FileListItem, FileType, FileUploadResponse
|
||||
from models.status import AnalysisStatus
|
||||
from models.status import AnalysisStatus, SourceType
|
||||
from services.pipeline import run_pipeline
|
||||
from services.file import get_analysis_files_response, handle_analysis_file_upload, soft_delete_analysis_file
|
||||
from services.file_data import get_analysis_files_response, handle_analysis_file_upload, soft_delete_analysis_file
|
||||
from mock_urls import MOCK_CLINICS
|
||||
from common.utils import _normalize_homepage, _with_scheme
|
||||
|
||||
router = APIRouter(prefix="/api/analysis", tags=["analysis"], dependencies=[Depends(verify_api_key)])
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# 추후 DB에 클리닉별로 매핑할 채널(틱톡/영문 인스타·페북). 지금은 mock_urls에서 homepage 매칭으로 보충.
|
||||
def _extra_channels_from_mockurls(homepage_url: str) -> dict:
|
||||
"""homepage로 mock_urls에서 클리닉을 찾아 틱톡/영문 인스타·페북 URL 반환 (없으면 {})."""
|
||||
|
||||
# 클라가 일부만 보내거나 빈 값이면 mock_urls 의 동일 homepage 매칭으로 채워줌 (메인 + 부가 채널 동일 규칙).
|
||||
def _channels_from_mockurls(homepage_url: str) -> dict:
|
||||
target = _normalize_homepage(homepage_url)
|
||||
if not target:
|
||||
return {}
|
||||
|
|
@ -24,6 +26,13 @@ def _extra_channels_from_mockurls(homepage_url: str) -> dict:
|
|||
urls = c["urls"]
|
||||
if _normalize_homepage(urls.get("homepage", "")) == target:
|
||||
return {
|
||||
# main
|
||||
"instagram": _with_scheme(urls.get("instagram")),
|
||||
"facebook": _with_scheme(urls.get("facebook")),
|
||||
"naver_blog": _with_scheme(urls.get("naverBlog")),
|
||||
"youtube": _with_scheme(urls.get("youtube")),
|
||||
"gangnam_unni": _with_scheme(urls.get("gangnamUnni")),
|
||||
# extra
|
||||
"tiktok": _with_scheme(urls.get("tiktok")),
|
||||
"instagram_en": _with_scheme(urls.get("instagramEn")),
|
||||
"facebook_en": _with_scheme(urls.get("facebookEn")),
|
||||
|
|
@ -39,37 +48,51 @@ async def start_analysis(body: AnalysisCreate, background_tasks: BackgroundTasks
|
|||
analysis_run_id = str(uuid6.uuid7())
|
||||
hospital_id = body.clinic_id
|
||||
|
||||
# 사실 hospital과 owner_user_id 비교 후 검증이 필요한 거지만 일단 PoC 니까. 나중에 바꿉니다.
|
||||
hospital = await fetchone(
|
||||
"SELECT owner_user_id, url FROM hospital_baseinfo WHERE hospital_id = %s",
|
||||
(hospital_id,),
|
||||
)
|
||||
# 사실 hospital 과 owner_user_id 비교 후 검증이 필요한 거지만 일단 PoC 니까. 나중에 바꿉니다.
|
||||
hospital = await select_hospital(hospital_id)
|
||||
if not hospital:
|
||||
raise HTTPException(status_code=409, detail="Clinic not found")
|
||||
|
||||
# 사용자가 'gangnamunni.com/...' 같이 scheme/www 없이 줘도 _with_scheme이 https://www. 보강.
|
||||
ig_id = await insert_instagram_row(hospital_id, _with_scheme(body.channels.instagram)) if body.channels.instagram else None
|
||||
fb_id = await insert_facebook_row(hospital_id, _with_scheme(body.channels.facebook)) if body.channels.facebook else None
|
||||
nb_id = await insert_naver_blog_row(hospital_id, _with_scheme(body.channels.naver_blog)) if body.channels.naver_blog else None
|
||||
yt_id = await insert_youtube_row(hospital_id, _with_scheme(body.channels.youtube)) if body.channels.youtube else None
|
||||
gu_id = await insert_gangnam_unni_row(hospital_id, _with_scheme(body.channels.gangnam_unni)) if body.channels.gangnam_unni else None
|
||||
analysis_run_id = await insert_run(analysis_run_id, hospital_id, hospital["owner_user_id"])
|
||||
|
||||
analysis_run_id = await insert_analysis_run(
|
||||
analysis_run_id, hospital_id, hospital["owner_user_id"],
|
||||
ig_id, fb_id, nb_id, yt_id, gu_id,
|
||||
)
|
||||
mainpage = await select_source_mainpage(hospital_id)
|
||||
if mainpage:
|
||||
await insert_raw_info(mainpage["source_id"], analysis_run_id, data_tag=SourceType.MAINPAGE)
|
||||
# branding (HTML/CSS + Vision 로고 매칭) — mainpage 와 같은 homepage URL 을 source 로 사용.
|
||||
branding_id = await insert_source(hospital_id, SourceType.BRANDING, mainpage["url"], language="KR")
|
||||
await insert_raw_info(branding_id, analysis_run_id, data_tag=SourceType.BRANDING)
|
||||
|
||||
# 클라 값 우선, 없으면 보충 (추후 DB에서 클리닉별로 가져올 값)
|
||||
mock_extra = _extra_channels_from_mockurls(hospital["url"])
|
||||
extra_channels = {
|
||||
"tiktok": body.channels.tiktok or mock_extra.get("tiktok"),
|
||||
"instagram_en": body.channels.instagram_en or mock_extra.get("instagram_en"),
|
||||
"facebook_en": body.channels.facebook_en or mock_extra.get("facebook_en"),
|
||||
"kakao_talk": body.channels.kakao_talk or mock_extra.get("kakao_talk"),
|
||||
"naver_cafe": body.channels.naver_cafe or mock_extra.get("naver_cafe"),
|
||||
}
|
||||
logger.info("[analysis] extra_channels=%s (mock_matched=%s)", extra_channels, bool(mock_extra))
|
||||
background_tasks.add_task(run_pipeline, analysis_run_id, extra_channels)
|
||||
# 클라가 안 보낸 채널은 mock_urls 에서 homepage 매칭으로 보충 (main + extra 동일 규칙).
|
||||
mock = _channels_from_mockurls((mainpage or {}).get("url") or "")
|
||||
|
||||
# 메인 5채널 (KR). _with_scheme 으로 'gangnamunni.com/...' 같이 scheme/www 없이 와도 보강.
|
||||
main_channels = [
|
||||
(SourceType.INSTAGRAM, _with_scheme(body.channels.instagram) or mock.get("instagram")),
|
||||
(SourceType.FACEBOOK, _with_scheme(body.channels.facebook) or mock.get("facebook")),
|
||||
(SourceType.NAVER_BLOG, _with_scheme(body.channels.naver_blog) or mock.get("naver_blog")),
|
||||
(SourceType.YOUTUBE, _with_scheme(body.channels.youtube) or mock.get("youtube")),
|
||||
(SourceType.GANGNAM_UNNI, _with_scheme(body.channels.gangnam_unni) or mock.get("gangnam_unni")),
|
||||
]
|
||||
for source_type, url in main_channels:
|
||||
if url:
|
||||
source_id = await insert_source(hospital_id, source_type, url, language="KR")
|
||||
await insert_raw_info(source_id, analysis_run_id, data_tag=source_type)
|
||||
|
||||
# 부가 채널 — instagram_en/facebook_en 은 동일 source_type 에 language='EN' 으로 구분, 나머지는 자체 source_type.
|
||||
extra_channels = [
|
||||
(SourceType.INSTAGRAM, "EN", _with_scheme(body.channels.instagram_en) or mock.get("instagram_en")),
|
||||
(SourceType.FACEBOOK, "EN", _with_scheme(body.channels.facebook_en) or mock.get("facebook_en")),
|
||||
(SourceType.TIKTOK, "KR", _with_scheme(body.channels.tiktok) or mock.get("tiktok")),
|
||||
(SourceType.KAKAOTALK, "KR", _with_scheme(body.channels.kakao_talk) or mock.get("kakao_talk")),
|
||||
(SourceType.NAVER_CAFE, "KR", _with_scheme(body.channels.naver_cafe) or mock.get("naver_cafe")),
|
||||
]
|
||||
for source_type, language, url in extra_channels:
|
||||
if url:
|
||||
source_id = await insert_source(hospital_id, source_type, url, language=language)
|
||||
await insert_raw_info(source_id, analysis_run_id, data_tag=source_type)
|
||||
|
||||
logger.info("[analysis] main+extra channels resolved (mock_matched=%s)", bool(mock))
|
||||
background_tasks.add_task(run_pipeline, analysis_run_id)
|
||||
|
||||
return AnalysisStartResponse(
|
||||
analysis_run_id=analysis_run_id,
|
||||
|
|
@ -106,12 +129,12 @@ async def delete_analysis_run_file(run_id: str, file_id: int) -> None:
|
|||
@router.get("/{run_id}/status", response_model=AnalysisStatusResponse)
|
||||
async def get_analysis_status(run_id: str):
|
||||
logger.info("GET /api/analysis/%s/status", run_id)
|
||||
row = await fetchone("SELECT status FROM analysis_runs WHERE analysis_run_id = %s", (run_id,))
|
||||
if not row:
|
||||
run_status = await select_run_status(run_id)
|
||||
if run_status is None:
|
||||
raise HTTPException(status_code=404, detail="Run not found")
|
||||
return AnalysisStatusResponse(
|
||||
analysis_run_id=run_id,
|
||||
status=AnalysisStatus(row["status"]),
|
||||
status=AnalysisStatus(run_status),
|
||||
progress=50.0,
|
||||
current_step="",
|
||||
channel_errors={},
|
||||
|
|
|
|||
|
|
@ -2,7 +2,8 @@ import logging
|
|||
import uuid6
|
||||
from fastapi import APIRouter, Depends, HTTPException, status
|
||||
from common.deps import verify_api_key
|
||||
from common.db import insert_hospital, fetchone
|
||||
from common.db.hospital import select_hospital, insert_hospital
|
||||
from common.db.source import insert_source
|
||||
from common.utils import get_env
|
||||
from integrations.firecrawl import FirecrawlClient
|
||||
from models.clinic import ClinicCreate, ClinicCreateResponse, ClinicResponse, ClinicHistoryResponse, RunSummary
|
||||
|
|
@ -30,9 +31,8 @@ async def create_clinic(body: ClinicCreate):
|
|||
name=info["clinicName"],
|
||||
name_en=info.get("clinicNameEn"),
|
||||
road_address=info.get("address"),
|
||||
url=body.url,
|
||||
raw_data=info,
|
||||
)
|
||||
await insert_source(hospital_id, "mainpage", body.url)
|
||||
return ClinicCreateResponse(
|
||||
id=hospital_id,
|
||||
url=body.url,
|
||||
|
|
@ -44,11 +44,7 @@ async def create_clinic(body: ClinicCreate):
|
|||
@router.get("/{hospital_id}", response_model=ClinicResponse)
|
||||
async def get_clinic(hospital_id: str):
|
||||
logger.info("GET /api/clinics/%s", hospital_id)
|
||||
row = await fetchone(
|
||||
"SELECT hospital_id, hospital_name, hospital_name_en, road_address, url, status, raw_data, created_at, updated_at"
|
||||
" FROM hospital_baseinfo WHERE hospital_id = %s",
|
||||
(hospital_id,),
|
||||
)
|
||||
row = await select_hospital(hospital_id)
|
||||
if not row:
|
||||
raise HTTPException(status_code=404, detail="Clinic not found")
|
||||
return ClinicResponse(**{**row, "created_at": str(row["created_at"]), "updated_at": str(row["updated_at"])})
|
||||
|
|
|
|||
|
|
@ -1,10 +1,13 @@
|
|||
import json
|
||||
import logging
|
||||
from fastapi import APIRouter, Depends, HTTPException, Response
|
||||
from common.db import fetchone
|
||||
from common.db.run import select_run_with_clinic
|
||||
from common.db.source import select_run_source_raw
|
||||
from common.deps import verify_api_key
|
||||
from common.utils import _with_scheme
|
||||
from integrations.llm.schemas.plan import PlanOutput
|
||||
from models.plan import PlanApiResponse
|
||||
from models.status import SourceType
|
||||
|
||||
router = APIRouter(prefix="/api/plan", tags=["plan"], dependencies=[Depends(verify_api_key)])
|
||||
logger = logging.getLogger(__name__)
|
||||
|
|
@ -13,24 +16,21 @@ logger = logging.getLogger(__name__)
|
|||
@router.get("/{run_id}", response_model=PlanApiResponse, response_model_by_alias=True)
|
||||
async def get_plan(run_id: str):
|
||||
logger.info("GET /api/plan/%s", run_id)
|
||||
row = await fetchone(
|
||||
"SELECT ar.plan_data, ar.created_at, h.hospital_name, h.hospital_name_en, h.url"
|
||||
" FROM analysis_runs ar"
|
||||
" JOIN hospital_baseinfo h ON ar.hospital_id = h.hospital_id"
|
||||
" WHERE ar.analysis_run_id = %s",
|
||||
(run_id,),
|
||||
)
|
||||
row = await select_run_with_clinic(run_id)
|
||||
if row is None:
|
||||
raise HTTPException(status_code=404, detail="Run not found")
|
||||
if row["plan_data"] is None:
|
||||
return Response(status_code=204)
|
||||
data = json.loads(row["plan_data"]) if isinstance(row["plan_data"], str) else row["plan_data"]
|
||||
plan = PlanOutput(**data)
|
||||
# 강남언니에서 긁어온 이름이 있으면 우선 (hospital_baseinfo 의 정식 이름보다 강남언니가 더 광고용 표기).
|
||||
gu = await select_run_source_raw(run_id, SourceType.GANGNAM_UNNI) or {}
|
||||
clinic_name = gu.get("name") or row["hospital_name"]
|
||||
return PlanApiResponse(
|
||||
id=run_id,
|
||||
clinic_name=row["hospital_name"],
|
||||
clinic_name=clinic_name,
|
||||
clinic_name_en=row["hospital_name_en"],
|
||||
created_at=str(row["created_at"]),
|
||||
target_url=row["url"],
|
||||
target_url=_with_scheme(row["target_url"]),
|
||||
**plan.model_dump(),
|
||||
)
|
||||
|
|
|
|||
|
|
@ -1,8 +1,9 @@
|
|||
import json
|
||||
import logging
|
||||
from fastapi import APIRouter, Depends, HTTPException, Response
|
||||
from common.db import fetchone
|
||||
from common.db.run import select_run_with_clinic
|
||||
from common.deps import verify_api_key
|
||||
from common.utils import _with_scheme
|
||||
from integrations.llm.schemas.report import ReportOutput
|
||||
from models.report import MarketingReportResponse
|
||||
|
||||
|
|
@ -13,13 +14,7 @@ logger = logging.getLogger(__name__)
|
|||
@router.get("/{run_id}", response_model=MarketingReportResponse, response_model_by_alias=True)
|
||||
async def get_report(run_id: str):
|
||||
logger.info("GET /api/report/%s", run_id)
|
||||
row = await fetchone(
|
||||
"SELECT ar.report_data, ar.created_at, h.hospital_name, h.hospital_name_en, h.url"
|
||||
" FROM analysis_runs ar"
|
||||
" JOIN hospital_baseinfo h ON ar.hospital_id = h.hospital_id"
|
||||
" WHERE ar.analysis_run_id = %s",
|
||||
(run_id,),
|
||||
)
|
||||
row = await select_run_with_clinic(run_id)
|
||||
if row is None:
|
||||
raise HTTPException(status_code=404, detail="Run not found")
|
||||
if row["report_data"] is None:
|
||||
|
|
@ -31,6 +26,6 @@ async def get_report(run_id: str):
|
|||
clinic_name=row["hospital_name"],
|
||||
clinic_name_en=row["hospital_name_en"],
|
||||
created_at=str(row["created_at"]),
|
||||
target_url=row["url"],
|
||||
target_url=_with_scheme(row["target_url"]),
|
||||
**llm_output.model_dump(exclude={"id", "created_at", "target_url"}),
|
||||
)
|
||||
|
|
|
|||
287
app/common/db.py
287
app/common/db.py
|
|
@ -1,287 +0,0 @@
|
|||
import json
|
||||
import os
|
||||
import aiomysql
|
||||
from common.utils import get_env
|
||||
|
||||
_pool: aiomysql.Pool | None = None
|
||||
|
||||
|
||||
async def get_pool() -> aiomysql.Pool:
    """Return the module-wide aiomysql pool, creating it on the first call.

    Connection settings come from the environment: MYSQL_HOST, MYSQL_USER,
    MYSQL_PASSWORD and MYSQL_DB are read through get_env, and MYSQL_PORT is
    read through os.getenv with "3306" as the fallback.
    """
    global _pool
    if _pool is not None:
        # A previous call already built the pool; hand back the same object.
        return _pool
    pool_options = dict(
        host=get_env("MYSQL_HOST"),
        port=int(os.getenv("MYSQL_PORT", "3306")),
        user=get_env("MYSQL_USER"),
        password=get_env("MYSQL_PASSWORD"),
        db=get_env("MYSQL_DB"),
        charset="utf8mb4",
        minsize=0,
        maxsize=30,
        connect_timeout=10,
    )
    _pool = await aiomysql.create_pool(**pool_options)
    return _pool
|
||||
|
||||
|
||||
# 쓰기 (INSERT/UPDATE/DELETE)
|
||||
async def execute(sql: str, args: tuple = ()) -> int:
    """Run one write statement (INSERT/UPDATE/DELETE), commit it, return lastrowid.

    Args:
        sql: Statement text using %s placeholders.
        args: Values bound to the placeholders.

    Returns:
        The cursor's lastrowid after the statement has been committed.
    """
    pool = await get_pool()
    # Leaving the acquire() block hands the connection back to the pool.
    # It is intentionally NOT closed here: closing a pooled connection before
    # release throws it away, so every statement would open a new MySQL
    # connection and the pool would never reuse anything.
    # NOTE(review): if the statement raises, the connection is released with
    # an open transaction; aiomysql's Pool.release is expected to drop such
    # connections — confirm for the pinned aiomysql version.
    async with pool.acquire() as conn:
        async with conn.cursor() as cur:
            await cur.execute(sql, args)
            await conn.commit()
            return cur.lastrowid
|
||||
|
||||
|
||||
# 읽기 (SELECT)
|
||||
async def fetchone(sql: str, args: tuple = ()) -> dict | None:
|
||||
pool = await get_pool()
|
||||
async with pool.acquire() as conn:
|
||||
try:
|
||||
async with conn.cursor(aiomysql.DictCursor) as cur:
|
||||
await cur.execute(sql, args)
|
||||
return await cur.fetchone()
|
||||
finally:
|
||||
conn.close()
|
||||
|
||||
|
||||
async def fetchall(sql: str, args: tuple = ()) -> list[dict]:
    """Run a SELECT and return every row, each as a dict.

    Args:
        sql: Query text using %s placeholders.
        args: Values bound to the placeholders.

    Returns:
        The result of the DictCursor's fetchall().
    """
    pool = await get_pool()
    # The acquire() context manager releases the connection to the pool on
    # exit. The connection is not closed by hand: closing a pooled connection
    # before release discards it instead of letting the pool manage it.
    async with pool.acquire() as conn:
        async with conn.cursor(aiomysql.DictCursor) as cur:
            await cur.execute(sql, args)
            return await cur.fetchall()
|
||||
|
||||
async def insert_instagram_row(hospital_id: str, url: str) -> int:
    """Insert a (hospital_id, url) row into instagram_data.

    Returns the value execute() gives back for the INSERT.
    """
    statement = "INSERT INTO instagram_data (hospital_id, url) VALUES (%s, %s)"
    params = (hospital_id, url)
    return await execute(statement, params)
|
||||
|
||||
|
||||
async def insert_facebook_row(hospital_id: str, url: str) -> int:
    """Insert a (hospital_id, url) row into facebook_data.

    Returns the value execute() gives back for the INSERT.
    """
    statement = "INSERT INTO facebook_data (hospital_id, url) VALUES (%s, %s)"
    params = (hospital_id, url)
    return await execute(statement, params)
|
||||
|
||||
|
||||
async def insert_naver_blog_row(hospital_id: str, url: str) -> int:
    """Insert a (hospital_id, url) row into naver_blog_data.

    Returns the value execute() gives back for the INSERT.
    """
    statement = "INSERT INTO naver_blog_data (hospital_id, url) VALUES (%s, %s)"
    params = (hospital_id, url)
    return await execute(statement, params)
|
||||
|
||||
|
||||
async def insert_youtube_row(hospital_id: str, url: str) -> int:
    """Insert a (hospital_id, url) row into youtube_data.

    Returns the value execute() gives back for the INSERT.
    """
    statement = "INSERT INTO youtube_data (hospital_id, url) VALUES (%s, %s)"
    params = (hospital_id, url)
    return await execute(statement, params)
|
||||
|
||||
|
||||
async def insert_gangnam_unni_row(hospital_id: str, url: str) -> int:
    """Insert a (hospital_id, url) row into gangnam_unni_data.

    Returns the value execute() gives back for the INSERT.
    """
    statement = "INSERT INTO gangnam_unni_data (hospital_id, url) VALUES (%s, %s)"
    params = (hospital_id, url)
    return await execute(statement, params)
|
||||
|
||||
|
||||
async def insert_file_row(
|
||||
analysis_run_id: str,
|
||||
file_type: str,
|
||||
file_name: str,
|
||||
file_url: str,
|
||||
size_bytes: int | None = None,
|
||||
hospital_id: str | None = None,
|
||||
) -> int:
|
||||
return await execute(
|
||||
"INSERT INTO file_data (analysis_run_id, hospital_id, file_type, file_name, file_url, size_bytes)"
|
||||
" VALUES (%s, %s, %s, %s, %s, %s)",
|
||||
(analysis_run_id, hospital_id, file_type, file_name, file_url, size_bytes),
|
||||
)
|
||||
|
||||
|
||||
async def insert_analysis_run(
|
||||
analysis_run_id: str,
|
||||
hospital_id: str,
|
||||
owner_user_id: int,
|
||||
instagram_data_id: int | None,
|
||||
facebook_data_id: int | None,
|
||||
naver_blog_data_id: int | None,
|
||||
youtube_data_id: int | None,
|
||||
gangnam_unni_data_id: int | None,
|
||||
) -> str:
|
||||
await execute(
|
||||
"INSERT INTO analysis_runs"
|
||||
" (analysis_run_id, hospital_id, owner_user_id, instagram_data_id, facebook_data_id, naver_blog_data_id, youtube_data_id, gangnam_unni_data_id)"
|
||||
" VALUES (%s, %s, %s, %s, %s, %s, %s, %s)",
|
||||
(analysis_run_id, hospital_id, owner_user_id, instagram_data_id, facebook_data_id, naver_blog_data_id, youtube_data_id, gangnam_unni_data_id),
|
||||
)
|
||||
return analysis_run_id
|
||||
|
||||
|
||||
|
||||
async def save_analysis_report(analysis_run_id: str, data: dict) -> None:
    """Serialize `data` to JSON and store it in analysis_runs.report_data for the run."""
    # ensure_ascii=False keeps non-ASCII characters as-is in the stored JSON.
    report_json = json.dumps(data, ensure_ascii=False)
    update_sql = "UPDATE analysis_runs SET report_data = %s WHERE analysis_run_id = %s"
    await execute(update_sql, (report_json, analysis_run_id))
|
||||
|
||||
|
||||
async def is_done(table: str, row_id: int | None) -> bool:
|
||||
if row_id is None:
|
||||
return True
|
||||
r = await fetchone(f"SELECT status FROM {table} WHERE id = %s", (row_id,))
|
||||
return r["status"] == "done"
|
||||
|
||||
|
||||
async def fetch_raw(table: str, row_id: int | None) -> dict | None:
|
||||
if row_id is None:
|
||||
return None
|
||||
row = await fetchone(f"SELECT raw_data FROM {table} WHERE id = %s", (row_id,))
|
||||
if not row or not row["raw_data"]:
|
||||
return None
|
||||
return json.loads(row["raw_data"]) if isinstance(row["raw_data"], str) else row["raw_data"]
|
||||
|
||||
|
||||
async def get_analysis_raw_data(analysis_run_id: str) -> dict:
    """Collect the decoded raw_data of every channel row linked to an analysis run.

    Args:
        analysis_run_id: Key of the analysis_runs row to read.

    Returns:
        A dict with the keys "instagram", "facebook", "naver_blog", "youtube"
        and "gangnam_unni"; each value is the result of fetch_raw for that
        channel's table and linked row id.

    Raises:
        LookupError: No analysis_runs row exists for analysis_run_id.
    """
    run = await fetchone(
        "SELECT instagram_data_id, facebook_data_id, naver_blog_data_id, youtube_data_id, gangnam_unni_data_id"
        " FROM analysis_runs WHERE analysis_run_id = %s",
        (analysis_run_id,),
    )
    if not run:
        # Fail with a clear message instead of a TypeError from indexing None.
        raise LookupError(f"analysis run not found: {analysis_run_id}")
    return {
        "instagram": await fetch_raw("instagram_data", run["instagram_data_id"]),
        "facebook": await fetch_raw("facebook_data", run["facebook_data_id"]),
        "naver_blog": await fetch_raw("naver_blog_data", run["naver_blog_data_id"]),
        "youtube": await fetch_raw("youtube_data", run["youtube_data_id"]),
        "gangnam_unni": await fetch_raw("gangnam_unni_data", run["gangnam_unni_data_id"]),
    }
|
||||
|
||||
|
||||
async def set_instagram_status(row_id: int, status: str) -> None:
    """Set the status column of the instagram_data row whose id is `row_id`."""
    update_sql = "UPDATE instagram_data SET status = %s WHERE id = %s"
    await execute(update_sql, (status, row_id))
|
||||
|
||||
|
||||
async def set_facebook_status(row_id: int, status: str) -> None:
    """Set the status column of the facebook_data row whose id is `row_id`."""
    update_sql = "UPDATE facebook_data SET status = %s WHERE id = %s"
    await execute(update_sql, (status, row_id))
|
||||
|
||||
|
||||
async def set_naver_blog_status(row_id: int, status: str) -> None:
    """Set the status column of the naver_blog_data row whose id is `row_id`."""
    update_sql = "UPDATE naver_blog_data SET status = %s WHERE id = %s"
    await execute(update_sql, (status, row_id))
|
||||
|
||||
|
||||
async def set_youtube_status(row_id: int, status: str) -> None:
    """Set the status column of the youtube_data row whose id is `row_id`."""
    update_sql = "UPDATE youtube_data SET status = %s WHERE id = %s"
    await execute(update_sql, (status, row_id))
|
||||
|
||||
|
||||
async def set_gangnam_unni_status(row_id: int, status: str) -> None:
    """Set the status column of the gangnam_unni_data row whose id is `row_id`."""
    update_sql = "UPDATE gangnam_unni_data SET status = %s WHERE id = %s"
    await execute(update_sql, (status, row_id))
|
||||
|
||||
|
||||
async def save_instagram_raw_data(row_id: int, data: dict) -> None:
    """Store `data` as JSON in instagram_data.raw_data and set the row's status to 'done'."""
    encoded = json.dumps(data, ensure_ascii=False)
    update_sql = "UPDATE instagram_data SET raw_data = %s, status = 'done' WHERE id = %s"
    await execute(update_sql, (encoded, row_id))
|
||||
|
||||
|
||||
async def save_facebook_raw_data(row_id: int, data: dict) -> None:
    """Store `data` as JSON in facebook_data.raw_data and set the row's status to 'done'."""
    encoded = json.dumps(data, ensure_ascii=False)
    update_sql = "UPDATE facebook_data SET raw_data = %s, status = 'done' WHERE id = %s"
    await execute(update_sql, (encoded, row_id))
|
||||
|
||||
|
||||
async def save_naver_blog_raw_data(row_id: int, data: dict) -> None:
    """Store `data` as JSON in naver_blog_data.raw_data and set the row's status to 'done'."""
    encoded = json.dumps(data, ensure_ascii=False)
    update_sql = "UPDATE naver_blog_data SET raw_data = %s, status = 'done' WHERE id = %s"
    await execute(update_sql, (encoded, row_id))
|
||||
|
||||
|
||||
async def save_youtube_raw_data(row_id: int, data: dict) -> None:
    """Store `data` as JSON in youtube_data.raw_data and set the row's status to 'done'."""
    encoded = json.dumps(data, ensure_ascii=False)
    update_sql = "UPDATE youtube_data SET raw_data = %s, status = 'done' WHERE id = %s"
    await execute(update_sql, (encoded, row_id))
|
||||
|
||||
|
||||
async def save_gangnam_unni_raw_data(row_id: int, data: dict) -> None:
    """Store `data` as JSON in gangnam_unni_data.raw_data and set the row's status to 'done'."""
    encoded = json.dumps(data, ensure_ascii=False)
    update_sql = "UPDATE gangnam_unni_data SET raw_data = %s, status = 'done' WHERE id = %s"
    await execute(update_sql, (encoded, row_id))
|
||||
|
||||
|
||||
async def _insert_hospital_history(hospital_id: str, analysis_run_id: str | None) -> None:
|
||||
row = await fetchone(
|
||||
"SELECT owner_user_id, hospital_name, hospital_name_en, brn, road_address, site_address, url, status, raw_data"
|
||||
" FROM hospital_baseinfo WHERE hospital_id = %s",
|
||||
(hospital_id,),
|
||||
)
|
||||
if not row:
|
||||
return
|
||||
await execute(
|
||||
"INSERT INTO hospital_history"
|
||||
" (hospital_id, owner_user_id, hospital_name, hospital_name_en, brn, road_address, site_address, url, status, raw_data, analysis_run_id)"
|
||||
" VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)",
|
||||
(
|
||||
hospital_id,
|
||||
row["owner_user_id"],
|
||||
row["hospital_name"],
|
||||
row["hospital_name_en"],
|
||||
row["brn"],
|
||||
row["road_address"],
|
||||
row["site_address"],
|
||||
row["url"],
|
||||
row["status"],
|
||||
row["raw_data"] if isinstance(row["raw_data"], str) else json.dumps(row["raw_data"], ensure_ascii=False) if row["raw_data"] else None,
|
||||
analysis_run_id,
|
||||
),
|
||||
)
|
||||
|
||||
|
||||
async def insert_hospital(
    hospital_id: str,
    name: str,
    name_en: str | None = None,
    road_address: str | None = None,
    site_address: str | None = None,
    url: str | None = None,
    raw_data: dict | None = None,
    owner_user_id: int = 0,
    brn: str = "",
) -> dict:
    """Create a ``hospital_baseinfo`` row (status ``done``) and snapshot it to history.

    Args:
        hospital_id: Identifier of the new hospital row.
        name: Value for ``hospital_name``.
        name_en: Value for ``hospital_name_en``.
        road_address: Value for ``road_address``.
        site_address: Value for ``site_address``.
        url: Value for ``url``.
        raw_data: Optional payload; stored as JSON when truthy, otherwise NULL.
        owner_user_id: Value for ``owner_user_id``.
        brn: Value for ``brn``.

    Returns:
        The result of reading back ``created_at`` for the inserted row.
    """
    raw_json = json.dumps(raw_data, ensure_ascii=False) if raw_data else None
    params = (
        hospital_id, name, name_en, road_address, site_address, url,
        raw_json,
        owner_user_id, brn,
    )
    await execute(
        "INSERT INTO hospital_baseinfo (hospital_id, hospital_name, hospital_name_en, road_address, site_address, url, raw_data, status, owner_user_id, brn)"
        " VALUES (%s, %s, %s, %s, %s, %s, %s, 'done', %s, %s)",
        params,
    )
    # The initial snapshot is not tied to any analysis run.
    await _insert_hospital_history(hospital_id, analysis_run_id=None)
    created = await fetchone(
        "SELECT created_at FROM hospital_baseinfo WHERE hospital_id = %s",
        (hospital_id,),
    )
    return created
|
||||
|
||||
|
||||
async def save_hospital_raw_data(hospital_id: str, data: dict, analysis_run_id: str | None = None) -> None:
    """Replace a hospital's ``raw_data``, refresh its name/address columns, then snapshot it.

    ``clinicName``, ``clinicNameEn`` and ``address`` from ``data`` overwrite the
    matching columns only when present; ``COALESCE`` keeps the stored value
    when the key is missing or ``None``.

    Args:
        hospital_id: Row to update.
        data: Collected payload, stored whole as JSON.
        analysis_run_id: Run id recorded on the history snapshot.
    """
    payload = json.dumps(data, ensure_ascii=False)
    overrides = tuple(data.get(key) for key in ("clinicName", "clinicNameEn", "address"))
    await execute(
        "UPDATE hospital_baseinfo"
        " SET raw_data = %s, status = 'done',"
        " hospital_name = COALESCE(%s, hospital_name),"
        " hospital_name_en = COALESCE(%s, hospital_name_en),"
        " road_address = COALESCE(%s, road_address)"
        " WHERE hospital_id = %s",
        (payload, *overrides, hospital_id),
    )
    await _insert_hospital_history(hospital_id, analysis_run_id)
|
||||
|
||||
|
||||
async def merge_hospital_raw_data(hospital_id: str, patch: dict) -> None:
    """Shallow-merge ``patch`` into ``hospital_baseinfo.raw_data`` (read-modify-write).

    Used by the supplementary collection steps that append keys to ``raw_data``
    one after another. Only top-level keys are merged; keys in ``patch``
    replace existing ones.

    Args:
        hospital_id: Row whose ``raw_data`` is updated.
        patch: Top-level keys to add or overwrite.
    """
    row = await fetchone("SELECT raw_data FROM hospital_baseinfo WHERE hospital_id = %s", (hospital_id,))
    stored = row["raw_data"] if row else None
    if isinstance(stored, str):
        stored = json.loads(stored)
    # A missing row, SQL NULL and a stored JSON ``null`` (which decodes to
    # None) all start from an empty object instead of crashing on .update().
    merged = stored or {}
    merged.update(patch)
    await execute(
        "UPDATE hospital_baseinfo SET raw_data = %s WHERE hospital_id = %s",
        (json.dumps(merged, ensure_ascii=False), hospital_id),
    )
|
||||
|
||||
|
||||
async def get_market_analysis(analysis_run_id: str) -> dict:
    """Return the finished market analyses of a run, keyed by ``analysis_type``.

    Only rows with status ``done`` are read. ``data`` values that arrive as
    strings are JSON-decoded; other values are returned unchanged.
    """
    rows = await fetchall(
        "SELECT analysis_type, data FROM market_analysis WHERE analysis_run_id = %s AND status = 'done'",
        (analysis_run_id,),
    )
    by_type = {}
    for row in rows:
        value = row["data"]
        if isinstance(value, str):
            value = json.loads(value)
        by_type[row["analysis_type"]] = value
    return by_type
|
||||
|
|
@ -0,0 +1,16 @@
|
|||
from common.db.base import execute, fetchone, fetchall
|
||||
from common.db.hospital import select_hospital, update_hospital_status, insert_hospital, update_hospital
|
||||
from common.db.source import (
|
||||
insert_source, select_source_mainpage, select_source_by_type,
|
||||
insert_raw_info, update_raw_info_status, update_raw_info, update_raw_info_merge,
|
||||
update_raw_info_logo_url, select_mainpage_logo_url, select_branding_info_id,
|
||||
select_raw_info_data,
|
||||
select_run_sources, select_run_raw_data, select_run_source_raw,
|
||||
select_run_mainpage_url,
|
||||
)
|
||||
from common.db.run import (
|
||||
insert_run, select_run, select_run_status, update_run_status,
|
||||
update_run_report, update_run_plan, select_run_with_clinic, select_run_report_data,
|
||||
)
|
||||
from common.db.market import upsert_market_status, upsert_market_result, select_market
|
||||
from common.db.file_data import insert_file, select_run_files, select_file, delete_file
|
||||
|
|
@ -0,0 +1,56 @@
|
|||
import os
|
||||
import aiomysql
|
||||
from common.utils import get_env
|
||||
|
||||
_pool: aiomysql.Pool | None = None
|
||||
|
||||
|
||||
async def get_pool() -> aiomysql.Pool:
    """Return the module-wide aiomysql pool, creating it on first use.

    Connection settings come from the environment: ``MYSQL_HOST``,
    ``MYSQL_USER``, ``MYSQL_PASSWORD`` and ``MYSQL_DB`` via ``get_env``, and
    ``MYSQL_PORT`` via ``os.getenv`` with a default of ``3306``.
    """
    global _pool
    if _pool is not None:
        return _pool

    settings = {
        "host": get_env("MYSQL_HOST"),
        "port": int(os.getenv("MYSQL_PORT", "3306")),
        "user": get_env("MYSQL_USER"),
        "password": get_env("MYSQL_PASSWORD"),
        "db": get_env("MYSQL_DB"),
        "charset": "utf8mb4",
        "minsize": 0,
        "maxsize": 30,
        "connect_timeout": 10,
    }
    _pool = await aiomysql.create_pool(**settings)
    return _pool
|
||||
|
||||
|
||||
async def execute(sql: str, args: tuple = ()) -> int:
    """Run one write statement, commit it, and return the cursor's ``lastrowid``.

    Args:
        sql: Statement with ``%s`` placeholders.
        args: Values bound to the placeholders.
    """
    pool = await get_pool()
    async with pool.acquire() as conn:
        try:
            async with conn.cursor() as cur:
                await cur.execute(sql, args)
                await conn.commit()
                last_id = cur.lastrowid
            return last_id
        finally:
            # The connection is closed explicitly after every statement.
            # NOTE(review): this presumably keeps the pool from reusing it — confirm intended.
            conn.close()
|
||||
|
||||
|
||||
async def fetchone(sql: str, args: tuple = ()) -> dict | None:
    """Run a query with a ``DictCursor`` and return its first row.

    Args:
        sql: Query with ``%s`` placeholders.
        args: Values bound to the placeholders.

    Returns:
        Whatever the cursor's ``fetchone()`` yields for the query.
    """
    pool = await get_pool()
    async with pool.acquire() as conn:
        try:
            async with conn.cursor(aiomysql.DictCursor) as cur:
                await cur.execute(sql, args)
                first_row = await cur.fetchone()
            return first_row
        finally:
            # The connection is closed explicitly after every query.
            conn.close()
|
||||
|
||||
|
||||
async def fetchall(sql: str, args: tuple = ()) -> list[dict]:
    """Run a query with a ``DictCursor`` and return all of its rows.

    Args:
        sql: Query with ``%s`` placeholders.
        args: Values bound to the placeholders.
    """
    pool = await get_pool()
    async with pool.acquire() as conn:
        try:
            async with conn.cursor(aiomysql.DictCursor) as cur:
                await cur.execute(sql, args)
                all_rows = await cur.fetchall()
            return all_rows
        finally:
            # The connection is closed explicitly after every query.
            conn.close()
|
||||
|
|
@ -0,0 +1,39 @@
|
|||
from common.db.base import execute, fetchone, fetchall
|
||||
|
||||
|
||||
async def insert_file(
    analysis_run_id: str,
    file_type: str,
    file_name: str,
    file_url: str,
    size_bytes: int | None = None,
    hospital_id: str | None = None,
) -> int:
    """Insert a ``file_data`` row and return the value reported by ``execute``.

    Args:
        analysis_run_id: Run the file belongs to.
        file_type: Value for ``file_type``.
        file_name: Value for ``file_name``.
        file_url: Value for ``file_url``.
        size_bytes: Optional file size.
        hospital_id: Optional hospital the file belongs to.
    """
    # Tuple order follows the column list in the statement, not the signature.
    values = (analysis_run_id, hospital_id, file_type, file_name, file_url, size_bytes)
    new_id = await execute(
        "INSERT INTO file_data (analysis_run_id, hospital_id, file_type, file_name, file_url, size_bytes)"
        " VALUES (%s, %s, %s, %s, %s, %s)",
        values,
    )
    return new_id
|
||||
|
||||
|
||||
async def select_run_files(analysis_run_id: str) -> list[dict]:
    """List the non-deleted files of a run, newest first."""
    query = (
        "SELECT id, file_type, file_name, file_url, size_bytes, created_at"
        " FROM file_data WHERE analysis_run_id = %s AND is_deleted = FALSE"
        " ORDER BY created_at DESC"
    )
    files = await fetchall(query, (analysis_run_id,))
    return files
|
||||
|
||||
|
||||
async def select_file(file_id: int, analysis_run_id: str) -> dict | None:
    """Look up a file by id within a run; the result carries only the ``id`` column.

    The query does not filter on ``is_deleted``.
    """
    query = "SELECT id FROM file_data WHERE id = %s AND analysis_run_id = %s"
    match = await fetchone(query, (file_id, analysis_run_id))
    return match
|
||||
|
||||
|
||||
async def delete_file(file_id: int) -> None:
    """Soft-delete a file: set ``is_deleted`` to TRUE if it is not already set."""
    statement = "UPDATE file_data SET is_deleted = TRUE WHERE id = %s AND is_deleted = FALSE"
    await execute(statement, (file_id,))
|
||||
|
|
@ -0,0 +1,78 @@
|
|||
from common.db.base import execute, fetchone
|
||||
|
||||
|
||||
async def select_hospital(hospital_id: str) -> dict | None:
    """Fetch the base-info columns of one hospital by its id."""
    query = (
        "SELECT hospital_id, owner_user_id, hospital_name, hospital_name_en,"
        " brn, road_address, site_address, status, created_at, updated_at"
        " FROM hospital_baseinfo WHERE hospital_id = %s"
    )
    hospital = await fetchone(query, (hospital_id,))
    return hospital
|
||||
|
||||
|
||||
async def update_hospital_status(hospital_id: str, status: str) -> None:
    """Set the ``status`` column of one ``hospital_baseinfo`` row."""
    statement = "UPDATE hospital_baseinfo SET status = %s WHERE hospital_id = %s"
    await execute(statement, (status, hospital_id))
|
||||
|
||||
|
||||
async def _insert_hospital_history(hospital_id: str, analysis_run_id: str | None) -> None:
    """Snapshot the current ``hospital_baseinfo`` row into ``hospital_history``.

    Does nothing when no base row exists for ``hospital_id``.

    Args:
        hospital_id: Hospital whose current state is copied.
        analysis_run_id: Run id stored with the snapshot, or ``None``.
    """
    current = await fetchone(
        "SELECT owner_user_id, hospital_name, hospital_name_en, brn, road_address, site_address, status"
        " FROM hospital_baseinfo WHERE hospital_id = %s",
        (hospital_id,),
    )
    if not current:
        return

    copied_columns = (
        "owner_user_id",
        "hospital_name",
        "hospital_name_en",
        "brn",
        "road_address",
        "site_address",
        "status",
    )
    params = (hospital_id, *(current[column] for column in copied_columns), analysis_run_id)
    await execute(
        "INSERT INTO hospital_history"
        " (hospital_id, owner_user_id, hospital_name, hospital_name_en, brn, road_address, site_address, status, analysis_run_id)"
        " VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s)",
        params,
    )
|
||||
|
||||
|
||||
async def insert_hospital(
    hospital_id: str,
    name: str,
    name_en: str | None = None,
    road_address: str | None = None,
    site_address: str | None = None,
    owner_user_id: int = 0,
    brn: str = "",
) -> dict:
    """Create a ``hospital_baseinfo`` row (status ``done``) and snapshot it to history.

    Args:
        hospital_id: Identifier of the new hospital row.
        name: Value for ``hospital_name``.
        name_en: Value for ``hospital_name_en``.
        road_address: Value for ``road_address``.
        site_address: Value for ``site_address``.
        owner_user_id: Value for ``owner_user_id``.
        brn: Value for ``brn``.

    Returns:
        The result of reading back ``created_at`` for the inserted row.
    """
    values = (hospital_id, name, name_en, road_address, site_address, owner_user_id, brn)
    await execute(
        "INSERT INTO hospital_baseinfo"
        " (hospital_id, hospital_name, hospital_name_en, road_address, site_address, status, owner_user_id, brn)"
        " VALUES (%s, %s, %s, %s, %s, 'done', %s, %s)",
        values,
    )
    # The initial snapshot is not tied to any analysis run.
    await _insert_hospital_history(hospital_id, analysis_run_id=None)
    created = await fetchone(
        "SELECT created_at FROM hospital_baseinfo WHERE hospital_id = %s",
        (hospital_id,),
    )
    return created
|
||||
|
||||
|
||||
async def update_hospital(hospital_id: str, data: dict, analysis_run_id: str | None = None) -> None:
    """Mark a hospital ``done``, refresh its name/address columns, then snapshot it.

    ``clinicName``, ``clinicNameEn`` and ``address`` from ``data`` overwrite the
    matching columns only when present; ``COALESCE`` keeps the stored value
    when the key is missing or ``None``.

    Args:
        hospital_id: Row to update.
        data: Collected payload the three values are read from.
        analysis_run_id: Run id recorded on the history snapshot.
    """
    overrides = [data.get(key) for key in ("clinicName", "clinicNameEn", "address")]
    await execute(
        "UPDATE hospital_baseinfo"
        " SET status = 'done',"
        " hospital_name = COALESCE(%s, hospital_name),"
        " hospital_name_en = COALESCE(%s, hospital_name_en),"
        " road_address = COALESCE(%s, road_address)"
        " WHERE hospital_id = %s",
        (*overrides, hospital_id),
    )
    await _insert_hospital_history(hospital_id, analysis_run_id)
|
||||
|
|
@ -0,0 +1,31 @@
|
|||
import json
|
||||
from common.db.base import execute, fetchall
|
||||
|
||||
|
||||
async def upsert_market_status(analysis_run_id: str, analysis_type: str, status: str) -> None:
    """Insert a market-analysis row, or update only its ``status`` if the key already exists."""
    statement = (
        "INSERT INTO market_analysis (analysis_run_id, analysis_type, status)"
        " VALUES (%s, %s, %s)"
        " ON DUPLICATE KEY UPDATE status = VALUES(status)"
    )
    await execute(statement, (analysis_run_id, analysis_type, status))
|
||||
|
||||
|
||||
async def upsert_market_result(analysis_run_id: str, analysis_type: str, data: dict) -> None:
    """Store a market-analysis result as JSON and mark the row ``done`` (insert or update)."""
    encoded = json.dumps(data, ensure_ascii=False)
    statement = (
        "INSERT INTO market_analysis (analysis_run_id, analysis_type, status, data)"
        " VALUES (%s, %s, 'done', %s)"
        " ON DUPLICATE KEY UPDATE status = 'done', data = VALUES(data)"
    )
    await execute(statement, (analysis_run_id, analysis_type, encoded))
|
||||
|
||||
|
||||
async def select_market(analysis_run_id: str) -> dict:
    """Return the finished market analyses of a run, keyed by ``analysis_type``.

    Only rows with status ``done`` are read. ``data`` values that arrive as
    strings are JSON-decoded; other values are returned unchanged.
    """
    rows = await fetchall(
        "SELECT analysis_type, data FROM market_analysis WHERE analysis_run_id = %s AND status = 'done'",
        (analysis_run_id,),
    )
    results = {}
    for row in rows:
        value = row["data"]
        if isinstance(value, str):
            value = json.loads(value)
        results[row["analysis_type"]] = value
    return results
|
||||
|
|
@ -0,0 +1,76 @@
|
|||
import json
|
||||
from common.db.base import execute, fetchone
|
||||
|
||||
|
||||
async def insert_run(
    analysis_run_id: str,
    hospital_id: str,
    owner_user_id: int,
) -> str:
    """Create an ``analysis_runs`` row and return the same ``analysis_run_id``."""
    statement = "INSERT INTO analysis_runs (analysis_run_id, hospital_id, owner_user_id) VALUES (%s, %s, %s)"
    values = (analysis_run_id, hospital_id, owner_user_id)
    await execute(statement, values)
    return analysis_run_id
|
||||
|
||||
|
||||
async def select_run(analysis_run_id: str) -> dict | None:
    """Fetch the identifying and status columns of one analysis run."""
    query = (
        "SELECT analysis_run_id, hospital_id, owner_user_id, status, created_at, updated_at"
        " FROM analysis_runs WHERE analysis_run_id = %s"
    )
    run = await fetchone(query, (analysis_run_id,))
    return run
|
||||
|
||||
|
||||
async def select_run_report_data(analysis_run_id: str) -> dict | None:
    """Load a run's ``report_data``; call only when the report itself is needed.

    Returns:
        ``None`` when the run does not exist or has no report. A string value
        is JSON-decoded; any other value is returned unchanged.
    """
    row = await fetchone(
        "SELECT report_data FROM analysis_runs WHERE analysis_run_id = %s",
        (analysis_run_id,),
    )
    report = row["report_data"] if row else None
    if not report:
        return None
    if isinstance(report, str):
        return json.loads(report)
    return report
|
||||
|
||||
|
||||
async def select_run_status(analysis_run_id: str) -> str | None:
    """Return the ``status`` of a run, or ``None`` when the run does not exist."""
    row = await fetchone(
        "SELECT status FROM analysis_runs WHERE analysis_run_id = %s",
        (analysis_run_id,),
    )
    if not row:
        return None
    return row["status"]
|
||||
|
||||
|
||||
async def update_run_status(analysis_run_id: str, status: str) -> None:
    """Set the ``status`` column of one analysis run."""
    statement = "UPDATE analysis_runs SET status = %s WHERE analysis_run_id = %s"
    await execute(statement, (status, analysis_run_id))
|
||||
|
||||
|
||||
async def update_run_report(analysis_run_id: str, data: dict) -> None:
    """Store ``data`` as JSON in the run's ``report_data`` column."""
    encoded = json.dumps(data, ensure_ascii=False)
    statement = "UPDATE analysis_runs SET report_data = %s WHERE analysis_run_id = %s"
    await execute(statement, (encoded, analysis_run_id))
|
||||
|
||||
|
||||
async def update_run_plan(analysis_run_id: str, data: dict) -> None:
    """Store ``data`` as JSON in the run's ``plan_data`` column."""
    encoded = json.dumps(data, ensure_ascii=False)
    statement = "UPDATE analysis_runs SET plan_data = %s WHERE analysis_run_id = %s"
    await execute(statement, (encoded, analysis_run_id))
|
||||
|
||||
|
||||
async def select_run_with_clinic(analysis_run_id: str) -> dict | None:
    """Fetch a run's report/plan data with its hospital names and main-page URL.

    The main-page source is LEFT JOINed, so ``target_url`` is NULL when the
    hospital has no ``mainpage`` source.
    """
    query = (
        "SELECT ar.report_data, ar.plan_data, ar.created_at,"
        " h.hospital_name, h.hospital_name_en,"
        " rs.url AS target_url"
        " FROM analysis_runs ar"
        " JOIN hospital_baseinfo h ON ar.hospital_id = h.hospital_id"
        " LEFT JOIN remote_source rs ON rs.hospital_id = h.hospital_id AND rs.source_type = 'mainpage'"
        " WHERE ar.analysis_run_id = %s"
    )
    joined = await fetchone(query, (analysis_run_id,))
    return joined
|
||||
|
|
@ -0,0 +1,162 @@
|
|||
import json
|
||||
from common.db.base import execute, fetchone, fetchall
|
||||
from models.status import SourceType
|
||||
|
||||
|
||||
async def insert_source(
    hospital_id: str,
    source_type: SourceType,
    url: str,
    language: str | None = None,
) -> int:
    """Insert a ``remote_source`` row and return the value reported by ``execute``."""
    # Tuple order follows the column list in the statement, not the signature.
    values = (hospital_id, source_type, language, url)
    statement = "INSERT INTO remote_source (hospital_id, source_type, language, url) VALUES (%s, %s, %s, %s)"
    new_id = await execute(statement, values)
    return new_id
|
||||
|
||||
|
||||
async def select_source_mainpage(hospital_id: str) -> dict | None:
    """Fetch ``source_id`` and ``url`` of a hospital's ``mainpage`` source."""
    query = "SELECT source_id, url FROM remote_source WHERE hospital_id = %s AND source_type = 'mainpage'"
    source = await fetchone(query, (hospital_id,))
    return source
|
||||
|
||||
|
||||
async def insert_raw_info(
    source_id: int,
    analysis_run_id: str,
    data_tag: SourceType,
) -> int:
    """Insert a ``raw_info`` row for a source within a run and return the value reported by ``execute``."""
    statement = "INSERT INTO raw_info (source_id, analysis_run_id, data_tag) VALUES (%s, %s, %s)"
    new_id = await execute(statement, (source_id, analysis_run_id, data_tag))
    return new_id
|
||||
|
||||
|
||||
async def update_raw_info_status(info_id: int, status: str) -> None:
    """Set the ``status`` column of one ``raw_info`` row."""
    statement = "UPDATE raw_info SET status = %s WHERE info_id = %s"
    await execute(statement, (status, info_id))
|
||||
|
||||
|
||||
async def update_raw_info(info_id: int, data: dict) -> None:
    """Replace a ``raw_info`` row's ``raw_data`` with ``data`` (as JSON) and mark it ``done``."""
    encoded = json.dumps(data, ensure_ascii=False)
    statement = "UPDATE raw_info SET raw_data = %s, status = 'done' WHERE info_id = %s"
    await execute(statement, (encoded, info_id))
|
||||
|
||||
|
||||
async def select_raw_info_data(info_id: int | None) -> dict | None:
    """Load the ``raw_data`` of one ``raw_info`` row.

    Returns:
        ``None`` when ``info_id`` is ``None``, the row is missing, or its
        ``raw_data`` is empty. A string value is JSON-decoded; any other value
        is returned unchanged.
    """
    if info_id is None:
        return None
    row = await fetchone("SELECT raw_data FROM raw_info WHERE info_id = %s", (info_id,))
    stored = row["raw_data"] if row else None
    if not stored:
        return None
    if isinstance(stored, str):
        return json.loads(stored)
    return stored
|
||||
|
||||
|
||||
async def select_run_sources(analysis_run_id: str) -> list[dict]:
    """List ``info_id``, ``source_type`` and ``url`` for every source collected in a run."""
    query = (
        "SELECT ri.info_id, rs.source_type, rs.url"
        " FROM raw_info ri JOIN remote_source rs USING (source_id)"
        " WHERE ri.analysis_run_id = %s"
    )
    sources = await fetchall(query, (analysis_run_id,))
    return sources
|
||||
|
||||
|
||||
async def select_run_raw_data(analysis_run_id: str) -> dict:
    """Collect every source's raw payload for a run, keyed by source type.

    Rows whose language is ``EN`` (case-insensitive) get an ``_en`` key suffix.
    String payloads are JSON-decoded and a missing payload becomes ``{}``.
    When a row has a logo URL and its payload is a dict, the URL is added to
    the payload under ``_logo_url``. A later row with the same key replaces an
    earlier one.
    """
    rows = await fetchall(
        "SELECT rs.source_type, rs.language, ri.raw_data, ri.logo_url"
        " FROM raw_info ri JOIN remote_source rs USING (source_id)"
        " WHERE ri.analysis_run_id = %s",
        (analysis_run_id,),
    )
    collected: dict = {}
    for row in rows:
        language = (row.get("language") or "").upper()
        key = f"{row['source_type']}_en" if language == "EN" else row["source_type"]

        stored = row["raw_data"]
        if isinstance(stored, str):
            payload = json.loads(stored)
        else:
            payload = stored or {}

        if row.get("logo_url") and isinstance(payload, dict):
            payload["_logo_url"] = row["logo_url"]
        collected[key] = payload
    return collected
|
||||
|
||||
|
||||
async def select_run_source_raw(
    analysis_run_id: str, source_type: str, language: str | None = None,
) -> dict | None:
    """Load the raw payload of one source type within a run.

    Args:
        analysis_run_id: Run to search.
        source_type: ``remote_source.source_type`` to match.
        language: Optional ``remote_source.language`` filter; ignored when falsy.

    Returns:
        ``None`` when no row matches or its ``raw_data`` is empty. A string
        value is JSON-decoded; any other value is returned unchanged.
    """
    fragments = [
        "SELECT ri.raw_data FROM raw_info ri JOIN remote_source rs USING (source_id)",
        " WHERE ri.analysis_run_id = %s AND rs.source_type = %s",
    ]
    params = [analysis_run_id, source_type]
    if language:
        fragments.append(" AND rs.language = %s")
        params.append(language)
    fragments.append(" LIMIT 1")

    row = await fetchone("".join(fragments), tuple(params))
    stored = row["raw_data"] if row else None
    if not stored:
        return None
    if isinstance(stored, str):
        return json.loads(stored)
    return stored
|
||||
|
||||
|
||||
async def update_raw_info_logo_url(info_id: int, logo_url: str) -> None:
    """Save the logo URL in the ``raw_info.logo_url`` column.

    Kept separate from the JSON ``raw_data`` so the value is easy to index and
    query as a plain column.
    """
    statement = "UPDATE raw_info SET logo_url = %s WHERE info_id = %s"
    await execute(statement, (logo_url, info_id))
|
||||
|
||||
|
||||
async def select_branding_info_id(analysis_run_id: str) -> int | None:
    """Return the ``info_id`` of the run's ``branding`` source, or ``None`` if there is none."""
    row = await fetchone(
        "SELECT ri.info_id FROM raw_info ri JOIN remote_source rs USING (source_id)"
        " WHERE ri.analysis_run_id = %s AND rs.source_type = 'branding' LIMIT 1",
        (analysis_run_id,),
    )
    if not row:
        return None
    return row.get("info_id")
|
||||
|
||||
|
||||
async def select_mainpage_logo_url(analysis_run_id: str) -> str | None:
    """Return the ``logo_url`` stored for the run's ``mainpage`` source, or ``None`` if there is none."""
    row = await fetchone(
        "SELECT ri.logo_url FROM raw_info ri JOIN remote_source rs USING (source_id)"
        " WHERE ri.analysis_run_id = %s AND rs.source_type = 'mainpage' LIMIT 1",
        (analysis_run_id,),
    )
    if not row:
        return None
    return row.get("logo_url")
|
||||
|
||||
|
||||
async def update_raw_info_merge(info_id: int, patch: dict) -> None:
    """Shallow-merge ``patch`` into ``raw_info.raw_data`` (read-modify-write) and mark the row ``done``.

    Used when one source appends keys in stages (e.g. branding adding
    ``brandAssets`` and later ``channelLogos``). Only top-level keys are
    merged; keys in ``patch`` replace existing ones. Does nothing when the row
    does not exist.

    Args:
        info_id: Row whose ``raw_data`` is updated.
        patch: Top-level keys to add or overwrite.
    """
    row = await fetchone("SELECT raw_data FROM raw_info WHERE info_id = %s", (info_id,))
    if not row:
        return
    stored = row["raw_data"]
    if isinstance(stored, str):
        stored = json.loads(stored)
    # SQL NULL and a stored JSON ``null`` (which decodes to None) both start
    # from an empty object instead of crashing on .update().
    merged = stored or {}
    merged.update(patch)
    await execute(
        "UPDATE raw_info SET raw_data = %s, status = 'done' WHERE info_id = %s",
        (json.dumps(merged, ensure_ascii=False), info_id),
    )
|
||||
|
||||
|
||||
async def select_source_by_type(
    hospital_id: str, source_type: str, language: str | None = None,
) -> dict | None:
    """Fetch ``source_id`` and ``url`` of one of a hospital's sources by type.

    Args:
        hospital_id: Hospital to search.
        source_type: ``remote_source.source_type`` to match.
        language: Optional ``language`` filter; ignored when falsy.
    """
    fragments = ["SELECT source_id, url FROM remote_source WHERE hospital_id = %s AND source_type = %s"]
    params = [hospital_id, source_type]
    if language:
        fragments.append(" AND language = %s")
        params.append(language)
    fragments.append(" LIMIT 1")
    source = await fetchone("".join(fragments), tuple(params))
    return source
|
||||
|
||||
|
||||
async def select_run_mainpage_url(analysis_run_id: str) -> str:
    """Return the URL of the run's ``mainpage`` source, or ``""`` when it is missing or empty."""
    row = await fetchone(
        "SELECT rs.url FROM raw_info ri JOIN remote_source rs USING (source_id)"
        " WHERE ri.analysis_run_id = %s AND rs.source_type = 'mainpage'",
        (analysis_run_id,),
    )
    url = row.get("url") if row else None
    return url or ""
|
||||
|
|
@ -1,6 +1,7 @@
|
|||
import os
|
||||
import asyncio
|
||||
import logging
|
||||
from datetime import datetime, timezone
|
||||
from http import HTTPMethod
|
||||
import httpx
|
||||
|
||||
|
|
@ -9,6 +10,27 @@ logger = logging.getLogger(__name__)
|
|||
REQUEST_TIMEOUT = 60
|
||||
|
||||
|
||||
def parse_ts(v) -> datetime | None:
|
||||
"""수집기마다 다른 timestamp 포맷을 통일된 datetime으로 변환.
|
||||
파싱 실패 시 None.
|
||||
"""
|
||||
# 숫자면 epoch (Unix timestamp) — apify가 가끔 epoch로 줌
|
||||
if isinstance(v, (int, float)):
|
||||
return datetime.fromtimestamp(v, tz=timezone.utc)
|
||||
if isinstance(v, str):
|
||||
# 1순위: ISO 8601 (대부분 apify/firecrawl 출력)
|
||||
try:
|
||||
return datetime.fromisoformat(v.replace("Z", "+00:00"))
|
||||
except ValueError:
|
||||
pass
|
||||
# 2순위: RFC 2822 (네이버 블로그 RSS 등 — 표준 라이브러리 파서로)
|
||||
try:
|
||||
from email.utils import parsedate_to_datetime
|
||||
return parsedate_to_datetime(v)
|
||||
except (TypeError, ValueError):
|
||||
return None
|
||||
return None
|
||||
|
||||
|
||||
def get_env(key: str) -> str:
|
||||
v = os.environ.get(key, "")
|
||||
|
|
@ -62,7 +84,7 @@ def _normalize_homepage(url: str) -> str:
|
|||
|
||||
|
||||
# SSL 인증서가 www.* 에만 유효한 도메인 — bare 도메인이면 사용자 클릭 시 브라우저 SSL warning 뜸.
|
||||
_WWW_REQUIRED = ("gangnamunni.com", "facebook.com", "instagram.com")
|
||||
_WWW_REQUIRED = ("gangnamunni.com", "facebook.com", "instagram.com", "toxnfill.com")
|
||||
|
||||
|
||||
def _with_scheme(u: str | None) -> str | None:
|
||||
|
|
|
|||
|
|
@ -1,275 +0,0 @@
|
|||
"""홈페이지 HTML/CSS에서 hex 색상 직접 추출 + 빈도 기반 brand palette 산출.
|
||||
|
||||
Vision LLM에 의존하지 않고 페이지의 실제 CSS 값을 정규식으로 잡음.
|
||||
로고만 분석하는 Vision보다 사이트 전체 컬러 시스템 (primary/secondary/background/text)을 더 정확히 추출.
|
||||
"""
|
||||
import logging
|
||||
import re
|
||||
import ssl
|
||||
from collections import Counter
|
||||
from urllib.parse import urljoin, urlparse
|
||||
import httpx
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def _make_ssl_context() -> ssl.SSLContext:
|
||||
"""오래된 한국 의료 사이트들이 SSL DH_KEY_TOO_SMALL / cipher 약함 등으로 차단되는 문제 우회.
|
||||
보안 등급 1로 낮춤 + cert 검증 유지."""
|
||||
ctx = ssl.create_default_context()
|
||||
try:
|
||||
ctx.set_ciphers("DEFAULT@SECLEVEL=1")
|
||||
except ssl.SSLError:
|
||||
pass
|
||||
return ctx
|
||||
|
||||
|
||||
async def _fetch_html(url: str, timeout: float = 20.0) -> tuple[int, str]:
    """Fetch ``url`` and return ``(status_code, body_text)``, relaxing TLS checks step by step.

    Attempts, in order: default verification, the weak-cipher context from
    ``_make_ssl_context()``, and finally ``verify=False`` (for host mismatches
    and similar). A connection/handshake failure moves on to the next attempt.
    Any other failure (timeout, protocol error, ...) ends the fetch at once,
    because weaker TLS settings cannot help with it.

    Args:
        url: Page or asset URL to download.
        timeout: Per-attempt timeout in seconds.

    Returns:
        The response status and text, or ``(0, "")`` when no attempt produced
        a response. Request failures are logged, never raised.
    """
    headers = {"User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36"}
    # (log label, factory for the verify= argument, label of the next fallback)
    attempts = (
        ("standard SSL", lambda: True, "weak cipher"),
        ("weak cipher", _make_ssl_context, "verify=False"),
        ("verify=False", lambda: False, None),
    )
    for label, make_verify, next_label in attempts:
        try:
            async with httpx.AsyncClient(
                timeout=timeout, follow_redirects=True, headers=headers, verify=make_verify(),
            ) as c:
                r = await c.get(url)
                return r.status_code, r.text
        except (httpx.ConnectError, httpx.ReadError, ssl.SSLError) as e:
            if next_label is None:
                logger.warning("[fetch] %s all fallbacks failed: %s", url, e)
                return 0, ""
            logger.info("[fetch] %s %s failed: %s — fallback to %s", url, label, e, next_label)
        except Exception as e:
            # Previously only the last attempt caught these, so a timeout on
            # the first or second attempt escaped as an exception.
            logger.warning("[fetch] %s %s failed: %s", url, label, e)
            return 0, ""
    return 0, ""
|
||||
|
||||
# Ordered logo-URL patterns; group 1 is always the candidate URL.
# Entries 1-8 require an explicit "logo" marker, entries 9+ are generic fallbacks.
LOGO_IMG_PATTERNS = [
    # 1) <img class="...logo..." src="...">
    re.compile(r'<img[^>]*\bclass=["\'][^"\']*\blogo\b[^"\']*["\'][^>]*\bsrc=["\']([^"\']+)["\']', re.IGNORECASE),
    # 2) <img src="..." class="...logo...">
    re.compile(r'<img[^>]*\bsrc=["\']([^"\']+)["\'][^>]*\bclass=["\'][^"\']*\blogo\b[^"\']*["\']', re.IGNORECASE),
    # 3) <img id="...logo..." src="...">
    re.compile(r'<img[^>]*\bid=["\'][^"\']*\blogo\b[^"\']*["\'][^>]*\bsrc=["\']([^"\']+)["\']', re.IGNORECASE),
    # 4) <img alt="...logo..." src="...">
    re.compile(r'<img[^>]*\balt=["\'][^"\']*\blogo\b[^"\']*["\'][^>]*\bsrc=["\']([^"\']+)["\']', re.IGNORECASE),
    # 5) <a/h1 class="logo"><...nested...><img src="...">
    re.compile(r'<(?:a|h[1-6]|div|span)[^>]*\b(?:class|id)=["\'][^"\']*\blogo\b[^"\']*["\'][^>]*>(?:[^<]|<(?!img))*<img[^>]*\bsrc=["\']([^"\']+)["\']', re.IGNORECASE | re.DOTALL),
    # 6) inline background-image: <a/div class="logo" style="background-image: url(...)">
    re.compile(r'<(?:a|div|span|h[1-6])[^>]*\b(?:class|id)=["\'][^"\']*\blogo\b[^"\']*["\'][^>]*\bstyle=["\'][^"\']*background(?:-image)?\s*:\s*url\(\s*["\']?([^"\')\s]+)', re.IGNORECASE),
    # 7) inline background-image with the attributes in the opposite order:
    #    <a/div style="background-image: url(...)" class="logo">
    re.compile(r'<(?:a|div|span|h[1-6])[^>]*\bstyle=["\'][^"\']*background(?:-image)?\s*:\s*url\(\s*["\']?([^"\')\s]+)[^"\']*["\'][^>]*\b(?:class|id)=["\'][^"\']*\blogo\b', re.IGNORECASE),
    # 8) the src itself contains "logo" (header_logo.png, brand-logo.svg, ...).
    #    Lookarounds are used instead of \b because \b treats "_" as a word
    #    character and therefore never matched names like header_logo.png.
    re.compile(r'<img[^>]*\bsrc=["\']([^"\']*(?<![A-Za-z0-9])logo(?![A-Za-z0-9])[^"\']*\.(?:png|svg|jpe?g|webp)[^"\']*)["\']', re.IGNORECASE),
    # 9) <header>...<img src="..."> (first img inside the header)
    re.compile(r'<header\b[^>]*>(?:[^<]|<(?!img))*<img[^>]*\bsrc=["\']([^"\']+\.(?:png|svg|jpe?g|webp)[^"\']*)["\']', re.IGNORECASE | re.DOTALL),
    # 10) <nav>...<img src="..."> (first img inside the nav)
    re.compile(r'<nav\b[^>]*>(?:[^<]|<(?!img))*<img[^>]*\bsrc=["\']([^"\']+\.(?:png|svg|jpe?g|webp)[^"\']*)["\']', re.IGNORECASE | re.DOTALL),
    # 11) Open Graph image (representative image) - last-resort fallback
    re.compile(r'<meta[^>]*\bproperty=["\']og:image["\'][^>]*\bcontent=["\']([^"\']+)["\']', re.IGNORECASE),
    re.compile(r'<meta[^>]*\bcontent=["\']([^"\']+)["\'][^>]*\bproperty=["\']og:image["\']', re.IGNORECASE),
]
|
||||
|
||||
# Extracts the url(...) from a `.logo { background-image: url(...) }` rule in a CSS file.
# Group 1 is the URL; the selector may be followed by further comma-separated classes.
LOGO_CSS_PATTERN = re.compile(
    r'\.[\w-]*\blogo\b[\w-]*\s*(?:,\s*\.[\w-]+\s*)*\{[^}]*background(?:-image)?\s*:\s*url\(\s*["\']?([^"\')\s]+)',
    re.IGNORECASE | re.DOTALL,
)
|
||||
|
||||
|
||||
def find_logo_url_in_html(html: str, base_url: str, css_texts: list[str] | None = None) -> str | None:
    """Find the most likely logo URL for a page.

    Candidates are tried in this priority order and the first one that is not
    filtered out as noise wins:
      1) ``LOGO_IMG_PATTERNS[:8]`` — markup where class/id/alt/src/inline
         background explicitly mentions "logo" (most specific);
      2) ``LOGO_CSS_PATTERN`` on each external stylesheet — ``.logo``
         background images (class based, still specific);
      3) ``LOGO_IMG_PATTERNS[8:]`` — the remaining generic patterns
         (first header/nav image, og:image), which are the most error-prone.

    Args:
        html: Page HTML.
        base_url: URL used to resolve relative candidates.
        css_texts: Contents of external stylesheets, if any.

    Returns:
        The absolute logo URL, or ``None`` when nothing usable is found.
    """

    def _is_noise(src: str) -> bool:
        """True for URLs likely to be mistaken for a logo — lang/flag/icon/arrow/spacer and the like."""
        if not src or src.startswith("data:"):
            return True
        if re.search(r"(blank|spacer|pixel|transparent|1x1)\b", src, re.IGNORECASE):
            return True
        # The first header image is often a language flag, search icon or nav
        # arrow (a Korean-language flag used to be picked up for JK plastic).
        if re.search(r"(lang[-_]?(kor|eng|chn|jpn|rus|jp|en|ko|cn|ar|in)|flag|country|icon-|btn-|arrow|prev|next|search)\b", src, re.IGNORECASE):
            return True
        return False

    def _first_usable(patterns, text: str) -> str | None:
        """Return the first non-noise match of ``patterns`` in ``text`` as an absolute URL."""
        for pat in patterns:
            for m in pat.finditer(text):
                src = m.group(1)
                if not _is_noise(src):
                    return urljoin(base_url, src)
        return None

    # 1) explicit "logo" markers in the HTML (patterns 1-8)
    found = _first_usable(LOGO_IMG_PATTERNS[:8], html)
    if found is not None:
        return found

    # 2) .logo { background-image } in external CSS. Every match is inspected,
    #    so a noisy first rule no longer hides a valid later one.
    for css in (css_texts or []):
        found = _first_usable((LOGO_CSS_PATTERN,), css)
        if found is not None:
            return found

    # 3) generic fallbacks, with the same noise filter applied
    return _first_usable(LOGO_IMG_PATTERNS[8:], html)
|
||||
|
||||
|
||||
# Colour literals: 6-digit hex, 3-digit hex, and rgb()/rgba() with integer channels.
HEX6 = re.compile(r"#([0-9a-fA-F]{6})\b")
HEX3 = re.compile(r"#([0-9a-fA-F]{3})\b(?![0-9a-fA-F])")
RGB = re.compile(r"rgba?\(\s*(\d{1,3})\s*,\s*(\d{1,3})\s*,\s*(\d{1,3})\s*(?:,\s*[\d.]+\s*)?\)")
# Hex value assigned to a CSS custom property (--name: #hex).
CSS_VAR_HEX = re.compile(r"--[\w-]+\s*:\s*(#[0-9a-fA-F]{3,8})", re.IGNORECASE)
# href of <link rel="stylesheet" ...> tags (rel must come before href).
CSS_LINK = re.compile(r'<link[^>]+rel=["\']stylesheet["\'][^>]+href=["\']([^"\']+)["\']', re.IGNORECASE)
# Contents of inline <style> blocks.
STYLE_BLOCK = re.compile(r"<style[^>]*>(.*?)</style>", re.IGNORECASE | re.DOTALL)

# Achromatic / very common noise colours (these are never picked as a brand colour).
# NOTE(review): _extract_hex() returns normalized 6-digit lowercase values, so the
# 3-digit entries below ("#fff", "#333", ...) can never equal an extracted colour;
# only the 6-digit entries take effect. Confirm whether that is intended.
NOISE = {
    "#ffffff", "#000000", "#fff", "#000",
    "#333", "#222", "#111", "#444", "#555", "#666", "#777", "#888", "#999",
    "#aaa", "#bbb", "#ccc", "#ddd", "#eee", "#f0f0f0", "#f5f5f5", "#fafafa",
}
|
||||
|
||||
|
||||
def _normalize(hex_str: str) -> str:
|
||||
h = hex_str.lstrip("#").lower()
|
||||
if len(h) == 3:
|
||||
h = "".join(c * 2 for c in h)
|
||||
if len(h) == 8:
|
||||
h = h[:6]
|
||||
return f"#{h}"
|
||||
|
||||
|
||||
def _rgb_to_hex(r: int, g: int, b: int) -> str:
|
||||
return f"#{r:02x}{g:02x}{b:02x}"
|
||||
|
||||
|
||||
def _hex_to_rgb(h: str) -> tuple[int, int, int]:
|
||||
h = h.lstrip("#")
|
||||
return int(h[0:2], 16), int(h[2:4], 16), int(h[4:6], 16)
|
||||
|
||||
|
||||
def _distance(a: str, b: str) -> float:
    """Euclidean distance between two hex colours in RGB space."""
    squared = sum((x - y) ** 2 for x, y in zip(_hex_to_rgb(a), _hex_to_rgb(b)))
    return squared ** 0.5
|
||||
|
||||
|
||||
def _is_grayscale(h: str, tol: int = 12) -> bool:
    """True when the spread between the largest and smallest channel is below ``tol``."""
    channels = _hex_to_rgb(h)
    spread = max(channels) - min(channels)
    return spread < tol
|
||||
|
||||
|
||||
def _extract_hex(text: str) -> list[str]:
    """Extract every colour in ``text`` as a normalized ``#rrggbb`` string.

    Results are ordered 6-digit hex first, then 3-digit hex, then
    ``rgb()``/``rgba()`` values whose channels are all within 0-255.
    Duplicates are kept.
    """
    found: list[str] = [_normalize(m.group(0)) for m in HEX6.finditer(text)]
    found += [_normalize(m.group(0)) for m in HEX3.finditer(text)]
    for m in RGB.finditer(text):
        channels = tuple(int(m.group(i)) for i in (1, 2, 3))
        if all(0 <= value <= 255 for value in channels):
            found.append(_rgb_to_hex(*channels))
    return found
|
||||
|
||||
|
||||
def _cluster(colors: Counter, threshold: float = 25.0) -> list[tuple[str, int]]:
    """Group similar colours, using the most frequent colour as each group's representative.

    Colours are visited from most to least frequent. A colour joins the first
    existing group whose representative is closer than ``threshold``
    (``_distance``), adding its count to that group; otherwise it starts a new
    group.

    Returns:
        ``(representative, total_count)`` pairs in group-creation order.
    """
    groups: list[tuple[str, int]] = []
    for color, count in colors.most_common():
        for idx, (representative, total) in enumerate(groups):
            if _distance(color, representative) < threshold:
                groups[idx] = (representative, total + count)
                break
        else:
            # No existing group was close enough.
            groups.append((color, count))
    return groups
|
||||
|
||||
|
||||
async def _fetch_html_and_css(homepage_url: str, max_css_files: int = 8) -> tuple[str, list[str]]:
    """Fetch the homepage HTML plus its first ``max_css_files`` linked stylesheets in one go.

    Shared by logo and colour extraction so the site is not scraped twice.
    Downloads go through ``_fetch_html``; stylesheets that do not come back
    with status 200 and a non-empty body are skipped.

    Returns:
        ``(html, css_texts)``, or ``("", [])`` when the homepage itself does
        not come back with status 200 and a non-empty body.
    """
    status, html = await _fetch_html(homepage_url)
    if not (status == 200 and html):
        logger.warning("[color_extractor] homepage fetch failed status=%s url=%s", status, homepage_url)
        return "", []

    stylesheets: list[str] = []
    hrefs = CSS_LINK.findall(html)[:max_css_files]
    for href in hrefs:
        css_url = urljoin(homepage_url, href)
        css_status, css_body = await _fetch_html(css_url, timeout=15.0)
        if css_status != 200 or not css_body:
            continue
        stylesheets.append(css_body)
    return html, stylesheets
|
||||
|
||||
|
||||
def _colors_from_text(html: str, css_texts: list[str], source_url: str = "") -> dict:
    """Derive brand colors and a palette from already-downloaded HTML/CSS text.

    Pure computation, no network access. ``source_url`` is only used in the
    log message. Returns ``{}`` when no color survives the NOISE filter;
    otherwise a dict with ``brand_colors`` (primary / accent / text),
    ``color_palette`` (up to 8 entries) and ``extracted_from``.
    """
    # 1. Text to scan: <style> blocks, the whole HTML (catches inline
    #    style="color:#..."), then the external stylesheets.
    sources: list[str] = [*STYLE_BLOCK.findall(html), html, *css_texts]

    # 2. Frequency of every extracted color, skipping NOISE entries.
    tally: Counter = Counter(
        shade
        for chunk in sources
        for shade in _extract_hex(chunk)
        if shade not in NOISE
    )
    if not tally:
        logger.info("[color_extractor] no colors extracted from %s", source_url)
        return {}

    # 3. Merge near-identical colors.
    groups = _cluster(tally)

    # 4. primary = first chromatic group, accent = second chromatic group,
    #    text = first grayscale group.
    chromatic: list[str] = []
    neutral: list[str] = []
    for shade, _ in groups:
        bucket = neutral if _is_grayscale(shade) else chromatic
        bucket.append(shade)

    palette = [
        {"name": f"색상 {rank}", "hex": shade, "usage": f"빈도 {freq}"}
        for rank, (shade, freq) in enumerate(groups[:8], start=1)
    ]

    brand_colors = {
        "primary": chromatic[0] if chromatic else None,
        "accent": chromatic[1] if len(chromatic) > 1 else None,
        "text": neutral[0] if neutral else None,
    }
    return {
        "brand_colors": brand_colors,
        "color_palette": palette,
        "extracted_from": "html+css",
    }
|
||||
|
||||
|
||||
async def extract_brand_colors_from_site(homepage_url: str, max_css_files: int = 8) -> dict:
    """Fetch the homepage and its stylesheets, then run the color analysis on them.

    Returns the dict produced by ``_colors_from_text`` (primary / accent /
    text colors plus a palette), or ``{}`` when the homepage could not be
    fetched.
    """
    page, stylesheets = await _fetch_html_and_css(homepage_url, max_css_files)
    return _colors_from_text(page, stylesheets, homepage_url) if page else {}
|
||||
|
||||
|
||||
async def extract_brand_assets_from_site(homepage_url: str, max_css_files: int = 8) -> dict:
    """Fetch the site once and extract both the logo URL and the brand colors.

    Returns ``{"logo_url": str | None, "colors": dict}``. ``colors`` is the
    ``_colors_from_text`` result, or ``{}`` (with ``logo_url`` None) when the
    homepage could not be fetched.
    """
    page, stylesheets = await _fetch_html_and_css(homepage_url, max_css_files)
    if page:
        logo = find_logo_url_in_html(page, homepage_url, css_texts=stylesheets)
        colors = _colors_from_text(page, stylesheets, homepage_url)
        return {"logo_url": logo, "colors": colors}
    return {"logo_url": None, "colors": {}}
|
||||
|
|
@ -10,6 +10,7 @@ import logging
|
|||
import re
|
||||
import ssl
|
||||
import httpx
|
||||
import resvg_py
|
||||
from openai import AsyncOpenAI
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
|
@ -77,11 +78,20 @@ class VisionClient:
|
|||
if not mime.startswith("image/"):
|
||||
logger.warning("[vision] %s not an image (content-type=%s)", url, mime)
|
||||
return None
|
||||
size = len(resp.content)
|
||||
# SVG는 Gemini가 못 보므로 즉시 PNG로 래스터화 (resvg, in-memory ~1ms)
|
||||
content = resp.content
|
||||
if mime == "image/svg+xml" or url.lower().split("?")[0].endswith(".svg"):
|
||||
try:
|
||||
content = bytes(resvg_py.svg_to_bytes(svg_string=resp.text))
|
||||
mime = "image/png"
|
||||
except Exception as e:
|
||||
logger.warning("[vision] svg rasterize failed %s: %s", url, e)
|
||||
return None
|
||||
size = len(content)
|
||||
if size < 500:
|
||||
logger.warning("[vision] %s too small (%d bytes) — likely placeholder", url, size)
|
||||
return None
|
||||
b64 = base64.b64encode(resp.content).decode("ascii")
|
||||
b64 = base64.b64encode(content).decode("ascii")
|
||||
return f"data:{mime};base64,{b64}"
|
||||
except (httpx.ConnectError, httpx.ReadError, ssl.SSLError) as e:
|
||||
last_err = e
|
||||
|
|
@ -120,6 +130,75 @@ class VisionClient:
|
|||
logger.warning("[vision] error: %s", e)
|
||||
return None
|
||||
|
||||
async def describe_svg_text(self, svg_url: str) -> dict | None:
    """Describe an SVG logo by sending its XML source to the text LLM endpoint.

    Gemini Vision cannot look at SVG images, but an LLM can read the XML
    text itself. The SVG source is downloaded, embedded as-is in a prompt
    and the model is asked to infer colors, symbol and text.

    Returns the same schema as analyze_brand_assets
    (logo_description/style/has_symbol/...), with ``logo_images`` filled in
    here. Returns None when the SVG cannot be fetched, the LLM call raises,
    or no JSON could be extracted from the reply.
    """
    headers = {"User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36"}

    def _weak_ctx() -> ssl.SSLContext:
        # Default context with the cipher security level lowered to 1;
        # if the cipher string is rejected the default ciphers are kept.
        ctx = ssl.create_default_context()
        try:
            ctx.set_ciphers("DEFAULT@SECLEVEL=1")
        except ssl.SSLError:
            pass
        return ctx

    svg_text: str | None = None
    # Try progressively weaker TLS settings: default verification, weak
    # ciphers, then no verification. A non-200 response does not break out,
    # so the next setting is tried as well.
    for verify in (True, _weak_ctx(), False):
        try:
            async with httpx.AsyncClient(
                timeout=15.0, follow_redirects=True, headers=headers, verify=verify,
            ) as c:
                resp = await c.get(svg_url)
                if resp.status_code == 200:
                    svg_text = resp.text
                    break
        except (httpx.ConnectError, httpx.ReadError, ssl.SSLError):
            # Connection/TLS-level failure: move on to the next verify setting.
            continue
        except Exception as e:
            # Any other error aborts immediately without further fallbacks.
            logger.warning("[vision] svg fetch error %s: %s", svg_url, e)
            return None
    if not svg_text:
        logger.warning("[vision] svg fetch failed %s", svg_url)
        return None
    # Guard against payload blow-up — an ordinary logo SVG is only a few KB.
    if len(svg_text) > 60000:
        svg_text = svg_text[:60000]

    prompt = (
        "아래는 병원 로고 SVG 소스 코드입니다. SVG 마크업(path/circle/text/fill/stroke 등)을 "
        "읽고 로고의 시각적 특징을 추론해 아래 JSON 스키마로만 응답하세요. 코드펜스 없이 순수 JSON.\n"
        "{\n"
        ' "logo_description": "심볼 형태 + 워드마크 + 톤을 1~2문장 한국어로",\n'
        ' "logo_style": "minimal | illustrative | typographic | abstract 중 하나",\n'
        ' "has_symbol": "심볼/아이콘이 있으면 true, 글자만 있으면 false (boolean)",\n'
        ' "logo_symbol": "심볼 묘사 (예: \'잎사귀\'). 없으면 빈 문자열",\n'
        ' "logo_text": "워드마크 텍스트 그대로. <text> 태그 내용 우선",\n'
        ' "logo_colors_desc": "쓰인 색감을 사람이 부르는 이름으로 (예: \'딥네이비 + 골드\'). hex 출력 금지"\n'
        "}\n"
        "주의: hex 값이나 URL은 출력하지 마세요 (별도 추출 로직 처리). 모든 텍스트는 한국어로.\n\n"
        "SVG 소스:\n"
        f"{svg_text}"
    )
    try:
        resp = await self.client.chat.completions.create(
            model=self.model,
            messages=[{"role": "user", "content": prompt}],
            max_tokens=8000,  # Gemini 2.5 deducts thinking tokens from max_tokens, so leave headroom
        )
        choice = resp.choices[0]
        if choice.finish_reason != "stop":
            # Only logged; the (possibly truncated) content is still parsed below.
            logger.warning("[vision] svg describe finish_reason=%s", choice.finish_reason)
        result = self._extract_json(choice.message.content or "")
    except Exception as e:
        logger.warning("[vision] svg describe error: %s", e)
        return None
    if not result:
        return None
    # logo_images is filled in here, not by the model: the SVG URL goes in the "horizontal" slot.
    result["logo_images"] = {"circle": None, "horizontal": svg_url, "korean": None}
    return result
|
||||
|
||||
async def analyze_brand_assets(
|
||||
self,
|
||||
logo_url: str | None,
|
||||
|
|
@ -139,9 +218,10 @@ class VisionClient:
|
|||
' "has_symbol": "심볼/아이콘이 있으면 true, 글자만 있으면 false (boolean)",\n'
|
||||
' "logo_symbol": "심볼이 묘사하는 대상 (예: \'잎사귀\', \'추상 곡선\'). 없으면 빈 문자열",\n'
|
||||
' "logo_text": "로고에 보이는 워드마크 텍스트 그대로 (한글/영문). 없으면 빈 문자열",\n'
|
||||
' "logo_colors_desc": "로고에 쓰인 색감을 사람이 부르는 이름으로 서술 (예: \'딥네이비 + 골드\'). 정확한 hex는 출력하지 말 것"\n'
|
||||
' "logo_colors_desc": "로고에 쓰인 색감을 사람이 부르는 이름으로 서술 (예: \'딥네이비 + 골드\')",\n'
|
||||
' "logo_colors_hex": ["로고에서 시각적으로 두드러진 색 정확히 5개의 hex 근사값 배열. 예: [\'#1A2B3C\', \'#D4A017\', \'#FFFFFF\', \'#9E5C2A\', \'#1F1F1F\']. 강한 색이 5개 안 되면 음영/명도 차이로 5개 채울 것. 빈 배열 금지."]\n'
|
||||
"}\n"
|
||||
"주의: 색상 hex 값이나 logo URL 같은 필드는 출력하지 마세요 (별도 추출 로직이 처리).\n"
|
||||
"주의: logo_colors_hex 는 시각 추정이라 정확도 떨어질 수 있음. CSS 추출이 우선이고 이건 fallback/보완 용.\n"
|
||||
"모든 설명/텍스트 값은 반드시 한국어로 작성하세요 (영어 금지)."
|
||||
)
|
||||
result = await self._ask(urls, prompt)
|
||||
|
|
@ -149,6 +229,14 @@ class VisionClient:
|
|||
return {}
|
||||
# logo_images는 우리가 직접 채움 (Vision은 묘사만)
|
||||
result["logo_images"] = {"circle": None, "horizontal": logo_url, "korean": None}
|
||||
# logo_colors_hex 5개 강제 정규화 — LLM 이 4개나 6개 줄 수도 있어서 길이 fallback.
|
||||
hex_list = [h for h in (result.get("logo_colors_hex") or []) if isinstance(h, str) and h.startswith("#")]
|
||||
if hex_list:
|
||||
while len(hex_list) < 5:
|
||||
hex_list.append(hex_list[-1]) # 마지막 색 복제로 패딩
|
||||
result["logo_colors_hex"] = hex_list[:5]
|
||||
else:
|
||||
result["logo_colors_hex"] = []
|
||||
return result
|
||||
|
||||
async def describe_channel_logos(
|
||||
|
|
@ -1,7 +1,7 @@
|
|||
import os
|
||||
from pydantic import BaseModel
|
||||
from common.utils import get_env
|
||||
from integrations.llm.schemas.report import ReportInput, ReportOutput
|
||||
from integrations.llm.schemas.report import ReportInput, ReportOutput, YouTubeDiagnosisInput, YouTubeDiagnosisOutput
|
||||
from integrations.llm.schemas.plan import PlanInput, PlanOutput
|
||||
from integrations.llm.schemas.market import (
|
||||
MarketCompetitorsInput, MarketCompetitorsOutput,
|
||||
|
|
@ -80,3 +80,10 @@ market_target_audience_prompt = Prompt(
|
|||
input_class=MarketTargetAudienceInput,
|
||||
output_class=MarketTargetAudienceOutput,
|
||||
)
|
||||
|
||||
# Prompt registration for the YouTube channel diagnosis: template file,
# model key, and the input/output schema classes.
# NOTE(review): "REPORT_MODEL" appears to be a config/env key naming the model,
# shared with the report prompt — confirm against the Prompt class.
youtube_diagnosis_prompt = Prompt(
    file_name="youtube_diagnosis_prompt.txt",
    prompt_model="REPORT_MODEL",
    input_class=YouTubeDiagnosisInput,
    output_class=YouTubeDiagnosisOutput,
)
|
||||
|
|
|
|||
|
|
@ -19,6 +19,8 @@ class PlanInput(BaseModel):
|
|||
instagram_en: str | None = None
|
||||
facebook_en: str | None = None
|
||||
naver_blog: str | None = None
|
||||
naver_cafe: str | None = None
|
||||
kakao_talk: str | None = None
|
||||
channel_logos: str | None = None
|
||||
brand_assets: str | None = None
|
||||
|
||||
|
|
|
|||
|
|
@ -68,22 +68,18 @@ class RegistryData(BaseModel):
|
|||
|
||||
|
||||
class ClinicSnapshot(BaseModel):
|
||||
name: str
|
||||
name_en: str
|
||||
established: str
|
||||
years_in_business: int
|
||||
staff_count: int
|
||||
lead_doctor: LeadDoctor
|
||||
overall_rating: float
|
||||
total_reviews: int
|
||||
price_range: PriceRange
|
||||
certifications: list[str]
|
||||
media_appearances: list[str]
|
||||
medical_tourism: list[str]
|
||||
location: str
|
||||
nearest_station: str
|
||||
phone: str
|
||||
domain: str
|
||||
# _build_clinic_snapshot은 source 데이터 있을 때만 채움 (`if x:` 가드).
|
||||
# required면 강남언니/홈페이지 누락 병원에서 ValidationError로 리포트 실패.
|
||||
name: str | None = None
|
||||
name_en: str | None = None
|
||||
staff_count: int | None = None
|
||||
lead_doctor: LeadDoctor | None = None
|
||||
overall_rating: float | None = None
|
||||
total_reviews: int | None = None
|
||||
certifications: list[str] = []
|
||||
location: str | None = None
|
||||
phone: str | None = None
|
||||
domain: str | None = None
|
||||
logo_images: LogoImages | None = None
|
||||
brand_colors: BrandColors | None = None
|
||||
source: DataSource | None = None
|
||||
|
|
@ -137,7 +133,6 @@ class YouTubeAudit(BaseModel):
|
|||
avg_video_length: str
|
||||
upload_frequency: str
|
||||
channel_created_date: str
|
||||
subscriber_rank: str
|
||||
channel_description: str
|
||||
linked_urls: list[LinkedUrl]
|
||||
playlists: list[str]
|
||||
|
|
@ -164,8 +159,8 @@ class InstagramAccount(BaseModel):
|
|||
|
||||
|
||||
class InstagramAudit(BaseModel):
|
||||
accounts: list[InstagramAccount]
|
||||
diagnosis: list[DiagnosisItem]
|
||||
accounts: list[InstagramAccount] = []
|
||||
diagnosis: list[DiagnosisItem] = []
|
||||
|
||||
|
||||
# --- Facebook ---
|
||||
|
|
@ -198,17 +193,17 @@ class FacebookPage(BaseModel):
|
|||
linked_domain: str
|
||||
reviews: int
|
||||
recent_post_age: str
|
||||
has_whatsapp: bool
|
||||
post_frequency: str | None = None
|
||||
has_whatsapp: bool | None = None
|
||||
post_frequency: str
|
||||
top_content_type: str | None = None
|
||||
engagement: str | None = None
|
||||
engagement: str
|
||||
|
||||
|
||||
class FacebookAudit(BaseModel):
|
||||
pages: list[FacebookPage]
|
||||
diagnosis: list[DiagnosisItem]
|
||||
brand_inconsistencies: list[BrandInconsistency]
|
||||
consolidation_recommendation: str
|
||||
pages: list[FacebookPage] = []
|
||||
diagnosis: list[DiagnosisItem] = []
|
||||
brand_inconsistencies: list[BrandInconsistency] = []
|
||||
consolidation_recommendation: str | None = None
|
||||
|
||||
|
||||
# --- 기타 채널 / 웹사이트 ---
|
||||
|
|
@ -353,3 +348,20 @@ class MarketingReport(BaseModel):
|
|||
|
||||
|
||||
ReportOutput = MarketingReport
|
||||
|
||||
|
||||
# --- YouTubeDiagnosis ---
|
||||
|
||||
class YouTubeDiagnosisInput(BaseModel):
    """Input values for the YouTube channel diagnosis prompt.

    Every field is optional and defaults to None.
    """
    # Channel display name.
    channel_name: str | None = None
    # Channel-level counters.
    subscribers: int | None = None
    total_videos: int | None = None
    total_views: int | None = None
    # Free-text descriptions rather than numeric values.
    avg_video_length: str | None = None
    upload_frequency: str | None = None
    # NOTE(review): presumably pre-serialized text (e.g. a JSON or bullet list) — confirm with the caller.
    top_videos: str | None = None
    playlists: str | None = None
|
||||
|
||||
|
||||
class YouTubeDiagnosisOutput(BaseModel):
    """Result of the YouTube channel diagnosis: a required list of DiagnosisItem entries."""
    diagnosis: list[DiagnosisItem]
|
||||
|
|
|
|||
|
|
@ -32,8 +32,8 @@
|
|||
## 분석 리포트
|
||||
{report}
|
||||
|
||||
## 추가 채널 데이터 (네이버 블로그 / 틱톡 / 인스타그램 EN / 페이스북 EN)
|
||||
아래에 데이터가 있는 채널은 channelStrategies와 channelBranding에 **반드시 포함**하세요 (네이버 블로그, 틱톡, 영문 인스타그램, 영문 페이스북). null이면 제외.
|
||||
## 추가 채널 데이터 (네이버 블로그 / 틱톡 / 인스타그램 EN / 페이스북 EN / 네이버 카페 / 카카오톡)
|
||||
아래에 데이터가 있는 채널은 channelStrategies에 **반드시 포함**하세요 (네이버 블로그, 틱톡, 영문 인스타그램, 영문 페이스북, 네이버 카페, 카카오톡). channelBranding은 SNS·블로그·카페까지만 포함(카카오톡은 메신저라 제외). null이면 제외.
|
||||
|
||||
### 네이버 블로그 (Naver Blog)
|
||||
{naver_blog}
|
||||
|
|
@ -47,6 +47,16 @@
|
|||
### 페이스북 (영문 페이지)
|
||||
{facebook_en}
|
||||
|
||||
### 네이버 카페 (공식 카페 운영 신호)
|
||||
{naver_cafe}
|
||||
- naver_cafe.cafeName: 카페명, naver_cafe.memberCount: 회원수
|
||||
- currentStatus는 "회원 N명" 형태로 간단하게. 게시글 수·최근 활동은 수집 불가 (추측 금지).
|
||||
- targetGoal은 회원 확보 목표 수치 + 운영 권장 (예: "회원 5,000명, 주 1~2회 공지 발행").
|
||||
|
||||
### 카카오톡 채널 (URL only — 콘텐츠 수집 X, 존재 여부만)
|
||||
{kakao_talk}
|
||||
- channelStrategies 카드 하나로 포함. currentStatus는 "공식 카카오톡 채널 운영 중" 정도, targetGoal은 친구 추가 유도·상담 전환·자동응답 시나리오 구체화 등.
|
||||
|
||||
## 채널별 로고 분석 (Gemini Vision) — 채널룰/일관성의 근거
|
||||
{channel_logos}
|
||||
- 위 channel_logos[]의 각 항목: channel(채널명), logo_description(프로필이 어떻게 생겼는지), is_official(공식 로고와 일치 여부).
|
||||
|
|
@ -77,7 +87,7 @@
|
|||
- brandInconsistencies: 채널 간 브랜딩 불일치 항목 및 개선 권고
|
||||
|
||||
### Section 2: channelStrategies
|
||||
- 메인 SNS 채널(Instagram, Facebook, YouTube, TikTok, 네이버 블로그) + 영문 계정(Instagram EN, Facebook EN) 카드를 **모두 포함**. 데이터 없는 채널도 빠뜨리지 말 것.
|
||||
- 메인 SNS 채널(Instagram, Facebook, YouTube, TikTok, 네이버 블로그) + 영문 계정(Instagram EN, Facebook EN) + **네이버 카페 / 카카오톡** (URL 있을 때) 카드를 **모두 포함**. 데이터 없는 채널도 빠뜨리지 말 것.
|
||||
- **currentStatus**: 데이터 있는 채널은 실제 수치로 서술 (예: "14,047 팔로워, Reels 0개", "104K 구독자, 주 2~3회 업로드"). **데이터 없는 채널은 "계정 없음"** 으로 표시. `excellent`/`warning`/`good` 같은 등급·평가어 금지.
|
||||
- **targetGoal은 모든 채널에 반드시 채울 것** — 구체적 목표 수치(예: "50K 팔로워, Reels 주 5개"). 데이터 없는 채널도 시작 시 권장 목표를 작성하고 비우지 말 것.
|
||||
- 각 채널의 우선순위(P0/P1/P2), 콘텐츠 유형, 게시 빈도, 포맷 가이드라인 모두 권장값으로 작성 — 데이터 없어도 시작 권장값으로 채울 것.
|
||||
|
|
|
|||
|
|
@ -66,8 +66,11 @@
|
|||
### 카카오톡 채널 (URL only — 수집 데이터 없음, 존재 여부만 확인)
|
||||
{kakao_talk}
|
||||
|
||||
### 네이버 카페 (URL only — 수집 데이터 없음, 존재 여부만 확인)
|
||||
### 네이버 카페 (공식 카페 운영 신호)
|
||||
{naver_cafe}
|
||||
- naver_cafe.cafeName: 카페명
|
||||
- naver_cafe.memberCount: 회원수
|
||||
- 게시글 총 수·최근 게시일은 로그인 필요라 수집 불가. 추측 금지. 위 두 값만 사용.
|
||||
|
||||
### 채널별 로고 분석 (Gemini Vision)
|
||||
{channel_logos}
|
||||
|
|
@ -108,5 +111,5 @@
|
|||
- 데이터가 null인 계정은 항목을 만들지 마세요. icon은 instagram/facebook/video 등 플랫폼에 맞게 설정.
|
||||
- strengths와 weaknesses는 각 3개 이상 작성하세요.
|
||||
- roadmap은 우선순위 순으로 실행 가능한 액션으로 작성하세요.
|
||||
- kpis는 실제 수집된 수치 기반으로 현실적인 측정 가능 지표로 작성하세요.
|
||||
- kpi_dashboard는 코드가 결정적으로 산출해 후처리 강제 치환하므로 LLM 출력 무시됩니다. 빈 배열 또는 placeholder로 두세요.
|
||||
- conversion_strategy의 actions는 구체적인 실행 방안으로 작성하세요.
|
||||
|
|
|
|||
|
|
@ -0,0 +1,24 @@
|
|||
다음은 성형외과/피부과 유튜브 채널 데이터입니다.
|
||||
|
||||
채널명: {channel_name}
|
||||
구독자 수: {subscribers}
|
||||
총 영상 수: {total_videos}
|
||||
총 조회수: {total_views}
|
||||
평균 영상 길이: {avg_video_length}
|
||||
업로드 주기: {upload_frequency}
|
||||
인기 영상 목록: {top_videos}
|
||||
플레이리스트: {playlists}
|
||||
|
||||
위 데이터를 바탕으로 이 채널의 마케팅 문제점과 개선사항을 진단해줘.
|
||||
각 항목은 category(진단 카테고리), detail(상세 설명), severity(critical/warning/info) 형식의 JSON 배열로 출력해줘.
|
||||
|
||||
진단 카테고리는 다음과 같아:
|
||||
구독자 대비 조회수 비율,
|
||||
최근 롱폼 조회수,
|
||||
Shorts 조회수,
|
||||
업로드 빈도,
|
||||
콘텐츠 톤앤매너,
|
||||
썸네일 디자인,
|
||||
최고 성과 Shorts
|
||||
|
||||
출처 번호([1], [2] 등)는 굳이 포함하지 마.
|
||||
|
|
@ -1,4 +1,5 @@
|
|||
import re
|
||||
import httpx
|
||||
from http import HTTPMethod
|
||||
from urllib.parse import urlparse
|
||||
from common.utils import http_request
|
||||
|
|
@ -108,3 +109,59 @@ class NaverClient:
|
|||
"totalResults": total,
|
||||
"posts": posts[:10],
|
||||
}
|
||||
|
||||
async def get_cafe_info(self, cafe_url: str, *_args, **_kwargs) -> dict | None:
|
||||
"""네이버 카페 운영 신호 수집. 2단계 fetch:
|
||||
1) https://cafe.naver.com/{handle} → cafeId 추출
|
||||
2) ArticleList.nhn?search.clubid={cafeId} → memberCount + cafeName 추출
|
||||
본문/게시글은 로그인 필요라 못 가져옴. 회원수·카페명만 잡히면 충분.
|
||||
common.http_request는 redirect 안 따라가서 카페 페이지에 안 맞아 httpx 직접 사용."""
|
||||
handle = urlparse(cafe_url).path.strip("/").split("/")[0] if "://" in cafe_url else cafe_url.split("/")[-1]
|
||||
if not handle:
|
||||
return None
|
||||
|
||||
async with httpx.AsyncClient(
|
||||
timeout=10, follow_redirects=True,
|
||||
headers={"User-Agent": "Mozilla/5.0"},
|
||||
) as c:
|
||||
# 1. cafeId 추출
|
||||
try:
|
||||
main = await c.get(f"https://cafe.naver.com/{handle}")
|
||||
except Exception:
|
||||
return {"url": f"https://cafe.naver.com/{handle}", "cafeHandle": handle, "accessible": False}
|
||||
if main.status_code != 200:
|
||||
return {"url": f"https://cafe.naver.com/{handle}", "cafeHandle": handle, "accessible": False}
|
||||
cid_match = re.search(r'cafeId["\']?\s*[:=]\s*["\']?(\d+)', main.text)
|
||||
cafe_id = cid_match.group(1) if cid_match else None
|
||||
|
||||
result: dict = {
|
||||
"url": f"https://cafe.naver.com/{handle}",
|
||||
"cafeHandle": handle,
|
||||
"cafeId": cafe_id,
|
||||
"accessible": True,
|
||||
"cafeName": None,
|
||||
"memberCount": None,
|
||||
}
|
||||
if not cafe_id:
|
||||
return result
|
||||
|
||||
# 2. ArticleList 페이지에서 회원수 + 카페명 추출 (로그인 없이 접근 가능한 유일한 endpoint)
|
||||
try:
|
||||
listing = await c.get(
|
||||
f"https://cafe.naver.com/ArticleList.nhn?search.clubid={cafe_id}&search.menuid=&search.boardtype=L",
|
||||
headers={"Referer": f"https://cafe.naver.com/{handle}"},
|
||||
)
|
||||
except Exception:
|
||||
return result
|
||||
if listing.status_code != 200:
|
||||
return result
|
||||
mc = re.search(r'memberCount[^0-9]+(\d[\d,]*)', listing.text)
|
||||
if mc:
|
||||
result["memberCount"] = int(mc.group(1).replace(",", ""))
|
||||
tm = re.search(r"<title>(.+?)\s*:\s*네이버 카페</title>", listing.text)
|
||||
if tm:
|
||||
name = re.sub(r"&", "&", tm.group(1)).strip()
|
||||
if "," in name:
|
||||
name = name.split(",", 1)[1].strip()
|
||||
result["cafeName"] = name
|
||||
return result
|
||||
|
|
|
|||
|
|
@ -0,0 +1,66 @@
|
|||
"""홈페이지 HTML + 외부 CSS 를 가져오는 fetch 전용 모듈.
|
||||
|
||||
오래된 한국 의료 사이트들이 SSL DH_KEY_TOO_SMALL / cipher 약함 / host mismatch 등으로
|
||||
표준 fetch 에 차단되는 케이스가 많아 단계별 SSL fallback 으로 받는다.
|
||||
파싱·도메인 로직은 들어가지 않음 — 순수 HTTP 응답 본문 반환.
|
||||
"""
|
||||
import logging
|
||||
import re
|
||||
import ssl
|
||||
from urllib.parse import urljoin
|
||||
import httpx
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
CSS_LINK = re.compile(
|
||||
r'<link[^>]+rel=["\']stylesheet["\'][^>]+href=["\']([^"\']+)["\']',
|
||||
re.IGNORECASE,
|
||||
)
|
||||
|
||||
|
||||
def _make_ssl_context() -> ssl.SSLContext:
|
||||
"""보안 등급 1로 낮춤 + cert 검증 유지 (옛 한국 의료 사이트 cipher 약함 회피)."""
|
||||
ctx = ssl.create_default_context()
|
||||
try:
|
||||
ctx.set_ciphers("DEFAULT@SECLEVEL=1")
|
||||
except ssl.SSLError:
|
||||
pass
|
||||
return ctx
|
||||
|
||||
|
||||
async def fetch_html(url: str, timeout: float = 20.0) -> tuple[int, str]:
    """Fetch ``url`` and return ``(status_code, body_text)``, relaxing TLS checks step by step.

    Stages, tried in order: standard verification, weak-cipher context
    (``_make_ssl_context``), then ``verify=False``. A connection/TLS-level
    error (httpx.ConnectError, httpx.ReadError, ssl.SSLError) moves on to the
    next stage. Any other error (for example a timeout) is not a TLS problem,
    so it ends the attempt immediately.

    Never raises for request failures: returns ``(0, "")`` instead, as the
    contract states. Previously, non-TLS errors in the first two stages
    escaped to the caller.

    Args:
        url: Absolute URL to GET (redirects are followed).
        timeout: Per-request timeout in seconds passed to httpx.

    Returns:
        ``(status_code, text)`` of the first response obtained (any HTTP
        status), or ``(0, "")`` when no response could be obtained.
    """
    headers = {"User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36"}
    tls_errors = (httpx.ConnectError, httpx.ReadError, ssl.SSLError)
    # (stage label, factory for the httpx `verify` argument, label of the next stage).
    # Factories keep the weak-cipher context from being built unless needed.
    stages = (
        ("standard SSL", lambda: True, "weak cipher"),
        ("weak cipher", _make_ssl_context, "verify=False"),
        ("verify=False", lambda: False, None),
    )
    for label, make_verify, next_label in stages:
        try:
            async with httpx.AsyncClient(
                timeout=timeout, follow_redirects=True, headers=headers, verify=make_verify(),
            ) as client:
                response = await client.get(url)
                return response.status_code, response.text
        except Exception as exc:
            if next_label is None:
                # Last stage: every failure is final.
                logger.warning("[fetch] %s all fallbacks failed: %s", url, exc)
                return 0, ""
            if isinstance(exc, tls_errors):
                logger.info("[fetch] %s %s failed: %s — fallback to %s", url, label, exc, next_label)
                continue
            # Timeouts, protocol errors, invalid URLs: weaker TLS will not help.
            logger.warning("[fetch] %s request failed: %s", url, exc)
            return 0, ""
    return 0, ""
|
||||
|
||||
|
||||
async def fetch_html_and_css(homepage_url: str, max_css_files: int = 8) -> tuple[str, list[str]]:
    """Fetch the homepage HTML and up to ``max_css_files`` linked stylesheets.

    Stylesheet hrefs come from ``CSS_LINK`` matches in document order and are
    resolved against ``homepage_url``. Stylesheets that do not return a
    non-empty 200 response are skipped. Returns ``("", [])`` when the
    homepage itself fails.
    """
    status, html = await fetch_html(homepage_url)
    if not (status == 200 and html):
        logger.warning("[fetch] homepage fetch failed status=%s url=%s", status, homepage_url)
        return "", []

    stylesheets: list[str] = []
    hrefs = CSS_LINK.findall(html)[:max_css_files]
    for href in hrefs:
        css_status, css_body = await fetch_html(urljoin(homepage_url, href), timeout=15.0)
        if css_status != 200 or not css_body:
            continue
        stylesheets.append(css_body)
    return html, stylesheets
|
||||
|
|
@ -79,7 +79,17 @@ class YouTubeClient:
|
|||
if resp and resp.is_success:
|
||||
videos = resp.json().get("items", [])[:10]
|
||||
|
||||
return {"channelId": channel_id, "channel": channel, "videos": videos}
|
||||
playlists: list[dict] = []
|
||||
resp = await http_request(
|
||||
HTTPMethod.GET,
|
||||
url=f"{YT}/playlists",
|
||||
params={"part": "snippet", "channelId": channel_id, "maxResults": 50, "key": self.api_key},
|
||||
label="yt-playlists",
|
||||
)
|
||||
if resp and resp.is_success:
|
||||
playlists = resp.json().get("items", [])
|
||||
|
||||
return {"channelId": channel_id, "channel": channel, "videos": videos, "playlists": playlists}
|
||||
|
||||
async def get_channel(self, url: str) -> dict | None:
|
||||
raw = await self.fetch_channel(url)
|
||||
|
|
@ -111,6 +121,11 @@ class YouTubeClient:
|
|||
}
|
||||
for v in raw["videos"]
|
||||
],
|
||||
"playlists": [
|
||||
p.get("snippet", {}).get("title")
|
||||
for p in raw["playlists"]
|
||||
if p.get("snippet", {}).get("title")
|
||||
],
|
||||
}
|
||||
|
||||
async def search_channels(self, query: str, max_results: int = 3) -> list[str]:
|
||||
|
|
|
|||
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
227
app/mock_urls.py
227
app/mock_urls.py
|
|
@ -32,6 +32,7 @@ MOCK_CLINICS = [
|
|||
"naverCafe": "",
|
||||
"gangnamUnni": "www.gangnamunni.com/hospitals/23",
|
||||
"tiktok": "tiktok.com/@banobagi_official",
|
||||
"tiktokEn": "tiktok.com/@banobagi_english",
|
||||
"instagramEn": "instagram.com/english_banobagi",
|
||||
"facebookEn": "facebook.com/englishbanobagi",
|
||||
"kakaoTalk": "pf.kakao.com/_bCMxfE"
|
||||
|
|
@ -49,6 +50,7 @@ MOCK_CLINICS = [
|
|||
"naverCafe": "",
|
||||
"gangnamUnni": "www.gangnamunni.com/hospitals/257",
|
||||
"tiktok": "tiktok.com/@idhospitalkorea",
|
||||
"tiktokEn": "tiktok.com/@idhospital_eng",
|
||||
"instagramEn": "instagram.com/idhospitalkorea",
|
||||
"facebookEn": "facebook.com/idhospital.eng",
|
||||
"kakaoTalk": "pf.kakao.com/_yxlxmxhE"
|
||||
|
|
@ -63,9 +65,10 @@ MOCK_CLINICS = [
|
|||
"facebook": "facebook.com/jkmedicalgroup",
|
||||
"naverPlace": "https://naver.me/x67y6cAc",
|
||||
"naverBlog": "blog.naver.com/jkstory1",
|
||||
"naverCafe": "",
|
||||
"gangnamUnni": "www.gangnamunni.com/hospitals/858",
|
||||
"naverCafe": "cafe.naver.com/seoljyps",
|
||||
"gangnamUnni": "",
|
||||
"tiktok": "tiktok.com/@jkplastic",
|
||||
"tiktokEn": "",
|
||||
"instagramEn": "instagram.com/jkplasticsurgery",
|
||||
"facebookEn": "facebook.com/jkplastic",
|
||||
"kakaoTalk": "pf.kakao.com/_uCxeRE"
|
||||
|
|
@ -83,6 +86,7 @@ MOCK_CLINICS = [
|
|||
"naverCafe": "",
|
||||
"gangnamUnni": "www.gangnamunni.com/hospitals/62",
|
||||
"tiktok": "",
|
||||
"tiktokEn": "",
|
||||
"instagramEn": "instagram.com/grandps_eng",
|
||||
"facebookEn": "facebook.com/grandplasticsurgery",
|
||||
"kakaoTalk": "pf.kakao.com/_SxixbqV"
|
||||
|
|
@ -94,12 +98,13 @@ MOCK_CLINICS = [
|
|||
"homepage": "bkhospital.com",
|
||||
"youtube": "youtube.com/channel/UChJONft3hemy5DGbXUveTFg",
|
||||
"instagram": "instagram.com/bkhospital_korea",
|
||||
"facebook": "",
|
||||
"facebook": "facebook.com/bkhospitalkorea",
|
||||
"naverPlace": "https://naver.me/517CTH3W",
|
||||
"naverBlog": "",
|
||||
"naverCafe": "",
|
||||
"gangnamUnni": "",
|
||||
"tiktok": "",
|
||||
"tiktokEn": "",
|
||||
"instagramEn": "instagram.com/english_bkhospital",
|
||||
"facebookEn": "facebook.com/BKPSKoreaE",
|
||||
"kakaoTalk": "pf.kakao.com/_kpxjLV"
|
||||
|
|
@ -114,9 +119,10 @@ MOCK_CLINICS = [
|
|||
"facebook": "facebook.com/toxnfill.official",
|
||||
"naverPlace": "https://naver.me/FvEmJIHA",
|
||||
"naverBlog": "blog.naver.com/toxnfill",
|
||||
"naverCafe": "",
|
||||
"naverCafe": "cafe.naver.com/toxnfill",
|
||||
"gangnamUnni": "www.gangnamunni.com/hospitals/3702",
|
||||
"tiktok": "tiktok.com/@toxnfillglobal",
|
||||
"tiktokEn": "",
|
||||
"instagramEn": "instagram.com/toxnfill_global",
|
||||
"facebookEn": "facebook.com/p/Toxnfill-Global-61557593068252",
|
||||
"kakaoTalk": "pf.kakao.com/_EKXJxl"
|
||||
|
|
@ -133,7 +139,8 @@ MOCK_CLINICS = [
|
|||
"naverBlog": "blog.naver.com/with_theps",
|
||||
"naverCafe": "",
|
||||
"gangnamUnni": "www.gangnamunni.com/hospitals/30",
|
||||
"tiktok": "",
|
||||
"tiktok": "tiktok.com/@the_plasticsurgery",
|
||||
"tiktokEn": "",
|
||||
"instagramEn": "instagram.com/the_plasticsurgery.en",
|
||||
"facebookEn": "facebook.com/theps.english",
|
||||
"kakaoTalk": "pf.kakao.com/_yUAZE"
|
||||
|
|
@ -151,9 +158,10 @@ MOCK_CLINICS = [
|
|||
"naverCafe": "",
|
||||
"gangnamUnni": "www.gangnamunni.com/hospitals/125",
|
||||
"tiktok": "tiktok.com/@oracleclinic_usa",
|
||||
"tiktokEn": "",
|
||||
"instagramEn": "instagram.com/oracleclinic_global",
|
||||
"facebookEn": "facebook.com/oracleclinicglobal",
|
||||
"kakaoTalk": ""
|
||||
"kakaoTalk": "pf.kakao.com/_RHjhl"
|
||||
}
|
||||
},
|
||||
{
|
||||
|
|
@ -168,6 +176,7 @@ MOCK_CLINICS = [
|
|||
"naverCafe": "",
|
||||
"gangnamUnni": "www.gangnamunni.com/hospitals/2500",
|
||||
"tiktok": "tiktok.com/@wonjin_official",
|
||||
"tiktokEn": "tiktok.com/@wonjinglobal",
|
||||
"instagramEn": "instagram.com/wonjinplasticsurgery",
|
||||
"facebookEn": "facebook.com/wonjinplasticsurgery",
|
||||
"kakaoTalk": "pf.kakao.com/_bxflzE"
|
||||
|
|
@ -179,12 +188,13 @@ MOCK_CLINICS = [
|
|||
"homepage": "mindprs.com",
|
||||
"youtube": "youtube.com/channel/UCzM5tIgkC8Es10YmLI55R_w",
|
||||
"instagram": "instagram.com/mind.prs",
|
||||
"facebook": "",
|
||||
"facebook": "facebook.com/mindprs",
|
||||
"naverPlace": "https://m.place.naver.com/hospital/1342923541",
|
||||
"naverBlog": "",
|
||||
"naverCafe": "",
|
||||
"gangnamUnni": "www.gangnamunni.com/hospitals/729",
|
||||
"tiktok": "",
|
||||
"tiktokEn": "",
|
||||
"instagramEn": "instagram.com/mindprs_eng",
|
||||
"facebookEn": "facebook.com/mindprseng",
|
||||
"kakaoTalk": "pf.kakao.com/_kmxodC"
|
||||
|
|
@ -202,6 +212,7 @@ MOCK_CLINICS = [
|
|||
"naverCafe": "",
|
||||
"gangnamUnni": "www.gangnamunni.com/hospitals/215",
|
||||
"tiktok": "tiktok.com/@braunps",
|
||||
"tiktokEn": "",
|
||||
"instagramEn": "instagram.com/braunpskr",
|
||||
"facebookEn": "facebook.com/braunpskr",
|
||||
"kakaoTalk": "pf.kakao.com/_gMVxaT"
|
||||
|
|
@ -219,6 +230,7 @@ MOCK_CLINICS = [
|
|||
"naverCafe": "",
|
||||
"gangnamUnni": "www.gangnamunni.com/hospitals/926",
|
||||
"tiktok": "",
|
||||
"tiktokEn": "",
|
||||
"instagramEn": "",
|
||||
"facebookEn": "",
|
||||
"kakaoTalk": "pf.kakao.com/_JxoeFu"
|
||||
|
|
@ -236,8 +248,9 @@ MOCK_CLINICS = [
|
|||
"naverCafe": "",
|
||||
"gangnamUnni": "www.gangnamunni.com/hospitals/938",
|
||||
"tiktok": "tiktok.com/@nanahospitaleng",
|
||||
"tiktokEn": "",
|
||||
"instagramEn": "instagram.com/nanahospital_eng",
|
||||
"facebookEn": "",
|
||||
"facebookEn": "facebook.com/nanahospital.eng",
|
||||
"kakaoTalk": "pf.kakao.com/_ZxgKUC"
|
||||
}
|
||||
},
|
||||
|
|
@ -253,8 +266,9 @@ MOCK_CLINICS = [
|
|||
"naverCafe": "",
|
||||
"gangnamUnni": "www.gangnamunni.com/hospitals/2186",
|
||||
"tiktok": "",
|
||||
"instagramEn": "",
|
||||
"facebookEn": "",
|
||||
"tiktokEn": "",
|
||||
"instagramEn": "instagram.com/noteprs_eng",
|
||||
"facebookEn": "facebook.com/Noteprs.English",
|
||||
"kakaoTalk": "pf.kakao.com/_xbvuxaT"
|
||||
}
|
||||
},
|
||||
|
|
@ -270,8 +284,9 @@ MOCK_CLINICS = [
|
|||
"naverCafe": "",
|
||||
"gangnamUnni": "www.gangnamunni.com/hospitals/250",
|
||||
"tiktok": "tiktok.com/@daprs",
|
||||
"tiktokEn": "",
|
||||
"instagramEn": "instagram.com/daplasticsurgery_korea",
|
||||
"facebookEn": "",
|
||||
"facebookEn": "facebook.com/daplasticsurgery",
|
||||
"kakaoTalk": "pf.kakao.com/_Qqrzd"
|
||||
}
|
||||
},
|
||||
|
|
@ -286,9 +301,10 @@ MOCK_CLINICS = [
|
|||
"naverBlog": "blog.naver.com/abps20",
|
||||
"naverCafe": "",
|
||||
"gangnamUnni": "www.gangnamunni.com/hospitals/3004",
|
||||
"tiktok": "",
|
||||
"instagramEn": "",
|
||||
"facebookEn": "",
|
||||
"tiktok": "tiktok.com/@abplasticsurgeryeng",
|
||||
"tiktokEn": "",
|
||||
"instagramEn": "instagram.com/ab_plasticsurgery_eng",
|
||||
"facebookEn": "facebook.com/abplasticsurgeryeng",
|
||||
"kakaoTalk": "pf.kakao.com/_lJnuK"
|
||||
}
|
||||
},
|
||||
|
|
@ -301,10 +317,11 @@ MOCK_CLINICS = [
|
|||
"facebook": "facebook.com/biopskorea",
|
||||
"naverPlace": "https://m.place.naver.com/hospital/21428819",
|
||||
"naverBlog": "blog.naver.com/bioplastics20",
|
||||
"naverCafe": "",
|
||||
"naverCafe": "cafe.naver.com/prsyang",
|
||||
"gangnamUnni": "",
|
||||
"tiktok": "",
|
||||
"instagramEn": "",
|
||||
"tiktokEn": "",
|
||||
"instagramEn": "instagram.com/bioplasticsurgery",
|
||||
"facebookEn": "",
|
||||
"kakaoTalk": ""
|
||||
}
|
||||
|
|
@ -321,6 +338,7 @@ MOCK_CLINICS = [
|
|||
"naverCafe": "",
|
||||
"gangnamUnni": "www.gangnamunni.com/hospitals/139",
|
||||
"tiktok": "",
|
||||
"tiktokEn": "",
|
||||
"instagramEn": "instagram.com/jayjunps_eng",
|
||||
"facebookEn": "facebook.com/jayjuneng",
|
||||
"kakaoTalk": "pf.kakao.com/_XxmSFV"
|
||||
|
|
@ -338,8 +356,9 @@ MOCK_CLINICS = [
|
|||
"naverCafe": "",
|
||||
"gangnamUnni": "www.gangnamunni.com/hospitals/141",
|
||||
"tiktok": "",
|
||||
"tiktokEn": "",
|
||||
"instagramEn": "instagram.com/marbleps_english",
|
||||
"facebookEn": "",
|
||||
"facebookEn": "facebook.com/marbleps.eng",
|
||||
"kakaoTalk": "pf.kakao.com/_xefxmxiM"
|
||||
}
|
||||
},
|
||||
|
|
@ -355,6 +374,7 @@ MOCK_CLINICS = [
|
|||
"naverCafe": "",
|
||||
"gangnamUnni": "www.gangnamunni.com/hospitals/55",
|
||||
"tiktok": "",
|
||||
"tiktokEn": "",
|
||||
"instagramEn": "instagram.com/jewelryps_global",
|
||||
"facebookEn": "facebook.com/jewelrypsglobal",
|
||||
"kakaoTalk": "pf.kakao.com/_SSaRE"
|
||||
|
|
@ -372,7 +392,8 @@ MOCK_CLINICS = [
|
|||
"naverCafe": "",
|
||||
"gangnamUnni": "www.gangnamunni.com/hospitals/116",
|
||||
"tiktok": "",
|
||||
"instagramEn": "",
|
||||
"tiktokEn": "",
|
||||
"instagramEn": "instagram.com/tsprsglobal",
|
||||
"facebookEn": "",
|
||||
"kakaoTalk": "pf.kakao.com/_Jxhduxd"
|
||||
}
|
||||
|
|
@ -389,8 +410,9 @@ MOCK_CLINICS = [
|
|||
"naverCafe": "",
|
||||
"gangnamUnni": "www.gangnamunni.com/hospitals/248",
|
||||
"tiktok": "tiktok.com/@yuno_global",
|
||||
"instagramEn": "",
|
||||
"facebookEn": "",
|
||||
"tiktokEn": "",
|
||||
"instagramEn": "instagram.com/yuno.global",
|
||||
"facebookEn": "facebook.com/yuno.global",
|
||||
"kakaoTalk": "pf.kakao.com/_xkWbSu"
|
||||
}
|
||||
},
|
||||
|
|
@ -405,7 +427,8 @@ MOCK_CLINICS = [
|
|||
"naverBlog": "blog.naver.com/night140160",
|
||||
"naverCafe": "cafe.naver.com/litingps",
|
||||
"gangnamUnni": "www.gangnamunni.com/hospitals/331",
|
||||
"tiktok": "",
|
||||
"tiktok": "tiktok.com/@liting_ps",
|
||||
"tiktokEn": "",
|
||||
"instagramEn": "",
|
||||
"facebookEn": "",
|
||||
"kakaoTalk": "pf.kakao.com/_xfxmiDu"
|
||||
|
|
@ -422,9 +445,10 @@ MOCK_CLINICS = [
|
|||
"naverBlog": "blog.naver.com/answerps",
|
||||
"naverCafe": "",
|
||||
"gangnamUnni": "www.gangnamunni.com/hospitals/1449",
|
||||
"tiktok": "",
|
||||
"instagramEn": "",
|
||||
"facebookEn": "",
|
||||
"tiktok": "tiktok.com/@answerplasticsurgery1",
|
||||
"tiktokEn": "",
|
||||
"instagramEn": "instagram.com/answerplasticsurgery",
|
||||
"facebookEn": "facebook.com/answerplasticsurgeryseoul",
|
||||
"kakaoTalk": ""
|
||||
}
|
||||
},
|
||||
|
|
@ -440,6 +464,7 @@ MOCK_CLINICS = [
|
|||
"naverCafe": "",
|
||||
"gangnamUnni": "www.gangnamunni.com/hospitals/2196",
|
||||
"tiktok": "tiktok.com/@thefixps2860",
|
||||
"tiktokEn": "",
|
||||
"instagramEn": "",
|
||||
"facebookEn": "",
|
||||
"kakaoTalk": ""
|
||||
|
|
@ -454,9 +479,10 @@ MOCK_CLINICS = [
|
|||
"facebook": "",
|
||||
"naverPlace": "https://m.place.naver.com/hospital/21705376",
|
||||
"naverBlog": "blog.naver.com/girinlife",
|
||||
"naverCafe": "",
|
||||
"naverCafe": "cafe.naver.com/girinlife",
|
||||
"gangnamUnni": "www.gangnamunni.com/hospitals/398",
|
||||
"tiktok": "tiktok.com/@girin_korea",
|
||||
"tiktokEn": "",
|
||||
"instagramEn": "instagram.com/girin_eng",
|
||||
"facebookEn": "facebook.com/girinplasticsurgery.en",
|
||||
"kakaoTalk": "pf.kakao.com/_mtiyl"
|
||||
|
|
@ -468,12 +494,13 @@ MOCK_CLINICS = [
|
|||
"homepage": "asps.co.kr",
|
||||
"youtube": "youtube.com/channel/UCqNtVCL2u5Xvx74ymboxDyg",
|
||||
"instagram": "instagram.com/asps_no.1",
|
||||
"facebook": "",
|
||||
"facebook": "facebook.com/seoulps",
|
||||
"naverPlace": "https://m.place.naver.com/hospital/11531189",
|
||||
"naverBlog": "blog.naver.com/asps0119",
|
||||
"naverCafe": "",
|
||||
"naverCafe": "cafe.naver.com/fox5282",
|
||||
"gangnamUnni": "www.gangnamunni.com/hospitals/213",
|
||||
"tiktok": "",
|
||||
"tiktokEn": "",
|
||||
"instagramEn": "instagram.com/asps_en",
|
||||
"facebookEn": "",
|
||||
"kakaoTalk": "pf.kakao.com/_bMkqxd"
|
||||
|
|
@ -491,6 +518,7 @@ MOCK_CLINICS = [
|
|||
"naverCafe": "",
|
||||
"gangnamUnni": "www.gangnamunni.com/hospitals/4154",
|
||||
"tiktok": "",
|
||||
"tiktokEn": "",
|
||||
"instagramEn": "",
|
||||
"facebookEn": "",
|
||||
"kakaoTalk": "pf.kakao.com/_jPxgNxd"
|
||||
|
|
@ -508,6 +536,7 @@ MOCK_CLINICS = [
|
|||
"naverCafe": "",
|
||||
"gangnamUnni": "www.gangnamunni.com/hospitals/623",
|
||||
"tiktok": "",
|
||||
"tiktokEn": "",
|
||||
"instagramEn": "",
|
||||
"facebookEn": "",
|
||||
"kakaoTalk": "pf.kakao.com/_Jntxnb"
|
||||
|
|
@ -519,14 +548,15 @@ MOCK_CLINICS = [
|
|||
"homepage": "objetps.com",
|
||||
"youtube": "youtube.com/channel/UC2QrFhj-S8oUrbfOp1NXKZA",
|
||||
"instagram": "instagram.com/objet_plastic_surgery",
|
||||
"facebook": "",
|
||||
"facebook": "facebook.com/p/오브제성형외과-100064266697249",
|
||||
"naverPlace": "",
|
||||
"naverBlog": "blog.naver.com/objetps",
|
||||
"naverCafe": "",
|
||||
"gangnamUnni": "www.gangnamunni.com/hospitals/2122",
|
||||
"tiktok": "",
|
||||
"instagramEn": "",
|
||||
"facebookEn": "",
|
||||
"tiktokEn": "",
|
||||
"instagramEn": "instagram.com/objetplasticsurgery_eng",
|
||||
"facebookEn": "facebook.com/61584552036644",
|
||||
"kakaoTalk": "pf.kakao.com/_HtZsxb"
|
||||
}
|
||||
},
|
||||
|
|
@ -542,6 +572,7 @@ MOCK_CLINICS = [
|
|||
"naverCafe": "",
|
||||
"gangnamUnni": "www.gangnamunni.com/hospitals/6597",
|
||||
"tiktok": "",
|
||||
"tiktokEn": "",
|
||||
"instagramEn": "",
|
||||
"facebookEn": "",
|
||||
"kakaoTalk": "pf.kakao.com/_eTyqn"
|
||||
|
|
@ -559,7 +590,8 @@ MOCK_CLINICS = [
|
|||
"naverCafe": "",
|
||||
"gangnamUnni": "www.gangnamunni.com/hospitals/166",
|
||||
"tiktok": "tiktok.com/@eighthospital.th",
|
||||
"instagramEn": "",
|
||||
"tiktokEn": "",
|
||||
"instagramEn": "instagram.com/eightplasticsurgery",
|
||||
"facebookEn": "",
|
||||
"kakaoTalk": "pf.kakao.com/_zxafjxl"
|
||||
}
|
||||
|
|
@ -576,6 +608,7 @@ MOCK_CLINICS = [
|
|||
"naverCafe": "",
|
||||
"gangnamUnni": "www.gangnamunni.com/hospitals/108",
|
||||
"tiktok": "",
|
||||
"tiktokEn": "",
|
||||
"instagramEn": "instagram.com/opera_plastic_surgery_eng",
|
||||
"facebookEn": "",
|
||||
"kakaoTalk": "pf.kakao.com/_iCPwE"
|
||||
|
|
@ -593,6 +626,7 @@ MOCK_CLINICS = [
|
|||
"naverCafe": "",
|
||||
"gangnamUnni": "www.gangnamunni.com/hospitals/69",
|
||||
"tiktok": "tiktok.com/@lienjang_ps",
|
||||
"tiktokEn": "",
|
||||
"instagramEn": "instagram.com/lienjang_english",
|
||||
"facebookEn": "facebook.com/lienjang.english",
|
||||
"kakaoTalk": "pf.kakao.com/_xlhEyl"
|
||||
|
|
@ -607,9 +641,10 @@ MOCK_CLINICS = [
|
|||
"facebook": "",
|
||||
"naverPlace": "https://m.place.naver.com/hospital/21868487",
|
||||
"naverBlog": "",
|
||||
"naverCafe": "",
|
||||
"naverCafe": "cafe.naver.com/luho1",
|
||||
"gangnamUnni": "www.gangnamunni.com/hospitals/660",
|
||||
"tiktok": "tiktok.com/@luho_beauty_jp",
|
||||
"tiktokEn": "",
|
||||
"instagramEn": "instagram.com/luho_global",
|
||||
"facebookEn": "",
|
||||
"kakaoTalk": "pf.kakao.com/_kvrXj"
|
||||
|
|
@ -627,6 +662,7 @@ MOCK_CLINICS = [
|
|||
"naverCafe": "",
|
||||
"gangnamUnni": "www.gangnamunni.com/hospitals/4244",
|
||||
"tiktok": "",
|
||||
"tiktokEn": "",
|
||||
"instagramEn": "",
|
||||
"facebookEn": "",
|
||||
"kakaoTalk": "pf.kakao.com/_eVmCxj"
|
||||
|
|
@ -644,6 +680,7 @@ MOCK_CLINICS = [
|
|||
"naverCafe": "",
|
||||
"gangnamUnni": "www.gangnamunni.com/hospitals/300",
|
||||
"tiktok": "tiktok.com/@atopplasticsurgery",
|
||||
"tiktokEn": "",
|
||||
"instagramEn": "instagram.com/atop_plasticsurgery",
|
||||
"facebookEn": "facebook.com/atoppsglobal",
|
||||
"kakaoTalk": "pf.kakao.com/_tePHd"
|
||||
|
|
@ -661,6 +698,7 @@ MOCK_CLINICS = [
|
|||
"naverCafe": "",
|
||||
"gangnamUnni": "www.gangnamunni.com/hospitals/563",
|
||||
"tiktok": "",
|
||||
"tiktokEn": "",
|
||||
"instagramEn": "",
|
||||
"facebookEn": "",
|
||||
"kakaoTalk": "pf.kakao.com/_xcwXAxd"
|
||||
|
|
@ -678,6 +716,7 @@ MOCK_CLINICS = [
|
|||
"naverCafe": "cafe.naver.com/starclinic",
|
||||
"gangnamUnni": "",
|
||||
"tiktok": "",
|
||||
"tiktokEn": "",
|
||||
"instagramEn": "",
|
||||
"facebookEn": "",
|
||||
"kakaoTalk": "pf.kakao.com/_xhXPdl"
|
||||
|
|
@ -695,9 +734,10 @@ MOCK_CLINICS = [
|
|||
"naverCafe": "",
|
||||
"gangnamUnni": "",
|
||||
"tiktok": "",
|
||||
"instagramEn": "",
|
||||
"tiktokEn": "",
|
||||
"instagramEn": "instagram.com/itemps_eng",
|
||||
"facebookEn": "",
|
||||
"kakaoTalk": ""
|
||||
"kakaoTalk": "pf.kakao.com/_DxmNeK"
|
||||
}
|
||||
},
|
||||
{
|
||||
|
|
@ -712,6 +752,7 @@ MOCK_CLINICS = [
|
|||
"naverCafe": "",
|
||||
"gangnamUnni": "",
|
||||
"tiktok": "",
|
||||
"tiktokEn": "",
|
||||
"instagramEn": "",
|
||||
"facebookEn": "",
|
||||
"kakaoTalk": "pf.kakao.com/_RWpexd"
|
||||
|
|
@ -728,9 +769,10 @@ MOCK_CLINICS = [
|
|||
"naverBlog": "blog.naver.com/2amsomething",
|
||||
"naverCafe": "",
|
||||
"gangnamUnni": "www.gangnamunni.com/hospitals/2991",
|
||||
"tiktok": "",
|
||||
"instagramEn": "",
|
||||
"facebookEn": "",
|
||||
"tiktok": "tiktok.com/@pspskorea",
|
||||
"tiktokEn": "",
|
||||
"instagramEn": "instagram.com/pspsenglish",
|
||||
"facebookEn": "facebook.com/pspskorea",
|
||||
"kakaoTalk": "pf.kakao.com/_vCLxdK"
|
||||
}
|
||||
},
|
||||
|
|
@ -745,7 +787,8 @@ MOCK_CLINICS = [
|
|||
"naverBlog": "",
|
||||
"naverCafe": "",
|
||||
"gangnamUnni": "www.gangnamunni.com/hospitals/58",
|
||||
"tiktok": "",
|
||||
"tiktok": "tiktok.com/@iwellps",
|
||||
"tiktokEn": "",
|
||||
"instagramEn": "",
|
||||
"facebookEn": "facebook.com/iwellpseng",
|
||||
"kakaoTalk": "pf.kakao.com/_Sxlhql"
|
||||
|
|
@ -763,6 +806,7 @@ MOCK_CLINICS = [
|
|||
"naverCafe": "",
|
||||
"gangnamUnni": "www.gangnamunni.com/hospitals/54",
|
||||
"tiktok": "",
|
||||
"tiktokEn": "",
|
||||
"instagramEn": "instagram.com/glovi_eng",
|
||||
"facebookEn": "facebook.com/glovieng",
|
||||
"kakaoTalk": "pf.kakao.com/_cVdhxh"
|
||||
|
|
@ -780,6 +824,7 @@ MOCK_CLINICS = [
|
|||
"naverCafe": "",
|
||||
"gangnamUnni": "www.gangnamunni.com/hospitals/1181",
|
||||
"tiktok": "",
|
||||
"tiktokEn": "",
|
||||
"instagramEn": "",
|
||||
"facebookEn": "",
|
||||
"kakaoTalk": "pf.kakao.com/_xcsxaZl"
|
||||
|
|
@ -790,13 +835,14 @@ MOCK_CLINICS = [
|
|||
"urls": {
|
||||
"homepage": "onepeakps.com",
|
||||
"youtube": "",
|
||||
"instagram": "",
|
||||
"instagram": "instagram.com/onepeakps",
|
||||
"facebook": "",
|
||||
"naverPlace": "",
|
||||
"naverBlog": "",
|
||||
"naverCafe": "",
|
||||
"gangnamUnni": "www.gangnamunni.com/hospitals/3000",
|
||||
"tiktok": "",
|
||||
"tiktokEn": "",
|
||||
"instagramEn": "",
|
||||
"facebookEn": "",
|
||||
"kakaoTalk": "pf.kakao.com/_jwbqK"
|
||||
|
|
@ -813,9 +859,10 @@ MOCK_CLINICS = [
|
|||
"naverBlog": "blog.naver.com/ina3599",
|
||||
"naverCafe": "",
|
||||
"gangnamUnni": "www.gangnamunni.com/hospitals/66",
|
||||
"tiktok": "",
|
||||
"instagramEn": "",
|
||||
"facebookEn": "",
|
||||
"tiktok": "tiktok.com/@topfaceplasticsurgery",
|
||||
"tiktokEn": "",
|
||||
"instagramEn": "instagram.com/topfaceplasticsurgeryclinic",
|
||||
"facebookEn": "facebook.com/topfaceplasticsurgery",
|
||||
"kakaoTalk": "pf.kakao.com/_BqwFxb"
|
||||
}
|
||||
},
|
||||
|
|
@ -824,13 +871,14 @@ MOCK_CLINICS = [
|
|||
"urls": {
|
||||
"homepage": "http://semin100.co.kr",
|
||||
"youtube": "youtube.com/channel/UCKaNYEvRqME2h1lUSOYIYew",
|
||||
"instagram": "",
|
||||
"instagram": "instagram.com/seminps",
|
||||
"facebook": "",
|
||||
"naverPlace": "",
|
||||
"naverBlog": "blog.naver.com/semin100",
|
||||
"naverCafe": "",
|
||||
"gangnamUnni": "",
|
||||
"tiktok": "",
|
||||
"tiktokEn": "",
|
||||
"instagramEn": "",
|
||||
"facebookEn": "",
|
||||
"kakaoTalk": ""
|
||||
|
|
@ -848,6 +896,7 @@ MOCK_CLINICS = [
|
|||
"naverCafe": "cafe.naver.com/baekstage",
|
||||
"gangnamUnni": "",
|
||||
"tiktok": "",
|
||||
"tiktokEn": "",
|
||||
"instagramEn": "",
|
||||
"facebookEn": "",
|
||||
"kakaoTalk": "pf.kakao.com/_XxdSxmE"
|
||||
|
|
@ -865,6 +914,7 @@ MOCK_CLINICS = [
|
|||
"naverCafe": "",
|
||||
"gangnamUnni": "www.gangnamunni.com/hospitals/4749",
|
||||
"tiktok": "",
|
||||
"tiktokEn": "",
|
||||
"instagramEn": "",
|
||||
"facebookEn": "",
|
||||
"kakaoTalk": "pf.kakao.com/_cxekpxd"
|
||||
|
|
@ -881,7 +931,8 @@ MOCK_CLINICS = [
|
|||
"naverBlog": "",
|
||||
"naverCafe": "",
|
||||
"gangnamUnni": "www.gangnamunni.com/hospitals/5500",
|
||||
"tiktok": "",
|
||||
"tiktok": "tiktok.com/@dl_plastic_en",
|
||||
"tiktokEn": "",
|
||||
"instagramEn": "instagram.com/dl_plastic_int",
|
||||
"facebookEn": "facebook.com/dl.plastic.en",
|
||||
"kakaoTalk": "pf.kakao.com/_xgpMCG"
|
||||
|
|
@ -899,6 +950,7 @@ MOCK_CLINICS = [
|
|||
"naverCafe": "",
|
||||
"gangnamUnni": "www.gangnamunni.com/hospitals/431",
|
||||
"tiktok": "",
|
||||
"tiktokEn": "",
|
||||
"instagramEn": "",
|
||||
"facebookEn": "",
|
||||
"kakaoTalk": "pf.kakao.com/_xjepWxl"
|
||||
|
|
@ -916,43 +968,28 @@ MOCK_CLINICS = [
|
|||
"naverCafe": "",
|
||||
"gangnamUnni": "",
|
||||
"tiktok": "",
|
||||
"tiktokEn": "",
|
||||
"instagramEn": "instagram.com/laprin_en",
|
||||
"facebookEn": "facebook.com/Laprinprincess.en",
|
||||
"kakaoTalk": "pf.kakao.com/_xieNxmV"
|
||||
}
|
||||
},
|
||||
{
|
||||
"label": "도도성형외과",
|
||||
"urls": {
|
||||
"homepage": "http://dodobeauty.com",
|
||||
"youtube": "",
|
||||
"instagram": "",
|
||||
"facebook": "",
|
||||
"naverPlace": "",
|
||||
"naverBlog": "blog.naver.com/mmscjh",
|
||||
"naverCafe": "",
|
||||
"gangnamUnni": "",
|
||||
"tiktok": "",
|
||||
"instagramEn": "",
|
||||
"facebookEn": "",
|
||||
"kakaoTalk": ""
|
||||
}
|
||||
},
|
||||
{
|
||||
"label": "엠제이성형외과",
|
||||
"urls": {
|
||||
"homepage": "mjskinclinic.com",
|
||||
"youtube": "youtube.com/channel/UCFjkFyYDu4HpLjc9axlh6YQ",
|
||||
"instagram": "",
|
||||
"instagram": "instagram.com/mjskinclinic",
|
||||
"facebook": "facebook.com/people/MJ피부과/100063928095304",
|
||||
"naverPlace": "",
|
||||
"naverBlog": "blog.naver.com/mjskinclinic",
|
||||
"naverCafe": "",
|
||||
"gangnamUnni": "",
|
||||
"tiktok": "",
|
||||
"instagramEn": "",
|
||||
"tiktokEn": "",
|
||||
"instagramEn": "instagram.com/mjskinclinic_en",
|
||||
"facebookEn": "",
|
||||
"kakaoTalk": ""
|
||||
"kakaoTalk": "pf.kakao.com/_xcdgixb"
|
||||
}
|
||||
},
|
||||
{
|
||||
|
|
@ -967,6 +1004,7 @@ MOCK_CLINICS = [
|
|||
"naverCafe": "",
|
||||
"gangnamUnni": "www.gangnamunni.com/hospitals/5870",
|
||||
"tiktok": "",
|
||||
"tiktokEn": "",
|
||||
"instagramEn": "",
|
||||
"facebookEn": "",
|
||||
"kakaoTalk": "pf.kakao.com/_HKxkxeG"
|
||||
|
|
@ -984,6 +1022,7 @@ MOCK_CLINICS = [
|
|||
"naverCafe": "",
|
||||
"gangnamUnni": "www.gangnamunni.com/hospitals/2052",
|
||||
"tiktok": "",
|
||||
"tiktokEn": "",
|
||||
"instagramEn": "",
|
||||
"facebookEn": "",
|
||||
"kakaoTalk": "pf.kakao.com/_xfjyQj"
|
||||
|
|
@ -1001,6 +1040,7 @@ MOCK_CLINICS = [
|
|||
"naverCafe": "",
|
||||
"gangnamUnni": "www.gangnamunni.com/hospitals/4459",
|
||||
"tiktok": "",
|
||||
"tiktokEn": "",
|
||||
"instagramEn": "",
|
||||
"facebookEn": "",
|
||||
"kakaoTalk": "pf.kakao.com/_fCExej"
|
||||
|
|
@ -1018,6 +1058,7 @@ MOCK_CLINICS = [
|
|||
"naverCafe": "",
|
||||
"gangnamUnni": "www.gangnamunni.com/hospitals/339",
|
||||
"tiktok": "",
|
||||
"tiktokEn": "",
|
||||
"instagramEn": "",
|
||||
"facebookEn": "",
|
||||
"kakaoTalk": "pf.kakao.com/_tBclE"
|
||||
|
|
@ -1032,9 +1073,10 @@ MOCK_CLINICS = [
|
|||
"facebook": "",
|
||||
"naverPlace": "",
|
||||
"naverBlog": "",
|
||||
"naverCafe": "",
|
||||
"naverCafe": "cafe.naver.com/likeps",
|
||||
"gangnamUnni": "www.gangnamunni.com/hospitals/912",
|
||||
"tiktok": "",
|
||||
"tiktokEn": "",
|
||||
"instagramEn": "",
|
||||
"facebookEn": "",
|
||||
"kakaoTalk": "pf.kakao.com/_UxfEmC"
|
||||
|
|
@ -1052,6 +1094,7 @@ MOCK_CLINICS = [
|
|||
"naverCafe": "",
|
||||
"gangnamUnni": "www.gangnamunni.com/hospitals/1265",
|
||||
"tiktok": "",
|
||||
"tiktokEn": "",
|
||||
"instagramEn": "",
|
||||
"facebookEn": "",
|
||||
"kakaoTalk": "pf.kakao.com/_zJBPxl"
|
||||
|
|
@ -1069,6 +1112,7 @@ MOCK_CLINICS = [
|
|||
"naverCafe": "",
|
||||
"gangnamUnni": "",
|
||||
"tiktok": "",
|
||||
"tiktokEn": "",
|
||||
"instagramEn": "",
|
||||
"facebookEn": "",
|
||||
"kakaoTalk": "pf.kakao.com/_pICRM"
|
||||
|
|
@ -1083,9 +1127,10 @@ MOCK_CLINICS = [
|
|||
"facebook": "facebook.com/서울아이성형외과-105199207892670",
|
||||
"naverPlace": "https://m.place.naver.com/hospital/1240083198",
|
||||
"naverBlog": "blog.naver.com/seoulips",
|
||||
"naverCafe": "",
|
||||
"naverCafe": "cafe.naver.com/cometrueps",
|
||||
"gangnamUnni": "www.gangnamunni.com/hospitals/839",
|
||||
"tiktok": "tiktok.com/@seoulips_jp",
|
||||
"tiktokEn": "",
|
||||
"instagramEn": "instagram.com/seouli_eng",
|
||||
"facebookEn": "",
|
||||
"kakaoTalk": "pf.kakao.com/_pxlYUxb"
|
||||
|
|
@ -1103,9 +1148,10 @@ MOCK_CLINICS = [
|
|||
"naverCafe": "",
|
||||
"gangnamUnni": "www.gangnamunni.com/hospitals/5554",
|
||||
"tiktok": "",
|
||||
"instagramEn": "",
|
||||
"facebookEn": "",
|
||||
"kakaoTalk": ""
|
||||
"tiktokEn": "",
|
||||
"instagramEn": "instagram.com/saerops_en",
|
||||
"facebookEn": "facebook.com/p/SAERO-Plastic-Surgery-61552761680056",
|
||||
"kakaoTalk": "pf.kakao.com/_rMQTG"
|
||||
}
|
||||
},
|
||||
{
|
||||
|
|
@ -1119,7 +1165,8 @@ MOCK_CLINICS = [
|
|||
"naverBlog": "blog.naver.com/chaminst",
|
||||
"naverCafe": "",
|
||||
"gangnamUnni": "www.gangnamunni.com/hospitals/4212",
|
||||
"tiktok": "",
|
||||
"tiktok": "tiktok.com/@chaminps",
|
||||
"tiktokEn": "",
|
||||
"instagramEn": "",
|
||||
"facebookEn": "",
|
||||
"kakaoTalk": "pf.kakao.com/_afxgpb"
|
||||
|
|
@ -1137,28 +1184,12 @@ MOCK_CLINICS = [
|
|||
"naverCafe": "",
|
||||
"gangnamUnni": "www.gangnamunni.com/hospitals/6680",
|
||||
"tiktok": "",
|
||||
"tiktokEn": "",
|
||||
"instagramEn": "",
|
||||
"facebookEn": "",
|
||||
"kakaoTalk": "pf.kakao.com/_exfHsxb"
|
||||
}
|
||||
},
|
||||
{
|
||||
"label": "화이트성형외과",
|
||||
"urls": {
|
||||
"homepage": "http://whiteclinic.com",
|
||||
"youtube": "",
|
||||
"instagram": "",
|
||||
"facebook": "",
|
||||
"naverPlace": "",
|
||||
"naverBlog": "",
|
||||
"naverCafe": "",
|
||||
"gangnamUnni": "",
|
||||
"tiktok": "",
|
||||
"instagramEn": "",
|
||||
"facebookEn": "",
|
||||
"kakaoTalk": "pf.kakao.com/_FxdRvd"
|
||||
}
|
||||
},
|
||||
{
|
||||
"label": "미호성형외과",
|
||||
"urls": {
|
||||
|
|
@ -1171,6 +1202,7 @@ MOCK_CLINICS = [
|
|||
"naverCafe": "",
|
||||
"gangnamUnni": "www.gangnamunni.com/hospitals/369",
|
||||
"tiktok": "tiktok.com/@koreaplasticsurgery",
|
||||
"tiktokEn": "",
|
||||
"instagramEn": "instagram.com/mihops_en",
|
||||
"facebookEn": "",
|
||||
"kakaoTalk": "pf.kakao.com/_yxmexod"
|
||||
|
|
@ -1181,13 +1213,14 @@ MOCK_CLINICS = [
|
|||
"urls": {
|
||||
"homepage": "http://avenueps.com",
|
||||
"youtube": "youtube.com/channel/UCOTVAerYogSEia3L-ERkAFg",
|
||||
"instagram": "",
|
||||
"instagram": "instagram.com/avenueps15",
|
||||
"facebook": "",
|
||||
"naverPlace": "",
|
||||
"naverBlog": "",
|
||||
"naverCafe": "",
|
||||
"gangnamUnni": "www.gangnamunni.com/hospitals/6204",
|
||||
"tiktok": "",
|
||||
"tiktokEn": "",
|
||||
"instagramEn": "",
|
||||
"facebookEn": "",
|
||||
"kakaoTalk": ""
|
||||
|
|
@ -1198,13 +1231,14 @@ MOCK_CLINICS = [
|
|||
"urls": {
|
||||
"homepage": "bon-ps.com",
|
||||
"youtube": "youtube.com/channel/UClCjGIfEb3b1N-Q5tCNSU5w",
|
||||
"instagram": "",
|
||||
"instagram": "instagram.com/bon_p.s",
|
||||
"facebook": "",
|
||||
"naverPlace": "https://m.place.naver.com/hospital/1492257313",
|
||||
"naverBlog": "blog.naver.com/miz2199",
|
||||
"naverCafe": "",
|
||||
"gangnamUnni": "",
|
||||
"tiktok": "",
|
||||
"tiktokEn": "",
|
||||
"instagramEn": "",
|
||||
"facebookEn": "",
|
||||
"kakaoTalk": "pf.kakao.com/_AQZxbu"
|
||||
|
|
@ -1219,9 +1253,10 @@ MOCK_CLINICS = [
|
|||
"facebook": "",
|
||||
"naverPlace": "https://m.place.naver.com/hospital/1638918034",
|
||||
"naverBlog": "",
|
||||
"naverCafe": "",
|
||||
"naverCafe": "cafe.naver.com/arcps",
|
||||
"gangnamUnni": "www.gangnamunni.com/hospitals/3569",
|
||||
"tiktok": "",
|
||||
"tiktokEn": "",
|
||||
"instagramEn": "instagram.com/arcplasticsurgery_en",
|
||||
"facebookEn": "",
|
||||
"kakaoTalk": "pf.kakao.com/_dxdQxdb"
|
||||
|
|
@ -1239,6 +1274,7 @@ MOCK_CLINICS = [
|
|||
"naverCafe": "",
|
||||
"gangnamUnni": "",
|
||||
"tiktok": "",
|
||||
"tiktokEn": "",
|
||||
"instagramEn": "",
|
||||
"facebookEn": "",
|
||||
"kakaoTalk": "pf.kakao.com/_IxaxekV"
|
||||
|
|
@ -1256,7 +1292,8 @@ MOCK_CLINICS = [
|
|||
"naverCafe": "",
|
||||
"gangnamUnni": "www.gangnamunni.com/hospitals/1178",
|
||||
"tiktok": "",
|
||||
"instagramEn": "",
|
||||
"tiktokEn": "",
|
||||
"instagramEn": "instagram.com/younme_eng",
|
||||
"facebookEn": "",
|
||||
"kakaoTalk": "pf.kakao.com/_yKPmM"
|
||||
}
|
||||
|
|
@ -1273,7 +1310,8 @@ MOCK_CLINICS = [
|
|||
"naverCafe": "",
|
||||
"gangnamUnni": "www.gangnamunni.com/hospitals/3429",
|
||||
"tiktok": "",
|
||||
"instagramEn": "",
|
||||
"tiktokEn": "",
|
||||
"instagramEn": "instagram.com/mano_plastic_surgery_clinic",
|
||||
"facebookEn": "",
|
||||
"kakaoTalk": "pf.kakao.com/_ksJts"
|
||||
}
|
||||
|
|
@ -1290,6 +1328,7 @@ MOCK_CLINICS = [
|
|||
"naverCafe": "",
|
||||
"gangnamUnni": "www.gangnamunni.com/hospitals/413",
|
||||
"tiktok": "",
|
||||
"tiktokEn": "",
|
||||
"instagramEn": "",
|
||||
"facebookEn": "",
|
||||
"kakaoTalk": "pf.kakao.com/_JLxofl"
|
||||
|
|
@ -1307,6 +1346,7 @@ MOCK_CLINICS = [
|
|||
"naverCafe": "",
|
||||
"gangnamUnni": "",
|
||||
"tiktok": "",
|
||||
"tiktokEn": "",
|
||||
"instagramEn": "",
|
||||
"facebookEn": "",
|
||||
"kakaoTalk": "pf.kakao.com/_SCEJxj"
|
||||
|
|
@ -1321,9 +1361,10 @@ MOCK_CLINICS = [
|
|||
"facebook": "facebook.com/pages/category/Hospital/더원성형외과-1539415639613021",
|
||||
"naverPlace": "",
|
||||
"naverBlog": "blog.naver.com/brs0714",
|
||||
"naverCafe": "",
|
||||
"naverCafe": "cafe.naver.com/cephalos1.cafe",
|
||||
"gangnamUnni": "www.gangnamunni.com/hospitals/5636",
|
||||
"tiktok": "",
|
||||
"tiktokEn": "",
|
||||
"instagramEn": "",
|
||||
"facebookEn": "",
|
||||
"kakaoTalk": "pf.kakao.com/_eWIqK"
|
||||
|
|
|
|||
|
|
@ -10,9 +10,7 @@ class ClinicResponse(BaseModel):
|
|||
hospital_name: str
|
||||
hospital_name_en: str | None
|
||||
road_address: str | None
|
||||
url: str | None
|
||||
status: str
|
||||
raw_data: dict | None
|
||||
created_at: str
|
||||
updated_at: str
|
||||
|
||||
|
|
|
|||
|
|
@ -66,22 +66,18 @@ class RegistryData(CamelModel):
|
|||
|
||||
|
||||
class ClinicSnapshot(CamelModel):
|
||||
name: str
|
||||
name_en: str
|
||||
established: str
|
||||
years_in_business: int
|
||||
staff_count: int
|
||||
lead_doctor: LeadDoctor
|
||||
overall_rating: float
|
||||
total_reviews: int
|
||||
price_range: PriceRange
|
||||
certifications: list[str]
|
||||
media_appearances: list[str]
|
||||
medical_tourism: list[str]
|
||||
location: str
|
||||
nearest_station: str
|
||||
phone: str
|
||||
domain: str
|
||||
# _build_clinic_snapshot은 source 데이터 있을 때만 필드 추가 (`if x:` 가드).
|
||||
# 강남언니/홈페이지 수집 누락된 병원에서 required면 ValidationError로 리포트 전체 실패.
|
||||
name: str | None = None
|
||||
name_en: str | None = None
|
||||
staff_count: int | None = None
|
||||
lead_doctor: LeadDoctor | None = None
|
||||
overall_rating: float | None = None
|
||||
total_reviews: int | None = None
|
||||
certifications: list[str] = []
|
||||
location: str | None = None
|
||||
phone: str | None = None
|
||||
domain: str | None = None
|
||||
logo_images: LogoImages | None = None
|
||||
brand_colors: BrandColors | None = None
|
||||
source: DataSource | None = None
|
||||
|
|
@ -131,7 +127,6 @@ class YouTubeAudit(CamelModel):
|
|||
avg_video_length: str
|
||||
upload_frequency: str
|
||||
channel_created_date: str
|
||||
subscriber_rank: str
|
||||
channel_description: str
|
||||
linked_urls: list[LinkedUrl]
|
||||
playlists: list[str]
|
||||
|
|
@ -156,8 +151,8 @@ class InstagramAccount(CamelModel):
|
|||
|
||||
|
||||
class InstagramAudit(CamelModel):
|
||||
accounts: list[InstagramAccount]
|
||||
diagnosis: list[DiagnosisItem]
|
||||
accounts: list[InstagramAccount] = []
|
||||
diagnosis: list[DiagnosisItem] = []
|
||||
|
||||
|
||||
class BrandInconsistencyValue(CamelModel):
|
||||
|
|
@ -188,17 +183,17 @@ class FacebookPage(CamelModel):
|
|||
linked_domain: str
|
||||
reviews: int
|
||||
recent_post_age: str
|
||||
has_whatsapp: bool
|
||||
post_frequency: str | None = None
|
||||
has_whatsapp: bool | None = None
|
||||
post_frequency: str
|
||||
top_content_type: str | None = None
|
||||
engagement: str | None = None
|
||||
engagement: str
|
||||
|
||||
|
||||
class FacebookAudit(CamelModel):
|
||||
pages: list[FacebookPage]
|
||||
diagnosis: list[DiagnosisItem]
|
||||
brand_inconsistencies: list[BrandInconsistency]
|
||||
consolidation_recommendation: str
|
||||
pages: list[FacebookPage] = []
|
||||
diagnosis: list[DiagnosisItem] = []
|
||||
brand_inconsistencies: list[BrandInconsistency] = []
|
||||
consolidation_recommendation: str | None = None
|
||||
|
||||
|
||||
class OtherChannel(CamelModel):
|
||||
|
|
|
|||
|
|
@ -36,9 +36,24 @@ class DataSource(StrEnum):
|
|||
SCRAPE = "scrape"
|
||||
|
||||
|
||||
class SourceType(StrEnum):
|
||||
MAINPAGE = "mainpage"
|
||||
INSTAGRAM = "instagram"
|
||||
FACEBOOK = "facebook"
|
||||
NAVER_BLOG = "naver_blog"
|
||||
YOUTUBE = "youtube"
|
||||
TIKTOK = "tiktok"
|
||||
GANGNAM_UNNI = "gangnam_unni"
|
||||
KAKAOTALK = "kakaotalk"
|
||||
NAVER_CAFE = "naver_cafe"
|
||||
# 부가 수집/분석 (HTML/CSS 재크롤 + Vision 로고 매칭) — 한 raw_info entry 에 brandAssets/channelLogos 같이 보관.
|
||||
BRANDING = "branding"
|
||||
|
||||
|
||||
class Language(StrEnum):
|
||||
KR = "KR"
|
||||
EN = "EN"
|
||||
WW = "WW"
|
||||
|
||||
|
||||
class VideoType(StrEnum):
|
||||
|
|
|
|||
|
|
@ -1,30 +1,28 @@
|
|||
import json
|
||||
import logging
|
||||
from common.db import fetchone, execute, fetch_raw, get_analysis_raw_data, save_analysis_report, get_market_analysis
|
||||
import re
|
||||
from datetime import datetime
|
||||
from urllib.parse import urlparse
|
||||
from common.db.run import update_run_report, update_run_plan, select_run_report_data
|
||||
from common.db.source import select_run_raw_data, select_mainpage_logo_url
|
||||
from common.db.market import select_market
|
||||
from integrations.llm.llm_service import LLMService
|
||||
from integrations.llm.prompt import report_prompt, plan_prompt
|
||||
from integrations.llm.schemas.report import ReportOutput
|
||||
from services.instagram_audit import build_instagram_accounts
|
||||
from services.facebook_audit import build_facebook_pages
|
||||
from integrations.llm.prompt import report_prompt, plan_prompt, youtube_diagnosis_prompt
|
||||
from integrations.llm.schemas.report import ReportOutput, ClinicSnapshot, YouTubeAudit
|
||||
from services.branding import analyze_branding
|
||||
from services.instagram_audit import build_instagram_audit
|
||||
from services.facebook_audit import build_facebook_audit
|
||||
from services.kpi_dashboard import build_kpi_dashboard
|
||||
from integrations.llm.schemas.plan import PlanOutput
|
||||
from models.status import AnalysisStatus
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
async def generate_report(analysis_run_id: str) -> ReportOutput:
|
||||
run = await fetchone(
|
||||
"SELECT hospital_id FROM analysis_runs WHERE analysis_run_id = %s",
|
||||
(analysis_run_id,),
|
||||
)
|
||||
clinic_row = await fetchone(
|
||||
"SELECT raw_data FROM hospital_baseinfo WHERE hospital_id = %s",
|
||||
(run["hospital_id"],),
|
||||
)
|
||||
raw_data = clinic_row["raw_data"] if clinic_row else None
|
||||
clinic = json.loads(raw_data) if isinstance(raw_data, str) else (raw_data or {})
|
||||
raw = await get_analysis_raw_data(analysis_run_id)
|
||||
market = await get_market_analysis(analysis_run_id)
|
||||
raw = await select_run_raw_data(analysis_run_id)
|
||||
clinic = raw.get("mainpage") or {}
|
||||
branding = raw.get("branding") or {}
|
||||
market = await select_market(analysis_run_id)
|
||||
|
||||
def _json(v) -> str | None:
|
||||
return json.dumps(v, ensure_ascii=False) if v else None
|
||||
|
|
@ -41,37 +39,36 @@ async def generate_report(analysis_run_id: str) -> ReportOutput:
|
|||
"market_keywords": _json(market.get("keywords")),
|
||||
"market_trend": _json(market.get("trend")),
|
||||
"market_target_audience": _json(market.get("target_audience")),
|
||||
# firecrawl 이 mainpage 에서 뽑은 branding 메타(logoUrl/ogImage/faviconUrl) + Vision/CSS 산출물
|
||||
"branding": _json(clinic.get("branding")),
|
||||
"brand_assets": _json(clinic.get("brandAssets")),
|
||||
"tiktok": _json(clinic.get("tiktok")),
|
||||
"instagram_en": _json(clinic.get("instagramEn")),
|
||||
"facebook_en": _json(clinic.get("facebookEn")),
|
||||
"kakao_talk": _json(clinic.get("kakaoTalk")),
|
||||
"naver_cafe": _json(clinic.get("naverCafe")),
|
||||
"channel_logos": _json(clinic.get("channelLogos")),
|
||||
"brand_assets": _json(branding.get("brandAssets")),
|
||||
"channel_logos": _json(branding.get("channelLogos")),
|
||||
# 부가 채널 (raw_info entry) — raw dict 의 한국식 key 그대로
|
||||
"tiktok": _json(raw.get("tiktok")),
|
||||
"instagram_en": _json(raw.get("instagram_en")),
|
||||
"facebook_en": _json(raw.get("facebook_en")),
|
||||
"kakao_talk": _json(raw.get("kakaotalk")),
|
||||
"naver_cafe": _json(raw.get("naver_cafe")),
|
||||
# 메인 5채널은 raw dict 그대로 펼쳐서 prompt placeholder 와 매칭
|
||||
**{
|
||||
channel: _json(data)
|
||||
for channel, data in raw.items()
|
||||
source_type: _json(data)
|
||||
for source_type, data in raw.items()
|
||||
if source_type not in {
|
||||
"mainpage", "branding",
|
||||
"tiktok", "instagram_en", "facebook_en", "kakaotalk", "naver_cafe",
|
||||
}
|
||||
},
|
||||
}
|
||||
|
||||
return await LLMService(provider="perplexity").generate(report_prompt, input_data)
|
||||
|
||||
|
||||
async def generate_plan(analysis_run_id: str) -> PlanOutput:
|
||||
run = await fetchone(
|
||||
"SELECT hospital_id, report_data FROM analysis_runs WHERE analysis_run_id = %s",
|
||||
(analysis_run_id,),
|
||||
)
|
||||
clinic_row = await fetchone(
|
||||
"SELECT raw_data FROM hospital_baseinfo WHERE hospital_id = %s",
|
||||
(run["hospital_id"],),
|
||||
)
|
||||
raw_data = clinic_row["raw_data"] if clinic_row else None
|
||||
clinic = json.loads(raw_data) if isinstance(raw_data, str) else (raw_data or {})
|
||||
report_data = run["report_data"]
|
||||
report = json.loads(report_data) if isinstance(report_data, str) else report_data
|
||||
market = await get_market_analysis(analysis_run_id)
|
||||
raw = await get_analysis_raw_data(analysis_run_id)
|
||||
raw = await select_run_raw_data(analysis_run_id)
|
||||
clinic = raw.get("mainpage") or {}
|
||||
branding = raw.get("branding") or {}
|
||||
report = await select_run_report_data(analysis_run_id)
|
||||
market = await select_market(analysis_run_id)
|
||||
|
||||
def _json(v) -> str | None:
|
||||
return json.dumps(v, ensure_ascii=False) if v else None
|
||||
|
|
@ -89,56 +86,28 @@ async def generate_plan(analysis_run_id: str) -> PlanOutput:
|
|||
"market_keywords": _json(market.get("keywords")),
|
||||
"market_trend": _json(market.get("trend")),
|
||||
"market_target_audience": _json(market.get("target_audience")),
|
||||
"tiktok": _json(clinic.get("tiktok")),
|
||||
"instagram_en": _json(clinic.get("instagramEn")),
|
||||
"facebook_en": _json(clinic.get("facebookEn")),
|
||||
"tiktok": _json(raw.get("tiktok")),
|
||||
"instagram_en": _json(raw.get("instagram_en")),
|
||||
"facebook_en": _json(raw.get("facebook_en")),
|
||||
"naver_blog": _json(_naver_blog_summary(raw.get("naver_blog"))),
|
||||
"channel_logos": _json(clinic.get("channelLogos")),
|
||||
"brand_assets": _json(clinic.get("brandAssets")),
|
||||
"naver_cafe": _json(raw.get("naver_cafe")),
|
||||
"kakao_talk": _json(raw.get("kakaotalk")),
|
||||
"channel_logos": _json(branding.get("channelLogos")),
|
||||
"brand_assets": _json(branding.get("brandAssets")),
|
||||
}
|
||||
|
||||
return await LLMService(provider="perplexity").generate(plan_prompt, input_data)
|
||||
|
||||
|
||||
def _naver_blog_summary(blog: dict | None) -> dict | None:
|
||||
"""plan 카드 한 장에 들어가는 건 전체 포스트 수와 최근 활동 시점뿐. 그 외(본문·링크·제목)는
|
||||
던져봐야 토큰만 늘고 LLM이 무관 정보로 hallucinate 함."""
|
||||
if not blog:
|
||||
return None
|
||||
posts = blog.get("posts") or []
|
||||
return {
|
||||
"totalPosts": blog.get("totalResults"),
|
||||
"latestPostDate": posts[0].get("postDate") if posts else None,
|
||||
}
|
||||
|
||||
|
||||
async def _build_overrides(analysis_run_id: str) -> dict:
|
||||
run = await fetchone(
|
||||
"SELECT hospital_id, instagram_data_id, facebook_data_id,"
|
||||
" naver_blog_data_id, youtube_data_id, gangnam_unni_data_id"
|
||||
" FROM analysis_runs WHERE analysis_run_id = %s",
|
||||
(analysis_run_id,),
|
||||
)
|
||||
if not run:
|
||||
return {}
|
||||
|
||||
hospital_row = await fetchone(
|
||||
"SELECT raw_data FROM hospital_baseinfo WHERE hospital_id = %s",
|
||||
(run["hospital_id"],),
|
||||
)
|
||||
hospital = json.loads(hospital_row["raw_data"]) if hospital_row and isinstance(hospital_row.get("raw_data"), str) else (hospital_row or {}).get("raw_data") or {}
|
||||
instagram = await fetch_raw("instagram_data", run["instagram_data_id"]) or {}
|
||||
facebook = await fetch_raw("facebook_data", run["facebook_data_id"]) or {}
|
||||
naver_blog = await fetch_raw("naver_blog_data", run["naver_blog_data_id"]) or {}
|
||||
youtube = await fetch_raw("youtube_data", run["youtube_data_id"]) or {}
|
||||
gangnam_unni = await fetch_raw("gangnam_unni_data", run["gangnam_unni_data_id"]) or {}
|
||||
|
||||
def _build_clinic_snapshot(gangnam_unni: dict, mainpage: dict, brand_assets: dict, logo_url: str | None) -> dict:
|
||||
snapshot: dict = {}
|
||||
|
||||
# ── gangnam_unni ──────────────────────────────────────────────────────────
|
||||
doctors = gangnam_unni.get("doctors", [])
|
||||
lead = max(doctors, key=lambda d: d.get("reviews", 0)) if doctors else None
|
||||
if gangnam_unni.get("name"): snapshot["name"] = gangnam_unni["name"]
|
||||
if mainpage.get("clinicNameEn"): snapshot["name_en"] = mainpage["clinicNameEn"]
|
||||
if mainpage.get("phone"): snapshot["phone"] = mainpage["phone"]
|
||||
domain = mainpage.get("domain") or urlparse(mainpage.get("sourceUrl") or "").netloc
|
||||
if domain: snapshot["domain"] = domain
|
||||
if gangnam_unni.get("rating"): snapshot["overall_rating"] = gangnam_unni["rating"]
|
||||
if gangnam_unni.get("totalReviews"): snapshot["total_reviews"] = gangnam_unni["totalReviews"]
|
||||
if gangnam_unni.get("address"): snapshot["location"] = gangnam_unni["address"]
|
||||
|
|
@ -151,17 +120,98 @@ async def _build_overrides(analysis_run_id: str) -> dict:
|
|||
"rating": lead.get("rating"),
|
||||
"review_count": lead.get("reviews"),
|
||||
}
|
||||
# logo URL 은 raw_info.logo_url 컬럼에서, brand_colors 는 JSON 에서 강제 주입. LLM 의 null 처리 차단.
|
||||
if logo_url:
|
||||
snapshot["logo_images"] = {"circle": None, "horizontal": logo_url, "korean": None}
|
||||
if brand_assets.get("brand_colors"): snapshot["brand_colors"] = brand_assets["brand_colors"]
|
||||
return ClinicSnapshot.model_validate(snapshot).model_dump()
|
||||
|
||||
# ── instagram (KR·EN 계정을 코드에서 구성 → LLM 출력 무시하고 교체) ──────────────
|
||||
ig_patch = build_instagram_accounts(
|
||||
instagram, hospital.get("instagramEn") or {}, hospital.get("channelLogos") or {},
|
||||
|
||||
def _naver_blog_summary(blog: dict | None) -> dict | None:
|
||||
"""plan 카드 한 장에 들어가는 건 전체 포스트 수와 최근 활동 시점뿐. 그 외(본문·링크·제목)는
|
||||
던져봐야 토큰만 늘고 LLM 이 무관 정보로 hallucinate 함."""
|
||||
if not blog:
|
||||
return None
|
||||
posts = blog.get("posts") or []
|
||||
return {
|
||||
"totalPosts": blog.get("totalResults"),
|
||||
"latestPostDate": posts[0].get("postDate") if posts else None,
|
||||
}
|
||||
|
||||
|
||||
def _parse_iso_duration_seconds(iso: str) -> int:
|
||||
m = re.match(r"PT(?:(\d+)H)?(?:(\d+)M)?(?:(\d+)S)?", iso or "")
|
||||
if not m:
|
||||
return 0
|
||||
h, mins, s = (int(x or 0) for x in m.groups())
|
||||
return h * 3600 + mins * 60 + s
|
||||
|
||||
|
||||
def _format_seconds(seconds: int) -> str:
|
||||
m, s = divmod(seconds, 60)
|
||||
h, m = divmod(m, 60)
|
||||
return f"{h}시간 {m}분" if h else f"{m}분 {s}초"
|
||||
|
||||
|
||||
def _format_clock(seconds: int) -> str:
|
||||
m, s = divmod(seconds, 60)
|
||||
h, m = divmod(m, 60)
|
||||
return f"{h}:{m:02d}:{s:02d}" if h else f"{m}:{s:02d}"
|
||||
|
||||
|
||||
def _calc_avg_video_length(videos: list[dict]) -> str:
    """Return the mean video length, formatted by ``_format_seconds``.

    Each video's ``duration`` is parsed with ``_parse_iso_duration_seconds``.
    Videos whose duration parses to 0 (missing or unparseable) are left out
    of the average. Returns "" when no video has a positive duration.
    """
    total = 0
    counted = 0
    for video in videos:
        length = _parse_iso_duration_seconds(video.get("duration", ""))
        if length > 0:
            total += length
            counted += 1
    if counted == 0:
        return ""
    # Integer mean: fractional seconds are discarded.
    return _format_seconds(total // counted)
|
||||
|
||||
|
||||
def _relative_date(date_str: str) -> str:
|
||||
if not date_str:
|
||||
return ""
|
||||
try:
|
||||
past = datetime.fromisoformat(date_str[:10])
|
||||
except ValueError:
|
||||
return ""
|
||||
days = (datetime.now() - past).days
|
||||
if days < 1:
|
||||
return "오늘"
|
||||
if days < 30:
|
||||
return f"{days}일 전"
|
||||
if days < 365:
|
||||
return f"{days // 30}개월 전"
|
||||
return f"{days // 365}년 전"
|
||||
|
||||
|
||||
def _calc_upload_frequency(videos: list[dict]) -> str:
|
||||
dates = sorted(
|
||||
[v["date"][:10] for v in videos if v.get("date")],
|
||||
reverse=True,
|
||||
)
|
||||
if len(dates) < 2:
|
||||
return ""
|
||||
gaps = [
|
||||
(datetime.fromisoformat(dates[i]) - datetime.fromisoformat(dates[i + 1])).days
|
||||
for i in range(len(dates) - 1)
|
||||
]
|
||||
avg_days = sum(gaps) // len(gaps)
|
||||
if avg_days <= 7:
|
||||
return f"주 {7 // max(avg_days, 1)}회"
|
||||
if avg_days <= 30:
|
||||
return f"월 {30 // avg_days}회"
|
||||
return f"{avg_days}일에 1회"
|
||||
|
||||
# ── facebook (KR=facebook_data, EN=hospital.facebookEn 둘 다 코드 산출, [KR, EN] 순서) ──
|
||||
fb_pages = build_facebook_pages(facebook, hospital.get("facebookEn") or {})
|
||||
|
||||
# ── youtube ───────────────────────────────────────────────────────────────
|
||||
yt_patch: dict = {}
|
||||
async def _build_youtube_audit(youtube: dict) -> dict:
|
||||
videos = youtube.get("videos", [])
|
||||
yt_patch: dict = {
|
||||
"weekly_view_growth": {"absolute": 0, "percentage": 0.0},
|
||||
"estimated_monthly_revenue": {"min": 0, "max": 0},
|
||||
"linked_urls": [],
|
||||
"avg_video_length": _calc_avg_video_length(videos),
|
||||
"upload_frequency": _calc_upload_frequency(videos),
|
||||
}
|
||||
if youtube.get("channelName"): yt_patch["channel_name"] = youtube["channelName"]
|
||||
if youtube.get("handle"): yt_patch["handle"] = youtube["handle"]
|
||||
if youtube.get("subscribers"): yt_patch["subscribers"] = youtube["subscribers"]
|
||||
|
|
@ -169,60 +219,104 @@ async def _build_overrides(analysis_run_id: str) -> dict:
|
|||
if youtube.get("totalViews"): yt_patch["total_views"] = youtube["totalViews"]
|
||||
if youtube.get("publishedAt"): yt_patch["channel_created_date"] = youtube["publishedAt"][:10]
|
||||
if youtube.get("description"): yt_patch["channel_description"] = youtube["description"]
|
||||
if youtube.get("videos"):
|
||||
if youtube.get("playlists"): yt_patch["playlists"] = youtube["playlists"]
|
||||
if videos:
|
||||
yt_patch["top_videos"] = [
|
||||
{
|
||||
"title": v["title"],
|
||||
"views": v["views"],
|
||||
"duration": v.get("duration"),
|
||||
"duration": _format_clock(_parse_iso_duration_seconds(v.get("duration", ""))),
|
||||
"type": "Short" if "M" not in v.get("duration", "") else "Long",
|
||||
"uploaded_ago": v.get("date", "")[:10],
|
||||
"uploaded_ago": _relative_date(v.get("date", "")),
|
||||
}
|
||||
for v in youtube["videos"]
|
||||
for v in videos
|
||||
]
|
||||
|
||||
overrides: dict = {}
|
||||
if snapshot:
|
||||
overrides["clinic_snapshot"] = snapshot
|
||||
if ig_patch:
|
||||
overrides["instagram_audit"] = {"accounts": ig_patch}
|
||||
if fb_pages:
|
||||
overrides["facebook_audit"] = {"pages": fb_pages}
|
||||
if yt_patch:
|
||||
overrides["youtube_audit"] = yt_patch
|
||||
return overrides
|
||||
diagnosis_result = await LLMService(provider="perplexity").generate(
|
||||
youtube_diagnosis_prompt,
|
||||
{
|
||||
"channel_name": yt_patch.get("channel_name"),
|
||||
"subscribers": yt_patch.get("subscribers"),
|
||||
"total_videos": yt_patch.get("total_videos"),
|
||||
"total_views": yt_patch.get("total_views"),
|
||||
"avg_video_length": yt_patch.get("avg_video_length"),
|
||||
"upload_frequency": yt_patch.get("upload_frequency"),
|
||||
"top_videos": json.dumps(yt_patch.get("top_videos", []), ensure_ascii=False),
|
||||
"playlists": json.dumps(yt_patch.get("playlists", []), ensure_ascii=False),
|
||||
},
|
||||
)
|
||||
yt_patch["diagnosis"] = [item.model_dump() for item in diagnosis_result.diagnosis]
|
||||
|
||||
return YouTubeAudit.model_validate(yt_patch).model_dump()
|
||||
|
||||
|
||||
def _deep_merge(base: dict, overrides: dict) -> dict:
|
||||
"""dict 끼리 만나면 재귀로 안쪽까지 합치고, 그 외(list/scalar/None) 는 override 값으로 통째 치환."""
|
||||
for k, v in overrides.items():
|
||||
if isinstance(v, dict) and isinstance(base.get(k), dict):
|
||||
_deep_merge(base[k], v)
|
||||
elif isinstance(v, list) and isinstance(base.get(k), list):
|
||||
for i, item in enumerate(v):
|
||||
if i < len(base[k]) and isinstance(item, dict) and isinstance(base[k][i], dict):
|
||||
_deep_merge(base[k][i], item)
|
||||
else:
|
||||
base[k] = v
|
||||
return base
|
||||
|
||||
def _patch_report(result: ReportOutput, overrides: dict) -> ReportOutput:
|
||||
|
||||
async def _build_overrides(analysis_run_id: str, result: ReportOutput) -> ReportOutput:
|
||||
raw = await select_run_raw_data(analysis_run_id)
|
||||
if not raw:
|
||||
return result
|
||||
|
||||
mainpage = raw.get("mainpage", {}) or {}
|
||||
branding = raw.get("branding", {}) or {}
|
||||
instagram = raw.get("instagram", {}) or {}
|
||||
facebook = raw.get("facebook", {}) or {}
|
||||
youtube = raw.get("youtube", {}) or {}
|
||||
gangnam_unni = raw.get("gangnam_unni", {}) or {}
|
||||
naver_blog = raw.get("naver_blog", {}) or {}
|
||||
instagram_en = raw.get("instagram_en", {}) or {}
|
||||
facebook_en = raw.get("facebook_en", {}) or {}
|
||||
tiktok = raw.get("tiktok", {}) or {}
|
||||
naver_cafe = raw.get("naver_cafe", {}) or {}
|
||||
brand_assets = branding.get("brandAssets") or {}
|
||||
channel_logos = branding.get("channelLogos") or {}
|
||||
logo_url = await select_mainpage_logo_url(analysis_run_id)
|
||||
|
||||
llm_fb_pages = result.model_dump().get("facebook_audit", {}).get("pages", [])
|
||||
|
||||
snapshot: dict = _build_clinic_snapshot(gangnam_unni, mainpage, brand_assets, logo_url)
|
||||
yt_patch: dict = await _build_youtube_audit(youtube)
|
||||
ig_patch = build_instagram_audit(instagram, instagram_en, channel_logos)
|
||||
fb_patch = build_facebook_audit(facebook, facebook_en, llm_fb_pages)
|
||||
kpi_extras = {
|
||||
"instagramEn": instagram_en,
|
||||
"facebookEn": facebook_en,
|
||||
"tiktok": tiktok,
|
||||
"naverCafe": naver_cafe,
|
||||
}
|
||||
kpi = build_kpi_dashboard(instagram, facebook, youtube, gangnam_unni, kpi_extras, naver_blog)
|
||||
|
||||
overrides: dict = {}
|
||||
if snapshot: overrides["clinic_snapshot"] = snapshot
|
||||
if ig_patch: overrides["instagram_audit"] = ig_patch
|
||||
if fb_patch: overrides["facebook_audit"] = fb_patch
|
||||
if yt_patch: overrides["youtube_audit"] = yt_patch
|
||||
if kpi: overrides["kpi_dashboard"] = kpi
|
||||
|
||||
merged = _deep_merge(result.model_dump(), overrides)
|
||||
# 인스타 계정은 프롬프트에서 LLM이 []로 두게 했고, 코드가 수집 데이터로 채운다 (데이터 없으면 빈 리스트)
|
||||
merged.setdefault("instagram_audit", {})["accounts"] = (overrides.get("instagram_audit") or {}).get("accounts") or []
|
||||
return ReportOutput(**merged)
|
||||
|
||||
|
||||
async def run_report_task(analysis_run_id: str) -> None:
|
||||
logger.info("[report] start run=%s", analysis_run_id)
|
||||
await analyze_branding(analysis_run_id)
|
||||
result = await generate_report(analysis_run_id)
|
||||
result = _patch_report(result, await _build_overrides(analysis_run_id))
|
||||
await save_analysis_report(analysis_run_id, result.model_dump())
|
||||
result = await _build_overrides(analysis_run_id, result)
|
||||
await update_run_report(analysis_run_id, result.model_dump())
|
||||
logger.info("[report] done run=%s", analysis_run_id)
|
||||
|
||||
|
||||
def _patch_plan(result: PlanOutput, logo_desc: str) -> PlanOutput:
|
||||
"""brand_guide.channel_branding[].profile_photo 는 LLM 안 맡기고 코드가 박는다
|
||||
(모든 채널 동일값 = brand_assets.logo_description). LLM이 fallback 문구 hallucinate 방지."""
|
||||
(모든 채널 동일값 = brand_assets.logo_description). LLM 이 fallback 문구 hallucinate 방지."""
|
||||
p = result.model_dump()
|
||||
for ch in (p.get("brand_guide") or {}).get("channel_branding") or []:
|
||||
ch["profile_photo"] = logo_desc
|
||||
|
|
@ -232,15 +326,10 @@ def _patch_plan(result: PlanOutput, logo_desc: str) -> PlanOutput:
|
|||
async def run_plan_task(analysis_run_id: str) -> None:
|
||||
logger.info("[plan] start run=%s", analysis_run_id)
|
||||
result = await generate_plan(analysis_run_id)
|
||||
# profile_photo 는 brand_assets.logo_description 으로 코드가 박음 (LLM "(가이드 미보유)" 같은 hallucination 차단)
|
||||
run = await fetchone("SELECT hospital_id FROM analysis_runs WHERE analysis_run_id = %s", (analysis_run_id,))
|
||||
if run:
|
||||
hr = await fetchone("SELECT raw_data FROM hospital_baseinfo WHERE hospital_id = %s", (run["hospital_id"],))
|
||||
h = json.loads(hr["raw_data"]) if hr and isinstance(hr.get("raw_data"), str) else (hr or {}).get("raw_data") or {}
|
||||
logo_desc = ((h.get("brandAssets") or {}).get("logo_description")) or ""
|
||||
# profile_photo 는 brand_assets.logo_description 으로 코드가 박음 (LLM "(가이드 미보유)" 같은 hallucination 차단).
|
||||
raw = await select_run_raw_data(analysis_run_id)
|
||||
branding = raw.get("branding") or {}
|
||||
logo_desc = ((branding.get("brandAssets") or {}).get("logo_description")) or ""
|
||||
result = _patch_plan(result, logo_desc)
|
||||
await execute(
|
||||
"UPDATE analysis_runs SET plan_data = %s WHERE analysis_run_id = %s",
|
||||
(json.dumps(result.model_dump(), ensure_ascii=False), analysis_run_id),
|
||||
)
|
||||
await update_run_plan(analysis_run_id, result.model_dump())
|
||||
logger.info("[plan] done run=%s", analysis_run_id)
|
||||
|
|
|
|||
|
|
@ -0,0 +1,172 @@
|
|||
"""collect 단계 - HTML/CSS 텍스트에서 brand 로고 URL + 색상 추출"""
|
||||
import logging
|
||||
import re
|
||||
from collections import Counter
|
||||
from urllib.parse import urljoin
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
# ── 로고 URL 추출 ─────────────────────────────────────────────────────────────
|
||||
|
||||
|
||||
LOGO_IMG_PATTERNS = [
|
||||
re.compile(r'<img[^>]*\bclass=["\'][^"\']*\blogo\b[^"\']*["\'][^>]*\bsrc=["\']([^"\']+)["\']', re.IGNORECASE),
|
||||
re.compile(r'<img[^>]*\bsrc=["\']([^"\']+)["\'][^>]*\bclass=["\'][^"\']*\blogo\b[^"\']*["\']', re.IGNORECASE),
|
||||
re.compile(r'<img[^>]*\bid=["\'][^"\']*\blogo\b[^"\']*["\'][^>]*\bsrc=["\']([^"\']+)["\']', re.IGNORECASE),
|
||||
re.compile(r'<img[^>]*\balt=["\'][^"\']*\blogo\b[^"\']*["\'][^>]*\bsrc=["\']([^"\']+)["\']', re.IGNORECASE),
|
||||
re.compile(r'<(?:a|h[1-6]|div|span)[^>]*\b(?:class|id)=["\'][^"\']*\blogo\b[^"\']*["\'][^>]*>(?:[^<]|<(?!img))*<img[^>]*\bsrc=["\']([^"\']+)["\']', re.IGNORECASE | re.DOTALL),
|
||||
re.compile(r'<(?:a|div|span|h[1-6])[^>]*\b(?:class|id)=["\'][^"\']*\blogo\b[^"\']*["\'][^>]*\bstyle=["\'][^"\']*background(?:-image)?\s*:\s*url\(\s*["\']?([^"\')\s]+)', re.IGNORECASE),
|
||||
re.compile(r'<(?:a|div|span|h[1-6])[^>]*\bstyle=["\'][^"\']*background(?:-image)?\s*:\s*url\(\s*["\']?([^"\')\s]+)[^"\']*["\'][^>]*\b(?:class|id)=["\'][^"\']*\blogo\b', re.IGNORECASE),
|
||||
re.compile(r'<img[^>]*\bsrc=["\']([^"\']*\blogo\b[^"\']*\.(?:png|svg|jpe?g|webp)[^"\']*)["\']', re.IGNORECASE),
|
||||
re.compile(r'<header\b[^>]*>(?:[^<]|<(?!img))*<img[^>]*\bsrc=["\']([^"\']+\.(?:png|svg|jpe?g|webp)[^"\']*)["\']', re.IGNORECASE | re.DOTALL),
|
||||
re.compile(r'<nav\b[^>]*>(?:[^<]|<(?!img))*<img[^>]*\bsrc=["\']([^"\']+\.(?:png|svg|jpe?g|webp)[^"\']*)["\']', re.IGNORECASE | re.DOTALL),
|
||||
re.compile(r'<meta[^>]*\bproperty=["\']og:image["\'][^>]*\bcontent=["\']([^"\']+)["\']', re.IGNORECASE),
|
||||
re.compile(r'<meta[^>]*\bcontent=["\']([^"\']+)["\'][^>]*\bproperty=["\']og:image["\']', re.IGNORECASE),
|
||||
]
|
||||
|
||||
LOGO_CSS_PATTERN = re.compile(
|
||||
r'\.[\w-]*\blogo\b[\w-]*\s*(?:,\s*\.[\w-]+\s*)*\{[^}]*background(?:-image)?\s*:\s*url\(\s*["\']?([^"\')\s]+)',
|
||||
re.IGNORECASE | re.DOTALL,
|
||||
)
|
||||
|
||||
|
||||
def find_logo_url_in_html(html: str, base_url: str, css_texts: list[str] | None = None) -> str | None:
    """Find the most likely logo URL for a page.

    Candidates are tried in three stages and the first usable one wins:
      1. ``LOGO_IMG_PATTERNS[:8]`` - markup that explicitly mentions "logo"
         (img class/id/alt, logo containers, inline background styles,
         "logo" in the file name).
      2. ``LOGO_CSS_PATTERN`` over each external stylesheet text - a
         ``.logo`` rule with a background image.
      3. ``LOGO_IMG_PATTERNS[8:]`` - weaker fallbacks (first image in
         header/nav, ``og:image`` meta).

    Args:
        html: Page HTML.
        base_url: URL used to resolve relative candidates.
        css_texts: Optional stylesheet bodies to scan in stage 2.

    Returns:
        An absolute URL, or None when every candidate is missing or noise.
    """

    def _is_noise(src: str) -> bool:
        # Inline data URIs, spacer pixels, language flags and UI icons are
        # never the brand logo.
        if not src or src.startswith("data:"):
            return True
        if re.search(r"(blank|spacer|pixel|transparent|1x1)\b", src, re.IGNORECASE):
            return True
        if re.search(r"(lang[-_]?(kor|eng|chn|jpn|rus|jp|en|ko|cn|ar|in)|flag|country|icon-|btn-|arrow|prev|next|search)\b", src, re.IGNORECASE):
            return True
        return False

    def _first_usable(matches) -> str | None:
        # Return the first non-noise capture, resolved against base_url.
        for m in matches:
            src = m.group(1)
            if not _is_noise(src):
                return urljoin(base_url, src)
        return None

    # Generators keep the stages lazy: a later stage runs its regexes only
    # when every earlier stage came up empty.
    explicit = (m for pat in LOGO_IMG_PATTERNS[:8] for m in pat.finditer(html))
    # finditer (not search): a noise hit on the first .logo rule of a
    # stylesheet must not hide a valid logo rule further down the same file.
    from_css = (m for css in (css_texts or []) for m in LOGO_CSS_PATTERN.finditer(css))
    fallback = (m for pat in LOGO_IMG_PATTERNS[8:] for m in pat.finditer(html))

    for stage in (explicit, from_css, fallback):
        found = _first_usable(stage)
        if found is not None:
            return found
    return None
|
||||
|
||||
|
||||
# ── 색상 추출 ────────────────────────────────────────────────────────────────
|
||||
|
||||
|
||||
HEX6 = re.compile(r"#([0-9a-fA-F]{6})\b")
|
||||
HEX3 = re.compile(r"#([0-9a-fA-F]{3})\b(?![0-9a-fA-F])")
|
||||
RGB = re.compile(r"rgba?\(\s*(\d{1,3})\s*,\s*(\d{1,3})\s*,\s*(\d{1,3})\s*(?:,\s*[\d.]+\s*)?\)")
|
||||
STYLE_BLOCK = re.compile(r"<style[^>]*>(.*?)</style>", re.IGNORECASE | re.DOTALL)
|
||||
|
||||
# Neutral whites, blacks and grays that are excluded from the color count.
NOISE = {
    "#ffffff", "#000000", "#fff", "#000",
    "#333", "#222", "#111", "#444", "#555", "#666", "#777", "#888", "#999",
    "#aaa", "#bbb", "#ccc", "#ddd", "#eee", "#f0f0f0", "#f5f5f5", "#fafafa",
}
# Extracted colors are expanded to 6-digit form before being checked against
# NOISE, so a 3-digit entry on its own can never match. Add the expanded form
# of every shorthand entry ("#333" -> "#333333"); the shorthand stays too.
NOISE |= {"#" + "".join(ch * 2 for ch in c[1:]) for c in NOISE if len(c) == 4}
|
||||
|
||||
|
||||
def _normalize(hex_str: str) -> str:
|
||||
h = hex_str.lstrip("#").lower()
|
||||
if len(h) == 3:
|
||||
h = "".join(c * 2 for c in h)
|
||||
if len(h) == 8:
|
||||
h = h[:6]
|
||||
return f"#{h}"
|
||||
|
||||
|
||||
def _rgb_to_hex(r: int, g: int, b: int) -> str:
|
||||
return f"#{r:02x}{g:02x}{b:02x}"
|
||||
|
||||
|
||||
def _hex_to_rgb(h: str) -> tuple[int, int, int]:
|
||||
h = h.lstrip("#")
|
||||
return int(h[0:2], 16), int(h[2:4], 16), int(h[4:6], 16)
|
||||
|
||||
|
||||
def _distance(a: str, b: str) -> float:
    """Return the Euclidean distance between two hex colors in RGB space."""
    first = _hex_to_rgb(a)
    second = _hex_to_rgb(b)
    squared = sum((p - q) ** 2 for p, q in zip(first, second))
    return squared ** 0.5
|
||||
|
||||
|
||||
def _is_grayscale(h: str, tol: int = 12) -> bool:
    """Tell whether a hex color is close enough to gray.

    A color counts as grayscale when the spread between its largest and
    smallest RGB channel is strictly below ``tol``.
    """
    channels = _hex_to_rgb(h)
    spread = max(channels) - min(channels)
    return spread < tol
|
||||
|
||||
|
||||
def _extract_hex(text: str) -> list[str]:
    """Collect every color literal in ``text`` as a normalized hex string.

    The result lists all 6-digit hex matches first, then 3-digit ones, then
    ``rgb()``/``rgba()`` values converted to hex (alpha ignored). Duplicates
    are kept so callers can count frequency.
    """
    found = [_normalize(m.group(0)) for m in HEX6.finditer(text)]
    found += [_normalize(m.group(0)) for m in HEX3.finditer(text)]
    for m in RGB.finditer(text):
        r, g, b = (int(part) for part in m.group(1, 2, 3))
        # The pattern allows up to three digits, so values above 255 must be
        # rejected here.
        if all(0 <= channel <= 255 for channel in (r, g, b)):
            found.append(_rgb_to_hex(r, g, b))
    return found
|
||||
|
||||
|
||||
def _cluster(colors: Counter, threshold: float = 25.0) -> list[tuple[str, int]]:
    """Group near-identical colors and sum their counts.

    Colors are visited from most to least frequent. Each one joins the first
    existing cluster whose representative is closer than ``threshold``
    (by ``_distance``); otherwise it starts a new cluster and becomes its
    representative.

    Returns:
        ``(representative_hex, total_count)`` pairs in the order the clusters
        were created. The list is not re-sorted after counts are merged.
    """
    groups: list[tuple[str, int]] = []
    for hex_code, freq in colors.most_common():
        for pos, (anchor, total) in enumerate(groups):
            if _distance(hex_code, anchor) < threshold:
                groups[pos] = (anchor, total + freq)
                break
        else:
            # No existing cluster was close enough.
            groups.append((hex_code, freq))
    return groups
|
||||
|
||||
|
||||
def extract_brand_colors_from_text(html: str, css_texts: list[str], source_url: str = "") -> dict:
    """Derive brand colors from already-fetched HTML and CSS text (no network).

    Color literals are counted across the inline ``<style>`` blocks, the full
    HTML and every stylesheet text, skipping entries in ``NOISE``. Similar
    colors are then clustered.

    Args:
        html: Page HTML.
        css_texts: Stylesheet bodies.
        source_url: Only used in the log line when nothing is found.

    Returns:
        {} when no color survives filtering. Otherwise a dict with
        ``brand_colors`` (``primary`` and ``accent`` are the first two
        non-gray clusters, ``text`` is the first gray cluster, each None if
        absent), ``color_palette`` (up to 8 clusters) and ``extracted_from``.
    """
    # <style> contents are scanned on their own and again as part of the full
    # HTML, so inline-stylesheet colors are counted twice.
    chunks = [*STYLE_BLOCK.findall(html), html, *css_texts]
    tally: Counter = Counter(
        color
        for chunk in chunks
        for color in _extract_hex(chunk)
        if color not in NOISE
    )
    if not tally:
        logger.info("[brand_parser] no colors extracted from %s", source_url)
        return {}

    clustered = _cluster(tally)
    chromatic: list[str] = []
    grayscale: list[str] = []
    for rep, _ in clustered:
        (grayscale if _is_grayscale(rep) else chromatic).append(rep)

    palette = [
        {"name": f"색상 {i+1}", "hex": h, "usage": f"빈도 {n}"}
        for i, (h, n) in enumerate(clustered[:8])
    ]
    brand_colors = {
        "primary": chromatic[0] if chromatic else None,
        "accent": chromatic[1] if len(chromatic) > 1 else None,
        "text": grayscale[0] if grayscale else None,
    }
    return {
        "brand_colors": brand_colors,
        "color_palette": palette,
        "extracted_from": "html+css",
    }
|
||||
|
|
@ -0,0 +1,86 @@
|
|||
"""report 단계 - Gemini Vision 으로 로고 묘사 + 채널 로고 매칭."""
|
||||
import logging
|
||||
import os
|
||||
from urllib.parse import urlparse
|
||||
from common.db.source import (
|
||||
select_run_raw_data, update_raw_info_merge,
|
||||
select_branding_info_id, select_mainpage_logo_url,
|
||||
)
|
||||
from common.utils import _run_optional_step
|
||||
from integrations.llm.gemini_vision import VisionClient
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
async def _describe_logo(analysis_run_id: str, info_id: int, vc: VisionClient) -> None:
    """Describe the official logo and merge the result under ``brandAssets``.

    Candidate URLs are tried in order until the vision client returns a
    truthy result: the stored mainpage logo URL, then ``logoUrl`` and
    ``faviconUrl`` from the mainpage branding metadata, then
    ``<scheme>://<host>/favicon.ico`` built from the homepage URL. A truthy
    result is merged into the raw info row ``info_id``.
    """
    raw = await select_run_raw_data(analysis_run_id)
    mainpage = raw.get("mainpage") or {}
    homepage_url = mainpage.get("sourceUrl") or ""
    branding_meta = mainpage.get("branding") or {}
    column_logo = await select_mainpage_logo_url(analysis_run_id)

    candidates = []
    for url in (column_logo, branding_meta.get("logoUrl"), branding_meta.get("faviconUrl")):
        if url:
            candidates.append(url)
    # Last resort: the conventional favicon location on the homepage origin.
    origin = urlparse(homepage_url) if homepage_url else None
    if origin is not None and origin.scheme and origin.netloc:
        candidates.append(f"{origin.scheme}://{origin.netloc}/favicon.ico")

    if not candidates:
        logger.info("[brand_logo] skip — no candidates")
        return

    logger.info("[brand_logo] start run=%s candidates=%d", analysis_run_id, len(candidates))
    result: dict = {}
    for cand in candidates:
        result = await vc.analyze_brand_assets(logo_url=cand, homepage_url=homepage_url)
        if result:
            break
    if result:
        await update_raw_info_merge(info_id, {"brandAssets": result})
    logger.info("[brand_logo] done keys=%s", list(result.keys()) if result else None)
|
||||
|
||||
|
||||
async def _describe_channel_logos(analysis_run_id: str, info_id: int, vc: VisionClient) -> None:
    """Compare channel profile logos with the official logo.

    Collects the ``_logo_url`` of each known channel present in the run's raw
    data, passes them with the official logo URL to the vision client, and
    merges a truthy result into raw info row ``info_id`` under
    ``channelLogos``. Does nothing when no channel has a logo URL.
    """
    raw = await select_run_raw_data(analysis_run_id)
    official = await select_mainpage_logo_url(analysis_run_id)

    # Raw-data key -> display label sent to the vision client.
    channel_labels = {
        "instagram": "Instagram",
        "facebook": "Facebook",
        "youtube": "YouTube",
        "instagram_en": "Instagram EN",
        "facebook_en": "Facebook EN",
        "tiktok": "TikTok",
    }
    logos = []
    for key, label in channel_labels.items():
        img = (raw.get(key) or {}).get("_logo_url")
        if img:
            logos.append({"channel": label, "url": img})

    if not logos:
        logger.info("[channel_logos] skip — no channel profileImages")
        return

    logger.info(
        "[channel_logos] start run=%s channels=%s official=%s",
        analysis_run_id,
        [entry["channel"] for entry in logos],
        bool(official),
    )
    result = await vc.describe_channel_logos(official, logos)
    if result:
        await update_raw_info_merge(info_id, {"channelLogos": result})
    logger.info("[channel_logos] done keys=%s", list(result.keys()) if result else None)
|
||||
|
||||
|
||||
async def analyze_branding(analysis_run_id: str) -> None:
    """Run the Gemini branding steps right before the report is built.

    Skips when ``GEMINI_API_KEY`` is unset or the run has no branding source
    row. Otherwise runs the logo description and then the channel-logo
    comparison, each through ``_run_optional_step``.
    """
    api_key = os.getenv("GEMINI_API_KEY")
    if not api_key:
        logger.info("[branding] skip — GEMINI_API_KEY 없음")
        return

    branding_info_id = await select_branding_info_id(analysis_run_id)
    if branding_info_id is None:
        logger.info("[branding] skip — branding source 없음 run=%s", analysis_run_id)
        return

    vc = VisionClient(api_key)
    logger.info("[branding] start run=%s", analysis_run_id)
    # NOTE(review): _run_optional_step presumably contains failures so one step
    # cannot break the other or the report — confirm in common.utils.
    steps = (
        (_describe_logo, "brand_logo"),
        (_describe_channel_logos, "channel_logos"),
    )
    for step, label in steps:
        await _run_optional_step(step(analysis_run_id, branding_info_id, vc), label)
    logger.info("[branding] done run=%s", analysis_run_id)
|
||||
|
|
@ -1,120 +1,199 @@
|
|||
import asyncio
|
||||
import logging
|
||||
from common.db import (
|
||||
fetchone,
|
||||
set_instagram_status, save_instagram_raw_data,
|
||||
set_facebook_status, save_facebook_raw_data,
|
||||
set_naver_blog_status, save_naver_blog_raw_data,
|
||||
set_youtube_status, save_youtube_raw_data,
|
||||
set_gangnam_unni_status, save_gangnam_unni_raw_data,
|
||||
execute, save_hospital_raw_data,
|
||||
)
|
||||
from common.db.hospital import update_hospital_status, update_hospital
|
||||
from common.db.source import select_run_sources, update_raw_info_status, update_raw_info
|
||||
from common.utils import get_env, _run_optional_step
|
||||
from integrations.apify import ApifyClient
|
||||
from integrations.naver import NaverClient
|
||||
from integrations.youtube import YouTubeClient
|
||||
from integrations.firecrawl import FirecrawlClient
|
||||
from services.enrichment import collect_brand_assets, collect_extra_channels, collect_channel_logos
|
||||
from models.status import SourceType
|
||||
from integrations.site_fetcher import fetch_html_and_css
|
||||
from services.brand_parser import find_logo_url_in_html, extract_brand_colors_from_text
|
||||
from common.db.source import update_raw_info_merge, update_raw_info_logo_url, select_run_raw_data
|
||||
from common.db.base import fetchone
|
||||
from services.facebook_audit import transform_for_storage as transform_facebook
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
async def collect_instagram(analysis_run_id: str, row_id: int, url: str) -> None:
|
||||
async def _save_with_logo(info_id: int, data: dict) -> None:
|
||||
await update_raw_info(info_id, data)
|
||||
if data.get("profileImage"):
|
||||
await update_raw_info_logo_url(info_id, data["profileImage"])
|
||||
|
||||
|
||||
async def collect_instagram(analysis_run_id: str, info_id: int, url: str) -> None:
|
||||
logger.info("[instagram] start run=%s url=%s", analysis_run_id, url)
|
||||
await set_instagram_status(row_id, "processing")
|
||||
await update_raw_info_status(info_id, "processing")
|
||||
data = await ApifyClient(get_env("APIFY_API_TOKEN")).get_instagram_profile(url)
|
||||
await save_instagram_raw_data(row_id, data)
|
||||
if data is None:
|
||||
await update_raw_info_status(info_id, "failed")
|
||||
logger.warning("[instagram] failed run=%s", analysis_run_id)
|
||||
return
|
||||
await _save_with_logo(info_id, data)
|
||||
logger.info("[instagram] done run=%s", analysis_run_id)
|
||||
|
||||
|
||||
async def collect_facebook(analysis_run_id: str, row_id: int, url: str) -> None:
|
||||
async def collect_facebook(analysis_run_id: str, info_id: int, url: str) -> None:
|
||||
logger.info("[facebook] start run=%s url=%s", analysis_run_id, url)
|
||||
await set_facebook_status(row_id, "processing")
|
||||
await update_raw_info_status(info_id, "processing")
|
||||
data = await ApifyClient(get_env("APIFY_API_TOKEN")).get_facebook_page(url)
|
||||
if data is None:
|
||||
await update_raw_info_status(info_id, "failed")
|
||||
logger.warning("[facebook] failed run=%s", analysis_run_id)
|
||||
return
|
||||
data = transform_facebook(data)
|
||||
await save_facebook_raw_data(row_id, data)
|
||||
await _save_with_logo(info_id, data)
|
||||
logger.info("[facebook] done run=%s", analysis_run_id)
|
||||
|
||||
|
||||
async def collect_naver_blog(analysis_run_id: str, row_id: int, url: str) -> None:
|
||||
async def collect_naver_blog(analysis_run_id: str, info_id: int, url: str) -> None:
|
||||
logger.info("[naver_blog] start run=%s url=%s", analysis_run_id, url)
|
||||
await set_naver_blog_status(row_id, "processing")
|
||||
await update_raw_info_status(info_id, "processing")
|
||||
data = await NaverClient(get_env("NAVER_CLIENT_ID"), get_env("NAVER_CLIENT_SECRET")).get_blog_rss(url)
|
||||
await save_naver_blog_raw_data(row_id, data)
|
||||
if data is None:
|
||||
await update_raw_info_status(info_id, "failed")
|
||||
logger.warning("[naver_blog] failed run=%s", analysis_run_id)
|
||||
return
|
||||
await update_raw_info(info_id, data)
|
||||
logger.info("[naver_blog] done run=%s", analysis_run_id)
|
||||
|
||||
|
||||
async def collect_youtube(analysis_run_id: str, row_id: int, url: str) -> None:
|
||||
async def collect_youtube(analysis_run_id: str, info_id: int, url: str) -> None:
|
||||
logger.info("[youtube] start run=%s url=%s", analysis_run_id, url)
|
||||
await set_youtube_status(row_id, "processing")
|
||||
await update_raw_info_status(info_id, "processing")
|
||||
data = await YouTubeClient(get_env("YOUTUBE_API_KEY")).get_channel(url)
|
||||
await save_youtube_raw_data(row_id, data)
|
||||
if data is None:
|
||||
await update_raw_info_status(info_id, "failed")
|
||||
logger.warning("[youtube] failed run=%s", analysis_run_id)
|
||||
return
|
||||
await _save_with_logo(info_id, data)
|
||||
logger.info("[youtube] done run=%s", analysis_run_id)
|
||||
|
||||
|
||||
async def collect_gangnam_unni(analysis_run_id: str, row_id: int, url: str) -> None:
|
||||
async def collect_gangnam_unni(analysis_run_id: str, info_id: int, url: str) -> None:
|
||||
logger.info("[gangnam_unni] start run=%s url=%s", analysis_run_id, url)
|
||||
await set_gangnam_unni_status(row_id, "processing")
|
||||
await update_raw_info_status(info_id, "processing")
|
||||
data = await FirecrawlClient(get_env("FIRECRAWL_API_KEY")).get_gangnam_unni(url)
|
||||
await save_gangnam_unni_raw_data(row_id, data)
|
||||
if data is None:
|
||||
await update_raw_info_status(info_id, "failed")
|
||||
logger.warning("[gangnam_unni] failed run=%s", analysis_run_id)
|
||||
return
|
||||
await update_raw_info(info_id, data)
|
||||
logger.info("[gangnam_unni] done run=%s", analysis_run_id)
|
||||
|
||||
|
||||
async def collect_clinic_info(analysis_run_id: str, hospital_id: str, url: str) -> None:
|
||||
logger.info("[clinic] start run=%s url=%s", analysis_run_id, url)
|
||||
await execute("UPDATE hospital_baseinfo SET status = 'processing' WHERE hospital_id = %s", (hospital_id,))
|
||||
async def collect_mainpage(analysis_run_id: str, info_id: int, hospital_id: str, url: str) -> None:
|
||||
logger.info("[mainpage] start run=%s url=%s", analysis_run_id, url)
|
||||
await update_raw_info_status(info_id, "processing")
|
||||
await update_hospital_status(hospital_id, "processing")
|
||||
data = await FirecrawlClient(get_env("FIRECRAWL_API_KEY")).fetch_clinic_info(url)
|
||||
await save_hospital_raw_data(hospital_id, data, analysis_run_id=analysis_run_id)
|
||||
logger.info("[clinic] done run=%s", analysis_run_id)
|
||||
if data is None:
|
||||
await update_raw_info_status(info_id, "failed")
|
||||
logger.warning("[mainpage] failed run=%s", analysis_run_id)
|
||||
return
|
||||
# 홈페이지 URL 자체도 raw_data 에 박아둬야 brand_assets / 분석 단계에서 mainpage URL 재조회 없이 사용 가능.
|
||||
data = {**data, "sourceUrl": url}
|
||||
await update_raw_info(info_id, data)
|
||||
await update_hospital(hospital_id, data, analysis_run_id=analysis_run_id)
|
||||
logger.info("[mainpage] done run=%s", analysis_run_id)
|
||||
|
||||
|
||||
async def collect_all(
|
||||
analysis_run_id: str,
|
||||
hospital_id: str,
|
||||
instagram_id: int | None = None,
|
||||
facebook_id: int | None = None,
|
||||
naver_blog_id: int | None = None,
|
||||
youtube_id: int | None = None,
|
||||
gangnam_unni_id: int | None = None,
|
||||
tiktok_url: str | None = None,
|
||||
instagram_en_url: str | None = None,
|
||||
facebook_en_url: str | None = None,
|
||||
kakao_talk_url: str | None = None,
|
||||
naver_cafe_url: str | None = None,
|
||||
) -> None:
|
||||
async def _url(table: str, row_id: int) -> str:
|
||||
row = await fetchone(f"SELECT url FROM {table} WHERE id = %s", (row_id,))
|
||||
return row["url"] if row else ""
|
||||
async def collect_tiktok(analysis_run_id: str, info_id: int, url: str) -> None:
    """Collect a TikTok profile through Apify and store it on the raw_info row.

    The row is switched to "processing" before the fetch. A ``None`` result
    marks the row "failed"; any other result is passed to ``_save_with_logo``.

    Args:
        analysis_run_id: Run identifier, used for logging only.
        info_id: raw_info row that receives the status and payload.
        url: TikTok profile URL handed to the Apify client.

    Raises:
        Exception: Whatever the env lookup or Apify call raised, re-raised
            after the row has been marked "failed".
    """
    logger.info("[tiktok] start run=%s url=%s", analysis_run_id, url)
    await update_raw_info_status(info_id, "processing")
    try:
        data = await ApifyClient(get_env("APIFY_API_TOKEN")).get_tiktok_profile(url)
    except Exception:
        # collect_all gathers collectors with return_exceptions=True and drops
        # the results, so without this the row would stay "processing" forever.
        await update_raw_info_status(info_id, "failed")
        logger.exception("[tiktok] failed run=%s", analysis_run_id)
        raise
    if data is None:
        await update_raw_info_status(info_id, "failed")
        logger.warning("[tiktok] failed run=%s", analysis_run_id)
        return
    await _save_with_logo(info_id, data)
    logger.info("[tiktok] done run=%s", analysis_run_id)
|
||||
|
||||
hospital = await fetchone("SELECT url FROM hospital_baseinfo WHERE hospital_id = %s", (hospital_id,))
|
||||
tasks = [collect_clinic_info(analysis_run_id, hospital_id, hospital["url"])]
|
||||
|
||||
if instagram_id:
|
||||
tasks.append(collect_instagram(analysis_run_id, instagram_id, await _url("instagram_data", instagram_id)))
|
||||
if facebook_id:
|
||||
tasks.append(collect_facebook(analysis_run_id, facebook_id, await _url("facebook_data", facebook_id)))
|
||||
if naver_blog_id:
|
||||
tasks.append(collect_naver_blog(analysis_run_id, naver_blog_id, await _url("naver_blog_data", naver_blog_id)))
|
||||
if youtube_id:
|
||||
tasks.append(collect_youtube(analysis_run_id, youtube_id, await _url("youtube_data", youtube_id)))
|
||||
if gangnam_unni_id:
|
||||
tasks.append(collect_gangnam_unni(analysis_run_id, gangnam_unni_id, await _url("gangnam_unni_data", gangnam_unni_id)))
|
||||
async def collect_naver_cafe(analysis_run_id: str, info_id: int, url: str) -> None:
    """Collect Naver Cafe signals and store them on the raw_info row.

    Per the original author's note, cafe bodies require login and cannot be
    read, so only URL liveness, the cafeId and the name-mention count are
    gathered as signals (the actual fields come from
    ``NaverClient.get_cafe_info`` — not visible here).

    Args:
        analysis_run_id: Run identifier, used for logging only.
        info_id: raw_info row that receives the status and payload.
        url: Cafe URL handed to the Naver client.

    Raises:
        Exception: Whatever the env lookup or Naver call raised, re-raised
            after the row has been marked "failed".
    """
    logger.info("[naver_cafe] start run=%s url=%s", analysis_run_id, url)
    await update_raw_info_status(info_id, "processing")
    try:
        client = NaverClient(get_env("NAVER_CLIENT_ID"), get_env("NAVER_CLIENT_SECRET"))
        data = await client.get_cafe_info(url)
    except Exception:
        # collect_all gathers collectors with return_exceptions=True and drops
        # the results, so without this the row would stay "processing" forever.
        await update_raw_info_status(info_id, "failed")
        logger.exception("[naver_cafe] failed run=%s", analysis_run_id)
        raise
    if data is None:
        await update_raw_info_status(info_id, "failed")
        logger.warning("[naver_cafe] failed run=%s", analysis_run_id)
        return
    await update_raw_info(info_id, data)
    logger.info("[naver_cafe] done run=%s", analysis_run_id)
|
||||
|
||||
|
||||
async def collect_kakaotalk(analysis_run_id: str, info_id: int, url: str) -> None:
    """Record a KakaoTalk channel URL without fetching any content.

    Nothing is scraped for KakaoTalk; the URL alone is stored on the raw_info
    row (per the original note, the LLM uses it only as a signal that the
    channel exists).
    """
    logger.info("[kakaotalk] url-only run=%s url=%s", analysis_run_id, url)
    url_only_payload = {"url": url}
    await update_raw_info(info_id, url_only_payload)
|
||||
|
||||
|
||||
async def collect_brand_basics(analysis_run_id: str, info_id: int) -> None:
    """Derive the logo URL and brand colours for a run from its mainpage data.

    Reads the run's raw data, fetches the homepage HTML/CSS when a
    ``sourceUrl`` was stored, and then:

    * writes the chosen logo URL onto the run's mainpage raw_info row
      (HTML-detected logo first, then ``branding.logoUrl``, then
      ``branding.ogImage``);
    * merges the extracted colours into ``brandAssets`` on the raw_info row
      identified by ``info_id``.

    Args:
        analysis_run_id: Run whose mainpage data is inspected.
        info_id: raw_info row that receives the ``brandAssets`` merge.
    """
    logger.info("[brand_basics] start run=%s info=%s", analysis_run_id, info_id)
    run_raw = await select_run_raw_data(analysis_run_id)
    mainpage = run_raw.get("mainpage") or {}
    homepage_url = mainpage.get("sourceUrl") or ""
    branding_meta = mainpage.get("branding") or {}

    # Without a homepage URL there is nothing to download.
    if homepage_url:
        html, css_texts = await fetch_html_and_css(homepage_url)
    else:
        html, css_texts = "", []

    # Both extractors need page markup; skip them when the fetch produced none.
    if html:
        html_logo_url = find_logo_url_in_html(html, homepage_url, css_texts)
        css_colors = extract_brand_colors_from_text(html, css_texts, homepage_url)
    else:
        html_logo_url = None
        css_colors = {}

    logo_url = html_logo_url or branding_meta.get("logoUrl") or branding_meta.get("ogImage")
    if logo_url:
        mainpage_row = await fetchone(
            "SELECT ri.info_id FROM raw_info ri JOIN remote_source rs USING (source_id)"
            " WHERE ri.analysis_run_id = %s AND rs.source_type = 'mainpage' LIMIT 1",
            (analysis_run_id,),
        )
        if mainpage_row:
            await update_raw_info_logo_url(mainpage_row["info_id"], logo_url)

    payload: dict = {}
    if css_colors:
        for color_key in ("brand_colors", "color_palette"):
            if css_colors.get(color_key):
                payload[color_key] = css_colors[color_key]
        payload["color_source"] = "html+css"

    if payload:
        await update_raw_info_merge(info_id, {"brandAssets": payload})
    logger.info("[brand_basics] done logo_url=%s colors=%s", bool(logo_url), bool(payload))
|
||||
|
||||
|
||||
async def collect_all(analysis_run_id: str, hospital_id: str) -> None:
|
||||
rows = await select_run_sources(analysis_run_id)
|
||||
|
||||
# source_type → collector. KR/EN 구분은 collector 입장에서 동일, language 컬럼만 다름.
|
||||
_collectors = {
|
||||
SourceType.INSTAGRAM: collect_instagram,
|
||||
SourceType.FACEBOOK: collect_facebook,
|
||||
SourceType.NAVER_BLOG: collect_naver_blog,
|
||||
SourceType.YOUTUBE: collect_youtube,
|
||||
SourceType.GANGNAM_UNNI: collect_gangnam_unni,
|
||||
SourceType.TIKTOK: collect_tiktok,
|
||||
SourceType.NAVER_CAFE: collect_naver_cafe,
|
||||
SourceType.KAKAOTALK: collect_kakaotalk,
|
||||
}
|
||||
|
||||
tasks = []
|
||||
branding_info_id: int | None = None
|
||||
for row in rows:
|
||||
info_id = row["info_id"]
|
||||
source_type = row["source_type"]
|
||||
url = row["url"]
|
||||
if source_type == SourceType.BRANDING:
|
||||
branding_info_id = info_id # mainpage·채널 수집 끝난 뒤 2단계에서 사용
|
||||
continue
|
||||
if source_type == SourceType.MAINPAGE:
|
||||
tasks.append(collect_mainpage(analysis_run_id, info_id, hospital_id, url))
|
||||
elif source_type in _collectors:
|
||||
tasks.append(_collectors[source_type](analysis_run_id, info_id, url))
|
||||
|
||||
await asyncio.gather(*tasks, return_exceptions=True)
|
||||
|
||||
# 아래 3단계는 모두 hospital raw_data를 read-modify-write 하므로 race 방지 위해 순차.
|
||||
# brand_assets : clinic_info가 채운 branding.logoUrl로 공식 로고/hex 추출
|
||||
# extra_channels: 틱톡/인스타EN/페북EN 수집
|
||||
# channel_logos : 공식 로고(brand_assets)+채널 profileImage(extra_channels) 채워진 뒤 Vision 비교
|
||||
# 부가 기능이라 실패해도 리포트는 나와야 하므로 _run_optional_step으로 각각 격리.
|
||||
await _run_optional_step(collect_brand_assets(analysis_run_id, hospital_id), "brand_assets")
|
||||
await _run_optional_step(
|
||||
collect_extra_channels(
|
||||
analysis_run_id, hospital_id,
|
||||
tiktok_url=tiktok_url, instagram_en_url=instagram_en_url, facebook_en_url=facebook_en_url,
|
||||
kakao_talk_url=kakao_talk_url, naver_cafe_url=naver_cafe_url,
|
||||
),
|
||||
"extra_channels",
|
||||
)
|
||||
await _run_optional_step(collect_channel_logos(analysis_run_id, hospital_id), "channel_logos")
|
||||
# 2단계: branding (brandAssets → channelLogos 한 raw_info 안에 머지). mainpage·채널 raw_data 의존이라 순차.
|
||||
# 부가 기능이라 실패해도 리포트는 나와야 하므로 _run_optional_step 으로 격리.
|
||||
if branding_info_id is not None:
|
||||
await _run_optional_step(collect_brand_basics(analysis_run_id, branding_info_id), "brand_basics")
|
||||
|
|
@ -1,195 +0,0 @@
|
|||
import asyncio
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
import re
|
||||
from urllib.parse import urlparse
|
||||
from common.db import fetchone, fetch_raw, merge_hospital_raw_data
|
||||
from common.utils import get_env
|
||||
from integrations.apify import ApifyClient
|
||||
from integrations.vision import VisionClient
|
||||
from integrations.color_extractor import extract_brand_assets_from_site
|
||||
from services.facebook_audit import transform_for_storage as transform_facebook
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
async def collect_brand_assets(analysis_run_id: str, hospital_id: str) -> None:
    """Extract the homepage logo URL and brand hex colours into raw_data["brandAssets"].

    - Logo URL and hex colours come from ``extract_brand_assets_from_site``
      (HTML/CSS based, no Vision call).
    - The qualitative logo description comes from Gemini Vision; when
      GEMINI_API_KEY is not set only the colours are stored.

    Args:
        analysis_run_id: Run identifier, used for logging only.
        hospital_id: hospital_baseinfo row that is read and then merged into.
    """
    logger.info("[brand_assets] start run=%s", analysis_run_id)
    row = await fetchone(
        "SELECT raw_data, url FROM hospital_baseinfo WHERE hospital_id = %s",
        (hospital_id,),
    )
    if not row:
        return
    raw = row["raw_data"]
    raw_data = json.loads(raw) if isinstance(raw, str) else (raw or {})
    branding = raw_data.get("branding") or {}
    homepage_url = row["url"]

    # 0~1. One site fetch yields both the logo URL and the brand hex colours.
    site = await extract_brand_assets_from_site(homepage_url) if homepage_url else {}
    html_logo_url = site.get("logo_url")
    css_colors = site.get("colors") or {}
    if html_logo_url:
        logger.info("[brand_assets] HTML logo found: %s", html_logo_url)
    if css_colors:
        logger.info("[brand_assets] css colors: %s", css_colors.get("brand_colors"))

    # 2. Logo / representative image candidates, in order: logo -> og:image -> favicon.
    logo_url = html_logo_url or branding.get("logoUrl")
    og_image = branding.get("ogImage")
    favicon = branding.get("faviconUrl")
    candidates: list[tuple[str, str]] = []
    if logo_url:
        candidates.append(("logo", logo_url))
    if og_image:
        candidates.append(("og", og_image))
    if favicon:
        candidates.append(("favicon", favicon))
    if homepage_url:
        parsed = urlparse(homepage_url)
        if parsed.scheme and parsed.netloc:
            # Conventional /favicon.ico location as a last resort.
            candidates.append(("favicon", f"{parsed.scheme}://{parsed.netloc}/favicon.ico"))

    if not candidates and not css_colors:
        logger.info("[brand_assets] skip — no logo/og/favicon candidates and no CSS colors")
        return

    # 3. Vision only supplies the qualitative logo description (CSS hex values
    #    take priority below). Per the original note Gemini Vision does not
    #    support SVG, so an SVG candidate is stored by URL without analysis.
    SVG_URL = re.compile(r"\.svg(?:\?|#|$)", re.I)
    result: dict = {}
    used_kind: str | None = None
    api_key = os.getenv("GEMINI_API_KEY")
    if api_key and candidates:
        vc = VisionClient(api_key)
        for kind, cand in candidates:
            if SVG_URL.search(cand):
                logger.info("[brand_assets] %s URL is SVG — Vision 분석 skip, URL만 보관: %s", kind, cand)
                result = {"logo_images": {"circle": None, "horizontal": cand, "korean": None}}
                used_kind = kind
                break
            analysis = await vc.analyze_brand_assets(logo_url=cand, homepage_url=homepage_url)
            if analysis:
                # Only adopt a truthy analysis. Assigning a falsy return (e.g.
                # None) to `result` would make the colour merge below crash.
                result = analysis
                used_kind = kind
                break
        # A favicon is not the real logo: keep the description, drop the logo URLs.
        if result and used_kind == "favicon" and result.get("logo_images"):
            result["logo_images"] = {"circle": None, "horizontal": None, "korean": None}
    elif not api_key:
        logger.info("[brand_assets] GEMINI_API_KEY not set — 색상만 저장, Vision 묘사 skip")

    # 4. Colours extracted from CSS take priority over anything from Vision.
    if css_colors:
        if css_colors.get("brand_colors"):
            result["brand_colors"] = css_colors["brand_colors"]
        if css_colors.get("color_palette"):
            result["color_palette"] = css_colors["color_palette"]
        result["color_source"] = "html+css"
    elif result:
        result["color_source"] = "vision"

    if result:
        result["logo_source"] = used_kind or "none"
        await merge_hospital_raw_data(hospital_id, {"brandAssets": result})
    logger.info("[brand_assets] done keys=%s", list(result.keys()) if result else None)
|
||||
|
||||
|
||||
async def collect_extra_channels(
    analysis_run_id: str,
    hospital_id: str,
    tiktok_url: str | None = None,
    instagram_en_url: str | None = None,
    facebook_en_url: str | None = None,
    kakao_talk_url: str | None = None,
    naver_cafe_url: str | None = None,
) -> None:
    """Collect the optional channels and merge them into the hospital raw_data.

    TikTok, Instagram EN and Facebook EN are fetched concurrently through the
    Apify client. KakaoTalk and Naver Cafe are not fetched: only their URL is
    stored. Channels whose fetch fails or returns nothing are left out, and
    nothing is written when no channel produced data.

    Args:
        analysis_run_id: Run identifier, used for logging only.
        hospital_id: Hospital whose raw_data receives the merged results.
        tiktok_url: TikTok profile URL, if any.
        instagram_en_url: English Instagram profile URL, if any.
        facebook_en_url: English Facebook page URL, if any.
        kakao_talk_url: KakaoTalk channel URL (stored as-is), if any.
        naver_cafe_url: Naver Cafe URL (stored as-is), if any.
    """
    apify = ApifyClient(get_env("APIFY_API_TOKEN"))

    # Result key -> not-yet-awaited fetch for every channel that was supplied.
    pending: dict = {}
    if instagram_en_url:
        pending["instagramEn"] = apify.get_instagram_profile(instagram_en_url)
    if facebook_en_url:
        pending["facebookEn"] = apify.get_facebook_page(facebook_en_url)
    if tiktok_url:
        pending["tiktok"] = apify.get_tiktok_profile(tiktok_url)

    collected: dict = {}
    if pending:
        logger.info("[extra_channels] start run=%s channels=%s", analysis_run_id, list(pending))
        outcomes = await asyncio.gather(*pending.values(), return_exceptions=True)
        for channel, outcome in zip(pending, outcomes):
            if isinstance(outcome, Exception):
                logger.warning("[extra_channels] %s 수집 실패: %s", channel, outcome)
                continue
            if not outcome:
                continue
            # Facebook payloads are reshaped for storage before being kept.
            collected[channel] = transform_facebook(outcome) if channel == "facebookEn" else outcome

    # URL-only channels: no collection, just a record that the channel exists.
    if kakao_talk_url:
        collected["kakaoTalk"] = {"url": kakao_talk_url}
    if naver_cafe_url:
        collected["naverCafe"] = {"url": naver_cafe_url}

    if not collected:
        logger.info("[extra_channels] 수집 결과 없음 run=%s", analysis_run_id)
        return

    await merge_hospital_raw_data(hospital_id, collected)
    logger.info("[extra_channels] done run=%s keys=%s", analysis_run_id, list(collected))
|
||||
|
||||
|
||||
async def collect_channel_logos(analysis_run_id: str, hospital_id: str) -> None:
    """Describe each channel's profile image with Gemini Vision and store the result.

    Profile images are gathered from the per-channel tables referenced by the
    run (Instagram, Facebook, YouTube) and from the extra channels kept in the
    hospital raw_data (Instagram EN, Facebook EN, TikTok). They are sent to
    Vision together with the official logo URL, and the answer is merged into
    raw_data["channelLogos"]. The step is skipped when GEMINI_API_KEY is unset
    or no profile image exists.

    Must run after brand_assets (official logo) and extra_channels (extra
    profile images), since it reads what those steps stored.
    """
    api_key = os.getenv("GEMINI_API_KEY")
    if not api_key:
        logger.info("[channel_logos] skip — GEMINI_API_KEY 없음")
        return

    hrow = await fetchone("SELECT raw_data FROM hospital_baseinfo WHERE hospital_id = %s", (hospital_id,))
    stored = hrow["raw_data"] if hrow else None
    raw_data = json.loads(stored) if isinstance(stored, str) else (stored or {})
    brand_assets = raw_data.get("brandAssets") or {}
    official = (brand_assets.get("logo_images") or {}).get("horizontal")

    run = await fetchone(
        "SELECT instagram_data_id, facebook_data_id, youtube_data_id"
        " FROM analysis_runs WHERE analysis_run_id = %s",
        (analysis_run_id,),
    )
    run_ids = run or {}

    logos: list[dict] = []

    # Channels stored in their own tables (KR).
    dedicated_channels = (
        ("Instagram", "instagram_data", "instagram_data_id"),
        ("Facebook", "facebook_data", "facebook_data_id"),
        ("YouTube", "youtube_data", "youtube_data_id"),
    )
    for channel, table, id_column in dedicated_channels:
        row_id = run_ids.get(id_column)
        if not row_id:
            continue
        channel_raw = await fetch_raw(table, row_id) or {}
        if channel_raw.get("profileImage"):
            logos.append({"channel": channel, "url": channel_raw["profileImage"]})

    # Extra channels stored inside the hospital raw_data.
    extra_channels = (
        ("Instagram EN", "instagramEn"),
        ("Facebook EN", "facebookEn"),
        ("TikTok", "tiktok"),
    )
    for channel, raw_key in extra_channels:
        image_url = (raw_data.get(raw_key) or {}).get("profileImage")
        if image_url:
            logos.append({"channel": channel, "url": image_url})

    if not logos:
        logger.info("[channel_logos] skip — 채널 프로필 이미지 없음")
        return

    channel_names = [entry["channel"] for entry in logos]
    logger.info("[channel_logos] start run=%s channels=%s official=%s", analysis_run_id,
                channel_names, bool(official))
    result = await VisionClient(api_key).describe_channel_logos(official, logos)
    if result:
        # Keep every URL, including channels Vision did not describe
        # (the frontend uses them to display the images).
        result["logos"] = logos
        await merge_hospital_raw_data(hospital_id, {"channelLogos": result})
    logger.info("[channel_logos] done run=%s keys=%s", analysis_run_id, list(result.keys()) if result else None)
|
||||
|
|
@ -4,16 +4,8 @@
|
|||
|
||||
from datetime import datetime, timezone
|
||||
|
||||
|
||||
def _parse_ts(v) -> datetime | None:
|
||||
if isinstance(v, (int, float)):
|
||||
return datetime.fromtimestamp(v, tz=timezone.utc)
|
||||
if isinstance(v, str):
|
||||
try:
|
||||
return datetime.fromisoformat(v.replace("Z", "+00:00"))
|
||||
except ValueError:
|
||||
return None
|
||||
return None
|
||||
from common.utils import parse_ts
|
||||
from integrations.llm.schemas.report import FacebookAudit
|
||||
|
||||
|
||||
def _humanize_age(days: int) -> str:
|
||||
|
|
@ -66,7 +58,7 @@ def transform_for_storage(fb: dict | None) -> dict | None:
|
|||
posts = fb.get("latestPosts") or []
|
||||
out = {k: v for k, v in fb.items() if k != "latestPosts"}
|
||||
if posts:
|
||||
dts = sorted((d for d in (_parse_ts(p.get("timestamp")) for p in posts) if d), reverse=True)
|
||||
dts = sorted((d for d in (parse_ts(p.get("timestamp")) for p in posts) if d), reverse=True)
|
||||
if dts:
|
||||
out["recent_post_age"] = _humanize_age((datetime.now(timezone.utc) - dts[0]).days)
|
||||
if len(dts) > 1:
|
||||
|
|
@ -83,8 +75,9 @@ def transform_for_storage(fb: dict | None) -> dict | None:
|
|||
return out
|
||||
|
||||
|
||||
def _page_patch(fb: dict) -> dict:
|
||||
"""저장된 페북 페이지 → FacebookPage 스키마 필드 패치. 수치 지표는 수집 시점에 박혀있어 그대로 복사."""
|
||||
def _page_patch(fb: dict, language: str, label: str) -> dict:
|
||||
"""저장된 페북 페이지 → FacebookPage 스키마 필드 패치. 수치 지표는 수집 시점에 박혀있어 그대로 복사.
|
||||
language/label 은 데이터 있을 때만 명시적으로 박음 — template-copy 가 KR 값을 EN 슬롯에 잘못 상속시키는 것 방지."""
|
||||
p: dict = {}
|
||||
if fb.get("pageUrl"): p["url"] = p["link"] = fb["pageUrl"]
|
||||
if fb.get("pageName"): p["page_name"] = fb["pageName"]
|
||||
|
|
@ -96,10 +89,18 @@ def _page_patch(fb: dict) -> dict:
|
|||
if fb.get("following") is not None: p["following"] = fb["following"]
|
||||
for key in ("recent_post_age", "post_frequency", "engagement"):
|
||||
if fb.get(key): p[key] = fb[key]
|
||||
if p:
|
||||
p["language"] = language
|
||||
p["label"] = label
|
||||
return p
|
||||
|
||||
|
||||
def build_facebook_pages(facebook: dict, facebook_en: dict) -> list[dict]:
    """Build the list of Facebook page patches for the KR and EN pages.

    The prompt produces pages in [KR, EN] order, so the same order is kept
    here. Empty patches are dropped: a channel without data yields no page, so
    (per the original note) the LLM produces none either and indices stay
    aligned.
    """
    kr_patch = _page_patch(facebook)
    en_patch = _page_patch(facebook_en)
    patches: list[dict] = []
    for patch in (kr_patch, en_patch):
        if patch:
            patches.append(patch)
    return patches
|
||||
def build_facebook_audit(facebook: dict, facebook_en: dict, llm_pages: list[dict] | None = None) -> dict:
    """Assemble the Facebook audit section for the KR and EN pages.

    ``logo`` and ``logo_description`` are taken from the first LLM page (the
    Vision result) and applied to every page. All other fields come from the
    collected data via ``_page_patch``, which wins on key clashes. Pages with
    an empty patch are left out.

    Returns:
        The validated ``FacebookAudit`` dumped with ``exclude_unset=True``.
    """
    first_llm_page = (llm_pages or [{}])[0]
    shared_logo: dict = {}
    for field, value in first_llm_page.items():
        if field in {"logo", "logo_description"} and value:
            shared_logo[field] = value

    kr_patch = _page_patch(facebook, "KR", "페이스북 KR")
    en_patch = _page_patch(facebook_en, "EN", "페이스북 EN")
    pages = [{**shared_logo, **patch} for patch in (kr_patch, en_patch) if patch]

    audit = FacebookAudit.model_validate({"pages": pages})
    return audit.model_dump(exclude_unset=True)
|
||||
|
|
|
|||
|
|
@ -2,7 +2,8 @@ import logging
|
|||
|
||||
from fastapi import HTTPException, UploadFile
|
||||
|
||||
from common.db import execute, fetchall, fetchone, insert_file_row
|
||||
from common.db.run import select_run
|
||||
from common.db.file_data import insert_file, select_run_files, select_file, delete_file
|
||||
from integrations.azure_blob import AzureBlobUploader
|
||||
from models.file import FileListItem, FileType, FileUploadResponse
|
||||
|
||||
|
|
@ -31,10 +32,7 @@ async def upload_analysis_file(
|
|||
content_type: str | None = None,
|
||||
) -> tuple[int, str]:
|
||||
"""analysis_run에 딸린 파일 업로드. Blob 업로드 + file_data row 생성. (file_id, url) 반환."""
|
||||
run = await fetchone(
|
||||
"SELECT hospital_id FROM analysis_runs WHERE analysis_run_id = %s",
|
||||
(analysis_run_id,),
|
||||
)
|
||||
run = await select_run(analysis_run_id)
|
||||
if not run:
|
||||
raise HTTPException(status_code=404, detail="analysis_run not found")
|
||||
hospital_id = run["hospital_id"]
|
||||
|
|
@ -47,7 +45,7 @@ async def upload_analysis_file(
|
|||
content_type=content_type,
|
||||
)
|
||||
|
||||
file_id = await insert_file_row(
|
||||
file_id = await insert_file(
|
||||
analysis_run_id=analysis_run_id,
|
||||
hospital_id=hospital_id,
|
||||
file_type=file_type,
|
||||
|
|
@ -61,12 +59,7 @@ async def upload_analysis_file(
|
|||
|
||||
async def list_analysis_files(analysis_run_id: str) -> list[dict]:
|
||||
"""analysis_run에 딸린 (삭제 안 된) 파일 목록."""
|
||||
return await fetchall(
|
||||
"SELECT id, file_type, file_name, file_url, size_bytes, created_at FROM file_data"
|
||||
" WHERE analysis_run_id = %s AND is_deleted = FALSE"
|
||||
" ORDER BY created_at DESC",
|
||||
(analysis_run_id,),
|
||||
)
|
||||
return await select_run_files(analysis_run_id)
|
||||
|
||||
|
||||
async def handle_analysis_file_upload(
|
||||
|
|
@ -102,7 +95,7 @@ async def handle_analysis_file_upload(
|
|||
|
||||
async def get_analysis_files_response(analysis_run_id: str) -> list[FileListItem]:
|
||||
"""run 존재 확인 + 응답 모델 생성."""
|
||||
if not await fetchone("SELECT 1 FROM analysis_runs WHERE analysis_run_id = %s", (analysis_run_id,)):
|
||||
if not await select_run(analysis_run_id):
|
||||
raise HTTPException(status_code=404, detail="analysis_run not found")
|
||||
rows = await list_analysis_files(analysis_run_id)
|
||||
return [FileListItem(**{**r, "created_at": str(r["created_at"])}) for r in rows]
|
||||
|
|
@ -110,14 +103,8 @@ async def get_analysis_files_response(analysis_run_id: str) -> list[FileListItem
|
|||
|
||||
async def soft_delete_analysis_file(analysis_run_id: str, file_id: int) -> None:
|
||||
"""analysis_run에 딸린 파일을 소프트 삭제. 멱등성 보장."""
|
||||
row = await fetchone(
|
||||
"SELECT id FROM file_data WHERE id = %s AND analysis_run_id = %s",
|
||||
(file_id, analysis_run_id),
|
||||
)
|
||||
row = await select_file(file_id, analysis_run_id)
|
||||
if not row:
|
||||
raise HTTPException(status_code=404, detail="file not found")
|
||||
await execute(
|
||||
"UPDATE file_data SET is_deleted = TRUE WHERE id = %s AND is_deleted = FALSE",
|
||||
(file_id,),
|
||||
)
|
||||
await delete_file(file_id)
|
||||
logger.info("soft-deleted analysis file run=%s file_id=%s", analysis_run_id, file_id)
|
||||
|
|
@ -1,6 +1,8 @@
|
|||
"""Instagram audit 계정(KR·EN)을 수집 데이터로 구성.
|
||||
fix 값(handle/followers/highlights/content_format 등)은 전부 코드에서 박는다 — LLM 출력 무시."""
|
||||
|
||||
from integrations.llm.schemas.report import InstagramAudit
|
||||
|
||||
_MEDIA = {"GraphImage": "이미지", "GraphSidecar": "카드뉴스", "GraphVideo": "영상/릴스"}
|
||||
|
||||
|
||||
|
|
@ -38,11 +40,11 @@ def _account(data: dict, language: str, label: str, channel: str, channel_logos:
|
|||
}
|
||||
|
||||
|
||||
def build_instagram_accounts(instagram: dict, instagram_en: dict, channel_logos: dict) -> list[dict]:
|
||||
def build_instagram_audit(instagram: dict, instagram_en: dict, channel_logos: dict) -> dict:
|
||||
"""KR·EN 인스타 계정 리스트 구성 (username 있는 것만)."""
|
||||
accounts: list[dict] = []
|
||||
if instagram.get("username"):
|
||||
accounts.append(_account(instagram, "KR", "인스타그램 KR", "Instagram", channel_logos))
|
||||
if instagram_en.get("username"):
|
||||
accounts.append(_account(instagram_en, "EN", "인스타그램 EN", "Instagram EN", channel_logos))
|
||||
return accounts
|
||||
return InstagramAudit.model_validate({"accounts": accounts}).model_dump()
|
||||
|
|
|
|||
|
|
@ -0,0 +1,96 @@
|
|||
"""mockup 7개 역분석 — 채널 규모별 3개월/12개월 target 성장률 공식."""
|
||||
|
||||
from integrations.llm.schemas.report import KPIMetric
|
||||
|
||||
|
||||
def _round_clean(n: int) -> int:
|
||||
if n < 100: return n
|
||||
if n < 1000: return round(n / 100) * 100
|
||||
if n < 10_000: return round(n / 500) * 500
|
||||
if n < 100_000: return round(n / 1000) * 1000
|
||||
if n < 1_000_000: return round(n / 5000) * 5000
|
||||
return round(n / 50_000) * 50_000
|
||||
|
||||
|
||||
def _target_multiplier(current: int) -> tuple[float, float]:
|
||||
if current < 1_000: return (2.5, 9.0)
|
||||
if current < 5_000: return (1.7, 4.0)
|
||||
if current < 25_000: return (1.5, 2.5)
|
||||
if current < 50_000: return (1.3, 2.2)
|
||||
return (1.1, 1.9)
|
||||
|
||||
|
||||
def _follower_kpi(metric: str, val: int | None, unit: str = "명") -> dict | None:
    """Build one KPI row (current / 3-month / 12-month) for an audience count.

    Args:
        metric: Display name of the KPI.
        val: Current count; a falsy value (None or 0) produces no row.
        unit: Suffix appended to every formatted number.

    Returns:
        A dict with ``metric``, ``current``, ``target_3_month`` and
        ``target_12_month``, or None when ``val`` is falsy.
    """
    if not val:
        return None

    def _fmt(count) -> str:
        # Thousands separators plus the unit suffix.
        return f"{count:,}{unit}"

    short_term, long_term = _target_multiplier(val)
    return {
        "metric": metric,
        "current": _fmt(val),
        "target_3_month": _fmt(_round_clean(int(val * short_term))),
        "target_12_month": _fmt(_round_clean(int(val * long_term))),
    }
|
||||
|
||||
|
||||
def _blog_frequency(posts: list) -> tuple[str, str, str] | None:
    """Label the blog's posting frequency and its 3-month / 12-month targets.

    The average gap in days between the newest and oldest parsed ``postDate``
    values picks the "current" label. Targets are never lower than the current
    cadence: a blog already posting about daily gets "<current> 유지".

    Args:
        posts: Post dicts; each ``postDate`` is parsed with ``parse_ts``.

    Returns:
        ``(current, target_3m, target_12m)`` labels, or None when fewer than
        two posts have a parseable date.
    """
    from common.utils import parse_ts

    parsed_dates = [parse_ts(post.get("postDate")) for post in posts]
    dts = sorted((stamp for stamp in parsed_dates if stamp), reverse=True)
    if len(dts) < 2:
        return None

    newest, oldest = dts[0], dts[-1]
    avg_gap = (newest - oldest).days / (len(dts) - 1)

    # Current cadence label. The >90 check comes first so it wins over the
    # monthly buckets.
    if avg_gap > 90:
        current = f"방치 ({newest.strftime('%Y-%m')})"
    elif avg_gap <= 1:
        per_week = 7 // max(int(avg_gap), 1)
        current = f"주 {per_week}회"
    elif avg_gap <= 3:
        current = "주 2~3회"
    elif avg_gap <= 14:
        # NOTE(review): gaps of 8-14 days also land in this weekly label — confirm intended.
        current = "주 1~2회"
    elif avg_gap <= 30:
        per_month = max(30 // int(avg_gap), 1)
        current = f"월 {per_month}회"
    else:
        current = "월 1회 미만"

    # Targets step up from the current cadence.
    if avg_gap > 3:
        targets = ("주 2회", "주 3회")
    elif avg_gap > 2:
        targets = ("주 3회", "주 5회")
    elif avg_gap > 1:
        targets = ("주 5회", "주 7회")
    else:
        keep_label = f"{current} 유지"
        targets = (keep_label, keep_label)
    return (current, *targets)
|
||||
|
||||
|
||||
def build_kpi_dashboard(
    instagram: dict, facebook: dict, youtube: dict, gangnam_unni: dict, hospital: dict,
    naver_blog: dict | None = None,
) -> list[dict]:
    """Build the KPI dashboard rows from the collected channel data.

    Rows appear in this order: audience KPIs for every channel that reports a
    non-zero count, the Naver blog posting frequency (when it can be
    computed), and the Gangnam Unni review count. Each row is validated
    through ``KPIMetric`` before being returned.

    Args:
        instagram: Collected Instagram KR data (``followers``).
        facebook: Collected Facebook KR data (``followers``).
        youtube: Collected YouTube data (``subscribers``).
        gangnam_unni: Collected Gangnam Unni data (``totalReviews``).
        hospital: Hospital raw data holding the extra channels
            (``instagramEn``, ``facebookEn``, ``tiktok``, ``naverCafe``).
        naver_blog: Collected Naver blog data (``posts``), if any.

    Returns:
        A list of validated KPI dicts.
    """
    ig_en = hospital.get("instagramEn") or {}
    fb_en = hospital.get("facebookEn") or {}
    tiktok = hospital.get("tiktok") or {}
    cafe = hospital.get("naverCafe") or {}

    audience_specs = [
        ("YouTube 구독자", youtube.get("subscribers")),
        ("Instagram KR 팔로워", instagram.get("followers")),
        ("Instagram EN 팔로워", ig_en.get("followers")),
        ("Facebook KR 팔로워", facebook.get("followers")),
        ("Facebook EN 팔로워", fb_en.get("followers")),
        ("TikTok 팔로워", tiktok.get("followers")),
        ("Naver Cafe 회원 수", cafe.get("memberCount")),
    ]
    audience_rows = [_follower_kpi(label, count) for label, count in audience_specs]
    # Channels without a count yield None and are left out.
    kpis: list[dict] = [row for row in audience_rows if row]

    if naver_blog:
        freq = _blog_frequency(naver_blog.get("posts") or [])
        if freq:
            current_label, target_3m, target_12m = freq
            kpis.append({
                "metric": "네이버 블로그 포스팅 빈도",
                "current": current_label,
                "target_3_month": target_3m,
                "target_12_month": target_12m,
            })

    gu_reviews = gangnam_unni.get("totalReviews")
    if gu_reviews:
        # Review counts use their own growth multipliers, tiered by volume.
        if gu_reviews < 1000:
            short_term, long_term = 2.0, 6.0
        elif gu_reviews < 5000:
            short_term, long_term = 1.10, 1.50
        else:
            short_term, long_term = 1.07, 1.27
        target_3m_count = _round_clean(int(gu_reviews * short_term))
        target_12m_count = _round_clean(int(gu_reviews * long_term))
        kpis.append({
            "metric": "강남언니 리뷰",
            "current": f"{gu_reviews:,}개",
            "target_3_month": f"{target_3m_count:,}개",
            "target_12_month": f"{target_12m_count:,}개",
        })

    return [KPIMetric.model_validate(row).model_dump() for row in kpis]
|
||||
|
|
@ -1,7 +1,9 @@
|
|||
import asyncio
|
||||
import json
|
||||
import logging
|
||||
from common.db import fetchone, execute
|
||||
from common.db.run import select_run
|
||||
from common.db.hospital import select_hospital
|
||||
from common.db.market import upsert_market_status, upsert_market_result
|
||||
from common.db.source import select_run_raw_data
|
||||
from integrations.llm.llm_service import LLMService
|
||||
from integrations.llm.prompt import (
|
||||
market_competitors_prompt,
|
||||
|
|
@ -18,49 +20,27 @@ _TYPES = ["competitors", "keywords", "trend", "target_audience"]
|
|||
async def _save(analysis_run_id: str, analysis_type: str, result, exc: Exception | None) -> None:
|
||||
if exc:
|
||||
logger.warning("[market] %s failed run=%s: %s", analysis_type, analysis_run_id, exc)
|
||||
await execute(
|
||||
"INSERT INTO market_analysis (analysis_run_id, analysis_type, status)"
|
||||
" VALUES (%s, %s, 'failed')"
|
||||
" ON DUPLICATE KEY UPDATE status = 'failed'",
|
||||
(analysis_run_id, analysis_type),
|
||||
)
|
||||
await upsert_market_status(analysis_run_id, analysis_type, "failed")
|
||||
else:
|
||||
await execute(
|
||||
"INSERT INTO market_analysis (analysis_run_id, analysis_type, status, data)"
|
||||
" VALUES (%s, %s, 'done', %s)"
|
||||
" ON DUPLICATE KEY UPDATE status = 'done', data = VALUES(data)",
|
||||
(analysis_run_id, analysis_type, json.dumps(result.model_dump(), ensure_ascii=False)),
|
||||
)
|
||||
await upsert_market_result(analysis_run_id, analysis_type, result.model_dump())
|
||||
|
||||
|
||||
async def run_market_analysis(analysis_run_id: str) -> None:
|
||||
logger.info("[market] start run=%s", analysis_run_id)
|
||||
|
||||
run = await fetchone(
|
||||
"SELECT hospital_id FROM analysis_runs WHERE analysis_run_id = %s",
|
||||
(analysis_run_id,),
|
||||
)
|
||||
clinic = await fetchone(
|
||||
"SELECT hospital_name, road_address, raw_data FROM hospital_baseinfo WHERE hospital_id = %s",
|
||||
(run["hospital_id"],),
|
||||
)
|
||||
run = await select_run(analysis_run_id)
|
||||
clinic = await select_hospital(run["hospital_id"])
|
||||
raw = await select_run_raw_data(analysis_run_id)
|
||||
mainpage = raw.get("mainpage") or {}
|
||||
|
||||
raw_data = clinic["raw_data"]
|
||||
clinic_data = json.loads(raw_data) if isinstance(raw_data, str) else (raw_data or {})
|
||||
|
||||
clinic_name = clinic["hospital_name"] or ""
|
||||
address = clinic["road_address"] or ""
|
||||
services = clinic_data.get("services", [])
|
||||
clinic_name = (clinic or {}).get("hospital_name") or ""
|
||||
address = (clinic or {}).get("road_address") or ""
|
||||
services = mainpage.get("services", [])
|
||||
services_str = ", ".join(services[:3])
|
||||
primary_service = services[0] if services else ""
|
||||
|
||||
for analysis_type in _TYPES:
|
||||
await execute(
|
||||
"INSERT INTO market_analysis (analysis_run_id, analysis_type, status)"
|
||||
" VALUES (%s, %s, 'processing')"
|
||||
" ON DUPLICATE KEY UPDATE status = 'processing'",
|
||||
(analysis_run_id, analysis_type),
|
||||
)
|
||||
await upsert_market_status(analysis_run_id, analysis_type, "processing")
|
||||
|
||||
llm = LLMService(provider="perplexity")
|
||||
results = await asyncio.gather(
|
||||
|
|
|
|||
|
|
@ -1,5 +1,5 @@
|
|||
import logging
|
||||
from common.db import fetchone, execute
|
||||
from common.db.run import select_run, update_run_status
|
||||
from models.status import AnalysisStatus
|
||||
from services.collect import collect_all
|
||||
from services.market import run_market_analysis
|
||||
|
|
@ -8,51 +8,23 @@ from services.analysis import run_report_task, run_plan_task
|
|||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
async def run_pipeline(analysis_run_id: str, extra_channels: dict | None = None) -> None:
|
||||
async def run_pipeline(analysis_run_id: str) -> None:
|
||||
logger.info("[pipeline] start run=%s", analysis_run_id)
|
||||
extra_channels = extra_channels or {}
|
||||
|
||||
# ── 1. Collect ──────────────────────────────────────────────────────────
|
||||
run = await fetchone(
|
||||
"SELECT hospital_id, instagram_data_id, facebook_data_id,"
|
||||
" naver_blog_data_id, youtube_data_id, gangnam_unni_data_id"
|
||||
" FROM analysis_runs WHERE analysis_run_id = %s",
|
||||
(analysis_run_id,),
|
||||
)
|
||||
await collect_all(
|
||||
analysis_run_id,
|
||||
hospital_id=run["hospital_id"],
|
||||
instagram_id=run["instagram_data_id"],
|
||||
facebook_id=run["facebook_data_id"],
|
||||
naver_blog_id=run["naver_blog_data_id"],
|
||||
youtube_id=run["youtube_data_id"],
|
||||
gangnam_unni_id=run["gangnam_unni_data_id"],
|
||||
tiktok_url=extra_channels.get("tiktok"),
|
||||
instagram_en_url=extra_channels.get("instagram_en"),
|
||||
facebook_en_url=extra_channels.get("facebook_en"),
|
||||
kakao_talk_url=extra_channels.get("kakao_talk"),
|
||||
naver_cafe_url=extra_channels.get("naver_cafe"),
|
||||
)
|
||||
run = await select_run(analysis_run_id)
|
||||
await collect_all(analysis_run_id, hospital_id=run["hospital_id"])
|
||||
|
||||
# ── 2. Market ────────────────────────────────────────────────────────────
|
||||
await execute(
|
||||
"UPDATE analysis_runs SET status = %s WHERE analysis_run_id = %s",
|
||||
(AnalysisStatus.ANALYZING, analysis_run_id),
|
||||
)
|
||||
await update_run_status(analysis_run_id, AnalysisStatus.ANALYZING)
|
||||
await run_market_analysis(analysis_run_id)
|
||||
|
||||
# ── 3. Report ────────────────────────────────────────────────────────────
|
||||
await run_report_task(analysis_run_id)
|
||||
|
||||
# ── 4. Plan ──────────────────────────────────────────────────────────────
|
||||
await execute(
|
||||
"UPDATE analysis_runs SET status = %s WHERE analysis_run_id = %s",
|
||||
(AnalysisStatus.PLANNING, analysis_run_id),
|
||||
)
|
||||
await update_run_status(analysis_run_id, AnalysisStatus.PLANNING)
|
||||
await run_plan_task(analysis_run_id)
|
||||
|
||||
await execute(
|
||||
"UPDATE analysis_runs SET status = %s WHERE analysis_run_id = %s",
|
||||
(AnalysisStatus.COMPLETED, analysis_run_id),
|
||||
)
|
||||
await update_run_status(analysis_run_id, AnalysisStatus.COMPLETED)
|
||||
logger.info("[pipeline] done run=%s", analysis_run_id)
|
||||
|
|
|
|||
|
|
@ -10,3 +10,4 @@ passlib[bcrypt]==1.7.4
|
|||
python-multipart==0.0.26
|
||||
uuid6==2025.0.1
|
||||
aiomysql==0.3.2
|
||||
resvg-py==0.3.2
|
||||
|
|
|
|||
Loading…
Reference in New Issue