llm 붙임 및 리포트 생성 확인

upload
jaehwang 2026-05-14 16:16:09 +09:00
parent 26cd946e1b
commit 2b8a90e857
26 changed files with 507 additions and 175 deletions

View File

@ -65,6 +65,9 @@ CREATE TABLE hospital_baseinfo
`brn` VARCHAR(50) NOT NULL,
`road_address` VARCHAR(100) NULL,
`site_address` VARCHAR(100) NULL,
`url` VARCHAR(500) NULL,
`status` VARCHAR(20) NOT NULL DEFAULT 'start',
`raw_data` JSON NULL,
`created_at` TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP,
`updated_at` TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP,
PRIMARY KEY (hospital_id)
@ -133,6 +136,8 @@ CREATE TABLE analysis_runs
`naver_blog_data_id` INT NULL,
`youtube_data_id` INT NULL,
`gangnam_unni_data_id` INT NULL,
`report_data` JSON NULL,
`plan_data` JSON NULL,
`created_at` TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP,
`updated_at` TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP,
PRIMARY KEY (analysis_run_id)

View File

@ -2,6 +2,5 @@ from .clinics import router as clinics_router
from .analyses import router as analyses_router
from .reports import router as reports_router
from .plans import router as plans_router
from .channels import router as channels_router
routers = [clinics_router, analyses_router, reports_router, plans_router, channels_router]
routers = [clinics_router, analyses_router, reports_router, plans_router]

View File

@ -1,3 +1,4 @@
import logging
import uuid6
from fastapi import APIRouter, BackgroundTasks, Depends, status, HTTPException
from common.deps import verify_api_key
@ -7,10 +8,12 @@ from models.status import AnalysisStatus
from services.collect import collect_instagram, collect_facebook, collect_naver_blog, collect_youtube, collect_gangnam_unni
router = APIRouter(prefix="/api/analyses", tags=["analyses"], dependencies=[Depends(verify_api_key)])
logger = logging.getLogger(__name__)
@router.post("", status_code=status.HTTP_202_ACCEPTED, response_model=AnalysisStartResponse)
async def start_analysis(body: AnalysisCreate, background_tasks: BackgroundTasks):
logger.info("POST /api/analyses clinic_id=%s", body.clinic_id)
analysis_run_id = str(uuid6.uuid7())
hospital_id = body.clinic_id
@ -23,8 +26,7 @@ async def start_analysis(body: AnalysisCreate, background_tasks: BackgroundTasks
raise HTTPException(status_code=409, detail="Clinic not found")
owner_user_id = hospital["owner_user_id"] if hospital else 0
ig_url = body.channels.instagram[0] if isinstance(body.channels.instagram, list) else body.channels.instagram
ig_id = await insert_instagram_row(hospital_id, ig_url) if ig_url else None
ig_id = await insert_instagram_row(hospital_id, body.channels.instagram) if body.channels.instagram else None
fb_id = await insert_facebook_row(hospital_id, body.channels.facebook) if body.channels.facebook else None
nb_id = await insert_naver_blog_row(hospital_id, body.channels.naver_blog) if body.channels.naver_blog else None
yt_id = await insert_youtube_row(hospital_id, body.channels.youtube) if body.channels.youtube else None
@ -33,9 +35,9 @@ async def start_analysis(body: AnalysisCreate, background_tasks: BackgroundTasks
analysis_run_id = await insert_analysis_run(analysis_run_id, hospital_id, owner_user_id, ig_id, fb_id, nb_id, yt_id, gu_id)
if ig_id:
background_tasks.add_task(collect_instagram, analysis_run_id, ig_id, ig_url)
background_tasks.add_task(collect_instagram, analysis_run_id, ig_id, body.channels.instagram)
if fb_id:
background_tasks.add_task(collect_facebook, analysis_run_id, fb_id, f"https://www.facebook.com/{body.channels.facebook}")
background_tasks.add_task(collect_facebook, analysis_run_id, fb_id, body.channels.facebook)
if nb_id:
background_tasks.add_task(collect_naver_blog, analysis_run_id, nb_id, body.channels.naver_blog)
if yt_id:
@ -54,7 +56,10 @@ async def start_analysis(body: AnalysisCreate, background_tasks: BackgroundTasks
@router.get("/{run_id}/status", response_model=AnalysisStatusResponse)
async def get_analysis_status(run_id: str):
logger.info("GET /api/analyses/%s/status", run_id)
row = await fetchone("SELECT status FROM analysis_runs WHERE analysis_run_id = %s", (run_id,))
if not row:
raise HTTPException(status_code=404, detail="Run not found")
return AnalysisStatusResponse(
analysis_run_id=run_id,
status=AnalysisStatus(row["status"]),

View File

@ -1,23 +0,0 @@
from fastapi import APIRouter, Depends
from common.deps import verify_api_key
from models.channel import ChannelVerifyRequest, ChannelVerifyResponse
router = APIRouter(prefix="/api/channels", tags=["channels"], dependencies=[Depends(verify_api_key)])
# will not use
@router.post("/verify", response_model=ChannelVerifyResponse)
async def verify_channels(body: ChannelVerifyRequest):
return ChannelVerifyResponse(
youtube={
"handle": body.youtube,
"verified": True,
"display_name": "바노바기 BANOBAGI",
"followers": 12345,
} if body.youtube else None,
instagram=[
{"handle": handle, "verified": "unverifiable", "note": "Instagram 로그인 벽"}
for handle in body.instagram
] if body.instagram else None,
)

View File

@ -1,28 +1,64 @@
import logging
import uuid6
from fastapi import APIRouter, Depends, status
from fastapi import APIRouter, Depends, HTTPException, status
from common.deps import verify_api_key
from common.db import insert_hospital
from models.clinic import ClinicCreate, ClinicCreateResponse, ClinicHistoryResponse, RunSummary
from common.db import insert_hospital, fetchone
from common.utils import get_env
from integrations.firecrawl import FirecrawlClient
from models.clinic import ClinicCreate, ClinicCreateResponse, ClinicResponse, ClinicHistoryResponse, RunSummary
router = APIRouter(prefix="/api/clinics", tags=["clinics"], dependencies=[Depends(verify_api_key)])
logger = logging.getLogger(__name__)
_REQUIRED_FIELDS = ["clinicName"]
_COLLECTED_FIELDS = ["clinicName", "clinicNameEn", "address", "phone", "slogan", "services", "doctors"]
@router.post("", status_code=status.HTTP_201_CREATED, response_model=ClinicCreateResponse)
async def create_clinic(body: ClinicCreate):
logger.info("POST /api/clinics url=%s", body.url)
info = await FirecrawlClient(get_env("FIRECRAWL_API_KEY")).fetch_clinic_info(body.url)
missing = [f for f in _COLLECTED_FIELDS if not (info or {}).get(f)]
required_missing = [f for f in _REQUIRED_FIELDS if f in missing]
if required_missing:
raise HTTPException(status_code=404, detail={"missing": missing})
hospital_id = str(uuid6.uuid7())
row = await insert_hospital(hospital_id, body.name, body.name_en, body.address, body.url)
row = await insert_hospital(
hospital_id,
name=info["clinicName"],
name_en=info.get("clinicNameEn"),
road_address=info.get("address"),
url=body.url,
raw_data=info,
)
return ClinicCreateResponse(
id=hospital_id,
url=body.url,
name=body.name,
name=info["clinicName"],
created_at=str(row["created_at"]),
)
@router.get("/{hospital_id}", response_model=ClinicResponse)
async def get_clinic(hospital_id: str):
logger.info("GET /api/clinics/%s", hospital_id)
row = await fetchone(
"SELECT hospital_id, hospital_name, hospital_name_en, road_address, url, status, raw_data, created_at, updated_at"
" FROM hospital_baseinfo WHERE hospital_id = %s",
(hospital_id,),
)
if not row:
raise HTTPException(status_code=404, detail="Clinic not found")
return ClinicResponse(**{**row, "created_at": str(row["created_at"]), "updated_at": str(row["updated_at"])})
# Not done
@router.get("/{id}/history", response_model=ClinicHistoryResponse)
async def get_clinic_history(id: str):
logger.info("GET /api/clinics/%s/history", id)
return ClinicHistoryResponse(
clinic_id=id,
runs=[

View File

@ -1,12 +1,15 @@
import logging
from fastapi import APIRouter, Depends, status
from common.deps import verify_api_key
from models.plan import PlanCreate, PlanResponse
router = APIRouter(prefix="/api/plans", tags=["plans"], dependencies=[Depends(verify_api_key)])
logger = logging.getLogger(__name__)
@router.post("", status_code=status.HTTP_201_CREATED, response_model=PlanResponse)
async def create_plan(body: PlanCreate):
logger.info("POST /api/plans run_id=%s", body.analysis_run_id)
return PlanResponse(
id="33333333-3333-3333-3333-333333333333",
analysis_run_id="22222222-2222-2222-2222-222222222222",
@ -20,6 +23,7 @@ async def create_plan(body: PlanCreate):
@router.get("/{id}", response_model=PlanResponse)
async def get_plan(id: str):
logger.info("GET /api/plans/%s", id)
return PlanResponse(
id=id,
analysis_run_id="22222222-2222-2222-2222-222222222222",

View File

@ -1,24 +1,24 @@
from fastapi import APIRouter, Depends
import json
import logging
from fastapi import APIRouter, Depends, HTTPException, Response
from common.db import fetchone
from common.deps import verify_api_key
from models.report import ReportResponse, ClinicInfo
from integrations.llm.schemas.report import ReportOutput
router = APIRouter(prefix="/api/reports", tags=["reports"], dependencies=[Depends(verify_api_key)])
logger = logging.getLogger(__name__)
@router.get("/{run_id}", response_model=ReportResponse)
@router.get("/{run_id}", response_model=ReportOutput | None)
async def get_report(run_id: str):
return ReportResponse(
id=run_id,
clinic=ClinicInfo(name="바노바기성형외과", url="https://www.banobagi.com"),
overall_score=82,
youtube={},
instagram={},
facebook={},
naver_place={},
naver_blog={},
gangnam_unni={},
conversion_strategy={},
roadmap=[],
kpis=[],
generated_at="2026-04-20T09:01:30Z",
logger.info("GET /api/reports/%s", run_id)
row = await fetchone(
"SELECT report_data FROM analysis_runs WHERE analysis_run_id = %s",
(run_id,),
)
if row is None:
raise HTTPException(status_code=404, detail="Run not found")
if row["report_data"] is None:
return Response(status_code=204)
data = json.loads(row["report_data"]) if isinstance(row["report_data"], str) else row["report_data"]
return ReportOutput(**data)

View File

@ -76,6 +76,13 @@ async def insert_analysis_run(
async def save_analysis_report(analysis_run_id: str, data: dict) -> None:
await execute(
"UPDATE analysis_runs SET report_data = %s WHERE analysis_run_id = %s",
(json.dumps(data, ensure_ascii=False), analysis_run_id),
)
async def is_done(table: str, row_id: int | None) -> bool:
if row_id is None:
return True
@ -83,6 +90,30 @@ async def is_done(table: str, row_id: int | None) -> bool:
return r["status"] == "done"
async def _fetch_raw(table: str, row_id: int | None) -> dict | None:
if row_id is None:
return None
row = await fetchone(f"SELECT raw_data FROM {table} WHERE id = %s", (row_id,))
if not row or not row["raw_data"]:
return None
return json.loads(row["raw_data"]) if isinstance(row["raw_data"], str) else row["raw_data"]
async def get_analysis_raw_data(analysis_run_id: str) -> dict:
run = await fetchone(
"SELECT instagram_data_id, facebook_data_id, naver_blog_data_id, youtube_data_id, gangnam_unni_data_id"
" FROM analysis_runs WHERE analysis_run_id = %s",
(analysis_run_id,),
)
return {
"instagram": await _fetch_raw("instagram_data", run["instagram_data_id"]),
"facebook": await _fetch_raw("facebook_data", run["facebook_data_id"]),
"naver_blog": await _fetch_raw("naver_blog_data", run["naver_blog_data_id"]),
"youtube": await _fetch_raw("youtube_data", run["youtube_data_id"]),
"gangnam_unni": await _fetch_raw("gangnam_unni_data", run["gangnam_unni_data_id"]),
}
async def set_instagram_status(row_id: int, status: str) -> None:
await execute("UPDATE instagram_data SET status = %s WHERE id = %s", (status, row_id))
@ -129,14 +160,37 @@ async def insert_hospital(
name_en: str | None = None,
road_address: str | None = None,
site_address: str | None = None,
url: str | None = None,
raw_data: dict | None = None,
owner_user_id: int = 0,
brn: str = "",
) -> dict:
await execute(
"INSERT INTO hospital_baseinfo (hospital_id, hospital_name, hospital_name_en, road_address, site_address, owner_user_id, brn) VALUES (%s, %s, %s, %s, %s, %s, %s)",
(hospital_id, name, name_en, road_address, site_address, owner_user_id, brn),
"INSERT INTO hospital_baseinfo (hospital_id, hospital_name, hospital_name_en, road_address, site_address, url, raw_data, status, owner_user_id, brn)"
" VALUES (%s, %s, %s, %s, %s, %s, %s, 'done', %s, %s)",
(hospital_id, name, name_en, road_address, site_address, url,
json.dumps(raw_data, ensure_ascii=False) if raw_data else None,
owner_user_id, brn),
)
return await fetchone(
"SELECT created_at FROM hospital_baseinfo WHERE hospital_id = %s",
(hospital_id,),
)
async def save_hospital_raw_data(hospital_id: str, data: dict) -> None:
await execute(
"UPDATE hospital_baseinfo"
" SET raw_data = %s, status = 'done',"
" hospital_name = COALESCE(%s, hospital_name),"
" hospital_name_en = COALESCE(%s, hospital_name_en),"
" road_address = COALESCE(%s, road_address)"
" WHERE hospital_id = %s",
(
json.dumps(data, ensure_ascii=False),
data.get("clinicName"),
data.get("clinicNameEn"),
data.get("address"),
hospital_id,
),
)

View File

@ -2,7 +2,5 @@ import os
from fastapi import Header, HTTPException
async def verify_api_key(x_api_key: str = Header(...)):
print(x_api_key)
print(os.getenv("API_KEY"))
if x_api_key != os.getenv("API_KEY"):
raise HTTPException(status_code=401, detail="Invalid API Key")

View File

@ -1,4 +1,5 @@
from http import HTTPMethod
from urllib.parse import urlparse
from common.utils import http_request
APIFY_BASE = "https://api.apify.com/v2"
@ -32,8 +33,9 @@ class ApifyClient:
return []
return items_resp.json()
async def fetch_instagram_profile(self, handle: str) -> dict | None:
items = await self._run_actor("apify~instagram-profile-scraper", {"usernames": [handle], "resultsLimit": 12})
async def fetch_instagram_profile(self, url: str) -> dict | None:
username = urlparse(url).path.strip("/").split("/")[0] if "://" in url else url.lstrip("@")
items = await self._run_actor("apify~instagram-profile-scraper", {"usernames": [username], "resultsLimit": 12})
return items[0] if items else None
async def get_instagram_profile(self, handle: str) -> dict | None:
@ -131,7 +133,7 @@ class ApifyClient:
if not page:
return None
return {
"pageName": page["title"],
"pageName": page.get("title") or page.get("name"),
"pageUrl": page.get("pageUrl", page_url),
"followers": page.get("followers", 0),
"likes": page.get("likes", 0),

View File

@ -67,6 +67,88 @@ class FirecrawlClient:
return []
return (data.get("json") or {}).get("socialLinks", [])
async def fetch_clinic_info(self, url: str) -> dict | None:
resp = await http_request(
HTTPMethod.POST,
url=f"{FIRECRAWL_BASE}/scrape",
headers=self._headers(),
json_body={
"url": url,
"formats": ["json", "links"],
"jsonOptions": {
"prompt": "Extract: clinic name (Korean), clinic name (English), address, phone, business hours, slogan, services offered, doctors with name/title/specialty, brand identity (primary/accent/background/text colors in hex, heading/body fonts, logo URL, favicon URL)",
"schema": {
"type": "object",
"properties": {
"clinicName": {"type": "string"},
"clinicNameEn": {"type": "string"},
"address": {"type": "string"},
"phone": {"type": "string"},
"businessHours": {"type": "string"},
"slogan": {"type": "string"},
"services": {"type": "array", "items": {"type": "string"}},
"doctors": {
"type": "array",
"items": {
"type": "object",
"properties": {
"name": {"type": "string"},
"title": {"type": "string"},
"specialty": {"type": "string"},
},
},
},
# "socialMedia": {
# "type": "object",
# "properties": {
# "instagram": {"type": "string"},
# "youtube": {"type": "string"},
# "blog": {"type": "string"},
# "facebook": {"type": "string"},
# "tiktok": {"type": "string"},
# "kakao": {"type": "string"},
# },
# },
"branding": {
"type": "object",
"properties": {
"primaryColor": {"type": "string"},
"accentColor": {"type": "string"},
"backgroundColor": {"type": "string"},
"textColor": {"type": "string"},
"headingFont": {"type": "string"},
"bodyFont": {"type": "string"},
"logoUrl": {"type": "string"},
"faviconUrl": {"type": "string"},
},
},
},
},
},
"waitFor": 5000,
},
timeout=60,
label="firecrawl-clinic-info",
)
if not resp or not resp.is_success:
return None
data = resp.json().get("data") or {}
info = data.get("json") or {}
return {
"clinicName": info.get("clinicName"),
"clinicNameEn": info.get("clinicNameEn"),
"address": info.get("address"),
"phone": info.get("phone"),
"businessHours": info.get("businessHours"),
"slogan": info.get("slogan"),
"services": info.get("services", []),
"doctors": info.get("doctors", []),
# "socialMedia": info.get("socialMedia", {}),
"branding": info.get("branding", {}),
"siteLinks": data.get("links", []),
"sourceUrl": url,
}
async def fetch_gangnam_unni(self, hospital_url: str) -> dict | None:
resp = await http_request(
HTTPMethod.POST,

View File

@ -1,3 +1,3 @@
from .service import LLMService
from .llm_service import LLMService
from .prompt import Prompt

View File

@ -0,0 +1,76 @@
from pydantic import BaseModel
from openai import AsyncOpenAI
from common.utils import get_env
from .prompt import Prompt
class LLMResponseError(Exception):
def __init__(self, status: str, code: str = None, message: str = None):
self.status = status
self.code = code
self.message = message
super().__init__(f"LLM response failed: status={status}, code={code}, message={message}")
class LLMService:
def __init__(self, provider: str = "openai", max_retries: int = 2):
self.provider = provider
self.max_retries = max_retries
match provider:
case "openai":
self.client = AsyncOpenAI(api_key=get_env("OPENAI_API_KEY"))
case "perplexity":
self.client = AsyncOpenAI(
api_key=get_env("PERPLEXITY_API_KEY"),
base_url="https://api.perplexity.ai",
)
case "gemini":
self.client = AsyncOpenAI(
api_key=get_env("GEMINI_API_KEY"),
base_url="https://generativelanguage.googleapis.com/v1beta/openai/",
)
case _:
raise NotImplementedError(f"Unknown provider: {provider}")
async def generate(
self,
prompt: Prompt,
input_data: dict,
) -> BaseModel:
prompt_text = prompt.build(input_data)
last_error = None
for attempt in range(self.max_retries + 1):
if self.provider == "perplexity":
response = await self.client.chat.completions.create(
model=prompt.model,
messages=[{"role": "user", "content": prompt_text}],
response_format={
"type": "json_schema",
"json_schema": {"name": prompt.output_class.__name__, "schema": prompt.output_class.model_json_schema()},
},
)
choice = response.choices[0]
if choice.finish_reason == "stop":
return prompt.output_class.model_validate_json(choice.message.content)
last_error = LLMResponseError("failed", choice.finish_reason, f"unexpected finish_reason: {choice.finish_reason}")
else:
response = await self.client.beta.chat.completions.parse(
model=prompt.model,
messages=[{"role": "user", "content": prompt_text}],
response_format=prompt.output_class,
)
choice = response.choices[0]
finish_reason = choice.finish_reason
if finish_reason == "stop":
return choice.message.parsed
if finish_reason == "length":
last_error = LLMResponseError("incomplete", finish_reason, "max tokens reached")
elif finish_reason == "content_filter":
last_error = LLMResponseError("failed", finish_reason, "blocked by content filter")
else:
last_error = LLMResponseError("failed", finish_reason, f"unexpected finish_reason: {finish_reason}")
raise last_error

View File

@ -1,19 +1,40 @@
import os
from pydantic import BaseModel
from common.utils import get_env
from integrations.llm.schemas.report import ReportInput, ReportOutput
_PROMPT_DIR = os.path.join(os.path.dirname(__file__), "temp-prompt")
class Prompt:
def __init__(
self,
template: str,
model: str,
input_class: type[BaseModel],
output_class: type[BaseModel],
):
self.template = template
self.model = model
file_name: str
prompt_model: str
input_class: type[BaseModel]
output_class: type[BaseModel]
def __init__(self, file_name: str, prompt_model: str, input_class: type[BaseModel], output_class: type[BaseModel]):
self.file_name = file_name
self.prompt_model = prompt_model
self.input_class = input_class
self.output_class = output_class
self.template, self.model = self._load_prompt()
def _load_prompt(self) -> tuple[str, str]:
with open(os.path.join(_PROMPT_DIR, self.file_name), encoding="utf-8") as f:
template = f.read()
return template, get_env(self.prompt_model)
def _reload_prompt(self):
self.template, self.model = self._load_prompt()
def build(self, input_data: dict) -> str:
verified = self.input_class(**input_data)
return self.template.format(**verified.model_dump())
report_prompt = Prompt(
file_name="report_prompt.txt",
prompt_model="REPORT_MODEL",
input_class=ReportInput,
output_class=ReportOutput,
)

View File

@ -0,0 +1,43 @@
from pydantic import BaseModel
# template.format(**model_dump()) 에 삽입될 변수들
# 각 채널 raw_data를 호출부에서 json.dumps()로 직렬화해서 넘겨야 함
class ReportInput(BaseModel):
clinic_name: str | None = None
clinic_name_en: str | None = None
address: str | None = None
phone: str | None = None
slogan: str | None = None
services: str | None = None
doctors: str | None = None
instagram: str | None = None
facebook: str | None = None
naver_blog: str | None = None
youtube: str | None = None
gangnam_unni: str | None = None
class ChannelScore(BaseModel):
score: int
summary: str
strengths: list[str]
weaknesses: list[str]
class ConversionStrategy(BaseModel):
summary: str
actions: list[str]
# response_format으로 OpenAI structured output에 전달 — dict 필드 사용 불가
class ReportOutput(BaseModel):
overall_score: int
instagram: ChannelScore | None = None
facebook: ChannelScore | None = None
naver_blog: ChannelScore | None = None
youtube: ChannelScore | None = None
gangnam_unni: ChannelScore | None = None
conversion_strategy: ConversionStrategy
roadmap: list[str]
kpis: list[str]

View File

@ -1,61 +0,0 @@
from pydantic import BaseModel
from openai import AsyncOpenAI
from common.utils import get_env
from .prompt import Prompt
class LLMResponseError(Exception):
def __init__(self, status: str, code: str = None, message: str = None):
self.status = status
self.code = code
self.message = message
super().__init__(f"LLM response failed: status={status}, code={code}, message={message}")
class LLMService:
def __init__(self, provider: str = "openai", max_retries: int = 2):
self.max_retries = max_retries
match provider:
case "openai":
self.client = AsyncOpenAI(api_key=get_env("OPENAI_API_KEY"))
case "perplexity":
self.client = AsyncOpenAI(
api_key=get_env("PERPLEXITY_API_KEY"),
base_url="https://api.perplexity.ai",
)
case "gemini":
self.client = AsyncOpenAI(
api_key=get_env("GEMINI_API_KEY"),
base_url="https://generativelanguage.googleapis.com/v1beta/openai/",
)
case _:
raise NotImplementedError(f"Unknown provider: {provider}")
async def generate(
self,
prompt: Prompt,
input_data: dict,
) -> BaseModel:
prompt_text = prompt.build(input_data)
last_error = None
for attempt in range(self.max_retries + 1):
response = await self.client.beta.chat.completions.parse(
model=prompt.model,
messages=[{"role": "user", "content": prompt_text}],
response_format=prompt.output_class,
)
choice = response.choices[0]
finish_reason = choice.finish_reason
if finish_reason == "stop":
return choice.message.parsed
if finish_reason == "length":
last_error = LLMResponseError("incomplete", finish_reason, "max tokens reached")
elif finish_reason == "content_filter":
last_error = LLMResponseError("failed", finish_reason, "blocked by content filter")
else:
last_error = LLMResponseError("failed", finish_reason, f"unexpected finish_reason: {finish_reason}")
raise last_error

View File

@ -0,0 +1 @@

View File

@ -0,0 +1,38 @@
당신은 프리미엄 의료 마케팅 전문 분석가입니다. 아래 실제 수집된 데이터를 기반으로 종합 마케팅 리포트를 생성해주세요.
결과물은 한국어로 생성하세요
⚠️ 중요: 데이터가 null인 채널은 해당 항목을 null로 설정하세요. 데이터에 없는 수치는 절대 추측하지 마세요.
## 병원 기본 정보
- 병원명: {clinic_name}
- 영문명: {clinic_name_en}
- 주소: {address}
- 전화: {phone}
- 슬로건: {slogan}
- 시술: {services}
- 의료진: {doctors}
## 채널 데이터
### 인스타그램
{instagram}
### 페이스북
{facebook}
### 네이버 블로그
{naver_blog}
### 유튜브
{youtube}
### 강남언니
{gangnam_unni}
## 분석 지침
- 점수는 0~100 기준입니다.
- strengths와 weaknesses는 각 3개 이상 작성하세요.
- roadmap은 우선순위 순으로 실행 가능한 액션으로 작성하세요.
- kpis는 실제 수집된 수치 기반으로 현실적인 측정 가능 지표로 작성하세요.
- conversion_strategy의 actions는 구체적인 실행 방안으로 작성하세요.

View File

@ -1,5 +1,6 @@
import re
from http import HTTPMethod
from urllib.parse import urlparse
from common.utils import http_request
NAVER_BASE = "https://openapi.naver.com/v1/search"
@ -52,10 +53,10 @@ class NaverClient:
return []
return resp.json().get("items", [])
async def fetch_blog_rss(self, blog_handle: str) -> str | None:
async def fetch_blog_rss(self, handle: str) -> str | None:
resp = await http_request(
HTTPMethod.GET,
url=f"https://rss.blog.naver.com/{blog_handle}.xml",
url=f"https://rss.blog.naver.com/{handle}.xml",
timeout=15,
label="naver-rss",
)
@ -63,7 +64,8 @@ class NaverClient:
return None
return resp.text
async def get_blog_rss(self, blog_handle: str) -> dict | None:
async def get_blog_rss(self, url: str) -> dict | None:
blog_handle = urlparse(url).path.strip("/").split("/")[0] if "://" in url else url
xml = await self.fetch_blog_rss(blog_handle)
if not xml:
return None

View File

@ -1,4 +1,5 @@
from http import HTTPMethod
from urllib.parse import urlparse
from common.utils import http_request
YT = "https://www.googleapis.com/youtube/v3"
@ -9,7 +10,7 @@ class YouTubeClient:
self.api_key = api_key
async def _resolve_channel_id(self, handle: str) -> str:
h = handle.lstrip("@")
h = urlparse(handle).path.strip("/").lstrip("@") if "://" in handle else handle.lstrip("@")
if h.startswith("UC") and len(h) == 24:
return h
for param in ("forHandle", "forUsername"):
@ -47,14 +48,7 @@ class YouTubeClient:
resp = await http_request(
HTTPMethod.GET,
url=f"{YT}/search",
params={
"part": "snippet",
"channelId": channel_id,
"order": "viewCount",
"type": "video",
"maxResults": 10,
"key": self.api_key,
},
params={"part": "snippet", "channelId": channel_id, "order": "viewCount", "type": "video", "maxResults": 10, "key": self.api_key},
label="yt-search",
)
if resp and resp.is_success:
@ -65,11 +59,7 @@ class YouTubeClient:
resp = await http_request(
HTTPMethod.GET,
url=f"{YT}/videos",
params={
"part": "snippet,statistics,contentDetails",
"id": ",".join(video_ids),
"key": self.api_key,
},
params={"part": "snippet,statistics,contentDetails", "id": ",".join(video_ids), "key": self.api_key},
label="yt-videos",
)
if resp and resp.is_success:

View File

@ -1,9 +1,11 @@
import logging
from fastapi import FastAPI
from dotenv import load_dotenv
from api import routers
import os
load_dotenv()
logging.basicConfig(level=logging.INFO, format="%(asctime)s %(levelname)s %(name)s %(message)s")
app = FastAPI()

View File

@ -16,8 +16,7 @@ class AnalysisOptions(BaseModel):
class AnalysisCreate(BaseModel):
clinic_id: str | None = None
url: str | None = None
clinic_id: str
channels: Channels
options: AnalysisOptions = AnalysisOptions()

View File

@ -1,12 +0,0 @@
from pydantic import BaseModel
from typing import Any
class ChannelVerifyRequest(BaseModel):
youtube: str | None = None
instagram: list[str] | None = None
class ChannelVerifyResponse(BaseModel):
youtube: dict[str, Any] | None = None
instagram: list[dict[str, Any]] | None = None

View File

@ -3,9 +3,18 @@ from pydantic import BaseModel
class ClinicCreate(BaseModel):
url: str
name: str
name_en: str | None = None
address: str | None = None
class ClinicResponse(BaseModel):
hospital_id: str
hospital_name: str
hospital_name_en: str | None
road_address: str | None
url: str | None
status: str
raw_data: dict | None
created_at: str
updated_at: str
class ClinicCreateResponse(BaseModel):

View File

@ -1,4 +1,50 @@
from common.db import fetchone, execute, is_done
import asyncio
import json
import logging
from common.db import fetchone, execute, is_done, get_analysis_raw_data, save_analysis_report
from integrations.llm.llm_service import LLMService
from integrations.llm.prompt import report_prompt
from integrations.llm.schemas.report import ReportOutput
logger = logging.getLogger(__name__)
async def generate_report(analysis_run_id: str) -> ReportOutput:
run = await fetchone(
"SELECT hospital_id FROM analysis_runs WHERE analysis_run_id = %s",
(analysis_run_id,),
)
clinic_row = await fetchone(
"SELECT raw_data FROM hospital_baseinfo WHERE hospital_id = %s",
(run["hospital_id"],),
)
raw_data = clinic_row["raw_data"] if clinic_row else None
clinic = json.loads(raw_data) if isinstance(raw_data, str) else (raw_data or {})
raw = await get_analysis_raw_data(analysis_run_id)
input_data = {
"clinic_name": clinic.get("clinicName"),
"clinic_name_en": clinic.get("clinicNameEn"),
"address": clinic.get("address"),
"phone": clinic.get("phone"),
"slogan": clinic.get("slogan"),
"services": json.dumps(clinic.get("services", []), ensure_ascii=False),
"doctors": json.dumps(clinic.get("doctors", []), ensure_ascii=False),
**{
channel: json.dumps(data, ensure_ascii=False) if data else None
for channel, data in raw.items()
},
}
return await LLMService(provider="perplexity").generate(report_prompt, input_data)
async def run_report_task(analysis_run_id: str) -> None:
logger.info("[report] start run=%s", analysis_run_id)
result = await generate_report(analysis_run_id)
await save_analysis_report(analysis_run_id, result.model_dump())
await execute("UPDATE analysis_runs SET status = 'completed' WHERE analysis_run_id = %s", (analysis_run_id,))
logger.info("[report] done run=%s", analysis_run_id)
async def check_and_advance_analysis(analysis_run_id: str) -> None:
@ -16,3 +62,4 @@ async def check_and_advance_analysis(analysis_run_id: str) -> None:
]
if all(results):
await execute("UPDATE analysis_runs SET status = 'analyzing' WHERE analysis_run_id = %s", (analysis_run_id,))
asyncio.create_task(run_report_task(analysis_run_id))

View File

@ -1,53 +1,68 @@
import logging
from common.db import (
set_instagram_status, save_instagram_raw_data,
set_facebook_status, save_facebook_raw_data,
set_naver_blog_status, save_naver_blog_raw_data,
set_youtube_status, save_youtube_raw_data,
set_gangnam_unni_status, save_gangnam_unni_raw_data,
execute, save_hospital_raw_data,
)
from common.utils import get_env, normalize_handle
from common.utils import get_env
from services.analysis import check_and_advance_analysis
from integrations.apify import ApifyClient
from integrations.naver import NaverClient
from integrations.youtube import YouTubeClient
from integrations.firecrawl import FirecrawlClient
logger = logging.getLogger(__name__)
async def collect_instagram(analysis_run_id: str, row_id: int, url: str) -> None:
print("start a insta")
logger.info("[instagram] start run=%s url=%s", analysis_run_id, url)
await set_instagram_status(row_id, "processing")
data = await ApifyClient(get_env("APIFY_API_TOKEN")).fetch_instagram_profile(normalize_handle("instagram", url))
data = await ApifyClient(get_env("APIFY_API_TOKEN")).get_instagram_profile(url)
await save_instagram_raw_data(row_id, data)
logger.info("[instagram] done run=%s", analysis_run_id)
await check_and_advance_analysis(analysis_run_id)
async def collect_facebook(analysis_run_id: str, row_id: int, url: str) -> None:
print("start a facebook")
logger.info("[facebook] start run=%s url=%s", analysis_run_id, url)
await set_facebook_status(row_id, "processing")
data = await ApifyClient(get_env("APIFY_API_TOKEN")).fetch_facebook_page(url)
data = await ApifyClient(get_env("APIFY_API_TOKEN")).get_facebook_page(url)
await save_facebook_raw_data(row_id, data)
logger.info("[facebook] done run=%s", analysis_run_id)
await check_and_advance_analysis(analysis_run_id)
async def collect_naver_blog(analysis_run_id: str, row_id: int, url: str) -> None:
print("start a blog")
logger.info("[naver_blog] start run=%s url=%s", analysis_run_id, url)
await set_naver_blog_status(row_id, "processing")
data = await NaverClient(get_env("NAVER_CLIENT_ID"), get_env("NAVER_CLIENT_SECRET")).fetch_blog_rss(normalize_handle("naver_blog", url))
data = await NaverClient(get_env("NAVER_CLIENT_ID"), get_env("NAVER_CLIENT_SECRET")).get_blog_rss(url)
await save_naver_blog_raw_data(row_id, data)
logger.info("[naver_blog] done run=%s", analysis_run_id)
await check_and_advance_analysis(analysis_run_id)
async def collect_youtube(analysis_run_id: str, row_id: int, url: str) -> None:
print("start a youtube")
logger.info("[youtube] start run=%s url=%s", analysis_run_id, url)
await set_youtube_status(row_id, "processing")
data = await YouTubeClient(get_env("YOUTUBE_API_KEY")).fetch_channel(normalize_handle("youtube", url))
data = await YouTubeClient(get_env("YOUTUBE_API_KEY")).get_channel(url)
await save_youtube_raw_data(row_id, data)
logger.info("[youtube] done run=%s", analysis_run_id)
await check_and_advance_analysis(analysis_run_id)
async def collect_gangnam_unni(analysis_run_id: str, row_id: int, url: str) -> None:
print("start a gangnam_unni")
logger.info("[gangnam_unni] start run=%s url=%s", analysis_run_id, url)
await set_gangnam_unni_status(row_id, "processing")
data = await FirecrawlClient(get_env("FIRECRAWL_API_KEY")).fetch_gangnam_unni(url)
data = await FirecrawlClient(get_env("FIRECRAWL_API_KEY")).get_gangnam_unni(url)
await save_gangnam_unni_raw_data(row_id, data)
logger.info("[gangnam_unni] done run=%s", analysis_run_id)
await check_and_advance_analysis(analysis_run_id)
async def collect_clinic_info(hospital_id: str, url: str) -> None:
await execute("UPDATE hospital_baseinfo SET status = 'processing' WHERE hospital_id = %s", (hospital_id,))
data = await FirecrawlClient(get_env("FIRECRAWL_API_KEY")).fetch_clinic_info(url)
await save_hospital_raw_data(hospital_id, data)