llm 붙임 및 리포트 생성 확인

upload
jaehwang 2026-05-14 16:16:09 +09:00
parent 26cd946e1b
commit 2b8a90e857
26 changed files with 507 additions and 175 deletions

View File

@ -65,6 +65,9 @@ CREATE TABLE hospital_baseinfo
`brn` VARCHAR(50) NOT NULL, `brn` VARCHAR(50) NOT NULL,
`road_address` VARCHAR(100) NULL, `road_address` VARCHAR(100) NULL,
`site_address` VARCHAR(100) NULL, `site_address` VARCHAR(100) NULL,
`url` VARCHAR(500) NULL,
`status` VARCHAR(20) NOT NULL DEFAULT 'start',
`raw_data` JSON NULL,
`created_at` TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP, `created_at` TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP,
`updated_at` TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP, `updated_at` TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP,
PRIMARY KEY (hospital_id) PRIMARY KEY (hospital_id)
@ -133,6 +136,8 @@ CREATE TABLE analysis_runs
`naver_blog_data_id` INT NULL, `naver_blog_data_id` INT NULL,
`youtube_data_id` INT NULL, `youtube_data_id` INT NULL,
`gangnam_unni_data_id` INT NULL, `gangnam_unni_data_id` INT NULL,
`report_data` JSON NULL,
`plan_data` JSON NULL,
`created_at` TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP, `created_at` TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP,
`updated_at` TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP, `updated_at` TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP,
PRIMARY KEY (analysis_run_id) PRIMARY KEY (analysis_run_id)

View File

@ -2,6 +2,5 @@ from .clinics import router as clinics_router
from .analyses import router as analyses_router from .analyses import router as analyses_router
from .reports import router as reports_router from .reports import router as reports_router
from .plans import router as plans_router from .plans import router as plans_router
from .channels import router as channels_router
routers = [clinics_router, analyses_router, reports_router, plans_router, channels_router] routers = [clinics_router, analyses_router, reports_router, plans_router]

View File

@ -1,3 +1,4 @@
import logging
import uuid6 import uuid6
from fastapi import APIRouter, BackgroundTasks, Depends, status, HTTPException from fastapi import APIRouter, BackgroundTasks, Depends, status, HTTPException
from common.deps import verify_api_key from common.deps import verify_api_key
@ -7,10 +8,12 @@ from models.status import AnalysisStatus
from services.collect import collect_instagram, collect_facebook, collect_naver_blog, collect_youtube, collect_gangnam_unni from services.collect import collect_instagram, collect_facebook, collect_naver_blog, collect_youtube, collect_gangnam_unni
router = APIRouter(prefix="/api/analyses", tags=["analyses"], dependencies=[Depends(verify_api_key)]) router = APIRouter(prefix="/api/analyses", tags=["analyses"], dependencies=[Depends(verify_api_key)])
logger = logging.getLogger(__name__)
@router.post("", status_code=status.HTTP_202_ACCEPTED, response_model=AnalysisStartResponse) @router.post("", status_code=status.HTTP_202_ACCEPTED, response_model=AnalysisStartResponse)
async def start_analysis(body: AnalysisCreate, background_tasks: BackgroundTasks): async def start_analysis(body: AnalysisCreate, background_tasks: BackgroundTasks):
logger.info("POST /api/analyses clinic_id=%s", body.clinic_id)
analysis_run_id = str(uuid6.uuid7()) analysis_run_id = str(uuid6.uuid7())
hospital_id = body.clinic_id hospital_id = body.clinic_id
@ -23,8 +26,7 @@ async def start_analysis(body: AnalysisCreate, background_tasks: BackgroundTasks
raise HTTPException(status_code=409, detail="Clinic not found") raise HTTPException(status_code=409, detail="Clinic not found")
owner_user_id = hospital["owner_user_id"] if hospital else 0 owner_user_id = hospital["owner_user_id"] if hospital else 0
ig_url = body.channels.instagram[0] if isinstance(body.channels.instagram, list) else body.channels.instagram ig_id = await insert_instagram_row(hospital_id, body.channels.instagram) if body.channels.instagram else None
ig_id = await insert_instagram_row(hospital_id, ig_url) if ig_url else None
fb_id = await insert_facebook_row(hospital_id, body.channels.facebook) if body.channels.facebook else None fb_id = await insert_facebook_row(hospital_id, body.channels.facebook) if body.channels.facebook else None
nb_id = await insert_naver_blog_row(hospital_id, body.channels.naver_blog) if body.channels.naver_blog else None nb_id = await insert_naver_blog_row(hospital_id, body.channels.naver_blog) if body.channels.naver_blog else None
yt_id = await insert_youtube_row(hospital_id, body.channels.youtube) if body.channels.youtube else None yt_id = await insert_youtube_row(hospital_id, body.channels.youtube) if body.channels.youtube else None
@ -33,9 +35,9 @@ async def start_analysis(body: AnalysisCreate, background_tasks: BackgroundTasks
analysis_run_id = await insert_analysis_run(analysis_run_id, hospital_id, owner_user_id, ig_id, fb_id, nb_id, yt_id, gu_id) analysis_run_id = await insert_analysis_run(analysis_run_id, hospital_id, owner_user_id, ig_id, fb_id, nb_id, yt_id, gu_id)
if ig_id: if ig_id:
background_tasks.add_task(collect_instagram, analysis_run_id, ig_id, ig_url) background_tasks.add_task(collect_instagram, analysis_run_id, ig_id, body.channels.instagram)
if fb_id: if fb_id:
background_tasks.add_task(collect_facebook, analysis_run_id, fb_id, f"https://www.facebook.com/{body.channels.facebook}") background_tasks.add_task(collect_facebook, analysis_run_id, fb_id, body.channels.facebook)
if nb_id: if nb_id:
background_tasks.add_task(collect_naver_blog, analysis_run_id, nb_id, body.channels.naver_blog) background_tasks.add_task(collect_naver_blog, analysis_run_id, nb_id, body.channels.naver_blog)
if yt_id: if yt_id:
@ -54,7 +56,10 @@ async def start_analysis(body: AnalysisCreate, background_tasks: BackgroundTasks
@router.get("/{run_id}/status", response_model=AnalysisStatusResponse) @router.get("/{run_id}/status", response_model=AnalysisStatusResponse)
async def get_analysis_status(run_id: str): async def get_analysis_status(run_id: str):
logger.info("GET /api/analyses/%s/status", run_id)
row = await fetchone("SELECT status FROM analysis_runs WHERE analysis_run_id = %s", (run_id,)) row = await fetchone("SELECT status FROM analysis_runs WHERE analysis_run_id = %s", (run_id,))
if not row:
raise HTTPException(status_code=404, detail="Run not found")
return AnalysisStatusResponse( return AnalysisStatusResponse(
analysis_run_id=run_id, analysis_run_id=run_id,
status=AnalysisStatus(row["status"]), status=AnalysisStatus(row["status"]),

View File

@ -1,23 +0,0 @@
from fastapi import APIRouter, Depends
from common.deps import verify_api_key
from models.channel import ChannelVerifyRequest, ChannelVerifyResponse
router = APIRouter(prefix="/api/channels", tags=["channels"], dependencies=[Depends(verify_api_key)])
# will not use
@router.post("/verify", response_model=ChannelVerifyResponse)
async def verify_channels(body: ChannelVerifyRequest):
    """Return a canned verification result for the requested channels.

    This is a stub: nothing is looked up. The YouTube entry echoes the
    requested handle next to hard-coded values, and every Instagram handle
    is reported as "unverifiable". A channel that was not requested (falsy
    in ``body``) is returned as ``None``.
    """
    youtube_result = None
    if body.youtube:
        youtube_result = {
            "handle": body.youtube,
            "verified": True,
            "display_name": "바노바기 BANOBAGI",
            "followers": 12345,
        }

    instagram_result = None
    if body.instagram:
        instagram_result = []
        for handle in body.instagram:
            instagram_result.append(
                {"handle": handle, "verified": "unverifiable", "note": "Instagram 로그인 벽"}
            )

    return ChannelVerifyResponse(youtube=youtube_result, instagram=instagram_result)

View File

@ -1,28 +1,64 @@
import logging
import uuid6 import uuid6
from fastapi import APIRouter, Depends, status from fastapi import APIRouter, Depends, HTTPException, status
from common.deps import verify_api_key from common.deps import verify_api_key
from common.db import insert_hospital from common.db import insert_hospital, fetchone
from models.clinic import ClinicCreate, ClinicCreateResponse, ClinicHistoryResponse, RunSummary from common.utils import get_env
from integrations.firecrawl import FirecrawlClient
from models.clinic import ClinicCreate, ClinicCreateResponse, ClinicResponse, ClinicHistoryResponse, RunSummary
router = APIRouter(prefix="/api/clinics", tags=["clinics"], dependencies=[Depends(verify_api_key)]) router = APIRouter(prefix="/api/clinics", tags=["clinics"], dependencies=[Depends(verify_api_key)])
logger = logging.getLogger(__name__)
_REQUIRED_FIELDS = ["clinicName"]
_COLLECTED_FIELDS = ["clinicName", "clinicNameEn", "address", "phone", "slogan", "services", "doctors"]
@router.post("", status_code=status.HTTP_201_CREATED, response_model=ClinicCreateResponse) @router.post("", status_code=status.HTTP_201_CREATED, response_model=ClinicCreateResponse)
async def create_clinic(body: ClinicCreate): async def create_clinic(body: ClinicCreate):
logger.info("POST /api/clinics url=%s", body.url)
info = await FirecrawlClient(get_env("FIRECRAWL_API_KEY")).fetch_clinic_info(body.url)
missing = [f for f in _COLLECTED_FIELDS if not (info or {}).get(f)]
required_missing = [f for f in _REQUIRED_FIELDS if f in missing]
if required_missing:
raise HTTPException(status_code=404, detail={"missing": missing})
hospital_id = str(uuid6.uuid7()) hospital_id = str(uuid6.uuid7())
row = await insert_hospital(hospital_id, body.name, body.name_en, body.address, body.url) row = await insert_hospital(
hospital_id,
name=info["clinicName"],
name_en=info.get("clinicNameEn"),
road_address=info.get("address"),
url=body.url,
raw_data=info,
)
return ClinicCreateResponse( return ClinicCreateResponse(
id=hospital_id, id=hospital_id,
url=body.url, url=body.url,
name=body.name, name=info["clinicName"],
created_at=str(row["created_at"]), created_at=str(row["created_at"]),
) )
@router.get("/{hospital_id}", response_model=ClinicResponse)
async def get_clinic(hospital_id: str):
    """Return the stored base information of one clinic.

    Args:
        hospital_id: Primary key of the ``hospital_baseinfo`` row.

    Returns:
        A ``ClinicResponse`` built from the row, with both timestamps
        converted to strings and ``raw_data`` decoded to a dict.

    Raises:
        HTTPException: 404 when no row matches ``hospital_id``.
    """
    import json  # local import: json is not among this module's file-level imports

    logger.info("GET /api/clinics/%s", hospital_id)
    row = await fetchone(
        "SELECT hospital_id, hospital_name, hospital_name_en, road_address, url, status, raw_data, created_at, updated_at"
        " FROM hospital_baseinfo WHERE hospital_id = %s",
        (hospital_id,),
    )
    if not row:
        raise HTTPException(status_code=404, detail="Clinic not found")

    raw_data = row["raw_data"]
    if isinstance(raw_data, str):
        # The JSON column may come back as text; ClinicResponse.raw_data is
        # declared as ``dict | None``, so decode it before validation.
        raw_data = json.loads(raw_data)

    return ClinicResponse(
        **{
            **row,
            "raw_data": raw_data,
            "created_at": str(row["created_at"]),
            "updated_at": str(row["updated_at"]),
        }
    )
# Not done # Not done
@router.get("/{id}/history", response_model=ClinicHistoryResponse) @router.get("/{id}/history", response_model=ClinicHistoryResponse)
async def get_clinic_history(id: str): async def get_clinic_history(id: str):
logger.info("GET /api/clinics/%s/history", id)
return ClinicHistoryResponse( return ClinicHistoryResponse(
clinic_id=id, clinic_id=id,
runs=[ runs=[

View File

@ -1,12 +1,15 @@
import logging
from fastapi import APIRouter, Depends, status from fastapi import APIRouter, Depends, status
from common.deps import verify_api_key from common.deps import verify_api_key
from models.plan import PlanCreate, PlanResponse from models.plan import PlanCreate, PlanResponse
router = APIRouter(prefix="/api/plans", tags=["plans"], dependencies=[Depends(verify_api_key)]) router = APIRouter(prefix="/api/plans", tags=["plans"], dependencies=[Depends(verify_api_key)])
logger = logging.getLogger(__name__)
@router.post("", status_code=status.HTTP_201_CREATED, response_model=PlanResponse) @router.post("", status_code=status.HTTP_201_CREATED, response_model=PlanResponse)
async def create_plan(body: PlanCreate): async def create_plan(body: PlanCreate):
logger.info("POST /api/plans run_id=%s", body.analysis_run_id)
return PlanResponse( return PlanResponse(
id="33333333-3333-3333-3333-333333333333", id="33333333-3333-3333-3333-333333333333",
analysis_run_id="22222222-2222-2222-2222-222222222222", analysis_run_id="22222222-2222-2222-2222-222222222222",
@ -20,6 +23,7 @@ async def create_plan(body: PlanCreate):
@router.get("/{id}", response_model=PlanResponse) @router.get("/{id}", response_model=PlanResponse)
async def get_plan(id: str): async def get_plan(id: str):
logger.info("GET /api/plans/%s", id)
return PlanResponse( return PlanResponse(
id=id, id=id,
analysis_run_id="22222222-2222-2222-2222-222222222222", analysis_run_id="22222222-2222-2222-2222-222222222222",

View File

@ -1,24 +1,24 @@
from fastapi import APIRouter, Depends import json
import logging
from fastapi import APIRouter, Depends, HTTPException, Response
from common.db import fetchone
from common.deps import verify_api_key from common.deps import verify_api_key
from models.report import ReportResponse, ClinicInfo from integrations.llm.schemas.report import ReportOutput
router = APIRouter(prefix="/api/reports", tags=["reports"], dependencies=[Depends(verify_api_key)]) router = APIRouter(prefix="/api/reports", tags=["reports"], dependencies=[Depends(verify_api_key)])
logger = logging.getLogger(__name__)
@router.get("/{run_id}", response_model=ReportResponse) @router.get("/{run_id}", response_model=ReportOutput | None)
async def get_report(run_id: str): async def get_report(run_id: str):
return ReportResponse( logger.info("GET /api/reports/%s", run_id)
id=run_id, row = await fetchone(
clinic=ClinicInfo(name="바노바기성형외과", url="https://www.banobagi.com"), "SELECT report_data FROM analysis_runs WHERE analysis_run_id = %s",
overall_score=82, (run_id,),
youtube={},
instagram={},
facebook={},
naver_place={},
naver_blog={},
gangnam_unni={},
conversion_strategy={},
roadmap=[],
kpis=[],
generated_at="2026-04-20T09:01:30Z",
) )
if row is None:
raise HTTPException(status_code=404, detail="Run not found")
if row["report_data"] is None:
return Response(status_code=204)
data = json.loads(row["report_data"]) if isinstance(row["report_data"], str) else row["report_data"]
return ReportOutput(**data)

View File

@ -76,6 +76,13 @@ async def insert_analysis_run(
async def save_analysis_report(analysis_run_id: str, data: dict) -> None:
    """Store ``data`` as the JSON report of the given analysis run.

    Args:
        analysis_run_id: Key of the ``analysis_runs`` row to update.
        data: Report content; serialized with ``json.dumps`` before storing.
    """
    # ensure_ascii=False keeps non-ASCII text as-is instead of \u escapes.
    serialized = json.dumps(data, ensure_ascii=False)
    query = "UPDATE analysis_runs SET report_data = %s WHERE analysis_run_id = %s"
    await execute(query, (serialized, analysis_run_id))
async def is_done(table: str, row_id: int | None) -> bool: async def is_done(table: str, row_id: int | None) -> bool:
if row_id is None: if row_id is None:
return True return True
@ -83,6 +90,30 @@ async def is_done(table: str, row_id: int | None) -> bool:
return r["status"] == "done" return r["status"] == "done"
async def _fetch_raw(table: str, row_id: int | None) -> dict | None:
if row_id is None:
return None
row = await fetchone(f"SELECT raw_data FROM {table} WHERE id = %s", (row_id,))
if not row or not row["raw_data"]:
return None
return json.loads(row["raw_data"]) if isinstance(row["raw_data"], str) else row["raw_data"]
async def get_analysis_raw_data(analysis_run_id: str) -> dict:
    """Collect the raw channel payloads linked to one analysis run.

    Args:
        analysis_run_id: Key of the ``analysis_runs`` row.

    Returns:
        A dict with the keys ``instagram``, ``facebook``, ``naver_blog``,
        ``youtube`` and ``gangnam_unni``; each value is whatever
        ``_fetch_raw`` returns for that channel's data row.

    Raises:
        LookupError: When no ``analysis_runs`` row matches
            ``analysis_run_id``.
    """
    run = await fetchone(
        "SELECT instagram_data_id, facebook_data_id, naver_blog_data_id, youtube_data_id, gangnam_unni_data_id"
        " FROM analysis_runs WHERE analysis_run_id = %s",
        (analysis_run_id,),
    )
    if not run:
        # Without this guard the lookups below fail with an opaque TypeError.
        raise LookupError(f"analysis run not found: {analysis_run_id}")

    # (result key, data table, id column on analysis_runs)
    sources = (
        ("instagram", "instagram_data", "instagram_data_id"),
        ("facebook", "facebook_data", "facebook_data_id"),
        ("naver_blog", "naver_blog_data", "naver_blog_data_id"),
        ("youtube", "youtube_data", "youtube_data_id"),
        ("gangnam_unni", "gangnam_unni_data", "gangnam_unni_data_id"),
    )
    # Queries stay sequential, one channel after another, as before.
    return {channel: await _fetch_raw(table, run[column]) for channel, table, column in sources}
async def set_instagram_status(row_id: int, status: str) -> None: async def set_instagram_status(row_id: int, status: str) -> None:
await execute("UPDATE instagram_data SET status = %s WHERE id = %s", (status, row_id)) await execute("UPDATE instagram_data SET status = %s WHERE id = %s", (status, row_id))
@ -129,14 +160,37 @@ async def insert_hospital(
name_en: str | None = None, name_en: str | None = None,
road_address: str | None = None, road_address: str | None = None,
site_address: str | None = None, site_address: str | None = None,
url: str | None = None,
raw_data: dict | None = None,
owner_user_id: int = 0, owner_user_id: int = 0,
brn: str = "", brn: str = "",
) -> dict: ) -> dict:
await execute( await execute(
"INSERT INTO hospital_baseinfo (hospital_id, hospital_name, hospital_name_en, road_address, site_address, owner_user_id, brn) VALUES (%s, %s, %s, %s, %s, %s, %s)", "INSERT INTO hospital_baseinfo (hospital_id, hospital_name, hospital_name_en, road_address, site_address, url, raw_data, status, owner_user_id, brn)"
(hospital_id, name, name_en, road_address, site_address, owner_user_id, brn), " VALUES (%s, %s, %s, %s, %s, %s, %s, 'done', %s, %s)",
(hospital_id, name, name_en, road_address, site_address, url,
json.dumps(raw_data, ensure_ascii=False) if raw_data else None,
owner_user_id, brn),
) )
return await fetchone( return await fetchone(
"SELECT created_at FROM hospital_baseinfo WHERE hospital_id = %s", "SELECT created_at FROM hospital_baseinfo WHERE hospital_id = %s",
(hospital_id,), (hospital_id,),
) )
async def save_hospital_raw_data(hospital_id: str, data: dict) -> None:
    """Store collected clinic data on a hospital row and mark it 'done'.

    ``raw_data`` is overwritten with the JSON form of ``data``. The name,
    English name and road address are replaced only when ``data`` provides
    a value; the SQL ``COALESCE`` keeps the existing column otherwise.

    Args:
        hospital_id: Key of the ``hospital_baseinfo`` row to update.
        data: Collected payload; ``clinicName``, ``clinicNameEn`` and
            ``address`` are read from it when present.
    """
    statement = (
        "UPDATE hospital_baseinfo"
        " SET raw_data = %s, status = 'done',"
        " hospital_name = COALESCE(%s, hospital_name),"
        " hospital_name_en = COALESCE(%s, hospital_name_en),"
        " road_address = COALESCE(%s, road_address)"
        " WHERE hospital_id = %s"
    )
    payload = json.dumps(data, ensure_ascii=False)
    name = data.get("clinicName")
    name_en = data.get("clinicNameEn")
    address = data.get("address")
    await execute(statement, (payload, name, name_en, address, hospital_id))

View File

@ -2,7 +2,5 @@ import os
from fastapi import Header, HTTPException from fastapi import Header, HTTPException
async def verify_api_key(x_api_key: str = Header(...)): async def verify_api_key(x_api_key: str = Header(...)):
print(x_api_key)
print(os.getenv("API_KEY"))
if x_api_key != os.getenv("API_KEY"): if x_api_key != os.getenv("API_KEY"):
raise HTTPException(status_code=401, detail="Invalid API Key") raise HTTPException(status_code=401, detail="Invalid API Key")

View File

@ -1,4 +1,5 @@
from http import HTTPMethod from http import HTTPMethod
from urllib.parse import urlparse
from common.utils import http_request from common.utils import http_request
APIFY_BASE = "https://api.apify.com/v2" APIFY_BASE = "https://api.apify.com/v2"
@ -32,8 +33,9 @@ class ApifyClient:
return [] return []
return items_resp.json() return items_resp.json()
async def fetch_instagram_profile(self, handle: str) -> dict | None: async def fetch_instagram_profile(self, url: str) -> dict | None:
items = await self._run_actor("apify~instagram-profile-scraper", {"usernames": [handle], "resultsLimit": 12}) username = urlparse(url).path.strip("/").split("/")[0] if "://" in url else url.lstrip("@")
items = await self._run_actor("apify~instagram-profile-scraper", {"usernames": [username], "resultsLimit": 12})
return items[0] if items else None return items[0] if items else None
async def get_instagram_profile(self, handle: str) -> dict | None: async def get_instagram_profile(self, handle: str) -> dict | None:
@ -131,7 +133,7 @@ class ApifyClient:
if not page: if not page:
return None return None
return { return {
"pageName": page["title"], "pageName": page.get("title") or page.get("name"),
"pageUrl": page.get("pageUrl", page_url), "pageUrl": page.get("pageUrl", page_url),
"followers": page.get("followers", 0), "followers": page.get("followers", 0),
"likes": page.get("likes", 0), "likes": page.get("likes", 0),

View File

@ -67,6 +67,88 @@ class FirecrawlClient:
return [] return []
return (data.get("json") or {}).get("socialLinks", []) return (data.get("json") or {}).get("socialLinks", [])
async def fetch_clinic_info(self, url: str) -> dict | None:
    """Scrape a clinic website and return its structured profile.

    Sends one ``/scrape`` request asking for the ``json`` and ``links``
    formats, with a JSON schema describing the fields to extract.

    Args:
        url: Address of the clinic website to scrape.

    Returns:
        A dict with the extracted fields plus ``siteLinks`` (the scraped
        links) and ``sourceUrl`` (the requested ``url``), or ``None`` when
        the request yields no response or an unsuccessful one.
    """

    def string_props(*names: str) -> dict:
        # Every listed property is a plain JSON string.
        return {name: {"type": "string"} for name in names}

    # NOTE: a "socialMedia" object (instagram, youtube, blog, facebook,
    # tiktok, kakao) is currently not requested and not returned.
    schema = {
        "type": "object",
        "properties": {
            **string_props("clinicName", "clinicNameEn", "address", "phone", "businessHours", "slogan"),
            "services": {"type": "array", "items": {"type": "string"}},
            "doctors": {
                "type": "array",
                "items": {
                    "type": "object",
                    "properties": string_props("name", "title", "specialty"),
                },
            },
            "branding": {
                "type": "object",
                "properties": string_props(
                    "primaryColor",
                    "accentColor",
                    "backgroundColor",
                    "textColor",
                    "headingFont",
                    "bodyFont",
                    "logoUrl",
                    "faviconUrl",
                ),
            },
        },
    }
    request_body = {
        "url": url,
        "formats": ["json", "links"],
        "jsonOptions": {
            "prompt": "Extract: clinic name (Korean), clinic name (English), address, phone, business hours, slogan, services offered, doctors with name/title/specialty, brand identity (primary/accent/background/text colors in hex, heading/body fonts, logo URL, favicon URL)",
            "schema": schema,
        },
        "waitFor": 5000,
    }

    resp = await http_request(
        HTTPMethod.POST,
        url=f"{FIRECRAWL_BASE}/scrape",
        headers=self._headers(),
        json_body=request_body,
        timeout=60,
        label="firecrawl-clinic-info",
    )
    if not resp or not resp.is_success:
        return None

    data = resp.json().get("data") or {}
    info = data.get("json") or {}

    result = {
        key: info.get(key)
        for key in ("clinicName", "clinicNameEn", "address", "phone", "businessHours", "slogan")
    }
    result["services"] = info.get("services", [])
    result["doctors"] = info.get("doctors", [])
    result["branding"] = info.get("branding", {})
    result["siteLinks"] = data.get("links", [])
    result["sourceUrl"] = url
    return result
async def fetch_gangnam_unni(self, hospital_url: str) -> dict | None: async def fetch_gangnam_unni(self, hospital_url: str) -> dict | None:
resp = await http_request( resp = await http_request(
HTTPMethod.POST, HTTPMethod.POST,

View File

@ -1,3 +1,3 @@
from .service import LLMService from .llm_service import LLMService
from .prompt import Prompt from .prompt import Prompt

View File

@ -0,0 +1,76 @@
from pydantic import BaseModel
from openai import AsyncOpenAI
from common.utils import get_env
from .prompt import Prompt
class LLMResponseError(Exception):
def __init__(self, status: str, code: str = None, message: str = None):
self.status = status
self.code = code
self.message = message
super().__init__(f"LLM response failed: status={status}, code={code}, message={message}")
class LLMService:
def __init__(self, provider: str = "openai", max_retries: int = 2):
self.provider = provider
self.max_retries = max_retries
match provider:
case "openai":
self.client = AsyncOpenAI(api_key=get_env("OPENAI_API_KEY"))
case "perplexity":
self.client = AsyncOpenAI(
api_key=get_env("PERPLEXITY_API_KEY"),
base_url="https://api.perplexity.ai",
)
case "gemini":
self.client = AsyncOpenAI(
api_key=get_env("GEMINI_API_KEY"),
base_url="https://generativelanguage.googleapis.com/v1beta/openai/",
)
case _:
raise NotImplementedError(f"Unknown provider: {provider}")
async def generate(
self,
prompt: Prompt,
input_data: dict,
) -> BaseModel:
prompt_text = prompt.build(input_data)
last_error = None
for attempt in range(self.max_retries + 1):
if self.provider == "perplexity":
response = await self.client.chat.completions.create(
model=prompt.model,
messages=[{"role": "user", "content": prompt_text}],
response_format={
"type": "json_schema",
"json_schema": {"name": prompt.output_class.__name__, "schema": prompt.output_class.model_json_schema()},
},
)
choice = response.choices[0]
if choice.finish_reason == "stop":
return prompt.output_class.model_validate_json(choice.message.content)
last_error = LLMResponseError("failed", choice.finish_reason, f"unexpected finish_reason: {choice.finish_reason}")
else:
response = await self.client.beta.chat.completions.parse(
model=prompt.model,
messages=[{"role": "user", "content": prompt_text}],
response_format=prompt.output_class,
)
choice = response.choices[0]
finish_reason = choice.finish_reason
if finish_reason == "stop":
return choice.message.parsed
if finish_reason == "length":
last_error = LLMResponseError("incomplete", finish_reason, "max tokens reached")
elif finish_reason == "content_filter":
last_error = LLMResponseError("failed", finish_reason, "blocked by content filter")
else:
last_error = LLMResponseError("failed", finish_reason, f"unexpected finish_reason: {finish_reason}")
raise last_error

View File

@ -1,19 +1,40 @@
import os
from pydantic import BaseModel from pydantic import BaseModel
from common.utils import get_env
from integrations.llm.schemas.report import ReportInput, ReportOutput
_PROMPT_DIR = os.path.join(os.path.dirname(__file__), "temp-prompt")
class Prompt: class Prompt:
def __init__( file_name: str
self, prompt_model: str
template: str, input_class: type[BaseModel]
model: str, output_class: type[BaseModel]
input_class: type[BaseModel],
output_class: type[BaseModel], def __init__(self, file_name: str, prompt_model: str, input_class: type[BaseModel], output_class: type[BaseModel]):
): self.file_name = file_name
self.template = template self.prompt_model = prompt_model
self.model = model
self.input_class = input_class self.input_class = input_class
self.output_class = output_class self.output_class = output_class
self.template, self.model = self._load_prompt()
def _load_prompt(self) -> tuple[str, str]:
with open(os.path.join(_PROMPT_DIR, self.file_name), encoding="utf-8") as f:
template = f.read()
return template, get_env(self.prompt_model)
def _reload_prompt(self):
self.template, self.model = self._load_prompt()
def build(self, input_data: dict) -> str: def build(self, input_data: dict) -> str:
verified = self.input_class(**input_data) verified = self.input_class(**input_data)
return self.template.format(**verified.model_dump()) return self.template.format(**verified.model_dump())
report_prompt = Prompt(
file_name="report_prompt.txt",
prompt_model="REPORT_MODEL",
input_class=ReportInput,
output_class=ReportOutput,
)

View File

@ -0,0 +1,43 @@
from pydantic import BaseModel
# Variables substituted into the prompt via template.format(**model_dump()).
# The caller must serialize each channel's raw_data with json.dumps() before
# passing it in.
class ReportInput(BaseModel):
    # Every field is optional and defaults to None.

    # Clinic base information.
    clinic_name: str | None = None
    clinic_name_en: str | None = None
    address: str | None = None
    phone: str | None = None
    slogan: str | None = None
    services: str | None = None
    doctors: str | None = None
    # Per-channel collected data, already serialized to a string.
    instagram: str | None = None
    facebook: str | None = None
    naver_blog: str | None = None
    youtube: str | None = None
    gangnam_unni: str | None = None
class ChannelScore(BaseModel):
    # Assessment of a single marketing channel.

    # The report prompt asks for a 0-100 scale; the range is not enforced here.
    score: int
    summary: str
    strengths: list[str]
    weaknesses: list[str]
class ConversionStrategy(BaseModel):
    # Conversion strategy section of the report: a summary plus action items.
    summary: str
    actions: list[str]
# Passed to OpenAI structured output as response_format, so dict fields
# cannot be used.
class ReportOutput(BaseModel):
    overall_score: int
    # Per-channel assessments; each is optional and defaults to None.
    instagram: ChannelScore | None = None
    facebook: ChannelScore | None = None
    naver_blog: ChannelScore | None = None
    youtube: ChannelScore | None = None
    gangnam_unni: ChannelScore | None = None
    conversion_strategy: ConversionStrategy
    roadmap: list[str]
    kpis: list[str]

View File

@ -1,61 +0,0 @@
from pydantic import BaseModel
from openai import AsyncOpenAI
from common.utils import get_env
from .prompt import Prompt
class LLMResponseError(Exception):
def __init__(self, status: str, code: str = None, message: str = None):
self.status = status
self.code = code
self.message = message
super().__init__(f"LLM response failed: status={status}, code={code}, message={message}")
class LLMService:
def __init__(self, provider: str = "openai", max_retries: int = 2):
self.max_retries = max_retries
match provider:
case "openai":
self.client = AsyncOpenAI(api_key=get_env("OPENAI_API_KEY"))
case "perplexity":
self.client = AsyncOpenAI(
api_key=get_env("PERPLEXITY_API_KEY"),
base_url="https://api.perplexity.ai",
)
case "gemini":
self.client = AsyncOpenAI(
api_key=get_env("GEMINI_API_KEY"),
base_url="https://generativelanguage.googleapis.com/v1beta/openai/",
)
case _:
raise NotImplementedError(f"Unknown provider: {provider}")
async def generate(
self,
prompt: Prompt,
input_data: dict,
) -> BaseModel:
prompt_text = prompt.build(input_data)
last_error = None
for attempt in range(self.max_retries + 1):
response = await self.client.beta.chat.completions.parse(
model=prompt.model,
messages=[{"role": "user", "content": prompt_text}],
response_format=prompt.output_class,
)
choice = response.choices[0]
finish_reason = choice.finish_reason
if finish_reason == "stop":
return choice.message.parsed
if finish_reason == "length":
last_error = LLMResponseError("incomplete", finish_reason, "max tokens reached")
elif finish_reason == "content_filter":
last_error = LLMResponseError("failed", finish_reason, "blocked by content filter")
else:
last_error = LLMResponseError("failed", finish_reason, f"unexpected finish_reason: {finish_reason}")
raise last_error

View File

@ -0,0 +1 @@

View File

@ -0,0 +1,38 @@
당신은 프리미엄 의료 마케팅 전문 분석가입니다. 아래 실제 수집된 데이터를 기반으로 종합 마케팅 리포트를 생성해주세요.
결과물은 한국어로 생성하세요
⚠️ 중요: 데이터가 null인 채널은 해당 항목을 null로 설정하세요. 데이터에 없는 수치는 절대 추측하지 마세요.
## 병원 기본 정보
- 병원명: {clinic_name}
- 영문명: {clinic_name_en}
- 주소: {address}
- 전화: {phone}
- 슬로건: {slogan}
- 시술: {services}
- 의료진: {doctors}
## 채널 데이터
### 인스타그램
{instagram}
### 페이스북
{facebook}
### 네이버 블로그
{naver_blog}
### 유튜브
{youtube}
### 강남언니
{gangnam_unni}
## 분석 지침
- 점수는 0~100 기준입니다.
- strengths와 weaknesses는 각 3개 이상 작성하세요.
- roadmap은 우선순위 순으로 실행 가능한 액션으로 작성하세요.
- kpis는 실제 수집된 수치 기반으로 현실적인 측정 가능 지표로 작성하세요.
- conversion_strategy의 actions는 구체적인 실행 방안으로 작성하세요.

View File

@ -1,5 +1,6 @@
import re import re
from http import HTTPMethod from http import HTTPMethod
from urllib.parse import urlparse
from common.utils import http_request from common.utils import http_request
NAVER_BASE = "https://openapi.naver.com/v1/search" NAVER_BASE = "https://openapi.naver.com/v1/search"
@ -52,10 +53,10 @@ class NaverClient:
return [] return []
return resp.json().get("items", []) return resp.json().get("items", [])
async def fetch_blog_rss(self, blog_handle: str) -> str | None: async def fetch_blog_rss(self, handle: str) -> str | None:
resp = await http_request( resp = await http_request(
HTTPMethod.GET, HTTPMethod.GET,
url=f"https://rss.blog.naver.com/{blog_handle}.xml", url=f"https://rss.blog.naver.com/{handle}.xml",
timeout=15, timeout=15,
label="naver-rss", label="naver-rss",
) )
@ -63,7 +64,8 @@ class NaverClient:
return None return None
return resp.text return resp.text
async def get_blog_rss(self, blog_handle: str) -> dict | None: async def get_blog_rss(self, url: str) -> dict | None:
blog_handle = urlparse(url).path.strip("/").split("/")[0] if "://" in url else url
xml = await self.fetch_blog_rss(blog_handle) xml = await self.fetch_blog_rss(blog_handle)
if not xml: if not xml:
return None return None

View File

@ -1,4 +1,5 @@
from http import HTTPMethod from http import HTTPMethod
from urllib.parse import urlparse
from common.utils import http_request from common.utils import http_request
YT = "https://www.googleapis.com/youtube/v3" YT = "https://www.googleapis.com/youtube/v3"
@ -9,7 +10,7 @@ class YouTubeClient:
self.api_key = api_key self.api_key = api_key
async def _resolve_channel_id(self, handle: str) -> str: async def _resolve_channel_id(self, handle: str) -> str:
h = handle.lstrip("@") h = urlparse(handle).path.strip("/").lstrip("@") if "://" in handle else handle.lstrip("@")
if h.startswith("UC") and len(h) == 24: if h.startswith("UC") and len(h) == 24:
return h return h
for param in ("forHandle", "forUsername"): for param in ("forHandle", "forUsername"):
@ -47,14 +48,7 @@ class YouTubeClient:
resp = await http_request( resp = await http_request(
HTTPMethod.GET, HTTPMethod.GET,
url=f"{YT}/search", url=f"{YT}/search",
params={ params={"part": "snippet", "channelId": channel_id, "order": "viewCount", "type": "video", "maxResults": 10, "key": self.api_key},
"part": "snippet",
"channelId": channel_id,
"order": "viewCount",
"type": "video",
"maxResults": 10,
"key": self.api_key,
},
label="yt-search", label="yt-search",
) )
if resp and resp.is_success: if resp and resp.is_success:
@ -65,11 +59,7 @@ class YouTubeClient:
resp = await http_request( resp = await http_request(
HTTPMethod.GET, HTTPMethod.GET,
url=f"{YT}/videos", url=f"{YT}/videos",
params={ params={"part": "snippet,statistics,contentDetails", "id": ",".join(video_ids), "key": self.api_key},
"part": "snippet,statistics,contentDetails",
"id": ",".join(video_ids),
"key": self.api_key,
},
label="yt-videos", label="yt-videos",
) )
if resp and resp.is_success: if resp and resp.is_success:

View File

@ -1,9 +1,11 @@
import logging
from fastapi import FastAPI from fastapi import FastAPI
from dotenv import load_dotenv from dotenv import load_dotenv
from api import routers from api import routers
import os import os
load_dotenv() load_dotenv()
logging.basicConfig(level=logging.INFO, format="%(asctime)s %(levelname)s %(name)s %(message)s")
app = FastAPI() app = FastAPI()

View File

@ -16,8 +16,7 @@ class AnalysisOptions(BaseModel):
class AnalysisCreate(BaseModel): class AnalysisCreate(BaseModel):
clinic_id: str | None = None clinic_id: str
url: str | None = None
channels: Channels channels: Channels
options: AnalysisOptions = AnalysisOptions() options: AnalysisOptions = AnalysisOptions()

View File

@ -1,12 +0,0 @@
from pydantic import BaseModel
from typing import Any
class ChannelVerifyRequest(BaseModel):
    """Request payload for channel verification.

    Both fields are optional and default to ``None``.
    """

    # Single YouTube channel reference.
    # NOTE(review): presumably a handle or URL — confirm against the caller.
    youtube: str | None = None
    # Zero or more Instagram account references, one string per account.
    instagram: list[str] | None = None
class ChannelVerifyResponse(BaseModel):
    """Response payload for channel verification.

    Mirrors the request shape: one free-form mapping for YouTube and a list
    of free-form mappings for Instagram. Both default to ``None``.
    """

    # Free-form result mapping for the YouTube channel.
    # NOTE(review): the key schema is not defined here — confirm with the producer.
    youtube: dict[str, Any] | None = None
    # One free-form result mapping per Instagram account.
    instagram: list[dict[str, Any]] | None = None

View File

@ -3,9 +3,18 @@ from pydantic import BaseModel
class ClinicCreate(BaseModel): class ClinicCreate(BaseModel):
url: str url: str
name: str
name_en: str | None = None
address: str | None = None class ClinicResponse(BaseModel):
hospital_id: str
hospital_name: str
hospital_name_en: str | None
road_address: str | None
url: str | None
status: str
raw_data: dict | None
created_at: str
updated_at: str
class ClinicCreateResponse(BaseModel): class ClinicCreateResponse(BaseModel):

View File

@ -1,4 +1,50 @@
from common.db import fetchone, execute, is_done import asyncio
import json
import logging
from common.db import fetchone, execute, is_done, get_analysis_raw_data, save_analysis_report
from integrations.llm.llm_service import LLMService
from integrations.llm.prompt import report_prompt
from integrations.llm.schemas.report import ReportOutput
logger = logging.getLogger(__name__)
async def generate_report(analysis_run_id: str) -> ReportOutput:
    """Build the LLM input for an analysis run and generate its report.

    Looks up the hospital attached to the run, loads that hospital's stored
    ``raw_data`` and the run's per-channel raw data, then passes the merged
    mapping to the LLM together with ``report_prompt``.

    Args:
        analysis_run_id: Identifier of the row in ``analysis_runs``.

    Returns:
        The value returned by ``LLMService.generate`` for ``report_prompt``.

    Raises:
        ValueError: If no ``analysis_runs`` row exists for ``analysis_run_id``.
    """
    run = await fetchone(
        "SELECT hospital_id FROM analysis_runs WHERE analysis_run_id = %s",
        (analysis_run_id,),
    )
    if not run:
        # Fail with a clear message instead of an opaque
        # "'NoneType' object is not subscriptable" on the lookup below.
        raise ValueError(f"analysis run not found: {analysis_run_id}")

    clinic_row = await fetchone(
        "SELECT raw_data FROM hospital_baseinfo WHERE hospital_id = %s",
        (run["hospital_id"],),
    )
    raw_data = clinic_row["raw_data"] if clinic_row else None
    # The JSON column may arrive as a serialized string or as an
    # already-decoded object; accept both.
    clinic = json.loads(raw_data) if isinstance(raw_data, str) else raw_data
    if not isinstance(clinic, dict):
        # Missing data, JSON ``null`` or a non-object payload: fall back to
        # an empty mapping so the ``.get`` lookups below yield ``None``.
        clinic = {}

    raw = await get_analysis_raw_data(analysis_run_id)

    input_data = {
        "clinic_name": clinic.get("clinicName"),
        "clinic_name_en": clinic.get("clinicNameEn"),
        "address": clinic.get("address"),
        "phone": clinic.get("phone"),
        "slogan": clinic.get("slogan"),
        "services": json.dumps(clinic.get("services", []), ensure_ascii=False),
        "doctors": json.dumps(clinic.get("doctors", []), ensure_ascii=False),
        # One entry per collected channel; channels without data are passed
        # as ``None`` rather than as an empty JSON document.
        **{
            channel: json.dumps(data, ensure_ascii=False) if data else None
            for channel, data in raw.items()
        },
    }
    return await LLMService(provider="perplexity").generate(report_prompt, input_data)
async def run_report_task(analysis_run_id: str) -> None:
    """Generate the report for a run, persist it and mark the run completed.

    Args:
        analysis_run_id: Identifier of the row in ``analysis_runs``.

    Raises:
        Exception: Any error from report generation or persistence is logged
            with the run id and then re-raised unchanged.
    """
    logger.info("[report] start run=%s", analysis_run_id)
    try:
        result = await generate_report(analysis_run_id)
        await save_analysis_report(analysis_run_id, result.model_dump())
        await execute(
            "UPDATE analysis_runs SET status = 'completed' WHERE analysis_run_id = %s",
            (analysis_run_id,),
        )
    except Exception:
        # This coroutine is scheduled as a background task; without this log
        # a failure only surfaces when the task object is garbage-collected
        # and carries no run id.
        logger.exception("[report] failed run=%s", analysis_run_id)
        raise
    logger.info("[report] done run=%s", analysis_run_id)
async def check_and_advance_analysis(analysis_run_id: str) -> None: async def check_and_advance_analysis(analysis_run_id: str) -> None:
@ -16,3 +62,4 @@ async def check_and_advance_analysis(analysis_run_id: str) -> None:
] ]
if all(results): if all(results):
await execute("UPDATE analysis_runs SET status = 'analyzing' WHERE analysis_run_id = %s", (analysis_run_id,)) await execute("UPDATE analysis_runs SET status = 'analyzing' WHERE analysis_run_id = %s", (analysis_run_id,))
asyncio.create_task(run_report_task(analysis_run_id))

View File

@ -1,53 +1,68 @@
import logging
from common.db import ( from common.db import (
set_instagram_status, save_instagram_raw_data, set_instagram_status, save_instagram_raw_data,
set_facebook_status, save_facebook_raw_data, set_facebook_status, save_facebook_raw_data,
set_naver_blog_status, save_naver_blog_raw_data, set_naver_blog_status, save_naver_blog_raw_data,
set_youtube_status, save_youtube_raw_data, set_youtube_status, save_youtube_raw_data,
set_gangnam_unni_status, save_gangnam_unni_raw_data, set_gangnam_unni_status, save_gangnam_unni_raw_data,
execute, save_hospital_raw_data,
) )
from common.utils import get_env, normalize_handle from common.utils import get_env
from services.analysis import check_and_advance_analysis from services.analysis import check_and_advance_analysis
from integrations.apify import ApifyClient from integrations.apify import ApifyClient
from integrations.naver import NaverClient from integrations.naver import NaverClient
from integrations.youtube import YouTubeClient from integrations.youtube import YouTubeClient
from integrations.firecrawl import FirecrawlClient from integrations.firecrawl import FirecrawlClient
logger = logging.getLogger(__name__)
async def collect_instagram(analysis_run_id: str, row_id: int, url: str) -> None: async def collect_instagram(analysis_run_id: str, row_id: int, url: str) -> None:
print("start a insta") logger.info("[instagram] start run=%s url=%s", analysis_run_id, url)
await set_instagram_status(row_id, "processing") await set_instagram_status(row_id, "processing")
data = await ApifyClient(get_env("APIFY_API_TOKEN")).fetch_instagram_profile(normalize_handle("instagram", url)) data = await ApifyClient(get_env("APIFY_API_TOKEN")).get_instagram_profile(url)
await save_instagram_raw_data(row_id, data) await save_instagram_raw_data(row_id, data)
logger.info("[instagram] done run=%s", analysis_run_id)
await check_and_advance_analysis(analysis_run_id) await check_and_advance_analysis(analysis_run_id)
async def collect_facebook(analysis_run_id: str, row_id: int, url: str) -> None: async def collect_facebook(analysis_run_id: str, row_id: int, url: str) -> None:
print("start a facebook") logger.info("[facebook] start run=%s url=%s", analysis_run_id, url)
await set_facebook_status(row_id, "processing") await set_facebook_status(row_id, "processing")
data = await ApifyClient(get_env("APIFY_API_TOKEN")).fetch_facebook_page(url) data = await ApifyClient(get_env("APIFY_API_TOKEN")).get_facebook_page(url)
await save_facebook_raw_data(row_id, data) await save_facebook_raw_data(row_id, data)
logger.info("[facebook] done run=%s", analysis_run_id)
await check_and_advance_analysis(analysis_run_id) await check_and_advance_analysis(analysis_run_id)
async def collect_naver_blog(analysis_run_id: str, row_id: int, url: str) -> None: async def collect_naver_blog(analysis_run_id: str, row_id: int, url: str) -> None:
print("start a blog") logger.info("[naver_blog] start run=%s url=%s", analysis_run_id, url)
await set_naver_blog_status(row_id, "processing") await set_naver_blog_status(row_id, "processing")
data = await NaverClient(get_env("NAVER_CLIENT_ID"), get_env("NAVER_CLIENT_SECRET")).fetch_blog_rss(normalize_handle("naver_blog", url)) data = await NaverClient(get_env("NAVER_CLIENT_ID"), get_env("NAVER_CLIENT_SECRET")).get_blog_rss(url)
await save_naver_blog_raw_data(row_id, data) await save_naver_blog_raw_data(row_id, data)
logger.info("[naver_blog] done run=%s", analysis_run_id)
await check_and_advance_analysis(analysis_run_id) await check_and_advance_analysis(analysis_run_id)
async def collect_youtube(analysis_run_id: str, row_id: int, url: str) -> None: async def collect_youtube(analysis_run_id: str, row_id: int, url: str) -> None:
print("start a youtube") logger.info("[youtube] start run=%s url=%s", analysis_run_id, url)
await set_youtube_status(row_id, "processing") await set_youtube_status(row_id, "processing")
data = await YouTubeClient(get_env("YOUTUBE_API_KEY")).fetch_channel(normalize_handle("youtube", url)) data = await YouTubeClient(get_env("YOUTUBE_API_KEY")).get_channel(url)
await save_youtube_raw_data(row_id, data) await save_youtube_raw_data(row_id, data)
logger.info("[youtube] done run=%s", analysis_run_id)
await check_and_advance_analysis(analysis_run_id) await check_and_advance_analysis(analysis_run_id)
async def collect_gangnam_unni(analysis_run_id: str, row_id: int, url: str) -> None: async def collect_gangnam_unni(analysis_run_id: str, row_id: int, url: str) -> None:
print("start a gangnam_unni") logger.info("[gangnam_unni] start run=%s url=%s", analysis_run_id, url)
await set_gangnam_unni_status(row_id, "processing") await set_gangnam_unni_status(row_id, "processing")
data = await FirecrawlClient(get_env("FIRECRAWL_API_KEY")).fetch_gangnam_unni(url) data = await FirecrawlClient(get_env("FIRECRAWL_API_KEY")).get_gangnam_unni(url)
await save_gangnam_unni_raw_data(row_id, data) await save_gangnam_unni_raw_data(row_id, data)
logger.info("[gangnam_unni] done run=%s", analysis_run_id)
await check_and_advance_analysis(analysis_run_id) await check_and_advance_analysis(analysis_run_id)
async def collect_clinic_info(hospital_id: str, url: str) -> None:
await execute("UPDATE hospital_baseinfo SET status = 'processing' WHERE hospital_id = %s", (hospital_id,))
data = await FirecrawlClient(get_env("FIRECRAWL_API_KEY")).fetch_clinic_info(url)
await save_hospital_raw_data(hospital_id, data)