llm 붙임 및 리포트 생성 확인
parent
26cd946e1b
commit
2b8a90e857
|
|
@ -65,6 +65,9 @@ CREATE TABLE hospital_baseinfo
|
|||
`brn` VARCHAR(50) NOT NULL,
|
||||
`road_address` VARCHAR(100) NULL,
|
||||
`site_address` VARCHAR(100) NULL,
|
||||
`url` VARCHAR(500) NULL,
|
||||
`status` VARCHAR(20) NOT NULL DEFAULT 'start',
|
||||
`raw_data` JSON NULL,
|
||||
`created_at` TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP,
|
||||
`updated_at` TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP,
|
||||
PRIMARY KEY (hospital_id)
|
||||
|
|
@ -133,6 +136,8 @@ CREATE TABLE analysis_runs
|
|||
`naver_blog_data_id` INT NULL,
|
||||
`youtube_data_id` INT NULL,
|
||||
`gangnam_unni_data_id` INT NULL,
|
||||
`report_data` JSON NULL,
|
||||
`plan_data` JSON NULL,
|
||||
`created_at` TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP,
|
||||
`updated_at` TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP,
|
||||
PRIMARY KEY (analysis_run_id)
|
||||
|
|
|
|||
|
|
@ -2,6 +2,5 @@ from .clinics import router as clinics_router
|
|||
from .analyses import router as analyses_router
|
||||
from .reports import router as reports_router
|
||||
from .plans import router as plans_router
|
||||
from .channels import router as channels_router
|
||||
|
||||
routers = [clinics_router, analyses_router, reports_router, plans_router, channels_router]
|
||||
routers = [clinics_router, analyses_router, reports_router, plans_router]
|
||||
|
|
|
|||
|
|
@ -1,3 +1,4 @@
|
|||
import logging
|
||||
import uuid6
|
||||
from fastapi import APIRouter, BackgroundTasks, Depends, status, HTTPException
|
||||
from common.deps import verify_api_key
|
||||
|
|
@ -7,10 +8,12 @@ from models.status import AnalysisStatus
|
|||
from services.collect import collect_instagram, collect_facebook, collect_naver_blog, collect_youtube, collect_gangnam_unni
|
||||
|
||||
router = APIRouter(prefix="/api/analyses", tags=["analyses"], dependencies=[Depends(verify_api_key)])
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
@router.post("", status_code=status.HTTP_202_ACCEPTED, response_model=AnalysisStartResponse)
|
||||
async def start_analysis(body: AnalysisCreate, background_tasks: BackgroundTasks):
|
||||
logger.info("POST /api/analyses clinic_id=%s", body.clinic_id)
|
||||
analysis_run_id = str(uuid6.uuid7())
|
||||
hospital_id = body.clinic_id
|
||||
|
||||
|
|
@ -23,8 +26,7 @@ async def start_analysis(body: AnalysisCreate, background_tasks: BackgroundTasks
|
|||
raise HTTPException(status_code=409, detail="Clinic not found")
|
||||
owner_user_id = hospital["owner_user_id"] if hospital else 0
|
||||
|
||||
ig_url = body.channels.instagram[0] if isinstance(body.channels.instagram, list) else body.channels.instagram
|
||||
ig_id = await insert_instagram_row(hospital_id, ig_url) if ig_url else None
|
||||
ig_id = await insert_instagram_row(hospital_id, body.channels.instagram) if body.channels.instagram else None
|
||||
fb_id = await insert_facebook_row(hospital_id, body.channels.facebook) if body.channels.facebook else None
|
||||
nb_id = await insert_naver_blog_row(hospital_id, body.channels.naver_blog) if body.channels.naver_blog else None
|
||||
yt_id = await insert_youtube_row(hospital_id, body.channels.youtube) if body.channels.youtube else None
|
||||
|
|
@ -33,9 +35,9 @@ async def start_analysis(body: AnalysisCreate, background_tasks: BackgroundTasks
|
|||
analysis_run_id = await insert_analysis_run(analysis_run_id, hospital_id, owner_user_id, ig_id, fb_id, nb_id, yt_id, gu_id)
|
||||
|
||||
if ig_id:
|
||||
background_tasks.add_task(collect_instagram, analysis_run_id, ig_id, ig_url)
|
||||
background_tasks.add_task(collect_instagram, analysis_run_id, ig_id, body.channels.instagram)
|
||||
if fb_id:
|
||||
background_tasks.add_task(collect_facebook, analysis_run_id, fb_id, f"https://www.facebook.com/{body.channels.facebook}")
|
||||
background_tasks.add_task(collect_facebook, analysis_run_id, fb_id, body.channels.facebook)
|
||||
if nb_id:
|
||||
background_tasks.add_task(collect_naver_blog, analysis_run_id, nb_id, body.channels.naver_blog)
|
||||
if yt_id:
|
||||
|
|
@ -54,7 +56,10 @@ async def start_analysis(body: AnalysisCreate, background_tasks: BackgroundTasks
|
|||
|
||||
@router.get("/{run_id}/status", response_model=AnalysisStatusResponse)
|
||||
async def get_analysis_status(run_id: str):
|
||||
logger.info("GET /api/analyses/%s/status", run_id)
|
||||
row = await fetchone("SELECT status FROM analysis_runs WHERE analysis_run_id = %s", (run_id,))
|
||||
if not row:
|
||||
raise HTTPException(status_code=404, detail="Run not found")
|
||||
return AnalysisStatusResponse(
|
||||
analysis_run_id=run_id,
|
||||
status=AnalysisStatus(row["status"]),
|
||||
|
|
|
|||
|
|
@ -1,23 +0,0 @@
|
|||
from fastapi import APIRouter, Depends
|
||||
from common.deps import verify_api_key
|
||||
from models.channel import ChannelVerifyRequest, ChannelVerifyResponse
|
||||
|
||||
router = APIRouter(prefix="/api/channels", tags=["channels"], dependencies=[Depends(verify_api_key)])
|
||||
|
||||
|
||||
# will not use
|
||||
|
||||
@router.post("/verify", response_model=ChannelVerifyResponse)
|
||||
async def verify_channels(body: ChannelVerifyRequest):
|
||||
return ChannelVerifyResponse(
|
||||
youtube={
|
||||
"handle": body.youtube,
|
||||
"verified": True,
|
||||
"display_name": "바노바기 BANOBAGI",
|
||||
"followers": 12345,
|
||||
} if body.youtube else None,
|
||||
instagram=[
|
||||
{"handle": handle, "verified": "unverifiable", "note": "Instagram 로그인 벽"}
|
||||
for handle in body.instagram
|
||||
] if body.instagram else None,
|
||||
)
|
||||
|
|
@ -1,28 +1,64 @@
|
|||
import logging
|
||||
import uuid6
|
||||
from fastapi import APIRouter, Depends, status
|
||||
from fastapi import APIRouter, Depends, HTTPException, status
|
||||
from common.deps import verify_api_key
|
||||
from common.db import insert_hospital
|
||||
from models.clinic import ClinicCreate, ClinicCreateResponse, ClinicHistoryResponse, RunSummary
|
||||
from common.db import insert_hospital, fetchone
|
||||
from common.utils import get_env
|
||||
from integrations.firecrawl import FirecrawlClient
|
||||
from models.clinic import ClinicCreate, ClinicCreateResponse, ClinicResponse, ClinicHistoryResponse, RunSummary
|
||||
|
||||
router = APIRouter(prefix="/api/clinics", tags=["clinics"], dependencies=[Depends(verify_api_key)])
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
_REQUIRED_FIELDS = ["clinicName"]
|
||||
_COLLECTED_FIELDS = ["clinicName", "clinicNameEn", "address", "phone", "slogan", "services", "doctors"]
|
||||
|
||||
|
||||
@router.post("", status_code=status.HTTP_201_CREATED, response_model=ClinicCreateResponse)
|
||||
async def create_clinic(body: ClinicCreate):
|
||||
logger.info("POST /api/clinics url=%s", body.url)
|
||||
info = await FirecrawlClient(get_env("FIRECRAWL_API_KEY")).fetch_clinic_info(body.url)
|
||||
|
||||
missing = [f for f in _COLLECTED_FIELDS if not (info or {}).get(f)]
|
||||
required_missing = [f for f in _REQUIRED_FIELDS if f in missing]
|
||||
if required_missing:
|
||||
raise HTTPException(status_code=404, detail={"missing": missing})
|
||||
|
||||
hospital_id = str(uuid6.uuid7())
|
||||
row = await insert_hospital(hospital_id, body.name, body.name_en, body.address, body.url)
|
||||
row = await insert_hospital(
|
||||
hospital_id,
|
||||
name=info["clinicName"],
|
||||
name_en=info.get("clinicNameEn"),
|
||||
road_address=info.get("address"),
|
||||
url=body.url,
|
||||
raw_data=info,
|
||||
)
|
||||
return ClinicCreateResponse(
|
||||
id=hospital_id,
|
||||
url=body.url,
|
||||
name=body.name,
|
||||
name=info["clinicName"],
|
||||
created_at=str(row["created_at"]),
|
||||
)
|
||||
|
||||
|
||||
@router.get("/{hospital_id}", response_model=ClinicResponse)
|
||||
async def get_clinic(hospital_id: str):
|
||||
logger.info("GET /api/clinics/%s", hospital_id)
|
||||
row = await fetchone(
|
||||
"SELECT hospital_id, hospital_name, hospital_name_en, road_address, url, status, raw_data, created_at, updated_at"
|
||||
" FROM hospital_baseinfo WHERE hospital_id = %s",
|
||||
(hospital_id,),
|
||||
)
|
||||
if not row:
|
||||
raise HTTPException(status_code=404, detail="Clinic not found")
|
||||
return ClinicResponse(**{**row, "created_at": str(row["created_at"]), "updated_at": str(row["updated_at"])})
|
||||
|
||||
|
||||
# Not done
|
||||
|
||||
@router.get("/{id}/history", response_model=ClinicHistoryResponse)
|
||||
async def get_clinic_history(id: str):
|
||||
logger.info("GET /api/clinics/%s/history", id)
|
||||
return ClinicHistoryResponse(
|
||||
clinic_id=id,
|
||||
runs=[
|
||||
|
|
|
|||
|
|
@ -1,12 +1,15 @@
|
|||
import logging
|
||||
from fastapi import APIRouter, Depends, status
|
||||
from common.deps import verify_api_key
|
||||
from models.plan import PlanCreate, PlanResponse
|
||||
|
||||
router = APIRouter(prefix="/api/plans", tags=["plans"], dependencies=[Depends(verify_api_key)])
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
@router.post("", status_code=status.HTTP_201_CREATED, response_model=PlanResponse)
|
||||
async def create_plan(body: PlanCreate):
|
||||
logger.info("POST /api/plans run_id=%s", body.analysis_run_id)
|
||||
return PlanResponse(
|
||||
id="33333333-3333-3333-3333-333333333333",
|
||||
analysis_run_id="22222222-2222-2222-2222-222222222222",
|
||||
|
|
@ -20,6 +23,7 @@ async def create_plan(body: PlanCreate):
|
|||
|
||||
@router.get("/{id}", response_model=PlanResponse)
|
||||
async def get_plan(id: str):
|
||||
logger.info("GET /api/plans/%s", id)
|
||||
return PlanResponse(
|
||||
id=id,
|
||||
analysis_run_id="22222222-2222-2222-2222-222222222222",
|
||||
|
|
|
|||
|
|
@ -1,24 +1,24 @@
|
|||
from fastapi import APIRouter, Depends
|
||||
import json
|
||||
import logging
|
||||
from fastapi import APIRouter, Depends, HTTPException, Response
|
||||
from common.db import fetchone
|
||||
from common.deps import verify_api_key
|
||||
from models.report import ReportResponse, ClinicInfo
|
||||
from integrations.llm.schemas.report import ReportOutput
|
||||
|
||||
router = APIRouter(prefix="/api/reports", tags=["reports"], dependencies=[Depends(verify_api_key)])
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
@router.get("/{run_id}", response_model=ReportResponse)
|
||||
@router.get("/{run_id}", response_model=ReportOutput | None)
|
||||
async def get_report(run_id: str):
|
||||
return ReportResponse(
|
||||
id=run_id,
|
||||
clinic=ClinicInfo(name="바노바기성형외과", url="https://www.banobagi.com"),
|
||||
overall_score=82,
|
||||
youtube={},
|
||||
instagram={},
|
||||
facebook={},
|
||||
naver_place={},
|
||||
naver_blog={},
|
||||
gangnam_unni={},
|
||||
conversion_strategy={},
|
||||
roadmap=[],
|
||||
kpis=[],
|
||||
generated_at="2026-04-20T09:01:30Z",
|
||||
logger.info("GET /api/reports/%s", run_id)
|
||||
row = await fetchone(
|
||||
"SELECT report_data FROM analysis_runs WHERE analysis_run_id = %s",
|
||||
(run_id,),
|
||||
)
|
||||
if row is None:
|
||||
raise HTTPException(status_code=404, detail="Run not found")
|
||||
if row["report_data"] is None:
|
||||
return Response(status_code=204)
|
||||
data = json.loads(row["report_data"]) if isinstance(row["report_data"], str) else row["report_data"]
|
||||
return ReportOutput(**data)
|
||||
|
|
|
|||
|
|
@ -76,6 +76,13 @@ async def insert_analysis_run(
|
|||
|
||||
|
||||
|
||||
async def save_analysis_report(analysis_run_id: str, data: dict) -> None:
|
||||
await execute(
|
||||
"UPDATE analysis_runs SET report_data = %s WHERE analysis_run_id = %s",
|
||||
(json.dumps(data, ensure_ascii=False), analysis_run_id),
|
||||
)
|
||||
|
||||
|
||||
async def is_done(table: str, row_id: int | None) -> bool:
|
||||
if row_id is None:
|
||||
return True
|
||||
|
|
@ -83,6 +90,30 @@ async def is_done(table: str, row_id: int | None) -> bool:
|
|||
return r["status"] == "done"
|
||||
|
||||
|
||||
async def _fetch_raw(table: str, row_id: int | None) -> dict | None:
|
||||
if row_id is None:
|
||||
return None
|
||||
row = await fetchone(f"SELECT raw_data FROM {table} WHERE id = %s", (row_id,))
|
||||
if not row or not row["raw_data"]:
|
||||
return None
|
||||
return json.loads(row["raw_data"]) if isinstance(row["raw_data"], str) else row["raw_data"]
|
||||
|
||||
|
||||
async def get_analysis_raw_data(analysis_run_id: str) -> dict:
|
||||
run = await fetchone(
|
||||
"SELECT instagram_data_id, facebook_data_id, naver_blog_data_id, youtube_data_id, gangnam_unni_data_id"
|
||||
" FROM analysis_runs WHERE analysis_run_id = %s",
|
||||
(analysis_run_id,),
|
||||
)
|
||||
return {
|
||||
"instagram": await _fetch_raw("instagram_data", run["instagram_data_id"]),
|
||||
"facebook": await _fetch_raw("facebook_data", run["facebook_data_id"]),
|
||||
"naver_blog": await _fetch_raw("naver_blog_data", run["naver_blog_data_id"]),
|
||||
"youtube": await _fetch_raw("youtube_data", run["youtube_data_id"]),
|
||||
"gangnam_unni": await _fetch_raw("gangnam_unni_data", run["gangnam_unni_data_id"]),
|
||||
}
|
||||
|
||||
|
||||
async def set_instagram_status(row_id: int, status: str) -> None:
|
||||
await execute("UPDATE instagram_data SET status = %s WHERE id = %s", (status, row_id))
|
||||
|
||||
|
|
@ -129,14 +160,37 @@ async def insert_hospital(
|
|||
name_en: str | None = None,
|
||||
road_address: str | None = None,
|
||||
site_address: str | None = None,
|
||||
url: str | None = None,
|
||||
raw_data: dict | None = None,
|
||||
owner_user_id: int = 0,
|
||||
brn: str = "",
|
||||
) -> dict:
|
||||
await execute(
|
||||
"INSERT INTO hospital_baseinfo (hospital_id, hospital_name, hospital_name_en, road_address, site_address, owner_user_id, brn) VALUES (%s, %s, %s, %s, %s, %s, %s)",
|
||||
(hospital_id, name, name_en, road_address, site_address, owner_user_id, brn),
|
||||
"INSERT INTO hospital_baseinfo (hospital_id, hospital_name, hospital_name_en, road_address, site_address, url, raw_data, status, owner_user_id, brn)"
|
||||
" VALUES (%s, %s, %s, %s, %s, %s, %s, 'done', %s, %s)",
|
||||
(hospital_id, name, name_en, road_address, site_address, url,
|
||||
json.dumps(raw_data, ensure_ascii=False) if raw_data else None,
|
||||
owner_user_id, brn),
|
||||
)
|
||||
return await fetchone(
|
||||
"SELECT created_at FROM hospital_baseinfo WHERE hospital_id = %s",
|
||||
(hospital_id,),
|
||||
)
|
||||
|
||||
|
||||
async def save_hospital_raw_data(hospital_id: str, data: dict) -> None:
|
||||
await execute(
|
||||
"UPDATE hospital_baseinfo"
|
||||
" SET raw_data = %s, status = 'done',"
|
||||
" hospital_name = COALESCE(%s, hospital_name),"
|
||||
" hospital_name_en = COALESCE(%s, hospital_name_en),"
|
||||
" road_address = COALESCE(%s, road_address)"
|
||||
" WHERE hospital_id = %s",
|
||||
(
|
||||
json.dumps(data, ensure_ascii=False),
|
||||
data.get("clinicName"),
|
||||
data.get("clinicNameEn"),
|
||||
data.get("address"),
|
||||
hospital_id,
|
||||
),
|
||||
)
|
||||
|
|
|
|||
|
|
@ -2,7 +2,5 @@ import os
|
|||
from fastapi import Header, HTTPException
|
||||
|
||||
async def verify_api_key(x_api_key: str = Header(...)):
|
||||
print(x_api_key)
|
||||
print(os.getenv("API_KEY"))
|
||||
if x_api_key != os.getenv("API_KEY"):
|
||||
raise HTTPException(status_code=401, detail="Invalid API Key")
|
||||
|
|
|
|||
|
|
@ -1,4 +1,5 @@
|
|||
from http import HTTPMethod
|
||||
from urllib.parse import urlparse
|
||||
from common.utils import http_request
|
||||
|
||||
APIFY_BASE = "https://api.apify.com/v2"
|
||||
|
|
@ -32,8 +33,9 @@ class ApifyClient:
|
|||
return []
|
||||
return items_resp.json()
|
||||
|
||||
async def fetch_instagram_profile(self, handle: str) -> dict | None:
|
||||
items = await self._run_actor("apify~instagram-profile-scraper", {"usernames": [handle], "resultsLimit": 12})
|
||||
async def fetch_instagram_profile(self, url: str) -> dict | None:
|
||||
username = urlparse(url).path.strip("/").split("/")[0] if "://" in url else url.lstrip("@")
|
||||
items = await self._run_actor("apify~instagram-profile-scraper", {"usernames": [username], "resultsLimit": 12})
|
||||
return items[0] if items else None
|
||||
|
||||
async def get_instagram_profile(self, handle: str) -> dict | None:
|
||||
|
|
@ -131,7 +133,7 @@ class ApifyClient:
|
|||
if not page:
|
||||
return None
|
||||
return {
|
||||
"pageName": page["title"],
|
||||
"pageName": page.get("title") or page.get("name"),
|
||||
"pageUrl": page.get("pageUrl", page_url),
|
||||
"followers": page.get("followers", 0),
|
||||
"likes": page.get("likes", 0),
|
||||
|
|
|
|||
|
|
@ -67,6 +67,88 @@ class FirecrawlClient:
|
|||
return []
|
||||
return (data.get("json") or {}).get("socialLinks", [])
|
||||
|
||||
async def fetch_clinic_info(self, url: str) -> dict | None:
|
||||
resp = await http_request(
|
||||
HTTPMethod.POST,
|
||||
url=f"{FIRECRAWL_BASE}/scrape",
|
||||
headers=self._headers(),
|
||||
json_body={
|
||||
"url": url,
|
||||
"formats": ["json", "links"],
|
||||
"jsonOptions": {
|
||||
"prompt": "Extract: clinic name (Korean), clinic name (English), address, phone, business hours, slogan, services offered, doctors with name/title/specialty, brand identity (primary/accent/background/text colors in hex, heading/body fonts, logo URL, favicon URL)",
|
||||
"schema": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"clinicName": {"type": "string"},
|
||||
"clinicNameEn": {"type": "string"},
|
||||
"address": {"type": "string"},
|
||||
"phone": {"type": "string"},
|
||||
"businessHours": {"type": "string"},
|
||||
"slogan": {"type": "string"},
|
||||
"services": {"type": "array", "items": {"type": "string"}},
|
||||
"doctors": {
|
||||
"type": "array",
|
||||
"items": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"name": {"type": "string"},
|
||||
"title": {"type": "string"},
|
||||
"specialty": {"type": "string"},
|
||||
},
|
||||
},
|
||||
},
|
||||
# "socialMedia": {
|
||||
# "type": "object",
|
||||
# "properties": {
|
||||
# "instagram": {"type": "string"},
|
||||
# "youtube": {"type": "string"},
|
||||
# "blog": {"type": "string"},
|
||||
# "facebook": {"type": "string"},
|
||||
# "tiktok": {"type": "string"},
|
||||
# "kakao": {"type": "string"},
|
||||
# },
|
||||
# },
|
||||
"branding": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"primaryColor": {"type": "string"},
|
||||
"accentColor": {"type": "string"},
|
||||
"backgroundColor": {"type": "string"},
|
||||
"textColor": {"type": "string"},
|
||||
"headingFont": {"type": "string"},
|
||||
"bodyFont": {"type": "string"},
|
||||
"logoUrl": {"type": "string"},
|
||||
"faviconUrl": {"type": "string"},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
"waitFor": 5000,
|
||||
},
|
||||
timeout=60,
|
||||
label="firecrawl-clinic-info",
|
||||
)
|
||||
if not resp or not resp.is_success:
|
||||
return None
|
||||
data = resp.json().get("data") or {}
|
||||
info = data.get("json") or {}
|
||||
return {
|
||||
"clinicName": info.get("clinicName"),
|
||||
"clinicNameEn": info.get("clinicNameEn"),
|
||||
"address": info.get("address"),
|
||||
"phone": info.get("phone"),
|
||||
"businessHours": info.get("businessHours"),
|
||||
"slogan": info.get("slogan"),
|
||||
"services": info.get("services", []),
|
||||
"doctors": info.get("doctors", []),
|
||||
# "socialMedia": info.get("socialMedia", {}),
|
||||
"branding": info.get("branding", {}),
|
||||
"siteLinks": data.get("links", []),
|
||||
"sourceUrl": url,
|
||||
}
|
||||
|
||||
async def fetch_gangnam_unni(self, hospital_url: str) -> dict | None:
|
||||
resp = await http_request(
|
||||
HTTPMethod.POST,
|
||||
|
|
|
|||
|
|
@ -1,3 +1,3 @@
|
|||
from .service import LLMService
|
||||
from .llm_service import LLMService
|
||||
from .prompt import Prompt
|
||||
|
||||
|
|
|
|||
|
|
@ -0,0 +1,76 @@
|
|||
from pydantic import BaseModel
|
||||
from openai import AsyncOpenAI
|
||||
from common.utils import get_env
|
||||
from .prompt import Prompt
|
||||
|
||||
|
||||
class LLMResponseError(Exception):
|
||||
def __init__(self, status: str, code: str = None, message: str = None):
|
||||
self.status = status
|
||||
self.code = code
|
||||
self.message = message
|
||||
super().__init__(f"LLM response failed: status={status}, code={code}, message={message}")
|
||||
|
||||
|
||||
class LLMService:
|
||||
def __init__(self, provider: str = "openai", max_retries: int = 2):
|
||||
self.provider = provider
|
||||
self.max_retries = max_retries
|
||||
match provider:
|
||||
case "openai":
|
||||
self.client = AsyncOpenAI(api_key=get_env("OPENAI_API_KEY"))
|
||||
case "perplexity":
|
||||
self.client = AsyncOpenAI(
|
||||
api_key=get_env("PERPLEXITY_API_KEY"),
|
||||
base_url="https://api.perplexity.ai",
|
||||
)
|
||||
case "gemini":
|
||||
self.client = AsyncOpenAI(
|
||||
api_key=get_env("GEMINI_API_KEY"),
|
||||
base_url="https://generativelanguage.googleapis.com/v1beta/openai/",
|
||||
)
|
||||
case _:
|
||||
raise NotImplementedError(f"Unknown provider: {provider}")
|
||||
|
||||
async def generate(
|
||||
self,
|
||||
prompt: Prompt,
|
||||
input_data: dict,
|
||||
) -> BaseModel:
|
||||
prompt_text = prompt.build(input_data)
|
||||
last_error = None
|
||||
|
||||
for attempt in range(self.max_retries + 1):
|
||||
if self.provider == "perplexity":
|
||||
response = await self.client.chat.completions.create(
|
||||
model=prompt.model,
|
||||
messages=[{"role": "user", "content": prompt_text}],
|
||||
response_format={
|
||||
"type": "json_schema",
|
||||
"json_schema": {"name": prompt.output_class.__name__, "schema": prompt.output_class.model_json_schema()},
|
||||
},
|
||||
)
|
||||
choice = response.choices[0]
|
||||
if choice.finish_reason == "stop":
|
||||
return prompt.output_class.model_validate_json(choice.message.content)
|
||||
last_error = LLMResponseError("failed", choice.finish_reason, f"unexpected finish_reason: {choice.finish_reason}")
|
||||
else:
|
||||
response = await self.client.beta.chat.completions.parse(
|
||||
model=prompt.model,
|
||||
messages=[{"role": "user", "content": prompt_text}],
|
||||
response_format=prompt.output_class,
|
||||
)
|
||||
choice = response.choices[0]
|
||||
finish_reason = choice.finish_reason
|
||||
|
||||
if finish_reason == "stop":
|
||||
return choice.message.parsed
|
||||
|
||||
if finish_reason == "length":
|
||||
last_error = LLMResponseError("incomplete", finish_reason, "max tokens reached")
|
||||
elif finish_reason == "content_filter":
|
||||
last_error = LLMResponseError("failed", finish_reason, "blocked by content filter")
|
||||
else:
|
||||
last_error = LLMResponseError("failed", finish_reason, f"unexpected finish_reason: {finish_reason}")
|
||||
|
||||
raise last_error
|
||||
|
|
@ -1,19 +1,40 @@
|
|||
import os
|
||||
from pydantic import BaseModel
|
||||
from common.utils import get_env
|
||||
from integrations.llm.schemas.report import ReportInput, ReportOutput
|
||||
|
||||
_PROMPT_DIR = os.path.join(os.path.dirname(__file__), "temp-prompt")
|
||||
|
||||
|
||||
class Prompt:
|
||||
def __init__(
|
||||
self,
|
||||
template: str,
|
||||
model: str,
|
||||
input_class: type[BaseModel],
|
||||
output_class: type[BaseModel],
|
||||
):
|
||||
self.template = template
|
||||
self.model = model
|
||||
file_name: str
|
||||
prompt_model: str
|
||||
input_class: type[BaseModel]
|
||||
output_class: type[BaseModel]
|
||||
|
||||
def __init__(self, file_name: str, prompt_model: str, input_class: type[BaseModel], output_class: type[BaseModel]):
|
||||
self.file_name = file_name
|
||||
self.prompt_model = prompt_model
|
||||
self.input_class = input_class
|
||||
self.output_class = output_class
|
||||
self.template, self.model = self._load_prompt()
|
||||
|
||||
def _load_prompt(self) -> tuple[str, str]:
|
||||
with open(os.path.join(_PROMPT_DIR, self.file_name), encoding="utf-8") as f:
|
||||
template = f.read()
|
||||
return template, get_env(self.prompt_model)
|
||||
|
||||
def _reload_prompt(self):
|
||||
self.template, self.model = self._load_prompt()
|
||||
|
||||
def build(self, input_data: dict) -> str:
|
||||
verified = self.input_class(**input_data)
|
||||
return self.template.format(**verified.model_dump())
|
||||
|
||||
|
||||
report_prompt = Prompt(
|
||||
file_name="report_prompt.txt",
|
||||
prompt_model="REPORT_MODEL",
|
||||
input_class=ReportInput,
|
||||
output_class=ReportOutput,
|
||||
)
|
||||
|
|
|
|||
|
|
@ -0,0 +1,43 @@
|
|||
from pydantic import BaseModel
|
||||
|
||||
|
||||
# template.format(**model_dump()) 에 삽입될 변수들
|
||||
# 각 채널 raw_data를 호출부에서 json.dumps()로 직렬화해서 넘겨야 함
|
||||
class ReportInput(BaseModel):
|
||||
clinic_name: str | None = None
|
||||
clinic_name_en: str | None = None
|
||||
address: str | None = None
|
||||
phone: str | None = None
|
||||
slogan: str | None = None
|
||||
services: str | None = None
|
||||
doctors: str | None = None
|
||||
instagram: str | None = None
|
||||
facebook: str | None = None
|
||||
naver_blog: str | None = None
|
||||
youtube: str | None = None
|
||||
gangnam_unni: str | None = None
|
||||
|
||||
|
||||
class ChannelScore(BaseModel):
|
||||
score: int
|
||||
summary: str
|
||||
strengths: list[str]
|
||||
weaknesses: list[str]
|
||||
|
||||
|
||||
class ConversionStrategy(BaseModel):
|
||||
summary: str
|
||||
actions: list[str]
|
||||
|
||||
|
||||
# response_format으로 OpenAI structured output에 전달 — dict 필드 사용 불가
|
||||
class ReportOutput(BaseModel):
|
||||
overall_score: int
|
||||
instagram: ChannelScore | None = None
|
||||
facebook: ChannelScore | None = None
|
||||
naver_blog: ChannelScore | None = None
|
||||
youtube: ChannelScore | None = None
|
||||
gangnam_unni: ChannelScore | None = None
|
||||
conversion_strategy: ConversionStrategy
|
||||
roadmap: list[str]
|
||||
kpis: list[str]
|
||||
|
|
@ -1,61 +0,0 @@
|
|||
from pydantic import BaseModel
|
||||
from openai import AsyncOpenAI
|
||||
from common.utils import get_env
|
||||
from .prompt import Prompt
|
||||
|
||||
|
||||
class LLMResponseError(Exception):
|
||||
def __init__(self, status: str, code: str = None, message: str = None):
|
||||
self.status = status
|
||||
self.code = code
|
||||
self.message = message
|
||||
super().__init__(f"LLM response failed: status={status}, code={code}, message={message}")
|
||||
|
||||
|
||||
class LLMService:
|
||||
def __init__(self, provider: str = "openai", max_retries: int = 2):
|
||||
self.max_retries = max_retries
|
||||
match provider:
|
||||
case "openai":
|
||||
self.client = AsyncOpenAI(api_key=get_env("OPENAI_API_KEY"))
|
||||
case "perplexity":
|
||||
self.client = AsyncOpenAI(
|
||||
api_key=get_env("PERPLEXITY_API_KEY"),
|
||||
base_url="https://api.perplexity.ai",
|
||||
)
|
||||
case "gemini":
|
||||
self.client = AsyncOpenAI(
|
||||
api_key=get_env("GEMINI_API_KEY"),
|
||||
base_url="https://generativelanguage.googleapis.com/v1beta/openai/",
|
||||
)
|
||||
case _:
|
||||
raise NotImplementedError(f"Unknown provider: {provider}")
|
||||
|
||||
async def generate(
|
||||
self,
|
||||
prompt: Prompt,
|
||||
input_data: dict,
|
||||
) -> BaseModel:
|
||||
prompt_text = prompt.build(input_data)
|
||||
last_error = None
|
||||
|
||||
for attempt in range(self.max_retries + 1):
|
||||
response = await self.client.beta.chat.completions.parse(
|
||||
model=prompt.model,
|
||||
messages=[{"role": "user", "content": prompt_text}],
|
||||
response_format=prompt.output_class,
|
||||
)
|
||||
choice = response.choices[0]
|
||||
finish_reason = choice.finish_reason
|
||||
|
||||
if finish_reason == "stop":
|
||||
return choice.message.parsed
|
||||
|
||||
if finish_reason == "length":
|
||||
last_error = LLMResponseError("incomplete", finish_reason, "max tokens reached")
|
||||
elif finish_reason == "content_filter":
|
||||
last_error = LLMResponseError("failed", finish_reason, "blocked by content filter")
|
||||
else:
|
||||
last_error = LLMResponseError("failed", finish_reason, f"unexpected finish_reason: {finish_reason}")
|
||||
|
||||
raise last_error
|
||||
|
|
@ -0,0 +1 @@
|
|||
|
||||
|
|
@ -0,0 +1,38 @@
|
|||
당신은 프리미엄 의료 마케팅 전문 분석가입니다. 아래 실제 수집된 데이터를 기반으로 종합 마케팅 리포트를 생성해주세요.
|
||||
결과물은 한국어로 생성하세요
|
||||
|
||||
⚠️ 중요: 데이터가 null인 채널은 해당 항목을 null로 설정하세요. 데이터에 없는 수치는 절대 추측하지 마세요.
|
||||
|
||||
## 병원 기본 정보
|
||||
- 병원명: {clinic_name}
|
||||
- 영문명: {clinic_name_en}
|
||||
- 주소: {address}
|
||||
- 전화: {phone}
|
||||
- 슬로건: {slogan}
|
||||
- 시술: {services}
|
||||
- 의료진: {doctors}
|
||||
|
||||
## 채널 데이터
|
||||
|
||||
### 인스타그램
|
||||
{instagram}
|
||||
|
||||
### 페이스북
|
||||
{facebook}
|
||||
|
||||
### 네이버 블로그
|
||||
{naver_blog}
|
||||
|
||||
### 유튜브
|
||||
{youtube}
|
||||
|
||||
### 강남언니
|
||||
{gangnam_unni}
|
||||
|
||||
## 분석 지침
|
||||
|
||||
- 점수는 0~100 기준입니다.
|
||||
- strengths와 weaknesses는 각 3개 이상 작성하세요.
|
||||
- roadmap은 우선순위 순으로 실행 가능한 액션으로 작성하세요.
|
||||
- kpis는 실제 수집된 수치 기반으로 현실적인 측정 가능 지표로 작성하세요.
|
||||
- conversion_strategy의 actions는 구체적인 실행 방안으로 작성하세요.
|
||||
|
|
@ -1,5 +1,6 @@
|
|||
import re
|
||||
from http import HTTPMethod
|
||||
from urllib.parse import urlparse
|
||||
from common.utils import http_request
|
||||
|
||||
NAVER_BASE = "https://openapi.naver.com/v1/search"
|
||||
|
|
@ -52,10 +53,10 @@ class NaverClient:
|
|||
return []
|
||||
return resp.json().get("items", [])
|
||||
|
||||
async def fetch_blog_rss(self, blog_handle: str) -> str | None:
|
||||
async def fetch_blog_rss(self, handle: str) -> str | None:
|
||||
resp = await http_request(
|
||||
HTTPMethod.GET,
|
||||
url=f"https://rss.blog.naver.com/{blog_handle}.xml",
|
||||
url=f"https://rss.blog.naver.com/{handle}.xml",
|
||||
timeout=15,
|
||||
label="naver-rss",
|
||||
)
|
||||
|
|
@ -63,7 +64,8 @@ class NaverClient:
|
|||
return None
|
||||
return resp.text
|
||||
|
||||
async def get_blog_rss(self, blog_handle: str) -> dict | None:
|
||||
async def get_blog_rss(self, url: str) -> dict | None:
|
||||
blog_handle = urlparse(url).path.strip("/").split("/")[0] if "://" in url else url
|
||||
xml = await self.fetch_blog_rss(blog_handle)
|
||||
if not xml:
|
||||
return None
|
||||
|
|
|
|||
|
|
@ -1,4 +1,5 @@
|
|||
from http import HTTPMethod
|
||||
from urllib.parse import urlparse
|
||||
from common.utils import http_request
|
||||
|
||||
YT = "https://www.googleapis.com/youtube/v3"
|
||||
|
|
@ -9,7 +10,7 @@ class YouTubeClient:
|
|||
self.api_key = api_key
|
||||
|
||||
async def _resolve_channel_id(self, handle: str) -> str:
|
||||
h = handle.lstrip("@")
|
||||
h = urlparse(handle).path.strip("/").lstrip("@") if "://" in handle else handle.lstrip("@")
|
||||
if h.startswith("UC") and len(h) == 24:
|
||||
return h
|
||||
for param in ("forHandle", "forUsername"):
|
||||
|
|
@ -47,14 +48,7 @@ class YouTubeClient:
|
|||
resp = await http_request(
|
||||
HTTPMethod.GET,
|
||||
url=f"{YT}/search",
|
||||
params={
|
||||
"part": "snippet",
|
||||
"channelId": channel_id,
|
||||
"order": "viewCount",
|
||||
"type": "video",
|
||||
"maxResults": 10,
|
||||
"key": self.api_key,
|
||||
},
|
||||
params={"part": "snippet", "channelId": channel_id, "order": "viewCount", "type": "video", "maxResults": 10, "key": self.api_key},
|
||||
label="yt-search",
|
||||
)
|
||||
if resp and resp.is_success:
|
||||
|
|
@ -65,11 +59,7 @@ class YouTubeClient:
|
|||
resp = await http_request(
|
||||
HTTPMethod.GET,
|
||||
url=f"{YT}/videos",
|
||||
params={
|
||||
"part": "snippet,statistics,contentDetails",
|
||||
"id": ",".join(video_ids),
|
||||
"key": self.api_key,
|
||||
},
|
||||
params={"part": "snippet,statistics,contentDetails", "id": ",".join(video_ids), "key": self.api_key},
|
||||
label="yt-videos",
|
||||
)
|
||||
if resp and resp.is_success:
|
||||
|
|
|
|||
|
|
@ -1,9 +1,11 @@
|
|||
import logging
|
||||
from fastapi import FastAPI
|
||||
from dotenv import load_dotenv
|
||||
from api import routers
|
||||
import os
|
||||
|
||||
load_dotenv()
|
||||
logging.basicConfig(level=logging.INFO, format="%(asctime)s %(levelname)s %(name)s %(message)s")
|
||||
|
||||
app = FastAPI()
|
||||
|
||||
|
|
|
|||
|
|
@ -16,8 +16,7 @@ class AnalysisOptions(BaseModel):
|
|||
|
||||
|
||||
class AnalysisCreate(BaseModel):
|
||||
clinic_id: str | None = None
|
||||
url: str | None = None
|
||||
clinic_id: str
|
||||
channels: Channels
|
||||
options: AnalysisOptions = AnalysisOptions()
|
||||
|
||||
|
|
|
|||
|
|
@ -1,12 +0,0 @@
|
|||
from pydantic import BaseModel
|
||||
from typing import Any
|
||||
|
||||
|
||||
class ChannelVerifyRequest(BaseModel):
|
||||
youtube: str | None = None
|
||||
instagram: list[str] | None = None
|
||||
|
||||
|
||||
class ChannelVerifyResponse(BaseModel):
|
||||
youtube: dict[str, Any] | None = None
|
||||
instagram: list[dict[str, Any]] | None = None
|
||||
|
|
@ -3,9 +3,18 @@ from pydantic import BaseModel
|
|||
|
||||
class ClinicCreate(BaseModel):
|
||||
url: str
|
||||
name: str
|
||||
name_en: str | None = None
|
||||
address: str | None = None
|
||||
|
||||
|
||||
class ClinicResponse(BaseModel):
|
||||
hospital_id: str
|
||||
hospital_name: str
|
||||
hospital_name_en: str | None
|
||||
road_address: str | None
|
||||
url: str | None
|
||||
status: str
|
||||
raw_data: dict | None
|
||||
created_at: str
|
||||
updated_at: str
|
||||
|
||||
|
||||
class ClinicCreateResponse(BaseModel):
|
||||
|
|
|
|||
|
|
@ -1,4 +1,50 @@
|
|||
from common.db import fetchone, execute, is_done
|
||||
import asyncio
|
||||
import json
|
||||
import logging
|
||||
from common.db import fetchone, execute, is_done, get_analysis_raw_data, save_analysis_report
|
||||
from integrations.llm.llm_service import LLMService
|
||||
from integrations.llm.prompt import report_prompt
|
||||
from integrations.llm.schemas.report import ReportOutput
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
async def generate_report(analysis_run_id: str) -> ReportOutput:
|
||||
run = await fetchone(
|
||||
"SELECT hospital_id FROM analysis_runs WHERE analysis_run_id = %s",
|
||||
(analysis_run_id,),
|
||||
)
|
||||
clinic_row = await fetchone(
|
||||
"SELECT raw_data FROM hospital_baseinfo WHERE hospital_id = %s",
|
||||
(run["hospital_id"],),
|
||||
)
|
||||
raw_data = clinic_row["raw_data"] if clinic_row else None
|
||||
clinic = json.loads(raw_data) if isinstance(raw_data, str) else (raw_data or {})
|
||||
raw = await get_analysis_raw_data(analysis_run_id)
|
||||
|
||||
input_data = {
|
||||
"clinic_name": clinic.get("clinicName"),
|
||||
"clinic_name_en": clinic.get("clinicNameEn"),
|
||||
"address": clinic.get("address"),
|
||||
"phone": clinic.get("phone"),
|
||||
"slogan": clinic.get("slogan"),
|
||||
"services": json.dumps(clinic.get("services", []), ensure_ascii=False),
|
||||
"doctors": json.dumps(clinic.get("doctors", []), ensure_ascii=False),
|
||||
**{
|
||||
channel: json.dumps(data, ensure_ascii=False) if data else None
|
||||
for channel, data in raw.items()
|
||||
},
|
||||
}
|
||||
|
||||
return await LLMService(provider="perplexity").generate(report_prompt, input_data)
|
||||
|
||||
|
||||
async def run_report_task(analysis_run_id: str) -> None:
|
||||
logger.info("[report] start run=%s", analysis_run_id)
|
||||
result = await generate_report(analysis_run_id)
|
||||
await save_analysis_report(analysis_run_id, result.model_dump())
|
||||
await execute("UPDATE analysis_runs SET status = 'completed' WHERE analysis_run_id = %s", (analysis_run_id,))
|
||||
logger.info("[report] done run=%s", analysis_run_id)
|
||||
|
||||
|
||||
async def check_and_advance_analysis(analysis_run_id: str) -> None:
|
||||
|
|
@ -16,3 +62,4 @@ async def check_and_advance_analysis(analysis_run_id: str) -> None:
|
|||
]
|
||||
if all(results):
|
||||
await execute("UPDATE analysis_runs SET status = 'analyzing' WHERE analysis_run_id = %s", (analysis_run_id,))
|
||||
asyncio.create_task(run_report_task(analysis_run_id))
|
||||
|
|
|
|||
|
|
@ -1,53 +1,68 @@
|
|||
import logging
|
||||
from common.db import (
|
||||
set_instagram_status, save_instagram_raw_data,
|
||||
set_facebook_status, save_facebook_raw_data,
|
||||
set_naver_blog_status, save_naver_blog_raw_data,
|
||||
set_youtube_status, save_youtube_raw_data,
|
||||
set_gangnam_unni_status, save_gangnam_unni_raw_data,
|
||||
execute, save_hospital_raw_data,
|
||||
)
|
||||
from common.utils import get_env, normalize_handle
|
||||
from common.utils import get_env
|
||||
from services.analysis import check_and_advance_analysis
|
||||
from integrations.apify import ApifyClient
|
||||
from integrations.naver import NaverClient
|
||||
from integrations.youtube import YouTubeClient
|
||||
from integrations.firecrawl import FirecrawlClient
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
async def collect_instagram(analysis_run_id: str, row_id: int, url: str) -> None:
|
||||
print("start a insta")
|
||||
logger.info("[instagram] start run=%s url=%s", analysis_run_id, url)
|
||||
await set_instagram_status(row_id, "processing")
|
||||
data = await ApifyClient(get_env("APIFY_API_TOKEN")).fetch_instagram_profile(normalize_handle("instagram", url))
|
||||
data = await ApifyClient(get_env("APIFY_API_TOKEN")).get_instagram_profile(url)
|
||||
await save_instagram_raw_data(row_id, data)
|
||||
logger.info("[instagram] done run=%s", analysis_run_id)
|
||||
await check_and_advance_analysis(analysis_run_id)
|
||||
|
||||
|
||||
async def collect_facebook(analysis_run_id: str, row_id: int, url: str) -> None:
|
||||
print("start a facebook")
|
||||
logger.info("[facebook] start run=%s url=%s", analysis_run_id, url)
|
||||
await set_facebook_status(row_id, "processing")
|
||||
data = await ApifyClient(get_env("APIFY_API_TOKEN")).fetch_facebook_page(url)
|
||||
data = await ApifyClient(get_env("APIFY_API_TOKEN")).get_facebook_page(url)
|
||||
await save_facebook_raw_data(row_id, data)
|
||||
logger.info("[facebook] done run=%s", analysis_run_id)
|
||||
await check_and_advance_analysis(analysis_run_id)
|
||||
|
||||
|
||||
async def collect_naver_blog(analysis_run_id: str, row_id: int, url: str) -> None:
|
||||
print("start a blog")
|
||||
logger.info("[naver_blog] start run=%s url=%s", analysis_run_id, url)
|
||||
await set_naver_blog_status(row_id, "processing")
|
||||
data = await NaverClient(get_env("NAVER_CLIENT_ID"), get_env("NAVER_CLIENT_SECRET")).fetch_blog_rss(normalize_handle("naver_blog", url))
|
||||
data = await NaverClient(get_env("NAVER_CLIENT_ID"), get_env("NAVER_CLIENT_SECRET")).get_blog_rss(url)
|
||||
await save_naver_blog_raw_data(row_id, data)
|
||||
logger.info("[naver_blog] done run=%s", analysis_run_id)
|
||||
await check_and_advance_analysis(analysis_run_id)
|
||||
|
||||
|
||||
async def collect_youtube(analysis_run_id: str, row_id: int, url: str) -> None:
|
||||
print("start a youtube")
|
||||
logger.info("[youtube] start run=%s url=%s", analysis_run_id, url)
|
||||
await set_youtube_status(row_id, "processing")
|
||||
data = await YouTubeClient(get_env("YOUTUBE_API_KEY")).fetch_channel(normalize_handle("youtube", url))
|
||||
data = await YouTubeClient(get_env("YOUTUBE_API_KEY")).get_channel(url)
|
||||
await save_youtube_raw_data(row_id, data)
|
||||
logger.info("[youtube] done run=%s", analysis_run_id)
|
||||
await check_and_advance_analysis(analysis_run_id)
|
||||
|
||||
|
||||
async def collect_gangnam_unni(analysis_run_id: str, row_id: int, url: str) -> None:
|
||||
print("start a gangnam_unni")
|
||||
logger.info("[gangnam_unni] start run=%s url=%s", analysis_run_id, url)
|
||||
await set_gangnam_unni_status(row_id, "processing")
|
||||
data = await FirecrawlClient(get_env("FIRECRAWL_API_KEY")).fetch_gangnam_unni(url)
|
||||
data = await FirecrawlClient(get_env("FIRECRAWL_API_KEY")).get_gangnam_unni(url)
|
||||
await save_gangnam_unni_raw_data(row_id, data)
|
||||
logger.info("[gangnam_unni] done run=%s", analysis_run_id)
|
||||
await check_and_advance_analysis(analysis_run_id)
|
||||
|
||||
|
||||
async def collect_clinic_info(hospital_id: str, url: str) -> None:
|
||||
await execute("UPDATE hospital_baseinfo SET status = 'processing' WHERE hospital_id = %s", (hospital_id,))
|
||||
data = await FirecrawlClient(get_env("FIRECRAWL_API_KEY")).fetch_clinic_info(url)
|
||||
await save_hospital_raw_data(hospital_id, data)
|
||||
|
|
|
|||
Loading…
Reference in New Issue