diff --git a/SQL/db_create.sql b/SQL/db_create.sql index 863524b..18e81c6 100644 --- a/SQL/db_create.sql +++ b/SQL/db_create.sql @@ -65,6 +65,9 @@ CREATE TABLE hospital_baseinfo `brn` VARCHAR(50) NOT NULL, `road_address` VARCHAR(100) NULL, `site_address` VARCHAR(100) NULL, + `url` VARCHAR(500) NULL, + `status` VARCHAR(20) NOT NULL DEFAULT 'start', + `raw_data` JSON NULL, `created_at` TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP, `updated_at` TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP, PRIMARY KEY (hospital_id) @@ -133,6 +136,8 @@ CREATE TABLE analysis_runs `naver_blog_data_id` INT NULL, `youtube_data_id` INT NULL, `gangnam_unni_data_id` INT NULL, + `report_data` JSON NULL, + `plan_data` JSON NULL, `created_at` TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP, `updated_at` TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP, PRIMARY KEY (analysis_run_id) diff --git a/app/api/__init__.py b/app/api/__init__.py index d12583e..c57cdaf 100644 --- a/app/api/__init__.py +++ b/app/api/__init__.py @@ -2,6 +2,5 @@ from .clinics import router as clinics_router from .analyses import router as analyses_router from .reports import router as reports_router from .plans import router as plans_router -from .channels import router as channels_router -routers = [clinics_router, analyses_router, reports_router, plans_router, channels_router] +routers = [clinics_router, analyses_router, reports_router, plans_router] diff --git a/app/api/analyses.py b/app/api/analyses.py index a352bd1..fd995b4 100644 --- a/app/api/analyses.py +++ b/app/api/analyses.py @@ -1,3 +1,4 @@ +import logging import uuid6 from fastapi import APIRouter, BackgroundTasks, Depends, status, HTTPException from common.deps import verify_api_key @@ -7,10 +8,12 @@ from models.status import AnalysisStatus from services.collect import collect_instagram, collect_facebook, collect_naver_blog, collect_youtube, collect_gangnam_unni router = APIRouter(prefix="/api/analyses", tags=["analyses"], dependencies=[Depends(verify_api_key)]) +logger = logging.getLogger(__name__) @router.post("", status_code=status.HTTP_202_ACCEPTED, response_model=AnalysisStartResponse) async def start_analysis(body: AnalysisCreate, background_tasks: BackgroundTasks): + logger.info("POST /api/analyses clinic_id=%s", body.clinic_id) analysis_run_id = str(uuid6.uuid7()) hospital_id = body.clinic_id @@ -23,8 +26,7 @@ async def start_analysis(body: AnalysisCreate, background_tasks: BackgroundTasks raise HTTPException(status_code=409, detail="Clinic not found") owner_user_id = hospital["owner_user_id"] if hospital else 0 - ig_url = body.channels.instagram[0] if isinstance(body.channels.instagram, list) else body.channels.instagram - ig_id = await insert_instagram_row(hospital_id, ig_url) if ig_url else None + ig_id = await insert_instagram_row(hospital_id, body.channels.instagram) if body.channels.instagram else None fb_id = await insert_facebook_row(hospital_id, body.channels.facebook) if body.channels.facebook else None nb_id = await insert_naver_blog_row(hospital_id, body.channels.naver_blog) if body.channels.naver_blog else None yt_id = await insert_youtube_row(hospital_id, body.channels.youtube) if body.channels.youtube else None @@ -33,9 +35,9 @@ async def start_analysis(body: AnalysisCreate, background_tasks: BackgroundTasks analysis_run_id = await insert_analysis_run(analysis_run_id, hospital_id, owner_user_id, ig_id, fb_id, nb_id, yt_id, gu_id) if ig_id: - background_tasks.add_task(collect_instagram, analysis_run_id, ig_id, ig_url) + background_tasks.add_task(collect_instagram, analysis_run_id, ig_id, body.channels.instagram) if fb_id: - background_tasks.add_task(collect_facebook, analysis_run_id, fb_id, f"https://www.facebook.com/{body.channels.facebook}") + background_tasks.add_task(collect_facebook, analysis_run_id, fb_id, body.channels.facebook) if nb_id: background_tasks.add_task(collect_naver_blog, analysis_run_id, nb_id, body.channels.naver_blog) if yt_id: @@ -54,7 +56,10 @@ async def start_analysis(body: AnalysisCreate, background_tasks: BackgroundTasks @router.get("/{run_id}/status", response_model=AnalysisStatusResponse) async def get_analysis_status(run_id: str): + logger.info("GET /api/analyses/%s/status", run_id) row = await fetchone("SELECT status FROM analysis_runs WHERE analysis_run_id = %s", (run_id,)) + if not row: + raise HTTPException(status_code=404, detail="Run not found") return AnalysisStatusResponse( analysis_run_id=run_id, status=AnalysisStatus(row["status"]), diff --git a/app/api/channels.py b/app/api/channels.py deleted file mode 100644 index c11796a..0000000 --- a/app/api/channels.py +++ /dev/null @@ -1,23 +0,0 @@ -from fastapi import APIRouter, Depends -from common.deps import verify_api_key -from models.channel import ChannelVerifyRequest, ChannelVerifyResponse - -router = APIRouter(prefix="/api/channels", tags=["channels"], dependencies=[Depends(verify_api_key)]) - - -# will not use - -@router.post("/verify", response_model=ChannelVerifyResponse) -async def verify_channels(body: ChannelVerifyRequest): - return ChannelVerifyResponse( - youtube={ - "handle": body.youtube, - "verified": True, - "display_name": "바노바기 BANOBAGI", - "followers": 12345, - } if body.youtube else None, - instagram=[ - {"handle": handle, "verified": "unverifiable", "note": "Instagram 로그인 벽"} - for handle in body.instagram - ] if body.instagram else None, - ) diff --git a/app/api/clinics.py b/app/api/clinics.py index ab31c98..917e7eb 100644 --- a/app/api/clinics.py +++ b/app/api/clinics.py @@ -1,28 +1,64 @@ +import logging import uuid6 -from fastapi import APIRouter, Depends, status +from fastapi import APIRouter, Depends, HTTPException, status from common.deps import verify_api_key -from common.db import insert_hospital -from models.clinic import ClinicCreate, ClinicCreateResponse, ClinicHistoryResponse, RunSummary +from common.db import insert_hospital, fetchone +from common.utils import get_env +from integrations.firecrawl import FirecrawlClient +from models.clinic import ClinicCreate, ClinicCreateResponse, ClinicResponse, ClinicHistoryResponse, RunSummary router = APIRouter(prefix="/api/clinics", tags=["clinics"], dependencies=[Depends(verify_api_key)]) +logger = logging.getLogger(__name__) + +_REQUIRED_FIELDS = ["clinicName"] +_COLLECTED_FIELDS = ["clinicName", "clinicNameEn", "address", "phone", "slogan", "services", "doctors"] @router.post("", status_code=status.HTTP_201_CREATED, response_model=ClinicCreateResponse) async def create_clinic(body: ClinicCreate): + logger.info("POST /api/clinics url=%s", body.url) + info = await FirecrawlClient(get_env("FIRECRAWL_API_KEY")).fetch_clinic_info(body.url) + + missing = [f for f in _COLLECTED_FIELDS if not (info or {}).get(f)] + required_missing = [f for f in _REQUIRED_FIELDS if f in missing] + if required_missing: + raise HTTPException(status_code=404, detail={"missing": missing}) + hospital_id = str(uuid6.uuid7()) - row = await insert_hospital(hospital_id, body.name, body.name_en, body.address, body.url) + row = await insert_hospital( + hospital_id, + name=info["clinicName"], + name_en=info.get("clinicNameEn"), + road_address=info.get("address"), + url=body.url, + raw_data=info, + ) return ClinicCreateResponse( id=hospital_id, url=body.url, - name=body.name, + name=info["clinicName"], created_at=str(row["created_at"]), ) +@router.get("/{hospital_id}", response_model=ClinicResponse) +async def get_clinic(hospital_id: str): + logger.info("GET /api/clinics/%s", hospital_id) + row = await fetchone( + "SELECT hospital_id, hospital_name, hospital_name_en, road_address, url, status, raw_data, created_at, updated_at" + " FROM hospital_baseinfo WHERE hospital_id = %s", + (hospital_id,), + ) + if not row: + raise HTTPException(status_code=404, detail="Clinic not found") + return ClinicResponse(**{**row, "created_at": str(row["created_at"]), "updated_at": str(row["updated_at"])}) + + # Not done @router.get("/{id}/history", response_model=ClinicHistoryResponse) async def get_clinic_history(id: str): + logger.info("GET /api/clinics/%s/history", id) return ClinicHistoryResponse( clinic_id=id, runs=[ diff --git a/app/api/plans.py b/app/api/plans.py index d6e36fd..aec19b8 100644 --- a/app/api/plans.py +++ b/app/api/plans.py @@ -1,12 +1,15 @@ +import logging from fastapi import APIRouter, Depends, status from common.deps import verify_api_key from models.plan import PlanCreate, PlanResponse router = APIRouter(prefix="/api/plans", tags=["plans"], dependencies=[Depends(verify_api_key)]) +logger = logging.getLogger(__name__) @router.post("", status_code=status.HTTP_201_CREATED, response_model=PlanResponse) async def create_plan(body: PlanCreate): + logger.info("POST /api/plans run_id=%s", body.analysis_run_id) return PlanResponse( id="33333333-3333-3333-3333-333333333333", analysis_run_id="22222222-2222-2222-2222-222222222222", @@ -20,6 +23,7 @@ async def create_plan(body: PlanCreate): @router.get("/{id}", response_model=PlanResponse) async def get_plan(id: str): + logger.info("GET /api/plans/%s", id) return PlanResponse( id=id, analysis_run_id="22222222-2222-2222-2222-222222222222", diff --git a/app/api/reports.py b/app/api/reports.py index 8ca82bf..1c1ff96 100644 --- a/app/api/reports.py +++ b/app/api/reports.py @@ -1,24 +1,24 @@ -from fastapi import APIRouter, Depends +import json +import logging +from fastapi import APIRouter, Depends, HTTPException, Response +from common.db import fetchone from common.deps import verify_api_key -from models.report import ReportResponse, ClinicInfo +from integrations.llm.schemas.report import ReportOutput router = APIRouter(prefix="/api/reports", tags=["reports"], dependencies=[Depends(verify_api_key)]) +logger = logging.getLogger(__name__) -@router.get("/{run_id}", response_model=ReportResponse) +@router.get("/{run_id}", response_model=ReportOutput | None) async def get_report(run_id: str): - return ReportResponse( - id=run_id, - clinic=ClinicInfo(name="바노바기성형외과", url="https://www.banobagi.com"), - overall_score=82, - youtube={}, - instagram={}, - facebook={}, - naver_place={}, - naver_blog={}, - gangnam_unni={}, - conversion_strategy={}, - roadmap=[], - kpis=[], - generated_at="2026-04-20T09:01:30Z", + logger.info("GET /api/reports/%s", run_id) + row = await fetchone( + "SELECT report_data FROM analysis_runs WHERE analysis_run_id = %s", + (run_id,), ) + if row is None: + raise HTTPException(status_code=404, detail="Run not found") + if row["report_data"] is None: + return Response(status_code=204) + data = json.loads(row["report_data"]) if isinstance(row["report_data"], str) else row["report_data"] + return ReportOutput(**data) diff --git a/app/common/db.py b/app/common/db.py index d13b781..36af89d 100644 --- a/app/common/db.py +++ b/app/common/db.py @@ -76,6 +76,13 @@ async def insert_analysis_run( +async def save_analysis_report(analysis_run_id: str, data: dict) -> None: + await execute( + "UPDATE analysis_runs SET report_data = %s WHERE analysis_run_id = %s", + (json.dumps(data, ensure_ascii=False), analysis_run_id), + ) + + async def is_done(table: str, row_id: int | None) -> bool: if row_id is None: return True @@ -83,6 +90,30 @@ async def is_done(table: str, row_id: int | None) -> bool: return r["status"] == "done" +async def _fetch_raw(table: str, row_id: int | None) -> dict | None: + if row_id is None: + return None + row = await fetchone(f"SELECT raw_data FROM {table} WHERE id = %s", (row_id,)) + if not row or not row["raw_data"]: + return None + return json.loads(row["raw_data"]) if isinstance(row["raw_data"], str) else row["raw_data"] + + +async def get_analysis_raw_data(analysis_run_id: str) -> dict: + run = await fetchone( + "SELECT instagram_data_id, facebook_data_id, naver_blog_data_id, youtube_data_id, gangnam_unni_data_id" + " FROM analysis_runs WHERE analysis_run_id = %s", + (analysis_run_id,), + ) + return { + "instagram": await _fetch_raw("instagram_data", run["instagram_data_id"]), + "facebook": await _fetch_raw("facebook_data", run["facebook_data_id"]), + "naver_blog": await _fetch_raw("naver_blog_data", run["naver_blog_data_id"]), + "youtube": await _fetch_raw("youtube_data", run["youtube_data_id"]), + "gangnam_unni": await _fetch_raw("gangnam_unni_data", run["gangnam_unni_data_id"]), + } + + async def set_instagram_status(row_id: int, status: str) -> None: await execute("UPDATE instagram_data SET status = %s WHERE id = %s", (status, row_id)) @@ -129,14 +160,37 @@ async def insert_hospital( name_en: str | None = None, road_address: str | None = None, site_address: str | None = None, + url: str | None = None, + raw_data: dict | None = None, owner_user_id: int = 0, brn: str = "", ) -> dict: await execute( - "INSERT INTO hospital_baseinfo (hospital_id, hospital_name, hospital_name_en, road_address, site_address, owner_user_id, brn) VALUES (%s, %s, %s, %s, %s, %s, %s)", - (hospital_id, name, name_en, road_address, site_address, owner_user_id, brn), + "INSERT INTO hospital_baseinfo (hospital_id, hospital_name, hospital_name_en, road_address, site_address, url, raw_data, status, owner_user_id, brn)" + " VALUES (%s, %s, %s, %s, %s, %s, %s, 'done', %s, %s)", + (hospital_id, name, name_en, road_address, site_address, url, + json.dumps(raw_data, ensure_ascii=False) if raw_data else None, + owner_user_id, brn), ) return await fetchone( "SELECT created_at FROM hospital_baseinfo WHERE hospital_id = %s", (hospital_id,), ) + + +async def save_hospital_raw_data(hospital_id: str, data: dict) -> None: + await execute( + "UPDATE hospital_baseinfo" + " SET raw_data = %s, status = 'done'," + " hospital_name = COALESCE(%s, hospital_name)," + " hospital_name_en = COALESCE(%s, hospital_name_en)," + " road_address = COALESCE(%s, road_address)" + " WHERE hospital_id = %s", + ( + json.dumps(data, ensure_ascii=False), + data.get("clinicName"), + data.get("clinicNameEn"), + data.get("address"), + hospital_id, + ), + ) diff --git a/app/common/deps.py b/app/common/deps.py index 2622c05..1dfe35b 100644 --- a/app/common/deps.py +++ b/app/common/deps.py @@ -2,7 +2,5 @@ import os from fastapi import Header, HTTPException async def verify_api_key(x_api_key: str = Header(...)): - print(x_api_key) - print(os.getenv("API_KEY")) if x_api_key != os.getenv("API_KEY"): raise HTTPException(status_code=401, detail="Invalid API Key") diff --git a/app/integrations/apify.py b/app/integrations/apify.py index f2bf76a..4eea74e 100644 --- a/app/integrations/apify.py +++ b/app/integrations/apify.py @@ -1,4 +1,5 @@ from http import HTTPMethod +from urllib.parse import urlparse from common.utils import http_request APIFY_BASE = "https://api.apify.com/v2" @@ -32,8 +33,9 @@ class ApifyClient: return [] return items_resp.json() - async def fetch_instagram_profile(self, handle: str) -> dict | None: - items = await self._run_actor("apify~instagram-profile-scraper", {"usernames": [handle], "resultsLimit": 12}) + async def fetch_instagram_profile(self, url: str) -> dict | None: + username = urlparse(url).path.strip("/").split("/")[0] if "://" in url else url.lstrip("@") + items = await self._run_actor("apify~instagram-profile-scraper", {"usernames": [username], "resultsLimit": 12}) return items[0] if items else None async def get_instagram_profile(self, handle: str) -> dict | None: @@ -131,7 +133,7 @@ class ApifyClient: if not page: return None return { - "pageName": page["title"], + "pageName": page.get("title") or page.get("name"), "pageUrl": page.get("pageUrl", page_url), "followers": page.get("followers", 0), "likes": page.get("likes", 0), diff --git a/app/integrations/firecrawl.py b/app/integrations/firecrawl.py index f50503d..f83db21 100644 --- a/app/integrations/firecrawl.py +++ b/app/integrations/firecrawl.py @@ -67,6 +67,88 @@ class FirecrawlClient: return [] return (data.get("json") or {}).get("socialLinks", []) + async def fetch_clinic_info(self, url: str) -> dict | None: + resp = await http_request( + HTTPMethod.POST, + url=f"{FIRECRAWL_BASE}/scrape", + headers=self._headers(), + json_body={ + "url": url, + "formats": ["json", "links"], + "jsonOptions": { + "prompt": "Extract: clinic name (Korean), clinic name (English), address, phone, business hours, slogan, services offered, doctors with name/title/specialty, brand identity (primary/accent/background/text colors in hex, heading/body fonts, logo URL, favicon URL)", + "schema": { + "type": "object", + "properties": { + "clinicName": {"type": "string"}, + "clinicNameEn": {"type": "string"}, + "address": {"type": "string"}, + "phone": {"type": "string"}, + "businessHours": {"type": "string"}, + "slogan": {"type": "string"}, + "services": {"type": "array", "items": {"type": "string"}}, + "doctors": { + "type": "array", + "items": { + "type": "object", + "properties": { + "name": {"type": "string"}, + "title": {"type": "string"}, + "specialty": {"type": "string"}, + }, + }, + }, + # "socialMedia": { + # "type": "object", + # "properties": { + # "instagram": {"type": "string"}, + # "youtube": {"type": "string"}, + # "blog": {"type": "string"}, + # "facebook": {"type": "string"}, + # "tiktok": {"type": "string"}, + # "kakao": {"type": "string"}, + # }, + # }, + "branding": { + "type": "object", + "properties": { + "primaryColor": {"type": "string"}, + "accentColor": {"type": "string"}, + "backgroundColor": {"type": "string"}, + "textColor": {"type": "string"}, + "headingFont": {"type": "string"}, + "bodyFont": {"type": "string"}, + "logoUrl": {"type": "string"}, + "faviconUrl": {"type": "string"}, + }, + }, + }, + }, + }, + "waitFor": 5000, + }, + timeout=60, + label="firecrawl-clinic-info", + ) + if not resp or not resp.is_success: + return None + data = resp.json().get("data") or {} + info = data.get("json") or {} + return { + "clinicName": info.get("clinicName"), + "clinicNameEn": info.get("clinicNameEn"), + "address": info.get("address"), + "phone": info.get("phone"), + "businessHours": info.get("businessHours"), + "slogan": info.get("slogan"), + "services": info.get("services", []), + "doctors": info.get("doctors", []), + # "socialMedia": info.get("socialMedia", {}), + "branding": info.get("branding", {}), + "siteLinks": data.get("links", []), + "sourceUrl": url, + } + async def fetch_gangnam_unni(self, hospital_url: str) -> dict | None: resp = await http_request( HTTPMethod.POST, diff --git a/app/integrations/llm/__init__.py b/app/integrations/llm/__init__.py index 4a857dd..915cce6 100644 --- a/app/integrations/llm/__init__.py +++ b/app/integrations/llm/__init__.py @@ -1,3 +1,3 @@ -from .service import LLMService +from .llm_service import LLMService from .prompt import Prompt diff --git a/app/integrations/llm/llm_service.py b/app/integrations/llm/llm_service.py new file mode 100644 index 0000000..8272d54 --- /dev/null +++ b/app/integrations/llm/llm_service.py @@ -0,0 +1,76 @@ +from pydantic import BaseModel +from openai import AsyncOpenAI +from common.utils import get_env +from .prompt import Prompt + + +class LLMResponseError(Exception): + def __init__(self, status: str, code: str = None, message: str = None): + self.status = status + self.code = code + self.message = message + super().__init__(f"LLM response failed: status={status}, code={code}, message={message}") + + +class LLMService: + def __init__(self, provider: str = "openai", max_retries: int = 2): + self.provider = provider + self.max_retries = max_retries + match provider: + case "openai": + self.client = AsyncOpenAI(api_key=get_env("OPENAI_API_KEY")) + case "perplexity": + self.client = AsyncOpenAI( + api_key=get_env("PERPLEXITY_API_KEY"), + base_url="https://api.perplexity.ai", + ) + case "gemini": + self.client = AsyncOpenAI( + api_key=get_env("GEMINI_API_KEY"), + base_url="https://generativelanguage.googleapis.com/v1beta/openai/", + ) + case _: + raise NotImplementedError(f"Unknown provider: {provider}") + + async def generate( + self, + prompt: Prompt, + input_data: dict, + ) -> BaseModel: + prompt_text = prompt.build(input_data) + last_error = None + + for attempt in range(self.max_retries + 1): + if self.provider == "perplexity": + response = await self.client.chat.completions.create( + model=prompt.model, + messages=[{"role": "user", "content": prompt_text}], + response_format={ + "type": "json_schema", + "json_schema": {"name": prompt.output_class.__name__, "schema": prompt.output_class.model_json_schema()}, + }, + ) + choice = response.choices[0] + if choice.finish_reason == "stop": + return prompt.output_class.model_validate_json(choice.message.content) + last_error = LLMResponseError("failed", choice.finish_reason, f"unexpected finish_reason: {choice.finish_reason}") + else: + response = await self.client.beta.chat.completions.parse( + model=prompt.model, + messages=[{"role": "user", "content": prompt_text}], + response_format=prompt.output_class, + ) + choice = response.choices[0] + finish_reason = choice.finish_reason + + if finish_reason == "stop": + return choice.message.parsed + + if finish_reason == "length": + last_error = LLMResponseError("incomplete", finish_reason, "max tokens reached") + elif finish_reason == "content_filter": + last_error = LLMResponseError("failed", finish_reason, "blocked by content filter") + else: + last_error = LLMResponseError("failed", finish_reason, f"unexpected finish_reason: {finish_reason}") + + raise last_error diff --git a/app/integrations/llm/prompt.py b/app/integrations/llm/prompt.py index 1e3f04a..ae91dd9 100644 --- a/app/integrations/llm/prompt.py +++ b/app/integrations/llm/prompt.py @@ -1,19 +1,40 @@ +import os from pydantic import BaseModel +from common.utils import get_env +from integrations.llm.schemas.report import ReportInput, ReportOutput + +_PROMPT_DIR = os.path.join(os.path.dirname(__file__), "temp-prompt") class Prompt: - def __init__( - self, - template: str, - model: str, - input_class: type[BaseModel], - output_class: type[BaseModel], - ): - self.template = template - self.model = model + file_name: str + prompt_model: str + input_class: type[BaseModel] + output_class: type[BaseModel] + + def __init__(self, file_name: str, prompt_model: str, input_class: type[BaseModel], output_class: type[BaseModel]): + self.file_name = file_name + self.prompt_model = prompt_model self.input_class = input_class self.output_class = output_class + self.template, self.model = self._load_prompt() + + def _load_prompt(self) -> tuple[str, str]: + with open(os.path.join(_PROMPT_DIR, self.file_name), encoding="utf-8") as f: + template = f.read() + return template, get_env(self.prompt_model) + + def _reload_prompt(self): + self.template, self.model = self._load_prompt() def build(self, input_data: dict) -> str: verified = self.input_class(**input_data) return self.template.format(**verified.model_dump()) + + +report_prompt = Prompt( + file_name="report_prompt.txt", + prompt_model="REPORT_MODEL", + input_class=ReportInput, + output_class=ReportOutput, +) diff --git a/app/integrations/llm/schemas/report.py b/app/integrations/llm/schemas/report.py new file mode 100644 index 0000000..0757e14 --- /dev/null +++ b/app/integrations/llm/schemas/report.py @@ -0,0 +1,43 @@ +from pydantic import BaseModel + + +# template.format(**model_dump()) 에 삽입될 변수들 +# 각 채널 raw_data를 호출부에서 json.dumps()로 직렬화해서 넘겨야 함 +class ReportInput(BaseModel): + clinic_name: str | None = None + clinic_name_en: str | None = None + address: str | None = None + phone: str | None = None + slogan: str | None = None + services: str | None = None + doctors: str | None = None + instagram: str | None = None + facebook: str | None = None + naver_blog: str | None = None + youtube: str | None = None + gangnam_unni: str | None = None + + +class ChannelScore(BaseModel): + score: int + summary: str + strengths: list[str] + weaknesses: list[str] + + +class ConversionStrategy(BaseModel): + summary: str + actions: list[str] + + +# response_format으로 OpenAI structured output에 전달 — dict 필드 사용 불가 +class ReportOutput(BaseModel): + overall_score: int + instagram: ChannelScore | None = None + facebook: ChannelScore | None = None + naver_blog: ChannelScore | None = None + youtube: ChannelScore | None = None + gangnam_unni: ChannelScore | None = None + conversion_strategy: ConversionStrategy + roadmap: list[str] + kpis: list[str] diff --git a/app/integrations/llm/service.py b/app/integrations/llm/service.py deleted file mode 100644 index f554470..0000000 --- a/app/integrations/llm/service.py +++ /dev/null @@ -1,61 +0,0 @@ -from pydantic import BaseModel -from openai import AsyncOpenAI -from common.utils import get_env -from .prompt import Prompt - - -class LLMResponseError(Exception): - def __init__(self, status: str, code: str = None, message: str = None): - self.status = status - self.code = code - self.message = message - super().__init__(f"LLM response failed: status={status}, code={code}, message={message}") - - -class LLMService: - def __init__(self, provider: str = "openai", max_retries: int = 2): - self.max_retries = max_retries - match provider: - case "openai": - self.client = AsyncOpenAI(api_key=get_env("OPENAI_API_KEY")) - case "perplexity": - self.client = AsyncOpenAI( - api_key=get_env("PERPLEXITY_API_KEY"), - base_url="https://api.perplexity.ai", - ) - case "gemini": - self.client = AsyncOpenAI( - api_key=get_env("GEMINI_API_KEY"), - base_url="https://generativelanguage.googleapis.com/v1beta/openai/", - ) - case _: - raise NotImplementedError(f"Unknown provider: {provider}") - - async def generate( - self, - prompt: Prompt, - input_data: dict, - ) -> BaseModel: - prompt_text = prompt.build(input_data) - last_error = None - - for attempt in range(self.max_retries + 1): - response = await self.client.beta.chat.completions.parse( - model=prompt.model, - messages=[{"role": "user", "content": prompt_text}], - response_format=prompt.output_class, - ) - choice = response.choices[0] - finish_reason = choice.finish_reason - - if finish_reason == "stop": - return choice.message.parsed - - if finish_reason == "length": - last_error = LLMResponseError("incomplete", finish_reason, "max tokens reached") - elif finish_reason == "content_filter": - last_error = LLMResponseError("failed", finish_reason, "blocked by content filter") - else: - last_error = LLMResponseError("failed", finish_reason, f"unexpected finish_reason: {finish_reason}") - - raise last_error diff --git a/app/integrations/llm/temp-prompt/plan_prompt.txt b/app/integrations/llm/temp-prompt/plan_prompt.txt new file mode 100644 index 0000000..8b13789 --- /dev/null +++ b/app/integrations/llm/temp-prompt/plan_prompt.txt @@ -0,0 +1 @@ + diff --git a/app/integrations/llm/temp-prompt/report_prompt.txt b/app/integrations/llm/temp-prompt/report_prompt.txt new file mode 100644 index 0000000..f87383a --- /dev/null +++ b/app/integrations/llm/temp-prompt/report_prompt.txt @@ -0,0 +1,38 @@ +당신은 프리미엄 의료 마케팅 전문 분석가입니다. 아래 실제 수집된 데이터를 기반으로 종합 마케팅 리포트를 생성해주세요. +결과물은 한국어로 생성하세요 + +⚠️ 중요: 데이터가 null인 채널은 해당 항목을 null로 설정하세요. 데이터에 없는 수치는 절대 추측하지 마세요. + +## 병원 기본 정보 +- 병원명: {clinic_name} +- 영문명: {clinic_name_en} +- 주소: {address} +- 전화: {phone} +- 슬로건: {slogan} +- 시술: {services} +- 의료진: {doctors} + +## 채널 데이터 + +### 인스타그램 +{instagram} + +### 페이스북 +{facebook} + +### 네이버 블로그 +{naver_blog} + +### 유튜브 +{youtube} + +### 강남언니 +{gangnam_unni} + +## 분석 지침 + +- 점수는 0~100 기준입니다. +- strengths와 weaknesses는 각 3개 이상 작성하세요. +- roadmap은 우선순위 순으로 실행 가능한 액션으로 작성하세요. +- kpis는 실제 수집된 수치 기반으로 현실적인 측정 가능 지표로 작성하세요. +- conversion_strategy의 actions는 구체적인 실행 방안으로 작성하세요. diff --git a/app/integrations/naver.py b/app/integrations/naver.py index e28371f..168ba47 100644 --- a/app/integrations/naver.py +++ b/app/integrations/naver.py @@ -1,5 +1,6 @@ import re from http import HTTPMethod +from urllib.parse import urlparse from common.utils import http_request NAVER_BASE = "https://openapi.naver.com/v1/search" @@ -52,10 +53,10 @@ class NaverClient: return [] return resp.json().get("items", []) - async def fetch_blog_rss(self, blog_handle: str) -> str | None: + async def fetch_blog_rss(self, handle: str) -> str | None: resp = await http_request( HTTPMethod.GET, - url=f"https://rss.blog.naver.com/{blog_handle}.xml", + url=f"https://rss.blog.naver.com/{handle}.xml", timeout=15, label="naver-rss", ) @@ -63,7 +64,8 @@ class NaverClient: return None return resp.text - async def get_blog_rss(self, blog_handle: str) -> dict | None: + async def get_blog_rss(self, url: str) -> dict | None: + blog_handle = urlparse(url).path.strip("/").split("/")[0] if "://" in url else url xml = await self.fetch_blog_rss(blog_handle) if not xml: return None diff --git a/app/integrations/youtube.py b/app/integrations/youtube.py index 51793d3..edc1100 100644 --- a/app/integrations/youtube.py +++ b/app/integrations/youtube.py @@ -1,4 +1,5 @@ from http import HTTPMethod +from urllib.parse import urlparse from common.utils import http_request YT = "https://www.googleapis.com/youtube/v3" @@ -9,7 +10,7 @@ class YouTubeClient: self.api_key = api_key async def _resolve_channel_id(self, handle: str) -> str: - h = handle.lstrip("@") + h = urlparse(handle).path.strip("/").lstrip("@") if "://" in handle else handle.lstrip("@") if h.startswith("UC") and len(h) == 24: return h for param in ("forHandle", "forUsername"): @@ -47,14 +48,7 @@ class YouTubeClient: resp = await http_request( HTTPMethod.GET, url=f"{YT}/search", - params={ - "part": "snippet", - "channelId": channel_id, - "order": "viewCount", - "type": "video", - "maxResults": 10, - "key": self.api_key, - }, + params={"part": "snippet", "channelId": channel_id, "order": "viewCount", "type": "video", "maxResults": 10, "key": self.api_key}, label="yt-search", ) if resp and resp.is_success: @@ -65,11 +59,7 @@ class YouTubeClient: resp = await http_request( HTTPMethod.GET, url=f"{YT}/videos", - params={ - "part": "snippet,statistics,contentDetails", - "id": ",".join(video_ids), - "key": self.api_key, - }, + params={"part": "snippet,statistics,contentDetails", "id": ",".join(video_ids), "key": self.api_key}, label="yt-videos", ) if resp and resp.is_success: diff --git a/app/main.py b/app/main.py index 1a85be3..4a01db6 100644 --- a/app/main.py +++ b/app/main.py @@ -1,9 +1,11 @@ +import logging from fastapi import FastAPI from dotenv import load_dotenv from api import routers import os load_dotenv() +logging.basicConfig(level=logging.INFO, format="%(asctime)s %(levelname)s %(name)s %(message)s") app = FastAPI() diff --git a/app/models/analysis.py b/app/models/analysis.py index 8a9c220..2b4e7ec 100644 --- a/app/models/analysis.py +++ b/app/models/analysis.py @@ -16,8 +16,7 @@ class AnalysisOptions(BaseModel): class AnalysisCreate(BaseModel): - clinic_id: str | None = None - url: str | None = None + clinic_id: str channels: Channels options: AnalysisOptions = AnalysisOptions() diff --git a/app/models/channel.py b/app/models/channel.py deleted file mode 100644 index 093f422..0000000 --- a/app/models/channel.py +++ /dev/null @@ -1,12 +0,0 @@ -from pydantic import BaseModel -from typing import Any - - -class ChannelVerifyRequest(BaseModel): - youtube: str | None = None - instagram: list[str] | None = None - - -class ChannelVerifyResponse(BaseModel): - youtube: dict[str, Any] | None = None - instagram: list[dict[str, Any]] | None = None diff --git a/app/models/clinic.py b/app/models/clinic.py index 3c06f14..473b4a2 100644 --- a/app/models/clinic.py +++ b/app/models/clinic.py @@ -3,9 +3,18 @@ from pydantic import BaseModel class ClinicCreate(BaseModel): url: str - name: str - name_en: str | None = None - address: str | None = None + + +class ClinicResponse(BaseModel): + hospital_id: str + hospital_name: str + hospital_name_en: str | None + road_address: str | None + url: str | None + status: str + raw_data: dict | None + created_at: str + updated_at: str class ClinicCreateResponse(BaseModel): diff --git a/app/services/analysis.py b/app/services/analysis.py index e2da215..e266ece 100644 --- a/app/services/analysis.py +++ b/app/services/analysis.py @@ -1,4 +1,50 @@ -from common.db import fetchone, execute, is_done +import asyncio +import json +import logging +from common.db import fetchone, execute, is_done, get_analysis_raw_data, save_analysis_report +from integrations.llm.llm_service import LLMService +from integrations.llm.prompt import report_prompt +from integrations.llm.schemas.report import ReportOutput + +logger = logging.getLogger(__name__) + + +async def generate_report(analysis_run_id: str) -> ReportOutput: + run = await fetchone( + "SELECT hospital_id FROM analysis_runs WHERE analysis_run_id = %s", + (analysis_run_id,), + ) + clinic_row = await fetchone( + "SELECT raw_data FROM hospital_baseinfo WHERE hospital_id = %s", + (run["hospital_id"],), + ) + raw_data = clinic_row["raw_data"] if clinic_row else None + clinic = json.loads(raw_data) if isinstance(raw_data, str) else (raw_data or {}) + raw = await get_analysis_raw_data(analysis_run_id) + + input_data = { + "clinic_name": clinic.get("clinicName"), + "clinic_name_en": clinic.get("clinicNameEn"), + "address": clinic.get("address"), + "phone": clinic.get("phone"), + "slogan": clinic.get("slogan"), + "services": json.dumps(clinic.get("services", []), ensure_ascii=False), + "doctors": json.dumps(clinic.get("doctors", []), ensure_ascii=False), + **{ + channel: json.dumps(data, ensure_ascii=False) if data else None + for channel, data in raw.items() + }, + } + + return await LLMService(provider="perplexity").generate(report_prompt, input_data) + + +async def run_report_task(analysis_run_id: str) -> None: + logger.info("[report] start run=%s", analysis_run_id) + result = await generate_report(analysis_run_id) + await save_analysis_report(analysis_run_id, result.model_dump()) + await execute("UPDATE analysis_runs SET status = 'completed' WHERE analysis_run_id = %s", (analysis_run_id,)) + logger.info("[report] done run=%s", analysis_run_id) async def check_and_advance_analysis(analysis_run_id: str) -> None: @@ -16,3 +62,4 @@ async def check_and_advance_analysis(analysis_run_id: str) -> None: ] if all(results): await execute("UPDATE analysis_runs SET status = 'analyzing' WHERE analysis_run_id = %s", (analysis_run_id,)) + asyncio.create_task(run_report_task(analysis_run_id)) diff --git a/app/services/collect.py b/app/services/collect.py index 16b7457..24f7f26 100644 --- a/app/services/collect.py +++ b/app/services/collect.py @@ -1,53 +1,68 @@ +import logging from common.db import ( set_instagram_status, save_instagram_raw_data, set_facebook_status, save_facebook_raw_data, set_naver_blog_status, save_naver_blog_raw_data, set_youtube_status, save_youtube_raw_data, set_gangnam_unni_status, save_gangnam_unni_raw_data, + execute, save_hospital_raw_data, ) -from common.utils import get_env, normalize_handle +from common.utils import get_env from services.analysis import check_and_advance_analysis from integrations.apify import ApifyClient from integrations.naver import NaverClient from integrations.youtube import YouTubeClient from integrations.firecrawl import FirecrawlClient +logger = logging.getLogger(__name__) + async def collect_instagram(analysis_run_id: str, row_id: int, url: str) -> None: - print("start a insta") + logger.info("[instagram] start run=%s url=%s", analysis_run_id, url) await set_instagram_status(row_id, "processing") - data = await ApifyClient(get_env("APIFY_API_TOKEN")).fetch_instagram_profile(normalize_handle("instagram", url)) + data = await ApifyClient(get_env("APIFY_API_TOKEN")).get_instagram_profile(url) await save_instagram_raw_data(row_id, data) + logger.info("[instagram] done run=%s", analysis_run_id) await check_and_advance_analysis(analysis_run_id) async def collect_facebook(analysis_run_id: str, row_id: int, url: str) -> None: - print("start a facebook") + logger.info("[facebook] start run=%s url=%s", analysis_run_id, url) await set_facebook_status(row_id, "processing") - data = await ApifyClient(get_env("APIFY_API_TOKEN")).fetch_facebook_page(url) + data = await ApifyClient(get_env("APIFY_API_TOKEN")).get_facebook_page(url) await save_facebook_raw_data(row_id, data) + logger.info("[facebook] done run=%s", analysis_run_id) await check_and_advance_analysis(analysis_run_id) async def collect_naver_blog(analysis_run_id: str, row_id: int, url: str) -> None: - print("start a blog") + logger.info("[naver_blog] start run=%s url=%s", analysis_run_id, url) await set_naver_blog_status(row_id, "processing") - data = await NaverClient(get_env("NAVER_CLIENT_ID"), get_env("NAVER_CLIENT_SECRET")).fetch_blog_rss(normalize_handle("naver_blog", url)) + data = await NaverClient(get_env("NAVER_CLIENT_ID"), get_env("NAVER_CLIENT_SECRET")).get_blog_rss(url) await save_naver_blog_raw_data(row_id, data) + logger.info("[naver_blog] done run=%s", analysis_run_id) await check_and_advance_analysis(analysis_run_id) async def collect_youtube(analysis_run_id: str, row_id: int, url: str) -> None: - print("start a youtube") + logger.info("[youtube] start run=%s url=%s", analysis_run_id, url) await set_youtube_status(row_id, "processing") - data = await YouTubeClient(get_env("YOUTUBE_API_KEY")).fetch_channel(normalize_handle("youtube", url)) + data = await YouTubeClient(get_env("YOUTUBE_API_KEY")).get_channel(url) await save_youtube_raw_data(row_id, data) + logger.info("[youtube] done run=%s", analysis_run_id) await check_and_advance_analysis(analysis_run_id) async def collect_gangnam_unni(analysis_run_id: str, row_id: int, url: str) -> None: - print("start a gangnam_unni") + logger.info("[gangnam_unni] start run=%s url=%s", analysis_run_id, url) await set_gangnam_unni_status(row_id, "processing") - data = await FirecrawlClient(get_env("FIRECRAWL_API_KEY")).fetch_gangnam_unni(url) + data = await FirecrawlClient(get_env("FIRECRAWL_API_KEY")).get_gangnam_unni(url) await save_gangnam_unni_raw_data(row_id, data) + logger.info("[gangnam_unni] done run=%s", analysis_run_id) await check_and_advance_analysis(analysis_run_id) + + +async def collect_clinic_info(hospital_id: str, url: str) -> None: + await execute("UPDATE hospital_baseinfo SET status = 'processing' WHERE hospital_id = %s", (hospital_id,)) + data = await FirecrawlClient(get_env("FIRECRAWL_API_KEY")).fetch_clinic_info(url) + await save_hospital_raw_data(hospital_id, data)