# o2o-infinith-backend/app/api/analysis.py
# (repository viewer metadata: 143 lines, 7.2 KiB, Python)

import logging
import uuid6
from fastapi import APIRouter, BackgroundTasks, Depends, File, Form, HTTPException, UploadFile, status
from common.deps import verify_api_key
from common.db.hospital import select_hospital
from common.db.source import select_source_mainpage, insert_source, insert_raw_info
from common.db.run import insert_run, select_run_status
from common.utils import _normalize_homepage, _with_scheme
from models.analysis import AnalysisCreate, AnalysisStartResponse, AnalysisStatusResponse
from models.file import FileListItem, FileType, FileUploadResponse
from models.status import AnalysisStatus, SourceType
from services.pipeline import run_pipeline
from services.file_data import get_analysis_files_response, handle_analysis_file_upload, soft_delete_analysis_file
from mock_urls import MOCK_CLINICS
# Router for the analysis endpoints: every route registered on it is served under
# the /api/analysis prefix, tagged "analysis", and declares verify_api_key as a
# router-level dependency.
router = APIRouter(prefix="/api/analysis", tags=["analysis"], dependencies=[Depends(verify_api_key)])
# Module-level logger named after this module.
logger = logging.getLogger(__name__)
# When the client sends only some channels (or empty values), the gaps are filled
# from the mock_urls entry whose homepage matches (same rule for main and extra channels).
def _channels_from_mockurls(homepage_url: str) -> dict:
    """Look up the mock clinic whose homepage matches ``homepage_url``.

    Both the given URL and each clinic's ``urls["homepage"]`` are passed through
    ``_normalize_homepage`` before being compared.

    Returns:
        A dict keyed by snake_case channel name whose values are the matching
        clinic's URLs passed through ``_with_scheme``; an empty dict when the
        normalized input is falsy or no clinic in ``MOCK_CLINICS`` matches.
    """
    wanted = _normalize_homepage(homepage_url)
    if not wanted:
        return {}

    # (result key, key used inside the mock clinic's "urls" mapping)
    field_map = (
        # main
        ("instagram", "instagram"),
        ("facebook", "facebook"),
        ("naver_blog", "naverBlog"),
        ("youtube", "youtube"),
        ("gangnam_unni", "gangnamUnni"),
        # extra
        ("tiktok", "tiktok"),
        ("instagram_en", "instagramEn"),
        ("facebook_en", "facebookEn"),
        ("kakao_talk", "kakaoTalk"),
        ("naver_cafe", "naverCafe"),
    )

    for clinic in MOCK_CLINICS:
        clinic_urls = clinic["urls"]
        if _normalize_homepage(clinic_urls.get("homepage", "")) != wanted:
            continue
        # First matching clinic wins.
        return {
            channel: _with_scheme(clinic_urls.get(mock_key))
            for channel, mock_key in field_map
        }
    return {}
@router.post("", status_code=status.HTTP_202_ACCEPTED, response_model=AnalysisStartResponse)
async def start_analysis(body: AnalysisCreate, background_tasks: BackgroundTasks):
    # Creates an analysis run for the clinic in `body.clinic_id`, registers a
    # source + raw-info row for each channel that has a URL, and schedules
    # run_pipeline as a background task.
    #
    # Raises HTTPException(409) when select_hospital returns nothing for the clinic.
    # Returns an AnalysisStartResponse carrying the run id and the polling URL.
    logger.info("POST /api/analysis clinic_id=%s", body.clinic_id)

    candidate_run_id = str(uuid6.uuid7())
    hospital_id = body.clinic_id

    # Strictly speaking the hospital should be checked against owner_user_id here,
    # but this is a PoC; to be changed later.
    hospital = await select_hospital(hospital_id)
    if not hospital:
        # NOTE(review): 409 is returned although the detail says "not found" and
        # the status endpoint answers 404 for a missing run — confirm the intended code.
        raise HTTPException(status_code=409, detail="Clinic not found")

    # From here on, the id returned by insert_run is the one that is used.
    analysis_run_id = await insert_run(candidate_run_id, hospital_id, hospital["owner_user_id"])

    mainpage = await select_source_mainpage(hospital_id)
    if mainpage:
        await insert_raw_info(mainpage["source_id"], analysis_run_id, data_tag=SourceType.MAINPAGE)
        # Branding (HTML/CSS + Vision logo matching) uses the same homepage URL
        # as the mainpage for its source.
        branding_id = await insert_source(hospital_id, SourceType.BRANDING, mainpage["url"])
        await insert_raw_info(branding_id, analysis_run_id, data_tag=SourceType.BRANDING)

    # Channels the client did not send are supplemented from mock_urls by
    # homepage match (same rule for main and extra channels).
    homepage_url = mainpage.get("url") if mainpage else None
    mock = _channels_from_mockurls(homepage_url or "")

    def _resolve(field: str):
        # The client value (passed through _with_scheme, so inputs such as
        # 'gangnamunni.com/...' without scheme/www are completed) takes
        # precedence; the mock value is the fallback. The request field name
        # and the mock key are identical for every channel.
        return _with_scheme(getattr(body.channels, field)) or mock.get(field)

    # Five main channels (KR).
    main_specs = (
        (SourceType.INSTAGRAM, "instagram"),
        (SourceType.FACEBOOK, "facebook"),
        (SourceType.NAVER_BLOG, "naver_blog"),
        (SourceType.YOUTUBE, "youtube"),
        (SourceType.GANGNAM_UNNI, "gangnam_unni"),
    )
    main_channels = [(source_type, _resolve(field)) for source_type, field in main_specs]
    for source_type, url in main_channels:
        if not url:
            continue
        source_id = await insert_source(hospital_id, source_type, url)
        await insert_raw_info(source_id, analysis_run_id, data_tag=source_type)

    # Extra channels — instagram_en/facebook_en reuse the same source_type and are
    # distinguished by language='EN'; the others have their own source_type.
    extra_specs = (
        (SourceType.INSTAGRAM, "EN", "instagram_en"),
        (SourceType.FACEBOOK, "EN", "facebook_en"),
        (SourceType.TIKTOK, "KR", "tiktok"),
        (SourceType.KAKAOTALK, "KR", "kakao_talk"),
        (SourceType.NAVER_CAFE, "KR", "naver_cafe"),
    )
    extra_channels = [
        (source_type, language, _resolve(field))
        for source_type, language, field in extra_specs
    ]
    for source_type, language, url in extra_channels:
        if not url:
            continue
        source_id = await insert_source(hospital_id, source_type, url, language=language)
        await insert_raw_info(source_id, analysis_run_id, data_tag=source_type)

    logger.info("[analysis] main+extra channels resolved (mock_matched=%s)", bool(mock))

    background_tasks.add_task(run_pipeline, analysis_run_id)

    response = AnalysisStartResponse(
        analysis_run_id=analysis_run_id,
        clinic_id=hospital_id,
        status=AnalysisStatus.DISCOVERING,
        estimated_seconds=90,
        poll_url=f"/api/analysis/{analysis_run_id}/status",
    )
    return response
@router.post("/{run_id}/files", status_code=status.HTTP_201_CREATED, response_model=FileUploadResponse)
async def upload_analysis_run_file(
    run_id: str,
    file: UploadFile = File(..., description="업로드할 파일"),
    file_type: FileType = Form(default=FileType.FILE, description="파일 타입 (image/video/audio/document/file)"),
) -> FileUploadResponse:
    # Accepts one multipart file for the run `run_id` and delegates the whole
    # upload to handle_analysis_file_upload; `file_type` falls back to
    # FileType.FILE when the form field is omitted.
    logger.info(
        "POST /api/analysis/%s/files name=%s file_type=%s",
        run_id,
        file.filename,
        file_type.value,
    )
    uploaded = await handle_analysis_file_upload(run_id, file, file_type)
    return uploaded
@router.get("/{run_id}/files", response_model=list[FileListItem])
async def get_analysis_run_files(run_id: str) -> list[FileListItem]:
    # Returns whatever get_analysis_files_response yields for the run `run_id`.
    logger.info("GET /api/analysis/%s/files", run_id)
    files = await get_analysis_files_response(run_id)
    return files
@router.delete("/{run_id}/files/{file_id}", status_code=status.HTTP_204_NO_CONTENT)
async def delete_analysis_run_file(run_id: str, file_id: int) -> None:
    # Delegates to soft_delete_analysis_file for file `file_id` of run `run_id`;
    # the handler itself returns nothing (204 No Content).
    logger.info("DELETE /api/analysis/%s/files/%s", run_id, file_id)
    await soft_delete_analysis_file(analysis_run_id=run_id, file_id=file_id)
@router.get("/{run_id}/status", response_model=AnalysisStatusResponse)
async def get_analysis_status(run_id: str):
    # Reports the stored status of the run `run_id`.
    #
    # Raises HTTPException(404) when select_run_status returns None.
    logger.info("GET /api/analysis/%s/status", run_id)

    stored_status = await select_run_status(run_id)
    if stored_status is None:
        raise HTTPException(status_code=404, detail="Run not found")

    # Only `status` comes from the stored run; progress, current_step,
    # channel_errors and completed_at are hard-coded constants here.
    payload = AnalysisStatusResponse(
        analysis_run_id=run_id,
        status=AnalysisStatus(stored_status),
        progress=50.0,
        current_step="",
        channel_errors={},
        completed_at=None,
    )
    return payload