file 업로드 엔드포인트 추가 (Azure Blob 연동)

upload
Mina Choi 2026-05-19 16:13:31 +09:00
parent 20fdf53264
commit 18d01357c0
6 changed files with 298 additions and 1 deletions

View File

@ -152,6 +152,23 @@ CREATE INDEX IX_analysis_runs_2
ON analysis_runs(owner_user_id);
-- file_data Table Create SQL
CREATE TABLE file_data
(
`id` INT NOT NULL AUTO_INCREMENT,
`analysis_run_id` CHAR(36) NOT NULL,
`hospital_id` CHAR(36) NULL,
`file_type` ENUM('image','video','audio','document','file') NOT NULL DEFAULT 'file',
`file_name` VARCHAR(255) NOT NULL,
`file_url` VARCHAR(2048) NOT NULL,
`size_bytes` BIGINT NULL,
`is_deleted` BOOLEAN NOT NULL DEFAULT FALSE,
`created_at` TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP,
PRIMARY KEY (id),
INDEX IX_file_data_1 (analysis_run_id, is_deleted)
);
-- hospital_history Table Create SQL
CREATE TABLE hospital_history
(

View File

@ -1,11 +1,13 @@
import logging
import uuid6
from fastapi import APIRouter, BackgroundTasks, Depends, status, HTTPException
from fastapi import APIRouter, BackgroundTasks, Depends, File, Form, HTTPException, UploadFile, status
from common.deps import verify_api_key
from common.db import fetchone, insert_instagram_row, insert_facebook_row, insert_naver_blog_row, insert_youtube_row, insert_gangnam_unni_row, insert_analysis_run
from models.analysis import AnalysisCreate, AnalysisStartResponse, AnalysisStatusResponse
from models.file import FileListItem, FileType, FileUploadResponse
from models.status import AnalysisStatus
from services.pipeline import run_pipeline
from services.file import get_analysis_files_response, handle_analysis_file_upload, soft_delete_analysis_file
router = APIRouter(prefix="/api/analysis", tags=["analysis"], dependencies=[Depends(verify_api_key)])
logger = logging.getLogger(__name__)
@ -47,6 +49,29 @@ async def start_analysis(body: AnalysisCreate, background_tasks: BackgroundTasks
)
@router.post("/{run_id}/files", status_code=status.HTTP_201_CREATED, response_model=FileUploadResponse)
async def upload_analysis_run_file(
run_id: str,
file: UploadFile = File(..., description="업로드할 파일"),
file_type: FileType = Form(default=FileType.FILE, description="파일 타입 (image/video/audio/document/file)"),
) -> FileUploadResponse:
logger.info("POST /api/analysis/%s/files name=%s file_type=%s", run_id, file.filename, file_type.value)
return await handle_analysis_file_upload(run_id, file, file_type)
@router.get("/{run_id}/files", response_model=list[FileListItem])
async def get_analysis_run_files(run_id: str) -> list[FileListItem]:
logger.info("GET /api/analysis/%s/files", run_id)
return await get_analysis_files_response(run_id)
@router.delete("/{run_id}/files/{file_id}", status_code=status.HTTP_204_NO_CONTENT)
async def delete_analysis_run_file(run_id: str, file_id: int) -> None:
logger.info("DELETE /api/analysis/%s/files/%s", run_id, file_id)
await soft_delete_analysis_file(analysis_run_id=run_id, file_id=file_id)
return None
@router.get("/{run_id}/status", response_model=AnalysisStatusResponse)
async def get_analysis_status(run_id: str):
logger.info("GET /api/analysis/%s/status", run_id)

View File

@ -70,6 +70,15 @@ async def fetchall(sql: str, args: tuple = ()) -> list[dict]:
finally:
conn.close()
async def fetchall(sql: str, args: tuple = ()) -> list[dict]:
pool = await get_pool()
async with pool.acquire() as conn:
await conn.ping(reconnect=True)
async with conn.cursor(aiomysql.DictCursor) as cur:
await cur.execute(sql, args)
return list(await cur.fetchall())
async def insert_instagram_row(hospital_id: str, url: str) -> int:
return await execute("INSERT INTO instagram_data (hospital_id, url) VALUES (%s, %s)", (hospital_id, url))
@ -90,6 +99,21 @@ async def insert_gangnam_unni_row(hospital_id: str, url: str) -> int:
return await execute("INSERT INTO gangnam_unni_data (hospital_id, url) VALUES (%s, %s)", (hospital_id, url))
async def insert_file_row(
analysis_run_id: str,
file_type: str,
file_name: str,
file_url: str,
size_bytes: int | None = None,
hospital_id: str | None = None,
) -> int:
return await execute(
"INSERT INTO file_data (analysis_run_id, hospital_id, file_type, file_name, file_url, size_bytes)"
" VALUES (%s, %s, %s, %s, %s, %s)",
(analysis_run_id, hospital_id, file_type, file_name, file_url, size_bytes),
)
async def insert_analysis_run(
analysis_run_id: str,
hospital_id: str,

View File

@ -0,0 +1,79 @@
import logging
import re
from pathlib import Path
import httpx
from common.utils import get_env
logger = logging.getLogger(__name__)
_DEFAULT_CONTENT_TYPE = "application/octet-stream"
_CONTENT_TYPES = {
".jpg": "image/jpeg",
".jpeg": "image/jpeg",
".png": "image/png",
".gif": "image/gif",
".webp": "image/webp",
".bmp": "image/bmp",
".mp3": "audio/mpeg",
".mp4": "video/mp4",
".mov": "video/quicktime",
".pdf": "application/pdf",
".txt": "text/plain",
".json": "application/json",
".csv": "text/csv",
}
_shared_client: httpx.AsyncClient | None = None
async def _get_client() -> httpx.AsyncClient:
global _shared_client
if _shared_client is None or _shared_client.is_closed:
_shared_client = httpx.AsyncClient(
timeout=httpx.Timeout(180.0, connect=10.0),
limits=httpx.Limits(max_keepalive_connections=10, max_connections=20),
)
return _shared_client
def _sanitize_filename(file_name: str) -> str:
stem = Path(file_name).stem
suffix = Path(file_name).suffix
sanitized = re.sub(r"[^가-힣a-zA-Z0-9]", "", stem) or "file"
return f"{sanitized}{suffix}"
def _guess_content_type(file_name: str) -> str:
return _CONTENT_TYPES.get(Path(file_name).suffix.lower(), _DEFAULT_CONTENT_TYPE)
class AzureBlobUploader:
def __init__(self, group: str, category: str = "file"):
self._group = group
self._category = category
self._base_url = get_env("AZURE_BLOB_BASE_URL")
self._sas_token = get_env("AZURE_BLOB_SAS_TOKEN").strip("?'\"")
def _public_url(self, file_name: str) -> str:
return f"{self._base_url}/{self._group}/{self._category}/{file_name}"
def _upload_url(self, file_name: str) -> str:
return f"{self._public_url(file_name)}?{self._sas_token}"
async def upload_bytes(self, content: bytes, file_name: str, content_type: str | None = None) -> str:
file_name = _sanitize_filename(file_name)
ct = content_type or _guess_content_type(file_name)
url = self._upload_url(file_name)
headers = {"Content-Type": ct, "x-ms-blob-type": "BlockBlob"}
client = await _get_client()
resp = await client.put(url, content=content, headers=headers)
if resp.status_code not in (200, 201):
logger.error("Azure Blob upload failed status=%s body=%s", resp.status_code, resp.text[:500])
raise RuntimeError(f"Azure Blob upload failed: {resp.status_code}")
public_url = self._public_url(file_name)
logger.info("Azure Blob uploaded url=%s size=%s", public_url, len(content))
return public_url

29
app/models/file.py Normal file
View File

@ -0,0 +1,29 @@
from enum import Enum
from pydantic import BaseModel
class FileType(str, Enum):
IMAGE = "image"
VIDEO = "video"
AUDIO = "audio"
DOCUMENT = "document"
FILE = "file"
class FileUploadResponse(BaseModel):
id: int
analysis_run_id: str
file_type: FileType
file_name: str
file_url: str
size_bytes: int | None = None
class FileListItem(BaseModel):
id: int
file_type: FileType
file_name: str
file_url: str
size_bytes: int | None = None
created_at: str

123
app/services/file.py Normal file
View File

@ -0,0 +1,123 @@
import logging
from fastapi import HTTPException, UploadFile
from common.db import execute, fetchall, fetchone, insert_file_row
from integrations.azure_blob import AzureBlobUploader
from models.file import FileListItem, FileType, FileUploadResponse
logger = logging.getLogger(__name__)
_MAX_UPLOAD_BYTES = 50 * 1024 * 1024 # 50MB
async def upload_file_to_blob(
content: bytes,
file_name: str,
group: str,
category: str = "file",
content_type: str | None = None,
) -> str:
"""Azure Blob에 파일을 업로드하고 public URL을 반환. DB는 건드리지 않음."""
uploader = AzureBlobUploader(group=group, category=category)
return await uploader.upload_bytes(content=content, file_name=file_name, content_type=content_type)
async def upload_analysis_file(
analysis_run_id: str,
content: bytes,
file_name: str,
file_type: str = "file",
content_type: str | None = None,
) -> tuple[int, str]:
"""analysis_run에 딸린 파일 업로드. Blob 업로드 + file_data row 생성. (file_id, url) 반환."""
run = await fetchone(
"SELECT hospital_id FROM analysis_runs WHERE analysis_run_id = %s",
(analysis_run_id,),
)
if not run:
raise HTTPException(status_code=404, detail="analysis_run not found")
hospital_id = run["hospital_id"]
public_url = await upload_file_to_blob(
content=content,
file_name=file_name,
group=analysis_run_id,
category=file_type,
content_type=content_type,
)
file_id = await insert_file_row(
analysis_run_id=analysis_run_id,
hospital_id=hospital_id,
file_type=file_type,
file_name=file_name,
file_url=public_url,
size_bytes=len(content),
)
logger.info("uploaded analysis file run=%s file_id=%s url=%s", analysis_run_id, file_id, public_url)
return file_id, public_url
async def list_analysis_files(analysis_run_id: str) -> list[dict]:
"""analysis_run에 딸린 (삭제 안 된) 파일 목록."""
return await fetchall(
"SELECT id, file_type, file_name, file_url, size_bytes, created_at FROM file_data"
" WHERE analysis_run_id = %s AND is_deleted = FALSE"
" ORDER BY created_at DESC",
(analysis_run_id,),
)
async def handle_analysis_file_upload(
analysis_run_id: str,
upload: UploadFile,
file_type: FileType,
) -> FileUploadResponse:
"""multipart UploadFile 검증 + 업로드 + 응답 모델 생성."""
if not upload.filename:
raise HTTPException(status_code=400, detail="filename is required")
content = await upload.read()
if not content:
raise HTTPException(status_code=400, detail="empty file")
if len(content) > _MAX_UPLOAD_BYTES:
raise HTTPException(status_code=413, detail=f"file too large (max {_MAX_UPLOAD_BYTES} bytes)")
file_id, public_url = await upload_analysis_file(
analysis_run_id=analysis_run_id,
content=content,
file_name=upload.filename,
file_type=file_type.value,
content_type=upload.content_type,
)
return FileUploadResponse(
id=file_id,
analysis_run_id=analysis_run_id,
file_type=file_type,
file_name=upload.filename,
file_url=public_url,
size_bytes=len(content),
)
async def get_analysis_files_response(analysis_run_id: str) -> list[FileListItem]:
"""run 존재 확인 + 응답 모델 생성."""
if not await fetchone("SELECT 1 FROM analysis_runs WHERE analysis_run_id = %s", (analysis_run_id,)):
raise HTTPException(status_code=404, detail="analysis_run not found")
rows = await list_analysis_files(analysis_run_id)
return [FileListItem(**{**r, "created_at": str(r["created_at"])}) for r in rows]
async def soft_delete_analysis_file(analysis_run_id: str, file_id: int) -> None:
"""analysis_run에 딸린 파일을 소프트 삭제. 멱등성 보장."""
row = await fetchone(
"SELECT id FROM file_data WHERE id = %s AND analysis_run_id = %s",
(file_id, analysis_run_id),
)
if not row:
raise HTTPException(status_code=404, detail="file not found")
await execute(
"UPDATE file_data SET is_deleted = TRUE WHERE id = %s AND is_deleted = FALSE",
(file_id,),
)
logger.info("soft-deleted analysis file run=%s file_id=%s", analysis_run_id, file_id)