file 업로드 엔드포인트 추가 (Azure Blob 연동)

upload
Mina Choi 2026-05-19 16:13:31 +09:00
parent 20fdf53264
commit 18d01357c0
6 changed files with 298 additions and 1 deletions

View File

@ -152,6 +152,23 @@ CREATE INDEX IX_analysis_runs_2
ON analysis_runs(owner_user_id); ON analysis_runs(owner_user_id);
-- file_data Table Create SQL
CREATE TABLE file_data
(
`id` INT NOT NULL AUTO_INCREMENT,
`analysis_run_id` CHAR(36) NOT NULL,
`hospital_id` CHAR(36) NULL,
`file_type` ENUM('image','video','audio','document','file') NOT NULL DEFAULT 'file',
`file_name` VARCHAR(255) NOT NULL,
`file_url` VARCHAR(2048) NOT NULL,
`size_bytes` BIGINT NULL,
`is_deleted` BOOLEAN NOT NULL DEFAULT FALSE,
`created_at` TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP,
PRIMARY KEY (id),
INDEX IX_file_data_1 (analysis_run_id, is_deleted)
);
-- hospital_history Table Create SQL -- hospital_history Table Create SQL
CREATE TABLE hospital_history CREATE TABLE hospital_history
( (

View File

@ -1,11 +1,13 @@
import logging import logging
import uuid6 import uuid6
from fastapi import APIRouter, BackgroundTasks, Depends, status, HTTPException from fastapi import APIRouter, BackgroundTasks, Depends, File, Form, HTTPException, UploadFile, status
from common.deps import verify_api_key from common.deps import verify_api_key
from common.db import fetchone, insert_instagram_row, insert_facebook_row, insert_naver_blog_row, insert_youtube_row, insert_gangnam_unni_row, insert_analysis_run from common.db import fetchone, insert_instagram_row, insert_facebook_row, insert_naver_blog_row, insert_youtube_row, insert_gangnam_unni_row, insert_analysis_run
from models.analysis import AnalysisCreate, AnalysisStartResponse, AnalysisStatusResponse from models.analysis import AnalysisCreate, AnalysisStartResponse, AnalysisStatusResponse
from models.file import FileListItem, FileType, FileUploadResponse
from models.status import AnalysisStatus from models.status import AnalysisStatus
from services.pipeline import run_pipeline from services.pipeline import run_pipeline
from services.file import get_analysis_files_response, handle_analysis_file_upload, soft_delete_analysis_file
router = APIRouter(prefix="/api/analysis", tags=["analysis"], dependencies=[Depends(verify_api_key)]) router = APIRouter(prefix="/api/analysis", tags=["analysis"], dependencies=[Depends(verify_api_key)])
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
@ -47,6 +49,29 @@ async def start_analysis(body: AnalysisCreate, background_tasks: BackgroundTasks
) )
@router.post("/{run_id}/files", status_code=status.HTTP_201_CREATED, response_model=FileUploadResponse)
async def upload_analysis_run_file(
run_id: str,
file: UploadFile = File(..., description="업로드할 파일"),
file_type: FileType = Form(default=FileType.FILE, description="파일 타입 (image/video/audio/document/file)"),
) -> FileUploadResponse:
logger.info("POST /api/analysis/%s/files name=%s file_type=%s", run_id, file.filename, file_type.value)
return await handle_analysis_file_upload(run_id, file, file_type)
@router.get("/{run_id}/files", response_model=list[FileListItem])
async def get_analysis_run_files(run_id: str) -> list[FileListItem]:
logger.info("GET /api/analysis/%s/files", run_id)
return await get_analysis_files_response(run_id)
@router.delete("/{run_id}/files/{file_id}", status_code=status.HTTP_204_NO_CONTENT)
async def delete_analysis_run_file(run_id: str, file_id: int) -> None:
logger.info("DELETE /api/analysis/%s/files/%s", run_id, file_id)
await soft_delete_analysis_file(analysis_run_id=run_id, file_id=file_id)
return None
@router.get("/{run_id}/status", response_model=AnalysisStatusResponse) @router.get("/{run_id}/status", response_model=AnalysisStatusResponse)
async def get_analysis_status(run_id: str): async def get_analysis_status(run_id: str):
logger.info("GET /api/analysis/%s/status", run_id) logger.info("GET /api/analysis/%s/status", run_id)

View File

@ -70,6 +70,15 @@ async def fetchall(sql: str, args: tuple = ()) -> list[dict]:
finally: finally:
conn.close() conn.close()
async def fetchall(sql: str, args: tuple = ()) -> list[dict]:
pool = await get_pool()
async with pool.acquire() as conn:
await conn.ping(reconnect=True)
async with conn.cursor(aiomysql.DictCursor) as cur:
await cur.execute(sql, args)
return list(await cur.fetchall())
async def insert_instagram_row(hospital_id: str, url: str) -> int: async def insert_instagram_row(hospital_id: str, url: str) -> int:
return await execute("INSERT INTO instagram_data (hospital_id, url) VALUES (%s, %s)", (hospital_id, url)) return await execute("INSERT INTO instagram_data (hospital_id, url) VALUES (%s, %s)", (hospital_id, url))
@ -90,6 +99,21 @@ async def insert_gangnam_unni_row(hospital_id: str, url: str) -> int:
return await execute("INSERT INTO gangnam_unni_data (hospital_id, url) VALUES (%s, %s)", (hospital_id, url)) return await execute("INSERT INTO gangnam_unni_data (hospital_id, url) VALUES (%s, %s)", (hospital_id, url))
async def insert_file_row(
analysis_run_id: str,
file_type: str,
file_name: str,
file_url: str,
size_bytes: int | None = None,
hospital_id: str | None = None,
) -> int:
return await execute(
"INSERT INTO file_data (analysis_run_id, hospital_id, file_type, file_name, file_url, size_bytes)"
" VALUES (%s, %s, %s, %s, %s, %s)",
(analysis_run_id, hospital_id, file_type, file_name, file_url, size_bytes),
)
async def insert_analysis_run( async def insert_analysis_run(
analysis_run_id: str, analysis_run_id: str,
hospital_id: str, hospital_id: str,

View File

@ -0,0 +1,79 @@
import logging
import re
from pathlib import Path
import httpx
from common.utils import get_env
logger = logging.getLogger(__name__)
_DEFAULT_CONTENT_TYPE = "application/octet-stream"
_CONTENT_TYPES = {
".jpg": "image/jpeg",
".jpeg": "image/jpeg",
".png": "image/png",
".gif": "image/gif",
".webp": "image/webp",
".bmp": "image/bmp",
".mp3": "audio/mpeg",
".mp4": "video/mp4",
".mov": "video/quicktime",
".pdf": "application/pdf",
".txt": "text/plain",
".json": "application/json",
".csv": "text/csv",
}
_shared_client: httpx.AsyncClient | None = None
async def _get_client() -> httpx.AsyncClient:
global _shared_client
if _shared_client is None or _shared_client.is_closed:
_shared_client = httpx.AsyncClient(
timeout=httpx.Timeout(180.0, connect=10.0),
limits=httpx.Limits(max_keepalive_connections=10, max_connections=20),
)
return _shared_client
def _sanitize_filename(file_name: str) -> str:
stem = Path(file_name).stem
suffix = Path(file_name).suffix
sanitized = re.sub(r"[^가-힣a-zA-Z0-9]", "", stem) or "file"
return f"{sanitized}{suffix}"
def _guess_content_type(file_name: str) -> str:
return _CONTENT_TYPES.get(Path(file_name).suffix.lower(), _DEFAULT_CONTENT_TYPE)
class AzureBlobUploader:
def __init__(self, group: str, category: str = "file"):
self._group = group
self._category = category
self._base_url = get_env("AZURE_BLOB_BASE_URL")
self._sas_token = get_env("AZURE_BLOB_SAS_TOKEN").strip("?'\"")
def _public_url(self, file_name: str) -> str:
return f"{self._base_url}/{self._group}/{self._category}/{file_name}"
def _upload_url(self, file_name: str) -> str:
return f"{self._public_url(file_name)}?{self._sas_token}"
async def upload_bytes(self, content: bytes, file_name: str, content_type: str | None = None) -> str:
file_name = _sanitize_filename(file_name)
ct = content_type or _guess_content_type(file_name)
url = self._upload_url(file_name)
headers = {"Content-Type": ct, "x-ms-blob-type": "BlockBlob"}
client = await _get_client()
resp = await client.put(url, content=content, headers=headers)
if resp.status_code not in (200, 201):
logger.error("Azure Blob upload failed status=%s body=%s", resp.status_code, resp.text[:500])
raise RuntimeError(f"Azure Blob upload failed: {resp.status_code}")
public_url = self._public_url(file_name)
logger.info("Azure Blob uploaded url=%s size=%s", public_url, len(content))
return public_url

29
app/models/file.py Normal file
View File

@ -0,0 +1,29 @@
from enum import Enum
from pydantic import BaseModel
class FileType(str, Enum):
IMAGE = "image"
VIDEO = "video"
AUDIO = "audio"
DOCUMENT = "document"
FILE = "file"
class FileUploadResponse(BaseModel):
id: int
analysis_run_id: str
file_type: FileType
file_name: str
file_url: str
size_bytes: int | None = None
class FileListItem(BaseModel):
id: int
file_type: FileType
file_name: str
file_url: str
size_bytes: int | None = None
created_at: str

123
app/services/file.py Normal file
View File

@ -0,0 +1,123 @@
import logging
from fastapi import HTTPException, UploadFile
from common.db import execute, fetchall, fetchone, insert_file_row
from integrations.azure_blob import AzureBlobUploader
from models.file import FileListItem, FileType, FileUploadResponse
logger = logging.getLogger(__name__)
_MAX_UPLOAD_BYTES = 50 * 1024 * 1024 # 50MB
async def upload_file_to_blob(
content: bytes,
file_name: str,
group: str,
category: str = "file",
content_type: str | None = None,
) -> str:
"""Azure Blob에 파일을 업로드하고 public URL을 반환. DB는 건드리지 않음."""
uploader = AzureBlobUploader(group=group, category=category)
return await uploader.upload_bytes(content=content, file_name=file_name, content_type=content_type)
async def upload_analysis_file(
analysis_run_id: str,
content: bytes,
file_name: str,
file_type: str = "file",
content_type: str | None = None,
) -> tuple[int, str]:
"""analysis_run에 딸린 파일 업로드. Blob 업로드 + file_data row 생성. (file_id, url) 반환."""
run = await fetchone(
"SELECT hospital_id FROM analysis_runs WHERE analysis_run_id = %s",
(analysis_run_id,),
)
if not run:
raise HTTPException(status_code=404, detail="analysis_run not found")
hospital_id = run["hospital_id"]
public_url = await upload_file_to_blob(
content=content,
file_name=file_name,
group=analysis_run_id,
category=file_type,
content_type=content_type,
)
file_id = await insert_file_row(
analysis_run_id=analysis_run_id,
hospital_id=hospital_id,
file_type=file_type,
file_name=file_name,
file_url=public_url,
size_bytes=len(content),
)
logger.info("uploaded analysis file run=%s file_id=%s url=%s", analysis_run_id, file_id, public_url)
return file_id, public_url
async def list_analysis_files(analysis_run_id: str) -> list[dict]:
"""analysis_run에 딸린 (삭제 안 된) 파일 목록."""
return await fetchall(
"SELECT id, file_type, file_name, file_url, size_bytes, created_at FROM file_data"
" WHERE analysis_run_id = %s AND is_deleted = FALSE"
" ORDER BY created_at DESC",
(analysis_run_id,),
)
async def handle_analysis_file_upload(
analysis_run_id: str,
upload: UploadFile,
file_type: FileType,
) -> FileUploadResponse:
"""multipart UploadFile 검증 + 업로드 + 응답 모델 생성."""
if not upload.filename:
raise HTTPException(status_code=400, detail="filename is required")
content = await upload.read()
if not content:
raise HTTPException(status_code=400, detail="empty file")
if len(content) > _MAX_UPLOAD_BYTES:
raise HTTPException(status_code=413, detail=f"file too large (max {_MAX_UPLOAD_BYTES} bytes)")
file_id, public_url = await upload_analysis_file(
analysis_run_id=analysis_run_id,
content=content,
file_name=upload.filename,
file_type=file_type.value,
content_type=upload.content_type,
)
return FileUploadResponse(
id=file_id,
analysis_run_id=analysis_run_id,
file_type=file_type,
file_name=upload.filename,
file_url=public_url,
size_bytes=len(content),
)
async def get_analysis_files_response(analysis_run_id: str) -> list[FileListItem]:
"""run 존재 확인 + 응답 모델 생성."""
if not await fetchone("SELECT 1 FROM analysis_runs WHERE analysis_run_id = %s", (analysis_run_id,)):
raise HTTPException(status_code=404, detail="analysis_run not found")
rows = await list_analysis_files(analysis_run_id)
return [FileListItem(**{**r, "created_at": str(r["created_at"])}) for r in rows]
async def soft_delete_analysis_file(analysis_run_id: str, file_id: int) -> None:
"""analysis_run에 딸린 파일을 소프트 삭제. 멱등성 보장."""
row = await fetchone(
"SELECT id FROM file_data WHERE id = %s AND analysis_run_id = %s",
(file_id, analysis_run_id),
)
if not row:
raise HTTPException(status_code=404, detail="file not found")
await execute(
"UPDATE file_data SET is_deleted = TRUE WHERE id = %s AND is_deleted = FALSE",
(file_id,),
)
logger.info("soft-deleted analysis file run=%s file_id=%s", analysis_run_id, file_id)