From 18d01357c09efd5ba0f96bbfc8cf299985b5dfde Mon Sep 17 00:00:00 2001 From: Mina Choi Date: Tue, 19 May 2026 16:13:31 +0900 Subject: [PATCH] =?UTF-8?q?file=20=EC=97=85=EB=A1=9C=EB=93=9C=20=EC=97=94?= =?UTF-8?q?=EB=93=9C=ED=8F=AC=EC=9D=B8=ED=8A=B8=20=EC=B6=94=EA=B0=80=20(Az?= =?UTF-8?q?ure=20Blob=20=EC=97=B0=EB=8F=99)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- SQL/db_create.sql | 17 +++++ app/api/analysis.py | 27 +++++++- app/common/db.py | 24 +++++++ app/integrations/azure_blob.py | 79 +++++++++++++++++++++ app/models/file.py | 29 ++++++++ app/services/file.py | 123 +++++++++++++++++++++++++++++++++ 6 files changed, 298 insertions(+), 1 deletion(-) create mode 100644 app/integrations/azure_blob.py create mode 100644 app/models/file.py create mode 100644 app/services/file.py diff --git a/SQL/db_create.sql b/SQL/db_create.sql index a314918..c4b6f02 100644 --- a/SQL/db_create.sql +++ b/SQL/db_create.sql @@ -152,6 +152,23 @@ CREATE INDEX IX_analysis_runs_2 ON analysis_runs(owner_user_id); +-- file_data Table Create SQL +CREATE TABLE file_data +( + `id` INT NOT NULL AUTO_INCREMENT, + `analysis_run_id` CHAR(36) NOT NULL, + `hospital_id` CHAR(36) NULL, + `file_type` ENUM('image','video','audio','document','file') NOT NULL DEFAULT 'file', + `file_name` VARCHAR(255) NOT NULL, + `file_url` VARCHAR(2048) NOT NULL, + `size_bytes` BIGINT NULL, + `is_deleted` BOOLEAN NOT NULL DEFAULT FALSE, + `created_at` TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP, + PRIMARY KEY (id), + INDEX IX_file_data_1 (analysis_run_id, is_deleted) +); + + -- hospital_history Table Create SQL CREATE TABLE hospital_history ( diff --git a/app/api/analysis.py b/app/api/analysis.py index 7b7bd6e..7e2691c 100644 --- a/app/api/analysis.py +++ b/app/api/analysis.py @@ -1,11 +1,13 @@ import logging import uuid6 -from fastapi import APIRouter, BackgroundTasks, Depends, status, HTTPException +from fastapi import APIRouter, BackgroundTasks, Depends, File, Form, HTTPException, UploadFile, status from common.deps import verify_api_key from common.db import fetchone, insert_instagram_row, insert_facebook_row, insert_naver_blog_row, insert_youtube_row, insert_gangnam_unni_row, insert_analysis_run from models.analysis import AnalysisCreate, AnalysisStartResponse, AnalysisStatusResponse +from models.file import FileListItem, FileType, FileUploadResponse from models.status import AnalysisStatus from services.pipeline import run_pipeline +from services.file import get_analysis_files_response, handle_analysis_file_upload, soft_delete_analysis_file router = APIRouter(prefix="/api/analysis", tags=["analysis"], dependencies=[Depends(verify_api_key)]) logger = logging.getLogger(__name__) @@ -47,6 +49,29 @@ async def start_analysis(body: AnalysisCreate, background_tasks: BackgroundTasks ) +@router.post("/{run_id}/files", status_code=status.HTTP_201_CREATED, response_model=FileUploadResponse) +async def upload_analysis_run_file( + run_id: str, + file: UploadFile = File(..., description="업로드할 파일"), + file_type: FileType = Form(default=FileType.FILE, description="파일 타입 (image/video/audio/document/file)"), +) -> FileUploadResponse: + logger.info("POST /api/analysis/%s/files name=%s file_type=%s", run_id, file.filename, file_type.value) + return await handle_analysis_file_upload(run_id, file, file_type) + + +@router.get("/{run_id}/files", response_model=list[FileListItem]) +async def get_analysis_run_files(run_id: str) -> list[FileListItem]: + logger.info("GET /api/analysis/%s/files", run_id) + return await get_analysis_files_response(run_id) + + +@router.delete("/{run_id}/files/{file_id}", status_code=status.HTTP_204_NO_CONTENT) +async def delete_analysis_run_file(run_id: str, file_id: int) -> None: + logger.info("DELETE /api/analysis/%s/files/%s", run_id, file_id) + await soft_delete_analysis_file(analysis_run_id=run_id, file_id=file_id) + return None + + @router.get("/{run_id}/status", response_model=AnalysisStatusResponse) async def get_analysis_status(run_id: str): logger.info("GET /api/analysis/%s/status", run_id) diff --git a/app/common/db.py b/app/common/db.py index 468af75..29259e9 100644 --- a/app/common/db.py +++ b/app/common/db.py @@ -70,6 +70,15 @@ async def fetchall(sql: str, args: tuple = ()) -> list[dict]: finally: conn.close() + +async def fetchall(sql: str, args: tuple = ()) -> list[dict]: + pool = await get_pool() + async with pool.acquire() as conn: + await conn.ping(reconnect=True) + async with conn.cursor(aiomysql.DictCursor) as cur: + await cur.execute(sql, args) + return list(await cur.fetchall()) + async def insert_instagram_row(hospital_id: str, url: str) -> int: return await execute("INSERT INTO instagram_data (hospital_id, url) VALUES (%s, %s)", (hospital_id, url)) @@ -90,6 +99,21 @@ async def insert_gangnam_unni_row(hospital_id: str, url: str) -> int: return await execute("INSERT INTO gangnam_unni_data (hospital_id, url) VALUES (%s, %s)", (hospital_id, url)) +async def insert_file_row( + analysis_run_id: str, + file_type: str, + file_name: str, + file_url: str, + size_bytes: int | None = None, + hospital_id: str | None = None, +) -> int: + return await execute( + "INSERT INTO file_data (analysis_run_id, hospital_id, file_type, file_name, file_url, size_bytes)" + " VALUES (%s, %s, %s, %s, %s, %s)", + (analysis_run_id, hospital_id, file_type, file_name, file_url, size_bytes), + ) + + async def insert_analysis_run( analysis_run_id: str, hospital_id: str, diff --git a/app/integrations/azure_blob.py b/app/integrations/azure_blob.py new file mode 100644 index 0000000..1b7abe6 --- /dev/null +++ b/app/integrations/azure_blob.py @@ -0,0 +1,79 @@ +import logging +import re +from pathlib import Path + +import httpx + +from common.utils import get_env + +logger = logging.getLogger(__name__) + +_DEFAULT_CONTENT_TYPE = "application/octet-stream" +_CONTENT_TYPES = { + ".jpg": "image/jpeg", + ".jpeg": "image/jpeg", + ".png": "image/png", + ".gif": "image/gif", + ".webp": "image/webp", + ".bmp": "image/bmp", + ".mp3": "audio/mpeg", + ".mp4": "video/mp4", + ".mov": "video/quicktime", + ".pdf": "application/pdf", + ".txt": "text/plain", + ".json": "application/json", + ".csv": "text/csv", +} + +_shared_client: httpx.AsyncClient | None = None + + +async def _get_client() -> httpx.AsyncClient: + global _shared_client + if _shared_client is None or _shared_client.is_closed: + _shared_client = httpx.AsyncClient( + timeout=httpx.Timeout(180.0, connect=10.0), + limits=httpx.Limits(max_keepalive_connections=10, max_connections=20), + ) + return _shared_client + + +def _sanitize_filename(file_name: str) -> str: + stem = Path(file_name).stem + suffix = Path(file_name).suffix + sanitized = re.sub(r"[^가-힣a-zA-Z0-9]", "", stem) or "file" + return f"{sanitized}{suffix}" + + +def _guess_content_type(file_name: str) -> str: + return _CONTENT_TYPES.get(Path(file_name).suffix.lower(), _DEFAULT_CONTENT_TYPE) + + +class AzureBlobUploader: + def __init__(self, group: str, category: str = "file"): + self._group = group + self._category = category + self._base_url = get_env("AZURE_BLOB_BASE_URL") + self._sas_token = get_env("AZURE_BLOB_SAS_TOKEN").strip("?'\"") + + def _public_url(self, file_name: str) -> str: + return f"{self._base_url}/{self._group}/{self._category}/{file_name}" + + def _upload_url(self, file_name: str) -> str: + return f"{self._public_url(file_name)}?{self._sas_token}" + + async def upload_bytes(self, content: bytes, file_name: str, content_type: str | None = None) -> str: + file_name = _sanitize_filename(file_name) + ct = content_type or _guess_content_type(file_name) + url = self._upload_url(file_name) + headers = {"Content-Type": ct, "x-ms-blob-type": "BlockBlob"} + + client = await _get_client() + resp = await client.put(url, content=content, headers=headers) + if resp.status_code not in (200, 201): + logger.error("Azure Blob upload failed status=%s body=%s", resp.status_code, resp.text[:500]) + raise RuntimeError(f"Azure Blob upload failed: {resp.status_code}") + + public_url = self._public_url(file_name) + logger.info("Azure Blob uploaded url=%s size=%s", public_url, len(content)) + return public_url diff --git a/app/models/file.py b/app/models/file.py new file mode 100644 index 0000000..59bb362 --- /dev/null +++ b/app/models/file.py @@ -0,0 +1,29 @@ +from enum import Enum + +from pydantic import BaseModel + + +class FileType(str, Enum): + IMAGE = "image" + VIDEO = "video" + AUDIO = "audio" + DOCUMENT = "document" + FILE = "file" + + +class FileUploadResponse(BaseModel): + id: int + analysis_run_id: str + file_type: FileType + file_name: str + file_url: str + size_bytes: int | None = None + + +class FileListItem(BaseModel): + id: int + file_type: FileType + file_name: str + file_url: str + size_bytes: int | None = None + created_at: str diff --git a/app/services/file.py b/app/services/file.py new file mode 100644 index 0000000..339cc99 --- /dev/null +++ b/app/services/file.py @@ -0,0 +1,123 @@ +import logging + +from fastapi import HTTPException, UploadFile + +from common.db import execute, fetchall, fetchone, insert_file_row +from integrations.azure_blob import AzureBlobUploader +from models.file import FileListItem, FileType, FileUploadResponse + +logger = logging.getLogger(__name__) + +_MAX_UPLOAD_BYTES = 50 * 1024 * 1024 # 50MB + + +async def upload_file_to_blob( + content: bytes, + file_name: str, + group: str, + category: str = "file", + content_type: str | None = None, +) -> str: + """Azure Blob에 파일을 업로드하고 public URL을 반환. DB는 건드리지 않음.""" + uploader = AzureBlobUploader(group=group, category=category) + return await uploader.upload_bytes(content=content, file_name=file_name, content_type=content_type) + + +async def upload_analysis_file( + analysis_run_id: str, + content: bytes, + file_name: str, + file_type: str = "file", + content_type: str | None = None, +) -> tuple[int, str]: + """analysis_run에 딸린 파일 업로드. Blob 업로드 + file_data row 생성. (file_id, url) 반환.""" + run = await fetchone( + "SELECT hospital_id FROM analysis_runs WHERE analysis_run_id = %s", + (analysis_run_id,), + ) + if not run: + raise HTTPException(status_code=404, detail="analysis_run not found") + hospital_id = run["hospital_id"] + + public_url = await upload_file_to_blob( + content=content, + file_name=file_name, + group=analysis_run_id, + category=file_type, + content_type=content_type, + ) + + file_id = await insert_file_row( + analysis_run_id=analysis_run_id, + hospital_id=hospital_id, + file_type=file_type, + file_name=file_name, + file_url=public_url, + size_bytes=len(content), + ) + logger.info("uploaded analysis file run=%s file_id=%s url=%s", analysis_run_id, file_id, public_url) + return file_id, public_url + + +async def list_analysis_files(analysis_run_id: str) -> list[dict]: + """analysis_run에 딸린 (삭제 안 된) 파일 목록.""" + return await fetchall( + "SELECT id, file_type, file_name, file_url, size_bytes, created_at FROM file_data" + " WHERE analysis_run_id = %s AND is_deleted = FALSE" + " ORDER BY created_at DESC", + (analysis_run_id,), + ) + + +async def handle_analysis_file_upload( + analysis_run_id: str, + upload: UploadFile, + file_type: FileType, +) -> FileUploadResponse: + """multipart UploadFile 검증 + 업로드 + 응답 모델 생성.""" + if not upload.filename: + raise HTTPException(status_code=400, detail="filename is required") + content = await upload.read() + if not content: + raise HTTPException(status_code=400, detail="empty file") + if len(content) > _MAX_UPLOAD_BYTES: + raise HTTPException(status_code=413, detail=f"file too large (max {_MAX_UPLOAD_BYTES} bytes)") + + file_id, public_url = await upload_analysis_file( + analysis_run_id=analysis_run_id, + content=content, + file_name=upload.filename, + file_type=file_type.value, + content_type=upload.content_type, + ) + return FileUploadResponse( + id=file_id, + analysis_run_id=analysis_run_id, + file_type=file_type, + file_name=upload.filename, + file_url=public_url, + size_bytes=len(content), + ) + + +async def get_analysis_files_response(analysis_run_id: str) -> list[FileListItem]: + """run 존재 확인 + 응답 모델 생성.""" + if not await fetchone("SELECT 1 FROM analysis_runs WHERE analysis_run_id = %s", (analysis_run_id,)): + raise HTTPException(status_code=404, detail="analysis_run not found") + rows = await list_analysis_files(analysis_run_id) + return [FileListItem(**{**r, "created_at": str(r["created_at"])}) for r in rows] + + +async def soft_delete_analysis_file(analysis_run_id: str, file_id: int) -> None: + """analysis_run에 딸린 파일을 소프트 삭제. 멱등성 보장.""" + row = await fetchone( + "SELECT id FROM file_data WHERE id = %s AND analysis_run_id = %s", + (file_id, analysis_run_id), + ) + if not row: + raise HTTPException(status_code=404, detail="file not found") + await execute( + "UPDATE file_data SET is_deleted = TRUE WHERE id = %s AND is_deleted = FALSE", + (file_id,), + ) + logger.info("soft-deleted analysis file run=%s file_id=%s", analysis_run_id, file_id)