# o2o-plagiarism-ai/app/api/routes.py
#
# 249 lines
# 7.9 KiB
# Python

from __future__ import annotations
from datetime import datetime, timezone
from fastapi import APIRouter, BackgroundTasks, Depends, File, Form, HTTPException, Request, UploadFile, status
from app.api.schemas import (
BatchCreatedResponse,
BatchRequest,
BatchStatusResponse,
CorpusItem,
CorpusListResponse,
CorpusUploadRequest,
CorpusUploadResponse,
DetectRequest,
DetectResponse,
HealthResponse,
TaxonomyResponse,
)
from app.core.auth import require_api_key
from app.core.config import get_settings
from app.engine.corpus import add_document, delete_document, list_documents
from app.engine.detector import PlagiarismDetector
from app.jobs.store import JobStore
# Router that collects every endpoint declared in this module under the "/v1" prefix.
router = APIRouter(prefix="/v1")
def _detector(request: Request) -> PlagiarismDetector:
    """Return the detector object stored on the application state.

    Reads ``request.app.state.detector``; nothing is created or cached here.
    """
    app_state = request.app.state
    return app_state.detector
def _job_store(request: Request) -> JobStore:
    """Return the job store object stored on the application state.

    Reads ``request.app.state.job_store``; nothing is created or cached here.
    """
    app_state = request.app.state
    return app_state.job_store
@router.get("/health", response_model=HealthResponse, tags=["meta"])
async def health(request: Request) -> HealthResponse:
    """Report status "ok" with engine version, corpus size and taxonomy versions.

    ``taxonomy_version`` is ``None`` when the detector has no (truthy) taxonomy;
    otherwise it combines the meta-tag and case versions into one string.
    """
    cfg = get_settings()
    engine = _detector(request)

    loaded_taxonomy = engine.taxonomy
    if loaded_taxonomy:
        version_label = (
            f"meta_tags_{loaded_taxonomy.meta_tags_version}, cases_{loaded_taxonomy.cases_version}"
        )
    else:
        version_label = None

    return HealthResponse(
        status="ok",
        engine_version=cfg.engine_version,
        corpus_size=engine.corpus_size,
        taxonomy_version=version_label,
        autobiography_mode=cfg.autobiography_mode,
    )
@router.get("/taxonomy", response_model=TaxonomyResponse, tags=["meta"])
async def taxonomy(request: Request) -> TaxonomyResponse:
    """Return the taxonomy so that ComBooks and Baikal can share the same labels.

    Responds with HTTP 503 when the detector has no (truthy) taxonomy loaded.
    """
    loaded = _detector(request).taxonomy
    if not loaded:
        raise HTTPException(status_code=503, detail="Taxonomy not loaded")

    # Flatten each meta tag into a plain dict for the response.
    tag_rows = []
    for tag in loaded.meta_tags:
        tag_rows.append(
            {
                "id": tag.id,
                "label_ko": tag.label_ko,
                "category": tag.category,
                "law_ref": tag.law_ref,
                "scope": tag.scope,
                "description": tag.description,
            }
        )

    # Flatten each case; tag collections are copied into lists.
    case_rows = []
    for case in loaded.cases:
        case_rows.append(
            {
                "case_id": case.case_id,
                "old_no": case.old_no,
                "subgroup": case.subgroup,
                "title": case.title,
                "actor": case.actor,
                "primary_tags": list(case.primary_tags),
                "secondary_tags": list(case.secondary_tags),
                "detectable_internal": case.detectable_internal,
                "high_risk": case.high_risk,
                "note": case.note,
            }
        )

    return TaxonomyResponse(
        meta_tags_version=loaded.meta_tags_version,
        cases_version=loaded.cases_version,
        meta_tags=tag_rows,
        cases=case_rows,
    )
@router.post(
    "/plagiarism/detect",
    response_model=DetectResponse,
    tags=["plagiarism"],
    dependencies=[Depends(require_api_key)],
)
async def detect(req: DetectRequest, request: Request) -> DetectResponse:
    """Run plagiarism detection for one request and return the detector's result."""
    engine = _detector(request)
    # NOTE(review): detect_request is called without await, i.e. synchronously
    # inside this coroutine; if it is slow it will occupy the event loop for its
    # whole duration. Confirm whether that is acceptable.
    return engine.detect_request(req)
@router.post(
    "/plagiarism/batch",
    response_model=BatchCreatedResponse,
    status_code=status.HTTP_202_ACCEPTED,
    tags=["plagiarism"],
    dependencies=[Depends(require_api_key)],
)
async def batch_create(
    req: BatchRequest,
    request: Request,
    background_tasks: BackgroundTasks,
) -> BatchCreatedResponse:
    """Register a batch job and queue ``_run_batch`` as a background task.

    The job is created with ``total`` set to the number of submitted items, and
    the response echoes the identifiers and timestamps of that new job.
    """
    jobs = _job_store(request)
    engine = _detector(request)

    item_count = len(req.items)
    new_job = jobs.create(total=item_count)

    # The actual detection work happens in _run_batch, not in this handler.
    background_tasks.add_task(_run_batch, jobs, engine, new_job.job_id, req)

    summary = {
        "job_id": new_job.job_id,
        "status": new_job.status,
        "total": new_job.total,
        "created_at": new_job.created_at,
    }
    return BatchCreatedResponse(**summary)
@router.get(
    "/plagiarism/batch/{job_id}",
    response_model=BatchStatusResponse,
    tags=["plagiarism"],
    dependencies=[Depends(require_api_key)],
)
async def batch_status(job_id: str, request: Request) -> BatchStatusResponse:
    """Return the current state of a batch job.

    Responds with HTTP 404 when the job store has no (truthy) entry for
    ``job_id``. ``results`` is only filled in when the job status is
    ``"completed"``; otherwise it is ``None``.
    """
    found = _job_store(request).get(job_id)
    if not found:
        raise HTTPException(status_code=404, detail="Job not found")

    # Hold results back until the job reports completion.
    if found.status == "completed":
        visible_results = found.results
    else:
        visible_results = None

    return BatchStatusResponse(
        job_id=found.job_id,
        status=found.status,
        total=found.total,
        processed=found.processed,
        created_at=found.created_at,
        finished_at=found.finished_at,
        results=visible_results,
        error=found.error,
    )
# ---------- Corpus management ----------
def _rebuild(request: Request) -> int:
    """Call ``app.main.rebuild_detector`` for this application and return its result.

    NOTE(review): the import happens at call time rather than at module level,
    presumably to avoid a circular import with ``app.main`` - confirm.
    """
    from app.main import rebuild_detector as rebuild

    application = request.app
    return rebuild(application)
@router.get(
    "/corpus",
    response_model=CorpusListResponse,
    tags=["corpus"],
    dependencies=[Depends(require_api_key)],
)
async def corpus_list(request: Request) -> CorpusListResponse:
    """List the documents found under the configured corpus path."""
    corpus_path = get_settings().corpus_path
    records = list_documents(corpus_path)

    # Each record is expanded as keyword arguments into a CorpusItem.
    items = []
    for record in records:
        items.append(CorpusItem(**record))

    return CorpusListResponse(total=len(records), docs=items)
@router.post(
    "/corpus",
    response_model=CorpusUploadResponse,
    status_code=status.HTTP_201_CREATED,
    tags=["corpus"],
    dependencies=[Depends(require_api_key)],
)
async def corpus_upload_json(req: CorpusUploadRequest, request: Request) -> CorpusUploadResponse:
    """Upload one autobiography as JSON; the index is rebuilt automatically.

    The document is stored under the configured corpus path and the detector
    is rebuilt afterwards. ``size_bytes`` is the UTF-8 encoded length of the
    stored text.

    Raises:
        HTTPException: 409 when ``add_document`` raises ``FileExistsError``,
            400 when it raises ``ValueError``.
    """
    settings = get_settings()
    try:
        doc = add_document(settings.corpus_path, req.doc_id, req.title, req.text)
    except FileExistsError as exc:
        # Chain the cause so the original error stays visible in tracebacks.
        raise HTTPException(status_code=409, detail=str(exc)) from exc
    except ValueError as exc:
        raise HTTPException(status_code=400, detail=str(exc)) from exc

    new_size = _rebuild(request)
    return CorpusUploadResponse(
        doc_id=doc.doc_id,
        title=doc.title,
        size_bytes=len(doc.text.encode("utf-8")),
        corpus_size_after=new_size,
        rebuilt=True,
    )
@router.post(
    "/corpus/file",
    response_model=CorpusUploadResponse,
    status_code=status.HTTP_201_CREATED,
    tags=["corpus"],
    dependencies=[Depends(require_api_key)],
)
async def corpus_upload_file(
    request: Request,
    title: str = Form(..., description="자서전 제목"),
    doc_id: str | None = Form(default=None, description="비우면 자동 생성"),
    file: UploadFile = File(..., description=".txt 파일"),
) -> CorpusUploadResponse:
    """Upload a .txt file via multipart (intended for large autobiography files).

    The uploaded bytes are decoded as UTF-8, stored under the configured corpus
    path, and the detector is rebuilt afterwards. ``size_bytes`` is the UTF-8
    encoded length of the stored text.

    Raises:
        HTTPException: 400 when the upload is not valid UTF-8 or
            ``add_document`` raises ``ValueError``; 409 when it raises
            ``FileExistsError``.
    """
    settings = get_settings()
    raw = await file.read()
    try:
        # "utf-8-sig" decodes plain UTF-8 too, but drops a leading byte-order
        # mark so that U+FEFF does not end up inside the stored corpus text.
        text = raw.decode("utf-8-sig")
    except UnicodeDecodeError as exc:
        raise HTTPException(
            status_code=400,
            detail="UTF-8 인코딩 텍스트 파일만 업로드 가능합니다.",
        ) from exc

    try:
        doc = add_document(settings.corpus_path, doc_id, title, text)
    except FileExistsError as exc:
        # Chain the cause so the original error stays visible in tracebacks.
        raise HTTPException(status_code=409, detail=str(exc)) from exc
    except ValueError as exc:
        raise HTTPException(status_code=400, detail=str(exc)) from exc

    new_size = _rebuild(request)
    return CorpusUploadResponse(
        doc_id=doc.doc_id,
        title=doc.title,
        size_bytes=len(doc.text.encode("utf-8")),
        corpus_size_after=new_size,
        rebuilt=True,
    )
@router.delete(
    "/corpus/{doc_id}",
    status_code=status.HTTP_204_NO_CONTENT,
    tags=["corpus"],
    dependencies=[Depends(require_api_key)],
)
async def corpus_delete(doc_id: str, request: Request) -> None:
    """Delete a corpus document, then rebuild the detector.

    Responds with HTTP 404 (and skips the rebuild) when ``delete_document``
    returns a falsy value for ``doc_id``.
    """
    corpus_path = get_settings().corpus_path
    removed = delete_document(corpus_path, doc_id)
    if not removed:
        raise HTTPException(status_code=404, detail=f"doc_id '{doc_id}' not found")
    _rebuild(request)
def _run_batch(store: JobStore, detector: PlagiarismDetector, job_id: str, req: BatchRequest) -> None:
    """Run detection for every item of a batch and record progress in the store.

    The job is first marked ``"running"``. Each item's result is appended to
    the job; once all items are done the job is marked ``"completed"``. Any
    exception marks the job ``"failed"`` instead, with a UTC ``finished_at``
    timestamp and an error message. Nothing is re-raised to the caller.

    Args:
        store: Job store receiving status updates and per-item results.
        detector: Detector whose ``detect`` method is called once per item.
        job_id: Identifier of the job being processed.
        req: Batch request providing ``items`` and the shared ``options``.
    """
    # Local import keeps this block self-contained; the module does not import logging.
    import logging

    store.update(job_id, status="running")
    try:
        for item in req.items:
            result = detector.detect(
                doc_id=item.doc_id,
                text=item.text,
                metadata=item.metadata,
                options=req.options,
            )
            store.append_result(job_id, result)
        store.update(job_id, status="completed", finished_at=datetime.now(timezone.utc))
    except Exception as exc:
        # This is the outermost boundary of the job: keep the traceback in the
        # logs, since only a short message is stored on the job itself.
        logging.getLogger(__name__).exception("Batch job %s failed", job_id)
        store.update(
            job_id,
            status="failed",
            finished_at=datetime.now(timezone.utc),
            # Some exceptions carry no message; fall back to the class name so
            # a failed job never reports an empty error.
            error=str(exc) or type(exc).__name__,
        )