from __future__ import annotations

from datetime import datetime, timezone

from fastapi import (
    APIRouter,
    BackgroundTasks,
    File,
    Form,
    HTTPException,
    Request,
    UploadFile,
    status,
)

from app.api.schemas import (
    BatchCreatedResponse,
    BatchRequest,
    BatchStatusResponse,
    CorpusItem,
    CorpusListResponse,
    CorpusUploadRequest,
    CorpusUploadResponse,
    DetectRequest,
    DetectResponse,
    HealthResponse,
    TaxonomyResponse,
)
from app.core.config import get_settings
from app.engine.corpus import add_document, delete_document, list_documents
from app.engine.detector import PlagiarismDetector
from app.jobs.store import JobStore

router = APIRouter(prefix="/v1")

# NOTE: FastAPI publishes a handler's docstring as that operation's OpenAPI
# description. To keep the published API text unchanged, the endpoints below
# are documented with ``#`` comments, and any description that was previously
# published is passed explicitly through ``description=``.


def _detector(request: Request) -> PlagiarismDetector:
    """Return the detector object stored on ``request.app.state``."""
    return request.app.state.detector


def _job_store(request: Request) -> JobStore:
    """Return the batch job store kept on ``request.app.state``."""
    return request.app.state.job_store


# GET /v1/health
# Reports the engine version, corpus size, loaded taxonomy versions and the
# autobiography-mode setting.
@router.get("/health", response_model=HealthResponse, tags=["meta"])
async def health(request: Request) -> HealthResponse:
    settings = get_settings()
    det = _detector(request)

    # taxonomy_version stays None when the detector has no taxonomy.
    loaded = det.taxonomy
    taxonomy_version = (
        f"meta_tags_{loaded.meta_tags_version}, cases_{loaded.cases_version}"
        if loaded
        else None
    )

    return HealthResponse(
        status="ok",
        engine_version=settings.engine_version,
        corpus_size=det.corpus_size,
        taxonomy_version=taxonomy_version,
        autobiography_mode=settings.autobiography_mode,
    )


def _meta_tag_row(tag) -> dict:
    """Flatten one taxonomy meta tag into the dict used by ``TaxonomyResponse``."""
    return {
        "id": tag.id,
        "label_ko": tag.label_ko,
        "category": tag.category,
        "law_ref": tag.law_ref,
        "scope": tag.scope,
        "description": tag.description,
    }


def _case_row(case) -> dict:
    """Flatten one taxonomy case into the dict used by ``TaxonomyResponse``."""
    return {
        "case_id": case.case_id,
        "old_no": case.old_no,
        "subgroup": case.subgroup,
        "title": case.title,
        "actor": case.actor,
        # Tag collections are copied into plain lists for the response.
        "primary_tags": list(case.primary_tags),
        "secondary_tags": list(case.secondary_tags),
        "detectable_internal": case.detectable_internal,
        "high_risk": case.high_risk,
        "note": case.note,
    }


# GET /v1/taxonomy
# Taxonomy lookup, so that the consumers named in the published description
# share identical labeling. Responds 503 when no taxonomy is loaded.
@router.get(
    "/taxonomy",
    response_model=TaxonomyResponse,
    tags=["meta"],
    description="분류체계 조회 - 컴북스/바이칼이 동일 라벨링 공유용.",
)
async def taxonomy(request: Request) -> TaxonomyResponse:
    loaded = _detector(request).taxonomy
    if not loaded:
        raise HTTPException(status_code=503, detail="Taxonomy not loaded")

    return TaxonomyResponse(
        meta_tags_version=loaded.meta_tags_version,
        cases_version=loaded.cases_version,
        meta_tags=[_meta_tag_row(tag) for tag in loaded.meta_tags],
        cases=[_case_row(case) for case in loaded.cases],
    )


# POST /v1/plagiarism/detect
# Runs a single detection request and returns the detector's response.
# NOTE(review): detect_request is called synchronously inside an async
# handler; if it is CPU-heavy it will occupy the event loop - confirm.
@router.post(
    "/plagiarism/detect",
    response_model=DetectResponse,
    tags=["plagiarism"],
)
async def detect(req: DetectRequest, request: Request) -> DetectResponse:
    engine = _detector(request)
    return engine.detect_request(req)


# POST /v1/plagiarism/batch
# Registers a job sized to the number of items, schedules ``_run_batch`` as a
# background task and answers 202 with the job handle.
@router.post(
    "/plagiarism/batch",
    response_model=BatchCreatedResponse,
    status_code=status.HTTP_202_ACCEPTED,
    tags=["plagiarism"],
)
async def batch_create(
    req: BatchRequest,
    request: Request,
    background_tasks: BackgroundTasks,
) -> BatchCreatedResponse:
    store = _job_store(request)
    detector = _detector(request)

    job = store.create(total=len(req.items))
    background_tasks.add_task(_run_batch, store, detector, job.job_id, req)

    return BatchCreatedResponse(
        job_id=job.job_id,
        status=job.status,
        total=job.total,
        created_at=job.created_at,
    )


# GET /v1/plagiarism/batch/{job_id}
# Returns job progress; responds 404 for an unknown job id.
@router.get(
    "/plagiarism/batch/{job_id}",
    response_model=BatchStatusResponse,
    tags=["plagiarism"],
)
async def batch_status(job_id: str, request: Request) -> BatchStatusResponse:
    job = _job_store(request).get(job_id)
    if not job:
        raise HTTPException(status_code=404, detail="Job not found")

    # Results are only exposed once the job has completed.
    visible_results = job.results if job.status == "completed" else None

    return BatchStatusResponse(
        job_id=job.job_id,
        status=job.status,
        total=job.total,
        processed=job.processed,
        created_at=job.created_at,
        finished_at=job.finished_at,
        results=visible_results,
        error=job.error,
    )


# ---------- Corpus management ----------


def _rebuild(request: Request) -> int:
    """Rebuild the application's detector and return ``rebuild_detector``'s result.

    Callers in this module report the returned value as the corpus size after
    the rebuild.
    """
    # NOTE(review): imported at call time, presumably to avoid a circular
    # import between app.main and this router module - confirm.
    from app.main import rebuild_detector

    return rebuild_detector(request.app)


# GET /v1/corpus
# Lists every document found under the configured corpus path.
@router.get(
    "/corpus",
    response_model=CorpusListResponse,
    tags=["corpus"],
)
async def corpus_list(request: Request) -> CorpusListResponse:
    docs = list_documents(get_settings().corpus_path)
    items = [CorpusItem(**d) for d in docs]
    return CorpusListResponse(total=len(docs), docs=items)
# NOTE: FastAPI publishes a handler's docstring as that operation's OpenAPI
# description. The upload endpoints therefore pass their published (Korean)
# description explicitly via ``description=`` and carry their maintainer notes
# as ``#`` comments, so the generated API docs stay unchanged.


def _store_document(
    request: Request,
    doc_id: str | None,
    title: str,
    text: str,
) -> CorpusUploadResponse:
    """Add one document to the corpus, rebuild the detector, and build the reply.

    Shared by the JSON and multipart upload endpoints so both map storage
    errors to the same HTTP statuses.

    Raises:
        HTTPException: 409 when ``add_document`` raises ``FileExistsError``,
            400 when it raises ``ValueError``.
    """
    settings = get_settings()
    try:
        doc = add_document(settings.corpus_path, doc_id, title, text)
    except FileExistsError as exc:
        raise HTTPException(status_code=409, detail=str(exc)) from exc
    except ValueError as exc:
        raise HTTPException(status_code=400, detail=str(exc)) from exc

    corpus_size = _rebuild(request)
    return CorpusUploadResponse(
        doc_id=doc.doc_id,
        title=doc.title,
        size_bytes=len(doc.text.encode("utf-8")),
        corpus_size_after=corpus_size,
        rebuilt=True,
    )


# POST /v1/corpus
# Upload one autobiography as JSON. The index is rebuilt automatically.
@router.post(
    "/corpus",
    response_model=CorpusUploadResponse,
    status_code=status.HTTP_201_CREATED,
    tags=["corpus"],
    description="JSON으로 자서전 1건 업로드. 인덱스 자동 재빌드.",
)
async def corpus_upload_json(req: CorpusUploadRequest, request: Request) -> CorpusUploadResponse:
    return _store_document(request, req.doc_id, req.title, req.text)


# POST /v1/corpus/file
# Upload a .txt file via multipart (intended for large autobiography files).
# Responds 400 when the payload is not valid UTF-8.
@router.post(
    "/corpus/file",
    response_model=CorpusUploadResponse,
    status_code=status.HTTP_201_CREATED,
    tags=["corpus"],
    description="multipart로 .txt 파일 업로드 (큰 자서전 파일용).",
)
async def corpus_upload_file(
    request: Request,
    title: str = Form(..., description="자서전 제목"),
    doc_id: str | None = Form(default=None, description="비우면 자동 생성"),
    file: UploadFile = File(..., description=".txt 파일"),
) -> CorpusUploadResponse:
    raw = await file.read()
    try:
        # "utf-8-sig" decodes exactly like "utf-8" but drops a leading BOM,
        # so BOM-prefixed .txt files do not leak U+FEFF into the corpus text.
        text = raw.decode("utf-8-sig")
    except UnicodeDecodeError as exc:
        raise HTTPException(
            status_code=400,
            detail="UTF-8 인코딩 텍스트 파일만 업로드 가능합니다.",
        ) from exc

    return _store_document(request, doc_id, title, text)


# DELETE /v1/corpus/{doc_id}
# Removes one document and rebuilds the detector; responds 404 when
# ``delete_document`` reports that nothing was deleted.
@router.delete(
    "/corpus/{doc_id}",
    status_code=status.HTTP_204_NO_CONTENT,
    tags=["corpus"],
)
async def corpus_delete(doc_id: str, request: Request) -> None:
    settings = get_settings()
    if not delete_document(settings.corpus_path, doc_id):
        raise HTTPException(status_code=404, detail=f"doc_id '{doc_id}' not found")
    _rebuild(request)


def _run_batch(store: JobStore, detector: PlagiarismDetector, job_id: str, req: BatchRequest) -> None:
    """Run every item of a batch request and record progress in the job store.

    The job is marked ``running`` first, each detection result is appended as
    it is produced, and the job ends as ``completed`` or ``failed`` with a
    UTC ``finished_at`` timestamp.

    Args:
        store: Job store receiving status updates and results.
        detector: Detector used for each item.
        job_id: Identifier of the job being processed.
        req: Batch request supplying ``items`` and shared ``options``.
    """
    # Imported locally so this function does not depend on a module-level
    # logging import being present.
    import logging

    store.update(job_id, status="running")
    try:
        for item in req.items:
            outcome = detector.detect(
                doc_id=item.doc_id,
                text=item.text,
                metadata=item.metadata,
                options=req.options,
            )
            store.append_result(job_id, outcome)
        store.update(job_id, status="completed", finished_at=datetime.now(timezone.utc))
    except Exception as exc:
        # Boundary of the background task: nothing upstream can handle the
        # error, so log the traceback and record the failure on the job.
        logging.getLogger(__name__).exception("Batch job %s failed", job_id)
        store.update(
            job_id,
            status="failed",
            finished_at=datetime.now(timezone.utc),
            # Some exceptions stringify to ""; fall back to repr so the job
            # never reports a failure with an empty error message.
            error=str(exc) or repr(exc),
        )