# o2o-plagiarism-ai/app/api/schemas.py
#
# 186 lines
# 5.2 KiB
# Python

from datetime import datetime
from typing import Literal
from pydantic import BaseModel, Field
# The ten statute-based meta tags (PDF chapter IV).
LegalTag = Literal[
    "reproduction",  # right of reproduction
    "public_transmission",  # right of public transmission
    "distribution",  # right of distribution
    "derivative_work",  # right to create derivative works
    "publication",  # right to make a work public
    "attribution",  # right of attribution (name indication)
    "integrity",  # right of integrity
    "citation_missing",  # citation marking omitted
    "false_authorship",  # presented as if it were one's own creation
    "substandard_derivative",  # processing that falls short of a derivative work
]
# Korean display label for each tag key. The keys are the same ten strings
# that make up the LegalTag literal, in the same order.
TAG_LABEL_KO: dict[str, str] = {
    "reproduction": "복제권",
    "public_transmission": "공중송신권",
    "distribution": "배포권",
    "derivative_work": "2차적저작물작성권",
    "publication": "공표권",
    "attribution": "성명표시권",
    "integrity": "동일성유지권",
    "citation_missing": "인용 표시 누락",
    "false_authorship": "자기 창작인 양 표시",
    "substandard_derivative": "2차적저작물 미달 가공",
}
# Kept for backward compatibility.
InfringementType = Literal[
    "copy",
    "transform",
    "plot",
    "character",
    "background",
    "unknown",
]
class DocumentMetadata(BaseModel):
    # Descriptive metadata for a document. Every field is optional and
    # defaults to None when the caller leaves it out.
    title: str | None = None
    author: str | None = None
    genre: str | None = None
    publisher: str | None = None
    publication_year: int | None = None
class DetectOptions(BaseModel):
    # Per-request detection options; each field has a default, so an empty
    # options object is valid.
    return_evidence: bool = True

    # Constrained to [0.0, 1.0]. Per the field description: None means the
    # server-side setting is used, and PDF VII-4 recommends 0.85.
    threshold: float | None = Field(
        default=None,
        ge=0.0,
        le=1.0,
        description="None이면 서버 설정 사용. PDF VII-4 권장 0.85",
    )

    # Constrained to 1..50, default 5.
    top_k: int = Field(default=5, ge=1, le=50)

    # Per the field description: None means the server-side setting is used;
    # an explicit value overrides it for this request only.
    autobiography_mode: bool | None = Field(
        default=None,
        description="None이면 서버 설정 사용. 명시하면 요청 단위 override.",
    )
class DetectRequest(BaseModel):
    # Request payload carrying one document: its id, its text, optional
    # metadata and detection options.
    doc_id: str
    # Required and must contain at least one character.
    text: str = Field(..., min_length=1)
    metadata: DocumentMetadata | None = None
    # default_factory builds a new DetectOptions for each request that omits
    # the field, rather than sharing one instance.
    options: DetectOptions = Field(default_factory=DetectOptions)
class EvidenceSpan(BaseModel):
    # A span given by two integer bounds plus the matched string.
    # NOTE(review): whether the bounds are character offsets, and whether
    # `end` is inclusive or exclusive, is not visible here - confirm with
    # the code that produces these spans.
    start: int
    end: int
    matched: str
class InfringementTag(BaseModel):
    """법령 기반 침해 태그. 주(primary) 또는 보조(secondary) 역할."""

    # English reading of the docstring: "Statute-based infringement tag, in a
    # primary or secondary role." The docstring text itself is left verbatim
    # because pydantic surfaces a model's docstring as its schema description.
    tag: LegalTag
    role: Literal["primary", "secondary"]
    # NOTE(review): presumably the Korean label matching `tag` (compare
    # TAG_LABEL_KO); nothing in this model enforces that - confirm upstream.
    label_ko: str
class ScoreBreakdown(BaseModel):
    # Individual score components. The first four are required; every value
    # is validated to lie within [0.0, 1.0].
    text_sim: float = Field(..., ge=0.0, le=1.0)
    lemma_sim: float = Field(..., ge=0.0, le=1.0)
    character_sim: float = Field(..., ge=0.0, le=1.0)
    motif_sim: float = Field(..., ge=0.0, le=1.0)
    # Optional; None when not provided, otherwise range-checked like the rest.
    lsh_jaccard: float | None = Field(
        default=None,
        ge=0.0,
        le=1.0,
    )
class MatchResult(BaseModel):
    # One match against a source document. Only `source_doc` and `similarity`
    # are required; the remaining fields have defaults.
    source_doc: str
    source_title: str | None = None
    # Required, validated to lie within [0.0, 1.0].
    similarity: float = Field(..., ge=0.0, le=1.0)
    # Empty list by default (a new list per instance).
    tags: list[InfringementTag] = Field(default_factory=list)
    case_id: str | None = None
    case_title: str | None = None
    # InfringementType is the alias marked as kept for backward compatibility.
    infringement_type: InfringementType = "unknown"
    # Empty list by default (a new list per instance).
    evidence_spans: list[EvidenceSpan] = Field(default_factory=list)
    score_breakdown: ScoreBreakdown | None = None
class ExtractedElements(BaseModel):
    # Elements extracted from a document. The three list fields default to a
    # new empty list per instance; `genre` defaults to None.
    characters: list[str] = Field(default_factory=list)
    motifs: list[str] = Field(default_factory=list)
    genre: str | None = None
    keywords: list[str] = Field(default_factory=list)
class DetectResponse(BaseModel):
    # Detection result for a single document.
    doc_id: str
    is_infringement: bool
    # Required, validated to lie within [0.0, 1.0].
    confidence: float = Field(..., ge=0.0, le=1.0)
    extracted_elements: ExtractedElements
    # Required; no default, so the field must always be supplied.
    matches: list[MatchResult]
    ccl_basis: str | None = None
    autobiography_mode: bool = False
    candidates_before_filter: int | None = None
    engine_version: str
    # NOTE(review): no timezone requirement is enforced by the type - confirm
    # whether producers are expected to pass timezone-aware values.
    analyzed_at: datetime
class BatchItem(BaseModel):
    # One document inside a batch request: id, text and optional metadata.
    doc_id: str
    # Must contain at least one character. This is the same constraint that
    # DetectRequest.text and CorpusUploadRequest.text carry; without it a
    # batch would accept empty documents that a single-document request
    # rejects at validation time.
    text: str = Field(..., min_length=1)
    metadata: DocumentMetadata | None = None
class BatchRequest(BaseModel):
    # Batch payload: a list of items plus one DetectOptions object.
    # The list must hold between 1 and 500 items.
    items: list[BatchItem] = Field(
        ...,
        min_length=1,
        max_length=500,
    )
    # A new DetectOptions with default values is built when omitted.
    options: DetectOptions = Field(default_factory=DetectOptions)
class BatchCreatedResponse(BaseModel):
    # Describes a batch job: id, status, item total and creation time.
    job_id: str
    # Restricted to these four values.
    status: Literal[
        "queued",
        "running",
        "completed",
        "failed",
    ]
    total: int
    created_at: datetime
class BatchStatusResponse(BaseModel):
    # Status report for a batch job: counters and timestamps, plus optional
    # results and error text.
    job_id: str
    # Restricted to these four values.
    status: Literal[
        "queued",
        "running",
        "completed",
        "failed",
    ]
    total: int
    processed: int
    created_at: datetime
    # The three fields below default to None.
    # NOTE(review): presumably filled in once the job finishes or fails -
    # confirm against the job runner.
    finished_at: datetime | None = None
    results: list[DetectResponse] | None = None
    error: str | None = None
class HealthResponse(BaseModel):
    # Health payload. `status` can only ever be the literal "ok".
    status: Literal["ok"]
    engine_version: str
    corpus_size: int
    taxonomy_version: str | None = None
    autobiography_mode: bool = False
class TaxonomyResponse(BaseModel):
    # Taxonomy payload: two version strings and two lists of dicts.
    # All four fields are required.
    meta_tags_version: str
    cases_version: str
    # The dict entries are untyped, so their keys and values are not
    # validated by this model.
    meta_tags: list[dict]
    cases: list[dict]
class CorpusItem(BaseModel):
    # One corpus document entry. `doc_id` and `title` are required.
    doc_id: str
    title: str
    size_bytes: int = 0
    filename: str | None = None
class CorpusListResponse(BaseModel):
    # Corpus listing: a count plus the document entries.
    # NOTE(review): nothing here ties `total` to len(docs) - confirm whether
    # they are meant to be equal.
    total: int
    docs: list[CorpusItem]
class CorpusUploadRequest(BaseModel):
    # Upload payload for a corpus document.
    # Per the field description, the id is generated automatically when this
    # is left empty.
    doc_id: str | None = Field(
        default=None,
        description="비우면 자동 생성",
    )
    # Both strings are required and must contain at least one character.
    title: str = Field(..., min_length=1)
    text: str = Field(..., min_length=1)
class CorpusUploadResponse(BaseModel):
    # Result of a corpus upload. All five fields are required.
    doc_id: str
    title: str
    size_bytes: int
    corpus_size_after: int
    # NOTE(review): presumably reports whether an index rebuild happened -
    # confirm what is rebuilt against the upload handler.
    rebuilt: bool