from datetime import datetime
from typing import Literal

from pydantic import BaseModel, Field

# NOTE: documentation in this module is written as `#` comments rather than
# class docstrings on purpose. Pydantic copies a model's docstring into the
# generated JSON schema as its `description`, so adding or rewording a
# docstring would change the published API schema.

# ---------------------------------------------------------------------------
# Tag vocabularies
# ---------------------------------------------------------------------------

# The ten statute-based meta tags (PDF chapter IV).
LegalTag = Literal[
    "reproduction",            # right of reproduction
    "public_transmission",     # right of public transmission
    "distribution",            # right of distribution
    "derivative_work",         # right to create derivative works
    "publication",             # right to make the work public
    "attribution",             # right of attribution (name indication)
    "integrity",               # right of integrity
    "citation_missing",        # missing citation marking
    "false_authorship",        # presented as if it were one's own creation
    "substandard_derivative",  # processing that falls short of a derivative work
]

# Korean display label for each tag string listed in LegalTag.
TAG_LABEL_KO: dict[str, str] = {
    "reproduction": "복제권",
    "public_transmission": "공중송신권",
    "distribution": "배포권",
    "derivative_work": "2차적저작물작성권",
    "publication": "공표권",
    "attribution": "성명표시권",
    "integrity": "동일성유지권",
    "citation_missing": "인용 표시 누락",
    "false_authorship": "자기 창작인 양 표시",
    "substandard_derivative": "2차적저작물 미달 가공",
}

# Kept for backward compatibility.
InfringementType = Literal[
    "copy",
    "transform",
    "plot",
    "character",
    "background",
    "unknown",
]

# Status values shared by the two batch-job response models below.
_JobStatus = Literal["queued", "running", "completed", "failed"]


# ---------------------------------------------------------------------------
# Detection request models
# ---------------------------------------------------------------------------

# Bibliographic fields attached to a document; every field is optional and
# defaults to None.
class DocumentMetadata(BaseModel):
    title: str | None = None
    author: str | None = None
    genre: str | None = None
    publisher: str | None = None
    publication_year: int | None = None


# Per-request tuning options.
class DetectOptions(BaseModel):
    return_evidence: bool = True
    # Must lie in [0.0, 1.0] when given. Per the field description, None means
    # the server setting is used and PDF VII-4 recommends 0.85.
    threshold: float | None = Field(
        default=None,
        ge=0.0,
        le=1.0,
        description="None이면 서버 설정 사용. PDF VII-4 권장 0.85",
    )
    # Defaults to 5; accepted range is 1..50 inclusive.
    top_k: int = Field(default=5, ge=1, le=50)
    # Per the field description, None means the server setting is used and an
    # explicit value overrides it for this request only.
    autobiography_mode: bool | None = Field(
        default=None,
        description="None이면 서버 설정 사용. 명시하면 요청 단위 override.",
    )


# One document to analyse, plus optional metadata and options.
class DetectRequest(BaseModel):
    doc_id: str
    # Required and must contain at least one character.
    text: str = Field(..., min_length=1)
    metadata: DocumentMetadata | None = None
    # A fresh DetectOptions (all defaults) is built when omitted.
    options: DetectOptions = Field(default_factory=DetectOptions)


# ---------------------------------------------------------------------------
# Detection result models
# ---------------------------------------------------------------------------

# A start/end pair together with the matched string.
# NOTE(review): the unit of start/end (characters vs. bytes) and whether `end`
# is inclusive are not stated in this module - confirm with the producer.
class EvidenceSpan(BaseModel):
    start: int
    end: int
    matched: str


# The docstring below is kept verbatim because Pydantic emits it as the schema
# description. In English: "Statute-based infringement tag. Primary or
# secondary role."
class InfringementTag(BaseModel):
    """법령 기반 침해 태그. 주(primary) 또는 보조(secondary) 역할."""

    tag: LegalTag
    role: Literal["primary", "secondary"]
    label_ko: str


# Individual similarity components; each one is constrained to [0.0, 1.0].
class ScoreBreakdown(BaseModel):
    text_sim: float = Field(..., ge=0.0, le=1.0)
    lemma_sim: float = Field(..., ge=0.0, le=1.0)
    character_sim: float = Field(..., ge=0.0, le=1.0)
    motif_sim: float = Field(..., ge=0.0, le=1.0)
    # Optional component; defaults to None.
    lsh_jaccard: float | None = Field(default=None, ge=0.0, le=1.0)


# One matched source document with its similarity, tags and evidence.
class MatchResult(BaseModel):
    source_doc: str
    source_title: str | None = None
    # Required; constrained to [0.0, 1.0].
    similarity: float = Field(..., ge=0.0, le=1.0)
    tags: list[InfringementTag] = Field(default_factory=list)
    case_id: str | None = None
    case_title: str | None = None
    # Value from the backward-compatibility vocabulary; defaults to "unknown".
    infringement_type: InfringementType = "unknown"
    evidence_spans: list[EvidenceSpan] = Field(default_factory=list)
    score_breakdown: ScoreBreakdown | None = None


# Elements extracted from a document; the list fields default to empty lists.
class ExtractedElements(BaseModel):
    characters: list[str] = Field(default_factory=list)
    motifs: list[str] = Field(default_factory=list)
    genre: str | None = None
    keywords: list[str] = Field(default_factory=list)


# Full result for one analysed document.
class DetectResponse(BaseModel):
    doc_id: str
    is_infringement: bool
    # Required; constrained to [0.0, 1.0].
    confidence: float = Field(..., ge=0.0, le=1.0)
    extracted_elements: ExtractedElements
    matches: list[MatchResult]
    ccl_basis: str | None = None
    autobiography_mode: bool = False
    candidates_before_filter: int | None = None
    engine_version: str
    analyzed_at: datetime


# ---------------------------------------------------------------------------
# Batch models
# ---------------------------------------------------------------------------

# One document inside a batch. Unlike DetectRequest, `text` carries no
# min_length constraint and there is no per-item options field.
class BatchItem(BaseModel):
    doc_id: str
    text: str
    metadata: DocumentMetadata | None = None


# A batch of 1..500 items sharing one DetectOptions.
class BatchRequest(BaseModel):
    items: list[BatchItem] = Field(..., min_length=1, max_length=500)
    options: DetectOptions = Field(default_factory=DetectOptions)


# Job id, status, item total and creation time for a batch job.
class BatchCreatedResponse(BaseModel):
    job_id: str
    status: _JobStatus
    total: int
    created_at: datetime


# Progress snapshot of a batch job; finished_at, results and error all default
# to None.
class BatchStatusResponse(BaseModel):
    job_id: str
    status: _JobStatus
    total: int
    processed: int
    created_at: datetime
    finished_at: datetime | None = None
    results: list[DetectResponse] | None = None
    error: str | None = None


# ---------------------------------------------------------------------------
# Service / taxonomy / corpus models
# ---------------------------------------------------------------------------

# Health payload; `status` accepts only the literal "ok".
class HealthResponse(BaseModel):
    status: Literal["ok"]
    engine_version: str
    corpus_size: int
    taxonomy_version: str | None = None
    autobiography_mode: bool = False


# Taxonomy payload; meta_tags and cases are lists of untyped dicts.
class TaxonomyResponse(BaseModel):
    meta_tags_version: str
    cases_version: str
    meta_tags: list[dict]
    cases: list[dict]


# One corpus document entry; size_bytes defaults to 0.
class CorpusItem(BaseModel):
    doc_id: str
    title: str
    size_bytes: int = 0
    filename: str | None = None


# A list of corpus entries together with a total count.
class CorpusListResponse(BaseModel):
    total: int
    docs: list[CorpusItem]


# Payload for adding a document to the corpus.
class CorpusUploadRequest(BaseModel):
    # Per the field description, an id is generated automatically when this is
    # left empty.
    doc_id: str | None = Field(default=None, description="비우면 자동 생성")
    # Both title and text are required and must be non-empty.
    title: str = Field(..., min_length=1)
    text: str = Field(..., min_length=1)


# Result payload returned for a corpus upload.
class CorpusUploadResponse(BaseModel):
    doc_id: str
    title: str
    size_bytes: int
    corpus_size_after: int
    rebuilt: bool