186 lines
5.2 KiB
Python
186 lines
5.2 KiB
Python
from datetime import datetime
|
|
from typing import Literal
|
|
|
|
from pydantic import BaseModel, Field
|
|
|
|
# Ten statute-based meta tags (see chapter IV of the reference PDF).
# Member order is part of the alias and is kept as declared.
LegalTag = Literal[
    "reproduction",            # right of reproduction
    "public_transmission",     # right of public transmission
    "distribution",            # right of distribution
    "derivative_work",         # right to create derivative works
    "publication",             # right to make the work public
    "attribution",             # right of attribution (author's name)
    "integrity",               # right of integrity
    "citation_missing",        # citation / quotation marking omitted
    "false_authorship",        # presented as if it were one's own creation
    "substandard_derivative",  # reworking that falls short of a derivative work
]
|
|
|
|
# Korean display label for each legal tag key. The values are runtime
# strings and must stay exactly as written.
TAG_LABEL_KO: dict[str, str] = {
    "reproduction": "복제권",
    "public_transmission": "공중송신권",
    "distribution": "배포권",
    "derivative_work": "2차적저작물작성권",
    "publication": "공표권",
    "attribution": "성명표시권",
    "integrity": "동일성유지권",
    "citation_missing": "인용 표시 누락",
    "false_authorship": "자기 창작인 양 표시",
    "substandard_derivative": "2차적저작물 미달 가공",
}
|
|
|
|
# Kept for backward compatibility.
InfringementType = Literal[
    "copy",
    "transform",
    "plot",
    "character",
    "background",
    "unknown",
]
|
|
|
|
|
|
class DocumentMetadata(BaseModel):
    # Descriptive metadata attached to a document. Every field is optional
    # and defaults to None.
    title: str | None = None
    author: str | None = None
    genre: str | None = None
    publisher: str | None = None
    publication_year: int | None = None
|
|
|
|
|
|
class DetectOptions(BaseModel):
    # Optional knobs accompanying a detection request; all have defaults.

    return_evidence: bool = True

    # Constrained to [0.0, 1.0]. Per the description string, None means the
    # server-side setting is used (the description recommends 0.85).
    threshold: float | None = Field(
        default=None,
        ge=0.0,
        le=1.0,
        description="None이면 서버 설정 사용. PDF VII-4 권장 0.85",
    )

    # Integer in the range 1..50; defaults to 5.
    top_k: int = Field(
        default=5,
        ge=1,
        le=50,
    )

    # Per the description string, None defers to the server setting and an
    # explicit value overrides it for this request only.
    autobiography_mode: bool | None = Field(
        default=None,
        description="None이면 서버 설정 사용. 명시하면 요청 단위 override.",
    )
|
|
|
|
|
|
class DetectRequest(BaseModel):
    # Input for a single-document detection call.

    doc_id: str

    # Required and must contain at least one character.
    text: str = Field(..., min_length=1)

    metadata: DocumentMetadata | None = None

    # A fresh DetectOptions (all defaults) is built when the caller omits it.
    options: DetectOptions = Field(default_factory=DetectOptions)
|
|
|
|
|
|
class EvidenceSpan(BaseModel):
    # A matched span: two integer bounds plus the matched string.
    # NOTE(review): start/end are presumably offsets into the analysed text,
    # and no ordering or non-negativity is enforced here -- confirm with the
    # producer of these spans.
    start: int
    end: int
    matched: str
|
|
|
|
|
|
class InfringementTag(BaseModel):
    """법령 기반 침해 태그. 주(primary) 또는 보조(secondary) 역할."""

    # English gloss of the docstring: statute-based infringement tag, acting
    # in either a primary or a secondary role. The docstring is left verbatim
    # because pydantic exposes a model docstring as the schema description.

    tag: LegalTag

    # Exactly one of the two role strings.
    role: Literal["primary", "secondary"]

    # Required label string.
    # NOTE(review): presumably taken from TAG_LABEL_KO -- nothing here
    # enforces that; confirm at the construction site.
    label_ko: str
|
|
|
|
|
|
class ScoreBreakdown(BaseModel):
    # Component scores. The four *_sim fields are required; every value is
    # validated to lie within [0.0, 1.0].
    text_sim: float = Field(..., ge=0.0, le=1.0)
    lemma_sim: float = Field(..., ge=0.0, le=1.0)
    character_sim: float = Field(..., ge=0.0, le=1.0)
    motif_sim: float = Field(..., ge=0.0, le=1.0)

    # Optional; same [0.0, 1.0] bound applies when a value is supplied.
    lsh_jaccard: float | None = Field(
        default=None,
        ge=0.0,
        le=1.0,
    )
|
|
|
|
|
|
class MatchResult(BaseModel):
    # One match against a source document.

    source_doc: str
    source_title: str | None = None

    # Required, validated to [0.0, 1.0].
    similarity: float = Field(..., ge=0.0, le=1.0)

    # Empty list unless populated.
    tags: list[InfringementTag] = Field(default_factory=list)

    case_id: str | None = None
    case_title: str | None = None

    # InfringementType is marked in this file as kept for backward
    # compatibility; falls back to "unknown" when not set.
    infringement_type: InfringementType = "unknown"

    # Empty list unless populated.
    evidence_spans: list[EvidenceSpan] = Field(default_factory=list)

    score_breakdown: ScoreBreakdown | None = None
|
|
|
|
|
|
class ExtractedElements(BaseModel):
    # Elements extracted from a document. The list fields default to a new
    # empty list per instance; genre defaults to None.
    characters: list[str] = Field(default_factory=list)
    motifs: list[str] = Field(default_factory=list)
    genre: str | None = None
    keywords: list[str] = Field(default_factory=list)
|
|
|
|
|
|
class DetectResponse(BaseModel):
    # Result of analysing one document.

    doc_id: str
    is_infringement: bool

    # Required, validated to [0.0, 1.0].
    confidence: float = Field(..., ge=0.0, le=1.0)

    extracted_elements: ExtractedElements

    # Required; no default, so it must always be provided (may be empty).
    matches: list[MatchResult]

    ccl_basis: str | None = None
    autobiography_mode: bool = False
    candidates_before_filter: int | None = None

    engine_version: str

    # NOTE(review): timezone awareness is not enforced here -- confirm the
    # producer's convention.
    analyzed_at: datetime
|
|
|
|
|
|
class BatchItem(BaseModel):
    # One document inside a batch submission. Carries the same doc_id / text /
    # metadata trio as DetectRequest, without per-item options.

    doc_id: str

    # Must be non-empty, in line with DetectRequest.text and
    # CorpusUploadRequest.text, which both declare min_length=1. Previously
    # an empty string passed validation here only.
    # NOTE(review): this rejects a batch containing an empty item at
    # validation time -- confirm no caller relies on sending empty text.
    text: str = Field(..., min_length=1)

    metadata: DocumentMetadata | None = None
|
|
|
|
|
|
class BatchRequest(BaseModel):
    # A batch submission: between 1 and 500 items plus one shared options
    # object.

    items: list[BatchItem] = Field(
        ...,
        min_length=1,
        max_length=500,
    )

    # A fresh DetectOptions (all defaults) is built when the caller omits it.
    options: DetectOptions = Field(default_factory=DetectOptions)
|
|
|
|
|
|
class BatchCreatedResponse(BaseModel):
    # Descriptor of a batch job: id, status, item total and creation time.

    job_id: str

    # Restricted to these four status strings.
    status: Literal[
        "queued",
        "running",
        "completed",
        "failed",
    ]

    total: int
    created_at: datetime
|
|
|
|
|
|
class BatchStatusResponse(BaseModel):
    # Status snapshot of a batch job.

    job_id: str

    # Restricted to these four status strings.
    status: Literal[
        "queued",
        "running",
        "completed",
        "failed",
    ]

    total: int
    processed: int
    created_at: datetime

    # The remaining fields are optional and default to None.
    finished_at: datetime | None = None
    results: list[DetectResponse] | None = None
    error: str | None = None
|
|
|
|
|
|
class HealthResponse(BaseModel):
    # Health report. The only value status accepts is the literal "ok".
    status: Literal["ok"]
    engine_version: str
    corpus_size: int
    taxonomy_version: str | None = None
    autobiography_mode: bool = False
|
|
|
|
|
|
class TaxonomyResponse(BaseModel):
    # Taxonomy payload: two version strings and two lists of dicts.
    # The dict entries are untyped, so their keys and values are not
    # validated by this model.
    meta_tags_version: str
    cases_version: str
    meta_tags: list[dict]
    cases: list[dict]
|
|
|
|
|
|
class CorpusItem(BaseModel):
    # One corpus entry. doc_id and title are required; size_bytes defaults
    # to 0 and filename to None.
    doc_id: str
    title: str
    size_bytes: int = 0
    filename: str | None = None
|
|
|
|
|
|
class CorpusListResponse(BaseModel):
    # Corpus listing: an integer total and the list of entries.
    # NOTE(review): nothing here ties total to len(docs) -- confirm whether
    # they are meant to be equal.
    total: int
    docs: list[CorpusItem]
|
|
|
|
|
|
class CorpusUploadRequest(BaseModel):
    # Input for adding a document to the corpus.

    # Optional; per the description string, an id is generated automatically
    # when this is left empty.
    doc_id: str | None = Field(
        default=None,
        description="비우면 자동 생성",
    )

    # Both are required and must contain at least one character.
    title: str = Field(..., min_length=1)
    text: str = Field(..., min_length=1)
|
|
|
|
|
|
class CorpusUploadResponse(BaseModel):
    # Outcome of a corpus upload. All five fields are required.
    doc_id: str
    title: str
    size_bytes: int
    corpus_size_after: int
    # NOTE(review): presumably signals whether an index was rebuilt after the
    # upload -- confirm against the handler that fills it in.
    rebuilt: bool
|