from functools import lru_cache
from pathlib import Path

from pydantic_settings import BaseSettings, SettingsConfigDict


# Runtime configuration for the plagiarism-detection service.
#
# Every attribute below is a typed setting with a default value. The model is
# configured to read a UTF-8 encoded ".env" file and to ignore keys it does
# not declare (extra="ignore").
class Settings(BaseSettings):
    model_config = SettingsConfigDict(
        env_file=".env",
        env_file_encoding="utf-8",
        extra="ignore",
    )

    # --- Server binding ---
    host: str = "0.0.0.0"
    port: int = 8000
    log_level: str = "info"  # debug / info / warning / error
    reload: bool = False  # auto-restart, intended for development

    # --- Engine identity and data locations ---
    engine_version: str = "o2o-plagiarism-2.0.0-pdf-v1.2"
    reference_corpus_dir: str = "./data/reference"
    taxonomy_dir: str = "./data/taxonomy"
    autobiography_patterns_path: str = "./data/autobiography/common_patterns.txt"

    # PDF VII-4 recommendation: conservative threshold that favours precision.
    similarity_threshold: float = 0.85

    # --- KoSimCSE / KoSBERT (recommended in PDF VII-3) ---
    # Open-source Korean embedding model.
    use_kosimcse: bool = True
    kosimcse_model: str = "BM-K/KoSimCSE-roberta-multitask"
    kosimcse_max_length: int = 512

    # --- OpenAI (optional: fallback when no in-house model is available) ---
    openai_api_key: str = ""
    openai_extraction_model: str = "gpt-4o-mini"
    openai_embedding_model: str = "text-embedding-3-small"
    use_llm_extractor: bool = False
    use_embedding_similarity: bool = False

    # --- Similarity weights (based on measurements) ---
    # NOTE(review): the original heading calls this "triple" similarity, yet
    # four weights are declared; the defaults add up to 1.00. Confirm whether
    # consumers rely on that sum before changing any single value.
    weight_text_sim: float = 0.30
    weight_lemma_sim: float = 0.45
    weight_char_sim: float = 0.15
    weight_motif_sim: float = 0.10

    # --- PDF VII-3 cascading ---
    use_lsh_filter: bool = True
    lsh_threshold: float = 0.3  # first-stage filter is kept loose (recall first)
    lsh_top_k: int = 50

    # --- PDF VII-4 autobiography mode ---
    autobiography_mode: bool = True
    enable_entity_masking: bool = True

    @property
    def corpus_path(self) -> Path:
        """Return ``reference_corpus_dir`` as a resolved ``Path``."""
        corpus_dir = Path(self.reference_corpus_dir)
        return corpus_dir.resolve()

    @property
    def taxonomy_path(self) -> Path:
        """Return ``taxonomy_dir`` as a resolved ``Path``."""
        taxonomy_root = Path(self.taxonomy_dir)
        return taxonomy_root.resolve()

    @property
    def has_openai(self) -> bool:
        """Return True when ``openai_api_key`` is non-empty after stripping whitespace."""
        trimmed_key = self.openai_api_key.strip()
        return trimmed_key != ""


@lru_cache
def get_settings() -> Settings:
    """Build the ``Settings`` object once and return the cached instance afterwards.

    The function takes no arguments, so ``lru_cache`` stores a single result:
    every call after the first returns the same ``Settings`` object.
    """
    settings = Settings()
    return settings