"""Evaluate the detection engine's precision/recall on a generated pair dataset.

Self-evaluation for performance metric #4 (plan document p.23):
"precision of the plagiarism verdict".

Usage:
    python scripts/evaluate_pairs.py --pairs data/training/pairs.jsonl
"""
from __future__ import annotations

import argparse
import json
import logging
import sys
from pathlib import Path

logging.basicConfig(level=logging.INFO, format="%(asctime)s %(levelname)s: %(message)s")
logger = logging.getLogger("eval-pairs")

# Make the repository root importable so the script can be run directly
# from the scripts/ directory without installing the package.
ROOT = Path(__file__).resolve().parent.parent
sys.path.insert(0, str(ROOT))

from app.engine.detector import PlagiarismDetector  # noqa: E402


def _new_counts() -> dict[str, int]:
    """Return a fresh, zeroed confusion-matrix tally."""
    return {"tp": 0, "fp": 0, "tn": 0, "fn": 0}


def _outcome(expected, predicted) -> str:
    """Map a (ground truth, prediction) pair to its confusion-matrix key.

    Both arguments are interpreted by truthiness.
    """
    if expected:
        return "tp" if predicted else "fn"
    return "fp" if predicted else "tn"


def _precision_recall(counts: dict[str, int]) -> tuple[float, float]:
    """Return ``(precision, recall)`` for a tally, using 0.0 when undefined."""
    predicted_positive = counts["tp"] + counts["fp"]
    actual_positive = counts["tp"] + counts["fn"]
    precision = counts["tp"] / predicted_positive if predicted_positive else 0.0
    recall = counts["tp"] / actual_positive if actual_positive else 0.0
    return precision, recall


def main() -> int:
    """Run the detector over every pair in a JSONL file and print metrics.

    Command-line options:
        --pairs      Path to the JSONL pairs file
                     (default: ``<ROOT>/data/training/pairs.jsonl``).
        --threshold  Minimum ``result.confidence`` for a detection flagged as
                     infringement to count as a positive (default: 0.30).

    Each non-blank line must be a JSON object with the keys ``pair_id``,
    ``derived_text`` and ``is_plagiarism``; ``transformation`` is optional and
    defaults to ``"unknown"``.

    Returns:
        0 on success; 1 if the pairs file does not exist or contains a line
        that is not valid JSON.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--pairs", default=str(ROOT / "data/training/pairs.jsonl"))
    parser.add_argument("--threshold", type=float, default=0.30)
    args = parser.parse_args()

    pairs_path = Path(args.pairs)
    if not pairs_path.exists():
        logger.error("Pairs file not found: %s (먼저 generate_plagiarism_pairs.py 실행)", pairs_path)
        return 1

    detector = PlagiarismDetector()
    logger.info("Engine ready (corpus_size=%d)", detector.corpus_size)

    totals = _new_counts()
    by_transformation: dict[str, dict[str, int]] = {}
    processed = 0

    with pairs_path.open("r", encoding="utf-8") as f:
        for line_no, line in enumerate(f, 1):
            # Blank lines are not records; json.loads would raise on them.
            if not line.strip():
                continue
            try:
                row = json.loads(line)
            except json.JSONDecodeError as exc:
                # Report where the bad record is instead of a bare traceback.
                logger.error("Invalid JSON at %s:%d: %s", pairs_path, line_no, exc)
                return 1

            transformation = row.get("transformation", "unknown")
            expected = row["is_plagiarism"]

            result = detector.detect(
                doc_id=row["pair_id"],
                text=row["derived_text"],
            )
            predicted = result.is_infringement and result.confidence >= args.threshold

            # Classify once and update both tallies so they cannot drift apart.
            key = _outcome(expected, predicted)
            totals[key] += 1
            by_transformation.setdefault(transformation, _new_counts())[key] += 1

            processed += 1
            if processed % 10 == 0:
                logger.info("[%d] processed...", processed)

    if not processed:
        logger.warning("No pairs found in %s; all metrics are reported as 0", pairs_path)

    precision, recall = _precision_recall(totals)
    f1 = 2 * precision * recall / (precision + recall) if (precision + recall) else 0.0

    print()
    print("=" * 60)
    print(f"전체 정밀도 (precision): {precision:.4f} (목표 0.95)")
    print(f"재현율 (recall): {recall:.4f}")
    print(f"F1: {f1:.4f}")
    print(f"TP={totals['tp']} FP={totals['fp']} TN={totals['tn']} FN={totals['fn']}")
    print()
    print("[변형 유형별]")
    for t, b in by_transformation.items():
        total = sum(b.values())
        prec_t, rec_t = _precision_recall(b)
        print(f" {t:18s} n={total:3d} P={prec_t:.3f} R={rec_t:.3f} "
              f"TP={b['tp']} FP={b['fp']} TN={b['tn']} FN={b['fn']}")
    return 0


if __name__ == "__main__":
    raise SystemExit(main())