# o2o-plagiarism-ai/scripts/evaluate_pairs.py
#
# File viewer metadata: 101 lines, 3.3 KiB, Python

"""생성된 페어 데이터셋으로 탐지 엔진 정밀도/재현율 평가.
성능지표 #4 (계획서 p.23): "표절 여부 판별 정밀도(precision)" 자체 평가.
사용:
python scripts/evaluate_pairs.py --pairs data/training/pairs.jsonl
"""
from __future__ import annotations
import argparse
import json
import logging
import sys
from pathlib import Path
# Configure logging at import time: INFO level, timestamped "LEVEL: message" lines.
logging.basicConfig(level=logging.INFO, format="%(asctime)s %(levelname)s: %(message)s")
logger = logging.getLogger("eval-pairs")
# Directory two levels above this file (presumably the project root, given the
# scripts/ path shown in the usage line -- confirm if the file is moved).
ROOT = Path(__file__).resolve().parent.parent
# ROOT must be on sys.path *before* the `app` import below, which is why that
# import sits after executable statements (hence the E402 suppression).
sys.path.insert(0, str(ROOT))
from app.engine.detector import PlagiarismDetector # noqa: E402
def _new_counts() -> dict[str, int]:
    """Return an empty confusion-matrix counter."""
    return {"tp": 0, "fp": 0, "tn": 0, "fn": 0}


def _tally(counts: dict[str, int], expected: bool, predicted: bool) -> None:
    """Increment the confusion-matrix cell of *counts* for one labelled prediction."""
    if expected:
        counts["tp" if predicted else "fn"] += 1
    else:
        counts["fp" if predicted else "tn"] += 1


def _precision_recall(counts: dict[str, int]) -> tuple[float, float]:
    """Return ``(precision, recall)`` for *counts*; an empty denominator gives 0.0."""
    tp, fp, fn = counts["tp"], counts["fp"], counts["fn"]
    precision = tp / (tp + fp) if (tp + fp) else 0.0
    recall = tp / (tp + fn) if (tp + fn) else 0.0
    return precision, recall


def main() -> int:
    """Run the detector over a JSONL pair file and print precision/recall/F1.

    Command-line options:
        --pairs      Path to the JSONL pair file
                     (default: ``data/training/pairs.jsonl`` under ``ROOT``).
        --threshold  Minimum ``confidence`` for a result flagged as infringement
                     to count as a positive prediction (default: 0.30).

    Every non-blank line of the pair file must be a JSON object providing
    ``pair_id``, ``derived_text`` and ``is_plagiarism``. ``transformation`` is
    optional and only used to group the per-type breakdown; a missing or null
    value is reported as ``"unknown"``.

    Returns:
        0 on success; 1 if the pair file does not exist or contains a line
        that is not valid JSON.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--pairs", default=str(ROOT / "data/training/pairs.jsonl"))
    parser.add_argument("--threshold", type=float, default=0.30)
    args = parser.parse_args()

    pairs_path = Path(args.pairs)
    if not pairs_path.exists():
        logger.error("Pairs file not found: %s (먼저 generate_plagiarism_pairs.py 실행)", pairs_path)
        return 1

    detector = PlagiarismDetector()
    logger.info("Engine ready (corpus_size=%d)", detector.corpus_size)

    overall = _new_counts()
    by_transformation: dict[str, dict[str, int]] = {}
    processed = 0

    with pairs_path.open("r", encoding="utf-8") as f:
        for line_no, line in enumerate(f, 1):
            # Blank lines (e.g. a trailing newline or manual edits) are not
            # records; json.loads would raise on them.
            if not line.strip():
                continue
            try:
                row = json.loads(line)
            except json.JSONDecodeError as exc:
                # Report where the file is broken instead of a bare traceback.
                logger.error("Invalid JSON on line %d of %s: %s", line_no, pairs_path, exc)
                return 1

            # `or` also covers an explicit null, which would otherwise break
            # the string format spec used in the per-type report below.
            transformation = str(row.get("transformation") or "unknown")
            expected = bool(row["is_plagiarism"])
            result = detector.detect(
                doc_id=row["pair_id"],
                text=row["derived_text"],
            )
            predicted = bool(result.is_infringement and result.confidence >= args.threshold)

            _tally(overall, expected, predicted)
            _tally(by_transformation.setdefault(transformation, _new_counts()), expected, predicted)

            processed += 1
            if processed % 10 == 0:
                logger.info("[%d] processed...", processed)

    if processed == 0:
        logger.warning("No pairs were evaluated from %s; all metrics below are 0.", pairs_path)

    tp, fp, tn, fn = overall["tp"], overall["fp"], overall["tn"], overall["fn"]
    precision, recall = _precision_recall(overall)
    f1 = 2 * precision * recall / (precision + recall) if (precision + recall) else 0.0

    print()
    print("=" * 60)
    print(f"전체 정밀도 (precision): {precision:.4f} (목표 0.95)")
    print(f"재현율 (recall): {recall:.4f}")
    print(f"F1: {f1:.4f}")
    print(f"TP={tp} FP={fp} TN={tn} FN={fn}")
    print()
    print("[변형 유형별]")
    for t, b in by_transformation.items():
        total = sum(b.values())
        prec_t, rec_t = _precision_recall(b)
        print(f" {t:18s} n={total:3d} P={prec_t:.3f} R={rec_t:.3f} "
              f"TP={b['tp']} FP={b['fp']} TN={b['tn']} FN={b['fn']}")
    return 0
if __name__ == "__main__":
    # Script entry point: propagate main()'s return value as the process exit status.
    sys.exit(main())