# File-viewer metadata (not part of the program): 101 lines, 3.3 KiB, Python
"""Evaluate the detection engine's precision/recall on the generated pair dataset.

Self-assessment for performance metric #4 (project plan p.23):
"precision of plagiarism detection".

Usage:
    python scripts/evaluate_pairs.py --pairs data/training/pairs.jsonl
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
import argparse
|
|
import json
|
|
import logging
|
|
import sys
|
|
from pathlib import Path
|
|
|
|
# Configure root logging once for this script; INFO level so the progress
# messages emitted during evaluation are visible.
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s %(levelname)s: %(message)s",
)
logger = logging.getLogger("eval-pairs")

# Directory two levels above this file. It is put at the front of sys.path
# so that the `app` package import below resolves when the script is run
# directly (see the usage line in the module docstring).
ROOT = Path(__file__).resolve().parent.parent
sys.path.insert(0, str(ROOT))

# Must come after the sys.path tweak above, hence the E402 suppression.
from app.engine.detector import PlagiarismDetector  # noqa: E402
|
|
|
|
|
|
def _outcome_key(expected: bool, predicted: bool) -> str:
    """Return the confusion-matrix cell ("tp", "fn", "fp" or "tn") for one pair."""
    if expected:
        return "tp" if predicted else "fn"
    return "fp" if predicted else "tn"


def _ratio(numerator: int, denominator: int) -> float:
    """Return ``numerator / denominator``, or 0.0 when the denominator is zero."""
    return numerator / denominator if denominator else 0.0


def main() -> int:
    """Run the detector over every pair in a JSONL file and print metrics.

    Command-line options:
        --pairs      Path to the JSONL pair file
                     (default: ``ROOT / "data/training/pairs.jsonl"``).
        --threshold  Minimum ``result.confidence`` for a detection to count
                     as a positive prediction (default: 0.30).

    Each non-blank line of the pair file must be a JSON object with the keys
    ``pair_id``, ``derived_text`` and ``is_plagiarism``; ``transformation`` is
    optional and is used to group the per-transformation report.

    Returns:
        0 after the report has been printed, 1 if the pair file does not exist.

    Raises:
        json.JSONDecodeError: if a non-blank line is not valid JSON.
        KeyError: if a row lacks one of the required keys.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--pairs", default=str(ROOT / "data/training/pairs.jsonl"))
    parser.add_argument("--threshold", type=float, default=0.30)
    args = parser.parse_args()

    pairs_path = Path(args.pairs)
    if not pairs_path.exists():
        logger.error("Pairs file not found: %s (먼저 generate_plagiarism_pairs.py 실행)", pairs_path)
        return 1

    detector = PlagiarismDetector()
    logger.info("Engine ready (corpus_size=%d)", detector.corpus_size)

    totals: dict[str, int] = {"tp": 0, "fp": 0, "tn": 0, "fn": 0}
    by_transformation: dict[str, dict[str, int]] = {}
    processed = 0

    with pairs_path.open("r", encoding="utf-8") as f:
        for line in f:
            # Blank lines (e.g. a stray empty line at the end of the file)
            # are not records; json.loads would raise on them.
            if not line.strip():
                continue

            row = json.loads(line)
            # A missing, null or empty transformation is grouped under
            # "unknown"; str() keeps the "{t:18s}" report format below safe
            # for non-string labels.
            transformation = str(row.get("transformation") or "unknown")
            expected = bool(row["is_plagiarism"])
            result = detector.detect(
                doc_id=row["pair_id"],
                text=row["derived_text"],
            )
            # A pair counts as detected only when the engine flags it AND its
            # confidence reaches the CLI threshold.
            predicted = bool(result.is_infringement and result.confidence >= args.threshold)

            key = _outcome_key(expected, predicted)
            totals[key] += 1
            bucket = by_transformation.setdefault(
                transformation, {"tp": 0, "fp": 0, "tn": 0, "fn": 0}
            )
            bucket[key] += 1

            processed += 1
            if processed % 10 == 0:
                logger.info("[%d] processed...", processed)

    if processed == 0:
        # The metrics below are all zero in this case; make that explicit so
        # it is not mistaken for a genuinely poor detector.
        logger.warning("No pairs found in %s", pairs_path)

    tp, fp, tn, fn = totals["tp"], totals["fp"], totals["tn"], totals["fn"]
    precision = _ratio(tp, tp + fp)
    recall = _ratio(tp, tp + fn)
    f1 = 2 * precision * recall / (precision + recall) if (precision + recall) else 0.0

    print()
    print("=" * 60)
    print(f"전체 정밀도 (precision): {precision:.4f} (목표 0.95)")
    print(f"재현율 (recall): {recall:.4f}")
    print(f"F1: {f1:.4f}")
    print(f"TP={tp} FP={fp} TN={tn} FN={fn}")
    print()
    print("[변형 유형별]")
    for t, b in by_transformation.items():
        total = sum(b.values())
        prec_t = _ratio(b["tp"], b["tp"] + b["fp"])
        rec_t = _ratio(b["tp"], b["tp"] + b["fn"])
        print(f" {t:18s} n={total:3d} P={prec_t:.3f} R={rec_t:.3f} "
              f"TP={b['tp']} FP={b['fp']} TN={b['tn']} FN={b['fn']}")
    return 0
|
|
|
|
|
|
# Script entry point: propagate main()'s integer result as the process
# exit status.
if __name__ == "__main__":
    sys.exit(main())
|