/**
 * Missing Value Guard & Report Quality Validator — Harness 3
 *
 * Detects all known variants of "no data" from LLM outputs and
 * provides a quality score for generated reports.
 *
 * Usage:
 *   - isMissingValue(val)           → boolean (use everywhere you check for missing data)
 *   - validateReportQuality(report) → DataQualityReport (use before saving reports)
 */

// ─── Missing Value Detection ───

/**
 * Canonical form used on BOTH sides of the sentinel comparison: surrounding
 * whitespace removed, Unicode composed (NFC), inner whitespace runs collapsed
 * to a single space, and lowercased.
 *
 * NFC matters for Hangul: the same visible text can arrive either precomposed
 * or as decomposed jamo, and the two forms are not `===` without normalizing.
 */
function normalizeForComparison(raw: string): string {
  return raw.trim().normalize("NFC").replace(/\s+/g, " ").toLowerCase();
}

/**
 * All known LLM expressions for "no data available".
 * Stored in canonical form (see normalizeForComparison) so each lookup is a
 * single Set membership test; case variants such as "N/A" need no own entry.
 */
const MISSING_PATTERNS: ReadonlySet<string> = new Set(
  [
    // Korean
    "데이터 없음",
    "데이터없음",
    "데이터 미확인",
    "데이터미확인",
    "정보없음",
    "정보 없음",
    "정보 미제공",
    "미제공",
    "확인불가",
    "확인 불가",
    "미확인",
    "미발견",
    "알 수 없음",
    "알수없음",
    "해당 없음",
    "해당없음",
    "없음",
    "미정",
    // English
    "n/a",
    "na",
    "none",
    "null",
    "undefined",
    "not available",
    "unknown",
    "not found",
    "no data",
    // Symbols
    "-",
    "—",
    "–",
    ".",
    "...",
  ].map(normalizeForComparison),
);

/**
 * Check if a value represents missing/unavailable data.
 *
 * Treated as missing:
 *   - `null` and `undefined`
 *   - the numbers `0` and `NaN`
 *   - strings that are empty / whitespace-only, equal to "0", or equal to one
 *     of the MISSING_PATTERNS sentinels after canonicalization
 *
 * Any other value is converted with `String(val)` and checked like a string.
 * Consequently an empty array (stringifies to "") counts as missing, while a
 * plain object (stringifies to "[object Object]") never does.
 *
 * @param val - Value of any type to inspect.
 * @returns `true` when the value carries no usable data.
 */
export function isMissingValue(val: unknown): boolean {
  if (val == null) return true;
  if (typeof val === "number") return val === 0 || Number.isNaN(val);

  const canonical = normalizeForComparison(String(val));
  if (canonical === "" || canonical === "0") return true;
  return MISSING_PATTERNS.has(canonical);
}

/**
 * Clean a value: return it if valid, or return the fallback if missing.
 * Useful for providing defaults without silent data loss.
 *
 * @param val - Candidate value.
 * @param fallback - Value returned when `val` is missing per isMissingValue.
 * @returns `val` unchanged when present, otherwise `fallback`. The declared
 *   type is effectively `unknown`, so callers must narrow before use.
 */
export function cleanValue<T>(val: unknown, fallback: T): T | unknown {
  return isMissingValue(val) ? fallback : val;
}

// ─── Report Quality Validation ───

export interface DataQualityReport {
  score: number; // 0-100
  missingCritical: string[]; // Missing critical fields
  missingImportant: string[]; // Missing important fields
  missingOptional: string[]; // Missing optional fields
  warnings: string[]; // Human-readable warnings
}

/** Fields that MUST be present for a valid report */
const CRITICAL_FIELDS: readonly string[] = [
  "clinicInfo.name",
  "clinicInfo.established",
];

/** Fields that significantly impact report quality */
const IMPORTANT_FIELDS: readonly string[] = [
  "clinicInfo.doctors",
  "channelAnalysis.youtube",
  "channelAnalysis.instagram",
  "channelAnalysis.naverBlog",
  "channelAnalysis.gangnamUnni",
];

/** Nice-to-have fields */
const OPTIONAL_FIELDS: readonly string[] = [
  "channelAnalysis.facebook",
  "channelAnalysis.tiktok",
  "channelAnalysis.naverPlace",
  "channelAnalysis.googleMaps",
  "clinicInfo.location",
];

/** One severity tier of the quality check. */
interface FieldTier {
  /** Result list that collects the missing paths of this tier. */
  readonly bucket: "missingCritical" | "missingImportant" | "missingOptional";
  /** Dot-separated paths checked for this tier. */
  readonly paths: readonly string[];
  /** Points subtracted from the score per missing path. */
  readonly penalty: number;
  /** Prefix of the warning line; tiers without one emit no warning. */
  readonly warningLabel?: string;
}

/** Tiers in evaluation order; the order also fixes the order of `warnings`. */
const FIELD_TIERS: ReadonlyArray<FieldTier> = [
  {
    bucket: "missingCritical",
    paths: CRITICAL_FIELDS,
    penalty: 20,
    warningLabel: "❌ Critical",
  },
  {
    bucket: "missingImportant",
    paths: IMPORTANT_FIELDS,
    penalty: 5,
    warningLabel: "⚠️ Important",
  },
  { bucket: "missingOptional", paths: OPTIONAL_FIELDS, penalty: 2 },
];

/**
 * Validate report data quality and return a score with details.
 *
 * Scoring (starting from 100, floored at 0):
 *   - Each critical field missing:  -20 points, plus a warning
 *   - Each important field missing:  -5 points, plus a warning
 *   - Each optional field missing:   -2 points, no warning
 *
 * @param report - Report object; fields are looked up by dot-separated path.
 * @returns The score, the missing paths grouped by tier, and the warnings.
 */
export function validateReportQuality(
  report: Record<string, unknown>,
): DataQualityReport {
  const result: DataQualityReport = {
    score: 100,
    missingCritical: [],
    missingImportant: [],
    missingOptional: [],
    warnings: [],
  };

  for (const tier of FIELD_TIERS) {
    for (const path of tier.paths) {
      if (!isMissingValue(getNestedValue(report, path))) continue;

      result[tier.bucket].push(path);
      result.score -= tier.penalty;
      if (tier.warningLabel !== undefined) {
        result.warnings.push(`${tier.warningLabel}: '${path}' is missing`);
      }
    }
  }

  result.score = Math.max(0, result.score);
  return result;
}

// ─── Helpers ───

/**
 * Traverse a nested object by dot-separated path.
 * e.g., getNestedValue({ a: { b: 1 } }, "a.b") → 1
 *
 * @returns The value at `path`, or `undefined` as soon as a segment has to be
 *   read from something that is null, undefined, or not an object.
 */
function getNestedValue(obj: Record<string, unknown>, path: string): unknown {
  let current: unknown = obj;
  for (const key of path.split(".")) {
    if (current == null || typeof current !== "object") return undefined;
    current = (current as Record<string, unknown>)[key];
  }
  return current;
}

// ─── Self-Test ───

/** Pairs of [input, expected isMissingValue result] used by the self-test. */
const MISSING_VALUE_TEST_CORPUS: ReadonlyArray<readonly [unknown, boolean]> = [
  [null, true],
  [undefined, true],
  ["", true],
  [" ", true],
  [0, true],
  ["데이터 없음", true],
  ["데이터없음", true],
  ["N/A", true],
  ["n/a", true],
  ["확인 불가", true],
  ["미확인", true],
  ["unknown", true],
  ["-", true],
  ["—", true],
  ["none", true],
  // Valid values
  ["뷰성형외과", false],
  [4.5, false],
  ["2004", false],
  [387, false],
  ["https://example.com", false],
];

/**
 * Run isMissingValue over the built-in corpus.
 *
 * @returns `pass` is true when every case matched; `failures` holds one
 *   message per mismatching case.
 */
export function validateDataQuality(): { pass: boolean; failures: string[] } {
  const failures: string[] = [];
  for (const [val, expected] of MISSING_VALUE_TEST_CORPUS) {
    const result = isMissingValue(val);
    if (result !== expected) {
      failures.push(
        `isMissingValue(${JSON.stringify(val)}): expected ${expected}, got ${result}`,
      );
    }
  }
  return { pass: failures.length === 0, failures };
}