o2o-infinith-demo/supabase/functions/_shared/dataQuality.ts

193 lines
5.3 KiB
TypeScript
Raw Permalink Blame History

This file contains ambiguous Unicode characters!

This file contains ambiguous Unicode characters that may be confused with others in your current locale. If your use case is intentional and legitimate, you can safely ignore this warning. Use the Escape button to highlight these characters.

/**
* Missing Value Guard & Report Quality Validator — Harness 3
*
* Detects all known variants of "no data" from LLM outputs and
* provides a quality score for generated reports.
*
* Usage:
* - isMissingValue(val) → boolean (use everywhere you check for missing data)
* - validateReportQuality(report) → DataQualityReport (use before saving reports)
*/
// ─── Missing Value Detection ───
/**
 * All known LLM expressions for "no data available".
 *
 * Entries may be written in any letter case: they are canonicalized
 * (NFC, trimmed, lowercased) once when MISSING_PATTERN_SET is built, so
 * duplicates such as "N/A" / "n/a" are harmless.
 */
const MISSING_PATTERNS: ReadonlyArray<string> = [
  // Korean
  "데이터 없음", "데이터없음", "데이터 미확인", "데이터미확인",
  "정보없음", "정보 없음", "정보 미제공", "미제공",
  "확인불가", "확인 불가", "미확인", "미발견",
  "알 수 없음", "알수없음", "해당 없음", "해당없음",
  "없음", "미정",
  // English
  "n/a", "na", "none", "null", "undefined",
  "not available", "unknown", "not found", "no data",
  // Symbols
  "-", "—", "", ".", "...", "N/A",
];

/**
 * Canonical form used on BOTH sides of the pattern comparison.
 * NFC normalization matters for Hangul: the same visible text can arrive
 * decomposed (NFD), which would otherwise never equal the literals above.
 */
function normalizeMissingCandidate(text: string): string {
  return text.normalize("NFC").trim().toLowerCase();
}

/** Pre-normalized lookup table so each check is a single Set probe. */
const MISSING_PATTERN_SET: ReadonlySet<string> = new Set(
  MISSING_PATTERNS.map((pattern) => normalizeMissingCandidate(pattern)),
);

/** True for object literals and null-prototype objects (not Date, Map, class instances, ...). */
function isPlainRecord(value: unknown): value is Record<string, unknown> {
  if (typeof value !== "object" || value === null) return false;
  const proto: unknown = Object.getPrototypeOf(value);
  return proto === null || proto === Object.prototype;
}

/**
 * Check if a value represents missing/unavailable data.
 *
 * Rules, in order:
 * - `null` / `undefined` are missing.
 * - Numbers are missing when they are `0` or `NaN`.
 * - Arrays are missing when empty or when every element is itself missing.
 * - Plain objects are missing when they have no own enumerable keys. They
 *   are never stringified, so `{}` no longer slips through as
 *   "[object Object]" and null-prototype objects no longer throw.
 * - Anything else is stringified; it is missing when the canonical text is
 *   empty, "0", or one of MISSING_PATTERNS.
 *
 * Input is expected to be acyclic, JSON-like data; a self-referencing array
 * would recurse without bound.
 *
 * @param val Arbitrary value, typically a field taken from LLM output.
 * @returns `true` when the value carries no usable data.
 */
export function isMissingValue(val: unknown): boolean {
  if (val == null) return true;
  if (typeof val === "number") return val === 0 || Number.isNaN(val);
  if (Array.isArray(val)) return val.every((item) => isMissingValue(item));
  if (isPlainRecord(val)) return Object.keys(val).length === 0;

  const text = normalizeMissingCandidate(String(val));
  return text === "" || text === "0" || MISSING_PATTERN_SET.has(text);
}
/**
 * Return `val` unchanged when it carries data, otherwise return `fallback`.
 *
 * "Missing" is decided by isMissingValue.
 *
 * The result type is `V | T`. The previous `T | unknown` collapsed to plain
 * `unknown`, discarding everything the caller knew about either argument.
 * `V` defaults to `unknown`, so existing call sites that pass a single
 * explicit type argument (`cleanValue<string>(x, "")`) type-check exactly as
 * before.
 *
 * @param val Candidate value.
 * @param fallback Value to use when `val` is missing.
 * @returns `val` if present, else `fallback`.
 */
export function cleanValue<T, V = unknown>(val: V, fallback: T): V | T {
  return isMissingValue(val) ? fallback : val;
}
// ─── Report Quality Validation ───
/**
* Outcome of validateReportQuality(): an overall score plus the dot-separated
* field paths found missing, grouped by severity tier.
*/
export interface DataQualityReport {
// Starts at 100; validateReportQuality subtracts a per-tier penalty for each
// missing field and clamps the result so it never drops below 0.
score: number; // 0-100
missingCritical: string[]; // Missing critical fields
missingImportant: string[]; // Missing important fields
// Optional gaps lower the score but do not add an entry to `warnings`.
missingOptional: string[]; // Missing optional fields
warnings: string[]; // Human-readable warnings
}
/**
 * Dot-separated paths that MUST be present for a valid report.
 * validateReportQuality deducts 20 points and records a warning for each
 * one that is missing. Read-only: this is shared module-level configuration.
 */
const CRITICAL_FIELDS: ReadonlyArray<string> = [
  "clinicInfo.name",
  "clinicInfo.established",
];
/**
 * Dot-separated paths that significantly impact report quality.
 * validateReportQuality deducts 5 points and records a warning for each
 * one that is missing. Read-only: this is shared module-level configuration.
 */
const IMPORTANT_FIELDS: ReadonlyArray<string> = [
  "clinicInfo.doctors",
  "channelAnalysis.youtube",
  "channelAnalysis.instagram",
  "channelAnalysis.naverBlog",
  "channelAnalysis.gangnamUnni",
];
/**
 * Nice-to-have dot-separated paths.
 * validateReportQuality deducts 2 points for each one that is missing and
 * lists it in `missingOptional`, but emits no warning. Read-only: this is
 * shared module-level configuration.
 */
const OPTIONAL_FIELDS: ReadonlyArray<string> = [
  "channelAnalysis.facebook",
  "channelAnalysis.tiktok",
  "channelAnalysis.naverPlace",
  "channelAnalysis.googleMaps",
  "clinicInfo.location",
];
/**
 * Score a report by how many of the expected fields actually carry data.
 *
 * Every path in the three field lists is resolved against `report` and
 * tested with isMissingValue. The score starts at 100 and loses:
 * - 20 points per missing critical field (warning emitted)
 * - 5 points per missing important field (warning emitted)
 * - 2 points per missing optional field (no warning)
 * The final score is clamped so it never goes below 0.
 *
 * @param report Report object to inspect; nested fields are looked up by
 *   dot-separated path.
 * @returns The score, the missing paths per tier, and the warnings in the
 *   order critical → important.
 */
export function validateReportQuality(
  report: Record<string, unknown>,
): DataQualityReport {
  const missingCritical: string[] = [];
  const missingImportant: string[] = [];
  const missingOptional: string[] = [];
  const warnings: string[] = [];

  // One row per severity tier, processed top to bottom so warnings keep
  // their critical-before-important ordering.
  const tiers: ReadonlyArray<{
    paths: ReadonlyArray<string>;
    bucket: string[];
    penalty: number;
    describe?: (path: string) => string;
  }> = [
    {
      paths: CRITICAL_FIELDS,
      bucket: missingCritical,
      penalty: 20,
      describe: (path) => `❌ Critical: '${path}' is missing`,
    },
    {
      paths: IMPORTANT_FIELDS,
      bucket: missingImportant,
      penalty: 5,
      describe: (path) => `⚠️ Important: '${path}' is missing`,
    },
    {
      paths: OPTIONAL_FIELDS,
      bucket: missingOptional,
      penalty: 2,
    },
  ];

  let deducted = 0;
  for (const tier of tiers) {
    for (const path of tier.paths) {
      if (!isMissingValue(getNestedValue(report, path))) continue;
      tier.bucket.push(path);
      deducted += tier.penalty;
      if (tier.describe !== undefined) warnings.push(tier.describe(path));
    }
  }

  return {
    score: Math.max(0, 100 - deducted),
    missingCritical,
    missingImportant,
    missingOptional,
    warnings,
  };
}
// ─── Helpers ───
/**
 * Resolve a dot-separated path against a nested object.
 *
 * Walks one segment at a time; as soon as the current position is
 * null/undefined or not an object, the lookup yields `undefined`.
 *
 * @example getNestedValue({ a: { b: 1 } }, "a.b") // → 1
 * @param obj Root object to start from.
 * @param path Dot-separated property names, e.g. "clinicInfo.name".
 * @returns The value at the path, or `undefined` if it cannot be reached.
 */
function getNestedValue(obj: Record<string, unknown>, path: string): unknown {
  let cursor: unknown = obj;
  for (const segment of path.split(".")) {
    if (cursor == null || typeof cursor !== "object") {
      return undefined;
    }
    cursor = (cursor as Record<string, unknown>)[segment];
  }
  return cursor;
}
// ─── Self-Test ───
/**
 * Self-test fixtures for isMissingValue as `[input, expectedResult]` pairs:
 * first the inputs that must be reported missing, then the valid ones.
 */
const MISSING_VALUE_TEST_CORPUS: ReadonlyArray<readonly [unknown, boolean]> = [
  // Expected to be treated as missing
  ...[
    null,
    undefined,
    "",
    " ",
    0,
    "데이터 없음",
    "데이터없음",
    "N/A",
    "n/a",
    "확인 불가",
    "미확인",
    "unknown",
    "-",
    "—",
    "none",
  ].map((sample): readonly [unknown, boolean] => [sample, true]),
  // Valid values
  ...[
    "뷰성형외과",
    4.5,
    "2004",
    387,
    "https://example.com",
  ].map((sample): readonly [unknown, boolean] => [sample, false]),
];
/**
 * Run isMissingValue over MISSING_VALUE_TEST_CORPUS and report mismatches.
 *
 * @returns `pass` is true when every sample produced its expected result;
 *   `failures` holds one descriptive line per mismatch, in corpus order.
 */
export function validateDataQuality(): { pass: boolean; failures: string[] } {
  const failures = MISSING_VALUE_TEST_CORPUS.flatMap(([val, expected]) => {
    const result = isMissingValue(val);
    return result === expected
      ? []
      : [`isMissingValue(${JSON.stringify(val)}): expected ${expected}, got ${result}`];
  });
  return { pass: failures.length === 0, failures };
}