/**
 * Import clinic_registry_working.csv into the clinic_registry table.
 *
 * Usage:
 *   npx tsx scripts/import-registry.ts
 *
 * Requires env vars:
 *   SUPABASE_URL (or VITE_SUPABASE_URL), SUPABASE_SERVICE_ROLE_KEY
 *
 * Or use .env / .env.local files.
 *
 * The process exit code is non-zero when configuration is missing, when any
 * batch fails to upsert, or when the final row-count verification fails.
 */

import { createClient } from "@supabase/supabase-js";
import { readFileSync } from "fs";
import { config } from "dotenv";

// dotenv does not overwrite variables that are already set, so the file that
// is loaded FIRST wins. Load .env.local before .env so that local overrides
// really do take precedence over the base file.
config({ path: ".env.local" }); // local overrides (takes precedence)
config({ path: ".env" }); // base env (service role key lives here)

const SUPABASE_URL = process.env.SUPABASE_URL || process.env.VITE_SUPABASE_URL;
const SUPABASE_KEY = process.env.SUPABASE_SERVICE_ROLE_KEY;

if (!SUPABASE_URL || !SUPABASE_KEY) {
  console.error("Missing SUPABASE_URL or SUPABASE_SERVICE_ROLE_KEY");
  process.exit(1);
}

const supabase = createClient(SUPABASE_URL, SUPABASE_KEY);

/** Location of the CSV to import, relative to the working directory. */
const CSV_PATH = "data/clinic-registry/clinic_registry_working.csv";

// CSV column indices (0-based). Odd-numbered "note" columns are not imported.
const COL = {
  name: 0,
  brand_group: 1,
  district: 2,
  branches: 3,
  website_kr: 4,
  website_en: 5,
  youtube_url: 6,
  // 7: youtube_note
  instagram_kr_url: 8,
  // 9: instagram_kr_note
  instagram_en_url: 10,
  // 11: instagram_en_note
  facebook_url: 12,
  // 13: facebook_note
  tiktok_url: 14,
  // 15: tiktok_note
  gangnam_unni_url: 16,
  // 17: gangnam_unni_note
  naver_blog_url: 18,
  // 19: naver_blog_note
  naver_place_url: 20,
  // 21: naver_place_reviews_note
  google_maps_url: 22,
  // 23: google_reviews_note
} as const;

/** One row as sent to the clinic_registry upsert. */
interface ClinicRecord {
  name: string;
  name_aliases: string[];
  domain: string;
  website_url: string;
  brand_group: string | null;
  district: string | null;
  branches: string | null;
  website_en: string | null;
  youtube_url: string | null;
  instagram_url: string | null;
  instagram_en_url: string | null;
  facebook_url: string | null;
  tiktok_url: string | null;
  gangnam_unni_url: string | null;
  naver_blog_url: string | null;
  naver_place_url: string | null;
  google_maps_url: string | null;
}

/**
 * Reduce a website URL to its bare host name (lower-cased, without a leading
 * "www.").
 *
 * @param url - Absolute URL, or a scheme-less value such as "www.example.com/path".
 * @returns The host name, or an empty string when none can be derived.
 */
function extractDomain(url: string): string {
  const trimmed = url.trim();
  if (!trimmed) return "";

  // Without an explicit "scheme://", the URL parser either throws or (for
  // "host:port") mistakes the host for a scheme and yields an empty hostname.
  // Prepending a scheme lets one parser handle ports, paths, queries and case.
  const hasScheme = /^[a-z][a-z0-9+.-]*:\/\//i.test(trimmed);
  const candidate = hasScheme ? trimmed : `https://${trimmed}`;

  try {
    return new URL(candidate).hostname.replace(/^www\./, "");
  } catch {
    // Best effort for values the URL parser rejects outright.
    const clean = trimmed.replace(/^https?:\/\//i, "").replace(/^www\./i, "");
    return (clean.split(/[/?#:]/)[0] ?? "").toLowerCase();
  }
}

/**
 * Split one CSV line into fields.
 *
 * Unquoted lines are split exactly like `line.split(",")`. A field that starts
 * with a double quote may contain commas, and `""` inside it stands for a
 * literal quote. Quoted fields spanning several lines are NOT supported,
 * because the caller splits the file into lines first.
 *
 * @param line - A single line of CSV text without its line terminator.
 * @returns The fields in order; always at least one element.
 */
function parseCSVLine(line: string): string[] {
  const fields: string[] = [];
  let current = "";
  let inQuotes = false;

  for (let i = 0; i < line.length; i++) {
    const ch = line.charAt(i);

    if (inQuotes) {
      if (ch !== '"') {
        current += ch;
      } else if (line.charAt(i + 1) === '"') {
        current += '"'; // escaped quote
        i++;
      } else {
        inQuotes = false;
      }
    } else if (ch === '"' && current === "") {
      // A quote only opens a quoted section at the very start of a field.
      inQuotes = true;
    } else if (ch === ",") {
      fields.push(current);
      current = "";
    } else {
      current += ch;
    }
  }

  fields.push(current);
  return fields;
}

/** Trimmed cell value at `index`, or null when the cell is missing or blank. */
function cell(cols: readonly string[], index: number): string | null {
  const value = cols[index]?.trim();
  return value ? value : null;
}

/**
 * Read the CSV, build one record per distinct domain, upsert the records into
 * clinic_registry in batches, then report the table's total row count.
 */
async function main(): Promise<void> {
  const csv = readFileSync(CSV_PATH, "utf8");
  const lines = csv.split(/\r?\n/).filter((l) => l.trim());
  const rows = lines.slice(1); // skip header

  console.log(`Parsing ${rows.length} clinics from CSV...`);

  // Keyed by domain: a single upsert statement may not touch the same
  // conflict key twice, so duplicates inside one batch would fail the batch.
  const byDomain = new Map<string, ClinicRecord>();
  const skipped: string[] = [];
  const duplicates: string[] = [];

  for (const line of rows) {
    const cols = parseCSVLine(line);
    const name = cell(cols, COL.name);
    const website = cell(cols, COL.website_kr);

    if (!name || !website) {
      skipped.push(name || "(unnamed)");
      continue;
    }

    const domain = extractDomain(website);
    if (!domain) {
      skipped.push(name);
      continue;
    }

    if (byDomain.has(domain)) {
      duplicates.push(`${name} (${domain})`);
    }

    // Last row wins, matching what separate upserts of the same domain would do.
    byDomain.set(domain, {
      name,
      name_aliases: [], // Can be enriched later
      domain,
      website_url: website,
      brand_group: cell(cols, COL.brand_group),
      district: cell(cols, COL.district),
      branches: cell(cols, COL.branches),
      website_en: cell(cols, COL.website_en),
      youtube_url: cell(cols, COL.youtube_url),
      instagram_url: cell(cols, COL.instagram_kr_url),
      instagram_en_url: cell(cols, COL.instagram_en_url),
      facebook_url: cell(cols, COL.facebook_url),
      tiktok_url: cell(cols, COL.tiktok_url),
      gangnam_unni_url: cell(cols, COL.gangnam_unni_url),
      naver_blog_url: cell(cols, COL.naver_blog_url),
      naver_place_url: cell(cols, COL.naver_place_url),
      google_maps_url: cell(cols, COL.google_maps_url),
    });
  }

  const records = Array.from(byDomain.values());

  console.log(`Prepared ${records.length} records (skipped ${skipped.length}: ${skipped.join(", ")})`);
  if (duplicates.length > 0) {
    console.warn(
      `Collapsed ${duplicates.length} row(s) sharing a domain with an earlier row: ${duplicates.join(", ")}`,
    );
  }

  // Upsert in batches of 20
  const BATCH_SIZE = 20;
  let upserted = 0;
  let errors = 0;

  for (let i = 0; i < records.length; i += BATCH_SIZE) {
    const batch = records.slice(i, i + BATCH_SIZE);
    const batchNo = i / BATCH_SIZE + 1;

    const { data, error } = await supabase
      .from("clinic_registry")
      .upsert(batch, { onConflict: "domain" })
      .select("id, domain");

    if (error) {
      console.error(`Batch ${batchNo} error:`, error.message);
      errors += batch.length;
    } else {
      upserted += data?.length || 0;
      console.log(`Batch ${batchNo}: ${data?.length} rows upserted`);
    }
  }

  console.log(`\nDone! Inserted/updated: ${upserted}, Errors: ${errors}`);
  if (errors > 0) {
    process.exitCode = 1; // let CI / shell callers notice partial failure
  }

  // Verify
  const { count, error: countError } = await supabase
    .from("clinic_registry")
    .select("*", { count: "exact", head: true });

  if (countError) {
    console.error("Could not verify clinic_registry row count:", countError.message);
    process.exitCode = 1;
  } else {
    console.log(`Total rows in clinic_registry: ${count}`);
  }
}

main().catch((err: unknown) => {
  console.error(err);
  process.exitCode = 1;
});