From f65f0e85b3a88e2585680c85ef85471f429a124e Mon Sep 17 00:00:00 2001 From: Haewon Kam Date: Sat, 4 Apr 2026 00:03:26 +0900 Subject: [PATCH] =?UTF-8?q?fix:=20robust=20handle=20extraction=20=E2=80=94?= =?UTF-8?q?=20reject=20non-platform=20URLs,=20fix=20type=20safety?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit discover-channels: new extractHandle() validates each handle belongs to its platform (rejects hospital-internal URLs like /idtube/view being treated as YouTube). Extracts handles from full URLs correctly. collect-channel-data: explicit Record typing for DB JSON fields — fixes TypeScript property access on VerifiedChannels from DB. verifyHandles: fix TikTok double-URL concatenation (a leading '@' is now stripped from the handle before the profile URL is built, so the URL cannot contain '@@'). Co-Authored-By: Claude Opus 4.6 (1M context) --- supabase/functions/_shared/verifyHandles.ts | 3 +- .../functions/collect-channel-data/index.ts | 22 ++++--- supabase/functions/discover-channels/index.ts | 59 +++++++++++++++++-- 3 files changed, 68 insertions(+), 16 deletions(-) diff --git a/supabase/functions/_shared/verifyHandles.ts b/supabase/functions/_shared/verifyHandles.ts index c9a15f2..da44462 100644 --- a/supabase/functions/_shared/verifyHandles.ts +++ b/supabase/functions/_shared/verifyHandles.ts @@ -228,7 +228,8 @@ export async function verifyAllHandles( // TikTok — skip verification for now (TikTok blocks HEAD requests) if (candidates.tiktok.length > 0) { - result.tiktok = { handle: candidates.tiktok[0], verified: false, url: `https://tiktok.com/@${candidates.tiktok[0]}` }; + const tkHandle = candidates.tiktok[0].replace(/^@/, ''); + result.tiktok = { handle: tkHandle, verified: false, url: `https://tiktok.com/@${tkHandle}` }; } await Promise.allSettled(tasks); diff --git a/supabase/functions/collect-channel-data/index.ts b/supabase/functions/collect-channel-data/index.ts index 0aaf78c..0188985 100644 --- a/supabase/functions/collect-channel-data/index.ts +++ b/supabase/functions/collect-channel-data/index.ts @@ -73,10 
+73,11 @@ Deno.serve(async (req) => { const tasks: Promise[] = []; // ─── 1. Instagram (multi-account) ─── - if (APIFY_TOKEN && verified.instagram?.length > 0) { + const igVerified = (verified.instagram || []).filter((v: Record) => v.verified && v.handle); + if (APIFY_TOKEN && igVerified.length > 0) { tasks.push((async () => { const accounts: Record[] = []; - for (const ig of verified.instagram.filter(v => v.verified)) { + for (const ig of igVerified) { const items = await runApifyActor("apify~instagram-profile-scraper", { usernames: [ig.handle], resultsLimit: 12 }, APIFY_TOKEN); const profile = (items as Record[])[0]; if (profile && !profile.error) { @@ -104,10 +105,11 @@ Deno.serve(async (req) => { } // ─── 2. YouTube ─── - if (YOUTUBE_API_KEY && verified.youtube?.verified) { + const ytVerified = verified.youtube as Record | null; + if (YOUTUBE_API_KEY && ytVerified?.verified) { tasks.push((async () => { const YT = "https://www.googleapis.com/youtube/v3"; - const channelId = verified.youtube!.channelId || ""; + const channelId = (ytVerified?.channelId as string) || ""; if (!channelId) return; const chRes = await fetch(`${YT}/channels?part=snippet,statistics,brandingSettings&id=${channelId}&key=${YOUTUBE_API_KEY}`); @@ -154,9 +156,10 @@ Deno.serve(async (req) => { } // ─── 3. Facebook ─── - if (APIFY_TOKEN && verified.facebook?.verified) { + const fbVerified = verified.facebook as Record | null; + if (APIFY_TOKEN && fbVerified?.verified) { tasks.push((async () => { - const fbUrl = verified.facebook!.url || `https://www.facebook.com/${verified.facebook!.handle}`; + const fbUrl = (fbVerified.url as string) || `https://www.facebook.com/${fbVerified.handle}`; const items = await runApifyActor("apify~facebook-pages-scraper", { startUrls: [{ url: fbUrl }] }, APIFY_TOKEN); const page = (items as Record[])[0]; if (page?.title) { @@ -172,13 +175,14 @@ Deno.serve(async (req) => { } // ─── 4. 
강남언니 ─── - if (FIRECRAWL_API_KEY && verified.gangnamUnni?.verified && verified.gangnamUnni.url) { + const guVerified = verified.gangnamUnni as Record | null; + if (FIRECRAWL_API_KEY && guVerified?.verified && guVerified.url) { tasks.push((async () => { const scrapeRes = await fetch("https://api.firecrawl.dev/v1/scrape", { method: "POST", headers: { "Content-Type": "application/json", Authorization: `Bearer ${FIRECRAWL_API_KEY}` }, body: JSON.stringify({ - url: verified.gangnamUnni!.url, + url: guVerified!.url as string, formats: ["json"], jsonOptions: { prompt: "Extract: hospital name, overall rating (out of 10), total review count, doctors with names/ratings/review counts/specialties, procedures offered, address, certifications/badges", @@ -202,7 +206,7 @@ Deno.serve(async (req) => { name: hospital.hospitalName, rating: hospital.rating, ratingScale: "/10", totalReviews: hospital.totalReviews, doctors: (hospital.doctors || []).slice(0, 10), procedures: hospital.procedures || [], address: hospital.address, - badges: hospital.badges || [], sourceUrl: verified.gangnamUnni!.url, + badges: hospital.badges || [], sourceUrl: guVerified!.url as string, }; } })()); diff --git a/supabase/functions/discover-channels/index.ts b/supabase/functions/discover-channels/index.ts index 25b05d9..d2fc291 100644 --- a/supabase/functions/discover-channels/index.ts +++ b/supabase/functions/discover-channels/index.ts @@ -202,13 +202,60 @@ Deno.serve(async (req) => { const merged = mergeSocialLinks(linkHandles, firecrawlHandles, perplexityHandles); - // Clean up handles (remove @ prefix, URL parts) + // Robust handle extraction — handles may be full URLs, @handles, or bare usernames + function extractHandle(raw: string, platform: string): string | null { + if (!raw || raw.length < 2) return null; + let h = raw.trim(); + + // Platform-specific URL extraction + if (platform === 'instagram') { + const m = h.match(/instagram\.com\/([a-zA-Z0-9._]+)/); + if (m) return m[1]; + h = h.replace(/^@/, 
'').replace(/\/$/, ''); + if (/^[a-zA-Z0-9._]+$/.test(h) && h.length >= 2) return h; + return null; + } + if (platform === 'youtube') { + const m = h.match(/youtube\.com\/(?:@([a-zA-Z0-9._-]+)|channel\/(UC[a-zA-Z0-9_-]+)|c\/([a-zA-Z0-9._-]+))/); + if (m) return m[1] ? `@${m[1]}` : m[2] || m[3] || null; + h = h.replace(/^@/, ''); + // Reject if it looks like a non-YouTube URL + if (h.includes('http') || h.includes('/') || h.includes('.com')) return null; + if (/^[a-zA-Z0-9._-]+$/.test(h) && h.length >= 2) return `@${h}`; + return null; + } + if (platform === 'facebook') { + const m = h.match(/facebook\.com\/([a-zA-Z0-9._-]+)/); + if (m) return m[1]; + h = h.replace(/^@/, '').replace(/\/$/, ''); + if (h.includes('http') || h.includes('/')) return null; + if (/^[a-zA-Z0-9._-]+$/.test(h) && h.length >= 2) return h; + return null; + } + if (platform === 'naverBlog') { + const m = h.match(/blog\.naver\.com\/([a-zA-Z0-9_-]+)/); + if (m) return m[1]; + if (h.includes('http') || h.includes('/')) return null; + if (/^[a-zA-Z0-9_-]+$/.test(h) && h.length >= 2) return h; + return null; + } + if (platform === 'tiktok') { + const m = h.match(/tiktok\.com\/@([a-zA-Z0-9._-]+)/); + if (m) return m[1]; + h = h.replace(/^@/, ''); + if (h.includes('http') || h.includes('/')) return null; + if (/^[a-zA-Z0-9._-]+$/.test(h) && h.length >= 2) return h; + return null; + } + return h; + } + const cleanHandles = { - instagram: merged.instagram.map(h => h.replace(/^@/, '').replace(/\/$/, '')).filter(h => h.length > 1), - youtube: merged.youtube.map(h => h.replace(/^https?:\/\/(www\.)?youtube\.com\//, '')).filter(h => h.length > 1), - facebook: merged.facebook.map(h => h.replace(/^@/, '').replace(/\/$/, '')).filter(h => h.length > 1), - naverBlog: merged.naverBlog.filter(h => h.length > 1), - tiktok: merged.tiktok.map(h => h.replace(/^@/, '')).filter(h => h.length > 1), + instagram: merged.instagram.map(h => extractHandle(h, 'instagram')).filter((h): h is string => h !== null), + youtube: 
merged.youtube.map(h => extractHandle(h, 'youtube')).filter((h): h is string => h !== null), + facebook: merged.facebook.map(h => extractHandle(h, 'facebook')).filter((h): h is string => h !== null), + naverBlog: merged.naverBlog.map(h => extractHandle(h, 'naverBlog')).filter((h): h is string => h !== null), + tiktok: merged.tiktok.map(h => extractHandle(h, 'tiktok')).filter((h): h is string => h !== null), }; const verified: VerifiedChannels = await verifyAllHandles(