fix: robust handle extraction — reject non-platform URLs, fix type safety

discover-channels: new extractHandle() validates that each handle belongs to its platform, so hospital-internal URLs like /idtube/view are no longer treated as YouTube handles. It also extracts handles from full URLs correctly.

collect-channel-data: explicit Record<string,unknown> typing for DB JSON fields — fixes TypeScript property access on VerifiedChannels from DB.

verifyHandles: fix TikTok double-URL concatenation.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-04-04 00:03:26 +09:00 · 2026-04-04 00:03:26 +09:00 · f65f0e85b3
parent 0d72750982
commit f65f0e85b3
3 changed files with 68 additions and 16 deletions
--- a/supabase/functions/_shared/verifyHandles.ts
+++ b/supabase/functions/_shared/verifyHandles.ts
@ -228,7 +228,8 @@ export async function verifyAllHandles(

  // TikTok — skip verification for now (TikTok blocks HEAD requests)
  if (candidates.tiktok.length > 0) {
-    result.tiktok = { handle: candidates.tiktok[0], verified: false, url: `https://tiktok.com/@${candidates.tiktok[0]}` };
+    const tkHandle = candidates.tiktok[0].replace(/^@/, '');
+    result.tiktok = { handle: tkHandle, verified: false, url: `https://tiktok.com/@${tkHandle}` };
  }

  await Promise.allSettled(tasks);
--- a/supabase/functions/collect-channel-data/index.ts
+++ b/supabase/functions/collect-channel-data/index.ts
@ -73,10 +73,11 @@ Deno.serve(async (req) => {
    const tasks: Promise<void>[] = [];

    // ─── 1. Instagram (multi-account) ───
-    if (APIFY_TOKEN && verified.instagram?.length > 0) {
+    const igVerified = (verified.instagram || []).filter((v: Record<string, unknown>) => v.verified && v.handle);
+    if (APIFY_TOKEN && igVerified.length > 0) {
      tasks.push((async () => {
        const accounts: Record<string, unknown>[] = [];
-        for (const ig of verified.instagram.filter(v => v.verified)) {
+        for (const ig of igVerified) {
          const items = await runApifyActor("apify~instagram-profile-scraper", { usernames: [ig.handle], resultsLimit: 12 }, APIFY_TOKEN);
          const profile = (items as Record<string, unknown>[])[0];
          if (profile && !profile.error) {
@ -104,10 +105,11 @@ Deno.serve(async (req) => {
    }

    // ─── 2. YouTube ───
-    if (YOUTUBE_API_KEY && verified.youtube?.verified) {
+    const ytVerified = verified.youtube as Record<string, unknown> | null;
+    if (YOUTUBE_API_KEY && ytVerified?.verified) {
      tasks.push((async () => {
        const YT = "https://www.googleapis.com/youtube/v3";
-        const channelId = verified.youtube!.channelId || "";
+        const channelId = (ytVerified?.channelId as string) || "";
        if (!channelId) return;

        const chRes = await fetch(`${YT}/channels?part=snippet,statistics,brandingSettings&id=${channelId}&key=${YOUTUBE_API_KEY}`);
@ -154,9 +156,10 @@ Deno.serve(async (req) => {
    }

    // ─── 3. Facebook ───
-    if (APIFY_TOKEN && verified.facebook?.verified) {
+    const fbVerified = verified.facebook as Record<string, unknown> | null;
+    if (APIFY_TOKEN && fbVerified?.verified) {
      tasks.push((async () => {
-        const fbUrl = verified.facebook!.url || `https://www.facebook.com/${verified.facebook!.handle}`;
+        const fbUrl = (fbVerified.url as string) || `https://www.facebook.com/${fbVerified.handle}`;
        const items = await runApifyActor("apify~facebook-pages-scraper", { startUrls: [{ url: fbUrl }] }, APIFY_TOKEN);
        const page = (items as Record<string, unknown>[])[0];
        if (page?.title) {
@ -172,13 +175,14 @@ Deno.serve(async (req) => {
    }

    // ─── 4. 강남언니 ───
-    if (FIRECRAWL_API_KEY && verified.gangnamUnni?.verified && verified.gangnamUnni.url) {
+    const guVerified = verified.gangnamUnni as Record<string, unknown> | null;
+    if (FIRECRAWL_API_KEY && guVerified?.verified && guVerified.url) {
      tasks.push((async () => {
        const scrapeRes = await fetch("https://api.firecrawl.dev/v1/scrape", {
          method: "POST",
          headers: { "Content-Type": "application/json", Authorization: `Bearer ${FIRECRAWL_API_KEY}` },
          body: JSON.stringify({
-            url: verified.gangnamUnni!.url,
+            url: guVerified!.url as string,
            formats: ["json"],
            jsonOptions: {
              prompt: "Extract: hospital name, overall rating (out of 10), total review count, doctors with names/ratings/review counts/specialties, procedures offered, address, certifications/badges",
@ -202,7 +206,7 @@ Deno.serve(async (req) => {
            name: hospital.hospitalName, rating: hospital.rating, ratingScale: "/10",
            totalReviews: hospital.totalReviews, doctors: (hospital.doctors || []).slice(0, 10),
            procedures: hospital.procedures || [], address: hospital.address,
-            badges: hospital.badges || [], sourceUrl: verified.gangnamUnni!.url,
+            badges: hospital.badges || [], sourceUrl: guVerified!.url as string,
          };
        }
      })());
--- a/supabase/functions/discover-channels/index.ts
+++ b/supabase/functions/discover-channels/index.ts
@ -202,13 +202,60 @@ Deno.serve(async (req) => {

    const merged = mergeSocialLinks(linkHandles, firecrawlHandles, perplexityHandles);

-    // Clean up handles (remove @ prefix, URL parts)
+    // Robust handle extraction — handles may be full URLs, @handles, or bare usernames. Returns the normalized handle for `platform`, or null when `raw` is rejected.
+    function extractHandle(raw: string, platform: string): string | null {
+      if (!raw || raw.length < 2) return null; // length is checked before trimming, so whitespace-only input of 2+ chars passes this guard
+      let h = raw.trim();
+
+      // Platform-specific URL extraction: each branch first tries a URL regex, then falls back to treating the input as a bare handle
+      if (platform === 'instagram') {
+        const m = h.match(/instagram\.com\/([a-zA-Z0-9._]+)/); // captures the first path segment after instagram.com/
+        if (m) return m[1]; // NOTE(review): unanchored, first segment only — "instagram.com/p/<id>" would yield "p"; confirm callers filter such URLs
+        h = h.replace(/^@/, '').replace(/\/$/, ''); // drop one leading "@" and one trailing "/"
+        if (/^[a-zA-Z0-9._]+$/.test(h) && h.length >= 2) return h; // NOTE(review): dots are allowed, so a bare domain like "example.com" passes here
+        return null;
+      }
+      if (platform === 'youtube') {
+        const m = h.match(/youtube\.com\/(?:@([a-zA-Z0-9._-]+)|channel\/(UC[a-zA-Z0-9_-]+)|c\/([a-zA-Z0-9._-]+))/); // group 1: @handle, group 2: channel/UC… id, group 3: c/<name>
+        if (m) return m[1] ? `@${m[1]}` : m[2] || m[3] || null; // @handle form is returned with "@"; channel id and c/<name> are returned without it
+        h = h.replace(/^@/, '');
+        // Reject if it looks like a non-YouTube URL (anything that still contains "http", "/" or ".com" after the regex above failed)
+        if (h.includes('http') || h.includes('/') || h.includes('.com')) return null;
+        if (/^[a-zA-Z0-9._-]+$/.test(h) && h.length >= 2) return `@${h}`; // bare names are normalized to the "@name" form
+        return null;
+      }
+      if (platform === 'facebook') {
+        const m = h.match(/facebook\.com\/([a-zA-Z0-9._-]+)/); // captures the first path segment after facebook.com/
+        if (m) return m[1]; // NOTE(review): "facebook.com/profile.php?id=…" would yield "profile.php"; confirm whether that is acceptable
+        h = h.replace(/^@/, '').replace(/\/$/, ''); // drop one leading "@" and one trailing "/"
+        if (h.includes('http') || h.includes('/')) return null; // leftover URL-like input is rejected
+        if (/^[a-zA-Z0-9._-]+$/.test(h) && h.length >= 2) return h;
+        return null;
+      }
+      if (platform === 'naverBlog') {
+        const m = h.match(/blog\.naver\.com\/([a-zA-Z0-9_-]+)/); // blog id is the first path segment; dots are not allowed in it
+        if (m) return m[1];
+        if (h.includes('http') || h.includes('/')) return null; // unlike the other branches, no "@" or trailing "/" is stripped first
+        if (/^[a-zA-Z0-9_-]+$/.test(h) && h.length >= 2) return h;
+        return null;
+      }
+      if (platform === 'tiktok') {
+        const m = h.match(/tiktok\.com\/@([a-zA-Z0-9._-]+)/); // only URLs of the form tiktok.com/@<name> match
+        if (m) return m[1]; // returned without the "@"
+        h = h.replace(/^@/, '');
+        if (h.includes('http') || h.includes('/')) return null; // a bare handle with a trailing "/" is rejected here, since it is not stripped beforehand
+        if (/^[a-zA-Z0-9._-]+$/.test(h) && h.length >= 2) return h;
+        return null;
+      }
+      return h; // unrecognized platform: the trimmed input is returned without validation
+    }
+
    const cleanHandles = {
-      instagram: merged.instagram.map(h => h.replace(/^@/, '').replace(/\/$/, '')).filter(h => h.length > 1),
-      youtube: merged.youtube.map(h => h.replace(/^https?:\/\/(www\.)?youtube\.com\//, '')).filter(h => h.length > 1),
-      facebook: merged.facebook.map(h => h.replace(/^@/, '').replace(/\/$/, '')).filter(h => h.length > 1),
-      naverBlog: merged.naverBlog.filter(h => h.length > 1),
-      tiktok: merged.tiktok.map(h => h.replace(/^@/, '')).filter(h => h.length > 1),
+      instagram: merged.instagram.map(h => extractHandle(h, 'instagram')).filter((h): h is string => h !== null),
+      youtube: merged.youtube.map(h => extractHandle(h, 'youtube')).filter((h): h is string => h !== null),
+      facebook: merged.facebook.map(h => extractHandle(h, 'facebook')).filter((h): h is string => h !== null),
+      naverBlog: merged.naverBlog.map(h => extractHandle(h, 'naverBlog')).filter((h): h is string => h !== null),
+      tiktok: merged.tiktok.map(h => extractHandle(h, 'tiktok')).filter((h): h is string => h !== null),
    };

    const verified: VerifiedChannels = await verifyAllHandles(