From ed37f23f786aefd2aafd6cc0b4ab371c7a03bbb3 Mon Sep 17 00:00:00 2001 From: Haewon Kam Date: Sat, 4 Apr 2026 23:41:27 +0900 Subject: [PATCH] feat: extract social links from JS-rendered buttons on clinic website MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Added A4 parallel Firecrawl call with actions: [wait 3s, scrape] to execute JavaScript and extract social button href URLs from header/footer. This is the most reliable source — most Korean clinics have Facebook/Instagram/YouTube/Blog icons in their nav. Results merged as Source 3 (buttonHandles) alongside HTML links, JSON extraction, and API searches. Co-Authored-By: Claude Opus 4.6 (1M context) --- supabase/functions/discover-channels/index.ts | 50 ++++++++++++++++++- 1 file changed, 48 insertions(+), 2 deletions(-) diff --git a/supabase/functions/discover-channels/index.ts b/supabase/functions/discover-channels/index.ts index 4e2a146..05da48e 100644 --- a/supabase/functions/discover-channels/index.ts +++ b/supabase/functions/discover-channels/index.ts @@ -95,7 +95,8 @@ Deno.serve(async (req) => { // STAGE A: Firecrawl scrape + map (parallel) // ═══════════════════════════════════════════ - const [scrapeResult, mapResult, brandResult] = await Promise.allSettled([ + const [scrapeResult, mapResult, brandResult, socialButtonResult] = await Promise.allSettled([ + // A1. Main scrape — clinic info + links fetch("https://api.firecrawl.dev/v1/scrape", { method: "POST", headers: { "Content-Type": "application/json", Authorization: `Bearer ${FIRECRAWL_API_KEY}` }, @@ -119,12 +120,14 @@ Deno.serve(async (req) => { }), }).then(r => r.json()), + // A2. Map site fetch("https://api.firecrawl.dev/v1/map", { method: "POST", headers: { "Content-Type": "application/json", Authorization: `Bearer ${FIRECRAWL_API_KEY}` }, body: JSON.stringify({ url, limit: 50 }), }).then(r => r.json()), + // A3. 
Branding extraction fetch("https://api.firecrawl.dev/v1/scrape", { method: "POST", headers: { "Content-Type": "application/json", Authorization: `Bearer ${FIRECRAWL_API_KEY}` }, @@ -137,12 +140,49 @@ Deno.serve(async (req) => { waitFor: 3000, }), }).then(r => r.json()).catch(() => ({ data: { json: {} } })), + + // A4. Social button links — execute JS to extract all links pointing to social platforms + fetch("https://api.firecrawl.dev/v1/scrape", { + method: "POST", + headers: { "Content-Type": "application/json", Authorization: `Bearer ${FIRECRAWL_API_KEY}` }, + body: JSON.stringify({ + url, + formats: ["json"], + jsonOptions: { + prompt: "Find ALL social media link URLs on this page. Look in the header, footer, sidebar, and floating buttons. Extract the actual href URLs (not just text) for: Instagram, YouTube, Facebook, TikTok, Naver Blog (blog.naver.com), KakaoTalk (pf.kakao.com), Twitter/X. Include ALL variants found.", + schema: { + type: "object", + properties: { + socialLinks: { + type: "array", + items: { + type: "object", + properties: { + platform: { type: "string" }, + url: { type: "string" }, + }, + }, + }, + }, + }, + }, + waitFor: 5000, + actions: [ + { type: "wait", milliseconds: 3000 }, + { type: "scrape" }, + ], + }), + }).then(r => r.json()).catch(() => ({ data: { json: {} } })), ]); const scrapeData = scrapeResult.status === "fulfilled" ? scrapeResult.value : { data: {} }; const mapData = mapResult.status === "fulfilled" ? mapResult.value : {}; const brandData = brandResult.status === "fulfilled" ? brandResult.value : { data: { json: {} } }; + // A4 result: social buttons from JS-rendered page + const socialButtonData = socialButtonResult.status === "fulfilled" ? 
socialButtonResult.value : { data: { json: {} } }; + const socialButtons = (socialButtonData.data?.json?.socialLinks || []) as { platform?: string; url?: string }[]; + const clinic = scrapeData.data?.json || {}; let resolvedName = inputClinicName || clinic.clinicName || clinic.clinicNameEn || ""; @@ -184,6 +224,12 @@ Deno.serve(async (req) => { kakao: scrapeSocial.kakao ? [scrapeSocial.kakao] : [], }; + // Source 3: Social button links from JS-rendered page (most reliable!) + const socialButtonUrls = socialButtons + .map(b => typeof b.url === 'string' ? b.url : '') + .filter(u => u.length > 5); + const buttonHandles = extractSocialLinks(socialButtonUrls); + // ═══════════════════════════════════════════ // STAGE B: Direct API searches + Perplexity (ALL PARALLEL) // Each API directly searches for the clinic's presence @@ -410,7 +456,7 @@ Deno.serve(async (req) => { // STAGE C: Merge ALL sources + Verify // ═══════════════════════════════════════════ - const merged = mergeSocialLinks(linkHandles, firecrawlHandles, apiHandles); + const merged = mergeSocialLinks(linkHandles, firecrawlHandles, buttonHandles, apiHandles); const cleanHandles = { instagram: [...new Set(merged.instagram.map(h => extractHandle(h, 'instagram')).filter((h): h is string => h !== null))],