/**
 * Verify social media handles exist via lightweight API checks.
 * Each check runs independently — one failure doesn't block others.
 *
 * `verified` is tri-state:
 *   - `true`            the check positively confirmed the channel
 *   - `false`           the check completed and found nothing (or, for TikTok, no check was run)
 *   - `'unverifiable'`  the check could not be completed (blocked, network/API error)
 */

export interface VerifiedChannel {
  handle: string;
  verified: boolean | "unverifiable";
  url?: string;
  channelId?: string; // YouTube channel ID if resolved
}

export interface VerifiedChannels {
  instagram: VerifiedChannel[];
  youtube: VerifiedChannel | null;
  facebook: VerifiedChannel | null;
  naverBlog: VerifiedChannel | null;
  gangnamUnni: VerifiedChannel | null;
  tiktok: VerifiedChannel | null;
}

/** YouTube result; `channelTitle` is only used to pick the best match by clinic name. */
type YouTubeChannel = VerifiedChannel & { channelTitle?: string };

/** Subset of a YouTube Data API `channels.list` item that this module reads. */
interface YouTubeApiChannel {
  id?: string;
  snippet?: { title?: string };
}

/** Outcome of one `channels.list` call: `ok: false` means the call itself failed. */
type YouTubeLookup =
  | { ok: true; channel: YouTubeApiChannel | undefined }
  | { ok: false };

const BROWSER_USER_AGENT = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7)';

/**
 * Cancel a response body that will not be read, so the underlying
 * connection is released instead of lingering until garbage collection.
 * Best effort: any error while cancelling is ignored.
 */
async function discardBody(res: Response): Promise<void> {
  try {
    await res.body?.cancel();
  } catch {
    // Nothing useful to do — the body is simply left for the runtime to clean up.
  }
}

/**
 * Verify an Instagram handle exists.
 * Uses a lightweight fetch to the profile page.
 *
 * @param handle Instagram username (used verbatim as the profile path segment).
 * @returns `true` on a 200 profile page, `false` on 404, `'unverifiable'` when
 *          Instagram redirects to login/challenge, serves the login page, answers
 *          with any other status, or the request throws.
 */
async function verifyInstagram(handle: string): Promise<VerifiedChannel> {
  try {
    const url = `https://www.instagram.com/${handle}/`;
    const res = await fetch(url, {
      method: 'GET',
      headers: { 'User-Agent': BROWSER_USER_AGENT },
      redirect: 'manual', // Don't follow redirects — detect login page
    });

    // 200 → profile exists, unless the body is actually the login page
    if (res.status === 200) {
      const bodySnippet = await res.text().then(t => t.slice(0, 2000)).catch(() => '');
      const looksLikeLogin =
        bodySnippet.includes('/accounts/login') &&
        !bodySnippet.includes(`"username":"${handle}"`);
      return { handle, verified: looksLikeLogin ? 'unverifiable' : true, url };
    }

    // Every remaining path ignores the body.
    await discardBody(res);

    // 404 → definitely not found
    if (res.status === 404) {
      return { handle, verified: false, url };
    }

    // 301/302 to login/challenge, or any other status → unverifiable
    // (don't assume the profile doesn't exist)
    return { handle, verified: 'unverifiable', url };
  } catch {
    return { handle, verified: 'unverifiable' };
  }
}

/**
 * Run one YouTube Data API v3 `channels.list` query.
 *
 * @param param  Filter parameter name (`forHandle`, `forUsername` or `id`).
 * @param value  Filter value; URL-encoded here so characters such as `&` or `#`
 *               cannot corrupt the query string.
 * @param apiKey YouTube Data API key.
 * @returns `{ ok: true, channel }` when the API answered (channel may be
 *          `undefined` = no match), `{ ok: false }` when the request failed.
 */
async function lookupYouTubeChannel(
  param: string,
  value: string,
  apiKey: string,
): Promise<YouTubeLookup> {
  const YT_BASE = 'https://www.googleapis.com/youtube/v3';
  try {
    const query = `part=id,snippet&${param}=${encodeURIComponent(value)}&key=${encodeURIComponent(apiKey)}`;
    const res = await fetch(`${YT_BASE}/channels?${query}`);
    if (!res.ok) {
      // Quota, auth or server error — says nothing about whether the channel exists.
      await discardBody(res);
      return { ok: false };
    }
    const data = (await res.json()) as { items?: YouTubeApiChannel[] } | null;
    return { ok: true, channel: data?.items?.[0] };
  } catch {
    return { ok: false };
  }
}

/**
 * Verify a YouTube handle/channel exists using YouTube Data API v3.
 *
 * Tries `forHandle`, then `forUsername`, and finally — when the value looks like
 * a channel ID (`UC` prefix, 24 characters) — a direct `id` lookup.
 *
 * @param handle Handle, legacy username or channel ID; a leading `@` is ignored.
 * @param apiKey YouTube Data API key. Without a key nothing can be checked, so
 *               the result is `'unverifiable'`.
 * @returns `true` with `channelId`/`channelTitle`/`url` when a channel is found,
 *          `false` when every lookup completed without a match, and
 *          `'unverifiable'` when no match was found and at least one lookup failed.
 */
async function verifyYouTube(handle: string, apiKey: string): Promise<YouTubeChannel> {
  try {
    if (!apiKey) {
      return { handle, verified: 'unverifiable' };
    }

    const cleanHandle = handle.replace(/^@/, '');
    let lookupFailed = false;

    // Try forHandle first, then forUsername
    for (const param of ['forHandle', 'forUsername']) {
      const lookup = await lookupYouTubeChannel(param, cleanHandle, apiKey);
      if (!lookup.ok) {
        lookupFailed = true;
        continue;
      }
      if (lookup.channel) {
        return {
          handle,
          verified: true,
          channelId: lookup.channel.id,
          channelTitle: lookup.channel.snippet?.title || '',
          url: `https://youtube.com/@${cleanHandle}`,
        };
      }
    }

    // Try as channel ID directly (starts with UC)
    if (cleanHandle.startsWith('UC') && cleanHandle.length === 24) {
      const lookup = await lookupYouTubeChannel('id', cleanHandle, apiKey);
      if (!lookup.ok) {
        lookupFailed = true;
      } else if (lookup.channel) {
        return {
          handle: cleanHandle,
          verified: true,
          channelId: cleanHandle,
          channelTitle: lookup.channel.snippet?.title || '',
          url: `https://youtube.com/channel/${cleanHandle}`,
        };
      }
    }

    return { handle, verified: lookupFailed ? 'unverifiable' : false };
  } catch {
    return { handle, verified: 'unverifiable' };
  }
}

/**
 * Whether the final URL of a followed Facebook request is a login/checkpoint
 * page rather than the requested page. Unparseable or empty URLs count as "no".
 */
function isFacebookLoginWall(finalUrl: string): boolean {
  try {
    const firstSegment = new URL(finalUrl).pathname.split('/')[1] ?? '';
    return firstSegment === 'login' || firstSegment === 'login.php' || firstSegment === 'checkpoint';
  } catch {
    return false;
  }
}

/**
 * Verify a Facebook page exists via GET request (HEAD requests are blocked by Facebook).
 *
 * @param handle Facebook page name (used verbatim as the path segment).
 * @returns `true` on a 200 page without not-found markers, `false` on 404 or a
 *          200 "page not found" body, `'unverifiable'` when the request ends on a
 *          login/checkpoint page, gets any other status, or throws.
 */
async function verifyFacebook(handle: string): Promise<VerifiedChannel> {
  try {
    const url = `https://www.facebook.com/${handle}/`;
    const res = await fetch(url, {
      method: 'GET',
      headers: { 'User-Agent': BROWSER_USER_AGENT },
      redirect: 'follow',
    });

    if (res.status === 200) {
      // Redirects are followed, so a login wall arrives as a 200 at a different
      // final URL. That proves nothing about the page itself.
      if (isFacebookLoginWall(res.url || '')) {
        await discardBody(res);
        return { handle, verified: 'unverifiable', url };
      }
      // Check if it's a real page or an error page served with 200
      const bodySnippet = await res.text().then(t => t.slice(0, 3000)).catch(() => '');
      const notFound =
        bodySnippet.includes('page_not_found') || bodySnippet.includes('This content isn');
      return { handle, verified: !notFound, url };
    }

    await discardBody(res);

    if (res.status === 404) {
      return { handle, verified: false, url };
    }
    // Facebook often blocks bots → unverifiable, not false
    return { handle, verified: 'unverifiable', url };
  } catch {
    return { handle, verified: 'unverifiable' };
  }
}

/**
 * Verify Naver Blog exists.
 *
 * @param blogId Naver blog ID (used verbatim as the path segment).
 * @returns `true` on 200, `false` on 404, `'unverifiable'` for any other status
 *          or when the request throws (a failed check is not proof of absence).
 */
async function verifyNaverBlog(blogId: string): Promise<VerifiedChannel> {
  try {
    const url = `https://blog.naver.com/${blogId}`;
    const res = await fetch(url, {
      method: 'HEAD',
      redirect: 'follow',
    });
    if (res.status === 200) {
      return { handle: blogId, verified: true, url };
    }
    if (res.status === 404) {
      return { handle: blogId, verified: false, url };
    }
    return { handle: blogId, verified: 'unverifiable', url };
  } catch {
    return { handle: blogId, verified: 'unverifiable' };
  }
}

/**
 * Whether `candidate` is an http(s) URL on gangnamunni.com (or a subdomain)
 * whose path starts with `/hospitals/`. Parsing the URL — instead of a substring
 * test — rejects look-alikes such as `https://evil.example/?gangnamunni.com/hospitals/1`.
 */
function isGangnamUnniHospitalUrl(candidate: unknown): candidate is string {
  if (typeof candidate !== 'string') return false;
  try {
    const { protocol, hostname, pathname } = new URL(candidate);
    const isHttp = protocol === 'https:' || protocol === 'http:';
    const onSite = hostname === 'gangnamunni.com' || hostname.endsWith('.gangnamunni.com');
    return isHttp && onSite && pathname.startsWith('/hospitals/');
  } catch {
    return false;
  }
}

/**
 * Find and verify gangnamunni hospital page using Firecrawl search.
 *
 * @param clinicName   Clinic name; returned as `handle` and used to build queries.
 * @param firecrawlKey Firecrawl API key (sent as a Bearer token).
 * @param hintUrl      Optional hospital-page URL to check first. Only fetched when
 *                     it really points at gangnamunni.com/hospitals/…; if the check
 *                     fails for any reason the search still runs.
 * @returns `true` with the page URL when the hint answers 200 or a search result
 *          matches, `false` when at least one search completed without a match,
 *          `'unverifiable'` when every search request failed.
 */
async function verifyGangnamUnni(
  clinicName: string,
  firecrawlKey: string,
  hintUrl?: string,
): Promise<VerifiedChannel> {
  try {
    // If we already have a URL hint, just verify it
    if (hintUrl && isGangnamUnniHospitalUrl(hintUrl)) {
      try {
        const res = await fetch(hintUrl, { method: 'HEAD', redirect: 'follow' });
        if (res.status === 200) {
          return { handle: clinicName, verified: true, url: hintUrl };
        }
      } catch {
        // Hint check failed — fall through to the search below.
      }
    }

    // Otherwise, search with multiple fallback queries
    const shortName = clinicName.replace(/성형외과|의원|병원|클리닉|피부과/g, '').trim();
    const queries = [
      `${clinicName} site:gangnamunni.com`,
      `${shortName} 성형외과 site:gangnamunni.com`,
      `${clinicName} 강남언니`,
    ];

    let searchCompleted = false;
    for (const query of queries) {
      try {
        const searchRes = await fetch('https://api.firecrawl.dev/v1/search', {
          method: 'POST',
          headers: {
            'Content-Type': 'application/json',
            Authorization: `Bearer ${firecrawlKey}`,
          },
          body: JSON.stringify({ query, limit: 5 }),
        });
        if (!searchRes.ok) {
          await discardBody(searchRes);
          continue;
        }
        const data = (await searchRes.json()) as { data?: unknown } | null;
        searchCompleted = true;

        const hits: unknown[] = Array.isArray(data?.data) ? data.data : [];
        const url = hits
          .map(hit => (hit as Record<string, unknown> | null)?.url)
          .find(isGangnamUnniHospitalUrl);
        if (url) {
          return { handle: clinicName, verified: true, url };
        }
      } catch {
        // One failed query must not prevent the remaining fallbacks from running.
      }
    }

    return { handle: clinicName, verified: searchCompleted ? false : 'unverifiable' };
  } catch {
    return { handle: clinicName, verified: 'unverifiable' };
  }
}

/**
 * Pick the verified YouTube channel whose title contains the clinic name
 * (full name, or the name with common clinic suffixes removed). Names shorter
 * than two characters are ignored to avoid trivial matches.
 *
 * @returns The first matching channel in the given order, or `undefined`.
 */
function findYouTubeNameMatch(
  verifiedChannels: YouTubeChannel[],
  clinicName: string,
): YouTubeChannel | undefined {
  const fullName = clinicName.toLowerCase();
  const brandName = fullName.replace(/성형외과|병원|의원|클리닉/g, '').trim();
  const needles = [fullName, brandName].filter(w => w.length >= 2);
  return verifiedChannels.find(channel => {
    const title = (channel.channelTitle || '').toLowerCase();
    return needles.some(needle => title.includes(needle));
  });
}

/**
 * Verify all discovered handles in parallel.
 *
 * Reads `YOUTUBE_API_KEY` and `FIRECRAWL_API_KEY` from the Deno environment.
 * Per platform:
 *   - Instagram: first 5 candidates, all results returned in candidate order.
 *   - YouTube:   first 5 candidates; the verified channel whose title matches the
 *                clinic name wins, else the first verified one in candidate order,
 *                else the first candidate with its (non-verified) status.
 *   - Facebook:  up to 3 candidates tried in sequence; the first `true` or
 *                `'unverifiable'` result wins. With no candidates, a handle
 *                derived from the clinic name is tried.
 *   - Naver:     first candidate only.
 *   - 강남언니:    searched whenever a clinic name and a Firecrawl key are present.
 *   - TikTok:    not checked; the first candidate is returned with `verified: false`.
 *
 * @param candidates         Candidate handles per platform.
 * @param clinicName         Clinic name used for matching and fallbacks.
 * @param gangnamUnniHintUrl Optional known gangnamunni hospital URL.
 * @returns Verification results; platforms with nothing to check stay `null`
 *          (Instagram: empty array).
 */
export async function verifyAllHandles(
  candidates: {
    instagram: string[];
    youtube: string[];
    facebook: string[];
    naverBlog: string[];
    tiktok: string[];
  },
  clinicName: string,
  gangnamUnniHintUrl?: string,
): Promise<VerifiedChannels> {
  const YOUTUBE_API_KEY = Deno.env.get('YOUTUBE_API_KEY') || '';
  const FIRECRAWL_API_KEY = Deno.env.get('FIRECRAWL_API_KEY') || '';

  const tasks: Promise<void>[] = [];
  const result: VerifiedChannels = {
    instagram: [],
    youtube: null,
    facebook: null,
    naverBlog: null,
    gangnamUnni: null,
    tiktok: null,
  };

  // Instagram — verify each candidate, keep unverified as fallback.
  // Results are collected positionally so the output order always equals the
  // candidate order, regardless of which request finishes first.
  const igCandidates = (candidates.instagram ?? []).slice(0, 5);
  if (igCandidates.length > 0) {
    tasks.push(
      Promise.all(igCandidates.map(handle => verifyInstagram(handle))).then(checked => {
        result.instagram = checked;
      }),
    );
  }

  // YouTube — verify ALL candidates, pick the best match by clinic name
  const ytCandidates = (candidates.youtube ?? []).slice(0, 5);
  if (ytCandidates.length > 0) {
    tasks.push(
      Promise.all(ytCandidates.map(handle => verifyYouTube(handle, YOUTUBE_API_KEY))).then(checked => {
        // Strict comparison: 'unverifiable' is a truthy string but not a confirmation.
        const confirmed = checked.filter(channel => channel.verified === true);
        const firstConfirmed = confirmed[0];

        if (!firstConfirmed) {
          // None verified — keep the first candidate with its own status
          // (false = not found, 'unverifiable' = the API could not be asked).
          result.youtube = { handle: ytCandidates[0], verified: checked[0]?.verified ?? false };
          return;
        }

        // Pick best match: channel title containing clinic name
        const bestMatch = findYouTubeNameMatch(confirmed, clinicName);
        result.youtube = bestMatch || firstConfirmed;
        if (bestMatch) {
          console.log(`[verify] YouTube matched: "${bestMatch.channelTitle}" for "${clinicName}"`);
        } else {
          console.warn(`[verify] YouTube no name match — using first verified: "${firstConfirmed.channelTitle}"`);
        }
      }),
    );
  }

  // Facebook — try all candidates, also try clinic name as fallback
  const fbCandidates = [...(candidates.facebook ?? [])];
  if (clinicName && fbCandidates.length === 0) {
    // No discovered handle: guess one from the clinic name with common suffixes
    // and whitespace removed.
    const guessedHandle = clinicName
      .toLowerCase()
      .replace(/성형외과|병원|의원|클리닉|피부과/g, '')
      .trim()
      .replace(/\s+/g, '');
    if (guessedHandle) fbCandidates.push(guessedHandle);
  }
  if (fbCandidates.length > 0) {
    tasks.push(
      (async () => {
        for (const handle of fbCandidates.slice(0, 3)) {
          const checked = await verifyFacebook(handle);
          if (checked.verified === true || checked.verified === 'unverifiable') {
            result.facebook = checked;
            return;
          }
        }
        // All failed — store first as unverified
        result.facebook = { handle: fbCandidates[0], verified: false };
      })(),
    );
  }

  // Naver Blog — first candidate
  const naverCandidates = candidates.naverBlog ?? [];
  if (naverCandidates.length > 0) {
    tasks.push(
      verifyNaverBlog(naverCandidates[0]).then(checked => {
        result.naverBlog = checked;
      }),
    );
  }

  // 강남언니 — always try if clinicName exists
  if (clinicName && FIRECRAWL_API_KEY) {
    tasks.push(
      verifyGangnamUnni(clinicName, FIRECRAWL_API_KEY, gangnamUnniHintUrl).then(checked => {
        result.gangnamUnni = checked;
      }),
    );
  }

  // TikTok — skip verification for now (TikTok blocks HEAD requests).
  // Reported as `verified: false`, i.e. "not confirmed", for backward compatibility.
  const tiktokCandidates = candidates.tiktok ?? [];
  if (tiktokCandidates.length > 0) {
    const tkHandle = tiktokCandidates[0].replace(/^@/, '');
    result.tiktok = { handle: tkHandle, verified: false, url: `https://tiktok.com/@${tkHandle}` };
  }

  await Promise.allSettled(tasks);
  return result;
}