/**
 * Extract social media handles from a list of URLs.
 * Parses known platform patterns deterministically — no AI guessing.
 */

/** Handles found per platform, in first-seen order, de-duplicated case-insensitively. */
export interface ExtractedSocialLinks {
  instagram: string[];
  youtube: string[];
  facebook: string[];
  naverBlog: string[];
  tiktok: string[];
  kakao: string[];
}

type Platform = keyof ExtractedSocialLinks;

interface PlatformPattern {
  platform: Platform;
  regex: RegExp;
  /** Turns a successful match into the handle to store ('' means "nothing usable"). */
  extract: (m: RegExpMatchArray) => string;
}

// Every regex starts with `(?:^|[^a-zA-Z0-9-])` so the host name must begin at
// the start of the string or right after a separator such as `/` or `.`.
// That keeps subdomains (m., www., business.) working while rejecting
// lookalike hosts such as `notinstagram.com`.
const PATTERNS: PlatformPattern[] = [
  // Instagram: instagram.com/{handle}. Post URLs (instagram.com/p/{postId}) do
  // not match because the handle must be the last path segment.
  {
    platform: 'instagram',
    regex: /(?:^|[^a-zA-Z0-9-])(?:www\.)?instagram\.com\/([a-zA-Z0-9._]+)\/?(?:[?#]|$)/,
    extract: (m) => m[1] ?? '',
  },
  // YouTube: youtube.com/@{handle} or youtube.com/channel/{id} or youtube.com/c/{custom}
  {
    platform: 'youtube',
    regex:
      /(?:^|[^a-zA-Z0-9-])(?:www\.)?youtube\.com\/(?:@([a-zA-Z0-9._-]+)|channel\/(UC[a-zA-Z0-9_-]+)|c\/([a-zA-Z0-9._-]+))/,
    extract: (m) => (m[1] ? `@${m[1]}` : m[2] || m[3] || ''),
  },
  // Facebook: facebook.com/{page} as the last path segment (reserved paths are
  // filtered afterwards, see isReservedPath).
  {
    platform: 'facebook',
    regex: /(?:^|[^a-zA-Z0-9-])(?:www\.)?facebook\.com\/([a-zA-Z0-9._-]+)\/?(?:[?#]|$)/,
    extract: (m) => m[1] ?? '',
  },
  // Naver Blog: blog.naver.com/{blogId}[/...] or
  // blog.naver.com/{Script}.naver?blogId={blogId} (legacy: .nhn).
  // The negative lookahead rejects a first segment that is followed by a dot,
  // so a script name like "PostView" is never mistaken for a blog id.
  {
    platform: 'naverBlog',
    regex:
      /(?:^|[^a-zA-Z0-9-])blog\.naver\.com\/(?:[a-zA-Z]+\.(?:naver|nhn)\?(?:[^#]*&)?blogId=([a-zA-Z0-9_-]+)|([a-zA-Z0-9_-]+)(?![a-zA-Z0-9_.-]))/,
    extract: (m) => m[1] ?? m[2] ?? '',
  },
  // TikTok: tiktok.com/@{handle}
  {
    platform: 'tiktok',
    regex: /(?:^|[^a-zA-Z0-9-])(?:www\.)?tiktok\.com\/@([a-zA-Z0-9._-]+)/,
    extract: (m) => m[1] ?? '',
  },
  // KakaoTalk Channel: pf.kakao.com/{id}
  {
    platform: 'kakao',
    regex: /(?:^|[^a-zA-Z0-9-])pf\.kakao\.com\/([a-zA-Z0-9_-]+)/,
    extract: (m) => m[1] ?? '',
  },
];

// Common Facebook paths that are NOT page names
const FB_SKIP = new Set([
  'sharer', 'share', 'login', 'help', 'pages', 'events', 'groups',
  'marketplace', 'watch', 'gaming', 'privacy', 'policies', 'tr',
  'dialog', 'plugins', 'photo', 'video', 'reel',
]);

// Common Instagram paths that are NOT handles
const IG_SKIP = new Set([
  'p', 'reel', 'reels', 'stories', 'explore', 'accounts', 'about',
  'developer', 'legal', 'privacy', 'terms',
]);

/** Creates a result object with an empty list for every platform. */
function emptyLinks(): ExtractedSocialLinks {
  return {
    instagram: [],
    youtube: [],
    facebook: [],
    naverBlog: [],
    tiktok: [],
    kakao: [],
  };
}

/** Creates one empty "already seen" set per platform, used for case-insensitive dedupe. */
function emptySeen(): Record<Platform, Set<string>> {
  return {
    instagram: new Set<string>(),
    youtube: new Set<string>(),
    facebook: new Set<string>(),
    naverBlog: new Set<string>(),
    tiktok: new Set<string>(),
    kakao: new Set<string>(),
  };
}

/** Returns true when a matched path segment is a site feature rather than an account. */
function isReservedPath(platform: Platform, handle: string): boolean {
  const lower = handle.toLowerCase();
  if (platform === 'facebook') {
    // Script endpoints (profile.php, sharer.php, l.php, ...) are never page names.
    return FB_SKIP.has(lower) || lower.endsWith('.php');
  }
  if (platform === 'instagram') {
    return IG_SKIP.has(lower);
  }
  return false;
}

/**
 * Scans URLs for known social platform patterns and collects the handles.
 *
 * @param urls - Candidate URLs. Non-string entries are coerced with `String`;
 *   empty values and strings shorter than 5 characters are ignored.
 * @returns Handles grouped by platform, in first-seen order. Duplicates are
 *   removed case-insensitively and the first spelling encountered is kept.
 *   Handles shorter than 2 characters are dropped.
 */
export function extractSocialLinks(urls: string[]): ExtractedSocialLinks {
  const result = emptyLinks();
  const seen = emptySeen();

  for (const rawUrl of urls) {
    // Callers may hand us untyped data at runtime, so only trust strings.
    const candidate: unknown = rawUrl;
    const url = typeof candidate === 'string' ? candidate : String(candidate || '');
    if (!url || url.length < 5) continue;

    for (const { platform, regex, extract } of PATTERNS) {
      const match = url.match(regex);
      if (!match) continue;

      const handle = extract(match);
      if (!handle || handle.length < 2) continue;

      // Skip known non-handle paths
      if (isReservedPath(platform, handle)) continue;

      const normalized = handle.toLowerCase();
      if (seen[platform].has(normalized)) continue;
      seen[platform].add(normalized);
      result[platform].push(handle);
    }
  }

  return result;
}

/**
 * Merge social links from multiple sources, deduplicating.
 *
 * @param sources - Partial link sets. `null`/`undefined` sources and
 *   non-array properties are skipped. Values are trimmed; non-strings and
 *   values shorter than 2 characters are dropped.
 * @returns A complete link set where each platform keeps the first spelling of
 *   every handle (comparison is case-insensitive), in encounter order.
 */
export function mergeSocialLinks(
  ...sources: (Partial<ExtractedSocialLinks> | null | undefined)[]
): ExtractedSocialLinks {
  const merged = emptyLinks();
  const seen = emptySeen();
  const platforms = Object.keys(merged) as Platform[];

  for (const source of sources) {
    if (source == null) continue;

    for (const key of platforms) {
      const vals: unknown = source[key];
      if (!Array.isArray(vals)) continue;

      for (const rawV of vals) {
        const v = typeof rawV === 'string' ? rawV.trim() : '';
        if (v.length < 2) continue;

        const normalized = v.toLowerCase();
        if (seen[key].has(normalized)) continue;
        seen[key].add(normalized);
        merged[key].push(v);
      }
    }
  }

  return merged;
}