138 lines
3.9 KiB
TypeScript
138 lines
3.9 KiB
TypeScript
/**
 * Extract social media handles from a list of URLs.
 * Parses known platform patterns deterministically — no AI guessing.
 */
|
|
|
|
/**
 * Social handles grouped by platform.
 *
 * Every field is a list of handle/ID strings (never full URLs). Lists are
 * expected to be free of case-insensitive duplicates when produced by the
 * helpers in this module.
 */
export interface ExtractedSocialLinks {
  /** Instagram usernames, taken from `instagram.com/{handle}`. */
  instagram: string[];
  /** YouTube identifiers: `@handle`, a `UC…` channel ID, or a `/c/` custom name. */
  youtube: string[];
  /** Facebook page names, taken from `facebook.com/{page}`. */
  facebook: string[];
  /** Naver Blog IDs, taken from `blog.naver.com/{blogId}`. */
  naverBlog: string[];
  /** TikTok usernames (without the leading `@`), taken from `tiktok.com/@{handle}`. */
  tiktok: string[];
  /** KakaoTalk Channel IDs, taken from `pf.kakao.com/{id}`. */
  kakao: string[];
}
|
|
|
|
/**
 * Per-platform extraction rules, tried in order against every URL.
 *
 * Each regex starts with `(?:^|[^a-zA-Z0-9-])` so the host name must begin at
 * the start of the string or right after a non-hostname character (`/`, `.`,
 * whitespace, ...). That keeps sub-domains such as `www.` / `m.` working while
 * rejecting look-alike hosts such as `notinstagram.com`.
 *
 * `extract` turns a successful match into the handle; it returns `''` when no
 * capture group participated so the caller can discard the match.
 */
const PATTERNS: { platform: keyof ExtractedSocialLinks; regex: RegExp; extract: (m: RegExpMatchArray) => string }[] = [
  // Instagram: instagram.com/{handle}. The handle must be the only path
  // segment (optionally followed by `/`, a query or a fragment), so
  // instagram.com/p/{postId} and similar deep links never match.
  {
    platform: 'instagram',
    regex: /(?:^|[^a-zA-Z0-9-])instagram\.com\/([a-zA-Z0-9._]+)\/?(?:[?#]|$)/,
    extract: (m) => m[1] || '',
  },
  // YouTube: youtube.com/@{handle} or youtube.com/channel/{id} or youtube.com/c/{custom}.
  // Handles keep their leading `@` so they can be told apart from IDs/custom names.
  {
    platform: 'youtube',
    regex: /(?:^|[^a-zA-Z0-9-])youtube\.com\/(?:@([a-zA-Z0-9._-]+)|channel\/(UC[a-zA-Z0-9_-]+)|c\/([a-zA-Z0-9._-]+))/,
    extract: (m) => (m[1] ? `@${m[1]}` : m[2] || m[3] || ''),
  },
  // Facebook: facebook.com/{page} as the only path segment. Reserved paths and
  // `.php` endpoints are filtered afterwards in extractSocialLinks.
  {
    platform: 'facebook',
    regex: /(?:^|[^a-zA-Z0-9-])facebook\.com\/([a-zA-Z0-9._-]+)\/?(?:[?#]|$)/,
    extract: (m) => m[1] || '',
  },
  // Naver Blog: blog.naver.com/{blogId}, or the endpoint form
  // blog.naver.com/PostView.naver?blogId={blogId} (also `.nhn`). The negative
  // lookahead stops an endpoint name such as `PostView` from being mistaken
  // for a blog ID when no `blogId` parameter is present.
  {
    platform: 'naverBlog',
    regex: /(?:^|[^a-zA-Z0-9-])blog\.naver\.com\/(?:[a-zA-Z]+\.(?:naver|nhn)\?(?:[^#\s]*?&)?blogId=([a-zA-Z0-9_-]+)|([a-zA-Z0-9_-]+)(?![a-zA-Z0-9_-]|\.(?:naver|nhn)\b))/,
    extract: (m) => m[1] || m[2] || '',
  },
  // TikTok: tiktok.com/@{handle}
  {
    platform: 'tiktok',
    regex: /(?:^|[^a-zA-Z0-9-])tiktok\.com\/@([a-zA-Z0-9._-]+)/,
    extract: (m) => m[1] || '',
  },
  // KakaoTalk Channel: pf.kakao.com/{id}
  {
    platform: 'kakao',
    regex: /(?:^|[^a-zA-Z0-9-])pf\.kakao\.com\/([a-zA-Z0-9_-]+)/,
    extract: (m) => m[1] || '',
  },
];

// Common Facebook paths that are NOT page names (compared lowercased)
const FB_SKIP: ReadonlySet<string> = new Set([
  'sharer', 'share', 'login', 'help', 'pages', 'events', 'groups',
  'marketplace', 'watch', 'gaming', 'privacy', 'policies', 'tr',
  'dialog', 'plugins', 'photo', 'video', 'reel',
]);

// Common Instagram paths that are NOT handles (compared lowercased)
const IG_SKIP: ReadonlySet<string> = new Set([
  'p', 'reel', 'reels', 'stories', 'explore', 'accounts', 'about',
  'developer', 'legal', 'privacy', 'terms',
]);

/**
 * Extract social media handles from a list of URLs.
 *
 * Every URL is tested against every entry of `PATTERNS`; each hit contributes
 * one handle to the matching platform list. Handles keep the casing found in
 * the URL, but duplicates are detected case-insensitively and only the first
 * spelling is kept. Handles shorter than two characters, reserved Facebook /
 * Instagram paths, and Facebook `.php` endpoints (`profile.php`, `sharer.php`,
 * `l.php`, ...) are discarded.
 *
 * @param urls - Candidate URLs. Non-string entries are coerced with `String`;
 *   a null/undefined list is treated as empty.
 * @returns One de-duplicated handle list per platform, in first-seen order.
 */
export function extractSocialLinks(urls: string[]): ExtractedSocialLinks {
  const result: ExtractedSocialLinks = {
    instagram: [],
    youtube: [],
    facebook: [],
    naverBlog: [],
    tiktok: [],
    kakao: [],
  };

  // Lowercased handles already emitted, per platform.
  const seen: Record<keyof ExtractedSocialLinks, Set<string>> = {
    instagram: new Set(),
    youtube: new Set(),
    facebook: new Set(),
    naverBlog: new Set(),
    tiktok: new Set(),
    kakao: new Set(),
  };

  for (const rawUrl of urls || []) {
    // Ensure we only process strings; trim so trailing whitespace/newlines
    // cannot defeat the end-of-string anchors in the patterns.
    const url = (typeof rawUrl === 'string' ? rawUrl : String(rawUrl || '')).trim();
    if (url.length < 5) continue;

    for (const { platform, regex, extract } of PATTERNS) {
      const match = url.match(regex);
      if (!match) continue;

      const handle = extract(match);
      if (!handle || handle.length < 2) continue;

      const normalized = handle.toLowerCase();

      // Skip known non-handle paths. Facebook script endpoints end in `.php`
      // and take their real target from the query string, so they are never
      // page names.
      if (platform === 'facebook' && (FB_SKIP.has(normalized) || normalized.endsWith('.php'))) continue;
      if (platform === 'instagram' && IG_SKIP.has(normalized)) continue;

      if (seen[platform].has(normalized)) continue;
      seen[platform].add(normalized);
      result[platform].push(handle);
    }
  }

  return result;
}
|
|
|
|
/**
 * Merge social links from multiple sources, deduplicating.
 *
 * Sources are processed in argument order, so for any handle the spelling
 * from the earliest source wins. Values are trimmed; non-string values and
 * values shorter than two characters are dropped; duplicates are detected
 * case-insensitively per platform. Missing platforms, non-array fields and
 * null/undefined sources are ignored rather than treated as errors.
 *
 * @param sources - Any number of (possibly partial) link collections.
 * @returns A complete collection with one de-duplicated list per platform.
 */
export function mergeSocialLinks(...sources: Partial<ExtractedSocialLinks>[]): ExtractedSocialLinks {
  const merged: ExtractedSocialLinks = {
    instagram: [],
    youtube: [],
    facebook: [],
    naverBlog: [],
    tiktok: [],
    kakao: [],
  };

  // Lowercased values already merged, per platform. A Set lookup replaces a
  // linear scan over the merged list for every incoming value.
  const seen: Record<keyof ExtractedSocialLinks, Set<string>> = {
    instagram: new Set(),
    youtube: new Set(),
    facebook: new Set(),
    naverBlog: new Set(),
    tiktok: new Set(),
    kakao: new Set(),
  };

  const platforms = Object.keys(merged) as (keyof ExtractedSocialLinks)[];

  for (const source of sources) {
    // Untyped callers may hand over null/undefined (e.g. an optional result).
    if (!source) continue;

    for (const key of platforms) {
      const vals = source[key];
      if (!Array.isArray(vals)) continue;

      for (const rawV of vals) {
        const v = typeof rawV === 'string' ? rawV.trim() : '';
        if (v.length < 2) continue;

        const normalized = v.toLowerCase();
        if (seen[key].has(normalized)) continue;

        seen[key].add(normalized);
        merged[key].push(v);
      }
    }
  }

  return merged;
}
|