// o2o-infinith-demo/supabase/functions/_shared/verifyHandles.ts
//
// 321 lines
// 11 KiB
// TypeScript

/**
* Verify social media handles exist via lightweight API checks.
* Each check runs independently — one failure doesn't block others.
*/
/**
 * Outcome of checking one social-media handle.
 */
export interface VerifiedChannel {
  /** The handle that was checked (for Gangnam Unni this is the clinic name). */
  handle: string;
  /**
   * `true` / `false` when the check gave a definite answer, or
   * `'unverifiable'` when the platform blocked the check or it could not be
   * completed.
   */
  verified: boolean | 'unverifiable';
  /** Page URL associated with the handle, when one was built or found. */
  url?: string;
  /** YouTube channel ID, if resolved. */
  channelId?: string;
}
/**
 * Verification results grouped by platform.
 *
 * Instagram holds a list (several candidates may be checked); every other
 * platform holds a single result, or `null` when nothing was checked.
 */
export interface VerifiedChannels {
  instagram: VerifiedChannel[];
  youtube: VerifiedChannel | null;
  facebook: VerifiedChannel | null;
  naverBlog: VerifiedChannel | null;
  gangnamUnni: VerifiedChannel | null;
  tiktok: VerifiedChannel | null;
}
/**
 * Check whether an Instagram profile exists by fetching its public profile page.
 *
 * The handle may be given with or without a leading `@`; the returned
 * `handle` is always the value that was passed in.
 *
 * @param handle Instagram username to check.
 * @returns `verified: true` for a 200 profile page, `false` for a 404 or an
 *   empty handle, and `'unverifiable'` when Instagram redirects, serves the
 *   login page, answers with any other status, or the request fails.
 */
async function verifyInstagram(handle: string): Promise<VerifiedChannel> {
  try {
    // A leading '@' would become part of the path and always 404.
    const cleanHandle = handle.trim().replace(/^@/, '');
    if (!cleanHandle) {
      // Without this guard the request would hit the site root and "succeed".
      return { handle, verified: false };
    }

    // Encode so that '/', '?' or '#' in a bad candidate cannot point the
    // request at a different page.
    const url = `https://www.instagram.com/${encodeURIComponent(cleanHandle)}/`;
    const res = await fetch(url, {
      method: 'GET',
      headers: { 'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7)' },
      redirect: 'manual', // Don't follow redirects — detect login page
    });

    if (res.status === 200) {
      // Some 200 responses are actually the login page rather than a profile.
      const html = await res.text().catch(() => '');
      const bodySnippet = html.slice(0, 2000);
      const isLoginPage =
        bodySnippet.includes('/accounts/login') &&
        !bodySnippet.includes(`"username":"${cleanHandle}"`);
      return { handle, verified: isLoginPage ? 'unverifiable' : true, url };
    }

    // The body is not needed on any other path; release it instead of leaving
    // the stream open.
    await res.body?.cancel().catch(() => {});

    if (res.status === 404) {
      return { handle, verified: false, url };
    }
    // Redirects (login / challenge walls) and every other status: Instagram
    // did not give a definite answer, so don't assume the profile is missing.
    return { handle, verified: 'unverifiable', url };
  } catch {
    return { handle, verified: 'unverifiable' };
  }
}
/**
 * Verify that a YouTube channel exists using the YouTube Data API v3.
 *
 * Lookups are tried in order: `forHandle`, `forUsername`, and — when the value
 * looks like a channel ID (`UC` prefix, 24 characters) — `id`. The first
 * lookup that returns a channel wins.
 *
 * @param handle Channel handle (with or without a leading `@`), legacy
 *   username, or channel ID.
 * @param apiKey YouTube Data API key. When empty no request is made.
 * @returns `verified: true` with `channelId`, `channelTitle` and `url` when a
 *   channel is found; otherwise `verified: false`. Never rejects.
 */
async function verifyYouTube(handle: string, apiKey: string): Promise<VerifiedChannel & { channelTitle?: string }> {
  const YT_BASE = 'https://www.googleapis.com/youtube/v3';
  try {
    const cleanHandle = handle.trim().replace(/^@/, '');
    if (!cleanHandle || !apiKey) {
      // Nothing to look up, or no credentials: skip the API calls that could
      // only fail.
      return { handle, verified: false };
    }

    const encodedKey = encodeURIComponent(apiKey);
    // One channels.list call. Values are URL-encoded so that '&', '=' or
    // non-ASCII characters in a handle cannot alter the query string.
    // A failed call yields `undefined` so the next lookup strategy still runs.
    const findChannel = async (param: string, value: string) => {
      try {
        const res = await fetch(
          `${YT_BASE}/channels?part=id,snippet&${param}=${encodeURIComponent(value)}&key=${encodedKey}`,
        );
        const data = await res.json();
        return data?.items?.[0];
      } catch {
        return undefined;
      }
    };

    // Try forHandle first, then forUsername
    for (const param of ['forHandle', 'forUsername']) {
      const channel = await findChannel(param, cleanHandle);
      if (channel) {
        return {
          handle,
          verified: true,
          channelId: channel.id,
          channelTitle: channel.snippet?.title || '',
          url: `https://youtube.com/@${cleanHandle}`,
        };
      }
    }

    // Try as channel ID directly (starts with UC)
    if (cleanHandle.startsWith('UC') && cleanHandle.length === 24) {
      const channel = await findChannel('id', cleanHandle);
      if (channel) {
        return {
          handle: cleanHandle,
          verified: true,
          channelId: cleanHandle,
          channelTitle: channel.snippet?.title || '',
          url: `https://youtube.com/channel/${cleanHandle}`,
        };
      }
    }

    return { handle, verified: false };
  } catch {
    return { handle, verified: false };
  }
}
/**
 * Verify that a Facebook page exists by fetching it with GET.
 *
 * GET is used rather than HEAD because Facebook blocks HEAD requests.
 * Redirects are followed, so a response that ends up on Facebook's login or
 * checkpoint page is reported as `'unverifiable'` rather than as an existing
 * page.
 *
 * @param handle Facebook page name / path segment.
 * @returns `verified: true` for a 200 page without a "not found" marker,
 *   `false` for a 404, a "not found" page or an empty handle, and
 *   `'unverifiable'` for login walls, other statuses and network failures.
 */
async function verifyFacebook(handle: string): Promise<VerifiedChannel> {
  try {
    if (!handle.trim()) {
      // An empty handle would request the Facebook root and look like a hit.
      return { handle, verified: false };
    }

    const url = `https://www.facebook.com/${handle}/`;
    // Use GET instead of HEAD — Facebook blocks HEAD requests
    const res = await fetch(url, {
      method: 'GET',
      headers: { 'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7)' },
      redirect: 'follow',
    });

    // After following redirects, `res.url` is the final location. Landing on
    // the login / checkpoint page says nothing about whether the page exists.
    const finalUrl = res.url || '';
    const hitLoginWall = /facebook\.com\/(login|checkpoint)(\/|\.php|\?|$)/i.test(finalUrl);

    if (res.status === 200 && !hitLoginWall) {
      // Facebook can answer 200 with a "not found" page; look for its markers.
      const html = await res.text().catch(() => '');
      const bodySnippet = html.slice(0, 3000);
      if (bodySnippet.includes('page_not_found') || bodySnippet.includes('This content isn')) {
        return { handle, verified: false, url };
      }
      return { handle, verified: true, url };
    }

    // Body is not inspected on the remaining paths; release the stream.
    await res.body?.cancel().catch(() => {});

    if (res.status === 404) {
      return { handle, verified: false, url };
    }
    // Facebook often blocks bots → unverifiable, not false
    return { handle, verified: 'unverifiable', url };
  } catch {
    return { handle, verified: 'unverifiable' };
  }
}
/**
 * Check a Naver Blog ID with a HEAD request to its blog URL.
 *
 * @param blogId Naver blog identifier (the path segment after blog.naver.com/).
 * @returns `verified` is `true` only for a 200 response (redirects are
 *   followed). A request failure yields `verified: false` with no `url`.
 */
async function verifyNaverBlog(blogId: string): Promise<VerifiedChannel> {
  const blogUrl = `https://blog.naver.com/${blogId}`;
  let status: number;
  try {
    ({ status } = await fetch(blogUrl, { method: 'HEAD', redirect: 'follow' }));
  } catch {
    return { handle: blogId, verified: false };
  }
  const exists = status === 200;
  return { handle: blogId, verified: exists, url: blogUrl };
}
/**
 * True when `candidate` is an http(s) URL on gangnamunni.com (or a subdomain)
 * whose path starts with `/hospitals/`.
 *
 * The URL is parsed rather than substring-matched, so a foreign URL that
 * merely contains "gangnamunni.com/hospitals/" in its query or path is rejected.
 */
function isGangnamUnniHospitalUrl(candidate: unknown): candidate is string {
  if (typeof candidate !== 'string') return false;
  try {
    const parsed = new URL(candidate);
    const host = parsed.hostname.toLowerCase();
    const isHttp = parsed.protocol === 'https:' || parsed.protocol === 'http:';
    const onSite = host === 'gangnamunni.com' || host.endsWith('.gangnamunni.com');
    return isHttp && onSite && parsed.pathname.startsWith('/hospitals/');
  } catch {
    return false;
  }
}

/**
 * Find and verify a clinic's Gangnam Unni hospital page.
 *
 * 1. If `hintUrl` is a Gangnam Unni hospital URL and answers 200 to a HEAD
 *    request, it is returned as verified.
 * 2. Otherwise Firecrawl search is queried with several fallback queries and
 *    the first result that is a Gangnam Unni hospital URL is returned.
 *
 * Each network step is isolated: a failed hint check or search query does not
 * prevent the remaining queries from running.
 *
 * @param clinicName Clinic name; used to build queries and as the result `handle`.
 * @param firecrawlKey Firecrawl API key sent as a Bearer token.
 * @param hintUrl Optional previously discovered hospital URL to try first.
 * @returns `verified: true` with the page `url` when found, else `verified: false`.
 */
async function verifyGangnamUnni(
  clinicName: string,
  firecrawlKey: string,
  hintUrl?: string,
): Promise<VerifiedChannel> {
  try {
    // If we already have a URL hint, just verify it. The hint is externally
    // supplied, so it is only fetched once it parses as a real hospital URL.
    if (hintUrl && isGangnamUnniHospitalUrl(hintUrl)) {
      try {
        const res = await fetch(hintUrl, { method: 'HEAD', redirect: 'follow' });
        if (res.status === 200) {
          return { handle: clinicName, verified: true, url: hintUrl };
        }
      } catch {
        // Hint check failed — fall through to search.
      }
    }

    // Otherwise, search with multiple fallback queries
    const shortName = clinicName.replace(/성형외과|의원|병원|클리닉|피부과/g, '').trim();
    const queries = [
      ...new Set([
        `${clinicName} site:gangnamunni.com`,
        // With an empty short name this query would match any hospital on the
        // site, so it is skipped.
        ...(shortName ? [`${shortName} 성형외과 site:gangnamunni.com`] : []),
        `${clinicName} 강남언니`,
      ]),
    ]; // Set removes duplicates, e.g. when the name is already "X 성형외과".

    for (const query of queries) {
      try {
        const searchRes = await fetch('https://api.firecrawl.dev/v1/search', {
          method: 'POST',
          headers: {
            'Content-Type': 'application/json',
            Authorization: `Bearer ${firecrawlKey}`,
          },
          body: JSON.stringify({ query, limit: 5 }),
        });
        const data = await searchRes.json();
        const hits: unknown[] = Array.isArray(data?.data) ? data.data : [];
        const url = hits
          .map((hit) => (hit as { url?: unknown } | null)?.url)
          .find(isGangnamUnniHospitalUrl);
        if (url) {
          return { handle: clinicName, verified: true, url };
        }
      } catch {
        // This query failed (network error / non-JSON reply); try the next one.
      }
    }
    return { handle: clinicName, verified: false };
  } catch {
    return { handle: clinicName, verified: false };
  }
}
/**
 * Verify all discovered handles for a clinic, running the per-platform checks
 * concurrently and collecting whatever completes.
 *
 * Per platform:
 * - Instagram: up to 5 candidates are checked; results keep candidate order.
 * - YouTube: up to 5 candidates are checked. The verified channel whose title
 *   contains the clinic name wins; otherwise the first verified channel in
 *   candidate order; otherwise the first candidate with `verified: false`.
 * - Facebook: up to 3 candidates are tried one after another; the first
 *   `true` or `'unverifiable'` result wins. With no candidates, a name derived
 *   from `clinicName` is tried instead.
 * - Naver Blog: only the first candidate is checked.
 * - Gangnam Unni: searched only when both `clinicName` and the
 *   `FIRECRAWL_API_KEY` environment variable are set.
 * - TikTok: not checked; the first candidate is recorded with `verified: false`.
 *
 * @param candidates Candidate handles per platform.
 * @param clinicName Clinic name used for YouTube title matching, the Facebook
 *   fallback and the Gangnam Unni search.
 * @param gangnamUnniHintUrl Optional known Gangnam Unni URL to try first.
 * @returns The per-platform verification results.
 */
export async function verifyAllHandles(
  candidates: {
    instagram: string[];
    youtube: string[];
    facebook: string[];
    naverBlog: string[];
    tiktok: string[];
  },
  clinicName: string,
  gangnamUnniHintUrl?: string,
): Promise<VerifiedChannels> {
  const YOUTUBE_API_KEY = Deno.env.get('YOUTUBE_API_KEY') || '';
  const FIRECRAWL_API_KEY = Deno.env.get('FIRECRAWL_API_KEY') || '';
  const tasks: Promise<void>[] = [];
  const result: VerifiedChannels = {
    instagram: [],
    youtube: null,
    facebook: null,
    naverBlog: null,
    gangnamUnni: null,
    tiktok: null,
  };

  // Instagram — verify each candidate, keep unverified as fallback.
  // Results are collected from the settled array so they stay in candidate
  // order instead of whichever request happens to finish first.
  const igCandidates = candidates.instagram.slice(0, 5);
  if (igCandidates.length > 0) {
    tasks.push(
      Promise.allSettled(igCandidates.map(handle => verifyInstagram(handle))).then(settled => {
        for (const outcome of settled) {
          if (outcome.status === 'fulfilled') result.instagram.push(outcome.value);
        }
      }),
    );
  }

  // YouTube — verify ALL candidates, pick the best match by clinic name
  const ytCandidates = candidates.youtube.slice(0, 5);
  if (ytCandidates.length > 0) {
    tasks.push(
      Promise.allSettled(
        ytCandidates.map(handle => verifyYouTube(handle, YOUTUBE_API_KEY)),
      ).then(settled => {
        // Candidate order is preserved so "first verified" is deterministic.
        // Strict `=== true` keeps a truthy 'unverifiable' from counting as verified.
        const ytResults: (VerifiedChannel & { channelTitle?: string })[] = [];
        for (const outcome of settled) {
          if (outcome.status === 'fulfilled' && outcome.value.verified === true) {
            ytResults.push(outcome.value);
          }
        }
        if (ytResults.length === 0) {
          // None verified — use first candidate as unverified
          result.youtube = { handle: ytCandidates[0], verified: false };
          return;
        }
        // Pick best match: channel title containing clinic name
        const nameL = clinicName.toLowerCase().replace(/성형외과|병원|의원|클리닉/g, '').trim();
        const nameWords = [clinicName.toLowerCase(), nameL].filter(w => w.length >= 2);
        const bestMatch = ytResults.find(r =>
          nameWords.some(w => (r.channelTitle || '').toLowerCase().includes(w))
        );
        result.youtube = bestMatch || ytResults[0];
        if (bestMatch) {
          console.log(`[verify] YouTube matched: "${bestMatch.channelTitle}" for "${clinicName}"`);
        } else {
          console.warn(`[verify] YouTube no name match — using first verified: "${ytResults[0].channelTitle}"`);
        }
      }),
    );
  }

  // Facebook — try all candidates; with none, fall back to a page name derived
  // from the clinic name (suffix words and whitespace removed).
  const fbCandidates = [...candidates.facebook];
  if (clinicName && fbCandidates.length === 0) {
    const domain = clinicName.toLowerCase()
      .replace(/성형외과|병원|의원|클리닉|피부과/g, '').trim().replace(/\s+/g, '');
    if (domain) fbCandidates.push(domain);
  }
  if (fbCandidates.length > 0) {
    tasks.push(
      (async () => {
        // Sequential on purpose: stop at the first candidate that is not a
        // definite miss.
        for (const handle of fbCandidates.slice(0, 3)) {
          const v = await verifyFacebook(handle);
          if (v.verified === true || v.verified === 'unverifiable') {
            result.facebook = v;
            return;
          }
        }
        // All failed — store first as unverified
        result.facebook = { handle: fbCandidates[0], verified: false };
      })()
    );
  }

  // Naver Blog — first candidate
  if (candidates.naverBlog.length > 0) {
    tasks.push(
      verifyNaverBlog(candidates.naverBlog[0]).then(v => { result.naverBlog = v; })
    );
  }

  // Gangnam Unni — always try if clinicName exists (and a Firecrawl key is set)
  if (clinicName && FIRECRAWL_API_KEY) {
    tasks.push(
      verifyGangnamUnni(clinicName, FIRECRAWL_API_KEY, gangnamUnniHintUrl)
        .then(v => { result.gangnamUnni = v; })
    );
  }

  // TikTok — skip verification for now (TikTok blocks HEAD requests)
  if (candidates.tiktok.length > 0) {
    const tkHandle = candidates.tiktok[0].replace(/^@/, '');
    result.tiktok = { handle: tkHandle, verified: false, url: `https://tiktok.com/@${tkHandle}` };
  }

  // allSettled: one failing platform must not discard the others' results.
  await Promise.allSettled(tasks);
  return result;
}