fix: YouTube name matching + Facebook domain fallback in channel discovery

YouTube now verifies up to five candidates and picks the best match by channel title, falling back to the first verified channel when no title contains the clinic name.
Facebook tries up to three candidates in order, with a domain-name fallback when Firecrawl returns empty.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
claude/bold-hawking
Haewon Kam 2026-04-05 12:15:37 +09:00
parent d1157da39c
commit 2ca9ec0306
2 changed files with 67 additions and 12 deletions

View File

@ -64,7 +64,7 @@ async function verifyInstagram(handle: string): Promise<VerifiedChannel> {
/** /**
* Verify a YouTube handle/channel exists using YouTube Data API v3. * Verify a YouTube handle/channel exists using YouTube Data API v3.
*/ */
async function verifyYouTube(handle: string, apiKey: string): Promise<VerifiedChannel> { async function verifyYouTube(handle: string, apiKey: string): Promise<VerifiedChannel & { channelTitle?: string }> {
try { try {
const YT_BASE = 'https://www.googleapis.com/youtube/v3'; const YT_BASE = 'https://www.googleapis.com/youtube/v3';
const cleanHandle = handle.replace(/^@/, ''); const cleanHandle = handle.replace(/^@/, '');
@ -79,6 +79,7 @@ async function verifyYouTube(handle: string, apiKey: string): Promise<VerifiedCh
handle, handle,
verified: true, verified: true,
channelId: channel.id, channelId: channel.id,
channelTitle: channel.snippet?.title || '',
url: `https://youtube.com/@${cleanHandle}`, url: `https://youtube.com/@${cleanHandle}`,
}; };
} }
@ -88,8 +89,13 @@ async function verifyYouTube(handle: string, apiKey: string): Promise<VerifiedCh
if (cleanHandle.startsWith('UC') && cleanHandle.length === 24) { if (cleanHandle.startsWith('UC') && cleanHandle.length === 24) {
const res = await fetch(`${YT_BASE}/channels?part=id,snippet&id=${cleanHandle}&key=${apiKey}`); const res = await fetch(`${YT_BASE}/channels?part=id,snippet&id=${cleanHandle}&key=${apiKey}`);
const data = await res.json(); const data = await res.json();
if (data.items?.[0]) { const channel = data.items?.[0];
return { handle: cleanHandle, verified: true, channelId: cleanHandle, url: `https://youtube.com/channel/${cleanHandle}` }; if (channel) {
return {
handle: cleanHandle, verified: true, channelId: cleanHandle,
channelTitle: channel.snippet?.title || '',
url: `https://youtube.com/channel/${cleanHandle}`,
};
} }
} }
@ -232,17 +238,59 @@ export async function verifyAllHandles(
); );
} }
// YouTube — first candidate // YouTube — verify ALL candidates, pick the best match by clinic name
if (candidates.youtube.length > 0) { if (candidates.youtube.length > 0) {
const ytCandidates = candidates.youtube.slice(0, 5);
const ytResults: (VerifiedChannel & { channelTitle?: string })[] = [];
const ytTasks = ytCandidates.map(handle =>
verifyYouTube(handle, YOUTUBE_API_KEY).then(v => { if (v.verified) ytResults.push(v); })
);
tasks.push( tasks.push(
verifyYouTube(candidates.youtube[0], YOUTUBE_API_KEY).then(v => { result.youtube = v; }) Promise.allSettled(ytTasks).then(() => {
if (ytResults.length === 0) {
// None verified — use first candidate as unverified
result.youtube = { handle: ytCandidates[0], verified: false };
return;
}
// Pick best match: channel title containing clinic name
const nameL = clinicName.toLowerCase().replace(/성형외과|병원|의원|클리닉/g, '').trim();
const nameWords = [clinicName.toLowerCase(), nameL].filter(w => w.length >= 2);
const bestMatch = ytResults.find(r =>
nameWords.some(w => (r.channelTitle || '').toLowerCase().includes(w))
);
result.youtube = bestMatch || ytResults[0];
if (bestMatch) {
console.log(`[verify] YouTube matched: "${bestMatch.channelTitle}" for "${clinicName}"`);
} else {
console.warn(`[verify] YouTube no name match — using first verified: "${ytResults[0].channelTitle}"`);
}
})
); );
} }
// Facebook — first candidate // Facebook — try all candidates, also try clinic name as fallback
if (candidates.facebook.length > 0) { const fbCandidates = [...candidates.facebook];
// Fallback: try common Facebook page name patterns from clinic name
if (clinicName) {
const domain = fbCandidates.length > 0 ? '' : clinicName.toLowerCase()
.replace(/성형외과|병원|의원|클리닉|피부과/g, '').trim().replace(/\s+/g, '');
// Try English brand name patterns (e.g. "아이디병원" → site URL "idhospital")
// This is handled by extractSocialLinks from siteLinks already
if (domain && !fbCandidates.includes(domain)) fbCandidates.push(domain);
}
if (fbCandidates.length > 0) {
tasks.push( tasks.push(
verifyFacebook(candidates.facebook[0]).then(v => { result.facebook = v; }) (async () => {
for (const handle of fbCandidates.slice(0, 3)) {
const v = await verifyFacebook(handle);
if (v.verified === true || v.verified === 'unverifiable') {
result.facebook = v;
return;
}
}
// All failed — store first as unverified
result.facebook = { handle: fbCandidates[0], verified: false };
})()
); );
} }

View File

@ -251,12 +251,10 @@ Deno.serve(async (req) => {
`https://www.googleapis.com/youtube/v3/search?part=snippet&type=channel&q=${q}&maxResults=3&key=${YOUTUBE_API_KEY}` `https://www.googleapis.com/youtube/v3/search?part=snippet&type=channel&q=${q}&maxResults=3&key=${YOUTUBE_API_KEY}`
); );
const data = await res.json(); const data = await res.json();
// Add ALL search results — let verifyAllHandles pick the best match by name
for (const item of (data.items || [])) { for (const item of (data.items || [])) {
const channelId = item.snippet?.channelId || item.id?.channelId; const channelId = item.snippet?.channelId || item.id?.channelId;
const title = (item.snippet?.title || "").toLowerCase(); if (channelId) {
const nameL = resolvedName.toLowerCase();
// Match if title contains clinic name or vice versa
if (channelId && (title.includes(nameL) || nameL.includes(title) || title.includes(nameL.replace(/성형외과|병원|의원|클리닉/g, '').trim()))) {
apiHandles.youtube!.push(channelId); apiHandles.youtube!.push(channelId);
} }
} }
@ -466,6 +464,15 @@ Deno.serve(async (req) => {
tiktok: [...new Set(merged.tiktok.map(h => extractHandle(h, 'tiktok')).filter((h): h is string => h !== null))], tiktok: [...new Set(merged.tiktok.map(h => extractHandle(h, 'tiktok')).filter((h): h is string => h !== null))],
}; };
// Fallback: try domain name as Facebook/Instagram handle if no candidates found
try {
const domain = new URL(url).hostname.replace('www.', '').split('.')[0]; // e.g. "idhospital"
if (domain && domain.length >= 3) {
if (cleanHandles.facebook.length === 0) cleanHandles.facebook.push(domain);
if (cleanHandles.instagram.length === 0) cleanHandles.instagram.push(domain);
}
} catch { /* ignore */ }
const verified: VerifiedChannels = await verifyAllHandles( const verified: VerifiedChannels = await verifyAllHandles(
cleanHandles, resolvedName, gangnamUnniHintUrl, cleanHandles, resolvedName, gangnamUnniHintUrl,
); );