fix: YouTube name matching + Facebook domain fallback in channel discovery
YouTube now verifies all candidates and picks the best match by channel title. Facebook tries all candidates, with a domain-name fallback when Firecrawl returns empty.
Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
claude/bold-hawking
parent
d1157da39c
commit
2ca9ec0306
|
|
@ -64,7 +64,7 @@ async function verifyInstagram(handle: string): Promise<VerifiedChannel> {
|
||||||
/**
|
/**
|
||||||
* Verify a YouTube handle/channel exists using YouTube Data API v3.
|
* Verify a YouTube handle/channel exists using YouTube Data API v3.
|
||||||
*/
|
*/
|
||||||
async function verifyYouTube(handle: string, apiKey: string): Promise<VerifiedChannel> {
|
async function verifyYouTube(handle: string, apiKey: string): Promise<VerifiedChannel & { channelTitle?: string }> {
|
||||||
try {
|
try {
|
||||||
const YT_BASE = 'https://www.googleapis.com/youtube/v3';
|
const YT_BASE = 'https://www.googleapis.com/youtube/v3';
|
||||||
const cleanHandle = handle.replace(/^@/, '');
|
const cleanHandle = handle.replace(/^@/, '');
|
||||||
|
|
@ -79,6 +79,7 @@ async function verifyYouTube(handle: string, apiKey: string): Promise<VerifiedCh
|
||||||
handle,
|
handle,
|
||||||
verified: true,
|
verified: true,
|
||||||
channelId: channel.id,
|
channelId: channel.id,
|
||||||
|
channelTitle: channel.snippet?.title || '',
|
||||||
url: `https://youtube.com/@${cleanHandle}`,
|
url: `https://youtube.com/@${cleanHandle}`,
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
@ -88,8 +89,13 @@ async function verifyYouTube(handle: string, apiKey: string): Promise<VerifiedCh
|
||||||
if (cleanHandle.startsWith('UC') && cleanHandle.length === 24) {
|
if (cleanHandle.startsWith('UC') && cleanHandle.length === 24) {
|
||||||
const res = await fetch(`${YT_BASE}/channels?part=id,snippet&id=${cleanHandle}&key=${apiKey}`);
|
const res = await fetch(`${YT_BASE}/channels?part=id,snippet&id=${cleanHandle}&key=${apiKey}`);
|
||||||
const data = await res.json();
|
const data = await res.json();
|
||||||
if (data.items?.[0]) {
|
const channel = data.items?.[0];
|
||||||
return { handle: cleanHandle, verified: true, channelId: cleanHandle, url: `https://youtube.com/channel/${cleanHandle}` };
|
if (channel) {
|
||||||
|
return {
|
||||||
|
handle: cleanHandle, verified: true, channelId: cleanHandle,
|
||||||
|
channelTitle: channel.snippet?.title || '',
|
||||||
|
url: `https://youtube.com/channel/${cleanHandle}`,
|
||||||
|
};
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -232,17 +238,59 @@ export async function verifyAllHandles(
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
// YouTube — first candidate
|
// YouTube — verify ALL candidates, pick the best match by clinic name
|
||||||
if (candidates.youtube.length > 0) {
|
if (candidates.youtube.length > 0) {
|
||||||
|
const ytCandidates = candidates.youtube.slice(0, 5);
|
||||||
|
const ytResults: (VerifiedChannel & { channelTitle?: string })[] = [];
|
||||||
|
const ytTasks = ytCandidates.map(handle =>
|
||||||
|
verifyYouTube(handle, YOUTUBE_API_KEY).then(v => { if (v.verified) ytResults.push(v); })
|
||||||
|
);
|
||||||
tasks.push(
|
tasks.push(
|
||||||
verifyYouTube(candidates.youtube[0], YOUTUBE_API_KEY).then(v => { result.youtube = v; })
|
Promise.allSettled(ytTasks).then(() => {
|
||||||
|
if (ytResults.length === 0) {
|
||||||
|
// None verified — use first candidate as unverified
|
||||||
|
result.youtube = { handle: ytCandidates[0], verified: false };
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
// Pick best match: channel title containing clinic name
|
||||||
|
const nameL = clinicName.toLowerCase().replace(/성형외과|병원|의원|클리닉/g, '').trim();
|
||||||
|
const nameWords = [clinicName.toLowerCase(), nameL].filter(w => w.length >= 2);
|
||||||
|
const bestMatch = ytResults.find(r =>
|
||||||
|
nameWords.some(w => (r.channelTitle || '').toLowerCase().includes(w))
|
||||||
|
);
|
||||||
|
result.youtube = bestMatch || ytResults[0];
|
||||||
|
if (bestMatch) {
|
||||||
|
console.log(`[verify] YouTube matched: "${bestMatch.channelTitle}" for "${clinicName}"`);
|
||||||
|
} else {
|
||||||
|
console.warn(`[verify] YouTube no name match — using first verified: "${ytResults[0].channelTitle}"`);
|
||||||
|
}
|
||||||
|
})
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Facebook — first candidate
|
// Facebook — try all candidates, also try clinic name as fallback
|
||||||
if (candidates.facebook.length > 0) {
|
const fbCandidates = [...candidates.facebook];
|
||||||
|
// Fallback: try common Facebook page name patterns from clinic name
|
||||||
|
if (clinicName) {
|
||||||
|
const domain = fbCandidates.length > 0 ? '' : clinicName.toLowerCase()
|
||||||
|
.replace(/성형외과|병원|의원|클리닉|피부과/g, '').trim().replace(/\s+/g, '');
|
||||||
|
// Try English brand name patterns (e.g. "아이디병원" → site URL "idhospital")
|
||||||
|
// This is handled by extractSocialLinks from siteLinks already
|
||||||
|
if (domain && !fbCandidates.includes(domain)) fbCandidates.push(domain);
|
||||||
|
}
|
||||||
|
if (fbCandidates.length > 0) {
|
||||||
tasks.push(
|
tasks.push(
|
||||||
verifyFacebook(candidates.facebook[0]).then(v => { result.facebook = v; })
|
(async () => {
|
||||||
|
for (const handle of fbCandidates.slice(0, 3)) {
|
||||||
|
const v = await verifyFacebook(handle);
|
||||||
|
if (v.verified === true || v.verified === 'unverifiable') {
|
||||||
|
result.facebook = v;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// All failed — store first as unverified
|
||||||
|
result.facebook = { handle: fbCandidates[0], verified: false };
|
||||||
|
})()
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -251,12 +251,10 @@ Deno.serve(async (req) => {
|
||||||
`https://www.googleapis.com/youtube/v3/search?part=snippet&type=channel&q=${q}&maxResults=3&key=${YOUTUBE_API_KEY}`
|
`https://www.googleapis.com/youtube/v3/search?part=snippet&type=channel&q=${q}&maxResults=3&key=${YOUTUBE_API_KEY}`
|
||||||
);
|
);
|
||||||
const data = await res.json();
|
const data = await res.json();
|
||||||
|
// Add ALL search results — let verifyAllHandles pick the best match by name
|
||||||
for (const item of (data.items || [])) {
|
for (const item of (data.items || [])) {
|
||||||
const channelId = item.snippet?.channelId || item.id?.channelId;
|
const channelId = item.snippet?.channelId || item.id?.channelId;
|
||||||
const title = (item.snippet?.title || "").toLowerCase();
|
if (channelId) {
|
||||||
const nameL = resolvedName.toLowerCase();
|
|
||||||
// Match if title contains clinic name or vice versa
|
|
||||||
if (channelId && (title.includes(nameL) || nameL.includes(title) || title.includes(nameL.replace(/성형외과|병원|의원|클리닉/g, '').trim()))) {
|
|
||||||
apiHandles.youtube!.push(channelId);
|
apiHandles.youtube!.push(channelId);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
@ -466,6 +464,15 @@ Deno.serve(async (req) => {
|
||||||
tiktok: [...new Set(merged.tiktok.map(h => extractHandle(h, 'tiktok')).filter((h): h is string => h !== null))],
|
tiktok: [...new Set(merged.tiktok.map(h => extractHandle(h, 'tiktok')).filter((h): h is string => h !== null))],
|
||||||
};
|
};
|
||||||
|
|
||||||
|
// Fallback: try domain name as Facebook/Instagram handle if no candidates found
|
||||||
|
try {
|
||||||
|
const domain = new URL(url).hostname.replace('www.', '').split('.')[0]; // e.g. "idhospital"
|
||||||
|
if (domain && domain.length >= 3) {
|
||||||
|
if (cleanHandles.facebook.length === 0) cleanHandles.facebook.push(domain);
|
||||||
|
if (cleanHandles.instagram.length === 0) cleanHandles.instagram.push(domain);
|
||||||
|
}
|
||||||
|
} catch { /* ignore */ }
|
||||||
|
|
||||||
const verified: VerifiedChannels = await verifyAllHandles(
|
const verified: VerifiedChannels = await verifyAllHandles(
|
||||||
cleanHandles, resolvedName, gangnamUnniHintUrl,
|
cleanHandles, resolvedName, gangnamUnniHintUrl,
|
||||||
);
|
);
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue