fix: robust handle extraction — reject non-platform URLs, fix type safety

discover-channels: new extractHandle() validates that each handle belongs to
its platform, so hospital-internal URLs like /idtube/view are no longer
treated as YouTube handles. It also extracts handles correctly from full URLs.

collect-channel-data: explicit Record<string,unknown> typing for DB JSON
fields — fixes TypeScript property access on VerifiedChannels from DB.

verifyHandles: fix TikTok double-URL concatenation.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
claude/bold-hawking
Haewon Kam 2026-04-04 00:03:26 +09:00
parent 0d72750982
commit f65f0e85b3
3 changed files with 68 additions and 16 deletions

View File

@ -228,7 +228,8 @@ export async function verifyAllHandles(
// TikTok — skip verification for now (TikTok blocks HEAD requests)
if (candidates.tiktok.length > 0) {
result.tiktok = { handle: candidates.tiktok[0], verified: false, url: `https://tiktok.com/@${candidates.tiktok[0]}` };
const tkHandle = candidates.tiktok[0].replace(/^@/, '');
result.tiktok = { handle: tkHandle, verified: false, url: `https://tiktok.com/@${tkHandle}` };
}
await Promise.allSettled(tasks);

View File

@ -73,10 +73,11 @@ Deno.serve(async (req) => {
const tasks: Promise<void>[] = [];
// ─── 1. Instagram (multi-account) ───
if (APIFY_TOKEN && verified.instagram?.length > 0) {
const igVerified = (verified.instagram || []).filter((v: Record<string, unknown>) => v.verified && v.handle);
if (APIFY_TOKEN && igVerified.length > 0) {
tasks.push((async () => {
const accounts: Record<string, unknown>[] = [];
for (const ig of verified.instagram.filter(v => v.verified)) {
for (const ig of igVerified) {
const items = await runApifyActor("apify~instagram-profile-scraper", { usernames: [ig.handle], resultsLimit: 12 }, APIFY_TOKEN);
const profile = (items as Record<string, unknown>[])[0];
if (profile && !profile.error) {
@ -104,10 +105,11 @@ Deno.serve(async (req) => {
}
// ─── 2. YouTube ───
if (YOUTUBE_API_KEY && verified.youtube?.verified) {
const ytVerified = verified.youtube as Record<string, unknown> | null;
if (YOUTUBE_API_KEY && ytVerified?.verified) {
tasks.push((async () => {
const YT = "https://www.googleapis.com/youtube/v3";
const channelId = verified.youtube!.channelId || "";
const channelId = (ytVerified?.channelId as string) || "";
if (!channelId) return;
const chRes = await fetch(`${YT}/channels?part=snippet,statistics,brandingSettings&id=${channelId}&key=${YOUTUBE_API_KEY}`);
@ -154,9 +156,10 @@ Deno.serve(async (req) => {
}
// ─── 3. Facebook ───
if (APIFY_TOKEN && verified.facebook?.verified) {
const fbVerified = verified.facebook as Record<string, unknown> | null;
if (APIFY_TOKEN && fbVerified?.verified) {
tasks.push((async () => {
const fbUrl = verified.facebook!.url || `https://www.facebook.com/${verified.facebook!.handle}`;
const fbUrl = (fbVerified.url as string) || `https://www.facebook.com/${fbVerified.handle}`;
const items = await runApifyActor("apify~facebook-pages-scraper", { startUrls: [{ url: fbUrl }] }, APIFY_TOKEN);
const page = (items as Record<string, unknown>[])[0];
if (page?.title) {
@ -172,13 +175,14 @@ Deno.serve(async (req) => {
}
// ─── 4. 강남언니 ───
if (FIRECRAWL_API_KEY && verified.gangnamUnni?.verified && verified.gangnamUnni.url) {
const guVerified = verified.gangnamUnni as Record<string, unknown> | null;
if (FIRECRAWL_API_KEY && guVerified?.verified && guVerified.url) {
tasks.push((async () => {
const scrapeRes = await fetch("https://api.firecrawl.dev/v1/scrape", {
method: "POST",
headers: { "Content-Type": "application/json", Authorization: `Bearer ${FIRECRAWL_API_KEY}` },
body: JSON.stringify({
url: verified.gangnamUnni!.url,
url: guVerified!.url as string,
formats: ["json"],
jsonOptions: {
prompt: "Extract: hospital name, overall rating (out of 10), total review count, doctors with names/ratings/review counts/specialties, procedures offered, address, certifications/badges",
@ -202,7 +206,7 @@ Deno.serve(async (req) => {
name: hospital.hospitalName, rating: hospital.rating, ratingScale: "/10",
totalReviews: hospital.totalReviews, doctors: (hospital.doctors || []).slice(0, 10),
procedures: hospital.procedures || [], address: hospital.address,
badges: hospital.badges || [], sourceUrl: verified.gangnamUnni!.url,
badges: hospital.badges || [], sourceUrl: guVerified!.url as string,
};
}
})());

View File

@ -202,13 +202,60 @@ Deno.serve(async (req) => {
const merged = mergeSocialLinks(linkHandles, firecrawlHandles, perplexityHandles);
// Clean up handles (remove @ prefix, URL parts)
// Robust handle extraction — handles may be full URLs, @handles, or bare usernames
/**
 * Normalises one raw social-media reference (full URL, `@handle`, or bare
 * username) into the canonical handle for `platform`.
 *
 * Returned shapes per platform:
 * - `instagram`, `facebook`, `naverBlog`, `tiktok`: the bare username/ID, no `@`.
 * - `youtube`: `@handle` for handles, the bare `UC…` ID for channel URLs/IDs,
 *   or the custom name for `/c/<name>` URLs.
 * - any other platform: the trimmed input, unchanged.
 *
 * @param raw      Candidate string collected from links / scrapers.
 * @param platform Platform key the candidate was filed under.
 * @returns The cleaned handle, or `null` when the candidate does not belong to
 *          the platform (foreign URL, look-alike domain, non-profile path such
 *          as a post or share link) or is too short to be a handle.
 */
function extractHandle(raw: string, platform: string): string | null {
  if (!raw) return null;
  // Trim BEFORE the length check so whitespace-only input cannot slip through.
  const h = raw.trim();
  if (h.length < 2) return null;

  const stripAt = (s: string): string => s.replace(/^@/, '');
  const looksLikeUrl = (s: string): boolean => s.includes('http') || s.includes('/');
  const validBare = (s: string, allowed: RegExp): string | null =>
    allowed.test(s) && s.length >= 2 ? s : null;
  const isReserved = (segment: string, reserved: readonly string[]): boolean =>
    reserved.includes(segment.toLowerCase());

  // Every host pattern below starts with `(?:^|[^a-zA-Z0-9-])` so that the
  // platform domain must begin a hostname label: `www.instagram.com` and
  // `//instagram.com` match, look-alikes such as `notinstagram.com` do not.

  if (platform === 'instagram') {
    const m = h.match(/(?:^|[^a-zA-Z0-9-])instagram\.com\/([a-zA-Z0-9._]+)/);
    if (m) {
      const segment = m[1] ?? '';
      // Post / reel / story URLs carry a route name, not a profile, in the first segment.
      const routes = ['p', 'reel', 'reels', 'tv', 'explore', 'stories', 'accounts', 'direct'];
      return segment && !isReserved(segment, routes) ? segment : null;
    }
    return validBare(stripAt(h).replace(/\/$/, ''), /^[a-zA-Z0-9._]+$/);
  }

  if (platform === 'youtube') {
    const m = h.match(
      /(?:^|[^a-zA-Z0-9-])youtube\.com\/(?:@([a-zA-Z0-9._-]+)|channel\/(UC[a-zA-Z0-9_-]+)|c\/([a-zA-Z0-9._-]+))/,
    );
    if (m) return m[1] ? `@${m[1]}` : m[2] ?? m[3] ?? null;
    // A bare channel ID (UC + 22 chars) is returned as-is, matching what the
    // `/channel/<id>` URL branch yields, instead of being turned into "@UC…".
    if (/^UC[a-zA-Z0-9_-]{22}$/.test(h)) return h;
    const name = stripAt(h);
    // Reject anything that still looks like a (non-YouTube) URL or domain.
    if (looksLikeUrl(name) || name.includes('.com')) return null;
    const bare = validBare(name, /^[a-zA-Z0-9._-]+$/);
    return bare === null ? null : `@${bare}`;
  }

  if (platform === 'facebook') {
    const m = h.match(/(?:^|[^a-zA-Z0-9-])facebook\.com\/([a-zA-Z0-9._-]+)/);
    if (m) {
      const segment = m[1] ?? '';
      // Script endpoints and section routes are not page usernames.
      const routes = [
        'profile.php', 'photo.php', 'permalink.php', 'story.php', 'sharer.php', 'login.php',
        'sharer', 'share', 'pages', 'people', 'p', 'groups', 'watch', 'events',
        'plugins', 'dialog', 'hashtag', 'login', 'tr',
      ];
      return segment && !isReserved(segment, routes) ? segment : null;
    }
    const name = stripAt(h).replace(/\/$/, '');
    if (looksLikeUrl(name)) return null;
    return validBare(name, /^[a-zA-Z0-9._-]+$/);
  }

  if (platform === 'naverBlog') {
    const m = h.match(/(?:^|[^a-zA-Z0-9-])blog\.naver\.com\/([a-zA-Z0-9_-]+)(\.[a-zA-Z]+)?/);
    if (m) {
      if (!m[2]) return m[1] ?? null;
      // `PostView.naver?blogId=<id>`-style URLs: the first segment is a page
      // script, the blog ID lives in the query string.
      const q = h.match(/[?&]blogId=([a-zA-Z0-9_-]+)/);
      return q?.[1] ?? null;
    }
    if (looksLikeUrl(h)) return null;
    return validBare(h, /^[a-zA-Z0-9_-]+$/);
  }

  if (platform === 'tiktok') {
    const m = h.match(/(?:^|[^a-zA-Z0-9-])tiktok\.com\/@([a-zA-Z0-9._-]+)/);
    if (m) return m[1] ?? null;
    const name = stripAt(h);
    if (looksLikeUrl(name)) return null;
    return validBare(name, /^[a-zA-Z0-9._-]+$/);
  }

  // Unknown platform: no validation rules available, hand back the trimmed input.
  return h;
}
const cleanHandles = {
instagram: merged.instagram.map(h => h.replace(/^@/, '').replace(/\/$/, '')).filter(h => h.length > 1),
youtube: merged.youtube.map(h => h.replace(/^https?:\/\/(www\.)?youtube\.com\//, '')).filter(h => h.length > 1),
facebook: merged.facebook.map(h => h.replace(/^@/, '').replace(/\/$/, '')).filter(h => h.length > 1),
naverBlog: merged.naverBlog.filter(h => h.length > 1),
tiktok: merged.tiktok.map(h => h.replace(/^@/, '')).filter(h => h.length > 1),
instagram: merged.instagram.map(h => extractHandle(h, 'instagram')).filter((h): h is string => h !== null),
youtube: merged.youtube.map(h => extractHandle(h, 'youtube')).filter((h): h is string => h !== null),
facebook: merged.facebook.map(h => extractHandle(h, 'facebook')).filter((h): h is string => h !== null),
naverBlog: merged.naverBlog.map(h => extractHandle(h, 'naverBlog')).filter((h): h is string => h !== null),
tiktok: merged.tiktok.map(h => extractHandle(h, 'tiktok')).filter((h): h is string => h !== null),
};
const verified: VerifiedChannels = await verifyAllHandles(