fix: robust handle extraction — reject non-platform URLs, fix type safety
discover-channels: new extractHandle() validates that each handle belongs to its platform (e.g. rejects hospital-internal URLs such as /idtube/view being treated as YouTube) and extracts handles from full URLs correctly.
collect-channel-data: explicit Record<string, unknown> typing for DB JSON fields — fixes TypeScript property access on VerifiedChannels loaded from the DB.
verifyHandles: fix TikTok double-URL concatenation.
Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
claude/bold-hawking
parent
0d72750982
commit
f65f0e85b3
|
|
@ -228,7 +228,8 @@ export async function verifyAllHandles(
|
|||
|
||||
// TikTok — skip verification for now (TikTok blocks HEAD requests)
|
||||
if (candidates.tiktok.length > 0) {
|
||||
result.tiktok = { handle: candidates.tiktok[0], verified: false, url: `https://tiktok.com/@${candidates.tiktok[0]}` };
|
||||
const tkHandle = candidates.tiktok[0].replace(/^@/, '');
|
||||
result.tiktok = { handle: tkHandle, verified: false, url: `https://tiktok.com/@${tkHandle}` };
|
||||
}
|
||||
|
||||
await Promise.allSettled(tasks);
|
||||
|
|
|
|||
|
|
@ -73,10 +73,11 @@ Deno.serve(async (req) => {
|
|||
const tasks: Promise<void>[] = [];
|
||||
|
||||
// ─── 1. Instagram (multi-account) ───
|
||||
if (APIFY_TOKEN && verified.instagram?.length > 0) {
|
||||
const igVerified = (verified.instagram || []).filter((v: Record<string, unknown>) => v.verified && v.handle);
|
||||
if (APIFY_TOKEN && igVerified.length > 0) {
|
||||
tasks.push((async () => {
|
||||
const accounts: Record<string, unknown>[] = [];
|
||||
for (const ig of verified.instagram.filter(v => v.verified)) {
|
||||
for (const ig of igVerified) {
|
||||
const items = await runApifyActor("apify~instagram-profile-scraper", { usernames: [ig.handle], resultsLimit: 12 }, APIFY_TOKEN);
|
||||
const profile = (items as Record<string, unknown>[])[0];
|
||||
if (profile && !profile.error) {
|
||||
|
|
@ -104,10 +105,11 @@ Deno.serve(async (req) => {
|
|||
}
|
||||
|
||||
// ─── 2. YouTube ───
|
||||
if (YOUTUBE_API_KEY && verified.youtube?.verified) {
|
||||
const ytVerified = verified.youtube as Record<string, unknown> | null;
|
||||
if (YOUTUBE_API_KEY && ytVerified?.verified) {
|
||||
tasks.push((async () => {
|
||||
const YT = "https://www.googleapis.com/youtube/v3";
|
||||
const channelId = verified.youtube!.channelId || "";
|
||||
const channelId = (ytVerified?.channelId as string) || "";
|
||||
if (!channelId) return;
|
||||
|
||||
const chRes = await fetch(`${YT}/channels?part=snippet,statistics,brandingSettings&id=${channelId}&key=${YOUTUBE_API_KEY}`);
|
||||
|
|
@ -154,9 +156,10 @@ Deno.serve(async (req) => {
|
|||
}
|
||||
|
||||
// ─── 3. Facebook ───
|
||||
if (APIFY_TOKEN && verified.facebook?.verified) {
|
||||
const fbVerified = verified.facebook as Record<string, unknown> | null;
|
||||
if (APIFY_TOKEN && fbVerified?.verified) {
|
||||
tasks.push((async () => {
|
||||
const fbUrl = verified.facebook!.url || `https://www.facebook.com/${verified.facebook!.handle}`;
|
||||
const fbUrl = (fbVerified.url as string) || `https://www.facebook.com/${fbVerified.handle}`;
|
||||
const items = await runApifyActor("apify~facebook-pages-scraper", { startUrls: [{ url: fbUrl }] }, APIFY_TOKEN);
|
||||
const page = (items as Record<string, unknown>[])[0];
|
||||
if (page?.title) {
|
||||
|
|
@ -172,13 +175,14 @@ Deno.serve(async (req) => {
|
|||
}
|
||||
|
||||
// ─── 4. 강남언니 ───
|
||||
if (FIRECRAWL_API_KEY && verified.gangnamUnni?.verified && verified.gangnamUnni.url) {
|
||||
const guVerified = verified.gangnamUnni as Record<string, unknown> | null;
|
||||
if (FIRECRAWL_API_KEY && guVerified?.verified && guVerified.url) {
|
||||
tasks.push((async () => {
|
||||
const scrapeRes = await fetch("https://api.firecrawl.dev/v1/scrape", {
|
||||
method: "POST",
|
||||
headers: { "Content-Type": "application/json", Authorization: `Bearer ${FIRECRAWL_API_KEY}` },
|
||||
body: JSON.stringify({
|
||||
url: verified.gangnamUnni!.url,
|
||||
url: guVerified!.url as string,
|
||||
formats: ["json"],
|
||||
jsonOptions: {
|
||||
prompt: "Extract: hospital name, overall rating (out of 10), total review count, doctors with names/ratings/review counts/specialties, procedures offered, address, certifications/badges",
|
||||
|
|
@ -202,7 +206,7 @@ Deno.serve(async (req) => {
|
|||
name: hospital.hospitalName, rating: hospital.rating, ratingScale: "/10",
|
||||
totalReviews: hospital.totalReviews, doctors: (hospital.doctors || []).slice(0, 10),
|
||||
procedures: hospital.procedures || [], address: hospital.address,
|
||||
badges: hospital.badges || [], sourceUrl: verified.gangnamUnni!.url,
|
||||
badges: hospital.badges || [], sourceUrl: guVerified!.url as string,
|
||||
};
|
||||
}
|
||||
})());
|
||||
|
|
|
|||
|
|
@ -202,13 +202,60 @@ Deno.serve(async (req) => {
|
|||
|
||||
const merged = mergeSocialLinks(linkHandles, firecrawlHandles, perplexityHandles);
|
||||
|
||||
// Clean up handles (remove @ prefix, URL parts)
|
||||
// Robust handle extraction — handles may be full URLs, @handles, or bare usernames
|
||||
// Host patterns keyed by platform. The negative lookbehind accepts the domain
// only at a hostname-label boundary, so "www.instagram.com" matches while
// look-alike hosts such as "notinstagram.com" do not. Matching is
// case-insensitive because hostnames are; the domain must be followed by "/"
// or the end of the input.
const PLATFORM_HOST_PATTERNS = new Map<string, RegExp>([
  ['instagram', /(?<![\w-])instagram\.com(?:\/|$)/i],
  ['youtube', /(?<![\w-])youtube\.com(?:\/|$)/i],
  ['facebook', /(?<![\w-])facebook\.com(?:\/|$)/i],
  ['naverBlog', /(?<![\w-])blog\.naver\.com(?:\/|$)/i],
  ['tiktok', /(?<![\w-])tiktok\.com(?:\/|$)/i],
]);

// First path segments that are site routes (post permalinks, share dialogs,
// listing pages, ...) rather than account names. Compared in lower case.
const NON_ACCOUNT_SEGMENTS = new Map<string, ReadonlySet<string>>([
  ['instagram', new Set(['p', 'reel', 'reels', 'tv', 'explore', 'stories', 'accounts', 'direct'])],
  [
    'facebook',
    new Set([
      'p', 'pages', 'people', 'groups', 'events', 'watch', 'share', 'sharer',
      'plugins', 'dialog', 'login', 'hashtag', 'tr',
    ]),
  ],
]);

/**
 * Normalises a raw social-media reference into a handle for `platform`.
 *
 * `raw` may be a full URL, an `@handle`, or a bare username. When the input
 * contains the platform's own domain, the handle is taken from the URL path
 * and nothing else is tried: a platform URL that does not point at an account
 * yields `null` instead of falling through to bare-handle parsing. Inputs
 * without the platform's domain are accepted only if they look like a bare
 * handle, which rejects URLs of other sites.
 *
 * @param raw - Candidate handle or URL.
 * @param platform - One of `instagram`, `youtube`, `facebook`, `naverBlog`,
 *   `tiktok`. For any other value the trimmed input is returned unvalidated.
 * @returns The handle (`@`-prefixed for YouTube handles; a `UC…` channel id or
 *   legacy `/c/` name is returned as-is), or `null` when the input does not
 *   identify an account on that platform.
 */
function extractHandle(raw: string, platform: string): string | null {
  if (!raw || raw.length < 2) return null;
  let h = raw.trim();

  // Text following "<platform-domain>/", or null when the domain is absent.
  const hostHit = PLATFORM_HOST_PATTERNS.get(platform)?.exec(h) ?? null;
  const urlPath = hostHit ? h.slice(hostHit.index + hostHit[0].length) : null;
  const isRouteSegment = (segment: string): boolean =>
    NON_ACCOUNT_SEGMENTS.get(platform)?.has(segment.toLowerCase()) ?? false;

  if (platform === 'instagram') {
    if (urlPath !== null) {
      const segment = /^[a-zA-Z0-9._]+/.exec(urlPath)?.[0];
      return segment && !isRouteSegment(segment) ? segment : null;
    }
    h = h.replace(/^@/, '').replace(/\/$/, '');
    return /^[a-zA-Z0-9._]+$/.test(h) && h.length >= 2 ? h : null;
  }

  if (platform === 'youtube') {
    if (urlPath !== null) {
      // Path keywords and channel ids stay case-sensitive; only the host is not.
      const m = /^(?:@([a-zA-Z0-9._-]+)|channel\/(UC[a-zA-Z0-9_-]+)|c\/([a-zA-Z0-9._-]+))/.exec(urlPath);
      if (!m) return null;
      return m[1] ? `@${m[1]}` : m[2] || m[3] || null;
    }
    h = h.replace(/^@/, '');
    // Reject if it looks like a non-YouTube URL
    if (h.includes('http') || h.includes('/') || h.includes('.com')) return null;
    return /^[a-zA-Z0-9._-]+$/.test(h) && h.length >= 2 ? `@${h}` : null;
  }

  if (platform === 'facebook') {
    if (urlPath !== null) {
      const segment = /^[a-zA-Z0-9._-]+/.exec(urlPath)?.[0];
      if (!segment) return null;
      // Script endpoints such as profile.php / sharer.php are never page names.
      if (segment.toLowerCase().endsWith('.php') || isRouteSegment(segment)) return null;
      return segment;
    }
    h = h.replace(/^@/, '').replace(/\/$/, '');
    if (h.includes('http') || h.includes('/')) return null;
    return /^[a-zA-Z0-9._-]+$/.test(h) && h.length >= 2 ? h : null;
  }

  if (platform === 'naverBlog') {
    if (urlPath !== null) {
      const m = /^([a-zA-Z0-9_-]+)(\.(?:naver|nhn))?/.exec(urlPath);
      if (!m) return null;
      if (!m[2]) return m[1] ?? null;
      // Endpoint-style URL (e.g. PostView.naver?blogId=...): the blog id is in
      // the query string, not in the path.
      const byQuery = /[?&]blogId=([a-zA-Z0-9_-]+)/.exec(urlPath);
      return byQuery ? byQuery[1] ?? null : null;
    }
    if (h.includes('http') || h.includes('/')) return null;
    return /^[a-zA-Z0-9_-]+$/.test(h) && h.length >= 2 ? h : null;
  }

  if (platform === 'tiktok') {
    if (urlPath !== null) {
      const m = /^@([a-zA-Z0-9._-]+)/.exec(urlPath);
      return m ? m[1] ?? null : null;
    }
    h = h.replace(/^@/, '');
    if (h.includes('http') || h.includes('/')) return null;
    return /^[a-zA-Z0-9._-]+$/.test(h) && h.length >= 2 ? h : null;
  }

  // Unknown platform: no validation rules available, pass the trimmed input through.
  return h;
}
|
||||
|
||||
const cleanHandles = {
|
||||
instagram: merged.instagram.map(h => h.replace(/^@/, '').replace(/\/$/, '')).filter(h => h.length > 1),
|
||||
youtube: merged.youtube.map(h => h.replace(/^https?:\/\/(www\.)?youtube\.com\//, '')).filter(h => h.length > 1),
|
||||
facebook: merged.facebook.map(h => h.replace(/^@/, '').replace(/\/$/, '')).filter(h => h.length > 1),
|
||||
naverBlog: merged.naverBlog.filter(h => h.length > 1),
|
||||
tiktok: merged.tiktok.map(h => h.replace(/^@/, '')).filter(h => h.length > 1),
|
||||
instagram: merged.instagram.map(h => extractHandle(h, 'instagram')).filter((h): h is string => h !== null),
|
||||
youtube: merged.youtube.map(h => extractHandle(h, 'youtube')).filter((h): h is string => h !== null),
|
||||
facebook: merged.facebook.map(h => extractHandle(h, 'facebook')).filter((h): h is string => h !== null),
|
||||
naverBlog: merged.naverBlog.map(h => extractHandle(h, 'naverBlog')).filter((h): h is string => h !== null),
|
||||
tiktok: merged.tiktok.map(h => extractHandle(h, 'tiktok')).filter((h): h is string => h !== null),
|
||||
};
|
||||
|
||||
const verified: VerifiedChannels = await verifyAllHandles(
|
||||
|
|
|
|||
Loading…
Reference in New Issue