fix: 2-stage discovery — Firecrawl first for clinicName, then Perplexity
Previously, Firecrawl and Perplexity ran in parallel, so Perplexity received the raw URL instead of the clinic name, which produced poor search results.

Now:
- Stage A: Firecrawl scrape + map (in parallel) → extract clinicName from the HTML.
- Stage B: Perplexity searches using the extracted clinicName → finds the Instagram, YouTube, and Facebook handles that Firecrawl's HTML parsing missed.
- Stage C: Merge the 3 sources and verify all handles.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

claude/bold-hawking
parent
df8f84c3b9
commit
122b1915f0
|
|
@ -13,13 +13,63 @@ interface DiscoverRequest {
|
||||||
clinicName?: string;
|
clinicName?: string;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// First path segments on instagram.com that are site features, not account names.
// Without this list a post link such as instagram.com/p/<id> would yield the handle "p".
const INSTAGRAM_RESERVED_PATHS: ReadonlySet<string> = new Set([
  'p', 'reel', 'reels', 'explore', 'stories', 'tv', 'accounts', 'direct', 'share',
]);

// First path segments on facebook.com that are site features, not page names
// (share dialogs, legacy PHP endpoints, group/event sections, tracking pixels).
const FACEBOOK_RESERVED_PATHS: ReadonlySet<string> = new Set([
  'sharer', 'sharer.php', 'share', 'share.php', 'profile.php', 'pages', 'groups',
  'events', 'watch', 'login', 'login.php', 'dialog', 'plugins', 'tr', 'people',
  'photo', 'photo.php', 'photos', 'story.php', 'permalink.php', 'hashtag', 'l.php',
]);

/**
 * Normalizes a raw social-media reference into a platform handle.
 *
 * The input may be a full URL, an `@handle`, or a bare username. For the
 * platforms handled explicitly (`instagram`, `youtube`, `facebook`,
 * `naverBlog`, `tiktok`) the value is validated against that platform's URL
 * shape and allowed handle characters; anything that does not fit is rejected.
 *
 * @param raw - Candidate value taken from scraped links or search results.
 * @param platform - Platform key selecting the validation rules.
 * @returns The cleaned handle, or `null` when the input is not a string, is
 *   shorter than two characters after trimming, or does not look like a
 *   handle for the platform. YouTube handles are returned with a leading `@`,
 *   except `UC…` channel IDs and `/c/` names, which are returned bare.
 *   Unknown platforms get the trimmed input back unchanged.
 */
function extractHandle(raw: string, platform: string): string | null {
  // Values come from parsed JSON upstream, so guard against non-strings
  // (null, numbers, nested arrays) instead of throwing on `.trim()`.
  if (typeof raw !== 'string') return null;
  let h = raw.trim();
  if (h.length < 2) return null;

  if (platform === 'instagram') {
    const m = h.match(/instagram\.com\/([a-zA-Z0-9._]+)/);
    if (m) {
      const segment = m[1] ?? '';
      // A URL pointing at a post/reel/etc. carries no account name.
      if (segment === '' || INSTAGRAM_RESERVED_PATHS.has(segment.toLowerCase())) return null;
      return segment;
    }
    h = h.replace(/^@/, '').replace(/\/$/, '');
    if (/^[a-zA-Z0-9._]+$/.test(h) && h.length >= 2) return h;
    return null;
  }

  if (platform === 'youtube') {
    const m = h.match(/youtube\.com\/(?:@([a-zA-Z0-9._-]+)|channel\/(UC[a-zA-Z0-9_-]+)|c\/([a-zA-Z0-9._-]+))/);
    if (m) return m[1] ? `@${m[1]}` : m[2] || m[3] || null;
    h = h.replace(/^@/, '');
    // Anything still URL-like at this point is not a YouTube channel reference.
    if (h.includes('http') || h.includes('/') || h.includes('.com')) return null;
    // Channel IDs start with "UC" and must not receive an "@" prefix.
    if (/^UC[a-zA-Z0-9_-]{20,}$/.test(h)) return h;
    if (/^[a-zA-Z0-9._-]+$/.test(h) && h.length >= 2) return `@${h}`;
    return null;
  }

  if (platform === 'facebook') {
    const m = h.match(/facebook\.com\/([a-zA-Z0-9._-]+)/);
    if (m) {
      const segment = m[1] ?? '';
      // Share dialogs, profile.php, /pages/, /groups/ … are not page names.
      if (segment === '' || FACEBOOK_RESERVED_PATHS.has(segment.toLowerCase())) return null;
      return segment;
    }
    h = h.replace(/^@/, '').replace(/\/$/, '');
    if (h.includes('http') || h.includes('/')) return null;
    if (/^[a-zA-Z0-9._-]+$/.test(h) && h.length >= 2) return h;
    return null;
  }

  if (platform === 'naverBlog') {
    // Endpoint-style links (PostList.naver?blogId=…) carry the ID in the query string.
    const byQuery = h.match(/blog\.naver\.com\/[^?#\s]*\?(?:[^#\s]*&)?blogId=([a-zA-Z0-9_-]+)/);
    if (byQuery) return byQuery[1] ?? null;
    // Path-style links: the ID is the first segment. The lookahead stops the
    // match from being cut short and rejects "Name.ext" endpoint segments.
    const byPath = h.match(/blog\.naver\.com\/([a-zA-Z0-9_-]+)(?![a-zA-Z0-9_-]|\.[a-zA-Z])/);
    if (byPath) return byPath[1] ?? null;
    if (h.includes('http') || h.includes('/')) return null;
    if (/^[a-zA-Z0-9_-]+$/.test(h) && h.length >= 2) return h;
    return null;
  }

  if (platform === 'tiktok') {
    const m = h.match(/tiktok\.com\/@([a-zA-Z0-9._-]+)/);
    if (m) return m[1] ?? null;
    h = h.replace(/^@/, '');
    if (h.includes('http') || h.includes('/')) return null;
    if (/^[a-zA-Z0-9._-]+$/.test(h) && h.length >= 2) return h;
    return null;
  }

  // Platforms without dedicated rules: pass the trimmed value through.
  return h;
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Phase 1: Discover & Verify Channels
|
* Phase 1: Discover & Verify Channels
|
||||||
*
|
*
|
||||||
* 3-source channel discovery:
|
* Two-stage discovery:
|
||||||
* A. Firecrawl scrape + map → extract social links from HTML
|
* Stage A: Firecrawl scrape + map (parallel) → extract clinicName + social links
|
||||||
* B. Perplexity search → find social handles via web search
|
* Stage B: Perplexity search using clinicName (parallel) → find more handles
|
||||||
* C. Merge + deduplicate → verify each handle exists
|
* Stage C: Merge + Verify all handles
|
||||||
*/
|
*/
|
||||||
Deno.serve(async (req) => {
|
Deno.serve(async (req) => {
|
||||||
if (req.method === "OPTIONS") {
|
if (req.method === "OPTIONS") {
|
||||||
|
|
@ -27,7 +77,7 @@ Deno.serve(async (req) => {
|
||||||
}
|
}
|
||||||
|
|
||||||
try {
|
try {
|
||||||
const { url, clinicName } = (await req.json()) as DiscoverRequest;
|
const { url, clinicName: inputClinicName } = (await req.json()) as DiscoverRequest;
|
||||||
if (!url) {
|
if (!url) {
|
||||||
return new Response(
|
return new Response(
|
||||||
JSON.stringify({ error: "URL is required" }),
|
JSON.stringify({ error: "URL is required" }),
|
||||||
|
|
@ -39,10 +89,12 @@ Deno.serve(async (req) => {
|
||||||
const PERPLEXITY_API_KEY = Deno.env.get("PERPLEXITY_API_KEY");
|
const PERPLEXITY_API_KEY = Deno.env.get("PERPLEXITY_API_KEY");
|
||||||
if (!FIRECRAWL_API_KEY) throw new Error("FIRECRAWL_API_KEY not configured");
|
if (!FIRECRAWL_API_KEY) throw new Error("FIRECRAWL_API_KEY not configured");
|
||||||
|
|
||||||
// ─── A. Parallel: Firecrawl scrape/map + Perplexity search ───
|
// ═══════════════════════════════════════════
|
||||||
|
// STAGE A: Firecrawl scrape + map (parallel)
|
||||||
|
// → Extract clinicName + social links from HTML
|
||||||
|
// ═══════════════════════════════════════════
|
||||||
|
|
||||||
const [scrapeResult, mapResult, brandResult, perplexityResult] = await Promise.allSettled([
|
const [scrapeResult, mapResult, brandResult] = await Promise.allSettled([
|
||||||
// A1. Scrape website — structured JSON + links
|
|
||||||
fetch("https://api.firecrawl.dev/v1/scrape", {
|
fetch("https://api.firecrawl.dev/v1/scrape", {
|
||||||
method: "POST",
|
method: "POST",
|
||||||
headers: { "Content-Type": "application/json", Authorization: `Bearer ${FIRECRAWL_API_KEY}` },
|
headers: { "Content-Type": "application/json", Authorization: `Bearer ${FIRECRAWL_API_KEY}` },
|
||||||
|
|
@ -50,11 +102,12 @@ Deno.serve(async (req) => {
|
||||||
url,
|
url,
|
||||||
formats: ["json", "links"],
|
formats: ["json", "links"],
|
||||||
jsonOptions: {
|
jsonOptions: {
|
||||||
prompt: "Extract: clinic name, address, phone, services offered, doctors with specialties, social media links (instagram, youtube, blog, facebook, tiktok, kakao), business hours, slogan",
|
prompt: "Extract: clinic name (Korean), clinic name (English), address, phone, services offered, doctors with specialties, ALL social media links (instagram handles/URLs, youtube channel URL/handle, naver blog URL, facebook page URL, tiktok, kakao channel), business hours, slogan",
|
||||||
schema: {
|
schema: {
|
||||||
type: "object",
|
type: "object",
|
||||||
properties: {
|
properties: {
|
||||||
clinicName: { type: "string" },
|
clinicName: { type: "string" },
|
||||||
|
clinicNameEn: { type: "string" },
|
||||||
address: { type: "string" },
|
address: { type: "string" },
|
||||||
phone: { type: "string" },
|
phone: { type: "string" },
|
||||||
businessHours: { type: "string" },
|
businessHours: { type: "string" },
|
||||||
|
|
@ -69,88 +122,41 @@ Deno.serve(async (req) => {
|
||||||
}),
|
}),
|
||||||
}).then(r => r.json()),
|
}).then(r => r.json()),
|
||||||
|
|
||||||
// A2. Map site — discover all linked pages
|
|
||||||
fetch("https://api.firecrawl.dev/v1/map", {
|
fetch("https://api.firecrawl.dev/v1/map", {
|
||||||
method: "POST",
|
method: "POST",
|
||||||
headers: { "Content-Type": "application/json", Authorization: `Bearer ${FIRECRAWL_API_KEY}` },
|
headers: { "Content-Type": "application/json", Authorization: `Bearer ${FIRECRAWL_API_KEY}` },
|
||||||
body: JSON.stringify({ url, limit: 50 }),
|
body: JSON.stringify({ url, limit: 50 }),
|
||||||
}).then(r => r.json()),
|
}).then(r => r.json()),
|
||||||
|
|
||||||
// A3. Branding extraction
|
|
||||||
fetch("https://api.firecrawl.dev/v1/scrape", {
|
fetch("https://api.firecrawl.dev/v1/scrape", {
|
||||||
method: "POST",
|
method: "POST",
|
||||||
headers: { "Content-Type": "application/json", Authorization: `Bearer ${FIRECRAWL_API_KEY}` },
|
headers: { "Content-Type": "application/json", Authorization: `Bearer ${FIRECRAWL_API_KEY}` },
|
||||||
body: JSON.stringify({
|
body: JSON.stringify({
|
||||||
url,
|
url, formats: ["json"],
|
||||||
formats: ["json"],
|
|
||||||
jsonOptions: {
|
jsonOptions: {
|
||||||
prompt: "Extract brand identity: primary/accent/background/text colors (hex), heading/body fonts, logo URL, favicon URL, tagline",
|
prompt: "Extract brand identity: primary/accent/background/text colors (hex), heading/body fonts, logo URL, favicon URL, tagline",
|
||||||
schema: {
|
schema: { type: "object", properties: { primaryColor: { type: "string" }, accentColor: { type: "string" }, backgroundColor: { type: "string" }, textColor: { type: "string" }, headingFont: { type: "string" }, bodyFont: { type: "string" }, logoUrl: { type: "string" }, faviconUrl: { type: "string" }, tagline: { type: "string" } } },
|
||||||
type: "object",
|
|
||||||
properties: {
|
|
||||||
primaryColor: { type: "string" }, accentColor: { type: "string" },
|
|
||||||
backgroundColor: { type: "string" }, textColor: { type: "string" },
|
|
||||||
headingFont: { type: "string" }, bodyFont: { type: "string" },
|
|
||||||
logoUrl: { type: "string" }, faviconUrl: { type: "string" }, tagline: { type: "string" },
|
|
||||||
},
|
|
||||||
},
|
|
||||||
},
|
},
|
||||||
waitFor: 3000,
|
waitFor: 3000,
|
||||||
}),
|
}),
|
||||||
}).then(r => r.json()).catch(() => ({ data: { json: {} } })),
|
}).then(r => r.json()).catch(() => ({ data: { json: {} } })),
|
||||||
|
|
||||||
// A4. Perplexity — find social handles via web search
|
|
||||||
PERPLEXITY_API_KEY
|
|
||||||
? Promise.allSettled([
|
|
||||||
// Query 1: Social media handles
|
|
||||||
fetch("https://api.perplexity.ai/chat/completions", {
|
|
||||||
method: "POST",
|
|
||||||
headers: { "Content-Type": "application/json", Authorization: `Bearer ${PERPLEXITY_API_KEY}` },
|
|
||||||
body: JSON.stringify({
|
|
||||||
model: "sonar",
|
|
||||||
messages: [
|
|
||||||
{ role: "system", content: "You find official social media accounts for Korean medical clinics. Respond ONLY with valid JSON. If unsure, use null. Never guess." },
|
|
||||||
{ role: "user", content: `"${clinicName || url}" 성형외과의 공식 소셜 미디어 계정을 찾아줘. 반드시 확인된 계정만 포함.\n\n{"instagram": ["핸들1", "핸들2"], "youtube": "핸들 또는 URL", "facebook": "페이지명", "tiktok": "핸들", "naverBlog": "블로그ID", "kakao": "채널ID"}` },
|
|
||||||
],
|
|
||||||
temperature: 0.1,
|
|
||||||
}),
|
|
||||||
}).then(r => r.json()),
|
|
||||||
|
|
||||||
// Query 2: Platform presence (강남언니, 네이버, 바비톡)
|
|
||||||
fetch("https://api.perplexity.ai/chat/completions", {
|
|
||||||
method: "POST",
|
|
||||||
headers: { "Content-Type": "application/json", Authorization: `Bearer ${PERPLEXITY_API_KEY}` },
|
|
||||||
body: JSON.stringify({
|
|
||||||
model: "sonar",
|
|
||||||
messages: [
|
|
||||||
{ role: "system", content: "You research Korean medical clinic platform presence. Respond ONLY with valid JSON." },
|
|
||||||
{ role: "user", content: `"${clinicName || url}" 성형외과의 강남언니, 네이버 플레이스, 바비톡 등록 현황을 찾아줘.\n\n{"gangnamUnni": {"registered": true/false, "url": "URL 또는 null", "rating": 숫자 또는 null}, "naverPlace": {"registered": true/false, "rating": 숫자 또는 null}, "babitok": {"registered": true/false}}` },
|
|
||||||
],
|
|
||||||
temperature: 0.1,
|
|
||||||
}),
|
|
||||||
}).then(r => r.json()),
|
|
||||||
])
|
|
||||||
: Promise.resolve([]),
|
|
||||||
]);
|
]);
|
||||||
|
|
||||||
// ─── B. Parse results ───
|
|
||||||
|
|
||||||
const scrapeData = scrapeResult.status === "fulfilled" ? scrapeResult.value : { data: {} };
|
const scrapeData = scrapeResult.status === "fulfilled" ? scrapeResult.value : { data: {} };
|
||||||
const mapData = mapResult.status === "fulfilled" ? mapResult.value : {};
|
const mapData = mapResult.status === "fulfilled" ? mapResult.value : {};
|
||||||
const brandData = brandResult.status === "fulfilled" ? brandResult.value : { data: { json: {} } };
|
const brandData = brandResult.status === "fulfilled" ? brandResult.value : { data: { json: {} } };
|
||||||
|
|
||||||
const clinic = scrapeData.data?.json || {};
|
const clinic = scrapeData.data?.json || {};
|
||||||
const resolvedName = clinicName || clinic.clinicName || url;
|
const resolvedName = inputClinicName || clinic.clinicName || clinic.clinicNameEn || new URL(url).hostname.replace('www.', '').split('.')[0];
|
||||||
const siteLinks: string[] = scrapeData.data?.links || [];
|
const siteLinks: string[] = scrapeData.data?.links || [];
|
||||||
const siteMap: string[] = mapData.links || [];
|
const siteMap: string[] = mapData.links || [];
|
||||||
const allUrls = [...siteLinks, ...siteMap];
|
|
||||||
|
|
||||||
// Source 1: Parse links from HTML
|
// Source 1: Parse links from HTML
|
||||||
const linkHandles = extractSocialLinks(allUrls);
|
const linkHandles = extractSocialLinks([...siteLinks, ...siteMap]);
|
||||||
|
|
||||||
// Source 2: Parse Firecrawl JSON extraction socialMedia field
|
// Source 2: Firecrawl JSON extraction socialMedia field
|
||||||
const scrapeSocial = clinic.socialMedia || {};
|
const scrapeSocial = clinic.socialMedia || {};
|
||||||
const firecrawlHandles: Partial<typeof linkHandles> = {
|
const firecrawlHandles = {
|
||||||
instagram: scrapeSocial.instagram ? [scrapeSocial.instagram] : [],
|
instagram: scrapeSocial.instagram ? [scrapeSocial.instagram] : [],
|
||||||
youtube: scrapeSocial.youtube ? [scrapeSocial.youtube] : [],
|
youtube: scrapeSocial.youtube ? [scrapeSocial.youtube] : [],
|
||||||
facebook: scrapeSocial.facebook ? [scrapeSocial.facebook] : [],
|
facebook: scrapeSocial.facebook ? [scrapeSocial.facebook] : [],
|
||||||
|
|
@ -159,14 +165,46 @@ Deno.serve(async (req) => {
|
||||||
kakao: scrapeSocial.kakao ? [scrapeSocial.kakao] : [],
|
kakao: scrapeSocial.kakao ? [scrapeSocial.kakao] : [],
|
||||||
};
|
};
|
||||||
|
|
||||||
// Source 3: Parse Perplexity results
|
// ═══════════════════════════════════════════
|
||||||
|
// STAGE B: Perplexity search using CLINIC NAME
|
||||||
|
// → Find social handles that Firecrawl missed
|
||||||
|
// ═══════════════════════════════════════════
|
||||||
|
|
||||||
let perplexityHandles: Partial<typeof linkHandles> = {};
|
let perplexityHandles: Partial<typeof linkHandles> = {};
|
||||||
let gangnamUnniHintUrl: string | undefined;
|
let gangnamUnniHintUrl: string | undefined;
|
||||||
|
|
||||||
if (perplexityResult.status === "fulfilled" && Array.isArray(perplexityResult.value)) {
|
if (PERPLEXITY_API_KEY && resolvedName) {
|
||||||
const pResults = perplexityResult.value;
|
const pResults = await Promise.allSettled([
|
||||||
|
// Query 1: Social media accounts — using clinic name, not URL
|
||||||
|
fetch("https://api.perplexity.ai/chat/completions", {
|
||||||
|
method: "POST",
|
||||||
|
headers: { "Content-Type": "application/json", Authorization: `Bearer ${PERPLEXITY_API_KEY}` },
|
||||||
|
body: JSON.stringify({
|
||||||
|
model: "sonar",
|
||||||
|
messages: [
|
||||||
|
{ role: "system", content: "You find official social media accounts for Korean medical clinics. Respond ONLY with valid JSON. If unsure, use null. Never guess or make up handles." },
|
||||||
|
{ role: "user", content: `"${resolvedName}" 성형외과/병원의 공식 소셜 미디어 계정을 찾아줘. 인스타그램 계정이 여러개일 수 있어 (국문용, 영문용 등). 반드시 확인된 계정만 포함.\n\n{"instagram": ["핸들1", "핸들2"], "youtube": "채널 핸들 또는 URL (@ 포함)", "facebook": "페이지명 또는 URL", "tiktok": "핸들", "naverBlog": "블로그ID", "kakao": "채널ID"}` },
|
||||||
|
],
|
||||||
|
temperature: 0.1,
|
||||||
|
}),
|
||||||
|
}).then(r => r.json()),
|
||||||
|
|
||||||
// Social handles query
|
// Query 2: Platform presence — 강남언니, 네이버, 바비톡
|
||||||
|
fetch("https://api.perplexity.ai/chat/completions", {
|
||||||
|
method: "POST",
|
||||||
|
headers: { "Content-Type": "application/json", Authorization: `Bearer ${PERPLEXITY_API_KEY}` },
|
||||||
|
body: JSON.stringify({
|
||||||
|
model: "sonar",
|
||||||
|
messages: [
|
||||||
|
{ role: "system", content: "You research Korean medical clinic platform presence. Respond ONLY with valid JSON." },
|
||||||
|
{ role: "user", content: `"${resolvedName}" 성형외과/병원의 강남언니, 네이버 플레이스, 바비톡 등록 현황을 찾아줘.\n\n{"gangnamUnni": {"registered": true/false, "url": "gangnamunni.com URL 또는 null", "rating": 숫자/10 또는 null}, "naverPlace": {"registered": true/false}, "babitok": {"registered": true/false}}` },
|
||||||
|
],
|
||||||
|
temperature: 0.1,
|
||||||
|
}),
|
||||||
|
}).then(r => r.json()),
|
||||||
|
]);
|
||||||
|
|
||||||
|
// Parse social handles
|
||||||
if (pResults[0]?.status === "fulfilled") {
|
if (pResults[0]?.status === "fulfilled") {
|
||||||
try {
|
try {
|
||||||
let text = pResults[0].value?.choices?.[0]?.message?.content || "";
|
let text = pResults[0].value?.choices?.[0]?.message?.content || "";
|
||||||
|
|
@ -181,77 +219,27 @@ Deno.serve(async (req) => {
|
||||||
tiktok: parsed.tiktok ? [parsed.tiktok] : [],
|
tiktok: parsed.tiktok ? [parsed.tiktok] : [],
|
||||||
kakao: parsed.kakao ? [parsed.kakao] : [],
|
kakao: parsed.kakao ? [parsed.kakao] : [],
|
||||||
};
|
};
|
||||||
} catch { /* JSON parse failed — skip */ }
|
} catch { /* JSON parse failed */ }
|
||||||
}
|
}
|
||||||
|
|
||||||
// Platform presence query
|
// Parse platform presence
|
||||||
if (pResults[1]?.status === "fulfilled") {
|
if (pResults[1]?.status === "fulfilled") {
|
||||||
try {
|
try {
|
||||||
let text = pResults[1].value?.choices?.[0]?.message?.content || "";
|
let text = pResults[1].value?.choices?.[0]?.message?.content || "";
|
||||||
const jsonMatch = text.match(/```(?:json)?\n?([\s\S]*?)```/);
|
const jsonMatch = text.match(/```(?:json)?\n?([\s\S]*?)```/);
|
||||||
if (jsonMatch) text = jsonMatch[1];
|
if (jsonMatch) text = jsonMatch[1];
|
||||||
const parsed = JSON.parse(text);
|
const parsed = JSON.parse(text);
|
||||||
if (parsed.gangnamUnni?.url) {
|
if (parsed.gangnamUnni?.url) gangnamUnniHintUrl = parsed.gangnamUnni.url;
|
||||||
gangnamUnniHintUrl = parsed.gangnamUnni.url;
|
} catch { /* JSON parse failed */ }
|
||||||
}
|
|
||||||
} catch { /* JSON parse failed — skip */ }
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// ─── C. Merge + Deduplicate + Verify ───
|
// ═══════════════════════════════════════════
|
||||||
|
// STAGE C: Merge + Deduplicate + Verify
|
||||||
|
// ═══════════════════════════════════════════
|
||||||
|
|
||||||
const merged = mergeSocialLinks(linkHandles, firecrawlHandles, perplexityHandles);
|
const merged = mergeSocialLinks(linkHandles, firecrawlHandles, perplexityHandles);
|
||||||
|
|
||||||
// Robust handle extraction — handles may be full URLs, @handles, or bare usernames
|
|
||||||
function extractHandle(raw: string, platform: string): string | null {
|
|
||||||
if (!raw || raw.length < 2) return null;
|
|
||||||
let h = raw.trim();
|
|
||||||
|
|
||||||
// Platform-specific URL extraction
|
|
||||||
if (platform === 'instagram') {
|
|
||||||
const m = h.match(/instagram\.com\/([a-zA-Z0-9._]+)/);
|
|
||||||
if (m) return m[1];
|
|
||||||
h = h.replace(/^@/, '').replace(/\/$/, '');
|
|
||||||
if (/^[a-zA-Z0-9._]+$/.test(h) && h.length >= 2) return h;
|
|
||||||
return null;
|
|
||||||
}
|
|
||||||
if (platform === 'youtube') {
|
|
||||||
const m = h.match(/youtube\.com\/(?:@([a-zA-Z0-9._-]+)|channel\/(UC[a-zA-Z0-9_-]+)|c\/([a-zA-Z0-9._-]+))/);
|
|
||||||
if (m) return m[1] ? `@${m[1]}` : m[2] || m[3] || null;
|
|
||||||
h = h.replace(/^@/, '');
|
|
||||||
// Reject if it looks like a non-YouTube URL
|
|
||||||
if (h.includes('http') || h.includes('/') || h.includes('.com')) return null;
|
|
||||||
// Channel IDs start with UC — don't add @ prefix
|
|
||||||
if (/^UC[a-zA-Z0-9_-]{20,}$/.test(h)) return h;
|
|
||||||
if (/^[a-zA-Z0-9._-]+$/.test(h) && h.length >= 2) return `@${h}`;
|
|
||||||
return null;
|
|
||||||
}
|
|
||||||
if (platform === 'facebook') {
|
|
||||||
const m = h.match(/facebook\.com\/([a-zA-Z0-9._-]+)/);
|
|
||||||
if (m) return m[1];
|
|
||||||
h = h.replace(/^@/, '').replace(/\/$/, '');
|
|
||||||
if (h.includes('http') || h.includes('/')) return null;
|
|
||||||
if (/^[a-zA-Z0-9._-]+$/.test(h) && h.length >= 2) return h;
|
|
||||||
return null;
|
|
||||||
}
|
|
||||||
if (platform === 'naverBlog') {
|
|
||||||
const m = h.match(/blog\.naver\.com\/([a-zA-Z0-9_-]+)/);
|
|
||||||
if (m) return m[1];
|
|
||||||
if (h.includes('http') || h.includes('/')) return null;
|
|
||||||
if (/^[a-zA-Z0-9_-]+$/.test(h) && h.length >= 2) return h;
|
|
||||||
return null;
|
|
||||||
}
|
|
||||||
if (platform === 'tiktok') {
|
|
||||||
const m = h.match(/tiktok\.com\/@([a-zA-Z0-9._-]+)/);
|
|
||||||
if (m) return m[1];
|
|
||||||
h = h.replace(/^@/, '');
|
|
||||||
if (h.includes('http') || h.includes('/')) return null;
|
|
||||||
if (/^[a-zA-Z0-9._-]+$/.test(h) && h.length >= 2) return h;
|
|
||||||
return null;
|
|
||||||
}
|
|
||||||
return h;
|
|
||||||
}
|
|
||||||
|
|
||||||
const cleanHandles = {
|
const cleanHandles = {
|
||||||
instagram: merged.instagram.map(h => extractHandle(h, 'instagram')).filter((h): h is string => h !== null),
|
instagram: merged.instagram.map(h => extractHandle(h, 'instagram')).filter((h): h is string => h !== null),
|
||||||
youtube: merged.youtube.map(h => extractHandle(h, 'youtube')).filter((h): h is string => h !== null),
|
youtube: merged.youtube.map(h => extractHandle(h, 'youtube')).filter((h): h is string => h !== null),
|
||||||
|
|
@ -261,31 +249,27 @@ Deno.serve(async (req) => {
|
||||||
};
|
};
|
||||||
|
|
||||||
const verified: VerifiedChannels = await verifyAllHandles(
|
const verified: VerifiedChannels = await verifyAllHandles(
|
||||||
cleanHandles,
|
cleanHandles, resolvedName, gangnamUnniHintUrl,
|
||||||
resolvedName,
|
|
||||||
gangnamUnniHintUrl,
|
|
||||||
);
|
);
|
||||||
|
|
||||||
// ─── D. Save to DB ───
|
// ═══════════════════════════════════════════
|
||||||
|
// Save to DB
|
||||||
|
// ═══════════════════════════════════════════
|
||||||
|
|
||||||
const supabaseUrl = Deno.env.get("SUPABASE_URL")!;
|
const supabaseUrl = Deno.env.get("SUPABASE_URL")!;
|
||||||
const supabaseKey = Deno.env.get("SUPABASE_SERVICE_ROLE_KEY")!;
|
const supabaseKey = Deno.env.get("SUPABASE_SERVICE_ROLE_KEY")!;
|
||||||
const supabase = createClient(supabaseUrl, supabaseKey);
|
const supabase = createClient(supabaseUrl, supabaseKey);
|
||||||
|
|
||||||
const scrapeDataFull = {
|
const scrapeDataFull = {
|
||||||
clinic,
|
clinic, branding: brandData.data?.json || {},
|
||||||
branding: brandData.data?.json || {},
|
siteLinks, siteMap: mapData.links || [],
|
||||||
siteLinks,
|
sourceUrl: url, scrapedAt: new Date().toISOString(),
|
||||||
siteMap: mapData.links || [],
|
|
||||||
sourceUrl: url,
|
|
||||||
scrapedAt: new Date().toISOString(),
|
|
||||||
};
|
};
|
||||||
|
|
||||||
const { data: saved, error: saveError } = await supabase
|
const { data: saved, error: saveError } = await supabase
|
||||||
.from("marketing_reports")
|
.from("marketing_reports")
|
||||||
.insert({
|
.insert({
|
||||||
url,
|
url, clinic_name: resolvedName,
|
||||||
clinic_name: resolvedName,
|
|
||||||
status: "discovered",
|
status: "discovered",
|
||||||
verified_channels: verified,
|
verified_channels: verified,
|
||||||
scrape_data: scrapeDataFull,
|
scrape_data: scrapeDataFull,
|
||||||
|
|
@ -299,8 +283,7 @@ Deno.serve(async (req) => {
|
||||||
|
|
||||||
return new Response(
|
return new Response(
|
||||||
JSON.stringify({
|
JSON.stringify({
|
||||||
success: true,
|
success: true, reportId: saved.id,
|
||||||
reportId: saved.id,
|
|
||||||
clinicName: resolvedName,
|
clinicName: resolvedName,
|
||||||
verifiedChannels: verified,
|
verifiedChannels: verified,
|
||||||
address: clinic.address || "",
|
address: clinic.address || "",
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue