feat: 강남언니 real-time data collection via Firecrawl scraping
- enrich-channels: add 강남언니 scraping module (search + structured JSON extraction)
- Collects: rating/10, reviews, doctors with ratings, procedures, certifications
- transformReport: merge 강남언니 data into clinicSnapshot + otherChannels
- Updates lead doctor info, certifications, and review counts from real data

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
claude/bold-hawking
parent
e5399486f7
commit
cf482d1bd7
|
|
@ -399,6 +399,17 @@ export interface EnrichmentData {
|
|||
thumbnail?: string;
|
||||
}[];
|
||||
};
|
||||
/**
 * Clinic data collected from the clinic's Gangnam Unni hospital page.
 * All fields are optional; consumers must handle missing values.
 */
gangnamUnni?: {
|
||||
name?: string; // Hospital name as extracted from the page.
|
||||
rating?: number; // Overall rating; the scraper asks for a value out of 10.
|
||||
ratingScale?: string; // Display suffix for the rating; the scraper sets "/10".
|
||||
totalReviews?: number; // Total review count for the hospital.
|
||||
doctors?: { name?: string; rating?: number; reviews?: number; specialty?: string }[]; // The scraper keeps at most 10 entries.
|
||||
procedures?: string[]; // Procedures offered.
|
||||
address?: string; // Hospital address.
|
||||
badges?: string[]; // Certifications/badges; merged into clinicSnapshot.certifications.
|
||||
sourceUrl?: string; // URL of the scraped hospital page; used as the channel link.
|
||||
};
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -533,5 +544,50 @@ export function mergeEnrichment(
|
|||
}
|
||||
}
|
||||
|
||||
// Gangnam Unni enrichment: fold the scraped clinic data into the merged report.
// Every write below is copy-on-write so objects and arrays that `merged` may
// share with its input are never mutated in place.
if (enrichment.gangnamUnni) {
  const gu = enrichment.gangnamUnni;

  // The snapshot's rating, review count and certifications are refreshed only
  // when a truthy rating was scraped; a missing or zero rating leaves all three
  // untouched.
  // NOTE(review): gu.rating is on the scraper's "/10" scale — confirm that
  // consumers of clinicSnapshot.overallRating expect that scale.
  if (gu.rating) {
    merged.clinicSnapshot = {
      ...merged.clinicSnapshot,
      overallRating: gu.rating,
      totalReviews: gu.totalReviews ?? merged.clinicSnapshot.totalReviews,
      // An empty badge list keeps the existing certifications.
      certifications: gu.badges?.length ? gu.badges : merged.clinicSnapshot.certifications,
    };
  }

  // The first listed doctor becomes the lead doctor, provided it has a name.
  const topDoctor = gu.doctors?.[0];
  if (topDoctor?.name) {
    merged.clinicSnapshot = {
      ...merged.clinicSnapshot,
      leadDoctor: {
        name: topDoctor.name,
        // A missing/empty specialty falls back to the credentials already present.
        credentials: topDoctor.specialty || merged.clinicSnapshot.leadDoctor.credentials,
        rating: topDoctor.rating ?? 0,
        reviewCount: topDoctor.reviews ?? 0,
      },
    };
  }

  // Insert or replace the Gangnam Unni entry in otherChannels.
  const guChannel = {
    name: '강남언니',
    status: 'active' as const,
    details: `평점: ${gu.rating ?? '-'}${gu.ratingScale || '/10'} / 리뷰: ${gu.totalReviews?.toLocaleString() ?? '-'}건`,
    url: gu.sourceUrl || '',
  };
  const guChannelIdx = merged.otherChannels.findIndex((c) => c.name === '강남언니');
  // Build a new array in both cases instead of assigning by index, so an
  // existing entry is replaced without mutating the current array.
  merged.otherChannels =
    guChannelIdx >= 0
      ? merged.otherChannels.map((channel, i) => (i === guChannelIdx ? guChannel : channel))
      : [...merged.otherChannels, guChannel];
}
|
||||
|
||||
return merged;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -160,7 +160,92 @@ Deno.serve(async (req) => {
|
|||
);
|
||||
}
|
||||
|
||||
// 3. YouTube Channel (using YouTube Data API v3)
|
||||
// 3. Gangnam Unni — Firecrawl scraping
// Best-effort task: any failed step logs a warning (where applicable) and
// returns without setting enrichment.gangnamUnni.
if (clinicName) {
  const FIRECRAWL_API_KEY = Deno.env.get("FIRECRAWL_API_KEY");
  if (FIRECRAWL_API_KEY) {
    tasks.push(
      (async () => {
        const firecrawlHeaders = {
          "Content-Type": "application/json",
          Authorization: `Bearer ${FIRECRAWL_API_KEY}`,
        };

        // Step 1: Search for the clinic's gangnamunni page
        const searchRes = await fetch("https://api.firecrawl.dev/v1/search", {
          method: "POST",
          headers: firecrawlHeaders,
          body: JSON.stringify({
            query: `${clinicName} site:gangnamunni.com`,
            limit: 3,
          }),
        });
        if (!searchRes.ok) {
          // Release the unread body before bailing out.
          await searchRes.body?.cancel();
          console.warn(`[gangnamUnni] Firecrawl search failed: HTTP ${searchRes.status}`);
          return;
        }
        const searchData = await searchRes.json();
        // The payload is external input: accept only an array of results and
        // only string URLs that point at a hospital page.
        const searchResults: unknown[] = Array.isArray(searchData?.data) ? searchData.data : [];
        const hospitalUrl = searchResults
          .map((r) => (r as { url?: unknown } | null)?.url)
          .find((u): u is string => typeof u === "string" && u.includes("gangnamunni.com/hospitals/"));

        if (!hospitalUrl) return;

        // Step 2: Scrape the hospital page with structured JSON extraction
        const scrapeRes = await fetch("https://api.firecrawl.dev/v1/scrape", {
          method: "POST",
          headers: firecrawlHeaders,
          body: JSON.stringify({
            url: hospitalUrl,
            formats: ["json"],
            jsonOptions: {
              prompt: "Extract: hospital name, overall rating (out of 10), total review count, doctors with names/ratings/review counts/specialties, procedures offered, address, certifications/badges",
              schema: {
                type: "object",
                properties: {
                  hospitalName: { type: "string" },
                  rating: { type: "number" },
                  totalReviews: { type: "number" },
                  doctors: {
                    type: "array",
                    items: {
                      type: "object",
                      properties: {
                        name: { type: "string" },
                        rating: { type: "number" },
                        reviews: { type: "number" },
                        specialty: { type: "string" },
                      },
                    },
                  },
                  procedures: { type: "array", items: { type: "string" } },
                  address: { type: "string" },
                  badges: { type: "array", items: { type: "string" } },
                },
              },
            },
            waitFor: 5000,
          }),
        });
        if (!scrapeRes.ok) {
          await scrapeRes.body?.cancel();
          console.warn(`[gangnamUnni] Firecrawl scrape failed: HTTP ${scrapeRes.status}`);
          return;
        }
        const scrapeData = await scrapeRes.json();
        const hospital = scrapeData?.data?.json;

        if (hospital?.hospitalName) {
          // The extracted JSON is not guaranteed to match the requested schema,
          // so numeric and list fields are checked before being stored.
          const asNumber = (v: unknown): number | undefined =>
            typeof v === "number" ? v : undefined;
          const asStringList = (v: unknown): string[] =>
            Array.isArray(v) ? v.filter((s): s is string => typeof s === "string") : [];

          enrichment.gangnamUnni = {
            name: hospital.hospitalName,
            rating: asNumber(hospital.rating),
            ratingScale: "/10",
            totalReviews: asNumber(hospital.totalReviews),
            // Keep at most the first 10 doctors.
            doctors: Array.isArray(hospital.doctors) ? hospital.doctors.slice(0, 10) : [],
            procedures: asStringList(hospital.procedures),
            address: hospital.address,
            badges: asStringList(hospital.badges),
            sourceUrl: hospitalUrl,
          };
        }
      })()
    );
  }
}
|
||||
|
||||
// 4. YouTube Channel (using YouTube Data API v3)
|
||||
if (youtubeChannelId) {
|
||||
const YOUTUBE_API_KEY = Deno.env.get("YOUTUBE_API_KEY");
|
||||
if (YOUTUBE_API_KEY) {
|
||||
|
|
|
|||
Loading…
Reference in New Issue