feat: 강남언니 real-time data collection via Firecrawl scraping

- enrich-channels: add 강남언니 scraping module (search + structured JSON extraction)
- Collects: rating/10, reviews, doctors with ratings, procedures, certifications
- transformReport: merge 강남언니 data into clinicSnapshot + otherChannels
- Updates lead doctor info, certifications, and review counts from real data

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
claude/bold-hawking
Haewon Kam 2026-04-03 14:51:47 +09:00
parent e5399486f7
commit cf482d1bd7
2 changed files with 142 additions and 1 deletions

View File

@ -399,6 +399,17 @@ export interface EnrichmentData {
thumbnail?: string; thumbnail?: string;
}[]; }[];
}; };
/**
 * Data collected from the clinic's 강남언니 (Gangnam Unni) hospital page.
 * Optional: only present when a hospital page was found and a hospital name
 * could be extracted from it.
 */
gangnamUnni?: {
/** Hospital name as extracted from the page. */
name?: string;
/** Overall rating value; interpret it together with `ratingScale`. */
rating?: number;
/** Suffix describing the rating scale (e.g. "/10"); appended after the rating in the channel details text. */
ratingScale?: string;
/** Total number of reviews reported for the hospital. */
totalReviews?: number;
/** Per-doctor entries; `mergeEnrichment` reads the first entry as the lead doctor. */
doctors?: { name?: string; rating?: number; reviews?: number; specialty?: string }[];
/** Names of procedures offered by the hospital. */
procedures?: string[];
/** Hospital address as extracted from the page. */
address?: string;
/** Certifications/badges; when non-empty they replace the snapshot's `certifications`. */
badges?: string[];
/** URL of the hospital page the data came from; used as the channel URL in `otherChannels`. */
sourceUrl?: string;
};
} }
/** /**
@ -533,5 +544,50 @@ export function mergeEnrichment(
} }
} }
// Gangnam Unni (강남언니) enrichment: fold the scraped platform data into the
// clinic snapshot and into the "other channels" list.
if (enrichment.gangnamUnni) {
  const gu = enrichment.gangnamUnni;

  // Overwrite the snapshot's headline numbers with Gangnam Unni data.
  // The truthiness check skips both a missing rating and a rating of 0.
  // NOTE(review): review count and badges are only applied when a rating is
  // present, and `gu.rating` is on the scale named by `gu.ratingScale`
  // ('/10' by default below) — confirm both are intended for `overallRating`.
  if (gu.rating) {
    merged.clinicSnapshot = {
      ...merged.clinicSnapshot,
      overallRating: gu.rating,
      totalReviews: gu.totalReviews ?? merged.clinicSnapshot.totalReviews,
      certifications: gu.badges?.length ? gu.badges : merged.clinicSnapshot.certifications,
    };
  }

  // Treat the first listed doctor as the lead doctor, but only when a name was
  // extracted; missing rating/review numbers fall back to 0.
  const topDoctor = gu.doctors?.[0];
  if (topDoctor?.name) {
    merged.clinicSnapshot = {
      ...merged.clinicSnapshot,
      leadDoctor: {
        name: topDoctor.name,
        credentials: topDoctor.specialty || merged.clinicSnapshot.leadDoctor.credentials,
        rating: topDoctor.rating ?? 0,
        reviewCount: topDoctor.reviews ?? 0,
      },
    };
  }

  // Insert or replace the 강남언니 entry in otherChannels.
  const guChannel = {
    name: '강남언니',
    status: 'active' as const,
    details: `평점: ${gu.rating ?? '-'}${gu.ratingScale || '/10'} / 리뷰: ${gu.totalReviews?.toLocaleString() ?? '-'}`,
    url: gu.sourceUrl || '',
  };
  const guChannelIdx = merged.otherChannels.findIndex((c) => c.name === '강남언니');
  // Always build a new array instead of assigning by index: the existing array
  // may still be shared with the report object that was passed in, and every
  // other update in this section is non-mutating.
  merged.otherChannels =
    guChannelIdx >= 0
      ? merged.otherChannels.map((c, i) => (i === guChannelIdx ? guChannel : c))
      : [...merged.otherChannels, guChannel];
}
return merged; return merged;
} }

View File

@ -160,7 +160,92 @@ Deno.serve(async (req) => {
); );
} }
// 3. YouTube Channel (using YouTube Data API v3) // 3. 강남언니 (Gangnam Unni) — Firecrawl scraping
// Best-effort enrichment from 강남언니 (Gangnam Unni) through the Firecrawl API.
// Runs only when a clinic name is known and FIRECRAWL_API_KEY is configured.
// The queued task searches for the clinic's hospital page, scrapes it with
// structured JSON extraction, and stores the result on `enrichment.gangnamUnni`.
// It resolves without writing anything when no usable page or data is found.
if (clinicName) {
  const FIRECRAWL_API_KEY = Deno.env.get("FIRECRAWL_API_KEY");
  if (FIRECRAWL_API_KEY) {
    tasks.push(
      (async () => {
        // Both Firecrawl calls send the same headers.
        const firecrawlHeaders = {
          "Content-Type": "application/json",
          Authorization: `Bearer ${FIRECRAWL_API_KEY}`,
        };

        // Accept only absolute URLs on gangnamunni.com (or a subdomain) whose
        // path starts with /hospitals/. A plain substring test would also match
        // foreign hosts that merely contain that text in their path or query.
        const isHospitalPage = (candidate: unknown): candidate is string => {
          if (typeof candidate !== "string") return false;
          try {
            const { hostname, pathname } = new URL(candidate);
            const onSite =
              hostname === "gangnamunni.com" || hostname.endsWith(".gangnamunni.com");
            return onSite && pathname.startsWith("/hospitals/");
          } catch {
            // Not a parseable absolute URL.
            return false;
          }
        };

        // Step 1: Search for the clinic's gangnamunni page
        const searchRes = await fetch("https://api.firecrawl.dev/v1/search", {
          method: "POST",
          headers: firecrawlHeaders,
          body: JSON.stringify({
            query: `${clinicName} site:gangnamunni.com`,
            limit: 3,
          }),
        });
        if (!searchRes.ok) {
          // Error responses carry no search results; release the body and skip.
          await searchRes.body?.cancel();
          return;
        }
        const searchData = await searchRes.json();
        // The payload is external JSON, so do not assume `data` is an array.
        const searchResults: unknown[] = Array.isArray(searchData?.data)
          ? searchData.data
          : [];
        const hospitalUrl = searchResults
          .map((r) => (r as { url?: unknown } | null)?.url)
          .find(isHospitalPage);
        if (!hospitalUrl) return;

        // Step 2: Scrape the hospital page with structured JSON extraction
        const scrapeRes = await fetch("https://api.firecrawl.dev/v1/scrape", {
          method: "POST",
          headers: firecrawlHeaders,
          body: JSON.stringify({
            url: hospitalUrl,
            formats: ["json"],
            jsonOptions: {
              prompt: "Extract: hospital name, overall rating (out of 10), total review count, doctors with names/ratings/review counts/specialties, procedures offered, address, certifications/badges",
              schema: {
                type: "object",
                properties: {
                  hospitalName: { type: "string" },
                  rating: { type: "number" },
                  totalReviews: { type: "number" },
                  doctors: {
                    type: "array",
                    items: {
                      type: "object",
                      properties: {
                        name: { type: "string" },
                        rating: { type: "number" },
                        reviews: { type: "number" },
                        specialty: { type: "string" },
                      },
                    },
                  },
                  procedures: { type: "array", items: { type: "string" } },
                  address: { type: "string" },
                  badges: { type: "array", items: { type: "string" } },
                },
              },
            },
            // Presumably a render delay in milliseconds before extraction —
            // confirm against the Firecrawl scrape options.
            waitFor: 5000,
          }),
        });
        if (!scrapeRes.ok) {
          await scrapeRes.body?.cancel();
          return;
        }
        const scrapeData = await scrapeRes.json();
        const hospital = scrapeData?.data?.json;

        // Without a hospital name the extraction is treated as failed.
        if (hospital?.hospitalName) {
          enrichment.gangnamUnni = {
            name: hospital.hospitalName,
            rating: hospital.rating,
            ratingScale: "/10",
            totalReviews: hospital.totalReviews,
            // Extracted fields are not guaranteed to match the requested
            // schema, so fall back to empty lists for anything that is not an
            // array. The doctor list is capped at 10 entries.
            doctors: Array.isArray(hospital.doctors) ? hospital.doctors.slice(0, 10) : [],
            procedures: Array.isArray(hospital.procedures) ? hospital.procedures : [],
            address: hospital.address,
            badges: Array.isArray(hospital.badges) ? hospital.badges : [],
            sourceUrl: hospitalUrl,
          };
        }
      })()
    );
  }
}
// 4. YouTube Channel (using YouTube Data API v3)
if (youtubeChannelId) { if (youtubeChannelId) {
const YOUTUBE_API_KEY = Deno.env.get("YOUTUBE_API_KEY"); const YOUTUBE_API_KEY = Deno.env.get("YOUTUBE_API_KEY");
if (YOUTUBE_API_KEY) { if (YOUTUBE_API_KEY) {