feat: 강남언니 real-time data collection via Firecrawl scraping
- enrich-channels: add 강남언니 scraping module (search + structured JSON extraction)
- Collects: rating/10, reviews, doctors with ratings, procedures, certifications
- transformReport: merge 강남언니 data into clinicSnapshot + otherChannels
- Updates lead doctor info, certifications, and review counts from real data

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
claude/bold-hawking
parent
e5399486f7
commit
cf482d1bd7
|
|
@ -399,6 +399,17 @@ export interface EnrichmentData {
|
||||||
thumbnail?: string;
|
thumbnail?: string;
|
||||||
}[];
|
}[];
|
||||||
};
|
};
|
||||||
|
gangnamUnni?: {
|
||||||
|
name?: string;
|
||||||
|
rating?: number;
|
||||||
|
ratingScale?: string;
|
||||||
|
totalReviews?: number;
|
||||||
|
doctors?: { name?: string; rating?: number; reviews?: number; specialty?: string }[];
|
||||||
|
procedures?: string[];
|
||||||
|
address?: string;
|
||||||
|
badges?: string[];
|
||||||
|
sourceUrl?: string;
|
||||||
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
@ -533,5 +544,50 @@ export function mergeEnrichment(
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// 강남언니 enrichment
|
||||||
|
if (enrichment.gangnamUnni) {
|
||||||
|
const gu = enrichment.gangnamUnni;
|
||||||
|
|
||||||
|
// Update clinic snapshot with real gangnamUnni data
|
||||||
|
if (gu.rating) {
|
||||||
|
merged.clinicSnapshot = {
|
||||||
|
...merged.clinicSnapshot,
|
||||||
|
overallRating: gu.rating,
|
||||||
|
totalReviews: gu.totalReviews ?? merged.clinicSnapshot.totalReviews,
|
||||||
|
certifications: gu.badges?.length ? gu.badges : merged.clinicSnapshot.certifications,
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
// Update lead doctor with gangnamUnni doctor data
|
||||||
|
if (gu.doctors?.length) {
|
||||||
|
const topDoctor = gu.doctors[0];
|
||||||
|
if (topDoctor?.name) {
|
||||||
|
merged.clinicSnapshot = {
|
||||||
|
...merged.clinicSnapshot,
|
||||||
|
leadDoctor: {
|
||||||
|
name: topDoctor.name,
|
||||||
|
credentials: topDoctor.specialty || merged.clinicSnapshot.leadDoctor.credentials,
|
||||||
|
rating: topDoctor.rating ?? 0,
|
||||||
|
reviewCount: topDoctor.reviews ?? 0,
|
||||||
|
},
|
||||||
|
};
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Update gangnamUnni channel in otherChannels
|
||||||
|
const guChannelIdx = merged.otherChannels.findIndex(c => c.name === '강남언니');
|
||||||
|
const guChannel = {
|
||||||
|
name: '강남언니',
|
||||||
|
status: 'active' as const,
|
||||||
|
details: `평점: ${gu.rating ?? '-'}${gu.ratingScale || '/10'} / 리뷰: ${gu.totalReviews?.toLocaleString() ?? '-'}건`,
|
||||||
|
url: gu.sourceUrl || '',
|
||||||
|
};
|
||||||
|
if (guChannelIdx >= 0) {
|
||||||
|
merged.otherChannels[guChannelIdx] = guChannel;
|
||||||
|
} else {
|
||||||
|
merged.otherChannels = [...merged.otherChannels, guChannel];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
return merged;
|
return merged;
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -160,7 +160,92 @@ Deno.serve(async (req) => {
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
// 3. YouTube Channel (using YouTube Data API v3)
|
// 3. 강남언니 (Gangnam Unni) — Firecrawl scraping
|
||||||
|
if (clinicName) {
|
||||||
|
const FIRECRAWL_API_KEY = Deno.env.get("FIRECRAWL_API_KEY");
|
||||||
|
if (FIRECRAWL_API_KEY) {
|
||||||
|
tasks.push(
|
||||||
|
(async () => {
|
||||||
|
// Step 1: Search for the clinic's gangnamunni page
|
||||||
|
const searchRes = await fetch("https://api.firecrawl.dev/v1/search", {
|
||||||
|
method: "POST",
|
||||||
|
headers: {
|
||||||
|
"Content-Type": "application/json",
|
||||||
|
Authorization: `Bearer ${FIRECRAWL_API_KEY}`,
|
||||||
|
},
|
||||||
|
body: JSON.stringify({
|
||||||
|
query: `${clinicName} site:gangnamunni.com`,
|
||||||
|
limit: 3,
|
||||||
|
}),
|
||||||
|
});
|
||||||
|
const searchData = await searchRes.json();
|
||||||
|
const hospitalUrl = (searchData.data || [])
|
||||||
|
.map((r: Record<string, string>) => r.url)
|
||||||
|
.find((u: string) => u?.includes("gangnamunni.com/hospitals/"));
|
||||||
|
|
||||||
|
if (!hospitalUrl) return;
|
||||||
|
|
||||||
|
// Step 2: Scrape the hospital page with structured JSON extraction
|
||||||
|
const scrapeRes = await fetch("https://api.firecrawl.dev/v1/scrape", {
|
||||||
|
method: "POST",
|
||||||
|
headers: {
|
||||||
|
"Content-Type": "application/json",
|
||||||
|
Authorization: `Bearer ${FIRECRAWL_API_KEY}`,
|
||||||
|
},
|
||||||
|
body: JSON.stringify({
|
||||||
|
url: hospitalUrl,
|
||||||
|
formats: ["json"],
|
||||||
|
jsonOptions: {
|
||||||
|
prompt: "Extract: hospital name, overall rating (out of 10), total review count, doctors with names/ratings/review counts/specialties, procedures offered, address, certifications/badges",
|
||||||
|
schema: {
|
||||||
|
type: "object",
|
||||||
|
properties: {
|
||||||
|
hospitalName: { type: "string" },
|
||||||
|
rating: { type: "number" },
|
||||||
|
totalReviews: { type: "number" },
|
||||||
|
doctors: {
|
||||||
|
type: "array",
|
||||||
|
items: {
|
||||||
|
type: "object",
|
||||||
|
properties: {
|
||||||
|
name: { type: "string" },
|
||||||
|
rating: { type: "number" },
|
||||||
|
reviews: { type: "number" },
|
||||||
|
specialty: { type: "string" },
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
procedures: { type: "array", items: { type: "string" } },
|
||||||
|
address: { type: "string" },
|
||||||
|
badges: { type: "array", items: { type: "string" } },
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
waitFor: 5000,
|
||||||
|
}),
|
||||||
|
});
|
||||||
|
const scrapeData = await scrapeRes.json();
|
||||||
|
const hospital = scrapeData.data?.json;
|
||||||
|
|
||||||
|
if (hospital?.hospitalName) {
|
||||||
|
enrichment.gangnamUnni = {
|
||||||
|
name: hospital.hospitalName,
|
||||||
|
rating: hospital.rating,
|
||||||
|
ratingScale: "/10",
|
||||||
|
totalReviews: hospital.totalReviews,
|
||||||
|
doctors: (hospital.doctors || []).slice(0, 10),
|
||||||
|
procedures: hospital.procedures || [],
|
||||||
|
address: hospital.address,
|
||||||
|
badges: hospital.badges || [],
|
||||||
|
sourceUrl: hospitalUrl,
|
||||||
|
};
|
||||||
|
}
|
||||||
|
})()
|
||||||
|
);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// 4. YouTube Channel (using YouTube Data API v3)
|
||||||
if (youtubeChannelId) {
|
if (youtubeChannelId) {
|
||||||
const YOUTUBE_API_KEY = Deno.env.get("YOUTUBE_API_KEY");
|
const YOUTUBE_API_KEY = Deno.env.get("YOUTUBE_API_KEY");
|
||||||
if (YOUTUBE_API_KEY) {
|
if (YOUTUBE_API_KEY) {
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue