의료진 수 추출을 위한 Firecrawl 프롬프트 임시 방편(hack) 적용 — 필요하다면 직접 스크래핑으로 해결해야 함

insta-data
jaehwang 2026-05-20 17:58:58 +09:00
parent 09bb7a71ee
commit e8406dc0ee
2 changed files with 40 additions and 2 deletions

View File

@ -158,13 +158,14 @@ class FirecrawlClient:
"url": hospital_url,
"formats": ["json"],
"jsonOptions": {
"prompt": "Extract: hospital name, overall rating (out of 10), total review count, doctors with names/ratings/review counts/specialties, procedures, address, badges",
"prompt": "Extract: hospital name, overall rating (out of 10), total review count, number of major staffs, all doctor with names/ratings/review counts/specialties(please check html, there are not only 4 doctors!), procedures, address, badges.",
"schema": {
"type": "object",
"properties": {
"hospitalName": {"type": "string"},
"rating": {"type": "number"},
"totalReviews": {"type": "number"},
"totalMajorStaffs" : {"type" : "number"},
"doctors": {
"type": "array",
"items": {
@ -202,7 +203,8 @@ class FirecrawlClient:
"rating": raw.get("rating"),
"ratingScale": "/10",
"totalReviews": raw.get("totalReviews", 0),
"doctors": (raw.get("doctors") or [])[:10],
"doctors": (raw.get("doctors") or []),
"totalMajorStaffs": raw.get("totalMajorStaffs", 0),
"procedures": raw.get("procedures", []),
"address": raw.get("address", ""),
"badges": raw.get("badges", []),

View File

@ -132,11 +132,47 @@ async def _build_overrides(analysis_run_id: str) -> dict:
if instagram.get("bio"): ig_patch["bio"] = instagram["bio"]
if instagram.get("username"): ig_patch["profile_link"] = f"https://www.instagram.com/{instagram['username']}/"
# ── facebook ──────────────────────────────────────────────────────────────
fb_patch: dict = {}
if facebook.get("pageUrl"): fb_patch["url"] = facebook["pageUrl"]
if facebook.get("pageUrl"): fb_patch["link"] = facebook["pageUrl"]
if facebook.get("pageName"): fb_patch["page_name"] = facebook["pageName"]
if facebook.get("followers"): fb_patch["followers"] = facebook["followers"]
if facebook.get("intro"): fb_patch["bio"] = facebook["intro"]
if facebook.get("categories"): fb_patch["category"] = ", ".join(facebook["categories"])
if facebook.get("website"): fb_patch["linked_domain"] = facebook["website"]
# ── youtube ───────────────────────────────────────────────────────────────
yt_patch: dict = {}
if youtube.get("channelName"): yt_patch["channel_name"] = youtube["channelName"]
if youtube.get("handle"): yt_patch["handle"] = youtube["handle"]
if youtube.get("subscribers"): yt_patch["subscribers"] = youtube["subscribers"]
if youtube.get("totalVideos"): yt_patch["total_videos"] = youtube["totalVideos"]
if youtube.get("totalViews"): yt_patch["total_views"] = youtube["totalViews"]
if youtube.get("publishedAt"): yt_patch["channel_created_date"] = youtube["publishedAt"][:10]
if youtube.get("description"): yt_patch["channel_description"] = youtube["description"]
if youtube.get("publishedAt"): snapshot["established"] = youtube["publishedAt"][:4]
if youtube.get("videos"):
yt_patch["top_videos"] = [
{
"title": v["title"],
"views": v["views"],
"duration": v.get("duration"),
"type": "Short" if "M" not in v.get("duration", "") else "Long",
"uploaded_ago": v.get("date", "")[:10],
}
for v in youtube["videos"]
]
overrides: dict = {}
if snapshot:
overrides["clinic_snapshot"] = snapshot
if ig_patch:
overrides["instagram_audit"] = {"accounts": [ig_patch]}
if fb_patch:
overrides["facebook_audit"] = {"pages": [fb_patch]}
if yt_patch:
overrides["youtube_audit"] = yt_patch
return overrides