의료진 수 추출을 위한 Firecrawl 프롬프트 임시 우회(hack) 적용. 필요하다면 직접 스크래핑으로 대체해야 함
parent
09bb7a71ee
commit
e8406dc0ee
|
|
@ -158,13 +158,14 @@ class FirecrawlClient:
|
|||
"url": hospital_url,
|
||||
"formats": ["json"],
|
||||
"jsonOptions": {
|
||||
"prompt": "Extract: hospital name, overall rating (out of 10), total review count, doctors with names/ratings/review counts/specialties, procedures, address, badges",
|
||||
"prompt": "Extract: hospital name, overall rating (out of 10), total review count, number of major staffs, all doctor with names/ratings/review counts/specialties(please check html, there are not only 4 doctors!), procedures, address, badges.",
|
||||
"schema": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"hospitalName": {"type": "string"},
|
||||
"rating": {"type": "number"},
|
||||
"totalReviews": {"type": "number"},
|
||||
"totalMajorStaffs" : {"type" : "number"},
|
||||
"doctors": {
|
||||
"type": "array",
|
||||
"items": {
|
||||
|
|
@ -202,7 +203,8 @@ class FirecrawlClient:
|
|||
"rating": raw.get("rating"),
|
||||
"ratingScale": "/10",
|
||||
"totalReviews": raw.get("totalReviews", 0),
|
||||
"doctors": (raw.get("doctors") or [])[:10],
|
||||
"doctors": (raw.get("doctors") or []),
|
||||
"totalMajorStaffs": raw.get("totalMajorStaffs", 0),
|
||||
"procedures": raw.get("procedures", []),
|
||||
"address": raw.get("address", ""),
|
||||
"badges": raw.get("badges", []),
|
||||
|
|
|
|||
|
|
@ -132,11 +132,47 @@ async def _build_overrides(analysis_run_id: str) -> dict:
|
|||
if instagram.get("bio"): ig_patch["bio"] = instagram["bio"]
|
||||
if instagram.get("username"): ig_patch["profile_link"] = f"https://www.instagram.com/{instagram['username']}/"
|
||||
|
||||
# ── facebook ──────────────────────────────────────────────────────────────
|
||||
fb_patch: dict = {}
|
||||
if facebook.get("pageUrl"): fb_patch["url"] = facebook["pageUrl"]
|
||||
if facebook.get("pageUrl"): fb_patch["link"] = facebook["pageUrl"]
|
||||
if facebook.get("pageName"): fb_patch["page_name"] = facebook["pageName"]
|
||||
if facebook.get("followers"): fb_patch["followers"] = facebook["followers"]
|
||||
if facebook.get("intro"): fb_patch["bio"] = facebook["intro"]
|
||||
if facebook.get("categories"): fb_patch["category"] = ", ".join(facebook["categories"])
|
||||
if facebook.get("website"): fb_patch["linked_domain"] = facebook["website"]
|
||||
|
||||
# ── youtube ───────────────────────────────────────────────────────────────
|
||||
yt_patch: dict = {}
|
||||
if youtube.get("channelName"): yt_patch["channel_name"] = youtube["channelName"]
|
||||
if youtube.get("handle"): yt_patch["handle"] = youtube["handle"]
|
||||
if youtube.get("subscribers"): yt_patch["subscribers"] = youtube["subscribers"]
|
||||
if youtube.get("totalVideos"): yt_patch["total_videos"] = youtube["totalVideos"]
|
||||
if youtube.get("totalViews"): yt_patch["total_views"] = youtube["totalViews"]
|
||||
if youtube.get("publishedAt"): yt_patch["channel_created_date"] = youtube["publishedAt"][:10]
|
||||
if youtube.get("description"): yt_patch["channel_description"] = youtube["description"]
|
||||
if youtube.get("publishedAt"): snapshot["established"] = youtube["publishedAt"][:4]
|
||||
if youtube.get("videos"):
|
||||
yt_patch["top_videos"] = [
|
||||
{
|
||||
"title": v["title"],
|
||||
"views": v["views"],
|
||||
"duration": v.get("duration"),
|
||||
"type": "Short" if "M" not in v.get("duration", "") else "Long",
|
||||
"uploaded_ago": v.get("date", "")[:10],
|
||||
}
|
||||
for v in youtube["videos"]
|
||||
]
|
||||
|
||||
overrides: dict = {}
|
||||
if snapshot:
|
||||
overrides["clinic_snapshot"] = snapshot
|
||||
if ig_patch:
|
||||
overrides["instagram_audit"] = {"accounts": [ig_patch]}
|
||||
if fb_patch:
|
||||
overrides["facebook_audit"] = {"pages": [fb_patch]}
|
||||
if yt_patch:
|
||||
overrides["youtube_audit"] = yt_patch
|
||||
return overrides
|
||||
|
||||
|
||||
|
|
|
|||
Loading…
Reference in New Issue