diff --git a/app/integrations/firecrawl.py b/app/integrations/firecrawl.py index f83db21..40a1066 100644 --- a/app/integrations/firecrawl.py +++ b/app/integrations/firecrawl.py @@ -158,13 +158,14 @@ class FirecrawlClient: "url": hospital_url, "formats": ["json"], "jsonOptions": { - "prompt": "Extract: hospital name, overall rating (out of 10), total review count, doctors with names/ratings/review counts/specialties, procedures, address, badges", + "prompt": "Extract: hospital name, overall rating (out of 10), total review count, number of major staffs, all doctor with names/ratings/review counts/specialties(please check html, there are not only 4 doctors!), procedures, address, badges.", "schema": { "type": "object", "properties": { "hospitalName": {"type": "string"}, "rating": {"type": "number"}, "totalReviews": {"type": "number"}, + "totalMajorStaffs" : {"type" : "number"}, "doctors": { "type": "array", "items": { @@ -202,7 +203,8 @@ class FirecrawlClient: "rating": raw.get("rating"), "ratingScale": "/10", "totalReviews": raw.get("totalReviews", 0), - "doctors": (raw.get("doctors") or [])[:10], + "doctors": (raw.get("doctors") or []), + "totalMajorStaffs": raw.get("totalMajorStaffs", 0), "procedures": raw.get("procedures", []), "address": raw.get("address", ""), "badges": raw.get("badges", []), diff --git a/app/services/analysis.py b/app/services/analysis.py index 0a2450e..9f22922 100644 --- a/app/services/analysis.py +++ b/app/services/analysis.py @@ -132,11 +132,47 @@ async def _build_overrides(analysis_run_id: str) -> dict: if instagram.get("bio"): ig_patch["bio"] = instagram["bio"] if instagram.get("username"): ig_patch["profile_link"] = f"https://www.instagram.com/{instagram['username']}/" + # ── facebook ────────────────────────────────────────────────────────────── + fb_patch: dict = {} + if facebook.get("pageUrl"): fb_patch["url"] = facebook["pageUrl"] + if facebook.get("pageUrl"): fb_patch["link"] = facebook["pageUrl"] + if facebook.get("pageName"): fb_patch["page_name"] = facebook["pageName"] + if facebook.get("followers"): fb_patch["followers"] = facebook["followers"] + if facebook.get("intro"): fb_patch["bio"] = facebook["intro"] + if facebook.get("categories"): fb_patch["category"] = ", ".join(facebook["categories"]) + if facebook.get("website"): fb_patch["linked_domain"] = facebook["website"] + + # ── youtube ─────────────────────────────────────────────────────────────── + yt_patch: dict = {} + if youtube.get("channelName"): yt_patch["channel_name"] = youtube["channelName"] + if youtube.get("handle"): yt_patch["handle"] = youtube["handle"] + if youtube.get("subscribers"): yt_patch["subscribers"] = youtube["subscribers"] + if youtube.get("totalVideos"): yt_patch["total_videos"] = youtube["totalVideos"] + if youtube.get("totalViews"): yt_patch["total_views"] = youtube["totalViews"] + if youtube.get("publishedAt"): yt_patch["channel_created_date"] = youtube["publishedAt"][:10] + if youtube.get("description"): yt_patch["channel_description"] = youtube["description"] + if youtube.get("publishedAt"): snapshot["established"] = youtube["publishedAt"][:4] + if youtube.get("videos"): + yt_patch["top_videos"] = [ + { + "title": v["title"], + "views": v["views"], + "duration": v.get("duration"), + "type": "Short" if "M" not in v.get("duration", "") else "Long", + "uploaded_ago": v.get("date", "")[:10], + } + for v in youtube["videos"] + ] + overrides: dict = {} if snapshot: overrides["clinic_snapshot"] = snapshot if ig_patch: overrides["instagram_audit"] = {"accounts": [ig_patch]} + if fb_patch: + overrides["facebook_audit"] = {"pages": [fb_patch]} + if yt_patch: + overrides["youtube_audit"] = yt_patch return overrides