From e8406dc0ee1b0d9037a3a0b0f9a6e92107cadf03 Mon Sep 17 00:00:00 2001 From: jaehwang Date: Wed, 20 May 2026 17:58:58 +0900 Subject: [PATCH] =?UTF-8?q?=EC=9D=98=EB=A3=8C=EC=A7=84=20=EC=88=98=20firec?= =?UTF-8?q?rawl=20=ED=95=B4=ED=82=B9,=20=EB=A7=8C=EC=95=BD=20=ED=95=84?= =?UTF-8?q?=EC=9A=94=ED=95=98=EB=8B=A4=EB=A9=B4=20=EC=A7=81=EC=A0=91=20?= =?UTF-8?q?=EC=8A=A4=ED=81=AC=EB=9E=98=ED=95=91=EC=9C=BC=EB=A1=9C=20?= =?UTF-8?q?=ED=95=B4=EA=B2=B0=20=ED=95=84=EC=9A=94?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- app/integrations/firecrawl.py | 6 ++++-- app/services/analysis.py | 36 +++++++++++++++++++++++++++++++++++ 2 files changed, 40 insertions(+), 2 deletions(-) diff --git a/app/integrations/firecrawl.py b/app/integrations/firecrawl.py index f83db21..40a1066 100644 --- a/app/integrations/firecrawl.py +++ b/app/integrations/firecrawl.py @@ -158,13 +158,14 @@ class FirecrawlClient: "url": hospital_url, "formats": ["json"], "jsonOptions": { - "prompt": "Extract: hospital name, overall rating (out of 10), total review count, doctors with names/ratings/review counts/specialties, procedures, address, badges", + "prompt": "Extract: hospital name, overall rating (out of 10), total review count, number of major staffs, all doctor with names/ratings/review counts/specialties(please check html, there are not only 4 doctors!), procedures, address, badges.", "schema": { "type": "object", "properties": { "hospitalName": {"type": "string"}, "rating": {"type": "number"}, "totalReviews": {"type": "number"}, + "totalMajorStaffs" : {"type" : "number"}, "doctors": { "type": "array", "items": { @@ -202,7 +203,8 @@ class FirecrawlClient: "rating": raw.get("rating"), "ratingScale": "/10", "totalReviews": raw.get("totalReviews", 0), - "doctors": (raw.get("doctors") or [])[:10], + "doctors": (raw.get("doctors") or []), + "totalMajorStaffs": raw.get("totalMajorStaffs", 0), "procedures": raw.get("procedures", []), "address": raw.get("address", ""), "badges": raw.get("badges", []), diff --git a/app/services/analysis.py b/app/services/analysis.py index 0a2450e..9f22922 100644 --- a/app/services/analysis.py +++ b/app/services/analysis.py @@ -132,11 +132,47 @@ async def _build_overrides(analysis_run_id: str) -> dict: if instagram.get("bio"): ig_patch["bio"] = instagram["bio"] if instagram.get("username"): ig_patch["profile_link"] = f"https://www.instagram.com/{instagram['username']}/" + # ── facebook ────────────────────────────────────────────────────────────── + fb_patch: dict = {} + if facebook.get("pageUrl"): fb_patch["url"] = facebook["pageUrl"] + if facebook.get("pageUrl"): fb_patch["link"] = facebook["pageUrl"] + if facebook.get("pageName"): fb_patch["page_name"] = facebook["pageName"] + if facebook.get("followers"): fb_patch["followers"] = facebook["followers"] + if facebook.get("intro"): fb_patch["bio"] = facebook["intro"] + if facebook.get("categories"): fb_patch["category"] = ", ".join(facebook["categories"]) + if facebook.get("website"): fb_patch["linked_domain"] = facebook["website"] + + # ── youtube ─────────────────────────────────────────────────────────────── + yt_patch: dict = {} + if youtube.get("channelName"): yt_patch["channel_name"] = youtube["channelName"] + if youtube.get("handle"): yt_patch["handle"] = youtube["handle"] + if youtube.get("subscribers"): yt_patch["subscribers"] = youtube["subscribers"] + if youtube.get("totalVideos"): yt_patch["total_videos"] = youtube["totalVideos"] + if youtube.get("totalViews"): yt_patch["total_views"] = youtube["totalViews"] + if youtube.get("publishedAt"): yt_patch["channel_created_date"] = youtube["publishedAt"][:10] + if youtube.get("description"): yt_patch["channel_description"] = youtube["description"] + if youtube.get("publishedAt"): snapshot["established"] = youtube["publishedAt"][:4] + if youtube.get("videos"): + yt_patch["top_videos"] = [ + { + "title": v["title"], + "views": v["views"], + "duration": v.get("duration"), + "type": "Short" if "M" not in v.get("duration", "") else "Long", + "uploaded_ago": v.get("date", "")[:10], + } + for v in youtube["videos"] + ] + overrides: dict = {} if snapshot: overrides["clinic_snapshot"] = snapshot if ig_patch: overrides["instagram_audit"] = {"accounts": [ig_patch]} + if fb_patch: + overrides["facebook_audit"] = {"pages": [fb_patch]} + if yt_patch: + overrides["youtube_audit"] = yt_patch return overrides