의료진 수 firecrawl 해킹, 만약 필요하다면 직접 스크래핑으로 해결 필요
parent
09bb7a71ee
commit
e8406dc0ee
|
|
@ -158,13 +158,14 @@ class FirecrawlClient:
|
||||||
"url": hospital_url,
|
"url": hospital_url,
|
||||||
"formats": ["json"],
|
"formats": ["json"],
|
||||||
"jsonOptions": {
|
"jsonOptions": {
|
||||||
"prompt": "Extract: hospital name, overall rating (out of 10), total review count, doctors with names/ratings/review counts/specialties, procedures, address, badges",
|
"prompt": "Extract: hospital name, overall rating (out of 10), total review count, number of major staffs, all doctor with names/ratings/review counts/specialties(please check html, there are not only 4 doctors!), procedures, address, badges.",
|
||||||
"schema": {
|
"schema": {
|
||||||
"type": "object",
|
"type": "object",
|
||||||
"properties": {
|
"properties": {
|
||||||
"hospitalName": {"type": "string"},
|
"hospitalName": {"type": "string"},
|
||||||
"rating": {"type": "number"},
|
"rating": {"type": "number"},
|
||||||
"totalReviews": {"type": "number"},
|
"totalReviews": {"type": "number"},
|
||||||
|
"totalMajorStaffs" : {"type" : "number"},
|
||||||
"doctors": {
|
"doctors": {
|
||||||
"type": "array",
|
"type": "array",
|
||||||
"items": {
|
"items": {
|
||||||
|
|
@ -202,7 +203,8 @@ class FirecrawlClient:
|
||||||
"rating": raw.get("rating"),
|
"rating": raw.get("rating"),
|
||||||
"ratingScale": "/10",
|
"ratingScale": "/10",
|
||||||
"totalReviews": raw.get("totalReviews", 0),
|
"totalReviews": raw.get("totalReviews", 0),
|
||||||
"doctors": (raw.get("doctors") or [])[:10],
|
"doctors": (raw.get("doctors") or []),
|
||||||
|
"totalMajorStaffs": raw.get("totalMajorStaffs", 0),
|
||||||
"procedures": raw.get("procedures", []),
|
"procedures": raw.get("procedures", []),
|
||||||
"address": raw.get("address", ""),
|
"address": raw.get("address", ""),
|
||||||
"badges": raw.get("badges", []),
|
"badges": raw.get("badges", []),
|
||||||
|
|
|
||||||
|
|
@ -132,11 +132,47 @@ async def _build_overrides(analysis_run_id: str) -> dict:
|
||||||
if instagram.get("bio"): ig_patch["bio"] = instagram["bio"]
|
if instagram.get("bio"): ig_patch["bio"] = instagram["bio"]
|
||||||
if instagram.get("username"): ig_patch["profile_link"] = f"https://www.instagram.com/{instagram['username']}/"
|
if instagram.get("username"): ig_patch["profile_link"] = f"https://www.instagram.com/{instagram['username']}/"
|
||||||
|
|
||||||
|
# ── facebook ──────────────────────────────────────────────────────────────
|
||||||
|
fb_patch: dict = {}
|
||||||
|
if facebook.get("pageUrl"): fb_patch["url"] = facebook["pageUrl"]
|
||||||
|
if facebook.get("pageUrl"): fb_patch["link"] = facebook["pageUrl"]
|
||||||
|
if facebook.get("pageName"): fb_patch["page_name"] = facebook["pageName"]
|
||||||
|
if facebook.get("followers"): fb_patch["followers"] = facebook["followers"]
|
||||||
|
if facebook.get("intro"): fb_patch["bio"] = facebook["intro"]
|
||||||
|
if facebook.get("categories"): fb_patch["category"] = ", ".join(facebook["categories"])
|
||||||
|
if facebook.get("website"): fb_patch["linked_domain"] = facebook["website"]
|
||||||
|
|
||||||
|
# ── youtube ───────────────────────────────────────────────────────────────
|
||||||
|
yt_patch: dict = {}
|
||||||
|
if youtube.get("channelName"): yt_patch["channel_name"] = youtube["channelName"]
|
||||||
|
if youtube.get("handle"): yt_patch["handle"] = youtube["handle"]
|
||||||
|
if youtube.get("subscribers"): yt_patch["subscribers"] = youtube["subscribers"]
|
||||||
|
if youtube.get("totalVideos"): yt_patch["total_videos"] = youtube["totalVideos"]
|
||||||
|
if youtube.get("totalViews"): yt_patch["total_views"] = youtube["totalViews"]
|
||||||
|
if youtube.get("publishedAt"): yt_patch["channel_created_date"] = youtube["publishedAt"][:10]
|
||||||
|
if youtube.get("description"): yt_patch["channel_description"] = youtube["description"]
|
||||||
|
if youtube.get("publishedAt"): snapshot["established"] = youtube["publishedAt"][:4]
|
||||||
|
if youtube.get("videos"):
|
||||||
|
yt_patch["top_videos"] = [
|
||||||
|
{
|
||||||
|
"title": v["title"],
|
||||||
|
"views": v["views"],
|
||||||
|
"duration": v.get("duration"),
|
||||||
|
"type": "Short" if "M" not in v.get("duration", "") else "Long",
|
||||||
|
"uploaded_ago": v.get("date", "")[:10],
|
||||||
|
}
|
||||||
|
for v in youtube["videos"]
|
||||||
|
]
|
||||||
|
|
||||||
overrides: dict = {}
|
overrides: dict = {}
|
||||||
if snapshot:
|
if snapshot:
|
||||||
overrides["clinic_snapshot"] = snapshot
|
overrides["clinic_snapshot"] = snapshot
|
||||||
if ig_patch:
|
if ig_patch:
|
||||||
overrides["instagram_audit"] = {"accounts": [ig_patch]}
|
overrides["instagram_audit"] = {"accounts": [ig_patch]}
|
||||||
|
if fb_patch:
|
||||||
|
overrides["facebook_audit"] = {"pages": [fb_patch]}
|
||||||
|
if yt_patch:
|
||||||
|
overrides["youtube_audit"] = yt_patch
|
||||||
return overrides
|
return overrides
|
||||||
|
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue