347 lines
15 KiB
Python
347 lines
15 KiB
Python
import json
|
|
import logging
|
|
import os
|
|
import re
|
|
from datetime import datetime
|
|
from urllib.parse import urlparse
|
|
from common.db.run import select_run, update_run_report, update_run_plan
|
|
from common.db.source import select_run_raw_data, select_run_mainpage_url
|
|
from common.db.market import select_market
|
|
from integrations.llm.llm_service import LLMService
|
|
from integrations.llm.prompt import report_prompt, plan_prompt, youtube_diagnosis_prompt, brand_consistency_prompt
|
|
from integrations.llm.schemas.report import ReportOutput, ClinicSnapshot, YouTubeAudit, BrandConsistencyOutput
|
|
from integrations.llm.schemas.plan import PlanOutput
|
|
|
|
logger = logging.getLogger(__name__)
|
|
|
|
|
|
async def generate_report(analysis_run_id: str) -> ReportOutput:
    """Assemble the prompt inputs for a run and request the report from the LLM.

    The inputs combine the scraped ``mainpage`` clinic fields, the market data
    for the run, and every other raw source (keyed by its source type).

    Args:
        analysis_run_id: Identifier of the analysis run to report on.

    Returns:
        Whatever ``LLMService.generate`` yields for ``report_prompt``
        (annotated as ``ReportOutput``).
    """
    raw = await select_run_raw_data(analysis_run_id)
    clinic = raw.get("mainpage") or {}
    market = await select_market(analysis_run_id)

    def _dump(value) -> str | None:
        # Falsy values (None, empty containers, 0, "") are forwarded as None.
        if not value:
            return None
        return json.dumps(value, ensure_ascii=False)

    input_data = {
        "clinic_name": clinic.get("clinicName"),
        "clinic_name_en": clinic.get("clinicNameEn"),
        "address": clinic.get("address"),
        "phone": clinic.get("phone"),
        "slogan": clinic.get("slogan"),
    }

    # Services/doctors are always serialized, defaulting to an empty list.
    input_data["services"] = json.dumps(clinic.get("services", []), ensure_ascii=False)
    input_data["doctors"] = json.dumps(clinic.get("doctors", []), ensure_ascii=False)

    market_fields = (
        ("market_competitors", "competitors"),
        ("market_keywords", "keywords"),
        ("market_trend", "trend"),
        ("market_target_audience", "target_audience"),
    )
    for prompt_key, market_key in market_fields:
        input_data[prompt_key] = _dump(market.get(market_key))

    # Every non-mainpage source is passed through under its own source type;
    # a colliding key overrides the value set above.
    for source_type, data in raw.items():
        if source_type != "mainpage":
            input_data[source_type] = _dump(data)

    llm = LLMService(provider="perplexity")
    return await llm.generate(report_prompt, input_data)
|
|
|
|
async def generate_plan(analysis_run_id: str) -> PlanOutput:
    """Assemble the prompt inputs for a run and request the plan from the LLM.

    The inputs combine the scraped ``mainpage`` clinic fields, the report
    stored on the run, and the market data for the run.

    Args:
        analysis_run_id: Identifier of the analysis run to plan for.

    Returns:
        Whatever ``LLMService.generate`` yields for ``plan_prompt``
        (annotated as ``PlanOutput``).
    """
    run = await select_run(analysis_run_id)
    raw = await select_run_raw_data(analysis_run_id)
    clinic = raw.get("mainpage") or {}

    # The stored report may be a JSON string or an already-decoded object.
    stored_report = run["report_data"]
    if isinstance(stored_report, str):
        report = json.loads(stored_report)
    else:
        report = stored_report

    market = await select_market(analysis_run_id)

    def _dump(value) -> str | None:
        # Falsy values (None, empty containers, 0, "") are forwarded as None.
        if not value:
            return None
        return json.dumps(value, ensure_ascii=False)

    input_data = {
        "clinic_name": clinic.get("clinicName"),
        "clinic_name_en": clinic.get("clinicNameEn"),
        "address": clinic.get("address"),
        "phone": clinic.get("phone"),
        "slogan": clinic.get("slogan"),
    }

    # Services/doctors are always serialized, defaulting to an empty list.
    input_data["services"] = json.dumps(clinic.get("services", []), ensure_ascii=False)
    input_data["doctors"] = json.dumps(clinic.get("doctors", []), ensure_ascii=False)
    input_data["report"] = _dump(report)

    market_fields = (
        ("market_competitors", "competitors"),
        ("market_keywords", "keywords"),
        ("market_trend", "trend"),
        ("market_target_audience", "target_audience"),
    )
    for prompt_key, market_key in market_fields:
        input_data[prompt_key] = _dump(market.get(market_key))

    llm = LLMService(provider="perplexity")
    return await llm.generate(plan_prompt, input_data)
|
|
|
|
|
|
def _build_clinic_snapshot(gangnam_unni: dict, hospital: dict) -> dict:
    """Build the clinic snapshot section from scraped source dicts.

    Only truthy source values are copied, so absent or empty fields are left
    for ``ClinicSnapshot`` to fill in however the model defines.

    Args:
        gangnam_unni: Raw ``gangnam_unni`` source data (name, rating, reviews,
            address, badges, staff count, doctors).
        hospital: Raw ``mainpage`` source data (English name, phone, domain or
            source URL).

    Returns:
        The snapshot validated through ``ClinicSnapshot`` and dumped to a dict.
    """
    snapshot: dict = {}

    doctors = gangnam_unni.get("doctors", [])
    # The lead doctor is the one with the most reviews. ``or 0`` keeps the
    # comparison valid when an entry carries ``"reviews": None``.
    lead = max(doctors, key=lambda d: d.get("reviews") or 0) if doctors else None

    if gangnam_unni.get("name"):
        snapshot["name"] = gangnam_unni["name"]
    if hospital.get("clinicNameEn"):
        snapshot["name_en"] = hospital["clinicNameEn"]
    if hospital.get("phone"):
        snapshot["phone"] = hospital["phone"]

    # Prefer the explicit domain; otherwise derive it from the source URL.
    domain = hospital.get("domain") or urlparse(hospital.get("sourceUrl") or "").netloc
    if domain:
        snapshot["domain"] = domain

    if gangnam_unni.get("rating"):
        snapshot["overall_rating"] = gangnam_unni["rating"]
    if gangnam_unni.get("totalReviews"):
        snapshot["total_reviews"] = gangnam_unni["totalReviews"]
    if gangnam_unni.get("address"):
        snapshot["location"] = gangnam_unni["address"]
    if gangnam_unni.get("badges"):
        snapshot["certifications"] = gangnam_unni["badges"]
    if gangnam_unni.get("totalMajorStaffs"):
        snapshot["staff_count"] = gangnam_unni["totalMajorStaffs"]

    if lead:
        snapshot["lead_doctor"] = {
            "name": lead.get("name"),
            "credentials": lead.get("specialty"),
            "rating": lead.get("rating"),
            "review_count": lead.get("reviews"),
        }

    return ClinicSnapshot.model_validate(snapshot).model_dump()
|
|
|
|
|
|
def _parse_iso_duration_seconds(iso: str) -> int:
|
|
m = re.match(r"PT(?:(\d+)H)?(?:(\d+)M)?(?:(\d+)S)?", iso or "")
|
|
if not m:
|
|
return 0
|
|
h, mins, s = (int(x or 0) for x in m.groups())
|
|
return h * 3600 + mins * 60 + s
|
|
|
|
|
|
def _format_seconds(seconds: int) -> str:
|
|
m, s = divmod(seconds, 60)
|
|
h, m = divmod(m, 60)
|
|
return f"{h}시간 {m}분" if h else f"{m}분 {s}초"
|
|
|
|
|
|
def _format_clock(seconds: int) -> str:
|
|
m, s = divmod(seconds, 60)
|
|
h, m = divmod(m, 60)
|
|
return f"{h}:{m:02d}:{s:02d}" if h else f"{m}:{s:02d}"
|
|
|
|
|
|
def _calc_avg_video_length(videos: list[dict]) -> str:
|
|
durations = [_parse_iso_duration_seconds(v.get("duration", "")) for v in videos]
|
|
durations = [d for d in durations if d > 0]
|
|
if not durations:
|
|
return ""
|
|
return _format_seconds(sum(durations) // len(durations))
|
|
|
|
|
|
def _relative_date(date_str: str) -> str:
|
|
if not date_str:
|
|
return ""
|
|
try:
|
|
past = datetime.fromisoformat(date_str[:10])
|
|
except ValueError:
|
|
return ""
|
|
days = (datetime.now() - past).days
|
|
if days < 1:
|
|
return "오늘"
|
|
if days < 30:
|
|
return f"{days}일 전"
|
|
if days < 365:
|
|
return f"{days // 30}개월 전"
|
|
return f"{days // 365}년 전"
|
|
|
|
|
|
def _calc_upload_frequency(videos: list[dict]) -> str:
|
|
dates = sorted(
|
|
[v["date"][:10] for v in videos if v.get("date")],
|
|
reverse=True,
|
|
)
|
|
if len(dates) < 2:
|
|
return ""
|
|
gaps = [
|
|
(datetime.fromisoformat(dates[i]) - datetime.fromisoformat(dates[i + 1])).days
|
|
for i in range(len(dates) - 1)
|
|
]
|
|
avg_days = sum(gaps) // len(gaps)
|
|
if avg_days <= 7:
|
|
return f"주 {7 // max(avg_days, 1)}회"
|
|
if avg_days <= 30:
|
|
return f"월 {30 // avg_days}회"
|
|
return f"{avg_days}일에 1회"
|
|
|
|
|
|
async def _build_youtube_audit(youtube: dict) -> dict:
    """Build the YouTube audit section from scraped channel data.

    Channel fields are copied when truthy, per-video rows are derived from
    ``videos``, and a diagnosis is requested from the LLM using
    ``youtube_diagnosis_prompt``.

    Args:
        youtube: Raw ``youtube`` source data. ``videos`` may be missing or
            ``None``.

    Returns:
        The audit validated through ``YouTubeAudit`` and dumped to a dict.
    """
    videos = youtube.get("videos") or []

    yt_patch: dict = {
        "weekly_view_growth": {"absolute": 0, "percentage": 0.0},
        "estimated_monthly_revenue": {"min": 0, "max": 0},
        "linked_urls": [],
        "avg_video_length": _calc_avg_video_length(videos),
        "upload_frequency": _calc_upload_frequency(videos),
    }

    if youtube.get("channelName"):
        yt_patch["channel_name"] = youtube["channelName"]
    if youtube.get("handle"):
        yt_patch["handle"] = youtube["handle"]
    if youtube.get("subscribers"):
        yt_patch["subscribers"] = youtube["subscribers"]
    if youtube.get("totalVideos"):
        yt_patch["total_videos"] = youtube["totalVideos"]
    if youtube.get("totalViews"):
        yt_patch["total_views"] = youtube["totalViews"]
    if youtube.get("publishedAt"):
        # Keep only the date part of the timestamp.
        yt_patch["channel_created_date"] = youtube["publishedAt"][:10]
    if youtube.get("description"):
        yt_patch["channel_description"] = youtube["description"]
    if youtube.get("playlists"):
        yt_patch["playlists"] = youtube["playlists"]

    if videos:
        top_videos = []
        for v in videos:
            seconds = _parse_iso_duration_seconds(v.get("duration") or "")
            top_videos.append(
                {
                    "title": v["title"],
                    "views": v["views"],
                    "duration": _format_clock(seconds),
                    # Classify on the parsed length rather than on whether the
                    # raw string contains "M": hour-only durations such as
                    # "PT1H" have no minute part but are not short.
                    "type": "Short" if seconds < 60 else "Long",
                    "uploaded_ago": _relative_date(v.get("date", "")),
                }
            )
        yt_patch["top_videos"] = top_videos

    diagnosis_result = await LLMService(provider="perplexity").generate(
        youtube_diagnosis_prompt,
        {
            "channel_name": yt_patch.get("channel_name"),
            "subscribers": yt_patch.get("subscribers"),
            "total_videos": yt_patch.get("total_videos"),
            "total_views": yt_patch.get("total_views"),
            "avg_video_length": yt_patch.get("avg_video_length"),
            "upload_frequency": yt_patch.get("upload_frequency"),
            "top_videos": json.dumps(yt_patch.get("top_videos", []), ensure_ascii=False),
            "playlists": json.dumps(yt_patch.get("playlists", []), ensure_ascii=False),
        },
    )
    yt_patch["diagnosis"] = [item.model_dump() for item in diagnosis_result.diagnosis]

    return YouTubeAudit.model_validate(yt_patch).model_dump()
|
|
|
|
|
|
async def _build_overrides(analysis_run_id: str) -> dict:
    """Collect report sections derived directly from scraped data.

    Builds the clinic snapshot, YouTube audit, Instagram audit and Facebook
    audit (including brand inconsistencies) for the run. Sections that end up
    empty are omitted.

    Args:
        analysis_run_id: Identifier of the analysis run.

    Returns:
        Mapping of report section name to override data; empty when the run
        has no raw data.
    """
    raw = await select_run_raw_data(analysis_run_id)
    if not raw:
        return {}

    def _source(name: str) -> dict:
        # A missing or falsy source is treated as an empty dict.
        return raw.get(name) or {}

    def _pick(source: dict, mapping: tuple) -> dict:
        # Copy only truthy source values, renaming source key -> target key.
        return {target: source[key] for key, target in mapping if source.get(key)}

    mainpage = _source("mainpage")
    instagram = _source("instagram")
    facebook = _source("facebook")
    youtube = _source("youtube")
    gangnam_unni = _source("gangnam_unni")

    snapshot: dict = _build_clinic_snapshot(gangnam_unni, mainpage)
    yt_patch: dict = await _build_youtube_audit(youtube)

    # ── instagram ─────────────────────────────────────────────────────────────
    ig_patch: dict = _pick(
        instagram,
        (
            ("username", "handle"),
            ("posts", "posts"),
            ("followers", "followers"),
            ("following", "following"),
            ("bio", "bio"),
        ),
    )
    if instagram.get("username"):
        ig_patch["profile_link"] = f"https://www.instagram.com/{instagram['username']}/"

    # ── facebook ──────────────────────────────────────────────────────────────
    fb_pages: dict = _pick(
        facebook,
        (
            ("pageUrl", "url"),
            ("pageUrl", "link"),
            ("pageName", "page_name"),
            ("followers", "followers"),
            ("intro", "bio"),
        ),
    )
    if facebook.get("categories"):
        fb_pages["category"] = ", ".join(facebook["categories"])
    if facebook.get("website"):
        fb_pages["linked_domain"] = facebook["website"]

    brand = await generate_brand_consistency(analysis_run_id)
    brand_patch = brand.model_dump()["brand_inconsistencies"]

    fb_patch: dict = {}
    if fb_pages:
        fb_patch["pages"] = [fb_pages]
    if brand_patch:
        fb_patch["brand_inconsistencies"] = brand_patch

    # Only non-empty sections become overrides.
    candidates = (
        ("clinic_snapshot", snapshot, snapshot),
        ("instagram_audit", ig_patch, {"accounts": [ig_patch]}),
        ("facebook_audit", fb_patch, fb_patch),
        ("youtube_audit", yt_patch, yt_patch),
    )
    overrides: dict = {}
    for section, probe, payload in candidates:
        if probe:
            overrides[section] = payload
    return overrides
|
|
|
|
|
|
def _deep_merge(base: dict, overrides: dict) -> dict:
|
|
for k, v in overrides.items():
|
|
if isinstance(v, dict) and isinstance(base.get(k), dict):
|
|
_deep_merge(base[k], v)
|
|
elif isinstance(v, list) and isinstance(base.get(k), list):
|
|
for i, item in enumerate(v):
|
|
if i < len(base[k]) and isinstance(item, dict) and isinstance(base[k][i], dict):
|
|
_deep_merge(base[k][i], item)
|
|
else:
|
|
base[k] = v
|
|
return base
|
|
|
|
def _patch_report(result: ReportOutput, overrides: dict) -> ReportOutput:
    """Return a new report with the overridden sections swapped in.

    Every top-level key present in ``overrides`` is removed from the dumped
    report before merging, so those sections are taken from ``overrides``
    as-is.
    """
    remaining = {
        key: value
        for key, value in result.model_dump().items()
        if key not in overrides
    }
    merged = _deep_merge(remaining, overrides)
    return ReportOutput(**merged)
|
|
|
|
|
|
# Runs whose main-page URL contains one of these domains are served from
# fixture files instead of calling the LLM.
_MOCK_DOMAINS = {"viewclinic.com"}
# Report fixture, resolved relative to this module.
_MOCK_REPORT_PATH = os.path.join(os.path.dirname(__file__), "../mock/report_viewclinic.json")


async def _is_mock(analysis_run_id: str) -> bool:
    """Tell whether the run should use mock fixtures.

    Args:
        analysis_run_id: Identifier of the analysis run.

    Returns:
        True when the run's main-page URL contains any entry of
        ``_MOCK_DOMAINS`` (substring match); False otherwise, including when
        no URL is stored for the run.
    """
    url = await select_run_mainpage_url(analysis_run_id)
    if not url:
        # A missing URL would otherwise raise TypeError on the ``in`` test below.
        return False
    return any(domain in url for domain in _MOCK_DOMAINS)
|
|
|
|
|
|
def _load_mock_report() -> ReportOutput:
    """Load the report fixture at ``_MOCK_REPORT_PATH`` into a ``ReportOutput``."""
    with open(_MOCK_REPORT_PATH, encoding="utf-8") as fixture:
        payload = json.load(fixture)
    return ReportOutput(**payload)
|
|
|
|
|
|
# Plan fixture, resolved relative to this module.
_MOCK_PLAN_PATH = os.path.join(os.path.dirname(__file__), "../mock/plan_viewclinic.json")


def _load_mock_plan() -> PlanOutput:
    """Load the plan fixture at ``_MOCK_PLAN_PATH`` into a ``PlanOutput``."""
    with open(_MOCK_PLAN_PATH, encoding="utf-8") as fixture:
        payload = json.load(fixture)
    return PlanOutput(**payload)
|
|
|
|
|
|
async def generate_brand_consistency(analysis_run_id: str) -> BrandConsistencyOutput:
    """Ask the LLM to compare the clinic's branding across scraped sources.

    Sends the clinic name plus the JSON-serialized raw data of each source
    to ``brand_consistency_prompt``.

    Args:
        analysis_run_id: Identifier of the analysis run.

    Returns:
        Whatever ``LLMService.generate`` yields for the prompt
        (annotated as ``BrandConsistencyOutput``).
    """
    raw = await select_run_raw_data(analysis_run_id)

    def _dump(value) -> str | None:
        # Falsy values (None, empty containers) are forwarded as None.
        if not value:
            return None
        return json.dumps(value, ensure_ascii=False)

    mainpage = raw.get("mainpage") or {}

    input_data = {
        "clinic_name": mainpage.get("clinicName"),
        "mainpage": _dump(mainpage),
    }
    for source in ("instagram", "facebook", "youtube", "gangnam_unni"):
        input_data[source] = _dump(raw.get(source))

    llm = LLMService(provider="perplexity")
    return await llm.generate(brand_consistency_prompt, input_data)
|
|
|
|
|
|
async def run_report_task(analysis_run_id: str) -> None:
    """Generate the report for a run, patch it with scraped data and store it.

    Mock runs load the fixture report (with ``linked_urls`` cleared) instead
    of calling the LLM. Either way the report is patched with the overrides
    built from raw data before being saved.
    """
    logger.info("[report] start run=%s", analysis_run_id)

    use_mock = await _is_mock(analysis_run_id)
    if use_mock:
        logger.info("[report] mock mode run=%s", analysis_run_id)
        report = _load_mock_report()
        report.youtube_audit.linked_urls = []
    else:
        report = await generate_report(analysis_run_id)

    overrides = await _build_overrides(analysis_run_id)
    report = _patch_report(report, overrides)

    await update_run_report(analysis_run_id, report.model_dump())
    logger.info("[report] done run=%s", analysis_run_id)
|
|
|
|
|
|
async def run_plan_task(analysis_run_id: str) -> None:
    """Generate the plan for a run and store it.

    Mock runs load the fixture plan instead of calling the LLM.
    """
    logger.info("[plan] start run=%s", analysis_run_id)

    use_mock = await _is_mock(analysis_run_id)
    if use_mock:
        logger.info("[plan] mock mode run=%s", analysis_run_id)
        plan = _load_mock_plan()
    else:
        plan = await generate_plan(analysis_run_id)

    await update_run_plan(analysis_run_id, plan.model_dump())
    logger.info("[plan] done run=%s", analysis_run_id)
|