diff --git a/app/api/analysis.py b/app/api/analysis.py index ed4b0ae..89a77af 100644 --- a/app/api/analysis.py +++ b/app/api/analysis.py @@ -27,6 +27,8 @@ def _extra_channels_from_mockurls(homepage_url: str) -> dict: "tiktok": _with_scheme(urls.get("tiktok")), "instagram_en": _with_scheme(urls.get("instagramEn")), "facebook_en": _with_scheme(urls.get("facebookEn")), + "kakao_talk": _with_scheme(urls.get("kakaoTalk")), + "naver_cafe": _with_scheme(urls.get("naverCafe")), } return {} @@ -45,11 +47,12 @@ async def start_analysis(body: AnalysisCreate, background_tasks: BackgroundTasks if not hospital: raise HTTPException(status_code=409, detail="Clinic not found") - ig_id = await insert_instagram_row(hospital_id, body.channels.instagram) if body.channels.instagram else None - fb_id = await insert_facebook_row(hospital_id, body.channels.facebook) if body.channels.facebook else None - nb_id = await insert_naver_blog_row(hospital_id, body.channels.naver_blog) if body.channels.naver_blog else None - yt_id = await insert_youtube_row(hospital_id, body.channels.youtube) if body.channels.youtube else None - gu_id = await insert_gangnam_unni_row(hospital_id, body.channels.gangnam_unni) if body.channels.gangnam_unni else None + # 사용자가 'gangnamunni.com/...' 같이 scheme/www 없이 줘도 _with_scheme이 https://www. 보강. + ig_id = await insert_instagram_row(hospital_id, _with_scheme(body.channels.instagram)) if body.channels.instagram else None + fb_id = await insert_facebook_row(hospital_id, _with_scheme(body.channels.facebook)) if body.channels.facebook else None + nb_id = await insert_naver_blog_row(hospital_id, _with_scheme(body.channels.naver_blog)) if body.channels.naver_blog else None + yt_id = await insert_youtube_row(hospital_id, _with_scheme(body.channels.youtube)) if body.channels.youtube else None + gu_id = await insert_gangnam_unni_row(hospital_id, _with_scheme(body.channels.gangnam_unni)) if body.channels.gangnam_unni else None analysis_run_id = await insert_analysis_run( analysis_run_id, hospital_id, hospital["owner_user_id"], @@ -62,6 +65,8 @@ async def start_analysis(body: AnalysisCreate, background_tasks: BackgroundTasks "tiktok": body.channels.tiktok or mock_extra.get("tiktok"), "instagram_en": body.channels.instagram_en or mock_extra.get("instagram_en"), "facebook_en": body.channels.facebook_en or mock_extra.get("facebook_en"), + "kakao_talk": body.channels.kakao_talk or mock_extra.get("kakao_talk"), + "naver_cafe": body.channels.naver_cafe or mock_extra.get("naver_cafe"), } logger.info("[analysis] extra_channels=%s (mock_matched=%s)", extra_channels, bool(mock_extra)) background_tasks.add_task(run_pipeline, analysis_run_id, extra_channels) diff --git a/app/common/utils.py b/app/common/utils.py index 783937d..bf76b98 100644 --- a/app/common/utils.py +++ b/app/common/utils.py @@ -61,6 +61,27 @@ def _normalize_homepage(url: str) -> str: return u.rstrip("/") +# SSL 인증서가 www.* 에만 유효한 도메인 — bare 도메인이면 사용자 클릭 시 브라우저 SSL warning 뜸. +_WWW_REQUIRED = ("gangnamunni.com", "facebook.com", "instagram.com") + + def _with_scheme(u: str | None) -> str | None: - """scheme 없는 URL에 https:// 보정 (수집기 파싱용). 빈 값은 None.""" - return (u if "://" in u else "https://" + u) if u else None + """scheme 없는 URL에 https:// 보정 (수집기/링크 표시용). 빈 값은 None. + + 중첩된 https://가 끼어있으면 마지막 URL만 추출 (LLM이 가끔 'https://www.X/https://Y' 같이 만듦). + + SSL 엄격 도메인(gangnamunni/facebook/instagram)은 www. 자동 보강.""" + if not u: + return None + u = u.strip() + # 'https://www.facebook.com/https://facebook.com/X' 같은 중첩 → 마지막 'http(s)://' 부터 잘라 사용 + last = max(u.rfind("https://"), u.rfind("http://")) + if last > 0: + u = u[last:] + if "://" not in u: + u = "https://" + u + # scheme 뒤가 www. 없이 SSL 엄격 도메인이면 www. 추가 + for dom in _WWW_REQUIRED: + for scheme in ("https://", "http://"): + if u.startswith(scheme + dom): + u = scheme + "www." + u[len(scheme):] + break + return u