From dca0c78860bf0ff08d24dea6de39f18e41bd9607 Mon Sep 17 00:00:00 2001 From: Mina Choi Date: Fri, 29 May 2026 10:44:53 +0900 Subject: [PATCH] =?UTF-8?q?fix(url):=20=5Fwith=5Fscheme=20=EA=B0=95?= =?UTF-8?q?=ED=99=94=20=E2=80=94=20www=20=EC=9E=90=EB=8F=99=20=EB=B3=B4?= =?UTF-8?q?=EA=B0=95=20+=20=EC=A4=91=EC=B2=A9=20https://=20=EC=A0=95?= =?UTF-8?q?=EB=A6=AC=20+=20API=20=EC=9E=85=EB=A0=A5=20=EC=A0=81=EC=9A=A9?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 문제 1: gangnamunni.com 의 SSL 인증서가 www.gangnamunni.com 에만 유효 → 사용자가 'gangnamunni.com/hospitals/189' 같이 줬을 때 클릭 시 브라우저 SSL warning. 문제 2: LLM 출력에 'https://www.facebook.com/https://facebook.com/X' 같이 중첩된 URL이 가끔 박힘. 수정 (_with_scheme): - 중첩된 'http(s)://' 발견 시 마지막 URL 만 잘라 사용 - _WWW_REQUIRED 도메인 (gangnamunni / facebook / instagram) 은 bare 도메인이면 www. 자동 보강 api/analysis.py: main 채널(instagram/facebook/naver_blog/youtube/gangnam_unni) URL 도 _with_scheme 적용해서 DB에 정규화된 형태로 저장. 이전엔 extra channels (tiktok/EN/카카오톡/카페) 에만 적용돼있어서 강남언니 같은 main 채널이 빠져있었음. Co-Authored-By: Claude Opus 4.7 (1M context) --- app/api/analysis.py | 15 ++++++++++----- app/common/utils.py | 25 +++++++++++++++++++++++-- 2 files changed, 33 insertions(+), 7 deletions(-) diff --git a/app/api/analysis.py b/app/api/analysis.py index ed4b0ae..89a77af 100644 --- a/app/api/analysis.py +++ b/app/api/analysis.py @@ -27,6 +27,8 @@ def _extra_channels_from_mockurls(homepage_url: str) -> dict: "tiktok": _with_scheme(urls.get("tiktok")), "instagram_en": _with_scheme(urls.get("instagramEn")), "facebook_en": _with_scheme(urls.get("facebookEn")), + "kakao_talk": _with_scheme(urls.get("kakaoTalk")), + "naver_cafe": _with_scheme(urls.get("naverCafe")), } return {} @@ -45,11 +47,12 @@ async def start_analysis(body: AnalysisCreate, background_tasks: BackgroundTasks if not hospital: raise HTTPException(status_code=409, detail="Clinic not found") - ig_id = await insert_instagram_row(hospital_id, body.channels.instagram) if body.channels.instagram else None - fb_id = await insert_facebook_row(hospital_id, body.channels.facebook) if body.channels.facebook else None - nb_id = await insert_naver_blog_row(hospital_id, body.channels.naver_blog) if body.channels.naver_blog else None - yt_id = await insert_youtube_row(hospital_id, body.channels.youtube) if body.channels.youtube else None - gu_id = await insert_gangnam_unni_row(hospital_id, body.channels.gangnam_unni) if body.channels.gangnam_unni else None + # 사용자가 'gangnamunni.com/...' 같이 scheme/www 없이 줘도 _with_scheme이 https://www. 보강. + ig_id = await insert_instagram_row(hospital_id, _with_scheme(body.channels.instagram)) if body.channels.instagram else None + fb_id = await insert_facebook_row(hospital_id, _with_scheme(body.channels.facebook)) if body.channels.facebook else None + nb_id = await insert_naver_blog_row(hospital_id, _with_scheme(body.channels.naver_blog)) if body.channels.naver_blog else None + yt_id = await insert_youtube_row(hospital_id, _with_scheme(body.channels.youtube)) if body.channels.youtube else None + gu_id = await insert_gangnam_unni_row(hospital_id, _with_scheme(body.channels.gangnam_unni)) if body.channels.gangnam_unni else None analysis_run_id = await insert_analysis_run( analysis_run_id, hospital_id, hospital["owner_user_id"], @@ -62,6 +65,8 @@ async def start_analysis(body: AnalysisCreate, background_tasks: BackgroundTasks "tiktok": body.channels.tiktok or mock_extra.get("tiktok"), "instagram_en": body.channels.instagram_en or mock_extra.get("instagram_en"), "facebook_en": body.channels.facebook_en or mock_extra.get("facebook_en"), + "kakao_talk": body.channels.kakao_talk or mock_extra.get("kakao_talk"), + "naver_cafe": body.channels.naver_cafe or mock_extra.get("naver_cafe"), } logger.info("[analysis] extra_channels=%s (mock_matched=%s)", extra_channels, bool(mock_extra)) background_tasks.add_task(run_pipeline, analysis_run_id, extra_channels) diff --git a/app/common/utils.py b/app/common/utils.py index 783937d..bf76b98 100644 --- a/app/common/utils.py +++ b/app/common/utils.py @@ -61,6 +61,27 @@ def _normalize_homepage(url: str) -> str: return u.rstrip("/") +# SSL 인증서가 www.* 에만 유효한 도메인 — bare 도메인이면 사용자 클릭 시 브라우저 SSL warning 뜸. +_WWW_REQUIRED = ("gangnamunni.com", "facebook.com", "instagram.com") + + def _with_scheme(u: str | None) -> str | None: - """scheme 없는 URL에 https:// 보정 (수집기 파싱용). 빈 값은 None.""" - return (u if "://" in u else "https://" + u) if u else None + """scheme 없는 URL에 https:// 보정 (수집기/링크 표시용). 빈 값은 None. + + 중첩된 https://가 끼어있으면 마지막 URL만 추출 (LLM이 가끔 'https://www.X/https://Y' 같이 만듦). + + SSL 엄격 도메인(gangnamunni/facebook/instagram)은 www. 자동 보강.""" + if not u: + return None + u = u.strip() + # 'https://www.facebook.com/https://facebook.com/X' 같은 중첩 → 마지막 'http(s)://' 부터 잘라 사용 + last = max(u.rfind("https://"), u.rfind("http://")) + if last > 0: + u = u[last:] + if "://" not in u: + u = "https://" + u + # scheme 뒤가 www. 없이 SSL 엄격 도메인이면 www. 추가 + for dom in _WWW_REQUIRED: + for scheme in ("https://", "http://"): + if u.startswith(scheme + dom): + u = scheme + "www." + u[len(scheme):] + break + return u