fix(url): _with_scheme 강화 — www 자동 보강 + 중첩 https:// 정리 + API 입력 적용
문제 1: gangnamunni.com 의 SSL 인증서가 www.gangnamunni.com 에만 유효 → 사용자가 'gangnamunni.com/hospitals/189' 처럼 입력하면 링크 클릭 시 브라우저에 SSL 경고가 표시됨.
문제 2: LLM 출력에 'https://www.facebook.com/https://facebook.com/X' 처럼 중첩된 URL이 가끔 포함됨.

수정 (_with_scheme):
- 중첩된 'http(s)://' 발견 시 마지막 URL 만 잘라 사용
- _WWW_REQUIRED 도메인 (gangnamunni / facebook / instagram) 은 bare 도메인이면 www. 자동 보강

api/analysis.py:
main 채널(instagram/facebook/naver_blog/youtube/gangnam_unni) URL 에도 _with_scheme 을 적용해서 DB에 정규화된 형태로 저장. 이전엔 extra channels (tiktok/EN/카카오톡/카페) 에만 적용돼 있어서 강남언니 같은 main 채널이 빠져 있었음.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

channel-brand
parent
db42805fdb
commit
dca0c78860
|
|
@ -27,6 +27,8 @@ def _extra_channels_from_mockurls(homepage_url: str) -> dict:
|
||||||
"tiktok": _with_scheme(urls.get("tiktok")),
|
"tiktok": _with_scheme(urls.get("tiktok")),
|
||||||
"instagram_en": _with_scheme(urls.get("instagramEn")),
|
"instagram_en": _with_scheme(urls.get("instagramEn")),
|
||||||
"facebook_en": _with_scheme(urls.get("facebookEn")),
|
"facebook_en": _with_scheme(urls.get("facebookEn")),
|
||||||
|
"kakao_talk": _with_scheme(urls.get("kakaoTalk")),
|
||||||
|
"naver_cafe": _with_scheme(urls.get("naverCafe")),
|
||||||
}
|
}
|
||||||
return {}
|
return {}
|
||||||
|
|
||||||
|
|
@ -45,11 +47,12 @@ async def start_analysis(body: AnalysisCreate, background_tasks: BackgroundTasks
|
||||||
if not hospital:
|
if not hospital:
|
||||||
raise HTTPException(status_code=409, detail="Clinic not found")
|
raise HTTPException(status_code=409, detail="Clinic not found")
|
||||||
|
|
||||||
ig_id = await insert_instagram_row(hospital_id, body.channels.instagram) if body.channels.instagram else None
|
# 사용자가 'gangnamunni.com/...' 같이 scheme/www 없이 줘도 _with_scheme이 https://www. 보강.
|
||||||
fb_id = await insert_facebook_row(hospital_id, body.channels.facebook) if body.channels.facebook else None
|
ig_id = await insert_instagram_row(hospital_id, _with_scheme(body.channels.instagram)) if body.channels.instagram else None
|
||||||
nb_id = await insert_naver_blog_row(hospital_id, body.channels.naver_blog) if body.channels.naver_blog else None
|
fb_id = await insert_facebook_row(hospital_id, _with_scheme(body.channels.facebook)) if body.channels.facebook else None
|
||||||
yt_id = await insert_youtube_row(hospital_id, body.channels.youtube) if body.channels.youtube else None
|
nb_id = await insert_naver_blog_row(hospital_id, _with_scheme(body.channels.naver_blog)) if body.channels.naver_blog else None
|
||||||
gu_id = await insert_gangnam_unni_row(hospital_id, body.channels.gangnam_unni) if body.channels.gangnam_unni else None
|
yt_id = await insert_youtube_row(hospital_id, _with_scheme(body.channels.youtube)) if body.channels.youtube else None
|
||||||
|
gu_id = await insert_gangnam_unni_row(hospital_id, _with_scheme(body.channels.gangnam_unni)) if body.channels.gangnam_unni else None
|
||||||
|
|
||||||
analysis_run_id = await insert_analysis_run(
|
analysis_run_id = await insert_analysis_run(
|
||||||
analysis_run_id, hospital_id, hospital["owner_user_id"],
|
analysis_run_id, hospital_id, hospital["owner_user_id"],
|
||||||
|
|
@ -62,6 +65,8 @@ async def start_analysis(body: AnalysisCreate, background_tasks: BackgroundTasks
|
||||||
"tiktok": body.channels.tiktok or mock_extra.get("tiktok"),
|
"tiktok": body.channels.tiktok or mock_extra.get("tiktok"),
|
||||||
"instagram_en": body.channels.instagram_en or mock_extra.get("instagram_en"),
|
"instagram_en": body.channels.instagram_en or mock_extra.get("instagram_en"),
|
||||||
"facebook_en": body.channels.facebook_en or mock_extra.get("facebook_en"),
|
"facebook_en": body.channels.facebook_en or mock_extra.get("facebook_en"),
|
||||||
|
"kakao_talk": body.channels.kakao_talk or mock_extra.get("kakao_talk"),
|
||||||
|
"naver_cafe": body.channels.naver_cafe or mock_extra.get("naver_cafe"),
|
||||||
}
|
}
|
||||||
logger.info("[analysis] extra_channels=%s (mock_matched=%s)", extra_channels, bool(mock_extra))
|
logger.info("[analysis] extra_channels=%s (mock_matched=%s)", extra_channels, bool(mock_extra))
|
||||||
background_tasks.add_task(run_pipeline, analysis_run_id, extra_channels)
|
background_tasks.add_task(run_pipeline, analysis_run_id, extra_channels)
|
||||||
|
|
|
||||||
|
|
@ -61,6 +61,27 @@ def _normalize_homepage(url: str) -> str:
|
||||||
return u.rstrip("/")
|
return u.rstrip("/")
|
||||||
|
|
||||||
|
|
||||||
|
# SSL 인증서가 www.* 에만 유효한 도메인 — bare 도메인이면 사용자 클릭 시 브라우저 SSL warning 뜸.
|
||||||
|
_WWW_REQUIRED = ("gangnamunni.com", "facebook.com", "instagram.com")
|
||||||
|
|
||||||
|
|
||||||
def _with_scheme(u: str | None) -> str | None:
|
def _with_scheme(u: str | None) -> str | None:
|
||||||
"""scheme 없는 URL에 https:// 보정 (수집기 파싱용). 빈 값은 None."""
|
"""scheme 없는 URL에 https:// 보정 (수집기/링크 표시용). 빈 값은 None.
|
||||||
return (u if "://" in u else "https://" + u) if u else None
|
+ 중첩된 https://가 끼어있으면 마지막 URL만 추출 (LLM이 가끔 'https://www.X/https://Y' 같이 만듦).
|
||||||
|
+ SSL 엄격 도메인(gangnamunni/facebook/instagram)은 www. 자동 보강."""
|
||||||
|
if not u:
|
||||||
|
return None
|
||||||
|
u = u.strip()
|
||||||
|
# 'https://www.facebook.com/https://facebook.com/X' 같은 중첩 → 마지막 'http(s)://' 부터 잘라 사용
|
||||||
|
last = max(u.rfind("https://"), u.rfind("http://"))
|
||||||
|
if last > 0:
|
||||||
|
u = u[last:]
|
||||||
|
if "://" not in u:
|
||||||
|
u = "https://" + u
|
||||||
|
# scheme 뒤가 www. 없이 SSL 엄격 도메인이면 www. 추가
|
||||||
|
for dom in _WWW_REQUIRED:
|
||||||
|
for scheme in ("https://", "http://"):
|
||||||
|
if u.startswith(scheme + dom):
|
||||||
|
u = scheme + "www." + u[len(scheme):]
|
||||||
|
break
|
||||||
|
return u
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue