From 3f75b6d61d685115c801639fdb8df0d553f9fb92 Mon Sep 17 00:00:00 2001
From: Dohyun Lim
Date: Mon, 12 Jan 2026 16:50:16 +0900
Subject: [PATCH] add facilities from result of crawling

---
 app/home/api/routers/v1/home.py                                         |   7 +-
 app/utils/chatgpt_prompt.py                                             |  22 +++-
 app/utils/nvMapPwScraper.py                                             | 116 ++++++++++++++
 poc/crawling/2026-01-12/main-PwScraper.py                               |  32 ++++
 poc/crawling/2026-01-12/nvMapPwScraper.py                               | 116 ++++++++++++++
 poc/crawling/{nvMapScraper-2026-01-12.py => 2026-01-12/nvMapScraper.py} |  10 +-
 6 files changed, 294 insertions(+), 9 deletions(-)
 create mode 100644 app/utils/nvMapPwScraper.py
 create mode 100644 poc/crawling/2026-01-12/main-PwScraper.py
 create mode 100644 poc/crawling/2026-01-12/nvMapPwScraper.py
 rename poc/crawling/{nvMapScraper-2026-01-12.py => 2026-01-12/nvMapScraper.py} (96%)
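
How the new pieces chain together, as a rough sketch. It assumes the existing
nvMapScraper is importable next to nvMapPwScraper and exposes the facility_info
attribute that home.py reads; the import paths below are placeholders, not
confirmed module locations:

    from app.utils.nvMapPwScraper import nvMapPwScraper
    from app.utils.nvMapScraper import nvMapScraper  # assumed path

    async def resolve_facilities(selected: dict) -> str:
        # one-time Playwright browser/context startup
        await nvMapPwScraper.initiate_scraper()
        # resolve the search item to its /place/ detail URL
        # (raises on load timeout or ambiguous results)
        async with nvMapPwScraper() as pw_scraper:
            place_url = await pw_scraper.get_place_id_url(selected)
        # hand the /place/ URL to the existing scraper flow
        scraper = nvMapScraper(place_url)
        await scraper.scrap()
        return scraper.facility_info
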
diff --git a/app/home/api/routers/v1/home.py b/app/home/api/routers/v1/home.py
index 440cb8b..fad8ca7 100644
--- a/app/home/api/routers/v1/home.py
+++ b/app/home/api/routers/v1/home.py
@@ -193,9 +193,12 @@ async def crawling(request_body: CrawlingRequest):
         logger.info(f"[crawling] Step 3-3: GPT API 호출 완료 - 응답 {len(raw_response)}자 ({step3_3_elapsed:.1f}ms)")
         print(f"[crawling] Step 3-3: GPT API 호출 완료 - 응답 {len(raw_response)}자 ({step3_3_elapsed:.1f}ms)")
 
-        # Step 3-4: 응답 파싱
+        # Step 3-4: 응답 파싱 (크롤링에서 가져온 facility_info 전달)
         step3_4_start = time.perf_counter()
-        parsed = await chatgpt_service.parse_marketing_analysis(raw_response)
+        print(f"[crawling] Step 3-4: 응답 파싱 시작 - facility_info: {scraper.facility_info}")
+        parsed = await chatgpt_service.parse_marketing_analysis(
+            raw_response, facility_info=scraper.facility_info
+        )
         marketing_analysis = MarketingAnalysis(**parsed)
         step3_4_elapsed = (time.perf_counter() - step3_4_start) * 1000
         print(f"[crawling] Step 3-4: 응답 파싱 완료 ({step3_4_elapsed:.1f}ms)")
diff --git a/app/utils/chatgpt_prompt.py b/app/utils/chatgpt_prompt.py
index 72583ba..115ffa3 100644
--- a/app/utils/chatgpt_prompt.py
+++ b/app/utils/chatgpt_prompt.py
@@ -347,9 +347,15 @@ class ChatgptService:
 
         return response_text
 
-    async def parse_marketing_analysis(self, raw_response: str) -> dict:
+    async def parse_marketing_analysis(
+        self, raw_response: str, facility_info: str | None = None
+    ) -> dict:
         """ChatGPT 마케팅 분석 응답을 파싱하고 요약하여 딕셔너리로 반환
 
+        Args:
+            raw_response: ChatGPT 마케팅 분석 응답 원문
+            facility_info: 크롤링에서 가져온 편의시설 정보 문자열
+
         Returns:
             dict: {"report": str, "tags": list[str], "facilities": list[str]}
         """
@@ -377,7 +383,7 @@ class ChatgptService:
         try:
             json_data = json.loads(json_match.group(1))
             tags = json_data.get("tags", [])
-            facilities = json_data.get("facilities", [])
+            print(f"[parse_marketing_analysis] GPT 응답에서 tags 파싱 완료: {tags}")
             # JSON 블록을 제외한 리포트 부분 추출
             report = raw_response[: json_match.start()].strip()
             # --- 구분자 제거
@@ -386,10 +392,22 @@ class ChatgptService:
             if report.endswith("---"):
                 report = report[:-3].strip()
         except json.JSONDecodeError:
+            print("[parse_marketing_analysis] JSON 파싱 실패")
             pass
 
+        # 크롤링에서 가져온 facility_info로 facilities 설정
+        print(f"[parse_marketing_analysis] 크롤링 facility_info 원본: {facility_info}")
+        if facility_info:
+            # 쉼표로 구분된 편의시설 문자열을 리스트로 변환
+            facilities = [f.strip() for f in facility_info.split(",") if f.strip()]
print(f"[parse_marketing_analysis] facility_info 파싱 결과: {facilities}") + else: + facilities = ["등록된 정보 없음"] + print("[parse_marketing_analysis] facility_info 없음 - '등록된 정보 없음' 설정") + # 리포트 내용을 500자로 요약 if report: report = await self.summarize_marketing(report) + print(f"[parse_marketing_analysis] 최종 facilities: {facilities}") return {"report": report, "tags": tags, "facilities": facilities} diff --git a/app/utils/nvMapPwScraper.py b/app/utils/nvMapPwScraper.py new file mode 100644 index 0000000..d724764 --- /dev/null +++ b/app/utils/nvMapPwScraper.py @@ -0,0 +1,113 @@ +import asyncio +from playwright.async_api import async_playwright +from urllib import parse + +class nvMapPwScraper(): + # cls vars + is_ready = False + _playwright = None + _browser = None + _context = None + _win_width = 1280 + _win_height = 720 + _max_retry = 30 # place id timeout threshold seconds + + # instance var + page = None + + @classmethod + def default_context_builder(cls): + context_builder_dict = {} + context_builder_dict['viewport'] = { + 'width' : cls._win_width, + 'height' : cls._win_height + } + context_builder_dict['screen'] = { + 'width' : cls._win_width, + 'height' : cls._win_height + } + context_builder_dict['user_agent'] = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/130.0.0.0 Safari/537.36" + context_builder_dict['locale'] = 'ko-KR' + context_builder_dict['timezone_id']='Asia/Seoul' + + return context_builder_dict + + @classmethod + async def initiate_scraper(cls): + if not cls._playwright: + cls._playwright = await async_playwright().start() + if not cls._browser: + cls._browser = await cls._playwright.chromium.launch(headless=True) + if not cls._context: + cls._context = await cls._browser.new_context(**cls.default_context_builder()) + cls.is_ready = True + + def __init__(self): + if not self.is_ready: + raise Exception("nvMapScraper is not initiated") + + async def __aenter__(self): + await self.create_page() + return self + + async def __aexit__(self, exc_type, exc, tb): + await self.page.close() + + async def create_page(self): + self.page = await self._context.new_page() + await self.page.add_init_script( +'''const defaultGetter = Object.getOwnPropertyDescriptor( + Navigator.prototype, + "webdriver" +).get; +defaultGetter.apply(navigator); +defaultGetter.toString(); +Object.defineProperty(Navigator.prototype, "webdriver", { + set: undefined, + enumerable: true, + configurable: true, + get: new Proxy(defaultGetter, { + apply: (target, thisArg, args) => { + Reflect.apply(target, thisArg, args); + return false; + }, + }), +}); +const patchedGetter = Object.getOwnPropertyDescriptor( + Navigator.prototype, + "webdriver" +).get; +patchedGetter.apply(navigator); +patchedGetter.toString();''') + + await self.page.set_extra_http_headers({ + 'sec-ch-ua': '\"Not?A_Brand\";v=\"99\", \"Chromium\";v=\"130\"' + }) + await self.page.goto("http://google.com") + + async def goto_url(self, url, wait_until="domcontentloaded", timeout=20000): + page = self.page + await page.goto(url, wait_until=wait_until, timeout=timeout) + + async def get_place_id_url(self, selected): + + title = selected['title'].replace("", "").replace("", "") + address = selected.get('roadAddress', selected['address']).replace("", "").replace("", "") + encoded_query = parse.quote(f"{address} {title}") + url = f"https://map.naver.com/p/search/{encoded_query}" + + await self.goto_url(url, wait_until="networkidle",timeout = self._max_retry/2*1000) + + if "/place/" in self.page.url: + return self.page.url 
+            url = self.page.url.replace("?", "?isCorrectAnswer=true&")
+            await self.goto_url(url, wait_until="networkidle", timeout=self._max_retry / 2 * 1000)
+        except PlaywrightTimeoutError:
+            raise Exception("Failed to identify place id. loading timeout")
+
+        if "/place/" in self.page.url:
+            return self.page.url
+
+        raise Exception("Failed to identify place id. item is ambiguous")
diff --git a/poc/crawling/2026-01-12/main-PwScraper.py b/poc/crawling/2026-01-12/main-PwScraper.py
new file mode 100644
index 0000000..5df573c
--- /dev/null
+++ b/poc/crawling/2026-01-12/main-PwScraper.py
@@ -0,0 +1,32 @@
+import asyncio
+from nvMapPwScraper import nvMapPwScraper
+from nvMapScraper import nvMapScraper
+
+async def main_function():
+    await nvMapPwScraper.initiate_scraper()
+
+    # 네이버 지역 검색 API 응답(items[n]) 형태의 샘플 데이터
+    selected = {'title': '스테이,머뭄',
+                'link': 'https://www.instagram.com/staymeomoom',
+                'category': '숙박>펜션',
+                'description': '',
+                'telephone': '',
+                'address': '전북특별자치도 군산시 신흥동 63-18',
+                'roadAddress': '전북특별자치도 군산시 절골길 18',
+                'mapx': '1267061254',
+                'mapy': '359864175',
+                'lng': 126.7061254,
+                'lat': 35.9864175}
+
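+    # async with 블록을 벗어나면 페이지가 닫히므로 URL 만 꺼내 와서 사용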
+    async with nvMapPwScraper() as pw_scraper:
+        new_url = await pw_scraper.get_place_id_url(selected)
+
+    print(new_url)
+    nv_scraper = nvMapScraper(new_url)  # 이후 동일한 플로우
+    await nv_scraper.scrap()
+    print(nv_scraper.rawdata)
+    return
+
+print("running main_function..")
+asyncio.run(main_function())
\ No newline at end of file
diff --git a/poc/crawling/2026-01-12/nvMapPwScraper.py b/poc/crawling/2026-01-12/nvMapPwScraper.py
new file mode 100644
index 0000000..d724764
--- /dev/null
+++ b/poc/crawling/2026-01-12/nvMapPwScraper.py
@@ -0,0 +1,116 @@
+import asyncio
+from playwright.async_api import async_playwright, TimeoutError as PlaywrightTimeoutError
+from urllib import parse
+
+class nvMapPwScraper():
+    # cls vars
+    is_ready = False
+    _playwright = None
+    _browser = None
+    _context = None
+    _win_width = 1280
+    _win_height = 720
+    _max_retry = 30  # place id timeout threshold seconds
+
+    # instance var
+    page = None
+
+    @classmethod
+    def default_context_builder(cls):
+        context_builder_dict = {}
+        context_builder_dict['viewport'] = {
+            'width': cls._win_width,
+            'height': cls._win_height
+        }
+        context_builder_dict['screen'] = {
+            'width': cls._win_width,
+            'height': cls._win_height
+        }
+        context_builder_dict['user_agent'] = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/130.0.0.0 Safari/537.36"
+        context_builder_dict['locale'] = 'ko-KR'
+        context_builder_dict['timezone_id'] = 'Asia/Seoul'
+
+        return context_builder_dict
+
+    @classmethod
+    async def initiate_scraper(cls):
+        if not cls._playwright:
+            cls._playwright = await async_playwright().start()
+        if not cls._browser:
+            cls._browser = await cls._playwright.chromium.launch(headless=True)
+        if not cls._context:
+            cls._context = await cls._browser.new_context(**cls.default_context_builder())
+        cls.is_ready = True
+
+    def __init__(self):
+        if not self.is_ready:
+            raise Exception("nvMapPwScraper is not initiated")
+
+    async def __aenter__(self):
+        await self.create_page()
+        return self
+
+    async def __aexit__(self, exc_type, exc, tb):
+        await self.page.close()
+
+    async def create_page(self):
+        self.page = await self._context.new_page()
+        # navigator.webdriver getter 를 패치해 false 를 반환하게 하는 봇 탐지 우회 스크립트
+        await self.page.add_init_script(
+'''const defaultGetter = Object.getOwnPropertyDescriptor(
+    Navigator.prototype,
+    "webdriver"
+).get;
+defaultGetter.apply(navigator);
+defaultGetter.toString();
+Object.defineProperty(Navigator.prototype, "webdriver", {
+    set: undefined,
+    enumerable: true,
+    configurable: true,
+    get: new Proxy(defaultGetter, {
+        apply: (target, thisArg, args) => {
+            Reflect.apply(target, thisArg, args);
+            return false;
+        },
+    }),
+});
+const patchedGetter = Object.getOwnPropertyDescriptor(
+    Navigator.prototype,
+    "webdriver"
+).get;
+patchedGetter.apply(navigator);
+patchedGetter.toString();''')
+
+        await self.page.set_extra_http_headers({
+            'sec-ch-ua': '"Not?A_Brand";v="99", "Chromium";v="130"'
+        })
+        await self.page.goto("http://google.com")
+
+    async def goto_url(self, url, wait_until="domcontentloaded", timeout=20000):
+        page = self.page
+        await page.goto(url, wait_until=wait_until, timeout=timeout)
+
+    async def get_place_id_url(self, selected):
+        # 네이버 검색 API 응답에 포함된 <b> 하이라이트 태그 제거
+        title = selected['title'].replace("<b>", "").replace("</b>", "")
+        address = selected.get('roadAddress', selected['address']).replace("<b>", "").replace("</b>", "")
+        encoded_query = parse.quote(f"{address} {title}")
+        url = f"https://map.naver.com/p/search/{encoded_query}"
+
+        try:
+            await self.goto_url(url, wait_until="networkidle", timeout=self._max_retry / 2 * 1000)
+
+            if "/place/" in self.page.url:
+                return self.page.url
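+
+            # (추정) isCorrectAnswer=true 는 검색 결과가 복수일 때 첫 번째 후보를
+            # 정답으로 확정해 /place/ 상세 URL 로 리다이렉트시키는 파라미터로 보임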
+            url = self.page.url.replace("?", "?isCorrectAnswer=true&")
+            await self.goto_url(url, wait_until="networkidle", timeout=self._max_retry / 2 * 1000)
+        except PlaywrightTimeoutError:
+            raise Exception("Failed to identify place id. loading timeout")
+
+        if "/place/" in self.page.url:
+            return self.page.url
+
+        raise Exception("Failed to identify place id. item is ambiguous")
diff --git a/poc/crawling/nvMapScraper-2026-01-12.py b/poc/crawling/2026-01-12/nvMapScraper.py
similarity index 96%
rename from poc/crawling/nvMapScraper-2026-01-12.py
rename to poc/crawling/2026-01-12/nvMapScraper.py
index 7d155e4..38bc1cd 100644
--- a/poc/crawling/nvMapScraper-2026-01-12.py
+++ b/poc/crawling/2026-01-12/nvMapScraper.py
@@ -112,8 +112,8 @@ class nvMapScraper():
         facilities = c_elem.parent.parent.find('div').string
         return facilities
 
-url = "https://naver.me/IgJGCCic"
-scraper = nvMapScraper(url)
-asyncio.run(scraper.scrap())
-print(scraper.image_link_list)
-print(len(scraper.image_link_list))
\ No newline at end of file
+# url = "https://naver.me/IgJGCCic"
+# scraper = nvMapScraper(url)
+# asyncio.run(scraper.scrap())
+# print(scraper.image_link_list)
+# print(len(scraper.image_link_list))
\ No newline at end of file