From 2e1ccebe4379d05874d1056fbe9c3e887f68742a Mon Sep 17 00:00:00 2001
From: jaehwang
Date: Mon, 12 Jan 2026 14:55:48 +0900
Subject: [PATCH] =?UTF-8?q?=ED=85=8C=EC=8A=A4=ED=8A=B8=20=EC=BC=80?=
 =?UTF-8?q?=EC=9D=B4=EC=8A=A4=20=EC=B6=94=EA=B0=80=20=EB=B0=8F=201?=
 =?UTF-8?q?=EC=B0=A8=20=EC=8B=9C=EB=8F=84=20=EC=8B=A4=ED=8C=A8=EC=8B=9C=20?=
 =?UTF-8?q?2=EC=B0=A8=20=EC=8B=9C=EB=8F=84?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 poc/crawling/main.py           | 29 +++++++++++++++++++++++++
 poc/crawling/nvMapPwScraper.py | 36 +++++++++++++++++++++----------
 poc/crawling/nvMapScraper.py   | 10 +++++-----
 3 files changed, 57 insertions(+), 18 deletions(-)
 create mode 100644 poc/crawling/main.py

diff --git a/poc/crawling/main.py b/poc/crawling/main.py
new file mode 100644
index 0000000..5030706
--- /dev/null
+++ b/poc/crawling/main.py
@@ -0,0 +1,29 @@
+import asyncio
+from nvMapScraper import nvMapScraper
+from nvMapPwScraper import nvMapPwScraper
+
+async def main_function():
+    await nvMapPwScraper.initiate_scraper()
+    selected = {'title': '스테이,머뭄',
+                'link': 'https://www.instagram.com/staymeomoom',
+                'category': '숙박>펜션',
+                'description': '',
+                'telephone': '',
+                'address': '전북특별자치도 군산시 신흥동 63-18',
+                'roadAddress': '전북특별자치도 군산시 절골길 18',
+                'mapx': '1267061254',
+                'mapy': '359864175',
+                'lng': 126.7061254,
+                'lat': 35.9864175}
+
+    async with nvMapPwScraper() as pw_scraper:
+        new_url = await pw_scraper.get_place_id_url(selected)
+
+    print(new_url)
+    nv_scraper = nvMapScraper(new_url) # same flow as before from here on
+    await nv_scraper.scrap()
+    print(nv_scraper.rawdata)
+    return
+
+print("running main_function..")
+asyncio.run(main_function())
\ No newline at end of file
diff --git a/poc/crawling/nvMapPwScraper.py b/poc/crawling/nvMapPwScraper.py
index b9d4662..d724764 100644
--- a/poc/crawling/nvMapPwScraper.py
+++ b/poc/crawling/nvMapPwScraper.py
@@ -10,6 +10,10 @@ class nvMapPwScraper():
     _context = None
     _win_width = 1280
     _win_height = 720
+    _max_retry = 30 # place id timeout threshold seconds
+
+    # instance var
+    page = None
 
     @classmethod
     def default_context_builder(cls):
@@ -41,11 +45,15 @@ class nvMapPwScraper():
     def __init__(self):
         if not self.is_ready:
             raise Exception("nvMapScraper is not initiated")
-        
+
+    async def __aenter__(self):
+        await self.create_page()
+        return self
+
+    async def __aexit__(self, exc_type, exc, tb):
+        await self.page.close()
 
     async def create_page(self):
-        while(not self.is_ready):
-            asyncio.sleep(1000)
         self.page = await self._context.new_page()
         await self.page.add_init_script(
 '''const defaultGetter = Object.getOwnPropertyDescriptor(
@@ -77,9 +85,9 @@ patchedGetter.toString();''')
         })
         await self.page.goto("http://google.com")
 
-    async def goto_url(self, url):
+    async def goto_url(self, url, wait_until="domcontentloaded", timeout=20000):
         page = self.page
-        await page.goto(url, wait_until="domcontentloaded", timeout=20000)
+        await page.goto(url, wait_until=wait_until, timeout=timeout)
 
 
     async def get_place_id_url(self, selected):
@@ -88,13 +96,15 @@ patchedGetter.toString();''')
         encoded_query = parse.quote(f"{address} {title}")
 
         url = f"https://map.naver.com/p/search/{encoded_query}"
-        await self.goto_url(url)
+        await self.goto_url(url, wait_until="networkidle",timeout = self._max_retry/2*1000)
 
-        count = 0
-        while(count < 5):
-            if "isCorrectAnswer=true" in self.page.url:
-                return self.page.url
-            await asyncio.sleep(1)
-            count += 1
+        if "/place/" in self.page.url:
+            return self.page.url
 
-        raise Exception("Failed to identify place id. item is ambiguous")
\ No newline at end of file
+        url = self.page.url.replace("?","?isCorrectAnswer=true&")
+        await self.goto_url(url, wait_until="networkidle",timeout = self._max_retry/2*1000)
+
+        if "/place/" in self.page.url:
+            return self.page.url
+
+        raise Exception("Failed to identify place id. item is ambiguous")
diff --git a/poc/crawling/nvMapScraper.py b/poc/crawling/nvMapScraper.py
index 7d155e4..38bc1cd 100644
--- a/poc/crawling/nvMapScraper.py
+++ b/poc/crawling/nvMapScraper.py
@@ -112,8 +112,8 @@ class nvMapScraper():
             facilities = c_elem.parent.parent.find('div').string
         return facilities
 
-url = "https://naver.me/IgJGCCic"
-scraper = nvMapScraper(url)
-asyncio.run(scraper.scrap())
-print(scraper.image_link_list)
-print(len(scraper.image_link_list))
\ No newline at end of file
+# url = "https://naver.me/IgJGCCic"
+# scraper = nvMapScraper(url)
+# asyncio.run(scraper.scrap())
+# print(scraper.image_link_list)
+# print(len(scraper.image_link_list))
\ No newline at end of file