테스트 케이스 추가 및 1차 시도 실패시 2차 시도

insta
jaehwang 2026-01-12 14:55:48 +09:00
parent b7edba8c80
commit 2e1ccebe43
3 changed files with 60 additions and 18 deletions

29
poc/crawling/main.py Normal file
View File

@ -0,0 +1,29 @@
import asyncio

from nvMapScraper import nvMapScraper
from nvMapPwScraper import nvMapPwScraper


async def main_function():
    """Smoke-test the place-id lookup flow for one hard-coded place.

    Initializes the shared Playwright scraper, resolves the Naver Map
    place-id URL for a fixed search result, then runs the existing
    nvMapScraper flow against that URL and prints the raw data.
    """
    await nvMapPwScraper.initiate_scraper()
    # Fixed fixture: one search-API result (Naver local search shape).
    # NOTE(review): mapx/mapy look like lng/lat * 1e7 — confirm against the API.
    selected = {'title': '<b>스테이</b>,<b>머뭄</b>',
                'link': 'https://www.instagram.com/staymeomoom',
                'category': '숙박>펜션',
                'description': '',
                'telephone': '',
                'address': '전북특별자치도 군산시 신흥동 63-18',
                'roadAddress': '전북특별자치도 군산시 절골길 18',
                'mapx': '1267061254',
                'mapy': '359864175',
                'lng': 126.7061254,
                'lat': 35.9864175}
    async with nvMapPwScraper() as pw_scraper:
        new_url = await pw_scraper.get_place_id_url(selected)
        print(new_url)
        nv_scraper = nvMapScraper(new_url)  # same flow as before from here on
        await nv_scraper.scrap()
        print(nv_scraper.rawdata)


if __name__ == "__main__":
    # Guarded so importing this module does not launch the scrape.
    print("running main_function..")
    asyncio.run(main_function())

View File

@ -10,6 +10,10 @@ class nvMapPwScraper():
_context = None _context = None
_win_width = 1280 _win_width = 1280
_win_height = 720 _win_height = 720
_max_retry = 30 # place id timeout threshold seconds
# instance var
page = None
@classmethod @classmethod
def default_context_builder(cls): def default_context_builder(cls):
@ -42,10 +46,14 @@ class nvMapPwScraper():
if not self.is_ready: if not self.is_ready:
raise Exception("nvMapScraper is not initiated") raise Exception("nvMapScraper is not initiated")
async def __aenter__(self):
await self.create_page()
return self
async def __aexit__(self, exc_type, exc, tb):
await self.page.close()
async def create_page(self): async def create_page(self):
while(not self.is_ready):
asyncio.sleep(1000)
self.page = await self._context.new_page() self.page = await self._context.new_page()
await self.page.add_init_script( await self.page.add_init_script(
'''const defaultGetter = Object.getOwnPropertyDescriptor( '''const defaultGetter = Object.getOwnPropertyDescriptor(
@ -77,9 +85,9 @@ patchedGetter.toString();''')
}) })
await self.page.goto("http://google.com") await self.page.goto("http://google.com")
async def goto_url(self, url): async def goto_url(self, url, wait_until="domcontentloaded", timeout=20000):
page = self.page page = self.page
await page.goto(url, wait_until="domcontentloaded", timeout=20000) await page.goto(url, wait_until=wait_until, timeout=timeout)
async def get_place_id_url(self, selected): async def get_place_id_url(self, selected):
@ -88,13 +96,18 @@ patchedGetter.toString();''')
encoded_query = parse.quote(f"{address} {title}") encoded_query = parse.quote(f"{address} {title}")
url = f"https://map.naver.com/p/search/{encoded_query}" url = f"https://map.naver.com/p/search/{encoded_query}"
await self.goto_url(url) await self.goto_url(url, wait_until="networkidle",timeout = self._max_retry/2*1000)
count = 0 if "/place/" in self.page.url:
while(count < 5):
if "isCorrectAnswer=true" in self.page.url:
return self.page.url return self.page.url
await asyncio.sleep(1)
count += 1
url = self.page.url.replace("?","?isCorrectAnswer=true&")
await self.goto_url(url, wait_until="networkidle",timeout = self._max_retry/2*1000)
if "/place/" in self.page.url:
return self.page.url
if (count == self._max_retry / 2):
raise Exception("Failed to identify place id. loading timeout")
else:
raise Exception("Failed to identify place id. item is ambiguous") raise Exception("Failed to identify place id. item is ambiguous")

View File

@ -112,8 +112,8 @@ class nvMapScraper():
facilities = c_elem.parent.parent.find('div').string facilities = c_elem.parent.parent.find('div').string
return facilities return facilities
url = "https://naver.me/IgJGCCic" # url = "https://naver.me/IgJGCCic"
scraper = nvMapScraper(url) # scraper = nvMapScraper(url)
asyncio.run(scraper.scrap()) # asyncio.run(scraper.scrap())
print(scraper.image_link_list) # print(scraper.image_link_list)
print(len(scraper.image_link_list)) # print(len(scraper.image_link_list))