diff --git a/poc/crawling/main.py b/poc/crawling/main.py
new file mode 100644
index 0000000..5030706
--- /dev/null
+++ b/poc/crawling/main.py
@@ -0,0 +1,41 @@
+import asyncio
+from nvMapScraper import nvMapScraper
+from nvMapPwScraper import nvMapPwScraper
+
+
+async def main_function():
+    """Run the proof-of-concept crawl for one hard-coded place.
+
+    Resolves the sample place record to a Naver Map place URL with
+    nvMapPwScraper, then scrapes that URL with nvMapScraper and prints the
+    URL and the scraped raw data.
+    """
+    await nvMapPwScraper.initiate_scraper()
+    selected = {
+        'title': '스테이,머뭄',
+        'link': 'https://www.instagram.com/staymeomoom',
+        'category': '숙박>펜션',
+        'description': '',
+        'telephone': '',
+        'address': '전북특별자치도 군산시 신흥동 63-18',
+        'roadAddress': '전북특별자치도 군산시 절골길 18',
+        'mapx': '1267061254',
+        'mapy': '359864175',
+        'lng': 126.7061254,
+        'lat': 35.9864175,
+    }
+
+    # The page is only needed to resolve the URL; it is closed before scraping.
+    async with nvMapPwScraper() as pw_scraper:
+        new_url = await pw_scraper.get_place_id_url(selected)
+
+    print(new_url)
+    nv_scraper = nvMapScraper(new_url)  # same flow as before from here on
+    await nv_scraper.scrap()
+    print(nv_scraper.rawdata)
+
+
+if __name__ == "__main__":
+    # Guarded so that importing this module does not start a crawl.
+    print("running main_function..")
+    asyncio.run(main_function())
\ No newline at end of file
diff --git a/poc/crawling/nvMapPwScraper.py b/poc/crawling/nvMapPwScraper.py
index b9d4662..d724764 100644
--- a/poc/crawling/nvMapPwScraper.py
+++ b/poc/crawling/nvMapPwScraper.py
@@ -10,6 +10,10 @@ class nvMapPwScraper():
_context = None
_win_width = 1280
_win_height = 720
+ _max_retry = 30 # time budget in seconds (not a retry count) for resolving a place id; get_place_id_url passes half of it, multiplied by 1000, as the timeout of each of its two page loads
+
+ # class-level default for the per-instance page; create_page() rebinds self.page on the instance
+ page = None
@classmethod
def default_context_builder(cls):
@@ -41,11 +45,15 @@ class nvMapPwScraper():
def __init__(self):
if not self.is_ready:
raise Exception("nvMapScraper is not initiated")
-
+
+ async def __aenter__(self):  # entering `async with` opens a page via create_page()
+ await self.create_page()
+ return self  # the scraper itself is what `as` binds
+
+ async def __aexit__(self, exc_type, exc, tb):  # leaving the `async with` block closes the page
+ await self.page.close()  # exception arguments are ignored and None is returned, so errors propagate
async def create_page(self):
- while(not self.is_ready):
- asyncio.sleep(1000)
self.page = await self._context.new_page()
await self.page.add_init_script(
'''const defaultGetter = Object.getOwnPropertyDescriptor(
@@ -77,9 +85,9 @@ patchedGetter.toString();''')
})
await self.page.goto("http://google.com")
- async def goto_url(self, url):
+ async def goto_url(self, url, wait_until="domcontentloaded", timeout=20000):  # defaults reproduce the previously hard-coded values
page = self.page
- await page.goto(url, wait_until="domcontentloaded", timeout=20000)
+ await page.goto(url, wait_until=wait_until, timeout=timeout)  # both options are forwarded unchanged to page.goto
async def get_place_id_url(self, selected):
@@ -88,13 +96,18 @@ patchedGetter.toString();''')
encoded_query = parse.quote(f"{address} {title}")
url = f"https://map.naver.com/p/search/{encoded_query}"
- await self.goto_url(url)
+ await self.goto_url(url, wait_until="networkidle",timeout = self._max_retry/2*1000)
- count = 0
- while(count < 5):
- if "isCorrectAnswer=true" in self.page.url:
- return self.page.url
- await asyncio.sleep(1)
- count += 1
+ if "/place/" in self.page.url:
+ return self.page.url
- raise Exception("Failed to identify place id. item is ambiguous")
\ No newline at end of file
+        # Retry once with isCorrectAnswer=true injected after the first "?" of the URL.
+        url = self.page.url.replace("?", "?isCorrectAnswer=true&", 1)
+        await self.goto_url(url, wait_until="networkidle", timeout=self._max_retry / 2 * 1000)
+
+        if "/place/" in self.page.url:
+            return self.page.url
+
+        # A load timeout already raises inside goto_url, and the old `count` loop is
+        # gone (referencing it here was a NameError), so this is the only failure left.
+        raise Exception("Failed to identify place id. item is ambiguous")
diff --git a/poc/crawling/nvMapScraper.py b/poc/crawling/nvMapScraper.py
index 7d155e4..38bc1cd 100644
--- a/poc/crawling/nvMapScraper.py
+++ b/poc/crawling/nvMapScraper.py
@@ -112,8 +112,8 @@ class nvMapScraper():
facilities = c_elem.parent.parent.find('div').string
return facilities
-url = "https://naver.me/IgJGCCic"
-scraper = nvMapScraper(url)
-asyncio.run(scraper.scrap())
-print(scraper.image_link_list)
-print(len(scraper.image_link_list))
\ No newline at end of file
+if __name__ == "__main__":  # manual demo; skipped when this module is imported
+    scraper = nvMapScraper("https://naver.me/IgJGCCic")
+    asyncio.run(scraper.scrap())
+    print(scraper.image_link_list)
+    print(len(scraper.image_link_list))
\ No newline at end of file