Update crawler retry and timeout settings

main
jaehwang 2026-02-11 07:09:34 +00:00
parent bc2342163f
commit 18635d7995
1 changed file with 12 additions and 6 deletions

View File

@ -15,7 +15,8 @@ class NvMapPwScraper():
_context = None _context = None
_win_width = 1280 _win_width = 1280
_win_height = 720 _win_height = 720
_max_retry = 60 # place id timeout threshold seconds _max_retry = 3
_timeout = 60 # place id timeout threshold seconds
# instance var # instance var
page = None page = None
@ -97,7 +98,7 @@ patchedGetter.toString();''')
async def get_place_id_url(self, selected): async def get_place_id_url(self, selected):
count = 0 count = 0
get_place_id_url_start = time.perf_counter() get_place_id_url_start = time.perf_counter()
while (count <= 1): while (count <= self._max_retry):
title = selected['title'].replace("<b>", "").replace("</b>", "") title = selected['title'].replace("<b>", "").replace("</b>", "")
address = selected.get('roadAddress', selected['address']).replace("<b>", "").replace("</b>", "") address = selected.get('roadAddress', selected['address']).replace("<b>", "").replace("</b>", "")
encoded_query = parse.quote(f"{address} {title}") encoded_query = parse.quote(f"{address} {title}")
@ -106,9 +107,12 @@ patchedGetter.toString();''')
wait_first_start = time.perf_counter() wait_first_start = time.perf_counter()
try: try:
await self.goto_url(url, wait_until="networkidle",timeout = self._max_retry/2*1000) await self.goto_url(url, wait_until="networkidle",timeout = self._timeout*1000)
except: except:
await self.page.reload(wait_until="networkidle", timeout = self._max_retry/2*1000) if "/place/" in self.page.url:
return self.page.url
logger.error(f"[ERROR] Can't Finish networkidle")
wait_first_time = (time.perf_counter() - wait_first_start) * 1000 wait_first_time = (time.perf_counter() - wait_first_start) * 1000
@ -123,9 +127,11 @@ patchedGetter.toString();''')
url = self.page.url.replace("?","?isCorrectAnswer=true&") url = self.page.url.replace("?","?isCorrectAnswer=true&")
try: try:
await self.goto_url(url, wait_until="networkidle",timeout = self._max_retry/2*1000) await self.goto_url(url, wait_until="networkidle",timeout = self._timeout*1000)
except: except:
await self.page.reload(wait_until="networkidle", timeout = self._max_retry/2*1000) if "/place/" in self.page.url:
return self.page.url
logger.error(f"[ERROR] Can't Finish networkidle")
wait_forced_correct_time = (time.perf_counter() - wait_forced_correct_start) * 1000 wait_forced_correct_time = (time.perf_counter() - wait_forced_correct_start) * 1000
logger.debug(f"[DEBUG] Try {count+1} : Wait for forced isCorrectAnswer flag : {wait_forced_correct_time}ms") logger.debug(f"[DEBUG] Try {count+1} : Wait for forced isCorrectAnswer flag : {wait_forced_correct_time}ms")