크롤링 디버그 로그 추가

get_video
dhlim 2026-02-04 01:05:59 +00:00
parent 89ea0c783e
commit f24ff46b09
1 changed file with 22 additions and 1 deletion

View File

@@ -1,6 +1,11 @@
import asyncio import asyncio
from playwright.async_api import async_playwright from playwright.async_api import async_playwright
from urllib import parse from urllib import parse
import time
from app.utils.logger import get_logger
# 로거 설정
logger = get_logger("pwscraper")
class NvMapPwScraper(): class NvMapPwScraper():
# cls vars # cls vars
@@ -91,25 +96,41 @@ patchedGetter.toString();''')
async def get_place_id_url(self, selected):
    """Resolve a search-result entry to a Naver Map place URL.

    Builds a search query from the entry's address and title, opens the
    search page, and returns the page URL as soon as it contains
    ``/place/``. If it does not, the current page URL is reloaded with
    ``isCorrectAnswer=true`` inserted after the first ``?`` and checked
    again. The whole sequence is attempted twice.

    Args:
        selected: Mapping with a ``'title'`` key and either a
            ``'roadAddress'`` or an ``'address'`` key. ``<b>``/``</b>``
            markers are stripped from the values before use.

    Returns:
        The page URL containing ``/place/``, or ``None`` when neither
        attempt lands on a place page.
    """
    title = selected['title'].replace("<b>", "").replace("</b>", "")
    # Only touch 'address' when 'roadAddress' is really absent, so an entry
    # that carries just 'roadAddress' does not raise KeyError.
    raw_address = selected['roadAddress'] if 'roadAddress' in selected else selected['address']
    address = raw_address.replace("<b>", "").replace("</b>", "")
    encoded_query = parse.quote(f"{address} {title}")
    search_url = f"https://map.naver.com/p/search/{encoded_query}"
    # NOTE(review): presumably _max_retry is in seconds and goto_url expects
    # milliseconds — confirm against goto_url.
    timeout_ms = self._max_retry/2*1000

    for attempt in range(1, 3):
        wait_first_start = time.perf_counter()
        await self.goto_url(search_url, wait_until="networkidle", timeout=timeout_ms)
        wait_first_time = (time.perf_counter() - wait_first_start) * 1000
        logger.debug(f"[DEBUG] Try {attempt} : Wait for perfect matching : {wait_first_time}ms")
        if "/place/" in self.page.url:
            return self.page.url

        logger.debug(f"[DEBUG] Try {attempt} : url place id not found, retry for forced collect answer")
        wait_forced_correct_start = time.perf_counter()
        # Reload the page we ended up on with the isCorrectAnswer flag added.
        forced_url = self.page.url.replace("?", "?isCorrectAnswer=true&")
        await self.goto_url(forced_url, wait_until="networkidle", timeout=timeout_ms)
        wait_forced_correct_time = (time.perf_counter() - wait_forced_correct_start) * 1000
        logger.debug(f"[DEBUG] Try {attempt} : Wait for forced isCorrectAnswer flag : {wait_forced_correct_time}ms")
        if "/place/" in self.page.url:
            return self.page.url

    # The f-prefix is required here; without it "{selected}" is logged literally.
    logger.error(f"[ERROR] Not found url for {selected}")
    return None  # 404