테스트 케이스 추가 및 1차 시도 실패시 2차 시도

insta
jaehwang 2026-01-12 14:55:48 +09:00
parent b7edba8c80
commit 2e1ccebe43
3 changed files with 60 additions and 18 deletions

29
poc/crawling/main.py Normal file
View File

@ -0,0 +1,29 @@
import asyncio

from nvMapScraper import nvMapScraper
from nvMapPwScraper import nvMapPwScraper


async def main_function():
    """Smoke-test the place-id lookup flow for one hard-coded place.

    Initializes the shared Playwright scraper, resolves the Naver Map
    place-id URL for a fixed search result, then runs the existing
    nvMapScraper flow against that URL and prints the raw data.
    """
    await nvMapPwScraper.initiate_scraper()
    # Fixed fixture: one search-API result (Naver local search shape).
    # NOTE(review): mapx/mapy look like lng/lat * 1e7 — confirm against the API.
    selected = {'title': '<b>스테이</b>,<b>머뭄</b>',
                'link': 'https://www.instagram.com/staymeomoom',
                'category': '숙박>펜션',
                'description': '',
                'telephone': '',
                'address': '전북특별자치도 군산시 신흥동 63-18',
                'roadAddress': '전북특별자치도 군산시 절골길 18',
                'mapx': '1267061254',
                'mapy': '359864175',
                'lng': 126.7061254,
                'lat': 35.9864175}
    async with nvMapPwScraper() as pw_scraper:
        new_url = await pw_scraper.get_place_id_url(selected)
        print(new_url)
        nv_scraper = nvMapScraper(new_url)  # same flow as before from here on
        await nv_scraper.scrap()
        print(nv_scraper.rawdata)


if __name__ == "__main__":
    # Guarded so importing this module does not launch the scrape.
    print("running main_function..")
    asyncio.run(main_function())

View File

@ -10,6 +10,10 @@ class nvMapPwScraper():
_context = None _context = None
_win_width = 1280 _win_width = 1280
_win_height = 720 _win_height = 720
_max_retry = 30 # place id timeout threshold seconds
# instance var
page = None
@classmethod @classmethod
def default_context_builder(cls): def default_context_builder(cls):
@ -42,10 +46,14 @@ class nvMapPwScraper():
if not self.is_ready: if not self.is_ready:
raise Exception("nvMapScraper is not initiated") raise Exception("nvMapScraper is not initiated")
async def __aenter__(self):
await self.create_page()
return self
async def __aexit__(self, exc_type, exc, tb):
await self.page.close()
async def create_page(self): async def create_page(self):
while(not self.is_ready):
asyncio.sleep(1000)
self.page = await self._context.new_page() self.page = await self._context.new_page()
await self.page.add_init_script( await self.page.add_init_script(
'''const defaultGetter = Object.getOwnPropertyDescriptor( '''const defaultGetter = Object.getOwnPropertyDescriptor(
@ -77,9 +85,9 @@ patchedGetter.toString();''')
}) })
await self.page.goto("http://google.com") await self.page.goto("http://google.com")
async def goto_url(self, url): async def goto_url(self, url, wait_until="domcontentloaded", timeout=20000):
page = self.page page = self.page
await page.goto(url, wait_until="domcontentloaded", timeout=20000) await page.goto(url, wait_until=wait_until, timeout=timeout)
async def get_place_id_url(self, selected): async def get_place_id_url(self, selected):
@ -88,13 +96,18 @@ patchedGetter.toString();''')
encoded_query = parse.quote(f"{address} {title}") encoded_query = parse.quote(f"{address} {title}")
url = f"https://map.naver.com/p/search/{encoded_query}" url = f"https://map.naver.com/p/search/{encoded_query}"
await self.goto_url(url) await self.goto_url(url, wait_until="networkidle",timeout = self._max_retry/2*1000)
count = 0 if "/place/" in self.page.url:
while(count < 5):
if "isCorrectAnswer=true" in self.page.url:
return self.page.url return self.page.url
await asyncio.sleep(1)
count += 1
url = self.page.url.replace("?","?isCorrectAnswer=true&")
await self.goto_url(url, wait_until="networkidle",timeout = self._max_retry/2*1000)
if "/place/" in self.page.url:
return self.page.url
if (count == self._max_retry / 2):
raise Exception("Failed to identify place id. loading timeout")
else:
raise Exception("Failed to identify place id. item is ambiguous") raise Exception("Failed to identify place id. item is ambiguous")

View File

@ -112,8 +112,8 @@ class nvMapScraper():
facilities = c_elem.parent.parent.find('div').string facilities = c_elem.parent.parent.find('div').string
return facilities return facilities
url = "https://naver.me/IgJGCCic" # url = "https://naver.me/IgJGCCic"
scraper = nvMapScraper(url) # scraper = nvMapScraper(url)
asyncio.run(scraper.scrap()) # asyncio.run(scraper.scrap())
print(scraper.image_link_list) # print(scraper.image_link_list)
print(len(scraper.image_link_list)) # print(len(scraper.image_link_list))