테스트 케이스 추가 및 1차 시도 실패시 2차 시도
parent
b7edba8c80
commit
2e1ccebe43
|
|
@ -0,0 +1,29 @@
|
|||
"""Test case: resolve a Naver Map place-id URL for one search hit, then scrape it.

First uses the Playwright-based scraper (nvMapPwScraper) to turn a local-search
result into a concrete /place/ URL; if that succeeds, the URL is fed to
nvMapScraper for the usual scraping flow.
"""
import asyncio

from nvMapScraper import nvMapScraper
from nvMapPwScraper import nvMapPwScraper


async def main_function():
    """Run the end-to-end place-id lookup + scrape for one sample listing."""
    await nvMapPwScraper.initiate_scraper()

    # Sample search hit (Naver local-search response shape) to resolve.
    # NOTE(review): 'mapx'/'mapy' look like lng/lat scaled by 1e7 — confirm
    # against the producer before relying on them.
    selected = {'title': '<b>스테이</b>,<b>머뭄</b>',
                'link': 'https://www.instagram.com/staymeomoom',
                'category': '숙박>펜션',
                'description': '',
                'telephone': '',
                'address': '전북특별자치도 군산시 신흥동 63-18',
                'roadAddress': '전북특별자치도 군산시 절골길 18',
                'mapx': '1267061254',
                'mapy': '359864175',
                'lng': 126.7061254,
                'lat': 35.9864175}

    # The Playwright scraper is only needed to discover the place-id URL;
    # its page is released as soon as the context exits.
    async with nvMapPwScraper() as pw_scraper:
        new_url = await pw_scraper.get_place_id_url(selected)

    print(new_url)
    nv_scraper = nvMapScraper(new_url)  # same flow as before from here on
    await nv_scraper.scrap()
    print(nv_scraper.rawdata)


if __name__ == "__main__":
    # Guarded so importing this module does not kick off a scrape.
    print("running main_function..")  # fixed typo: was "main_funtion"
    asyncio.run(main_function())
|
||||
|
|
@ -10,6 +10,10 @@ class nvMapPwScraper():
|
|||
_context = None
|
||||
_win_width = 1280
|
||||
_win_height = 720
|
||||
_max_retry = 30 # place id timeout threshold seconds
|
||||
|
||||
# instance var
|
||||
page = None
|
||||
|
||||
@classmethod
|
||||
def default_context_builder(cls):
|
||||
|
|
@ -42,10 +46,14 @@ class nvMapPwScraper():
|
|||
if not self.is_ready:
|
||||
raise Exception("nvMapScraper is not initiated")
|
||||
|
||||
async def __aenter__(self):
    """Async context-manager entry: make sure a browser page is ready.

    Returns the scraper itself so `async with nvMapPwScraper() as s:` works.
    """
    await self.create_page()
    return self
|
||||
|
||||
async def __aexit__(self, exc_type, exc, tb):
    """Async context-manager exit: close the page opened by __aenter__.

    Guards against ``self.page`` being ``None`` (e.g. ``create_page`` never
    ran or failed), so an in-flight exception is not masked by an
    ``AttributeError`` here. Returns ``None`` so exceptions propagate.
    """
    if self.page is not None:
        await self.page.close()
|
||||
|
||||
async def create_page(self):
|
||||
while(not self.is_ready):
|
||||
asyncio.sleep(1000)
|
||||
self.page = await self._context.new_page()
|
||||
await self.page.add_init_script(
|
||||
'''const defaultGetter = Object.getOwnPropertyDescriptor(
|
||||
|
|
@ -77,9 +85,9 @@ patchedGetter.toString();''')
|
|||
})
|
||||
await self.page.goto("http://google.com")
|
||||
|
||||
async def goto_url(self, url, wait_until="domcontentloaded", timeout=20000):
    """Navigate the managed page to *url*.

    Reconstructed post-change version: the diff residue here contained both
    the old fixed-argument lines and the new parameterized ones; only the
    new signature is kept. Backward compatible — the old call shape
    ``goto_url(url)`` behaves exactly as before via the defaults.

    Args:
        url: Destination URL.
        wait_until: Playwright load state to wait for
            (default ``"domcontentloaded"``, as the old hard-coded call used).
        timeout: Navigation timeout in milliseconds (default 20000).
    """
    page = self.page
    await page.goto(url, wait_until=wait_until, timeout=timeout)
||||
async def get_place_id_url(self, selected):
|
||||
|
||||
|
|
@ -88,13 +96,18 @@ patchedGetter.toString();''')
|
|||
encoded_query = parse.quote(f"{address} {title}")
|
||||
url = f"https://map.naver.com/p/search/{encoded_query}"
|
||||
|
||||
await self.goto_url(url)
|
||||
await self.goto_url(url, wait_until="networkidle",timeout = self._max_retry/2*1000)
|
||||
|
||||
count = 0
|
||||
while(count < 5):
|
||||
if "isCorrectAnswer=true" in self.page.url:
|
||||
if "/place/" in self.page.url:
|
||||
return self.page.url
|
||||
await asyncio.sleep(1)
|
||||
count += 1
|
||||
|
||||
url = self.page.url.replace("?","?isCorrectAnswer=true&")
|
||||
await self.goto_url(url, wait_until="networkidle",timeout = self._max_retry/2*1000)
|
||||
|
||||
if "/place/" in self.page.url:
|
||||
return self.page.url
|
||||
|
||||
if (count == self._max_retry / 2):
|
||||
raise Exception("Failed to identify place id. loading timeout")
|
||||
else:
|
||||
raise Exception("Failed to identify place id. item is ambiguous")
|
||||
|
|
@ -112,8 +112,8 @@ class nvMapScraper():
|
|||
facilities = c_elem.parent.parent.find('div').string
|
||||
return facilities
|
||||
|
||||
url = "https://naver.me/IgJGCCic"
|
||||
scraper = nvMapScraper(url)
|
||||
asyncio.run(scraper.scrap())
|
||||
print(scraper.image_link_list)
|
||||
print(len(scraper.image_link_list))
|
||||
# url = "https://naver.me/IgJGCCic"
|
||||
# scraper = nvMapScraper(url)
|
||||
# asyncio.run(scraper.scrap())
|
||||
# print(scraper.image_link_list)
|
||||
# print(len(scraper.image_link_list))
|
||||
Loading…
Reference in New Issue