테스트 케이스 추가 및 1차 시도 실패시 2차 시도
parent
b7edba8c80
commit
2e1ccebe43
|
|
@ -0,0 +1,29 @@
|
||||||
|
import asyncio

from nvMapScraper import nvMapScraper
from nvMapPwScraper import nvMapPwScraper


async def main_function():
    """Manual end-to-end test of the two-stage scrape flow.

    Resolves a canonical Naver Map ``/place/<id>`` URL for one
    hand-picked local-search result via a real browser session
    (``nvMapPwScraper``), then feeds that URL through the regular
    scrape flow (``nvMapScraper``) and prints the raw result.
    """
    # Launch the shared Playwright browser/context before any page work.
    await nvMapPwScraper.initiate_scraper()

    # One fixed search result to resolve; presumably the shape returned
    # by the Naver local-search API — TODO confirm against the producer.
    # The Korean strings are runtime data and are kept verbatim.
    selected = {'title': '<b>스테이</b>,<b>머뭄</b>',
                'link': 'https://www.instagram.com/staymeomoom',
                'category': '숙박>펜션',
                'description': '',
                'telephone': '',
                'address': '전북특별자치도 군산시 신흥동 63-18',
                'roadAddress': '전북특별자치도 군산시 절골길 18',
                'mapx': '1267061254',
                'mapy': '359864175',
                'lng': 126.7061254,
                'lat': 35.9864175}

    # Resolve the place-id URL inside an async context so the page is
    # always closed, even when resolution fails.
    async with nvMapPwScraper() as pw_scraper:
        new_url = await pw_scraper.get_place_id_url(selected)

    print(new_url)

    # Same flow as before, now starting from the resolved place URL.
    nv_scraper = nvMapScraper(new_url)
    await nv_scraper.scrap()
    print(nv_scraper.rawdata)


# Guarded entry point: importing this module must not launch a browser
# run; only direct execution does. (Also fixes the "main_funtion" typo
# in the progress message.)
if __name__ == "__main__":
    print("running main_function..")
    asyncio.run(main_function())
|
||||||
|
|
@ -10,6 +10,10 @@ class nvMapPwScraper():
|
||||||
_context = None
|
_context = None
|
||||||
_win_width = 1280
|
_win_width = 1280
|
||||||
_win_height = 720
|
_win_height = 720
|
||||||
|
_max_retry = 30 # place id timeout threshold seconds
|
||||||
|
|
||||||
|
# instance var
|
||||||
|
page = None
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def default_context_builder(cls):
|
def default_context_builder(cls):
|
||||||
|
|
@ -42,10 +46,14 @@ class nvMapPwScraper():
|
||||||
if not self.is_ready:
|
if not self.is_ready:
|
||||||
raise Exception("nvMapScraper is not initiated")
|
raise Exception("nvMapScraper is not initiated")
|
||||||
|
|
||||||
|
async def __aenter__(self):
    """Async-with entry: lazily set up the Playwright page, then hand
    this scraper instance back to the ``async with`` body."""
    await self.create_page()
    return self
|
||||||
|
|
||||||
|
async def __aexit__(self, exc_type, exc, tb):
    """Async-with exit: release the Playwright page.

    Guards against ``self.page`` still being the class default ``None``
    (e.g. when ``create_page`` failed inside ``__aenter__``), so that
    cleanup cannot mask the original exception with an
    ``AttributeError`` from ``None.close()``. Returns ``None`` (falsy),
    letting any in-flight exception propagate normally.
    """
    if self.page is not None:
        await self.page.close()
|
||||||
|
|
||||||
async def create_page(self):
|
async def create_page(self):
|
||||||
while(not self.is_ready):
|
|
||||||
asyncio.sleep(1000)
|
|
||||||
self.page = await self._context.new_page()
|
self.page = await self._context.new_page()
|
||||||
await self.page.add_init_script(
|
await self.page.add_init_script(
|
||||||
'''const defaultGetter = Object.getOwnPropertyDescriptor(
|
'''const defaultGetter = Object.getOwnPropertyDescriptor(
|
||||||
|
|
@ -77,9 +85,9 @@ patchedGetter.toString();''')
|
||||||
})
|
})
|
||||||
await self.page.goto("http://google.com")
|
await self.page.goto("http://google.com")
|
||||||
|
|
||||||
async def goto_url(self, url, wait_until="domcontentloaded", timeout=20000):
    """Drive the managed Playwright page to *url*.

    wait_until -- load state Playwright waits for before returning
        (default ``"domcontentloaded"``).
    timeout -- navigation timeout in milliseconds (default ``20000``).
    """
    await self.page.goto(url, wait_until=wait_until, timeout=timeout)
|
||||||
|
|
||||||
async def get_place_id_url(self, selected):
|
async def get_place_id_url(self, selected):
|
||||||
|
|
||||||
|
|
@ -88,13 +96,18 @@ patchedGetter.toString();''')
|
||||||
encoded_query = parse.quote(f"{address} {title}")
|
encoded_query = parse.quote(f"{address} {title}")
|
||||||
url = f"https://map.naver.com/p/search/{encoded_query}"
|
url = f"https://map.naver.com/p/search/{encoded_query}"
|
||||||
|
|
||||||
await self.goto_url(url)
|
await self.goto_url(url, wait_until="networkidle",timeout = self._max_retry/2*1000)
|
||||||
|
|
||||||
count = 0
|
if "/place/" in self.page.url:
|
||||||
while(count < 5):
|
return self.page.url
|
||||||
if "isCorrectAnswer=true" in self.page.url:
|
|
||||||
return self.page.url
|
|
||||||
await asyncio.sleep(1)
|
|
||||||
count += 1
|
|
||||||
|
|
||||||
raise Exception("Failed to identify place id. item is ambiguous")
|
url = self.page.url.replace("?","?isCorrectAnswer=true&")
|
||||||
|
await self.goto_url(url, wait_until="networkidle",timeout = self._max_retry/2*1000)
|
||||||
|
|
||||||
|
if "/place/" in self.page.url:
|
||||||
|
return self.page.url
|
||||||
|
|
||||||
|
if (count == self._max_retry / 2):
|
||||||
|
raise Exception("Failed to identify place id. loading timeout")
|
||||||
|
else:
|
||||||
|
raise Exception("Failed to identify place id. item is ambiguous")
|
||||||
|
|
|
||||||
|
|
@ -112,8 +112,8 @@ class nvMapScraper():
|
||||||
facilities = c_elem.parent.parent.find('div').string
|
facilities = c_elem.parent.parent.find('div').string
|
||||||
return facilities
|
return facilities
|
||||||
|
|
||||||
# NOTE: ad-hoc manual test disabled — the end-to-end flow is now driven
# by the dedicated test script added in this commit; kept here
# commented out for reference.
# url = "https://naver.me/IgJGCCic"
# scraper = nvMapScraper(url)
# asyncio.run(scraper.scrap())
# print(scraper.image_link_list)
# print(len(scraper.image_link_list))
|
||||||
Loading…
Reference in New Issue