해시태그 #출력 제거(프롬프트), 자동완성 크롤링 내부 에러 발생 시 500출력, 재시도 로직 추가, 타임아웃 시간 30초로 증가
parent
96597dd555
commit
c568f949c7
|
|
@ -375,7 +375,7 @@ async def _autocomplete_logic(autocomplete_item:dict):
|
||||||
)
|
)
|
||||||
logger.exception("[crawling] Autocomplete 상세 오류:")
|
logger.exception("[crawling] Autocomplete 상세 오류:")
|
||||||
raise HTTPException(
|
raise HTTPException(
|
||||||
status_code=status.HTTP_502_BAD_GATEWAY,
|
status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
|
||||||
detail="자동완성 place id 추출 실패",
|
detail="자동완성 place id 추출 실패",
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -10,7 +10,7 @@ class NvMapPwScraper():
|
||||||
_context = None
|
_context = None
|
||||||
_win_width = 1280
|
_win_width = 1280
|
||||||
_win_height = 720
|
_win_height = 720
|
||||||
_max_retry = 30 # place id timeout threshold seconds
|
_max_retry = 60 # place id timeout threshold seconds
|
||||||
|
|
||||||
# instance var
|
# instance var
|
||||||
page = None
|
page = None
|
||||||
|
|
@ -90,22 +90,28 @@ patchedGetter.toString();''')
|
||||||
await page.goto(url, wait_until=wait_until, timeout=timeout)
|
await page.goto(url, wait_until=wait_until, timeout=timeout)
|
||||||
|
|
||||||
async def get_place_id_url(self, selected):
|
async def get_place_id_url(self, selected):
|
||||||
|
count = 0
|
||||||
|
while (count <= 1):
|
||||||
|
title = selected['title'].replace("<b>", "").replace("</b>", "")
|
||||||
|
address = selected.get('roadAddress', selected['address']).replace("<b>", "").replace("</b>", "")
|
||||||
|
encoded_query = parse.quote(f"{address} {title}")
|
||||||
|
url = f"https://map.naver.com/p/search/{encoded_query}"
|
||||||
|
|
||||||
title = selected['title'].replace("<b>", "").replace("</b>", "")
|
await self.goto_url(url, wait_until="networkidle",timeout = self._max_retry/2*1000)
|
||||||
address = selected.get('roadAddress', selected['address']).replace("<b>", "").replace("</b>", "")
|
|
||||||
encoded_query = parse.quote(f"{address} {title}")
|
|
||||||
url = f"https://map.naver.com/p/search/{encoded_query}"
|
|
||||||
|
|
||||||
await self.goto_url(url, wait_until="networkidle",timeout = self._max_retry/2*1000)
|
if "/place/" in self.page.url:
|
||||||
|
return self.page.url
|
||||||
|
|
||||||
if "/place/" in self.page.url:
|
url = self.page.url.replace("?","?isCorrectAnswer=true&")
|
||||||
return self.page.url
|
await self.goto_url(url, wait_until="networkidle",timeout = self._max_retry/2*1000)
|
||||||
|
|
||||||
url = self.page.url.replace("?","?isCorrectAnswer=true&")
|
if "/place/" in self.page.url:
|
||||||
await self.goto_url(url, wait_until="networkidle",timeout = self._max_retry/2*1000)
|
return self.page.url
|
||||||
|
count += 1
|
||||||
|
|
||||||
|
print("Not found url for {selected}")
|
||||||
|
return None # 404
|
||||||
|
|
||||||
if "/place/" in self.page.url:
|
|
||||||
return self.page.url
|
|
||||||
|
|
||||||
# if (count == self._max_retry / 2):
|
# if (count == self._max_retry / 2):
|
||||||
# raise Exception("Failed to identify place id. loading timeout")
|
# raise Exception("Failed to identify place id. loading timeout")
|
||||||
|
|
|
||||||
|
|
@ -36,7 +36,7 @@ Generate 5-8 selling points:
|
||||||
* **`english_category`**: Strictly use one keyword from the English allowed list provided in the Output Rules.
|
* **`english_category`**: Strictly use one keyword from the English allowed list provided in the Output Rules.
|
||||||
* **`korean category`**: Strictly use one keyword from the Korean allowed list provided in the Output Rules. It must correspond to the selected English category.
|
* **`korean category`**: Strictly use one keyword from the Korean allowed list provided in the Output Rules. It must correspond to the selected English category.
|
||||||
* **`description`**: A short, punchy marketing phrase in Korean (15~30 characters).
|
* **`description`**: A short, punchy marketing phrase in Korean (15~30 characters).
|
||||||
* **`score`**: An integer (70-99) representing the strength of this feature based on the brand's potential.
|
* **`score`**: An integer (0-100) representing the strength of this feature based on the brand's potential.
|
||||||
|
|
||||||
### 5. target_keywords
|
### 5. target_keywords
|
||||||
* **`target_keywords`**: Provide a list of 10 highly relevant marketing keywords or hashtags for search engine optimization and social media targeting.
|
* **`target_keywords`**: Provide a list of 10 highly relevant marketing keywords or hashtags for search engine optimization and social media targeting. Do not prefix hashtags with the # symbol.
|
||||||
Loading…
Reference in New Issue