해시태그 '#' 출력 제거(프롬프트), 자동완성 크롤링 내부 에러 발생 시 500 출력, 재시도 로직 추가, 타임아웃 시간 30초로 증가

get_video
dhlim 2026-02-03 06:45:16 +00:00
parent 96597dd555
commit c568f949c7
3 changed files with 25 additions and 19 deletions

View File

@ -375,7 +375,7 @@ async def _autocomplete_logic(autocomplete_item:dict):
)
logger.exception("[crawling] Autocomplete 상세 오류:")
raise HTTPException(
status_code=status.HTTP_502_BAD_GATEWAY,
status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
detail="자동완성 place id 추출 실패",
)

View File

@ -10,7 +10,7 @@ class NvMapPwScraper():
_context = None
_win_width = 1280
_win_height = 720
_max_retry = 30 # place id timeout threshold seconds
_max_retry = 60 # place id timeout threshold seconds
# instance var
page = None
@ -90,22 +90,28 @@ patchedGetter.toString();''')
await page.goto(url, wait_until=wait_until, timeout=timeout)
async def get_place_id_url(self, selected):
count = 0
while (count <= 1):
title = selected['title'].replace("<b>", "").replace("</b>", "")
address = selected.get('roadAddress', selected['address']).replace("<b>", "").replace("</b>", "")
encoded_query = parse.quote(f"{address} {title}")
url = f"https://map.naver.com/p/search/{encoded_query}"
title = selected['title'].replace("<b>", "").replace("</b>", "")
address = selected.get('roadAddress', selected['address']).replace("<b>", "").replace("</b>", "")
encoded_query = parse.quote(f"{address} {title}")
url = f"https://map.naver.com/p/search/{encoded_query}"
await self.goto_url(url, wait_until="networkidle",timeout = self._max_retry/2*1000)
await self.goto_url(url, wait_until="networkidle",timeout = self._max_retry/2*1000)
if "/place/" in self.page.url:
return self.page.url
if "/place/" in self.page.url:
return self.page.url
url = self.page.url.replace("?","?isCorrectAnswer=true&")
await self.goto_url(url, wait_until="networkidle",timeout = self._max_retry/2*1000)
url = self.page.url.replace("?","?isCorrectAnswer=true&")
await self.goto_url(url, wait_until="networkidle",timeout = self._max_retry/2*1000)
if "/place/" in self.page.url:
return self.page.url
count += 1
print("Not found url for {selected}")
return None # 404
if "/place/" in self.page.url:
return self.page.url
# if (count == self._max_retry / 2):
# raise Exception("Failed to identify place id. loading timeout")

View File

@ -36,7 +36,7 @@ Generate 5-8 selling points:
* **`english_category`**: Strictly use one keyword from the English allowed list provided in the Output Rules.
* **`korean category`**: Strictly use one keyword from the Korean allowed list provided in the Output Rules. It must match the `english_category`.
* **`description`**: A short, punchy marketing phrase in Korean (15~30 characters).
* **`score`**: An integer (70-99) representing the strength of this feature based on the brand's potential.
* **`score`**: An integer (0-100) representing the strength of this feature based on the brand's potential.
### 5. target_keywords
* **`target_keywords`**: Provide a list of 10 highly relevant marketing keywords or hashtags for search engine optimization and social media targeting.
* **`target_keywords`**: Provide a list of 10 highly relevant marketing keywords or hashtags for search engine optimization and social media targeting. Do not put `#` in front of the hashtags.