From c568f949c78ccbd6acc0edada1225308f3c928d7 Mon Sep 17 00:00:00 2001 From: dhlim Date: Tue, 3 Feb 2026 06:45:16 +0000 Subject: [PATCH] =?UTF-8?q?=ED=95=B4=EC=8B=9C=ED=83=9C=EA=B7=B8=20#?= =?UTF-8?q?=EC=B6=9C=EB=A0=A5=20=EC=A0=9C=EA=B1=B0(=ED=94=84=EB=A1=AC?= =?UTF-8?q?=ED=94=84=ED=8A=B8),=20=EC=9E=90=EB=8F=99=EC=99=84=EC=84=B1=20?= =?UTF-8?q?=ED=81=AC=EB=A1=A4=EB=A7=81=20=EB=82=B4=EB=B6=80=20=EC=97=90?= =?UTF-8?q?=EB=9F=AC=20=EB=B0=9C=EC=83=9D=20=EC=8B=9C=20500=EC=B6=9C?= =?UTF-8?q?=EB=A0=A5,=20=EC=9E=AC=EC=8B=9C=EB=8F=84=20=EB=A1=9C=EC=A7=81?= =?UTF-8?q?=20=EC=B6=94=EA=B0=80,=20=ED=83=80=EC=9E=84=EC=95=84=EC=9B=83?= =?UTF-8?q?=20=EC=8B=9C=EA=B0=84=2030=EC=B4=88=EB=A1=9C=20=EC=A6=9D?= =?UTF-8?q?=EA=B0=80?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- app/home/api/routers/v1/home.py | 2 +- app/utils/nvMapPwScraper.py | 38 +++++++++++-------- .../prompts/templates/marketing_prompt.txt | 4 +- 3 files changed, 25 insertions(+), 19 deletions(-) diff --git a/app/home/api/routers/v1/home.py b/app/home/api/routers/v1/home.py index 24b8b80..e0cb037 100644 --- a/app/home/api/routers/v1/home.py +++ b/app/home/api/routers/v1/home.py @@ -375,7 +375,7 @@ async def _autocomplete_logic(autocomplete_item:dict): ) logger.exception("[crawling] Autocomplete 상세 오류:") raise HTTPException( - status_code=status.HTTP_502_BAD_GATEWAY, + status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, detail="자동완성 place id 추출 실패", ) diff --git a/app/utils/nvMapPwScraper.py b/app/utils/nvMapPwScraper.py index 6b3f2a6..cadc492 100644 --- a/app/utils/nvMapPwScraper.py +++ b/app/utils/nvMapPwScraper.py @@ -10,7 +10,7 @@ class NvMapPwScraper(): _context = None _win_width = 1280 _win_height = 720 - _max_retry = 30 # place id timeout threshold seconds + _max_retry = 60 # place id timeout threshold seconds # instance var page = None @@ -90,22 +90,28 @@ patchedGetter.toString();''') await page.goto(url, wait_until=wait_until, timeout=timeout) async def get_place_id_url(self, selected): + count = 0 + while (count <= 1): + title = selected['title'].replace("", "").replace("", "") + address = selected.get('roadAddress', selected['address']).replace("", "").replace("", "") + encoded_query = parse.quote(f"{address} {title}") + url = f"https://map.naver.com/p/search/{encoded_query}" + + await self.goto_url(url, wait_until="networkidle",timeout = self._max_retry/2*1000) + + if "/place/" in self.page.url: + return self.page.url + + url = self.page.url.replace("?","?isCorrectAnswer=true&") + await self.goto_url(url, wait_until="networkidle",timeout = self._max_retry/2*1000) + + if "/place/" in self.page.url: + return self.page.url + count += 1 + + print("Not found url for {selected}") + return None # 404 - title = selected['title'].replace("", "").replace("", "") - address = selected.get('roadAddress', selected['address']).replace("", "").replace("", "") - encoded_query = parse.quote(f"{address} {title}") - url = f"https://map.naver.com/p/search/{encoded_query}" - - await self.goto_url(url, wait_until="networkidle",timeout = self._max_retry/2*1000) - - if "/place/" in self.page.url: - return self.page.url - - url = self.page.url.replace("?","?isCorrectAnswer=true&") - await self.goto_url(url, wait_until="networkidle",timeout = self._max_retry/2*1000) - - if "/place/" in self.page.url: - return self.page.url # if (count == self._max_retry / 2): # raise Exception("Failed to identify place id. loading timeout") diff --git a/app/utils/prompts/templates/marketing_prompt.txt b/app/utils/prompts/templates/marketing_prompt.txt index 2e994bd..3a97061 100644 --- a/app/utils/prompts/templates/marketing_prompt.txt +++ b/app/utils/prompts/templates/marketing_prompt.txt @@ -36,7 +36,7 @@ Generate 5-8 selling points: * **`english_category`**: Strictly use one keyword from the English allowed list provided in the Output Rules. * **`korean category`**: Strictly use one keyword from the Korean allowed list provided in the Output Rules . It must be matched with english category. * **`description`**: A short, punchy marketing phrase in Korean (15~30 characters). -* **`score`**: An integer (70-99) representing the strength of this feature based on the brand's potential. +* **`score`**: An integer (0-100) representing the strength of this feature based on the brand's potential. ### 5. target_keywords -* **`target_keywords`**: Provide a list of 10 highly relevant marketing keywords or hashtags for search engine optimization and social media targeting. \ No newline at end of file +* **`target_keywords`**: Provide a list of 10 highly relevant marketing keywords or hashtags for search engine optimization and social media targeting. Do not insert # in front of hashtag. \ No newline at end of file