diff --git a/app/home/api/routers/v1/home.py b/app/home/api/routers/v1/home.py
index 426b7f2..e025525 100644
--- a/app/home/api/routers/v1/home.py
+++ b/app/home/api/routers/v1/home.py
@@ -33,7 +33,7 @@ from app.utils.upload_blob_as_request import AzureBlobUploader
 from app.utils.prompts.chatgpt_prompt import ChatgptService, ChatGPTResponseError
 from app.utils.common import generate_task_id
 from app.utils.logger import get_logger
-from app.utils.nvMapScraper import NvMapScraper, GraphQLException
+from app.utils.nvMapScraper import NvMapScraper, GraphQLException, URLNotFoundException
 from app.utils.nvMapPwScraper import NvMapPwScraper
 from app.utils.prompts.prompts import marketing_prompt
 from app.utils.autotag import autotag_images
@@ -220,6 +220,15 @@ async def _crawling_logic(
             status_code=status.HTTP_502_BAD_GATEWAY,
             detail=f"네이버 지도 크롤링에 실패했습니다: {e}",
         )
+    except URLNotFoundException as e:
+        step1_elapsed = (time.perf_counter() - step1_start) * 1000
+        logger.error(
+            f"[crawling] Step 1 FAILED - 크롤링 실패: {e} ({step1_elapsed:.1f}ms)"
+        )
+        raise HTTPException(
+            status_code=status.HTTP_404_NOT_FOUND,
+            detail=f"Place ID를 확인할 수 없습니다. URL을 확인하세요: {e}",
+        )
     except Exception as e:
         step1_elapsed = (time.perf_counter() - step1_start) * 1000
         logger.error(
diff --git a/app/utils/nvMapScraper.py b/app/utils/nvMapScraper.py
index 3ece3a6..409fcc2 100644
--- a/app/utils/nvMapScraper.py
+++ b/app/utils/nvMapScraper.py
@@ -16,6 +16,10 @@ class GraphQLException(Exception):
     """GraphQL 요청 실패 시 발생하는 예외"""
     pass
 
+class URLNotFoundException(Exception):
+    """Place ID 발견 불가능 시 발생하는 예외"""
+    pass
+
 class CrawlingTimeoutException(Exception):
     """크롤링 타임아웃 시 발생하는 예외"""
     pass
@@ -86,34 +90,28 @@ query getAccommodation($id: String!, $deviceType: String) {
             async with session.get(self.url) as response:
                 self.url = str(response.url)
         else:
-            raise GraphQLException("This URL does not contain a place ID")
+            raise URLNotFoundException("This URL does not contain a place ID")
         match = re.search(place_pattern, self.url)
         if not match:
-            raise GraphQLException("Failed to parse place ID from URL")
+            raise URLNotFoundException("Failed to parse place ID from URL")
         return match[1]
 
     async def scrap(self):
-        try:
-            place_id = await self.parse_url()
-            data = await self._call_get_accommodation(place_id)
-            self.rawdata = data
-            fac_data = await self._get_facility_string(place_id)
-            # Naver 기준임, 구글 등 다른 데이터 소스의 경우 고유 Identifier 사용할 것.
-            self.place_id = self.data_source_identifier + place_id
-            self.rawdata["facilities"] = fac_data
-            self.image_link_list = [
-                nv_image["origin"]
-                for nv_image in data["data"]["business"]["images"]["images"]
-            ]
-            self.base_info = data["data"]["business"]["base"]
-            self.facility_info = fac_data
-            self.scrap_type = "GraphQL"
-
-        except GraphQLException:
-            logger.debug("GraphQL failed, fallback to Playwright")
-            self.scrap_type = "Playwright"
-            pass  # 나중에 pw 이용한 crawling으로 fallback 추가
+        place_id = await self.parse_url()
+        data = await self._call_get_accommodation(place_id)
+        self.rawdata = data
+        fac_data = await self._get_facility_string(place_id)
+        # Naver 기준임, 구글 등 다른 데이터 소스의 경우 고유 Identifier 사용할 것.
+        self.place_id = self.data_source_identifier + place_id
+        self.rawdata["facilities"] = fac_data
+        self.image_link_list = [
+            nv_image["origin"]
+            for nv_image in data["data"]["business"]["images"]["images"]
+        ]
+        self.base_info = data["data"]["business"]["base"]
+        self.facility_info = fac_data
+        self.scrap_type = "GraphQL"
         return
diff --git a/config.py b/config.py
index bb7e501..ada2ab8 100644
--- a/config.py
+++ b/config.py
@@ -192,8 +192,8 @@ class PromptSettings(BaseSettings):
     YOUTUBE_PROMPT_FILE_NAME : str = Field(default="yt_upload_prompt.txt")
     YOUTUBE_PROMPT_MODEL : str = Field(default="gpt-5-mini")
 
-    IMAGE_TAG_PROMPT_FILE_NAME : str = Field(default="yt_upload_prompt.txt")
-    IMAGE_TAG_PROMPT_MODEL : str = Field(default="gpt-5-mini")
+    IMAGE_TAG_PROMPT_FILE_NAME : str = Field(...)
+    IMAGE_TAG_PROMPT_MODEL : str = Field(...)
 
     SUBTITLE_PROMPT_FILE_NAME : str = Field(...)
     SUBTITLE_PROMPT_MODEL : str = Field(...)