url 에러 시 404 출력
parent
cc7ee58006
commit
0fd028a49f
|
|
@ -33,7 +33,7 @@ from app.utils.upload_blob_as_request import AzureBlobUploader
|
|||
from app.utils.prompts.chatgpt_prompt import ChatgptService, ChatGPTResponseError
|
||||
from app.utils.common import generate_task_id
|
||||
from app.utils.logger import get_logger
|
||||
from app.utils.nvMapScraper import NvMapScraper, GraphQLException
|
||||
from app.utils.nvMapScraper import NvMapScraper, GraphQLException, URLNotFoundException
|
||||
from app.utils.nvMapPwScraper import NvMapPwScraper
|
||||
from app.utils.prompts.prompts import marketing_prompt
|
||||
from app.utils.autotag import autotag_images
|
||||
|
|
@ -220,6 +220,15 @@ async def _crawling_logic(
|
|||
status_code=status.HTTP_502_BAD_GATEWAY,
|
||||
detail=f"네이버 지도 크롤링에 실패했습니다: {e}",
|
||||
)
|
||||
except URLNotFoundException as e:
|
||||
step1_elapsed = (time.perf_counter() - step1_start) * 1000
|
||||
logger.error(
|
||||
f"[crawling] Step 1 FAILED - 크롤링 실패: {e} ({step1_elapsed:.1f}ms)"
|
||||
)
|
||||
raise HTTPException(
|
||||
status_code=status.HTTP_404_NOT_FOUND,
|
||||
detail=f"Place ID를 확인할 수 없습니다. URL을 확인하세요. : {e}",
|
||||
)
|
||||
except Exception as e:
|
||||
step1_elapsed = (time.perf_counter() - step1_start) * 1000
|
||||
logger.error(
|
||||
|
|
|
|||
|
|
@ -16,6 +16,10 @@ class GraphQLException(Exception):
|
|||
"""GraphQL 요청 실패 시 발생하는 예외"""
|
||||
pass
|
||||
|
||||
class URLNotFoundException(Exception):
|
||||
"""Place ID 발견 불가능 시 발생하는 예외"""
|
||||
pass
|
||||
|
||||
|
||||
class CrawlingTimeoutException(Exception):
|
||||
"""크롤링 타임아웃 시 발생하는 예외"""
|
||||
|
|
@ -86,34 +90,28 @@ query getAccommodation($id: String!, $deviceType: String) {
|
|||
async with session.get(self.url) as response:
|
||||
self.url = str(response.url)
|
||||
else:
|
||||
raise GraphQLException("This URL does not contain a place ID")
|
||||
raise URLNotFoundException("This URL does not contain a place ID")
|
||||
|
||||
match = re.search(place_pattern, self.url)
|
||||
if not match:
|
||||
raise GraphQLException("Failed to parse place ID from URL")
|
||||
raise URLNotFoundException("Failed to parse place ID from URL")
|
||||
return match[1]
|
||||
|
||||
async def scrap(self):
|
||||
try:
|
||||
place_id = await self.parse_url()
|
||||
data = await self._call_get_accommodation(place_id)
|
||||
self.rawdata = data
|
||||
fac_data = await self._get_facility_string(place_id)
|
||||
# Naver 기준임, 구글 등 다른 데이터 소스의 경우 고유 Identifier 사용할 것.
|
||||
self.place_id = self.data_source_identifier + place_id
|
||||
self.rawdata["facilities"] = fac_data
|
||||
self.image_link_list = [
|
||||
nv_image["origin"]
|
||||
for nv_image in data["data"]["business"]["images"]["images"]
|
||||
]
|
||||
self.base_info = data["data"]["business"]["base"]
|
||||
self.facility_info = fac_data
|
||||
self.scrap_type = "GraphQL"
|
||||
|
||||
except GraphQLException:
|
||||
logger.debug("GraphQL failed, fallback to Playwright")
|
||||
self.scrap_type = "Playwright"
|
||||
pass # 나중에 pw 이용한 crawling으로 fallback 추가
|
||||
place_id = await self.parse_url()
|
||||
data = await self._call_get_accommodation(place_id)
|
||||
self.rawdata = data
|
||||
fac_data = await self._get_facility_string(place_id)
|
||||
# Naver 기준임, 구글 등 다른 데이터 소스의 경우 고유 Identifier 사용할 것.
|
||||
self.place_id = self.data_source_identifier + place_id
|
||||
self.rawdata["facilities"] = fac_data
|
||||
self.image_link_list = [
|
||||
nv_image["origin"]
|
||||
for nv_image in data["data"]["business"]["images"]["images"]
|
||||
]
|
||||
self.base_info = data["data"]["business"]["base"]
|
||||
self.facility_info = fac_data
|
||||
self.scrap_type = "GraphQL"
|
||||
|
||||
return
|
||||
|
||||
|
|
|
|||
|
|
@ -192,8 +192,8 @@ class PromptSettings(BaseSettings):
|
|||
YOUTUBE_PROMPT_FILE_NAME : str = Field(default="yt_upload_prompt.txt")
|
||||
YOUTUBE_PROMPT_MODEL : str = Field(default="gpt-5-mini")
|
||||
|
||||
IMAGE_TAG_PROMPT_FILE_NAME : str = Field(default="yt_upload_prompt.txt")
|
||||
IMAGE_TAG_PROMPT_MODEL : str = Field(default="gpt-5-mini")
|
||||
IMAGE_TAG_PROMPT_FILE_NAME : str = Field(...)
|
||||
IMAGE_TAG_PROMPT_MODEL : str = Field(...)
|
||||
|
||||
SUBTITLE_PROMPT_FILE_NAME : str = Field(...)
|
||||
SUBTITLE_PROMPT_MODEL : str = Field(...)
|
||||
|
|
|
|||
Loading…
Reference in New Issue