url 에러 시 404 출력
parent
cc7ee58006
commit
0fd028a49f
|
|
@ -33,7 +33,7 @@ from app.utils.upload_blob_as_request import AzureBlobUploader
|
||||||
from app.utils.prompts.chatgpt_prompt import ChatgptService, ChatGPTResponseError
|
from app.utils.prompts.chatgpt_prompt import ChatgptService, ChatGPTResponseError
|
||||||
from app.utils.common import generate_task_id
|
from app.utils.common import generate_task_id
|
||||||
from app.utils.logger import get_logger
|
from app.utils.logger import get_logger
|
||||||
from app.utils.nvMapScraper import NvMapScraper, GraphQLException
|
from app.utils.nvMapScraper import NvMapScraper, GraphQLException, URLNotFoundException
|
||||||
from app.utils.nvMapPwScraper import NvMapPwScraper
|
from app.utils.nvMapPwScraper import NvMapPwScraper
|
||||||
from app.utils.prompts.prompts import marketing_prompt
|
from app.utils.prompts.prompts import marketing_prompt
|
||||||
from app.utils.autotag import autotag_images
|
from app.utils.autotag import autotag_images
|
||||||
|
|
@ -220,6 +220,15 @@ async def _crawling_logic(
|
||||||
status_code=status.HTTP_502_BAD_GATEWAY,
|
status_code=status.HTTP_502_BAD_GATEWAY,
|
||||||
detail=f"네이버 지도 크롤링에 실패했습니다: {e}",
|
detail=f"네이버 지도 크롤링에 실패했습니다: {e}",
|
||||||
)
|
)
|
||||||
|
except URLNotFoundException as e:
|
||||||
|
step1_elapsed = (time.perf_counter() - step1_start) * 1000
|
||||||
|
logger.error(
|
||||||
|
f"[crawling] Step 1 FAILED - 크롤링 실패: {e} ({step1_elapsed:.1f}ms)"
|
||||||
|
)
|
||||||
|
raise HTTPException(
|
||||||
|
status_code=status.HTTP_404_NOT_FOUND,
|
||||||
|
detail=f"Place ID를 확인할 수 없습니다. URL을 확인하세요. : {e}",
|
||||||
|
)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
step1_elapsed = (time.perf_counter() - step1_start) * 1000
|
step1_elapsed = (time.perf_counter() - step1_start) * 1000
|
||||||
logger.error(
|
logger.error(
|
||||||
|
|
|
||||||
|
|
@ -16,6 +16,10 @@ class GraphQLException(Exception):
|
||||||
"""GraphQL 요청 실패 시 발생하는 예외"""
|
"""GraphQL 요청 실패 시 발생하는 예외"""
|
||||||
pass
|
pass
|
||||||
|
|
||||||
|
class URLNotFoundException(Exception):
|
||||||
|
"""Place ID 발견 불가능 시 발생하는 예외"""
|
||||||
|
pass
|
||||||
|
|
||||||
|
|
||||||
class CrawlingTimeoutException(Exception):
|
class CrawlingTimeoutException(Exception):
|
||||||
"""크롤링 타임아웃 시 발생하는 예외"""
|
"""크롤링 타임아웃 시 발생하는 예외"""
|
||||||
|
|
@ -86,34 +90,28 @@ query getAccommodation($id: String!, $deviceType: String) {
|
||||||
async with session.get(self.url) as response:
|
async with session.get(self.url) as response:
|
||||||
self.url = str(response.url)
|
self.url = str(response.url)
|
||||||
else:
|
else:
|
||||||
raise GraphQLException("This URL does not contain a place ID")
|
raise URLNotFoundException("This URL does not contain a place ID")
|
||||||
|
|
||||||
match = re.search(place_pattern, self.url)
|
match = re.search(place_pattern, self.url)
|
||||||
if not match:
|
if not match:
|
||||||
raise GraphQLException("Failed to parse place ID from URL")
|
raise URLNotFoundException("Failed to parse place ID from URL")
|
||||||
return match[1]
|
return match[1]
|
||||||
|
|
||||||
async def scrap(self):
|
async def scrap(self):
|
||||||
try:
|
place_id = await self.parse_url()
|
||||||
place_id = await self.parse_url()
|
data = await self._call_get_accommodation(place_id)
|
||||||
data = await self._call_get_accommodation(place_id)
|
self.rawdata = data
|
||||||
self.rawdata = data
|
fac_data = await self._get_facility_string(place_id)
|
||||||
fac_data = await self._get_facility_string(place_id)
|
# Naver 기준임, 구글 등 다른 데이터 소스의 경우 고유 Identifier 사용할 것.
|
||||||
# Naver 기준임, 구글 등 다른 데이터 소스의 경우 고유 Identifier 사용할 것.
|
self.place_id = self.data_source_identifier + place_id
|
||||||
self.place_id = self.data_source_identifier + place_id
|
self.rawdata["facilities"] = fac_data
|
||||||
self.rawdata["facilities"] = fac_data
|
self.image_link_list = [
|
||||||
self.image_link_list = [
|
nv_image["origin"]
|
||||||
nv_image["origin"]
|
for nv_image in data["data"]["business"]["images"]["images"]
|
||||||
for nv_image in data["data"]["business"]["images"]["images"]
|
]
|
||||||
]
|
self.base_info = data["data"]["business"]["base"]
|
||||||
self.base_info = data["data"]["business"]["base"]
|
self.facility_info = fac_data
|
||||||
self.facility_info = fac_data
|
self.scrap_type = "GraphQL"
|
||||||
self.scrap_type = "GraphQL"
|
|
||||||
|
|
||||||
except GraphQLException:
|
|
||||||
logger.debug("GraphQL failed, fallback to Playwright")
|
|
||||||
self.scrap_type = "Playwright"
|
|
||||||
pass # 나중에 pw 이용한 crawling으로 fallback 추가
|
|
||||||
|
|
||||||
return
|
return
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -192,8 +192,8 @@ class PromptSettings(BaseSettings):
|
||||||
YOUTUBE_PROMPT_FILE_NAME : str = Field(default="yt_upload_prompt.txt")
|
YOUTUBE_PROMPT_FILE_NAME : str = Field(default="yt_upload_prompt.txt")
|
||||||
YOUTUBE_PROMPT_MODEL : str = Field(default="gpt-5-mini")
|
YOUTUBE_PROMPT_MODEL : str = Field(default="gpt-5-mini")
|
||||||
|
|
||||||
IMAGE_TAG_PROMPT_FILE_NAME : str = Field(default="yt_upload_prompt.txt")
|
IMAGE_TAG_PROMPT_FILE_NAME : str = Field(...)
|
||||||
IMAGE_TAG_PROMPT_MODEL : str = Field(default="gpt-5-mini")
|
IMAGE_TAG_PROMPT_MODEL : str = Field(...)
|
||||||
|
|
||||||
SUBTITLE_PROMPT_FILE_NAME : str = Field(...)
|
SUBTITLE_PROMPT_FILE_NAME : str = Field(...)
|
||||||
SUBTITLE_PROMPT_MODEL : str = Field(...)
|
SUBTITLE_PROMPT_MODEL : str = Field(...)
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue