diff --git a/app/core/common.py b/app/core/common.py index 520fab4..85cb08d 100644 --- a/app/core/common.py +++ b/app/core/common.py @@ -5,7 +5,7 @@ from contextlib import asynccontextmanager from fastapi import FastAPI from app.utils.logger import get_logger - +from app.utils.nvMapPwScraper import NvMapPwScraper logger = get_logger("core") @@ -24,6 +24,7 @@ async def lifespan(app: FastAPI): await create_db_tables() logger.info("Database tables created (DEBUG mode)") + await NvMapPwScraper.initiate_scraper() except asyncio.TimeoutError: logger.error("Database initialization timed out") # 타임아웃 시 앱 시작 중단하려면 raise, 계속하려면 pass diff --git a/app/home/api/routers/v1/home.py b/app/home/api/routers/v1/home.py index 489bf2d..2250d40 100644 --- a/app/home/api/routers/v1/home.py +++ b/app/home/api/routers/v1/home.py @@ -13,6 +13,7 @@ from sqlalchemy.ext.asyncio import AsyncSession from app.database.session import get_session, AsyncSessionLocal from app.home.models import Image from app.home.schemas.home_schema import ( + AutoCompleteRequest, CrawlingRequest, CrawlingResponse, ErrorResponse, @@ -27,6 +28,7 @@ from app.utils.chatgpt_prompt import ChatgptService from app.utils.common import generate_task_id from app.utils.logger import get_logger from app.utils.nvMapScraper import NvMapScraper, GraphQLException +from app.utils.nvMapPwScraper import NvMapPwScraper from app.utils.prompts.prompts import marketing_prompt from config import MEDIA_ROOT @@ -105,17 +107,52 @@ def _extract_region_from_address(road_address: str | None) -> str: tags=["Crawling"], ) async def crawling(request_body: CrawlingRequest): - """네이버 지도 장소 크롤링""" + return await _crawling_logic(request_body.url) + +@router.post( + "/autocomplete", + summary="네이버 자동완성 크롤링", + description=""" +네이버 검색 API 정보를 활용하여 Place ID를 추출한 뒤 자동으로 크롤링합니다. 
+ +## 요청 필드 +- **url**: 네이버 지도 장소 URL (필수) + +## 반환 정보 +- **image_list**: 장소 이미지 URL 목록 +- **image_count**: 이미지 개수 +- **processed_info**: 가공된 장소 정보 (customer_name, region, detail_region_info) + """, + response_model=CrawlingResponse, + response_description="크롤링 결과", + responses={ + 200: {"description": "크롤링 성공", "model": CrawlingResponse}, + 400: { + "description": "잘못된 URL", + "model": ErrorResponse, + }, + 502: { + "description": "크롤링 실패", + "model": ErrorResponse, + }, + }, + tags=["Crawling"], +) +async def autocomplete_crawling(request_body: AutoCompleteRequest): + url = await _autocomplete_logic(request_body.model_dump()) + return await _crawling_logic(url) + +async def _crawling_logic(url:str): request_start = time.perf_counter() logger.info("[crawling] ========== START ==========") - logger.info(f"[crawling] URL: {request_body.url[:80]}...") + logger.info(f"[crawling] URL: {url[:80]}...") # ========== Step 1: 네이버 지도 크롤링 ========== step1_start = time.perf_counter() logger.info("[crawling] Step 1: 네이버 지도 크롤링 시작...") try: - scraper = NvMapScraper(request_body.url) + scraper = NvMapScraper(url) await scraper.scrap() except GraphQLException as e: step1_elapsed = (time.perf_counter() - step1_start) * 1000 @@ -288,6 +325,23 @@ async def crawling(request_body: CrawlingRequest): } +async def _autocomplete_logic(autocomplete_item:dict): + step1_start = time.perf_counter() + try: + async with NvMapPwScraper() as pw_scraper: + new_url = await pw_scraper.get_place_id_url(autocomplete_item) + except Exception as e: + step1_elapsed = (time.perf_counter() - step1_start) * 1000 + logger.error( + f"[crawling] Autocomplete FAILED - 자동완성 예기치 않은 오류: {e} ({step1_elapsed:.1f}ms)" + ) + logger.exception("[crawling] Autocomplete 상세 오류:") + raise HTTPException( + status_code=status.HTTP_502_BAD_GATEWAY, + detail="자동완성 place id 추출 실패", + ) from e + return new_url + def _extract_image_name(url: str, index: int) -> str: """URL에서 이미지 이름 추출 또는 기본 이름 생성""" try: diff --git a/app/home/schemas/home_schema.py 
b/app/home/schemas/home_schema.py index 249c8de..069aa6b 100644 --- a/app/home/schemas/home_schema.py +++ b/app/home/schemas/home_schema.py @@ -122,6 +122,22 @@ class CrawlingRequest(BaseModel): url: str = Field(..., description="네이버 지도 장소 URL") +class AutoCompleteRequest(BaseModel): + """자동완성 요청 스키마""" + + model_config = ConfigDict( + json_schema_extra={ + "example": { + 'title': '스테이,머뭄', + 'address': '전북특별자치도 군산시 신흥동 63-18', + 'roadAddress': '전북특별자치도 군산시 절골길 18', + } + } + ) + + title: str = Field(..., description="네이버 검색 place API Title") + address: str = Field(..., description="네이버 검색 place API 지번주소") + roadAddress: Optional[str] = Field(None, description="네이버 검색 place API 도로명주소") class ProcessedInfo(BaseModel): """가공된 장소 정보 스키마""" diff --git a/app/utils/nvMapPwScraper.py b/app/utils/nvMapPwScraper.py index d724764..6b3f2a6 100644 --- a/app/utils/nvMapPwScraper.py +++ b/app/utils/nvMapPwScraper.py @@ -2,7 +2,7 @@ import asyncio from playwright.async_api import async_playwright from urllib import parse -class nvMapPwScraper(): +class NvMapPwScraper(): # cls vars is_ready = False _playwright = None @@ -107,7 +107,7 @@ patchedGetter.toString();''') if "/place/" in self.page.url: return self.page.url - if (count == self._max_retry / 2): - raise Exception("Failed to identify place id. loading timeout") - else: - raise Exception("Failed to identify place id. item is ambiguous") + # if (count == self._max_retry / 2): + # raise Exception("Failed to identify place id. loading timeout") + # else: + # raise Exception("Failed to identify place id. item is ambiguous")