diff --git a/app/core/common.py b/app/core/common.py
index 520fab4..85cb08d 100644
--- a/app/core/common.py
+++ b/app/core/common.py
@@ -5,7 +5,7 @@ from contextlib import asynccontextmanager
from fastapi import FastAPI
from app.utils.logger import get_logger
-
+from app.utils.nvMapPwScraper import NvMapPwScraper
logger = get_logger("core")
@@ -24,6 +24,7 @@ async def lifespan(app: FastAPI):
await create_db_tables()
logger.info("Database tables created (DEBUG mode)")
+ await NvMapPwScraper.initiate_scraper()
except asyncio.TimeoutError:
logger.error("Database initialization timed out")
# 타임아웃 시 앱 시작 중단하려면 raise, 계속하려면 pass
diff --git a/app/home/api/routers/v1/home.py b/app/home/api/routers/v1/home.py
index 489bf2d..2250d40 100644
--- a/app/home/api/routers/v1/home.py
+++ b/app/home/api/routers/v1/home.py
@@ -13,6 +13,7 @@ from sqlalchemy.ext.asyncio import AsyncSession
from app.database.session import get_session, AsyncSessionLocal
from app.home.models import Image
from app.home.schemas.home_schema import (
+ AutoCompleteRequest,
CrawlingRequest,
CrawlingResponse,
ErrorResponse,
@@ -27,6 +28,7 @@ from app.utils.chatgpt_prompt import ChatgptService
from app.utils.common import generate_task_id
from app.utils.logger import get_logger
from app.utils.nvMapScraper import NvMapScraper, GraphQLException
+from app.utils.nvMapPwScraper import NvMapPwScraper
from app.utils.prompts.prompts import marketing_prompt
from config import MEDIA_ROOT
@@ -105,17 +107,52 @@ def _extract_region_from_address(road_address: str | None) -> str:
tags=["Crawling"],
)
async def crawling(request_body: CrawlingRequest):
- """네이버 지도 장소 크롤링"""
+ return await _crawling_logic(request_body.url)
+
+@router.post(
+ "/autocomplete",
+ summary="네이버 자동완성 크롤링",
+ description="""
+네이버 검색 API 정보를 활용하여 Place ID를 추출한 뒤 자동으로 크롤링합니다.
+
+## 요청 필드
+- **url**: 네이버 지도 장소 URL (필수)
+
+## 반환 정보
+- **image_list**: 장소 이미지 URL 목록
+- **image_count**: 이미지 개수
+- **processed_info**: 가공된 장소 정보 (customer_name, region, detail_region_info)
+ """,
+ response_model=CrawlingResponse,
+ response_description="크롤링 결과",
+ responses={
+ 200: {"description": "크롤링 성공", "model": CrawlingResponse},
+ 400: {
+ "description": "잘못된 URL",
+ "model": ErrorResponse,
+ },
+ 502: {
+ "description": "크롤링 실패",
+ "model": ErrorResponse,
+ },
+ },
+ tags=["Crawling"],
+)
+async def autocomplete_crawling(request_body: AutoCompleteRequest):
+    url = await _autocomplete_logic(request_body.model_dump())
+ return await _crawling_logic(url)
+
+async def _crawling_logic(url: str):
request_start = time.perf_counter()
logger.info("[crawling] ========== START ==========")
- logger.info(f"[crawling] URL: {request_body.url[:80]}...")
+ logger.info(f"[crawling] URL: {url[:80]}...")
# ========== Step 1: 네이버 지도 크롤링 ==========
step1_start = time.perf_counter()
logger.info("[crawling] Step 1: 네이버 지도 크롤링 시작...")
try:
- scraper = NvMapScraper(request_body.url)
+ scraper = NvMapScraper(url)
await scraper.scrap()
except GraphQLException as e:
step1_elapsed = (time.perf_counter() - step1_start) * 1000
@@ -288,6 +325,23 @@ async def crawling(request_body: CrawlingRequest):
}
+async def _autocomplete_logic(autocomplete_item: dict) -> str:
+ step1_start = time.perf_counter()
+ try:
+ async with NvMapPwScraper() as pw_scraper:
+ new_url = await pw_scraper.get_place_id_url(autocomplete_item)
+ except Exception as e:
+ step1_elapsed = (time.perf_counter() - step1_start) * 1000
+ logger.error(
+ f"[crawling] Autocomplete FAILED - 자동완성 예기치 않은 오류: {e} ({step1_elapsed:.1f}ms)"
+ )
+ logger.exception("[crawling] Autocomplete 상세 오류:")
+ raise HTTPException(
+ status_code=status.HTTP_502_BAD_GATEWAY,
+ detail="자동완성 place id 추출 실패",
+ )
+ return new_url
+
def _extract_image_name(url: str, index: int) -> str:
"""URL에서 이미지 이름 추출 또는 기본 이름 생성"""
try:
diff --git a/app/home/schemas/home_schema.py b/app/home/schemas/home_schema.py
index 249c8de..069aa6b 100644
--- a/app/home/schemas/home_schema.py
+++ b/app/home/schemas/home_schema.py
@@ -122,6 +122,22 @@ class CrawlingRequest(BaseModel):
url: str = Field(..., description="네이버 지도 장소 URL")
+class AutoCompleteRequest(BaseModel):
+ """자동완성 요청 스키마"""
+
+ model_config = ConfigDict(
+ json_schema_extra={
+ "example": {
+ 'title': '스테이,머뭄',
+ 'address': '전북특별자치도 군산시 신흥동 63-18',
+ 'roadAddress': '전북특별자치도 군산시 절골길 18',
+ }
+ }
+ )
+
+ title: str = Field(..., description="네이버 검색 place API Title")
+ address: str = Field(..., description="네이버 검색 place API 지번주소")
+ roadAddress: Optional[str] = Field(None, description="네이버 검색 place API 도로명주소")
class ProcessedInfo(BaseModel):
"""가공된 장소 정보 스키마"""
diff --git a/app/utils/nvMapPwScraper.py b/app/utils/nvMapPwScraper.py
index d724764..6b3f2a6 100644
--- a/app/utils/nvMapPwScraper.py
+++ b/app/utils/nvMapPwScraper.py
@@ -2,7 +2,7 @@ import asyncio
from playwright.async_api import async_playwright
from urllib import parse
-class nvMapPwScraper():
+class NvMapPwScraper():
# cls vars
is_ready = False
_playwright = None
@@ -107,7 +107,7 @@ patchedGetter.toString();''')
if "/place/" in self.page.url:
return self.page.url
- if (count == self._max_retry / 2):
- raise Exception("Failed to identify place id. loading timeout")
- else:
- raise Exception("Failed to identify place id. item is ambiguous")
+        # Retry budget exhausted without resolving a /place/ URL: fail loudly.
+        # Silently returning None here would surface later as an unhandled
+        # TypeError in _crawling_logic; raising lets callers map this to a 502.
+        if (count == self._max_retry / 2):
+            raise Exception("Failed to identify place id. loading timeout")
+        else:
+            raise Exception("Failed to identify place id. item is ambiguous")