Add autocomplete feature

insta
jaehwang 2026-01-26 16:59:13 +09:00
parent fc88eedfa2
commit f29ac29649
4 changed files with 80 additions and 9 deletions

View File

@@ -5,7 +5,7 @@ from contextlib import asynccontextmanager
from fastapi import FastAPI
from app.utils.logger import get_logger
from app.utils.nvMapPwScraper import NvMapPwScraper
logger = get_logger("core")
@@ -24,6 +24,7 @@ async def lifespan(app: FastAPI):
        await create_db_tables()
        logger.info("Database tables created (DEBUG mode)")
        await NvMapPwScraper.initiate_scraper()
    except asyncio.TimeoutError:
        logger.error("Database initialization timed out")
        # raise here to abort app startup on timeout, or pass to continue
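For context, a minimal sketch of how the lifespan hook above may be wired, assuming the TimeoutError comes from bounding database setup with asyncio.wait_for; the 30-second timeout and the exact call sites are assumptions, and only the NvMapPwScraper.initiate_scraper() line is introduced by this commit.

# Sketch only, not the project's actual lifespan implementation.
# create_db_tables and logger are the application objects shown in the diff above.
import asyncio
from contextlib import asynccontextmanager
from fastapi import FastAPI

from app.utils.nvMapPwScraper import NvMapPwScraper

@asynccontextmanager
async def lifespan(app: FastAPI):
    try:
        # assumed: DB setup is bounded so a hung connection surfaces as
        # asyncio.TimeoutError instead of blocking startup indefinitely
        await asyncio.wait_for(create_db_tables(), timeout=30)
        await NvMapPwScraper.initiate_scraper()  # warm up the shared Playwright scraper
    except asyncio.TimeoutError:
        logger.error("Database initialization timed out")
        raise  # or `pass` to start the app without a ready database
    yield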

View File

@@ -13,6 +13,7 @@ from sqlalchemy.ext.asyncio import AsyncSession
from app.database.session import get_session, AsyncSessionLocal
from app.home.models import Image
from app.home.schemas.home_schema import (
    AutoCompleteRequest,
    CrawlingRequest,
    CrawlingResponse,
    ErrorResponse,
@@ -27,6 +28,7 @@ from app.utils.chatgpt_prompt import ChatgptService
from app.utils.common import generate_task_id
from app.utils.logger import get_logger
from app.utils.nvMapScraper import NvMapScraper, GraphQLException
from app.utils.nvMapPwScraper import NvMapPwScraper
from app.utils.prompts.prompts import marketing_prompt
from config import MEDIA_ROOT
@@ -105,17 +107,52 @@ def _extract_region_from_address(road_address: str | None) -> str:
    tags=["Crawling"],
)
async def crawling(request_body: CrawlingRequest):
    """Crawl a Naver Map place"""
    return await _crawling_logic(request_body.url)

@router.post(
    "/autocomplete",
    summary="Naver autocomplete crawling",
    description="""
Extracts the Place ID from Naver Search API data, then crawls the place automatically.
## Request fields
- **title**: place title from the Naver Search API (required)
- **address**: lot-number address from the Naver Search API (required)
- **roadAddress**: road-name address from the Naver Search API (optional)
## Response fields
- **image_list**: list of place image URLs
- **image_count**: number of images
- **processed_info**: processed place info (customer_name, region, detail_region_info)
""",
    response_model=CrawlingResponse,
    response_description="Crawling result",
    responses={
        200: {"description": "Crawling succeeded", "model": CrawlingResponse},
        400: {
            "description": "Invalid URL",
            "model": ErrorResponse,
        },
        502: {
            "description": "Crawling failed",
            "model": ErrorResponse,
        },
    },
    tags=["Crawling"],
)
async def autocomplete_crawling(request_body: AutoCompleteRequest):
    url = await _autocomplete_logic(request_body.model_dump())
    return await _crawling_logic(url)
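A hedged usage sketch for the new endpoint. The "/crawling" mount prefix, host, and port are assumptions; the payload fields mirror the AutoCompleteRequest schema and its example further down in this commit.

# Illustrative client call; adjust the path to the router's actual mount point.
import asyncio
import httpx

async def main() -> None:
    payload = {
        "title": "<b>스테이</b>,<b>머뭄</b>",            # Naver search API title (may contain <b> tags)
        "address": "전북특별자치도 군산시 신흥동 63-18",    # lot-number address
        "roadAddress": "전북특별자치도 군산시 절골길 18",   # road-name address (optional)
    }
    async with httpx.AsyncClient(base_url="http://localhost:8000") as client:
        resp = await client.post("/crawling/autocomplete", json=payload, timeout=120)
        resp.raise_for_status()
        data = resp.json()
        print(data["image_count"], data["processed_info"])

asyncio.run(main())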

async def _crawling_logic(url: str):
    request_start = time.perf_counter()
    logger.info("[crawling] ========== START ==========")
    logger.info(f"[crawling] URL: {request_body.url[:80]}...")
    logger.info(f"[crawling] URL: {url[:80]}...")
    # ========== Step 1: Naver Map crawling ==========
    step1_start = time.perf_counter()
    logger.info("[crawling] Step 1: starting Naver Map crawl...")
    try:
        scraper = NvMapScraper(request_body.url)
        scraper = NvMapScraper(url)
        await scraper.scrap()
    except GraphQLException as e:
        step1_elapsed = (time.perf_counter() - step1_start) * 1000
@@ -288,6 +325,23 @@ async def crawling(request_body: CrawlingRequest):
    }


async def _autocomplete_logic(autocomplete_item: dict):
    step1_start = time.perf_counter()
    try:
        async with NvMapPwScraper() as pw_scraper:
            new_url = await pw_scraper.get_place_id_url(autocomplete_item)
    except Exception as e:
        step1_elapsed = (time.perf_counter() - step1_start) * 1000
        logger.error(
            f"[crawling] Autocomplete FAILED - unexpected autocomplete error: {e} ({step1_elapsed:.1f}ms)"
        )
        logger.exception("[crawling] Autocomplete error details:")
        raise HTTPException(
            status_code=status.HTTP_502_BAD_GATEWAY,
            detail="Failed to extract place id from autocomplete",
        )
    return new_url
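The `async with NvMapPwScraper() as pw_scraper:` call relies on the scraper implementing the async context-manager protocol. A minimal sketch of that shape follows; the method bodies and attribute names are illustrative, not the actual implementation.

# Protocol sketch only; _browser and page handling are assumptions.
class PwScraperShape:
    async def __aenter__(self):
        self.page = await self._browser.new_page()  # e.g. a page from the shared Playwright browser
        return self

    async def __aexit__(self, exc_type, exc, tb):
        await self.page.close()  # always release the page, even when the lookup raised
        return False             # don't swallow exceptions; the caller maps them to HTTP 502

    async def get_place_id_url(self, item: dict) -> str:
        ...  # resolve the autocomplete item to a .../place/<id> URL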

def _extract_image_name(url: str, index: int) -> str:
    """Extract the image name from the URL, or generate a default name"""
    try:

View File

@@ -122,6 +122,22 @@ class CrawlingRequest(BaseModel):
    url: str = Field(..., description="Naver Map place URL")


class AutoCompleteRequest(BaseModel):
    """Autocomplete request schema"""

    model_config = ConfigDict(
        json_schema_extra={
            "example": {
                "title": "<b>스테이</b>,<b>머뭄</b>",
                "address": "전북특별자치도 군산시 신흥동 63-18",
                "roadAddress": "전북특별자치도 군산시 절골길 18",
            }
        }
    )

    title: str = Field(..., description="Title from the Naver place search API")
    address: str = Field(..., description="Lot-number address from the Naver place search API")
    roadAddress: Optional[str] = Field(None, description="Road-name address from the Naver place search API")
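A quick check of the schema above, assuming Pydantic v2 (implied by the use of ConfigDict); model_validate and model_dump are the v2 equivalents of parse_obj and dict.

from app.home.schemas.home_schema import AutoCompleteRequest

req = AutoCompleteRequest.model_validate({
    "title": "<b>스테이</b>,<b>머뭄</b>",
    "address": "전북특별자치도 군산시 신흥동 63-18",
    # roadAddress is optional and defaults to None when omitted
})
print(req.model_dump())
# {'title': '<b>스테이</b>,<b>머뭄</b>', 'address': '전북특별자치도 군산시 신흥동 63-18', 'roadAddress': None}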

class ProcessedInfo(BaseModel):
    """Processed place info schema"""

View File

@@ -2,7 +2,7 @@ import asyncio
from playwright.async_api import async_playwright
from urllib import parse


class nvMapPwScraper():
class NvMapPwScraper():
    # cls vars
    is_ready = False
    _playwright = None
@@ -107,7 +107,7 @@ patchedGetter.toString();''')
            if "/place/" in self.page.url:
                return self.page.url
            if (count == self._max_retry / 2):
                raise Exception("Failed to identify place id. loading timeout")
            else:
                raise Exception("Failed to identify place id. item is ambiguous")
            # if (count == self._max_retry / 2):
            #     raise Exception("Failed to identify place id. loading timeout")
            # else:
            #     raise Exception("Failed to identify place id. item is ambiguous")