483 lines
18 KiB
Python
483 lines
18 KiB
Python
"""
|
|
네이버 플레이스 검색 API 모듈
|
|
업체명으로 검색하여 place_id를 찾고, 상세정보(사진 포함)를 조회
|
|
"""
|
|
|
|
import asyncio
import os
import re
from dataclasses import dataclass, field
from html import unescape
from typing import Optional, List, Dict, Any
from urllib.parse import quote

import requests
|
|
|
|
|
|
# ============================================================
|
|
# Data Classes
|
|
# ============================================================
|
|
|
|
@dataclass
class NaverConfig:
    """Settings for the Naver Open API (Local Search).

    The client id and secret are taken from the ``NAVER_CLIENT_ID`` and
    ``NAVER_CLIENT_SECRET`` environment variables when those are set to a
    non-empty value. Explicit constructor arguments always win.

    SECURITY NOTE: the literal fallback values below are kept only so that
    existing callers relying on ``NaverConfig()`` keep working. Credentials
    committed to source control should be considered leaked; rotate them and
    supply the new ones through the environment.

    Attributes:
        naver_client_id: Value sent as the ``X-Naver-Client-Id`` header.
        naver_client_secret: Value sent as the ``X-Naver-Client-Secret`` header.
        naver_local_api_url: Endpoint of the Local Search API.
    """

    # default_factory so the environment is read at instantiation time,
    # not once at import time.
    naver_client_id: str = field(
        default_factory=lambda: os.environ.get("NAVER_CLIENT_ID") or "cp5MzIsZ8PSQPeQQkVKR"
    )
    naver_client_secret: str = field(
        default_factory=lambda: os.environ.get("NAVER_CLIENT_SECRET") or "lhdrHgx31G"
    )
    naver_local_api_url: str = "https://openapi.naver.com/v1/search/local.json"
|
|
|
|
|
|
@dataclass
class PlaceDetailInfo:
    """Detail information for a single Naver Place listing."""

    # Naver Place identifier the record was fetched with.
    place_id: str
    # Business name.
    name: str
    # Category string; may contain ">"-separated levels (it is split on ">"
    # when building the crawling response).
    category: str
    # Lot-number address.
    address: str
    # Road-name address; preferred over `address` when non-empty.
    road_address: str
    # Phone number (get_place_detail currently always stores "").
    phone: str
    # Free-text description (get_place_detail currently always stores "").
    description: str
    # Image URLs.
    images: List[str]
    # Business-hours text.
    business_hours: str
    # Homepage URL (get_place_detail currently always stores "").
    homepage: str
    # Keywords; the first five are turned into hashtags in the crawling response.
    keywords: List[str]
    # Facility / service labels.
    facilities: List[str]
|
|
|
|
|
|
# ============================================================
|
|
# Main API Class
|
|
# ============================================================
|
|
|
|
class NaverPlaceAPI:
    """Client for looking up Naver Place listings.

    Main entry points:
        - quick_search(): fast autocomplete-style search (no place_id lookup).
        - autocomplete_search(): search that also resolves place_ids, falling
          back to a headless browser when the HTTP endpoints yield none.
        - get_place_detail(): fetch detail information for a place_id.
        - convert_to_crawling_response(): convert a PlaceDetailInfo into the
          CrawlingResponse-style dict.
    """

    # A category string containing any of these substrings counts as lodging.
    ACCOMMODATION_CATEGORIES = [
        "펜션", "숙박", "호텔", "모텔", "리조트", "게스트하우스",
        "민박", "글램핑", "캠핑", "풀빌라", "스테이", "독채"
    ]

    # Extra substrings that mark a line of browser text as a category line.
    _EXTRA_CATEGORY_KEYWORDS = ['장소대여', '전통숙소']
    # UI badge texts that must never be taken as a business name.
    _NON_TITLE_LINES = ('네이버페이', '톡톡', '쿠폰', '알림받기')
    # Province / metropolitan-city names; a line starting with one of these
    # is treated as the address line.
    _REGION_PREFIXES = (
        '서울', '부산', '대구', '인천', '광주', '대전', '울산', '세종',
        '경기', '강원', '충북', '충남', '전북', '전남', '경북', '경남', '제주',
    )
    # Patterns that may carry a place id inside the search iframe HTML.
    _HTML_PLACE_ID_PATTERNS = (r'"id":"(\d+)"', r'/place/(\d+)', r'data-id="(\d+)"')
    _HTML_TAG_RE = re.compile(r"<[^>]+>")

    def __init__(self, config: "Optional[NaverConfig]" = None):
        """Build the client.

        Args:
            config: API settings; a default ``NaverConfig()`` is created when
                omitted.
        """
        self.config = config or NaverConfig()
        self.search_url = self.config.naver_local_api_url
        # Headers for the official Open API.
        self.headers = {
            "X-Naver-Client-Id": self.config.naver_client_id,
            "X-Naver-Client-Secret": self.config.naver_client_secret,
        }
        # Browser-like headers for the map.naver.com endpoints.
        self.browser_headers = {
            "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36",
            "Accept": "application/json, text/plain, */*",
            "Accept-Language": "ko-KR,ko;q=0.9",
            "Referer": "https://map.naver.com/",
        }

    # ============================================================
    # Public Methods
    # ============================================================

    async def quick_search(self, query: str) -> List[Dict[str, Any]]:
        """Fast autocomplete search (does not resolve place_ids).

        Args:
            query: Search text.

        Returns:
            ``[{"title": ..., "category": ..., "address": ...}, ...]``;
            an empty list on a non-200 response or any error.
        """
        try:
            response = await asyncio.to_thread(
                requests.get,
                self.search_url,
                headers=self.headers,
                params={"query": query, "display": 10},
                timeout=5,
            )

            if response.status_code != 200:
                return []

            items = response.json().get("items", [])
            return [
                {
                    "title": self._clean_html(item.get("title", "")),
                    "category": item.get("category", ""),
                    "address": item.get("roadAddress") or item.get("address", ""),
                }
                for item in items
            ]

        except Exception:
            # Deliberately best-effort: autocomplete must never raise.
            return []

    async def autocomplete_search(self, query: str) -> List[Dict[str, Any]]:
        """Search and resolve place_ids (browser fallback when the API fails).

        Args:
            query: Search text, or a Naver Map URL containing a place id.

        Returns:
            ``[{"place_id": "123", "title": ..., "category": ...,
            "address": ..., "is_accommodation": True}, ...]``
        """
        # A URL is resolved directly through its embedded place_id.
        if query.startswith("http"):
            place_id = self._extract_place_id_from_url(query)
            if place_id:
                detail = await self.get_place_detail(place_id)
                if detail:
                    return [{
                        "place_id": place_id,
                        "title": detail.name,
                        "category": detail.category,
                        "address": detail.road_address or detail.address,
                        "is_accommodation": self._is_accommodation(detail.category),
                    }]
            return []

        # First try the HTTP APIs.
        api_results = await self._search_with_api(query)
        if api_results and any(r.get("place_id") for r in api_results):
            return api_results

        # No place_id from the API: fall back to the browser.
        print("API에서 place_id를 찾지 못함. 브라우저 검색 시도...")
        browser_results = await self._search_with_browser(query)

        if browser_results and any(r.get("place_id") for r in browser_results):
            # Prefer the richer API rows, patched with browser-found ids.
            if api_results:
                self._merge_place_ids(api_results, browser_results)
                return api_results
            return browser_results

        return api_results or []

    async def get_place_detail(self, place_id: str) -> "Optional[PlaceDetailInfo]":
        """Fetch detail information for a place.

        Args:
            place_id: Naver Place ID.

        Returns:
            A ``PlaceDetailInfo``, or ``None`` when ``place_id`` is empty, the
            request fails, or the response carries no ``placeDetail``.
        """
        if not place_id:
            return None

        try:
            response = await asyncio.to_thread(
                requests.get,
                f"https://map.naver.com/p/api/place/summary/{place_id}",
                headers={**self.browser_headers, "Referer": f"https://map.naver.com/p/entry/place/{place_id}"},
                # Without a timeout requests may block this worker thread forever.
                timeout=10,
            )

            if response.status_code != 200:
                print(f"Detail API Error: {response.status_code}")
                return None

            place = (response.json().get("data") or {}).get("placeDetail") or {}
            if not place:
                print("No placeDetail in response")
                return None

            return PlaceDetailInfo(
                place_id=place_id,
                name=place.get("name", ""),
                category=self._parse_category(place.get("category")),
                address=self._parse_address(place.get("address"), "address"),
                road_address=self._parse_address(place.get("address"), "roadAddress"),
                phone="",
                description="",
                images=self._parse_images(place.get("images")),
                business_hours=self._parse_business_hours(place.get("businessHours")),
                homepage="",
                keywords=self._parse_keywords(place.get("visitorReviews")),
                facilities=self._parse_facilities(place.get("labels")),
            )

        except Exception as e:
            print(f"Detail fetch error: {e}")
            return None

    def convert_to_crawling_response(self, detail: "PlaceDetailInfo") -> Dict[str, Any]:
        """Convert a PlaceDetailInfo into the CrawlingResponse-style dict.

        Args:
            detail: Place detail record.

        Returns:
            Dict with ``image_list``, ``image_count``, ``processed_info`` and
            ``marketing_analysis`` keys.
        """
        address = detail.road_address or detail.address
        address_parts = address.split() if address else []
        # The first whitespace-separated token of the address is the region.
        region = address_parts[0] if address_parts else ""

        # Hashtags: region first, then up to five keywords.
        tags = []
        if region:
            tags.append(f"#{region}")
        for keyword in detail.keywords[:5]:
            tags.append(f"#{keyword}" if not keyword.startswith("#") else keyword)

        # Facilities: the known labels plus each category level, de-duplicated.
        facilities = detail.facilities[:]
        if detail.category:
            for cat in detail.category.split(">"):
                cat = cat.strip()
                if cat and cat not in facilities:
                    facilities.append(cat)

        return {
            "image_list": detail.images,
            "image_count": len(detail.images),
            "processed_info": {
                "customer_name": detail.name,
                "region": region,
                "detail_region_info": address
            },
            "marketing_analysis": {
                "report": self._generate_report(detail, address),
                "tags": tags,
                "facilities": facilities
            }
        }

    # ============================================================
    # Private Methods - Search
    # ============================================================

    @staticmethod
    def _scale_coord(raw) -> float:
        """Turn a Local Search ``mapx``/``mapy`` value into lng/lat (value / 10**7).

        Returns 0 when the value is missing or cannot be parsed, so a single
        malformed item does not abort the whole search.
        """
        if not raw:
            return 0
        try:
            return float(raw) / 10000000
        except (TypeError, ValueError):
            return 0

    async def _search_with_api(self, query: str) -> List[Dict[str, Any]]:
        """Search via the Local Search API, then resolve place_ids by coordinate.

        Returns:
            One dict per hit with ``place_id`` (possibly ""), ``title``,
            ``category``, ``address``, ``lng``, ``lat`` and
            ``is_accommodation``; an empty list on failure.
        """
        try:
            response = await asyncio.to_thread(
                requests.get,
                self.search_url,
                headers=self.headers,
                params={"query": query, "display": 5},
                timeout=10,
            )

            if response.status_code != 200:
                print(f"Local Search API Error: {response.status_code}")
                return []

            results = []
            for item in response.json().get("items", []):
                category = item.get("category", "")
                results.append({
                    "place_id": "",
                    "title": self._clean_html(item.get("title", "")),
                    "category": category,
                    "address": item.get("roadAddress") or item.get("address", ""),
                    "lng": self._scale_coord(item.get("mapx", "")),
                    "lat": self._scale_coord(item.get("mapy", "")),
                    "is_accommodation": self._is_accommodation(category),
                })

            # Resolve each hit's place_id from its name and coordinates.
            for result in results:
                result["place_id"] = await self._find_place_id_by_coord(
                    result["title"], result["lng"], result["lat"]
                )

            return results

        except Exception as e:
            print(f"Search error: {e}")
            return []

    async def _find_place_id_by_coord(self, name: str, lng: float, lat: float) -> str:
        """Find a place_id from a business name and coordinates.

        Returns:
            The id of the first place in the response, or "" when the request
            fails, a captcha challenge is returned, or nothing matches.
        """
        try:
            response = await asyncio.to_thread(
                requests.get,
                "https://map.naver.com/p/api/search/allSearch",
                headers=self.browser_headers,
                params={"query": name, "type": "place", "searchCoord": f"{lng};{lat}", "displayCount": 1},
                timeout=5,
            )

            if response.status_code == 200:
                result = response.json().get("result", {})
                # An "ncaptcha" key means the endpoint answered with a challenge.
                if "ncaptcha" not in result:
                    place_list = result.get("place", {}).get("list", [])
                    if place_list:
                        return str(place_list[0].get("id", ""))
            return ""

        except Exception:
            return ""

    async def _search_with_browser(self, query: str) -> List[Dict[str, Any]]:
        """Search for place_ids with a headless Playwright browser.

        Returns:
            Up to 10 parsed results; an empty list when Playwright is not
            installed or the browser session fails.
        """
        try:
            from playwright.async_api import async_playwright
        except ImportError:
            print("playwright가 설치되지 않았습니다. pip install playwright")
            return []

        results = []

        try:
            async with async_playwright() as p:
                browser = await p.chromium.launch(headless=True)
                try:
                    context = await browser.new_context(user_agent=self.browser_headers["User-Agent"])
                    page = await context.new_page()

                    # The query is a URL path segment: percent-encode it so
                    # characters such as "/", "?", "#" or "%" cannot alter the URL.
                    search_url = f"https://map.naver.com/p/search/{quote(query, safe='')}"
                    await page.goto(search_url, wait_until="domcontentloaded", timeout=20000)
                    # Give the result iframe time to render.
                    await page.wait_for_timeout(5000)

                    search_frame = page.frame(name="searchIframe")
                    if search_frame:
                        html = await search_frame.content()
                        text = await search_frame.inner_text('body')
                        results = self._parse_browser_results(html, text)
                finally:
                    # Close the browser even when navigation or parsing fails.
                    await browser.close()

        except Exception as e:
            print(f"Browser search error: {e}")

        return results[:10]

    def _parse_browser_results(self, html: str, text: str) -> List[Dict[str, Any]]:
        """Parse search results out of the search iframe.

        Place ids are collected from ``html`` in order of appearance and
        paired positionally with the businesses recognised in ``text``.

        Args:
            html: HTML of the search iframe.
            text: Visible text of the iframe body.

        Returns:
            Dicts with ``title`` and ``place_id`` and, when recognised,
            ``category``, ``is_accommodation`` and ``address``.
        """
        # Collect candidate ids, de-duplicated with order preserved.
        place_ids = []
        for pattern in self._HTML_PLACE_ID_PATTERNS:
            place_ids.extend(re.findall(pattern, html))
        place_ids = list(dict.fromkeys(place_ids))

        category_keywords = self.ACCOMMODATION_CATEGORIES + self._EXTRA_CATEGORY_KEYWORDS

        results = []
        current_place = {}
        place_index = 0

        for line in text.split('\n'):
            line = line.strip()
            if not line:
                continue

            # A line starting with '이미지수' closes the current entry.
            if line.startswith('이미지수'):
                if current_place.get('title') and place_index < len(place_ids):
                    current_place['place_id'] = place_ids[place_index]
                    results.append(current_place)
                    place_index += 1
                current_place = {}
                continue

            # The first usable line of an entry is taken as its title.
            if not current_place.get('title') and len(line) > 1 and not line.isdigit():
                if line not in self._NON_TITLE_LINES:
                    current_place['title'] = line
                continue

            if not current_place.get('category'):
                if any(keyword in line for keyword in category_keywords):
                    current_place['category'] = line
                    current_place['is_accommodation'] = self._is_accommodation(line)

            if not current_place.get('address'):
                # str.startswith accepts a tuple of prefixes.
                if line.startswith(self._REGION_PREFIXES):
                    current_place['address'] = line

        # Flush the last entry, which has no closing '이미지수' line.
        if current_place.get('title') and place_index < len(place_ids):
            current_place['place_id'] = place_ids[place_index]
            results.append(current_place)

        return results

    def _merge_place_ids(self, api_results: List[Dict], browser_results: List[Dict]):
        """Copy place_ids from browser results onto API results, in place.

        Two rows match when one title contains the other. Rows without a
        title are skipped: an empty string is a substring of every string and
        would otherwise match everything.
        """
        for api_r in api_results:
            api_title = api_r.get("title")
            if not api_title:
                continue
            for br_r in browser_results:
                br_title = br_r.get("title", "")
                if not br_r.get("place_id") or not br_title:
                    continue
                if api_title in br_title or br_title in api_title:
                    api_r["place_id"] = br_r["place_id"]
                    break

    # ============================================================
    # Private Methods - Parsing
    # ============================================================

    def _clean_html(self, text: str) -> str:
        """Strip HTML tags and decode HTML entities.

        Tags are removed before entities are decoded so that an escaped
        ``&lt;b&gt;`` stays as literal text.
        """
        if not text:
            return ""
        return unescape(self._HTML_TAG_RE.sub("", text))

    def _is_accommodation(self, category: str) -> bool:
        """Return True when the category names a lodging type."""
        return bool(category and any(k in category for k in self.ACCOMMODATION_CATEGORIES))

    def _extract_place_id_from_url(self, url: str) -> str:
        """Extract the numeric place_id from a URL, or "" when none is found."""
        # r'/place/(\d+)' also matches '/entry/place/<id>' URLs.
        for pattern in (r'/place/(\d+)', r'place_id=(\d+)'):
            match = re.search(pattern, url)
            if match:
                return match.group(1)
        return ""

    def _parse_category(self, category_data) -> str:
        """Return the category string from a dict (``"category"`` key) or a str."""
        if isinstance(category_data, dict):
            return category_data.get("category", "")
        return category_data if isinstance(category_data, str) else ""

    def _parse_address(self, address_data, key: str) -> str:
        """Return ``address_data[key]``; a plain string is used only for ``"address"``."""
        if isinstance(address_data, dict):
            return address_data.get(key, "")
        return address_data if isinstance(address_data, str) and key == "address" else ""

    def _parse_images(self, images_data, limit: int = 20) -> List[str]:
        """Collect up to ``limit`` image URLs from ``images_data["images"]``.

        Each entry may be a URL string or a dict, in which case ``origin``,
        ``url`` and ``thumbnail`` are tried in that order. A missing or null
        ``images`` value yields an empty list.
        """
        if not isinstance(images_data, dict):
            return []
        raw_images = images_data.get("images")
        if not isinstance(raw_images, list):
            return []

        images = []
        for img in raw_images[:limit]:
            if isinstance(img, dict):
                url = img.get("origin") or img.get("url") or img.get("thumbnail")
                if url:
                    images.append(url)
            elif isinstance(img, str):
                images.append(img)
        return images

    def _parse_business_hours(self, hours_data) -> str:
        """Return the ``status`` text from a dict, or the value itself if a str."""
        if isinstance(hours_data, dict):
            return hours_data.get("status", "")
        return hours_data if isinstance(hours_data, str) else ""

    def _parse_keywords(self, reviews_data) -> List[str]:
        """Return ``[displayText]`` from the review data, or ``[]`` when absent."""
        if isinstance(reviews_data, dict):
            display_text = reviews_data.get("displayText", "")
            return [display_text] if display_text else []
        return []

    def _parse_facilities(self, labels_data) -> List[str]:
        """Map truthy ``booking`` / ``nPay`` / ``talktalk`` flags to facility labels."""
        facilities = []
        if isinstance(labels_data, dict):
            if labels_data.get("booking"):
                facilities.append("예약가능")
            if labels_data.get("nPay"):
                facilities.append("네이버페이")
            if labels_data.get("talktalk"):
                facilities.append("톡톡")
        return facilities

    def _generate_report(self, detail: "PlaceDetailInfo", address: str) -> str:
        """Build the Markdown report text for a place."""
        return (
            f"## 업체 정보\n{detail.name}은(는) {detail.category} 카테고리에 속한 업체입니다.\n\n"
            f"## 위치\n{address}\n\n"
            f"## 연락처\n{detail.phone or '정보 없음'}\n\n"
            f"## 영업시간\n{detail.business_hours or '정보 없음'}\n\n"
            f"## 설명\n{detail.description or '정보 없음'}"
        )
|