# File: o2o-castad-frontend/test/main.py
#
# Size: 483 lines, 18 KiB
# Language: Python

"""
Naver Place search API module.

Searches by business name to find a place_id, then fetches the place's
detail information (including photos).
"""
import asyncio
import re
from dataclasses import dataclass
from html import unescape
from typing import Optional, List, Dict, Any
from urllib.parse import quote

import requests
# ============================================================
# Data Classes
# ============================================================
@dataclass
class NaverConfig:
    """Naver API settings.

    Holds the Open API client credentials and the Local Search endpoint
    URL. Every field has a default, so ``NaverConfig()`` is usable as-is;
    pass keyword arguments to override individual values.
    """

    # SECURITY(review): these defaults look like real credentials committed
    # to source control. They are kept unchanged so existing callers of
    # ``NaverConfig()`` keep working, but they should be rotated and
    # supplied from the environment or a secret store instead.
    naver_client_id: str = "cp5MzIsZ8PSQPeQQkVKR"
    naver_client_secret: str = "lhdrHgx31G"
    # Endpoint of the Naver Local Search Open API.
    naver_local_api_url: str = "https://openapi.naver.com/v1/search/local.json"
@dataclass
class PlaceDetailInfo:
    """Detail information for a single Naver Place listing."""

    # Naver Place identifier the detail was requested with.
    place_id: str
    # Display name of the business.
    name: str
    # Category label of the business.
    category: str
    # Lot-number style address, and the road-name address.
    address: str
    road_address: str
    phone: str
    description: str
    # Image URLs for the place.
    images: List[str]
    business_hours: str
    homepage: str
    keywords: List[str]
    facilities: List[str]
# ============================================================
# Main API Class
# ============================================================
class NaverPlaceAPI:
    """Client for looking up Naver Place listings.

    Main features:
    - quick_search(): fast autocomplete-style search (no place_id lookup)
    - autocomplete_search(): search that includes place_id (browser fallback)
    - get_place_detail(): fetch detail information for a place_id
    - convert_to_crawling_response(): convert to the CrawlingResponse format
    """

    # Substrings that mark a category string as accommodation.
    ACCOMMODATION_CATEGORIES = [
        "펜션", "숙박", "호텔", "모텔", "리조트", "게스트하우스",
        "민박", "글램핑", "캠핑", "풀빌라", "스테이", "독채"
    ]

    # Highlight markup stripped from search result titles.
    _BOLD_TAG_PATTERN = re.compile(r"</?b>")
    # Patterns used to pull candidate place ids out of the search iframe HTML.
    _PLACE_ID_PATTERNS = (
        re.compile(r'"id":"(\d+)"'),
        re.compile(r'/place/(\d+)'),
        re.compile(r'data-id="(\d+)"'),
    )
    # Patterns used to pull a place id out of a map URL. A URL of the form
    # ``/entry/place/<id>`` is already matched by the ``/place/`` pattern.
    _URL_PLACE_ID_PATTERNS = (
        re.compile(r'/place/(\d+)'),
        re.compile(r'place_id=(\d+)'),
    )
    # UI badge texts that must never be taken as a place title.
    _NON_TITLE_LINES = frozenset(['네이버페이', '톡톡', '쿠폰', '알림받기'])
    # Extra category keywords recognised only when parsing browser text.
    _EXTRA_CATEGORY_KEYWORDS = ['장소대여', '전통숙소']
    # A line starting with one of these region names is treated as an address.
    _REGION_PREFIXES = (
        '서울', '부산', '대구', '인천', '광주', '대전', '울산', '세종',
        '경기', '강원', '충북', '충남', '전북', '전남', '경북', '경남', '제주',
    )

    def __init__(self, config: "Optional[NaverConfig]" = None):
        """Store the configuration and prepare the request headers.

        Args:
            config: API settings; a default ``NaverConfig()`` is used when
                omitted.
        """
        self.config = config or NaverConfig()
        self.search_url = self.config.naver_local_api_url
        # Headers for the Open API (Local Search) requests.
        self.headers = {
            "X-Naver-Client-Id": self.config.naver_client_id,
            "X-Naver-Client-Secret": self.config.naver_client_secret,
        }
        # Browser-like headers for the map.naver.com endpoints.
        self.browser_headers = {
            "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36",
            "Accept": "application/json, text/plain, */*",
            "Accept-Language": "ko-KR,ko;q=0.9",
            "Referer": "https://map.naver.com/",
        }

    # ============================================================
    # Public Methods
    # ============================================================
    async def quick_search(self, query: str) -> List[Dict[str, Any]]:
        """Run a fast autocomplete-style search without resolving place ids.

        Args:
            query: Search text.

        Returns:
            ``[{"title": ..., "category": ..., "address": ...}, ...]``.
            An empty list is returned on a non-200 response or any error
            (best-effort: failures are deliberately not raised).
        """
        try:
            response = await asyncio.to_thread(
                requests.get,
                self.search_url,
                headers=self.headers,
                params={"query": query, "display": 10},
                timeout=5,
            )
            if response.status_code != 200:
                return []
            items = response.json().get("items", [])
            return [
                {
                    "title": self._clean_html(item.get("title", "")),
                    "category": item.get("category", ""),
                    "address": item.get("roadAddress") or item.get("address", ""),
                }
                for item in items
            ]
        except Exception:
            return []

    async def autocomplete_search(self, query: str) -> List[Dict[str, Any]]:
        """Search for places and include their place ids.

        The Local Search API is tried first; when it yields no place id,
        a headless-browser search is used as a fallback.

        Args:
            query: Search text, or a Naver map URL.

        Returns:
            ``[{"place_id": "123", "title": ..., "category": ...,
            "address": ..., "is_accommodation": True}, ...]``
        """
        # A URL is resolved directly through the place id embedded in it.
        if query.startswith("http"):
            place_id = self._extract_place_id_from_url(query)
            if place_id:
                detail = await self.get_place_detail(place_id)
                if detail:
                    return [{
                        "place_id": place_id,
                        "title": detail.name,
                        "category": detail.category,
                        "address": detail.road_address or detail.address,
                        "is_accommodation": self._is_accommodation(detail.category),
                    }]
            return []

        # Search through the API first.
        api_results = await self._search_with_api(query)
        if api_results and any(r.get("place_id") for r in api_results):
            return api_results

        # The API produced no place id: fall back to the browser search.
        print("API에서 place_id를 찾지 못함. 브라우저 검색 시도...")
        browser_results = await self._search_with_browser(query)
        if browser_results and any(r.get("place_id") for r in browser_results):
            # Prefer the API rows, enriched with ids found by the browser.
            if api_results:
                self._merge_place_ids(api_results, browser_results)
                return api_results
            return browser_results
        return api_results or []

    async def get_place_detail(self, place_id: str) -> "Optional[PlaceDetailInfo]":
        """Fetch detail information for a place id.

        Args:
            place_id: Naver Place identifier.

        Returns:
            A ``PlaceDetailInfo``, or ``None`` when ``place_id`` is empty,
            the response is not 200, the payload has no ``placeDetail``,
            or any error occurs.
        """
        if not place_id:
            return None
        try:
            response = await asyncio.to_thread(
                requests.get,
                f"https://map.naver.com/p/api/place/summary/{place_id}",
                headers={**self.browser_headers, "Referer": f"https://map.naver.com/p/entry/place/{place_id}"},
                # Without a timeout requests may block forever and tie up
                # the worker thread.
                timeout=10,
            )
            if response.status_code != 200:
                print(f"Detail API Error: {response.status_code}")
                return None
            pd = response.json().get("data", {}).get("placeDetail", {})
            if not pd:
                print("No placeDetail in response")
                return None
            return PlaceDetailInfo(
                place_id=place_id,
                name=pd.get("name", ""),
                category=self._parse_category(pd.get("category")),
                address=self._parse_address(pd.get("address"), "address"),
                road_address=self._parse_address(pd.get("address"), "roadAddress"),
                phone="",
                description="",
                images=self._parse_images(pd.get("images")),
                business_hours=self._parse_business_hours(pd.get("businessHours")),
                homepage="",
                keywords=self._parse_keywords(pd.get("visitorReviews")),
                facilities=self._parse_facilities(pd.get("labels")),
            )
        except Exception as e:
            print(f"Detail fetch error: {e}")
            return None

    def convert_to_crawling_response(self, detail: "PlaceDetailInfo") -> Dict[str, Any]:
        """Convert a ``PlaceDetailInfo`` into the CrawlingResponse dict shape.

        Args:
            detail: Place detail to convert.

        Returns:
            A dict with ``image_list``, ``image_count``, ``processed_info``
            and ``marketing_analysis`` keys.
        """
        address = detail.road_address or detail.address
        address_parts = address.split() if address else []
        # The first whitespace-separated token of the address is the region.
        region = address_parts[0] if address_parts else ""

        # Build hashtags: the region first, then at most five keywords.
        tags = []
        if region:
            tags.append(f"#{region}")
        for keyword in detail.keywords[:5]:
            tags.append(f"#{keyword}" if not keyword.startswith("#") else keyword)

        # Facilities, extended with each ">"-separated category segment.
        facilities = detail.facilities[:]
        if detail.category:
            for cat in detail.category.split(">"):
                cat = cat.strip()
                if cat and cat not in facilities:
                    facilities.append(cat)

        return {
            "image_list": detail.images,
            "image_count": len(detail.images),
            "processed_info": {
                "customer_name": detail.name,
                "region": region,
                "detail_region_info": address,
            },
            "marketing_analysis": {
                "report": self._generate_report(detail, address),
                "tags": tags,
                "facilities": facilities,
            },
        }

    # ============================================================
    # Private Methods - Search
    # ============================================================
    @staticmethod
    def _scale_coord(raw: Any) -> float:
        """Divide a raw ``mapx``/``mapy`` value by 10^7.

        Returns 0 for a missing or malformed value so that one bad item
        does not discard the whole result list.
        """
        if not raw:
            return 0
        try:
            return float(raw) / 10000000
        except (TypeError, ValueError):
            return 0

    async def _search_with_api(self, query: str) -> List[Dict[str, Any]]:
        """Search with the Local Search API, then resolve place ids by coordinate.

        Returns:
            Result dicts with ``place_id``, ``title``, ``category``,
            ``address``, ``lng``, ``lat`` and ``is_accommodation``; an empty
            list on a non-200 response or any error.
        """
        try:
            response = await asyncio.to_thread(
                requests.get,
                self.search_url,
                headers=self.headers,
                params={"query": query, "display": 5},
                timeout=10,
            )
            if response.status_code != 200:
                print(f"Local Search API Error: {response.status_code}")
                return []
            items = response.json().get("items", [])
            results = []
            for item in items:
                category = item.get("category", "")
                results.append({
                    "place_id": "",
                    "title": self._clean_html(item.get("title", "")),
                    "category": category,
                    "address": item.get("roadAddress") or item.get("address", ""),
                    "lng": self._scale_coord(item.get("mapx", "")),
                    "lat": self._scale_coord(item.get("mapy", "")),
                    "is_accommodation": self._is_accommodation(category),
                })
            # Resolve the place id of each result from its name and coordinate.
            for result in results:
                result["place_id"] = await self._find_place_id_by_coord(
                    result["title"], result["lng"], result["lat"]
                )
            return results
        except Exception as e:
            print(f"Search error: {e}")
            return []

    async def _find_place_id_by_coord(self, name: str, lng: float, lat: float) -> str:
        """Find a place id from a business name and coordinate.

        Returns:
            The id of the first matching place as a string, or ``""`` when
            nothing matches, a captcha marker is present in the response,
            or any error occurs.
        """
        try:
            response = await asyncio.to_thread(
                requests.get,
                "https://map.naver.com/p/api/search/allSearch",
                headers=self.browser_headers,
                params={"query": name, "type": "place", "searchCoord": f"{lng};{lat}", "displayCount": 1},
                timeout=5,
            )
            if response.status_code == 200:
                result = response.json().get("result", {})
                # An "ncaptcha" key means the response carries no usable data.
                if "ncaptcha" not in result:
                    place_list = result.get("place", {}).get("list", [])
                    if place_list:
                        return str(place_list[0].get("id", ""))
            return ""
        except Exception:
            return ""

    async def _search_with_browser(self, query: str) -> List[Dict[str, Any]]:
        """Search for place ids with a headless Playwright browser.

        Returns:
            At most ten parsed results; an empty list when Playwright is
            not installed or the search fails.
        """
        try:
            from playwright.async_api import async_playwright
        except ImportError:
            print("playwright가 설치되지 않았습니다. pip install playwright")
            return []

        results: List[Dict[str, Any]] = []
        # The query becomes a URL path segment: percent-encode it so that
        # characters such as "?", "#" or "/" cannot change the URL.
        encoded_query = quote(query, safe="")
        try:
            async with async_playwright() as p:
                browser = await p.chromium.launch(headless=True)
                try:
                    context = await browser.new_context(user_agent=self.browser_headers["User-Agent"])
                    page = await context.new_page()
                    await page.goto(
                        f"https://map.naver.com/p/search/{encoded_query}",
                        wait_until="domcontentloaded",
                        timeout=20000,
                    )
                    # Fixed wait before reading the search result iframe.
                    await page.wait_for_timeout(5000)
                    search_frame = page.frame(name="searchIframe")
                    if search_frame:
                        frame_html = await search_frame.content()
                        frame_text = await search_frame.inner_text('body')
                        results = self._parse_browser_results(frame_html, frame_text)
                finally:
                    # Close the browser even when navigation or parsing fails.
                    await browser.close()
        except Exception as e:
            print(f"Browser search error: {e}")
        return results[:10]

    def _parse_browser_results(self, html: str, text: str) -> List[Dict[str, Any]]:
        """Parse search results out of the browser search frame.

        Place ids are collected from ``html``; titles, categories and
        addresses are read line by line from ``text``. The n-th parsed place
        is paired with the n-th distinct id.

        Args:
            html: HTML content of the search frame.
            text: Visible text of the search frame body.

        Returns:
            Dicts with ``title`` and ``place_id``, plus ``category``,
            ``is_accommodation`` and ``address`` when they were detected.
        """
        # Collect candidate ids pattern by pattern, then drop duplicates
        # while keeping first-seen order.
        place_ids: List[str] = []
        for pattern in self._PLACE_ID_PATTERNS:
            place_ids.extend(pattern.findall(html))
        place_ids = list(dict.fromkeys(place_ids))

        category_keywords = self.ACCOMMODATION_CATEGORIES + self._EXTRA_CATEGORY_KEYWORDS
        results = []
        current_place = {}
        place_index = 0

        for raw_line in text.split('\n'):
            line = raw_line.strip()
            if not line:
                continue

            # A line starting with "이미지수" closes the current place entry.
            if line.startswith('이미지수'):
                if current_place.get('title') and place_index < len(place_ids):
                    current_place['place_id'] = place_ids[place_index]
                    results.append(current_place)
                    place_index += 1
                current_place = {}
                continue

            # The first meaningful line of an entry is its title.
            if not current_place.get('title') and len(line) > 1 and not line.isdigit():
                if line not in self._NON_TITLE_LINES:
                    current_place['title'] = line
                continue

            # The first line containing a category keyword is the category.
            if not current_place.get('category'):
                if any(keyword in line for keyword in category_keywords):
                    current_place['category'] = line
                    current_place['is_accommodation'] = self._is_accommodation(line)

            # The first line starting with a region name is the address.
            if not current_place.get('address') and line.startswith(self._REGION_PREFIXES):
                current_place['address'] = line

        # Flush the last entry, which has no closing "이미지수" line.
        if current_place.get('title') and place_index < len(place_ids):
            current_place['place_id'] = place_ids[place_index]
            results.append(current_place)
        return results

    def _merge_place_ids(self, api_results: List[Dict], browser_results: List[Dict]):
        """Copy place ids from browser results onto API results with matching titles.

        Two titles match when either one contains the other. ``api_results``
        is updated in place; the first matching browser result wins.
        """
        for api_r in api_results:
            api_title = api_r.get("title")
            if not api_title:
                continue
            for br_r in browser_results:
                br_title = br_r.get("title", "")
                # An empty title is a substring of every string, so it must
                # be skipped or it would match every API result.
                if not br_r.get("place_id") or not br_title:
                    continue
                if api_title in br_title or br_title in api_title:
                    api_r["place_id"] = br_r["place_id"]
                    break

    # ============================================================
    # Private Methods - Parsing
    # ============================================================
    def _clean_html(self, text: str) -> str:
        """Remove ``<b>`` highlight tags and decode HTML entities."""
        if not text:
            return ""
        # Strip tags first so an escaped "&lt;b&gt;" stays as literal text.
        return unescape(self._BOLD_TAG_PATTERN.sub("", text))

    def _is_accommodation(self, category: str) -> bool:
        """Return whether the category contains an accommodation keyword."""
        return bool(category and any(k in category for k in self.ACCOMMODATION_CATEGORIES))

    def _extract_place_id_from_url(self, url: str) -> str:
        """Extract the place id from a URL, or return ``""`` when absent."""
        if not url:
            return ""
        for pattern in self._URL_PLACE_ID_PATTERNS:
            match = pattern.search(url)
            if match:
                return match.group(1)
        return ""

    def _parse_category(self, category_data) -> str:
        """Return the category from a dict (``"category"`` key) or a plain string."""
        if isinstance(category_data, dict):
            return category_data.get("category", "")
        return category_data if isinstance(category_data, str) else ""

    def _parse_address(self, address_data, key: str) -> str:
        """Return ``address_data[key]`` for a dict.

        A plain string is returned only for the ``"address"`` key; anything
        else yields ``""``.
        """
        if isinstance(address_data, dict):
            return address_data.get(key, "")
        return address_data if isinstance(address_data, str) and key == "address" else ""

    def _parse_images(self, images_data, limit: int = 20) -> List[str]:
        """Collect image URLs from the first ``limit`` entries of ``images_data["images"]``.

        Dict entries contribute their ``origin``, ``url`` or ``thumbnail``
        value (first non-empty one); string entries are taken as-is.
        """
        images = []
        if isinstance(images_data, dict):
            # ``or []`` also covers an explicit ``"images": None``.
            for img in (images_data.get("images") or [])[:limit]:
                if isinstance(img, dict):
                    url = img.get("origin") or img.get("url") or img.get("thumbnail")
                    if url:
                        images.append(url)
                elif isinstance(img, str):
                    images.append(img)
        return images

    def _parse_business_hours(self, hours_data) -> str:
        """Return the ``"status"`` value of a dict, or a plain string unchanged."""
        if isinstance(hours_data, dict):
            return hours_data.get("status", "")
        return hours_data if isinstance(hours_data, str) else ""

    def _parse_keywords(self, reviews_data) -> List[str]:
        """Return the review ``displayText`` as a one-element list, if present."""
        if isinstance(reviews_data, dict):
            display_text = reviews_data.get("displayText", "")
            return [display_text] if display_text else []
        return []

    def _parse_facilities(self, labels_data) -> List[str]:
        """Map the truthy ``booking``/``nPay``/``talktalk`` labels to facility names."""
        facilities = []
        if isinstance(labels_data, dict):
            if labels_data.get("booking"):
                facilities.append("예약가능")
            if labels_data.get("nPay"):
                facilities.append("네이버페이")
            if labels_data.get("talktalk"):
                facilities.append("톡톡")
        return facilities

    def _generate_report(self, detail: "PlaceDetailInfo", address: str) -> str:
        """Build the markdown report text for a place."""
        return (
            f"## 업체 정보\n{detail.name}은(는) {detail.category} 카테고리에 속한 업체입니다.\n\n"
            f"## 위치\n{address}\n\n"
            f"## 연락처\n{detail.phone or '정보 없음'}\n\n"
            f"## 영업시간\n{detail.business_hours or '정보 없음'}\n\n"
            f"## 설명\n{detail.description or '정보 없음'}"
        )