"""
Naver Place search API module.

Searches by business name to find a place_id and fetches the place's detail
information (including photos).
"""

import asyncio
import logging
import os
import re
from dataclasses import dataclass
from html import unescape
from typing import Any, Dict, List, Optional
from urllib.parse import quote

try:
    import requests
except ImportError:
    # Keep the pure parsing/conversion helpers importable without the HTTP
    # dependency; NaverPlaceAPI._http_get raises a clear error when it is used.
    requests = None

logger = logging.getLogger(__name__)


# ============================================================
# Data Classes
# ============================================================

@dataclass
class NaverConfig:
    """Naver Open API settings.

    The credentials can be supplied through the ``NAVER_CLIENT_ID`` and
    ``NAVER_CLIENT_SECRET`` environment variables (read once, at import time)
    or passed explicitly to the constructor.
    """

    # SECURITY: the literal fallbacks below are credentials committed to source
    # control. They are kept only so existing callers of ``NaverConfig()`` keep
    # working; rotate them and rely on the environment variables instead.
    naver_client_id: str = os.environ.get("NAVER_CLIENT_ID") or "cp5MzIsZ8PSQPeQQkVKR"
    naver_client_secret: str = os.environ.get("NAVER_CLIENT_SECRET") or "lhdrHgx31G"
    naver_local_api_url: str = "https://openapi.naver.com/v1/search/local.json"


@dataclass
class PlaceDetailInfo:
    """Detail information for a single Naver Place entry."""

    place_id: str
    name: str
    category: str
    address: str
    road_address: str
    phone: str            # get_place_detail() always sets this to ""
    description: str      # get_place_detail() always sets this to ""
    images: List[str]     # image URLs
    business_hours: str
    homepage: str         # get_place_detail() always sets this to ""
    keywords: List[str]
    facilities: List[str]


# ============================================================
# Main API Class
# ============================================================

class NaverPlaceAPI:
    """
    Naver Place API client.

    Main features:
    - quick_search(): fast autocomplete search (no place_id)
    - autocomplete_search(): search including place_id (browser fallback)
    - get_place_detail(): fetch detail information by place_id
    - convert_to_crawling_response(): convert to the CrawlingResponse format
    """

    ACCOMMODATION_CATEGORIES = [
        "펜션", "숙박", "호텔", "모텔", "리조트", "게스트하우스",
        "민박", "글램핑", "캠핑", "풀빌라", "스테이", "독채"
    ]

    # Extra category keywords recognised only when parsing browser text.
    _BROWSER_EXTRA_CATEGORIES = ['장소대여', '전통숙소']

    # UI labels that appear in the result list but are never a business name.
    _NON_TITLE_LABELS = frozenset({'네이버페이', '톡톡', '쿠폰', '알림받기'})

    # A line starting with one of these region names is treated as an address.
    # The list is a blunt heuristic covering every top-level region.
    _REGION_PREFIXES = (
        '서울', '부산', '대구', '인천', '광주', '대전', '울산', '세종',
        '경기', '강원', '충북', '충남', '전북', '전남', '경북', '경남', '제주',
    )

    # Patterns that pull candidate place ids out of the search iframe HTML.
    _PLACE_ID_HTML_PATTERNS = (
        re.compile(r'"id":"(\d+)"'),
        re.compile(r'/place/(\d+)'),
        re.compile(r'data-id="(\d+)"'),
    )

    # Patterns that pull a place id out of a map URL. A separate
    # '/entry/place/(\d+)' pattern is unnecessary: '/place/(\d+)' already
    # matches every URL that one would.
    _PLACE_ID_URL_PATTERNS = (
        re.compile(r'/place/(\d+)'),
        re.compile(r'place_id=(\d+)'),
    )

    _HTML_TAG_RE = re.compile(r"</?[A-Za-z][^>]*>")

    def __init__(self, config: Optional[NaverConfig] = None):
        """Build request headers from ``config`` (defaults to ``NaverConfig()``)."""
        self.config = config or NaverConfig()
        self.search_url = self.config.naver_local_api_url
        self.headers = {
            "X-Naver-Client-Id": self.config.naver_client_id,
            "X-Naver-Client-Secret": self.config.naver_client_secret,
        }
        self.browser_headers = {
            "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36",
            "Accept": "application/json, text/plain, */*",
            "Accept-Language": "ko-KR,ko;q=0.9",
            "Referer": "https://map.naver.com/",
        }

    # ============================================================
    # Public Methods
    # ============================================================

    async def quick_search(self, query: str) -> List[Dict[str, Any]]:
        """
        Fast autocomplete search (does not look up place_id).

        Args:
            query: search text

        Returns:
            [{"title": "name", "category": "category", "address": "address"}, ...]
            An empty list on a blank query, a non-200 response or any error.
        """
        query = (query or "").strip()
        if not query:
            return []

        try:
            response = await self._http_get(
                self.search_url,
                headers=self.headers,
                params={"query": query, "display": 10},
                timeout=5,
            )
            if response.status_code != 200:
                logger.warning("Local Search API Error: %s", response.status_code)
                return []

            items = response.json().get("items") or []
            return [
                {
                    "title": self._clean_html(item.get("title", "")),
                    "category": item.get("category", ""),
                    "address": item.get("roadAddress") or item.get("address", ""),
                }
                for item in items
            ]
        except Exception as e:
            # Best-effort search: report the failure instead of hiding it.
            logger.warning("Quick search error: %s", e)
            return []

    async def autocomplete_search(self, query: str) -> List[Dict[str, Any]]:
        """
        Search including place_id (falls back to a browser when the API fails).

        Args:
            query: search text or a Naver Map URL

        Returns:
            [{"place_id": "123", "title": "name", "category": "category",
              "address": "address", "is_accommodation": True}, ...]
        """
        query = (query or "").strip()
        if not query:
            # Nothing to search for; avoid an API call and a browser launch.
            return []

        # A URL: extract the place_id and resolve it directly.
        if query.startswith("http"):
            place_id = self._extract_place_id_from_url(query)
            detail = await self.get_place_detail(place_id) if place_id else None
            if detail is None:
                return []
            return [{
                "place_id": place_id,
                "title": detail.name,
                "category": detail.category,
                "address": detail.road_address or detail.address,
                "is_accommodation": self._is_accommodation(detail.category),
            }]

        # Search through the API first.
        api_results = await self._search_with_api(query)
        if self._has_place_id(api_results):
            return api_results

        # The API produced no place_id: fall back to the browser.
        logger.info("API에서 place_id를 찾지 못함. 브라우저 검색 시도...")
        browser_results = await self._search_with_browser(query)

        if self._has_place_id(browser_results):
            if api_results:
                # Copy place_ids found by the browser onto the API rows.
                self._merge_place_ids(api_results, browser_results)
                if self._has_place_id(api_results):
                    return api_results
            # Either there were no API rows or none of them matched a browser
            # row by title; the browser rows are the only ones with a place_id.
            return browser_results

        return api_results or []

    async def get_place_detail(self, place_id: str) -> Optional[PlaceDetailInfo]:
        """
        Fetch detail information by place_id.

        Args:
            place_id: Naver Place id

        Returns:
            PlaceDetailInfo, or None when place_id is empty, the response is
            not 200, the payload has no ``placeDetail`` or any error occurs.
        """
        if not place_id:
            return None

        try:
            response = await self._http_get(
                f"https://map.naver.com/p/api/place/summary/{place_id}",
                headers={
                    **self.browser_headers,
                    "Referer": f"https://map.naver.com/p/entry/place/{place_id}",
                },
                timeout=10,  # without a timeout a stalled server blocks forever
            )
            if response.status_code != 200:
                logger.warning("Detail API Error: %s", response.status_code)
                return None

            payload = response.json() or {}
            place = (payload.get("data") or {}).get("placeDetail") or {}
            if not place:
                logger.warning("No placeDetail in response")
                return None

            address_data = place.get("address")
            return PlaceDetailInfo(
                place_id=place_id,
                name=place.get("name", ""),
                category=self._parse_category(place.get("category")),
                address=self._parse_address(address_data, "address"),
                road_address=self._parse_address(address_data, "roadAddress"),
                phone="",
                description="",
                images=self._parse_images(place.get("images")),
                business_hours=self._parse_business_hours(place.get("businessHours")),
                homepage="",
                keywords=self._parse_keywords(place.get("visitorReviews")),
                facilities=self._parse_facilities(place.get("labels")),
            )
        except Exception as e:
            logger.warning("Detail fetch error: %s", e)
            return None

    def convert_to_crawling_response(self, detail: PlaceDetailInfo) -> Dict[str, Any]:
        """Convert a PlaceDetailInfo into the CrawlingResponse dict layout.

        The region is the first whitespace-separated token of the road address
        (or of the plain address when there is no road address).
        """
        address = detail.road_address or detail.address
        address_parts = address.split() if address else []
        region = address_parts[0] if address_parts else ""

        # Tags: the region first, then up to five keywords, each '#'-prefixed.
        tags = [f"#{region}"] if region else []
        tags.extend(
            keyword if keyword.startswith("#") else f"#{keyword}"
            for keyword in detail.keywords[:5]
        )

        # Facilities: the known facilities plus each '>'-separated category
        # segment that is not already listed.
        facilities = list(detail.facilities)
        if detail.category:
            for segment in detail.category.split(">"):
                segment = segment.strip()
                if segment and segment not in facilities:
                    facilities.append(segment)

        return {
            "image_list": detail.images,
            "image_count": len(detail.images),
            "processed_info": {
                "customer_name": detail.name,
                "region": region,
                "detail_region_info": address
            },
            "marketing_analysis": {
                "report": self._generate_report(detail, address),
                "tags": tags,
                "facilities": facilities
            }
        }

    # ============================================================
    # Private Methods - Search
    # ============================================================

    async def _http_get(self, url: str, *, headers: Dict[str, str],
                        params: Optional[Dict[str, Any]] = None,
                        timeout: float = 10) -> Any:
        """Run a blocking ``requests.get`` in a worker thread and return the response."""
        if requests is None:
            raise RuntimeError("requests is not installed. pip install requests")
        return await asyncio.to_thread(
            requests.get, url, headers=headers, params=params, timeout=timeout
        )

    @staticmethod
    def _has_place_id(results: Optional[List[Dict[str, Any]]]) -> bool:
        """Return True when at least one row carries a non-empty place_id."""
        return bool(results) and any(r.get("place_id") for r in results)

    @staticmethod
    def _scale_coord(raw: Any) -> float:
        """Convert a raw ``mapx``/``mapy`` value by dividing it by 10,000,000.

        Returns 0 for a missing or malformed value so that one bad item does
        not discard the whole result set.
        NOTE(review): presumably the API reports degrees scaled by 1e7 - confirm.
        """
        if not raw:
            return 0
        try:
            return float(raw) / 10000000
        except (TypeError, ValueError):
            return 0

    async def _search_with_api(self, query: str) -> List[Dict[str, Any]]:
        """Search with the Local Search API, then resolve place_ids by coordinate."""
        try:
            response = await self._http_get(
                self.search_url,
                headers=self.headers,
                params={"query": query, "display": 5},
                timeout=10,
            )
            if response.status_code != 200:
                logger.warning("Local Search API Error: %s", response.status_code)
                return []

            results = []
            for item in response.json().get("items") or []:
                category = item.get("category", "")
                results.append({
                    "place_id": "",
                    "title": self._clean_html(item.get("title", "")),
                    "category": category,
                    "address": item.get("roadAddress") or item.get("address", ""),
                    "lng": self._scale_coord(item.get("mapx", "")),
                    "lat": self._scale_coord(item.get("mapy", "")),
                    "is_accommodation": self._is_accommodation(category),
                })

            # Resolve each place_id by coordinate. The lookups run one at a
            # time, exactly as before, to keep the request rate unchanged.
            for result in results:
                result["place_id"] = await self._find_place_id_by_coord(
                    result["title"], result["lng"], result["lat"]
                )
            return results
        except Exception as e:
            logger.warning("Search error: %s", e)
            return []

    async def _find_place_id_by_coord(self, name: str, lng: float, lat: float) -> str:
        """Find a place_id from a business name and coordinates.

        Returns "" when the request fails, the response contains an
        ``ncaptcha`` entry, or no place is listed.
        """
        try:
            response = await self._http_get(
                "https://map.naver.com/p/api/search/allSearch",
                headers=self.browser_headers,
                params={
                    "query": name,
                    "type": "place",
                    "searchCoord": f"{lng};{lat}",
                    "displayCount": 1,
                },
                timeout=5,
            )
            if response.status_code != 200:
                return ""

            result = response.json().get("result") or {}
            if "ncaptcha" in result:
                return ""

            place_list = (result.get("place") or {}).get("list") or []
            if not place_list:
                return ""
            # ``or ""`` keeps a null id from turning into the string "None".
            return str(place_list[0].get("id") or "")
        except Exception as e:
            logger.warning("place_id lookup error: %s", e)
            return ""

    async def _search_with_browser(self, query: str) -> List[Dict[str, Any]]:
        """Search for place_ids with a Playwright browser (at most 10 rows)."""
        try:
            from playwright.async_api import async_playwright
        except ImportError:
            logger.warning("playwright가 설치되지 않았습니다. pip install playwright")
            return []

        results: List[Dict[str, Any]] = []
        try:
            async with async_playwright() as p:
                browser = await p.chromium.launch(headless=True)
                try:
                    context = await browser.new_context(
                        user_agent=self.browser_headers["User-Agent"]
                    )
                    page = await context.new_page()

                    # Percent-encode the query so '/', '?' or '#' in it cannot
                    # change the structure of the URL.
                    await page.goto(
                        f"https://map.naver.com/p/search/{quote(query, safe='')}",
                        wait_until="domcontentloaded",
                        timeout=20000,
                    )
                    # Give the result iframe time to render.
                    await page.wait_for_timeout(5000)

                    search_frame = page.frame(name="searchIframe")
                    if search_frame:
                        frame_html = await search_frame.content()
                        frame_text = await search_frame.inner_text('body')
                        results = self._parse_browser_results(frame_html, frame_text)
                finally:
                    # Close the browser even when navigation or parsing fails.
                    await browser.close()
        except Exception as e:
            logger.warning("Browser search error: %s", e)

        return results[:10]

    def _parse_browser_results(self, html: str, text: str) -> List[Dict[str, Any]]:
        """Parse search results out of the browser iframe HTML and visible text.

        Place ids are collected from ``html`` (deduplicated, first occurrence
        first) and assigned, in order, to the businesses found in ``text``. A
        line starting with '이미지수' ends the current business. The pairing
        is positional, so it is a heuristic.

        Every returned row has "title", "place_id", "category", "address" and
        "is_accommodation".
        """
        # Extract candidate place ids.
        place_ids: List[str] = []
        for pattern in self._PLACE_ID_HTML_PATTERNS:
            place_ids.extend(pattern.findall(html))
        place_ids = list(dict.fromkeys(place_ids))  # drop duplicates, keep order

        category_keywords = list(self.ACCOMMODATION_CATEGORIES) + list(self._BROWSER_EXTRA_CATEGORIES)
        results: List[Dict[str, Any]] = []

        def commit(place: Dict[str, Any]) -> None:
            # Keep a business only when it has a title and an unused id remains.
            if place.get('title') and len(results) < len(place_ids):
                place['place_id'] = place_ids[len(results)]
                # Rows always expose the same keys, even when nothing was found.
                place.setdefault('category', '')
                place.setdefault('address', '')
                place.setdefault('is_accommodation', False)
                results.append(place)

        # Parse business information from the visible text.
        current_place: Dict[str, Any] = {}
        for raw_line in text.split('\n'):
            line = raw_line.strip()
            if not line:
                continue

            if line.startswith('이미지수'):
                commit(current_place)
                current_place = {}
                continue

            # The first usable line of a business is its title.
            if not current_place.get('title') and len(line) > 1 and not line.isdigit():
                if line not in self._NON_TITLE_LABELS:
                    current_place['title'] = line
                    continue

            if not current_place.get('category'):
                if any(keyword in line for keyword in category_keywords):
                    current_place['category'] = line
                    current_place['is_accommodation'] = self._is_accommodation(line)

            if not current_place.get('address') and line.startswith(self._REGION_PREFIXES):
                current_place['address'] = line

        # The last business has no trailing '이미지수' line.
        commit(current_place)
        return results

    def _merge_place_ids(self, api_results: List[Dict], browser_results: List[Dict]):
        """Copy browser place_ids onto API rows whose titles contain one another.

        Mutates ``api_results`` in place. Rows with an empty title are skipped
        on both sides, because an empty string is a substring of every title
        and would match anything.
        """
        for api_r in api_results:
            api_title = api_r.get("title")
            if not api_title:
                continue
            for br_r in browser_results:
                br_title = br_r.get("title") or ""
                if not br_r.get("place_id") or not br_title:
                    continue
                if api_title in br_title or br_title in api_title:
                    api_r["place_id"] = br_r["place_id"]
                    break

    # ============================================================
    # Private Methods - Parsing
    # ============================================================

    def _clean_html(self, text: str) -> str:
        """Remove HTML tags and decode HTML entities."""
        if not text:
            return ""
        # Strip tags before unescaping so an escaped '&lt;b&gt;' stays as text.
        return unescape(self._HTML_TAG_RE.sub("", text))

    def _is_accommodation(self, category: str) -> bool:
        """Return True when the category contains an accommodation keyword."""
        return bool(category and any(k in category for k in self.ACCOMMODATION_CATEGORIES))

    def _extract_place_id_from_url(self, url: str) -> str:
        """Extract the place_id from a URL; return "" when none is found."""
        for pattern in self._PLACE_ID_URL_PATTERNS:
            match = pattern.search(url or "")
            if match:
                return match.group(1)
        return ""

    def _parse_category(self, category_data) -> str:
        """Return the category from a dict (key "category") or a plain string."""
        if isinstance(category_data, dict):
            return category_data.get("category") or ""
        return category_data if isinstance(category_data, str) else ""

    def _parse_address(self, address_data, key: str) -> str:
        """Return ``address_data[key]``; a plain string only answers key "address"."""
        if isinstance(address_data, dict):
            return address_data.get(key) or ""
        return address_data if isinstance(address_data, str) and key == "address" else ""

    def _parse_images(self, images_data, limit: int = 20) -> List[str]:
        """Collect up to ``limit`` image URLs from ``images_data["images"]``.

        Dict entries contribute their "origin", "url" or "thumbnail" value
        (first non-empty one); string entries are used as they are.
        """
        images: List[str] = []
        if not isinstance(images_data, dict):
            return images

        # ``or []`` tolerates an explicit null list in the payload.
        for img in (images_data.get("images") or [])[:limit]:
            if isinstance(img, dict):
                url = img.get("origin") or img.get("url") or img.get("thumbnail")
                if url:
                    images.append(url)
            elif isinstance(img, str):
                images.append(img)
        return images

    def _parse_business_hours(self, hours_data) -> str:
        """Return the "status" of a dict, or a plain string unchanged."""
        if isinstance(hours_data, dict):
            return hours_data.get("status") or ""
        return hours_data if isinstance(hours_data, str) else ""

    def _parse_keywords(self, reviews_data) -> List[str]:
        """Return the review "displayText" as a one-item list, or an empty list."""
        if isinstance(reviews_data, dict):
            display_text = reviews_data.get("displayText", "")
            return [display_text] if display_text else []
        return []

    def _parse_facilities(self, labels_data) -> List[str]:
        """Map the truthy "booking", "nPay" and "talktalk" labels to facility names."""
        facilities: List[str] = []
        if isinstance(labels_data, dict):
            if labels_data.get("booking"):
                facilities.append("예약가능")
            if labels_data.get("nPay"):
                facilities.append("네이버페이")
            if labels_data.get("talktalk"):
                facilities.append("톡톡")
        return facilities

    def _generate_report(self, detail: PlaceDetailInfo, address: str) -> str:
        """Build the Markdown report text for a place."""
        return (
            f"## 업체 정보\n{detail.name}은(는) {detail.category} 카테고리에 속한 업체입니다.\n\n"
            f"## 위치\n{address}\n\n"
            f"## 연락처\n{detail.phone or '정보 없음'}\n\n"
            f"## 영업시간\n{detail.business_hours or '정보 없음'}\n\n"
            f"## 설명\n{detail.description or '정보 없음'}"
        )