"""Proof-of-concept scraper for Naver Map place details.

Extracts the numeric place id embedded in a Naver Map URL and queries the
``pcmap-api.place.naver.com`` GraphQL endpoint for the place's base
information and image links.

Provenance: reconstructed from the whitespace-collapsed patch
"added poc for crawling" (commit 6180c231), which created
``poc/crawling/nvMapScraper.py``.  The same patch also added the binary
archive ``poc/crawling/nvMapScraper.zip``; a binary payload cannot be
represented in a Python source file and is not reproduced here.
"""

import asyncio
import json
import logging
import os
import re
from typing import Optional

logger = logging.getLogger(__name__)

GRAPHQL_URL = "https://pcmap-api.place.naver.com/graphql"

# SECURITY: the original revision hard-coded a logged-in Naver session cookie
# (including NID_AUT / NID_SES) in this constant.  Credentials must not live
# in source control, so the cookie string is read from the environment
# instead.  The session that was committed should be treated as compromised.
NAVER_COOKIES = os.environ.get("NAVER_COOKIES", "")

OVERVIEW_QUERY = """
query getAccommodation($id: String!, $deviceType: String) {
  business: placeDetail(input: {id: $id, isNx: true, deviceType: $deviceType}) {
    base {
      id
      name
      category
      roadAddress
      address
      phone
      virtualPhone
      microReviews
      conveniences
      visitorReviewsTotal
    }
    images { images { origin url } }
    cpImages(source: [ugcImage]) { images { origin url } }
  }
}"""

REQUEST_HEADERS = {
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 "
        "(KHTML, like Gecko) Chrome/129.0.0.0 Safari/537.36"
    ),
    "Referer": "https://map.naver.com/",
    "Origin": "https://map.naver.com",
    "Content-Type": "application/json",
}
if NAVER_COOKIES:
    # Only send a Cookie header when one was actually configured.
    REQUEST_HEADERS["Cookie"] = NAVER_COOKIES

# Compiled once; the first group captures the numeric place id.
_PLACE_ID_PATTERN = re.compile(r"/place/(\d+)")


class GraphQLException(Exception):
    """Raised when the GraphQL route cannot produce a usable result."""


class nvMapScraper:
    """Fetch base information and image links for one Naver Map place.

    Attributes:
        url: The Naver Map URL given to the constructor.
        scrap_type: ``"GraphQL"`` after a successful :meth:`scrap`,
            ``"Playwright"`` when the GraphQL route failed, ``None`` before
            :meth:`scrap` has run.
        rawdata: The decoded GraphQL response, once one has been received.
        image_link_list: ``origin`` links of the place images.
        base_info: The ``base`` object of the GraphQL response.
    """

    url: Optional[str] = None
    scrap_type: Optional[str] = None
    rawdata: Optional[dict] = None
    image_link_list: Optional[list[str]] = None
    base_info: Optional[dict] = None

    def __init__(self, url):
        self.url = url

    def parse_url(self):
        """Return the place id found in ``self.url`` as a string.

        Raises:
            GraphQLException: If ``self.url`` is not a string or contains no
                ``/place/<digits>`` segment.
        """
        found = (
            _PLACE_ID_PATTERN.search(self.url)
            if isinstance(self.url, str)
            else None
        )
        if found is None:
            raise GraphQLException(f"no place id found in URL: {self.url!r}")
        return found.group(1)

    async def scrap(self):
        """Populate the result attributes from the GraphQL endpoint.

        On success ``rawdata``, ``image_link_list`` and ``base_info`` are
        filled and ``scrap_type`` is set to ``"GraphQL"``.  On any
        :class:`GraphQLException` (unparsable URL, failed request, unexpected
        response shape) ``scrap_type`` is set to ``"Playwright"``.
        """
        try:
            place_id = self.parse_url()
            data = await self.call_get_accomodation(place_id)
            self.rawdata = data
            image_links, base_info = self._extract_business(data)
        except GraphQLException as exc:
            logger.warning("GraphQL scrape failed: %s", exc)
            print("fallback")
            self.scrap_type = "Playwright"
            # TODO: add a Playwright-based crawling fallback here.
            return

        self.image_link_list = image_links
        self.base_info = base_info
        self.scrap_type = "GraphQL"

    @staticmethod
    def _extract_business(data):
        """Return ``(image_links, base_info)`` from a decoded response.

        Raises:
            GraphQLException: If the response lacks the expected
                ``data.business.images.images`` / ``data.business.base``
                structure (for example when ``business`` is ``null``).
        """
        try:
            business = data["data"]["business"]
            image_links = [
                image["origin"] for image in business["images"]["images"]
            ]
            base_info = business["base"]
        except (KeyError, TypeError) as exc:
            raise GraphQLException("unexpected GraphQL response shape") from exc
        return image_links, base_info

    async def call_get_accomodation(self, place_id):
        """POST the ``getAccommodation`` query and return the decoded JSON.

        Args:
            place_id: Place id as returned by :meth:`parse_url`.

        Raises:
            GraphQLException: On a non-200 status, a transport error, a
                timeout, or a body that cannot be decoded as JSON.
        """
        # Imported here so URL parsing and response extraction remain usable
        # in environments where the HTTP client is not installed.
        import aiohttp

        payload = {
            "operationName": "getAccommodation",
            "variables": {"id": place_id, "deviceType": "pc"},
            "query": OVERVIEW_QUERY,
        }

        try:
            async with aiohttp.ClientSession() as session:
                async with session.post(
                    GRAPHQL_URL,
                    data=json.dumps(payload),
                    headers=REQUEST_HEADERS,
                ) as response:
                    if response.status != 200:  # request failed
                        print("실패 상태 코드:", response.status)
                        raise GraphQLException(
                            f"GraphQL request failed with HTTP {response.status}"
                        )
                    # request succeeded; json() is a coroutine and must be
                    # awaited.  The charset is passed explicitly because
                    # assigning response.encoding has no effect in aiohttp.
                    return await response.json(encoding="utf-8")
        except (aiohttp.ClientError, asyncio.TimeoutError, ValueError) as exc:
            raise GraphQLException(f"GraphQL request failed: {exc}") from exc


# Demo URL.  "&timestamp=" had been corrupted to "×tamp=" (the "&times" HTML
# entity was decoded somewhere along the way); it is restored here.
url = (
    "https://map.naver.com/p/search/"
    "%EC%8A%A4%ED%85%8C%EC%9D%B4%EB%A8%B8%EB%AD%84/place/1133638931"
    "?c=14.70,0,0,0,dh"
    "&placePath=/photo?businessCategory=pension"
    "&fromPanelNum=2&locale=ko"
    "&searchText=%EC%8A%A4%ED%85%8C%EC%9D%B4%EB%A8%B8%EB%AD%84"
    "&svcName=map_pcv5&timestamp=202512191123"
    "&fromPanelNum=2&locale=ko"
    "&searchText=%EC%8A%A4%ED%85%8C%EC%9D%B4%EB%A8%B8%EB%AD%84"
    "&svcName=map_pcv5&timestamp=202512191007"
    "&from=map&entry=bmp"
    "&filterType=%EC%97%85%EC%B2%B4"
    "&businessCategory=pension"
)


def main() -> None:
    """Scrape the demo URL and print the collected results."""
    scraper = nvMapScraper(url)
    asyncio.run(scraper.scrap())
    print(scraper.image_link_list)
    # image_link_list stays None when the GraphQL route fell back.
    print(len(scraper.image_link_list or []))
    print(scraper.base_info)


if __name__ == "__main__":
    main()