diff --git a/.DS_Store b/.DS_Store
new file mode 100644
index 0000000..8e90c8c
Binary files /dev/null and b/.DS_Store differ
diff --git a/app/.DS_Store b/app/.DS_Store
new file mode 100644
index 0000000..725195a
Binary files /dev/null and b/app/.DS_Store differ
diff --git a/app/home/.DS_Store b/app/home/.DS_Store
new file mode 100644
index 0000000..aa241cc
Binary files /dev/null and b/app/home/.DS_Store differ
diff --git a/app/home/api/routers/v1/home.py b/app/home/api/routers/v1/home.py
index 440cb8b..fad8ca7 100644
--- a/app/home/api/routers/v1/home.py
+++ b/app/home/api/routers/v1/home.py
@@ -193,9 +193,12 @@ async def crawling(request_body: CrawlingRequest):
logger.info(f"[crawling] Step 3-3: GPT API 호출 완료 - 응답 {len(raw_response)}자 ({step3_3_elapsed:.1f}ms)")
print(f"[crawling] Step 3-3: GPT API 호출 완료 - 응답 {len(raw_response)}자 ({step3_3_elapsed:.1f}ms)")
- # Step 3-4: 응답 파싱
+ # Step 3-4: 응답 파싱 (크롤링에서 가져온 facility_info 전달)
step3_4_start = time.perf_counter()
- parsed = await chatgpt_service.parse_marketing_analysis(raw_response)
+ print(f"[crawling] Step 3-4: 응답 파싱 시작 - facility_info: {scraper.facility_info}")
+ parsed = await chatgpt_service.parse_marketing_analysis(
+ raw_response, facility_info=scraper.facility_info
+ )
marketing_analysis = MarketingAnalysis(**parsed)
step3_4_elapsed = (time.perf_counter() - step3_4_start) * 1000
print(f"[crawling] Step 3-4: 응답 파싱 완료 ({step3_4_elapsed:.1f}ms)")
diff --git a/app/lyric/.DS_Store b/app/lyric/.DS_Store
new file mode 100644
index 0000000..6b464c0
Binary files /dev/null and b/app/lyric/.DS_Store differ
diff --git a/app/song/.DS_Store b/app/song/.DS_Store
new file mode 100644
index 0000000..4e8bde6
Binary files /dev/null and b/app/song/.DS_Store differ
diff --git a/app/utils/chatgpt_prompt.py b/app/utils/chatgpt_prompt.py
index f2c2ef9..cca878e 100644
--- a/app/utils/chatgpt_prompt.py
+++ b/app/utils/chatgpt_prompt.py
@@ -160,18 +160,10 @@ Provide comprehensive marketing analysis including:
- Return as JSON with key "tags"
- **MUST be written in Korean (한국어)**
-2. Facilities
- - Based on the business name and region details, identify 5 likely facilities/amenities
- - Consider typical facilities for accommodations in the given region
- - Examples: 바베큐장, 수영장, 주차장, 와이파이, 주방, 테라스, 정원, etc.
- - Return as JSON with key "facilities"
- - **MUST be written in Korean (한국어)**
-
[CRITICAL LANGUAGE REQUIREMENT - ABSOLUTE RULE]
ALL OUTPUT MUST BE WRITTEN IN KOREAN (한국어)
- Analysis sections: Korean only
- Tags: Korean only
-- Facilities: Korean only
- This is a NON-NEGOTIABLE requirement
- Any output in English or other languages is considered a FAILURE
- Violation of this rule invalidates the entire response
@@ -203,8 +195,7 @@ ALL OUTPUT MUST BE WRITTEN IN KOREAN (한국어)
## JSON Data
```json
{{
- "tags": ["태그1", "태그2", "태그3", "태그4", "태그5"],
- "facilities": ["부대시설1", "부대시설2", "부대시설3", "부대시설4", "부대시설5"]
+ "tags": ["태그1", "태그2", "태그3", "태그4", "태그5"]
}}
```
---
@@ -361,9 +352,15 @@ class ChatgptService:
return result
- async def parse_marketing_analysis(self, raw_response: str) -> dict:
+ async def parse_marketing_analysis(
+ self, raw_response: str, facility_info: str | None = None
+ ) -> dict:
"""ChatGPT 마케팅 분석 응답을 파싱하고 요약하여 딕셔너리로 반환
+ Args:
+ raw_response: ChatGPT 마케팅 분석 응답 원문
+ facility_info: 크롤링에서 가져온 편의시설 정보 문자열
+
Returns:
dict: {"report": str, "tags": list[str], "facilities": list[str]}
"""
@@ -377,7 +374,7 @@ class ChatgptService:
try:
json_data = json.loads(json_match.group(1))
tags = json_data.get("tags", [])
- facilities = json_data.get("facilities", [])
+ print(f"[parse_marketing_analysis] GPT 응답에서 tags 파싱 완료: {tags}")
# JSON 블록을 제외한 리포트 부분 추출
report = raw_response[: json_match.start()].strip()
# --- 구분자 제거
@@ -386,10 +383,22 @@ class ChatgptService:
if report.endswith("---"):
report = report[:-3].strip()
except json.JSONDecodeError:
+ print("[parse_marketing_analysis] JSON 파싱 실패")
pass
+ # 크롤링에서 가져온 facility_info로 facilities 설정
+ print(f"[parse_marketing_analysis] 크롤링 facility_info 원본: {facility_info}")
+ if facility_info:
+ # 쉼표로 구분된 편의시설 문자열을 리스트로 변환
+ facilities = [f.strip() for f in facility_info.split(",") if f.strip()]
+ print(f"[parse_marketing_analysis] facility_info 파싱 결과: {facilities}")
+ else:
+ facilities = ["등록된 정보 없음"]
+ print("[parse_marketing_analysis] facility_info 없음 - '등록된 정보 없음' 설정")
+
# 리포트 내용을 500자로 요약
if report:
report = await self.summarize_marketing(report)
+ print(f"[parse_marketing_analysis] 최종 facilities: {facilities}")
return {"report": report, "tags": tags, "facilities": facilities}
diff --git a/app/utils/nvMapPwScraper.py b/app/utils/nvMapPwScraper.py
new file mode 100644
index 0000000..d724764
--- /dev/null
+++ b/app/utils/nvMapPwScraper.py
@@ -0,0 +1,113 @@
+import asyncio
+from playwright.async_api import async_playwright
+from urllib import parse
+
+class nvMapPwScraper():
+ # cls vars
+ is_ready = False
+ _playwright = None
+ _browser = None
+ _context = None
+ _win_width = 1280
+ _win_height = 720
+ _max_retry = 30 # place id timeout threshold seconds
+
+ # instance var
+ page = None
+
+ @classmethod
+ def default_context_builder(cls):
+ context_builder_dict = {}
+ context_builder_dict['viewport'] = {
+ 'width' : cls._win_width,
+ 'height' : cls._win_height
+ }
+ context_builder_dict['screen'] = {
+ 'width' : cls._win_width,
+ 'height' : cls._win_height
+ }
+ context_builder_dict['user_agent'] = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/130.0.0.0 Safari/537.36"
+ context_builder_dict['locale'] = 'ko-KR'
+ context_builder_dict['timezone_id']='Asia/Seoul'
+
+ return context_builder_dict
+
+ @classmethod
+ async def initiate_scraper(cls):
+ if not cls._playwright:
+ cls._playwright = await async_playwright().start()
+ if not cls._browser:
+ cls._browser = await cls._playwright.chromium.launch(headless=True)
+ if not cls._context:
+ cls._context = await cls._browser.new_context(**cls.default_context_builder())
+ cls.is_ready = True
+
+ def __init__(self):
+ if not self.is_ready:
+ raise Exception("nvMapScraper is not initiated")
+
+ async def __aenter__(self):
+ await self.create_page()
+ return self
+
+ async def __aexit__(self, exc_type, exc, tb):
+ await self.page.close()
+
+ async def create_page(self):
+ self.page = await self._context.new_page()
+ await self.page.add_init_script(
+'''const defaultGetter = Object.getOwnPropertyDescriptor(
+ Navigator.prototype,
+ "webdriver"
+).get;
+defaultGetter.apply(navigator);
+defaultGetter.toString();
+Object.defineProperty(Navigator.prototype, "webdriver", {
+ set: undefined,
+ enumerable: true,
+ configurable: true,
+ get: new Proxy(defaultGetter, {
+ apply: (target, thisArg, args) => {
+ Reflect.apply(target, thisArg, args);
+ return false;
+ },
+ }),
+});
+const patchedGetter = Object.getOwnPropertyDescriptor(
+ Navigator.prototype,
+ "webdriver"
+).get;
+patchedGetter.apply(navigator);
+patchedGetter.toString();''')
+
+ await self.page.set_extra_http_headers({
+ 'sec-ch-ua': '\"Not?A_Brand\";v=\"99\", \"Chromium\";v=\"130\"'
+ })
+ await self.page.goto("http://google.com")
+
+ async def goto_url(self, url, wait_until="domcontentloaded", timeout=20000):
+ page = self.page
+ await page.goto(url, wait_until=wait_until, timeout=timeout)
+
+ async def get_place_id_url(self, selected):
+
+        title = selected['title'].replace("<b>", "").replace("</b>", "")
+        address = selected.get('roadAddress', selected['address']).replace("<b>", "").replace("</b>", "")
+ encoded_query = parse.quote(f"{address} {title}")
+ url = f"https://map.naver.com/p/search/{encoded_query}"
+
+ await self.goto_url(url, wait_until="networkidle",timeout = self._max_retry/2*1000)
+
+ if "/place/" in self.page.url:
+ return self.page.url
+
+ url = self.page.url.replace("?","?isCorrectAnswer=true&")
+ await self.goto_url(url, wait_until="networkidle",timeout = self._max_retry/2*1000)
+
+ if "/place/" in self.page.url:
+ return self.page.url
+
+        # NOTE: `count` was never defined (leftover from a removed retry loop)
+        # and would raise NameError here; goto_url already raises a Playwright
+        # TimeoutError on timeout, so reaching this point means ambiguity.
+        raise Exception("Failed to identify place id. item is ambiguous")
diff --git a/app/video/.DS_Store b/app/video/.DS_Store
new file mode 100644
index 0000000..2803a5f
Binary files /dev/null and b/app/video/.DS_Store differ
diff --git a/docs/.DS_Store b/docs/.DS_Store
new file mode 100644
index 0000000..d146433
Binary files /dev/null and b/docs/.DS_Store differ
diff --git a/image/.DS_Store b/image/.DS_Store
new file mode 100644
index 0000000..e870d4f
Binary files /dev/null and b/image/.DS_Store differ
diff --git a/image/2025-12-26/.DS_Store b/image/2025-12-26/.DS_Store
new file mode 100644
index 0000000..bf8036b
Binary files /dev/null and b/image/2025-12-26/.DS_Store differ
diff --git a/poc/.DS_Store b/poc/.DS_Store
new file mode 100644
index 0000000..098ffaf
Binary files /dev/null and b/poc/.DS_Store differ
diff --git a/poc/crawling/2026-01-12/main-PwScraper.py b/poc/crawling/2026-01-12/main-PwScraper.py
new file mode 100644
index 0000000..5030706
--- /dev/null
+++ b/poc/crawling/2026-01-12/main-PwScraper.py
@@ -0,0 +1,29 @@
+import asyncio
+from nvMapScraper import nvMapScraper
+from nvMapPwScraper import nvMapPwScraper
+
+async def main_function():
+ await nvMapPwScraper.initiate_scraper()
+ selected = {'title': '스테이,머뭄',
+ 'link': 'https://www.instagram.com/staymeomoom',
+ 'category': '숙박>펜션',
+ 'description': '',
+ 'telephone': '',
+ 'address': '전북특별자치도 군산시 신흥동 63-18',
+ 'roadAddress': '전북특별자치도 군산시 절골길 18',
+ 'mapx': '1267061254',
+ 'mapy': '359864175',
+ 'lng': 126.7061254,
+ 'lat': 35.9864175}
+
+ async with nvMapPwScraper() as pw_scraper:
+ new_url = await pw_scraper.get_place_id_url(selected)
+
+ print(new_url)
+ nv_scraper = nvMapScraper(new_url) # 이후 동일한 플로우
+ await nv_scraper.scrap()
+ print(nv_scraper.rawdata)
+ return
+
+print("running main_function..")
+asyncio.run(main_function())
\ No newline at end of file
diff --git a/poc/crawling/2026-01-12/nvMapPwScraper.py b/poc/crawling/2026-01-12/nvMapPwScraper.py
new file mode 100644
index 0000000..d724764
--- /dev/null
+++ b/poc/crawling/2026-01-12/nvMapPwScraper.py
@@ -0,0 +1,113 @@
+import asyncio
+from playwright.async_api import async_playwright
+from urllib import parse
+
+class nvMapPwScraper():
+ # cls vars
+ is_ready = False
+ _playwright = None
+ _browser = None
+ _context = None
+ _win_width = 1280
+ _win_height = 720
+ _max_retry = 30 # place id timeout threshold seconds
+
+ # instance var
+ page = None
+
+ @classmethod
+ def default_context_builder(cls):
+ context_builder_dict = {}
+ context_builder_dict['viewport'] = {
+ 'width' : cls._win_width,
+ 'height' : cls._win_height
+ }
+ context_builder_dict['screen'] = {
+ 'width' : cls._win_width,
+ 'height' : cls._win_height
+ }
+ context_builder_dict['user_agent'] = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/130.0.0.0 Safari/537.36"
+ context_builder_dict['locale'] = 'ko-KR'
+ context_builder_dict['timezone_id']='Asia/Seoul'
+
+ return context_builder_dict
+
+ @classmethod
+ async def initiate_scraper(cls):
+ if not cls._playwright:
+ cls._playwright = await async_playwright().start()
+ if not cls._browser:
+ cls._browser = await cls._playwright.chromium.launch(headless=True)
+ if not cls._context:
+ cls._context = await cls._browser.new_context(**cls.default_context_builder())
+ cls.is_ready = True
+
+ def __init__(self):
+ if not self.is_ready:
+ raise Exception("nvMapScraper is not initiated")
+
+ async def __aenter__(self):
+ await self.create_page()
+ return self
+
+ async def __aexit__(self, exc_type, exc, tb):
+ await self.page.close()
+
+ async def create_page(self):
+ self.page = await self._context.new_page()
+ await self.page.add_init_script(
+'''const defaultGetter = Object.getOwnPropertyDescriptor(
+ Navigator.prototype,
+ "webdriver"
+).get;
+defaultGetter.apply(navigator);
+defaultGetter.toString();
+Object.defineProperty(Navigator.prototype, "webdriver", {
+ set: undefined,
+ enumerable: true,
+ configurable: true,
+ get: new Proxy(defaultGetter, {
+ apply: (target, thisArg, args) => {
+ Reflect.apply(target, thisArg, args);
+ return false;
+ },
+ }),
+});
+const patchedGetter = Object.getOwnPropertyDescriptor(
+ Navigator.prototype,
+ "webdriver"
+).get;
+patchedGetter.apply(navigator);
+patchedGetter.toString();''')
+
+ await self.page.set_extra_http_headers({
+ 'sec-ch-ua': '\"Not?A_Brand\";v=\"99\", \"Chromium\";v=\"130\"'
+ })
+ await self.page.goto("http://google.com")
+
+ async def goto_url(self, url, wait_until="domcontentloaded", timeout=20000):
+ page = self.page
+ await page.goto(url, wait_until=wait_until, timeout=timeout)
+
+ async def get_place_id_url(self, selected):
+
+        title = selected['title'].replace("<b>", "").replace("</b>", "")
+        address = selected.get('roadAddress', selected['address']).replace("<b>", "").replace("</b>", "")
+ encoded_query = parse.quote(f"{address} {title}")
+ url = f"https://map.naver.com/p/search/{encoded_query}"
+
+ await self.goto_url(url, wait_until="networkidle",timeout = self._max_retry/2*1000)
+
+ if "/place/" in self.page.url:
+ return self.page.url
+
+ url = self.page.url.replace("?","?isCorrectAnswer=true&")
+ await self.goto_url(url, wait_until="networkidle",timeout = self._max_retry/2*1000)
+
+ if "/place/" in self.page.url:
+ return self.page.url
+
+        # NOTE: `count` was never defined (leftover from a removed retry loop)
+        # and would raise NameError here; goto_url already raises a Playwright
+        # TimeoutError on timeout, so reaching this point means ambiguity.
+        raise Exception("Failed to identify place id. item is ambiguous")
diff --git a/poc/crawling/nvMapScraper-2026-01-12.py b/poc/crawling/2026-01-12/nvMapScraper.py
similarity index 96%
rename from poc/crawling/nvMapScraper-2026-01-12.py
rename to poc/crawling/2026-01-12/nvMapScraper.py
index 7d155e4..38bc1cd 100644
--- a/poc/crawling/nvMapScraper-2026-01-12.py
+++ b/poc/crawling/2026-01-12/nvMapScraper.py
@@ -112,8 +112,8 @@ class nvMapScraper():
facilities = c_elem.parent.parent.find('div').string
return facilities
-url = "https://naver.me/IgJGCCic"
-scraper = nvMapScraper(url)
-asyncio.run(scraper.scrap())
-print(scraper.image_link_list)
-print(len(scraper.image_link_list))
\ No newline at end of file
+# url = "https://naver.me/IgJGCCic"
+# scraper = nvMapScraper(url)
+# asyncio.run(scraper.scrap())
+# print(scraper.image_link_list)
+# print(len(scraper.image_link_list))
\ No newline at end of file