38 lines
1.2 KiB
Python
38 lines
1.2 KiB
Python
#!/usr/bin/env python3
|
|
"""Extract Naver Place IDs from links arrays"""
|
|
import re
|
|
import json
|
|
import sys
|
|
|
|
def extract_place_id(links):
    """Extract the preferred Naver Place ID from a list of URLs.

    Candidate IDs are 7-12 digit runs following ``place/`` in links that
    contain ``map.naver.com``. Digit runs starting with ``1414`` are treated
    as coordinate-like values and ignored.

    Selection order, applied only when at least one candidate exists:

    1. The first ID found after ``entry/place/`` in any link (scanning the
       links in order), provided it does not start with ``1414``.
    2. Otherwise the shortest candidate; ties are resolved in favour of the
       candidate that appeared first in ``links``.

    Args:
        links: Iterable of URL strings. Non-string items are skipped.
            ``None`` or an empty iterable yields ``None``.

    Returns:
        The selected place ID as a string of digits, or ``None`` when no
        valid candidate is present.
    """
    if not links:
        return None

    # Insertion-ordered dict instead of a set: iteration order of a set of
    # strings varies between runs, which made the shortest-ID fallback
    # nondeterministic when several candidates had the same length.
    candidates = {}
    entry_id = None

    for link in links:
        # Tolerate junk entries (e.g. JSON null) instead of raising TypeError.
        if not isinstance(link, str):
            continue

        if 'map.naver.com' in link:
            for place_id in re.findall(r'place/(\d{7,12})', link):
                # Filter out coordinate-like numbers (14140xxx pattern).
                if not place_id.startswith('1414'):
                    candidates.setdefault(place_id, None)

        # Remember the first acceptable entry/place ID; only the first
        # occurrence within each link is considered.
        if entry_id is None:
            match = re.search(r'entry/place/(\d{7,12})', link)
            if match and not match.group(1).startswith('1414'):
                entry_id = match.group(1)

    if not candidates:
        return None

    # An explicit entry/place URL takes priority over everything else.
    if entry_id is not None:
        return entry_id

    # Fallback: shortest candidate; min() returns the first minimal item,
    # so ties go to the earliest-seen ID.
    return min(candidates, key=len)
|
|
|
|
if __name__ == '__main__':
    # Read a JSON document (expected: an array of URL strings) from stdin
    # and print the extracted place ID, or a sentinel when none is found.
    url_list = json.load(sys.stdin)
    place_id = extract_place_id(url_list)
    print(place_id or "NOT_FOUND")
|