o2o-infinith-demo/data/clinic-registry/extract_place_ids.py

38 lines
1.2 KiB
Python

#!/usr/bin/env python3
"""Extract Naver Place IDs from links arrays"""
import re
import json
import sys
def extract_place_id(links):
    """Return the best-guess Naver Place ID found in a list of URLs.

    Selection happens in two phases:

    1. Every ``place/<7-12 digits>`` occurrence in URLs containing
       ``map.naver.com`` is collected as a candidate, except IDs starting
       with ``1414`` (the original author's note describes these as
       coordinate-like numbers rather than place IDs).
    2. If at least one candidate exists, the first ``entry/place/<id>`` match
       (in input order) whose ID does not start with ``1414`` is returned.
       Otherwise the shortest candidate is returned; ties are broken by
       string order, which for equal-length digit strings is numeric order.

    Args:
        links: Iterable of URL strings. ``None`` and non-string entries
            (e.g. JSON ``null``) are ignored instead of raising.

    Returns:
        The place ID as a string of digits, or ``None`` when no candidate
        was found.
    """
    # Materialise once: the input is scanned twice, and this also lets us
    # drop entries that cannot be searched (None, numbers, ...).
    urls = [url for url in (links or ()) if isinstance(url, str)]

    candidates = set()
    for url in urls:
        if 'map.naver.com' not in url:
            continue
        for place_id in re.findall(r'place/(\d{7,12})', url):
            if not place_id.startswith('1414'):
                candidates.add(place_id)

    if not candidates:
        return None

    # Prefer an explicit entry/place URL over IDs picked up from other paths.
    # NOTE(review): as in the original, this pass does not re-check the
    # 'map.naver.com' host and only looks at the first match per URL --
    # confirm whether that is intended.
    for url in urls:
        match = re.search(r'entry/place/(\d{7,12})', url)
        if match and not match.group(1).startswith('1414'):
            return match.group(1)

    # Fallback: shortest ID. The (length, value) key makes the choice
    # deterministic; min() over a set with key=len alone depends on set
    # iteration order, which varies between runs for strings.
    return min(candidates, key=lambda place_id: (len(place_id), place_id))
if __name__ == '__main__':
    # Read a JSON document (expected: an array of URL strings) from stdin and
    # print the extracted place ID, or the NOT_FOUND sentinel when there is none.
    links = json.load(sys.stdin)
    print(extract_place_id(links) or "NOT_FOUND")