#!/usr/bin/env python3
"""
update-csv-gangnamunni.py

Append scraped Gangnam Unni (강남언니) data to two CSV files:
  - data/clinic-registry/clinic_registry_working.csv
  - data/clinic-registry/INFINITH_Outbound_List.csv

Columns added:
  clinic_registry_working.csv → gangnam_unni_rating, gangnam_unni_reviews, lead_doctor
  INFINITH_Outbound_List.csv  → 강남언니 평점, 강남언니 리뷰수, 대표원장

Cells that already hold a value are never overwritten; blank or missing
cells are filled from the scraped table below.
"""

import csv
import os
import re
import sys
from typing import List, Optional, Sequence, Tuple

# ── Scraped data (gangnamunni hospital ID → rating, reviews, lead_doctor) ─────
# A None entry means the value was not available when the data was collected.
SCRAPED = {
    23: (9.2, 6843, "반재상"),
    189: (9.1, 18840, "최순우"),
    257: (9.5, 14933, "박상훈"),
    62: (9.8, 1531, "이세환"),
    729: (9.1, 3185, "이강우"),
    250: (9.6, 69859, "이상우"),
    2500: (9.3, 11789, "강문석"),
    215: (9.3, 12735, "김태규"),
    926: (9.6, 1696, "조정남"),
    938: (9.6, 63166, "황동연"),
    2186: (9.8, 9209, "유지한"),
    3004: (9.5, 23631, "김주연"),
    139: (9.5, 1934, "박형준"),
    141: (9.3, 15207, "서일범"),
    55: (9.5, 2560, "윤용일"),
    116: (9.5, 12506, "이상균"),
    248: (9.6, 9227, "김신영"),
    1449: (9.9, 150, "최민"),
    2196: (9.6, 812, "최동훈"),
    398: (9.3, 21088, "송훈"),
    1515: (None, 419, None),  # 페이스성형외과 — rating and lead doctor not confirmed
    213: (9.4, 8529, "이민구"),
    623: (7.8, 44, "조배정"),
    331: (8.3, 658, "이성준"),
    2122: (9.6, 9179, "신종인"),
    166: (9.5, 23378, "김한조"),
    108: (9.1, 9214, "김석한"),
    69: (8.8, 5411, "김정배"),
    660: (9.5, 9022, "박일"),
    231: (9.6, 342, "신동진"),
    300: (9.3, 13769, "이한정"),
    563: (9.2, 9172, "김준호"),
    58: (9.0, 20810, None),  # 아이웰 — lead doctor not confirmed
    54: (9.4, 8416, "노봉일"),
    1181: (9.2, 915, "김흥규"),
    66: (9.5, 5134, "심재선"),
    431: (9.6, 4487, "노경환"),
    339: (9.2, 207, "박재현"),
    912: (9.4, 2329, "이용우"),
    839: (9.4, 86, "최규진"),
    369: (9.5, 5710, "윤석호"),
    450: (9.4, 351, "나민화"),
    413: (9.6, 883, "이무영"),
    3000: (9.5, 2490, "오재윤"),
    4749: (9.7, 48, "김승준"),
    5500: (9.0, 225, "정진욱"),
    2052: (9.2, 8606, "우경식"),
    4459: (8.9, 738, "허찬"),
    1265: (9.5, 587, "김진우"),
    5554: (9.7, 274, "박영규"),
    4212: (9.4, 3352, "민성기"),
    2414: (9.8, 571, "정태원"),
    3569: (9.8, 2053, "권순범"),
    6680: (10.0, 216, "권영훈"),
    6204: (None, None, None),  # 에비뉴 — newly registered, no data yet
    1178: (9.0, 624, "정창호"),
    3429: (9.5, 1034, "정태광"),
    5636: (10.0, 1, "방난석"),
    2991: (9.3, 10278, "손유석"),
    4154: (9.7, 1368, "김의건"),
    6597: (10.0, 11, "이석준"),
    4244: (9.6, 92, "엄수진"),
    5870: (9.3, 1636, "주락균"),
}

# Directory holding both CSV files, resolved relative to this script.
BASE = os.path.join(os.path.dirname(__file__), '..', 'data', 'clinic-registry')

# Compiled once; matches the numeric hospital ID in a ".../hospitals/<id>" URL.
_HOSPITAL_ID_RE = re.compile(r'/hospitals/(\d+)')


def extract_id(url: Optional[str]) -> Optional[int]:
    """Extract the hospital ID from a Gangnam Unni URL.

    Returns the integer following ``/hospitals/`` in *url*, or ``None`` when
    *url* is empty/``None`` or contains no such segment.
    """
    if not url:
        return None
    m = _HOSPITAL_ID_RE.search(url)
    return int(m.group(1)) if m else None


def lookup(url: Optional[str]) -> Tuple[str, str, str]:
    """Map a URL to ``(rating_str, reviews_str, doctor_str)``.

    Every element is a string ready to be written to a CSV cell. Unknown
    hospitals, unparsable URLs, and individual ``None`` values in ``SCRAPED``
    all yield empty strings.
    """
    hid = extract_id(url)
    if hid is None or hid not in SCRAPED:
        return ("", "", "")
    rating, reviews, doctor = SCRAPED[hid]
    return (
        str(rating) if rating is not None else "",
        str(reviews) if reviews is not None else "",
        doctor if doctor else "",
    )


def _is_blank(value) -> bool:
    """Return True when a CSV cell is missing (None) or empty/whitespace."""
    return value is None or not str(value).strip()


def update_csv(
    path: str,
    url_column: str,
    insert_after: str,
    new_columns: Sequence[str],
) -> List[dict]:
    """Add the rating / reviews / lead-doctor columns to one CSV file in place.

    Args:
        path: CSV file to read and then rewrite (UTF-8 with BOM).
        url_column: Name of the column holding the Gangnam Unni URL.
        insert_after: Existing column after which new columns are inserted;
            when it is absent the new columns are appended at the end.
        new_columns: Three column names, in the order
            (rating, reviews, lead doctor).

    Returns:
        The list of row dicts that was written back to *path*.
    """
    with open(path, newline='', encoding='utf-8-sig') as f:
        reader = csv.DictReader(f)
        fieldnames = list(reader.fieldnames or [])
        rows = list(reader)

    # Add every column that is not in the header yet. Checking each column
    # (rather than only the first) prevents a partially-migrated file from
    # silently losing data through extrasaction='ignore'.
    missing = [col for col in new_columns if col not in fieldnames]
    if missing:
        anchors = [
            fieldnames.index(col)
            for col in (insert_after, *new_columns)
            if col in fieldnames
        ]
        at = max(anchors) + 1 if anchors else len(fieldnames)
        fieldnames[at:at] = missing

    for row in rows:
        values = lookup(row.get(url_column, ''))
        for column, value in zip(new_columns, values):
            # DictReader always creates the key for an existing column (as ''
            # or None), so dict.setdefault would never fill a blank cell.
            # Fill only when the cell holds no value; never overwrite one.
            if _is_blank(row.get(column)):
                row[column] = value

    with open(path, 'w', newline='', encoding='utf-8-sig') as f:
        # extrasaction='ignore' drops the None key DictReader uses for
        # surplus cells in over-long rows.
        writer = csv.DictWriter(f, fieldnames=fieldnames, extrasaction='ignore')
        writer.writeheader()
        writer.writerows(rows)

    return rows


def main() -> None:
    """Update both CSV files and print a short sample for verification."""
    # ── 1. clinic_registry_working.csv ──────────────────────────────────────
    reg_rows = update_csv(
        os.path.join(BASE, 'clinic_registry_working.csv'),
        url_column='gangnam_unni_url',
        insert_after='gangnam_unni_note',
        new_columns=('gangnam_unni_rating', 'gangnam_unni_reviews', 'lead_doctor'),
    )
    print(f"✅ clinic_registry_working.csv 업데이트 완료 ({len(reg_rows)}행)")

    # ── 2. INFINITH_Outbound_List.csv ───────────────────────────────────────
    out_rows = update_csv(
        os.path.join(BASE, 'INFINITH_Outbound_List.csv'),
        url_column='강남언니',
        insert_after='강남언니 비고',
        new_columns=('강남언니 평점', '강남언니 리뷰수', '대표원장'),
    )
    print(f"✅ INFINITH_Outbound_List.csv 업데이트 완료 ({len(out_rows)}행)")

    print()
    print("샘플 확인:")
    for row in out_rows[:5]:
        # `or ''` guards against None cells from short rows, which would
        # otherwise raise TypeError under the width format specs below.
        name = row.get('병원명') or ''
        rating = row.get('강남언니 평점') or ''
        reviews = row.get('강남언니 리뷰수') or ''
        doctor = row.get('대표원장') or ''
        print(f" {name:<16} 평점={rating:>4} 리뷰={reviews:>6} 원장={doctor}")


if __name__ == "__main__":
    main()