#!/usr/bin/env python3
"""
archive-screenshots.py

Archive screenshots stored behind temporary GCS signed URLs (~7-day expiry)
into Supabase Storage so they stay permanently accessible, then rewrite the
URLs inside each report's `channel_data` / `report` JSONB columns.

Run:  python3 scripts/archive-screenshots.py
Env:  SUPABASE_URL, SUPABASE_SERVICE_ROLE_KEY (.env file or environment vars)
"""

import json
import os
import urllib.error   # FIX: explicit import — the original relied on urllib.request importing it
import urllib.parse
import urllib.request

# ── Environment loading ──────────────────────────────────────────────────────
def load_env() -> dict:
    """Read KEY=VALUE pairs from ../.env; real environment variables override."""
    env: dict = {}
    env_path = os.path.join(os.path.dirname(__file__), '..', '.env')
    try:
        with open(env_path) as f:
            for line in f:
                line = line.strip()
                # Skip blanks/comments; split on the first '=' only so values may contain '='.
                if line and not line.startswith('#') and '=' in line:
                    k, v = line.split('=', 1)
                    env[k.strip()] = v.strip().strip('"').strip("'")
    except FileNotFoundError:
        pass  # no .env — fall back to the process environment only
    # Process environment takes precedence over .env values.
    for k in ('SUPABASE_URL', 'SUPABASE_SERVICE_ROLE_KEY', 'VITE_SUPABASE_URL'):
        if os.environ.get(k):
            env[k] = os.environ[k]
    return env

env = load_env()

# FIX: the original did `env.get('VITE_SUPABASE_URL', '').replace('VITE_', '')`,
# which operates on the VALUE — it would mangle any URL that happens to contain
# "VITE_" and is otherwise a no-op. The VITE_ prefix lives in the key name.
SUPABASE_URL = env.get('SUPABASE_URL') or env.get('VITE_SUPABASE_URL', '')
SERVICE_KEY = env.get('SUPABASE_SERVICE_ROLE_KEY', '')
BUCKET = 'screenshots'
# NOTE(review): unused in this script — kept so external importers don't break.
DB_DIR = os.path.join(os.path.dirname(__file__), '..', 'src', 'data', 'db')

def _require_credentials() -> None:
    """Exit with a friendly message when Supabase credentials are missing.

    FIX: moved out of module import time (the original printed and raised
    SystemExit at import), so the module is importable without side effects.
    Behavior when run as a script is unchanged.
    """
    if not SUPABASE_URL or not SERVICE_KEY:
        print('❌ SUPABASE_URL 또는 SUPABASE_SERVICE_ROLE_KEY 환경변수가 없습니다.')
        print('   .env 파일에 추가하거나 환경변수로 설정해주세요.')
        raise SystemExit(1)

# ── Supabase Storage upload ──────────────────────────────────────────────────
def upload_to_storage(storage_path: str, image_bytes: bytes) -> str:
    """Upload PNG bytes to Supabase Storage and return the public URL.

    Raises:
        RuntimeError: with status code and response body when the upload fails.
    """
    encoded_path = urllib.parse.quote(storage_path, safe='/')
    url = f'{SUPABASE_URL}/storage/v1/object/{BUCKET}/{encoded_path}'
    req = urllib.request.Request(
        url,
        data=image_bytes,
        method='POST',
        headers={
            'Authorization': f'Bearer {SERVICE_KEY}',
            'Content-Type': 'image/png',
            'x-upsert': 'true',  # overwrite when the object already exists
        },
    )
    try:
        with urllib.request.urlopen(req, timeout=30) as r:
            r.read()
    except urllib.error.HTTPError as e:
        body = e.read().decode()
        raise RuntimeError(f'Upload failed {e.code}: {body}') from e

    return f'{SUPABASE_URL}/storage/v1/object/public/{BUCKET}/{encoded_path}'

# ── Download the image from its GCS URL ──────────────────────────────────────
def fetch_image(gcs_url: str) -> bytes:
    """Download the screenshot bytes from a (possibly signed) GCS URL."""
    req = urllib.request.Request(gcs_url, headers={'User-Agent': 'Mozilla/5.0'})
    with urllib.request.urlopen(req, timeout=20) as r:
        return r.read()

# ── Supabase DB: fetch all reports / patch screenshot URLs ───────────────────
def fetch_reports():
    """Return every marketing_reports row (id, clinic_name, url, channel_data, report)."""
    url = f'{SUPABASE_URL}/rest/v1/marketing_reports?select=id,clinic_name,url,channel_data,report'
    req = urllib.request.Request(url, headers={
        'Authorization': f'Bearer {SERVICE_KEY}',
        'apikey': SERVICE_KEY,
        'Accept': 'application/json',
    })
    with urllib.request.urlopen(req, timeout=30) as r:
        return json.loads(r.read())

def update_report_screenshots(report_id: str, channel_data: dict, report: dict) -> None:
    """Persist the updated channel_data / report JSONB columns for one row."""
    payload = json.dumps({'channel_data': channel_data, 'report': report}).encode()
    url = f'{SUPABASE_URL}/rest/v1/marketing_reports?id=eq.{report_id}'
    req = urllib.request.Request(url, data=payload, method='PATCH', headers={
        'Authorization': f'Bearer {SERVICE_KEY}',
        'apikey': SERVICE_KEY,
        'Content-Type': 'application/json',
        'Prefer': 'return=minimal',
    })
    with urllib.request.urlopen(req, timeout=30) as r:
        r.read()

# ── Main ─────────────────────────────────────────────────────────────────────
def get_domain(site_url: str) -> str:
    """Extract the bare hostname (leading "www." stripped) or 'unknown'.

    FIX: `hostname` is None for non-URLs — the original relied on the broad
    except catching the resulting AttributeError. Also `removeprefix` only
    strips a LEADING "www.", where `.replace('www.', '')` removed it anywhere
    in the host (e.g. "sub.www.example.com").
    """
    try:
        host = urllib.parse.urlparse(site_url).hostname
        return (host.removeprefix('www.') or 'unknown') if host else 'unknown'
    except Exception:
        return 'unknown'

def archive_screenshots_for_report(row: dict) -> int:
    """Archive every expiring screenshot of one report row.

    Downloads each GCS/Firecrawl-hosted image, re-uploads it to Supabase
    Storage, rewrites the URL in-place in both `channel_data` and `report`,
    and persists the row when at least one screenshot was archived.

    Returns:
        Number of screenshots successfully archived.
    """
    report_id = row['id']
    clinic_name = row.get('clinic_name', '?')
    domain = get_domain(row.get('url', ''))
    channel_data = row.get('channel_data') or {}
    report = row.get('report') or {}

    ss_list = channel_data.get('screenshots', [])
    rss_list = report.get('screenshots', [])
    # De-duplicate by id across both lists (later entries win).
    # FIX: also require an id — the original raised KeyError on id-less entries.
    all_ss = {s['id']: s for s in ss_list + rss_list if s.get('url') and s.get('id')}

    archived = 0
    for ss_id, ss in all_ss.items():
        url = ss.get('url', '')
        if 'googleapis.com' not in url and 'firecrawl' not in url:
            continue  # already a Supabase URL or some other permanent host

        storage_path = f'clinics/{domain}/{report_id}/screenshots/{ss_id}.png'
        try:
            print(f'  → 다운로드: {ss_id} ({url[:60]}...)')
            image_bytes = fetch_image(url)
            public_url = upload_to_storage(storage_path, image_bytes)

            # Rewrite the URL on every copy of this screenshot in both lists
            # (replaces the original's duplicated per-list rewrite blocks).
            for s in ss_list + rss_list:
                if s.get('id') == ss_id:
                    s['url'] = public_url

            print(f'    ✅ 아카이브 완료 → {public_url[:70]}...')
            archived += 1
        except Exception as e:
            # Best-effort: a single failed screenshot must not abort the run.
            print(f'    ❌ 실패: {e}')

    if archived > 0:
        update_report_screenshots(report_id, channel_data, report)
        print(f'  💾 DB 업데이트 완료 ({clinic_name})')

    return archived

def main() -> None:
    """Entry point: scan all reports and archive any GCS-hosted screenshots."""
    _require_credentials()

    print('=== Supabase Screenshot 영구 아카이브 ===')
    print(f'대상: {SUPABASE_URL}')
    print()

    print('DB에서 리포트 목록 조회 중...')
    reports = fetch_reports()
    print(f'총 {len(reports)}개 리포트')
    print()

    total_archived = 0
    for row in reports:
        name = row.get('clinic_name', '?')
        cd_shots = (row.get('channel_data') or {}).get('screenshots', [])
        r_shots = (row.get('report') or {}).get('screenshots', [])
        # Skip rows that contain no expiring GCS URLs at all.
        if not any('googleapis.com' in (s.get('url', '')) for s in cd_shots + r_shots):
            continue

        print(f'[{name}] channel_data={len(cd_shots)}개 / report={len(r_shots)}개 스크린샷')
        total_archived += archive_screenshots_for_report(row)
        print()

    print(f'=== 완료: 총 {total_archived}개 스크린샷 영구 저장 ===')

if __name__ == '__main__':
    main()
overallRating: (() => { @@ -746,6 +746,15 @@ export interface EnrichmentData { badges?: string[]; sourceUrl?: string; }; + // 스크래핑 시 캡처된 스크린샷 목록 (channel_data.screenshots) + screenshots?: { + id: string; + url: string; + channel: string; + caption: string; + capturedAt?: string; + sourceUrl?: string; + }[]; naverBlog?: { totalResults?: number; searchQuery?: string; @@ -1119,7 +1128,7 @@ export function mergeEnrichment( linkedDomain: fb.website || '', reviews: (() => { // Facebook rating 문자열 파싱: "Not yet rated (3 Reviews)" or "4.8 (120 Reviews)" - const m = (fb.rating || '').match(/\((\d+)\s+Reviews?\)/i); + const m = String(fb.rating || '').match(/\((\d+)\s+Reviews?\)/i); return m ? parseInt(m[1], 10) : 0; })(), recentPostAge: '', @@ -1178,5 +1187,74 @@ export function mergeEnrichment( merged.problemDiagnosis = [...merged.problemDiagnosis, ...enrichDiagnosis]; } + // ── 스크린샷 영구 반영 ────────────────────────────────────────────────────── + // channel_data.screenshots → report.screenshots 로 옮기고, + // 채널별로 diagnosis evidenceIds 자동 연결 + if (enrichment.screenshots?.length) { + const ss = enrichment.screenshots; + + // 1) report.screenshots 세팅 (ScreenshotEvidence 형식으로 변환) + merged.screenshots = ss.map(s => ({ + id: s.id, + url: s.url, + channel: s.channel, + caption: s.caption, + capturedAt: s.capturedAt ?? 
new Date().toISOString(), + sourceUrl: s.sourceUrl, + })); + + // 2) 채널명 → screenshot IDs 매핑 테이블 생성 + // channel_data의 channel 필드: "YouTube", "웹사이트", "Instagram", "Facebook" 등 + const CHANNEL_ALIAS: Record = { + youtube: ['youtube', 'YouTube', 'yt'], + instagram: ['instagram', 'Instagram', 'ig'], + facebook: ['facebook', 'Facebook', 'fb'], + website: ['웹사이트', 'website', 'Website'], + gangnamUnni: ['강남언니', 'gangnamUnni'], + naverPlace: ['네이버 플레이스', 'naverPlace'], + naverBlog: ['네이버 블로그', 'naverBlog'], + }; + + const channelToIds: Record = {}; + for (const s of ss) { + for (const [key, aliases] of Object.entries(CHANNEL_ALIAS)) { + if (aliases.some(a => s.channel.toLowerCase().includes(a.toLowerCase()))) { + channelToIds[key] = [...(channelToIds[key] ?? []), s.id]; + break; + } + } + } + + // 3) 채널별 audit.diagnosis 배열에 evidenceIds 연결 + // YouTubeAudit / InstagramAudit / FacebookAudit 컴포넌트가 이 필드를 사용함 + const linkIds = (diagItems: import('../types/report').DiagnosisItem[], channelKey: string): import('../types/report').DiagnosisItem[] => { + const ids = channelToIds[channelKey] ?? []; + if (!ids.length) return diagItems; + return diagItems.map(item => ({ ...item, evidenceIds: [...(item.evidenceIds ?? 
[]), ...ids] })); + }; + + if (merged.youtubeAudit?.diagnosis?.length) { + merged.youtubeAudit = { ...merged.youtubeAudit, diagnosis: linkIds(merged.youtubeAudit.diagnosis, 'youtube') }; + } + if (merged.instagramAudit?.diagnosis?.length) { + merged.instagramAudit = { ...merged.instagramAudit, diagnosis: linkIds(merged.instagramAudit.diagnosis, 'instagram') }; + } + if (merged.facebookAudit?.diagnosis?.length) { + merged.facebookAudit = { ...merged.facebookAudit, diagnosis: linkIds(merged.facebookAudit.diagnosis, 'facebook') }; + } + // websiteAudit / 기타 채널은 EvidenceGallery를 직접 받지 않으므로 problemDiagnosis에만 연결 + merged.problemDiagnosis = merged.problemDiagnosis.map(item => { + const catLower = item.category.toLowerCase(); + let ids: string[] = []; + for (const [key, ssIds] of Object.entries(channelToIds)) { + if (catLower.includes(key) || key.includes(catLower)) { + ids = [...ids, ...ssIds]; + } + } + return ids.length > 0 ? { ...item, evidenceIds: ids } : item; + }); + } + // ─────────────────────────────────────────────────────────────────────────── + return merged; } diff --git a/supabase/functions/collect-channel-data/index.ts b/supabase/functions/collect-channel-data/index.ts index f6ea7e7..d99e66b 100644 --- a/supabase/functions/collect-channel-data/index.ts +++ b/supabase/functions/collect-channel-data/index.ts @@ -638,60 +638,52 @@ Deno.serve(async (req) => { throw new Error(`No screenshots captured: ${debugInfo}`); } - // ─── Step 2: Archive to Supabase Storage (replace 7-day GCS URLs) ─────── - // Firecrawl returns signed GCS URLs that expire after ~7 days. - // We already have the image as base64 in memory — upload it permanently - // to Supabase Storage and replace ss.url in-place before storing to DB. - // - // Upload happens in parallel; failures are non-fatal — the screenshot - // keeps its GCS URL as a fallback so Vision analysis still proceeds. 
- // clinics/{domain}/{reportId}/screenshots/{id}.png + // ─── Step 2: Archive to Supabase Storage (GCS 7일 임시 URL → 영구 저장) ── + // base64가 메모리에 있는 지금 즉시 업로드. 실패 시 GCS URL 유지(비치명적). + // 경로: clinics/{domain}/{reportId}/screenshots/{id}.png const domain = (() => { - try { return new URL(row.url || "").hostname.replace('www.', ''); } catch { return "unknown"; } + try { + const h = new URL(row.url || "").hostname.replace('www.', ''); + return h || "unknown"; + } catch { return "unknown"; } })(); const SUPABASE_STORAGE_BUCKET = "screenshots"; - const archiveTasks = screenshots.map(async (ss) => { - if (!ss.base64) return; // no image data — skip + + // 순차 업로드 (병렬 시 Supabase rate-limit 위험 방지) + for (const ss of screenshots) { + if (!ss.base64) { + console.warn(`[archive] ${ss.id}: base64 없음 — GCS URL 유지`); + continue; + } try { - // base64 → Uint8Array const binaryStr = atob(ss.base64); const bytes = new Uint8Array(binaryStr.length); - for (let i = 0; i < binaryStr.length; i++) { - bytes[i] = binaryStr.charCodeAt(i); - } + for (let i = 0; i < binaryStr.length; i++) bytes[i] = binaryStr.charCodeAt(i); - // Upload: clinics/{domain}/{reportId}/screenshots/{screenshotId}.png const storagePath = `clinics/${domain}/${reportId}/screenshots/${ss.id}.png`; const { error: uploadError } = await supabase.storage .from(SUPABASE_STORAGE_BUCKET) - .upload(storagePath, bytes, { - contentType: "image/png", - upsert: true, // overwrite if re-running same analysis - }); + .upload(storagePath, bytes, { contentType: "image/png", upsert: true }); if (uploadError) { - // Non-fatal: log and keep GCS URL as fallback - console.warn(`[archive] Storage upload failed for ${ss.id}: ${uploadError.message}`); - return; + console.error(`[archive] ❌ 업로드 실패 ${ss.id}: ${uploadError.message}`); + continue; // GCS URL 유지 } - // Replace GCS temp URL with permanent Supabase Storage public URL const { data: { publicUrl } } = supabase.storage .from(SUPABASE_STORAGE_BUCKET) .getPublicUrl(storagePath); - ss.url = 
publicUrl; // in-place replace — all downstream code uses permanent URL - console.log(`[archive] ${ss.id} → clinics/${domain}/${reportId}/screenshots/`); - } catch (archiveErr) { - // Non-fatal: Vision analysis still proceeds with base64 - console.warn(`[archive] Exception for ${ss.id}:`, archiveErr instanceof Error ? archiveErr.message : archiveErr); + ss.url = publicUrl; + ss.archived = true; + console.log(`[archive] ✅ ${ss.id} → Supabase Storage`); + } catch (err) { + console.error(`[archive] ❌ 예외 ${ss.id}:`, err instanceof Error ? err.message : err); } - }); + } - await Promise.allSettled(archiveTasks); - - const archivedCount = screenshots.filter(ss => ss.url.includes("supabase")).length; - console.log(`[archive] ${archivedCount}/${screenshots.length} screenshots archived to Supabase Storage`); + const archivedCount = screenshots.filter(ss => ss.archived).length; + console.log(`[archive] ${archivedCount}/${screenshots.length}개 영구 저장 완료`); // Step 3: Run Gemini Vision on captured screenshots (base64 still in memory) if (GEMINI_API_KEY && screenshots.length > 0) {