feat: 스크린샷 리포트 반영 + 영구 저장 인프라 강화

- transformReport: channel_data.screenshots → report.screenshots 자동 매핑
- transformReport: youtubeAudit/instagramAudit/facebookAudit diagnosis에 evidenceIds 자동 연결 (채널별 스크린샷 → 진단 항목 연결)
- collect-channel-data: 스크린샷 아카이브를 병렬→순차로 변경 (rate-limit 방지), 실패 시 상세 로그
- scripts/archive-screenshots.py: 기존 GCS 임시 URL → Supabase Storage 일괄 재아카이브 스크립트 추가
- TypeScript 기존 에러 3개 수정 (SectionErrorBoundary, ClinicSnapshot, reviewCount 유니언 타입)

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
claude/bold-hawking
Haewon Kam 2026-04-07 14:43:25 +09:00
parent 2d1937944a
commit 9c4d10609f
5 changed files with 292 additions and 37 deletions

View File

@ -0,0 +1,184 @@
#!/usr/bin/env python3
"""
archive-screenshots.py
GCS 임시 URL(7일 만료)로 저장된 스크린샷을 Supabase Storage에 영구 아카이브.
실행: python3 scripts/archive-screenshots.py
환경: SUPABASE_URL, SUPABASE_SERVICE_ROLE_KEY (.env 또는 환경변수)
"""
import json
import os
import re
import urllib.error
import urllib.parse
import urllib.request
# ── 환경변수 로드 ──────────────────────────────────────────────────────────────
def load_env():
    """Load key/value pairs from the project-root .env file, then overlay
    Supabase-related process environment variables (env vars win).

    Returns:
        dict[str, str]: merged configuration values.
    """
    env = {}
    env_path = os.path.join(os.path.dirname(__file__), '..', '.env')
    try:
        # Explicit UTF-8: values may contain non-ASCII text, and the platform
        # default encoding (e.g. cp949/cp1252 on Windows) would mangle them.
        with open(env_path, encoding='utf-8') as f:
            for line in f:
                line = line.strip()
                if line and not line.startswith('#') and '=' in line:
                    k, v = line.split('=', 1)
                    # Strip surrounding double or single quotes from the value.
                    env[k.strip()] = v.strip().strip('"').strip("'")
    except FileNotFoundError:
        pass  # no .env file is fine — rely on process env vars only
    # Process environment variables take precedence over .env contents.
    for k in ('SUPABASE_URL', 'SUPABASE_SERVICE_ROLE_KEY', 'VITE_SUPABASE_URL'):
        if os.environ.get(k):
            env[k] = os.environ[k]
    return env
env = load_env()
# Prefer SUPABASE_URL; fall back to the Vite-prefixed variable the frontend
# build uses (same value under a different key).  NOTE(review): the original
# applied .replace('VITE_', '') to the *value* — a no-op, since the prefix
# lives in the key, not the URL — so it was dropped.
SUPABASE_URL = env.get('SUPABASE_URL') or env.get('VITE_SUPABASE_URL', '')
SERVICE_KEY = env.get('SUPABASE_SERVICE_ROLE_KEY', '')
BUCKET = 'screenshots'  # Supabase Storage bucket name
DB_DIR = os.path.join(os.path.dirname(__file__), '..', 'src', 'data', 'db')
# Fail fast with a readable message when credentials are missing.
if not SUPABASE_URL or not SERVICE_KEY:
    print('❌ SUPABASE_URL 또는 SUPABASE_SERVICE_ROLE_KEY 환경변수가 없습니다.')
    print(' .env 파일에 추가하거나 환경변수로 설정해주세요.')
    raise SystemExit(1)
# ── Supabase Storage 업로드 ────────────────────────────────────────────────────
def upload_to_storage(storage_path: str, image_bytes: bytes) -> str:
    """Upload PNG bytes to the Supabase Storage bucket; return the public URL.

    Raises:
        RuntimeError: when the Storage API responds with an HTTP error.
    """
    safe_path = urllib.parse.quote(storage_path, safe='/')
    endpoint = f'{SUPABASE_URL}/storage/v1/object/{BUCKET}/{safe_path}'
    headers = {
        'Authorization': f'Bearer {SERVICE_KEY}',
        'Content-Type': 'image/png',
        'x-upsert': 'true',  # overwrite when re-running the same archive
    }
    request = urllib.request.Request(
        endpoint, data=image_bytes, method='POST', headers=headers
    )
    try:
        with urllib.request.urlopen(request, timeout=30) as resp:
            resp.read()
    except urllib.error.HTTPError as err:
        detail = err.read().decode()
        raise RuntimeError(f'Upload failed {err.code}: {detail}')
    return f'{SUPABASE_URL}/storage/v1/object/public/{BUCKET}/{safe_path}'
# ── GCS URL에서 이미지 다운로드 ────────────────────────────────────────────────
def fetch_image(gcs_url: str) -> bytes:
    """Download the image at *gcs_url* and return its raw bytes."""
    # Browser-like User-Agent — presumably to avoid bot filtering; confirm.
    request = urllib.request.Request(gcs_url, headers={'User-Agent': 'Mozilla/5.0'})
    with urllib.request.urlopen(request, timeout=20) as resp:
        return resp.read()
# ── Supabase DB에서 모든 리포트 조회 + URL 업데이트 ───────────────────────────
def fetch_reports():
    """Fetch id/clinic_name/url/channel_data/report for all marketing reports."""
    endpoint = (
        f'{SUPABASE_URL}/rest/v1/marketing_reports'
        '?select=id,clinic_name,url,channel_data,report'
    )
    headers = {
        'Authorization': f'Bearer {SERVICE_KEY}',
        'apikey': SERVICE_KEY,
        'Accept': 'application/json',
    }
    request = urllib.request.Request(endpoint, headers=headers)
    with urllib.request.urlopen(request, timeout=30) as resp:
        return json.loads(resp.read())
def update_report_screenshots(report_id: str, channel_data: dict, report: dict):
    """PATCH the updated channel_data and report JSONB back to Supabase."""
    body = json.dumps({'channel_data': channel_data, 'report': report}).encode()
    endpoint = f'{SUPABASE_URL}/rest/v1/marketing_reports?id=eq.{report_id}'
    headers = {
        'Authorization': f'Bearer {SERVICE_KEY}',
        'apikey': SERVICE_KEY,
        'Content-Type': 'application/json',
        'Prefer': 'return=minimal',  # no response body needed
    }
    request = urllib.request.Request(
        endpoint, data=body, method='PATCH', headers=headers
    )
    with urllib.request.urlopen(request, timeout=30) as resp:
        resp.read()
# ── 메인 ───────────────────────────────────────────────────────────────────────
def get_domain(site_url: str) -> str:
    """Return the hostname of *site_url* without a leading ``www.`` prefix.

    Falls back to ``'unknown'`` for empty or invalid URLs (urlparse yields a
    ``None`` hostname, whose attribute access raises and is caught below).
    """
    try:
        host = urllib.parse.urlparse(site_url).hostname
        # removeprefix strips only a *leading* 'www.' — str.replace would
        # also mangle hostnames containing 'www.' in the middle.
        return host.removeprefix('www.') or 'unknown'
    except Exception:
        return 'unknown'
def archive_screenshots_for_report(row: dict) -> int:
    """Re-archive all expiring screenshot URLs belonging to one report row.

    Downloads each GCS/Firecrawl-hosted image, uploads it to Supabase
    Storage, rewrites the URL in both ``channel_data.screenshots`` and
    ``report.screenshots``, and persists the updated JSONB back to the DB
    when anything changed.  Per-screenshot failures are non-fatal: the
    entry keeps its old URL as a fallback.

    Returns:
        Number of screenshots successfully archived.
    """
    report_id = row['id']
    clinic_name = row.get('clinic_name', '?')
    domain = get_domain(row.get('url', ''))
    channel_data = row.get('channel_data') or {}
    report = row.get('report') or {}
    ss_list = channel_data.get('screenshots', [])
    rss_list = report.get('screenshots', [])
    # De-duplicate by id across both lists.  Also guard s.get('id') — the
    # original indexed s['id'] directly and raised KeyError on entries
    # missing an id.
    all_ss = {s['id']: s for s in ss_list + rss_list if s.get('url') and s.get('id')}
    archived = 0
    for ss_id, ss in all_ss.items():
        url = ss.get('url', '')
        if 'googleapis.com' not in url and 'firecrawl' not in url:
            continue  # already a Supabase (or other permanent) URL
        storage_path = f'clinics/{domain}/{report_id}/screenshots/{ss_id}.png'
        try:
            print(f' → 다운로드: {ss_id} ({url[:60]}...)')
            image_bytes = fetch_image(url)
            public_url = upload_to_storage(storage_path, image_bytes)
            # Rewrite every entry carrying this id, in both lists (the
            # dict value is one of these entries, so it is covered too).
            for s in ss_list:
                if s.get('id') == ss_id:
                    s['url'] = public_url
            for s in rss_list:
                if s.get('id') == ss_id:
                    s['url'] = public_url
            print(f' ✅ 아카이브 완료 → {public_url[:70]}...')
            archived += 1
        except Exception as e:
            print(f' ❌ 실패: {e}')  # non-fatal: old URL stays as fallback
    if archived > 0:
        # Persist both rewritten JSONB columns in one PATCH.
        update_report_screenshots(report_id, channel_data, report)
        print(f' 💾 DB 업데이트 완료 ({clinic_name})')
    return archived
def main():
    """Scan every marketing report and archive its expiring screenshot URLs."""
    print('=== Supabase Screenshot 영구 아카이브 ===')
    print(f'대상: {SUPABASE_URL}')
    print()
    print('DB에서 리포트 목록 조회 중...')
    reports = fetch_reports()
    print(f'{len(reports)}개 리포트')
    print()
    total_archived = 0
    for row in reports:
        name = row.get('clinic_name', '?')
        ch_shots = (row.get('channel_data') or {}).get('screenshots', [])
        rp_shots = (row.get('report') or {}).get('screenshots', [])
        # Pre-filter rows with nothing to archive.  This test must match the
        # URL check inside archive_screenshots_for_report — the original only
        # looked for 'googleapis.com' and silently skipped rows whose
        # screenshots were all firecrawl-hosted.
        needs_archive = any(
            'googleapis.com' in (s.get('url', '')) or 'firecrawl' in (s.get('url', ''))
            for s in ch_shots + rp_shots
        )
        if not needs_archive:
            continue
        print(f'[{name}] channel_data={len(ch_shots)}개 / report={len(rp_shots)}개 스크린샷')
        total_archived += archive_screenshots_for_report(row)
    print()
    print(f'=== 완료: 총 {total_archived}개 스크린샷 영구 저장 ===')


if __name__ == '__main__':
    main()

View File

@ -28,7 +28,7 @@ const infoFields = (data: ClinicSnapshotType): InfoField[] => [
data.phone ? { label: '전화', value: data.phone, icon: Phone, href: `tel:${data.phone.replace(/[^+0-9]/g, '')}` } : null,
data.domain ? { label: '도메인', value: data.domain, icon: Globe, href: `https://${data.domain.replace(/^https?:\/\//, '')}` } : null,
data.registryData?.websiteEn ? { label: '영문 사이트', value: data.registryData.websiteEn, icon: Globe, href: data.registryData.websiteEn } : null,
].filter((f): f is NonNullable<InfoField> => f !== null);
].filter(Boolean) as InfoField[];
export default function ClinicSnapshot({ data }: ClinicSnapshotProps) {
const fields = infoFields(data);

View File

@ -10,6 +10,7 @@ interface State {
}
export class SectionErrorBoundary extends Component<Props, State> {
declare props: Props;
state: State = { hasError: false };
static getDerivedStateFromError() {

View File

@ -550,7 +550,7 @@ export function transformApiReport(
name: doctor?.name || '',
credentials: doctor?.specialty || '',
rating: doctor?.rating ?? 0,
reviewCount: doctor?.reviewCount ?? doctor?.reviews ?? 0,
reviewCount: (doctor as { reviewCount?: number })?.reviewCount ?? (doctor as { reviews?: number })?.reviews ?? 0,
},
// 강남언니 is 10-point scale. AI sometimes gives 5-point — auto-correct.
overallRating: (() => {
@ -746,6 +746,15 @@ export interface EnrichmentData {
badges?: string[];
sourceUrl?: string;
};
// 스크래핑 시 캡처된 스크린샷 목록 (channel_data.screenshots)
screenshots?: {
id: string;
url: string;
channel: string;
caption: string;
capturedAt?: string;
sourceUrl?: string;
}[];
naverBlog?: {
totalResults?: number;
searchQuery?: string;
@ -1119,7 +1128,7 @@ export function mergeEnrichment(
linkedDomain: fb.website || '',
reviews: (() => {
// Facebook rating 문자열 파싱: "Not yet rated (3 Reviews)" or "4.8 (120 Reviews)"
const m = (fb.rating || '').match(/\((\d+)\s+Reviews?\)/i);
const m = String(fb.rating || '').match(/\((\d+)\s+Reviews?\)/i);
return m ? parseInt(m[1], 10) : 0;
})(),
recentPostAge: '',
@ -1178,5 +1187,74 @@ export function mergeEnrichment(
merged.problemDiagnosis = [...merged.problemDiagnosis, ...enrichDiagnosis];
}
// ── 스크린샷 영구 반영 ──────────────────────────────────────────────────────
// channel_data.screenshots → report.screenshots 로 옮기고,
// 채널별로 diagnosis evidenceIds 자동 연결
if (enrichment.screenshots?.length) {
const ss = enrichment.screenshots;
// 1) report.screenshots 세팅 (ScreenshotEvidence 형식으로 변환)
merged.screenshots = ss.map(s => ({
id: s.id,
url: s.url,
channel: s.channel,
caption: s.caption,
capturedAt: s.capturedAt ?? new Date().toISOString(),
sourceUrl: s.sourceUrl,
}));
// 2) 채널명 → screenshot IDs 매핑 테이블 생성
// channel_data의 channel 필드: "YouTube", "웹사이트", "Instagram", "Facebook" 등
const CHANNEL_ALIAS: Record<string, string[]> = {
youtube: ['youtube', 'YouTube', 'yt'],
instagram: ['instagram', 'Instagram', 'ig'],
facebook: ['facebook', 'Facebook', 'fb'],
website: ['웹사이트', 'website', 'Website'],
gangnamUnni: ['강남언니', 'gangnamUnni'],
naverPlace: ['네이버 플레이스', 'naverPlace'],
naverBlog: ['네이버 블로그', 'naverBlog'],
};
const channelToIds: Record<string, string[]> = {};
for (const s of ss) {
for (const [key, aliases] of Object.entries(CHANNEL_ALIAS)) {
if (aliases.some(a => s.channel.toLowerCase().includes(a.toLowerCase()))) {
channelToIds[key] = [...(channelToIds[key] ?? []), s.id];
break;
}
}
}
// 3) 채널별 audit.diagnosis 배열에 evidenceIds 연결
// YouTubeAudit / InstagramAudit / FacebookAudit 컴포넌트가 이 필드를 사용함
const linkIds = (diagItems: import('../types/report').DiagnosisItem[], channelKey: string): import('../types/report').DiagnosisItem[] => {
const ids = channelToIds[channelKey] ?? [];
if (!ids.length) return diagItems;
return diagItems.map(item => ({ ...item, evidenceIds: [...(item.evidenceIds ?? []), ...ids] }));
};
if (merged.youtubeAudit?.diagnosis?.length) {
merged.youtubeAudit = { ...merged.youtubeAudit, diagnosis: linkIds(merged.youtubeAudit.diagnosis, 'youtube') };
}
if (merged.instagramAudit?.diagnosis?.length) {
merged.instagramAudit = { ...merged.instagramAudit, diagnosis: linkIds(merged.instagramAudit.diagnosis, 'instagram') };
}
if (merged.facebookAudit?.diagnosis?.length) {
merged.facebookAudit = { ...merged.facebookAudit, diagnosis: linkIds(merged.facebookAudit.diagnosis, 'facebook') };
}
// websiteAudit / 기타 채널은 EvidenceGallery를 직접 받지 않으므로 problemDiagnosis에만 연결
merged.problemDiagnosis = merged.problemDiagnosis.map(item => {
const catLower = item.category.toLowerCase();
let ids: string[] = [];
for (const [key, ssIds] of Object.entries(channelToIds)) {
if (catLower.includes(key) || key.includes(catLower)) {
ids = [...ids, ...ssIds];
}
}
return ids.length > 0 ? { ...item, evidenceIds: ids } : item;
});
}
// ───────────────────────────────────────────────────────────────────────────
return merged;
}

View File

@ -638,60 +638,52 @@ Deno.serve(async (req) => {
throw new Error(`No screenshots captured: ${debugInfo}`);
}
// ─── Step 2: Archive to Supabase Storage (replace 7-day GCS URLs) ───────
// Firecrawl returns signed GCS URLs that expire after ~7 days.
// We already have the image as base64 in memory — upload it permanently
// to Supabase Storage and replace ss.url in-place before storing to DB.
//
// Upload happens in parallel; failures are non-fatal — the screenshot
// keeps its GCS URL as a fallback so Vision analysis still proceeds.
// clinics/{domain}/{reportId}/screenshots/{id}.png
// ─── Step 2: Archive to Supabase Storage (GCS 7일 임시 URL → 영구 저장) ──
// base64가 메모리에 있는 지금 즉시 업로드. 실패 시 GCS URL 유지(비치명적).
// 경로: clinics/{domain}/{reportId}/screenshots/{id}.png
const domain = (() => {
try { return new URL(row.url || "").hostname.replace('www.', ''); } catch { return "unknown"; }
try {
const h = new URL(row.url || "").hostname.replace('www.', '');
return h || "unknown";
} catch { return "unknown"; }
})();
const SUPABASE_STORAGE_BUCKET = "screenshots";
const archiveTasks = screenshots.map(async (ss) => {
if (!ss.base64) return; // no image data — skip
// 순차 업로드 (병렬 시 Supabase rate-limit 위험 방지)
for (const ss of screenshots) {
if (!ss.base64) {
console.warn(`[archive] ${ss.id}: base64 없음 — GCS URL 유지`);
continue;
}
try {
// base64 → Uint8Array
const binaryStr = atob(ss.base64);
const bytes = new Uint8Array(binaryStr.length);
for (let i = 0; i < binaryStr.length; i++) {
bytes[i] = binaryStr.charCodeAt(i);
}
for (let i = 0; i < binaryStr.length; i++) bytes[i] = binaryStr.charCodeAt(i);
// Upload: clinics/{domain}/{reportId}/screenshots/{screenshotId}.png
const storagePath = `clinics/${domain}/${reportId}/screenshots/${ss.id}.png`;
const { error: uploadError } = await supabase.storage
.from(SUPABASE_STORAGE_BUCKET)
.upload(storagePath, bytes, {
contentType: "image/png",
upsert: true, // overwrite if re-running same analysis
});
.upload(storagePath, bytes, { contentType: "image/png", upsert: true });
if (uploadError) {
// Non-fatal: log and keep GCS URL as fallback
console.warn(`[archive] Storage upload failed for ${ss.id}: ${uploadError.message}`);
return;
console.error(`[archive] ❌ 업로드 실패 ${ss.id}: ${uploadError.message}`);
continue; // GCS URL 유지
}
// Replace GCS temp URL with permanent Supabase Storage public URL
const { data: { publicUrl } } = supabase.storage
.from(SUPABASE_STORAGE_BUCKET)
.getPublicUrl(storagePath);
ss.url = publicUrl; // in-place replace — all downstream code uses permanent URL
console.log(`[archive] ${ss.id} → clinics/${domain}/${reportId}/screenshots/`);
} catch (archiveErr) {
// Non-fatal: Vision analysis still proceeds with base64
console.warn(`[archive] Exception for ${ss.id}:`, archiveErr instanceof Error ? archiveErr.message : archiveErr);
ss.url = publicUrl;
ss.archived = true;
console.log(`[archive] ✅ ${ss.id} → Supabase Storage`);
} catch (err) {
console.error(`[archive] ❌ 예외 ${ss.id}:`, err instanceof Error ? err.message : err);
}
}
});
await Promise.allSettled(archiveTasks);
const archivedCount = screenshots.filter(ss => ss.url.includes("supabase")).length;
console.log(`[archive] ${archivedCount}/${screenshots.length} screenshots archived to Supabase Storage`);
const archivedCount = screenshots.filter(ss => ss.archived).length;
console.log(`[archive] ${archivedCount}/${screenshots.length}개 영구 저장 완료`);
// Step 3: Run Gemini Vision on captured screenshots (base64 still in memory)
if (GEMINI_API_KEY && screenshots.length > 0) {