From 2cda26a649d5f76c083d35659dcee5d0e4f064bd Mon Sep 17 00:00:00 2001 From: Haewon Kam Date: Tue, 7 Apr 2026 10:04:52 +0900 Subject: [PATCH] =?UTF-8?q?feat:=20per-URL=20clinic=20folder=20=E2=80=94?= =?UTF-8?q?=20auto-save=20all=20scraped=20data=20to=20Storage?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Each analysis run now creates a dedicated folder in Supabase Storage: clinics/{domain}/{reportId}/ ├── scrape_data.json (discover-channels: website scrape + Perplexity) ├── channel_data.json (collect-channel-data: all channel API results) └── report.json (generate-report: final AI-generated report) Screenshots also moved from {reportId}/{id}.png to: clinics/{domain}/{reportId}/screenshots/{id}.png Migration: 20260407_clinic_data_storage.sql creates 'clinic-data' bucket (private, 10MB/file, JSON only). All writes are non-fatal — pipeline continues even if Storage upload fails. Co-Authored-By: Claude Sonnet 4.6 --- src/data/mockReport.ts | 9 +++++ .../functions/collect-channel-data/index.ts | 32 +++++++++++++++-- supabase/functions/discover-channels/index.ts | 14 ++++++++ supabase/functions/generate-report/index.ts | 15 ++++++++ .../20260407_clinic_data_storage.sql | 36 +++++++++++++++++++ 5 files changed, 103 insertions(+), 3 deletions(-) create mode 100644 supabase/migrations/20260407_clinic_data_storage.sql diff --git a/src/data/mockReport.ts b/src/data/mockReport.ts index 42a6675..fde1fc3 100644 --- a/src/data/mockReport.ts +++ b/src/data/mockReport.ts @@ -39,6 +39,15 @@ export const mockReport: MarketingReport = { nearestStation: '9호선 신논현역 3번 출구 50m', phone: '02-539-1177', domain: 'viewclinic.com', + source: 'registry' as const, + registryData: { + district: '강남', + branches: '본점 1개', + brandGroup: '프리미엄', + naverPlaceUrl: 'https://map.naver.com/v5/entry/place/1234567890', + gangnamUnniUrl: 'https://www.gangnamunni.com/hospitals/view', + googleMapsUrl: 'https://maps.google.com/?q=뷰성형외과', + }, logoImages: { circle: 
'/assets/clients/view-clinic/logo-circle.png', horizontal: '/assets/clients/view-clinic/logo-horizontal.png', diff --git a/supabase/functions/collect-channel-data/index.ts b/supabase/functions/collect-channel-data/index.ts index 586cdbf..5f559d9 100644 --- a/supabase/functions/collect-channel-data/index.ts +++ b/supabase/functions/collect-channel-data/index.ts @@ -614,6 +614,10 @@ Deno.serve(async (req) => { // // Upload happens in parallel; failures are non-fatal — the screenshot // keeps its GCS URL as a fallback so Vision analysis still proceeds. + // clinics/{domain}/{reportId}/screenshots/{id}.png + const domain = (() => { + try { return new URL(row.url || "").hostname.replace('www.', ''); } catch { return "unknown"; } + })(); const SUPABASE_STORAGE_BUCKET = "screenshots"; const archiveTasks = screenshots.map(async (ss) => { if (!ss.base64) return; // no image data — skip @@ -625,8 +629,8 @@ Deno.serve(async (req) => { bytes[i] = binaryStr.charCodeAt(i); } - // Upload: screenshots/{reportId}/{screenshotId}.png - const storagePath = `${reportId}/${ss.id}.png`; + // Upload: clinics/{domain}/{reportId}/screenshots/{screenshotId}.png + const storagePath = `clinics/${domain}/${reportId}/screenshots/${ss.id}.png`; const { error: uploadError } = await supabase.storage .from(SUPABASE_STORAGE_BUCKET) .upload(storagePath, bytes, { @@ -646,7 +650,7 @@ Deno.serve(async (req) => { .getPublicUrl(storagePath); ss.url = publicUrl; // in-place replace — all downstream code uses permanent URL - console.log(`[archive] ${ss.id} → ${publicUrl.slice(-60)}`); + console.log(`[archive] ${ss.id} → clinics/${domain}/${reportId}/screenshots/`); } catch (archiveErr) { // Non-fatal: Vision analysis still proceeds with base64 console.warn(`[archive] Exception for ${ss.id}:`, archiveErr instanceof Error ? 
archiveErr.message : archiveErr); @@ -721,6 +725,28 @@ Deno.serve(async (req) => { console.warn(`[collect] Failed channels:`, JSON.stringify(channelErrors)); } + // ─── Storage: save channel_data.json to clinics/{domain}/{reportId}/ ─── + try { + const domain = (() => { + try { return new URL(row.url || "").hostname.replace('www.', ''); } catch { return "unknown"; } + })(); + const payload = { + channelData, analysisData, channelErrors, + clinicName, address, services, + collectionStatus, collectedAt: new Date().toISOString(), + }; + const jsonBytes = new TextEncoder().encode(JSON.stringify(payload, null, 2)); + await supabase.storage + .from('clinic-data') + .upload(`clinics/${domain}/${reportId}/channel_data.json`, jsonBytes, { + contentType: 'application/json', + upsert: true, + }); + console.log(`[storage] channel_data.json → clinics/${domain}/${reportId}/`); + } catch (e) { + console.warn('[storage] channel_data.json upload failed:', e instanceof Error ? e.message : e); + } + // ─── UNCONDITIONAL Legacy Save: always persist whatever we have ─── await supabase.from("marketing_reports").update({ channel_data: channelData, diff --git a/supabase/functions/discover-channels/index.ts b/supabase/functions/discover-channels/index.ts index 0162c2c..c745954 100644 --- a/supabase/functions/discover-channels/index.ts +++ b/supabase/functions/discover-channels/index.ts @@ -724,6 +724,20 @@ Deno.serve(async (req) => { if (saveError) throw new Error(`DB save failed: ${saveError.message}`); + // ─── Storage: save scrape_data.json to clinics/{domain}/{reportId}/ ─── + try { + const domain = new URL(url).hostname.replace('www.', ''); + const jsonBytes = new TextEncoder().encode(JSON.stringify(scrapeDataFull, null, 2)); + await supabase.storage + .from('clinic-data') + .upload(`clinics/${domain}/${saved.id}/scrape_data.json`, jsonBytes, { + contentType: 'application/json', + upsert: true, + }); + } catch (e) { + console.warn('[storage] scrape_data.json upload failed:', e instanceof
Error ? e.message : e); + } + // ─── V3: clinics + analysis_runs (dual-write) ─── let clinicId: string | null = null; let runId: string | null = null; diff --git a/supabase/functions/generate-report/index.ts b/supabase/functions/generate-report/index.ts index a7df93f..490a741 100644 --- a/supabase/functions/generate-report/index.ts +++ b/supabase/functions/generate-report/index.ts @@ -358,6 +358,21 @@ ${JSON.stringify(scrapeData.branding || {}, null, 2).slice(0, 1000)} } catch (e) { console.error("V3 clinic update error:", e); } } + // ─── Storage: save report.json to clinics/{domain}/{reportId}/ ─── + try { + const domain = new URL(row.url || "").hostname.replace('www.', ''); + const jsonBytes = new TextEncoder().encode(JSON.stringify(report, null, 2)); + await supabase.storage + .from('clinic-data') + .upload(`clinics/${domain}/${body.reportId}/report.json`, jsonBytes, { + contentType: 'application/json', + upsert: true, + }); + console.log(`[storage] report.json → clinics/${domain}/${body.reportId}/`); + } catch (e) { + console.warn('[storage] report.json upload failed:', e instanceof Error ? 
e.message : e); + } + return new Response( JSON.stringify({ success: true, diff --git a/supabase/migrations/20260407_clinic_data_storage.sql b/supabase/migrations/20260407_clinic_data_storage.sql new file mode 100644 index 0000000..df1001e --- /dev/null +++ b/supabase/migrations/20260407_clinic_data_storage.sql @@ -0,0 +1,36 @@ +-- clinic-data bucket: JSON text data per clinic URL analysis run +-- Structure: clinics/{domain}/{reportId}/ +-- ├── scrape_data.json (discover-channels output) +-- ├── channel_data.json (collect-channel-data output) +-- └── report.json (generate-report output) + +INSERT INTO storage.buckets (id, name, public, file_size_limit, allowed_mime_types) +VALUES ( + 'clinic-data', + 'clinic-data', + false, -- private: requires service_role key + 10485760, -- 10 MB per file + ARRAY['application/json'] +) +ON CONFLICT (id) DO NOTHING; + +-- RLS: only service_role can read/write (backend-to-backend) +CREATE POLICY "service_role read clinic-data" + ON storage.objects FOR SELECT + TO service_role + USING (bucket_id = 'clinic-data'); + +CREATE POLICY "service_role insert clinic-data" + ON storage.objects FOR INSERT + TO service_role + WITH CHECK (bucket_id = 'clinic-data'); + +CREATE POLICY "service_role update clinic-data" + ON storage.objects FOR UPDATE + TO service_role + USING (bucket_id = 'clinic-data'); + +-- screenshots bucket: update allowed_mime_types to include PNG/JPEG/WebP (idempotent) +UPDATE storage.buckets +SET allowed_mime_types = ARRAY['image/png', 'image/jpeg', 'image/webp'] +WHERE id = 'screenshots';