feat: per-URL clinic folder — auto-save all scraped data to Storage

Each analysis run now creates a dedicated folder in Supabase Storage:
  clinics/{domain}/{reportId}/
    ├── scrape_data.json    (discover-channels: website scrape + Perplexity)
    ├── channel_data.json   (collect-channel-data: all channel API results)
    └── report.json         (generate-report: final AI-generated report)

Screenshots also moved from {reportId}/{id}.png to:
  clinics/{domain}/{reportId}/screenshots/{id}.png

Migration: 20260407_clinic_data_storage.sql creates 'clinic-data' bucket
(private, 10MB/file, JSON only). All writes are non-fatal — pipeline
continues even if Storage upload fails.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
claude/bold-hawking
Haewon Kam 2026-04-07 10:04:52 +09:00
parent ae87953fa0
commit 2cda26a649
5 changed files with 103 additions and 3 deletions

View File

@ -39,6 +39,15 @@ export const mockReport: MarketingReport = {
nearestStation: '9호선 신논현역 3번 출구 50m', nearestStation: '9호선 신논현역 3번 출구 50m',
phone: '02-539-1177', phone: '02-539-1177',
domain: 'viewclinic.com', domain: 'viewclinic.com',
source: 'registry' as const,
registryData: {
district: '강남',
branches: '본점 1개',
brandGroup: '프리미엄',
naverPlaceUrl: 'https://map.naver.com/v5/entry/place/1234567890',
gangnamUnniUrl: 'https://www.gangnamunni.com/hospitals/view',
googleMapsUrl: 'https://maps.google.com/?q=뷰성형외과',
},
logoImages: { logoImages: {
circle: '/assets/clients/view-clinic/logo-circle.png', circle: '/assets/clients/view-clinic/logo-circle.png',
horizontal: '/assets/clients/view-clinic/logo-horizontal.png', horizontal: '/assets/clients/view-clinic/logo-horizontal.png',

View File

@ -614,6 +614,10 @@ Deno.serve(async (req) => {
// //
// Upload happens in parallel; failures are non-fatal — the screenshot // Upload happens in parallel; failures are non-fatal — the screenshot
// keeps its GCS URL as a fallback so Vision analysis still proceeds. // keeps its GCS URL as a fallback so Vision analysis still proceeds.
// clinics/{domain}/{reportId}/screenshots/{id}.png
const domain = (() => {
  // Anchor the pattern so only a LEADING "www." label is stripped — a plain
  // string replace('www.', '') would also delete "www." occurring mid-hostname.
  // Falls back to "unknown" when row.url is absent or unparseable (non-fatal).
  try { return new URL(row.url || "").hostname.replace(/^www\./, ''); } catch { return "unknown"; }
})();
const SUPABASE_STORAGE_BUCKET = "screenshots"; const SUPABASE_STORAGE_BUCKET = "screenshots";
const archiveTasks = screenshots.map(async (ss) => { const archiveTasks = screenshots.map(async (ss) => {
if (!ss.base64) return; // no image data — skip if (!ss.base64) return; // no image data — skip
@ -625,8 +629,8 @@ Deno.serve(async (req) => {
bytes[i] = binaryStr.charCodeAt(i); bytes[i] = binaryStr.charCodeAt(i);
} }
// Upload: screenshots/{reportId}/{screenshotId}.png // Upload: clinics/{domain}/{reportId}/screenshots/{screenshotId}.png
const storagePath = `${reportId}/${ss.id}.png`; const storagePath = `clinics/${domain}/${reportId}/screenshots/${ss.id}.png`;
const { error: uploadError } = await supabase.storage const { error: uploadError } = await supabase.storage
.from(SUPABASE_STORAGE_BUCKET) .from(SUPABASE_STORAGE_BUCKET)
.upload(storagePath, bytes, { .upload(storagePath, bytes, {
@ -646,7 +650,7 @@ Deno.serve(async (req) => {
.getPublicUrl(storagePath); .getPublicUrl(storagePath);
ss.url = publicUrl; // in-place replace — all downstream code uses permanent URL ss.url = publicUrl; // in-place replace — all downstream code uses permanent URL
console.log(`[archive] ${ss.id}${publicUrl.slice(-60)}`); console.log(`[archive] ${ss.id}clinics/${domain}/${reportId}/screenshots/`);
} catch (archiveErr) { } catch (archiveErr) {
// Non-fatal: Vision analysis still proceeds with base64 // Non-fatal: Vision analysis still proceeds with base64
console.warn(`[archive] Exception for ${ss.id}:`, archiveErr instanceof Error ? archiveErr.message : archiveErr); console.warn(`[archive] Exception for ${ss.id}:`, archiveErr instanceof Error ? archiveErr.message : archiveErr);
@ -721,6 +725,28 @@ Deno.serve(async (req) => {
console.warn(`[collect] Failed channels:`, JSON.stringify(channelErrors)); console.warn(`[collect] Failed channels:`, JSON.stringify(channelErrors));
} }
// ─── Storage: save channel_data.json to clinics/{domain}/{reportId}/ ───
// Non-fatal by design: the pipeline continues even if the Storage write fails.
try {
  const domain = (() => {
    // Anchor so only a leading "www." is stripped; plain string replace would
    // also hit "www." appearing mid-hostname. "unknown" on unparseable URL.
    try { return new URL(row.url || "").hostname.replace(/^www\./, ''); } catch { return "unknown"; }
  })();
  // Snapshot of everything this run collected, for offline inspection/replay.
  const payload = {
    channelData, analysisData, channelErrors,
    clinicName, address, services,
    collectionStatus, collectedAt: new Date().toISOString(),
  };
  const jsonBytes = new TextEncoder().encode(JSON.stringify(payload, null, 2));
  await supabase.storage
    .from('clinic-data')
    .upload(`clinics/${domain}/${reportId}/channel_data.json`, jsonBytes, {
      contentType: 'application/json',
      upsert: true, // re-runs of the same report overwrite the previous snapshot
    });
  console.log(`[storage] channel_data.json → clinics/${domain}/${reportId}/`);
} catch (e) {
  console.warn('[storage] channel_data.json upload failed:', e instanceof Error ? e.message : e);
}
// ─── UNCONDITIONAL Legacy Save: always persist whatever we have ─── // ─── UNCONDITIONAL Legacy Save: always persist whatever we have ───
await supabase.from("marketing_reports").update({ await supabase.from("marketing_reports").update({
channel_data: channelData, channel_data: channelData,

View File

@ -724,6 +724,20 @@ Deno.serve(async (req) => {
if (saveError) throw new Error(`DB save failed: ${saveError.message}`); if (saveError) throw new Error(`DB save failed: ${saveError.message}`);
// ─── Storage: save scrape_data.json to clinics/{domain}/{reportId}/ ───
// Non-fatal by design: the pipeline continues even if the Storage write fails.
try {
  // Anchor so only a leading "www." is stripped from the hostname.
  const domain = new URL(url).hostname.replace(/^www\./, '');
  const jsonBytes = new TextEncoder().encode(JSON.stringify(scrapeDataFull, null, 2));
  await supabase.storage
    .from('clinic-data')
    // BUG FIX: path was `${domain}/${saved.id}/…`, missing the `clinics/`
    // prefix used by channel_data.json, report.json, and screenshots —
    // all three files of a run must share clinics/{domain}/{reportId}/.
    .upload(`clinics/${domain}/${saved.id}/scrape_data.json`, jsonBytes, {
      contentType: 'application/json',
      upsert: true, // re-runs of the same report overwrite the previous snapshot
    });
  console.log(`[storage] scrape_data.json → clinics/${domain}/${saved.id}/`);
} catch (e) {
  console.warn('[storage] scrape_data.json upload failed:', e instanceof Error ? e.message : e);
}
// ─── V3: clinics + analysis_runs (dual-write) ─── // ─── V3: clinics + analysis_runs (dual-write) ───
let clinicId: string | null = null; let clinicId: string | null = null;
let runId: string | null = null; let runId: string | null = null;

View File

@ -358,6 +358,21 @@ ${JSON.stringify(scrapeData.branding || {}, null, 2).slice(0, 1000)}
} catch (e) { console.error("V3 clinic update error:", e); } } catch (e) { console.error("V3 clinic update error:", e); }
} }
// ─── Storage: save report.json to clinics/{domain}/{reportId}/ ───
// Non-fatal by design: the pipeline continues even if the Storage write fails.
try {
  // Anchor so only a leading "www." is stripped. NOTE(review): unlike the other
  // functions, an unparseable row.url throws here and is swallowed by this
  // catch, skipping the upload entirely — confirm that is the intended behavior
  // (elsewhere the fallback domain is "unknown").
  const domain = new URL(row.url || "").hostname.replace(/^www\./, '');
  const jsonBytes = new TextEncoder().encode(JSON.stringify(report, null, 2));
  await supabase.storage
    .from('clinic-data')
    .upload(`clinics/${domain}/${body.reportId}/report.json`, jsonBytes, {
      contentType: 'application/json',
      upsert: true, // re-runs of the same report overwrite the previous snapshot
    });
  console.log(`[storage] report.json → clinics/${domain}/${body.reportId}/`);
} catch (e) {
  console.warn('[storage] report.json upload failed:', e instanceof Error ? e.message : e);
}
return new Response( return new Response(
JSON.stringify({ JSON.stringify({
success: true, success: true,

View File

@ -0,0 +1,36 @@
-- clinic-data bucket: JSON text data per clinic URL analysis run
-- Structure: clinics/{domain}/{reportId}/
--   ├── scrape_data.json   (discover-channels output)
--   ├── channel_data.json  (collect-channel-data output)
--   └── report.json        (generate-report output)
INSERT INTO storage.buckets (id, name, public, file_size_limit, allowed_mime_types)
VALUES (
  'clinic-data',
  'clinic-data',
  false,      -- private: requires service_role key
  10485760,   -- 10 MB per file
  ARRAY['application/json']
)
ON CONFLICT (id) DO NOTHING;

-- RLS: only service_role can read/write (backend-to-backend).
-- NOTE(review): service_role connections typically bypass RLS entirely, so
-- these policies mostly document intent — confirm against the project's
-- Supabase role configuration.
-- Postgres has no CREATE POLICY IF NOT EXISTS, so DROP ... IF EXISTS first to
-- keep this migration idempotent (matching the ON CONFLICT guard above);
-- previously a re-run would fail with "policy already exists".
DROP POLICY IF EXISTS "service_role read clinic-data" ON storage.objects;
CREATE POLICY "service_role read clinic-data"
  ON storage.objects FOR SELECT
  TO service_role
  USING (bucket_id = 'clinic-data');

DROP POLICY IF EXISTS "service_role insert clinic-data" ON storage.objects;
CREATE POLICY "service_role insert clinic-data"
  ON storage.objects FOR INSERT
  TO service_role
  WITH CHECK (bucket_id = 'clinic-data');

-- upsert:true performs update-on-conflict, so both USING (row visibility) and
-- WITH CHECK (new row validity) are constrained to this bucket.
DROP POLICY IF EXISTS "service_role update clinic-data" ON storage.objects;
CREATE POLICY "service_role update clinic-data"
  ON storage.objects FOR UPDATE
  TO service_role
  USING (bucket_id = 'clinic-data')
  WITH CHECK (bucket_id = 'clinic-data');

-- screenshots bucket: update allowed_mime_types to include PNG/JPEG/WebP (idempotent)
UPDATE storage.buckets
SET allowed_mime_types = ARRAY['image/png', 'image/jpeg', 'image/webp']
WHERE id = 'screenshots';