feat: per-URL clinic folder — auto-save all scraped data to Storage
Each analysis run now creates a dedicated folder in Supabase Storage:
clinics/{domain}/{reportId}/
├── scrape_data.json (discover-channels: website scrape + Perplexity)
├── channel_data.json (collect-channel-data: all channel API results)
└── report.json (generate-report: final AI-generated report)
Screenshots also moved from {reportId}/{id}.png to:
clinics/{domain}/{reportId}/screenshots/{id}.png
Migration: 20260407_clinic_data_storage.sql creates 'clinic-data' bucket
(private, 10MB/file, JSON only). All writes are non-fatal — pipeline
continues even if Storage upload fails.
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
claude/bold-hawking
parent
ae87953fa0
commit
2cda26a649
|
|
@ -39,6 +39,15 @@ export const mockReport: MarketingReport = {
|
|||
nearestStation: '9호선 신논현역 3번 출구 50m',
|
||||
phone: '02-539-1177',
|
||||
domain: 'viewclinic.com',
|
||||
source: 'registry' as const,
|
||||
registryData: {
|
||||
district: '강남',
|
||||
branches: '본점 1개',
|
||||
brandGroup: '프리미엄',
|
||||
naverPlaceUrl: 'https://map.naver.com/v5/entry/place/1234567890',
|
||||
gangnamUnniUrl: 'https://www.gangnamunni.com/hospitals/view',
|
||||
googleMapsUrl: 'https://maps.google.com/?q=뷰성형외과',
|
||||
},
|
||||
logoImages: {
|
||||
circle: '/assets/clients/view-clinic/logo-circle.png',
|
||||
horizontal: '/assets/clients/view-clinic/logo-horizontal.png',
|
||||
|
|
|
|||
|
|
@ -614,6 +614,10 @@ Deno.serve(async (req) => {
|
|||
//
|
||||
// Upload happens in parallel; failures are non-fatal — the screenshot
|
||||
// keeps its GCS URL as a fallback so Vision analysis still proceeds.
|
||||
// clinics/{domain}/{reportId}/screenshots/{id}.png
|
||||
const domain = (() => {
|
||||
try { return new URL(row.url || "").hostname.replace('www.', ''); } catch { return "unknown"; }
|
||||
})();
|
||||
const SUPABASE_STORAGE_BUCKET = "screenshots";
|
||||
const archiveTasks = screenshots.map(async (ss) => {
|
||||
if (!ss.base64) return; // no image data — skip
|
||||
|
|
@ -625,8 +629,8 @@ Deno.serve(async (req) => {
|
|||
bytes[i] = binaryStr.charCodeAt(i);
|
||||
}
|
||||
|
||||
// Upload: screenshots/{reportId}/{screenshotId}.png
|
||||
const storagePath = `${reportId}/${ss.id}.png`;
|
||||
// Upload: clinics/{domain}/{reportId}/screenshots/{screenshotId}.png
|
||||
const storagePath = `clinics/${domain}/${reportId}/screenshots/${ss.id}.png`;
|
||||
const { error: uploadError } = await supabase.storage
|
||||
.from(SUPABASE_STORAGE_BUCKET)
|
||||
.upload(storagePath, bytes, {
|
||||
|
|
@ -646,7 +650,7 @@ Deno.serve(async (req) => {
|
|||
.getPublicUrl(storagePath);
|
||||
|
||||
ss.url = publicUrl; // in-place replace — all downstream code uses permanent URL
|
||||
console.log(`[archive] ${ss.id} → ${publicUrl.slice(-60)}`);
|
||||
console.log(`[archive] ${ss.id} → clinics/${domain}/${reportId}/screenshots/`);
|
||||
} catch (archiveErr) {
|
||||
// Non-fatal: Vision analysis still proceeds with base64
|
||||
console.warn(`[archive] Exception for ${ss.id}:`, archiveErr instanceof Error ? archiveErr.message : archiveErr);
|
||||
|
|
@ -721,6 +725,28 @@ Deno.serve(async (req) => {
|
|||
console.warn(`[collect] Failed channels:`, JSON.stringify(channelErrors));
|
||||
}
|
||||
|
||||
// ─── Storage: save channel_data.json to clinics/{domain}/{reportId}/ ───
|
||||
try {
|
||||
const domain = (() => {
|
||||
try { return new URL(row.url || "").hostname.replace('www.', ''); } catch { return "unknown"; }
|
||||
})();
|
||||
const payload = {
|
||||
channelData, analysisData, channelErrors,
|
||||
clinicName, address, services,
|
||||
collectionStatus, collectedAt: new Date().toISOString(),
|
||||
};
|
||||
const jsonBytes = new TextEncoder().encode(JSON.stringify(payload, null, 2));
|
||||
await supabase.storage
|
||||
.from('clinic-data')
|
||||
.upload(`clinics/${domain}/${reportId}/channel_data.json`, jsonBytes, {
|
||||
contentType: 'application/json',
|
||||
upsert: true,
|
||||
});
|
||||
console.log(`[storage] channel_data.json → clinics/${domain}/${reportId}/`);
|
||||
} catch (e) {
|
||||
console.warn('[storage] channel_data.json upload failed:', e instanceof Error ? e.message : e);
|
||||
}
|
||||
|
||||
// ─── UNCONDITIONAL Legacy Save: always persist whatever we have ───
|
||||
await supabase.from("marketing_reports").update({
|
||||
channel_data: channelData,
|
||||
|
|
|
|||
|
|
@ -724,6 +724,20 @@ Deno.serve(async (req) => {
|
|||
|
||||
if (saveError) throw new Error(`DB save failed: ${saveError.message}`);
|
||||
|
||||
// ─── Storage: save scrape_data.json to clinics/{domain}/{reportId}/ ───
|
||||
try {
|
||||
const domain = new URL(url).hostname.replace('www.', '');
|
||||
const jsonBytes = new TextEncoder().encode(JSON.stringify(scrapeDataFull, null, 2));
|
||||
await supabase.storage
|
||||
.from('clinic-data')
|
||||
.upload(`clinics/${domain}/${saved.id}/scrape_data.json`, jsonBytes, {
|
||||
contentType: 'application/json',
|
||||
upsert: true,
|
||||
});
|
||||
} catch (e) {
|
||||
console.warn('[storage] scrape_data.json upload failed:', e instanceof Error ? e.message : e);
|
||||
}
|
||||
|
||||
// ─── V3: clinics + analysis_runs (dual-write) ───
|
||||
let clinicId: string | null = null;
|
||||
let runId: string | null = null;
|
||||
|
|
|
|||
|
|
@ -358,6 +358,21 @@ ${JSON.stringify(scrapeData.branding || {}, null, 2).slice(0, 1000)}
|
|||
} catch (e) { console.error("V3 clinic update error:", e); }
|
||||
}
|
||||
|
||||
// ─── Storage: save report.json to clinics/{domain}/{reportId}/ ───
|
||||
try {
|
||||
const domain = new URL(row.url || "").hostname.replace('www.', '');
|
||||
const jsonBytes = new TextEncoder().encode(JSON.stringify(report, null, 2));
|
||||
await supabase.storage
|
||||
.from('clinic-data')
|
||||
.upload(`clinics/${domain}/${body.reportId}/report.json`, jsonBytes, {
|
||||
contentType: 'application/json',
|
||||
upsert: true,
|
||||
});
|
||||
console.log(`[storage] report.json → clinics/${domain}/${body.reportId}/`);
|
||||
} catch (e) {
|
||||
console.warn('[storage] report.json upload failed:', e instanceof Error ? e.message : e);
|
||||
}
|
||||
|
||||
return new Response(
|
||||
JSON.stringify({
|
||||
success: true,
|
||||
|
|
|
|||
|
|
@ -0,0 +1,36 @@
|
|||
-- clinic-data bucket: JSON text data per clinic URL analysis run
-- Structure: clinics/{domain}/{reportId}/
--   ├── scrape_data.json    (discover-channels output)
--   ├── channel_data.json   (collect-channel-data output)
--   └── report.json         (generate-report output)

-- Create the bucket; on re-run, converge its settings so a pre-existing
-- bucket also ends up private / 10 MB / JSON-only (mirrors the unconditional
-- screenshots-bucket UPDATE at the bottom of this migration).
INSERT INTO storage.buckets (id, name, public, file_size_limit, allowed_mime_types)
VALUES (
  'clinic-data',
  'clinic-data',
  false,              -- private: requires service_role key
  10485760,           -- 10 MB per file
  ARRAY['application/json']
)
ON CONFLICT (id) DO UPDATE
  SET public             = EXCLUDED.public,
      file_size_limit    = EXCLUDED.file_size_limit,
      allowed_mime_types = EXCLUDED.allowed_mime_types;

-- RLS: only service_role can read/write (backend-to-backend).
-- NOTE(review): service_role normally bypasses RLS entirely, so these
-- policies are declarations of intent; anon/authenticated stay locked out
-- simply by having no policy of their own.
CREATE POLICY "service_role read clinic-data"
  ON storage.objects FOR SELECT
  TO service_role
  USING (bucket_id = 'clinic-data');

CREATE POLICY "service_role insert clinic-data"
  ON storage.objects FOR INSERT
  TO service_role
  WITH CHECK (bucket_id = 'clinic-data');

CREATE POLICY "service_role update clinic-data"
  ON storage.objects FOR UPDATE
  TO service_role
  USING (bucket_id = 'clinic-data')
  WITH CHECK (bucket_id = 'clinic-data');  -- explicit: updated rows must stay in-bucket

-- screenshots bucket: update allowed_mime_types to include PNG/JPEG/WebP (idempotent)
UPDATE storage.buckets
SET allowed_mime_types = ARRAY['image/png', 'image/jpeg', 'image/webp']
WHERE id = 'screenshots';
|
||||
Loading…
Reference in New Issue