feat: per-URL clinic folder — auto-save all scraped data to Storage
Each analysis run now creates a dedicated folder in the 'clinic-data' Supabase Storage bucket:
clinics/{domain}/{reportId}/
├── scrape_data.json (discover-channels: website scrape + Perplexity)
├── channel_data.json (collect-channel-data: all channel API results)
└── report.json (generate-report: final AI-generated report)
Screenshots stay in the separate 'screenshots' bucket, but their path moves from {reportId}/{id}.png to:
clinics/{domain}/{reportId}/screenshots/{id}.png
Migration: 20260407_clinic_data_storage.sql creates 'clinic-data' bucket
(private, 10MB/file, JSON only). All writes are non-fatal — pipeline
continues even if Storage upload fails.
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
claude/bold-hawking
parent
ae87953fa0
commit
2cda26a649
|
|
@ -39,6 +39,15 @@ export const mockReport: MarketingReport = {
|
||||||
nearestStation: '9호선 신논현역 3번 출구 50m',
|
nearestStation: '9호선 신논현역 3번 출구 50m',
|
||||||
phone: '02-539-1177',
|
phone: '02-539-1177',
|
||||||
domain: 'viewclinic.com',
|
domain: 'viewclinic.com',
|
||||||
|
source: 'registry' as const,
|
||||||
|
registryData: {
|
||||||
|
district: '강남',
|
||||||
|
branches: '본점 1개',
|
||||||
|
brandGroup: '프리미엄',
|
||||||
|
naverPlaceUrl: 'https://map.naver.com/v5/entry/place/1234567890',
|
||||||
|
gangnamUnniUrl: 'https://www.gangnamunni.com/hospitals/view',
|
||||||
|
googleMapsUrl: 'https://maps.google.com/?q=뷰성형외과',
|
||||||
|
},
|
||||||
logoImages: {
|
logoImages: {
|
||||||
circle: '/assets/clients/view-clinic/logo-circle.png',
|
circle: '/assets/clients/view-clinic/logo-circle.png',
|
||||||
horizontal: '/assets/clients/view-clinic/logo-horizontal.png',
|
horizontal: '/assets/clients/view-clinic/logo-horizontal.png',
|
||||||
|
|
|
||||||
|
|
@ -614,6 +614,10 @@ Deno.serve(async (req) => {
|
||||||
//
|
//
|
||||||
// Upload happens in parallel; failures are non-fatal — the screenshot
|
// Upload happens in parallel; failures are non-fatal — the screenshot
|
||||||
// keeps its GCS URL as a fallback so Vision analysis still proceeds.
|
// keeps its GCS URL as a fallback so Vision analysis still proceeds.
|
||||||
|
// clinics/{domain}/{reportId}/screenshots/{id}.png
// Folder key for this run's screenshots: the hostname of row.url with a
// leading "www." removed. Falls back to "unknown" when row.url is missing or
// cannot be parsed, so archiving never fails on a bad URL.
const domain = (() => {
  // Anchored pattern: strip only a leading "www.", never a "www." that occurs
  // in the middle of the hostname.
  try { return new URL(row.url || "").hostname.replace(/^www\./, ''); } catch { return "unknown"; }
})();
|
||||||
const SUPABASE_STORAGE_BUCKET = "screenshots";
|
const SUPABASE_STORAGE_BUCKET = "screenshots";
|
||||||
const archiveTasks = screenshots.map(async (ss) => {
|
const archiveTasks = screenshots.map(async (ss) => {
|
||||||
if (!ss.base64) return; // no image data — skip
|
if (!ss.base64) return; // no image data — skip
|
||||||
|
|
@ -625,8 +629,8 @@ Deno.serve(async (req) => {
|
||||||
bytes[i] = binaryStr.charCodeAt(i);
|
bytes[i] = binaryStr.charCodeAt(i);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Upload: screenshots/{reportId}/{screenshotId}.png
|
// Upload: clinics/{domain}/{reportId}/screenshots/{screenshotId}.png
|
||||||
const storagePath = `${reportId}/${ss.id}.png`;
|
const storagePath = `clinics/${domain}/${reportId}/screenshots/${ss.id}.png`;
|
||||||
const { error: uploadError } = await supabase.storage
|
const { error: uploadError } = await supabase.storage
|
||||||
.from(SUPABASE_STORAGE_BUCKET)
|
.from(SUPABASE_STORAGE_BUCKET)
|
||||||
.upload(storagePath, bytes, {
|
.upload(storagePath, bytes, {
|
||||||
|
|
@ -646,7 +650,7 @@ Deno.serve(async (req) => {
|
||||||
.getPublicUrl(storagePath);
|
.getPublicUrl(storagePath);
|
||||||
|
|
||||||
ss.url = publicUrl; // in-place replace — all downstream code uses permanent URL
|
ss.url = publicUrl; // in-place replace — all downstream code uses permanent URL
|
||||||
console.log(`[archive] ${ss.id} → ${publicUrl.slice(-60)}`);
|
console.log(`[archive] ${ss.id} → clinics/${domain}/${reportId}/screenshots/`);
|
||||||
} catch (archiveErr) {
|
} catch (archiveErr) {
|
||||||
// Non-fatal: Vision analysis still proceeds with base64
|
// Non-fatal: Vision analysis still proceeds with base64
|
||||||
console.warn(`[archive] Exception for ${ss.id}:`, archiveErr instanceof Error ? archiveErr.message : archiveErr);
|
console.warn(`[archive] Exception for ${ss.id}:`, archiveErr instanceof Error ? archiveErr.message : archiveErr);
|
||||||
|
|
@ -721,6 +725,28 @@ Deno.serve(async (req) => {
|
||||||
console.warn(`[collect] Failed channels:`, JSON.stringify(channelErrors));
|
console.warn(`[collect] Failed channels:`, JSON.stringify(channelErrors));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// ─── Storage: save channel_data.json to clinics/{domain}/{reportId}/ ───
// Best-effort snapshot of everything collected in this run. Any failure is
// logged and swallowed so the pipeline continues with the DB save below.
try {
  // Folder key: hostname of row.url without a leading "www."; "unknown" when
  // row.url is missing or cannot be parsed.
  const domain = (() => {
    try { return new URL(row.url || "").hostname.replace(/^www\./, ''); } catch { return "unknown"; }
  })();
  const payload = {
    channelData, analysisData, channelErrors,
    clinicName, address, services,
    collectionStatus, collectedAt: new Date().toISOString(),
  };
  const jsonBytes = new TextEncoder().encode(JSON.stringify(payload, null, 2));
  // upload() reports failures through the returned `error` instead of
  // throwing, so it must be checked before claiming success.
  const { error: storageError } = await supabase.storage
    .from('clinic-data')
    .upload(`clinics/${domain}/${reportId}/channel_data.json`, jsonBytes, {
      contentType: 'application/json',
      upsert: true, // re-running the same report overwrites the previous file
    });
  if (storageError) throw new Error(storageError.message);
  console.log(`[storage] channel_data.json → clinics/${domain}/${reportId}/`);
} catch (e) {
  // Non-fatal: the same data is still persisted to the database afterwards.
  console.warn('[storage] channel_data.json upload failed:', e instanceof Error ? e.message : e);
}
|
||||||
|
|
||||||
// ─── UNCONDITIONAL Legacy Save: always persist whatever we have ───
|
// ─── UNCONDITIONAL Legacy Save: always persist whatever we have ───
|
||||||
await supabase.from("marketing_reports").update({
|
await supabase.from("marketing_reports").update({
|
||||||
channel_data: channelData,
|
channel_data: channelData,
|
||||||
|
|
|
||||||
|
|
@ -724,6 +724,20 @@ Deno.serve(async (req) => {
|
||||||
|
|
||||||
if (saveError) throw new Error(`DB save failed: ${saveError.message}`);
|
if (saveError) throw new Error(`DB save failed: ${saveError.message}`);
|
||||||
|
|
||||||
|
// ─── Storage: save scrape_data.json to clinics/{domain}/{reportId}/ ───
// Best-effort snapshot of the full scrape result. Any failure (bad URL,
// rejected upload) is logged and swallowed so the pipeline continues.
try {
  // Folder key: hostname without a leading "www." (anchored so a "www." in the
  // middle of the hostname is left alone).
  const domain = new URL(url).hostname.replace(/^www\./, '');
  const jsonBytes = new TextEncoder().encode(JSON.stringify(scrapeDataFull, null, 2));
  // The path carries the "clinics/" prefix so this file lands in the same
  // per-run folder as the other per-run JSON files.
  // upload() reports failures through the returned `error` instead of
  // throwing, so it must be checked explicitly.
  const { error: storageError } = await supabase.storage
    .from('clinic-data')
    .upload(`clinics/${domain}/${saved.id}/scrape_data.json`, jsonBytes, {
      contentType: 'application/json',
      upsert: true, // re-running the same report overwrites the previous file
    });
  if (storageError) throw new Error(storageError.message);
  console.log(`[storage] scrape_data.json → clinics/${domain}/${saved.id}/`);
} catch (e) {
  // Non-fatal: the scrape result has already been saved to the database.
  console.warn('[storage] scrape_data.json upload failed:', e instanceof Error ? e.message : e);
}
|
||||||
|
|
||||||
// ─── V3: clinics + analysis_runs (dual-write) ───
|
// ─── V3: clinics + analysis_runs (dual-write) ───
|
||||||
let clinicId: string | null = null;
|
let clinicId: string | null = null;
|
||||||
let runId: string | null = null;
|
let runId: string | null = null;
|
||||||
|
|
|
||||||
|
|
@ -358,6 +358,21 @@ ${JSON.stringify(scrapeData.branding || {}, null, 2).slice(0, 1000)}
|
||||||
} catch (e) { console.error("V3 clinic update error:", e); }
|
} catch (e) { console.error("V3 clinic update error:", e); }
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// ─── Storage: save report.json to clinics/{domain}/{reportId}/ ───
// Best-effort snapshot of the final generated report. Any failure is logged
// and swallowed so the HTTP response below is still returned.
try {
  // Folder key: hostname of row.url without a leading "www."; "unknown" when
  // row.url is missing or cannot be parsed, so the report is still archived
  // instead of being skipped.
  const domain = (() => {
    try { return new URL(row.url || "").hostname.replace(/^www\./, ''); } catch { return "unknown"; }
  })();
  const jsonBytes = new TextEncoder().encode(JSON.stringify(report, null, 2));
  // upload() reports failures through the returned `error` instead of
  // throwing, so it must be checked before claiming success.
  const { error: storageError } = await supabase.storage
    .from('clinic-data')
    .upload(`clinics/${domain}/${body.reportId}/report.json`, jsonBytes, {
      contentType: 'application/json',
      upsert: true, // regenerating the same report overwrites the previous file
    });
  if (storageError) throw new Error(storageError.message);
  console.log(`[storage] report.json → clinics/${domain}/${body.reportId}/`);
} catch (e) {
  // Non-fatal: a Storage failure must not fail report generation.
  console.warn('[storage] report.json upload failed:', e instanceof Error ? e.message : e);
}
|
||||||
|
|
||||||
return new Response(
|
return new Response(
|
||||||
JSON.stringify({
|
JSON.stringify({
|
||||||
success: true,
|
success: true,
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,36 @@
|
||||||
|
-- clinic-data bucket: JSON text data per clinic URL analysis run
-- Structure: clinics/{domain}/{reportId}/
--   ├── scrape_data.json   (discover-channels output)
--   ├── channel_data.json  (collect-channel-data output)
--   └── report.json        (generate-report output)

-- Create the bucket. ON CONFLICT DO NOTHING keeps this re-runnable; note that
-- an already-existing 'clinic-data' bucket is left untouched, including its
-- size limit and MIME settings.
INSERT INTO storage.buckets (id, name, public, file_size_limit, allowed_mime_types)
VALUES (
  'clinic-data',
  'clinic-data',
  false,                      -- private: requires service_role key
  10485760,                   -- 10 MB per file
  ARRAY['application/json']   -- JSON only
)
ON CONFLICT (id) DO NOTHING;

-- RLS: only service_role can read/write (backend-to-backend).
-- Each policy is dropped first because CREATE POLICY has no IF NOT EXISTS
-- form and would otherwise fail when this script is run a second time.
DROP POLICY IF EXISTS "service_role read clinic-data" ON storage.objects;
CREATE POLICY "service_role read clinic-data"
  ON storage.objects FOR SELECT
  TO service_role
  USING (bucket_id = 'clinic-data');

DROP POLICY IF EXISTS "service_role insert clinic-data" ON storage.objects;
CREATE POLICY "service_role insert clinic-data"
  ON storage.objects FOR INSERT
  TO service_role
  WITH CHECK (bucket_id = 'clinic-data');

-- UPDATE is needed because the functions upload with upsert: true.
-- WITH CHECK is spelled out so an updated row cannot be moved to another
-- bucket (this matches the default applied when WITH CHECK is omitted).
DROP POLICY IF EXISTS "service_role update clinic-data" ON storage.objects;
CREATE POLICY "service_role update clinic-data"
  ON storage.objects FOR UPDATE
  TO service_role
  USING (bucket_id = 'clinic-data')
  WITH CHECK (bucket_id = 'clinic-data');

-- screenshots bucket: set allowed_mime_types to exactly PNG/JPEG/WebP.
-- This overwrites the current list rather than appending to it; re-running
-- gives the same result, and it is a no-op if the bucket does not exist.
UPDATE storage.buckets
SET allowed_mime_types = ARRAY['image/png', 'image/jpeg', 'image/webp']
WHERE id = 'screenshots';
|
||||||
Loading…
Reference in New Issue