From 79950925a1b0375f66c2f67cb782d826f52e0616 Mon Sep 17 00:00:00 2001 From: Haewon Kam Date: Sun, 5 Apr 2026 10:08:03 +0900 Subject: [PATCH] fix: add Authorization header to all Edge Function calls + fix Vision Analysis MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - All fetch calls to Supabase Edge Functions now include an Authorization: Bearer header carrying the Supabase anon key (was missing → 401 errors) - Fix Firecrawl screenshot API: remove invalid screenshotOptions, use "screenshot@fullPage" format (v2 API compatibility) - Fix screenshot response handling: v2 returns URL not base64, now downloads and converts to base64 for Gemini Vision - Add about page to Vision Analysis capture targets - Add retry utility, channel error tracking, pipeline resume, enrichment retry, EmptyState improvements (Sprint 2-3) Co-Authored-By: Claude Opus 4.6 --- src/App.tsx | 2 +- src/components/report/ui/EmptyState.tsx | 94 ++++- src/hooks/useEnrichment.ts | 71 ++-- src/lib/supabase.ts | 50 ++- src/main.tsx | 1 + src/pages/AnalysisLoadingPage.tsx | 365 +++++++++++++----- supabase/functions/_shared/retry.ts | 224 +++++++++++ supabase/functions/_shared/verifyHandles.ts | 63 ++- supabase/functions/_shared/visionAnalysis.ts | 119 ++++-- .../functions/collect-channel-data/index.ts | 155 +++++--- supabase/functions/generate-report/index.ts | 29 ++ .../migrations/20260405_channel_errors.sql | 2 + 12 files changed, 961 insertions(+), 214 deletions(-) create mode 100644 supabase/functions/_shared/retry.ts create mode 100644 supabase/migrations/20260405_channel_errors.sql diff --git a/src/App.tsx b/src/App.tsx index aa457c6..3c55b81 100644 --- a/src/App.tsx +++ b/src/App.tsx @@ -5,7 +5,7 @@ import PageNavigator from './components/PageNavigator'; export default function App() { const location = useLocation(); - const isLoadingPage = location.pathname === '/report/loading'; + const isLoadingPage = location.pathname.startsWith('/report/loading'); return (
diff --git a/src/components/report/ui/EmptyState.tsx b/src/components/report/ui/EmptyState.tsx index 9e05afb..561eb44 100644 --- a/src/components/report/ui/EmptyState.tsx +++ b/src/components/report/ui/EmptyState.tsx @@ -1,19 +1,83 @@ import { motion } from 'motion/react'; -import { Search } from 'lucide-react'; +import { Search, AlertCircle, Info, RefreshCw } from 'lucide-react'; +import { useEffect, useState } from 'react'; + +type EmptyStatus = 'loading' | 'error' | 'not_found' | 'timeout'; interface EmptyStateProps { message?: string; subtext?: string; + status?: EmptyStatus; + onRetry?: () => void; + /** Auto-timeout: switch to 'timeout' status after N seconds (default: 60) */ + autoTimeoutSec?: number; } +const STATUS_CONFIG: Record = { + loading: { + icon: Search, + iconColor: 'text-slate-400', + bgColor: 'bg-slate-100', + defaultMessage: '데이터 수집 중', + defaultSubtext: '채널 데이터 보강이 완료되면 자동으로 업데이트됩니다.', + }, + error: { + icon: AlertCircle, + iconColor: 'text-red-400', + bgColor: 'bg-red-50', + defaultMessage: '데이터 수집 실패', + defaultSubtext: '일시적인 오류가 발생했습니다. 다시 시도해 주세요.', + }, + not_found: { + icon: Info, + iconColor: 'text-blue-400', + bgColor: 'bg-blue-50', + defaultMessage: '채널을 찾을 수 없음', + defaultSubtext: '이 채널은 발견되지 않았거나 데이터가 없습니다.', + }, + timeout: { + icon: AlertCircle, + iconColor: 'text-amber-400', + bgColor: 'bg-amber-50', + defaultMessage: '응답 시간 초과', + defaultSubtext: '데이터 수집에 시간이 오래 걸리고 있습니다.', + }, +}; + /** * Shown inside report sections when data is not yet available - * (e.g., before enrichment completes or when a channel is not found). + * (e.g., before enrichment completes, when a channel is not found, or on error). 
*/ export function EmptyState({ - message = '데이터 수집 중', - subtext = '채널 데이터 보강이 완료되면 자동으로 업데이트됩니다.', + message, + subtext, + status = 'loading', + onRetry, + autoTimeoutSec = 60, }: EmptyStateProps) { + const [currentStatus, setCurrentStatus] = useState(status); + + // Auto-timeout: switch from 'loading' to 'timeout' after N seconds + useEffect(() => { + if (status !== 'loading') return; + const timer = setTimeout(() => setCurrentStatus('timeout'), autoTimeoutSec * 1000); + return () => clearTimeout(timer); + }, [status, autoTimeoutSec]); + + // Sync external status changes + useEffect(() => { + setCurrentStatus(status); + }, [status]); + + const config = STATUS_CONFIG[currentStatus]; + const Icon = config.icon; + return ( -
- +
+ {currentStatus === 'loading' ? ( +
+ ) : ( + + )}
-

{message}

-

{subtext}

+

{message || config.defaultMessage}

+

{subtext || config.defaultSubtext}

+ + {onRetry && (currentStatus === 'error' || currentStatus === 'timeout') && ( + + )} ); } diff --git a/src/hooks/useEnrichment.ts b/src/hooks/useEnrichment.ts index 7588dee..a6bf05d 100644 --- a/src/hooks/useEnrichment.ts +++ b/src/hooks/useEnrichment.ts @@ -8,6 +8,10 @@ type EnrichmentStatus = 'idle' | 'loading' | 'success' | 'error'; interface UseEnrichmentResult { status: EnrichmentStatus; enrichedReport: MarketingReport | null; + /** Number of retry attempts made */ + retryCount: number; + /** Call this to retry enrichment (max 2 retries) */ + retry: () => void; } interface EnrichmentParams { @@ -20,10 +24,12 @@ interface EnrichmentParams { address?: string; } +const MAX_RETRIES = 2; + /** * Triggers background channel enrichment after Phase 1 report renders. * Fires once, waits for the Edge Function to complete (~27s), - * then returns the merged report. + * then returns the merged report. Supports up to 2 manual retries. */ export function useEnrichment( baseReport: MarketingReport | null, @@ -31,40 +37,55 @@ export function useEnrichment( ): UseEnrichmentResult { const [status, setStatus] = useState('idle'); const [enrichedReport, setEnrichedReport] = useState(null); + const [retryCount, setRetryCount] = useState(0); const hasTriggered = useRef(false); - useEffect(() => { - if (!baseReport || !params?.reportId || hasTriggered.current) return; - // Always enrich if clinicName exists — Naver, 강남언니, Google Maps work with name alone + const doEnrich = useCallback(async () => { + if (!baseReport || !params?.reportId) return; - hasTriggered.current = true; setStatus('loading'); - enrichChannels({ - reportId: params.reportId, - clinicName: params.clinicName, - instagramHandle: params.instagramHandle, - instagramHandles: params.instagramHandles, - youtubeChannelId: params.youtubeChannelId, - facebookHandle: params.facebookHandle, - address: params.address, - }) - .then((result) => { - if (result.success && result.data) { - const merged = 
mergeEnrichment(baseReport, result.data as EnrichmentData); - setEnrichedReport(merged); - setStatus('success'); - } else { - setStatus('error'); - } - }) - .catch(() => { - setStatus('error'); + try { + const result = await enrichChannels({ + reportId: params.reportId, + clinicName: params.clinicName, + instagramHandle: params.instagramHandle, + instagramHandles: params.instagramHandles, + youtubeChannelId: params.youtubeChannelId, + facebookHandle: params.facebookHandle, + address: params.address, }); + + if (result.success && result.data) { + const merged = mergeEnrichment(baseReport, result.data as EnrichmentData); + setEnrichedReport(merged); + setStatus('success'); + } else { + setStatus('error'); + } + } catch { + setStatus('error'); + } }, [baseReport, params]); + // Initial trigger + useEffect(() => { + if (!baseReport || !params?.reportId || hasTriggered.current) return; + hasTriggered.current = true; + doEnrich(); + }, [baseReport, params, doEnrich]); + + // Manual retry + const retry = useCallback(() => { + if (retryCount >= MAX_RETRIES) return; + setRetryCount(prev => prev + 1); + doEnrich(); + }, [retryCount, doEnrich]); + return { status, enrichedReport, + retryCount, + retry, }; } diff --git a/src/lib/supabase.ts b/src/lib/supabase.ts index 20bdb84..c81c9dc 100644 --- a/src/lib/supabase.ts +++ b/src/lib/supabase.ts @@ -5,12 +5,18 @@ const supabaseAnonKey = import.meta.env.VITE_SUPABASE_ANON_KEY; export const supabase = createClient(supabaseUrl, supabaseAnonKey); +/** Common headers for Edge Function calls (includes JWT auth) */ +const fnHeaders = () => ({ + "Content-Type": "application/json", + "Authorization": `Bearer ${supabaseAnonKey}`, +}); + export async function generateMarketingReport(url: string, clinicName?: string) { const response = await fetch( `${supabaseUrl}/functions/v1/generate-report`, { method: "POST", - headers: { "Content-Type": "application/json" }, + headers: fnHeaders(), body: JSON.stringify({ url, clinicName }), } ); @@ -33,6 
+39,38 @@ export async function fetchReportById(reportId: string) { return data; } +/** + * Fetch pipeline status for a report. + * Used by AnalysisLoadingPage to resume interrupted pipelines. + */ +export interface PipelineStatus { + reportId: string; + clinicId?: string; + runId?: string; + status: string; + clinicName?: string; + hasChannelData: boolean; + hasReport: boolean; +} + +export async function fetchPipelineStatus(reportId: string): Promise { + const { data, error } = await supabase + .from("marketing_reports") + .select("id, status, clinic_name, channel_data, report") + .eq("id", reportId) + .single(); + + if (error || !data) throw new Error(`Report not found: ${error?.message}`); + + return { + reportId: data.id, + status: data.status || "unknown", + clinicName: data.clinic_name, + hasChannelData: !!data.channel_data && Object.keys(data.channel_data).length > 0, + hasReport: !!data.report && Object.keys(data.report).length > 0, + }; +} + export interface EnrichChannelsRequest { reportId: string; clinicName: string; @@ -52,7 +90,7 @@ export async function enrichChannels(params: EnrichChannelsRequest) { `${supabaseUrl}/functions/v1/enrich-channels`, { method: "POST", - headers: { "Content-Type": "application/json" }, + headers: fnHeaders(), body: JSON.stringify(params), } ); @@ -69,7 +107,7 @@ export async function scrapeWebsite(url: string, clinicName?: string) { `${supabaseUrl}/functions/v1/scrape-website`, { method: "POST", - headers: { "Content-Type": "application/json" }, + headers: fnHeaders(), body: JSON.stringify({ url, clinicName }), } ); @@ -92,7 +130,7 @@ export async function discoverChannels(url: string, clinicName?: string) { `${supabaseUrl}/functions/v1/discover-channels`, { method: "POST", - headers: { "Content-Type": "application/json" }, + headers: fnHeaders(), body: JSON.stringify({ url, clinicName }), } ); @@ -113,7 +151,7 @@ export async function collectChannelData(reportId: string, clinicId?: string, ru 
`${supabaseUrl}/functions/v1/collect-channel-data`, { method: "POST", - headers: { "Content-Type": "application/json" }, + headers: fnHeaders(), body: JSON.stringify({ reportId, clinicId, runId }), } ); @@ -133,7 +171,7 @@ export async function generateReportV2(reportId: string, clinicId?: string, runI `${supabaseUrl}/functions/v1/generate-report`, { method: "POST", - headers: { "Content-Type": "application/json" }, + headers: fnHeaders(), body: JSON.stringify({ reportId, clinicId, runId }), } ); diff --git a/src/main.tsx b/src/main.tsx index f774379..3e06fb3 100644 --- a/src/main.tsx +++ b/src/main.tsx @@ -21,6 +21,7 @@ createRoot(document.getElementById('root')!).render( }> } /> } /> + } /> } /> } /> } /> diff --git a/src/pages/AnalysisLoadingPage.tsx b/src/pages/AnalysisLoadingPage.tsx index 51c64d5..4c616fd 100644 --- a/src/pages/AnalysisLoadingPage.tsx +++ b/src/pages/AnalysisLoadingPage.tsx @@ -1,10 +1,15 @@ -import { useState, useEffect, useRef } from 'react'; -import { useNavigate, useLocation } from 'react-router'; +import { useState, useEffect, useRef, useCallback } from 'react'; +import { useNavigate, useLocation, useParams } from 'react-router'; import { motion } from 'motion/react'; -import { Check, AlertCircle } from 'lucide-react'; -import { discoverChannels, collectChannelData, generateReportV2 } from '../lib/supabase'; +import { Check, AlertCircle, RefreshCw } from 'lucide-react'; +import { + discoverChannels, + collectChannelData, + generateReportV2, + fetchPipelineStatus, +} from '../lib/supabase'; -type Phase = 'discovering' | 'collecting' | 'generating' | 'complete'; +type Phase = 'resuming' | 'discovering' | 'collecting' | 'generating' | 'complete'; const PHASE_STEPS = [ { key: 'discovering' as Phase, label: 'Scanning website & discovering channels...', labelDone: 'Channels discovered' }, @@ -13,47 +18,95 @@ const PHASE_STEPS = [ { key: 'complete' as Phase, label: 'Finalizing report...', labelDone: 'Complete' }, ]; +// Session keys for 
pipeline resume +const SESSION_KEYS = { + reportId: 'infinith_reportId', + clinicId: 'infinith_clinicId', + runId: 'infinith_runId', + url: 'infinith_url', +}; + +function saveSession(data: { reportId: string; clinicId?: string; runId?: string; url?: string }) { + sessionStorage.setItem(SESSION_KEYS.reportId, data.reportId); + if (data.clinicId) sessionStorage.setItem(SESSION_KEYS.clinicId, data.clinicId); + if (data.runId) sessionStorage.setItem(SESSION_KEYS.runId, data.runId); + if (data.url) sessionStorage.setItem(SESSION_KEYS.url, data.url); +} + +function loadSession() { + return { + reportId: sessionStorage.getItem(SESSION_KEYS.reportId), + clinicId: sessionStorage.getItem(SESSION_KEYS.clinicId), + runId: sessionStorage.getItem(SESSION_KEYS.runId), + url: sessionStorage.getItem(SESSION_KEYS.url), + }; +} + +function clearSession() { + Object.values(SESSION_KEYS).forEach(k => sessionStorage.removeItem(k)); +} + export default function AnalysisLoadingPage() { const [phase, setPhase] = useState('discovering'); const [error, setError] = useState(null); + const [errorDetails, setErrorDetails] = useState | null>(null); const navigate = useNavigate(); const location = useLocation(); + const { reportId: urlReportId } = useParams<{ reportId?: string }>(); const url = (location.state as { url?: string })?.url; const hasStarted = useRef(false); const phaseIndex = PHASE_STEPS.findIndex(s => s.key === phase); - useEffect(() => { - if (hasStarted.current) return; - hasStarted.current = true; + const runPipeline = useCallback(async ( + startUrl?: string, + resumeFrom?: { reportId: string; clinicId?: string; runId?: string; phase: Phase }, + ) => { + try { + let reportId = resumeFrom?.reportId || ''; + let clinicId = resumeFrom?.clinicId; + let runId = resumeFrom?.runId; + let startPhase = resumeFrom?.phase || 'discovering'; - if (!url) { - navigate('/', { replace: true }); - return; - } - - const runPipeline = async () => { - try { - // Phase 1: Discover Channels + // Phase 
1: Discover Channels (skip if resuming from later phase) + if (startPhase === 'discovering') { + if (!startUrl) throw new Error('No URL provided'); setPhase('discovering'); - const discovery = await discoverChannels(url); + const discovery = await discoverChannels(startUrl); if (!discovery.success) throw new Error(discovery.error || 'Channel discovery failed'); - const reportId = discovery.reportId; - const clinicId = discovery.clinicId; // V3 - const runId = discovery.runId; // V3 + reportId = discovery.reportId; + clinicId = discovery.clinicId; + runId = discovery.runId; - // Phase 2: Collect Channel Data + // Save to session + update URL for resume + saveSession({ reportId, clinicId, runId, url: startUrl }); + window.history.replaceState(null, '', `/report/loading/${reportId}`); + startPhase = 'collecting'; + } + + // Phase 2: Collect Channel Data + if (startPhase === 'collecting') { setPhase('collecting'); const collection = await collectChannelData(reportId, clinicId, runId); - if (!collection.success) throw new Error(collection.error || 'Data collection failed'); + // Allow partial success — only fail on total failure + if (collection.success === false && !collection.partialFailure) { + throw new Error(collection.error || 'Data collection failed'); + } + if (collection.channelErrors && Object.keys(collection.channelErrors).length > 0) { + console.warn('[pipeline] Partial failures:', collection.channelErrors); + } + startPhase = 'generating'; + } - // Phase 3: Generate Report + // Phase 3: Generate Report + if (startPhase === 'generating') { setPhase('generating'); const result = await generateReportV2(reportId, clinicId, runId); if (!result.success) throw new Error(result.error || 'Report generation failed'); // Complete — navigate to report setPhase('complete'); + clearSession(); setTimeout(() => { navigate(`/report/${reportId}`, { @@ -63,13 +116,117 @@ export default function AnalysisLoadingPage() { : undefined, }); }, 800); - } catch (err) { - setError(err 
instanceof Error ? err.message : 'An error occurred'); } - }; + } catch (err) { + const msg = err instanceof Error ? err.message : 'An error occurred'; + setError(msg); + } + }, [navigate]); - runPipeline(); - }, [url, navigate]); + // Retry from the current failed phase + const handleRetry = useCallback(() => { + setError(null); + setErrorDetails(null); + const session = loadSession(); + if (session.reportId) { + // Resume from the phase that failed + runPipeline(undefined, { + reportId: session.reportId, + clinicId: session.clinicId || undefined, + runId: session.runId || undefined, + phase, + }); + } else if (url || session.url) { + // Restart from scratch + hasStarted.current = false; + runPipeline(url || session.url || undefined); + } + }, [phase, url, runPipeline]); + + useEffect(() => { + if (hasStarted.current) return; + hasStarted.current = true; + + // 1. Try URL param resume (e.g., /report/loading/abc-123) + if (urlReportId) { + setPhase('resuming'); + fetchPipelineStatus(urlReportId) + .then((status) => { + // Also check sessionStorage for clinicId/runId + const session = loadSession(); + const clinicId = session.clinicId || status.clinicId; + const runId = session.runId || status.runId; + + if (status.hasReport || status.status === 'complete') { + // Already done — go to report + navigate(`/report/${urlReportId}`, { replace: true }); + return; + } + + let resumePhase: Phase = 'discovering'; + if (status.status === 'discovered' || status.status === 'discovering') { + resumePhase = 'collecting'; + } else if (['collecting', 'collected', 'partial'].includes(status.status)) { + resumePhase = 'generating'; + } else if (status.status === 'collection_failed') { + setError('Data collection failed. 
Please retry.'); + setPhase('collecting'); + return; + } + + saveSession({ reportId: urlReportId, clinicId, runId, url: session.url || undefined }); + runPipeline(undefined, { reportId: urlReportId, clinicId, runId, phase: resumePhase }); + }) + .catch(() => { + setError('Could not resume analysis. Please try again.'); + }); + return; + } + + // 2. Try sessionStorage resume + const session = loadSession(); + if (session.reportId && !url) { + setPhase('resuming'); + fetchPipelineStatus(session.reportId) + .then((status) => { + if (status.hasReport || status.status === 'complete') { + clearSession(); + navigate(`/report/${session.reportId}`, { replace: true }); + return; + } + + let resumePhase: Phase = 'discovering'; + if (['discovered', 'discovering'].includes(status.status)) { + resumePhase = 'collecting'; + } else if (['collecting', 'collected', 'partial'].includes(status.status)) { + resumePhase = 'generating'; + } + + runPipeline(undefined, { + reportId: session.reportId!, + clinicId: session.clinicId || undefined, + runId: session.runId || undefined, + phase: resumePhase, + }); + }) + .catch(() => { + clearSession(); + navigate('/', { replace: true }); + }); + return; + } + + // 3. Fresh start with URL + if (!url) { + navigate('/', { replace: true }); + return; + } + + runPipeline(url); + }, [url, urlReportId, navigate, runPipeline]); + + // Adjust phaseIndex for 'resuming' state + const displayPhaseIndex = phase === 'resuming' ? -1 : phaseIndex; return (
@@ -91,14 +248,14 @@ export default function AnalysisLoadingPage() { INFINITH - {url && ( + {(url || loadSession().url) && ( - {url} + {url || loadSession().url} )} @@ -110,68 +267,102 @@ export default function AnalysisLoadingPage() { >

{error}

- + + {errorDetails && ( +
+

Failed channels:

+ {Object.entries(errorDetails).map(([ch, err]) => ( +

+ • {ch}: {err} +

+ ))} +
+ )} + +
+ + +
) : ( <> -
- {PHASE_STEPS.map((step, index) => { - const isCompleted = phaseIndex > index || (step.key === 'complete' && phase === 'complete'); - const isActive = phaseIndex === index && phase !== 'complete'; - - return ( - -
- {isCompleted ? ( - - - - ) : isActive ? ( -
- ) : ( -
- )} -
- - {isCompleted ? step.labelDone : step.label} - - - ); - })} -
- -
+ {phase === 'resuming' ? ( -
+ initial={{ opacity: 0 }} + animate={{ opacity: 1 }} + className="flex flex-col items-center gap-4 mb-14" + > +
+

Resuming analysis...

+ + ) : ( + <> +
+ {PHASE_STEPS.map((step, index) => { + const isCompleted = displayPhaseIndex > index || (step.key === 'complete' && phase === 'complete'); + const isActive = displayPhaseIndex === index && phase !== 'complete'; -

- AI가 마케팅 데이터를 분석하고 있습니다. 약 1~2분 소요됩니다. -

+ return ( + +
+ {isCompleted ? ( + + + + ) : isActive ? ( +
+ ) : ( +
+ )} +
+ + {isCompleted ? step.labelDone : step.label} + + + ); + })} +
+ +
+ +
+ +

+ AI가 마케팅 데이터를 분석하고 있습니다. 약 1~2분 소요됩니다. +

+ + )} )}
diff --git a/supabase/functions/_shared/retry.ts b/supabase/functions/_shared/retry.ts new file mode 100644 index 0000000..646e177 --- /dev/null +++ b/supabase/functions/_shared/retry.ts @@ -0,0 +1,224 @@ +/** + * Retry utility for external API calls. + * + * Features: + * - Exponential backoff with jitter + * - Respects Retry-After header (429) + * - Permanent failure detection (400/401/403/404 → no retry) + * - Per-request timeout via AbortController + * - Domain-level rate limiting (e.g., Firecrawl 500ms gap) + */ + +// ─── Types ─── + +export interface RetryOptions { + /** Max number of retries (default: 2) */ + maxRetries?: number; + /** Backoff delays in ms per attempt (default: [1000, 3000]) */ + backoffMs?: number[]; + /** HTTP status codes to retry on (default: [429, 500, 502, 503]) */ + retryOn?: number[]; + /** Per-request timeout in ms (default: 45000) */ + timeoutMs?: number; + /** Label for logging */ + label?: string; +} + +interface RetryResult { + response: Response; + attempts: number; + retried: boolean; +} + +// ─── Domain Rate Limiter ─── + +const domainLastCall = new Map(); +const DOMAIN_INTERVALS: Record = { + "api.firecrawl.dev": 500, + "api.perplexity.ai": 200, +}; + +function getDomain(url: string): string { + try { + return new URL(url).hostname; + } catch { + return ""; + } +} + +async function waitForDomainSlot(url: string): Promise { + const domain = getDomain(url); + const interval = DOMAIN_INTERVALS[domain]; + if (!interval) return; + + const last = domainLastCall.get(domain) || 0; + const elapsed = Date.now() - last; + if (elapsed < interval) { + await new Promise((r) => setTimeout(r, interval - elapsed)); + } + domainLastCall.set(domain, Date.now()); +} + +// ─── Permanent failure codes (never retry) ─── + +const PERMANENT_FAILURES = new Set([400, 401, 403, 404, 405, 409, 422]); + +// ─── Main Function ─── + +/** + * fetch() with automatic retry, timeout, and rate limiting. 
+ * + * @example + * const res = await fetchWithRetry("https://api.example.com/data", { + * method: "POST", + * headers: { "Content-Type": "application/json" }, + * body: JSON.stringify({ query: "test" }), + * }, { maxRetries: 2, timeoutMs: 30000, label: "example-api" }); + */ +export async function fetchWithRetry( + url: string, + init?: RequestInit, + opts?: RetryOptions, +): Promise { + const { + maxRetries = 2, + backoffMs = [1000, 3000], + retryOn = [429, 500, 502, 503], + timeoutMs = 45000, + label = getDomain(url), + } = opts || {}; + + let lastError: Error | null = null; + const retrySet = new Set(retryOn); + + for (let attempt = 0; attempt <= maxRetries; attempt++) { + // Rate limit between domain calls + await waitForDomainSlot(url); + + // AbortController for per-request timeout + const controller = new AbortController(); + const timer = setTimeout(() => controller.abort(), timeoutMs); + + try { + const response = await fetch(url, { + ...init, + signal: controller.signal, + }); + clearTimeout(timer); + + // Success + if (response.ok) return response; + + // Permanent failure — don't retry + if (PERMANENT_FAILURES.has(response.status)) { + console.warn( + `[retry:${label}] Permanent failure ${response.status} on attempt ${attempt + 1}`, + ); + return response; + } + + // Retryable failure + if (retrySet.has(response.status) && attempt < maxRetries) { + let delay = backoffMs[attempt] || backoffMs[backoffMs.length - 1] || 3000; + + // Respect Retry-After header for 429 + if (response.status === 429) { + const retryAfter = response.headers.get("Retry-After"); + if (retryAfter) { + const parsed = parseInt(retryAfter, 10); + if (!isNaN(parsed)) { + delay = Math.max(delay, parsed * 1000); + } + } + } + + // Add jitter (±20%) + delay = delay * (0.8 + Math.random() * 0.4); + + console.warn( + `[retry:${label}] Status ${response.status}, retrying in ${Math.round(delay)}ms (attempt ${attempt + 1}/${maxRetries + 1})`, + ); + await new Promise((r) => setTimeout(r, 
delay)); + continue; + } + + // Non-retryable or exhausted retries + return response; + } catch (err) { + clearTimeout(timer); + lastError = err instanceof Error ? err : new Error(String(err)); + + if (attempt < maxRetries) { + const delay = (backoffMs[attempt] || 3000) * (0.8 + Math.random() * 0.4); + console.warn( + `[retry:${label}] Network error: ${lastError.message}, retrying in ${Math.round(delay)}ms (attempt ${attempt + 1}/${maxRetries + 1})`, + ); + await new Promise((r) => setTimeout(r, delay)); + continue; + } + } + } + + throw lastError || new Error(`[retry:${label}] All ${maxRetries + 1} attempts failed`); +} + +// ─── Convenience: JSON fetch with retry ─── + +export async function fetchJsonWithRetry( + url: string, + init?: RequestInit, + opts?: RetryOptions, +): Promise<{ data: T | null; status: number; error?: string }> { + try { + const res = await fetchWithRetry(url, init, opts); + if (!res.ok) { + const text = await res.text().catch(() => ""); + return { data: null, status: res.status, error: `HTTP ${res.status}: ${text.slice(0, 200)}` }; + } + const data = (await res.json()) as T; + return { data, status: res.status }; + } catch (err) { + const msg = err instanceof Error ? err.message : String(err); + return { data: null, status: 0, error: msg }; + } +} + +// ─── Channel Task Wrapper ─── + +export interface ChannelTaskResult { + channel: string; + success: boolean; + error?: string; + httpStatus?: number; + durationMs: number; +} + +/** + * Wraps a channel collection task with timing and error capture. + * Used by collect-channel-data to track per-channel success/failure. + * + * @example + * const taskMeta = await wrapChannelTask("instagram", async () => { + * // ... collect instagram data ... 
+ * channelData.instagram = data; + * }); + */ +export async function wrapChannelTask( + channel: string, + task: () => Promise, +): Promise { + const start = Date.now(); + try { + await task(); + return { channel, success: true, durationMs: Date.now() - start }; + } catch (err) { + const msg = err instanceof Error ? err.message : String(err); + console.error(`[channel:${channel}] Error: ${msg}`); + return { + channel, + success: false, + error: msg, + durationMs: Date.now() - start, + }; + } +} diff --git a/supabase/functions/_shared/verifyHandles.ts b/supabase/functions/_shared/verifyHandles.ts index 1f896b6..8f9e555 100644 --- a/supabase/functions/_shared/verifyHandles.ts +++ b/supabase/functions/_shared/verifyHandles.ts @@ -5,7 +5,7 @@ export interface VerifiedChannel { handle: string; - verified: boolean; + verified: boolean | "unverifiable"; url?: string; channelId?: string; // YouTube channel ID if resolved } @@ -28,17 +28,36 @@ async function verifyInstagram(handle: string): Promise { const url = `https://www.instagram.com/${handle}/`; const res = await fetch(url, { method: 'GET', - headers: { 'User-Agent': 'Mozilla/5.0' }, - redirect: 'follow', + headers: { 'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7)' }, + redirect: 'manual', // Don't follow redirects — detect login page }); - // Instagram returns 200 for existing profiles, 404 for missing - return { - handle, - verified: res.status === 200, - url, - }; + + // 302/301 to login page → Instagram blocks unauthenticated access + if (res.status === 301 || res.status === 302) { + const location = res.headers.get('location') || ''; + if (location.includes('/accounts/login') || location.includes('/challenge')) { + return { handle, verified: 'unverifiable', url }; + } + } + + // 200 → profile exists; 404 → definitely not found + if (res.status === 200) { + // Double-check: some 200 responses are actually the login page + const bodySnippet = await res.text().then(t => t.slice(0, 2000)).catch(() => 
''); + if (bodySnippet.includes('/accounts/login') && !bodySnippet.includes(`"username":"${handle}"`)) { + return { handle, verified: 'unverifiable', url }; + } + return { handle, verified: true, url }; + } + + if (res.status === 404) { + return { handle, verified: false, url }; + } + + // Any other status → unverifiable (don't assume it doesn't exist) + return { handle, verified: 'unverifiable', url }; } catch { - return { handle, verified: false }; + return { handle, verified: 'unverifiable' }; } } @@ -86,14 +105,30 @@ async function verifyYouTube(handle: string, apiKey: string): Promise { try { const url = `https://www.facebook.com/${handle}/`; + // Use GET instead of HEAD — Facebook blocks HEAD requests const res = await fetch(url, { - method: 'HEAD', - headers: { 'User-Agent': 'Mozilla/5.0' }, + method: 'GET', + headers: { 'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7)' }, redirect: 'follow', }); - return { handle, verified: res.status === 200, url }; + + if (res.status === 200) { + // Check if it's a real page or a redirect to login/error + const bodySnippet = await res.text().then(t => t.slice(0, 3000)).catch(() => ''); + if (bodySnippet.includes('page_not_found') || bodySnippet.includes('This content isn')) { + return { handle, verified: false, url }; + } + return { handle, verified: true, url }; + } + + if (res.status === 404) { + return { handle, verified: false, url }; + } + + // Facebook often blocks bots → unverifiable, not false + return { handle, verified: 'unverifiable', url }; } catch { - return { handle, verified: false }; + return { handle, verified: 'unverifiable' }; } } diff --git a/supabase/functions/_shared/visionAnalysis.ts b/supabase/functions/_shared/visionAnalysis.ts index e398d1c..d53dab5 100644 --- a/supabase/functions/_shared/visionAnalysis.ts +++ b/supabase/functions/_shared/visionAnalysis.ts @@ -6,6 +6,8 @@ * certifications, social icons, brand colors, etc.). 
*/ +import { fetchWithRetry } from "./retry.ts"; + const FIRECRAWL_BASE = "https://api.firecrawl.dev/v1"; export interface ScreenshotResult { @@ -34,15 +36,20 @@ export interface VisionAnalysisResult { } /** - * Capture screenshot of a URL via Firecrawl. - * Returns base64 image data. + * Capture screenshot of a URL via Firecrawl v2. + * Returns { screenshotUrl, base64 } — URL from Firecrawl, base64 fetched for Vision analysis. + * + * Firecrawl v2 returns a GCS URL (not base64). We download it and convert to base64 + * so Gemini Vision can consume it via inlineData. */ async function captureScreenshot( url: string, firecrawlKey: string, -): Promise { +): Promise<{ screenshotUrl: string; base64: string } | null> { try { - const res = await fetch(`${FIRECRAWL_BASE}/scrape`, { + console.log(`[vision] Capturing screenshot: ${url}`); + // Firecrawl v2: use "screenshot@fullPage" format (no separate screenshotOptions) + const res = await fetchWithRetry(`${FIRECRAWL_BASE}/scrape`, { method: "POST", headers: { "Content-Type": "application/json", @@ -50,18 +57,47 @@ async function captureScreenshot( }, body: JSON.stringify({ url, - formats: ["screenshot"], + formats: ["screenshot@fullPage"], waitFor: 5000, - screenshotOptions: { - fullPage: false, - quality: 80, - }, }), - }); - if (!res.ok) return null; + }, { label: `firecrawl-screenshot`, timeoutMs: 45000, maxRetries: 1 }); + + if (!res.ok) { + const errText = await res.text().catch(() => ""); + console.error(`[vision] Screenshot failed for ${url}: HTTP ${res.status} — ${errText.slice(0, 200)}`); + return null; + } const data = await res.json(); - return data.data?.screenshot || null; // base64 string - } catch { + const screenshotUrl: string | null = data.data?.screenshot || null; + if (!screenshotUrl) { + console.warn(`[vision] Screenshot response OK but no screenshot URL for ${url}. 
Keys: ${JSON.stringify(Object.keys(data.data || {}))}`); + return null; + } + + console.log(`[vision] Screenshot URL received: ${url} → ${screenshotUrl.slice(0, 80)}...`); + + // Download the screenshot image and convert to base64 for Gemini Vision + const imgRes = await fetchWithRetry(screenshotUrl, undefined, { + label: `screenshot-download`, + timeoutMs: 30000, + maxRetries: 1, + }); + if (!imgRes.ok) { + console.error(`[vision] Failed to download screenshot image: HTTP ${imgRes.status}`); + return null; + } + const imgBuffer = await imgRes.arrayBuffer(); + const bytes = new Uint8Array(imgBuffer); + + // Use Deno's standard base64 encoding (efficient for large binaries) + const { encode: encodeBase64 } = await import("https://deno.land/std@0.224.0/encoding/base64.ts"); + const base64 = encodeBase64(bytes); + + console.log(`[vision] Screenshot captured & converted: ${url} (${Math.round(base64.length / 1024)}KB base64, ${Math.round(bytes.length / 1024)}KB raw)`); + + return { screenshotUrl, base64 }; + } catch (err) { + console.error(`[vision] Screenshot error for ${url}:`, err instanceof Error ? 
err.message : err); return null; } } @@ -125,6 +161,9 @@ export async function captureAllScreenshots( if (pages.surgeryPage) { captureTargets.push({ id: 'website-surgery', url: pages.surgeryPage, channel: '웹사이트', caption: '시술 안내 페이지' }); } + if (pages.aboutPage) { + captureTargets.push({ id: 'website-about', url: pages.aboutPage, channel: '웹사이트', caption: '병원 소개 페이지' }); + } // YouTube channel landing const yt = verifiedChannels.youtube as Record | null; @@ -154,16 +193,16 @@ export async function captureAllScreenshots( // Capture all in parallel (max 6 concurrent) const capturePromises = captureTargets.map(async (target) => { - const base64 = await captureScreenshot(target.url, firecrawlKey); - if (base64) { + const result = await captureScreenshot(target.url, firecrawlKey); + if (result) { results.push({ id: target.id, - url: `data:image/png;base64,${base64.slice(0, 100)}...`, // Placeholder — will be replaced with Storage URL + url: result.screenshotUrl, // GCS URL from Firecrawl (permanent for ~7 days) channel: target.channel, capturedAt: now, caption: target.caption, sourceUrl: target.url, - base64, + base64: result.base64, }); } }); @@ -181,19 +220,41 @@ export async function analyzeScreenshot( geminiKey: string, ): Promise { const prompts: Record = { - 'website-main': `이 한국 성형외과 병원 메인 페이지 스크린샷을 분석해줘. 다음 정보를 JSON으로 추출해줘: -- foundingYear: 개원 연도 (배너에 "SINCE 2004", "21년 무사고" 등이 있으면) + 'website-main': `이 한국 성형외과/피부과 병원 메인 페이지 스크린샷을 꼼꼼히 분석해줘. 다음 정보를 JSON으로 추출해줘: + +- foundingYear: 개원 연도. 반드시 찾아줘! 다음 패턴 중 하나라도 있으면 계산해: + "22주년" → 2026 - 22 = 2004 + "22년 동안" → 2026 - 22 = 2004 + "SINCE 2004" → 2004 + "20년 전통" → 2026 - 20 = 2006 + "개원 15주년" → 2026 - 15 = 2011 + 배너, 이벤트 팝업, 로고 옆 텍스트, 하단 footer 등 모든 곳을 확인해줘. +- operationYears: 운영 기간 (숫자만. 
"22주년"이면 22) - certifications: 인증 마크 (JCI, 보건복지부, 의료관광 등) - socialIcons: 보이는 소셜 미디어 아이콘 (Instagram, YouTube, Facebook, Blog, KakaoTalk 등) - floatingButtons: 플로팅 상담 버튼 (카카오톡, LINE, WhatsApp 등) - brandColors: 메인 컬러와 액센트 컬러 (hex) -- slogans: 배너 텍스트나 슬로건 +- slogans: 배너 텍스트나 슬로건 (이벤트 텍스트 포함) - serviceCategories: 네비게이션 메뉴에 보이는 시술 카테고리`, 'website-doctors': `이 성형외과 의료진 페이지 스크린샷을 분석해줘. JSON으로 추출: - doctors: [{name: "이름", specialty: "전문 분야", position: "대표원장/원장 등"}] 프로필 사진 옆에 적힌 이름과 전문 분야를 모두 읽어줘.`, + 'website-about': `이 성형외과 병원 소개 페이지를 꼼꼼히 분석해줘. JSON으로 추출: + +- foundingYear: 개원 연도. 반드시 찾아줘! 소개 페이지에 자주 나오는 패턴: + "22주년" → 2026 - 22 = 2004 + "22년 동안" → 2026 - 22 = 2004 + "SINCE 2004" → 2004 + "2004년 개원" → 2004 + "20년 전통" → 2026 - 20 = 2006 + 연혁, 소개글, 대표원장 인사말 등 모든 텍스트를 꼼꼼히 확인해줘. +- operationYears: 운영 기간 (숫자만) +- doctors: [{name: "이름", specialty: "전문 분야", position: "대표원장/원장 등"}] +- certifications: 인증 마크 (JCI, 보건복지부, 의료관광 등) +- slogans: 소개 텍스트, 미션/비전 문구`, + 'website-surgery': `이 성형외과 시술 안내 페이지를 분석해줘. JSON으로 추출: - serviceCategories: 보이는 시술 카테고리 목록 (눈성형, 코성형, 가슴성형, 안면윤곽 등) - certifications: 보이는 인증/수상 마크`, @@ -212,7 +273,8 @@ export async function analyzeScreenshot( const prompt = prompts[pageType] || `이 웹페이지 스크린샷을 분석해줘. 
보이는 모든 텍스트와 정보를 JSON으로 추출해줘.`; try { - const res = await fetch( + console.log(`[vision] Analyzing screenshot: ${pageType} (${Math.round(base64.length / 1024)}KB)`); + const res = await fetchWithRetry( `https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash:generateContent?key=${geminiKey}`, { method: "POST", @@ -230,18 +292,27 @@ export async function analyzeScreenshot( }, }), }, + { label: `gemini-vision:${pageType}`, timeoutMs: 45000, maxRetries: 1 }, ); - if (!res.ok) return {}; + if (!res.ok) { + const errText = await res.text().catch(() => ""); + console.error(`[vision] Gemini failed for ${pageType}: HTTP ${res.status} — ${errText.slice(0, 200)}`); + return {}; + } const data = await res.json(); const text = data.candidates?.[0]?.content?.parts?.[0]?.text || ""; const jsonMatch = text.match(/\{[\s\S]*\}/); if (jsonMatch) { - return JSON.parse(jsonMatch[0]); + const result = JSON.parse(jsonMatch[0]); + console.log(`[vision] Gemini result for ${pageType}:`, JSON.stringify(result).slice(0, 300)); + return result; } + console.warn(`[vision] Gemini returned no JSON for ${pageType}. Raw: ${text.slice(0, 200)}`); return {}; - } catch { + } catch (err) { + console.error(`[vision] Gemini error for ${pageType}:`, err instanceof Error ? 
err.message : err); return {}; } } diff --git a/supabase/functions/collect-channel-data/index.ts b/supabase/functions/collect-channel-data/index.ts index cb3ba9a..6c96830 100644 --- a/supabase/functions/collect-channel-data/index.ts +++ b/supabase/functions/collect-channel-data/index.ts @@ -3,6 +3,7 @@ import { createClient } from "https://esm.sh/@supabase/supabase-js@2"; import type { VerifiedChannels } from "../_shared/verifyHandles.ts"; import { PERPLEXITY_MODEL } from "../_shared/config.ts"; import { captureAllScreenshots, runVisionAnalysis, type ScreenshotResult } from "../_shared/visionAnalysis.ts"; +import { fetchWithRetry, fetchJsonWithRetry, wrapChannelTask, type ChannelTaskResult } from "../_shared/retry.ts"; const corsHeaders = { "Access-Control-Allow-Origin": "*", @@ -18,15 +19,20 @@ interface CollectRequest { } async function runApifyActor(actorId: string, input: Record, token: string): Promise { - const res = await fetch(`${APIFY_BASE}/acts/${actorId}/runs?token=${token}&waitForFinish=120`, { - method: "POST", - headers: { "Content-Type": "application/json" }, - body: JSON.stringify(input), - }); + const res = await fetchWithRetry( + `${APIFY_BASE}/acts/${actorId}/runs?token=${token}&waitForFinish=120`, + { method: "POST", headers: { "Content-Type": "application/json" }, body: JSON.stringify(input) }, + { maxRetries: 1, timeoutMs: 130000, label: `apify:${actorId.split('~')[1] || actorId}` }, + ); + if (!res.ok) throw new Error(`Apify ${actorId} returned ${res.status}`); const run = await res.json(); const datasetId = run.data?.defaultDatasetId; - if (!datasetId) return []; - const itemsRes = await fetch(`${APIFY_BASE}/datasets/${datasetId}/items?token=${token}&limit=20`); + if (!datasetId) throw new Error(`Apify ${actorId}: no dataset returned`); + const itemsRes = await fetchWithRetry( + `${APIFY_BASE}/datasets/${datasetId}/items?token=${token}&limit=20`, + undefined, + { maxRetries: 1, timeoutMs: 30000, label: `apify-dataset:${actorId.split('~')[1] 
|| actorId}` }, + ); return itemsRes.json(); } @@ -74,12 +80,12 @@ Deno.serve(async (req) => { const channelData: Record = {}; const analysisData: Record = {}; - const tasks: Promise[] = []; + const channelTasks: Promise[] = []; - // ─── 1. Instagram (multi-account) — try ALL candidates including unverified ─── - const igCandidates = (verified.instagram || []).filter((v: Record) => v.handle); + // ─── 1. Instagram (multi-account) — try ALL candidates including unverified/unverifiable ─── + const igCandidates = (verified.instagram || []).filter((v: Record) => v.handle && v.verified !== false); if (APIFY_TOKEN && igCandidates.length > 0) { - tasks.push((async () => { + channelTasks.push(wrapChannelTask("instagram", async () => { const accounts: Record[] = []; for (const ig of igCandidates) { const items = await runApifyActor("apify~instagram-profile-scraper", { usernames: [ig.handle], resultsLimit: 12 }, APIFY_TOKEN); @@ -104,14 +110,16 @@ Deno.serve(async (req) => { if (accounts.length > 0) { channelData.instagramAccounts = accounts; channelData.instagram = accounts[0]; + } else { + throw new Error("No Instagram profiles found via Apify"); } - })()); + })); } // ─── 2. 
YouTube ─── const ytVerified = verified.youtube as Record | null; - if (YOUTUBE_API_KEY && ytVerified?.verified) { - tasks.push((async () => { + if (YOUTUBE_API_KEY && (ytVerified?.verified === true || ytVerified?.verified === "unverifiable")) { + channelTasks.push(wrapChannelTask("youtube", async () => { const YT = "https://www.googleapis.com/youtube/v3"; let channelId = (ytVerified?.channelId as string) || ""; @@ -129,24 +137,24 @@ Deno.serve(async (req) => { } } } - if (!channelId) return; + if (!channelId) throw new Error("Could not resolve YouTube channel ID"); - const chRes = await fetch(`${YT}/channels?part=snippet,statistics,brandingSettings&id=${channelId}&key=${YOUTUBE_API_KEY}`); + const chRes = await fetchWithRetry(`${YT}/channels?part=snippet,statistics,brandingSettings&id=${channelId}&key=${YOUTUBE_API_KEY}`, undefined, { label: "youtube-api" }); const chData = await chRes.json(); const channel = chData.items?.[0]; - if (!channel) return; + if (!channel) throw new Error("YouTube channel not found in API response"); const stats = channel.statistics || {}; const snippet = channel.snippet || {}; // Popular videos - const searchRes = await fetch(`${YT}/search?part=snippet&channelId=${channelId}&order=viewCount&type=video&maxResults=10&key=${YOUTUBE_API_KEY}`); + const searchRes = await fetchWithRetry(`${YT}/search?part=snippet&channelId=${channelId}&order=viewCount&type=video&maxResults=10&key=${YOUTUBE_API_KEY}`, undefined, { label: "youtube-search" }); const searchData = await searchRes.json(); const videoIds = (searchData.items || []).map((i: Record) => (i.id as Record)?.videoId).filter(Boolean).join(","); let videos: Record[] = []; if (videoIds) { - const vRes = await fetch(`${YT}/videos?part=snippet,statistics,contentDetails&id=${videoIds}&key=${YOUTUBE_API_KEY}`); + const vRes = await fetchWithRetry(`${YT}/videos?part=snippet,statistics,contentDetails&id=${videoIds}&key=${YOUTUBE_API_KEY}`, undefined, { label: "youtube-videos" }); const vData = 
await vRes.json(); videos = vData.items || []; } @@ -171,13 +179,13 @@ Deno.serve(async (req) => { }; }), }; - })()); + })); } // ─── 3. Facebook ─── const fbVerified = verified.facebook as Record | null; - if (APIFY_TOKEN && fbVerified?.verified) { - tasks.push((async () => { + if (APIFY_TOKEN && (fbVerified?.verified === true || fbVerified?.verified === "unverifiable")) { + channelTasks.push(wrapChannelTask("facebook", async () => { const fbUrl = (fbVerified.url as string) || `https://www.facebook.com/${fbVerified.handle}`; const items = await runApifyActor("apify~facebook-pages-scraper", { startUrls: [{ url: fbUrl }] }, APIFY_TOKEN); const page = (items as Record[])[0]; @@ -189,15 +197,17 @@ Deno.serve(async (req) => { address: page.address, intro: page.intro, rating: page.rating, profilePictureUrl: page.profilePictureUrl, }; + } else { + throw new Error("Facebook page scraper returned no data"); } - })()); + })); } // ─── 4. 강남언니 ─── const guVerified = verified.gangnamUnni as Record | null; if (FIRECRAWL_API_KEY && guVerified?.verified && guVerified.url) { - tasks.push((async () => { - const scrapeRes = await fetch("https://api.firecrawl.dev/v1/scrape", { + channelTasks.push(wrapChannelTask("gangnamUnni", async () => { + const scrapeRes = await fetchWithRetry("https://api.firecrawl.dev/v1/scrape", { method: "POST", headers: { "Content-Type": "application/json", Authorization: `Bearer ${FIRECRAWL_API_KEY}` }, body: JSON.stringify({ @@ -217,7 +227,8 @@ Deno.serve(async (req) => { }, waitFor: 5000, }), - }); + }, { label: "firecrawl-gangnamunni", timeoutMs: 60000 }); + if (!scrapeRes.ok) throw new Error(`Firecrawl 강남언니 scrape failed: ${scrapeRes.status}`); const data = await scrapeRes.json(); const hospital = data.data?.json; if (hospital?.hospitalName) { @@ -230,22 +241,24 @@ Deno.serve(async (req) => { procedures: hospital.procedures || [], address: hospital.address, badges: hospital.badges || [], sourceUrl: guVerified!.url as string, }; + } else { + throw new 
Error("강남언니 scrape returned no hospital data"); } - })()); + })); } // ─── 5. Naver Blog + Place ─── if (NAVER_CLIENT_ID && NAVER_CLIENT_SECRET && clinicName) { const naverHeaders = { "X-Naver-Client-Id": NAVER_CLIENT_ID, "X-Naver-Client-Secret": NAVER_CLIENT_SECRET }; - tasks.push((async () => { + channelTasks.push(wrapChannelTask("naverBlog", async () => { // Get verified Naver Blog handle from Phase 1 for official blog URL const nbVerified = verified.naverBlog as Record | null; const officialBlogHandle = nbVerified?.handle ? String(nbVerified.handle) : null; const query = encodeURIComponent(`${clinicName} 후기`); - const res = await fetch(`https://openapi.naver.com/v1/search/blog.json?query=${query}&display=10&sort=sim`, { headers: naverHeaders }); - if (!res.ok) return; + const res = await fetchWithRetry(`https://openapi.naver.com/v1/search/blog.json?query=${query}&display=10&sort=sim`, { headers: naverHeaders }, { label: "naver-blog" }); + if (!res.ok) throw new Error(`Naver Blog API returned ${res.status}`); const data = await res.json(); channelData.naverBlog = { totalResults: data.total || 0, searchQuery: `${clinicName} 후기`, @@ -259,9 +272,9 @@ Deno.serve(async (req) => { link: item.link, bloggerName: item.bloggername, postDate: item.postdate, })), }; - })()); + })); - tasks.push((async () => { + channelTasks.push(wrapChannelTask("naverPlace", async () => { // Try multiple queries to find the correct place (avoid same-name different clinics) const queries = [ `${clinicName} 성형외과`, @@ -270,7 +283,7 @@ Deno.serve(async (req) => { ]; for (const q of queries) { const query = encodeURIComponent(q); - const res = await fetch(`https://openapi.naver.com/v1/search/local.json?query=${query}&display=5&sort=comment`, { headers: naverHeaders }); + const res = await fetchWithRetry(`https://openapi.naver.com/v1/search/local.json?query=${query}&display=5&sort=comment`, { headers: naverHeaders }, { label: "naver-place" }); if (!res.ok) continue; const data = await res.json(); 
// Find the best match: prefer category containing 성형 or 피부 @@ -291,12 +304,12 @@ Deno.serve(async (req) => { break; } } - })()); + })); } // ─── 6. Google Maps ─── if (APIFY_TOKEN && clinicName) { - tasks.push((async () => { + channelTasks.push(wrapChannelTask("googleMaps", async () => { const queries = [`${clinicName} 성형외과`, clinicName, `${clinicName} ${address || "강남"}`]; let items: unknown[] = []; for (const q of queries) { @@ -317,13 +330,15 @@ Deno.serve(async (req) => { stars: r.stars, text: r.text, publishedAtDate: r.publishedAtDate, })), }; + } else { + throw new Error("Google Maps: no matching place found"); } - })()); + })); } // ─── 7. Market Analysis (Perplexity) ─── if (PERPLEXITY_API_KEY && services.length > 0) { - tasks.push((async () => { + channelTasks.push(wrapChannelTask("marketAnalysis", async () => { const queries = [ { id: "competitors", prompt: `${address || "강남"} 근처 ${services.slice(0, 3).join(", ")} 전문 성형외과/피부과 경쟁 병원 5곳을 분석해줘. 각 병원의 이름, 주요 시술, 온라인 평판, 마케팅 채널을 JSON 형식으로 제공해줘.` }, { id: "keywords", prompt: `한국 ${services.slice(0, 3).join(", ")} 관련 검색 키워드 트렌드. 
네이버와 구글에서 월간 검색량이 높은 키워드 20개, 경쟁 강도, 추천 롱테일 키워드를 JSON 형식으로 제공해줘.` }, @@ -332,7 +347,7 @@ Deno.serve(async (req) => { ]; const results = await Promise.allSettled(queries.map(async q => { - const res = await fetch("https://api.perplexity.ai/chat/completions", { + const res = await fetchWithRetry("https://api.perplexity.ai/chat/completions", { method: "POST", headers: { "Content-Type": "application/json", Authorization: `Bearer ${PERPLEXITY_API_KEY}` }, body: JSON.stringify({ @@ -341,11 +356,12 @@ Deno.serve(async (req) => { { role: "user", content: q.prompt }, ], temperature: 0.3, }), - }); + }, { label: `perplexity:${q.id}`, timeoutMs: 60000 }); const data = await res.json(); return { id: q.id, content: data.choices?.[0]?.message?.content || "", citations: data.citations || [] }; })); + let successCount = 0; for (const r of results) { if (r.status === "fulfilled") { const { id, content, citations } = r.value; @@ -353,9 +369,11 @@ Deno.serve(async (req) => { const jsonMatch = content.match(/```json\n?([\s\S]*?)```/); if (jsonMatch) { try { parsed = JSON.parse(jsonMatch[1]); } catch {} } analysisData[id] = { data: parsed, citations }; + successCount++; } } - })()); + if (successCount === 0) throw new Error("All Perplexity queries failed"); + })); } // ─── 8. 
Vision Analysis: Screenshots + Gemini Vision ─── @@ -366,7 +384,7 @@ Deno.serve(async (req) => { const mainUrl = row.url || ""; const siteMap: string[] = row.scrape_data?.siteMap || []; - tasks.push((async () => { + channelTasks.push(wrapChannelTask("vision", async () => { // Capture screenshots of relevant pages + social channel landings screenshots = await captureAllScreenshots(mainUrl, siteMap, verified, FIRECRAWL_API_KEY); @@ -386,17 +404,46 @@ Deno.serve(async (req) => { caption: ss.caption, sourceUrl: ss.sourceUrl, })); - })()); + + if (screenshots.length === 0) throw new Error("No screenshots captured"); + })); } - // ─── Execute all tasks ─── - await Promise.allSettled(tasks); + // ─── Execute all channel tasks ─── + const taskResults = await Promise.all(channelTasks); - // ─── Legacy: Save to marketing_reports ─── + // ─── Build channelErrors from task results ─── + const channelErrors: Record = {}; + let failedCount = 0; + let successCount = 0; + for (const result of taskResults) { + if (result.success) { + successCount++; + } else { + failedCount++; + channelErrors[result.channel] = { + error: result.error || "Unknown error", + durationMs: result.durationMs, + }; + } + } + + const totalTasks = taskResults.length; + const isPartial = failedCount > 0 && successCount > 0; + const isFullFailure = failedCount > 0 && successCount === 0; + const collectionStatus = isFullFailure ? "collection_failed" : isPartial ? "partial" : "collected"; + + console.log(`[collect] ${successCount}/${totalTasks} tasks succeeded. 
Status: ${collectionStatus}`); + if (failedCount > 0) { + console.warn(`[collect] Failed channels:`, JSON.stringify(channelErrors)); + } + + // ─── UNCONDITIONAL Legacy Save: always persist whatever we have ─── await supabase.from("marketing_reports").update({ channel_data: channelData, analysis_data: { clinicName, services, address, analysis: analysisData, analyzedAt: new Date().toISOString() }, - status: "collected", + channel_errors: channelErrors, + status: collectionStatus, updated_at: new Date().toISOString(), }).eq("id", reportId); @@ -479,12 +526,13 @@ Deno.serve(async (req) => { ); } - // Update analysis_run + // Update analysis_run with status + errors await supabase.from("analysis_runs").update({ raw_channel_data: channelData, analysis_data: { clinicName, services, address, analysis: analysisData }, vision_analysis: channelData.visionAnalysis || {}, - status: "collecting", + channel_errors: channelErrors, + status: collectionStatus, }).eq("id", runId); } catch (e) { @@ -493,7 +541,16 @@ Deno.serve(async (req) => { } return new Response( - JSON.stringify({ success: true, channelData, analysisData, collectedAt: new Date().toISOString() }), + JSON.stringify({ + success: !isFullFailure, + status: collectionStatus, + channelData, + analysisData, + channelErrors: Object.keys(channelErrors).length > 0 ? 
channelErrors : undefined, + partialFailure: isPartial, + taskSummary: { total: totalTasks, succeeded: successCount, failed: failedCount }, + collectedAt: new Date().toISOString(), + }), { headers: { ...corsHeaders, "Content-Type": "application/json" } }, ); } catch (error) { diff --git a/supabase/functions/generate-report/index.ts b/supabase/functions/generate-report/index.ts index b4f7343..886b9e9 100644 --- a/supabase/functions/generate-report/index.ts +++ b/supabase/functions/generate-report/index.ts @@ -131,6 +131,35 @@ ${JSON.stringify(scrapeData.branding || {}, null, 2).slice(0, 1000)} let report; try { report = JSON.parse(reportText); } catch { report = { raw: reportText, parseError: true }; } + // ─── Post-processing: Inject Vision Analysis data directly ─── + // Perplexity may ignore Vision data in prompt, so we force-inject critical fields + const vision = channelData.visionAnalysis as Record | undefined; + if (vision) { + // Force-inject foundingYear if Vision found it but Perplexity didn't + if (vision.foundingYear && (!report.clinicInfo?.established || report.clinicInfo.established === "데이터 없음")) { + report.clinicInfo = report.clinicInfo || {}; + report.clinicInfo.established = String(vision.foundingYear); + console.log(`[report] Injected foundingYear from Vision: ${vision.foundingYear}`); + } + if (vision.operationYears && (!report.clinicInfo?.established || report.clinicInfo.established === "데이터 없음")) { + const year = new Date().getFullYear() - Number(vision.operationYears); + report.clinicInfo = report.clinicInfo || {}; + report.clinicInfo.established = String(year); + console.log(`[report] Calculated foundingYear from operationYears: ${vision.operationYears} → ${year}`); + } + + // Force-inject doctors from Vision if report has none + const visionDoctors = vision.doctors as { name: string; specialty: string; position?: string }[] | undefined; + if (visionDoctors?.length && (!report.clinicInfo?.doctors?.length)) { + report.clinicInfo = 
report.clinicInfo || {}; + report.clinicInfo.doctors = visionDoctors; + console.log(`[report] Injected ${visionDoctors.length} doctors from Vision`); + } + + // Store Vision analysis separately for frontend + report.visionAnalysis = vision; + } + // Embed channel enrichment data for frontend mergeEnrichment() report.channelEnrichment = channelData; report.enrichedAt = new Date().toISOString(); diff --git a/supabase/migrations/20260405_channel_errors.sql b/supabase/migrations/20260405_channel_errors.sql new file mode 100644 index 0000000..d211e39 --- /dev/null +++ b/supabase/migrations/20260405_channel_errors.sql @@ -0,0 +1,2 @@ +-- Add channel_errors column to marketing_reports for error tracking +ALTER TABLE marketing_reports ADD COLUMN IF NOT EXISTS channel_errors JSONB DEFAULT '{}';