/* Patch header (kept verbatim): diff --git a/supabase/functions/_shared/visionAnalysis.ts b/supabase/functions/_shared/visionAnalysis.ts new file mode 100644 index 0000000..e398d1c --- /dev/null +++ b/supabase/functions/_shared/visionAnalysis.ts @@ -0,0 +1,281 @@ */
/**
 * Vision Analysis — Gemini 2.0 Flash for screenshot analysis.
 *
 * Captures screenshots of clinic website pages and social channel landings,
 * then uses Gemini Vision to extract structured data (founding year, doctors,
 * certifications, social icons, brand colors, etc.).
 */

const FIRECRAWL_BASE = "https://api.firecrawl.dev/v1";
const GEMINI_ENDPOINT =
  "https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash:generateContent";
/** Upper bound for any single outbound request so one slow upstream cannot stall the whole run. */
const REQUEST_TIMEOUT_MS = 60000;

export interface ScreenshotResult {
  id: string;
  url: string; // Supabase Storage signed URL or data URI
  channel: string; // 'website', 'youtube', 'instagram', 'gangnamUnni', etc.
  capturedAt: string;
  caption: string;
  sourceUrl: string; // Original page URL
  base64?: string; // Raw base64 (for Vision analysis, not stored in report)
}

export interface VisionAnalysisResult {
  foundingYear?: string;
  operationYears?: number;
  doctors?: { name: string; specialty: string; position?: string }[];
  certifications?: string[];
  serviceCategories?: string[];
  socialIcons?: { platform: string; visible: boolean }[];
  floatingButtons?: string[];
  brandColors?: { primary?: string; accent?: string };
  slogans?: string[];
  youtubeStats?: { subscribers?: string; videos?: string; recentUpload?: string };
  instagramStats?: { followers?: string; posts?: string; bio?: string };
  gangnamUnniStats?: { rating?: string; reviews?: string; doctors?: number };
}

type Doctor = NonNullable<VisionAnalysisResult["doctors"]>[number];

interface CaptureTarget {
  id: string;
  url: string;
  channel: string;
  caption: string;
}

/** Per-page extraction prompts, keyed by screenshot id. Sent to the model verbatim. */
const PROMPTS: Record<string, string> = {
  "website-main": `이 한국 성형외과 병원 메인 페이지 스크린샷을 분석해줘. 다음 정보를 JSON으로 추출해줘:
- foundingYear: 개원 연도 (배너에 "SINCE 2004", "21년 무사고" 등이 있으면)
- certifications: 인증 마크 (JCI, 보건복지부, 의료관광 등)
- socialIcons: 보이는 소셜 미디어 아이콘 (Instagram, YouTube, Facebook, Blog, KakaoTalk 등)
- floatingButtons: 플로팅 상담 버튼 (카카오톡, LINE, WhatsApp 등)
- brandColors: 메인 컬러와 액센트 컬러 (hex)
- slogans: 배너 텍스트나 슬로건
- serviceCategories: 네비게이션 메뉴에 보이는 시술 카테고리`,

  "website-doctors": `이 성형외과 의료진 페이지 스크린샷을 분석해줘. JSON으로 추출:
- doctors: [{name: "이름", specialty: "전문 분야", position: "대표원장/원장 등"}]
  프로필 사진 옆에 적힌 이름과 전문 분야를 모두 읽어줘.`,

  "website-surgery": `이 성형외과 시술 안내 페이지를 분석해줘. JSON으로 추출:
- serviceCategories: 보이는 시술 카테고리 목록 (눈성형, 코성형, 가슴성형, 안면윤곽 등)
- certifications: 보이는 인증/수상 마크`,

  "youtube-landing": `이 YouTube 채널 랜딩 페이지를 분석해줘. JSON으로 추출:
- youtubeStats: {subscribers: "구독자 수 텍스트", videos: "영상 수 텍스트", recentUpload: "최근 업로드 제목"}`,

  "instagram-landing": `이 Instagram 프로필 페이지를 분석해줘. JSON으로 추출:
- instagramStats: {followers: "팔로워 수 텍스트", posts: "게시물 수 텍스트", bio: "바이오 텍스트"}`,

  "gangnamunni-page": `이 강남언니 병원 페이지를 분석해줘. JSON으로 추출:
- gangnamUnniStats: {rating: "평점 텍스트", reviews: "리뷰 수 텍스트", doctors: 의사 수(숫자)}
- doctors: [{name: "이름", specialty: "전문 분야"}] (보이는 의사 정보)`,
};

const FALLBACK_PROMPT = `이 웹페이지 스크린샷을 분석해줘. 보이는 모든 텍스트와 정보를 JSON으로 추출해줘.`;

/** URL path fragments that identify each kind of sub-page (matched case-insensitively). */
const PAGE_KEYWORDS = {
  doctorPage: ["/doctor", "/team", "/staff", "/specialist", "/professor", "/의료진", "/원장"],
  surgeryPage: ["/surgery", "/service", "/procedure", "/treatment", "/시술", "/수술"],
  aboutPage: ["/about", "/intro", "/소개", "/greeting"],
};

/** True for non-null, non-array objects. */
function isRecord(value: unknown): value is Record<string, unknown> {
  return typeof value === "object" && value !== null && !Array.isArray(value);
}

/** Keeps only the non-empty string members of an arbitrary (model-produced) value. */
function toStringArray(value: unknown): string[] {
  if (!Array.isArray(value)) return [];
  return value.filter((item): item is string => typeof item === "string" && item.trim() !== "");
}

/** Keeps only well-formed doctor entries (a non-empty string name is required). */
function toDoctors(value: unknown): Doctor[] {
  if (!Array.isArray(value)) return [];
  const doctors: Doctor[] = [];
  for (const item of value) {
    if (!isRecord(item) || typeof item.name !== "string" || item.name.trim() === "") continue;
    const doctor: Doctor = {
      name: item.name.trim(),
      specialty: typeof item.specialty === "string" ? item.specialty : "",
    };
    if (typeof item.position === "string" && item.position !== "") doctor.position = item.position;
    doctors.push(doctor);
  }
  return doctors;
}

/** Merges doctor lists, de-duplicating by whitespace-insensitive name and filling missing fields. */
function mergeDoctors(existing: Doctor[], incoming: Doctor[]): Doctor[] {
  const byName = new Map<string, Doctor>();
  for (const doctor of [...existing, ...incoming]) {
    const key = doctor.name.replace(/\s+/g, "").toLowerCase();
    const known = byName.get(key);
    if (!known) {
      byName.set(key, { ...doctor });
      continue;
    }
    if (!known.specialty && doctor.specialty) known.specialty = doctor.specialty;
    if (!known.position && doctor.position) known.position = doctor.position;
  }
  return [...byName.values()];
}

/** `fetch` with an abort timer. The timer covers the time until response headers arrive. */
async function fetchWithTimeout(input: string, init: RequestInit, timeoutMs: number): Promise<Response> {
  const controller = new AbortController();
  const timer = setTimeout(() => controller.abort(), timeoutMs);
  try {
    return await fetch(input, { ...init, signal: controller.signal });
  } finally {
    clearTimeout(timer);
  }
}

/** Base64-encodes bytes in chunks so large images do not overflow the argument limit. */
function bytesToBase64(bytes: Uint8Array): string {
  const CHUNK = 0x8000;
  let binary = "";
  for (let offset = 0; offset < bytes.length; offset += CHUNK) {
    binary += String.fromCharCode(...bytes.subarray(offset, offset + CHUNK));
  }
  return btoa(binary);
}

/**
 * Normalises whatever the screenshot API returned into raw base64.
 * Accepts a hosted http(s) URL (downloaded and encoded), a `data:` URI
 * (prefix stripped) or a bare base64 string (returned unchanged).
 */
async function toRawBase64(screenshot: string): Promise<string | null> {
  const dataUri = /^data:[^;,]*;base64,([\s\S]*)$/.exec(screenshot);
  if (dataUri) return dataUri[1] || null;

  if (/^https?:\/\//i.test(screenshot)) {
    const res = await fetchWithTimeout(screenshot, {}, REQUEST_TIMEOUT_MS);
    if (!res.ok) return null;
    const bytes = new Uint8Array(await res.arrayBuffer());
    return bytes.length > 0 ? bytesToBase64(bytes) : null;
  }

  return screenshot;
}

/** Guesses the image MIME type from the leading base64 characters; defaults to PNG. */
function sniffImageMime(base64: string): string {
  if (base64.startsWith("/9j/")) return "image/jpeg";
  if (base64.startsWith("UklGR")) return "image/webp";
  if (base64.startsWith("R0lGOD")) return "image/gif";
  return "image/png";
}

/** `decodeURI` that returns the input unchanged when it contains a malformed escape. */
function safeDecodeUri(url: string): string {
  try {
    return decodeURI(url);
  } catch {
    return url;
  }
}

/**
 * Builds the landing URL for a YouTube handle or channel ID.
 * Only a well-formed channel ID ("UC" + 22 id characters) uses the
 * `/channel/` form; anything else is treated as an `@handle`.
 */
function buildYoutubeUrl(handle: string): string {
  if (/^UC[\w-]{22}$/.test(handle)) return `https://www.youtube.com/channel/${handle}`;
  return `https://www.youtube.com/${handle.startsWith("@") ? handle : `@${handle}`}`;
}

/**
 * Capture screenshot of a URL via Firecrawl.
 *
 * @param url Page to capture.
 * @param firecrawlKey Firecrawl API key (sent as a Bearer token).
 * @returns Raw base64 image data, or `null` when the capture failed for any
 *   reason (non-2xx response, missing screenshot, network error, timeout).
 */
async function captureScreenshot(
  url: string,
  firecrawlKey: string,
): Promise<string | null> {
  try {
    const res = await fetchWithTimeout(
      `${FIRECRAWL_BASE}/scrape`,
      {
        method: "POST",
        headers: {
          "Content-Type": "application/json",
          Authorization: `Bearer ${firecrawlKey}`,
        },
        body: JSON.stringify({
          url,
          formats: ["screenshot"],
          waitFor: 5000,
          screenshotOptions: {
            fullPage: false,
            quality: 80,
          },
        }),
      },
      REQUEST_TIMEOUT_MS,
    );
    if (!res.ok) {
      console.warn(`[vision] screenshot request for ${url} failed with HTTP ${res.status}`);
      return null;
    }
    const data: unknown = await res.json();
    const screenshot = isRecord(data) && isRecord(data.data) ? data.data.screenshot : undefined;
    if (typeof screenshot !== "string" || screenshot === "") return null;
    // The field may hold a hosted image URL rather than inline base64; normalise both.
    return await toRawBase64(screenshot);
  } catch (err) {
    // Best effort: a failed capture must never abort the other captures.
    console.warn(`[vision] screenshot capture failed for ${url}:`, err);
    return null;
  }
}

/**
 * Find relevant sub-pages from siteMap for additional screenshots.
 *
 * URLs are percent-decoded before matching so that encoded Korean paths
 * (e.g. `/%EC%9D%98%EB%A3%8C%EC%A7%84`) match keywords such as `/의료진`.
 * The first matching URL wins for each slot; one URL may fill several slots.
 *
 * @param siteMap Candidate URLs, in priority order.
 * @param baseUrl Site root. Currently unused; kept for interface compatibility.
 * @returns The original (undecoded) URL for each page type that was found.
 */
export function findRelevantPages(
  siteMap: string[],
  baseUrl: string,
): { doctorPage?: string; surgeryPage?: string; aboutPage?: string } {
  const result: { doctorPage?: string; surgeryPage?: string; aboutPage?: string } = {};
  const slots = Object.keys(PAGE_KEYWORDS) as (keyof typeof PAGE_KEYWORDS)[];

  for (const url of siteMap) {
    const haystack = safeDecodeUri(url).toLowerCase();
    for (const slot of slots) {
      if (!result[slot] && PAGE_KEYWORDS[slot].some((keyword) => haystack.includes(keyword))) {
        result[slot] = url;
      }
    }
  }

  return result;
}

/**
 * Capture all relevant screenshots for a clinic.
 *
 * Targets: the main page, the doctor and surgery sub-pages (when found in
 * `siteMap`), and the YouTube, Instagram and 강남언니 landings (when present in
 * `verifiedChannels`). At most six captures run, all in parallel.
 *
 * @param mainUrl Clinic home page; skipped when empty.
 * @param siteMap URLs discovered on the clinic site.
 * @param verifiedChannels Channel verification results keyed by platform.
 * @param firecrawlKey Firecrawl API key.
 * @returns Successful captures, in target order (failed captures are omitted).
 */
export async function captureAllScreenshots(
  mainUrl: string,
  siteMap: string[],
  verifiedChannels: Record<string, unknown>,
  firecrawlKey: string,
): Promise<ScreenshotResult[]> {
  const pages = findRelevantPages(siteMap, mainUrl);
  const now = new Date().toISOString();

  // Build capture list
  const captureTargets: CaptureTarget[] = [];
  if (mainUrl) {
    captureTargets.push({ id: "website-main", url: mainUrl, channel: "웹사이트", caption: "병원 메인 페이지" });
  }
  if (pages.doctorPage) {
    captureTargets.push({ id: "website-doctors", url: pages.doctorPage, channel: "웹사이트", caption: "의료진 소개 페이지" });
  }
  if (pages.surgeryPage) {
    captureTargets.push({ id: "website-surgery", url: pages.surgeryPage, channel: "웹사이트", caption: "시술 안내 페이지" });
  }

  // YouTube channel landing. Any truthy `verified` counts (including the
  // string 'unverifiable'), but a handle is required to build a URL.
  const yt = verifiedChannels?.youtube;
  if (isRecord(yt) && yt.verified) {
    const handle = typeof yt.handle === "string" ? yt.handle.trim() : "";
    if (handle) {
      captureTargets.push({ id: "youtube-landing", url: buildYoutubeUrl(handle), channel: "YouTube", caption: "YouTube 채널 랜딩" });
    }
  }

  // Instagram profile: prefer a verified account, fall back to the first entry.
  const igRaw = verifiedChannels?.instagram;
  const igList = (Array.isArray(igRaw) ? igRaw : []).filter(isRecord);
  const ig = igList.find((entry) => Boolean(entry.verified)) ?? igList[0];
  if (ig) {
    const handle = (typeof ig.handle === "string" ? ig.handle : "").replace(/^@/, "");
    if (handle) {
      captureTargets.push({ id: "instagram-landing", url: `https://www.instagram.com/${handle}/`, channel: "Instagram", caption: `Instagram @${handle} 프로필` });
    }
  }

  // 강남언니
  const gu = verifiedChannels?.gangnamUnni;
  if (isRecord(gu) && typeof gu.url === "string" && gu.url) {
    captureTargets.push({ id: "gangnamunni-page", url: gu.url, channel: "강남언니", caption: "강남언니 병원 페이지" });
  }

  // Capture all in parallel; results keep target order regardless of which
  // request finishes first.
  const captured = await Promise.all(
    captureTargets.map(async (target): Promise<ScreenshotResult | null> => {
      const base64 = await captureScreenshot(target.url, firecrawlKey);
      if (!base64) return null;
      return {
        id: target.id,
        url: `data:image/png;base64,${base64.slice(0, 100)}...`, // Placeholder — will be replaced with Storage URL
        channel: target.channel,
        capturedAt: now,
        caption: target.caption,
        sourceUrl: target.url,
        base64,
      };
    }),
  );

  return captured.filter((shot): shot is ScreenshotResult => shot !== null);
}

/**
 * Analyze a screenshot with Gemini Vision.
 *
 * @param base64 Raw base64 image data (no `data:` prefix).
 * @param pageType Screenshot id used to pick the prompt; unknown ids use a generic prompt.
 * @param geminiKey Gemini API key (sent in the `x-goog-api-key` header, not the URL).
 * @returns The parsed JSON object from the model, or `{}` on any failure.
 *   Field types are not validated; treat the content as untrusted.
 */
export async function analyzeScreenshot(
  base64: string,
  pageType: string,
  geminiKey: string,
): Promise<VisionAnalysisResult> {
  const prompt = PROMPTS[pageType] || FALLBACK_PROMPT;

  try {
    const res = await fetchWithTimeout(
      GEMINI_ENDPOINT,
      {
        method: "POST",
        headers: {
          "Content-Type": "application/json",
          "x-goog-api-key": geminiKey,
        },
        body: JSON.stringify({
          contents: [{
            parts: [
              { text: `${prompt}\n\nJSON만 반환해줘, 설명 없이.` },
              { inlineData: { mimeType: sniffImageMime(base64), data: base64 } },
            ],
          }],
          generationConfig: {
            temperature: 0.1,
            maxOutputTokens: 2048,
            responseMimeType: "application/json",
          },
        }),
      },
      REQUEST_TIMEOUT_MS,
    );

    if (!res.ok) {
      console.warn(`[vision] Gemini request for ${pageType} failed with HTTP ${res.status}`);
      return {};
    }

    const data = await res.json();
    const parts: unknown = data?.candidates?.[0]?.content?.parts;
    // The answer may be split across several text parts; join them all.
    const text = Array.isArray(parts)
      ? parts.map((part: unknown) => (isRecord(part) && typeof part.text === "string" ? part.text : "")).join("")
      : "";

    // Tolerate code fences or stray prose around the JSON object.
    const jsonMatch = text.match(/\{[\s\S]*\}/);
    if (!jsonMatch) return {};
    const parsed: unknown = JSON.parse(jsonMatch[0]);
    return isRecord(parsed) ? (parsed as VisionAnalysisResult) : {};
  } catch (err) {
    console.warn(`[vision] screenshot analysis failed for ${pageType}:`, err);
    return {};
  }
}

/**
 * Folds one page's analysis into the combined result.
 * Scalar and object fields keep the first value seen; list fields accumulate
 * without duplicates. Values of the wrong type are ignored.
 */
function mergeAnalysis(merged: VisionAnalysisResult, analysis: VisionAnalysisResult): void {
  if (analysis.foundingYear && !merged.foundingYear) merged.foundingYear = analysis.foundingYear;
  if (analysis.operationYears && !merged.operationYears) merged.operationYears = analysis.operationYears;

  const doctors = toDoctors(analysis.doctors);
  if (doctors.length) merged.doctors = mergeDoctors(merged.doctors ?? [], doctors);

  const certifications = toStringArray(analysis.certifications);
  if (certifications.length) {
    merged.certifications = [...new Set([...(merged.certifications ?? []), ...certifications])];
  }

  const serviceCategories = toStringArray(analysis.serviceCategories);
  if (serviceCategories.length) {
    merged.serviceCategories = [...new Set([...(merged.serviceCategories ?? []), ...serviceCategories])];
  }

  const slogans = toStringArray(analysis.slogans);
  if (slogans.length) merged.slogans = [...new Set([...(merged.slogans ?? []), ...slogans])];

  if (!merged.socialIcons && Array.isArray(analysis.socialIcons) && analysis.socialIcons.length) {
    merged.socialIcons = analysis.socialIcons;
  }
  const floatingButtons = toStringArray(analysis.floatingButtons);
  if (!merged.floatingButtons && floatingButtons.length) merged.floatingButtons = floatingButtons;

  if (!merged.brandColors && isRecord(analysis.brandColors)) merged.brandColors = analysis.brandColors;
  if (!merged.youtubeStats && isRecord(analysis.youtubeStats)) merged.youtubeStats = analysis.youtubeStats;
  if (!merged.instagramStats && isRecord(analysis.instagramStats)) merged.instagramStats = analysis.instagramStats;
  if (!merged.gangnamUnniStats && isRecord(analysis.gangnamUnniStats)) merged.gangnamUnniStats = analysis.gangnamUnniStats;
}

/**
 * Run full vision analysis on all captured screenshots.
 *
 * Screenshots without `base64` are skipped. Analyses run in parallel, and the
 * merge happens afterwards in screenshot order, so the combined result does
 * not depend on which request finishes first.
 *
 * @param screenshots Captures to analyze.
 * @param geminiKey Gemini API key.
 * @returns `perPage` — the raw analysis per screenshot id; `merged` — the combined view.
 */
export async function runVisionAnalysis(
  screenshots: ScreenshotResult[],
  geminiKey: string,
): Promise<{ merged: VisionAnalysisResult; perPage: Record<string, VisionAnalysisResult> }> {
  const perPage: Record<string, VisionAnalysisResult> = {};
  const merged: VisionAnalysisResult = {};

  const analyzable = screenshots.filter((ss) => Boolean(ss.base64));
  const analyses = await Promise.all(
    analyzable.map((ss) =>
      analyzeScreenshot(ss.base64 as string, ss.id, geminiKey).catch((): VisionAnalysisResult => ({}))
    ),
  );

  analyzable.forEach((ss, index) => {
    const analysis = analyses[index];
    perPage[ss.id] = analysis;
    mergeAnalysis(merged, analysis);
  });

  return { merged, perPage };
}
/* Next patch header (kept verbatim; continues on the following line): diff --git a/supabase/functions/collect-channel-data/index.ts b/supabase/functions/collect-channel-data/index.ts index e957cb5..53c0bd8 100644 --- a/supabase/functions/collect-channel-data/index.ts +++ b/supabase/functions/collect-channel-data/index.ts @@ -2,6 +2,7 @@ import */
"@supabase/functions-js/edge-runtime.d.ts"; import { createClient } from "https://esm.sh/@supabase/supabase-js@2"; import type { VerifiedChannels } from "../_shared/verifyHandles.ts"; import { PERPLEXITY_MODEL } from "../_shared/config.ts"; +import { captureAllScreenshots, runVisionAnalysis, type ScreenshotResult } from "../_shared/visionAnalysis.ts"; const corsHeaders = { "Access-Control-Allow-Origin": "*", @@ -355,6 +356,37 @@ Deno.serve(async (req) => { })()); } + // ─── 8. Vision Analysis: Screenshots + Gemini Vision ─── + const GEMINI_API_KEY = Deno.env.get("GEMINI_API_KEY") || ""; + let screenshots: ScreenshotResult[] = []; + + if (FIRECRAWL_API_KEY) { + const mainUrl = row.url || ""; + const siteMap: string[] = row.scrape_data?.siteMap || []; + + tasks.push((async () => { + // Capture screenshots of relevant pages + social channel landings + screenshots = await captureAllScreenshots(mainUrl, siteMap, verified, FIRECRAWL_API_KEY); + + // Run Gemini Vision on captured screenshots + if (GEMINI_API_KEY && screenshots.length > 0) { + const vision = await runVisionAnalysis(screenshots, GEMINI_API_KEY); + channelData.visionAnalysis = vision.merged; + channelData.visionPerPage = vision.perPage; + } + + // Store screenshots (without base64 — just metadata for report) + channelData.screenshots = screenshots.map(ss => ({ + id: ss.id, + url: ss.base64 ? 
`data:image/png;base64,${ss.base64}` : ss.url, + channel: ss.channel, + capturedAt: ss.capturedAt, + caption: ss.caption, + sourceUrl: ss.sourceUrl, + })); + })()); + } + // ─── Execute all tasks ─── await Promise.allSettled(tasks); diff --git a/supabase/functions/generate-report/index.ts b/supabase/functions/generate-report/index.ts index 642959e..f91b0d2 100644 --- a/supabase/functions/generate-report/index.ts +++ b/supabase/functions/generate-report/index.ts @@ -132,6 +132,19 @@ ${JSON.stringify(scrapeData.branding || {}, null, 2).slice(0, 1000)} report.channelEnrichment = channelData; report.enrichedAt = new Date().toISOString(); + // Embed screenshots as evidence for frontend EvidenceGallery + const screenshots = (channelData.screenshots || []) as Record[]; + if (screenshots.length > 0) { + report.screenshots = screenshots.map((ss: Record) => ({ + id: ss.id, + url: ss.url, // data URI or Storage URL + channel: ss.channel, + capturedAt: ss.capturedAt, + caption: ss.caption, + sourceUrl: ss.sourceUrl, + })); + } + // Embed verified handles const igHandles = (verified.instagram || []).filter((v: { verified: boolean }) => v.verified).map((v: { handle: string }) => v.handle); report.socialHandles = { @@ -344,5 +357,27 @@ function buildChannelSummary(channelData: Record, verified: Rec parts.push(`### 네이버 플레이스: ${np.name} (${np.category})`); } + // Vision Analysis (from Gemini Vision on screenshots) + const vision = channelData.visionAnalysis as Record | undefined; + if (vision) { + parts.push("\n### Vision Analysis (스크린샷 기반 추출 데이터)"); + if (vision.foundingYear) parts.push(`- 개원 연도: ${vision.foundingYear}`); + if (vision.operationYears) parts.push(`- 운영 기간: ${vision.operationYears}년`); + const doctors = vision.doctors as { name: string; specialty: string; position?: string }[] | undefined; + if (doctors?.length) { + parts.push(`- 의료진 (스크린샷 확인): ${doctors.map(d => `${d.name}(${d.specialty}${d.position ? 
', ' + d.position : ''})`).join(', ')}`); + } + const certs = vision.certifications as string[] | undefined; + if (certs?.length) parts.push(`- 인증: ${certs.join(', ')}`); + const services = vision.serviceCategories as string[] | undefined; + if (services?.length) parts.push(`- 시술 카테고리: ${services.join(', ')}`); + const slogans = vision.slogans as string[] | undefined; + if (slogans?.length) parts.push(`- 슬로건: ${slogans.join(' / ')}`); + const ytStats = vision.youtubeStats as Record | undefined; + if (ytStats) parts.push(`- YouTube (스크린샷): 구독자 ${ytStats.subscribers || '?'}, 영상 ${ytStats.videos || '?'}`); + const igStats = vision.instagramStats as Record | undefined; + if (igStats) parts.push(`- Instagram (스크린샷): 팔로워 ${igStats.followers || '?'}, 게시물 ${igStats.posts || '?'}`); + } + return parts.join("\n"); }