/**
 * Vision Analysis — Gemini 2.0 Flash for screenshot analysis.
 *
 * Captures screenshots of clinic website pages and social channel landings,
 * then uses Gemini Vision to extract structured data (founding year, doctors,
 * certifications, social icons, brand colors, etc.).
 */

import { fetchWithRetry } from "./retry.ts";
// Base URL of the Firecrawl REST API; captureScreenshot appends "/scrape" to it.
const FIRECRAWL_BASE = "https://api.firecrawl.dev/v1";
/** One captured screenshot plus the metadata describing where it came from. */
export interface ScreenshotResult {
  /** Capture-target identifier, e.g. 'website-main' or 'youtube-landing'. */
  id: string;
  /**
   * Where the screenshot image can be fetched from. `captureAllScreenshots`
   * stores the image URL returned by Firecrawl here; other producers may use a
   * Supabase Storage signed URL or a data URI.
   */
  url: string;
  /**
   * Display label of the channel the page belongs to. `captureAllScreenshots`
   * emits '웹사이트', 'YouTube', 'Instagram' and '강남언니'.
   */
  channel: string;
  /** ISO-8601 timestamp of the capture run. */
  capturedAt: string;
  /** Human-readable caption for the screenshot. */
  caption: string;
  /** URL of the page that was captured. */
  sourceUrl: string;
  /** Raw base64 image data (for Vision analysis, not stored in the report). */
  base64?: string;
}

/**
 * Structured data extracted from one or more screenshots by Gemini Vision.
 * Every field is optional: a page only yields the fields its prompt asks for.
 */
export interface VisionAnalysisResult {
  /** Year the clinic was founded, as text. */
  foundingYear?: string;
  /** Number of years the clinic has been operating. */
  operationYears?: number;
  /** Doctors found on the page. */
  doctors?: Array<{ name: string; specialty: string; position?: string }>;
  /** Certification marks shown on the page. */
  certifications?: Array<string>;
  /** Treatment / service categories shown on the page. */
  serviceCategories?: Array<string>;
  /** Social-media icons detected on the page. */
  socialIcons?: Array<{ platform: string; visible: boolean }>;
  /** Floating consultation buttons detected on the page. */
  floatingButtons?: Array<string>;
  /** Main and accent brand colors. */
  brandColors?: { primary?: string; accent?: string };
  /** Banner texts and slogans. */
  slogans?: Array<string>;
  /** Figures read off a YouTube channel landing page (kept as displayed text). */
  youtubeStats?: { subscribers?: string; videos?: string; recentUpload?: string };
  /** Figures read off an Instagram profile page (kept as displayed text). */
  instagramStats?: { followers?: string; posts?: string; bio?: string };
  /** Figures read off a 강남언니 clinic page. */
  gangnamUnniStats?: { rating?: string; reviews?: string; doctors?: number };
}

/**
 * Captures a full-page screenshot of `url` through the Firecrawl scrape endpoint
 * and returns it both as a hosted image URL and as base64 text.
 *
 * The scrape response is read for a link to the image (`data.screenshot`); the
 * image is then fetched in a second request and base64-encoded so that it can be
 * handed to Gemini Vision as `inlineData`.
 *
 * NOTE(review): earlier comments called this "Firecrawl v2", but the request is
 * sent to `FIRECRAWL_BASE`, which ends in `/v1` — confirm the intended version.
 *
 * @param url - Page to capture.
 * @param firecrawlKey - Firecrawl API key, sent as a Bearer token.
 * @returns `{ screenshotUrl, base64 }`, or `null` when the scrape call or the
 *   image download is not OK, the response carries no screenshot URL, or
 *   anything throws. Failures are logged and never rethrown.
 */
async function captureScreenshot(
  url: string,
  firecrawlKey: string,
): Promise<{ screenshotUrl: string; base64: string } | null> {
  try {
    console.log(`[vision] Capturing screenshot: ${url}`);

    // "screenshot@fullPage" asks for the full-page capture through the formats
    // list; no separate screenshot options object is sent.
    const scrapeRequest = {
      method: "POST",
      headers: {
        "Content-Type": "application/json",
        Authorization: `Bearer ${firecrawlKey}`,
      },
      body: JSON.stringify({
        url,
        formats: ["screenshot@fullPage"],
        waitFor: 5000,
      }),
    };
    const scrapeRes = await fetchWithRetry(
      `${FIRECRAWL_BASE}/scrape`,
      scrapeRequest,
      { label: `firecrawl-screenshot`, timeoutMs: 45000, maxRetries: 1 },
    );

    if (!scrapeRes.ok) {
      const failureBody = await scrapeRes.text().catch(() => "");
      console.error(`[vision] Screenshot failed for ${url}: HTTP ${scrapeRes.status} — ${failureBody.slice(0, 200)}`);
      return null;
    }

    const payload = await scrapeRes.json();
    const screenshotUrl: string | null = payload.data?.screenshot || null;
    if (!screenshotUrl) {
      const presentKeys = JSON.stringify(Object.keys(payload.data || {}));
      console.warn(`[vision] Screenshot response OK but no screenshot URL for ${url}. Keys: ${presentKeys}`);
      return null;
    }

    console.log(`[vision] Screenshot URL received: ${url} → ${screenshotUrl.slice(0, 80)}...`);

    // Second request: fetch the image itself so it can be base64-encoded.
    const downloadRes = await fetchWithRetry(screenshotUrl, undefined, {
      label: `screenshot-download`,
      timeoutMs: 30000,
      maxRetries: 1,
    });
    if (!downloadRes.ok) {
      console.error(`[vision] Failed to download screenshot image: HTTP ${downloadRes.status}`);
      return null;
    }

    const rawBytes = new Uint8Array(await downloadRes.arrayBuffer());

    // Encode with the Deno standard library's base64 module, loaded on demand.
    const base64Module = await import("https://deno.land/std@0.224.0/encoding/base64.ts");
    const base64 = base64Module.encode(rawBytes);

    const encodedKb = Math.round(base64.length / 1024);
    const rawKb = Math.round(rawBytes.length / 1024);
    console.log(`[vision] Screenshot captured & converted: ${url} (${encodedKb}KB base64, ${rawKb}KB raw)`);

    return { screenshotUrl, base64 };
  } catch (err) {
    console.error(`[vision] Screenshot error for ${url}:`, err instanceof Error ? err.message : err);
    return null;
  }
}

/** Path fragments (lower-case) that mark a doctors / medical-team page. */
const DOCTOR_PAGE_HINTS: readonly string[] = [
  '/doctor', '/team', '/staff', '/specialist', '/professor', '/의료진', '/원장',
];

/** Path fragments (lower-case) that mark a surgery / treatment page. */
const SURGERY_PAGE_HINTS: readonly string[] = [
  '/surgery', '/service', '/procedure', '/treatment', '/시술', '/수술',
];

/** Path fragments (lower-case) that mark an about / introduction page. */
const ABOUT_PAGE_HINTS: readonly string[] = ['/about', '/intro', '/소개', '/greeting'];

/**
 * Find relevant sub-pages from siteMap for additional screenshots.
 *
 * For each category (doctors, surgery, about) the first site-map entry whose
 * path contains one of the category's keywords wins. Matching is
 * case-insensitive and is done on the part of the URL after the origin
 * (path + query + hash), percent-decoded, so that:
 *   - a host name such as `doctor-clinic.com` does not match `/doctor`, and
 *   - percent-encoded Korean paths (`/%EC%9D%98...`) match the Korean keywords.
 *
 * @param siteMap - Candidate page URLs, in priority order.
 * @param baseUrl - Site root; used to resolve relative site-map entries.
 * @returns The matched URLs exactly as they appear in `siteMap`; a category
 *   with no match is left out of the result.
 */
export function findRelevantPages(
  siteMap: string[],
  baseUrl: string,
): { doctorPage?: string; surgeryPage?: string; aboutPage?: string } {
  const result: { doctorPage?: string; surgeryPage?: string; aboutPage?: string } = {};

  for (const url of siteMap) {
    // The site map comes from crawled data; ignore anything that is not a string.
    if (typeof url !== 'string') continue;

    const path = toMatchablePath(url, baseUrl);
    const matches = (hints: readonly string[]): boolean =>
      hints.some((hint) => path.includes(hint));

    if (!result.doctorPage && matches(DOCTOR_PAGE_HINTS)) result.doctorPage = url;
    if (!result.surgeryPage && matches(SURGERY_PAGE_HINTS)) result.surgeryPage = url;
    if (!result.aboutPage && matches(ABOUT_PAGE_HINTS)) result.aboutPage = url;

    // Nothing left to find — stop scanning.
    if (result.doctorPage && result.surgeryPage && result.aboutPage) break;
  }

  return result;
}

/**
 * Reduces a URL to the lower-cased, percent-decoded text after its origin.
 * Falls back to the raw string when the URL cannot be parsed, and to the
 * still-encoded text when it contains a malformed percent-escape.
 */
function toMatchablePath(url: string, baseUrl: string): string {
  let target = url;

  // Try the entry as an absolute URL first, then relative to baseUrl.
  for (const base of [undefined, baseUrl]) {
    try {
      const parsed = new URL(url, base);
      target = parsed.pathname + parsed.search + parsed.hash;
      break;
    } catch {
      // Not parseable with this base — try the next one / keep the raw string.
    }
  }

  try {
    // decodeURI (not decodeURIComponent) leaves reserved characters such as an
    // encoded "/" untouched, so decoding cannot invent new path separators.
    target = decodeURI(target);
  } catch {
    // Malformed percent-escape: match against the encoded text instead.
  }

  return target.toLowerCase();
}

/**
 * Capture all relevant screenshots for a clinic.
 *
 * Targets are: the main page, the doctor / surgery / about sub-pages found by
 * `findRelevantPages`, and the landing pages of the verified YouTube,
 * Instagram and 강남언니 channels. All targets are captured concurrently — no
 * concurrency cap is applied; the list holds at most 7 entries.
 *
 * @param mainUrl - The clinic's main website URL.
 * @param siteMap - Known page URLs of the site, used to find sub-pages.
 * @param verifiedChannels - Channel data keyed by channel name. Read keys:
 *   `youtube` (`verified`, `handle`), `instagram` (array; first item's
 *   `handle`) and `gangnamUnni` (`url`). Malformed entries are skipped.
 * @param firecrawlKey - Firecrawl API key.
 * @returns One entry per successful capture, in target order (main page,
 *   sub-pages, YouTube, Instagram, 강남언니). Failed captures are omitted.
 */
export async function captureAllScreenshots(
  mainUrl: string,
  siteMap: string[],
  verifiedChannels: Record<string, unknown>,
  firecrawlKey: string,
): Promise<ScreenshotResult[]> {
  const pages = findRelevantPages(siteMap, mainUrl);
  const now = new Date().toISOString();

  // Build capture list
  const captureTargets: { id: string; url: string; channel: string; caption: string }[] = [
    { id: 'website-main', url: mainUrl, channel: '웹사이트', caption: '병원 메인 페이지' },
  ];

  if (pages.doctorPage) {
    captureTargets.push({ id: 'website-doctors', url: pages.doctorPage, channel: '웹사이트', caption: '의료진 소개 페이지' });
  }
  if (pages.surgeryPage) {
    captureTargets.push({ id: 'website-surgery', url: pages.surgeryPage, channel: '웹사이트', caption: '시술 안내 페이지' });
  }
  if (pages.aboutPage) {
    captureTargets.push({ id: 'website-about', url: pages.aboutPage, channel: '웹사이트', caption: '병원 소개 페이지' });
  }

  // YouTube channel landing. `verified` may be `true` or the string
  // 'unverifiable'; both are truthy, so one truthiness check covers them.
  const yt = verifiedChannels.youtube as Record<string, unknown> | null | undefined;
  const ytHandle = typeof yt?.handle === 'string' ? yt.handle.trim() : '';
  // Without a handle there is no URL to build (previously this produced ".../@undefined").
  if (yt?.verified && ytHandle) {
    // Channel IDs are "UC" + 22 URL-safe characters; anything else is a handle,
    // even when it happens to start with "UC".
    const isChannelId = /^UC[\w-]{22}$/.test(ytHandle);
    const ytUrl = isChannelId
      ? `https://www.youtube.com/channel/${ytHandle}`
      : `https://www.youtube.com/${ytHandle.startsWith('@') ? ytHandle : `@${ytHandle}`}`;
    captureTargets.push({ id: 'youtube-landing', url: ytUrl, channel: 'YouTube', caption: 'YouTube 채널 랜딩' });
  }

  // Instagram profile (first listed account only)
  const igList = Array.isArray(verifiedChannels.instagram)
    ? (verifiedChannels.instagram as (Record<string, unknown> | null | undefined)[])
    : [];
  const igRawHandle = igList[0]?.handle;
  const igHandle = typeof igRawHandle === 'string' ? igRawHandle.trim().replace(/^@/, '') : '';
  if (igHandle) {
    captureTargets.push({
      id: 'instagram-landing',
      url: `https://www.instagram.com/${igHandle}/`,
      channel: 'Instagram',
      caption: `Instagram @${igHandle} 프로필`,
    });
  }

  // 강남언니
  const gu = verifiedChannels.gangnamUnni as Record<string, unknown> | null | undefined;
  const guUrl = gu?.url;
  if (typeof guUrl === 'string' && guUrl) {
    captureTargets.push({ id: 'gangnamunni-page', url: guUrl, channel: '강남언니', caption: '강남언니 병원 페이지' });
  }

  // Capture every target concurrently, keeping each result paired with its
  // target so the output order follows the target list, not completion order.
  const outcomes = await Promise.allSettled(
    captureTargets.map(async (target) => ({
      target,
      shot: await captureScreenshot(target.url, firecrawlKey),
    })),
  );

  const results: ScreenshotResult[] = [];
  for (const outcome of outcomes) {
    if (outcome.status !== 'fulfilled' || !outcome.value.shot) continue;
    const { target, shot } = outcome.value;
    results.push({
      id: target.id,
      // Image URL returned by Firecrawl.
      // NOTE(review): the previous comment said it stays valid for ~7 days — confirm.
      url: shot.screenshotUrl,
      channel: target.channel,
      capturedAt: now,
      caption: target.caption,
      sourceUrl: target.url,
      base64: shot.base64,
    });
  }

  return results;
}

/**
 * Analyze a screenshot with Gemini Vision.
 *
 * Sends the image together with a page-type specific prompt to
 * `gemini-2.0-flash` and parses the first JSON object found in the reply.
 *
 * @param base64 - Base64-encoded screenshot; sent as `image/png` inline data.
 * @param pageType - Capture-target id ('website-main', 'website-doctors',
 *   'website-about', 'website-surgery', 'youtube-landing', 'instagram-landing',
 *   'gangnamunni-page'). Any other value gets a generic extraction prompt.
 * @param geminiKey - Gemini API key. Sent in the `x-goog-api-key` header rather
 *   than the URL so it cannot leak through logged URLs or error messages.
 * @returns The parsed object. It is NOT validated against
 *   `VisionAnalysisResult`; callers must treat field types as untrusted.
 *   Returns `{}` on HTTP errors, missing/malformed JSON or thrown errors.
 */
export async function analyzeScreenshot(
  base64: string,
  pageType: string,
  geminiKey: string,
): Promise<VisionAnalysisResult> {
  // The founding-year examples in the prompts are computed from today's year;
  // a hard-coded year would make the model's arithmetic wrong after it passes.
  const prompts = buildVisionPrompts(new Date().getFullYear());
  const prompt = prompts[pageType] || `이 웹페이지 스크린샷을 분석해줘. 보이는 모든 텍스트와 정보를 JSON으로 추출해줘.`;

  try {
    console.log(`[vision] Analyzing screenshot: ${pageType} (${Math.round(base64.length / 1024)}KB)`);
    const res = await fetchWithRetry(
      "https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash:generateContent",
      {
        method: "POST",
        headers: {
          "Content-Type": "application/json",
          "x-goog-api-key": geminiKey,
        },
        body: JSON.stringify({
          contents: [{
            parts: [
              { text: `${prompt}\n\nJSON만 반환해줘, 설명 없이.` },
              { inlineData: { mimeType: "image/png", data: base64 } },
            ],
          }],
          generationConfig: {
            temperature: 0.1,
            maxOutputTokens: 2048,
          },
        }),
      },
      { label: `gemini-vision:${pageType}`, timeoutMs: 45000, maxRetries: 1 },
    );

    if (!res.ok) {
      const errText = await res.text().catch(() => "");
      console.error(`[vision] Gemini failed for ${pageType}: HTTP ${res.status} — ${errText.slice(0, 200)}`);
      return {};
    }

    const data = await res.json();

    // A reply may be split over several parts; join every text part.
    const parts: unknown = data.candidates?.[0]?.content?.parts;
    const text = Array.isArray(parts)
      ? parts.map((part) => (typeof part?.text === "string" ? part.text : "")).join("")
      : "";

    // Greedy match from the first "{" to the last "}" — also strips code fences.
    const jsonMatch = text.match(/\{[\s\S]*\}/);
    if (!jsonMatch) {
      console.warn(`[vision] Gemini returned no JSON for ${pageType}. Raw: ${text.slice(0, 200)}`);
      return {};
    }

    let result: VisionAnalysisResult;
    try {
      result = JSON.parse(jsonMatch[0]);
    } catch {
      console.warn(`[vision] Gemini returned malformed JSON for ${pageType}. Raw: ${text.slice(0, 200)}`);
      return {};
    }

    console.log(`[vision] Gemini result for ${pageType}:`, JSON.stringify(result).slice(0, 300));
    return result;
  } catch (err) {
    console.error(`[vision] Gemini error for ${pageType}:`, err instanceof Error ? err.message : err);
    return {};
  }
}

/**
 * Builds the Korean extraction prompts, keyed by capture-target id.
 * `currentYear` is used for the worked "anniversary → founding year" examples.
 */
function buildVisionPrompts(currentYear: number): Record<string, string> {
  // e.g. yearsAgo(22) with currentYear 2026 → "2026 - 22 = 2004"
  const yearsAgo = (years: number): string => `${currentYear} - ${years} = ${currentYear - years}`;

  return {
    'website-main': `이 한국 성형외과/피부과 병원 메인 페이지 스크린샷을 꼼꼼히 분석해줘. 다음 정보를 JSON으로 추출해줘:

- foundingYear: 개원 연도. 반드시 찾아줘! 다음 패턴 중 하나라도 있으면 계산해:
  "22주년" → ${yearsAgo(22)}
  "22년 동안" → ${yearsAgo(22)}
  "SINCE 2004" → 2004
  "20년 전통" → ${yearsAgo(20)}
  "개원 15주년" → ${yearsAgo(15)}
  배너, 이벤트 팝업, 로고 옆 텍스트, 하단 footer 등 모든 곳을 확인해줘.
- operationYears: 운영 기간 (숫자만. "22주년"이면 22)
- certifications: 인증 마크 (JCI, 보건복지부, 의료관광 등)
- socialIcons: 보이는 소셜 미디어 아이콘 (Instagram, YouTube, Facebook, Blog, KakaoTalk 등)
- floatingButtons: 플로팅 상담 버튼 (카카오톡, LINE, WhatsApp 등)
- brandColors: 메인 컬러와 액센트 컬러 (hex)
- slogans: 배너 텍스트나 슬로건 (이벤트 텍스트 포함)
- serviceCategories: 네비게이션 메뉴에 보이는 시술 카테고리`,

    'website-doctors': `이 성형외과 의료진 페이지 스크린샷을 분석해줘. JSON으로 추출:
- doctors: [{name: "이름", specialty: "전문 분야", position: "대표원장/원장 등"}]
프로필 사진 옆에 적힌 이름과 전문 분야를 모두 읽어줘.`,

    'website-about': `이 성형외과 병원 소개 페이지를 꼼꼼히 분석해줘. JSON으로 추출:

- foundingYear: 개원 연도. 반드시 찾아줘! 소개 페이지에 자주 나오는 패턴:
  "22주년" → ${yearsAgo(22)}
  "22년 동안" → ${yearsAgo(22)}
  "SINCE 2004" → 2004
  "2004년 개원" → 2004
  "20년 전통" → ${yearsAgo(20)}
  연혁, 소개글, 대표원장 인사말 등 모든 텍스트를 꼼꼼히 확인해줘.
- operationYears: 운영 기간 (숫자만)
- doctors: [{name: "이름", specialty: "전문 분야", position: "대표원장/원장 등"}]
- certifications: 인증 마크 (JCI, 보건복지부, 의료관광 등)
- slogans: 소개 텍스트, 미션/비전 문구`,

    'website-surgery': `이 성형외과 시술 안내 페이지를 분석해줘. JSON으로 추출:
- serviceCategories: 보이는 시술 카테고리 목록 (눈성형, 코성형, 가슴성형, 안면윤곽 등)
- certifications: 보이는 인증/수상 마크`,

    'youtube-landing': `이 YouTube 채널 랜딩 페이지를 분석해줘. JSON으로 추출:
- youtubeStats: {subscribers: "구독자 수 텍스트", videos: "영상 수 텍스트", recentUpload: "최근 업로드 제목"}`,

    'instagram-landing': `이 Instagram 프로필 페이지를 분석해줘. JSON으로 추출:
- instagramStats: {followers: "팔로워 수 텍스트", posts: "게시물 수 텍스트", bio: "바이오 텍스트"}`,

    'gangnamunni-page': `이 강남언니 병원 페이지를 분석해줘. JSON으로 추출:
- gangnamUnniStats: {rating: "평점 텍스트", reviews: "리뷰 수 텍스트", doctors: 의사 수(숫자)}
- doctors: [{name: "이름", specialty: "전문 분야"}] (보이는 의사 정보)`,
  };
}

/**
 * Run full vision analysis on all captured screenshots.
 *
 * Screenshots that carry base64 data are analysed concurrently. Once every
 * analysis has finished, the results are merged in the order of `screenshots`,
 * so the merged output does not depend on which request finished first.
 *
 * Merge rules:
 *   - `foundingYear`, `operationYears`: first usable value wins.
 *   - `doctors`: concatenated, de-duplicated by name.
 *   - `certifications`, `serviceCategories`, `slogans`: de-duplicated union.
 *   - `socialIcons`, `floatingButtons`, `brandColors` and the three `*Stats`
 *     objects: the last screenshot that provides one wins.
 *
 * @param screenshots - Captured screenshots; entries without `base64` are skipped.
 * @param geminiKey - Gemini API key passed through to `analyzeScreenshot`.
 * @returns `merged` (combined result) and `perPage` (raw analysis keyed by
 *   screenshot id).
 */
export async function runVisionAnalysis(
  screenshots: ScreenshotResult[],
  geminiKey: string,
): Promise<{ merged: VisionAnalysisResult; perPage: Record<string, VisionAnalysisResult> }> {
  const perPage: Record<string, VisionAnalysisResult> = {};
  const merged: VisionAnalysisResult = {};

  const jobs: Promise<{ id: string; analysis: VisionAnalysisResult }>[] = [];
  for (const ss of screenshots) {
    const image = ss.base64;
    if (!image) continue;
    jobs.push(
      (async () => ({ id: ss.id, analysis: await analyzeScreenshot(image, ss.id, geminiKey) }))(),
    );
  }

  // allSettled keeps one failed analysis from discarding the others.
  const outcomes = await Promise.allSettled(jobs);

  for (const outcome of outcomes) {
    if (outcome.status !== 'fulfilled') {
      const reason: unknown = outcome.reason;
      console.error(`[vision] Analysis task failed:`, reason instanceof Error ? reason.message : reason);
      continue;
    }
    const { id, analysis } = outcome.value;
    perPage[id] = analysis;
    mergeVisionAnalysis(merged, analysis);
  }

  return { merged, perPage };
}

/**
 * Folds one page's analysis into `merged` (mutated in place).
 * The analysis is model output, so every field is type-checked before use.
 */
function mergeVisionAnalysis(merged: VisionAnalysisResult, analysis: VisionAnalysisResult): void {
  type Doctor = NonNullable<VisionAnalysisResult['doctors']>[number];

  if (!merged.foundingYear) {
    // The model may answer with a number although the field is typed as string.
    const year: unknown = analysis.foundingYear;
    if (typeof year === 'string' && year) merged.foundingYear = year;
    else if (typeof year === 'number' && Number.isFinite(year)) merged.foundingYear = String(year);
  }

  if (!merged.operationYears) {
    // Accept a number, or text that starts with one (e.g. "22").
    const raw: unknown = analysis.operationYears;
    const years = typeof raw === 'number' ? raw : typeof raw === 'string' ? Number.parseFloat(raw) : NaN;
    if (Number.isFinite(years) && years > 0) merged.operationYears = years;
  }

  if (Array.isArray(analysis.doctors)) {
    // The same doctor often appears on several pages; keep the first entry per name.
    const combined: Doctor[] = [...(merged.doctors ?? [])];
    const seenNames = new Set(combined.map((doctor) => doctor.name.trim()));
    for (const candidate of analysis.doctors as unknown[]) {
      if (typeof candidate !== 'object' || candidate === null) continue;
      const name = (candidate as { name?: unknown }).name;
      if (typeof name !== 'string' || !name.trim() || seenNames.has(name.trim())) continue;
      seenNames.add(name.trim());
      combined.push(candidate as Doctor);
    }
    if (combined.length) merged.doctors = combined;
  }

  const certifications = unionOfStrings(merged.certifications, analysis.certifications);
  if (certifications) merged.certifications = certifications;

  const serviceCategories = unionOfStrings(merged.serviceCategories, analysis.serviceCategories);
  if (serviceCategories) merged.serviceCategories = serviceCategories;

  const slogans = unionOfStrings(merged.slogans, analysis.slogans);
  if (slogans) merged.slogans = slogans;

  if (Array.isArray(analysis.socialIcons) && analysis.socialIcons.length) merged.socialIcons = analysis.socialIcons;
  if (Array.isArray(analysis.floatingButtons) && analysis.floatingButtons.length) merged.floatingButtons = analysis.floatingButtons;
  if (analysis.brandColors && typeof analysis.brandColors === 'object') merged.brandColors = analysis.brandColors;
  if (analysis.youtubeStats) merged.youtubeStats = analysis.youtubeStats;
  if (analysis.instagramStats) merged.instagramStats = analysis.instagramStats;
  if (analysis.gangnamUnniStats) merged.gangnamUnniStats = analysis.gangnamUnniStats;
}

/**
 * Returns the de-duplicated union of `existing` and the non-empty strings in
 * `incoming`, preserving first-seen order. Returns `existing` unchanged when
 * `incoming` is not an array or contributes nothing.
 */
function unionOfStrings(existing: string[] | undefined, incoming: unknown): string[] | undefined {
  if (!Array.isArray(incoming)) return existing;
  const additions = incoming.filter((value): value is string => typeof value === 'string' && value.length > 0);
  if (additions.length === 0) return existing;
  return [...new Set([...(existing ?? []), ...additions])];
}