// o2o-infinith-demo/supabase/functions/_shared/visionAnalysis.ts

/**
 * Vision Analysis — Gemini 2.0 Flash for screenshot analysis.
 *
 * Captures screenshots of clinic website pages and social channel landings,
 * then uses Gemini Vision to extract structured data (founding year, doctors,
 * certifications, social icons, brand colors, etc.).
 */

import { fetchWithRetry } from "./retry.ts";
import { classifyPageUrl, validateClassifier } from "./urlClassifier.ts";

// Run the URL classifier self-test once, when this module is first loaded (cold start).
// A failing self-test is only reported with console.warn — nothing is thrown here,
// so module initialisation continues either way.
const classifierValidation = validateClassifier();
if (!classifierValidation.pass) {
  console.warn(`[harness] URL classifier self-test FAILED:`, classifierValidation.failures);
}

// Base URL of the Firecrawl REST API. Note the `/v1` path segment: screenshot requests
// in this file are POSTed to `${FIRECRAWL_BASE}/scrape`.
const FIRECRAWL_BASE = "https://api.firecrawl.dev/v1";

/**
 * One captured screenshot together with the metadata describing where it came from.
 */
export interface ScreenshotResult {
  /** Capture-target identifier, e.g. 'website-main', 'youtube-landing', 'naver-place'. */
  id: string;
  /**
   * Location of the screenshot image. captureAllScreenshots fills this with the
   * screenshot URL returned by Firecrawl.
   * NOTE(review): an earlier comment described this as a "Supabase Storage signed URL
   * or data URI" — presumably a caller rewrites it later; confirm.
   */
  url: string;
  /** Display label of the channel as set by captureAllScreenshots, e.g. '웹사이트', 'YouTube', 'Instagram', '강남언니'. */
  channel: string;
  /** Capture timestamp as produced by `new Date().toISOString()`. */
  capturedAt: string;
  /** Short human-readable caption for the screenshot. */
  caption: string;
  /** URL of the page that was screenshotted. */
  sourceUrl: string;
  /** Raw base64 image data used for Vision analysis; optional, and per the original note not stored in the report. */
  base64?: string;
}

/**
 * Structured data extracted from a screenshot by Gemini Vision.
 *
 * Every field is optional: which ones are present depends on the prompt used for the
 * page type and on what the model actually returns. The per-channel `*Stats` objects
 * mirror the JSON shapes requested by the prompts in analyzeScreenshot; their values
 * are mostly the text exactly as it appears on the page (e.g. "1.2만"), not parsed numbers.
 */
export interface VisionAnalysisResult {
  /** Founding year of the clinic, as a string (e.g. "2004"). */
  foundingYear?: string;
  /** Number of years the clinic has been operating. */
  operationYears?: number;
  /** Doctors listed on the page. */
  doctors?: { name: string; specialty: string; position?: string }[];
  /** Certification / accreditation marks visible on the page. */
  certifications?: string[];
  /** Treatment or service categories visible on the page. */
  serviceCategories?: string[];
  /** Social media icons detected on the page. */
  socialIcons?: { platform: string; visible: boolean }[];
  /** Floating consultation buttons (messenger shortcuts and similar). */
  floatingButtons?: string[];
  /** Main and accent brand colours (the prompt asks for hex values). */
  brandColors?: { primary?: string; accent?: string };
  /** Banner texts, slogans, mission/vision phrases. */
  slogans?: string[];
  /** Stats read from a YouTube channel landing page. */
  youtubeStats?: { subscribers?: string; videos?: string; recentUpload?: string };
  /** Stats read from the primary Instagram profile. */
  instagramStats?: { followers?: string; posts?: string; bio?: string };
  /** Stats read from a second (global/English) Instagram profile. */
  instagramEnStats?: { followers?: string; posts?: string; bio?: string; language?: string };
  /** Stats read from the clinic's Gangnam Unni page. */
  gangnamUnniStats?: { rating?: string; reviews?: string; doctors?: number };
  /** Stats read from a Facebook page. */
  facebookStats?: { followers?: string; likes?: string; rating?: string; category?: string; recentPost?: string };
  /** Stats read from a TikTok profile. */
  tiktokStats?: { followers?: string; likes?: string; videos?: string; bio?: string; recentVideo?: string };
  /** Stats read from a Naver Blog. */
  naverBlogStats?: { neighbors?: string; visitors?: string; recentTitle?: string; blogName?: string };
  /** Stats read from a Naver Place page. */
  naverPlaceStats?: {
    rating?: string;
    reviews?: string;
    visitorReviews?: string;
    address?: string;
    hours?: string;
    category?: string;
  };
  /** Stats read from a Google Maps page. */
  googleMapsStats?: { rating?: string; reviews?: string; address?: string; hours?: string; photos?: string };
  /** Summary of a before/after or gallery page. */
  galleryStats?: { estimatedPhotoCount?: number; categories?: string[] };
  /** Summary of the English version of the clinic website. */
  enSiteStats?: { languages?: string[]; targetMarkets?: string[]; keyServices?: string[] };
}

/**
 * Human-readable messages for screenshot captures that failed, kept for debugging.
 *
 * captureScreenshot appends to this array; nothing in this file ever clears it.
 * NOTE(review): as module-level state it presumably keeps growing across invocations
 * that reuse the same warm instance — confirm whether callers reset it.
 */
export const screenshotErrors: string[] = [];

/**
 * Capture a full-page screenshot of `url` through the Firecrawl scrape endpoint.
 *
 * The request goes to `${FIRECRAWL_BASE}/scrape` (FIRECRAWL_BASE points at the `/v1`
 * API) with the `screenshot@fullPage` format. The response is read as a URL of the
 * hosted screenshot image, not inline image data, so the image is downloaded and
 * base64-encoded here so that Gemini Vision can consume it via `inlineData`.
 *
 * This function never throws: every failure is logged, recorded in
 * `screenshotErrors`, and reported as `null`.
 *
 * @param url - Page to capture.
 * @param firecrawlKey - Firecrawl API key, sent as a Bearer token.
 * @returns The screenshot URL plus the image as base64, or `null` on any failure.
 */
async function captureScreenshot(
  url: string,
  firecrawlKey: string,
): Promise<{ screenshotUrl: string; base64: string } | null> {
  try {
    console.log(`[vision] Capturing screenshot: ${url}`);
    console.log(`[vision] Firecrawl key present: ${!!firecrawlKey}, length: ${firecrawlKey?.length}`);

    // Full-page capture is selected through the format name itself
    // ("screenshot@fullPage"); no separate screenshot options object is sent.
    const res = await fetchWithRetry(`${FIRECRAWL_BASE}/scrape`, {
      method: "POST",
      headers: {
        "Content-Type": "application/json",
        Authorization: `Bearer ${firecrawlKey}`,
      },
      body: JSON.stringify({
        url,
        formats: ["screenshot@fullPage"],
        waitFor: 5000,
      }),
    }, { label: `firecrawl-screenshot`, timeoutMs: 45000, maxRetries: 1 });

    if (!res.ok) {
      // Keep only the first 200 characters of the error body so logs stay readable.
      const errText = await res.text().catch(() => "");
      const msg = `Screenshot HTTP ${res.status} for ${url}: ${errText.slice(0, 200)}`;
      console.error(`[vision] ${msg}`);
      screenshotErrors.push(msg);
      return null;
    }

    const data = await res.json();
    const screenshotUrl: string | null = data.data?.screenshot || null;
    if (!screenshotUrl) {
      // A 2xx response without a screenshot: log which keys did come back to aid debugging.
      const msg = `Screenshot OK but no URL for ${url}. Keys: ${JSON.stringify(Object.keys(data.data || {}))}`;
      console.warn(`[vision] ${msg}`);
      screenshotErrors.push(msg);
      return null;
    }

    console.log(`[vision] Screenshot URL received: ${url} → ${screenshotUrl.slice(0, 80)}...`);

    // Download the screenshot image so it can be handed to Gemini Vision as base64.
    const imgRes = await fetchWithRetry(screenshotUrl, undefined, {
      label: `screenshot-download`,
      timeoutMs: 30000,
      maxRetries: 1,
    });
    if (!imgRes.ok) {
      const msg = `Screenshot download failed: HTTP ${imgRes.status} for ${url}`;
      console.error(`[vision] ${msg}`);
      screenshotErrors.push(msg);
      return null;
    }
    const bytes = new Uint8Array(await imgRes.arrayBuffer());

    // btoa() needs a "binary string" (one char per byte). Build it one chunk per
    // String.fromCharCode call: chunking keeps the argument list well below engine
    // limits, and avoids appending to a string one character at a time.
    const chunkSize = 8192;
    const pieces: string[] = [];
    for (let offset = 0; offset < bytes.length; offset += chunkSize) {
      pieces.push(String.fromCharCode(...bytes.subarray(offset, offset + chunkSize)));
    }
    const base64 = btoa(pieces.join(""));

    console.log(`[vision] Screenshot captured & converted: ${url} (${Math.round(base64.length / 1024)}KB base64, ${Math.round(bytes.length / 1024)}KB raw)`);

    return { screenshotUrl, base64 };
  } catch (err) {
    // Network errors, timeouts and JSON parse failures all end up here.
    const msg = `Screenshot exception for ${url}: ${err instanceof Error ? err.message : String(err)}`;
    console.error(`[vision] ${msg}`);
    screenshotErrors.push(msg);
    return null;
  }
}

/**
 * Pick the sub-pages of a site that are worth an extra screenshot.
 *
 * Each URL in `siteMap` is classified with classifyPageUrl (kept in urlClassifier.ts
 * for testability); the first URL seen for each of the "doctor", "surgery" and
 * "about" categories is returned.
 *
 * @param siteMap - URLs discovered on the site, in priority order.
 * @param _baseUrl - Unused; kept for interface compatibility.
 * @returns The first matching URL per category, `undefined` where none matched.
 */
export function findRelevantPages(
  siteMap: string[],
  _baseUrl: string,
): { doctorPage?: string; surgeryPage?: string; aboutPage?: string } {
  // category → first URL classified into it
  const firstByCategory = new Map<unknown, string>();

  for (const pageUrl of siteMap) {
    const category = classifyPageUrl(pageUrl);
    if (!firstByCategory.has(category)) {
      firstByCategory.set(category, pageUrl);
    }
  }

  return {
    doctorPage: firstByCategory.get("doctor"),
    surgeryPage: firstByCategory.get("surgery"),
    aboutPage: firstByCategory.get("about"),
  };
}

/** One page or profile queued for screenshot capture. */
interface CaptureTarget {
  id: string;
  url: string;
  channel: string;
  caption: string;
}

/** Returns `value` when it is a non-empty string, otherwise `undefined`. */
function nonEmptyString(value: unknown): string | undefined {
  return typeof value === "string" && value.length > 0 ? value : undefined;
}

/** Narrows a verified-channel entry to a plain record, or `null` when it is missing or not an object. */
function channelRecord(value: unknown): Record<string, unknown> | null {
  return typeof value === "object" && value !== null ? (value as Record<string, unknown>) : null;
}

/**
 * Build the ordered list of pages and profiles to screenshot for a clinic.
 * Entries whose handle/URL is missing or not a string are skipped rather than
 * turned into URLs such as ".../@undefined".
 */
function buildCaptureTargets(
  mainUrl: string,
  siteMap: string[],
  verifiedChannels: Record<string, unknown>,
): CaptureTarget[] {
  const targets: CaptureTarget[] = [
    { id: "website-main", url: mainUrl, channel: "웹사이트", caption: "병원 메인 페이지" },
  ];

  // Website sub-pages picked by the URL classifier.
  const pages = findRelevantPages(siteMap, mainUrl);
  if (pages.doctorPage) {
    targets.push({ id: "website-doctors", url: pages.doctorPage, channel: "웹사이트", caption: "의료진 소개 페이지" });
  }
  if (pages.surgeryPage) {
    targets.push({ id: "website-surgery", url: pages.surgeryPage, channel: "웹사이트", caption: "시술 안내 페이지" });
  }
  if (pages.aboutPage) {
    targets.push({ id: "website-about", url: pages.aboutPage, channel: "웹사이트", caption: "병원 소개 페이지" });
  }

  // YouTube channel landing. Any truthy `verified` value qualifies, which includes
  // the string 'unverifiable'. Handles starting with "UC" are treated as channel IDs.
  const yt = channelRecord(verifiedChannels.youtube);
  const ytHandle = nonEmptyString(yt?.handle);
  if (yt?.verified && ytHandle) {
    const ytUrl = ytHandle.startsWith("UC")
      ? `https://www.youtube.com/channel/${ytHandle}`
      : `https://www.youtube.com/${ytHandle.startsWith("@") ? ytHandle : `@${ytHandle}`}`;
    targets.push({ id: "youtube-landing", url: ytUrl, channel: "YouTube", caption: "YouTube 채널 랜딩" });
  }

  // Instagram: the first account is the main profile, the second (when present)
  // is captured as the EN account.
  const igList: unknown[] = Array.isArray(verifiedChannels.instagram) ? verifiedChannels.instagram : [];
  const igHandle = nonEmptyString(channelRecord(igList[0])?.handle)?.replace(/^@/, "");
  if (igHandle) {
    targets.push({
      id: "instagram-landing",
      url: `https://www.instagram.com/${igHandle}/`,
      channel: "Instagram",
      caption: `Instagram @${igHandle} 프로필`,
    });
  }
  const igEnHandle = nonEmptyString(channelRecord(igList[1])?.handle)?.replace(/^@/, "");
  if (igEnHandle) {
    targets.push({
      id: "instagram-en",
      url: `https://www.instagram.com/${igEnHandle}/`,
      channel: "Instagram",
      caption: `Instagram @${igEnHandle} (EN)`,
    });
  }

  // Facebook: prefer an explicit URL, fall back to the handle.
  const fb = channelRecord(verifiedChannels.facebook);
  if (fb?.verified) {
    const fbHandle = nonEmptyString(fb.handle);
    const fbUrl = nonEmptyString(fb.url) ?? (fbHandle ? `https://www.facebook.com/${fbHandle}` : undefined);
    if (fbUrl) {
      targets.push({ id: "facebook-page", url: fbUrl, channel: "Facebook", caption: "Facebook 페이지" });
    }
  }

  // TikTok
  const ttHandle = nonEmptyString(channelRecord(verifiedChannels.tiktok)?.handle)?.replace(/^@/, "");
  if (ttHandle) {
    targets.push({
      id: "tiktok-landing",
      url: `https://www.tiktok.com/@${ttHandle}`,
      channel: "TikTok",
      caption: `TikTok @${ttHandle}`,
    });
  }

  // Gangnam Unni
  const guUrl = nonEmptyString(channelRecord(verifiedChannels.gangnamUnni)?.url);
  if (guUrl) {
    targets.push({ id: "gangnamunni-page", url: guUrl, channel: "강남언니", caption: "강남언니 병원 페이지" });
  }

  // Naver Blog
  const nbHandle = nonEmptyString(channelRecord(verifiedChannels.naverBlog)?.handle);
  if (nbHandle) {
    targets.push({
      id: "naver-blog",
      url: `https://blog.naver.com/${nbHandle}`,
      channel: "네이버 블로그",
      caption: "공식 네이버 블로그",
    });
  }

  // Naver Place
  const npUrl = nonEmptyString(channelRecord(verifiedChannels.naverPlace)?.url);
  if (npUrl) {
    targets.push({ id: "naver-place", url: npUrl, channel: "네이버 플레이스", caption: "네이버 플레이스" });
  }

  // Google Maps
  const gmUrl = nonEmptyString(channelRecord(verifiedChannels.googleMaps)?.url);
  if (gmUrl) {
    targets.push({ id: "google-maps", url: gmUrl, channel: "Google Maps", caption: "Google Maps" });
  }

  // Before/after photo or gallery page, looked up in siteMap by path fragment.
  const galleryMarkers = ["/gallery", "/before", "/전후", "/photos", "/case", "/사례"];
  const galleryPage = siteMap.find((candidate) => {
    const lowered = candidate.toLowerCase();
    return galleryMarkers.some((marker) => lowered.includes(marker));
  });
  if (galleryPage) {
    targets.push({ id: "website-gallery", url: galleryPage, channel: "웹사이트", caption: "전후사진/갤러리" });
  }

  // English website
  const enSite = nonEmptyString(verifiedChannels.websiteEn);
  if (enSite) {
    targets.push({ id: "website-en", url: enSite, channel: "웹사이트(EN)", caption: "영문 웹사이트" });
  }

  return targets;
}

/**
 * Capture all relevant screenshots for a clinic.
 *
 * Builds the list of targets (main page, classified sub-pages, social/review
 * channels, gallery, English site), captures them all in parallel and returns the
 * successful captures in the same order as the target list. Failed captures are
 * simply omitted from the result.
 *
 * @param mainUrl - Clinic main page URL; always captured.
 * @param siteMap - URLs discovered on the clinic site.
 * @param verifiedChannels - Channel lookup results keyed by channel name
 *   (`youtube`, `instagram`, `facebook`, `tiktok`, `gangnamUnni`, `naverBlog`,
 *   `naverPlace`, `googleMaps`, `websiteEn`).
 * @param firecrawlKey - Firecrawl API key.
 * @returns One ScreenshotResult per successfully captured target.
 */
export async function captureAllScreenshots(
  mainUrl: string,
  siteMap: string[],
  verifiedChannels: Record<string, unknown>,
  firecrawlKey: string,
): Promise<ScreenshotResult[]> {
  const now = new Date().toISOString();
  const captureTargets = buildCaptureTargets(mainUrl, siteMap, verifiedChannels);

  console.log(`[vision] Capture targets: ${captureTargets.length} pages (${captureTargets.map(t => t.id).join(', ')})`);

  // Capture everything in parallel. Each task resolves to its own result instead of
  // pushing into a shared array, so the output order follows the target list rather
  // than whichever request happens to finish first.
  const settled = await Promise.allSettled(
    captureTargets.map(async (target): Promise<ScreenshotResult | null> => {
      const shot = await captureScreenshot(target.url, firecrawlKey);
      if (!shot) return null;
      return {
        id: target.id,
        // URL returned by Firecrawl. NOTE(review): an earlier comment said this URL
        // stays valid for roughly 7 days — confirm before relying on it long-term.
        url: shot.screenshotUrl,
        channel: target.channel,
        capturedAt: now,
        caption: target.caption,
        sourceUrl: target.url,
        base64: shot.base64,
      };
    }),
  );

  const results: ScreenshotResult[] = [];
  for (const outcome of settled) {
    if (outcome.status === "fulfilled" && outcome.value) {
      results.push(outcome.value);
    }
  }
  return results;
}

/**
 * Guess the image MIME type from the first characters of its base64 encoding.
 * Falls back to PNG when the signature is not recognised.
 */
function detectImageMimeType(base64: string): string {
  if (base64.startsWith("/9j/")) return "image/jpeg";   // bytes FF D8 FF
  if (base64.startsWith("UklGR")) return "image/webp";  // "RIFF" container header
  return "image/png";
}

/**
 * Build the per-page-type extraction prompts.
 *
 * @param currentYear - Year used in the worked "N주년 → year" examples, so the model's
 *   anniversary arithmetic is anchored to the present instead of a fixed year.
 */
function buildVisionPrompts(currentYear: number): Record<string, string> {
  // Renders a worked example such as "2026 - 22 = 2004".
  const yearsAgo = (years: number): string => `${currentYear} - ${years} = ${currentYear - years}`;

  return {
    'website-main': `이 한국 성형외과/피부과 병원 메인 페이지 스크린샷을 꼼꼼히 분석해줘. 다음 정보를 JSON으로 추출해줘:

- foundingYear: 개원 연도. 반드시 찾아줘! 다음 패턴 중 하나라도 있으면 계산해:
  "22주년" → ${yearsAgo(22)}
  "22년 동안" → ${yearsAgo(22)}
  "SINCE 2004" → 2004
  "20년 전통" → ${yearsAgo(20)}
  "개원 15주년" → ${yearsAgo(15)}
  "2004년개원" → 2004
  "2004년 개원" → 2004
  "2004년개원 이래" → 2004
  "설립 2004년" → 2004
  배너, 이벤트 팝업, 로고 옆 텍스트, 하단 footer, 원장 인사말, 병원 소개 섹션 등 페이지 전체를 꼼꼼히 확인해줘. 특히 스크롤 아래쪽도 확인할 것!
- operationYears: 운영 기간 (숫자만. "22주년"이면 22)
- certifications: 인증 마크 (JCI, 보건복지부, 의료관광 등)
- socialIcons: 보이는 소셜 미디어 아이콘 (Instagram, YouTube, Facebook, Blog, KakaoTalk 등)
- floatingButtons: 플로팅 상담 버튼 (카카오톡, LINE, WhatsApp 등)
- brandColors: 메인 컬러와 액센트 컬러 (hex)
- slogans: 배너 텍스트나 슬로건 (이벤트 텍스트 포함)
- serviceCategories: 네비게이션 메뉴에 보이는 시술 카테고리`,

    'website-doctors': `이 성형외과 의료진 페이지 스크린샷을 분석해줘. JSON으로 추출:
- doctors: [{name: "이름", specialty: "전문 분야", position: "대표원장/원장 등"}]
  프로필 사진 옆에 적힌 이름과 전문 분야를 모두 읽어줘.`,

    'website-about': `이 성형외과 병원 소개 페이지를 꼼꼼히 분석해줘. JSON으로 추출:

- foundingYear: 개원 연도. 반드시 찾아줘! 소개 페이지에 자주 나오는 패턴:
  "22주년" → ${yearsAgo(22)}
  "22년 동안" → ${yearsAgo(22)}
  "SINCE 2004" → 2004
  "2004년 개원" → 2004
  "2004년개원" → 2004
  "2004년개원 이래" → 2004
  "설립 2004년" → 2004
  "20년 전통" → ${yearsAgo(20)}
  연혁, 소개글, 대표원장 인사말 등 모든 텍스트를 꼼꼼히 확인해줘.
- operationYears: 운영 기간 (숫자만)
- doctors: [{name: "이름", specialty: "전문 분야", position: "대표원장/원장 등"}]
- certifications: 인증 마크 (JCI, 보건복지부, 의료관광 등)
- slogans: 소개 텍스트, 미션/비전 문구`,

    'website-surgery': `이 성형외과 시술 안내 페이지를 분석해줘. JSON으로 추출:
- serviceCategories: 보이는 시술 카테고리 목록 (눈성형, 코성형, 가슴성형, 안면윤곽 등)
- certifications: 보이는 인증/수상 마크`,

    'youtube-landing': `이 YouTube 채널 랜딩 페이지를 분석해줘. JSON으로 추출:
- youtubeStats: {subscribers: "구독자 수 텍스트", videos: "영상 수 텍스트", recentUpload: "최근 업로드 제목"}`,

    'instagram-landing': `이 Instagram 프로필 페이지를 분석해줘. JSON으로 추출:
- instagramStats: {followers: "팔로워 수 텍스트", posts: "게시물 수 텍스트", bio: "바이오 텍스트"}`,

    'gangnamunni-page': `이 강남언니 병원 페이지를 분석해줘. JSON으로 추출:
- gangnamUnniStats: {rating: "평점 텍스트", reviews: "리뷰 수 텍스트", doctors: 의사 수(숫자)}
- doctors: [{name: "이름", specialty: "전문 분야"}] (보이는 의사 정보)`,

    'facebook-page': `이 Facebook 페이지 스크린샷을 분석해줘. JSON으로 추출:
- facebookStats: {followers: "팔로워 수 텍스트", likes: "좋아요 수 텍스트", rating: "평점 (있으면)", category: "카테고리", recentPost: "최근 게시물 요약"}`,

    'tiktok-landing': `이 TikTok 프로필 스크린샷을 분석해줘. JSON으로 추출:
- tiktokStats: {followers: "팔로워 수", likes: "총 좋아요 수", videos: "영상 수", bio: "바이오 텍스트", recentVideo: "최근 영상 설명"}`,

    'naver-blog': `이 네이버 블로그 스크린샷을 분석해줘. JSON으로 추출:
- naverBlogStats: {neighbors: "이웃 수 텍스트", visitors: "방문자 수 텍스트 (있으면)", recentTitle: "최근 글 제목", blogName: "블로그 이름"}`,

    'naver-place': `이 네이버 플레이스 페이지를 분석해줘. JSON으로 추출:
- naverPlaceStats: {rating: "별점", reviews: "리뷰 수", visitorReviews: "방문자 리뷰 수", address: "주소", hours: "영업시간", category: "카테고리"}`,

    'google-maps': `이 Google Maps 페이지를 분석해줘. JSON으로 추출:
- googleMapsStats: {rating: "별점", reviews: "리뷰 수", address: "주소", hours: "영업시간 (있으면)", photos: "사진 수 (있으면)"}`,

    'website-gallery': `이 성형외과 전후사진/갤러리 페이지를 분석해줘. JSON으로 추출:
- galleryStats: {estimatedPhotoCount: 추정 사진 수(숫자), categories: ["보이는 시술 카테고리 목록"]}`,

    'website-en': `Analyze this English version of a Korean plastic surgery clinic website. Extract JSON:
- enSiteStats: {languages: ["supported languages visible"], targetMarkets: ["target countries/regions mentioned"], keyServices: ["listed services in English"]}`,

    'instagram-en': `이 Instagram 프로필(글로벌/영문 계정)을 분석해줘. JSON으로 추출:
- instagramEnStats: {followers: "팔로워 수 텍스트", posts: "게시물 수 텍스트", bio: "바이오 텍스트", language: "주 사용 언어"}`,
  };
}

/**
 * Analyze a screenshot with Gemini Vision.
 *
 * Sends the image together with a page-type-specific prompt to the
 * `gemini-2.0-flash` `generateContent` endpoint and parses the first JSON object
 * found in the model's text reply. Unknown page types get a generic
 * "extract everything" prompt.
 *
 * This function never throws: HTTP errors, replies without JSON and exceptions are
 * logged and yield an empty object.
 *
 * @param base64 - Screenshot image as base64 (no data-URI prefix).
 * @param pageType - Capture-target id such as 'website-main' or 'youtube-landing'.
 * @param geminiKey - Gemini API key.
 * @returns The parsed model output, or `{}` on any failure.
 */
export async function analyzeScreenshot(
  base64: string,
  pageType: string,
  geminiKey: string,
): Promise<VisionAnalysisResult> {
  const prompts = buildVisionPrompts(new Date().getFullYear());
  const prompt = prompts[pageType] || `이 웹페이지 스크린샷을 분석해줘. 보이는 모든 텍스트와 정보를 JSON으로 추출해줘.`;

  try {
    console.log(`[vision] Analyzing screenshot: ${pageType} (${Math.round(base64.length / 1024)}KB)`);
    const res = await fetchWithRetry(
      `https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash:generateContent`,
      {
        method: "POST",
        headers: {
          "Content-Type": "application/json",
          // Send the key as a header rather than a `?key=` query parameter so it
          // cannot end up in logged or echoed request URLs.
          "x-goog-api-key": geminiKey,
        },
        body: JSON.stringify({
          contents: [{
            parts: [
              { text: `${prompt}\n\nJSON만 반환해줘, 설명 없이.` },
              { inlineData: { mimeType: detectImageMimeType(base64), data: base64 } },
            ],
          }],
          generationConfig: {
            temperature: 0.1,
            maxOutputTokens: 2048,
          },
        }),
      },
      { label: `gemini-vision:${pageType}`, timeoutMs: 45000, maxRetries: 1 },
    );

    if (!res.ok) {
      const errText = await res.text().catch(() => "");
      console.error(`[vision] Gemini failed for ${pageType}: HTTP ${res.status} — ${errText.slice(0, 200)}`);
      return {};
    }

    const data = await res.json();
    const text = data.candidates?.[0]?.content?.parts?.[0]?.text || "";
    // The model may wrap its JSON in prose or code fences; take everything from the
    // first "{" to the last "}". A malformed match makes JSON.parse throw, which the
    // catch below turns into an empty result.
    const jsonMatch = text.match(/\{[\s\S]*\}/);
    if (jsonMatch) {
      const result = JSON.parse(jsonMatch[0]);
      console.log(`[vision] Gemini result for ${pageType}:`, JSON.stringify(result).slice(0, 300));
      return result;
    }
    console.warn(`[vision] Gemini returned no JSON for ${pageType}. Raw: ${text.slice(0, 200)}`);
    return {};
  } catch (err) {
    console.error(`[vision] Gemini error for ${pageType}:`, err instanceof Error ? err.message : err);
    return {};
  }
}

/**
 * Run full vision analysis on all captured screenshots.
 *
 * Screenshots without base64 data are skipped. The remaining ones are analysed in
 * parallel; once all analyses have settled, the results are merged in the order of
 * `screenshots`, so the outcome does not depend on which request finished first.
 *
 * Merge rules:
 * - `foundingYear`, `operationYears`: the first screenshot that provides one wins.
 * - `doctors`: accumulated, de-duplicated by trimmed name (first occurrence kept;
 *   entries without a usable name are always kept).
 * - `certifications`, `serviceCategories`, `slogans`: accumulated as a set.
 * - `socialIcons`, `floatingButtons`, `brandColors`, `youtubeStats`,
 *   `instagramStats`, `gangnamUnniStats`: the last screenshot that provides one wins.
 *
 * Array fields come from model-generated JSON, so anything that is not actually an
 * array is ignored instead of being spread.
 *
 * @param screenshots - Captured screenshots (see captureAllScreenshots).
 * @param geminiKey - Gemini API key.
 * @returns `merged` — the combined result; `perPage` — the raw result per screenshot id.
 */
export async function runVisionAnalysis(
  screenshots: ScreenshotResult[],
  geminiKey: string,
): Promise<{ merged: VisionAnalysisResult; perPage: Record<string, VisionAnalysisResult> }> {
  const perPage: Record<string, VisionAnalysisResult> = {};
  const merged: VisionAnalysisResult = {};

  const analysable = screenshots.filter(
    (ss): ss is ScreenshotResult & { base64: string } => typeof ss.base64 === "string" && ss.base64.length > 0,
  );

  const settled = await Promise.allSettled(
    analysable.map((ss) => analyzeScreenshot(ss.base64, ss.id, geminiKey)),
  );

  // Returns `value` only when it is a non-empty array.
  const nonEmptyArray = <T>(value: T[] | undefined): T[] | undefined =>
    Array.isArray(value) && value.length > 0 ? value : undefined;

  // Union of two string lists, preserving first-seen order.
  const union = (existing: string[] | undefined, incoming: string[]): string[] =>
    [...new Set([...(existing || []), ...incoming])];

  const seenDoctorNames = new Set<string>();

  settled.forEach((outcome, index) => {
    if (outcome.status !== "fulfilled") return;
    const analysis = outcome.value;
    perPage[analysable[index].id] = analysis;

    // First provider wins.
    if (analysis.foundingYear && !merged.foundingYear) merged.foundingYear = analysis.foundingYear;
    if (analysis.operationYears && !merged.operationYears) merged.operationYears = analysis.operationYears;

    // Doctors: the same person is often listed on several pages.
    const doctors = nonEmptyArray(analysis.doctors);
    if (doctors) {
      const combined = merged.doctors || [];
      for (const doctor of doctors) {
        const key = typeof doctor?.name === "string" ? doctor.name.trim() : "";
        if (key && seenDoctorNames.has(key)) continue;
        if (key) seenDoctorNames.add(key);
        combined.push(doctor);
      }
      merged.doctors = combined;
    }

    // Accumulated as sets.
    const certifications = nonEmptyArray(analysis.certifications);
    if (certifications) merged.certifications = union(merged.certifications, certifications);
    const serviceCategories = nonEmptyArray(analysis.serviceCategories);
    if (serviceCategories) merged.serviceCategories = union(merged.serviceCategories, serviceCategories);
    const slogans = nonEmptyArray(analysis.slogans);
    if (slogans) merged.slogans = union(merged.slogans, slogans);

    // Last provider wins.
    const socialIcons = nonEmptyArray(analysis.socialIcons);
    if (socialIcons) merged.socialIcons = socialIcons;
    const floatingButtons = nonEmptyArray(analysis.floatingButtons);
    if (floatingButtons) merged.floatingButtons = floatingButtons;
    if (analysis.brandColors) merged.brandColors = analysis.brandColors;
    if (analysis.youtubeStats) merged.youtubeStats = analysis.youtubeStats;
    if (analysis.instagramStats) merged.instagramStats = analysis.instagramStats;
    if (analysis.gangnamUnniStats) merged.gangnamUnniStats = analysis.gangnamUnniStats;
  });

  return { merged, perPage };
}