// o2o-infinith-demo/supabase/functions/collect-channel-data/index.ts

import "@supabase/functions-js/edge-runtime.d.ts";
import { createClient } from "https://esm.sh/@supabase/supabase-js@2";
import type { VerifiedChannels } from "../_shared/verifyHandles.ts";
import { PERPLEXITY_MODEL } from "../_shared/config.ts";
import { captureAllScreenshots, runVisionAnalysis, screenshotErrors, type ScreenshotResult } from "../_shared/visionAnalysis.ts";
import { fetchWithRetry, fetchJsonWithRetry, wrapChannelTask, type ChannelTaskResult } from "../_shared/retry.ts";
import { searchGooglePlace } from "../_shared/googlePlaces.ts";
import { extractFoundingYear, validateFoundingYearExtractor } from "../_shared/foundingYearExtractor.ts";
import { validateClassifier } from "../_shared/urlClassifier.ts";
import { validateDataQuality } from "../_shared/dataQuality.ts";

// ─── Harness Self-Tests (cold-start) ───
// Every validator is invoked once while this module is being loaded. A
// self-test that reports `pass === false` is only logged with console.warn;
// nothing is thrown from this section.
const harnessResults = {
  classifier: validateClassifier(),
  foundingYear: validateFoundingYearExtractor(),
  dataQuality: validateDataQuality(),
};
Object.entries(harnessResults)
  .filter(([, outcome]) => !outcome.pass)
  .forEach(([harnessName, outcome]) => {
    console.warn(`[harness] ${harnessName} self-test FAILED:`, outcome.failures);
  });

// CORS headers: any origin is allowed and the listed request headers may be
// sent by the caller. Returned as-is for the OPTIONS preflight request
// (other uses, if any, are outside this view).
const corsHeaders = {
  "Access-Control-Allow-Origin": "*",
  "Access-Control-Allow-Headers": "authorization, x-client-info, apikey, content-type",
};

// Base URL of the Apify REST API (v2); the actor-run and dataset-items
// endpoints used by runApifyActor are built from it.
const APIFY_BASE = "https://api.apify.com/v2";

/**
 * JSON request body accepted by this function.
 * Only `reportId` is mandatory — the handler throws when it is missing.
 */
interface CollectRequest {
  /** `id` of the `marketing_reports` row whose Phase 1 results are loaded. */
  reportId: string;
  clinicId?: string;  // V3: clinic UUID
  runId?: string;     // V3: analysis_run UUID
}

/**
 * Runs an Apify actor and returns the items of the run's default dataset.
 *
 * The run is started with `waitForFinish=120`, then up to 20 items are read
 * from the dataset referenced by the run (`data.defaultDatasetId`).
 *
 * NOTE(review): the run's final status is not inspected, so if the run has
 * not finished when the first call returns, the dataset may be incomplete.
 * Also confirm that Apify honours a `waitForFinish` value of 120.
 *
 * @param actorId Apify actor identifier, e.g. `apify~instagram-profile-scraper`.
 * @param input   Actor input, sent as the JSON request body.
 * @param token   Apify API token, passed as the `token` query parameter.
 * @returns The dataset items (at most 20).
 * @throws Error when either HTTP call returns a non-OK status, when the run
 *         response carries no dataset id, or when the dataset response is
 *         not a JSON array.
 */
async function runApifyActor(actorId: string, input: Record<string, unknown>, token: string): Promise<unknown[]> {
  // Short actor name (the part after "~") used only for retry/log labels.
  const shortName = actorId.split('~')[1] || actorId;

  const res = await fetchWithRetry(
    `${APIFY_BASE}/acts/${actorId}/runs?token=${token}&waitForFinish=120`,
    { method: "POST", headers: { "Content-Type": "application/json" }, body: JSON.stringify(input) },
    { maxRetries: 1, timeoutMs: 130000, label: `apify:${shortName}` },
  );
  if (!res.ok) throw new Error(`Apify ${actorId} returned ${res.status}`);

  const run = await res.json();
  // Optional chaining on `run` too: a body that parses to null must produce
  // the descriptive error below rather than a TypeError.
  const datasetId = run?.data?.defaultDatasetId;
  if (!datasetId) throw new Error(`Apify ${actorId}: no dataset returned`);

  const itemsRes = await fetchWithRetry(
    `${APIFY_BASE}/datasets/${datasetId}/items?token=${token}&limit=20`,
    undefined,
    { maxRetries: 1, timeoutMs: 30000, label: `apify-dataset:${shortName}` },
  );
  // Mirror the status check done for the run request: an error payload must
  // not be handed to callers as if it were the item list.
  if (!itemsRes.ok) throw new Error(`Apify ${actorId}: dataset fetch returned ${itemsRes.status}`);

  const items: unknown = await itemsRes.json();
  // Callers index and map over the result, so enforce the declared array type here.
  if (!Array.isArray(items)) throw new Error(`Apify ${actorId}: dataset response was not an array`);
  return items;
}

// ─── Health Score Calculator ───────────────────────────────────────────────
// Computes a 0-100 health score per channel based on follower/rating/review
// benchmarks for Korean plastic surgery clinics. Used in channel_snapshots.
//
// Design: linear interpolation between (floor, floorScore) and (ceil, ceilScore).
// E.g., Instagram at 5K followers → mid-range between 1K=40 and 10K=70.

/**
 * Piecewise-linear score mapping.
 *
 * Values at or below `low` yield `scoreLow`, values at or above `high` yield
 * `scoreHigh` (both returned as given). Anything in between is linearly
 * interpolated and rounded to the nearest integer.
 */
function lerp(value: number, low: number, high: number, scoreLow: number, scoreHigh: number): number {
  const atOrBelowFloor = value <= low;
  const atOrAboveCeiling = value >= high;
  if (atOrBelowFloor || atOrAboveCeiling) {
    // The floor check wins when both hold, matching a floor-first evaluation order.
    return atOrBelowFloor ? scoreLow : scoreHigh;
  }
  const fraction = (value - low) / (high - low);
  const scoreSpan = scoreHigh - scoreLow;
  return Math.round(scoreLow + fraction * scoreSpan);
}

/**
 * Computes a 0–100 health score for one channel from its collected metrics.
 *
 * Fields read per channel:
 * - `instagram`:   `followers`, `posts`
 * - `youtube`:     `subscribers`, `totalVideos`
 * - `facebook`:    `followers`
 * - `gangnamUnni`: `rating` (out of 10), `totalReviews`
 * - `googleMaps`:  `rating` (out of 5), `reviewCount`
 * - `naverBlog`:   `officialBlogHandle`, `totalResults`
 * - `naverPlace`:  `rating` (out of 5), `reviewCount` (falling back to `reviews`)
 *
 * Missing, non-numeric and non-finite metrics are treated as 0.
 *
 * @param channel Channel key; any value not listed above yields the neutral score 50.
 * @param data    Raw metrics object for that channel.
 * @returns Integer score between 0 and 100.
 */
function computeHealthScore(channel: string, data: Record<string, unknown>): number {
  // Coerces a metric to a finite number, falling back to 0. parseFloat (rather
  // than parseInt) keeps the fractional part of string ratings such as "4.7",
  // and the finite check stops NaN/Infinity from leaking into the score.
  const n = (v: unknown): number => {
    const parsed = typeof v === 'number' ? v : Number.parseFloat(String(v ?? ''));
    return Number.isFinite(parsed) ? parsed : 0;
  };

  // Points earned for a rating: the achieved share of the rating scale,
  // clamped to [0, 1] so out-of-range ratings cannot push the score outside
  // 0..maxPoints, multiplied by the points available.
  const ratingPoints = (rating: number, scale: number, maxPoints: number): number =>
    Math.round(Math.min(Math.max(rating / scale, 0), 1) * maxPoints);

  switch (channel) {
    case 'instagram': {
      // followers: 0→20, 1K→40, 10K→70, 50K→90, 100K+→100
      const followers = n(data.followers);
      const fScore = followers >= 100_000 ? 100
        : followers >= 50_000 ? lerp(followers, 50_000, 100_000, 90, 100)
        : followers >= 10_000 ? lerp(followers, 10_000, 50_000, 70, 90)
        : followers >= 1_000  ? lerp(followers, 1_000, 10_000, 40, 70)
        : lerp(followers, 0, 1_000, 20, 40);
      // posts bonus: +5 if active (≥ 50 posts)
      const posts = n(data.posts);
      return Math.min(fScore + (posts >= 50 ? 5 : 0), 100);
    }
    case 'youtube': {
      // subscribers: 0→20, 500→40, 5K→65, 50K→85, 200K+→100
      const subs = n(data.subscribers);
      const sScore = subs >= 200_000 ? 100
        : subs >= 50_000 ? lerp(subs, 50_000, 200_000, 85, 100)
        : subs >= 5_000  ? lerp(subs, 5_000, 50_000, 65, 85)
        : subs >= 500    ? lerp(subs, 500, 5_000, 40, 65)
        : lerp(subs, 0, 500, 20, 40);
      // video count bonus: +5 if ≥ 20 videos
      const videos = n(data.totalVideos);
      return Math.min(sScore + (videos >= 20 ? 5 : 0), 100);
    }
    case 'facebook': {
      // followers: 0→20, 500→35, 5K→60, 20K→80, 50K+→100
      const followers = n(data.followers);
      return followers >= 50_000 ? 100
        : followers >= 20_000 ? lerp(followers, 20_000, 50_000, 80, 100)
        : followers >= 5_000  ? lerp(followers, 5_000, 20_000, 60, 80)
        : followers >= 500    ? lerp(followers, 500, 5_000, 35, 60)
        : lerp(followers, 0, 500, 20, 35);
    }
    case 'gangnamUnni': {
      // rating /10: max 70pts. reviews: 0→0, 100→10, 1000→20, 10000→30
      const rating = n(data.rating);
      const reviews = n(data.totalReviews);
      const rScore = ratingPoints(rating, 10, 70);
      const rvScore = reviews >= 10_000 ? 30 : reviews >= 1_000 ? lerp(reviews, 1_000, 10_000, 20, 30)
        : reviews >= 100 ? lerp(reviews, 100, 1_000, 10, 20)
        : lerp(reviews, 0, 100, 0, 10);
      return Math.min(rScore + rvScore, 100);
    }
    case 'googleMaps': {
      // rating /5: max 60pts. reviews: 0→0, 50→10, 500→25, 5000→40
      const rating = n(data.rating);
      const reviews = n(data.reviewCount);
      const rScore = ratingPoints(rating, 5, 60);
      const rvScore = reviews >= 5_000 ? 40 : reviews >= 500 ? lerp(reviews, 500, 5_000, 25, 40)
        : reviews >= 50 ? lerp(reviews, 50, 500, 10, 25)
        : lerp(reviews, 0, 50, 0, 10);
      return Math.min(rScore + rvScore, 100);
    }
    case 'naverBlog': {
      // Presence-based: base 50 with an official handle (20 without), plus a
      // mention-count bonus of up to +30 (0→0, 100→15, 1000+→30). Maximum is 80;
      // no separate activity bonus is applied here.
      const hasHandle = Boolean(data.officialBlogHandle);
      const total = n(data.totalResults);
      const mentionScore = total >= 1000 ? 30 : total >= 100 ? lerp(total, 100, 1000, 15, 30) : lerp(total, 0, 100, 0, 15);
      return Math.min((hasHandle ? 50 : 20) + mentionScore, 100);
    }
    case 'naverPlace': {
      // rating /5: max 60pts. reviews: 0→0, 100→15, 1000→30, 10000→40
      const rating = n(data.rating);
      // `reviews` is only consulted when `reviewCount` is missing or 0.
      const reviews = n(data.reviewCount) || n(data.reviews);
      const rScore = ratingPoints(rating, 5, 60);
      const rvScore = reviews >= 10_000 ? 40 : reviews >= 1_000 ? lerp(reviews, 1_000, 10_000, 30, 40)
        : reviews >= 100 ? lerp(reviews, 100, 1_000, 15, 30)
        : lerp(reviews, 0, 100, 0, 15);
      return Math.min(rScore + rvScore, 100);
    }
    default:
      return 50; // Unknown channel — neutral score
  }
}

/**
 * Phase 2: Collect Channel Data
 *
 * Uses verified handles from Phase 1 (stored in DB) to collect ALL raw data
 * from each channel in parallel. Also runs market analysis via Perplexity.
 */
Deno.serve(async (req) => {
  if (req.method === "OPTIONS") {
    return new Response("ok", { headers: corsHeaders });
  }

  try {
    const { reportId, clinicId: inputClinicId, runId: inputRunId } = (await req.json()) as CollectRequest;
    if (!reportId) throw new Error("reportId is required");

    // Read Phase 1 results from DB
    const supabaseUrl = Deno.env.get("SUPABASE_URL")!;
    const supabaseKey = Deno.env.get("SUPABASE_SERVICE_ROLE_KEY")!;
    const supabase = createClient(supabaseUrl, supabaseKey);

    const { data: row, error: fetchError } = await supabase
      .from("marketing_reports")
      .select("*")
      .eq("id", reportId)
      .single();

    if (fetchError || !row) throw new Error(`Report not found: ${fetchError?.message}`);

    const verified = row.verified_channels as VerifiedChannels;
    const clinicName = row.clinic_name || "";
    const address = row.scrape_data?.clinic?.address || "";
    const services: string[] = row.scrape_data?.clinic?.services || [];

    await supabase.from("marketing_reports").update({ status: "collecting" }).eq("id", reportId);

    const APIFY_TOKEN = Deno.env.get("APIFY_API_TOKEN") || "";
    const YOUTUBE_API_KEY = Deno.env.get("YOUTUBE_API_KEY") || "";
    const FIRECRAWL_API_KEY = Deno.env.get("FIRECRAWL_API_KEY") || "";
    const PERPLEXITY_API_KEY = Deno.env.get("PERPLEXITY_API_KEY") || "";
    const NAVER_CLIENT_ID = Deno.env.get("NAVER_CLIENT_ID") || "";
    const NAVER_CLIENT_SECRET = Deno.env.get("NAVER_CLIENT_SECRET") || "";
    const GOOGLE_PLACES_API_KEY = Deno.env.get("GOOGLE_PLACES_API_KEY") || "";

    const channelData: Record<string, unknown> = {};
    const analysisData: Record<string, unknown> = {};
    const channelTasks: Promise<ChannelTaskResult>[] = [];

    // ─── 1. Instagram (multi-account) — try every candidate that has a handle, skipping only those explicitly marked verified === false ───
    const igCandidates = (verified.instagram || []).filter((v: Record<string, unknown>) => v.handle && v.verified !== false);
    if (APIFY_TOKEN && igCandidates.length > 0) {
      channelTasks.push(wrapChannelTask("instagram", async () => {
        const accounts: Record<string, unknown>[] = [];
        for (const ig of igCandidates) {
          const items = await runApifyActor("apify~instagram-profile-scraper", { usernames: [ig.handle], resultsLimit: 12 }, APIFY_TOKEN);
          const profile = (items as Record<string, unknown>[])[0];
          if (profile && !profile.error) {
            accounts.push({
              username: profile.username,
              followers: profile.followersCount,
              following: profile.followsCount,
              posts: profile.postsCount,
              bio: profile.biography,
              isBusinessAccount: profile.isBusinessAccount,
              externalUrl: profile.externalUrl,
              igtvVideoCount: profile.igtvVideoCount,
              latestPosts: ((profile.latestPosts as Record<string, unknown>[]) || []).slice(0, 12).map(p => ({
                type: p.type, likes: p.likesCount, comments: p.commentsCount,
                caption: p.caption, timestamp: p.timestamp,
              })),
            });
          }
        }
        if (accounts.length > 0) {
          channelData.instagramAccounts = accounts;
          channelData.instagram = accounts[0];
        } else {
          throw new Error("No Instagram profiles found via Apify");
        }
      }));
    }

    // ─── 1b. Instagram Posts (최근 20개 포스트 상세) ───
    const igPrimaryHandle = igCandidates[0]?.handle as string | undefined;
    if (APIFY_TOKEN && igPrimaryHandle) {
      channelTasks.push(wrapChannelTask("instagramPosts", async () => {
        const handle = igPrimaryHandle.replace(/^@/, '');
        const items = await runApifyActor(
          "apify~instagram-post-scraper",
          { directUrls: [`https://www.instagram.com/${handle}/`], resultsLimit: 20 },
          APIFY_TOKEN,
        );
        const posts = (items as Record<string, unknown>[]).map(p => ({
          id: p.id,
          type: p.type,
          shortCode: p.shortCode,
          url: p.url,
          caption: ((p.caption as string) || '').slice(0, 500),
          hashtags: p.hashtags || [],
          mentions: p.mentions || [],
          likesCount: (p.likesCount as number) || 0,
          commentsCount: (p.commentsCount as number) || 0,
          timestamp: p.timestamp,
          displayUrl: p.displayUrl,
        }));
        const totalLikes = posts.reduce((sum, p) => sum + p.likesCount, 0);
        const totalComments = posts.reduce((sum, p) => sum + p.commentsCount, 0);
        channelData.instagramPosts = {
          posts,
          totalPosts: posts.length,
          avgLikes: posts.length > 0 ? Math.round(totalLikes / posts.length) : 0,
          avgComments: posts.length > 0 ? Math.round(totalComments / posts.length) : 0,
        };
      }));
    }

    // ─── 1c. Instagram Reels (최근 15개 릴스 상세) ───
    if (APIFY_TOKEN && igPrimaryHandle) {
      channelTasks.push(wrapChannelTask("instagramReels", async () => {
        const handle = igPrimaryHandle.replace(/^@/, '');
        const items = await runApifyActor(
          "apify~instagram-reel-scraper",
          { directUrls: [`https://www.instagram.com/${handle}/reels/`], resultsLimit: 15 },
          APIFY_TOKEN,
        );
        const reels = (items as Record<string, unknown>[]).map(r => ({
          id: r.id,
          shortCode: r.shortCode,
          url: r.url,
          caption: ((r.caption as string) || '').slice(0, 500),
          hashtags: r.hashtags || [],
          likesCount: (r.likesCount as number) || 0,
          commentsCount: (r.commentsCount as number) || 0,
          videoViewCount: (r.videoViewCount as number) || 0,
          videoPlayCount: (r.videoPlayCount as number) || 0,
          videoDuration: (r.videoDuration as number) || 0,
          timestamp: r.timestamp,
          musicInfo: r.musicInfo || null,
        }));
        const totalViews = reels.reduce((sum, r) => sum + r.videoViewCount, 0);
        const totalPlays = reels.reduce((sum, r) => sum + r.videoPlayCount, 0);
        channelData.instagramReels = {
          reels,
          totalReels: reels.length,
          avgViews: reels.length > 0 ? Math.round(totalViews / reels.length) : 0,
          avgPlays: reels.length > 0 ? Math.round(totalPlays / reels.length) : 0,
        };
      }));
    }

    // ─── 2. YouTube ───
    const ytVerified = verified.youtube as Record<string, unknown> | null;
    if (YOUTUBE_API_KEY && (ytVerified?.verified === true || ytVerified?.verified === "unverifiable")) {
      channelTasks.push(wrapChannelTask("youtube", async () => {
        const YT = "https://www.googleapis.com/youtube/v3";
        let channelId = (ytVerified?.channelId as string) || "";

        // If no channelId, try to resolve from handle
        if (!channelId && ytVerified?.handle) {
          const h = (ytVerified.handle as string).replace(/^@/, '');
          if (h.startsWith('UC')) {
            channelId = h;
          } else {
            for (const param of ['forHandle', 'forUsername']) {
              const lookupRes = await fetch(`${YT}/channels?part=id&${param}=${h}&key=${YOUTUBE_API_KEY}`);
              const lookupData = await lookupRes.json();
              channelId = lookupData.items?.[0]?.id || '';
              if (channelId) break;
            }
          }
        }
        if (!channelId) throw new Error("Could not resolve YouTube channel ID");

        const chRes = await fetchWithRetry(`${YT}/channels?part=snippet,statistics,brandingSettings&id=${channelId}&key=${YOUTUBE_API_KEY}`, undefined, { label: "youtube-api" });
        const chData = await chRes.json();
        const channel = chData.items?.[0];
        if (!channel) throw new Error("YouTube channel not found in API response");

        const stats = channel.statistics || {};
        const snippet = channel.snippet || {};

        // Popular videos
        const searchRes = await fetchWithRetry(`${YT}/search?part=snippet&channelId=${channelId}&order=viewCount&type=video&maxResults=10&key=${YOUTUBE_API_KEY}`, undefined, { label: "youtube-search" });
        const searchData = await searchRes.json();
        const videoIds = (searchData.items || []).map((i: Record<string, unknown>) => (i.id as Record<string, string>)?.videoId).filter(Boolean).join(",");

        let videos: Record<string, unknown>[] = [];
        if (videoIds) {
          const vRes = await fetchWithRetry(`${YT}/videos?part=snippet,statistics,contentDetails&id=${videoIds}&key=${YOUTUBE_API_KEY}`, undefined, { label: "youtube-videos" });
          const vData = await vRes.json();
          videos = vData.items || [];
        }

        channelData.youtube = {
          channelId, channelName: snippet.title, handle: snippet.customUrl,
          description: snippet.description, publishedAt: snippet.publishedAt,
          thumbnailUrl: snippet.thumbnails?.default?.url,
          subscribers: parseInt(stats.subscriberCount || "0", 10),
          totalViews: parseInt(stats.viewCount || "0", 10),
          totalVideos: parseInt(stats.videoCount || "0", 10),
          videos: videos.slice(0, 10).map(v => {
            const vs = v.statistics as Record<string, string> || {};
            const vSnip = v.snippet as Record<string, unknown> || {};
            const vCon = v.contentDetails as Record<string, string> || {};
            return {
              title: vSnip.title, views: parseInt(vs.viewCount || "0", 10),
              likes: parseInt(vs.likeCount || "0", 10), comments: parseInt(vs.commentCount || "0", 10),
              date: vSnip.publishedAt, duration: vCon.duration,
              url: `https://www.youtube.com/watch?v=${v.id}`,
              thumbnail: (vSnip.thumbnails as Record<string, Record<string, string>>)?.medium?.url,
            };
          }),
        };
      }));
    }

    // ─── 3. Facebook ───
    const fbVerified = verified.facebook as Record<string, unknown> | null;
    if (APIFY_TOKEN && (fbVerified?.verified === true || fbVerified?.verified === "unverifiable")) {
      channelTasks.push(wrapChannelTask("facebook", async () => {
        const fbUrl = (fbVerified.url as string) || `https://www.facebook.com/${fbVerified.handle}`;
        const items = await runApifyActor("apify~facebook-pages-scraper", { startUrls: [{ url: fbUrl }] }, APIFY_TOKEN);
        const page = (items as Record<string, unknown>[])[0];
        if (page?.title) {
          channelData.facebook = {
            pageName: page.title, pageUrl: page.pageUrl || fbUrl,
            followers: page.followers, likes: page.likes, categories: page.categories,
            email: page.email, phone: page.phone, website: page.website,
            address: page.address, intro: page.intro, rating: page.rating,
            profilePictureUrl: page.profilePictureUrl,
          };
        } else {
          throw new Error("Facebook page scraper returned no data");
        }
      }));
    }

    // ─── 4. 강남언니 (항상 시도 — verified 여부 무관) ───
    const guVerified = verified.gangnamUnni as Record<string, unknown> | null;
    if (FIRECRAWL_API_KEY && clinicName) {
      channelTasks.push(wrapChannelTask("gangnamUnni", async () => {
        let gangnamUnniUrl = (guVerified?.verified && guVerified.url) ? String(guVerified.url) : "";

        // Fallback: 강남언니 URL을 Firecrawl 검색으로 직접 찾기
        if (!gangnamUnniUrl) {
          const shortName = clinicName.replace(/성형외과|의원|병원|클리닉|피부과/g, '').trim();
          const searchQueries = [
            `${clinicName} site:gangnamunni.com`,
            `${shortName} 성형외과 site:gangnamunni.com`,
            `${clinicName} 강남언니 병원`,
          ];
          for (const q of searchQueries) {
            try {
              const sRes = await fetch("https://api.firecrawl.dev/v1/search", {
                method: "POST",
                headers: { "Content-Type": "application/json", Authorization: `Bearer ${FIRECRAWL_API_KEY}` },
                body: JSON.stringify({ query: q, limit: 5 }),
              });
              const sData = await sRes.json();
              const found = (sData.data || [])
                .map((r: Record<string, string>) => r.url)
                .find((u: string) => u?.includes('gangnamunni.com/hospitals/'));
              if (found) { gangnamUnniUrl = found; break; }
            } catch { /* try next query */ }
          }
          if (gangnamUnniUrl) {
            console.log(`[gangnamUnni] Fallback search found: ${gangnamUnniUrl}`);
          }
        }

        if (!gangnamUnniUrl) {
          throw new Error("강남언니 URL을 찾을 수 없습니다 (검색 실패)");
        }

        const scrapeRes = await fetchWithRetry("https://api.firecrawl.dev/v1/scrape", {
          method: "POST",
          headers: { "Content-Type": "application/json", Authorization: `Bearer ${FIRECRAWL_API_KEY}` },
          body: JSON.stringify({
            url: gangnamUnniUrl,
            formats: ["json"],
            jsonOptions: {
              prompt: "Extract: hospital name, overall rating (강남언니 rating is always out of 10, NOT out of 5), total review count, doctors with names/ratings/review counts/specialties, procedures offered, address, certifications/badges",
              schema: {
                type: "object",
                properties: {
                  hospitalName: { type: "string" }, rating: { type: "number" }, totalReviews: { type: "number" },
                  doctors: { type: "array", items: { type: "object", properties: { name: { type: "string" }, rating: { type: "number" }, reviews: { type: "number" }, specialty: { type: "string" } } } },
                  procedures: { type: "array", items: { type: "string" } },
                  address: { type: "string" }, badges: { type: "array", items: { type: "string" } },
                },
              },
            },
            waitFor: 5000,
          }),
        }, { label: "firecrawl-gangnamunni", timeoutMs: 60000 });
        if (!scrapeRes.ok) throw new Error(`Firecrawl 강남언니 scrape failed: ${scrapeRes.status}`);
        const data = await scrapeRes.json();
        const hospital = data.data?.json;
        if (hospital?.hospitalName) {
          channelData.gangnamUnni = {
            name: hospital.hospitalName,
            rawRating: hospital.rating,
            rating: typeof hospital.rating === 'number' && hospital.rating > 0 ? hospital.rating : null,
            ratingScale: "/10",
            totalReviews: hospital.totalReviews, doctors: (hospital.doctors || []).slice(0, 10),
            procedures: hospital.procedures || [], address: hospital.address,
            badges: hospital.badges || [], sourceUrl: gangnamUnniUrl,
          };
        } else {
          throw new Error("강남언니 scrape returned no hospital data");
        }
      }));
    }

    // ─── 5. Naver Blog + Place ───
    if (NAVER_CLIENT_ID && NAVER_CLIENT_SECRET && clinicName) {
      const naverHeaders = { "X-Naver-Client-Id": NAVER_CLIENT_ID, "X-Naver-Client-Secret": NAVER_CLIENT_SECRET };

      channelTasks.push(wrapChannelTask("naverBlog", async () => {
        // Get verified Naver Blog handle from Phase 1 for official blog URL
        const nbVerified = verified.naverBlog as Record<string, unknown> | null;
        const officialBlogHandle = nbVerified?.handle ? String(nbVerified.handle) : null;
        const officialBlogUrl = officialBlogHandle ? `https://blog.naver.com/${officialBlogHandle}` : null;

        // ─── 5a. Naver Search: 3rd-party blog mentions ───
        const query = encodeURIComponent(`${clinicName} 후기`);
        const res = await fetchWithRetry(`https://openapi.naver.com/v1/search/blog.json?query=${query}&display=10&sort=sim`, { headers: naverHeaders }, { label: "naver-blog" });
        if (!res.ok) throw new Error(`Naver Blog API returned ${res.status}`);
        const data = await res.json();

        // ─── 5b. Firecrawl: Official blog recent posts ───
        // Registry always provides the official blog URL — scrape it for real content metrics.
        let officialBlogContent: Record<string, unknown> | null = null;
        if (officialBlogUrl) {
          const FIRECRAWL_KEY = Deno.env.get("FIRECRAWL_API_KEY");
          if (FIRECRAWL_KEY) {
            try {
              const blogScrape = await fetchWithRetry(`https://api.firecrawl.dev/v1/scrape`, {
                method: "POST",
                headers: { "Content-Type": "application/json", Authorization: `Bearer ${FIRECRAWL_KEY}` },
                body: JSON.stringify({
                  url: officialBlogUrl,
                  formats: ["json"],
                  jsonOptions: {
                    prompt: "Extract the blog's recent posts: title, date, excerpt. Also total post count visible on the page, and the blog category/tag list.",
                    schema: {
                      type: "object",
                      properties: {
                        totalPosts: { type: "number" },
                        recentPosts: { type: "array", items: { type: "object", properties: { title: { type: "string" }, date: { type: "string" }, excerpt: { type: "string" } } } },
                        categories: { type: "array", items: { type: "string" } },
                      },
                    },
                  },
                  waitFor: 3000,
                }),
              }, { label: "firecrawl-naver-blog", timeoutMs: 45000 });
              if (blogScrape.ok) {
                const blogData = await blogScrape.json();
                officialBlogContent = blogData.data?.json || null;
                console.log(`[naverBlog] Official blog scraped: ${officialBlogContent?.totalPosts ?? 0} posts`);
              }
            } catch (e) {
              console.warn(`[naverBlog] Official blog Firecrawl failed (non-critical):`, e);
            }
          }
        }

        channelData.naverBlog = {
          totalResults: data.total || 0, searchQuery: `${clinicName} 후기`,
          officialBlogUrl,
          officialBlogHandle,
          // Official blog content (from Firecrawl — actual blog data)
          officialContent: officialBlogContent,
          // Blog mentions (third-party posts via Naver Search)
          posts: (data.items || []).slice(0, 10).map((item: Record<string, string>) => ({
            title: (item.title || "").replace(/<[^>]*>/g, ""),
            description: (item.description || "").replace(/<[^>]*>/g, ""),
            link: item.link, bloggerName: item.bloggername, postDate: item.postdate,
          })),
        };
      }));

      channelTasks.push(wrapChannelTask("naverPlace", async () => {
        // Try multiple queries to find the correct place (avoid same-name different clinics)
        const queries = [
          `${clinicName} 성형외과`,
          `${clinicName} 성형`,
          clinicName,
        ];
        for (const q of queries) {
          const query = encodeURIComponent(q);
          const res = await fetchWithRetry(`https://openapi.naver.com/v1/search/local.json?query=${query}&display=5&sort=comment`, { headers: naverHeaders }, { label: "naver-place" });
          if (!res.ok) continue;
          const data = await res.json();
          // Find the best match: prefer category containing 성형 or 피부
          const items = (data.items || []) as Record<string, string>[];
          const match = items.find(i =>
            (i.category || '').includes('성형') || (i.category || '').includes('피부')
          ) || items.find(i => {
            const name = (i.title || '').replace(/<[^>]*>/g, '').toLowerCase();
            return name.includes(clinicName.replace(/성형외과|병원|의원/g, '').trim().toLowerCase());
          }) || null;

          if (match) {
            channelData.naverPlace = {
              name: (match.title || "").replace(/<[^>]*>/g, ""),
              category: match.category, address: match.roadAddress || match.address,
              telephone: match.telephone, link: match.link, mapx: match.mapx, mapy: match.mapy,
            };
            break;
          }
        }
      }));
    }

    // ─── 6. Google Maps (Google Places API New) ───
    if (GOOGLE_PLACES_API_KEY && clinicName) {
      channelTasks.push(wrapChannelTask("googleMaps", async () => {
        const place = await searchGooglePlace(clinicName, address || undefined, GOOGLE_PLACES_API_KEY);
        if (place) {
          channelData.googleMaps = {
            name: place.name, rating: place.rating, reviewCount: place.reviewCount,
            address: place.address, phone: place.phone,
            clinicWebsite: place.clinicWebsite,
            mapsUrl: place.mapsUrl,
            placeId: place.placeId,
            category: place.category, openingHours: place.openingHours,
            topReviews: place.topReviews,
          };
        } else {
          throw new Error("Google Maps: no matching place found");
        }
      }));
    }

    // ─── 7. Market Analysis (Perplexity) ───
    if (PERPLEXITY_API_KEY && services.length > 0) {
      channelTasks.push(wrapChannelTask("marketAnalysis", async () => {
        const queries = [
          { id: "competitors", prompt: `${address || "강남"} 근처 ${services.slice(0, 3).join(", ")} 전문 성형외과/피부과 경쟁 병원 5곳을 분석해줘. 각 병원의 이름, 주요 시술, 온라인 평판, 마케팅 채널을 JSON 형식으로 제공해줘.` },
          { id: "keywords", prompt: `한국 ${services.slice(0, 3).join(", ")} 관련 검색 키워드 트렌드. 네이버와 구글에서 월간 검색량이 높은 키워드 20개, 경쟁 강도, 추천 롱테일 키워드를 JSON 형식으로 제공해줘.` },
          { id: "market", prompt: `한국 ${services[0] || "성형외과"} 시장 트렌드 2025-2026. 시장 규모, 성장률, 주요 트렌드, 마케팅 채널별 효과를 JSON 형식으로 제공해줘.` },
          { id: "targetAudience", prompt: `${clinicName}의 잠재 고객 분석. 연령대별, 성별, 관심 시술, 정보 탐색 채널, 의사결정 요인을 JSON 형식으로 제공해줘.` },
        ];

        const results = await Promise.allSettled(queries.map(async q => {
          const res = await fetchWithRetry("https://api.perplexity.ai/chat/completions", {
            method: "POST",
            headers: { "Content-Type": "application/json", Authorization: `Bearer ${PERPLEXITY_API_KEY}` },
            body: JSON.stringify({
              model: PERPLEXITY_MODEL, messages: [
                { role: "system", content: "You are a Korean medical marketing analyst. Always respond in Korean. Provide data in valid JSON format." },
                { role: "user", content: q.prompt },
              ], temperature: 0.3,
            }),
          }, { label: `perplexity:${q.id}`, timeoutMs: 60000 });
          const data = await res.json();
          return { id: q.id, content: data.choices?.[0]?.message?.content || "", citations: data.citations || [] };
        }));

        let successCount = 0;
        for (const r of results) {
          if (r.status === "fulfilled") {
            const { id, content, citations } = r.value;
            let parsed = content;
            const jsonMatch = content.match(/```json\n?([\s\S]*?)```/);
            if (jsonMatch) { try { parsed = JSON.parse(jsonMatch[1]); } catch {} }
            analysisData[id] = { data: parsed, citations };
            successCount++;
          }
        }
        if (successCount === 0) throw new Error("All Perplexity queries failed");
      }));
    }

    // ─── 8. Vision Analysis: Screenshots + Gemini Vision ───
    // Registers a "vision" channel task that captures screenshots, archives them to
    // Supabase Storage, runs Gemini Vision over them and stores the screenshot metadata
    // on channelData. The task is only registered when a Firecrawl key is configured.
    const GEMINI_API_KEY = Deno.env.get("GEMINI_API_KEY") || "";
    let screenshots: ScreenshotResult[] = [];

    if (FIRECRAWL_API_KEY) {
      const mainUrl = row.url || "";
      const siteMap: string[] = row.scrape_data?.siteMap || [];

      channelTasks.push(wrapChannelTask("vision", async () => {
        // Step 1: Capture screenshots of relevant pages + social channel landings
        screenshots = await captureAllScreenshots(mainUrl, siteMap, verified, FIRECRAWL_API_KEY);

        // Zero screenshots is treated as a task failure; surface whatever the capture
        // helper recorded so the cause shows up in channel_errors.
        if (screenshots.length === 0) {
          const debugInfo = screenshotErrors.length > 0
            ? screenshotErrors.join(" | ")
            : "No errors recorded — check FIRECRAWL_API_KEY";
          throw new Error(`No screenshots captured: ${debugInfo}`);
        }

        // ─── Step 2: Archive to Supabase Storage (GCS 7-day temporary URL → permanent storage) ──
        // Upload right now while the base64 is still in memory. On failure keep the GCS URL (non-fatal).
        // Path: clinics/{domain}/{reportId}/screenshots/{id}.png
        const domain = (() => {
          try {
            const h = new URL(row.url || "").hostname.replace('www.', '');
            return h || "unknown";
          } catch { return "unknown"; }
        })();
        const SUPABASE_STORAGE_BUCKET = "screenshots";

        // Sequential uploads (avoids the Supabase rate-limit risk of parallel uploads)
        for (const ss of screenshots) {
          if (!ss.base64) {
            console.warn(`[archive] ${ss.id}: base64 없음 — GCS URL 유지`);
            continue;
          }
          try {
            // Decode base64 → raw bytes for the storage upload.
            const binaryStr = atob(ss.base64);
            const bytes = new Uint8Array(binaryStr.length);
            for (let i = 0; i < binaryStr.length; i++) bytes[i] = binaryStr.charCodeAt(i);

            const storagePath = `clinics/${domain}/${reportId}/screenshots/${ss.id}.png`;
            const { error: uploadError } = await supabase.storage
              .from(SUPABASE_STORAGE_BUCKET)
              .upload(storagePath, bytes, { contentType: "image/png", upsert: true });

            if (uploadError) {
              console.error(`[archive] ❌ 업로드 실패 ${ss.id}: ${uploadError.message}`);
              continue; // keep the GCS URL
            }

            const { data: { publicUrl } } = supabase.storage
              .from(SUPABASE_STORAGE_BUCKET)
              .getPublicUrl(storagePath);

            // Swap the temporary URL for the permanent one and mark the screenshot archived.
            ss.url = publicUrl;
            ss.archived = true;
            console.log(`[archive] ✅ ${ss.id} → Supabase Storage`);
          } catch (err) {
            console.error(`[archive] ❌ 예외 ${ss.id}:`, err instanceof Error ? err.message : err);
          }
        }

        const archivedCount = screenshots.filter(ss => ss.archived).length;
        console.log(`[archive] ${archivedCount}/${screenshots.length}개 영구 저장 완료`);

        // Step 3: Run Gemini Vision on captured screenshots (base64 still in memory)
        if (GEMINI_API_KEY && screenshots.length > 0) {
          const vision = await runVisionAnalysis(screenshots, GEMINI_API_KEY);
          channelData.visionAnalysis = vision.merged;
          channelData.visionPerPage = vision.perPage;
        }

        // Step 4: Store screenshots metadata — ss.url is now the permanent URL (or GCS fallback)
        channelData.screenshots = screenshots.map(ss => ({
          id: ss.id,
          url: ss.url,        // permanent Supabase Storage URL (or GCS fallback if archive failed)
          channel: ss.channel,
          capturedAt: ss.capturedAt,
          caption: ss.caption,
          sourceUrl: ss.sourceUrl,
          // Use the flag set by the upload loop above rather than sniffing the URL for
          // "supabase": a substring match can disagree with what was actually archived.
          archived: ss.archived === true, // true = permanent, false = GCS fallback
        }));
      }));
    }

    // ─── Execute all channel tasks ───
    // The tasks must settle BEFORE the founding-year fallback below. The vision task
    // assigns channelData.visionAnalysis asynchronously; checking it earlier would never
    // see the Vision result, and the task's later assignment would overwrite any
    // text-derived founding year.
    const taskResults = await Promise.all(channelTasks);

    // ─── 9. Founding Year Text Fallback (Harness 2) ───
    // If Vision didn't find foundingYear, try regex extraction from scraped text
    if (!channelData.visionAnalysis?.foundingYear) {
      const htmlText = row.scrape_data?.markdown || row.scrape_data?.text || "";
      if (htmlText) {
        const textYear = extractFoundingYear(htmlText);
        if (textYear) {
          // Keep whatever Vision produced and only fill in the missing year.
          channelData.visionAnalysis = channelData.visionAnalysis || {};
          channelData.visionAnalysis.foundingYear = String(textYear);
          console.log(`[harness] Founding year extracted from text fallback: ${textYear}`);
        }
      }
    }

    // ─── Build channelErrors from task results ───
    // Every failed task is recorded under its channel name (message + duration); the
    // success/failure tallies then decide the overall collection status.
    const channelErrors: Record<string, { error: string; durationMs: number }> = {};
    for (const result of taskResults) {
      if (result.success) continue;
      channelErrors[result.channel] = {
        error: result.error || "Unknown error",
        durationMs: result.durationMs,
      };
    }

    const totalTasks = taskResults.length;
    const successCount = taskResults.filter((result) => result.success).length;
    const failedCount = totalTasks - successCount;

    // No failures → "collected"; some failures → "partial"; nothing succeeded → "collection_failed".
    const isFullFailure = failedCount > 0 && successCount === 0;
    const isPartial = failedCount > 0 && successCount > 0;
    const collectionStatus = failedCount === 0
      ? "collected"
      : successCount > 0 ? "partial" : "collection_failed";

    console.log(`[collect] ${successCount}/${totalTasks} tasks succeeded. Status: ${collectionStatus}`);
    if (failedCount > 0) {
      console.warn(`[collect] Failed channels:`, JSON.stringify(channelErrors));
    }

    // ─── Storage: save channel_data.json to clinics/{domain}/{reportId}/ ───
    // Best-effort archive of the full collection payload; a failure is logged and does
    // not abort the request.
    try {
      const domain = (() => {
        try { return new URL(row.url || "").hostname.replace('www.', ''); } catch { return "unknown"; }
      })();
      const payload = {
        channelData, analysisData, channelErrors,
        clinicName, address, services,
        collectionStatus, collectedAt: new Date().toISOString(),
      };
      const jsonBytes = new TextEncoder().encode(JSON.stringify(payload, null, 2));
      // The storage client reports failures through the returned `error` rather than by
      // throwing, so it must be checked explicitly before logging success.
      const { error: uploadError } = await supabase.storage
        .from('clinic-data')
        .upload(`clinics/${domain}/${reportId}/channel_data.json`, jsonBytes, {
          contentType: 'application/json',
          upsert: true,
        });
      if (uploadError) throw new Error(uploadError.message);
      console.log(`[storage] channel_data.json → clinics/${domain}/${reportId}/`);
    } catch (e) {
      console.warn('[storage] channel_data.json upload failed:', e instanceof Error ? e.message : e);
    }

    // ─── UNCONDITIONAL Legacy Save: always persist whatever we have ───
    // Writes the collected data, per-channel errors and the final status onto the
    // marketing_reports row. The client returns failures via `error` instead of throwing,
    // so log it here; otherwise a failed save would go completely unnoticed.
    const { error: legacySaveError } = await supabase.from("marketing_reports").update({
      channel_data: channelData,
      analysis_data: { clinicName, services, address, analysis: analysisData, analyzedAt: new Date().toISOString() },
      channel_errors: channelErrors,
      status: collectionStatus,
      updated_at: new Date().toISOString(),
    }).eq("id", reportId);
    if (legacySaveError) {
      console.error(`[collect] marketing_reports update failed for ${reportId}: ${legacySaveError.message}`);
    }

    // ─── V3: channel_snapshots + screenshots + analysis_runs ───
    // Only runs when the caller supplied both a clinic id and a run id. Writes one
    // snapshot row per collected channel, one evidence row per screenshot, and then
    // updates the analysis_run row with the raw data and final status.
    const clinicId = inputClinicId || null;
    const runId = inputRunId || null;

    if (clinicId && runId) {
      try {
        // The Supabase client returns failures in `error` instead of throwing. Collect
        // them so every write is still attempted, then re-raise below so the catch
        // block can record the failure on the analysis_run row.
        const writeErrors: string[] = [];

        // Channel snapshots — INSERT one row per channel (time-series!)
        const snapshotInserts: Record<string, unknown>[] = [];

        const igData = channelData.instagram as Record<string, unknown> | undefined;
        if (igData) {
          snapshotInserts.push({
            clinic_id: clinicId, run_id: runId, channel: 'instagram',
            handle: igData.username, followers: igData.followers, posts: igData.posts,
            health_score: computeHealthScore('instagram', igData),
            details: igData,
          });
        }

        const ytData = channelData.youtube as Record<string, unknown> | undefined;
        if (ytData) {
          snapshotInserts.push({
            clinic_id: clinicId, run_id: runId, channel: 'youtube',
            handle: ytData.handle || ytData.channelName, followers: ytData.subscribers,
            posts: ytData.totalVideos, total_views: ytData.totalViews,
            health_score: computeHealthScore('youtube', ytData),
            details: ytData,
          });
        }

        const fbData = channelData.facebook as Record<string, unknown> | undefined;
        if (fbData) {
          snapshotInserts.push({
            clinic_id: clinicId, run_id: runId, channel: 'facebook',
            handle: fbData.pageName, followers: fbData.followers,
            health_score: computeHealthScore('facebook', fbData),
            details: fbData,
          });
        }

        const guData = channelData.gangnamUnni as Record<string, unknown> | undefined;
        if (guData) {
          snapshotInserts.push({
            clinic_id: clinicId, run_id: runId, channel: 'gangnamUnni',
            handle: guData.name, rating: guData.rating, rating_scale: 10,
            reviews: guData.totalReviews,
            health_score: computeHealthScore('gangnamUnni', guData),
            details: guData,
          });
        }

        const gmData = channelData.googleMaps as Record<string, unknown> | undefined;
        if (gmData) {
          snapshotInserts.push({
            clinic_id: clinicId, run_id: runId, channel: 'googleMaps',
            handle: gmData.name, rating: gmData.rating, rating_scale: 5,
            reviews: gmData.reviewCount,
            health_score: computeHealthScore('googleMaps', gmData),
            details: gmData,
          });
        }

        const nbData = channelData.naverBlog as Record<string, unknown> | undefined;
        if (nbData) {
          snapshotInserts.push({
            clinic_id: clinicId, run_id: runId, channel: 'naverBlog',
            handle: nbData.officialBlogHandle,
            health_score: computeHealthScore('naverBlog', nbData),
            details: nbData,
          });
        }

        if (snapshotInserts.length > 0) {
          const { error: snapshotError } = await supabase.from("channel_snapshots").insert(snapshotInserts);
          if (snapshotError) writeErrors.push(`channel_snapshots: ${snapshotError.message}`);
        }

        // Screenshots — INSERT evidence rows
        const screenshotList = (channelData.screenshots || []) as Record<string, unknown>[];
        if (screenshotList.length > 0) {
          const { error: screenshotError } = await supabase.from("screenshots").insert(
            screenshotList.map(ss => ({
              clinic_id: clinicId, run_id: runId,
              // page_type is the second dash-separated segment of the screenshot id, defaulting to 'main'.
              channel: ss.channel, page_type: (ss.id as string || '').split('-')[1] || 'main',
              url: ss.url, source_url: ss.sourceUrl, caption: ss.caption,
            }))
          );
          if (screenshotError) writeErrors.push(`screenshots: ${screenshotError.message}`);
        }

        // Update analysis_run with status + errors
        const { error: runUpdateError } = await supabase.from("analysis_runs").update({
          raw_channel_data: channelData,
          analysis_data: { clinicName, services, address, analysis: analysisData },
          vision_analysis: channelData.visionAnalysis || {},
          channel_errors: channelErrors,
          status: collectionStatus,
        }).eq("id", runId);
        if (runUpdateError) writeErrors.push(`analysis_runs: ${runUpdateError.message}`);

        if (writeErrors.length > 0) {
          throw new Error(writeErrors.join("; "));
        }

      } catch (e) {
        const errMsg = e instanceof Error ? e.message : String(e);
        console.error("V3 dual-write error:", errMsg);
        // Best-effort: record error into analysis_run so it's visible in DB
        try {
          await supabase.from("analysis_runs").update({
            error_message: `V3 dual-write failed: ${errMsg}`,
            status: "collection_error",
          }).eq("id", runId);
        } catch { /* ignore secondary failure */ }
      }
    }

    // Final JSON response: overall status, the collected data and a per-task summary.
    // channelErrors is omitted from the payload when no task failed.
    const hasChannelErrors = Object.keys(channelErrors).length > 0;
    const collectResponseBody = {
      success: !isFullFailure,
      status: collectionStatus,
      channelData,
      analysisData,
      channelErrors: hasChannelErrors ? channelErrors : undefined,
      partialFailure: isPartial,
      taskSummary: { total: totalTasks, succeeded: successCount, failed: failedCount },
      collectedAt: new Date().toISOString(),
    };
    const collectResponseHeaders = { ...corsHeaders, "Content-Type": "application/json" };

    return new Response(JSON.stringify(collectResponseBody), { headers: collectResponseHeaders });
  } catch (error) {
    // Anything thrown by the handler body ends up here. The caught value is not
    // guaranteed to be an Error, so narrow it before reading `.message`; otherwise the
    // response would carry no error text at all for non-Error throws.
    const message = error instanceof Error ? error.message : String(error);
    console.error("[collect] Unhandled error:", message);
    return new Response(
      JSON.stringify({ success: false, error: message }),
      { status: 500, headers: { ...corsHeaders, "Content-Type": "application/json" } },
    );
  }
});