// o2o-infinith-demo/supabase/functions/collect-channel-data/index.ts

import "@supabase/functions-js/edge-runtime.d.ts";
import { createClient } from "https://esm.sh/@supabase/supabase-js@2";
import type { VerifiedChannels } from "../_shared/verifyHandles.ts";
import { PERPLEXITY_MODEL } from "../_shared/config.ts";
import { captureAllScreenshots, runVisionAnalysis, type ScreenshotResult } from "../_shared/visionAnalysis.ts";

// CORS headers attached to every response this function returns, including the
// reply to the OPTIONS preflight. Any origin is allowed ("*").
const corsHeaders = {
  "Access-Control-Allow-Origin": "*",
  "Access-Control-Allow-Headers": "authorization, x-client-info, apikey, content-type",
};

// Root of the Apify v2 REST API; the actor-run and dataset URLs are built from it.
const APIFY_BASE = "https://api.apify.com/v2";

/** JSON body accepted by this function. */
interface CollectRequest {
  /** `id` of the `marketing_reports` row to collect channel data for. */
  reportId: string;
}

/**
 * Starts an Apify actor run and returns items from the run's default dataset.
 *
 * The run is started with `waitForFinish=120`; up to 20 items are then read from
 * the dataset named in the run response. The call is best-effort: a network
 * error, a non-2xx response, malformed JSON, a missing dataset id, or a dataset
 * payload that is not an array is logged and yields `[]`, so a single failing
 * actor cannot abort a caller that is looping over several inputs.
 *
 * @param actorId Apify actor identifier, used verbatim in the `/acts/{actorId}` path.
 * @param input   Actor input, sent as the JSON request body.
 * @param token   Apify API token, sent as the `token` query parameter.
 * @returns The dataset items, or `[]` when nothing could be collected.
 */
async function runApifyActor(actorId: string, input: Record<string, unknown>, token: string): Promise<unknown[]> {
  // Encode the token so unexpected characters cannot corrupt the query string.
  const auth = `token=${encodeURIComponent(token)}`;

  try {
    // NOTE(review): Apify documents an upper bound for waitForFinish — confirm that 120 is honored.
    const runRes = await fetch(`${APIFY_BASE}/acts/${actorId}/runs?${auth}&waitForFinish=120`, {
      method: "POST",
      headers: { "Content-Type": "application/json" },
      body: JSON.stringify(input),
    });
    if (!runRes.ok) {
      console.warn(`[apify] ${actorId}: starting the run failed with HTTP ${runRes.status}`);
      return [];
    }

    const run = await runRes.json();
    const datasetId = run?.data?.defaultDatasetId;
    if (!datasetId) return [];

    const itemsRes = await fetch(`${APIFY_BASE}/datasets/${datasetId}/items?${auth}&limit=20`);
    if (!itemsRes.ok) {
      console.warn(`[apify] ${actorId}: reading dataset ${datasetId} failed with HTTP ${itemsRes.status}`);
      return [];
    }

    // An error payload is an object, not an array — never hand that back as items.
    const items: unknown = await itemsRes.json();
    return Array.isArray(items) ? items : [];
  } catch (err) {
    console.warn(`[apify] ${actorId}: request failed`, err);
    return [];
  }
}

/**
 * Phase 2: Collect Channel Data
 *
 * Loads the report row identified by `reportId`, reads the verified handles that
 * Phase 1 stored on it, and runs every configured collector in parallel:
 * Instagram, YouTube, Facebook, Gangnam Unni, Naver Blog / Place, Google Maps,
 * Perplexity market analysis, and screenshots + vision analysis. A collector
 * only runs when its API key is configured and its inputs are present.
 * Collectors are best-effort: one that fails is logged and skipped without
 * affecting the others. The combined result is written back to
 * `marketing_reports` and returned to the caller.
 *
 * Request body: `{ reportId: string }`.
 * Response: `{ success: true, channelData, analysisData, collectedAt }`, or
 * `{ success: false, error }` with HTTP 500 when the request is invalid, the
 * report cannot be loaded, or the collected data cannot be saved.
 */
Deno.serve(async (req) => {
  // CORS preflight
  if (req.method === "OPTIONS") {
    return new Response("ok", { headers: corsHeaders });
  }

  try {
    const { reportId } = (await req.json()) as CollectRequest;
    if (!reportId) throw new Error("reportId is required");

    // Read Phase 1 results from DB
    const supabaseUrl = Deno.env.get("SUPABASE_URL")!;
    const supabaseKey = Deno.env.get("SUPABASE_SERVICE_ROLE_KEY")!;
    const supabase = createClient(supabaseUrl, supabaseKey);

    const { data: row, error: fetchError } = await supabase
      .from("marketing_reports")
      .select("*")
      .eq("id", reportId)
      .single();

    if (fetchError || !row) throw new Error(`Report not found: ${fetchError?.message}`);

    // Without Phase 1 output the collectors below would crash with an opaque
    // TypeError; fail early with an actionable message, before touching status.
    if (row.verified_channels == null) {
      throw new Error("verified_channels is missing for this report — run Phase 1 first");
    }
    const verified = row.verified_channels as VerifiedChannels;
    const clinicName = row.clinic_name || "";
    const address = row.scrape_data?.clinic?.address || "";
    const services: string[] = row.scrape_data?.clinic?.services || [];

    // Progress marker only — a failure here should not stop the collection.
    const { error: statusError } = await supabase.from("marketing_reports").update({ status: "collecting" }).eq("id", reportId);
    if (statusError) {
      console.warn(`[collect-channel-data] could not mark report as collecting: ${statusError.message}`);
    }

    const APIFY_TOKEN = Deno.env.get("APIFY_API_TOKEN") || "";
    const YOUTUBE_API_KEY = Deno.env.get("YOUTUBE_API_KEY") || "";
    const FIRECRAWL_API_KEY = Deno.env.get("FIRECRAWL_API_KEY") || "";
    const PERPLEXITY_API_KEY = Deno.env.get("PERPLEXITY_API_KEY") || "";
    const NAVER_CLIENT_ID = Deno.env.get("NAVER_CLIENT_ID") || "";
    const NAVER_CLIENT_SECRET = Deno.env.get("NAVER_CLIENT_SECRET") || "";

    const channelData: Record<string, unknown> = {};
    const analysisData: Record<string, unknown> = {};
    const tasks: Promise<void>[] = [];

    // Starts a collector immediately and tracks it. A failure is logged under the
    // collector's label instead of disappearing inside Promise.allSettled.
    const addTask = (label: string, collect: () => Promise<void>): void => {
      tasks.push(collect().catch((err: unknown) => {
        console.error(`[collect-channel-data] ${label} collector failed:`, err);
      }));
    };

    // ─── 1. Instagram (multi-account) — try ALL candidates including unverified ───
    const igCandidates = (verified.instagram || []).filter((v: Record<string, unknown>) => v.handle);
    if (APIFY_TOKEN && igCandidates.length > 0) {
      addTask("instagram", async () => {
        const accounts: Record<string, unknown>[] = [];
        // Candidates are scraped one at a time, in the order Phase 1 listed them.
        for (const ig of igCandidates) {
          const items = await runApifyActor("apify~instagram-profile-scraper", { usernames: [ig.handle], resultsLimit: 12 }, APIFY_TOKEN);
          const profile = (items as Record<string, unknown>[])[0];
          if (profile && !profile.error) {
            accounts.push({
              username: profile.username,
              followers: profile.followersCount,
              following: profile.followsCount,
              posts: profile.postsCount,
              bio: profile.biography,
              isBusinessAccount: profile.isBusinessAccount,
              externalUrl: profile.externalUrl,
              igtvVideoCount: profile.igtvVideoCount,
              latestPosts: ((profile.latestPosts as Record<string, unknown>[]) || []).slice(0, 12).map(p => ({
                type: p.type, likes: p.likesCount, comments: p.commentsCount,
                caption: p.caption, timestamp: p.timestamp,
              })),
            });
          }
        }
        if (accounts.length > 0) {
          channelData.instagramAccounts = accounts;
          // The first account that returned a profile doubles as the primary one.
          channelData.instagram = accounts[0];
        }
      });
    }

    // ─── 2. YouTube ───
    const ytVerified = verified.youtube as Record<string, unknown> | null;
    if (YOUTUBE_API_KEY && ytVerified?.verified) {
      addTask("youtube", async () => {
        const YT = "https://www.googleapis.com/youtube/v3";
        let channelId = (ytVerified?.channelId as string) || "";

        // If no channelId, try to resolve from handle
        if (!channelId && ytVerified?.handle) {
          const h = (ytVerified.handle as string).replace(/^@/, '');
          if (h.startsWith('UC')) {
            // The stored "handle" is already a channel id.
            channelId = h;
          } else {
            for (const param of ['forHandle', 'forUsername']) {
              // Handles may contain non-ASCII characters, so they must be URL-encoded.
              const lookupRes = await fetch(`${YT}/channels?part=id&${param}=${encodeURIComponent(h)}&key=${YOUTUBE_API_KEY}`);
              const lookupData = await lookupRes.json();
              channelId = lookupData.items?.[0]?.id || '';
              if (channelId) break;
            }
          }
        }
        if (!channelId) return;

        const chRes = await fetch(`${YT}/channels?part=snippet,statistics,brandingSettings&id=${channelId}&key=${YOUTUBE_API_KEY}`);
        const chData = await chRes.json();
        const channel = chData.items?.[0];
        if (!channel) return;

        const stats = channel.statistics || {};
        const snippet = channel.snippet || {};

        // Popular videos
        const searchRes = await fetch(`${YT}/search?part=snippet&channelId=${channelId}&order=viewCount&type=video&maxResults=10&key=${YOUTUBE_API_KEY}`);
        const searchData = await searchRes.json();
        const videoIds = (searchData.items || []).map((i: Record<string, unknown>) => (i.id as Record<string, string>)?.videoId).filter(Boolean).join(",");

        // The search endpoint is only asked for snippets; statistics and duration
        // come from a second lookup by video id.
        let videos: Record<string, unknown>[] = [];
        if (videoIds) {
          const vRes = await fetch(`${YT}/videos?part=snippet,statistics,contentDetails&id=${videoIds}&key=${YOUTUBE_API_KEY}`);
          const vData = await vRes.json();
          videos = vData.items || [];
        }

        channelData.youtube = {
          channelId, channelName: snippet.title, handle: snippet.customUrl,
          description: snippet.description, publishedAt: snippet.publishedAt,
          thumbnailUrl: snippet.thumbnails?.default?.url,
          subscribers: parseInt(stats.subscriberCount || "0", 10),
          totalViews: parseInt(stats.viewCount || "0", 10),
          totalVideos: parseInt(stats.videoCount || "0", 10),
          videos: videos.slice(0, 10).map(v => {
            const vs = v.statistics as Record<string, string> || {};
            const vSnip = v.snippet as Record<string, unknown> || {};
            const vCon = v.contentDetails as Record<string, string> || {};
            return {
              title: vSnip.title, views: parseInt(vs.viewCount || "0", 10),
              likes: parseInt(vs.likeCount || "0", 10), comments: parseInt(vs.commentCount || "0", 10),
              date: vSnip.publishedAt, duration: vCon.duration,
              url: `https://www.youtube.com/watch?v=${v.id}`,
              thumbnail: (vSnip.thumbnails as Record<string, Record<string, string>>)?.medium?.url,
            };
          }),
        };
      });
    }

    // ─── 3. Facebook ───
    const fbVerified = verified.facebook as Record<string, unknown> | null;
    if (APIFY_TOKEN && fbVerified?.verified) {
      addTask("facebook", async () => {
        // Prefer the URL stored by Phase 1; otherwise build one from the handle.
        const fbUrl = (fbVerified.url as string) || `https://www.facebook.com/${fbVerified.handle}`;
        const items = await runApifyActor("apify~facebook-pages-scraper", { startUrls: [{ url: fbUrl }] }, APIFY_TOKEN);
        const page = (items as Record<string, unknown>[])[0];
        if (page?.title) {
          channelData.facebook = {
            pageName: page.title, pageUrl: page.pageUrl || fbUrl,
            followers: page.followers, likes: page.likes, categories: page.categories,
            email: page.email, phone: page.phone, website: page.website,
            address: page.address, intro: page.intro, rating: page.rating,
            profilePictureUrl: page.profilePictureUrl,
          };
        }
      });
    }

    // ─── 4. Gangnam Unni ───
    const guVerified = verified.gangnamUnni as Record<string, unknown> | null;
    if (FIRECRAWL_API_KEY && guVerified?.verified && guVerified.url) {
      addTask("gangnamUnni", async () => {
        const scrapeRes = await fetch("https://api.firecrawl.dev/v1/scrape", {
          method: "POST",
          headers: { "Content-Type": "application/json", Authorization: `Bearer ${FIRECRAWL_API_KEY}` },
          body: JSON.stringify({
            url: guVerified!.url as string,
            formats: ["json"],
            jsonOptions: {
              prompt: "Extract: hospital name, overall rating (강남언니 rating is always out of 10, NOT out of 5), total review count, doctors with names/ratings/review counts/specialties, procedures offered, address, certifications/badges",
              schema: {
                type: "object",
                properties: {
                  hospitalName: { type: "string" }, rating: { type: "number" }, totalReviews: { type: "number" },
                  doctors: { type: "array", items: { type: "object", properties: { name: { type: "string" }, rating: { type: "number" }, reviews: { type: "number" }, specialty: { type: "string" } } } },
                  procedures: { type: "array", items: { type: "string" } },
                  address: { type: "string" }, badges: { type: "array", items: { type: "string" } },
                },
              },
            },
            waitFor: 5000,
          }),
        });
        const data = await scrapeRes.json();
        const hospital = data.data?.json;
        if (hospital?.hospitalName) {
          channelData.gangnamUnni = {
            name: hospital.hospitalName,
            rawRating: hospital.rating,
            // Heuristic: a value in (0, 5] is treated as a /5 rating and doubled so
            // everything is reported on the /10 scale; rawRating keeps the original.
            rating: typeof hospital.rating === 'number' && hospital.rating > 0 && hospital.rating <= 5 ? hospital.rating * 2 : hospital.rating,
            ratingScale: "/10",
            totalReviews: hospital.totalReviews, doctors: (hospital.doctors || []).slice(0, 10),
            procedures: hospital.procedures || [], address: hospital.address,
            badges: hospital.badges || [], sourceUrl: guVerified!.url as string,
          };
        }
      });
    }

    // ─── 5. Naver Blog + Place ───
    if (NAVER_CLIENT_ID && NAVER_CLIENT_SECRET && clinicName) {
      const naverHeaders = { "X-Naver-Client-Id": NAVER_CLIENT_ID, "X-Naver-Client-Secret": NAVER_CLIENT_SECRET };

      addTask("naverBlog", async () => {
        // Get verified Naver Blog handle from Phase 1 for official blog URL
        const nbVerified = verified.naverBlog as Record<string, unknown> | null;
        const officialBlogHandle = nbVerified?.handle ? String(nbVerified.handle) : null;

        const query = encodeURIComponent(`${clinicName} 후기`);
        const res = await fetch(`https://openapi.naver.com/v1/search/blog.json?query=${query}&display=10&sort=sim`, { headers: naverHeaders });
        if (!res.ok) return;
        const data = await res.json();
        channelData.naverBlog = {
          totalResults: data.total || 0, searchQuery: `${clinicName} 후기`,
          // Official blog URL from Phase 1 verified handle
          officialBlogUrl: officialBlogHandle ? `https://blog.naver.com/${officialBlogHandle}` : null,
          officialBlogHandle: officialBlogHandle,
          // Blog mentions (third-party posts, NOT the official blog)
          posts: (data.items || []).slice(0, 10).map((item: Record<string, string>) => ({
            // Strip the HTML tags the search API embeds in titles and descriptions.
            title: (item.title || "").replace(/<[^>]*>/g, ""),
            description: (item.description || "").replace(/<[^>]*>/g, ""),
            link: item.link, bloggerName: item.bloggername, postDate: item.postdate,
          })),
        };
      });

      addTask("naverPlace", async () => {
        // Try multiple queries to find the correct place (avoid same-name different clinics)
        const queries = [
          `${clinicName} 성형외과`,
          `${clinicName} 성형`,
          clinicName,
        ];
        // Clinic name with generic suffixes removed, used for the name-based fallback.
        const nameNeedle = clinicName.replace(/성형외과|병원|의원/g, '').trim().toLowerCase();
        for (const q of queries) {
          const query = encodeURIComponent(q);
          const res = await fetch(`https://openapi.naver.com/v1/search/local.json?query=${query}&display=5&sort=comment`, { headers: naverHeaders });
          if (!res.ok) continue;
          const data = await res.json();
          // Find the best match: prefer a category containing 성형 or 피부,
          // then fall back to a result whose title contains the clinic name.
          const items = (data.items || []) as Record<string, string>[];
          const match = items.find(i =>
            (i.category || '').includes('성형') || (i.category || '').includes('피부')
          ) || items.find(i => {
            const name = (i.title || '').replace(/<[^>]*>/g, '').toLowerCase();
            // An empty needle would match every title, so require a non-empty one.
            return nameNeedle.length > 0 && name.includes(nameNeedle);
          }) || null;

          if (match) {
            channelData.naverPlace = {
              name: (match.title || "").replace(/<[^>]*>/g, ""),
              category: match.category, address: match.roadAddress || match.address,
              telephone: match.telephone, link: match.link, mapx: match.mapx, mapy: match.mapy,
            };
            break;
          }
        }
      });
    }

    // ─── 6. Google Maps ───
    if (APIFY_TOKEN && clinicName) {
      addTask("googleMaps", async () => {
        // Queries are tried in order; the first one that returns any place wins.
        const queries = [`${clinicName} 성형외과`, clinicName, `${clinicName} ${address || "강남"}`];
        let items: unknown[] = [];
        for (const q of queries) {
          items = await runApifyActor("compass~crawler-google-places", {
            searchStringsArray: [q], maxCrawledPlacesPerSearch: 3, language: "ko", maxReviews: 10,
          }, APIFY_TOKEN);
          if ((items as Record<string, unknown>[]).length > 0) break;
        }
        const place = (items as Record<string, unknown>[])[0];
        if (place) {
          channelData.googleMaps = {
            name: place.title, rating: place.totalScore, reviewCount: place.reviewsCount,
            address: place.address, phone: place.phone,
            clinicWebsite: place.website, // clinic's own website (not Maps URL)
            mapsUrl: place.url || (place.title ? `https://www.google.com/maps/search/${encodeURIComponent(String(place.title))}` : ''),
            category: place.categoryName, openingHours: place.openingHours,
            topReviews: ((place.reviews as Record<string, unknown>[]) || []).slice(0, 10).map(r => ({
              stars: r.stars, text: r.text, publishedAtDate: r.publishedAtDate,
            })),
          };
        }
      });
    }

    // ─── 7. Market Analysis (Perplexity) ───
    if (PERPLEXITY_API_KEY && services.length > 0) {
      addTask("marketAnalysis", async () => {
        const queries = [
          { id: "competitors", prompt: `${address || "강남"} 근처 ${services.slice(0, 3).join(", ")} 전문 성형외과/피부과 경쟁 병원 5곳을 분석해줘. 각 병원의 이름, 주요 시술, 온라인 평판, 마케팅 채널을 JSON 형식으로 제공해줘.` },
          { id: "keywords", prompt: `한국 ${services.slice(0, 3).join(", ")} 관련 검색 키워드 트렌드. 네이버와 구글에서 월간 검색량이 높은 키워드 20개, 경쟁 강도, 추천 롱테일 키워드를 JSON 형식으로 제공해줘.` },
          { id: "market", prompt: `한국 ${services[0] || "성형외과"} 시장 트렌드 2025-2026. 시장 규모, 성장률, 주요 트렌드, 마케팅 채널별 효과를 JSON 형식으로 제공해줘.` },
          { id: "targetAudience", prompt: `${clinicName}의 잠재 고객 분석. 연령대별, 성별, 관심 시술, 정보 탐색 채널, 의사결정 요인을 JSON 형식으로 제공해줘.` },
        ];

        const results = await Promise.allSettled(queries.map(async q => {
          const res = await fetch("https://api.perplexity.ai/chat/completions", {
            method: "POST",
            headers: { "Content-Type": "application/json", Authorization: `Bearer ${PERPLEXITY_API_KEY}` },
            body: JSON.stringify({
              model: PERPLEXITY_MODEL, messages: [
                { role: "system", content: "You are a Korean medical marketing analyst. Always respond in Korean. Provide data in valid JSON format." },
                { role: "user", content: q.prompt },
              ], temperature: 0.3,
            }),
          });
          const data = await res.json();
          return { id: q.id, content: data.choices?.[0]?.message?.content || "", citations: data.citations || [] };
        }));

        results.forEach((r, index) => {
          if (r.status === "fulfilled") {
            const { id, content, citations } = r.value;
            let parsed = content;
            const jsonMatch = content.match(/```json\n?([\s\S]*?)```/);
            if (jsonMatch) {
              try {
                parsed = JSON.parse(jsonMatch[1]);
              } catch {
                // Deliberate fallback: keep the raw text when the fenced block is not valid JSON.
              }
            }
            analysisData[id] = { data: parsed, citations };
          } else {
            console.error(`[collect-channel-data] market analysis query "${queries[index].id}" failed:`, r.reason);
          }
        });
      });
    }

    // ─── 8. Vision Analysis: Screenshots + Gemini Vision ───
    const GEMINI_API_KEY = Deno.env.get("GEMINI_API_KEY") || "";
    let screenshots: ScreenshotResult[] = [];

    if (FIRECRAWL_API_KEY) {
      const mainUrl = row.url || "";
      const siteMap: string[] = row.scrape_data?.siteMap || [];

      addTask("vision", async () => {
        // Capture screenshots of relevant pages + social channel landings
        screenshots = await captureAllScreenshots(mainUrl, siteMap, verified, FIRECRAWL_API_KEY);

        // Run Gemini Vision on captured screenshots
        if (GEMINI_API_KEY && screenshots.length > 0) {
          const vision = await runVisionAnalysis(screenshots, GEMINI_API_KEY);
          channelData.visionAnalysis = vision.merged;
          channelData.visionPerPage = vision.perPage;
        }

        // Store screenshot entries for the report. When a capture carries base64
        // data it is inlined as a data URL; otherwise its `url` is stored as-is.
        // NOTE(review): inlined images end up in both the DB row and the response
        // body — confirm this payload size is intended.
        channelData.screenshots = screenshots.map(ss => ({
          id: ss.id,
          url: ss.base64 ? `data:image/png;base64,${ss.base64}` : ss.url,
          channel: ss.channel,
          capturedAt: ss.capturedAt,
          caption: ss.caption,
          sourceUrl: ss.sourceUrl,
        }));
      });
    }

    // ─── Execute all tasks ───
    // Every task already catches and logs its own failure, so this only waits.
    await Promise.allSettled(tasks);

    // ─── Save to DB ───
    // supabase-js reports failures through `error` rather than throwing, so it has
    // to be checked explicitly; otherwise a failed save would be reported as success.
    const { error: saveError } = await supabase.from("marketing_reports").update({
      channel_data: channelData,
      analysis_data: { clinicName, services, address, analysis: analysisData, analyzedAt: new Date().toISOString() },
      status: "collected",
      updated_at: new Date().toISOString(),
    }).eq("id", reportId);
    if (saveError) throw new Error(`Failed to save collected data: ${saveError.message}`);

    return new Response(
      JSON.stringify({ success: true, channelData, analysisData, collectedAt: new Date().toISOString() }),
      { headers: { ...corsHeaders, "Content-Type": "application/json" } },
    );
  } catch (error) {
    // `error` is `unknown`: anything can be thrown, so narrow before reading `.message`.
    const message = error instanceof Error ? error.message : String(error);
    console.error("[collect-channel-data] request failed:", error);
    // NOTE(review): a failure after the "collecting" update leaves the row in that
    // status — consider writing a failure status here once the allowed values are confirmed.
    return new Response(
      JSON.stringify({ success: false, error: message }),
      { status: 500, headers: { ...corsHeaders, "Content-Type": "application/json" } },
    );
  }
});