// o2o-infinith-demo/supabase/functions/enrich-channels/index.ts

import "@supabase/functions-js/edge-runtime.d.ts";
import { createClient } from "https://esm.sh/@supabase/supabase-js@2";
import { normalizeInstagramHandle, normalizeYouTubeChannel } from "../_shared/normalizeHandles.ts";

/**
 * CORS headers attached to every response of this function, including the
 * preflight reply. Any origin is allowed; the listed request headers are the
 * ones callers may send.
 */
const corsHeaders = {
  "Access-Control-Allow-Origin": "*",
  "Access-Control-Allow-Headers": "authorization, x-client-info, apikey, content-type",
};

// Base URL of the Apify REST API (v2); actor-run and dataset endpoints are built from it.
const APIFY_BASE = "https://api.apify.com/v2";

/**
 * JSON body accepted by the enrich-channels function.
 * The body is cast to this shape without runtime validation, so fields may be
 * missing at runtime even where the type says they are required.
 */
interface EnrichRequest {
  // Id of the `marketing_reports` row that receives the enrichment; when falsy nothing is persisted.
  reportId: string;
  // Clinic name used to build the Google Maps, Gangnam Unni and Naver search queries.
  clinicName: string;
  // Single Instagram handle; only used when `instagramHandles` is absent or empty.
  instagramHandle?: string;
  // Instagram handles to try in order; takes precedence over `instagramHandle`.
  instagramHandles?: string[];
  // YouTube channel reference passed through `normalizeYouTubeChannel` (URL, handle or channel id).
  youtubeChannelId?: string;
  // Facebook page URL, or a page name (optional leading "@") that is expanded to a facebook.com URL.
  facebookHandle?: string;
  // Optional address appended to one of the Google Maps queries; "강남" is used when absent.
  address?: string;
}

/**
 * Starts a run of an Apify actor, asks the API to wait for it to finish, and
 * returns up to 20 items from the run's default dataset.
 *
 * The call is best-effort: a non-2xx response from either endpoint, a run
 * without a default dataset, or a dataset payload that is not a JSON array
 * all yield an empty array instead of throwing. Network errors and malformed
 * JSON on a 2xx response still reject.
 *
 * @param actorId Actor identifier as used in the Apify URL (e.g. `user~actor-name`).
 * @param input   Actor input, sent as the JSON request body.
 * @param token   Apify API token. Sent in the `Authorization` header rather
 *                than the query string so it does not end up in URL logs.
 * @returns Dataset items of the run (at most 20), or `[]` on failure.
 */
async function runApifyActor(
  actorId: string,
  input: Record<string, unknown>,
  token: string
): Promise<unknown[]> {
  const authHeaders = { Authorization: `Bearer ${token}` };

  // NOTE(review): Apify documents an upper bound for waitForFinish — confirm
  // that 120 is honored rather than clamped.
  const res = await fetch(
    `${APIFY_BASE}/acts/${actorId}/runs?waitForFinish=120`,
    {
      method: "POST",
      headers: { ...authHeaders, "Content-Type": "application/json" },
      body: JSON.stringify(input),
    }
  );
  if (!res.ok) {
    console.warn(`Apify actor ${actorId} run failed with HTTP ${res.status}`);
    // Release the unread body so the connection is not kept open.
    await res.body?.cancel();
    return [];
  }

  const run = await res.json();
  const datasetId = run.data?.defaultDatasetId;
  if (!datasetId) return [];

  const itemsRes = await fetch(
    `${APIFY_BASE}/datasets/${datasetId}/items?limit=20`,
    { headers: authHeaders }
  );
  if (!itemsRes.ok) {
    console.warn(`Apify dataset ${datasetId} fetch failed with HTTP ${itemsRes.status}`);
    await itemsRes.body?.cancel();
    return [];
  }

  // An error payload is a JSON object, not an array; never hand that to callers.
  const items: unknown = await itemsRes.json();
  return Array.isArray(items) ? items : [];
}

/**
 * HTTP entry point of the enrich-channels function.
 *
 * Reads an `EnrichRequest` JSON body and collects channel data for the clinic
 * from several sources in parallel: Instagram, Google Maps and Facebook via
 * Apify actors, Gangnam Unni via Firecrawl, Naver blog/local search, and the
 * YouTube Data API. Sources whose input or API key is missing are skipped.
 * Tasks are awaited with `Promise.allSettled`, so a failing source only leaves
 * its key out of the result instead of failing the request.
 *
 * When `reportId` is given, the result is merged into the `report` column of
 * the matching `marketing_reports` row as `channelEnrichment`.
 *
 * Responds with `{ success: true, data, enrichedAt }`, or with HTTP 500 and
 * `{ success: false, error }` when the request cannot be processed.
 */
Deno.serve(async (req) => {
  // CORS preflight: answer immediately without reading a body.
  if (req.method === "OPTIONS") {
    return new Response("ok", { headers: corsHeaders });
  }

  try {
    const { reportId, clinicName, instagramHandle, instagramHandles, youtubeChannelId, facebookHandle, address } =
      (await req.json()) as EnrichRequest;

    // Build list of IG handles to try: explicit array > single handle > empty
    const igHandlesToTry: string[] = instagramHandles?.length
      ? instagramHandles
      : instagramHandle ? [instagramHandle] : [];

    const APIFY_TOKEN = Deno.env.get("APIFY_API_TOKEN");
    if (!APIFY_TOKEN) throw new Error("APIFY_API_TOKEN not configured");

    // Shared result object; every task below writes only its own key(s).
    const enrichment: Record<string, unknown> = {};

    // Run all enrichment tasks in parallel
    const tasks: Promise<void>[] = [];

    // 1. Instagram Profiles — multi-account with fallback
    if (igHandlesToTry.length > 0) {
      tasks.push(
        (async () => {
          const accounts: Record<string, unknown>[] = [];
          const triedHandles = new Set<string>();

          for (const rawHandle of igHandlesToTry) {
            const baseHandle = normalizeInstagramHandle(rawHandle);
            if (!baseHandle || triedHandles.has(baseHandle)) continue;

            // Try the handle + common clinic variants
            const candidates = [
              baseHandle,
              `${baseHandle}_ps`,
              `${baseHandle}.ps`,
              `${baseHandle}_clinic`,
              `${baseHandle}_official`,
            ];

            for (const handle of candidates) {
              if (triedHandles.has(handle)) continue;
              triedHandles.add(handle);

              const items = await runApifyActor(
                "apify~instagram-profile-scraper",
                { usernames: [handle], resultsLimit: 12 },
                APIFY_TOKEN
              );
              const profile = (items as Record<string, unknown>[])[0];

              if (profile && !profile.error) {
                const followers = (profile.followersCount as number) || 0;
                // Accept the profile only if it has at least 100 followers, or is a
                // business account with more than 10 posts; otherwise try the next variant.
                if (followers >= 100 || ((profile.isBusinessAccount as boolean) && (profile.postsCount as number) > 10)) {
                  accounts.push({
                    username: profile.username,
                    followers: profile.followersCount,
                    following: profile.followsCount,
                    posts: profile.postsCount,
                    bio: profile.biography,
                    isBusinessAccount: profile.isBusinessAccount,
                    externalUrl: profile.externalUrl,
                    igtvVideoCount: profile.igtvVideoCount,
                    highlightsCount: profile.highlightsCount,
                    latestPosts: ((profile.latestPosts as Record<string, unknown>[]) || [])
                      .slice(0, 12)
                      .map((p) => ({
                        type: p.type,
                        likes: p.likesCount,
                        comments: p.commentsCount,
                        caption: (p.caption as string || "").slice(0, 200),
                        timestamp: p.timestamp,
                      })),
                  });
                  break; // Found valid for this base handle, move to next
                }
              }
            }
          }

          // Store as array for multi-account support
          if (accounts.length > 0) {
            enrichment.instagramAccounts = accounts;
            // Keep backwards compat: first account as enrichment.instagram
            enrichment.instagram = accounts[0];
          }
        })()
      );
    }

    // 2. Google Maps / Place Reviews
    if (clinicName || address) {
      tasks.push(
        (async () => {
          // Try multiple search queries for better hit rate. Name-based queries are
          // only built when a clinic name exists, so an address-only request never
          // searches for the literal text "undefined".
          const candidateQueries: string[] = clinicName
            ? [
                `${clinicName} 성형외과`,
                clinicName,
                `${clinicName} ${address || "강남"}`,
              ]
            : address ? [address] : [];
          // Drop duplicates so the same search is never paid for twice.
          const queries = [...new Set(candidateQueries)];

          let items: unknown[] = [];
          for (const query of queries) {
            items = await runApifyActor(
              "compass~crawler-google-places",
              {
                searchStringsArray: [query],
                maxCrawledPlacesPerSearch: 3,
                language: "ko",
                maxReviews: 10,
              },
              APIFY_TOKEN
            );
            if ((items as Record<string, unknown>[]).length > 0) break;
          }
          const place = (items as Record<string, unknown>[])[0];
          if (place) {
            enrichment.googleMaps = {
              name: place.title,
              rating: place.totalScore,
              reviewCount: place.reviewsCount,
              address: place.address,
              phone: place.phone,
              website: place.website,
              category: place.categoryName,
              openingHours: place.openingHours,
              topReviews: ((place.reviews as Record<string, unknown>[]) || [])
                .slice(0, 10)
                .map((r) => ({
                  stars: r.stars,
                  text: (r.text as string || "").slice(0, 200),
                  publishedAtDate: r.publishedAtDate,
                })),
            };
          }
        })()
      );
    }

    // 3. Gangnam Unni (gangnamunni.com) — Firecrawl scraping
    if (clinicName) {
      const FIRECRAWL_API_KEY = Deno.env.get("FIRECRAWL_API_KEY");
      if (FIRECRAWL_API_KEY) {
        tasks.push(
          (async () => {
            // Step 1: Search for the clinic's gangnamunni page
            // Try multiple search queries for better matching
            const searchQueries = [
              `${clinicName} site:gangnamunni.com`,
              `${clinicName.replace(/성형외과|의원|병원|클리닉/g, '')} 성형외과 site:gangnamunni.com`,
              `${clinicName} 강남언니`,
            ];

            let hospitalUrl: string | undefined;

            for (const query of searchQueries) {
              if (hospitalUrl) break;
              try {
                const searchRes = await fetch("https://api.firecrawl.dev/v1/search", {
                  method: "POST",
                  headers: {
                    "Content-Type": "application/json",
                    Authorization: `Bearer ${FIRECRAWL_API_KEY}`,
                  },
                  body: JSON.stringify({ query, limit: 5 }),
                });
                const searchData = await searchRes.json();
                // Only a hospital detail page is useful for the scrape step below.
                hospitalUrl = (searchData.data || [])
                  .map((r: Record<string, string>) => r.url)
                  .find((u: string) => u?.includes("gangnamunni.com/hospitals/"));
              } catch {
                // Deliberate best-effort: a failed search just means trying the next query.
              }
            }

            if (!hospitalUrl) return;

            // Step 2: Scrape the hospital page with structured JSON extraction
            const scrapeRes = await fetch("https://api.firecrawl.dev/v1/scrape", {
              method: "POST",
              headers: {
                "Content-Type": "application/json",
                Authorization: `Bearer ${FIRECRAWL_API_KEY}`,
              },
              body: JSON.stringify({
                url: hospitalUrl,
                formats: ["json"],
                jsonOptions: {
                  prompt: "Extract: hospital name, overall rating (out of 10), total review count, doctors with names/ratings/review counts/specialties, procedures offered, address, certifications/badges",
                  schema: {
                    type: "object",
                    properties: {
                      hospitalName: { type: "string" },
                      rating: { type: "number" },
                      totalReviews: { type: "number" },
                      doctors: {
                        type: "array",
                        items: {
                          type: "object",
                          properties: {
                            name: { type: "string" },
                            rating: { type: "number" },
                            reviews: { type: "number" },
                            specialty: { type: "string" },
                          },
                        },
                      },
                      procedures: { type: "array", items: { type: "string" } },
                      address: { type: "string" },
                      badges: { type: "array", items: { type: "string" } },
                    },
                  },
                },
                waitFor: 5000,
              }),
            });
            const scrapeData = await scrapeRes.json();
            const hospital = scrapeData.data?.json;

            // Without an extracted hospital name the result is treated as a miss.
            if (hospital?.hospitalName) {
              enrichment.gangnamUnni = {
                name: hospital.hospitalName,
                rating: hospital.rating,
                ratingScale: "/10",
                totalReviews: hospital.totalReviews,
                doctors: (hospital.doctors || []).slice(0, 10),
                procedures: hospital.procedures || [],
                address: hospital.address,
                badges: hospital.badges || [],
                sourceUrl: hospitalUrl,
              };
            }
          })()
        );
      }
    }

    // 4. Naver Blog + Place Search (Naver Search API)
    if (clinicName) {
      const NAVER_CLIENT_ID = Deno.env.get("NAVER_CLIENT_ID");
      const NAVER_CLIENT_SECRET = Deno.env.get("NAVER_CLIENT_SECRET");
      if (NAVER_CLIENT_ID && NAVER_CLIENT_SECRET) {
        const naverHeaders = {
          "X-Naver-Client-Id": NAVER_CLIENT_ID,
          "X-Naver-Client-Secret": NAVER_CLIENT_SECRET,
        };

        // 4a. Blog search — clinic name followed by the Korean word for "review"
        tasks.push(
          (async () => {
            const query = encodeURIComponent(`${clinicName} 후기`);
            const res = await fetch(
              `https://openapi.naver.com/v1/search/blog.json?query=${query}&display=10&sort=sim`,
              { headers: naverHeaders }
            );
            if (!res.ok) return;
            const data = await res.json();
            enrichment.naverBlog = {
              totalResults: data.total || 0,
              searchQuery: `${clinicName} 후기`,
              posts: (data.items || []).slice(0, 10).map((item: Record<string, string>) => ({
                // Titles/descriptions may contain markup tags; strip them.
                title: (item.title || "").replace(/<[^>]*>/g, ""),
                description: (item.description || "").replace(/<[^>]*>/g, "").slice(0, 200),
                link: item.link,
                bloggerName: item.bloggername,
                postDate: item.postdate,
              })),
            };
          })()
        );

        // 4b. Local search — Naver Place
        tasks.push(
          (async () => {
            const query = encodeURIComponent(clinicName);
            const res = await fetch(
              `https://openapi.naver.com/v1/search/local.json?query=${query}&display=5&sort=comment`,
              { headers: naverHeaders }
            );
            if (!res.ok) return;
            const data = await res.json();
            const place = (data.items || [])[0];
            if (place) {
              enrichment.naverPlace = {
                name: (place.title || "").replace(/<[^>]*>/g, ""),
                category: place.category,
                address: place.roadAddress || place.address,
                telephone: place.telephone,
                link: place.link,
                mapx: place.mapx,
                mapy: place.mapy,
              };
            }
          })()
        );
      }
    }

    // 5. Facebook Page (using Apify)
    if (facebookHandle) {
      tasks.push(
        (async () => {
          // A full URL is used as-is; a bare page name (optionally "@name")
          // is expanded into a facebook.com URL.
          let fbUrl = facebookHandle;
          if (!fbUrl.startsWith("http")) {
            fbUrl = fbUrl.replace(/^@/, "");
            fbUrl = `https://www.facebook.com/${fbUrl}`;
          }

          const items = await runApifyActor(
            "apify~facebook-pages-scraper",
            { startUrls: [{ url: fbUrl }] },
            APIFY_TOKEN
          );
          const page = (items as Record<string, unknown>[])[0];
          if (page && page.title) {
            enrichment.facebook = {
              pageName: page.title,
              pageUrl: page.pageUrl || fbUrl,
              followers: page.followers,
              likes: page.likes,
              categories: page.categories,
              email: page.email,
              phone: page.phone,
              website: page.website,
              address: page.address,
              intro: page.intro,
              rating: page.rating,
              profilePictureUrl: page.profilePictureUrl,
            };
          }
        })()
      );
    }

    // 6. YouTube Channel (using YouTube Data API v3)
    if (youtubeChannelId) {
      const YOUTUBE_API_KEY = Deno.env.get("YOUTUBE_API_KEY");
      if (YOUTUBE_API_KEY) {
        tasks.push(
          (async () => {
            const YT_BASE = "https://www.googleapis.com/youtube/v3";

            // Normalize YouTube URL/handle to structured identifier
            const ytNormalized = normalizeYouTubeChannel(youtubeChannelId);
            if (!ytNormalized) return;

            let channelId = "";

            if (ytNormalized.type === "channelId") {
              channelId = ytNormalized.value;
            } else {
              // Try forHandle first, then forUsername as fallback.
              // The value originates from the request, so it is URL-encoded
              // before being placed in the query string.
              const lookupValue = encodeURIComponent(ytNormalized.value);
              for (const param of ["forHandle", "forUsername"]) {
                const lookupRes = await fetch(
                  `${YT_BASE}/channels?part=id&${param}=${lookupValue}&key=${YOUTUBE_API_KEY}`
                );
                const lookupData = await lookupRes.json();
                channelId = lookupData.items?.[0]?.id || "";
                if (channelId) break;
              }
            }

            if (!channelId) return;

            const channelIdParam = encodeURIComponent(channelId);

            // Step 1: Get channel statistics & snippet (1 quota unit)
            const channelRes = await fetch(
              `${YT_BASE}/channels?part=snippet,statistics,brandingSettings&id=${channelIdParam}&key=${YOUTUBE_API_KEY}`
            );
            const channelData = await channelRes.json();
            const channel = channelData.items?.[0];

            if (!channel) return;

            const stats = channel.statistics || {};
            const snippet = channel.snippet || {};

            // Step 2: Get the channel's most-viewed videos (100 quota units)
            const searchRes = await fetch(
              `${YT_BASE}/search?part=snippet&channelId=${channelIdParam}&order=viewCount&type=video&maxResults=10&key=${YOUTUBE_API_KEY}`
            );
            const searchData = await searchRes.json();
            const videoIds = (searchData.items || [])
              .map((item: Record<string, unknown>) => (item.id as Record<string, string>)?.videoId)
              .filter(Boolean)
              .join(",");

            // Step 3: Get video details — views, likes, duration (1 quota unit)
            let videos: Record<string, unknown>[] = [];
            if (videoIds) {
              const videosRes = await fetch(
                `${YT_BASE}/videos?part=snippet,statistics,contentDetails&id=${videoIds}&key=${YOUTUBE_API_KEY}`
              );
              const videosData = await videosRes.json();
              videos = videosData.items || [];
            }

            enrichment.youtube = {
              channelId,
              channelName: snippet.title,
              handle: snippet.customUrl || youtubeChannelId,
              description: snippet.description?.slice(0, 500),
              publishedAt: snippet.publishedAt,
              thumbnailUrl: snippet.thumbnails?.default?.url,
              // Counts are parsed from strings; a missing count becomes 0.
              subscribers: parseInt(stats.subscriberCount || "0", 10),
              totalViews: parseInt(stats.viewCount || "0", 10),
              totalVideos: parseInt(stats.videoCount || "0", 10),
              videos: videos.slice(0, 10).map((v) => {
                const vs = v.statistics as Record<string, string> || {};
                const vSnippet = v.snippet as Record<string, unknown> || {};
                const vContent = v.contentDetails as Record<string, string> || {};
                return {
                  title: vSnippet.title,
                  views: parseInt(vs.viewCount || "0", 10),
                  likes: parseInt(vs.likeCount || "0", 10),
                  comments: parseInt(vs.commentCount || "0", 10),
                  date: vSnippet.publishedAt,
                  duration: vContent.duration,
                  url: `https://www.youtube.com/watch?v=${(v.id as string)}`,
                  thumbnail: (vSnippet.thumbnails as Record<string, Record<string, string>>)?.medium?.url,
                };
              }),
            };
          })()
        );
      }
    }

    // allSettled: a rejected task only leaves its key out of `enrichment`.
    await Promise.allSettled(tasks);

    // Save enrichment data to Supabase (only when a report id was supplied)
    if (reportId) {
      const supabaseUrl = Deno.env.get("SUPABASE_URL");
      const supabaseKey = Deno.env.get("SUPABASE_SERVICE_ROLE_KEY");
      if (!supabaseUrl || !supabaseKey) {
        throw new Error("SUPABASE_URL / SUPABASE_SERVICE_ROLE_KEY not configured");
      }
      const supabase = createClient(supabaseUrl, supabaseKey);

      // Get existing report
      const { data: existing, error: fetchError } = await supabase
        .from("marketing_reports")
        .select("report")
        .eq("id", reportId)
        .single();

      if (fetchError) {
        console.error(`Failed to load report ${reportId}: ${fetchError.message}`);
      }

      if (existing) {
        // Merge into the stored report so its other keys are kept.
        const updatedReport = {
          ...existing.report,
          channelEnrichment: enrichment,
          enrichedAt: new Date().toISOString(),
        };

        const { error: updateError } = await supabase
          .from("marketing_reports")
          .update({ report: updatedReport, updated_at: new Date().toISOString() })
          .eq("id", reportId);

        if (updateError) {
          console.error(`Failed to save enrichment for report ${reportId}: ${updateError.message}`);
        }
      }
    }

    return new Response(
      JSON.stringify({
        success: true,
        data: enrichment,
        enrichedAt: new Date().toISOString(),
      }),
      { headers: { ...corsHeaders, "Content-Type": "application/json" } }
    );
  } catch (error: unknown) {
    // Anything can be thrown; only Error instances carry a `message`.
    const message = error instanceof Error ? error.message : String(error);
    return new Response(
      JSON.stringify({ success: false, error: message }),
      { status: 500, headers: { ...corsHeaders, "Content-Type": "application/json" } }
    );
  }
});