// File: o2o-infinith-demo/supabase/functions/enrich-channels/index.ts
// (505 lines, 18 KiB, TypeScript)

import "@supabase/functions-js/edge-runtime.d.ts";
import { createClient } from "https://esm.sh/@supabase/supabase-js@2";
import { normalizeInstagramHandle, normalizeYouTubeChannel } from "../_shared/normalizeHandles.ts";
/** Request headers that browsers are allowed to send on cross-origin calls. */
const allowedRequestHeaders = ["authorization", "x-client-info", "apikey", "content-type"];

/** CORS headers attached to every response this function returns (any origin allowed). */
const corsHeaders = {
  "Access-Control-Allow-Origin": "*",
  "Access-Control-Allow-Headers": allowedRequestHeaders.join(", "),
};

/** Root URL of the Apify REST API (v2). */
const APIFY_BASE = "https://api.apify.com/v2";
/** JSON body accepted by the enrich-channels endpoint. */
interface EnrichRequest {
  /** Id of the `marketing_reports` row that receives the enrichment; nothing is saved when empty. */
  reportId: string;
  /** Clinic name used to build the Google Maps, Gangnam Unni and Naver search queries. */
  clinicName: string;
  /** Single Instagram handle; only consulted when `instagramHandles` is missing or empty. */
  instagramHandle?: string;
  /** Instagram handles to look up; takes precedence over `instagramHandle`. */
  instagramHandles?: Array<string>;
  /** YouTube channel reference; passed through `normalizeYouTubeChannel` before use. */
  youtubeChannelId?: string;
  /** Facebook page handle (optionally `@`-prefixed) or a full page URL. */
  facebookHandle?: string;
  /** Clinic address, appended to one of the Google Maps search queries. */
  address?: string;
}
/**
 * Starts an Apify actor run, waits for it to finish and returns up to 20
 * items from the run's default dataset.
 *
 * The lookup is best-effort: a rejected start request, a run without a
 * dataset, or an unreadable dataset all yield an empty array rather than an
 * exception. Network failures and malformed JSON still reject.
 *
 * @param actorId     Actor identifier in Apify's `owner~actor-name` form.
 * @param input       Actor input, sent as the JSON request body.
 * @param token       Apify API token; sent as a Bearer header so it never
 *                    appears in request URLs.
 * @param maxWaitSecs Total time budget (seconds) to wait for the run to reach
 *                    a terminal status. Defaults to 120.
 * @returns The dataset items, or `[]` when nothing could be retrieved.
 */
async function runApifyActor(
  actorId: string,
  input: Record<string, unknown>,
  token: string,
  maxWaitSecs = 120
): Promise<unknown[]> {
  type ApifyRun = { id?: string; status?: string; defaultDatasetId?: string };

  // Apify caps `waitForFinish` at 60 seconds per request, so longer waits
  // have to be split across several calls.
  const MAX_WAIT_PER_REQUEST_SECS = 60;
  const TERMINAL_STATUSES = new Set(["SUCCEEDED", "FAILED", "TIMED-OUT", "ABORTED"]);

  const authHeaders = { Authorization: `Bearer ${token}` };
  const deadline = Date.now() + maxWaitSecs * 1000;
  const nextWaitSecs = (): number => {
    const remaining = Math.ceil((deadline - Date.now()) / 1000);
    return Math.max(0, Math.min(MAX_WAIT_PER_REQUEST_SECS, remaining));
  };

  const startRes = await fetch(
    `${APIFY_BASE}/acts/${actorId}/runs?waitForFinish=${nextWaitSecs()}`,
    {
      method: "POST",
      headers: { ...authHeaders, "Content-Type": "application/json" },
      body: JSON.stringify(input),
    }
  );
  if (!startRes.ok) {
    console.error(`Apify actor ${actorId} could not be started (HTTP ${startRes.status})`);
    return [];
  }
  let run: ApifyRun | undefined = (await startRes.json())?.data;

  // Keep waiting while the run is still in progress and budget remains.
  while (run?.id && !TERMINAL_STATUSES.has(run.status ?? "") && Date.now() < deadline) {
    const pollStartedAt = Date.now();
    const pollRes = await fetch(
      `${APIFY_BASE}/actor-runs/${run.id}?waitForFinish=${nextWaitSecs()}`,
      { headers: authHeaders }
    );
    if (!pollRes.ok) break;
    const polled: ApifyRun | undefined = (await pollRes.json())?.data;
    if (!polled) break;
    run = polled;
    // Guard against a hot loop if the server answers immediately without waiting.
    if (!TERMINAL_STATUSES.has(run.status ?? "") && Date.now() - pollStartedAt < 1000) {
      await new Promise((resolve) => setTimeout(resolve, 1000));
    }
  }

  const datasetId = run?.defaultDatasetId;
  if (!datasetId) return [];

  // The dataset is read whatever the final status is, so partial results
  // from a failed or timed-out run are still returned.
  const itemsRes = await fetch(
    `${APIFY_BASE}/datasets/${datasetId}/items?limit=20`,
    { headers: authHeaders }
  );
  if (!itemsRes.ok) {
    console.error(`Apify dataset ${datasetId} could not be read (HTTP ${itemsRes.status})`);
    return [];
  }
  const items: unknown = await itemsRes.json();
  return Array.isArray(items) ? items : [];
}
/**
 * Edge function entry point: collects data about a clinic from external
 * channels, optionally stores it on the clinic's marketing report, and returns
 * it as JSON.
 *
 * The request body is an `EnrichRequest`. A channel is only queried when its
 * inputs (and, where applicable, its API credentials) are present:
 *   1. Instagram profiles — Apify `instagram-profile-scraper`
 *   2. Google Maps place  — Apify `crawler-google-places`
 *   3. Gangnam Unni page  — Firecrawl search + scrape (FIRECRAWL_API_KEY)
 *   4. Naver blog / local — Naver search API (NAVER_CLIENT_ID / NAVER_CLIENT_SECRET)
 *   5. Facebook page      — Apify `facebook-pages-scraper`
 *   6. YouTube channel    — YouTube Data API v3 (YOUTUBE_API_KEY)
 *
 * Channel lookups run concurrently and are best-effort: a failing channel is
 * logged and simply left out of the result.
 *
 * Responses:
 *   - `OPTIONS` → `"ok"` with CORS headers (preflight).
 *   - success   → `{ success: true, data, enrichedAt }`.
 *   - failure   → HTTP 500 with `{ success: false, error }` (e.g. invalid JSON
 *                 body or missing APIFY_API_TOKEN).
 */
Deno.serve(async (req) => {
  // CORS preflight.
  if (req.method === "OPTIONS") {
    return new Response("ok", { headers: corsHeaders });
  }

  try {
    const {
      reportId,
      clinicName,
      instagramHandle,
      instagramHandles,
      youtubeChannelId,
      facebookHandle,
      address,
    } = (await req.json()) as EnrichRequest;

    // Build list of IG handles to try: explicit array > single handle > empty
    const igHandlesToTry: string[] = instagramHandles?.length
      ? instagramHandles
      : instagramHandle ? [instagramHandle] : [];

    const APIFY_TOKEN = Deno.env.get("APIFY_API_TOKEN");
    if (!APIFY_TOKEN) throw new Error("APIFY_API_TOKEN not configured");

    // Each task writes its own key(s) into this shared result object.
    const enrichment: Record<string, unknown> = {};

    // Run all enrichment tasks in parallel. Every task starts as soon as it is
    // pushed; the name is only used to label failures in the logs.
    const tasks: { name: string; promise: Promise<void> }[] = [];

    // 1. Instagram Profiles — multi-account with fallback
    if (igHandlesToTry.length > 0) {
      tasks.push({
        name: "instagram",
        promise: (async () => {
          const accounts: Record<string, unknown>[] = [];
          const triedHandles = new Set<string>();

          for (const rawHandle of igHandlesToTry) {
            const baseHandle = normalizeInstagramHandle(rawHandle);
            if (!baseHandle || triedHandles.has(baseHandle)) continue;

            // Try the handle + common clinic variants
            const candidates = [
              baseHandle,
              `${baseHandle}_ps`,
              `${baseHandle}.ps`,
              `${baseHandle}_clinic`,
              `${baseHandle}_official`,
            ];

            for (const handle of candidates) {
              if (triedHandles.has(handle)) continue;
              triedHandles.add(handle);

              const items = await runApifyActor(
                "apify~instagram-profile-scraper",
                { usernames: [handle], resultsLimit: 12 },
                APIFY_TOKEN
              );
              const profile = (items as Record<string, unknown>[])[0];
              if (profile && !profile.error) {
                const followers = (profile.followersCount as number) || 0;
                // Accept accounts with at least 100 followers, or business
                // accounts with more than 10 posts; otherwise try the next variant.
                if (
                  followers >= 100 ||
                  ((profile.isBusinessAccount as boolean) && (profile.postsCount as number) > 10)
                ) {
                  accounts.push({
                    username: profile.username,
                    followers: profile.followersCount,
                    following: profile.followsCount,
                    posts: profile.postsCount,
                    bio: profile.biography,
                    isBusinessAccount: profile.isBusinessAccount,
                    externalUrl: profile.externalUrl,
                    latestPosts: ((profile.latestPosts as Record<string, unknown>[]) || [])
                      .slice(0, 6)
                      .map((p) => ({
                        type: p.type,
                        likes: p.likesCount,
                        comments: p.commentsCount,
                        caption: (p.caption as string || "").slice(0, 200),
                        timestamp: p.timestamp,
                      })),
                  });
                  break; // Found valid for this base handle, move to next
                }
              }
            }
          }

          // Store as array for multi-account support
          if (accounts.length > 0) {
            enrichment.instagramAccounts = accounts;
            // Keep backwards compat: first account as enrichment.instagram
            enrichment.instagram = accounts[0];
          }
        })(),
      });
    }

    // 2. Google Maps / Place Reviews
    if (clinicName || address) {
      tasks.push({
        name: "googleMaps",
        promise: (async () => {
          // Try multiple search queries for better hit rate. "성형외과" means
          // "plastic surgery"; "강남" (Gangnam) is the fallback area when no
          // address is given. With no clinic name, search by address alone
          // instead of interpolating "undefined" into the queries.
          const queries = (
            clinicName
              ? [
                `${clinicName} 성형외과`,
                clinicName,
                `${clinicName} ${address || "강남"}`,
              ]
              : [address]
          ).filter((q): q is string => Boolean(q));

          let items: unknown[] = [];
          for (const query of queries) {
            items = await runApifyActor(
              "compass~crawler-google-places",
              {
                searchStringsArray: [query],
                maxCrawledPlacesPerSearch: 3,
                language: "ko",
                maxReviews: 10,
              },
              APIFY_TOKEN
            );
            if ((items as Record<string, unknown>[]).length > 0) break;
          }

          const place = (items as Record<string, unknown>[])[0];
          if (place) {
            enrichment.googleMaps = {
              name: place.title,
              rating: place.totalScore,
              reviewCount: place.reviewsCount,
              address: place.address,
              phone: place.phone,
              website: place.website,
              category: place.categoryName,
              openingHours: place.openingHours,
              topReviews: ((place.reviews as Record<string, unknown>[]) || [])
                .slice(0, 10)
                .map((r) => ({
                  stars: r.stars,
                  text: (r.text as string || "").slice(0, 200),
                  publishedAtDate: r.publishedAtDate,
                })),
            };
          }
        })(),
      });
    }

    // 3. Gangnam Unni (gangnamunni.com) — Firecrawl scraping
    if (clinicName) {
      const FIRECRAWL_API_KEY = Deno.env.get("FIRECRAWL_API_KEY");
      if (FIRECRAWL_API_KEY) {
        tasks.push({
          name: "gangnamUnni",
          promise: (async () => {
            // Step 1: Search for the clinic's gangnamunni page
            const searchRes = await fetch("https://api.firecrawl.dev/v1/search", {
              method: "POST",
              headers: {
                "Content-Type": "application/json",
                Authorization: `Bearer ${FIRECRAWL_API_KEY}`,
              },
              body: JSON.stringify({
                query: `${clinicName} site:gangnamunni.com`,
                limit: 3,
              }),
            });
            const searchData = await searchRes.json();
            const hospitalUrl = (searchData.data || [])
              .map((r: Record<string, string>) => r.url)
              .find((u: string) => u?.includes("gangnamunni.com/hospitals/"));
            if (!hospitalUrl) return;

            // Step 2: Scrape the hospital page with structured JSON extraction
            const scrapeRes = await fetch("https://api.firecrawl.dev/v1/scrape", {
              method: "POST",
              headers: {
                "Content-Type": "application/json",
                Authorization: `Bearer ${FIRECRAWL_API_KEY}`,
              },
              body: JSON.stringify({
                url: hospitalUrl,
                formats: ["json"],
                jsonOptions: {
                  prompt: "Extract: hospital name, overall rating (out of 10), total review count, doctors with names/ratings/review counts/specialties, procedures offered, address, certifications/badges",
                  schema: {
                    type: "object",
                    properties: {
                      hospitalName: { type: "string" },
                      rating: { type: "number" },
                      totalReviews: { type: "number" },
                      doctors: {
                        type: "array",
                        items: {
                          type: "object",
                          properties: {
                            name: { type: "string" },
                            rating: { type: "number" },
                            reviews: { type: "number" },
                            specialty: { type: "string" },
                          },
                        },
                      },
                      procedures: { type: "array", items: { type: "string" } },
                      address: { type: "string" },
                      badges: { type: "array", items: { type: "string" } },
                    },
                  },
                },
                waitFor: 5000,
              }),
            });
            const scrapeData = await scrapeRes.json();
            const hospital = scrapeData.data?.json;
            // Only keep the result when the extraction produced a hospital name.
            if (hospital?.hospitalName) {
              enrichment.gangnamUnni = {
                name: hospital.hospitalName,
                rating: hospital.rating,
                ratingScale: "/10",
                totalReviews: hospital.totalReviews,
                doctors: (hospital.doctors || []).slice(0, 10),
                procedures: hospital.procedures || [],
                address: hospital.address,
                badges: hospital.badges || [],
                sourceUrl: hospitalUrl,
              };
            }
          })(),
        });
      }
    }

    // 4. Naver Blog + Place Search (Naver search API)
    if (clinicName) {
      const NAVER_CLIENT_ID = Deno.env.get("NAVER_CLIENT_ID");
      const NAVER_CLIENT_SECRET = Deno.env.get("NAVER_CLIENT_SECRET");
      if (NAVER_CLIENT_ID && NAVER_CLIENT_SECRET) {
        const naverHeaders = {
          "X-Naver-Client-Id": NAVER_CLIENT_ID,
          "X-Naver-Client-Secret": NAVER_CLIENT_SECRET,
        };

        // 4a. Blog search — "{clinicName} 후기" ("후기" means "review")
        tasks.push({
          name: "naverBlog",
          promise: (async () => {
            const query = encodeURIComponent(`${clinicName} 후기`);
            const res = await fetch(
              `https://openapi.naver.com/v1/search/blog.json?query=${query}&display=10&sort=sim`,
              { headers: naverHeaders }
            );
            if (!res.ok) return;
            const data = await res.json();
            enrichment.naverBlog = {
              totalResults: data.total || 0,
              searchQuery: `${clinicName} 후기`,
              posts: (data.items || []).slice(0, 10).map((item: Record<string, string>) => ({
                // Titles and descriptions contain HTML tags; strip them.
                title: (item.title || "").replace(/<[^>]*>/g, ""),
                description: (item.description || "").replace(/<[^>]*>/g, "").slice(0, 200),
                link: item.link,
                bloggerName: item.bloggername,
                postDate: item.postdate,
              })),
            };
          })(),
        });

        // 4b. Local search — Naver Place
        tasks.push({
          name: "naverPlace",
          promise: (async () => {
            const query = encodeURIComponent(clinicName);
            const res = await fetch(
              `https://openapi.naver.com/v1/search/local.json?query=${query}&display=5&sort=comment`,
              { headers: naverHeaders }
            );
            if (!res.ok) return;
            const data = await res.json();
            const place = (data.items || [])[0];
            if (place) {
              enrichment.naverPlace = {
                name: (place.title || "").replace(/<[^>]*>/g, ""),
                category: place.category,
                address: place.roadAddress || place.address,
                telephone: place.telephone,
                link: place.link,
                mapx: place.mapx,
                mapy: place.mapy,
              };
            }
          })(),
        });
      }
    }

    // 5. Facebook Page (using Apify)
    if (facebookHandle) {
      tasks.push({
        name: "facebook",
        promise: (async () => {
          // Normalize to a full page URL. Full URLs are used as-is; otherwise
          // a leading "@" and a scheme-less facebook.com/fb.com prefix are
          // stripped so only the page name is appended to the base URL.
          let fbUrl = facebookHandle.trim();
          if (!/^https?:\/\//i.test(fbUrl)) {
            const pageName = fbUrl
              .replace(/^@/, "")
              .replace(/^(?:(?:www|m)\.)?(?:facebook|fb)\.com\//i, "");
            fbUrl = `https://www.facebook.com/${pageName}`;
          }

          const items = await runApifyActor(
            "apify~facebook-pages-scraper",
            { startUrls: [{ url: fbUrl }] },
            APIFY_TOKEN
          );
          const page = (items as Record<string, unknown>[])[0];
          if (page && page.title) {
            enrichment.facebook = {
              pageName: page.title,
              pageUrl: page.pageUrl || fbUrl,
              followers: page.followers,
              likes: page.likes,
              categories: page.categories,
              email: page.email,
              phone: page.phone,
              website: page.website,
              address: page.address,
              intro: page.intro,
              rating: page.rating,
              profilePictureUrl: page.profilePictureUrl,
            };
          }
        })(),
      });
    }

    // 6. YouTube Channel (using YouTube Data API v3)
    if (youtubeChannelId) {
      const YOUTUBE_API_KEY = Deno.env.get("YOUTUBE_API_KEY");
      if (YOUTUBE_API_KEY) {
        tasks.push({
          name: "youtube",
          promise: (async () => {
            const YT_BASE = "https://www.googleapis.com/youtube/v3";

            // Normalize YouTube URL/handle to structured identifier
            const ytNormalized = normalizeYouTubeChannel(youtubeChannelId);
            if (!ytNormalized) return;

            let channelId = "";
            if (ytNormalized.type === "channelId") {
              channelId = ytNormalized.value;
            } else {
              // Try forHandle first, then forUsername as fallback. The value is
              // user-supplied, so it is encoded before going into the query string.
              const lookupValue = encodeURIComponent(ytNormalized.value);
              for (const param of ["forHandle", "forUsername"]) {
                const lookupRes = await fetch(
                  `${YT_BASE}/channels?part=id&${param}=${lookupValue}&key=${YOUTUBE_API_KEY}`
                );
                const lookupData = await lookupRes.json();
                channelId = lookupData.items?.[0]?.id || "";
                if (channelId) break;
              }
            }
            if (!channelId) return;

            // Step 1: Get channel statistics & snippet (1 quota unit)
            const channelRes = await fetch(
              `${YT_BASE}/channels?part=snippet,statistics,brandingSettings&id=${encodeURIComponent(channelId)}&key=${YOUTUBE_API_KEY}`
            );
            const channelData = await channelRes.json();
            const channel = channelData.items?.[0];
            if (!channel) return;
            const stats = channel.statistics || {};
            const snippet = channel.snippet || {};

            // Step 2: Get the channel's most-viewed videos (100 quota units)
            const searchRes = await fetch(
              `${YT_BASE}/search?part=snippet&channelId=${encodeURIComponent(channelId)}&order=viewCount&type=video&maxResults=10&key=${YOUTUBE_API_KEY}`
            );
            const searchData = await searchRes.json();
            const videoIds = (searchData.items || [])
              .map((item: Record<string, unknown>) => (item.id as Record<string, string>)?.videoId)
              .filter(Boolean)
              .join(",");

            // Step 3: Get video details — views, likes, duration (1 quota unit)
            let videos: Record<string, unknown>[] = [];
            if (videoIds) {
              const videosRes = await fetch(
                `${YT_BASE}/videos?part=snippet,statistics,contentDetails&id=${videoIds}&key=${YOUTUBE_API_KEY}`
              );
              const videosData = await videosRes.json();
              videos = videosData.items || [];
            }

            enrichment.youtube = {
              channelId,
              channelName: snippet.title,
              handle: snippet.customUrl || youtubeChannelId,
              description: snippet.description?.slice(0, 500),
              publishedAt: snippet.publishedAt,
              thumbnailUrl: snippet.thumbnails?.default?.url,
              // The API reports counters as strings; convert them to numbers.
              subscribers: parseInt(stats.subscriberCount || "0", 10),
              totalViews: parseInt(stats.viewCount || "0", 10),
              totalVideos: parseInt(stats.videoCount || "0", 10),
              videos: videos.slice(0, 10).map((v) => {
                const vs = v.statistics as Record<string, string> || {};
                const vSnippet = v.snippet as Record<string, unknown> || {};
                const vContent = v.contentDetails as Record<string, string> || {};
                return {
                  title: vSnippet.title,
                  views: parseInt(vs.viewCount || "0", 10),
                  likes: parseInt(vs.likeCount || "0", 10),
                  comments: parseInt(vs.commentCount || "0", 10),
                  date: vSnippet.publishedAt,
                  duration: vContent.duration,
                  url: `https://www.youtube.com/watch?v=${(v.id as string)}`,
                  thumbnail: (vSnippet.thumbnails as Record<string, Record<string, string>>)?.medium?.url,
                };
              }),
            };
          })(),
        });
      }
    }

    // Wait for every channel; a failure in one must not discard the others,
    // but it is logged so it does not disappear silently.
    const outcomes = await Promise.allSettled(tasks.map((task) => task.promise));
    outcomes.forEach((outcome, index) => {
      if (outcome.status === "rejected") {
        console.error(
          `[enrich-channels] ${tasks[index]?.name ?? "unknown"} enrichment failed:`,
          outcome.reason
        );
      }
    });

    // Save enrichment data to Supabase
    if (reportId) {
      const supabaseUrl = Deno.env.get("SUPABASE_URL");
      const supabaseKey = Deno.env.get("SUPABASE_SERVICE_ROLE_KEY");
      if (!supabaseUrl || !supabaseKey) {
        throw new Error("SUPABASE_URL / SUPABASE_SERVICE_ROLE_KEY not configured");
      }
      const supabase = createClient(supabaseUrl, supabaseKey);

      // Get existing report
      const { data: existing, error: fetchError } = await supabase
        .from("marketing_reports")
        .select("report")
        .eq("id", reportId)
        .single();
      if (fetchError) {
        console.error(`[enrich-channels] could not load report ${reportId}:`, fetchError);
      }

      if (existing) {
        // Merge into the stored report JSON, replacing any earlier enrichment.
        const updatedReport = {
          ...existing.report,
          channelEnrichment: enrichment,
          enrichedAt: new Date().toISOString(),
        };
        const { error: updateError } = await supabase
          .from("marketing_reports")
          .update({ report: updatedReport, updated_at: new Date().toISOString() })
          .eq("id", reportId);
        if (updateError) {
          console.error(`[enrich-channels] could not update report ${reportId}:`, updateError);
        }
      }
    }

    return new Response(
      JSON.stringify({
        success: true,
        data: enrichment,
        enrichedAt: new Date().toISOString(),
      }),
      { headers: { ...corsHeaders, "Content-Type": "application/json" } }
    );
  } catch (error) {
    // The caught value is `unknown`; only Error instances carry a message.
    const message = error instanceof Error ? error.message : String(error);
    return new Response(
      JSON.stringify({ success: false, error: message }),
      { status: 500, headers: { ...corsHeaders, "Content-Type": "application/json" } }
    );
  }
});