660 lines
30 KiB
TypeScript
660 lines
30 KiB
TypeScript
import "@supabase/functions-js/edge-runtime.d.ts";
|
|
import { createClient } from "https://esm.sh/@supabase/supabase-js@2";
|
|
import type { VerifiedChannels } from "../_shared/verifyHandles.ts";
|
|
import { PERPLEXITY_MODEL } from "../_shared/config.ts";
|
|
import { captureAllScreenshots, runVisionAnalysis, screenshotErrors, type ScreenshotResult } from "../_shared/visionAnalysis.ts";
|
|
import { fetchWithRetry, fetchJsonWithRetry, wrapChannelTask, type ChannelTaskResult } from "../_shared/retry.ts";
|
|
import { searchGooglePlace } from "../_shared/googlePlaces.ts";
|
|
import { extractFoundingYear, validateFoundingYearExtractor } from "../_shared/foundingYearExtractor.ts";
|
|
import { validateClassifier } from "../_shared/urlClassifier.ts";
|
|
import { validateDataQuality } from "../_shared/dataQuality.ts";
|
|
|
|
// ─── Harness Self-Tests (cold-start) ───
// Each validator is invoked once when the module is first evaluated; the
// results are kept so that any failing suite can be reported below.
const harnessResults = {
  classifier: validateClassifier(),
  foundingYear: validateFoundingYearExtractor(),
  dataQuality: validateDataQuality(),
};

// A failed self-test only produces a warning — it does not stop the function from serving.
Object.entries(harnessResults)
  .filter(([, outcome]) => !outcome.pass)
  .forEach(([suite, outcome]) => {
    console.warn(`[harness] ${suite} self-test FAILED:`, outcome.failures);
  });
|
|
|
|
/** CORS headers attached to every response produced by this function. */
const corsHeaders = {
  "Access-Control-Allow-Origin": "*",
  "Access-Control-Allow-Headers": ["authorization", "x-client-info", "apikey", "content-type"].join(", "),
};

/** Base URL of the Apify REST API (v2). */
const APIFY_BASE = "https://api.apify.com/v2";
|
|
|
|
/** JSON body accepted by the collect-channel-data endpoint. */
interface CollectRequest {
  /** Id of the `marketing_reports` row to collect data for (required). */
  reportId: string;
  /** V3: clinic UUID. */
  clinicId?: string;
  /** V3: analysis_run UUID. */
  runId?: string;
}
|
|
|
|
/**
 * Starts an Apify actor run, waits for it (the request asks Apify to hold the
 * connection with `waitForFinish=120`), then downloads up to 20 items from the
 * run's default dataset.
 *
 * @param actorId Apify actor identifier, e.g. `"apify~instagram-profile-scraper"`.
 * @param input   Actor input; sent as the JSON request body.
 * @param token   Apify API token.
 * @returns The dataset items as an array.
 * @throws Error when the run request or the dataset request returns a non-OK
 *         status, when the run response carries no dataset id, or when the
 *         dataset payload is not a JSON array.
 */
async function runApifyActor(actorId: string, input: Record<string, unknown>, token: string): Promise<unknown[]> {
  // Short label for retry logs: the part after "~" in the actor id, or the whole id.
  const shortName = actorId.split("~")[1] || actorId;

  // NOTE(review): the token is passed in the query string; if request URLs are
  // ever logged this would expose it — consider an Authorization header instead.
  const runRes = await fetchWithRetry(
    `${APIFY_BASE}/acts/${actorId}/runs?token=${token}&waitForFinish=120`,
    { method: "POST", headers: { "Content-Type": "application/json" }, body: JSON.stringify(input) },
    { maxRetries: 1, timeoutMs: 130000, label: `apify:${shortName}` },
  );
  if (!runRes.ok) throw new Error(`Apify ${actorId} returned ${runRes.status}`);

  const run = await runRes.json();
  const datasetId = run.data?.defaultDatasetId;
  if (!datasetId) throw new Error(`Apify ${actorId}: no dataset returned`);

  const itemsRes = await fetchWithRetry(
    `${APIFY_BASE}/datasets/${datasetId}/items?token=${token}&limit=20`,
    undefined,
    { maxRetries: 1, timeoutMs: 30000, label: `apify-dataset:${shortName}` },
  );
  // Without this check an error payload would be handed to callers as if it were the item list.
  if (!itemsRes.ok) throw new Error(`Apify ${actorId}: dataset fetch returned ${itemsRes.status}`);

  const items: unknown = await itemsRes.json();
  if (!Array.isArray(items)) throw new Error(`Apify ${actorId}: dataset response was not an array`);
  return items;
}
|
|
|
|
/**
 * Phase 2: Collect Channel Data
 *
 * HTTP handler. Expects a JSON body shaped like `CollectRequest`.
 *
 * Flow:
 *  1. Load the `marketing_reports` row for `reportId` (Phase 1 output,
 *     including `verified_channels`) and mark it as "collecting".
 *  2. Start one task per channel (Instagram profile/posts/reels, YouTube,
 *     Facebook, Gangnam Unni, Naver Blog/Place, Google Maps, Perplexity market
 *     analysis, screenshots + vision). Every task is wrapped with
 *     `wrapChannelTask` and all of them are awaited together.
 *  3. After the tasks settle, apply the founding-year text fallback, derive the
 *     overall status ("collected" / "partial" / "collection_failed") and
 *     persist everything to `marketing_reports`.
 *  4. When both `clinicId` and `runId` are supplied, also write to the V3
 *     tables (`channel_snapshots`, `screenshots`, `analysis_runs`).
 *
 * Responds with a JSON summary; any exception escaping the flow yields a 500
 * response of the form `{ success: false, error }`.
 */
Deno.serve(async (req) => {
  // Catch variables are `unknown`; extract a printable message safely.
  const describeError = (err: unknown): string => (err instanceof Error ? err.message : String(err));
  // Strips HTML tags (Naver search results wrap matches in markup).
  const stripTags = (text: string): string => text.replace(/<[^>]*>/g, "");
  // supabase-js reports failures through the returned `error` field instead of
  // throwing, so every write result has to be inspected explicitly.
  const reportDbError = (operation: string, dbError: { message: string } | null | undefined): void => {
    if (dbError) console.error(`[collect] ${operation} failed:`, dbError.message);
  };

  if (req.method === "OPTIONS") {
    return new Response("ok", { headers: corsHeaders });
  }

  try {
    const { reportId, clinicId: inputClinicId, runId: inputRunId } = (await req.json()) as CollectRequest;
    if (!reportId) throw new Error("reportId is required");

    // Read Phase 1 results from DB
    const supabaseUrl = Deno.env.get("SUPABASE_URL")!;
    const supabaseKey = Deno.env.get("SUPABASE_SERVICE_ROLE_KEY")!;
    const supabase = createClient(supabaseUrl, supabaseKey);

    const { data: row, error: fetchError } = await supabase
      .from("marketing_reports")
      .select("*")
      .eq("id", reportId)
      .single();

    if (fetchError || !row) throw new Error(`Report not found: ${fetchError?.message}`);

    const verified = row.verified_channels as VerifiedChannels;
    const clinicName = row.clinic_name || "";
    const address = row.scrape_data?.clinic?.address || "";
    const services: string[] = row.scrape_data?.clinic?.services || [];

    const { error: statusError } = await supabase
      .from("marketing_reports")
      .update({ status: "collecting" })
      .eq("id", reportId);
    reportDbError("marketing_reports status update", statusError);

    // A missing key simply disables the channels that depend on it.
    const APIFY_TOKEN = Deno.env.get("APIFY_API_TOKEN") || "";
    const YOUTUBE_API_KEY = Deno.env.get("YOUTUBE_API_KEY") || "";
    const FIRECRAWL_API_KEY = Deno.env.get("FIRECRAWL_API_KEY") || "";
    const PERPLEXITY_API_KEY = Deno.env.get("PERPLEXITY_API_KEY") || "";
    const NAVER_CLIENT_ID = Deno.env.get("NAVER_CLIENT_ID") || "";
    const NAVER_CLIENT_SECRET = Deno.env.get("NAVER_CLIENT_SECRET") || "";
    const GOOGLE_PLACES_API_KEY = Deno.env.get("GOOGLE_PLACES_API_KEY") || "";

    // Each task writes its result into these shared maps under its own key(s).
    const channelData: Record<string, unknown> = {};
    const analysisData: Record<string, unknown> = {};
    const channelTasks: Promise<ChannelTaskResult>[] = [];

    // ─── 1. Instagram (multi-account) — try ALL candidates including unverified/unverifiable ───
    // Only candidates explicitly marked `verified === false` are excluded.
    const igCandidates = (verified.instagram || []).filter((v: Record<string, unknown>) => v.handle && v.verified !== false);
    if (APIFY_TOKEN && igCandidates.length > 0) {
      channelTasks.push(wrapChannelTask("instagram", async () => {
        const accounts: Record<string, unknown>[] = [];
        const candidateErrors: string[] = [];
        for (const ig of igCandidates) {
          // One failing candidate must not abort the remaining ones.
          try {
            const items = await runApifyActor("apify~instagram-profile-scraper", { usernames: [ig.handle], resultsLimit: 12 }, APIFY_TOKEN);
            const profile = (items as Record<string, unknown>[])[0];
            if (profile && !profile.error) {
              accounts.push({
                username: profile.username,
                followers: profile.followersCount,
                following: profile.followsCount,
                posts: profile.postsCount,
                bio: profile.biography,
                isBusinessAccount: profile.isBusinessAccount,
                externalUrl: profile.externalUrl,
                igtvVideoCount: profile.igtvVideoCount,
                latestPosts: ((profile.latestPosts as Record<string, unknown>[]) || []).slice(0, 12).map(p => ({
                  type: p.type, likes: p.likesCount, comments: p.commentsCount,
                  caption: p.caption, timestamp: p.timestamp,
                })),
              });
            }
          } catch (err) {
            candidateErrors.push(`${String(ig.handle)}: ${describeError(err)}`);
          }
        }

        if (accounts.length === 0) {
          const detail = candidateErrors.length > 0 ? ` (${candidateErrors.join(" | ")})` : "";
          throw new Error(`No Instagram profiles found via Apify${detail}`);
        }
        if (candidateErrors.length > 0) {
          console.warn("[collect] Some Instagram candidates failed:", candidateErrors.join(" | "));
        }
        channelData.instagramAccounts = accounts;
        // The first successfully scraped account is treated as the primary one.
        channelData.instagram = accounts[0];
      }));
    }

    // ─── 1b. Instagram Posts (details of the 20 most recent posts) ───
    const igPrimaryHandle = igCandidates[0]?.handle as string | undefined;
    if (APIFY_TOKEN && igPrimaryHandle) {
      channelTasks.push(wrapChannelTask("instagramPosts", async () => {
        const handle = igPrimaryHandle.replace(/^@/, "");
        const items = await runApifyActor(
          "apify~instagram-post-scraper",
          { directUrls: [`https://www.instagram.com/${handle}/`], resultsLimit: 20 },
          APIFY_TOKEN,
        );
        const posts = (items as Record<string, unknown>[]).map(p => ({
          id: p.id,
          type: p.type,
          shortCode: p.shortCode,
          url: p.url,
          caption: ((p.caption as string) || "").slice(0, 500),
          hashtags: p.hashtags || [],
          mentions: p.mentions || [],
          likesCount: (p.likesCount as number) || 0,
          commentsCount: (p.commentsCount as number) || 0,
          timestamp: p.timestamp,
          displayUrl: p.displayUrl,
        }));
        const totalLikes = posts.reduce((sum, p) => sum + p.likesCount, 0);
        const totalComments = posts.reduce((sum, p) => sum + p.commentsCount, 0);
        channelData.instagramPosts = {
          posts,
          totalPosts: posts.length,
          avgLikes: posts.length > 0 ? Math.round(totalLikes / posts.length) : 0,
          avgComments: posts.length > 0 ? Math.round(totalComments / posts.length) : 0,
        };
      }));
    }

    // ─── 1c. Instagram Reels (details of the 15 most recent reels) ───
    if (APIFY_TOKEN && igPrimaryHandle) {
      channelTasks.push(wrapChannelTask("instagramReels", async () => {
        const handle = igPrimaryHandle.replace(/^@/, "");
        const items = await runApifyActor(
          "apify~instagram-reel-scraper",
          { directUrls: [`https://www.instagram.com/${handle}/reels/`], resultsLimit: 15 },
          APIFY_TOKEN,
        );
        const reels = (items as Record<string, unknown>[]).map(r => ({
          id: r.id,
          shortCode: r.shortCode,
          url: r.url,
          caption: ((r.caption as string) || "").slice(0, 500),
          hashtags: r.hashtags || [],
          likesCount: (r.likesCount as number) || 0,
          commentsCount: (r.commentsCount as number) || 0,
          videoViewCount: (r.videoViewCount as number) || 0,
          videoPlayCount: (r.videoPlayCount as number) || 0,
          videoDuration: (r.videoDuration as number) || 0,
          timestamp: r.timestamp,
          musicInfo: r.musicInfo || null,
        }));
        const totalViews = reels.reduce((sum, r) => sum + r.videoViewCount, 0);
        const totalPlays = reels.reduce((sum, r) => sum + r.videoPlayCount, 0);
        channelData.instagramReels = {
          reels,
          totalReels: reels.length,
          avgViews: reels.length > 0 ? Math.round(totalViews / reels.length) : 0,
          avgPlays: reels.length > 0 ? Math.round(totalPlays / reels.length) : 0,
        };
      }));
    }

    // ─── 2. YouTube ───
    const ytVerified = verified.youtube as Record<string, unknown> | null;
    if (YOUTUBE_API_KEY && (ytVerified?.verified === true || ytVerified?.verified === "unverifiable")) {
      channelTasks.push(wrapChannelTask("youtube", async () => {
        const YT = "https://www.googleapis.com/youtube/v3";
        let channelId = (ytVerified?.channelId as string) || "";

        // If no channelId, try to resolve from handle
        if (!channelId && ytVerified?.handle) {
          const h = (ytVerified.handle as string).replace(/^@/, "");
          if (h.startsWith("UC")) {
            // A handle starting with "UC" is used directly as the channel id.
            channelId = h;
          } else {
            for (const param of ["forHandle", "forUsername"]) {
              // Encode the handle so characters such as "&" or "#" cannot break the query string.
              const lookupRes = await fetch(`${YT}/channels?part=id&${param}=${encodeURIComponent(h)}&key=${YOUTUBE_API_KEY}`);
              const lookupData = await lookupRes.json();
              channelId = lookupData.items?.[0]?.id || "";
              if (channelId) break;
            }
          }
        }
        if (!channelId) throw new Error("Could not resolve YouTube channel ID");

        const chRes = await fetchWithRetry(`${YT}/channels?part=snippet,statistics,brandingSettings&id=${channelId}&key=${YOUTUBE_API_KEY}`, undefined, { label: "youtube-api" });
        const chData = await chRes.json();
        const channel = chData.items?.[0];
        if (!channel) throw new Error("YouTube channel not found in API response");

        const stats = channel.statistics || {};
        const snippet = channel.snippet || {};

        // Popular videos
        const searchRes = await fetchWithRetry(`${YT}/search?part=snippet&channelId=${channelId}&order=viewCount&type=video&maxResults=10&key=${YOUTUBE_API_KEY}`, undefined, { label: "youtube-search" });
        const searchData = await searchRes.json();
        const videoIds = (searchData.items || []).map((i: Record<string, unknown>) => (i.id as Record<string, string>)?.videoId).filter(Boolean).join(",");

        let videos: Record<string, unknown>[] = [];
        if (videoIds) {
          const vRes = await fetchWithRetry(`${YT}/videos?part=snippet,statistics,contentDetails&id=${videoIds}&key=${YOUTUBE_API_KEY}`, undefined, { label: "youtube-videos" });
          const vData = await vRes.json();
          videos = vData.items || [];
        }

        channelData.youtube = {
          channelId, channelName: snippet.title, handle: snippet.customUrl,
          description: snippet.description, publishedAt: snippet.publishedAt,
          thumbnailUrl: snippet.thumbnails?.default?.url,
          // Counts are parsed from their string form; a missing value becomes 0.
          subscribers: parseInt(stats.subscriberCount || "0", 10),
          totalViews: parseInt(stats.viewCount || "0", 10),
          totalVideos: parseInt(stats.videoCount || "0", 10),
          videos: videos.slice(0, 10).map(v => {
            const vs = v.statistics as Record<string, string> || {};
            const vSnip = v.snippet as Record<string, unknown> || {};
            const vCon = v.contentDetails as Record<string, string> || {};
            return {
              title: vSnip.title, views: parseInt(vs.viewCount || "0", 10),
              likes: parseInt(vs.likeCount || "0", 10), comments: parseInt(vs.commentCount || "0", 10),
              date: vSnip.publishedAt, duration: vCon.duration,
              url: `https://www.youtube.com/watch?v=${v.id}`,
              thumbnail: (vSnip.thumbnails as Record<string, Record<string, string>>)?.medium?.url,
            };
          }),
        };
      }));
    }

    // ─── 3. Facebook ───
    const fbVerified = verified.facebook as Record<string, unknown> | null;
    if (APIFY_TOKEN && (fbVerified?.verified === true || fbVerified?.verified === "unverifiable")) {
      channelTasks.push(wrapChannelTask("facebook", async () => {
        const fbUrl = (fbVerified.url as string) || `https://www.facebook.com/${fbVerified.handle}`;
        const items = await runApifyActor("apify~facebook-pages-scraper", { startUrls: [{ url: fbUrl }] }, APIFY_TOKEN);
        const page = (items as Record<string, unknown>[])[0];
        if (page?.title) {
          channelData.facebook = {
            pageName: page.title, pageUrl: page.pageUrl || fbUrl,
            followers: page.followers, likes: page.likes, categories: page.categories,
            email: page.email, phone: page.phone, website: page.website,
            address: page.address, intro: page.intro, rating: page.rating,
            profilePictureUrl: page.profilePictureUrl,
          };
        } else {
          throw new Error("Facebook page scraper returned no data");
        }
      }));
    }

    // ─── 4. Gangnam Unni (강남언니) ───
    const guVerified = verified.gangnamUnni as Record<string, unknown> | null;
    if (FIRECRAWL_API_KEY && guVerified?.verified && guVerified.url) {
      channelTasks.push(wrapChannelTask("gangnamUnni", async () => {
        const scrapeRes = await fetchWithRetry("https://api.firecrawl.dev/v1/scrape", {
          method: "POST",
          headers: { "Content-Type": "application/json", Authorization: `Bearer ${FIRECRAWL_API_KEY}` },
          body: JSON.stringify({
            url: guVerified!.url as string,
            formats: ["json"],
            jsonOptions: {
              prompt: "Extract: hospital name, overall rating (강남언니 rating is always out of 10, NOT out of 5), total review count, doctors with names/ratings/review counts/specialties, procedures offered, address, certifications/badges",
              schema: {
                type: "object",
                properties: {
                  hospitalName: { type: "string" }, rating: { type: "number" }, totalReviews: { type: "number" },
                  doctors: { type: "array", items: { type: "object", properties: { name: { type: "string" }, rating: { type: "number" }, reviews: { type: "number" }, specialty: { type: "string" } } } },
                  procedures: { type: "array", items: { type: "string" } },
                  address: { type: "string" }, badges: { type: "array", items: { type: "string" } },
                },
              },
            },
            waitFor: 5000,
          }),
        }, { label: "firecrawl-gangnamunni", timeoutMs: 60000 });
        if (!scrapeRes.ok) throw new Error(`Firecrawl 강남언니 scrape failed: ${scrapeRes.status}`);
        const data = await scrapeRes.json();
        const hospital = data.data?.json;
        if (hospital?.hospitalName) {
          channelData.gangnamUnni = {
            name: hospital.hospitalName,
            rawRating: hospital.rating,
            // The Gangnam Unni rating is always /10 (enforced in the Firecrawl prompt) — trust the value directly.
            // Do NOT multiply by 2: a score of 4.8 means 4.8/10, not 9.6/10.
            rating: typeof hospital.rating === "number" && hospital.rating > 0 ? hospital.rating : null,
            ratingScale: "/10",
            totalReviews: hospital.totalReviews, doctors: (hospital.doctors || []).slice(0, 10),
            procedures: hospital.procedures || [], address: hospital.address,
            badges: hospital.badges || [], sourceUrl: guVerified!.url as string,
          };
        } else {
          throw new Error("강남언니 scrape returned no hospital data");
        }
      }));
    }

    // ─── 5. Naver Blog + Place ───
    if (NAVER_CLIENT_ID && NAVER_CLIENT_SECRET && clinicName) {
      const naverHeaders = { "X-Naver-Client-Id": NAVER_CLIENT_ID, "X-Naver-Client-Secret": NAVER_CLIENT_SECRET };

      channelTasks.push(wrapChannelTask("naverBlog", async () => {
        // Get verified Naver Blog handle from Phase 1 for official blog URL
        const nbVerified = verified.naverBlog as Record<string, unknown> | null;
        const officialBlogHandle = nbVerified?.handle ? String(nbVerified.handle) : null;

        const query = encodeURIComponent(`${clinicName} 후기`);
        const res = await fetchWithRetry(`https://openapi.naver.com/v1/search/blog.json?query=${query}&display=10&sort=sim`, { headers: naverHeaders }, { label: "naver-blog" });
        if (!res.ok) throw new Error(`Naver Blog API returned ${res.status}`);
        const data = await res.json();
        channelData.naverBlog = {
          totalResults: data.total || 0, searchQuery: `${clinicName} 후기`,
          // Official blog URL from Phase 1 verified handle
          officialBlogUrl: officialBlogHandle ? `https://blog.naver.com/${officialBlogHandle}` : null,
          officialBlogHandle: officialBlogHandle,
          // Blog mentions (third-party posts, NOT the official blog)
          posts: (data.items || []).slice(0, 10).map((item: Record<string, string>) => ({
            title: stripTags(item.title || ""),
            description: stripTags(item.description || ""),
            link: item.link, bloggerName: item.bloggername, postDate: item.postdate,
          })),
        };
      }));

      channelTasks.push(wrapChannelTask("naverPlace", async () => {
        // Try multiple queries to find the correct place (avoid same-name different clinics)
        const queries = [
          `${clinicName} 성형외과`,
          `${clinicName} 성형`,
          clinicName,
        ];
        // Clinic name without common suffixes, used for the name-based fallback match.
        const bareClinicName = clinicName.replace(/성형외과|병원|의원/g, "").trim().toLowerCase();
        for (const q of queries) {
          const query = encodeURIComponent(q);
          const res = await fetchWithRetry(`https://openapi.naver.com/v1/search/local.json?query=${query}&display=5&sort=comment`, { headers: naverHeaders }, { label: "naver-place" });
          if (!res.ok) continue;
          const data = await res.json();
          // Find the best match: prefer category containing 성형 or 피부,
          // otherwise fall back to a result whose title contains the clinic name.
          const items = (data.items || []) as Record<string, string>[];
          const match = items.find(i =>
            (i.category || "").includes("성형") || (i.category || "").includes("피부")
          ) || items.find(i => stripTags(i.title || "").toLowerCase().includes(bareClinicName)) || null;

          if (match) {
            channelData.naverPlace = {
              name: stripTags(match.title || ""),
              category: match.category, address: match.roadAddress || match.address,
              telephone: match.telephone, link: match.link, mapx: match.mapx, mapy: match.mapy,
            };
            break;
          }
        }
        // Finding no match does not throw here: the task completes without writing `naverPlace`.
      }));
    }

    // ─── 6. Google Maps (Google Places API New) ───
    if (GOOGLE_PLACES_API_KEY && clinicName) {
      channelTasks.push(wrapChannelTask("googleMaps", async () => {
        const place = await searchGooglePlace(clinicName, address || undefined, GOOGLE_PLACES_API_KEY);
        if (place) {
          channelData.googleMaps = {
            name: place.name, rating: place.rating, reviewCount: place.reviewCount,
            address: place.address, phone: place.phone,
            clinicWebsite: place.clinicWebsite,
            mapsUrl: place.mapsUrl,
            placeId: place.placeId,
            category: place.category, openingHours: place.openingHours,
            topReviews: place.topReviews,
          };
        } else {
          throw new Error("Google Maps: no matching place found");
        }
      }));
    }

    // ─── 7. Market Analysis (Perplexity) ───
    if (PERPLEXITY_API_KEY && services.length > 0) {
      channelTasks.push(wrapChannelTask("marketAnalysis", async () => {
        const topServices = services.slice(0, 3).join(", ");
        const queries = [
          { id: "competitors", prompt: `${address || "강남"} 근처 ${topServices} 전문 성형외과/피부과 경쟁 병원 5곳을 분석해줘. 각 병원의 이름, 주요 시술, 온라인 평판, 마케팅 채널을 JSON 형식으로 제공해줘.` },
          { id: "keywords", prompt: `한국 ${topServices} 관련 검색 키워드 트렌드. 네이버와 구글에서 월간 검색량이 높은 키워드 20개, 경쟁 강도, 추천 롱테일 키워드를 JSON 형식으로 제공해줘.` },
          { id: "market", prompt: `한국 ${services[0] || "성형외과"} 시장 트렌드 2025-2026. 시장 규모, 성장률, 주요 트렌드, 마케팅 채널별 효과를 JSON 형식으로 제공해줘.` },
          { id: "targetAudience", prompt: `${clinicName}의 잠재 고객 분석. 연령대별, 성별, 관심 시술, 정보 탐색 채널, 의사결정 요인을 JSON 형식으로 제공해줘.` },
        ];

        const results = await Promise.allSettled(queries.map(async q => {
          const res = await fetchWithRetry("https://api.perplexity.ai/chat/completions", {
            method: "POST",
            headers: { "Content-Type": "application/json", Authorization: `Bearer ${PERPLEXITY_API_KEY}` },
            body: JSON.stringify({
              model: PERPLEXITY_MODEL, messages: [
                { role: "system", content: "You are a Korean medical marketing analyst. Always respond in Korean. Provide data in valid JSON format." },
                { role: "user", content: q.prompt },
              ], temperature: 0.3,
            }),
          }, { label: `perplexity:${q.id}`, timeoutMs: 60000 });
          // An HTTP failure must count as a failed query, not as a success with empty content.
          if (!res.ok) throw new Error(`Perplexity ${q.id} returned ${res.status}`);
          const data = await res.json();
          return { id: q.id, content: data.choices?.[0]?.message?.content || "", citations: data.citations || [] };
        }));

        let fulfilledCount = 0;
        for (const r of results) {
          if (r.status !== "fulfilled") continue;
          const { id, content, citations } = r.value;
          let parsed = content;
          // Prefer the contents of a ```json fenced block when one is present.
          const jsonMatch = content.match(/```json\n?([\s\S]*?)```/);
          if (jsonMatch) {
            try {
              parsed = JSON.parse(jsonMatch[1]);
            } catch {
              // Best effort: keep the raw text when the fenced block is not valid JSON.
            }
          }
          analysisData[id] = { data: parsed, citations };
          fulfilledCount++;
        }
        if (fulfilledCount === 0) throw new Error("All Perplexity queries failed");
      }));
    }

    // ─── 8. Vision Analysis: Screenshots + Gemini Vision ───
    const GEMINI_API_KEY = Deno.env.get("GEMINI_API_KEY") || "";

    if (FIRECRAWL_API_KEY) {
      const mainUrl = row.url || "";
      const siteMap: string[] = row.scrape_data?.siteMap || [];

      channelTasks.push(wrapChannelTask("vision", async () => {
        // Capture screenshots of relevant pages + social channel landings
        const screenshots: ScreenshotResult[] = await captureAllScreenshots(mainUrl, siteMap, verified, FIRECRAWL_API_KEY);

        // Run Gemini Vision on captured screenshots
        if (GEMINI_API_KEY && screenshots.length > 0) {
          const vision = await runVisionAnalysis(screenshots, GEMINI_API_KEY);
          channelData.visionAnalysis = vision.merged;
          channelData.visionPerPage = vision.perPage;
        }

        // Store screenshot metadata only (NOT base64 — the URL field is kept instead)
        channelData.screenshots = screenshots.map(ss => ({
          id: ss.id,
          url: ss.url, // per the original note: GCS signed URL (valid ~7 days)
          channel: ss.channel,
          capturedAt: ss.capturedAt,
          caption: ss.caption,
          sourceUrl: ss.sourceUrl,
        }));

        if (screenshots.length === 0) {
          const debugInfo = screenshotErrors.length > 0
            ? screenshotErrors.join(" | ")
            : "No errors recorded — check FIRECRAWL_API_KEY";
          throw new Error(`No screenshots captured: ${debugInfo}`);
        }
      }));
    }

    // ─── Execute all channel tasks ───
    const taskResults = await Promise.all(channelTasks);

    // ─── 9. Founding Year Text Fallback (Harness 2) ───
    // If Vision didn't find foundingYear, try regex extraction from scraped text.
    // This has to run AFTER the tasks settle: before that point the vision task
    // has not written `visionAnalysis` yet, and its later assignment would
    // overwrite any fallback value stored here.
    const visionSoFar = (channelData.visionAnalysis ?? {}) as Record<string, unknown>;
    if (!visionSoFar.foundingYear) {
      const htmlText = row.scrape_data?.markdown || row.scrape_data?.text || "";
      if (htmlText) {
        const textYear = extractFoundingYear(htmlText);
        if (textYear) {
          channelData.visionAnalysis = { ...visionSoFar, foundingYear: String(textYear) };
          console.log(`[harness] Founding year extracted from text fallback: ${textYear}`);
        }
      }
    }

    // ─── Build channelErrors from task results ───
    const channelErrors: Record<string, { error: string; durationMs: number }> = {};
    let failedCount = 0;
    let successCount = 0;
    for (const result of taskResults) {
      if (result.success) {
        successCount++;
      } else {
        failedCount++;
        channelErrors[result.channel] = {
          error: result.error || "Unknown error",
          durationMs: result.durationMs,
        };
      }
    }

    const totalTasks = taskResults.length;
    const isPartial = failedCount > 0 && successCount > 0;
    const isFullFailure = failedCount > 0 && successCount === 0;
    // With zero tasks (nothing configured) neither flag is set and the status is "collected".
    const collectionStatus = isFullFailure ? "collection_failed" : isPartial ? "partial" : "collected";

    console.log(`[collect] ${successCount}/${totalTasks} tasks succeeded. Status: ${collectionStatus}`);
    if (failedCount > 0) {
      console.warn(`[collect] Failed channels:`, JSON.stringify(channelErrors));
    }

    // ─── UNCONDITIONAL Legacy Save: always persist whatever we have ───
    const { error: legacySaveError } = await supabase.from("marketing_reports").update({
      channel_data: channelData,
      analysis_data: { clinicName, services, address, analysis: analysisData, analyzedAt: new Date().toISOString() },
      channel_errors: channelErrors,
      status: collectionStatus,
      updated_at: new Date().toISOString(),
    }).eq("id", reportId);
    reportDbError("marketing_reports save", legacySaveError);

    // ─── V3: channel_snapshots + screenshots + analysis_runs ───
    const clinicId = inputClinicId || null;
    const runId = inputRunId || null;

    if (clinicId && runId) {
      try {
        // Channel snapshots — INSERT one row per channel (time-series!)
        const snapshotInserts: Record<string, unknown>[] = [];

        const igData = channelData.instagram as Record<string, unknown> | undefined;
        if (igData) {
          snapshotInserts.push({
            clinic_id: clinicId, run_id: runId, channel: "instagram",
            handle: igData.username, followers: igData.followers, posts: igData.posts,
            details: igData,
          });
        }

        const ytData = channelData.youtube as Record<string, unknown> | undefined;
        if (ytData) {
          snapshotInserts.push({
            clinic_id: clinicId, run_id: runId, channel: "youtube",
            handle: ytData.handle || ytData.channelName, followers: ytData.subscribers,
            posts: ytData.totalVideos, total_views: ytData.totalViews,
            details: ytData,
          });
        }

        const fbData = channelData.facebook as Record<string, unknown> | undefined;
        if (fbData) {
          snapshotInserts.push({
            clinic_id: clinicId, run_id: runId, channel: "facebook",
            handle: fbData.pageName, followers: fbData.followers,
            details: fbData,
          });
        }

        const guData = channelData.gangnamUnni as Record<string, unknown> | undefined;
        if (guData) {
          snapshotInserts.push({
            clinic_id: clinicId, run_id: runId, channel: "gangnamUnni",
            handle: guData.name, rating: guData.rating, rating_scale: 10,
            reviews: guData.totalReviews, details: guData,
          });
        }

        const gmData = channelData.googleMaps as Record<string, unknown> | undefined;
        if (gmData) {
          snapshotInserts.push({
            clinic_id: clinicId, run_id: runId, channel: "googleMaps",
            handle: gmData.name, rating: gmData.rating, rating_scale: 5,
            reviews: gmData.reviewCount, details: gmData,
          });
        }

        const nbData = channelData.naverBlog as Record<string, unknown> | undefined;
        if (nbData) {
          snapshotInserts.push({
            clinic_id: clinicId, run_id: runId, channel: "naverBlog",
            handle: nbData.officialBlogHandle, details: nbData,
          });
        }

        if (snapshotInserts.length > 0) {
          const { error: snapshotError } = await supabase.from("channel_snapshots").insert(snapshotInserts);
          reportDbError("channel_snapshots insert", snapshotError);
        }

        // Screenshots — INSERT evidence rows
        const screenshotList = (channelData.screenshots || []) as Record<string, unknown>[];
        if (screenshotList.length > 0) {
          const { error: screenshotError } = await supabase.from("screenshots").insert(
            screenshotList.map(ss => ({
              clinic_id: clinicId, run_id: runId,
              // page_type is the second "-"-separated segment of the screenshot id, defaulting to "main".
              channel: ss.channel, page_type: (ss.id as string || "").split("-")[1] || "main",
              url: ss.url, source_url: ss.sourceUrl, caption: ss.caption,
            }))
          );
          reportDbError("screenshots insert", screenshotError);
        }

        // Update analysis_run with status + errors
        const { error: runUpdateError } = await supabase.from("analysis_runs").update({
          raw_channel_data: channelData,
          analysis_data: { clinicName, services, address, analysis: analysisData },
          vision_analysis: channelData.visionAnalysis || {},
          channel_errors: channelErrors,
          status: collectionStatus,
        }).eq("id", runId);
        reportDbError("analysis_runs update", runUpdateError);

      } catch (e) {
        // V3 writes are best-effort: a failure here must not fail the request.
        console.error("V3 dual-write error:", e);
      }
    }

    return new Response(
      JSON.stringify({
        success: !isFullFailure,
        status: collectionStatus,
        channelData,
        analysisData,
        channelErrors: Object.keys(channelErrors).length > 0 ? channelErrors : undefined,
        partialFailure: isPartial,
        taskSummary: { total: totalTasks, succeeded: successCount, failed: failedCount },
        collectedAt: new Date().toISOString(),
      }),
      { headers: { ...corsHeaders, "Content-Type": "application/json" } },
    );
  } catch (error) {
    return new Response(
      JSON.stringify({ success: false, error: describeError(error) }),
      { status: 500, headers: { ...corsHeaders, "Content-Type": "application/json" } },
    );
  }
});
|