import "@supabase/functions-js/edge-runtime.d.ts";
import { createClient } from "https://esm.sh/@supabase/supabase-js@2";
import type { VerifiedChannels } from "../_shared/verifyHandles.ts";
import { PERPLEXITY_MODEL } from "../_shared/config.ts";
import { captureAllScreenshots, runVisionAnalysis, screenshotErrors, type ScreenshotResult } from "../_shared/visionAnalysis.ts";
import { fetchWithRetry, fetchJsonWithRetry, wrapChannelTask, type ChannelTaskResult } from "../_shared/retry.ts";
import { searchGooglePlace } from "../_shared/googlePlaces.ts";
import { extractFoundingYear, validateFoundingYearExtractor } from "../_shared/foundingYearExtractor.ts";
import { validateClassifier } from "../_shared/urlClassifier.ts";
import { validateDataQuality } from "../_shared/dataQuality.ts";

// ─── Harness Self-Tests (cold-start) ───
// Runs once at module load; a failing self-test is only logged, never fatal.
const harnessResults = {
  classifier: validateClassifier(),
  foundingYear: validateFoundingYearExtractor(),
  dataQuality: validateDataQuality(),
};
for (const [name, result] of Object.entries(harnessResults)) {
  if (!result.pass) {
    console.warn(`[harness] ${name} self-test FAILED:`, result.failures);
  }
}

const corsHeaders = {
  "Access-Control-Allow-Origin": "*",
  "Access-Control-Allow-Headers": "authorization, x-client-info, apikey, content-type",
};

const APIFY_BASE = "https://api.apify.com/v2";

interface CollectRequest {
  reportId: string;
  clinicId?: string; // V3: clinic UUID
  runId?: string; // V3: analysis_run UUID
}

/** Extracts a printable message from an arbitrary thrown value. */
function errorMessage(e: unknown): string {
  return e instanceof Error ? e.message : String(e);
}

/**
 * Returns the hostname of `url` without a leading "www." prefix, or "unknown"
 * when `url` is empty or not parseable. Used to build storage paths.
 */
function clinicDomain(url: string): string {
  try {
    // Anchored so only a leading "www." is removed, not one inside the name.
    return new URL(url).hostname.replace(/^www\./, "");
  } catch {
    return "unknown";
  }
}

/**
 * Starts an Apify actor run, waits for it to finish, and returns up to 20
 * items from its default dataset.
 *
 * @param actorId Apify actor identifier (e.g. "apify~instagram-profile-scraper").
 * @param input   Actor input, sent as the JSON request body.
 * @param token   Apify API token.
 * @throws Error when either HTTP call is non-2xx, the run has no dataset id,
 *         or the dataset response is not an array.
 */
async function runApifyActor(actorId: string, input: Record<string, unknown>, token: string): Promise<unknown[]> {
  const shortLabel = actorId.split("~")[1] || actorId;
  const res = await fetchWithRetry(
    `${APIFY_BASE}/acts/${actorId}/runs?token=${token}&waitForFinish=120`,
    { method: "POST", headers: { "Content-Type": "application/json" }, body: JSON.stringify(input) },
    { maxRetries: 1, timeoutMs: 130000, label: `apify:${shortLabel}` },
  );
  if (!res.ok) throw new Error(`Apify ${actorId} returned ${res.status}`);
  const run = await res.json();
  const datasetId = run.data?.defaultDatasetId;
  if (!datasetId) throw new Error(`Apify ${actorId}: no dataset returned`);

  const itemsRes = await fetchWithRetry(
    `${APIFY_BASE}/datasets/${datasetId}/items?token=${token}&limit=20`,
    undefined,
    { maxRetries: 1, timeoutMs: 30000, label: `apify-dataset:${shortLabel}` },
  );
  if (!itemsRes.ok) throw new Error(`Apify ${actorId} dataset returned ${itemsRes.status}`);
  const items: unknown = await itemsRes.json();
  // Callers index and map the result, so reject anything that is not a list.
  if (!Array.isArray(items)) throw new Error(`Apify ${actorId}: dataset response is not an array`);
  return items;
}

// ─── Health Score Calculator ───────────────────────────────────────────────
// Computes a 0-100 health score per channel based on follower/rating/review
// benchmarks for Korean plastic surgery clinics. Used in channel_snapshots.
//
// Design: linear interpolation between (floor, floorScore) and (ceil, ceilScore).
// E.g., Instagram at 5K followers → mid-range between 1K=40 and 10K=70.

/**
 * Linearly interpolates a score for `value` between (low, scoreLow) and
 * (high, scoreHigh), clamped to the end scores outside the range. The result
 * is rounded to the nearest integer.
 */
function lerp(value: number, low: number, high: number, scoreLow: number, scoreHigh: number): number {
  if (value <= low) return scoreLow;
  if (value >= high) return scoreHigh;
  return Math.round(scoreLow + (value - low) / (high - low) * (scoreHigh - scoreLow));
}

/**
 * Scores `value` against an ascending list of [threshold, score] breakpoints,
 * interpolating within the segment that contains it and clamping at both ends.
 */
function piecewiseScore(value: number, points: readonly (readonly [number, number])[]): number {
  const first = points[0];
  const last = points[points.length - 1];
  if (value <= first[0]) return first[1];
  if (value >= last[0]) return last[1];
  for (let i = 1; i < points.length; i++) {
    const [upperX, upperY] = points[i];
    if (value < upperX) {
      const [lowerX, lowerY] = points[i - 1];
      return lerp(value, lowerX, upperX, lowerY, upperY);
    }
  }
  return last[1];
}

/**
 * Coerces a scraped metric to a number. Numeric strings keep their fractional
 * part (so a rating of "4.8" is not truncated to 4) and thousands separators
 * are ignored; anything unparseable, including NaN, becomes 0.
 */
function toNumber(v: unknown): number {
  if (typeof v === "number") return Number.isNaN(v) ? 0 : v;
  if (typeof v === "string") {
    const parsed = Number.parseFloat(v.replace(/,/g, ""));
    return Number.isFinite(parsed) ? parsed : 0;
  }
  return 0;
}

/** Converts a rating on a 0..scale range into 0..maxPoints, clamped at both ends. */
function ratingPoints(rating: number, scale: number, maxPoints: number): number {
  const ratio = Math.min(Math.max(rating / scale, 0), 1);
  return Math.round(ratio * maxPoints);
}

/**
 * Computes a 0-100 health score for one channel.
 *
 * @param channel Channel key: 'instagram', 'youtube', 'facebook', 'gangnamUnni',
 *                'googleMaps', 'naverBlog' or 'naverPlace'.
 * @param data    Collected channel data; only the metric fields read below are used.
 * @returns Score in 0..100. Unknown channels get a neutral 50.
 */
function computeHealthScore(channel: string, data: Record<string, unknown>): number {
  switch (channel) {
    case "instagram": {
      // followers: 0→20, 1K→40, 10K→70, 50K→90, 100K+→100
      const base = piecewiseScore(toNumber(data.followers), [[0, 20], [1_000, 40], [10_000, 70], [50_000, 90], [100_000, 100]]);
      // posts bonus: +5 if active (≥ 50 posts)
      const bonus = toNumber(data.posts) >= 50 ? 5 : 0;
      return Math.min(base + bonus, 100);
    }
    case "youtube": {
      // subscribers: 0→20, 500→40, 5K→65, 50K→85, 200K+→100
      const base = piecewiseScore(toNumber(data.subscribers), [[0, 20], [500, 40], [5_000, 65], [50_000, 85], [200_000, 100]]);
      // video count bonus: +5 if ≥ 20 videos
      const bonus = toNumber(data.totalVideos) >= 20 ? 5 : 0;
      return Math.min(base + bonus, 100);
    }
    case "facebook":
      // followers: 0→20, 500→35, 5K→60, 20K→80, 50K+→100
      return piecewiseScore(toNumber(data.followers), [[0, 20], [500, 35], [5_000, 60], [20_000, 80], [50_000, 100]]);
    case "gangnamUnni": {
      // rating /10: max 70pts. reviews: 0→0, 100→10, 1000→20, 10000→30
      const rScore = ratingPoints(toNumber(data.rating), 10, 70);
      const rvScore = piecewiseScore(toNumber(data.totalReviews), [[0, 0], [100, 10], [1_000, 20], [10_000, 30]]);
      return Math.min(rScore + rvScore, 100);
    }
    case "googleMaps": {
      // rating /5: max 60pts. reviews: 0→0, 50→10, 500→25, 5000→40
      const rScore = ratingPoints(toNumber(data.rating), 5, 60);
      const rvScore = piecewiseScore(toNumber(data.reviewCount), [[0, 0], [50, 10], [500, 25], [5_000, 40]]);
      return Math.min(rScore + rvScore, 100);
    }
    case "naverBlog": {
      // Presence-based: official handle = 50 (otherwise 20), plus a mention-count
      // bonus of up to +30. The maximum reachable score is therefore 80.
      const presence = data.officialBlogHandle ? 50 : 20;
      const mentionScore = piecewiseScore(toNumber(data.totalResults), [[0, 0], [100, 15], [1000, 30]]);
      return Math.min(presence + mentionScore, 100);
    }
    case "naverPlace": {
      // rating /5: max 60pts. reviews: 0→0, 100→15, 1000→30, 10000→40
      const reviews = toNumber(data.reviewCount) || toNumber(data.reviews);
      const rScore = ratingPoints(toNumber(data.rating), 5, 60);
      const rvScore = piecewiseScore(reviews, [[0, 0], [100, 15], [1_000, 30], [10_000, 40]]);
      return Math.min(rScore + rvScore, 100);
    }
    default:
      return 50; // Unknown channel — neutral score
  }
}

/**
 * Phase 2: Collect Channel Data
 *
 * Uses verified handles from Phase 1 (stored in DB) to collect ALL raw data
 * from each channel in parallel. Also runs market analysis via Perplexity.
 *
 * Request body: { reportId, clinicId?, runId? }. Responds 200 with the collected
 * data (success=false only when every task failed) or 500 with { success: false,
 * error } on an unexpected exception.
 */
Deno.serve(async (req: Request): Promise<Response> => {
  if (req.method === "OPTIONS") {
    return new Response("ok", { headers: corsHeaders });
  }

  try {
    const { reportId, clinicId: inputClinicId, runId: inputRunId } = (await req.json()) as CollectRequest;
    if (!reportId) throw new Error("reportId is required");

    // Read Phase 1 results from DB
    const supabaseUrl = Deno.env.get("SUPABASE_URL")!;
    const supabaseKey = Deno.env.get("SUPABASE_SERVICE_ROLE_KEY")!;
    const supabase = createClient(supabaseUrl, supabaseKey);

    const { data: row, error: fetchError } = await supabase
      .from("marketing_reports")
      .select("*")
      .eq("id", reportId)
      .single();
    if (fetchError || !row) throw new Error(`Report not found: ${fetchError?.message}`);

    const verified = row.verified_channels as VerifiedChannels;
    const clinicName: string = row.clinic_name || "";
    const address: string = row.scrape_data?.clinic?.address || "";
    const services: string[] = row.scrape_data?.clinic?.services || [];
    const domain = clinicDomain(row.url || "");

    // supabase-js reports failures via `error` instead of throwing, so check it.
    const { error: statusError } = await supabase.from("marketing_reports").update({ status: "collecting" }).eq("id", reportId);
    if (statusError) console.warn(`[collect] Could not set status to collecting: ${statusError.message}`);

    const APIFY_TOKEN = Deno.env.get("APIFY_API_TOKEN") || "";
    const YOUTUBE_API_KEY = Deno.env.get("YOUTUBE_API_KEY") || "";
    const FIRECRAWL_API_KEY = Deno.env.get("FIRECRAWL_API_KEY") || "";
    const PERPLEXITY_API_KEY = Deno.env.get("PERPLEXITY_API_KEY") || "";
    const NAVER_CLIENT_ID = Deno.env.get("NAVER_CLIENT_ID") || "";
    const NAVER_CLIENT_SECRET = Deno.env.get("NAVER_CLIENT_SECRET") || "";
    const GOOGLE_PLACES_API_KEY = Deno.env.get("GOOGLE_PLACES_API_KEY") || "";

    const channelData: Record<string, unknown> = {};
    const analysisData: Record<string, unknown> = {};
    const channelTasks: Promise<ChannelTaskResult>[] = [];

    // ─── 1. Instagram (multi-account) — every candidate with a handle, except
    //        those Phase 1 explicitly marked verified === false ───
    const igCandidates = (verified.instagram || []).filter((v: Record<string, unknown>) => v.handle && v.verified !== false);
    if (APIFY_TOKEN && igCandidates.length > 0) {
      channelTasks.push(wrapChannelTask("instagram", async () => {
        const accounts: Record<string, unknown>[] = [];
        let lastError = "";
        for (const ig of igCandidates) {
          const username = String(ig.handle).replace(/^@/, "");
          try {
            const items = await runApifyActor("apify~instagram-profile-scraper", { usernames: [username], resultsLimit: 12 }, APIFY_TOKEN);
            const profile = (items as Record<string, unknown>[])[0];
            if (!profile || profile.error) continue;
            accounts.push({
              username: profile.username,
              followers: profile.followersCount,
              following: profile.followsCount,
              posts: profile.postsCount,
              bio: profile.biography,
              isBusinessAccount: profile.isBusinessAccount,
              externalUrl: profile.externalUrl,
              igtvVideoCount: profile.igtvVideoCount,
              latestPosts: ((profile.latestPosts as Record<string, unknown>[]) || []).slice(0, 12).map((p) => ({
                type: p.type,
                likes: p.likesCount,
                comments: p.commentsCount,
                caption: p.caption,
                timestamp: p.timestamp,
              })),
            });
          } catch (e) {
            // One failing handle must not discard the accounts already collected.
            lastError = errorMessage(e);
            console.warn(`[instagram] Profile scrape failed for ${username}: ${lastError}`);
          }
        }
        if (accounts.length === 0) {
          throw new Error(`No Instagram profiles found via Apify${lastError ? ` (last error: ${lastError})` : ""}`);
        }
        channelData.instagramAccounts = accounts;
        channelData.instagram = accounts[0];
      }));
    }

    // ─── 1b. Instagram Posts (latest 20 posts, detailed) ───
    const igPrimaryHandle = igCandidates[0]?.handle as string | undefined;
    if (APIFY_TOKEN && igPrimaryHandle) {
      channelTasks.push(wrapChannelTask("instagramPosts", async () => {
        const handle = igPrimaryHandle.replace(/^@/, "");
        const items = await runApifyActor(
          "apify~instagram-post-scraper",
          { directUrls: [`https://www.instagram.com/${handle}/`], resultsLimit: 20 },
          APIFY_TOKEN,
        );
        const posts = (items as Record<string, unknown>[]).map((p) => ({
          id: p.id,
          type: p.type,
          shortCode: p.shortCode,
          url: p.url,
          caption: ((p.caption as string) || "").slice(0, 500),
          hashtags: p.hashtags || [],
          mentions: p.mentions || [],
          likesCount: (p.likesCount as number) || 0,
          commentsCount: (p.commentsCount as number) || 0,
          timestamp: p.timestamp,
          displayUrl: p.displayUrl,
        }));
        const totalLikes = posts.reduce((sum, p) => sum + p.likesCount, 0);
        const totalComments = posts.reduce((sum, p) => sum + p.commentsCount, 0);
        channelData.instagramPosts = {
          posts,
          totalPosts: posts.length,
          avgLikes: posts.length > 0 ? Math.round(totalLikes / posts.length) : 0,
          avgComments: posts.length > 0 ? Math.round(totalComments / posts.length) : 0,
        };
      }));
    }

    // ─── 1c. Instagram Reels (latest 15 reels, detailed) ───
    if (APIFY_TOKEN && igPrimaryHandle) {
      channelTasks.push(wrapChannelTask("instagramReels", async () => {
        const handle = igPrimaryHandle.replace(/^@/, "");
        const items = await runApifyActor(
          "apify~instagram-reel-scraper",
          { directUrls: [`https://www.instagram.com/${handle}/reels/`], resultsLimit: 15 },
          APIFY_TOKEN,
        );
        const reels = (items as Record<string, unknown>[]).map((r) => ({
          id: r.id,
          shortCode: r.shortCode,
          url: r.url,
          caption: ((r.caption as string) || "").slice(0, 500),
          hashtags: r.hashtags || [],
          likesCount: (r.likesCount as number) || 0,
          commentsCount: (r.commentsCount as number) || 0,
          videoViewCount: (r.videoViewCount as number) || 0,
          videoPlayCount: (r.videoPlayCount as number) || 0,
          videoDuration: (r.videoDuration as number) || 0,
          timestamp: r.timestamp,
          musicInfo: r.musicInfo || null,
        }));
        const totalViews = reels.reduce((sum, r) => sum + r.videoViewCount, 0);
        const totalPlays = reels.reduce((sum, r) => sum + r.videoPlayCount, 0);
        channelData.instagramReels = {
          reels,
          totalReels: reels.length,
          avgViews: reels.length > 0 ? Math.round(totalViews / reels.length) : 0,
          avgPlays: reels.length > 0 ? Math.round(totalPlays / reels.length) : 0,
        };
      }));
    }

    // ─── 2. YouTube ───
    const ytVerified = verified.youtube as Record<string, unknown> | null;
    if (YOUTUBE_API_KEY && (ytVerified?.verified === true || ytVerified?.verified === "unverifiable")) {
      channelTasks.push(wrapChannelTask("youtube", async () => {
        const YT = "https://www.googleapis.com/youtube/v3";
        let channelId = (ytVerified?.channelId as string) || "";

        // If no channelId, try to resolve from handle
        if (!channelId && ytVerified?.handle) {
          const h = (ytVerified.handle as string).replace(/^@/, "");
          if (h.startsWith("UC")) {
            channelId = h;
          } else {
            for (const param of ["forHandle", "forUsername"]) {
              // Handles may contain non-ASCII characters, so encode them.
              const lookupRes = await fetch(`${YT}/channels?part=id&${param}=${encodeURIComponent(h)}&key=${YOUTUBE_API_KEY}`);
              if (!lookupRes.ok) continue;
              const lookupData = await lookupRes.json();
              channelId = lookupData.items?.[0]?.id || "";
              if (channelId) break;
            }
          }
        }
        if (!channelId) throw new Error("Could not resolve YouTube channel ID");

        const chRes = await fetchWithRetry(`${YT}/channels?part=snippet,statistics,brandingSettings&id=${channelId}&key=${YOUTUBE_API_KEY}`, undefined, { label: "youtube-api" });
        const chData = await chRes.json();
        const channel = chData.items?.[0];
        if (!channel) throw new Error("YouTube channel not found in API response");

        const stats = channel.statistics || {};
        const snippet = channel.snippet || {};

        // Popular videos
        const searchRes = await fetchWithRetry(`${YT}/search?part=snippet&channelId=${channelId}&order=viewCount&type=video&maxResults=10&key=${YOUTUBE_API_KEY}`, undefined, { label: "youtube-search" });
        const searchData = await searchRes.json();
        const videoIds = (searchData.items || [])
          .map((i: Record<string, unknown>) => (i.id as Record<string, unknown>)?.videoId)
          .filter(Boolean)
          .join(",");

        let videos: Record<string, unknown>[] = [];
        if (videoIds) {
          const vRes = await fetchWithRetry(`${YT}/videos?part=snippet,statistics,contentDetails&id=${videoIds}&key=${YOUTUBE_API_KEY}`, undefined, { label: "youtube-videos" });
          const vData = await vRes.json();
          videos = vData.items || [];
        }

        channelData.youtube = {
          channelId,
          channelName: snippet.title,
          handle: snippet.customUrl,
          description: snippet.description,
          publishedAt: snippet.publishedAt,
          thumbnailUrl: snippet.thumbnails?.default?.url,
          subscribers: parseInt(stats.subscriberCount || "0", 10),
          totalViews: parseInt(stats.viewCount || "0", 10),
          totalVideos: parseInt(stats.videoCount || "0", 10),
          videos: videos.slice(0, 10).map((v) => {
            const vs = (v.statistics ?? {}) as Record<string, string>;
            const vSnip = (v.snippet ?? {}) as Record<string, unknown>;
            const vCon = (v.contentDetails ?? {}) as Record<string, unknown>;
            return {
              title: vSnip.title,
              views: parseInt(vs.viewCount || "0", 10),
              likes: parseInt(vs.likeCount || "0", 10),
              comments: parseInt(vs.commentCount || "0", 10),
              date: vSnip.publishedAt,
              duration: vCon.duration,
              url: `https://www.youtube.com/watch?v=${v.id}`,
              thumbnail: (vSnip.thumbnails as Record<string, Record<string, string>>)?.medium?.url,
            };
          }),
        };
      }));
    }

    // ─── 3. Facebook ───
    const fbVerified = verified.facebook as Record<string, unknown> | null;
    if (APIFY_TOKEN && (fbVerified?.verified === true || fbVerified?.verified === "unverifiable")) {
      channelTasks.push(wrapChannelTask("facebook", async () => {
        const explicitUrl = (fbVerified?.url as string) || "";
        const fbHandle = fbVerified?.handle ? String(fbVerified.handle) : "";
        // Without either there is nothing meaningful to scrape.
        if (!explicitUrl && !fbHandle) throw new Error("Facebook: no URL or handle available");
        const fbUrl = explicitUrl || `https://www.facebook.com/${fbHandle}`;

        const items = await runApifyActor("apify~facebook-pages-scraper", { startUrls: [{ url: fbUrl }] }, APIFY_TOKEN);
        const page = (items as Record<string, unknown>[])[0];
        if (!page?.title) throw new Error("Facebook page scraper returned no data");
        channelData.facebook = {
          pageName: page.title,
          pageUrl: page.pageUrl || fbUrl,
          followers: page.followers,
          likes: page.likes,
          categories: page.categories,
          email: page.email,
          phone: page.phone,
          website: page.website,
          address: page.address,
          intro: page.intro,
          rating: page.rating,
          profilePictureUrl: page.profilePictureUrl,
        };
      }));
    }

    // ─── 4. Gangnam Unni (always attempted, regardless of verification status) ───
    const guVerified = verified.gangnamUnni as Record<string, unknown> | null;
    if (FIRECRAWL_API_KEY && clinicName) {
      channelTasks.push(wrapChannelTask("gangnamUnni", async () => {
        let gangnamUnniUrl = (guVerified?.verified && guVerified.url) ? String(guVerified.url) : "";

        // Fallback: look the Gangnam Unni URL up directly via Firecrawl search
        if (!gangnamUnniUrl) {
          const shortName = clinicName.replace(/성형외과|의원|병원|클리닉|피부과/g, "").trim();
          const searchQueries = [
            `${clinicName} site:gangnamunni.com`,
            `${shortName} 성형외과 site:gangnamunni.com`,
            `${clinicName} 강남언니 병원`,
          ];
          for (const q of searchQueries) {
            try {
              const sRes = await fetch("https://api.firecrawl.dev/v1/search", {
                method: "POST",
                headers: { "Content-Type": "application/json", Authorization: `Bearer ${FIRECRAWL_API_KEY}` },
                body: JSON.stringify({ query: q, limit: 5 }),
              });
              if (!sRes.ok) continue;
              const sData = await sRes.json();
              const found = (sData.data || [])
                .map((r: Record<string, string>) => r.url)
                .find((u: string) => u?.includes("gangnamunni.com/hospitals/"));
              if (found) {
                gangnamUnniUrl = found;
                break;
              }
            } catch {
              /* best-effort search: try next query */
            }
          }
          if (gangnamUnniUrl) {
            console.log(`[gangnamUnni] Fallback search found: ${gangnamUnniUrl}`);
          }
        }

        if (!gangnamUnniUrl) {
          throw new Error("강남언니 URL을 찾을 수 없습니다 (검색 실패)");
        }

        const scrapeRes = await fetchWithRetry("https://api.firecrawl.dev/v1/scrape", {
          method: "POST",
          headers: { "Content-Type": "application/json", Authorization: `Bearer ${FIRECRAWL_API_KEY}` },
          body: JSON.stringify({
            url: gangnamUnniUrl,
            formats: ["json"],
            jsonOptions: {
              prompt: "Extract: hospital name, overall rating (강남언니 rating is always out of 10, NOT out of 5), total review count, doctors with names/ratings/review counts/specialties, procedures offered, address, certifications/badges",
              schema: {
                type: "object",
                properties: {
                  hospitalName: { type: "string" },
                  rating: { type: "number" },
                  totalReviews: { type: "number" },
                  doctors: { type: "array", items: { type: "object", properties: { name: { type: "string" }, rating: { type: "number" }, reviews: { type: "number" }, specialty: { type: "string" } } } },
                  procedures: { type: "array", items: { type: "string" } },
                  address: { type: "string" },
                  badges: { type: "array", items: { type: "string" } },
                },
              },
            },
            waitFor: 5000,
          }),
        }, { label: "firecrawl-gangnamunni", timeoutMs: 60000 });
        if (!scrapeRes.ok) throw new Error(`Firecrawl 강남언니 scrape failed: ${scrapeRes.status}`);
        const data = await scrapeRes.json();
        const hospital = data.data?.json;
        if (!hospital?.hospitalName) throw new Error("강남언니 scrape returned no hospital data");
        channelData.gangnamUnni = {
          name: hospital.hospitalName,
          rawRating: hospital.rating,
          // Only a positive numeric rating is trusted; anything else is stored as null.
          rating: typeof hospital.rating === "number" && hospital.rating > 0 ? hospital.rating : null,
          ratingScale: "/10",
          totalReviews: hospital.totalReviews,
          doctors: (hospital.doctors || []).slice(0, 10),
          procedures: hospital.procedures || [],
          address: hospital.address,
          badges: hospital.badges || [],
          sourceUrl: gangnamUnniUrl,
        };
      }));
    }

    // ─── 5. Naver Blog + Place ───
    if (NAVER_CLIENT_ID && NAVER_CLIENT_SECRET && clinicName) {
      const naverHeaders = { "X-Naver-Client-Id": NAVER_CLIENT_ID, "X-Naver-Client-Secret": NAVER_CLIENT_SECRET };

      channelTasks.push(wrapChannelTask("naverBlog", async () => {
        // Get verified Naver Blog handle from Phase 1 for official blog URL
        const nbVerified = verified.naverBlog as Record<string, unknown> | null;
        const officialBlogHandle = nbVerified?.handle ? String(nbVerified.handle) : null;
        const officialBlogUrl = officialBlogHandle ? `https://blog.naver.com/${officialBlogHandle}` : null;

        // ─── 5a. Naver Search: 3rd-party blog mentions ───
        const query = encodeURIComponent(`${clinicName} 후기`);
        const res = await fetchWithRetry(`https://openapi.naver.com/v1/search/blog.json?query=${query}&display=10&sort=sim`, { headers: naverHeaders }, { label: "naver-blog" });
        if (!res.ok) throw new Error(`Naver Blog API returned ${res.status}`);
        const data = await res.json();

        // ─── 5b. Firecrawl: Official blog recent posts ───
        // Scrapes the official blog (when a handle is known) for real content
        // metrics. Failures here are non-critical and only logged.
        let officialBlogContent: Record<string, unknown> | null = null;
        if (officialBlogUrl && FIRECRAWL_API_KEY) {
          try {
            const blogScrape = await fetchWithRetry(`https://api.firecrawl.dev/v1/scrape`, {
              method: "POST",
              headers: { "Content-Type": "application/json", Authorization: `Bearer ${FIRECRAWL_API_KEY}` },
              body: JSON.stringify({
                url: officialBlogUrl,
                formats: ["json"],
                jsonOptions: {
                  prompt: "Extract the blog's recent posts: title, date, excerpt. Also total post count visible on the page, and the blog category/tag list.",
                  schema: {
                    type: "object",
                    properties: {
                      totalPosts: { type: "number" },
                      recentPosts: { type: "array", items: { type: "object", properties: { title: { type: "string" }, date: { type: "string" }, excerpt: { type: "string" } } } },
                      categories: { type: "array", items: { type: "string" } },
                    },
                  },
                },
                waitFor: 3000,
              }),
            }, { label: "firecrawl-naver-blog", timeoutMs: 45000 });
            if (blogScrape.ok) {
              const blogData = await blogScrape.json();
              officialBlogContent = blogData.data?.json || null;
              console.log(`[naverBlog] Official blog scraped: ${officialBlogContent?.totalPosts ?? 0} posts`);
            }
          } catch (e) {
            console.warn(`[naverBlog] Official blog Firecrawl failed (non-critical):`, e);
          }
        }

        channelData.naverBlog = {
          totalResults: data.total || 0,
          searchQuery: `${clinicName} 후기`,
          officialBlogUrl,
          officialBlogHandle,
          // Official blog content (from Firecrawl — actual blog data)
          officialContent: officialBlogContent,
          // Blog mentions (third-party posts via Naver Search); HTML tags stripped
          posts: (data.items || []).slice(0, 10).map((item: Record<string, string>) => ({
            title: (item.title || "").replace(/<[^>]*>/g, ""),
            description: (item.description || "").replace(/<[^>]*>/g, ""),
            link: item.link,
            bloggerName: item.bloggername,
            postDate: item.postdate,
          })),
        };
      }));

      channelTasks.push(wrapChannelTask("naverPlace", async () => {
        // Try multiple queries to find the correct place (avoid same-name different clinics)
        const queries = [
          `${clinicName} 성형외과`,
          `${clinicName} 성형`,
          clinicName,
        ];
        const nameNeedle = clinicName.replace(/성형외과|병원|의원/g, "").trim().toLowerCase();
        for (const q of queries) {
          const query = encodeURIComponent(q);
          const res = await fetchWithRetry(`https://openapi.naver.com/v1/search/local.json?query=${query}&display=5&sort=comment`, { headers: naverHeaders }, { label: "naver-place" });
          if (!res.ok) continue;
          const data = await res.json();
          const items = (data.items || []) as Record<string, string>[];
          // Find the best match: prefer category containing 성형 or 피부,
          // otherwise fall back to a title that contains the shortened clinic name.
          const byCategory = items.find((i) => (i.category || "").includes("성형") || (i.category || "").includes("피부"));
          const byName = items.find((i) => {
            const name = (i.title || "").replace(/<[^>]*>/g, "").toLowerCase();
            // An empty needle would match every title, so require a real one.
            return nameNeedle.length > 0 && name.includes(nameNeedle);
          });
          const match = byCategory || byName;
          if (match) {
            channelData.naverPlace = {
              name: (match.title || "").replace(/<[^>]*>/g, ""),
              category: match.category,
              address: match.roadAddress || match.address,
              telephone: match.telephone,
              link: match.link,
              mapx: match.mapx,
              mapy: match.mapy,
            };
            return;
          }
        }
        // Report the miss like the other channels do instead of "succeeding" with no data.
        throw new Error("Naver Place: no matching place found");
      }));
    }

    // ─── 6. Google Maps (Google Places API New) ───
    if (GOOGLE_PLACES_API_KEY && clinicName) {
      channelTasks.push(wrapChannelTask("googleMaps", async () => {
        const place = await searchGooglePlace(clinicName, address || undefined, GOOGLE_PLACES_API_KEY);
        if (!place) throw new Error("Google Maps: no matching place found");
        channelData.googleMaps = {
          name: place.name,
          rating: place.rating,
          reviewCount: place.reviewCount,
          address: place.address,
          phone: place.phone,
          clinicWebsite: place.clinicWebsite,
          mapsUrl: place.mapsUrl,
          placeId: place.placeId,
          category: place.category,
          openingHours: place.openingHours,
          topReviews: place.topReviews,
        };
      }));
    }

    // ─── 7. Market Analysis (Perplexity) ───
    if (PERPLEXITY_API_KEY && services.length > 0) {
      channelTasks.push(wrapChannelTask("marketAnalysis", async () => {
        const queries = [
          { id: "competitors", prompt: `${address || "강남"} 근처 ${services.slice(0, 3).join(", ")} 전문 성형외과/피부과 경쟁 병원 5곳을 분석해줘. 각 병원의 이름, 주요 시술, 온라인 평판, 마케팅 채널을 JSON 형식으로 제공해줘.` },
          { id: "keywords", prompt: `한국 ${services.slice(0, 3).join(", ")} 관련 검색 키워드 트렌드. 네이버와 구글에서 월간 검색량이 높은 키워드 20개, 경쟁 강도, 추천 롱테일 키워드를 JSON 형식으로 제공해줘.` },
          { id: "market", prompt: `한국 ${services[0] || "성형외과"} 시장 트렌드 2025-2026. 시장 규모, 성장률, 주요 트렌드, 마케팅 채널별 효과를 JSON 형식으로 제공해줘.` },
          { id: "targetAudience", prompt: `${clinicName}의 잠재 고객 분석. 연령대별, 성별, 관심 시술, 정보 탐색 채널, 의사결정 요인을 JSON 형식으로 제공해줘.` },
        ];

        const results = await Promise.allSettled(queries.map(async (q) => {
          const res = await fetchWithRetry("https://api.perplexity.ai/chat/completions", {
            method: "POST",
            headers: { "Content-Type": "application/json", Authorization: `Bearer ${PERPLEXITY_API_KEY}` },
            body: JSON.stringify({
              model: PERPLEXITY_MODEL,
              messages: [
                { role: "system", content: "You are a Korean medical marketing analyst. Always respond in Korean. Provide data in valid JSON format." },
                { role: "user", content: q.prompt },
              ],
              temperature: 0.3,
            }),
          }, { label: `perplexity:${q.id}`, timeoutMs: 60000 });
          // A non-2xx body has no `choices`; fail the query rather than record empty content.
          if (!res.ok) throw new Error(`Perplexity ${q.id} returned ${res.status}`);
          const data = await res.json();
          return { id: q.id, content: (data.choices?.[0]?.message?.content || "") as string, citations: data.citations || [] };
        }));

        let answered = 0;
        for (const r of results) {
          if (r.status !== "fulfilled") continue;
          const { id, content, citations } = r.value;
          let parsed: unknown = content;
          const jsonMatch = content.match(/```json\n?([\s\S]*?)```/);
          if (jsonMatch) {
            try {
              parsed = JSON.parse(jsonMatch[1]);
            } catch {
              // Malformed JSON in the fenced block: keep the raw text instead.
            }
          }
          analysisData[id] = { data: parsed, citations };
          answered++;
        }
        if (answered === 0) throw new Error("All Perplexity queries failed");
      }));
    }

    // ─── 8. Vision Analysis: Screenshots + Gemini Vision ───
    const GEMINI_API_KEY = Deno.env.get("GEMINI_API_KEY") || "";
    let screenshots: ScreenshotResult[] = [];

    if (FIRECRAWL_API_KEY) {
      const mainUrl = row.url || "";
      const siteMap: string[] = row.scrape_data?.siteMap || [];

      channelTasks.push(wrapChannelTask("vision", async () => {
        // Step 1: Capture screenshots of relevant pages + social channel landings
        screenshots = await captureAllScreenshots(mainUrl, siteMap, verified, FIRECRAWL_API_KEY);
        if (screenshots.length === 0) {
          const debugInfo = screenshotErrors.length > 0
            ? screenshotErrors.join(" | ")
            : "No errors recorded — check FIRECRAWL_API_KEY";
          throw new Error(`No screenshots captured: ${debugInfo}`);
        }

        // ─── Step 2: Archive to Supabase Storage (replace 7-day GCS URLs) ───────
        // Firecrawl returns signed GCS URLs that expire after ~7 days.
        // We already have the image as base64 in memory — upload it permanently
        // to Supabase Storage and replace ss.url in-place before storing to DB.
        //
        // Upload happens in parallel; failures are non-fatal — the screenshot
        // keeps its GCS URL as a fallback so Vision analysis still proceeds.
        // Path: clinics/{domain}/{reportId}/screenshots/{id}.png
        const SUPABASE_STORAGE_BUCKET = "screenshots";
        // Tracks which uploads actually succeeded, rather than guessing from the URL text.
        const archivedIds = new Set<ScreenshotResult["id"]>();

        const archiveTasks = screenshots.map(async (ss) => {
          if (!ss.base64) return; // no image data — skip
          try {
            // base64 → Uint8Array
            const bytes = Uint8Array.from(atob(ss.base64), (ch) => ch.charCodeAt(0));

            const storagePath = `clinics/${domain}/${reportId}/screenshots/${ss.id}.png`;
            const { error: uploadError } = await supabase.storage
              .from(SUPABASE_STORAGE_BUCKET)
              .upload(storagePath, bytes, {
                contentType: "image/png",
                upsert: true, // overwrite if re-running same analysis
              });
            if (uploadError) {
              // Non-fatal: log and keep GCS URL as fallback
              console.warn(`[archive] Storage upload failed for ${ss.id}: ${uploadError.message}`);
              return;
            }

            // Replace GCS temp URL with permanent Supabase Storage public URL
            const { data: { publicUrl } } = supabase.storage
              .from(SUPABASE_STORAGE_BUCKET)
              .getPublicUrl(storagePath);
            ss.url = publicUrl; // in-place replace — all downstream code uses permanent URL
            archivedIds.add(ss.id);
            console.log(`[archive] ${ss.id} → clinics/${domain}/${reportId}/screenshots/`);
          } catch (archiveErr) {
            // Non-fatal: Vision analysis still proceeds with base64
            console.warn(`[archive] Exception for ${ss.id}:`, errorMessage(archiveErr));
          }
        });
        await Promise.allSettled(archiveTasks);
        console.log(`[archive] ${archivedIds.size}/${screenshots.length} screenshots archived to Supabase Storage`);

        // Step 3: Run Gemini Vision on captured screenshots (base64 still in memory)
        if (GEMINI_API_KEY) {
          const vision = await runVisionAnalysis(screenshots, GEMINI_API_KEY);
          channelData.visionAnalysis = vision.merged;
          channelData.visionPerPage = vision.perPage;
        }

        // Step 4: Store screenshots metadata — ss.url is now the permanent URL (or GCS fallback)
        channelData.screenshots = screenshots.map((ss) => ({
          id: ss.id,
          url: ss.url, // permanent Supabase Storage URL (or GCS fallback if archive failed)
          channel: ss.channel,
          capturedAt: ss.capturedAt,
          caption: ss.caption,
          sourceUrl: ss.sourceUrl,
          archived: archivedIds.has(ss.id), // true = permanent, false = GCS fallback
        }));
      }));
    }

    // ─── Execute all channel tasks ───
    const taskResults = await Promise.all(channelTasks);

    // ─── 9. Founding Year Text Fallback (Harness 2) ───
    // If Vision didn't find foundingYear, try regex extraction from scraped text.
    // This must run AFTER the tasks settle: before that the vision result does
    // not exist yet, and the vision task would overwrite whatever is set here.
    const visionAnalysis = (channelData.visionAnalysis ?? {}) as Record<string, unknown>;
    if (!visionAnalysis.foundingYear) {
      const htmlText = row.scrape_data?.markdown || row.scrape_data?.text || "";
      if (htmlText) {
        const textYear = extractFoundingYear(htmlText);
        if (textYear) {
          channelData.visionAnalysis = { ...visionAnalysis, foundingYear: String(textYear) };
          console.log(`[harness] Founding year extracted from text fallback: ${textYear}`);
        }
      }
    }

    // ─── Build channelErrors from task results ───
    const channelErrors: Record<string, { error: string; durationMs: number }> = {};
    let failedCount = 0;
    let successCount = 0;

    for (const result of taskResults) {
      if (result.success) {
        successCount++;
      } else {
        failedCount++;
        channelErrors[result.channel] = {
          error: result.error || "Unknown error",
          durationMs: result.durationMs,
        };
      }
    }

    const totalTasks = taskResults.length;
    const isPartial = failedCount > 0 && successCount > 0;
    const isFullFailure = failedCount > 0 && successCount === 0;
    const collectionStatus = isFullFailure ? "collection_failed" : isPartial ? "partial" : "collected";

    console.log(`[collect] ${successCount}/${totalTasks} tasks succeeded. Status: ${collectionStatus}`);
    if (failedCount > 0) {
      console.warn(`[collect] Failed channels:`, JSON.stringify(channelErrors));
    }

    // ─── Storage: save channel_data.json to clinics/{domain}/{reportId}/ ───
    try {
      const payload = {
        channelData,
        analysisData,
        channelErrors,
        clinicName,
        address,
        services,
        collectionStatus,
        collectedAt: new Date().toISOString(),
      };
      const jsonBytes = new TextEncoder().encode(JSON.stringify(payload, null, 2));
      const { error: jsonUploadError } = await supabase.storage
        .from("clinic-data")
        .upload(`clinics/${domain}/${reportId}/channel_data.json`, jsonBytes, {
          contentType: "application/json",
          upsert: true,
        });
      if (jsonUploadError) {
        console.warn("[storage] channel_data.json upload failed:", jsonUploadError.message);
      } else {
        console.log(`[storage] channel_data.json → clinics/${domain}/${reportId}/`);
      }
    } catch (e) {
      console.warn("[storage] channel_data.json upload failed:", errorMessage(e));
    }

    // ─── UNCONDITIONAL Legacy Save: always persist whatever we have ───
    const { error: legacySaveError } = await supabase.from("marketing_reports").update({
      channel_data: channelData,
      analysis_data: { clinicName, services, address, analysis: analysisData, analyzedAt: new Date().toISOString() },
      channel_errors: channelErrors,
      status: collectionStatus,
      updated_at: new Date().toISOString(),
    }).eq("id", reportId);
    if (legacySaveError) {
      console.error(`[collect] marketing_reports save failed: ${legacySaveError.message}`);
    }

    // ─── V3: channel_snapshots + screenshots + analysis_runs ───
    const clinicId = inputClinicId || null;
    const runId = inputRunId || null;

    if (clinicId && runId) {
      try {
        // Channel snapshots — INSERT one row per channel (time-series!)
        const snapshotInserts: Record<string, unknown>[] = [];

        const igData = channelData.instagram as Record<string, unknown> | undefined;
        if (igData) {
          snapshotInserts.push({
            clinic_id: clinicId, run_id: runId, channel: "instagram",
            handle: igData.username,
            followers: igData.followers,
            posts: igData.posts,
            health_score: computeHealthScore("instagram", igData),
            details: igData,
          });
        }

        const ytData = channelData.youtube as Record<string, unknown> | undefined;
        if (ytData) {
          snapshotInserts.push({
            clinic_id: clinicId, run_id: runId, channel: "youtube",
            handle: ytData.handle || ytData.channelName,
            followers: ytData.subscribers,
            posts: ytData.totalVideos,
            total_views: ytData.totalViews,
            health_score: computeHealthScore("youtube", ytData),
            details: ytData,
          });
        }

        const fbData = channelData.facebook as Record<string, unknown> | undefined;
        if (fbData) {
          snapshotInserts.push({
            clinic_id: clinicId, run_id: runId, channel: "facebook",
            handle: fbData.pageName,
            followers: fbData.followers,
            health_score: computeHealthScore("facebook", fbData),
            details: fbData,
          });
        }

        const guData = channelData.gangnamUnni as Record<string, unknown> | undefined;
        if (guData) {
          snapshotInserts.push({
            clinic_id: clinicId, run_id: runId, channel: "gangnamUnni",
            handle: guData.name,
            rating: guData.rating,
            rating_scale: 10,
            reviews: guData.totalReviews,
            health_score: computeHealthScore("gangnamUnni", guData),
            details: guData,
          });
        }

        const gmData = channelData.googleMaps as Record<string, unknown> | undefined;
        if (gmData) {
          snapshotInserts.push({
            clinic_id: clinicId, run_id: runId, channel: "googleMaps",
            handle: gmData.name,
            rating: gmData.rating,
            rating_scale: 5,
            reviews: gmData.reviewCount,
            health_score: computeHealthScore("googleMaps", gmData),
            details: gmData,
          });
        }

        const nbData = channelData.naverBlog as Record<string, unknown> | undefined;
        if (nbData) {
          snapshotInserts.push({
            clinic_id: clinicId, run_id: runId, channel: "naverBlog",
            handle: nbData.officialBlogHandle,
            health_score: computeHealthScore("naverBlog", nbData),
            details: nbData,
          });
        }

        // supabase-js returns `{ error }` instead of throwing; convert each
        // failure into an exception so the catch below can record it.
        if (snapshotInserts.length > 0) {
          const { error: snapshotError } = await supabase.from("channel_snapshots").insert(snapshotInserts);
          if (snapshotError) throw new Error(`channel_snapshots insert failed: ${snapshotError.message}`);
        }

        // Screenshots — INSERT evidence rows
        const screenshotList = (channelData.screenshots || []) as Record<string, unknown>[];
        if (screenshotList.length > 0) {
          const { error: screenshotError } = await supabase.from("screenshots").insert(
            screenshotList.map((ss) => ({
              clinic_id: clinicId,
              run_id: runId,
              channel: ss.channel,
              // Second dash-separated segment of the screenshot id, defaulting to 'main'.
              page_type: ((ss.id as string) || "").split("-")[1] || "main",
              url: ss.url,
              source_url: ss.sourceUrl,
              caption: ss.caption,
            })),
          );
          if (screenshotError) throw new Error(`screenshots insert failed: ${screenshotError.message}`);
        }

        // Update analysis_run with status + errors
        const { error: runUpdateError } = await supabase.from("analysis_runs").update({
          raw_channel_data: channelData,
          analysis_data: { clinicName, services, address, analysis: analysisData },
          vision_analysis: channelData.visionAnalysis || {},
          channel_errors: channelErrors,
          status: collectionStatus,
        }).eq("id", runId);
        if (runUpdateError) throw new Error(`analysis_runs update failed: ${runUpdateError.message}`);
      } catch (e) {
        const errMsg = errorMessage(e);
        console.error("V3 dual-write error:", errMsg);
        // Best-effort: record error into analysis_run so it's visible in DB
        try {
          await supabase.from("analysis_runs").update({
            error_message: `V3 dual-write failed: ${errMsg}`,
            status: "collection_error",
          }).eq("id", runId);
        } catch {
          /* ignore secondary failure */
        }
      }
    }

    return new Response(
      JSON.stringify({
        success: !isFullFailure,
        status: collectionStatus,
        channelData,
        analysisData,
        channelErrors: Object.keys(channelErrors).length > 0 ? channelErrors : undefined,
        partialFailure: isPartial,
        taskSummary: { total: totalTasks, succeeded: successCount, failed: failedCount },
        collectedAt: new Date().toISOString(),
      }),
      { headers: { ...corsHeaders, "Content-Type": "application/json" } },
    );
  } catch (error) {
    // `error` is `unknown` under strict mode; narrow before reading `.message`.
    return new Response(
      JSON.stringify({ success: false, error: errorMessage(error) }),
      { status: 500, headers: { ...corsHeaders, "Content-Type": "application/json" } },
    );
  }
});