// ─── Health Score Calculator ───────────────────────────────────────────────
// Computes a 0-100 health score per channel based on follower/rating/review
// benchmarks for Korean plastic surgery clinics. Used in channel_snapshots.
//
// Design: piecewise-linear interpolation between (threshold, score) breakpoints.
// E.g., Instagram at 5K followers → between 1K=40 and 10K=70 → 53.

/** A `(threshold, score)` pair on a scoring curve. */
type ScoreBreakpoint = readonly [number, number];

/**
 * Linearly interpolates a score for `value` on the segment
 * `(low, scoreLow)` → `(high, scoreHigh)`, clamping outside the segment.
 *
 * @param value - Metric being scored.
 * @param low - Segment start; values at or below it score `scoreLow`.
 * @param high - Segment end; values at or above it score `scoreHigh`.
 * @param scoreLow - Score awarded at `low`.
 * @param scoreHigh - Score awarded at `high`.
 * @returns The interpolated score, rounded to the nearest integer.
 */
function lerp(value: number, low: number, high: number, scoreLow: number, scoreHigh: number): number {
  // Written as !(value > low) so that NaN falls to the floor score instead of
  // slipping past both comparisons and producing a NaN result.
  if (!(value > low)) return scoreLow;
  if (value >= high) return scoreHigh;
  return Math.round(scoreLow + ((value - low) / (high - low)) * (scoreHigh - scoreLow));
}

/**
 * Coerces a scraped metric to a finite, non-negative number.
 *
 * Numbers are used as-is when finite. Strings are parsed as decimals after
 * removing thousands separators and whitespace, so "4.8" stays 4.8 and
 * "1,234" becomes 1234. Anything else (null, undefined, NaN, objects) is 0.
 */
function toMetric(raw: unknown): number {
  let parsed = 0;
  if (typeof raw === 'number') {
    parsed = raw;
  } else if (typeof raw === 'string') {
    parsed = parseFloat(raw.replace(/[,\s]/g, ''));
  }
  return Number.isFinite(parsed) && parsed > 0 ? parsed : 0;
}

/**
 * Scores `value` against an ascending list of breakpoints: the highest segment
 * whose lower threshold the value reaches is interpolated with {@link lerp}.
 * Values below the first threshold get the first score; values above the last
 * threshold get the last score.
 */
function scoreFromBreakpoints(value: number, points: readonly ScoreBreakpoint[]): number {
  for (let i = points.length - 2; i >= 0; i--) {
    const [low, scoreLow] = points[i];
    const [high, scoreHigh] = points[i + 1];
    // i === 0 is the catch-all segment for anything below the second threshold.
    if (value >= low || i === 0) return lerp(value, low, high, scoreLow, scoreHigh);
  }
  return 0;
}

/** Converts a rating on a 0..`scale` scale into 0..`maxPoints`, clamped at both ends. */
function ratingPoints(rating: number, scale: number, maxPoints: number): number {
  const fraction = Math.min(Math.max(rating / scale, 0), 1);
  return Math.round(fraction * maxPoints);
}

/**
 * Computes a 0-100 health score for one channel's collected data.
 *
 * @param channel - Channel key: 'instagram', 'youtube', 'facebook',
 *   'gangnamUnni', 'googleMaps', 'naverBlog' or 'naverPlace'.
 * @param data - The channel's collected metrics. Fields may be numbers or
 *   numeric strings; missing or unparseable fields count as 0.
 * @returns An integer score in 0..100. Unknown channels get a neutral 50.
 */
function computeHealthScore(channel: string, data: Record<string, unknown>): number {
  switch (channel) {
    case 'instagram': {
      // followers: 0→20, 1K→40, 10K→70, 50K→90, 100K+→100
      const followerScore = scoreFromBreakpoints(toMetric(data.followers), [
        [0, 20], [1_000, 40], [10_000, 70], [50_000, 90], [100_000, 100],
      ]);
      // posts bonus: +5 if active (≥ 50 posts)
      const activityBonus = toMetric(data.posts) >= 50 ? 5 : 0;
      return Math.min(followerScore + activityBonus, 100);
    }
    case 'youtube': {
      // subscribers: 0→20, 500→40, 5K→65, 50K→85, 200K+→100
      const subscriberScore = scoreFromBreakpoints(toMetric(data.subscribers), [
        [0, 20], [500, 40], [5_000, 65], [50_000, 85], [200_000, 100],
      ]);
      // video count bonus: +5 if ≥ 20 videos
      const videoBonus = toMetric(data.totalVideos) >= 20 ? 5 : 0;
      return Math.min(subscriberScore + videoBonus, 100);
    }
    case 'facebook': {
      // followers: 0→20, 500→35, 5K→60, 20K→80, 50K+→100
      return scoreFromBreakpoints(toMetric(data.followers), [
        [0, 20], [500, 35], [5_000, 60], [20_000, 80], [50_000, 100],
      ]);
    }
    case 'gangnamUnni': {
      // rating /10: max 70pts. reviews: 0→0, 100→10, 1000→20, 10000→30
      const ratingScore = ratingPoints(toMetric(data.rating), 10, 70);
      const reviewScore = scoreFromBreakpoints(toMetric(data.totalReviews), [
        [0, 0], [100, 10], [1_000, 20], [10_000, 30],
      ]);
      return Math.min(ratingScore + reviewScore, 100);
    }
    case 'googleMaps': {
      // rating /5: max 60pts. reviews: 0→0, 50→10, 500→25, 5000→40
      const ratingScore = ratingPoints(toMetric(data.rating), 5, 60);
      const reviewScore = scoreFromBreakpoints(toMetric(data.reviewCount), [
        [0, 0], [50, 10], [500, 25], [5_000, 40],
      ]);
      return Math.min(ratingScore + reviewScore, 100);
    }
    case 'naverBlog': {
      // Presence-based: official handle = 50 (no handle = 20), plus a mention
      // bonus of up to +30 from the search result count (0→0, 100→15, 1000→30).
      // NOTE(review): data.officialContent (scraped posts) is not scored here.
      const presenceScore = data.officialBlogHandle ? 50 : 20;
      const mentionScore = scoreFromBreakpoints(toMetric(data.totalResults), [
        [0, 0], [100, 15], [1_000, 30],
      ]);
      return Math.min(presenceScore + mentionScore, 100);
    }
    case 'naverPlace': {
      // rating /5: max 60pts. reviews: 0→0, 100→15, 1000→30, 10000→40
      const ratingScore = ratingPoints(toMetric(data.rating), 5, 60);
      // Review count may arrive under either key; fall back when the first is 0/missing.
      const reviews = toMetric(data.reviewCount) || toMetric(data.reviews);
      const reviewScore = scoreFromBreakpoints(reviews, [
        [0, 0], [100, 15], [1_000, 30], [10_000, 40],
      ]);
      return Math.min(ratingScore + reviewScore, 100);
    }
    default:
      return 50; // Unknown channel — neutral score
  }
}
Also total post count visible on the page, and the blog category/tag list.", + schema: { + type: "object", + properties: { + totalPosts: { type: "number" }, + recentPosts: { type: "array", items: { type: "object", properties: { title: { type: "string" }, date: { type: "string" }, excerpt: { type: "string" } } } }, + categories: { type: "array", items: { type: "string" } }, + }, + }, + }, + waitFor: 3000, + }), + }, { label: "firecrawl-naver-blog", timeoutMs: 45000 }); + if (blogScrape.ok) { + const blogData = await blogScrape.json(); + officialBlogContent = blogData.data?.json || null; + console.log(`[naverBlog] Official blog scraped: ${officialBlogContent?.totalPosts ?? 0} posts`); + } + } catch (e) { + console.warn(`[naverBlog] Official blog Firecrawl failed (non-critical):`, e); + } + } + } + channelData.naverBlog = { totalResults: data.total || 0, searchQuery: `${clinicName} 후기`, - // Official blog URL from Phase 1 verified handle - officialBlogUrl: officialBlogHandle ? `https://blog.naver.com/${officialBlogHandle}` : null, - officialBlogHandle: officialBlogHandle, - // Blog mentions (third-party posts, NOT the official blog) + officialBlogUrl, + officialBlogHandle, + // Official blog content (from Firecrawl — actual blog data) + officialContent: officialBlogContent, + // Blog mentions (third-party posts via Naver Search) posts: (data.items || []).slice(0, 10).map((item: Record) => ({ title: (item.title || "").replace(/<[^>]*>/g, ""), description: (item.description || "").replace(/<[^>]*>/g, ""), @@ -558,6 +692,7 @@ Deno.serve(async (req) => { snapshotInserts.push({ clinic_id: clinicId, run_id: runId, channel: 'instagram', handle: igData.username, followers: igData.followers, posts: igData.posts, + health_score: computeHealthScore('instagram', igData), details: igData, }); } @@ -568,6 +703,7 @@ Deno.serve(async (req) => { clinic_id: clinicId, run_id: runId, channel: 'youtube', handle: ytData.handle || ytData.channelName, followers: ytData.subscribers, posts: 
ytData.totalVideos, total_views: ytData.totalViews, + health_score: computeHealthScore('youtube', ytData), details: ytData, }); } @@ -577,6 +713,7 @@ Deno.serve(async (req) => { snapshotInserts.push({ clinic_id: clinicId, run_id: runId, channel: 'facebook', handle: fbData.pageName, followers: fbData.followers, + health_score: computeHealthScore('facebook', fbData), details: fbData, }); } @@ -586,7 +723,9 @@ Deno.serve(async (req) => { snapshotInserts.push({ clinic_id: clinicId, run_id: runId, channel: 'gangnamUnni', handle: guData.name, rating: guData.rating, rating_scale: 10, - reviews: guData.totalReviews, details: guData, + reviews: guData.totalReviews, + health_score: computeHealthScore('gangnamUnni', guData), + details: guData, }); } @@ -595,7 +734,9 @@ Deno.serve(async (req) => { snapshotInserts.push({ clinic_id: clinicId, run_id: runId, channel: 'googleMaps', handle: gmData.name, rating: gmData.rating, rating_scale: 5, - reviews: gmData.reviewCount, details: gmData, + reviews: gmData.reviewCount, + health_score: computeHealthScore('googleMaps', gmData), + details: gmData, }); } @@ -603,7 +744,9 @@ Deno.serve(async (req) => { if (nbData) { snapshotInserts.push({ clinic_id: clinicId, run_id: runId, channel: 'naverBlog', - handle: nbData.officialBlogHandle, details: nbData, + handle: nbData.officialBlogHandle, + health_score: computeHealthScore('naverBlog', nbData), + details: nbData, }); }