diff --git a/supabase/functions/_shared/normalizeHandles.ts b/supabase/functions/_shared/normalizeHandles.ts
index f98a1a8..5f8b417 100644
--- a/supabase/functions/_shared/normalizeHandles.ts
+++ b/supabase/functions/_shared/normalizeHandles.ts
@@ -46,3 +46,90 @@ export function normalizeInstagramHandle(
 
   return handle || null;
 }
+
+// First path segments on youtube.com that never identify a channel.
+const YOUTUBE_NON_CHANNEL_PATHS = new Set([
+  "watch",
+  "playlist",
+  "shorts",
+  "embed",
+  "live",
+  "results",
+  "feed",
+  "hashtag",
+  "clip",
+]);
+
+// Matches a youtube.com / youtu.be URL (scheme optional) rather than a bare
+// name that merely contains "youtube.com" somewhere inside it.
+const YOUTUBE_URL_PATTERN =
+  /^(?:https?:\/\/)?(?:[\w-]+\.)*(?:youtube\.com|youtu\.be)(?=[/?#]|$)/i;
+
+/**
+ * Normalize a YouTube channel identifier from various URL formats.
+ *
+ * Returns an object with the best identifier type for API lookup:
+ * - "https://www.youtube.com/@banobagips" → { type: 'handle', value: 'banobagips' }
+ * - "https://youtube.com/c/banobagips" → { type: 'username', value: 'banobagips' }
+ * - "https://youtube.com/user/banobagi" → { type: 'username', value: 'banobagi' }
+ * - "https://youtube.com/channel/UCxxxx" → { type: 'channelId', value: 'UCxxxx' }
+ * - "@banobagips" → { type: 'handle', value: 'banobagips' }
+ * - "UC" followed by 22 ID characters → { type: 'channelId', value: <input> }
+ * - "banobagips" → { type: 'username', value: 'banobagips' }
+ *
+ * Returns null for empty input and for YouTube URLs that do not identify a
+ * channel: video, playlist and shorts links, youtu.be short links, the bare
+ * domain, and URLs that fail to parse.
+ *
+ * @param raw - Channel URL, @handle, channel ID, or bare name.
+ * @returns The identifier and its kind, or null if none can be extracted.
+ */
+export function normalizeYouTubeChannel(
+  raw: string | null | undefined,
+): { type: 'handle' | 'username' | 'channelId'; value: string } | null {
+  if (!raw || typeof raw !== "string") return null;
+
+  const input = raw.trim();
+  if (!input) return null;
+
+  if (YOUTUBE_URL_PATTERN.test(input)) {
+    let segments: string[];
+    try {
+      const url = new URL(/^https?:\/\//i.test(input) ? input : `https://${input}`);
+      // youtu.be short links always point at a video, never a channel.
+      if (/(^|\.)youtu\.be$/i.test(url.hostname)) return null;
+      // pathname is percent-encoded; decode so non-ASCII handles survive.
+      segments = url.pathname.split("/").filter(Boolean).map((s) => decodeURIComponent(s));
+    } catch {
+      // Malformed URL or percent-escape: nothing trustworthy to extract.
+      return null;
+    }
+
+    const [first, second] = segments;
+    if (!first) return null;
+    if (first === "channel") {
+      return second?.startsWith("UC") ? { type: "channelId", value: second } : null;
+    }
+    if (first === "c" || first === "user") {
+      return second ? { type: "username", value: second } : null;
+    }
+    if (first.startsWith("@")) {
+      return first.length > 1 ? { type: "handle", value: first.slice(1) } : null;
+    }
+    if (YOUTUBE_NON_CHANNEL_PATHS.has(first)) return null;
+    // Fallback: legacy custom URL such as youtube.com/banobagi
+    return { type: "username", value: first };
+  }
+
+  // Non-URL formats
+  if (input.startsWith("@")) {
+    const handle = input.slice(1);
+    return handle ? { type: "handle", value: handle } : null;
+  }
+  // Channel IDs are "UC" plus 22 URL-safe base64 characters (24 total).
+  if (/^UC[\w-]{22}$/.test(input)) {
+    return { type: "channelId", value: input };
+  }
+
+  return { type: "username", value: input };
+}
diff --git a/supabase/functions/enrich-channels/index.ts b/supabase/functions/enrich-channels/index.ts
index 061050c..fa32855 100644
--- a/supabase/functions/enrich-channels/index.ts
+++ b/supabase/functions/enrich-channels/index.ts
@@ -1,6 +1,6 @@
 import "@supabase/functions-js/edge-runtime.d.ts";
 import { createClient } from "https://esm.sh/@supabase/supabase-js@2";
-import { normalizeInstagramHandle } from "../_shared/normalizeHandles.ts";
+import { normalizeInstagramHandle, normalizeYouTubeChannel } from "../_shared/normalizeHandles.ts";
 
 const corsHeaders = {
   "Access-Control-Allow-Origin": "*",
@@ -58,36 +58,54 @@ Deno.serve(async (req) => {
     // Run all enrichment tasks in parallel
     const tasks = [];
 
-    // 1. Instagram Profile
+    // 1. Instagram Profile — with fallback for wrong handle
     const cleanIgHandle = normalizeInstagramHandle(instagramHandle);
     if (cleanIgHandle) {
       tasks.push(
         (async () => {
-          const items = await runApifyActor(
-            "apify~instagram-profile-scraper",
-            { usernames: [cleanIgHandle], resultsLimit: 12 },
-            APIFY_TOKEN
-          );
-          const profile = (items as Record<string, unknown>[])[0];
-          if (profile && !profile.error) {
-            enrichment.instagram = {
-              username: profile.username,
-              followers: profile.followersCount,
-              following: profile.followsCount,
-              posts: profile.postsCount,
-              bio: profile.biography,
-              isBusinessAccount: profile.isBusinessAccount,
-              externalUrl: profile.externalUrl,
-              latestPosts: ((profile.latestPosts as Record<string, unknown>[]) || [])
-                .slice(0, 12)
-                .map((p) => ({
-                  type: p.type,
-                  likes: p.likesCount,
-                  comments: p.commentsCount,
-                  caption: (p.caption as string || "").slice(0, 200),
-                  timestamp: p.timestamp,
-                })),
-            };
+          // Try the given handle first, then common clinic variants
+          const handleCandidates = [
+            cleanIgHandle,
+            `${cleanIgHandle}_ps`, // banobagi → banobagi_ps
+            `${cleanIgHandle}.ps`, // banobagi → banobagi.ps
+            `${cleanIgHandle}_clinic`, // banobagi → banobagi_clinic
+            `${cleanIgHandle}_official`, // banobagi → banobagi_official
+          ];
+
+          for (const handle of handleCandidates) {
+            const items = await runApifyActor(
+              "apify~instagram-profile-scraper",
+              { usernames: [handle], resultsLimit: 12 },
+              APIFY_TOKEN
+            );
+            const profile = (items as Record<string, unknown>[])[0];
+
+            if (profile && !profile.error) {
+              const followers = (profile.followersCount as number) || 0;
+
+              // Accept if: has meaningful followers OR is a business account with posts
+              if (followers >= 100 || ((profile.isBusinessAccount as boolean) && (profile.postsCount as number) > 10)) {
+                enrichment.instagram = {
+                  username: profile.username,
+                  followers: profile.followersCount,
+                  following: profile.followsCount,
+                  posts: profile.postsCount,
+                  bio: profile.biography,
+                  isBusinessAccount: profile.isBusinessAccount,
+                  externalUrl: profile.externalUrl,
+                  latestPosts: ((profile.latestPosts as Record<string, unknown>[]) || [])
+                    .slice(0, 12)
+                    .map((p) => ({
+                      type: p.type,
+                      likes: p.likesCount,
+                      comments: p.commentsCount,
+                      caption: (p.caption as string || "").slice(0, 200),
+                      timestamp: p.timestamp,
+                    })),
+                };
+                break; // Found a valid account
+              }
+            }
           }
         })()
       );
@@ -97,17 +115,29 @@ Deno.serve(async (req) => {
     if (clinicName || address) {
       tasks.push(
         (async () => {
-          const searchQuery = `${clinicName} ${address || "강남"}`;
-          const items = await runApifyActor(
-            "compass~crawler-google-places",
-            {
-              searchStringsArray: [searchQuery],
-              maxCrawledPlacesPerSearch: 1,
-              language: "ko",
-              maxReviews: 10,
-            },
-            APIFY_TOKEN
-          );
+          // Try multiple search queries for better hit rate. This task also runs
+          // when only `address` is set, so drop any query that would interpolate
+          // a missing clinicName as the literal text "undefined".
+          const queries = [
+            clinicName && `${clinicName} 성형외과`,
+            clinicName,
+            [clinicName, address || "강남"].filter(Boolean).join(" "),
+          ].filter((q): q is string => typeof q === "string" && q.length > 0);
+
+          let items: unknown[] = [];
+          for (const query of queries) {
+            items = await runApifyActor(
+              "compass~crawler-google-places",
+              {
+                searchStringsArray: [query],
+                maxCrawledPlacesPerSearch: 3,
+                language: "ko",
+                maxReviews: 10,
+              },
+              APIFY_TOKEN
+            );
+            if (items.length > 0) break;
+          }
           const place = (items as Record<string, unknown>[])[0];
           if (place) {
             enrichment.googleMaps = {
@@ -140,17 +170,25 @@ Deno.serve(async (req) => {
         (async () => {
           const YT_BASE = "https://www.googleapis.com/youtube/v3";
 
-          // Resolve handle/username to channel ID
-          let channelId = youtubeChannelId;
-          if (channelId.startsWith("@") || !channelId.startsWith("UC")) {
-            // Use forHandle for @handles, forUsername for legacy usernames
-            const param = channelId.startsWith("@") ? "forHandle" : "forUsername";
-            const handle = channelId.startsWith("@") ? channelId.slice(1) : channelId;
-            const lookupRes = await fetch(
-              `${YT_BASE}/channels?part=id&${param}=${handle}&key=${YOUTUBE_API_KEY}`
-            );
-            const lookupData = await lookupRes.json();
-            channelId = lookupData.items?.[0]?.id || "";
+          // Normalize YouTube URL/handle to structured identifier
+          const ytNormalized = normalizeYouTubeChannel(youtubeChannelId);
+          if (!ytNormalized) return;
+
+          let channelId = "";
+
+          if (ytNormalized.type === "channelId") {
+            channelId = ytNormalized.value;
+          } else {
+            // Try forHandle first, then forUsername as fallback
+            for (const param of ["forHandle", "forUsername"]) {
+              // Encode the value: handles can contain non-ASCII or reserved characters
+              const lookupRes = await fetch(
+                `${YT_BASE}/channels?part=id&${param}=${encodeURIComponent(ytNormalized.value)}&key=${YOUTUBE_API_KEY}`
+              );
+              const lookupData = await lookupRes.json();
+              channelId = lookupData.items?.[0]?.id || "";
+              if (channelId) break;
+            }
           }
           if (!channelId) return;
 
diff --git a/supabase/functions/generate-report/index.ts b/supabase/functions/generate-report/index.ts
index a26a89b..0d9c533 100644
--- a/supabase/functions/generate-report/index.ts
+++ b/supabase/functions/generate-report/index.ts
@@ -92,7 +92,13 @@ ${JSON.stringify(analyzeResult.data?.analysis || {}, null, 2)}
     "address": "주소",
     "phone": "전화번호",
     "services": ["시술1", "시술2"],
-    "doctors": [{"name": "의사명", "specialty": "전문분야"}]
+    "doctors": [{"name": "의사명", "specialty": "전문분야"}],
+    "socialMedia": {
+      "instagram": "정확한 Instagram 핸들 (@ 없이, 예: banobagi_ps)",
+      "youtube": "YouTube 채널 핸들 또는 URL",
+      "facebook": "Facebook 페이지명",
+      "naverBlog": "네이버 블로그 ID"
+    }
   },
   "executiveSummary": "경영진 요약 (3-5문장)",
   "overallScore": 0-100,
@@ -165,13 +171,14 @@ ${JSON.stringify(analyzeResult.data?.analysis || {}, null, 2)}
       report = { raw: reportText, parseError: true };
     }
 
-    // Normalize social handles from scrape data
-    const socialMedia = clinic.socialMedia || {};
+    // Merge social handles: AI-found (more accurate) > Firecrawl-extracted (fallback)
+    const scrapeSocial = clinic.socialMedia || {};
+    const aiSocial = report?.clinicInfo?.socialMedia || {};
     const normalizedHandles = {
-      instagram: normalizeInstagramHandle(socialMedia.instagram),
-      youtube: socialMedia.youtube || null,
-      facebook: socialMedia.facebook || null,
-      blog: socialMedia.blog || null,
+      instagram: normalizeInstagramHandle(aiSocial.instagram) || normalizeInstagramHandle(scrapeSocial.instagram),
+      youtube: aiSocial.youtube || scrapeSocial.youtube || null,
+      facebook: aiSocial.facebook || scrapeSocial.facebook || null,
+      blog: aiSocial.naverBlog || scrapeSocial.blog || null,
     };
 
     // Embed normalized handles in report for DB persistence