From c753d8593f0b5da2f55600d6b7629205a2650251 Mon Sep 17 00:00:00 2001 From: Haewon Kam Date: Fri, 10 Apr 2026 13:41:05 +0900 Subject: [PATCH 1/2] =?UTF-8?q?fix:=20=ED=8C=8C=EC=9D=B4=ED=94=84=EB=9D=BC?= =?UTF-8?q?=EC=9D=B8=203=EB=8C=80=20=ED=95=B5=EC=8B=AC=20=EB=B2=84?= =?UTF-8?q?=EA=B7=B8=20=EC=88=98=EC=A0=95?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - generate-report: Harness 4 groundTruth 주입 레이어 추가 (IG/YT/FB/NaverBlog/NaverPlace/GangnamUnni 필드 강제 주입, diagnosis 폴백, qualityReport DB 저장) - discover-channels: CLINIC_NOT_REGISTERED 조기 종료 제거 + clinics 캐시 fast-path 추가 (14일 TTL, Firecrawl fallback 재활성화) - collect-channel-data: silent skip → {status, reason, attemptedAt} 구조적 기록 (naverBlog/naverPlace/googleMaps/gangnamUnni) Co-Authored-By: Claude Opus 4.6 (1M context) --- .../functions/collect-channel-data/index.ts | 119 +++++++++--- supabase/functions/discover-channels/index.ts | 141 ++++++++++++-- supabase/functions/generate-report/index.ts | 174 +++++++++++++++++- 3 files changed, 398 insertions(+), 36 deletions(-) diff --git a/supabase/functions/collect-channel-data/index.ts b/supabase/functions/collect-channel-data/index.ts index e9df3cc..9684c8a 100644 --- a/supabase/functions/collect-channel-data/index.ts +++ b/supabase/functions/collect-channel-data/index.ts @@ -414,7 +414,14 @@ Deno.serve(async (req) => { } if (!gangnamUnniUrl) { - throw new Error("강남언니 URL을 찾을 수 없습니다 (검색 실패)"); + console.log("[gangnamUnni] URL not found via any search — marking skipped"); + channelData.gangnamUnni = { + status: "skipped", + reason: "URL_NOT_FOUND", + reasonDetail: "Firecrawl search across 3 query variants returned no gangnamunni.com/hospitals/ link", + attemptedAt: new Date().toISOString(), + }; + return; } const scrapeRes = await fetchWithRetry("https://api.firecrawl.dev/v1/scrape", { @@ -438,11 +445,21 @@ Deno.serve(async (req) => { waitFor: 5000, }), }, { label: "firecrawl-gangnamunni", timeoutMs: 60000 }); - if 
(!scrapeRes.ok) throw new Error(`Firecrawl 강남언니 scrape failed: ${scrapeRes.status}`); + if (!scrapeRes.ok) { + channelData.gangnamUnni = { + status: "skipped", + reason: "SCRAPE_FAILED", + reasonDetail: `Firecrawl returned HTTP ${scrapeRes.status}`, + attemptedAt: new Date().toISOString(), + sourceUrl: gangnamUnniUrl, + }; + return; + } const data = await scrapeRes.json(); const hospital = data.data?.json; if (hospital?.hospitalName) { channelData.gangnamUnni = { + status: "ok", name: hospital.hospitalName, rawRating: hospital.rating, rating: typeof hospital.rating === 'number' && hospital.rating > 0 ? hospital.rating : null, @@ -452,9 +469,21 @@ Deno.serve(async (req) => { badges: hospital.badges || [], sourceUrl: gangnamUnniUrl, }; } else { - throw new Error("강남언니 scrape returned no hospital data"); + channelData.gangnamUnni = { + status: "skipped", + reason: "EMPTY_SCRAPE_RESULT", + reasonDetail: "Firecrawl scraped the page but could not extract hospital data", + attemptedAt: new Date().toISOString(), + sourceUrl: gangnamUnniUrl, + }; } })); + } else { + channelData.gangnamUnni = { + status: "skipped", + reason: !FIRECRAWL_API_KEY ? "FIRECRAWL_API_KEY_MISSING" : "CLINIC_NAME_MISSING", + attemptedAt: new Date().toISOString(), + }; } // ─── 5. Naver Blog + Place ─── @@ -490,6 +519,7 @@ Deno.serve(async (req) => { const totalMatch = xml.match(/(\d+)<\/totalCount>/) || xml.match(/(\d+)<\/managedCount>/); const totalPosts = totalMatch ? 
Number(totalMatch[1]) : items.length; channelData.naverBlog = { + status: "ok", officialBlogUrl, officialBlogHandle, totalResults: totalPosts, posts: items.slice(0, 10).map(i => ({ @@ -500,13 +530,28 @@ Deno.serve(async (req) => { }; console.log(`[naverBlog] RSS: ${items.length} posts from verified handle ${officialBlogHandle}`); } catch (e) { - console.warn(`[naverBlog] RSS fetch failed:`, e); - // Fallback: at minimum expose the official URL even without post data - channelData.naverBlog = { officialBlogUrl, officialBlogHandle, totalResults: 0, posts: [], officialContent: null }; + const reason = e instanceof Error ? e.message : String(e); + console.warn(`[naverBlog] RSS fetch failed:`, reason); + // Expose the official URL even without post data so the frontend can still link out + channelData.naverBlog = { + status: "skipped", + reason: "RSS_FETCH_FAILED", + reasonDetail: reason, + attemptedAt: new Date().toISOString(), + officialBlogUrl, officialBlogHandle, + totalResults: 0, posts: [], officialContent: null, + }; } })); } else { - console.log(`[naverBlog] No verified handle in DB — skipping`); + console.log(`[naverBlog] No verified handle in DB — marking skipped`); + channelData.naverBlog = { + status: "skipped", + reason: "NO_VERIFIED_HANDLE", + attemptedAt: new Date().toISOString(), + totalResults: 0, + posts: [], + }; } // naverPlace: use stored verified data if available, otherwise search once and save @@ -564,7 +609,7 @@ Deno.serve(async (req) => { } if (found) { - channelData.naverPlace = found; + channelData.naverPlace = { status: "ok", ...found }; // Save to clinics.verified_channels so future runs skip the search if (inputClinicId) { const { data: clinicRow } = await supabase.from('clinics').select('verified_channels').eq('id', inputClinicId).single(); @@ -576,9 +621,22 @@ Deno.serve(async (req) => { } } } else { - console.log(`[naverPlace] No confident match found — skipping to avoid wrong data`); + console.log(`[naverPlace] No confident match found 
— marking skipped`); + channelData.naverPlace = { + status: "skipped", + reason: "NO_CONFIDENT_MATCH", + reasonDetail: `Tried ${queries.length} queries but none matched the domain or exact clinic name`, + attemptedAt: new Date().toISOString(), + attemptedQueries: queries, + }; } })); + } else { + channelData.naverPlace = { + status: "skipped", + reason: "NAVER_API_CREDENTIALS_MISSING", + attemptedAt: new Date().toISOString(), + }; } // ─── 6. Google Maps (Google Places API New) ─── @@ -587,6 +645,7 @@ Deno.serve(async (req) => { const place = await searchGooglePlace(clinicName, address || undefined, GOOGLE_PLACES_API_KEY); if (place) { channelData.googleMaps = { + status: "ok", name: place.name, rating: place.rating, reviewCount: place.reviewCount, address: place.address, phone: place.phone, clinicWebsite: place.clinicWebsite, @@ -596,9 +655,20 @@ Deno.serve(async (req) => { topReviews: place.topReviews, }; } else { - throw new Error("Google Maps: no matching place found"); + channelData.googleMaps = { + status: "skipped", + reason: "PLACE_NOT_FOUND", + attemptedAt: new Date().toISOString(), + searchQuery: clinicName, + }; } })); + } else { + channelData.googleMaps = { + status: "skipped", + reason: !GOOGLE_PLACES_API_KEY ? "API_KEY_MISSING" : "CLINIC_NAME_MISSING", + attemptedAt: new Date().toISOString(), + }; } // ─── 7. Market Analysis (Perplexity) ─── @@ -841,34 +911,41 @@ Deno.serve(async (req) => { }); } + // Helper: only snapshot channels that actually collected data. + // Skipped channels (status === 'skipped') still live in channelData for the + // generate-report step to render their skip reason, but shouldn't pollute + // the time-series channel_snapshots table. 
+ const isCollected = (d: Record | undefined): boolean => + !!d && d.status !== "skipped"; + const guData = channelData.gangnamUnni as Record | undefined; - if (guData) { + if (isCollected(guData)) { snapshotInserts.push({ clinic_id: clinicId, run_id: runId, channel: 'gangnamUnni', - handle: guData.name, rating: guData.rating, rating_scale: 10, - reviews: guData.totalReviews, - health_score: computeHealthScore('gangnamUnni', guData), + handle: guData!.name, rating: guData!.rating, rating_scale: 10, + reviews: guData!.totalReviews, + health_score: computeHealthScore('gangnamUnni', guData!), details: guData, }); } const gmData = channelData.googleMaps as Record | undefined; - if (gmData) { + if (isCollected(gmData)) { snapshotInserts.push({ clinic_id: clinicId, run_id: runId, channel: 'googleMaps', - handle: gmData.name, rating: gmData.rating, rating_scale: 5, - reviews: gmData.reviewCount, - health_score: computeHealthScore('googleMaps', gmData), + handle: gmData!.name, rating: gmData!.rating, rating_scale: 5, + reviews: gmData!.reviewCount, + health_score: computeHealthScore('googleMaps', gmData!), details: gmData, }); } const nbData = channelData.naverBlog as Record | undefined; - if (nbData) { + if (isCollected(nbData)) { snapshotInserts.push({ clinic_id: clinicId, run_id: runId, channel: 'naverBlog', - handle: nbData.officialBlogHandle, - health_score: computeHealthScore('naverBlog', nbData), + handle: nbData!.officialBlogHandle, + health_score: computeHealthScore('naverBlog', nbData!), details: nbData, }); } diff --git a/supabase/functions/discover-channels/index.ts b/supabase/functions/discover-channels/index.ts index c745954..dcc4493 100644 --- a/supabase/functions/discover-channels/index.ts +++ b/supabase/functions/discover-channels/index.ts @@ -284,23 +284,122 @@ Deno.serve(async (req) => { } // ═══════════════════════════════════════════ - // NOT REGISTERED: Return error for unregistered domains - // (Registry-only mode — no API fallback) + // REGISTRY 
MISS — secondary fast-path via `clinics` cache + // If this domain was previously auto-discovered and cached in `clinics.verified_channels`, + // reuse it instead of re-running the full Firecrawl+Perplexity discovery. + // Fresh runs only re-discover after 14 days so stale handles eventually refresh. // ═══════════════════════════════════════════ - console.log(`[registry] Miss: ${registryDomain} — returning CLINIC_NOT_REGISTERED`); - return new Response( - JSON.stringify({ - success: false, - error: "CLINIC_NOT_REGISTERED", - message: "현재 지원하지 않는 병원입니다. 등록된 병원만 분석 가능합니다.", - domain: registryDomain, - }), - { status: 404, headers: { ...corsHeaders, "Content-Type": "application/json" } }, - ); + console.log(`[registry] Miss: ${registryDomain} — trying clinics cache + Firecrawl fallback`); + + if (registryDomain) { + try { + const { data: cachedClinic } = await supabase + .from("clinics") + .select("id, name, name_en, address, phone, services, branding, verified_channels, last_analyzed_at") + .eq("domain", registryDomain) + .maybeSingle(); + + const cachedChannels = cachedClinic?.verified_channels as VerifiedChannels | null | undefined; + const hasAny = cachedChannels && ( + ((cachedChannels as Record).instagram as unknown[] | undefined)?.length + || (cachedChannels as Record).youtube + || (cachedChannels as Record).facebook + || (cachedChannels as Record).naverBlog + ); + const lastAnalyzed = cachedClinic?.last_analyzed_at ? 
new Date(cachedClinic.last_analyzed_at).getTime() : 0; + const CACHE_TTL_MS = 14 * 24 * 60 * 60 * 1000; + const cacheFresh = lastAnalyzed > 0 && (Date.now() - lastAnalyzed) < CACHE_TTL_MS; + + if (cachedClinic && hasAny && cacheFresh) { + console.log(`[clinics-cache] Hit: ${cachedClinic.name} (${registryDomain}) — reusing cached channels`); + + const scrapeDataFromCache = { + clinic: { + clinicName: cachedClinic.name, + clinicNameEn: cachedClinic.name_en, + address: cachedClinic.address, + phone: cachedClinic.phone, + services: cachedClinic.services || [], + }, + branding: cachedClinic.branding || {}, + siteLinks: [], + siteMap: [], + sourceUrl: url, + scrapedAt: new Date().toISOString(), + source: "clinics-cache", + }; + + const { data: saved, error: saveError } = await supabase + .from("marketing_reports") + .insert({ + url, + clinic_name: cachedClinic.name, + status: "discovered", + verified_channels: cachedChannels, + scrape_data: scrapeDataFromCache, + report: {}, + pipeline_started_at: new Date().toISOString(), + }) + .select("id") + .single(); + + if (saveError) throw new Error(`DB save failed: ${saveError.message}`); + + // Refresh last_analyzed_at so the cache stays warm + await supabase.from("clinics") + .update({ last_analyzed_at: new Date().toISOString() }) + .eq("id", cachedClinic.id); + + // V3 run record + let runId: string | null = null; + try { + const { data: runRow } = await supabase + .from("analysis_runs") + .insert({ + clinic_id: cachedClinic.id, + status: "discovering", + scrape_data: scrapeDataFromCache, + discovered_channels: cachedChannels, + trigger: "manual", + pipeline_started_at: new Date().toISOString(), + }) + .select("id") + .single(); + runId = runRow?.id || null; + } catch (e) { + console.error("V3 dual-write error (clinics-cache):", e); + } + + return new Response( + JSON.stringify({ + success: true, + reportId: saved.id, + clinicId: cachedClinic.id, + runId, + clinicName: cachedClinic.name, + verifiedChannels: cachedChannels, + 
address: cachedClinic.address || "", + services: cachedClinic.services || [], + scrapeData: scrapeDataFromCache, + source: "clinics-cache", + }), + { headers: { ...corsHeaders, "Content-Type": "application/json" } }, + ); + } else if (cachedClinic && !cacheFresh) { + console.log(`[clinics-cache] Stale for ${registryDomain} — re-running discovery`); + } + } catch (e) { + console.warn("[clinics-cache] Lookup failed, falling through to full discovery:", e instanceof Error ? e.message : e); + } + } // ═══════════════════════════════════════════ - // LEGACY FALLBACK: Full API discovery (disabled — registry-only mode) - // Kept for reference; unreachable in production + // FULL DISCOVERY: Firecrawl + extractSocialLinks + verifyHandles + // Runs for: unregistered domains, stale cache, or cache miss. + // Deterministic footer/link extraction (Source 1-3) is prioritized; + // Perplexity/Apify/Naver (Source 4-5) are AI fallbacks that only contribute + // when the regex path is empty. Everything that succeeds gets saved to + // `clinics.verified_channels` so the next run hits the cache fast-path above. // ═══════════════════════════════════════════ const FIRECRAWL_API_KEY = Deno.env.get("FIRECRAWL_API_KEY") || ""; @@ -806,6 +905,19 @@ Deno.serve(async (req) => { } catch { /* ignore secondary failure */ } } + // Log deterministic vs AI contribution so we can see whether the footer + // scraping alone was sufficient for this clinic (the developer's hypothesis). 
+ const deterministicCount = + (linkHandles.instagram?.length || 0) + + (linkHandles.youtube?.length || 0) + + (linkHandles.facebook?.length || 0) + + (linkHandles.naverBlog?.length || 0) + + (buttonHandles.instagram?.length || 0) + + (buttonHandles.youtube?.length || 0) + + (buttonHandles.facebook?.length || 0) + + (buttonHandles.naverBlog?.length || 0); + console.log(`[discover] ${registryDomain} — deterministic handles: ${deterministicCount}, final verified channels: ig=${(verified.instagram || []).length}, yt=${verified.youtube ? 1 : 0}, fb=${verified.facebook ? 1 : 0}, blog=${verified.naverBlog ? 1 : 0}`); + return new Response( JSON.stringify({ success: true, reportId: saved.id, @@ -815,6 +927,7 @@ Deno.serve(async (req) => { address: clinic.address || "", services: clinic.services || [], scrapeData: scrapeDataFull, + source: "firecrawl-fallback", }), { headers: { ...corsHeaders, "Content-Type": "application/json" } }, ); diff --git a/supabase/functions/generate-report/index.ts b/supabase/functions/generate-report/index.ts index 17ef24b..8434219 100644 --- a/supabase/functions/generate-report/index.ts +++ b/supabase/functions/generate-report/index.ts @@ -195,6 +195,157 @@ ${JSON.stringify(scrapeData.branding || {}, null, 2).slice(0, 1000)} report.visionAnalysis = vision; } + // ─── Harness 4: GroundTruth channel injection ─── + // Perplexity occasionally drops channel metrics even though they exist in channelData. + // For each channel we force-inject the numeric fields from the actually-collected data. + // Same pattern as the Vision injection above (foundingYear / gangnamUnni), extended + // to instagram / youtube / facebook / naverBlog / naverPlace. 
+ report.channelAnalysis = report.channelAnalysis || {}; + const injectedChannels: string[] = []; + + // Instagram — prefer instagramAccounts[0], fall back to channelData.instagram + const igAccountsList = (channelData.instagramAccounts as Record[] | undefined) || []; + const igPrimary = (igAccountsList[0] || channelData.instagram) as Record | undefined; + const igPostsSummary = channelData.instagramPosts as Record | undefined; + if (igPrimary) { + const target = (report.channelAnalysis.instagram ||= {} as Record); + if (isMissingValue(target.followers) && igPrimary.followers) { + target.followers = igPrimary.followers; + } + // Prefer profile-level post count; fall back to scraped posts summary + if (isMissingValue(target.posts)) { + if (igPrimary.posts) target.posts = igPrimary.posts; + else if (igPostsSummary?.totalPosts) target.posts = igPostsSummary.totalPosts; + } + if (isMissingValue(target.status)) target.status = "active"; + injectedChannels.push(`instagram(f=${target.followers},p=${target.posts})`); + } + + // YouTube + const ytGT = channelData.youtube as Record | undefined; + if (ytGT) { + const target = (report.channelAnalysis.youtube ||= {} as Record); + if (isMissingValue(target.subscribers) && ytGT.subscribers) { + target.subscribers = ytGT.subscribers; + } + if (isMissingValue((target as Record).videos) && ytGT.totalVideos) { + (target as Record).videos = ytGT.totalVideos; + } + if (isMissingValue(target.status)) target.status = "active"; + injectedChannels.push(`youtube(s=${target.subscribers})`); + } + + // Facebook + const fbGT = channelData.facebook as Record | undefined; + if (fbGT) { + const target = (report.channelAnalysis.facebook ||= {} as Record); + if (isMissingValue(target.followers) && fbGT.followers) { + target.followers = fbGT.followers; + } + if (isMissingValue((target as Record).likes) && fbGT.likes) { + (target as Record).likes = fbGT.likes; + } + if (isMissingValue(target.status)) target.status = "active"; + 
injectedChannels.push(`facebook(f=${target.followers})`); + } + + // Naver Blog — RSS provides totalResults + posts (records the collector marked status: "skipped" are not injected) + const nbGT = channelData.naverBlog as Record | undefined; + if (nbGT && nbGT.status !== "skipped" && !nbGT.skipped) { + const target = (report.channelAnalysis.naverBlog ||= {} as Record); + if (isMissingValue(target.posts) && nbGT.totalResults) { + target.posts = nbGT.totalResults; + } + if (isMissingValue(target.status)) { + target.status = (nbGT.totalResults as number) > 0 ? "active" : "inactive"; + } + injectedChannels.push(`naverBlog(p=${target.posts})`); + } + + // Naver Place — Naver Local Search doesn't include rating/reviews, but stores the place metadata. + // If the place was found we at least set status=active; rating/reviews often come via Vision. + const npGT = channelData.naverPlace as Record | undefined; + if (npGT && npGT.status !== "skipped" && !npGT.skipped && npGT.name) { // skip records use status: "skipped"; legacy `skipped` flag still honored + const target = (report.channelAnalysis.naverPlace ||= {} as Record); + if (isMissingValue(target.status)) target.status = "active"; + if (isMissingValue((target as Record).name) && npGT.name) { + (target as Record).name = npGT.name; + } + // Rating/reviews may be injected by Vision block above; don't overwrite + injectedChannels.push(`naverPlace(${npGT.name})`); + } + + // GangnamUnni — Vision block handled the rating/reviews path already. + // Here we only fill status/rating when they come directly from the scraper (D). 
+ const guGT = channelData.gangnamUnni as Record | undefined; + if (guGT && guGT.status !== "skipped" && !guGT.skipped) { // collector flags skips via status: "skipped"; a skipped record must not be stamped "active" + const target = (report.channelAnalysis.gangnamUnni ||= {} as Record); + if (isMissingValue(target.rating) && guGT.rating) { + target.rating = guGT.rating; + (target as Record).ratingScale = 10; + } + if (isMissingValue(target.reviews) && (guGT.totalReviews || guGT.reviews)) { + target.reviews = guGT.totalReviews || guGT.reviews; + } + if (isMissingValue(target.status)) target.status = "active"; + injectedChannels.push(`gangnamUnni(r=${target.rating})`); + } + + if (injectedChannels.length > 0) { + console.log(`[report] GroundTruth injected: ${injectedChannels.join(", ")}`); + } + + // ─── Harness 4b: Diagnosis fallback ─── + // ProblemDiagnosis.tsx returns null when `problemDiagnosis` is empty, hiding the whole + // section. Make sure each weak channel contributes at least one diagnosis item so the + // frontend renders. Only runs when AI didn't fill diagnosis itself. + const DEFAULT_DIAGNOSIS: Record = { + instagram: { + issue: "Instagram 업로드 빈도가 부족하거나 게시물 참여율이 낮습니다", + recommendation: "주 3회 이상 주제별 콘텐츠 발행 + 릴스 비중 40% 이상 확보", + }, + youtube: { + issue: "YouTube 채널 활동이 저조하여 브랜드 신뢰도 확보가 어렵습니다", + recommendation: "시술 Before/After Shorts 주 2회, 롱폼 월 2회 업로드 루틴 수립", + }, + facebook: { + issue: "Facebook 페이지 활동이 낮아 유입 기여가 제한적입니다", + recommendation: "광고 랜딩 채널로 재정의하고 월 4회 프로모션 포스트 자동화", + }, + naverBlog: { + issue: "네이버 블로그 최신 포스팅 양이 부족하여 로컬 SEO 기회를 놓치고 있습니다", + recommendation: "주요 시술 키워드별 주 2건, 월 8건 이상 SEO 최적화 포스팅", + }, + naverPlace: { + issue: "네이버 플레이스 리뷰/콘텐츠 관리가 체계화되어 있지 않습니다", + recommendation: "방문 리뷰 유도 프로세스 구축 + 키워드·사진 리뷰 월 20건 확보", + }, + gangnamUnni: { + issue: "강남언니 프로필 정보와 답변률이 미흡합니다", + recommendation: "의사별 프로필 완성도 100% + 문의 24시간 내 응답 체계", + }, + }; + const weakStatuses = new Set(["inactive", "weak", "not_found", undefined]); + for (const ch of Object.keys(DEFAULT_DIAGNOSIS)) { + const node = (report.channelAnalysis as Record)[ch] as + | Record + | undefined; + if 
(!node) continue; + const existing = node.diagnosis as unknown[] | undefined; + if (Array.isArray(existing) && existing.length > 0) continue; + const score = typeof node.score === "number" ? node.score : null; + const status = node.status as string | undefined; + const needsFallback = (score !== null && score < 60) || weakStatuses.has(status); + if (!needsFallback) continue; + const def = DEFAULT_DIAGNOSIS[ch]; + node.diagnosis = [{ + issue: def.issue, + severity: score !== null && score < 40 ? "critical" : "warning", + recommendation: def.recommendation, + }]; + console.log(`[report] Diagnosis fallback injected for ${ch} (score=${score}, status=${status})`); + } + // Embed channel enrichment data for frontend mergeEnrichment() report.channelEnrichment = channelData; report.enrichedAt = new Date().toISOString(); @@ -320,10 +471,31 @@ ${JSON.stringify(scrapeData.branding || {}, null, 2).slice(0, 1000)} console.warn(`[harness] Low report quality (${qualityReport.score}/100):`, qualityReport.warnings); } + // Persist quality report into analysis_data.qualityReport so the frontend (and + // future debugging queries) can inspect why specific sections rendered empty. + // Downgrade status to 'partial' when too many important/critical fields are missing. + const totalMissing = qualityReport.missingCritical.length + qualityReport.missingImportant.length; + const reportStatus = (qualityReport.missingCritical.length > 0 || totalMissing >= 5) + ? 
"partial" + : "complete"; + const enrichedAnalysisData = { + ...analysisData, + qualityReport: { + score: qualityReport.score, + missingCritical: qualityReport.missingCritical, + missingImportant: qualityReport.missingImportant, + missingOptional: qualityReport.missingOptional, + warnings: qualityReport.warnings, + injectedChannels, + generatedAt: new Date().toISOString(), + }, + }; + // Legacy: marketing_reports await supabase.from("marketing_reports").update({ report, - status: "complete", + analysis_data: enrichedAnalysisData, + status: reportStatus, data_quality_score: qualityReport.score, pipeline_completed_at: new Date().toISOString(), updated_at: new Date().toISOString(), From 742c0f1bcc8fd828886342d617b9d23c05c54ace Mon Sep 17 00:00:00 2001 From: Haewon Kam Date: Fri, 10 Apr 2026 13:41:53 +0900 Subject: [PATCH 2/2] =?UTF-8?q?docs:=20CLAUDE.md=20=EB=B0=B1=EC=97=94?= =?UTF-8?q?=EB=93=9C=20=ED=8C=8C=EC=9D=B4=ED=94=84=EB=9D=BC=EC=9D=B8=20?= =?UTF-8?q?=EC=8B=A4=EC=A0=9C=20=EA=B5=AC=ED=98=84=20=EB=B0=98=EC=98=81?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit mock 데이터 기반이라는 잘못된 설명을 제거하고 실제 구현 상태로 업데이트: - 4단계 Edge Functions 파이프라인 (discover→collect→generate-report→generate-content-plan) - 실제 연동 API 목록 (YouTube/Apify/Naver/Firecrawl/Perplexity) - DB 테이블 구조, _shared 유틸리티, 환경변수 정리 - 배포 방법 (Vercel 수동 + Supabase Functions) Co-Authored-By: Claude Opus 4.6 (1M context) --- CLAUDE.md | 56 ++++++++++++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 51 insertions(+), 5 deletions(-) diff --git a/CLAUDE.md b/CLAUDE.md index 60900b9..e2cdb47 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -4,7 +4,9 @@ This file provides guidance to Claude Code (claude.ai/code) when working with co ## Project Overview -INFINITH Demo — AI 마케팅 분석 플랫폼의 프론트엔드 데모. 성형외과(뷰성형외과)를 대상으로 YouTube/Instagram/Facebook/웹사이트를 분석하고, 변환 전략/로드맵/KPI를 제안하는 마케팅 리포트를 보여준다. 현재는 mock 데이터 기반이며 실제 API 연동은 미구현 상태. +INFINITH — AI 마케팅 분석 플랫폼. 
성형외과를 대상으로 YouTube/Instagram/Facebook/네이버블로그/네이버플레이스/강남언니를 분석하고, 변환 전략/로드맵/KPI를 제안하는 마케팅 리포트를 생성한다. + +**프론트엔드(React) + 백엔드(Supabase Edge Functions) 모두 실제 구현 완료.** `src/data/` mock 데이터는 개발 fallback 용도로만 남아 있으며, 실제 파이프라인은 Supabase Edge Functions에서 실행된다. ## Commands @@ -27,12 +29,43 @@ INFINITH Demo — AI 마케팅 분석 플랫폼의 프론트엔드 데모. 성 - `/distribute` — 콘텐츠 배포 - `/performance` — 성과 분석 -**Data flow:** -1. Pages → custom hooks (`useReport`, `useMarketingPlan`) → mock data (`src/data/`) -2. Hooks return `{ data, isLoading, error }` 패턴 (100ms delay로 API 시뮬레이션) +**Frontend Data flow:** +1. Pages → custom hooks (`useReport`, `useMarketingPlan`) → Supabase DB (`marketing_reports` 테이블) +2. Hooks return `{ data, isLoading, error }` 패턴 3. `ScreenshotContext`로 리포트 스크린샷 데이터 공유 4. `useExportPDF` 훅으로 리포트 → PDF 변환 +**Backend Pipeline (Supabase Edge Functions):** + +| Phase | Function | 역할 | +|---|---|---| +| 1 | `discover-channels` | 병원 URL → SNS 채널 발견 (registry fast-path → clinics 캐시 → Firecrawl 스크래핑) | +| 2 | `collect-channel-data` | 채널별 실제 데이터 수집 (YouTube API, Apify, Naver API, Firecrawl) | +| 3 | `generate-report` | Perplexity AI로 리포트 생성 + groundTruth 주입으로 수치 보정 | +| 4 | `generate-content-plan` | 마케팅 플랜 생성 | + +**실제 연동된 API:** +- YouTube Data API v3 — 구독자/조회수/영상 수 +- Apify — Instagram/Facebook 스크래퍼 +- Naver Search API — 네이버플레이스 검색 +- Naver Blog RSS — 공식 블로그 포스트 +- Firecrawl — 홈페이지 footer SNS 링크 추출, 강남언니 스크래핑 +- Perplexity AI — 리포트 JSON 생성 +- Gemini 2.5 Flash — 콘텐츠 이미지 생성 (`src/services/geminiImageGen.ts`) + +**주요 DB 테이블 (Supabase):** +- `marketing_reports` — 리포트 결과 (`channel_data`, `report`, `analysis_data` JSONB) +- `clinic_registry` — 병원 SNS 채널 등록 (fast-path 캐시) +- `clinics` — 병원 기본 정보 + 14일 TTL 캐시 +- `channel_snapshots` — 채널별 수집 이력 +- `analysis_runs` — 파이프라인 실행 로그 + +**Shared utilities (`supabase/functions/_shared/`):** +- `extractSocialLinks.ts` — URL → SNS handle 결정론적 추출 (regex 기반) +- `verifyHandles.ts` — 핸들 실제 존재 여부 검증 +- `dataQuality.ts` — `isMissingValue()`, 
`validateReportQuality()` +- `retry.ts` — 자동 재시도/타임아웃/rate limit 내장 fetch wrapper + **Type definitions** in `src/types/`: - `report.ts` — `MarketingReport` 및 하위 타입 (YouTubeAudit, InstagramAudit, etc.) - `plan.ts` — 마케팅 플랜 타입 @@ -40,6 +73,18 @@ INFINITH Demo — AI 마케팅 분석 플랫폼의 프론트엔드 데모. 성 **AI Integration:** `src/services/geminiImageGen.ts` — Gemini 2.5 Flash로 이미지 생성. `GEMINI_API_KEY` 환경변수 필요. +## Environment Variables + +**Supabase Edge Functions (Supabase Dashboard → Settings → Secrets):** +- `YOUTUBE_API_KEY` — YouTube Data API v3 +- `APIFY_API_KEY` — Instagram/Facebook 스크래퍼 +- `NAVER_CLIENT_ID` / `NAVER_CLIENT_SECRET` — Naver Search API +- `FIRECRAWL_API_KEY` — Firecrawl 스크래핑 +- `PERPLEXITY_API_KEY` — 리포트 AI 생성 + +**Frontend (`.env.local`):** +- `GEMINI_API_KEY` — Gemini 이미지 생성 + ## Styling - Tailwind CSS 4 (`@theme` directive in `src/index.css`) @@ -55,4 +100,5 @@ INFINITH Demo — AI 마케팅 분석 플랫폼의 프론트엔드 데모. 성 ## Deployment -Vercel (SPA 모드 — 모든 경로 `/index.html`로 리라이트) +- **Frontend:** Vercel (SPA 모드 — 모든 경로 `/index.html`로 리라이트). `git push` 후 `vercel --prod` 수동 실행 필요 (Gitea 자동 트리거 없음) +- **Backend:** Supabase Edge Functions — `supabase functions deploy <function-name>` 으로 개별 배포