fix: Instagram data collection pipeline — handle normalization + DB persistence

- Add normalizeInstagramHandle() utility (Edge + browser) to strip URLs, @ prefixes
- generate-report: normalize handles before saving, persist socialHandles in report JSONB
- enrich-channels: normalize Instagram handle before Apify call (defense in depth)
- useReport: recover socialHandles + channelEnrichment from DB on direct URL access
- ReportPage: skip redundant enrichment when data already exists in DB

Fixes: Instagram enrichment failing due to URL-format handles passed to Apify

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
claude/bold-hawking
Haewon Kam 2026-04-02 13:34:54 +09:00
parent 2d6e95c414
commit bd7bc45192
6 changed files with 156 additions and 23 deletions

View File

@ -2,12 +2,17 @@ import { useState, useEffect } from 'react';
import { useLocation } from 'react-router'; import { useLocation } from 'react-router';
import type { MarketingReport } from '../types/report'; import type { MarketingReport } from '../types/report';
import { fetchReportById } from '../lib/supabase'; import { fetchReportById } from '../lib/supabase';
import { transformApiReport } from '../lib/transformReport'; import { transformApiReport, mergeEnrichment, type EnrichmentData } from '../lib/transformReport';
import { normalizeInstagramHandle } from '../lib/normalizeHandles';
interface UseReportResult { interface UseReportResult {
data: MarketingReport | null; data: MarketingReport | null;
isLoading: boolean; isLoading: boolean;
error: string | null; error: string | null;
/** True if channelEnrichment was already in the DB — no need to re-enrich */
isEnriched: boolean;
/** Normalized social handles recovered from DB or API metadata */
socialHandles: Record<string, string | null> | null;
} }
interface LocationState { interface LocationState {
@ -27,6 +32,8 @@ export function useReport(id: string | undefined): UseReportResult {
const [data, setData] = useState<MarketingReport | null>(null); const [data, setData] = useState<MarketingReport | null>(null);
const [isLoading, setIsLoading] = useState(true); const [isLoading, setIsLoading] = useState(true);
const [error, setError] = useState<string | null>(null); const [error, setError] = useState<string | null>(null);
const [isEnriched, setIsEnriched] = useState(false);
const [socialHandles, setSocialHandles] = useState<Record<string, string | null> | null>(null);
const location = useLocation(); const location = useLocation();
useEffect(() => { useEffect(() => {
@ -42,6 +49,8 @@ export function useReport(id: string | undefined): UseReportResult {
state.metadata, state.metadata,
); );
setData(transformed); setData(transformed);
setSocialHandles(state.metadata.socialHandles || null);
setIsEnriched(false);
setIsLoading(false); setIsLoading(false);
} catch (err) { } catch (err) {
setError(err instanceof Error ? err.message : 'Failed to parse report data'); setError(err instanceof Error ? err.message : 'Failed to parse report data');
@ -54,16 +63,45 @@ export function useReport(id: string | undefined): UseReportResult {
if (id) { if (id) {
fetchReportById(id) fetchReportById(id)
.then((row) => { .then((row) => {
const reportJson = row.report as Record<string, unknown>;
const scrapeData = row.scrape_data as Record<string, unknown> | undefined;
const transformed = transformApiReport( const transformed = transformApiReport(
row.id, row.id,
row.report, reportJson,
{ {
url: row.url, url: row.url,
clinicName: row.clinic_name || '', clinicName: row.clinic_name || '',
generatedAt: row.created_at, generatedAt: row.created_at,
}, },
); );
// Recover social handles: report.socialHandles > scrape_data.clinic.socialMedia
let handles = (reportJson.socialHandles as Record<string, string | null>) || null;
if (!handles && scrapeData) {
const clinic = scrapeData.clinic as Record<string, unknown> | undefined;
const socialMedia = clinic?.socialMedia as Record<string, string> | undefined;
if (socialMedia) {
handles = {
instagram: normalizeInstagramHandle(socialMedia.instagram),
youtube: socialMedia.youtube || null,
facebook: socialMedia.facebook || null,
blog: socialMedia.blog || null,
};
}
}
setSocialHandles(handles);
// If channelEnrichment already exists in DB, merge it immediately
const enrichment = reportJson.channelEnrichment as EnrichmentData | undefined;
if (enrichment) {
const merged = mergeEnrichment(transformed, enrichment);
setData(merged);
setIsEnriched(true);
} else {
setData(transformed); setData(transformed);
setIsEnriched(false);
}
}) })
.catch((err) => { .catch((err) => {
setError(err instanceof Error ? err.message : 'Failed to fetch report'); setError(err instanceof Error ? err.message : 'Failed to fetch report');
@ -77,5 +115,5 @@ export function useReport(id: string | undefined): UseReportResult {
setIsLoading(false); setIsLoading(false);
}, [id, location.state]); }, [id, location.state]);
return { data, isLoading, error }; return { data, isLoading, error, isEnriched, socialHandles };
} }

View File

@ -0,0 +1,32 @@
/**
 * Normalize an Instagram handle from various input formats to a pure username.
 * Browser-side copy of supabase/functions/_shared/normalizeHandles.ts
 *
 * Accepted inputs (all yield "banobagi_ps"):
 *  - "https://www.instagram.com/banobagi_ps/"
 *  - "https://instagram.com/banobagi_ps?hl=en"
 *  - "instagram.com/banobagi_ps"
 *  - "@banobagi_ps"
 *  - "banobagi_ps"
 *
 * Host and scheme are matched case-insensitively, so
 * "https://www.Instagram.com/banobagi_ps" is treated as a URL instead of
 * being returned verbatim as if it were a username.
 *
 * @param raw - Handle, @-mention or profile URL; may be null/undefined.
 * @returns The bare username, or null when nothing usable can be extracted.
 */
export function normalizeInstagramHandle(
  raw: string | null | undefined,
): string | null {
  if (!raw || typeof raw !== 'string') return null;

  let handle = raw.trim();
  if (!handle) return null;

  // Anything mentioning the Instagram domain is treated as a URL; the
  // username is the first path segment. Domain names are case-insensitive,
  // so the detection must be as well.
  if (/instagram\.com/i.test(handle)) {
    try {
      // Add a protocol if missing so the URL constructor can parse it.
      const urlStr = /^https?:\/\//i.test(handle) ? handle : `https://${handle}`;
      const url = new URL(urlStr);
      // pathname looks like "/banobagi_ps/" or "/banobagi_ps"
      const segments = url.pathname.split('/').filter(Boolean);
      handle = segments[0] ?? '';
    } catch {
      // URL parsing failed (e.g. surrounding text) — fall back to a regex.
      const match = handle.match(/instagram\.com\/([^/?#]+)/i);
      handle = match?.[1] ?? '';
    }
  }

  // Strip a leading "@"
  if (handle.startsWith('@')) {
    handle = handle.slice(1);
  }

  // Strip trailing slashes
  handle = handle.replace(/\/+$/, '');

  return handle || null;
}

View File

@ -34,24 +34,32 @@ const REPORT_SECTIONS = [
export default function ReportPage() { export default function ReportPage() {
const { id } = useParams<{ id: string }>(); const { id } = useParams<{ id: string }>();
const location = useLocation(); const location = useLocation();
const { data: baseData, isLoading, error } = useReport(id); const {
data: baseData,
isLoading,
error,
isEnriched,
socialHandles: dbSocialHandles,
} = useReport(id);
// Extract enrichment params from location state (socialHandles from API) or base data // Build enrichment params — skip if already enriched (data from DB)
const enrichmentParams = useMemo(() => { const enrichmentParams = useMemo(() => {
if (!baseData) return null; if (!baseData || isEnriched) return null;
// Priority: location.state socialHandles > DB socialHandles > transformed data
const state = location.state as Record<string, unknown> | undefined; const state = location.state as Record<string, unknown> | undefined;
const metadata = state?.metadata as Record<string, unknown> | undefined; const metadata = state?.metadata as Record<string, unknown> | undefined;
const socialHandles = metadata?.socialHandles as Record<string, string | null> | undefined; const stateSocialHandles = metadata?.socialHandles as Record<string, string | null> | undefined;
const handles = stateSocialHandles || dbSocialHandles;
// Priority: API socialHandles > transformed data > undefined
const igHandle = const igHandle =
socialHandles?.instagram || handles?.instagram ||
baseData.instagramAudit?.accounts?.[0]?.handle || baseData.instagramAudit?.accounts?.[0]?.handle ||
undefined; undefined;
const ytHandle = const ytHandle =
socialHandles?.youtube || handles?.youtube ||
baseData.youtubeAudit?.handle || baseData.youtubeAudit?.handle ||
undefined; undefined;
@ -62,7 +70,7 @@ export default function ReportPage() {
youtubeChannelId: ytHandle || undefined, youtubeChannelId: ytHandle || undefined,
address: baseData.clinicSnapshot.location || undefined, address: baseData.clinicSnapshot.location || undefined,
}; };
}, [baseData, location.state]); }, [baseData, isEnriched, dbSocialHandles, location.state]);
const { status: enrichStatus, enrichedReport } = useEnrichment(baseData, enrichmentParams); const { status: enrichStatus, enrichedReport } = useEnrichment(baseData, enrichmentParams);

View File

@ -0,0 +1,48 @@
/**
 * Normalize an Instagram handle from various input formats to a pure username.
 *
 * Handles these formats:
 *  - "https://www.instagram.com/banobagi_ps/"   → "banobagi_ps"
 *  - "https://instagram.com/banobagi_ps?hl=en"  → "banobagi_ps"
 *  - "http://instagram.com/banobagi_ps"         → "banobagi_ps"
 *  - "instagram.com/banobagi_ps"                → "banobagi_ps"
 *  - "HTTPS://WWW.Instagram.com/banobagi_ps"    → "banobagi_ps"
 *  - "@banobagi_ps"                             → "banobagi_ps"
 *  - "banobagi_ps"                              → "banobagi_ps"
 *  - null / undefined / ""                      → null
 *
 * Host and scheme are matched case-insensitively so that a mixed-case URL
 * is never passed through verbatim as if it were a username.
 *
 * @param raw - Handle, @-mention or profile URL; may be null/undefined.
 * @returns The bare username, or null when nothing usable can be extracted.
 */
export function normalizeInstagramHandle(
  raw: string | null | undefined,
): string | null {
  if (!raw || typeof raw !== "string") return null;

  let handle = raw.trim();
  if (!handle) return null;

  // If it mentions the Instagram domain (in any letter case), extract the
  // first path segment.
  if (/instagram\.com/i.test(handle)) {
    try {
      // Add protocol if missing so the URL constructor works
      const urlStr = /^https?:\/\//i.test(handle)
        ? handle
        : `https://${handle}`;
      const url = new URL(urlStr);
      // pathname is like "/banobagi_ps/" or "/banobagi_ps"
      const segments = url.pathname.split("/").filter(Boolean);
      handle = segments[0] ?? "";
    } catch {
      // URL parsing failed — try regex fallback
      const match = handle.match(/instagram\.com\/([^/?#]+)/i);
      handle = match?.[1] ?? "";
    }
  }

  // Strip leading @
  if (handle.startsWith("@")) {
    handle = handle.slice(1);
  }

  // Strip trailing slashes
  handle = handle.replace(/\/+$/, "");

  return handle || null;
}

View File

@ -1,5 +1,6 @@
import "@supabase/functions-js/edge-runtime.d.ts"; import "@supabase/functions-js/edge-runtime.d.ts";
import { createClient } from "https://esm.sh/@supabase/supabase-js@2"; import { createClient } from "https://esm.sh/@supabase/supabase-js@2";
import { normalizeInstagramHandle } from "../_shared/normalizeHandles.ts";
const corsHeaders = { const corsHeaders = {
"Access-Control-Allow-Origin": "*", "Access-Control-Allow-Origin": "*",
@ -58,12 +59,13 @@ Deno.serve(async (req) => {
const tasks = []; const tasks = [];
// 1. Instagram Profile // 1. Instagram Profile
if (instagramHandle) { const cleanIgHandle = normalizeInstagramHandle(instagramHandle);
if (cleanIgHandle) {
tasks.push( tasks.push(
(async () => { (async () => {
const items = await runApifyActor( const items = await runApifyActor(
"apify~instagram-profile-scraper", "apify~instagram-profile-scraper",
{ usernames: [instagramHandle], resultsLimit: 12 }, { usernames: [cleanIgHandle], resultsLimit: 12 },
APIFY_TOKEN APIFY_TOKEN
); );
const profile = (items as Record<string, unknown>[])[0]; const profile = (items as Record<string, unknown>[])[0];

View File

@ -1,5 +1,6 @@
import "@supabase/functions-js/edge-runtime.d.ts"; import "@supabase/functions-js/edge-runtime.d.ts";
import { createClient } from "https://esm.sh/@supabase/supabase-js@2"; import { createClient } from "https://esm.sh/@supabase/supabase-js@2";
import { normalizeInstagramHandle } from "../_shared/normalizeHandles.ts";
const corsHeaders = { const corsHeaders = {
"Access-Control-Allow-Origin": "*", "Access-Control-Allow-Origin": "*",
@ -153,6 +154,18 @@ ${JSON.stringify(analyzeResult.data?.analysis || {}, null, 2)}
report = { raw: reportText, parseError: true }; report = { raw: reportText, parseError: true };
} }
// Normalize social handles from scrape data
const socialMedia = clinic.socialMedia || {};
const normalizedHandles = {
instagram: normalizeInstagramHandle(socialMedia.instagram),
youtube: socialMedia.youtube || null,
facebook: socialMedia.facebook || null,
blog: socialMedia.blog || null,
};
// Embed normalized handles in report for DB persistence
report.socialHandles = normalizedHandles;
// Save to Supabase // Save to Supabase
const supabase = createClient(supabaseUrl, supabaseKey); const supabase = createClient(supabaseUrl, supabaseKey);
const { data: saved, error: saveError } = await supabase const { data: saved, error: saveError } = await supabase
@ -167,9 +180,6 @@ ${JSON.stringify(analyzeResult.data?.analysis || {}, null, 2)}
.select("id") .select("id")
.single(); .single();
// Extract social handles from scrape data for frontend enrichment
const socialMedia = clinic.socialMedia || {};
return new Response( return new Response(
JSON.stringify({ JSON.stringify({
success: true, success: true,
@ -184,12 +194,7 @@ ${JSON.stringify(analyzeResult.data?.analysis || {}, null, 2)}
marketAnalysis: analyzeResult.success, marketAnalysis: analyzeResult.success,
aiGeneration: !report.parseError, aiGeneration: !report.parseError,
}, },
socialHandles: { socialHandles: normalizedHandles,
instagram: socialMedia.instagram || null,
youtube: socialMedia.youtube || null,
facebook: socialMedia.facebook || null,
blog: socialMedia.blog || null,
},
address, address,
services, services,
}, },