fix: Instagram data collection pipeline — handle normalization + DB persistence
- Add normalizeInstagramHandle() utility (Edge + browser) to strip URLs and @ prefixes
- generate-report: normalize handles before saving, persist socialHandles in report JSONB
- enrich-channels: normalize Instagram handle before Apify call (defense in depth)
- useReport: recover socialHandles + channelEnrichment from DB on direct URL access
- ReportPage: skip redundant enrichment when data already exists in DB

Fixes: Instagram enrichment failing due to URL-format handles passed to Apify

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
claude/bold-hawking
parent
2d6e95c414
commit
bd7bc45192
|
|
@ -2,12 +2,17 @@ import { useState, useEffect } from 'react';
|
|||
import { useLocation } from 'react-router';
|
||||
import type { MarketingReport } from '../types/report';
|
||||
import { fetchReportById } from '../lib/supabase';
|
||||
import { transformApiReport } from '../lib/transformReport';
|
||||
import { transformApiReport, mergeEnrichment, type EnrichmentData } from '../lib/transformReport';
|
||||
import { normalizeInstagramHandle } from '../lib/normalizeHandles';
|
||||
|
||||
/** Values returned by the useReport hook. */
interface UseReportResult {
|
||||
/** The transformed report; null until a report has been loaded. */
data: MarketingReport | null;
|
||||
/** Starts as true while the report is being resolved. */
isLoading: boolean;
|
||||
/** Error message when parsing or fetching the report failed, otherwise null. */
error: string | null;
|
||||
/** True if channelEnrichment was already in the DB — no need to re-enrich */
|
||||
isEnriched: boolean;
|
||||
/** Normalized social handles recovered from DB or API metadata */
|
||||
socialHandles: Record<string, string | null> | null;
|
||||
}
|
||||
|
||||
interface LocationState {
|
||||
|
|
@ -27,6 +32,8 @@ export function useReport(id: string | undefined): UseReportResult {
|
|||
const [data, setData] = useState<MarketingReport | null>(null);
|
||||
const [isLoading, setIsLoading] = useState(true);
|
||||
const [error, setError] = useState<string | null>(null);
|
||||
const [isEnriched, setIsEnriched] = useState(false);
|
||||
const [socialHandles, setSocialHandles] = useState<Record<string, string | null> | null>(null);
|
||||
const location = useLocation();
|
||||
|
||||
useEffect(() => {
|
||||
|
|
@ -42,6 +49,8 @@ export function useReport(id: string | undefined): UseReportResult {
|
|||
state.metadata,
|
||||
);
|
||||
setData(transformed);
|
||||
setSocialHandles(state.metadata.socialHandles || null);
|
||||
setIsEnriched(false);
|
||||
setIsLoading(false);
|
||||
} catch (err) {
|
||||
setError(err instanceof Error ? err.message : 'Failed to parse report data');
|
||||
|
|
@ -54,16 +63,45 @@ export function useReport(id: string | undefined): UseReportResult {
|
|||
if (id) {
|
||||
fetchReportById(id)
|
||||
.then((row) => {
|
||||
const reportJson = row.report as Record<string, unknown>;
|
||||
const scrapeData = row.scrape_data as Record<string, unknown> | undefined;
|
||||
|
||||
const transformed = transformApiReport(
|
||||
row.id,
|
||||
row.report,
|
||||
reportJson,
|
||||
{
|
||||
url: row.url,
|
||||
clinicName: row.clinic_name || '',
|
||||
generatedAt: row.created_at,
|
||||
},
|
||||
);
|
||||
|
||||
// Recover social handles: report.socialHandles > scrape_data.clinic.socialMedia
|
||||
let handles = (reportJson.socialHandles as Record<string, string | null>) || null;
|
||||
if (!handles && scrapeData) {
|
||||
const clinic = scrapeData.clinic as Record<string, unknown> | undefined;
|
||||
const socialMedia = clinic?.socialMedia as Record<string, string> | undefined;
|
||||
if (socialMedia) {
|
||||
handles = {
|
||||
instagram: normalizeInstagramHandle(socialMedia.instagram),
|
||||
youtube: socialMedia.youtube || null,
|
||||
facebook: socialMedia.facebook || null,
|
||||
blog: socialMedia.blog || null,
|
||||
};
|
||||
}
|
||||
}
|
||||
setSocialHandles(handles);
|
||||
|
||||
// If channelEnrichment already exists in DB, merge it immediately
|
||||
const enrichment = reportJson.channelEnrichment as EnrichmentData | undefined;
|
||||
if (enrichment) {
|
||||
const merged = mergeEnrichment(transformed, enrichment);
|
||||
setData(merged);
|
||||
setIsEnriched(true);
|
||||
} else {
|
||||
setData(transformed);
|
||||
setIsEnriched(false);
|
||||
}
|
||||
})
|
||||
.catch((err) => {
|
||||
setError(err instanceof Error ? err.message : 'Failed to fetch report');
|
||||
|
|
@ -77,5 +115,5 @@ export function useReport(id: string | undefined): UseReportResult {
|
|||
setIsLoading(false);
|
||||
}, [id, location.state]);
|
||||
|
||||
return { data, isLoading, error };
|
||||
return { data, isLoading, error, isEnriched, socialHandles };
|
||||
}
|
||||
|
|
|
|||
|
|
@ -0,0 +1,32 @@
|
|||
/**
 * First path segments on instagram.com that are site routes, not profiles
 * (e.g. "/p/<shortcode>/" is a post). A URL starting with one of these
 * carries no username in its first segment.
 */
const NON_PROFILE_PATH_SEGMENTS: ReadonlySet<string> = new Set([
  'p',
  'reel',
  'reels',
  'explore',
  'stories',
  'tv',
  'accounts',
  'direct',
]);

/**
 * Normalize an Instagram handle from various input formats to a pure username.
 * Browser-side copy of supabase/functions/_shared/normalizeHandles.ts
 *
 * Accepted inputs include full profile URLs (with or without protocol,
 * query string, or trailing slash), "@name", and a bare "name".
 *
 * @param raw - Handle or profile URL as scraped or entered; may be nullish.
 * @returns The bare username, or null when the input is empty, is not a
 *   string, or is an instagram.com URL that does not point at a profile.
 */
export function normalizeInstagramHandle(
  raw: string | null | undefined,
): string | null {
  if (!raw || typeof raw !== 'string') return null;

  let handle = raw.trim();
  if (!handle) return null;

  // Host names are case-insensitive, so "Instagram.com/..." must be detected too.
  if (/instagram\.com/i.test(handle)) {
    try {
      // Add a protocol if missing so the URL constructor can parse the input.
      const urlStr = /^https?:\/\//i.test(handle) ? handle : `https://${handle}`;
      const url = new URL(urlStr);
      const segments = url.pathname.split('/').filter(Boolean);
      handle = segments[0] || '';
    } catch {
      // URL parsing failed — fall back to grabbing the first path segment by regex.
      const match = handle.match(/instagram\.com\/([^/?#]+)/i);
      handle = match?.[1] || '';
    }

    // Post/reel/explore URLs do not identify a profile; never report the
    // route name ("p", "reel", ...) as if it were a username.
    if (NON_PROFILE_PATH_SEGMENTS.has(handle.toLowerCase())) return null;
  }

  // Strip leading @
  if (handle.startsWith('@')) {
    handle = handle.slice(1);
  }

  // Strip trailing slash(es)
  handle = handle.replace(/\/+$/, '');

  return handle || null;
}
|
||||
|
|
@ -34,24 +34,32 @@ const REPORT_SECTIONS = [
|
|||
export default function ReportPage() {
|
||||
const { id } = useParams<{ id: string }>();
|
||||
const location = useLocation();
|
||||
const { data: baseData, isLoading, error } = useReport(id);
|
||||
const {
|
||||
data: baseData,
|
||||
isLoading,
|
||||
error,
|
||||
isEnriched,
|
||||
socialHandles: dbSocialHandles,
|
||||
} = useReport(id);
|
||||
|
||||
// Extract enrichment params from location state (socialHandles from API) or base data
|
||||
// Build enrichment params — skip if already enriched (data from DB)
|
||||
const enrichmentParams = useMemo(() => {
|
||||
if (!baseData) return null;
|
||||
if (!baseData || isEnriched) return null;
|
||||
|
||||
// Priority: location.state socialHandles > DB socialHandles > transformed data
|
||||
const state = location.state as Record<string, unknown> | undefined;
|
||||
const metadata = state?.metadata as Record<string, unknown> | undefined;
|
||||
const socialHandles = metadata?.socialHandles as Record<string, string | null> | undefined;
|
||||
const stateSocialHandles = metadata?.socialHandles as Record<string, string | null> | undefined;
|
||||
|
||||
const handles = stateSocialHandles || dbSocialHandles;
|
||||
|
||||
// Priority: API socialHandles > transformed data > undefined
|
||||
const igHandle =
|
||||
socialHandles?.instagram ||
|
||||
handles?.instagram ||
|
||||
baseData.instagramAudit?.accounts?.[0]?.handle ||
|
||||
undefined;
|
||||
|
||||
const ytHandle =
|
||||
socialHandles?.youtube ||
|
||||
handles?.youtube ||
|
||||
baseData.youtubeAudit?.handle ||
|
||||
undefined;
|
||||
|
||||
|
|
@ -62,7 +70,7 @@ export default function ReportPage() {
|
|||
youtubeChannelId: ytHandle || undefined,
|
||||
address: baseData.clinicSnapshot.location || undefined,
|
||||
};
|
||||
}, [baseData, location.state]);
|
||||
}, [baseData, isEnriched, dbSocialHandles, location.state]);
|
||||
|
||||
const { status: enrichStatus, enrichedReport } = useEnrichment(baseData, enrichmentParams);
|
||||
|
||||
|
|
|
|||
|
|
@ -0,0 +1,48 @@
|
|||
/**
 * First path segments on instagram.com that are site routes, not profiles
 * (e.g. "/p/<shortcode>/" is a post). A URL starting with one of these
 * carries no username in its first segment.
 */
const NON_PROFILE_PATH_SEGMENTS: ReadonlySet<string> = new Set([
  "p",
  "reel",
  "reels",
  "explore",
  "stories",
  "tv",
  "accounts",
  "direct",
]);

/**
 * Normalize an Instagram handle from various input formats to a pure username.
 *
 * Handles these formats:
 *   - "https://www.instagram.com/banobagi_ps/" → "banobagi_ps"
 *   - "https://instagram.com/banobagi_ps?hl=en" → "banobagi_ps"
 *   - "http://instagram.com/banobagi_ps" → "banobagi_ps"
 *   - "instagram.com/banobagi_ps" → "banobagi_ps"
 *   - "Instagram.com/banobagi_ps" → "banobagi_ps" (host match is case-insensitive)
 *   - "@banobagi_ps" → "banobagi_ps"
 *   - "banobagi_ps" → "banobagi_ps"
 *   - "https://www.instagram.com/p/<shortcode>/" → null (not a profile URL)
 *   - null / undefined / "" → null
 *
 * @param raw - Handle or profile URL as scraped or supplied by the caller.
 * @returns The bare username, or null when no username can be extracted.
 */
export function normalizeInstagramHandle(
  raw: string | null | undefined,
): string | null {
  if (!raw || typeof raw !== "string") return null;

  let handle = raw.trim();
  if (!handle) return null;

  // If it contains "instagram.com" (any letter case), extract the first path segment
  if (/instagram\.com/i.test(handle)) {
    try {
      // Add protocol if missing so URL constructor works
      const urlStr = /^https?:\/\//i.test(handle)
        ? handle
        : `https://${handle}`;
      const url = new URL(urlStr);
      // pathname is like "/banobagi_ps/" or "/banobagi_ps"
      const segments = url.pathname.split("/").filter(Boolean);
      handle = segments[0] || "";
    } catch {
      // URL parsing failed — try regex fallback
      const match = handle.match(/instagram\.com\/([^/?#]+)/i);
      handle = match?.[1] || "";
    }

    // Post/reel/explore URLs do not identify a profile; never report the
    // route name ("p", "reel", ...) as if it were a username.
    if (NON_PROFILE_PATH_SEGMENTS.has(handle.toLowerCase())) return null;
  }

  // Strip leading @
  if (handle.startsWith("@")) {
    handle = handle.slice(1);
  }

  // Strip trailing slash
  handle = handle.replace(/\/+$/, "");

  return handle || null;
}
|
||||
|
|
@ -1,5 +1,6 @@
|
|||
import "@supabase/functions-js/edge-runtime.d.ts";
|
||||
import { createClient } from "https://esm.sh/@supabase/supabase-js@2";
|
||||
import { normalizeInstagramHandle } from "../_shared/normalizeHandles.ts";
|
||||
|
||||
const corsHeaders = {
|
||||
"Access-Control-Allow-Origin": "*",
|
||||
|
|
@ -58,12 +59,13 @@ Deno.serve(async (req) => {
|
|||
const tasks = [];
|
||||
|
||||
// 1. Instagram Profile
|
||||
if (instagramHandle) {
|
||||
const cleanIgHandle = normalizeInstagramHandle(instagramHandle);
|
||||
if (cleanIgHandle) {
|
||||
tasks.push(
|
||||
(async () => {
|
||||
const items = await runApifyActor(
|
||||
"apify~instagram-profile-scraper",
|
||||
{ usernames: [instagramHandle], resultsLimit: 12 },
|
||||
{ usernames: [cleanIgHandle], resultsLimit: 12 },
|
||||
APIFY_TOKEN
|
||||
);
|
||||
const profile = (items as Record<string, unknown>[])[0];
|
||||
|
|
|
|||
|
|
@ -1,5 +1,6 @@
|
|||
import "@supabase/functions-js/edge-runtime.d.ts";
|
||||
import { createClient } from "https://esm.sh/@supabase/supabase-js@2";
|
||||
import { normalizeInstagramHandle } from "../_shared/normalizeHandles.ts";
|
||||
|
||||
const corsHeaders = {
|
||||
"Access-Control-Allow-Origin": "*",
|
||||
|
|
@ -153,6 +154,18 @@ ${JSON.stringify(analyzeResult.data?.analysis || {}, null, 2)}
|
|||
report = { raw: reportText, parseError: true };
|
||||
}
|
||||
|
||||
// Normalize social handles from scrape data
|
||||
const socialMedia = clinic.socialMedia || {};
|
||||
const normalizedHandles = {
|
||||
instagram: normalizeInstagramHandle(socialMedia.instagram),
|
||||
youtube: socialMedia.youtube || null,
|
||||
facebook: socialMedia.facebook || null,
|
||||
blog: socialMedia.blog || null,
|
||||
};
|
||||
|
||||
// Embed normalized handles in report for DB persistence
|
||||
report.socialHandles = normalizedHandles;
|
||||
|
||||
// Save to Supabase
|
||||
const supabase = createClient(supabaseUrl, supabaseKey);
|
||||
const { data: saved, error: saveError } = await supabase
|
||||
|
|
@ -167,9 +180,6 @@ ${JSON.stringify(analyzeResult.data?.analysis || {}, null, 2)}
|
|||
.select("id")
|
||||
.single();
|
||||
|
||||
// Extract social handles from scrape data for frontend enrichment
|
||||
const socialMedia = clinic.socialMedia || {};
|
||||
|
||||
return new Response(
|
||||
JSON.stringify({
|
||||
success: true,
|
||||
|
|
@ -184,12 +194,7 @@ ${JSON.stringify(analyzeResult.data?.analysis || {}, null, 2)}
|
|||
marketAnalysis: analyzeResult.success,
|
||||
aiGeneration: !report.parseError,
|
||||
},
|
||||
socialHandles: {
|
||||
instagram: socialMedia.instagram || null,
|
||||
youtube: socialMedia.youtube || null,
|
||||
facebook: socialMedia.facebook || null,
|
||||
blog: socialMedia.blog || null,
|
||||
},
|
||||
socialHandles: normalizedHandles,
|
||||
address,
|
||||
services,
|
||||
},
|
||||
|
|
|
|||
Loading…
Reference in New Issue