fix: Instagram data collection pipeline — handle normalization + DB persistence
- Add normalizeInstagramHandle() utility (Edge + browser) to strip URLs and @ prefixes
- generate-report: normalize handles before saving, persist socialHandles in report JSONB
- enrich-channels: normalize Instagram handle before Apify call (defense in depth)
- useReport: recover socialHandles + channelEnrichment from DB on direct URL access
- ReportPage: skip redundant enrichment when data already exists in DB

Fixes: Instagram enrichment failing due to URL-format handles passed to Apify

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
claude/bold-hawking
parent
2d6e95c414
commit
bd7bc45192
|
|
@ -2,12 +2,17 @@ import { useState, useEffect } from 'react';
|
||||||
import { useLocation } from 'react-router';
|
import { useLocation } from 'react-router';
|
||||||
import type { MarketingReport } from '../types/report';
|
import type { MarketingReport } from '../types/report';
|
||||||
import { fetchReportById } from '../lib/supabase';
|
import { fetchReportById } from '../lib/supabase';
|
||||||
import { transformApiReport } from '../lib/transformReport';
|
import { transformApiReport, mergeEnrichment, type EnrichmentData } from '../lib/transformReport';
|
||||||
|
import { normalizeInstagramHandle } from '../lib/normalizeHandles';
|
||||||
|
|
||||||
interface UseReportResult {
|
interface UseReportResult {
|
||||||
data: MarketingReport | null;
|
data: MarketingReport | null;
|
||||||
isLoading: boolean;
|
isLoading: boolean;
|
||||||
error: string | null;
|
error: string | null;
|
||||||
|
/** True if channelEnrichment was already in the DB — no need to re-enrich */
|
||||||
|
isEnriched: boolean;
|
||||||
|
/** Normalized social handles recovered from DB or API metadata */
|
||||||
|
socialHandles: Record<string, string | null> | null;
|
||||||
}
|
}
|
||||||
|
|
||||||
interface LocationState {
|
interface LocationState {
|
||||||
|
|
@ -27,6 +32,8 @@ export function useReport(id: string | undefined): UseReportResult {
|
||||||
const [data, setData] = useState<MarketingReport | null>(null);
|
const [data, setData] = useState<MarketingReport | null>(null);
|
||||||
const [isLoading, setIsLoading] = useState(true);
|
const [isLoading, setIsLoading] = useState(true);
|
||||||
const [error, setError] = useState<string | null>(null);
|
const [error, setError] = useState<string | null>(null);
|
||||||
|
const [isEnriched, setIsEnriched] = useState(false);
|
||||||
|
const [socialHandles, setSocialHandles] = useState<Record<string, string | null> | null>(null);
|
||||||
const location = useLocation();
|
const location = useLocation();
|
||||||
|
|
||||||
useEffect(() => {
|
useEffect(() => {
|
||||||
|
|
@ -42,6 +49,8 @@ export function useReport(id: string | undefined): UseReportResult {
|
||||||
state.metadata,
|
state.metadata,
|
||||||
);
|
);
|
||||||
setData(transformed);
|
setData(transformed);
|
||||||
|
setSocialHandles(state.metadata.socialHandles || null);
|
||||||
|
setIsEnriched(false);
|
||||||
setIsLoading(false);
|
setIsLoading(false);
|
||||||
} catch (err) {
|
} catch (err) {
|
||||||
setError(err instanceof Error ? err.message : 'Failed to parse report data');
|
setError(err instanceof Error ? err.message : 'Failed to parse report data');
|
||||||
|
|
@ -54,16 +63,45 @@ export function useReport(id: string | undefined): UseReportResult {
|
||||||
if (id) {
|
if (id) {
|
||||||
fetchReportById(id)
|
fetchReportById(id)
|
||||||
.then((row) => {
|
.then((row) => {
|
||||||
|
const reportJson = row.report as Record<string, unknown>;
|
||||||
|
const scrapeData = row.scrape_data as Record<string, unknown> | undefined;
|
||||||
|
|
||||||
const transformed = transformApiReport(
|
const transformed = transformApiReport(
|
||||||
row.id,
|
row.id,
|
||||||
row.report,
|
reportJson,
|
||||||
{
|
{
|
||||||
url: row.url,
|
url: row.url,
|
||||||
clinicName: row.clinic_name || '',
|
clinicName: row.clinic_name || '',
|
||||||
generatedAt: row.created_at,
|
generatedAt: row.created_at,
|
||||||
},
|
},
|
||||||
);
|
);
|
||||||
setData(transformed);
|
|
||||||
|
// Recover social handles: report.socialHandles > scrape_data.clinic.socialMedia
|
||||||
|
let handles = (reportJson.socialHandles as Record<string, string | null>) || null;
|
||||||
|
if (!handles && scrapeData) {
|
||||||
|
const clinic = scrapeData.clinic as Record<string, unknown> | undefined;
|
||||||
|
const socialMedia = clinic?.socialMedia as Record<string, string> | undefined;
|
||||||
|
if (socialMedia) {
|
||||||
|
handles = {
|
||||||
|
instagram: normalizeInstagramHandle(socialMedia.instagram),
|
||||||
|
youtube: socialMedia.youtube || null,
|
||||||
|
facebook: socialMedia.facebook || null,
|
||||||
|
blog: socialMedia.blog || null,
|
||||||
|
};
|
||||||
|
}
|
||||||
|
}
|
||||||
|
setSocialHandles(handles);
|
||||||
|
|
||||||
|
// If channelEnrichment already exists in DB, merge it immediately
|
||||||
|
const enrichment = reportJson.channelEnrichment as EnrichmentData | undefined;
|
||||||
|
if (enrichment) {
|
||||||
|
const merged = mergeEnrichment(transformed, enrichment);
|
||||||
|
setData(merged);
|
||||||
|
setIsEnriched(true);
|
||||||
|
} else {
|
||||||
|
setData(transformed);
|
||||||
|
setIsEnriched(false);
|
||||||
|
}
|
||||||
})
|
})
|
||||||
.catch((err) => {
|
.catch((err) => {
|
||||||
setError(err instanceof Error ? err.message : 'Failed to fetch report');
|
setError(err instanceof Error ? err.message : 'Failed to fetch report');
|
||||||
|
|
@ -77,5 +115,5 @@ export function useReport(id: string | undefined): UseReportResult {
|
||||||
setIsLoading(false);
|
setIsLoading(false);
|
||||||
}, [id, location.state]);
|
}, [id, location.state]);
|
||||||
|
|
||||||
return { data, isLoading, error };
|
return { data, isLoading, error, isEnriched, socialHandles };
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,32 @@
|
||||||
|
// Matches input that starts with an (optionally schemed, optionally
// sub-domained) instagram.com host. The lookahead requires the host to end
// right after ".com", so bare usernames such as "instagram.comics" are not
// mistaken for URLs. Case-insensitive because scheme and host are.
const INSTAGRAM_URL_PATTERN =
  /^(?:https?:\/\/)?(?:[a-z0-9-]+\.)*instagram\.com(?=[/?#:]|$)/i;

// First path segments that are Instagram routes rather than profile names
// (not exhaustive — covers the link types most commonly pasted by users).
const INSTAGRAM_NON_PROFILE_PATHS = new Set([
  'p',
  'reel',
  'reels',
  'explore',
  'stories',
  'tv',
  'accounts',
  'direct',
]);

/**
 * Normalize an Instagram handle from various input formats to a pure username.
 * Browser-side copy of supabase/functions/_shared/normalizeHandles.ts
 *
 * Accepted inputs:
 *   - profile URLs, with or without scheme/`www`, any letter case in the
 *     scheme/host, optional trailing slash, query string or fragment
 *   - "@username" and bare "username" (including usernames that merely
 *     contain the text "instagram.com", e.g. "instagram.comics")
 *
 * @param raw - Handle or URL as scraped/entered; may be null or undefined.
 * @returns The bare username, or null when the input is empty, is not a
 *   string, or is an Instagram URL that does not point at a profile
 *   (e.g. a post link such as "/p/<id>/").
 */
export function normalizeInstagramHandle(
  raw: string | null | undefined,
): string | null {
  if (!raw || typeof raw !== 'string') return null;

  let handle = raw.trim();
  if (!handle) return null;

  if (INSTAGRAM_URL_PATTERN.test(handle)) {
    let firstSegment: string;
    try {
      // Add a protocol if missing so the URL constructor can parse it.
      const urlStr = /^https?:\/\//i.test(handle) ? handle : `https://${handle}`;
      // pathname looks like "/username/" or "/username"
      firstSegment = new URL(urlStr).pathname.split('/').filter(Boolean)[0] ?? '';
    } catch {
      // URL parsing failed — fall back to a plain pattern match.
      firstSegment = handle.match(/instagram\.com\/([^/?#]+)/i)?.[1] ?? '';
    }

    // A post/reel/explore link carries no profile name in its first segment;
    // returning the route name would make callers look up the wrong account.
    if (INSTAGRAM_NON_PROFILE_PATHS.has(firstSegment.toLowerCase())) return null;

    handle = firstSegment;
  }

  // Strip a leading @
  if (handle.startsWith('@')) {
    handle = handle.slice(1);
  }

  // Strip trailing slashes
  handle = handle.replace(/\/+$/, '');

  return handle || null;
}
|
||||||
|
|
@ -34,24 +34,32 @@ const REPORT_SECTIONS = [
|
||||||
export default function ReportPage() {
|
export default function ReportPage() {
|
||||||
const { id } = useParams<{ id: string }>();
|
const { id } = useParams<{ id: string }>();
|
||||||
const location = useLocation();
|
const location = useLocation();
|
||||||
const { data: baseData, isLoading, error } = useReport(id);
|
const {
|
||||||
|
data: baseData,
|
||||||
|
isLoading,
|
||||||
|
error,
|
||||||
|
isEnriched,
|
||||||
|
socialHandles: dbSocialHandles,
|
||||||
|
} = useReport(id);
|
||||||
|
|
||||||
// Extract enrichment params from location state (socialHandles from API) or base data
|
// Build enrichment params — skip if already enriched (data from DB)
|
||||||
const enrichmentParams = useMemo(() => {
|
const enrichmentParams = useMemo(() => {
|
||||||
if (!baseData) return null;
|
if (!baseData || isEnriched) return null;
|
||||||
|
|
||||||
|
// Priority: location.state socialHandles > DB socialHandles > transformed data
|
||||||
const state = location.state as Record<string, unknown> | undefined;
|
const state = location.state as Record<string, unknown> | undefined;
|
||||||
const metadata = state?.metadata as Record<string, unknown> | undefined;
|
const metadata = state?.metadata as Record<string, unknown> | undefined;
|
||||||
const socialHandles = metadata?.socialHandles as Record<string, string | null> | undefined;
|
const stateSocialHandles = metadata?.socialHandles as Record<string, string | null> | undefined;
|
||||||
|
|
||||||
|
const handles = stateSocialHandles || dbSocialHandles;
|
||||||
|
|
||||||
// Priority: API socialHandles > transformed data > undefined
|
|
||||||
const igHandle =
|
const igHandle =
|
||||||
socialHandles?.instagram ||
|
handles?.instagram ||
|
||||||
baseData.instagramAudit?.accounts?.[0]?.handle ||
|
baseData.instagramAudit?.accounts?.[0]?.handle ||
|
||||||
undefined;
|
undefined;
|
||||||
|
|
||||||
const ytHandle =
|
const ytHandle =
|
||||||
socialHandles?.youtube ||
|
handles?.youtube ||
|
||||||
baseData.youtubeAudit?.handle ||
|
baseData.youtubeAudit?.handle ||
|
||||||
undefined;
|
undefined;
|
||||||
|
|
||||||
|
|
@ -62,7 +70,7 @@ export default function ReportPage() {
|
||||||
youtubeChannelId: ytHandle || undefined,
|
youtubeChannelId: ytHandle || undefined,
|
||||||
address: baseData.clinicSnapshot.location || undefined,
|
address: baseData.clinicSnapshot.location || undefined,
|
||||||
};
|
};
|
||||||
}, [baseData, location.state]);
|
}, [baseData, isEnriched, dbSocialHandles, location.state]);
|
||||||
|
|
||||||
const { status: enrichStatus, enrichedReport } = useEnrichment(baseData, enrichmentParams);
|
const { status: enrichStatus, enrichedReport } = useEnrichment(baseData, enrichmentParams);
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,48 @@
|
||||||
|
// Matches input that starts with an (optionally schemed, optionally
// sub-domained) instagram.com host. The lookahead requires the host to end
// right after ".com", so bare usernames such as "instagram.comics" are not
// mistaken for URLs. Case-insensitive because scheme and host are.
const INSTAGRAM_URL_PATTERN =
  /^(?:https?:\/\/)?(?:[a-z0-9-]+\.)*instagram\.com(?=[/?#:]|$)/i;

// First path segments that are Instagram routes rather than profile names
// (not exhaustive — covers the link types most commonly pasted by users).
const INSTAGRAM_NON_PROFILE_PATHS = new Set([
  "p",
  "reel",
  "reels",
  "explore",
  "stories",
  "tv",
  "accounts",
  "direct",
]);

/**
 * Normalize an Instagram handle from various input formats to a pure username.
 *
 * Handles these formats:
 *   - "https://www.instagram.com/banobagi_ps/" → "banobagi_ps"
 *   - "https://instagram.com/banobagi_ps?hl=en" → "banobagi_ps"
 *   - "http://instagram.com/banobagi_ps" → "banobagi_ps"
 *   - "instagram.com/banobagi_ps" → "banobagi_ps"
 *   - "HTTPS://WWW.INSTAGRAM.COM/banobagi_ps" → "banobagi_ps"
 *   - "@banobagi_ps" → "banobagi_ps"
 *   - "banobagi_ps" → "banobagi_ps"
 *   - "instagram.comics" → "instagram.comics" (a username, not a URL)
 *   - "https://www.instagram.com/p/Cabc123/" → null (post link, no profile)
 *   - null / undefined / "" → null
 *
 * @param raw - Handle or URL as scraped/entered; may be null or undefined.
 * @returns The bare username, or null when the input is empty, is not a
 *   string, or is an Instagram URL that does not point at a profile.
 */
export function normalizeInstagramHandle(
  raw: string | null | undefined,
): string | null {
  if (!raw || typeof raw !== "string") return null;

  let handle = raw.trim();
  if (!handle) return null;

  // If it is an instagram.com URL, extract the first path segment
  if (INSTAGRAM_URL_PATTERN.test(handle)) {
    let firstSegment: string;
    try {
      // Add protocol if missing so URL constructor works
      const urlStr = /^https?:\/\//i.test(handle)
        ? handle
        : `https://${handle}`;
      // pathname is like "/banobagi_ps/" or "/banobagi_ps"
      firstSegment =
        new URL(urlStr).pathname.split("/").filter(Boolean)[0] ?? "";
    } catch {
      // URL parsing failed — try regex fallback
      firstSegment = handle.match(/instagram\.com\/([^/?#]+)/i)?.[1] ?? "";
    }

    // A post/reel/explore link carries no profile name in its first segment;
    // returning the route name would make callers look up the wrong account.
    if (INSTAGRAM_NON_PROFILE_PATHS.has(firstSegment.toLowerCase())) {
      return null;
    }

    handle = firstSegment;
  }

  // Strip leading @
  if (handle.startsWith("@")) {
    handle = handle.slice(1);
  }

  // Strip trailing slash
  handle = handle.replace(/\/+$/, "");

  return handle || null;
}
|
||||||
|
|
@ -1,5 +1,6 @@
|
||||||
import "@supabase/functions-js/edge-runtime.d.ts";
|
import "@supabase/functions-js/edge-runtime.d.ts";
|
||||||
import { createClient } from "https://esm.sh/@supabase/supabase-js@2";
|
import { createClient } from "https://esm.sh/@supabase/supabase-js@2";
|
||||||
|
import { normalizeInstagramHandle } from "../_shared/normalizeHandles.ts";
|
||||||
|
|
||||||
const corsHeaders = {
|
const corsHeaders = {
|
||||||
"Access-Control-Allow-Origin": "*",
|
"Access-Control-Allow-Origin": "*",
|
||||||
|
|
@ -58,12 +59,13 @@ Deno.serve(async (req) => {
|
||||||
const tasks = [];
|
const tasks = [];
|
||||||
|
|
||||||
// 1. Instagram Profile
|
// 1. Instagram Profile
|
||||||
if (instagramHandle) {
|
const cleanIgHandle = normalizeInstagramHandle(instagramHandle);
|
||||||
|
if (cleanIgHandle) {
|
||||||
tasks.push(
|
tasks.push(
|
||||||
(async () => {
|
(async () => {
|
||||||
const items = await runApifyActor(
|
const items = await runApifyActor(
|
||||||
"apify~instagram-profile-scraper",
|
"apify~instagram-profile-scraper",
|
||||||
{ usernames: [instagramHandle], resultsLimit: 12 },
|
{ usernames: [cleanIgHandle], resultsLimit: 12 },
|
||||||
APIFY_TOKEN
|
APIFY_TOKEN
|
||||||
);
|
);
|
||||||
const profile = (items as Record<string, unknown>[])[0];
|
const profile = (items as Record<string, unknown>[])[0];
|
||||||
|
|
|
||||||
|
|
@ -1,5 +1,6 @@
|
||||||
import "@supabase/functions-js/edge-runtime.d.ts";
|
import "@supabase/functions-js/edge-runtime.d.ts";
|
||||||
import { createClient } from "https://esm.sh/@supabase/supabase-js@2";
|
import { createClient } from "https://esm.sh/@supabase/supabase-js@2";
|
||||||
|
import { normalizeInstagramHandle } from "../_shared/normalizeHandles.ts";
|
||||||
|
|
||||||
const corsHeaders = {
|
const corsHeaders = {
|
||||||
"Access-Control-Allow-Origin": "*",
|
"Access-Control-Allow-Origin": "*",
|
||||||
|
|
@ -153,6 +154,18 @@ ${JSON.stringify(analyzeResult.data?.analysis || {}, null, 2)}
|
||||||
report = { raw: reportText, parseError: true };
|
report = { raw: reportText, parseError: true };
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Normalize social handles from scrape data
|
||||||
|
const socialMedia = clinic.socialMedia || {};
|
||||||
|
const normalizedHandles = {
|
||||||
|
instagram: normalizeInstagramHandle(socialMedia.instagram),
|
||||||
|
youtube: socialMedia.youtube || null,
|
||||||
|
facebook: socialMedia.facebook || null,
|
||||||
|
blog: socialMedia.blog || null,
|
||||||
|
};
|
||||||
|
|
||||||
|
// Embed normalized handles in report for DB persistence
|
||||||
|
report.socialHandles = normalizedHandles;
|
||||||
|
|
||||||
// Save to Supabase
|
// Save to Supabase
|
||||||
const supabase = createClient(supabaseUrl, supabaseKey);
|
const supabase = createClient(supabaseUrl, supabaseKey);
|
||||||
const { data: saved, error: saveError } = await supabase
|
const { data: saved, error: saveError } = await supabase
|
||||||
|
|
@ -167,9 +180,6 @@ ${JSON.stringify(analyzeResult.data?.analysis || {}, null, 2)}
|
||||||
.select("id")
|
.select("id")
|
||||||
.single();
|
.single();
|
||||||
|
|
||||||
// Extract social handles from scrape data for frontend enrichment
|
|
||||||
const socialMedia = clinic.socialMedia || {};
|
|
||||||
|
|
||||||
return new Response(
|
return new Response(
|
||||||
JSON.stringify({
|
JSON.stringify({
|
||||||
success: true,
|
success: true,
|
||||||
|
|
@ -184,12 +194,7 @@ ${JSON.stringify(analyzeResult.data?.analysis || {}, null, 2)}
|
||||||
marketAnalysis: analyzeResult.success,
|
marketAnalysis: analyzeResult.success,
|
||||||
aiGeneration: !report.parseError,
|
aiGeneration: !report.parseError,
|
||||||
},
|
},
|
||||||
socialHandles: {
|
socialHandles: normalizedHandles,
|
||||||
instagram: socialMedia.instagram || null,
|
|
||||||
youtube: socialMedia.youtube || null,
|
|
||||||
facebook: socialMedia.facebook || null,
|
|
||||||
blog: socialMedia.blog || null,
|
|
||||||
},
|
|
||||||
address,
|
address,
|
||||||
services,
|
services,
|
||||||
},
|
},
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue