// o2o-infinith-demo/supabase/functions/scrape-website/index.ts
//
// 161 lines
// 4.8 KiB
// TypeScript

import "@supabase/functions-js/edge-runtime.d.ts";
import { createClient } from "https://esm.sh/@supabase/supabase-js@2";
/**
 * CORS headers attached to every response from this function, including the
 * preflight reply. Any origin is allowed; the permitted request headers are
 * listed individually and joined into the comma-separated header value.
 */
const corsHeaders = {
  "Access-Control-Allow-Origin": "*",
  "Access-Control-Allow-Headers": [
    "authorization",
    "x-client-info",
    "apikey",
    "content-type",
  ].join(", "),
};
/** Shape of the JSON request body read by the scrape handler. */
interface ScrapeRequest {
// Site to process; sent to the Firecrawl scrape and map endpoints. A missing
// or empty value makes the handler answer 400 "URL is required".
url: string;
// Optional clinic name. When present it is preferred over the scraped clinic
// name (and over the URL) as the subject of the review search query.
clinicName?: string;
}
/** Base URL shared by the Firecrawl v1 endpoints called by this function. */
const FIRECRAWL_API_BASE = "https://api.firecrawl.dev/v1";

/**
 * Structured-extraction options sent with the scrape request: a natural
 * language prompt plus the JSON schema the extracted clinic data should follow.
 */
const CLINIC_EXTRACTION_OPTIONS = {
  prompt:
    "Extract clinic information: clinic name, address, phone number, services offered, doctors with specialties, social media links (instagram, youtube, blog, facebook), business hours, and any marketing-related content like slogans or key messages",
  schema: {
    type: "object",
    properties: {
      clinicName: { type: "string" },
      address: { type: "string" },
      phone: { type: "string" },
      businessHours: { type: "string" },
      slogan: { type: "string" },
      services: {
        type: "array",
        items: { type: "string" },
      },
      doctors: {
        type: "array",
        items: {
          type: "object",
          properties: {
            name: { type: "string" },
            title: { type: "string" },
            specialty: { type: "string" },
          },
        },
      },
      socialMedia: {
        type: "object",
        properties: {
          instagram: { type: "string" },
          youtube: { type: "string" },
          blog: { type: "string" },
          facebook: { type: "string" },
        },
      },
    },
  },
};

/** Returns `value` when it is a plain (non-array) object, otherwise `{}`. */
function asRecord(value: unknown): Record<string, unknown> {
  return typeof value === "object" && value !== null && !Array.isArray(value)
    ? (value as Record<string, unknown>)
    : {};
}

/** Builds a JSON response that always carries the CORS headers. */
function jsonResponse(payload: unknown, status = 200): Response {
  return new Response(JSON.stringify(payload), {
    status,
    headers: { ...corsHeaders, "Content-Type": "application/json" },
  });
}

/**
 * Validates an already-parsed request body.
 *
 * @returns the request fields, or `null` when `url` is missing, empty or not
 *   a string. A `clinicName` that is not a non-empty string is dropped.
 */
function readScrapeRequest(body: unknown): ScrapeRequest | null {
  const { url, clinicName } = asRecord(body);
  if (typeof url !== "string" || url === "") {
    return null;
  }
  return typeof clinicName === "string" && clinicName !== ""
    ? { url, clinicName }
    : { url };
}

/**
 * POSTs `payload` as JSON to a Firecrawl v1 endpoint and returns the parsed
 * response object.
 *
 * A body that is not a JSON object is reported as
 * `{ success: false, error }` instead of surfacing a raw parse error, so
 * callers can treat it like any other failed Firecrawl response.
 *
 * @throws whatever `fetch` throws when the request itself cannot be made.
 */
async function postToFirecrawl(
  endpoint: string,
  apiKey: string,
  payload: unknown,
): Promise<Record<string, unknown>> {
  const response = await fetch(`${FIRECRAWL_API_BASE}/${endpoint}`, {
    method: "POST",
    headers: {
      "Content-Type": "application/json",
      Authorization: `Bearer ${apiKey}`,
    },
    body: JSON.stringify(payload),
  });

  let parsed: unknown;
  try {
    parsed = await response.json();
  } catch {
    parsed = undefined;
  }

  if (typeof parsed !== "object" || parsed === null || Array.isArray(parsed)) {
    return {
      success: false,
      error: `Firecrawl ${endpoint} returned HTTP ${response.status} without a JSON object body`,
    };
  }
  return parsed as Record<string, unknown>;
}

/**
 * Unwraps the outcome of an optional Firecrawl step. A rejected step is
 * logged and replaced by an empty object so the caller falls back to its
 * empty defaults.
 */
function settledOrEmpty(
  step: string,
  outcome: PromiseSettledResult<Record<string, unknown>>,
): Record<string, unknown> {
  if (outcome.status === "fulfilled") {
    return outcome.value;
  }
  console.error(`Firecrawl ${step} step failed:`, outcome.reason);
  return {};
}

/**
 * HTTP entry point.
 *
 * Expects a JSON body matching `ScrapeRequest`, then:
 *   1. scrapes `url` through Firecrawl with structured clinic extraction,
 *   2. maps the site to list its pages,
 *   3. searches the web for reviews of the clinic,
 *   4. stores the combined result in the `scrape_results` table when the
 *      Supabase environment variables are set.
 *
 * Responses (all JSON, all with CORS headers):
 *   - 200 `{ success: true, data }` on success,
 *   - 400 `{ error }` when the body is not valid JSON or `url` is missing,
 *   - 500 `{ success: false, error }` when the scrape step or anything
 *     unexpected fails.
 *
 * The map and search steps are supplementary: a failure there is logged and
 * yields an empty `siteMap` / `reviews` rather than failing the request.
 */
Deno.serve(async (req) => {
  // Handle CORS preflight
  if (req.method === "OPTIONS") {
    return new Response("ok", { headers: corsHeaders });
  }

  try {
    // A malformed body is a client error, so answer 400 instead of letting
    // the parse failure reach the generic 500 handler.
    let body: unknown;
    try {
      body = await req.json();
    } catch {
      return jsonResponse({ error: "Request body must be valid JSON" }, 400);
    }

    const request = readScrapeRequest(body);
    if (request === null) {
      return jsonResponse({ error: "URL is required" }, 400);
    }
    const { url, clinicName } = request;

    const apiKey = Deno.env.get("FIRECRAWL_API_KEY");
    if (!apiKey) {
      throw new Error("FIRECRAWL_API_KEY not configured");
    }

    // Step 1: Scrape the main website. This step is mandatory.
    const scrape = await postToFirecrawl("scrape", apiKey, {
      url,
      formats: ["json", "links"],
      jsonOptions: CLINIC_EXTRACTION_OPTIONS,
      // NOTE(review): presumably a delay in milliseconds that lets the page
      // render before extraction — confirm against the Firecrawl API docs.
      waitFor: 5000,
    });
    if (!scrape.success) {
      throw new Error(
        typeof scrape.error === "string" && scrape.error !== ""
          ? scrape.error
          : "Scraping failed",
      );
    }

    const scraped = asRecord(scrape.data);
    const clinic = asRecord(scraped.json);
    const scrapedName =
      typeof clinic.clinicName === "string" ? clinic.clinicName : "";

    // Prefer the caller-supplied name, then the scraped one, then the URL.
    // `||` is intentional: empty strings must fall through to the next option.
    const searchName = clinicName || scrapedName || url;

    // Steps 2 and 3 do not depend on each other, so run them concurrently.
    // Step 2 maps the site to discover its pages; step 3 searches for reviews
    // and ratings. The query keywords are Korean for "review", "rating" and
    // "testimonial", followed by what appear to be two review platform names.
    const [mapOutcome, searchOutcome] = await Promise.allSettled([
      postToFirecrawl("map", apiKey, { url, limit: 50 }),
      postToFirecrawl("search", apiKey, {
        query: `${searchName} 리뷰 평점 후기 강남언니 바비톡`,
        limit: 5,
      }),
    ]);
    const mapData = settledOrEmpty("map", mapOutcome);
    const searchData = settledOrEmpty("search", searchOutcome);

    // Combine all data
    const result = {
      clinic,
      siteLinks: scraped.links || [],
      siteMap: mapData.success ? mapData.links || [] : [],
      reviews: searchData.data || [],
      scrapedAt: new Date().toISOString(),
      sourceUrl: url,
    };

    // Save to Supabase if configured. Persistence is best-effort: a failed
    // insert is logged but does not change the response.
    const supabaseUrl = Deno.env.get("SUPABASE_URL");
    const supabaseKey = Deno.env.get("SUPABASE_SERVICE_ROLE_KEY");
    if (supabaseUrl && supabaseKey) {
      const supabase = createClient(supabaseUrl, supabaseKey);
      const { error: insertError } = await supabase
        .from("scrape_results")
        .insert({
          url,
          clinic_name: scrapedName || searchName,
          data: result,
        });
      if (insertError) {
        console.error("Failed to save scrape result:", insertError.message);
      }
    }

    return jsonResponse({ success: true, data: result });
  } catch (error) {
    // The caught value is `unknown`; narrow it before reading `.message`.
    const message = error instanceof Error ? error.message : String(error);
    console.error("scrape-website failed:", message);
    return jsonResponse({ success: false, error: message }, 500);
  }
});