161 lines
4.8 KiB
TypeScript
161 lines
4.8 KiB
TypeScript
import "@supabase/functions-js/edge-runtime.d.ts";
|
|
import { createClient } from "https://esm.sh/@supabase/supabase-js@2";
|
|
|
|
/**
 * CORS headers attached to every response from this function, including the
 * preflight reply. Any origin is allowed; the allowed request headers are the
 * ones listed in `Access-Control-Allow-Headers`.
 */
const corsHeaders = {
  "Access-Control-Allow-Origin": "*",
  "Access-Control-Allow-Headers":
    "authorization, x-client-info, apikey, content-type",
};
|
|
|
|
/** JSON body expected by the scrape endpoint. */
interface ScrapeRequest {
  /** Address of the clinic website to scrape, map and use as the stored source URL. */
  url: string;
  /**
   * Optional clinic name used when building the review search query; when
   * omitted, the name extracted from the scraped page (or the URL) is used.
   */
  clinicName?: string;
}
|
|
|
|
/**
 * HTTP entry point of the edge function.
 *
 * Flow for a non-preflight request:
 *   1. Parse the JSON body and require a non-empty string `url` (400 otherwise).
 *   2. Scrape the page through Firecrawl, asking for structured clinic data
 *      and the page's links. A failed scrape aborts the request.
 *   3. In parallel, map the site (up to 50 links) and run a review search
 *      (up to 5 hits). Unsuccessful map/search payloads degrade to empty lists.
 *   4. If `SUPABASE_URL` and `SUPABASE_SERVICE_ROLE_KEY` are set, insert the
 *      combined result into the `scrape_results` table (best effort).
 *   5. Reply with `{ success: true, data }`.
 *
 * Any thrown error is reported as a 500 with `{ success: false, error }`.
 * Every response carries the CORS headers.
 */
Deno.serve(async (req: Request): Promise<Response> => {
  // Handle CORS preflight
  if (req.method === "OPTIONS") {
    return new Response("ok", { headers: corsHeaders });
  }

  try {
    // A body that is not valid JSON is the caller's mistake, so answer 400
    // instead of letting the parse error fall through to the 500 handler.
    let payload: unknown;
    try {
      payload = await req.json();
    } catch {
      return jsonResponse({ error: "Request body must be valid JSON" }, 400);
    }

    // `null` cannot be destructured; treat it as an empty request so it is
    // rejected by the URL check below rather than raising a TypeError.
    const { url, clinicName } = (payload ?? {}) as Partial<ScrapeRequest>;

    if (typeof url !== "string" || url.trim() === "") {
      return jsonResponse({ error: "URL is required" }, 400);
    }

    const FIRECRAWL_API_KEY = Deno.env.get("FIRECRAWL_API_KEY");
    if (!FIRECRAWL_API_KEY) {
      throw new Error("FIRECRAWL_API_KEY not configured");
    }

    // Step 1: Scrape the main website
    const scrapeData = await firecrawlPost("scrape", FIRECRAWL_API_KEY, {
      url,
      formats: ["json", "links"],
      jsonOptions: {
        prompt:
          "Extract clinic information: clinic name, address, phone number, services offered, doctors with specialties, social media links (instagram, youtube, blog, facebook), business hours, and any marketing-related content like slogans or key messages",
        schema: {
          type: "object",
          properties: {
            clinicName: { type: "string" },
            address: { type: "string" },
            phone: { type: "string" },
            businessHours: { type: "string" },
            slogan: { type: "string" },
            services: {
              type: "array",
              items: { type: "string" },
            },
            doctors: {
              type: "array",
              items: {
                type: "object",
                properties: {
                  name: { type: "string" },
                  title: { type: "string" },
                  specialty: { type: "string" },
                },
              },
            },
            socialMedia: {
              type: "object",
              properties: {
                instagram: { type: "string" },
                youtube: { type: "string" },
                blog: { type: "string" },
                facebook: { type: "string" },
              },
            },
          },
        },
      },
      waitFor: 5000,
    });

    if (!scrapeData.success) {
      throw new Error(scrapeData.error || "Scraping failed");
    }

    const scraped = (scrapeData.data ?? {}) as ScrapedPage;

    // `||` (not `??`) on purpose: an empty-string name should fall through
    // to the next candidate.
    const searchName = clinicName || scraped.json?.clinicName || url;

    // Steps 2 and 3 do not depend on each other, so issue them together:
    // map the site to discover its pages, and search for reviews and ratings.
    const [mapData, searchData] = await Promise.all([
      firecrawlPost("map", FIRECRAWL_API_KEY, { url, limit: 50 }),
      firecrawlPost("search", FIRECRAWL_API_KEY, {
        query: `${searchName} 리뷰 평점 후기 강남언니 바비톡`,
        limit: 5,
      }),
    ]);

    // Combine all data
    const result = {
      clinic: scraped.json || {},
      siteLinks: scraped.links || [],
      siteMap: mapData.success ? mapData.links || [] : [],
      reviews: searchData.data || [],
      scrapedAt: new Date().toISOString(),
      sourceUrl: url,
    };

    // Save to Supabase if configured
    const supabaseUrl = Deno.env.get("SUPABASE_URL");
    const supabaseKey = Deno.env.get("SUPABASE_SERVICE_ROLE_KEY");

    if (supabaseUrl && supabaseKey) {
      const supabase = createClient(supabaseUrl, supabaseKey);
      const { error: insertError } = await supabase
        .from("scrape_results")
        .insert({
          url,
          clinic_name: result.clinic.clinicName || searchName,
          data: result,
        });
      // Persistence is best effort: the caller still receives the scraped
      // data, but a failed insert must not vanish without a trace.
      if (insertError) {
        console.error("Failed to save scrape result:", insertError.message);
      }
    }

    return jsonResponse({ success: true, data: result });
  } catch (error) {
    // Caught values are `unknown`; only Error instances have a `message`.
    const message = error instanceof Error ? error.message : String(error);
    return jsonResponse({ success: false, error: message }, 500);
  }
});

/** Loosely typed top-level payload returned by the Firecrawl endpoints used here. */
interface FirecrawlEnvelope {
  success?: boolean;
  error?: string;
  links?: unknown[];
  data?: unknown;
}

/** The parts of a scrape response's `data` object that this function reads. */
interface ScrapedPage {
  json?: { clinicName?: string; [field: string]: unknown };
  links?: unknown[];
}

/** Builds a JSON response carrying the CORS headers. */
function jsonResponse(body: unknown, status = 200): Response {
  return new Response(JSON.stringify(body), {
    status,
    headers: { ...corsHeaders, "Content-Type": "application/json" },
  });
}

/**
 * POSTs `body` as JSON to a Firecrawl v1 endpoint and returns the parsed payload.
 *
 * @throws Error when the response body is not JSON; the message names the
 *   endpoint and HTTP status so the failure is diagnosable.
 */
async function firecrawlPost(
  endpoint: "scrape" | "map" | "search",
  apiKey: string,
  body: Record<string, unknown>,
): Promise<FirecrawlEnvelope> {
  const response = await fetch(`https://api.firecrawl.dev/v1/${endpoint}`, {
    method: "POST",
    headers: {
      "Content-Type": "application/json",
      Authorization: `Bearer ${apiKey}`,
    },
    body: JSON.stringify(body),
  });

  let parsed: unknown;
  try {
    parsed = await response.json();
  } catch {
    throw new Error(
      `Firecrawl ${endpoint} returned a non-JSON response (HTTP ${response.status})`,
    );
  }

  // A literal `null` payload has no fields; normalise it to an empty envelope.
  return (parsed ?? {}) as FirecrawlEnvelope;
}
|