feat: Sprint 1 — 7 data quality quick wins

WP-1: YouTube channel ID regex {20,} → {22} (IDs are exactly 24 chars: "UC" + 22)
WP-2: Naver Place category filtering in enrich-channels (성형/피부)
WP-3: Google Maps stores mapsUrl separately from clinicWebsite
WP-4: Naver Blog separates officialBlogUrl from search results
WP-5: 강남언니 rawRating + normalized rating (≤5 → ×2), Firecrawl
      prompt explicitly states "out of 10, NOT out of 5"
WP-6: Perplexity model centralized in _shared/config.ts (env override)
WP-7: Apify Instagram timeout 30s → 45s

Frontend: transformReport uses mapsUrl and officialBlogUrl when available

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
claude/bold-hawking
Haewon Kam 2026-04-04 23:35:40 +09:00
parent 1071328574
commit 80c57147e7
8 changed files with 80 additions and 39 deletions

View File

@ -994,8 +994,10 @@ export function mergeEnrichment(
name: '구글 지도',
status: 'active' as const,
details: `평점: ${gm.rating ?? '-'} / 리뷰: ${gm.reviewCount ?? '-'}`,
// Always use Google Maps search URL — gm.website is the clinic's own site, not Maps
url: gm.name ? `https://www.google.com/maps/search/${encodeURIComponent(String(gm.name))}` : '',
// Use Maps URL from enrichment if available, fallback to search URL
url: (gm as Record<string, unknown>).mapsUrl
? String((gm as Record<string, unknown>).mapsUrl)
: gm.name ? `https://www.google.com/maps/search/${encodeURIComponent(String(gm.name))}` : '',
};
if (gmChannelIdx >= 0) {
merged.otherChannels[gmChannelIdx] = gmChannel;
@ -1090,8 +1092,10 @@ export function mergeEnrichment(
name: '네이버 블로그',
status: 'active' as const,
details: `검색 결과: ${nb.totalResults?.toLocaleString() ?? '-'}건 / 최근 포스트 ${nb.posts?.length ?? 0}`,
// Always link to Naver blog search — individual post links may be unrelated personal blogs
url: nb.searchQuery ? `https://search.naver.com/search.naver?where=blog&query=${encodeURIComponent(String(nb.searchQuery))}` : '',
// Prefer official blog URL from Phase 1, fallback to search URL
url: (nb as Record<string, unknown>).officialBlogUrl
? String((nb as Record<string, unknown>).officialBlogUrl)
: nb.searchQuery ? `https://search.naver.com/search.naver?where=blog&query=${encodeURIComponent(String(nb.searchQuery))}` : '',
};
if (nbChannelIdx >= 0) {
merged.otherChannels[nbChannelIdx] = nbChannel;

View File

@ -0,0 +1,6 @@
/**
 * Shared configuration constants for Edge Functions.
 * Centralizes API model names and defaults to prevent hardcoding.
 */

/**
 * Perplexity model name sent in the `model` field of chat requests.
 *
 * Read from the `PERPLEXITY_MODEL` environment variable. Surrounding
 * whitespace is trimmed so a stray newline or space in the configured value
 * does not produce an invalid model name. `||` (not `??`) is deliberate: an
 * unset, empty, or whitespace-only value falls back to "sonar".
 */
export const PERPLEXITY_MODEL = Deno.env.get("PERPLEXITY_MODEL")?.trim() || "sonar";

View File

@ -66,7 +66,7 @@ async function verifyYouTube(handle: string, apiKey: string): Promise<VerifiedCh
}
// Try as channel ID directly (starts with UC)
if (cleanHandle.startsWith('UC')) {
if (cleanHandle.startsWith('UC') && cleanHandle.length === 24) {
const res = await fetch(`${YT_BASE}/channels?part=id,snippet&id=${cleanHandle}&key=${apiKey}`);
const data = await res.json();
if (data.items?.[0]) {

View File

@ -1,4 +1,5 @@
import "@supabase/functions-js/edge-runtime.d.ts";
import { PERPLEXITY_MODEL } from "../_shared/config.ts";
const corsHeaders = {
"Access-Control-Allow-Origin": "*",
@ -58,7 +59,7 @@ Deno.serve(async (req) => {
Authorization: `Bearer ${PERPLEXITY_API_KEY}`,
},
body: JSON.stringify({
model: "sonar",
model: PERPLEXITY_MODEL,
messages: [
{
role: "system",

View File

@ -1,6 +1,7 @@
import "@supabase/functions-js/edge-runtime.d.ts";
import { createClient } from "https://esm.sh/@supabase/supabase-js@2";
import type { VerifiedChannels } from "../_shared/verifyHandles.ts";
import { PERPLEXITY_MODEL } from "../_shared/config.ts";
const corsHeaders = {
"Access-Control-Allow-Origin": "*",
@ -200,7 +201,7 @@ Deno.serve(async (req) => {
url: guVerified!.url as string,
formats: ["json"],
jsonOptions: {
prompt: "Extract: hospital name, overall rating (out of 10), total review count, doctors with names/ratings/review counts/specialties, procedures offered, address, certifications/badges",
prompt: "Extract: hospital name, overall rating (강남언니 rating is always out of 10, NOT out of 5), total review count, doctors with names/ratings/review counts/specialties, procedures offered, address, certifications/badges",
schema: {
type: "object",
properties: {
@ -218,7 +219,10 @@ Deno.serve(async (req) => {
const hospital = data.data?.json;
if (hospital?.hospitalName) {
channelData.gangnamUnni = {
name: hospital.hospitalName, rating: hospital.rating, ratingScale: "/10",
name: hospital.hospitalName,
rawRating: hospital.rating,
rating: typeof hospital.rating === 'number' && hospital.rating > 0 && hospital.rating <= 5 ? hospital.rating * 2 : hospital.rating,
ratingScale: "/10",
totalReviews: hospital.totalReviews, doctors: (hospital.doctors || []).slice(0, 10),
procedures: hospital.procedures || [], address: hospital.address,
badges: hospital.badges || [], sourceUrl: guVerified!.url as string,
@ -232,12 +236,20 @@ Deno.serve(async (req) => {
const naverHeaders = { "X-Naver-Client-Id": NAVER_CLIENT_ID, "X-Naver-Client-Secret": NAVER_CLIENT_SECRET };
tasks.push((async () => {
// Get verified Naver Blog handle from Phase 1 for official blog URL
const nbVerified = verified.naverBlog as Record<string, unknown> | null;
const officialBlogHandle = nbVerified?.handle ? String(nbVerified.handle) : null;
const query = encodeURIComponent(`${clinicName} 후기`);
const res = await fetch(`https://openapi.naver.com/v1/search/blog.json?query=${query}&display=10&sort=sim`, { headers: naverHeaders });
if (!res.ok) return;
const data = await res.json();
channelData.naverBlog = {
totalResults: data.total || 0, searchQuery: `${clinicName} 후기`,
// Official blog URL from Phase 1 verified handle
officialBlogUrl: officialBlogHandle ? `https://blog.naver.com/${officialBlogHandle}` : null,
officialBlogHandle: officialBlogHandle,
// Blog mentions (third-party posts, NOT the official blog)
posts: (data.items || []).slice(0, 10).map((item: Record<string, string>) => ({
title: (item.title || "").replace(/<[^>]*>/g, ""),
description: (item.description || "").replace(/<[^>]*>/g, ""),
@ -294,7 +306,9 @@ Deno.serve(async (req) => {
if (place) {
channelData.googleMaps = {
name: place.title, rating: place.totalScore, reviewCount: place.reviewsCount,
address: place.address, phone: place.phone, website: place.website,
address: place.address, phone: place.phone,
clinicWebsite: place.website, // clinic's own website (not Maps URL)
mapsUrl: place.url || (place.title ? `https://www.google.com/maps/search/${encodeURIComponent(String(place.title))}` : ''),
category: place.categoryName, openingHours: place.openingHours,
topReviews: ((place.reviews as Record<string, unknown>[]) || []).slice(0, 10).map(r => ({
stars: r.stars, text: r.text, publishedAtDate: r.publishedAtDate,
@ -319,7 +333,7 @@ Deno.serve(async (req) => {
method: "POST",
headers: { "Content-Type": "application/json", Authorization: `Bearer ${PERPLEXITY_API_KEY}` },
body: JSON.stringify({
model: "sonar", messages: [
model: PERPLEXITY_MODEL, messages: [
{ role: "system", content: "You are a Korean medical marketing analyst. Always respond in Korean. Provide data in valid JSON format." },
{ role: "user", content: q.prompt },
], temperature: 0.3,

View File

@ -1,6 +1,7 @@
import "@supabase/functions-js/edge-runtime.d.ts";
import { createClient } from "https://esm.sh/@supabase/supabase-js@2";
import { extractSocialLinks, mergeSocialLinks } from "../_shared/extractSocialLinks.ts";
import { PERPLEXITY_MODEL } from "../_shared/config.ts";
import { verifyAllHandles, type VerifiedChannels } from "../_shared/verifyHandles.ts";
import { RESEARCH_SYSTEM_PROMPT, buildResearchUserPrompt } from "../_shared/researchPrompt.ts";
@ -31,7 +32,7 @@ function extractHandle(raw: string, platform: string): string | null {
if (m) return m[1] ? `@${m[1]}` : m[2] || m[3] || null;
h = h.replace(/^@/, '');
if (h.includes('http') || h.includes('/') || h.includes('.com')) return null;
if (/^UC[a-zA-Z0-9_-]{20,}$/.test(h)) return h;
if (/^UC[a-zA-Z0-9_-]{22}$/.test(h)) return h; // YouTube channel IDs are exactly 24 chars (UC + 22)
if (/^[a-zA-Z0-9._-]+$/.test(h) && h.length >= 2) return `@${h}`;
return null;
}
@ -152,7 +153,7 @@ Deno.serve(async (req) => {
method: "POST",
headers: { "Content-Type": "application/json", Authorization: `Bearer ${PERPLEXITY_API_KEY}` },
body: JSON.stringify({
model: "sonar",
model: PERPLEXITY_MODEL,
messages: [
{ role: "system", content: "Respond with ONLY the clinic name in Korean, nothing else." },
{ role: "user", content: `${url} 이 URL의 병원/클리닉 한국어 이름이 뭐야?` },
@ -300,7 +301,7 @@ Deno.serve(async (req) => {
method: "POST",
headers: { "Content-Type": "application/json", Authorization: `Bearer ${PERPLEXITY_API_KEY}` },
body: JSON.stringify({
model: "sonar",
model: PERPLEXITY_MODEL,
messages: [
{ role: "system", content: "You are a social media researcher. Search the web and find social media accounts. Respond ONLY with valid JSON." },
{ role: "user", content: `${searchName} 병원의 인스타그램, 유튜브, 페이스북, 틱톡, 네이버블로그 계정을 검색해서 찾아줘. 검색 결과에서 발견된 계정을 모두 알려줘. 인스타그램은 여러 계정이 있을 수 있어.\n\n{"instagram": ["handle1", "handle2"], "youtube": "channel URL or handle", "facebook": "page name or URL", "tiktok": "handle", "naverBlog": "blog ID"}` },
@ -333,7 +334,7 @@ Deno.serve(async (req) => {
method: "POST",
headers: { "Content-Type": "application/json", Authorization: `Bearer ${PERPLEXITY_API_KEY}` },
body: JSON.stringify({
model: "sonar",
model: PERPLEXITY_MODEL,
messages: [
{ role: "system", content: "You search for clinic listings on medical platforms. Respond ONLY with valid JSON." },
{ role: "user", content: `${resolvedName} 병원 강남언니 gangnamunni.com 페이지를 찾아줘.\n\n{"gangnamUnni": {"url": "https://gangnamunni.com/hospitals/...", "rating": 9.5, "reviews": 1000}}` },
@ -377,7 +378,7 @@ Deno.serve(async (req) => {
for (const handle of candidates.slice(0, 6)) {
try {
const apifyRes = await fetch(
`${APIFY_BASE}/acts/apify~instagram-profile-scraper/runs?token=${APIFY_TOKEN}&waitForFinish=30`,
`${APIFY_BASE}/acts/apify~instagram-profile-scraper/runs?token=${APIFY_TOKEN}&waitForFinish=45`,
{
method: "POST",
headers: { "Content-Type": "application/json" },

View File

@ -168,7 +168,8 @@ Deno.serve(async (req) => {
reviewCount: place.reviewsCount,
address: place.address,
phone: place.phone,
website: place.website,
clinicWebsite: place.website,
mapsUrl: place.url || (place.title ? `https://www.google.com/maps/search/${encodeURIComponent(String(place.title))}` : ''),
category: place.categoryName,
openingHours: place.openingHours,
topReviews: ((place.reviews as Record<string, unknown>[]) || [])
@ -233,7 +234,7 @@ Deno.serve(async (req) => {
url: hospitalUrl,
formats: ["json"],
jsonOptions: {
prompt: "Extract: hospital name, overall rating (out of 10), total review count, doctors with names/ratings/review counts/specialties, procedures offered, address, certifications/badges",
prompt: "Extract: hospital name, overall rating (강남언니 rating is always out of 10, NOT out of 5), total review count, doctors with names/ratings/review counts/specialties, procedures offered, address, certifications/badges",
schema: {
type: "object",
properties: {
@ -267,7 +268,8 @@ Deno.serve(async (req) => {
if (hospital?.hospitalName) {
enrichment.gangnamUnni = {
name: hospital.hospitalName,
rating: hospital.rating,
rawRating: hospital.rating,
rating: typeof hospital.rating === 'number' && hospital.rating > 0 && hospital.rating <= 5 ? hospital.rating * 2 : hospital.rating,
ratingScale: "/10",
totalReviews: hospital.totalReviews,
doctors: (hospital.doctors || []).slice(0, 10),
@ -316,27 +318,39 @@ Deno.serve(async (req) => {
})()
);
// 4b. Local search — Naver Place
// 4b. Local search — Naver Place (with category filtering to avoid same-name clinics)
tasks.push(
(async () => {
const query = encodeURIComponent(clinicName);
const queries = [`${clinicName} 성형외과`, `${clinicName} 성형`, clinicName];
for (const q of queries) {
const query = encodeURIComponent(q);
const res = await fetch(
`https://openapi.naver.com/v1/search/local.json?query=${query}&display=5&sort=comment`,
{ headers: naverHeaders }
);
if (!res.ok) return;
if (!res.ok) continue;
const data = await res.json();
const place = (data.items || [])[0];
if (place) {
const items = (data.items || []) as Record<string, string>[];
// Prefer category matching 성형 or 피부
const match = items.find(i =>
(i.category || '').includes('성형') || (i.category || '').includes('피부')
) || items.find(i => {
const name = (i.title || '').replace(/<[^>]*>/g, '').toLowerCase();
return name.includes(clinicName.replace(/성형외과|병원|의원/g, '').trim().toLowerCase());
}) || null;
if (match) {
enrichment.naverPlace = {
name: (place.title || "").replace(/<[^>]*>/g, ""),
category: place.category,
address: place.roadAddress || place.address,
telephone: place.telephone,
link: place.link,
mapx: place.mapx,
mapy: place.mapy,
name: (match.title || "").replace(/<[^>]*>/g, ""),
category: match.category,
address: match.roadAddress || match.address,
telephone: match.telephone,
link: match.link,
mapx: match.mapx,
mapy: match.mapy,
};
break;
}
}
})()
);

View File

@ -1,6 +1,7 @@
import "@supabase/functions-js/edge-runtime.d.ts";
import { createClient } from "https://esm.sh/@supabase/supabase-js@2";
import { normalizeInstagramHandle } from "../_shared/normalizeHandles.ts";
import { PERPLEXITY_MODEL } from "../_shared/config.ts";
const corsHeaders = {
"Access-Control-Allow-Origin": "*",
@ -110,7 +111,7 @@ ${JSON.stringify(scrapeData.branding || {}, null, 2).slice(0, 1000)}
method: "POST",
headers: { "Content-Type": "application/json", Authorization: `Bearer ${PERPLEXITY_API_KEY}` },
body: JSON.stringify({
model: "sonar",
model: PERPLEXITY_MODEL,
messages: [
{ role: "system", content: "You are a Korean medical marketing analyst. Respond ONLY with valid JSON, no markdown code blocks. Use Korean for text fields. 강남언니 rating is 10-point scale. Use ONLY the provided real data — never invent metrics." },
{ role: "user", content: reportPrompt },
@ -217,7 +218,7 @@ ${JSON.stringify(analyzeResult.data?.analysis || {}, null, 2).slice(0, 4000)}
method: "POST",
headers: { "Content-Type": "application/json", Authorization: `Bearer ${PERPLEXITY_API_KEY}` },
body: JSON.stringify({
model: "sonar",
model: PERPLEXITY_MODEL,
messages: [
{ role: "system", content: "You are a Korean medical marketing analyst. Respond ONLY with valid JSON, no markdown code blocks. Korean for text fields. 강남언니 rating uses 10-point scale." },
{ role: "user", content: reportPrompt },