fix: naverPlace 오매핑 수정 + naverBlog RSS 공식 블로그 스크래핑
- naverPlace: 매칭 우선순위 재정의 — 전체 클리닉명 정확 매칭 > 짧은명+성형 카테고리 > 성형 카테고리 순으로 변경 (기존 로직은 피부과까지 매칭되어 데이뷰의원 오매핑 발생)
- naverBlog: Firecrawl 공식 블로그 스크래핑 → Naver RSS 피드로 교체 (blog.naver.com은 Firecrawl 차단, RSS는 공개 엔드포인트)

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
claude/bold-hawking
parent
2027ae9b64
commit
5ed35bc4cd
|
|
@ -473,41 +473,39 @@ Deno.serve(async (req) => {
|
||||||
if (!res.ok) throw new Error(`Naver Blog API returned ${res.status}`);
|
if (!res.ok) throw new Error(`Naver Blog API returned ${res.status}`);
|
||||||
const data = await res.json();
|
const data = await res.json();
|
||||||
|
|
||||||
// ─── 5b. Firecrawl: Official blog recent posts ───
|
// ─── 5b. Naver RSS: Official blog recent posts ───
|
||||||
// Registry always provides the official blog URL — scrape it for real content metrics.
|
// blog.naver.com is blocked by Firecrawl. Use the public RSS feed instead:
|
||||||
|
// https://rss.blog.naver.com/{blogId}.xml — no auth required.
|
||||||
let officialBlogContent: Record<string, unknown> | null = null;
|
let officialBlogContent: Record<string, unknown> | null = null;
|
||||||
if (officialBlogUrl) {
|
if (officialBlogHandle) {
|
||||||
const FIRECRAWL_KEY = Deno.env.get("FIRECRAWL_API_KEY");
|
try {
|
||||||
if (FIRECRAWL_KEY) {
|
const rssRes = await fetchWithRetry(
|
||||||
try {
|
`https://rss.blog.naver.com/${officialBlogHandle}.xml`,
|
||||||
const blogScrape = await fetchWithRetry(`https://api.firecrawl.dev/v1/scrape`, {
|
{},
|
||||||
method: "POST",
|
{ label: "naver-rss", timeoutMs: 15000 }
|
||||||
headers: { "Content-Type": "application/json", Authorization: `Bearer ${FIRECRAWL_KEY}` },
|
);
|
||||||
body: JSON.stringify({
|
if (rssRes.ok) {
|
||||||
url: officialBlogUrl,
|
const xml = await rssRes.text();
|
||||||
formats: ["json"],
|
// Parse RSS items: <item><title>...</title><link>...</link><pubDate>...</pubDate><description>...</description></item>
|
||||||
jsonOptions: {
|
const items: Array<{ title: string; link: string; date: string; excerpt: string }> = [];
|
||||||
prompt: "Extract the blog's recent posts: title, date, excerpt. Also total post count visible on the page, and the blog category/tag list.",
|
const itemMatches = xml.matchAll(/<item>([\s\S]*?)<\/item>/g);
|
||||||
schema: {
|
for (const m of itemMatches) {
|
||||||
type: "object",
|
const block = m[1];
|
||||||
properties: {
|
const title = (block.match(/<title><!\[CDATA\[(.*?)\]\]><\/title>/) || block.match(/<title>(.*?)<\/title>/))?.[1] || "";
|
||||||
totalPosts: { type: "number" },
|
const link = (block.match(/<link>(.*?)<\/link>/))?.[1] || "";
|
||||||
recentPosts: { type: "array", items: { type: "object", properties: { title: { type: "string" }, date: { type: "string" }, excerpt: { type: "string" } } } },
|
const date = (block.match(/<pubDate>(.*?)<\/pubDate>/))?.[1] || "";
|
||||||
categories: { type: "array", items: { type: "string" } },
|
const desc = (block.match(/<description><!\[CDATA\[(.*?)\]\]><\/description>/) || block.match(/<description>(.*?)<\/description>/))?.[1] || "";
|
||||||
},
|
items.push({ title, link, date, excerpt: desc.replace(/<[^>]*>/g, "").trim().slice(0, 150) });
|
||||||
},
|
|
||||||
},
|
|
||||||
waitFor: 3000,
|
|
||||||
}),
|
|
||||||
}, { label: "firecrawl-naver-blog", timeoutMs: 45000 });
|
|
||||||
if (blogScrape.ok) {
|
|
||||||
const blogData = await blogScrape.json();
|
|
||||||
officialBlogContent = blogData.data?.json || null;
|
|
||||||
console.log(`[naverBlog] Official blog scraped: ${officialBlogContent?.totalPosts ?? 0} posts`);
|
|
||||||
}
|
}
|
||||||
} catch (e) {
|
const totalPosts = (xml.match(/<totalCount>(\d+)<\/totalCount>/) || xml.match(/<managedCount>(\d+)<\/managedCount>/))?.[1];
|
||||||
console.warn(`[naverBlog] Official blog Firecrawl failed (non-critical):`, e);
|
officialBlogContent = {
|
||||||
|
totalPosts: totalPosts ? Number(totalPosts) : items.length,
|
||||||
|
recentPosts: items.slice(0, 10),
|
||||||
|
};
|
||||||
|
console.log(`[naverBlog] RSS fetched: ${items.length} posts from ${officialBlogHandle}`);
|
||||||
}
|
}
|
||||||
|
} catch (e) {
|
||||||
|
console.warn(`[naverBlog] RSS fetch failed (non-critical):`, e);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -533,23 +531,36 @@ Deno.serve(async (req) => {
|
||||||
`${clinicName} 성형`,
|
`${clinicName} 성형`,
|
||||||
clinicName,
|
clinicName,
|
||||||
];
|
];
|
||||||
|
// Core name with type suffixes (성형외과/병원/의원) removed, trimmed and lowercased (e.g. "뷰성형외과" → "뷰")
|
||||||
|
const cleanedName = clinicName.replace(/성형외과|병원|의원/g, '').trim().toLowerCase();
|
||||||
|
|
||||||
for (const q of queries) {
|
for (const q of queries) {
|
||||||
const query = encodeURIComponent(q);
|
const query = encodeURIComponent(q);
|
||||||
const res = await fetchWithRetry(`https://openapi.naver.com/v1/search/local.json?query=${query}&display=5&sort=comment`, { headers: naverHeaders }, { label: "naver-place" });
|
const res = await fetchWithRetry(`https://openapi.naver.com/v1/search/local.json?query=${query}&display=5&sort=comment`, { headers: naverHeaders }, { label: "naver-place" });
|
||||||
if (!res.ok) continue;
|
if (!res.ok) continue;
|
||||||
const data = await res.json();
|
const data = await res.json();
|
||||||
// Find the best match: prefer category containing 성형 or 피부
|
|
||||||
const items = (data.items || []) as Record<string, string>[];
|
const items = (data.items || []) as Record<string, string>[];
|
||||||
const match = items.find(i =>
|
|
||||||
(i.category || '').includes('성형') || (i.category || '').includes('피부')
|
const normalize = (s: string) => (s || '').replace(/<[^>]*>/g, '').toLowerCase();
|
||||||
) || items.find(i => {
|
|
||||||
const name = (i.title || '').replace(/<[^>]*>/g, '').toLowerCase();
|
// Priority 1: title contains the full clinicName (case-insensitive substring match, HTML tags stripped)
|
||||||
return name.includes(clinicName.replace(/성형외과|병원|의원/g, '').trim().toLowerCase());
|
let match = items.find(i => normalize(i.title).includes(clinicName.toLowerCase())) || null;
|
||||||
}) || null;
|
|
||||||
|
// Priority 2: name contains cleaned short name AND category is 성형 (plastic surgery only)
|
||||||
|
if (!match && cleanedName.length >= 2) {
|
||||||
|
match = items.find(i =>
|
||||||
|
normalize(i.title).includes(cleanedName) && (i.category || '').includes('성형')
|
||||||
|
) || null;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Priority 3: category is 성형 (plastic surgery) — not 피부 to avoid skin clinics
|
||||||
|
if (!match) {
|
||||||
|
match = items.find(i => (i.category || '').includes('성형')) || null;
|
||||||
|
}
|
||||||
|
|
||||||
if (match) {
|
if (match) {
|
||||||
channelData.naverPlace = {
|
channelData.naverPlace = {
|
||||||
name: (match.title || "").replace(/<[^>]*>/g, ""),
|
name: normalize(match.title),
|
||||||
category: match.category, address: match.roadAddress || match.address,
|
category: match.category, address: match.roadAddress || match.address,
|
||||||
telephone: match.telephone, link: match.link, mapx: match.mapx, mapy: match.mapy,
|
telephone: match.telephone, link: match.link, mapx: match.mapx, mapy: match.mapy,
|
||||||
};
|
};
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue