fix: naverPlace 오매핑 수정 + naverBlog RSS 공식 블로그 스크래핑
- naverPlace: 매칭 우선순위 재정의 — 전체 클리닉명 정확 매칭 > 짧은명+성형 카테고리 > 성형 카테고리 순으로 변경 (기존 로직은 피부과까지 매칭되어 데이뷰의원 오매핑 발생)
- naverBlog: Firecrawl 공식 블로그 스크래핑 → Naver RSS 피드로 교체 (blog.naver.com은 Firecrawl 차단, RSS는 공개 엔드포인트)

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
claude/bold-hawking
parent
2027ae9b64
commit
5ed35bc4cd
|
|
@ -473,41 +473,39 @@ Deno.serve(async (req) => {
|
|||
if (!res.ok) throw new Error(`Naver Blog API returned ${res.status}`);
|
||||
const data = await res.json();
|
||||
|
||||
// ─── 5b. Firecrawl: Official blog recent posts ───
|
||||
// Registry always provides the official blog URL — scrape it for real content metrics.
|
||||
// ─── 5b. Naver RSS: Official blog recent posts ───
|
||||
// blog.naver.com is blocked by Firecrawl. Use the public RSS feed instead:
|
||||
// https://rss.blog.naver.com/{blogId}.xml — no auth required.
|
||||
let officialBlogContent: Record<string, unknown> | null = null;
|
||||
if (officialBlogUrl) {
|
||||
const FIRECRAWL_KEY = Deno.env.get("FIRECRAWL_API_KEY");
|
||||
if (FIRECRAWL_KEY) {
|
||||
if (officialBlogHandle) {
|
||||
try {
|
||||
const blogScrape = await fetchWithRetry(`https://api.firecrawl.dev/v1/scrape`, {
|
||||
method: "POST",
|
||||
headers: { "Content-Type": "application/json", Authorization: `Bearer ${FIRECRAWL_KEY}` },
|
||||
body: JSON.stringify({
|
||||
url: officialBlogUrl,
|
||||
formats: ["json"],
|
||||
jsonOptions: {
|
||||
prompt: "Extract the blog's recent posts: title, date, excerpt. Also total post count visible on the page, and the blog category/tag list.",
|
||||
schema: {
|
||||
type: "object",
|
||||
properties: {
|
||||
totalPosts: { type: "number" },
|
||||
recentPosts: { type: "array", items: { type: "object", properties: { title: { type: "string" }, date: { type: "string" }, excerpt: { type: "string" } } } },
|
||||
categories: { type: "array", items: { type: "string" } },
|
||||
},
|
||||
},
|
||||
},
|
||||
waitFor: 3000,
|
||||
}),
|
||||
}, { label: "firecrawl-naver-blog", timeoutMs: 45000 });
|
||||
if (blogScrape.ok) {
|
||||
const blogData = await blogScrape.json();
|
||||
officialBlogContent = blogData.data?.json || null;
|
||||
console.log(`[naverBlog] Official blog scraped: ${officialBlogContent?.totalPosts ?? 0} posts`);
|
||||
const rssRes = await fetchWithRetry(
|
||||
`https://rss.blog.naver.com/${officialBlogHandle}.xml`,
|
||||
{},
|
||||
{ label: "naver-rss", timeoutMs: 15000 }
|
||||
);
|
||||
if (rssRes.ok) {
|
||||
const xml = await rssRes.text();
|
||||
// Parse RSS items: <item><title>...</title><link>...</link><pubDate>...</pubDate><description>...</description></item>
|
||||
const items: Array<{ title: string; link: string; date: string; excerpt: string }> = [];
|
||||
const itemMatches = xml.matchAll(/<item>([\s\S]*?)<\/item>/g);
|
||||
for (const m of itemMatches) {
|
||||
const block = m[1];
|
||||
const title = (block.match(/<title><!\[CDATA\[(.*?)\]\]><\/title>/) || block.match(/<title>(.*?)<\/title>/))?.[1] || "";
|
||||
const link = (block.match(/<link>(.*?)<\/link>/))?.[1] || "";
|
||||
const date = (block.match(/<pubDate>(.*?)<\/pubDate>/))?.[1] || "";
|
||||
const desc = (block.match(/<description><!\[CDATA\[(.*?)\]\]><\/description>/) || block.match(/<description>(.*?)<\/description>/))?.[1] || "";
|
||||
items.push({ title, link, date, excerpt: desc.replace(/<[^>]*>/g, "").trim().slice(0, 150) });
|
||||
}
|
||||
const totalPosts = (xml.match(/<totalCount>(\d+)<\/totalCount>/) || xml.match(/<managedCount>(\d+)<\/managedCount>/))?.[1];
|
||||
officialBlogContent = {
|
||||
totalPosts: totalPosts ? Number(totalPosts) : items.length,
|
||||
recentPosts: items.slice(0, 10),
|
||||
};
|
||||
console.log(`[naverBlog] RSS fetched: ${items.length} posts from ${officialBlogHandle}`);
|
||||
}
|
||||
} catch (e) {
|
||||
console.warn(`[naverBlog] Official blog Firecrawl failed (non-critical):`, e);
|
||||
}
|
||||
console.warn(`[naverBlog] RSS fetch failed (non-critical):`, e);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -533,23 +531,36 @@ Deno.serve(async (req) => {
|
|||
`${clinicName} 성형`,
|
||||
clinicName,
|
||||
];
|
||||
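// Core name with type suffixes (성형외과/병원/의원) removed, trimmed and lowercased (e.g. "뷰성형외과" → "뷰"). The result can be very short, so its length is checked before it is used for matching.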
|
||||
const cleanedName = clinicName.replace(/성형외과|병원|의원/g, '').trim().toLowerCase();
|
||||
|
||||
for (const q of queries) {
|
||||
const query = encodeURIComponent(q);
|
||||
const res = await fetchWithRetry(`https://openapi.naver.com/v1/search/local.json?query=${query}&display=5&sort=comment`, { headers: naverHeaders }, { label: "naver-place" });
|
||||
if (!res.ok) continue;
|
||||
const data = await res.json();
|
||||
// Find the best match: prefer category containing 성형 or 피부
|
||||
const items = (data.items || []) as Record<string, string>[];
|
||||
const match = items.find(i =>
|
||||
(i.category || '').includes('성형') || (i.category || '').includes('피부')
|
||||
) || items.find(i => {
|
||||
const name = (i.title || '').replace(/<[^>]*>/g, '').toLowerCase();
|
||||
return name.includes(clinicName.replace(/성형외과|병원|의원/g, '').trim().toLowerCase());
|
||||
}) || null;
|
||||
|
||||
const normalize = (s: string) => (s || '').replace(/<[^>]*>/g, '').toLowerCase();
|
||||
|
||||
// Priority 1: title contains the full clinicName (case-insensitive substring match after stripping HTML tags)
|
||||
let match = items.find(i => normalize(i.title).includes(clinicName.toLowerCase())) || null;
|
||||
|
||||
// Priority 2: name contains cleaned short name AND category is 성형 (plastic surgery only)
|
||||
if (!match && cleanedName.length >= 2) {
|
||||
match = items.find(i =>
|
||||
normalize(i.title).includes(cleanedName) && (i.category || '').includes('성형')
|
||||
) || null;
|
||||
}
|
||||
|
||||
// Priority 3: category is 성형 (plastic surgery) — not 피부 to avoid skin clinics
|
||||
if (!match) {
|
||||
match = items.find(i => (i.category || '').includes('성형')) || null;
|
||||
}
|
||||
|
||||
if (match) {
|
||||
channelData.naverPlace = {
|
||||
name: (match.title || "").replace(/<[^>]*>/g, ""),
|
||||
name: normalize(match.title),
|
||||
category: match.category, address: match.roadAddress || match.address,
|
||||
telephone: match.telephone, link: match.link, mapx: match.mapx, mapy: match.mapy,
|
||||
};
|
||||
|
|
|
|||
Loading…
Reference in New Issue