diff --git a/.gitignore b/.gitignore
index 83ef291..e60be01 100644
--- a/.gitignore
+++ b/.gitignore
@@ -42,3 +42,5 @@ Thumbs.db
# Alembic
alembic/versions/*.pyc
+
+test_results/
\ No newline at end of file
diff --git a/README.md b/README.md
index e69de29..3df933f 100644
--- a/README.md
+++ b/README.md
@@ -0,0 +1,15 @@
+# o2o-infinith-backend
+
+## 설치
+
+**Docker**
+
+```bash
+curl -fsSL https://get.docker.com | sh
+```
+
+## 실행
+
+```bash
+docker compose up -d
+```
\ No newline at end of file
diff --git a/SQL/db_create.sql b/SQL/db_create.sql
new file mode 100644
index 0000000..47b71b0
--- /dev/null
+++ b/SQL/db_create.sql
@@ -0,0 +1,100 @@
+-- 테이블 순서는 관계를 고려하여 한 번에 실행해도 에러가 발생하지 않게 정렬되었습니다.
+
+-- instagram_data Table Create SQL
+-- 테이블 생성 SQL - instagram_data
+CREATE TABLE instagram_data
+(
+ `id` INT NOT NULL AUTO_INCREMENT,
+ `hospital_id` INT NOT NULL,
+ `url` VARCHAR(500) NOT NULL,
+ `raw_data` JSON NULL,
+ `created_at` TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP,
+ PRIMARY KEY (id)
+);
+
+-- Index 설정 SQL - instagram_data(hospital_id)
+CREATE INDEX IX_instagram_data_1
+ ON instagram_data(hospital_id);
+
+
+-- facebook_data Table Create SQL
+-- 테이블 생성 SQL - facebook_data
+CREATE TABLE facebook_data
+(
+ `id` INT NOT NULL AUTO_INCREMENT,
+ `hospital_id` INT NOT NULL,
+ `url` VARCHAR(500) NOT NULL,
+ `raw_data` JSON NULL,
+ `created_at` TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP,
+ PRIMARY KEY (id)
+);
+
+-- Index 설정 SQL - facebook_data(hospital_id)
+CREATE INDEX IX_facebook_data_1
+ ON facebook_data(hospital_id);
+
+
+-- naver_blog_data Table Create SQL
+-- 테이블 생성 SQL - naver_blog_data
+CREATE TABLE naver_blog_data
+(
+ `id` INT NOT NULL AUTO_INCREMENT,
+ `hospital_id` INT NOT NULL,
+ `url` VARCHAR(500) NOT NULL,
+ `raw_data` JSON NULL,
+ `created_at` TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP,
+ PRIMARY KEY (id)
+);
+
+-- Index 설정 SQL - naver_blog_data(hospital_id)
+CREATE INDEX IX_naver_blog_data_1
+ ON naver_blog_data(hospital_id);
+
+
+-- hospital_baseinfo Table Create SQL
+-- 테이블 생성 SQL - hospital_baseinfo
+CREATE TABLE hospital_baseinfo
+(
+ `hospital_id` INT NOT NULL AUTO_INCREMENT,
+ `owner_user_id` INT NOT NULL,
+ `hospital_name` VARCHAR(50) NOT NULL,
+ `brn` VARCHAR(50) NOT NULL,
+ `road_address` VARCHAR(100) NULL,
+ `site_address` VARCHAR(100) NULL,
+ `created_at` TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP,
+ `updated_at` TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP,
+ PRIMARY KEY (hospital_id)
+);
+
+-- Index 설정 SQL - hospital_baseinfo(owner_user_id)
+CREATE INDEX IX_hospital_baseinfo_1
+ ON hospital_baseinfo(owner_user_id);
+
+
+-- user_info Table Create SQL
+-- 테이블 생성 SQL - user_info
+CREATE TABLE user_info
+(
+ `user_id` INT NOT NULL AUTO_INCREMENT,
+ `username` VARCHAR(50) NOT NULL,
+ `password` VARCHAR(50) NOT NULL,
+ `created_at` TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP,
+ `updated_at` TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP,
+ PRIMARY KEY (user_id)
+);
+
+-- youtube_data Table Create SQL
+CREATE TABLE youtube_data
+(
+ `id` INT NOT NULL AUTO_INCREMENT,
+ `hospital_id` INT NOT NULL,
+ `url` VARCHAR(500) NOT NULL,
+ `raw_data` JSON NULL,
+ `created_at` TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP,
+ PRIMARY KEY (id)
+);
+
+-- Index 설정 SQL - youtube_data(hospital_id)
+CREATE INDEX IX_youtube_data_1
+ ON youtube_data(hospital_id);
+
diff --git a/app/common/utils.py b/app/common/utils.py
new file mode 100644
index 0000000..e2d6be2
--- /dev/null
+++ b/app/common/utils.py
@@ -0,0 +1,82 @@
+import os
+import re
+import asyncio
+from http import HTTPMethod
+import httpx
+
+REQUEST_TIMEOUT = 60
+
+
+def get_env(key: str) -> str:
+ v = os.environ.get(key, "")
+ if not v:
+ raise EnvironmentError(f"Missing env: {key}")
+ return v
+
+async def http_request(
+ method: HTTPMethod,
+ url: str,
+ *,
+ label: str,
+ headers: dict | None = None,
+ params: dict | None = None,
+ json_body: dict | None = None,
+ timeout: int = REQUEST_TIMEOUT,
+ max_retries: int = 0,
+) -> httpx.Response | None:
+ async with httpx.AsyncClient() as client:
+ for attempt in range(max_retries + 1):
+ try:
+ resp = await client.request(method, url, headers=headers, params=params, json=json_body, timeout=timeout)
+ return resp
+ except httpx.RequestError as e:
+ if attempt < max_retries:
+ print(f" [retry] {label} → {e}, attempt {attempt + 1}")
+ await asyncio.sleep((attempt + 1) * 2)
+ else:
+ print(f" [error] {label} → {e}")
+ return None
+ return None
+
+
+_SKIP_IG = {"p", "reel", "stories", "explore", "accounts", "about", "directory"}
+_SKIP_FB = {"sharer", "share", "dialog", "plugins", "groups", "events", "watch", "help"}
+
+
+def extract_social_handles(urls: list[str]) -> dict[str, list[str]]:
+ result: dict[str, list[str]] = {"instagram": [], "youtube": [], "facebook": [], "naver_blog": [], "tiktok": []}
+
+ for url in urls:
+ if not url:
+ continue
+ m = re.search(r"instagram\.com/([a-zA-Z0-9._]+)", url)
+ if m and m.group(1).lower() not in _SKIP_IG:
+ result["instagram"].append(m.group(1))
+
+ m = re.search(r"youtube\.com/(?:@([a-zA-Z0-9._-]+)|channel/(UC[a-zA-Z0-9_-]+)|c/([a-zA-Z0-9._-]+))", url)
+ if m:
+ result["youtube"].append(f"@{m.group(1)}" if m.group(1) else (m.group(2) or m.group(3) or ""))
+
+ m = re.search(r"facebook\.com/([a-zA-Z0-9._-]+)", url)
+ if m and m.group(1).lower() not in _SKIP_FB:
+ result["facebook"].append(m.group(1))
+
+ m = re.search(r"blog\.naver\.com/([a-zA-Z0-9_-]+)", url)
+ if m:
+ result["naver_blog"].append(m.group(1))
+
+ m = re.search(r"tiktok\.com/@([a-zA-Z0-9._-]+)", url)
+ if m:
+ result["tiktok"].append(m.group(1))
+
+ return {k: list(set(v)) for k, v in result.items()}
+
+
+def normalize_handle(platform: str, value: str) -> str:
+ """URL이 들어오면 핸들을 추출하고, 이미 핸들이면 그대로 반환."""
+ if not value:
+ return value
+ if "://" in value or value.startswith("www."):
+ handles = extract_social_handles([value]).get(platform, [])
+ value = handles[0] if handles else value
+ return value.lstrip("@") if platform != "youtube" else value
diff --git a/app/integrations/__init__.py b/app/integrations/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/app/integrations/apify.py b/app/integrations/apify.py
new file mode 100644
index 0000000..f2bf76a
--- /dev/null
+++ b/app/integrations/apify.py
@@ -0,0 +1,145 @@
+from http import HTTPMethod
+from common.utils import http_request
+
+APIFY_BASE = "https://api.apify.com/v2"
+
+
+class ApifyClient:
+ def __init__(self, token: str, wait_for_finish: int = 120):
+ self.token = token
+ self.wait_for_finish = wait_for_finish
+
+ async def _run_actor(self, actor_id: str, input_data: dict) -> list[dict]:
+ resp = await http_request(
+ HTTPMethod.POST,
+ url=f"{APIFY_BASE}/acts/{actor_id}/runs",
+ params={"token": self.token, "waitForFinish": self.wait_for_finish},
+ headers={"Content-Type": "application/json"},
+ json_body=input_data,
+ timeout=self.wait_for_finish + 10,
+ label=f"apify:{actor_id.split('~')[-1]}",
+ )
+ if not resp or not resp.is_success:
+ return []
+ dataset_id = resp.json()["data"]["defaultDatasetId"]
+ items_resp = await http_request(
+ HTTPMethod.GET,
+ url=f"{APIFY_BASE}/datasets/{dataset_id}/items",
+ params={"token": self.token, "limit": 20},
+ label=f"apify-dataset-{dataset_id}",
+ )
+ if not items_resp or not items_resp.is_success:
+ return []
+ return items_resp.json()
+
+ async def fetch_instagram_profile(self, handle: str) -> dict | None:
+ items = await self._run_actor("apify~instagram-profile-scraper", {"usernames": [handle], "resultsLimit": 12})
+ return items[0] if items else None
+
+ async def get_instagram_profile(self, handle: str) -> dict | None:
+ profile = await self.fetch_instagram_profile(handle)
+ if not profile or profile.get("error"):
+ return None
+ return {
+ "username": profile["username"],
+ "followers": profile.get("followersCount", 0),
+ "following": profile.get("followsCount", 0),
+ "posts": profile.get("postsCount", 0),
+ "bio": profile.get("biography", ""),
+ "isBusinessAccount": profile.get("isBusinessAccount", False),
+ "externalUrl": profile.get("externalUrl"),
+ "latestPosts": [
+ {
+ "type": p.get("type"),
+ "likes": p.get("likesCount", 0),
+ "comments": p.get("commentsCount", 0),
+ "caption": (p.get("caption") or "")[:500],
+ "timestamp": p.get("timestamp"),
+ }
+ for p in (profile.get("latestPosts") or [])[:12]
+ ],
+ }
+
+ async def fetch_instagram_posts(self, handle: str, limit: int = 20) -> list[dict]:
+ clean = handle.lstrip("@")
+ return await self._run_actor("apify~instagram-post-scraper", {
+ "directUrls": [f"https://www.instagram.com/{clean}/"],
+ "resultsLimit": limit,
+ })
+
+ async def get_instagram_posts(self, handle: str, limit: int = 20) -> dict:
+ items = await self.fetch_instagram_posts(handle, limit)
+ posts = [
+ {
+ "id": p["id"],
+ "type": p.get("type"),
+ "url": p.get("url"),
+ "caption": (p.get("caption") or "")[:500],
+ "hashtags": p.get("hashtags", []),
+ "likesCount": p.get("likesCount", 0),
+ "commentsCount": p.get("commentsCount", 0),
+ "timestamp": p.get("timestamp"),
+ }
+ for p in items
+ ]
+ n = len(posts) or 1
+ return {
+ "posts": posts,
+ "totalPosts": len(posts),
+ "avgLikes": round(sum(p["likesCount"] for p in posts) / n),
+ "avgComments": round(sum(p["commentsCount"] for p in posts) / n),
+ }
+
+ async def fetch_instagram_reels(self, handle: str, limit: int = 15) -> list[dict]:
+ clean = handle.lstrip("@")
+ return await self._run_actor("apify~instagram-reel-scraper", {
+ "directUrls": [f"https://www.instagram.com/{clean}/reels/"],
+ "resultsLimit": limit,
+ })
+
+ async def get_instagram_reels(self, handle: str, limit: int = 15) -> dict:
+ items = await self.fetch_instagram_reels(handle, limit)
+ reels = [
+ {
+ "id": r["id"],
+ "url": r.get("url"),
+ "caption": (r.get("caption") or "")[:500],
+ "hashtags": r.get("hashtags", []),
+ "likesCount": r.get("likesCount", 0),
+ "commentsCount": r.get("commentsCount", 0),
+ "videoViewCount": r.get("videoViewCount", 0),
+ "videoPlayCount": r.get("videoPlayCount", 0),
+ "videoDuration": r.get("videoDuration", 0),
+ "timestamp": r.get("timestamp"),
+ }
+ for r in items
+ ]
+ n = len(reels) or 1
+ return {
+ "reels": reels,
+ "totalReels": len(reels),
+ "avgViews": round(sum(r["videoViewCount"] for r in reels) / n),
+ "avgPlays": round(sum(r["videoPlayCount"] for r in reels) / n),
+ }
+
+ async def fetch_facebook_page(self, page_url: str) -> dict | None:
+ items = await self._run_actor("apify~facebook-pages-scraper", {"startUrls": [{"url": page_url}]})
+ return items[0] if items else None
+
+ async def get_facebook_page(self, page_url: str) -> dict | None:
+ page = await self.fetch_facebook_page(page_url)
+ if not page:
+ return None
+ return {
+ "pageName": page["title"],
+ "pageUrl": page.get("pageUrl", page_url),
+ "followers": page.get("followers", 0),
+ "likes": page.get("likes", 0),
+ "categories": page.get("categories", []),
+ "email": page.get("email"),
+ "phone": page.get("phone"),
+ "website": page.get("website"),
+ "address": page.get("address"),
+ "intro": page.get("intro"),
+ "rating": page.get("rating"),
+ }
diff --git a/app/integrations/firecrawl.py b/app/integrations/firecrawl.py
new file mode 100644
index 0000000..f50503d
--- /dev/null
+++ b/app/integrations/firecrawl.py
@@ -0,0 +1,128 @@
+from http import HTTPMethod
+from common.utils import get_env, http_request
+
+FIRECRAWL_BASE = "https://api.firecrawl.dev/v1"
+
+
+class FirecrawlClient:
+ def __init__(self, api_key: str):
+ self.api_key = api_key
+
+ def _headers(self) -> dict:
+ return {"Content-Type": "application/json", "Authorization": f"Bearer {self.api_key}"}
+
+ async def scrape(self, url: str, json_options: dict, wait_for: int = 5000) -> dict | None:
+ resp = await http_request(
+ HTTPMethod.POST,
+ url=f"{FIRECRAWL_BASE}/scrape",
+ headers=self._headers(),
+ json_body={"url": url, "formats": ["json", "links"], "jsonOptions": json_options, "waitFor": wait_for},
+ label="firecrawl-scrape",
+ )
+ if not resp or not resp.is_success:
+ return None
+ return resp.json().get("data")
+
+ async def map(self, url: str, limit: int = 50) -> list[str]:
+ resp = await http_request(
+ HTTPMethod.POST,
+ url=f"{FIRECRAWL_BASE}/map",
+ headers=self._headers(),
+ json_body={"url": url, "limit": limit},
+ label="firecrawl-map",
+ )
+ if not resp or not resp.is_success:
+ return []
+ return resp.json().get("links", [])
+
+ async def search(self, query: str, limit: int = 5) -> list[dict]:
+ resp = await http_request(
+ HTTPMethod.POST,
+ url=f"{FIRECRAWL_BASE}/search",
+ headers=self._headers(),
+ json_body={"query": query, "limit": limit},
+ label="firecrawl-search",
+ )
+ if not resp or not resp.is_success:
+ return []
+ return resp.json().get("data", [])
+
+ async def fetch_social_buttons(self, url: str) -> list[dict]:
+ data = await self.scrape(url, {
+ "prompt": "Find ALL social media link URLs on this page — header, footer, sidebar, floating buttons. Extract actual href URLs for: Instagram, YouTube, Facebook, TikTok, Naver Blog, KakaoTalk.",
+ "schema": {
+ "type": "object",
+ "properties": {
+ "socialLinks": {
+ "type": "array",
+ "items": {
+ "type": "object",
+ "properties": {"platform": {"type": "string"}, "url": {"type": "string"}},
+ },
+ },
+ },
+ },
+ })
+ if not data:
+ return []
+ return (data.get("json") or {}).get("socialLinks", [])
+
+ async def fetch_gangnam_unni(self, hospital_url: str) -> dict | None:
+ resp = await http_request(
+ HTTPMethod.POST,
+ url=f"{FIRECRAWL_BASE}/scrape",
+ headers=self._headers(),
+ json_body={
+ "url": hospital_url,
+ "formats": ["json"],
+ "jsonOptions": {
+ "prompt": "Extract: hospital name, overall rating (out of 10), total review count, doctors with names/ratings/review counts/specialties, procedures, address, badges",
+ "schema": {
+ "type": "object",
+ "properties": {
+ "hospitalName": {"type": "string"},
+ "rating": {"type": "number"},
+ "totalReviews": {"type": "number"},
+ "doctors": {
+ "type": "array",
+ "items": {
+ "type": "object",
+ "properties": {
+ "name": {"type": "string"},
+ "rating": {"type": "number"},
+ "reviews": {"type": "number"},
+ "specialty": {"type": "string"},
+ },
+ },
+ },
+ "procedures": {"type": "array", "items": {"type": "string"}},
+ "address": {"type": "string"},
+ "badges": {"type": "array", "items": {"type": "string"}},
+ },
+ },
+ },
+ "waitFor": 5000,
+ },
+ timeout=60,
+ label="firecrawl-gangnamunni",
+ )
+ if not resp or not resp.is_success:
+ return None
+ raw = (resp.json().get("data") or {}).get("json")
+ return {"sourceUrl": hospital_url, **raw} if raw else None
+
+ async def get_gangnam_unni(self, hospital_url: str) -> dict | None:
+ raw = await self.fetch_gangnam_unni(hospital_url)
+ if not raw or not raw.get("hospitalName"):
+ return None
+ return {
+ "name": raw["hospitalName"],
+ "rating": raw.get("rating"),
+ "ratingScale": "/10",
+ "totalReviews": raw.get("totalReviews", 0),
+ "doctors": (raw.get("doctors") or [])[:10],
+ "procedures": raw.get("procedures", []),
+ "address": raw.get("address", ""),
+ "badges": raw.get("badges", []),
+ "sourceUrl": raw["sourceUrl"],
+ }
diff --git a/app/integrations/google_places.py b/app/integrations/google_places.py
new file mode 100644
index 0000000..9a6ff6b
--- /dev/null
+++ b/app/integrations/google_places.py
@@ -0,0 +1,61 @@
+from http import HTTPMethod
+from common.utils import http_request
+
+PLACES_BASE = "https://places.googleapis.com/v1"
+FIELD_MASK = ",".join([
+ "places.id", "places.displayName", "places.formattedAddress",
+ "places.rating", "places.userRatingCount",
+ "places.internationalPhoneNumber", "places.websiteUri",
+ "places.googleMapsUri", "places.primaryTypeDisplayName",
+ "places.regularOpeningHours", "places.reviews",
+])
+
+
+class GooglePlacesClient:
+ def __init__(self, api_key: str):
+ self.api_key = api_key
+
+ def _headers(self) -> dict:
+ return {
+ "Content-Type": "application/json",
+ "X-Goog-Api-Key": self.api_key,
+ "X-Goog-FieldMask": FIELD_MASK,
+ }
+
+ async def fetch_place(self, query: str) -> dict | None:
+ resp = await http_request(
+ HTTPMethod.POST,
+ url=f"{PLACES_BASE}/places:searchText",
+ headers=self._headers(),
+ json_body={"textQuery": query, "languageCode": "ko", "regionCode": "KR", "maxResultCount": 3},
+ timeout=15,
+ label="google-places",
+ )
+ if not resp or not resp.is_success:
+ return None
+ places = resp.json().get("places", [])
+ return places[0] if places else None
+
+ async def get_place(self, query: str) -> dict | None:
+ p = await self.fetch_place(query)
+ if not p:
+ return None
+ return {
+ "name": (p.get("displayName") or {}).get("text", ""),
+ "rating": p.get("rating"),
+ "reviewCount": p.get("userRatingCount", 0),
+ "address": p.get("formattedAddress", ""),
+ "phone": p.get("internationalPhoneNumber", ""),
+ "clinicWebsite": p.get("websiteUri", ""),
+ "mapsUrl": p.get("googleMapsUri", ""),
+ "placeId": p.get("id", ""),
+ "category": (p.get("primaryTypeDisplayName") or {}).get("text", ""),
+ "topReviews": [
+ {
+ "stars": r.get("rating", 0),
+ "text": ((r.get("text") or {}).get("text", ""))[:500],
+ "date": r.get("publishTime", ""),
+ }
+ for r in (p.get("reviews") or [])[:10]
+ ],
+ }
diff --git a/app/integrations/llm/__init__.py b/app/integrations/llm/__init__.py
new file mode 100644
index 0000000..4a857dd
--- /dev/null
+++ b/app/integrations/llm/__init__.py
@@ -0,0 +1,3 @@
+from .service import LLMService
+from .prompt import Prompt
+
diff --git a/app/integrations/llm/prompt.py b/app/integrations/llm/prompt.py
new file mode 100644
index 0000000..1e3f04a
--- /dev/null
+++ b/app/integrations/llm/prompt.py
@@ -0,0 +1,19 @@
+from pydantic import BaseModel
+
+
+class Prompt:
+ def __init__(
+ self,
+ template: str,
+ model: str,
+ input_class: type[BaseModel],
+ output_class: type[BaseModel],
+ ):
+ self.template = template
+ self.model = model
+ self.input_class = input_class
+ self.output_class = output_class
+
+ def build(self, input_data: dict) -> str:
+ verified = self.input_class(**input_data)
+ return self.template.format(**verified.model_dump())
diff --git a/app/integrations/llm/schemas/__init__.py b/app/integrations/llm/schemas/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/app/integrations/llm/service.py b/app/integrations/llm/service.py
new file mode 100644
index 0000000..f554470
--- /dev/null
+++ b/app/integrations/llm/service.py
@@ -0,0 +1,61 @@
+from pydantic import BaseModel
+from openai import AsyncOpenAI
+from common.utils import get_env
+from .prompt import Prompt
+
+
+class LLMResponseError(Exception):
+ def __init__(self, status: str, code: str = None, message: str = None):
+ self.status = status
+ self.code = code
+ self.message = message
+ super().__init__(f"LLM response failed: status={status}, code={code}, message={message}")
+
+
+class LLMService:
+ def __init__(self, provider: str = "openai", max_retries: int = 2):
+ self.max_retries = max_retries
+ match provider:
+ case "openai":
+ self.client = AsyncOpenAI(api_key=get_env("OPENAI_API_KEY"))
+ case "perplexity":
+ self.client = AsyncOpenAI(
+ api_key=get_env("PERPLEXITY_API_KEY"),
+ base_url="https://api.perplexity.ai",
+ )
+ case "gemini":
+ self.client = AsyncOpenAI(
+ api_key=get_env("GEMINI_API_KEY"),
+ base_url="https://generativelanguage.googleapis.com/v1beta/openai/",
+ )
+ case _:
+ raise NotImplementedError(f"Unknown provider: {provider}")
+
+ async def generate(
+ self,
+ prompt: Prompt,
+ input_data: dict,
+ ) -> BaseModel:
+ prompt_text = prompt.build(input_data)
+ last_error = None
+
+ for attempt in range(self.max_retries + 1):
+ response = await self.client.beta.chat.completions.parse(
+ model=prompt.model,
+ messages=[{"role": "user", "content": prompt_text}],
+ response_format=prompt.output_class,
+ )
+ choice = response.choices[0]
+ finish_reason = choice.finish_reason
+
+ if finish_reason == "stop":
+ return choice.message.parsed
+
+ if finish_reason == "length":
+ last_error = LLMResponseError("incomplete", finish_reason, "max tokens reached")
+ elif finish_reason == "content_filter":
+ last_error = LLMResponseError("failed", finish_reason, "blocked by content filter")
+ else:
+ last_error = LLMResponseError("failed", finish_reason, f"unexpected finish_reason: {finish_reason}")
+
+ raise last_error
diff --git a/app/integrations/naver.py b/app/integrations/naver.py
new file mode 100644
index 0000000..e28371f
--- /dev/null
+++ b/app/integrations/naver.py
@@ -0,0 +1,89 @@
+import re
+from http import HTTPMethod
+from common.utils import http_request
+
+NAVER_BASE = "https://openapi.naver.com/v1/search"
+
+
+class NaverClient:
+ def __init__(self, client_id: str, client_secret: str):
+ self.client_id = client_id
+ self.client_secret = client_secret
+
+ def _headers(self) -> dict:
+ return {
+ "X-Naver-Client-Id": self.client_id,
+ "X-Naver-Client-Secret": self.client_secret,
+ }
+
+ async def fetch_blog_search(self, query: str, display: int = 5) -> list[dict]:
+ resp = await http_request(
+ HTTPMethod.GET,
+ url=f"{NAVER_BASE}/blog.json",
+ headers=self._headers(),
+ params={"query": query, "display": display, "sort": "sim"},
+ label="naver-blog",
+ )
+ if not resp or not resp.is_success:
+ return []
+ return resp.json().get("items", [])
+
+ async def fetch_web_search(self, query: str, display: int = 10) -> list[dict]:
+ resp = await http_request(
+ HTTPMethod.GET,
+ url=f"{NAVER_BASE}/webkr.json",
+ headers=self._headers(),
+ params={"query": query, "display": display},
+ label="naver-web",
+ )
+ if not resp or not resp.is_success:
+ return []
+ return resp.json().get("items", [])
+
+ async def fetch_local_search(self, query: str, display: int = 5) -> list[dict]:
+ resp = await http_request(
+ HTTPMethod.GET,
+ url=f"{NAVER_BASE}/local.json",
+ headers=self._headers(),
+ params={"query": query, "display": display, "sort": "comment"},
+ label="naver-local",
+ )
+ if not resp or not resp.is_success:
+ return []
+ return resp.json().get("items", [])
+
+ async def fetch_blog_rss(self, blog_handle: str) -> str | None:
+ resp = await http_request(
+ HTTPMethod.GET,
+ url=f"https://rss.blog.naver.com/{blog_handle}.xml",
+ timeout=15,
+ label="naver-rss",
+ )
+ if not resp or not resp.is_success:
+ return None
+ return resp.text
+
+ async def get_blog_rss(self, blog_handle: str) -> dict | None:
+ xml = await self.fetch_blog_rss(blog_handle)
+ if not xml:
+ return None
+ posts = []
+ for m in re.finditer(r"