integration 1차 데이터 및 DB 정의, 테스트

2026-04-24 14:19:29 +09:00 · 2026-04-24 14:19:29 +09:00 · d930679e90
parent 23e859217b
commit d930679e90
16 changed files with 883 additions and 0 deletions
--- a/.gitignore
+++ b/.gitignore
@ -42,3 +42,5 @@ Thumbs.db
 # Alembic
 alembic/versions/*.pyc
 test_results/
--- a/README.md
+++ b/README.md
@ -0,0 +1,15 @@
 # o2o-infinith-backend
 ## 설치
 **Docker**
 ```bash
 curl -fsSL https://get.docker.com | sh
 ```
 ## 실행
 ```bash
 docker compose up -d
 ```
--- a/SQL/db_create.sql
+++ b/SQL/db_create.sql
@ -0,0 +1,100 @@
 -- 테이블 순서는 관계를 고려하여 한 번에 실행해도 에러가 발생하지 않게 정렬되었습니다.
 -- Table instagram_data: auto-increment id, owning hospital_id, source url,
 -- optional JSON raw_data (presumably the unprocessed scrape payload -- confirm)
 -- and a creation timestamp that defaults to the insert time.
 CREATE TABLE instagram_data
 (
    `id`          INT          NOT NULL AUTO_INCREMENT,
    `hospital_id` INT          NOT NULL,
    `url`         VARCHAR(500) NOT NULL,
    `raw_data`    JSON         NULL,
    `created_at`  TIMESTAMP    NOT NULL DEFAULT CURRENT_TIMESTAMP,
    PRIMARY KEY (`id`),
    -- Secondary index on hospital_id, declared inline with the table.
    INDEX IX_instagram_data_1 (`hospital_id`)
 );
 -- Table facebook_data: auto-increment id, owning hospital_id, source url,
 -- optional JSON raw_data (presumably the unprocessed scrape payload -- confirm)
 -- and a creation timestamp that defaults to the insert time.
 CREATE TABLE facebook_data
 (
    `id`          INT          NOT NULL AUTO_INCREMENT,
    `hospital_id` INT          NOT NULL,
    `url`         VARCHAR(500) NOT NULL,
    `raw_data`    JSON         NULL,
    `created_at`  TIMESTAMP    NOT NULL DEFAULT CURRENT_TIMESTAMP,
    PRIMARY KEY (`id`),
    -- Secondary index on hospital_id, declared inline with the table.
    INDEX IX_facebook_data_1 (`hospital_id`)
 );
 -- Table naver_blog_data: auto-increment id, owning hospital_id, source url,
 -- optional JSON raw_data (presumably the unprocessed feed payload -- confirm)
 -- and a creation timestamp that defaults to the insert time.
 CREATE TABLE naver_blog_data
 (
    `id`          INT          NOT NULL AUTO_INCREMENT,
    `hospital_id` INT          NOT NULL,
    `url`         VARCHAR(500) NOT NULL,
    `raw_data`    JSON         NULL,
    `created_at`  TIMESTAMP    NOT NULL DEFAULT CURRENT_TIMESTAMP,
    PRIMARY KEY (`id`),
    -- Secondary index on hospital_id, declared inline with the table.
    INDEX IX_naver_blog_data_1 (`hospital_id`)
 );
 -- Table hospital_baseinfo: one row per hospital, keyed by hospital_id.
 -- owner_user_id is a plain indexed INT (no foreign key is declared here).
 -- brn is presumably the business registration number -- confirm.
 -- updated_at is refreshed by the server on every row update.
 CREATE TABLE hospital_baseinfo
 (
    `hospital_id`   INT          NOT NULL AUTO_INCREMENT,
    `owner_user_id` INT          NOT NULL,
    `hospital_name` VARCHAR(50)  NOT NULL,
    `brn`           VARCHAR(50)  NOT NULL,
    `road_address`  VARCHAR(100) NULL,
    `site_address`  VARCHAR(100) NULL,
    `created_at`    TIMESTAMP    NOT NULL DEFAULT CURRENT_TIMESTAMP,
    `updated_at`    TIMESTAMP    NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP,
    PRIMARY KEY (`hospital_id`),
    -- Secondary index on owner_user_id, declared inline with the table.
    INDEX IX_hospital_baseinfo_1 (`owner_user_id`)
 );
 -- Table user_info: one row per account, keyed by user_id.
 -- updated_at is refreshed by the server on every row update.
 CREATE TABLE user_info
 (
    `user_id`     INT            NOT NULL    AUTO_INCREMENT,
    `username`    VARCHAR(50)    NOT NULL,
    -- Sized for password hashes rather than raw passwords: a bcrypt hash is
    -- 60 characters long and would not fit the previous VARCHAR(50).
    `password`    VARCHAR(255)   NOT NULL,
    `created_at`  TIMESTAMP      NOT NULL    DEFAULT CURRENT_TIMESTAMP,
    `updated_at`  TIMESTAMP      NOT NULL    DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP,
     PRIMARY KEY (user_id)
 );
 -- Table youtube_data: auto-increment id, owning hospital_id, source url,
 -- optional JSON raw_data (presumably the unprocessed API payload -- confirm)
 -- and a creation timestamp that defaults to the insert time.
 CREATE TABLE youtube_data
 (
    `id`          INT          NOT NULL AUTO_INCREMENT,
    `hospital_id` INT          NOT NULL,
    `url`         VARCHAR(500) NOT NULL,
    `raw_data`    JSON         NULL,
    `created_at`  TIMESTAMP    NOT NULL DEFAULT CURRENT_TIMESTAMP,
    PRIMARY KEY (`id`),
    -- Secondary index on hospital_id, declared inline with the table.
    INDEX IX_youtube_data_1 (`hospital_id`)
 );
--- a/app/common/utils.py
+++ b/app/common/utils.py
@ -0,0 +1,82 @@
 import os
 import re
 import asyncio
 from http import HTTPMethod
 import httpx
 REQUEST_TIMEOUT = 60
 def get_env(key: str) -> str:
    """Return the value of the environment variable ``key``.

    Raises:
        EnvironmentError: If the variable is unset or set to an empty string.
    """
    value = os.environ.get(key, "")
    if value:
        return value
    raise EnvironmentError(f"Missing env: {key}")
 async def http_request(
    method: HTTPMethod,
    url: str,
    *,
    label: str,
    headers: dict | None = None,
    params: dict | None = None,
    json_body: dict | None = None,
    timeout: int = REQUEST_TIMEOUT,
    max_retries: int = 0,
 ) -> httpx.Response | None:
    """Send one HTTP request, retrying only on transport-level failures.

    Args:
        method: HTTP verb to use.
        url: Target URL.
        label: Short tag printed in retry/error messages.
        headers: Optional request headers.
        params: Optional query-string parameters.
        json_body: Optional JSON request body.
        timeout: Per-request timeout passed to httpx.
        max_retries: Extra attempts after the first one fails with
            ``httpx.RequestError``.

    Returns:
        The response of the first attempt that completes, whatever its status
        code (callers check ``is_success`` themselves), or ``None`` once every
        attempt has raised ``httpx.RequestError``.
    """
    total_attempts = max_retries + 1
    async with httpx.AsyncClient() as client:
        for attempt_no in range(1, total_attempts + 1):
            try:
                return await client.request(
                    method,
                    url,
                    headers=headers,
                    params=params,
                    json=json_body,
                    timeout=timeout,
                )
            except httpx.RequestError as exc:
                if attempt_no == total_attempts:
                    print(f"  [error] {label} → {exc}")
                    return None
                print(f"  [retry] {label} → {exc}, attempt {attempt_no}")
                # Linear back-off: 2 s after the first failure, 4 s after the second, ...
                await asyncio.sleep(attempt_no * 2)
    # Only reached when max_retries is negative and no attempt was made.
    return None
 # First path segments on instagram.com / facebook.com that name a site feature
 # rather than an account, so they must not be reported as handles.
 _SKIP_IG = {"p", "reel", "stories", "explore", "accounts", "about", "directory"}
 _SKIP_FB = {"sharer", "share", "dialog", "plugins", "groups", "events", "watch", "help"}
 # Patterns are compiled once at import time instead of being re-specified on
 # every loop iteration.
 _IG_HANDLE_RE = re.compile(r"instagram\.com/([a-zA-Z0-9._]+)")
 _YT_HANDLE_RE = re.compile(
    r"youtube\.com/(?:@([a-zA-Z0-9._-]+)|channel/(UC[a-zA-Z0-9_-]+)|c/([a-zA-Z0-9._-]+))"
 )
 _FB_HANDLE_RE = re.compile(r"facebook\.com/([a-zA-Z0-9._-]+)")
 _NAVER_BLOG_RE = re.compile(r"blog\.naver\.com/([a-zA-Z0-9_-]+)")
 _TIKTOK_RE = re.compile(r"tiktok\.com/@([a-zA-Z0-9._-]+)")
 def extract_social_handles(urls: list[str]) -> dict[str, list[str]]:
    """Collect social-media handles found in ``urls``, grouped by platform.

    Each URL contributes at most one handle per platform. YouTube ``@handle``
    URLs are returned with the leading "@"; ``channel/UC...`` and ``c/name``
    URLs are returned as the bare id / name.

    Args:
        urls: URLs to scan; empty or ``None`` entries are skipped.

    Returns:
        A dict with the keys ``instagram``, ``youtube``, ``facebook``,
        ``naver_blog`` and ``tiktok``. Each value is a de-duplicated list of
        handles in first-seen order.
    """
    result: dict[str, list[str]] = {"instagram": [], "youtube": [], "facebook": [], "naver_blog": [], "tiktok": []}
    for url in urls:
        if not url:
            continue
        m = _IG_HANDLE_RE.search(url)
        if m and m.group(1).lower() not in _SKIP_IG:
            result["instagram"].append(m.group(1))
        m = _YT_HANDLE_RE.search(url)
        if m:
            result["youtube"].append(f"@{m.group(1)}" if m.group(1) else (m.group(2) or m.group(3) or ""))
        m = _FB_HANDLE_RE.search(url)
        if m and m.group(1).lower() not in _SKIP_FB:
            result["facebook"].append(m.group(1))
        m = _NAVER_BLOG_RE.search(url)
        if m:
            result["naver_blog"].append(m.group(1))
        m = _TIKTOK_RE.search(url)
        if m:
            result["tiktok"].append(m.group(1))
    # dict.fromkeys drops duplicates but keeps first-seen order. set() ordering
    # is arbitrary, which made "take handles[0]" nondeterministic for callers.
    return {platform: list(dict.fromkeys(found)) for platform, found in result.items()}
 def normalize_handle(platform: str, value: str) -> str:
    """Reduce a profile URL to its handle; pass an existing handle through.

    A value containing "://" or starting with "www." is treated as a URL and
    run through ``extract_social_handles``; if no handle for ``platform`` is
    found the value is kept unchanged. A leading "@" is then stripped for every
    platform except "youtube". Empty values are returned untouched.
    """
    if not value:
        return value
    looks_like_url = value.startswith("www.") or "://" in value
    if looks_like_url:
        found = extract_social_handles([value]).get(platform, [])
        if found:
            value = found[0]
    if platform == "youtube":
        return value
    return value.lstrip("@")
--- a/app/integrations/init.py
+++ b/app/integrations/init.py
--- a/app/integrations/apify.py
+++ b/app/integrations/apify.py
@ -0,0 +1,145 @@
 from http import HTTPMethod
 from common.utils import http_request
 APIFY_BASE = "https://api.apify.com/v2"
 class ApifyClient:
    def __init__(self, token: str, wait_for_finish: int = 120):
        self.token = token
        self.wait_for_finish = wait_for_finish
    async def _run_actor(self, actor_id: str, input_data: dict) -> list[dict]:
        resp = await http_request(
            HTTPMethod.POST,
            url=f"{APIFY_BASE}/acts/{actor_id}/runs",
            params={"token": self.token, "waitForFinish": self.wait_for_finish},
            headers={"Content-Type": "application/json"},
            json_body=input_data,
            timeout=self.wait_for_finish + 10,
            label=f"apify:{actor_id.split('~')[-1]}",
        )
        if not resp or not resp.is_success:
            return []
        dataset_id = resp.json()["data"]["defaultDatasetId"]
        items_resp = await http_request(
            HTTPMethod.GET,
            url=f"{APIFY_BASE}/datasets/{dataset_id}/items",
            params={"token": self.token, "limit": 20},
            label=f"apify-dataset-{dataset_id}",
        )
        if not items_resp or not items_resp.is_success:
            return []
        return items_resp.json()
    async def fetch_instagram_profile(self, handle: str) -> dict | None:
        items = await self._run_actor("apify~instagram-profile-scraper", {"usernames": [handle], "resultsLimit": 12})
        return items[0] if items else None
    async def get_instagram_profile(self, handle: str) -> dict | None:
        profile = await self.fetch_instagram_profile(handle)
        if not profile or profile.get("error"):
            return None
        return {
            "username": profile["username"],
            "followers": profile.get("followersCount", 0),
            "following": profile.get("followsCount", 0),
            "posts": profile.get("postsCount", 0),
            "bio": profile.get("biography", ""),
            "isBusinessAccount": profile.get("isBusinessAccount", False),
            "externalUrl": profile.get("externalUrl"),
            "latestPosts": [
                {
                    "type": p.get("type"),
                    "likes": p.get("likesCount", 0),
                    "comments": p.get("commentsCount", 0),
                    "caption": (p.get("caption") or "")[:500],
                    "timestamp": p.get("timestamp"),
                }
                for p in (profile.get("latestPosts") or [])[:12]
            ],
        }
    async def fetch_instagram_posts(self, handle: str, limit: int = 20) -> list[dict]:
        clean = handle.lstrip("@")
        return await self._run_actor("apify~instagram-post-scraper", {
            "directUrls": [f"https://www.instagram.com/{clean}/"],
            "resultsLimit": limit,
        })
    async def get_instagram_posts(self, handle: str, limit: int = 20) -> dict:
        items = await self.fetch_instagram_posts(handle, limit)
        posts = [
            {
                "id": p["id"],
                "type": p.get("type"),
                "url": p.get("url"),
                "caption": (p.get("caption") or "")[:500],
                "hashtags": p.get("hashtags", []),
                "likesCount": p.get("likesCount", 0),
                "commentsCount": p.get("commentsCount", 0),
                "timestamp": p.get("timestamp"),
            }
            for p in items
        ]
        n = len(posts) or 1
        return {
            "posts": posts,
            "totalPosts": len(posts),
            "avgLikes": round(sum(p["likesCount"] for p in posts) / n),
            "avgComments": round(sum(p["commentsCount"] for p in posts) / n),
        }
    async def fetch_instagram_reels(self, handle: str, limit: int = 15) -> list[dict]:
        clean = handle.lstrip("@")
        return await self._run_actor("apify~instagram-reel-scraper", {
            "directUrls": [f"https://www.instagram.com/{clean}/reels/"],
            "resultsLimit": limit,
        })
    async def get_instagram_reels(self, handle: str, limit: int = 15) -> dict:
        items = await self.fetch_instagram_reels(handle, limit)
        reels = [
            {
                "id": r["id"],
                "url": r.get("url"),
                "caption": (r.get("caption") or "")[:500],
                "hashtags": r.get("hashtags", []),
                "likesCount": r.get("likesCount", 0),
                "commentsCount": r.get("commentsCount", 0),
                "videoViewCount": r.get("videoViewCount", 0),
                "videoPlayCount": r.get("videoPlayCount", 0),
                "videoDuration": r.get("videoDuration", 0),
                "timestamp": r.get("timestamp"),
            }
            for r in items
        ]
        n = len(reels) or 1
        return {
            "reels": reels,
            "totalReels": len(reels),
            "avgViews": round(sum(r["videoViewCount"] for r in reels) / n),
            "avgPlays": round(sum(r["videoPlayCount"] for r in reels) / n),
        }
    async def fetch_facebook_page(self, page_url: str) -> dict | None:
        items = await self._run_actor("apify~facebook-pages-scraper", {"startUrls": [{"url": page_url}]})
        return items[0] if items else None
    async def get_facebook_page(self, page_url: str) -> dict | None:
        page = await self.fetch_facebook_page(page_url)
        if not page:
            return None
        return {
            "pageName": page["title"],
            "pageUrl": page.get("pageUrl", page_url),
            "followers": page.get("followers", 0),
            "likes": page.get("likes", 0),
            "categories": page.get("categories", []),
            "email": page.get("email"),
            "phone": page.get("phone"),
            "website": page.get("website"),
            "address": page.get("address"),
            "intro": page.get("intro"),
            "rating": page.get("rating"),
        }
--- a/app/integrations/firecrawl.py
+++ b/app/integrations/firecrawl.py
@ -0,0 +1,128 @@
 from http import HTTPMethod
 from common.utils import get_env, http_request
 FIRECRAWL_BASE = "https://api.firecrawl.dev/v1"
 class FirecrawlClient:
    def __init__(self, api_key: str):
        self.api_key = api_key
    def _headers(self) -> dict:
        return {"Content-Type": "application/json", "Authorization": f"Bearer {self.api_key}"}
    async def scrape(self, url: str, json_options: dict, wait_for: int = 5000) -> dict | None:
        resp = await http_request(
            HTTPMethod.POST,
            url=f"{FIRECRAWL_BASE}/scrape",
            headers=self._headers(),
            json_body={"url": url, "formats": ["json", "links"], "jsonOptions": json_options, "waitFor": wait_for},
            label="firecrawl-scrape",
        )
        if not resp or not resp.is_success:
            return None
        return resp.json().get("data")
    async def map(self, url: str, limit: int = 50) -> list[str]:
        resp = await http_request(
            HTTPMethod.POST,
            url=f"{FIRECRAWL_BASE}/map",
            headers=self._headers(),
            json_body={"url": url, "limit": limit},
            label="firecrawl-map",
        )
        if not resp or not resp.is_success:
            return []
        return resp.json().get("links", [])
    async def search(self, query: str, limit: int = 5) -> list[dict]:
        resp = await http_request(
            HTTPMethod.POST,
            url=f"{FIRECRAWL_BASE}/search",
            headers=self._headers(),
            json_body={"query": query, "limit": limit},
            label="firecrawl-search",
        )
        if not resp or not resp.is_success:
            return []
        return resp.json().get("data", [])
    async def fetch_social_buttons(self, url: str) -> list[dict]:
        data = await self.scrape(url, {
            "prompt": "Find ALL social media link URLs on this page — header, footer, sidebar, floating buttons. Extract actual href URLs for: Instagram, YouTube, Facebook, TikTok, Naver Blog, KakaoTalk.",
            "schema": {
                "type": "object",
                "properties": {
                    "socialLinks": {
                        "type": "array",
                        "items": {
                            "type": "object",
                            "properties": {"platform": {"type": "string"}, "url": {"type": "string"}},
                        },
                    },
                },
            },
        })
        if not data:
            return []
        return (data.get("json") or {}).get("socialLinks", [])
    async def fetch_gangnam_unni(self, hospital_url: str) -> dict | None:
        resp = await http_request(
            HTTPMethod.POST,
            url=f"{FIRECRAWL_BASE}/scrape",
            headers=self._headers(),
            json_body={
                "url": hospital_url,
                "formats": ["json"],
                "jsonOptions": {
                    "prompt": "Extract: hospital name, overall rating (out of 10), total review count, doctors with names/ratings/review counts/specialties, procedures, address, badges",
                    "schema": {
                        "type": "object",
                        "properties": {
                            "hospitalName": {"type": "string"},
                            "rating": {"type": "number"},
                            "totalReviews": {"type": "number"},
                            "doctors": {
                                "type": "array",
                                "items": {
                                    "type": "object",
                                    "properties": {
                                        "name": {"type": "string"},
                                        "rating": {"type": "number"},
                                        "reviews": {"type": "number"},
                                        "specialty": {"type": "string"},
                                    },
                                },
                            },
                            "procedures": {"type": "array", "items": {"type": "string"}},
                            "address": {"type": "string"},
                            "badges": {"type": "array", "items": {"type": "string"}},
                        },
                    },
                },
                "waitFor": 5000,
            },
            timeout=60,
            label="firecrawl-gangnamunni",
        )
        if not resp or not resp.is_success:
            return None
        raw = (resp.json().get("data") or {}).get("json")
        return {"sourceUrl": hospital_url, **raw} if raw else None
    async def get_gangnam_unni(self, hospital_url: str) -> dict | None:
        raw = await self.fetch_gangnam_unni(hospital_url)
        if not raw or not raw.get("hospitalName"):
            return None
        return {
            "name": raw["hospitalName"],
            "rating": raw.get("rating"),
            "ratingScale": "/10",
            "totalReviews": raw.get("totalReviews", 0),
            "doctors": (raw.get("doctors") or [])[:10],
            "procedures": raw.get("procedures", []),
            "address": raw.get("address", ""),
            "badges": raw.get("badges", []),
            "sourceUrl": raw["sourceUrl"],
        }
--- a/app/integrations/google_places.py
+++ b/app/integrations/google_places.py
@ -0,0 +1,61 @@
 from http import HTTPMethod
 from common.utils import http_request
 PLACES_BASE = "https://places.googleapis.com/v1"
 FIELD_MASK = ",".join([
    "places.id", "places.displayName", "places.formattedAddress",
    "places.rating", "places.userRatingCount",
    "places.internationalPhoneNumber", "places.websiteUri",
    "places.googleMapsUri", "places.primaryTypeDisplayName",
    "places.regularOpeningHours", "places.reviews",
 ])
 class GooglePlacesClient:
    def __init__(self, api_key: str):
        self.api_key = api_key
    def _headers(self) -> dict:
        return {
            "Content-Type": "application/json",
            "X-Goog-Api-Key": self.api_key,
            "X-Goog-FieldMask": FIELD_MASK,
        }
    async def fetch_place(self, query: str) -> dict | None:
        resp = await http_request(
            HTTPMethod.POST,
            url=f"{PLACES_BASE}/places:searchText",
            headers=self._headers(),
            json_body={"textQuery": query, "languageCode": "ko", "regionCode": "KR", "maxResultCount": 3},
            timeout=15,
            label="google-places",
        )
        if not resp or not resp.is_success:
            return None
        places = resp.json().get("places", [])
        return places[0] if places else None
    async def get_place(self, query: str) -> dict | None:
        p = await self.fetch_place(query)
        if not p:
            return None
        return {
            "name": (p.get("displayName") or {}).get("text", ""),
            "rating": p.get("rating"),
            "reviewCount": p.get("userRatingCount", 0),
            "address": p.get("formattedAddress", ""),
            "phone": p.get("internationalPhoneNumber", ""),
            "clinicWebsite": p.get("websiteUri", ""),
            "mapsUrl": p.get("googleMapsUri", ""),
            "placeId": p.get("id", ""),
            "category": (p.get("primaryTypeDisplayName") or {}).get("text", ""),
            "topReviews": [
                {
                    "stars": r.get("rating", 0),
                    "text": ((r.get("text") or {}).get("text", ""))[:500],
                    "date": r.get("publishTime", ""),
                }
                for r in (p.get("reviews") or [])[:10]
            ],
        }
--- a/app/integrations/llm/init.py
+++ b/app/integrations/llm/init.py
@ -0,0 +1,3 @@
 from .service import LLMService
 from .prompt import Prompt
--- a/app/integrations/llm/prompt.py
+++ b/app/integrations/llm/prompt.py
@ -0,0 +1,19 @@
 from pydantic import BaseModel
 class Prompt:
    """A prompt template bundled with a model name and input/output schema classes."""

    def __init__(
        self,
        template: str,
        model: str,
        input_class: type[BaseModel],
        output_class: type[BaseModel],
    ):
        """Store the template text, model name and the two schema classes."""
        self.output_class = output_class
        self.input_class = input_class
        self.model = model
        self.template = template

    def build(self, input_data: dict) -> str:
        """Validate ``input_data`` with ``input_class`` and fill the template.

        The validated model is dumped to a dict and its fields are passed to
        ``str.format`` as keyword arguments.
        """
        validated = self.input_class(**input_data)
        fields = validated.model_dump()
        return self.template.format(**fields)
--- a/app/integrations/llm/schemas/init.py
+++ b/app/integrations/llm/schemas/init.py
--- a/app/integrations/llm/service.py
+++ b/app/integrations/llm/service.py
@ -0,0 +1,61 @@
 from pydantic import BaseModel
 from openai import AsyncOpenAI
 from common.utils import get_env
 from .prompt import Prompt
 class LLMResponseError(Exception):
    def __init__(self, status: str, code: str = None, message: str = None):
        self.status = status
        self.code = code
        self.message = message
        super().__init__(f"LLM response failed: status={status}, code={code}, message={message}")
 class LLMService:
    def __init__(self, provider: str = "openai", max_retries: int = 2):
        self.max_retries = max_retries
        match provider:
            case "openai":
                self.client = AsyncOpenAI(api_key=get_env("OPENAI_API_KEY"))
            case "perplexity":
                self.client = AsyncOpenAI(
                    api_key=get_env("PERPLEXITY_API_KEY"),
                    base_url="https://api.perplexity.ai",
                )
            case "gemini":
                self.client = AsyncOpenAI(
                    api_key=get_env("GEMINI_API_KEY"),
                    base_url="https://generativelanguage.googleapis.com/v1beta/openai/",
                )
            case _:
                raise NotImplementedError(f"Unknown provider: {provider}")
    async def generate(
        self,
        prompt: Prompt,
        input_data: dict,
    ) -> BaseModel:
        prompt_text = prompt.build(input_data)
        last_error = None
        for attempt in range(self.max_retries + 1):
            response = await self.client.beta.chat.completions.parse(
                model=prompt.model,
                messages=[{"role": "user", "content": prompt_text}],
                response_format=prompt.output_class,
            )
            choice = response.choices[0]
            finish_reason = choice.finish_reason
            if finish_reason == "stop":
                return choice.message.parsed
            if finish_reason == "length":
                last_error = LLMResponseError("incomplete", finish_reason, "max tokens reached")
            elif finish_reason == "content_filter":
                last_error = LLMResponseError("failed", finish_reason, "blocked by content filter")
            else:
                last_error = LLMResponseError("failed", finish_reason, f"unexpected finish_reason: {finish_reason}")
        raise last_error
--- a/app/integrations/naver.py
+++ b/app/integrations/naver.py
@ -0,0 +1,89 @@
 import re
 from http import HTTPMethod
 from common.utils import http_request
 NAVER_BASE = "https://openapi.naver.com/v1/search"
 class NaverClient:
    def __init__(self, client_id: str, client_secret: str):
        self.client_id = client_id
        self.client_secret = client_secret
    def _headers(self) -> dict:
        return {
            "X-Naver-Client-Id": self.client_id,
            "X-Naver-Client-Secret": self.client_secret,
        }
    async def fetch_blog_search(self, query: str, display: int = 5) -> list[dict]:
        resp = await http_request(
            HTTPMethod.GET,
            url=f"{NAVER_BASE}/blog.json",
            headers=self._headers(),
            params={"query": query, "display": display, "sort": "sim"},
            label="naver-blog",
        )
        if not resp or not resp.is_success:
            return []
        return resp.json().get("items", [])
    async def fetch_web_search(self, query: str, display: int = 10) -> list[dict]:
        resp = await http_request(
            HTTPMethod.GET,
            url=f"{NAVER_BASE}/webkr.json",
            headers=self._headers(),
            params={"query": query, "display": display},
            label="naver-web",
        )
        if not resp or not resp.is_success:
            return []
        return resp.json().get("items", [])
    async def fetch_local_search(self, query: str, display: int = 5) -> list[dict]:
        resp = await http_request(
            HTTPMethod.GET,
            url=f"{NAVER_BASE}/local.json",
            headers=self._headers(),
            params={"query": query, "display": display, "sort": "comment"},
            label="naver-local",
        )
        if not resp or not resp.is_success:
            return []
        return resp.json().get("items", [])
    async def fetch_blog_rss(self, blog_handle: str) -> str | None:
        resp = await http_request(
            HTTPMethod.GET,
            url=f"https://rss.blog.naver.com/{blog_handle}.xml",
            timeout=15,
            label="naver-rss",
        )
        if not resp or not resp.is_success:
            return None
        return resp.text
    async def get_blog_rss(self, blog_handle: str) -> dict | None:
        xml = await self.fetch_blog_rss(blog_handle)
        if not xml:
            return None
        posts = []
        for m in re.finditer(r"<item>([\s\S]*?)</item>", xml):
            block = m.group(1)
            title = re.search(r"<title><!\[CDATA\[(.*?)\]\]></title>", block) or re.search(r"<title>(.*?)</title>", block)
            link = re.search(r"<link>(.*?)</link>", block)
            date = re.search(r"<pubDate>(.*?)</pubDate>", block)
            desc = re.search(r"<description><!\[CDATA\[(.*?)\]\]></description>", block) or re.search(r"<description>(.*?)</description>", block)
            posts.append({
                "title": title.group(1) if title else "",
                "link": link.group(1) if link else "",
                "postDate": date.group(1) if date else "",
                "description": re.sub(r"<[^>]*>", "", desc.group(1) if desc else "").strip()[:150],
            })
        total_match = re.search(r"<totalCount>(\d+)</totalCount>", xml)
        return {
            "officialBlogUrl": f"https://blog.naver.com/{blog_handle}",
            "officialBlogHandle": blog_handle,
            "totalResults": int(total_match.group(1)) if total_match else len(posts),
            "posts": posts[:10],
        }
--- a/app/integrations/youtube.py
+++ b/app/integrations/youtube.py
@ -0,0 +1,123 @@
 from http import HTTPMethod
 from common.utils import http_request
 YT = "https://www.googleapis.com/youtube/v3"
 class YouTubeClient:
    def __init__(self, api_key: str):
        self.api_key = api_key
    async def _resolve_channel_id(self, handle: str) -> str:
        h = handle.lstrip("@")
        if h.startswith("UC") and len(h) == 24:
            return h
        for param in ("forHandle", "forUsername"):
            resp = await http_request(
                HTTPMethod.GET,
                url=f"{YT}/channels",
                params={"part": "id", param: h, "key": self.api_key},
                label="yt-resolve",
            )
            if resp and resp.is_success:
                items = resp.json().get("items", [])
                if items:
                    return items[0]["id"]
        return ""
    async def fetch_channel(self, handle_or_id: str) -> dict | None:
        channel_id = await self._resolve_channel_id(handle_or_id)
        if not channel_id:
            return None
        resp = await http_request(
            HTTPMethod.GET,
            url=f"{YT}/channels",
            params={"part": "snippet,statistics", "id": channel_id, "key": self.api_key},
            label="yt-channel",
        )
        if not resp or not resp.is_success:
            return None
        items = resp.json().get("items", [])
        if not items:
            return None
        channel = items[0]
        video_ids: list[str] = []
        resp = await http_request(
            HTTPMethod.GET,
            url=f"{YT}/search",
            params={
                "part": "snippet",
                "channelId": channel_id,
                "order": "viewCount",
                "type": "video",
                "maxResults": 10,
                "key": self.api_key,
            },
            label="yt-search",
        )
        if resp and resp.is_success:
            video_ids = [i["id"]["videoId"] for i in resp.json().get("items", []) if i.get("id", {}).get("videoId")]
        videos: list[dict] = []
        if video_ids:
            resp = await http_request(
                HTTPMethod.GET,
                url=f"{YT}/videos",
                params={
                    "part": "snippet,statistics,contentDetails",
                    "id": ",".join(video_ids),
                    "key": self.api_key,
                },
                label="yt-videos",
            )
            if resp and resp.is_success:
                videos = resp.json().get("items", [])[:10]
        return {"channelId": channel_id, "channel": channel, "videos": videos}
    async def get_channel(self, handle_or_id: str) -> dict | None:
        raw = await self.fetch_channel(handle_or_id)
        if not raw:
            return None
        ch = raw["channel"]
        stats = ch.get("statistics", {})
        snippet = ch.get("snippet", {})
        return {
            "channelId": raw["channelId"],
            "channelName": snippet.get("title"),
            "handle": snippet.get("customUrl"),
            "description": snippet.get("description", ""),
            "publishedAt": snippet.get("publishedAt"),
            "subscribers": int(stats.get("subscriberCount", 0)),
            "totalViews": int(stats.get("viewCount", 0)),
            "totalVideos": int(stats.get("videoCount", 0)),
            "videos": [
                {
                    "title": v.get("snippet", {}).get("title"),
                    "views": int(v.get("statistics", {}).get("viewCount", 0)),
                    "likes": int(v.get("statistics", {}).get("likeCount", 0)),
                    "comments": int(v.get("statistics", {}).get("commentCount", 0)),
                    "date": v.get("snippet", {}).get("publishedAt"),
                    "duration": v.get("contentDetails", {}).get("duration"),
                    "url": f"https://www.youtube.com/watch?v={v['id']}",
                }
                for v in raw["videos"]
            ],
        }
    async def search_channels(self, query: str, max_results: int = 3) -> list[str]:
        resp = await http_request(
            HTTPMethod.GET,
            url=f"{YT}/search",
            params={"part": "snippet", "type": "channel", "q": query, "maxResults": max_results, "key": self.api_key},
            label="yt-search-channels",
        )
        if not resp or not resp.is_success:
            return []
        return [
            i.get("snippet", {}).get("channelId") or i.get("id", {}).get("channelId")
            for i in resp.json().get("items", [])
            if i.get("snippet", {}).get("channelId") or i.get("id", {}).get("channelId")
        ]
--- a/app/test_fetch.py
+++ b/app/test_fetch.py
@ -0,0 +1,54 @@
 import asyncio
 import json
 import os
 from dotenv import load_dotenv
 load_dotenv("../.env")
 from common.utils import normalize_handle
 from integrations.youtube import YouTubeClient
 from integrations.apify import ApifyClient
 from integrations.naver import NaverClient
 from integrations.firecrawl import FirecrawlClient
 INPUT = {
    "youtube": "@banobagips",
    "instagram": ["@banobagi_ps"],
    "facebook": "BanobagiPlasticSurgery",
    "naver_blog": "https://blog.naver.com/banobagiprs",
    "gangnam_unni": "https://www.gangnamunni.com/hospitals/23",
 }
 OUT_DIR = "../test_results"
 def save(name: str, data) -> None:
    """Write ``data`` as UTF-8 JSON to ``OUT_DIR/<name>.json`` and print the path.

    The output directory is created if it does not exist yet. Non-ASCII
    characters are written as-is rather than escaped.
    """
    os.makedirs(OUT_DIR, exist_ok=True)
    target = os.path.join(OUT_DIR, f"{name}.json")
    serialized = json.dumps(data, ensure_ascii=False)
    with open(target, "w", encoding="utf-8") as handle:
        handle.write(serialized)
    print(f"saved → {target}")
 async def main():
    """Fetch raw data for the sample ``INPUT`` from each integration and save it.

    Clients are built from API credentials read from the environment, the
    sample handles are normalized, and each raw fetch result is written to its
    own JSON file through ``save``. The fetches run one after another.

    Raises:
        KeyError: If one of the required environment variables is not set.
    """
    yt = YouTubeClient(api_key=os.environ["YOUTUBE_API_KEY"])
    apify = ApifyClient(token=os.environ["APIFY_API_TOKEN"])
    naver = NaverClient(client_id=os.environ["NAVER_CLIENT_ID"], client_secret=os.environ["NAVER_CLIENT_SECRET"])
    firecrawl = FirecrawlClient(api_key=os.environ["FIRECRAWL_API_KEY"])
    yt_handle = normalize_handle("youtube", INPUT["youtube"])
    ig_handle = normalize_handle("instagram", INPUT["instagram"][0])
    fb_handle = normalize_handle("facebook", INPUT["facebook"])
    naver_handle = normalize_handle("naver_blog", INPUT["naver_blog"])
    save("youtube", await yt.fetch_channel(yt_handle))
    save("instagram_profile", await apify.fetch_instagram_profile(ig_handle))
    # Optional fetches, left disabled in this script:
    # save("instagram_posts", await apify.fetch_instagram_posts(ig_handle))
    # save("instagram_reels", await apify.fetch_instagram_reels(ig_handle))
    save("facebook", await apify.fetch_facebook_page(f"https://www.facebook.com/{fb_handle}"))
    save("naver_blog", await naver.fetch_blog_rss(naver_handle))
    save("gangnam_unni", await firecrawl.fetch_gangnam_unni(INPUT["gangnam_unni"]))
 # Run only when executed as a script. The file name matches pytest's default
 # test_*.py discovery pattern, so an unguarded call would fire live API
 # requests whenever pytest imports this module during collection.
 if __name__ == "__main__":
    asyncio.run(main())
--- a/requirements.txt
+++ b/requirements.txt
@ -4,6 +4,7 @@ pydantic==2.13.2
 python-dotenv==1.2.2
 redis==7.4.0
 httpx==0.28.1
 openai==2.32.0
 python-jose[cryptography]==3.5.0
 passlib[bcrypt]==1.7.4
 python-multipart==0.0.26
		`@ -0,0 +1,3 @@`
							`from .service import LLMService`
							`from .prompt import Prompt`