# o2o-infinith-backend/app/integrations/vision.py

"""Gemini Vision — 로고/브랜드 비주얼 자동 분석 (OpenAI 호환 모드).

정확한 hex 색상은 color_extractor가 CSS에서 직접 뽑음 (Vision은 근사값밖에 못 냄).
Vision은 사람이 봐야 알 수 있는 정성 정보 — 심볼 형태/워드마크/톤 — 를 담당.
"""
import asyncio
import base64
import json
import logging
import re

import httpx
from openai import AsyncOpenAI

logger = logging.getLogger(__name__)

DEFAULT_MODEL = "gemini-2.5-flash"


class VisionClient:
    """Gemini Vision을 OpenAI 호환 endpoint로 호출. GEMINI_API_KEY만 필요."""

    def __init__(self, api_key: str, model: str = DEFAULT_MODEL, timeout: float = 30.0, max_retries: int = 2):
        self.client = AsyncOpenAI(
            api_key=api_key,
            base_url="https://generativelanguage.googleapis.com/v1beta/openai/",
            timeout=timeout,
            max_retries=max_retries,
        )
        self.model = model

    @staticmethod
    def _extract_json(text: str) -> dict | None:
        if not text:
            return None
        m = re.search(r"```(?:json)?\s*(\{.*?\})\s*```", text, re.DOTALL)
        if m:
            try:
                return json.loads(m.group(1))
            except json.JSONDecodeError:
                pass
        m = re.search(r"\{.*\}", text, re.DOTALL)
        if m:
            try:
                return json.loads(m.group(0))
            except json.JSONDecodeError:
                return None
        return None

    @staticmethod
    async def _fetch_as_data_url(url: str) -> str | None:
        """Gemini는 URL 직접 fetch가 막힌 호스트가 많아 base64 인라인으로 변환.
        + 'image does not exist' 같은 placeholder 이미지 거부 (작은 bytes / 잘못된 content-type)."""
        try:
            async with httpx.AsyncClient(timeout=15.0, follow_redirects=True) as c:
                resp = await c.get(url)
                if resp.status_code != 200:
                    logger.warning("[vision] fetch %s status=%s", url, resp.status_code)
                    return None
                mime = resp.headers.get("content-type", "").split(";")[0].strip()
                # 실제 이미지가 아니면 거부 (HTML 페이지가 404 대신 200으로 리다이렉트 되는 경우)
                if not mime.startswith("image/"):
                    logger.warning("[vision] %s not an image (content-type=%s)", url, mime)
                    return None
                size = len(resp.content)
                if size < 500:
                    logger.warning("[vision] %s too small (%d bytes) — likely placeholder", url, size)
                    return None
                b64 = base64.b64encode(resp.content).decode("ascii")
                return f"data:{mime};base64,{b64}"
        except Exception as e:
            logger.warning("[vision] fetch error %s: %s", url, e)
            return None

    async def _ask(self, image_urls: list[str], prompt: str, max_tokens: int = 4000) -> dict | None:
        content: list[dict] = []
        for u in image_urls:
            if not u:
                continue
            data_url = await self._fetch_as_data_url(u)
            if not data_url:
                continue
            content.append({"type": "image_url", "image_url": {"url": data_url}})
        if not any(c.get("type") == "image_url" for c in content):
            logger.warning("[vision] no images could be fetched")
            return None
        content.append({"type": "text", "text": prompt})

        try:
            resp = await self.client.chat.completions.create(
                model=self.model,
                messages=[{"role": "user", "content": content}],
                max_tokens=max_tokens,
            )
            choice = resp.choices[0]
            if choice.finish_reason != "stop":
                logger.warning("[vision] unexpected finish_reason=%s", choice.finish_reason)
            return self._extract_json(choice.message.content or "")
        except Exception as e:
            logger.warning("[vision] error: %s", e)
            return None

    async def analyze_brand_assets(
        self,
        logo_url: str | None,
        homepage_url: str | None,
        additional_images: list[str] | None = None,
    ) -> dict:
        """로고 이미지를 보고 정성 분석. 정확한 hex는 color_extractor가 따로 처리하므로 여기선 안 뽑음."""
        urls = [u for u in [logo_url] + list(additional_images or []) if u]
        if not urls:
            return {}
        prompt = (
            "당신은 브랜드 로고 시각 분석가입니다. 첨부된 이미지(첫 번째가 병원의 대표 로고)를 보고 "
            "아래 JSON 스키마로만 응답하세요. 코드펜스 없이 순수 JSON만 출력.\n"
            "{\n"
            '  "logo_description": "로고를 1~2문장으로 설명 (심볼 형태 + 워드마크 + 전반적 톤). 예: \'둥근 잎사귀를 감싼 추상 심볼에 세리프 한글 워드마크, 차분하고 고급스러운 톤\'",\n'
            '  "logo_style": "minimal | illustrative | typographic | abstract 중 하나",\n'
            '  "has_symbol": "심볼/아이콘이 있으면 true, 글자만 있으면 false (boolean)",\n'
            '  "logo_symbol": "심볼이 묘사하는 대상 (예: \'잎사귀\', \'추상 곡선\'). 없으면 빈 문자열",\n'
            '  "logo_text": "로고에 보이는 워드마크 텍스트 그대로 (한글/영문). 없으면 빈 문자열",\n'
            '  "logo_colors_desc": "로고에 쓰인 색감을 사람이 부르는 이름으로 서술 (예: \'딥네이비 + 골드\'). 정확한 hex는 출력하지 말 것"\n'
            "}\n"
            "주의: 색상 hex 값이나 logo URL 같은 필드는 출력하지 마세요 (별도 추출 로직이 처리)."
        )
        result = await self._ask(urls, prompt)
        if not result:
            return {}
        # logo_images는 우리가 직접 채움 (Vision은 묘사만)
        result["logo_images"] = {"circle": None, "horizontal": logo_url, "korean": None}
        return result

    async def describe_channel_logos(
        self,
        official_logo_url: str | None,
        channel_logos: list[dict],
    ) -> dict | None:
        """채널별 프로필 이미지(로고)를 보고 각각 설명 + 공식 로고와 일치 여부 평가.
        channel_logos: [{"channel": "Instagram", "url": "..."}, ...]
        반환: {"channel_logos": [{"channel","logo_description","is_official"}], "inconsistency_summary", "recommendation"}"""
        items = [c for c in channel_logos if c.get("url")]
        if not items:
            return None

        # 공식 로고가 있으면 맨 앞에 두고 기준으로 삼음
        urls: list[str] = []
        if official_logo_url:
            urls.append(official_logo_url)
        urls.extend(c["url"] for c in items)
        channel_order = ", ".join(c.get("channel", "?") for c in items)

        if official_logo_url:
            header = (
                "첨부 이미지 중 **첫 번째가 이 병원의 공식 로고**입니다. "
                f"이어지는 이미지들은 채널별 프로필 이미지이며 순서는: {channel_order}.\n"
                "각 채널 로고를 1문장으로 설명하고, 공식 로고(첫 번째)와 일치하면 is_official=true, "
                "비공식 변형/모델사진/다른 이미지면 false로 평가하세요.\n"
            )
        else:
            header = (
                f"첨부 이미지는 한 병원의 채널별 프로필 이미지입니다. 순서: {channel_order}.\n"
                "각 채널 로고를 1문장으로 설명하세요 (공식 로고 기준이 없으므로 is_official은 판단 가능하면만).\n"
            )
        prompt = (
            header
            + "아래 JSON으로만 응답 (코드펜스 없이 순수 JSON):\n"
            "{\n"
            '  "channel_logos": [{"channel": "...", "logo_description": "...", "is_official": true}],\n'
            '  "inconsistency_summary": "채널 간 로고 일관성 1~2문장 요약",\n'
            '  "recommendation": "통합 권고 1문장"\n'
            "}"
        )
        return await self._ask(urls, prompt)