이미지 태깅 제미나이 적용 및 슬롯 적용 로직 추가

image-tagging
jaehwang 2026-04-02 23:55:42 +00:00
parent ebf76a0f8f
commit cc7ee58006
12 changed files with 232 additions and 151 deletions

View File

@ -30,7 +30,7 @@ from app.home.schemas.home_schema import (
)
from app.home.services.naver_search import naver_search_client
from app.utils.upload_blob_as_request import AzureBlobUploader
from app.utils.chatgpt_prompt import ChatgptService, ChatGPTResponseError
from app.utils.prompts.chatgpt_prompt import ChatgptService, ChatGPTResponseError
from app.utils.common import generate_task_id
from app.utils.logger import get_logger
from app.utils.nvMapScraper import NvMapScraper, GraphQLException

View File

@ -42,7 +42,7 @@ from app.lyric.schemas.lyric import (
LyricStatusResponse,
)
from app.lyric.worker.lyric_task import generate_lyric_background, generate_subtitle_background
from app.utils.chatgpt_prompt import ChatgptService
from app.utils.prompts.chatgpt_prompt import ChatgptService
from app.utils.logger import get_logger
from app.utils.pagination import PaginatedResponse, get_paginated
@ -253,17 +253,6 @@ async def generate_lyric(
step1_start = time.perf_counter()
logger.debug(f"[generate_lyric] Step 1: 서비스 초기화 및 프롬프트 생성...")
# service = ChatgptService(
# customer_name=request_body.customer_name,
# region=request_body.region,
# detail_region_info=request_body.detail_region_info or "",
# language=request_body.language,
# )
# prompt = service.build_lyrics_prompt()
# 원래는 실제 사용할 프롬프트가 들어가야 하나, 로직이 변경되어 이 시점에서 이곳에서 프롬프트를 생성할 이유가 없어서 삭제됨.
# 기존 코드와의 호환을 위해 동일한 로직으로 프롬프트 생성
promotional_expressions = {
"Korean" : "인스타 감성, 사진같은 하루, 힐링, 여행, 감성 숙소",
"English" : "Instagram vibes, picture-perfect day, healing, travel, getaway",

View File

@ -13,7 +13,7 @@ from sqlalchemy.exc import SQLAlchemyError
from app.database.session import BackgroundSessionLocal
from app.home.models import Image, Project, MarketingIntel
from app.lyric.models import Lyric
from app.utils.chatgpt_prompt import ChatgptService, ChatGPTResponseError
from app.utils.prompts.chatgpt_prompt import ChatgptService, ChatGPTResponseError
from app.utils.subtitles import SubtitleContentsGenerator
from app.utils.creatomate import CreatomateService
from app.utils.prompts.prompts import Prompt
@ -104,13 +104,6 @@ async def generate_lyric_background(
step1_start = time.perf_counter()
logger.debug(f"[generate_lyric_background] Step 1: ChatGPT 서비스 초기화...")
# service = ChatgptService(
# customer_name="", # 프롬프트가 이미 생성되었으므로 빈 값
# region="",
# detail_region_info="",
# language=language,
# )
chatgpt = ChatgptService()
step1_elapsed = (time.perf_counter() - step1_start) * 1000

View File

@ -17,7 +17,7 @@ from app.home.models import MarketingIntel, Project
from app.social.constants import YOUTUBE_SEO_HASH
from app.social.schemas import YoutubeDescriptionResponse
from app.user.models import User
from app.utils.chatgpt_prompt import ChatgptService
from app.utils.prompts.chatgpt_prompt import ChatgptService
from app.utils.prompts.prompts import yt_upload_prompt
logger = logging.getLogger(__name__)

View File

@ -14,7 +14,7 @@ from app.lyric.schemas.lyrics_schema import (
SongSampleData,
StoreData,
)
from app.utils.chatgpt_prompt import chatgpt_api
from app.utils.prompts.chatgpt_prompt import chatgpt_api
logger = get_logger("song")

View File

@ -1,11 +1,13 @@
from app.utils.chatgpt_prompt import ChatgptService
from pydantic.main import BaseModel
from app.utils.prompts.chatgpt_prompt import ChatgptService
from app.utils.prompts.prompts import image_autotag_prompt
from app.utils.prompts.schemas import SpaceType, Subject, Camera, MotionRecommended
import asyncio
async def autotag_image(image_url : str) -> list[str]: #tag_list
chatgpt = ChatgptService()
chatgpt = ChatgptService(model_type="gemini")
image_input_data = {
"img_url" : image_url,
"space_type" : list(SpaceType),
@ -18,7 +20,7 @@ async def autotag_image(image_url : str) -> list[str]: #tag_list
return image_result
async def autotag_images(image_url_list : list[str]) -> list[dict]: #tag_list
chatgpt = ChatgptService()
chatgpt = ChatgptService(model_type="gemini")
image_input_data_list = [{
"img_url" : image_url,
"space_type" : list(SpaceType),
@ -28,7 +30,7 @@ async def autotag_images(image_url_list : list[str]) -> list[dict]: #tag_list
}for image_url in image_url_list]
image_result_tasks = [chatgpt.generate_structured_output(image_autotag_prompt, image_input_data, image_input_data['img_url'], False, silent = True) for image_input_data in image_input_data_list]
image_result_list = await asyncio.gather(*image_result_tasks, return_exceptions=True)
image_result_list: list[BaseModel | BaseException] = await asyncio.gather(*image_result_tasks, return_exceptions=True)
MAX_RETRY = 3 # 하드코딩, 어떻게 처리할지는 나중에
for _ in range(MAX_RETRY):
failed_idx = [i for i, r in enumerate(image_result_list) if isinstance(r, Exception)]
@ -36,7 +38,7 @@ async def autotag_images(image_url_list : list[str]) -> list[dict]: #tag_list
if not failed_idx:
break
retried = await asyncio.gather(
*[chatgpt.generate_structured_output(image_autotag_prompt, image_input_data_list[i], image_input_data_list[i]['img_url'], False, silent=True) for i in failed],
*[chatgpt.generate_structured_output(image_autotag_prompt, image_input_data_list[i], image_input_data_list[i]['img_url'], False, silent=True) for i in failed_idx],
return_exceptions=True
)
for i, result in zip(failed_idx, retried):

View File

@ -1,116 +0,0 @@
import json
import re
from pydantic import BaseModel
from typing import List, Optional
from openai import AsyncOpenAI
from app.utils.logger import get_logger
from config import apikey_settings, recovery_settings
from app.utils.prompts.prompts import Prompt
# 로거 설정
logger = get_logger("chatgpt")
class ChatGPTResponseError(Exception):
"""ChatGPT API 응답 에러"""
def __init__(self, status: str, error_code: str = None, error_message: str = None):
self.status = status
self.error_code = error_code
self.error_message = error_message
super().__init__(f"ChatGPT response failed: status={status}, code={error_code}, message={error_message}")
class ChatgptService:
"""ChatGPT API 서비스 클래스
"""
def __init__(self, timeout: float = None):
self.timeout = timeout or recovery_settings.CHATGPT_TIMEOUT
self.max_retries = recovery_settings.CHATGPT_MAX_RETRIES
self.client = AsyncOpenAI(
api_key=apikey_settings.CHATGPT_API_KEY,
timeout=self.timeout
)
async def _call_pydantic_output(
self,
prompt : str,
output_format : BaseModel, #입력 output_format의 경우 Pydantic BaseModel Class를 상속한 Class 자체임에 유의할 것
model : str,
img_url : str,
image_detail_high : bool) -> BaseModel:
content = []
if img_url:
content.append({
"type" : "input_image",
"image_url" : img_url,
"detail": "high" if image_detail_high else "low"
})
content.append({
"type": "input_text",
"text": prompt}
)
last_error = None
for attempt in range(self.max_retries + 1):
response = await self.client.responses.parse(
model=model,
input=[{"role": "user", "content": content}],
text_format=output_format
)
# Response 디버그 로깅
logger.debug(f"[ChatgptService] attempt: {attempt}")
logger.debug(f"[ChatgptService] Response ID: {response.id}")
logger.debug(f"[ChatgptService] Response status: {response.status}")
logger.debug(f"[ChatgptService] Response model: {response.model}")
# status 확인: completed, failed, incomplete, cancelled, queued, in_progress
if response.status == "completed":
logger.debug(f"[ChatgptService] Response output_text: {response.output_text[:200]}..." if len(response.output_text) > 200 else f"[ChatgptService] Response output_text: {response.output_text}")
structured_output = response.output_parsed
return structured_output #.model_dump() or {}
# 에러 상태 처리
if response.status == "failed":
error_code = getattr(response.error, 'code', None) if response.error else None
error_message = getattr(response.error, 'message', None) if response.error else None
logger.warning(f"[ChatgptService] Response failed (attempt {attempt + 1}/{self.max_retries + 1}): code={error_code}, message={error_message}")
last_error = ChatGPTResponseError(response.status, error_code, error_message)
elif response.status == "incomplete":
reason = getattr(response.incomplete_details, 'reason', None) if response.incomplete_details else None
logger.warning(f"[ChatgptService] Response incomplete (attempt {attempt + 1}/{self.max_retries + 1}): reason={reason}")
last_error = ChatGPTResponseError(response.status, reason, f"Response incomplete: {reason}")
else:
# cancelled, queued, in_progress 등 예상치 못한 상태
logger.warning(f"[ChatgptService] Unexpected response status (attempt {attempt + 1}/{self.max_retries + 1}): {response.status}")
last_error = ChatGPTResponseError(response.status, None, f"Unexpected status: {response.status}")
# 마지막 시도가 아니면 재시도
if attempt < self.max_retries:
logger.info(f"[ChatgptService] Retrying request...")
# 모든 재시도 실패
logger.error(f"[ChatgptService] All retries exhausted. Last error: {last_error}")
raise last_error
async def generate_structured_output(
self,
prompt : Prompt,
input_data : dict,
img_url : Optional[str] = None,
img_detail_high : bool = False,
silent : bool = False
) -> BaseModel:
prompt_text = prompt.build_prompt(input_data, silent)
logger.debug(f"[ChatgptService] Generated Prompt (length: {len(prompt_text)})")
if not silent:
logger.info(f"[ChatgptService] Starting GPT request with structured output with model: {prompt.prompt_model}")
# GPT API 호출
#response = await self._call_structured_output_with_response_gpt_api(prompt_text, prompt.prompt_output, prompt.prompt_model)
response = await self._call_pydantic_output(prompt_text, prompt.prompt_output_class, prompt.prompt_model, img_url, img_detail_high)
return response

View File

@ -33,7 +33,7 @@ import copy
import time
from enum import StrEnum
from typing import Literal
import traceback
import httpx
from app.utils.logger import get_logger
@ -477,9 +477,22 @@ class CreatomateService:
if slot_tag_cate == "narrative_preference":
slot_tag_narrative = slot_tag_item
continue
match slot_tag_cate:
case "space_type":
weight = 2
case "subject" :
weight = 2
case "camera":
weight = 1
case "motion_recommended" :
weight = 0.5
case _:
raise
for idx, image_tag in enumerate(image_tag_list):
if slot_tag_item.value in image_tag[slot_tag_cate]: #collect!
image_score_list[idx] += 1 / (len(image_tag) - 1)
image_score_list[idx] += weight
for idx, image_tag in enumerate(image_tag_list):
image_narrative_score = image_tag["narrative_preference"][slot_tag_narrative]
@ -737,19 +750,20 @@ class CreatomateService:
try:
if elem["track"] not in track_maximum_duration:
continue
if elem["time"] == 0: # elem is auto / 만약 마지막 elem이 auto인데 그 앞에 time이 있는 elem 일 시 버그 발생 확률 있음
if "time" not in elem or elem["time"] == 0: # elem is auto / 만약 마지막 elem이 auto인데 그 앞에 time이 있는 elem 일 시 버그 발생 확률 있음
track_maximum_duration[elem["track"]] += elem["duration"]
if "animations" not in elem:
continue
for animation in elem["animations"]:
assert animation["time"] == 0 # 0이 아닌 경우 확인 필요
if animation["transition"]:
if "transition" in animation and animation["transition"]:
track_maximum_duration[elem["track"]] -= animation["duration"]
else:
track_maximum_duration[elem["track"]] = max(track_maximum_duration[elem["track"]], elem["time"] + elem["duration"])
except Exception as e:
logger.debug(traceback.format_exc())
logger.error(f"[calc_scene_duration] Error processing element: {elem}, {e}")
total_template_duration = max(track_maximum_duration.values())

View File

@ -0,0 +1,191 @@
import json
import re
from pydantic import BaseModel
from typing import List, Optional
from openai import AsyncOpenAI
from app.utils.logger import get_logger
from config import apikey_settings, recovery_settings
from app.utils.prompts.prompts import Prompt
# 로거 설정
logger = get_logger("chatgpt")
class ChatGPTResponseError(Exception):
"""ChatGPT API 응답 에러"""
def __init__(self, status: str, error_code: str = None, error_message: str = None):
self.status = status
self.error_code = error_code
self.error_message = error_message
super().__init__(f"ChatGPT response failed: status={status}, code={error_code}, message={error_message}")
class ChatgptService:
"""ChatGPT API 서비스 클래스
"""
model_type : str
def __init__(self, model_type:str = "gpt", timeout: float = None):
self.timeout = timeout or recovery_settings.CHATGPT_TIMEOUT
self.max_retries = recovery_settings.CHATGPT_MAX_RETRIES
self.model_type = model_type
match model_type:
case "gpt":
self.client = AsyncOpenAI(
api_key=apikey_settings.CHATGPT_API_KEY,
timeout=self.timeout
)
case "gemini":
self.client = AsyncOpenAI(
api_key=apikey_settings.GEMINI_API_KEY,
base_url="https://generativelanguage.googleapis.com/v1beta/openai/",
timeout=self.timeout
)
case _:
raise NotImplementedError(f"Unknown Provider : {model_type}")
async def _call_pydantic_output(
self,
prompt : str,
output_format : BaseModel, #입력 output_format의 경우 Pydantic BaseModel Class를 상속한 Class 자체임에 유의할 것
model : str,
img_url : str,
image_detail_high : bool) -> BaseModel:
content = []
if img_url:
content.append({
"type" : "input_image",
"image_url" : img_url,
"detail": "high" if image_detail_high else "low"
})
content.append({
"type": "input_text",
"text": prompt}
)
last_error = None
for attempt in range(self.max_retries + 1):
response = await self.client.responses.parse(
model=model,
input=[{"role": "user", "content": content}],
text_format=output_format
)
# Response 디버그 로깅
logger.debug(f"[ChatgptService({self.model_type})] attempt: {attempt}")
logger.debug(f"[ChatgptService({self.model_type})] Response ID: {response.id}")
logger.debug(f"[ChatgptService({self.model_type})] Response status: {response.status}")
logger.debug(f"[ChatgptService({self.model_type})] Response model: {response.model}")
# status 확인: completed, failed, incomplete, cancelled, queued, in_progress
if response.status == "completed":
logger.debug(f"[ChatgptService({self.model_type})] Response output_text: {response.output_text[:200]}..." if len(response.output_text) > 200 else f"[ChatgptService] Response output_text: {response.output_text}")
structured_output = response.output_parsed
return structured_output #.model_dump() or {}
# 에러 상태 처리
if response.status == "failed":
error_code = getattr(response.error, 'code', None) if response.error else None
error_message = getattr(response.error, 'message', None) if response.error else None
logger.warning(f"[ChatgptService({self.model_type})] Response failed (attempt {attempt + 1}/{self.max_retries + 1}): code={error_code}, message={error_message}")
last_error = ChatGPTResponseError(response.status, error_code, error_message)
elif response.status == "incomplete":
reason = getattr(response.incomplete_details, 'reason', None) if response.incomplete_details else None
logger.warning(f"[ChatgptService({self.model_type})] Response incomplete (attempt {attempt + 1}/{self.max_retries + 1}): reason={reason}")
last_error = ChatGPTResponseError(response.status, reason, f"Response incomplete: {reason}")
else:
# cancelled, queued, in_progress 등 예상치 못한 상태
logger.warning(f"[ChatgptService({self.model_type})] Unexpected response status (attempt {attempt + 1}/{self.max_retries + 1}): {response.status}")
last_error = ChatGPTResponseError(response.status, None, f"Unexpected status: {response.status}")
# 마지막 시도가 아니면 재시도
if attempt < self.max_retries:
logger.info(f"[ChatgptService({self.model_type})] Retrying request...")
# 모든 재시도 실패
logger.error(f"[ChatgptService({self.model_type})] All retries exhausted. Last error: {last_error}")
raise last_error
async def _call_pydantic_output_chat_completion( # alter version
self,
prompt : str,
output_format : BaseModel, #입력 output_format의 경우 Pydantic BaseModel Class를 상속한 Class 자체임에 유의할 것
model : str,
img_url : str,
image_detail_high : bool) -> BaseModel:
content = []
if img_url:
content.append({
"type": "image_url",
"image_url": {
"url": img_url,
"detail": "high" if image_detail_high else "low"
}
})
content.append({
"type": "text",
"text": prompt
})
last_error = None
for attempt in range(self.max_retries + 1):
response = await self.client.beta.chat.completions.parse(
model=model,
messages=[{"role": "user", "content": content}],
response_format=output_format
)
# Response 디버그 로깅
logger.debug(f"[ChatgptService({self.model_type})] attempt: {attempt}")
logger.debug(f"[ChatgptService({self.model_type})] Response ID: {response.id}")
logger.debug(f"[ChatgptService({self.model_type})] Response finish_reason: {response.id}")
logger.debug(f"[ChatgptService({self.model_type})] Response model: {response.model}")
choice = response.choices[0]
finish_reason = choice.finish_reason
if finish_reason == "stop":
output_text = choice.message.content or ""
logger.debug(f"[ChatgptService({self.model_type})] Response output_text: {output_text[:200]}..." if len(output_text) > 200 else f"[ChatgptService] Response output_text: {output_text}")
return choice.message.parsed
elif finish_reason == "length":
logger.warning(f"[ChatgptService({self.model_type})] Response incomplete - token limit reached (attempt {attempt + 1}/{self.max_retries + 1})")
last_error = ChatGPTResponseError("incomplete", finish_reason, "Response incomplete: max tokens reached")
elif finish_reason == "content_filter":
logger.warning(f"[ChatgptService({self.model_type})] Response blocked by content filter (attempt {attempt + 1}/{self.max_retries + 1})")
last_error = ChatGPTResponseError("failed", finish_reason, "Response blocked by content filter")
else:
logger.warning(f"[ChatgptService({self.model_type})] Unexpected finish_reason (attempt {attempt + 1}/{self.max_retries + 1}): {finish_reason}")
last_error = ChatGPTResponseError("failed", finish_reason, f"Unexpected finish_reason: {finish_reason}")
# 마지막 시도가 아니면 재시도
if attempt < self.max_retries:
logger.info(f"[ChatgptService({self.model_type})] Retrying request...")
# 모든 재시도 실패
logger.error(f"[ChatgptService({self.model_type})] All retries exhausted. Last error: {last_error}")
raise last_error
async def generate_structured_output(
self,
prompt : Prompt,
input_data : dict,
img_url : Optional[str] = None,
img_detail_high : bool = False,
silent : bool = False
) -> BaseModel:
prompt_text = prompt.build_prompt(input_data, silent)
logger.debug(f"[ChatgptService({self.model_type})] Generated Prompt (length: {len(prompt_text)})")
if not silent:
logger.info(f"[ChatgptService({self.model_type})] Starting GPT request with structured output with model: {prompt.prompt_model}")
# GPT API 호출
#parsed = await self._call_structured_output_with_response_gpt_api(prompt_text, prompt.prompt_output, prompt.prompt_model)
# parsed = await self._call_pydantic_output(prompt_text, prompt.prompt_output_class, prompt.prompt_model, img_url, img_detail_high)
parsed = await self._call_pydantic_output_chat_completion(prompt_text, prompt.prompt_output_class, prompt.prompt_model, img_url, img_detail_high)
return parsed

View File

@ -6,7 +6,7 @@ from typing import Literal, Any
import httpx
from app.utils.logger import get_logger
from app.utils.chatgpt_prompt import ChatgptService
from app.utils.prompts.chatgpt_prompt import ChatgptService
from app.utils.prompts.schemas import *
from app.utils.prompts.prompts import *

View File

@ -355,7 +355,7 @@ async def generate_video(
taged_image_list = taged_image_list,
music_url = music_url,
address = store_address,
duplicate = True,
duplicate = False,
)
logger.debug(f"[generate_video] Modifications created - task_id: {task_id}")

View File

@ -42,6 +42,7 @@ class ProjectSettings(BaseSettings):
class APIKeySettings(BaseSettings):
CHATGPT_API_KEY: str = Field(default="your-chatgpt-api-key") # 기본값 추가
GEMINI_API_KEY: str = Field(default="your-gemeni-api-key") # 기본값 추가
SUNO_API_KEY: str = Field(default="your-suno-api-key") # Suno API 키
SUNO_CALLBACK_URL: str = Field(
default="https://example.com/api/suno/callback"
@ -209,6 +210,14 @@ class RecoverySettings(BaseSettings):
# ============================================================
# ChatGPT API 설정
# ============================================================
LLM_TIMEOUT: float = Field(
default=600.0,
description="LLM Default API 타임아웃 (초)",
)
LLM_MAX_RETRIES: int = Field(
default=1,
description="LLM API 응답 실패 시 최대 재시도 횟수",
)
CHATGPT_TIMEOUT: float = Field(
default=600.0,
description="ChatGPT API 타임아웃 (초). OpenAI Python SDK 기본값: 600초 (10분)",
@ -217,7 +226,6 @@ class RecoverySettings(BaseSettings):
default=1,
description="ChatGPT API 응답 실패 시 최대 재시도 횟수",
)
# ============================================================
# Suno API 설정
# ============================================================