이미지 분류 및 슬롯 찾기 적용

image-tagging
jaehwang 2026-04-01 07:07:38 +00:00
parent 2c6faadcf2
commit ebf76a0f8f
10 changed files with 947 additions and 909 deletions

View File

@ -787,6 +787,8 @@ async def tag_images_if_not_exist(
if null_tags:
tag_datas = await autotag_images([img.img_url for img in null_tags])
print(tag_datas)
for tag, tag_data in zip(null_tags, tag_datas):
tag.img_tag = tag_data.model_dump(mode="json")

View File

@ -42,7 +42,7 @@ class GenerateLyricRequest(BaseModel):
"region": "군산",
"detail_region_info": "군산 신흥동 말랭이 마을",
"language": "Korean",
"m_id" : 1,
"m_id" : 2,
"orientation" : "vertical"
}
"""

View File

@ -169,7 +169,7 @@ async def generate_subtitle_background(
) -> None:
logger.info(f"[generate_subtitle_background] task_id: {task_id}, {orientation}")
creatomate_service = CreatomateService(orientation=orientation)
template = await creatomate_service.get_one_template_data_async(creatomate_service.template_id)
template = await creatomate_service.get_one_template_data(creatomate_service.template_id)
pitchings = creatomate_service.extract_text_format_from_template(template)
subtitle_generator = SubtitleContentsGenerator()

View File

@ -7,7 +7,7 @@ from sqlalchemy import Connection, text
from sqlalchemy.exc import SQLAlchemyError
from app.utils.logger import get_logger
from app.lyrics.schemas.lyrics_schema import (
from app.lyric.schemas.lyrics_schema import (
AttributeData,
PromptTemplateData,
SongFormData,

View File

@ -27,7 +27,20 @@ async def autotag_images(image_url_list : list[str]) -> list[dict]: #tag_list
"motion_recommended" : list(MotionRecommended)
}for image_url in image_url_list]
image_result_tasks = [chatgpt.generate_structured_output(image_autotag_prompt, image_input_data, image_input_data['img_url'], False) for image_input_data in image_input_data_list]
image_result_list = await asyncio.gather(*image_result_tasks)
image_result_tasks = [chatgpt.generate_structured_output(image_autotag_prompt, image_input_data, image_input_data['img_url'], False, silent = True) for image_input_data in image_input_data_list]
image_result_list = await asyncio.gather(*image_result_tasks, return_exceptions=True)
MAX_RETRY = 3 # 하드코딩, 어떻게 처리할지는 나중에
for _ in range(MAX_RETRY):
failed_idx = [i for i, r in enumerate(image_result_list) if isinstance(r, Exception)]
print("Failed", failed_idx)
if not failed_idx:
break
retried = await asyncio.gather(
*[chatgpt.generate_structured_output(image_autotag_prompt, image_input_data_list[i], image_input_data_list[i]['img_url'], False, silent=True) for i in failed],
return_exceptions=True
)
for i, result in zip(failed_idx, retried):
image_result_list[i] = result
print("Failed", failed_idx)
return image_result_list

View File

@ -101,11 +101,13 @@ class ChatgptService:
prompt : Prompt,
input_data : dict,
img_url : Optional[str] = None,
img_detail_high : bool = False
img_detail_high : bool = False,
silent : bool = False
) -> BaseModel:
prompt_text = prompt.build_prompt(input_data)
prompt_text = prompt.build_prompt(input_data, silent)
logger.debug(f"[ChatgptService] Generated Prompt (length: {len(prompt_text)})")
if not silent:
logger.info(f"[ChatgptService] Starting GPT request with structured output with model: {prompt.prompt_model}")
# GPT API 호출

View File

@ -31,11 +31,13 @@ response = await creatomate.make_creatomate_call(template_id, modifications)
import copy
import time
from enum import StrEnum
from typing import Literal
import httpx
from app.utils.logger import get_logger
from app.utils.prompts.schemas.image import SpaceType,Subject,Camera,MotionRecommended,NarrativePhase
from config import apikey_settings, creatomate_settings, recovery_settings
# 로거 설정
@ -226,8 +228,9 @@ DVST0003 = "e1fb5b00-1f02-4f63-99fa-7524b433ba47"
DHST0001 = "660be601-080a-43ea-bf0f-adcf4596fa98"
DHST0002 = "3f194cc7-464e-4581-9db2-179d42d3e40f"
DHST0003 = "f45df555-2956-4a13-9004-ead047070b3d"
DVST0001T = "fe11aeab-ff29-4bc8-9f75-c695c7e243e6"
HST_LIST = [DHST0001,DHST0002,DHST0003]
VST_LIST = [DVST0001,DVST0002,DVST0003]
VST_LIST = [DVST0001,DVST0002,DVST0003, DVST0001T]
SCENE_TRACK = 1
AUDIO_TRACK = 2
@ -238,7 +241,7 @@ def select_template(orientation:OrientationType):
if orientation == "horizontal":
return DHST0001
elif orientation == "vertical":
return DVST0001
return DVST0001T
else:
raise
@ -399,14 +402,6 @@ class CreatomateService:
return copy.deepcopy(data)
# 하위 호환성을 위한 별칭 (deprecated)
async def get_one_template_data_async(self, template_id: str) -> dict:
"""특정 템플릿 ID로 템플릿 정보를 조회합니다.
Deprecated: get_one_template_data() 사용하세요.
"""
return await self.get_one_template_data(template_id)
def parse_template_component_name(self, template_source: list) -> dict:
"""템플릿 정보를 파싱하여 리소스 이름을 추출합니다."""
@ -441,66 +436,73 @@ class CreatomateService:
return tag_list
async def template_matching_taged_image(
def template_matching_taged_image(
self,
template_id : str,
taged_image_list : list,
address : str
template : dict,
taged_image_list : list, # [{"image_name" : str , "image_tag" : dict}]
music_url: str,
address : str,
duplicate : bool = False
) -> list:
template_data = await self.get_one_template_data(template_id)
source_elements = template_data["source"]["elements"]
source_elements = template["source"]["elements"]
template_component_data = self.parse_template_component_name(source_elements)
modifications = {}
for idx, (template_component_name, template_type) in enumerate(template_component_data.items()):
for slot_idx, (template_component_name, template_type) in enumerate(template_component_data.items()):
match template_type:
case "image":
# modifications[template_component_name] = somethingtagedimage()
image_score_list = self.calculate_image_slot_score_multi(taged_image_list, template_component_name)
maximum_idx = image_score_list.index(max(image_score_list))
if duplicate:
selected = taged_image_list[maximum_idx]
else:
selected = taged_image_list.pop(maximum_idx)
image_name = selected["image_url"]
modifications[template_component_name] =image_name
pass
case "text":
if "address_input" in template_component_name:
modifications[template_component_name] = address
# modifications["audio-music"] = music_url
async def template_connect_resource_blackbox(
self,
template_id: str,
image_url_list: list[str],
music_url: str,
address: str = None
) -> dict:
"""템플릿 정보와 이미지/가사/음악 리소스를 매핑합니다.
Note:
- 이미지는 순차적으로 집어넣기
- 가사는 개행마다 텍스트 삽입
- Template에 audio-music 항목이 있어야
"""
template_data = await self.get_one_template_data(template_id)
template_component_data = self.parse_template_component_name(
template_data["source"]["elements"]
)
modifications = {}
for idx, (template_component_name, template_type) in enumerate(
template_component_data.items()
):
match template_type:
case "image":
modifications[template_component_name] = image_url_list[
idx % len(image_url_list)
]
case "text":
if "address_input" in template_component_name:
modifications[template_component_name] = address
modifications["audio-music"] = music_url
return modifications
def calculate_image_slot_score_multi(self, taged_image_list : list[dict], slot_name : str):
image_tag_list = [taged_image["image_tag"] for taged_image in taged_image_list]
slot_tag_dict = self.parse_slot_name_to_tag(slot_name)
image_score_list = [0] * len(image_tag_list)
for slot_tag_cate, slot_tag_item in slot_tag_dict.items():
if slot_tag_cate == "narrative_preference":
slot_tag_narrative = slot_tag_item
continue
for idx, image_tag in enumerate(image_tag_list):
if slot_tag_item.value in image_tag[slot_tag_cate]: #collect!
image_score_list[idx] += 1 / (len(image_tag) - 1)
for idx, image_tag in enumerate(image_tag_list):
image_narrative_score = image_tag["narrative_preference"][slot_tag_narrative]
image_score_list[idx] = image_score_list[idx] * image_narrative_score
return image_score_list
def parse_slot_name_to_tag(self, slot_name : str) -> dict[str, StrEnum]:
    """Decode a "-"-separated slot name into its tag enums.

    The first five segments are read positionally as space type, subject,
    camera, recommended motion and narrative phase.

    Args:
        slot_name: Slot name with at least five "-"-separated segments.

    Returns:
        Dict keyed by "space_type", "subject", "camera",
        "motion_recommended" and "narrative_preference".
    """
    parts = slot_name.split("-")
    return {
        "space_type" : SpaceType(parts[0]),
        "subject" : Subject(parts[1]),
        "camera" : Camera(parts[2]),
        "motion_recommended" : MotionRecommended(parts[3]),
        "narrative_preference" : NarrativePhase(parts[4]),
    }
def elements_connect_resource_blackbox(
self,
elements: list,
@ -700,14 +702,6 @@ class CreatomateService:
original_response={"last_error": str(last_error)},
)
# 하위 호환성을 위한 별칭 (deprecated)
async def make_creatomate_custom_call_async(self, source: dict) -> dict:
"""템플릿 없이 Creatomate에 커스텀 렌더링 요청을 보냅니다.
Deprecated: make_creatomate_custom_call() 사용하세요.
"""
return await self.make_creatomate_custom_call(source)
async def get_render_status(self, render_id: str) -> dict:
"""렌더링 작업의 상태를 조회합니다.
@ -731,14 +725,6 @@ class CreatomateService:
response.raise_for_status()
return response.json()
# 하위 호환성을 위한 별칭 (deprecated)
async def get_render_status_async(self, render_id: str) -> dict:
"""렌더링 작업의 상태를 조회합니다.
Deprecated: get_render_status() 사용하세요.
"""
return await self.get_render_status(render_id)
def calc_scene_duration(self, template: dict) -> float:
"""템플릿의 전체 장면 duration을 계산합니다."""
total_template_duration = 0.0

View File

@ -31,10 +31,11 @@ class Prompt():
return prompt_template
def build_prompt(self, input_data:dict) -> str:
def build_prompt(self, input_data:dict, silent:bool = False) -> str:
verified_input = self.prompt_input_class(**input_data)
build_template = self.prompt_template
build_template = build_template.format(**verified_input.model_dump())
if not silent:
logger.debug(f"build_template: {build_template}")
logger.debug(f"input_data: {input_data}")
return build_template

View File

@ -25,7 +25,7 @@ from sqlalchemy.ext.asyncio import AsyncSession
from app.database.session import get_session
from app.user.dependencies.auth import get_current_user
from app.user.models import User
from app.home.models import Image, Project, MarketingIntel
from app.home.models import Image, Project, MarketingIntel, ImageTag
from app.lyric.models import Lyric
from app.song.models import Song, SongTimestamp
from app.utils.creatomate import CreatomateService
@ -39,6 +39,7 @@ from app.video.schemas.video_schema import (
VideoRenderData,
)
from app.video.worker.video_task import download_and_upload_video_to_blob
from app.video.services.video import get_image_tags_by_task_id
from config import creatomate_settings
@ -337,17 +338,24 @@ async def generate_video(
)
# 6-1. 템플릿 조회 (비동기)
template = await creatomate_service.get_one_template_data_async(
template = await creatomate_service.get_one_template_data(
creatomate_service.template_id
)
logger.debug(f"[generate_video] Template fetched - task_id: {task_id}")
# 6-2. elements에서 리소스 매핑 생성
modifications = creatomate_service.elements_connect_resource_blackbox(
elements=template["source"]["elements"],
image_url_list=image_urls,
# modifications = creatomate_service.elements_connect_resource_blackbox(
# elements=template["source"]["elements"],
# image_url_list=image_urls,
# music_url=music_url,
# address=store_address
taged_image_list = await get_image_tags_by_task_id(task_id)
modifications = creatomate_service.template_matching_taged_image(
template = template,
taged_image_list = taged_image_list,
music_url = music_url,
address=store_address
address = store_address,
duplicate = True,
)
logger.debug(f"[generate_video] Modifications created - task_id: {task_id}")
@ -413,7 +421,7 @@ async def generate_video(
# f"[generate_video] final_template: {json.dumps(final_template, indent=2, ensure_ascii=False)}"
# )
# 6-5. 커스텀 렌더링 요청 (비동기)
render_response = await creatomate_service.make_creatomate_custom_call_async(
render_response = await creatomate_service.make_creatomate_custom_call(
final_template["source"],
)
@ -565,7 +573,7 @@ async def get_video_status(
)
try:
creatomate_service = CreatomateService()
result = await creatomate_service.get_render_status_async(creatomate_render_id)
result = await creatomate_service.get_render_status(creatomate_render_id)
logger.debug(
f"[get_video_status] Creatomate API response - creatomate_render_id: {creatomate_render_id}, status: {result.get('status')}"
)

File diff suppressed because it is too large Load Diff