이미지 분류 및 슬롯 찾기 적용

2026-04-01 07:07:38 +00:00 · 2026-04-01 07:07:38 +00:00 · ebf76a0f8f
parent 2c6faadcf2
commit ebf76a0f8f
10 changed files with 947 additions and 909 deletions
--- a/app/home/api/routers/v1/home.py
+++ b/app/home/api/routers/v1/home.py
@@ -787,6 +787,8 @@ async def tag_images_if_not_exist(
        if null_tags:
            tag_datas = await autotag_images([img.img_url for img in null_tags])

+            print(tag_datas)
+
            for tag, tag_data in zip(null_tags, tag_datas):
                tag.img_tag = tag_data.model_dump(mode="json")

--- a/app/lyric/schemas/lyric.py
+++ b/app/lyric/schemas/lyric.py
@@ -42,7 +42,7 @@ class GenerateLyricRequest(BaseModel):
            "region": "군산",
            "detail_region_info": "군산 신흥동 말랭이 마을",
            "language": "Korean",
-            "m_id" : 1,
+            "m_id" : 2,
            "orientation" : "vertical"
        }
    """
--- a/app/lyric/worker/lyric_task.py
+++ b/app/lyric/worker/lyric_task.py
@@ -169,7 +169,7 @@ async def generate_subtitle_background(
 ) -> None:
    logger.info(f"[generate_subtitle_background] task_id: {task_id}, {orientation}")
    creatomate_service = CreatomateService(orientation=orientation)
-    template = await creatomate_service.get_one_template_data_async(creatomate_service.template_id)
+    template = await creatomate_service.get_one_template_data(creatomate_service.template_id)
    pitchings = creatomate_service.extract_text_format_from_template(template)

    subtitle_generator = SubtitleContentsGenerator()
--- a/app/song/services/song.py
+++ b/app/song/services/song.py
@@ -7,7 +7,7 @@ from sqlalchemy import Connection, text
 from sqlalchemy.exc import SQLAlchemyError

 from app.utils.logger import get_logger
-from app.lyrics.schemas.lyrics_schema import (
+from app.lyric.schemas.lyrics_schema import (
    AttributeData,
    PromptTemplateData,
    SongFormData,
--- a/app/utils/autotag.py
+++ b/app/utils/autotag.py
@@ -27,7 +27,20 @@ async def autotag_images(image_url_list : list[str]) -> list[dict]: #tag_list
        "motion_recommended" : list(MotionRecommended)
    }for image_url in image_url_list]
    
-    image_result_tasks = [chatgpt.generate_structured_output(image_autotag_prompt, image_input_data, image_input_data['img_url'], False) for image_input_data in image_input_data_list] 
-    image_result_list = await asyncio.gather(*image_result_tasks)
+    image_result_tasks = [chatgpt.generate_structured_output(image_autotag_prompt, image_input_data, image_input_data['img_url'], False, silent = True) for image_input_data in image_input_data_list] 
+    image_result_list = await asyncio.gather(*image_result_tasks, return_exceptions=True)
+    MAX_RETRY = 3 # 하드코딩, 어떻게 처리할지는 나중에
+    for _ in range(MAX_RETRY):
+        failed_idx = [i for i, r in enumerate(image_result_list) if isinstance(r, Exception)]
+        print("Failed", failed_idx)
+        if not failed_idx:
+            break
+        retried = await asyncio.gather(
+            *[chatgpt.generate_structured_output(image_autotag_prompt, image_input_data_list[i], image_input_data_list[i]['img_url'], False, silent=True) for i in failed],
+            return_exceptions=True
+        )
+        for i, result in zip(failed_idx, retried):
+            image_result_list[i] = result

+    print("Failed", failed_idx)
    return image_result_list
--- a/app/utils/chatgpt_prompt.py
+++ b/app/utils/chatgpt_prompt.py
@@ -101,11 +101,13 @@ class ChatgptService:
        prompt : Prompt,
        input_data : dict,
        img_url : Optional[str] = None,
-        img_detail_high : bool = False
+        img_detail_high : bool = False,
+        silent : bool = False
    ) -> BaseModel:
-        prompt_text = prompt.build_prompt(input_data)
+        prompt_text = prompt.build_prompt(input_data, silent)
    
        logger.debug(f"[ChatgptService] Generated Prompt (length: {len(prompt_text)})")
+        if not silent:
            logger.info(f"[ChatgptService] Starting GPT request with structured output with model: {prompt.prompt_model}")

        # GPT API 호출
--- a/app/utils/creatomate.py
+++ b/app/utils/creatomate.py
@@ -31,11 +31,13 @@ response = await creatomate.make_creatomate_call(template_id, modifications)

 import copy
 import time
+from enum import StrEnum
 from typing import Literal

 import httpx

 from app.utils.logger import get_logger
+from app.utils.prompts.schemas.image import SpaceType,Subject,Camera,MotionRecommended,NarrativePhase
 from config import apikey_settings, creatomate_settings, recovery_settings

 # 로거 설정
@@ -226,8 +228,9 @@ DVST0003 = "e1fb5b00-1f02-4f63-99fa-7524b433ba47"
 DHST0001 = "660be601-080a-43ea-bf0f-adcf4596fa98"
 DHST0002 = "3f194cc7-464e-4581-9db2-179d42d3e40f"
 DHST0003 = "f45df555-2956-4a13-9004-ead047070b3d"
+DVST0001T = "fe11aeab-ff29-4bc8-9f75-c695c7e243e6"
 HST_LIST = [DHST0001,DHST0002,DHST0003]
-VST_LIST = [DVST0001,DVST0002,DVST0003]
+VST_LIST = [DVST0001,DVST0002,DVST0003, DVST0001T]

 SCENE_TRACK = 1
 AUDIO_TRACK = 2
@@ -238,7 +241,7 @@ def select_template(orientation:OrientationType):
    if orientation == "horizontal":
        return DHST0001
    elif orientation == "vertical":
-        return DVST0001
+        return DVST0001T
    else:
        raise

@@ -399,14 +402,6 @@ class CreatomateService:

        return copy.deepcopy(data)

-    # 하위 호환성을 위한 별칭 (deprecated)
-    async def get_one_template_data_async(self, template_id: str) -> dict:
-        """특정 템플릿 ID로 템플릿 정보를 조회합니다.
-
-        Deprecated: get_one_template_data()를 사용하세요.
-        """
-        return await self.get_one_template_data(template_id)
-
    def parse_template_component_name(self, template_source: list) -> dict:
        """템플릿 정보를 파싱하여 리소스 이름을 추출합니다."""

@@ -441,66 +436,73 @@ class CreatomateService:
        return tag_list
    
        
-    async def template_matching_taged_image(
+    def template_matching_taged_image(
        self,
-        template_id : str,
-        taged_image_list : list,
-        address : str
+        template : dict,
+        taged_image_list : list, # [{"image_name" : str , "image_tag" : dict}]
+        music_url: str,
+        address : str,
+        duplicate : bool = False
    ) -> list:
-        
-        template_data = await self.get_one_template_data(template_id)
-        source_elements = template_data["source"]["elements"]
+        source_elements = template["source"]["elements"]
        template_component_data = self.parse_template_component_name(source_elements)

        modifications = {}

-        for idx, (template_component_name, template_type) in enumerate(template_component_data.items()):
+        for slot_idx, (template_component_name, template_type) in enumerate(template_component_data.items()):
            match template_type:
                case "image":
-                    # modifications[template_component_name] = somethingtagedimage()
+                    image_score_list = self.calculate_image_slot_score_multi(taged_image_list, template_component_name)
+                    maximum_idx = image_score_list.index(max(image_score_list))
+                    if duplicate:
+                        selected = taged_image_list[maximum_idx]
+                    else:
+                        selected = taged_image_list.pop(maximum_idx)
+                    image_name = selected["image_url"]
+                    modifications[template_component_name] =image_name
                    pass
                case "text":
                    if "address_input" in template_component_name:
                        modifications[template_component_name] = address

-        # modifications["audio-music"] = music_url
-
-    async def template_connect_resource_blackbox(
-        self,
-        template_id: str,
-        image_url_list: list[str],
-        music_url: str,
-        address: str = None
-    ) -> dict:
-        """템플릿 정보와 이미지/가사/음악 리소스를 매핑합니다.
-
-        Note:
-            - 이미지는 순차적으로 집어넣기
-            - 가사는 개행마다 한 텍스트 삽입
-            - Template에 audio-music 항목이 있어야 함
-        """
-        template_data = await self.get_one_template_data(template_id)
-        template_component_data = self.parse_template_component_name(
-            template_data["source"]["elements"]
-        )
-        modifications = {}
-
-        for idx, (template_component_name, template_type) in enumerate(
-            template_component_data.items()
-        ):
-            match template_type:
-                case "image":
-                    modifications[template_component_name] = image_url_list[
-                        idx % len(image_url_list)
-                    ]
-                case "text":
-                    if "address_input" in template_component_name:
-                        modifications[template_component_name] = address
-
        modifications["audio-music"] = music_url
-
        return modifications
                
+    def calculate_image_slot_score_multi(self, taged_image_list : list[dict], slot_name : str):
+        image_tag_list = [taged_image["image_tag"] for taged_image in taged_image_list]
+        slot_tag_dict = self.parse_slot_name_to_tag(slot_name)
+        image_score_list = [0] * len(image_tag_list)
+        
+        for slot_tag_cate, slot_tag_item in slot_tag_dict.items():
+            if slot_tag_cate == "narrative_preference":
+                slot_tag_narrative = slot_tag_item
+                continue
+            for idx, image_tag in enumerate(image_tag_list):
+                if slot_tag_item.value in image_tag[slot_tag_cate]: #collect!
+                    image_score_list[idx] += 1 / (len(image_tag) - 1)
+
+        for idx, image_tag in enumerate(image_tag_list):
+            image_narrative_score = image_tag["narrative_preference"][slot_tag_narrative]
+            image_score_list[idx] = image_score_list[idx] * image_narrative_score
+
+        return image_score_list
+    
+    def parse_slot_name_to_tag(self, slot_name : str) -> dict[str, StrEnum]:
+        tag_list = slot_name.split("-")
+        space_type = SpaceType(tag_list[0])
+        subject = Subject(tag_list[1])
+        camera = Camera(tag_list[2])
+        motion = MotionRecommended(tag_list[3])
+        narrative = NarrativePhase(tag_list[4])
+        tag_dict = {
+            "space_type" : space_type,
+            "subject" : subject,
+            "camera" : camera,
+            "motion_recommended" : motion,
+            "narrative_preference" : narrative,
+        }
+        return tag_dict
+
    def elements_connect_resource_blackbox(
        self,
        elements: list,
@@ -700,14 +702,6 @@ class CreatomateService:
            original_response={"last_error": str(last_error)},
        )

-    # 하위 호환성을 위한 별칭 (deprecated)
-    async def make_creatomate_custom_call_async(self, source: dict) -> dict:
-        """템플릿 없이 Creatomate에 커스텀 렌더링 요청을 보냅니다.
-
-        Deprecated: make_creatomate_custom_call()을 사용하세요.
-        """
-        return await self.make_creatomate_custom_call(source)
-
    async def get_render_status(self, render_id: str) -> dict:
        """렌더링 작업의 상태를 조회합니다.

@@ -731,14 +725,6 @@ class CreatomateService:
        response.raise_for_status()
        return response.json()

-    # 하위 호환성을 위한 별칭 (deprecated)
-    async def get_render_status_async(self, render_id: str) -> dict:
-        """렌더링 작업의 상태를 조회합니다.
-
-        Deprecated: get_render_status()를 사용하세요.
-        """
-        return await self.get_render_status(render_id)
-
    def calc_scene_duration(self, template: dict) -> float:
        """템플릿의 전체 장면 duration을 계산합니다."""
        total_template_duration = 0.0
--- a/app/utils/prompts/prompts.py
+++ b/app/utils/prompts/prompts.py
@@ -31,10 +31,11 @@ class Prompt():

        return prompt_template

-    def build_prompt(self, input_data:dict) -> str:
+    def build_prompt(self, input_data:dict, silent:bool = False) -> str:
        verified_input = self.prompt_input_class(**input_data)
        build_template = self.prompt_template
        build_template = build_template.format(**verified_input.model_dump())
+        if not silent:
            logger.debug(f"build_template: {build_template}")
            logger.debug(f"input_data: {input_data}")
        return build_template
--- a/app/video/api/routers/v1/video.py
+++ b/app/video/api/routers/v1/video.py
@@ -25,7 +25,7 @@ from sqlalchemy.ext.asyncio import AsyncSession
 from app.database.session import get_session
 from app.user.dependencies.auth import get_current_user
 from app.user.models import User
-from app.home.models import Image, Project, MarketingIntel
+from app.home.models import Image, Project, MarketingIntel, ImageTag
 from app.lyric.models import Lyric
 from app.song.models import Song, SongTimestamp
 from app.utils.creatomate import CreatomateService
@@ -39,6 +39,7 @@ from app.video.schemas.video_schema import (
    VideoRenderData,
 )
 from app.video.worker.video_task import download_and_upload_video_to_blob
+from app.video.services.video import get_image_tags_by_task_id

 from config import creatomate_settings

@@ -337,17 +338,24 @@ async def generate_video(
        )

        # 6-1. 템플릿 조회 (비동기)
-        template = await creatomate_service.get_one_template_data_async(
+        template = await creatomate_service.get_one_template_data(
            creatomate_service.template_id
        )
        logger.debug(f"[generate_video] Template fetched - task_id: {task_id}")

        # 6-2. elements에서 리소스 매핑 생성
-        modifications = creatomate_service.elements_connect_resource_blackbox(
-            elements=template["source"]["elements"],
-            image_url_list=image_urls,
+        # modifications = creatomate_service.elements_connect_resource_blackbox(
+        #     elements=template["source"]["elements"],
+        #     image_url_list=image_urls,
+        #     music_url=music_url,
+        #     address=store_address
+        taged_image_list = await get_image_tags_by_task_id(task_id)
+        modifications = creatomate_service.template_matching_taged_image(
+            template = template,
+            taged_image_list = taged_image_list,
            music_url = music_url, 
-            address=store_address
+            address = store_address,
+            duplicate = True,
        )
        logger.debug(f"[generate_video] Modifications created - task_id: {task_id}")

@@ -413,7 +421,7 @@ async def generate_video(
        #     f"[generate_video] final_template: {json.dumps(final_template, indent=2, ensure_ascii=False)}"
        # )
        # 6-5. 커스텀 렌더링 요청 (비동기)
-        render_response = await creatomate_service.make_creatomate_custom_call_async(
+        render_response = await creatomate_service.make_creatomate_custom_call(
            final_template["source"],
        )
        
@@ -565,7 +573,7 @@ async def get_video_status(
    )
    try:
        creatomate_service = CreatomateService()
-        result = await creatomate_service.get_render_status_async(creatomate_render_id)
+        result = await creatomate_service.get_render_status(creatomate_render_id)
        logger.debug(
            f"[get_video_status] Creatomate API response - creatomate_render_id: {creatomate_render_id}, status: {result.get('status')}"
        )
--- a/app/video/services/video.py
+++ b/app/video/services/video.py