diff --git a/app/core/common.py b/app/core/common.py index 520fab4..85cb08d 100644 --- a/app/core/common.py +++ b/app/core/common.py @@ -5,7 +5,7 @@ from contextlib import asynccontextmanager from fastapi import FastAPI from app.utils.logger import get_logger - +from app.utils.nvMapPwScraper import NvMapPwScraper logger = get_logger("core") @@ -24,6 +24,7 @@ async def lifespan(app: FastAPI): await create_db_tables() logger.info("Database tables created (DEBUG mode)") + await NvMapPwScraper.initiate_scraper() except asyncio.TimeoutError: logger.error("Database initialization timed out") # 타임아웃 시 앱 시작 중단하려면 raise, 계속하려면 pass diff --git a/app/database/session.py b/app/database/session.py index 8e35772..af942d0 100644 --- a/app/database/session.py +++ b/app/database/session.py @@ -92,18 +92,18 @@ async def get_session() -> AsyncGenerator[AsyncSession, None]: pool = engine.pool # 커넥션 풀 상태 로깅 (디버깅용) - logger.debug( - f"[get_session] ACQUIRE - pool_size: {pool.size()}, " - f"in: {pool.checkedin()}, out: {pool.checkedout()}, " - f"overflow: {pool.overflow()}" - ) + # logger.debug( + # f"[get_session] ACQUIRE - pool_size: {pool.size()}, " + # f"in: {pool.checkedin()}, out: {pool.checkedout()}, " + # f"overflow: {pool.overflow()}" + # ) async with AsyncSessionLocal() as session: acquire_time = time.perf_counter() - logger.debug( - f"[get_session] Session acquired in " - f"{(acquire_time - start_time)*1000:.1f}ms" - ) + # logger.debug( + # f"[get_session] Session acquired in " + # f"{(acquire_time - start_time)*1000:.1f}ms" + # ) try: yield session except Exception as e: @@ -115,10 +115,10 @@ async def get_session() -> AsyncGenerator[AsyncSession, None]: raise e finally: total_time = time.perf_counter() - start_time - logger.debug( - f"[get_session] RELEASE - duration: {total_time*1000:.1f}ms, " - f"pool_out: {pool.checkedout()}" - ) + # logger.debug( + # f"[get_session] RELEASE - duration: {total_time*1000:.1f}ms, " + # f"pool_out: {pool.checkedout()}" + # ) # 백그라운드 태스크용 세션 제너레이터 @@ -126,18 +126,18 @@ async def get_background_session() -> AsyncGenerator[AsyncSession, None]: start_time = time.perf_counter() pool = background_engine.pool - logger.debug( - f"[get_background_session] ACQUIRE - pool_size: {pool.size()}, " - f"in: {pool.checkedin()}, out: {pool.checkedout()}, " - f"overflow: {pool.overflow()}" - ) + # logger.debug( + # f"[get_background_session] ACQUIRE - pool_size: {pool.size()}, " + # f"in: {pool.checkedin()}, out: {pool.checkedout()}, " + # f"overflow: {pool.overflow()}" + # ) async with BackgroundSessionLocal() as session: acquire_time = time.perf_counter() - logger.debug( - f"[get_background_session] Session acquired in " - f"{(acquire_time - start_time)*1000:.1f}ms" - ) + # logger.debug( + # f"[get_background_session] Session acquired in " + # f"{(acquire_time - start_time)*1000:.1f}ms" + # ) try: yield session except Exception as e: @@ -150,11 +150,11 @@ async def get_background_session() -> AsyncGenerator[AsyncSession, None]: raise e finally: total_time = time.perf_counter() - start_time - logger.debug( - f"[get_background_session] RELEASE - " - f"duration: {total_time*1000:.1f}ms, " - f"pool_out: {pool.checkedout()}" - ) + # logger.debug( + # f"[get_background_session] RELEASE - " + # f"duration: {total_time*1000:.1f}ms, " + # f"pool_out: {pool.checkedout()}" + # ) # 앱 종료 시 엔진 리소스 정리 함수 diff --git a/app/home/api/routers/v1/home.py b/app/home/api/routers/v1/home.py index 489bf2d..9f4ceec 100644 --- a/app/home/api/routers/v1/home.py +++ b/app/home/api/routers/v1/home.py @@ -13,6 +13,7 @@ from sqlalchemy.ext.asyncio import AsyncSession from app.database.session import get_session, AsyncSessionLocal from app.home.models import Image from app.home.schemas.home_schema import ( + AutoCompleteRequest, CrawlingRequest, CrawlingResponse, ErrorResponse, @@ -27,6 +28,7 @@ from app.utils.chatgpt_prompt import ChatgptService from app.utils.common import generate_task_id from app.utils.logger import get_logger from app.utils.nvMapScraper import NvMapScraper, GraphQLException +from app.utils.nvMapPwScraper import NvMapPwScraper from app.utils.prompts.prompts import marketing_prompt from config import MEDIA_ROOT @@ -105,17 +107,54 @@ def _extract_region_from_address(road_address: str | None) -> str: tags=["Crawling"], ) async def crawling(request_body: CrawlingRequest): - """네이버 지도 장소 크롤링""" + return await _crawling_logic(request_body.url) + +@router.post( + "/autocomplete", + summary="네이버 자동완성 크롤링", + description=""" +네이버 검색 API 정보를 활용하여 Place ID를 추출한 뒤 자동으로 크롤링합니다. + +## 요청 필드 +- **title**: 네이버 검색 API Place 결과물 title (필수) +- **address**: 네이버 검색 API Place 결과물 지번주소 (필수) +- **roadAddress**:네이버 검색 API Place 결과물 도로명주소 + +## 반환 정보 +- **image_list**: 장소 이미지 URL 목록 +- **image_count**: 이미지 개수 +- **processed_info**: 가공된 장소 정보 (customer_name, region, detail_region_info) + """, + response_model=CrawlingResponse, + response_description="크롤링 결과", + responses={ + 200: {"description": "크롤링 성공", "model": CrawlingResponse}, + 400: { + "description": "잘못된 URL", + "model": ErrorResponse, + }, + 502: { + "description": "크롤링 실패", + "model": ErrorResponse, + }, + }, + tags=["Crawling"], +) +async def autocomplete_crawling(request_body: AutoCompleteRequest): + url = await _autocomplete_logic(request_body.dict()) + return await _crawling_logic(url) + +async def _crawling_logic(url:str): request_start = time.perf_counter() logger.info("[crawling] ========== START ==========") - logger.info(f"[crawling] URL: {request_body.url[:80]}...") + logger.info(f"[crawling] URL: {url[:80]}...") # ========== Step 1: 네이버 지도 크롤링 ========== step1_start = time.perf_counter() logger.info("[crawling] Step 1: 네이버 지도 크롤링 시작...") try: - scraper = NvMapScraper(request_body.url) + scraper = NvMapScraper(url) await scraper.scrap() except GraphQLException as e: step1_elapsed = (time.perf_counter() - step1_start) * 1000 @@ -288,6 +327,23 @@ async def crawling(request_body: CrawlingRequest): } +async def _autocomplete_logic(autocomplete_item:dict): + step1_start = time.perf_counter() + try: + async with NvMapPwScraper() as pw_scraper: + new_url = await pw_scraper.get_place_id_url(autocomplete_item) + except Exception as e: + step1_elapsed = (time.perf_counter() - step1_start) * 1000 + logger.error( + f"[crawling] Autocomplete FAILED - 자동완성 예기치 않은 오류: {e} ({step1_elapsed:.1f}ms)" + ) + logger.exception("[crawling] Autocomplete 상세 오류:") + raise HTTPException( + status_code=status.HTTP_502_BAD_GATEWAY, + detail="자동완성 place id 추출 실패", + ) + return new_url + def _extract_image_name(url: str, index: int) -> str: """URL에서 이미지 이름 추출 또는 기본 이름 생성""" try: diff --git a/app/home/schemas/home_schema.py b/app/home/schemas/home_schema.py index 249c8de..069aa6b 100644 --- a/app/home/schemas/home_schema.py +++ b/app/home/schemas/home_schema.py @@ -122,6 +122,22 @@ class CrawlingRequest(BaseModel): url: str = Field(..., description="네이버 지도 장소 URL") +class AutoCompleteRequest(BaseModel): + """자동완성 요청 스키마""" + + model_config = ConfigDict( + json_schema_extra={ + "example": { + 'title': '스테이,머뭄', + 'address': '전북특별자치도 군산시 신흥동 63-18', + 'roadAddress': '전북특별자치도 군산시 절골길 18', + } + } + ) + + title: str = Field(..., description="네이버 검색 place API Title") + address: str = Field(..., description="네이버 검색 place API 지번주소") + roadAddress: Optional[str] = Field(None, description="네이버 검색 place API 도로명주소") class ProcessedInfo(BaseModel): """가공된 장소 정보 스키마""" diff --git a/app/song/api/routers/v1/song.py b/app/song/api/routers/v1/song.py index 6362a00..13b39f3 100644 --- a/app/song/api/routers/v1/song.py +++ b/app/song/api/routers/v1/song.py @@ -392,7 +392,7 @@ async def get_song_status( # song_id로 Song 조회 song_result = await session.execute( select(Song) - .where(Song.suno_task_id == suno_task_id) + .where(Song.suno_task_id == song_id) .order_by(Song.created_at.desc()) .limit(1) ) @@ -418,7 +418,7 @@ async def get_song_status( # 백그라운드 태스크로 MP3 다운로드 및 Blob 업로드 실행 background_tasks.add_task( download_and_upload_song_by_suno_task_id, - suno_task_id=suno_task_id, + suno_task_id=song_id, audio_url=audio_url, store_name=store_name, duration=clip_duration, diff --git a/app/utils/nvMapPwScraper.py b/app/utils/nvMapPwScraper.py index d724764..6b3f2a6 100644 --- a/app/utils/nvMapPwScraper.py +++ b/app/utils/nvMapPwScraper.py @@ -2,7 +2,7 @@ import asyncio from playwright.async_api import async_playwright from urllib import parse -class nvMapPwScraper(): +class NvMapPwScraper(): # cls vars is_ready = False _playwright = None @@ -107,7 +107,7 @@ patchedGetter.toString();''') if "/place/" in self.page.url: return self.page.url - if (count == self._max_retry / 2): - raise Exception("Failed to identify place id. loading timeout") - else: - raise Exception("Failed to identify place id. item is ambiguous") + # if (count == self._max_retry / 2): + # raise Exception("Failed to identify place id. loading timeout") + # else: + # raise Exception("Failed to identify place id. item is ambiguous") diff --git a/app/utils/suno.py b/app/utils/suno.py index cfcd530..663b813 100644 --- a/app/utils/suno.py +++ b/app/utils/suno.py @@ -231,7 +231,7 @@ class SunoService: ) code = result.get("code", 0) - data = result.get("data", {}) + data = result.get("data") or {} if code != 200: return PollingSongResponse( diff --git a/pyproject.toml b/pyproject.toml index 9a0be33..d5e984d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -13,6 +13,7 @@ dependencies = [ "fastapi-cli>=0.0.16", "fastapi[standard]>=0.125.0", "openai>=2.13.0", + "playwright>=1.57.0", "pydantic-settings>=2.12.0", "python-jose[cryptography]>=3.5.0", "python-multipart>=0.0.21", diff --git a/uv.lock b/uv.lock index b2feb3a..9c9dcaf 100644 --- a/uv.lock +++ b/uv.lock @@ -716,6 +716,7 @@ dependencies = [ { name = "fastapi", extra = ["standard"] }, { name = "fastapi-cli" }, { name = "openai" }, + { name = "playwright" }, { name = "pydantic-settings" }, { name = "python-jose", extra = ["cryptography"] }, { name = "python-multipart" }, @@ -743,6 +744,7 @@ requires-dist = [ { name = "fastapi", extras = ["standard"], specifier = ">=0.125.0" }, { name = "fastapi-cli", specifier = ">=0.0.16" }, { name = "openai", specifier = ">=2.13.0" }, + { name = "playwright", specifier = ">=1.57.0" }, { name = "pydantic-settings", specifier = ">=2.12.0" }, { name = "python-jose", extras = ["cryptography"], specifier = ">=3.5.0" }, { name = "python-multipart", specifier = ">=0.0.21" }, @@ -788,6 +790,25 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/b7/b9/c538f279a4e237a006a2c98387d081e9eb060d203d8ed34467cc0f0b9b53/packaging-26.0-py3-none-any.whl", hash = "sha256:b36f1fef9334a5588b4166f8bcd26a14e521f2b55e6b9de3aaa80d3ff7a37529", size = 74366, upload-time = "2026-01-21T20:50:37.788Z" }, ] +[[package]] +name = "playwright" +version = "1.57.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "greenlet" }, + { name = "pyee" }, +] +wheels = [ + { url = "https://files.pythonhosted.org/packages/ed/b6/e17543cea8290ae4dced10be21d5a43c360096aa2cce0aa7039e60c50df3/playwright-1.57.0-py3-none-macosx_10_13_x86_64.whl", hash = "sha256:9351c1ac3dfd9b3820fe7fc4340d96c0d3736bb68097b9b7a69bd45d25e9370c", size = 41985039, upload-time = "2025-12-09T08:06:18.408Z" }, + { url = "https://files.pythonhosted.org/packages/8b/04/ef95b67e1ff59c080b2effd1a9a96984d6953f667c91dfe9d77c838fc956/playwright-1.57.0-py3-none-macosx_11_0_arm64.whl", hash = "sha256:a4a9d65027bce48eeba842408bcc1421502dfd7e41e28d207e94260fa93ca67e", size = 40775575, upload-time = "2025-12-09T08:06:22.105Z" }, + { url = "https://files.pythonhosted.org/packages/60/bd/5563850322a663956c927eefcf1457d12917e8f118c214410e815f2147d1/playwright-1.57.0-py3-none-macosx_11_0_universal2.whl", hash = "sha256:99104771abc4eafee48f47dac2369e0015516dc1ce8c409807d2dd440828b9a4", size = 41985042, upload-time = "2025-12-09T08:06:25.357Z" }, + { url = "https://files.pythonhosted.org/packages/56/61/3a803cb5ae0321715bfd5247ea871d25b32c8f372aeb70550a90c5f586df/playwright-1.57.0-py3-none-manylinux1_x86_64.whl", hash = "sha256:284ed5a706b7c389a06caa431b2f0ba9ac4130113c3a779767dda758c2497bb1", size = 45975252, upload-time = "2025-12-09T08:06:29.186Z" }, + { url = "https://files.pythonhosted.org/packages/83/d7/b72eb59dfbea0013a7f9731878df8c670f5f35318cedb010c8a30292c118/playwright-1.57.0-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:38a1bae6c0a07839cdeaddbc0756b3b2b85e476c07945f64ece08f1f956a86f1", size = 45706917, upload-time = "2025-12-09T08:06:32.549Z" }, + { url = "https://files.pythonhosted.org/packages/e4/09/3fc9ebd7c95ee54ba6a68d5c0bc23e449f7235f4603fc60534a364934c16/playwright-1.57.0-py3-none-win32.whl", hash = "sha256:1dd93b265688da46e91ecb0606d36f777f8eadcf7fbef12f6426b20bf0c9137c", size = 36553860, upload-time = "2025-12-09T08:06:35.864Z" }, + { url = "https://files.pythonhosted.org/packages/58/d4/dcdfd2a33096aeda6ca0d15584800443dd2be64becca8f315634044b135b/playwright-1.57.0-py3-none-win_amd64.whl", hash = "sha256:6caefb08ed2c6f29d33b8088d05d09376946e49a73be19271c8cd5384b82b14c", size = 36553864, upload-time = "2025-12-09T08:06:38.915Z" }, + { url = "https://files.pythonhosted.org/packages/6a/60/fe31d7e6b8907789dcb0584f88be741ba388413e4fbce35f1eba4e3073de/playwright-1.57.0-py3-none-win_arm64.whl", hash = "sha256:5f065f5a133dbc15e6e7c71e7bc04f258195755b1c32a432b792e28338c8335e", size = 32837940, upload-time = "2025-12-09T08:06:42.268Z" }, +] + [[package]] name = "pluggy" version = "1.6.0" @@ -940,6 +961,18 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/c1/60/5d4751ba3f4a40a6891f24eec885f51afd78d208498268c734e256fb13c4/pydantic_settings-2.12.0-py3-none-any.whl", hash = "sha256:fddb9fd99a5b18da837b29710391e945b1e30c135477f484084ee513adb93809", size = 51880, upload-time = "2025-11-10T14:25:45.546Z" }, ] +[[package]] +name = "pyee" +version = "13.0.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "typing-extensions" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/95/03/1fd98d5841cd7964a27d729ccf2199602fe05eb7a405c1462eb7277945ed/pyee-13.0.0.tar.gz", hash = "sha256:b391e3c5a434d1f5118a25615001dbc8f669cf410ab67d04c4d4e07c55481c37", size = 31250, upload-time = "2025-03-17T18:53:15.955Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/9b/4d/b9add7c84060d4c1906abe9a7e5359f2a60f7a9a4f67268b2766673427d8/pyee-13.0.0-py3-none-any.whl", hash = "sha256:48195a3cddb3b1515ce0695ed76036b5ccc2ef3a9f963ff9f77aec0139845498", size = 15730, upload-time = "2025-03-17T18:53:14.532Z" }, +] + [[package]] name = "pygments" version = "2.19.2"