Compare commits

..

7 Commits

Author SHA1 Message Date
jaehwang 2e9a43263f 자동완성 scalar docs 일부 수정 2026-01-27 00:43:46 +00:00
jaehwang 3039a65ee4 pw 종속성 추가 및 버그 수정, *주의: docker compose 파일 변경됨 2026-01-27 00:06:37 +00:00
jaehwang f29ac29649 자동완성 기능 추가 2026-01-26 16:59:13 +09:00
jaehwang fc88eedfa2 fix forgoten merge conflict 2026-01-26 15:10:13 +09:00
jaehwang 72dcd09771 update main 2026-01-26 15:07:56 +09:00
Dohyun Lim f6da65044a update code 2026-01-22 11:57:56 +09:00
Dohyun Lim 4a06bfdde4 modify song process flow 2026-01-22 11:43:13 +09:00
9 changed files with 146 additions and 39 deletions

View File

@ -5,7 +5,7 @@ from contextlib import asynccontextmanager
from fastapi import FastAPI
from app.utils.logger import get_logger
from app.utils.nvMapPwScraper import NvMapPwScraper
logger = get_logger("core")
@ -24,6 +24,7 @@ async def lifespan(app: FastAPI):
await create_db_tables()
logger.info("Database tables created (DEBUG mode)")
await NvMapPwScraper.initiate_scraper()
except asyncio.TimeoutError:
logger.error("Database initialization timed out")
# 타임아웃 시 앱 시작 중단하려면 raise, 계속하려면 pass

View File

@ -92,18 +92,18 @@ async def get_session() -> AsyncGenerator[AsyncSession, None]:
pool = engine.pool
# 커넥션 풀 상태 로깅 (디버깅용)
logger.debug(
f"[get_session] ACQUIRE - pool_size: {pool.size()}, "
f"in: {pool.checkedin()}, out: {pool.checkedout()}, "
f"overflow: {pool.overflow()}"
)
# logger.debug(
# f"[get_session] ACQUIRE - pool_size: {pool.size()}, "
# f"in: {pool.checkedin()}, out: {pool.checkedout()}, "
# f"overflow: {pool.overflow()}"
# )
async with AsyncSessionLocal() as session:
acquire_time = time.perf_counter()
logger.debug(
f"[get_session] Session acquired in "
f"{(acquire_time - start_time)*1000:.1f}ms"
)
# logger.debug(
# f"[get_session] Session acquired in "
# f"{(acquire_time - start_time)*1000:.1f}ms"
# )
try:
yield session
except Exception as e:
@ -115,10 +115,10 @@ async def get_session() -> AsyncGenerator[AsyncSession, None]:
raise e
finally:
total_time = time.perf_counter() - start_time
logger.debug(
f"[get_session] RELEASE - duration: {total_time*1000:.1f}ms, "
f"pool_out: {pool.checkedout()}"
)
# logger.debug(
# f"[get_session] RELEASE - duration: {total_time*1000:.1f}ms, "
# f"pool_out: {pool.checkedout()}"
# )
# 백그라운드 태스크용 세션 제너레이터
@ -126,18 +126,18 @@ async def get_background_session() -> AsyncGenerator[AsyncSession, None]:
start_time = time.perf_counter()
pool = background_engine.pool
logger.debug(
f"[get_background_session] ACQUIRE - pool_size: {pool.size()}, "
f"in: {pool.checkedin()}, out: {pool.checkedout()}, "
f"overflow: {pool.overflow()}"
)
# logger.debug(
# f"[get_background_session] ACQUIRE - pool_size: {pool.size()}, "
# f"in: {pool.checkedin()}, out: {pool.checkedout()}, "
# f"overflow: {pool.overflow()}"
# )
async with BackgroundSessionLocal() as session:
acquire_time = time.perf_counter()
logger.debug(
f"[get_background_session] Session acquired in "
f"{(acquire_time - start_time)*1000:.1f}ms"
)
# logger.debug(
# f"[get_background_session] Session acquired in "
# f"{(acquire_time - start_time)*1000:.1f}ms"
# )
try:
yield session
except Exception as e:
@ -150,11 +150,11 @@ async def get_background_session() -> AsyncGenerator[AsyncSession, None]:
raise e
finally:
total_time = time.perf_counter() - start_time
logger.debug(
f"[get_background_session] RELEASE - "
f"duration: {total_time*1000:.1f}ms, "
f"pool_out: {pool.checkedout()}"
)
# logger.debug(
# f"[get_background_session] RELEASE - "
# f"duration: {total_time*1000:.1f}ms, "
# f"pool_out: {pool.checkedout()}"
# )
# 앱 종료 시 엔진 리소스 정리 함수

View File

@ -13,6 +13,7 @@ from sqlalchemy.ext.asyncio import AsyncSession
from app.database.session import get_session, AsyncSessionLocal
from app.home.models import Image
from app.home.schemas.home_schema import (
AutoCompleteRequest,
CrawlingRequest,
CrawlingResponse,
ErrorResponse,
@ -27,6 +28,7 @@ from app.utils.chatgpt_prompt import ChatgptService
from app.utils.common import generate_task_id
from app.utils.logger import get_logger
from app.utils.nvMapScraper import NvMapScraper, GraphQLException
from app.utils.nvMapPwScraper import NvMapPwScraper
from app.utils.prompts.prompts import marketing_prompt
from config import MEDIA_ROOT
@ -105,17 +107,54 @@ def _extract_region_from_address(road_address: str | None) -> str:
tags=["Crawling"],
)
async def crawling(request_body: CrawlingRequest):
"""네이버 지도 장소 크롤링"""
return await _crawling_logic(request_body.url)
@router.post(
    "/autocomplete",
    summary="네이버 자동완성 크롤링",
    description="""
네이버 검색 API 정보를 활용하여 Place ID를 추출한 자동으로 크롤링합니다.
## 요청 필드
- **title**: 네이버 검색 API Place 결과물 title (필수)
- **address**: 네이버 검색 API Place 결과물 지번주소 (필수)
- **roadAddress**:네이버 검색 API Place 결과물 도로명주소
## 반환 정보
- **image_list**: 장소 이미지 URL 목록
- **image_count**: 이미지 개수
- **processed_info**: 가공된 장소 정보 (customer_name, region, detail_region_info)
""",
    response_model=CrawlingResponse,
    response_description="크롤링 결과",
    responses={
        200: {"description": "크롤링 성공", "model": CrawlingResponse},
        400: {
            "description": "잘못된 URL",
            "model": ErrorResponse,
        },
        502: {
            "description": "크롤링 실패",
            "model": ErrorResponse,
        },
    },
    tags=["Crawling"],
)
async def autocomplete_crawling(request_body: AutoCompleteRequest):
    """Resolve an autocomplete item to a place URL, then crawl that place.

    The request body is converted to a plain dict and handed to
    ``_autocomplete_logic``, which returns the place URL. That URL is then
    passed to ``_crawling_logic``, whose result is returned unchanged.

    Args:
        request_body: Autocomplete item (title, address, roadAddress).

    Returns:
        Whatever ``_crawling_logic`` returns for the resolved URL.
    """
    # model_dump() is the Pydantic v2 replacement for the deprecated .dict();
    # it yields the same field-name -> value mapping without the
    # deprecation warning.
    url = await _autocomplete_logic(request_body.model_dump())
    return await _crawling_logic(url)
async def _crawling_logic(url:str):
request_start = time.perf_counter()
logger.info("[crawling] ========== START ==========")
logger.info(f"[crawling] URL: {request_body.url[:80]}...")
logger.info(f"[crawling] URL: {url[:80]}...")
# ========== Step 1: 네이버 지도 크롤링 ==========
step1_start = time.perf_counter()
logger.info("[crawling] Step 1: 네이버 지도 크롤링 시작...")
try:
scraper = NvMapScraper(request_body.url)
scraper = NvMapScraper(url)
await scraper.scrap()
except GraphQLException as e:
step1_elapsed = (time.perf_counter() - step1_start) * 1000
@ -288,6 +327,23 @@ async def crawling(request_body: CrawlingRequest):
}
async def _autocomplete_logic(autocomplete_item: dict):
    """Resolve an autocomplete item to a place URL via the Playwright scraper.

    Args:
        autocomplete_item: Dict form of the autocomplete request; it is
            passed as-is to ``NvMapPwScraper.get_place_id_url``.

    Returns:
        The URL returned by ``get_place_id_url``.

    Raises:
        HTTPException: 502 when the scraper raises, or when it yields no URL.
    """
    step1_start = time.perf_counter()

    try:
        async with NvMapPwScraper() as pw_scraper:
            new_url = await pw_scraper.get_place_id_url(autocomplete_item)
    except Exception as e:
        step1_elapsed = (time.perf_counter() - step1_start) * 1000
        logger.error(
            f"[crawling] Autocomplete FAILED - 자동완성 예기치 않은 오류: {e} ({step1_elapsed:.1f}ms)"
        )
        logger.exception("[crawling] Autocomplete 상세 오류:")
        # Chain the original error so the root cause stays in the traceback.
        raise HTTPException(
            status_code=status.HTTP_502_BAD_GATEWAY,
            detail="자동완성 place id 추출 실패",
        ) from e

    # NOTE(review): get_place_id_url appears able to finish without returning
    # a URL (its fall-through raises look disabled) - confirm in the scraper.
    # Without this guard a missing URL would surface later as an unhandled
    # error instead of the documented 502.
    if not new_url:
        step1_elapsed = (time.perf_counter() - step1_start) * 1000
        logger.error(
            f"[crawling] Autocomplete FAILED - place id URL 없음 ({step1_elapsed:.1f}ms)"
        )
        raise HTTPException(
            status_code=status.HTTP_502_BAD_GATEWAY,
            detail="자동완성 place id 추출 실패",
        )

    return new_url
def _extract_image_name(url: str, index: int) -> str:
"""URL에서 이미지 이름 추출 또는 기본 이름 생성"""
try:

View File

@ -122,6 +122,22 @@ class CrawlingRequest(BaseModel):
url: str = Field(..., description="네이버 지도 장소 URL")
class AutoCompleteRequest(BaseModel):
    """자동완성 요청 스키마"""

    # Request schema for the autocomplete endpoint. The class docstring above
    # is kept verbatim because Pydantic publishes it as the schema description.

    # Sample payload attached to the generated JSON schema.
    model_config = ConfigDict(
        json_schema_extra={
            "example": {
                "title": "<b>스테이</b>,<b>머뭄</b>",
                "address": "전북특별자치도 군산시 신흥동 63-18",
                "roadAddress": "전북특별자치도 군산시 절골길 18",
            }
        }
    )

    # Required fields.
    title: str = Field(
        default=...,
        description="네이버 검색 place API Title",
    )
    address: str = Field(
        default=...,
        description="네이버 검색 place API 지번주소",
    )
    # Optional field; defaults to None when omitted.
    roadAddress: Optional[str] = Field(
        default=None,
        description="네이버 검색 place API 도로명주소",
    )
class ProcessedInfo(BaseModel):
"""가공된 장소 정보 스키마"""

View File

@ -392,7 +392,7 @@ async def get_song_status(
# song_id로 Song 조회
song_result = await session.execute(
select(Song)
.where(Song.suno_task_id == suno_task_id)
.where(Song.suno_task_id == song_id)
.order_by(Song.created_at.desc())
.limit(1)
)
@ -418,7 +418,7 @@ async def get_song_status(
# 백그라운드 태스크로 MP3 다운로드 및 Blob 업로드 실행
background_tasks.add_task(
download_and_upload_song_by_suno_task_id,
suno_task_id=suno_task_id,
suno_task_id=song_id,
audio_url=audio_url,
store_name=store_name,
duration=clip_duration,

View File

@ -2,7 +2,7 @@ import asyncio
from playwright.async_api import async_playwright
from urllib import parse
class nvMapPwScraper():
class NvMapPwScraper():
# cls vars
is_ready = False
_playwright = None
@ -107,7 +107,7 @@ patchedGetter.toString();''')
if "/place/" in self.page.url:
return self.page.url
if (count == self._max_retry / 2):
raise Exception("Failed to identify place id. loading timeout")
else:
raise Exception("Failed to identify place id. item is ambiguous")
# if (count == self._max_retry / 2):
# raise Exception("Failed to identify place id. loading timeout")
# else:
# raise Exception("Failed to identify place id. item is ambiguous")

View File

@ -231,7 +231,7 @@ class SunoService:
)
code = result.get("code", 0)
data = result.get("data", {})
data = result.get("data") or {}
if code != 200:
return PollingSongResponse(

View File

@ -13,6 +13,7 @@ dependencies = [
"fastapi-cli>=0.0.16",
"fastapi[standard]>=0.125.0",
"openai>=2.13.0",
"playwright>=1.57.0",
"pydantic-settings>=2.12.0",
"python-jose[cryptography]>=3.5.0",
"python-multipart>=0.0.21",

33
uv.lock
View File

@ -716,6 +716,7 @@ dependencies = [
{ name = "fastapi", extra = ["standard"] },
{ name = "fastapi-cli" },
{ name = "openai" },
{ name = "playwright" },
{ name = "pydantic-settings" },
{ name = "python-jose", extra = ["cryptography"] },
{ name = "python-multipart" },
@ -743,6 +744,7 @@ requires-dist = [
{ name = "fastapi", extras = ["standard"], specifier = ">=0.125.0" },
{ name = "fastapi-cli", specifier = ">=0.0.16" },
{ name = "openai", specifier = ">=2.13.0" },
{ name = "playwright", specifier = ">=1.57.0" },
{ name = "pydantic-settings", specifier = ">=2.12.0" },
{ name = "python-jose", extras = ["cryptography"], specifier = ">=3.5.0" },
{ name = "python-multipart", specifier = ">=0.0.21" },
@ -788,6 +790,25 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/b7/b9/c538f279a4e237a006a2c98387d081e9eb060d203d8ed34467cc0f0b9b53/packaging-26.0-py3-none-any.whl", hash = "sha256:b36f1fef9334a5588b4166f8bcd26a14e521f2b55e6b9de3aaa80d3ff7a37529", size = 74366, upload-time = "2026-01-21T20:50:37.788Z" },
]
[[package]]
name = "playwright"
version = "1.57.0"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "greenlet" },
{ name = "pyee" },
]
wheels = [
{ url = "https://files.pythonhosted.org/packages/ed/b6/e17543cea8290ae4dced10be21d5a43c360096aa2cce0aa7039e60c50df3/playwright-1.57.0-py3-none-macosx_10_13_x86_64.whl", hash = "sha256:9351c1ac3dfd9b3820fe7fc4340d96c0d3736bb68097b9b7a69bd45d25e9370c", size = 41985039, upload-time = "2025-12-09T08:06:18.408Z" },
{ url = "https://files.pythonhosted.org/packages/8b/04/ef95b67e1ff59c080b2effd1a9a96984d6953f667c91dfe9d77c838fc956/playwright-1.57.0-py3-none-macosx_11_0_arm64.whl", hash = "sha256:a4a9d65027bce48eeba842408bcc1421502dfd7e41e28d207e94260fa93ca67e", size = 40775575, upload-time = "2025-12-09T08:06:22.105Z" },
{ url = "https://files.pythonhosted.org/packages/60/bd/5563850322a663956c927eefcf1457d12917e8f118c214410e815f2147d1/playwright-1.57.0-py3-none-macosx_11_0_universal2.whl", hash = "sha256:99104771abc4eafee48f47dac2369e0015516dc1ce8c409807d2dd440828b9a4", size = 41985042, upload-time = "2025-12-09T08:06:25.357Z" },
{ url = "https://files.pythonhosted.org/packages/56/61/3a803cb5ae0321715bfd5247ea871d25b32c8f372aeb70550a90c5f586df/playwright-1.57.0-py3-none-manylinux1_x86_64.whl", hash = "sha256:284ed5a706b7c389a06caa431b2f0ba9ac4130113c3a779767dda758c2497bb1", size = 45975252, upload-time = "2025-12-09T08:06:29.186Z" },
{ url = "https://files.pythonhosted.org/packages/83/d7/b72eb59dfbea0013a7f9731878df8c670f5f35318cedb010c8a30292c118/playwright-1.57.0-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:38a1bae6c0a07839cdeaddbc0756b3b2b85e476c07945f64ece08f1f956a86f1", size = 45706917, upload-time = "2025-12-09T08:06:32.549Z" },
{ url = "https://files.pythonhosted.org/packages/e4/09/3fc9ebd7c95ee54ba6a68d5c0bc23e449f7235f4603fc60534a364934c16/playwright-1.57.0-py3-none-win32.whl", hash = "sha256:1dd93b265688da46e91ecb0606d36f777f8eadcf7fbef12f6426b20bf0c9137c", size = 36553860, upload-time = "2025-12-09T08:06:35.864Z" },
{ url = "https://files.pythonhosted.org/packages/58/d4/dcdfd2a33096aeda6ca0d15584800443dd2be64becca8f315634044b135b/playwright-1.57.0-py3-none-win_amd64.whl", hash = "sha256:6caefb08ed2c6f29d33b8088d05d09376946e49a73be19271c8cd5384b82b14c", size = 36553864, upload-time = "2025-12-09T08:06:38.915Z" },
{ url = "https://files.pythonhosted.org/packages/6a/60/fe31d7e6b8907789dcb0584f88be741ba388413e4fbce35f1eba4e3073de/playwright-1.57.0-py3-none-win_arm64.whl", hash = "sha256:5f065f5a133dbc15e6e7c71e7bc04f258195755b1c32a432b792e28338c8335e", size = 32837940, upload-time = "2025-12-09T08:06:42.268Z" },
]
[[package]]
name = "pluggy"
version = "1.6.0"
@ -940,6 +961,18 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/c1/60/5d4751ba3f4a40a6891f24eec885f51afd78d208498268c734e256fb13c4/pydantic_settings-2.12.0-py3-none-any.whl", hash = "sha256:fddb9fd99a5b18da837b29710391e945b1e30c135477f484084ee513adb93809", size = 51880, upload-time = "2025-11-10T14:25:45.546Z" },
]
[[package]]
name = "pyee"
version = "13.0.0"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "typing-extensions" },
]
sdist = { url = "https://files.pythonhosted.org/packages/95/03/1fd98d5841cd7964a27d729ccf2199602fe05eb7a405c1462eb7277945ed/pyee-13.0.0.tar.gz", hash = "sha256:b391e3c5a434d1f5118a25615001dbc8f669cf410ab67d04c4d4e07c55481c37", size = 31250, upload-time = "2025-03-17T18:53:15.955Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/9b/4d/b9add7c84060d4c1906abe9a7e5359f2a60f7a9a4f67268b2766673427d8/pyee-13.0.0-py3-none-any.whl", hash = "sha256:48195a3cddb3b1515ce0695ed76036b5ccc2ef3a9f963ff9f77aec0139845498", size = 15730, upload-time = "2025-03-17T18:53:14.532Z" },
]
[[package]]
name = "pygments"
version = "2.19.2"