diff --git a/database/common.py b/database/common.py
new file mode 100644
index 0000000..c4e9c49
--- /dev/null
+++ b/database/common.py
@@ -0,0 +1,32 @@
+from sqlalchemy.ext.asyncio import create_async_engine, AsyncSession
+from sqlalchemy.orm import declarative_base, sessionmaker
+ENVIRONMENT = "dev"
+# NOTE(review): an empty URL makes create_async_engine() fail at import time;
+# this placeholder must be replaced with a real postgresql:// URL.
+DATABASE_URL = ""
+
+# Create the async engine
+engine = create_async_engine(
+    DATABASE_URL.replace("postgresql://", "postgresql+asyncpg://"),
+    echo=ENVIRONMENT == "dev",
+    future=True
+)
+
+# Create the session factory
+AsyncSessionLocal = sessionmaker(
+    engine,
+    class_=AsyncSession,
+    expire_on_commit=False
+)
+
+# Create the declarative Base class
+Base = declarative_base()
+
+# Database session dependency
+async def get_db():
+    """Yield one AsyncSession per use and close it afterwards."""
+    async with AsyncSessionLocal() as session:
+        try:
+            yield session
+        finally:
+            await session.close()
diff --git a/database/models.py b/database/models.py
new file mode 100644
index 0000000..698ad72
--- /dev/null
+++ b/database/models.py
@@ -0,0 +1,375 @@
+from sqlalchemy import Column, Integer, String, DateTime, Text, Boolean, ForeignKey, JSON, Float, BigInteger, Date
+from sqlalchemy.orm import relationship
+from sqlalchemy.sql import func
+from sqlalchemy.dialects.postgresql import UUID
+# NOTE(review): Base is declared in database/common.py in this change; confirm
+# that the app.database module imported here exists and exposes the same Base.
+from app.database import Base
+import uuid
+
+class Company(Base):
+    """Company model."""
+    __tablename__ = "company"
+
+    company_id = Column(Integer, primary_key=True, index=True)
+    company_name = Column(String, nullable=True)
+
+    # Relationships
+    users = relationship("User", back_populates="company")
+
+class User(Base):
+    """User model."""
+    __tablename__ = "users"
+
+    id = Column(Integer, primary_key=True, index=True)
+    username = Column(String, unique=True, index=True, nullable=True)  # added username field
+    email = Column(String, unique=True, index=True, nullable=False)
+    hashed_password = Column(String, nullable=False)
+    is_active = Column(Boolean, default=True)
+    created_at = Column(DateTime(timezone=True), server_default=func.now())
+    updated_at = Column(DateTime(timezone=True), onupdate=func.now())
+    name = Column(String, nullable=False)  # restored name field
+    refresh_token = Column(String, nullable=True)
+    company_id = Column(Integer, ForeignKey("company.company_id"), nullable=True)
+    address = Column(String, nullable=True)
+    description = Column(String, nullable=True)
+    role = Column(String, nullable=True)  # user, marketer, franchise owner, or administrator
+
+    # Relationships
+    company = relationship("Company", back_populates="users")
+    projects = relationship("Project", back_populates="owner")
+
+class AdStore(Base):
+    """Ad store model."""
+    __tablename__ = "ad_store"
+
+    id = Column(Integer, primary_key=True, index=True)
+    user_id = Column(Integer, ForeignKey("users.id"), nullable=True)
+    name = Column(String, nullable=True)
+    category = Column(String, nullable=True)
+    site_address = Column(String, nullable=True)
+    road_address = Column(String, nullable=True)
+    phone_number = Column(String, nullable=True)
+    naver_place_url = Column(String, nullable=True)
+    hashtags = Column(JSON, nullable=False, default=list)
+    description = Column(Text, nullable=True)
+    image_urls = Column(JSON, nullable=True)
+    created_at = Column(DateTime(timezone=True), server_default=func.now())
+    found_year = Column(Integer, nullable=True)
+    business_hours = Column(String, nullable=True)  # store opening/closing times, weekends, days off
+    employ_count = Column(Integer, nullable=True)  # number of store employees
+    specialties = Column(String, nullable=True)  # business specialties
+
+    # Relationships
+    user = relationship("User", backref="ad_stores")
+    projects = relationship("Project", backref="ad_store")
+
+class Preset(Base):
+    """Preset model."""
+    __tablename__ = "preset"
+
+    id = Column(Integer, primary_key=True, index=True)
+    user_id = Column(Integer, ForeignKey("users.id"), nullable=False)
+    name = Column(String, nullable=True)
+    category = Column(String, nullable=True)
+    description = Column(String, nullable=True)
+    favorite = Column(Boolean, default=False)
+    hashtag = Column(JSON, nullable=True)
+    image_urls = Column(JSON, nullable=True)
+    created_at = Column(DateTime(timezone=True), server_default=func.now())
+    last_used_at = Column(DateTime(timezone=True), nullable=True)
+    title = Column(String, nullable=True)
+
+    # Relationships
+    user = relationship("User", backref="presets")
+    projects = relationship("Project", back_populates="preset")
+
+class Template(Base):
+    """Template model."""
+    __tablename__ = "template"
+
+    id = Column(Integer, primary_key=True, index=True)
+    user_id = Column(Integer, ForeignKey("users.id"), nullable=True)
+    name = Column(String, nullable=True)
+    description = Column(String, nullable=True)
+    thumbnail = Column(String, nullable=True)
+    template_url = Column(String, nullable=True)
+    category = Column(String, nullable=True)
+    rating = Column(Float, nullable=True)  # real type in PostgreSQL maps to Float
+    favortie = Column(Boolean, default=False)  # Note: typo in DB column name "favortie"
+    hashtags = Column(JSON, nullable=True)
+    created_at = Column(DateTime(timezone=True), server_default=func.now())
+    usage_count = Column(Integer, nullable=True)
+
+    # Relationships
+    user = relationship("User", backref="templates")
+    projects = relationship("Project", back_populates="template")
+
+class Project(Base):
+    """Project model."""
+    __tablename__ = "projects"
+
+    id = Column(Integer, primary_key=True, index=True)
+    title = Column(String, nullable=False)
+    description = Column(Text)
+    target_url = Column(String, nullable=False)  # URL to crawl
+    status = Column(String, default="created")  # created, crawling, processing, completed, failed
+    owner_id = Column(Integer, ForeignKey("users.id"))
+    created_at = Column(DateTime(timezone=True), server_default=func.now())
+
+    # Template and Store references (from database)
+    template_id = Column(Integer, ForeignKey("template.id"), nullable=True)
+    store_id = Column(Integer, ForeignKey("ad_store.id"), nullable=True)
+    preset_id = Column(Integer, ForeignKey("preset.id"), nullable=True)
+
+    # YouTube-related fields
+    youtube_video_id = Column(String(50), nullable=True)
+    youtube_url = Column(String(255), nullable=True)
+    youtube_upload_status = Column(String(50), nullable=True)
+    youtube_privacy_status = Column(String(20), nullable=True)
+    updated_at = Column(DateTime(timezone=True), onupdate=func.now())
+
+    # Relationships
+    owner = relationship("User", back_populates="projects")
+    template = relationship("Template", back_populates="projects")
+    preset = relationship("Preset", back_populates="projects")
+    crawl_data = relationship("CrawlData", back_populates="project", uselist=False)
+    video = relationship("Video", back_populates="project", uselist=False)
+    generated_content = relationship("GeneratedContent", back_populates="project", uselist=False)
+    task_statuses = relationship("TaskStatus", back_populates="project")
+
+class CrawlData(Base):
+    """Crawl data model."""
+    __tablename__ = "crawl_data"
+
+    id = Column(Integer, primary_key=True, index=True)
+    project_id = Column(Integer, ForeignKey("projects.id"))
+
+    # Naver Map data
+    business_name = Column(String)
+    address = Column(String)
+    phone = Column(String)
+    business_type = Column(String)
+    coordinates = Column(JSON)  # {"lat": float, "lng": float}
+
+    # Additional place information (crawler4)
+    business_hours = Column(String)  # business hours
+    homepage = Column(String)  # homepage URL
+    place_description = Column(Text)  # place description
+
+    # Blog data
+    blog_urls = Column(JSON)  # list of collected blog URLs
+    blog_content = Column(Text)  # extracted blog content
+    advertising_keywords = Column(JSON)  # list of advertising keywords
+
+    # Detailed blog information (crawler4)
+    blog_details = Column(JSON)  # stores the entire detailed_blogs payload
+    map_blog_reviews = Column(JSON)  # map blog reviews
+
+    # Image data
+    image_urls = Column(JSON)  # list of collected image URLs
+    stored_images = Column(JSON)  # list of stored image file paths
+
+    # Crawl metadata
+    crawl_metadata = Column(JSON)  # statistics, collection time, etc.
+
+    created_at = Column(DateTime(timezone=True), server_default=func.now())
+
+    # Relationships
+    project = relationship("Project", back_populates="crawl_data")
+
+class GeneratedContent(Base):
+    """Generated content model."""
+    __tablename__ = "generated_content"
+
+    id = Column(Integer, primary_key=True, index=True)
+    project_id = Column(Integer, ForeignKey("projects.id"))
+
+    # Lyrics data
+    lyrics = Column(Text)
+    lyrics_prompt = Column(Text)  # prompt used
+    lyrics_generation_time = Column(Integer, default=0)  # lyrics generation time (seconds)
+    lyrics_token_count = Column(Integer, default=0)  # number of tokens used
+    lyrics_model_used = Column(String, default="gpt-3.5-turbo")  # model used
+
+    # BGM data
+    bgm_url = Column(String)
+    bgm_file_path = Column(String)  # path of the stored BGM file
+    bgm_duration = Column(Float, default=30.0)  # BGM length (seconds)
+    bgm_generation_time = Column(Integer, default=0)  # BGM generation time (seconds)
+    bgm_model_used = Column(String, default="mureka-v6")  # model used
+
+    created_at = Column(DateTime(timezone=True), server_default=func.now())
+
+    # Relationships
+    project = relationship("Project", back_populates="generated_content")
+
+class Video(Base):
+    """Video model."""
+    __tablename__ = "videos"
+
+    id = Column(Integer, primary_key=True, index=True)
+    project_id = Column(Integer, ForeignKey("projects.id"))
+
+    # Video file information
+    file_path = Column(String, nullable=False)
+    file_size = Column(Integer)  # in bytes
+    duration = Column(Integer)  # in seconds
+    resolution = Column(String)  # "1920x1080" format
+
+    # Generation settings
+    template_type = Column(String, default="default")
+    generation_settings = Column(JSON)  # video generation settings
+
+    # Status
+    status = Column(String, default="processing")  # processing, completed, failed
+    error_message = Column(Text)
+
+    created_at = Column(DateTime(timezone=True), server_default=func.now())
+    completed_at = Column(DateTime(timezone=True))
+
+    # Relationships
+    project = relationship("Project", back_populates="video")
+
+class TaskStatus(Base):
+    """Task status tracking model."""
+    __tablename__ = "task_status"
+
+    id = Column(Integer, primary_key=True, index=True)
+    task_id = Column(String, unique=True, index=True, nullable=True)  # Celery task ID
+    project_id = Column(Integer, ForeignKey("projects.id"), nullable=True)
+    task_type = Column(String, nullable=True)  # crawling, content_generation, video_generation
+    status = Column(String, nullable=True)  # pending, started, progress, success, failure
+    progress = Column(Integer, nullable=True)  # 0-100
+    current_step = Column(String, nullable=True)
+    result = Column(JSON, nullable=True)
+    error_message = Column(Text, nullable=True)
+
+    created_at = Column(DateTime(timezone=True), server_default=func.now())
+    updated_at = Column(DateTime(timezone=True), nullable=True)
+
+    # Relationships
+    project = relationship("Project", back_populates="task_statuses")
+
+class UserActivityLog(Base):
+    """User activity log model."""
+    __tablename__ = "user_activity_logs"
+
+    id = Column(Integer, primary_key=True, index=True)
+    user_id = Column(Integer, ForeignKey("users.id"))
+    action = Column(String, nullable=False)  # login, logout, create_project, etc.
+    details = Column(JSON)  # additional details
+    ip_address = Column(String)
+    user_agent = Column(String)
+
+    created_at = Column(DateTime(timezone=True), server_default=func.now())
+
+    # Relationships
+    user = relationship("User")
+
+class SystemStatistics(Base):
+    """System statistics model."""
+    __tablename__ = "system_statistics"
+
+    id = Column(Integer, primary_key=True, index=True)
+    stat_date = Column(Date, unique=True, nullable=False)
+
+    # User statistics
+    total_users = Column(Integer, nullable=True)
+    active_users = Column(Integer, nullable=True)
+    new_users = Column(Integer, nullable=True)
+
+    # Project statistics
+    total_projects = Column(Integer, nullable=True)
+    completed_projects = Column(Integer, nullable=True)
+    failed_projects = Column(Integer, nullable=True)
+
+    # Video statistics
+    total_videos = Column(Integer, nullable=True)
+    total_video_size = Column(BigInteger, nullable=True)  # bytes
+    new_videos = Column(Integer, nullable=True)  # number of videos newly created this month
+
+    # Template statistics
+    total_templates = Column(Integer, nullable=True)  # total number of templates
+
+    # Performance statistics
+    avg_crawling_time = Column(Float, nullable=True)  # seconds
+    avg_content_generation_time = Column(Float, nullable=True)  # seconds
+    avg_video_rendering_time = Column(Float, nullable=True)  # seconds
+
+    # Growth-rate statistics (monthly)
+    user_growth_rate = Column(Float, nullable=True)  # user growth, monthly
+    template_growth_rate = Column(Float, nullable=True)  # template creation growth rate, monthly
+    video_growth_rate = Column(Float, nullable=True)  # video creation growth rate, monthly
+    media_growth_rate = Column(Float, nullable=True)  # media asset growth rate, monthly
+
+    created_at = Column(DateTime(timezone=True), server_default=func.now())
+    updated_at = Column(DateTime(timezone=True), onupdate=func.now())
+
+
+class SystemUsage(Base):
+    """System usage model."""
+    __tablename__ = "system_usage"
+
+    id = Column(Integer, primary_key=True, index=True)
+    stat_date = Column(Date, unique=True, nullable=False)
+
+    # Storage usage (giga units)
+    image_usage = Column(Float, nullable=True)  # image storage usage, giga units
+    video_usage = Column(Float, nullable=True)  # video storage usage, giga units
+    music_usage = Column(Float, nullable=True)  # music storage usage, giga units
+    extra_usage = Column(Float, nullable=True)  # other storage usage, giga units
+
+    created_at = Column(DateTime(timezone=True), server_default=func.now())
+
+
+class SystemAvailability(Base):
+    """System availability model."""
+    __tablename__ = "system_availability"
+
+    id = Column(Integer, primary_key=True, index=True)
+    stat_date = Column(Date, unique=True, nullable=False)
+
+    # Availability metrics (percent; smallint in the 0-100 range)
+    ai_availability = Column(Integer, nullable=True)  # AI engine uptime rate
+    dbase_availability = Column(Integer, nullable=True)  # database uptime rate
+    storage_availability = Column(Integer, nullable=True)  # storage server uptime rate
+    renderer_availability = Column(Integer, nullable=True)  # video renderer uptime rate
+
+    created_at = Column(DateTime(timezone=True), server_default=func.now())
+
+
+class MediaAsset(Base):
+    """Media asset model."""
+    __tablename__ = "media_assets"
+    __mapper_args__ = {"confirm_deleted_rows": False}
+
+    id = Column(Integer, primary_key=True, index=True)
+    name = Column(String, nullable=False)
+    type = Column(String, nullable=False)
+    size = Column(String)
+    file_size_bytes = Column(Integer)
+    dimensions = Column(String)
+    width = Column(Integer)
+    height = Column(Integer)
+    duration = Column(String)
+    duration_seconds = Column(Float)
+    thumbnail = Column(String)
+    file_path = Column(String)
+    url = Column(String)
+    mime_type = Column(String)
+    tags = Column(JSON)
+    usage_count = Column(Integer, default=0)
+    owner_id = Column(Integer, ForeignKey("users.id"))
+    meta_info = Column(JSON)  # renamed because "metadata" is reserved by SQLAlchemy
+    created_at = Column(DateTime(timezone=True), server_default=func.now())
+    updated_at = Column(DateTime(timezone=True), onupdate=func.now())
+
+    # Relationships
+    owner = relationship("User", backref="media_assets")
+
+# Backward-compatible alias. A second declarative class mapped to the
+# "media_assets" table makes SQLAlchemy raise "Table 'media_assets' is already
+# defined for this MetaData instance" at import time, and its duplicate
+# backref="media_assets" on User would conflict as well.
+Asset = MediaAsset
diff --git a/main.py b/main.py
index 9798ab8..2fe41e4 100644
--- a/main.py
+++ b/main.py
@@ -34,11 +34,8 @@ async def main():
     print(f" - 허용 포맷: {', '.join(sorted(image_filter.allowed_formats))}")
     print(f" - 크기 조건: {'AND' if image_filter.require_both_dimensions else 'OR'}")
 
-    # 프로젝트 디렉토리 설정
-    if platform.system() == 'Windows':
-        project_dir = config.get('paths.project_dir_windows', r'C:\CrawlingData')
-    else:
-        project_dir = os.path.join(os.path.dirname(os.path.realpath(__file__)), 'data')
+    # Set the project directory
+    project_dir = os.path.join(os.path.dirname(os.path.realpath(__file__)), 'data')
 
     # 크롤러 초기화
     crawler = NaverIntegratedCrawler(project_dir=project_dir, image_filter=image_filter)
diff --git a/requirements.txt b/requirements.txt
index 56a4fe5..225e8ab 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -4,5 +4,8 @@ selenium
 requests
 pillow
 azure-identity
+azure-keyvault-secrets
+azure-storage-blob
+azure-storage-queue
 azure-mgmt-resource
 azure-mgmt-compute
\ No newline at end of file
diff --git a/selenium_crawler/naver_integrated_crawler.py b/selenium_crawler/naver_integrated_crawler.py
index 2622479..44a7b73 100644
--- a/selenium_crawler/naver_integrated_crawler.py
+++ b/selenium_crawler/naver_integrated_crawler.py
@@ -23,7 +23,7 @@ from .naver_blog_crawler import NaverBlogCrawler, ImageFilterConfig
 
 
 class NaverIntegratedCrawler:
-    def __init__(self, project_dir: str = r'/home/jhyeu/workspace/data',
+    def __init__(self, project_dir: str,
                  image_filter: ImageFilterConfig = None):
         """
         초기화
diff --git a/utils/azure.py b/utils/azure.py
new file mode 100644
index 0000000..ebb956f
--- /dev/null
+++ b/utils/azure.py
@@ -0,0 +1,92 @@
+import requests
+import json
+
+from azure.storage.blob import BlobServiceClient, ContentSettings
+from azure.storage.queue import QueueClient
+from azure.identity import DefaultAzureCredential
+from azure.keyvault.secrets import SecretClient
+from azure.identity import DefaultAzureCredential
+
+KEY_VAULT_NAME = "ado2-keys"
+def get_keyvault_client():
+    """Return a SecretClient for the KEY_VAULT_NAME Key Vault."""
+    key_vault_uri = f"https://{KEY_VAULT_NAME}.vault.azure.net/"
+
+    # DefaultAzureCredential uses the Managed Identity automatically
+    credential = DefaultAzureCredential()
+    return SecretClient(vault_url=key_vault_uri, credential=credential)
+
+# Names of the Key Vault secrets
+BLOB_ACCOUNT_URL_KEY = "ado2-blob-account-url"  # e.g. https://mystorageaccount.blob.core.windows.net
+QUEUE_URL_KEY = "ado2-queue-url"
+
+# Plain text
+BLOB_CONTAINER_NAME = "ado2-media-public-access"
+BLOB_MEDIA_FOLDER = "ado2-media-original"
+
+def az_storage_upload_ado2_media(data, remote_file_path: str):
+    """Upload data to BLOB_MEDIA_FOLDER/remote_file_path, overwriting any existing blob, and return the blob URL."""
+    secret_client = get_keyvault_client()
+    account_url = secret_client.get_secret(BLOB_ACCOUNT_URL_KEY).value
+    # Use Managed Identity
+    credential = DefaultAzureCredential()
+    blob_service_client = BlobServiceClient(account_url=account_url, credential=credential)
+
+    blob_client = blob_service_client.get_blob_client(container=BLOB_CONTAINER_NAME, blob=f"{BLOB_MEDIA_FOLDER}/{remote_file_path}")
+
+    blob_client.upload_blob(data, overwrite=True)
+    return blob_client.url
+
+
+def az_storage_read_ado2_media(remote_file_path: str):
+    """Return the download_blob() result for BLOB_MEDIA_FOLDER/remote_file_path."""
+    secret_client = get_keyvault_client()
+    account_url = secret_client.get_secret(BLOB_ACCOUNT_URL_KEY).value
+    # Use Managed Identity
+    credential = DefaultAzureCredential()
+    blob_service_client = BlobServiceClient(account_url=account_url, credential=credential)
+
+    blob_client = blob_service_client.get_blob_client(container=BLOB_CONTAINER_NAME, blob=f"{BLOB_MEDIA_FOLDER}/{remote_file_path}")
+
+    blob_data = blob_client.download_blob()
+    return blob_data
+
+def az_storage_get_ado2_media_list(remote_folder_path: str) -> list[str]:
+    """Return the names of the blobs whose path starts with BLOB_MEDIA_FOLDER/remote_folder_path."""
+    secret_client = get_keyvault_client()
+    account_url = secret_client.get_secret(BLOB_ACCOUNT_URL_KEY).value
+    # Use Managed Identity
+    credential = DefaultAzureCredential()
+    blob_service_client = BlobServiceClient(account_url=account_url, credential=credential)
+
+    prefix = f"{BLOB_MEDIA_FOLDER}/{remote_folder_path}"
+    blob_media_list = blob_service_client.get_container_client(BLOB_CONTAINER_NAME).list_blobs(name_starts_with=prefix)
+    # list_blobs() yields blob property objects; return only their names so the result matches list[str]
+    return [blob.name for blob in blob_media_list]
+
+def az_add_queue_message(body: dict):
+    """Serialize body to JSON and send it to the queue."""
+    secret_client = get_keyvault_client()
+    queue_url = secret_client.get_secret(QUEUE_URL_KEY).value
+    credential = DefaultAzureCredential()
+    queue_client = QueueClient.from_queue_url(queue_url, credential=credential)
+    queue_client.send_message(json.dumps(body, ensure_ascii=False))
+
+def az_get_queue_message():
+    """Receive the queued messages, delete each one, and return their JSON-decoded bodies as a list."""
+    secret_client = get_keyvault_client()
+    queue_url = secret_client.get_secret(QUEUE_URL_KEY).value
+    credential = DefaultAzureCredential()
+    queue_client = QueueClient.from_queue_url(queue_url, credential=credential)
+
+    # Read messages
+    messages = queue_client.receive_messages()
+    data_list = []
+    for message in messages:
+        # Process the message
+        data = json.loads(message.content)
+        print(f"받은 메시지: {data}")
+        # Delete after processing
+        queue_client.delete_message(message.id, message.pop_receipt)
+        data_list.append(data)
+    return data_list
\ No newline at end of file
diff --git a/kill_myself.py b/utils/kill_myself.py
similarity index 100%
rename from kill_myself.py
rename to utils/kill_myself.py