스크래핑 로직 점검 및 추가

upload
jaehwang 2026-04-30 11:58:52 +09:00
parent d930679e90
commit 0d3543d84d
18 changed files with 361 additions and 43 deletions

View File

@ -4,11 +4,12 @@
-- 테이블 생성 SQL - instagram_data -- 테이블 생성 SQL - instagram_data
CREATE TABLE instagram_data CREATE TABLE instagram_data
( (
`id` INT NOT NULL AUTO_INCREMENT, `id` INT NOT NULL AUTO_INCREMENT,
`hospital_id` INT NOT NULL, `hospital_id` CHAR(36) NOT NULL,
`url` VARCHAR(500) NOT NULL, `url` VARCHAR(500) NOT NULL,
`raw_data` JSON NULL, `status` VARCHAR(20) NOT NULL DEFAULT 'start',
`created_at` TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP, `raw_data` JSON NULL,
`created_at` TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP,
PRIMARY KEY (id) PRIMARY KEY (id)
); );
@ -21,11 +22,12 @@ CREATE INDEX IX_instagram_data_1
-- 테이블 생성 SQL - facebook_data -- 테이블 생성 SQL - facebook_data
CREATE TABLE facebook_data CREATE TABLE facebook_data
( (
`id` INT NOT NULL AUTO_INCREMENT, `id` INT NOT NULL AUTO_INCREMENT,
`hospital_id` INT NOT NULL, `hospital_id` CHAR(36) NOT NULL,
`url` VARCHAR(500) NOT NULL, `url` VARCHAR(500) NOT NULL,
`raw_data` JSON NULL, `status` VARCHAR(20) NOT NULL DEFAULT 'start',
`created_at` TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP, `raw_data` JSON NULL,
`created_at` TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP,
PRIMARY KEY (id) PRIMARY KEY (id)
); );
@ -38,11 +40,12 @@ CREATE INDEX IX_facebook_data_1
-- 테이블 생성 SQL - naver_blog_data -- 테이블 생성 SQL - naver_blog_data
CREATE TABLE naver_blog_data CREATE TABLE naver_blog_data
( (
`id` INT NOT NULL AUTO_INCREMENT, `id` INT NOT NULL AUTO_INCREMENT,
`hospital_id` INT NOT NULL, `hospital_id` CHAR(36) NOT NULL,
`url` VARCHAR(500) NOT NULL, `url` VARCHAR(500) NOT NULL,
`raw_data` JSON NULL, `status` VARCHAR(20) NOT NULL DEFAULT 'start',
`created_at` TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP, `raw_data` JSON NULL,
`created_at` TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP,
PRIMARY KEY (id) PRIMARY KEY (id)
); );
@ -55,14 +58,15 @@ CREATE INDEX IX_naver_blog_data_1
-- 테이블 생성 SQL - hospital_baseinfo -- 테이블 생성 SQL - hospital_baseinfo
CREATE TABLE hospital_baseinfo CREATE TABLE hospital_baseinfo
( (
`hospital_id` INT NOT NULL AUTO_INCREMENT, `hospital_id` CHAR(36) NOT NULL,
`owner_user_id` INT NOT NULL, `owner_user_id` INT NOT NULL,
`hospital_name` VARCHAR(50) NOT NULL, `hospital_name` VARCHAR(50) NOT NULL,
`brn` VARCHAR(50) NOT NULL, `hospital_name_en` VARCHAR(50) NULL,
`road_address` VARCHAR(100) NULL, `brn` VARCHAR(50) NOT NULL,
`site_address` VARCHAR(100) NULL, `road_address` VARCHAR(100) NULL,
`created_at` TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP, `site_address` VARCHAR(100) NULL,
`updated_at` TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP, `created_at` TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP,
`updated_at` TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP,
PRIMARY KEY (hospital_id) PRIMARY KEY (hospital_id)
); );
@ -87,8 +91,9 @@ CREATE TABLE user_info
CREATE TABLE youtube_data CREATE TABLE youtube_data
( (
`id` INT NOT NULL AUTO_INCREMENT, `id` INT NOT NULL AUTO_INCREMENT,
`hospital_id` INT NOT NULL, `hospital_id` CHAR(36) NOT NULL,
`url` VARCHAR(500) NOT NULL, `url` VARCHAR(500) NOT NULL,
`status` VARCHAR(20) NOT NULL DEFAULT 'start',
`raw_data` JSON NULL, `raw_data` JSON NULL,
`created_at` TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP, `created_at` TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP,
PRIMARY KEY (id) PRIMARY KEY (id)
@ -98,3 +103,46 @@ CREATE TABLE youtube_data
CREATE INDEX IX_youtube_data_1 CREATE INDEX IX_youtube_data_1
ON youtube_data(hospital_id); ON youtube_data(hospital_id);
-- gangnam_unni_data Table Create SQL
CREATE TABLE gangnam_unni_data
(
`id` INT NOT NULL AUTO_INCREMENT,
`hospital_id` CHAR(36) NOT NULL,
`url` VARCHAR(500) NOT NULL,
`status` VARCHAR(20) NOT NULL DEFAULT 'start',
`raw_data` JSON NULL,
`created_at` TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP,
PRIMARY KEY (id)
);
-- Index 설정 SQL - gangnam_unni_data(hospital_id)
CREATE INDEX IX_gangnam_unni_data_1
ON gangnam_unni_data(hospital_id);
-- analysis_runs Table Create SQL
CREATE TABLE analysis_runs
(
`analysis_run_id` CHAR(36) NOT NULL,
`hospital_id` CHAR(36) NOT NULL,
`owner_user_id` INT NOT NULL DEFAULT 0,
`status` VARCHAR(50) NOT NULL DEFAULT 'discovering',
`instagram_data_id` INT NULL,
`facebook_data_id` INT NULL,
`naver_blog_data_id` INT NULL,
`youtube_data_id` INT NULL,
`gangnam_unni_data_id` INT NULL,
`created_at` TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP,
`updated_at` TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP,
PRIMARY KEY (analysis_run_id)
);
-- Index 설정 SQL - analysis_runs(hospital_id)
CREATE INDEX IX_analysis_runs_1
ON analysis_runs(hospital_id);
-- Index 설정 SQL - analysis_runs(owner_user_id)
CREATE INDEX IX_analysis_runs_2
ON analysis_runs(owner_user_id);

View File

@ -1,28 +1,63 @@
from fastapi import APIRouter, Depends, status import uuid6
from fastapi import APIRouter, BackgroundTasks, Depends, status
from common.deps import verify_api_key from common.deps import verify_api_key
from models.api.analysis import AnalysisCreate, AnalysisStartResponse, AnalysisStatusResponse from common.db import fetchone, insert_instagram_row, insert_facebook_row, insert_naver_blog_row, insert_youtube_row, insert_gangnam_unni_row, insert_analysis_run
from models.analysis import AnalysisCreate, AnalysisStartResponse, AnalysisStatusResponse
from models.status import AnalysisStatus
from services.collect import collect_instagram, collect_facebook, collect_naver_blog, collect_youtube, collect_gangnam_unni
router = APIRouter(prefix="/api/analyses", tags=["analyses"], dependencies=[Depends(verify_api_key)]) router = APIRouter(prefix="/api/analyses", tags=["analyses"], dependencies=[Depends(verify_api_key)])
@router.post("", status_code=status.HTTP_202_ACCEPTED, response_model=AnalysisStartResponse) @router.post("", status_code=status.HTTP_202_ACCEPTED, response_model=AnalysisStartResponse)
async def start_analysis(body: AnalysisCreate): async def start_analysis(body: AnalysisCreate, background_tasks: BackgroundTasks):
analysis_run_id = str(uuid6.uuid7())
hospital_id = body.clinic_id
# 사실 hospital과 owner_user_id 비교 후 검증이 필요한 거지만 일단 PoC 니까. 나중에 바꿉니다.
hospital = await fetchone(
"SELECT owner_user_id FROM hospital_baseinfo WHERE hospital_id = %s",
(hospital_id,),
)
owner_user_id = hospital["owner_user_id"] if hospital else 0
ig_url = body.channels.instagram[0] if isinstance(body.channels.instagram, list) else body.channels.instagram
ig_id = await insert_instagram_row(hospital_id, ig_url) if ig_url else None
fb_id = await insert_facebook_row(hospital_id, body.channels.facebook) if body.channels.facebook else None
nb_id = await insert_naver_blog_row(hospital_id, body.channels.naver_blog) if body.channels.naver_blog else None
yt_id = await insert_youtube_row(hospital_id, body.channels.youtube) if body.channels.youtube else None
gu_id = await insert_gangnam_unni_row(hospital_id, body.channels.gangnam_unni) if body.channels.gangnam_unni else None
analysis_run_id = await insert_analysis_run(analysis_run_id, hospital_id, owner_user_id, ig_id, fb_id, nb_id, yt_id, gu_id)
if ig_id:
background_tasks.add_task(collect_instagram, analysis_run_id, ig_id, ig_url)
if fb_id:
background_tasks.add_task(collect_facebook, analysis_run_id, fb_id, body.channels.facebook)
if nb_id:
background_tasks.add_task(collect_naver_blog, analysis_run_id, nb_id, body.channels.naver_blog)
if yt_id:
background_tasks.add_task(collect_youtube, analysis_run_id, yt_id, body.channels.youtube)
if gu_id:
background_tasks.add_task(collect_gangnam_unni, analysis_run_id, gu_id, body.channels.gangnam_unni)
return AnalysisStartResponse( return AnalysisStartResponse(
analysis_run_id="22222222-2222-2222-2222-222222222222", analysis_run_id=analysis_run_id,
clinic_id=body.clinic_id or "11111111-1111-1111-1111-111111111111", clinic_id=hospital_id,
status="discovering", status=AnalysisStatus.DISCOVERING,
estimated_seconds=90, estimated_seconds=90,
poll_url="/api/analyses/22222222-2222-2222-2222-222222222222/status", poll_url=f"/api/analyses/{analysis_run_id}/status",
) )
@router.get("/{run_id}/status", response_model=AnalysisStatusResponse) @router.get("/{run_id}/status", response_model=AnalysisStatusResponse)
async def get_analysis_status(run_id: str): async def get_analysis_status(run_id: str):
row = await fetchone("SELECT status FROM analysis_runs WHERE analysis_run_id = %s", (run_id,))
return AnalysisStatusResponse( return AnalysisStatusResponse(
analysis_run_id=run_id, analysis_run_id=run_id,
status="collecting", status=AnalysisStatus(row["status"]),
progress=0.45, progress=50.0,
current_step="채널 데이터 수집 중", current_step="",
channel_errors={}, channel_errors={},
completed_at=None, completed_at=None,
) )

View File

@ -1,10 +1,12 @@
from fastapi import APIRouter, Depends from fastapi import APIRouter, Depends
from common.deps import verify_api_key from common.deps import verify_api_key
from models.api.channel import ChannelVerifyRequest, ChannelVerifyResponse from models.channel import ChannelVerifyRequest, ChannelVerifyResponse
router = APIRouter(prefix="/api/channels", tags=["channels"], dependencies=[Depends(verify_api_key)]) router = APIRouter(prefix="/api/channels", tags=["channels"], dependencies=[Depends(verify_api_key)])
# will not use
@router.post("/verify", response_model=ChannelVerifyResponse) @router.post("/verify", response_model=ChannelVerifyResponse)
async def verify_channels(body: ChannelVerifyRequest): async def verify_channels(body: ChannelVerifyRequest):
return ChannelVerifyResponse( return ChannelVerifyResponse(

View File

@ -1,20 +1,26 @@
import uuid6
from fastapi import APIRouter, Depends, status from fastapi import APIRouter, Depends, status
from common.deps import verify_api_key from common.deps import verify_api_key
from models.api.clinic import ClinicCreate, ClinicCreateResponse, ClinicHistoryResponse, RunSummary from common.db import insert_hospital
from models.clinic import ClinicCreate, ClinicCreateResponse, ClinicHistoryResponse, RunSummary
router = APIRouter(prefix="/api/clinics", tags=["clinics"], dependencies=[Depends(verify_api_key)]) router = APIRouter(prefix="/api/clinics", tags=["clinics"], dependencies=[Depends(verify_api_key)])
@router.post("", status_code=status.HTTP_201_CREATED, response_model=ClinicCreateResponse) @router.post("", status_code=status.HTTP_201_CREATED, response_model=ClinicCreateResponse)
async def create_clinic(body: ClinicCreate): async def create_clinic(body: ClinicCreate):
hospital_id = str(uuid6.uuid7())
row = await insert_hospital(hospital_id, body.name, body.name_en, body.address, body.url)
return ClinicCreateResponse( return ClinicCreateResponse(
id="11111111-1111-1111-1111-111111111111", id=hospital_id,
url=body.url, url=body.url,
name=body.name, name=body.name,
created_at="2026-04-20T09:00:00Z", created_at=str(row["created_at"]),
) )
# Not done
@router.get("/{id}/history", response_model=ClinicHistoryResponse) @router.get("/{id}/history", response_model=ClinicHistoryResponse)
async def get_clinic_history(id: str): async def get_clinic_history(id: str):
return ClinicHistoryResponse( return ClinicHistoryResponse(

View File

@ -1,6 +1,6 @@
from fastapi import APIRouter, Depends, status from fastapi import APIRouter, Depends, status
from common.deps import verify_api_key from common.deps import verify_api_key
from models.api.plan import PlanCreate, PlanResponse from models.plan import PlanCreate, PlanResponse
router = APIRouter(prefix="/api/plans", tags=["plans"], dependencies=[Depends(verify_api_key)]) router = APIRouter(prefix="/api/plans", tags=["plans"], dependencies=[Depends(verify_api_key)])

View File

@ -1,6 +1,6 @@
from fastapi import APIRouter, Depends from fastapi import APIRouter, Depends
from common.deps import verify_api_key from common.deps import verify_api_key
from models.api.report import ReportResponse, ClinicInfo from models.report import ReportResponse, ClinicInfo
router = APIRouter(prefix="/api/reports", tags=["reports"], dependencies=[Depends(verify_api_key)]) router = APIRouter(prefix="/api/reports", tags=["reports"], dependencies=[Depends(verify_api_key)])

142
app/common/db.py Normal file
View File

@ -0,0 +1,142 @@
import json
import os
import aiomysql
from common.utils import get_env
_pool: aiomysql.Pool | None = None
async def get_pool() -> aiomysql.Pool:
global _pool
if _pool is None:
_pool = await aiomysql.create_pool(
host=get_env("MYSQL_HOST"),
port=int(os.getenv("MYSQL_PORT", "3306")),
user=get_env("MYSQL_USER"),
password=get_env("MYSQL_PASSWORD"),
db=get_env("MYSQL_DB"),
autocommit=True,
charset="utf8mb4",
)
return _pool
async def execute(sql: str, args: tuple = ()) -> int:
pool = await get_pool()
async with pool.acquire() as conn:
async with conn.cursor() as cur:
await cur.execute(sql, args)
return cur.lastrowid
async def fetchone(sql: str, args: tuple = ()) -> dict | None:
pool = await get_pool()
async with pool.acquire() as conn:
async with conn.cursor(aiomysql.DictCursor) as cur:
await cur.execute(sql, args)
return await cur.fetchone()
async def insert_instagram_row(hospital_id: str, url: str) -> int:
return await execute("INSERT INTO instagram_data (hospital_id, url) VALUES (%s, %s)", (hospital_id, url))
async def insert_facebook_row(hospital_id: str, url: str) -> int:
return await execute("INSERT INTO facebook_data (hospital_id, url) VALUES (%s, %s)", (hospital_id, url))
async def insert_naver_blog_row(hospital_id: str, url: str) -> int:
return await execute("INSERT INTO naver_blog_data (hospital_id, url) VALUES (%s, %s)", (hospital_id, url))
async def insert_youtube_row(hospital_id: str, url: str) -> int:
return await execute("INSERT INTO youtube_data (hospital_id, url) VALUES (%s, %s)", (hospital_id, url))
async def insert_gangnam_unni_row(hospital_id: str, url: str) -> int:
return await execute("INSERT INTO gangnam_unni_data (hospital_id, url) VALUES (%s, %s)", (hospital_id, url))
async def insert_analysis_run(
analysis_run_id: str,
hospital_id: str,
owner_user_id: int,
instagram_data_id: int | None,
facebook_data_id: int | None,
naver_blog_data_id: int | None,
youtube_data_id: int | None,
gangnam_unni_data_id: int | None,
) -> str:
await execute(
"INSERT INTO analysis_runs"
" (analysis_run_id, hospital_id, owner_user_id, instagram_data_id, facebook_data_id, naver_blog_data_id, youtube_data_id, gangnam_unni_data_id)"
" VALUES (%s, %s, %s, %s, %s, %s, %s, %s)",
(analysis_run_id, hospital_id, owner_user_id, instagram_data_id, facebook_data_id, naver_blog_data_id, youtube_data_id, gangnam_unni_data_id),
)
return analysis_run_id
async def is_done(table: str, row_id: int | None) -> bool:
if row_id is None:
return True
r = await fetchone(f"SELECT status FROM {table} WHERE id = %s", (row_id,))
return r["status"] == "done"
async def set_instagram_status(row_id: int, status: str) -> None:
await execute("UPDATE instagram_data SET status = %s WHERE id = %s", (status, row_id))
async def set_facebook_status(row_id: int, status: str) -> None:
await execute("UPDATE facebook_data SET status = %s WHERE id = %s", (status, row_id))
async def set_naver_blog_status(row_id: int, status: str) -> None:
await execute("UPDATE naver_blog_data SET status = %s WHERE id = %s", (status, row_id))
async def set_youtube_status(row_id: int, status: str) -> None:
await execute("UPDATE youtube_data SET status = %s WHERE id = %s", (status, row_id))
async def set_gangnam_unni_status(row_id: int, status: str) -> None:
await execute("UPDATE gangnam_unni_data SET status = %s WHERE id = %s", (status, row_id))
async def save_instagram_raw_data(row_id: int, data: dict) -> None:
await execute("UPDATE instagram_data SET raw_data = %s, status = 'done' WHERE id = %s", (json.dumps(data, ensure_ascii=False), row_id))
async def save_facebook_raw_data(row_id: int, data: dict) -> None:
await execute("UPDATE facebook_data SET raw_data = %s, status = 'done' WHERE id = %s", (json.dumps(data, ensure_ascii=False), row_id))
async def save_naver_blog_raw_data(row_id: int, data: dict) -> None:
await execute("UPDATE naver_blog_data SET raw_data = %s, status = 'done' WHERE id = %s", (json.dumps(data, ensure_ascii=False), row_id))
async def save_youtube_raw_data(row_id: int, data: dict) -> None:
await execute("UPDATE youtube_data SET raw_data = %s, status = 'done' WHERE id = %s", (json.dumps(data, ensure_ascii=False), row_id))
async def save_gangnam_unni_raw_data(row_id: int, data: dict) -> None:
await execute("UPDATE gangnam_unni_data SET raw_data = %s, status = 'done' WHERE id = %s", (json.dumps(data, ensure_ascii=False), row_id))
async def insert_hospital(
hospital_id: str,
name: str,
name_en: str | None = None,
road_address: str | None = None,
site_address: str | None = None,
owner_user_id: int = 0,
brn: str = "",
) -> dict:
await execute(
"INSERT INTO hospital_baseinfo (hospital_id, hospital_name, hospital_name_en, road_address, site_address, owner_user_id, brn) VALUES (%s, %s, %s, %s, %s, %s, %s)",
(hospital_id, name, name_en, road_address, site_address, owner_user_id, brn),
)
return await fetchone(
"SELECT created_at FROM hospital_baseinfo WHERE hospital_id = %s",
(hospital_id,),
)

View File

@ -1,9 +1,10 @@
from pydantic import BaseModel from pydantic import BaseModel
from models.status import AnalysisStatus
class Channels(BaseModel): class Channels(BaseModel):
youtube: str | None = None youtube: str | None = None
instagram: list[str] | str | None = None instagram: str | None = None
facebook: str | None = None facebook: str | None = None
naver_blog: str | None = None naver_blog: str | None = None
gangnam_unni: str | None = None gangnam_unni: str | None = None
@ -24,14 +25,14 @@ class AnalysisCreate(BaseModel):
class AnalysisStartResponse(BaseModel): class AnalysisStartResponse(BaseModel):
analysis_run_id: str analysis_run_id: str
clinic_id: str clinic_id: str
status: str status: AnalysisStatus
estimated_seconds: int estimated_seconds: int
poll_url: str poll_url: str
class AnalysisStatusResponse(BaseModel): class AnalysisStatusResponse(BaseModel):
analysis_run_id: str analysis_run_id: str
status: str status: AnalysisStatus
progress: float progress: float
current_step: str current_step: str
channel_errors: dict channel_errors: dict

15
app/models/status.py Normal file
View File

@ -0,0 +1,15 @@
from enum import StrEnum
class AnalysisStatus(StrEnum):
DISCOVERING = "discovering"
COLLECTING = "collecting"
ANALYZING = "analyzing"
COMPLETED = "completed"
FAILED = "failed"
class TaskStatus(StrEnum):
START = "start"
PROCESSING = "processing"
DONE = "done"

18
app/services/analysis.py Normal file
View File

@ -0,0 +1,18 @@
from common.db import fetchone, execute, is_done
async def check_and_advance_analysis(analysis_run_id: str) -> None:
run = await fetchone(
"SELECT instagram_data_id, facebook_data_id, naver_blog_data_id, youtube_data_id, gangnam_unni_data_id"
" FROM analysis_runs WHERE analysis_run_id = %s",
(analysis_run_id,),
)
results = [
await is_done("instagram_data", run["instagram_data_id"]),
await is_done("facebook_data", run["facebook_data_id"]),
await is_done("naver_blog_data", run["naver_blog_data_id"]),
await is_done("youtube_data", run["youtube_data_id"]),
await is_done("gangnam_unni_data", run["gangnam_unni_data_id"]),
]
if all(results):
await execute("UPDATE analysis_runs SET status = 'analyzing' WHERE analysis_run_id = %s", (analysis_run_id,))

48
app/services/collect.py Normal file
View File

@ -0,0 +1,48 @@
from common.db import (
set_instagram_status, save_instagram_raw_data,
set_facebook_status, save_facebook_raw_data,
set_naver_blog_status, save_naver_blog_raw_data,
set_youtube_status, save_youtube_raw_data,
set_gangnam_unni_status, save_gangnam_unni_raw_data,
)
from common.utils import get_env, normalize_handle
from services.analysis import check_and_advance_analysis
from integrations.apify import ApifyClient
from integrations.naver import NaverClient
from integrations.youtube import YouTubeClient
from integrations.firecrawl import FirecrawlClient
async def collect_instagram(analysis_run_id: str, row_id: int, url: str) -> None:
await set_instagram_status(row_id, "processing")
data = await ApifyClient(get_env("APIFY_API_TOKEN")).fetch_instagram_profile(normalize_handle("instagram", url))
await save_instagram_raw_data(row_id, data)
await check_and_advance_analysis(analysis_run_id)
async def collect_facebook(analysis_run_id: str, row_id: int, url: str) -> None:
await set_facebook_status(row_id, "processing")
data = await ApifyClient(get_env("APIFY_API_TOKEN")).fetch_facebook_page(url)
await save_facebook_raw_data(row_id, data)
await check_and_advance_analysis(analysis_run_id)
async def collect_naver_blog(analysis_run_id: str, row_id: int, url: str) -> None:
await set_naver_blog_status(row_id, "processing")
data = await NaverClient(get_env("NAVER_CLIENT_ID"), get_env("NAVER_CLIENT_SECRET")).fetch_blog_rss(normalize_handle("naver_blog", url))
await save_naver_blog_raw_data(row_id, data)
await check_and_advance_analysis(analysis_run_id)
async def collect_youtube(analysis_run_id: str, row_id: int, url: str) -> None:
await set_youtube_status(row_id, "processing")
data = await YouTubeClient(get_env("YOUTUBE_API_KEY")).fetch_channel(normalize_handle("youtube", url))
await save_youtube_raw_data(row_id, data)
await check_and_advance_analysis(analysis_run_id)
async def collect_gangnam_unni(analysis_run_id: str, row_id: int, url: str) -> None:
await set_gangnam_unni_status(row_id, "processing")
data = await FirecrawlClient(get_env("FIRECRAWL_API_KEY")).fetch_gangnam_unni(url)
await save_gangnam_unni_raw_data(row_id, data)
await check_and_advance_analysis(analysis_run_id)

View File

@ -1,5 +1,6 @@
services: services:
o2o-infinith-backend: o2o-infinith-backend:
image: o2o-infinith-backend
build: build:
context: . context: .
dockerfile: Dockerfile dockerfile: Dockerfile

View File

@ -8,3 +8,5 @@ openai==2.32.0
python-jose[cryptography]==3.5.0 python-jose[cryptography]==3.5.0
passlib[bcrypt]==1.7.4 passlib[bcrypt]==1.7.4
python-multipart==0.0.26 python-multipart==0.0.26
uuid6==2025.0.1
aiomysql==0.3.2