o2o-infinith-backend/app/common/db/source.py

135 lines
4.6 KiB
Python

import json
from common.db.base import execute, fetchone, fetchall
from models.status import SourceType
async def insert_source(
    hospital_id: str,
    source_type: SourceType,
    url: str,
    language: str | None = None,
) -> int:
    """Insert one row into ``remote_source``.

    Args:
        hospital_id: Value stored in the ``hospital_id`` column.
        source_type: Value stored in the ``source_type`` column.
        url: Value stored in the ``url`` column.
        language: Value stored in the ``language`` column; ``None`` when omitted.

    Returns:
        Whatever ``execute`` returns for the INSERT (annotated ``int``;
        presumably the new row id -- confirm against ``common.db.base``).
    """
    query = "INSERT INTO remote_source (hospital_id, source_type, language, url) VALUES (%s, %s, %s, %s)"
    # Parameter order follows the column list in the statement, not the signature.
    params = (hospital_id, source_type, language, url)
    return await execute(query, params)
async def select_source_mainpage(hospital_id: str) -> dict | None:
    """Look up the ``mainpage`` source row of a hospital.

    Args:
        hospital_id: Value matched against ``remote_source.hospital_id``.

    Returns:
        The ``fetchone`` result for a query selecting ``source_id`` and ``url``
        (annotated as a dict, or ``None``).
    """
    query = "SELECT source_id, url FROM remote_source WHERE hospital_id = %s AND source_type = 'mainpage'"
    params = (hospital_id,)
    return await fetchone(query, params)
async def insert_raw_info(
    source_id: int,
    analysis_run_id: str,
    data_tag: SourceType,
) -> int:
    """Insert one row into ``raw_info`` linking a source to an analysis run.

    Args:
        source_id: Value stored in the ``source_id`` column.
        analysis_run_id: Value stored in the ``analysis_run_id`` column.
        data_tag: Value stored in the ``data_tag`` column.

    Returns:
        Whatever ``execute`` returns for the INSERT (annotated ``int``;
        presumably the new ``info_id`` -- confirm against ``common.db.base``).
    """
    query = "INSERT INTO raw_info (source_id, analysis_run_id, data_tag) VALUES (%s, %s, %s)"
    params = (source_id, analysis_run_id, data_tag)
    return await execute(query, params)
async def update_raw_info_status(info_id: int, status: str) -> None:
    """Set the ``status`` column of the ``raw_info`` row identified by ``info_id``.

    Args:
        info_id: Value matched against ``raw_info.info_id``.
        status: New value for the ``status`` column.
    """
    query = "UPDATE raw_info SET status = %s WHERE info_id = %s"
    # Placeholder order: the SET value comes first, then the WHERE key.
    await execute(query, (status, info_id))
async def update_raw_info(info_id: int, data: dict) -> None:
    """Replace ``raw_data`` of a ``raw_info`` row and mark the row ``'done'``.

    Args:
        info_id: Value matched against ``raw_info.info_id``.
        data: Payload to store; serialized to a JSON string before writing.
    """
    # ensure_ascii=False keeps non-ASCII characters as-is instead of \u escapes.
    serialized = json.dumps(data, ensure_ascii=False)
    query = "UPDATE raw_info SET raw_data = %s, status = 'done' WHERE info_id = %s"
    await execute(query, (serialized, info_id))
async def select_raw_info_data(info_id: int | None) -> dict | None:
if info_id is None:
return None
row = await fetchone("SELECT raw_data FROM raw_info WHERE info_id = %s", (info_id,))
if not row or not row["raw_data"]:
return None
return json.loads(row["raw_data"]) if isinstance(row["raw_data"], str) else row["raw_data"]
async def select_run_sources(analysis_run_id: str) -> list[dict]:
    """List the sources attached to an analysis run.

    Args:
        analysis_run_id: Value matched against ``raw_info.analysis_run_id``.

    Returns:
        The ``fetchall`` result of joining ``raw_info`` with ``remote_source`` on
        ``source_id``, selecting ``info_id``, ``source_type`` and ``url``.
    """
    query = (
        "SELECT ri.info_id, rs.source_type, rs.url"
        " FROM raw_info ri JOIN remote_source rs USING (source_id)"
        " WHERE ri.analysis_run_id = %s"
    )
    params = (analysis_run_id,)
    return await fetchall(query, params)
async def select_run_raw_data(analysis_run_id: str) -> dict:
    """Collect the raw payloads of an analysis run, keyed by source type.

    Rows whose language is ``EN`` (case-insensitive) are stored under the key
    ``"<source_type>_en"`` so that they do not collide with the row of the same
    source type collected in another language (e.g. KR and EN gathered together).

    Args:
        analysis_run_id: Value matched against ``raw_info.analysis_run_id``.

    Returns:
        A dict mapping each key to that row's ``raw_data``, decoded with
        ``json.loads`` when it is a string and stored unchanged otherwise.
        When several rows map to the same key, the last one fetched wins.
    """
    query = (
        "SELECT rs.source_type, rs.language, ri.raw_data"
        " FROM raw_info ri JOIN remote_source rs USING (source_id)"
        " WHERE ri.analysis_run_id = %s"
    )
    records = await fetchall(query, (analysis_run_id,))

    collected: dict = {}
    for record in records:
        payload = record["raw_data"]
        base_key = record["source_type"]
        # A missing or NULL language counts as "not English".
        is_english = (record.get("language") or "").upper() == "EN"
        entry_key = f"{base_key}_en" if is_english else base_key
        if isinstance(payload, str):
            payload = json.loads(payload)
        collected[entry_key] = payload
    return collected
async def select_run_source_raw(
    analysis_run_id: str, source_type: str, language: str | None = None,
) -> dict | None:
    """Fetch the raw payload of one source type within an analysis run.

    Args:
        analysis_run_id: Value matched against ``raw_info.analysis_run_id``.
        source_type: Value matched against ``remote_source.source_type``.
        language: Optional filter on ``remote_source.language``; a falsy value
            (``None`` or an empty string) disables the filter.

    Returns:
        The payload of the first matching row, decoded with ``json.loads`` when it
        is a string. ``None`` when no row matches or the stored value is falsy.
    """
    base_query = (
        "SELECT ri.raw_data FROM raw_info ri JOIN remote_source rs USING (source_id)"
        " WHERE ri.analysis_run_id = %s AND rs.source_type = %s"
    )
    # The language clause and its parameter are added together so the
    # placeholder count always matches the parameter tuple.
    if language:
        query = base_query + " AND rs.language = %s" + " LIMIT 1"
        params: tuple = (analysis_run_id, source_type, language)
    else:
        query = base_query + " LIMIT 1"
        params = (analysis_run_id, source_type)

    record = await fetchone(query, params)
    stored = record["raw_data"] if record else None
    if not stored:
        return None
    if isinstance(stored, str):
        return json.loads(stored)
    return stored
async def update_raw_info_merge(info_id: int, patch: dict) -> None:
    """Merge ``patch`` into ``raw_info.raw_data`` at the top level (read-modify-write).

    Used when a single source adds keys step by step (for example, branding
    first writes ``brandAssets`` and later ``channelLogos``).

    The current payload is read, ``patch`` is applied with ``dict.update``
    (so keys in ``patch`` overwrite existing top-level keys), and the result is
    written back with ``status`` set to ``'done'``. If no row exists for
    ``info_id`` the function returns without writing anything.

    A stored value that is NULL, an empty string, or decodes to an empty/null
    JSON value is treated as an empty dict, matching how the readers in this
    module treat a falsy ``raw_data`` as "no data".

    NOTE(review): the SELECT and the UPDATE are separate statements; whether
    concurrent merges on the same row can lose updates depends on how
    ``common.db.base`` handles transactions -- confirm.

    Args:
        info_id: Value matched against ``raw_info.info_id``.
        patch: Top-level keys to add or overwrite.
    """
    row = await fetchone("SELECT raw_data FROM raw_info WHERE info_id = %s", (info_id,))
    if not row:
        return

    raw = row["raw_data"]
    # Only decode non-empty strings: "" is not valid JSON and would raise.
    data = json.loads(raw) if isinstance(raw, str) and raw else raw
    # NULL column, empty string, or JSON null/empty value: start from scratch
    # instead of failing on ``None.update``.
    if not data:
        data = {}
    data.update(patch)

    await execute(
        "UPDATE raw_info SET raw_data = %s, status = 'done' WHERE info_id = %s",
        (json.dumps(data, ensure_ascii=False), info_id),
    )
async def select_source_by_type(
    hospital_id: str, source_type: str, language: str | None = None,
) -> dict | None:
    """Look up one source row of a hospital by source type.

    Args:
        hospital_id: Value matched against ``remote_source.hospital_id``.
        source_type: Value matched against ``remote_source.source_type``.
        language: Optional filter on ``remote_source.language``; a falsy value
            (``None`` or an empty string) disables the filter.

    Returns:
        The ``fetchone`` result for a query selecting ``source_id`` and ``url``,
        limited to one row (annotated as a dict, or ``None``).
    """
    base_query = "SELECT source_id, url FROM remote_source WHERE hospital_id = %s AND source_type = %s"
    # Clause and parameter are chosen together so placeholders and values stay in sync.
    if language:
        query = base_query + " AND language = %s" + " LIMIT 1"
        params: tuple = (hospital_id, source_type, language)
    else:
        query = base_query + " LIMIT 1"
        params = (hospital_id, source_type)
    return await fetchone(query, params)
async def select_run_mainpage_url(analysis_run_id: str) -> str:
    """Return the ``mainpage`` URL associated with an analysis run.

    Args:
        analysis_run_id: Value matched against ``raw_info.analysis_run_id``.

    Returns:
        The ``url`` of the row returned by ``fetchone``, or an empty string when
        no row is found or the URL is missing/falsy.
    """
    query = (
        "SELECT rs.url FROM raw_info ri JOIN remote_source rs USING (source_id)"
        " WHERE ri.analysis_run_id = %s AND rs.source_type = 'mainpage'"
    )
    record = await fetchone(query, (analysis_run_id,))
    if not record:
        return ""
    # Normalize a NULL/empty URL to "" so callers always receive a str.
    return record.get("url") or ""