# File-viewer header (not part of the program): 135 lines, 4.6 KiB, Python.
import json
|
|
from common.db.base import execute, fetchone, fetchall
|
|
from models.status import SourceType
|
|
|
|
|
|
async def insert_source(
    hospital_id: str,
    source_type: SourceType,
    url: str,
    language: str | None = None,
) -> int:
    """Insert one row into ``remote_source``.

    Args:
        hospital_id: Value stored in the ``hospital_id`` column.
        source_type: Value stored in the ``source_type`` column.
        url: Value stored in the ``url`` column.
        language: Value stored in the ``language`` column; ``None`` by default.

    Returns:
        Whatever ``execute`` returns for the INSERT (presumably the id of the
        new row -- confirm against ``common.db.base.execute``).
    """
    query = "INSERT INTO remote_source (hospital_id, source_type, language, url) VALUES (%s, %s, %s, %s)"
    # Parameter order follows the column list in the SQL, not the signature.
    params = (hospital_id, source_type, language, url)
    return await execute(query, params)
|
|
|
|
|
|
async def select_source_mainpage(hospital_id: str) -> dict | None:
    """Look up the ``'mainpage'`` source registered for a hospital.

    Args:
        hospital_id: Hospital whose ``remote_source`` rows are searched.

    Returns:
        The result of ``fetchone`` for the query, selecting ``source_id`` and
        ``url`` (per the annotation, ``None`` when nothing matches).
    """
    query = "SELECT source_id, url FROM remote_source WHERE hospital_id = %s AND source_type = 'mainpage'"
    row = await fetchone(query, (hospital_id,))
    return row
|
|
|
|
|
|
async def insert_raw_info(
    source_id: int,
    analysis_run_id: str,
    data_tag: SourceType,
) -> int:
    """Insert one row into ``raw_info`` for a source within an analysis run.

    Args:
        source_id: Value stored in the ``source_id`` column.
        analysis_run_id: Value stored in the ``analysis_run_id`` column.
        data_tag: Value stored in the ``data_tag`` column.

    Returns:
        Whatever ``execute`` returns for the INSERT (presumably the new
        ``info_id`` -- confirm against ``common.db.base.execute``).
    """
    query = "INSERT INTO raw_info (source_id, analysis_run_id, data_tag) VALUES (%s, %s, %s)"
    params = (source_id, analysis_run_id, data_tag)
    return await execute(query, params)
|
|
|
|
|
|
async def update_raw_info_status(info_id: int, status: str) -> None:
    """Set the ``status`` column of the ``raw_info`` row identified by ``info_id``.

    Args:
        info_id: Row to update.
        status: New status value, written as given.
    """
    query = "UPDATE raw_info SET status = %s WHERE info_id = %s"
    # Placeholders are positional: status first, then the row id.
    params = (status, info_id)
    await execute(query, params)
|
|
|
|
|
|
async def update_raw_info(info_id: int, data: dict) -> None:
    """Overwrite ``raw_data`` of a ``raw_info`` row and mark the row ``'done'``.

    Args:
        info_id: Row to update.
        data: Payload to store; serialized to JSON text before writing.
    """
    # ensure_ascii=False keeps non-ASCII characters literal instead of \uXXXX escapes.
    payload = json.dumps(data, ensure_ascii=False)
    query = "UPDATE raw_info SET raw_data = %s, status = 'done' WHERE info_id = %s"
    await execute(query, (payload, info_id))
|
|
|
|
|
|
async def select_raw_info_data(info_id: int | None) -> dict | None:
|
|
if info_id is None:
|
|
return None
|
|
row = await fetchone("SELECT raw_data FROM raw_info WHERE info_id = %s", (info_id,))
|
|
if not row or not row["raw_data"]:
|
|
return None
|
|
return json.loads(row["raw_data"]) if isinstance(row["raw_data"], str) else row["raw_data"]
|
|
|
|
|
|
async def select_run_sources(analysis_run_id: str) -> list[dict]:
    """List the sources recorded for an analysis run.

    Joins ``raw_info`` to ``remote_source`` on ``source_id``.

    Args:
        analysis_run_id: Run whose ``raw_info`` rows are selected.

    Returns:
        The result of ``fetchall`` for the query, selecting ``info_id``,
        ``source_type`` and ``url`` per row.
    """
    query = (
        "SELECT ri.info_id, rs.source_type, rs.url"
        " FROM raw_info ri JOIN remote_source rs USING (source_id)"
        " WHERE ri.analysis_run_id = %s"
    )
    records = await fetchall(query, (analysis_run_id,))
    return records
|
|
|
|
|
|
async def select_run_raw_data(analysis_run_id: str) -> dict:
    """Collect every raw payload of an analysis run, keyed by source type.

    Rows whose language is 'EN' (case-insensitive) are stored under the
    synthesized key "<source_type>_en", which avoids key collisions when KR
    and EN are collected together. If several rows map to the same key, the
    last one returned by the query wins.

    Args:
        analysis_run_id: Run whose ``raw_info`` rows are read.

    Returns:
        Mapping of key to payload. A ``str`` payload is parsed with
        ``json.loads``; any other value is stored unchanged.
    """
    query = (
        "SELECT rs.source_type, rs.language, ri.raw_data"
        " FROM raw_info ri JOIN remote_source rs USING (source_id)"
        " WHERE ri.analysis_run_id = %s"
    )
    records = await fetchall(query, (analysis_run_id,))

    merged: dict = {}
    for record in records:
        payload = record["raw_data"]
        source_type = record["source_type"]
        # A missing or NULL language counts as "not English".
        is_english = (record.get("language") or "").upper() == "EN"
        name = f"{source_type}_en" if is_english else source_type
        merged[name] = json.loads(payload) if isinstance(payload, str) else payload
    return merged
|
|
|
|
|
|
async def select_run_source_raw(
    analysis_run_id: str, source_type: str, language: str | None = None,
) -> dict | None:
    """Return the decoded raw payload of one source type within a run.

    Args:
        analysis_run_id: Run to search.
        source_type: ``remote_source.source_type`` to match.
        language: When truthy, additionally require ``remote_source.language``
            to equal this value; otherwise no language filter is applied.

    Returns:
        ``None`` when no row matches or its ``raw_data`` is falsy. Otherwise
        the payload: parsed with ``json.loads`` if it arrives as ``str``,
        returned unchanged if not. At most one row is read (``LIMIT 1``).
    """
    params = [analysis_run_id, source_type]
    language_filter = ""
    if language:
        language_filter = " AND rs.language = %s"
        params.append(language)

    query = (
        "SELECT ri.raw_data FROM raw_info ri JOIN remote_source rs USING (source_id)"
        " WHERE ri.analysis_run_id = %s AND rs.source_type = %s"
    ) + language_filter + " LIMIT 1"

    record = await fetchone(query, tuple(params))
    payload = record["raw_data"] if record else None
    if not payload:
        return None
    if isinstance(payload, str):
        return json.loads(payload)
    return payload
|
|
|
|
|
|
async def update_raw_info_merge(info_id: int, patch: dict) -> None:
    """Merge ``patch`` into ``raw_info.raw_data`` at the top level (read-modify-write).

    Used when a single source appends keys stage by stage (e.g. branding's
    brandAssets -> channelLogos). Keys in ``patch`` overwrite same-named
    top-level keys of the stored object; nested values are replaced, not
    merged. The row's ``status`` is also set to ``'done'``.

    Args:
        info_id: ``raw_info`` row to update. If no such row is found, the
            function returns without writing anything.
        patch: Top-level keys to add or overwrite.

    Raises:
        TypeError: If the stored payload is a non-empty value that is not a
            JSON object, so a key-level merge is not possible.

    NOTE(review): the SELECT and the UPDATE are separate statements, so two
    concurrent merges on the same ``info_id`` could overwrite each other
    unless the helpers run them in one transaction -- confirm.
    """
    row = await fetchone("SELECT raw_data FROM raw_info WHERE info_id = %s", (info_id,))
    if not row:
        return

    raw = row["raw_data"]
    # Decode JSON text (str or bytes). An empty payload is skipped because
    # "" is not valid JSON and would raise instead of meaning "nothing stored".
    if isinstance(raw, (str, bytes, bytearray)) and raw:
        stored = json.loads(raw)
    else:
        stored = raw

    if not stored:
        # SQL NULL, empty text, JSON null or an empty container: start fresh.
        merged: dict = {}
    elif isinstance(stored, dict):
        # Copy so the row object handed back by the driver is not mutated.
        merged = dict(stored)
    else:
        raise TypeError(
            f"raw_info.raw_data for info_id={info_id} is {type(stored).__name__}, "
            "expected a JSON object"
        )

    merged.update(patch)
    await execute(
        "UPDATE raw_info SET raw_data = %s, status = 'done' WHERE info_id = %s",
        (json.dumps(merged, ensure_ascii=False), info_id),
    )
|
|
|
|
|
|
async def select_source_by_type(
    hospital_id: str, source_type: str, language: str | None = None,
) -> dict | None:
    """Look up one ``remote_source`` row of a hospital by source type.

    Args:
        hospital_id: Hospital whose sources are searched.
        source_type: ``source_type`` value to match.
        language: When truthy, additionally require the ``language`` column to
            equal this value; otherwise no language filter is applied.

    Returns:
        The result of ``fetchone`` for the query, selecting ``source_id`` and
        ``url`` (per the annotation, ``None`` when nothing matches). At most
        one row is read (``LIMIT 1``).
    """
    params = [hospital_id, source_type]
    language_filter = ""
    if language:
        language_filter = " AND language = %s"
        params.append(language)

    query = (
        "SELECT source_id, url FROM remote_source WHERE hospital_id = %s AND source_type = %s"
        + language_filter
        + " LIMIT 1"
    )
    return await fetchone(query, tuple(params))
|
|
|
|
|
|
async def select_run_mainpage_url(analysis_run_id: str) -> str:
    """Return the URL of the ``'mainpage'`` source used in an analysis run.

    Args:
        analysis_run_id: Run whose ``raw_info`` rows are joined to
            ``remote_source``.

    Returns:
        The ``url`` of the matching row, or ``""`` when no row is found or
        its ``url`` is missing or falsy.
    """
    query = (
        "SELECT rs.url FROM raw_info ri JOIN remote_source rs USING (source_id)"
        " WHERE ri.analysis_run_id = %s AND rs.source_type = 'mainpage'"
    )
    record = await fetchone(query, (analysis_run_id,))
    if not record:
        return ""
    # Normalize a missing or NULL url to the empty string.
    return record.get("url") or ""
|