LIBRE / scripts /seed_database.py
RyZ
feat: adding full working local ETL Pipeline
e391a84
"""
scripts/seed_database.py
─────────────────────────
Seed the database with test data (PPG signals + BP predictions).
Usage:
python scripts/seed_database.py
python scripts/seed_database.py --users 3 --signals-per-user 5
Useful for:
• Getting started quickly with a populated database
• Manual testing of the frontend without sending real PPG signals
• Demo purposes
"""
from __future__ import annotations
import argparse
import asyncio
import random
import uuid
from datetime import datetime, timedelta, timezone
from src.infrastructure.database.connection import create_all_tables, dispose_engine, get_session_factory
from src.infrastructure.database.repositories.ppg_repository import SQLAlchemyPPGRepository
from src.infrastructure.database.repositories.prediction_repository import SQLAlchemyPredictionRepository
from src.infrastructure.model.mock_model_service import MockModelService
from src.infrastructure.processing.scipy_signal_processor import ScipySignalProcessor
from src.domain.entities.ppg_signal import PPGSignal
from src.shared.logger import get_logger
# Logger for this script, obtained from the project's shared get_logger helper
# and given this module's name.
logger = get_logger(__name__)
def _random_ppg(sampling_rate: float = 125.0, duration: float = 10.0) -> list[float]:
    """Build a synthetic PPG-like trace: a sine wave with Gaussian noise.

    A single heart rate is drawn uniformly between 60 and 90 beats per minute
    and converted to Hz. Every sample is the sine at that frequency plus noise
    drawn from a normal distribution (mean 0, standard deviation 0.05).

    Args:
        sampling_rate: Samples per second.
        duration: Length of the trace in seconds.

    Returns:
        A list of ``int(sampling_rate * duration)`` float samples.
    """
    import math

    sample_count = int(sampling_rate * duration)
    beats_per_second = random.uniform(60, 90) / 60.0
    # Hoisted prefix of the phase expression; the multiplication order is the
    # same as writing ``2 * math.pi * hr * i / sampling_rate`` inline.
    angular = 2 * math.pi * beats_per_second

    samples: list[float] = []
    for idx in range(sample_count):
        clean = math.sin(angular * idx / sampling_rate)
        samples.append(clean + random.gauss(0, 0.05))
    return samples
async def seed(num_users: int, signals_per_user: int) -> None:
    """Populate the database with synthetic PPG signals and BP predictions.

    Creates the tables, then for each of ``num_users`` users (ids
    ``seed-user-001`` ...) stores ``signals_per_user`` generated PPG signals
    with timestamps spread randomly over the last 30 days. Each stored signal
    is run through ``ScipySignalProcessor`` and ``MockModelService``; when the
    processor yields at least one segment, the resulting prediction is stored
    as well. All inserts are committed once at the end.

    Prediction generation is best-effort: a failure for one signal is logged
    as a warning and seeding continues with the next signal.

    Args:
        num_users: Number of test users to create signals for.
        signals_per_user: Number of PPG signals generated per user.

    Raises:
        Exception: Anything raised by table creation, model loading, signal
            insertion or the final commit propagates to the caller. The
            engine is disposed first for every failure except one in table
            creation itself.
    """
    logger.info("Creating DB tables…")
    await create_all_tables()

    # Everything after table creation runs under try/finally so the engine is
    # disposed even when seeding fails part-way (previously it was only
    # disposed on the success path).
    try:
        session_factory = get_session_factory()
        processor = ScipySignalProcessor()
        model_service = MockModelService()
        await model_service.load_model()

        total_signals = 0
        total_predictions = 0

        async with session_factory() as session:
            ppg_repo = SQLAlchemyPPGRepository(session)
            prediction_repo = SQLAlchemyPredictionRepository(session)

            for u in range(1, num_users + 1):
                user_id = f"seed-user-{u:03d}"
                device_id = f"seed-device-{u:03d}"

                for _ in range(signals_per_user):
                    # Spread timestamps over the last 30 days
                    days_ago = random.uniform(0, 30)
                    ts = datetime.now(timezone.utc) - timedelta(days=days_ago)

                    ppg_signal = PPGSignal(
                        id=str(uuid.uuid4()),
                        device_id=device_id,
                        user_id=user_id,
                        sampling_rate=125.0,
                        ppg_values=_random_ppg(),
                        duration_seconds=10.0,
                        timestamp=ts,
                    )
                    stored = await ppg_repo.add(ppg_signal)
                    total_signals += 1

                    # Generate a prediction for this signal. Deliberately
                    # best-effort: one bad signal must not abort the run.
                    try:
                        segments = processor.process(stored.ppg_values, stored.sampling_rate)
                        if segments.shape[0] > 0:
                            prediction = await model_service.predict(stored.id, segments)
                            await prediction_repo.add(prediction)
                            total_predictions += 1
                    except Exception as exc:
                        logger.warning("Could not generate prediction for %s: %s", stored.id, exc)

            # Single commit for the whole seeding run.
            await session.commit()

        logger.info(
            "Seeding complete: %d users, %d signals, %d predictions.",
            num_users,
            total_signals,
            total_predictions,
        )
    finally:
        await dispose_engine()
if __name__ == "__main__":
    # Script entry point: read the two sizing options from the command line
    # and run the async seeder to completion.
    cli = argparse.ArgumentParser(description="Seed BP Monitoring Database")
    for flag, fallback, blurb in (
        ("--users", 3, "Number of test users"),
        ("--signals-per-user", 5, "Signals per user"),
    ):
        cli.add_argument(flag, type=int, default=fallback, help=blurb)
    options = cli.parse_args()
    asyncio.run(
        seed(num_users=options.users, signals_per_user=options.signals_per_user)
    )