# src/app/models.py
"""
Declarative SQLAlchemy ORM models used across the application.
This module defines the core database entities for the platform: coordinates
for data collection, time-stamped weather observations, a singleton table for
training status, and an append-only log of ML training runs. These models are
the canonical schema definition for the application and are used by both the
FastAPI backend and the standalone Slurm-executed training jobs for reading
and writing domain data.
See Also
--------
app.database : Engine/session factories and declarative ``Base``.
app.schemas : Pydantic schemas mirroring these ORM models.
app.ml_utils : Read-only helpers that query ``TrainingLog``.
app.ml_train : Slurm-run training job that writes ``TrainingLog`` entries.
app.coordinates_manager : Utilities for seeding coordinate grids.
app.imputation : Imputation routines operating on ``WeatherObservation``.
Notes
-----
- Primary role: provide ORM mappings for coordinates, observations,
training status, and training logs bound to :class:`app.database.Base`.
- Key dependencies: a configured SQLAlchemy engine via
:data:`app.database.engine` and corresponding session factories. The schema
is created via :func:`app.database.ensure_database_schema`.
- Invariants: ``WeatherObservation`` uses a composite primary key
``(timestamp, latitude, longitude)``. ``TrainingStatus`` is treated as a
singleton with primary key ``id=1``. ``TrainingLog.horizon`` is a
non-null string key identifying coordinate+horizon groupings.
Examples
--------
>>> # Basic query pattern (requires an initialized DB) # doctest: +SKIP
>>> from app.database import SessionLocal, ensure_database_schema
>>> from app.models import Coordinate, TrainingLog
>>> ensure_database_schema() # doctest: +SKIP
>>> with SessionLocal() as session: # doctest: +SKIP
... count = session.query(Coordinate).count() # doctest: +SKIP
... latest = (session.query(TrainingLog)
... .order_by(TrainingLog.timestamp.desc())
... .first()) # doctest: +SKIP
"""
import logging
import uuid
from datetime import datetime
from typing import Optional
from sqlalchemy import Boolean, DateTime, Float, Integer, String
from sqlalchemy.orm import Mapped, mapped_column
from .database import Base
logger = logging.getLogger(__name__)
[docs]
class TrainingStatus(Base):
    """Single-row record describing the present state of ML training.

    The row tells readers whether a training job is in progress, when the
    most recent training finished, and which horizon is being worked on.
    By convention the application keeps exactly one row, keyed ``id=1``,
    and updates it transactionally as training proceeds.

    Attributes
    ----------
    id : int
        Primary key; defaults to ``1`` so the table behaves as a singleton.
    is_training : bool
        ``True`` while a training job is running. Defaults to ``False``.
    last_trained_at : datetime | None
        UTC timestamp of the last completed training job, or ``None`` when
        no training has completed yet.
    train_count : int
        Counter of completed training runs. Defaults to ``0``.
    current_horizon : str | None
        Human-readable horizon label (e.g., ``"5min"``) or a status message.
    """

    __tablename__ = "training_status"

    id: Mapped[int] = mapped_column(
        Integer,
        default=1,
        primary_key=True,
    )
    is_training: Mapped[bool] = mapped_column(
        Boolean,
        nullable=False,
        default=False,
    )
    last_trained_at: Mapped[Optional[datetime]] = mapped_column(
        DateTime,
        nullable=True,
        default=None,
    )
    train_count: Mapped[int] = mapped_column(
        Integer,
        nullable=False,
        default=0,
    )
    current_horizon: Mapped[Optional[str]] = mapped_column(
        String,
        nullable=True,
    )
[docs]
class Coordinate(Base):
    """A latitude/longitude point at which weather data is collected.

    Attributes
    ----------
    id : int
        Surrogate primary key (indexed).
    latitude : float
        Latitude in decimal degrees; required.
    longitude : float
        Longitude in decimal degrees; required.
    label : str | None
        Optional human-readable name for the point.
    is_central : bool
        ``True`` for the coordinate that serves as the central reference
        point. Defaults to ``False``.

    Notes
    -----
    - The mapping declares no uniqueness constraint on
      ``(latitude, longitude)``, so duplicate points can be stored unless a
      database constraint or application logic prevents it.
    """

    __tablename__ = "coordinates"

    id: Mapped[int] = mapped_column(
        Integer,
        index=True,
        primary_key=True,
    )
    latitude: Mapped[float] = mapped_column(
        Float,
        nullable=False,
    )
    longitude: Mapped[float] = mapped_column(
        Float,
        nullable=False,
    )
    label: Mapped[Optional[str]] = mapped_column(
        String,
        nullable=True,
    )
    is_central: Mapped[bool] = mapped_column(
        Boolean,
        nullable=False,
        default=False,
    )
[docs]
class TrainingLog(Base):
    """Append-only log of ML training runs and scores.

    Each row represents one training execution for a given horizon and
    (optionally) a specific coordinate. Scores from both a Scikit-learn model
    and a PyTorch model are recorded, along with the number of data points
    used.

    Attributes
    ----------
    id : str
        Primary key string. When the caller does not supply a value, a fresh
        ``uuid4`` string is generated for each inserted row. Callers may
        still assign an explicit id when inserting.
    timestamp : datetime
        Completion time of the training run in UTC. Defaults to the current
        (naive) UTC time at insert.
    horizon : str
        Non-empty key identifying the grouping (often
        ``"<coord>_<horizon_label>"``).
    sklearn_score : float
        R^2 score from the Scikit-learn model.
    pytorch_score : float
        R^2 score from the PyTorch model.
    data_count : int
        Number of samples used for training/evaluation for this run.
    coord_latitude : float | None
        Coordinate latitude associated with the run, if available.
    coord_longitude : float | None
        Coordinate longitude associated with the run, if available.
    horizon_label : str | None
        Human-friendly label for the horizon (e.g., ``"5min"``, ``"1h"``).
    """

    __tablename__ = "training_logs"

    # The default must be a callable: passing ``str(uuid.uuid4())`` directly
    # would evaluate once at import time and hand every row the same key,
    # causing primary-key collisions on the second defaulted insert.
    id: Mapped[str] = mapped_column(
        String,
        primary_key=True,
        default=lambda: str(uuid.uuid4()),
        index=True,
    )
    # ``datetime.utcnow`` is passed uncalled so it is evaluated per insert.
    timestamp: Mapped[datetime] = mapped_column(
        DateTime, default=datetime.utcnow, nullable=False
    )
    horizon: Mapped[str] = mapped_column(String, nullable=False)
    sklearn_score: Mapped[float] = mapped_column(Float, nullable=False)
    pytorch_score: Mapped[float] = mapped_column(Float, nullable=False)
    data_count: Mapped[int] = mapped_column(Integer, nullable=False)
    coord_latitude: Mapped[Optional[float]] = mapped_column(Float, nullable=True)
    coord_longitude: Mapped[Optional[float]] = mapped_column(Float, nullable=True)
    horizon_label: Mapped[Optional[str]] = mapped_column(String, nullable=True)
[docs]
class WeatherObservation(Base):
    """One set of nowcast weather values at a given time and place.

    The columns follow the MET Norway Nowcast 2.0 fields that the application
    consumes. A row is uniquely identified by the composite primary key
    ``(timestamp, latitude, longitude)``.

    Attributes
    ----------
    timestamp : datetime
        Observation time in UTC; part of the composite primary key.
    latitude : float
        Latitude in decimal degrees; part of the composite primary key.
    longitude : float
        Longitude in decimal degrees; part of the composite primary key.
    air_temperature : float | None
        Air temperature in degrees Celsius.
    wind_speed : float | None
        Wind speed in meters per second.
    wind_direction : float | None
        Direction the wind is coming from, in degrees.
    cloud_area_fraction : float | None
        Fraction of the sky covered by clouds (0–1).
    precipitation_amount : float | None
        Precipitation amount in millimeters for the interval.
    is_imputed : bool
        ``True`` when the record was produced by preprocessing imputation
        routines. Defaults to ``False``.
    """

    __tablename__ = "weather_observations"

    # Composite primary key: when and where the observation applies.
    timestamp: Mapped[datetime] = mapped_column(
        DateTime,
        nullable=False,
        primary_key=True,
    )
    latitude: Mapped[float] = mapped_column(
        Float,
        nullable=False,
        primary_key=True,
    )
    longitude: Mapped[float] = mapped_column(
        Float,
        nullable=False,
        primary_key=True,
    )

    # Measured (or imputed) weather values; each may be absent.
    air_temperature: Mapped[Optional[float]] = mapped_column(
        Float,
        nullable=True,
    )
    wind_speed: Mapped[Optional[float]] = mapped_column(
        Float,
        nullable=True,
    )
    wind_direction: Mapped[Optional[float]] = mapped_column(
        Float,
        nullable=True,
    )
    cloud_area_fraction: Mapped[Optional[float]] = mapped_column(
        Float,
        nullable=True,
    )
    precipitation_amount: Mapped[Optional[float]] = mapped_column(
        Float,
        nullable=True,
    )

    is_imputed: Mapped[bool] = mapped_column(
        Boolean,
        nullable=False,
        default=False,
    )