Source code for app.schemas

"""
Pydantic models for request/response payloads and internal validation.

This module defines the data transfer objects (DTOs) used by the FastAPI
application and background jobs to validate inputs and serialize outputs.
Schemas mirror the structure of the SQLAlchemy ORM models where appropriate
and are designed to be safe for JSON encoding in API responses and logs.

See Also
--------
app.models : SQLAlchemy models mirrored by these schemas.
app.main : FastAPI endpoints that emit and consume these schemas.
app.database : Session/engine configuration used to produce ORM objects.

Notes
-----
- Primary role: provide Pydantic v2 models for API endpoints and service
  boundaries, enabling strict, well-typed validation and serialization.
- Key dependencies: relies on Pydantic v2. The ``from_attributes=True``
  configuration allows converting ORM objects (e.g., ``app.models``) into
  schemas via ``model_validate``.
- Invariants: field names intentionally match ORM attributes to simplify
  conversions. Timestamps are naive ``datetime`` objects assumed to be in UTC.

Examples
--------
>>> from datetime import datetime
>>> from app.schemas import CoordinateSchema, WeatherObservationSchema
>>> center = CoordinateSchema(latitude=59.33, longitude=18.06, label="STHLM")
>>> center.model_dump()["label"]
'STHLM'
>>> obs = WeatherObservationSchema(
...     timestamp=datetime(2024, 1, 1, 12, 0, 0),
...     latitude=59.33,
...     longitude=18.06,
...     air_temperature=2.3,
...     is_imputed=False,
... )
>>> isinstance(obs.timestamp, datetime)
True
"""

import logging
from datetime import datetime
from typing import Any, Dict, List, Optional

from pydantic import BaseModel, ConfigDict, Field

logger = logging.getLogger(__name__)


[docs] class CoordinateSchema(BaseModel): """Geographic coordinate used for data collection and grouping. This schema represents a point on Earth expressed in decimal degrees. It mirrors :class:`app.models.Coordinate` and is commonly returned by the API for map/grid operations and stored as part of training metadata. Parameters ---------- id : int, optional Surrogate primary key if available; not required for requests. latitude : float Coordinate latitude in decimal degrees. longitude : float Coordinate longitude in decimal degrees. label : str, optional Optional human‑readable label for the coordinate. is_central : bool, optional Marks the central coordinate used as a reference point. Attributes ---------- id : int | None Primary key, when present. latitude : float Latitude in decimal degrees. longitude : float Longitude in decimal degrees. label : str | None Optional label for UI or logs. is_central : bool | None Whether this coordinate is the designated center. Examples -------- >>> CoordinateSchema(latitude=59.33, longitude=18.06).model_dump()["latitude"] 59.33 See Also -------- app.models.Coordinate : ORM model providing the database mapping. """ id: Optional[int] = Field(None, description="Primary key") latitude: float = Field(..., description="Latitude of the point") longitude: float = Field(..., description="Longitude of the point") label: Optional[str] = Field(None, description="Optional label for the point") is_central: Optional[bool] = Field( None, description="True if this is the central coordinate" ) model_config = ConfigDict(from_attributes=True)
[docs] class WeatherObservationSchema(BaseModel): """Point‑in‑time weather values at a geographic location. Mirrors :class:`app.models.WeatherObservation`. The composite identity of an observation is ``(timestamp, latitude, longitude)``. Optional fields may be missing in raw feeds but can be provided after imputation. Parameters ---------- timestamp : datetime Observation timestamp in UTC (naive ``datetime`` assumed to be UTC). latitude : float Coordinate latitude in decimal degrees. longitude : float Coordinate longitude in decimal degrees. air_temperature : float, optional Air temperature in degrees Celsius. wind_speed : float, optional Wind speed in meters per second. wind_direction : float, optional Wind direction in degrees (meteorological convention). cloud_area_fraction : float, optional Fraction of the sky covered by clouds (0–1). precipitation_amount : float, optional Precipitation amount in millimeters over the interval. is_imputed : bool Whether this record was generated by an imputation process. Attributes ---------- Same as Parameters. Examples -------- >>> WeatherObservationSchema( ... timestamp=datetime(2024, 1, 1, 0, 0, 0), ... latitude=59.33, ... longitude=18.06, ... is_imputed=False, ... ).model_dump()["is_imputed"] False See Also -------- app.models.WeatherObservation : ORM model with the canonical schema. """ timestamp: datetime latitude: float longitude: float air_temperature: Optional[float] = None wind_speed: Optional[float] = None wind_direction: Optional[float] = None cloud_area_fraction: Optional[float] = None precipitation_amount: Optional[float] = None is_imputed: bool model_config = ConfigDict(from_attributes=True)
[docs] class TrainingStatusSchema(BaseModel): """Snapshot of the current ML training state. Represents the single logical row tracked by the application to indicate whether a training job is running and metadata about recent runs. Parameters ---------- id : int Primary key, conventionally ``1`` for the singleton row. is_training : bool Flag indicating whether a training job is currently running. last_trained_at : datetime, optional Timestamp of the last completed training job in UTC. train_count : int Monotonic counter of completed training runs. current_horizon : str, optional Human‑readable horizon label (e.g., ``"5min"``) or status text. Attributes ---------- Same as Parameters. Examples -------- >>> TrainingStatusSchema(id=1, is_training=False, train_count=3).train_count 3 See Also -------- app.models.TrainingStatus : ORM model used by the backend. """ id: int is_training: bool last_trained_at: Optional[datetime] train_count: int current_horizon: Optional[str] model_config = ConfigDict(from_attributes=True)
[docs] class TrainingLogSchema(BaseModel): """Append‑only log entry with training scores and metadata. Mirrors :class:`app.models.TrainingLog`. Each entry captures model scores for a given horizon and, optionally, coordinate context. Parameters ---------- id : str Unique identifier (UUID string) for the run. timestamp : datetime Completion time of the training run in UTC. horizon : str Non‑empty key identifying the grouping (often ``"<coord>_<horizon_label>"``). sklearn_score : float R^2 score from the Scikit‑learn model. pytorch_score : float R^2 score from the PyTorch model. data_count : int Number of data points used for the run. coord_latitude : float, optional Coordinate latitude associated with the run. coord_longitude : float, optional Coordinate longitude associated with the run. horizon_label : str, optional Human‑friendly label for the horizon (e.g., ``"5min"``). Attributes ---------- Same as Parameters. Examples -------- >>> TrainingLogSchema( ... id="00000000-0000-0000-0000-000000000000", ... timestamp=datetime(2024, 1, 1, 12, 0, 0), ... horizon="59.33_18.06_5min", ... sklearn_score=0.92, ... pytorch_score=0.93, ... data_count=1000, ... ).model_dump()["horizon"] '59.33_18.06_5min' See Also -------- app.models.TrainingLog : ORM model persisted by the training jobs. """ id: str # Changed from int to str for UUID timestamp: datetime horizon: str # This is the combined key e.g. "latX_lonY_5min" sklearn_score: float pytorch_score: float data_count: int coord_latitude: Optional[float] = None coord_longitude: Optional[float] = None horizon_label: Optional[str] = None # This is the pure horizon label e.g. "5min" model_config = ConfigDict(from_attributes=True)
[docs] class CoordinateListResponse(BaseModel): """Response payload carrying a list of coordinates. Parameters ---------- coordinates : list[CoordinateSchema] Collection of coordinates to return to the client. Attributes ---------- coordinates : list[CoordinateSchema] The returned coordinates. Examples -------- >>> resp = CoordinateListResponse( ... coordinates=[CoordinateSchema(latitude=0, longitude=0)] ... ) >>> len(resp.coordinates) 1 """ coordinates: List[CoordinateSchema]
[docs] class PredictionDataResponse(BaseModel): """Historical model performance keyed by horizon identifier. Parameters ---------- history : dict[str, dict[str, Any]] Mapping from horizon key (e.g., ``"latX_lonY_5min"``) to arbitrary value dictionaries suitable for charting or tabular display. Attributes ---------- history : dict[str, dict[str, Any]] The historical data structure for visualization. Examples -------- >>> payload = PredictionDataResponse(history={"h1": {"r2": 0.9}}) >>> float(payload.history["h1"]["r2"]) == 0.9 True """ history: Dict[ str, Dict[str, Any] ] # Key is horizon_log_name (e.g. "latX_lonY_5min")
[docs] class GenericStatusResponse(BaseModel): """Simple status envelope for human‑readable messages. Parameters ---------- status : str Machine‑friendly status value (e.g., ``"ok"``, ``"error"``). message : str, optional Human‑readable description or context for the status. Attributes ---------- Same as Parameters. Examples -------- >>> GenericStatusResponse(status="ok").model_dump()["status"] 'ok' """ status: str message: Optional[str] = None