# utils.py

"""
Utility helpers for logging, time formatting, and startup checks.

This module centralizes a few cross-cutting utilities used throughout the
application: persistent log configuration, simple conversion/formatting of
timestamps to a Sweden-specific display timezone, and a startup helper that
seeds or validates central coordinates based on configuration. Keeping these
helpers in one place reduces duplication across the FastAPI app and background
jobs.

See Also
--------
app.config : Central configuration (``settings.my_lat``, ``settings.my_long``).
app.database : Database session factory (``SessionLocal``).
app.coordinates_manager.seed_coordinates_if_needed : Coordinate seeding logic.
app.ml_utils.TrainingLogDetails : Typed shape consumed by ``build_status_data``.

Notes
-----
- Primary role: provide persistent logging configuration; convert and format
  timestamps for the UI; perform idempotent coordinate seeding/validation at
  process startup.
- Key dependencies: ``app.config.settings`` for optional central latitude and
  longitude; ``app.database.SessionLocal`` for DB access; and
  ``app.coordinates_manager.seed_coordinates_if_needed`` for the seeding logic.
- Invariants: log files are rotated daily and written to the path in
  ``FASTAPI_LOG_PATH`` (default ``/data/logs/fastapi.log``). Timezone handling
  uses a fixed UTC+2 offset and does not account for daylight saving time.

Examples
--------
>>> # Convert a UTC timestamp to the Sweden display timezone
>>> from datetime import datetime, timezone
>>> from app.utils import to_sweden_time, format_sweden_time
>>> dt = datetime(2024, 1, 1, 12, 0, tzinfo=timezone.utc)
>>> to_sweden_time(dt).utcoffset().total_seconds() == 2 * 3600
True
>>> format_sweden_time(dt)
'2024-01-01 14:00:00'
"""

import logging
import os
from datetime import datetime, timedelta, timezone
from logging.handlers import TimedRotatingFileHandler
from typing import Any, Dict, Mapping, Optional, Tuple

from .config import settings
from .coordinates_manager import seed_coordinates_if_needed
from .database import SessionLocal
from .ml_utils import TrainingLogDetails

# Module-scoped logger used by the helpers in this file.
logger = logging.getLogger(__name__)

# Destination of the persistent log file; overridable through the
# FASTAPI_LOG_PATH environment variable.
LOG_FILE_PATH = os.environ.get("FASTAPI_LOG_PATH", "/data/logs/fastapi.log")

# Rotation policy handed to TimedRotatingFileHandler: roll over once per
# "midnight" interval and keep a bounded number of rotated files.
LOG_ROTATION_WHEN = "midnight"
LOG_ROTATION_INTERVAL = 1
LOG_ROTATION_BACKUP_COUNT = 7

# Threshold and record layout applied to the file handler and root logger.
LOG_LEVEL = logging.INFO
LOG_FORMAT = "%(asctime)s - %(name)s - %(levelname)s - %(message)s"


def configure_persistent_logging() -> None:
    """Configure persistent, daily-rotated logging to a file.

    Installs a ``TimedRotatingFileHandler`` on the root logger that writes to
    ``LOG_FILE_PATH`` (taken from ``FASTAPI_LOG_PATH``, default
    ``/data/logs/fastapi.log``). The handler rotates at UTC midnight
    (``utc=True``), keeps ``LOG_ROTATION_BACKUP_COUNT`` backups and formats
    records with ``LOG_FORMAT``. If a handler for the same file is already
    installed on the root logger, the function returns without changes so
    repeated calls do not produce duplicate log lines.

    Returns
    -------
    None
        This function mutates the global logging configuration and does not
        return a value.

    Notes
    -----
    - Plain (non-file) ``StreamHandler`` instances are removed from the root
      logger so records are not emitted twice. Other file-based handlers are
      left in place. Adjust in callers if both sinks are desired.
    - This helper touches the filesystem to create the log directory. If the
      directory cannot be created, a warning is logged and persistent logging
      is skipped instead of raising.
    - The log file itself is opened lazily on the first emitted record
      (``delay=True``).

    Examples
    --------
    >>> # Configures a rotating file handler on the root logger  # doctest: +SKIP
    >>> from app.utils import configure_persistent_logging        # doctest: +SKIP
    >>> configure_persistent_logging()                            # doctest: +SKIP
    """
    root_logger = logging.getLogger()
    # FileHandler stores an absolute path in ``baseFilename``; compare like
    # with like.
    target_path = os.path.abspath(LOG_FILE_PATH)
    # Avoid duplicate handlers if already configured
    if any(
        isinstance(handler, TimedRotatingFileHandler)
        and getattr(handler, "baseFilename", None) == target_path
        for handler in root_logger.handlers
    ):
        return

    # ``dirname`` is empty for a bare filename, and ``os.makedirs("")`` raises
    # FileNotFoundError, so only create the directory when there is one.
    log_dir = os.path.dirname(LOG_FILE_PATH)
    if log_dir:
        try:
            os.makedirs(log_dir, exist_ok=True)
        except PermissionError:
            logger.warning(
                "Permission denied creating log directory %s; skipping persistent logging",
                log_dir,
            )
            return
        except OSError as error:
            # E.g. read-only filesystem or a path component that is a file:
            # logging setup is best-effort and must not abort startup.
            logger.warning(
                "Could not create log directory %s (%s); skipping persistent logging",
                log_dir,
                error,
            )
            return

    handler = TimedRotatingFileHandler(
        LOG_FILE_PATH,
        when=LOG_ROTATION_WHEN,
        interval=LOG_ROTATION_INTERVAL,
        backupCount=LOG_ROTATION_BACKUP_COUNT,
        encoding="utf-8",
        delay=True,
        utc=True,
    )
    handler.setFormatter(logging.Formatter(LOG_FORMAT))
    handler.setLevel(LOG_LEVEL)
    root_logger.addHandler(handler)
    root_logger.setLevel(LOG_LEVEL)

    # Remove plain stream handlers to avoid duplicate output. FileHandler (and
    # every rotating variant) subclasses StreamHandler, so file-based handlers
    # are excluded explicitly; otherwise unrelated log files configured
    # elsewhere would be silently detached.
    for existing in list(root_logger.handlers):
        if isinstance(existing, logging.StreamHandler) and not isinstance(
            existing, logging.FileHandler
        ):
            root_logger.removeHandler(existing)


# Display timezone for Swedish local time. This is deliberately a constant
# UTC+2 offset; daylight saving transitions are not tracked.
SWEDEN_TZ = timezone(offset=timedelta(hours=2))
# strftime pattern used when rendering timestamps in Swedish display time.
DATE_TIME_FORMAT = "%Y-%m-%d %H:%M:%S"


def to_sweden_time(utc_dt: datetime) -> datetime:
    """Convert a timestamp to the Sweden display timezone (UTC+2).

    Naive datetimes are interpreted as UTC; aware datetimes are converted from
    whatever offset they carry. This simplified conversion uses the fixed
    ``SWEDEN_TZ`` offset and intentionally does not account for daylight
    saving time transitions.

    Parameters
    ----------
    utc_dt : datetime
        A timezone-aware ``datetime`` (normally UTC) or a naive ``datetime``
        interpreted as UTC.

    Returns
    -------
    datetime
        A ``datetime`` converted to the fixed Sweden timezone (UTC+2).

    Raises
    ------
    AssertionError
        If ``utc_dt`` is not a ``datetime`` instance.

    Examples
    --------
    >>> from datetime import datetime, timezone
    >>> from app.utils import to_sweden_time
    >>> dt = datetime(2024, 1, 1, 12, 0, tzinfo=timezone.utc)
    >>> to_sweden_time(dt).strftime('%Y-%m-%d %H:%M:%S')
    '2024-01-01 14:00:00'
    """
    # Raised explicitly instead of via ``assert`` so the check survives
    # ``python -O``; AssertionError is kept because it is the documented
    # exception type for this function.
    if not isinstance(utc_dt, datetime):
        raise AssertionError(f"utc_dt must be datetime, got {type(utc_dt)}")
    if utc_dt.tzinfo is None:
        # Assume naive datetimes are in UTC
        utc_dt = utc_dt.replace(tzinfo=timezone.utc)
    return utc_dt.astimezone(SWEDEN_TZ)


def format_sweden_time(utc_dt: datetime) -> str:
    """Render a UTC timestamp as a display string in Sweden time.

    The value is first converted with :func:`to_sweden_time` and then
    formatted with ``DATE_TIME_FORMAT``.

    Parameters
    ----------
    utc_dt : datetime
        A timezone-aware UTC ``datetime`` or a naive ``datetime`` interpreted
        as UTC.

    Returns
    -------
    str
        The timestamp as ``YYYY-MM-DD HH:MM:SS`` in the fixed Sweden
        timezone (UTC+2).

    Examples
    --------
    >>> from datetime import datetime, timezone
    >>> from app.utils import format_sweden_time
    >>> format_sweden_time(datetime(2024, 1, 1, 12, 0, tzinfo=timezone.utc))
    '2024-01-01 14:00:00'
    """
    return to_sweden_time(utc_dt).strftime(DATE_TIME_FORMAT)


def format_sweden_time_iso(utc_dt: datetime) -> str:
    """Render a UTC timestamp as an ISO 8601 string in Sweden time.

    The value is first converted with :func:`to_sweden_time`; the result of
    its ``isoformat()`` call is returned unchanged.

    Parameters
    ----------
    utc_dt : datetime
        A timezone-aware UTC ``datetime`` or a naive ``datetime`` interpreted
        as UTC.

    Returns
    -------
    str
        ISO 8601 text in the fixed Sweden timezone, carrying the ``+02:00``
        offset suffix.

    Examples
    --------
    >>> from datetime import datetime, timezone
    >>> from app.utils import format_sweden_time_iso
    >>> s = format_sweden_time_iso(datetime(2024, 1, 1, 12, 0, tzinfo=timezone.utc))
    >>> s.endswith('+02:00')
    True
    """
    return to_sweden_time(utc_dt).isoformat()


def _check_and_seed_coordinates(
    session: Any, central_latitude: float, central_longitude: float
) -> None:
    """Run coordinate seeding/validation and log failures instead of raising.

    Calls :func:`app.coordinates_manager.seed_coordinates_if_needed` with the
    given session and coordinates. Any exception it raises is caught and
    logged so that startup can continue: a ``RuntimeError`` is reported as a
    critical validation failure (message only), every other exception is
    logged together with its traceback.

    Parameters
    ----------
    session : Any
        Database session passed through to the seeding helper.
    central_latitude : float
        Latitude from configuration.
    central_longitude : float
        Longitude from configuration.

    Returns
    -------
    None
        Only side effects are performed (the delegated call and logging).
    """
    try:
        seed_coordinates_if_needed(session, central_latitude, central_longitude)
    except RuntimeError as exc:
        validation_message = (
            f"CRITICAL: Central coordinate validation failed: {exc}. "
            "The application might not function correctly with existing data. "
            "Consider backing up data and re-initializing coordinates if .env has changed."
        )
        logger.error(validation_message)
    except Exception as exc:
        # ``logger.exception`` logs at ERROR level and attaches the traceback.
        logger.exception(f"Unexpected error during coordinate check/seeding: {exc}")


def startup_coordinate_check() -> None:
    """Seed or verify the central coordinate at startup from configuration.

    Opens a database session, reads ``settings.my_lat`` and
    ``settings.my_long`` and hands both to
    :func:`_check_and_seed_coordinates`. When either value is ``None`` a
    warning is logged and the function returns without touching coordinates.

    Returns
    -------
    None
        Only side effects are performed (DB calls and logging).

    Raises
    ------
    AssertionError
        If a configured value is present but is not a ``float``.

    See Also
    --------
    - app.config.settings: Source of ``my_lat`` and ``my_long``.
    - app.coordinates_manager.seed_coordinates_if_needed: Seeding/validation logic.
    """
    with SessionLocal() as db_session:
        lat: Optional[float] = settings.my_lat
        lon: Optional[float] = settings.my_long

        both_configured = lat is not None and lon is not None
        if not both_configured:
            logger.warning(
                "MY_LAT or MY_LONG not set in .env configuration. "
                "Automatic coordinate seeding/validation based on .env will be skipped. "
                "The application will rely on coordinates already present in the database."
            )
            return

        # Configuration values must be real floats before they reach seeding.
        assert isinstance(lat, float), f"settings.my_lat must be float, got {type(lat)}"
        assert isinstance(
            lon, float
        ), f"settings.my_long must be float, got {type(lon)}"

        _check_and_seed_coordinates(db_session, lat, lon)

    # Only reached when the check actually ran (the skip path returns early).
    logger.info("Startup coordinate check process finished.")


def _build_status_entry(
    horizon_key: str, training_detail: TrainingLogDetails
) -> Tuple[str, Dict[str, Any]]:
    """Construct a display key and status dict for one training log.

    Parameters
    ----------
    horizon_key : str
        Unique identifier for the training horizon.
    training_detail : app.ml_utils.TrainingLogDetails
        Typed mapping with fields such as ``timestamp``, ``sklearn_score``,
        ``pytorch_score``, and ``horizon_display_name``.

    Returns
    -------
    tuple[str, dict[str, Any]]
        A pair ``(display_key, entry)`` where ``display_key`` is the human
        friendly name for charts/legends and ``entry`` contains keys
        ``last_trained_at``, ``sklearn_score``, ``pytorch_score``,
        ``data_count``, ``horizon_label``, and ``original_log_key``.
    """
    display_key = training_detail.get("horizon_display_name", horizon_key)
    timestamp = training_detail.get("timestamp")
    if isinstance(timestamp, datetime):
        last_trained_at = format_sweden_time(timestamp)
    else:
        last_trained_at = "Never"

    entry: Dict[str, Any] = {
        "last_trained_at": last_trained_at,
        "sklearn_score": f"{training_detail.get('sklearn_score', 0.0):.4f}",
        "pytorch_score": f"{training_detail.get('pytorch_score', 0.0):.4f}",
        "data_count": training_detail.get("data_count", 0),
        "horizon_label": training_detail.get("horizon_label", "N/A"),
        "original_log_key": horizon_key,
    }
    return display_key, entry


def build_status_data(
    logs: Mapping[str, TrainingLogDetails],
) -> Dict[str, Dict[str, Any]]:
    """Transform latest training logs into a rendering-friendly mapping.

    Each ``(horizon_key, training_detail)`` pair is converted with
    :func:`_build_status_entry` and stored under the display key it returns.
    If two horizons produce the same display key, the later one in iteration
    order replaces the earlier one.

    Parameters
    ----------
    logs : Mapping[str, app.ml_utils.TrainingLogDetails]
        Mapping where each key is a horizon and each value describes the latest
        training result for that horizon.

    Returns
    -------
    dict[str, dict[str, Any]]
        Mapping from display keys to status dictionaries (see
        :func:`_build_status_entry`). Returns an empty mapping when ``logs`` is
        empty.

    Raises
    ------
    AssertionError
        If ``logs`` is not a ``Mapping``.

    Examples
    --------
    >>> from datetime import datetime, timezone
    >>> from app.utils import build_status_data
    >>> dt = datetime(2024, 1, 1, 12, 0, tzinfo=timezone.utc)
    >>> logs = {
    ...     'h1': {
    ...         'timestamp': dt,
    ...         'sklearn_score': 0.9,
    ...         'pytorch_score': 0.8,
    ...         'data_count': 42,
    ...         'coord_latitude': 59.3,
    ...         'coord_longitude': 18.1,
    ...         'horizon_label': '1h',
    ...         'horizon_display_name': 'Coord (59.30, 18.10) - Horizon: 1h',
    ...     }
    ... }
    >>> data = build_status_data(logs)
    >>> list(data.keys()) == ['Coord (59.30, 18.10) - Horizon: 1h']
    True
    """
    # Raised explicitly instead of via ``assert`` so the check survives
    # ``python -O``; AssertionError is kept because it is the documented
    # exception type for this function.
    if not isinstance(logs, Mapping):
        raise AssertionError(
            f"logs must be Mapping[str, TrainingLogDetails], got {type(logs)}"
        )

    # An empty mapping naturally yields an empty dict.
    return dict(
        _build_status_entry(horizon_key, training_detail)
        for horizon_key, training_detail in logs.items()
    )