Source code for pycfast.datasets._sp_ast_diesel

"""SP_AST_Diesel_1p1 dataset loader."""

from __future__ import annotations

from importlib import resources
from typing import TYPE_CHECKING

import numpy as np
import pandas as pd

if TYPE_CHECKING:
    from numpy.typing import NDArray

# Column names of the input features. The loader selects these columns (in
# this order) to build ``X`` when ``return_X_y=True``.
# NOTE: kept as a list on purpose -- it is passed directly to ``df[...]`` for
# multi-column selection.
_INPUT_PARAMS = [
    "heat_of_combustion",
    "radiative_fraction",
    "soot_yield",
    "target_location_z",
]

# Column name of the target; the loader returns this column as ``y`` when
# ``return_X_y=True``.
_TARGET_NAME = "max_trgsurt"

# Human-readable description of the dataset (reST-formatted text). The loader
# attaches it to the returned DataFrame as ``df.attrs["DESCR"]``.
_DESCR = """\
SP_AST_Diesel_1p1 Dataset
=========================

This dataset contains 5000 pre-computed CFAST simulation results
based on the ``SP_AST_Diesel_1p1.in`` validation model. Each sample
maps fire parameters to the maximum target surface temperature (TRGSURT).

**Base model**: SP_AST_Diesel_1p1.in

**Input features** (4):
  - ``heat_of_combustion`` (MJ/kg): Heat of combustion [5.0, 50.0]
  - ``radiative_fraction`` : Radiative fraction [0.1, 0.5]
  - ``soot_yield`` : Soot yield [0.01, 0.15]
  - ``target_location_z`` (m): Target device height above floor [1.45, 5.45]

**Target** (1):
  - ``max_trgsurt`` (°C): Maximum target surface temperature

**Sampling method**: Sobol quasi-random sequence (scrambled, seed=42)

**Number of samples**: 5000
"""

# File name of the bundled dataset; the loader resolves it inside the ``data``
# directory of the ``pycfast.datasets`` package. The ``.gz`` suffix indicates a
# gzip-compressed CSV (presumably decompressed by pandas via extension
# inference -- the loader passes no explicit ``compression`` argument).
_DATA_FILE = "sp_ast_diesel_1p1.csv.gz"


def load_sp_ast_diesel_1p1(
    *,
    return_X_y: bool = False,
    as_frame: bool = True,
) -> (
    pd.DataFrame
    | tuple[pd.DataFrame, pd.Series]
    | tuple[NDArray[np.floating], NDArray[np.floating]]
):
    """Load the pre-computed SP_AST_Diesel_1p1 dataset bundled with pycfast.

    The dataset holds 5000 CFAST simulation results derived from the
    ``SP_AST_Diesel_1p1.in`` validation model. Each row pairs four fire
    parameters (heat of combustion, radiative fraction, soot yield and
    target height) with the resulting maximum target surface temperature
    (TRGSURT).

    Parameters
    ----------
    return_X_y : bool, default=False
        When ``False``, the full table is returned as one DataFrame.
        When ``True``, the data is split into ``(X, y)``, where ``X``
        holds the 4 input feature columns and ``y`` the target column.
    as_frame : bool, default=True
        Selects the container type of the ``(X, y)`` pair: pandas objects
        when ``True``, numpy arrays when ``False``. This flag is ignored
        unless ``return_X_y=True``; the single-table form is always a
        DataFrame.

    Returns
    -------
    data : pd.DataFrame or tuple
        - ``return_X_y=False``: a DataFrame containing every column. The
          dataset description is available as ``data.attrs["DESCR"]``.
        - ``return_X_y=True`` and ``as_frame=True``: ``(X, y)`` as
          ``(pd.DataFrame, pd.Series)``.
        - ``return_X_y=True`` and ``as_frame=False``: ``(X, y)`` as
          ``(np.ndarray, np.ndarray)``.

    Examples
    --------
    >>> from pycfast.datasets import load_sp_ast_diesel_1p1
    >>> df = load_sp_ast_diesel_1p1()
    >>> df.shape
    (5000, 5)
    >>> X, y = load_sp_ast_diesel_1p1(return_X_y=True)
    >>> X.shape
    (5000, 4)
    """
    # Locate the packaged CSV through importlib.resources so the lookup does
    # not depend on how or where the package is installed.
    package_root = resources.files("pycfast.datasets")
    resource = package_root.joinpath("data").joinpath(_DATA_FILE)

    # as_file() hands back a concrete filesystem path for the resource for
    # the duration of the ``with`` block; the table is fully read inside it.
    with resources.as_file(resource) as csv_file:
        table = pd.read_csv(csv_file)

    # Ship the human-readable description alongside the data.
    table.attrs["DESCR"] = _DESCR

    if return_X_y:
        features = table[_INPUT_PARAMS]
        target = table[_TARGET_NAME]
        if not as_frame:
            return features.to_numpy(), target.to_numpy()
        return features, target

    return table