# Source code for torx.xarray_helpers_m

"""Helper routines for xarray DataArrays with units and name attributes."""
import numpy as np
import xarray as xr
import torx
import pint
import warnings
from typing import Union, Sequence
from scipy.sparse import csr_matrix

from torx import Quantity, serialize_Quantity, deserialize_Quantity
from torx.autodoc_decorators_m import autodoc_function

@autodoc_function
def make_xarray(
    input_array,
    *,
    norm: Quantity = Quantity(1, ""),
    name: str = "",
    **kwargs,
):
    """
    Shorthand initialization for xarray.DataArray.

    'norm' is the normalization factor which should be applied to get to
    physical units. For quantities with actual physical units, use
    Quantity(1, units) as the norm. Note that '' means dimensionless.
    'name' is used for plotting.
    'coords' should be a dictionary, where the key gives the dimension name
    and the values give the coordinates.
    'attrs' should be a dictionary, used to add other attributes to the
    DataArray.
    """
    # BUGFIX: copy the caller-supplied attrs dict before inserting 'norm';
    # previously the caller's dictionary was mutated in place.
    attrs = dict(kwargs.get("attrs", {}))
    attrs["norm"] = norm
    kwargs["attrs"] = attrs
    if name:
        kwargs["name"] = name
    return xr.DataArray(input_array, **kwargs)
@autodoc_function
def serialize_norm_attrs(
    data: Union[xr.DataArray, xr.Dataset]
) -> Union[xr.DataArray, xr.Dataset]:
    """
    Serialize all norm attributes.

    Handles all norm attributes present in a DataArray or in all DataArrays
    within a Dataset. This means that a conversion of the Quantity type to a
    datatype that can be stored in a file is performed.

    Raises RuntimeError if 'data' is not an xarray object, or if a norm
    attribute is neither a torx.Quantity nor already serialized.
    """
    if isinstance(data, xr.Dataset):
        data_arrays = list(data.values())
    elif isinstance(data, xr.DataArray):
        data_arrays = [data]
    else:
        raise RuntimeError(
            "The data object needs to be either an "
            "xarray.Dataset or xarray.DataArray!"
        )
    # Coordinate DataArrays may carry norm attributes as well.
    for coord in getattr(data, "coords", []):
        if isinstance(data[coord], xr.DataArray):
            data_arrays.append(data[coord])
    for da in data_arrays:
        if "norm_value" in da.attrs and "norm_unit" in da.attrs:
            warnings.warn(
                f"Norm object of {da.name} is already serialized!",
                RuntimeWarning
            )
            # BUGFIX: use the same '<value> <unit>' format as the branch
            # below — the separating space was previously missing, which
            # produced a string the deserializer cannot split back apart.
            da.attrs["norm"] = (
                f'{float(da.attrs["norm_value"])} '
                f'{str(da.attrs["norm_unit"])}'
            )
            continue
        if "norm" not in da.attrs:
            continue
        val = da.attrs["norm"]
        if isinstance(val, torx.Quantity):
            value = getattr(val, "magnitude", getattr(val, "value", None))
            unit = getattr(val, "units", getattr(val, "unit", None))
            # Since val is a Torx Quantity, it can never come back as None
            da.attrs["norm_value"] = float(value)
            da.attrs["norm_unit"] = str(unit)
            da.attrs["norm"] = f'{float(value)} {str(unit)}'
            continue
        if isinstance(val, pint.Quantity):
            # BUGFIX: the previous message contained a backslash
            # line-continuation inside the string literal, embedding raw
            # indentation whitespace into the error text.
            raise RuntimeError(
                f"Norm object of {da.name} is of type pint.Quantity "
                "but should be torx.Quantity!"
            )
        raise RuntimeError(
            f"Norm object of {da.name} is not of type torx.Quantity!"
        )
    return data
@autodoc_function
def deserialize_norm_attrs(
    data: Union[xr.DataArray, xr.Dataset]
) -> Union[xr.DataArray, xr.Dataset]:
    """
    Deserialize all norm attributes.

    Handles all norm attributes present in a DataArray or in all DataArrays
    within a Dataset. Reverts what is performed by the matching serialize
    operation.
    """
    if isinstance(data, xr.Dataset):
        arrays = list(data.values())
    elif isinstance(data, xr.DataArray):
        arrays = [data]
    else:
        raise RuntimeError(
            "The data object needs to be either an "
            "xarray.Dataset or xarray.DataArray!"
        )
    # Coordinate DataArrays may carry norm attributes as well.
    arrays.extend(
        data[coord_name]
        for coord_name in getattr(data, "coords", [])
        if isinstance(data[coord_name], xr.DataArray)
    )
    for da in arrays:
        attrs = da.attrs
        if "norm_value" in attrs and "norm_unit" in attrs:
            # Preferred serialized form: separate value/unit attributes.
            attrs["norm"] = torx.Quantity(
                attrs["norm_value"], attrs["norm_unit"]
            )
            del attrs["norm_value"]
            del attrs["norm_unit"]
            continue
        if "norm" not in attrs:
            continue
        val = attrs["norm"]
        if isinstance(val, torx.Quantity):
            warnings.warn(
                f"Norm object of {da.name} is already a torx.Quantity!",
                RuntimeWarning
            )
        elif isinstance(val, str):
            # Legacy fallback: norm stored as one "<value> <unit>" string.
            pieces = val.strip().split(maxsplit=1)
            if len(pieces) != 2:
                raise ValueError(
                    f"Cannot split norm into value/unit from '{val}'"
                )
            attrs["norm"] = torx.Quantity(float(pieces[0]), pieces[1])
        else:
            # Let deserialize_Quantity handle any other cases
            attrs["norm"] = deserialize_Quantity(val)
    return data
def dense_matrix_from_csr(filepath, group: str = "", dims: list = None):
    """
    Convert a PARALLAX CSR matrix to dense matrix. Returns as xr.DataArray.

    Appropriate dimension names can be provided via the 'dims' keyword
    argument.
    """
    ds = xr.load_dataset(filepath, group=group)
    # The stored indices are 1-based (Fortran convention, hence the -1
    # shift); scipy expects 0-based column indices and row pointers.
    sparse = csr_matrix(
        (ds.val.values, ds.j.values - 1, ds.i.values - 1),
        shape=(ds.ndim, ds.ncol),
    )
    return xr.DataArray(sparse.toarray(), dims=dims)
def combine_obj_list(object_list: Sequence[object], attr_name: str,
                     dimension: str, coord: np.ndarray) -> xr.DataArray:
    """
    Align specific attribute along a given xarray dim.

    The objective of this method is to align a specific attribute
    (attr_name) from all contained objects in object_list along a given
    xarray dim (dimension) with assigned coordinates (coord). It is a
    generalization of xr.align. It gives coordinates to dimensions which do
    not have any coordinates defined. This allows xr.align to fill missing
    values with NaNs where necessary. Useful when the individual DataArrays
    of a given class have different shapes and/or sizes along some dimension
    without a coordinate. This happens often when dealing with 3D equilibria
    as each plane will have a different number of grid points.

    NOTE: The dims without coordinates are assigned to indices so that the
    arrays can be filled with NaNs to match the largest array and be aligned.

    Raises AttributeError if an object lacks 'attr_name'; ValueError if no
    DataArrays were collected.
    """
    individual_arrays = []
    for obj in object_list:
        # BUGFIX: only the attribute lookup belongs in the try block.
        # Previously the whole preprocessing body was wrapped, so an
        # AttributeError raised inside assign_coords/da.sizes was
        # misreported as a missing attribute on the object.
        try:
            da = getattr(obj, attr_name)
        except AttributeError as err:
            raise AttributeError(
                f"Object {obj} does not have the attribute '{attr_name}'."
            ) from err
        # Give every coordinate-less dimension a sequential integer index
        # (0, 1, 2, ...) so xr.align can outer-join and pad with NaNs.
        coords_to_assign = {
            dim: np.arange(da.sizes[dim])
            for dim in da.dims
            if dim not in da.coords
        }
        individual_arrays.append(da.assign_coords(**coords_to_assign))

    if not individual_arrays:
        raise ValueError(
            f"No DataArrays found for attribute '{attr_name}' to combine."
        )

    # All dimensions now have coordinates (either existing ones or the new
    # default index), so xr.align can use join='outer' and pad shorter
    # arrays with NaN instead of failing.
    aligned_arrays = xr.align(*individual_arrays, join='outer',
                              fill_value=np.nan)
    # Concatenate along the requested dimension and attach the supplied
    # coordinate values.
    return xr.concat(
        aligned_arrays, dim=dimension
    ).assign_coords({dimension: coord})