Source code for edges.averaging.combiners

"""Functions for combining multiple GSData files/objects."""

import logging
import warnings
from collections.abc import Sequence
from pathlib import Path
from typing import Any

import numpy as np
from pygsdata import GSData, gsregister

from .utils import NsamplesStrategy, get_weights_from_strategy

logger = logging.getLogger(__name__)



[docs]
@gsregister("gather")
def average_multiple_objects(
    *objs: Sequence[GSData],
    nsamples_strategy: NsamplesStrategy = NsamplesStrategy.FLAGGED_NSAMPLES,
    use_resids: bool | None = None,
) -> GSData:
    """Average multiple GSData objects together.

    In this function, each GSData object is expected to have the same data shape, so
    that each GSData object's data array can be directly summed together. This is most
    useful when each file represents a single night's worth of data, and the time axis
    represents different LSTs. However, the function is agnostic to the details
    of what each object represents.

    Parameters
    ----------
    objs : GSData
        The GSData objects to average together.
    nsamples_strategy : NsamplesStrategy
        The strategy to use when considering the weights with which to combine the
        objects. See the documentation for
        :class:`~edges.analysis.averaging.lstbin.NsamplesStrategy` for more information.
    use_resids : bool, optional
        If True, the residuals will be averaged and added to the average model. If
        False, the residuals will be ignored. If None, the residuals will be used if
        they are present in all objects, and otherwise ignored.

    Returns
    -------
    GSData
        The averaged GSData object.

    Raises
    ------
    ValueError
        If the objects do not all have the same shape, or if use_resids is True and one
        or more of the objects has no residuals.
    """
    if any(obj.data.shape != objs[0].data.shape for obj in objs[1:]):
        raise ValueError("All objects must have the same shape to average them.")

    if use_resids is None:
        use_resids = all(obj.residuals is not None for obj in objs)

    if use_resids and any(obj.residuals is None for obj in objs):
        raise ValueError("One or more of the input objects has no residuals.")

    weights, nsamples = [], []
    for obj in objs:
        w, n = get_weights_from_strategy(obj, nsamples_strategy)
        weights.append(w)
        nsamples.append(n)

    wtot = np.sum(weights, axis=0)
    ntot = np.sum(nsamples, axis=0)

    if use_resids:
        residuals = np.nansum(
            [obj.residuals * w for obj, w in zip(objs, weights, strict=False)], axis=0
        )

        residuals[wtot > 0] /= wtot[wtot > 0]
        tot_model = np.nansum([obj.model for obj in objs], axis=0)
        nobj = len(objs) - sum(np.all(np.isnan(obj.model), axis=3) for obj in objs)

        with warnings.catch_warnings():
            warnings.filterwarnings("ignore")
            tot_model /= nobj[..., None]

        final_data = tot_model + residuals
    else:
        final_data = np.nansum(
            [obj.data * w for obj, w in zip(objs, weights, strict=False)], axis=0
        )
        final_data[wtot > 0] /= wtot[wtot > 0]
        residuals = None

    return objs[0].update(
        data=final_data,
        residuals=residuals,
        nsamples=ntot,
        flags={},
    )




[docs]
def average_files_pairwise(*files: str | Path, **kwargs: Any) -> GSData:
    """Average multiple files together using their flagged weights.

    This has better memory management than lst_average, as it only ever reads two
    files at once.

    Parameters
    ----------
    files : str or Path
        The files to average together.
    **kwargs
        Additional keyword arguments to pass to the :func:`average_multiple_objects`
        function.

    Returns
    -------
    GSData
        The averaged GSData object.

    See Also
    --------
    average_multiple_objects : The function that actually does the averaging.
    """
    obj = GSData.from_file(files[0])

    for i, fl in enumerate(files[1:]):
        new = GSData.from_file(fl)
        try:
            obj = average_multiple_objects(obj, new, **kwargs)
        except ValueError as e:
            raise ValueError(f"{e!s}: File {i}") from e

    return obj