Source code for edges.averaging.combiners

"""Functions for combining multiple GSData files/objects."""

import logging
import warnings
from collections.abc import Sequence
from pathlib import Path
from typing import Any

import numpy as np
from pygsdata import GSData, gsregister

from .utils import NsamplesStrategy, get_weights_from_strategy

logger = logging.getLogger(__name__)


[docs] @gsregister("gather") def average_multiple_objects( *objs: Sequence[GSData], nsamples_strategy: NsamplesStrategy = NsamplesStrategy.FLAGGED_NSAMPLES, use_resids: bool | None = None, ) -> GSData: """Average multiple GSData objects together. In this function, each GSData object is expected to have the same data shape, so that each GSData object's data array can be directly summed together. This is most useful when each file represents a single night's worth of data, and the time axis represents different LSTs. However, the function is agnostic to the details of what each object represents. Parameters ---------- objs : GSData The GSData objects to average together. nsamples_strategy : NsamplesStrategy The strategy to use when considering the weights with which to combine the objects. See the documentation for :class:`~edges.analysis.averaging.lstbin.NsamplesStrategy` for more information. use_resids : bool, optional If True, the residuals will be averaged and added to the average model. If False, the residuals will be ignored. If None, the residuals will be used if they are present in all objects, and otherwise ignored. Returns ------- GSData The averaged GSData object. Raises ------ ValueError If the objects do not all have the same shape, or if use_resids is True and one or more of the objects has no residuals. """ if any(obj.data.shape != objs[0].data.shape for obj in objs[1:]): raise ValueError("All objects must have the same shape to average them.") if use_resids is None: use_resids = all(obj.residuals is not None for obj in objs) if use_resids and any(obj.residuals is None for obj in objs): raise ValueError("One or more of the input objects has no residuals.") weights, nsamples = [], [] for obj in objs: w, n = get_weights_from_strategy(obj, nsamples_strategy) weights.append(w) nsamples.append(n) wtot = np.sum(weights, axis=0) ntot = np.sum(nsamples, axis=0) if use_resids: residuals = np.nansum( [obj.residuals * w for obj, w in zip(objs, weights, strict=False)], axis=0 ) residuals[wtot > 0] /= wtot[wtot > 0] tot_model = np.nansum([obj.model for obj in objs], axis=0) nobj = len(objs) - sum(np.all(np.isnan(obj.model), axis=3) for obj in objs) with warnings.catch_warnings(): warnings.filterwarnings("ignore") tot_model /= nobj[..., None] final_data = tot_model + residuals else: final_data = np.nansum( [obj.data * w for obj, w in zip(objs, weights, strict=False)], axis=0 ) final_data[wtot > 0] /= wtot[wtot > 0] residuals = None return objs[0].update( data=final_data, residuals=residuals, nsamples=ntot, flags={}, )
[docs] def average_files_pairwise(*files: str | Path, **kwargs: Any) -> GSData: """Average multiple files together using their flagged weights. This has better memory management than lst_average, as it only ever reads two files at once. Parameters ---------- files : str or Path The files to average together. **kwargs Additional keyword arguments to pass to the :func:`average_multiple_objects` function. Returns ------- GSData The averaged GSData object. See Also -------- average_multiple_objects : The function that actually does the averaging. """ obj = GSData.from_file(files[0]) for i, fl in enumerate(files[1:]): new = GSData.from_file(fl) try: obj = average_multiple_objects(obj, new, **kwargs) except ValueError as e: raise ValueError(f"{e!s}: File {i}") from e return obj