# Source code for edges.averaging.utils
"""Utility functions and classes for averaging operations."""
from enum import Enum
import numpy as np
from pygsdata import GSData
[docs]
class NsamplesStrategy(Enum):
    """An enumeration of strategies for computing Nsamples when combining data.

    Note that generally the strategy can influence *two* components of the
    calculation: firstly it influences how the data is _weighted_ when it is being
    averaged, and secondly, it influences what the final number of samples are
    (i.e. the effective variance of the data). Generally, these align, but some
    options specifically choose different conventions for each of these choices.

    Options
    -------
    FLAGGED_NSAMPLES
        Combine the underlying Nsamples, setting any flagged data to have zero
        samples.
    FLAGS_ONLY
        Count each datum as one sample, but only if it is not flagged.
    FLAGGED_NSAMPLES_UNIFORM
        Give each datum that is both unflagged and has at least one sample a
        weight of unity in an average/model, but propagate nsamples using the
        full flagged nsamples.
    NSAMPLES_ONLY
        Only consider the Nsamples of the underlying data, not any flags.
    UNFLAGGED_UNIFORM
        Give every datum a weight of unity, taking neither the flags nor the
        Nsamples into account. (As applied by ``get_weights_from_strategy``,
        data that are NaN still receive zero weight.)
    """

    # The integer values are part of the public interface (e.g. lookup via
    # ``NsamplesStrategy(2)``), so they are spelled out rather than auto-generated.
    FLAGGED_NSAMPLES = 0
    FLAGS_ONLY = 1
    FLAGGED_NSAMPLES_UNIFORM = 2
    NSAMPLES_ONLY = 3
    UNFLAGGED_UNIFORM = 4
[docs]
def get_weights_from_strategy(
    data: GSData, strategy: NsamplesStrategy
) -> tuple[np.ndarray, np.ndarray]:
    """Compute the weights and nsamples implied by a given strategy.

    Parameters
    ----------
    data
        The data object to derive weights for. This function reads its ``data``,
        ``nsamples``, ``flagged_nsamples`` and ``complete_flags`` attributes.
    strategy
        The strategy that selects which quantity is used as the weights.

    Returns
    -------
    weights
        The weights selected by ``strategy``, set to zero wherever ``data.data``
        is NaN.
    nsamples
        ``data.flagged_nsamples``, set to zero wherever ``data.data`` is NaN.
        This is the same for every strategy.

    Raises
    ------
    ValueError
        If ``strategy`` does not compare equal to any of the handled
        ``NsamplesStrategy`` members.
    """
    nan_mask = np.isnan(data.data)

    # The nsamples that get propagated (to compute the variance) are currently
    # always the true "flagged_nsamples", independent of the strategy. This is
    # done for simplicity. It may be better in the future to adjust them per
    # strategy, so that the summed nsamples reflects the variance of the average
    # given the weights actually used -- but that is not straightforward, since it
    # relies on assumptions about how the data are distributed.
    propagated_nsamples = data.flagged_nsamples

    def select_weights():
        """Return the un-masked weights for the requested strategy."""
        if strategy == NsamplesStrategy.FLAGGED_NSAMPLES:
            return propagated_nsamples
        if strategy == NsamplesStrategy.FLAGS_ONLY:
            # One for every datum that is not flagged, zero otherwise.
            return (~data.complete_flags).astype(float)
        if strategy == NsamplesStrategy.FLAGGED_NSAMPLES_UNIFORM:
            # One wherever the flagged nsamples are positive, zero otherwise.
            return (propagated_nsamples > 0).astype(float)
        if strategy == NsamplesStrategy.NSAMPLES_ONLY:
            return data.nsamples
        if strategy == NsamplesStrategy.UNFLAGGED_UNIFORM:
            return np.ones_like(data.data)

        raise ValueError(
            f"Invalid nsamples_strategy: {strategy}. "
            f"Must be a member of {NsamplesStrategy}"
        )

    raw_weights = select_weights()

    # NaN data can never contribute: zero both their weight and their nsamples.
    masked_weights = np.where(nan_mask, 0, raw_weights)
    masked_nsamples = np.where(nan_mask, 0, propagated_nsamples)
    return masked_weights, masked_nsamples