Source code for spey_pyhf.data

from typing import Optional, List, Tuple, Dict, Text, Union, Iterator

from dataclasses import dataclass
from abc import ABC, abstractmethod
import json, copy, os
import numpy as np

from spey.base import ModelConfig
from spey import ExpectationType

from . import manager, WorkspaceInterpreter


[docs]class Base(ABC): """Base class for pyhf data input""" def __call__(self, expected: ExpectationType = ExpectationType.observed) -> Tuple: """ Retreive pyhf workspace Args: expected (~spey.ExpectationType): Sets which values the fitting algorithm should focus and p-values to be computed. * :obj:`~spey.ExpectationType.observed`: Computes the p-values with via post-fit prescriotion which means that the experimental data will be assumed to be the truth (default). * :obj:`~spey.ExpectationType.aposteriori`: Computes the expected p-values with via post-fit prescriotion which means that the experimental data will be assumed to be the truth. * :obj:`~spey.ExpectationType.apriori`: Computes the expected p-values with via pre-fit prescription which means that the SM will be assumed to be the truth. Returns: ``Tuple``: workspace, model and data """ return None, None, None
[docs] @abstractmethod def config( self, allow_negative_signal: bool = True, poi_upper_bound: Optional[float] = None ) -> ModelConfig: r""" Model configuration. Args: allow_negative_signal (``bool``, default ``True``): If ``True`` :math:`\hat\mu` value will be allowed to be negative. poi_upper_bound (``float``, default ``40.0``): upper bound for parameter of interest, :math:`\mu`. Returns: ~spey.base.ModelConfig: Model configuration. Information regarding the position of POI in parameter list, suggested input and bounds. """
@property @abstractmethod def isAlive(self) -> bool: """Returns True if at least one bin has non-zero signal yield."""
[docs]@dataclass class SimpleModelData(Base): """ Dataclass for simple statistical model Args: signal_yields (``List[float]``): signal yields background_yields (``List[float]``): background yields data (``List[float]``): observations absolute_uncertainties (``List[float]``): absolute uncertainties on the background """ signal_yields: List[float] background_yields: List[float] data: List[float] absolute_uncertainties: List[float] def __post_init__(self): assert ( len(self.signal_yields) == len(self.background_yields) == len(self.data) == len(self.absolute_uncertainties) ), "Lenght of all input must be the same." def __call__(self, expected: ExpectationType = ExpectationType.observed) -> Tuple: """ Retreive pyhf workspace Args: expected (~spey.ExpectationType): Sets which values the fitting algorithm should focus and p-values to be computed. * :obj:`~spey.ExpectationType.observed`: Computes the p-values with via post-fit prescriotion which means that the experimental data will be assumed to be the truth (default). * :obj:`~spey.ExpectationType.aposteriori`: Computes the expected p-values with via post-fit prescriotion which means that the experimental data will be assumed to be the truth. * :obj:`~spey.ExpectationType.apriori`: Computes the expected p-values with via pre-fit prescription which means that the SM will be assumed to be the truth. Returns: ``Tuple``: workspace, model and data """ model = manager.pyhf.simplemodels.uncorrelated_background( self.signal_yields, self.background_yields, self.absolute_uncertainties ) data = ( self.background_yields if expected == ExpectationType.apriori else self.data ) + model.config.auxdata return None, model, data @property def isAlive(self) -> bool: """Returns True if at least one bin has non-zero signal yield.""" return any(x > 0 for x in self.signal_yields)
[docs] def config( self, allow_negative_signal: bool = True, poi_upper_bound: Optional[float] = None ) -> ModelConfig: r""" Model configuration. Args: allow_negative_signal (``bool``, default ``True``): If ``True`` :math:`\hat\mu` value will be allowed to be negative. poi_upper_bound (``float``, default ``40.0``): upper bound for parameter of interest, :math:`\mu`. Returns: ~spey.base.ModelConfig: Model configuration. Information regarding the position of POI in parameter list, suggested input and bounds. """ signal = np.array(self.signal_yields) nb = np.array(self.background_yields) minimum_poi = ( -np.min(np.true_divide(nb[signal != 0.0], signal[signal != 0.0])).astype( np.float32 ) if len(signal[signal != 0.0]) > 0 else -np.inf ) _, model, _ = self() bounds = model.config.suggested_bounds() bounds[model.config.poi_index] = ( max(minimum_poi, -10.0) if allow_negative_signal else 0.0, bounds[model.config.poi_index][1] if not poi_upper_bound else poi_upper_bound, ) return ModelConfig( poi_index=model.config.poi_index, minimum_poi=minimum_poi, suggested_init=model.config.suggested_init(), suggested_bounds=bounds, parameter_names=model.config.par_names, )
[docs]@dataclass class FullStatisticalModelData(Base): """ Data container for the full statistical model. Args: signal_patch (``List[Dict]``): Patch data for signal model. please see `this link <https://pyhf.readthedocs.io/en/v0.7.0/likelihood.html>`_ for details on the structure of the input. background_only_model (``Dict`` or ``Text``): This input expects background only data that describes the full statistical model for the background. It also accepts ``str`` input which indicates the full path to the background only ``JSON`` file. """ signal_patch: List[Dict] background_only_model: Union[Dict, Text] def __post_init__(self): if isinstance(self.background_only_model, str): if not os.path.isfile(self.background_only_model): raise FileNotFoundError(f"Can not find {self.background_only_model}") with open(self.background_only_model, "r", encoding="uft-8") as f: self.background_only_model = json.load(f) self.background_only_model_apriori = copy.deepcopy(self.background_only_model) interpreter = WorkspaceInterpreter(self.background_only_model) interpreter.add_patch(self.signal_patch) # set data as expected background events self.background_only_model_apriori["observations"] = [ {"name": name, "data": data} for name, data in interpreter.expected_background_yields.items() ] self.workspace_apriori = manager.pyhf.Workspace( self.background_only_model_apriori ) self.workspace = manager.pyhf.Workspace(self.background_only_model) min_ratio = [] for idc, channel in enumerate(self.background_only_model.get("channels", [])): current_signal = [] for sigch in self.signal_patch: if idc == int(sigch["path"].split("/")[2]): current_signal = np.array( sigch.get("value", {}).get("data", []), dtype=np.float32 ) break if len(current_signal) == 0: continue current_bkg = [] for ch in channel["samples"]: if len(current_bkg) == 0: current_bkg = np.zeros(shape=(len(ch["data"]),), dtype=np.float32) current_bkg += np.array(ch["data"], dtype=np.float32) min_ratio.append( np.min( np.true_divide( current_bkg[current_signal != 0.0], current_signal[current_signal != 0.0], ) ) if np.any(current_signal != 0.0) else np.inf ) self._minimum_poi = ( -np.min(min_ratio).astype(np.float32) if len(min_ratio) > 0 else -np.inf ) self._models = {"post": None, "pre": None} # Initialise config model = self()[1] self._config = { "poi_index": model.config.poi_index, "minimum_poi": self._minimum_poi, "suggested_init": model.config.suggested_init(), "suggested_bounds": model.config.suggested_bounds(), "parameter_names": model.config.par_names, "suggested_fixed": model.config.suggested_fixed(), } @property def channels(self) -> Iterator[Text]: """Return channel names""" return (ch["name"] for ch in self.background_only_model["channels"]) @property def channel_properties(self) -> Iterator[Tuple[int, Text, int]]: """Returns an iterator for channel index, name and number of bins""" for idx, channel in enumerate(self.channels): yield idx, channel, self.workspace.channel_nbins[channel] def __call__(self, expected: ExpectationType = ExpectationType.observed) -> Tuple: """ Retreive pyhf workspace Args: expected (~spey.ExpectationType): Sets which values the fitting algorithm should focus and p-values to be computed. * :obj:`~spey.ExpectationType.observed`: Computes the p-values with via post-fit prescriotion which means that the experimental data will be assumed to be the truth (default). * :obj:`~spey.ExpectationType.aposteriori`: Computes the expected p-values with via post-fit prescriotion which means that the experimental data will be assumed to be the truth. * :obj:`~spey.ExpectationType.apriori`: Computes the expected p-values with via pre-fit prescription which means that the SM will be assumed to be the truth. Returns: ``Tuple``: workspace, model and data """ if expected == ExpectationType.apriori: if self._models["pre"] is None: self._models["pre"] = self.workspace_apriori.model( patches=[self.signal_patch], modifier_settings={ "normsys": {"interpcode": "code4"}, "histosys": {"interpcode": "code4p"}, }, ) return ( self.workspace_apriori, self._models["pre"], self.workspace_apriori.data(self._models["pre"]), ) if self._models["post"] is None: self._models["post"] = self.workspace.model( patches=[self.signal_patch], modifier_settings={ "normsys": {"interpcode": "code4"}, "histosys": {"interpcode": "code4p"}, }, ) return ( self.workspace, self._models["post"], self.workspace.data(self._models["post"]), )
[docs] def config( self, allow_negative_signal: bool = True, poi_upper_bound: Optional[float] = None ) -> ModelConfig: r""" Model configuration. Args: allow_negative_signal (``bool``, default ``True``): If ``True`` :math:`\hat\mu` value will be allowed to be negative. poi_upper_bound (``float``, default ``40.0``): upper bound for parameter of interest, :math:`\mu`. Returns: ~spey.base.ModelConfig: Model configuration. Information regarding the position of POI in parameter list, suggested input and bounds. """ bounds = copy.deepcopy(self._config["suggested_bounds"]) bounds[self._config["poi_index"]] = ( max(self._minimum_poi, -10.0) if allow_negative_signal else 0.0, bounds[self._config["poi_index"]][1] if not poi_upper_bound else poi_upper_bound, ) return ModelConfig( poi_index=self._config["poi_index"], minimum_poi=self._minimum_poi, suggested_init=self._config["suggested_init"], suggested_bounds=bounds, parameter_names=self._config["parameter_names"], suggested_fixed=self._config["suggested_fixed"], )
@property def isAlive(self) -> bool: """Returns True if at least one bin has non-zero signal yield.""" for channel in ( ch for ch in self.signal_patch if ch.get("value", None) is not None ): if any(nsig > 0.0 for nsig in channel["value"].get("data", [])): return True return False