Source code for spey_pyhf.data

from typing import Optional, List, Tuple, Dict, Text, Union, Iterator

from dataclasses import dataclass
from abc import ABC, abstractmethod
import json, copy, os
import numpy as np

from spey.base import ModelConfig
from spey import ExpectationType

from . import manager, WorkspaceInterpreter


[docs]class Base(ABC):
    """Base class for pyhf data input"""

    def __call__(self, expected: ExpectationType = ExpectationType.observed) -> Tuple:
        """
        Retreive pyhf workspace

        Args:
            expected (~spey.ExpectationType): Sets which values the fitting algorithm should focus and
              p-values to be computed.

              * :obj:`~spey.ExpectationType.observed`: Computes the p-values with via post-fit
                prescriotion which means that the experimental data will be assumed to be the truth
                (default).
              * :obj:`~spey.ExpectationType.aposteriori`: Computes the expected p-values with via
                post-fit prescriotion which means that the experimental data will be assumed to be
                the truth.
              * :obj:`~spey.ExpectationType.apriori`: Computes the expected p-values with via pre-fit
                prescription which means that the SM will be assumed to be the truth.

        Returns:
            ``Tuple``:
            workspace, model and data
        """
        return None, None, None

[docs]    @abstractmethod
    def config(
        self, allow_negative_signal: bool = True, poi_upper_bound: Optional[float] = None
    ) -> ModelConfig:
        r"""
        Model configuration.

        Args:
            allow_negative_signal (``bool``, default ``True``): If ``True`` :math:`\hat\mu`
              value will be allowed to be negative.
            poi_upper_bound (``float``, default ``40.0``): upper bound for parameter
              of interest, :math:`\mu`.

        Returns:
            ~spey.base.ModelConfig:
            Model configuration. Information regarding the position of POI in
            parameter list, suggested input and bounds.
        """

    @property
    @abstractmethod
    def isAlive(self) -> bool:
        """Returns True if at least one bin has non-zero signal yield."""


[docs]@dataclass
class SimpleModelData(Base):
    """
    Dataclass for simple statistical model

    Args:
        signal_yields (``List[float]``): signal yields
        background_yields (``List[float]``): background yields
        data (``List[float]``): observations
        absolute_uncertainties (``List[float]``): absolute uncertainties on the background
    """

    signal_yields: List[float]
    background_yields: List[float]
    data: List[float]
    absolute_uncertainties: List[float]

    def __post_init__(self):
        assert (
            len(self.signal_yields)
            == len(self.background_yields)
            == len(self.data)
            == len(self.absolute_uncertainties)
        ), "Lenght of all input must be the same."

    def __call__(self, expected: ExpectationType = ExpectationType.observed) -> Tuple:
        """
        Retreive pyhf workspace

        Args:
            expected (~spey.ExpectationType): Sets which values the fitting algorithm should focus and
              p-values to be computed.

              * :obj:`~spey.ExpectationType.observed`: Computes the p-values with via post-fit
                prescriotion which means that the experimental data will be assumed to be the truth
                (default).
              * :obj:`~spey.ExpectationType.aposteriori`: Computes the expected p-values with via
                post-fit prescriotion which means that the experimental data will be assumed to be
                the truth.
              * :obj:`~spey.ExpectationType.apriori`: Computes the expected p-values with via pre-fit
                prescription which means that the SM will be assumed to be the truth.

        Returns:
            ``Tuple``:
            workspace, model and data
        """
        model = manager.pyhf.simplemodels.uncorrelated_background(
            self.signal_yields, self.background_yields, self.absolute_uncertainties
        )

        data = (
            self.background_yields if expected == ExpectationType.apriori else self.data
        ) + model.config.auxdata

        return None, model, data

    @property
    def isAlive(self) -> bool:
        """Returns True if at least one bin has non-zero signal yield."""
        return any(x > 0 for x in self.signal_yields)

[docs]    def config(
        self, allow_negative_signal: bool = True, poi_upper_bound: Optional[float] = None
    ) -> ModelConfig:
        r"""
        Model configuration.

        Args:
            allow_negative_signal (``bool``, default ``True``): If ``True`` :math:`\hat\mu`
              value will be allowed to be negative.
            poi_upper_bound (``float``, default ``40.0``): upper bound for parameter
              of interest, :math:`\mu`.

        Returns:
            ~spey.base.ModelConfig:
            Model configuration. Information regarding the position of POI in
            parameter list, suggested input and bounds.
        """
        signal = np.array(self.signal_yields)
        nb = np.array(self.background_yields)
        minimum_poi = (
            -np.min(np.true_divide(nb[signal != 0.0], signal[signal != 0.0])).astype(
                np.float32
            )
            if len(signal[signal != 0.0]) > 0
            else -np.inf
        )

        _, model, _ = self()

        bounds = model.config.suggested_bounds()
        bounds[model.config.poi_index] = (
            max(minimum_poi, -10.0) if allow_negative_signal else 0.0,
            bounds[model.config.poi_index][1] if not poi_upper_bound else poi_upper_bound,
        )

        return ModelConfig(
            poi_index=model.config.poi_index,
            minimum_poi=minimum_poi,
            suggested_init=model.config.suggested_init(),
            suggested_bounds=bounds,
            parameter_names=model.config.par_names,
        )


[docs]@dataclass
class FullStatisticalModelData(Base):
    """
    Data container for the full statistical model.

    Args:
        signal_patch (``List[Dict]``): Patch data for signal model. please see
            `this link <https://pyhf.readthedocs.io/en/v0.7.0/likelihood.html>`_ for details on
            the structure of the input.
        background_only_model (``Dict`` or ``Text``): This input expects background only data
            that describes the full statistical model for the background. It also accepts ``str``
            input which indicates the full path to the background only ``JSON`` file.
    """

    signal_patch: List[Dict]
    background_only_model: Union[Dict, Text]

    def __post_init__(self):
        if isinstance(self.background_only_model, str):
            if not os.path.isfile(self.background_only_model):
                raise FileNotFoundError(f"Can not find {self.background_only_model}")
            with open(self.background_only_model, "r", encoding="uft-8") as f:
                self.background_only_model = json.load(f)

        self.background_only_model_apriori = copy.deepcopy(self.background_only_model)
        interpreter = WorkspaceInterpreter(self.background_only_model)
        interpreter.add_patch(self.signal_patch)

        # set data as expected background events
        self.background_only_model_apriori["observations"] = [
            {"name": name, "data": data}
            for name, data in interpreter.expected_background_yields.items()
        ]

        self.workspace_apriori = manager.pyhf.Workspace(
            self.background_only_model_apriori
        )
        self.workspace = manager.pyhf.Workspace(self.background_only_model)

        min_ratio = []
        for idc, channel in enumerate(self.background_only_model.get("channels", [])):
            current_signal = []
            for sigch in self.signal_patch:
                if idc == int(sigch["path"].split("/")[2]):
                    current_signal = np.array(
                        sigch.get("value", {}).get("data", []), dtype=np.float32
                    )
                    break
            if len(current_signal) == 0:
                continue
            current_bkg = []
            for ch in channel["samples"]:
                if len(current_bkg) == 0:
                    current_bkg = np.zeros(shape=(len(ch["data"]),), dtype=np.float32)
                current_bkg += np.array(ch["data"], dtype=np.float32)
            min_ratio.append(
                np.min(
                    np.true_divide(
                        current_bkg[current_signal != 0.0],
                        current_signal[current_signal != 0.0],
                    )
                )
                if np.any(current_signal != 0.0)
                else np.inf
            )
        self._minimum_poi = (
            -np.min(min_ratio).astype(np.float32) if len(min_ratio) > 0 else -np.inf
        )

        self._models = {"post": None, "pre": None}
        # Initialise config
        model = self()[1]

        self._config = {
            "poi_index": model.config.poi_index,
            "minimum_poi": self._minimum_poi,
            "suggested_init": model.config.suggested_init(),
            "suggested_bounds": model.config.suggested_bounds(),
            "parameter_names": model.config.par_names,
            "suggested_fixed": model.config.suggested_fixed(),
        }

    @property
    def channels(self) -> Iterator[Text]:
        """Return channel names"""
        return (ch["name"] for ch in self.background_only_model["channels"])

    @property
    def channel_properties(self) -> Iterator[Tuple[int, Text, int]]:
        """Returns an iterator for channel index, name and number of bins"""
        for idx, channel in enumerate(self.channels):
            yield idx, channel, self.workspace.channel_nbins[channel]

    def __call__(self, expected: ExpectationType = ExpectationType.observed) -> Tuple:
        """
        Retreive pyhf workspace

        Args:
            expected (~spey.ExpectationType): Sets which values the fitting algorithm should focus and
              p-values to be computed.

              * :obj:`~spey.ExpectationType.observed`: Computes the p-values with via post-fit
                prescriotion which means that the experimental data will be assumed to be the truth
                (default).
              * :obj:`~spey.ExpectationType.aposteriori`: Computes the expected p-values with via
                post-fit prescriotion which means that the experimental data will be assumed to be
                the truth.
              * :obj:`~spey.ExpectationType.apriori`: Computes the expected p-values with via pre-fit
                prescription which means that the SM will be assumed to be the truth.

        Returns:
            ``Tuple``:
            workspace, model and data
        """
        if expected == ExpectationType.apriori:
            if self._models["pre"] is None:
                self._models["pre"] = self.workspace_apriori.model(
                    patches=[self.signal_patch],
                    modifier_settings={
                        "normsys": {"interpcode": "code4"},
                        "histosys": {"interpcode": "code4p"},
                    },
                )

            return (
                self.workspace_apriori,
                self._models["pre"],
                self.workspace_apriori.data(self._models["pre"]),
            )

        if self._models["post"] is None:
            self._models["post"] = self.workspace.model(
                patches=[self.signal_patch],
                modifier_settings={
                    "normsys": {"interpcode": "code4"},
                    "histosys": {"interpcode": "code4p"},
                },
            )
        return (
            self.workspace,
            self._models["post"],
            self.workspace.data(self._models["post"]),
        )

[docs]    def config(
        self, allow_negative_signal: bool = True, poi_upper_bound: Optional[float] = None
    ) -> ModelConfig:
        r"""
        Model configuration.

        Args:
            allow_negative_signal (``bool``, default ``True``): If ``True`` :math:`\hat\mu`
              value will be allowed to be negative.
            poi_upper_bound (``float``, default ``40.0``): upper bound for parameter
              of interest, :math:`\mu`.

        Returns:
            ~spey.base.ModelConfig:
            Model configuration. Information regarding the position of POI in
            parameter list, suggested input and bounds.
        """
        bounds = copy.deepcopy(self._config["suggested_bounds"])
        bounds[self._config["poi_index"]] = (
            max(self._minimum_poi, -10.0) if allow_negative_signal else 0.0,
            bounds[self._config["poi_index"]][1]
            if not poi_upper_bound
            else poi_upper_bound,
        )

        return ModelConfig(
            poi_index=self._config["poi_index"],
            minimum_poi=self._minimum_poi,
            suggested_init=self._config["suggested_init"],
            suggested_bounds=bounds,
            parameter_names=self._config["parameter_names"],
            suggested_fixed=self._config["suggested_fixed"],
        )

    @property
    def isAlive(self) -> bool:
        """Returns True if at least one bin has non-zero signal yield."""
        for channel in (
            ch for ch in self.signal_patch if ch.get("value", None) is not None
        ):
            if any(nsig > 0.0 for nsig in channel["value"].get("data", [])):
                return True
        return False