# Source code for spey_pyhf.helper_functions

"""Helper function for creating and interpreting pyhf inputs"""
from typing import Dict, Iterator, List, Optional, Text, Tuple, Union

# Public API of this module; the module-level ``__dir__`` returns this list.
__all__ = ["WorkspaceInterpreter"]


def __dir__():
    """Limit ``dir()`` on this module to the names exported in ``__all__``."""
    exported = __all__
    return exported


def remove_from_json(idx: int) -> Dict:
    """
    Build the JSONPatch operation that drops a channel

    Args:
        idx (``int``): position of the channel within the ``channels`` list

    Returns:
        ``Dict``:
        JSONPatch ``remove`` operation pointing at ``/channels/<idx>``
    """
    target = f"/channels/{idx}"
    return dict(op="remove", path=target)


def add_to_json(idx: int, yields: List[float], modifiers: List[Dict]) -> Dict:
    """
    Build the JSONPatch operation that inserts a signal sample into a channel

    Args:
        idx (``int``): position of the channel within the ``channels`` list
        yields (``List[float]``): signal yields stored as the sample ``data``
        modifiers (``List[Dict]``): modifiers attached to the signal sample

    Returns:
        ``Dict``:
        JSONPatch ``add`` operation that places a sample named ``Signal`` at
        ``/channels/<idx>/samples/0``
    """
    signal_sample = {"name": "Signal", "data": yields, "modifiers": modifiers}
    target = f"/channels/{idx}/samples/0"
    return {"op": "add", "path": target, "value": signal_sample}


def _default_modifiers(poi_name: Text) -> List[Dict]:
    """Retreive default modifiers"""
    return [
        {"data": None, "name": "lumi", "type": "lumi"},
        {"data": None, "name": poi_name, "type": "normfactor"},
    ]


class WorkspaceInterpreter:
    """
    A pyhf workspace interpreter to handle book keeping for the background only models
    and convert signal yields into JSONPatch compatible for pyhf.

    Args:
        background_only_model (``Dict``): description for the background only statistical model
    """

    __slots__ = ["background_only_model", "_signal_dict", "_signal_modifiers"]

    def __init__(self, background_only_model: Dict):
        self.background_only_model = background_only_model
        """Background only statistical model description"""
        # channel name -> injected signal yields (one value per bin)
        self._signal_dict = {}
        # channel name -> modifiers attached to the injected signal sample
        self._signal_modifiers = {}

    def __getitem__(self, item):
        """Forward item access to the background only model description"""
        return self.background_only_model[item]

    @property
    def channels(self) -> Iterator[Text]:
        """Retrieve channel names as iterator (a fresh iterator on every access)"""
        return (ch["name"] for ch in self["channels"])

    @property
    def poi_name(self) -> List[Tuple[Text, Text]]:
        """Retrieve ``(measurement name, poi name)`` pairs, one per measurement"""
        return [(mes["name"], mes["config"]["poi"]) for mes in self["measurements"]]

    @property
    def bin_map(self) -> Dict[Text, int]:
        """Get number of bins per channel, read from the first sample of each channel"""
        return {ch["name"]: len(ch["samples"][0]["data"]) for ch in self["channels"]}

    @property
    def expected_background_yields(self) -> Dict[Text, List[float]]:
        """
        Retrieve expected background yields with respect to signal injection

        Only channels that currently hold an injected signal are reported. For each of
        them the ``data`` of all samples are summed bin by bin.
        """
        yields = {}
        for channel in self["channels"]:
            name = channel["name"]
            if name not in self._signal_dict:
                continue
            total = []
            for smp in channel["samples"]:
                if not total:
                    # first non-empty sample fixes the number of bins
                    total = [0.0] * len(smp["data"])
                total = [acc + dt for acc, dt in zip(total, smp["data"])]
            yields[name] = total
        return yields

    def guess_channel_type(self, channel_name: Text) -> Text:
        """
        Guess the type of the channel as CR VR or SR

        Args:
            channel_name (``Text``): name of the channel

        Raises:
            ``ValueError``: if the channel is not part of the model

        Returns:
            ``Text``:
            ``"CR"``, ``"VR"`` or ``"SR"`` (first tag found in the upper-cased name,
            checked in that order) or ``"__unknown__"`` if none of them appears.
        """
        if channel_name not in self.channels:
            raise ValueError(f"Unknown channel: {channel_name}")
        for tp in ["CR", "VR", "SR"]:
            if tp in channel_name.upper():
                return tp

        return "__unknown__"

    def guess_CRVR(self) -> List[Text]:
        """Retrieve control and validation channel names by guess"""
        return [
            name
            for name in self.channels
            if self.guess_channel_type(name) in ["CR", "VR"]
        ]

    def get_channels(self, channel_index: Union[List[int], List[Text]]) -> List[Text]:
        """
        Retrieve channel names with respect to their index

        Args:
            channel_index (``List[int]`` or ``List[Text]``): Indices or names of the channels

        Returns:
            ``List[Text]``:
            Names of the channels matching the given indices or names, in the order
            they appear in the model
        """
        return [
            name
            for idx, name in enumerate(self.channels)
            if idx in channel_index or name in channel_index
        ]

    def inject_signal(
        self, channel: Text, data: List[float], modifiers: Optional[List[Dict]] = None
    ) -> None:
        """
        Inject signal to the model

        Args:
            channel (``Text``): channel name
            data (``List[float]``): signal yields
            modifiers (``List[Dict]``, default ``None``): modifiers of the signal sample.
                If ``None``, default modifiers are built using the poi of the first
                measurement.

        Raises:
            ``ValueError``: If channel does not exist or number of yields does not match
                with the bin size of the channel
        """
        if channel not in self.channels:
            raise ValueError(
                f"{channel} does not exist. Available channels are "
                + ", ".join(self.channels)
            )
        # bin_map is rebuilt on every access, so look it up only once
        expected_bins = self.bin_map[channel]
        if len(data) != expected_bins:
            raise ValueError(
                "Number of bins in injection does not match to the channel. "
                f"{expected_bins} expected, {len(data)} received."
            )

        self._signal_dict[channel] = data
        self._signal_modifiers[channel] = (
            _default_modifiers(self.poi_name[0][1]) if modifiers is None else modifiers
        )

    @property
    def signal_per_channel(self) -> Dict[Text, List[float]]:
        """Return signal yields in each channel"""
        return self._signal_dict

    def make_patch(self) -> List[Dict]:
        """
        Make a JSONPatch for the background only model

        Channels with an injected signal receive an ``add`` operation, every other
        channel receives a ``remove`` operation.

        Raises:
            ``ValueError``: if there is no signal.

        Returns:
            ``List[Dict]``:
            JSONPatch file for the background only model.
        """
        if not self._signal_dict:
            raise ValueError("Please add signal yields.")

        patch = []
        remove_indices = []
        for ich, channel in enumerate(self.channels):
            if channel in self._signal_dict:
                patch.append(
                    add_to_json(
                        ich, self._signal_dict[channel], self._signal_modifiers[channel]
                    )
                )
            else:
                remove_indices.append(ich)

        # Removals come after the additions and go from the highest index to the
        # lowest: removing an array element shifts all later elements, so any other
        # order would invalidate the remaining indices. The indices are collected in
        # ascending numeric order, hence a plain reversal is enough (sorting the
        # path suffix as a string would misplace indices >= 10).
        patch.extend(remove_from_json(ich) for ich in reversed(remove_indices))

        return patch

    def reset_signal(self) -> None:
        """Clear the signal map together with the stored signal modifiers"""
        self._signal_dict = {}
        self._signal_modifiers = {}

    def add_patch(self, signal_patch: List[Dict]) -> None:
        """
        Inject signal patch

        Replaces the currently stored signal with the content of the ``add``
        operations in the patch.

        Args:
            signal_patch (``List[Dict]``): JSONPatch for the signal
        """
        signal_map = self.patch_to_map(signal_patch=signal_patch)
        # keep yields and modifiers in the same layout that inject_signal produces
        self._signal_dict = {name: entry["data"] for name, entry in signal_map.items()}
        self._signal_modifiers = {
            name: entry["modifiers"] for name, entry in signal_map.items()
        }

    def patch_to_map(self, signal_patch: List[Dict]) -> Dict[Text, Dict]:
        """
        Convert JSONPatch into signal map

        .. code:: python3

            >>> signal_map = {channel_name: {"data" : signal_yields, "modifiers": signal_modifiers}}


        Only ``add`` operations are considered. The channel is identified through the
        index in the operation path (``/channels/<index>/...``).

        Args:
            signal_patch (``List[Dict]``): JSONPatch for the signal

        Returns:
            ``Dict[Text, Dict]``:
            signal map including the data and modifiers
        """
        signal_map = {}
        for item in signal_patch:
            if item["op"] != "add":
                continue
            channel_idx = int(item["path"].split("/")[2])
            channel_name = self["channels"][channel_idx]["name"]
            value = item["value"]
            if "modifiers" in value:
                modifiers = value["modifiers"]
            else:
                # build the defaults only when the patch does not provide modifiers
                modifiers = _default_modifiers(poi_name=self.poi_name[0][1])
            signal_map[channel_name] = {"data": value["data"], "modifiers": modifiers}
        return signal_map