elicito.utils#

helper functions for setting up the Elicit object

Classes:

Name	Description
`DoubleBound`	constrain double-bounded distributions
`LowerBound`	constrain lower-bounded distributions
`UpperBound`	transform `upper` bounded distribution

Functions:

Name	Description
`compute_num_weights`	Compute number of weights of a tf.keras model.
`dry_run`	Run generative model in forward mode for a single epoch
`get_expert_data`	Load the training data
`get_expert_datformat`	Inspect which data format for the expert data is expected by the method.
`gumbel_softmax_trick`	Apply softmax-gumble trick
`identity`	Identity function. Returns the input
`load`	Load a saved `eliobj` from specified path.
`one_forward_simulation`	Run one forward simulation from prior samples to elicited statistics.
`parallel`	Specify parallelization
`save`	Save the eliobj as pickle.
`save_as_pkl`	Save file as pickle.

DoubleBound #

constrain double-bounded distributions

Methods:

Name	Description
`__init__`	Constrain double-bounded distribution
`forward`	Scale and translate logit transformed variable
`inv_logit`	Implement the inverse-logit transformation
`inverse`	Apply inverse of the log-odds transform
`logit`	Implement the logit transformation for :math:`u \in (0,1)`:

Source code in src/elicito/utils.py

class DoubleBound:
    """
    constrain double-bounded distributions
    """

    def __init__(self, lower: float, upper: float):
        """
        Constrain double-bounded distribution

        A variable constrained to be in the open interval
        (``lower``, ``upper``) is transformed to an unconstrained variable Y
        via a scaled and translated log-odds transform.

        Basis for the here used constraints, is the
        `constraint transforms implementation in [Stan](https://mc-stan.org/docs/reference-manual/transforms.html).

        Parameters
        ----------
        lower
            Lower bound of variable x.

        upper
            Upper bound of variable x.

        """
        self.lower = lower
        self.upper = upper

    def logit(self, u: tf.Tensor) -> tf.Tensor:
        r"""
        Implement the logit transformation for :math:`u \in (0,1)`:

        .. math::

            logit(u) = \log\left(\frac{u}{1-u}\right)

        Parameters
        ----------
        u
            Variable in open unit interval.

        Returns
        -------
        v
            Log-odds of u.

        """
        # log-odds definition
        v = tf.math.log(u / (1 - u))
        # cast v into correct dtype
        v = tf.cast(v, dtype=tf.float32)
        return v

    def inv_logit(self, v: tf.Tensor) -> tf.Tensor:
        r"""
        Implement the inverse-logit transformation

        The inverse-logit transformation is the logistic
        sigmoid for :math:`v \in (-\infty,+\infty)`:

        .. math::

            logit^{-1}(v) = \frac{1}{1+\exp(-v)}

        Parameters
        ----------
        v
            Unconstrained variable

        Returns
        -------
        u
            Logistic sigmoid of the unconstrained variable

        """
        # logistic sigmoid transform
        u = tf.divide(1.0, (1.0 + tf.exp(-v)))
        # cast v to correct dtype
        u = tf.cast(u, dtype=tf.float32)
        return u

    def forward(self, x: tf.Tensor) -> tf.Tensor:
        r"""
        Scale and translate logit transformed variable

        transform variable x with ``lower`` and ``upper`` bound
        into an unconstrained variable y.

        .. math::

            Y = logit\left(\frac{X - lower}{upper - lower}\right)

        Parameters
        ----------
        x
            Variable with lower and upper bound.

        Returns
        -------
        y
            Unconstrained variable.

        """
        # scaled and translated logit transform
        y = self.logit(tf.divide((x - self.lower), (self.upper - self.lower)))
        # cast y to correct dtype
        y = tf.cast(y, dtype=tf.float32)
        return y

    def inverse(self, y: tf.Tensor) -> tf.Tensor:
        r"""
        Apply inverse of the log-odds transform

        unconstrained variable y is transformed into a constrained variable x
        with ``lower`` and ``upper`` bound.

        .. math::

            X = lower + (upper - lower) \cdot logit^{-1}(Y)

        Parameters
        ----------
        y
            Unconstrained variable

        Returns
        -------
        x :
            Constrained variable with lower and upper bound

        """
        # inverse of log-odds transform
        x = self.lower + (self.upper - self.lower) * self.inv_logit(y)
        # cast x to correct dtype
        x = tf.cast(x, dtype=tf.float32)
        return x

init #

__init__(lower: float, upper: float)

Constrain double-bounded distribution

A variable constrained to be in the open interval (lower, upper) is transformed to an unconstrained variable Y via a scaled and translated log-odds transform.

Basis for the here used constraints, is the `constraint transforms implementation in Stan.

Parameters:

Name	Type	Description	Default
`lower`	`float`	Lower bound of variable x.	required
`upper`	`float`	Upper bound of variable x.	required

Source code in src/elicito/utils.py

def __init__(self, lower: float, upper: float):
    """
    Constrain double-bounded distribution

    A variable constrained to be in the open interval
    (``lower``, ``upper``) is transformed to an unconstrained variable Y
    via a scaled and translated log-odds transform.

    Basis for the here used constraints, is the
    `constraint transforms implementation in [Stan](https://mc-stan.org/docs/reference-manual/transforms.html).

    Parameters
    ----------
    lower
        Lower bound of variable x.

    upper
        Upper bound of variable x.

    """
    self.lower = lower
    self.upper = upper

forward #

forward(x: Tensor) -> Tensor

Scale and translate logit transformed variable

transform variable x with lower and upper bound into an unconstrained variable y.

.. math::

Y = logit\left(\frac{X - lower}{upper - lower}\right)

Parameters:

Name	Type	Description	Default
`x`	`Tensor`	Variable with lower and upper bound.	required

Returns:

Type	Description
`y`	Unconstrained variable.

Source code in src/elicito/utils.py

def forward(self, x: tf.Tensor) -> tf.Tensor:
    r"""
    Scale and translate logit transformed variable

    transform variable x with ``lower`` and ``upper`` bound
    into an unconstrained variable y.

    .. math::

        Y = logit\left(\frac{X - lower}{upper - lower}\right)

    Parameters
    ----------
    x
        Variable with lower and upper bound.

    Returns
    -------
    y
        Unconstrained variable.

    """
    # scaled and translated logit transform
    y = self.logit(tf.divide((x - self.lower), (self.upper - self.lower)))
    # cast y to correct dtype
    y = tf.cast(y, dtype=tf.float32)
    return y

inv_logit #

inv_logit(v: Tensor) -> Tensor

Implement the inverse-logit transformation

The inverse-logit transformation is the logistic sigmoid for :math:v \in (-\infty,+\infty):

.. math::

logit^{-1}(v) = \frac{1}{1+\exp(-v)}

Parameters:

Name	Type	Description	Default
`v`	`Tensor`	Unconstrained variable	required

Returns:

Type	Description
`u`	Logistic sigmoid of the unconstrained variable

Source code in src/elicito/utils.py

def inv_logit(self, v: tf.Tensor) -> tf.Tensor:
    r"""
    Implement the inverse-logit transformation

    The inverse-logit transformation is the logistic
    sigmoid for :math:`v \in (-\infty,+\infty)`:

    .. math::

        logit^{-1}(v) = \frac{1}{1+\exp(-v)}

    Parameters
    ----------
    v
        Unconstrained variable

    Returns
    -------
    u
        Logistic sigmoid of the unconstrained variable

    """
    # logistic sigmoid transform
    u = tf.divide(1.0, (1.0 + tf.exp(-v)))
    # cast v to correct dtype
    u = tf.cast(u, dtype=tf.float32)
    return u

inverse #

inverse(y: Tensor) -> Tensor

Apply inverse of the log-odds transform

unconstrained variable y is transformed into a constrained variable x with lower and upper bound.

.. math::

X = lower + (upper - lower) \cdot logit^{-1}(Y)

Parameters:

Name	Type	Description	Default
`y`	`Tensor`	Unconstrained variable	required

Returns:

Name	Type	Description
`x`	`Tensor`	Constrained variable with lower and upper bound

Source code in src/elicito/utils.py

def inverse(self, y: tf.Tensor) -> tf.Tensor:
    r"""
    Apply inverse of the log-odds transform

    unconstrained variable y is transformed into a constrained variable x
    with ``lower`` and ``upper`` bound.

    .. math::

        X = lower + (upper - lower) \cdot logit^{-1}(Y)

    Parameters
    ----------
    y
        Unconstrained variable

    Returns
    -------
    x :
        Constrained variable with lower and upper bound

    """
    # inverse of log-odds transform
    x = self.lower + (self.upper - self.lower) * self.inv_logit(y)
    # cast x to correct dtype
    x = tf.cast(x, dtype=tf.float32)
    return x

logit #

logit(u: Tensor) -> Tensor

Implement the logit transformation for :math:u \in (0,1):

.. math::

logit(u) = \log\left(\frac{u}{1-u}\right)

Parameters:

Name	Type	Description	Default
`u`	`Tensor`	Variable in open unit interval.	required

Returns:

Type	Description
`v`	Log-odds of u.

Source code in src/elicito/utils.py

def logit(self, u: tf.Tensor) -> tf.Tensor:
    r"""
    Implement the logit transformation for :math:`u \in (0,1)`:

    .. math::

        logit(u) = \log\left(\frac{u}{1-u}\right)

    Parameters
    ----------
    u
        Variable in open unit interval.

    Returns
    -------
    v
        Log-odds of u.

    """
    # log-odds definition
    v = tf.math.log(u / (1 - u))
    # cast v into correct dtype
    v = tf.cast(v, dtype=tf.float32)
    return v

LowerBound #

constrain lower-bounded distributions

Methods:

Name	Description
`__init__`	Transform `lower` bound variable to unconstrained variable Y
`forward`	Transform `lower`-bounded x via inverse-softplus into an unconstrained y.
`inverse`	Apply softplus to unconstrained y to get `lower`-bounded x

Source code in src/elicito/utils.py

class LowerBound:
    """
    constrain lower-bounded distributions
    """

    def __init__(self, lower: float):
        """
        Transform ``lower`` bound variable to unconstrained variable Y

        use inverse-softplus transform.

        References
        ----------
        - [Stan](https://mc-stan.org/docs/reference-manual/transforms.html)

        Parameters
        ----------
        lower
            Lower bound of variable X.

        """
        self.lower = lower

    def forward(self, x: float) -> Any:
        r"""
        Transform ``lower``-bounded x via inverse-softplus into an unconstrained y.

        .. math::

            Y = softplus^{-1}(X - lower)

        Parameters
        ----------
        x
            Variable with a lower bound.

        Returns
        -------
        y :
            Unconstrained variable.

        """
        # inverse softplus transform
        y = tfp.math.softplus_inverse(x - self.lower)
        # cast y into correct type
        y = tf.cast(y, dtype=tf.float32)
        return y

    def inverse(self, y: float) -> tf.Tensor:
        r"""
        Apply softplus to unconstrained y to get ``lower``-bounded x

        .. math::

            X = softplus(Y) + lower

        Parameters
        ----------
        y
            Unconstrained variable.

        Returns
        -------
        x :
            Variable with a lower bound.

        """
        # softplus transform
        x = tf.math.softplus(y) + self.lower
        # cast x into correct dtype
        x = tf.cast(x, dtype=tf.float32)
        return x

init #

__init__(lower: float)

Transform lower bound variable to unconstrained variable Y

use inverse-softplus transform.

References

Stan

Parameters:

Name	Type	Description	Default
`lower`	`float`	Lower bound of variable X.	required

Source code in src/elicito/utils.py

def __init__(self, lower: float):
    """
    Transform ``lower`` bound variable to unconstrained variable Y

    use inverse-softplus transform.

    References
    ----------
    - [Stan](https://mc-stan.org/docs/reference-manual/transforms.html)

    Parameters
    ----------
    lower
        Lower bound of variable X.

    """
    self.lower = lower

forward #

forward(x: float) -> Any

Transform lower-bounded x via inverse-softplus into an unconstrained y.

.. math::

Y = softplus^{-1}(X - lower)

Parameters:

Name	Type	Description	Default
`x`	`float`	Variable with a lower bound.	required

Returns:

Name	Type	Description
`y`	`Any`	Unconstrained variable.

Source code in src/elicito/utils.py

def forward(self, x: float) -> Any:
    r"""
    Transform ``lower``-bounded x via inverse-softplus into an unconstrained y.

    .. math::

        Y = softplus^{-1}(X - lower)

    Parameters
    ----------
    x
        Variable with a lower bound.

    Returns
    -------
    y :
        Unconstrained variable.

    """
    # inverse softplus transform
    y = tfp.math.softplus_inverse(x - self.lower)
    # cast y into correct type
    y = tf.cast(y, dtype=tf.float32)
    return y

inverse #

inverse(y: float) -> Tensor

Apply softplus to unconstrained y to get lower-bounded x

.. math::

X = softplus(Y) + lower

Parameters:

Name	Type	Description	Default
`y`	`float`	Unconstrained variable.	required

Returns:

Name	Type	Description
`x`	`Tensor`	Variable with a lower bound.

Source code in src/elicito/utils.py

def inverse(self, y: float) -> tf.Tensor:
    r"""
    Apply softplus to unconstrained y to get ``lower``-bounded x

    .. math::

        X = softplus(Y) + lower

    Parameters
    ----------
    y
        Unconstrained variable.

    Returns
    -------
    x :
        Variable with a lower bound.

    """
    # softplus transform
    x = tf.math.softplus(y) + self.lower
    # cast x into correct dtype
    x = tf.cast(x, dtype=tf.float32)
    return x

UpperBound #

transform upper bounded distribution

Methods:

Name	Description
`__init__`	Transform `upper` bounded x into unconstrained y
`forward`	Transform upper-bouned into unconstarined variable
`inverse`	Transform uncstrained into lower-bounded variable

Source code in src/elicito/utils.py

class UpperBound:
    """
    transform ``upper`` bounded distribution
    """

    def __init__(self, upper: float):
        """
        Transform ``upper`` bounded x into unconstrained y

        use inverse-softplus transform.

        Parameters
        ----------
        upper
            Upper bound of variable X.

        References
        ----------
        + [Stan](https://mc-stan.org/docs/reference-manual/transforms.html)

        """
        self.upper = upper

    def forward(self, x: float) -> Any:
        r"""
        Transform upper-bouned into unconstarined variable

        use inverse-softplus transform

        .. math::

            Y = softplus^{-1}(upper - X)

        Parameters
        ----------
        x
            Variable with an upper bound.

        Returns
        -------
        y :
            Unconstrained variable.

        """
        # logarithmic transform
        y = tfp.math.softplus_inverse(self.upper - x)
        # cast y into correct dtype
        y = tf.cast(y, dtype=tf.float32)
        return y

    def inverse(self, y: float) -> tf.Tensor:
        r"""
        Transform uncstrained into lower-bounded variable

        use softplus transform

        .. math::

            X = upper - softplus(Y)

        Parameters
        ----------
        y
            Unconstrained variable.

        Returns
        -------
        x :
            Variable with an upper bound.

        """
        # exponential transform
        x = self.upper - tf.math.softplus(y)
        # cast x into correct dtype
        x = tf.cast(x, dtype=tf.float32)
        return x

init #

__init__(upper: float)

Transform upper bounded x into unconstrained y

use inverse-softplus transform.

Parameters:

Name	Type	Description	Default
`upper`	`float`	Upper bound of variable X.	required

References

Stan

Source code in src/elicito/utils.py

def __init__(self, upper: float):
    """
    Transform ``upper`` bounded x into unconstrained y

    use inverse-softplus transform.

    Parameters
    ----------
    upper
        Upper bound of variable X.

    References
    ----------
    + [Stan](https://mc-stan.org/docs/reference-manual/transforms.html)

    """
    self.upper = upper

forward #

forward(x: float) -> Any

Transform upper-bouned into unconstarined variable

use inverse-softplus transform

.. math::

Y = softplus^{-1}(upper - X)

Parameters:

Name	Type	Description	Default
`x`	`float`	Variable with an upper bound.	required

Returns:

Name	Type	Description
`y`	`Any`	Unconstrained variable.

Source code in src/elicito/utils.py

def forward(self, x: float) -> Any:
    r"""
    Transform upper-bouned into unconstarined variable

    use inverse-softplus transform

    .. math::

        Y = softplus^{-1}(upper - X)

    Parameters
    ----------
    x
        Variable with an upper bound.

    Returns
    -------
    y :
        Unconstrained variable.

    """
    # logarithmic transform
    y = tfp.math.softplus_inverse(self.upper - x)
    # cast y into correct dtype
    y = tf.cast(y, dtype=tf.float32)
    return y

inverse #

inverse(y: float) -> Tensor

Transform uncstrained into lower-bounded variable

use softplus transform

.. math::

X = upper - softplus(Y)

Parameters:

Name	Type	Description	Default
`y`	`float`	Unconstrained variable.	required

Returns:

Name	Type	Description
`x`	`Tensor`	Variable with an upper bound.

Source code in src/elicito/utils.py

def inverse(self, y: float) -> tf.Tensor:
    r"""
    Transform uncstrained into lower-bounded variable

    use softplus transform

    .. math::

        X = upper - softplus(Y)

    Parameters
    ----------
    y
        Unconstrained variable.

    Returns
    -------
    x :
        Variable with an upper bound.

    """
    # exponential transform
    x = self.upper - tf.math.softplus(y)
    # cast x into correct dtype
    x = tf.cast(x, dtype=tf.float32)
    return x

compute_num_weights #

compute_num_weights(
    num_NN_weights: list[TensorShape],
) -> int

Compute number of weights of a tf.keras model.

Parameters:

Name	Type	Description	Default
`num_NN_weights`	`list[TensorShape]`	list of tf.TensorShape objects of each layer in the model.	required

Returns:

Type	Description
`int`	number of weights of the model (incl. biases)

Source code in src/elicito/utils.py

def compute_num_weights(num_NN_weights: list[tf.TensorShape]) -> int:
    """
    Compute number of weights of a tf.keras model.

    Parameters
    ----------
    num_NN_weights :
        list of tf.TensorShape objects of each layer in the model.

    Returns
    -------
    :
        number of weights of the model (incl. biases)
    """
    return sum(
        int(tf.reduce_prod([d if d is not None else 1 for d in shape]))
        for shape in num_NN_weights
    )

dry_run #

dry_run(
    model: dict[str, Any],
    parameters: list[Parameter],
    targets: list[Target],
    trainer: Trainer,
    initializer: Initializer,
    network: Optional[NFDict],
) -> tuple[
    dict[Any, Any],
    Tensor,
    dict[Any, Any],
    dict[Any, Any],
    Any,
]

Run generative model in forward mode for a single epoch

Parameters:

Name	Type	Description	Default
`model`	`dict[str, Any]`	User-input from `model`.	required
`parameters`	`list[Parameter]`	User-input from `parameter`.	required
`targets`	`list[Target]`	User-input from `target`.	required
`trainer`	`Trainer`	User-input from `trainer`.	required
`initializer`	`Initializer`	User-input from `initializer`.	required
`network`	`Optional[NFDict]`	User-input from one of the methods implemented in the `networks` module.	required

Returns:

Type	Description
`tuple[dict[Any, Any], Tensor, dict[Any, Any], dict[Any, Any], Any]`	(elicited_statistics, prior_samples, model_simulations, target_quantities, prior_model)

Source code in src/elicito/utils.py

def dry_run(  # noqa: PLR0913
    model: dict[str, Any],
    parameters: list[Parameter],
    targets: list[Target],
    trainer: Trainer,
    initializer: Initializer,
    network: Optional[NFDict],
) -> tuple[dict[Any, Any], tf.Tensor, dict[Any, Any], dict[Any, Any], Any]:
    """
    Run generative model in forward mode for a single epoch

    Parameters
    ----------
    model
        User-input from [`model`][elicito.elicit.model].

    parameters
        User-input from [`parameter`][elicito.elicit.parameter].

    targets
        User-input from [`target`][elicito.elicit.target].

    trainer
        User-input from [`trainer`][elicito.elicit.trainer].

    initializer
        User-input from [`initializer`][elicito.elicit.initializer].

    network
        User-input from one of the methods implemented in the
        [`networks`][elicito.networks] module.

    Returns
    -------
    :
        (elicited_statistics, prior_samples, model_simulations,
        target_quantities, prior_model)
    """
    if (
        trainer["method"] == "parametric_prior"
        and initializer["distribution"] is not None
    ):
        init_matrix = el.initialization.uniform_samples(
            seed=trainer["seed"],
            hyppar=initializer["distribution"]["hyper"],  # type: ignore [arg-type]
            n_samples=initializer["iterations"],  # type: ignore [arg-type]
            method=initializer["method"],  # type: ignore [arg-type]
            mean=initializer["distribution"]["mean"],
            radius=initializer["distribution"]["radius"],
            parameters=parameters,
        )

        init_matrix_slice = {f"{key}": init_matrix[key][0] for key in init_matrix}

    elif trainer["method"] == "deep_prior" and network is not None:
        init_matrix_slice = None

    else:
        init_matrix_slice = initializer["hyperparams"]

    prior_model = Priors(
        ground_truth=False,
        init_matrix_slice=init_matrix_slice,
        trainer=trainer,
        parameters=parameters,
        network=network,
        expert=None,  # type: ignore
        seed=trainer["seed"],
    )

    (elicited_statistics, prior_samples, model_simulations, target_quantities) = (
        one_forward_simulation(
            prior_model=prior_model, model=model, targets=targets, seed=trainer["seed"]
        )
    )

    return (
        elicited_statistics,
        prior_samples,
        model_simulations,
        target_quantities,
        prior_model,
    )

get_expert_data #

get_expert_data(
    trainer: Trainer,
    model: dict[str, Any],
    targets: list[Target],
    expert: ExpertDict,
    parameters: list[Parameter],
    network: Optional[NFDict],
    seed: int,
) -> tuple[Any, ...]

Load the training data

data can be expert data or data simulations using a pre-defined ground truth.

Parameters:

Name	Type	Description	Default
`trainer`	`Trainer`	Specification of training settings and meta-information for workflow	required
`model`	`dict[str, Any]`	Specification of generative model	required
`targets`	`list[Target]`	List of target quantities	required
`expert`	`ExpertDict`	Provide input data from expert or simulate data from oracle with either the `data` or `simulator` method	required
`parameters`	`list[Parameter]`	List of model parameters specified with :func:`elicit.elicit.parameter`.	required
`network`	`Optional[NFDict]`	Specification of neural network Only required for `deep_prior` method. For `parametric_prior` use `None`.	required
`seed`	`int`	Internal seed for reproducible results	required

Returns:

Name	Type	Description
`expert_data`	`Any`	dictionary containing the training data. Must have same form as the model-simulated elicited statistics. Correct specification of keys can be checked using :func:`elicit.utils.get_expert_datformat`
`expert_prior`	`...`	samples from ground truth. Exists only if expert data are simulated from an oracle. Otherwise this output is `None`

Source code in src/elicito/utils.py

def get_expert_data(  # noqa: PLR0913
    trainer: Trainer,
    model: dict[str, Any],
    targets: list[Target],
    expert: ExpertDict,
    parameters: list[Parameter],
    network: Optional[NFDict],
    seed: int,
) -> tuple[Any, ...]:
    """
    Load the training data

    data can be expert data or data simulations using a pre-defined ground truth.

    Parameters
    ----------
    trainer
        Specification of training settings and meta-information for
        workflow

    model
        Specification of generative model

    targets
        List of target quantities

    expert
        Provide input data from expert or simulate data from oracle with
        either the ``data`` or ``simulator`` method

    parameters
        List of model parameters specified with :func:`elicit.elicit.parameter`.

    network
        Specification of neural network
        Only required for ``deep_prior`` method. For ``parametric_prior``
        use ``None``.

    seed
        Internal seed for reproducible results

    Returns
    -------
    expert_data :
        dictionary containing the training data. Must have same form as the
        model-simulated elicited statistics. Correct specification of
        keys can be checked using :func:`elicit.utils.get_expert_datformat`

    expert_prior :
        samples from ground truth. Exists only if expert data are simulated
        from an oracle. Otherwise this output is ``None``

    """
    try:
        expert["data"]
    except KeyError:
        oracle = True
    else:
        oracle = False

    if oracle:
        # set seed
        tf.random.set_seed(seed)
        # sample from true priors
        prior_model = Priors(
            ground_truth=True,
            init_matrix_slice=None,
            trainer=trainer,
            parameters=parameters,
            network=network,
            expert=expert,
            seed=seed,
        )
        # compute elicited statistics and target quantities
        expert_data, expert_prior, *_ = one_forward_simulation(
            prior_model=prior_model, model=model, targets=targets, seed=seed
        )
        return tuple((expert_data, expert_prior))
    else:
        # load expert data from file
        expert_data = expert["data"]
        return tuple((expert_data, None))

get_expert_datformat #

get_expert_datformat(
    targets: list[Target],
) -> dict[str, list[Any]]

Inspect which data format for the expert data is expected by the method.

Parameters:

Name	Type	Description	Default
`targets`	`list[Target]`	list of target quantities	required

Returns:

Name	Type	Description
`elicit_dict`	`dict[str, list[Any]]`	expected format of expert data.

Source code in src/elicito/utils.py

def get_expert_datformat(targets: list[Target]) -> dict[str, list[Any]]:
    """
    Inspect which data format for the expert data is expected by the method.

    Parameters
    ----------
    targets
        list of target quantities

    Returns
    -------
    elicit_dict :
        expected format of expert data.

    """
    elicit_dict: dict[str, Any] = dict()
    for tar in targets:
        query = tar["query"]["name"]
        if query == "custom":
            query = tar["query"]["func_name"]
        target = tar["name"]
        if query == "pearson_correlation":
            key = "cor_" + target
        else:
            key = query + "_" + target
        elicit_dict[key] = list()

    return elicit_dict

gumbel_softmax_trick #

gumbel_softmax_trick(
    likelihood: Any, upper_thres: float, temp: float = 1.6
) -> Any

Apply softmax-gumble trick

The softmax-gumbel trick computes a continuous approximation of ypred from a discrete likelihood and thus allows for the computation of gradients for discrete random variables.

Currently, this approach is only implemented for models without upper boundary (e.g., Poisson model).

References

Maddison, C. J., Mnih, A. & Teh, Y. W. The concrete distribution: A continuous relaxation of discrete random variables in International Conference on Learning Representations (2017). https://doi.org/10.48550/arXiv.1611.00712
Jang, E., Gu, S. & Poole, B. Categorical reparameterization with gumbel-softmax in International Conference on Learning Representations (2017). https://openreview.net/forum?id=rkE3y85ee.
Joo, W., Kim, D., Shin, S. & Moon, I.-C. Generalized gumbel-softmax gradient estimator for generic discrete random variables. Preprint at https://doi.org/10.48550/arXiv.2003.01847 (2020).

Parameters:

Name	Type	Description	Default
`likelihood`	`Any`	shape = [B, num_samples, num_obs, 1] likelihood function used in the generative model. Must be a tfp.distributions object.	required
`upper_thres`	`float`	upper threshold at which the distribution of the outcome variable is truncated. For double-bounded distribution (e.g. Binomial) this is simply the "total count" information. Lower-bounded distribution (e.g. Poisson) must be truncated to create an artificial double-boundedness.	required
`temp`	`float`	temperature hyperparameter of softmax function. A temperature going towards zero yields approximates a categorical distribution, while a temperature >> 0 approximates a continuous distribution.	`1.6`

Returns:

Name	Type	Description
`ypred`	`Any`	continuously approximated ypred from the discrete likelihood.

Raise

ValueError if rank of likelihood is not 4. The shape of the likelihood obj must have an extra final dimension, i.e., (B, num_samples, num_obs, 1), for the softmax-gumbel computation. Use for example tf.expand_dims(mu,-1) for expanding the batch-shape of the likelihood.

if likelihood is not in tfp.distributions module. The likelihood
must be a tfp.distributions object.

Source code in src/elicito/utils.py

def gumbel_softmax_trick(likelihood: Any, upper_thres: float, temp: float = 1.6) -> Any:
    """
    Apply softmax-gumble trick

    The softmax-gumbel trick computes a continuous approximation of ypred from
    a discrete likelihood and thus allows for the computation of gradients for
    discrete random variables.

    Currently, this approach is only implemented for models without upper
    boundary (e.g., Poisson model).

    References
    ----------
    - Maddison, C. J., Mnih, A. & Teh, Y. W. The concrete distribution:
      A continuous relaxation of discrete random variables in International
      Conference on Learning Representations (2017).
      https://doi.org/10.48550/arXiv.1611.00712
    - Jang, E., Gu, S. & Poole, B. Categorical reparameterization with
      gumbel-softmax in International Conference on Learning Representations
      (2017). https://openreview.net/forum?id=rkE3y85ee.
    - Joo, W., Kim, D., Shin, S. & Moon, I.-C. Generalized gumbel-softmax
      gradient estimator for generic discrete random variables. Preprint
      at https://doi.org/10.48550/arXiv.2003.01847 (2020).

    Parameters
    ----------
    likelihood
        shape = [B, num_samples, num_obs, 1]
        likelihood function used in the generative model.
        Must be a tfp.distributions object.

    upper_thres
        upper threshold at which the distribution of the outcome variable is
        truncated. For double-bounded distribution (e.g. Binomial) this is
        simply the "total count" information. Lower-bounded distribution
        (e.g. Poisson) must be truncated to create an artificial
        double-boundedness.

    temp
        temperature hyperparameter of softmax function. A temperature going
        towards zero yields approximates a categorical distribution, while
        a temperature >> 0 approximates a continuous distribution.

    Returns
    -------
    ypred :
        continuously approximated ypred from the discrete likelihood.

    Raise
    -----
    ValueError
        if rank of ``likelihood`` is not 4. The shape of the likelihood obj
        must have an extra final dimension, i.e., (B, num_samples, num_obs, 1),
        for the softmax-gumbel computation. Use for example
        ``tf.expand_dims(mu,-1)`` for expanding the batch-shape of the
        likelihood.

        if likelihood is not in tfp.distributions module. The likelihood
        must be a tfp.distributions object.

    """
    # check rank of likelihood object
    if len(likelihood.batch_shape) != 4:  # noqa: PLR2004
        msg = (
            "The 'likelihood' in the generative model must have",
            " batch_shape = (B, num_samples, num_obs, 1).",
            " The additional final axis is required by the softmax-gumbel",
            " computation. Use for example `tf.expand_dims(mu,-1)` for",
            " expanding the batch-shape of the likelihood.",
        )
        raise ValueError(msg)

    # set seed
    tf.random.set_seed(el.SEED)
    # get batch size, num_samples, num_observations
    B, S, number_obs, _ = likelihood.batch_shape
    # constant outcome vector (including zero outcome)
    thres = upper_thres
    c = tf.range(thres + 1, delta=1, dtype=tf.float32)
    # broadcast to shape (B, rep, outcome-length)
    c_brct = tf.broadcast_to(c[None, None, None, :], shape=(B, S, number_obs, len(c)))
    # compute pmf value
    pi = likelihood.prob(c_brct)
    # prevent underflow
    pi = tf.where(pi < 1.8 * 10 ** (-30), 1.8 * 10 ** (-30), pi)
    # sample from uniform
    u = tfd.Uniform(0, 1).sample((B, S, number_obs, len(c)))
    # generate a gumbel sample from uniform sample
    g = -tf.math.log(-tf.math.log(u))
    # softmax gumbel trick
    w = tf.nn.softmax(
        tf.math.divide(
            tf.math.add(tf.math.log(pi), g),
            temp,
        )
    )
    # reparameterization/linear transformation
    ypred = tf.reduce_sum(tf.multiply(w, c), axis=-1)
    return ypred

identity #

identity(x: float) -> Any

Identity function. Returns the input

Parameters:

Name	Type	Description	Default
`x`	`float`	Input x	required

Returns:

Name	Type	Description
`x`	`Any`	input x without transformation.

Source code in src/elicito/utils.py

def identity(x: float) -> Any:
    """
    Identity function. Returns the input

    Parameters
    ----------
    x
        Input x

    Returns
    -------
    x :
        input x without transformation.

    """
    return x

load #

load(file: str) -> Any

Load a saved eliobj from specified path.

Parameters:

Name	Type	Description	Default
`file`	`str`	path where `eliobj` object is saved.	required

Returns:

Name	Type	Description
`eliobj`	`Any`	loaded `eliobj` object.

Source code in src/elicito/utils.py

def load(file: str) -> Any:
    """
    Load a saved ``eliobj`` from specified path.

    Parameters
    ----------
    file
        path where ``eliobj`` object is saved.

    Returns
    -------
    eliobj :
        loaded ``eliobj`` object.

    """
    try:
        import pandas as pd
    except ImportError as exc:
        raise MissingOptionalDependencyError(
            "data_wrangling", requirement="pandas"
        ) from exc

    obj_pickled = pd.read_pickle(file)  # noqa: S301
    obj = pickle.loads(obj_pickled)  # noqa: S301

    eliobj = el.Elicit(
        model=obj["model"],
        parameters=obj["parameters"],
        targets=obj["targets"],
        expert=obj["expert"],
        optimizer=obj["optimizer"],
        trainer=obj["trainer"],
        initializer=obj["initializer"],
        network=obj["network"],
    )

    # add results if already fitted
    try:
        obj["results"]
    except:  # noqa: E722
        eliobj.temp_history = obj["temp_history"]
        eliobj.temp_results = obj["temp_results"]
    else:
        eliobj.results = obj["results"]  # type: ignore

    return eliobj

one_forward_simulation #

one_forward_simulation(
    prior_model: Priors,
    model: dict[str, Any],
    targets: list[Target],
    seed: int,
) -> tuple[
    dict[Any, Any], Tensor, dict[Any, Any], dict[Any, Any]
]

Run one forward simulation from prior samples to elicited statistics.

Parameters:

Name	Type	Description	Default
`prior_model`	`Priors`	Initialized prior distributions which can be used for sampling.	required
`model`	`dict[str, Any]`	Specification of generative model	required
`targets`	`list[Target]`	List of target quantities	required
`seed`	`int`	Random seed.	required

Returns:

Name	Type	Description
`elicited_statistics`	`dict[Any, Any]`	Dictionary containing the elicited statistics that can be used to compute the loss components
`prior_samples`	`Tensor`	Samples from prior distributions
`model_simulations`	`dict[Any, Any]`	Samples from the generative model (likelihood) given the prior samples for the model parameters
`target_quantities`	`dict[Any, Any]`	Target quantities as a function of the model simulations.

Source code in src/elicito/utils.py

def one_forward_simulation(
    prior_model: Priors, model: dict[str, Any], targets: list[Target], seed: int
) -> tuple[dict[Any, Any], tf.Tensor, dict[Any, Any], dict[Any, Any]]:
    """
    Run one forward simulation from prior samples to elicited statistics.

    Parameters
    ----------
    prior_model
        Initialized prior distributions which can be used for sampling.

    model
        Specification of generative model

    targets
        List of target quantities

    seed
        Random seed.

    Returns
    -------
    elicited_statistics :
        Dictionary containing the elicited statistics that can be used to
        compute the loss components

    prior_samples :
        Samples from prior distributions

    model_simulations :
        Samples from the generative model (likelihood) given the prior samples
        for the model parameters

    target_quantities :
        Target quantities as a function of the model simulations.

    """
    # set seed
    tf.random.set_seed(seed)
    # generate samples from initialized prior
    prior_samples = prior_model()
    # simulate prior predictive distribution based on prior samples
    # and generative model
    model_simulations = simulate_from_generator(prior_samples, seed, model)
    # compute the target quantities
    target_quantities = computation_target_quantities(
        model_simulations, prior_samples, targets
    )
    # compute the elicited statistics by applying a specific elicitation
    # method on the target quantities
    elicited_statistics = computation_elicited_statistics(target_quantities, targets)
    return (elicited_statistics, prior_samples, model_simulations, target_quantities)

parallel #

parallel(
    runs: int = 4,
    cores: Optional[int] = None,
    seeds: Optional[list[int]] = None,
) -> Parallel

Specify parallelization

Specification for parallelizing training by running multiple training instances with different seeds simultaneously.

Parameters:

Name	Type	Description	Default
`runs`	`int`	Number of replication.	`4`
`cores`	`Optional[int]`	Number of cores that should be used.	`None`
`seeds`	`Optional[list[int]]`	A list of seeds. If `None` seeds are drawn from a Uniform(0,999999) distribution. The seed information corresponding to each chain is stored in `eliobj.results`.	`None`

Returns:

Name	Type	Description
`parallel_dict`	`Parallel`	dictionary containing the parallelization settings.

Source code in src/elicito/utils.py

def parallel(
    runs: int = 4, cores: Optional[int] = None, seeds: Optional[list[int]] = None
) -> Parallel:
    """
    Specify parallelization

    Specification for parallelizing training by running multiple training
    instances with different seeds simultaneously.

    Parameters
    ----------
    runs
        Number of replication.

    cores
        Number of cores that should be used.

    seeds
        A list of seeds. If ``None`` seeds are drawn from a Uniform(0,999999)
        distribution. The seed information corresponding to each chain is
        stored in ``eliobj.results``.

    Returns
    -------
    parallel_dict :
        dictionary containing the parallelization settings.

    """
    parallel_dict: Parallel = dict(runs=runs, cores=cores, seeds=seeds)  # type: ignore

    if cores is None:
        parallel_dict["cores"] = runs

    return parallel_dict

save #

save(
    eliobj: Any,
    name: Optional[str] = None,
    file: Optional[str] = None,
    overwrite: bool = False,
) -> None

Save the eliobj as pickle.

Parameters:

Name	Type	Description	Default
`eliobj`	`Any`	Instance of the :func:`elicit.elicit.Elicit` class.	required
`name`	`Optional[str]`	Name of the saved .pkl file. File is saved as .results/{method}/{name}_{seed}.pkl	`None`
`file`	`Optional[str]`	Path to file, including file name, e.g. file="res" (saved as res.pkl) or file="method1/res" (saved as method1/res.pkl)	`None`
`overwrite`	`bool`	Whether to overwrite existing file.	`False`

Source code in src/elicito/utils.py

def save(
    eliobj: Any,
    name: Optional[str] = None,
    file: Optional[str] = None,
    overwrite: bool = False,
) -> None:
    """
    Save the eliobj as pickle.

    Parameters
    ----------
    eliobj
        Instance of the :func:`elicit.elicit.Elicit` class.

    name
        Name of the saved .pkl file.
        File is saved as .results/{method}/{name}_{seed}.pkl

    file
        Path to file, including file name,
        e.g. file="res" (saved as res.pkl) or
        file="method1/res" (saved as method1/res.pkl)

    overwrite
        Whether to overwrite existing file.

    """
    # either name or file must be specified
    if not ((name is None) ^ (file is None)):
        msg = (
            "Name and file cannot be both None or specified.",
            "Either one has to be None.",
        )
        raise AssertionError(msg)

    if (name is not None) and (file is None):
        if name.endswith(".pkl"):
            name = name.removesuffix(".pkl")
        # create saving path
        path = f"./results/{eliobj.trainer['method']}/{name}_{eliobj.trainer['seed']}"

    if (name is None) and (file is not None):
        # postprocess file (or name) to avoid file.pkl.pkl
        if file.endswith(".pkl"):
            file = file.removesuffix(".pkl")
        path = "./" + file

    # check whether saving path is already used
    if os.path.isfile(path + ".pkl") and not overwrite:
        user_ans = input(
            f"{path=} is not empty."
            + "\nDo you want to overwrite it?"
            + " Press 'y' for overwriting and 'n' to abort."
        )
        while user_ans not in ["n", "y"]:
            user_ans = input(
                "Please press either 'y' for overwriting or 'n'"
                + "to abort the process."
            )

        if user_ans == "n":
            overwrite = False
            logger.info("Process aborted. File is not overwritten.")

    if not os.path.isfile(path + ".pkl") or overwrite:
        storage = dict()
        # user inputs
        storage["model"] = eliobj.model
        storage["parameters"] = eliobj.parameters
        storage["targets"] = eliobj.targets
        storage["expert"] = eliobj.expert
        storage["optimizer"] = eliobj.optimizer
        storage["trainer"] = eliobj.trainer
        storage["initializer"] = eliobj.initializer
        storage["network"] = eliobj.network
        # results
        try:
            eliobj.results
        except:  # noqa: E722
            storage["temp_results"] = []
            storage["temp_history"] = []
        else:
            storage["results"] = eliobj.results

        save_as_pkl(storage, path + ".pkl")

        print(f"saved in: {path}.pkl")

save_as_pkl #

save_as_pkl(obj: Any, save_dir: str) -> None

Save file as pickle.

Parameters:

Name	Type	Description	Default
`obj`	`Any`	Variable that needs to be saved.	required
`save_dir`	`str`	Path indicating the file location.	required

Examples:

>>> save_as_pkl(obj, "results/file.pkl")

Source code in src/elicito/utils.py

def save_as_pkl(obj: Any, save_dir: str) -> None:
    """
    Save file as pickle.

    Parameters
    ----------
    obj
        Variable that needs to be saved.

    save_dir
        Path indicating the file location.

    Examples
    --------
    >>> save_as_pkl(obj, "results/file.pkl")  # doctest: +SKIP

    """
    # if directory does not exist, create it
    os.makedirs(os.path.dirname(save_dir), exist_ok=True)
    # save obj to location as pickle
    serialized_obj = cloudpickle.dumps(obj)
    with open(save_dir, "wb") as file:
        pickle.dump(serialized_obj, file=file)

elicito.utils#

DoubleBound #

__init__ #

forward #

inv_logit #

inverse #

logit #

LowerBound #

__init__ #

forward #

inverse #

UpperBound #

__init__ #

forward #

inverse #

compute_num_weights #

dry_run #

get_expert_data #

get_expert_datformat #

gumbel_softmax_trick #

identity #

load #

one_forward_simulation #

parallel #

save #

save_as_pkl #

init #

init #

init #