Source code for respyabc.evaluation

"""This module contains all plots and point estimates that can
be used to conduct model evaluation."""

import pyabc
import numpy as np
import pandas as pd

import matplotlib.pyplot as plt
from scipy.stats import norm
from pyabc.visualization import plot_sample_numbers
from pyabc.visualization import plot_epsilons
from pyabc.visualization import plot_acceptance_rates_trajectory
from pyabc.visualization import plot_kde_2d
from pyabc.visualization import plot_effective_sample_sizes
from pyabc.transition import MultivariateNormalTransition
from pyabc.visualization.credible import compute_kde_max
from pyabc.visualization.credible import compute_credible_interval
from pyabc.visualization.credible import compute_quantile


[docs]def compute_point_estimate(history, run=None): """Returns point estimates for the pyabc run. Parameters ---------- history : pyabc.smc An object created by :func:`pyabc.abc.run()` or :func:`respyabc.respyabc()`. run : int, optional Positive integer determining which pyabc run should be used. If `None` last run is used. Returns ------- df_stacked_moments : pandas.DataFrame Data frame including the point estimate and its varianc for all parameters. """ if run is None: run = history.max_t magnitudes, probabilities = history.get_distribution(m=0, t=run) mean = np.array(magnitudes).T @ np.array(probabilities) var = np.array(magnitudes.var()) * np.sum(np.array(probabilities) ** 2) stacked_moments = np.vstack((mean, var)) df_stacked_moments = pd.DataFrame( stacked_moments, columns=magnitudes.columns, index=["estimate", "est_variance"] ) return df_stacked_moments
[docs]def compute_distribution_bounds(history, parameter, alpha, run): """Returns distribution bounds from pyabc posterior distribution. Parameters ---------- history : pyabc.smc An object created by :func:`pyabc.abc.run()` or :func:`respyabc.respyabc()`. parameter : str Parameter for which the credible interval should be computed. alpha : float Level of credibility. Must be between zero and one. run : int Positive integer determining which pyabc run should be used. If `None` last run is used. Returns ------- lower: float Lower bound of the interval. upper: float Upper bound of the interval. """ if run is None: run = history.max_t magnitudes, probabilities = history.get_distribution(m=0, t=run) magnitudes["probabilities"] = probabilities magnitudes_sorted = magnitudes.sort_values(by=parameter) magnitudes_sorted["cum_probabilities"] = magnitudes_sorted["probabilities"].cumsum() cut_magnitudes = magnitudes_sorted[ (magnitudes_sorted["cum_probabilities"] >= alpha / 2) & (magnitudes_sorted["cum_probabilities"] <= 1 - alpha / 2) ] cut_indexed = cut_magnitudes.reset_index(drop=True) cut_magnitudes = cut_indexed[parameter] lower = cut_magnitudes[0] upper = cut_magnitudes[len(cut_magnitudes) - 1] return lower, upper
[docs]def compute_central_credible_interval( history, parameter, interval_type="simulated", alpha=0.05 ): """Returns credible intervals for the all pyabc runs. Parameters ---------- history : pyabc.smc An object created by :func:`pyabc.abc.run()` or :func:`respyabc.respyabc()`. parameter : str Parameter for which the credible interval should be computed. interval_type : {"simulated", "mean"}, optional Method that is used to compute the interval ranges. The default is ``"simulated"``. alpha : float, optional Level of credibility. Must be between zero and one. Returns ------- df_ccf : pandas.DataFrame Data frame containing the credibility intervals for all runs. """ ccf = np.full([history.max_t + 1, 3], np.nan) for t in range(history.max_t + 1): df_point_estimate = compute_point_estimate(history, run=t) estimate, variance = df_point_estimate[parameter] if interval_type == "simulated": lower, upper = compute_distribution_bounds( history=history, parameter=parameter, alpha=alpha, run=t ) elif interval_type == "mean": upper = norm.ppf(1 - alpha / 2, loc=estimate, scale=variance) lower = norm.ppf(alpha, loc=estimate, scale=variance) ccf[t, :] = np.array([lower, estimate, upper]) df_ccf = pd.DataFrame(ccf, columns=["lower", "estimate", "upper"]) return df_ccf
[docs]def plot_kernel_density_posterior(history, parameter, xmin, xmax): """Plot the Kernel densities of the posterior distribution of an pyABC run. Parameters ---------- history : pyabc.smc An object created by :func:`pyabc.abc.run()` or :func:`respyabc.respyabc()`. parameter : str String including the name of the parameter for which the posterior should be plotted. xmin : float Minimum value for the x-axis' range. xmax : float Maximum value for the x-axis' range. Returns ------- Plot with posterior distribution of parameter. """ fig, ax = plt.subplots() for t in range(history.max_t + 1): df, w = history.get_distribution(m=0, t=t) pyabc.visualization.plot_kde_1d( df, w, xmin=xmin, xmax=xmax, x=parameter, ax=ax, label="PDF t={}".format(t) ) ax.legend()
[docs]def plot_credible_intervals( history, parameter, interval_type="simulated", alpha=0.05, main_title="Central Credible Intervals", y_label=None, ): """Plot the credible intervals of the posterior distribution of an pyABC run. Parameters ---------- history : pyabc.smc An object created by :func:`pyabc.abc.run()` or :func:`respyabc.respyabc()`. parameter : str String including the name of the parameter for which the posterior should be plotted. interval_type : {"simulated", "mean"}, optional Method that is used to compute the interval ranges. The default is ``"simulated"``. alpha : float, optional Level of credibility. Must be between zero and one. main_title : str, optional Main title of the plot. y_label : str or None, default None Label of y axis. If `None`, name of parameter is used. Returns ------- Plot with the central credible intervals of the parameter. """ if y_label is None: y_label = parameter df = compute_central_credible_interval( history=history, parameter=parameter, interval_type=interval_type, alpha=alpha ) fig, ax = plt.subplots() ax.errorbar( range(history.max_t + 1), df["estimate"], yerr=(df["upper"] - df["estimate"]), fmt="o", ) ax.set_xticks(np.arange(history.max_t + 1)) ax.set_ylabel(ylabel=y_label) ax.set_xlabel(xlabel="run") fig.suptitle(main_title)
[docs]def plot_history_summary( history, parameter_name, parameter_value, confidence_levels=[0.95, 0.9, 0.5], size=(12, 8), ): """Wrapper to plot the credible intervals of the posterior distribution, the sample numbers, the epsilons and the acceptance rates of an pyABC run. Parameters ---------- history : pyabc.smc An object created by :func:`pyabc.abc.run()` or :func:`respyabc.respyabc()`. parameter_name : str String including the name of the parameter for which the posterior should be plotted. parameter_value : float Magnitude of true parameter. confidence_levels : list, optional A list of floats indicating the levels for which the credible intervals are computed. size : tuple, optional Tuple of floats that is passed to :func:`plt.gcf().set_size_inches()`. Returns ------- Credible intervals of the posterior distribution, the sample numbers, the epsilons and the acceptance rates """ fig, ax = plt.subplots(2, 2) pyabc.visualization.plot_credible_intervals( history, levels=confidence_levels, ts=range(history.max_t + 1), show_mean=True, show_kde_max_1d=True, refval={parameter_name: parameter_value}, arr_ax=ax[0][0], ) plot_sample_numbers(history, ax=ax[1][0]) plot_epsilons(history, ax=ax[0][1]) plot_acceptance_rates_trajectory(history, ax=ax[1][1]) plt.gcf().set_size_inches(size) plt.gcf().tight_layout()
[docs]def plot_history_summary_no_kde( history, size=(12, 8), ): """Wrapper to plot the credible intervals of the posterior distribution, the sample numbers, the epsilons and the acceptance rates of an pyABC run. Parameters ---------- history : pyabc.smc An object created by :func:`pyabc.abc.run()` or :func:`respyabc.respyabc()`. size : tuple, optional Tuple of floats that is passed to :func:`plt.gcf().set_size_inches()`. Returns ------- Credible intervals of the posterior distribution, the sample numbers, the epsilons and the acceptance rates """ fig, ax = plt.subplots(2, 2) plot_effective_sample_sizes(history, ax=ax[0][0]) plot_sample_numbers(history, ax=ax[1][0]) plot_epsilons(history, ax=ax[0][1]) plot_acceptance_rates_trajectory(history, ax=ax[1][1]) plt.gcf().set_size_inches(size) plt.gcf().tight_layout()
[docs]def plot_multiple_credible_intervals( history, parameter_names, number_rows, number_columns, confidence_levels=[0.95, 0.9, 0.5], size=(12, 8), legend_location="lower right", delete_axes=None, ): """Wrapper to plot the credible intervals for multiple parameters. Parameters ---------- history : pyabc.smc An object created by :func:`pyabc.abc.run()` or :func:`respyabc.respyabc()`. parameter_names : list of str Strings including the name of the parameter for which the posterior should be plotted. number_rows : int Positive integer indicating the number of rows of plots. number_columns : int Positive integer indicating the number of plots per column. confidence_levels : list, optional A list of floats indicating the levels for which the credible intervals are computed. size : tuple, optional Tuple of floats that is passed to :func:`plt.gcf().set_size_inches()`. legend_location : str, optional Location of legend in plot. Default is "lower right" delete_axes : list of integers or None If list of integers, list specifies position of plot that should be deleted. Returns ------- Credible intervals of the posterior distributions. """ fig, ax = plt.subplots(number_rows, number_columns) column_index = 0 row_index = 0 for index in range(len(parameter_names)): if number_rows == 1: plot_credible_intervals_pyabc( history, levels=confidence_levels, par_names=[parameter_names[index]], ts=range(history.max_t + 1), show_mean=True, show_kde_max_1d=True, arr_ax=ax[column_index], ) else: plot_credible_intervals_pyabc( history, levels=confidence_levels, par_names=[parameter_names[index]], ts=range(history.max_t + 1), show_mean=True, show_kde_max_1d=True, arr_ax=ax[row_index][column_index], ) column_index += 1 if (column_index) == number_columns: row_index += 1 column_index = 0 lines_labels = [ax.get_legend_handles_labels() for ax in fig.axes] lines, labels = lines_labels[0] if delete_axes is not None: fig.delaxes(ax[delete_axes[0]][delete_axes[1]]) plt.legend(lines, labels, bbox_to_anchor=(1.3, 1)) plt.gcf().set_size_inches(size) plt.gcf().tight_layout()
[docs]def plot_2d_histogram( history, parameter_names, parameter_true, xmin, xmax, ymin, ymax, numx=200, numy=200, label="true theta", figsize=(10, 8), ): """Wrapper to plot 2 dimensional kernel density estimates. Parameters ---------- history : pyabc.smc An object created by :func:`pyabc.abc.run()` or :func:`respyabc.respyabc()`. parameter_names : list of str Strings including the name of the parameter for which the posterior should be plotted. xmin: float Minimum value for axes of first parameter. xmax: float Maximum value for axes of first parameter. ymin: float Minimum value for axes of second parameter. ymax: float Maximum value for axes of second parameter. label: str, optional Label for the true value. figsize : tuple, optional Tuple of floats that is passed to figsize. Returns ------- Plots for two dimensional kernel density estimates over all populations. """ fig = plt.figure(figsize=figsize) for t in range(history.max_t + 1): ncol = np.ceil(history.max_t / 3) if ncol == 0: ncol = 1 ax = fig.add_subplot(3, ncol, t + 1) ax = plot_kde_2d( *history.get_distribution(m=0, t=t), parameter_names[0], parameter_names[1], xmin=xmin, xmax=xmax, ymin=ymin, ymax=ymax, numx=numx, numy=numy, ax=ax, ) ax.scatter([parameter_true[0]], [parameter_true[1]], color="C1", label=label) ax.set_title("Posterior t={}".format(t)) ax.legend() fig.tight_layout()
[docs]def plot_credible_intervals_pyabc( history, m=0, ts=None, plot_legend=False, par_names=None, levels=None, show_mean=False, show_kde_max=False, show_kde_max_1d=False, size=None, refval=None, refval_color="C1", kde=None, kde_1d=None, arr_ax=None, ): """Taken from pyABC to adjust legend settings. Plot credible intervals over time. Parameters ---------- history: History The history to extract data from. m: int, optional (default = 0) The id of the model to plot for. ts: Union[List[int], int], optional (default = all) The time points to plot for. par_names: List[str], optional The parameter to plot for. If None, then all parameters are used. levels: List[float], optional (default = [0.95]) Confidence intervals to compute. show_mean: bool, optional (default = False) Whether to show the mean apart from the median as well. show_kde_max: bool, optional (default = False) Whether to show the one of the sampled points that gives the highest KDE value for the specified KDE. Note: It is not attemtped to find the overall hightest KDE value, but rather the sampled point with the highest value is taken as an approximation (of the MAP-value). show_kde_max_1d: bool, optional (default = False) Same as `show_kde_max`, but here the KDE is applied componentwise. size: tuple of float Size of the plot. refval: dict, optional (default = None) A dictionary of reference parameter values to plot for each of `par_names`. refval_color: str, optional Color to use for the reference value. kde: Transition, optional (default = MultivariateNormalTransition) The KDE to use for `show_kde_max`. kde_1d: Transition, optional (default = MultivariateNormalTransition) The KDE to use for `show_kde_max_1d`. arr_ax: List, optional Array of axes to use. Assumed to be a 1-dimensional list. Returns ------- arr_ax: Array of generated axes. """ if levels is None: levels = [0.95] levels = sorted(levels) if par_names is None: # extract all parameter names df, _ = history.get_distribution(m=m) par_names = list(df.columns.values) # dimensions n_par = len(par_names) n_confidence = len(levels) if ts is None: ts = list(range(0, history.max_t + 1)) n_pop = len(ts) # prepare axes if arr_ax is None: _, arr_ax = plt.subplots( nrows=n_par, ncols=1, sharex=False, sharey=False, figsize=size ) if not isinstance(arr_ax, (list, np.ndarray)): arr_ax = [arr_ax] fig = arr_ax[0].get_figure() # prepare matrices cis = np.empty((n_par, n_pop, 2 * n_confidence)) median = np.empty((n_par, n_pop)) if show_mean: mean = np.empty((n_par, n_pop)) if show_kde_max: kde_max = np.empty((n_par, n_pop)) if show_kde_max_1d: kde_max_1d = np.empty((n_par, n_pop)) if kde is None and show_kde_max: kde = MultivariateNormalTransition() if kde_1d is None and show_kde_max_1d: kde_1d = MultivariateNormalTransition() # fill matrices # iterate over populations for i_t, t in enumerate(ts): df, w = history.get_distribution(m=m, t=t) # normalize weights to be sure w /= w.sum() # fit kde if show_kde_max: _kde_max_pnt = compute_kde_max(kde, df, w) # iterate over parameters for i_par, par in enumerate(par_names): # as numpy array vals = np.array(df[par]) # median median[i_par, i_t] = compute_quantile(vals, w, 0.5) # mean if show_mean: mean[i_par, i_t] = np.sum(w * vals) # kde max if show_kde_max: kde_max[i_par, i_t] = _kde_max_pnt[par] if show_kde_max_1d: _kde_max_1d_pnt = compute_kde_max(kde_1d, df[[par]], w) kde_max_1d[i_par, i_t] = _kde_max_1d_pnt[par] # levels for i_c, confidence in enumerate(levels): lb, ub = compute_credible_interval(vals, w, confidence) cis[i_par, i_t, i_c] = lb cis[i_par, i_t, -1 - i_c] = ub # plot for i_par, (par, ax) in enumerate(zip(par_names, arr_ax)): for i_c, confidence in reversed(list(enumerate(levels))): ax.errorbar( x=range(n_pop), y=median[i_par].flatten(), yerr=[ median[i_par] - cis[i_par, :, i_c], cis[i_par, :, -1 - i_c] - median[i_par], ], capsize=(5.0 / n_confidence) * (i_c + 1), label="{:.2f}".format(confidence), ) ax.set_title(f"Parameter {par}") # mean if show_mean: ax.plot(range(n_pop), mean[i_par], "x-", label="Mean") # kde max if show_kde_max: ax.plot(range(n_pop), kde_max[i_par], "x-", label="Max KDE") if show_kde_max_1d: ax.plot(range(n_pop), kde_max_1d[i_par], "x-", label="Max KDE 1d") # reference value if refval is not None: ax.hlines( refval[par], xmin=0, xmax=n_pop - 1, color=refval_color, label="Reference value", ) ax.set_xticks(range(n_pop)) ax.set_xticklabels(ts) ax.set_ylabel(par) if plot_legend is True: ax.legend() # format arr_ax[-1].set_xlabel("Population t") if size is not None: fig.set_size_inches(size) fig.tight_layout() return arr_ax