"""This module contains all plots and point estimates that can
be used to conduct model evaluation."""
import pyabc
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from scipy.stats import norm
from pyabc.visualization import plot_sample_numbers
from pyabc.visualization import plot_epsilons
from pyabc.visualization import plot_acceptance_rates_trajectory
from pyabc.visualization import plot_kde_2d
from pyabc.visualization import plot_effective_sample_sizes
from pyabc.transition import MultivariateNormalTransition
from pyabc.visualization.credible import compute_kde_max
from pyabc.visualization.credible import compute_credible_interval
from pyabc.visualization.credible import compute_quantile
[docs]def compute_point_estimate(history, run=None):
"""Returns point estimates for the pyabc run.
Parameters
----------
history : pyabc.smc
An object created by :func:`pyabc.abc.run()` or
:func:`respyabc.respyabc()`.
run : int, optional
Positive integer determining which pyabc run should be used. If `None`
last run is used.
Returns
-------
df_stacked_moments : pandas.DataFrame
Data frame including the point estimate and its varianc for all parameters.
"""
if run is None:
run = history.max_t
magnitudes, probabilities = history.get_distribution(m=0, t=run)
mean = np.array(magnitudes).T @ np.array(probabilities)
var = np.array(magnitudes.var()) * np.sum(np.array(probabilities) ** 2)
stacked_moments = np.vstack((mean, var))
df_stacked_moments = pd.DataFrame(
stacked_moments, columns=magnitudes.columns, index=["estimate", "est_variance"]
)
return df_stacked_moments
[docs]def compute_distribution_bounds(history, parameter, alpha, run):
"""Returns distribution bounds from pyabc posterior distribution.
Parameters
----------
history : pyabc.smc
An object created by :func:`pyabc.abc.run()` or
:func:`respyabc.respyabc()`.
parameter : str
Parameter for which the credible interval should be computed.
alpha : float
Level of credibility. Must be between zero and one.
run : int
Positive integer determining which pyabc run should be used. If `None`
last run is used.
Returns
-------
lower: float
Lower bound of the interval.
upper: float
Upper bound of the interval.
"""
if run is None:
run = history.max_t
magnitudes, probabilities = history.get_distribution(m=0, t=run)
magnitudes["probabilities"] = probabilities
magnitudes_sorted = magnitudes.sort_values(by=parameter)
magnitudes_sorted["cum_probabilities"] = magnitudes_sorted["probabilities"].cumsum()
cut_magnitudes = magnitudes_sorted[
(magnitudes_sorted["cum_probabilities"] >= alpha / 2)
& (magnitudes_sorted["cum_probabilities"] <= 1 - alpha / 2)
]
cut_indexed = cut_magnitudes.reset_index(drop=True)
cut_magnitudes = cut_indexed[parameter]
lower = cut_magnitudes[0]
upper = cut_magnitudes[len(cut_magnitudes) - 1]
return lower, upper
[docs]def compute_central_credible_interval(
history, parameter, interval_type="simulated", alpha=0.05
):
"""Returns credible intervals for the all pyabc runs.
Parameters
----------
history : pyabc.smc
An object created by :func:`pyabc.abc.run()` or
:func:`respyabc.respyabc()`.
parameter : str
Parameter for which the credible interval should be computed.
interval_type : {"simulated", "mean"}, optional
Method that is used to compute the interval ranges.
The default is ``"simulated"``.
alpha : float, optional
Level of credibility. Must be between zero and one.
Returns
-------
df_ccf : pandas.DataFrame
Data frame containing the credibility intervals for all runs.
"""
ccf = np.full([history.max_t + 1, 3], np.nan)
for t in range(history.max_t + 1):
df_point_estimate = compute_point_estimate(history, run=t)
estimate, variance = df_point_estimate[parameter]
if interval_type == "simulated":
lower, upper = compute_distribution_bounds(
history=history, parameter=parameter, alpha=alpha, run=t
)
elif interval_type == "mean":
upper = norm.ppf(1 - alpha / 2, loc=estimate, scale=variance)
lower = norm.ppf(alpha, loc=estimate, scale=variance)
ccf[t, :] = np.array([lower, estimate, upper])
df_ccf = pd.DataFrame(ccf, columns=["lower", "estimate", "upper"])
return df_ccf
[docs]def plot_kernel_density_posterior(history, parameter, xmin, xmax):
"""Plot the Kernel densities of the posterior distribution of an pyABC run.
Parameters
----------
history : pyabc.smc
An object created by :func:`pyabc.abc.run()` or
:func:`respyabc.respyabc()`.
parameter : str
String including the name of the parameter for which
the posterior should be plotted.
xmin : float
Minimum value for the x-axis' range.
xmax : float
Maximum value for the x-axis' range.
Returns
-------
Plot with posterior distribution of parameter.
"""
fig, ax = plt.subplots()
for t in range(history.max_t + 1):
df, w = history.get_distribution(m=0, t=t)
pyabc.visualization.plot_kde_1d(
df, w, xmin=xmin, xmax=xmax, x=parameter, ax=ax, label="PDF t={}".format(t)
)
ax.legend()
[docs]def plot_credible_intervals(
history,
parameter,
interval_type="simulated",
alpha=0.05,
main_title="Central Credible Intervals",
y_label=None,
):
"""Plot the credible intervals of the posterior distribution of an pyABC run.
Parameters
----------
history : pyabc.smc
An object created by :func:`pyabc.abc.run()` or
:func:`respyabc.respyabc()`.
parameter : str
String including the name of the parameter for which
the posterior should be plotted.
interval_type : {"simulated", "mean"}, optional
Method that is used to compute the interval ranges.
The default is ``"simulated"``.
alpha : float, optional
Level of credibility. Must be between zero and one.
main_title : str, optional
Main title of the plot.
y_label : str or None, default None
Label of y axis. If `None`, name of parameter is used.
Returns
-------
Plot with the central credible intervals of the parameter.
"""
if y_label is None:
y_label = parameter
df = compute_central_credible_interval(
history=history, parameter=parameter, interval_type=interval_type, alpha=alpha
)
fig, ax = plt.subplots()
ax.errorbar(
range(history.max_t + 1),
df["estimate"],
yerr=(df["upper"] - df["estimate"]),
fmt="o",
)
ax.set_xticks(np.arange(history.max_t + 1))
ax.set_ylabel(ylabel=y_label)
ax.set_xlabel(xlabel="run")
fig.suptitle(main_title)
[docs]def plot_history_summary(
history,
parameter_name,
parameter_value,
confidence_levels=[0.95, 0.9, 0.5],
size=(12, 8),
):
"""Wrapper to plot the credible intervals of the posterior distribution,
the sample numbers, the epsilons and the acceptance rates of an pyABC run.
Parameters
----------
history : pyabc.smc
An object created by :func:`pyabc.abc.run()` or
:func:`respyabc.respyabc()`.
parameter_name : str
String including the name of the parameter for which
the posterior should be plotted.
parameter_value : float
Magnitude of true parameter.
confidence_levels : list, optional
A list of floats indicating the levels for which the credible
intervals are computed.
size : tuple, optional
Tuple of floats that is passed to :func:`plt.gcf().set_size_inches()`.
Returns
-------
Credible intervals of the posterior distribution,
the sample numbers, the epsilons and the acceptance rates
"""
fig, ax = plt.subplots(2, 2)
pyabc.visualization.plot_credible_intervals(
history,
levels=confidence_levels,
ts=range(history.max_t + 1),
show_mean=True,
show_kde_max_1d=True,
refval={parameter_name: parameter_value},
arr_ax=ax[0][0],
)
plot_sample_numbers(history, ax=ax[1][0])
plot_epsilons(history, ax=ax[0][1])
plot_acceptance_rates_trajectory(history, ax=ax[1][1])
plt.gcf().set_size_inches(size)
plt.gcf().tight_layout()
[docs]def plot_history_summary_no_kde(
history,
size=(12, 8),
):
"""Wrapper to plot the credible intervals of the posterior distribution,
the sample numbers, the epsilons and the acceptance rates of an pyABC run.
Parameters
----------
history : pyabc.smc
An object created by :func:`pyabc.abc.run()` or
:func:`respyabc.respyabc()`.
size : tuple, optional
Tuple of floats that is passed to :func:`plt.gcf().set_size_inches()`.
Returns
-------
Credible intervals of the posterior distribution,
the sample numbers, the epsilons and the acceptance rates
"""
fig, ax = plt.subplots(2, 2)
plot_effective_sample_sizes(history, ax=ax[0][0])
plot_sample_numbers(history, ax=ax[1][0])
plot_epsilons(history, ax=ax[0][1])
plot_acceptance_rates_trajectory(history, ax=ax[1][1])
plt.gcf().set_size_inches(size)
plt.gcf().tight_layout()
[docs]def plot_multiple_credible_intervals(
history,
parameter_names,
number_rows,
number_columns,
confidence_levels=[0.95, 0.9, 0.5],
size=(12, 8),
legend_location="lower right",
delete_axes=None,
):
"""Wrapper to plot the credible intervals for multiple parameters.
Parameters
----------
history : pyabc.smc
An object created by :func:`pyabc.abc.run()` or
:func:`respyabc.respyabc()`.
parameter_names : list of str
Strings including the name of the parameter for which
the posterior should be plotted.
number_rows : int
Positive integer indicating the number of rows of plots.
number_columns : int
Positive integer indicating the number of plots per column.
confidence_levels : list, optional
A list of floats indicating the levels for which the credible
intervals are computed.
size : tuple, optional
Tuple of floats that is passed to :func:`plt.gcf().set_size_inches()`.
legend_location : str, optional
Location of legend in plot. Default is "lower right"
delete_axes : list of integers or None
If list of integers, list specifies position of plot that should
be deleted.
Returns
-------
Credible intervals of the posterior distributions.
"""
fig, ax = plt.subplots(number_rows, number_columns)
column_index = 0
row_index = 0
for index in range(len(parameter_names)):
if number_rows == 1:
plot_credible_intervals_pyabc(
history,
levels=confidence_levels,
par_names=[parameter_names[index]],
ts=range(history.max_t + 1),
show_mean=True,
show_kde_max_1d=True,
arr_ax=ax[column_index],
)
else:
plot_credible_intervals_pyabc(
history,
levels=confidence_levels,
par_names=[parameter_names[index]],
ts=range(history.max_t + 1),
show_mean=True,
show_kde_max_1d=True,
arr_ax=ax[row_index][column_index],
)
column_index += 1
if (column_index) == number_columns:
row_index += 1
column_index = 0
lines_labels = [ax.get_legend_handles_labels() for ax in fig.axes]
lines, labels = lines_labels[0]
if delete_axes is not None:
fig.delaxes(ax[delete_axes[0]][delete_axes[1]])
plt.legend(lines, labels, bbox_to_anchor=(1.3, 1))
plt.gcf().set_size_inches(size)
plt.gcf().tight_layout()
[docs]def plot_2d_histogram(
history,
parameter_names,
parameter_true,
xmin,
xmax,
ymin,
ymax,
numx=200,
numy=200,
label="true theta",
figsize=(10, 8),
):
"""Wrapper to plot 2 dimensional kernel density estimates.
Parameters
----------
history : pyabc.smc
An object created by :func:`pyabc.abc.run()` or
:func:`respyabc.respyabc()`.
parameter_names : list of str
Strings including the name of the parameter for which
the posterior should be plotted.
xmin: float
Minimum value for axes of first parameter.
xmax: float
Maximum value for axes of first parameter.
ymin: float
Minimum value for axes of second parameter.
ymax: float
Maximum value for axes of second parameter.
label: str, optional
Label for the true value.
figsize : tuple, optional
Tuple of floats that is passed to figsize.
Returns
-------
Plots for two dimensional kernel density estimates over all populations.
"""
fig = plt.figure(figsize=figsize)
for t in range(history.max_t + 1):
ncol = np.ceil(history.max_t / 3)
if ncol == 0:
ncol = 1
ax = fig.add_subplot(3, ncol, t + 1)
ax = plot_kde_2d(
*history.get_distribution(m=0, t=t),
parameter_names[0],
parameter_names[1],
xmin=xmin,
xmax=xmax,
ymin=ymin,
ymax=ymax,
numx=numx,
numy=numy,
ax=ax,
)
ax.scatter([parameter_true[0]], [parameter_true[1]], color="C1", label=label)
ax.set_title("Posterior t={}".format(t))
ax.legend()
fig.tight_layout()
[docs]def plot_credible_intervals_pyabc(
history,
m=0,
ts=None,
plot_legend=False,
par_names=None,
levels=None,
show_mean=False,
show_kde_max=False,
show_kde_max_1d=False,
size=None,
refval=None,
refval_color="C1",
kde=None,
kde_1d=None,
arr_ax=None,
):
"""Taken from pyABC to adjust legend
settings. Plot credible intervals over time.
Parameters
----------
history: History
The history to extract data from.
m: int, optional (default = 0)
The id of the model to plot for.
ts: Union[List[int], int], optional (default = all)
The time points to plot for.
par_names: List[str], optional
The parameter to plot for. If None, then all parameters are used.
levels: List[float], optional (default = [0.95])
Confidence intervals to compute.
show_mean: bool, optional (default = False)
Whether to show the mean apart from the median as well.
show_kde_max: bool, optional (default = False)
Whether to show the one of the sampled points that gives the highest
KDE value for the specified KDE.
Note: It is not attemtped to find the overall hightest KDE value, but
rather the sampled point with the highest value is taken as an
approximation (of the MAP-value).
show_kde_max_1d: bool, optional (default = False)
Same as `show_kde_max`, but here the KDE is applied componentwise.
size: tuple of float
Size of the plot.
refval: dict, optional (default = None)
A dictionary of reference parameter values to plot for each of
`par_names`.
refval_color: str, optional
Color to use for the reference value.
kde: Transition, optional (default = MultivariateNormalTransition)
The KDE to use for `show_kde_max`.
kde_1d: Transition, optional (default = MultivariateNormalTransition)
The KDE to use for `show_kde_max_1d`.
arr_ax: List, optional
Array of axes to use. Assumed to be a 1-dimensional list.
Returns
-------
arr_ax: Array of generated axes.
"""
if levels is None:
levels = [0.95]
levels = sorted(levels)
if par_names is None:
# extract all parameter names
df, _ = history.get_distribution(m=m)
par_names = list(df.columns.values)
# dimensions
n_par = len(par_names)
n_confidence = len(levels)
if ts is None:
ts = list(range(0, history.max_t + 1))
n_pop = len(ts)
# prepare axes
if arr_ax is None:
_, arr_ax = plt.subplots(
nrows=n_par, ncols=1, sharex=False, sharey=False, figsize=size
)
if not isinstance(arr_ax, (list, np.ndarray)):
arr_ax = [arr_ax]
fig = arr_ax[0].get_figure()
# prepare matrices
cis = np.empty((n_par, n_pop, 2 * n_confidence))
median = np.empty((n_par, n_pop))
if show_mean:
mean = np.empty((n_par, n_pop))
if show_kde_max:
kde_max = np.empty((n_par, n_pop))
if show_kde_max_1d:
kde_max_1d = np.empty((n_par, n_pop))
if kde is None and show_kde_max:
kde = MultivariateNormalTransition()
if kde_1d is None and show_kde_max_1d:
kde_1d = MultivariateNormalTransition()
# fill matrices
# iterate over populations
for i_t, t in enumerate(ts):
df, w = history.get_distribution(m=m, t=t)
# normalize weights to be sure
w /= w.sum()
# fit kde
if show_kde_max:
_kde_max_pnt = compute_kde_max(kde, df, w)
# iterate over parameters
for i_par, par in enumerate(par_names):
# as numpy array
vals = np.array(df[par])
# median
median[i_par, i_t] = compute_quantile(vals, w, 0.5)
# mean
if show_mean:
mean[i_par, i_t] = np.sum(w * vals)
# kde max
if show_kde_max:
kde_max[i_par, i_t] = _kde_max_pnt[par]
if show_kde_max_1d:
_kde_max_1d_pnt = compute_kde_max(kde_1d, df[[par]], w)
kde_max_1d[i_par, i_t] = _kde_max_1d_pnt[par]
# levels
for i_c, confidence in enumerate(levels):
lb, ub = compute_credible_interval(vals, w, confidence)
cis[i_par, i_t, i_c] = lb
cis[i_par, i_t, -1 - i_c] = ub
# plot
for i_par, (par, ax) in enumerate(zip(par_names, arr_ax)):
for i_c, confidence in reversed(list(enumerate(levels))):
ax.errorbar(
x=range(n_pop),
y=median[i_par].flatten(),
yerr=[
median[i_par] - cis[i_par, :, i_c],
cis[i_par, :, -1 - i_c] - median[i_par],
],
capsize=(5.0 / n_confidence) * (i_c + 1),
label="{:.2f}".format(confidence),
)
ax.set_title(f"Parameter {par}")
# mean
if show_mean:
ax.plot(range(n_pop), mean[i_par], "x-", label="Mean")
# kde max
if show_kde_max:
ax.plot(range(n_pop), kde_max[i_par], "x-", label="Max KDE")
if show_kde_max_1d:
ax.plot(range(n_pop), kde_max_1d[i_par], "x-", label="Max KDE 1d")
# reference value
if refval is not None:
ax.hlines(
refval[par],
xmin=0,
xmax=n_pop - 1,
color=refval_color,
label="Reference value",
)
ax.set_xticks(range(n_pop))
ax.set_xticklabels(ts)
ax.set_ylabel(par)
if plot_legend is True:
ax.legend()
# format
arr_ax[-1].set_xlabel("Population t")
if size is not None:
fig.set_size_inches(size)
fig.tight_layout()
return arr_ax