"""
Container for the result of running a Laplace approximation.
"""
from typing import (
Any,
Dict,
Hashable,
List,
MutableMapping,
Optional,
Tuple,
Union,
)
import numpy as np
import pandas as pd
try:
import xarray as xr
XARRAY_INSTALLED = True
except ImportError:
XARRAY_INSTALLED = False
from cmdstanpy.cmdstan_args import Method
from cmdstanpy.utils.data_munging import build_xarray_data
from cmdstanpy.utils.stancsv import scan_generic_csv
from .metadata import InferenceMetadata
from .mle import CmdStanMLE
from .runset import RunSet
# TODO list:
# - docs and example notebook
# - make sure features like standalone GQ are updated/working
class CmdStanLaplace:
    """
    Container for the result of running a Laplace approximation.

    Holds the :class:`RunSet` of the run and the :class:`CmdStanMLE` mode
    it was constructed with. The draws are read lazily from the first
    output CSV file the first time they are needed and are then cached
    on the instance.
    """

    def __init__(self, runset: RunSet, mode: CmdStanMLE) -> None:
        """
        Initialize object.

        :param runset: RunSet whose ``method`` is ``Method.LAPLACE``.
        :param mode: object returned by the :attr:`mode` property.
        :raises ValueError: if ``runset.method`` is not ``Method.LAPLACE``.
        """
        if runset.method != Method.LAPLACE:
            raise ValueError(
                'Wrong runset method, expecting laplace runset, '
                f'found method {runset.method}'
            )
        self._runset = runset
        self._mode = mode
        # Empty 1-D array (shape ``(0,)``) marks "draws not loaded yet";
        # see _assemble_draws.
        self._draws: np.ndarray = np.array(())
        config = scan_generic_csv(runset.csv_files[0])
        self._metadata = InferenceMetadata(config)

    def _assemble_draws(self) -> None:
        """Read the draws from the first CSV file into ``self._draws``."""
        # Anything other than the initial (0,) shape means the draws have
        # already been loaded, so the file is read at most once.
        if self._draws.shape != (0,):
            return

        with open(self._runset.csv_files[0], 'r') as fd:
            # Skip the leading comment lines. The readline() call that
            # terminates the loop also consumes the first non-comment
            # line (the CSV header row), so loadtxt starts at the data.
            while (fd.readline()).startswith("#"):
                pass
            self._draws = np.loadtxt(
                fd,
                dtype=float,
                ndmin=2,
                delimiter=',',
                comments="#",
            )

    def stan_variable(self, var: str) -> np.ndarray:
        """
        Return a numpy.ndarray which contains the estimates for the
        named Stan program variable where the dimensions of the
        numpy.ndarray match the shape of the Stan program variable.

        This functionality is also available via a shortcut using ``.`` -
        writing ``fit.a`` is a synonym for ``fit.stan_variable("a")``

        :param var: variable name
        :raises ValueError: if ``var`` is not a known Stan variable name.

        See Also
        --------
        CmdStanMLE.stan_variable
        CmdStanMCMC.stan_variable
        CmdStanPathfinder.stan_variable
        CmdStanVB.stan_variable
        CmdStanGQ.stan_variable
        """
        self._assemble_draws()
        # Only the name lookup is guarded, so an unrelated KeyError raised
        # while extracting the values is not reported as an unknown name.
        try:
            variable = self._metadata.stan_vars[var]
        except KeyError:
            raise ValueError(
                f'Unknown variable name: {var}\n'
                'Available variables are '
                + ", ".join(self._metadata.stan_vars.keys())
            ) from None
        out: np.ndarray = variable.extract_reshape(self._draws)
        return out

    def stan_variables(self) -> Dict[str, np.ndarray]:
        """
        Return a dictionary mapping Stan program variables names
        to the corresponding numpy.ndarray containing the inferred values.

        See Also
        --------
        CmdStanGQ.stan_variables
        CmdStanMCMC.stan_variables
        CmdStanMLE.stan_variables
        CmdStanPathfinder.stan_variables
        CmdStanVB.stan_variables
        """
        return {
            name: self.stan_variable(name)
            for name in self._metadata.stan_vars
        }

    def method_variables(self) -> Dict[str, np.ndarray]:
        """
        Returns a dictionary of all method variables, i.e., all
        output column names ending in `__`. Assumes that all variables
        are scalar variables where column name is variable name.
        Maps each column name to the numpy.ndarray extracted for that
        variable from the draws.
        """
        self._assemble_draws()
        return {
            name: var.extract_reshape(self._draws)
            for name, var in self._metadata.method_vars.items()
        }

    def draws(self) -> np.ndarray:
        """
        Return a numpy.ndarray containing the draws from the
        approximate posterior distribution. This is a 2-D array
        of shape (draws, parameters).

        The cached array itself is returned, not a copy.
        """
        self._assemble_draws()
        return self._draws

    def draws_pd(
        self,
        vars: Union[List[str], str, None] = None,
    ) -> pd.DataFrame:
        """
        Returns the draws as a pandas DataFrame with one column per
        output column name.

        :param vars: optional variable name or list of variable names.
            A method variable selects its own column; a Stan variable
            selects all of its element columns. Repeated names are used
            once, in order of first appearance. When ``None`` (the
            default), every column is returned.
        :raises ValueError: if a requested name is neither a method
            variable nor a Stan variable.

        See Also
        --------
        CmdStanMCMC.draws_pd
        CmdStanGQ.draws_pd
        """
        self._assemble_draws()

        if vars is None:
            cols = list(self.column_names)
        else:
            requested = [vars] if isinstance(vars, str) else vars
            cols = []
            # dict.fromkeys de-duplicates while keeping first-seen order.
            for var in dict.fromkeys(requested):
                if var in self._metadata.method_vars:
                    cols.append(var)
                elif var in self._metadata.stan_vars:
                    info = self._metadata.stan_vars[var]
                    cols.extend(
                        self.column_names[info.start_idx : info.end_idx]
                    )
                else:
                    raise ValueError(f'Unknown variable: {var}')

        return pd.DataFrame(self._draws, columns=self.column_names)[cols]

    def draws_xr(
        self,
        vars: Union[str, List[str], None] = None,
    ) -> "xr.Dataset":
        """
        Returns the draws as a xarray Dataset.

        :param vars: optional variable name or list of variable names.
            When ``None`` (the default), all Stan variables are included.
        :raises RuntimeError: if the ``xarray`` package is not installed.

        Note that, unlike :meth:`draws_pd`, names are looked up directly
        in the Stan variable metadata without validation, so an unknown
        name surfaces as the lookup's own error (presumably ``KeyError``).

        See Also
        --------
        CmdStanMCMC.draws_xr
        CmdStanGQ.draws_xr
        """
        if not XARRAY_INSTALLED:
            raise RuntimeError(
                'Package "xarray" is not installed, cannot produce draws array.'
            )

        if vars is None:
            vars_list = list(self._metadata.stan_vars.keys())
        elif isinstance(vars, str):
            vars_list = [vars]
        else:
            vars_list = vars

        self._assemble_draws()

        meta = self._metadata.cmdstan_config
        attrs: MutableMapping[Hashable, Any] = {
            "stan_version": f"{meta['stan_version_major']}."
            f"{meta['stan_version_minor']}.{meta['stan_version_patch']}",
            "model": meta["model"],
        }

        data: MutableMapping[Hashable, Any] = {}
        coordinates: MutableMapping[Hashable, Any] = {
            "draw": np.arange(self._draws.shape[0]),
        }

        for var in vars_list:
            build_xarray_data(
                data,
                self._metadata.stan_vars[var],
                # Insert a length-1 middle axis before handing the draws to
                # the helper (presumably the chain axis it expects -- TODO
                # confirm against build_xarray_data).
                self._draws[:, np.newaxis, :],
            )

        # squeeze() is called without arguments, so every length-1
        # dimension is dropped from the result, not only the inserted one.
        return (
            xr.Dataset(data, coords=coordinates, attrs=attrs)
            .transpose('draw', ...)
            .squeeze()
        )

    @property
    def mode(self) -> CmdStanMLE:
        """
        Return the maximum a posteriori estimate (mode)
        as a :class:`CmdStanMLE` object.
        """
        return self._mode

    @property
    def metadata(self) -> InferenceMetadata:
        """
        Returns object which contains CmdStan configuration as well as
        information about the names and structure of the inference method
        and model output variables.
        """
        return self._metadata

    def __repr__(self) -> str:
        # Prefix every line of the mode's repr with a tab, then drop the
        # first character so the first line follows "mode=(" directly.
        mode = '\n'.join(
            ['\t' + line for line in repr(self.mode).splitlines()]
        )[1:]
        rep = 'CmdStanLaplace: model={} \nmode=({})\n{}'.format(
            self._runset.model,
            mode,
            self._runset._args.method_args.compose(0, cmd=[]),
        )
        rep = '{}\n csv_files:\n\t{}\n output_files:\n\t{}'.format(
            rep,
            '\n\t'.join(self._runset.csv_files),
            '\n\t'.join(self._runset.stdout_files),
        )
        return rep

    def __getattr__(self, attr: str) -> np.ndarray:
        """Synonymous with ``fit.stan_variable(attr)``"""
        # Never treat private/dunder names as Stan variables; this also
        # keeps lookups of not-yet-set instance attributes from recursing.
        if attr.startswith("_"):
            raise AttributeError(f"Unknown variable name {attr}")
        try:
            return self.stan_variable(attr)
        except ValueError as e:
            # Chain explicitly so the original ValueError stays visible.
            raise AttributeError(*e.args) from e

    def __getstate__(self) -> dict:
        # This function returns the mapping of objects to serialize with pickle.
        # See https://docs.python.org/3/library/pickle.html#object.__getstate__
        # for details. We call _assemble_draws to ensure posterior samples have
        # been loaded prior to serialization.
        self._assemble_draws()
        return self.__dict__

    @property
    def column_names(self) -> Tuple[str, ...]:
        """
        Names of all outputs from the sampler, comprising sampler parameters
        and all components of all model parameters, transformed parameters,
        and quantities of interest. Corresponds to Stan CSV file header row,
        with names munged to array notation, e.g. `beta[1]` not `beta.1`.
        """
        return self._metadata.cmdstan_config['column_names']  # type: ignore

    def save_csvfiles(self, dir: Optional[str] = None) -> None:
        """
        Move output CSV files to specified directory. If files were
        written to the temporary session directory, clean filename.
        E.g., save 'bernoulli-201912081451-1-5nm6as7u.csv' as
        'bernoulli-201912081451-1.csv'.

        :param dir: directory path

        See Also
        --------
        stanfit.RunSet.save_csvfiles
        cmdstanpy.from_csv
        """
        self._runset.save_csvfiles(dir)