Source code for cmdstanpy.stanfit.pathfinder

"""
Container for the result of running Pathfinder.
"""

from typing import Dict, List, Optional, Tuple, Union

import numpy as np

from cmdstanpy.cmdstan_args import Method
from cmdstanpy.stanfit.metadata import InferenceMetadata
from cmdstanpy.stanfit.runset import RunSet
from cmdstanpy.utils.stancsv import scan_generic_csv


[docs]class CmdStanPathfinder: """ Container for outputs from the Pathfinder algorithm. Created by :meth:`CmdStanModel.pathfinder()`. """ def __init__(self, runset: RunSet): """Initialize object.""" if not runset.method == Method.PATHFINDER: raise ValueError( 'Wrong runset method, expecting Pathfinder runset, ' 'found method {}'.format(runset.method) ) self._runset = runset self._draws: np.ndarray = np.array(()) config = scan_generic_csv(runset.csv_files[0]) self._metadata = InferenceMetadata(config)
[docs] def create_inits( self, seed: Optional[int] = None, chains: int = 4 ) -> Union[List[Dict[str, np.ndarray]], Dict[str, np.ndarray]]: """ Create initial values for the parameters of the model by randomly selecting draws from the Pathfinder approximation. :param seed: Used for random selection, defaults to None :param chains: Number of initial values to return, defaults to 4 :return: The initial values for the parameters of the model. If ``chains`` is 1, a dictionary is returned, otherwise a list of dictionaries is returned, in the format expected for the ``inits`` argument. of :meth:`CmdStanModel.sample`. """ self._assemble_draws() rng = np.random.default_rng(seed) idxs = rng.choice(self._draws.shape[0], size=chains, replace=False) if chains == 1: draw = self._draws[idxs[0]] return { name: var.extract_reshape(draw) for name, var in self._metadata.stan_vars.items() } else: return [ { name: var.extract_reshape(self._draws[idx]) for name, var in self._metadata.stan_vars.items() } for idx in idxs ]
def __repr__(self) -> str: rep = 'CmdStanPathfinder: model={}{}'.format( self._runset.model, self._runset._args.method_args.compose(0, cmd=[]), ) rep = '{}\n csv_files:\n\t{}\n output_files:\n\t{}'.format( rep, '\n\t'.join(self._runset.csv_files), '\n\t'.join(self._runset.stdout_files), ) return rep # below this is identical to same functions in Laplace def _assemble_draws(self) -> None: if self._draws.shape != (0,): return with open(self._runset.csv_files[0], 'r') as fd: while (fd.readline()).startswith("#"): pass self._draws = np.loadtxt( fd, dtype=float, ndmin=2, delimiter=',', comments="#", )
[docs] def stan_variable(self, var: str) -> np.ndarray: """ Return a numpy.ndarray which contains the estimates for the for the named Stan program variable where the dimensions of the numpy.ndarray match the shape of the Stan program variable. This functionaltiy is also available via a shortcut using ``.`` - writing ``fit.a`` is a synonym for ``fit.stan_variable("a")`` :param var: variable name See Also -------- CmdStanPathfinder.stan_variables CmdStanMLE.stan_variable CmdStanMCMC.stan_variable CmdStanVB.stan_variable CmdStanGQ.stan_variable CmdStanLaplace.stan_variable """ self._assemble_draws() try: out: np.ndarray = self._metadata.stan_vars[var].extract_reshape( self._draws ) return out except KeyError: # pylint: disable=raise-missing-from raise ValueError( f'Unknown variable name: {var}\n' 'Available variables are ' + ", ".join(self._metadata.stan_vars.keys()) )
[docs] def stan_variables(self) -> Dict[str, np.ndarray]: """ Return a dictionary mapping Stan program variables names to the corresponding numpy.ndarray containing the inferred values. See Also -------- CmdStanPathfinder.stan_variable CmdStanMCMC.stan_variables CmdStanMLE.stan_variables CmdStanVB.stan_variables CmdStanGQ.stan_variables CmdStanLaplace.stan_variables """ result = {} for name in self._metadata.stan_vars: result[name] = self.stan_variable(name) return result
[docs] def method_variables(self) -> Dict[str, np.ndarray]: """ Returns a dictionary of all sampler variables, i.e., all output column names ending in `__`. Assumes that all variables are scalar variables where column name is variable name. Maps each column name to a numpy.ndarray (draws x chains x 1) containing per-draw diagnostic values. """ self._assemble_draws() return { name: var.extract_reshape(self._draws) for name, var in self._metadata.method_vars.items() }
[docs] def draws(self) -> np.ndarray: """ Return a numpy.ndarray containing the draws from the approximate posterior distribution. This is a 2-D array of shape (draws, parameters). """ self._assemble_draws() return self._draws
def __getattr__(self, attr: str) -> np.ndarray: """Synonymous with ``fit.stan_variable(attr)""" if attr.startswith("_"): raise AttributeError(f"Unknown variable name {attr}") try: return self.stan_variable(attr) except ValueError as e: # pylint: disable=raise-missing-from raise AttributeError(*e.args) def __getstate__(self) -> dict: # This function returns the mapping of objects to serialize with pickle. # See https://docs.python.org/3/library/pickle.html#object.__getstate__ # for details. We call _assemble_draws to ensure posterior samples have # been loaded prior to serialization. self._assemble_draws() return self.__dict__ @property def metadata(self) -> InferenceMetadata: """ Returns object which contains CmdStan configuration as well as information about the names and structure of the inference method and model output variables. """ return self._metadata @property def column_names(self) -> Tuple[str, ...]: """ Names of all outputs from the sampler, comprising sampler parameters and all components of all model parameters, transformed parameters, and quantities of interest. Corresponds to Stan CSV file header row, with names munged to array notation, e.g. `beta[1]` not `beta.1`. """ return self._metadata.cmdstan_config['column_names'] # type: ignore @property def is_resampled(self) -> bool: """ Returns True if the draws were resampled from several Pathfinder approximations, False otherwise. """ return ( # type: ignore self._metadata.cmdstan_config.get("num_paths", 4) > 1 and self._metadata.cmdstan_config.get('psis_resample', 1) in (1, 'true') and self._metadata.cmdstan_config.get('calculate_lp', 1) in (1, 'true') )
[docs] def save_csvfiles(self, dir: Optional[str] = None) -> None: """ Move output CSV files to specified directory. If files were written to the temporary session directory, clean filename. E.g., save 'bernoulli-201912081451-1-5nm6as7u.csv' as 'bernoulli-201912081451-1.csv'. :param dir: directory path See Also -------- stanfit.RunSet.save_csvfiles cmdstanpy.from_csv """ self._runset.save_csvfiles(dir)