A CmdStanModel object is an R6 object created by the cmdstan_model() function. The object stores the path to a Stan program and compiled executable (once created), and provides methods for fitting the model using Stan's algorithms.


CmdStanModel objects have the following associated methods, many of which have their own (linked) documentation pages:

Stan code

$stan_file(): Return the file path to the Stan program.
$code(): Return Stan program as a character vector.
$print(): Print readable version of Stan program.
$check_syntax(): Check Stan syntax without having to compile.
$format(): Format and canonicalize the Stan model code.


Compilation

$compile(): Compile Stan program.
$exe_file(): Return the file path to the compiled executable.
$hpp_file(): Return the file path to the .hpp file containing the generated C++ code.
$save_hpp_file(): Save the .hpp file containing the generated C++ code.
$expose_functions(): Expose Stan functions for use in R.


Diagnostics

$diagnose(): Run CmdStan's "diagnose" method to test gradients, return CmdStanDiagnose object.

Model fitting

$sample(): Run CmdStan's "sample" method, return CmdStanMCMC object.
$sample_mpi(): Run CmdStan's "sample" method with MPI, return CmdStanMCMC object.
$optimize(): Run CmdStan's "optimize" method, return CmdStanMLE object.
$variational(): Run CmdStan's "variational" method, return CmdStanVB object.
$pathfinder(): Run CmdStan's "pathfinder" method, return CmdStanPathfinder object.
$generate_quantities(): Run CmdStan's "generate quantities" method, return CmdStanGQ object.

# \dontrun{
# Worked example: build a CmdStanModel from the Bernoulli example program that
# ships with CmdStan, then fit it with each of the available algorithms.
# Lines starting with `#>` show the output captured when the example was run.

# Set path to CmdStan
# (Note: if you installed CmdStan via install_cmdstan() with default settings
# then setting the path is unnecessary but the default below should still work.
# Otherwise use the `path` argument to specify the location of your
# CmdStan installation.)
set_cmdstan_path(path = NULL)
#> CmdStan path set to: /Users/jgabry/.cmdstan/cmdstan-2.35.0

# Create a CmdStanModel object from a Stan program,
# here using the example model that comes with CmdStan
file <- file.path(cmdstan_path(), "examples/bernoulli/bernoulli.stan")
mod <- cmdstan_model(file)
mod$print()
#> data {
#>   int<lower=0> N;
#>   array[N] int<lower=0, upper=1> y;
#> }
#> parameters {
#>   real<lower=0, upper=1> theta;
#> }
#> model {
#>   theta ~ beta(1, 1); // uniform prior on interval 0,1
#>   y ~ bernoulli(theta);
#> }
# Print with line numbers. This can be set globally using the
# `cmdstanr_print_line_numbers` option.
mod$print(line_numbers = TRUE)
#>  1: data {
#>  2:   int<lower=0> N;
#>  3:   array[N] int<lower=0, upper=1> y;
#>  4: }
#>  5: parameters {
#>  6:   real<lower=0, upper=1> theta;
#>  7: }
#>  8: model {
#>  9:   theta ~ beta(1, 1); // uniform prior on interval 0,1
#> 10:   y ~ bernoulli(theta);
#> 11: }

# Data as a named list (like RStan)
stan_data <- list(N = 10, y = c(0, 1, 0, 0, 0, 0, 0, 0, 0, 1))

# Run MCMC using the 'sample' method
fit_mcmc <- mod$sample(
  data = stan_data,
  seed = 123,
  chains = 2,
  parallel_chains = 2
)

# Use 'posterior' package for summaries
fit_mcmc$summary()
#> # A tibble: 2 × 10
#>   variable   mean median    sd   mad      q5    q95  rhat ess_bulk ess_tail
#>   <chr>     <dbl>  <dbl> <dbl> <dbl>   <dbl>  <dbl> <dbl>    <dbl>    <dbl>
#> 1 lp__     -7.30  -7.00  0.811 0.344 -8.83   -6.75   1.00     702.     776.
#> 2 theta     0.254  0.238 0.125 0.124  0.0807  0.483  1.00     634.     580.

# Check sampling diagnostics
fit_mcmc$diagnostic_summary()
#> $num_divergent
#> [1] 0 0
#> $num_max_treedepth
#> [1] 0 0
#> $ebfmi
#> [1] 1.1148699 0.8012192

# Get posterior draws
draws <- fit_mcmc$draws()
print(draws)
#> # A draws_array: 1000 iterations, 2 chains, and 2 variables
#> , , variable = lp__
#>          chain
#> iteration    1     2
#>         1 -7.0  -6.8
#>         2 -7.9  -6.9
#>         3 -7.4  -6.8
#>         4 -6.7  -6.8
#>         5 -6.9 -10.2
#> , , variable = theta
#>          chain
#> iteration    1    2
#>         1 0.17 0.23
#>         2 0.46 0.18
#>         3 0.41 0.28
#>         4 0.25 0.23
#>         5 0.18 0.62
#> # ... with 995 more iterations

# Convert to data frame using posterior::as_draws_df
posterior::as_draws_df(draws)
#> # A draws_df: 1000 iterations, 2 chains, and 2 variables
#>    lp__ theta
#> 1  -7.0  0.17
#> 2  -7.9  0.46
#> 3  -7.4  0.41
#> 4  -6.7  0.25
#> 5  -6.9  0.18
#> 6  -6.9  0.33
#> 7  -7.2  0.15
#> 8  -6.8  0.29
#> 9  -6.8  0.24
#> 10 -6.8  0.24
#> # ... with 1990 more draws
#> # ... hidden reserved variables {'.chain', '.iteration', '.draw'}

# Plot posterior using bayesplot (ggplot2)
bayesplot::mcmc_hist(fit_mcmc$draws("theta"))
#> `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

# For models fit using MCMC, if you like working with RStan's stanfit objects
# then you can create one with rstan::read_stan_csv()
# stanfit <- rstan::read_stan_csv(fit_mcmc$output_files())

# Run 'optimize' method to get a point estimate (default is Stan's LBFGS
# algorithm) and also demonstrate specifying data as a path to a file instead
# of a list
my_data_file <- file.path(
  cmdstan_path(),
  "examples/bernoulli/bernoulli.data.json"
)
fit_optim <- mod$optimize(data = my_data_file, seed = 123)
fit_optim$summary()
#> # A tibble: 2 × 2
#>   variable estimate
#>   <chr>       <dbl>
#> 1 lp__        -5.00
#> 2 theta        0.2 

# Run 'optimize' again with 'jacobian=TRUE' and then draw from Laplace
# approximation to the posterior
fit_optim <- mod$optimize(data = my_data_file, jacobian = TRUE)
fit_laplace <- mod$laplace(data = my_data_file, mode = fit_optim, draws = 2000)
fit_laplace$summary()
#> # A tibble: 3 × 7
#>   variable      mean median    sd   mad     q5      q95
#>   <chr>        <dbl>  <dbl> <dbl> <dbl>  <dbl>    <dbl>
#> 1 lp__        -7.25  -6.96  0.761 0.290 -8.64  -6.75   
#> 2 lp_approx__ -0.505 -0.216 0.751 0.296 -1.96  -0.00191
#> 3 theta        0.269  0.248 0.124 0.119  0.102  0.505  

# Run 'variational' method to use ADVI to approximate posterior
fit_vb <- mod$variational(data = stan_data, seed = 123)
fit_vb$summary()
#> # A tibble: 3 × 7
#>   variable      mean median    sd   mad     q5      q95
#>   <chr>        <dbl>  <dbl> <dbl> <dbl>  <dbl>    <dbl>
#> 1 lp__        -7.14  -6.93  0.528 0.247 -8.21  -6.75   
#> 2 lp_approx__ -0.520 -0.244 0.740 0.326 -1.90  -0.00227
#> 3 theta        0.251  0.236 0.107 0.108  0.100  0.446  
bayesplot::mcmc_hist(fit_vb$draws("theta"))
#> `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

# Run 'pathfinder' method, a new alternative to the variational method
fit_pf <- mod$pathfinder(data = stan_data, seed = 123)

# Specifying initial values as a function
fit_mcmc_w_init_fun <- mod$sample(
  data = stan_data,
  seed = 123,
  chains = 2,
  refresh = 0,
  init = function() list(theta = runif(1))
)
fit_mcmc_w_init_fun_2 <- mod$sample(
  data = stan_data,
  seed = 123,
  chains = 2,
  refresh = 0,
  init = function(chain_id) {
    # silly but demonstrates optional use of chain_id
    list(theta = 1 / (chain_id + 1))
  }
)
# Show the initial values that were generated for each chain
fit_mcmc_w_init_fun_2$init()
#> [[1]]
#> [[1]]$theta
#> [1] 0.5
#> [[2]]
#> [[2]]$theta
#> [1] 0.3333333

# Specifying initial values as a list of lists
fit_mcmc_w_init_list <- mod$sample(
  data = stan_data,
  seed = 123,
  chains = 2,
  refresh = 0,
  init = list(
    list(theta = 0.75), # chain 1
    list(theta = 0.25)  # chain 2
  )
)
# Optimization is a single run, so `init` holds exactly one inner list
fit_optim_w_init_list <- mod$optimize(
  data = stan_data,
  seed = 123,
  init = list(
    list(theta = 0.75)
  )
)
fit_optim_w_init_list$init()
#> [[1]]
#> [[1]]$theta
#> [1] 0.75
# }