Stan logo: https://mc-stan.org/about/logo/

The cmdstan_model() function creates a new CmdStanModel object from a file containing a Stan program.

cmdstan_model(stan_file, compile = TRUE, ...)

Arguments

stan_file

The path to a .stan file containing a Stan program. The helper function write_stan_tempfile() is provided for cases when it is more convenient to specify the Stan program as a string.

compile

Do compilation? The default is TRUE. If FALSE, compilation can be done later via the $compile() method.

...

Optionally, additional arguments to pass to the $compile() method if compile=TRUE.

Value

A CmdStanModel object.

See also

install_cmdstan(), cmdstan_path()

The CmdStanR website (mc-stan.org/cmdstanr) for online documentation and tutorials.

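The Stan and CmdStan documentation: the Stan documentation (mc-stan.org/users/documentation) and the CmdStan User's Guide (mc-stan.org/docs/cmdstan-guide).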

Examples

# \dontrun{ library(cmdstanr) library(posterior) library(bayesplot) color_scheme_set("brightblue") # Set path to cmdstan # (Note: if you installed CmdStan via install_cmdstan() with default settings # then setting the path is unnecessary but the default below should still work. # Otherwise use the `path` argument to specify the location of your # CmdStan installation.) set_cmdstan_path(path = NULL)
#> CmdStan path set to: /Users/jgabry/.cmdstanr/cmdstan-2.24.0
# Create a CmdStanModel object from a Stan program, # here using the example model that comes with CmdStan file <- file.path(cmdstan_path(), "examples/bernoulli/bernoulli.stan") mod <- cmdstan_model(file)
#> Model executable is up to date!
mod$print()
#> data { #> int<lower=0> N; #> int<lower=0,upper=1> y[N]; #> } #> parameters { #> real<lower=0,upper=1> theta; #> } #> model { #> theta ~ beta(1,1); // uniform prior on interval 0,1 #> y ~ bernoulli(theta); #> }
# Data as a named list (like RStan) stan_data <- list(N = 10, y = c(0,1,0,0,0,0,0,0,0,1)) # Run MCMC using the 'sample' method fit_mcmc <- mod$sample( data = stan_data, seed = 123, chains = 2, parallel_chains = 2 )
#> Running MCMC with 2 parallel chains... #> #> Chain 1 Iteration: 1 / 2000 [ 0%] (Warmup) #> Chain 1 Iteration: 100 / 2000 [ 5%] (Warmup) #> Chain 1 Iteration: 200 / 2000 [ 10%] (Warmup) #> Chain 1 Iteration: 300 / 2000 [ 15%] (Warmup) #> Chain 1 Iteration: 400 / 2000 [ 20%] (Warmup) #> Chain 1 Iteration: 500 / 2000 [ 25%] (Warmup) #> Chain 1 Iteration: 600 / 2000 [ 30%] (Warmup) #> Chain 1 Iteration: 700 / 2000 [ 35%] (Warmup) #> Chain 1 Iteration: 800 / 2000 [ 40%] (Warmup) #> Chain 1 Iteration: 900 / 2000 [ 45%] (Warmup) #> Chain 1 Iteration: 1000 / 2000 [ 50%] (Warmup) #> Chain 1 Iteration: 1001 / 2000 [ 50%] (Sampling) #> Chain 1 Iteration: 1100 / 2000 [ 55%] (Sampling) #> Chain 1 Iteration: 1200 / 2000 [ 60%] (Sampling) #> Chain 1 Iteration: 1300 / 2000 [ 65%] (Sampling) #> Chain 1 Iteration: 1400 / 2000 [ 70%] (Sampling) #> Chain 1 Iteration: 1500 / 2000 [ 75%] (Sampling) #> Chain 1 Iteration: 1600 / 2000 [ 80%] (Sampling) #> Chain 1 Iteration: 1700 / 2000 [ 85%] (Sampling) #> Chain 1 Iteration: 1800 / 2000 [ 90%] (Sampling) #> Chain 1 Iteration: 1900 / 2000 [ 95%] (Sampling) #> Chain 1 Iteration: 2000 / 2000 [100%] (Sampling) #> Chain 2 Iteration: 1 / 2000 [ 0%] (Warmup) #> Chain 2 Iteration: 100 / 2000 [ 5%] (Warmup) #> Chain 2 Iteration: 200 / 2000 [ 10%] (Warmup) #> Chain 2 Iteration: 300 / 2000 [ 15%] (Warmup) #> Chain 2 Iteration: 400 / 2000 [ 20%] (Warmup) #> Chain 2 Iteration: 500 / 2000 [ 25%] (Warmup) #> Chain 2 Iteration: 600 / 2000 [ 30%] (Warmup) #> Chain 2 Iteration: 700 / 2000 [ 35%] (Warmup) #> Chain 2 Iteration: 800 / 2000 [ 40%] (Warmup) #> Chain 2 Iteration: 900 / 2000 [ 45%] (Warmup) #> Chain 2 Iteration: 1000 / 2000 [ 50%] (Warmup) #> Chain 2 Iteration: 1001 / 2000 [ 50%] (Sampling) #> Chain 2 Iteration: 1100 / 2000 [ 55%] (Sampling) #> Chain 2 Iteration: 1200 / 2000 [ 60%] (Sampling) #> Chain 2 Iteration: 1300 / 2000 [ 65%] (Sampling) #> Chain 2 Iteration: 1400 / 2000 [ 70%] (Sampling) #> Chain 2 Iteration: 1500 / 2000 [ 75%] 
(Sampling) #> Chain 2 Iteration: 1600 / 2000 [ 80%] (Sampling) #> Chain 2 Iteration: 1700 / 2000 [ 85%] (Sampling) #> Chain 2 Iteration: 1800 / 2000 [ 90%] (Sampling) #> Chain 2 Iteration: 1900 / 2000 [ 95%] (Sampling) #> Chain 2 Iteration: 2000 / 2000 [100%] (Sampling) #> Chain 1 finished in 0.1 seconds. #> Chain 2 finished in 0.1 seconds. #> #> Both chains finished successfully. #> Mean chain execution time: 0.1 seconds. #> Total execution time: 0.1 seconds.
# Use 'posterior' package for summaries fit_mcmc$summary()
#> # A tibble: 2 x 10 #> variable mean median sd mad q5 q95 rhat ess_bulk ess_tail #> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> #> 1 lp__ -7.28 -7.00 0.739 0.342 -8.80 -6.75 1.00 815. 621. #> 2 theta 0.254 0.235 0.123 0.123 0.0809 0.485 1.00 752. 589.
# Get posterior draws draws <- fit_mcmc$draws() print(draws)
#> # A draws_array: 1000 iterations, 2 chains, and 2 variables #> , , variable = lp__ #> #> chain #> iteration 1 2 #> 1 -6.8 -6.8 #> 2 -7.3 -6.8 #> 3 -7.1 -7.0 #> 4 -7.1 -8.5 #> 5 -7.1 -7.8 #> #> , , variable = theta #> #> chain #> iteration 1 2 #> 1 0.30 0.230 #> 2 0.13 0.199 #> 3 0.16 0.165 #> 4 0.37 0.074 #> 5 0.15 0.103 #> #> # ... with 995 more iterations
# Convert to data frame using posterior::as_draws_df as_draws_df(draws)
#> # A draws_df: 1000 iterations, 2 chains, and 2 variables #> lp__ theta #> 1 -6.8 0.30 #> 2 -7.3 0.13 #> 3 -7.1 0.16 #> 4 -7.1 0.37 #> 5 -7.1 0.15 #> 6 -7.5 0.12 #> 7 -7.2 0.38 #> 8 -6.8 0.22 #> 9 -7.0 0.34 #> 10 -6.8 0.22 #> # ... with 1990 more draws #> # ... hidden meta-columns {'.chain', '.iteration', '.draw'}
# Plot posterior using bayesplot (ggplot2) mcmc_hist(fit_mcmc$draws("theta"))
#> `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
# Call CmdStan's diagnose and stansummary utilities fit_mcmc$cmdstan_diagnose()
#> Running bin/diagnose \ #> /var/folders/h6/14xy_35x4wd2tz542dn0qhtc0000gn/T/RtmpAcsuBs/bernoulli-202008031244-1-1c8bb9.csv \ #> /var/folders/h6/14xy_35x4wd2tz542dn0qhtc0000gn/T/RtmpAcsuBs/bernoulli-202008031244-2-1c8bb9.csv #> Processing csv files: /var/folders/h6/14xy_35x4wd2tz542dn0qhtc0000gn/T/RtmpAcsuBs/bernoulli-202008031244-1-1c8bb9.csv, /var/folders/h6/14xy_35x4wd2tz542dn0qhtc0000gn/T/RtmpAcsuBs/bernoulli-202008031244-2-1c8bb9.csv #> #> Checking sampler transitions treedepth. #> Treedepth satisfactory for all transitions. #> #> Checking sampler transitions for divergences. #> No divergent transitions found. #> #> Checking E-BFMI - sampler transitions HMC potential energy. #> E-BFMI satisfactory for all transitions. #> #> Effective sample size satisfactory. #> #> Split R-hat values satisfactory all parameters. #> #> Processing complete, no problems detected.
fit_mcmc$cmdstan_summary()
#> Running bin/stansummary \ #> /var/folders/h6/14xy_35x4wd2tz542dn0qhtc0000gn/T/RtmpAcsuBs/bernoulli-202008031244-1-1c8bb9.csv \ #> /var/folders/h6/14xy_35x4wd2tz542dn0qhtc0000gn/T/RtmpAcsuBs/bernoulli-202008031244-2-1c8bb9.csv #> Input files: /var/folders/h6/14xy_35x4wd2tz542dn0qhtc0000gn/T/RtmpAcsuBs/bernoulli-202008031244-1-1c8bb9.csv, /var/folders/h6/14xy_35x4wd2tz542dn0qhtc0000gn/T/RtmpAcsuBs/bernoulli-202008031244-2-1c8bb9.csv #> Inference for Stan model: bernoulli_model #> 2 chains: each with iter=(1000,1000); warmup=(0,0); thin=(1,1); 2000 iterations saved. #> #> Warmup took (0.0070, 0.0070) seconds, 0.014 seconds total #> Sampling took (0.018, 0.017) seconds, 0.035 seconds total #> #> Mean MCSE StdDev 5% 50% 95% N_Eff N_Eff/s R_hat #> #> lp__ -7.3 3.1e-02 0.74 -8.8 -7.0 -6.8 586 16747 1.0 #> accept_stat__ 0.92 5.0e-03 0.14 0.61 0.97 1.0 7.3e+02 2.1e+04 1.0e+00 #> stepsize__ 1.0 9.0e-02 0.090 0.93 1.1 1.1 1.0e+00 2.9e+01 2.6e+13 #> treedepth__ 1.4 1.2e-02 0.52 1.0 1.0 2.0 1.9e+03 5.3e+04 1.0e+00 #> n_leapfrog__ 2.6 4.0e-01 1.5 1.0 3.0 7.0 1.4e+01 4.0e+02 1.0e+00 #> divergent__ 0.00 nan 0.00 0.00 0.00 0.00 nan nan nan #> energy__ 7.8 4.0e-02 1.0 6.8 7.4 10.0 6.9e+02 2.0e+04 1.0e+00 #> #> theta 0.25 4.5e-03 0.12 0.081 0.23 0.49 755 21577 1.00 #> #> Samples were drawn using hmc with nuts. #> For each parameter, N_Eff is a crude measure of effective sample size, #> and R_hat is the potential scale reduction factor on split chains (at #> convergence, R_hat=1).
# For models fit using MCMC, if you like working with RStan's stanfit objects # then you can create one with rstan::read_stan_csv() # stanfit <- rstan::read_stan_csv(fit_mcmc$output_files()) # Run 'optimize' method to get a point estimate (default is Stan's LBFGS algorithm) # and also demonstrate specifying data as a path to a file instead of a list my_data_file <- file.path(cmdstan_path(), "examples/bernoulli/bernoulli.data.json") fit_optim <- mod$optimize(data = my_data_file, seed = 123)
#> method = optimize #> optimize #> algorithm = lbfgs (Default) #> lbfgs #> init_alpha = 0.001 (Default) #> tol_obj = 9.9999999999999998e-13 (Default) #> tol_rel_obj = 10000 (Default) #> tol_grad = 1e-08 (Default) #> tol_rel_grad = 10000000 (Default) #> tol_param = 1e-08 (Default) #> history_size = 5 (Default) #> iter = 2000 (Default) #> save_iterations = 0 (Default) #> id = 1 #> data #> file = /Users/jgabry/.cmdstanr/cmdstan-2.24.0/examples/bernoulli/bernoulli.data.json #> init = 2 (Default) #> random #> seed = 123 #> output #> file = /var/folders/h6/14xy_35x4wd2tz542dn0qhtc0000gn/T/RtmpAcsuBs/bernoulli-202008031244-1-1946bf.csv #> diagnostic_file = (Default) #> refresh = 100 (Default) #> #> Initial log joint probability = -9.51104 #> Iter log prob ||dx|| ||grad|| alpha alpha0 # evals Notes #> 6 -5.00402 0.000103557 2.55661e-07 1 1 9 #> Optimization terminated normally: #> Convergence detected: relative gradient magnitude is below tolerance #> Finished in 0.1 seconds.
fit_optim$summary()
#> # A tibble: 2 x 2 #> variable estimate #> <chr> <dbl> #> 1 lp__ -5.00 #> 2 theta 0.2
# Run 'variational' method to approximate the posterior (default is meanfield ADVI) fit_vb <- mod$variational(data = stan_data, seed = 123)
#> method = variational #> variational #> algorithm = meanfield (Default) #> meanfield #> iter = 10000 (Default) #> grad_samples = 1 (Default) #> elbo_samples = 100 (Default) #> eta = 1 (Default) #> adapt #> engaged = 1 (Default) #> iter = 50 (Default) #> tol_rel_obj = 0.01 (Default) #> eval_elbo = 100 (Default) #> output_samples = 1000 (Default) #> id = 1 #> data #> file = /var/folders/h6/14xy_35x4wd2tz542dn0qhtc0000gn/T/RtmpAcsuBs/standata-205e6773c051.json #> init = 2 (Default) #> random #> seed = 123 #> output #> file = /var/folders/h6/14xy_35x4wd2tz542dn0qhtc0000gn/T/RtmpAcsuBs/bernoulli-202008031244-1-573ae6.csv #> diagnostic_file = (Default) #> refresh = 100 (Default) #> #> ------------------------------------------------------------ #> EXPERIMENTAL ALGORITHM: #> This procedure has not been thoroughly tested and may be unstable #> or buggy. The interface is subject to change. #> ------------------------------------------------------------ #> #> #> #> Gradient evaluation took 2.2e-05 seconds #> 1000 transitions using 10 leapfrog steps per transition would take 0.22 seconds. #> Adjust your expectations accordingly! #> #> #> Begin eta adaptation. #> Iteration: 1 / 250 [ 0%] (Adaptation) #> Iteration: 50 / 250 [ 20%] (Adaptation) #> Iteration: 100 / 250 [ 40%] (Adaptation) #> Iteration: 150 / 250 [ 60%] (Adaptation) #> Iteration: 200 / 250 [ 80%] (Adaptation) #> Success! Found best value [eta = 1] earlier than expected. #> #> Begin stochastic gradient ascent. #> iter ELBO delta_ELBO_mean delta_ELBO_med notes #> 100 -6.262 1.000 1.000 #> 200 -6.263 0.500 1.000 #> 300 -6.307 0.336 0.007 MEDIAN ELBO CONVERGED #> #> Drawing a sample of size 1000 from the approximate posterior... #> COMPLETED. #> Finished in 0.1 seconds.
fit_vb$summary()
#> # A tibble: 3 x 7 #> variable mean median sd mad q5 q95 #> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> #> 1 lp__ -7.18 -6.94 0.588 0.259 -8.36 -6.75 #> 2 lp_approx__ -0.515 -0.221 0.692 0.303 -2.06 -0.00257 #> 3 theta 0.263 0.246 0.115 0.113 0.106 0.481
# Plot approximate posterior using bayesplot mcmc_hist(fit_vb$draws("theta"))
#> `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
# Specifying initial values as a function fit_mcmc_w_init_fun <- mod$sample( data = stan_data, seed = 123, chains = 2, refresh = 0, init = function() list(theta = runif(1)) )
#> Running MCMC with 2 sequential chains... #> #> Chain 1 finished in 0.1 seconds. #> Chain 2 finished in 0.1 seconds. #> #> Both chains finished successfully. #> Mean chain execution time: 0.1 seconds. #> Total execution time: 0.2 seconds.
fit_mcmc_w_init_fun_2 <- mod$sample( data = stan_data, seed = 123, chains = 2, refresh = 0, init = function(chain_id) { # silly but demonstrates optional use of chain_id list(theta = 1 / (chain_id + 1)) } )
#> Running MCMC with 2 sequential chains... #> #> Chain 1 finished in 0.1 seconds. #> Chain 2 finished in 0.1 seconds. #> #> Both chains finished successfully. #> Mean chain execution time: 0.1 seconds. #> Total execution time: 0.2 seconds.
fit_mcmc_w_init_fun_2$init()
#> [[1]] #> [[1]]$theta #> [1] 0.5 #> #> #> [[2]] #> [[2]]$theta #> [1] 0.3333333 #> #>
# Specifying initial values as a list of lists fit_mcmc_w_init_list <- mod$sample( data = stan_data, seed = 123, chains = 2, refresh = 0, init = list( list(theta = 0.75), # chain 1 list(theta = 0.25) # chain 2 ) )
#> Running MCMC with 2 sequential chains... #> #> Chain 1 finished in 0.1 seconds. #> Chain 2 finished in 0.1 seconds. #> #> Both chains finished successfully. #> Mean chain execution time: 0.1 seconds. #> Total execution time: 0.2 seconds.
fit_optim_w_init_list <- mod$optimize( data = stan_data, seed = 123, init = list( list(theta = 0.75) ) )
#> method = optimize #> optimize #> algorithm = lbfgs (Default) #> lbfgs #> init_alpha = 0.001 (Default) #> tol_obj = 9.9999999999999998e-13 (Default) #> tol_rel_obj = 10000 (Default) #> tol_grad = 1e-08 (Default) #> tol_rel_grad = 10000000 (Default) #> tol_param = 1e-08 (Default) #> history_size = 5 (Default) #> iter = 2000 (Default) #> save_iterations = 0 (Default) #> id = 1 #> data #> file = /var/folders/h6/14xy_35x4wd2tz542dn0qhtc0000gn/T/RtmpAcsuBs/standata-205e4374abe5.json #> init = /var/folders/h6/14xy_35x4wd2tz542dn0qhtc0000gn/T/RtmpAcsuBs/init-1-205e20c169fc.json #> random #> seed = 123 #> output #> file = /var/folders/h6/14xy_35x4wd2tz542dn0qhtc0000gn/T/RtmpAcsuBs/bernoulli-202008031244-1-153030.csv #> diagnostic_file = (Default) #> refresh = 100 (Default) #> #> Initial log joint probability = -11.6657 #> Iter log prob ||dx|| ||grad|| alpha alpha0 # evals Notes #> 6 -5.00402 0.000237915 9.55309e-07 1 1 9 #> Optimization terminated normally: #> Convergence detected: relative gradient magnitude is below tolerance #> Finished in 0.1 seconds.
fit_optim_w_init_list$init()
#> [[1]] #> [[1]]$theta #> [1] 0.75 #> #>
# }