Summary statistics

We can easily customize the summary statistics reported by $summary() and $print().

fit <- cmdstanr::cmdstanr_example("schools", method = "sample")
Warning: 411 of 4000 (10.0%) transitions ended with a divergence.
See https://mc-stan.org/misc/warnings for details.
Warning: 1 of 4 chains had an E-BFMI less than 0.2.
See https://mc-stan.org/misc/warnings for details.
fit$summary()
   variable       mean     median       sd      mad         q5       q95
1      lp__ -55.581147 -56.536200 7.135791 7.809299 -66.108490 -44.17410
2        mu   6.053657   5.516030 3.777403 3.597633  -0.119312  12.18247
3       tau   4.456447   3.456865 3.641807 3.645828   0.778481  11.91315
4  theta[1]   8.436941   7.287125 6.350694 4.628344   0.249281  20.38983
5  theta[2]   6.522999   5.993620 5.308671 4.494072  -1.760367  15.64359
6  theta[3]   5.147676   4.349235 6.014958 5.082679  -5.803791  14.17618
7  theta[4]   6.189408   5.541310 5.578086 4.501730  -2.557880  15.63053
8  theta[5]   4.669689   4.952550 5.338198 4.271882  -5.029484  12.96141
9  theta[6]   5.123046   4.853110 5.623562 4.579796  -4.797084  13.70336
10 theta[7]   8.283195   7.324155 5.703079 4.912514   1.112880  18.94623
11 theta[8]   6.499894   5.594240 6.114406 4.839688  -2.747390  16.75831
       rhat   ess_bulk    ess_tail
1  1.348190   9.585696          NA
2  1.094243 190.870204  910.104440
3  1.305410  10.562073    4.833756
4  1.037209 205.819798 1097.512010
5  1.234963 615.265251 1158.714996
6  1.270209 429.358164 1118.125260
7  1.060915 288.566093 1331.398737
8  1.265317 356.169676 1352.544578
9  1.281493 482.412281 1311.140711
10 1.041368  75.113061  938.699910
11 1.069623 436.937773 1068.232198

By default all variables are summaries with the follow functions:

[1] "mean"      "median"    "sd"        "mad"       "quantile2"

To change the variables summarized, we use the variables argument

fit$summary(variables = c("mu", "tau"))
  variable     mean   median       sd      mad        q5      q95     rhat
1       mu 6.053657 5.516030 3.777403 3.597633 -0.119312 12.18247 1.094243
2      tau 4.456447 3.456865 3.641807 3.645828  0.778481 11.91315 1.305410
   ess_bulk   ess_tail
1 190.87020 910.104440
2  10.56207   4.833756

We can additionally change which functions are used

fit$summary(variables = c("mu", "tau"), mean, sd)
  variable     mean       sd
1       mu 6.053657 3.777403
2      tau 4.456447 3.641807

To summarize all variables with non-default functions, it is necessary to set explicitly set the variables argument, either to NULL or the full vector of variable names.

fit$metadata()$model_params
 [1] "lp__"     "mu"       "tau"      "theta[1]" "theta[2]" "theta[3]"
 [7] "theta[4]" "theta[5]" "theta[6]" "theta[7]" "theta[8]"
fit$summary(variables = NULL, "mean", "median")
   variable       mean     median
1      lp__ -55.581147 -56.536200
2        mu   6.053657   5.516030
3       tau   4.456447   3.456865
4  theta[1]   8.436941   7.287125
5  theta[2]   6.522999   5.993620
6  theta[3]   5.147676   4.349235
7  theta[4]   6.189408   5.541310
8  theta[5]   4.669689   4.952550
9  theta[6]   5.123046   4.853110
10 theta[7]   8.283195   7.324155
11 theta[8]   6.499894   5.594240

Summary functions can be specified by character string, function, or using a formula (or anything else supported by rlang::as_function()). If these arguments are named, those names will be used in the tibble output. If the summary results are named they will take precedence.

my_sd <- function(x) c(My_SD = sd(x))
fit$summary(
  c("mu", "tau"), 
  MEAN = mean, 
  "median",
  my_sd,
  ~quantile(.x, probs = c(0.1, 0.9)),
  Minimum = function(x) min(x)
)        
  variable     MEAN   median    My_SD      10%       90%   Minimum
1       mu 6.053657 5.516030 3.777403 1.565533 10.863700 -8.554010
2      tau 4.456447 3.456865 3.641807 0.778481  9.666494  0.774844

Arguments to all summary functions can also be specified with .args.

fit$summary(c("mu", "tau"), quantile, .args = list(probs = c(0.025, .05, .95, .975)))
  variable      2.5%        5%      95%    97.5%
1       mu -1.614829 -0.119312 12.18247 13.54298
2      tau  0.778481  0.778481 11.91315 13.43694

The summary functions are applied to the array of sample values, with dimension iter_samplingxchains.

fit$summary(variables = NULL, dim, colMeans)
   variable dim.1 dim.2          1          2          3          4
1      lp__  1000     4 -56.698590 -58.592226 -56.911658 -50.122112
2        mu  1000     4   6.836467   6.338743   6.214918   4.824501
3       tau  1000     4   4.751132   5.540196   4.617587   2.916872
4  theta[1]  1000     4   9.139180   9.470688   8.631066   6.506831
5  theta[2]  1000     4   7.088238   6.756565   6.691301   5.555891
6  theta[3]  1000     4   6.087235   5.430155   5.208329   3.864985
7  theta[4]  1000     4   6.963234   6.883517   6.322756   4.588127
8  theta[5]  1000     4   5.312623   4.530775   4.623091   4.212267
9  theta[6]  1000     4   5.778286   4.999767   5.497782   4.216348
10 theta[7]  1000     4   9.354323   9.367796   8.483796   5.926863
11 theta[8]  1000     4   7.269928   7.165762   6.670007   4.893880

For this reason users may have unexpected results if they use stats::var() directly, as it will return a covariance matrix. An alternative is the distributional::variance() function, which can also be accessed via posterior::variance().

fit$summary(c("mu", "tau"), posterior::variance, ~var(as.vector(.x)))
  variable posterior::variance ~var(as.vector(.x))
1       mu            14.26877            14.26877
2      tau            13.26276            13.26276

Summary functions need not be numeric, but these won’t work with $print().

strict_pos <- function(x) if (all(x > 0)) "yes" else "no"
fit$summary(variables = NULL, "Strictly Positive" = strict_pos)
   variable Strictly Positive
1      lp__                no
2        mu                no
3       tau               yes
4  theta[1]                no
5  theta[2]                no
6  theta[3]                no
7  theta[4]                no
8  theta[5]                no
9  theta[6]                no
10 theta[7]                no
11 theta[8]                no
# fit$print(variables = NULL, "Strictly Positive" = strict_pos)

For more information, see posterior::summarise_draws(), which is called by $summary().

Extracting posterior draws/samples

The $draws() method can be used to extract the posterior draws in formats provided by the posterior package. Here we demonstrate only the draws_array and draws_df formats, but the posterior package supports other useful formats as well.

# default is a 3-D draws_array object from the posterior package
# iterations x chains x variables
draws_arr <- fit$draws() # or format="array"
str(draws_arr)
 'draws_array' num [1:1000, 1:4, 1:11] -61.7 -60.5 -61.8 -60.5 -66.8 ...
 - attr(*, "dimnames")=List of 3
  ..$ iteration: chr [1:1000] "1" "2" "3" "4" ...
  ..$ chain    : chr [1:4] "1" "2" "3" "4"
  ..$ variable : chr [1:11] "lp__" "mu" "tau" "theta[1]" ...
# draws x variables data frame
draws_df <- fit$draws(format = "df")
str(draws_df)
draws_df [4,000 × 14] (S3: draws_df/draws/tbl_df/tbl/data.frame)
 $ lp__      : num [1:4000] -61.7 -60.5 -61.8 -60.5 -66.8 ...
 $ mu        : num [1:4000] 8.86 6.18 9.7 4.97 5.16 ...
 $ tau       : num [1:4000] 5.6 8.77 7.58 7.37 11.97 ...
 $ theta[1]  : num [1:4000] 16.716 14.062 6.67 6.25 -0.404 ...
 $ theta[2]  : num [1:4000] 4.377 6.173 15.608 0.638 10.449 ...
 $ theta[3]  : num [1:4000] 11.59 9.27 11.11 3.74 13.42 ...
 $ theta[4]  : num [1:4000] 13.68 12.51 7.77 6.48 10.16 ...
 $ theta[5]  : num [1:4000] -0.877 1.959 7.197 2.097 3.229 ...
 $ theta[6]  : num [1:4000] -5.02 -4.95 5.66 5.24 14.26 ...
 $ theta[7]  : num [1:4000] 13.38 17.44 13.69 6.65 13.77 ...
 $ theta[8]  : num [1:4000] 10.46 5.48 -6.15 20.92 31.79 ...
 $ .chain    : int [1:4000] 1 1 1 1 1 1 1 1 1 1 ...
 $ .iteration: int [1:4000] 1 2 3 4 5 6 7 8 9 10 ...
 $ .draw     : int [1:4000] 1 2 3 4 5 6 7 8 9 10 ...
print(draws_df)
# A draws_df: 1000 iterations, 4 chains, and 11 variables
   lp__   mu  tau theta[1] theta[2] theta[3] theta[4] theta[5]
1   -62  8.9  5.6     16.7     4.38     11.6   13.676    -0.88
2   -61  6.2  8.8     14.1     6.17      9.3   12.515     1.96
3   -62  9.7  7.6      6.7    15.61     11.1    7.770     7.20
4   -60  5.0  7.4      6.3     0.64      3.7    6.477     2.10
5   -67  5.2 12.0     -0.4    10.45     13.4   10.162     3.23
6   -63 11.1  7.9     24.6    11.68     18.8   -1.606     3.79
7   -61 14.7  6.1     28.9    11.15     15.3    6.780     9.95
8   -63 13.4  5.7     13.7    21.03      7.2   20.051    12.99
9   -64 16.0  8.3     20.2     2.65     11.3   19.000     3.51
10  -59  2.9  4.5      4.2    11.37      2.5   -0.057     6.30
# ... with 3990 more draws, and 3 more variables
# ... hidden reserved variables {'.chain', '.iteration', '.draw'}

To convert an existing draws object to a different format use the posterior::as_draws_*() functions.

To manipulate the draws objects use the various methods described in the posterior package vignettes and documentation.