Detailed Description

Functions
__kernel void	stan::math::opencl_kernels::add_batch (__global double *B, __global double *A, unsigned int rows, unsigned int cols, unsigned int batch_size)
	Sums a batch of matrices.

__kernel void	stan::math::opencl_kernels::batch_identity (__global double *A, unsigned int batch_rows, unsigned int size)
	Makes a batch of smaller identity matrices inside the input matrix.

__kernel void	stan::math::opencl_kernels::categorical_logit_glm (__global double *logp_global, __global double *exp_lin_global, __global double *inv_sum_exp_lin_global, __global double *neg_softmax_lin_global, __global double *alpha_derivative, const __global int *y_global, const __global double *x_beta_global, const __global double *alpha_global, const int N_instances, const int N_attributes, const int N_classes, const int is_y_vector, const int need_alpha_derivative, const int need_neg_softmax_lin_global)
	GPU implementation of Generalized Linear Model (GLM) with categorical distribution and logit (softmax) link function.

__kernel void	stan::math::opencl_kernels::categorical_logit_glm_beta_derivative (__global double *beta_derivative, __global double *temp, const __global int *y, const __global double *x, const int N_instances, const int N_attributes, const int N_classes, const int is_y_vector)
	Calculates derivative wrt beta.

__kernel void	stan::math::opencl_kernels::is_symmetric (__global double *A, __global int *flag, unsigned int rows, unsigned int cols, double tolerance)
	Check if the `matrix_cl` is symmetric.

__kernel void	stan::math::opencl_kernels::cholesky_decompose (__global double *A, int rows)
	Calculates the Cholesky Decomposition of a matrix on an OpenCL device.

__kernel void	stan::math::opencl_kernels::cumulative_sum1 (__global SCAL *out_wgs, __global SCAL *out_threads, __global SCAL *in, int size)
	First kernel of the cumulative sum implementation.

__kernel void	stan::math::opencl_kernels::cumulative_sum2 (__global SCAL *data, int size)
	Second kernel of the cumulative sum implementation.

__kernel void	stan::math::opencl_kernels::cumulative_sum3 (__global SCAL *out, __global SCAL *in_data, __global SCAL *in_threads, __global SCAL *in_wgs, int size)
	Third kernel of the cumulative sum implementation.

void	stan::math::opencl_kernels::atomic_add_double (__global double *val, double delta)
	Atomically add to a double value.

void	stan::math::opencl_kernels::local_atomic_add_double (__local double *val, double delta)
	Atomically add to a local double value.

double	stan::math::opencl_kernels::beta (double a, double b)
	Return the beta function applied to the specified arguments.

double	stan::math::opencl_kernels::digamma (double x)
	Calculates the digamma function - derivative of logarithm of gamma.

double	stan::math::opencl_kernels::inv_logit (double x)
	Returns the inverse logit function applied to the kernel generator expression.

double	stan::math::opencl_kernels::inv_Phi (double p)
	Return the inv_Phi function applied to the specified argument.

double	stan::math::opencl_kernels::inv_square (double x)
	Calculates `1 / (x*x)`

double	stan::math::opencl_kernels::log1m (double a)
	Calculates the natural logarithm of one minus the specified value.

double	stan::math::opencl_kernels::log1m_exp (double a)
	Calculates the natural logarithm of one minus the exponential of the specified value without overflow.

double	stan::math::opencl_kernels::log1m_inv_logit (double x)
	Return the natural logarithm of 1 minus the inverse logit applied to the kernel generator expression.

double	stan::math::opencl_kernels::log1p_exp (double a)
	Calculates the log of 1 plus the exponential of the specified value without overflow.

double	stan::math::opencl_kernels::log_diff_exp (double x, double y)
	The natural logarithm of the difference of the natural exponentiation of x and the natural exponentiation of y.

double	stan::math::opencl_kernels::log_inv_logit (double x)
	Return the natural logarithm of the inverse logit of the specified argument.

double	stan::math::opencl_kernels::log_inv_logit_diff (double x, double y)
	Returns the natural logarithm of the difference of the inverse logits of the specified arguments.

double	stan::math::opencl_kernels::logit (double x)
	Return the log odds applied to the kernel generator expression.

double	stan::math::opencl_kernels::Phi (double x)
	Return the Phi function applied to the specified argument.

double	stan::math::opencl_kernels::Phi_approx (double x)
	Return the Phi_approx function applied to the specified argument.

__kernel void	stan::math::opencl_kernels::diag_inv (__global double *A, __global double *tmp_inv, int rows)
	Calculates inplace submatrix inversions along the matrix diagonal.

__kernel void	stan::math::opencl_kernels::divide_columns_vec (__global double *A, __global double *vec, int vec_size)
	Takes A and divides each of its column vectors element-wise by the values in vec.

__kernel void	stan::math::opencl_kernels::fill_strict_tri (__global double *A, double val, unsigned int rows, unsigned int cols, unsigned int view_A)
	Stores constant in the triangular part of a matrix on the OpenCL device.

__kernel void	stan::math::opencl_kernels::gp_exp_quad_cov (const __global double *x, __global double *res, const double sigma_sq, const double neg_half_inv_l_sq, const int size, const int element_size)
	GPU part of calculation of squared exponential kernel.

__kernel void	stan::math::opencl_kernels::gp_exp_quad_cov_cross (const __global double *x1, const __global double *x2, __global double *res, const double sigma_sq, const double neg_half_inv_l_sq, const int size1, const int size2, const int element_size)
	GPU part of calculation of squared exponential kernel.

__kernel void	stan::math::opencl_kernels::gp_exponential_cov (const __global double *x, __global double *res, const double sigma_sq, const double neg_inv_l, const int size, const int element_size)
	GPU part of calculation of Matern exponential kernel.

__kernel void	stan::math::opencl_kernels::gp_exponential_cov_cross (const __global double *x1, const __global double *x2, __global double *res, const double sigma_sq, const double neg_inv_l, const int size1, const int size2, const int element_size)
	GPU part of calculation of Matern exponential kernel.

__kernel void	stan::math::opencl_kernels::gp_matern32_cov (const __global double *x, __global double *res, const double sigma_sq, const double root_3_inv_l, const int size, const int element_size)
	GPU part of calculation of Matern 3/2 kernel.

__kernel void	stan::math::opencl_kernels::gp_matern32_cov_cross (const __global double *x1, const __global double *x2, __global double *res, const double sigma_sq, const double root_3_inv_l, const int size1, const int size2, const int element_size)
	GPU part of calculation of Matern 3/2 kernel.

__kernel void	stan::math::opencl_kernels::gp_matern52_cov (const __global double *x, __global double *res, const double sigma_sq, const double root_5_inv_l, const double inv_l_sq_5_3, const int size, const int element_size)
	GPU part of calculation of Matern 5/2 kernel.

__kernel void	stan::math::opencl_kernels::gp_matern52_cov_cross (const __global double *x1, const __global double *x2, __global double *res, const double sigma_sq, const double root_5_inv_l, const double inv_l_sq_5_3, const int size1, const int size2, const int element_size)
	GPU part of calculation of Matern 5/2 kernel.

__kernel void	stan::math::opencl_kernels::indexing_rev (__global double *adj, const __global int *index, const __global double *res, int size)
	Increments adjoint of the indexing operation argument given the indices and adjoints of the indexing result.

__kernel void	stan::math::opencl_kernels::indexing_rev (__global double *adj, const __global int *index, const __global double *res, __local double *adj_loc, int index_size, int adj_size)
	Increments adjoint of the indexing operation argument given the indices and adjoints of the indexing result.

__kernel void	stan::math::opencl_kernels::inv_lower_tri_multiply (__global double *A, __global double *temp, const int A_rows, const int rows)
	Calculates B = C * A.

__kernel void	stan::math::opencl_kernels::matrix_multiply (const __global double *A, const __global double *B, __global double *C, const int M, const int N, const int K, unsigned int view_A, unsigned int view_B)
	Matrix multiplication on the OpenCL device.

__kernel void	stan::math::opencl_kernels::row_vector_matrix_multiply (const __global double *A, const __global double *B, __global double *R, const int N, const int K, unsigned int view_A, unsigned int view_B)
	Row vector-matrix multiplication R=A*B on the OpenCL device.

__kernel void	stan::math::opencl_kernels::merge_step (__global SCAL *output, __global SCAL *input, int run_len, int size, int tasks)
	Merges sorted runs into longer sorted runs.

__kernel void	stan::math::opencl_kernels::multiply_transpose (const __global double *A, __global double *B, const int M, const int N)
	Matrix multiplication of the form A*A^T on the OpenCL device.

__kernel void	stan::math::opencl_kernels::neg_binomial_2_log_glm (__global double *logp_global, __global double *theta_derivative_global, __global double *theta_derivative_sum, __global double *phi_derivative_global, const __global int *y_global, const __global double *x, const __global double *alpha, const __global double *beta, const __global double *phi_global, const int N, const int M, const int is_y_vector, const int is_alpha_vector, const int is_phi_vector, const int need_theta_derivative, const int need_theta_derivative_sum, const int need_phi_derivative, const int need_phi_derivative_sum, const int need_logp1, const int need_logp2, const int need_logp3, const int need_logp4)
	GPU implementation of Generalized Linear Model (GLM) with Negative-Binomial-2 distribution and log link function.

__kernel void	stan::math::opencl_kernels::neg_rect_lower_tri_multiply (__global double *A, const __global double *temp, const int A_rows, const int rows)
	Calculates C = -B * A where B is rectangular and A is lower triangular.

__kernel void	stan::math::opencl_kernels::ordered_logistic_glm (__global double *location_sum, __global double *logp_global, __global double *location_derivative, __global double *cuts_derivative, const __global int *y_global, const __global double *x, const __global double *beta, const __global double *cuts, const int N_instances, const int N_attributes, const int N_classes, const int is_y_vector, const int need_location_derivative, const int need_cuts_derivative)
	GPU implementation of ordinal regression Generalized Linear Model (GLM).

__kernel void	stan::math::opencl_kernels::ordered_logistic (__global double *logp_global, __global double *lambda_derivative, __global double *cuts_derivative, const __global int *y_global, const __global double *lambda_global, const __global double *cuts, const int N_instances, const int N_classes, const int is_y_vector, const int is_cuts_matrix, const int need_lambda_derivative, const int need_cuts_derivative)
	GPU implementation of ordinal regression.

__kernel void	stan::math::opencl_kernels::pack (__global double *A, __global double *B, unsigned int rows, unsigned int cols, unsigned int view)
	Packs a flat matrix to a packed triangular matrix.

__kernel void	stan::math::opencl_kernels::rep_matrix_rev (__global double *A_adj, __global double *B_adj, unsigned int B_rows, unsigned int B_cols, unsigned int view_B)
	Implements reverse pass of rep_matrix.

__kernel void	stan::math::opencl_kernels::unpack (__global double *B, __global double *A, unsigned int rows, unsigned int cols, unsigned int view)
	Unpacks a packed triangular matrix to a flat matrix.

int	stan::math::either (int left_view, int right_view)
	Determines which parts are nonzero in any of the input views.

int	stan::math::both (int left_view, int right_view)
	Determines which parts are nonzero in both input views.

bool	stan::math::contains_nonzero (int view, int part)
	Check whether a view contains certain nonzero part.

Variables
const kernel_cl< out_buffer, in_buffer, int, int, int >	stan::math::opencl_kernels::add_batch ("add_batch", {indexing_helpers, add_batch_kernel_code})
	See the docs for add_batch() .

const kernel_cl< out_buffer, int, int >	stan::math::opencl_kernels::batch_identity ("batch_identity", {indexing_helpers, batch_identity_kernel_code})
	See the docs for batch_identity() .

const kernel_cl< out_buffer, out_buffer, out_buffer, out_buffer, out_buffer, in_buffer, in_buffer, in_buffer, int, int, int, int, int, int >	stan::math::opencl_kernels::categorical_logit_glm ("categorical_logit_glm", {categorical_logit_glm_kernel_code}, {{"REDUCTION_STEP_SIZE", 4}, {"LOCAL_SIZE_", 64}})
	See the docs for categorical_logit_glm() .

const kernel_cl< in_out_buffer, in_out_buffer, in_buffer, in_buffer, int, int, int, int >	stan::math::opencl_kernels::categorical_logit_glm_beta_derivative ("categorical_logit_glm_beta_derivative", {categorical_logit_glm_beta_derivative_kernel_code})
	See the docs for categorical_logit_glm_beta_derivative() .

const kernel_cl< in_buffer, out_buffer, int, int, const double >	stan::math::opencl_kernels::check_symmetric ("is_symmetric", {indexing_helpers, is_symmetric_kernel_code})
	See the docs for is_symmetric() .

const kernel_cl< in_out_buffer, int >	stan::math::opencl_kernels::cholesky_decompose ("cholesky_decompose", {indexing_helpers, cholesky_decompose_kernel_code})
	See the docs for cholesky_decompose() .

const kernel_cl< in_out_buffer, in_out_buffer, int >	stan::math::opencl_kernels::diag_inv ("diag_inv", {indexing_helpers, diag_inv_kernel_code}, {{"THREAD_BLOCK_SIZE", 32}})
	See the docs for diag_inv() .

const kernel_cl< out_buffer, in_buffer, int >	stan::math::opencl_kernels::divide_columns_vec ("divide_columns_vec", {indexing_helpers, divide_columns_kernel_code})
	See the docs for divide_columns_vec() .

const kernel_cl< out_buffer, double, int, int, matrix_cl_view >	stan::math::opencl_kernels::fill_strict_tri ("fill_strict_tri", {indexing_helpers, view_kernel_helpers, fill_strict_tri_kernel_code})
	See the docs for fill_strict_tri() .

const kernel_cl< in_buffer, out_buffer, double, double, int, int >	stan::math::opencl_kernels::gp_exp_quad_cov ("gp_exp_quad_cov", {gp_exp_quad_cov_kernel_code})
	See the docs for gp_exp_quad_cov() .

const kernel_cl< in_buffer, in_buffer, out_buffer, double, double, int, int, int >	stan::math::opencl_kernels::gp_exp_quad_cov_cross ("gp_exp_quad_cov_cross", {gp_exp_quad_cov_cross_kernel_code})
	See the docs for gp_exp_quad_cov_cross() .

const kernel_cl< in_buffer, out_buffer, double, double, int, int >	stan::math::opencl_kernels::gp_exponential_cov ("gp_exponential_cov", {gp_exponential_cov_kernel_code})
	See the docs for gp_exponential_cov() .

const kernel_cl< in_buffer, in_buffer, out_buffer, double, double, int, int, int >	stan::math::opencl_kernels::gp_exponential_cov_cross ("gp_exponential_cov_cross", {gp_exponential_cov_cross_kernel_code})
	See the docs for gp_exponential_cov_cross() .

const kernel_cl< in_buffer, out_buffer, double, double, int, int >	stan::math::opencl_kernels::gp_matern32_cov ("gp_matern32_cov", {gp_matern32_cov_kernel_code})
	See the docs for gp_matern32_cov() .

const kernel_cl< in_buffer, in_buffer, out_buffer, double, double, int, int, int >	stan::math::opencl_kernels::gp_matern32_cov_cross ("gp_matern32_cov_cross", {gp_matern32_cov_cross_kernel_code})
	See the docs for gp_matern32_cov_cross() .

const kernel_cl< in_buffer, out_buffer, double, double, double, int, int >	stan::math::opencl_kernels::gp_matern52_cov ("gp_matern52_cov", {gp_matern52_cov_kernel_code})
	See the docs for gp_matern52_cov() .

const kernel_cl< in_buffer, in_buffer, out_buffer, double, double, double, int, int, int >	stan::math::opencl_kernels::gp_matern52_cov_cross ("gp_matern52_cov_cross", {gp_matern52_cov_cross_kernel_code})
	See the docs for gp_matern52_cov_cross() .

const kernel_cl< in_out_buffer, in_buffer, in_buffer, int >	stan::math::opencl_kernels::indexing_rev_global_atomic ("indexing_rev", {atomic_add_double_device_function, indexing_rev_global_atomic_kernel_code})
	See the docs for indexing_rev() .

const kernel_cl< in_out_buffer, in_buffer, in_buffer, cl::LocalSpaceArg, int, int >	stan::math::opencl_kernels::indexing_rev_local_atomic ("indexing_rev", {atomic_add_double_device_function, indexing_rev_local_atomic_kernel_code})
	See the docs for indexing_rev() .

const kernel_cl< in_out_buffer, in_buffer, in_buffer, cl::LocalSpaceArg, int, int >	stan::math::opencl_kernels::indexing_rev_local_independent ("indexing_rev", {atomic_add_double_device_function, indexing_rev_local_independent_kernel_code})
	See the docs for indexing_rev() .

const kernel_cl< in_buffer, out_buffer, int, int >	stan::math::opencl_kernels::inv_lower_tri_multiply ("inv_lower_tri_multiply", {thread_block_helpers, inv_lower_tri_multiply_kernel_code}, {{"THREAD_BLOCK_SIZE", 32}, {"WORK_PER_THREAD", 8}})
	See the docs for inv_lower_tri_multiply() .

const kernel_cl< in_buffer, in_buffer, out_buffer, int, int, int, matrix_cl_view, matrix_cl_view >	stan::math::opencl_kernels::matrix_multiply ("matrix_multiply", {thread_block_helpers, view_kernel_helpers, matrix_multiply_kernel_code}, {{"THREAD_BLOCK_SIZE", 32}, {"WORK_PER_THREAD", 8}})
	See the docs for matrix_multiply() .

const kernel_cl< in_buffer, in_buffer, out_buffer, int, int, matrix_cl_view, matrix_cl_view >	stan::math::opencl_kernels::row_vector_matrix_multiply ("row_vector_matrix_multiply", {view_kernel_helpers, row_vector_matrix_multiply_kernel_code}, {{"LOCAL_SIZE_", 64}, {"REDUCTION_STEP_SIZE", 4}})
	See the docs for row_vector_matrix_multiply() .

const kernel_cl< in_buffer, out_buffer, int, int >	stan::math::opencl_kernels::multiply_transpose ("multiply_transpose", {thread_block_helpers, multiply_transpose_kernel_code}, {{"THREAD_BLOCK_SIZE", 32}, {"WORK_PER_THREAD", 4}})
	See the docs for multiply_transpose() .

const kernel_cl< out_buffer, out_buffer, out_buffer, out_buffer, in_buffer, in_buffer, in_buffer, in_buffer, in_buffer, int, int, int, int, int, int, int, int, int, int, int, int, int >	stan::math::opencl_kernels::neg_binomial_2_log_glm ("neg_binomial_2_log_glm", {digamma_device_function, log1p_exp_device_function, neg_binomial_2_log_glm_kernel_code}, {{"REDUCTION_STEP_SIZE", 4}, {"LOCAL_SIZE_", 64}})
	See the docs for neg_binomial_2_log_glm() .

const kernel_cl< in_out_buffer, in_buffer, int, int >	stan::math::opencl_kernels::neg_rect_lower_tri_multiply ("neg_rect_lower_tri_multiply", {thread_block_helpers, neg_rect_lower_tri_multiply_kernel_code}, {{"THREAD_BLOCK_SIZE", 32}, {"WORK_PER_THREAD", 8}})
	See the docs for neg_rect_lower_tri_multiply() .

const kernel_cl< out_buffer, out_buffer, out_buffer, out_buffer, in_buffer, in_buffer, in_buffer, in_buffer, int, int, int, int, int, int >	stan::math::opencl_kernels::ordered_logistic_glm ("ordered_logistic_glm", {log1p_exp_device_function, log1m_exp_device_function, inv_logit_device_function, ordered_logistic_glm_kernel_code}, {{"REDUCTION_STEP_SIZE", 4}, {"LOCAL_SIZE_", 64}})
	See the docs for ordered_logistic_glm() .

const kernel_cl< out_buffer, out_buffer, out_buffer, in_buffer, in_buffer, in_buffer, int, int, int, int, int, int >	stan::math::opencl_kernels::ordered_logistic ("ordered_logistic", {log1p_exp_device_function, log1m_exp_device_function, inv_logit_device_function, ordered_logistic_kernel_code}, {{"REDUCTION_STEP_SIZE", 4}, {"LOCAL_SIZE_", 64}})
	See the docs for ordered_logistic() .

const kernel_cl< out_buffer, in_buffer, int, int, matrix_cl_view >	stan::math::opencl_kernels::pack ("pack", {indexing_helpers, pack_kernel_code})
	See the docs for pack() .

const kernel_cl< in_out_buffer, in_buffer, int, int, matrix_cl_view >	stan::math::opencl_kernels::rep_matrix_rev ("rep_matrix_rev", {view_kernel_helpers, rep_matrix_rev_kernel_code})
	See the docs for rep_matrix_rev() .

const kernel_cl< out_buffer, in_buffer, int, int, matrix_cl_view >	stan::math::opencl_kernels::unpack ("unpack", {indexing_helpers, unpack_kernel_code})
	See the docs for unpack() .

Table of contents

Detailed Description

Functions

Variables