Automatic Differentiation
 
Loading...
Searching...
No Matches
Custom OpenCL kernels

Detailed Description

Functions

__kernel void stan::math::opencl_kernels::add_batch (__global double *B, __global double *A, unsigned int rows, unsigned int cols, unsigned int batch_size)
 Sums a batch of matrices.
 
__kernel void stan::math::opencl_kernels::batch_identity (__global double *A, unsigned int batch_rows, unsigned int size)
 Makes a batch of smaller identity matrices inside the input matrix.
 
__kernel void stan::math::opencl_kernels::categorical_logit_glm (__global double *logp_global, __global double *exp_lin_global, __global double *inv_sum_exp_lin_global, __global double *neg_softmax_lin_global, __global double *alpha_derivative, const __global int *y_global, const __global double *x_beta_global, const __global double *alpha_global, const int N_instances, const int N_attributes, const int N_classes, const int is_y_vector, const int need_alpha_derivative, const int need_neg_softmax_lin_global)
 GPU implementation of Generalized Linear Model (GLM) with categorical distribution and logit (softmax) link function.
 
__kernel void stan::math::opencl_kernels::categorical_logit_glm_beta_derivative (__global double *beta_derivative, __global double *temp, const __global int *y, const __global double *x, const int N_instances, const int N_attributes, const int N_classes, const int is_y_vector)
 Calculates derivative wrt beta.
 
__kernel void stan::math::opencl_kernels::is_symmetric (__global double *A, __global int *flag, unsigned int rows, unsigned int cols, double tolerance)
 Check if the matrix_cl is symmetric.
 
__kernel void stan::math::opencl_kernels::cholesky_decompose (__global double *A, int rows)
 Calculates the Cholesky decomposition of a matrix on an OpenCL device.
 
__kernel void stan::math::opencl_kernels::cumulative_sum1 (__global SCAL *out_wgs, __global SCAL *out_threads, __global SCAL *in, int size)
 First kernel of the cumulative sum implementation.
 
__kernel void stan::math::opencl_kernels::cumulative_sum2 (__global SCAL *data, int size)
 Second kernel of the cumulative sum implementation.
 
__kernel void stan::math::opencl_kernels::cumulative_sum3 (__global SCAL *out, __global SCAL *in_data, __global SCAL *in_threads, __global SCAL *in_wgs, int size)
 Third kernel of the cumulative sum implementation.
 
void stan::math::opencl_kernels::atomic_add_double (__global double *val, double delta)
 Atomically add to a double value.
 
void stan::math::opencl_kernels::local_atomic_add_double (__local double *val, double delta)
 Atomically add to a local double value.
 
double stan::math::opencl_kernels::beta (double a, double b)
 Return the beta function applied to the specified arguments.
 
double stan::math::opencl_kernels::digamma (double x)
 Calculates the digamma function - derivative of logarithm of gamma.
 
double stan::math::opencl_kernels::inv_logit (double x)
 Returns the inverse logit function applied to the kernel generator expression.
 
double stan::math::opencl_kernels::inv_Phi (double p)
 Return the inv_Phi function applied to the specified argument.
 
double stan::math::opencl_kernels::inv_square (double x)
 Calculates 1 / (x*x)
 
double stan::math::opencl_kernels::log1m (double a)
 Calculates the natural logarithm of one minus the specified value.
 
double stan::math::opencl_kernels::log1m_exp (double a)
 Calculates the natural logarithm of one minus the exponential of the specified value without overflow.
 
double stan::math::opencl_kernels::log1m_inv_logit (double x)
 Return the natural logarithm of 1 minus the inverse logit applied to the kernel generator expression.
 
double stan::math::opencl_kernels::log1p_exp (double a)
 Calculates the log of 1 plus the exponential of the specified value without overflow.
 
double stan::math::opencl_kernels::log_diff_exp (double x, double y)
 The natural logarithm of the difference of the natural exponentiation of x and the natural exponentiation of y.
 
double stan::math::opencl_kernels::log_inv_logit (double x)
 Return the natural logarithm of the inverse logit of the specified argument.
 
double stan::math::opencl_kernels::log_inv_logit_diff (double x, double y)
 Returns the natural logarithm of the difference of the inverse logits of the specified arguments.
 
double stan::math::opencl_kernels::logit (double x)
 Return the log odds applied to the kernel generator expression.
 
double stan::math::opencl_kernels::Phi (double x)
 Return the Phi function applied to the specified argument.
 
double stan::math::opencl_kernels::Phi_approx (double x)
 Return the Phi_approx function applied to the specified argument.
 
__kernel void stan::math::opencl_kernels::diag_inv (__global double *A, __global double *tmp_inv, int rows)
 Calculates inplace submatrix inversions along the matrix diagonal.
 
__kernel void stan::math::opencl_kernels::divide_columns_vec (__global double *A, __global double *vec, int vec_size)
 Takes matrix A and divides each column of A element-wise by the values in vec.
 
__kernel void stan::math::opencl_kernels::fill_strict_tri (__global double *A, double val, unsigned int rows, unsigned int cols, unsigned int view_A)
 Stores constant in the triangular part of a matrix on the OpenCL device.
 
__kernel void stan::math::opencl_kernels::gp_exp_quad_cov (const __global double *x, __global double *res, const double sigma_sq, const double neg_half_inv_l_sq, const int size, const int element_size)
 GPU part of calculation of squared exponential kernel.
 
__kernel void stan::math::opencl_kernels::gp_exp_quad_cov_cross (const __global double *x1, const __global double *x2, __global double *res, const double sigma_sq, const double neg_half_inv_l_sq, const int size1, const int size2, const int element_size)
 GPU part of calculation of squared exponential kernel.
 
__kernel void stan::math::opencl_kernels::gp_exponential_cov (const __global double *x, __global double *res, const double sigma_sq, const double neg_inv_l, const int size, const int element_size)
 GPU part of calculation of Matern exponential kernel.
 
__kernel void stan::math::opencl_kernels::gp_exponential_cov_cross (const __global double *x1, const __global double *x2, __global double *res, const double sigma_sq, const double neg_inv_l, const int size1, const int size2, const int element_size)
 GPU part of calculation of Matern exponential kernel.
 
__kernel void stan::math::opencl_kernels::gp_matern32_cov (const __global double *x, __global double *res, const double sigma_sq, const double root_3_inv_l, const int size, const int element_size)
 GPU part of calculation of Matern 3/2 kernel.
 
__kernel void stan::math::opencl_kernels::gp_matern32_cov_cross (const __global double *x1, const __global double *x2, __global double *res, const double sigma_sq, const double root_3_inv_l, const int size1, const int size2, const int element_size)
 GPU part of calculation of Matern 3/2 kernel.
 
__kernel void stan::math::opencl_kernels::gp_matern52_cov (const __global double *x, __global double *res, const double sigma_sq, const double root_5_inv_l, const double inv_l_sq_5_3, const int size, const int element_size)
 GPU part of calculation of Matern 5/2 kernel.
 
__kernel void stan::math::opencl_kernels::gp_matern52_cov_cross (const __global double *x1, const __global double *x2, __global double *res, const double sigma_sq, const double root_5_inv_l, const double inv_l_sq_5_3, const int size1, const int size2, const int element_size)
 GPU part of calculation of Matern 5/2 kernel.
 
__kernel void stan::math::opencl_kernels::indexing_rev (__global double *adj, const __global int *index, const __global double *res, int size)
 Increments adjoint of the indexing operation argument given the indices and adjoints of the indexing result.
 
__kernel void stan::math::opencl_kernels::indexing_rev (__global double *adj, const __global int *index, const __global double *res, __local double *adj_loc, int index_size, int adj_size)
 Increments adjoint of the indexing operation argument given the indices and adjoints of the indexing result.
 
__kernel void stan::math::opencl_kernels::inv_lower_tri_multiply (__global double *A, __global double *temp, const int A_rows, const int rows)
 Calculates B = C * A.
 
__kernel void stan::math::opencl_kernels::matrix_multiply (const __global double *A, const __global double *B, __global double *C, const int M, const int N, const int K, unsigned int view_A, unsigned int view_B)
 Matrix multiplication on the OpenCL device.
 
__kernel void stan::math::opencl_kernels::row_vector_matrix_multiply (const __global double *A, const __global double *B, __global double *R, const int N, const int K, unsigned int view_A, unsigned int view_B)
 Row vector-matrix multiplication R=A*B on the OpenCL device.
 
__kernel void stan::math::opencl_kernels::merge_step (__global SCAL *output, __global SCAL *input, int run_len, int size, int tasks)
 Merges sorted runs into longer sorted runs.
 
__kernel void stan::math::opencl_kernels::multiply_transpose (const __global double *A, __global double *B, const int M, const int N)
 Matrix multiplication of the form A*A^T on the OpenCL device.
 
__kernel void stan::math::opencl_kernels::neg_binomial_2_log_glm (__global double *logp_global, __global double *theta_derivative_global, __global double *theta_derivative_sum, __global double *phi_derivative_global, const __global int *y_global, const __global double *x, const __global double *alpha, const __global double *beta, const __global double *phi_global, const int N, const int M, const int is_y_vector, const int is_alpha_vector, const int is_phi_vector, const int need_theta_derivative, const int need_theta_derivative_sum, const int need_phi_derivative, const int need_phi_derivative_sum, const int need_logp1, const int need_logp2, const int need_logp3, const int need_logp4)
 GPU implementation of Generalized Linear Model (GLM) with Negative-Binomial-2 distribution and log link function.
 
__kernel void stan::math::opencl_kernels::neg_rect_lower_tri_multiply (__global double *A, const __global double *temp, const int A_rows, const int rows)
 Calculates C = -B * A where B is rectangular and A is lower triangular.
 
__kernel void stan::math::opencl_kernels::ordered_logistic_glm (__global double *location_sum, __global double *logp_global, __global double *location_derivative, __global double *cuts_derivative, const __global int *y_global, const __global double *x, const __global double *beta, const __global double *cuts, const int N_instances, const int N_attributes, const int N_classes, const int is_y_vector, const int need_location_derivative, const int need_cuts_derivative)
 GPU implementation of ordinal regression Generalized Linear Model (GLM).
 
__kernel void stan::math::opencl_kernels::ordered_logistic (__global double *logp_global, __global double *lambda_derivative, __global double *cuts_derivative, const __global int *y_global, const __global double *lambda_global, const __global double *cuts, const int N_instances, const int N_classes, const int is_y_vector, const int is_cuts_matrix, const int need_lambda_derivative, const int need_cuts_derivative)
 GPU implementation of ordinal regression.
 
__kernel void stan::math::opencl_kernels::pack (__global double *A, __global double *B, unsigned int rows, unsigned int cols, unsigned int view)
 Packs a flat matrix to a packed triangular matrix.
 
__kernel void stan::math::opencl_kernels::rep_matrix_rev (__global double *A_adj, __global double *B_adj, unsigned int B_rows, unsigned int B_cols, unsigned int view_B)
 Implements reverse pass of rep_matrix.
 
__kernel void stan::math::opencl_kernels::unpack (__global double *B, __global double *A, unsigned int rows, unsigned int cols, unsigned int view)
 Unpacks a packed triangular matrix to a flat matrix.
 
int stan::math::either (int left_view, int right_view)
 Determines which parts are nonzero in any of the input views.
 
int stan::math::both (int left_view, int right_view)
 Determines which parts are nonzero in both input views.
 
bool stan::math::contains_nonzero (int view, int part)
 Check whether a view contains certain nonzero part.
 

Variables

const kernel_cl< out_buffer, in_buffer, int, int, int > stan::math::opencl_kernels::add_batch ("add_batch", {indexing_helpers, add_batch_kernel_code})
 See the docs for add_batch() .
 
const kernel_cl< out_buffer, int, int > stan::math::opencl_kernels::batch_identity ("batch_identity", {indexing_helpers, batch_identity_kernel_code})
 See the docs for batch_identity() .
 
const kernel_cl< out_buffer, out_buffer, out_buffer, out_buffer, out_buffer, in_buffer, in_buffer, in_buffer, int, int, int, int, int, int > stan::math::opencl_kernels::categorical_logit_glm ("categorical_logit_glm", {categorical_logit_glm_kernel_code}, {{"REDUCTION_STEP_SIZE", 4}, {"LOCAL_SIZE_", 64}})
 See the docs for categorical_logit_glm() .
 
const kernel_cl< in_out_buffer, in_out_buffer, in_buffer, in_buffer, int, int, int, int > stan::math::opencl_kernels::categorical_logit_glm_beta_derivative ("categorical_logit_glm_beta_derivative", {categorical_logit_glm_beta_derivative_kernel_code})
 See the docs for categorical_logit_glm_beta_derivative() .
 
const kernel_cl< in_buffer, out_buffer, int, int, const double > stan::math::opencl_kernels::check_symmetric ("is_symmetric", {indexing_helpers, is_symmetric_kernel_code})
 See the docs for is_symmetric() .
 
const kernel_cl< in_out_buffer, int > stan::math::opencl_kernels::cholesky_decompose ("cholesky_decompose", {indexing_helpers, cholesky_decompose_kernel_code})
 See the docs for cholesky_decompose() .
 
const kernel_cl< in_out_buffer, in_out_buffer, int > stan::math::opencl_kernels::diag_inv ("diag_inv", {indexing_helpers, diag_inv_kernel_code}, {{"THREAD_BLOCK_SIZE", 32}})
 See the docs for diag_inv() .
 
const kernel_cl< out_buffer, in_buffer, int > stan::math::opencl_kernels::divide_columns_vec ("divide_columns_vec", {indexing_helpers, divide_columns_kernel_code})
 See the docs for divide_columns_vec() .
 
const kernel_cl< out_buffer, double, int, int, matrix_cl_view > stan::math::opencl_kernels::fill_strict_tri ("fill_strict_tri", {indexing_helpers, view_kernel_helpers, fill_strict_tri_kernel_code})
 See the docs for fill_strict_tri() .
 
const kernel_cl< in_buffer, out_buffer, double, double, int, int > stan::math::opencl_kernels::gp_exp_quad_cov ("gp_exp_quad_cov", {gp_exp_quad_cov_kernel_code})
 See the docs for gp_exp_quad_cov() .
 
const kernel_cl< in_buffer, in_buffer, out_buffer, double, double, int, int, int > stan::math::opencl_kernels::gp_exp_quad_cov_cross ("gp_exp_quad_cov_cross", {gp_exp_quad_cov_cross_kernel_code})
 See the docs for gp_exp_quad_cov_cross() .
 
const kernel_cl< in_buffer, out_buffer, double, double, int, int > stan::math::opencl_kernels::gp_exponential_cov ("gp_exponential_cov", {gp_exponential_cov_kernel_code})
 See the docs for gp_exponential_cov() .
 
const kernel_cl< in_buffer, in_buffer, out_buffer, double, double, int, int, int > stan::math::opencl_kernels::gp_exponential_cov_cross ("gp_exponential_cov_cross", {gp_exponential_cov_cross_kernel_code})
 See the docs for gp_exponential_cov_cross() .
 
const kernel_cl< in_buffer, out_buffer, double, double, int, int > stan::math::opencl_kernels::gp_matern32_cov ("gp_matern32_cov", {gp_matern32_cov_kernel_code})
 See the docs for gp_matern32_cov() .
 
const kernel_cl< in_buffer, in_buffer, out_buffer, double, double, int, int, int > stan::math::opencl_kernels::gp_matern32_cov_cross ("gp_matern32_cov_cross", {gp_matern32_cov_cross_kernel_code})
 See the docs for gp_matern32_cov_cross() .
 
const kernel_cl< in_buffer, out_buffer, double, double, double, int, int > stan::math::opencl_kernels::gp_matern52_cov ("gp_matern52_cov", {gp_matern52_cov_kernel_code})
 See the docs for gp_matern52_cov() .
 
const kernel_cl< in_buffer, in_buffer, out_buffer, double, double, double, int, int, int > stan::math::opencl_kernels::gp_matern52_cov_cross ("gp_matern52_cov_cross", {gp_matern52_cov_cross_kernel_code})
 See the docs for gp_matern52_cov_cross() .
 
const kernel_cl< in_out_buffer, in_buffer, in_buffer, int > stan::math::opencl_kernels::indexing_rev_global_atomic ("indexing_rev", {atomic_add_double_device_function, indexing_rev_global_atomic_kernel_code})
 See the docs for indexing_rev() .
 
const kernel_cl< in_out_buffer, in_buffer, in_buffer, cl::LocalSpaceArg, int, int > stan::math::opencl_kernels::indexing_rev_local_atomic ("indexing_rev", {atomic_add_double_device_function, indexing_rev_local_atomic_kernel_code})
 See the docs for indexing_rev() .
 
const kernel_cl< in_out_buffer, in_buffer, in_buffer, cl::LocalSpaceArg, int, int > stan::math::opencl_kernels::indexing_rev_local_independent ("indexing_rev", {atomic_add_double_device_function, indexing_rev_local_independent_kernel_code})
 See the docs for indexing_rev() .
 
const kernel_cl< in_buffer, out_buffer, int, int > stan::math::opencl_kernels::inv_lower_tri_multiply ("inv_lower_tri_multiply", {thread_block_helpers, inv_lower_tri_multiply_kernel_code}, {{"THREAD_BLOCK_SIZE", 32}, {"WORK_PER_THREAD", 8}})
 See the docs for inv_lower_tri_multiply() .
 
const kernel_cl< in_buffer, in_buffer, out_buffer, int, int, int, matrix_cl_view, matrix_cl_view > stan::math::opencl_kernels::matrix_multiply ("matrix_multiply", {thread_block_helpers, view_kernel_helpers, matrix_multiply_kernel_code}, {{"THREAD_BLOCK_SIZE", 32}, {"WORK_PER_THREAD", 8}})
 See the docs for matrix_multiply() .
 
const kernel_cl< in_buffer, in_buffer, out_buffer, int, int, matrix_cl_view, matrix_cl_view > stan::math::opencl_kernels::row_vector_matrix_multiply ("row_vector_matrix_multiply", {view_kernel_helpers, row_vector_matrix_multiply_kernel_code}, {{"LOCAL_SIZE_", 64}, {"REDUCTION_STEP_SIZE", 4}})
 See the docs for row_vector_matrix_multiply() .
 
const kernel_cl< in_buffer, out_buffer, int, int > stan::math::opencl_kernels::multiply_transpose ("multiply_transpose", {thread_block_helpers, multiply_transpose_kernel_code}, {{"THREAD_BLOCK_SIZE", 32}, {"WORK_PER_THREAD", 4}})
 See the docs for multiply_transpose() .
 
const kernel_cl< out_buffer, out_buffer, out_buffer, out_buffer, in_buffer, in_buffer, in_buffer, in_buffer, in_buffer, int, int, int, int, int, int, int, int, int, int, int, int, int > stan::math::opencl_kernels::neg_binomial_2_log_glm ("neg_binomial_2_log_glm", {digamma_device_function, log1p_exp_device_function, neg_binomial_2_log_glm_kernel_code}, {{"REDUCTION_STEP_SIZE", 4}, {"LOCAL_SIZE_", 64}})
 See the docs for neg_binomial_2_log_glm_lpmf() .
 
const kernel_cl< in_out_buffer, in_buffer, int, int > stan::math::opencl_kernels::neg_rect_lower_tri_multiply ("neg_rect_lower_tri_multiply", {thread_block_helpers, neg_rect_lower_tri_multiply_kernel_code}, {{"THREAD_BLOCK_SIZE", 32}, {"WORK_PER_THREAD", 8}})
 See the docs for neg_rect_lower_tri_multiply() .
 
const kernel_cl< out_buffer, out_buffer, out_buffer, out_buffer, in_buffer, in_buffer, in_buffer, in_buffer, int, int, int, int, int, int > stan::math::opencl_kernels::ordered_logistic_glm ("ordered_logistic_glm", {log1p_exp_device_function, log1m_exp_device_function, inv_logit_device_function, ordered_logistic_glm_kernel_code}, {{"REDUCTION_STEP_SIZE", 4}, {"LOCAL_SIZE_", 64}})
 See the docs for ordered_logistic_glm() .
 
const kernel_cl< out_buffer, out_buffer, out_buffer, in_buffer, in_buffer, in_buffer, int, int, int, int, int, int > stan::math::opencl_kernels::ordered_logistic ("ordered_logistic", {log1p_exp_device_function, log1m_exp_device_function, inv_logit_device_function, ordered_logistic_kernel_code}, {{"REDUCTION_STEP_SIZE", 4}, {"LOCAL_SIZE_", 64}})
 See the docs for ordered_logistic() .
 
const kernel_cl< out_buffer, in_buffer, int, int, matrix_cl_view > stan::math::opencl_kernels::pack ("pack", {indexing_helpers, pack_kernel_code})
 See the docs for pack() .
 
const kernel_cl< in_out_buffer, in_buffer, int, int, matrix_cl_view > stan::math::opencl_kernels::rep_matrix_rev ("rep_matrix_rev", {view_kernel_helpers, rep_matrix_rev_kernel_code})
 See the docs for rep_matrix_rev() .
 
const kernel_cl< out_buffer, in_buffer, int, int, matrix_cl_view > stan::math::opencl_kernels::unpack ("unpack", {indexing_helpers, unpack_kernel_code})
 See the docs for unpack() .