|
auto | compile_kernel (const char *name, const std::vector< std::string > &sources, const std::unordered_map< std::string, int > &options) |
| Compile an OpenCL kernel.
|
|
__kernel void | add_batch (__global double *B, __global double *A, unsigned int rows, unsigned int cols, unsigned int batch_size) |
| Sums a batch of matrices.
|
|
__kernel void | batch_identity (__global double *A, unsigned int batch_rows, unsigned int size) |
| Makes a batch of smaller identity matrices inside the input matrix.
|
|
__kernel void | categorical_logit_glm (__global double *logp_global, __global double *exp_lin_global, __global double *inv_sum_exp_lin_global, __global double *neg_softmax_lin_global, __global double *alpha_derivative, const __global int *y_global, const __global double *x_beta_global, const __global double *alpha_global, const int N_instances, const int N_attributes, const int N_classes, const int is_y_vector, const int need_alpha_derivative, const int need_neg_softmax_lin_global) |
| GPU implementation of Generalized Linear Model (GLM) with categorical distribution and logit (softmax) link function.
|
|
__kernel void | categorical_logit_glm_beta_derivative (__global double *beta_derivative, __global double *temp, const __global int *y, const __global double *x, const int N_instances, const int N_attributes, const int N_classes, const int is_y_vector) |
| Calculates derivative wrt beta.
|
|
__kernel void | is_symmetric (__global double *A, __global int *flag, unsigned int rows, unsigned int cols, double tolerance) |
| Check if the matrix_cl is symmetric.
|
|
__kernel void | cholesky_decompose (__global double *A, int rows) |
| Calculates the Cholesky Decomposition of a matrix on an OpenCL device.
|
|
__kernel void | cumulative_sum1 (__global SCAL *out_wgs, __global SCAL *out_threads, __global SCAL *in, int size) |
| First kernel of the cumulative sum implementation.
|
|
__kernel void | cumulative_sum2 (__global SCAL *data, int size) |
| Second kernel of the cumulative sum implementation.
|
|
__kernel void | cumulative_sum3 (__global SCAL *out, __global SCAL *in_data, __global SCAL *in_threads, __global SCAL *in_wgs, int size) |
| Third kernel of the cumulative sum implementation.
|
|
void | atomic_add_double (__global double *val, double delta) |
| Atomically add to a double value.
|
|
void | local_atomic_add_double (__local double *val, double delta) |
| Atomically add to a local double value.
|
|
double | beta (double a, double b) |
| Return the beta function applied to the specified arguments.
|
|
double | binomial_coefficient_log (double n, double k) |
| Return the log of the binomial coefficient for the specified arguments.
|
|
double | digamma (double x) |
| Calculates the digamma function - derivative of logarithm of gamma.
|
|
double | inv_logit (double x) |
| Returns the inverse logit function applied to the kernel generator expression.
|
|
double | inv_Phi (double p) |
| Return the inv_Phi function applied to the specified argument.
|
|
double | inv_square (double x) |
| Calculates 1 / (x*x)
|
|
double | lbeta (double a, double b) |
| Return the log of the beta function applied to the specified arguments.
|
|
double | lgamma_stirling (double x) |
| Return the Stirling approximation to the lgamma function.
|
|
double | lgamma_stirling_diff (double x) |
| Return the difference between log of the gamma function and its Stirling approximation.
|
|
double | lmultiply (double a, double b) |
| Return the first argument times the natural log of the second argument if either argument is non-zero and 0 if both arguments are 0.
|
|
double | log1m (double a) |
| Calculates the natural logarithm of one minus the specified value.
|
|
double | log1m_exp (double a) |
| Calculates the natural logarithm of one minus the exponential of the specified value without overflow.
|
|
double | log1m_inv_logit (double x) |
| Return the natural logarithm of 1 minus the inverse logit applied to the kernel generator expression.
|
|
double | log1p_exp (double a) |
| Calculates the log of 1 plus the exponential of the specified value without overflow.
|
|
double | log_diff_exp (double x, double y) |
| The natural logarithm of the difference of the natural exponentiation of x and the natural exponentiation of y.
|
|
double | log_inv_logit (double x) |
| Return the natural logarithm of the inverse logit of the specified argument.
|
|
double | log_inv_logit_diff (double x, double y) |
| Returns the natural logarithm of the difference of the inverse logits of the specified arguments.
|
|
double | logit (double x) |
| Return the log odds applied to the kernel generator expression.
|
|
double | multiply_log (double a, double b) |
| Calculate the value of the first argument times log of the second argument while behaving properly with 0 inputs.
|
|
double | Phi (double x) |
| Return the Phi function applied to the specified argument.
|
|
double | Phi_approx (double x) |
| Return the Phi_approx function applied to the specified argument.
|
|
double | trigamma (double x) |
| Return the trigamma function applied to the argument.
|
|
__kernel void | diag_inv (__global double *A, __global double *tmp_inv, int rows) |
| Calculates inplace submatrix inversions along the matrix diagonal.
|
|
__kernel void | divide_columns_vec (__global double *A, __global double *vec, int vec_size) |
| Divides the columns of A element-wise by the values in vec.
|
|
__kernel void | fill_strict_tri (__global double *A, double val, unsigned int rows, unsigned int cols, unsigned int view_A) |
| Stores constant in the triangular part of a matrix on the OpenCL device.
|
|
__kernel void | gp_exp_quad_cov (const __global double *x, __global double *res, const double sigma_sq, const double neg_half_inv_l_sq, const int size, const int element_size) |
| GPU part of calculation of squared exponential kernel.
|
|
__kernel void | gp_exp_quad_cov_cross (const __global double *x1, const __global double *x2, __global double *res, const double sigma_sq, const double neg_half_inv_l_sq, const int size1, const int size2, const int element_size) |
| GPU part of calculation of squared exponential kernel.
|
|
__kernel void | gp_exponential_cov (const __global double *x, __global double *res, const double sigma_sq, const double neg_inv_l, const int size, const int element_size) |
| GPU part of calculation of Matern exponential kernel.
|
|
__kernel void | gp_exponential_cov_cross (const __global double *x1, const __global double *x2, __global double *res, const double sigma_sq, const double neg_inv_l, const int size1, const int size2, const int element_size) |
| GPU part of calculation of Matern exponential kernel.
|
|
__kernel void | gp_matern32_cov (const __global double *x, __global double *res, const double sigma_sq, const double root_3_inv_l, const int size, const int element_size) |
| GPU part of calculation of Matern 3/2 kernel.
|
|
__kernel void | gp_matern32_cov_cross (const __global double *x1, const __global double *x2, __global double *res, const double sigma_sq, const double root_3_inv_l, const int size1, const int size2, const int element_size) |
| GPU part of calculation of Matern 3/2 kernel.
|
|
__kernel void | gp_matern52_cov (const __global double *x, __global double *res, const double sigma_sq, const double root_5_inv_l, const double inv_l_sq_5_3, const int size, const int element_size) |
| GPU part of calculation of Matern 5/2 kernel.
|
|
__kernel void | gp_matern52_cov_cross (const __global double *x1, const __global double *x2, __global double *res, const double sigma_sq, const double root_5_inv_l, const double inv_l_sq_5_3, const int size1, const int size2, const int element_size) |
| GPU part of calculation of Matern 5/2 kernel.
|
|
__kernel void | indexing_rev (__global double *adj, const __global int *index, const __global double *res, int size) |
| Increments adjoint of the indexing operation argument given the indices and adjoints of the indexing result.
|
|
__kernel void | indexing_rev (__global double *adj, const __global int *index, const __global double *res, __local double *adj_loc, int index_size, int adj_size) |
| Increments adjoint of the indexing operation argument given the indices and adjoints of the indexing result.
|
|
__kernel void | inv_lower_tri_multiply (__global double *A, __global double *temp, const int A_rows, const int rows) |
| Calculates B = C * A.
|
|
__kernel void | matrix_multiply (const __global double *A, const __global double *B, __global double *C, const int M, const int N, const int K, unsigned int view_A, unsigned int view_B) |
| Matrix multiplication on the OpenCL device.
|
|
__kernel void | row_vector_matrix_multiply (const __global double *A, const __global double *B, __global double *R, const int N, const int K, unsigned int view_A, unsigned int view_B) |
| Row vector-matrix multiplication R=A*B on the OpenCL device.
|
|
void | merge (__global SCAL *A, __global SCAL *B, __global SCAL *res, int A_size, int B_size) |
| Merges two sorted runs into a single sorted run of combined length.
|
|
int | binary_search (__global SCAL *input, int start, int end, SCAL value) |
| Searches for the index of the element that is larger than or equal to the given value in the given range.
|
|
__kernel void | merge_step (__global SCAL *output, __global SCAL *input, int run_len, int size, int tasks) |
| Merges sorted runs into longer sorted runs.
|
|
int | get_sturm_count_tri (const __global double *diagonal, const __global double *subdiagonal_squared, const double shift, const int n) |
| Calculates lower Sturm count of a tridiagonal matrix T - number of eigenvalues lower than shift.
|
|
void | eigenvals_bisect (const __global double *diagonal, const __global double *subdiagonal_squared, double *low_res, double *high_res, const double min_eigval, const double max_eigval, const int n, const int i) |
| Calculates i-th largest eigenvalue of tridiagonal matrix represented by a LDL decomposition using bisection.
|
|
int | get_sturm_count_ldl (const __global double_d *l, const __global double_d *d, const double_d shift, const int n) |
| Calculates Sturm count of a LDL decomposition of a tridiagonal matrix - number of eigenvalues larger or equal to shift.
|
|
void | eigenvals_bisect_refine (const __global double_d *l, const __global double_d *d, double_d *low_res, double_d *high_res, const int n, const int i) |
| Refines bounds on the i-th largest eigenvalue of a LDL decomposition using bisection.
|
|
__kernel void | eigenvals (const __global double *diagonal, const __global double *subdiagonal_squared, const __global double_d *l, const __global double_d *d, __global double *eigval_global, __global double_d *shifted_low_global, __global double_d *shifted_high_global, const double min_eigval, const double max_eigval, const double shift, const char do_refine) |
| Calculates eigenvalues of a tridiagonal matrix T and refines shifted eigenvalues using shifted LDL decomposition of T.
|
|
int | get_twisted_factorization (const __global double_d *l, const __global double_d *d, double_d shift, __global double_d *l_plus, __global double_d *u_minus, __global double_d *s) |
| Calculates shifted LDL and UDU factorizations.
|
|
void | calculate_eigenvector (const __global double_d *l_plus, const __global double_d *u_minus, const __global double *subdiag, int twist_idx, __global double *eigenvectors) |
| Calculates an eigenvector from twisted factorization T - shift * I = L+.
|
|
__kernel void | get_eigenvectors (const __global double_d *l, const __global double_d *d, const __global double *subdiag, const __global double_d *shifted_eigvals, __global double_d *l_plus, __global double_d *u_minus, __global double_d *temp, __global double *eigenvectors) |
| Calculates eigenvectors for (shifted) eigenvalues.
|
|
__kernel void | multiply_transpose (const __global double *A, __global double *B, const int M, const int N) |
| Matrix multiplication of the form A*A^T on the OpenCL device.
|
|
__kernel void | neg_binomial_2_log_glm (__global double *logp_global, __global double *theta_derivative_global, __global double *theta_derivative_sum, __global double *phi_derivative_global, const __global int *y_global, const __global double *x, const __global double *alpha, const __global double *beta, const __global double *phi_global, const int N, const int M, const int is_y_vector, const int is_alpha_vector, const int is_phi_vector, const int need_theta_derivative, const int need_theta_derivative_sum, const int need_phi_derivative, const int need_phi_derivative_sum, const int need_logp1, const int need_logp2, const int need_logp3, const int need_logp4) |
| GPU implementation of Generalized Linear Model (GLM) with Negative-Binomial-2 distribution and log link function.
|
|
__kernel void | neg_rect_lower_tri_multiply (__global double *A, const __global double *temp, const int A_rows, const int rows) |
| Calculates C = -B * A where B is rectangular and A is a lower triangular.
|
|
__kernel void | ordered_logistic_glm (__global double *location_sum, __global double *logp_global, __global double *location_derivative, __global double *cuts_derivative, const __global int *y_global, const __global double *x, const __global double *beta, const __global double *cuts, const int N_instances, const int N_attributes, const int N_classes, const int is_y_vector, const int need_location_derivative, const int need_cuts_derivative) |
| GPU implementation of ordinal regression Generalized Linear Model (GLM).
|
|
__kernel void | ordered_logistic (__global double *logp_global, __global double *lambda_derivative, __global double *cuts_derivative, const __global int *y_global, const __global double *lambda_global, const __global double *cuts, const int N_instances, const int N_classes, const int is_y_vector, const int is_cuts_matrix, const int need_lambda_derivative, const int need_cuts_derivative) |
| GPU implementation of ordinal regression.
|
|
__kernel void | pack (__global double *A, __global double *B, unsigned int rows, unsigned int cols, unsigned int view) |
| Packs a flat matrix to a packed triangular matrix.
|
|
__kernel void | rep_matrix_rev (__global double *A_adj, __global double *B_adj, unsigned int B_rows, unsigned int B_cols, unsigned int view_B) |
| Implements reverse pass of rep_matrix.
|
|
__kernel void | tridiagonalization_householder (__global double *P, __global double *V, __global double *q_glob, const int P_rows, const int V_rows, const int j, const int k) |
| Calculates the Householder vector and the first element of the vector v.
|
|
__kernel void | tridiagonalization_v_step_1 (const __global double *P, const __global double *V, __global double *Uu, __global double *Vu, const int P_rows, const int V_rows, const int k) |
| Calculates first part of constructing the vector v: Uu = Pb * u and Vu = Vl * u.
|
|
__kernel void | tridiagonalization_v_step_2 (const __global double *P, __global double *V, const __global double *Uu, const __global double *Vu, const int P_rows, const int V_rows, const int k, const int j) |
| Second part in constructing vector v: v = Pb * u + V * Uu + U * Vu.
|
|
__kernel void | tridiagonalization_v_step_3 (__global double *P, __global double *V, __global double *q, const int P_rows, const int V_rows, const int k, const int j) |
| Third part in constructing vector v: v-=0.5*(v^T*u)*u, where u is the Householder vector.
|
|
__kernel void | unpack (__global double *B, __global double *A, unsigned int rows, unsigned int cols, unsigned int view) |
| Unpacks a packed triangular matrix to a flat matrix.
|
|
|
const kernel_cl< out_buffer, in_buffer, int, int, int > | add_batch ("add_batch", {indexing_helpers, add_batch_kernel_code}) |
| See the docs for add_batch() .
|
|
const kernel_cl< out_buffer, int, int > | batch_identity ("batch_identity", {indexing_helpers, batch_identity_kernel_code}) |
| See the docs for batch_identity() .
|
|
const kernel_cl< out_buffer, out_buffer, out_buffer, out_buffer, out_buffer, in_buffer, in_buffer, in_buffer, int, int, int, int, int, int > | categorical_logit_glm ("categorical_logit_glm", {categorical_logit_glm_kernel_code}, {{"REDUCTION_STEP_SIZE", 4}, {"LOCAL_SIZE_", 64}}) |
| See the docs for categorical_logit_glm() .
|
|
const kernel_cl< in_out_buffer, in_out_buffer, in_buffer, in_buffer, int, int, int, int > | categorical_logit_glm_beta_derivative ("categorical_logit_glm_beta_derivative", {categorical_logit_glm_beta_derivative_kernel_code}) |
| See the docs for categorical_logit_glm_beta_derivative() .
|
|
const kernel_cl< in_buffer, out_buffer, int, int, const double > | check_symmetric ("is_symmetric", {indexing_helpers, is_symmetric_kernel_code}) |
| See the docs for is_symmetric() .
|
|
const kernel_cl< in_out_buffer, int > | cholesky_decompose ("cholesky_decompose", {indexing_helpers, cholesky_decompose_kernel_code}) |
| See the docs for cholesky_decompose() .
|
|
const kernel_cl< in_out_buffer, in_out_buffer, int > | diag_inv ("diag_inv", {indexing_helpers, diag_inv_kernel_code}, {{"THREAD_BLOCK_SIZE", 32}}) |
| See the docs for diag_inv() .
|
|
const kernel_cl< out_buffer, in_buffer, int > | divide_columns_vec ("divide_columns_vec", {indexing_helpers, divide_columns_kernel_code}) |
| See the docs for divide_columns_vec() .
|
|
const kernel_cl< out_buffer, double, int, int, matrix_cl_view > | fill_strict_tri ("fill_strict_tri", {indexing_helpers, view_kernel_helpers, fill_strict_tri_kernel_code}) |
| See the docs for fill_strict_tri() .
|
|
const kernel_cl< in_buffer, out_buffer, double, double, int, int > | gp_exp_quad_cov ("gp_exp_quad_cov", {gp_exp_quad_cov_kernel_code}) |
| See the docs for gp_exp_quad_cov() .
|
|
const kernel_cl< in_buffer, in_buffer, out_buffer, double, double, int, int, int > | gp_exp_quad_cov_cross ("gp_exp_quad_cov_cross", {gp_exp_quad_cov_cross_kernel_code}) |
| See the docs for gp_exp_quad_cov_cross() .
|
|
const kernel_cl< in_buffer, out_buffer, double, double, int, int > | gp_exponential_cov ("gp_exponential_cov", {gp_exponential_cov_kernel_code}) |
| See the docs for gp_exponential_cov() .
|
|
const kernel_cl< in_buffer, in_buffer, out_buffer, double, double, int, int, int > | gp_exponential_cov_cross ("gp_exponential_cov_cross", {gp_exponential_cov_cross_kernel_code}) |
| See the docs for gp_exponential_cov_cross() .
|
|
const kernel_cl< in_buffer, out_buffer, double, double, int, int > | gp_matern32_cov ("gp_matern32_cov", {gp_matern32_cov_kernel_code}) |
| See the docs for gp_matern32_cov() .
|
|
const kernel_cl< in_buffer, in_buffer, out_buffer, double, double, int, int, int > | gp_matern32_cov_cross ("gp_matern32_cov_cross", {gp_matern32_cov_cross_kernel_code}) |
| See the docs for gp_matern32_cov_cross() .
|
|
const kernel_cl< in_buffer, out_buffer, double, double, double, int, int > | gp_matern52_cov ("gp_matern52_cov", {gp_matern52_cov_kernel_code}) |
| See the docs for gp_matern52_cov() .
|
|
const kernel_cl< in_buffer, in_buffer, out_buffer, double, double, double, int, int, int > | gp_matern52_cov_cross ("gp_matern52_cov_cross", {gp_matern52_cov_cross_kernel_code}) |
| See the docs for gp_matern52_cov_cross() .
|
|
static const std::string | indexing_helpers |
| Defines helper macros for common matrix indexing operations.
|
|
static const std::string | thread_block_helpers |
| Defines a helper macro for kernels with 2D local size.
|
|
const kernel_cl< in_out_buffer, in_buffer, in_buffer, int > | indexing_rev_global_atomic ("indexing_rev", {atomic_add_double_device_function, indexing_rev_global_atomic_kernel_code}) |
| See the docs for indexing_rev() .
|
|
const kernel_cl< in_out_buffer, in_buffer, in_buffer, cl::LocalSpaceArg, int, int > | indexing_rev_local_atomic ("indexing_rev", {atomic_add_double_device_function, indexing_rev_local_atomic_kernel_code}) |
| See the docs for indexing_rev() .
|
|
const kernel_cl< in_out_buffer, in_buffer, in_buffer, cl::LocalSpaceArg, int, int > | indexing_rev_local_independent ("indexing_rev", {atomic_add_double_device_function, indexing_rev_local_independent_kernel_code}) |
| See the docs for indexing_rev() .
|
|
const kernel_cl< in_buffer, out_buffer, int, int > | inv_lower_tri_multiply ("inv_lower_tri_multiply", {thread_block_helpers, inv_lower_tri_multiply_kernel_code}, {{"THREAD_BLOCK_SIZE", 32}, {"WORK_PER_THREAD", 8}}) |
| See the docs for inv_lower_tri_multiply() .
|
|
const kernel_cl< in_buffer, in_buffer, out_buffer, int, int, int, matrix_cl_view, matrix_cl_view > | matrix_multiply ("matrix_multiply", {thread_block_helpers, view_kernel_helpers, matrix_multiply_kernel_code}, {{"THREAD_BLOCK_SIZE", 32}, {"WORK_PER_THREAD", 8}}) |
| See the docs for matrix_multiply() .
|
|
const kernel_cl< in_buffer, in_buffer, out_buffer, int, int, matrix_cl_view, matrix_cl_view > | row_vector_matrix_multiply ("row_vector_matrix_multiply", {view_kernel_helpers, row_vector_matrix_multiply_kernel_code}, {{"LOCAL_SIZE_", 64}, {"REDUCTION_STEP_SIZE", 4}}) |
| See the docs for row_vector_matrix_multiply() .
|
|
const kernel_cl< in_buffer, in_buffer, in_buffer, in_buffer, out_buffer, out_buffer, out_buffer, double, double, double, char > | eigenvals ("eigenvals", {stan::math::internal::double_d_src, eigenvals_bisect_kernel_code}) |
|
const kernel_cl< in_buffer, in_buffer, in_buffer, in_buffer, in_out_buffer, in_out_buffer, in_out_buffer, out_buffer > | get_eigenvectors ("get_eigenvectors", {stan::math::internal::double_d_src, get_eigenvectors_kernel_code}) |
|
const kernel_cl< in_buffer, out_buffer, int, int > | multiply_transpose ("multiply_transpose", {thread_block_helpers, multiply_transpose_kernel_code}, {{"THREAD_BLOCK_SIZE", 32}, {"WORK_PER_THREAD", 4}}) |
| See the docs for multiply_transpose() .
|
|
const kernel_cl< out_buffer, out_buffer, out_buffer, out_buffer, in_buffer, in_buffer, in_buffer, in_buffer, in_buffer, int, int, int, int, int, int, int, int, int, int, int, int, int > | neg_binomial_2_log_glm ("neg_binomial_2_log_glm", {digamma_device_function, log1p_exp_device_function, neg_binomial_2_log_glm_kernel_code}, {{"REDUCTION_STEP_SIZE", 4}, {"LOCAL_SIZE_", 64}}) |
| See the docs for neg_binomial_2_log_glm_lpmf() .
|
|
const kernel_cl< in_out_buffer, in_buffer, int, int > | neg_rect_lower_tri_multiply ("neg_rect_lower_tri_multiply", {thread_block_helpers, neg_rect_lower_tri_multiply_kernel_code}, {{"THREAD_BLOCK_SIZE", 32}, {"WORK_PER_THREAD", 8}}) |
| See the docs for neg_rect_lower_tri_multiply() .
|
|
const kernel_cl< out_buffer, out_buffer, out_buffer, out_buffer, in_buffer, in_buffer, in_buffer, in_buffer, int, int, int, int, int, int > | ordered_logistic_glm ("ordered_logistic_glm", {log1p_exp_device_function, log1m_exp_device_function, inv_logit_device_function, ordered_logistic_glm_kernel_code}, {{"REDUCTION_STEP_SIZE", 4}, {"LOCAL_SIZE_", 64}}) |
| See the docs for ordered_logistic_glm() .
|
|
const kernel_cl< out_buffer, out_buffer, out_buffer, in_buffer, in_buffer, in_buffer, int, int, int, int, int, int > | ordered_logistic ("ordered_logistic", {log1p_exp_device_function, log1m_exp_device_function, inv_logit_device_function, ordered_logistic_kernel_code}, {{"REDUCTION_STEP_SIZE", 4}, {"LOCAL_SIZE_", 64}}) |
| See the docs for ordered_logistic() .
|
|
const kernel_cl< out_buffer, in_buffer, int, int, matrix_cl_view > | pack ("pack", {indexing_helpers, pack_kernel_code}) |
| See the docs for pack() .
|
|
const kernel_cl< in_out_buffer, in_buffer, int, int, matrix_cl_view > | rep_matrix_rev ("rep_matrix_rev", {view_kernel_helpers, rep_matrix_rev_kernel_code}) |
| See the docs for rep_matrix_rev() .
|
|
const kernel_cl< in_out_buffer, in_out_buffer, out_buffer, int, int, int, int > | tridiagonalization_householder ("tridiagonalization_householder", {tridiagonalization_householder_kernel_code}, {{"REDUCTION_STEP_SIZE", 4}, {"LOCAL_SIZE_", 1024}}) |
|
const kernel_cl< in_buffer, in_buffer, out_buffer, out_buffer, int, int, int > | tridiagonalization_v_step_1 ("tridiagonalization_v_step_1", {tridiagonalization_v_step_1_kernel_code}, {{"REDUCTION_STEP_SIZE", 4}, {"LOCAL_SIZE_", 64}}) |
|
const kernel_cl< in_buffer, out_buffer, in_buffer, in_buffer, int, int, int, int > | tridiagonalization_v_step_2 ("tridiagonalization_v_step_2", {tridiagonalization_v_step_2_kernel_code}, {{"REDUCTION_STEP_SIZE", 4}, {"LOCAL_SIZE_", 64}}) |
|
const kernel_cl< in_out_buffer, in_out_buffer, out_buffer, int, int, int, int > | tridiagonalization_v_step_3 ("tridiagonalization_v_step_3", {tridiagonalization_v_step_3_kernel_code}, {{"REDUCTION_STEP_SIZE", 4}, {"LOCAL_SIZE_", 1024}}) |
|
const kernel_cl< out_buffer, in_buffer, int, int, matrix_cl_view > | unpack ("unpack", {indexing_helpers, unpack_kernel_code}) |
| See the docs for unpack() .
|
|