|
auto | compile_kernel (const char *name, const std::vector< std::string > &sources, const std::unordered_map< std::string, int > &options) |
| Compile an OpenCL kernel.
|
|
__kernel void | add_batch (__global double *B, __global double *A, unsigned int rows, unsigned int cols, unsigned int batch_size) |
| Sums a batch of matrices.
|
|
__kernel void | batch_identity (__global double *A, unsigned int batch_rows, unsigned int size) |
| Makes a batch of smaller identity matrices inside the input matrix.
|
|
__kernel void | categorical_logit_glm (__global double *logp_global, __global double *exp_lin_global, __global double *inv_sum_exp_lin_global, __global double *neg_softmax_lin_global, __global double *alpha_derivative, const __global int *y_global, const __global double *x_beta_global, const __global double *alpha_global, const int N_instances, const int N_attributes, const int N_classes, const int is_y_vector, const int need_alpha_derivative, const int need_neg_softmax_lin_global) |
| GPU implementation of Generalized Linear Model (GLM) with categorical distribution and logit (softmax) link function.
|
|
__kernel void | categorical_logit_glm_beta_derivative (__global double *beta_derivative, __global double *temp, const __global int *y, const __global double *x, const int N_instances, const int N_attributes, const int N_classes, const int is_y_vector) |
| Calculates derivative wrt beta.
|
|
__kernel void | is_symmetric (__global double *A, __global int *flag, unsigned int rows, unsigned int cols, double tolerance) |
| Check if the matrix_cl is symmetric.
|
|
__kernel void | cholesky_decompose (__global double *A, int rows) |
| Calculates the Cholesky Decomposition of a matrix on an OpenCL device.
|
|
__kernel void | cumulative_sum1 (__global SCAL *out_wgs, __global SCAL *out_threads, __global SCAL *in, int size) |
| First kernel of the cumulative sum implementation.
|
|
__kernel void | cumulative_sum2 (__global SCAL *data, int size) |
| Second kernel of the cumulative sum implementation.
|
|
__kernel void | cumulative_sum3 (__global SCAL *out, __global SCAL *in_data, __global SCAL *in_threads, __global SCAL *in_wgs, int size) |
| Third kernel of the cumulative sum implementation.
|
|
void | atomic_add_double (__global double *val, double delta) |
| Atomically add to a double value.
|
|
void | local_atomic_add_double (__local double *val, double delta) |
| Atomically add to a local double value.
|
|
double | beta (double a, double b) |
| Return the beta function applied to the specified arguments.
|
|
double | binomial_coefficient_log (double n, double k) |
| Return the log of the binomial coefficient for the specified arguments.
|
|
double | digamma (double x) |
| Calculates the digamma function - derivative of logarithm of gamma.
|
|
double | inv_logit (double x) |
| Returns the inverse logit function applied to the kernel generator expression.
|
|
double | inv_Phi (double p) |
| Return the inv_Phi function applied to the specified argument.
|
|
double | inv_square (double x) |
| Calculates 1 / (x*x).
|
|
double | lbeta (double a, double b) |
| Return the log of the beta function applied to the specified arguments.
|
|
double | lgamma_stirling (double x) |
| Return the Stirling approximation to the lgamma function.
|
|
double | lgamma_stirling_diff (double x) |
| Return the difference between log of the gamma function and its Stirling approximation.
|
|
double | lmultiply (double a, double b) |
| Return the first argument times the natural log of the second argument if either argument is non-zero and 0 if both arguments are 0.
|
|
double | log1m (double a) |
| Calculates the natural logarithm of one minus the specified value.
|
|
double | log1m_exp (double a) |
| Calculates the natural logarithm of one minus the exponential of the specified value without overflow.
|
|
double | log1m_inv_logit (double x) |
| Return the natural logarithm of 1 minus the inverse logit applied to the kernel generator expression.
|
|
double | log1p_exp (double a) |
| Calculates the log of 1 plus the exponential of the specified value without overflow.
|
|
double | log_diff_exp (double x, double y) |
| The natural logarithm of the difference of the natural exponentiation of x and the natural exponentiation of y.
|
|
double | log_inv_logit (double x) |
| Return the natural logarithm of the inverse logit of the specified argument.
|
|
double | log_inv_logit_diff (double x, double y) |
| Returns the natural logarithm of the difference of the inverse logits of the specified arguments.
|
|
double | logit (double x) |
| Return the log odds applied to the kernel generator expression.
|
|
double | multiply_log (double a, double b) |
| Calculate the value of the first argument times log of the second argument while behaving properly with 0 inputs.
|
|
double | Phi (double x) |
| Return the Phi function applied to the specified argument.
|
|
double | Phi_approx (double x) |
| Return the Phi_approx function applied to the specified argument.
|
|
double | trigamma (double x) |
| Return the trigamma function applied to the argument.
|
|
__kernel void | diag_inv (__global double *A, __global double *tmp_inv, int rows) |
| Calculates inplace submatrix inversions along the matrix diagonal.
|
|
__kernel void | divide_columns_vec (__global double *A, __global double *vec, int vec_size) |
| Takes A and divides the column vectors in A element-wise by the values in vec.
|
|
__kernel void | fill_strict_tri (__global double *A, double val, unsigned int rows, unsigned int cols, unsigned int view_A) |
| Stores constant in the triangular part of a matrix on the OpenCL device.
|
|
__kernel void | gp_exp_quad_cov (const __global double *x, __global double *res, const double sigma_sq, const double neg_half_inv_l_sq, const int size, const int element_size) |
| GPU part of calculation of squared exponential kernel.
|
|
__kernel void | gp_exp_quad_cov_cross (const __global double *x1, const __global double *x2, __global double *res, const double sigma_sq, const double neg_half_inv_l_sq, const int size1, const int size2, const int element_size) |
| GPU part of calculation of squared exponential kernel.
|
|
__kernel void | gp_exponential_cov (const __global double *x, __global double *res, const double sigma_sq, const double neg_inv_l, const int size, const int element_size) |
| GPU part of calculation of Matern exponential kernel.
|
|
__kernel void | gp_exponential_cov_cross (const __global double *x1, const __global double *x2, __global double *res, const double sigma_sq, const double neg_inv_l, const int size1, const int size2, const int element_size) |
| GPU part of calculation of Matern exponential kernel.
|
|
__kernel void | gp_matern32_cov (const __global double *x, __global double *res, const double sigma_sq, const double root_3_inv_l, const int size, const int element_size) |
| GPU part of calculation of Matern 3/2 kernel.
|
|
__kernel void | gp_matern32_cov_cross (const __global double *x1, const __global double *x2, __global double *res, const double sigma_sq, const double root_3_inv_l, const int size1, const int size2, const int element_size) |
| GPU part of calculation of Matern 3/2 kernel.
|
|
__kernel void | gp_matern52_cov (const __global double *x, __global double *res, const double sigma_sq, const double root_5_inv_l, const double inv_l_sq_5_3, const int size, const int element_size) |
| GPU part of calculation of Matern 5/2 kernel.
|
|
__kernel void | gp_matern52_cov_cross (const __global double *x1, const __global double *x2, __global double *res, const double sigma_sq, const double root_5_inv_l, const double inv_l_sq_5_3, const int size1, const int size2, const int element_size) |
| GPU part of calculation of Matern 5/2 kernel.
|
|
__kernel void | indexing_rev (__global double *adj, const __global int *index, const __global double *res, int size) |
| Increments adjoint of the indexing operation argument given the indices and adjoints of the indexing result.
|
|
__kernel void | indexing_rev (__global double *adj, const __global int *index, const __global double *res, __local double *adj_loc, int index_size, int adj_size) |
| Increments adjoint of the indexing operation argument given the indices and adjoints of the indexing result.
|
|
__kernel void | inv_lower_tri_multiply (__global double *A, __global double *temp, const int A_rows, const int rows) |
| Calculates B = C * A.
|
|
__kernel void | matrix_multiply (const __global double *A, const __global double *B, __global double *C, const int M, const int N, const int K, unsigned int view_A, unsigned int view_B) |
| Matrix multiplication on the OpenCL device.
|
|
__kernel void | row_vector_matrix_multiply (const __global double *A, const __global double *B, __global double *R, const int N, const int K, unsigned int view_A, unsigned int view_B) |
| Row vector-matrix multiplication R=A*B on the OpenCL device.
|
|
void | merge (__global SCAL *A, __global SCAL *B, __global SCAL *res, int A_size, int B_size) |
| Merges two sorted runs into a single sorted run of combined length.
|
|
int | binary_search (__global SCAL *input, int start, int end, SCAL value) |
| Searches for the index of the element that is larger than or equal to the given value in the given range.
|
|
__kernel void | merge_step (__global SCAL *output, __global SCAL *input, int run_len, int size, int tasks) |
| Merges sorted runs into longer sorted runs.
|
|
int | get_sturm_count_tri (const __global double *diagonal, const __global double *subdiagonal_squared, const double shift, const int n) |
| Calculates lower Sturm count of a tridiagonal matrix T - number of eigenvalues lower than shift.
|
|
void | eigenvals_bisect (const __global double *diagonal, const __global double *subdiagonal_squared, double *low_res, double *high_res, const double min_eigval, const double max_eigval, const int n, const int i) |
| Calculates the i-th largest eigenvalue of a tridiagonal matrix represented by an LDL decomposition using bisection.
|
|
int | get_sturm_count_ldl (const __global double_d *l, const __global double_d *d, const double_d shift, const int n) |
| Calculates the Sturm count of an LDL decomposition of a tridiagonal matrix - number of eigenvalues larger than or equal to shift.
|
|
void | eigenvals_bisect_refine (const __global double_d *l, const __global double_d *d, double_d *low_res, double_d *high_res, const int n, const int i) |
| Refines bounds on the i-th largest eigenvalue of an LDL decomposition using bisection.
|
|
__kernel void | eigenvals (const __global double *diagonal, const __global double *subdiagonal_squared, const __global double_d *l, const __global double_d *d, __global double *eigval_global, __global double_d *shifted_low_global, __global double_d *shifted_high_global, const double min_eigval, const double max_eigval, const double shift, const char do_refine) |
| Calculates eigenvalues of a tridiagonal matrix T and refines shifted eigenvalues using shifted LDL decomposition of T.
|
|
int | get_twisted_factorization (const __global double_d *l, const __global double_d *d, double_d shift, __global double_d *l_plus, __global double_d *u_minus, __global double_d *s) |
| Calculates shifted LDL and UDU factorizations.
|
|
void | calculate_eigenvector (const __global double_d *l_plus, const __global double_d *u_minus, const __global double *subdiag, int twist_idx, __global double *eigenvectors) |
| Calculates an eigenvector from twisted factorization T - shift * I = L+.
|
|
__kernel void | get_eigenvectors (const __global double_d *l, const __global double_d *d, const __global double *subdiag, const __global double_d *shifted_eigvals, __global double_d *l_plus, __global double_d *u_minus, __global double_d *temp, __global double *eigenvectors) |
| Calculates eigenvectors for (shifted) eigenvalues.
|
|
__kernel void | multiply_transpose (const __global double *A, __global double *B, const int M, const int N) |
| Matrix multiplication of the form A*A^T on the OpenCL device.
|
|
__kernel void | neg_binomial_2_log_glm (__global double *logp_global, __global double *theta_derivative_global, __global double *theta_derivative_sum, __global double *phi_derivative_global, const __global int *y_global, const __global double *x, const __global double *alpha, const __global double *beta, const __global double *phi_global, const int N, const int M, const int is_y_vector, const int is_alpha_vector, const int is_phi_vector, const int need_theta_derivative, const int need_theta_derivative_sum, const int need_phi_derivative, const int need_phi_derivative_sum, const int need_logp1, const int need_logp2, const int need_logp3, const int need_logp4) |
| GPU implementation of Generalized Linear Model (GLM) with Negative-Binomial-2 distribution and log link function.
|
|
__kernel void | neg_rect_lower_tri_multiply (__global double *A, const __global double *temp, const int A_rows, const int rows) |
| Calculates C = -B * A where B is rectangular and A is a lower triangular.
|
|
__kernel void | ordered_logistic_glm (__global double *location_sum, __global double *logp_global, __global double *location_derivative, __global double *cuts_derivative, const __global int *y_global, const __global double *x, const __global double *beta, const __global double *cuts, const int N_instances, const int N_attributes, const int N_classes, const int is_y_vector, const int need_location_derivative, const int need_cuts_derivative) |
| GPU implementation of ordinal regression Generalized Linear Model (GLM).
|
|
__kernel void | ordered_logistic (__global double *logp_global, __global double *lambda_derivative, __global double *cuts_derivative, const __global int *y_global, const __global double *lambda_global, const __global double *cuts, const int N_instances, const int N_classes, const int is_y_vector, const int is_cuts_matrix, const int need_lambda_derivative, const int need_cuts_derivative) |
| GPU implementation of ordinal regression.
|
|
__kernel void | pack (__global double *A, __global double *B, unsigned int rows, unsigned int cols, unsigned int view) |
| Packs a flat matrix to a packed triangular matrix.
|
|
__kernel void | rep_matrix_rev (__global double *A_adj, __global double *B_adj, unsigned int B_rows, unsigned int B_cols, unsigned int view_B) |
| Implements reverse pass of rep_matrix.
|
|
__kernel void | tridiagonalization_householder (__global double *P, __global double *V, __global double *q_glob, const int P_rows, const int V_rows, const int j, const int k) |
| Calculates the Householder vector and the first element of the vector v.
|
|
__kernel void | tridiagonalization_v_step_1 (const __global double *P, const __global double *V, __global double *Uu, __global double *Vu, const int P_rows, const int V_rows, const int k) |
| Calculates first part of constructing the vector v: Uu = Pb * u and Vu = Vl * u.
|
|
__kernel void | tridiagonalization_v_step_2 (const __global double *P, __global double *V, const __global double *Uu, const __global double *Vu, const int P_rows, const int V_rows, const int k, const int j) |
| Second part in constructing vector v: v = Pb * u + V * Uu + U * Vu.
|
|
__kernel void | tridiagonalization_v_step_3 (__global double *P, __global double *V, __global double *q, const int P_rows, const int V_rows, const int k, const int j) |
| Third part in constructing vector v: v-=0.5*(v^T*u)*u, where u is the Householder vector.
|
|
__kernel void | unpack (__global double *B, __global double *A, unsigned int rows, unsigned int cols, unsigned int view) |
| Unpacks a packed triangular matrix to a flat matrix.
|
|