math/opencl_2kernels_2cholesky__decompose_8hpp_source.html

#ifndef STAN_MATH_OPENCL_KERNELS_CHOLESKY_DECOMPOSE_HPP

#define STAN_MATH_OPENCL_KERNELS_CHOLESKY_DECOMPOSE_HPP

#ifdef STAN_OPENCL


#include <stan/math/opencl/kernel_cl.hpp>

#include <stan/math/opencl/buffer_types.hpp>

#include <string>


namespace stan {

namespace math {

namespace opencl_kernels {

// \cond
static constexpr const char* cholesky_decompose_kernel_code = STRINGIFY(
    // \endcond
    /**
     * In-place Cholesky decomposition of the square matrix stored in A.
     *
     * The rows/columns are processed one after another; only the work inside
     * each step is spread over the work-items. Work-item 0 computes the
     * diagonal entry of step j, then every other work-item updates one entry
     * A(local_index, j): entries with local_index < j are set to zero and
     * entries with local_index > j are filled with the factor values.
     *
     * Work-items are identified by get_local_id(0) and synchronized with
     * barrier(), which only synchronizes within one work-group.
     * NOTE(review): this presumably requires a launch with a single
     * work-group whose size equals rows - confirm against the caller.
     * NOTE(review): A(i, j) is a macro expected to come from the indexing
     * helpers prepended to this source - confirm its exact layout there.
     *
     * @param[in,out] A matrix to decompose; overwritten with the result.
     * @param rows number of rows (and columns) of A.
     */
    __kernel void cholesky_decompose(__global double* A, int rows) {
      const int local_index = get_local_id(0);
      // Sequential outer loop over the steps of the decomposition; only the
      // work inside one step is parallelized.
      for (int j = 0; j < rows; j++) {
        // A single work-item computes the diagonal entry of step j from the
        // entries A(j, 0..j-1) written in the previous steps.
        if (local_index == 0) {
          double sum = 0.0;
          for (int k = 0; k < j; k++) {
            sum += A(j, k) * A(j, k);
          }
          A(j, j) = sqrt(A(j, j) - sum);
        }
        // A lives in global memory, so the fence has to cover global memory
        // for the new diagonal entry to be visible to the other work-items.
        barrier(CLK_GLOBAL_MEM_FENCE);
        if (local_index < j) {
          // Entry on the other side of the diagonal: clear it.
          A(local_index, j) = 0.0;
        } else if (local_index > j) {
          // Remaining entries of step j, one per work-item.
          double sum = 0.0;
          for (int k = 0; k < j; k++) {
            sum += A(local_index, k) * A(j, k);
          }
          A(local_index, j) = (A(local_index, j) - sum) / A(j, j);
        }
        // The next step reads the entries written above by other work-items,
        // so their global-memory writes must be visible before continuing.
        barrier(CLK_GLOBAL_MEM_FENCE);
      }
    }
    // \cond
);
// \endcond


/**
 * Kernel functor for the `cholesky_decompose` OpenCL kernel.
 *
 * It is constructed from the kernel name and the list of source strings
 * `indexing_helpers` and `cholesky_decompose_kernel_code`. The template
 * arguments follow the kernel parameters: an in/out buffer for the matrix
 * `A` and an `int` for `rows`.
 * NOTE(review): launch configuration (global/local sizes) is chosen by the
 * caller and is not visible here.
 */
const kernel_cl<in_out_buffer, int> cholesky_decompose(

    "cholesky_decompose", {indexing_helpers, cholesky_decompose_kernel_code});


}  // namespace opencl_kernels

}  // namespace math

}  // namespace stan

#endif

#endif

buffer_types.hpp

stan::math::opencl_kernels::cholesky_decompose
const kernel_cl< in_out_buffer, int > cholesky_decompose("cholesky_decompose", {indexing_helpers, cholesky_decompose_kernel_code})
See the docs for cholesky_decompose().

stan::math::rows
int64_t rows(const T_x &x)
Returns the number of rows in the specified kernel generator expression.
Definition rows.hpp:22

kernel_cl.hpp

stan::math::opencl_kernels::indexing_helpers
static const std::string indexing_helpers
Defines helper macros for common matrix indexing operations.
Definition helpers.hpp:14

stan::math::sqrt
fvar< T > sqrt(const fvar< T > &x)
Definition sqrt.hpp:17

stan::math::sum
auto sum(const std::vector< T > &m)
Return the sum of the entries of the specified standard vector.
Definition sum.hpp:23

stan
The lgamma implementation in stan-math is based on either the reentrant safe lgamma_r implementation ...
Definition unit_vector_constrain.hpp:15

STRINGIFY
#define STRINGIFY(...)
Definition stringify.hpp:9

stan::math::opencl_kernels::kernel_cl
Creates functor for kernels.
Definition kernel_cl.hpp:174