math/opencl_2prim_2gp__matern32__cov_8hpp_source.html

#ifndef STAN_MATH_OPENCL_PRIM_GP_MATERN32_COV_HPP

#define STAN_MATH_OPENCL_PRIM_GP_MATERN32_COV_HPP

#ifdef STAN_OPENCL


#include <stan/math/prim/meta.hpp>

#include <stan/math/prim/fun/square.hpp>

#include <stan/math/opencl/matrix_cl.hpp>

#include <stan/math/opencl/kernels/gp_matern32_cov.hpp>

#include <stan/math/opencl/err.hpp>

#include <CL/opencl.hpp>


namespace stan {

namespace math {

/**
 * Matern 3/2 kernel on the GPU for a single input matrix and a scalar
 * length scale.
 *
 * The result has `x.cols()` rows and `x.cols()` columns. Presumably each
 * column of `x` is one input point with `x.rows()` coordinates -- confirm
 * against the documentation of `opencl_kernels::gp_matern32_cov`.
 *
 * @tparam T1 type of the input; a non-scalar kernel generator expression
 * @tparam T2 arithmetic type of `sigma`
 * @tparam T3 arithmetic type of `length_scale`
 * @param x input matrix
 * @param sigma scalar whose square is passed to the kernel
 * @param length_scale scalar length scale; the kernel receives
 *   `sqrt(3) / length_scale`
 * @return `x.cols()` by `x.cols()` matrix on the OpenCL device; empty when
 *   `x` has no columns
 * @note A `cl::Error` raised by the kernel launch is handed to
 *   `check_opencl_error`.
 */
template <typename T1, typename T2, typename T3,
          require_all_kernel_expressions_and_none_scalar_t<T1>* = nullptr,
          require_all_arithmetic_t<T2, T3>* = nullptr>
inline matrix_cl<return_type_t<T1, T2, T3>> gp_matern32_cov(
    const T1& x, const T2 sigma, const T3 length_scale) {
  const auto& x_eval = x.eval();
  matrix_cl<return_type_t<T1, T2, T3>> res(x.cols(), x.cols());
  // Nothing to compute for an empty input. Launching with a zero-sized
  // global range is rejected (CL_INVALID_GLOBAL_WORK_SIZE) by OpenCL
  // implementations older than 2.1, so return the empty result directly.
  if (x.cols() == 0) {
    return res;
  }
  // Round the global range up to a whole number of 16x16 work-groups.
  constexpr int block_size = 16;
  const int n_blocks = (x.cols() + block_size - 1) / block_size;
  const int blocked_size = block_size * n_blocks;
  try {
    opencl_kernels::gp_matern32_cov(
        cl::NDRange(blocked_size, blocked_size),
        cl::NDRange(block_size, block_size), x_eval, res, sigma * sigma,
        std::sqrt(3.0) / length_scale, x.cols(), x.rows());
  } catch (const cl::Error& e) {
    check_opencl_error("gp_matern32_cov", e);
  }
  return res;
}


/**
 * Matern 3/2 cross kernel on the GPU for two input matrices and a scalar
 * length scale.
 *
 * The result has `x.cols()` rows and `y.cols()` columns. Presumably each
 * column of `x` and `y` is one input point -- confirm against the
 * documentation of `opencl_kernels::gp_matern32_cov_cross`.
 *
 * @tparam T1 type of the first input; a non-scalar kernel generator
 *   expression
 * @tparam T2 type of the second input; a non-scalar kernel generator
 *   expression
 * @tparam T3 arithmetic type of `sigma`
 * @tparam T4 arithmetic type of `length_scale`
 * @param x first input matrix
 * @param y second input matrix; must have as many rows as `x`
 * @param sigma scalar whose square is passed to the kernel
 * @param length_scale scalar length scale; the kernel receives
 *   `sqrt(3) / length_scale`
 * @return `x.cols()` by `y.cols()` matrix on the OpenCL device; empty when
 *   either input has no columns
 * @note The row counts of `x` and `y` are verified with `check_size_match`.
 *   A `cl::Error` raised by the kernel launch is handed to
 *   `check_opencl_error`.
 */
template <typename T1, typename T2, typename T3, typename T4,
          require_all_kernel_expressions_and_none_scalar_t<T1, T2>* = nullptr,
          require_all_arithmetic_t<T3, T4>* = nullptr>
inline matrix_cl<return_type_t<T1, T2, T3, T4>> gp_matern32_cov(
    const T1& x, const T2& y, const T3 sigma, const T4 length_scale) {
  check_size_match("gp_matern32_cov_cross", "x", x.rows(), "y", y.rows());
  matrix_cl<return_type_t<T1, T2, T3, T4>> res(x.cols(), y.cols());
  const auto& x_eval = x.eval();
  const auto& y_eval = y.eval();
  // Nothing to compute when the result is empty. Launching with a zero-sized
  // global range is rejected (CL_INVALID_GLOBAL_WORK_SIZE) by OpenCL
  // implementations older than 2.1, so return the empty result directly.
  if (x.cols() == 0 || y.cols() == 0) {
    return res;
  }
  // Round each global dimension up to a whole number of 16x16 work-groups.
  constexpr int block_size = 16;
  const int x_blocks = (x.cols() + block_size - 1) / block_size;
  const int x_blocked_size = block_size * x_blocks;
  const int y_blocks = (y.cols() + block_size - 1) / block_size;
  const int y_blocked_size = block_size * y_blocks;
  try {
    opencl_kernels::gp_matern32_cov_cross(
        cl::NDRange(x_blocked_size, y_blocked_size),
        cl::NDRange(block_size, block_size), x_eval, y_eval, res, sigma * sigma,
        std::sqrt(3.0) / length_scale, x.cols(), y.cols(), x.rows());
  } catch (const cl::Error& e) {
    check_opencl_error("gp_matern32_cov_cross", e);
  }
  return res;
}


/**
 * Matern 3/2 kernel on the GPU for a single input matrix and a non-scalar
 * length scale.
 *
 * `x` is first divided elementwise by `rowwise_broadcast(length_scale)`;
 * the kernel is then run on the scaled input with `sqrt(3)` in place of
 * `sqrt(3) / length_scale`. The result has `x.cols()` rows and `x.cols()`
 * columns.
 *
 * @tparam T1 type of the input; a non-scalar kernel generator expression
 * @tparam T2 arithmetic type of `sigma`
 * @tparam T3 type of the length scale; a non-scalar kernel generator
 *   expression
 * @param x input matrix
 * @param sigma scalar whose square is passed to the kernel
 * @param length_scale length scales, broadcast against `x` (presumably one
 *   entry per row of `x` -- confirm against `rowwise_broadcast`)
 * @return `x.cols()` by `x.cols()` matrix on the OpenCL device; empty when
 *   `x` has no columns
 * @note A `cl::Error` raised by the kernel launch is handed to
 *   `check_opencl_error`.
 */
template <typename T1, typename T2, typename T3,
          require_all_kernel_expressions_and_none_scalar_t<T1, T3>* = nullptr,
          require_all_arithmetic_t<T2>* = nullptr>
inline matrix_cl<return_type_t<T1, T2, T3>> gp_matern32_cov(
    const T1& x, const T2 sigma, const T3 length_scale) {
  // Pre-scale the input so the kernel can be reused with a unit length scale.
  const auto& x_eval = elt_divide(x, rowwise_broadcast(length_scale)).eval();
  matrix_cl<return_type_t<T1, T2, T3>> res(x.cols(), x.cols());
  // Nothing to compute for an empty input. Launching with a zero-sized
  // global range is rejected (CL_INVALID_GLOBAL_WORK_SIZE) by OpenCL
  // implementations older than 2.1, so return the empty result directly.
  if (x.cols() == 0) {
    return res;
  }
  // Round the global range up to a whole number of 16x16 work-groups.
  constexpr int block_size = 16;
  const int n_blocks = (x.cols() + block_size - 1) / block_size;
  const int blocked_size = block_size * n_blocks;
  try {
    opencl_kernels::gp_matern32_cov(cl::NDRange(blocked_size, blocked_size),
                                    cl::NDRange(block_size, block_size), x_eval,
                                    res, sigma * sigma, std::sqrt(3.0),
                                    x.cols(), x.rows());
  } catch (const cl::Error& e) {
    check_opencl_error("gp_matern32_cov", e);
  }
  return res;
}


/**
 * Matern 3/2 cross kernel on the GPU for two input matrices and a
 * non-scalar length scale.
 *
 * `x` and `y` are first divided elementwise by
 * `rowwise_broadcast(length_scale)`; the cross kernel is then run on the
 * scaled inputs with `sqrt(3)` in place of `sqrt(3) / length_scale`. The
 * result has `x.cols()` rows and `y.cols()` columns.
 *
 * @tparam T1 type of the first input; a non-scalar kernel generator
 *   expression
 * @tparam T2 type of the second input; a non-scalar kernel generator
 *   expression
 * @tparam T3 arithmetic type of `sigma`
 * @tparam T4 type of the length scale; a non-scalar kernel generator
 *   expression
 * @param x first input matrix
 * @param y second input matrix; must have as many rows as `x`
 * @param sigma scalar whose square is passed to the kernel
 * @param length_scale length scales, broadcast against `x` and `y`
 *   (presumably one entry per row -- confirm against `rowwise_broadcast`)
 * @return `x.cols()` by `y.cols()` matrix on the OpenCL device; empty when
 *   either input has no columns
 * @note The row counts of `x` and `y` are verified with `check_size_match`.
 *   A `cl::Error` raised by the kernel launch is handed to
 *   `check_opencl_error`.
 */
template <
    typename T1, typename T2, typename T3, typename T4,
    require_all_kernel_expressions_and_none_scalar_t<T1, T2, T4>* = nullptr,
    require_all_arithmetic_t<T3>* = nullptr>
inline matrix_cl<return_type_t<T1, T2, T3, T4>> gp_matern32_cov(
    const T1& x, const T2& y, const T3 sigma, const T4 length_scale) {
  check_size_match("gp_matern32_cov_cross", "x", x.rows(), "y", y.rows());
  matrix_cl<return_type_t<T1, T2, T3, T4>> res(x.cols(), y.cols());
  // Pre-scale both inputs so the kernel can be reused with a unit length
  // scale.
  const auto& x_eval = elt_divide(x, rowwise_broadcast(length_scale)).eval();
  const auto& y_eval = elt_divide(y, rowwise_broadcast(length_scale)).eval();
  // Nothing to compute when the result is empty. Launching with a zero-sized
  // global range is rejected (CL_INVALID_GLOBAL_WORK_SIZE) by OpenCL
  // implementations older than 2.1, so return the empty result directly.
  if (x.cols() == 0 || y.cols() == 0) {
    return res;
  }
  // Round each global dimension up to a whole number of 16x16 work-groups.
  constexpr int block_size = 16;
  const int x_blocks = (x.cols() + block_size - 1) / block_size;
  const int x_blocked_size = block_size * x_blocks;
  const int y_blocks = (y.cols() + block_size - 1) / block_size;
  const int y_blocked_size = block_size * y_blocks;
  try {
    opencl_kernels::gp_matern32_cov_cross(
        cl::NDRange(x_blocked_size, y_blocked_size),
        cl::NDRange(block_size, block_size), x_eval, y_eval, res, sigma * sigma,
        std::sqrt(3.0), x.cols(), y.cols(), x.rows());
  } catch (const cl::Error& e) {
    check_opencl_error("gp_matern32_cov_cross", e);
  }
  return res;
}


}  // namespace math

}  // namespace stan


#endif

#endif

stan::math::matrix_cl::cols
int cols() const
Definition matrix_cl.hpp:66

stan::math::matrix_cl
Represents an arithmetic matrix on the OpenCL device.
Definition matrix_cl.hpp:47

stan::require_all_arithmetic_t
require_all_t< std::is_arithmetic< std::decay_t< Types > >... > require_all_arithmetic_t
Require all of the types satisfy std::is_arithmetic.
Definition require_generics.hpp:322

stan::math::check_opencl_error
void check_opencl_error(const char *function, const cl::Error &e)
Throws a domain error specifying the OpenCL error that occurred.
Definition check_opencl.hpp:23

stan::math::elt_divide
elt_divide_< as_operation_cl_t< T_a >, as_operation_cl_t< T_b > > elt_divide(T_a &&a, T_b &&b)
Definition binary_operation.hpp:209

stan::math::rowwise_broadcast
auto rowwise_broadcast(T &&a)
Broadcast an expression in the rowwise dimension.
Definition broadcast.hpp:145

stan::require_all_kernel_expressions_and_none_scalar_t
require_all_t< is_kernel_expression_and_not_scalar< Types >... > require_all_kernel_expressions_and_none_scalar_t
Enables a template if all given types are non-scalar types that are a valid kernel generator expressi...
Definition is_kernel_expression.hpp:58

stan::math::opencl_kernels::gp_matern32_cov
const kernel_cl< in_buffer, out_buffer, double, double, int, int > gp_matern32_cov("gp_matern32_cov", {gp_matern32_cov_kernel_code})
See the docs for gp_matern32_cov() .

stan::math::opencl_kernels::gp_matern32_cov_cross
const kernel_cl< in_buffer, in_buffer, out_buffer, double, double, int, int, int > gp_matern32_cov_cross("gp_matern32_cov_cross", {gp_matern32_cov_cross_kernel_code})
See the docs for gp_matern32_cov_cross() .

stan::math::gp_matern32_cov
matrix_cl< return_type_t< T1, T2, T3 > > gp_matern32_cov(const T1 &x, const T2 sigma, const T3 length_scale)
Matern 3/2 kernel on the GPU.
Definition gp_matern32_cov.hpp:29

matrix_cl.hpp

stan::math::e
static constexpr double e()
Return the base of the natural logarithm.
Definition constants.hpp:20

stan::math::check_size_match
void check_size_match(const char *function, const char *name_i, T_size1 i, const char *name_j, T_size2 j)
Check if the provided sizes match.
Definition check_size_match.hpp:24

stan
The lgamma implementation in stan-math is based on either the reentrant safe lgamma_r implementation ...
Definition unit_vector_constrain.hpp:15

err.hpp

gp_matern32_cov.hpp

square.hpp

meta.hpp