math/multiply__transpose_8hpp_source.html

#ifndef STAN_MATH_OPENCL_MULTIPLY_TRANSPOSE_HPP

#define STAN_MATH_OPENCL_MULTIPLY_TRANSPOSE_HPP

#ifdef STAN_OPENCL


#include <stan/math/opencl/matrix_cl.hpp>

#include <stan/math/opencl/kernels/multiply_transpose.hpp>

#include <stan/math/opencl/err.hpp>


#include <stan/math/prim/fun/Eigen.hpp>

#include <stan/math/prim/meta.hpp>


namespace stan {

namespace math {

/**
 * Computes the product of an OpenCL matrix with its own transpose,
 * i.e. `A * A^T`.
 *
 * The result has `A.rows()` rows and `A.rows()` columns. Its view is
 * `Diagonal` when the view of `A` is `Diagonal`, and `Entire` otherwise.
 *
 * @tparam T arithmetic scalar type of the matrix
 * @param A input matrix
 * @return matrix of size `A.rows()` x `A.rows()`; when `A.size() == 0` it is
 *   returned without the kernel having been launched
 * @note A `cl::Error` raised while launching the kernel is handed to
 *   `check_opencl_error`.
 */
template <typename T, typename = require_arithmetic_t<T>>
inline matrix_cl<T> multiply_transpose(const matrix_cl<T>& A) {
  // Only a diagonal input keeps a diagonal view; everything else is
  // treated as a full matrix.
  const matrix_cl_view result_view = (A.view() == matrix_cl_view::Diagonal)
                                         ? matrix_cl_view::Diagonal
                                         : matrix_cl_view::Entire;
  matrix_cl<T> result(A.rows(), A.rows(), result_view);

  if (A.size() != 0) {
    const int block_size
        = opencl_kernels::multiply_transpose.get_option("THREAD_BLOCK_SIZE");
    const int work_per_thread
        = opencl_kernels::multiply_transpose.get_option("WORK_PER_THREAD");

    // Round the row count up to the next multiple of the thread block size.
    // With padded dimensions the kernel does not need bounds-checking
    // branches, which improves performance.
    const int block_count = (A.rows() + block_size - 1) / block_size;
    const int padded_rows = block_count * block_size;

    try {
      opencl_kernels::multiply_transpose(
          cl::NDRange(padded_rows, padded_rows / work_per_thread),
          cl::NDRange(block_size, block_size / work_per_thread), A, result,
          A.rows(), A.cols());
    } catch (const cl::Error& e) {
      check_opencl_error("multiply self transpose", e);
    }
  }

  return result;
}

}  // namespace math

}  // namespace stan


#endif

#endif

Eigen.hpp

stan::math::matrix_cl::size
int size() const
Definition matrix_cl.hpp:68

stan::math::matrix_cl::cols
int cols() const
Definition matrix_cl.hpp:66

stan::math::matrix_cl::rows
int rows() const
Definition matrix_cl.hpp:64

stan::math::matrix_cl::view
const matrix_cl_view & view() const
Definition matrix_cl.hpp:70

stan::math::matrix_cl
Represents an arithmetic matrix on the OpenCL device.
Definition matrix_cl.hpp:47

stan::math::check_opencl_error
void check_opencl_error(const char *function, const cl::Error &e)
Throws a domain error specifying the OpenCL error that occurred.
Definition check_opencl.hpp:23

stan::math::opencl_kernels::multiply_transpose
const kernel_cl< in_buffer, out_buffer, int, int > multiply_transpose("multiply_transpose", {thread_block_helpers, multiply_transpose_kernel_code}, {{"THREAD_BLOCK_SIZE", 32}, {"WORK_PER_THREAD", 4}})
See the docs for add().

stan::math::multiply_transpose
matrix_cl< T > multiply_transpose(const matrix_cl< T > &A)
Computes the product of a square OpenCL matrix with its transpose.
Definition multiply_transpose.hpp:24

multiply_transpose.hpp

matrix_cl.hpp

stan::math::e
static constexpr double e()
Return the base of the natural logarithm.
Definition constants.hpp:20

stan::math::matrix_cl_view::Entire
@ Entire

stan::math::matrix_cl_view::Diagonal
@ Diagonal

stan
The lgamma implementation in stan-math is based on either the reentrant safe lgamma_r implementation ...
Definition unit_vector_constrain.hpp:15

err.hpp

meta.hpp