#ifndef STAN_MATH_GPU_KERNELS_TRANSPOSE_HPP
#define STAN_MATH_GPU_KERNELS_TRANSPOSE_HPP
#ifdef STAN_OPENCL

#include <stan/math/gpu/kernel_cl.hpp>

namespace stan {
namespace math {
namespace opencl_kernels {
// The source string is declared `static` so that it has internal linkage:
// a non-const pointer at namespace scope otherwise has external linkage, and
// including this header from more than one translation unit would produce
// duplicate definitions of the same symbol.
// \cond
static const char *transpose_kernel_code = STRINGIFY(
    // \endcond
    /**
     * Takes the transpose of the matrix on the GPU.
     *
     * Each work-item reads its two global ids as (i, j) and, when that pair
     * lies inside the bounds given by rows and cols, copies A(i, j) into
     * BT(j, i).
     *
     * @param[out] B The output matrix to hold transpose of A.
     * @param[in] A The input matrix to transpose into B.
     * @param rows The number of rows for A.
     * @param cols The number of columns for A.
     * @note Code is a <code>const char*</code> held in
     * <code>transpose_kernel_code.</code>
     * This kernel uses the helper macros available in helpers.cl.
     */
    __kernel void transpose(__global double *B, __global double *A,
                            unsigned int rows, unsigned int cols) {
      int i = get_global_id(0);
      int j = get_global_id(1);
      // Work-items whose ids fall outside rows x cols do nothing.
      if (i < rows && j < cols) {
        // A and BT are indexing macros that are not defined in this file
        // (presumably they come from helpers.cl; confirm there).
        BT(j, i) = A(i, j);
      }
    }
    // \cond
);
// \endcond

/**
 * See the docs for \link kernels/transpose.hpp transpose() \endlink
 */
const global_range_kernel<cl::Buffer, cl::Buffer, int, int> transpose(
    "transpose", transpose_kernel_code);

}  // namespace opencl_kernels
}  // namespace math
}  // namespace stan
#endif
#endif