1 #ifndef STAN_MATH_OPENCL_KERNELS_DIAGONAL_INVERSE_LOWER_TRI_HPP 2 #define STAN_MATH_OPENCL_KERNELS_DIAGONAL_INVERSE_LOWER_TRI_HPP 10 namespace opencl_kernels {
// OpenCL C source for the `diag_inv` kernel, stored as a C string produced by
// STRINGIFY.
// NOTE(review): the integers fused onto some lines (12, 42, 44, ...) and the
// jumps between them suggest this text was taken from a numbered source
// listing with lines dropped. The end of the parameter list and the closing
// braces are not visible here -- confirm against the real header.
12 static const char* diag_inv_kernel_code =
STRINGIFY(
// Visible kernel arguments: A and tmp_inv, both __global double buffers.
// A is read during the main loop and overwritten in the final loop; tmp_inv is
// updated in place. A(i, j) is not defined in this block -- presumably an
// indexing helper provided elsewhere; confirm.
42 __kernel
void diag_inv(__global
double* A, __global
double* tmp_inv,
// index: id of this work-item inside its work-group (dimension 0).
44 int index = get_local_id(0);
// group: id of the work-group (dimension 0).
45 int group = get_group_id(0);
// block_size: number of work-items per work-group (dimension 0).
46 int block_size = get_local_size(0);
// First row/column of the block_size x block_size block on A's diagonal that
// this work-group handles (the same offset is applied to both indices below).
47 int A_offset = group * block_size;
// Start of this work-item's slice of tmp_inv: every work-group owns
// block_size * block_size entries, every work-item a contiguous run of
// block_size of them.
49 int tmp_offset = group * block_size * block_size + index * block_size;
// k walks the diagonal positions of the block.
55 for (
int k = 0; k < block_size; k++) {
// Diagonal entry k of this work-group's block.
56 double diag_ele = A(A_offset + k, A_offset + k);
// Scale entry k of this work-item's slice by the diagonal entry.
// NOTE(review): listing numbers jump 56 -> 63 -> 65 here, so a guard around
// this statement may have been lost -- confirm.
63 tmp_inv[tmp_offset + k] /= diag_ele;
// Work-group barrier before the elimination step.
65 barrier(CLK_LOCAL_MEM_FENCE);
// Subtract the contribution of entry k from the later entries of the slice.
// Rows start at max(k + 1, index), so rows above `index` are never touched --
// presumably they are known zeros; confirm against how tmp_inv is initialized.
68 for (
int i =
max(k + 1, index); i < block_size; i++) {
// Entry (i, k) of the block, below the diagonal since i > k.
69 double factor = A(A_offset + i, A_offset + k);
70 tmp_inv[tmp_offset + i] -= tmp_inv[tmp_offset + k] * factor;
// Work-group barrier before moving on to the next k.
72 barrier(CLK_LOCAL_MEM_FENCE);
// Write the finished slice back into column `index` of the diagonal block of
// A, one row per j.
74 for (
int j = 0; j < block_size; j++) {
76 A(A_offset + j, A_offset + index) = tmp_inv[tmp_offset + j];
89 {{
"THREAD_BLOCK_SIZE", 32}});
int rows(const Eigen::Matrix< T, R, C > &m)
Return the number of rows in the specified matrix, vector, or row vector.
static const char * indexing_helpers
int max(const std::vector< int > &x)
Returns the maximum coefficient in the specified column vector.
Creates functor for kernels.
const kernel_cl< in_out_buffer, in_out_buffer, int > diag_inv("diag_inv", {indexing_helpers, diag_inv_kernel_code}, {{"THREAD_BLOCK_SIZE", 32}})
See the docs for add().