Stan Math Library  2.20.0
reverse mode automatic differentiation
diag_inv.hpp
Go to the documentation of this file.
1 #ifndef STAN_MATH_OPENCL_KERNELS_DIAGONAL_INVERSE_LOWER_TRI_HPP
2 #define STAN_MATH_OPENCL_KERNELS_DIAGONAL_INVERSE_LOWER_TRI_HPP
3 #ifdef STAN_OPENCL
4 
7 
8 namespace stan {
9 namespace math {
10 namespace opencl_kernels {
11 // \cond
12 static const char* diag_inv_kernel_code = STRINGIFY(
13  // \endcond
42  __kernel void diag_inv(__global double* A, __global double* tmp_inv,
43  int rows) {
44  int index = get_local_id(0);
45  int group = get_group_id(0);
46  int block_size = get_local_size(0);
47  int A_offset = group * block_size;
48  // offset inside the matrix with batched identities
49  int tmp_offset = group * block_size * block_size + index * block_size;
50 
51  // The following code is the sequential version of forward
52  // substitution with the identity matrix as RHS. Only the innermost loops
53  // are parallelized. The rows are processed sequentially. This loop
54  // process all the rows:
55  for (int k = 0; k < block_size; k++) {
56  double diag_ele = A(A_offset + k, A_offset + k);
57 
58  // Each element under the diagonal of the RHS is divided by diag_ele.
59  // Each thread in a thread block does 1 division.
60  // Threads that are assigned elements above the diagonal
61  // skip this division.
62  if (index <= k) {
63  tmp_inv[tmp_offset + k] /= diag_ele;
64  }
65  barrier(CLK_LOCAL_MEM_FENCE);
66  // Each thread updates one column in the RHS matrix
67  // (ignores values above the diagonal).
68  for (int i = max(k + 1, index); i < block_size; i++) { // NOLINT
69  double factor = A(A_offset + i, A_offset + k);
70  tmp_inv[tmp_offset + i] -= tmp_inv[tmp_offset + k] * factor;
71  }
72  barrier(CLK_LOCAL_MEM_FENCE);
73  }
74  for (int j = 0; j < block_size; j++) {
75  // Each thread copies one column.
76  A(A_offset + j, A_offset + index) = tmp_inv[tmp_offset + j];
77  }
78  }
79  // \cond
80 );
81 // \endcond
82 
88  "diag_inv", {indexing_helpers, diag_inv_kernel_code},
89  {{"THREAD_BLOCK_SIZE", 32}});
90 
91 } // namespace opencl_kernels
92 } // namespace math
93 } // namespace stan
94 #endif
95 #endif
int rows(const Eigen::Matrix< T, R, C > &m)
Return the number of rows in the specified matrix, vector, or row vector.
Definition: rows.hpp:20
#define STRINGIFY(src)
Definition: kernel_cl.hpp:22
static const char * indexing_helpers
Definition: helpers.hpp:14
int max(const std::vector< int > &x)
Returns the maximum coefficient in the specified column vector.
Definition: max.hpp:21
Creates functor for kernels.
Definition: kernel_cl.hpp:201
const kernel_cl< in_out_buffer, in_out_buffer, int > diag_inv("diag_inv", {indexing_helpers, diag_inv_kernel_code}, {{"THREAD_BLOCK_SIZE", 32}})
See the docs for add() .

     [ Stan Home Page ] © 2011–2018, Stan Development Team.