Stan Math Library  2.20.0
reverse mode automatic differentiation
cholesky_decompose.hpp
Go to the documentation of this file.
1 #ifndef STAN_MATH_REV_MAT_FUN_CHOLESKY_DECOMPOSE_HPP
2 #define STAN_MATH_REV_MAT_FUN_CHOLESKY_DECOMPOSE_HPP
3 
4 #include <stan/math/rev/meta.hpp>
10 #include <stan/math/rev/core.hpp>
15 
16 #ifdef STAN_OPENCL
18 #endif
19 
20 #include <algorithm>
21 #include <vector>
22 
23 namespace stan {
24 namespace math {
25 
26 namespace internal {
34 inline void set_lower_tri_coeff_ref(Eigen::Matrix<var, -1, -1>& L,
35  vari** vari_ref) {
36  size_t pos = 0;
37  vari* dummy = new vari(0.0, false);
38 
39  for (size_type j = 0; j < L.cols(); ++j) {
40  for (size_type i = j; i < L.cols(); ++i) {
41  L.coeffRef(i, j).vi_ = vari_ref[pos++];
42  }
43  for (size_type k = 0; k < j; ++k)
44  L.coeffRef(k, j).vi_ = dummy;
45  }
46  return;
47 }
48 } // namespace internal
49 class cholesky_block : public vari {
50  public:
51  int M_;
53  typedef Eigen::Block<Eigen::MatrixXd> Block_;
56 
73  cholesky_block(const Eigen::Matrix<var, -1, -1>& A,
74  const Eigen::Matrix<double, -1, -1>& L_A)
75  : vari(0.0),
76  M_(A.rows()),
77  vari_ref_A_(ChainableStack::instance_->memalloc_.alloc_array<vari*>(
78  A.rows() * (A.rows() + 1) / 2)),
79  vari_ref_L_(ChainableStack::instance_->memalloc_.alloc_array<vari*>(
80  A.rows() * (A.rows() + 1) / 2)) {
81  size_t pos = 0;
82  block_size_ = std::max(M_ / 8, 8);
83  block_size_ = std::min(block_size_, 128);
84  for (size_type j = 0; j < M_; ++j) {
85  for (size_type i = j; i < M_; ++i) {
86  vari_ref_A_[pos] = A.coeffRef(i, j).vi_;
87  vari_ref_L_[pos] = new vari(L_A.coeffRef(i, j), false);
88  ++pos;
89  }
90  }
91  }
92 
99  inline void symbolic_rev(Block_& L, Block_& L_adj) {
100  using Eigen::Lower;
101  using Eigen::StrictlyUpper;
102  using Eigen::Upper;
103  L.transposeInPlace();
104  L_adj = (L * L_adj.triangularView<Lower>()).eval();
105  L_adj.triangularView<StrictlyUpper>()
106  = L_adj.adjoint().triangularView<StrictlyUpper>();
107  L.triangularView<Upper>().solveInPlace(L_adj);
108  L.triangularView<Upper>().solveInPlace(L_adj.transpose());
109  }
110 
117  virtual void chain() {
118  using Eigen::Block;
119  using Eigen::Lower;
120  using Eigen::MatrixXd;
121  using Eigen::StrictlyUpper;
122  using Eigen::Upper;
123  auto L_adj = Eigen::MatrixXd::Zero(M_, M_).eval();
124  auto L = Eigen::MatrixXd::Zero(M_, M_).eval();
125  size_t pos = 0;
126  for (size_type j = 0; j < M_; ++j) {
127  for (size_type i = j; i < M_; ++i) {
128  L_adj.coeffRef(i, j) = vari_ref_L_[pos]->adj_;
129  L.coeffRef(i, j) = vari_ref_L_[pos]->val_;
130  ++pos;
131  }
132  }
133 
134  for (int k = M_; k > 0; k -= block_size_) {
135  int j = std::max(0, k - block_size_);
136  Block_ R = L.block(j, 0, k - j, j);
137  Block_ D = L.block(j, j, k - j, k - j);
138  Block_ B = L.block(k, 0, M_ - k, j);
139  Block_ C = L.block(k, j, M_ - k, k - j);
140  Block_ R_adj = L_adj.block(j, 0, k - j, j);
141  Block_ D_adj = L_adj.block(j, j, k - j, k - j);
142  Block_ B_adj = L_adj.block(k, 0, M_ - k, j);
143  Block_ C_adj = L_adj.block(k, j, M_ - k, k - j);
144  if (C_adj.size() > 0) {
145  C_adj = D.transpose()
146  .triangularView<Upper>()
147  .solve(C_adj.transpose())
148  .transpose();
149  B_adj.noalias() -= C_adj * R;
150  D_adj.noalias() -= C_adj.transpose() * C;
151  }
152  symbolic_rev(D, D_adj);
153  R_adj.noalias() -= C_adj.transpose() * B;
154  R_adj.noalias() -= D_adj.selfadjointView<Lower>() * R;
155  D_adj.diagonal() *= 0.5;
156  D_adj.triangularView<StrictlyUpper>().setZero();
157  }
158  pos = 0;
159  for (size_type j = 0; j < M_; ++j)
160  for (size_type i = j; i < M_; ++i)
161  vari_ref_A_[pos++]->adj_ += L_adj.coeffRef(i, j);
162  }
163 };
164 
165 class cholesky_scalar : public vari {
166  public:
167  int M_;
170 
185  cholesky_scalar(const Eigen::Matrix<var, -1, -1>& A,
186  const Eigen::Matrix<double, -1, -1>& L_A)
187  : vari(0.0),
188  M_(A.rows()),
189  vari_ref_A_(ChainableStack::instance_->memalloc_.alloc_array<vari*>(
190  A.rows() * (A.rows() + 1) / 2)),
191  vari_ref_L_(ChainableStack::instance_->memalloc_.alloc_array<vari*>(
192  A.rows() * (A.rows() + 1) / 2)) {
193  size_t accum = 0;
194  size_t accum_i = accum;
195  for (size_type j = 0; j < M_; ++j) {
196  for (size_type i = j; i < M_; ++i) {
197  accum_i += i;
198  size_t pos = j + accum_i;
199  vari_ref_A_[pos] = A.coeffRef(i, j).vi_;
200  vari_ref_L_[pos] = new vari(L_A.coeffRef(i, j), false);
201  }
202  accum += j;
203  accum_i = accum;
204  }
205  }
206 
219  virtual void chain() {
220  using Eigen::Matrix;
221  using Eigen::RowMajor;
222  Matrix<double, -1, -1, RowMajor> adjL(M_, M_);
223  Matrix<double, -1, -1, RowMajor> LA(M_, M_);
224  Matrix<double, -1, -1, RowMajor> adjA(M_, M_);
225  size_t pos = 0;
226  for (size_type i = 0; i < M_; ++i) {
227  for (size_type j = 0; j <= i; ++j) {
228  adjL.coeffRef(i, j) = vari_ref_L_[pos]->adj_;
229  LA.coeffRef(i, j) = vari_ref_L_[pos]->val_;
230  ++pos;
231  }
232  }
233 
234  --pos;
235  for (int i = M_ - 1; i >= 0; --i) {
236  for (int j = i; j >= 0; --j) {
237  if (i == j) {
238  adjA.coeffRef(i, j) = 0.5 * adjL.coeff(i, j) / LA.coeff(i, j);
239  } else {
240  adjA.coeffRef(i, j) = adjL.coeff(i, j) / LA.coeff(j, j);
241  adjL.coeffRef(j, j)
242  -= adjL.coeff(i, j) * LA.coeff(i, j) / LA.coeff(j, j);
243  }
244  for (int k = j - 1; k >= 0; --k) {
245  adjL.coeffRef(i, k) -= adjA.coeff(i, j) * LA.coeff(j, k);
246  adjL.coeffRef(j, k) -= adjA.coeff(i, j) * LA.coeff(i, k);
247  }
248  vari_ref_A_[pos--]->adj_ += adjA.coeffRef(i, j);
249  }
250  }
251  }
252 };
253 #ifdef STAN_OPENCL
254 class cholesky_opencl : public vari {
255  public:
256  int M_;
259 
275  cholesky_opencl(const Eigen::Matrix<var, -1, -1>& A,
276  const Eigen::Matrix<double, -1, -1>& L_A)
277  : vari(0.0),
278  M_(A.rows()),
279  vari_ref_A_(ChainableStack::instance_->memalloc_.alloc_array<vari*>(
280  A.rows() * (A.rows() + 1) / 2)),
281  vari_ref_L_(ChainableStack::instance_->memalloc_.alloc_array<vari*>(
282  A.rows() * (A.rows() + 1) / 2)) {
283  size_t pos = 0;
284  for (size_type j = 0; j < M_; ++j) {
285  for (size_type i = j; i < M_; ++i) {
286  vari_ref_A_[pos] = A.coeffRef(i, j).vi_;
287  vari_ref_L_[pos] = new vari(L_A.coeffRef(i, j), false);
288  ++pos;
289  }
290  }
291  }
292 
299  inline void symbolic_rev(matrix_cl& L, matrix_cl& L_adj) {
300  L_adj = opencl::multiply<TriangularViewCL::Upper, TriangularViewCL::Entire>(
301  transpose(L), L_adj);
303  L = transpose(tri_inverse<TriangularViewCL::Lower>(L));
304  L_adj = L
306  TriangularViewCL::Entire>(L, L_adj));
308  }
309 
318  virtual void chain() {
319  const int packed_size = M_ * (M_ + 1) / 2;
320  std::vector<double> L_adj_cpu(packed_size);
321  std::vector<double> L_val_cpu(packed_size);
322 
323  for (size_type j = 0; j < packed_size; ++j) {
324  L_adj_cpu[j] = vari_ref_L_[j]->adj_;
325  L_val_cpu[j] = vari_ref_L_[j]->val_;
326  }
327  matrix_cl L = packed_copy<TriangularViewCL::Lower>(L_val_cpu, M_);
328  matrix_cl L_adj = packed_copy<TriangularViewCL::Lower>(L_adj_cpu, M_);
329  int block_size
331  block_size = std::max(block_size, 8);
332  block_size = std::min(
334  // The following is an OpenCL implementation of
335  // the chain() function from the cholesky_block
336  // vari class implementation
337  for (int k = M_; k > 0; k -= block_size) {
338  const int j = std::max(0, k - block_size);
339  const int k_j_ind = k - j;
340  const int m_k_ind = M_ - k;
341 
342  matrix_cl R(k_j_ind, j);
343  matrix_cl D(k_j_ind, k_j_ind);
344  matrix_cl B(m_k_ind, j);
345  matrix_cl C(m_k_ind, k_j_ind);
346 
347  matrix_cl R_adj(k_j_ind, j);
348  matrix_cl D_adj(k_j_ind, k_j_ind);
349  matrix_cl B_adj(m_k_ind, j);
350  matrix_cl C_adj(m_k_ind, k_j_ind);
351 
352  R.sub_block(L, j, 0, 0, 0, k_j_ind, j);
353  D.sub_block(L, j, j, 0, 0, k_j_ind, k_j_ind);
354  B.sub_block(L, k, 0, 0, 0, m_k_ind, j);
355  C.sub_block(L, k, j, 0, 0, m_k_ind, k_j_ind);
356 
357  R_adj.sub_block(L_adj, j, 0, 0, 0, k_j_ind, j);
358  D_adj.sub_block(L_adj, j, j, 0, 0, k_j_ind, k_j_ind);
359  B_adj.sub_block(L_adj, k, 0, 0, 0, m_k_ind, j);
360  C_adj.sub_block(L_adj, k, j, 0, 0, m_k_ind, k_j_ind);
361 
362  C_adj
363  = opencl::multiply<TriangularViewCL::Entire, TriangularViewCL::Lower>(
364  C_adj, tri_inverse<TriangularViewCL::Lower>(D));
365  B_adj = B_adj - C_adj * R;
366  D_adj = D_adj - transpose(C_adj) * C;
367 
368  symbolic_rev(D, D_adj);
369 
370  R_adj = R_adj - transpose(C_adj) * B - D_adj * R;
371  D_adj = diagonal_multiply(D_adj, 0.5);
373 
374  L_adj.sub_block(R_adj, 0, 0, j, 0, k_j_ind, j);
375  L_adj.sub_block(D_adj, 0, 0, j, j, k_j_ind, k_j_ind);
376  L_adj.sub_block(B_adj, 0, 0, k, 0, m_k_ind, j);
377  L_adj.sub_block(C_adj, 0, 0, k, j, m_k_ind, k_j_ind);
378  }
379  L_adj_cpu = packed_copy<TriangularViewCL::Lower>(L_adj);
380  for (size_type j = 0; j < packed_size; ++j) {
381  vari_ref_A_[j]->adj_ += L_adj_cpu[j];
382  }
383  }
384 };
385 #endif
386 
398 inline Eigen::Matrix<var, -1, -1> cholesky_decompose(
399  const Eigen::Matrix<var, -1, -1>& A) {
400  check_square("cholesky_decompose", "A", A);
401  Eigen::Matrix<double, -1, -1> L_A(value_of_rec(A));
402 #ifdef STAN_OPENCL
403  L_A = cholesky_decompose(L_A);
404 #else
405  check_symmetric("cholesky_decompose", "A", A);
406  Eigen::LLT<Eigen::Ref<Eigen::MatrixXd>, Eigen::Lower> L_factor(L_A);
407  check_pos_definite("cholesky_decompose", "m", L_factor);
408 #endif
409  // Memory allocated in arena.
410  // cholesky_scalar gradient faster for small matrices compared to
411  // cholesky_block
412  vari* dummy = new vari(0.0, false);
413  Eigen::Matrix<var, -1, -1> L(A.rows(), A.cols());
414  if (L_A.rows() <= 35) {
415  cholesky_scalar* baseVari = new cholesky_scalar(A, L_A);
416  size_t accum = 0;
417  size_t accum_i = accum;
418  for (size_type j = 0; j < L.cols(); ++j) {
419  for (size_type i = j; i < L.cols(); ++i) {
420  accum_i += i;
421  size_t pos = j + accum_i;
422  L.coeffRef(i, j).vi_ = baseVari->vari_ref_L_[pos];
423  }
424  for (size_type k = 0; k < j; ++k)
425  L.coeffRef(k, j).vi_ = dummy;
426  accum += j;
427  accum_i = accum;
428  }
429  } else {
430 #ifdef STAN_OPENCL
431  if (L_A.rows()
433  cholesky_opencl* baseVari = new cholesky_opencl(A, L_A);
435  } else {
436  cholesky_block* baseVari = new cholesky_block(A, L_A);
438  }
439 #else
440  cholesky_block* baseVari = new cholesky_block(A, L_A);
442 #endif
443  }
444 
445  return L;
446 }
447 } // namespace math
448 } // namespace stan
449 #endif
virtual void chain()
Reverse mode differentiation algorithm refernce:
int rows(const Eigen::Matrix< T, R, C > &m)
Return the number of rows in the specified matrix, vector, or row vector.
Definition: rows.hpp:20
cholesky_block(const Eigen::Matrix< var, -1, -1 > &A, const Eigen::Matrix< double, -1, -1 > &L_A)
Constructor for cholesky function.
int min(const std::vector< int > &x)
Returns the minimum coefficient in the specified column vector.
Definition: min.hpp:20
double value_of_rec(const fvar< T > &v)
Return the value of the specified variable.
void symbolic_rev(Block_ &L, Block_ &L_adj)
Symbolic adjoint calculation for cholesky factor A.
cholesky_scalar(const Eigen::Matrix< var, -1, -1 > &A, const Eigen::Matrix< double, -1, -1 > &L_A)
Constructor for cholesky function.
void check_square(const char *function, const char *name, const matrix_cl &y)
Check if the matrix_cl is square.
The variable implementation base class.
Definition: vari.hpp:30
The API to access the methods and values in opencl_context_base.
opencl_context_base::tuning_struct & tuning_opts()
Returns the thread block size for the Cholesky Decompositions L_11.
Independent (input) and dependent (output) variables for gradients.
Definition: var.hpp:33
void triangular_transpose()
Copies a lower/upper triangular of a matrix to it&#39;s upper/lower.
void cholesky_decompose(matrix_cl &A)
Performs an in-place of the the lower-triangular Cholesky factor (i.e., matrix square root) of the sp...
const double val_
The value of this variable.
Definition: vari.hpp:38
auto multiply(const matrix_cl &A, const matrix_cl &B)
Computes the product of the specified matrices with the option of specifying the triangularity of eit...
Definition: multiply.hpp:36
Eigen::Matrix< double, Eigen::Dynamic, Eigen::Dynamic >::Index size_type
Type for sizes and indexes in an Eigen matrix with double e.
Definition: typedefs.hpp:11
void zeros()
Stores zeros in the matrix on the OpenCL device.
Definition: zeros.hpp:27
void sub_block(const matrix_cl &A, size_t A_i, size_t A_j, size_t this_i, size_t this_j, size_t nrows, size_t ncols)
Write the contents of A into this starting at the top left of this
Definition: sub_block.hpp:28
virtual void chain()
Reverse mode differentiation algorithm using OpenCL.
Represents a matrix on the OpenCL device.
Definition: matrix_cl.hpp:29
cholesky_opencl(const Eigen::Matrix< var, -1, -1 > &A, const Eigen::Matrix< double, -1, -1 > &L_A)
Constructor for OpenCL cholesky function.
void set_lower_tri_coeff_ref(Eigen::Matrix< var, -1, -1 > &L, vari **vari_ref)
Set the lower right triangular of a var matrix given a set of vari**.
int max(const std::vector< int > &x)
Returns the maximum coefficient in the specified column vector.
Definition: max.hpp:21
matrix_cl transpose(const matrix_cl &src)
Takes the transpose of the matrix on the OpenCL device.
Definition: transpose.hpp:20
void symbolic_rev(matrix_cl &L, matrix_cl &L_adj)
Symbolic adjoint calculation for cholesky factor A.
double adj_
The adjoint of this variable, which is the partial derivative of this variable with respect to the ro...
Definition: vari.hpp:44
virtual void chain()
Reverse mode differentiation algorithm refernce:
This struct always provides access to the autodiff stack using the singleton pattern.
void check_pos_definite(const char *function, const char *name, const Eigen::Matrix< T_y, -1, -1 > &y)
Check if the specified square, symmetric matrix is positive definite.
void check_symmetric(const char *function, const char *name, const matrix_cl &y)
Check if the matrix_cl is symmetric.
matrix_cl diagonal_multiply(const matrix_cl &A, const double scalar)
Multiplies the diagonal of a matrix on the OpenCL device with the specified scalar.
Eigen::Block< Eigen::MatrixXd > Block_

     [ Stan Home Page ] © 2011–2018, Stan Development Team.