Merge branch 'develop' into feature/issue-1221-opencl-rev-mdivide-left-tri-v2

febfbe9f · Rok Češnovar · GitHub · 5e75f25e · fb376954 · febfbe9f
Unverified Commit febfbe9f authored 5 years ago by Rok Češnovar Committed by GitHub 5 years ago
Hide whitespace changes
Inline Side-by-side

Showing

with 250 additions and 197 deletions
+250 -197
--- a/stan/math/fwd/mat/fun/Eigen_NumTraits.hpp
+++ b/stan/math/fwd/mat/fun/Eigen_NumTraits.hpp
@@ -52,14 +52,12 @@ struct NumTraits<stan::math::fvar<T>> : GenericNumTraits<stan::math::fvar<T>> {
  static int digits10() { return std::numeric_limits<double>::digits10; }
 };

-namespace internal {
-
 /**
 * Scalar product traits specialization for Eigen for forward-mode
 * autodiff variables.
 */
-template <typename T>
-struct scalar_product_traits<stan::math::fvar<T>, double> {
+template <typename T, typename BinaryOp>
+struct ScalarBinaryOpTraits<stan::math::fvar<T>, double, BinaryOp> {
  typedef stan::math::fvar<T> ReturnType;
 };

@@ -67,11 +65,10 @@ struct scalar_product_traits<stan::math::fvar<T>, double> {
 * Scalar product traits specialization for Eigen for forward-mode
 * autodiff variables.
 */
-template <typename T>
-struct scalar_product_traits<double, stan::math::fvar<T>> {
+template <typename T, typename BinaryOp>
+struct ScalarBinaryOpTraits<double, stan::math::fvar<T>, BinaryOp> {
  typedef stan::math::fvar<T> ReturnType;
 };
-}  // namespace internal

 }  // namespace Eigen
 #endif
--- a/stan/math/fwd/scal/fun/log_diff_exp.hpp
+++ b/stan/math/fwd/scal/fun/log_diff_exp.hpp
@@ -12,9 +12,11 @@ namespace math {

 template <typename T>
 inline fvar<T> log_diff_exp(const fvar<T>& x1, const fvar<T>& x2) {
-  using std::exp;
-  if (x1.val_ <= x2.val_)
+  if (x1.val_ <= x2.val_) {
+    if (x1.val_ < INFTY && x1.val_ == x2.val_)
+      return fvar<T>(NEGATIVE_INFTY, NOT_A_NUMBER);
    return fvar<T>(NOT_A_NUMBER, NOT_A_NUMBER);
+  }
  return fvar<T>(
      log_diff_exp(x1.val_, x2.val_),
      -(x1.d_ / expm1(x2.val_ - x1.val_) + x2.d_ / expm1(x1.val_ - x2.val_)));
@@ -22,17 +24,24 @@ inline fvar<T> log_diff_exp(const fvar<T>& x1, const fvar<T>& x2) {

 template <typename T1, typename T2>
 inline fvar<T2> log_diff_exp(const T1& x1, const fvar<T2>& x2) {
-  using std::exp;
-  if (x1 <= x2.val_)
+  if (x1 <= x2.val_) {
+    if (x1 < INFTY && x1 == x2.val_)
+      return fvar<T2>(NEGATIVE_INFTY, x2.d_ * NEGATIVE_INFTY);
    return fvar<T2>(NOT_A_NUMBER, NOT_A_NUMBER);
+  }
  return fvar<T2>(log_diff_exp(x1, x2.val_), -x2.d_ / expm1(x1 - x2.val_));
 }

 template <typename T1, typename T2>
 inline fvar<T1> log_diff_exp(const fvar<T1>& x1, const T2& x2) {
-  using std::exp;
-  if (x1.val_ <= x2)
+  if (x1.val_ <= x2) {
+    if (x1.val_ < INFTY && x1.val_ == x2) {
+      if (x2 == NEGATIVE_INFTY)
+        return fvar<T1>(NEGATIVE_INFTY, x1.d_);
+      return fvar<T1>(NEGATIVE_INFTY, x1.d_ * INFTY);
+    }
    return fvar<T1>(NOT_A_NUMBER, NOT_A_NUMBER);
+  }
  return fvar<T1>(log_diff_exp(x1.val_, x2), -x1.d_ / expm1(x2 - x1.val_));
 }
 }  // namespace math

--- a/stan/math/fwd/scal/fun/log_sum_exp.hpp
+++ b/stan/math/fwd/scal/fun/log_sum_exp.hpp
@@ -3,6 +3,7 @@

 #include <stan/math/fwd/meta.hpp>
 #include <stan/math/fwd/core.hpp>
+#include <stan/math/prim/scal/fun/constants.hpp>
 #include <stan/math/prim/scal/fun/log_sum_exp.hpp>

 namespace stan {
@@ -19,12 +20,16 @@ inline fvar<T> log_sum_exp(const fvar<T>& x1, const fvar<T>& x2) {
 template <typename T>
 inline fvar<T> log_sum_exp(double x1, const fvar<T>& x2) {
  using std::exp;
+  if (x1 == NEGATIVE_INFTY)
+    return fvar<T>(x2.val_, x2.d_);
  return fvar<T>(log_sum_exp(x1, x2.val_), x2.d_ / (exp(x1 - x2.val_) + 1));
 }

 template <typename T>
 inline fvar<T> log_sum_exp(const fvar<T>& x1, double x2) {
  using std::exp;
+  if (x2 == NEGATIVE_INFTY)
+    return fvar<T>(x1.val_, x1.d_);
  return fvar<T>(log_sum_exp(x1.val_, x2), x1.d_ / (1 + exp(x2 - x1.val_)));
 }


--- a/stan/math/opencl/matrix_cl.hpp
+++ b/stan/math/opencl/matrix_cl.hpp
@@ -271,6 +271,10 @@ class matrix_cl<T, enable_if_arithmetic<T>> {
    }
  }

+  template <typename Type>
+  using enable_if_eigen_base = std::enable_if_t<
+      std::is_base_of<Eigen::EigenBase<Type>, std::decay_t<Type>>::value>;
+
  /**
   * Constructor for the matrix_cl that
   * creates a copy of the Eigen matrix on the OpenCL device.
@@ -283,12 +287,11 @@ class matrix_cl<T, enable_if_arithmetic<T>> {
   * @throw <code>std::system_error</code> if the
   * matrices do not have matching dimensions
   */
-  explicit matrix_cl(
-      const Eigen::Ref<const Eigen::Matrix<T, Eigen::Dynamic, Eigen::Dynamic>>&
-          A,
-      matrix_cl_view partial_view = matrix_cl_view::Entire)
+  template <typename eigen_base, typename = enable_if_eigen_base<eigen_base>>
+  explicit matrix_cl(const eigen_base& A,
+                     matrix_cl_view partial_view = matrix_cl_view::Entire)
      : rows_(A.rows()), cols_(A.cols()), view_(partial_view) {
-    if (size() == 0) {
+    if (this->size() == 0) {
      return;
    }
    cl::Context& ctx = opencl_context.context();
@@ -297,7 +300,7 @@ class matrix_cl<T, enable_if_arithmetic<T>> {
      buffer_cl_ = cl::Buffer(ctx, CL_MEM_READ_WRITE, sizeof(T) * A.size());
      cl::Event transfer_event;
      queue.enqueueWriteBuffer(buffer_cl_, CL_FALSE, 0, sizeof(T) * A.size(),
-                               A.data(), NULL, &transfer_event);
+                               A.eval().data(), NULL, &transfer_event);
      this->add_write_event(transfer_event);
    } catch (const cl::Error& e) {
      check_opencl_error("matrix constructor", e);

--- a/stan/math/opencl/matrix_cl_view.hpp
+++ b/stan/math/opencl/matrix_cl_view.hpp
@@ -70,7 +70,9 @@ inline const matrix_cl_view transpose(const matrix_cl_view view) {
 */
 inline const matrix_cl_view invert(const matrix_cl_view view) {
  typedef typename std::underlying_type<matrix_cl_view>::type underlying;
-  return static_cast<matrix_cl_view>(~static_cast<underlying>(view));
+  return static_cast<matrix_cl_view>(
+      static_cast<underlying>(matrix_cl_view::Entire)
+      & ~static_cast<underlying>(view));
 }

 /**

--- a/stan/math/prim/mat/fun/LDLT_factor.hpp
+++ b/stan/math/prim/mat/fun/LDLT_factor.hpp
@@ -99,19 +99,11 @@ class LDLT_factor {
    ldltP_->solveInPlace(invA);
  }

-#if EIGEN_VERSION_AT_LEAST(3, 3, 0)
  template <typename Rhs>
  inline const Eigen::Solve<ldlt_t, Rhs> solve(
      const Eigen::MatrixBase<Rhs>& b) const {
    return ldltP_->solve(b);
  }
-#else
-  template <typename Rhs>
-  inline const Eigen::internal::solve_retval<ldlt_t, Rhs> solve(
-      const Eigen::MatrixBase<Rhs>& b) const {
-    return ldltP_->solve(b);
-  }
-#endif

  inline matrix_t solveRight(const matrix_t& B) const {
    return ldltP_->solve(B.transpose()).transpose();

--- a/stan/math/prim/mat/fun/cholesky_decompose.hpp
+++ b/stan/math/prim/mat/fun/cholesky_decompose.hpp
@@ -6,10 +6,7 @@
 #include <stan/math/prim/mat/err/check_square.hpp>
 #include <stan/math/prim/mat/err/check_symmetric.hpp>
 #ifdef STAN_OPENCL
-#include <stan/math/opencl/opencl_context.hpp>
-#include <stan/math/opencl/err/check_symmetric.hpp>
-#include <stan/math/opencl/cholesky_decompose.hpp>
-#include <stan/math/opencl/copy.hpp>
+#include <stan/math/opencl/opencl.hpp>
 #endif

 #include <cmath>

--- a/stan/math/prim/mat/fun/log_sum_exp.hpp
+++ b/stan/math/prim/mat/fun/log_sum_exp.hpp
@@ -25,6 +25,8 @@ namespace math {
 template <int R, int C>
 double log_sum_exp(const Eigen::Matrix<double, R, C>& x) {
  const double max = x.maxCoeff();
+  if (!std::isfinite(max))
+    return max;
  return max + std::log((x.array() - max).exp().sum());
 }


--- a/stan/math/prim/mat/fun/multiply.hpp
+++ b/stan/math/prim/mat/fun/multiply.hpp
@@ -5,6 +5,9 @@
 #include <stan/math/prim/meta.hpp>
 #include <stan/math/prim/arr/err/check_matching_sizes.hpp>
 #include <stan/math/prim/mat/err/check_multiplicable.hpp>
+#ifdef STAN_OPENCL
+#include <stan/math/opencl/opencl.hpp>
+#endif
 #include <type_traits>

 namespace stan {
@@ -55,7 +58,19 @@ template <int R1, int C1, int R2, int C2, typename T1, typename T2,
 inline Eigen::Matrix<return_type_t<T1, T2>, R1, C2> multiply(
    const Eigen::Matrix<T1, R1, C1>& m1, const Eigen::Matrix<T2, R2, C2>& m2) {
  check_multiplicable("multiply", "m1", m1, "m2", m2);
+#ifdef STAN_OPENCL
+  if (m1.rows() * m1.cols() * m2.cols()
+      > opencl_context.tuning_opts().multiply_dim_prod_worth_transfer) {
+    matrix_cl<double> m1_cl(m1);
+    matrix_cl<double> m2_cl(m2);
+    matrix_cl<double> m3_cl = m1_cl * m2_cl;
+    return from_matrix_cl(m3_cl);
+  } else {
+    return m1 * m2;
+  }
+#else
  return m1 * m2;
+#endif
 }

 /**

--- a/stan/math/prim/scal/fun/log_diff_exp.hpp
+++ b/stan/math/prim/scal/fun/log_diff_exp.hpp
@@ -11,16 +11,16 @@ namespace math {

 /**
 * The natural logarithm of the difference of the natural exponentiation
- * of x1 and the natural exponentiation of x2
+ * of x and the natural exponentiation of y
 *
- * This function is only defined for x<0
+ * This function is only defined for x >= y
 *
 *
   \f[
   \mbox{log\_diff\_exp}(x, y) =
   \begin{cases}
-     \textrm{NaN} & \mbox{if } x \leq y\\
-     \ln(\exp(x)-\exp(y)) & \mbox{if } x > y \\[6pt]
+     \textrm{NaN} & \mbox{if } x < y\\
+     \ln(\exp(x)-\exp(y)) & \mbox{if } x \geq y \\[6pt]
     \textrm{NaN} & \mbox{if } x = \textrm{NaN or } y = \textrm{NaN}
   \end{cases}
   \f]
@@ -47,7 +47,7 @@ namespace math {
 template <typename T1, typename T2>
 inline return_type_t<T1, T2> log_diff_exp(const T1 x, const T2 y) {
  if (x <= y)
-    return NOT_A_NUMBER;
+    return (x < INFTY && x == y) ? NEGATIVE_INFTY : NOT_A_NUMBER;
  return x + log1m_exp(y - x);
 }


--- a/stan/math/prim/scal/fun/log_sum_exp.hpp
+++ b/stan/math/prim/scal/fun/log_sum_exp.hpp
@@ -3,8 +3,8 @@

 #include <stan/math/prim/meta.hpp>
 #include <stan/math/prim/scal/fun/log1p_exp.hpp>
+#include <stan/math/prim/scal/fun/constants.hpp>
 #include <boost/math/tools/promotion.hpp>
-#include <limits>

 namespace stan {
 namespace math {
@@ -46,7 +46,10 @@ namespace math {
 */
 template <typename T1, typename T2>
 inline return_type_t<T1, T2> log_sum_exp(const T2& a, const T1& b) {
-  using std::exp;
+  if (a == NEGATIVE_INFTY)
+    return b;
+  if (a == INFTY && b == INFTY)
+    return INFTY;
  if (a > b)
    return a + log1p_exp(b - a);
  return b + log1p_exp(a - b);

--- a/stan/math/rev/mat/fun/Eigen_NumTraits.hpp
+++ b/stan/math/rev/mat/fun/Eigen_NumTraits.hpp
@@ -79,33 +79,32 @@ struct NumTraits<stan::math::var> : GenericNumTraits<stan::math::var> {
  static int digits10() { return std::numeric_limits<double>::digits10; }
 };

-namespace internal {
 /**
- * Partial specialization of Eigen's remove_all struct to stop
- * Eigen removing pointer from vari* variables
+ * Scalar product traits specialization for Eigen for reverse-mode
+ * autodiff variables.
 */
-template <>
-struct remove_all<stan::math::vari*> {
-  typedef stan::math::vari* type;
+template <typename BinaryOp>
+struct ScalarBinaryOpTraits<stan::math::var, double, BinaryOp> {
+  typedef stan::math::var ReturnType;
 };

-#if EIGEN_VERSION_AT_LEAST(3, 3, 0)
 /**
 * Scalar product traits specialization for Eigen for reverse-mode
 * autodiff variables.
 */
-template <>
-struct scalar_product_traits<stan::math::var, double> {
+template <typename BinaryOp>
+struct ScalarBinaryOpTraits<double, stan::math::var, BinaryOp> {
  typedef stan::math::var ReturnType;
 };

+namespace internal {
 /**
- * Scalar product traits specialization for Eigen for reverse-mode
- * autodiff variables.
+ * Partial specialization of Eigen's remove_all struct to stop
+ * Eigen removing pointer from vari* variables
 */
 template <>
-struct scalar_product_traits<double, stan::math::var> {
-  typedef stan::math::var ReturnType;
+struct remove_all<stan::math::vari*> {
+  typedef stan::math::vari* type;
 };

 /**
@@ -248,129 +247,6 @@ struct general_matrix_matrix_product<Index, stan::math::var, LhsStorageOrder,
        alpha, blocking, info);
  }
 };
-#else
-/**
- * Implemented this for printing to stream.
- */
-template <>
-struct significant_decimals_default_impl<stan::math::var, false> {
-  static inline int run() {
-    using std::ceil;
-    using std::log;
-    return cast<double, int>(
-        ceil(-log(std::numeric_limits<double>::epsilon()) / log(10.0)));
-  }
-};
-
-/**
- * Scalar product traits override for Eigen for automatic
- * gradient variables.
- */
-template <>
-struct scalar_product_traits<stan::math::var, double> {
-  typedef stan::math::var ReturnType;
-};
-
-/**
- * Scalar product traits override for Eigen for automatic
- * gradient variables.
- */
-template <>
-struct scalar_product_traits<double, stan::math::var> {
-  typedef stan::math::var ReturnType;
-};
-
-/**
- * Override matrix-vector and matrix-matrix products to use more efficient
- * implementation.
- */
-template <typename Index, bool ConjugateLhs, bool ConjugateRhs>
-struct general_matrix_vector_product<Index, stan::math::var, ColMajor,
-                                     ConjugateLhs, stan::math::var,
-                                     ConjugateRhs> {
-  typedef stan::math::var LhsScalar;
-  typedef stan::math::var RhsScalar;
-  typedef typename scalar_product_traits<LhsScalar, RhsScalar>::ReturnType
-      ResScalar;
-  enum { LhsStorageOrder = ColMajor };
-
-  EIGEN_DONT_INLINE static void run(Index rows, Index cols,
-                                    const LhsScalar* lhs, Index lhsStride,
-                                    const RhsScalar* rhs, Index rhsIncr,
-                                    ResScalar* res, Index resIncr,
-                                    const ResScalar& alpha) {
-    for (Index i = 0; i < rows; i++) {
-      res[i * resIncr] += stan::math::var(new stan::math::gevv_vvv_vari(
-          &alpha,
-          (static_cast<int>(LhsStorageOrder) == static_cast<int>(ColMajor))
-              ? (&lhs[i])
-              : (&lhs[i * lhsStride]),
-          (static_cast<int>(LhsStorageOrder) == static_cast<int>(ColMajor))
-              ? (lhsStride)
-              : (1),
-          rhs, rhsIncr, cols));
-    }
-  }
-};
-template <typename Index, bool ConjugateLhs, bool ConjugateRhs>
-struct general_matrix_vector_product<Index, stan::math::var, RowMajor,
-                                     ConjugateLhs, stan::math::var,
-                                     ConjugateRhs> {
-  typedef stan::math::var LhsScalar;
-  typedef stan::math::var RhsScalar;
-  typedef typename scalar_product_traits<LhsScalar, RhsScalar>::ReturnType
-      ResScalar;
-  enum { LhsStorageOrder = RowMajor };
-
-  EIGEN_DONT_INLINE static void run(Index rows, Index cols,
-                                    const LhsScalar* lhs, Index lhsStride,
-                                    const RhsScalar* rhs, Index rhsIncr,
-                                    ResScalar* res, Index resIncr,
-                                    const RhsScalar& alpha) {
-    for (Index i = 0; i < rows; i++) {
-      res[i * resIncr] += stan::math::var(new stan::math::gevv_vvv_vari(
-          &alpha,
-          (static_cast<int>(LhsStorageOrder) == static_cast<int>(ColMajor))
-              ? (&lhs[i])
-              : (&lhs[i * lhsStride]),
-          (static_cast<int>(LhsStorageOrder) == static_cast<int>(ColMajor))
-              ? (lhsStride)
-              : (1),
-          rhs, rhsIncr, cols));
-    }
-  }
-};
-template <typename Index, int LhsStorageOrder, bool ConjugateLhs,
-          int RhsStorageOrder, bool ConjugateRhs>
-struct general_matrix_matrix_product<Index, stan::math::var, LhsStorageOrder,
-                                     ConjugateLhs, stan::math::var,
-                                     RhsStorageOrder, ConjugateRhs, ColMajor> {
-  typedef stan::math::var LhsScalar;
-  typedef stan::math::var RhsScalar;
-  typedef typename scalar_product_traits<LhsScalar, RhsScalar>::ReturnType
-      ResScalar;
-  static void run(Index rows, Index cols, Index depth, const LhsScalar* lhs,
-                  Index lhsStride, const RhsScalar* rhs, Index rhsStride,
-                  ResScalar* res, Index resStride, const ResScalar& alpha,
-                  level3_blocking<LhsScalar, RhsScalar>& /* blocking */,
-                  GemmParallelInfo<Index>* /* info = 0 */) {
-    for (Index i = 0; i < cols; i++) {
-      general_matrix_vector_product<
-          Index, LhsScalar, LhsStorageOrder, ConjugateLhs, RhsScalar,
-          ConjugateRhs>::run(rows, depth, lhs, lhsStride,
-                             &rhs[(static_cast<int>(RhsStorageOrder)
-                                   == static_cast<int>(ColMajor))
-                                      ? (i * rhsStride)
-                                      : (i)],
-                             (static_cast<int>(RhsStorageOrder)
-                              == static_cast<int>(ColMajor))
-                                 ? (1)
-                                 : (rhsStride),
-                             &res[i * resStride], 1, alpha);
-    }
-  }
-};
-#endif
 }  // namespace internal
 }  // namespace Eigen
 #endif
--- a/stan/math/rev/mat/fun/log_sum_exp.hpp
+++ b/stan/math/rev/mat/fun/log_sum_exp.hpp
@@ -6,6 +6,7 @@
 #include <stan/math/rev/mat/fun/typedefs.hpp>
 #include <stan/math/prim/scal/fun/log_sum_exp.hpp>
 #include <stan/math/prim/mat/fun/Eigen.hpp>
+#include <cmath>
 #include <limits>

 namespace stan {
@@ -19,6 +20,8 @@ namespace internal {
 template <int R, int C>
 inline double log_sum_exp_as_double(const Eigen::Matrix<var, R, C>& x) {
  const double max = x.val().maxCoeff();
+  if (!std::isfinite(max))
+    return max;
  return max + std::log((x.val().array() - max).exp().sum());
 }


--- a/stan/math/rev/mat/fun/multiply.hpp
+++ b/stan/math/rev/mat/fun/multiply.hpp
@@ -85,20 +85,53 @@ class multiply_mat_vari : public vari {
    Map<matrix_d> Bd(Bd_, A_cols_, B_cols_);
    Ad = A.val();
    Bd = B.val();
-
+#ifdef STAN_OPENCL
+    if (Ad.rows() * Ad.cols() * Bd.cols()
+        > opencl_context.tuning_opts().multiply_dim_prod_worth_transfer) {
+      matrix_cl<double> Ad_cl(Ad);
+      matrix_cl<double> Bd_cl(Bd);
+      matrix_cl<double> variRefAB_cl = Ad_cl * Bd_cl;
+      matrix_d temp = from_matrix_cl(variRefAB_cl);
+      Map<matrix_vi>(variRefAB_, A_rows_, B_cols_)
+          = temp.unaryExpr([](double x) { return new vari(x, false); });
+    } else {
+      Map<matrix_vi>(variRefAB_, A_rows_, B_cols_)
+          = (Ad * Bd).unaryExpr([](double x) { return new vari(x, false); });
+    }
+#else
    Map<matrix_vi>(variRefAB_, A_rows_, B_cols_)
        = (Ad * Bd).unaryExpr([](double x) { return new vari(x, false); });
+#endif
  }

  virtual void chain() {
    using Eigen::Map;
    matrix_d adjAB(A_rows_, B_cols_);
-
    adjAB = Map<matrix_vi>(variRefAB_, A_rows_, B_cols_).adj();
+#ifdef STAN_OPENCL
+    if (A_rows_ * A_cols_ * B_cols_
+        > opencl_context.tuning_opts().multiply_dim_prod_worth_transfer) {
+      matrix_cl<double> adjAB_cl(adjAB);
+      matrix_cl<double> Ad_cl(Ad_, A_rows_, A_cols_);
+      matrix_cl<double> Bd_cl(Bd_, A_cols_, B_cols_);
+      matrix_cl<double> variRefA_cl = adjAB_cl * transpose(Bd_cl);
+      matrix_cl<double> variRefB_cl = transpose(Ad_cl) * adjAB_cl;
+      matrix_d temp_variRefA = from_matrix_cl(variRefA_cl);
+      matrix_d temp_variRefB = from_matrix_cl(variRefB_cl);
+      Map<matrix_vi>(variRefA_, A_rows_, A_cols_).adj() += temp_variRefA;
+      Map<matrix_vi>(variRefB_, A_cols_, B_cols_).adj() += temp_variRefB;
+    } else {
+      Map<matrix_vi>(variRefA_, A_rows_, A_cols_).adj()
+          += adjAB * Map<matrix_d>(Bd_, A_cols_, B_cols_).transpose();
+      Map<matrix_vi>(variRefB_, A_cols_, B_cols_).adj()
+          += Map<matrix_d>(Ad_, A_rows_, A_cols_).transpose() * adjAB;
+    }
+#else
    Map<matrix_vi>(variRefA_, A_rows_, A_cols_).adj()
        += adjAB * Map<matrix_d>(Bd_, A_cols_, B_cols_).transpose();
    Map<matrix_vi>(variRefB_, A_cols_, B_cols_).adj()
        += Map<matrix_d>(Ad_, A_rows_, A_cols_).transpose() * adjAB;
+#endif
  }
 };

@@ -238,17 +271,44 @@ class multiply_mat_vari<double, Ra, Ca, Tb, Cb> : public vari {
    Map<matrix_d> Bd(Bd_, A_cols_, B_cols_);
    Ad = A;
    Bd = B.val();
-
+#ifdef STAN_OPENCL
+    if (Ad.rows() * Ad.cols() * Bd.cols()
+        > opencl_context.tuning_opts().multiply_dim_prod_worth_transfer) {
+      matrix_cl<double> Ad_cl(Ad);
+      matrix_cl<double> Bd_cl(Bd);
+      matrix_cl<double> variRefAB_cl = Ad_cl * Bd_cl;
+      matrix_d temp = from_matrix_cl(variRefAB_cl);
+      Map<matrix_vi>(variRefAB_, A_rows_, B_cols_)
+          = temp.unaryExpr([](double x) { return new vari(x, false); });
+    } else {
+      Map<matrix_vi>(variRefAB_, A_rows_, B_cols_)
+          = (Ad * Bd).unaryExpr([](double x) { return new vari(x, false); });
+    }
+#else
    Map<matrix_vi>(variRefAB_, A_rows_, B_cols_)
        = (Ad * Bd).unaryExpr([](double x) { return new vari(x, false); });
+#endif
  }

  virtual void chain() {
    using Eigen::Map;
    matrix_d adjAB = Map<matrix_vi>(variRefAB_, A_rows_, B_cols_).adj();
-
+#ifdef STAN_OPENCL
+    if (A_rows_ * A_cols_ * B_cols_
+        > opencl_context.tuning_opts().multiply_dim_prod_worth_transfer) {
+      matrix_cl<double> adjAB_cl(adjAB);
+      matrix_cl<double> Ad_cl(Ad_, A_rows_, A_cols_);
+      matrix_cl<double> variRefB_cl = transpose(Ad_cl) * adjAB_cl;
+      matrix_d temp_variRefB = from_matrix_cl(variRefB_cl);
+      Map<matrix_vi>(variRefB_, A_cols_, B_cols_).adj() += temp_variRefB;
+    } else {
+      Map<matrix_vi>(variRefB_, A_cols_, B_cols_).adj()
+          += Map<matrix_d>(Ad_, A_rows_, A_cols_).transpose() * adjAB;
+    }
+#else
    Map<matrix_vi>(variRefB_, A_cols_, B_cols_).adj()
        += Map<matrix_d>(Ad_, A_rows_, A_cols_).transpose() * adjAB;
+#endif
  }
 };

@@ -381,17 +441,44 @@ class multiply_mat_vari<Ta, Ra, Ca, double, Cb> : public vari {
    Map<matrix_d> Bd(Bd_, A_cols_, B_cols_);
    Ad = A.val();
    Bd = B.val();
-
+#ifdef STAN_OPENCL
+    if (Ad.rows() * Ad.cols() * Bd.cols()
+        > opencl_context.tuning_opts().multiply_dim_prod_worth_transfer) {
+      matrix_cl<double> Ad_cl(Ad);
+      matrix_cl<double> Bd_cl(Bd);
+      matrix_cl<double> variRefAB_cl = Ad_cl * Bd_cl;
+      matrix_d temp = from_matrix_cl(variRefAB_cl);
+      Map<matrix_vi>(variRefAB_, A_rows_, B_cols_)
+          = temp.unaryExpr([](double x) { return new vari(x, false); });
+    } else {
+      Map<matrix_vi>(variRefAB_, A_rows_, B_cols_)
+          = (Ad * Bd).unaryExpr([](double x) { return new vari(x, false); });
+    }
+#else
    Map<matrix_vi>(variRefAB_, A_rows_, B_cols_)
        = (Ad * Bd).unaryExpr([](double x) { return new vari(x, false); });
+#endif
  }

  virtual void chain() {
    using Eigen::Map;
    matrix_d adjAB = Map<matrix_vi>(variRefAB_, A_rows_, B_cols_).adj();
-
+#ifdef STAN_OPENCL
+    if (A_rows_ * A_cols_ * B_cols_
+        > opencl_context.tuning_opts().multiply_dim_prod_worth_transfer) {
+      matrix_cl<double> adjAB_cl(adjAB);
+      matrix_cl<double> Bd_cl(Bd_, A_cols_, B_cols_);
+      matrix_cl<double> variRefA_cl = adjAB_cl * transpose(Bd_cl);
+      matrix_d temp_variRefA = from_matrix_cl(variRefA_cl);
+      Map<matrix_vi>(variRefA_, A_rows_, A_cols_).adj() += temp_variRefA;
+    } else {
+      Map<matrix_vi>(variRefA_, A_rows_, A_cols_).adj()
+          += adjAB * Map<matrix_d>(Bd_, A_cols_, B_cols_).transpose();
+    }
+#else
    Map<matrix_vi>(variRefA_, A_rows_, A_cols_).adj()
        += adjAB * Map<matrix_d>(Bd_, A_cols_, B_cols_).transpose();
+#endif
  }
 };


--- a/stan/math/rev/scal/fun/log_diff_exp.hpp
+++ b/stan/math/rev/scal/fun/log_diff_exp.hpp
@@ -4,6 +4,7 @@
 #include <stan/math/rev/meta.hpp>
 #include <stan/math/rev/core.hpp>
 #include <stan/math/rev/scal/fun/calculate_chain.hpp>
+#include <stan/math/prim/scal/fun/constants.hpp>
 #include <stan/math/prim/scal/fun/expm1.hpp>
 #include <stan/math/prim/scal/fun/log_diff_exp.hpp>

@@ -24,13 +25,23 @@ class log_diff_exp_vd_vari : public op_vd_vari {
 public:
  log_diff_exp_vd_vari(vari* avi, double b)
      : op_vd_vari(log_diff_exp(avi->val_, b), avi, b) {}
-  void chain() { avi_->adj_ += adj_ * calculate_chain(avi_->val_, val_); }
+  void chain() {
+    if (val_ == NEGATIVE_INFTY)
+      avi_->adj_ += (bd_ == NEGATIVE_INFTY) ? adj_ : adj_ * INFTY;
+    else
+      avi_->adj_ += adj_ * calculate_chain(avi_->val_, val_);
+  }
 };
 class log_diff_exp_dv_vari : public op_dv_vari {
 public:
  log_diff_exp_dv_vari(double a, vari* bvi)
      : op_dv_vari(log_diff_exp(a, bvi->val_), a, bvi) {}
-  void chain() { bvi_->adj_ -= adj_ / expm1(ad_ - bvi_->val_); }
+  void chain() {
+    if (val_ == NEGATIVE_INFTY)
+      bvi_->adj_ -= adj_ * INFTY;
+    else
+      bvi_->adj_ -= adj_ / expm1(ad_ - bvi_->val_);
+  }
 };
 }  // namespace internal

@@ -39,7 +50,7 @@ class log_diff_exp_dv_vari : public op_dv_vari {
 *
 * @param[in] a First argument.
 * @param[in] b Second argument.
- * @return Log difference of the expnoentiated arguments.
+ * @return Log difference of the exponentiated arguments.
 */
 inline var log_diff_exp(const var& a, const var& b) {
  return var(new internal::log_diff_exp_vv_vari(a.vi_, b.vi_));
@@ -50,7 +61,7 @@ inline var log_diff_exp(const var& a, const var& b) {
 *
 * @param[in] a First argument.
 * @param[in] b Second argument.
- * @return Log difference of the expnoentiated arguments.
+ * @return Log difference of the exponentiated arguments.
 */
 inline var log_diff_exp(const var& a, double b) {
  return var(new internal::log_diff_exp_vd_vari(a.vi_, b));
@@ -61,7 +72,7 @@ inline var log_diff_exp(const var& a, double b) {
 *
 * @param[in] a First argument.
 * @param[in] b Second argument.
- * @return Log difference of the expnoentiated arguments.
+ * @return Log difference of the exponentiated arguments.
 */
 inline var log_diff_exp(double a, const var& b) {
  return var(new internal::log_diff_exp_dv_vari(a, b.vi_));

--- a/stan/math/rev/scal/fun/log_sum_exp.hpp
+++ b/stan/math/rev/scal/fun/log_sum_exp.hpp
@@ -4,6 +4,7 @@
 #include <stan/math/rev/meta.hpp>
 #include <stan/math/rev/core.hpp>
 #include <stan/math/rev/scal/fun/calculate_chain.hpp>
+#include <stan/math/prim/scal/fun/constants.hpp>
 #include <stan/math/prim/scal/fun/log_sum_exp.hpp>

 namespace stan {
@@ -24,13 +25,23 @@ class log_sum_exp_vd_vari : public op_vd_vari {
 public:
  log_sum_exp_vd_vari(vari* avi, double b)
      : op_vd_vari(log_sum_exp(avi->val_, b), avi, b) {}
-  void chain() { avi_->adj_ += adj_ * calculate_chain(avi_->val_, val_); }
+  void chain() {
+    if (val_ == NEGATIVE_INFTY)
+      avi_->adj_ += adj_;
+    else
+      avi_->adj_ += adj_ * calculate_chain(avi_->val_, val_);
+  }
 };
 class log_sum_exp_dv_vari : public op_dv_vari {
 public:
  log_sum_exp_dv_vari(double a, vari* bvi)
      : op_dv_vari(log_sum_exp(a, bvi->val_), a, bvi) {}
-  void chain() { bvi_->adj_ += adj_ * calculate_chain(bvi_->val_, val_); }
+  void chain() {
+    if (val_ == NEGATIVE_INFTY)
+      bvi_->adj_ += adj_;
+    else
+      bvi_->adj_ += adj_ * calculate_chain(bvi_->val_, val_);
+  }
 };

 }  // namespace internal

--- a/test/unit/math/fwd/scal/fun/log_diff_exp_test.cpp
+++ b/test/unit/math/fwd/scal/fun/log_diff_exp_test.cpp
 #include <stan/math/fwd/scal.hpp>
 #include <gtest/gtest.h>
 #include <test/unit/math/fwd/scal/fun/nan_util.hpp>
+#include <limits>

 TEST(AgradFwdLogDiffExp, Fvar) {
  using stan::math::fvar;
@@ -25,6 +26,26 @@ TEST(AgradFwdLogDiffExp, Fvar) {
  fvar<double> c = log_diff_exp(z, y);
  EXPECT_FLOAT_EQ(log_diff_exp(1.1, 0.5), c.val_);
  EXPECT_FLOAT_EQ(2 / (1 - exp(1.1 - 0.5)), c.d_);
+
+  double ninf = -std::numeric_limits<double>::infinity();
+  fvar<double> d = log_diff_exp(x, ninf);
+  EXPECT_FLOAT_EQ(x.val_, d.val_);
+  EXPECT_FLOAT_EQ(1.0, d.d_);
+
+  fvar<double> e = log_diff_exp(x, 1.2);
+  EXPECT_FLOAT_EQ(-std::numeric_limits<double>::infinity(), e.val_);
+  EXPECT_FLOAT_EQ(+std::numeric_limits<double>::infinity(), e.d_);
+
+  fvar<double> w(-std::numeric_limits<double>::infinity());
+  w.d_ = 1.0;
+
+  fvar<double> g = log_diff_exp(w, ninf);
+  EXPECT_FLOAT_EQ(w.val_, g.val_);
+  EXPECT_FLOAT_EQ(1.0, g.d_);
+
+  fvar<double> f = log_diff_exp(0.5, y);
+  EXPECT_FLOAT_EQ(-std::numeric_limits<double>::infinity(), f.val_);
+  EXPECT_FLOAT_EQ(-std::numeric_limits<double>::infinity(), f.d_);
 }

 TEST(AgradFwdLogDiffExp, AgradFvar_exception) {

--- a/test/unit/math/fwd/scal/fun/log_sum_exp_test.cpp
+++ b/test/unit/math/fwd/scal/fun/log_sum_exp_test.cpp
 #include <stan/math/fwd/scal.hpp>
 #include <gtest/gtest.h>
 #include <test/unit/math/fwd/scal/fun/nan_util.hpp>
+#include <limits>

 TEST(AgradFwdLogSumExp, Fvar) {
  using stan::math::fvar;
@@ -23,6 +24,17 @@ TEST(AgradFwdLogSumExp, Fvar) {
  fvar<double> c = log_sum_exp(z, x);
  EXPECT_FLOAT_EQ(log_sum_exp(1.4, 0.5), c.val_);
  EXPECT_FLOAT_EQ(1.0 * exp(0.5) / (exp(0.5) + exp(1.4)), c.d_);
+
+  double ninf = -std::numeric_limits<double>::infinity();
+  fvar<double> w(-std::numeric_limits<double>::infinity(), 1.5);
+
+  fvar<double> d = log_sum_exp(w, ninf);
+  EXPECT_FLOAT_EQ(-std::numeric_limits<double>::infinity(), d.val_);
+  EXPECT_FLOAT_EQ(1.5, d.d_);
+
+  fvar<double> e = log_sum_exp(ninf, w);
+  EXPECT_FLOAT_EQ(-std::numeric_limits<double>::infinity(), e.val_);
+  EXPECT_FLOAT_EQ(1.5, e.d_);
 }

 TEST(AgradFwdLogSumExp, FvarFvarDouble) {

--- a/test/unit/math/opencl/matrix_cl_view.cpp
+++ b/test/unit/math/opencl/matrix_cl_view.cpp
@@ -93,15 +93,13 @@ TEST(matrix_cl_view, invert) {
  EXPECT_EQ(matrix_cl_view::Diagonal, invert(matrix_cl_view::Entire));
 }

-TEST(matrix_cl_view, from_eigen_triangular_type) {
-  using stan::math::from_eigen_triangular_type;
+TEST(matrix_cl_view, from_eigen_uplo_type) {
+  using stan::math::from_eigen_uplo_type;
  using stan::math::matrix_cl_view;
-  EXPECT_EQ(matrix_cl_view::Lower, from_eigen_triangular_type(Eigen::Lower));
-  EXPECT_EQ(matrix_cl_view::Upper, from_eigen_triangular_type(Eigen::Upper));
-  EXPECT_EQ(matrix_cl_view::Entire,
-            from_eigen_triangular_type(Eigen::SelfAdjoint));
-  EXPECT_EQ(matrix_cl_view::Entire,
-            from_eigen_triangular_type(Eigen::UnitDiag));
+  EXPECT_EQ(matrix_cl_view::Lower, from_eigen_uplo_type(Eigen::Lower));
+  EXPECT_EQ(matrix_cl_view::Upper, from_eigen_uplo_type(Eigen::Upper));
+  EXPECT_EQ(matrix_cl_view::Entire, from_eigen_uplo_type(Eigen::SelfAdjoint));
+  EXPECT_EQ(matrix_cl_view::Entire, from_eigen_uplo_type(Eigen::UnitDiag));
 }

 #endif
--- a/test/unit/math/prim/mat/fun/log_sum_exp_test.cpp
+++ b/test/unit/math/prim/mat/fun/log_sum_exp_test.cpp
 #include <stan/math/prim/mat.hpp>
 #include <gtest/gtest.h>
+#include <limits>

 template <int R, int C>
 void test_log_sum_exp(const Eigen::Matrix<double, R, C>& as) {
@@ -25,11 +26,19 @@ TEST(MathFunctions, log_sum_exp) {
  v << 1, 2, 3;
  test_log_sum_exp(v);

-  Matrix<double, Dynamic, 1> rv(3);
+  Matrix<double, 1, Dynamic> rv(3);
  rv << 1, 2, 3;
  test_log_sum_exp(rv);

  Matrix<double, Dynamic, Dynamic> m_trivial(1, 1);
  m_trivial << 2;
  EXPECT_FLOAT_EQ(2, log_sum_exp(m_trivial));
+
+  Matrix<double, Dynamic, 1> i(3);
+  i << 1, 2, -std::numeric_limits<double>::infinity();
+  test_log_sum_exp(i);
+
+  Matrix<double, Dynamic, 1> ii(1);
+  ii << -std::numeric_limits<double>::infinity();
+  test_log_sum_exp(ii);
 }