Stan Math Library 2.20.0
categorical_logit_glm_lpmf.hpp
#ifndef STAN_MATH_PRIM_MAT_PROB_CATEGORICAL_LOGIT_GLM_LPMF_HPP
#define STAN_MATH_PRIM_MAT_PROB_CATEGORICAL_LOGIT_GLM_LPMF_HPP

// NOTE: the stan-internal includes (and a few other hyperlinked lines in this
// listing) were elided by the page rendering; the set below is reconstructed
// from the identifiers the file uses, so exact paths are a best effort.
#include <stan/math/prim/scal/err/check_bounded.hpp>
#include <stan/math/prim/scal/err/check_consistent_size.hpp>
#include <stan/math/prim/scal/err/check_finite.hpp>
#include <stan/math/prim/scal/err/check_size_match.hpp>
#include <stan/math/prim/scal/fun/size_zero.hpp>
#include <stan/math/prim/scal/meta/include_summand.hpp>
#include <stan/math/prim/scal/meta/is_constant_struct.hpp>
#include <stan/math/prim/scal/meta/is_vector.hpp>
#include <stan/math/prim/scal/meta/length.hpp>
#include <stan/math/prim/scal/meta/partials_return_type.hpp>
#include <stan/math/prim/scal/meta/return_type.hpp>
#include <stan/math/prim/scal/meta/scalar_seq_view.hpp>
#include <stan/math/prim/mat/fun/value_of_rec.hpp>
#include <stan/math/prim/mat/meta/operands_and_partials.hpp>
#include <Eigen/Core>
#include <cmath>

namespace stan {
namespace math {

/**
 * Returns the log PMF of the Generalized Linear Model (GLM) with categorical
 * distribution and logit (softmax) link function.
 */
template <bool propto, typename T_y, typename T_x_scalar, int T_x_rows,
          typename T_alpha_scalar, typename T_beta_scalar>
typename return_type<T_x_scalar, T_alpha_scalar, T_beta_scalar>::type
categorical_logit_glm_lpmf(
    const T_y& y, const Eigen::Matrix<T_x_scalar, T_x_rows, Eigen::Dynamic>& x,
    const Eigen::Matrix<T_alpha_scalar, Eigen::Dynamic, 1>& alpha,
    const Eigen::Matrix<T_beta_scalar, Eigen::Dynamic, Eigen::Dynamic>& beta) {
  typedef typename stan::partials_return_type<
      T_x_scalar, T_alpha_scalar, T_beta_scalar>::type T_partials_return;
  static const char* function = "categorical_logit_glm_lpmf";

  using Eigen::Array;
  using Eigen::Dynamic;
  using Eigen::Matrix;
  using std::exp;
  using std::log;

  const size_t N_instances = T_x_rows == 1 ? length(y) : x.rows();
  const size_t N_attributes = x.cols();
  const size_t N_classes = beta.cols();

  if (is_vector<T_y>::value && T_x_rows != 1) {
    check_consistent_size(function, "Vector of dependent variables", y,
                          N_instances);
  }
  check_consistent_size(function, "Intercept vector", alpha, N_classes);
  check_size_match(function, "x.cols()", N_attributes, "beta.rows()",
                   beta.rows());
  check_bounded(function, "categorical outcome out of support", y, 1,
                N_classes);

  if (size_zero(y, x, beta))
    return 0;

  if (!include_summand<propto, T_x_scalar, T_alpha_scalar,
                       T_beta_scalar>::value)
    return 0;

  const auto& x_val = value_of_rec(x);
  const auto& beta_val = value_of_rec(beta);
  const auto& alpha_val = value_of_rec(alpha);

  const auto& alpha_val_vec
      = as_column_vector_or_scalar(alpha_val).transpose();

  Array<T_partials_return, T_x_rows, Dynamic> lin
      = (x_val * beta_val).rowwise() + alpha_val_vec;
  // The row-wise maximum is subtracted before exponentiating; this prevents
  // overflow when calculating softmax/log_sum_exp and similar expressions.
  Array<T_partials_return, T_x_rows, 1> lin_max = lin.rowwise().maxCoeff();
  Array<T_partials_return, T_x_rows, Dynamic> exp_lin
      = exp(lin.colwise() - lin_max);
  Array<T_partials_return, T_x_rows, 1> inv_sum_exp_lin
      = 1 / exp_lin.rowwise().sum();

  T_partials_return logp = log(inv_sum_exp_lin).sum() - lin_max.sum();
  if (T_x_rows == 1) {
    logp *= N_instances;
  }
  scalar_seq_view<T_y> y_seq(y);
  for (int i = 0; i < N_instances; i++) {
    if (T_x_rows == 1) {
      logp += lin(0, y_seq[i] - 1);
    } else {
      logp += lin(i, y_seq[i] - 1);
    }
  }
  // TODO(Tadej) maybe we can replace the previous block with the following
  // when we have a newer Eigen:
  //   T_partials_return logp = lin(Eigen::all, y - 1).sum()
  //       + log(inv_sum_exp_lin).sum() - lin_max.sum();

  if (!std::isfinite(logp)) {
    check_finite(function, "Weight vector", beta);
    check_finite(function, "Intercept", alpha);
    check_finite(function, "Matrix of independent variables", x);
  }

  // Compute the derivatives.
  operands_and_partials<Matrix<T_x_scalar, T_x_rows, Dynamic>,
                        Matrix<T_alpha_scalar, Dynamic, 1>,
                        Matrix<T_beta_scalar, Dynamic, Dynamic>>
      ops_partials(x, alpha, beta);

  if (!is_constant_struct<T_x_scalar>::value) {
    if (T_x_rows == 1) {
      // Single shared row of x: accumulate the beta column picked out by each
      // outcome, then scale the softmax term by N_instances.
      Array<double, 1, Dynamic> beta_y = beta_val.col(y_seq[0] - 1);
      for (int i = 1; i < N_instances; i++) {
        beta_y += beta_val.col(y_seq[i] - 1).array();
      }
      ops_partials.edge1_.partials_
          = beta_y
            - (exp_lin.matrix() * beta_val.transpose()).array().colwise()
                  * inv_sum_exp_lin * N_instances;
    } else {
      Array<double, Dynamic, Dynamic> beta_y(N_instances, N_attributes);
      for (int i = 0; i < N_instances; i++) {
        beta_y.row(i) = beta_val.col(y_seq[i] - 1);
      }
      ops_partials.edge1_.partials_
          = beta_y
            - (exp_lin.matrix() * beta_val.transpose()).array().colwise()
                  * inv_sum_exp_lin;
      // TODO(Tadej) maybe we can replace the previous block with the
      // following when we have a newer Eigen:
      //   ops_partials.edge1_.partials_ = beta_val(y - 1, all)
      //       - (exp_lin.matrix() * beta.transpose()).colwise()
      //             * inv_sum_exp_lin;
    }
  }
  if (!is_constant_struct<T_alpha_scalar>::value
      || !is_constant_struct<T_beta_scalar>::value) {
    Array<T_partials_return, T_x_rows, Dynamic> neg_softmax_lin
        = exp_lin.colwise() * -inv_sum_exp_lin;
    if (!is_constant_struct<T_alpha_scalar>::value) {
      if (T_x_rows == 1) {
        ops_partials.edge2_.partials_
            = neg_softmax_lin.colwise().sum() * N_instances;
      } else {
        ops_partials.edge2_.partials_ = neg_softmax_lin.colwise().sum();
      }
      for (int i = 0; i < N_instances; i++) {
        ops_partials.edge2_.partials_[y_seq[i] - 1] += 1;
      }
    }
    if (!is_constant_struct<T_beta_scalar>::value) {
      Matrix<T_partials_return, Dynamic, Dynamic> beta_derivative;
      if (T_x_rows == 1) {
        beta_derivative
            = x_val.transpose() * neg_softmax_lin.matrix() * N_instances;
      } else {
        beta_derivative = x_val.transpose() * neg_softmax_lin.matrix();
      }

      for (int i = 0; i < N_instances; i++) {
        if (T_x_rows == 1) {
          beta_derivative.col(y_seq[i] - 1) += x_val;
        } else {
          beta_derivative.col(y_seq[i] - 1) += x_val.row(i);
        }
      }
      // TODO(Tadej) maybe we can replace the previous loop with the
      // following when we have a newer Eigen:
      //   ops_partials.edge3_.partials_(Eigen::all, y - 1)
      //       += x_val.colwise().sum().transpose();

      ops_partials.edge3_.partials_ = std::move(beta_derivative);
    }
  }
  return ops_partials.build(logp);
}

template <typename T_y, typename T_x_scalar, int T_x_rows,
          typename T_alpha_scalar, typename T_beta_scalar>
typename return_type<T_x_scalar, T_alpha_scalar, T_beta_scalar>::type
categorical_logit_glm_lpmf(
    const T_y& y, const Eigen::Matrix<T_x_scalar, T_x_rows, Eigen::Dynamic>& x,
    const Eigen::Matrix<T_alpha_scalar, Eigen::Dynamic, 1>& alpha,
    const Eigen::Matrix<T_beta_scalar, Eigen::Dynamic, Eigen::Dynamic>& beta) {
  return categorical_logit_glm_lpmf<false>(y, x, alpha, beta);
}

}  // namespace math
}  // namespace stan

#endif
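For reference, the quantity assembled in logp above is the categorical-logit (softmax) log PMF. Writing $\ell = x\beta + \mathbf{1}\alpha^\top$ for the $N \times C$ matrix of linear predictors (lin in the code, with $C$ = N_classes), the function computes

\[
\log p(y \mid x, \alpha, \beta) = \sum_{i=1}^{N} \Bigl( \ell_{i,y_i} - \log \sum_{c=1}^{C} e^{\ell_{ic}} \Bigr).
\]

The inner log-sum-exp is evaluated in the shifted form the lin_max comment refers to: with $m_i = \max_c \ell_{ic}$,

\[
\log \sum_{c} e^{\ell_{ic}} = m_i + \log \sum_{c} e^{\ell_{ic} - m_i},
\]

so no exponentiated term exceeds one and the sum cannot overflow. This is exactly why logp is built from $\log(\texttt{inv\_sum\_exp\_lin})$ and lin_max rather than from exp(lin) directly.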
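The three operands_and_partials edges correspond to the standard closed-form gradients of this log PMF, written out here (in the notation above) to make the edge blocks easier to check. With $p_{ic} = e^{\ell_{ic}} / \sum_k e^{\ell_{ik}}$ for the softmax probabilities, so that neg_softmax_lin holds $-p$:

\[
\frac{\partial \log p}{\partial \alpha_c} = \sum_{i} \bigl( \mathbb{1}[y_i = c] - p_{ic} \bigr), \qquad
\frac{\partial \log p}{\partial \beta_{jc}} = \sum_{i} x_{ij} \bigl( \mathbb{1}[y_i = c] - p_{ic} \bigr), \qquad
\frac{\partial \log p}{\partial x_{ij}} = \beta_{j,y_i} - \sum_{c} p_{ic} \beta_{jc}.
\]

These are, respectively, edge2_ (column sums of neg_softmax_lin plus one at each observed class), edge3_ (x_val.transpose() * neg_softmax_lin plus a scatter of x_val.row(i) into column $y_i$), and edge1_ (beta_y minus the softmax-weighted combination of the columns of beta_val). In the T_x_rows == 1 case the single row of x is shared by every instance, hence the extra N_instances factors.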
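Finally, a minimal usage sketch. It calls the non-propto overload defined at the end of the header with plain double arguments; the data values are arbitrary, and the snippet assumes the usual top-level <stan/math.hpp> include is available on the include path.

#include <stan/math.hpp>
#include <Eigen/Core>
#include <iostream>
#include <vector>

int main() {
  // Three instances, two attributes, three classes (all values arbitrary).
  std::vector<int> y{1, 3, 2};  // outcomes, each in 1..N_classes
  Eigen::MatrixXd x(3, 2);      // N_instances x N_attributes
  x << 0.5, -1.0,
       2.0,  0.3,
      -0.7,  1.2;
  Eigen::VectorXd alpha(3);     // one intercept per class
  alpha << 0.1, -0.2, 0.4;
  Eigen::MatrixXd beta(2, 3);   // N_attributes x N_classes
  beta << 1.0, -0.5,  0.2,
          0.3,  0.8, -1.1;

  // With double-only arguments this returns the fully normalized log PMF.
  double lp = stan::math::categorical_logit_glm_lpmf(y, x, alpha, beta);
  std::cout << "log PMF = " << lp << std::endl;
  return 0;
}

Passing stan::math::var matrices for alpha and beta instead would route the analytic partials computed above into the reverse-mode autodiff graph via ops_partials.build(logp).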