Unverified Commit 0784a82f authored by wds15's avatar wds15 Committed by GitHub
Browse files

Merge pull request #1066 from stan-dev/feature/issue-1062-ode-speedup

Feature/issue 1062 ode speedup
parents d3254859 fd7846f3
No related merge requests found
Showing with 94 additions and 32 deletions
+94 -32
......@@ -37,6 +37,20 @@ namespace math {
* to the second base system equation, and so on through the last base
* system equation.
*
* <p>Note: Calculating the sensitivity system requires the Jacobian
* of the base ODE RHS with respect to the parameters theta. The
* parameter vector theta is constant for successive calls to the
* exposed operator(). For this reason, the parameter vector theta is
* copied upon construction onto the nochain var autodiff tape, which
* is used in the nested autodiff performed in the operator() of this
* adaptor. Doing so reduces the size of the nested autodiff and
* speeds up autodiff. As a side effect, the parameter vector theta
* will remain on the nochain part of the autodiff tape that is in
* use even after destruction of the given instance. Moreover, the
* adjoint zeroing for the nested system does not cover the theta
* parameter vector part of the nochain autodiff tape; those adjoints
* are therefore set to zero using a dedicated loop.
*
* @tparam F base ode system functor. Must provide
* <code>operator()(double t, std::vector<double> y, std::vector<var> theta,
* std::vector<double> x, std::vector<int>x_int, std::ostream*
......@@ -47,7 +61,7 @@ struct coupled_ode_system<F, double, var> {
const F& f_;
const std::vector<double>& y0_dbl_;
const std::vector<var>& theta_;
const std::vector<double> theta_dbl_;
std::vector<var> theta_nochain_;
const std::vector<double>& x_;
const std::vector<int>& x_int_;
const size_t N_;
......@@ -74,13 +88,15 @@ struct coupled_ode_system<F, double, var> {
: f_(f),
y0_dbl_(y0),
theta_(theta),
theta_dbl_(value_of(theta)),
x_(x),
x_int_(x_int),
N_(y0.size()),
M_(theta.size()),
size_(N_ + N_ * M_),
msgs_(msgs) {}
msgs_(msgs) {
for (const var& p : theta)
theta_nochain_.emplace_back(var(new vari(p.val(), false)));
}
/**
* Calculates the derivative of the coupled ode system with respect
......@@ -103,11 +119,9 @@ struct coupled_ode_system<F, double, var> {
try {
start_nested();
vector<var> y_vars(z.begin(), z.begin() + N_);
vector<var> theta_vars(theta_dbl_.begin(), theta_dbl_.end());
const vector<var> y_vars(z.begin(), z.begin() + N_);
vector<var> dy_dt_vars = f_(t, y_vars, theta_vars, x_, x_int_, msgs_);
vector<var> dy_dt_vars = f_(t, y_vars, theta_nochain_, x_, x_int_, msgs_);
check_size_match("coupled_ode_system", "dz_dt", dy_dt_vars.size(),
"states", N_);
......@@ -120,7 +134,7 @@ struct coupled_ode_system<F, double, var> {
// orders derivatives by equation (i.e. if there are 2 eqns
// (y1, y2) and 2 parameters (a, b), dy_dt will be ordered as:
// dy1_dt, dy2_dt, dy1_da, dy2_da, dy1_db, dy2_db
double temp_deriv = theta_vars[j].adj();
double temp_deriv = theta_nochain_[j].adj();
const size_t offset = N_ + N_ * j;
for (size_t k = 0; k < N_; k++)
temp_deriv += z[offset + k] * y_vars[k].adj();
......@@ -129,6 +143,12 @@ struct coupled_ode_system<F, double, var> {
}
set_zero_all_adjoints_nested();
// Parameters stored on the outer (non-nested) nochain stack are not
// reset to zero by set_zero_all_adjoints_nested(). This is done as a
// separate step here. See efficiency note above on template
// specialization for more details on this.
for (size_t j = 0; j < M_; ++j)
theta_nochain_[j].vi_->set_zero_adjoint();
}
} catch (const std::exception& e) {
recover_memory_nested();
......@@ -226,7 +246,6 @@ template <typename F>
struct coupled_ode_system<F, var, double> {
const F& f_;
const std::vector<var>& y0_;
const std::vector<double> y0_dbl_;
const std::vector<double>& theta_dbl_;
const std::vector<double>& x_;
const std::vector<int>& x_int_;
......@@ -253,7 +272,6 @@ struct coupled_ode_system<F, var, double> {
const std::vector<int>& x_int, std::ostream* msgs)
: f_(f),
y0_(y0),
y0_dbl_(value_of(y0)),
theta_dbl_(theta),
x_(x),
x_int_(x_int),
......@@ -283,7 +301,7 @@ struct coupled_ode_system<F, var, double> {
try {
start_nested();
vector<var> y_vars(z.begin(), z.begin() + N_);
const vector<var> y_vars(z.begin(), z.begin() + N_);
vector<var> dy_dt_vars = f_(t, y_vars, theta_dbl_, x_, x_int_, msgs_);
......@@ -339,7 +357,7 @@ struct coupled_ode_system<F, var, double> {
std::vector<double> initial_state() const {
std::vector<double> initial(size_, 0.0);
for (size_t i = 0; i < N_; i++)
initial[i] = y0_dbl_[i];
initial[i] = value_of(y0_[i]);
for (size_t i = 0; i < N_; i++)
initial[N_ + i * N_ + i] = 1.0;
return initial;
......@@ -406,6 +424,20 @@ struct coupled_ode_system<F, var, double> {
* parameters with respect to the second base system equation, and
* so on through the last base system equation.
*
* <p>Note: Calculating the sensitivity system requires the Jacobian
* of the base ODE RHS with respect to the parameters theta. The
* parameter vector theta is constant for successive calls to the
* exposed operator(). For this reason, the parameter vector theta is
* copied upon construction onto the nochain var autodiff tape, which
* is used in the nested autodiff performed in the operator() of this
* adaptor. Doing so reduces the size of the nested autodiff and
* speeds up autodiff. As a side effect, the parameter vector theta
* will remain on the nochain part of the autodiff tape that is in
* use even after destruction of the given instance. Moreover, the
* adjoint zeroing for the nested system does not cover the theta
* parameter vector part of the nochain autodiff tape; those adjoints
* are therefore set to zero using a dedicated loop.
*
* @tparam F base ode system functor. Must provide
* <code>operator()(double t, std::vector<var> y, std::vector<var> theta,
* std::vector<double> x, std::vector<int>x_int, std::ostream*
......@@ -415,9 +447,8 @@ template <typename F>
struct coupled_ode_system<F, var, var> {
const F& f_;
const std::vector<var>& y0_;
const std::vector<double> y0_dbl_;
const std::vector<var>& theta_;
const std::vector<double> theta_dbl_;
std::vector<var> theta_nochain_;
const std::vector<double>& x_;
const std::vector<int>& x_int_;
const size_t N_;
......@@ -443,15 +474,16 @@ struct coupled_ode_system<F, var, var> {
const std::vector<int>& x_int, std::ostream* msgs)
: f_(f),
y0_(y0),
y0_dbl_(value_of(y0)),
theta_(theta),
theta_dbl_(value_of(theta)),
x_(x),
x_int_(x_int),
N_(y0.size()),
M_(theta.size()),
size_(N_ + N_ * (N_ + M_)),
msgs_(msgs) {}
msgs_(msgs) {
for (const var& p : theta)
theta_nochain_.emplace_back(var(new vari(p.val(), false)));
}
/**
* Calculates the derivative of the coupled ode system with respect
......@@ -474,11 +506,9 @@ struct coupled_ode_system<F, var, var> {
try {
start_nested();
vector<var> y_vars(z.begin(), z.begin() + N_);
vector<var> theta_vars(theta_dbl_.begin(), theta_dbl_.end());
const vector<var> y_vars(z.begin(), z.begin() + N_);
vector<var> dy_dt_vars = f_(t, y_vars, theta_vars, x_, x_int_, msgs_);
vector<var> dy_dt_vars = f_(t, y_vars, theta_nochain_, x_, x_int_, msgs_);
check_size_match("coupled_ode_system", "dz_dt", dy_dt_vars.size(),
"states", N_);
......@@ -500,7 +530,7 @@ struct coupled_ode_system<F, var, var> {
}
for (size_t j = 0; j < M_; j++) {
double temp_deriv = theta_vars[j].adj();
double temp_deriv = theta_nochain_[j].adj();
const size_t offset = N_ + N_ * N_ + N_ * j;
for (size_t k = 0; k < N_; k++)
temp_deriv += z[offset + k] * y_vars[k].adj();
......@@ -509,6 +539,12 @@ struct coupled_ode_system<F, var, var> {
}
set_zero_all_adjoints_nested();
// Parameters stored on the outer (non-nested) nochain stack are not
// reset to zero by set_zero_all_adjoints_nested(). This is done as a
// separate step here. See efficiency note above on template
// specialization for more details on this.
for (size_t j = 0; j < M_; ++j)
theta_nochain_[j].vi_->set_zero_adjoint();
}
} catch (const std::exception& e) {
recover_memory_nested();
......@@ -545,7 +581,7 @@ struct coupled_ode_system<F, var, var> {
std::vector<double> initial_state() const {
std::vector<double> initial(size_, 0.0);
for (size_t i = 0; i < N_; i++)
initial[i] = y0_dbl_[i];
initial[i] = value_of(y0_[i]);
for (size_t i = 0; i < N_; i++)
initial[N_ + i * N_ + i] = 1.0;
return initial;
......
......@@ -21,7 +21,7 @@ static void grad(vari* vi);
* Independent (input) and dependent (output) variables for gradients.
*
* This class acts as a smart pointer, with resources managed by
* an agenda-based memory manager scoped to a single gradient
* an arena-based memory manager scoped to a single gradient
* calculation.
*
* A var is constructed with a double and used like any
......
......@@ -148,11 +148,11 @@ class cvodes_ode_data {
*/
inline void rhs(double t, const double y[], double dy_dt[]) const {
const std::vector<double> y_vec(y, y + N_);
const std::vector<double> dy_dt_vec
const std::vector<double>& dy_dt_vec
= f_(t, y_vec, theta_dbl_, x_, x_int_, msgs_);
check_size_match("cvodes_ode_data", "dz_dt", dy_dt_vec.size(), "states",
N_);
std::copy(dy_dt_vec.begin(), dy_dt_vec.end(), dy_dt);
std::move(dy_dt_vec.begin(), dy_dt_vec.end(), dy_dt);
}
/**
......@@ -163,14 +163,13 @@ class cvodes_ode_data {
* y to be the initial of the coupled ode system.
*/
inline int jacobian_states(double t, const double y[], SUNMatrix J) const {
const std::vector<double> y_vec(y, y + N_);
start_nested();
std::vector<var> y_vec_var(y_vec.begin(), y_vec.end());
const std::vector<var> y_vec_var(y, y + N_);
coupled_ode_system<F, var, double> ode_jacobian(f_, y_vec_var, theta_dbl_,
x_, x_int_, msgs_);
std::vector<double> jacobian_y(ode_jacobian.size(), 0);
std::vector<double>&& jacobian_y = std::vector<double>(ode_jacobian.size());
ode_jacobian(ode_jacobian.initial_state(), jacobian_y, t);
std::copy(jacobian_y.begin() + N_, jacobian_y.end(), SM_DATA_D(J));
std::move(jacobian_y.begin() + N_, jacobian_y.end(), SM_DATA_D(J));
recover_memory_nested();
return 0;
}
......@@ -184,14 +183,14 @@ class cvodes_ode_data {
inline void rhs_sens(double t, const double y[], N_Vector* yS,
N_Vector* ySdot) const {
std::vector<double> z(coupled_state_.size());
std::vector<double> dz_dt(coupled_state_.size());
std::vector<double>&& dz_dt = std::vector<double>(coupled_state_.size());
std::copy(y, y + N_, z.begin());
for (std::size_t s = 0; s < S_; s++)
std::copy(NV_DATA_S(yS[s]), NV_DATA_S(yS[s]) + N_,
z.begin() + (s + 1) * N_);
coupled_ode_(z, dz_dt, t);
for (std::size_t s = 0; s < S_; s++)
std::copy(dz_dt.begin() + (s + 1) * N_, dz_dt.begin() + (s + 2) * N_,
std::move(dz_dt.begin() + (s + 1) * N_, dz_dt.begin() + (s + 2) * N_,
NV_DATA_S(ySdot[s]));
}
};
......
......@@ -18,6 +18,8 @@ struct StanAgradRevOde : public ::testing::Test {
TEST_F(StanAgradRevOde, coupled_ode_system_dv) {
using stan::math::coupled_ode_system;
stan::math::start_nested();
harm_osc_ode_fun harm_osc;
std::vector<stan::math::var> theta;
......@@ -39,15 +41,22 @@ TEST_F(StanAgradRevOde, coupled_ode_system_dv) {
z0.push_back(1.0);
z0.push_back(2.0);
std::size_t stack_size = stan::math::nested_size();
coupled_ode_system<harm_osc_ode_fun, double, stan::math::var> system(
harm_osc, y0, theta, x, x_int, &msgs);
EXPECT_EQ(stack_size, stan::math::nested_size())
<< "expecting no new things on the stack";
system(z0, dz_dt, t0);
EXPECT_FLOAT_EQ(0.5, dz_dt[0]);
EXPECT_FLOAT_EQ(-1.075, dz_dt[1]);
EXPECT_FLOAT_EQ(2, dz_dt[2]);
EXPECT_FLOAT_EQ(-1.8, dz_dt[3]);
stan::math::recover_memory_nested();
}
TEST_F(StanAgradRevOde, decouple_states_dv) {
using stan::math::coupled_ode_system;
......@@ -193,6 +202,8 @@ TEST_F(StanAgradRevOde, memory_recovery_exception_dv) {
TEST_F(StanAgradRevOde, coupled_ode_system_vd) {
using stan::math::coupled_ode_system;
stan::math::start_nested();
harm_osc_ode_fun harm_osc;
std::vector<double> theta;
......@@ -219,9 +230,14 @@ TEST_F(StanAgradRevOde, coupled_ode_system_vd) {
y0_var.push_back(1.0);
y0_var.push_back(0.5);
std::size_t stack_size = stan::math::nested_size();
coupled_ode_system<harm_osc_ode_fun, stan::math::var, double> system(
harm_osc, y0_var, theta, x, x_int, &msgs);
EXPECT_EQ(stack_size, stan::math::nested_size())
<< "expecting no new things on the stack";
system(z0, dz_dt, t0);
EXPECT_FLOAT_EQ(0.5, dz_dt[0]);
......@@ -230,6 +246,8 @@ TEST_F(StanAgradRevOde, coupled_ode_system_vd) {
EXPECT_FLOAT_EQ(-1.0 * 1.0 - 0.15 * 0.0, dz_dt[3]);
EXPECT_FLOAT_EQ(0.0 * 0.0 + 1.0 * 1.0, dz_dt[4]);
EXPECT_FLOAT_EQ(-1.0 * 0.0 - 0.15 * 1.0, dz_dt[5]);
stan::math::recover_memory_nested();
}
TEST_F(StanAgradRevOde, decouple_states_vd) {
using stan::math::coupled_ode_system;
......@@ -374,6 +392,7 @@ TEST_F(StanAgradRevOde, memory_recovery_exception_vd) {
TEST_F(StanAgradRevOde, coupled_ode_system_vv) {
using stan::math::coupled_ode_system;
stan::math::start_nested();
const size_t N = 2;
const size_t M = 1;
const size_t z_size = N + N * N + N * M;
......@@ -386,9 +405,15 @@ TEST_F(StanAgradRevOde, coupled_ode_system_vv) {
theta_var.push_back(0.15);
harm_osc_ode_fun harm_osc;
std::size_t stack_size = stan::math::nested_size();
coupled_ode_system<harm_osc_ode_fun, stan::math::var, stan::math::var> system(
harm_osc, y0_var, theta_var, x, x_int, &msgs);
EXPECT_EQ(stack_size, stan::math::nested_size())
<< "expecting no new things on the stack";
std::vector<double> z0(z_size, 0);
z0[0] = 1.0;
z0[1] = 0.5;
......@@ -419,6 +444,8 @@ TEST_F(StanAgradRevOde, coupled_ode_system_vv) {
EXPECT_FLOAT_EQ(-0.15, dz_dt[5]);
EXPECT_FLOAT_EQ(0, dz_dt[6]);
EXPECT_FLOAT_EQ(-0.5, dz_dt[7]);
stan::math::recover_memory_nested();
}
TEST_F(StanAgradRevOde, decouple_states_vv) {
using stan::math::coupled_ode_system;
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment