Commit 323ce62b authored by rok-cesnovar
Browse files

packed_copy now returns result

Showing with 37 additions and 53 deletions
+37 -53
......@@ -98,17 +98,15 @@ void copy(Eigen::Matrix<double, R, C>& dst, const matrix_cl& src) {
* copies it to the std::vector.
*
* @tparam triangular_view the triangularity of the source matrix
* @param dst the destination std::vector
* @param src the flat triangular source matrix on the OpenCL device
* @return the packed std::vector
*/
template <TriangularViewCL triangular_view>
inline void packed_copy(std::vector<double>& dst, const matrix_cl& src) {
inline std::vector<double> packed_copy(const matrix_cl& src) {
const int packed_size = src.rows() * (src.rows() + 1) / 2;
check_size_match("copy (OpenCL -> packed std::vector)", "dst.size()",
dst.size(), "src.rows() * (src.rows() + 1) / 2",
packed_size);
if (src.size() == 0) {
return;
std::vector<double> dst(packed_size);
if (dst.size() == 0) {
return dst;
}
cl::CommandQueue queue = opencl_context.queue();
try {
......@@ -124,24 +122,27 @@ inline void packed_copy(std::vector<double>& dst, const matrix_cl& src) {
}
/**
* Copies and unpacks the packed triangular matrix from
* the source std::vector to the flat matrix_cl on the OpenCL device.
* Copies the packed triangular matrix from
* the source std::vector to an OpenCL buffer and
* unpacks it to a flat matrix on the OpenCL device.
*
* @tparam triangular_view the triangularity of the source matrix
* @param src the packed source std::vector
* @param dst the destination flat matrix on the OpenCL device
* @param rows the number of rows in the flat matrix
* @return the destination flat matrix on the OpenCL device
* @throw <code>std::invalid_argument</code> if the
* size of the vector does not match the expected size
* for the packed triangular matrix
*/
template <TriangularViewCL triangular_view>
inline void packed_copy(matrix_cl& dst, const std::vector<double>& src) {
const int packed_size = dst.rows() * (dst.rows() + 1) / 2;
inline matrix_cl packed_copy(const std::vector<double>& src, int rows) {
const int packed_size = rows * (rows + 1) / 2;
check_size_match("copy (packed std::vector -> OpenCL)", "src.size()",
src.size(), "dst.rows() * (dst.rows() + 1) / 2",
src.size(), "rows * (rows + 1) / 2",
packed_size);
if (src.size() == 0) {
return;
matrix_cl dst(rows, rows);
if (dst.size() == 0) {
return dst;
}
cl::CommandQueue queue = opencl_context.queue();
try {
......@@ -154,6 +155,7 @@ inline void packed_copy(matrix_cl& dst, const std::vector<double>& src) {
} catch (const cl::Error& e) {
check_opencl_error("packed_copy (std::vector->OpenCL)", e);
}
return dst;
}
/**
......
......@@ -323,19 +323,15 @@ class cholesky_opencl : public vari {
*/
virtual void chain() {
const int packed_size = M_ * (M_ + 1) / 2;
std::vector<double> L_adj_cpu;
L_adj_cpu.reserve(packed_size);
std::vector<double> L_val_cpu;
L_val_cpu.reserve(packed_size);
std::vector<double> L_adj_cpu(packed_size);
std::vector<double> L_val_cpu(packed_size);
for (size_type j = 0; j < packed_size; ++j) {
L_adj_cpu.push_back(vari_ref_L_[j]->adj_);
L_val_cpu.push_back(vari_ref_L_[j]->val_);
L_adj_cpu[j] = vari_ref_L_[j]->adj_;
L_val_cpu[j] = vari_ref_L_[j]->val_;
}
matrix_cl L(M_, M_);
matrix_cl L_adj(M_, M_);
packed_copy<TriangularViewCL::Lower>(L, L_val_cpu);
packed_copy<TriangularViewCL::Lower>(L_adj, L_adj_cpu);
matrix_cl L = packed_copy<TriangularViewCL::Lower>(L_val_cpu, M_);
matrix_cl L_adj = packed_copy<TriangularViewCL::Lower>(L_adj_cpu, M_);
int block_size
= M_ / opencl_context.tuning_opts().cholesky_rev_block_partition;
block_size = std::max(block_size, 8);
......@@ -386,7 +382,7 @@ class cholesky_opencl : public vari {
L_adj.sub_block(B_adj, 0, 0, k, 0, m_k_ind, j);
L_adj.sub_block(C_adj, 0, 0, k, j, m_k_ind, k_j_ind);
}
packed_copy<TriangularViewCL::Lower>(L_adj_cpu, L_adj);
L_adj_cpu = packed_copy<TriangularViewCL::Lower>(L_adj);
for (size_type j = 0; j < packed_size; ++j) {
vari_ref_A_[j]->adj_ += L_adj_cpu[j];
}
......
......@@ -101,10 +101,10 @@ TEST(MathMatrixGPU, matrix_cl_pack_unpack_copy_lower) {
for (size_t i = 0; i < packed_mat.size(); i++) {
packed_mat[i] = i;
}
stan::math::matrix_cl m_cl(size, size);
stan::math::matrix_d m_flat_cpu(size, size);
stan::math::packed_copy<stan::math::TriangularViewCL::Lower>(m_cl,
packed_mat);
auto m_cl =
stan::math::packed_copy<stan::math::TriangularViewCL::Lower>(packed_mat,
size);
stan::math::copy(m_flat_cpu, m_cl);
size_t pos = 0;
for (size_t j = 0; j < size; ++j) {
......@@ -116,8 +116,8 @@ TEST(MathMatrixGPU, matrix_cl_pack_unpack_copy_lower) {
pos++;
}
}
stan::math::packed_copy<stan::math::TriangularViewCL::Lower>(packed_mat_dst,
m_cl);
packed_mat_dst =
stan::math::packed_copy<stan::math::TriangularViewCL::Lower>(m_cl);
for (size_t i = 0; i < packed_mat.size(); i++) {
EXPECT_EQ(packed_mat[i], packed_mat_dst[i]);
}
......@@ -131,10 +131,10 @@ TEST(MathMatrixGPU, matrix_cl_pack_unpack_copy_upper) {
for (size_t i = 0; i < packed_mat.size(); i++) {
packed_mat[i] = i;
}
stan::math::matrix_cl m_cl(size, size);
stan::math::matrix_d m_flat_cpu(size, size);
stan::math::packed_copy<stan::math::TriangularViewCL::Upper>(m_cl,
packed_mat);
auto m_cl =
stan::math::packed_copy<stan::math::TriangularViewCL::Upper>(packed_mat,
size);
stan::math::copy(m_flat_cpu, m_cl);
size_t pos = 0;
for (size_t j = 0; j < size; ++j) {
......@@ -146,33 +146,19 @@ TEST(MathMatrixGPU, matrix_cl_pack_unpack_copy_upper) {
EXPECT_EQ(m_flat_cpu(i, j), 0.0);
}
}
stan::math::packed_copy<stan::math::TriangularViewCL::Upper>(packed_mat_dst,
m_cl);
packed_mat_dst =
stan::math::packed_copy<stan::math::TriangularViewCL::Upper>(m_cl);
for (size_t i = 0; i < packed_mat.size(); i++) {
EXPECT_EQ(packed_mat[i], packed_mat_dst[i]);
}
}
TEST(MathMatrixGPU, matrix_cl_pack_unpack_copy_exception) {
int size = 51;
int packed_size = size * (size + 1) / 2;
std::vector<double> packed_mat;
std::vector<double> packed_mat_dst;
for (size_t i = 0; i < packed_mat.size(); i++) {
packed_mat[i] = i;
}
stan::math::matrix_cl m_cl(size, size);
stan::math::matrix_cl m_cl_zero;
EXPECT_NO_THROW(stan::math::packed_copy<stan::math::TriangularViewCL::Upper>(packed_mat, 0));
EXPECT_NO_THROW(stan::math::packed_copy<stan::math::TriangularViewCL::Upper>(m_cl_zero));
EXPECT_THROW(stan::math::packed_copy<stan::math::TriangularViewCL::Upper>(
m_cl, packed_mat),
std::invalid_argument);
EXPECT_THROW(stan::math::packed_copy<stan::math::TriangularViewCL::Upper>(
packed_mat_dst, m_cl),
std::invalid_argument);
EXPECT_NO_THROW(stan::math::packed_copy<stan::math::TriangularViewCL::Upper>(
m_cl_zero, packed_mat));
EXPECT_NO_THROW(stan::math::packed_copy<stan::math::TriangularViewCL::Upper>(
packed_mat_dst, m_cl_zero));
packed_mat, 1), std::invalid_argument);
}
#endif
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment