Commit 6adb7122 authored by Rayleigh L's avatar Rayleigh L
Browse files

Merge branch 'develop' of https://github.com/stan-dev/math into feature/issue-838-linseq

No related merge requests found
Showing with 459 additions and 134 deletions
+459 -134
......@@ -34,7 +34,10 @@ pipeline {
'Run additional distribution tests on RowVectors (takes 5x as long)',
name: 'withRowVector')
}
options { skipDefaultCheckout() }
options {
skipDefaultCheckout()
preserveStashes(buildCount: 7)
}
stages {
stage('Kill previous builds') {
when {
......@@ -126,25 +129,37 @@ pipeline {
sh "echo CC=${env.CXX} -Werror > make/local"
sh "make -j${env.PARALLEL} test-headers"
}
post {
always {
warnings canRunOnFailed: true, consoleParsers: [[parserName: 'GNU C Compiler 4 (gcc)'], [parserName: 'Clang (LLVM based)']]
deleteDir()
}
}
post { always { deleteDir() } }
}
stage('Linux Unit with MPI') {
agent { label 'linux' }
steps {
deleteDir()
unstash 'MathSetup'
sh "echo CC=${MPICXX} >> make/local"
sh "echo STAN_MPI=true >> make/local"
runTests("test/unit")
stage('Always-run tests part 1') {
parallel {
stage('Linux Unit with MPI') {
agent { label 'linux' }
steps {
deleteDir()
unstash 'MathSetup'
sh "echo CC=${MPICXX} >> make/local"
sh "echo STAN_MPI=true >> make/local"
runTests("test/unit")
}
post { always { retry(3) { deleteDir() } } }
}
stage('GPU Tests') {
agent { label "gpu" }
steps {
deleteDir()
unstash 'MathSetup'
sh "echo CC=${env.CXX} -Werror > make/local"
sh "echo STAN_OPENCL=true>> make/local"
sh "echo OPENCL_PLATFORM_ID=0>> make/local"
sh "echo OPENCL_DEVICE_ID=1>> make/local"
runTests("test/unit/math/gpu")
}
post { always { retry(3) { deleteDir() } } }
}
}
post { always { retry(3) { deleteDir() } } }
}
stage('Always-run tests') {
stage('Always-run tests part 2') {
parallel {
stage('Distribution tests') {
agent { label "distribution-tests" }
......@@ -170,17 +185,17 @@ pipeline {
}
failure {
echo "Distribution tests failed. Check out dist.log.zip artifact for test logs."
}
}
}
}
stage('Mac Unit with Threading') {
agent { label 'osx' }
stage('Threading tests') {
agent any
steps {
deleteDir()
unstash 'MathSetup'
sh "echo CC=${env.CXX} -Werror > make/local"
sh "echo CXXFLAGS+=-DSTAN_THREADS >> make/local"
runTests("test/unit")
runTests("test/unit -f thread")
}
post { always { retry(3) { deleteDir() } } }
}
......@@ -197,7 +212,7 @@ pipeline {
sh "echo CC=${env.CXX} -Werror > make/local"
sh "echo STAN_OPENCL=true>> make/local"
sh "echo OPENCL_PLATFORM_ID=0>> make/local"
sh "echo OPENCL_DEVICE_ID=0>> make/local"
sh "echo OPENCL_DEVICE_ID=1>> make/local"
runTests("test/unit")
}
post { always { retry(3) { deleteDir() } } }
......@@ -213,6 +228,17 @@ pipeline {
}
post { always { retry(3) { deleteDir() } } }
}
stage('Mac Unit with Threading') {
agent { label 'osx' }
steps {
deleteDir()
unstash 'MathSetup'
sh "echo CC=${env.CXX} -Werror > make/local"
sh "echo CXXFLAGS+=-DSTAN_THREADS >> make/local"
runTests("test/unit")
}
post { always { retry(3) { deleteDir() } } }
}
}
}
stage('Upstream tests') {
......@@ -252,7 +278,7 @@ pipeline {
post {
always {
node("osx || linux") {
warnings canRunOnFailed: true, consoleParsers: [[parserName: 'GNU C Compiler 4 (gcc)'], [parserName: 'Clang (LLVM based)']]
warnings canRunOnFailed: true, consoleParsers: [[parserName: 'Clang (LLVM based)']]
}
}
success {
......
......@@ -39,7 +39,7 @@ If this is in the file `/path/to/foo/foo.cpp`, then you can compile and run this
```
> cd /path/to/foo
> clang++ -std=c++11 -I /path/to/stan-math -I /path/to/Eigen -I /path/to/boost -I /path/to/sundials foo.cpp
> clang++ -std=c++1y -I /path/to/stan-math -I /path/to/Eigen -I /path/to/boost -I /path/to/sundials foo.cpp
> ./a.out
log normal(1 | 2, 3)=-2.07311
```
......@@ -54,7 +54,7 @@ The `-I` includes provide paths pointing to the four necessary includes:
Note that the paths should *not* include the final directories `stan`, `Eigen`, or `boost` on the paths. An example of a real instantiation:
```
clang++ -std=c++11 -I ~/stan-dev/math -I ~/stan-dev/math/lib/eigen_3.3.3/ -I ~/stan-dev/math/lib/boost_1.66.0/ -I ~/stan-dev/math/lib/sundials_3.1.0/include foo.cpp
clang++ -std=c++1y -I ~/stan-dev/math -I ~/stan-dev/math/lib/eigen_3.3.3/ -I ~/stan-dev/math/lib/boost_1.66.0/ -I ~/stan-dev/math/lib/sundials_3.1.0/include foo.cpp
```
The following directories all exist below the links given to `-I`: `~/stan-dev/math/stan` and `~/stan-dev/math/lib/eigen_3.3.3/Eigen` and `~/stan-dev/math/lib/boost_1.66.0/boost` and `~/stan-dev/math/lib/sundials_3.1.0/include`.
......
......@@ -68,12 +68,16 @@ $(BOOST)/user-config.jam:
echo "# In case of a compiler mismatch used by mpicxx and" >> $(BOOST)/user-config.jam
echo "# the compiler used for Stan, consider configuring" >> $(BOOST)/user-config.jam
echo "# the boost toolset here" >> $(BOOST)/user-config.jam
echo "# Moreover, should your mpicxx command live in a" >> $(BOOST)/user-config.jam
echo "# in a non-standard directory, then consider to tell" >> $(BOOST)/user-config.jam
echo "# boost mpi using this syntax:" >> $(BOOST)/user-config.jam
echo "#using mpi : /path/to/mpicxx ;" >> $(BOOST)/user-config.jam
echo "using mpi ;" >> $(BOOST)/user-config.jam
$(BOOST_LIB)/mpi.so: $(BOOST)/user-config.jam
@mkdir -p $(dir $@)
cd $(BOOST); ./bootstrap.sh
cd $(BOOST); ./b2 --user-config=user-config.jam --layout=system --with-mpi --with-serialization -j$(BOOST_PARALLEL_BUILD) variant=release link=shared threading=multi runtime-link=shared
cd $(BOOST); ./b2 --user-config=user-config.jam --layout=system --with-mpi --with-serialization -j$(BOOST_PARALLEL_BUILD) variant=release link=shared threading=multi runtime-link=shared hardcode-dll-paths=true dll-path="$(BOOST_LIB_ABS)"
$(BOOST_LIB)/libboost_serialization.so: $(BOOST_LIB)/mpi.so
......
......@@ -9,5 +9,5 @@
ifdef STAN_MPI
LIBMPI = $(BOOST_LIB)/libboost_serialization$(DLL) $(BOOST_LIB)/libboost_mpi$(DLL) $(MATH)bin/math/prim/arr/functor/mpi_cluster_inst.o
CXXFLAGS_MPI = -DSTAN_MPI
LDFLAGS_MPI ?= -Wl,-lboost_mpi -Wl,-lboost_serialization -Wl,-L,"$(BOOST_LIB_ABS)" -Wl,-rpath,"$(BOOST_LIB_ABS)"
LDFLAGS_MPI ?= -Wl,-L,"$(BOOST_LIB_ABS)" -Wl,-rpath,"$(BOOST_LIB_ABS)"
endif
......@@ -46,6 +46,13 @@ class ops_partials_edge<Dx, fvar<Dx> > {
* This is the specialization for when the return type is fvar,
* which should be for forward mode and all higher-order cases.
*
* NB: since ops_partials_edge.partials_ and ops_partials_edge.partials_vec
* are sometimes represented internally as a broadcast_array, we need to take
* care with assignments to them. Indeed, we can assign any right hand side
* which allows for indexing to a broadcast_array. The resulting behaviour is
* that the entry for the first index is what gets assigned. The most common
* use-case should be where the rhs is some container of length 1.
*
* @tparam Op1 type of the first operand
* @tparam Op2 type of the second operand
* @tparam Op3 type of the third operand
......
......@@ -2,6 +2,7 @@
#define STAN_MATH_GPU_ADD_HPP
#ifdef STAN_OPENCL
#include <stan/math/gpu/matrix_gpu.hpp>
#include <stan/math/gpu/kernels/add.hpp>
#include <stan/math/gpu/err/check_matching_dims.hpp>
#include <CL/cl.hpp>
......@@ -26,14 +27,10 @@ inline matrix_gpu add(const matrix_gpu& A, const matrix_gpu& B) {
if (C.size() == 0) {
return C;
}
cl::Kernel kernel = opencl_context.get_kernel("add");
cl::CommandQueue cmdQueue = opencl_context.queue();
try {
opencl_context.set_kernel_args(kernel, C.buffer(), A.buffer(), B.buffer(),
A.rows(), A.cols());
cmdQueue.enqueueNDRangeKernel(kernel, cl::NullRange,
cl::NDRange(A.rows(), A.cols()),
cl::NullRange, NULL, NULL);
opencl_kernels::add(cl::NDRange(A.rows(), A.cols()), C.buffer(), A.buffer(),
B.buffer(), A.rows(), A.cols());
} catch (const cl::Error& e) {
check_opencl_error("add", e);
}
......
#ifndef STAN_MATH_GPU_CONSTANTS_HPP
#define STAN_MATH_GPU_CONSTANTS_HPP
#ifdef STAN_OPENCL
namespace stan {
namespace math {
/**
 * Selects which part of a matrix a GPU routine operates on.
 */
enum class TriangularViewGPU {
  Lower = 0,  // the lower triangular part
  Upper = 1,  // the upper triangular part
  Entire = 2  // the whole matrix
};

/**
 * Two-valued direction selector between the upper and lower triangles.
 * NOTE(review): the routines consuming this enum are not shown here;
 * presumably it names which triangle is mapped onto the other - confirm.
 */
enum class TriangularMapGPU {
  UpperToLower = 0,
  LowerToUpper = 1
};
} // namespace math
} // namespace stan
#endif
#endif
......@@ -3,7 +3,9 @@
#ifdef STAN_OPENCL
#include <stan/math/gpu/opencl_context.hpp>
#include <stan/math/gpu/kernel_cl.hpp>
#include <stan/math/gpu/matrix_gpu.hpp>
#include <stan/math/gpu/kernels/copy.hpp>
#include <stan/math/prim/mat/fun/Eigen.hpp>
#include <stan/math/prim/scal/err/check_size_match.hpp>
#include <CL/cl.hpp>
......@@ -110,15 +112,8 @@ inline void copy(matrix_gpu& dst, const matrix_gpu& src) {
* see the matrix_gpu(matrix_gpu&) constructor
* for explanation
*/
cl::CommandQueue& cmdQueue = opencl_context.queue();
cl::Kernel kernel = opencl_context.get_kernel("copy");
kernel.setArg(0, src.buffer());
kernel.setArg(1, dst.buffer());
kernel.setArg(2, dst.rows());
kernel.setArg(3, dst.cols());
cmdQueue.enqueueNDRangeKernel(kernel, cl::NullRange,
cl::NDRange(dst.rows(), dst.cols()),
cl::NullRange, NULL, NULL);
opencl_kernels::copy(cl::NDRange(dst.rows(), dst.cols()), src.buffer(),
dst.buffer(), dst.rows(), dst.cols());
} catch (const cl::Error& e) {
std::cout << e.err() << std::endl;
check_opencl_error("copy GPU->GPU", e);
......
#ifndef STAN_MATH_GPU_COPY_TRIANGULAR_HPP
#define STAN_MATH_GPU_COPY_TRIANGULAR_HPP
#ifdef STAN_OPENCL
#include <stan/math/gpu/constants.hpp>
#include <stan/math/gpu/matrix_gpu.hpp>
#include <stan/math/gpu/copy.hpp>
#include <stan/math/gpu/kernels/copy_triangular.hpp>
#include <CL/cl.hpp>
namespace stan {
......@@ -16,27 +19,24 @@ namespace math {
* @param src the source matrix
* @tparam triangular_map int to describe
* which part of the matrix to copy:
* Lower - copies the lower triangular
* Upper - copes the upper triangular
* TriangularViewGPU::Lower - copies the lower triangular
* TriangularViewGPU::Upper - copies the upper triangular
*
* @return the matrix with the copied content
*
*/
template <int triangular_map>
template <TriangularViewGPU triangular_view = TriangularViewGPU::Entire>
inline matrix_gpu copy_triangular(const matrix_gpu& src) {
if (src.size() == 0 || src.size() == 1) {
matrix_gpu dst(src);
return dst;
}
matrix_gpu dst(src.rows(), src.cols());
cl::Kernel kernel = opencl_context.get_kernel("copy_triangular");
cl::CommandQueue cmdQueue = opencl_context.queue();
try {
opencl_context.set_kernel_args(kernel, dst.buffer(), src.buffer(),
dst.rows(), dst.cols(), triangular_map);
cmdQueue.enqueueNDRangeKernel(kernel, cl::NullRange,
cl::NDRange(dst.rows(), dst.cols()),
cl::NullRange, NULL, NULL);
opencl_kernels::copy_triangular(cl::NDRange(dst.rows(), dst.cols()),
dst.buffer(), src.buffer(), dst.rows(),
dst.cols(), triangular_view);
} catch (const cl::Error& e) {
check_opencl_error("copy_triangular", e);
}
......
#ifndef STAN_MATH_GPU_DIAGONAL_MULTIPLY_HPP
#define STAN_MATH_GPU_DIAGONAL_MULTIPLY_HPP
#ifdef STAN_OPENCL
#include <stan/math/gpu/matrix_gpu.hpp>
#include <stan/math/gpu/kernels/scalar_mul_diagonal.hpp>
#include <Eigen/Dense>
namespace stan {
namespace math {
/**
 * Returns a copy of a GPU matrix in which every diagonal entry has been
 * multiplied by the given scalar. The input matrix is left untouched.
 *
 * An empty matrix is returned as an (empty) copy without launching a kernel.
 * OpenCL errors raised by the kernel launch are handed to
 * <code>check_opencl_error</code>.
 *
 * @param A input matrix
 * @param scalar factor applied to the diagonal entries
 * @return copy of the input matrix with the diagonal multiplied by scalar
 */
inline matrix_gpu diagonal_multiply(const matrix_gpu& A, const double scalar) {
  matrix_gpu B(A);
  if (B.size() != 0) {
    // A rectangular matrix has min(rows, cols) diagonal entries, so that is
    // both the number of work items and the bound passed to the kernel.
    const int n_rows = B.rows();
    const int n_cols = B.cols();
    const int min_dim = (n_cols < n_rows) ? n_cols : n_rows;
    try {
      opencl_kernels::scalar_mul_diagonal(cl::NDRange(min_dim), B.buffer(),
                                          scalar, B.rows(), min_dim);
    } catch (const cl::Error& e) {
      check_opencl_error("diagonal_multiply", e);
    }
  }
  return B;
}
} // namespace math
} // namespace stan
#endif
#endif
......@@ -2,6 +2,7 @@
#define STAN_MATH_GPU_ERR_CHECK_DIAGONAL_ZEROS_HPP
#ifdef STAN_OPENCL
#include <stan/math/gpu/matrix_gpu.hpp>
#include <stan/math/gpu/kernels/check_diagonal_zeros.hpp>
#include <stan/math/prim/scal/err/domain_error.hpp>
namespace stan {
......@@ -20,9 +21,6 @@ inline void check_diagonal_zeros(const char* function, const char* name,
const matrix_gpu& y) {
if (y.size() == 0)
return;
cl::Kernel kernel_check_diagonal_zeros
= opencl_context.get_kernel("is_zero_on_diagonal");
cl::CommandQueue cmd_queue = opencl_context.queue();
cl::Context ctx = opencl_context.context();
try {
......@@ -30,13 +28,9 @@ inline void check_diagonal_zeros(const char* function, const char* name,
cl::Buffer buffer_flag(ctx, CL_MEM_READ_WRITE, sizeof(int));
cmd_queue.enqueueWriteBuffer(buffer_flag, CL_TRUE, 0, sizeof(int),
&zero_on_diagonal_flag);
opencl_context.set_kernel_args(kernel_check_diagonal_zeros, y.buffer(),
y.rows(), y.cols(), buffer_flag);
cmd_queue.enqueueNDRangeKernel(kernel_check_diagonal_zeros, cl::NullRange,
cl::NDRange(y.rows(), y.cols()),
cl::NullRange);
opencl_kernels::check_diagonal_zeros(cl::NDRange(y.rows(), y.cols()),
y.buffer(), buffer_flag, y.rows(),
y.cols());
cmd_queue.enqueueReadBuffer(buffer_flag, CL_TRUE, 0, sizeof(int),
&zero_on_diagonal_flag);
// if zeros were found on the diagonal
......
......@@ -2,6 +2,7 @@
#define STAN_MATH_GPU_ERR_CHECK_NAN_HPP
#ifdef STAN_OPENCL
#include <stan/math/gpu/matrix_gpu.hpp>
#include <stan/math/gpu/kernels/check_nan.hpp>
#include <stan/math/prim/scal/err/domain_error.hpp>
namespace stan {
......@@ -21,7 +22,6 @@ inline void check_nan(const char* function, const char* name,
if (y.size() == 0)
return;
cl::Kernel kernel_check_nan = opencl_context.get_kernel("is_nan");
cl::CommandQueue cmd_queue = opencl_context.queue();
cl::Context& ctx = opencl_context.context();
try {
......@@ -29,13 +29,8 @@ inline void check_nan(const char* function, const char* name,
cl::Buffer buffer_nan_flag(ctx, CL_MEM_READ_WRITE, sizeof(int));
cmd_queue.enqueueWriteBuffer(buffer_nan_flag, CL_TRUE, 0, sizeof(int),
&nan_flag);
opencl_context.set_kernel_args(kernel_check_nan, y.buffer(), y.rows(),
y.cols(), buffer_nan_flag);
cmd_queue.enqueueNDRangeKernel(kernel_check_nan, cl::NullRange,
cl::NDRange(y.rows(), y.cols()),
cl::NullRange);
opencl_kernels::check_nan(cl::NDRange(y.rows(), y.cols()), y.buffer(),
buffer_nan_flag, y.rows(), y.cols());
cmd_queue.enqueueReadBuffer(buffer_nan_flag, CL_TRUE, 0, sizeof(int),
&nan_flag);
// if NaN values were found in the matrix
......
......@@ -3,6 +3,7 @@
#ifdef STAN_OPENCL
#include <stan/math/gpu/matrix_gpu.hpp>
#include <stan/math/prim/scal/err/domain_error.hpp>
#include <stan/math/gpu/kernels/check_symmetric.hpp>
namespace stan {
namespace math {
......@@ -21,7 +22,6 @@ inline void check_symmetric(const char* function, const char* name,
if (y.size() == 0)
return;
check_square(function, name, y);
cl::Kernel kernel_check_symmetric = opencl_context.get_kernel("is_symmetric");
cl::CommandQueue cmd_queue = opencl_context.queue();
cl::Context& ctx = opencl_context.context();
try {
......@@ -29,14 +29,9 @@ inline void check_symmetric(const char* function, const char* name,
cl::Buffer buffer_symmetric_flag(ctx, CL_MEM_READ_WRITE, sizeof(int));
cmd_queue.enqueueWriteBuffer(buffer_symmetric_flag, CL_TRUE, 0, sizeof(int),
&symmetric_flag);
opencl_context.set_kernel_args(kernel_check_symmetric, y.buffer(), y.rows(),
y.cols(), buffer_symmetric_flag,
math::CONSTRAINT_TOLERANCE);
cmd_queue.enqueueNDRangeKernel(kernel_check_symmetric, cl::NullRange,
cl::NDRange(y.rows(), y.cols()),
cl::NullRange);
opencl_kernels::check_symmetric(cl::NDRange(y.rows(), y.cols()), y.buffer(),
buffer_symmetric_flag, y.rows(), y.cols(),
math::CONSTRAINT_TOLERANCE);
cmd_queue.enqueueReadBuffer(buffer_symmetric_flag, CL_TRUE, 0, sizeof(int),
&symmetric_flag);
// if the matrix is not symmetric
......
......@@ -2,6 +2,7 @@
#define STAN_MATH_GPU_IDENTITY_HPP
#ifdef STAN_OPENCL
#include <stan/math/gpu/matrix_gpu.hpp>
#include <stan/math/gpu/kernels/identity.hpp>
#include <CL/cl.hpp>
namespace stan {
......@@ -20,14 +21,11 @@ inline matrix_gpu identity(int rows_cols) {
if (rows_cols == 0) {
return A;
}
cl::Kernel kernel = opencl_context.get_kernel("identity");
cl::CommandQueue cmdQueue = opencl_context.queue();
try {
opencl_context.set_kernel_args(kernel, A.buffer(), A.rows(), A.cols());
cmdQueue.enqueueNDRangeKernel(kernel, cl::NullRange,
cl::NDRange(A.rows(), A.cols()),
cl::NullRange, NULL, NULL);
opencl_kernels::identity(cl::NDRange(A.rows(), A.cols()), A.buffer(),
A.rows(), A.cols());
} catch (const cl::Error& e) {
check_opencl_error("identity", e);
}
......
#ifndef STAN_MATH_GPU_KERNEL_CL_HPP
#define STAN_MATH_GPU_KERNEL_CL_HPP
#ifdef STAN_OPENCL
#include <stan/math/gpu/opencl_context.hpp>
#include <stan/math/gpu/kernels/helpers.hpp>
#include <CL/cl.hpp>
#include <string>
#include <algorithm>
#include <map>
#include <vector>
// Used for importing the opencl kernels at compile time.
// There has been much discussion about the best ways to do this:
// https://github.com/bstatcomp/math/pull/7
// and https://github.com/stan-dev/math/pull/966
#ifndef STRINGIFY
#define STRINGIFY(src) #src
#endif
namespace stan {
namespace math {
namespace opencl_kernels {
/**
* Compile an OpenCL kernel.
*
* @param name The name for the kernel
* @param source A string literal containing the code for the kernel.
* @param options The values of macros to be passed at compile time.
* @note The macros defined in kernels/helpers.hpp are included in the kernel
* compilation for ease of writing and reading kernels.
*/
auto compile_kernel(const char* name, const char* source,
std::map<const char*, int> options) {
std::string kernel_opts = "";
for (auto&& comp_opts : options) {
kernel_opts += std::string(" -D") + comp_opts.first + "="
+ std::to_string(comp_opts.second);
}
std::string kernel_source(opencl_kernels::helpers);
kernel_source.append(source);
cl::Program program;
try {
cl::Program::Sources src(1, std::make_pair(kernel_source.c_str(),
strlen(kernel_source.c_str())));
program = cl::Program(opencl_context.context(), src);
program.build({opencl_context.device()}, kernel_opts.c_str());
return cl::Kernel(program, name);
} catch (const cl::Error& e) {
// in case of CL_BUILD_PROGRAM_FAILURE, print the build error
if (e.err() == -11) {
std::string buildlog = program.getBuildInfo<CL_PROGRAM_BUILD_LOG>(
opencl_context.device()[0]);
system_error("compile_kernel", name, e.err(), buildlog.c_str());
} else {
check_opencl_error(name, e);
}
}
return cl::Kernel(); // never reached because check_opencl_error throws
}
/**
 * Owns one compiled OpenCL kernel together with the macro options it was
 * compiled with, and hands out callable wrappers around that kernel.
 *
 * @tparam Args Parameter pack of all kernel argument types.
 */
template <typename... Args>
class kernel_functor {
 private:
  cl::Kernel kernel_;
  std::map<const char*, int> opts_;

 public:
  /**
   * Compiles the kernel with the given options merged with the base options
   * of the OpenCL context.
   *
   * @param name The name for the kernel.
   * @param source A string literal containing the code for the kernel.
   * @param options The values of macros to be passed at compile time.
   */
  kernel_functor(const char* name, const char* source,
                 std::map<const char*, int> options) {
    // std::map::insert does not overwrite existing keys, so an entry already
    // present in `options` wins over a base option stored under the same key
    // (keys are compared as pointers, not as strings).
    auto context_opts = opencl_context.base_opts();
    options.insert(context_opts.begin(), context_opts.end());
    kernel_ = compile_kernel(name, source, options);
    opts_ = options;
  }

  /**
   * @return A <code>cl::make_kernel</code> wrapper around the compiled kernel
   *   that can be invoked with enqueue arguments and the kernel arguments.
   */
  auto operator()() const { return cl::make_kernel<Args...>(kernel_); }

  /**
   * @return The options that the kernel was compiled with.
   */
  const std::map<const char*, int>& get_opts() const { return opts_; }
};
/**
* Creates functor for kernels that only need access to defining
* the global work size.
*
* @tparam Args Parameter pack of all kernel argument types.
*/
template <typename... Args>
struct global_range_kernel {
const kernel_functor<Args...> make_functor;
/**
* Creates functor for kernels that only need access to defining
* the global work size.
* @param name The name for the kernel
* @param source A string literal containing the code for the kernel.
* @param options The values of macros to be passed at compile time.
*/
global_range_kernel(const char* name, const char* source,
const std::map<const char*, int> options = {})
: make_functor(name, source, options) {}
/**
* Executes a kernel
* @param global_thread_size The global work size.
* @param args The arguments to pass to the kernel.
* @tparam Args Parameter pack of all kernel argument types.
*/
auto operator()(cl::NDRange global_thread_size, Args... args) const {
auto f = make_functor();
cl::EnqueueArgs eargs(opencl_context.queue(), global_thread_size);
f(eargs, args...).wait();
}
};
/**
* Creates functor for kernels that need to define both
* local and global work size.
* @tparam Args Parameter pack of all kernel argument types.
*/
template <typename... Args>
struct local_range_kernel {
const kernel_functor<Args...> make_functor;
/**
* Creates kernels that need access to defining the global thread
* siez and the thread block size.
* @param name The name for the kernel
* @param source A string literal containing the code for the kernel.
* @param options The values of macros to be passed at compile time.
*/
local_range_kernel(const char* name, const char* source,
const std::map<const char*, int> options = {})
: make_functor(name, source, options) {}
/**
* Executes a kernel
* @param global_thread_size The global work size.
* @param thread_block_size The thread block size.
* @param args The arguments to pass to the kernel.
* @tparam Args Parameter pack of all kernel argument types.
*/
auto operator()(cl::NDRange global_thread_size, cl::NDRange thread_block_size,
Args... args) const {
auto f = make_functor();
cl::EnqueueArgs eargs(opencl_context.queue(), global_thread_size,
thread_block_size);
f(eargs, args...).wait();
}
};
} // namespace opencl_kernels
} // namespace math
} // namespace stan
#endif
#endif
#ifndef STAN_MATH_GPU_KERNELS_ADD_HPP
#define STAN_MATH_GPU_KERNELS_ADD_HPP
#ifdef STAN_OPENCL
#include <stan/math/gpu/kernel_cl.hpp>
namespace stan {
namespace math {
namespace opencl_kernels {
// \cond
// `static` gives the pointer internal linkage. A plain `const char *` at
// namespace scope is a non-const pointer with external linkage, so defining
// it in a header breaks linking once the header is included from more than
// one translation unit.
static const char *add_kernel_code = STRINGIFY(
    // \endcond
    /**
     * Matrix addition on the GPU
     *
     * Each work item (i, j) inside the matrix bounds writes
     * C(i, j) = A(i, j) + B(i, j); work items outside the bounds do nothing.
     *
     * @param[out] C Output matrix.
     * @param[in] A LHS of matrix addition.
     * @param[in] B RHS of matrix addition.
     * @param rows Number of rows for matrix A.
     * @param cols Number of cols for matrix A.
     * @note Code is a <code>const char*</code> held in
     * <code>add_kernel_code.</code>
     * This kernel uses the helper macros available in helpers.cl.
     */
    __kernel void add(__global write_only double *C,
                      __global read_only double *A,
                      __global read_only double *B, read_only unsigned int rows,
                      read_only unsigned int cols) {
      int i = get_global_id(0);
      int j = get_global_id(1);
      if (i < rows && j < cols) {
        C(i, j) = A(i, j) + B(i, j);
      }
    }
    // \cond
);
// \endcond

/**
 * See the docs for \link kernels/add.hpp add() \endlink
 */
const global_range_kernel<cl::Buffer, cl::Buffer, cl::Buffer, int, int> add(
    "add", add_kernel_code);
} // namespace opencl_kernels
} // namespace math
} // namespace stan
#endif
#endif
R"(
#ifndef A
#define A(i, j) A[j * rows + i]
#endif
#ifndef B
#define B(i, j) B[j * rows + i]
#endif
#ifndef C
#define C(i, j) C[j * rows + i]
#endif
/**
 * Matrix addition on the GPU
 *
 * Each work item (i, j) inside the matrix bounds writes
 * C(i, j) = A(i, j) + B(i, j); work items outside the bounds do nothing.
 *
 * @param[out] C Output matrix.
 * @param[in] A LHS of matrix addition.
 * @param[in] B RHS of matrix addition.
 * @param rows Number of rows for matrix A.
 * @param cols Number of columns for matrix A.
 *
 */
__kernel void add(__global double *C, __global double *A, __global double *B,
unsigned int rows, unsigned int cols) {
int i = get_global_id(0);
int j = get_global_id(1);
if (i < rows && j < cols) {
C(i, j) = A(i, j) + B(i, j);
}
};)"
#ifndef STAN_MATH_GPU_KERNELS_CHECK_DIAGONAL_ZEROS_HPP
#define STAN_MATH_GPU_KERNELS_CHECK_DIAGONAL_ZEROS_HPP
#ifdef STAN_OPENCL
#include <stan/math/gpu/kernel_cl.hpp>
namespace stan {
namespace math {
namespace opencl_kernels {
// \cond
// `static` gives the pointer internal linkage. A plain `const char *` at
// namespace scope is a non-const pointer with external linkage, so defining
// it in a header breaks linking once the header is included from more than
// one translation unit.
static const char *is_zero_on_diagonal_kernel_code = STRINGIFY(
    // \endcond
    /**
     * Check if the <code>matrix_gpu</code> has zeros on the diagonal
     *
     * Only the first global id is used: work item i inspects A(i, i) when
     * i is smaller than both rows and cols.
     *
     * @param[in] A Matrix to check.
     * @param[out] flag the flag to be written to if any diagonal is zero.
     * @param rows The number of rows for A.
     * @param cols The number of cols of A.
     * @note Code is a <code>const char*</code> held in
     * <code>is_zero_on_diagonal_kernel_code.</code>
     * Kernel for stan/math/gpu/err/check_diagonal_zeros.hpp.
     * This kernel uses the helper macros available in helpers.cl.
     */
    __kernel void is_zero_on_diagonal(
        __global read_only double *A, __global int *flag,
        read_only unsigned int rows, read_only unsigned int cols) {
      const int i = get_global_id(0);
      if (i < rows && i < cols) {
        if (A(i, i) == 0) {
          flag[0] = 1;
        }
      }
    }
    // \cond
);
// \endcond

/**
 * See the docs for \link kernels/check_diagonal_zeros.hpp
 * check_diagonal_zeros() \endlink
 */
const global_range_kernel<cl::Buffer, cl::Buffer, int, int>
    check_diagonal_zeros("is_zero_on_diagonal",
                         is_zero_on_diagonal_kernel_code);
} // namespace opencl_kernels
} // namespace math
} // namespace stan
#endif
#endif
R"(
#ifndef A
#define A(i, j) A[j * rows + i]
#endif
/**
 * Check if the <code>matrix_gpu</code> has zeros on the diagonal
 *
 * Only the first global id is used: work item i inspects A(i, i) when
 * i is smaller than both rows and cols.
 *
 * @param[in] A Matrix to check.
 * @param rows The number of rows for A.
 * @param cols The number of cols of A.
 * @param[out] flag the flag to be written to if any diagonal is zero.
 *
 * @note Kernel for stan/math/gpu/err/check_diagonal_zeros.hpp
 */
__kernel void is_zero_on_diagonal(__global double *A, int rows, int cols,
__global int *flag) {
const int i = get_global_id(0);
// Guard against work items beyond the end of the diagonal.
if (i < rows && i < cols) {
if (A(i, i) == 0) {
// The flag is only ever set to 1 here, never reset.
flag[0] = 1;
}
}
};)"
#ifndef STAN_MATH_GPU_KERNELS_CHECK_NAN_HPP
#define STAN_MATH_GPU_KERNELS_CHECK_NAN_HPP
#ifdef STAN_OPENCL
#include <stan/math/gpu/kernel_cl.hpp>
namespace stan {
namespace math {
namespace opencl_kernels {
// \cond
// `static` gives the pointer internal linkage. A plain `const char *` at
// namespace scope is a non-const pointer with external linkage, so defining
// it in a header breaks linking once the header is included from more than
// one translation unit.
static const char *is_nan_kernel_code = STRINGIFY(
    // \endcond
    /**
     * Check if the <code>matrix_gpu</code> has NaN values
     *
     * Each work item (i, j) inside the matrix bounds tests A(i, j) with
     * isnan and sets the flag to 1 when the test succeeds.
     *
     * @param[in] A The matrix to check.
     * @param[out] flag the flag to be written to if any element is NaN.
     * @param rows The number of rows in matrix A.
     * @param cols The number of columns in matrix A.
     * @note Code is a <code>const char*</code> held in
     * <code>is_nan_kernel_code.</code>
     * Kernel for stan/math/gpu/err/check_nan.hpp.
     * This kernel uses the helper macros available in helpers.cl.
     */
    __kernel void is_nan(
        __global read_only double *A, __global write_only int *flag,
        read_only unsigned int rows, read_only unsigned int cols) {
      const int i = get_global_id(0);
      const int j = get_global_id(1);
      if (i < rows && j < cols) {
        if (isnan(A(i, j))) {
          flag[0] = 1;
        }
      }
    }
    // \cond
);
// \endcond

/**
 * See the docs for \link kernels/check_nan.hpp is_nan() \endlink
 */
const global_range_kernel<cl::Buffer, cl::Buffer, int, int> check_nan(
    "is_nan", is_nan_kernel_code);
} // namespace opencl_kernels
} // namespace math
} // namespace stan
#endif
#endif
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment