Stan Math Library  2.20.0
reverse mode automatic differentiation
kernel_cl.hpp
Go to the documentation of this file.
1 #ifndef STAN_MATH_OPENCL_KERNEL_CL_HPP
2 #define STAN_MATH_OPENCL_KERNEL_CL_HPP
3 #ifdef STAN_OPENCL
10 #include <CL/cl.hpp>
11 #include <string>
12 #include <algorithm>
13 #include <map>
14 #include <vector>
15 #include <utility>
16 
17 // Used for importing the OpenCL kernels at compile time.
18 // There has been much discussion about the best ways to do this:
19 // https://github.com/bstatcomp/math/pull/7
20 // and https://github.com/stan-dev/math/pull/966
// STRINGIFY(src) expands to the string-literal spelling of its argument via
// the preprocessor `#` operator. The #ifndef guard leaves any definition of
// STRINGIFY made earlier in the translation unit untouched.
21 #ifndef STRINGIFY
22 #define STRINGIFY(src) #src
23 #endif
24 
25 namespace stan {
26 namespace math {
27 namespace opencl_kernels {
28 namespace internal {
/**
 * Generic kernel-argument extractor: hands the argument back unchanged.
 *
 * kernel_cl::operator() passes every call argument through get_kernel_args
 * before forwarding it to the kernel functor; this overload is the
 * pass-through case for any type without a more specific overload.
 *
 * @tparam T type of the argument
 * @param t the argument
 * @return a const reference to `t` itself
 */
37 template <typename T>
38 inline const T& get_kernel_args(const T& t) {
39  return t;
40 }
41 
/**
 * Kernel-argument extractor for matrix_cl: returns the result of
 * `m.buffer()` so the kernel functor receives a cl::Buffer rather than the
 * matrix_cl wrapper.
 *
 * @param m matrix whose buffer is requested
 * @return const reference to the cl::Buffer returned by `m.buffer()`
 */
42 inline const cl::Buffer& get_kernel_args(const stan::math::matrix_cl& m) {
43  return m.buffer();
44 }
45 
/**
 * Primary template of assign_event: does nothing.
 *
 * Both parameters are unnamed and ignored; the explicit specializations for
 * in_buffer, out_buffer and in_out_buffer are the ones that record the event
 * on a matrix_cl.
 *
 * @tparam T kernel argument tag/type
 */
46 template <typename T>
47 inline void assign_event(const cl::Event&, to_const_matrix_cl_t<T>&) {}
48 
/**
 * Specialization for in_buffer arguments: forwards the event to
 * `m.add_read_event(e)`.
 *
 * @param e event to attach
 * @param m matrix that receives the event
 */
49 template <>
50 inline void assign_event<in_buffer>(const cl::Event& e,
51  const stan::math::matrix_cl& m) {
52  m.add_read_event(e);
53 }
54 
/**
 * Specialization for out_buffer arguments: forwards the event to
 * `m.add_write_event(e)`.
 *
 * @param e event to attach
 * @param m matrix that receives the event
 */
55 template <>
56 inline void assign_event<out_buffer>(const cl::Event& e,
57  const stan::math::matrix_cl& m) {
58  m.add_write_event(e);
59 }
60 
/**
 * Specialization for in_out_buffer arguments: forwards the event to
 * `m.add_read_write_event(e)`.
 *
 * @param e event to attach
 * @param m matrix that receives the event
 */
61 template <>
62 inline void assign_event<in_out_buffer>(const cl::Event& e,
63  const stan::math::matrix_cl& m) {
64  m.add_read_write_event(e);
65 }
66 
/**
 * No-op overload of assign_events taking only the event.
 *
 * The enable_if constraint restricts it to T == cl::Event. It is the overload
 * selected when the variadic assign_events has no kernel arguments left, so
 * it ends that recursion.
 *
 * @tparam T must be cl::Event
 */
67 template <typename T,
68  typename std::enable_if_t<std::is_same<T, cl::Event>::value, int> = 0>
69 inline void assign_events(const T&) {}
70 
/**
 * Attaches one event to a list of kernel arguments, one argument per
 * recursion step: assign_event<Arg> handles the first argument, then
 * assign_events<Args...> is called with the remaining ones.
 *
 * NOTE(review): the body uses a parameter `m` that the visible signature does
 * not declare, and the listing numbering jumps from 85 to 87 — the line that
 * declares `m` appears to have been dropped from this listing. Restore it
 * from the upstream header before compiling.
 *
 * @tparam Arg tag/type of the first kernel argument
 * @tparam Args tags/types of the remaining kernel arguments
 * @param new_event event to attach to every argument
 * @param args remaining kernel arguments
 */
84 template <typename Arg, typename... Args>
85 inline void assign_events(const cl::Event& new_event,
87  to_const_matrix_cl_t<Args>&... args) {
88  assign_event<Arg>(new_event, m);
89  assign_events<Args...>(new_event, args...);
90 }
91 
/**
 * Primary template of select_events: contributes no events.
 *
 * kernel_cl::operator() concatenates the results of select_events over all
 * arguments to form the event list given to cl::EnqueueArgs; arguments without
 * a specialization add nothing to that list.
 *
 * @tparam T kernel argument tag/type
 * @param t the argument (unused)
 * @return an empty vector of events
 */
92 template <typename T>
93 inline const std::vector<cl::Event> select_events(to_const_matrix_cl_t<T>& t) {
94  return std::vector<cl::Event>();
95 }
96 
/**
 * Specialization for in_buffer arguments: selects the matrix's write events
 * (`m.write_events()`).
 *
 * @param m matrix passed to the kernel as an in_buffer
 * @return copy of the events returned by `m.write_events()`
 */
97 template <>
98 inline const std::vector<cl::Event> select_events<in_buffer>(
99  const stan::math::matrix_cl& m) {
100  return m.write_events();
101 }
102 
/**
 * Specialization for out_buffer arguments: selects the matrix's read and
 * write events (`m.read_write_events()`).
 *
 * @param m matrix passed to the kernel as an out_buffer
 * @return copy of the events returned by `m.read_write_events()`
 */
103 template <>
104 inline const std::vector<cl::Event> select_events<out_buffer>(
105  const stan::math::matrix_cl& m) {
106  return m.read_write_events();
107 }
108 
/**
 * Specialization for in_out_buffer arguments: selects the matrix's read and
 * write events (`m.read_write_events()`), the same list used for out_buffer.
 *
 * @param m matrix passed to the kernel as an in_out_buffer
 * @return copy of the events returned by `m.read_write_events()`
 */
109 template <>
110 inline const std::vector<cl::Event> select_events<in_out_buffer>(
111  const stan::math::matrix_cl& m) {
112  return m.read_write_events();
113 }
114 
115 } // namespace internal
116 
/**
 * Compile an OpenCL kernel.
 *
 * Every entry of `options` becomes a ` -D<key>=<value>` compiler define, all
 * strings in `sources` are concatenated into one program source, and the
 * program is built for the device(s) returned by opencl_context.device().
 *
 * @param name name of the kernel function to extract from the built program
 * @param sources source fragments, concatenated in the given order
 * @param options macro name -> integer value pairs passed as -D defines
 * @return the cl::Kernel named `name`
 * @note On a cl::Error the error is reported through system_error (build
 *   failures, with the build log attached) or check_opencl_error (anything
 *   else); the fall-through return exists only to satisfy the compiler.
 */
124 inline auto compile_kernel(const char* name,
125  const std::vector<const char*>& sources,
126  std::map<const char*, int>& options) {
127  std::string kernel_opts = "";
128  for (auto&& comp_opts : options) {
129  kernel_opts += std::string(" -D") + comp_opts.first + "="
130  + std::to_string(comp_opts.second);
131  }
132  std::string kernel_source;
133  for (const char* source : sources) {
134  kernel_source.append(source);
135  }
136  cl::Program program;
137  try {
// The length comes from the string itself: same value as strlen() on c_str()
// (the source is assembled from NUL-terminated fragments), without needing
// <cstring>.
138  cl::Program::Sources src(1, std::make_pair(kernel_source.c_str(),
139  kernel_source.size()));
140  program = cl::Program(opencl_context.context(), src);
141  program.build({opencl_context.device()}, kernel_opts.c_str());
142 
143  return cl::Kernel(program, name);
144  } catch (const cl::Error& e) {
145  // in case of CL_BUILD_PROGRAM_FAILURE, print the build error
146  if (e.err() == CL_BUILD_PROGRAM_FAILURE) {
147  std::string buildlog = program.getBuildInfo<CL_PROGRAM_BUILD_LOG>(
148  opencl_context.device()[0]);
149  system_error("compile_kernel", name, e.err(), buildlog.c_str());
150  } else {
151  check_opencl_error(name, e);
152  }
153  }
154  return cl::Kernel(); // never reached because check_opencl_error throws
155 }
156 
/**
 * Functor used for compiling kernels: compiles a kernel once at construction
 * and hands out cl::make_kernel wrappers around it.
 *
 * NOTE(review): the class-head line (listing line 163, `class kernel_functor
 * {` per the cross-reference index) is missing from this listing.
 *
 * @tparam Args argument types forwarded to cl::make_kernel
 */
162 template <typename... Args>
164  private:
  // The compiled kernel, set in the constructor.
165  cl::Kernel kernel_;
  // The option map the kernel was compiled with.
166  std::map<const char*, int> opts_;
167 
168  public:
  /**
   * Merges `options` into a copy of opencl_context.base_opts() and compiles
   * the kernel with the merged map.
   *
   * For each supplied option the stored value is replaced only when the
   * supplied value is smaller, i.e. the lower of the two values is kept.
   *
   * NOTE(review): `base_opts[it.first]` default-inserts 0 for a key that is
   * not already present, so such an option only takes effect when its value
   * is negative. The keys are `const char*`, so the map orders and matches
   * them by pointer value, not by string content. Confirm both are intended.
   *
   * @param name name of the kernel function
   * @param sources source fragments passed on to compile_kernel
   * @param options macro name -> value pairs to merge into the base options
   */
175  kernel_functor(const char* name, const std::vector<const char*>& sources,
176  const std::map<const char*, int>& options) {
177  auto base_opts = opencl_context.base_opts();
178  for (auto& it : options) {
179  if (base_opts[it.first] > it.second) {
180  base_opts[it.first] = it.second;
181  }
182  }
183  kernel_ = compile_kernel(name, sources, base_opts);
184  opts_ = base_opts;
185  }
186 
  /** @return a cl::make_kernel<Args...> constructed from the compiled kernel */
187  auto operator()() const { return cl::make_kernel<Args...>(kernel_); }
188 
  /** @return the option map that was used to compile the kernel */
192  inline const std::map<const char*, int>& get_opts() const { return opts_; }
193 };
194 
/**
 * Creates functor for kernels: owns a kernel_functor and provides call
 * operators that enqueue the kernel with event dependencies taken from its
 * arguments.
 *
 * NOTE(review): the `make_functor` member initialized by both constructors is
 * not declared in the visible lines; the cross-reference index places it at
 * listing line 202, which is missing from this listing.
 *
 * @tparam Args kernel argument tags/types
 */
200 template <typename... Args>
201 struct kernel_cl {
  /**
   * Builds the kernel from a single source string.
   *
   * @param name name of the kernel function
   * @param source kernel source code
   * @param options macro name -> value pairs; defaults to an empty map
   */
210  kernel_cl(const char* name, const char* source,
211  const std::map<const char*, int>& options = {})
212  : make_functor(name, {source}, options) {}
  /**
   * Builds the kernel from several source strings.
   *
   * @param name name of the kernel function
   * @param sources kernel source fragments
   * @param options macro name -> value pairs; defaults to an empty map
   */
220  kernel_cl(const char* name, const std::vector<const char*>& sources,
221  const std::map<const char*, int>& options = {})
222  : make_functor(name, sources, options) {}
  /**
   * Executes a kernel, specifying only the global thread size.
   *
   * The events chosen by select_events for every argument are concatenated
   * and given to cl::EnqueueArgs; after the call the resulting event is
   * attached to the arguments through assign_events.
   *
   * @param global_thread_size range passed to cl::EnqueueArgs
   * @param args kernel arguments
   * @return the cl::Event produced by the kernel call
   */
229  auto operator()(cl::NDRange global_thread_size,
230  internal::to_const_matrix_cl_t<Args>&... args) const {
231  auto f = make_functor();
232  const std::vector<cl::Event> kernel_events
233  = vec_concat(internal::select_events<Args>(args)...);
234  cl::EnqueueArgs eargs(opencl_context.queue(), kernel_events,
235  global_thread_size);
236  cl::Event kern_event = f(eargs, internal::get_kernel_args(args)...);
237  internal::assign_events<Args...>(kern_event, args...);
238  return kern_event;
239  }
240 
  /**
   * Executes a kernel, specifying both the global thread size and the thread
   * block size. Otherwise identical to the overload above.
   *
   * @param global_thread_size first range passed to cl::EnqueueArgs
   * @param thread_block_size second range passed to cl::EnqueueArgs
   * @param args kernel arguments
   * @return the cl::Event produced by the kernel call
   */
248  auto operator()(cl::NDRange global_thread_size, cl::NDRange thread_block_size,
249  internal::to_const_matrix_cl_t<Args>&... args) const {
250  auto f = make_functor();
251  const std::vector<cl::Event> kernel_events
252  = vec_concat(internal::select_events<Args>(args)...);
253  cl::EnqueueArgs eargs(opencl_context.queue(), kernel_events,
254  global_thread_size, thread_block_size);
255  cl::Event kern_event = f(eargs, internal::get_kernel_args(args)...);
256  internal::assign_events<Args...>(kern_event, args...);
257  return kern_event;
258  }
259 };
260 
261 } // namespace opencl_kernels
262 } // namespace math
263 } // namespace stan
264 
265 #endif
266 #endif
std::vector< cl::Device > device()
Returns a vector containing the OpenCL device used to create the context.
const std::vector< T > & vec_concat(const std::vector< T > &v1)
Ends the recursion to extract the event stack.
Definition: vec_concat.hpp:17
void assign_event< in_out_buffer >(const cl::Event &e, const stan::math::matrix_cl &m)
Definition: kernel_cl.hpp:62
void system_error(const char *function, const char *name, const int &y, const char *msg1, const char *msg2)
Throw a system error with a consistently formatted message.
void assign_event< in_buffer >(const cl::Event &e, const stan::math::matrix_cl &m)
Definition: kernel_cl.hpp:50
const cl::Buffer & buffer() const
Definition: matrix_cl.hpp:170
auto compile_kernel(const char *name, const std::vector< const char *> &sources, std::map< const char *, int > &options)
Compile an OpenCL kernel.
Definition: kernel_cl.hpp:124
const std::vector< cl::Event > select_events(to_const_matrix_cl_t< T > &t)
Definition: kernel_cl.hpp:93
The API to access the methods and values in opencl_context_base.
void assign_event< out_buffer >(const cl::Event &e, const stan::math::matrix_cl &m)
Definition: kernel_cl.hpp:56
The matrix_cl class - allocates memory space on the OpenCL device, functions for transferring matrices...
const std::vector< cl::Event > select_events< out_buffer >(const stan::math::matrix_cl &m)
Definition: kernel_cl.hpp:104
auto operator()(cl::NDRange global_thread_size, internal::to_const_matrix_cl_t< Args > &... args) const
Executes a kernel.
Definition: kernel_cl.hpp:229
Functor used for compiling kernels.
Definition: kernel_cl.hpp:163
const kernel_functor< internal::to_const_buffer_t< Args > &... > make_functor
Definition: kernel_cl.hpp:202
const typename internal::to_matrix_cl< T >::type to_const_matrix_cl_t
void assign_event(const cl::Event &, to_const_matrix_cl_t< T > &)
Definition: kernel_cl.hpp:47
Represents a matrix on the OpenCL device.
Definition: matrix_cl.hpp:29
checking OpenCL error numbers
const std::vector< cl::Event > select_events< in_out_buffer >(const stan::math::matrix_cl &m)
Definition: kernel_cl.hpp:110
kernel_cl(const char *name, const char *source, const std::map< const char *, int > &options={})
Creates functor for kernels that only need access to defining the global work size.
Definition: kernel_cl.hpp:210
cl::Context & context()
Returns the reference to the OpenCL context.
Initialization for OpenCL:
opencl_context_base::map_base_opts base_opts()
Returns a copy of the map of kernel defines.
double e()
Return the base of the natural logarithm.
Definition: constants.hpp:87
auto operator()(cl::NDRange global_thread_size, cl::NDRange thread_block_size, internal::to_const_matrix_cl_t< Args > &... args) const
Executes a kernel.
Definition: kernel_cl.hpp:248
Creates functor for kernels.
Definition: kernel_cl.hpp:201
const std::map< const char *, int > & get_opts() const
Definition: kernel_cl.hpp:192
kernel_cl(const char *name, const std::vector< const char *> &sources, const std::map< const char *, int > &options={})
Creates functor for kernels that only need access to defining the global work size.
Definition: kernel_cl.hpp:220
const std::vector< cl::Event > select_events< in_buffer >(const stan::math::matrix_cl &m)
Definition: kernel_cl.hpp:98
kernel_functor(const char *name, const std::vector< const char *> &sources, const std::map< const char *, int > &options)
functor to access the kernel compiler.
Definition: kernel_cl.hpp:175
const T & get_kernel_args(const T &t)
Extracts the kernel's arguments, used in the global and local kernel constructor. ...
Definition: kernel_cl.hpp:38
void check_opencl_error(const char *function, const cl::Error &e)
Throws a domain error specifying the OpenCL error that occurred.
cl::CommandQueue & queue()
Returns the reference to the active OpenCL command queue for the device.

     [ Stan Home Page ] © 2011–2018, Stan Development Team.