1 #ifndef STAN_MATH_OPENCL_OPENCL_CONTEXT_HPP 2 #define STAN_MATH_OPENCL_OPENCL_CONTEXT_HPP 5 #define DEVICE_FILTER CL_DEVICE_TYPE_ALL 6 #ifndef OPENCL_DEVICE_ID 7 #error OPENCL_DEVICE_ID_NOT_SET 9 #ifndef OPENCL_PLATFORM_ID 10 #error OPENCL_PLATFORM_ID_NOT_SET 45 inline cl::size_t<N>
to_size_t(
const size_t (&values)[N]) {
57 inline cl::size_t<3>
to_size_t(
const size_t (&values)[3]) {
59 for (
size_t i = 0; i < 3; i++)
104 if (OPENCL_PLATFORM_ID >= platforms_.size()) {
106 "CL_INVALID_PLATFORM");
108 platform_ = platforms_[OPENCL_PLATFORM_ID];
109 platform_name_ = platform_.getInfo<CL_PLATFORM_NAME>();
111 if (devices_.size() == 0) {
113 "CL_DEVICE_NOT_FOUND");
115 if (OPENCL_DEVICE_ID >= devices_.size()) {
117 "CL_INVALID_DEVICE");
119 device_ = devices_[OPENCL_DEVICE_ID];
121 cl_command_queue_properties device_properties;
122 device_.getInfo<cl_command_queue_properties>(CL_DEVICE_QUEUE_PROPERTIES,
124 device_.getInfo<
size_t>(CL_DEVICE_MAX_WORK_GROUP_SIZE,
125 &max_thread_block_size_);
127 context_ = cl::Context(device_);
128 if (device_properties & CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE) {
129 command_queue_ = cl::CommandQueue(
130 context_, device_, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE,
nullptr);
132 command_queue_ = cl::CommandQueue(context_, device_, 0,
nullptr);
134 int thread_block_size_sqrt
135 =
static_cast<int>(
sqrt(static_cast<double>(max_thread_block_size_)));
140 if (thread_block_size_sqrt < base_opts_[
"THREAD_BLOCK_SIZE"]) {
141 base_opts_[
"THREAD_BLOCK_SIZE"] = thread_block_size_sqrt;
142 base_opts_[
"WORK_PER_THREAD"] = 1;
144 if (max_thread_block_size_ < base_opts_[
"LOCAL_SIZE_"]) {
146 const int p =
std::log(max_thread_block_size_)
147 /
std::log(base_opts_[
"REDUCTION_STEP_SIZE"]);
148 base_opts_[
"LOCAL_SIZE_"]
149 =
std::pow(base_opts_[
"REDUCTION_STEP_SIZE"], p);
153 if (max_thread_block_size_ >= 256) {
154 tuning_opts_.cholesky_min_L11_size = 256;
156 tuning_opts_.cholesky_min_L11_size = max_thread_block_size_;
158 }
catch (
const cl::Error&
e) {
177 map_base_opts base_opts_
183 {
"THREAD_BLOCK_SIZE", 32},
184 {
"WORK_PER_THREAD", 8},
185 {
"REDUCTION_STEP_SIZE", 4},
186 {
"LOCAL_SIZE_", 64}};
190 int cholesky_min_L11_size = 256;
191 int cholesky_partition = 4;
192 int cholesky_size_worth_transfer = 1250;
194 int cholesky_rev_min_block_size = 512;
195 int cholesky_rev_block_partition = 8;
197 int multiply_split_upper_limit = 2000000;
200 int tri_inverse_size_worth_transfer = 100;
225 std::ostringstream msg;
227 msg <<
"Platform ID: " << OPENCL_DEVICE_ID <<
"\n";
228 msg <<
"Platform Name: " 232 msg <<
"Platform Vendor: " 236 msg <<
"\tDevice " << OPENCL_DEVICE_ID <<
": " 238 msg <<
"\t\tDevice Name: " 241 msg <<
"\t\tDevice Type: " 244 msg <<
"\t\tDevice Vendor: " 246 .
device_.getInfo<CL_DEVICE_VENDOR>()
248 msg <<
"\t\tDevice Max Compute Units: " 250 .
device_.getInfo<CL_DEVICE_MAX_COMPUTE_UNITS>()
252 msg <<
"\t\tDevice Global Memory: " 254 .
device_.getInfo<CL_DEVICE_GLOBAL_MEM_SIZE>()
256 msg <<
"\t\tDevice Max Clock Frequency: " 258 .
device_.getInfo<CL_DEVICE_MAX_CLOCK_FREQUENCY>()
260 msg <<
"\t\tDevice Max Allocateable Memory: " 262 .
device_.getInfo<CL_DEVICE_MAX_MEM_ALLOC_SIZE>()
264 msg <<
"\t\tDevice Local Memory: " 266 .
device_.getInfo<CL_DEVICE_LOCAL_MEM_SIZE>()
268 msg <<
"\t\tDevice Available: " 270 .
device_.getInfo<CL_DEVICE_AVAILABLE>()
281 std::vector<cl::Platform> all_platforms;
283 std::ostringstream msg;
287 msg <<
"Number of Platforms: " << all_platforms.size() <<
"\n";
288 for (
auto plat_iter : all_platforms) {
289 cl::Platform platform(plat_iter);
291 msg <<
"Platform ID: " << platform_id++ <<
"\n";
292 msg <<
"Platform Name: " << platform.getInfo<CL_PLATFORM_NAME>() <<
"\n";
293 msg <<
"Platform Vendor: " << platform.getInfo<CL_PLATFORM_VENDOR>()
297 std::vector<cl::Device> all_devices;
298 platform.getDevices(CL_DEVICE_TYPE_ALL, &all_devices);
300 for (
auto device_iter : all_devices) {
301 cl::Device device(device_iter);
303 msg <<
"\tDevice " << device_id++ <<
": " 305 msg <<
"\t\tDevice Name: " << device.getInfo<CL_DEVICE_NAME>()
307 msg <<
"\t\tDevice Type: " << device.getInfo<CL_DEVICE_TYPE>()
309 msg <<
"\t\tDevice Vendor: " << device.getInfo<CL_DEVICE_VENDOR>()
311 msg <<
"\t\tDevice Max Compute Units: " 312 << device.getInfo<CL_DEVICE_MAX_COMPUTE_UNITS>() <<
"\n";
313 msg <<
"\t\tDevice Global Memory: " 314 << device.getInfo<CL_DEVICE_GLOBAL_MEM_SIZE>() <<
"\n";
315 msg <<
"\t\tDevice Max Clock Frequency: " 316 << device.getInfo<CL_DEVICE_MAX_CLOCK_FREQUENCY>() <<
"\n";
317 msg <<
"\t\tDevice Max Allocateable Memory: " 318 << device.getInfo<CL_DEVICE_MAX_MEM_ALLOC_SIZE>() <<
"\n";
319 msg <<
"\t\tDevice Local Memory: " 320 << device.getInfo<CL_DEVICE_LOCAL_MEM_SIZE>() <<
"\n";
321 msg <<
"\t\tDevice Available: " 322 << device.getInfo<CL_DEVICE_AVAILABLE>() <<
"\n";
324 }
catch (
const cl::Error&
e) {
329 msg <<
"\tno (OpenCL) devices in the platform with ID " << platform_id
383 inline std::vector<cl::Device>
device() {
The opencl_context_base class represents an OpenCL context in the standard Meyers singleton design pa...
std::vector< cl::Device > device()
Returns a vector containing the OpenCL device used to create the context.
std::string platform_name_
std::vector< cl::Platform > platforms_
std::string capabilities() const
Returns the description of the OpenCL platforms and devices that are available.
fvar< T > sqrt(const fvar< T > &x)
std::string description() const
Returns the description of the OpenCL platform and device that is used.
void system_error(const char *function, const char *name, const int &y, const char *msg1, const char *msg2)
Throw a system error with a consistently formatted message.
std::map< const char *, int > map_base_opts
static opencl_context_base & getInstance()
fvar< T > log(const fvar< T > &x)
The API to access the methods and values in opencl_context_base.
opencl_context_base::tuning_struct & tuning_opts()
Returns a reference to the tuning options struct, which includes the minimum L_11 block size used by the Cholesky decomposition.
std::vector< cl::Platform > platform()
Returns a vector containing the OpenCL platform used to create the context.
cl::CommandQueue command_queue_
checking OpenCL error numbers
cl::Context & context()
Returns the reference to the OpenCL context.
T get(const std::vector< T > &x, size_t n)
Returns the n-th element of the provided std::vector.
cl::size_t< N > to_size_t(const size_t(&values)[N])
A helper function to convert an array to a cl::size_t<N>.
void domain_error(const char *function, const char *name, const T &y, const char *msg1, const char *msg2)
Throw a domain error with a consistently formatted message.
opencl_context_base::map_base_opts base_opts()
Returns a copy of the map of kernel defines.
size_t max_thread_block_size_
double e()
Return the base of the natural logarithm.
int max_thread_block_size()
Returns the maximum thread block size defined by CL_DEVICE_MAX_WORK_GROUP_SIZE for the device in the ...
static opencl_context opencl_context
struct stan::math::opencl_context_base::tuning_struct tuning_opts_
fvar< T > pow(const fvar< T > &x1, const fvar< T > &x2)
std::vector< cl::Device > devices_
void check_opencl_error(const char *function, const cl::Error &e)
Throws a domain error specifying the OpenCL error that occurred.
cl::CommandQueue & queue()
Returns the reference to the active OpenCL command queue for the device.