1 #ifndef STAN_MATH_OPENCL_MULTIPLY_HPP 2 #define STAN_MATH_OPENCL_MULTIPLY_HPP 40 if (A.
size() == 0 || B.
size() == 0) {
50 cl::NDRange(temp.cols() * local_size), cl::NDRange(local_size), A, B,
51 temp, B.
rows(), B.cols(), triangular_view_A, triangular_view_B);
52 }
catch (cl::Error&
e) {
60 cl::NDRange(temp.rows()), A, B, temp, A.
rows(), A.
cols(),
61 triangular_view_A, triangular_view_B);
62 }
catch (cl::Error&
e) {
69 const int Mpad = ((A.
rows() + local - 1) / local) * local;
70 const int Npad = ((B.
cols() + local - 1) / local) * local;
86 cl::NDRange(local, local / wpt), A, B,
88 triangular_view_A, triangular_view_B);
92 cl::NDRange(local, local / wpt, 1), A, B,
94 triangular_view_A, triangular_view_B);
96 tempSplit, A.
rows(), B.
cols(), split);
98 }
catch (cl::Error&
e) {
120 }
catch (
const cl::Error&
e) {
fvar< T > sqrt(const fvar< T > &x)
fvar< T > operator*(const fvar< T > &x, const fvar< T > &y)
Return the product of the two arguments.
int multiply_split_upper_limit
The API to access the methods and values in opencl_context_base.
const kernel_cl< out_buffer, in_buffer, double, int, int > scalar_mul("scalar_mul", {indexing_helpers, scalar_mul_kernel_code})
See the docs for add() .
The matrix_cl class - allocates memory space on the OpenCL device, functions for transferring matrices...
opencl_context_base::tuning_struct & tuning_opts()
Returns the thread block size for the Cholesky Decompositions L_11.
void check_size_match(const char *function, const char *name_i, T_size1 i, const char *name_j, T_size2 j)
Check if the provided sizes match.
auto multiply(const matrix_cl &A, const matrix_cl &B)
Computes the product of the specified matrices with the option of specifying the triangularity of eit...
void zeros()
Stores zeros in the matrix on the OpenCL device.
Represents a matrix on the OpenCL device.
checking OpenCL error numbers
const kernel_cl< in_buffer, in_buffer, out_buffer, int, int, TriangularViewCL, TriangularViewCL > row_vector_matrix_multiply("row_vector_matrix_multiply", row_vector_matrix_multiply_kernel_code, {{"LOCAL_SIZE_", 64}, {"REDUCTION_STEP_SIZE", 4}})
See the docs for row_vector_matrix_multiply() .
const kernel_cl< out_buffer, in_buffer, int, int, int > add_batch("add_batch", {indexing_helpers, add_batch_kernel_code})
See the docs for add_batch() .
double e()
Return the base of the natural logarithm.
const kernel_cl< in_buffer, in_buffer, out_buffer, int, int, int, TriangularViewCL, TriangularViewCL > matrix_multiply("matrix_multiply", {thread_block_helpers, matrix_multiply_kernel_code}, {{"THREAD_BLOCK_SIZE", 32}, {"WORK_PER_THREAD", 8}})
See the docs for matrix_multiply() .
void check_opencl_error(const char *function, const cl::Error &e)
Throws a domain error specifying the OpenCL error that occurred.
const kernel_cl< in_buffer, in_buffer, out_buffer, int, int, TriangularViewCL, TriangularViewCL > matrix_vector_multiply("matrix_vector_multiply", matrix_vector_multiply_kernel_code)
See the docs for matrix_vector_multiply() .