![]() |
Stan Math Library
2.20.0
reverse mode automatic differentiation
|
Namespaces | |
internal | |
Classes | |
struct | in_buffer |
struct | in_out_buffer |
struct | kernel_cl |
Creates functor for kernels. More... | |
class | kernel_functor |
Functor used for compiling kernels. More... | |
struct | out_buffer |
Functions | |
auto | compile_kernel (const char *name, const std::vector< const char *> &sources, std::map< const char *, int > &options) |
Compile an OpenCL kernel. More... | |
__kernel void | add (__global double *C, __global double *A, __global double *B, unsigned int rows, unsigned int cols) |
Matrix addition on the OpenCL device. More... | |
__kernel void | add_batch (__global double *B, __global double *A, unsigned int rows, unsigned int cols, unsigned int batch_size) |
Sums a batch of matrices. More... | |
__kernel void | is_zero_on_diagonal (__global double *A, __global int *flag, unsigned int rows, unsigned int cols) |
Check if the matrix_cl has zeros on the diagonal. More... | |
__kernel void | is_nan (__global double *A, __global int *flag, unsigned int rows, unsigned int cols) |
Check if the matrix_cl has NaN values. More... | |
__kernel void | is_symmetric (__global double *A, __global int *flag, unsigned int rows, unsigned int cols, double tolerance) |
Check if the matrix_cl is symmetric. More... | |
__kernel void | cholesky_decompose (__global double *A, int rows) |
Calculates the Cholesky Decomposition of a matrix on an OpenCL device. More... | |
__kernel void | copy (__global double *A, __global double *B, unsigned int rows, unsigned int cols) |
Copy one matrix to another. More... | |
__kernel void | copy_triangular (__global double *A, __global double *B, unsigned int rows, unsigned int cols, unsigned int lower_upper) |
Copies the lower or upper triangular of the source matrix to the destination matrix. More... | |
__kernel void | diag_inv (__global double *A, __global double *tmp_inv, int rows) |
Calculates inplace submatrix inversions along the matrix diagonal. More... | |
__kernel void | identity (__global double *A, unsigned int rows, unsigned int cols) |
Makes an identity matrix on the OpenCL device. More... | |
__kernel void | batch_identity (__global double *A, unsigned int batch_rows, unsigned int size) |
Makes a batch of smaller identity matrices inside the input matrix. More... | |
__kernel void | inv_lower_tri_multiply (__global double *A, __global double *temp, const int A_rows, const int rows) |
Calculates B = C * A. More... | |
__kernel void | matrix_multiply (const __global double *A, const __global double *B, __global double *C, const int M, const int N, const int K, unsigned int lower_upper_A, unsigned int lower_upper_B) |
Matrix multiplication on the OpenCL device. More... | |
__kernel void | matrix_vector_multiply (const __global double *A, const __global double *B, __global double *R, const int M, const int N, unsigned int lower_upper_A, unsigned int lower_upper_B) |
Matrix-vector multiplication R=A*B on the OpenCL device. More... | |
__kernel void | row_vector_matrix_multiply (const __global double *A, const __global double *B, __global double *R, const int N, const int K, unsigned int lower_upper_A, unsigned int lower_upper_B) |
Row vector-matrix multiplication R=A*B on the OpenCL device. More... | |
__kernel void | multiply_transpose (const __global double *A, __global double *B, const int M, const int N) |
Matrix multiplication of the form A*A^T on the OpenCL device. More... | |
__kernel void | neg_rect_lower_tri_multiply (__global double *A, const __global double *temp, const int A_rows, const int rows) |
Calculates C = -B * A where B is rectangular and A is a lower triangular. More... | |
__kernel void | pack (__global double *A, __global double *B, unsigned int rows, unsigned int cols, unsigned int part) |
Packs a flat matrix to a packed triangular matrix. More... | |
__kernel void | scalar_mul (__global double *A, const __global double *B, const double scalar, const unsigned int rows, const unsigned int cols) |
Multiplication of the matrix A with a scalar. More... | |
__kernel void | scalar_mul_diagonal (__global double *A, const double scalar, const unsigned int rows, const unsigned int min_dim) |
Multiplication of the matrix A diagonal with a scalar. More... | |
__kernel void | sub_block (__global double *src, __global double *dst, unsigned int src_offset_i, unsigned int src_offset_j, unsigned int dst_offset_i, unsigned int dst_offset_j, unsigned int size_i, unsigned int size_j, unsigned int src_rows, unsigned int src_cols, unsigned int dst_rows, unsigned int dst_cols, unsigned int triangular_view) |
Copies a submatrix of the source matrix to the destination matrix. More... | |
__kernel void | subtract (__global double *C, __global double *A, __global double *B, unsigned int rows, unsigned int cols) |
Matrix subtraction on the OpenCL device. Subtracts the second matrix from the first matrix and stores the result in the third matrix (C=A-B). More... | |
__kernel void | transpose (__global double *B, __global double *A, unsigned int rows, unsigned int cols) |
Takes the transpose of the matrix on the OpenCL device. More... | |
__kernel void | triangular_transpose (__global double *A, unsigned int rows, unsigned int cols, unsigned int copy_direction) |
Copies a lower/upper triangular of a matrix to its upper/lower. More... | |
__kernel void | unpack (__global double *B, __global double *A, unsigned int rows, unsigned int cols, unsigned int part) |
Unpacks a packed triangular matrix to a flat matrix. More... | |
__kernel void | zeros (__global double *A, unsigned int rows, unsigned int cols, unsigned int part) |
Stores zeros in the matrix on the OpenCL device. More... | |
Variables | |
const kernel_cl< out_buffer, in_buffer, in_buffer, int, int > | add ("add", {indexing_helpers, add_kernel_code}) |
See the docs for add() . More... | |
const kernel_cl< out_buffer, in_buffer, int, int, int > | add_batch ("add_batch", {indexing_helpers, add_batch_kernel_code}) |
See the docs for add_batch() . More... | |
const kernel_cl< in_buffer, out_buffer, int, int > | check_diagonal_zeros ("is_zero_on_diagonal", {indexing_helpers, is_zero_on_diagonal_kernel_code}) |
See the docs for is_zero_on_diagonal() . More... | |
const kernel_cl< in_buffer, out_buffer, int, int > | check_nan ("is_nan", {indexing_helpers, is_nan_kernel_code}) |
See the docs for is_nan() . More... | |
const kernel_cl< in_buffer, out_buffer, int, int, const double > | check_symmetric ("is_symmetric", {indexing_helpers, is_symmetric_kernel_code}) |
See the docs for is_symmetric() . More... | |
const kernel_cl< in_out_buffer, int > | cholesky_decompose ("cholesky_decompose", {indexing_helpers, cholesky_decompose_kernel_code}) |
See the docs for cholesky_decompose() . More... | |
const kernel_cl< in_buffer, out_buffer, int, int > | copy ("copy", {indexing_helpers, copy_kernel_code}) |
See the docs for copy() . More... | |
const kernel_cl< out_buffer, in_buffer, int, int, TriangularViewCL > | copy_triangular ("copy_triangular", {indexing_helpers, copy_triangular_kernel_code}) |
See the docs for copy_triangular() . More... | |
const kernel_cl< in_out_buffer, in_out_buffer, int > | diag_inv ("diag_inv", {indexing_helpers, diag_inv_kernel_code}, {{"THREAD_BLOCK_SIZE", 32}}) |
See the docs for diag_inv() . More... | |
static const char * | indexing_helpers |
static const char * | thread_block_helpers |
const kernel_cl< out_buffer, int, int > | identity ("identity", {indexing_helpers, identity_kernel_code}) |
See the docs for identity() . More... | |
const kernel_cl< out_buffer, int, int > | batch_identity ("batch_identity", {indexing_helpers, batch_identity_kernel_code}) |
See the docs for batch_identity() . More... | |
const kernel_cl< in_buffer, out_buffer, int, int > | inv_lower_tri_multiply ("inv_lower_tri_multiply", {thread_block_helpers, inv_lower_tri_multiply_kernel_code}, {{"THREAD_BLOCK_SIZE", 32}, {"WORK_PER_THREAD", 8}}) |
See the docs for inv_lower_tri_multiply() . More... | |
const kernel_cl< in_buffer, in_buffer, out_buffer, int, int, int, TriangularViewCL, TriangularViewCL > | matrix_multiply ("matrix_multiply", {thread_block_helpers, matrix_multiply_kernel_code}, {{"THREAD_BLOCK_SIZE", 32}, {"WORK_PER_THREAD", 8}}) |
See the docs for matrix_multiply() . More... | |
const kernel_cl< in_buffer, in_buffer, out_buffer, int, int, TriangularViewCL, TriangularViewCL > | matrix_vector_multiply ("matrix_vector_multiply", matrix_vector_multiply_kernel_code) |
See the docs for matrix_vector_multiply() . More... | |
const kernel_cl< in_buffer, in_buffer, out_buffer, int, int, TriangularViewCL, TriangularViewCL > | row_vector_matrix_multiply ("row_vector_matrix_multiply", row_vector_matrix_multiply_kernel_code, {{"LOCAL_SIZE_", 64}, {"REDUCTION_STEP_SIZE", 4}}) |
See the docs for row_vector_matrix_multiply() . More... | |
const kernel_cl< in_buffer, out_buffer, int, int > | multiply_transpose ("multiply_transpose", {thread_block_helpers, multiply_transpose_kernel_code}, {{"THREAD_BLOCK_SIZE", 32}, {"WORK_PER_THREAD", 4}}) |
See the docs for multiply_transpose() . More... | |
const kernel_cl< in_out_buffer, in_buffer, int, int > | neg_rect_lower_tri_multiply ("neg_rect_lower_tri_multiply", {thread_block_helpers, neg_rect_lower_tri_multiply_kernel_code}, {{"THREAD_BLOCK_SIZE", 32}, {"WORK_PER_THREAD", 8}}) |
See the docs for neg_rect_lower_tri_multiply() . More... | |
const kernel_cl< out_buffer, in_buffer, int, int, TriangularViewCL > | pack ("pack", {indexing_helpers, pack_kernel_code}) |
See the docs for pack() . More... | |
const kernel_cl< out_buffer, in_buffer, double, int, int > | scalar_mul ("scalar_mul", {indexing_helpers, scalar_mul_kernel_code}) |
See the docs for scalar_mul() . More... | |
const kernel_cl< in_out_buffer, double, int, int > | scalar_mul_diagonal ("scalar_mul_diagonal", {indexing_helpers, scalar_mul_diagonal_kernel_code}) |
See the docs for scalar_mul_diagonal() . More... | |
const kernel_cl< in_buffer, out_buffer, int, int, int, int, int, int, int, int, int, int, TriangularViewCL > | sub_block ("sub_block", {indexing_helpers, sub_block_kernel_code}) |
See the docs for sub_block() . More... | |
const kernel_cl< out_buffer, in_buffer, in_buffer, int, int > | subtract ("subtract", {indexing_helpers, subtract_kernel_code}) |
See the docs for subtract() . More... | |
const kernel_cl< out_buffer, in_buffer, int, int > | transpose ("transpose", {indexing_helpers, transpose_kernel_code}) |
See the docs for transpose() . More... | |
const kernel_cl< in_out_buffer, int, int, TriangularMapCL > | triangular_transpose ("triangular_transpose", {indexing_helpers, triangular_transpose_kernel_code}) |
See the docs for triangular_transpose() . More... | |
const kernel_cl< out_buffer, in_buffer, int, int, TriangularViewCL > | unpack ("unpack", {indexing_helpers, unpack_kernel_code}) |
See the docs for unpack() . More... | |
const kernel_cl< out_buffer, int, int, TriangularViewCL > | zeros ("zeros", {indexing_helpers, zeros_kernel_code}) |
See the docs for zeros() . More... | |
__kernel void stan::math::opencl_kernels::add | ( | __global double * | C, |
__global double * | A, | ||
__global double * | B, | ||
unsigned int | rows, | ||
unsigned int | cols | ||
) |
Matrix addition on the OpenCL device.
[out] | C | Output matrix. |
[in] | A | LHS of matrix addition. |
[in] | B | RHS of matrix addition. |
rows | Number of rows for matrix A. | |
cols | Number of cols for matrix A. |
const char*
held in add_kernel_code.
This kernel uses the helper macros available in helpers.cl. __kernel void stan::math::opencl_kernels::add_batch | ( | __global double * | B, |
__global double * | A, | ||
unsigned int | rows, | ||
unsigned int | cols, | ||
unsigned int | batch_size | ||
) |
Sums a batch of matrices.
Buffer A contains batch_size matrices of size rows x cols. All elements at matching indices are summed up and stored to the resulting matrix B.
[out] | B | buffer of the result matrix. |
[in] | A | buffer containing the entire batch. |
rows | Number of rows for a single matrix in the batch. | |
cols | Number of cols for a single matrix in the batch. | |
batch_size | Number of matrices in the batch. |
const char*
held in add_batch_kernel_code.
This kernel uses the helper macros available in helpers.cl. __kernel void stan::math::opencl_kernels::batch_identity | ( | __global double * | A, |
unsigned int | batch_rows, | ||
unsigned int | size | ||
) |
Makes a batch of smaller identity matrices inside the input matrix.
This kernel operates inplace on the matrix A, filling it with smaller identity matrices with a size of batch_rows x batch_rows. This kernel expects a 3D organization of threads: 1st dim: the number of matrices in the batch. 2nd dim: the number of cols/rows in batch matrices. 3rd dim: the number of cols/rows in batch matrices. Each thread in the organization assigns a single value in the batch. In order to create a batch of 3 matrices the size of NxN you need to run the kernel batch_identity(A, N, 3*N*N) with (3, N, N) threads. The special case of batch_identity(A, N, N*N) executed on (1, N, N) threads creates a single identity matrix the size of NxN and is therefore equal to the basic identity kernel.
[in,out] | A | The batched identity matrix output. |
batch_rows | The number of rows/cols for the smaller matrices in the batch | |
size | The size of A. |
const char*
held in batch_identity_kernel_code.
This kernel uses the helper macros available in helpers.cl. Definition at line 68 of file identity.hpp.
__kernel void stan::math::opencl_kernels::cholesky_decompose | ( | __global double * | A, |
int | rows | ||
) |
Calculates the Cholesky Decomposition of a matrix on an OpenCL device.
This kernel is run with threads organized in one dimension and in a single thread block. The kernel is best suited for small input matrices as it only utilizes a single streaming multiprocessor. The kernel is used as a part of a blocked cholesky decompose.
[in,out] | A | The input matrix and the result of the cholesky decomposition |
rows | The number of rows for A. |
const char*
held in cholesky_decompose_kernel_code.
Used in math/opencl/cholesky_decompose.hpp. This kernel uses the helper macros available in helpers.cl. Definition at line 32 of file cholesky_decompose.hpp.
|
inline |
Compile an OpenCL kernel.
name | The name for the kernel |
sources | A std::vector of strings containing the code for the kernel. |
options | The values of macros to be passed at compile time. |
Definition at line 124 of file kernel_cl.hpp.
__kernel void stan::math::opencl_kernels::copy | ( | __global double * | A, |
__global double * | B, | ||
unsigned int | rows, | ||
unsigned int | cols | ||
) |
Copy one matrix to another.
[in] | A | The matrix to copy. |
[out] | B | The matrix to copy A to. |
rows | The number of rows in A. | |
cols | The number of cols in A. |
const char*
held in copy_kernel_code.
Kernel used in math/opencl/matrix_cl.hpp. This kernel uses the helper macros available in helpers.cl. __kernel void stan::math::opencl_kernels::copy_triangular | ( | __global double * | A, |
__global double * | B, | ||
unsigned int | rows, | ||
unsigned int | cols, | ||
unsigned int | lower_upper | ||
) |
Copies the lower or upper triangular of the source matrix to the destination matrix.
Both matrices are stored on the OpenCL device.
[out] | A | Output matrix to copy triangular to. |
[in] | B | The matrix to copy the triangular from. |
rows | The number of rows of B. | |
cols | The number of cols of B. | |
lower_upper | determines which part of the matrix to copy: LOWER: 0 - copies the lower triangular UPPER: 1 - copies the upper triangular |
const char*
held in copy_triangular_kernel_code.
Used in math/opencl/copy_triangular_opencl.hpp. This kernel uses the helper macros available in helpers.cl. Definition at line 33 of file copy_triangular.hpp.
__kernel void stan::math::opencl_kernels::diag_inv | ( | __global double * | A, |
__global double * | tmp_inv, | ||
int | rows | ||
) |
Calculates inplace submatrix inversions along the matrix diagonal.
For a full guide to the inverse lower triangular kernels see the link here. In the special case that the thread block size is larger than the input matrix A then this kernel will perform the complete lower triangular inverse of matrix A. More often, TB is smaller than A and A will have lower triangular inverses calculated on submatrices along the diagonal equal to the size of the thread block. Running this kernel on a matrix with N = 4
This kernel is run with threads organized in a single dimension. If we want to calculate N blocks of size TB across the diagonal we spawn N x TB threads with TB used as the thread block size.
[in,out] | A | The input matrix. |
[in,out] | tmp_inv | A matrix with batches of identity matrices along the diagonal. |
rows | The number of rows for A. |
const char*
held in diag_inv_kernel_code.
Used in math/opencl/tri_inverse.hpp. This kernel uses the helper macros available in helpers.cl. Definition at line 42 of file diag_inv.hpp.
__kernel void stan::math::opencl_kernels::identity | ( | __global double * | A, |
unsigned int | rows, | ||
unsigned int | cols | ||
) |
Makes an identity matrix on the OpenCL device.
[in,out] | A | The identity matrix output. |
rows | The number of rows for A. | |
cols | The number of cols for A. |
const char*
held in identity_kernel_code.
Used in math/opencl/identity_opencl.hpp. This kernel uses the helper macros available in helpers.cl. Definition at line 25 of file identity.hpp.
__kernel void stan::math::opencl_kernels::inv_lower_tri_multiply | ( | __global double * | A, |
__global double * | temp, | ||
const int | A_rows, | ||
const int | rows | ||
) |
Calculates B = C * A.
C is an inverse matrix and A is lower triangular.
This kernel is used in the final iteration of the batched lower triangular inversion. For a full guide to the inverse lower triangular kernels see the link here. The full inverse requires calculation of the lower left rectangular matrix within the lower left triangular C3 = -C2*A3*C1, where C2 is the inverse of the bottom right lower triangular, C1 is the inverse of the upper left lower triangular, and A3 is the original lower triangular's lower left rectangular. This kernel takes the output from neg_rect_lower_tri_multiply
and applies the submatrix multiplication to get the final output for C3.
Graphically, this kernel calculates the C2 * A3. The kernel is executed using (N, N, m) threads, where N is the size of the input matrices.
[in] | A | input matrix that is being inverted. |
[out] | temp | output matrix with results of the batched matrix multiplications |
A_rows | The number of rows for A. | |
rows | The number of rows in a single matrix of the batch |
const char*
held in inv_lower_tri_multiply_kernel_code.
Used in math/opencl/tri_inverse.hpp. This kernel uses the helper macros available in helpers.cl. Definition at line 45 of file inv_lower_tri_multiply.hpp.
__kernel void stan::math::opencl_kernels::is_nan | ( | __global double * | A, |
__global int * | flag, | ||
unsigned int | rows, | ||
unsigned int | cols | ||
) |
Check if the matrix_cl
has NaN values.
[in] | A | The matrix to check. |
rows | The number of rows in matrix A. | |
cols | The number of columns in matrix A. | |
[out] | flag | the flag to be written to if any element is NaN. |
const char*
held in is_nan_kernel_code.
Kernel for stan/math/opencl/err/check_nan.hpp. This kernel uses the helper macros available in helpers.cl. Definition at line 26 of file check_nan.hpp.
__kernel void stan::math::opencl_kernels::is_symmetric | ( | __global double * | A, |
__global int * | flag, | ||
unsigned int | rows, | ||
unsigned int | cols, | ||
double | tolerance | ||
) |
Check if the matrix_cl
is symmetric.
[in] | A | The matrix to check. |
rows | The number of rows in matrix A. | |
cols | The number of columns in matrix A. | |
[out] | flag | the flag to be written to if the matrix is not symmetric. |
tolerance | The numerical tolerance to check whether two values are equal |
const char*
held in is_symmetric_kernel_code.
Kernel for stan/math/opencl/err/check_symmetric.hpp. This kernel uses the helper macros available in helpers.cl. Definition at line 28 of file check_symmetric.hpp.
__kernel void stan::math::opencl_kernels::is_zero_on_diagonal | ( | __global double * | A, |
__global int * | flag, | ||
unsigned int | rows, | ||
unsigned int | cols | ||
) |
Check if the matrix_cl
has zeros on the diagonal.
[in] | A | Matrix to check. |
[out] | flag | the flag to be written to if any diagonal is zero. |
rows | The number of rows for A. | |
cols | The number of cols of A. |
const char*
held in is_zero_on_diagonal_kernel_code.
Kernel for stan/math/opencl/err/check_diagonal_zeros.hpp. This kernel uses the helper macros available in helpers.cl. Definition at line 26 of file check_diagonal_zeros.hpp.
__kernel void stan::math::opencl_kernels::matrix_multiply | ( | const __global double * | A, |
const __global double * | B, | ||
__global double * | C, | ||
const int | M, | ||
const int | N, | ||
const int | K, | ||
unsigned int | lower_upper_A, | ||
unsigned int | lower_upper_B | ||
) |
Matrix multiplication on the OpenCL device.
[in] | A | the left matrix in matrix multiplication |
[in] | B | the right matrix in matrix multiplication |
[out] | C | the output matrix |
[in] | M | Number of rows for matrix A |
[in] | N | Number of cols for matrix B |
[in] | K | Number of cols for matrix A and number of rows for matrix B |
[in] | lower_upper_A | the triangularity of A (lower, upper or none) |
[in] | lower_upper_B | the triangularity of B (lower, upper or none) |
Definition at line 26 of file matrix_multiply.hpp.
__kernel void stan::math::opencl_kernels::matrix_vector_multiply | ( | const __global double * | A, |
const __global double * | B, | ||
__global double * | R, | ||
const int | M, | ||
const int | N, | ||
unsigned int | lower_upper_A, | ||
unsigned int | lower_upper_B | ||
) |
Matrix-vector multiplication R=A*B on the OpenCL device.
[in] | A | matrix in matrix-vector multiplication |
[in] | B | vector in matrix-vector multiplication |
[out] | R | the output vector |
[in] | M | Number of rows for matrix A |
[in] | N | Number of cols for matrix A and number of rows for vector B |
[in] | lower_upper_A | the triangularity of A (lower, upper or none) |
[in] | lower_upper_B | the triangularity of B (lower, upper or none) |
Definition at line 185 of file matrix_multiply.hpp.
__kernel void stan::math::opencl_kernels::multiply_transpose | ( | const __global double * | A, |
__global double * | B, | ||
const int | M, | ||
const int | N | ||
) |
Matrix multiplication of the form A*A^T on the OpenCL device.
[in] | A | matrix A |
[out] | B | the output matrix |
[in] | M | Number of rows for matrix A |
[in] | N | Number of cols for matrix A and the number of rows for matrix A^T |
Definition at line 23 of file multiply_transpose.hpp.
__kernel void stan::math::opencl_kernels::neg_rect_lower_tri_multiply | ( | __global double * | A, |
const __global double * | temp, | ||
const int | A_rows, | ||
const int | rows | ||
) |
Calculates C = -B * A where B is rectangular and A is a lower triangular.
For a full guide to the inverse lower triangular kernels see the link here.
Graphically, this kernel calculates -temp * C1
where temp is the C2 * A3 calculation from inv_lower_tri_multiply() The kernel is executed using (N, N, m) threads, where N is the size of the input matrices.
[in,out] | A | Input matrix that is being inverted. |
[in] | temp | Temporary matrix with the intermediate results. |
A_rows | Number of rows for A. | |
rows | The number of rows in a single matrix of the batch |
const char*
held in neg_rect_lower_tri_multiply_kernel_code. Used in math/opencl/tri_inverse.hpp. This kernel uses the helper macros available in helpers.cl. Definition at line 39 of file neg_rect_lower_tri_multiply.hpp.
__kernel void stan::math::opencl_kernels::pack | ( | __global double * | A, |
__global double * | B, | ||
unsigned int | rows, | ||
unsigned int | cols, | ||
unsigned int | part | ||
) |
Packs a flat matrix to a packed triangular matrix.
[out] | A | packed buffer |
[in] | B | flat matrix |
rows | number of rows for matrix B | |
cols | number of columns for matrix B | |
part | parameter that defines the triangularity of the input matrix LOWER - lower triangular UPPER - upper triangular if the part parameter is not specified |
const char*
held in pack_kernel_code.
This kernel uses the helper macros available in helpers.cl. __kernel void stan::math::opencl_kernels::row_vector_matrix_multiply | ( | const __global double * | A, |
const __global double * | B, | ||
__global double * | R, | ||
const int | N, | ||
const int | K, | ||
unsigned int | lower_upper_A, | ||
unsigned int | lower_upper_B | ||
) |
Row vector-matrix multiplication R=A*B on the OpenCL device.
[in] | A | row vector in row vector-matrix multiplication |
[in] | B | matrix in row vector-matrix multiplication |
[out] | R | the output vector |
[in] | N | Number of cols for row vector A and number of rows for matrix B |
[in] | K | Number of cols for matrix B |
[in] | lower_upper_A | the triangularity of A (lower, upper or none) |
[in] | lower_upper_B | the triangularity of B (lower, upper or none) |
Definition at line 229 of file matrix_multiply.hpp.
__kernel void stan::math::opencl_kernels::scalar_mul | ( | __global double * | A, |
const __global double * | B, | ||
const double | scalar, | ||
const unsigned int | rows, | ||
const unsigned int | cols | ||
) |
Multiplication of the matrix A with a scalar.
[out] | A | output matrix |
[in] | B | input matrix |
[in] | scalar | the value with which to multiply A |
[in] | rows | the number of rows in A |
[in] | cols | the number of columns in A |
Definition at line 23 of file scalar_mul.hpp.
__kernel void stan::math::opencl_kernels::scalar_mul_diagonal | ( | __global double * | A, |
const double | scalar, | ||
const unsigned int | rows, | ||
const unsigned int | min_dim | ||
) |
Multiplication of the matrix A diagonal with a scalar.
[in,out] | A | matrix A |
[in] | scalar | the value with which to multiply the diagonal of A |
[in] | rows | the number of rows in A |
[in] | min_dim | the size of the smaller dimension of A |
Definition at line 22 of file scalar_mul_diagonal.hpp.
__kernel void stan::math::opencl_kernels::sub_block | ( | __global double * | src, |
__global double * | dst, | ||
unsigned int | src_offset_i, | ||
unsigned int | src_offset_j, | ||
unsigned int | dst_offset_i, | ||
unsigned int | dst_offset_j, | ||
unsigned int | size_i, | ||
unsigned int | size_j, | ||
unsigned int | src_rows, | ||
unsigned int | src_cols, | ||
unsigned int | dst_rows, | ||
unsigned int | dst_cols, | ||
unsigned int | triangular_view | ||
) |
Copies a submatrix of the source matrix to the destination matrix.
The submatrix to copy starts at (src_offset_i, src_offset_j) in the source matrix and is of size size_i x size_j. The submatrix is copied to the destination matrix starting at (dst_offset_i, dst_offset_j).
[in] | src | The source matrix. |
[out] | dst | The destination submatrix. |
src_offset_i | The offset row in src. | |
src_offset_j | The offset column in src. | |
dst_offset_i | The offset row in dst. | |
dst_offset_j | The offset column in dst. | |
size_i | The number of rows in the submatrix. | |
size_j | The number of columns in the submatrix. | |
src_rows | The number of rows in the source matrix. | |
src_cols | The number of cols in the source matrix. | |
src_rows | The number of rows in the destination matrix. | |
dst_cols | The number of cols in the destination matrix. | |
dst_rows | The number of rows in the destination matrix. | |
triangular_view | the triangularity of src (lower, upper or none) |
const char*
held in sub_block_kernel_code.
Used in math/opencl/copy_submatrix_opencl.hpp. This kernel uses the helper macros available in helpers.cl. Definition at line 44 of file sub_block.hpp.
__kernel void stan::math::opencl_kernels::subtract | ( | __global double * | C, |
__global double * | A, | ||
__global double * | B, | ||
unsigned int | rows, | ||
unsigned int | cols | ||
) |
Matrix subtraction on the OpenCL device. Subtracts the second matrix from the first matrix and stores the result in the third matrix (C=A-B).
[out] | C | The output matrix. |
[in] | B | RHS input matrix. |
[in] | A | LHS input matrix. |
rows | The number of rows for matrix A. | |
cols | The number of columns for matrix A. |
const char*
held in subtract_kernel_code.
Used in math/opencl/subtract_opencl.hpp. This kernel uses the helper macros available in helpers.cl. Definition at line 30 of file subtract.hpp.
__kernel void stan::math::opencl_kernels::transpose | ( | __global double * | B, |
__global double * | A, | ||
unsigned int | rows, | ||
unsigned int | cols | ||
) |
Takes the transpose of the matrix on the OpenCL device.
[out] | B | The output matrix to hold transpose of A. |
[in] | A | The input matrix to transpose into B. |
rows | The number of rows for A. | |
cols | The number of columns for A. |
const char*
held in transpose_kernel_code.
This kernel uses the helper macros available in helpers.cl. Definition at line 25 of file transpose.hpp.
__kernel void stan::math::opencl_kernels::triangular_transpose | ( | __global double * | A, |
unsigned int | rows, | ||
unsigned int | cols, | ||
unsigned int | copy_direction | ||
) |
Copies a lower/upper triangular of a matrix to its upper/lower.
[in,out] | A | The matrix. |
rows | The number of rows in A. | |
cols | The number of cols in A. | |
copy_direction | A value of zero or one specifying which direction to copy LOWER_TO_UPPER: 1 UPPER_TO_LOWER: 0 |
const char*
held in triangular_transpose_kernel_code.
Used in math/opencl/triangular_transpose.hpp. This kernel uses the helper macros available in helpers.cl. Definition at line 29 of file triangular_transpose.hpp.
__kernel void stan::math::opencl_kernels::unpack | ( | __global double * | B, |
__global double * | A, | ||
unsigned int | rows, | ||
unsigned int | cols, | ||
unsigned int | part | ||
) |
Unpacks a packed triangular matrix to a flat matrix.
[out] | B | flat matrix |
[in] | A | packed buffer |
rows | number of rows for matrix B | |
cols | number of columns for matrix B | |
part | parameter that defines the triangularity of the input matrix LOWER - lower triangular UPPER - upper triangular if the part parameter is not specified |
const char*
held in unpack_kernel_code.
This kernel uses the helper macros available in helpers.cl. Definition at line 31 of file unpack.hpp.
__kernel void stan::math::opencl_kernels::zeros | ( | __global double * | A, |
unsigned int | rows, | ||
unsigned int | cols, | ||
unsigned int | part | ||
) |
Stores zeros in the matrix on the OpenCL device.
Supports writing zeroes to the lower and upper triangular or the whole matrix.
[out] | A | matrix |
rows | Number of rows for matrix A | |
cols | Number of columns for matrix A | |
part | optional parameter that describes where to assign zeros: LOWER - lower triangular UPPER - upper triangular if the part parameter is not specified, zeros are assigned to the whole matrix. |
const char*
held in zeros_kernel_code.
This kernel uses the helper macros available in helpers.cl. const kernel_cl<out_buffer, in_buffer, in_buffer, int, int> stan::math::opencl_kernels::add("add", {indexing_helpers, add_kernel_code}) |
See the docs for add() .
const kernel_cl<out_buffer, in_buffer, int, int, int> stan::math::opencl_kernels::add_batch("add_batch", {indexing_helpers, add_batch_kernel_code}) |
See the docs for add_batch() .
const kernel_cl<out_buffer, int, int> stan::math::opencl_kernels::batch_identity("batch_identity", {indexing_helpers, batch_identity_kernel_code}) |
See the docs for batch_identity() .
const kernel_cl<in_buffer, out_buffer, int, int> stan::math::opencl_kernels::check_diagonal_zeros("is_zero_on_diagonal", {indexing_helpers, is_zero_on_diagonal_kernel_code}) |
See the docs for is_zero_on_diagonal() .
const kernel_cl<in_buffer, out_buffer, int, int> stan::math::opencl_kernels::check_nan("is_nan", {indexing_helpers, is_nan_kernel_code}) |
See the docs for is_nan() .
const kernel_cl<in_buffer, out_buffer, int, int, const double> stan::math::opencl_kernels::check_symmetric("is_symmetric", {indexing_helpers, is_symmetric_kernel_code}) |
See the docs for check_symmetric() .
const kernel_cl<in_out_buffer, int> stan::math::opencl_kernels::cholesky_decompose("cholesky_decompose", {indexing_helpers, cholesky_decompose_kernel_code}) |
See the docs for cholesky_decompose() .
const kernel_cl<in_buffer, out_buffer, int, int> stan::math::opencl_kernels::copy("copy", {indexing_helpers, copy_kernel_code}) |
See the docs for copy() .
const kernel_cl<out_buffer, in_buffer, int, int, TriangularViewCL> stan::math::opencl_kernels::copy_triangular("copy_triangular", {indexing_helpers, copy_triangular_kernel_code}) |
See the docs for copy_triangular() .
const kernel_cl<in_out_buffer, in_out_buffer, int> stan::math::opencl_kernels::diag_inv("diag_inv", {indexing_helpers, diag_inv_kernel_code}, {{"THREAD_BLOCK_SIZE", 32}}) |
See the docs for diag_inv() .
const kernel_cl<out_buffer, int, int> stan::math::opencl_kernels::identity("identity", {indexing_helpers, identity_kernel_code}) |
See the docs for identity() .
|
static |
Definition at line 14 of file helpers.hpp.
const kernel_cl<in_buffer, out_buffer, int, int> stan::math::opencl_kernels::inv_lower_tri_multiply("inv_lower_tri_multiply", {thread_block_helpers, inv_lower_tri_multiply_kernel_code}, {{"THREAD_BLOCK_SIZE", 32}, {"WORK_PER_THREAD", 8}}) |
See the docs for inv_lower_tri_multiply() .
const kernel_cl<in_buffer, in_buffer, out_buffer, int, int, int, TriangularViewCL, TriangularViewCL> stan::math::opencl_kernels::matrix_multiply("matrix_multiply", {thread_block_helpers, matrix_multiply_kernel_code}, {{"THREAD_BLOCK_SIZE", 32}, {"WORK_PER_THREAD", 8}}) |
See the docs for matrix_multiply() .
const kernel_cl<in_buffer, in_buffer, out_buffer, int, int, TriangularViewCL, TriangularViewCL> stan::math::opencl_kernels::matrix_vector_multiply("matrix_vector_multiply", matrix_vector_multiply_kernel_code) |
See the docs for matrix_vector_multiply() .
const kernel_cl<in_buffer, out_buffer, int, int> stan::math::opencl_kernels::multiply_transpose("multiply_transpose", {thread_block_helpers, multiply_transpose_kernel_code}, {{"THREAD_BLOCK_SIZE", 32}, {"WORK_PER_THREAD", 4}}) |
See the docs for multiply_transpose() .
const kernel_cl<in_out_buffer, in_buffer, int, int> stan::math::opencl_kernels::neg_rect_lower_tri_multiply("neg_rect_lower_tri_multiply", {thread_block_helpers, neg_rect_lower_tri_multiply_kernel_code}, {{"THREAD_BLOCK_SIZE", 32}, {"WORK_PER_THREAD", 8}}) |
See the docs for neg_rect_lower_tri_multiply() .
const kernel_cl<out_buffer, in_buffer, int, int, TriangularViewCL> stan::math::opencl_kernels::pack("pack", {indexing_helpers, pack_kernel_code}) |
See the docs for pack() .
const kernel_cl<in_buffer, in_buffer, out_buffer, int, int, TriangularViewCL, TriangularViewCL> stan::math::opencl_kernels::row_vector_matrix_multiply("row_vector_matrix_multiply", row_vector_matrix_multiply_kernel_code, {{"LOCAL_SIZE_", 64}, {"REDUCTION_STEP_SIZE", 4}}) |
See the docs for row_vector_matrix_multiply() .
const kernel_cl<out_buffer, in_buffer, double, int, int> stan::math::opencl_kernels::scalar_mul("scalar_mul", {indexing_helpers, scalar_mul_kernel_code}) |
See the docs for scalar_mul() .
const kernel_cl<in_out_buffer, double, int, int> stan::math::opencl_kernels::scalar_mul_diagonal("scalar_mul_diagonal", {indexing_helpers, scalar_mul_diagonal_kernel_code}) |
See the docs for scalar_mul_diagonal() .
const kernel_cl<in_buffer, out_buffer, int, int, int, int, int, int, int, int, int, int, TriangularViewCL> stan::math::opencl_kernels::sub_block("sub_block", {indexing_helpers, sub_block_kernel_code}) |
See the docs for sub_block() .
const kernel_cl<out_buffer, in_buffer, in_buffer, int, int> stan::math::opencl_kernels::subtract("subtract", {indexing_helpers, subtract_kernel_code}) |
See the docs for subtract() .
|
static |
Definition at line 48 of file helpers.hpp.
const kernel_cl<out_buffer, in_buffer, int, int> stan::math::opencl_kernels::transpose("transpose", {indexing_helpers, transpose_kernel_code}) |
See the docs for transpose() .
const kernel_cl<in_out_buffer, int, int, TriangularMapCL> stan::math::opencl_kernels::triangular_transpose("triangular_transpose", {indexing_helpers, triangular_transpose_kernel_code}) |
See the docs for triangular_transpose() .
const kernel_cl<out_buffer, in_buffer, int, int, TriangularViewCL> stan::math::opencl_kernels::unpack("unpack", {indexing_helpers, unpack_kernel_code}) |
See the docs for unpack() .
const kernel_cl<out_buffer, int, int, TriangularViewCL> stan::math::opencl_kernels::zeros("zeros", {indexing_helpers, zeros_kernel_code}) |
See the docs for zeros() .