Stan Math Library  2.20.0
reverse mode automatic differentiation
Namespaces | Classes | Functions | Variables
stan::math::opencl_kernels Namespace Reference

Namespaces

 internal
 

Classes

struct  in_buffer
 
struct  in_out_buffer
 
struct  kernel_cl
 Creates functor for kernels. More...
 
class  kernel_functor
 Functor used for compiling kernels. More...
 
struct  out_buffer
 

Functions

auto compile_kernel (const char *name, const std::vector< const char *> &sources, std::map< const char *, int > &options)
 Compile an OpenCL kernel. More...
 
__kernel void add (__global double *C, __global double *A, __global double *B, unsigned int rows, unsigned int cols)
 Matrix addition on the OpenCL device. More...
 
__kernel void add_batch (__global double *B, __global double *A, unsigned int rows, unsigned int cols, unsigned int batch_size)
 Sums a batch of matrices. More...
 
__kernel void is_zero_on_diagonal (__global double *A, __global int *flag, unsigned int rows, unsigned int cols)
 Check if the matrix_cl has zeros on the diagonal. More...
 
__kernel void is_nan (__global double *A, __global int *flag, unsigned int rows, unsigned int cols)
 Check if the matrix_cl has NaN values. More...
 
__kernel void is_symmetric (__global double *A, __global int *flag, unsigned int rows, unsigned int cols, double tolerance)
 Check if the matrix_cl is symmetric. More...
 
__kernel void cholesky_decompose (__global double *A, int rows)
 Calculates the Cholesky Decomposition of a matrix on an OpenCL device. More...
 
__kernel void copy (__global double *A, __global double *B, unsigned int rows, unsigned int cols)
 Copy one matrix to another. More...
 
__kernel void copy_triangular (__global double *A, __global double *B, unsigned int rows, unsigned int cols, unsigned int lower_upper)
 Copies the lower or upper triangular of the source matrix to the destination matrix. More...
 
__kernel void diag_inv (__global double *A, __global double *tmp_inv, int rows)
 Calculates inplace submatrix inversions along the matrix diagonal. More...
 
__kernel void identity (__global double *A, unsigned int rows, unsigned int cols)
 Makes an identity matrix on the OpenCL device. More...
 
__kernel void batch_identity (__global double *A, unsigned int batch_rows, unsigned int size)
 Makes a batch of smaller identity matrices inside the input matrix. More...
 
__kernel void inv_lower_tri_multiply (__global double *A, __global double *temp, const int A_rows, const int rows)
 Calculates B = C * A. More...
 
__kernel void matrix_multiply (const __global double *A, const __global double *B, __global double *C, const int M, const int N, const int K, unsigned int lower_upper_A, unsigned int lower_upper_B)
 Matrix multiplication on the OpenCL device. More...
 
__kernel void matrix_vector_multiply (const __global double *A, const __global double *B, __global double *R, const int M, const int N, unsigned int lower_upper_A, unsigned int lower_upper_B)
 Matrix-vector multiplication R=A*B on the OpenCL device. More...
 
__kernel void row_vector_matrix_multiply (const __global double *A, const __global double *B, __global double *R, const int N, const int K, unsigned int lower_upper_A, unsigned int lower_upper_B)
 Row vector-matrix multiplication R=A*B on the OpenCL device. More...
 
__kernel void multiply_transpose (const __global double *A, __global double *B, const int M, const int N)
 Matrix multiplication of the form A*A^T on the OpenCL device. More...
 
__kernel void neg_rect_lower_tri_multiply (__global double *A, const __global double *temp, const int A_rows, const int rows)
 Calculates C = -B * A where B is rectangular and A is a lower triangular. More...
 
__kernel void pack (__global double *A, __global double *B, unsigned int rows, unsigned int cols, unsigned int part)
 Packs a flat matrix to a packed triangular matrix. More...
 
__kernel void scalar_mul (__global double *A, const __global double *B, const double scalar, const unsigned int rows, const unsigned int cols)
 Multiplication of the matrix A with a scalar. More...
 
__kernel void scalar_mul_diagonal (__global double *A, const double scalar, const unsigned int rows, const unsigned int min_dim)
 Multiplication of the matrix A diagonal with a scalar. More...
 
__kernel void sub_block (__global double *src, __global double *dst, unsigned int src_offset_i, unsigned int src_offset_j, unsigned int dst_offset_i, unsigned int dst_offset_j, unsigned int size_i, unsigned int size_j, unsigned int src_rows, unsigned int src_cols, unsigned int dst_rows, unsigned int dst_cols, unsigned int triangular_view)
 Copies a submatrix of the source matrix to the destination matrix. More...
 
__kernel void subtract (__global double *C, __global double *A, __global double *B, unsigned int rows, unsigned int cols)
 Matrix subtraction on the OpenCL device. Subtracts the second matrix from the first matrix and stores the result in the third matrix (C=A-B). More...
 
__kernel void transpose (__global double *B, __global double *A, unsigned int rows, unsigned int cols)
 Takes the transpose of the matrix on the OpenCL device. More...
 
__kernel void triangular_transpose (__global double *A, unsigned int rows, unsigned int cols, unsigned int copy_direction)
 Copies a lower/upper triangular of a matrix to its upper/lower. More...
 
__kernel void unpack (__global double *B, __global double *A, unsigned int rows, unsigned int cols, unsigned int part)
 Unpacks a packed triangular matrix to a flat matrix. More...
 
__kernel void zeros (__global double *A, unsigned int rows, unsigned int cols, unsigned int part)
 Stores zeros in the matrix on the OpenCL device. More...
 

Variables

const kernel_cl< out_buffer, in_buffer, in_buffer, int, int > add ("add", {indexing_helpers, add_kernel_code})
 See the docs for add() . More...
 
const kernel_cl< out_buffer, in_buffer, int, int, int > add_batch ("add_batch", {indexing_helpers, add_batch_kernel_code})
 See the docs for add_batch() . More...
 
const kernel_cl< in_buffer, out_buffer, int, int > check_diagonal_zeros ("is_zero_on_diagonal", {indexing_helpers, is_zero_on_diagonal_kernel_code})
 See the docs for is_zero_on_diagonal() . More...
 
const kernel_cl< in_buffer, out_buffer, int, int > check_nan ("is_nan", {indexing_helpers, is_nan_kernel_code})
 See the docs for is_nan() . More...
 
const kernel_cl< in_buffer, out_buffer, int, int, const double > check_symmetric ("is_symmetric", {indexing_helpers, is_symmetric_kernel_code})
 See the docs for is_symmetric() . More...
 
const kernel_cl< in_out_buffer, int > cholesky_decompose ("cholesky_decompose", {indexing_helpers, cholesky_decompose_kernel_code})
 See the docs for cholesky_decompose() . More...
 
const kernel_cl< in_buffer, out_buffer, int, int > copy ("copy", {indexing_helpers, copy_kernel_code})
 See the docs for copy() . More...
 
const kernel_cl< out_buffer, in_buffer, int, int, TriangularViewCL > copy_triangular ("copy_triangular", {indexing_helpers, copy_triangular_kernel_code})
 See the docs for copy_triangular() . More...
 
const kernel_cl< in_out_buffer, in_out_buffer, int > diag_inv ("diag_inv", {indexing_helpers, diag_inv_kernel_code}, {{"THREAD_BLOCK_SIZE", 32}})
 See the docs for diag_inv() . More...
 
static const char * indexing_helpers
 
static const char * thread_block_helpers
 
const kernel_cl< out_buffer, int, int > identity ("identity", {indexing_helpers, identity_kernel_code})
 See the docs for identity() . More...
 
const kernel_cl< out_buffer, int, int > batch_identity ("batch_identity", {indexing_helpers, batch_identity_kernel_code})
 See the docs for batch_identity() . More...
 
const kernel_cl< in_buffer, out_buffer, int, int > inv_lower_tri_multiply ("inv_lower_tri_multiply", {thread_block_helpers, inv_lower_tri_multiply_kernel_code}, {{"THREAD_BLOCK_SIZE", 32}, {"WORK_PER_THREAD", 8}})
 See the docs for inv_lower_tri_multiply() . More...
 
const kernel_cl< in_buffer, in_buffer, out_buffer, int, int, int, TriangularViewCL, TriangularViewCL > matrix_multiply ("matrix_multiply", {thread_block_helpers, matrix_multiply_kernel_code}, {{"THREAD_BLOCK_SIZE", 32}, {"WORK_PER_THREAD", 8}})
 See the docs for matrix_multiply() . More...
 
const kernel_cl< in_buffer, in_buffer, out_buffer, int, int, TriangularViewCL, TriangularViewCL > matrix_vector_multiply ("matrix_vector_multiply", matrix_vector_multiply_kernel_code)
 See the docs for matrix_vector_multiply() . More...
 
const kernel_cl< in_buffer, in_buffer, out_buffer, int, int, TriangularViewCL, TriangularViewCL > row_vector_matrix_multiply ("row_vector_matrix_multiply", row_vector_matrix_multiply_kernel_code, {{"LOCAL_SIZE_", 64}, {"REDUCTION_STEP_SIZE", 4}})
 See the docs for row_vector_matrix_multiply() . More...
 
const kernel_cl< in_buffer, out_buffer, int, int > multiply_transpose ("multiply_transpose", {thread_block_helpers, multiply_transpose_kernel_code}, {{"THREAD_BLOCK_SIZE", 32}, {"WORK_PER_THREAD", 4}})
 See the docs for multiply_transpose() . More...
 
const kernel_cl< in_out_buffer, in_buffer, int, int > neg_rect_lower_tri_multiply ("neg_rect_lower_tri_multiply", {thread_block_helpers, neg_rect_lower_tri_multiply_kernel_code}, {{"THREAD_BLOCK_SIZE", 32}, {"WORK_PER_THREAD", 8}})
 See the docs for neg_rect_lower_tri_multiply() . More...
 
const kernel_cl< out_buffer, in_buffer, int, int, TriangularViewCL > pack ("pack", {indexing_helpers, pack_kernel_code})
 See the docs for pack() . More...
 
const kernel_cl< out_buffer, in_buffer, double, int, int > scalar_mul ("scalar_mul", {indexing_helpers, scalar_mul_kernel_code})
 See the docs for scalar_mul() . More...
 
const kernel_cl< in_out_buffer, double, int, int > scalar_mul_diagonal ("scalar_mul_diagonal", {indexing_helpers, scalar_mul_diagonal_kernel_code})
 See the docs for scalar_mul_diagonal() . More...
 
const kernel_cl< in_buffer, out_buffer, int, int, int, int, int, int, int, int, int, int, TriangularViewCL > sub_block ("sub_block", {indexing_helpers, sub_block_kernel_code})
 See the docs for sub_block() . More...
 
const kernel_cl< out_buffer, in_buffer, in_buffer, int, int > subtract ("subtract", {indexing_helpers, subtract_kernel_code})
 See the docs for subtract() . More...
 
const kernel_cl< out_buffer, in_buffer, int, int > transpose ("transpose", {indexing_helpers, transpose_kernel_code})
 See the docs for transpose() . More...
 
const kernel_cl< in_out_buffer, int, int, TriangularMapCL > triangular_transpose ("triangular_transpose", {indexing_helpers, triangular_transpose_kernel_code})
 See the docs for triangular_transpose() . More...
 
const kernel_cl< out_buffer, in_buffer, int, int, TriangularViewCL > unpack ("unpack", {indexing_helpers, unpack_kernel_code})
 See the docs for unpack() . More...
 
const kernel_cl< out_buffer, int, int, TriangularViewCL > zeros ("zeros", {indexing_helpers, zeros_kernel_code})
 See the docs for zeros() . More...
 

Function Documentation

◆ add()

__kernel void stan::math::opencl_kernels::add ( __global double *  C,
__global double *  A,
__global double *  B,
unsigned int  rows,
unsigned int  cols 
)

Matrix addition on the OpenCL device.

Parameters
[out]COutput matrix.
[in]ALHS of matrix addition.
[in]BRHS of matrix addition.
rowsNumber of rows for matrix A.
colsNumber of cols for matrix A.
Note
Code is a const char* held in add_kernel_code. This kernel uses the helper macros available in helpers.cl.

Definition at line 26 of file add.hpp.

◆ add_batch()

__kernel void stan::math::opencl_kernels::add_batch ( __global double *  B,
__global double *  A,
unsigned int  rows,
unsigned int  cols,
unsigned int  batch_size 
)

Sums a batch of matrices.

Buffer A contains batch_size matrices of size rows x cols. All elements at matching indices are summed up and stored to the resulting matrix B.

Parameters
[out]Bbuffer of the result matrix.
[in]Abuffer containing the entire batch.
rowsNumber of rows for a single matrix in the batch.
colsNumber of cols for a single matrix in the batch.
batch_sizeNumber of matrices in the batch.
Note
Code is a const char* held in add_batch_kernel_code. This kernel uses the helper macros available in helpers.cl.

Definition at line 62 of file add.hpp.

◆ batch_identity()

__kernel void stan::math::opencl_kernels::batch_identity ( __global double *  A,
unsigned int  batch_rows,
unsigned int  size 
)

Makes a batch of smaller identity matrices inside the input matrix.

This kernel operates inplace on the matrix A, filling it with smaller identity matrices with a size of batch_rows x batch_rows. This kernel expects a 3D organization of threads: 1st dim: the number of matrices in the batch. 2nd dim: the number of cols/rows in batch matrices. 3rd dim: the number of cols/rows in batch matrices. Each thread in the organization assigns a single value in the batch. In order to create a batch of 3 matrices the size of NxN you need to run the kernel batch_identity(A, N, 3*N*N) with (3, N, N) threads. The special case of batch_identity(A, N, N*N) executed on (1, N, N) threads creates a single identity matrix the size of NxN and is therefore equal to the basic identity kernel.

Parameters
[in,out]AThe batched identity matrix output.
batch_rowsThe number of rows/cols for the smaller matrices in the batch
sizeThe size of A.
Note
Code is a const char* held in batch_identity_kernel_code. This kernel uses the helper macros available in helpers.cl.

Definition at line 68 of file identity.hpp.

◆ cholesky_decompose()

__kernel void stan::math::opencl_kernels::cholesky_decompose ( __global double *  A,
int  rows 
)

Calculates the Cholesky Decomposition of a matrix on an OpenCL device.

This kernel is run with threads organized in one dimension and in a single thread block. The kernel is best suited for small input matrices as it only utilizes a single streaming multiprocessor. The kernel is used as a part of a blocked cholesky decompose.

Parameters
[in,out]AThe input matrix and the result of the cholesky decomposition
rowsThe number of rows for A.
Note
Code is a const char* held in cholesky_decompose_kernel_code. Used in math/opencl/cholesky_decompose.hpp. This kernel uses the helper macros available in helpers.cl.

Definition at line 32 of file cholesky_decompose.hpp.

◆ compile_kernel()

auto stan::math::opencl_kernels::compile_kernel ( const char *  name,
const std::vector< const char *> &  sources,
std::map< const char *, int > &  options 
)
inline

Compile an OpenCL kernel.

Parameters
nameThe name for the kernel
sourcesA std::vector of strings containing the code for the kernel.
optionsThe values of macros to be passed at compile time.

Definition at line 124 of file kernel_cl.hpp.

◆ copy()

__kernel void stan::math::opencl_kernels::copy ( __global double *  A,
__global double *  B,
unsigned int  rows,
unsigned int  cols 
)

Copy one matrix to another.

Parameters
[in]AThe matrix to copy.
[out]BThe matrix to copy A to.
rowsThe number of rows in A.
colsThe number of cols in A.
Note
Code is a const char* held in copy_kernel_code. Kernel used in math/opencl/matrix_cl.hpp. This kernel uses the helper macros available in helpers.cl.

Definition at line 26 of file copy.hpp.

◆ copy_triangular()

__kernel void stan::math::opencl_kernels::copy_triangular ( __global double *  A,
__global double *  B,
unsigned int  rows,
unsigned int  cols,
unsigned int  lower_upper 
)

Copies the lower or upper triangular of the source matrix to the destination matrix.

Both matrices are stored on the OpenCL device.

Parameters
[out]AOutput matrix to copy triangular to.
[in]BThe matrix to copy the triangular from.
rowsThe number of rows of B.
colsThe number of cols of B.
lower_upperdetermines which part of the matrix to copy: LOWER: 0 - copies the lower triangular UPPER: 1 - copies the upper triangular
Note
Code is a const char* held in copy_triangular_kernel_code. Used in math/opencl/copy_triangular_opencl.hpp. This kernel uses the helper macros available in helpers.cl.

Definition at line 33 of file copy_triangular.hpp.

◆ diag_inv()

__kernel void stan::math::opencl_kernels::diag_inv ( __global double *  A,
__global double *  tmp_inv,
int  rows 
)

Calculates inplace submatrix inversions along the matrix diagonal.

For a full guide to the inverse lower triangular kernels see the link here. In the special case that the thread block size is larger than the input matrix A then this kernel will perform the complete lower triangular inverse of matrix A. More often, TB is smaller than A and A will have lower triangular inverses calculated on submatrices along the diagonal equal to the size of the thread block. Running this kernel on a matrix with N = 4 * thread_block will yield a lower triangular matrix with identity matrices in blue as shown below.

Identity matrices in the blue triangles

This kernel is run with threads organized in a single dimension. If we want to calculate N blocks of size TB across the diagonal we spawn N x TB threads with TB used as the thread block size.

Parameters
[in,out]AThe input matrix.
[in,out]tmp_invA matrix with batches of identities matrices along the diagonal.
rowsThe number of rows for A.
Note
Code is a const char* held in diag_inv_kernel_code. Used in math/opencl/tri_inverse.hpp. This kernel uses the helper macros available in helpers.cl.

Definition at line 42 of file diag_inv.hpp.

◆ identity()

__kernel void stan::math::opencl_kernels::identity ( __global double *  A,
unsigned int  rows,
unsigned int  cols 
)

Makes an identity matrix on the OpenCL device.

Parameters
[in,out]AThe identity matrix output.
rowsThe number of rows for A.
colsThe number of cols for A.
Note
Code is a const char* held in identity_kernel_code. Used in math/opencl/identity_opencl.hpp. This kernel uses the helper macros available in helpers.cl.

Definition at line 25 of file identity.hpp.

◆ inv_lower_tri_multiply()

__kernel void stan::math::opencl_kernels::inv_lower_tri_multiply ( __global double *  A,
__global double *  temp,
const int  A_rows,
const int  rows 
)

Calculates B = C * A.

C is an inverse matrix and A is lower triangular.

This kernel is used in the final iteration of the batched lower triangular inversion. For a full guide to the inverse lower triangular kernels see the link here. The full inverse requires calculation of the lower left rectangular matrix within the lower left triangular C3 = -C2*A3*C1, where C2 is the inverse of the bottom right lower triangular, C1 is the inverse of the upper left lower and A3 is the original lower triangular's lower left rectangular. This kernel takes the output from neg_rect_lower_tri_multiply and applies the submatrix multiplication to get the final output for C3.

Inverse Calculation

Graphically, this kernel calculates the C2 * A3. The kernel is executed using (N, N, m) threads, where N is the size of the input matrices.

Parameters
[in]Ainput matrix that is being inverted.
[out]tempoutput matrix with results of the batched matrix multiplications
A_rowsThe number of rows for A.
rowsThe number of rows in a single matrix of the batch
Note
Code is a const char* held in inv_lower_tri_multiply_kernel_code. Used in math/opencl/tri_inverse.hpp. This kernel uses the helper macros available in helpers.cl.

Definition at line 45 of file inv_lower_tri_multiply.hpp.

◆ is_nan()

__kernel void stan::math::opencl_kernels::is_nan ( __global double *  A,
__global int *  flag,
unsigned int  rows,
unsigned int  cols 
)

Check if the matrix_cl has NaN values.

Parameters
[in]AThe matrix to check.
rowsThe number of rows in matrix A.
colsThe number of columns in matrix A.
[out]flagthe flag to be written to if any value is NaN.
Note
Code is a const char* held in is_nan_kernel_code. Kernel for stan/math/opencl/err/check_nan.hpp. This kernel uses the helper macros available in helpers.cl.

Definition at line 26 of file check_nan.hpp.

◆ is_symmetric()

__kernel void stan::math::opencl_kernels::is_symmetric ( __global double *  A,
__global int *  flag,
unsigned int  rows,
unsigned int  cols,
double  tolerance 
)

Check if the matrix_cl is symmetric.

Parameters
[in]AThe matrix to check.
rowsThe number of rows in matrix A.
colsThe number of columns in matrix A.
[out]flagthe flag to be written to if the matrix is not symmetric.
toleranceThe numerical tolerance to check whether two values are equal
Note
Code is a const char* held in is_symmetric_kernel_code. Kernel for stan/math/opencl/err/check_symmetric.hpp. This kernel uses the helper macros available in helpers.cl.

Definition at line 28 of file check_symmetric.hpp.

◆ is_zero_on_diagonal()

__kernel void stan::math::opencl_kernels::is_zero_on_diagonal ( __global double *  A,
__global int *  flag,
unsigned int  rows,
unsigned int  cols 
)

Check if the matrix_cl has zeros on the diagonal.

Parameters
[in]AMatrix to check.
[out]flagthe flag to be written to if any diagonal is zero.
rowsThe number of rows for A.
colsThe number of cols of A.
Note
Code is a const char* held in is_zero_on_diagonal_kernel_code. Kernel for stan/math/opencl/err/check_diagonal_zeros.hpp. This kernel uses the helper macros available in helpers.cl.

Definition at line 26 of file check_diagonal_zeros.hpp.

◆ matrix_multiply()

__kernel void stan::math::opencl_kernels::matrix_multiply ( const __global double *  A,
const __global double *  B,
__global double *  C,
const int  M,
const int  N,
const int  K,
unsigned int  lower_upper_A,
unsigned int  lower_upper_B 
)

Matrix multiplication on the OpenCL device.

Parameters
[in]Athe left matrix in matrix multiplication
[in]Bthe right matrix in matrix multiplication
[out]Cthe output matrix
[in]MNumber of rows for matrix A
[in]NNumber of cols for matrix B
[in]KNumber of cols for matrix A and number of rows for matrix B
[in]lower_upper_Athe triangularity of A (lower, upper or none)
[in]lower_upper_Bthe triangularity of B (lower, upper or none)

Definition at line 26 of file matrix_multiply.hpp.

◆ matrix_vector_multiply()

__kernel void stan::math::opencl_kernels::matrix_vector_multiply ( const __global double *  A,
const __global double *  B,
__global double *  R,
const int  M,
const int  N,
unsigned int  lower_upper_A,
unsigned int  lower_upper_B 
)

Matrix-vector multiplication R=A*B on the OpenCL device.

Parameters
[in]Amatrix in matrix-vector multiplication
[in]Bvector in matrix-vector multiplication
[out]Rthe output vector
[in]MNumber of rows for matrix A
[in]NNumber of cols for matrix A and number of rows for vector B
[in]lower_upper_Athe triangularity of A (lower, upper or none)
[in]lower_upper_Bthe triangularity of B (lower, upper or none)

Definition at line 185 of file matrix_multiply.hpp.

◆ multiply_transpose()

__kernel void stan::math::opencl_kernels::multiply_transpose ( const __global double *  A,
__global double *  B,
const int  M,
const int  N 
)

Matrix multiplication of the form A*A^T on the OpenCL device.

Parameters
[in]Amatrix A
[out]Bthe output matrix
[in]MNumber of rows for matrix A
[in]NNumber of cols for matrix A and the number of rows for matrix A^T

Definition at line 23 of file multiply_transpose.hpp.

◆ neg_rect_lower_tri_multiply()

__kernel void stan::math::opencl_kernels::neg_rect_lower_tri_multiply ( __global double *  A,
const __global double *  temp,
const int  A_rows,
const int  rows 
)

Calculates C = -B * A where B is rectangular and A is a lower triangular.

For a full guide to the inverse lower triangular kernels see the link here.

Inverse Calculation

Graphically, this kernel calculates -temp * C1 where temp is the C2 * A3 calculation from inv_lower_tri_multiply(). The kernel is executed using (N, N, m) threads, where N is the size of the input matrices.

Parameters
[in,out]AInput matrix that is being inverted.
[in]tempTemporary matrix with the intermediate results.
A_rowsNumber of rows for A.
rowsThe number of rows in a single matrix of the batch
Note
Code is a const char* held in neg_rect_lower_tri_multiply_kernel_code. Used in math/opencl/tri_inverse.hpp. This kernel uses the helper macros available in helpers.cl.

Definition at line 39 of file neg_rect_lower_tri_multiply.hpp.

◆ pack()

__kernel void stan::math::opencl_kernels::pack ( __global double *  A,
__global double *  B,
unsigned int  rows,
unsigned int  cols,
unsigned int  part 
)

Packs a flat matrix to a packed triangular matrix.

Parameters
[out]Apacked buffer
[in]Bflat matrix
rowsnumber of rows for matrix B
colsnumber of columns for matrix B
partparameter that defines the triangularity of the input matrix LOWER - lower triangular UPPER - upper triangular if the part parameter is not specified
Note
Code is a const char* held in pack_kernel_code. This kernel uses the helper macros available in helpers.cl.

Definition at line 30 of file pack.hpp.

◆ row_vector_matrix_multiply()

__kernel void stan::math::opencl_kernels::row_vector_matrix_multiply ( const __global double *  A,
const __global double *  B,
__global double *  R,
const int  N,
const int  K,
unsigned int  lower_upper_A,
unsigned int  lower_upper_B 
)

Row vector-matrix multiplication R=A*B on the OpenCL device.

Parameters
[in]Arow vector in row vector-matrix multiplication
[in]Bmatrix in row vector-matrix multiplication
[out]Rthe output vector
[in]NNumber of cols for row vector A and number of rows for matrix B
[in]KNumber of cols for matrix B
[in]lower_upper_Athe triangularity of A (lower, upper or none)
[in]lower_upper_Bthe triangularity of B (lower, upper or none)

Definition at line 229 of file matrix_multiply.hpp.

◆ scalar_mul()

__kernel void stan::math::opencl_kernels::scalar_mul ( __global double *  A,
const __global double *  B,
const double  scalar,
const unsigned int  rows,
const unsigned int  cols 
)

Multiplication of the matrix A with a scalar.

Parameters
[out]Aoutput matrix
[in]Binput matrix
[in]scalarthe value with which to multiply A
[in]rowsthe number of rows in A
[in]colsthe number of columns in A

Definition at line 23 of file scalar_mul.hpp.

◆ scalar_mul_diagonal()

__kernel void stan::math::opencl_kernels::scalar_mul_diagonal ( __global double *  A,
const double  scalar,
const unsigned int  rows,
const unsigned int  min_dim 
)

Multiplication of the matrix A diagonal with a scalar.

Parameters
[in,out]Amatrix A
[in]scalarthe value with which to multiply the diagonal of A
[in]rowsthe number of rows in A
[in]min_dimthe size of the smaller dimension of A

Definition at line 22 of file scalar_mul_diagonal.hpp.

◆ sub_block()

__kernel void stan::math::opencl_kernels::sub_block ( __global double *  src,
__global double *  dst,
unsigned int  src_offset_i,
unsigned int  src_offset_j,
unsigned int  dst_offset_i,
unsigned int  dst_offset_j,
unsigned int  size_i,
unsigned int  size_j,
unsigned int  src_rows,
unsigned int  src_cols,
unsigned int  dst_rows,
unsigned int  dst_cols,
unsigned int  triangular_view 
)

Copies a submatrix of the source matrix to the destination matrix.

The submatrix to copy starts at (src_offset_i, src_offset_j) in the source matrix and is of size size_i x size_j. The submatrix is copied to the destination matrix starting at (dst_offset_i, dst_offset_j).

Parameters
[in]srcThe source matrix.
[out]dstThe destination submatrix.
src_offset_iThe offset row in src.
src_offset_jThe offset column in src.
dst_offset_iThe offset row in dst.
dst_offset_jThe offset column in dst.
size_iThe number of rows in the submatrix.
size_jThe number of columns in the submatrix.
src_rowsThe number of rows in the source matrix.
src_colsThe number of cols in the source matrix.
dst_rowsThe number of rows in the destination matrix.
dst_colsThe number of cols in the destination matrix.
triangular_viewthe triangularity of src (lower, upper or none)
Note
Code is a const char* held in sub_block_kernel_code. Used in math/opencl/copy_submatrix_opencl.hpp. This kernel uses the helper macros available in helpers.cl.

Definition at line 44 of file sub_block.hpp.

◆ subtract()

__kernel void stan::math::opencl_kernels::subtract ( __global double *  C,
__global double *  A,
__global double *  B,
unsigned int  rows,
unsigned int  cols 
)

Matrix subtraction on the OpenCL device. Subtracts the second matrix from the first matrix and stores the result in the third matrix (C=A-B).

Parameters
[out]CThe output matrix.
[in]BRHS input matrix.
[in]ALHS input matrix.
rowsThe number of rows for matrix A.
colsThe number of columns for matrix A.
Note
Code is a const char* held in subtract_kernel_code. Used in math/opencl/subtract_opencl.hpp. This kernel uses the helper macros available in helpers.cl.

Definition at line 30 of file subtract.hpp.

◆ transpose()

__kernel void stan::math::opencl_kernels::transpose ( __global double *  B,
__global double *  A,
unsigned int  rows,
unsigned int  cols 
)

Takes the transpose of the matrix on the OpenCL device.

Parameters
[out]BThe output matrix to hold transpose of A.
[in]AThe input matrix to transpose into B.
rowsThe number of rows for A.
colsThe number of columns for A.
Note
Code is a const char* held in transpose_kernel_code. This kernel uses the helper macros available in helpers.cl.

Definition at line 25 of file transpose.hpp.

◆ triangular_transpose()

__kernel void stan::math::opencl_kernels::triangular_transpose ( __global double *  A,
unsigned int  rows,
unsigned int  cols,
unsigned int  copy_direction 
)

Copies a lower/upper triangular of a matrix to its upper/lower.

Parameters
[in,out]AThe matrix.
rowsThe number of rows in A.
colsThe number of cols in A.
copy_directionA value of zero or one specifying which direction to copy LOWER_TO_UPPER: 1 UPPER_TO_LOWER: 0
Note
Code is a const char* held in triangular_transpose_kernel_code. Used in mat/opencl/triangular_transpose.hpp. This kernel uses the helper macros available in helpers.cl.

Definition at line 29 of file triangular_transpose.hpp.

◆ unpack()

__kernel void stan::math::opencl_kernels::unpack ( __global double *  B,
__global double *  A,
unsigned int  rows,
unsigned int  cols,
unsigned int  part 
)

Unpacks a packed triangular matrix to a flat matrix.

Parameters
[out]Bflat matrix
[in]Apacked buffer
rowsnumber of rows for matrix B
colsnumber of columns for matrix B
partparameter that defines the triangularity of the input matrix LOWER - lower triangular UPPER - upper triangular if the part parameter is not specified
Note
Code is a const char* held in unpack_kernel_code. This kernel uses the helper macros available in helpers.cl.

Definition at line 31 of file unpack.hpp.

◆ zeros()

__kernel void stan::math::opencl_kernels::zeros ( __global double *  A,
unsigned int  rows,
unsigned int  cols,
unsigned int  part 
)

Stores zeros in the matrix on the OpenCL device.

Supports writing zeroes to the lower and upper triangular or the whole matrix.

Parameters
[out]Amatrix
rowsNumber of rows for matrix A
colsNumber of columns for matrix A
partoptional parameter that describes where to assign zeros: LOWER - lower triangular UPPER - upper triangular if the part parameter is not specified, zeros are assigned to the whole matrix.
Note
Code is a const char* held in zeros_kernel_code. This kernel uses the helper macros available in helpers.cl.

Definition at line 31 of file zeros.hpp.

Variable Documentation

◆ add

const kernel_cl<out_buffer, in_buffer, in_buffer, int, int> stan::math::opencl_kernels::add("add", {indexing_helpers, add_kernel_code})

See the docs for add() .

◆ add_batch

const kernel_cl<out_buffer, in_buffer, int, int, int> stan::math::opencl_kernels::add_batch("add_batch", {indexing_helpers, add_batch_kernel_code})

See the docs for add_batch() .

◆ batch_identity

const kernel_cl<out_buffer, int, int> stan::math::opencl_kernels::batch_identity("batch_identity", {indexing_helpers, batch_identity_kernel_code})

See the docs for batch_identity() .

◆ check_diagonal_zeros

const kernel_cl<in_buffer, out_buffer, int, int> stan::math::opencl_kernels::check_diagonal_zeros("is_zero_on_diagonal", {indexing_helpers, is_zero_on_diagonal_kernel_code})

See the docs for is_zero_on_diagonal() .

◆ check_nan

const kernel_cl<in_buffer, out_buffer, int, int> stan::math::opencl_kernels::check_nan("is_nan", {indexing_helpers, is_nan_kernel_code})

See the docs for is_nan() .

◆ check_symmetric

const kernel_cl<in_buffer, out_buffer, int, int, const double> stan::math::opencl_kernels::check_symmetric("is_symmetric", {indexing_helpers, is_symmetric_kernel_code})

See the docs for is_symmetric() .

◆ cholesky_decompose

const kernel_cl<in_out_buffer, int> stan::math::opencl_kernels::cholesky_decompose("cholesky_decompose", {indexing_helpers, cholesky_decompose_kernel_code})

See the docs for cholesky_decompose() .

◆ copy

const kernel_cl<in_buffer, out_buffer, int, int> stan::math::opencl_kernels::copy("copy", {indexing_helpers, copy_kernel_code})

See the docs for copy() .

◆ copy_triangular

const kernel_cl<out_buffer, in_buffer, int, int, TriangularViewCL> stan::math::opencl_kernels::copy_triangular("copy_triangular", {indexing_helpers, copy_triangular_kernel_code})

See the docs for copy_triangular() .

◆ diag_inv

const kernel_cl<in_out_buffer, in_out_buffer, int> stan::math::opencl_kernels::diag_inv("diag_inv", {indexing_helpers, diag_inv_kernel_code}, {{"THREAD_BLOCK_SIZE", 32}})

See the docs for diag_inv() .

◆ identity

const kernel_cl<out_buffer, int, int> stan::math::opencl_kernels::identity("identity", {indexing_helpers, identity_kernel_code})

See the docs for identity() .

◆ indexing_helpers

const char* stan::math::opencl_kernels::indexing_helpers
static
Initial value:
=
R"(
// Matrix access helpers
#ifndef A_batch
#define A_batch(i,j,k) A[(k) * cols * rows + (j) * rows + (i)]
#endif
#ifndef A
#define A(i,j) A[(j) * rows + (i)]
#endif
#ifndef B
#define B(i,j) B[(j) * rows + (i)]
#endif
#ifndef C
#define C(i,j) C[(j) * rows + (i)]
#endif
// Transpose
#ifndef BT
#define BT(i,j) B[(j) * cols + (i)]
#endif
#ifndef AT
#define AT(i,j) A[(j) * cols + (i)]
#endif
// Moving between two buffers
#ifndef src
#define src(i,j) src[(j) * src_rows + (i)]
#endif
#ifndef dst
#define dst(i,j) dst[(j) * dst_rows + (i)]
#endif
)"

Definition at line 14 of file helpers.hpp.

◆ inv_lower_tri_multiply

const kernel_cl<in_buffer, out_buffer, int, int> stan::math::opencl_kernels::inv_lower_tri_multiply("inv_lower_tri_multiply", {thread_block_helpers, inv_lower_tri_multiply_kernel_code}, {{"THREAD_BLOCK_SIZE", 32}, {"WORK_PER_THREAD", 8}})

See the docs for inv_lower_tri_multiply() .

◆ matrix_multiply

const kernel_cl<in_buffer, in_buffer, out_buffer, int, int, int, TriangularViewCL, TriangularViewCL> stan::math::opencl_kernels::matrix_multiply("matrix_multiply", {thread_block_helpers, matrix_multiply_kernel_code}, {{"THREAD_BLOCK_SIZE", 32}, {"WORK_PER_THREAD", 8}})

See the docs for matrix_multiply() .

◆ matrix_vector_multiply

const kernel_cl<in_buffer, in_buffer, out_buffer, int, int, TriangularViewCL, TriangularViewCL> stan::math::opencl_kernels::matrix_vector_multiply("matrix_vector_multiply", matrix_vector_multiply_kernel_code)

See the docs for matrix_vector_multiply() .

◆ multiply_transpose

const kernel_cl<in_buffer, out_buffer, int, int> stan::math::opencl_kernels::multiply_transpose("multiply_transpose", {thread_block_helpers, multiply_transpose_kernel_code}, {{"THREAD_BLOCK_SIZE", 32}, {"WORK_PER_THREAD", 4}})

See the docs for multiply_transpose() .

◆ neg_rect_lower_tri_multiply

const kernel_cl<in_out_buffer, in_buffer, int, int> stan::math::opencl_kernels::neg_rect_lower_tri_multiply("neg_rect_lower_tri_multiply", {thread_block_helpers, neg_rect_lower_tri_multiply_kernel_code}, {{"THREAD_BLOCK_SIZE", 32}, {"WORK_PER_THREAD", 8}})

See the docs for neg_rect_lower_tri_multiply() .

◆ pack

const kernel_cl<out_buffer, in_buffer, int, int, TriangularViewCL> stan::math::opencl_kernels::pack("pack", {indexing_helpers, pack_kernel_code})

See the docs for pack() .

◆ row_vector_matrix_multiply

const kernel_cl<in_buffer, in_buffer, out_buffer, int, int, TriangularViewCL, TriangularViewCL> stan::math::opencl_kernels::row_vector_matrix_multiply("row_vector_matrix_multiply", row_vector_matrix_multiply_kernel_code, {{"LOCAL_SIZE_", 64}, {"REDUCTION_STEP_SIZE", 4}})

See the docs for row_vector_matrix_multiply() .

◆ scalar_mul

const kernel_cl<out_buffer, in_buffer, double, int, int> stan::math::opencl_kernels::scalar_mul("scalar_mul", {indexing_helpers, scalar_mul_kernel_code})

See the docs for scalar_mul() .

◆ scalar_mul_diagonal

const kernel_cl<in_out_buffer, double, int, int> stan::math::opencl_kernels::scalar_mul_diagonal("scalar_mul_diagonal", {indexing_helpers, scalar_mul_diagonal_kernel_code})

See the docs for scalar_mul_diagonal() .

◆ sub_block

const kernel_cl<in_buffer, out_buffer, int, int, int, int, int, int, int, int, int, int, TriangularViewCL> stan::math::opencl_kernels::sub_block("sub_block", {indexing_helpers, sub_block_kernel_code})

See the docs for sub_block() .

◆ subtract

const kernel_cl<out_buffer, in_buffer, in_buffer, int, int> stan::math::opencl_kernels::subtract("subtract", {indexing_helpers, subtract_kernel_code})

See the docs for subtract() .

◆ thread_block_helpers

const char* stan::math::opencl_kernels::thread_block_helpers
static
Initial value:
=
R"(
// The local memory column for each thread block
#define THREAD_BLOCK_SIZE_COL THREAD_BLOCK_SIZE/WORK_PER_THREAD
)"

Definition at line 48 of file helpers.hpp.

◆ transpose

const kernel_cl<out_buffer, in_buffer, int, int> stan::math::opencl_kernels::transpose("transpose", {indexing_helpers, transpose_kernel_code})

See the docs for transpose() .

◆ triangular_transpose

const kernel_cl<in_out_buffer, int, int, TriangularMapCL> stan::math::opencl_kernels::triangular_transpose("triangular_transpose", {indexing_helpers, triangular_transpose_kernel_code})

See the docs for triangular_transpose() .

◆ unpack

const kernel_cl<out_buffer, in_buffer, int, int, TriangularViewCL> stan::math::opencl_kernels::unpack("unpack", {indexing_helpers, unpack_kernel_code})

See the docs for unpack() .

◆ zeros

const kernel_cl<out_buffer, int, int, TriangularViewCL> stan::math::opencl_kernels::zeros("zeros", {indexing_helpers, zeros_kernel_code})

See the docs for zeros() .


     [ Stan Home Page ] © 2011–2018, Stan Development Team.