Skip to content
GitLab
Menu
Projects
Groups
Snippets
Help
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
Francesco Brarda
stan-math-petsc
Commits
6b3a15d7
Unverified
Commit
6b3a15d7
authored
6 years ago
by
Steve Bronder
Browse files
Options
Download
Email Patches
Plain Diff
removes read_only, write_only, and read_write from the arguments in the GPU kernel signature
parent
159f3dfe
stan-petsc
bugfix/1063-std-lgamma
bugfix/1152-algebra_solver-lambdas
bugfix/issue-1250-lgamma
bugfix/issue-1270-add-check-for-meta-includes
bugfix/issue-2708-map-rect-fail
build/config-device-id
code-cleanup/chain-final
code-cleanup/issue-937-flatten
develop
feature/1258-ad-test-core
feature/automatic-autodiff-testing
feature/concept-chainable-allocator
feature/daniel-windows
feature/eigen-aligned-malloc
feature/faster-ad-tls
feature/faster-ad-tls-v2
feature/faster-ad-tls-v3
feature/faster-ad-tls-v4
feature/faster-ad-tls-v4-windows
feature/faster-ad-tls-v6
feature/intel-tbb-lib
feature/issue-1012-binorm-copula-cdf
feature/issue-1115-newton_solver
feature/issue-123-complex
feature/issue-1257-diff_algebra_solver
feature/issue-38-multi_normal_sufficient
feature/issue-755-laplace
feature/issue-937-flatten-meta-again
feature/issue-937-flatten-meta-the-third
feature/issue-937-flatten-meta-third
feature/issue-962-bivar-norm
feature/issue-989-rev-mat-eig
feature/lambertw
feature/map_rect-cpp17
feature/map_rect-fail-windows
feature/matrix_sqrt
feature/openMP
feature/parallel_for_each
feature/python-test-math-dependencies
feature/refactor-nested
feature/sparse-cholesky
gpu_performance_tests
internal/no-assert
issue-static-init-order
master
mpi_errors
parallel-ad-tape-3
release/v2.19.0
release/v2.19.1
release/v2.20.0
seantest/faster-ad-tls-v3
stancon/syclik
syclik/forward-mode
v2.20.0
v2.19.1
v2.19.0
No related merge requests found
Changes
15
Hide whitespace changes
Inline
Side-by-side
Showing
15 changed files
stan/math/gpu/kernels/add.hpp
+3
-4
stan/math/gpu/kernels/add.hpp
stan/math/gpu/kernels/check_diagonal_zeros.hpp
+2
-3
stan/math/gpu/kernels/check_diagonal_zeros.hpp
stan/math/gpu/kernels/check_nan.hpp
+2
-3
stan/math/gpu/kernels/check_nan.hpp
stan/math/gpu/kernels/check_symmetric.hpp
+3
-4
stan/math/gpu/kernels/check_symmetric.hpp
stan/math/gpu/kernels/copy.hpp
+2
-3
stan/math/gpu/kernels/copy.hpp
stan/math/gpu/kernels/copy_triangular.hpp
+3
-4
stan/math/gpu/kernels/copy_triangular.hpp
stan/math/gpu/kernels/identity.hpp
+2
-3
stan/math/gpu/kernels/identity.hpp
stan/math/gpu/kernels/matrix_multiply.hpp
+3
-4
stan/math/gpu/kernels/matrix_multiply.hpp
stan/math/gpu/kernels/multiply_transpose.hpp
+3
-3
stan/math/gpu/kernels/multiply_transpose.hpp
stan/math/gpu/kernels/scalar_mul_diagonal.hpp
+3
-4
stan/math/gpu/kernels/scalar_mul_diagonal.hpp
stan/math/gpu/kernels/sub_block.hpp
+5
-8
stan/math/gpu/kernels/sub_block.hpp
stan/math/gpu/kernels/subtract.hpp
+3
-4
stan/math/gpu/kernels/subtract.hpp
stan/math/gpu/kernels/transpose.hpp
+2
-3
stan/math/gpu/kernels/transpose.hpp
stan/math/gpu/kernels/triangular_transpose.hpp
+3
-3
stan/math/gpu/kernels/triangular_transpose.hpp
stan/math/gpu/kernels/zeros.hpp
+2
-3
stan/math/gpu/kernels/zeros.hpp
with
41 additions
and
56 deletions
+41
-56
stan/math/gpu/kernels/add.hpp
View file @
6b3a15d7
...
...
@@ -22,10 +22,9 @@ const char *add_kernel_code = STRINGIFY(
* <code>add_kernel_code.</code>
* This kernel uses the helper macros available in helpers.cl.
*/
__kernel
void
add
(
__global
write_only
double
*
C
,
__global
read_only
double
*
A
,
__global
read_only
double
*
B
,
read_only
unsigned
int
rows
,
read_only
unsigned
int
cols
)
{
__kernel
void
add
(
__global
double
*
C
,
__global
double
*
A
,
__global
double
*
B
,
unsigned
int
rows
,
unsigned
int
cols
)
{
int
i
=
get_global_id
(
0
);
int
j
=
get_global_id
(
1
);
if
(
i
<
rows
&&
j
<
cols
)
{
...
...
This diff is collapsed.
Click to expand it.
stan/math/gpu/kernels/check_diagonal_zeros.hpp
View file @
6b3a15d7
...
...
@@ -22,9 +22,8 @@ const char *is_zero_on_diagonal_kernel_code = STRINGIFY(
* Kernel for stan/math/gpu/err/check_diagonal_zeros.hpp.
* This kernel uses the helper macros available in helpers.cl.
*/
__kernel
void
is_zero_on_diagonal
(
__global
read_only
double
*
A
,
__global
int
*
flag
,
read_only
unsigned
int
rows
,
write_only
unsigned
int
cols
)
{
__kernel
void
is_zero_on_diagonal
(
__global
double
*
A
,
__global
int
*
flag
,
unsigned
int
rows
,
unsigned
int
cols
)
{
const
int
i
=
get_global_id
(
0
);
if
(
i
<
rows
&&
i
<
cols
)
{
if
(
A
(
i
,
i
)
==
0
)
{
...
...
This diff is collapsed.
Click to expand it.
stan/math/gpu/kernels/check_nan.hpp
View file @
6b3a15d7
...
...
@@ -22,9 +22,8 @@ const char *is_nan_kernel_code = STRINGIFY(
* Kernel for stan/math/gpu/err/check_nan.hpp.
* This kernel uses the helper macros available in helpers.cl.
*/
__kernel
void
is_nan
(
__global
read_only
double
*
A
,
__global
write_only
int
*
flag
,
read_only
unsigned
int
rows
,
read_only
unsigned
int
cols
)
{
__kernel
void
is_nan
(
__global
double
*
A
,
__global
int
*
flag
,
unsigned
int
rows
,
unsigned
int
cols
)
{
const
int
i
=
get_global_id
(
0
);
const
int
j
=
get_global_id
(
1
);
if
(
i
<
rows
&&
j
<
cols
)
{
...
...
This diff is collapsed.
Click to expand it.
stan/math/gpu/kernels/check_symmetric.hpp
View file @
6b3a15d7
...
...
@@ -24,10 +24,9 @@ const char *is_symmetric_kernel_code = STRINGIFY(
* Kernel for stan/math/gpu/err/check_symmetric.hpp.
* This kernel uses the helper macros available in helpers.cl.
*/
__kernel
void
is_symmetric
(
__global
read_only
double
*
A
,
__global
write_only
int
*
flag
,
read_only
unsigned
int
rows
,
read_only
unsigned
int
cols
,
read_only
double
tolerance
)
{
__kernel
void
is_symmetric
(
__global
double
*
A
,
__global
int
*
flag
,
unsigned
int
rows
,
unsigned
int
cols
,
double
tolerance
)
{
const
int
i
=
get_global_id
(
0
);
const
int
j
=
get_global_id
(
1
);
if
(
i
<
rows
&&
j
<
cols
)
{
...
...
This diff is collapsed.
Click to expand it.
stan/math/gpu/kernels/copy.hpp
View file @
6b3a15d7
...
...
@@ -21,9 +21,8 @@ const char *copy_kernel_code = STRINGIFY(
* Kernel used in math/gpu/matrix_gpu.hpp.
* This kernel uses the helper macros available in helpers.cl.
*/
__kernel
void
copy
(
__global
read_only
double
*
A
,
__global
write_only
double
*
B
,
read_only
unsigned
int
rows
,
read_only
unsigned
int
cols
)
{
__kernel
void
copy
(
__global
double
*
A
,
__global
double
*
B
,
unsigned
int
rows
,
unsigned
int
cols
)
{
int
i
=
get_global_id
(
0
);
int
j
=
get_global_id
(
1
);
if
(
i
<
rows
&&
j
<
cols
)
{
...
...
This diff is collapsed.
Click to expand it.
stan/math/gpu/kernels/copy_triangular.hpp
View file @
6b3a15d7
...
...
@@ -29,10 +29,9 @@ const char *copy_triangular_kernel_code = STRINGIFY(
* Used in math/gpu/copy_triangular_opencl.hpp.
* This kernel uses the helper macros available in helpers.cl.
*/
__kernel
void
copy_triangular
(
__global
write_only
double
*
A
,
__global
read_only
double
*
B
,
read_only
unsigned
int
rows
,
read_only
unsigned
int
cols
,
read_only
unsigned
int
lower_upper
)
{
__kernel
void
copy_triangular
(
__global
double
*
A
,
__global
double
*
B
,
unsigned
int
rows
,
unsigned
int
cols
,
unsigned
int
lower_upper
)
{
int
i
=
get_global_id
(
0
);
int
j
=
get_global_id
(
1
);
if
(
i
<
rows
&&
j
<
cols
)
{
...
...
This diff is collapsed.
Click to expand it.
stan/math/gpu/kernels/identity.hpp
View file @
6b3a15d7
...
...
@@ -21,9 +21,8 @@ const char* identity_kernel_code = STRINGIFY(
* Used in math/gpu/identity_opencl.hpp.
* This kernel uses the helper macros available in helpers.cl.
*/
__kernel
void
identity
(
__global
write_only
double
*
A
,
read_only
unsigned
int
rows
,
read_only
unsigned
int
cols
)
{
__kernel
void
identity
(
__global
double
*
A
,
unsigned
int
rows
,
unsigned
int
cols
)
{
int
i
=
get_global_id
(
0
);
int
j
=
get_global_id
(
1
);
if
(
i
<
rows
&&
j
<
cols
)
{
...
...
This diff is collapsed.
Click to expand it.
stan/math/gpu/kernels/matrix_multiply.hpp
View file @
6b3a15d7
...
...
@@ -20,10 +20,9 @@ const char* matrix_multiply_kernel_code = STRINGIFY(
* @param[in] N Number of rows for matrix B
* @param[in] K Number of cols for matrix A and number of rows for matrix B
*/
__kernel
void
matrix_multiply
(
const
__global
read_only
double
*
A
,
const
__global
read_only
double
*
B
,
__global
write_only
double
*
C
,
const
read_only
int
M
,
const
read_only
int
N
,
const
read_only
int
K
)
{
__kernel
void
matrix_multiply
(
const
__global
double
*
A
,
const
__global
double
*
B
,
__global
double
*
C
,
const
int
M
,
const
int
N
,
const
int
K
)
{
// thread index inside the thread_block
const
int
thread_block_row
=
get_local_id
(
0
);
const
int
thread_block_col
=
get_local_id
(
1
);
...
...
This diff is collapsed.
Click to expand it.
stan/math/gpu/kernels/multiply_transpose.hpp
View file @
6b3a15d7
...
...
@@ -19,9 +19,9 @@ const char* multiply_transpose_kernel_code = STRINGIFY(
* @param[in] N Number of cols for matrix A and the number of rows for
* matrix A^T
*/
__kernel
void
multiply_transpose
(
const
__global
read_only
double
*
A
,
__global
write_only
double
*
B
,
const
read_only
int
M
,
const
read_only
int
N
)
{
__kernel
void
multiply_transpose
(
const
__global
double
*
A
,
__global
double
*
B
,
const
int
M
,
const
int
N
)
{
// thread index inside the thread block
const
int
thread_block_row
=
get_local_id
(
0
);
const
int
thread_block_col
=
get_local_id
(
1
);
...
...
This diff is collapsed.
Click to expand it.
stan/math/gpu/kernels/scalar_mul_diagonal.hpp
View file @
6b3a15d7
...
...
@@ -18,10 +18,9 @@ const char *scalar_mul_diagonal_kernel_code = STRINGIFY(
* @param[in] rows the number of rows in A
* @param[in] min_dim the size of the smaller dimension of A
*/
__kernel
void
scalar_mul_diagonal
(
__global
read_write
double
*
A
,
const
read_only
double
scalar
,
const
read_only
unsigned
int
rows
,
const
read_only
unsigned
int
min_dim
)
{
__kernel
void
scalar_mul_diagonal
(
__global
double
*
A
,
const
double
scalar
,
const
unsigned
int
rows
,
const
unsigned
int
min_dim
)
{
int
i
=
get_global_id
(
0
);
if
(
i
<
min_dim
)
{
A
(
i
,
i
)
*=
scalar
;
...
...
This diff is collapsed.
Click to expand it.
stan/math/gpu/kernels/sub_block.hpp
View file @
6b3a15d7
...
...
@@ -39,14 +39,11 @@ const char *sub_block_kernel_code = STRINGIFY(
*
*/
__kernel
void
sub_block
(
__global
read_only
double
*
src
,
read_write
__global
double
*
dst
,
read_only
unsigned
int
src_offset_i
,
read_only
unsigned
int
src_offset_j
,
read_only
unsigned
int
dst_offset_i
,
read_only
unsigned
int
dst_offset_j
,
read_only
unsigned
int
size_i
,
read_only
unsigned
int
size_j
,
read_only
unsigned
int
src_rows
,
read_only
unsigned
int
src_cols
,
read_only
unsigned
int
dst_rows
,
read_only
unsigned
int
dst_cols
)
{
__global
double
*
src
,
__global
double
*
dst
,
unsigned
int
src_offset_i
,
unsigned
int
src_offset_j
,
unsigned
int
dst_offset_i
,
unsigned
int
dst_offset_j
,
unsigned
int
size_i
,
unsigned
int
size_j
,
unsigned
int
src_rows
,
unsigned
int
src_cols
,
unsigned
int
dst_rows
,
unsigned
int
dst_cols
)
{
int
i
=
get_global_id
(
0
);
int
j
=
get_global_id
(
1
);
if
((
i
+
src_offset_i
)
<
src_rows
&&
(
j
+
src_offset_j
)
<
src_cols
...
...
This diff is collapsed.
Click to expand it.
stan/math/gpu/kernels/subtract.hpp
View file @
6b3a15d7
...
...
@@ -24,10 +24,9 @@ const char *subtract_kernel_code = STRINGIFY(
* Used in math/gpu/subtract_opencl.hpp
* This kernel uses the helper macros available in helpers.cl.
*/
__kernel
void
subtract
(
__global
write_only
double
*
C
,
__global
read_only
double
*
A
,
__global
read_only
double
*
B
,
read_only
unsigned
int
rows
,
read_only
unsigned
int
cols
)
{
__kernel
void
subtract
(
__global
double
*
C
,
__global
double
*
A
,
__global
double
*
B
,
unsigned
int
rows
,
unsigned
int
cols
)
{
int
i
=
get_global_id
(
0
);
int
j
=
get_global_id
(
1
);
if
(
i
<
rows
&&
j
<
cols
)
{
...
...
This diff is collapsed.
Click to expand it.
stan/math/gpu/kernels/transpose.hpp
View file @
6b3a15d7
...
...
@@ -21,9 +21,8 @@ const char *transpose_kernel_code = STRINGIFY(
* <code>transpose_kernel_code.</code>
* This kernel uses the helper macros available in helpers.cl.
*/
__kernel
void
transpose
(
__global
read_write
double
*
B
,
__global
read_only
double
*
A
,
read_only
unsigned
int
rows
,
read_only
unsigned
int
cols
)
{
__kernel
void
transpose
(
__global
double
*
B
,
__global
double
*
A
,
unsigned
int
rows
,
unsigned
int
cols
)
{
int
i
=
get_global_id
(
0
);
int
j
=
get_global_id
(
1
);
if
(
i
<
rows
&&
j
<
cols
)
{
...
...
This diff is collapsed.
Click to expand it.
stan/math/gpu/kernels/triangular_transpose.hpp
View file @
6b3a15d7
...
...
@@ -25,9 +25,9 @@ const char* triangular_transpose_kernel_code = STRINGIFY(
* Used in mat/gpu/triangular_transpose.hpp.
* This kernel uses the helper macros available in helpers.cl.
*/
__kernel
void
triangular_transpose
(
__global
read_write
double
*
A
,
read_only
unsigned
int
rows
,
read_only
unsigned
int
cols
,
read_only
unsigned
int
copy_direction
)
{
__kernel
void
triangular_transpose
(
__global
double
*
A
,
unsigned
int
rows
,
unsigned
int
cols
,
unsigned
int
copy_direction
)
{
int
i
=
get_global_id
(
0
);
int
j
=
get_global_id
(
1
);
if
(
i
<
rows
&&
j
<
cols
)
{
...
...
This diff is collapsed.
Click to expand it.
stan/math/gpu/kernels/zeros.hpp
View file @
6b3a15d7
...
...
@@ -27,9 +27,8 @@ const char* zeros_kernel_code = STRINGIFY(
* <code>zeros_kernel_code.</code>
* This kernel uses the helper macros available in helpers.cl.
*/
__kernel
void
zeros
(
__global
write_only
double
*
A
,
read_only
unsigned
int
rows
,
read_only
unsigned
int
cols
,
read_only
unsigned
int
part
)
{
__kernel
void
zeros
(
__global
double
*
A
,
unsigned
int
rows
,
unsigned
int
cols
,
unsigned
int
part
)
{
int
i
=
get_global_id
(
0
);
int
j
=
get_global_id
(
1
);
if
(
i
<
rows
&&
j
<
cols
)
{
...
...
This diff is collapsed.
Click to expand it.
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment
Menu
Projects
Groups
Snippets
Help