3 #include "device_functions.h"
/**
 * Atomic compare-and-swap on an int with relaxed memory ordering.
 *
 * If `*address` equals `compare`, `val` is stored there; otherwise memory is
 * left unchanged.
 *
 * @param address Location to operate on.
 * @param compare Value expected at `address`.
 * @param val     Value to store when the comparison succeeds.
 * @return The value found at `address` before the operation.
 *
 * NOTE(review): qualifiers such as `__device__`/`inline` are not visible in
 * this excerpt; confirm against the full header.
 */
int atomicCAS(int* address, int compare, int val)
{
    // On failure the builtin overwrites `compare` with the value it observed;
    // on success `compare` already equals the old value. Either way it holds
    // the prior contents afterwards.
    __atomic_compare_exchange_n(
        address, &compare, val, false, __ATOMIC_RELAXED, __ATOMIC_RELAXED);

    return compare;
}
/**
 * Atomic compare-and-swap on an unsigned int with relaxed memory ordering.
 *
 * If `*address` equals `compare`, `val` is stored there; otherwise memory is
 * left unchanged.
 *
 * @param address Location to operate on.
 * @param compare Value expected at `address`.
 * @param val     Value to store when the comparison succeeds.
 * @return The value found at `address` before the operation.
 *
 * NOTE(review): qualifiers such as `__device__`/`inline` are not visible in
 * this excerpt; confirm against the full header.
 */
unsigned int atomicCAS(
    unsigned int* address, unsigned int compare, unsigned int val)
{
    // `compare` ends up holding the prior contents whether or not the swap
    // happened (the builtin writes the observed value back on failure).
    __atomic_compare_exchange_n(
        address, &compare, val, false, __ATOMIC_RELAXED, __ATOMIC_RELAXED);

    return compare;
}
/**
 * Atomic compare-and-swap on an unsigned long long with relaxed memory
 * ordering.
 *
 * If `*address` equals `compare`, `val` is stored there; otherwise memory is
 * left unchanged.
 *
 * @param address Location to operate on.
 * @param compare Value expected at `address`.
 * @param val     Value to store when the comparison succeeds.
 * @return The value found at `address` before the operation.
 *
 * NOTE(review): qualifiers such as `__device__`/`inline` are not visible in
 * this excerpt; confirm against the full header.
 */
unsigned long long atomicCAS(
    unsigned long long* address,
    unsigned long long compare,
    unsigned long long val)
{
    // `compare` ends up holding the prior contents whether or not the swap
    // happened (the builtin writes the observed value back on failure).
    __atomic_compare_exchange_n(
        address, &compare, val, false, __ATOMIC_RELAXED, __ATOMIC_RELAXED);

    return compare;
}
/**
 * Atomically adds `val` to the int at `address` (relaxed memory ordering).
 *
 * @param address Location to update.
 * @param val     Amount to add.
 * @return The value stored at `address` immediately before the addition.
 *
 * NOTE(review): qualifiers such as `__device__`/`inline` are not visible in
 * this excerpt; confirm against the full header.
 */
int atomicAdd(int* address, int val)
{
    return __atomic_fetch_add(address, val, __ATOMIC_RELAXED);
}
/**
 * Atomically adds `val` to the unsigned int at `address` (relaxed memory
 * ordering).
 *
 * @param address Location to update.
 * @param val     Amount to add.
 * @return The value stored at `address` immediately before the addition.
 *
 * NOTE(review): qualifiers such as `__device__`/`inline` are not visible in
 * this excerpt; confirm against the full header.
 */
unsigned int atomicAdd(unsigned int* address, unsigned int val)
{
    return __atomic_fetch_add(address, val, __ATOMIC_RELAXED);
}
/**
 * Atomically adds `val` to the unsigned long long at `address` (relaxed
 * memory ordering).
 *
 * @param address Location to update.
 * @param val     Amount to add.
 * @return The value stored at `address` immediately before the addition.
 *
 * NOTE(review): qualifiers such as `__device__`/`inline` are not visible in
 * this excerpt; confirm against the full header.
 */
unsigned long long atomicAdd(
    unsigned long long* address, unsigned long long val)
{
    return __atomic_fetch_add(address, val, __ATOMIC_RELAXED);
}
58 float atomicAdd(
float* address,
float val)
60 return __atomic_fetch_add(address, val, __ATOMIC_RELAXED);
63 DEPRECATED(
"use atomicAdd instead")
66 void atomicAddNoRet(
float* address,
float val)
68 __ockl_atomic_add_noret_f32(address, val);
73 double atomicAdd(
double* address,
double val)
75 return __atomic_fetch_add(address, val, __ATOMIC_RELAXED);
/**
 * Atomically subtracts `val` from the int at `address` (relaxed memory
 * ordering).
 *
 * @param address Location to update.
 * @param val     Amount to subtract.
 * @return The value stored at `address` immediately before the subtraction.
 *
 * NOTE(review): qualifiers such as `__device__`/`inline` are not visible in
 * this excerpt; confirm against the full header.
 */
int atomicSub(int* address, int val)
{
    return __atomic_fetch_sub(address, val, __ATOMIC_RELAXED);
}
/**
 * Atomically subtracts `val` from the unsigned int at `address` (relaxed
 * memory ordering).
 *
 * @param address Location to update.
 * @param val     Amount to subtract.
 * @return The value stored at `address` immediately before the subtraction.
 *
 * NOTE(review): qualifiers such as `__device__`/`inline` are not visible in
 * this excerpt; confirm against the full header.
 */
unsigned int atomicSub(unsigned int* address, unsigned int val)
{
    return __atomic_fetch_sub(address, val, __ATOMIC_RELAXED);
}
/**
 * Atomically replaces the int at `address` with `val` (relaxed memory
 * ordering).
 *
 * @param address Location to overwrite.
 * @param val     New value to store.
 * @return The value stored at `address` immediately before the exchange.
 *
 * NOTE(review): qualifiers such as `__device__`/`inline` are not visible in
 * this excerpt; confirm against the full header.
 */
int atomicExch(int* address, int val)
{
    return __atomic_exchange_n(address, val, __ATOMIC_RELAXED);
}
/**
 * Atomically replaces the unsigned int at `address` with `val` (relaxed
 * memory ordering).
 *
 * @param address Location to overwrite.
 * @param val     New value to store.
 * @return The value stored at `address` immediately before the exchange.
 *
 * NOTE(review): qualifiers such as `__device__`/`inline` are not visible in
 * this excerpt; confirm against the full header.
 */
unsigned int atomicExch(unsigned int* address, unsigned int val)
{
    return __atomic_exchange_n(address, val, __ATOMIC_RELAXED);
}
/**
 * Atomically replaces the unsigned long long at `address` with `val`
 * (relaxed memory ordering).
 *
 * @param address Location to overwrite.
 * @param val     New value to store.
 * @return The value stored at `address` immediately before the exchange.
 *
 * NOTE(review): qualifiers such as `__device__`/`inline` are not visible in
 * this excerpt; confirm against the full header.
 */
unsigned long long atomicExch(
    unsigned long long* address, unsigned long long val)
{
    return __atomic_exchange_n(address, val, __ATOMIC_RELAXED);
}
111 float atomicExch(
float* address,
float val)
113 return __uint_as_float(__atomic_exchange_n(
114 reinterpret_cast<unsigned int*
>(address),
115 __float_as_uint(val),
121 int atomicMin(
int* address,
int val)
123 return __atomic_fetch_min(address, val, __ATOMIC_RELAXED);
127 unsigned int atomicMin(
unsigned int* address,
unsigned int val)
129 return __atomic_fetch_min(address, val, __ATOMIC_RELAXED);
/**
 * Atomically stores min(`*address`, `val`) into the unsigned long long at
 * `address`, using a relaxed compare-exchange loop.
 *
 * @param address Location to update.
 * @param val     Candidate minimum.
 * @return The value stored at `address` immediately before the operation
 *         (or the value observed, if no store was needed).
 *
 * NOTE(review): the loop header of the original is not visible in this
 * excerpt; the "store only while val is smaller" condition is taken from the
 * function's name - confirm.
 * NOTE(review): qualifiers such as `__device__`/`inline` are not visible in
 * this excerpt; confirm against the full header.
 */
unsigned long long atomicMin(
    unsigned long long* address, unsigned long long val)
{
    unsigned long long observed = __atomic_load_n(address, __ATOMIC_RELAXED);

    // Retry only while our value would still lower the stored one. A failed
    // compare-exchange refreshes `observed` with the current contents, so no
    // separate reload is needed; a successful one leaves `observed` holding
    // the prior value, which is what we return.
    while (val < observed) {
        if (__atomic_compare_exchange_n(
                address, &observed, val, false,
                __ATOMIC_RELAXED, __ATOMIC_RELAXED)) {
            break;
        }
    }

    return observed;
}
150 int atomicMax(
int* address,
int val)
152 return __atomic_fetch_max(address, val, __ATOMIC_RELAXED);
156 unsigned int atomicMax(
unsigned int* address,
unsigned int val)
158 return __atomic_fetch_max(address, val, __ATOMIC_RELAXED);
/**
 * Atomically stores max(`*address`, `val`) into the unsigned long long at
 * `address`, using a relaxed compare-exchange loop.
 *
 * @param address Location to update.
 * @param val     Candidate maximum.
 * @return The value stored at `address` immediately before the operation
 *         (or the value observed, if no store was needed).
 *
 * NOTE(review): the loop header of the original is not visible in this
 * excerpt; the "store only while val is larger" condition is taken from the
 * function's name - confirm.
 * NOTE(review): qualifiers such as `__device__`/`inline` are not visible in
 * this excerpt; confirm against the full header.
 */
unsigned long long atomicMax(
    unsigned long long* address, unsigned long long val)
{
    unsigned long long observed = __atomic_load_n(address, __ATOMIC_RELAXED);

    // Retry only while our value would still raise the stored one. A failed
    // compare-exchange refreshes `observed` with the current contents, so no
    // separate reload is needed; a successful one leaves `observed` holding
    // the prior value, which is what we return.
    while (observed < val) {
        if (__atomic_compare_exchange_n(
                address, &observed, val, false,
                __ATOMIC_RELAXED, __ATOMIC_RELAXED)) {
            break;
        }
    }

    return observed;
}
// atomicInc: forwards `address` and `val` to the routine bound to the LLVM
// intrinsic "llvm.amdgcn.atomic.inc.i32.p0i32" and returns its result.
// NOTE(review): the exact increment/wrap rule is defined by that intrinsic,
// not by anything visible here - presumably CUDA-style atomicInc; confirm.
179 unsigned int atomicInc(
unsigned int* address,
unsigned int val)
// Local declaration whose symbol name is overridden by the __asm label below.
// NOTE(review): only the trailing `bool` parameter of this declaration is
// visible in this excerpt; the leading parameter types must be checked
// against the full header.
183 unsigned int __builtin_amdgcn_atomic_inc(
188 bool) __asm(
"llvm.amdgcn.atomic.inc.i32.p0i32");
// Arguments passed: target address, val, relaxed ordering, the literal 1
// (meaning not shown here - presumably a scope selector; confirm), and false.
190 return __builtin_amdgcn_atomic_inc(
191 address, val, __ATOMIC_RELAXED, 1 ,
false);
// atomicDec: forwards `address` and `val` to the routine bound to the LLVM
// intrinsic "llvm.amdgcn.atomic.dec.i32.p0i32" and returns its result.
// NOTE(review): the exact decrement/wrap rule is defined by that intrinsic,
// not by anything visible here - presumably CUDA-style atomicDec; confirm.
196 unsigned int atomicDec(
unsigned int* address,
unsigned int val)
// Local declaration whose symbol name is overridden by the __asm label below.
// NOTE(review): only the trailing `bool` parameter of this declaration is
// visible in this excerpt; the leading parameter types must be checked
// against the full header.
200 unsigned int __builtin_amdgcn_atomic_dec(
205 bool) __asm(
"llvm.amdgcn.atomic.dec.i32.p0i32");
// Arguments passed: target address, val, relaxed ordering, the literal 1
// (meaning not shown here - presumably a scope selector; confirm), and false.
207 return __builtin_amdgcn_atomic_dec(
208 address, val, __ATOMIC_RELAXED, 1 ,
false);
/**
 * Atomically bitwise-ANDs `val` into the int at `address` (relaxed memory
 * ordering).
 *
 * @param address Location to update.
 * @param val     Mask to AND in.
 * @return The value stored at `address` immediately before the operation.
 *
 * NOTE(review): qualifiers such as `__device__`/`inline` are not visible in
 * this excerpt; confirm against the full header.
 */
int atomicAnd(int* address, int val)
{
    return __atomic_fetch_and(address, val, __ATOMIC_RELAXED);
}
/**
 * Atomically bitwise-ANDs `val` into the unsigned int at `address` (relaxed
 * memory ordering).
 *
 * @param address Location to update.
 * @param val     Mask to AND in.
 * @return The value stored at `address` immediately before the operation.
 *
 * NOTE(review): qualifiers such as `__device__`/`inline` are not visible in
 * this excerpt; confirm against the full header.
 */
unsigned int atomicAnd(unsigned int* address, unsigned int val)
{
    return __atomic_fetch_and(address, val, __ATOMIC_RELAXED);
}
/**
 * Atomically bitwise-ANDs `val` into the unsigned long long at `address`
 * (relaxed memory ordering).
 *
 * @param address Location to update.
 * @param val     Mask to AND in.
 * @return The value stored at `address` immediately before the operation.
 *
 * NOTE(review): qualifiers such as `__device__`/`inline` are not visible in
 * this excerpt; confirm against the full header.
 */
unsigned long long atomicAnd(
    unsigned long long* address, unsigned long long val)
{
    return __atomic_fetch_and(address, val, __ATOMIC_RELAXED);
}
/**
 * Atomically bitwise-ORs `val` into the int at `address` (relaxed memory
 * ordering).
 *
 * @param address Location to update.
 * @param val     Bits to OR in.
 * @return The value stored at `address` immediately before the operation.
 *
 * NOTE(review): qualifiers such as `__device__`/`inline` are not visible in
 * this excerpt; confirm against the full header.
 */
int atomicOr(int* address, int val)
{
    return __atomic_fetch_or(address, val, __ATOMIC_RELAXED);
}
/**
 * Atomically bitwise-ORs `val` into the unsigned int at `address` (relaxed
 * memory ordering).
 *
 * @param address Location to update.
 * @param val     Bits to OR in.
 * @return The value stored at `address` immediately before the operation.
 *
 * NOTE(review): qualifiers such as `__device__`/`inline` are not visible in
 * this excerpt; confirm against the full header.
 */
unsigned int atomicOr(unsigned int* address, unsigned int val)
{
    return __atomic_fetch_or(address, val, __ATOMIC_RELAXED);
}
/**
 * Atomically bitwise-ORs `val` into the unsigned long long at `address`
 * (relaxed memory ordering).
 *
 * @param address Location to update.
 * @param val     Bits to OR in.
 * @return The value stored at `address` immediately before the operation.
 *
 * NOTE(review): qualifiers such as `__device__`/`inline` are not visible in
 * this excerpt; confirm against the full header.
 */
unsigned long long atomicOr(
    unsigned long long* address, unsigned long long val)
{
    return __atomic_fetch_or(address, val, __ATOMIC_RELAXED);
}
/**
 * Atomically bitwise-XORs `val` into the int at `address` (relaxed memory
 * ordering).
 *
 * @param address Location to update.
 * @param val     Bits to toggle.
 * @return The value stored at `address` immediately before the operation.
 *
 * NOTE(review): qualifiers such as `__device__`/`inline` are not visible in
 * this excerpt; confirm against the full header.
 */
int atomicXor(int* address, int val)
{
    return __atomic_fetch_xor(address, val, __ATOMIC_RELAXED);
}
/**
 * Atomically bitwise-XORs `val` into the unsigned int at `address` (relaxed
 * memory ordering).
 *
 * @param address Location to update.
 * @param val     Bits to toggle.
 * @return The value stored at `address` immediately before the operation.
 *
 * NOTE(review): qualifiers such as `__device__`/`inline` are not visible in
 * this excerpt; confirm against the full header.
 */
unsigned int atomicXor(unsigned int* address, unsigned int val)
{
    return __atomic_fetch_xor(address, val, __ATOMIC_RELAXED);
}
/**
 * Atomically bitwise-XORs `val` into the unsigned long long at `address`
 * (relaxed memory ordering).
 *
 * @param address Location to update.
 * @param val     Bits to toggle.
 * @return The value stored at `address` immediately before the operation.
 *
 * NOTE(review): qualifiers such as `__device__`/`inline` are not visible in
 * this excerpt; confirm against the full header.
 */
unsigned long long atomicXor(
    unsigned long long* address, unsigned long long val)
{
    return __atomic_fetch_xor(address, val, __ATOMIC_RELAXED);
}