3 #include "device_functions.h"
5 #if __has_builtin(__hip_atomic_compare_exchange_strong)
7 #if !__HIP_DEVICE_COMPILE__
9 #define __HIP_MEMORY_SCOPE_SINGLETHREAD 1
10 #define __HIP_MEMORY_SCOPE_WAVEFRONT 2
11 #define __HIP_MEMORY_SCOPE_WORKGROUP 3
12 #define __HIP_MEMORY_SCOPE_AGENT 4
13 #define __HIP_MEMORY_SCOPE_SYSTEM 5
18 int atomicCAS(
int* address,
int compare,
int val) {
19 __hip_atomic_compare_exchange_strong(address, &compare, val, __ATOMIC_RELAXED, __ATOMIC_RELAXED,
20 __HIP_MEMORY_SCOPE_AGENT);
26 int atomicCAS_system(
int* address,
int compare,
int val) {
27 __hip_atomic_compare_exchange_strong(address, &compare, val, __ATOMIC_RELAXED, __ATOMIC_RELAXED,
28 __HIP_MEMORY_SCOPE_SYSTEM);
34 unsigned int atomicCAS(
unsigned int* address,
unsigned int compare,
unsigned int val) {
35 __hip_atomic_compare_exchange_strong(address, &compare, val, __ATOMIC_RELAXED, __ATOMIC_RELAXED,
36 __HIP_MEMORY_SCOPE_AGENT);
42 unsigned int atomicCAS_system(
unsigned int* address,
unsigned int compare,
unsigned int val) {
43 __hip_atomic_compare_exchange_strong(address, &compare, val, __ATOMIC_RELAXED, __ATOMIC_RELAXED,
44 __HIP_MEMORY_SCOPE_SYSTEM);
50 unsigned long long atomicCAS(
unsigned long long* address,
unsigned long long compare,
51 unsigned long long val) {
52 __hip_atomic_compare_exchange_strong(address, &compare, val, __ATOMIC_RELAXED, __ATOMIC_RELAXED,
53 __HIP_MEMORY_SCOPE_AGENT);
59 unsigned long long atomicCAS_system(
unsigned long long* address,
unsigned long long compare,
60 unsigned long long val) {
61 __hip_atomic_compare_exchange_strong(address, &compare, val, __ATOMIC_RELAXED, __ATOMIC_RELAXED,
62 __HIP_MEMORY_SCOPE_SYSTEM);
68 int atomicAdd(
int* address,
int val) {
69 return __hip_atomic_fetch_add(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
74 int atomicAdd_system(
int* address,
int val) {
75 return __hip_atomic_fetch_add(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
80 unsigned int atomicAdd(
unsigned int* address,
unsigned int val) {
81 return __hip_atomic_fetch_add(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
86 unsigned int atomicAdd_system(
unsigned int* address,
unsigned int val) {
87 return __hip_atomic_fetch_add(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
92 unsigned long long atomicAdd(
unsigned long long* address,
unsigned long long val) {
93 return __hip_atomic_fetch_add(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
98 unsigned long long atomicAdd_system(
unsigned long long* address,
unsigned long long val) {
99 return __hip_atomic_fetch_add(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
104 float atomicAdd(
float* address,
float val) {
105 return __hip_atomic_fetch_add(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
110 float atomicAdd_system(
float* address,
float val) {
111 return __hip_atomic_fetch_add(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
114 #if !defined(__HIPCC_RTC__)
115 DEPRECATED(
"use atomicAdd instead")
116 #endif // !defined(__HIPCC_RTC__)
119 void atomicAddNoRet(
float* address,
float val)
121 __ockl_atomic_add_noret_f32(address, val);
126 double atomicAdd(
double* address,
double val) {
127 return __hip_atomic_fetch_add(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
132 double atomicAdd_system(
double* address,
double val) {
133 return __hip_atomic_fetch_add(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
138 int atomicSub(
int* address,
int val) {
139 return __hip_atomic_fetch_add(address, -val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
144 int atomicSub_system(
int* address,
int val) {
145 return __hip_atomic_fetch_add(address, -val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
150 unsigned int atomicSub(
unsigned int* address,
unsigned int val) {
151 return __hip_atomic_fetch_add(address, -val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
156 unsigned int atomicSub_system(
unsigned int* address,
unsigned int val) {
157 return __hip_atomic_fetch_add(address, -val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
162 int atomicExch(
int* address,
int val) {
163 return __hip_atomic_exchange(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
168 int atomicExch_system(
int* address,
int val) {
169 return __hip_atomic_exchange(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
174 unsigned int atomicExch(
unsigned int* address,
unsigned int val) {
175 return __hip_atomic_exchange(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
180 unsigned int atomicExch_system(
unsigned int* address,
unsigned int val) {
181 return __hip_atomic_exchange(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
186 unsigned long long atomicExch(
unsigned long long* address,
unsigned long long val) {
187 return __hip_atomic_exchange(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
192 unsigned long long atomicExch_system(
unsigned long long* address,
unsigned long long val) {
193 return __hip_atomic_exchange(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
198 float atomicExch(
float* address,
float val) {
199 return __hip_atomic_exchange(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
204 float atomicExch_system(
float* address,
float val) {
205 return __hip_atomic_exchange(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
210 int atomicMin(
int* address,
int val) {
211 return __hip_atomic_fetch_min(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
216 int atomicMin_system(
int* address,
int val) {
217 return __hip_atomic_fetch_min(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
222 unsigned int atomicMin(
unsigned int* address,
unsigned int val) {
223 return __hip_atomic_fetch_min(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
228 unsigned int atomicMin_system(
unsigned int* address,
unsigned int val) {
229 return __hip_atomic_fetch_min(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
234 unsigned long long atomicMin(
unsigned long long* address,
unsigned long long val) {
235 return __hip_atomic_fetch_min(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
240 unsigned long long atomicMin_system(
unsigned long long* address,
unsigned long long val) {
241 return __hip_atomic_fetch_min(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
246 int atomicMax(
int* address,
int val) {
247 return __hip_atomic_fetch_max(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
252 int atomicMax_system(
int* address,
int val) {
253 return __hip_atomic_fetch_max(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
258 unsigned int atomicMax(
unsigned int* address,
unsigned int val) {
259 return __hip_atomic_fetch_max(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
264 unsigned int atomicMax_system(
unsigned int* address,
unsigned int val) {
265 return __hip_atomic_fetch_max(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
270 unsigned long long atomicMax(
unsigned long long* address,
unsigned long long val) {
271 return __hip_atomic_fetch_max(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
276 unsigned long long atomicMax_system(
unsigned long long* address,
unsigned long long val) {
277 return __hip_atomic_fetch_max(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
// atomicInc: forwards address and val to __builtin_amdgcn_atomic_inc and
// returns whatever that call yields.
//
// The declaration inside the function gives the name
// __builtin_amdgcn_atomic_inc the assembler label
// "llvm.amdgcn.atomic.inc.i32.p0i32".
//
// NOTE(review): the function braces and every parameter of the declaration
// except the trailing bool are not visible in this excerpt — restore them from
// the upstream header before building.
282 unsigned int atomicInc(
unsigned int* address,
unsigned int val)
286 unsigned int __builtin_amdgcn_atomic_inc(
291 bool) __asm(
"llvm.amdgcn.atomic.inc.i32.p0i32");
// Arguments passed, in order: the target pointer, the operand,
// __ATOMIC_RELAXED, the literal 1, and false.
// NOTE(review): the literal 1 is presumably a synchronization-scope id and the
// final false presumably a volatile flag — confirm against the intrinsic.
293 return __builtin_amdgcn_atomic_inc(
294 address, val, __ATOMIC_RELAXED, 1 ,
false);
// atomicDec: forwards address and val to __builtin_amdgcn_atomic_dec and
// returns whatever that call yields.
//
// The declaration inside the function gives the name
// __builtin_amdgcn_atomic_dec the assembler label
// "llvm.amdgcn.atomic.dec.i32.p0i32".
//
// NOTE(review): the function braces and every parameter of the declaration
// except the trailing bool are not visible in this excerpt — restore them from
// the upstream header before building.
299 unsigned int atomicDec(
unsigned int* address,
unsigned int val)
303 unsigned int __builtin_amdgcn_atomic_dec(
308 bool) __asm(
"llvm.amdgcn.atomic.dec.i32.p0i32");
// Arguments passed, in order: the target pointer, the operand,
// __ATOMIC_RELAXED, the literal 1, and false.
// NOTE(review): the literal 1 is presumably a synchronization-scope id and the
// final false presumably a volatile flag — confirm against the intrinsic.
310 return __builtin_amdgcn_atomic_dec(
311 address, val, __ATOMIC_RELAXED, 1 ,
false);
316 int atomicAnd(
int* address,
int val) {
317 return __hip_atomic_fetch_and(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
322 int atomicAnd_system(
int* address,
int val) {
323 return __hip_atomic_fetch_and(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
328 unsigned int atomicAnd(
unsigned int* address,
unsigned int val) {
329 return __hip_atomic_fetch_and(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
334 unsigned int atomicAnd_system(
unsigned int* address,
unsigned int val) {
335 return __hip_atomic_fetch_and(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
339 unsigned long long atomicAnd(
unsigned long long* address,
unsigned long long val) {
340 return __hip_atomic_fetch_and(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
345 unsigned long long atomicAnd_system(
unsigned long long* address,
unsigned long long val) {
346 return __hip_atomic_fetch_and(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
351 int atomicOr(
int* address,
int val) {
352 return __hip_atomic_fetch_or(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
357 int atomicOr_system(
int* address,
int val) {
358 return __hip_atomic_fetch_or(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
363 unsigned int atomicOr(
unsigned int* address,
unsigned int val) {
364 return __hip_atomic_fetch_or(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
369 unsigned int atomicOr_system(
unsigned int* address,
unsigned int val) {
370 return __hip_atomic_fetch_or(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
375 unsigned long long atomicOr(
unsigned long long* address,
unsigned long long val) {
376 return __hip_atomic_fetch_or(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
381 unsigned long long atomicOr_system(
unsigned long long* address,
unsigned long long val) {
382 return __hip_atomic_fetch_or(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
387 int atomicXor(
int* address,
int val) {
388 return __hip_atomic_fetch_xor(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
393 int atomicXor_system(
int* address,
int val) {
394 return __hip_atomic_fetch_xor(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
399 unsigned int atomicXor(
unsigned int* address,
unsigned int val) {
400 return __hip_atomic_fetch_xor(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
405 unsigned int atomicXor_system(
unsigned int* address,
unsigned int val) {
406 return __hip_atomic_fetch_xor(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
411 unsigned long long atomicXor(
unsigned long long* address,
unsigned long long val) {
412 return __hip_atomic_fetch_xor(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
417 unsigned long long atomicXor_system(
unsigned long long* address,
unsigned long long val) {
418 return __hip_atomic_fetch_xor(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
// Compare-and-swap overloads built on the generic __atomic_compare_exchange_n
// builtin (strong variant, __ATOMIC_RELAXED for success and failure).
//
// NOTE(review): these definitions share names and signatures with the
// __hip_atomic_* based overloads earlier in the file; the preprocessor branch
// that keeps the two sets apart is not visible in this excerpt — confirm it is
// in place.
//
//   address  location to operate on
//   compare  value expected at *address
//   val      value stored when *address equals compare
//
// Returns `compare` after the builtin has run. On failure the builtin writes
// the observed value into `compare`, so in both outcomes the result is the
// value that was seen at *address.
int atomicCAS(int* address, int compare, int val) {
  __atomic_compare_exchange_n(address, &compare, val, false, __ATOMIC_RELAXED,
                              __ATOMIC_RELAXED);
  return compare;
}

unsigned int atomicCAS(unsigned int* address, unsigned int compare, unsigned int val) {
  __atomic_compare_exchange_n(address, &compare, val, false, __ATOMIC_RELAXED,
                              __ATOMIC_RELAXED);
  return compare;
}

unsigned long long atomicCAS(unsigned long long* address, unsigned long long compare,
                             unsigned long long val) {
  __atomic_compare_exchange_n(address, &compare, val, false, __ATOMIC_RELAXED,
                              __ATOMIC_RELAXED);
  return compare;
}
// Integer atomic addition overloads built on __atomic_fetch_add with
// __ATOMIC_RELAXED ordering.
//
//   address  location to update
//   val      amount added to *address
//
// Each overload returns the value held at *address before the addition.
int atomicAdd(int* address, int val) {
  return __atomic_fetch_add(address, val, __ATOMIC_RELAXED);
}

unsigned int atomicAdd(unsigned int* address, unsigned int val) {
  return __atomic_fetch_add(address, val, __ATOMIC_RELAXED);
}

unsigned long long atomicAdd(unsigned long long* address, unsigned long long val) {
  return __atomic_fetch_add(address, val, __ATOMIC_RELAXED);
}
476 float atomicAdd(
float* address,
float val)
478 return __atomic_fetch_add(address, val, __ATOMIC_RELAXED);
481 #if !defined(__HIPCC_RTC__)
482 DEPRECATED(
"use atomicAdd instead")
483 #endif // !defined(__HIPCC_RTC__)
486 void atomicAddNoRet(
float* address,
float val)
488 __ockl_atomic_add_noret_f32(address, val);
493 double atomicAdd(
double* address,
double val)
495 return __atomic_fetch_add(address, val, __ATOMIC_RELAXED);
// Atomic subtraction overloads built on __atomic_fetch_sub with
// __ATOMIC_RELAXED ordering.
//
//   address  location to update
//   val      amount subtracted from *address
//
// Each overload returns the value held at *address before the subtraction.
int atomicSub(int* address, int val) {
  return __atomic_fetch_sub(address, val, __ATOMIC_RELAXED);
}

unsigned int atomicSub(unsigned int* address, unsigned int val) {
  return __atomic_fetch_sub(address, val, __ATOMIC_RELAXED);
}
// Integer atomic exchange overloads built on __atomic_exchange_n with
// __ATOMIC_RELAXED ordering.
//
//   address  location to overwrite
//   val      value stored into *address
//
// Each overload returns the value that was replaced.
int atomicExch(int* address, int val) {
  return __atomic_exchange_n(address, val, __ATOMIC_RELAXED);
}

unsigned int atomicExch(unsigned int* address, unsigned int val) {
  return __atomic_exchange_n(address, val, __ATOMIC_RELAXED);
}

unsigned long long atomicExch(unsigned long long* address, unsigned long long val) {
  return __atomic_exchange_n(address, val, __ATOMIC_RELAXED);
}
531 float atomicExch(
float* address,
float val)
533 return __uint_as_float(__atomic_exchange_n(
534 reinterpret_cast<unsigned int*
>(address),
535 __float_as_uint(val),
541 int atomicMin(
int* address,
int val)
543 return __atomic_fetch_min(address, val, __ATOMIC_RELAXED);
547 unsigned int atomicMin(
unsigned int* address,
unsigned int val)
549 return __atomic_fetch_min(address, val, __ATOMIC_RELAXED);
// 64-bit unsigned atomic minimum implemented as a compare-and-swap retry loop.
//
//   address  location to update
//   val      candidate value compared against *address
//
// Returns the value held at *address immediately before this call's update, or
// the value observed when no update was needed (val >= *address).
//
// The original text had a `continue` with no enclosing loop and no return
// statement. The loop below also drops the separate re-load: a failed
// __atomic_compare_exchange_n already refreshes `seen` with the current
// contents, and on success `seen` still holds the replaced value, which is what
// the caller must receive.
unsigned long long atomicMin(unsigned long long* address, unsigned long long val) {
  unsigned long long seen = __atomic_load_n(address, __ATOMIC_RELAXED);
  while (val < seen) {
    if (__atomic_compare_exchange_n(address, &seen, val, false, __ATOMIC_RELAXED,
                                    __ATOMIC_RELAXED)) {
      break;
    }
  }
  return seen;
}
570 int atomicMax(
int* address,
int val)
572 return __atomic_fetch_max(address, val, __ATOMIC_RELAXED);
576 unsigned int atomicMax(
unsigned int* address,
unsigned int val)
578 return __atomic_fetch_max(address, val, __ATOMIC_RELAXED);
// 64-bit unsigned atomic maximum implemented as a compare-and-swap retry loop.
//
//   address  location to update
//   val      candidate value compared against *address
//
// Returns the value held at *address immediately before this call's update, or
// the value observed when no update was needed (val <= *address).
//
// The original text had a `continue` with no enclosing loop and no return
// statement. The loop below also drops the separate re-load: a failed
// __atomic_compare_exchange_n already refreshes `seen` with the current
// contents, and on success `seen` still holds the replaced value, which is what
// the caller must receive.
unsigned long long atomicMax(unsigned long long* address, unsigned long long val) {
  unsigned long long seen = __atomic_load_n(address, __ATOMIC_RELAXED);
  while (seen < val) {
    if (__atomic_compare_exchange_n(address, &seen, val, false, __ATOMIC_RELAXED,
                                    __ATOMIC_RELAXED)) {
      break;
    }
  }
  return seen;
}
// atomicInc: forwards address and val to __builtin_amdgcn_atomic_inc and
// returns whatever that call yields.
//
// The declaration inside the function gives the name
// __builtin_amdgcn_atomic_inc the assembler label
// "llvm.amdgcn.atomic.inc.i32.p0i32".
//
// NOTE(review): the function braces and every parameter of the declaration
// except the trailing bool are not visible in this excerpt — restore them from
// the upstream header before building.
599 unsigned int atomicInc(
unsigned int* address,
unsigned int val)
603 unsigned int __builtin_amdgcn_atomic_inc(
608 bool) __asm(
"llvm.amdgcn.atomic.inc.i32.p0i32");
// Arguments passed, in order: the target pointer, the operand,
// __ATOMIC_RELAXED, the literal 1, and false.
// NOTE(review): the literal 1 is presumably a synchronization-scope id and the
// final false presumably a volatile flag — confirm against the intrinsic.
610 return __builtin_amdgcn_atomic_inc(
611 address, val, __ATOMIC_RELAXED, 1 ,
false);
// atomicDec: forwards address and val to __builtin_amdgcn_atomic_dec and
// returns whatever that call yields.
//
// The declaration inside the function gives the name
// __builtin_amdgcn_atomic_dec the assembler label
// "llvm.amdgcn.atomic.dec.i32.p0i32".
//
// NOTE(review): the function braces and every parameter of the declaration
// except the trailing bool are not visible in this excerpt — restore them from
// the upstream header before building.
616 unsigned int atomicDec(
unsigned int* address,
unsigned int val)
620 unsigned int __builtin_amdgcn_atomic_dec(
625 bool) __asm(
"llvm.amdgcn.atomic.dec.i32.p0i32");
// Arguments passed, in order: the target pointer, the operand,
// __ATOMIC_RELAXED, the literal 1, and false.
// NOTE(review): the literal 1 is presumably a synchronization-scope id and the
// final false presumably a volatile flag — confirm against the intrinsic.
627 return __builtin_amdgcn_atomic_dec(
628 address, val, __ATOMIC_RELAXED, 1 ,
false);
// Atomic bitwise-AND overloads built on __atomic_fetch_and with
// __ATOMIC_RELAXED ordering.
//
//   address  location to update
//   val      mask combined with *address
//
// Each overload returns the value held at *address before the update.
int atomicAnd(int* address, int val) {
  return __atomic_fetch_and(address, val, __ATOMIC_RELAXED);
}

unsigned int atomicAnd(unsigned int* address, unsigned int val) {
  return __atomic_fetch_and(address, val, __ATOMIC_RELAXED);
}

unsigned long long atomicAnd(unsigned long long* address, unsigned long long val) {
  return __atomic_fetch_and(address, val, __ATOMIC_RELAXED);
}
// Atomic bitwise-OR overloads built on __atomic_fetch_or with
// __ATOMIC_RELAXED ordering.
//
//   address  location to update
//   val      bits combined with *address
//
// Each overload returns the value held at *address before the update.
int atomicOr(int* address, int val) {
  return __atomic_fetch_or(address, val, __ATOMIC_RELAXED);
}

unsigned int atomicOr(unsigned int* address, unsigned int val) {
  return __atomic_fetch_or(address, val, __ATOMIC_RELAXED);
}

unsigned long long atomicOr(unsigned long long* address, unsigned long long val) {
  return __atomic_fetch_or(address, val, __ATOMIC_RELAXED);
}
// Atomic bitwise-XOR overloads built on __atomic_fetch_xor with
// __ATOMIC_RELAXED ordering.
//
//   address  location to update
//   val      bits combined with *address
//
// Each overload returns the value held at *address before the update.
int atomicXor(int* address, int val) {
  return __atomic_fetch_xor(address, val, __ATOMIC_RELAXED);
}

unsigned int atomicXor(unsigned int* address, unsigned int val) {
  return __atomic_fetch_xor(address, val, __ATOMIC_RELAXED);
}

unsigned long long atomicXor(unsigned long long* address, unsigned long long val) {
  return __atomic_fetch_xor(address, val, __ATOMIC_RELAXED);
}