/*
 * HIP: Heterogeneous-computing Interface for Portability
 * hip_atomic.h
 */
3 #include "device_functions.h"
4 
5 #if __has_builtin(__hip_atomic_compare_exchange_strong)
6 
7 #if !__HIP_DEVICE_COMPILE__
8 //TODO: Remove this after compiler pre-defines the following Macros.
9 #define __HIP_MEMORY_SCOPE_SINGLETHREAD 1
10 #define __HIP_MEMORY_SCOPE_WAVEFRONT 2
11 #define __HIP_MEMORY_SCOPE_WORKGROUP 3
12 #define __HIP_MEMORY_SCOPE_AGENT 4
13 #define __HIP_MEMORY_SCOPE_SYSTEM 5
14 #endif
15 
16 __device__
17 inline
18 int atomicCAS(int* address, int compare, int val) {
19  __hip_atomic_compare_exchange_strong(address, &compare, val, __ATOMIC_RELAXED, __ATOMIC_RELAXED,
20  __HIP_MEMORY_SCOPE_AGENT);
21  return compare;
22 }
23 
24 __device__
25 inline
26 int atomicCAS_system(int* address, int compare, int val) {
27  __hip_atomic_compare_exchange_strong(address, &compare, val, __ATOMIC_RELAXED, __ATOMIC_RELAXED,
28  __HIP_MEMORY_SCOPE_SYSTEM);
29  return compare;
30 }
31 
32 __device__
33 inline
34 unsigned int atomicCAS(unsigned int* address, unsigned int compare, unsigned int val) {
35  __hip_atomic_compare_exchange_strong(address, &compare, val, __ATOMIC_RELAXED, __ATOMIC_RELAXED,
36  __HIP_MEMORY_SCOPE_AGENT);
37  return compare;
38 }
39 
40 __device__
41 inline
42 unsigned int atomicCAS_system(unsigned int* address, unsigned int compare, unsigned int val) {
43  __hip_atomic_compare_exchange_strong(address, &compare, val, __ATOMIC_RELAXED, __ATOMIC_RELAXED,
44  __HIP_MEMORY_SCOPE_SYSTEM);
45  return compare;
46 }
47 
48 __device__
49 inline
50 unsigned long long atomicCAS(unsigned long long* address, unsigned long long compare,
51  unsigned long long val) {
52  __hip_atomic_compare_exchange_strong(address, &compare, val, __ATOMIC_RELAXED, __ATOMIC_RELAXED,
53  __HIP_MEMORY_SCOPE_AGENT);
54  return compare;
55 }
56 
57 __device__
58 inline
59 unsigned long long atomicCAS_system(unsigned long long* address, unsigned long long compare,
60  unsigned long long val) {
61  __hip_atomic_compare_exchange_strong(address, &compare, val, __ATOMIC_RELAXED, __ATOMIC_RELAXED,
62  __HIP_MEMORY_SCOPE_SYSTEM);
63  return compare;
64 }
65 
66 __device__
67 inline
68 int atomicAdd(int* address, int val) {
69  return __hip_atomic_fetch_add(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
70 }
71 
72 __device__
73 inline
74 int atomicAdd_system(int* address, int val) {
75  return __hip_atomic_fetch_add(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
76 }
77 
78 __device__
79 inline
80 unsigned int atomicAdd(unsigned int* address, unsigned int val) {
81  return __hip_atomic_fetch_add(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
82 }
83 
84 __device__
85 inline
86 unsigned int atomicAdd_system(unsigned int* address, unsigned int val) {
87  return __hip_atomic_fetch_add(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
88 }
89 
90 __device__
91 inline
92 unsigned long long atomicAdd(unsigned long long* address, unsigned long long val) {
93  return __hip_atomic_fetch_add(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
94 }
95 
96 __device__
97 inline
98 unsigned long long atomicAdd_system(unsigned long long* address, unsigned long long val) {
99  return __hip_atomic_fetch_add(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
100 }
101 
102 __device__
103 inline
104 float atomicAdd(float* address, float val) {
105  return __hip_atomic_fetch_add(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
106 }
107 
108 __device__
109 inline
110 float atomicAdd_system(float* address, float val) {
111  return __hip_atomic_fetch_add(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
112 }
113 
// Fire-and-forget float add via the OCKL device library; unlike atomicAdd it
// produces no return value. Deprecated in favor of atomicAdd (the DEPRECATED
// attribute macro is unavailable under hipRTC, hence the guard).
#if !defined(__HIPCC_RTC__)
DEPRECATED("use atomicAdd instead")
#endif // !defined(__HIPCC_RTC__)
__device__
inline
void atomicAddNoRet(float* address, float val)
{
    __ockl_atomic_add_noret_f32(address, val);
}
123 
124 __device__
125 inline
126 double atomicAdd(double* address, double val) {
127  return __hip_atomic_fetch_add(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
128 }
129 
130 __device__
131 inline
132 double atomicAdd_system(double* address, double val) {
133  return __hip_atomic_fetch_add(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
134 }
135 
136 __device__
137 inline
138 int atomicSub(int* address, int val) {
139  return __hip_atomic_fetch_add(address, -val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
140 }
141 
142 __device__
143 inline
144 int atomicSub_system(int* address, int val) {
145  return __hip_atomic_fetch_add(address, -val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
146 }
147 
148 __device__
149 inline
150 unsigned int atomicSub(unsigned int* address, unsigned int val) {
151  return __hip_atomic_fetch_add(address, -val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
152 }
153 
154 __device__
155 inline
156 unsigned int atomicSub_system(unsigned int* address, unsigned int val) {
157  return __hip_atomic_fetch_add(address, -val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
158 }
159 
160 __device__
161 inline
162 int atomicExch(int* address, int val) {
163  return __hip_atomic_exchange(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
164 }
165 
166 __device__
167 inline
168 int atomicExch_system(int* address, int val) {
169  return __hip_atomic_exchange(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
170 }
171 
172 __device__
173 inline
174 unsigned int atomicExch(unsigned int* address, unsigned int val) {
175  return __hip_atomic_exchange(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
176 }
177 
178 __device__
179 inline
180 unsigned int atomicExch_system(unsigned int* address, unsigned int val) {
181  return __hip_atomic_exchange(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
182 }
183 
184 __device__
185 inline
186 unsigned long long atomicExch(unsigned long long* address, unsigned long long val) {
187  return __hip_atomic_exchange(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
188 }
189 
190 __device__
191 inline
192 unsigned long long atomicExch_system(unsigned long long* address, unsigned long long val) {
193  return __hip_atomic_exchange(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
194 }
195 
196 __device__
197 inline
198 float atomicExch(float* address, float val) {
199  return __hip_atomic_exchange(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
200 }
201 
202 __device__
203 inline
204 float atomicExch_system(float* address, float val) {
205  return __hip_atomic_exchange(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
206 }
207 
208 __device__
209 inline
210 int atomicMin(int* address, int val) {
211  return __hip_atomic_fetch_min(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
212 }
213 
214 __device__
215 inline
216 int atomicMin_system(int* address, int val) {
217  return __hip_atomic_fetch_min(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
218 }
219 
220 __device__
221 inline
222 unsigned int atomicMin(unsigned int* address, unsigned int val) {
223  return __hip_atomic_fetch_min(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
224 }
225 
226 __device__
227 inline
228 unsigned int atomicMin_system(unsigned int* address, unsigned int val) {
229  return __hip_atomic_fetch_min(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
230 }
231 
232 __device__
233 inline
234 unsigned long long atomicMin(unsigned long long* address, unsigned long long val) {
235  return __hip_atomic_fetch_min(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
236 }
237 
238 __device__
239 inline
240 unsigned long long atomicMin_system(unsigned long long* address, unsigned long long val) {
241  return __hip_atomic_fetch_min(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
242 }
243 
244 __device__
245 inline
246 int atomicMax(int* address, int val) {
247  return __hip_atomic_fetch_max(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
248 }
249 
250 __device__
251 inline
252 int atomicMax_system(int* address, int val) {
253  return __hip_atomic_fetch_max(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
254 }
255 
256 __device__
257 inline
258 unsigned int atomicMax(unsigned int* address, unsigned int val) {
259  return __hip_atomic_fetch_max(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
260 }
261 
262 __device__
263 inline
264 unsigned int atomicMax_system(unsigned int* address, unsigned int val) {
265  return __hip_atomic_fetch_max(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
266 }
267 
268 __device__
269 inline
270 unsigned long long atomicMax(unsigned long long* address, unsigned long long val) {
271  return __hip_atomic_fetch_max(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
272 }
273 
274 __device__
275 inline
276 unsigned long long atomicMax_system(unsigned long long* address, unsigned long long val) {
277  return __hip_atomic_fetch_max(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
278 }
279 
__device__
inline
unsigned int atomicInc(unsigned int* address, unsigned int val)
{
    // Forward-declare the AMDGCN intrinsic, bound directly to its LLVM name.
    __device__
    extern
    unsigned int __builtin_amdgcn_atomic_inc(
        unsigned int*,
        unsigned int,
        unsigned int,
        unsigned int,
        bool) __asm("llvm.amdgcn.atomic.inc.i32.p0i32");

    // Relaxed ordering, scope argument 1 (device), non-volatile.
    // NOTE(review): llvm.amdgcn.atomic.inc is expected to implement the
    // CUDA-style wrapping increment ((old >= val) ? 0 : old + 1) and return
    // the old value -- confirm against the AMDGCN intrinsic documentation.
    return __builtin_amdgcn_atomic_inc(
        address, val, __ATOMIC_RELAXED, 1 /* Device scope */, false);
}
296 
__device__
inline
unsigned int atomicDec(unsigned int* address, unsigned int val)
{
    // Forward-declare the AMDGCN intrinsic, bound directly to its LLVM name.
    __device__
    extern
    unsigned int __builtin_amdgcn_atomic_dec(
        unsigned int*,
        unsigned int,
        unsigned int,
        unsigned int,
        bool) __asm("llvm.amdgcn.atomic.dec.i32.p0i32");

    // Relaxed ordering, scope argument 1 (device), non-volatile.
    // NOTE(review): llvm.amdgcn.atomic.dec is expected to implement the
    // CUDA-style wrapping decrement (((old == 0) || (old > val)) ? val :
    // old - 1) and return the old value -- confirm against the AMDGCN
    // intrinsic documentation.
    return __builtin_amdgcn_atomic_dec(
        address, val, __ATOMIC_RELAXED, 1 /* Device scope */, false);
}
313 
314 __device__
315 inline
316 int atomicAnd(int* address, int val) {
317  return __hip_atomic_fetch_and(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
318 }
319 
320 __device__
321 inline
322 int atomicAnd_system(int* address, int val) {
323  return __hip_atomic_fetch_and(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
324 }
325 
326 __device__
327 inline
328 unsigned int atomicAnd(unsigned int* address, unsigned int val) {
329  return __hip_atomic_fetch_and(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
330 }
331 
332 __device__
333 inline
334 unsigned int atomicAnd_system(unsigned int* address, unsigned int val) {
335  return __hip_atomic_fetch_and(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
336 }
337 __device__
338 inline
339 unsigned long long atomicAnd(unsigned long long* address, unsigned long long val) {
340  return __hip_atomic_fetch_and(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
341 }
342 
343 __device__
344 inline
345 unsigned long long atomicAnd_system(unsigned long long* address, unsigned long long val) {
346  return __hip_atomic_fetch_and(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
347 }
348 
349 __device__
350 inline
351 int atomicOr(int* address, int val) {
352  return __hip_atomic_fetch_or(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
353 }
354 
355 __device__
356 inline
357 int atomicOr_system(int* address, int val) {
358  return __hip_atomic_fetch_or(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
359 }
360 
361 __device__
362 inline
363 unsigned int atomicOr(unsigned int* address, unsigned int val) {
364  return __hip_atomic_fetch_or(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
365 }
366 
367 __device__
368 inline
369 unsigned int atomicOr_system(unsigned int* address, unsigned int val) {
370  return __hip_atomic_fetch_or(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
371 }
372 
373 __device__
374 inline
375 unsigned long long atomicOr(unsigned long long* address, unsigned long long val) {
376  return __hip_atomic_fetch_or(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
377 }
378 
379 __device__
380 inline
381 unsigned long long atomicOr_system(unsigned long long* address, unsigned long long val) {
382  return __hip_atomic_fetch_or(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
383 }
384 
385 __device__
386 inline
387 int atomicXor(int* address, int val) {
388  return __hip_atomic_fetch_xor(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
389 }
390 
391 __device__
392 inline
393 int atomicXor_system(int* address, int val) {
394  return __hip_atomic_fetch_xor(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
395 }
396 
397 __device__
398 inline
399 unsigned int atomicXor(unsigned int* address, unsigned int val) {
400  return __hip_atomic_fetch_xor(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
401 }
402 
403 __device__
404 inline
405 unsigned int atomicXor_system(unsigned int* address, unsigned int val) {
406  return __hip_atomic_fetch_xor(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
407 }
408 
409 __device__
410 inline
411 unsigned long long atomicXor(unsigned long long* address, unsigned long long val) {
412  return __hip_atomic_fetch_xor(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
413 }
414 
415 __device__
416 inline
417 unsigned long long atomicXor_system(unsigned long long* address, unsigned long long val) {
418  return __hip_atomic_fetch_xor(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
419 }
420 
421 #else
422 
423 __device__
424 inline
425 int atomicCAS(int* address, int compare, int val)
426 {
427  __atomic_compare_exchange_n(
428  address, &compare, val, false, __ATOMIC_RELAXED, __ATOMIC_RELAXED);
429 
430  return compare;
431 }
432 __device__
433 inline
434 unsigned int atomicCAS(
435  unsigned int* address, unsigned int compare, unsigned int val)
436 {
437  __atomic_compare_exchange_n(
438  address, &compare, val, false, __ATOMIC_RELAXED, __ATOMIC_RELAXED);
439 
440  return compare;
441 }
442 __device__
443 inline
444 unsigned long long atomicCAS(
445  unsigned long long* address,
446  unsigned long long compare,
447  unsigned long long val)
448 {
449  __atomic_compare_exchange_n(
450  address, &compare, val, false, __ATOMIC_RELAXED, __ATOMIC_RELAXED);
451 
452  return compare;
453 }
454 
455 __device__
456 inline
457 int atomicAdd(int* address, int val)
458 {
459  return __atomic_fetch_add(address, val, __ATOMIC_RELAXED);
460 }
461 __device__
462 inline
463 unsigned int atomicAdd(unsigned int* address, unsigned int val)
464 {
465  return __atomic_fetch_add(address, val, __ATOMIC_RELAXED);
466 }
467 __device__
468 inline
469 unsigned long long atomicAdd(
470  unsigned long long* address, unsigned long long val)
471 {
472  return __atomic_fetch_add(address, val, __ATOMIC_RELAXED);
473 }
474 __device__
475 inline
476 float atomicAdd(float* address, float val)
477 {
478  return __atomic_fetch_add(address, val, __ATOMIC_RELAXED);
479 }
480 
// Fire-and-forget float add via the OCKL device library; unlike atomicAdd it
// produces no return value. Deprecated in favor of atomicAdd (the DEPRECATED
// attribute macro is unavailable under hipRTC, hence the guard).
#if !defined(__HIPCC_RTC__)
DEPRECATED("use atomicAdd instead")
#endif // !defined(__HIPCC_RTC__)
__device__
inline
void atomicAddNoRet(float* address, float val)
{
    __ockl_atomic_add_noret_f32(address, val);
}
490 
491 __device__
492 inline
493 double atomicAdd(double* address, double val)
494 {
495  return __atomic_fetch_add(address, val, __ATOMIC_RELAXED);
496 }
497 
498 __device__
499 inline
500 int atomicSub(int* address, int val)
501 {
502  return __atomic_fetch_sub(address, val, __ATOMIC_RELAXED);
503 }
504 __device__
505 inline
506 unsigned int atomicSub(unsigned int* address, unsigned int val)
507 {
508  return __atomic_fetch_sub(address, val, __ATOMIC_RELAXED);
509 }
510 
511 __device__
512 inline
513 int atomicExch(int* address, int val)
514 {
515  return __atomic_exchange_n(address, val, __ATOMIC_RELAXED);
516 }
517 __device__
518 inline
519 unsigned int atomicExch(unsigned int* address, unsigned int val)
520 {
521  return __atomic_exchange_n(address, val, __ATOMIC_RELAXED);
522 }
523 __device__
524 inline
525 unsigned long long atomicExch(unsigned long long* address, unsigned long long val)
526 {
527  return __atomic_exchange_n(address, val, __ATOMIC_RELAXED);
528 }
529 __device__
530 inline
531 float atomicExch(float* address, float val)
532 {
533  return __uint_as_float(__atomic_exchange_n(
534  reinterpret_cast<unsigned int*>(address),
535  __float_as_uint(val),
536  __ATOMIC_RELAXED));
537 }
538 
539 __device__
540 inline
541 int atomicMin(int* address, int val)
542 {
543  return __atomic_fetch_min(address, val, __ATOMIC_RELAXED);
544 }
545 __device__
546 inline
547 unsigned int atomicMin(unsigned int* address, unsigned int val)
548 {
549  return __atomic_fetch_min(address, val, __ATOMIC_RELAXED);
550 }
551 __device__
552 inline
553 unsigned long long atomicMin(
554  unsigned long long* address, unsigned long long val)
555 {
556  unsigned long long tmp{__atomic_load_n(address, __ATOMIC_RELAXED)};
557  while (val < tmp) {
558  const auto tmp1 = __atomic_load_n(address, __ATOMIC_RELAXED);
559 
560  if (tmp1 != tmp) { tmp = tmp1; continue; }
561 
562  tmp = atomicCAS(address, tmp, val);
563  }
564 
565  return tmp;
566 }
567 
568 __device__
569 inline
570 int atomicMax(int* address, int val)
571 {
572  return __atomic_fetch_max(address, val, __ATOMIC_RELAXED);
573 }
574 __device__
575 inline
576 unsigned int atomicMax(unsigned int* address, unsigned int val)
577 {
578  return __atomic_fetch_max(address, val, __ATOMIC_RELAXED);
579 }
580 __device__
581 inline
582 unsigned long long atomicMax(
583  unsigned long long* address, unsigned long long val)
584 {
585  unsigned long long tmp{__atomic_load_n(address, __ATOMIC_RELAXED)};
586  while (tmp < val) {
587  const auto tmp1 = __atomic_load_n(address, __ATOMIC_RELAXED);
588 
589  if (tmp1 != tmp) { tmp = tmp1; continue; }
590 
591  tmp = atomicCAS(address, tmp, val);
592  }
593 
594  return tmp;
595 }
596 
__device__
inline
unsigned int atomicInc(unsigned int* address, unsigned int val)
{
    // Forward-declare the AMDGCN intrinsic, bound directly to its LLVM name.
    __device__
    extern
    unsigned int __builtin_amdgcn_atomic_inc(
        unsigned int*,
        unsigned int,
        unsigned int,
        unsigned int,
        bool) __asm("llvm.amdgcn.atomic.inc.i32.p0i32");

    // Relaxed ordering, scope argument 1 (device), non-volatile.
    // NOTE(review): llvm.amdgcn.atomic.inc is expected to implement the
    // CUDA-style wrapping increment ((old >= val) ? 0 : old + 1) and return
    // the old value -- confirm against the AMDGCN intrinsic documentation.
    return __builtin_amdgcn_atomic_inc(
        address, val, __ATOMIC_RELAXED, 1 /* Device scope */, false);
}
613 
__device__
inline
unsigned int atomicDec(unsigned int* address, unsigned int val)
{
    // Forward-declare the AMDGCN intrinsic, bound directly to its LLVM name.
    __device__
    extern
    unsigned int __builtin_amdgcn_atomic_dec(
        unsigned int*,
        unsigned int,
        unsigned int,
        unsigned int,
        bool) __asm("llvm.amdgcn.atomic.dec.i32.p0i32");

    // Relaxed ordering, scope argument 1 (device), non-volatile.
    // NOTE(review): llvm.amdgcn.atomic.dec is expected to implement the
    // CUDA-style wrapping decrement (((old == 0) || (old > val)) ? val :
    // old - 1) and return the old value -- confirm against the AMDGCN
    // intrinsic documentation.
    return __builtin_amdgcn_atomic_dec(
        address, val, __ATOMIC_RELAXED, 1 /* Device scope */, false);
}
630 
631 __device__
632 inline
633 int atomicAnd(int* address, int val)
634 {
635  return __atomic_fetch_and(address, val, __ATOMIC_RELAXED);
636 }
637 __device__
638 inline
639 unsigned int atomicAnd(unsigned int* address, unsigned int val)
640 {
641  return __atomic_fetch_and(address, val, __ATOMIC_RELAXED);
642 }
643 __device__
644 inline
645 unsigned long long atomicAnd(
646  unsigned long long* address, unsigned long long val)
647 {
648  return __atomic_fetch_and(address, val, __ATOMIC_RELAXED);
649 }
650 
651 __device__
652 inline
653 int atomicOr(int* address, int val)
654 {
655  return __atomic_fetch_or(address, val, __ATOMIC_RELAXED);
656 }
657 __device__
658 inline
659 unsigned int atomicOr(unsigned int* address, unsigned int val)
660 {
661  return __atomic_fetch_or(address, val, __ATOMIC_RELAXED);
662 }
663 __device__
664 inline
665 unsigned long long atomicOr(
666  unsigned long long* address, unsigned long long val)
667 {
668  return __atomic_fetch_or(address, val, __ATOMIC_RELAXED);
669 }
670 
671 __device__
672 inline
673 int atomicXor(int* address, int val)
674 {
675  return __atomic_fetch_xor(address, val, __ATOMIC_RELAXED);
676 }
677 __device__
678 inline
679 unsigned int atomicXor(unsigned int* address, unsigned int val)
680 {
681  return __atomic_fetch_xor(address, val, __ATOMIC_RELAXED);
682 }
683 __device__
684 inline
685 unsigned long long atomicXor(
686  unsigned long long* address, unsigned long long val)
687 {
688  return __atomic_fetch_xor(address, val, __ATOMIC_RELAXED);
689 }
690 
691 #endif