HIP: Heterogeneous-computing Interface for Portability
hip_atomic.h
#pragma once

#include "device_functions.h"

// Each atomicCAS overload returns the value that was at *address before the
// operation: __atomic_compare_exchange_n writes the observed value back into
// `compare` on failure, and on success `compare` already equals the old
// value, so returning `compare` is correct in both cases.
__device__
inline
int atomicCAS(int* address, int compare, int val)
{
    __atomic_compare_exchange_n(
        address, &compare, val, false, __ATOMIC_RELAXED, __ATOMIC_RELAXED);

    return compare;
}
__device__
inline
unsigned int atomicCAS(
    unsigned int* address, unsigned int compare, unsigned int val)
{
    __atomic_compare_exchange_n(
        address, &compare, val, false, __ATOMIC_RELAXED, __ATOMIC_RELAXED);

    return compare;
}
__device__
inline
unsigned long long atomicCAS(
    unsigned long long* address,
    unsigned long long compare,
    unsigned long long val)
{
    __atomic_compare_exchange_n(
        address, &compare, val, false, __ATOMIC_RELAXED, __ATOMIC_RELAXED);

    return compare;
}
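
// Usage sketch (illustrative, not part of the original header): atomicCAS is
// the building block for atomic operations without direct hardware support.
// For instance, an atomic multiply on unsigned int can be emulated with a
// compare-and-swap retry loop; atomicMulExample is a hypothetical name used
// only here.
__device__
inline
unsigned int atomicMulExample(unsigned int* address, unsigned int val)
{
    unsigned int old = __atomic_load_n(address, __ATOMIC_RELAXED);
    unsigned int assumed;
    do {
        assumed = old;
        // atomicCAS returns the pre-operation value; a mismatch means another
        // thread intervened, so retry with the freshly observed value.
        old = atomicCAS(address, assumed, assumed * val);
    } while (old != assumed);

    return old;
}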

__device__
inline
int atomicAdd(int* address, int val)
{
    return __atomic_fetch_add(address, val, __ATOMIC_RELAXED);
}
__device__
inline
unsigned int atomicAdd(unsigned int* address, unsigned int val)
{
    return __atomic_fetch_add(address, val, __ATOMIC_RELAXED);
}
__device__
inline
unsigned long long atomicAdd(
    unsigned long long* address, unsigned long long val)
{
    return __atomic_fetch_add(address, val, __ATOMIC_RELAXED);
}
// __atomic_fetch_add on floating-point operands is a compiler extension
// provided by the HIP Clang toolchain rather than standard C++.
__device__
inline
float atomicAdd(float* address, float val)
{
    return __atomic_fetch_add(address, val, __ATOMIC_RELAXED);
}

DEPRECATED("use atomicAdd instead")
__device__
inline
void atomicAddNoRet(float* address, float val)
{
    // The no-return form maps to an OCKL intrinsic, letting the backend emit
    // an atomic add that never reads back the old value.
    __ockl_atomic_add_noret_f32(address, val);
}

__device__
inline
double atomicAdd(double* address, double val)
{
    return __atomic_fetch_add(address, val, __ATOMIC_RELAXED);
}
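
// Usage sketch (illustrative, not part of the original header): the canonical
// atomicAdd pattern, many threads accumulating into a single location.
// sumKernelExample is a hypothetical name used only here.
__global__
void sumKernelExample(const float* in, unsigned int n, float* out)
{
    const unsigned int i = blockIdx.x * blockDim.x + threadIdx.x;

    // Relaxed device-scope add; contended updates to *out are serialised.
    if (i < n) atomicAdd(out, in[i]);
}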

__device__
inline
int atomicSub(int* address, int val)
{
    return __atomic_fetch_sub(address, val, __ATOMIC_RELAXED);
}
__device__
inline
unsigned int atomicSub(unsigned int* address, unsigned int val)
{
    return __atomic_fetch_sub(address, val, __ATOMIC_RELAXED);
}

__device__
inline
int atomicExch(int* address, int val)
{
    return __atomic_exchange_n(address, val, __ATOMIC_RELAXED);
}
__device__
inline
unsigned int atomicExch(unsigned int* address, unsigned int val)
{
    return __atomic_exchange_n(address, val, __ATOMIC_RELAXED);
}
__device__
inline
unsigned long long atomicExch(
    unsigned long long* address, unsigned long long val)
{
    return __atomic_exchange_n(address, val, __ATOMIC_RELAXED);
}
__device__
inline
float atomicExch(float* address, float val)
{
    // Exchange the raw 32-bit pattern as an unsigned int and reinterpret the
    // previous contents as float; an exchange only moves bits, so no
    // floating-point arithmetic is involved.
    return __uint_as_float(__atomic_exchange_n(
        reinterpret_cast<unsigned int*>(address),
        __float_as_uint(val),
        __ATOMIC_RELAXED));
}
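
// Sketch (illustrative, not part of the original header): the bit-pattern
// trick above extends to double through the 64-bit overload, assuming the
// usual __double_as_longlong / __longlong_as_double conversions are
// available. atomicExchDoubleExample is a hypothetical name used only here.
__device__
inline
double atomicExchDoubleExample(double* address, double val)
{
    // An exchange only moves bits, so routing the payload through a 64-bit
    // integer preserves the floating-point value exactly.
    const unsigned long long old = atomicExch(
        reinterpret_cast<unsigned long long*>(address),
        static_cast<unsigned long long>(__double_as_longlong(val)));

    return __longlong_as_double(static_cast<long long>(old));
}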

__device__
inline
int atomicMin(int* address, int val)
{
    return __atomic_fetch_min(address, val, __ATOMIC_RELAXED);
}
__device__
inline
unsigned int atomicMin(unsigned int* address, unsigned int val)
{
    return __atomic_fetch_min(address, val, __ATOMIC_RELAXED);
}
__device__
inline
unsigned long long atomicMin(
    unsigned long long* address, unsigned long long val)
{
    // The 64-bit overload emulates the minimum with a compare-and-swap retry
    // loop rather than a fetch-min builtin.
    unsigned long long tmp{__atomic_load_n(address, __ATOMIC_RELAXED)};
    while (val < tmp) {
        const auto tmp1 = __atomic_load_n(address, __ATOMIC_RELAXED);

        // Another thread changed *address; retry against the fresh value.
        if (tmp1 != tmp) { tmp = tmp1; continue; }

        const auto observed = atomicCAS(address, tmp, val);

        // A successful exchange leaves tmp holding the pre-operation value,
        // which is what atomicMin must return; on failure, retry with the
        // value atomicCAS observed.
        if (observed == tmp) break;

        tmp = observed;
    }

    return tmp;
}

__device__
inline
int atomicMax(int* address, int val)
{
    return __atomic_fetch_max(address, val, __ATOMIC_RELAXED);
}
__device__
inline
unsigned int atomicMax(unsigned int* address, unsigned int val)
{
    return __atomic_fetch_max(address, val, __ATOMIC_RELAXED);
}
__device__
inline
unsigned long long atomicMax(
    unsigned long long* address, unsigned long long val)
{
    // As with the 64-bit atomicMin, emulate the maximum with a
    // compare-and-swap retry loop.
    unsigned long long tmp{__atomic_load_n(address, __ATOMIC_RELAXED)};
    while (tmp < val) {
        const auto tmp1 = __atomic_load_n(address, __ATOMIC_RELAXED);

        // Another thread changed *address; retry against the fresh value.
        if (tmp1 != tmp) { tmp = tmp1; continue; }

        const auto observed = atomicCAS(address, tmp, val);

        // On success tmp already holds the pre-operation value to return; on
        // failure, retry with the value atomicCAS observed.
        if (observed == tmp) break;

        tmp = observed;
    }

    return tmp;
}

__device__
inline
unsigned int atomicInc(unsigned int* address, unsigned int val)
{
    // Wrapping increment: stores ((old >= val) ? 0 : old + 1) and returns
    // old, binding directly to the AMDGCN intrinsic declared below.
    __device__
    extern
    unsigned int __builtin_amdgcn_atomic_inc(
        unsigned int*,
        unsigned int,
        unsigned int,
        unsigned int,
        bool) __asm("llvm.amdgcn.atomic.inc.i32.p0i32");

    return __builtin_amdgcn_atomic_inc(
        address, val, __ATOMIC_RELAXED, 1 /* Device scope */, false);
}

__device__
inline
unsigned int atomicDec(unsigned int* address, unsigned int val)
{
    // Wrapping decrement: stores (((old == 0) || (old > val)) ? val : old - 1)
    // and returns old.
    __device__
    extern
    unsigned int __builtin_amdgcn_atomic_dec(
        unsigned int*,
        unsigned int,
        unsigned int,
        unsigned int,
        bool) __asm("llvm.amdgcn.atomic.dec.i32.p0i32");

    return __builtin_amdgcn_atomic_dec(
        address, val, __ATOMIC_RELAXED, 1 /* Device scope */, false);
}
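
// Usage sketch (illustrative, not part of the original header): the wrapping
// behaviour of atomicInc suits a ring-buffer cursor. nextSlotExample is a
// hypothetical helper name used only here.
__device__
inline
unsigned int nextSlotExample(unsigned int* cursor, unsigned int capacity)
{
    // Returns the claimed slot; the stored cursor cycles through
    // 0 .. capacity - 1 and then wraps back to 0.
    return atomicInc(cursor, capacity - 1);
}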

__device__
inline
int atomicAnd(int* address, int val)
{
    return __atomic_fetch_and(address, val, __ATOMIC_RELAXED);
}
__device__
inline
unsigned int atomicAnd(unsigned int* address, unsigned int val)
{
    return __atomic_fetch_and(address, val, __ATOMIC_RELAXED);
}
__device__
inline
unsigned long long atomicAnd(
    unsigned long long* address, unsigned long long val)
{
    return __atomic_fetch_and(address, val, __ATOMIC_RELAXED);
}

__device__
inline
int atomicOr(int* address, int val)
{
    return __atomic_fetch_or(address, val, __ATOMIC_RELAXED);
}
__device__
inline
unsigned int atomicOr(unsigned int* address, unsigned int val)
{
    return __atomic_fetch_or(address, val, __ATOMIC_RELAXED);
}
__device__
inline
unsigned long long atomicOr(
    unsigned long long* address, unsigned long long val)
{
    return __atomic_fetch_or(address, val, __ATOMIC_RELAXED);
}

__device__
inline
int atomicXor(int* address, int val)
{
    return __atomic_fetch_xor(address, val, __ATOMIC_RELAXED);
}
__device__
inline
unsigned int atomicXor(unsigned int* address, unsigned int val)
{
    return __atomic_fetch_xor(address, val, __ATOMIC_RELAXED);
}
__device__
inline
unsigned long long atomicXor(
    unsigned long long* address, unsigned long long val)
{
    return __atomic_fetch_xor(address, val, __ATOMIC_RELAXED);
}

// TODO: add scoped atomics i.e. atomic{*}_system && atomic{*}_block.
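
// A minimal sketch of what the block-scoped variant could look like, assuming
// a Clang new enough to expose the __scoped_atomic_* builtins and the
// __MEMORY_SCOPE_WRKGRP scope constant; this is not the shipped HIP API, and
// atomicAdd_blockExample is a hypothetical name used only here.
__device__
inline
int atomicAdd_blockExample(int* address, int val)
{
    // Workgroup scope: atomicity is only guaranteed among threads of the same
    // block, which the backend can exploit to keep the operation local.
    return __scoped_atomic_fetch_add(
        address, val, __ATOMIC_RELAXED, __MEMORY_SCOPE_WRKGRP);
}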