HIP: Heterogeneous-computing Interface for Portability
hip_atomic.h
#pragma once

#include "device_functions.h"

// atomicCAS: atomically compare *address with compare and, on a match, store
// val. __atomic_compare_exchange_n writes the observed value back into
// compare on failure and leaves it untouched on success, so returning compare
// yields the previous value at address in either case.
__device__
inline
int atomicCAS(int* address, int compare, int val)
{
    __atomic_compare_exchange_n(
        address, &compare, val, false, __ATOMIC_RELAXED, __ATOMIC_RELAXED);

    return compare;
}
__device__
inline
unsigned int atomicCAS(
    unsigned int* address, unsigned int compare, unsigned int val)
{
    __atomic_compare_exchange_n(
        address, &compare, val, false, __ATOMIC_RELAXED, __ATOMIC_RELAXED);

    return compare;
}
__device__
inline
unsigned long long atomicCAS(
    unsigned long long* address,
    unsigned long long compare,
    unsigned long long val)
{
    __atomic_compare_exchange_n(
        address, &compare, val, false, __ATOMIC_RELAXED, __ATOMIC_RELAXED);

    return compare;
}

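// Illustrative sketch, not part of this header: any read-modify-write the
// hardware lacks can be synthesized from atomicCAS by recomputing the desired
// value from the last observed one and retrying until no other thread
// intervenes. The saturating add below is hypothetical and exists only to
// show the pattern (it assumes val and max_val are non-negative).
__device__
inline
int atomicAddSat(int* address, int val, int max_val)
{
    int expected = __atomic_load_n(address, __ATOMIC_RELAXED);
    while (true) {
        const int desired =
            (expected > max_val - val) ? max_val : expected + val;
        const int prev = atomicCAS(address, expected, desired);

        if (prev == expected) break; // exchange took; prev is the old value

        expected = prev; // lost the race; retry against the fresh value
    }

    return expected;
}
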
// atomicAdd: fetch-and-add returning the value held at address before the
// addition; the integer overloads map directly onto hardware atomics.
__device__
inline
int atomicAdd(int* address, int val)
{
    return __atomic_fetch_add(address, val, __ATOMIC_RELAXED);
}
__device__
inline
unsigned int atomicAdd(unsigned int* address, unsigned int val)
{
    return __atomic_fetch_add(address, val, __ATOMIC_RELAXED);
}
__device__
inline
unsigned long long atomicAdd(
    unsigned long long* address, unsigned long long val)
{
    return __atomic_fetch_add(address, val, __ATOMIC_RELAXED);
}
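
// Illustrative sketch, not part of this header: atomicAdd is the standard
// primitive for concurrent accumulation. A histogram kernel, where any number
// of threads may land on the same bin, is the canonical use; every name below
// is hypothetical.
__global__
void histogram_kernel(
    const unsigned int* keys,
    unsigned int n,
    unsigned int* bins,
    unsigned int num_bins)
{
    const unsigned int i = blockIdx.x * blockDim.x + threadIdx.x;

    if (i < n) atomicAdd(&bins[keys[i] % num_bins], 1u);
}
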
// Floating-point fetch-and-add is emulated: the 32-bit pattern is punned to
// unsigned int and updated through a CAS retry loop.
__device__
inline
float atomicAdd(float* address, float val)
{
    unsigned int* uaddr{reinterpret_cast<unsigned int*>(address)};
    unsigned int r{__atomic_load_n(uaddr, __ATOMIC_RELAXED)};

    unsigned int old;
    do {
        old = __atomic_load_n(uaddr, __ATOMIC_RELAXED);

        // Another thread updated the location between loads; retry with the
        // fresh value.
        if (r != old) { r = old; continue; }

        r = atomicCAS(uaddr, r, __float_as_uint(val + __uint_as_float(r)));

        // atomicCAS returned the expected value, so the exchange succeeded
        // and r holds the pre-update bits.
        if (r == old) break;
    } while (true);

    return __uint_as_float(r);
}

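// The float overload above shows the general recipe for floating-point
// atomics: pun the bits to an integer type and loop on atomicCAS. The same
// recipe covers any f32 operation; the multiply below is a hypothetical
// sketch, not part of this header.
__device__
inline
float atomicMulF32(float* address, float val)
{
    unsigned int* uaddr{reinterpret_cast<unsigned int*>(address)};
    unsigned int expected{__atomic_load_n(uaddr, __ATOMIC_RELAXED)};

    while (true) {
        const unsigned int prev = atomicCAS(
            uaddr, expected, __float_as_uint(val * __uint_as_float(expected)));

        if (prev == expected) break; // stored; expected holds the old bits

        expected = prev;
    }

    return __uint_as_float(expected);
}
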
// Fire-and-forget float add: forwards to OCKL's global-memory f32 atomic add,
// skipping the CAS loop when the old value is not needed.
__device__
inline
void atomicAddNoRet(float* address, float val)
{
    __ockl_global_atomic_add_f32(address, val);
}

// Same CAS-loop emulation as the float overload, over the 64-bit view.
__device__
inline
double atomicAdd(double* address, double val)
{
    unsigned long long* uaddr{reinterpret_cast<unsigned long long*>(address)};
    unsigned long long r{__atomic_load_n(uaddr, __ATOMIC_RELAXED)};

    unsigned long long old;
    do {
        old = __atomic_load_n(uaddr, __ATOMIC_RELAXED);

        if (r != old) { r = old; continue; }

        r = atomicCAS(
            uaddr, r, __double_as_longlong(val + __longlong_as_double(r)));

        if (r == old) break;
    } while (true);

    return __longlong_as_double(r);
}

// atomicSub: fetch-and-subtract; returns the pre-subtraction value.
__device__
inline
int atomicSub(int* address, int val)
{
    return __atomic_fetch_sub(address, val, __ATOMIC_RELAXED);
}
__device__
inline
unsigned int atomicSub(unsigned int* address, unsigned int val)
{
    return __atomic_fetch_sub(address, val, __ATOMIC_RELAXED);
}

// atomicExch: unconditionally store val, returning the previous value. The
// float overload reuses the unsigned int exchange through type punning.
__device__
inline
int atomicExch(int* address, int val)
{
    return __atomic_exchange_n(address, val, __ATOMIC_RELAXED);
}
__device__
inline
unsigned int atomicExch(unsigned int* address, unsigned int val)
{
    return __atomic_exchange_n(address, val, __ATOMIC_RELAXED);
}
__device__
inline
unsigned long long atomicExch(
    unsigned long long* address, unsigned long long val)
{
    return __atomic_exchange_n(address, val, __ATOMIC_RELAXED);
}
__device__
inline
float atomicExch(float* address, float val)
{
    return __uint_as_float(__atomic_exchange_n(
        reinterpret_cast<unsigned int*>(address),
        __float_as_uint(val),
        __ATOMIC_RELAXED));
}

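// Illustrative sketch, not part of this header: atomicExch doubles as
// test-and-set, which is enough for a simple device-side spinlock. Caveat:
// threads in one wavefront execute in lockstep, so contended per-thread
// locking like this can livelock unless at most one lane per wavefront takes
// the lock. The helpers are hypothetical.
__device__
inline
void spin_lock(int* lock)
{
    while (atomicExch(lock, 1) != 0) {} // keep swapping until it was free
}

__device__
inline
void spin_unlock(int* lock)
{
    atomicExch(lock, 0); // hand the flag back
}
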
// atomicMin: store the smaller of *address and val, returning the old value.
// The 64-bit overload loops on atomicCAS and exits immediately once the
// stored value is already <= val.
__device__
inline
int atomicMin(int* address, int val)
{
    return __atomic_fetch_min(address, val, __ATOMIC_RELAXED);
}
__device__
inline
unsigned int atomicMin(unsigned int* address, unsigned int val)
{
    return __atomic_fetch_min(address, val, __ATOMIC_RELAXED);
}
__device__
inline
unsigned long long atomicMin(
    unsigned long long* address, unsigned long long val)
{
    unsigned long long tmp{__atomic_load_n(address, __ATOMIC_RELAXED)};
    while (val < tmp) {
        const auto tmp1 = __atomic_load_n(address, __ATOMIC_RELAXED);

        if (tmp1 != tmp) { tmp = tmp1; continue; }

        const auto prev = atomicCAS(address, tmp, val);

        // On success tmp already holds the pre-exchange value to return;
        // otherwise retry against the value another thread installed.
        if (prev == tmp) break;

        tmp = prev;
    }

    return tmp;
}

// atomicMax: store the larger of *address and val, returning the old value.
// The 64-bit overload mirrors atomicMin with a CAS retry loop.
__device__
inline
int atomicMax(int* address, int val)
{
    return __atomic_fetch_max(address, val, __ATOMIC_RELAXED);
}
__device__
inline
unsigned int atomicMax(unsigned int* address, unsigned int val)
{
    return __atomic_fetch_max(address, val, __ATOMIC_RELAXED);
}
__device__
inline
unsigned long long atomicMax(
    unsigned long long* address, unsigned long long val)
{
    unsigned long long tmp{__atomic_load_n(address, __ATOMIC_RELAXED)};
    while (tmp < val) {
        const auto tmp1 = __atomic_load_n(address, __ATOMIC_RELAXED);

        if (tmp1 != tmp) { tmp = tmp1; continue; }

        const auto prev = atomicCAS(address, tmp, val);

        if (prev == tmp) break;

        tmp = prev;
    }

    return tmp;
}

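// Illustrative sketch, not part of this header: atomicMin/atomicMax make a
// one-pass extrema reduction trivial. Assumes *lo starts at the maximum
// unsigned long long and *hi at 0; all names are hypothetical.
__global__
void minmax_kernel(
    const unsigned long long* data,
    unsigned int n,
    unsigned long long* lo,
    unsigned long long* hi)
{
    const unsigned int i = blockIdx.x * blockDim.x + threadIdx.x;

    if (i < n) {
        atomicMin(lo, data[i]);
        atomicMax(hi, data[i]);
    }
}
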
// atomicInc: returns the old value and stores (old >= val) ? 0 : old + 1,
// i.e. an increment that wraps to 0 once val is passed. The AMDGCN intrinsic
// is declared locally and bound to the LLVM name via __asm.
__device__
inline
unsigned int atomicInc(unsigned int* address, unsigned int val)
{
    __device__
    extern
    unsigned int __builtin_amdgcn_atomic_inc(
        unsigned int*,
        unsigned int,
        unsigned int,
        unsigned int,
        bool) __asm("llvm.amdgcn.atomic.inc.i32.p0i32");

    return __builtin_amdgcn_atomic_inc(
        address, val, __ATOMIC_RELAXED, 1 /* Device scope */, false);
}

// atomicDec: returns the old value and stores
// ((old == 0) || (old > val)) ? val : old - 1, the wrapping counterpart of
// atomicInc.
__device__
inline
unsigned int atomicDec(unsigned int* address, unsigned int val)
{
    __device__
    extern
    unsigned int __builtin_amdgcn_atomic_dec(
        unsigned int*,
        unsigned int,
        unsigned int,
        unsigned int,
        bool) __asm("llvm.amdgcn.atomic.dec.i32.p0i32");

    return __builtin_amdgcn_atomic_dec(
        address, val, __ATOMIC_RELAXED, 1 /* Device scope */, false);
}

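// Illustrative sketch, not part of this header: the wrapping behaviour of
// atomicInc is a one-line ticket dispenser for a fixed-capacity ring buffer;
// indices cycle through [0, capacity) with no separate modulo step. The
// helper is hypothetical.
__device__
inline
unsigned int next_slot(unsigned int* head, unsigned int capacity)
{
    return atomicInc(head, capacity - 1u); // wraps to 0 after capacity - 1
}
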
// atomicAnd / atomicOr / atomicXor: bitwise fetch-and-ops; each returns the
// value held at address before the operation.
__device__
inline
int atomicAnd(int* address, int val)
{
    return __atomic_fetch_and(address, val, __ATOMIC_RELAXED);
}
__device__
inline
unsigned int atomicAnd(unsigned int* address, unsigned int val)
{
    return __atomic_fetch_and(address, val, __ATOMIC_RELAXED);
}
__device__
inline
unsigned long long atomicAnd(
    unsigned long long* address, unsigned long long val)
{
    return __atomic_fetch_and(address, val, __ATOMIC_RELAXED);
}

__device__
inline
int atomicOr(int* address, int val)
{
    return __atomic_fetch_or(address, val, __ATOMIC_RELAXED);
}
__device__
inline
unsigned int atomicOr(unsigned int* address, unsigned int val)
{
    return __atomic_fetch_or(address, val, __ATOMIC_RELAXED);
}
__device__
inline
unsigned long long atomicOr(
    unsigned long long* address, unsigned long long val)
{
    return __atomic_fetch_or(address, val, __ATOMIC_RELAXED);
}

__device__
inline
int atomicXor(int* address, int val)
{
    return __atomic_fetch_xor(address, val, __ATOMIC_RELAXED);
}
__device__
inline
unsigned int atomicXor(unsigned int* address, unsigned int val)
{
    return __atomic_fetch_xor(address, val, __ATOMIC_RELAXED);
}
__device__
inline
unsigned long long atomicXor(
    unsigned long long* address, unsigned long long val)
{
    return __atomic_fetch_xor(address, val, __ATOMIC_RELAXED);
}

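// Illustrative sketch, not part of this header: the bitwise atomics maintain
// shared flag sets compactly, e.g. a visited bitmap packing 32 items per
// word. The helper is hypothetical.
__device__
inline
bool mark_visited(unsigned int* bitmap, unsigned int item)
{
    const unsigned int mask = 1u << (item % 32u);
    const unsigned int old = atomicOr(&bitmap[item / 32u], mask);

    return (old & mask) != 0u; // true if the bit was already set
}
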
// TODO: add scoped atomics, i.e. atomic{*}_system && atomic{*}_block.