HIP: Heterogeneous-computing Interface for Portability
hip_atomic.h
#pragma once

#include "device_functions.h"

__device__
inline
int atomicCAS(int* address, int compare, int val)
{
    // On failure, __atomic_compare_exchange_n writes the observed value back
    // into compare, so returning compare yields the old value either way.
    __atomic_compare_exchange_n(
        address, &compare, val, false, __ATOMIC_RELAXED, __ATOMIC_RELAXED);

    return compare;
}
__device__
inline
unsigned int atomicCAS(
    unsigned int* address, unsigned int compare, unsigned int val)
{
    __atomic_compare_exchange_n(
        address, &compare, val, false, __ATOMIC_RELAXED, __ATOMIC_RELAXED);

    return compare;
}
__device__
inline
unsigned long long atomicCAS(
    unsigned long long* address,
    unsigned long long compare,
    unsigned long long val)
{
    __atomic_compare_exchange_n(
        address, &compare, val, false, __ATOMIC_RELAXED, __ATOMIC_RELAXED);

    return compare;
}

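// Illustrative sketch (hypothetical helper, not part of the original API):
// the atomicCAS overloads above are the building block for any custom
// read-modify-write operation, via the same compare-exchange retry loop the
// floating-point atomicAdd overloads below use.
__device__
inline
int atomicMul_example(int* address, int val)
{
    int old{__atomic_load_n(address, __ATOMIC_RELAXED)};
    int r;

    do {
        r = old;
        old = atomicCAS(address, r, r * val);
    } while (r != old); // r == old means our swap landed.

    return r; // Value observed before the successful multiply.
}
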
__device__
inline
int atomicAdd(int* address, int val)
{
    return __atomic_fetch_add(address, val, __ATOMIC_RELAXED);
}
__device__
inline
unsigned int atomicAdd(unsigned int* address, unsigned int val)
{
    return __atomic_fetch_add(address, val, __ATOMIC_RELAXED);
}
__device__
inline
unsigned long long atomicAdd(
    unsigned long long* address, unsigned long long val)
{
    return __atomic_fetch_add(address, val, __ATOMIC_RELAXED);
}
__device__
inline
float atomicAdd(float* address, float val)
{
    // No native float fetch-add here: reinterpret the bits as unsigned int
    // and retry a CAS until no other thread intervenes.
    unsigned int* uaddr{reinterpret_cast<unsigned int*>(address)};
    unsigned int old{__atomic_load_n(uaddr, __ATOMIC_RELAXED)};
    unsigned int r;

    do {
        r = old;
        old = atomicCAS(uaddr, r, __float_as_uint(val + __uint_as_float(r)));
    } while (r != old); // r == old means the swap succeeded.

    return __uint_as_float(r); // Value before the add.
}
__device__
inline
double atomicAdd(double* address, double val)
{
    // Same CAS loop as the float overload, via 64-bit reinterpretation.
    unsigned long long* uaddr{reinterpret_cast<unsigned long long*>(address)};
    unsigned long long old{__atomic_load_n(uaddr, __ATOMIC_RELAXED)};
    unsigned long long r;

    do {
        r = old;
        old = atomicCAS(
            uaddr, r, __double_as_longlong(val + __longlong_as_double(r)));
    } while (r != old);

    return __longlong_as_double(r);
}

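// Illustrative sketch (hypothetical helper, not part of the original API):
// the reinterpret-and-CAS technique used by the floating-point atomicAdd
// overloads above extends to other float operations, e.g. an atomic max.
__device__
inline
float atomicMaxFloat_example(float* address, float val)
{
    unsigned int* uaddr{reinterpret_cast<unsigned int*>(address)};
    unsigned int old{__atomic_load_n(uaddr, __ATOMIC_RELAXED)};
    unsigned int r;

    do {
        r = old;
        old = atomicCAS(
            uaddr, r, __float_as_uint(fmaxf(val, __uint_as_float(r))));
    } while (r != old);

    return __uint_as_float(r);
}
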
__device__
inline
int atomicSub(int* address, int val)
{
    return __atomic_fetch_sub(address, val, __ATOMIC_RELAXED);
}
__device__
inline
unsigned int atomicSub(unsigned int* address, unsigned int val)
{
    return __atomic_fetch_sub(address, val, __ATOMIC_RELAXED);
}

__device__
inline
int atomicExch(int* address, int val)
{
    return __atomic_exchange_n(address, val, __ATOMIC_RELAXED);
}
__device__
inline
unsigned int atomicExch(unsigned int* address, unsigned int val)
{
    return __atomic_exchange_n(address, val, __ATOMIC_RELAXED);
}
__device__
inline
unsigned long long atomicExch(
    unsigned long long* address, unsigned long long val)
{
    return __atomic_exchange_n(address, val, __ATOMIC_RELAXED);
}
__device__
inline
float atomicExch(float* address, float val)
{
    return __uint_as_float(__atomic_exchange_n(
        reinterpret_cast<unsigned int*>(address),
        __float_as_uint(val),
        __ATOMIC_RELAXED));
}

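// Usage sketch (hypothetical helper, not part of the original API): because
// atomicExch returns the previous value, it can elect a single winner for a
// flag, e.g. a minimal try-lock where 0 means free and 1 means held. Note
// that these atomics are relaxed, so guarding data with such a lock would
// require additional memory ordering (fences).
__device__
inline
bool try_lock_example(int* lock)
{
    return atomicExch(lock, 1) == 0; // True iff this thread flipped 0 -> 1.
}
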
__device__
inline
int atomicMin(int* address, int val)
{
    return __sync_fetch_and_min(address, val);
}
__device__
inline
unsigned int atomicMin(unsigned int* address, unsigned int val)
{
    return __sync_fetch_and_umin(address, val);
}
__device__
inline
unsigned long long atomicMin(
    unsigned long long* address, unsigned long long val)
{
    unsigned long long tmp{__atomic_load_n(address, __ATOMIC_RELAXED)};
    // Only swap while val would actually lower the stored value; on success,
    // return the previous value, as the other atomics do.
    while (val < tmp) {
        const unsigned long long old{atomicCAS(address, tmp, val)};
        if (old == tmp) break; // Swap succeeded; tmp holds the old value.
        tmp = old;             // Lost a race; retry against the fresh value.
    }

    return tmp;
}

__device__
inline
int atomicMax(int* address, int val)
{
    return __sync_fetch_and_max(address, val);
}
__device__
inline
unsigned int atomicMax(unsigned int* address, unsigned int val)
{
    return __sync_fetch_and_umax(address, val);
}
__device__
inline
unsigned long long atomicMax(
    unsigned long long* address, unsigned long long val)
{
    unsigned long long tmp{__atomic_load_n(address, __ATOMIC_RELAXED)};
    // Mirror of the unsigned long long atomicMin above.
    while (tmp < val) {
        const unsigned long long old{atomicCAS(address, tmp, val)};
        if (old == tmp) break; // Swap succeeded; tmp holds the old value.
        tmp = old;             // Lost a race; retry against the fresh value.
    }

    return tmp;
}

__device__
inline
unsigned int atomicInc(unsigned int* address, unsigned int val)
{
    // Declare the LLVM intrinsic locally so the compiler lowers the call
    // directly to the GCN wrapping-increment instruction.
    __device__
    extern
    unsigned int __builtin_amdgcn_atomic_inc(
        unsigned int*,
        unsigned int,
        unsigned int,
        unsigned int,
        bool) __asm("llvm.amdgcn.atomic.inc.i32.p0i32");

    // Wrapping increment: stores ((old >= val) ? 0 : old + 1), returns old.
    return __builtin_amdgcn_atomic_inc(
        address, val, __ATOMIC_RELAXED, 1 /* Device scope */, false);
}

__device__
inline
unsigned int atomicDec(unsigned int* address, unsigned int val)
{
    __device__
    extern
    unsigned int __builtin_amdgcn_atomic_dec(
        unsigned int*,
        unsigned int,
        unsigned int,
        unsigned int,
        bool) __asm("llvm.amdgcn.atomic.dec.i32.p0i32");

    // Wrapping decrement: stores ((old == 0 || old > val) ? val : old - 1),
    // returns old.
    return __builtin_amdgcn_atomic_dec(
        address, val, __ATOMIC_RELAXED, 1 /* Device scope */, false);
}

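// Usage sketch (hypothetical helper, not part of the original API): because
// atomicInc wraps to 0 once the old value reaches val, it yields a
// ring-buffer cursor without an explicit modulo.
__device__
inline
unsigned int next_ring_slot_example(unsigned int* cursor, unsigned int size)
{
    return atomicInc(cursor, size - 1u); // Old value, in [0, size - 1].
}
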
__device__
inline
int atomicAnd(int* address, int val)
{
    return __atomic_fetch_and(address, val, __ATOMIC_RELAXED);
}
__device__
inline
unsigned int atomicAnd(unsigned int* address, unsigned int val)
{
    return __atomic_fetch_and(address, val, __ATOMIC_RELAXED);
}
__device__
inline
unsigned long long atomicAnd(
    unsigned long long* address, unsigned long long val)
{
    return __atomic_fetch_and(address, val, __ATOMIC_RELAXED);
}

__device__
inline
int atomicOr(int* address, int val)
{
    return __atomic_fetch_or(address, val, __ATOMIC_RELAXED);
}
__device__
inline
unsigned int atomicOr(unsigned int* address, unsigned int val)
{
    return __atomic_fetch_or(address, val, __ATOMIC_RELAXED);
}
__device__
inline
unsigned long long atomicOr(
    unsigned long long* address, unsigned long long val)
{
    return __atomic_fetch_or(address, val, __ATOMIC_RELAXED);
}

__device__
inline
int atomicXor(int* address, int val)
{
    return __atomic_fetch_xor(address, val, __ATOMIC_RELAXED);
}
__device__
inline
unsigned int atomicXor(unsigned int* address, unsigned int val)
{
    return __atomic_fetch_xor(address, val, __ATOMIC_RELAXED);
}
__device__
inline
unsigned long long atomicXor(
    unsigned long long* address, unsigned long long val)
{
    return __atomic_fetch_xor(address, val, __ATOMIC_RELAXED);
}

// TODO: add scoped atomics, i.e. atomic{*}_system and atomic{*}_block.