HIP: Heterogenous-computing Interface for Portability
hip_cooperative_groups.h
Go to the documentation of this file.
1 /*
2 Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved.
3 
4 Permission is hereby granted, free of charge, to any person obtaining a copy
5 of this software and associated documentation files (the "Software"), to deal
6 in the Software without restriction, including without limitation the rights
7 to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
8 copies of the Software, and to permit persons to whom the Software is
9 furnished to do so, subject to the following conditions:
10 
11 The above copyright notice and this permission notice shall be included in
12 all copies or substantial portions of the Software.
13 
14 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
17 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
19 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
20 THE SOFTWARE.
21 */
22 
32 #ifndef HIP_INCLUDE_HIP_HCC_DETAIL_HIP_COOPERATIVE_GROUPS_H
33 #define HIP_INCLUDE_HIP_HCC_DETAIL_HIP_COOPERATIVE_GROUPS_H
34 
35 #if __cplusplus
37 
38 namespace cooperative_groups {
39 
45 class thread_group {
46  protected:
47  uint32_t _type; // thread_group type
48  uint32_t _size; // total number of threads in the tread_group
49  uint64_t _mask; // Lanemask for coalesced and tiled partitioned group types,
50  // LSB represents lane 0, and MSB represents lane 63
51 
52  // Construct a thread group, and set thread group type and other essential
53  // thread group properties. This generic thread group is directly constructed
54  // only when the group is supposed to contain only the calling the thread
55  // (throurh the API - `this_thread()`), and in all other cases, this thread
56  // group object is a sub-object of some other derived thread group object
57  __CG_QUALIFIER__ thread_group(internal::group_type type, uint32_t size,
58  uint64_t mask = (uint64_t)0) {
59  _type = type;
60  _size = size;
61  _mask = mask;
62  }
63 
64  public:
65  // Total number of threads in the thread group, and this serves the purpose
66  // for all derived cooperative group types since their `size` is directly
67  // saved during the construction
68  __CG_QUALIFIER__ uint32_t size() const {
69  return _size;
70  }
71  // Rank of the calling thread within [0, size())
72  __CG_QUALIFIER__ uint32_t thread_rank() const;
73  // Is this cooperative group type valid?
74  __CG_QUALIFIER__ bool is_valid() const;
75  // synchronize the threads in the thread group
76  __CG_QUALIFIER__ void sync() const;
77 };
78 
85 class multi_grid_group : public thread_group {
86  // Only these friend functions are allowed to construct an object of this class
87  // and access its resources
88  friend __CG_QUALIFIER__ multi_grid_group this_multi_grid();
89 
90  protected:
91  // Construct mutli-grid thread group (through the API this_multi_grid())
92  explicit __CG_QUALIFIER__ multi_grid_group(uint32_t size)
93  : thread_group(internal::cg_multi_grid, size) { }
94 
95  public:
96  // Number of invocations participating in this multi-grid group. In other
97  // words, the number of GPUs
98  __CG_QUALIFIER__ uint32_t num_grids() {
99  return internal::multi_grid::num_grids();
100  }
101  // Rank of this invocation. In other words, an ID number within the range
102  // [0, num_grids()) of the GPU, this kernel is running on
103  __CG_QUALIFIER__ uint32_t grid_rank() {
104  return internal::multi_grid::grid_rank();
105  }
106  __CG_QUALIFIER__ uint32_t thread_rank() const {
107  return internal::multi_grid::thread_rank();
108  }
109  __CG_QUALIFIER__ bool is_valid() const {
110  return internal::multi_grid::is_valid();
111  }
112  __CG_QUALIFIER__ void sync() const {
113  internal::multi_grid::sync();
114  }
115 };
116 
124 __CG_QUALIFIER__ multi_grid_group
125 this_multi_grid() {
126  return multi_grid_group(internal::multi_grid::size());
127 }
128 
135 class grid_group : public thread_group {
136  // Only these friend functions are allowed to construct an object of this class
137  // and access its resources
138  friend __CG_QUALIFIER__ grid_group this_grid();
139 
140  protected:
141  // Construct grid thread group (through the API this_grid())
142  explicit __CG_QUALIFIER__ grid_group(uint32_t size)
143  : thread_group(internal::cg_grid, size) { }
144 
145  public:
146  __CG_QUALIFIER__ uint32_t thread_rank() const {
147  return internal::grid::thread_rank();
148  }
149  __CG_QUALIFIER__ bool is_valid() const {
150  return internal::grid::is_valid();
151  }
152  __CG_QUALIFIER__ void sync() const {
153  internal::grid::sync();
154  }
155 };
156 
164 __CG_QUALIFIER__ grid_group
165 this_grid() {
166  return grid_group(internal::grid::size());
167 }
168 
176 class thread_block : public thread_group {
177  // Only these friend functions are allowed to construct an object of this
178  // class and access its resources
179  friend __CG_QUALIFIER__ thread_block this_thread_block();
180 
181  protected:
182  // Construct a workgroup thread group (through the API this_thread_block())
183  explicit __CG_QUALIFIER__ thread_block(uint32_t size)
184  : thread_group(internal::cg_workgroup, size) { }
185 
186  public:
187  // 3-dimensional block index within the grid
188  __CG_QUALIFIER__ dim3 group_index() {
189  return internal::workgroup::group_index();
190  }
191  // 3-dimensional thread index within the block
192  __CG_QUALIFIER__ dim3 thread_index() {
193  return internal::workgroup::thread_index();
194  }
195  __CG_QUALIFIER__ uint32_t thread_rank() const {
196  return internal::workgroup::thread_rank();
197  }
198  __CG_QUALIFIER__ bool is_valid() const {
199  return internal::workgroup::is_valid();
200  }
201  __CG_QUALIFIER__ void sync() const {
202  internal::workgroup::sync();
203  }
204 };
205 
213 __CG_QUALIFIER__ thread_block
214 this_thread_block() {
215  return thread_block(internal::workgroup::size());
216 }
217 
221 __CG_QUALIFIER__ uint32_t thread_group::thread_rank() const {
222  switch (this->_type) {
223  case internal::cg_multi_grid: {
224  return (static_cast<const multi_grid_group*>(this)->thread_rank());
225  }
226  case internal::cg_grid: {
227  return (static_cast<const grid_group*>(this)->thread_rank());
228  }
229  case internal::cg_workgroup: {
230  return (static_cast<const thread_block*>(this)->thread_rank());
231  }
232  default: {
233  assert(false && "invalid cooperative group type");
234  return -1;
235  }
236  }
237 }
238 
239 __CG_QUALIFIER__ bool thread_group::is_valid() const {
240  switch (this->_type) {
241  case internal::cg_multi_grid: {
242  return (static_cast<const multi_grid_group*>(this)->is_valid());
243  }
244  case internal::cg_grid: {
245  return (static_cast<const grid_group*>(this)->is_valid());
246  }
247  case internal::cg_workgroup: {
248  return (static_cast<const thread_block*>(this)->is_valid());
249  }
250  default: {
251  assert(false && "invalid cooperative group type");
252  return false;
253  }
254  }
255 }
256 
257 __CG_QUALIFIER__ void thread_group::sync() const {
258  switch (this->_type) {
259  case internal::cg_multi_grid: {
260  static_cast<const multi_grid_group*>(this)->sync();
261  break;
262  }
263  case internal::cg_grid: {
264  static_cast<const grid_group*>(this)->sync();
265  break;
266  }
267  case internal::cg_workgroup: {
268  static_cast<const thread_block*>(this)->sync();
269  break;
270  }
271  default: {
272  assert(false && "invalid cooperative group type");
273  }
274  }
275 }
276 
281 template <class CGTy>
282 __CG_QUALIFIER__ uint32_t group_size(CGTy const &g) {
283  return g.size();
284 }
285 
286 template <class CGTy>
287 __CG_QUALIFIER__ uint32_t thread_rank(CGTy const &g) {
288  return g.thread_rank();
289 }
290 
291 template <class CGTy>
292 __CG_QUALIFIER__ bool is_valid(CGTy const &g) {
293  return g.is_valid();
294 }
295 
296 template <class CGTy>
297 __CG_QUALIFIER__ void sync(CGTy const &g) {
298  g.sync();
299 }
300 
301 } // namespace cooperative_groups
302 
303 #endif // __cplusplus
304 #endif // HIP_INCLUDE_HIP_HCC_DETAIL_HIP_COOPERATIVE_GROUPS_H
hip_cooperative_groups_helper.h
Device side implementation of cooperative group feature.
dim3
Definition: hip_runtime_api.h:330