HIP: Heterogenous-computing Interface for Portability
hip_cooperative_groups.h
Go to the documentation of this file.
1 /*
2 Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved.
3 
4 Permission is hereby granted, free of charge, to any person obtaining a copy
5 of this software and associated documentation files (the "Software"), to deal
6 in the Software without restriction, including without limitation the rights
7 to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
8 copies of the Software, and to permit persons to whom the Software is
9 furnished to do so, subject to the following conditions:
10 
11 The above copyright notice and this permission notice shall be included in
12 all copies or substantial portions of the Software.
13 
14 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
17 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
19 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
20 THE SOFTWARE.
21 */
22 
32 #ifndef HIP_INCLUDE_HIP_HCC_DETAIL_HIP_COOPERATIVE_GROUPS_H
33 #define HIP_INCLUDE_HIP_HCC_DETAIL_HIP_COOPERATIVE_GROUPS_H
34 
35 #if __cplusplus
37 
38 namespace cooperative_groups {
39 
45 class thread_group {
46  protected:
47  uint32_t _type; // thread_group type
48  uint32_t _size; // total number of threads in the tread_group
49  uint64_t _mask; // Lanemask for coalesced and tiled partitioned group types,
50  // LSB represents lane 0, and MSB represents lane 63
51 
52  // Construct a thread group, and set thread group type and other essential
53  // thread group properties. This generic thread group is directly constructed
54  // only when the group is supposed to contain only the calling the thread
55  // (throurh the API - `this_thread()`), and in all other cases, this thread
56  // group object is a sub-object of some other derived thread group object
57  __CG_QUALIFIER__ thread_group(internal::group_type type, uint32_t size,
58  uint64_t mask = (uint64_t)0) {
59  _type = type;
60  _size = size;
61  _mask = mask;
62  }
63 
64  public:
65  // Total number of threads in the thread group, and this serves the purpose
66  // for all derived cooperative group types since their `size` is directly
67  // saved during the construction
68  __CG_QUALIFIER__ uint32_t size() const {
69  return _size;
70  }
71  // Rank of the calling thread within [0, size())
72  __CG_QUALIFIER__ uint32_t thread_rank() const;
73  // Is this cooperative group type valid?
74  __CG_QUALIFIER__ bool is_valid() const;
75  // synchronize the threads in the thread group
76  __CG_QUALIFIER__ void sync() const;
77 };
78 
85 class multi_grid_group : public thread_group {
86  // Only these friend functions are allowed to construct an object of this class
87  // and access its resources
88  friend __CG_QUALIFIER__ multi_grid_group this_multi_grid();
89 
90  protected:
91  // Construct mutli-grid thread group (through the API this_multi_grid())
92  explicit __CG_QUALIFIER__ multi_grid_group(uint32_t size)
93  : thread_group(internal::cg_multi_grid, size) { }
94 
95  public:
96  // Number of invocations participating in this multi-grid group. In other
97  // words, the number of GPUs
98  __CG_QUALIFIER__ uint32_t num_grids() {
99  return internal::multi_grid::num_grids();
100  }
101  // Rank of this invocation. In other words, an ID number within the range
102  // [0, num_grids()) of the GPU, this kernel is running on
103  __CG_QUALIFIER__ uint32_t grid_rank() {
104  return internal::multi_grid::grid_rank();
105  }
106  __CG_QUALIFIER__ uint32_t thread_rank() const {
107  return internal::multi_grid::thread_rank();
108  }
109  __CG_QUALIFIER__ bool is_valid() const {
110  return internal::multi_grid::is_valid();
111  }
112  __CG_QUALIFIER__ void sync() const {
113  internal::multi_grid::sync();
114  }
115 };
116 
124 __CG_QUALIFIER__ multi_grid_group
125 this_multi_grid() {
126  return multi_grid_group(internal::multi_grid::size());
127 }
128 
135 class grid_group : public thread_group {
136  // Only these friend functions are allowed to construct an object of this class
137  // and access its resources
138  friend __CG_QUALIFIER__ grid_group this_grid();
139 
140  protected:
141  // Construct grid thread group (through the API this_grid())
142  explicit __CG_QUALIFIER__ grid_group(uint32_t size)
143  : thread_group(internal::cg_grid, size) { }
144 
145  public:
146  __CG_QUALIFIER__ uint32_t thread_rank() const {
147  return internal::grid::thread_rank();
148  }
149  __CG_QUALIFIER__ bool is_valid() const {
150  return internal::grid::is_valid();
151  }
152  __CG_QUALIFIER__ void sync() const {
153  internal::grid::sync();
154  }
155 };
156 
164 __CG_QUALIFIER__ grid_group
165 this_grid() {
166  return grid_group(internal::grid::size());
167 }
168 
172 __CG_QUALIFIER__ uint32_t thread_group::thread_rank() const {
173  switch (this->_type) {
174  case internal::cg_multi_grid: {
175  return (static_cast<const multi_grid_group*>(this)->thread_rank());
176  }
177  case internal::cg_grid: {
178  return (static_cast<const grid_group*>(this)->thread_rank());
179  }
180  default: {
181  return 0; //TODO(mahesha)
182  }
183  }
184 }
185 
186 __CG_QUALIFIER__ bool thread_group::is_valid() const {
187  switch (this->_type) {
188  case internal::cg_multi_grid: {
189  return (static_cast<const multi_grid_group*>(this)->is_valid());
190  }
191  case internal::cg_grid: {
192  return (static_cast<const grid_group*>(this)->is_valid());
193  }
194  default: {
195  return false;
196  }
197  }
198 }
199 
200 __CG_QUALIFIER__ void thread_group::sync() const {
201  switch (this->_type) {
202  case internal::cg_multi_grid: {
203  static_cast<const multi_grid_group*>(this)->sync();
204  break;
205  }
206  case internal::cg_grid: {
207  static_cast<const grid_group*>(this)->sync();
208  break;
209  }
210  }
211 }
212 
213 } // namespace cooperative_groups
214 
215 #endif // __cplusplus
216 #endif // HIP_INCLUDE_HIP_HCC_DETAIL_HIP_COOPERATIVE_GROUPS_H
Device side implementation of cooperative group feature.