HIP: Heterogenous-computing Interface for Portability
hip_cooperative_groups_helper.h
Go to the documentation of this file.
1 /*
2 Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved.
3 
4 Permission is hereby granted, free of charge, to any person obtaining a copy
5 of this software and associated documentation files (the "Software"), to deal
6 in the Software without restriction, including without limitation the rights
7 to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
8 copies of the Software, and to permit persons to whom the Software is
9 furnished to do so, subject to the following conditions:
10 
11 The above copyright notice and this permission notice shall be included in
12 all copies or substantial portions of the Software.
13 
14 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
17 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
19 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
20 THE SOFTWARE.
21 */
22 
31 #ifndef HIP_INCLUDE_HIP_HCC_DETAIL_HIP_COOPERATIVE_GROUPS_HELPER_H
32 #define HIP_INCLUDE_HIP_HCC_DETAIL_HIP_COOPERATIVE_GROUPS_HELPER_H
33 
34 #if __cplusplus
36 #include <hip/hcc_detail/device_functions.h>
37 
38 #if !defined(__align__)
39 #define __align__(x) __attribute__((aligned(x)))
40 #endif
41 
42 #if !defined(__CG_QUALIFIER__)
43 #define __CG_QUALIFIER__ __device__ __forceinline__
44 #endif
45 
46 #if !defined(__CG_STATIC_QUALIFIER__)
47 #define __CG_STATIC_QUALIFIER__ __device__ static __forceinline__
48 #endif
49 
50 #if !defined(WAVEFRONT_SIZE)
51 #define WAVEFRONT_SIZE 64
52 #endif
53 
54 namespace cooperative_groups {
55 
56 namespace internal {
57 
60 typedef enum {
61  cg_invalid,
62  cg_multi_grid,
63  cg_grid,
64  cg_workgroup
65 } group_type;
66 
70 namespace multi_grid {
71 
72 __CG_STATIC_QUALIFIER__ uint32_t num_grids() {
73  return (uint32_t)__ockl_multi_grid_num_grids();
74 }
75 
76 __CG_STATIC_QUALIFIER__ uint32_t grid_rank() {
77  return (uint32_t)__ockl_multi_grid_grid_rank();
78 }
79 
80 __CG_STATIC_QUALIFIER__ uint32_t size() {
81  return (uint32_t)__ockl_multi_grid_size();
82 }
83 
84 __CG_STATIC_QUALIFIER__ uint32_t thread_rank() {
85  return (uint32_t)__ockl_multi_grid_thread_rank();
86 }
87 
88 __CG_STATIC_QUALIFIER__ bool is_valid() {
89  return (bool)__ockl_multi_grid_is_valid();
90 }
91 
92 __CG_STATIC_QUALIFIER__ void sync() {
93  __ockl_multi_grid_sync();
94 }
95 
96 } // namespace multi_grid
97 
101 namespace grid {
102 
103 __CG_STATIC_QUALIFIER__ uint32_t size() {
104  return (uint32_t)((hipBlockDim_z * hipGridDim_z) *
105  (hipBlockDim_y * hipGridDim_y) *
106  (hipBlockDim_x * hipGridDim_x));
107 }
108 
109 __CG_STATIC_QUALIFIER__ uint32_t thread_rank() {
110  // Compute global id of the workgroup to which the current thread belongs to
111  uint32_t blkIdx =
112  (uint32_t)((hipBlockIdx_z * hipGridDim_y * hipGridDim_x) +
113  (hipBlockIdx_y * hipGridDim_x) +
114  (hipBlockIdx_x));
115 
116  // Compute total number of threads being passed to reach current workgroup
117  // within grid
118  uint32_t num_threads_till_current_workgroup =
119  (uint32_t)(blkIdx * (hipBlockDim_x * hipBlockDim_y * hipBlockDim_z));
120 
121  // Compute thread local rank within current workgroup
122  uint32_t local_thread_rank =
123  (uint32_t)((hipThreadIdx_z * hipBlockDim_y * hipBlockDim_x) +
124  (hipThreadIdx_y * hipBlockDim_x) +
125  (hipThreadIdx_x));
126 
127  return (num_threads_till_current_workgroup + local_thread_rank);
128 }
129 
130 __CG_STATIC_QUALIFIER__ bool is_valid() {
131  return (bool)__ockl_grid_is_valid();
132 }
133 
134 __CG_STATIC_QUALIFIER__ void sync() {
135  __ockl_grid_sync();
136 }
137 
138 } // namespace grid
139 
144 namespace workgroup {
145 
146 __CG_STATIC_QUALIFIER__ dim3 group_index() {
147  return (dim3((uint32_t)hipBlockIdx_x, (uint32_t)hipBlockIdx_y,
148  (uint32_t)hipBlockIdx_z));
149 }
150 
151 __CG_STATIC_QUALIFIER__ dim3 thread_index() {
152  return (dim3((uint32_t)hipThreadIdx_x, (uint32_t)hipThreadIdx_y,
153  (uint32_t)hipThreadIdx_z));
154 }
155 
156 __CG_STATIC_QUALIFIER__ uint32_t size() {
157  return((uint32_t)(hipBlockDim_x * hipBlockDim_y * hipBlockDim_z));
158 }
159 
160 __CG_STATIC_QUALIFIER__ uint32_t thread_rank() {
161  return ((uint32_t)((hipThreadIdx_z * hipBlockDim_y * hipBlockDim_x) +
162  (hipThreadIdx_y * hipBlockDim_x) +
163  (hipThreadIdx_x)));
164 }
165 
166 __CG_STATIC_QUALIFIER__ bool is_valid() {
167  //TODO(mahesha) any functionality need to be added here? I believe not
168  return true;
169 }
170 
171 __CG_STATIC_QUALIFIER__ void sync() {
172  __syncthreads();
173 }
174 
175 } // namespace workgroup
176 
177 } // namespace internal
178 
179 } // namespace cooperative_groups
180 
181 #endif // __cplusplus
182 #endif // HIP_INCLUDE_HIP_HCC_DETAIL_HIP_COOPERATIVE_GROUPS_HELPER_H
dim3
struct dim3 dim3
hip_runtime_api.h
Contains C function APIs for HIP runtime. This file does not use any HCC builtin or special language ...
dim3
Definition: hip_runtime_api.h:330