HIP: Heterogenous-computing Interface for Portability
hip_cooperative_groups_helper.h
Go to the documentation of this file.
1 /*
2 Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved.
3 
4 Permission is hereby granted, free of charge, to any person obtaining a copy
5 of this software and associated documentation files (the "Software"), to deal
6 in the Software without restriction, including without limitation the rights
7 to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
8 copies of the Software, and to permit persons to whom the Software is
9 furnished to do so, subject to the following conditions:
10 
11 The above copyright notice and this permission notice shall be included in
12 all copies or substantial portions of the Software.
13 
14 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
17 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
19 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
20 THE SOFTWARE.
21 */
22 
31 #ifndef HIP_INCLUDE_HIP_AMD_DETAIL_HIP_COOPERATIVE_GROUPS_HELPER_H
32 #define HIP_INCLUDE_HIP_AMD_DETAIL_HIP_COOPERATIVE_GROUPS_HELPER_H
33 
34 #if __cplusplus
36 #include <hip/amd_detail/device_functions.h>
37 
38 #if !defined(__align__)
39 #define __align__(x) __attribute__((aligned(x)))
40 #endif
41 
42 #if !defined(__CG_QUALIFIER__)
43 #define __CG_QUALIFIER__ __device__ __forceinline__
44 #endif
45 
46 #if !defined(__CG_STATIC_QUALIFIER__)
47 #define __CG_STATIC_QUALIFIER__ __device__ static __forceinline__
48 #endif
49 
50 #if !defined(_CG_STATIC_CONST_DECL_)
51 #define _CG_STATIC_CONST_DECL_ static constexpr
52 #endif
53 
54 #if !defined(WAVEFRONT_SIZE)
55 #if __gfx1010__ || __gfx1011__ || __gfx1012__ || __gfx1030__ || __gfx1031__
56 #define WAVEFRONT_SIZE 32
57 #else
58 #define WAVEFRONT_SIZE 64
59 #endif
60 
61 namespace cooperative_groups {
62 
63 /* Global scope */
64 template <unsigned int size>
65 using is_power_of_2 = std::integral_constant<bool, (size & (size - 1)) == 0>;
66 
67 template <unsigned int size>
68 using is_valid_wavefront = std::integral_constant<bool, (size <= WAVEFRONT_SIZE)>;
69 
70 template <unsigned int size>
71 using is_valid_tile_size =
72  std::integral_constant<bool, is_power_of_2<size>::value && is_valid_wavefront<size>::value>;
73 
74 template <typename T>
75 using is_valid_type =
76  std::integral_constant<bool, std::is_integral<T>::value || std::is_floating_point<T>::value>;
77 
78 namespace internal {
79 
82 typedef enum {
83  cg_invalid,
84  cg_multi_grid,
85  cg_grid,
86  cg_workgroup,
87  cg_tiled_group
88 } group_type;
89 
93 namespace multi_grid {
94 
95 __CG_STATIC_QUALIFIER__ uint32_t num_grids() { return (uint32_t)__ockl_multi_grid_num_grids(); }
96 
97 __CG_STATIC_QUALIFIER__ uint32_t grid_rank() { return (uint32_t)__ockl_multi_grid_grid_rank(); }
98 
99 __CG_STATIC_QUALIFIER__ uint32_t size() { return (uint32_t)__ockl_multi_grid_size(); }
100 
101 __CG_STATIC_QUALIFIER__ uint32_t thread_rank() { return (uint32_t)__ockl_multi_grid_thread_rank(); }
102 
103 __CG_STATIC_QUALIFIER__ bool is_valid() { return (bool)__ockl_multi_grid_is_valid(); }
104 
105 __CG_STATIC_QUALIFIER__ void sync() { __ockl_multi_grid_sync(); }
106 
107 } // namespace multi_grid
108 
112 namespace grid {
113 
114 __CG_STATIC_QUALIFIER__ uint32_t size() {
115  return (uint32_t)((hipBlockDim_z * hipGridDim_z) * (hipBlockDim_y * hipGridDim_y) *
116  (hipBlockDim_x * hipGridDim_x));
117 }
118 
119 __CG_STATIC_QUALIFIER__ uint32_t thread_rank() {
120  // Compute global id of the workgroup to which the current thread belongs to
121  uint32_t blkIdx = (uint32_t)((hipBlockIdx_z * hipGridDim_y * hipGridDim_x) +
122  (hipBlockIdx_y * hipGridDim_x) + (hipBlockIdx_x));
123 
124  // Compute total number of threads being passed to reach current workgroup
125  // within grid
126  uint32_t num_threads_till_current_workgroup =
127  (uint32_t)(blkIdx * (hipBlockDim_x * hipBlockDim_y * hipBlockDim_z));
128 
129  // Compute thread local rank within current workgroup
130  uint32_t local_thread_rank = (uint32_t)((hipThreadIdx_z * hipBlockDim_y * hipBlockDim_x) +
131  (hipThreadIdx_y * hipBlockDim_x) + (hipThreadIdx_x));
132 
133  return (num_threads_till_current_workgroup + local_thread_rank);
134 }
135 
136 __CG_STATIC_QUALIFIER__ bool is_valid() { return (bool)__ockl_grid_is_valid(); }
137 
138 __CG_STATIC_QUALIFIER__ void sync() { __ockl_grid_sync(); }
139 
140 } // namespace grid
141 
146 namespace workgroup {
147 
148 __CG_STATIC_QUALIFIER__ dim3 group_index() {
149  return (dim3((uint32_t)hipBlockIdx_x, (uint32_t)hipBlockIdx_y, (uint32_t)hipBlockIdx_z));
150 }
151 
152 __CG_STATIC_QUALIFIER__ dim3 thread_index() {
153  return (dim3((uint32_t)hipThreadIdx_x, (uint32_t)hipThreadIdx_y, (uint32_t)hipThreadIdx_z));
154 }
155 
156 __CG_STATIC_QUALIFIER__ uint32_t size() {
157  return ((uint32_t)(hipBlockDim_x * hipBlockDim_y * hipBlockDim_z));
158 }
159 
160 __CG_STATIC_QUALIFIER__ uint32_t thread_rank() {
161  return ((uint32_t)((hipThreadIdx_z * hipBlockDim_y * hipBlockDim_x) +
162  (hipThreadIdx_y * hipBlockDim_x) + (hipThreadIdx_x)));
163 }
164 
165 __CG_STATIC_QUALIFIER__ bool is_valid() {
166  // TODO(mahesha) any functionality need to be added here? I believe not
167  return true;
168 }
169 
170 __CG_STATIC_QUALIFIER__ void sync() { __syncthreads(); }
171 
172 } // namespace workgroup
173 
174 } // namespace internal
175 
176 } // namespace cooperative_groups
177 
178 #endif // __cplusplus
179 #endif // HIP_INCLUDE_HIP_AMD_DETAIL_HIP_COOPERATIVE_GROUPS_HELPER_H
180 #endif
hip_runtime_api.h
Contains C function APIs for HIP runtime. This file does not use any HCC builtin or special language ...
dim3
struct dim3 dim3
dim3
Definition: hip_runtime_api.h:318