#ifndef HIP_INCLUDE_HIP_HCC_DETAIL_HIP_RUNTIME_H
#define HIP_INCLUDE_HIP_HCC_DETAIL_HIP_RUNTIME_H

#if defined(__HCC__)
#define __HCC_OR_HIP_CLANG__ 1
#define __HCC_ONLY__ 1
#define __HIP_CLANG_ONLY__ 0
#elif defined(__clang__) && defined(__HIP__)
#define __HCC_OR_HIP_CLANG__ 1
#define __HCC_ONLY__ 0
#define __HIP_CLANG_ONLY__ 1
#else
#define __HCC_OR_HIP_CLANG__ 0
#define __HCC_ONLY__ 0
#define __HIP_CLANG_ONLY__ 0
#endif

#if __HCC_OR_HIP_CLANG__
#define CUDA_SUCCESS hipSuccess

#include <hip/hip_runtime_api.h>
#endif // __HCC_OR_HIP_CLANG__

#ifdef HIP_ENABLE_PRINTF
#define HCC_ENABLE_ACCELERATOR_PRINTF 1
#endif

#include <grid_launch.h>
#include "hc_printf.hpp"

#if GENERIC_GRID_LAUNCH == 0
#define hipLaunchParm grid_launch_parm
#else
namespace hip_impl {
struct Empty_launch_parm {};
}  // namespace hip_impl
#define hipLaunchParm hip_impl::Empty_launch_parm
#endif // GENERIC_GRID_LAUNCH

#if defined(GRID_LAUNCH_VERSION) and (GRID_LAUNCH_VERSION >= 20) || GENERIC_GRID_LAUNCH == 1
#else // Use field names for grid_launch 2.0 structure, if HCC supports GL 2.0.
#error(HCC must support GRID_LAUNCH_20)
#endif // GRID_LAUNCH_VERSION

#if GENERIC_GRID_LAUNCH == 1 && defined __HCC__
#include "grid_launch_GGL.hpp"
#endif // GENERIC_GRID_LAUNCH

#if __HCC_OR_HIP_CLANG__
extern int HIP_TRACE_API;
#include <hip/hcc_detail/hip_ldg.h>
#include <hip/hcc_detail/hip_atomic.h>
#include <hip/hcc_detail/device_functions.h>
#include <hip/hcc_detail/surface_functions.h>
#include <hip/hcc_detail/math_functions.h>
#include <hip/hcc_detail/texture_functions.h>

#if defined(__KALMAR_ACCELERATOR__) && !defined(__HCC_ACCELERATOR__)
#define __HCC_ACCELERATOR__ __KALMAR_ACCELERATOR__
#endif

// Device-side assert: abort the kernel when the condition fails in device compilation.
#if __HIP_DEVICE_COMPILE__ == 1
#undef assert
#define assert(COND)                                                                               \
    {                                                                                              \
        if (!(COND)) {                                                                             \
            abort();                                                                               \
        }                                                                                          \
    }
#endif

// Feature flags describing what the device compilation target supports.
#if (defined(__HCC_ACCELERATOR__) && (__HCC_ACCELERATOR__ != 0)) || __HIP_DEVICE_COMPILE__

// 32-bit atomics:
#define __HIP_ARCH_HAS_GLOBAL_INT32_ATOMICS__ (1)
#define __HIP_ARCH_HAS_GLOBAL_FLOAT_ATOMIC_EXCH__ (1)
#define __HIP_ARCH_HAS_SHARED_INT32_ATOMICS__ (1)
#define __HIP_ARCH_HAS_SHARED_FLOAT_ATOMIC_EXCH__ (1)
#define __HIP_ARCH_HAS_FLOAT_ATOMIC_ADD__ (1)

// 64-bit atomics:
#define __HIP_ARCH_HAS_GLOBAL_INT64_ATOMICS__ (1)
#define __HIP_ARCH_HAS_SHARED_INT64_ATOMICS__ (0)

// Doubles:
#define __HIP_ARCH_HAS_DOUBLES__ (1)

// Warp cross-lane operations:
#define __HIP_ARCH_HAS_WARP_VOTE__ (1)
#define __HIP_ARCH_HAS_WARP_BALLOT__ (1)
#define __HIP_ARCH_HAS_WARP_SHUFFLE__ (1)
#define __HIP_ARCH_HAS_WARP_FUNNEL_SHIFT__ (0)

// Synchronization:
#define __HIP_ARCH_HAS_THREAD_FENCE_SYSTEM__ (1)
#define __HIP_ARCH_HAS_SYNC_THREAD_EXT__ (0)

// Miscellaneous:
#define __HIP_ARCH_HAS_SURFACE_FUNCS__ (0)
#define __HIP_ARCH_HAS_3DGRID__ (1)
#define __HIP_ARCH_HAS_DYNAMIC_PARALLEL__ (0)
#endif // device feature flags

// __launch_bounds__ maps onto the amdgpu flat-work-group-size / waves-per-EU attributes.
#define launch_bounds_impl0(requiredMaxThreadsPerBlock)                                            \
    __attribute__((amdgpu_flat_work_group_size(1, requiredMaxThreadsPerBlock)))
#define launch_bounds_impl1(requiredMaxThreadsPerBlock, minBlocksPerMultiprocessor)                \
    __attribute__((amdgpu_flat_work_group_size(1, requiredMaxThreadsPerBlock),                     \
                   amdgpu_waves_per_eu(minBlocksPerMultiprocessor)))
#define select_impl_(_1, _2, impl_, ...) impl_
#define __launch_bounds__(...)                                                                     \
    select_impl_(__VA_ARGS__, launch_bounds_impl1, launch_bounds_impl0)(__VA_ARGS__)

// Detect whether we are compiling in C++ or C mode.
#if defined(__cplusplus)
#define __HCC_CPP__
#elif defined(__STDC_VERSION__)
#define __HCC_C__
#endif

#endif // defined __HCC__

#if __HCC_OR_HIP_CLANG__

__host__ inline void* __get_dynamicgroupbaseptr() { return nullptr; }
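// Usage sketch (illustrative, not part of the original header): __launch_bounds__ above
// expands to the amdgpu work-group-size attribute, and the __HIP_ARCH_HAS_* macros can
// gate code paths at compile time. The kernel and buffer names below are hypothetical.
//
//   __global__ void __launch_bounds__(256) scale(float* data, float factor) {
//       int tid = hipBlockIdx_x * hipBlockDim_x + hipThreadIdx_x;
//   #if __HIP_ARCH_HAS_DOUBLES__
//       data[tid] = static_cast<float>(static_cast<double>(data[tid]) * factor);
//   #else
//       data[tid] *= factor;
//   #endif
//   }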
#if __HIP_ARCH_GFX701__ == 0

__device__ unsigned __hip_ds_bpermute(int index, unsigned src);
__device__ float __hip_ds_bpermutef(int index, float src);
__device__ unsigned __hip_ds_permute(int index, unsigned src);
__device__ float __hip_ds_permutef(int index, float src);

__device__ unsigned __hip_ds_swizzle(unsigned int src, int pattern);
__device__ float __hip_ds_swizzlef(float src, int pattern);
__device__ int __hip_move_dpp(int src, int dpp_ctrl, int row_mask, int bank_mask, bool bound_ctrl);
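// Usage sketch (illustrative): the DS cross-lane intrinsics exchange values between lanes
// of a wavefront without going through shared memory. For example, a backward permute that
// reads the value held by lane (laneId + 1):
//
//   unsigned neighbor = __hip_ds_bpermute((laneId + 1) * 4, value);
//
// The index is a byte offset (lane * 4), matching the underlying ds_bpermute_b32
// instruction; `laneId` and `value` are hypothetical variables.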
#endif // __HIP_ARCH_GFX701__ == 0

#endif // __HCC_OR_HIP_CLANG__

#ifdef __HCC__

// Coordinate accessors: each of x, y, z converts to the value returned by the
// corresponding HC intrinsic for that dimension.
template <
    typename std::common_type<decltype(hc_get_group_id), decltype(hc_get_group_size),
                              decltype(hc_get_num_groups), decltype(hc_get_workitem_id)>::type f>
class Coordinates {
    using R = decltype(f(0));

    struct X {
        __device__ operator R() const { return f(0); }
    };
    struct Y {
        __device__ operator R() const { return f(1); }
    };
    struct Z {
        __device__ operator R() const { return f(2); }
    };

   public:
    static constexpr X x{};
    static constexpr Y y{};
    static constexpr Z z{};
};
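// Usage sketch (illustrative): instances of Coordinates parameterized on the HC intrinsics
// provide CUDA-style coordinate objects. For example, a hypothetical
//
//   static constexpr Coordinates<hc_get_workitem_id> threadIdx;
//
// would make `threadIdx.x` evaluate to hc_get_workitem_id(0) on the device.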
#define hipThreadIdx_x (hc_get_workitem_id(0))
#define hipThreadIdx_y (hc_get_workitem_id(1))
#define hipThreadIdx_z (hc_get_workitem_id(2))

#define hipBlockIdx_x (hc_get_group_id(0))
#define hipBlockIdx_y (hc_get_group_id(1))
#define hipBlockIdx_z (hc_get_group_id(2))

#define hipBlockDim_x (hc_get_group_size(0))
#define hipBlockDim_y (hc_get_group_size(1))
#define hipBlockDim_z (hc_get_group_size(2))

#define hipGridDim_x (hc_get_num_groups(0))
#define hipGridDim_y (hc_get_num_groups(1))
#define hipGridDim_z (hc_get_num_groups(2))

#endif // defined __HCC__

#if __HCC_OR_HIP_CLANG__
extern "C" __device__ void* __hip_malloc(size_t);
extern "C" __device__ void* __hip_free(void* ptr);

static inline __device__ void* malloc(size_t size) { return __hip_malloc(size); }
static inline __device__ void* free(void* ptr) { return __hip_free(ptr); }
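// Usage sketch (illustrative): device-side malloc/free forward to the runtime's device
// heap, so a kernel can allocate scratch storage. The kernel name below is hypothetical.
//
//   __global__ void worker() {
//       int* scratch = static_cast<int*>(malloc(64 * sizeof(int)));
//       if (scratch) {
//           scratch[0] = hipThreadIdx_x;
//           free(scratch);
//       }
//   }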
#ifdef __HCC_ACCELERATOR__

// Device-side printf: forward to hc::printf when HCC provides it, otherwise compile
// to a no-op so that kernels using printf still build.
#ifdef HC_FEATURE_PRINTF
template <typename... All>
static inline __device__ void printf(const char* format, All... all) {
    hc::printf(format, all...);
}
#else
template <typename... All>
static inline __device__ void printf(const char* format, All... all) {}
#endif // HC_FEATURE_PRINTF

#endif // __HCC_ACCELERATOR__
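// Usage sketch (illustrative): inside a kernel, this printf degrades to a no-op when the
// accelerator has no printf support.
//
//   printf("block %u thread %u\n", (unsigned)hipBlockIdx_x, (unsigned)hipThreadIdx_x);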
#endif //__HCC_OR_HIP_CLANG__

#ifdef __HCC__

#define __syncthreads() hc_barrier(CLK_LOCAL_MEM_FENCE)

#define HIP_KERNEL_NAME(...) (__VA_ARGS__)
#define HIP_SYMBOL(X) #X

#if defined __HCC_CPP__
extern hipStream_t ihipPreLaunchKernel(hipStream_t stream, dim3 grid, dim3 block,
                                       grid_launch_parm* lp, const char* kernelNameStr);
extern hipStream_t ihipPreLaunchKernel(hipStream_t stream, dim3 grid, size_t block,
                                       grid_launch_parm* lp, const char* kernelNameStr);
extern hipStream_t ihipPreLaunchKernel(hipStream_t stream, size_t grid, dim3 block,
                                       grid_launch_parm* lp, const char* kernelNameStr);
extern hipStream_t ihipPreLaunchKernel(hipStream_t stream, size_t grid, size_t block,
                                       grid_launch_parm* lp, const char* kernelNameStr);
extern void ihipPostLaunchKernel(const char* kernelName, hipStream_t stream,
                                 grid_launch_parm& lp);

#if GENERIC_GRID_LAUNCH == 0
#define hipLaunchKernel(_kernelName, _numBlocks3D, _blockDim3D, _groupMemBytes, _stream, ...)      \
    do {                                                                                           \
        grid_launch_parm lp;                                                                       \
        lp.dynamic_group_mem_bytes = _groupMemBytes;                                               \
        hipStream_t trueStream =                                                                   \
            (ihipPreLaunchKernel(_stream, _numBlocks3D, _blockDim3D, &lp, #_kernelName));          \
        _kernelName(lp, ##__VA_ARGS__);                                                            \
        ihipPostLaunchKernel(#_kernelName, trueStream, lp);                                        \
    } while (0)
#endif // GENERIC_GRID_LAUNCH

#elif defined(__HCC_C__)

#endif // __HCC_CPP__

#elif defined(__clang__) && defined(__HIP__)

#define HIP_KERNEL_NAME(...) __VA_ARGS__
#define HIP_SYMBOL(X) #X

typedef int hipLaunchParm;
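// Usage sketch (illustrative): with the launch macros defined just below, a kernel is
// launched by passing the grid/block dimensions, dynamic LDS bytes, and a stream ahead
// of the kernel arguments. Names are hypothetical.
//
//   hipLaunchKernelGGL(scale, dim3(numBlocks), dim3(threadsPerBlock), 0, 0, d_data, 2.0f);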
#define hipLaunchKernel(kernelName, numblocks, numthreads, memperblock, streamId, ...)             \
    do {                                                                                           \
        kernelName<<<numblocks, numthreads, memperblock, streamId>>>(0, ##__VA_ARGS__);            \
    } while (0)

#define hipLaunchKernelGGL(kernelName, numblocks, numthreads, memperblock, streamId, ...)          \
    do {                                                                                           \
        kernelName<<<numblocks, numthreads, memperblock, streamId>>>(__VA_ARGS__);                 \
    } while (0)

#include <hip/hip_runtime_api.h>

#pragma push_macro("__DEVICE__")
#define __DEVICE__ static __device__ __forceinline__

extern "C" __device__ size_t __ockl_get_local_id(uint);
__DEVICE__ uint __hip_get_thread_idx_x() { return __ockl_get_local_id(0); }
__DEVICE__ uint __hip_get_thread_idx_y() { return __ockl_get_local_id(1); }
__DEVICE__ uint __hip_get_thread_idx_z() { return __ockl_get_local_id(2); }

extern "C" __device__ size_t __ockl_get_group_id(uint);
__DEVICE__ uint __hip_get_block_idx_x() { return __ockl_get_group_id(0); }
__DEVICE__ uint __hip_get_block_idx_y() { return __ockl_get_group_id(1); }
__DEVICE__ uint __hip_get_block_idx_z() { return __ockl_get_group_id(2); }

extern "C" __device__ size_t __ockl_get_local_size(uint);
__DEVICE__ uint __hip_get_block_dim_x() { return __ockl_get_local_size(0); }
__DEVICE__ uint __hip_get_block_dim_y() { return __ockl_get_local_size(1); }
__DEVICE__ uint __hip_get_block_dim_z() { return __ockl_get_local_size(2); }

extern "C" __device__ size_t __ockl_get_num_groups(uint);
__DEVICE__ uint __hip_get_grid_dim_x() { return __ockl_get_num_groups(0); }
__DEVICE__ uint __hip_get_grid_dim_y() { return __ockl_get_num_groups(1); }
__DEVICE__ uint __hip_get_grid_dim_z() { return __ockl_get_num_groups(2); }
// Wrap each getter in a Clang "property" so that threadIdx.x and friends read like
// CUDA built-ins.
#define __HIP_DEVICE_BUILTIN(DIMENSION, FUNCTION)                                                  \
    __declspec(property(get = __get_##DIMENSION)) uint DIMENSION;                                  \
    __DEVICE__ uint __get_##DIMENSION(void) {                                                      \
        return FUNCTION;                                                                           \
    }

struct __hip_builtin_threadIdx_t {
    __HIP_DEVICE_BUILTIN(x, __hip_get_thread_idx_x());
    __HIP_DEVICE_BUILTIN(y, __hip_get_thread_idx_y());
    __HIP_DEVICE_BUILTIN(z, __hip_get_thread_idx_z());
};

struct __hip_builtin_blockIdx_t {
    __HIP_DEVICE_BUILTIN(x, __hip_get_block_idx_x());
    __HIP_DEVICE_BUILTIN(y, __hip_get_block_idx_y());
    __HIP_DEVICE_BUILTIN(z, __hip_get_block_idx_z());
};

struct __hip_builtin_blockDim_t {
    __HIP_DEVICE_BUILTIN(x, __hip_get_block_dim_x());
    __HIP_DEVICE_BUILTIN(y, __hip_get_block_dim_y());
    __HIP_DEVICE_BUILTIN(z, __hip_get_block_dim_z());
};

struct __hip_builtin_gridDim_t {
    __HIP_DEVICE_BUILTIN(x, __hip_get_grid_dim_x());
    __HIP_DEVICE_BUILTIN(y, __hip_get_grid_dim_y());
    __HIP_DEVICE_BUILTIN(z, __hip_get_grid_dim_z());
};

#undef __HIP_DEVICE_BUILTIN
#pragma pop_macro("__DEVICE__")

extern const __device__ __attribute__((weak)) __hip_builtin_threadIdx_t threadIdx;
extern const __device__ __attribute__((weak)) __hip_builtin_blockIdx_t blockIdx;
extern const __device__ __attribute__((weak)) __hip_builtin_blockDim_t blockDim;
extern const __device__ __attribute__((weak)) __hip_builtin_gridDim_t gridDim;
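// Usage sketch (illustrative): with the weak built-in objects above, device code can
// compute a global index exactly as it would in CUDA. Names are hypothetical.
//
//   __global__ void fill(int* out, int n) {
//       int i = blockIdx.x * blockDim.x + threadIdx.x;
//       if (i < n) out[i] = i;
//   }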
#define hipThreadIdx_x threadIdx.x
#define hipThreadIdx_y threadIdx.y
#define hipThreadIdx_z threadIdx.z

#define hipBlockIdx_x blockIdx.x
#define hipBlockIdx_y blockIdx.y
#define hipBlockIdx_z blockIdx.z

#define hipBlockDim_x blockDim.x
#define hipBlockDim_y blockDim.y
#define hipBlockDim_z blockDim.z

#define hipGridDim_x gridDim.x
#define hipGridDim_y gridDim.y
#define hipGridDim_z gridDim.z

#include <hip/hcc_detail/math_functions.h>

#pragma push_macro("__CUDA__")
#define __CUDA__
#include <__clang_cuda_math_forward_declares.h>
#include <__clang_cuda_complex_builtins.h>
#include <cuda_wrappers/algorithm>
#include <cuda_wrappers/complex>
#undef __CUDA__
#pragma pop_macro("__CUDA__")

#endif // defined(__clang__) && defined(__HIP__)

#include <hip/hcc_detail/hip_memory.h>

#endif // HIP_INCLUDE_HIP_HCC_DETAIL_HIP_RUNTIME_H