Contains definitions of APIs for HIP runtime.
More...
#include <math.h>
#include <string.h>
#include <stddef.h>
#include <hip/hip_runtime_api.h>
#include "grid_launch.h"
#include "hc_printf.hpp"
#include <hip/hcc_detail/hip_atomic.h>
#include <hip/hcc_detail/host_defines.h>
#include <hip/hcc_detail/device_functions.h>
#include <hip/hcc_detail/surface_functions.h>
#include <hip/hcc_detail/texture_functions.h>
#include <hip/hcc_detail/math_functions.h>
#include <hip/hcc_detail/hip_memory.h>
Go to the source code of this file.
|
#define | __HCC_OR_HIP_CLANG__ 1 |
|
#define | __HCC_ONLY__ 1 |
|
#define | __HIP_CLANG_ONLY__ 0 |
|
#define | CUDA_SUCCESS hipSuccess |
|
#define | hipLaunchParm grid_launch_parm |
|
#define | launch_bounds_impl0(requiredMaxThreadsPerBlock) __attribute__((amdgpu_flat_work_group_size(1, requiredMaxThreadsPerBlock))) |
|
#define | launch_bounds_impl1(requiredMaxThreadsPerBlock, minBlocksPerMultiprocessor) |
|
#define | select_impl_(_1, _2, impl_, ...) impl_ |
|
#define | __launch_bounds__(...) select_impl_(__VA_ARGS__, launch_bounds_impl1, launch_bounds_impl0)(__VA_ARGS__) |
|
#define | hipThreadIdx_x (hc_get_workitem_id(0)) |
|
#define | hipThreadIdx_y (hc_get_workitem_id(1)) |
|
#define | hipThreadIdx_z (hc_get_workitem_id(2)) |
|
#define | hipBlockIdx_x (hc_get_group_id(0)) |
|
#define | hipBlockIdx_y (hc_get_group_id(1)) |
|
#define | hipBlockIdx_z (hc_get_group_id(2)) |
|
#define | hipBlockDim_x (hc_get_group_size(0)) |
|
#define | hipBlockDim_y (hc_get_group_size(1)) |
|
#define | hipBlockDim_z (hc_get_group_size(2)) |
|
#define | hipGridDim_x (hc_get_num_groups(0)) |
|
#define | hipGridDim_y (hc_get_num_groups(1)) |
|
#define | hipGridDim_z (hc_get_num_groups(2)) |
|
#define | __syncthreads() hc_barrier(CLK_LOCAL_MEM_FENCE) |
|
#define | HIP_KERNEL_NAME(...) (__VA_ARGS__) |
|
#define | HIP_SYMBOL(X) #X |
|
|
__host__ void * | __get_dynamicgroupbaseptr () |
|
__device__ unsigned | __hip_ds_bpermute (int index, unsigned src) |
|
__device__ float | __hip_ds_bpermutef (int index, float src) |
|
__device__ unsigned | __hip_ds_permute (int index, unsigned src) |
|
__device__ float | __hip_ds_permutef (int index, float src) |
|
template<int pattern> |
__device__ unsigned | __hip_ds_swizzle_N (unsigned int src) |
|
template<int pattern> |
__device__ float | __hip_ds_swizzlef_N (float src) |
|
template<int dpp_ctrl, int row_mask, int bank_mask, bool bound_ctrl> |
__device__ int | __hip_move_dpp_N (int src) |
|
__device__ void * | __hip_malloc (size_t) |
|
__device__ void * | __hip_free (void *ptr) |
|
Contains definitions of APIs for HIP runtime.
#define launch_bounds_impl1(requiredMaxThreadsPerBlock, minBlocksPerMultiprocessor)
Value:
__attribute__((amdgpu_flat_work_group_size(1, requiredMaxThreadsPerBlock), \
               amdgpu_waves_per_eu(minBlocksPerMultiprocessor)))
_Float16 __2f16 __attribute__((ext_vector_type(2)))
Copies the memory address of symbol symbolName to devPtr.
Definition: hip_fp16_math_fwd.h:53