23 #ifndef HIP_INCLUDE_HIP_HCC_DETAIL_HIP_MEMORY_H 24 #define HIP_INCLUDE_HIP_HCC_DETAIL_HIP_MEMORY_H 31 #ifndef __HIP_SIZE_OF_PAGE 32 #define __HIP_SIZE_OF_PAGE 64 36 #ifndef __HIP_NUM_PAGES 37 #define __HIP_NUM_PAGES (16 * 64 * 64) 40 #define __HIP_SIZE_OF_HEAP (__HIP_NUM_PAGES * __HIP_SIZE_OF_PAGE) 42 #if __HCC__ || __HIP__ 44 extern __attribute__((weak)) __device__
char __hip_device_heap[__HIP_SIZE_OF_HEAP];
45 extern __attribute__((weak)) __device__
46 uint32_t __hip_device_page_flag[__HIP_NUM_PAGES];
48 extern "C" inline __device__
void* __hip_malloc(
size_t size) {
49 char* heap = (
char*)__hip_device_heap;
50 if (size > __HIP_SIZE_OF_HEAP) {
51 return (
void*)
nullptr;
53 uint32_t totalThreads =
54 hipBlockDim_x * hipGridDim_x * hipBlockDim_y
55 * hipGridDim_y * hipBlockDim_z * hipGridDim_z;
56 uint32_t currentWorkItem = hipThreadIdx_x + hipBlockDim_x * hipBlockIdx_x
57 + (hipThreadIdx_y + hipBlockDim_y * hipBlockIdx_y) * hipBlockDim_x
58 + (hipThreadIdx_z + hipBlockDim_z * hipBlockIdx_z) * hipBlockDim_x
61 uint32_t numHeapsPerWorkItem = __HIP_NUM_PAGES / totalThreads;
62 uint32_t heapSizePerWorkItem = __HIP_SIZE_OF_HEAP / totalThreads;
64 uint32_t stride = size / __HIP_SIZE_OF_PAGE;
65 uint32_t start = numHeapsPerWorkItem * currentWorkItem;
69 while (__hip_device_page_flag[k] > 0) {
73 for (uint32_t i = 0; i < stride - 1; i++) {
74 __hip_device_page_flag[i + start + k] = 1;
77 __hip_device_page_flag[start + stride - 1 + k] = 2;
79 void* ptr = (
void*)(heap
80 + heapSizePerWorkItem * currentWorkItem + k * __HIP_SIZE_OF_PAGE);
85 extern "C" inline __device__
void* __hip_free(
void* ptr) {
90 uint32_t offsetByte = (uint64_t)ptr - (uint64_t)__hip_device_heap;
91 uint32_t offsetPage = offsetByte / __HIP_SIZE_OF_PAGE;
93 while (__hip_device_page_flag[offsetPage] != 0) {
94 if (__hip_device_page_flag[offsetPage] == 2) {
95 __hip_device_page_flag[offsetPage] = 0;
99 __hip_device_page_flag[offsetPage] = 0;
109 #endif // HIP_INCLUDE_HIP_HCC_DETAIL_HIP_MEMORY_H