HIP: Heterogeneous-computing Interface for Portability
hip_runtime_api.h
1 /*
2 Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved.
3 
4 Permission is hereby granted, free of charge, to any person obtaining a copy
5 of this software and associated documentation files (the "Software"), to deal
6 in the Software without restriction, including without limitation the rights
7 to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
8 copies of the Software, and to permit persons to whom the Software is
9 furnished to do so, subject to the following conditions:
10 
11 The above copyright notice and this permission notice shall be included in
12 all copies or substantial portions of the Software.
13 
14 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
17 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
19 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
20 THE SOFTWARE.
21 */
22 
23 #ifndef HIP_INCLUDE_HIP_NVCC_DETAIL_HIP_RUNTIME_API_H
24 #define HIP_INCLUDE_HIP_NVCC_DETAIL_HIP_RUNTIME_API_H
25 
26 #include <cuda_runtime_api.h>
27 #include <cuda.h>
28 #include <cuda_profiler_api.h>
29 
30 #ifdef __cplusplus
31 extern "C" {
32 #endif
33 
/*
 * __dparm(x): default-argument helper used in the declarations of this header.
 * Expands to "= x" when compiled as C++ and to nothing otherwise.
 */
#if defined(__cplusplus)
#define __dparm(x) = x
#else
#define __dparm(x)
#endif
39 
/*
 * __HIP_DEPRECATED: deprecation marker for CUDA-mapped HIP APIs.
 *   - empty when __DOXYGEN_ONLY__ or HIP_ENABLE_DEPRECATED is defined;
 *   - __declspec(deprecated) when _MSC_VER is defined;
 *   - __attribute__((deprecated)) when __GNUC__ is defined;
 *   - empty for any other compiler.
 */
#if defined(__DOXYGEN_ONLY__) || defined(HIP_ENABLE_DEPRECATED)
#define __HIP_DEPRECATED
#elif defined(_MSC_VER)
#define __HIP_DEPRECATED __declspec(deprecated)
#elif defined(__GNUC__)
#define __HIP_DEPRECATED __attribute__((deprecated))
#else
#define __HIP_DEPRECATED
#endif
50 
51 
// TODO - move to include/hip_runtime_api.h as a common implementation.
/**
 * Direction selector for the hipMemcpy* wrappers in this header.
 * Enumerator values are spelled out explicitly; they are the same values the
 * compiler assigns implicitly (0..4).
 */
typedef enum hipMemcpyKind {
    hipMemcpyHostToHost = 0,
    hipMemcpyHostToDevice = 1,
    hipMemcpyDeviceToHost = 2,
    hipMemcpyDeviceToDevice = 3,
    hipMemcpyDefault = 4
} hipMemcpyKind;
64 
/* hipDataType: HIP spellings of the CUDA data-type enumerators. */
#define hipDataType cudaDataType
#define HIP_R_16F CUDA_R_16F
#define HIP_R_32F CUDA_R_32F
#define HIP_R_64F CUDA_R_64F
#define HIP_C_16F CUDA_C_16F
#define HIP_C_32F CUDA_C_32F
#define HIP_C_64F CUDA_C_64F

/* hipLibraryPropertyType: HIP spellings of the CUDA library-property selectors. */
#define hipLibraryPropertyType libraryPropertyType
#define HIP_LIBRARY_MAJOR_VERSION MAJOR_VERSION
#define HIP_LIBRARY_MINOR_VERSION MINOR_VERSION
#define HIP_LIBRARY_PATCH_LEVEL PATCH_LEVEL
79 
80 // hipTextureAddressMode
81 typedef enum cudaTextureAddressMode hipTextureAddressMode;
82 #define hipAddressModeWrap cudaAddressModeWrap
83 #define hipAddressModeClamp cudaAddressModeClamp
84 #define hipAddressModeMirror cudaAddressModeMirror
85 #define hipAddressModeBorder cudaAddressModeBorder
86 
87 // hipTextureFilterMode
88 typedef enum cudaTextureFilterMode hipTextureFilterMode;
89 #define hipFilterModePoint cudaFilterModePoint
90 #define hipFilterModeLinear cudaFilterModeLinear
91 
92 // hipTextureReadMode
93 typedef enum cudaTextureReadMode hipTextureReadMode;
94 #define hipReadModeElementType cudaReadModeElementType
95 #define hipReadModeNormalizedFloat cudaReadModeNormalizedFloat
96 
97 // hipChannelFormatKind
98 typedef enum cudaChannelFormatKind hipChannelFormatKind;
99 #define hipChannelFormatKindSigned cudaChannelFormatKindSigned
100 #define hipChannelFormatKindUnsigned cudaChannelFormatKindUnsigned
101 #define hipChannelFormatKindFloat cudaChannelFormatKindFloat
102 #define hipChannelFormatKindNone cudaChannelFormatKindNone
103 
104 #define hipSurfaceBoundaryMode cudaSurfaceBoundaryMode
105 #define hipBoundaryModeZero cudaBoundaryModeZero
106 #define hipBoundaryModeTrap cudaBoundaryModeTrap
107 #define hipBoundaryModeClamp cudaBoundaryModeClamp
108 
/* hipFuncCache: HIP spellings of the CUDA runtime cache-preference values. */
#define hipFuncCachePreferNone cudaFuncCachePreferNone
#define hipFuncCachePreferShared cudaFuncCachePreferShared
#define hipFuncCachePreferL1 cudaFuncCachePreferL1
#define hipFuncCachePreferEqual cudaFuncCachePreferEqual

/* hipResourceType: HIP spellings of the CUDA resource-type enum and values. */
#define hipResourceType cudaResourceType
#define hipResourceTypeArray cudaResourceTypeArray
#define hipResourceTypeMipmappedArray cudaResourceTypeMipmappedArray
#define hipResourceTypeLinear cudaResourceTypeLinear
#define hipResourceTypePitch2D cudaResourceTypePitch2D
121 //
122 // hipErrorNoDevice.
123 
124 
/* Event flags, mapped onto their CUDA counterparts. */
#define hipEventDefault cudaEventDefault
#define hipEventBlockingSync cudaEventBlockingSync
#define hipEventDisableTiming cudaEventDisableTiming
#define hipEventInterprocess cudaEventInterprocess
#define hipEventReleaseToDevice 0 /* no-op on CUDA platform */
#define hipEventReleaseToSystem 0 /* no-op on CUDA platform */


/* Host allocation flags. The coherent / non-coherent flags are defined as 0x0 here. */
#define hipHostMallocDefault cudaHostAllocDefault
#define hipHostMallocPortable cudaHostAllocPortable
#define hipHostMallocMapped cudaHostAllocMapped
#define hipHostMallocWriteCombined cudaHostAllocWriteCombined
#define hipHostMallocCoherent 0x0
#define hipHostMallocNonCoherent 0x0

/* Managed-memory attach flags. */
#define hipMemAttachGlobal cudaMemAttachGlobal
#define hipMemAttachHost cudaMemAttachHost

/* Host registration flags. */
#define hipHostRegisterDefault cudaHostRegisterDefault
#define hipHostRegisterPortable cudaHostRegisterPortable
#define hipHostRegisterMapped cudaHostRegisterMapped
#define hipHostRegisterIoMemory cudaHostRegisterIoMemory

/* Driver-API launch parameter markers, limits and IPC flags. */
#define HIP_LAUNCH_PARAM_BUFFER_POINTER CU_LAUNCH_PARAM_BUFFER_POINTER
#define HIP_LAUNCH_PARAM_BUFFER_SIZE CU_LAUNCH_PARAM_BUFFER_SIZE
#define HIP_LAUNCH_PARAM_END CU_LAUNCH_PARAM_END
#define hipLimitMallocHeapSize cudaLimitMallocHeapSize
#define hipIpcMemLazyEnablePeerAccess cudaIpcMemLazyEnablePeerAccess

/* Occupancy calculator flag. */
#define hipOccupancyDefault cudaOccupancyDefault

/* Multi-device cooperative launch flags. */
#define hipCooperativeLaunchMultiDeviceNoPreSync \
    cudaCooperativeLaunchMultiDeviceNoPreSync
#define hipCooperativeLaunchMultiDeviceNoPostSync \
    cudaCooperativeLaunchMultiDeviceNoPostSync
161 
162 
/* HIP spellings of the CUjit_option enumerators (driver API JIT options). */
#define hipJitOptionMaxRegisters CU_JIT_MAX_REGISTERS
#define hipJitOptionThreadsPerBlock CU_JIT_THREADS_PER_BLOCK
#define hipJitOptionWallTime CU_JIT_WALL_TIME
#define hipJitOptionInfoLogBuffer CU_JIT_INFO_LOG_BUFFER
#define hipJitOptionInfoLogBufferSizeBytes CU_JIT_INFO_LOG_BUFFER_SIZE_BYTES
#define hipJitOptionErrorLogBuffer CU_JIT_ERROR_LOG_BUFFER
#define hipJitOptionErrorLogBufferSizeBytes CU_JIT_ERROR_LOG_BUFFER_SIZE_BYTES
#define hipJitOptionOptimizationLevel CU_JIT_OPTIMIZATION_LEVEL
#define hipJitOptionTargetFromContext CU_JIT_TARGET_FROM_CUCONTEXT
#define hipJitOptionTarget CU_JIT_TARGET
#define hipJitOptionFallbackStrategy CU_JIT_FALLBACK_STRATEGY
#define hipJitOptionGenerateDebugInfo CU_JIT_GENERATE_DEBUG_INFO
#define hipJitOptionLogVerbose CU_JIT_LOG_VERBOSE
#define hipJitOptionGenerateLineInfo CU_JIT_GENERATE_LINE_INFO
#define hipJitOptionCacheMode CU_JIT_CACHE_MODE
#define hipJitOptionSm3xOpt CU_JIT_NEW_SM3X_OPT
#define hipJitOptionFastCompile CU_JIT_FAST_COMPILE
#define hipJitOptionNumOptions CU_JIT_NUM_OPTIONS
182 
183 typedef cudaEvent_t hipEvent_t;
184 typedef cudaStream_t hipStream_t;
185 typedef cudaIpcEventHandle_t hipIpcEventHandle_t;
186 typedef cudaIpcMemHandle_t hipIpcMemHandle_t;
187 typedef enum cudaLimit hipLimit_t;
188 typedef enum cudaFuncAttribute hipFuncAttribute;
189 typedef enum cudaFuncCache hipFuncCache_t;
190 typedef CUcontext hipCtx_t;
191 typedef enum cudaSharedMemConfig hipSharedMemConfig;
192 typedef CUfunc_cache hipFuncCache;
193 typedef CUjit_option hipJitOption;
194 typedef CUdevice hipDevice_t;
195 typedef enum cudaDeviceP2PAttr hipDeviceP2PAttr;
196 typedef CUmodule hipModule_t;
197 typedef CUfunction hipFunction_t;
198 typedef CUdeviceptr hipDeviceptr_t;
199 typedef struct cudaArray hipArray;
200 typedef struct cudaArray* hipArray_t;
201 typedef struct cudaArray* hipArray_const_t;
202 typedef struct cudaFuncAttributes hipFuncAttributes;
203 typedef struct cudaLaunchParams hipLaunchParams;
204 #define hipFunction_attribute CUfunction_attribute
205 #define hip_Memcpy2D CUDA_MEMCPY2D
206 #define hipMemcpy3DParms cudaMemcpy3DParms
207 #define hipArrayDefault cudaArrayDefault
208 #define hipArrayLayered cudaArrayLayered
209 #define hipArraySurfaceLoadStore cudaArraySurfaceLoadStore
210 #define hipArrayCubemap cudaArrayCubemap
211 #define hipArrayTextureGather cudaArrayTextureGather
212 
213 typedef cudaTextureObject_t hipTextureObject_t;
214 typedef cudaSurfaceObject_t hipSurfaceObject_t;
215 #define hipTextureType1D cudaTextureType1D
216 #define hipTextureType1DLayered cudaTextureType1DLayered
217 #define hipTextureType2D cudaTextureType2D
218 #define hipTextureType2DLayered cudaTextureType2DLayered
219 #define hipTextureType3D cudaTextureType3D
220 #define hipDeviceMapHost cudaDeviceMapHost
221 
222 typedef struct cudaExtent hipExtent;
223 typedef struct cudaPitchedPtr hipPitchedPtr;
224 #define make_hipExtent make_cudaExtent
225 #define make_hipPos make_cudaPos
226 #define make_hipPitchedPtr make_cudaPitchedPtr
227 // Flags that can be used with hipStreamCreateWithFlags
228 #define hipStreamDefault cudaStreamDefault
229 #define hipStreamNonBlocking cudaStreamNonBlocking
230 
231 typedef struct cudaChannelFormatDesc hipChannelFormatDesc;
232 typedef struct cudaResourceDesc hipResourceDesc;
233 typedef struct cudaTextureDesc hipTextureDesc;
234 typedef struct cudaResourceViewDesc hipResourceViewDesc;
235 // adding code for hipmemSharedConfig
236 #define hipSharedMemBankSizeDefault cudaSharedMemBankSizeDefault
237 #define hipSharedMemBankSizeFourByte cudaSharedMemBankSizeFourByte
238 #define hipSharedMemBankSizeEightByte cudaSharedMemBankSizeEightByte
239 
/* Function attributes: HIP spellings of the CUfunction_attribute enumerators. */
#define HIP_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK CU_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK
#define HIP_FUNC_ATTRIBUTE_SHARED_SIZE_BYTES CU_FUNC_ATTRIBUTE_SHARED_SIZE_BYTES
#define HIP_FUNC_ATTRIBUTE_CONST_SIZE_BYTES CU_FUNC_ATTRIBUTE_CONST_SIZE_BYTES
#define HIP_FUNC_ATTRIBUTE_LOCAL_SIZE_BYTES CU_FUNC_ATTRIBUTE_LOCAL_SIZE_BYTES
#define HIP_FUNC_ATTRIBUTE_NUM_REGS CU_FUNC_ATTRIBUTE_NUM_REGS
#define HIP_FUNC_ATTRIBUTE_PTX_VERSION CU_FUNC_ATTRIBUTE_PTX_VERSION
#define HIP_FUNC_ATTRIBUTE_BINARY_VERSION CU_FUNC_ATTRIBUTE_BINARY_VERSION
#define HIP_FUNC_ATTRIBUTE_CACHE_MODE_CA CU_FUNC_ATTRIBUTE_CACHE_MODE_CA
#define HIP_FUNC_ATTRIBUTE_MAX_DYNAMIC_SHARED_SIZE_BYTES CU_FUNC_ATTRIBUTE_MAX_DYNAMIC_SHARED_SIZE_BYTES
#define HIP_FUNC_ATTRIBUTE_PREFERRED_SHARED_MEMORY_CARVEOUT CU_FUNC_ATTRIBUTE_PREFERRED_SHARED_MEMORY_CARVEOUT
#define HIP_FUNC_ATTRIBUTE_MAX CU_FUNC_ATTRIBUTE_MAX
252 
/*
 * Legacy shuffle spellings for CUDA 9.0 and newer: each mask-less name is
 * rewritten to its *_sync counterpart with a fixed mask of 0xffffffff.
 */
#if CUDA_VERSION >= 9000
#define __shfl(...) __shfl_sync(0xffffffff, __VA_ARGS__)
#define __shfl_up(...) __shfl_up_sync(0xffffffff, __VA_ARGS__)
#define __shfl_down(...) __shfl_down_sync(0xffffffff, __VA_ARGS__)
#define __shfl_xor(...) __shfl_xor_sync(0xffffffff, __VA_ARGS__)
#endif  // CUDA_VERSION >= 9000
259 
260 inline static hipError_t hipCUDAErrorTohipError(cudaError_t cuError) {
261  switch (cuError) {
262  case cudaSuccess:
263  return hipSuccess;
264  case cudaErrorProfilerDisabled:
265  return hipErrorProfilerDisabled;
266  case cudaErrorProfilerNotInitialized:
267  return hipErrorProfilerNotInitialized;
268  case cudaErrorProfilerAlreadyStarted:
269  return hipErrorProfilerAlreadyStarted;
270  case cudaErrorProfilerAlreadyStopped:
271  return hipErrorProfilerAlreadyStopped;
272  case cudaErrorInsufficientDriver:
273  return hipErrorInsufficientDriver;
274  case cudaErrorUnsupportedLimit:
275  return hipErrorUnsupportedLimit;
276  case cudaErrorPeerAccessUnsupported:
277  return hipErrorPeerAccessUnsupported;
278  case cudaErrorInvalidGraphicsContext:
279  return hipErrorInvalidGraphicsContext;
280  case cudaErrorSharedObjectSymbolNotFound:
281  return hipErrorSharedObjectSymbolNotFound;
282  case cudaErrorSharedObjectInitFailed:
283  return hipErrorSharedObjectInitFailed;
284  case cudaErrorOperatingSystem:
285  return hipErrorOperatingSystem;
286  case cudaErrorSetOnActiveProcess:
287  return hipErrorSetOnActiveProcess;
288  case cudaErrorIllegalAddress:
289  return hipErrorIllegalAddress;
290  case cudaErrorInvalidSymbol:
291  return hipErrorInvalidSymbol;
292  case cudaErrorMissingConfiguration:
293  return hipErrorMissingConfiguration;
294  case cudaErrorMemoryAllocation:
295  return hipErrorOutOfMemory;
296  case cudaErrorInitializationError:
297  return hipErrorNotInitialized;
298  case cudaErrorLaunchFailure:
299  return hipErrorLaunchFailure;
300  case cudaErrorCooperativeLaunchTooLarge:
302  case cudaErrorPriorLaunchFailure:
303  return hipErrorPriorLaunchFailure;
304  case cudaErrorLaunchOutOfResources:
306  case cudaErrorInvalidDeviceFunction:
307  return hipErrorInvalidDeviceFunction;
308  case cudaErrorInvalidConfiguration:
309  return hipErrorInvalidConfiguration;
310  case cudaErrorInvalidDevice:
311  return hipErrorInvalidDevice;
312  case cudaErrorInvalidValue:
313  return hipErrorInvalidValue;
314  case cudaErrorInvalidDevicePointer:
316  case cudaErrorInvalidMemcpyDirection:
318  case cudaErrorInvalidResourceHandle:
319  return hipErrorInvalidHandle;
320  case cudaErrorNotReady:
321  return hipErrorNotReady;
322  case cudaErrorNoDevice:
323  return hipErrorNoDevice;
324  case cudaErrorPeerAccessAlreadyEnabled:
326  case cudaErrorPeerAccessNotEnabled:
328  case cudaErrorHostMemoryAlreadyRegistered:
330  case cudaErrorHostMemoryNotRegistered:
332  case cudaErrorMapBufferObjectFailed:
333  return hipErrorMapFailed;
334  case cudaErrorAssert:
335  return hipErrorAssert;
336  case cudaErrorNotSupported:
337  return hipErrorNotSupported;
338  case cudaErrorCudartUnloading:
339  return hipErrorDeinitialized;
340  case cudaErrorInvalidKernelImage:
341  return hipErrorInvalidImage;
342  case cudaErrorUnmapBufferObjectFailed:
343  return hipErrorUnmapFailed;
344  case cudaErrorNoKernelImageForDevice:
345  return hipErrorNoBinaryForGpu;
346  case cudaErrorECCUncorrectable:
347  return hipErrorECCNotCorrectable;
348  case cudaErrorDeviceAlreadyInUse:
349  return hipErrorContextAlreadyInUse;
350  case cudaErrorInvalidPtx:
352  case cudaErrorLaunchTimeout:
353  return hipErrorLaunchTimeOut;
354 #if CUDA_VERSION >= 10010
355  case cudaErrorInvalidSource:
356  return hipErrorInvalidSource;
357  case cudaErrorFileNotFound:
358  return hipErrorFileNotFound;
359  case cudaErrorSymbolNotFound:
360  return hipErrorNotFound;
361  case cudaErrorArrayIsMapped:
362  return hipErrorArrayIsMapped;
363  case cudaErrorNotMappedAsPointer:
364  return hipErrorNotMappedAsPointer;
365  case cudaErrorNotMappedAsArray:
366  return hipErrorNotMappedAsArray;
367  case cudaErrorNotMapped:
368  return hipErrorNotMapped;
369  case cudaErrorAlreadyAcquired:
370  return hipErrorAlreadyAcquired;
371  case cudaErrorAlreadyMapped:
372  return hipErrorAlreadyMapped;
373 #endif
374 #if CUDA_VERSION >= 10020
375  case cudaErrorDeviceUninitialized:
376  return hipErrorInvalidContext;
377 #endif
378  case cudaErrorUnknown:
379  default:
380  return hipErrorUnknown; // Note - translated error.
381  }
382 }
383 
384 inline static hipError_t hipCUResultTohipError(CUresult cuError) {
385  switch (cuError) {
386  case CUDA_SUCCESS:
387  return hipSuccess;
388  case CUDA_ERROR_OUT_OF_MEMORY:
389  return hipErrorOutOfMemory;
390  case CUDA_ERROR_INVALID_VALUE:
391  return hipErrorInvalidValue;
392  case CUDA_ERROR_INVALID_DEVICE:
393  return hipErrorInvalidDevice;
394  case CUDA_ERROR_DEINITIALIZED:
395  return hipErrorDeinitialized;
396  case CUDA_ERROR_NO_DEVICE:
397  return hipErrorNoDevice;
398  case CUDA_ERROR_INVALID_CONTEXT:
399  return hipErrorInvalidContext;
400  case CUDA_ERROR_NOT_INITIALIZED:
401  return hipErrorNotInitialized;
402  case CUDA_ERROR_INVALID_HANDLE:
403  return hipErrorInvalidHandle;
404  case CUDA_ERROR_MAP_FAILED:
405  return hipErrorMapFailed;
406  case CUDA_ERROR_PROFILER_DISABLED:
407  return hipErrorProfilerDisabled;
408  case CUDA_ERROR_PROFILER_NOT_INITIALIZED:
409  return hipErrorProfilerNotInitialized;
410  case CUDA_ERROR_PROFILER_ALREADY_STARTED:
411  return hipErrorProfilerAlreadyStarted;
412  case CUDA_ERROR_PROFILER_ALREADY_STOPPED:
413  return hipErrorProfilerAlreadyStopped;
414  case CUDA_ERROR_INVALID_IMAGE:
415  return hipErrorInvalidImage;
416  case CUDA_ERROR_CONTEXT_ALREADY_CURRENT:
417  return hipErrorContextAlreadyCurrent;
418  case CUDA_ERROR_UNMAP_FAILED:
419  return hipErrorUnmapFailed;
420  case CUDA_ERROR_ARRAY_IS_MAPPED:
421  return hipErrorArrayIsMapped;
422  case CUDA_ERROR_ALREADY_MAPPED:
423  return hipErrorAlreadyMapped;
424  case CUDA_ERROR_NO_BINARY_FOR_GPU:
425  return hipErrorNoBinaryForGpu;
426  case CUDA_ERROR_ALREADY_ACQUIRED:
427  return hipErrorAlreadyAcquired;
428  case CUDA_ERROR_NOT_MAPPED:
429  return hipErrorNotMapped;
430  case CUDA_ERROR_NOT_MAPPED_AS_ARRAY:
431  return hipErrorNotMappedAsArray;
432  case CUDA_ERROR_NOT_MAPPED_AS_POINTER:
433  return hipErrorNotMappedAsPointer;
434  case CUDA_ERROR_ECC_UNCORRECTABLE:
435  return hipErrorECCNotCorrectable;
436  case CUDA_ERROR_UNSUPPORTED_LIMIT:
437  return hipErrorUnsupportedLimit;
438  case CUDA_ERROR_CONTEXT_ALREADY_IN_USE:
439  return hipErrorContextAlreadyInUse;
440  case CUDA_ERROR_PEER_ACCESS_UNSUPPORTED:
441  return hipErrorPeerAccessUnsupported;
442  case CUDA_ERROR_INVALID_PTX:
444  case CUDA_ERROR_INVALID_GRAPHICS_CONTEXT:
445  return hipErrorInvalidGraphicsContext;
446  case CUDA_ERROR_INVALID_SOURCE:
447  return hipErrorInvalidSource;
448  case CUDA_ERROR_FILE_NOT_FOUND:
449  return hipErrorFileNotFound;
450  case CUDA_ERROR_SHARED_OBJECT_SYMBOL_NOT_FOUND:
451  return hipErrorSharedObjectSymbolNotFound;
452  case CUDA_ERROR_SHARED_OBJECT_INIT_FAILED:
453  return hipErrorSharedObjectInitFailed;
454  case CUDA_ERROR_OPERATING_SYSTEM:
455  return hipErrorOperatingSystem;
456  case CUDA_ERROR_NOT_FOUND:
457  return hipErrorNotFound;
458  case CUDA_ERROR_NOT_READY:
459  return hipErrorNotReady;
460  case CUDA_ERROR_ILLEGAL_ADDRESS:
461  return hipErrorIllegalAddress;
462  case CUDA_ERROR_LAUNCH_OUT_OF_RESOURCES:
464  case CUDA_ERROR_LAUNCH_TIMEOUT:
465  return hipErrorLaunchTimeOut;
466  case CUDA_ERROR_PEER_ACCESS_ALREADY_ENABLED:
468  case CUDA_ERROR_PEER_ACCESS_NOT_ENABLED:
470  case CUDA_ERROR_PRIMARY_CONTEXT_ACTIVE:
471  return hipErrorSetOnActiveProcess;
472  case CUDA_ERROR_ASSERT:
473  return hipErrorAssert;
474  case CUDA_ERROR_HOST_MEMORY_ALREADY_REGISTERED:
476  case CUDA_ERROR_HOST_MEMORY_NOT_REGISTERED:
478  case CUDA_ERROR_LAUNCH_FAILED:
479  return hipErrorLaunchFailure;
480  case CUDA_ERROR_COOPERATIVE_LAUNCH_TOO_LARGE:
482  case CUDA_ERROR_NOT_SUPPORTED:
483  return hipErrorNotSupported;
484  case CUDA_ERROR_UNKNOWN:
485  default:
486  return hipErrorUnknown; // Note - translated error.
487  }
488 }
489 
490 inline static cudaError_t hipErrorToCudaError(hipError_t hError) {
491  switch (hError) {
492  case hipSuccess:
493  return cudaSuccess;
494  case hipErrorOutOfMemory:
495  return cudaErrorMemoryAllocation;
496  case hipErrorProfilerDisabled:
497  return cudaErrorProfilerDisabled;
498  case hipErrorProfilerNotInitialized:
499  return cudaErrorProfilerNotInitialized;
500  case hipErrorProfilerAlreadyStarted:
501  return cudaErrorProfilerAlreadyStarted;
502  case hipErrorProfilerAlreadyStopped:
503  return cudaErrorProfilerAlreadyStopped;
504  case hipErrorInvalidConfiguration:
505  return cudaErrorInvalidConfiguration;
507  return cudaErrorLaunchOutOfResources;
509  return cudaErrorInvalidValue;
510  case hipErrorInvalidHandle:
511  return cudaErrorInvalidResourceHandle;
513  return cudaErrorInvalidDevice;
515  return cudaErrorInvalidMemcpyDirection;
517  return cudaErrorInvalidDevicePointer;
518  case hipErrorNotInitialized:
519  return cudaErrorInitializationError;
520  case hipErrorNoDevice:
521  return cudaErrorNoDevice;
522  case hipErrorNotReady:
523  return cudaErrorNotReady;
525  return cudaErrorPeerAccessNotEnabled;
527  return cudaErrorPeerAccessAlreadyEnabled;
529  return cudaErrorHostMemoryAlreadyRegistered;
531  return cudaErrorHostMemoryNotRegistered;
532  case hipErrorDeinitialized:
533  return cudaErrorCudartUnloading;
534  case hipErrorInvalidSymbol:
535  return cudaErrorInvalidSymbol;
536  case hipErrorInsufficientDriver:
537  return cudaErrorInsufficientDriver;
538  case hipErrorMissingConfiguration:
539  return cudaErrorMissingConfiguration;
540  case hipErrorPriorLaunchFailure:
541  return cudaErrorPriorLaunchFailure;
542  case hipErrorInvalidDeviceFunction:
543  return cudaErrorInvalidDeviceFunction;
544  case hipErrorInvalidImage:
545  return cudaErrorInvalidKernelImage;
547 #if CUDA_VERSION >= 10020
548  return cudaErrorDeviceUninitialized;
549 #else
550  return cudaErrorUnknown;
551 #endif
552  case hipErrorMapFailed:
553  return cudaErrorMapBufferObjectFailed;
554  case hipErrorUnmapFailed:
555  return cudaErrorUnmapBufferObjectFailed;
556  case hipErrorArrayIsMapped:
557 #if CUDA_VERSION >= 10010
558  return cudaErrorArrayIsMapped;
559 #else
560  return cudaErrorUnknown;
561 #endif
562  case hipErrorAlreadyMapped:
563 #if CUDA_VERSION >= 10010
564  return cudaErrorAlreadyMapped;
565 #else
566  return cudaErrorUnknown;
567 #endif
568  case hipErrorNoBinaryForGpu:
569  return cudaErrorNoKernelImageForDevice;
570  case hipErrorAlreadyAcquired:
571 #if CUDA_VERSION >= 10010
572  return cudaErrorAlreadyAcquired;
573 #else
574  return cudaErrorUnknown;
575 #endif
576  case hipErrorNotMapped:
577 #if CUDA_VERSION >= 10010
578  return cudaErrorNotMapped;
579 #else
580  return cudaErrorUnknown;
581 #endif
582  case hipErrorNotMappedAsArray:
583 #if CUDA_VERSION >= 10010
584  return cudaErrorNotMappedAsArray;
585 #else
586  return cudaErrorUnknown;
587 #endif
588  case hipErrorNotMappedAsPointer:
589 #if CUDA_VERSION >= 10010
590  return cudaErrorNotMappedAsPointer;
591 #else
592  return cudaErrorUnknown;
593 #endif
594  case hipErrorECCNotCorrectable:
595  return cudaErrorECCUncorrectable;
596  case hipErrorUnsupportedLimit:
597  return cudaErrorUnsupportedLimit;
598  case hipErrorContextAlreadyInUse:
599  return cudaErrorDeviceAlreadyInUse;
600  case hipErrorPeerAccessUnsupported:
601  return cudaErrorPeerAccessUnsupported;
603  return cudaErrorInvalidPtx;
604  case hipErrorInvalidGraphicsContext:
605  return cudaErrorInvalidGraphicsContext;
606  case hipErrorInvalidSource:
607 #if CUDA_VERSION >= 10010
608  return cudaErrorInvalidSource;
609 #else
610  return cudaErrorUnknown;
611 #endif
612  case hipErrorFileNotFound:
613 #if CUDA_VERSION >= 10010
614  return cudaErrorFileNotFound;
615 #else
616  return cudaErrorUnknown;
617 #endif
618  case hipErrorSharedObjectSymbolNotFound:
619  return cudaErrorSharedObjectSymbolNotFound;
620  case hipErrorSharedObjectInitFailed:
621  return cudaErrorSharedObjectInitFailed;
622  case hipErrorOperatingSystem:
623  return cudaErrorOperatingSystem;
624  case hipErrorNotFound:
625 #if CUDA_VERSION >= 10010
626  return cudaErrorSymbolNotFound;
627 #else
628  return cudaErrorUnknown;
629 #endif
630  case hipErrorIllegalAddress:
631  return cudaErrorIllegalAddress;
632  case hipErrorLaunchTimeOut:
633  return cudaErrorLaunchTimeout;
634  case hipErrorSetOnActiveProcess:
635  return cudaErrorSetOnActiveProcess;
637  return cudaErrorLaunchFailure;
639  return cudaErrorCooperativeLaunchTooLarge;
641  return cudaErrorNotSupported;
642  // HSA: does not exist in CUDA
644  // HSA: does not exist in CUDA
646  case hipErrorUnknown:
647  case hipErrorTbd:
648  default:
649  return cudaErrorUnknown; // Note - translated error.
650  }
651 }
652 
653 inline static enum cudaMemcpyKind hipMemcpyKindToCudaMemcpyKind(hipMemcpyKind kind) {
654  switch (kind) {
655  case hipMemcpyHostToHost:
656  return cudaMemcpyHostToHost;
657  case hipMemcpyHostToDevice:
658  return cudaMemcpyHostToDevice;
659  case hipMemcpyDeviceToHost:
660  return cudaMemcpyDeviceToHost;
661  case hipMemcpyDeviceToDevice:
662  return cudaMemcpyDeviceToDevice;
663  default:
664  return cudaMemcpyDefault;
665  }
666 }
667 
668 inline static enum cudaTextureAddressMode hipTextureAddressModeToCudaTextureAddressMode(
669  hipTextureAddressMode kind) {
670  switch (kind) {
671  case hipAddressModeWrap:
672  return cudaAddressModeWrap;
673  case hipAddressModeClamp:
674  return cudaAddressModeClamp;
675  case hipAddressModeMirror:
676  return cudaAddressModeMirror;
677  case hipAddressModeBorder:
678  return cudaAddressModeBorder;
679  default:
680  return cudaAddressModeWrap;
681  }
682 }
683 
684 inline static enum cudaTextureFilterMode hipTextureFilterModeToCudaTextureFilterMode(
685  hipTextureFilterMode kind) {
686  switch (kind) {
687  case hipFilterModePoint:
688  return cudaFilterModePoint;
689  case hipFilterModeLinear:
690  return cudaFilterModeLinear;
691  default:
692  return cudaFilterModePoint;
693  }
694 }
695 
696 inline static enum cudaTextureReadMode hipTextureReadModeToCudaTextureReadMode(hipTextureReadMode kind) {
697  switch (kind) {
698  case hipReadModeElementType:
699  return cudaReadModeElementType;
700  case hipReadModeNormalizedFloat:
701  return cudaReadModeNormalizedFloat;
702  default:
703  return cudaReadModeElementType;
704  }
705 }
706 
707 inline static enum cudaChannelFormatKind hipChannelFormatKindToCudaChannelFormatKind(
708  hipChannelFormatKind kind) {
709  switch (kind) {
710  case hipChannelFormatKindSigned:
711  return cudaChannelFormatKindSigned;
712  case hipChannelFormatKindUnsigned:
713  return cudaChannelFormatKindUnsigned;
714  case hipChannelFormatKindFloat:
715  return cudaChannelFormatKindFloat;
716  case hipChannelFormatKindNone:
717  return cudaChannelFormatKindNone;
718  default:
719  return cudaChannelFormatKindNone;
720  }
721 }
722 
726 #define HIPRT_CB CUDART_CB
727 typedef void(HIPRT_CB* hipStreamCallback_t)(hipStream_t stream, hipError_t status, void* userData);
728 inline static hipError_t hipInit(unsigned int flags) {
729  return hipCUResultTohipError(cuInit(flags));
730 }
731 
732 inline static hipError_t hipDeviceReset() { return hipCUDAErrorTohipError(cudaDeviceReset()); }
733 
734 inline static hipError_t hipGetLastError() { return hipCUDAErrorTohipError(cudaGetLastError()); }
735 
736 inline static hipError_t hipPeekAtLastError() {
737  return hipCUDAErrorTohipError(cudaPeekAtLastError());
738 }
739 
740 inline static hipError_t hipMalloc(void** ptr, size_t size) {
741  return hipCUDAErrorTohipError(cudaMalloc(ptr, size));
742 }
743 
744 inline static hipError_t hipMallocPitch(void** ptr, size_t* pitch, size_t width, size_t height) {
745  return hipCUDAErrorTohipError(cudaMallocPitch(ptr, pitch, width, height));
746 }
747 
748 inline static hipError_t hipMemAllocPitch(hipDeviceptr_t* dptr,size_t* pitch,size_t widthInBytes,size_t height,unsigned int elementSizeBytes){
749  return hipCUResultTohipError(cuMemAllocPitch(dptr,pitch,widthInBytes,height,elementSizeBytes));
750 }
751 
752 inline static hipError_t hipMalloc3D(hipPitchedPtr* pitchedDevPtr, hipExtent extent) {
753  return hipCUDAErrorTohipError(cudaMalloc3D(pitchedDevPtr, extent));
754 }
755 
756 inline static hipError_t hipFree(void* ptr) { return hipCUDAErrorTohipError(cudaFree(ptr)); }
757 
758 inline static hipError_t hipMallocHost(void** ptr, size_t size)
759  __attribute__((deprecated("use hipHostMalloc instead")));
760 inline static hipError_t hipMallocHost(void** ptr, size_t size) {
761  return hipCUDAErrorTohipError(cudaMallocHost(ptr, size));
762 }
763 
764 inline static hipError_t hipMemAllocHost(void** ptr, size_t size)
765  __attribute__((deprecated("use hipHostMalloc instead")));
766 inline static hipError_t hipMemAllocHost(void** ptr, size_t size) {
767  return hipCUResultTohipError(cuMemAllocHost(ptr, size));
768 }
769 
770 inline static hipError_t hipHostAlloc(void** ptr, size_t size, unsigned int flags)
771  __attribute__((deprecated("use hipHostMalloc instead")));
772 inline static hipError_t hipHostAlloc(void** ptr, size_t size, unsigned int flags) {
773  return hipCUDAErrorTohipError(cudaHostAlloc(ptr, size, flags));
774 }
775 
776 inline static hipError_t hipHostMalloc(void** ptr, size_t size, unsigned int flags) {
777  return hipCUDAErrorTohipError(cudaHostAlloc(ptr, size, flags));
778 }
779 
780 inline static hipError_t hipMallocManaged(void** ptr, size_t size, unsigned int flags) {
781  return hipCUDAErrorTohipError(cudaMallocManaged(ptr, size, flags));
782 }
783 
784 inline static hipError_t hipMallocArray(hipArray** array, const hipChannelFormatDesc* desc,
785  size_t width, size_t height,
786  unsigned int flags __dparm(hipArrayDefault)) {
787  return hipCUDAErrorTohipError(cudaMallocArray(array, desc, width, height, flags));
788 }
789 
790 inline static hipError_t hipMalloc3DArray(hipArray** array, const hipChannelFormatDesc* desc,
791  hipExtent extent, unsigned int flags) {
792  return hipCUDAErrorTohipError(cudaMalloc3DArray(array, desc, extent, flags));
793 }
794 
795 inline static hipError_t hipFreeArray(hipArray* array) {
796  return hipCUDAErrorTohipError(cudaFreeArray(array));
797 }
798 
799 inline static hipError_t hipHostGetDevicePointer(void** devPtr, void* hostPtr, unsigned int flags) {
800  return hipCUDAErrorTohipError(cudaHostGetDevicePointer(devPtr, hostPtr, flags));
801 }
802 
803 inline static hipError_t hipHostGetFlags(unsigned int* flagsPtr, void* hostPtr) {
804  return hipCUDAErrorTohipError(cudaHostGetFlags(flagsPtr, hostPtr));
805 }
806 
807 inline static hipError_t hipHostRegister(void* ptr, size_t size, unsigned int flags) {
808  return hipCUDAErrorTohipError(cudaHostRegister(ptr, size, flags));
809 }
810 
811 inline static hipError_t hipHostUnregister(void* ptr) {
812  return hipCUDAErrorTohipError(cudaHostUnregister(ptr));
813 }
814 
815 inline static hipError_t hipFreeHost(void* ptr)
816  __attribute__((deprecated("use hipHostFree instead")));
817 inline static hipError_t hipFreeHost(void* ptr) {
818  return hipCUDAErrorTohipError(cudaFreeHost(ptr));
819 }
820 
821 inline static hipError_t hipHostFree(void* ptr) {
822  return hipCUDAErrorTohipError(cudaFreeHost(ptr));
823 }
824 
825 inline static hipError_t hipSetDevice(int device) {
826  return hipCUDAErrorTohipError(cudaSetDevice(device));
827 }
828 
829 inline static hipError_t hipChooseDevice(int* device, const hipDeviceProp_t* prop) {
830  struct cudaDeviceProp cdprop;
831  memset(&cdprop, 0x0, sizeof(struct cudaDeviceProp));
832  cdprop.major = prop->major;
833  cdprop.minor = prop->minor;
834  cdprop.totalGlobalMem = prop->totalGlobalMem;
835  cdprop.sharedMemPerBlock = prop->sharedMemPerBlock;
836  cdprop.regsPerBlock = prop->regsPerBlock;
837  cdprop.warpSize = prop->warpSize;
838  cdprop.maxThreadsPerBlock = prop->maxThreadsPerBlock;
839  cdprop.clockRate = prop->clockRate;
840  cdprop.totalConstMem = prop->totalConstMem;
841  cdprop.multiProcessorCount = prop->multiProcessorCount;
842  cdprop.l2CacheSize = prop->l2CacheSize;
843  cdprop.maxThreadsPerMultiProcessor = prop->maxThreadsPerMultiProcessor;
844  cdprop.computeMode = prop->computeMode;
845  cdprop.canMapHostMemory = prop->canMapHostMemory;
846  cdprop.memoryClockRate = prop->memoryClockRate;
847  cdprop.memoryBusWidth = prop->memoryBusWidth;
848  return hipCUDAErrorTohipError(cudaChooseDevice(device, &cdprop));
849 }
850 
851 inline static hipError_t hipMemcpyHtoD(hipDeviceptr_t dst, void* src, size_t size) {
852  return hipCUResultTohipError(cuMemcpyHtoD(dst, src, size));
853 }
854 
855 inline static hipError_t hipMemcpyDtoH(void* dst, hipDeviceptr_t src, size_t size) {
856  return hipCUResultTohipError(cuMemcpyDtoH(dst, src, size));
857 }
858 
859 inline static hipError_t hipMemcpyDtoD(hipDeviceptr_t dst, hipDeviceptr_t src, size_t size) {
860  return hipCUResultTohipError(cuMemcpyDtoD(dst, src, size));
861 }
862 
863 inline static hipError_t hipMemcpyHtoDAsync(hipDeviceptr_t dst, void* src, size_t size,
864  hipStream_t stream) {
865  return hipCUResultTohipError(cuMemcpyHtoDAsync(dst, src, size, stream));
866 }
867 
868 inline static hipError_t hipMemcpyDtoHAsync(void* dst, hipDeviceptr_t src, size_t size,
869  hipStream_t stream) {
870  return hipCUResultTohipError(cuMemcpyDtoHAsync(dst, src, size, stream));
871 }
872 
873 inline static hipError_t hipMemcpyDtoDAsync(hipDeviceptr_t dst, hipDeviceptr_t src, size_t size,
874  hipStream_t stream) {
875  return hipCUResultTohipError(cuMemcpyDtoDAsync(dst, src, size, stream));
876 }
877 
878 inline static hipError_t hipMemcpy(void* dst, const void* src, size_t sizeBytes,
879  hipMemcpyKind copyKind) {
880  return hipCUDAErrorTohipError(
881  cudaMemcpy(dst, src, sizeBytes, hipMemcpyKindToCudaMemcpyKind(copyKind)));
882 }
883 
884 
885 inline static hipError_t hipMemcpyWithStream(void* dst, const void* src,
886  size_t sizeBytes, hipMemcpyKind copyKind,
887  hipStream_t stream) {
888  cudaError_t error = cudaMemcpyAsync(dst, src, sizeBytes,
889  hipMemcpyKindToCudaMemcpyKind(copyKind),
890  stream);
891 
892  if (error != cudaSuccess) return hipCUDAErrorTohipError(error);
893 
894  return hipCUDAErrorTohipError(cudaStreamSynchronize(stream));
895 }
896 
897 inline static hipError_t hipMemcpyAsync(void* dst, const void* src, size_t sizeBytes,
898  hipMemcpyKind copyKind, hipStream_t stream __dparm(0)) {
899  return hipCUDAErrorTohipError(
900  cudaMemcpyAsync(dst, src, sizeBytes, hipMemcpyKindToCudaMemcpyKind(copyKind), stream));
901 }
902 
903 inline static hipError_t hipMemcpyToSymbol(const void* symbol, const void* src, size_t sizeBytes,
904  size_t offset __dparm(0),
905  hipMemcpyKind copyType __dparm(hipMemcpyHostToDevice)) {
906  return hipCUDAErrorTohipError(cudaMemcpyToSymbol(symbol, src, sizeBytes, offset,
907  hipMemcpyKindToCudaMemcpyKind(copyType)));
908 }
909 
910 inline static hipError_t hipMemcpyToSymbolAsync(const void* symbol, const void* src,
911  size_t sizeBytes, size_t offset,
912  hipMemcpyKind copyType,
913  hipStream_t stream __dparm(0)) {
914  return hipCUDAErrorTohipError(cudaMemcpyToSymbolAsync(
915  symbol, src, sizeBytes, offset, hipMemcpyKindToCudaMemcpyKind(copyType), stream));
916 }
917 
918 inline static hipError_t hipMemcpyFromSymbol(void* dst, const void* symbolName, size_t sizeBytes,
919  size_t offset __dparm(0),
920  hipMemcpyKind kind __dparm(hipMemcpyDeviceToHost)) {
921  return hipCUDAErrorTohipError(cudaMemcpyFromSymbol(dst, symbolName, sizeBytes, offset,
922  hipMemcpyKindToCudaMemcpyKind(kind)));
923 }
924 
925 inline static hipError_t hipMemcpyFromSymbolAsync(void* dst, const void* symbolName,
926  size_t sizeBytes, size_t offset,
927  hipMemcpyKind kind,
928  hipStream_t stream __dparm(0)) {
929  return hipCUDAErrorTohipError(cudaMemcpyFromSymbolAsync(
930  dst, symbolName, sizeBytes, offset, hipMemcpyKindToCudaMemcpyKind(kind), stream));
931 }
932 
933 inline static hipError_t hipGetSymbolAddress(void** devPtr, const void* symbolName) {
934  return hipCUDAErrorTohipError(cudaGetSymbolAddress(devPtr, symbolName));
935 }
936 
937 inline static hipError_t hipGetSymbolSize(size_t* size, const void* symbolName) {
938  return hipCUDAErrorTohipError(cudaGetSymbolSize(size, symbolName));
939 }
940 
941 inline static hipError_t hipMemcpy2D(void* dst, size_t dpitch, const void* src, size_t spitch,
942  size_t width, size_t height, hipMemcpyKind kind) {
943  return hipCUDAErrorTohipError(
944  cudaMemcpy2D(dst, dpitch, src, spitch, width, height, hipMemcpyKindToCudaMemcpyKind(kind)));
945 }
946 
947 inline static hipError_t hipMemcpyParam2D(const hip_Memcpy2D* pCopy) {
948  return hipCUResultTohipError(cuMemcpy2D(pCopy));
949 }
950 
951 inline static hipError_t hipMemcpyParam2DAsync(const hip_Memcpy2D* pCopy, hipStream_t stream __dparm(0)) {
952  return hipCUResultTohipError(cuMemcpy2DAsync(pCopy, stream));
953 }
954 
955 inline static hipError_t hipMemcpy3D(const struct hipMemcpy3DParms *p)
956 {
957  return hipCUDAErrorTohipError(cudaMemcpy3D(p));
958 }
959 
960 inline static hipError_t hipMemcpy3DAsync(const struct hipMemcpy3DParms *p, hipStream_t stream)
961 {
962  return hipCUDAErrorTohipError(cudaMemcpy3DAsync(p, stream));
963 }
964 
965 inline static hipError_t hipMemcpy2DAsync(void* dst, size_t dpitch, const void* src, size_t spitch,
966  size_t width, size_t height, hipMemcpyKind kind,
967  hipStream_t stream) {
968  return hipCUDAErrorTohipError(cudaMemcpy2DAsync(dst, dpitch, src, spitch, width, height,
969  hipMemcpyKindToCudaMemcpyKind(kind), stream));
970 }
971 
972 inline static hipError_t hipMemcpy2DToArray(hipArray* dst, size_t wOffset, size_t hOffset,
973  const void* src, size_t spitch, size_t width,
974  size_t height, hipMemcpyKind kind) {
975  return hipCUDAErrorTohipError(cudaMemcpy2DToArray(dst, wOffset, hOffset, src, spitch, width,
976  height, hipMemcpyKindToCudaMemcpyKind(kind)));
977 }
978 
979 __HIP_DEPRECATED inline static hipError_t hipMemcpyToArray(hipArray* dst, size_t wOffset,
980  size_t hOffset, const void* src,
981  size_t count, hipMemcpyKind kind) {
982  return hipCUDAErrorTohipError(
983  cudaMemcpyToArray(dst, wOffset, hOffset, src, count, hipMemcpyKindToCudaMemcpyKind(kind)));
984 }
985 
986 __HIP_DEPRECATED inline static hipError_t hipMemcpyFromArray(void* dst, hipArray_const_t srcArray,
987  size_t wOffset, size_t hOffset,
988  size_t count, hipMemcpyKind kind) {
989  return hipCUDAErrorTohipError(cudaMemcpyFromArray(dst, srcArray, wOffset, hOffset, count,
990  hipMemcpyKindToCudaMemcpyKind(kind)));
991 }
992 
993 inline static hipError_t hipMemcpyAtoH(void* dst, hipArray* srcArray, size_t srcOffset,
994  size_t count) {
995  return hipCUResultTohipError(cuMemcpyAtoH(dst, (CUarray)srcArray, srcOffset, count));
996 }
997 
998 inline static hipError_t hipMemcpyHtoA(hipArray* dstArray, size_t dstOffset, const void* srcHost,
999  size_t count) {
1000  return hipCUResultTohipError(cuMemcpyHtoA((CUarray)dstArray, dstOffset, srcHost, count));
1001 }
1002 
1003 inline static hipError_t hipDeviceSynchronize() {
1004  return hipCUDAErrorTohipError(cudaDeviceSynchronize());
1005 }
1006 
1007 inline static hipError_t hipDeviceGetCacheConfig(hipFuncCache_t* pCacheConfig) {
1008  return hipCUDAErrorTohipError(cudaDeviceGetCacheConfig(pCacheConfig));
1009 }
1010 
1011 inline static hipError_t hipFuncSetAttribute(const void* func, hipFuncAttribute attr, int value) {
1012  return hipCUDAErrorTohipError(cudaFuncSetAttribute(func, attr, value));
1013 }
1014 
1015 inline static hipError_t hipDeviceSetCacheConfig(hipFuncCache_t cacheConfig) {
1016  return hipCUDAErrorTohipError(cudaDeviceSetCacheConfig(cacheConfig));
1017 }
1018 
1019 inline static hipError_t hipFuncSetSharedMemConfig(const void* func, hipSharedMemConfig config) {
1020  return hipCUDAErrorTohipError(cudaFuncSetSharedMemConfig(func, config));
1021 }
1022 
1023 inline static const char* hipGetErrorString(hipError_t error) {
1024  return cudaGetErrorString(hipErrorToCudaError(error));
1025 }
1026 
1027 inline static const char* hipGetErrorName(hipError_t error) {
1028  return cudaGetErrorName(hipErrorToCudaError(error));
1029 }
1030 
1031 inline static hipError_t hipGetDeviceCount(int* count) {
1032  return hipCUDAErrorTohipError(cudaGetDeviceCount(count));
1033 }
1034 
1035 inline static hipError_t hipGetDevice(int* device) {
1036  return hipCUDAErrorTohipError(cudaGetDevice(device));
1037 }
1038 
1039 inline static hipError_t hipIpcCloseMemHandle(void* devPtr) {
1040  return hipCUDAErrorTohipError(cudaIpcCloseMemHandle(devPtr));
1041 }
1042 
1043 inline static hipError_t hipIpcGetEventHandle(hipIpcEventHandle_t* handle, hipEvent_t event) {
1044  return hipCUDAErrorTohipError(cudaIpcGetEventHandle(handle, event));
1045 }
1046 
1047 inline static hipError_t hipIpcGetMemHandle(hipIpcMemHandle_t* handle, void* devPtr) {
1048  return hipCUDAErrorTohipError(cudaIpcGetMemHandle(handle, devPtr));
1049 }
1050 
1051 inline static hipError_t hipIpcOpenEventHandle(hipEvent_t* event, hipIpcEventHandle_t handle) {
1052  return hipCUDAErrorTohipError(cudaIpcOpenEventHandle(event, handle));
1053 }
1054 
1055 inline static hipError_t hipIpcOpenMemHandle(void** devPtr, hipIpcMemHandle_t handle,
1056  unsigned int flags) {
1057  return hipCUDAErrorTohipError(cudaIpcOpenMemHandle(devPtr, handle, flags));
1058 }
1059 
1060 inline static hipError_t hipMemset(void* devPtr, int value, size_t count) {
1061  return hipCUDAErrorTohipError(cudaMemset(devPtr, value, count));
1062 }
1063 
1064 inline static hipError_t hipMemsetD32(hipDeviceptr_t devPtr, int value, size_t count) {
1065  return hipCUResultTohipError(cuMemsetD32(devPtr, value, count));
1066 }
1067 
1068 inline static hipError_t hipMemsetAsync(void* devPtr, int value, size_t count,
1069  hipStream_t stream __dparm(0)) {
1070  return hipCUDAErrorTohipError(cudaMemsetAsync(devPtr, value, count, stream));
1071 }
1072 
1073 inline static hipError_t hipMemsetD32Async(hipDeviceptr_t devPtr, int value, size_t count,
1074  hipStream_t stream __dparm(0)) {
1075  return hipCUResultTohipError(cuMemsetD32Async(devPtr, value, count, stream));
1076 }
1077 
1078 inline static hipError_t hipMemsetD8(hipDeviceptr_t dest, unsigned char value, size_t sizeBytes) {
1079  return hipCUResultTohipError(cuMemsetD8(dest, value, sizeBytes));
1080 }
1081 
1082 inline static hipError_t hipMemsetD8Async(hipDeviceptr_t dest, unsigned char value, size_t sizeBytes,
1083  hipStream_t stream __dparm(0)) {
1084  return hipCUResultTohipError(cuMemsetD8Async(dest, value, sizeBytes, stream));
1085 }
1086 
1087 inline static hipError_t hipMemsetD16(hipDeviceptr_t dest, unsigned short value, size_t sizeBytes) {
1088  return hipCUResultTohipError(cuMemsetD16(dest, value, sizeBytes));
1089 }
1090 
1091 inline static hipError_t hipMemsetD16Async(hipDeviceptr_t dest, unsigned short value, size_t sizeBytes,
1092  hipStream_t stream __dparm(0)) {
1093  return hipCUResultTohipError(cuMemsetD16Async(dest, value, sizeBytes, stream));
1094 }
1095 
1096 inline static hipError_t hipMemset2D(void* dst, size_t pitch, int value, size_t width, size_t height) {
1097  return hipCUDAErrorTohipError(cudaMemset2D(dst, pitch, value, width, height));
1098 }
1099 
1100 inline static hipError_t hipMemset2DAsync(void* dst, size_t pitch, int value, size_t width, size_t height, hipStream_t stream __dparm(0)) {
1101  return hipCUDAErrorTohipError(cudaMemset2DAsync(dst, pitch, value, width, height, stream));
1102 }
1103 
1104 inline static hipError_t hipMemset3D(hipPitchedPtr pitchedDevPtr, int value, hipExtent extent ){
1105  return hipCUDAErrorTohipError(cudaMemset3D(pitchedDevPtr, value, extent));
1106 }
1107 
1108 inline static hipError_t hipMemset3DAsync(hipPitchedPtr pitchedDevPtr, int value, hipExtent extent, hipStream_t stream __dparm(0) ){
1109  return hipCUDAErrorTohipError(cudaMemset3DAsync(pitchedDevPtr, value, extent, stream));
1110 }
1111 
1112 inline static hipError_t hipGetDeviceProperties(hipDeviceProp_t* p_prop, int device) {
1113  struct cudaDeviceProp cdprop;
1114  cudaError_t cerror;
1115  cerror = cudaGetDeviceProperties(&cdprop, device);
1116 
1117  strncpy(p_prop->name, cdprop.name, 256);
1118  p_prop->totalGlobalMem = cdprop.totalGlobalMem;
1119  p_prop->sharedMemPerBlock = cdprop.sharedMemPerBlock;
1120  p_prop->regsPerBlock = cdprop.regsPerBlock;
1121  p_prop->warpSize = cdprop.warpSize;
1122  p_prop->maxThreadsPerBlock = cdprop.maxThreadsPerBlock;
1123  for (int i = 0; i < 3; i++) {
1124  p_prop->maxThreadsDim[i] = cdprop.maxThreadsDim[i];
1125  p_prop->maxGridSize[i] = cdprop.maxGridSize[i];
1126  }
1127  p_prop->clockRate = cdprop.clockRate;
1128  p_prop->memoryClockRate = cdprop.memoryClockRate;
1129  p_prop->memoryBusWidth = cdprop.memoryBusWidth;
1130  p_prop->totalConstMem = cdprop.totalConstMem;
1131  p_prop->major = cdprop.major;
1132  p_prop->minor = cdprop.minor;
1133  p_prop->multiProcessorCount = cdprop.multiProcessorCount;
1134  p_prop->l2CacheSize = cdprop.l2CacheSize;
1135  p_prop->maxThreadsPerMultiProcessor = cdprop.maxThreadsPerMultiProcessor;
1136  p_prop->computeMode = cdprop.computeMode;
1137  p_prop->clockInstructionRate = cdprop.clockRate; // Same as clock-rate:
1138 
1139  int ccVers = p_prop->major * 100 + p_prop->minor * 10;
1140  p_prop->arch.hasGlobalInt32Atomics = (ccVers >= 110);
1141  p_prop->arch.hasGlobalFloatAtomicExch = (ccVers >= 110);
1142  p_prop->arch.hasSharedInt32Atomics = (ccVers >= 120);
1143  p_prop->arch.hasSharedFloatAtomicExch = (ccVers >= 120);
1144  p_prop->arch.hasFloatAtomicAdd = (ccVers >= 200);
1145  p_prop->arch.hasGlobalInt64Atomics = (ccVers >= 120);
1146  p_prop->arch.hasSharedInt64Atomics = (ccVers >= 110);
1147  p_prop->arch.hasDoubles = (ccVers >= 130);
1148  p_prop->arch.hasWarpVote = (ccVers >= 120);
1149  p_prop->arch.hasWarpBallot = (ccVers >= 200);
1150  p_prop->arch.hasWarpShuffle = (ccVers >= 300);
1151  p_prop->arch.hasFunnelShift = (ccVers >= 350);
1152  p_prop->arch.hasThreadFenceSystem = (ccVers >= 200);
1153  p_prop->arch.hasSyncThreadsExt = (ccVers >= 200);
1154  p_prop->arch.hasSurfaceFuncs = (ccVers >= 200);
1155  p_prop->arch.has3dGrid = (ccVers >= 200);
1156  p_prop->arch.hasDynamicParallelism = (ccVers >= 350);
1157 
1158  p_prop->concurrentKernels = cdprop.concurrentKernels;
1159  p_prop->pciDomainID = cdprop.pciDomainID;
1160  p_prop->pciBusID = cdprop.pciBusID;
1161  p_prop->pciDeviceID = cdprop.pciDeviceID;
1162  p_prop->maxSharedMemoryPerMultiProcessor = cdprop.sharedMemPerMultiprocessor;
1163  p_prop->isMultiGpuBoard = cdprop.isMultiGpuBoard;
1164  p_prop->canMapHostMemory = cdprop.canMapHostMemory;
1165  p_prop->gcnArch = 0; // Not a GCN arch
1166  p_prop->integrated = cdprop.integrated;
1167  p_prop->cooperativeLaunch = cdprop.cooperativeLaunch;
1168  p_prop->cooperativeMultiDeviceLaunch = cdprop.cooperativeMultiDeviceLaunch;
1173 
1174  p_prop->maxTexture1D = cdprop.maxTexture1D;
1175  p_prop->maxTexture2D[0] = cdprop.maxTexture2D[0];
1176  p_prop->maxTexture2D[1] = cdprop.maxTexture2D[1];
1177  p_prop->maxTexture3D[0] = cdprop.maxTexture3D[0];
1178  p_prop->maxTexture3D[1] = cdprop.maxTexture3D[1];
1179  p_prop->maxTexture3D[2] = cdprop.maxTexture3D[2];
1180 
1181  p_prop->memPitch = cdprop.memPitch;
1182  p_prop->textureAlignment = cdprop.textureAlignment;
1183  p_prop->texturePitchAlignment = cdprop.texturePitchAlignment;
1184  p_prop->kernelExecTimeoutEnabled = cdprop.kernelExecTimeoutEnabled;
1185  p_prop->ECCEnabled = cdprop.ECCEnabled;
1186  p_prop->tccDriver = cdprop.tccDriver;
1187 
1188  return hipCUDAErrorTohipError(cerror);
1189 }
1190 
1191 inline static hipError_t hipDeviceGetAttribute(int* pi, hipDeviceAttribute_t attr, int device) {
1192  enum cudaDeviceAttr cdattr;
1193  cudaError_t cerror;
1194 
1195  switch (attr) {
1197  cdattr = cudaDevAttrMaxThreadsPerBlock;
1198  break;
1200  cdattr = cudaDevAttrMaxBlockDimX;
1201  break;
1203  cdattr = cudaDevAttrMaxBlockDimY;
1204  break;
1206  cdattr = cudaDevAttrMaxBlockDimZ;
1207  break;
1209  cdattr = cudaDevAttrMaxGridDimX;
1210  break;
1212  cdattr = cudaDevAttrMaxGridDimY;
1213  break;
1215  cdattr = cudaDevAttrMaxGridDimZ;
1216  break;
1218  cdattr = cudaDevAttrMaxSharedMemoryPerBlock;
1219  break;
1221  cdattr = cudaDevAttrTotalConstantMemory;
1222  break;
1224  cdattr = cudaDevAttrWarpSize;
1225  break;
1227  cdattr = cudaDevAttrMaxRegistersPerBlock;
1228  break;
1230  cdattr = cudaDevAttrClockRate;
1231  break;
1233  cdattr = cudaDevAttrMemoryClockRate;
1234  break;
1236  cdattr = cudaDevAttrGlobalMemoryBusWidth;
1237  break;
1239  cdattr = cudaDevAttrMultiProcessorCount;
1240  break;
1242  cdattr = cudaDevAttrComputeMode;
1243  break;
1245  cdattr = cudaDevAttrL2CacheSize;
1246  break;
1248  cdattr = cudaDevAttrMaxThreadsPerMultiProcessor;
1249  break;
1251  cdattr = cudaDevAttrComputeCapabilityMajor;
1252  break;
1254  cdattr = cudaDevAttrComputeCapabilityMinor;
1255  break;
1257  cdattr = cudaDevAttrConcurrentKernels;
1258  break;
1260  cdattr = cudaDevAttrPciBusId;
1261  break;
1263  cdattr = cudaDevAttrPciDeviceId;
1264  break;
1266  cdattr = cudaDevAttrMaxSharedMemoryPerMultiprocessor;
1267  break;
1269  cdattr = cudaDevAttrIsMultiGpuBoard;
1270  break;
1272  cdattr = cudaDevAttrIntegrated;
1273  break;
1275  cdattr = cudaDevAttrMaxTexture1DWidth;
1276  break;
1278  cdattr = cudaDevAttrMaxTexture2DWidth;
1279  break;
1281  cdattr = cudaDevAttrMaxTexture2DHeight;
1282  break;
1284  cdattr = cudaDevAttrMaxTexture3DWidth;
1285  break;
1287  cdattr = cudaDevAttrMaxTexture3DHeight;
1288  break;
1290  cdattr = cudaDevAttrMaxTexture3DDepth;
1291  break;
1293  cdattr = cudaDevAttrMaxPitch;
1294  break;
1296  cdattr = cudaDevAttrTextureAlignment;
1297  break;
1299  cdattr = cudaDevAttrTexturePitchAlignment;
1300  break;
1302  cdattr = cudaDevAttrKernelExecTimeout;
1303  break;
1305  cdattr = cudaDevAttrCanMapHostMemory;
1306  break;
1308  cdattr = cudaDevAttrEccEnabled;
1309  break;
1311  cdattr = cudaDevAttrCooperativeLaunch;
1312  break;
1314  cdattr = cudaDevAttrCooperativeMultiDeviceLaunch;
1315  break;
1316  default:
1317  return hipCUDAErrorTohipError(cudaErrorInvalidValue);
1318  }
1319 
1320  cerror = cudaDeviceGetAttribute(pi, cdattr, device);
1321 
1322  return hipCUDAErrorTohipError(cerror);
1323 }
1324 
1325 inline static hipError_t hipOccupancyMaxActiveBlocksPerMultiprocessor(int* numBlocks,
1326  const void* func,
1327  int blockSize,
1328  size_t dynamicSMemSize) {
1329  return hipCUDAErrorTohipError(cudaOccupancyMaxActiveBlocksPerMultiprocessor(numBlocks, func,
1330  blockSize, dynamicSMemSize));
1331 }
1332 
1333 inline static hipError_t hipOccupancyMaxActiveBlocksPerMultiprocessorWithFlags(int* numBlocks,
1334  const void* func,
1335  int blockSize,
1336  size_t dynamicSMemSize,
1337  unsigned int flags) {
1338  return hipCUDAErrorTohipError(cudaOccupancyMaxActiveBlocksPerMultiprocessorWithFlags(numBlocks, func,
1339  blockSize, dynamicSMemSize, flags));
1340 }
1341 
1342 inline static hipError_t hipModuleOccupancyMaxActiveBlocksPerMultiprocessor(int* numBlocks,
1343  hipFunction_t f,
1344  int blockSize,
1345  size_t dynamicSMemSize ){
1346  return hipCUResultTohipError(cuOccupancyMaxActiveBlocksPerMultiprocessor(numBlocks, f,
1347  blockSize, dynamicSMemSize));
1348 }
1349 
1350 inline static hipError_t hipModuleOccupancyMaxActiveBlocksPerMultiprocessorWithFlags(int* numBlocks,
1351  hipFunction_t f,
1352  int blockSize,
1353  size_t dynamicSMemSize,
1354  unsigned int flags ) {
1355  return hipCUResultTohipError(cuOccupancyMaxActiveBlocksPerMultiprocessorWithFlags(numBlocks,f,
1356  blockSize, dynamicSMemSize, flags));
1357 }
1358 
1359 //TODO - Match CUoccupancyB2DSize
1360 inline static hipError_t hipModuleOccupancyMaxPotentialBlockSize(int* gridSize, int* blockSize,
1361  hipFunction_t f, size_t dynSharedMemPerBlk,
1362  int blockSizeLimit){
1363  return hipCUResultTohipError(cuOccupancyMaxPotentialBlockSize(gridSize, blockSize, f, NULL,
1364  dynSharedMemPerBlk, blockSizeLimit));
1365 }
1366 
1367 //TODO - Match CUoccupancyB2DSize
1368 inline static hipError_t hipModuleOccupancyMaxPotentialBlockSizeWithFlags(int* gridSize, int* blockSize,
1369  hipFunction_t f, size_t dynSharedMemPerBlk,
1370  int blockSizeLimit, unsigned int flags){
1371  return hipCUResultTohipError(cuOccupancyMaxPotentialBlockSizeWithFlags(gridSize, blockSize, f, NULL,
1372  dynSharedMemPerBlk, blockSizeLimit, flags));
1373 }
1374 
1375 inline static hipError_t hipPointerGetAttributes(hipPointerAttribute_t* attributes, const void* ptr) {
1376  struct cudaPointerAttributes cPA;
1377  hipError_t err = hipCUDAErrorTohipError(cudaPointerGetAttributes(&cPA, ptr));
1378  if (err == hipSuccess) {
1379 #if (CUDART_VERSION >= 11000)
1380  auto memType = cPA.type;
1381 #else
1382  unsigned memType = cPA.memoryType; // No auto because cuda 10.2 doesnt force c++11
1383 #endif
1384  switch (memType) {
1385  case cudaMemoryTypeDevice:
1386  attributes->memoryType = hipMemoryTypeDevice;
1387  break;
1388  case cudaMemoryTypeHost:
1389  attributes->memoryType = hipMemoryTypeHost;
1390  break;
1391  default:
1392  return hipErrorUnknown;
1393  }
1394  attributes->device = cPA.device;
1395  attributes->devicePointer = cPA.devicePointer;
1396  attributes->hostPointer = cPA.hostPointer;
1397  attributes->isManaged = 0;
1398  attributes->allocationFlags = 0;
1399  }
1400  return err;
1401 }
1402 
1403 inline static hipError_t hipMemGetInfo(size_t* free, size_t* total) {
1404  return hipCUDAErrorTohipError(cudaMemGetInfo(free, total));
1405 }
1406 
1407 inline static hipError_t hipEventCreate(hipEvent_t* event) {
1408  return hipCUDAErrorTohipError(cudaEventCreate(event));
1409 }
1410 
1411 inline static hipError_t hipEventRecord(hipEvent_t event, hipStream_t stream __dparm(NULL)) {
1412  return hipCUDAErrorTohipError(cudaEventRecord(event, stream));
1413 }
1414 
1415 inline static hipError_t hipEventSynchronize(hipEvent_t event) {
1416  return hipCUDAErrorTohipError(cudaEventSynchronize(event));
1417 }
1418 
1419 inline static hipError_t hipEventElapsedTime(float* ms, hipEvent_t start, hipEvent_t stop) {
1420  return hipCUDAErrorTohipError(cudaEventElapsedTime(ms, start, stop));
1421 }
1422 
1423 inline static hipError_t hipEventDestroy(hipEvent_t event) {
1424  return hipCUDAErrorTohipError(cudaEventDestroy(event));
1425 }
1426 
1427 inline static hipError_t hipStreamCreateWithFlags(hipStream_t* stream, unsigned int flags) {
1428  return hipCUDAErrorTohipError(cudaStreamCreateWithFlags(stream, flags));
1429 }
1430 
1431 inline static hipError_t hipStreamCreateWithPriority(hipStream_t* stream, unsigned int flags, int priority) {
1432  return hipCUDAErrorTohipError(cudaStreamCreateWithPriority(stream, flags, priority));
1433 }
1434 
1435 inline static hipError_t hipDeviceGetStreamPriorityRange(int* leastPriority, int* greatestPriority) {
1436  return hipCUDAErrorTohipError(cudaDeviceGetStreamPriorityRange(leastPriority, greatestPriority));
1437 }
1438 
1439 inline static hipError_t hipStreamCreate(hipStream_t* stream) {
1440  return hipCUDAErrorTohipError(cudaStreamCreate(stream));
1441 }
1442 
1443 inline static hipError_t hipStreamSynchronize(hipStream_t stream) {
1444  return hipCUDAErrorTohipError(cudaStreamSynchronize(stream));
1445 }
1446 
1447 inline static hipError_t hipStreamDestroy(hipStream_t stream) {
1448  return hipCUDAErrorTohipError(cudaStreamDestroy(stream));
1449 }
1450 
1451 inline static hipError_t hipStreamGetFlags(hipStream_t stream, unsigned int *flags) {
1452  return hipCUDAErrorTohipError(cudaStreamGetFlags(stream, flags));
1453 }
1454 
1455 inline static hipError_t hipStreamGetPriority(hipStream_t stream, int *priority) {
1456  return hipCUDAErrorTohipError(cudaStreamGetPriority(stream, priority));
1457 }
1458 
1459 inline static hipError_t hipStreamWaitEvent(hipStream_t stream, hipEvent_t event,
1460  unsigned int flags) {
1461  return hipCUDAErrorTohipError(cudaStreamWaitEvent(stream, event, flags));
1462 }
1463 
1464 inline static hipError_t hipStreamQuery(hipStream_t stream) {
1465  return hipCUDAErrorTohipError(cudaStreamQuery(stream));
1466 }
1467 
1468 inline static hipError_t hipStreamAddCallback(hipStream_t stream, hipStreamCallback_t callback,
1469  void* userData, unsigned int flags) {
1470  return hipCUDAErrorTohipError(
1471  cudaStreamAddCallback(stream, (cudaStreamCallback_t)callback, userData, flags));
1472 }
1473 
1474 inline static hipError_t hipDriverGetVersion(int* driverVersion) {
1475  cudaError_t err = cudaDriverGetVersion(driverVersion);
1476 
1477  // Override driver version to match version reported on HCC side.
1478  *driverVersion = 4;
1479 
1480  return hipCUDAErrorTohipError(err);
1481 }
1482 
1483 inline static hipError_t hipRuntimeGetVersion(int* runtimeVersion) {
1484  return hipCUDAErrorTohipError(cudaRuntimeGetVersion(runtimeVersion));
1485 }
1486 
1487 inline static hipError_t hipDeviceCanAccessPeer(int* canAccessPeer, int device, int peerDevice) {
1488  return hipCUDAErrorTohipError(cudaDeviceCanAccessPeer(canAccessPeer, device, peerDevice));
1489 }
1490 
1491 inline static hipError_t hipDeviceDisablePeerAccess(int peerDevice) {
1492  return hipCUDAErrorTohipError(cudaDeviceDisablePeerAccess(peerDevice));
1493 }
1494 
1495 inline static hipError_t hipDeviceEnablePeerAccess(int peerDevice, unsigned int flags) {
1496  return hipCUDAErrorTohipError(cudaDeviceEnablePeerAccess(peerDevice, flags));
1497 }
1498 
1499 inline static hipError_t hipCtxDisablePeerAccess(hipCtx_t peerCtx) {
1500  return hipCUResultTohipError(cuCtxDisablePeerAccess(peerCtx));
1501 }
1502 
1503 inline static hipError_t hipCtxEnablePeerAccess(hipCtx_t peerCtx, unsigned int flags) {
1504  return hipCUResultTohipError(cuCtxEnablePeerAccess(peerCtx, flags));
1505 }
1506 
1507 inline static hipError_t hipDevicePrimaryCtxGetState(hipDevice_t dev, unsigned int* flags,
1508  int* active) {
1509  return hipCUResultTohipError(cuDevicePrimaryCtxGetState(dev, flags, active));
1510 }
1511 
1512 inline static hipError_t hipDevicePrimaryCtxRelease(hipDevice_t dev) {
1513  return hipCUResultTohipError(cuDevicePrimaryCtxRelease(dev));
1514 }
1515 
1516 inline static hipError_t hipDevicePrimaryCtxRetain(hipCtx_t* pctx, hipDevice_t dev) {
1517  return hipCUResultTohipError(cuDevicePrimaryCtxRetain(pctx, dev));
1518 }
1519 
1520 inline static hipError_t hipDevicePrimaryCtxReset(hipDevice_t dev) {
1521  return hipCUResultTohipError(cuDevicePrimaryCtxReset(dev));
1522 }
1523 
1524 inline static hipError_t hipDevicePrimaryCtxSetFlags(hipDevice_t dev, unsigned int flags) {
1525  return hipCUResultTohipError(cuDevicePrimaryCtxSetFlags(dev, flags));
1526 }
1527 
1528 inline static hipError_t hipMemGetAddressRange(hipDeviceptr_t* pbase, size_t* psize,
1529  hipDeviceptr_t dptr) {
1530  return hipCUResultTohipError(cuMemGetAddressRange(pbase, psize, dptr));
1531 }
1532 
1533 inline static hipError_t hipMemcpyPeer(void* dst, int dstDevice, const void* src, int srcDevice,
1534  size_t count) {
1535  return hipCUDAErrorTohipError(cudaMemcpyPeer(dst, dstDevice, src, srcDevice, count));
1536 }
1537 
1538 inline static hipError_t hipMemcpyPeerAsync(void* dst, int dstDevice, const void* src,
1539  int srcDevice, size_t count,
1540  hipStream_t stream __dparm(0)) {
1541  return hipCUDAErrorTohipError(
1542  cudaMemcpyPeerAsync(dst, dstDevice, src, srcDevice, count, stream));
1543 }
1544 
1545 // Profile APIs:
1546 inline static hipError_t hipProfilerStart() { return hipCUDAErrorTohipError(cudaProfilerStart()); }
1547 
1548 inline static hipError_t hipProfilerStop() { return hipCUDAErrorTohipError(cudaProfilerStop()); }
1549 
1550 inline static hipError_t hipGetDeviceFlags(unsigned int* flags) {
1551  return hipCUDAErrorTohipError(cudaGetDeviceFlags(flags));
1552 }
1553 
1554 inline static hipError_t hipSetDeviceFlags(unsigned int flags) {
1555  return hipCUDAErrorTohipError(cudaSetDeviceFlags(flags));
1556 }
1557 
1558 inline static hipError_t hipEventCreateWithFlags(hipEvent_t* event, unsigned int flags) {
1559  return hipCUDAErrorTohipError(cudaEventCreateWithFlags(event, flags));
1560 }
1561 
1562 inline static hipError_t hipEventQuery(hipEvent_t event) {
1563  return hipCUDAErrorTohipError(cudaEventQuery(event));
1564 }
1565 
1566 inline static hipError_t hipCtxCreate(hipCtx_t* ctx, unsigned int flags, hipDevice_t device) {
1567  return hipCUResultTohipError(cuCtxCreate(ctx, flags, device));
1568 }
1569 
1570 inline static hipError_t hipCtxDestroy(hipCtx_t ctx) {
1571  return hipCUResultTohipError(cuCtxDestroy(ctx));
1572 }
1573 
1574 inline static hipError_t hipCtxPopCurrent(hipCtx_t* ctx) {
1575  return hipCUResultTohipError(cuCtxPopCurrent(ctx));
1576 }
1577 
1578 inline static hipError_t hipCtxPushCurrent(hipCtx_t ctx) {
1579  return hipCUResultTohipError(cuCtxPushCurrent(ctx));
1580 }
1581 
1582 inline static hipError_t hipCtxSetCurrent(hipCtx_t ctx) {
1583  return hipCUResultTohipError(cuCtxSetCurrent(ctx));
1584 }
1585 
1586 inline static hipError_t hipCtxGetCurrent(hipCtx_t* ctx) {
1587  return hipCUResultTohipError(cuCtxGetCurrent(ctx));
1588 }
1589 
1590 inline static hipError_t hipCtxGetDevice(hipDevice_t* device) {
1591  return hipCUResultTohipError(cuCtxGetDevice(device));
1592 }
1593 
1594 inline static hipError_t hipCtxGetApiVersion(hipCtx_t ctx, int* apiVersion) {
1595  return hipCUResultTohipError(cuCtxGetApiVersion(ctx, (unsigned int*)apiVersion));
1596 }
1597 
1598 inline static hipError_t hipCtxGetCacheConfig(hipFuncCache* cacheConfig) {
1599  return hipCUResultTohipError(cuCtxGetCacheConfig(cacheConfig));
1600 }
1601 
1602 inline static hipError_t hipCtxSetCacheConfig(hipFuncCache cacheConfig) {
1603  return hipCUResultTohipError(cuCtxSetCacheConfig(cacheConfig));
1604 }
1605 
1606 inline static hipError_t hipCtxSetSharedMemConfig(hipSharedMemConfig config) {
1607  return hipCUResultTohipError(cuCtxSetSharedMemConfig((CUsharedconfig)config));
1608 }
1609 
1610 inline static hipError_t hipCtxGetSharedMemConfig(hipSharedMemConfig* pConfig) {
1611  return hipCUResultTohipError(cuCtxGetSharedMemConfig((CUsharedconfig*)pConfig));
1612 }
1613 
1614 inline static hipError_t hipCtxSynchronize(void) {
1615  return hipCUResultTohipError(cuCtxSynchronize());
1616 }
1617 
1618 inline static hipError_t hipCtxGetFlags(unsigned int* flags) {
1619  return hipCUResultTohipError(cuCtxGetFlags(flags));
1620 }
1621 
1622 inline static hipError_t hipCtxDetach(hipCtx_t ctx) {
1623  return hipCUResultTohipError(cuCtxDetach(ctx));
1624 }
1625 
1626 inline static hipError_t hipDeviceGet(hipDevice_t* device, int ordinal) {
1627  return hipCUResultTohipError(cuDeviceGet(device, ordinal));
1628 }
1629 
1630 inline static hipError_t hipDeviceComputeCapability(int* major, int* minor, hipDevice_t device) {
1631  return hipCUResultTohipError(cuDeviceComputeCapability(major, minor, device));
1632 }
1633 
1634 inline static hipError_t hipDeviceGetName(char* name, int len, hipDevice_t device) {
1635  return hipCUResultTohipError(cuDeviceGetName(name, len, device));
1636 }
1637 
1638 inline static hipError_t hipDeviceGetP2PAttribute(int* value, hipDeviceP2PAttr attr,
1639  int srcDevice, int dstDevice) {
1640  return hipCUDAErrorTohipError(cudaDeviceGetP2PAttribute(value, attr, srcDevice, dstDevice));
1641 }
1642 
1643 inline static hipError_t hipDeviceGetPCIBusId(char* pciBusId, int len, hipDevice_t device) {
1644  return hipCUDAErrorTohipError(cudaDeviceGetPCIBusId(pciBusId, len, device));
1645 }
1646 
1647 inline static hipError_t hipDeviceGetByPCIBusId(int* device, const char* pciBusId) {
1648  return hipCUDAErrorTohipError(cudaDeviceGetByPCIBusId(device, pciBusId));
1649 }
1650 
1651 inline static hipError_t hipDeviceGetSharedMemConfig(hipSharedMemConfig* config) {
1652  return hipCUDAErrorTohipError(cudaDeviceGetSharedMemConfig(config));
1653 }
1654 
1655 inline static hipError_t hipDeviceSetSharedMemConfig(hipSharedMemConfig config) {
1656  return hipCUDAErrorTohipError(cudaDeviceSetSharedMemConfig(config));
1657 }
1658 
1659 inline static hipError_t hipDeviceGetLimit(size_t* pValue, hipLimit_t limit) {
1660  return hipCUDAErrorTohipError(cudaDeviceGetLimit(pValue, limit));
1661 }
1662 
1663 inline static hipError_t hipDeviceTotalMem(size_t* bytes, hipDevice_t device) {
1664  return hipCUResultTohipError(cuDeviceTotalMem(bytes, device));
1665 }
1666 
1667 inline static hipError_t hipModuleLoad(hipModule_t* module, const char* fname) {
1668  return hipCUResultTohipError(cuModuleLoad(module, fname));
1669 }
1670 
1671 inline static hipError_t hipModuleUnload(hipModule_t hmod) {
1672  return hipCUResultTohipError(cuModuleUnload(hmod));
1673 }
1674 
1675 inline static hipError_t hipModuleGetFunction(hipFunction_t* function, hipModule_t module,
1676  const char* kname) {
1677  return hipCUResultTohipError(cuModuleGetFunction(function, module, kname));
1678 }
1679 
1680 inline static hipError_t hipFuncGetAttributes(hipFuncAttributes* attr, const void* func) {
1681  return hipCUDAErrorTohipError(cudaFuncGetAttributes(attr, func));
1682 }
1683 
1684 inline static hipError_t hipFuncGetAttribute (int* value, hipFunction_attribute attrib, hipFunction_t hfunc) {
1685  return hipCUResultTohipError(cuFuncGetAttribute(value, attrib, hfunc));
1686 }
1687 
1688 inline static hipError_t hipModuleGetGlobal(hipDeviceptr_t* dptr, size_t* bytes, hipModule_t hmod,
1689  const char* name) {
1690  return hipCUResultTohipError(cuModuleGetGlobal(dptr, bytes, hmod, name));
1691 }
1692 
1693 inline static hipError_t hipModuleLoadData(hipModule_t* module, const void* image) {
1694  return hipCUResultTohipError(cuModuleLoadData(module, image));
1695 }
1696 
1697 inline static hipError_t hipModuleLoadDataEx(hipModule_t* module, const void* image,
1698  unsigned int numOptions, hipJitOption* options,
1699  void** optionValues) {
1700  return hipCUResultTohipError(
1701  cuModuleLoadDataEx(module, image, numOptions, options, optionValues));
1702 }
1703 
1704 inline static hipError_t hipLaunchKernel(const void* function_address, dim3 numBlocks,
1705  dim3 dimBlocks, void** args, size_t sharedMemBytes,
1706  hipStream_t stream)
1707 {
1708  return hipCUDAErrorTohipError(cudaLaunchKernel(function_address,numBlocks,dimBlocks,args,sharedMemBytes,stream));
1709 }
1710 
1711 inline static hipError_t hipModuleLaunchKernel(hipFunction_t f, unsigned int gridDimX,
1712  unsigned int gridDimY, unsigned int gridDimZ,
1713  unsigned int blockDimX, unsigned int blockDimY,
1714  unsigned int blockDimZ, unsigned int sharedMemBytes,
1715  hipStream_t stream, void** kernelParams,
1716  void** extra) {
1717  return hipCUResultTohipError(cuLaunchKernel(f, gridDimX, gridDimY, gridDimZ, blockDimX,
1718  blockDimY, blockDimZ, sharedMemBytes, stream,
1719  kernelParams, extra));
1720 }
1721 
1722 inline static hipError_t hipFuncSetCacheConfig(const void* func, hipFuncCache_t cacheConfig) {
1723  return hipCUDAErrorTohipError(cudaFuncSetCacheConfig(func, cacheConfig));
1724 }
1725 
1726 __HIP_DEPRECATED inline static hipError_t hipBindTexture(size_t* offset,
1727  struct textureReference* tex,
1728  const void* devPtr,
1729  const hipChannelFormatDesc* desc,
1730  size_t size __dparm(UINT_MAX)) {
1731  return hipCUDAErrorTohipError(cudaBindTexture(offset, tex, devPtr, desc, size));
1732 }
1733 
1734 __HIP_DEPRECATED inline static hipError_t hipBindTexture2D(
1735  size_t* offset, struct textureReference* tex, const void* devPtr,
1736  const hipChannelFormatDesc* desc, size_t width, size_t height, size_t pitch) {
1737  return hipCUDAErrorTohipError(cudaBindTexture2D(offset, tex, devPtr, desc, width, height, pitch));
1738 }
1739 
1740 inline static hipChannelFormatDesc hipCreateChannelDesc(int x, int y, int z, int w,
1741  hipChannelFormatKind f) {
1742  return cudaCreateChannelDesc(x, y, z, w, hipChannelFormatKindToCudaChannelFormatKind(f));
1743 }
1744 
1745 inline static hipError_t hipCreateTextureObject(hipTextureObject_t* pTexObject,
1746  const hipResourceDesc* pResDesc,
1747  const hipTextureDesc* pTexDesc,
1748  const hipResourceViewDesc* pResViewDesc) {
1749  return hipCUDAErrorTohipError(
1750  cudaCreateTextureObject(pTexObject, pResDesc, pTexDesc, pResViewDesc));
1751 }
1752 
1753 inline static hipError_t hipDestroyTextureObject(hipTextureObject_t textureObject) {
1754  return hipCUDAErrorTohipError(cudaDestroyTextureObject(textureObject));
1755 }
1756 
1757 inline static hipError_t hipCreateSurfaceObject(hipSurfaceObject_t* pSurfObject,
1758  const hipResourceDesc* pResDesc) {
1759  return hipCUDAErrorTohipError(cudaCreateSurfaceObject(pSurfObject, pResDesc));
1760 }
1761 
1762 inline static hipError_t hipDestroySurfaceObject(hipSurfaceObject_t surfaceObject) {
1763  return hipCUDAErrorTohipError(cudaDestroySurfaceObject(surfaceObject));
1764 }
1765 
1766 inline static hipError_t hipGetTextureObjectResourceDesc(hipResourceDesc* pResDesc,
1767  hipTextureObject_t textureObject) {
1768  return hipCUDAErrorTohipError(cudaGetTextureObjectResourceDesc( pResDesc, textureObject));
1769 }
1770 
1771 __HIP_DEPRECATED inline static hipError_t hipGetTextureAlignmentOffset(
1772  size_t* offset, const struct textureReference* texref) {
1773  return hipCUDAErrorTohipError(cudaGetTextureAlignmentOffset(offset,texref));
1774 }
1775 
1776 inline static hipError_t hipGetChannelDesc(hipChannelFormatDesc* desc, hipArray_const_t array)
1777 {
1778  return hipCUDAErrorTohipError(cudaGetChannelDesc(desc,array));
1779 }
1780 
1781 inline static hipError_t hipLaunchCooperativeKernel(const void* f, dim3 gridDim, dim3 blockDim,
1782  void** kernelParams, unsigned int sharedMemBytes,
1783  hipStream_t stream) {
1784  return hipCUDAErrorTohipError(
1785  cudaLaunchCooperativeKernel(f, gridDim, blockDim, kernelParams, sharedMemBytes, stream));
1786 }
1787 
1788 inline static hipError_t hipLaunchCooperativeKernelMultiDevice(hipLaunchParams* launchParamsList,
1789  int numDevices, unsigned int flags) {
1790  return hipCUDAErrorTohipError(cudaLaunchCooperativeKernelMultiDevice(launchParamsList, numDevices, flags));
1791 }
1792 
1793 #ifdef __cplusplus
1794 }
1795 #endif
1796 
1797 #ifdef __CUDACC__
1798 
1799 template<class T>
1800 inline static hipError_t hipOccupancyMaxActiveBlocksPerMultiprocessor(int* numBlocks,
1801  T func,
1802  int blockSize,
1803  size_t dynamicSMemSize) {
1804  return hipCUDAErrorTohipError(cudaOccupancyMaxActiveBlocksPerMultiprocessor(numBlocks, func,
1805  blockSize, dynamicSMemSize));
1806 }
1807 
1808 template <class T>
1809 inline static hipError_t hipOccupancyMaxPotentialBlockSize(int* minGridSize, int* blockSize, T func,
1810  size_t dynamicSMemSize = 0,
1811  int blockSizeLimit = 0) {
1812  return hipCUDAErrorTohipError(cudaOccupancyMaxPotentialBlockSize(minGridSize, blockSize, func,
1813  dynamicSMemSize, blockSizeLimit));
1814 }
1815 
1816 template <class T>
1817 inline static hipError_t hipOccupancyMaxPotentialBlockSizeWithFlags(int* minGridSize, int* blockSize, T func,
1818  size_t dynamicSMemSize = 0,
1819  int blockSizeLimit = 0, unsigned int flags = 0) {
1820  return hipCUDAErrorTohipError(cudaOccupancyMaxPotentialBlockSize(minGridSize, blockSize, func,
1821  dynamicSMemSize, blockSizeLimit, flags));
1822 }
1823 
1824 template <class T>
1825 inline static hipError_t hipOccupancyMaxActiveBlocksPerMultiprocessorWithFlags( int* numBlocks, T func,
1826  int blockSize, size_t dynamicSMemSize,unsigned int flags) {
1827  return hipCUDAErrorTohipError(cudaOccupancyMaxActiveBlocksPerMultiprocessorWithFlags(numBlocks, func,
1828  blockSize, dynamicSMemSize, flags));
1829 }
1830 
1831 template <class T, int dim, enum cudaTextureReadMode readMode>
1832 inline static hipError_t hipBindTexture(size_t* offset, const struct texture<T, dim, readMode>& tex,
1833  const void* devPtr, size_t size = UINT_MAX) {
1834  return hipCUDAErrorTohipError(cudaBindTexture(offset, tex, devPtr, size));
1835 }
1836 
1837 template <class T, int dim, enum cudaTextureReadMode readMode>
1838 inline static hipError_t hipBindTexture(size_t* offset, struct texture<T, dim, readMode>& tex,
1839  const void* devPtr, const hipChannelFormatDesc& desc,
1840  size_t size = UINT_MAX) {
1841  return hipCUDAErrorTohipError(cudaBindTexture(offset, tex, devPtr, desc, size));
1842 }
1843 
1844 template <class T, int dim, enum cudaTextureReadMode readMode>
1845 __HIP_DEPRECATED inline static hipError_t hipUnbindTexture(struct texture<T, dim, readMode>* tex) {
1846  return hipCUDAErrorTohipError(cudaUnbindTexture(tex));
1847 }
1848 
1849 template <class T, int dim, enum cudaTextureReadMode readMode>
1850 __HIP_DEPRECATED inline static hipError_t hipUnbindTexture(struct texture<T, dim, readMode>& tex) {
1851  return hipCUDAErrorTohipError(cudaUnbindTexture(tex));
1852 }
1853 
1854 template <class T, int dim, enum cudaTextureReadMode readMode>
1855 __HIP_DEPRECATED inline static hipError_t hipBindTextureToArray(
1856  struct texture<T, dim, readMode>& tex, hipArray_const_t array,
1857  const hipChannelFormatDesc& desc) {
1858  return hipCUDAErrorTohipError(cudaBindTextureToArray(tex, array, desc));
1859 }
1860 
1861 template <class T, int dim, enum cudaTextureReadMode readMode>
1862 __HIP_DEPRECATED inline static hipError_t hipBindTextureToArray(
1863  struct texture<T, dim, readMode>* tex, hipArray_const_t array,
1864  const hipChannelFormatDesc* desc) {
1865  return hipCUDAErrorTohipError(cudaBindTextureToArray(tex, array, desc));
1866 }
1867 
1868 template <class T, int dim, enum cudaTextureReadMode readMode>
1869 __HIP_DEPRECATED inline static hipError_t hipBindTextureToArray(
1870  struct texture<T, dim, readMode>& tex, hipArray_const_t array) {
1871  return hipCUDAErrorTohipError(cudaBindTextureToArray(tex, array));
1872 }
1873 
1874 template <class T>
1875 inline static hipChannelFormatDesc hipCreateChannelDesc() {
1876  return cudaCreateChannelDesc<T>();
1877 }
1878 
1879 template <class T>
1880 inline static hipError_t hipLaunchCooperativeKernel(T f, dim3 gridDim, dim3 blockDim,
1881  void** kernelParams, unsigned int sharedMemBytes, hipStream_t stream) {
1882  return hipCUDAErrorTohipError(
1883  cudaLaunchCooperativeKernel(f, gridDim, blockDim, kernelParams, sharedMemBytes, stream));
1884 }
1885 
1886 #endif //__CUDACC__
1887 
1888 #endif // HIP_INCLUDE_HIP_NVCC_DETAIL_HIP_RUNTIME_API_H
hipFuncAttributes
Definition: hip_runtime_api.h:128
hipCtxSynchronize
hipError_t hipCtxSynchronize(void)
Blocks until the default context has completed all preceding requested tasks.
Definition: hip_context.cpp:249
hipPointerGetAttributes
hipError_t hipPointerGetAttributes(hipPointerAttribute_t *attributes, const void *ptr)
Return attributes for the specified pointer.
Definition: hip_memory.cpp:617
hipDeviceAttributeMaxPitch
@ hipDeviceAttributeMaxPitch
Maximum pitch in bytes allowed by memory copies.
Definition: hip_runtime_api.h:337
hipMemset3DAsync
hipError_t hipMemset3DAsync(hipPitchedPtr pitchedDevPtr, int value, hipExtent extent, hipStream_t stream __dparm(0))
Fills asynchronously the memory area pointed to by pitchedDevPtr with the constant value.
hipMemcpy3D
hipError_t hipMemcpy3D(const struct hipMemcpy3DParms *p)
Copies data between host and device.
Definition: hip_memory.cpp:1712
hipCtxGetCurrent
hipError_t hipCtxGetCurrent(hipCtx_t *ctx)
Get the handle of the current/ default context.
Definition: hip_context.cpp:167
hipDeviceProp_t::regsPerBlock
int regsPerBlock
Registers per block.
Definition: hip_runtime_api.h:88
hipMallocPitch
hipError_t hipMallocPitch(void **ptr, size_t *pitch, size_t width, size_t height)
Definition: hip_memory.cpp:851
hipSetDevice
hipError_t hipSetDevice(int deviceId)
Set default device to be used for subsequent hip API calls from this thread.
Definition: hip_device.cpp:132
hipDeviceGetP2PAttribute
hipError_t hipDeviceGetP2PAttribute(int *value, hipDeviceP2PAttr attr, int srcDevice, int dstDevice)
Returns a value for attr of link between two devices.
hipMemsetD16Async
hipError_t hipMemsetD16Async(hipDeviceptr_t dest, unsigned short value, size_t count, hipStream_t stream __dparm(0))
Fills the first sizeBytes bytes of the memory area pointed to by dest with the constant short value v...
hipDeviceAttributeMemoryBusWidth
@ hipDeviceAttributeMemoryBusWidth
Global memory bus width in bits.
Definition: hip_runtime_api.h:308
hipGetErrorString
const char * hipGetErrorString(hipError_t hipError)
Return handy text string message to explain the error which occurred.
Definition: hip_error.cpp:54
hipGetDeviceFlags
hipError_t hipGetDeviceFlags(unsigned int *flags)
Gets the flags set for current device.
hipDeviceGetByPCIBusId
hipError_t hipDeviceGetByPCIBusId(int *device, const char *pciBusId)
Returns a handle to a compute device.
Definition: hip_device.cpp:492
hipErrorInvalidMemcpyDirection
hipErrorInvalidMemcpyDirection
Invalid memory copy direction.
Definition: hip_runtime_api.h:214
hipMalloc3DArray
hipError_t hipMalloc3DArray(hipArray **array, const struct hipChannelFormatDesc *desc, struct hipExtent extent, unsigned int flags)
Allocate an array on the device.
Definition: hip_memory.cpp:1091
hipDeviceArch_t::hasGlobalInt64Atomics
unsigned hasGlobalInt64Atomics
64-bit integer atomics for global memory.
Definition: hip_runtime_api.h:54
hipDeviceProp_t::minor
int minor
Definition: hip_runtime_api.h:100
hipDeviceAttributeMaxBlockDimX
@ hipDeviceAttributeMaxBlockDimX
Maximum x-dimension of a block.
Definition: hip_runtime_api.h:292
hipErrorInvalidDevicePointer
hipErrorInvalidDevicePointer
Invalid Device Pointer.
Definition: hip_runtime_api.h:213
hipChooseDevice
hipError_t hipChooseDevice(int *device, const hipDeviceProp_t *prop)
Device which matches hipDeviceProp_t is returned.
Definition: hip_device.cpp:518
hipIpcCloseMemHandle
hipError_t hipIpcCloseMemHandle(void *devPtr)
Close memory mapped with hipIpcOpenMemHandle.
Definition: hip_memory.cpp:2539
hipMemcpy2DAsync
hipError_t hipMemcpy2DAsync(void *dst, size_t dpitch, const void *src, size_t spitch, size_t width, size_t height, hipMemcpyKind kind, hipStream_t stream __dparm(0))
Copies data between host and device.
hipLaunchKernel
hipError_t hipLaunchKernel(const void *function_address, dim3 numBlocks, dim3 dimBlocks, void **args, size_t sharedMemBytes __dparm(0), hipStream_t stream __dparm(0))
C compliant kernel launch API.
hipMemsetD32
hipError_t hipMemsetD32(hipDeviceptr_t dest, int value, size_t count)
Fills the memory area pointed to by dest with the constant integer value for specified number of time...
Definition: hip_memory.cpp:2281
hipDeviceProp_t::texturePitchAlignment
size_t texturePitchAlignment
Pitch alignment requirement for texture references bound to pitched memory.
Definition: hip_runtime_api.h:129
hipDeviceAttributeMaxGridDimX
@ hipDeviceAttributeMaxGridDimX
Maximum x-dimension of a grid.
Definition: hip_runtime_api.h:295
hipDeviceArch_t::hasThreadFenceSystem
unsigned hasThreadFenceSystem
__threadfence_system.
Definition: hip_runtime_api.h:67
hipStreamCreate
hipError_t hipStreamCreate(hipStream_t *stream)
Create an asynchronous stream.
Definition: hip_stream.cpp:106
hipOccupancyMaxActiveBlocksPerMultiprocessorWithFlags
hipError_t hipOccupancyMaxActiveBlocksPerMultiprocessorWithFlags(int *numBlocks, const void *f, int blockSize, size_t dynSharedMemPerBlk, unsigned int flags __dparm(hipOccupancyDefault))
Returns occupancy for a device function.
hipDeviceGetStreamPriorityRange
hipError_t hipDeviceGetStreamPriorityRange(int *leastPriority, int *greatestPriority)
Returns numerical values that correspond to the least and greatest stream priority.
Definition: hip_stream.cpp:122
hipIpcEventHandle_st
Definition: hip_runtime_api.h:120
hipDeviceProp_t::maxTexture3D
int maxTexture3D[3]
Maximum dimensions (width, height, depth) of 3D images, in image elements.
Definition: hip_runtime_api.h:124
hipStreamCreateWithPriority
hipError_t hipStreamCreateWithPriority(hipStream_t *stream, unsigned int flags, int priority)
Create an asynchronous stream with the specified priority.
Definition: hip_stream.cpp:113
hipCtxPushCurrent
hipError_t hipCtxPushCurrent(hipCtx_t ctx)
Push the context to be set as current/ default context.
Definition: hip_context.cpp:154
hipCtxGetDevice
hipError_t hipCtxGetDevice(hipDevice_t *device)
Get the handle of the device associated with current/default context.
Definition: hip_context.cpp:191
hipFuncCache_t
hipFuncCache_t
Definition: hip_runtime_api.h:308
hipDeviceProp_t::cooperativeMultiDeviceUnmatchedBlockDim
int cooperativeMultiDeviceUnmatchedBlockDim
Definition: hip_runtime_api.h:137
hipPeekAtLastError
hipError_t hipPeekAtLastError(void)
Return last error returned by any HIP runtime API call.
Definition: hip_error.cpp:41
hipMemcpy3DAsync
hipError_t hipMemcpy3DAsync(const struct hipMemcpy3DParms *p, hipStream_t stream __dparm(0))
Copies data between host and device asynchronously.
hipDeviceGetPCIBusId
hipError_t hipDeviceGetPCIBusId(char *pciBusId, int len, int device)
Returns a PCI Bus Id string for the device, overloaded to take int device ID.
Definition: hip_device.cpp:460
hipHostGetFlags
hipError_t hipHostGetFlags(unsigned int *flagsPtr, void *hostPtr)
Return flags associated with host pointer.
Definition: hip_memory.cpp:1133
hipErrorHostMemoryNotRegistered
hipErrorHostMemoryNotRegistered
Produced when trying to unlock a non-page-locked memory.
Definition: hip_runtime_api.h:265
hipErrorRuntimeOther
hipErrorRuntimeOther
Definition: hip_runtime_api.h:278
hipDeviceAttributeClockRate
@ hipDeviceAttributeClockRate
Peak clock frequency in kilohertz.
Definition: hip_runtime_api.h:306
hipMemGetAddressRange
hipError_t hipMemGetAddressRange(hipDeviceptr_t *pbase, size_t *psize, hipDeviceptr_t dptr)
Get information on memory allocations.
Definition: hip_memory.cpp:2437
hipSurfaceObject_t
unsigned long long hipSurfaceObject_t
Definition: hip_surface_types.h:36
hipStreamWaitEvent
hipError_t hipStreamWaitEvent(hipStream_t stream, hipEvent_t event, unsigned int flags)
Make the specified compute stream wait for an event.
Definition: hip_stream.cpp:130
hipDeviceAttributeMaxGridDimZ
@ hipDeviceAttributeMaxGridDimZ
Maximum z-dimension of a grid.
Definition: hip_runtime_api.h:297
hipGetDevice
hipError_t hipGetDevice(int *deviceId)
Return the default device id for the calling host thread.
Definition: hip_device.cpp:32
hipModuleOccupancyMaxPotentialBlockSizeWithFlags
hipError_t hipModuleOccupancyMaxPotentialBlockSizeWithFlags(int *gridSize, int *blockSize, hipFunction_t f, size_t dynSharedMemPerBlk, int blockSizeLimit, unsigned int flags)
Determines the grid and block sizes that achieve maximum occupancy for a kernel.
Definition: hip_module.cpp:1672
hipMallocArray
hipError_t hipMallocArray(hipArray **array, const hipChannelFormatDesc *desc, size_t width, size_t height __dparm(0), unsigned int flags __dparm(hipArrayDefault))
Allocate an array on the device.
hipMemcpyToArray
hipError_t hipMemcpyToArray(hipArray *dst, size_t wOffset, size_t hOffset, const void *src, size_t count, hipMemcpyKind kind)
Copies data between host and device.
Definition: hip_memory.cpp:1494
hipModuleLoadData
hipError_t hipModuleLoadData(hipModule_t *module, const void *image)
Builds a module from a code object that resides in host memory; image is a pointer to that location.
Definition: hip_module.cpp:1508
hipMemoryTypeDevice
@ hipMemoryTypeDevice
Definition: hip_runtime_api.h:151
hipDeviceAttributeMaxRegistersPerBlock
@ hipDeviceAttributeMaxRegistersPerBlock
Definition: hip_runtime_api.h:302
hipMemcpyDtoDAsync
hipError_t hipMemcpyDtoDAsync(hipDeviceptr_t dst, hipDeviceptr_t src, size_t sizeBytes, hipStream_t stream)
Copy data from Device to Device asynchronously.
Definition: hip_memory.cpp:1429
hipErrorNoDevice
hipErrorNoDevice
Call to hipGetDeviceCount returned 0 devices.
Definition: hip_runtime_api.h:219
hipDeviceAttributeComputeCapabilityMinor
@ hipDeviceAttributeComputeCapabilityMinor
Minor compute capability version number.
Definition: hip_runtime_api.h:316
hipModuleLaunchKernel
hipError_t hipModuleLaunchKernel(hipFunction_t f, unsigned int gridDimX, unsigned int gridDimY, unsigned int gridDimZ, unsigned int blockDimX, unsigned int blockDimY, unsigned int blockDimZ, unsigned int sharedMemBytes, hipStream_t stream, void **kernelParams, void **extra)
launches kernel f with launch parameters and shared memory on stream with arguments passed to kernelp...
hipDeviceProp_t::l2CacheSize
int l2CacheSize
L2 cache size.
Definition: hip_runtime_api.h:104
hipDevicePrimaryCtxRelease
hipError_t hipDevicePrimaryCtxRelease(hipDevice_t dev)
Release the primary context on the GPU.
Definition: hip_context.cpp:285
hipCtxGetApiVersion
hipError_t hipCtxGetApiVersion(hipCtx_t ctx, int *apiVersion)
Returns the approximate HIP api version.
Definition: hip_context.cpp:207
hipDeviceProp_t::textureAlignment
size_t textureAlignment
Alignment requirement for textures.
Definition: hip_runtime_api.h:128
hipHostMalloc
hipError_t hipHostMalloc(void **ptr, size_t size, unsigned int flags)
Allocate device accessible page locked host memory.
Definition: hip_memory.cpp:762
hipDeviceAttributeKernelExecTimeout
@ hipDeviceAttributeKernelExecTimeout
Run time limit for kernels executed on the device.
Definition: hip_runtime_api.h:340
hipDeviceAttributeL2CacheSize
@ hipDeviceAttributeL2CacheSize
Definition: hip_runtime_api.h:311
hipDeviceGetName
hipError_t hipDeviceGetName(char *name, int len, hipDevice_t device)
Returns an identifier string for the device.
Definition: hip_device.cpp:446
hipDeviceAttributeMaxTexture3DWidth
@ hipDeviceAttributeMaxTexture3DWidth
Maximum dimension width of 3D images in image elements.
Definition: hip_runtime_api.h:330
hipDeviceArch_t::hasSurfaceFuncs
unsigned hasSurfaceFuncs
Surface functions.
Definition: hip_runtime_api.h:71
hipDeviceAttributeIntegrated
@ hipDeviceAttributeIntegrated
iGPU
Definition: hip_runtime_api.h:324
hipDeviceProp_t::isMultiGpuBoard
int isMultiGpuBoard
1 if device is on a multi-GPU board, 0 if not.
Definition: hip_runtime_api.h:115
hipMemcpyParam2DAsync
hipError_t hipMemcpyParam2DAsync(const hip_Memcpy2D *pCopy, hipStream_t stream __dparm(0))
Copies memory for 2D arrays.
hipModuleUnload
hipError_t hipModuleUnload(hipModule_t module)
Frees the module.
Definition: hip_module.cpp:1244
hipDeviceAttributeMaxGridDimY
@ hipDeviceAttributeMaxGridDimY
Maximum y-dimension of a grid.
Definition: hip_runtime_api.h:296
hipMemoryTypeHost
@ hipMemoryTypeHost
Memory is physically located on host.
Definition: hip_runtime_api.h:150
hipDeviceEnablePeerAccess
hipError_t hipDeviceEnablePeerAccess(int peerDeviceId, unsigned int flags)
Enable direct access from current device's virtual address space to memory allocations physically loc...
Definition: hip_peer.cpp:200
hipErrorInvalidContext
hipErrorInvalidContext
Produced when input context is invalid.
Definition: hip_runtime_api.h:222
hipDeviceArch_t::hasSharedInt64Atomics
unsigned hasSharedInt64Atomics
64-bit integer atomics for shared memory.
Definition: hip_runtime_api.h:55
hipDeviceProp_t::computeMode
int computeMode
Compute mode.
Definition: hip_runtime_api.h:106
hipDeviceAttributeIsMultiGpuBoard
@ hipDeviceAttributeIsMultiGpuBoard
Multiple GPU devices.
Definition: hip_runtime_api.h:323
hipSharedMemConfig
hipSharedMemConfig
Definition: hip_runtime_api.h:318
hipDeviceProp_t::clockRate
int clockRate
Max clock frequency of the multiProcessors in khz.
Definition: hip_runtime_api.h:93
hipErrorPeerAccessNotEnabled
hipErrorPeerAccessNotEnabled
Peer access was never enabled from the current device.
Definition: hip_runtime_api.h:259
hipFuncGetAttribute
hipError_t hipFuncGetAttribute(int *value, hipFunction_attribute attrib, hipFunction_t hfunc)
Find out a specific attribute for a given function.
Definition: hip_module.cpp:1427
hipDeviceComputeCapability
hipError_t hipDeviceComputeCapability(int *major, int *minor, hipDevice_t device)
Returns the compute capability of the device.
Definition: hip_device.cpp:434
hipModuleOccupancyMaxPotentialBlockSize
hipError_t hipModuleOccupancyMaxPotentialBlockSize(int *gridSize, int *blockSize, hipFunction_t f, size_t dynSharedMemPerBlk, int blockSizeLimit)
Determines the grid and block sizes that achieve maximum occupancy for a kernel.
Definition: hip_module.cpp:1662
hipStreamCallback_t
void(* hipStreamCallback_t)(hipStream_t stream, hipError_t status, void *userData)
Definition: hip_runtime_api.h:972
hipDeviceArch_t::hasDynamicParallelism
unsigned hasDynamicParallelism
Dynamic parallelism.
Definition: hip_runtime_api.h:73
hip_Memcpy2D
Definition: driver_types.h:91
hipDeviceProp_t::canMapHostMemory
int canMapHostMemory
Check whether HIP can map host memory.
Definition: hip_runtime_api.h:116
hipDeviceProp_t::sharedMemPerBlock
size_t sharedMemPerBlock
Size of shared memory region (in bytes).
Definition: hip_runtime_api.h:87
hipCtxGetFlags
hipError_t hipCtxGetFlags(unsigned int *flags)
Return flags used for creating default context.
Definition: hip_context.cpp:254
hipDevicePrimaryCtxGetState
hipError_t hipDevicePrimaryCtxGetState(hipDevice_t dev, unsigned int *flags, int *active)
Get the state of the primary context.
Definition: hip_context.cpp:263
hipDeviceAttributeCooperativeMultiDeviceLaunch
@ hipDeviceAttributeCooperativeMultiDeviceLaunch
Support cooperative launch on multiple devices.
Definition: hip_runtime_api.h:326
hipDeviceProp_t::maxThreadsPerMultiProcessor
int maxThreadsPerMultiProcessor
Maximum resident threads per multi-processor.
Definition: hip_runtime_api.h:105
hipDeviceSetCacheConfig
hipError_t hipDeviceSetCacheConfig(hipFuncCache_t cacheConfig)
Set L1/Shared cache partition.
Definition: hip_device.cpp:74
hipCtxDestroy
hipError_t hipCtxDestroy(hipCtx_t ctx)
Destroy a HIP context.
Definition: hip_context.cpp:109
hipCtxEnablePeerAccess
hipError_t hipCtxEnablePeerAccess(hipCtx_t peerCtx, unsigned int flags)
Enables direct access to memory allocations in a peer context.
Definition: hip_peer.cpp:221
hipDeviceProp_t::major
int major
Definition: hip_runtime_api.h:97
hipDeviceAttributeMaxSharedMemoryPerBlock
@ hipDeviceAttributeMaxSharedMemoryPerBlock
Definition: hip_runtime_api.h:298
hipMemcpyAtoH
hipError_t hipMemcpyAtoH(void *dst, hipArray *srcArray, size_t srcOffset, size_t count)
Copies data between host and device.
Definition: hip_memory.cpp:1544
hipGetDeviceCount
hipError_t hipGetDeviceCount(int *count)
Return number of compute-capable devices.
Definition: hip_device.cpp:69
hipSuccess
hipSuccess
Successful completion.
Definition: hip_runtime_api.h:197
hipHostUnregister
hipError_t hipHostUnregister(void *hostPtr)
Un-register host pointer.
Definition: hip_memory.cpp:1233
hipStreamGetFlags
hipError_t hipStreamGetFlags(hipStream_t stream, unsigned int *flags)
Return flags associated with this stream.
Definition: hip_stream.cpp:223
hipMemsetD8Async
hipError_t hipMemsetD8Async(hipDeviceptr_t dest, unsigned char value, size_t count, hipStream_t stream __dparm(0))
Fills the first sizeBytes bytes of the memory area pointed to by dest with the constant byte value va...
hipDeviceAttributeMaxThreadsPerBlock
@ hipDeviceAttributeMaxThreadsPerBlock
Maximum number of threads per block.
Definition: hip_runtime_api.h:291
hipDeviceProp_t::gcnArch
int gcnArch
AMD GCN Arch Value. Eg: 803, 701.
Definition: hip_runtime_api.h:117
hipStreamSynchronize
hipError_t hipStreamSynchronize(hipStream_t stream)
Wait for all commands in stream to complete.
Definition: hip_stream.cpp:184
hipGetErrorName
const char * hipGetErrorName(hipError_t hip_error)
Return name of the specified error code in text form.
Definition: hip_error.cpp:48
hipDeviceProp_t::kernelExecTimeoutEnabled
int kernelExecTimeoutEnabled
Run time limit for kernels executed on the device.
Definition: hip_runtime_api.h:130
hipDeviceGet
hipError_t hipDeviceGet(hipDevice_t *device, int ordinal)
Returns a handle to a compute device.
Definition: hip_context.cpp:70
hipMemcpyDtoD
hipError_t hipMemcpyDtoD(hipDeviceptr_t dst, hipDeviceptr_t src, size_t sizeBytes)
Copy data from Device to Device.
Definition: hip_memory.cpp:1390
hipDeviceProp_t::maxTexture1D
int maxTexture1D
Maximum number of elements in 1D images.
Definition: hip_runtime_api.h:122
hipMemcpy3DParms
Definition: driver_types.h:383
hipDeviceAttributeMaxBlockDimZ
@ hipDeviceAttributeMaxBlockDimZ
Maximum z-dimension of a block.
Definition: hip_runtime_api.h:294
hipMallocManaged
hipError_t hipMallocManaged(void **dev_ptr, size_t size, unsigned int flags __dparm(hipMemAttachGlobal))
Allocates memory that will be automatically managed by AMD HMM.
hipMemcpyHtoD
hipError_t hipMemcpyHtoD(hipDeviceptr_t dst, void *src, size_t sizeBytes)
Copy data from Host to Device.
Definition: hip_memory.cpp:1374
hipDriverGetVersion
hipError_t hipDriverGetVersion(int *driverVersion)
Returns the approximate HIP driver version.
Definition: hip_context.cpp:85
hipDeviceArch_t::hasDoubles
unsigned hasDoubles
Double-precision floating point.
Definition: hip_runtime_api.h:58
hipErrorInvalidKernelFile
hipErrorInvalidKernelFile
In CUDA DRV, it is CUDA_ERROR_INVALID_PTX.
Definition: hip_runtime_api.h:239
hipDeviceProp_t::maxThreadsPerBlock
int maxThreadsPerBlock
Max work items per work group or workgroup max size.
Definition: hip_runtime_api.h:90
hipDeviceAttributeMaxBlockDimY
@ hipDeviceAttributeMaxBlockDimY
Maximum y-dimension of a block.
Definition: hip_runtime_api.h:293
hipMemcpy2DToArray
hipError_t hipMemcpy2DToArray(hipArray *dst, size_t wOffset, size_t hOffset, const void *src, size_t spitch, size_t width, size_t height, hipMemcpyKind kind)
Copies data between host and device.
Definition: hip_memory.cpp:1444
hipMemAllocPitch
hipError_t hipMemAllocPitch(hipDeviceptr_t *dptr, size_t *pitch, size_t widthInBytes, size_t height, unsigned int elementSizeBytes)
Definition: hip_memory.cpp:862
hipDeviceProp_t
Definition: hip_runtime_api.h:84
hipMemAllocHost
hipError_t hipMemAllocHost(void **ptr, size_t size)
Allocate pinned host memory [Deprecated].
Definition: hip_runtime_api.h:766
hipMallocHost
hipError_t hipMallocHost(void **ptr, size_t size)
Allocate pinned host memory [Deprecated].
Definition: hip_runtime_api.h:760
hipDeviceAttributeMaxTexture2DHeight
@ hipDeviceAttributeMaxTexture2DHeight
Maximum dimension height of 2D images in image elements.
Definition: hip_runtime_api.h:329
hipDeviceArch_t::hasSharedInt32Atomics
unsigned hasSharedInt32Atomics
32-bit integer atomics for shared memory.
Definition: hip_runtime_api.h:49
hipFuncSetCacheConfig
hipError_t hipFuncSetCacheConfig(const void *func, hipFuncCache_t config)
Set Cache configuration for a specific function.
Definition: hip_device.cpp:108
hipErrorInvalidValue
hipErrorInvalidValue
Definition: hip_runtime_api.h:198
hipDeviceProp_t::memPitch
size_t memPitch
Maximum pitch in bytes allowed by memory copies.
Definition: hip_runtime_api.h:127
hipMemsetD32Async
hipError_t hipMemsetD32Async(hipDeviceptr_t dst, int value, size_t count, hipStream_t stream __dparm(0))
Fills the memory area pointed to by dev with the constant integer value for specified number of times...
hipDeviceProp_t::pciBusID
int pciBusID
PCI Bus ID.
Definition: hip_runtime_api.h:112
hipRuntimeGetVersion
hipError_t hipRuntimeGetVersion(int *runtimeVersion)
Returns the approximate HIP Runtime version.
Definition: hip_context.cpp:97
hipDeviceAttributeComputeCapabilityMajor
@ hipDeviceAttributeComputeCapabilityMajor
Major compute capability version number.
Definition: hip_runtime_api.h:315
hipEventQuery
hipError_t hipEventQuery(hipEvent_t event)
Query event status.
Definition: hip_event.cpp:394
hipDeviceAttributeMaxTexture3DDepth
@ hipDeviceAttributeMaxTexture3DDepth
Maximum dimensions depth of 3D images in image elements.
Definition: hip_runtime_api.h:332
ihipCtx_t
Definition: hip_hcc_internal.h:938
hipErrorRuntimeMemory
hipErrorRuntimeMemory
Definition: hip_runtime_api.h:276
hipDeviceAttributeMaxThreadsPerMultiProcessor
@ hipDeviceAttributeMaxThreadsPerMultiProcessor
Definition: hip_runtime_api.h:313
hipStreamGetPriority
hipError_t hipStreamGetPriority(hipStream_t stream, int *priority)
Query the priority of a stream.
Definition: hip_stream.cpp:238
hipDeviceProp_t::arch
hipDeviceArch_t arch
Architectural feature flags. New for HIP.
Definition: hip_runtime_api.h:109
hipEventSynchronize
hipError_t hipEventSynchronize(hipEvent_t event)
Wait for an event to complete.
Definition: hip_event.cpp:300
hipOccupancyMaxActiveBlocksPerMultiprocessor
hipError_t hipOccupancyMaxActiveBlocksPerMultiprocessor(int *numBlocks, const void *f, int blockSize, size_t dynSharedMemPerBlk)
Returns occupancy for a device function.
Definition: hip_module.cpp:1683
hipHostFree
hipError_t hipHostFree(void *ptr)
Free memory allocated by the hcc hip host memory allocation API. This API performs an implicit hipDevi...
Definition: hip_memory.cpp:2396
hipDeviceAttributePciBusId
@ hipDeviceAttributePciBusId
PCI Bus ID.
Definition: hip_runtime_api.h:319
hipIpcOpenMemHandle
hipError_t hipIpcOpenMemHandle(void **devPtr, hipIpcMemHandle_t handle, unsigned int flags)
Opens an interprocess memory handle exported from another process and returns a device pointer usable...
Definition: hip_memory.cpp:2494
hipMemsetD16
hipError_t hipMemsetD16(hipDeviceptr_t dest, unsigned short value, size_t count)
Fills the first sizeBytes bytes of the memory area pointed to by dest with the constant short value v...
Definition: hip_memory.cpp:2271
hipDeviceProp_t::tccDriver
int tccDriver
1:If device is Tesla device using TCC driver, else 0
Definition: hip_runtime_api.h:132
ihipEvent_t
Definition: hip_hcc_internal.h:759
hipDeviceGetLimit
hipError_t hipDeviceGetLimit(size_t *pValue, enum hipLimit_t limit)
Get Resource limits of current device.
Definition: hip_device.cpp:94
hipMalloc
hipError_t hipMalloc(void **ptr, size_t size)
Allocate memory on the default accelerator.
Definition: hip_memory.cpp:695
hipIpcMemHandle_st
Definition: hip_runtime_api.h:111
hipEventElapsedTime
hipError_t hipEventElapsedTime(float *ms, hipEvent_t start, hipEvent_t stop)
Return the elapsed time between two events.
Definition: hip_event.cpp:344
hipGetLastError
hipError_t hipGetLastError(void)
Return last error returned by any HIP runtime API call and resets the stored error code to hipSuccess...
Definition: hip_error.cpp:32
hipInit
hipError_t hipInit(unsigned int flags)
Explicitly initializes the HIP runtime.
Definition: hip_context.cpp:39
ihipStream_t
Definition: hip_hcc_internal.h:580
hipDeviceAttributeTexturePitchAlignment
@ hipDeviceAttributeTexturePitchAlignment
Pitch alignment requirement for 2D texture references bound to pitched memory.
Definition: hip_runtime_api.h:339
hipDeviceAttributeWarpSize
@ hipDeviceAttributeWarpSize
Warp size in threads.
Definition: hip_runtime_api.h:301
hipDeviceArch_t::hasGlobalInt32Atomics
unsigned hasGlobalInt32Atomics
32-bit integer atomics for global memory.
Definition: hip_runtime_api.h:47
hipArray
Definition: driver_types.h:78
hipDeviceArch_t::hasSyncThreadsExt
unsigned hasSyncThreadsExt
__syncthreads_count, syncthreads_and, syncthreads_or.
Definition: hip_runtime_api.h:68
hipIpcGetMemHandle
hipError_t hipIpcGetMemHandle(hipIpcMemHandle_t *handle, void *devPtr)
Gets an interprocess memory handle for an existing device memory allocation.
Definition: hip_memory.cpp:2458
hipErrorInvalidDevice
hipErrorInvalidDevice
DeviceID must be in range 0...#compute-devices.
Definition: hip_runtime_api.h:220
hipDeviceArch_t::hasFunnelShift
unsigned hasFunnelShift
Funnel two words into one with shift&mask caps.
Definition: hip_runtime_api.h:64
hipCtxDisablePeerAccess
hipError_t hipCtxDisablePeerAccess(hipCtx_t peerCtx)
Disable direct access from current context's virtual address space to memory allocations physically l...
Definition: hip_peer.cpp:227
hipDeviceAttributeMaxTexture3DHeight
@ hipDeviceAttributeMaxTexture3DHeight
Maximum dimensions height of 3D images in image elements.
Definition: hip_runtime_api.h:331
hipDeviceAttributeMemoryClockRate
@ hipDeviceAttributeMemoryClockRate
Peak memory clock frequency in kilohertz.
Definition: hip_runtime_api.h:307
hipErrorNotReady
hipErrorNotReady
Definition: hip_runtime_api.h:250
hipHostGetDevicePointer
hipError_t hipHostGetDevicePointer(void **devPtr, void *hstPtr, unsigned int flags)
Get Device pointer from Host Pointer allocated through hipHostMalloc.
hipMemGetInfo
hipError_t hipMemGetInfo(size_t *free, size_t *total)
Query memory info. Return snapshot of free memory, and total allocatable memory on the device.
Definition: hip_memory.cpp:2296
hipEventDestroy
hipError_t hipEventDestroy(hipEvent_t event)
Destroy the specified event.
Definition: hip_event.cpp:278
hipDeviceSetSharedMemConfig
hipError_t hipDeviceSetSharedMemConfig(hipSharedMemConfig config)
The bank width of shared memory on current device is set.
Definition: hip_device.cpp:116
hipDeviceReset
hipError_t hipDeviceReset(void)
The state of current device is discarded and updated to a fresh state.
Definition: hip_device.cpp:148
hipDeviceProp_t::maxGridSize
int maxGridSize[3]
Max grid dimensions (XYZ).
Definition: hip_runtime_api.h:92
hipDeviceAttributeComputeMode
@ hipDeviceAttributeComputeMode
Compute mode that device is currently in.
Definition: hip_runtime_api.h:310
hipSetDeviceFlags
hipError_t hipSetDeviceFlags(unsigned flags)
The current device behavior is changed according to the flags passed.
hipDeviceAttributePciDeviceId
@ hipDeviceAttributePciDeviceId
PCI Device ID.
Definition: hip_runtime_api.h:320
hipDeviceProp_t::maxSharedMemoryPerMultiProcessor
size_t maxSharedMemoryPerMultiProcessor
Maximum Shared Memory Per Multiprocessor.
Definition: hip_runtime_api.h:114
hipDeviceProp_t::clockInstructionRate
int clockInstructionRate
Definition: hip_runtime_api.h:107
dim3
Definition: hip_runtime_api.h:330
hipStreamQuery
hipError_t hipStreamQuery(hipStream_t stream)
Return hipSuccess if all of the operations in the specified stream have completed,...
Definition: hip_stream.cpp:161
hipDevicePrimaryCtxSetFlags
hipError_t hipDevicePrimaryCtxSetFlags(hipDevice_t dev, unsigned int flags)
Set flags for the primary context.
Definition: hip_context.cpp:321
hipPointerAttribute_t
Definition: hip_runtime_api.h:162
hipDeviceAttributeTotalConstantMemory
@ hipDeviceAttributeTotalConstantMemory
Constant memory size in bytes.
Definition: hip_runtime_api.h:300
hipFree
hipError_t hipFree(void *ptr)
Free memory allocated by the hcc hip memory allocation API. This API performs an implicit hipDeviceSy...
Definition: hip_memory.cpp:2344
hipDeviceArch_t::hasWarpShuffle
unsigned hasWarpShuffle
Warp shuffle operations. (__shfl_*).
Definition: hip_runtime_api.h:63
hipArrayDefault
#define hipArrayDefault
Default HIP array allocation flag.
Definition: hip_runtime_api.h:221
hipDevicePrimaryCtxRetain
hipError_t hipDevicePrimaryCtxRetain(hipCtx_t *pctx, hipDevice_t dev)
Retain the primary context on the GPU.
Definition: hip_context.cpp:296
hipOccupancyMaxPotentialBlockSize
hipError_t hipOccupancyMaxPotentialBlockSize(int *gridSize, int *blockSize, const void *f, size_t dynSharedMemPerBlk, int blockSizeLimit)
Determine the grid and block sizes that achieve maximum occupancy for a kernel.
hipModuleLoad
hipError_t hipModuleLoad(hipModule_t *module, const char *fname)
Loads code object from file into a hipModule_t.
Definition: hip_module.cpp:1513
hipModuleOccupancyMaxActiveBlocksPerMultiprocessorWithFlags
hipError_t hipModuleOccupancyMaxActiveBlocksPerMultiprocessorWithFlags(int *numBlocks, hipFunction_t f, int blockSize, size_t dynSharedMemPerBlk, unsigned int flags)
Returns occupancy for a device function.
Definition: hip_module.cpp:1714
hipFreeHost
hipError_t hipFreeHost(void *ptr)
Free memory allocated by the hcc hip host memory allocation API. [Deprecated].
Definition: hip_runtime_api.h:817
hipMemcpyHtoA
hipError_t hipMemcpyHtoA(hipArray *dstArray, size_t dstOffset, const void *srcHost, size_t count)
Copies data between host and device.
Definition: hip_memory.cpp:1528
hipModuleGetFunction
hipError_t hipModuleGetFunction(hipFunction_t *function, hipModule_t module, const char *kname)
Function with kname will be extracted if present in module.
Definition: hip_module.cpp:1309
hipDeviceProp_t::memoryBusWidth
int memoryBusWidth
Global memory bus width in bits.
Definition: hip_runtime_api.h:95
hipStreamAddCallback
hipError_t hipStreamAddCallback(hipStream_t stream, hipStreamCallback_t callback, void *userData, unsigned int flags)
Adds a callback to be called on the host after all currently enqueued items in the stream have comple...
Definition: hip_stream.cpp:258
hipDeviceArch_t::hasWarpVote
unsigned hasWarpVote
Warp vote instructions (__any, __all).
Definition: hip_runtime_api.h:61
hipDeviceProp_t::name
char name[256]
Device name.
Definition: hip_runtime_api.h:85
hipMemcpyDtoHAsync
hipError_t hipMemcpyDtoHAsync(void *dst, hipDeviceptr_t src, size_t sizeBytes, hipStream_t stream)
Copy data from Device to Host asynchronously.
Definition: hip_memory.cpp:1437
hipDeviceArch_t::hasGlobalFloatAtomicExch
unsigned hasGlobalFloatAtomicExch
32-bit float atomic exch for global memory.
Definition: hip_runtime_api.h:48
hipDeviceProp_t::concurrentKernels
int concurrentKernels
Device can possibly execute multiple kernels concurrently.
Definition: hip_runtime_api.h:110
hipDeviceArch_t::hasWarpBallot
unsigned hasWarpBallot
Warp ballot instructions (__ballot).
Definition: hip_runtime_api.h:62
hipDeviceProp_t::totalGlobalMem
size_t totalGlobalMem
Size of global memory region (in bytes).
Definition: hip_runtime_api.h:86
hipDeviceAttributeTextureAlignment
@ hipDeviceAttributeTextureAlignment
Alignment requirement for textures.
Definition: hip_runtime_api.h:338
hipFuncGetAttributes
hipError_t hipFuncGetAttributes(struct hipFuncAttributes *attr, const void *func)
Find out attributes for a given function.
Definition: hip_module.cpp:1393
hipEventRecord
hipError_t hipEventRecord(hipEvent_t event, hipStream_t stream)
Record an event in the specified stream.
Definition: hip_event.cpp:213
hipMemcpy2D
hipError_t hipMemcpy2D(void *dst, size_t dpitch, const void *src, size_t spitch, size_t width, size_t height, hipMemcpyKind kind)
Copies data between host and device.
Definition: hip_memory.cpp:2020
hipExtent
Definition: driver_types.h:370
hipPitchedPtr
Definition: driver_types.h:363
hipModuleGetGlobal
hipError_t hipModuleGetGlobal(void **, size_t *, hipModule_t, const char *)
Returns the device memory pointer and size of the global with the given symbol name in the module.
Definition: hip_module.cpp:1113
hipMemset2D
hipError_t hipMemset2D(void *dst, size_t pitch, int value, size_t width, size_t height)
Fills the memory area pointed to by dst with the constant value.
Definition: hip_memory.cpp:2251
hipMemset3D
hipError_t hipMemset3D(hipPitchedPtr pitchedDevPtr, int value, hipExtent extent)
Fills synchronously the memory area pointed to by pitchedDevPtr with the constant value.
Definition: hip_memory.cpp:2286
hipStreamCreateWithFlags
hipError_t hipStreamCreateWithFlags(hipStream_t *stream, unsigned int flags)
Create an asynchronous stream.
Definition: hip_stream.cpp:97
hipDeviceGetAttribute
hipError_t hipDeviceGetAttribute(int *pi, hipDeviceAttribute_t attr, int deviceId)
Query for a specific device attribute.
Definition: hip_device.cpp:354
hipMemcpyFromArray
hipError_t hipMemcpyFromArray(void *dst, hipArray_const_t srcArray, size_t wOffset, size_t hOffset, size_t count, hipMemcpyKind kind)
Copies data between host and device.
Definition: hip_memory.cpp:1511
hipDeviceAttributeCanMapHostMemory
@ hipDeviceAttributeCanMapHostMemory
Device can map host memory into device address space.
Definition: hip_runtime_api.h:341
hipDeviceProp_t::maxThreadsDim
int maxThreadsDim[3]
Max number of threads in each dimension (XYZ) of a block.
Definition: hip_runtime_api.h:91
ihipModuleSymbol_t
Definition: hip_module.cpp:108
hipMemcpyPeerAsync
hipError_t hipMemcpyPeerAsync(void *dst, int dstDeviceId, const void *src, int srcDevice, size_t sizeBytes, hipStream_t stream __dparm(0))
Copies memory from one device to memory on another device.
hipMemcpyHtoDAsync
hipError_t hipMemcpyHtoDAsync(hipDeviceptr_t dst, void *src, size_t sizeBytes, hipStream_t stream)
Copy data from Host to Device asynchronously.
Definition: hip_memory.cpp:1422
hipDeviceProp_t::cooperativeMultiDeviceLaunch
int cooperativeMultiDeviceLaunch
HIP device supports cooperative launch on multiple devices.
Definition: hip_runtime_api.h:121
hipMemcpyDtoH
hipError_t hipMemcpyDtoH(void *dst, hipDeviceptr_t src, size_t sizeBytes)
Copy data from Device to Host.
Definition: hip_memory.cpp:1382
hipDeviceArch_t::has3dGrid
unsigned has3dGrid
Grid and group dims are 3D (rather than 2D).
Definition: hip_runtime_api.h:72
hipDeviceGetCacheConfig
hipError_t hipDeviceGetCacheConfig(hipFuncCache_t *cacheConfig)
Get the cache configuration of the current device.
Definition: hip_device.cpp:82
hipMemcpyPeer
hipError_t hipMemcpyPeer(void *dst, int dstDeviceId, const void *src, int srcDeviceId, size_t sizeBytes)
Copies memory from one device to memory on another device.
Definition: hip_peer.cpp:207
hipDeviceAttributeMaxTexture1DWidth
@ hipDeviceAttributeMaxTexture1DWidth
Maximum number of elements in 1D images.
Definition: hip_runtime_api.h:327
hipDeviceAttributeCooperativeLaunch
@ hipDeviceAttributeCooperativeLaunch
Support cooperative launch.
Definition: hip_runtime_api.h:325
hipDeviceAttributeMultiprocessorCount
@ hipDeviceAttributeMultiprocessorCount
Number of multiprocessors on the device.
Definition: hip_runtime_api.h:309
hipDeviceProp_t::pciDeviceID
int pciDeviceID
PCI Device ID.
Definition: hip_runtime_api.h:113
hipGetDeviceProperties
hipError_t hipGetDeviceProperties(hipDeviceProp_t *prop, int deviceId)
Returns device properties.
Definition: hip_device.cpp:381
hipMemcpy
hipError_t hipMemcpy(void *dst, const void *src, size_t sizeBytes, hipMemcpyKind kind)
Copy data from src to dst.
Definition: hip_memory.cpp:1367
hipDeviceProp_t::memoryClockRate
int memoryClockRate
Max global memory clock frequency in kHz.
Definition: hip_runtime_api.h:94
hipEventCreateWithFlags
hipError_t hipEventCreateWithFlags(hipEvent_t *event, unsigned flags)
Create an event with the specified flags.
Definition: hip_event.cpp:201
hipErrorCooperativeLaunchTooLarge
hipErrorCooperativeLaunchTooLarge
Definition: hip_runtime_api.h:269
hipDeviceProp_t::warpSize
int warpSize
Warp size.
Definition: hip_runtime_api.h:89
hipCtxGetSharedMemConfig
hipError_t hipCtxGetSharedMemConfig(hipSharedMemConfig *pConfig)
Get Shared memory bank configuration.
Definition: hip_context.cpp:241
hipDeviceTotalMem
hipError_t hipDeviceTotalMem(size_t *bytes, hipDevice_t device)
Returns the total amount of memory on the device.
Definition: hip_device.cpp:480
hipFreeArray
hipError_t hipFreeArray(hipArray *array)
Frees an array on the device.
Definition: hip_memory.cpp:2409
hipErrorAssert
hipErrorAssert
Produced when the kernel calls assert.
Definition: hip_runtime_api.h:262
textureReference
Definition: texture_types.h:74
hipCtxPopCurrent
hipError_t hipCtxPopCurrent(hipCtx_t *ctx)
Pop the current/default context and return the popped context.
Definition: hip_context.cpp:133
hipDeviceProp_t::cooperativeMultiDeviceUnmatchedFunc
int cooperativeMultiDeviceUnmatchedFunc
Definition: hip_runtime_api.h:133
hipDeviceProp_t::cooperativeMultiDeviceUnmatchedGridDim
int cooperativeMultiDeviceUnmatchedGridDim
Definition: hip_runtime_api.h:135
hipDeviceCanAccessPeer
hipError_t hipDeviceCanAccessPeer(int *canAccessPeer, int deviceId, int peerDeviceId)
Determine if a device can access a peer's memory.
Definition: hip_peer.cpp:186
hipDeviceArch_t::hasFloatAtomicAdd
unsigned hasFloatAtomicAdd
32-bit float atomic add in global and shared memory.
Definition: hip_runtime_api.h:51
hipCtxSetCurrent
hipError_t hipCtxSetCurrent(hipCtx_t ctx)
Set the passed context as current/default.
Definition: hip_context.cpp:178
hipDeviceProp_t::cooperativeLaunch
int cooperativeLaunch
HIP device supports cooperative launch.
Definition: hip_runtime_api.h:120
hipDeviceArch_t::hasSharedFloatAtomicExch
unsigned hasSharedFloatAtomicExch
32-bit float atomic exch for shared memory.
Definition: hip_runtime_api.h:50
hipTextureDesc
Definition: texture_types.h:95
hipResourceViewDesc
Definition: driver_types.h:323
hipDeviceProp_t::multiProcessorCount
int multiProcessorCount
Number of multi-processors (compute units).
Definition: hip_runtime_api.h:103
hipDeviceProp_t::integrated
int integrated
APU vs dGPU.
Definition: hip_runtime_api.h:119
hipMemsetD8
hipError_t hipMemsetD8(hipDeviceptr_t dest, unsigned char value, size_t count)
Fills the first sizeBytes bytes of the memory area pointed to by dest with the constant byte value va...
Definition: hip_memory.cpp:2261
hipCtxSetCacheConfig
hipError_t hipCtxSetCacheConfig(hipFuncCache_t cacheConfig)
Set L1/Shared cache partition.
Definition: hip_context.cpp:225
hipMemset2DAsync
hipError_t hipMemset2DAsync(void *dst, size_t pitch, int value, size_t width, size_t height, hipStream_t stream __dparm(0))
Fills asynchronously the memory area pointed to by dst with the constant value.
hipDeviceProp_t::ECCEnabled
int ECCEnabled
Device has ECC support enabled.
Definition: hip_runtime_api.h:131
hipCtxCreate
hipError_t hipCtxCreate(hipCtx_t *ctx, unsigned int flags, hipDevice_t device)
Create a context and set it as current/default context.
Definition: hip_context.cpp:52
hipDeviceProp_t::totalConstMem
size_t totalConstMem
Size of constant memory region (in bytes).
Definition: hip_runtime_api.h:96
hipDeviceProp_t::maxTexture2D
int maxTexture2D[2]
Maximum dimensions (width, height) of 2D images, in image elements.
Definition: hip_runtime_api.h:123
hipLaunchParams_t
Definition: hip_runtime_api.h:339
hipErrorHostMemoryAlreadyRegistered
hipErrorHostMemoryAlreadyRegistered
Produced when trying to lock a page-locked memory.
Definition: hip_runtime_api.h:263
hipFuncAttribute
hipFuncAttribute
Definition: hip_runtime_api.h:299
hipCtxSetSharedMemConfig
hipError_t hipCtxSetSharedMemConfig(hipSharedMemConfig config)
Set Shared memory bank configuration.
Definition: hip_context.cpp:233
hipModuleOccupancyMaxActiveBlocksPerMultiprocessor
hipError_t hipModuleOccupancyMaxActiveBlocksPerMultiprocessor(int *numBlocks, hipFunction_t f, int blockSize, size_t dynSharedMemPerBlk)
Returns occupancy for a device function.
Definition: hip_module.cpp:1693
hipDeviceAttribute_t
hipDeviceAttribute_t
Definition: hip_runtime_api.h:290
hipFuncSetSharedMemConfig
hipError_t hipFuncSetSharedMemConfig(const void *func, hipSharedMemConfig config)
Set shared memory configuration for a specific function.
Definition: hip_module.cpp:1419
hipResourceDesc
Definition: driver_types.h:262
hipErrorLaunchFailure
hipErrorLaunchFailure
An exception occurred on the device while executing a kernel.
Definition: hip_runtime_api.h:267
ihipModule_t
Definition: hip_hcc_internal.h:415
hipDeviceSynchronize
hipError_t hipDeviceSynchronize(void)
Waits on all active streams on current device.
Definition: hip_device.cpp:143
hipDeviceAttributeConcurrentKernels
@ hipDeviceAttributeConcurrentKernels
Definition: hip_runtime_api.h:317
hipDeviceProp_t::cooperativeMultiDeviceUnmatchedSharedMem
int cooperativeMultiDeviceUnmatchedSharedMem
Definition: hip_runtime_api.h:139
hipProfilerStart
hipError_t hipProfilerStart()
Start recording of profiling information. When using this API, start the profiler with profiling disab...
Definition: hip_hcc.cpp:2496
hipDeviceGetSharedMemConfig
hipError_t hipDeviceGetSharedMemConfig(hipSharedMemConfig *pConfig)
Returns bank width of shared memory for current device.
Definition: hip_device.cpp:124
hipErrorNotSupported
hipErrorNotSupported
Produced when the hip API is not supported/implemented.
Definition: hip_runtime_api.h:273
hipMemcpyAsync
hipError_t hipMemcpyAsync(void *dst, const void *src, size_t sizeBytes, hipMemcpyKind kind, hipStream_t stream __dparm(0))
Copies sizeBytes bytes from the memory area pointed to by src to the memory area pointed to by offset...
hipErrorLaunchOutOfResources
hipErrorLaunchOutOfResources
Out of resources error.
Definition: hip_runtime_api.h:255
hipStreamDestroy
hipError_t hipStreamDestroy(hipStream_t stream)
Destroys the specified stream.
Definition: hip_stream.cpp:195
hipHostRegister
hipError_t hipHostRegister(void *hostPtr, size_t sizeBytes, unsigned int flags)
Register host memory so it can be accessed from the current device.
Definition: hip_memory.cpp:1158
hipFuncSetAttribute
hipError_t hipFuncSetAttribute(const void *func, hipFuncAttribute attr, int value)
Set attribute for a specific function.
Definition: hip_module.cpp:1411
hipProfilerStop
hipError_t hipProfilerStop()
Stop recording of profiling information. When using this API, start the profiler with profiling disab...
Definition: hip_hcc.cpp:2502
hipModuleLoadDataEx
hipError_t hipModuleLoadDataEx(hipModule_t *module, const void *image, unsigned int numOptions, hipJitOption *options, void **optionValues)
builds module from code object which resides in host memory. Image is pointer to that location....
Definition: hip_module.cpp:1527
hipEventCreate
hipError_t hipEventCreate(hipEvent_t *event)
Definition: hip_event.cpp:207
hipMemsetAsync
hipError_t hipMemsetAsync(void *dst, int value, size_t sizeBytes, hipStream_t stream __dparm(0))
Fills the first sizeBytes bytes of the memory area pointed to by dst with the constant byte value val...
hipDeviceAttributeMaxTexture2DWidth
@ hipDeviceAttributeMaxTexture2DWidth
Maximum dimension width of 2D images in image elements.
Definition: hip_runtime_api.h:328
hipDeviceProp_t::pciDomainID
int pciDomainID
PCI Domain ID.
Definition: hip_runtime_api.h:111
hipCtxGetCacheConfig
hipError_t hipCtxGetCacheConfig(hipFuncCache_t *cacheConfig)
Get the cache configuration of the current context.
Definition: hip_context.cpp:217
hipMemcpyParam2D
hipError_t hipMemcpyParam2D(const hip_Memcpy2D *pCopy)
Copies memory for 2D arrays.
Definition: hip_memory.cpp:2144
hipHostAlloc
hipError_t hipHostAlloc(void **ptr, size_t size, unsigned int flags)
Allocate device accessible page locked host memory [Deprecated].
Definition: hip_runtime_api.h:772
hipMemset
hipError_t hipMemset(void *dst, int value, size_t sizeBytes)
Fills the first sizeBytes bytes of the memory area pointed to by dest with the constant byte value va...
Definition: hip_memory.cpp:2220
hipDeviceDisablePeerAccess
hipError_t hipDeviceDisablePeerAccess(int peerDeviceId)
Disable direct access from current device's virtual address space to memory allocations physically lo...
Definition: hip_peer.cpp:193
hipDeviceAttributeEccEnabled
@ hipDeviceAttributeEccEnabled
Device has ECC support enabled.
Definition: hip_runtime_api.h:342
hipDeviceAttributeMaxSharedMemoryPerMultiprocessor
@ hipDeviceAttributeMaxSharedMemoryPerMultiprocessor
Definition: hip_runtime_api.h:321
hipDevicePrimaryCtxReset
hipError_t hipDevicePrimaryCtxReset(hipDevice_t dev)
Resets the primary context on the GPU.
Definition: hip_context.cpp:308
hipChannelFormatDesc
Definition: driver_types.h:38
hipErrorPeerAccessAlreadyEnabled
hipErrorPeerAccessAlreadyEnabled
Peer access was already enabled from the current device.
Definition: hip_runtime_api.h:257