HIP: Heterogeneous-computing Interface for Portability
hip_runtime_api.h
1 /*
2 Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved.
3 
4 Permission is hereby granted, free of charge, to any person obtaining a copy
5 of this software and associated documentation files (the "Software"), to deal
6 in the Software without restriction, including without limitation the rights
7 to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
8 copies of the Software, and to permit persons to whom the Software is
9 furnished to do so, subject to the following conditions:
10 
11 The above copyright notice and this permission notice shall be included in
12 all copies or substantial portions of the Software.
13 
14 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
17 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
19 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
20 THE SOFTWARE.
21 */
22 
23 #ifndef HIP_INCLUDE_HIP_NVIDIA_DETAIL_HIP_RUNTIME_API_H
24 #define HIP_INCLUDE_HIP_NVIDIA_DETAIL_HIP_RUNTIME_API_H
25 
26 #include <cuda_runtime_api.h>
27 #include <cuda.h>
28 #include <cuda_profiler_api.h>
29 #include <cuda_fp16.h>
30 
31 #ifdef __cplusplus
32 extern "C" {
33 #endif
34 
35 #ifdef __cplusplus
36 #define __dparm(x) = x
37 #else
38 #define __dparm(x)
39 #endif
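// Example (illustrative sketch only): __dparm supplies C++ default arguments while
// keeping the same declaration usable from C. A wrapper declared as
//   hipError_t hipMemsetAsync(void* devPtr, int value, size_t count,
//                             hipStream_t stream __dparm(0));
// expands to "hipStream_t stream = 0" when compiled as C++, so the stream
// argument may be omitted at C++ call sites; under C the macro expands to nothing.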
40 
41 // Add Deprecated Support for CUDA Mapped HIP APIs
42 #if defined(__DOXYGEN_ONLY__) || defined(HIP_ENABLE_DEPRECATED)
43 #define __HIP_DEPRECATED
44 #elif defined(_MSC_VER)
45 #define __HIP_DEPRECATED __declspec(deprecated)
46 #elif defined(__GNUC__)
47 #define __HIP_DEPRECATED __attribute__((deprecated))
48 #else
49 #define __HIP_DEPRECATED
50 #endif
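// Example (illustrative sketch only): wrappers kept purely for CUDA source
// compatibility are tagged with this macro, e.g.
//   __HIP_DEPRECATED inline static hipError_t hipMemcpyToArray(...);
// Building with -DHIP_ENABLE_DEPRECATED (or for Doxygen) defines __HIP_DEPRECATED
// to nothing, which suppresses the compiler's deprecation warning.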
51 
52 
53 // TODO -move to include/hip_runtime_api.h as a common implementation.
58 typedef enum hipMemcpyKind {
59  hipMemcpyHostToHost,
60  hipMemcpyHostToDevice,
61  hipMemcpyDeviceToHost,
62  hipMemcpyDeviceToDevice,
63  hipMemcpyDefault
64 } hipMemcpyKind;
65 
66 typedef enum hipMemoryAdvise {
67  hipMemAdviseSetReadMostly,
68  hipMemAdviseUnsetReadMostly,
69  hipMemAdviseSetPreferredLocation,
70  hipMemAdviseUnsetPreferredLocation,
71  hipMemAdviseSetAccessedBy,
72  hipMemAdviseUnsetAccessedBy
73 } hipMemoryAdvise;
74 
75 typedef enum hipMemRangeAttribute {
76  hipMemRangeAttributeReadMostly,
77  hipMemRangeAttributePreferredLocation,
78  hipMemRangeAttributeAccessedBy,
79  hipMemRangeAttributeLastPrefetchLocation
80 } hipMemRangeAttribute;
81 
82 // hipDataType
83 #define hipDataType cudaDataType
84 #define HIP_R_16F CUDA_R_16F
85 #define HIP_R_32F CUDA_R_32F
86 #define HIP_R_64F CUDA_R_64F
87 #define HIP_C_16F CUDA_C_16F
88 #define HIP_C_32F CUDA_C_32F
89 #define HIP_C_64F CUDA_C_64F
90 
91 // hipLibraryPropertyType
92 #define hipLibraryPropertyType libraryPropertyType
93 #define HIP_LIBRARY_MAJOR_VERSION MAJOR_VERSION
94 #define HIP_LIBRARY_MINOR_VERSION MINOR_VERSION
95 #define HIP_LIBRARY_PATCH_LEVEL PATCH_LEVEL
96 
97 #define HIP_ARRAY_DESCRIPTOR CUDA_ARRAY_DESCRIPTOR
98 #define HIP_ARRAY3D_DESCRIPTOR CUDA_ARRAY3D_DESCRIPTOR
99 
100 //hipArray_Format
101 #define HIP_AD_FORMAT_UNSIGNED_INT8 CU_AD_FORMAT_UNSIGNED_INT8
102 #define HIP_AD_FORMAT_UNSIGNED_INT16 CU_AD_FORMAT_UNSIGNED_INT16
103 #define HIP_AD_FORMAT_UNSIGNED_INT32 CU_AD_FORMAT_UNSIGNED_INT32
104 #define HIP_AD_FORMAT_SIGNED_INT8 CU_AD_FORMAT_SIGNED_INT8
105 #define HIP_AD_FORMAT_SIGNED_INT16 CU_AD_FORMAT_SIGNED_INT16
106 #define HIP_AD_FORMAT_SIGNED_INT32 CU_AD_FORMAT_SIGNED_INT32
107 #define HIP_AD_FORMAT_HALF CU_AD_FORMAT_HALF
108 #define HIP_AD_FORMAT_FLOAT CU_AD_FORMAT_FLOAT
109 
110 // hipArray_Format
111 #define hipArray_Format CUarray_format
112 
113 inline static CUarray_format hipArray_FormatToCUarray_format(
114  hipArray_Format format) {
115  switch (format) {
116  case HIP_AD_FORMAT_UNSIGNED_INT8:
117  return CU_AD_FORMAT_UNSIGNED_INT8;
118  case HIP_AD_FORMAT_UNSIGNED_INT16:
119  return CU_AD_FORMAT_UNSIGNED_INT16;
120  case HIP_AD_FORMAT_UNSIGNED_INT32:
121  return CU_AD_FORMAT_UNSIGNED_INT32;
122  case HIP_AD_FORMAT_SIGNED_INT8:
123  return CU_AD_FORMAT_SIGNED_INT8;
124  case HIP_AD_FORMAT_SIGNED_INT16:
125  return CU_AD_FORMAT_SIGNED_INT16;
126  case HIP_AD_FORMAT_SIGNED_INT32:
127  return CU_AD_FORMAT_SIGNED_INT32;
128  case HIP_AD_FORMAT_HALF:
129  return CU_AD_FORMAT_HALF;
130  case HIP_AD_FORMAT_FLOAT:
131  return CU_AD_FORMAT_FLOAT;
132  default:
133  return CU_AD_FORMAT_UNSIGNED_INT8;
134  }
135 }
136 
137 #define HIP_TR_ADDRESS_MODE_WRAP CU_TR_ADDRESS_MODE_WRAP
138 #define HIP_TR_ADDRESS_MODE_CLAMP CU_TR_ADDRESS_MODE_CLAMP
139 #define HIP_TR_ADDRESS_MODE_MIRROR CU_TR_ADDRESS_MODE_MIRROR
140 #define HIP_TR_ADDRESS_MODE_BORDER CU_TR_ADDRESS_MODE_BORDER
141 
142 // hipAddress_mode
143 #define hipAddress_mode CUaddress_mode
144 
145 inline static CUaddress_mode hipAddress_modeToCUaddress_mode(
146  hipAddress_mode mode) {
147  switch (mode) {
148  case HIP_TR_ADDRESS_MODE_WRAP:
149  return CU_TR_ADDRESS_MODE_WRAP;
150  case HIP_TR_ADDRESS_MODE_CLAMP:
151  return CU_TR_ADDRESS_MODE_CLAMP;
152  case HIP_TR_ADDRESS_MODE_MIRROR:
153  return CU_TR_ADDRESS_MODE_MIRROR;
154  case HIP_TR_ADDRESS_MODE_BORDER:
155  return CU_TR_ADDRESS_MODE_BORDER;
156  default:
157  return CU_TR_ADDRESS_MODE_WRAP;
158  }
159 }
160 
161 #define HIP_TR_FILTER_MODE_POINT CU_TR_FILTER_MODE_POINT
162 #define HIP_TR_FILTER_MODE_LINEAR CU_TR_FILTER_MODE_LINEAR
163 
164 // hipFilter_mode
165 #define hipFilter_mode CUfilter_mode
166 
167 inline static CUfilter_mode hipFilter_mode_enumToCUfilter_mode(
168  hipFilter_mode mode) {
169  switch (mode) {
170  case HIP_TR_FILTER_MODE_POINT:
171  return CU_TR_FILTER_MODE_POINT;
172  case HIP_TR_FILTER_MODE_LINEAR:
173  return CU_TR_FILTER_MODE_LINEAR;
174  default:
175  return CU_TR_FILTER_MODE_POINT;
176  }
177 }
178 
179 //hipResourcetype
180 #define HIP_RESOURCE_TYPE_ARRAY CU_RESOURCE_TYPE_ARRAY
181 #define HIP_RESOURCE_TYPE_MIPMAPPED_ARRAY CU_RESOURCE_TYPE_MIPMAPPED_ARRAY
182 #define HIP_RESOURCE_TYPE_LINEAR CU_RESOURCE_TYPE_LINEAR
183 #define HIP_RESOURCE_TYPE_PITCH2D CU_RESOURCE_TYPE_PITCH2D
184 
185 // hipResourcetype
186 #define hipResourcetype CUresourcetype
187 
188 inline static CUresourcetype hipResourcetype_enumToCUresourcetype(
189  hipResourcetype resType) {
190  switch (resType) {
191  case HIP_RESOURCE_TYPE_ARRAY:
192  return CU_RESOURCE_TYPE_ARRAY;
193  case HIP_RESOURCE_TYPE_MIPMAPPED_ARRAY:
194  return CU_RESOURCE_TYPE_MIPMAPPED_ARRAY;
195  case HIP_RESOURCE_TYPE_LINEAR:
196  return CU_RESOURCE_TYPE_LINEAR;
197  case HIP_RESOURCE_TYPE_PITCH2D:
198  return CU_RESOURCE_TYPE_PITCH2D;
199  default:
200  return CU_RESOURCE_TYPE_ARRAY;
201  }
202 }
203 
204 #define hipTexRef CUtexref
205 #define hiparray CUarray
206 
207 // hipTextureAddressMode
208 typedef enum cudaTextureAddressMode hipTextureAddressMode;
209 #define hipAddressModeWrap cudaAddressModeWrap
210 #define hipAddressModeClamp cudaAddressModeClamp
211 #define hipAddressModeMirror cudaAddressModeMirror
212 #define hipAddressModeBorder cudaAddressModeBorder
213 
214 // hipTextureFilterMode
215 typedef enum cudaTextureFilterMode hipTextureFilterMode;
216 #define hipFilterModePoint cudaFilterModePoint
217 #define hipFilterModeLinear cudaFilterModeLinear
218 
219 // hipTextureReadMode
220 typedef enum cudaTextureReadMode hipTextureReadMode;
221 #define hipReadModeElementType cudaReadModeElementType
222 #define hipReadModeNormalizedFloat cudaReadModeNormalizedFloat
223 
224 // hipChannelFormatKind
225 typedef enum cudaChannelFormatKind hipChannelFormatKind;
226 #define hipChannelFormatKindSigned cudaChannelFormatKindSigned
227 #define hipChannelFormatKindUnsigned cudaChannelFormatKindUnsigned
228 #define hipChannelFormatKindFloat cudaChannelFormatKindFloat
229 #define hipChannelFormatKindNone cudaChannelFormatKindNone
230 
231 #define hipSurfaceBoundaryMode cudaSurfaceBoundaryMode
232 #define hipBoundaryModeZero cudaBoundaryModeZero
233 #define hipBoundaryModeTrap cudaBoundaryModeTrap
234 #define hipBoundaryModeClamp cudaBoundaryModeClamp
235 
236 // hipFuncCache
237 #define hipFuncCachePreferNone cudaFuncCachePreferNone
238 #define hipFuncCachePreferShared cudaFuncCachePreferShared
239 #define hipFuncCachePreferL1 cudaFuncCachePreferL1
240 #define hipFuncCachePreferEqual cudaFuncCachePreferEqual
241 
242 // hipResourceType
243 #define hipResourceType cudaResourceType
244 #define hipResourceTypeArray cudaResourceTypeArray
245 #define hipResourceTypeMipmappedArray cudaResourceTypeMipmappedArray
246 #define hipResourceTypeLinear cudaResourceTypeLinear
247 #define hipResourceTypePitch2D cudaResourceTypePitch2D
248 //
249 // hipErrorNoDevice.
250 
251 
253 #define hipEventDefault cudaEventDefault
254 #define hipEventBlockingSync cudaEventBlockingSync
255 #define hipEventDisableTiming cudaEventDisableTiming
256 #define hipEventInterprocess cudaEventInterprocess
257 #define hipEventReleaseToDevice 0 /* no-op on CUDA platform */
258 #define hipEventReleaseToSystem 0 /* no-op on CUDA platform */
259 
260 
261 #define hipHostMallocDefault cudaHostAllocDefault
262 #define hipHostMallocPortable cudaHostAllocPortable
263 #define hipHostMallocMapped cudaHostAllocMapped
264 #define hipHostMallocWriteCombined cudaHostAllocWriteCombined
265 #define hipHostMallocCoherent 0x0
266 #define hipHostMallocNonCoherent 0x0
267 
268 #define hipMemAttachGlobal cudaMemAttachGlobal
269 #define hipMemAttachHost cudaMemAttachHost
270 #define hipMemAttachSingle cudaMemAttachSingle
271 
272 #define hipHostRegisterDefault cudaHostRegisterDefault
273 #define hipHostRegisterPortable cudaHostRegisterPortable
274 #define hipHostRegisterMapped cudaHostRegisterMapped
275 #define hipHostRegisterIoMemory cudaHostRegisterIoMemory
276 
277 #define HIP_LAUNCH_PARAM_BUFFER_POINTER CU_LAUNCH_PARAM_BUFFER_POINTER
278 #define HIP_LAUNCH_PARAM_BUFFER_SIZE CU_LAUNCH_PARAM_BUFFER_SIZE
279 #define HIP_LAUNCH_PARAM_END CU_LAUNCH_PARAM_END
280 #define hipLimitMallocHeapSize cudaLimitMallocHeapSize
281 #define hipIpcMemLazyEnablePeerAccess cudaIpcMemLazyEnablePeerAccess
282 
283 #define hipOccupancyDefault cudaOccupancyDefault
284 
285 #define hipCooperativeLaunchMultiDeviceNoPreSync \
286  cudaCooperativeLaunchMultiDeviceNoPreSync
287 #define hipCooperativeLaunchMultiDeviceNoPostSync \
288  cudaCooperativeLaunchMultiDeviceNoPostSync
289 
290 
291 // enum CUjit_option redefines
292 #define hipJitOptionMaxRegisters CU_JIT_MAX_REGISTERS
293 #define hipJitOptionThreadsPerBlock CU_JIT_THREADS_PER_BLOCK
294 #define hipJitOptionWallTime CU_JIT_WALL_TIME
295 #define hipJitOptionInfoLogBuffer CU_JIT_INFO_LOG_BUFFER
296 #define hipJitOptionInfoLogBufferSizeBytes CU_JIT_INFO_LOG_BUFFER_SIZE_BYTES
297 #define hipJitOptionErrorLogBuffer CU_JIT_ERROR_LOG_BUFFER
298 #define hipJitOptionErrorLogBufferSizeBytes CU_JIT_ERROR_LOG_BUFFER_SIZE_BYTES
299 #define hipJitOptionOptimizationLevel CU_JIT_OPTIMIZATION_LEVEL
300 #define hipJitOptionTargetFromContext CU_JIT_TARGET_FROM_CUCONTEXT
301 #define hipJitOptionTarget CU_JIT_TARGET
302 #define hipJitOptionFallbackStrategy CU_JIT_FALLBACK_STRATEGY
303 #define hipJitOptionGenerateDebugInfo CU_JIT_GENERATE_DEBUG_INFO
304 #define hipJitOptionLogVerbose CU_JIT_LOG_VERBOSE
305 #define hipJitOptionGenerateLineInfo CU_JIT_GENERATE_LINE_INFO
306 #define hipJitOptionCacheMode CU_JIT_CACHE_MODE
307 #define hipJitOptionSm3xOpt CU_JIT_NEW_SM3X_OPT
308 #define hipJitOptionFastCompile CU_JIT_FAST_COMPILE
309 #define hipJitOptionNumOptions CU_JIT_NUM_OPTIONS
310 
311 typedef cudaEvent_t hipEvent_t;
312 typedef cudaStream_t hipStream_t;
313 typedef cudaIpcEventHandle_t hipIpcEventHandle_t;
314 typedef cudaIpcMemHandle_t hipIpcMemHandle_t;
315 typedef enum cudaLimit hipLimit_t;
316 typedef enum cudaFuncAttribute hipFuncAttribute;
317 typedef enum cudaFuncCache hipFuncCache_t;
318 typedef CUcontext hipCtx_t;
319 typedef enum cudaSharedMemConfig hipSharedMemConfig;
320 typedef CUfunc_cache hipFuncCache;
321 typedef CUjit_option hipJitOption;
322 typedef CUdevice hipDevice_t;
323 typedef enum cudaDeviceP2PAttr hipDeviceP2PAttr;
324 #define hipDevP2PAttrPerformanceRank cudaDevP2PAttrPerformanceRank
325 #define hipDevP2PAttrAccessSupported cudaDevP2PAttrAccessSupported
326 #define hipDevP2PAttrNativeAtomicSupported cudaDevP2PAttrNativeAtomicSupported
327 #define hipDevP2PAttrHipArrayAccessSupported cudaDevP2PAttrCudaArrayAccessSupported
328 #define hipFuncAttributeMaxDynamicSharedMemorySize cudaFuncAttributeMaxDynamicSharedMemorySize
329 #define hipFuncAttributePreferredSharedMemoryCarveout cudaFuncAttributePreferredSharedMemoryCarveout
330 
331 typedef CUmodule hipModule_t;
332 typedef CUfunction hipFunction_t;
333 typedef CUdeviceptr hipDeviceptr_t;
334 typedef struct cudaArray hipArray;
335 typedef struct cudaArray* hipArray_t;
336 typedef struct cudaArray* hipArray_const_t;
337 typedef struct cudaFuncAttributes hipFuncAttributes;
338 typedef struct cudaLaunchParams hipLaunchParams;
339 #define hipFunction_attribute CUfunction_attribute
340 #define hip_Memcpy2D CUDA_MEMCPY2D
341 #define HIP_MEMCPY3D CUDA_MEMCPY3D
342 #define hipMemcpy3DParms cudaMemcpy3DParms
343 #define hipArrayDefault cudaArrayDefault
344 #define hipArrayLayered cudaArrayLayered
345 #define hipArraySurfaceLoadStore cudaArraySurfaceLoadStore
346 #define hipArrayCubemap cudaArrayCubemap
347 #define hipArrayTextureGather cudaArrayTextureGather
348 
349 typedef cudaTextureObject_t hipTextureObject_t;
350 typedef cudaSurfaceObject_t hipSurfaceObject_t;
351 #define hipTextureType1D cudaTextureType1D
352 #define hipTextureType1DLayered cudaTextureType1DLayered
353 #define hipTextureType2D cudaTextureType2D
354 #define hipTextureType2DLayered cudaTextureType2DLayered
355 #define hipTextureType3D cudaTextureType3D
356 
357 #define hipDeviceScheduleAuto cudaDeviceScheduleAuto
358 #define hipDeviceScheduleSpin cudaDeviceScheduleSpin
359 #define hipDeviceScheduleYield cudaDeviceScheduleYield
360 #define hipDeviceScheduleBlockingSync cudaDeviceScheduleBlockingSync
361 #define hipDeviceScheduleMask cudaDeviceScheduleMask
362 #define hipDeviceMapHost cudaDeviceMapHost
363 #define hipDeviceLmemResizeToMax cudaDeviceLmemResizeToMax
364 
365 #define hipCpuDeviceId cudaCpuDeviceId
366 #define hipInvalidDeviceId cudaInvalidDeviceId
367 typedef struct cudaExtent hipExtent;
368 typedef struct cudaPitchedPtr hipPitchedPtr;
369 #define make_hipExtent make_cudaExtent
370 #define make_hipPos make_cudaPos
371 #define make_hipPitchedPtr make_cudaPitchedPtr
372 // Flags that can be used with hipStreamCreateWithFlags
373 #define hipStreamDefault cudaStreamDefault
374 #define hipStreamNonBlocking cudaStreamNonBlocking
375 
376 typedef struct cudaChannelFormatDesc hipChannelFormatDesc;
377 typedef struct cudaResourceDesc hipResourceDesc;
378 typedef struct cudaTextureDesc hipTextureDesc;
379 typedef struct cudaResourceViewDesc hipResourceViewDesc;
380 // hipSharedMemConfig
381 #define hipSharedMemBankSizeDefault cudaSharedMemBankSizeDefault
382 #define hipSharedMemBankSizeFourByte cudaSharedMemBankSizeFourByte
383 #define hipSharedMemBankSizeEightByte cudaSharedMemBankSizeEightByte
384 
385 //Function Attributes
386 #define HIP_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK CU_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK
387 #define HIP_FUNC_ATTRIBUTE_SHARED_SIZE_BYTES CU_FUNC_ATTRIBUTE_SHARED_SIZE_BYTES
388 #define HIP_FUNC_ATTRIBUTE_CONST_SIZE_BYTES CU_FUNC_ATTRIBUTE_CONST_SIZE_BYTES
389 #define HIP_FUNC_ATTRIBUTE_LOCAL_SIZE_BYTES CU_FUNC_ATTRIBUTE_LOCAL_SIZE_BYTES
390 #define HIP_FUNC_ATTRIBUTE_NUM_REGS CU_FUNC_ATTRIBUTE_NUM_REGS
391 #define HIP_FUNC_ATTRIBUTE_PTX_VERSION CU_FUNC_ATTRIBUTE_PTX_VERSION
392 #define HIP_FUNC_ATTRIBUTE_BINARY_VERSION CU_FUNC_ATTRIBUTE_BINARY_VERSION
393 #define HIP_FUNC_ATTRIBUTE_CACHE_MODE_CA CU_FUNC_ATTRIBUTE_CACHE_MODE_CA
394 #define HIP_FUNC_ATTRIBUTE_MAX_DYNAMIC_SHARED_SIZE_BYTES CU_FUNC_ATTRIBUTE_MAX_DYNAMIC_SHARED_SIZE_BYTES
395 #define HIP_FUNC_ATTRIBUTE_PREFERRED_SHARED_MEMORY_CARVEOUT CU_FUNC_ATTRIBUTE_PREFERRED_SHARED_MEMORY_CARVEOUT
396 #define HIP_FUNC_ATTRIBUTE_MAX CU_FUNC_ATTRIBUTE_MAX
397 
398 #if CUDA_VERSION >= 9000
399 #define __shfl(...) __shfl_sync(0xffffffff, __VA_ARGS__)
400 #define __shfl_up(...) __shfl_up_sync(0xffffffff, __VA_ARGS__)
401 #define __shfl_down(...) __shfl_down_sync(0xffffffff, __VA_ARGS__)
402 #define __shfl_xor(...) __shfl_xor_sync(0xffffffff, __VA_ARGS__)
403 #endif // CUDA_VERSION >= 9000
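// On CUDA 9.0 and newer the legacy warp shuffles are remapped to the *_sync
// variants with a full-warp mask (0xffffffff covers all 32 lanes). A sketch of
// the expansion in device code:
//   int v = __shfl(x, srcLane);   // becomes __shfl_sync(0xffffffff, x, srcLane)
//   int w = __shfl_down(x, 4);    // becomes __shfl_down_sync(0xffffffff, x, 4)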
404 
405 inline static hipError_t hipCUDAErrorTohipError(cudaError_t cuError) {
406  switch (cuError) {
407  case cudaSuccess:
408  return hipSuccess;
409  case cudaErrorProfilerDisabled:
410  return hipErrorProfilerDisabled;
411  case cudaErrorProfilerNotInitialized:
412  return hipErrorProfilerNotInitialized;
413  case cudaErrorProfilerAlreadyStarted:
414  return hipErrorProfilerAlreadyStarted;
415  case cudaErrorProfilerAlreadyStopped:
416  return hipErrorProfilerAlreadyStopped;
417  case cudaErrorInsufficientDriver:
418  return hipErrorInsufficientDriver;
419  case cudaErrorUnsupportedLimit:
420  return hipErrorUnsupportedLimit;
421  case cudaErrorPeerAccessUnsupported:
422  return hipErrorPeerAccessUnsupported;
423  case cudaErrorInvalidGraphicsContext:
424  return hipErrorInvalidGraphicsContext;
425  case cudaErrorSharedObjectSymbolNotFound:
426  return hipErrorSharedObjectSymbolNotFound;
427  case cudaErrorSharedObjectInitFailed:
428  return hipErrorSharedObjectInitFailed;
429  case cudaErrorOperatingSystem:
430  return hipErrorOperatingSystem;
431  case cudaErrorSetOnActiveProcess:
432  return hipErrorSetOnActiveProcess;
433  case cudaErrorIllegalAddress:
434  return hipErrorIllegalAddress;
435  case cudaErrorInvalidSymbol:
436  return hipErrorInvalidSymbol;
437  case cudaErrorMissingConfiguration:
438  return hipErrorMissingConfiguration;
439  case cudaErrorMemoryAllocation:
440  return hipErrorOutOfMemory;
441  case cudaErrorInitializationError:
442  return hipErrorNotInitialized;
443  case cudaErrorLaunchFailure:
444  return hipErrorLaunchFailure;
445  case cudaErrorCooperativeLaunchTooLarge:
446  return hipErrorCooperativeLaunchTooLarge;
447  case cudaErrorPriorLaunchFailure:
448  return hipErrorPriorLaunchFailure;
449  case cudaErrorLaunchOutOfResources:
450  return hipErrorLaunchOutOfResources;
451  case cudaErrorInvalidDeviceFunction:
452  return hipErrorInvalidDeviceFunction;
453  case cudaErrorInvalidConfiguration:
454  return hipErrorInvalidConfiguration;
455  case cudaErrorInvalidDevice:
456  return hipErrorInvalidDevice;
457  case cudaErrorInvalidValue:
458  return hipErrorInvalidValue;
459  case cudaErrorInvalidDevicePointer:
460  return hipErrorInvalidDevicePointer;
461  case cudaErrorInvalidMemcpyDirection:
462  return hipErrorInvalidMemcpyDirection;
463  case cudaErrorInvalidResourceHandle:
464  return hipErrorInvalidHandle;
465  case cudaErrorNotReady:
466  return hipErrorNotReady;
467  case cudaErrorNoDevice:
468  return hipErrorNoDevice;
469  case cudaErrorPeerAccessAlreadyEnabled:
470  return hipErrorPeerAccessAlreadyEnabled;
471  case cudaErrorPeerAccessNotEnabled:
472  return hipErrorPeerAccessNotEnabled;
473  case cudaErrorHostMemoryAlreadyRegistered:
474  return hipErrorHostMemoryAlreadyRegistered;
475  case cudaErrorHostMemoryNotRegistered:
476  return hipErrorHostMemoryNotRegistered;
477  case cudaErrorMapBufferObjectFailed:
478  return hipErrorMapFailed;
479  case cudaErrorAssert:
480  return hipErrorAssert;
481  case cudaErrorNotSupported:
482  return hipErrorNotSupported;
483  case cudaErrorCudartUnloading:
484  return hipErrorDeinitialized;
485  case cudaErrorInvalidKernelImage:
486  return hipErrorInvalidImage;
487  case cudaErrorUnmapBufferObjectFailed:
488  return hipErrorUnmapFailed;
489  case cudaErrorNoKernelImageForDevice:
490  return hipErrorNoBinaryForGpu;
491  case cudaErrorECCUncorrectable:
492  return hipErrorECCNotCorrectable;
493  case cudaErrorDeviceAlreadyInUse:
494  return hipErrorContextAlreadyInUse;
495  case cudaErrorInvalidPtx:
496  return hipErrorInvalidKernelFile;
497  case cudaErrorLaunchTimeout:
498  return hipErrorLaunchTimeOut;
499 #if CUDA_VERSION >= 10010
500  case cudaErrorInvalidSource:
501  return hipErrorInvalidSource;
502  case cudaErrorFileNotFound:
503  return hipErrorFileNotFound;
504  case cudaErrorSymbolNotFound:
505  return hipErrorNotFound;
506  case cudaErrorArrayIsMapped:
507  return hipErrorArrayIsMapped;
508  case cudaErrorNotMappedAsPointer:
509  return hipErrorNotMappedAsPointer;
510  case cudaErrorNotMappedAsArray:
511  return hipErrorNotMappedAsArray;
512  case cudaErrorNotMapped:
513  return hipErrorNotMapped;
514  case cudaErrorAlreadyAcquired:
515  return hipErrorAlreadyAcquired;
516  case cudaErrorAlreadyMapped:
517  return hipErrorAlreadyMapped;
518 #endif
519 #if CUDA_VERSION >= 10020
520  case cudaErrorDeviceUninitialized:
521  return hipErrorInvalidContext;
522 #endif
523  case cudaErrorUnknown:
524  default:
525  return hipErrorUnknown; // Note - translated error.
526  }
527 }
528 
529 inline static hipError_t hipCUResultTohipError(CUresult cuError) {
530  switch (cuError) {
531  case CUDA_SUCCESS:
532  return hipSuccess;
533  case CUDA_ERROR_OUT_OF_MEMORY:
534  return hipErrorOutOfMemory;
535  case CUDA_ERROR_INVALID_VALUE:
536  return hipErrorInvalidValue;
537  case CUDA_ERROR_INVALID_DEVICE:
538  return hipErrorInvalidDevice;
539  case CUDA_ERROR_DEINITIALIZED:
540  return hipErrorDeinitialized;
541  case CUDA_ERROR_NO_DEVICE:
542  return hipErrorNoDevice;
543  case CUDA_ERROR_INVALID_CONTEXT:
544  return hipErrorInvalidContext;
545  case CUDA_ERROR_NOT_INITIALIZED:
546  return hipErrorNotInitialized;
547  case CUDA_ERROR_INVALID_HANDLE:
548  return hipErrorInvalidHandle;
549  case CUDA_ERROR_MAP_FAILED:
550  return hipErrorMapFailed;
551  case CUDA_ERROR_PROFILER_DISABLED:
552  return hipErrorProfilerDisabled;
553  case CUDA_ERROR_PROFILER_NOT_INITIALIZED:
554  return hipErrorProfilerNotInitialized;
555  case CUDA_ERROR_PROFILER_ALREADY_STARTED:
556  return hipErrorProfilerAlreadyStarted;
557  case CUDA_ERROR_PROFILER_ALREADY_STOPPED:
558  return hipErrorProfilerAlreadyStopped;
559  case CUDA_ERROR_INVALID_IMAGE:
560  return hipErrorInvalidImage;
561  case CUDA_ERROR_CONTEXT_ALREADY_CURRENT:
562  return hipErrorContextAlreadyCurrent;
563  case CUDA_ERROR_UNMAP_FAILED:
564  return hipErrorUnmapFailed;
565  case CUDA_ERROR_ARRAY_IS_MAPPED:
566  return hipErrorArrayIsMapped;
567  case CUDA_ERROR_ALREADY_MAPPED:
568  return hipErrorAlreadyMapped;
569  case CUDA_ERROR_NO_BINARY_FOR_GPU:
570  return hipErrorNoBinaryForGpu;
571  case CUDA_ERROR_ALREADY_ACQUIRED:
572  return hipErrorAlreadyAcquired;
573  case CUDA_ERROR_NOT_MAPPED:
574  return hipErrorNotMapped;
575  case CUDA_ERROR_NOT_MAPPED_AS_ARRAY:
576  return hipErrorNotMappedAsArray;
577  case CUDA_ERROR_NOT_MAPPED_AS_POINTER:
578  return hipErrorNotMappedAsPointer;
579  case CUDA_ERROR_ECC_UNCORRECTABLE:
580  return hipErrorECCNotCorrectable;
581  case CUDA_ERROR_UNSUPPORTED_LIMIT:
582  return hipErrorUnsupportedLimit;
583  case CUDA_ERROR_CONTEXT_ALREADY_IN_USE:
584  return hipErrorContextAlreadyInUse;
585  case CUDA_ERROR_PEER_ACCESS_UNSUPPORTED:
586  return hipErrorPeerAccessUnsupported;
587  case CUDA_ERROR_INVALID_PTX:
588  return hipErrorInvalidKernelFile;
589  case CUDA_ERROR_INVALID_GRAPHICS_CONTEXT:
590  return hipErrorInvalidGraphicsContext;
591  case CUDA_ERROR_INVALID_SOURCE:
592  return hipErrorInvalidSource;
593  case CUDA_ERROR_FILE_NOT_FOUND:
594  return hipErrorFileNotFound;
595  case CUDA_ERROR_SHARED_OBJECT_SYMBOL_NOT_FOUND:
596  return hipErrorSharedObjectSymbolNotFound;
597  case CUDA_ERROR_SHARED_OBJECT_INIT_FAILED:
598  return hipErrorSharedObjectInitFailed;
599  case CUDA_ERROR_OPERATING_SYSTEM:
600  return hipErrorOperatingSystem;
601  case CUDA_ERROR_NOT_FOUND:
602  return hipErrorNotFound;
603  case CUDA_ERROR_NOT_READY:
604  return hipErrorNotReady;
605  case CUDA_ERROR_ILLEGAL_ADDRESS:
606  return hipErrorIllegalAddress;
607  case CUDA_ERROR_LAUNCH_OUT_OF_RESOURCES:
608  return hipErrorLaunchOutOfResources;
609  case CUDA_ERROR_LAUNCH_TIMEOUT:
610  return hipErrorLaunchTimeOut;
611  case CUDA_ERROR_PEER_ACCESS_ALREADY_ENABLED:
612  return hipErrorPeerAccessAlreadyEnabled;
613  case CUDA_ERROR_PEER_ACCESS_NOT_ENABLED:
614  return hipErrorPeerAccessNotEnabled;
615  case CUDA_ERROR_PRIMARY_CONTEXT_ACTIVE:
616  return hipErrorSetOnActiveProcess;
617  case CUDA_ERROR_ASSERT:
618  return hipErrorAssert;
619  case CUDA_ERROR_HOST_MEMORY_ALREADY_REGISTERED:
620  return hipErrorHostMemoryAlreadyRegistered;
621  case CUDA_ERROR_HOST_MEMORY_NOT_REGISTERED:
622  return hipErrorHostMemoryNotRegistered;
623  case CUDA_ERROR_LAUNCH_FAILED:
624  return hipErrorLaunchFailure;
625  case CUDA_ERROR_COOPERATIVE_LAUNCH_TOO_LARGE:
626  return hipErrorCooperativeLaunchTooLarge;
627  case CUDA_ERROR_NOT_SUPPORTED:
628  return hipErrorNotSupported;
629  case CUDA_ERROR_UNKNOWN:
630  default:
631  return hipErrorUnknown; // Note - translated error.
632  }
633 }
634 
635 inline static cudaError_t hipErrorToCudaError(hipError_t hError) {
636  switch (hError) {
637  case hipSuccess:
638  return cudaSuccess;
639  case hipErrorOutOfMemory:
640  return cudaErrorMemoryAllocation;
641  case hipErrorProfilerDisabled:
642  return cudaErrorProfilerDisabled;
643  case hipErrorProfilerNotInitialized:
644  return cudaErrorProfilerNotInitialized;
645  case hipErrorProfilerAlreadyStarted:
646  return cudaErrorProfilerAlreadyStarted;
647  case hipErrorProfilerAlreadyStopped:
648  return cudaErrorProfilerAlreadyStopped;
649  case hipErrorInvalidConfiguration:
650  return cudaErrorInvalidConfiguration;
651  case hipErrorLaunchOutOfResources:
652  return cudaErrorLaunchOutOfResources;
653  case hipErrorInvalidValue:
654  return cudaErrorInvalidValue;
655  case hipErrorInvalidHandle:
656  return cudaErrorInvalidResourceHandle;
657  case hipErrorInvalidDevice:
658  return cudaErrorInvalidDevice;
659  case hipErrorInvalidMemcpyDirection:
660  return cudaErrorInvalidMemcpyDirection;
661  case hipErrorInvalidDevicePointer:
662  return cudaErrorInvalidDevicePointer;
663  case hipErrorNotInitialized:
664  return cudaErrorInitializationError;
665  case hipErrorNoDevice:
666  return cudaErrorNoDevice;
667  case hipErrorNotReady:
668  return cudaErrorNotReady;
669  case hipErrorPeerAccessNotEnabled:
670  return cudaErrorPeerAccessNotEnabled;
671  case hipErrorPeerAccessAlreadyEnabled:
672  return cudaErrorPeerAccessAlreadyEnabled;
673  case hipErrorHostMemoryAlreadyRegistered:
674  return cudaErrorHostMemoryAlreadyRegistered;
675  case hipErrorHostMemoryNotRegistered:
676  return cudaErrorHostMemoryNotRegistered;
677  case hipErrorDeinitialized:
678  return cudaErrorCudartUnloading;
679  case hipErrorInvalidSymbol:
680  return cudaErrorInvalidSymbol;
681  case hipErrorInsufficientDriver:
682  return cudaErrorInsufficientDriver;
683  case hipErrorMissingConfiguration:
684  return cudaErrorMissingConfiguration;
685  case hipErrorPriorLaunchFailure:
686  return cudaErrorPriorLaunchFailure;
687  case hipErrorInvalidDeviceFunction:
688  return cudaErrorInvalidDeviceFunction;
689  case hipErrorInvalidImage:
690  return cudaErrorInvalidKernelImage;
691  case hipErrorInvalidContext:
692 #if CUDA_VERSION >= 10020
693  return cudaErrorDeviceUninitialized;
694 #else
695  return cudaErrorUnknown;
696 #endif
697  case hipErrorMapFailed:
698  return cudaErrorMapBufferObjectFailed;
699  case hipErrorUnmapFailed:
700  return cudaErrorUnmapBufferObjectFailed;
701  case hipErrorArrayIsMapped:
702 #if CUDA_VERSION >= 10010
703  return cudaErrorArrayIsMapped;
704 #else
705  return cudaErrorUnknown;
706 #endif
707  case hipErrorAlreadyMapped:
708 #if CUDA_VERSION >= 10010
709  return cudaErrorAlreadyMapped;
710 #else
711  return cudaErrorUnknown;
712 #endif
713  case hipErrorNoBinaryForGpu:
714  return cudaErrorNoKernelImageForDevice;
715  case hipErrorAlreadyAcquired:
716 #if CUDA_VERSION >= 10010
717  return cudaErrorAlreadyAcquired;
718 #else
719  return cudaErrorUnknown;
720 #endif
721  case hipErrorNotMapped:
722 #if CUDA_VERSION >= 10010
723  return cudaErrorNotMapped;
724 #else
725  return cudaErrorUnknown;
726 #endif
727  case hipErrorNotMappedAsArray:
728 #if CUDA_VERSION >= 10010
729  return cudaErrorNotMappedAsArray;
730 #else
731  return cudaErrorUnknown;
732 #endif
733  case hipErrorNotMappedAsPointer:
734 #if CUDA_VERSION >= 10010
735  return cudaErrorNotMappedAsPointer;
736 #else
737  return cudaErrorUnknown;
738 #endif
739  case hipErrorECCNotCorrectable:
740  return cudaErrorECCUncorrectable;
741  case hipErrorUnsupportedLimit:
742  return cudaErrorUnsupportedLimit;
743  case hipErrorContextAlreadyInUse:
744  return cudaErrorDeviceAlreadyInUse;
745  case hipErrorPeerAccessUnsupported:
746  return cudaErrorPeerAccessUnsupported;
747  case hipErrorInvalidKernelFile:
748  return cudaErrorInvalidPtx;
749  case hipErrorInvalidGraphicsContext:
750  return cudaErrorInvalidGraphicsContext;
751  case hipErrorInvalidSource:
752 #if CUDA_VERSION >= 10010
753  return cudaErrorInvalidSource;
754 #else
755  return cudaErrorUnknown;
756 #endif
757  case hipErrorFileNotFound:
758 #if CUDA_VERSION >= 10010
759  return cudaErrorFileNotFound;
760 #else
761  return cudaErrorUnknown;
762 #endif
763  case hipErrorSharedObjectSymbolNotFound:
764  return cudaErrorSharedObjectSymbolNotFound;
765  case hipErrorSharedObjectInitFailed:
766  return cudaErrorSharedObjectInitFailed;
767  case hipErrorOperatingSystem:
768  return cudaErrorOperatingSystem;
769  case hipErrorNotFound:
770 #if CUDA_VERSION >= 10010
771  return cudaErrorSymbolNotFound;
772 #else
773  return cudaErrorUnknown;
774 #endif
775  case hipErrorIllegalAddress:
776  return cudaErrorIllegalAddress;
777  case hipErrorLaunchTimeOut:
778  return cudaErrorLaunchTimeout;
779  case hipErrorSetOnActiveProcess:
780  return cudaErrorSetOnActiveProcess;
781  case hipErrorLaunchFailure:
782  return cudaErrorLaunchFailure;
783  case hipErrorCooperativeLaunchTooLarge:
784  return cudaErrorCooperativeLaunchTooLarge;
785  case hipErrorNotSupported:
786  return cudaErrorNotSupported;
787  // HSA: does not exist in CUDA
788  case hipErrorRuntimeMemory:
789  // HSA: does not exist in CUDA
790  case hipErrorRuntimeOther:
791  case hipErrorUnknown:
792  case hipErrorTbd:
793  default:
794  return cudaErrorUnknown; // Note - translated error.
795  }
796 }
797 
798 inline static enum cudaMemcpyKind hipMemcpyKindToCudaMemcpyKind(hipMemcpyKind kind) {
799  switch (kind) {
800  case hipMemcpyHostToHost:
801  return cudaMemcpyHostToHost;
802  case hipMemcpyHostToDevice:
803  return cudaMemcpyHostToDevice;
804  case hipMemcpyDeviceToHost:
805  return cudaMemcpyDeviceToHost;
806  case hipMemcpyDeviceToDevice:
807  return cudaMemcpyDeviceToDevice;
808  default:
809  return cudaMemcpyDefault;
810  }
811 }
812 
813 inline static enum cudaTextureAddressMode hipTextureAddressModeToCudaTextureAddressMode(
814  hipTextureAddressMode kind) {
815  switch (kind) {
816  case hipAddressModeWrap:
817  return cudaAddressModeWrap;
818  case hipAddressModeClamp:
819  return cudaAddressModeClamp;
820  case hipAddressModeMirror:
821  return cudaAddressModeMirror;
822  case hipAddressModeBorder:
823  return cudaAddressModeBorder;
824  default:
825  return cudaAddressModeWrap;
826  }
827 }
828 
829 inline static enum cudaMemRangeAttribute hipMemRangeAttributeTocudaMemRangeAttribute(
830  hipMemRangeAttribute kind) {
831  switch (kind) {
832  case hipMemRangeAttributeReadMostly:
833  return cudaMemRangeAttributeReadMostly;
834  case hipMemRangeAttributePreferredLocation:
835  return cudaMemRangeAttributePreferredLocation;
836  case hipMemRangeAttributeAccessedBy:
837  return cudaMemRangeAttributeAccessedBy;
838  case hipMemRangeAttributeLastPrefetchLocation:
839  return cudaMemRangeAttributeLastPrefetchLocation;
840  default:
841  return cudaMemRangeAttributeReadMostly;
842  }
843 }
844 
845 inline static enum cudaMemoryAdvise hipMemoryAdviseTocudaMemoryAdvise(
846  hipMemoryAdvise kind) {
847  switch (kind) {
848  case hipMemAdviseSetReadMostly:
849  return cudaMemAdviseSetReadMostly;
850  case hipMemAdviseUnsetReadMostly:
851  return cudaMemAdviseUnsetReadMostly;
852  case hipMemAdviseSetPreferredLocation:
853  return cudaMemAdviseSetPreferredLocation;
854  case hipMemAdviseUnsetPreferredLocation:
855  return cudaMemAdviseUnsetPreferredLocation;
856  case hipMemAdviseSetAccessedBy:
857  return cudaMemAdviseSetAccessedBy;
858  case hipMemAdviseUnsetAccessedBy:
859  return cudaMemAdviseUnsetAccessedBy;
860  default:
861  return cudaMemAdviseSetReadMostly;
862  }
863 }
864 
865 inline static enum cudaTextureFilterMode hipTextureFilterModeToCudaTextureFilterMode(
866  hipTextureFilterMode kind) {
867  switch (kind) {
868  case hipFilterModePoint:
869  return cudaFilterModePoint;
870  case hipFilterModeLinear:
871  return cudaFilterModeLinear;
872  default:
873  return cudaFilterModePoint;
874  }
875 }
876 
877 inline static enum cudaTextureReadMode hipTextureReadModeToCudaTextureReadMode(hipTextureReadMode kind) {
878  switch (kind) {
879  case hipReadModeElementType:
880  return cudaReadModeElementType;
881  case hipReadModeNormalizedFloat:
882  return cudaReadModeNormalizedFloat;
883  default:
884  return cudaReadModeElementType;
885  }
886 }
887 
888 inline static enum cudaChannelFormatKind hipChannelFormatKindToCudaChannelFormatKind(
889  hipChannelFormatKind kind) {
890  switch (kind) {
891  case hipChannelFormatKindSigned:
892  return cudaChannelFormatKindSigned;
893  case hipChannelFormatKindUnsigned:
894  return cudaChannelFormatKindUnsigned;
895  case hipChannelFormatKindFloat:
896  return cudaChannelFormatKindFloat;
897  case hipChannelFormatKindNone:
898  return cudaChannelFormatKindNone;
899  default:
900  return cudaChannelFormatKindNone;
901  }
902 }
903 
907 #define HIPRT_CB CUDART_CB
908 typedef void(HIPRT_CB* hipStreamCallback_t)(hipStream_t stream, hipError_t status, void* userData);
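// Sketch of a callback matching hipStreamCallback_t (hypothetical helper, not
// part of this header):
//   static void HIPRT_CB myStreamCallback(hipStream_t stream, hipError_t status,
//                                         void* userData) {
//       /* inspect status; HIP/CUDA runtime calls must not be made from here */
//   }
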
909 inline static hipError_t hipInit(unsigned int flags) {
910  return hipCUResultTohipError(cuInit(flags));
911 }
912 
913 inline static hipError_t hipDeviceReset() { return hipCUDAErrorTohipError(cudaDeviceReset()); }
914 
915 inline static hipError_t hipGetLastError() { return hipCUDAErrorTohipError(cudaGetLastError()); }
916 
917 inline static hipError_t hipPeekAtLastError() {
918  return hipCUDAErrorTohipError(cudaPeekAtLastError());
919 }
920 
921 inline static hipError_t hipMalloc(void** ptr, size_t size) {
922  return hipCUDAErrorTohipError(cudaMalloc(ptr, size));
923 }
924 
925 inline static hipError_t hipMallocPitch(void** ptr, size_t* pitch, size_t width, size_t height) {
926  return hipCUDAErrorTohipError(cudaMallocPitch(ptr, pitch, width, height));
927 }
928 
929 inline static hipError_t hipMemAllocPitch(hipDeviceptr_t* dptr,size_t* pitch,size_t widthInBytes,size_t height,unsigned int elementSizeBytes){
930  return hipCUResultTohipError(cuMemAllocPitch(dptr,pitch,widthInBytes,height,elementSizeBytes));
931 }
932 
933 inline static hipError_t hipMalloc3D(hipPitchedPtr* pitchedDevPtr, hipExtent extent) {
934  return hipCUDAErrorTohipError(cudaMalloc3D(pitchedDevPtr, extent));
935 }
936 
937 inline static hipError_t hipFree(void* ptr) { return hipCUDAErrorTohipError(cudaFree(ptr)); }
938 
939 inline static hipError_t hipMallocHost(void** ptr, size_t size)
940  __attribute__((deprecated("use hipHostMalloc instead")));
941 inline static hipError_t hipMallocHost(void** ptr, size_t size) {
942  return hipCUDAErrorTohipError(cudaMallocHost(ptr, size));
943 }
944 
945 inline static hipError_t hipMemAllocHost(void** ptr, size_t size)
946  __attribute__((deprecated("use hipHostMalloc instead")));
947 inline static hipError_t hipMemAllocHost(void** ptr, size_t size) {
948  return hipCUResultTohipError(cuMemAllocHost(ptr, size));
949 }
950 
951 inline static hipError_t hipHostAlloc(void** ptr, size_t size, unsigned int flags)
952  __attribute__((deprecated("use hipHostMalloc instead")));
953 inline static hipError_t hipHostAlloc(void** ptr, size_t size, unsigned int flags) {
954  return hipCUDAErrorTohipError(cudaHostAlloc(ptr, size, flags));
955 }
956 
957 inline static hipError_t hipHostMalloc(void** ptr, size_t size, unsigned int flags) {
958  return hipCUDAErrorTohipError(cudaHostAlloc(ptr, size, flags));
959 }
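// Minimal usage sketch (illustrative only; error checking omitted):
//   float* hostBuf = NULL;
//   hipHostMalloc((void**)&hostBuf, 1024 * sizeof(float), hipHostMallocMapped);
//   /* ... use the pinned buffer ... */
//   hipHostFree(hostBuf);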
960 
961 inline static hipError_t hipMemAdvise(const void* dev_ptr, size_t count, hipMemoryAdvise advice,
962  int device) {
963  return hipCUDAErrorTohipError(cudaMemAdvise(dev_ptr, count,
964  hipMemoryAdviseTocudaMemoryAdvise(advice), device));
965 }
966 
967 inline static hipError_t hipMemPrefetchAsync(const void* dev_ptr, size_t count, int device,
968  hipStream_t stream __dparm(0)) {
969  return hipCUDAErrorTohipError(cudaMemPrefetchAsync(dev_ptr, count, device, stream));
970 }
971 
972 inline static hipError_t hipMemRangeGetAttribute(void* data, size_t data_size,
973  hipMemRangeAttribute attribute,
974  const void* dev_ptr, size_t count) {
975  return hipCUDAErrorTohipError(cudaMemRangeGetAttribute(data, data_size,
976  hipMemRangeAttributeTocudaMemRangeAttribute(attribute), dev_ptr, count));
977 }
978 
979 inline static hipError_t hipMemRangeGetAttributes(void** data, size_t* data_sizes,
980  hipMemRangeAttribute* attributes,
981  size_t num_attributes, const void* dev_ptr,
982  size_t count) {
983  auto attrs = hipMemRangeAttributeTocudaMemRangeAttribute(*attributes);
984  return hipCUDAErrorTohipError(cudaMemRangeGetAttributes(data, data_sizes, &attrs,
985  num_attributes, dev_ptr, count));
986 }
987 
988 inline static hipError_t hipStreamAttachMemAsync(hipStream_t stream, hipDeviceptr_t* dev_ptr,
989  size_t length __dparm(0),
990  unsigned int flags __dparm(hipMemAttachSingle)) {
991  return hipCUDAErrorTohipError(cudaStreamAttachMemAsync(stream, dev_ptr, length, flags));
992 }
993 
994 inline static hipError_t hipMallocManaged(void** ptr, size_t size, unsigned int flags) {
995  return hipCUDAErrorTohipError(cudaMallocManaged(ptr, size, flags));
996 }
997 
998 inline static hipError_t hipMallocArray(hipArray** array, const hipChannelFormatDesc* desc,
999  size_t width, size_t height,
1000  unsigned int flags __dparm(hipArrayDefault)) {
1001  return hipCUDAErrorTohipError(cudaMallocArray(array, desc, width, height, flags));
1002 }
1003 
1004 inline static hipError_t hipMalloc3DArray(hipArray** array, const hipChannelFormatDesc* desc,
1005  hipExtent extent, unsigned int flags) {
1006  return hipCUDAErrorTohipError(cudaMalloc3DArray(array, desc, extent, flags));
1007 }
1008 
1009 inline static hipError_t hipFreeArray(hipArray* array) {
1010  return hipCUDAErrorTohipError(cudaFreeArray(array));
1011 }
1012 
1013 inline static hipError_t hipHostGetDevicePointer(void** devPtr, void* hostPtr, unsigned int flags) {
1014  return hipCUDAErrorTohipError(cudaHostGetDevicePointer(devPtr, hostPtr, flags));
1015 }
1016 
1017 inline static hipError_t hipHostGetFlags(unsigned int* flagsPtr, void* hostPtr) {
1018  return hipCUDAErrorTohipError(cudaHostGetFlags(flagsPtr, hostPtr));
1019 }
1020 
1021 inline static hipError_t hipHostRegister(void* ptr, size_t size, unsigned int flags) {
1022  return hipCUDAErrorTohipError(cudaHostRegister(ptr, size, flags));
1023 }
1024 
1025 inline static hipError_t hipHostUnregister(void* ptr) {
1026  return hipCUDAErrorTohipError(cudaHostUnregister(ptr));
1027 }
1028 
1029 inline static hipError_t hipFreeHost(void* ptr)
1030  __attribute__((deprecated("use hipHostFree instead")));
1031 inline static hipError_t hipFreeHost(void* ptr) {
1032  return hipCUDAErrorTohipError(cudaFreeHost(ptr));
1033 }
1034 
1035 inline static hipError_t hipHostFree(void* ptr) {
1036  return hipCUDAErrorTohipError(cudaFreeHost(ptr));
1037 }
1038 
1039 inline static hipError_t hipSetDevice(int device) {
1040  return hipCUDAErrorTohipError(cudaSetDevice(device));
1041 }
1042 
1043 inline static hipError_t hipChooseDevice(int* device, const hipDeviceProp_t* prop) {
1044  struct cudaDeviceProp cdprop;
1045  memset(&cdprop, 0x0, sizeof(struct cudaDeviceProp));
1046  cdprop.major = prop->major;
1047  cdprop.minor = prop->minor;
1048  cdprop.totalGlobalMem = prop->totalGlobalMem;
1049  cdprop.sharedMemPerBlock = prop->sharedMemPerBlock;
1050  cdprop.regsPerBlock = prop->regsPerBlock;
1051  cdprop.warpSize = prop->warpSize;
1052  cdprop.maxThreadsPerBlock = prop->maxThreadsPerBlock;
1053  cdprop.clockRate = prop->clockRate;
1054  cdprop.totalConstMem = prop->totalConstMem;
1055  cdprop.multiProcessorCount = prop->multiProcessorCount;
1056  cdprop.l2CacheSize = prop->l2CacheSize;
1057  cdprop.maxThreadsPerMultiProcessor = prop->maxThreadsPerMultiProcessor;
1058  cdprop.computeMode = prop->computeMode;
1059  cdprop.canMapHostMemory = prop->canMapHostMemory;
1060  cdprop.memoryClockRate = prop->memoryClockRate;
1061  cdprop.memoryBusWidth = prop->memoryBusWidth;
1062  return hipCUDAErrorTohipError(cudaChooseDevice(device, &cdprop));
1063 }
1064 
1065 inline static hipError_t hipMemcpyHtoD(hipDeviceptr_t dst, void* src, size_t size) {
1066  return hipCUResultTohipError(cuMemcpyHtoD(dst, src, size));
1067 }
1068 
1069 inline static hipError_t hipMemcpyDtoH(void* dst, hipDeviceptr_t src, size_t size) {
1070  return hipCUResultTohipError(cuMemcpyDtoH(dst, src, size));
1071 }
1072 
1073 inline static hipError_t hipMemcpyDtoD(hipDeviceptr_t dst, hipDeviceptr_t src, size_t size) {
1074  return hipCUResultTohipError(cuMemcpyDtoD(dst, src, size));
1075 }
1076 
1077 inline static hipError_t hipMemcpyHtoDAsync(hipDeviceptr_t dst, void* src, size_t size,
1078  hipStream_t stream) {
1079  return hipCUResultTohipError(cuMemcpyHtoDAsync(dst, src, size, stream));
1080 }
1081 
1082 inline static hipError_t hipMemcpyDtoHAsync(void* dst, hipDeviceptr_t src, size_t size,
1083  hipStream_t stream) {
1084  return hipCUResultTohipError(cuMemcpyDtoHAsync(dst, src, size, stream));
1085 }
1086 
1087 inline static hipError_t hipMemcpyDtoDAsync(hipDeviceptr_t dst, hipDeviceptr_t src, size_t size,
1088  hipStream_t stream) {
1089  return hipCUResultTohipError(cuMemcpyDtoDAsync(dst, src, size, stream));
1090 }
1091 
1092 inline static hipError_t hipMemcpy(void* dst, const void* src, size_t sizeBytes,
1093  hipMemcpyKind copyKind) {
1094  return hipCUDAErrorTohipError(
1095  cudaMemcpy(dst, src, sizeBytes, hipMemcpyKindToCudaMemcpyKind(copyKind)));
1096 }
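// Minimal usage sketch (illustrative only; error checking omitted):
//   void* devBuf = NULL;
//   hipMalloc(&devBuf, sizeBytes);
//   hipMemcpy(devBuf, hostBuf, sizeBytes, hipMemcpyHostToDevice);
//   hipMemcpy(hostBuf, devBuf, sizeBytes, hipMemcpyDeviceToHost);
//   hipFree(devBuf);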
1097 
1098 
1099 inline static hipError_t hipMemcpyWithStream(void* dst, const void* src,
1100  size_t sizeBytes, hipMemcpyKind copyKind,
1101  hipStream_t stream) {
1102  cudaError_t error = cudaMemcpyAsync(dst, src, sizeBytes,
1103  hipMemcpyKindToCudaMemcpyKind(copyKind),
1104  stream);
1105 
1106  if (error != cudaSuccess) return hipCUDAErrorTohipError(error);
1107 
1108  return hipCUDAErrorTohipError(cudaStreamSynchronize(stream));
1109 }
1110 
1111 inline static hipError_t hipMemcpyAsync(void* dst, const void* src, size_t sizeBytes,
1112  hipMemcpyKind copyKind, hipStream_t stream __dparm(0)) {
1113  return hipCUDAErrorTohipError(
1114  cudaMemcpyAsync(dst, src, sizeBytes, hipMemcpyKindToCudaMemcpyKind(copyKind), stream));
1115 }
1116 
1117 inline static hipError_t hipMemcpyToSymbol(const void* symbol, const void* src, size_t sizeBytes,
1118  size_t offset __dparm(0),
1119  hipMemcpyKind copyType __dparm(hipMemcpyHostToDevice)) {
1120  return hipCUDAErrorTohipError(cudaMemcpyToSymbol(symbol, src, sizeBytes, offset,
1121  hipMemcpyKindToCudaMemcpyKind(copyType)));
1122 }
1123 
1124 inline static hipError_t hipMemcpyToSymbolAsync(const void* symbol, const void* src,
1125  size_t sizeBytes, size_t offset,
1126  hipMemcpyKind copyType,
1127  hipStream_t stream __dparm(0)) {
1128  return hipCUDAErrorTohipError(cudaMemcpyToSymbolAsync(
1129  symbol, src, sizeBytes, offset, hipMemcpyKindToCudaMemcpyKind(copyType), stream));
1130 }
1131 
1132 inline static hipError_t hipMemcpyFromSymbol(void* dst, const void* symbolName, size_t sizeBytes,
1133  size_t offset __dparm(0),
1134  hipMemcpyKind kind __dparm(hipMemcpyDeviceToHost)) {
1135  return hipCUDAErrorTohipError(cudaMemcpyFromSymbol(dst, symbolName, sizeBytes, offset,
1136  hipMemcpyKindToCudaMemcpyKind(kind)));
1137 }
1138 
1139 inline static hipError_t hipMemcpyFromSymbolAsync(void* dst, const void* symbolName,
1140  size_t sizeBytes, size_t offset,
1141  hipMemcpyKind kind,
1142  hipStream_t stream __dparm(0)) {
1143  return hipCUDAErrorTohipError(cudaMemcpyFromSymbolAsync(
1144  dst, symbolName, sizeBytes, offset, hipMemcpyKindToCudaMemcpyKind(kind), stream));
1145 }
1146 
1147 inline static hipError_t hipGetSymbolAddress(void** devPtr, const void* symbolName) {
1148  return hipCUDAErrorTohipError(cudaGetSymbolAddress(devPtr, symbolName));
1149 }
1150 
1151 inline static hipError_t hipGetSymbolSize(size_t* size, const void* symbolName) {
1152  return hipCUDAErrorTohipError(cudaGetSymbolSize(size, symbolName));
1153 }
1154 
1155 inline static hipError_t hipMemcpy2D(void* dst, size_t dpitch, const void* src, size_t spitch,
1156  size_t width, size_t height, hipMemcpyKind kind) {
1157  return hipCUDAErrorTohipError(
1158  cudaMemcpy2D(dst, dpitch, src, spitch, width, height, hipMemcpyKindToCudaMemcpyKind(kind)));
1159 }
1160 
1161 inline static hipError_t hipMemcpyParam2D(const hip_Memcpy2D* pCopy) {
1162  return hipCUResultTohipError(cuMemcpy2D(pCopy));
1163 }
1164 
1165 inline static hipError_t hipMemcpyParam2DAsync(const hip_Memcpy2D* pCopy, hipStream_t stream __dparm(0)) {
1166  return hipCUResultTohipError(cuMemcpy2DAsync(pCopy, stream));
1167 }
1168 
1169 inline static hipError_t hipMemcpy3D(const struct hipMemcpy3DParms *p) {
1170  return hipCUDAErrorTohipError(cudaMemcpy3D(p));
1171 }
1172 
1173 inline static hipError_t hipMemcpy3DAsync(const struct hipMemcpy3DParms *p, hipStream_t stream) {
1174  return hipCUDAErrorTohipError(cudaMemcpy3DAsync(p, stream));
1175 }
1176 
1177 inline static hipError_t hipDrvMemcpy3D(const HIP_MEMCPY3D* pCopy) {
1178  return hipCUResultTohipError(cuMemcpy3D(pCopy));
1179 }
1180 
1181 inline static hipError_t hipDrvMemcpy3DAsync(const HIP_MEMCPY3D* pCopy, hipStream_t stream) {
1182  return hipCUResultTohipError(cuMemcpy3DAsync(pCopy, stream));
1183 }
1184 
1185 inline static hipError_t hipMemcpy2DAsync(void* dst, size_t dpitch, const void* src, size_t spitch,
1186  size_t width, size_t height, hipMemcpyKind kind,
1187  hipStream_t stream) {
1188  return hipCUDAErrorTohipError(cudaMemcpy2DAsync(dst, dpitch, src, spitch, width, height,
1189  hipMemcpyKindToCudaMemcpyKind(kind), stream));
1190 }
1191 
1192 inline static hipError_t hipMemcpy2DFromArray(void* dst, size_t dpitch, hipArray* src,
1193  size_t wOffset, size_t hOffset, size_t width,
1194  size_t height, hipMemcpyKind kind) {
1195  return hipCUDAErrorTohipError(cudaMemcpy2DFromArray(dst, dpitch, src, wOffset, hOffset, width,
1196  height,
1197  hipMemcpyKindToCudaMemcpyKind(kind)));
1198 }
1199 
1200 inline static hipError_t hipMemcpy2DFromArrayAsync(void* dst, size_t dpitch, hipArray* src,
1201  size_t wOffset, size_t hOffset, size_t width,
1202  size_t height, hipMemcpyKind kind,
1203  hipStream_t stream) {
1204  return hipCUDAErrorTohipError(cudaMemcpy2DFromArrayAsync(dst, dpitch, src, wOffset, hOffset,
1205  width, height,
1206  hipMemcpyKindToCudaMemcpyKind(kind),
1207  stream));
1208 }
1209 
1210 inline static hipError_t hipMemcpy2DToArray(hipArray* dst, size_t wOffset, size_t hOffset,
1211  const void* src, size_t spitch, size_t width,
1212  size_t height, hipMemcpyKind kind) {
1213  return hipCUDAErrorTohipError(cudaMemcpy2DToArray(dst, wOffset, hOffset, src, spitch, width,
1214  height, hipMemcpyKindToCudaMemcpyKind(kind)));
1215 }
1216 
1217 inline static hipError_t hipMemcpy2DToArrayAsync(hipArray* dst, size_t wOffset, size_t hOffset,
1218  const void* src, size_t spitch, size_t width,
1219  size_t height, hipMemcpyKind kind,
1220  hipStream_t stream) {
1221  return hipCUDAErrorTohipError(cudaMemcpy2DToArrayAsync(dst, wOffset, hOffset, src, spitch,
1222  width, height,
1223  hipMemcpyKindToCudaMemcpyKind(kind),
1224  stream));
1225 }
1226 
1227 __HIP_DEPRECATED inline static hipError_t hipMemcpyToArray(hipArray* dst, size_t wOffset,
1228  size_t hOffset, const void* src,
1229  size_t count, hipMemcpyKind kind) {
1230  return hipCUDAErrorTohipError(
1231  cudaMemcpyToArray(dst, wOffset, hOffset, src, count, hipMemcpyKindToCudaMemcpyKind(kind)));
1232 }
1233 
1234 __HIP_DEPRECATED inline static hipError_t hipMemcpyFromArray(void* dst, hipArray_const_t srcArray,
1235  size_t wOffset, size_t hOffset,
1236  size_t count, hipMemcpyKind kind) {
1237  return hipCUDAErrorTohipError(cudaMemcpyFromArray(dst, srcArray, wOffset, hOffset, count,
1238  hipMemcpyKindToCudaMemcpyKind(kind)));
1239 }
1240 
1241 inline static hipError_t hipMemcpyAtoH(void* dst, hipArray* srcArray, size_t srcOffset,
1242  size_t count) {
1243  return hipCUResultTohipError(cuMemcpyAtoH(dst, (CUarray)srcArray, srcOffset, count));
1244 }
1245 
1246 inline static hipError_t hipMemcpyHtoA(hipArray* dstArray, size_t dstOffset, const void* srcHost,
1247  size_t count) {
1248  return hipCUResultTohipError(cuMemcpyHtoA((CUarray)dstArray, dstOffset, srcHost, count));
1249 }
1250 
1251 inline static hipError_t hipDeviceSynchronize() {
1252  return hipCUDAErrorTohipError(cudaDeviceSynchronize());
1253 }
1254 
1255 inline static hipError_t hipDeviceGetCacheConfig(hipFuncCache_t* pCacheConfig) {
1256  return hipCUDAErrorTohipError(cudaDeviceGetCacheConfig(pCacheConfig));
1257 }
1258 
1259 inline static hipError_t hipFuncSetAttribute(const void* func, hipFuncAttribute attr, int value) {
1260  return hipCUDAErrorTohipError(cudaFuncSetAttribute(func, attr, value));
1261 }
1262 
1263 inline static hipError_t hipDeviceSetCacheConfig(hipFuncCache_t cacheConfig) {
1264  return hipCUDAErrorTohipError(cudaDeviceSetCacheConfig(cacheConfig));
1265 }
1266 
1267 inline static hipError_t hipFuncSetSharedMemConfig(const void* func, hipSharedMemConfig config) {
1268  return hipCUDAErrorTohipError(cudaFuncSetSharedMemConfig(func, config));
1269 }
1270 
1271 inline static const char* hipGetErrorString(hipError_t error) {
1272  return cudaGetErrorString(hipErrorToCudaError(error));
1273 }
1274 
1275 inline static const char* hipGetErrorName(hipError_t error) {
1276  return cudaGetErrorName(hipErrorToCudaError(error));
1277 }
1278 
1279 inline static hipError_t hipGetDeviceCount(int* count) {
1280  return hipCUDAErrorTohipError(cudaGetDeviceCount(count));
1281 }
1282 
1283 inline static hipError_t hipGetDevice(int* device) {
1284  return hipCUDAErrorTohipError(cudaGetDevice(device));
1285 }
1286 
1287 inline static hipError_t hipIpcCloseMemHandle(void* devPtr) {
1288  return hipCUDAErrorTohipError(cudaIpcCloseMemHandle(devPtr));
1289 }
1290 
1291 inline static hipError_t hipIpcGetEventHandle(hipIpcEventHandle_t* handle, hipEvent_t event) {
1292  return hipCUDAErrorTohipError(cudaIpcGetEventHandle(handle, event));
1293 }
1294 
1295 inline static hipError_t hipIpcGetMemHandle(hipIpcMemHandle_t* handle, void* devPtr) {
1296  return hipCUDAErrorTohipError(cudaIpcGetMemHandle(handle, devPtr));
1297 }
1298 
1299 inline static hipError_t hipIpcOpenEventHandle(hipEvent_t* event, hipIpcEventHandle_t handle) {
1300  return hipCUDAErrorTohipError(cudaIpcOpenEventHandle(event, handle));
1301 }
1302 
1303 inline static hipError_t hipIpcOpenMemHandle(void** devPtr, hipIpcMemHandle_t handle,
1304  unsigned int flags) {
1305  return hipCUDAErrorTohipError(cudaIpcOpenMemHandle(devPtr, handle, flags));
1306 }
1307 
1308 inline static hipError_t hipMemset(void* devPtr, int value, size_t count) {
1309  return hipCUDAErrorTohipError(cudaMemset(devPtr, value, count));
1310 }
1311 
1312 inline static hipError_t hipMemsetD32(hipDeviceptr_t devPtr, int value, size_t count) {
1313  return hipCUResultTohipError(cuMemsetD32(devPtr, value, count));
1314 }
1315 
1316 inline static hipError_t hipMemsetAsync(void* devPtr, int value, size_t count,
1317  hipStream_t stream __dparm(0)) {
1318  return hipCUDAErrorTohipError(cudaMemsetAsync(devPtr, value, count, stream));
1319 }
1320 
1321 inline static hipError_t hipMemsetD32Async(hipDeviceptr_t devPtr, int value, size_t count,
1322  hipStream_t stream __dparm(0)) {
1323  return hipCUResultTohipError(cuMemsetD32Async(devPtr, value, count, stream));
1324 }
1325 
1326 inline static hipError_t hipMemsetD8(hipDeviceptr_t dest, unsigned char value, size_t sizeBytes) {
1327  return hipCUResultTohipError(cuMemsetD8(dest, value, sizeBytes));
1328 }
1329 
1330 inline static hipError_t hipMemsetD8Async(hipDeviceptr_t dest, unsigned char value, size_t sizeBytes,
1331  hipStream_t stream __dparm(0)) {
1332  return hipCUResultTohipError(cuMemsetD8Async(dest, value, sizeBytes, stream));
1333 }
1334 
1335 inline static hipError_t hipMemsetD16(hipDeviceptr_t dest, unsigned short value, size_t sizeBytes) {
1336  return hipCUResultTohipError(cuMemsetD16(dest, value, sizeBytes));
1337 }
1338 
1339 inline static hipError_t hipMemsetD16Async(hipDeviceptr_t dest, unsigned short value, size_t sizeBytes,
1340  hipStream_t stream __dparm(0)) {
1341  return hipCUResultTohipError(cuMemsetD16Async(dest, value, sizeBytes, stream));
1342 }
1343 
1344 inline static hipError_t hipMemset2D(void* dst, size_t pitch, int value, size_t width, size_t height) {
1345  return hipCUDAErrorTohipError(cudaMemset2D(dst, pitch, value, width, height));
1346 }
1347 
1348 inline static hipError_t hipMemset2DAsync(void* dst, size_t pitch, int value, size_t width, size_t height, hipStream_t stream __dparm(0)) {
1349  return hipCUDAErrorTohipError(cudaMemset2DAsync(dst, pitch, value, width, height, stream));
1350 }
1351 
1352 inline static hipError_t hipMemset3D(hipPitchedPtr pitchedDevPtr, int value, hipExtent extent ){
1353  return hipCUDAErrorTohipError(cudaMemset3D(pitchedDevPtr, value, extent));
1354 }
1355 
1356 inline static hipError_t hipMemset3DAsync(hipPitchedPtr pitchedDevPtr, int value, hipExtent extent, hipStream_t stream __dparm(0) ){
1357  return hipCUDAErrorTohipError(cudaMemset3DAsync(pitchedDevPtr, value, extent, stream));
1358 }
1359 
1360 inline static hipError_t hipGetDeviceProperties(hipDeviceProp_t* p_prop, int device) {
1361  struct cudaDeviceProp cdprop;
1362  cudaError_t cerror;
1363  cerror = cudaGetDeviceProperties(&cdprop, device);
1364 
1365  strncpy(p_prop->name, cdprop.name, 256);
1366  p_prop->totalGlobalMem = cdprop.totalGlobalMem;
1367  p_prop->sharedMemPerBlock = cdprop.sharedMemPerBlock;
1368  p_prop->regsPerBlock = cdprop.regsPerBlock;
1369  p_prop->warpSize = cdprop.warpSize;
1370  p_prop->maxThreadsPerBlock = cdprop.maxThreadsPerBlock;
1371  for (int i = 0; i < 3; i++) {
1372  p_prop->maxThreadsDim[i] = cdprop.maxThreadsDim[i];
1373  p_prop->maxGridSize[i] = cdprop.maxGridSize[i];
1374  }
1375  p_prop->clockRate = cdprop.clockRate;
1376  p_prop->memoryClockRate = cdprop.memoryClockRate;
1377  p_prop->memoryBusWidth = cdprop.memoryBusWidth;
1378  p_prop->totalConstMem = cdprop.totalConstMem;
1379  p_prop->major = cdprop.major;
1380  p_prop->minor = cdprop.minor;
1381  p_prop->multiProcessorCount = cdprop.multiProcessorCount;
1382  p_prop->l2CacheSize = cdprop.l2CacheSize;
1383  p_prop->maxThreadsPerMultiProcessor = cdprop.maxThreadsPerMultiProcessor;
1384  p_prop->computeMode = cdprop.computeMode;
1385  p_prop->clockInstructionRate = cdprop.clockRate; // Same as clock-rate:
1386 
1387  int ccVers = p_prop->major * 100 + p_prop->minor * 10;
1388  p_prop->arch.hasGlobalInt32Atomics = (ccVers >= 110);
1389  p_prop->arch.hasGlobalFloatAtomicExch = (ccVers >= 110);
1390  p_prop->arch.hasSharedInt32Atomics = (ccVers >= 120);
1391  p_prop->arch.hasSharedFloatAtomicExch = (ccVers >= 120);
1392  p_prop->arch.hasFloatAtomicAdd = (ccVers >= 200);
1393  p_prop->arch.hasGlobalInt64Atomics = (ccVers >= 120);
1394  p_prop->arch.hasSharedInt64Atomics = (ccVers >= 110);
1395  p_prop->arch.hasDoubles = (ccVers >= 130);
1396  p_prop->arch.hasWarpVote = (ccVers >= 120);
1397  p_prop->arch.hasWarpBallot = (ccVers >= 200);
1398  p_prop->arch.hasWarpShuffle = (ccVers >= 300);
1399  p_prop->arch.hasFunnelShift = (ccVers >= 350);
1400  p_prop->arch.hasThreadFenceSystem = (ccVers >= 200);
1401  p_prop->arch.hasSyncThreadsExt = (ccVers >= 200);
1402  p_prop->arch.hasSurfaceFuncs = (ccVers >= 200);
1403  p_prop->arch.has3dGrid = (ccVers >= 200);
1404  p_prop->arch.hasDynamicParallelism = (ccVers >= 350);
1405 
1406  p_prop->concurrentKernels = cdprop.concurrentKernels;
1407  p_prop->pciDomainID = cdprop.pciDomainID;
1408  p_prop->pciBusID = cdprop.pciBusID;
1409  p_prop->pciDeviceID = cdprop.pciDeviceID;
1410  p_prop->maxSharedMemoryPerMultiProcessor = cdprop.sharedMemPerMultiprocessor;
1411  p_prop->isMultiGpuBoard = cdprop.isMultiGpuBoard;
1412  p_prop->canMapHostMemory = cdprop.canMapHostMemory;
1413  p_prop->gcnArch = 0; // Not a GCN arch
1414  p_prop->integrated = cdprop.integrated;
1415  p_prop->cooperativeLaunch = cdprop.cooperativeLaunch;
1416  p_prop->cooperativeMultiDeviceLaunch = cdprop.cooperativeMultiDeviceLaunch;
1421 
1422  p_prop->maxTexture1D = cdprop.maxTexture1D;
1423  p_prop->maxTexture2D[0] = cdprop.maxTexture2D[0];
1424  p_prop->maxTexture2D[1] = cdprop.maxTexture2D[1];
1425  p_prop->maxTexture3D[0] = cdprop.maxTexture3D[0];
1426  p_prop->maxTexture3D[1] = cdprop.maxTexture3D[1];
1427  p_prop->maxTexture3D[2] = cdprop.maxTexture3D[2];
1428 
1429  p_prop->memPitch = cdprop.memPitch;
1430  p_prop->textureAlignment = cdprop.textureAlignment;
1431  p_prop->texturePitchAlignment = cdprop.texturePitchAlignment;
1432  p_prop->kernelExecTimeoutEnabled = cdprop.kernelExecTimeoutEnabled;
1433  p_prop->ECCEnabled = cdprop.ECCEnabled;
1434  p_prop->tccDriver = cdprop.tccDriver;
1435 
1436  return hipCUDAErrorTohipError(cerror);
1437 }
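// Minimal usage sketch (illustrative only):
//   hipDeviceProp_t prop;
//   if (hipGetDeviceProperties(&prop, 0) == hipSuccess) {
//       printf("%s: %d SMs, compute capability %d.%d\n", prop.name,
//              prop.multiProcessorCount, prop.major, prop.minor);
//   }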
1438 
1439 inline static hipError_t hipDeviceGetAttribute(int* pi, hipDeviceAttribute_t attr, int device) {
1440  enum cudaDeviceAttr cdattr;
1441  cudaError_t cerror;
1442 
1443  switch (attr) {
1444  case hipDeviceAttributeMaxThreadsPerBlock:
1445  cdattr = cudaDevAttrMaxThreadsPerBlock;
1446  break;
1447  case hipDeviceAttributeMaxBlockDimX:
1448  cdattr = cudaDevAttrMaxBlockDimX;
1449  break;
1450  case hipDeviceAttributeMaxBlockDimY:
1451  cdattr = cudaDevAttrMaxBlockDimY;
1452  break;
1453  case hipDeviceAttributeMaxBlockDimZ:
1454  cdattr = cudaDevAttrMaxBlockDimZ;
1455  break;
1456  case hipDeviceAttributeMaxGridDimX:
1457  cdattr = cudaDevAttrMaxGridDimX;
1458  break;
1459  case hipDeviceAttributeMaxGridDimY:
1460  cdattr = cudaDevAttrMaxGridDimY;
1461  break;
1462  case hipDeviceAttributeMaxGridDimZ:
1463  cdattr = cudaDevAttrMaxGridDimZ;
1464  break;
1465  case hipDeviceAttributeMaxSharedMemoryPerBlock:
1466  cdattr = cudaDevAttrMaxSharedMemoryPerBlock;
1467  break;
1468  case hipDeviceAttributeTotalConstantMemory:
1469  cdattr = cudaDevAttrTotalConstantMemory;
1470  break;
1471  case hipDeviceAttributeWarpSize:
1472  cdattr = cudaDevAttrWarpSize;
1473  break;
1474  case hipDeviceAttributeMaxRegistersPerBlock:
1475  cdattr = cudaDevAttrMaxRegistersPerBlock;
1476  break;
1477  case hipDeviceAttributeClockRate:
1478  cdattr = cudaDevAttrClockRate;
1479  break;
1480  case hipDeviceAttributeMemoryClockRate:
1481  cdattr = cudaDevAttrMemoryClockRate;
1482  break;
1483  case hipDeviceAttributeMemoryBusWidth:
1484  cdattr = cudaDevAttrGlobalMemoryBusWidth;
1485  break;
1486  case hipDeviceAttributeMultiprocessorCount:
1487  cdattr = cudaDevAttrMultiProcessorCount;
1488  break;
1489  case hipDeviceAttributeComputeMode:
1490  cdattr = cudaDevAttrComputeMode;
1491  break;
1492  case hipDeviceAttributeL2CacheSize:
1493  cdattr = cudaDevAttrL2CacheSize;
1494  break;
1495  case hipDeviceAttributeMaxThreadsPerMultiProcessor:
1496  cdattr = cudaDevAttrMaxThreadsPerMultiProcessor;
1497  break;
1498  case hipDeviceAttributeComputeCapabilityMajor:
1499  cdattr = cudaDevAttrComputeCapabilityMajor;
1500  break;
1501  case hipDeviceAttributeComputeCapabilityMinor:
1502  cdattr = cudaDevAttrComputeCapabilityMinor;
1503  break;
1504  case hipDeviceAttributeConcurrentKernels:
1505  cdattr = cudaDevAttrConcurrentKernels;
1506  break;
1507  case hipDeviceAttributePciBusId:
1508  cdattr = cudaDevAttrPciBusId;
1509  break;
1510  case hipDeviceAttributePciDeviceId:
1511  cdattr = cudaDevAttrPciDeviceId;
1512  break;
1513  case hipDeviceAttributeMaxSharedMemoryPerMultiprocessor:
1514  cdattr = cudaDevAttrMaxSharedMemoryPerMultiprocessor;
1515  break;
1516  case hipDeviceAttributeIsMultiGpuBoard:
1517  cdattr = cudaDevAttrIsMultiGpuBoard;
1518  break;
1519  case hipDeviceAttributeIntegrated:
1520  cdattr = cudaDevAttrIntegrated;
1521  break;
1522  case hipDeviceAttributeMaxTexture1DWidth:
1523  cdattr = cudaDevAttrMaxTexture1DWidth;
1524  break;
1525  case hipDeviceAttributeMaxTexture2DWidth:
1526  cdattr = cudaDevAttrMaxTexture2DWidth;
1527  break;
1528  case hipDeviceAttributeMaxTexture2DHeight:
1529  cdattr = cudaDevAttrMaxTexture2DHeight;
1530  break;
1531  case hipDeviceAttributeMaxTexture3DWidth:
1532  cdattr = cudaDevAttrMaxTexture3DWidth;
1533  break;
1534  case hipDeviceAttributeMaxTexture3DHeight:
1535  cdattr = cudaDevAttrMaxTexture3DHeight;
1536  break;
1537  case hipDeviceAttributeMaxTexture3DDepth:
1538  cdattr = cudaDevAttrMaxTexture3DDepth;
1539  break;
1540  case hipDeviceAttributeMaxPitch:
1541  cdattr = cudaDevAttrMaxPitch;
1542  break;
1543  case hipDeviceAttributeTextureAlignment:
1544  cdattr = cudaDevAttrTextureAlignment;
1545  break;
1546  case hipDeviceAttributeTexturePitchAlignment:
1547  cdattr = cudaDevAttrTexturePitchAlignment;
1548  break;
1549  case hipDeviceAttributeKernelExecTimeout:
1550  cdattr = cudaDevAttrKernelExecTimeout;
1551  break;
1552  case hipDeviceAttributeCanMapHostMemory:
1553  cdattr = cudaDevAttrCanMapHostMemory;
1554  break;
1555  case hipDeviceAttributeEccEnabled:
1556  cdattr = cudaDevAttrEccEnabled;
1557  break;
1558  case hipDeviceAttributeCooperativeLaunch:
1559  cdattr = cudaDevAttrCooperativeLaunch;
1560  break;
1561  case hipDeviceAttributeCooperativeMultiDeviceLaunch:
1562  cdattr = cudaDevAttrCooperativeMultiDeviceLaunch;
1563  break;
1564  default:
1565  return hipCUDAErrorTohipError(cudaErrorInvalidValue);
1566  }
1567 
1568  cerror = cudaDeviceGetAttribute(pi, cdattr, device);
1569 
1570  return hipCUDAErrorTohipError(cerror);
1571 }
1572 
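// Illustrative usage sketch (editorial addition, not part of the original header):
// querying one attribute through the hipDeviceGetAttribute() wrapper above.
// `deviceId` is assumed to be a valid device ordinal, e.g. obtained from hipGetDevice().
//
//   int warpSize = 0;
//   if (hipDeviceGetAttribute(&warpSize, hipDeviceAttributeWarpSize, deviceId) == hipSuccess) {
//       // warpSize now holds the value cudaDeviceGetAttribute() reports for cudaDevAttrWarpSize.
//   }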
1573 inline static hipError_t hipOccupancyMaxActiveBlocksPerMultiprocessor(int* numBlocks,
1574  const void* func,
1575  int blockSize,
1576  size_t dynamicSMemSize) {
1577  return hipCUDAErrorTohipError(cudaOccupancyMaxActiveBlocksPerMultiprocessor(numBlocks, func,
1578  blockSize, dynamicSMemSize));
1579 }
1580 
1581 inline static hipError_t hipOccupancyMaxActiveBlocksPerMultiprocessorWithFlags(int* numBlocks,
1582  const void* func,
1583  int blockSize,
1584  size_t dynamicSMemSize,
1585  unsigned int flags) {
1586  return hipCUDAErrorTohipError(cudaOccupancyMaxActiveBlocksPerMultiprocessorWithFlags(numBlocks, func,
1587  blockSize, dynamicSMemSize, flags));
1588 }
1589 
1590 inline static hipError_t hipModuleOccupancyMaxActiveBlocksPerMultiprocessor(int* numBlocks,
1591  hipFunction_t f,
1592  int blockSize,
1593  size_t dynamicSMemSize ){
1594  return hipCUResultTohipError(cuOccupancyMaxActiveBlocksPerMultiprocessor(numBlocks, f,
1595  blockSize, dynamicSMemSize));
1596 }
1597 
1598 inline static hipError_t hipModuleOccupancyMaxActiveBlocksPerMultiprocessorWithFlags(int* numBlocks,
1599  hipFunction_t f,
1600  int blockSize,
1601  size_t dynamicSMemSize,
1602  unsigned int flags ) {
1603  return hipCUResultTohipError(cuOccupancyMaxActiveBlocksPerMultiprocessorWithFlags(numBlocks,f,
1604  blockSize, dynamicSMemSize, flags));
1605 }
1606 
1607 //TODO - Match CUoccupancyB2DSize
1608 inline static hipError_t hipModuleOccupancyMaxPotentialBlockSize(int* gridSize, int* blockSize,
1609  hipFunction_t f, size_t dynSharedMemPerBlk,
1610  int blockSizeLimit){
1611  return hipCUResultTohipError(cuOccupancyMaxPotentialBlockSize(gridSize, blockSize, f, NULL,
1612  dynSharedMemPerBlk, blockSizeLimit));
1613 }
1614 
1615 //TODO - Match CUoccupancyB2DSize
1616 inline static hipError_t hipModuleOccupancyMaxPotentialBlockSizeWithFlags(int* gridSize, int* blockSize,
1617  hipFunction_t f, size_t dynSharedMemPerBlk,
1618  int blockSizeLimit, unsigned int flags){
1619  return hipCUResultTohipError(cuOccupancyMaxPotentialBlockSizeWithFlags(gridSize, blockSize, f, NULL,
1620  dynSharedMemPerBlk, blockSizeLimit, flags));
1621 }
1622 
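// Illustrative usage sketch (editorial addition, not part of the original header):
// picking a launch configuration with the module occupancy wrappers above.
// `kernelFunc` is a hypothetical hipFunction_t obtained via hipModuleGetFunction().
//
//   int gridSize = 0, blockSize = 0;
//   hipModuleOccupancyMaxPotentialBlockSize(&gridSize, &blockSize, kernelFunc,
//                                           0 /*dynSharedMemPerBlk*/, 0 /*no blockSizeLimit*/);
//   int numBlocks = 0;
//   hipModuleOccupancyMaxActiveBlocksPerMultiprocessor(&numBlocks, kernelFunc, blockSize, 0);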
1623 inline static hipError_t hipPointerGetAttributes(hipPointerAttribute_t* attributes, const void* ptr) {
1624  struct cudaPointerAttributes cPA;
1625  hipError_t err = hipCUDAErrorTohipError(cudaPointerGetAttributes(&cPA, ptr));
1626  if (err == hipSuccess) {
1627 #if (CUDART_VERSION >= 11000)
1628  auto memType = cPA.type;
1629 #else
1630  unsigned memType = cPA.memoryType; // No auto because CUDA 10.2 doesn't force C++11
1631 #endif
1632  switch (memType) {
1633  case cudaMemoryTypeDevice:
1634  attributes->memoryType = hipMemoryTypeDevice;
1635  break;
1636  case cudaMemoryTypeHost:
1637  attributes->memoryType = hipMemoryTypeHost;
1638  break;
1639  default:
1640  return hipErrorUnknown;
1641  }
1642  attributes->device = cPA.device;
1643  attributes->devicePointer = cPA.devicePointer;
1644  attributes->hostPointer = cPA.hostPointer;
1645  attributes->isManaged = 0;
1646  attributes->allocationFlags = 0;
1647  }
1648  return err;
1649 }
1650 
1651 inline static hipError_t hipMemGetInfo(size_t* free, size_t* total) {
1652  return hipCUDAErrorTohipError(cudaMemGetInfo(free, total));
1653 }
1654 
1655 inline static hipError_t hipEventCreate(hipEvent_t* event) {
1656  return hipCUDAErrorTohipError(cudaEventCreate(event));
1657 }
1658 
1659 inline static hipError_t hipEventRecord(hipEvent_t event, hipStream_t stream __dparm(NULL)) {
1660  return hipCUDAErrorTohipError(cudaEventRecord(event, stream));
1661 }
1662 
1663 inline static hipError_t hipEventSynchronize(hipEvent_t event) {
1664  return hipCUDAErrorTohipError(cudaEventSynchronize(event));
1665 }
1666 
1667 inline static hipError_t hipEventElapsedTime(float* ms, hipEvent_t start, hipEvent_t stop) {
1668  return hipCUDAErrorTohipError(cudaEventElapsedTime(ms, start, stop));
1669 }
1670 
1671 inline static hipError_t hipEventDestroy(hipEvent_t event) {
1672  return hipCUDAErrorTohipError(cudaEventDestroy(event));
1673 }
1674 
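// Illustrative usage sketch (editorial addition, not part of the original header):
// timing work on a stream with the event wrappers above. `stream` and the
// enqueued work are assumptions for the example.
//
//   hipEvent_t start, stop;
//   hipEventCreate(&start);
//   hipEventCreate(&stop);
//   hipEventRecord(start, stream);
//   /* ... enqueue kernels or copies on `stream` ... */
//   hipEventRecord(stop, stream);
//   hipEventSynchronize(stop);
//   float ms = 0.0f;
//   hipEventElapsedTime(&ms, start, stop);
//   hipEventDestroy(start);
//   hipEventDestroy(stop);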
1675 inline static hipError_t hipStreamCreateWithFlags(hipStream_t* stream, unsigned int flags) {
1676  return hipCUDAErrorTohipError(cudaStreamCreateWithFlags(stream, flags));
1677 }
1678 
1679 inline static hipError_t hipStreamCreateWithPriority(hipStream_t* stream, unsigned int flags, int priority) {
1680  return hipCUDAErrorTohipError(cudaStreamCreateWithPriority(stream, flags, priority));
1681 }
1682 
1683 inline static hipError_t hipDeviceGetStreamPriorityRange(int* leastPriority, int* greatestPriority) {
1684  return hipCUDAErrorTohipError(cudaDeviceGetStreamPriorityRange(leastPriority, greatestPriority));
1685 }
1686 
1687 inline static hipError_t hipStreamCreate(hipStream_t* stream) {
1688  return hipCUDAErrorTohipError(cudaStreamCreate(stream));
1689 }
1690 
1691 inline static hipError_t hipStreamSynchronize(hipStream_t stream) {
1692  return hipCUDAErrorTohipError(cudaStreamSynchronize(stream));
1693 }
1694 
1695 inline static hipError_t hipStreamDestroy(hipStream_t stream) {
1696  return hipCUDAErrorTohipError(cudaStreamDestroy(stream));
1697 }
1698 
1699 inline static hipError_t hipStreamGetFlags(hipStream_t stream, unsigned int *flags) {
1700  return hipCUDAErrorTohipError(cudaStreamGetFlags(stream, flags));
1701 }
1702 
1703 inline static hipError_t hipStreamGetPriority(hipStream_t stream, int *priority) {
1704  return hipCUDAErrorTohipError(cudaStreamGetPriority(stream, priority));
1705 }
1706 
1707 inline static hipError_t hipStreamWaitEvent(hipStream_t stream, hipEvent_t event,
1708  unsigned int flags) {
1709  return hipCUDAErrorTohipError(cudaStreamWaitEvent(stream, event, flags));
1710 }
1711 
1712 inline static hipError_t hipStreamQuery(hipStream_t stream) {
1713  return hipCUDAErrorTohipError(cudaStreamQuery(stream));
1714 }
1715 
1716 inline static hipError_t hipStreamAddCallback(hipStream_t stream, hipStreamCallback_t callback,
1717  void* userData, unsigned int flags) {
1718  return hipCUDAErrorTohipError(
1719  cudaStreamAddCallback(stream, (cudaStreamCallback_t)callback, userData, flags));
1720 }
1721 
1722 inline static hipError_t hipDriverGetVersion(int* driverVersion) {
1723  cudaError_t err = cudaDriverGetVersion(driverVersion);
1724 
1725  // Override driver version to match version reported on HCC side.
1726  *driverVersion = 4;
1727 
1728  return hipCUDAErrorTohipError(err);
1729 }
1730 
1731 inline static hipError_t hipRuntimeGetVersion(int* runtimeVersion) {
1732  return hipCUDAErrorTohipError(cudaRuntimeGetVersion(runtimeVersion));
1733 }
1734 
1735 inline static hipError_t hipDeviceCanAccessPeer(int* canAccessPeer, int device, int peerDevice) {
1736  return hipCUDAErrorTohipError(cudaDeviceCanAccessPeer(canAccessPeer, device, peerDevice));
1737 }
1738 
1739 inline static hipError_t hipDeviceDisablePeerAccess(int peerDevice) {
1740  return hipCUDAErrorTohipError(cudaDeviceDisablePeerAccess(peerDevice));
1741 }
1742 
1743 inline static hipError_t hipDeviceEnablePeerAccess(int peerDevice, unsigned int flags) {
1744  return hipCUDAErrorTohipError(cudaDeviceEnablePeerAccess(peerDevice, flags));
1745 }
1746 
1747 inline static hipError_t hipCtxDisablePeerAccess(hipCtx_t peerCtx) {
1748  return hipCUResultTohipError(cuCtxDisablePeerAccess(peerCtx));
1749 }
1750 
1751 inline static hipError_t hipCtxEnablePeerAccess(hipCtx_t peerCtx, unsigned int flags) {
1752  return hipCUResultTohipError(cuCtxEnablePeerAccess(peerCtx, flags));
1753 }
1754 
1755 inline static hipError_t hipDevicePrimaryCtxGetState(hipDevice_t dev, unsigned int* flags,
1756  int* active) {
1757  return hipCUResultTohipError(cuDevicePrimaryCtxGetState(dev, flags, active));
1758 }
1759 
1760 inline static hipError_t hipDevicePrimaryCtxRelease(hipDevice_t dev) {
1761  return hipCUResultTohipError(cuDevicePrimaryCtxRelease(dev));
1762 }
1763 
1764 inline static hipError_t hipDevicePrimaryCtxRetain(hipCtx_t* pctx, hipDevice_t dev) {
1765  return hipCUResultTohipError(cuDevicePrimaryCtxRetain(pctx, dev));
1766 }
1767 
1768 inline static hipError_t hipDevicePrimaryCtxReset(hipDevice_t dev) {
1769  return hipCUResultTohipError(cuDevicePrimaryCtxReset(dev));
1770 }
1771 
1772 inline static hipError_t hipDevicePrimaryCtxSetFlags(hipDevice_t dev, unsigned int flags) {
1773  return hipCUResultTohipError(cuDevicePrimaryCtxSetFlags(dev, flags));
1774 }
1775 
1776 inline static hipError_t hipMemGetAddressRange(hipDeviceptr_t* pbase, size_t* psize,
1777  hipDeviceptr_t dptr) {
1778  return hipCUResultTohipError(cuMemGetAddressRange(pbase, psize, dptr));
1779 }
1780 
1781 inline static hipError_t hipMemcpyPeer(void* dst, int dstDevice, const void* src, int srcDevice,
1782  size_t count) {
1783  return hipCUDAErrorTohipError(cudaMemcpyPeer(dst, dstDevice, src, srcDevice, count));
1784 }
1785 
1786 inline static hipError_t hipMemcpyPeerAsync(void* dst, int dstDevice, const void* src,
1787  int srcDevice, size_t count,
1788  hipStream_t stream __dparm(0)) {
1789  return hipCUDAErrorTohipError(
1790  cudaMemcpyPeerAsync(dst, dstDevice, src, srcDevice, count, stream));
1791 }
1792 
1793 // Profile APIs:
1794 inline static hipError_t hipProfilerStart() { return hipCUDAErrorTohipError(cudaProfilerStart()); }
1795 
1796 inline static hipError_t hipProfilerStop() { return hipCUDAErrorTohipError(cudaProfilerStop()); }
1797 
1798 inline static hipError_t hipGetDeviceFlags(unsigned int* flags) {
1799  return hipCUDAErrorTohipError(cudaGetDeviceFlags(flags));
1800 }
1801 
1802 inline static hipError_t hipSetDeviceFlags(unsigned int flags) {
1803  return hipCUDAErrorTohipError(cudaSetDeviceFlags(flags));
1804 }
1805 
1806 inline static hipError_t hipEventCreateWithFlags(hipEvent_t* event, unsigned int flags) {
1807  return hipCUDAErrorTohipError(cudaEventCreateWithFlags(event, flags));
1808 }
1809 
1810 inline static hipError_t hipEventQuery(hipEvent_t event) {
1811  return hipCUDAErrorTohipError(cudaEventQuery(event));
1812 }
1813 
1814 inline static hipError_t hipCtxCreate(hipCtx_t* ctx, unsigned int flags, hipDevice_t device) {
1815  return hipCUResultTohipError(cuCtxCreate(ctx, flags, device));
1816 }
1817 
1818 inline static hipError_t hipCtxDestroy(hipCtx_t ctx) {
1819  return hipCUResultTohipError(cuCtxDestroy(ctx));
1820 }
1821 
1822 inline static hipError_t hipCtxPopCurrent(hipCtx_t* ctx) {
1823  return hipCUResultTohipError(cuCtxPopCurrent(ctx));
1824 }
1825 
1826 inline static hipError_t hipCtxPushCurrent(hipCtx_t ctx) {
1827  return hipCUResultTohipError(cuCtxPushCurrent(ctx));
1828 }
1829 
1830 inline static hipError_t hipCtxSetCurrent(hipCtx_t ctx) {
1831  return hipCUResultTohipError(cuCtxSetCurrent(ctx));
1832 }
1833 
1834 inline static hipError_t hipCtxGetCurrent(hipCtx_t* ctx) {
1835  return hipCUResultTohipError(cuCtxGetCurrent(ctx));
1836 }
1837 
1838 inline static hipError_t hipCtxGetDevice(hipDevice_t* device) {
1839  return hipCUResultTohipError(cuCtxGetDevice(device));
1840 }
1841 
1842 inline static hipError_t hipCtxGetApiVersion(hipCtx_t ctx, int* apiVersion) {
1843  return hipCUResultTohipError(cuCtxGetApiVersion(ctx, (unsigned int*)apiVersion));
1844 }
1845 
1846 inline static hipError_t hipCtxGetCacheConfig(hipFuncCache* cacheConfig) {
1847  return hipCUResultTohipError(cuCtxGetCacheConfig(cacheConfig));
1848 }
1849 
1850 inline static hipError_t hipCtxSetCacheConfig(hipFuncCache cacheConfig) {
1851  return hipCUResultTohipError(cuCtxSetCacheConfig(cacheConfig));
1852 }
1853 
1854 inline static hipError_t hipCtxSetSharedMemConfig(hipSharedMemConfig config) {
1855  return hipCUResultTohipError(cuCtxSetSharedMemConfig((CUsharedconfig)config));
1856 }
1857 
1858 inline static hipError_t hipCtxGetSharedMemConfig(hipSharedMemConfig* pConfig) {
1859  return hipCUResultTohipError(cuCtxGetSharedMemConfig((CUsharedconfig*)pConfig));
1860 }
1861 
1862 inline static hipError_t hipCtxSynchronize(void) {
1863  return hipCUResultTohipError(cuCtxSynchronize());
1864 }
1865 
1866 inline static hipError_t hipCtxGetFlags(unsigned int* flags) {
1867  return hipCUResultTohipError(cuCtxGetFlags(flags));
1868 }
1869 
1870 inline static hipError_t hipCtxDetach(hipCtx_t ctx) {
1871  return hipCUResultTohipError(cuCtxDetach(ctx));
1872 }
1873 
1874 inline static hipError_t hipDeviceGet(hipDevice_t* device, int ordinal) {
1875  return hipCUResultTohipError(cuDeviceGet(device, ordinal));
1876 }
1877 
1878 inline static hipError_t hipDeviceComputeCapability(int* major, int* minor, hipDevice_t device) {
1879  return hipCUResultTohipError(cuDeviceComputeCapability(major, minor, device));
1880 }
1881 
1882 inline static hipError_t hipDeviceGetName(char* name, int len, hipDevice_t device) {
1883  return hipCUResultTohipError(cuDeviceGetName(name, len, device));
1884 }
1885 
1886 inline static hipError_t hipDeviceGetP2PAttribute(int* value, hipDeviceP2PAttr attr,
1887  int srcDevice, int dstDevice) {
1888  return hipCUDAErrorTohipError(cudaDeviceGetP2PAttribute(value, attr, srcDevice, dstDevice));
1889 }
1890 
1891 inline static hipError_t hipDeviceGetPCIBusId(char* pciBusId, int len, hipDevice_t device) {
1892  return hipCUDAErrorTohipError(cudaDeviceGetPCIBusId(pciBusId, len, device));
1893 }
1894 
1895 inline static hipError_t hipDeviceGetByPCIBusId(int* device, const char* pciBusId) {
1896  return hipCUDAErrorTohipError(cudaDeviceGetByPCIBusId(device, pciBusId));
1897 }
1898 
1899 inline static hipError_t hipDeviceGetSharedMemConfig(hipSharedMemConfig* config) {
1900  return hipCUDAErrorTohipError(cudaDeviceGetSharedMemConfig(config));
1901 }
1902 
1903 inline static hipError_t hipDeviceSetSharedMemConfig(hipSharedMemConfig config) {
1904  return hipCUDAErrorTohipError(cudaDeviceSetSharedMemConfig(config));
1905 }
1906 
1907 inline static hipError_t hipDeviceGetLimit(size_t* pValue, hipLimit_t limit) {
1908  return hipCUDAErrorTohipError(cudaDeviceGetLimit(pValue, limit));
1909 }
1910 
1911 inline static hipError_t hipDeviceTotalMem(size_t* bytes, hipDevice_t device) {
1912  return hipCUResultTohipError(cuDeviceTotalMem(bytes, device));
1913 }
1914 
1915 inline static hipError_t hipModuleLoad(hipModule_t* module, const char* fname) {
1916  return hipCUResultTohipError(cuModuleLoad(module, fname));
1917 }
1918 
1919 inline static hipError_t hipModuleUnload(hipModule_t hmod) {
1920  return hipCUResultTohipError(cuModuleUnload(hmod));
1921 }
1922 
1923 inline static hipError_t hipModuleGetFunction(hipFunction_t* function, hipModule_t module,
1924  const char* kname) {
1925  return hipCUResultTohipError(cuModuleGetFunction(function, module, kname));
1926 }
1927 
1928 inline static hipError_t hipModuleGetTexRef(hipTexRef* pTexRef, hipModule_t hmod, const char* name){
1929  return hipCUResultTohipError(cuModuleGetTexRef(pTexRef, hmod, name));
1930 }
1931 
1932 inline static hipError_t hipFuncGetAttributes(hipFuncAttributes* attr, const void* func) {
1933  return hipCUDAErrorTohipError(cudaFuncGetAttributes(attr, func));
1934 }
1935 
1936 inline static hipError_t hipFuncGetAttribute (int* value, hipFunction_attribute attrib, hipFunction_t hfunc) {
1937  return hipCUResultTohipError(cuFuncGetAttribute(value, attrib, hfunc));
1938 }
1939 
1940 inline static hipError_t hipModuleGetGlobal(hipDeviceptr_t* dptr, size_t* bytes, hipModule_t hmod,
1941  const char* name) {
1942  return hipCUResultTohipError(cuModuleGetGlobal(dptr, bytes, hmod, name));
1943 }
1944 
1945 inline static hipError_t hipModuleLoadData(hipModule_t* module, const void* image) {
1946  return hipCUResultTohipError(cuModuleLoadData(module, image));
1947 }
1948 
1949 inline static hipError_t hipModuleLoadDataEx(hipModule_t* module, const void* image,
1950  unsigned int numOptions, hipJitOption* options,
1951  void** optionValues) {
1952  return hipCUResultTohipError(
1953  cuModuleLoadDataEx(module, image, numOptions, options, optionValues));
1954 }
1955 
1956 inline static hipError_t hipLaunchKernel(const void* function_address, dim3 numBlocks,
1957  dim3 dimBlocks, void** args, size_t sharedMemBytes,
1958  hipStream_t stream)
1959 {
1960  return hipCUDAErrorTohipError(cudaLaunchKernel(function_address,numBlocks,dimBlocks,args,sharedMemBytes,stream));
1961 }
1962 
1963 inline static hipError_t hipModuleLaunchKernel(hipFunction_t f, unsigned int gridDimX,
1964  unsigned int gridDimY, unsigned int gridDimZ,
1965  unsigned int blockDimX, unsigned int blockDimY,
1966  unsigned int blockDimZ, unsigned int sharedMemBytes,
1967  hipStream_t stream, void** kernelParams,
1968  void** extra) {
1969  return hipCUResultTohipError(cuLaunchKernel(f, gridDimX, gridDimY, gridDimZ, blockDimX,
1970  blockDimY, blockDimZ, sharedMemBytes, stream,
1971  kernelParams, extra));
1972 }
1973 
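// Illustrative usage sketch (editorial addition, not part of the original header):
// loading a module and launching one of its kernels through the wrappers above.
// "my_kernels.cubin", "myKernel", `devPtr`, `n`, and `stream` are hypothetical
// names used only for this example.
//
//   hipModule_t mod;
//   hipFunction_t func;
//   hipModuleLoad(&mod, "my_kernels.cubin");
//   hipModuleGetFunction(&func, mod, "myKernel");
//   void* args[] = { &devPtr, &n };
//   hipModuleLaunchKernel(func, /*grid*/ 256, 1, 1, /*block*/ 128, 1, 1,
//                         0 /*sharedMemBytes*/, stream, args, NULL);
//   hipModuleUnload(mod);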
1974 inline static hipError_t hipFuncSetCacheConfig(const void* func, hipFuncCache_t cacheConfig) {
1975  return hipCUDAErrorTohipError(cudaFuncSetCacheConfig(func, cacheConfig));
1976 }
1977 
1978 __HIP_DEPRECATED inline static hipError_t hipBindTexture(size_t* offset,
1979  struct textureReference* tex,
1980  const void* devPtr,
1981  const hipChannelFormatDesc* desc,
1982  size_t size __dparm(UINT_MAX)) {
1983  return hipCUDAErrorTohipError(cudaBindTexture(offset, tex, devPtr, desc, size));
1984 }
1985 
1986 __HIP_DEPRECATED inline static hipError_t hipBindTexture2D(
1987  size_t* offset, struct textureReference* tex, const void* devPtr,
1988  const hipChannelFormatDesc* desc, size_t width, size_t height, size_t pitch) {
1989  return hipCUDAErrorTohipError(cudaBindTexture2D(offset, tex, devPtr, desc, width, height, pitch));
1990 }
1991 
1992 inline static hipChannelFormatDesc hipCreateChannelDesc(int x, int y, int z, int w,
1993  hipChannelFormatKind f) {
1994  return cudaCreateChannelDesc(x, y, z, w, hipChannelFormatKindToCudaChannelFormatKind(f));
1995 }
1996 
1997 inline static hipError_t hipCreateTextureObject(hipTextureObject_t* pTexObject,
1998  const hipResourceDesc* pResDesc,
1999  const hipTextureDesc* pTexDesc,
2000  const hipResourceViewDesc* pResViewDesc) {
2001  return hipCUDAErrorTohipError(
2002  cudaCreateTextureObject(pTexObject, pResDesc, pTexDesc, pResViewDesc));
2003 }
2004 
2005 inline static hipError_t hipDestroyTextureObject(hipTextureObject_t textureObject) {
2006  return hipCUDAErrorTohipError(cudaDestroyTextureObject(textureObject));
2007 }
2008 
2009 inline static hipError_t hipCreateSurfaceObject(hipSurfaceObject_t* pSurfObject,
2010  const hipResourceDesc* pResDesc) {
2011  return hipCUDAErrorTohipError(cudaCreateSurfaceObject(pSurfObject, pResDesc));
2012 }
2013 
2014 inline static hipError_t hipDestroySurfaceObject(hipSurfaceObject_t surfaceObject) {
2015  return hipCUDAErrorTohipError(cudaDestroySurfaceObject(surfaceObject));
2016 }
2017 
2018 inline static hipError_t hipGetTextureObjectResourceDesc(hipResourceDesc* pResDesc,
2019  hipTextureObject_t textureObject) {
2020  return hipCUDAErrorTohipError(cudaGetTextureObjectResourceDesc( pResDesc, textureObject));
2021 }
2022 
2023 __HIP_DEPRECATED inline static hipError_t hipGetTextureAlignmentOffset(
2024  size_t* offset, const struct textureReference* texref) {
2025  return hipCUDAErrorTohipError(cudaGetTextureAlignmentOffset(offset,texref));
2026 }
2027 
2028 inline static hipError_t hipGetChannelDesc(hipChannelFormatDesc* desc, hipArray_const_t array)
2029 {
2030  return hipCUDAErrorTohipError(cudaGetChannelDesc(desc,array));
2031 }
2032 
2033 inline static hipError_t hipLaunchCooperativeKernel(const void* f, dim3 gridDim, dim3 blockDim,
2034  void** kernelParams, unsigned int sharedMemBytes,
2035  hipStream_t stream) {
2036  return hipCUDAErrorTohipError(
2037  cudaLaunchCooperativeKernel(f, gridDim, blockDim, kernelParams, sharedMemBytes, stream));
2038 }
2039 
2040 inline static hipError_t hipLaunchCooperativeKernelMultiDevice(hipLaunchParams* launchParamsList,
2041  int numDevices, unsigned int flags) {
2042  return hipCUDAErrorTohipError(cudaLaunchCooperativeKernelMultiDevice(launchParamsList, numDevices, flags));
2043 }
2044 
2045 #ifdef __cplusplus
2046 }
2047 #endif
2048 
2049 #ifdef __CUDACC__
2050 
2051 template<class T>
2052 inline static hipError_t hipOccupancyMaxActiveBlocksPerMultiprocessor(int* numBlocks,
2053  T func,
2054  int blockSize,
2055  size_t dynamicSMemSize) {
2056  return hipCUDAErrorTohipError(cudaOccupancyMaxActiveBlocksPerMultiprocessor(numBlocks, func,
2057  blockSize, dynamicSMemSize));
2058 }
2059 
2060 template <class T>
2061 inline static hipError_t hipOccupancyMaxPotentialBlockSize(int* minGridSize, int* blockSize, T func,
2062  size_t dynamicSMemSize = 0,
2063  int blockSizeLimit = 0) {
2064  return hipCUDAErrorTohipError(cudaOccupancyMaxPotentialBlockSize(minGridSize, blockSize, func,
2065  dynamicSMemSize, blockSizeLimit));
2066 }
2067 
2068 template <class T>
2069 inline static hipError_t hipOccupancyMaxPotentialBlockSizeWithFlags(int* minGridSize, int* blockSize, T func,
2070  size_t dynamicSMemSize = 0,
2071  int blockSizeLimit = 0, unsigned int flags = 0) {
2072  return hipCUDAErrorTohipError(cudaOccupancyMaxPotentialBlockSizeWithFlags(minGridSize, blockSize, func,
2073  dynamicSMemSize, blockSizeLimit, flags));
2074 }
2075 
2076 template <class T>
2077 inline static hipError_t hipOccupancyMaxActiveBlocksPerMultiprocessorWithFlags( int* numBlocks, T func,
2078  int blockSize, size_t dynamicSMemSize,unsigned int flags) {
2079  return hipCUDAErrorTohipError(cudaOccupancyMaxActiveBlocksPerMultiprocessorWithFlags(numBlocks, func,
2080  blockSize, dynamicSMemSize, flags));
2081 }
2082 
2083 template <class T, int dim, enum cudaTextureReadMode readMode>
2084 inline static hipError_t hipBindTexture(size_t* offset, const struct texture<T, dim, readMode>& tex,
2085  const void* devPtr, size_t size = UINT_MAX) {
2086  return hipCUDAErrorTohipError(cudaBindTexture(offset, tex, devPtr, size));
2087 }
2088 
2089 template <class T, int dim, enum cudaTextureReadMode readMode>
2090 inline static hipError_t hipBindTexture(size_t* offset, struct texture<T, dim, readMode>& tex,
2091  const void* devPtr, const hipChannelFormatDesc& desc,
2092  size_t size = UINT_MAX) {
2093  return hipCUDAErrorTohipError(cudaBindTexture(offset, tex, devPtr, desc, size));
2094 }
2095 
2096 template <class T, int dim, enum cudaTextureReadMode readMode>
2097 __HIP_DEPRECATED inline static hipError_t hipUnbindTexture(struct texture<T, dim, readMode>* tex) {
2098  return hipCUDAErrorTohipError(cudaUnbindTexture(tex));
2099 }
2100 
2101 template <class T, int dim, enum cudaTextureReadMode readMode>
2102 __HIP_DEPRECATED inline static hipError_t hipUnbindTexture(struct texture<T, dim, readMode>& tex) {
2103  return hipCUDAErrorTohipError(cudaUnbindTexture(tex));
2104 }
2105 
2106 template <class T, int dim, enum cudaTextureReadMode readMode>
2107 __HIP_DEPRECATED inline static hipError_t hipBindTextureToArray(
2108  struct texture<T, dim, readMode>& tex, hipArray_const_t array,
2109  const hipChannelFormatDesc& desc) {
2110  return hipCUDAErrorTohipError(cudaBindTextureToArray(tex, array, desc));
2111 }
2112 
2113 template <class T, int dim, enum cudaTextureReadMode readMode>
2114 __HIP_DEPRECATED inline static hipError_t hipBindTextureToArray(
2115  struct texture<T, dim, readMode>* tex, hipArray_const_t array,
2116  const hipChannelFormatDesc* desc) {
2117  return hipCUDAErrorTohipError(cudaBindTextureToArray(tex, array, desc));
2118 }
2119 
2120 template <class T, int dim, enum cudaTextureReadMode readMode>
2121 __HIP_DEPRECATED inline static hipError_t hipBindTextureToArray(
2122  struct texture<T, dim, readMode>& tex, hipArray_const_t array) {
2123  return hipCUDAErrorTohipError(cudaBindTextureToArray(tex, array));
2124 }
2125 
2126 template <class T>
2127 inline static hipChannelFormatDesc hipCreateChannelDesc() {
2128  return cudaCreateChannelDesc<T>();
2129 }
2130 
2131 template <class T>
2132 inline static hipError_t hipLaunchCooperativeKernel(T f, dim3 gridDim, dim3 blockDim,
2133  void** kernelParams, unsigned int sharedMemBytes, hipStream_t stream) {
2134  return hipCUDAErrorTohipError(
2135  cudaLaunchCooperativeKernel(reinterpret_cast<const void*>(f), gridDim, blockDim, kernelParams, sharedMemBytes, stream));
2136 }
2137 
2138 inline static hipError_t hipTexRefSetAddressMode(hipTexRef hTexRef, int dim, hipAddress_mode am){
2139  return hipCUResultTohipError(cuTexRefSetAddressMode(hTexRef,dim,am));
2140 }
2141 
2142 inline static hipError_t hipTexRefSetFilterMode(hipTexRef hTexRef, hipFilter_mode fm){
2143  return hipCUResultTohipError(cuTexRefSetFilterMode(hTexRef,fm));
2144 }
2145 
2146 inline static hipError_t hipTexRefSetAddress(size_t *ByteOffset, hipTexRef hTexRef, hipDeviceptr_t dptr, size_t bytes){
2147  return hipCUResultTohipError(cuTexRefSetAddress(ByteOffset,hTexRef,dptr,bytes));
2148 }
2149 
2150 inline static hipError_t hipTexRefSetAddress2D(hipTexRef hTexRef, const CUDA_ARRAY_DESCRIPTOR *desc, hipDeviceptr_t dptr, size_t Pitch){
2151  return hipCUResultTohipError(cuTexRefSetAddress2D(hTexRef,desc,dptr,Pitch));
2152 }
2153 
2154 inline static hipError_t hipTexRefSetFormat(hipTexRef hTexRef, hipArray_Format fmt, int NumPackedComponents){
2155  return hipCUResultTohipError(cuTexRefSetFormat(hTexRef,fmt,NumPackedComponents));
2156 }
2157 
2158 inline static hipError_t hipTexRefSetFlags(hipTexRef hTexRef, unsigned int Flags){
2159  return hipCUResultTohipError(cuTexRefSetFlags(hTexRef,Flags));
2160 }
2161 
2162 inline static hipError_t hipTexRefSetArray(hipTexRef hTexRef, hiparray hArray, unsigned int Flags){
2163  return hipCUResultTohipError(cuTexRefSetArray(hTexRef,hArray,Flags));
2164 }
2165 
2166 inline static hipError_t hipArrayCreate(hiparray* pHandle, const HIP_ARRAY_DESCRIPTOR* pAllocateArray){
2167  return hipCUResultTohipError(cuArrayCreate(pHandle, pAllocateArray));
2168 }
2169 
2170 inline static hipError_t hipArrayDestroy(hiparray hArray){
2171  return hipCUResultTohipError(cuArrayDestroy(hArray));
2172 }
2173 
2174 inline static hipError_t hipArray3DCreate(hiparray* pHandle,
2175  const HIP_ARRAY3D_DESCRIPTOR* pAllocateArray){
2176  return hipCUResultTohipError(cuArray3DCreate(pHandle, pAllocateArray));
2177 }
2178 
2179 #endif //__CUDACC__
2180 
2181 #endif // HIP_INCLUDE_HIP_NVIDIA_DETAIL_HIP_RUNTIME_API_H
hipFuncAttributes
Definition: hip_runtime_api.h:109
hipPointerGetAttributes
hipError_t hipPointerGetAttributes(hipPointerAttribute_t *attributes, const void *ptr)
Return attributes for the specified pointer.
hipDeviceAttributeMaxPitch
@ hipDeviceAttributeMaxPitch
Maximum pitch in bytes allowed by memory copies.
Definition: hip_runtime_api.h:345
hipMemset3DAsync
hipError_t hipMemset3DAsync(hipPitchedPtr pitchedDevPtr, int value, hipExtent extent, hipStream_t stream __dparm(0))
Fills asynchronously the memory area pointed to by pitchedDevPtr with the constant value.
hipMemAdviseUnsetAccessedBy
@ hipMemAdviseUnsetAccessedBy
Definition: hip_runtime_api.h:240
hipMemcpy3D
hipError_t hipMemcpy3D(const struct hipMemcpy3DParms *p)
Copies data between host and device.
hipIpcOpenMemHandle
hipError_t hipIpcOpenMemHandle(void **devPtr, hipIpcMemHandle_t handle, unsigned int flags)
Opens an interprocess memory handle exported from another process and returns a device pointer usable...
hipCtxEnablePeerAccess
hipError_t hipCtxEnablePeerAccess(hipCtx_t peerCtx, unsigned int flags)
Enables direct access to memory allocations in a peer context.
hipDeviceProp_t::regsPerBlock
int regsPerBlock
Registers per block.
Definition: hip_runtime_api.h:88
hipMallocPitch
hipError_t hipMallocPitch(void **ptr, size_t *pitch, size_t width, size_t height)
hipSetDevice
hipError_t hipSetDevice(int deviceId)
Set default device to be used for subsequent hip API calls from this thread.
hipDeviceGetP2PAttribute
hipError_t hipDeviceGetP2PAttribute(int *value, hipDeviceP2PAttr attr, int srcDevice, int dstDevice)
Returns a value for attr of link between two devices.
hipMemsetD16Async
hipError_t hipMemsetD16Async(hipDeviceptr_t dest, unsigned short value, size_t count, hipStream_t stream __dparm(0))
Fills the first sizeBytes bytes of the memory area pointed to by dest with the constant short value v...
hipMemcpy2DFromArrayAsync
hipError_t hipMemcpy2DFromArrayAsync(void *dst, size_t dpitch, hipArray_const_t src, size_t wOffset, size_t hOffset, size_t width, size_t height, hipMemcpyKind kind, hipStream_t stream __dparm(0))
Copies data between host and device asynchronously.
hipDeviceAttributeMemoryBusWidth
@ hipDeviceAttributeMemoryBusWidth
Global memory bus width in bits.
Definition: hip_runtime_api.h:316
hipGetErrorString
const char * hipGetErrorString(hipError_t hipError)
Return handy text string message to explain the error which occurred.
hipMemRangeGetAttributes
hipError_t hipMemRangeGetAttributes(void **data, size_t *data_sizes, hipMemRangeAttribute *attributes, size_t num_attributes, const void *dev_ptr, size_t count)
Query attributes of a given memory range in AMD HMM.
hipGetDeviceFlags
hipError_t hipGetDeviceFlags(unsigned int *flags)
Gets the flags set for current device.
hipDeviceGetByPCIBusId
hipError_t hipDeviceGetByPCIBusId(int *device, const char *pciBusId)
Returns a handle to a compute device.
hipErrorInvalidMemcpyDirection
hipErrorInvalidMemcpyDirection
Invalid memory copy direction.
Definition: hip_runtime_api.h:222
hipMalloc3DArray
hipError_t hipMalloc3DArray(hipArray **array, const struct hipChannelFormatDesc *desc, struct hipExtent extent, unsigned int flags)
Allocate an array on the device.
hipDeviceArch_t::hasGlobalInt64Atomics
unsigned hasGlobalInt64Atomics
64-bit integer atomics for global memory.
Definition: hip_runtime_api.h:54
hipDeviceProp_t::minor
int minor
Definition: hip_runtime_api.h:100
hipDeviceAttributeMaxBlockDimX
@ hipDeviceAttributeMaxBlockDimX
Maximum x-dimension of a block.
Definition: hip_runtime_api.h:300
hipErrorInvalidDevicePointer
hipErrorInvalidDevicePointer
Invalid Device Pointer.
Definition: hip_runtime_api.h:221
hipChooseDevice
hipError_t hipChooseDevice(int *device, const hipDeviceProp_t *prop)
Device which matches hipDeviceProp_t is returned.
hipMemcpy2DAsync
hipError_t hipMemcpy2DAsync(void *dst, size_t dpitch, const void *src, size_t spitch, size_t width, size_t height, hipMemcpyKind kind, hipStream_t stream __dparm(0))
Copies data between host and device.
hipLaunchKernel
hipError_t hipLaunchKernel(const void *function_address, dim3 numBlocks, dim3 dimBlocks, void **args, size_t sharedMemBytes __dparm(0), hipStream_t stream __dparm(0))
C compliant kernel launch API.
hipMemsetD32
hipError_t hipMemsetD32(hipDeviceptr_t dest, int value, size_t count)
Fills the memory area pointed to by dest with the constant integer value for specified number of time...
hipDeviceProp_t::texturePitchAlignment
size_t texturePitchAlignment
Pitch alignment requirement for texture references bound to pitched memory.
Definition: hip_runtime_api.h:130
hipDeviceAttributeMaxGridDimX
@ hipDeviceAttributeMaxGridDimX
Maximum x-dimension of a grid.
Definition: hip_runtime_api.h:303
hipDeviceArch_t::hasThreadFenceSystem
unsigned hasThreadFenceSystem
__threadfence_system.
Definition: hip_runtime_api.h:67
hipStreamCreate
hipError_t hipStreamCreate(hipStream_t *stream)
Create an asynchronous stream.
hipDeviceGetStreamPriorityRange
hipError_t hipDeviceGetStreamPriorityRange(int *leastPriority, int *greatestPriority)
Returns numerical values that correspond to the least and greatest stream priority.
hipIpcEventHandle_st
Definition: hip_runtime_api.h:101
hipDeviceProp_t::maxTexture3D
int maxTexture3D[3]
Maximum dimensions (width, height, depth) of 3D images, in image elements.
Definition: hip_runtime_api.h:125
hipStreamCreateWithPriority
hipError_t hipStreamCreateWithPriority(hipStream_t *stream, unsigned int flags, int priority)
Create an asynchronous stream with the specified priority.
hipMemAdviseUnsetReadMostly
@ hipMemAdviseUnsetReadMostly
Undo the effect of hipMemAdviseSetReadMostly.
Definition: hip_runtime_api.h:234
hipFuncCache_t
hipFuncCache_t
Definition: hip_runtime_api.h:296
hipDeviceProp_t::cooperativeMultiDeviceUnmatchedBlockDim
int cooperativeMultiDeviceUnmatchedBlockDim
Definition: hip_runtime_api.h:138
hipPeekAtLastError
hipError_t hipPeekAtLastError(void)
Return last error returned by any HIP runtime API call.
hipOccupancyMaxActiveBlocksPerMultiprocessorWithFlags
hipError_t hipOccupancyMaxActiveBlocksPerMultiprocessorWithFlags(int *numBlocks, const void *f, int blockSize, size_t dynSharedMemPerBlk, unsigned int flags __dparm(hipOccupancyDefault))
Returns occupancy for a device function.
hipModuleGetTexRef
hipError_t hipModuleGetTexRef(textureReference **texRef, hipModule_t hmod, const char *name)
returns the handle of the texture reference with the name from the module.
hipMemcpy3DAsync
hipError_t hipMemcpy3DAsync(const struct hipMemcpy3DParms *p, hipStream_t stream __dparm(0))
Copies data between host and device asynchronously.
hipDeviceGetPCIBusId
hipError_t hipDeviceGetPCIBusId(char *pciBusId, int len, int device)
Returns a PCI Bus Id string for the device, overloaded to take int device ID.
hipHostGetFlags
hipError_t hipHostGetFlags(unsigned int *flagsPtr, void *hostPtr)
Return flags associated with host pointer.
hipErrorHostMemoryNotRegistered
hipErrorHostMemoryNotRegistered
Produced when trying to unlock a non-page-locked memory.
Definition: hip_runtime_api.h:273
hipErrorRuntimeOther
hipErrorRuntimeOther
Definition: hip_runtime_api.h:286
hipDeviceAttributeClockRate
@ hipDeviceAttributeClockRate
Peak clock frequency in kilohertz.
Definition: hip_runtime_api.h:314
hipMemGetAddressRange
hipError_t hipMemGetAddressRange(hipDeviceptr_t *pbase, size_t *psize, hipDeviceptr_t dptr)
Get information on memory allocations.
hipMemRangeAttributePreferredLocation
@ hipMemRangeAttributePreferredLocation
The preferred location of the range.
Definition: hip_runtime_api.h:252
hipSurfaceObject_t
unsigned long long hipSurfaceObject_t
Definition: hip_surface_types.h:36
hipStreamWaitEvent
hipError_t hipStreamWaitEvent(hipStream_t stream, hipEvent_t event, unsigned int flags)
Make the specified compute stream wait for an event.
hipDeviceAttributeMaxGridDimZ
@ hipDeviceAttributeMaxGridDimZ
Maximum z-dimension of a grid.
Definition: hip_runtime_api.h:305
hipGetDevice
hipError_t hipGetDevice(int *deviceId)
Return the default device id for the calling host thread.
hipMallocArray
hipError_t hipMallocArray(hipArray **array, const hipChannelFormatDesc *desc, size_t width, size_t height __dparm(0), unsigned int flags __dparm(hipArrayDefault))
Allocate an array on the device.
hipCtxSetCurrent
hipError_t hipCtxSetCurrent(hipCtx_t ctx)
Set the passed context as current/default.
hipMemcpyToArray
hipError_t hipMemcpyToArray(hipArray *dst, size_t wOffset, size_t hOffset, const void *src, size_t count, hipMemcpyKind kind)
Copies data between host and device.
HIP_MEMCPY3D
Definition: driver_types.h:398
hipMemoryTypeDevice
@ hipMemoryTypeDevice
Definition: hip_runtime_api.h:158
hipDeviceAttributeMaxRegistersPerBlock
@ hipDeviceAttributeMaxRegistersPerBlock
Definition: hip_runtime_api.h:310
hipMemcpyDtoDAsync
hipError_t hipMemcpyDtoDAsync(hipDeviceptr_t dst, hipDeviceptr_t src, size_t sizeBytes, hipStream_t stream)
Copy data from Device to Device asynchronously.
hipErrorNoDevice
hipErrorNoDevice
Call to hipGetDeviceCount returned 0 devices.
Definition: hip_runtime_api.h:227
hipDeviceAttributeComputeCapabilityMinor
@ hipDeviceAttributeComputeCapabilityMinor
Minor compute capability version number.
Definition: hip_runtime_api.h:324
hipMemcpy2DFromArray
hipError_t hipMemcpy2DFromArray(void *dst, size_t dpitch, hipArray_const_t src, size_t wOffset, size_t hOffset, size_t width, size_t height, hipMemcpyKind kind)
Copies data between host and device.
hipModuleOccupancyMaxPotentialBlockSizeWithFlags
hipError_t hipModuleOccupancyMaxPotentialBlockSizeWithFlags(int *gridSize, int *blockSize, hipFunction_t f, size_t dynSharedMemPerBlk, int blockSizeLimit, unsigned int flags)
determine the grid and block sizes to achieves maximum occupancy for a kernel
hipDeviceProp_t::l2CacheSize
int l2CacheSize
L2 cache size.
Definition: hip_runtime_api.h:104
hipDevicePrimaryCtxRelease
hipError_t hipDevicePrimaryCtxRelease(hipDevice_t dev)
Release the primary context on the GPU.
hipDeviceProp_t::textureAlignment
size_t textureAlignment
Alignment requirement for textures.
Definition: hip_runtime_api.h:129
hipHostMalloc
hipError_t hipHostMalloc(void **ptr, size_t size, unsigned int flags)
Allocate device accessible page locked host memory.
hipDeviceAttributeKernelExecTimeout
@ hipDeviceAttributeKernelExecTimeout
Run time limit for kernels executed on the device.
Definition: hip_runtime_api.h:348
hipDeviceAttributeL2CacheSize
@ hipDeviceAttributeL2CacheSize
Definition: hip_runtime_api.h:319
hipMemRangeGetAttribute
hipError_t hipMemRangeGetAttribute(void *data, size_t data_size, hipMemRangeAttribute attribute, const void *dev_ptr, size_t count)
Query an attribute of a given memory range in AMD HMM.
hipDeviceGetName
hipError_t hipDeviceGetName(char *name, int len, hipDevice_t device)
Returns an identifer string for the device.
hipDeviceAttributeMaxTexture3DWidth
@ hipDeviceAttributeMaxTexture3DWidth
Maximum dimension width of 3D images in image elements.
Definition: hip_runtime_api.h:338
hipDeviceArch_t::hasSurfaceFuncs
unsigned hasSurfaceFuncs
Surface functions.
Definition: hip_runtime_api.h:71
hipDeviceAttributeIntegrated
@ hipDeviceAttributeIntegrated
iGPU
Definition: hip_runtime_api.h:332
hipDeviceProp_t::isMultiGpuBoard
int isMultiGpuBoard
1 if device is on a multi-GPU board, 0 if not.
Definition: hip_runtime_api.h:115
hipMemcpyParam2DAsync
hipError_t hipMemcpyParam2DAsync(const hip_Memcpy2D *pCopy, hipStream_t stream __dparm(0))
Copies memory for 2D arrays.
hipMemAdvise
hipError_t hipMemAdvise(const void *dev_ptr, size_t count, hipMemoryAdvise advice, int device)
Advise about the usage of a given memory range to AMD HMM.
hipDeviceAttributeMaxGridDimY
@ hipDeviceAttributeMaxGridDimY
Maximum y-dimension of a grid.
Definition: hip_runtime_api.h:304
hipMemoryTypeHost
@ hipMemoryTypeHost
Memory is physically located on host.
Definition: hip_runtime_api.h:157
hipDeviceEnablePeerAccess
hipError_t hipDeviceEnablePeerAccess(int peerDeviceId, unsigned int flags)
Enable direct access from current device's virtual address space to memory allocations physically loc...
hipCtxSetCacheConfig
hipError_t hipCtxSetCacheConfig(hipFuncCache_t cacheConfig)
Set L1/Shared cache partition.
hipErrorInvalidContext
hipErrorInvalidContext
Produced when input context is invalid.
Definition: hip_runtime_api.h:230
hipDeviceArch_t::hasSharedInt64Atomics
unsigned hasSharedInt64Atomics
64-bit integer atomics for shared memory.
Definition: hip_runtime_api.h:55
hipDeviceProp_t::computeMode
int computeMode
Compute mode.
Definition: hip_runtime_api.h:106
hipCtxPushCurrent
hipError_t hipCtxPushCurrent(hipCtx_t ctx)
Push the context to be set as current/ default context.
hipDeviceAttributeIsMultiGpuBoard
@ hipDeviceAttributeIsMultiGpuBoard
Multiple GPU devices.
Definition: hip_runtime_api.h:331
hipSharedMemConfig
hipSharedMemConfig
Definition: hip_runtime_api.h:306
hipDrvMemcpy3D
hipError_t hipDrvMemcpy3D(const HIP_MEMCPY3D *pCopy)
Copies data between host and device.
hipDeviceProp_t::clockRate
int clockRate
Max clock frequency of the multiProcessors in khz.
Definition: hip_runtime_api.h:93
hipErrorPeerAccessNotEnabled
hipErrorPeerAccessNotEnabled
Peer access was never enabled from the current device.
Definition: hip_runtime_api.h:267
hipDeviceComputeCapability
hipError_t hipDeviceComputeCapability(int *major, int *minor, hipDevice_t device)
Returns the compute capability of the device.
hipStreamCallback_t
void(* hipStreamCallback_t)(hipStream_t stream, hipError_t status, void *userData)
Definition: hip_runtime_api.h:1204
hipDeviceArch_t::hasDynamicParallelism
unsigned hasDynamicParallelism
Dynamic parallelism.
Definition: hip_runtime_api.h:73
hipMemoryAdvise
hipMemoryAdvise
Definition: hip_runtime_api.h:231
hip_Memcpy2D
Definition: driver_types.h:95
hipDeviceProp_t::canMapHostMemory
int canMapHostMemory
Check whether HIP can map host memory.
Definition: hip_runtime_api.h:116
hipDeviceProp_t::sharedMemPerBlock
size_t sharedMemPerBlock
Size of shared memory region (in bytes).
Definition: hip_runtime_api.h:87
hipModuleOccupancyMaxPotentialBlockSize
hipError_t hipModuleOccupancyMaxPotentialBlockSize(int *gridSize, int *blockSize, hipFunction_t f, size_t dynSharedMemPerBlk, int blockSizeLimit)
determine the grid and block sizes to achieves maximum occupancy for a kernel
hipIpcCloseMemHandle
hipError_t hipIpcCloseMemHandle(void *devPtr)
Close memory mapped with hipIpcOpenMemHandle.
hipDevicePrimaryCtxGetState
hipError_t hipDevicePrimaryCtxGetState(hipDevice_t dev, unsigned int *flags, int *active)
Get the state of the primary context.
hipDeviceAttributeCooperativeMultiDeviceLaunch
@ hipDeviceAttributeCooperativeMultiDeviceLaunch
Support cooperative launch on multiple devices.
Definition: hip_runtime_api.h:334
hipLaunchCooperativeKernelMultiDevice
hipError_t hipLaunchCooperativeKernelMultiDevice(hipLaunchParams *launchParamsList, int numDevices, unsigned int flags)
Launches kernels on multiple devices where thread blocks can cooperate and synchronize as they execut...
hipDeviceProp_t::maxThreadsPerMultiProcessor
int maxThreadsPerMultiProcessor
Maximum resident threads per multi-processor.
Definition: hip_runtime_api.h:105
hipDeviceSetCacheConfig
hipError_t hipDeviceSetCacheConfig(hipFuncCache_t cacheConfig)
Set L1/Shared cache partition.
hipDeviceProp_t::major
int major
Definition: hip_runtime_api.h:97
hipDeviceAttributeMaxSharedMemoryPerBlock
@ hipDeviceAttributeMaxSharedMemoryPerBlock
Definition: hip_runtime_api.h:306
hipMemcpyAtoH
hipError_t hipMemcpyAtoH(void *dst, hipArray *srcArray, size_t srcOffset, size_t count)
Copies data between host and device.
hipGetDeviceCount
hipError_t hipGetDeviceCount(int *count)
Return number of compute-capable devices.
hipSuccess
hipSuccess
Successful completion.
Definition: hip_runtime_api.h:204
hipModuleOccupancyMaxActiveBlocksPerMultiprocessorWithFlags
hipError_t hipModuleOccupancyMaxActiveBlocksPerMultiprocessorWithFlags(int *numBlocks, hipFunction_t f, int blockSize, size_t dynSharedMemPerBlk, unsigned int flags)
Returns occupancy for a device function.
hipHostUnregister
hipError_t hipHostUnregister(void *hostPtr)
Un-register host pointer.
hipStreamGetFlags
hipError_t hipStreamGetFlags(hipStream_t stream, unsigned int *flags)
Return flags associated with this stream.
hipMemsetD8Async
hipError_t hipMemsetD8Async(hipDeviceptr_t dest, unsigned char value, size_t count, hipStream_t stream __dparm(0))
Fills the first sizeBytes bytes of the memory area pointed to by dest with the constant byte value va...
hipDeviceAttributeMaxThreadsPerBlock
@ hipDeviceAttributeMaxThreadsPerBlock
Maximum number of threads per block.
Definition: hip_runtime_api.h:299
hipMemRangeAttributeReadMostly
@ hipMemRangeAttributeReadMostly
Definition: hip_runtime_api.h:250
hipDeviceProp_t::gcnArch
int gcnArch
DEPRECATED: use gcnArchName instead.
Definition: hip_runtime_api.h:117
hipStreamSynchronize
hipError_t hipStreamSynchronize(hipStream_t stream)
Wait for all commands in stream to complete.
hipGetErrorName
const char * hipGetErrorName(hipError_t hip_error)
Return name of the specified error code in text form.
hipDeviceProp_t::kernelExecTimeoutEnabled
int kernelExecTimeoutEnabled
Run time limit for kernels executed on the device.
Definition: hip_runtime_api.h:131
hipDeviceGet
hipError_t hipDeviceGet(hipDevice_t *device, int ordinal)
Returns a handle to a compute device.
hipMemcpyDtoD
hipError_t hipMemcpyDtoD(hipDeviceptr_t dst, hipDeviceptr_t src, size_t sizeBytes)
Copy data from Device to Device.
hipDeviceProp_t::maxTexture1D
int maxTexture1D
Maximum number of elements in 1D images.
Definition: hip_runtime_api.h:123
hipMemcpy3DParms
Definition: driver_types.h:387
hipDeviceAttributeMaxBlockDimZ
@ hipDeviceAttributeMaxBlockDimZ
Maximum z-dimension of a block.
Definition: hip_runtime_api.h:302
hipIpcGetMemHandle
hipError_t hipIpcGetMemHandle(hipIpcMemHandle_t *handle, void *devPtr)
Gets an interprocess memory handle for an existing device memory allocation.
hipMemcpyHtoD
hipError_t hipMemcpyHtoD(hipDeviceptr_t dst, void *src, size_t sizeBytes)
Copy data from Host to Device.
hipDriverGetVersion
hipError_t hipDriverGetVersion(int *driverVersion)
Returns the approximate HIP driver version.
hipDeviceArch_t::hasDoubles
unsigned hasDoubles
Double-precision floating point.
Definition: hip_runtime_api.h:58
hipErrorInvalidKernelFile
hipErrorInvalidKernelFile
In CUDA DRV, it is CUDA_ERROR_INVALID_PTX.
Definition: hip_runtime_api.h:247
hipDeviceProp_t::maxThreadsPerBlock
int maxThreadsPerBlock
Max work items per work group or workgroup max size.
Definition: hip_runtime_api.h:90
hipCtxGetFlags
hipError_t hipCtxGetFlags(unsigned int *flags)
Return flags used for creating default context.
hipDeviceAttributeMaxBlockDimY
@ hipDeviceAttributeMaxBlockDimY
Maximum y-dimension of a block.
Definition: hip_runtime_api.h:301
hipMemcpy2DToArray
hipError_t hipMemcpy2DToArray(hipArray *dst, size_t wOffset, size_t hOffset, const void *src, size_t spitch, size_t width, size_t height, hipMemcpyKind kind)
Copies data between host and device.
hipMemAllocPitch
hipError_t hipMemAllocPitch(hipDeviceptr_t *dptr, size_t *pitch, size_t widthInBytes, size_t height, unsigned int elementSizeBytes)
hipDeviceProp_t
Definition: hip_runtime_api.h:84
hipMemAllocHost
hipError_t hipMemAllocHost(void **ptr, size_t size)
Allocate pinned host memory [Deprecated].
Definition: hip_runtime_api.h:947
hipMallocHost
hipError_t hipMallocHost(void **ptr, size_t size)
Allocate pinned host memory [Deprecated].
Definition: hip_runtime_api.h:941
hipDeviceAttributeMaxTexture2DHeight
@ hipDeviceAttributeMaxTexture2DHeight
Maximum dimension height of 2D images in image elements.
Definition: hip_runtime_api.h:337
hipDeviceArch_t::hasSharedInt32Atomics
unsigned hasSharedInt32Atomics
32-bit integer atomics for shared memory.
Definition: hip_runtime_api.h:49
hipErrorInvalidValue
hipErrorInvalidValue
Definition: hip_runtime_api.h:205
hipDeviceProp_t::memPitch
size_t memPitch
Maximum pitch in bytes allowed by memory copies.
Definition: hip_runtime_api.h:128
hipMemsetD32Async
hipError_t hipMemsetD32Async(hipDeviceptr_t dst, int value, size_t count, hipStream_t stream __dparm(0))
Fills the memory area pointed to by dev with the constant integer value for specified number of times...
hipMemRangeAttributeAccessedBy
@ hipMemRangeAttributeAccessedBy
Definition: hip_runtime_api.h:253
hipDeviceProp_t::pciBusID
int pciBusID
PCI Bus ID.
Definition: hip_runtime_api.h:112
hipRuntimeGetVersion
hipError_t hipRuntimeGetVersion(int *runtimeVersion)
Returns the approximate HIP Runtime version.
hipDeviceAttributeComputeCapabilityMajor
@ hipDeviceAttributeComputeCapabilityMajor
Major compute capability version number.
Definition: hip_runtime_api.h:323
hipLaunchCooperativeKernel
hipError_t hipLaunchCooperativeKernel(const void *f, dim3 gridDim, dim3 blockDimX, void **kernelParams, unsigned int sharedMemBytes, hipStream_t stream)
launches kernel f with launch parameters and shared memory on stream with arguments passed to kernelp...
hipEventQuery
hipError_t hipEventQuery(hipEvent_t event)
Query event status.
hipDeviceAttributeMaxTexture3DDepth
@ hipDeviceAttributeMaxTexture3DDepth
Maximum dimensions depth of 3D images in image elements.
Definition: hip_runtime_api.h:340
hipErrorRuntimeMemory
hipErrorRuntimeMemory
Definition: hip_runtime_api.h:284
hipDeviceAttributeMaxThreadsPerMultiProcessor
@ hipDeviceAttributeMaxThreadsPerMultiProcessor
Definition: hip_runtime_api.h:321
hipStreamGetPriority
hipError_t hipStreamGetPriority(hipStream_t stream, int *priority)
Query the priority of a stream.
hipOccupancyMaxActiveBlocksPerMultiprocessor
hipError_t hipOccupancyMaxActiveBlocksPerMultiprocessor(int *numBlocks, const void *f, int blockSize, size_t dynSharedMemPerBlk)
Returns occupancy for a device function.
hipDeviceProp_t::arch
hipDeviceArch_t arch
Architectural feature flags. New for HIP.
Definition: hip_runtime_api.h:109
hipCtxGetApiVersion
hipError_t hipCtxGetApiVersion(hipCtx_t ctx, int *apiVersion)
Returns the approximate HIP api version.
hipEventSynchronize
hipError_t hipEventSynchronize(hipEvent_t event)
Wait for an event to complete.
hipCtxCreate
hipError_t hipCtxCreate(hipCtx_t *ctx, unsigned int flags, hipDevice_t device)
Create a context and set it as current/ default context.
hipHostFree
hipError_t hipHostFree(void *ptr)
Free memory allocated by the hcc hip host memory allocation API This API performs an implicit hipDevi...
hipDeviceAttributePciBusId
@ hipDeviceAttributePciBusId
PCI Bus ID.
Definition: hip_runtime_api.h:327
hipMemsetD16
hipError_t hipMemsetD16(hipDeviceptr_t dest, unsigned short value, size_t count)
Fills the first sizeBytes bytes of the memory area pointed to by dest with the constant short value v...
hipDeviceProp_t::tccDriver
int tccDriver
1:If device is Tesla device using TCC driver, else 0
Definition: hip_runtime_api.h:133
hipDeviceGetLimit
hipError_t hipDeviceGetLimit(size_t *pValue, enum hipLimit_t limit)
Get Resource limits of current device.
hipMalloc
hipError_t hipMalloc(void **ptr, size_t size)
Allocate memory on the default accelerator.
hipIpcMemHandle_st
Definition: hip_runtime_api.h:97
hipEventElapsedTime
hipError_t hipEventElapsedTime(float *ms, hipEvent_t start, hipEvent_t stop)
Return the elapsed time between two events.
hipGetLastError
hipError_t hipGetLastError(void)
Return last error returned by any HIP runtime API call and resets the stored error code to hipSuccess...
hipInit
hipError_t hipInit(unsigned int flags)
Explicitly initializes the HIP runtime.
hipDeviceAttributeTexturePitchAlignment
@ hipDeviceAttributeTexturePitchAlignment
Pitch alignment requirement for 2D texture references bound to pitched memory;.
Definition: hip_runtime_api.h:347
hipDeviceAttributeWarpSize
@ hipDeviceAttributeWarpSize
Warp size in threads.
Definition: hip_runtime_api.h:309
hipDeviceArch_t::hasGlobalInt32Atomics
unsigned hasGlobalInt32Atomics
32-bit integer atomics for global memory.
Definition: hip_runtime_api.h:47
hipFuncSetCacheConfig
hipError_t hipFuncSetCacheConfig(const void *func, hipFuncCache_t config)
Set Cache configuration for a specific function.
hipCtxPopCurrent
hipError_t hipCtxPopCurrent(hipCtx_t *ctx)
Pop the current/default context and return the popped context.
hipArray
Definition: driver_types.h:82
hipDeviceArch_t::hasSyncThreadsExt
unsigned hasSyncThreadsExt
__syncthreads_count, syncthreads_and, syncthreads_or.
Definition: hip_runtime_api.h:68
hipErrorInvalidDevice
hipErrorInvalidDevice
DeviceID must be in range 0...#compute-devices.
Definition: hip_runtime_api.h:228
hipDeviceArch_t::hasFunnelShift
unsigned hasFunnelShift
Funnel two words into one with shift&mask caps.
Definition: hip_runtime_api.h:64
hipDeviceAttributeMaxTexture3DHeight
@ hipDeviceAttributeMaxTexture3DHeight
Maximum dimensions height of 3D images in image elements.
Definition: hip_runtime_api.h:339
hipDeviceAttributeMemoryClockRate
@ hipDeviceAttributeMemoryClockRate
Peak memory clock frequency in kilohertz.
Definition: hip_runtime_api.h:315
hipErrorNotReady
hipErrorNotReady
Definition: hip_runtime_api.h:258
hipHostGetDevicePointer
hipError_t hipHostGetDevicePointer(void **devPtr, void *hstPtr, unsigned int flags)
Get Device pointer from Host Pointer allocated through hipHostMalloc.
hipMemGetInfo
hipError_t hipMemGetInfo(size_t *free, size_t *total)
Query memory info. Return snapshot of free memory, and total allocatable memory on the device.
hipEventDestroy
hipError_t hipEventDestroy(hipEvent_t event)
Destroy the specified event.
hipDeviceSetSharedMemConfig
hipError_t hipDeviceSetSharedMemConfig(hipSharedMemConfig config)
The bank width of shared memory on current device is set.
hipDeviceReset
hipError_t hipDeviceReset(void)
The state of current device is discarded and updated to a fresh state.
hipDeviceProp_t::maxGridSize
int maxGridSize[3]
Max grid dimensions (XYZ).
Definition: hip_runtime_api.h:92
hipDeviceAttributeComputeMode
@ hipDeviceAttributeComputeMode
Compute mode that device is currently in.
Definition: hip_runtime_api.h:318
hipSetDeviceFlags
hipError_t hipSetDeviceFlags(unsigned flags)
The current device behavior is changed according the flags passed.
hipCtxGetCurrent
hipError_t hipCtxGetCurrent(hipCtx_t *ctx)
Get the handle of the current/ default context.
hipDeviceAttributePciDeviceId
@ hipDeviceAttributePciDeviceId
PCI Device ID.
Definition: hip_runtime_api.h:328
hipFuncGetAttributes
hipError_t hipFuncGetAttributes(struct hipFuncAttributes *attr, const void *func)
Find out attributes for a given function.
HIP_ARRAY3D_DESCRIPTOR
Definition: driver_types.h:73
hipFuncGetAttribute
hipError_t hipFuncGetAttribute(int *value, hipFunction_attribute attrib, hipFunction_t hfunc)
Find out a specific attribute for a given function.
hipDeviceProp_t::maxSharedMemoryPerMultiProcessor
size_t maxSharedMemoryPerMultiProcessor
Maximum Shared Memory Per Multiprocessor.
Definition: hip_runtime_api.h:114
hipDeviceProp_t::clockInstructionRate
int clockInstructionRate
Definition: hip_runtime_api.h:107
dim3
Definition: hip_runtime_api.h:318
hipStreamQuery
hipError_t hipStreamQuery(hipStream_t stream)
Return hipSuccess if all of the operations in the specified stream have completed,...
hipStreamAttachMemAsync
hipError_t hipStreamAttachMemAsync(hipStream_t stream, hipDeviceptr_t *dev_ptr, size_t length __dparm(0), unsigned int flags __dparm(hipMemAttachSingle))
Attach memory to a stream asynchronously in AMD HMM.
hipDevicePrimaryCtxSetFlags
hipError_t hipDevicePrimaryCtxSetFlags(hipDevice_t dev, unsigned int flags)
Set flags for the primary context.
hipPointerAttribute_t
Definition: hip_runtime_api.h:169
hipDeviceAttributeTotalConstantMemory
@ hipDeviceAttributeTotalConstantMemory
Constant memory size in bytes.
Definition: hip_runtime_api.h:308
hipFree
hipError_t hipFree(void *ptr)
Free memory allocated by the hcc hip memory allocation API. This API performs an implicit hipDeviceSy...
hipDeviceArch_t::hasWarpShuffle
unsigned hasWarpShuffle
Warp shuffle operations. (__shfl_*).
Definition: hip_runtime_api.h:63
hipArrayDefault
#define hipArrayDefault
Default HIP array allocation flag.
Definition: hip_runtime_api.h:203
hipDevicePrimaryCtxRetain
hipError_t hipDevicePrimaryCtxRetain(hipCtx_t *pctx, hipDevice_t dev)
Retain the primary context on the GPU.
hipMemRangeAttributeLastPrefetchLocation
@ hipMemRangeAttributeLastPrefetchLocation
The last location to which the range was prefetched.
Definition: hip_runtime_api.h:255
hipCtxSynchronize
hipError_t hipCtxSynchronize(void)
Blocks until the default context has completed all preceding requested tasks.
hipFreeHost
hipError_t hipFreeHost(void *ptr)
Free memory allocated by the hcc hip host memory allocation API. [Deprecated].
Definition: hip_runtime_api.h:1031
hipMemcpyHtoA
hipError_t hipMemcpyHtoA(hipArray *dstArray, size_t dstOffset, const void *srcHost, size_t count)
Copies data between host and device.
hipDeviceProp_t::memoryBusWidth
int memoryBusWidth
Global memory bus width in bits.
Definition: hip_runtime_api.h:95
hipStreamAddCallback
hipError_t hipStreamAddCallback(hipStream_t stream, hipStreamCallback_t callback, void *userData, unsigned int flags)
Adds a callback to be called on the host after all currently enqueued items in the stream have completed.
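A hedged sketch of a host callback enqueued behind all work currently in a stream; the tag string is a hypothetical piece of user data.
#include <hip/hip_runtime.h>
#include <cstdio>

// Sketch: the callback runs on a host thread after preceding stream work
// completes; it should not call back into the HIP API.
static void on_done(hipStream_t /*stream*/, hipError_t status, void* userData) {
    printf("stream finished, status=%d, tag=%s\n", (int)status, (const char*)userData);
}

void enqueue_callback(hipStream_t stream) {
    static const char tag[] = "batch-0";                 // hypothetical user data
    hipStreamAddCallback(stream, on_done, (void*)tag, 0 /* flags: reserved, must be 0 */);
}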
hipDeviceArch_t::hasWarpVote
unsigned hasWarpVote
Warp vote instructions (__any, __all).
Definition: hip_runtime_api.h:61
hipDeviceProp_t::name
char name[256]
Device name.
Definition: hip_runtime_api.h:85
hipMemcpyDtoHAsync
hipError_t hipMemcpyDtoHAsync(void *dst, hipDeviceptr_t src, size_t sizeBytes, hipStream_t stream)
Copy data from Device to Host asynchronously.
hipDeviceArch_t::hasGlobalFloatAtomicExch
unsigned hasGlobalFloatAtomicExch
32-bit float atomic exch for global memory.
Definition: hip_runtime_api.h:48
hipDeviceProp_t::concurrentKernels
int concurrentKernels
Device can possibly execute multiple kernels concurrently.
Definition: hip_runtime_api.h:110
hipDeviceArch_t::hasWarpBallot
unsigned hasWarpBallot
Warp ballot instructions (__ballot).
Definition: hip_runtime_api.h:62
hipDeviceProp_t::totalGlobalMem
size_t totalGlobalMem
Size of global memory region (in bytes).
Definition: hip_runtime_api.h:86
hipDeviceAttributeTextureAlignment
@ hipDeviceAttributeTextureAlignment
Alignment requirement for textures.
Definition: hip_runtime_api.h:346
hipEventRecord
hipError_t hipEventRecord(hipEvent_t event, hipStream_t stream)
Record an event in the specified stream.
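A sketch of the usual event-timing pattern; hipEventSynchronize and hipEventElapsedTime are companion event-API calls assumed to be available.
#include <hip/hip_runtime.h>
#include <cstdio>

// Sketch: bracket asynchronous work with two events and read the elapsed time.
void time_memset(void* dst, size_t bytes, hipStream_t stream) {
    hipEvent_t start, stop;
    hipEventCreate(&start);
    hipEventCreate(&stop);

    hipEventRecord(start, stream);
    hipMemsetAsync(dst, 0, bytes, stream);
    hipEventRecord(stop, stream);

    hipEventSynchronize(stop);                 // wait until 'stop' has actually occurred
    float ms = 0.0f;
    hipEventElapsedTime(&ms, start, stop);
    printf("memset took %.3f ms\n", ms);

    hipEventDestroy(start);
    hipEventDestroy(stop);
}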
hipDrvMemcpy3DAsync
hipError_t hipDrvMemcpy3DAsync(const HIP_MEMCPY3D *pCopy, hipStream_t stream)
Copies data between host and device asynchronously.
hipMemcpy2D
hipError_t hipMemcpy2D(void *dst, size_t dpitch, const void *src, size_t spitch, size_t width, size_t height, hipMemcpyKind kind)
Copies data between host and device.
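A hedged sketch of copying a packed host image into a pitched device allocation; hipMallocPitch is assumed from the same memory API.
#include <hip/hip_runtime.h>

// Sketch: the host rows are tightly packed, the device rows are padded to
// the pitch returned by hipMallocPitch. Widths are given in bytes.
void upload_image(const float* host, size_t width, size_t height) {
    float* dev = nullptr;
    size_t pitch = 0;
    hipMallocPitch((void**)&dev, &pitch, width * sizeof(float), height);
    hipMemcpy2D(dev, pitch,
                host, width * sizeof(float),
                width * sizeof(float), height,
                hipMemcpyHostToDevice);
    hipFree(dev);
}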
hipExtent
Definition: driver_types.h:374
hipPitchedPtr
Definition: driver_types.h:367
hipMemAdviseSetReadMostly
@ hipMemAdviseSetReadMostly
Definition: hip_runtime_api.h:232
hipMemset2D
hipError_t hipMemset2D(void *dst, size_t pitch, int value, size_t width, size_t height)
Fills the memory area pointed to by dst with the constant value.
hipMemset3D
hipError_t hipMemset3D(hipPitchedPtr pitchedDevPtr, int value, hipExtent extent)
Fills synchronously the memory area pointed to by pitchedDevPtr with the constant value.
hipStreamCreateWithFlags
hipError_t hipStreamCreateWithFlags(hipStream_t *stream, unsigned int flags)
Create an asynchronous stream.
hipDeviceGetAttribute
hipError_t hipDeviceGetAttribute(int *pi, hipDeviceAttribute_t attr, int deviceId)
Query for a specific device attribute.
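A small sketch of querying a single attribute without filling a full hipDeviceProp_t.
#include <hip/hip_runtime.h>
#include <cstdio>

// Sketch: read the multiprocessor (compute unit) count of a device.
void print_cu_count(int deviceId) {
    int cus = 0;
    hipDeviceGetAttribute(&cus, hipDeviceAttributeMultiprocessorCount, deviceId);
    printf("device %d has %d compute units\n", deviceId, cus);
}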
hipMemcpyFromArray
hipError_t hipMemcpyFromArray(void *dst, hipArray_const_t srcArray, size_t wOffset, size_t hOffset, size_t count, hipMemcpyKind kind)
Copies data between host and device.
hipDeviceAttributeCanMapHostMemory
@ hipDeviceAttributeCanMapHostMemory
Device can map host memory into device address space.
Definition: hip_runtime_api.h:349
hipDeviceProp_t::maxThreadsDim
int maxThreadsDim[3]
Max number of threads in each dimension (XYZ) of a block.
Definition: hip_runtime_api.h:91
hipMemcpyPeerAsync
hipError_t hipMemcpyPeerAsync(void *dst, int dstDeviceId, const void *src, int srcDevice, size_t sizeBytes, hipStream_t stream __dparm(0))
Copies memory from one device to memory on another device.
hipMemcpyHtoDAsync
hipError_t hipMemcpyHtoDAsync(hipDeviceptr_t dst, void *src, size_t sizeBytes, hipStream_t stream)
Copy data from Host to Device asynchronously.
hipDeviceProp_t::cooperativeMultiDeviceLaunch
int cooperativeMultiDeviceLaunch
HIP device supports cooperative launch on multiple devices.
Definition: hip_runtime_api.h:121
hipMemcpyDtoH
hipError_t hipMemcpyDtoH(void *dst, hipDeviceptr_t src, size_t sizeBytes)
Copy data from Device to Host.
hipDeviceArch_t::has3dGrid
unsigned has3dGrid
Grid and group dims are 3D (rather than 2D).
Definition: hip_runtime_api.h:72
hipDeviceGetCacheConfig
hipError_t hipDeviceGetCacheConfig(hipFuncCache_t *cacheConfig)
Get the cache configuration of the current device.
hipMemcpyPeer
hipError_t hipMemcpyPeer(void *dst, int dstDeviceId, const void *src, int srcDeviceId, size_t sizeBytes)
Copies memory from one device to memory on another device.
hipDeviceAttributeMaxTexture1DWidth
@ hipDeviceAttributeMaxTexture1DWidth
Maximum number of elements in 1D images.
Definition: hip_runtime_api.h:335
hipDeviceAttributeCooperativeLaunch
@ hipDeviceAttributeCooperativeLaunch
Support cooperative launch.
Definition: hip_runtime_api.h:333
hipModuleLoadDataEx
hipError_t hipModuleLoadDataEx(hipModule_t *module, const void *image, unsigned int numOptions, hipJitOption *options, void **optionValues)
Builds a module from a code object that resides in host memory; image is a pointer to that location....
hipDeviceAttributeMultiprocessorCount
@ hipDeviceAttributeMultiprocessorCount
Number of multiprocessors on the device.
Definition: hip_runtime_api.h:317
hipDeviceProp_t::pciDeviceID
int pciDeviceID
PCI Device ID.
Definition: hip_runtime_api.h:113
hipGetDeviceProperties
hipError_t hipGetDeviceProperties(hipDeviceProp_t *prop, int deviceId)
Returns device properties.
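A sketch of enumerating devices and printing a few of the hipDeviceProp_t fields listed on this page; hipGetDeviceCount is assumed from the same device API.
#include <hip/hip_runtime.h>
#include <cstdio>

// Sketch: list each visible device with name, memory size, warp size, and CU count.
void list_devices() {
    int count = 0;
    hipGetDeviceCount(&count);
    for (int i = 0; i < count; ++i) {
        hipDeviceProp_t prop;
        hipGetDeviceProperties(&prop, i);
        printf("%d: %s, %zu MiB global, warpSize=%d, CUs=%d\n",
               i, prop.name, prop.totalGlobalMem >> 20,
               prop.warpSize, prop.multiProcessorCount);
    }
}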
hipMemcpy
hipError_t hipMemcpy(void *dst, const void *src, size_t sizeBytes, hipMemcpyKind kind)
Copy data from src to dst.
hipDeviceProp_t::memoryClockRate
int memoryClockRate
Max global memory clock frequency in khz.
Definition: hip_runtime_api.h:94
hipEventCreateWithFlags
hipError_t hipEventCreateWithFlags(hipEvent_t *event, unsigned flags)
Create an event with the specified flags.
hipErrorCooperativeLaunchTooLarge
hipErrorCooperativeLaunchTooLarge
Definition: hip_runtime_api.h:277
hipDeviceProp_t::warpSize
int warpSize
Warp size.
Definition: hip_runtime_api.h:89
hipDeviceTotalMem
hipError_t hipDeviceTotalMem(size_t *bytes, hipDevice_t device)
Returns the total amount of memory on the device.
hipFreeArray
hipError_t hipFreeArray(hipArray *array)
Frees an array on the device.
hipMallocManaged
hipError_t hipMallocManaged(void **dev_ptr, size_t size, unsigned int flags __dparm(hipMemAttachGlobal))
Allocates memory that will be automatically managed by AMD HMM.
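A hedged sketch of the managed-memory flow built from the calls on this page; hipMemAdvise is assumed from the same managed-memory API.
#include <hip/hip_runtime.h>

// Sketch: allocate managed memory, initialize it on the host, advise the
// runtime that it is mostly read, and prefetch it to device 0.
void prepare_managed(size_t n) {
    float* data = nullptr;
    hipMallocManaged((void**)&data, n * sizeof(float));
    for (size_t i = 0; i < n; ++i) data[i] = 1.0f;      // host touches the pages

    hipMemAdvise(data, n * sizeof(float), hipMemAdviseSetReadMostly, 0);
    hipMemPrefetchAsync(data, n * sizeof(float), 0 /*device*/, 0 /*default stream*/);
    hipDeviceSynchronize();
    hipFree(data);
}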
hipErrorAssert
hipErrorAssert
Produced when the kernel calls assert.
Definition: hip_runtime_api.h:270
textureReference
Definition: texture_types.h:74
hipDeviceProp_t::cooperativeMultiDeviceUnmatchedFunc
int cooperativeMultiDeviceUnmatchedFunc
Definition: hip_runtime_api.h:134
hipCtxGetSharedMemConfig
hipError_t hipCtxGetSharedMemConfig(hipSharedMemConfig *pConfig)
Get Shared memory bank configuration.
hipDeviceProp_t::cooperativeMultiDeviceUnmatchedGridDim
int cooperativeMultiDeviceUnmatchedGridDim
Definition: hip_runtime_api.h:136
hipDeviceCanAccessPeer
hipError_t hipDeviceCanAccessPeer(int *canAccessPeer, int deviceId, int peerDeviceId)
Determine if a device can access a peer's memory.
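A hedged sketch of checking and enabling peer access between devices 0 and 1 before a device-to-device copy; hipDeviceEnablePeerAccess is the companion to the Disable call listed above.
#include <hip/hip_runtime.h>

// Sketch: hipMemcpyPeer works with or without peer access, but enabling it
// lets the copy go directly between the two devices where supported.
void peer_copy(void* dstOnDev1, const void* srcOnDev0, size_t bytes) {
    int canAccess = 0;
    hipDeviceCanAccessPeer(&canAccess, 0, 1);
    if (canAccess) {
        hipSetDevice(0);
        hipDeviceEnablePeerAccess(1, 0 /*flags*/);
    }
    hipMemcpyPeer(dstOnDev1, 1, srcOnDev0, 0, bytes);
}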
hipModuleGetFunction
hipError_t hipModuleGetFunction(hipFunction_t *function, hipModule_t module, const char *kname)
Extracts the function named kname from module, if present.
hipDeviceArch_t::hasFloatAtomicAdd
unsigned hasFloatAtomicAdd
32-bit float atomic add in global and shared memory.
Definition: hip_runtime_api.h:51
hipMemAdviseUnsetPreferredLocation
@ hipMemAdviseUnsetPreferredLocation
Clear the preferred location for the data.
Definition: hip_runtime_api.h:237
hipMemPrefetchAsync
hipError_t hipMemPrefetchAsync(const void *dev_ptr, size_t count, int device, hipStream_t stream __dparm(0))
Prefetches memory to the specified destination device using AMD HMM.
hipCtxDisablePeerAccess
hipError_t hipCtxDisablePeerAccess(hipCtx_t peerCtx)
Disable direct access from current context's virtual address space to memory allocations physically located on a peer device.
hipDeviceProp_t::cooperativeLaunch
int cooperativeLaunch
HIP device supports cooperative launch.
Definition: hip_runtime_api.h:120
hipMemAdviseSetPreferredLocation
@ hipMemAdviseSetPreferredLocation
Definition: hip_runtime_api.h:235
hipDeviceArch_t::hasSharedFloatAtomicExch
unsigned hasSharedFloatAtomicExch
32-bit float atomic exch for shared memory.
Definition: hip_runtime_api.h:50
hipTextureDesc
Definition: texture_types.h:95
hipResourceViewDesc
Definition: driver_types.h:327
hipDeviceProp_t::multiProcessorCount
int multiProcessorCount
Number of multi-processors (compute units).
Definition: hip_runtime_api.h:103
hipCtxSetSharedMemConfig
hipError_t hipCtxSetSharedMemConfig(hipSharedMemConfig config)
Set Shared memory bank configuration.
hipDeviceProp_t::integrated
int integrated
APU vs dGPU.
Definition: hip_runtime_api.h:119
hipMemsetD8
hipError_t hipMemsetD8(hipDeviceptr_t dest, unsigned char value, size_t count)
Fills the first count bytes of the memory area pointed to by dest with the constant byte value.
hipMemset2DAsync
hipError_t hipMemset2DAsync(void *dst, size_t pitch, int value, size_t width, size_t height, hipStream_t stream __dparm(0))
Fills asynchronously the memory area pointed to by dst with the constant value.
hipDeviceProp_t::ECCEnabled
int ECCEnabled
Device has ECC support enabled.
Definition: hip_runtime_api.h:132
HIP_ARRAY_DESCRIPTOR
Definition: driver_types.h:66
hipCtxGetDevice
hipError_t hipCtxGetDevice(hipDevice_t *device)
Get the handle of the device associated with current/default context.
hipDeviceProp_t::totalConstMem
size_t totalConstMem
Size of constant memory region (in bytes).
Definition: hip_runtime_api.h:96
hipDeviceProp_t::maxTexture2D
int maxTexture2D[2]
Maximum dimensions (width, height) of 2D images, in image elements.
Definition: hip_runtime_api.h:124
hipLaunchParams_t
Definition: hip_runtime_api.h:327
hipErrorHostMemoryAlreadyRegistered
hipErrorHostMemoryAlreadyRegistered
Produced when trying to lock memory that is already page-locked.
Definition: hip_runtime_api.h:271
hipFuncAttribute
hipFuncAttribute
Definition: hip_runtime_api.h:287
hipDeviceAttribute_t
hipDeviceAttribute_t
Definition: hip_runtime_api.h:298
hipFuncSetSharedMemConfig
hipError_t hipFuncSetSharedMemConfig(const void *func, hipSharedMemConfig config)
Set shared memory configuration for a specific function.
hipResourceDesc
Definition: driver_types.h:266
hipErrorLaunchFailure
hipErrorLaunchFailure
An exception occurred on the device while executing a kernel.
Definition: hip_runtime_api.h:275
hipDeviceSynchronize
hipError_t hipDeviceSynchronize(void)
Waits on all active streams on current device.
hipCtxGetCacheConfig
hipError_t hipCtxGetCacheConfig(hipFuncCache_t *cacheConfig)
Get the cache configuration of the current context.
hipCtxDestroy
hipError_t hipCtxDestroy(hipCtx_t ctx)
Destroy a HIP context.
hipModuleLaunchKernel
hipError_t hipModuleLaunchKernel(hipFunction_t f, unsigned int gridDimX, unsigned int gridDimY, unsigned int gridDimZ, unsigned int blockDimX, unsigned int blockDimY, unsigned int blockDimZ, unsigned int sharedMemBytes, hipStream_t stream, void **kernelParams, void **extra)
Launches kernel f with the given launch parameters and shared memory on stream, with arguments passed via kernelParams or extra.
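A hedged sketch of the module workflow (load, look up by name, launch); the file name "vadd.co" and kernel name "vadd" are hypothetical.
#include <hip/hip_runtime.h>

// Sketch: launch a vector-add kernel from a pre-built code object using the
// kernelParams argument; extra is left unused.
void launch_from_module(float* a, float* b, float* c, int n) {
    hipModule_t module;
    hipFunction_t kernel;
    hipModuleLoad(&module, "vadd.co");
    hipModuleGetFunction(&kernel, module, "vadd");

    void* args[] = { &a, &b, &c, &n };
    hipModuleLaunchKernel(kernel,
                          (n + 255) / 256, 1, 1,   // grid
                          256, 1, 1,               // block
                          0, 0,                    // shared mem bytes, default stream
                          args, nullptr);
    hipModuleUnload(module);
}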
hipDeviceAttributeConcurrentKernels
@ hipDeviceAttributeConcurrentKernels
Definition: hip_runtime_api.h:325
hipDeviceProp_t::cooperativeMultiDeviceUnmatchedSharedMem
int cooperativeMultiDeviceUnmatchedSharedMem
Definition: hip_runtime_api.h:140
hipProfilerStart
hipError_t hipProfilerStart()
Start recording of profiling information. When using this API, start the profiler with profiling disabled.
hipDeviceGetSharedMemConfig
hipError_t hipDeviceGetSharedMemConfig(hipSharedMemConfig *pConfig)
Returns bank width of shared memory for current device.
hipErrorNotSupported
hipErrorNotSupported
Produced when the hip API is not supported/implemented.
Definition: hip_runtime_api.h:281
hipMemcpyAsync
hipError_t hipMemcpyAsync(void *dst, const void *src, size_t sizeBytes, hipMemcpyKind kind, hipStream_t stream __dparm(0))
Copy data from src to dst asynchronously.
hipErrorLaunchOutOfResources
hipErrorLaunchOutOfResources
Out of resources error.
Definition: hip_runtime_api.h:263
hipOccupancyMaxPotentialBlockSize
hipError_t hipOccupancyMaxPotentialBlockSize(int *gridSize, int *blockSize, const void *f, size_t dynSharedMemPerBlk, int blockSizeLimit)
Determine the grid and block sizes that achieve maximum occupancy for a kernel.
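A sketch of picking a block size from the occupancy API and launching with it; the saxpy kernel and the standard hipLaunchKernelGGL launch macro are assumed.
#include <hip/hip_runtime.h>

__global__ void saxpy(float a, const float* x, float* y, int n) {
    int i = blockIdx.x * blockDim.x + threadIdx.x;
    if (i < n) y[i] = a * x[i] + y[i];
}

// Sketch: let the runtime suggest a block size, then derive the grid from it.
void launch_saxpy(float a, const float* x, float* y, int n) {
    int minGridSize = 0, blockSize = 0;
    hipOccupancyMaxPotentialBlockSize(&minGridSize, &blockSize,
                                      reinterpret_cast<const void*>(saxpy),
                                      0 /*dynSharedMemPerBlk*/, 0 /*no blockSizeLimit*/);
    int grid = (n + blockSize - 1) / blockSize;
    hipLaunchKernelGGL(saxpy, dim3(grid), dim3(blockSize), 0, 0, a, x, y, n);
}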
hipStreamDestroy
hipError_t hipStreamDestroy(hipStream_t stream)
Destroys the specified stream.
hipHostRegister
hipError_t hipHostRegister(void *hostPtr, size_t sizeBytes, unsigned int flags)
Register host memory so it can be accessed from the current device.
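A hedged sketch of pinning an existing host buffer for asynchronous copies; hipHostUnregister, hipStreamSynchronize, and the hipHostRegisterDefault flag are assumed from the same API.
#include <hip/hip_runtime.h>
#include <vector>

// Sketch: register (pin) a host vector, copy it to the device asynchronously,
// then unregister it once the copy has finished.
void pinned_upload(void* dev, std::vector<char>& host, hipStream_t stream) {
    hipHostRegister(host.data(), host.size(), hipHostRegisterDefault);
    hipMemcpyAsync(dev, host.data(), host.size(), hipMemcpyHostToDevice, stream);
    hipStreamSynchronize(stream);
    hipHostUnregister(host.data());
}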
hipModuleLoad
hipError_t hipModuleLoad(hipModule_t *module, const char *fname)
Loads code object from file into a hipModule_t.
hipProfilerStop
hipError_t hipProfilerStop()
Stop recording of profiling information. When using this API, start the profiler with profiling disabled.
hipEventCreate
hipError_t hipEventCreate(hipEvent_t *event)
Create an event.
hipMemsetAsync
hipError_t hipMemsetAsync(void *dst, int value, size_t sizeBytes, hipStream_t stream __dparm(0))
Fills asynchronously the first sizeBytes bytes of the memory area pointed to by dst with the constant byte value.
hipDeviceAttributeMaxTexture2DWidth
@ hipDeviceAttributeMaxTexture2DWidth
Maximum dimension width of 2D images in image elements.
Definition: hip_runtime_api.h:336
hipDeviceProp_t::pciDomainID
int pciDomainID
PCI Domain ID.
Definition: hip_runtime_api.h:111
hipModuleLoadData
hipError_t hipModuleLoadData(hipModule_t *module, const void *image)
Builds a module from a code object that resides in host memory; image is a pointer to that location.
hipMemAttachSingle
#define hipMemAttachSingle
Memory can only be accessed by a single stream on the associated device.
Definition: hip_runtime_api.h:174
hipMemcpyParam2D
hipError_t hipMemcpyParam2D(const hip_Memcpy2D *pCopy)
Copies memory for 2D arrays.
hipHostAlloc
hipError_t hipHostAlloc(void **ptr, size_t size, unsigned int flags)
Allocate device-accessible page-locked host memory. [Deprecated].
Definition: hip_runtime_api.h:953
hipMemset
hipError_t hipMemset(void *dst, int value, size_t sizeBytes)
Fills the first sizeBytes bytes of the memory area pointed to by dst with the constant byte value.
hipDeviceDisablePeerAccess
hipError_t hipDeviceDisablePeerAccess(int peerDeviceId)
Disable direct access from current device's virtual address space to memory allocations physically located on a peer device.
hipModuleOccupancyMaxActiveBlocksPerMultiprocessor
hipError_t hipModuleOccupancyMaxActiveBlocksPerMultiprocessor(int *numBlocks, hipFunction_t f, int blockSize, size_t dynSharedMemPerBlk)
Returns occupancy for a device function.
hipDeviceAttributeEccEnabled
@ hipDeviceAttributeEccEnabled
Device has ECC support enabled.
Definition: hip_runtime_api.h:350
hipFuncSetAttribute
hipError_t hipFuncSetAttribute(const void *func, hipFuncAttribute attr, int value)
Set attribute for a specific function.
hipDeviceAttributeMaxSharedMemoryPerMultiprocessor
@ hipDeviceAttributeMaxSharedMemoryPerMultiprocessor
Definition: hip_runtime_api.h:329
hipDevicePrimaryCtxReset
hipError_t hipDevicePrimaryCtxReset(hipDevice_t dev)
Resets the primary context on the GPU.
hipChannelFormatDesc
Definition: driver_types.h:42
hipModuleUnload
hipError_t hipModuleUnload(hipModule_t module)
Frees the module.
hipMemAdviseSetAccessedBy
@ hipMemAdviseSetAccessedBy
Definition: hip_runtime_api.h:238
hipMemRangeAttribute
hipMemRangeAttribute
Definition: hip_runtime_api.h:249
hipErrorPeerAccessAlreadyEnabled
hipErrorPeerAccessAlreadyEnabled
Peer access was already enabled from the current device.
Definition: hip_runtime_api.h:265