HIP: Heterogeneous-computing Interface for Portability
|
23 #ifndef HIP_INCLUDE_HIP_NVCC_DETAIL_HIP_RUNTIME_API_H
24 #define HIP_INCLUDE_HIP_NVCC_DETAIL_HIP_RUNTIME_API_H
26 #include <cuda_runtime_api.h>
28 #include <cuda_profiler_api.h>
29 #include <cuda_fp16.h>
36 #define __dparm(x) = x
/*
 * __HIP_DEPRECATED: attribute placed on declarations that are deprecated.
 *
 * Expands to nothing when building documentation (__DOXYGEN_ONLY__), when the
 * user opted in to deprecated APIs (HIP_ENABLE_DEPRECATED), or when the
 * compiler is neither MSVC nor GNU-compatible. Otherwise it expands to the
 * compiler's own "deprecated" marker.
 */
#if defined(__DOXYGEN_ONLY__) || defined(HIP_ENABLE_DEPRECATED)
#define __HIP_DEPRECATED
#elif defined(_MSC_VER)
#define __HIP_DEPRECATED __declspec(deprecated)
#elif defined(__GNUC__)
#define __HIP_DEPRECATED __attribute__((deprecated))
#else
/* Unknown compiler: fall back to an empty marker so declarations still parse. */
#define __HIP_DEPRECATED
#endif
/**
 * Direction of a memory copy as seen by HIP callers.
 *
 * Values are translated to cudaMemcpyKind by hipMemcpyKindToCudaMemcpyKind(),
 * so they do not need to match the CUDA numeric values.
 *
 * NOTE(review): hipMemcpyHostToHost and hipMemcpyDefault were missing from the
 * damaged listing. hipMemcpyHostToHost is required by the converter; the
 * position and presence of hipMemcpyDefault should be confirmed against
 * upstream.
 */
typedef enum hipMemcpyKind {
    hipMemcpyHostToHost,
    hipMemcpyHostToDevice,
    hipMemcpyDeviceToHost,
    hipMemcpyDeviceToDevice,
    hipMemcpyDefault
} hipMemcpyKind;
67 #define hipDataType cudaDataType
68 #define HIP_R_16F CUDA_R_16F
69 #define HIP_R_32F CUDA_R_32F
70 #define HIP_R_64F CUDA_R_64F
71 #define HIP_C_16F CUDA_C_16F
72 #define HIP_C_32F CUDA_C_32F
73 #define HIP_C_64F CUDA_C_64F
76 #define hipLibraryPropertyType libraryPropertyType
77 #define HIP_LIBRARY_MAJOR_VERSION MAJOR_VERSION
78 #define HIP_LIBRARY_MINOR_VERSION MINOR_VERSION
79 #define HIP_LIBRARY_PATCH_LEVEL PATCH_LEVEL
81 #define HIP_ARRAY_DESCRIPTOR CUDA_ARRAY_DESCRIPTOR
84 #define HIP_AD_FORMAT_UNSIGNED_INT8 CU_AD_FORMAT_UNSIGNED_INT8
85 #define HIP_AD_FORMAT_UNSIGNED_INT16 CU_AD_FORMAT_UNSIGNED_INT16
86 #define HIP_AD_FORMAT_UNSIGNED_INT32 CU_AD_FORMAT_UNSIGNED_INT32
87 #define HIP_AD_FORMAT_SIGNED_INT8 CU_AD_FORMAT_SIGNED_INT8
88 #define HIP_AD_FORMAT_SIGNED_INT16 CU_AD_FORMAT_SIGNED_INT16
89 #define HIP_AD_FORMAT_SIGNED_INT32 CU_AD_FORMAT_SIGNED_INT32
90 #define HIP_AD_FORMAT_HALF CU_AD_FORMAT_HALF
91 #define HIP_AD_FORMAT_FLOAT CU_AD_FORMAT_FLOAT
94 #define hipArray_Format CUarray_format
96 inline static CUarray_format hipArray_FormatToCUarray_format(
97 hipArray_Format format) {
99 case HIP_AD_FORMAT_UNSIGNED_INT8:
100 return CU_AD_FORMAT_UNSIGNED_INT8;
101 case HIP_AD_FORMAT_UNSIGNED_INT16:
102 return CU_AD_FORMAT_UNSIGNED_INT16;
103 case HIP_AD_FORMAT_UNSIGNED_INT32:
104 return CU_AD_FORMAT_UNSIGNED_INT32;
105 case HIP_AD_FORMAT_SIGNED_INT8:
106 return CU_AD_FORMAT_SIGNED_INT8;
107 case HIP_AD_FORMAT_SIGNED_INT16:
108 return CU_AD_FORMAT_SIGNED_INT16;
109 case HIP_AD_FORMAT_SIGNED_INT32:
110 return CU_AD_FORMAT_SIGNED_INT32;
111 case HIP_AD_FORMAT_HALF:
112 return CU_AD_FORMAT_HALF;
113 case HIP_AD_FORMAT_FLOAT:
114 return CU_AD_FORMAT_FLOAT;
116 return CU_AD_FORMAT_UNSIGNED_INT8;
120 #define HIP_TR_ADDRESS_MODE_WRAP CU_TR_ADDRESS_MODE_WRAP
121 #define HIP_TR_ADDRESS_MODE_CLAMP CU_TR_ADDRESS_MODE_CLAMP
122 #define HIP_TR_ADDRESS_MODE_MIRROR CU_TR_ADDRESS_MODE_MIRROR
123 #define HIP_TR_ADDRESS_MODE_BORDER CU_TR_ADDRESS_MODE_BORDER
126 #define hipAddress_mode CUaddress_mode
128 inline static CUaddress_mode hipAddress_modeToCUaddress_mode(
129 hipAddress_mode mode) {
131 case HIP_TR_ADDRESS_MODE_WRAP:
132 return CU_TR_ADDRESS_MODE_WRAP;
133 case HIP_TR_ADDRESS_MODE_CLAMP:
134 return CU_TR_ADDRESS_MODE_CLAMP;
135 case HIP_TR_ADDRESS_MODE_MIRROR:
136 return CU_TR_ADDRESS_MODE_MIRROR;
137 case HIP_TR_ADDRESS_MODE_BORDER:
138 return CU_TR_ADDRESS_MODE_BORDER;
140 return CU_TR_ADDRESS_MODE_WRAP;
144 #define HIP_TR_FILTER_MODE_POINT CU_TR_FILTER_MODE_POINT
145 #define HIP_TR_FILTER_MODE_LINEAR CU_TR_FILTER_MODE_LINEAR
148 #define hipFilter_mode CUfilter_mode
150 inline static CUfilter_mode hipFilter_mode_enumToCUfilter_mode(
151 hipFilter_mode mode) {
153 case HIP_TR_FILTER_MODE_POINT:
154 return CU_TR_FILTER_MODE_POINT;
155 case HIP_TR_FILTER_MODE_LINEAR:
156 return CU_TR_FILTER_MODE_LINEAR;
158 return CU_TR_FILTER_MODE_POINT;
163 #define HIP_RESOURCE_TYPE_ARRAY CU_RESOURCE_TYPE_ARRAY
164 #define HIP_RESOURCE_TYPE_MIPMAPPED_ARRAY CU_RESOURCE_TYPE_MIPMAPPED_ARRAY
165 #define HIP_RESOURCE_TYPE_LINEAR CU_RESOURCE_TYPE_LINEAR
166 #define HIP_RESOURCE_TYPE_PITCH2D CU_RESOURCE_TYPE_PITCH2D
169 #define hipResourcetype CUresourcetype
171 inline static CUresourcetype hipResourcetype_enumToCUresourcetype(
172 hipResourcetype resType) {
174 case HIP_RESOURCE_TYPE_ARRAY:
175 return CU_RESOURCE_TYPE_ARRAY;
176 case HIP_RESOURCE_TYPE_MIPMAPPED_ARRAY:
177 return CU_RESOURCE_TYPE_MIPMAPPED_ARRAY;
178 case HIP_RESOURCE_TYPE_LINEAR:
179 return CU_RESOURCE_TYPE_LINEAR;
180 case HIP_RESOURCE_TYPE_PITCH2D:
181 return CU_RESOURCE_TYPE_PITCH2D;
183 return CU_RESOURCE_TYPE_ARRAY;
187 #define hipTexRef CUtexref
188 #define hiparray CUarray
191 typedef enum cudaTextureAddressMode hipTextureAddressMode;
192 #define hipAddressModeWrap cudaAddressModeWrap
193 #define hipAddressModeClamp cudaAddressModeClamp
194 #define hipAddressModeMirror cudaAddressModeMirror
195 #define hipAddressModeBorder cudaAddressModeBorder
198 typedef enum cudaTextureFilterMode hipTextureFilterMode;
199 #define hipFilterModePoint cudaFilterModePoint
200 #define hipFilterModeLinear cudaFilterModeLinear
203 typedef enum cudaTextureReadMode hipTextureReadMode;
204 #define hipReadModeElementType cudaReadModeElementType
205 #define hipReadModeNormalizedFloat cudaReadModeNormalizedFloat
208 typedef enum cudaChannelFormatKind hipChannelFormatKind;
209 #define hipChannelFormatKindSigned cudaChannelFormatKindSigned
210 #define hipChannelFormatKindUnsigned cudaChannelFormatKindUnsigned
211 #define hipChannelFormatKindFloat cudaChannelFormatKindFloat
212 #define hipChannelFormatKindNone cudaChannelFormatKindNone
214 #define hipSurfaceBoundaryMode cudaSurfaceBoundaryMode
215 #define hipBoundaryModeZero cudaBoundaryModeZero
216 #define hipBoundaryModeTrap cudaBoundaryModeTrap
217 #define hipBoundaryModeClamp cudaBoundaryModeClamp
220 #define hipFuncCachePreferNone cudaFuncCachePreferNone
221 #define hipFuncCachePreferShared cudaFuncCachePreferShared
222 #define hipFuncCachePreferL1 cudaFuncCachePreferL1
223 #define hipFuncCachePreferEqual cudaFuncCachePreferEqual
226 #define hipResourceType cudaResourceType
227 #define hipResourceTypeArray cudaResourceTypeArray
228 #define hipResourceTypeMipmappedArray cudaResourceTypeMipmappedArray
229 #define hipResourceTypeLinear cudaResourceTypeLinear
230 #define hipResourceTypePitch2D cudaResourceTypePitch2D
236 #define hipEventDefault cudaEventDefault
237 #define hipEventBlockingSync cudaEventBlockingSync
238 #define hipEventDisableTiming cudaEventDisableTiming
239 #define hipEventInterprocess cudaEventInterprocess
240 #define hipEventReleaseToDevice 0
241 #define hipEventReleaseToSystem 0
244 #define hipHostMallocDefault cudaHostAllocDefault
245 #define hipHostMallocPortable cudaHostAllocPortable
246 #define hipHostMallocMapped cudaHostAllocMapped
247 #define hipHostMallocWriteCombined cudaHostAllocWriteCombined
248 #define hipHostMallocCoherent 0x0
249 #define hipHostMallocNonCoherent 0x0
251 #define hipMemAttachGlobal cudaMemAttachGlobal
252 #define hipMemAttachHost cudaMemAttachHost
254 #define hipHostRegisterDefault cudaHostRegisterDefault
255 #define hipHostRegisterPortable cudaHostRegisterPortable
256 #define hipHostRegisterMapped cudaHostRegisterMapped
257 #define hipHostRegisterIoMemory cudaHostRegisterIoMemory
259 #define HIP_LAUNCH_PARAM_BUFFER_POINTER CU_LAUNCH_PARAM_BUFFER_POINTER
260 #define HIP_LAUNCH_PARAM_BUFFER_SIZE CU_LAUNCH_PARAM_BUFFER_SIZE
261 #define HIP_LAUNCH_PARAM_END CU_LAUNCH_PARAM_END
262 #define hipLimitMallocHeapSize cudaLimitMallocHeapSize
263 #define hipIpcMemLazyEnablePeerAccess cudaIpcMemLazyEnablePeerAccess
265 #define hipOccupancyDefault cudaOccupancyDefault
267 #define hipCooperativeLaunchMultiDeviceNoPreSync \
268 cudaCooperativeLaunchMultiDeviceNoPreSync
269 #define hipCooperativeLaunchMultiDeviceNoPostSync \
270 cudaCooperativeLaunchMultiDeviceNoPostSync
274 #define hipJitOptionMaxRegisters CU_JIT_MAX_REGISTERS
275 #define hipJitOptionThreadsPerBlock CU_JIT_THREADS_PER_BLOCK
276 #define hipJitOptionWallTime CU_JIT_WALL_TIME
277 #define hipJitOptionInfoLogBuffer CU_JIT_INFO_LOG_BUFFER
278 #define hipJitOptionInfoLogBufferSizeBytes CU_JIT_INFO_LOG_BUFFER_SIZE_BYTES
279 #define hipJitOptionErrorLogBuffer CU_JIT_ERROR_LOG_BUFFER
280 #define hipJitOptionErrorLogBufferSizeBytes CU_JIT_ERROR_LOG_BUFFER_SIZE_BYTES
281 #define hipJitOptionOptimizationLevel CU_JIT_OPTIMIZATION_LEVEL
282 #define hipJitOptionTargetFromContext CU_JIT_TARGET_FROM_CUCONTEXT
283 #define hipJitOptionTarget CU_JIT_TARGET
284 #define hipJitOptionFallbackStrategy CU_JIT_FALLBACK_STRATEGY
285 #define hipJitOptionGenerateDebugInfo CU_JIT_GENERATE_DEBUG_INFO
286 #define hipJitOptionLogVerbose CU_JIT_LOG_VERBOSE
287 #define hipJitOptionGenerateLineInfo CU_JIT_GENERATE_LINE_INFO
288 #define hipJitOptionCacheMode CU_JIT_CACHE_MODE
289 #define hipJitOptionSm3xOpt CU_JIT_NEW_SM3X_OPT
290 #define hipJitOptionFastCompile CU_JIT_FAST_COMPILE
291 #define hipJitOptionNumOptions CU_JIT_NUM_OPTIONS
297 typedef enum cudaLimit hipLimit_t;
302 typedef CUfunc_cache hipFuncCache;
303 typedef CUjit_option hipJitOption;
304 typedef CUdevice hipDevice_t;
305 typedef enum cudaDeviceP2PAttr hipDeviceP2PAttr;
306 #define hipDevP2PAttrPerformanceRank cudaDevP2PAttrPerformanceRank
307 #define hipDevP2PAttrAccessSupported cudaDevP2PAttrAccessSupported
308 #define hipDevP2PAttrNativeAtomicSupported cudaDevP2PAttrNativeAtomicSupported
309 #define hipDevP2PAttrHipArrayAccessSupported cudaDevP2PAttrCudaArrayAccessSupported
313 typedef CUdeviceptr hipDeviceptr_t;
319 #define hipFunction_attribute CUfunction_attribute
320 #define hip_Memcpy2D CUDA_MEMCPY2D
321 #define hipMemcpy3DParms cudaMemcpy3DParms
322 #define hipArrayDefault cudaArrayDefault
323 #define hipArrayLayered cudaArrayLayered
324 #define hipArraySurfaceLoadStore cudaArraySurfaceLoadStore
325 #define hipArrayCubemap cudaArrayCubemap
326 #define hipArrayTextureGather cudaArrayTextureGather
328 typedef cudaTextureObject_t hipTextureObject_t;
330 #define hipTextureType1D cudaTextureType1D
331 #define hipTextureType1DLayered cudaTextureType1DLayered
332 #define hipTextureType2D cudaTextureType2D
333 #define hipTextureType2DLayered cudaTextureType2DLayered
334 #define hipTextureType3D cudaTextureType3D
335 #define hipDeviceMapHost cudaDeviceMapHost
339 #define make_hipExtent make_cudaExtent
340 #define make_hipPos make_cudaPos
341 #define make_hipPitchedPtr make_cudaPitchedPtr
343 #define hipStreamDefault cudaStreamDefault
344 #define hipStreamNonBlocking cudaStreamNonBlocking
351 #define hipSharedMemBankSizeDefault cudaSharedMemBankSizeDefault
352 #define hipSharedMemBankSizeFourByte cudaSharedMemBankSizeFourByte
353 #define hipSharedMemBankSizeEightByte cudaSharedMemBankSizeEightByte
356 #define HIP_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK CU_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK
357 #define HIP_FUNC_ATTRIBUTE_SHARED_SIZE_BYTES CU_FUNC_ATTRIBUTE_SHARED_SIZE_BYTES
358 #define HIP_FUNC_ATTRIBUTE_CONST_SIZE_BYTES CU_FUNC_ATTRIBUTE_CONST_SIZE_BYTES
359 #define HIP_FUNC_ATTRIBUTE_LOCAL_SIZE_BYTES CU_FUNC_ATTRIBUTE_LOCAL_SIZE_BYTES
360 #define HIP_FUNC_ATTRIBUTE_NUM_REGS CU_FUNC_ATTRIBUTE_NUM_REGS
361 #define HIP_FUNC_ATTRIBUTE_PTX_VERSION CU_FUNC_ATTRIBUTE_PTX_VERSION
362 #define HIP_FUNC_ATTRIBUTE_BINARY_VERSION CU_FUNC_ATTRIBUTE_BINARY_VERSION
363 #define HIP_FUNC_ATTRIBUTE_CACHE_MODE_CA CU_FUNC_ATTRIBUTE_CACHE_MODE_CA
364 #define HIP_FUNC_ATTRIBUTE_MAX_DYNAMIC_SHARED_SIZE_BYTES CU_FUNC_ATTRIBUTE_MAX_DYNAMIC_SHARED_SIZE_BYTES
365 #define HIP_FUNC_ATTRIBUTE_PREFERRED_SHARED_MEMORY_CARVEOUT CU_FUNC_ATTRIBUTE_PREFERRED_SHARED_MEMORY_CARVEOUT
366 #define HIP_FUNC_ATTRIBUTE_MAX CU_FUNC_ATTRIBUTE_MAX
/*
 * Compatibility shims for the legacy, mask-less warp shuffle names.
 *
 * When building against CUDA 9.0 or newer, each legacy name is rewritten to
 * its *_sync counterpart with the mask 0xffffffff, i.e. all 32 lanes are named
 * as participants. Callers are therefore expected to invoke these from code
 * where the whole warp reaches the shuffle.
 */
#if CUDA_VERSION >= 9000
#define __shfl(...) __shfl_sync(0xffffffff, __VA_ARGS__)
#define __shfl_up(...) __shfl_up_sync(0xffffffff, __VA_ARGS__)
#define __shfl_down(...) __shfl_down_sync(0xffffffff, __VA_ARGS__)
#define __shfl_xor(...) __shfl_xor_sync(0xffffffff, __VA_ARGS__)
#endif  // CUDA_VERSION >= 9000
375 inline static hipError_t hipCUDAErrorTohipError(cudaError_t cuError) {
379 case cudaErrorProfilerDisabled:
380 return hipErrorProfilerDisabled;
381 case cudaErrorProfilerNotInitialized:
382 return hipErrorProfilerNotInitialized;
383 case cudaErrorProfilerAlreadyStarted:
384 return hipErrorProfilerAlreadyStarted;
385 case cudaErrorProfilerAlreadyStopped:
386 return hipErrorProfilerAlreadyStopped;
387 case cudaErrorInsufficientDriver:
388 return hipErrorInsufficientDriver;
389 case cudaErrorUnsupportedLimit:
390 return hipErrorUnsupportedLimit;
391 case cudaErrorPeerAccessUnsupported:
392 return hipErrorPeerAccessUnsupported;
393 case cudaErrorInvalidGraphicsContext:
394 return hipErrorInvalidGraphicsContext;
395 case cudaErrorSharedObjectSymbolNotFound:
396 return hipErrorSharedObjectSymbolNotFound;
397 case cudaErrorSharedObjectInitFailed:
398 return hipErrorSharedObjectInitFailed;
399 case cudaErrorOperatingSystem:
400 return hipErrorOperatingSystem;
401 case cudaErrorSetOnActiveProcess:
402 return hipErrorSetOnActiveProcess;
403 case cudaErrorIllegalAddress:
404 return hipErrorIllegalAddress;
405 case cudaErrorInvalidSymbol:
406 return hipErrorInvalidSymbol;
407 case cudaErrorMissingConfiguration:
408 return hipErrorMissingConfiguration;
409 case cudaErrorMemoryAllocation:
410 return hipErrorOutOfMemory;
411 case cudaErrorInitializationError:
412 return hipErrorNotInitialized;
413 case cudaErrorLaunchFailure:
415 case cudaErrorCooperativeLaunchTooLarge:
417 case cudaErrorPriorLaunchFailure:
418 return hipErrorPriorLaunchFailure;
419 case cudaErrorLaunchOutOfResources:
421 case cudaErrorInvalidDeviceFunction:
422 return hipErrorInvalidDeviceFunction;
423 case cudaErrorInvalidConfiguration:
424 return hipErrorInvalidConfiguration;
425 case cudaErrorInvalidDevice:
427 case cudaErrorInvalidValue:
429 case cudaErrorInvalidDevicePointer:
431 case cudaErrorInvalidMemcpyDirection:
433 case cudaErrorInvalidResourceHandle:
434 return hipErrorInvalidHandle;
435 case cudaErrorNotReady:
437 case cudaErrorNoDevice:
439 case cudaErrorPeerAccessAlreadyEnabled:
441 case cudaErrorPeerAccessNotEnabled:
443 case cudaErrorHostMemoryAlreadyRegistered:
445 case cudaErrorHostMemoryNotRegistered:
447 case cudaErrorMapBufferObjectFailed:
448 return hipErrorMapFailed;
449 case cudaErrorAssert:
451 case cudaErrorNotSupported:
453 case cudaErrorCudartUnloading:
454 return hipErrorDeinitialized;
455 case cudaErrorInvalidKernelImage:
456 return hipErrorInvalidImage;
457 case cudaErrorUnmapBufferObjectFailed:
458 return hipErrorUnmapFailed;
459 case cudaErrorNoKernelImageForDevice:
460 return hipErrorNoBinaryForGpu;
461 case cudaErrorECCUncorrectable:
462 return hipErrorECCNotCorrectable;
463 case cudaErrorDeviceAlreadyInUse:
464 return hipErrorContextAlreadyInUse;
465 case cudaErrorInvalidPtx:
467 case cudaErrorLaunchTimeout:
468 return hipErrorLaunchTimeOut;
469 #if CUDA_VERSION >= 10010
470 case cudaErrorInvalidSource:
471 return hipErrorInvalidSource;
472 case cudaErrorFileNotFound:
473 return hipErrorFileNotFound;
474 case cudaErrorSymbolNotFound:
475 return hipErrorNotFound;
476 case cudaErrorArrayIsMapped:
477 return hipErrorArrayIsMapped;
478 case cudaErrorNotMappedAsPointer:
479 return hipErrorNotMappedAsPointer;
480 case cudaErrorNotMappedAsArray:
481 return hipErrorNotMappedAsArray;
482 case cudaErrorNotMapped:
483 return hipErrorNotMapped;
484 case cudaErrorAlreadyAcquired:
485 return hipErrorAlreadyAcquired;
486 case cudaErrorAlreadyMapped:
487 return hipErrorAlreadyMapped;
489 #if CUDA_VERSION >= 10020
490 case cudaErrorDeviceUninitialized:
493 case cudaErrorUnknown:
495 return hipErrorUnknown;
499 inline static hipError_t hipCUResultTohipError(CUresult cuError) {
503 case CUDA_ERROR_OUT_OF_MEMORY:
504 return hipErrorOutOfMemory;
505 case CUDA_ERROR_INVALID_VALUE:
507 case CUDA_ERROR_INVALID_DEVICE:
509 case CUDA_ERROR_DEINITIALIZED:
510 return hipErrorDeinitialized;
511 case CUDA_ERROR_NO_DEVICE:
513 case CUDA_ERROR_INVALID_CONTEXT:
515 case CUDA_ERROR_NOT_INITIALIZED:
516 return hipErrorNotInitialized;
517 case CUDA_ERROR_INVALID_HANDLE:
518 return hipErrorInvalidHandle;
519 case CUDA_ERROR_MAP_FAILED:
520 return hipErrorMapFailed;
521 case CUDA_ERROR_PROFILER_DISABLED:
522 return hipErrorProfilerDisabled;
523 case CUDA_ERROR_PROFILER_NOT_INITIALIZED:
524 return hipErrorProfilerNotInitialized;
525 case CUDA_ERROR_PROFILER_ALREADY_STARTED:
526 return hipErrorProfilerAlreadyStarted;
527 case CUDA_ERROR_PROFILER_ALREADY_STOPPED:
528 return hipErrorProfilerAlreadyStopped;
529 case CUDA_ERROR_INVALID_IMAGE:
530 return hipErrorInvalidImage;
531 case CUDA_ERROR_CONTEXT_ALREADY_CURRENT:
532 return hipErrorContextAlreadyCurrent;
533 case CUDA_ERROR_UNMAP_FAILED:
534 return hipErrorUnmapFailed;
535 case CUDA_ERROR_ARRAY_IS_MAPPED:
536 return hipErrorArrayIsMapped;
537 case CUDA_ERROR_ALREADY_MAPPED:
538 return hipErrorAlreadyMapped;
539 case CUDA_ERROR_NO_BINARY_FOR_GPU:
540 return hipErrorNoBinaryForGpu;
541 case CUDA_ERROR_ALREADY_ACQUIRED:
542 return hipErrorAlreadyAcquired;
543 case CUDA_ERROR_NOT_MAPPED:
544 return hipErrorNotMapped;
545 case CUDA_ERROR_NOT_MAPPED_AS_ARRAY:
546 return hipErrorNotMappedAsArray;
547 case CUDA_ERROR_NOT_MAPPED_AS_POINTER:
548 return hipErrorNotMappedAsPointer;
549 case CUDA_ERROR_ECC_UNCORRECTABLE:
550 return hipErrorECCNotCorrectable;
551 case CUDA_ERROR_UNSUPPORTED_LIMIT:
552 return hipErrorUnsupportedLimit;
553 case CUDA_ERROR_CONTEXT_ALREADY_IN_USE:
554 return hipErrorContextAlreadyInUse;
555 case CUDA_ERROR_PEER_ACCESS_UNSUPPORTED:
556 return hipErrorPeerAccessUnsupported;
557 case CUDA_ERROR_INVALID_PTX:
559 case CUDA_ERROR_INVALID_GRAPHICS_CONTEXT:
560 return hipErrorInvalidGraphicsContext;
561 case CUDA_ERROR_INVALID_SOURCE:
562 return hipErrorInvalidSource;
563 case CUDA_ERROR_FILE_NOT_FOUND:
564 return hipErrorFileNotFound;
565 case CUDA_ERROR_SHARED_OBJECT_SYMBOL_NOT_FOUND:
566 return hipErrorSharedObjectSymbolNotFound;
567 case CUDA_ERROR_SHARED_OBJECT_INIT_FAILED:
568 return hipErrorSharedObjectInitFailed;
569 case CUDA_ERROR_OPERATING_SYSTEM:
570 return hipErrorOperatingSystem;
571 case CUDA_ERROR_NOT_FOUND:
572 return hipErrorNotFound;
573 case CUDA_ERROR_NOT_READY:
575 case CUDA_ERROR_ILLEGAL_ADDRESS:
576 return hipErrorIllegalAddress;
577 case CUDA_ERROR_LAUNCH_OUT_OF_RESOURCES:
579 case CUDA_ERROR_LAUNCH_TIMEOUT:
580 return hipErrorLaunchTimeOut;
581 case CUDA_ERROR_PEER_ACCESS_ALREADY_ENABLED:
583 case CUDA_ERROR_PEER_ACCESS_NOT_ENABLED:
585 case CUDA_ERROR_PRIMARY_CONTEXT_ACTIVE:
586 return hipErrorSetOnActiveProcess;
587 case CUDA_ERROR_ASSERT:
589 case CUDA_ERROR_HOST_MEMORY_ALREADY_REGISTERED:
591 case CUDA_ERROR_HOST_MEMORY_NOT_REGISTERED:
593 case CUDA_ERROR_LAUNCH_FAILED:
595 case CUDA_ERROR_COOPERATIVE_LAUNCH_TOO_LARGE:
597 case CUDA_ERROR_NOT_SUPPORTED:
599 case CUDA_ERROR_UNKNOWN:
601 return hipErrorUnknown;
605 inline static cudaError_t hipErrorToCudaError(hipError_t hError) {
609 case hipErrorOutOfMemory:
610 return cudaErrorMemoryAllocation;
611 case hipErrorProfilerDisabled:
612 return cudaErrorProfilerDisabled;
613 case hipErrorProfilerNotInitialized:
614 return cudaErrorProfilerNotInitialized;
615 case hipErrorProfilerAlreadyStarted:
616 return cudaErrorProfilerAlreadyStarted;
617 case hipErrorProfilerAlreadyStopped:
618 return cudaErrorProfilerAlreadyStopped;
619 case hipErrorInvalidConfiguration:
620 return cudaErrorInvalidConfiguration;
622 return cudaErrorLaunchOutOfResources;
624 return cudaErrorInvalidValue;
625 case hipErrorInvalidHandle:
626 return cudaErrorInvalidResourceHandle;
628 return cudaErrorInvalidDevice;
630 return cudaErrorInvalidMemcpyDirection;
632 return cudaErrorInvalidDevicePointer;
633 case hipErrorNotInitialized:
634 return cudaErrorInitializationError;
636 return cudaErrorNoDevice;
638 return cudaErrorNotReady;
640 return cudaErrorPeerAccessNotEnabled;
642 return cudaErrorPeerAccessAlreadyEnabled;
644 return cudaErrorHostMemoryAlreadyRegistered;
646 return cudaErrorHostMemoryNotRegistered;
647 case hipErrorDeinitialized:
648 return cudaErrorCudartUnloading;
649 case hipErrorInvalidSymbol:
650 return cudaErrorInvalidSymbol;
651 case hipErrorInsufficientDriver:
652 return cudaErrorInsufficientDriver;
653 case hipErrorMissingConfiguration:
654 return cudaErrorMissingConfiguration;
655 case hipErrorPriorLaunchFailure:
656 return cudaErrorPriorLaunchFailure;
657 case hipErrorInvalidDeviceFunction:
658 return cudaErrorInvalidDeviceFunction;
659 case hipErrorInvalidImage:
660 return cudaErrorInvalidKernelImage;
662 #if CUDA_VERSION >= 10020
663 return cudaErrorDeviceUninitialized;
665 return cudaErrorUnknown;
667 case hipErrorMapFailed:
668 return cudaErrorMapBufferObjectFailed;
669 case hipErrorUnmapFailed:
670 return cudaErrorUnmapBufferObjectFailed;
671 case hipErrorArrayIsMapped:
672 #if CUDA_VERSION >= 10010
673 return cudaErrorArrayIsMapped;
675 return cudaErrorUnknown;
677 case hipErrorAlreadyMapped:
678 #if CUDA_VERSION >= 10010
679 return cudaErrorAlreadyMapped;
681 return cudaErrorUnknown;
683 case hipErrorNoBinaryForGpu:
684 return cudaErrorNoKernelImageForDevice;
685 case hipErrorAlreadyAcquired:
686 #if CUDA_VERSION >= 10010
687 return cudaErrorAlreadyAcquired;
689 return cudaErrorUnknown;
691 case hipErrorNotMapped:
692 #if CUDA_VERSION >= 10010
693 return cudaErrorNotMapped;
695 return cudaErrorUnknown;
697 case hipErrorNotMappedAsArray:
698 #if CUDA_VERSION >= 10010
699 return cudaErrorNotMappedAsArray;
701 return cudaErrorUnknown;
703 case hipErrorNotMappedAsPointer:
704 #if CUDA_VERSION >= 10010
705 return cudaErrorNotMappedAsPointer;
707 return cudaErrorUnknown;
709 case hipErrorECCNotCorrectable:
710 return cudaErrorECCUncorrectable;
711 case hipErrorUnsupportedLimit:
712 return cudaErrorUnsupportedLimit;
713 case hipErrorContextAlreadyInUse:
714 return cudaErrorDeviceAlreadyInUse;
715 case hipErrorPeerAccessUnsupported:
716 return cudaErrorPeerAccessUnsupported;
718 return cudaErrorInvalidPtx;
719 case hipErrorInvalidGraphicsContext:
720 return cudaErrorInvalidGraphicsContext;
721 case hipErrorInvalidSource:
722 #if CUDA_VERSION >= 10010
723 return cudaErrorInvalidSource;
725 return cudaErrorUnknown;
727 case hipErrorFileNotFound:
728 #if CUDA_VERSION >= 10010
729 return cudaErrorFileNotFound;
731 return cudaErrorUnknown;
733 case hipErrorSharedObjectSymbolNotFound:
734 return cudaErrorSharedObjectSymbolNotFound;
735 case hipErrorSharedObjectInitFailed:
736 return cudaErrorSharedObjectInitFailed;
737 case hipErrorOperatingSystem:
738 return cudaErrorOperatingSystem;
739 case hipErrorNotFound:
740 #if CUDA_VERSION >= 10010
741 return cudaErrorSymbolNotFound;
743 return cudaErrorUnknown;
745 case hipErrorIllegalAddress:
746 return cudaErrorIllegalAddress;
747 case hipErrorLaunchTimeOut:
748 return cudaErrorLaunchTimeout;
749 case hipErrorSetOnActiveProcess:
750 return cudaErrorSetOnActiveProcess;
752 return cudaErrorLaunchFailure;
754 return cudaErrorCooperativeLaunchTooLarge;
756 return cudaErrorNotSupported;
761 case hipErrorUnknown:
764 return cudaErrorUnknown;
768 inline static enum cudaMemcpyKind hipMemcpyKindToCudaMemcpyKind(hipMemcpyKind kind) {
770 case hipMemcpyHostToHost:
771 return cudaMemcpyHostToHost;
772 case hipMemcpyHostToDevice:
773 return cudaMemcpyHostToDevice;
774 case hipMemcpyDeviceToHost:
775 return cudaMemcpyDeviceToHost;
776 case hipMemcpyDeviceToDevice:
777 return cudaMemcpyDeviceToDevice;
779 return cudaMemcpyDefault;
783 inline static enum cudaTextureAddressMode hipTextureAddressModeToCudaTextureAddressMode(
784 hipTextureAddressMode kind) {
786 case hipAddressModeWrap:
787 return cudaAddressModeWrap;
788 case hipAddressModeClamp:
789 return cudaAddressModeClamp;
790 case hipAddressModeMirror:
791 return cudaAddressModeMirror;
792 case hipAddressModeBorder:
793 return cudaAddressModeBorder;
795 return cudaAddressModeWrap;
799 inline static enum cudaTextureFilterMode hipTextureFilterModeToCudaTextureFilterMode(
800 hipTextureFilterMode kind) {
802 case hipFilterModePoint:
803 return cudaFilterModePoint;
804 case hipFilterModeLinear:
805 return cudaFilterModeLinear;
807 return cudaFilterModePoint;
811 inline static enum cudaTextureReadMode hipTextureReadModeToCudaTextureReadMode(hipTextureReadMode kind) {
813 case hipReadModeElementType:
814 return cudaReadModeElementType;
815 case hipReadModeNormalizedFloat:
816 return cudaReadModeNormalizedFloat;
818 return cudaReadModeElementType;
822 inline static enum cudaChannelFormatKind hipChannelFormatKindToCudaChannelFormatKind(
823 hipChannelFormatKind kind) {
825 case hipChannelFormatKindSigned:
826 return cudaChannelFormatKindSigned;
827 case hipChannelFormatKindUnsigned:
828 return cudaChannelFormatKindUnsigned;
829 case hipChannelFormatKindFloat:
830 return cudaChannelFormatKindFloat;
831 case hipChannelFormatKindNone:
832 return cudaChannelFormatKindNone;
834 return cudaChannelFormatKindNone;
841 #define HIPRT_CB CUDART_CB
843 inline static hipError_t
hipInit(
unsigned int flags) {
844 return hipCUResultTohipError(cuInit(flags));
847 inline static hipError_t
hipDeviceReset() {
return hipCUDAErrorTohipError(cudaDeviceReset()); }
849 inline static hipError_t
hipGetLastError() {
return hipCUDAErrorTohipError(cudaGetLastError()); }
852 return hipCUDAErrorTohipError(cudaPeekAtLastError());
855 inline static hipError_t
hipMalloc(
void** ptr,
size_t size) {
856 return hipCUDAErrorTohipError(cudaMalloc(ptr, size));
859 inline static hipError_t
hipMallocPitch(
void** ptr,
size_t* pitch,
size_t width,
size_t height) {
860 return hipCUDAErrorTohipError(cudaMallocPitch(ptr, pitch, width, height));
863 inline static hipError_t
hipMemAllocPitch(hipDeviceptr_t* dptr,
size_t* pitch,
size_t widthInBytes,
size_t height,
unsigned int elementSizeBytes){
864 return hipCUResultTohipError(cuMemAllocPitch(dptr,pitch,widthInBytes,height,elementSizeBytes));
868 return hipCUDAErrorTohipError(cudaMalloc3D(pitchedDevPtr, extent));
871 inline static hipError_t
hipFree(
void* ptr) {
return hipCUDAErrorTohipError(cudaFree(ptr)); }
873 inline static hipError_t
hipMallocHost(
void** ptr,
size_t size)
874 __attribute__((deprecated(
"use hipHostMalloc instead")));
876 return hipCUDAErrorTohipError(cudaMallocHost(ptr, size));
880 __attribute__((deprecated(
"use hipHostMalloc instead")));
882 return hipCUResultTohipError(cuMemAllocHost(ptr, size));
885 inline static hipError_t
hipHostAlloc(
void** ptr,
size_t size,
unsigned int flags)
886 __attribute__((deprecated(
"use hipHostMalloc instead")));
887 inline static hipError_t
hipHostAlloc(
void** ptr,
size_t size,
unsigned int flags) {
888 return hipCUDAErrorTohipError(cudaHostAlloc(ptr, size, flags));
891 inline static hipError_t
hipHostMalloc(
void** ptr,
size_t size,
unsigned int flags) {
892 return hipCUDAErrorTohipError(cudaHostAlloc(ptr, size, flags));
895 inline static hipError_t
hipMallocManaged(
void** ptr,
size_t size,
unsigned int flags) {
896 return hipCUDAErrorTohipError(cudaMallocManaged(ptr, size, flags));
900 size_t width,
size_t height,
902 return hipCUDAErrorTohipError(cudaMallocArray(array, desc, width, height, flags));
907 return hipCUDAErrorTohipError(cudaMalloc3DArray(array, desc, extent, flags));
911 return hipCUDAErrorTohipError(cudaFreeArray(array));
915 return hipCUDAErrorTohipError(cudaHostGetDevicePointer(devPtr, hostPtr, flags));
918 inline static hipError_t
hipHostGetFlags(
unsigned int* flagsPtr,
void* hostPtr) {
919 return hipCUDAErrorTohipError(cudaHostGetFlags(flagsPtr, hostPtr));
922 inline static hipError_t
hipHostRegister(
void* ptr,
size_t size,
unsigned int flags) {
923 return hipCUDAErrorTohipError(cudaHostRegister(ptr, size, flags));
927 return hipCUDAErrorTohipError(cudaHostUnregister(ptr));
931 __attribute__((deprecated(
"use hipHostFree instead")));
933 return hipCUDAErrorTohipError(cudaFreeHost(ptr));
937 return hipCUDAErrorTohipError(cudaFreeHost(ptr));
941 return hipCUDAErrorTohipError(cudaSetDevice(device));
945 struct cudaDeviceProp cdprop;
946 memset(&cdprop, 0x0,
sizeof(
struct cudaDeviceProp));
947 cdprop.major = prop->
major;
948 cdprop.minor = prop->
minor;
963 return hipCUDAErrorTohipError(cudaChooseDevice(device, &cdprop));
966 inline static hipError_t
hipMemcpyHtoD(hipDeviceptr_t dst,
void* src,
size_t size) {
967 return hipCUResultTohipError(cuMemcpyHtoD(dst, src, size));
970 inline static hipError_t
hipMemcpyDtoH(
void* dst, hipDeviceptr_t src,
size_t size) {
971 return hipCUResultTohipError(cuMemcpyDtoH(dst, src, size));
974 inline static hipError_t
hipMemcpyDtoD(hipDeviceptr_t dst, hipDeviceptr_t src,
size_t size) {
975 return hipCUResultTohipError(cuMemcpyDtoD(dst, src, size));
978 inline static hipError_t
hipMemcpyHtoDAsync(hipDeviceptr_t dst,
void* src,
size_t size,
980 return hipCUResultTohipError(cuMemcpyHtoDAsync(dst, src, size, stream));
983 inline static hipError_t
hipMemcpyDtoHAsync(
void* dst, hipDeviceptr_t src,
size_t size,
985 return hipCUResultTohipError(cuMemcpyDtoHAsync(dst, src, size, stream));
988 inline static hipError_t
hipMemcpyDtoDAsync(hipDeviceptr_t dst, hipDeviceptr_t src,
size_t size,
990 return hipCUResultTohipError(cuMemcpyDtoDAsync(dst, src, size, stream));
993 inline static hipError_t
hipMemcpy(
void* dst,
const void* src,
size_t sizeBytes,
994 hipMemcpyKind copyKind) {
995 return hipCUDAErrorTohipError(
996 cudaMemcpy(dst, src, sizeBytes, hipMemcpyKindToCudaMemcpyKind(copyKind)));
1000 inline static hipError_t hipMemcpyWithStream(
void* dst,
const void* src,
1001 size_t sizeBytes, hipMemcpyKind copyKind,
1003 cudaError_t error = cudaMemcpyAsync(dst, src, sizeBytes,
1004 hipMemcpyKindToCudaMemcpyKind(copyKind),
1007 if (error != cudaSuccess)
return hipCUDAErrorTohipError(error);
1009 return hipCUDAErrorTohipError(cudaStreamSynchronize(stream));
1012 inline static hipError_t
hipMemcpyAsync(
void* dst,
const void* src,
size_t sizeBytes,
1013 hipMemcpyKind copyKind,
hipStream_t stream __dparm(0)) {
1014 return hipCUDAErrorTohipError(
1015 cudaMemcpyAsync(dst, src, sizeBytes, hipMemcpyKindToCudaMemcpyKind(copyKind), stream));
1018 inline static hipError_t hipMemcpyToSymbol(
const void* symbol,
const void* src,
size_t sizeBytes,
1019 size_t offset __dparm(0),
1020 hipMemcpyKind copyType __dparm(hipMemcpyHostToDevice)) {
1021 return hipCUDAErrorTohipError(cudaMemcpyToSymbol(symbol, src, sizeBytes, offset,
1022 hipMemcpyKindToCudaMemcpyKind(copyType)));
1025 inline static hipError_t hipMemcpyToSymbolAsync(
const void* symbol,
const void* src,
1026 size_t sizeBytes,
size_t offset,
1027 hipMemcpyKind copyType,
1029 return hipCUDAErrorTohipError(cudaMemcpyToSymbolAsync(
1030 symbol, src, sizeBytes, offset, hipMemcpyKindToCudaMemcpyKind(copyType), stream));
1033 inline static hipError_t hipMemcpyFromSymbol(
void* dst,
const void* symbolName,
size_t sizeBytes,
1034 size_t offset __dparm(0),
1035 hipMemcpyKind kind __dparm(hipMemcpyDeviceToHost)) {
1036 return hipCUDAErrorTohipError(cudaMemcpyFromSymbol(dst, symbolName, sizeBytes, offset,
1037 hipMemcpyKindToCudaMemcpyKind(kind)));
1040 inline static hipError_t hipMemcpyFromSymbolAsync(
void* dst,
const void* symbolName,
1041 size_t sizeBytes,
size_t offset,
1044 return hipCUDAErrorTohipError(cudaMemcpyFromSymbolAsync(
1045 dst, symbolName, sizeBytes, offset, hipMemcpyKindToCudaMemcpyKind(kind), stream));
1048 inline static hipError_t hipGetSymbolAddress(
void** devPtr,
const void* symbolName) {
1049 return hipCUDAErrorTohipError(cudaGetSymbolAddress(devPtr, symbolName));
1052 inline static hipError_t hipGetSymbolSize(
size_t* size,
const void* symbolName) {
1053 return hipCUDAErrorTohipError(cudaGetSymbolSize(size, symbolName));
1056 inline static hipError_t
hipMemcpy2D(
void* dst,
size_t dpitch,
const void* src,
size_t spitch,
1057 size_t width,
size_t height, hipMemcpyKind kind) {
1058 return hipCUDAErrorTohipError(
1059 cudaMemcpy2D(dst, dpitch, src, spitch, width, height, hipMemcpyKindToCudaMemcpyKind(kind)));
1063 return hipCUResultTohipError(cuMemcpy2D(pCopy));
1067 return hipCUResultTohipError(cuMemcpy2DAsync(pCopy, stream));
1072 return hipCUDAErrorTohipError(cudaMemcpy3D(p));
1077 return hipCUDAErrorTohipError(cudaMemcpy3DAsync(p, stream));
1080 inline static hipError_t
hipMemcpy2DAsync(
void* dst,
size_t dpitch,
const void* src,
size_t spitch,
1081 size_t width,
size_t height, hipMemcpyKind kind,
1083 return hipCUDAErrorTohipError(cudaMemcpy2DAsync(dst, dpitch, src, spitch, width, height,
1084 hipMemcpyKindToCudaMemcpyKind(kind), stream));
1088 const void* src,
size_t spitch,
size_t width,
1089 size_t height, hipMemcpyKind kind) {
1090 return hipCUDAErrorTohipError(cudaMemcpy2DToArray(dst, wOffset, hOffset, src, spitch, width,
1091 height, hipMemcpyKindToCudaMemcpyKind(kind)));
1095 size_t hOffset,
const void* src,
1096 size_t count, hipMemcpyKind kind) {
1097 return hipCUDAErrorTohipError(
1098 cudaMemcpyToArray(dst, wOffset, hOffset, src, count, hipMemcpyKindToCudaMemcpyKind(kind)));
1102 size_t wOffset,
size_t hOffset,
1103 size_t count, hipMemcpyKind kind) {
1104 return hipCUDAErrorTohipError(cudaMemcpyFromArray(dst, srcArray, wOffset, hOffset, count,
1105 hipMemcpyKindToCudaMemcpyKind(kind)));
1110 return hipCUResultTohipError(cuMemcpyAtoH(dst, (CUarray)srcArray, srcOffset, count));
1115 return hipCUResultTohipError(cuMemcpyHtoA((CUarray)dstArray, dstOffset, srcHost, count));
1119 return hipCUDAErrorTohipError(cudaDeviceSynchronize());
1123 return hipCUDAErrorTohipError(cudaDeviceGetCacheConfig(pCacheConfig));
1127 return hipCUDAErrorTohipError(cudaFuncSetAttribute(func, attr, value));
1131 return hipCUDAErrorTohipError(cudaDeviceSetCacheConfig(cacheConfig));
1135 return hipCUDAErrorTohipError(cudaFuncSetSharedMemConfig(func, config));
1139 return cudaGetErrorString(hipErrorToCudaError(error));
1143 return cudaGetErrorName(hipErrorToCudaError(error));
1147 return hipCUDAErrorTohipError(cudaGetDeviceCount(count));
1151 return hipCUDAErrorTohipError(cudaGetDevice(device));
1155 return hipCUDAErrorTohipError(cudaIpcCloseMemHandle(devPtr));
1159 return hipCUDAErrorTohipError(cudaIpcGetEventHandle(handle, event));
1163 return hipCUDAErrorTohipError(cudaIpcGetMemHandle(handle, devPtr));
1167 return hipCUDAErrorTohipError(cudaIpcOpenEventHandle(event, handle));
1171 unsigned int flags) {
1172 return hipCUDAErrorTohipError(cudaIpcOpenMemHandle(devPtr, handle, flags));
1175 inline static hipError_t
hipMemset(
void* devPtr,
int value,
size_t count) {
1176 return hipCUDAErrorTohipError(cudaMemset(devPtr, value, count));
1179 inline static hipError_t
hipMemsetD32(hipDeviceptr_t devPtr,
int value,
size_t count) {
1180 return hipCUResultTohipError(cuMemsetD32(devPtr, value, count));
1183 inline static hipError_t
hipMemsetAsync(
void* devPtr,
int value,
size_t count,
1185 return hipCUDAErrorTohipError(cudaMemsetAsync(devPtr, value, count, stream));
1188 inline static hipError_t
hipMemsetD32Async(hipDeviceptr_t devPtr,
int value,
size_t count,
1190 return hipCUResultTohipError(cuMemsetD32Async(devPtr, value, count, stream));
1193 inline static hipError_t
hipMemsetD8(hipDeviceptr_t dest,
unsigned char value,
size_t sizeBytes) {
1194 return hipCUResultTohipError(cuMemsetD8(dest, value, sizeBytes));
1197 inline static hipError_t
hipMemsetD8Async(hipDeviceptr_t dest,
unsigned char value,
size_t sizeBytes,
1199 return hipCUResultTohipError(cuMemsetD8Async(dest, value, sizeBytes, stream));
1202 inline static hipError_t
hipMemsetD16(hipDeviceptr_t dest,
unsigned short value,
size_t sizeBytes) {
1203 return hipCUResultTohipError(cuMemsetD16(dest, value, sizeBytes));
1206 inline static hipError_t
hipMemsetD16Async(hipDeviceptr_t dest,
unsigned short value,
size_t sizeBytes,
1208 return hipCUResultTohipError(cuMemsetD16Async(dest, value, sizeBytes, stream));
1211 inline static hipError_t
hipMemset2D(
void* dst,
size_t pitch,
int value,
size_t width,
size_t height) {
1212 return hipCUDAErrorTohipError(cudaMemset2D(dst, pitch, value, width, height));
1215 inline static hipError_t
hipMemset2DAsync(
void* dst,
size_t pitch,
int value,
size_t width,
size_t height,
hipStream_t stream __dparm(0)) {
1216 return hipCUDAErrorTohipError(cudaMemset2DAsync(dst, pitch, value, width, height, stream));
1220 return hipCUDAErrorTohipError(cudaMemset3D(pitchedDevPtr, value, extent));
1224 return hipCUDAErrorTohipError(cudaMemset3DAsync(pitchedDevPtr, value, extent, stream));
1228 struct cudaDeviceProp cdprop;
1230 cerror = cudaGetDeviceProperties(&cdprop, device);
1232 strncpy(p_prop->
name, cdprop.name, 256);
1236 p_prop->
warpSize = cdprop.warpSize;
1238 for (
int i = 0; i < 3; i++) {
1246 p_prop->
major = cdprop.major;
1247 p_prop->
minor = cdprop.minor;
1254 int ccVers = p_prop->
major * 100 + p_prop->
minor * 10;
1275 p_prop->
pciBusID = cdprop.pciBusID;
1296 p_prop->
memPitch = cdprop.memPitch;
1303 return hipCUDAErrorTohipError(cerror);
1307 enum cudaDeviceAttr cdattr;
1312 cdattr = cudaDevAttrMaxThreadsPerBlock;
1315 cdattr = cudaDevAttrMaxBlockDimX;
1318 cdattr = cudaDevAttrMaxBlockDimY;
1321 cdattr = cudaDevAttrMaxBlockDimZ;
1324 cdattr = cudaDevAttrMaxGridDimX;
1327 cdattr = cudaDevAttrMaxGridDimY;
1330 cdattr = cudaDevAttrMaxGridDimZ;
1333 cdattr = cudaDevAttrMaxSharedMemoryPerBlock;
1336 cdattr = cudaDevAttrTotalConstantMemory;
1339 cdattr = cudaDevAttrWarpSize;
1342 cdattr = cudaDevAttrMaxRegistersPerBlock;
1345 cdattr = cudaDevAttrClockRate;
1348 cdattr = cudaDevAttrMemoryClockRate;
1351 cdattr = cudaDevAttrGlobalMemoryBusWidth;
1354 cdattr = cudaDevAttrMultiProcessorCount;
1357 cdattr = cudaDevAttrComputeMode;
1360 cdattr = cudaDevAttrL2CacheSize;
1363 cdattr = cudaDevAttrMaxThreadsPerMultiProcessor;
1366 cdattr = cudaDevAttrComputeCapabilityMajor;
1369 cdattr = cudaDevAttrComputeCapabilityMinor;
1372 cdattr = cudaDevAttrConcurrentKernels;
1375 cdattr = cudaDevAttrPciBusId;
1378 cdattr = cudaDevAttrPciDeviceId;
1381 cdattr = cudaDevAttrMaxSharedMemoryPerMultiprocessor;
1384 cdattr = cudaDevAttrIsMultiGpuBoard;
1387 cdattr = cudaDevAttrIntegrated;
1390 cdattr = cudaDevAttrMaxTexture1DWidth;
1393 cdattr = cudaDevAttrMaxTexture2DWidth;
1396 cdattr = cudaDevAttrMaxTexture2DHeight;
1399 cdattr = cudaDevAttrMaxTexture3DWidth;
1402 cdattr = cudaDevAttrMaxTexture3DHeight;
1405 cdattr = cudaDevAttrMaxTexture3DDepth;
1408 cdattr = cudaDevAttrMaxPitch;
1411 cdattr = cudaDevAttrTextureAlignment;
1414 cdattr = cudaDevAttrTexturePitchAlignment;
1417 cdattr = cudaDevAttrKernelExecTimeout;
1420 cdattr = cudaDevAttrCanMapHostMemory;
1423 cdattr = cudaDevAttrEccEnabled;
1426 cdattr = cudaDevAttrCooperativeLaunch;
1429 cdattr = cudaDevAttrCooperativeMultiDeviceLaunch;
1432 return hipCUDAErrorTohipError(cudaErrorInvalidValue);
1435 cerror = cudaDeviceGetAttribute(pi, cdattr, device);
1437 return hipCUDAErrorTohipError(cerror);
1443 size_t dynamicSMemSize) {
1444 return hipCUDAErrorTohipError(cudaOccupancyMaxActiveBlocksPerMultiprocessor(numBlocks, func,
1445 blockSize, dynamicSMemSize));
1451 size_t dynamicSMemSize,
1452 unsigned int flags) {
1453 return hipCUDAErrorTohipError(cudaOccupancyMaxActiveBlocksPerMultiprocessorWithFlags(numBlocks, func,
1454 blockSize, dynamicSMemSize, flags));
1460 size_t dynamicSMemSize ){
1461 return hipCUResultTohipError(cuOccupancyMaxActiveBlocksPerMultiprocessor(numBlocks, f,
1462 blockSize, dynamicSMemSize));
1468 size_t dynamicSMemSize,
1469 unsigned int flags ) {
1470 return hipCUResultTohipError(cuOccupancyMaxActiveBlocksPerMultiprocessorWithFlags(numBlocks,f,
1471 blockSize, dynamicSMemSize, flags));
1477 int blockSizeLimit){
1478 return hipCUResultTohipError(cuOccupancyMaxPotentialBlockSize(gridSize, blockSize, f, NULL,
1479 dynSharedMemPerBlk, blockSizeLimit));
1485 int blockSizeLimit,
unsigned int flags){
1486 return hipCUResultTohipError(cuOccupancyMaxPotentialBlockSizeWithFlags(gridSize, blockSize, f, NULL,
1487 dynSharedMemPerBlk, blockSizeLimit, flags));
1491 struct cudaPointerAttributes cPA;
1492 hipError_t err = hipCUDAErrorTohipError(cudaPointerGetAttributes(&cPA, ptr));
1494 #if (CUDART_VERSION >= 11000)
1495 auto memType = cPA.type;
1497 unsigned memType = cPA.memoryType;
1500 case cudaMemoryTypeDevice:
1503 case cudaMemoryTypeHost:
1507 return hipErrorUnknown;
1509 attributes->device = cPA.device;
1510 attributes->devicePointer = cPA.devicePointer;
1511 attributes->hostPointer = cPA.hostPointer;
1512 attributes->isManaged = 0;
1513 attributes->allocationFlags = 0;
1518 inline static hipError_t
hipMemGetInfo(
size_t* free,
size_t* total) {
1519 return hipCUDAErrorTohipError(cudaMemGetInfo(free, total));
1523 return hipCUDAErrorTohipError(cudaEventCreate(event));
1527 return hipCUDAErrorTohipError(cudaEventRecord(event, stream));
1531 return hipCUDAErrorTohipError(cudaEventSynchronize(event));
1535 return hipCUDAErrorTohipError(cudaEventElapsedTime(ms, start, stop));
1539 return hipCUDAErrorTohipError(cudaEventDestroy(event));
1543 return hipCUDAErrorTohipError(cudaStreamCreateWithFlags(stream, flags));
1547 return hipCUDAErrorTohipError(cudaStreamCreateWithPriority(stream, flags, priority));
1551 return hipCUDAErrorTohipError(cudaDeviceGetStreamPriorityRange(leastPriority, greatestPriority));
1555 return hipCUDAErrorTohipError(cudaStreamCreate(stream));
1559 return hipCUDAErrorTohipError(cudaStreamSynchronize(stream));
1563 return hipCUDAErrorTohipError(cudaStreamDestroy(stream));
1567 return hipCUDAErrorTohipError(cudaStreamGetFlags(stream, flags));
1571 return hipCUDAErrorTohipError(cudaStreamGetPriority(stream, priority));
1575 unsigned int flags) {
1576 return hipCUDAErrorTohipError(cudaStreamWaitEvent(stream, event, flags));
1580 return hipCUDAErrorTohipError(cudaStreamQuery(stream));
1584 void* userData,
unsigned int flags) {
1585 return hipCUDAErrorTohipError(
1586 cudaStreamAddCallback(stream, (cudaStreamCallback_t)callback, userData, flags));
1590 cudaError_t err = cudaDriverGetVersion(driverVersion);
1595 return hipCUDAErrorTohipError(err);
1599 return hipCUDAErrorTohipError(cudaRuntimeGetVersion(runtimeVersion));
1603 return hipCUDAErrorTohipError(cudaDeviceCanAccessPeer(canAccessPeer, device, peerDevice));
1607 return hipCUDAErrorTohipError(cudaDeviceDisablePeerAccess(peerDevice));
1611 return hipCUDAErrorTohipError(cudaDeviceEnablePeerAccess(peerDevice, flags));
1615 return hipCUResultTohipError(cuCtxDisablePeerAccess(peerCtx));
1619 return hipCUResultTohipError(cuCtxEnablePeerAccess(peerCtx, flags));
1624 return hipCUResultTohipError(cuDevicePrimaryCtxGetState(dev, flags, active));
1628 return hipCUResultTohipError(cuDevicePrimaryCtxRelease(dev));
1632 return hipCUResultTohipError(cuDevicePrimaryCtxRetain(pctx, dev));
1636 return hipCUResultTohipError(cuDevicePrimaryCtxReset(dev));
1640 return hipCUResultTohipError(cuDevicePrimaryCtxSetFlags(dev, flags));
1644 hipDeviceptr_t dptr) {
1645 return hipCUResultTohipError(cuMemGetAddressRange(pbase, psize, dptr));
1648 inline static hipError_t
hipMemcpyPeer(
void* dst,
int dstDevice,
const void* src,
int srcDevice,
1650 return hipCUDAErrorTohipError(cudaMemcpyPeer(dst, dstDevice, src, srcDevice, count));
1653 inline static hipError_t
hipMemcpyPeerAsync(
void* dst,
int dstDevice,
const void* src,
1654 int srcDevice,
size_t count,
1656 return hipCUDAErrorTohipError(
1657 cudaMemcpyPeerAsync(dst, dstDevice, src, srcDevice, count, stream));
1661 inline static hipError_t
hipProfilerStart() {
return hipCUDAErrorTohipError(cudaProfilerStart()); }
1663 inline static hipError_t
hipProfilerStop() {
return hipCUDAErrorTohipError(cudaProfilerStop()); }
1666 return hipCUDAErrorTohipError(cudaGetDeviceFlags(flags));
1670 return hipCUDAErrorTohipError(cudaSetDeviceFlags(flags));
1674 return hipCUDAErrorTohipError(cudaEventCreateWithFlags(event, flags));
1678 return hipCUDAErrorTohipError(cudaEventQuery(event));
1682 return hipCUResultTohipError(cuCtxCreate(ctx, flags, device));
1686 return hipCUResultTohipError(cuCtxDestroy(ctx));
1690 return hipCUResultTohipError(cuCtxPopCurrent(ctx));
1694 return hipCUResultTohipError(cuCtxPushCurrent(ctx));
1698 return hipCUResultTohipError(cuCtxSetCurrent(ctx));
1702 return hipCUResultTohipError(cuCtxGetCurrent(ctx));
1706 return hipCUResultTohipError(cuCtxGetDevice(device));
1710 return hipCUResultTohipError(cuCtxGetApiVersion(ctx, (
unsigned int*)apiVersion));
1714 return hipCUResultTohipError(cuCtxGetCacheConfig(cacheConfig));
1718 return hipCUResultTohipError(cuCtxSetCacheConfig(cacheConfig));
1722 return hipCUResultTohipError(cuCtxSetSharedMemConfig((CUsharedconfig)config));
1726 return hipCUResultTohipError(cuCtxGetSharedMemConfig((CUsharedconfig*)pConfig));
1730 return hipCUResultTohipError(cuCtxSynchronize());
1734 return hipCUResultTohipError(cuCtxGetFlags(flags));
1737 inline static hipError_t hipCtxDetach(
hipCtx_t ctx) {
1738 return hipCUResultTohipError(cuCtxDetach(ctx));
1741 inline static hipError_t
hipDeviceGet(hipDevice_t* device,
int ordinal) {
1742 return hipCUResultTohipError(cuDeviceGet(device, ordinal));
1746 return hipCUResultTohipError(cuDeviceComputeCapability(major, minor, device));
1749 inline static hipError_t
hipDeviceGetName(
char* name,
int len, hipDevice_t device) {
1750 return hipCUResultTohipError(cuDeviceGetName(name, len, device));
1754 int srcDevice,
int dstDevice) {
1755 return hipCUDAErrorTohipError(cudaDeviceGetP2PAttribute(value, attr, srcDevice, dstDevice));
1759 return hipCUDAErrorTohipError(cudaDeviceGetPCIBusId(pciBusId, len, device));
1763 return hipCUDAErrorTohipError(cudaDeviceGetByPCIBusId(device, pciBusId));
1767 return hipCUDAErrorTohipError(cudaDeviceGetSharedMemConfig(config));
1771 return hipCUDAErrorTohipError(cudaDeviceSetSharedMemConfig(config));
1775 return hipCUDAErrorTohipError(cudaDeviceGetLimit(pValue, limit));
1779 return hipCUResultTohipError(cuDeviceTotalMem(bytes, device));
1783 return hipCUResultTohipError(cuModuleLoad(module, fname));
1787 return hipCUResultTohipError(cuModuleUnload(hmod));
1791 const char* kname) {
1792 return hipCUResultTohipError(cuModuleGetFunction(
function, module, kname));
1795 inline static hipError_t hipModuleGetTexRef(hipTexRef* pTexRef,
hipModule_t hmod,
const char* name){
1796 hipCUResultTohipError(cuModuleGetTexRef(pTexRef, hmod, name));
1800 return hipCUDAErrorTohipError(cudaFuncGetAttributes(attr, func));
1804 return hipCUResultTohipError(cuFuncGetAttribute(value, attrib, hfunc));
1809 return hipCUResultTohipError(cuModuleGetGlobal(dptr, bytes, hmod, name));
1813 return hipCUResultTohipError(cuModuleLoadData(module, image));
1817 unsigned int numOptions, hipJitOption* options,
1818 void** optionValues) {
1819 return hipCUResultTohipError(
1820 cuModuleLoadDataEx(module, image, numOptions, options, optionValues));
1824 dim3 dimBlocks,
void** args,
size_t sharedMemBytes,
1827 return hipCUDAErrorTohipError(cudaLaunchKernel(function_address,numBlocks,dimBlocks,args,sharedMemBytes,stream));
1831 unsigned int gridDimY,
unsigned int gridDimZ,
1832 unsigned int blockDimX,
unsigned int blockDimY,
1833 unsigned int blockDimZ,
unsigned int sharedMemBytes,
1836 return hipCUResultTohipError(cuLaunchKernel(f, gridDimX, gridDimY, gridDimZ, blockDimX,
1837 blockDimY, blockDimZ, sharedMemBytes, stream,
1838 kernelParams, extra));
1842 return hipCUDAErrorTohipError(cudaFuncSetCacheConfig(func, cacheConfig));
1845 __HIP_DEPRECATED
inline static hipError_t hipBindTexture(
size_t* offset,
1849 size_t size __dparm(UINT_MAX)) {
1850 return hipCUDAErrorTohipError(cudaBindTexture(offset, tex, devPtr, desc, size));
1853 __HIP_DEPRECATED
inline static hipError_t hipBindTexture2D(
1856 return hipCUDAErrorTohipError(cudaBindTexture2D(offset, tex, devPtr, desc, width, height, pitch));
1860 hipChannelFormatKind f) {
1861 return cudaCreateChannelDesc(x, y, z, w, hipChannelFormatKindToCudaChannelFormatKind(f));
1864 inline static hipError_t hipCreateTextureObject(hipTextureObject_t* pTexObject,
1868 return hipCUDAErrorTohipError(
1869 cudaCreateTextureObject(pTexObject, pResDesc, pTexDesc, pResViewDesc));
1872 inline static hipError_t hipDestroyTextureObject(hipTextureObject_t textureObject) {
1873 return hipCUDAErrorTohipError(cudaDestroyTextureObject(textureObject));
1878 return hipCUDAErrorTohipError(cudaCreateSurfaceObject(pSurfObject, pResDesc));
1882 return hipCUDAErrorTohipError(cudaDestroySurfaceObject(surfaceObject));
1885 inline static hipError_t hipGetTextureObjectResourceDesc(
hipResourceDesc* pResDesc,
1886 hipTextureObject_t textureObject) {
1887 return hipCUDAErrorTohipError(cudaGetTextureObjectResourceDesc( pResDesc, textureObject));
1890 __HIP_DEPRECATED
inline static hipError_t hipGetTextureAlignmentOffset(
1892 return hipCUDAErrorTohipError(cudaGetTextureAlignmentOffset(offset,texref));
1897 return hipCUDAErrorTohipError(cudaGetChannelDesc(desc,array));
1900 inline static hipError_t hipLaunchCooperativeKernel(
const void* f,
dim3 gridDim,
dim3 blockDim,
1901 void** kernelParams,
unsigned int sharedMemBytes,
1903 return hipCUDAErrorTohipError(
1904 cudaLaunchCooperativeKernel(f, gridDim, blockDim, kernelParams, sharedMemBytes, stream));
1907 inline static hipError_t hipLaunchCooperativeKernelMultiDevice(
hipLaunchParams* launchParamsList,
1908 int numDevices,
unsigned int flags) {
1909 return hipCUDAErrorTohipError(cudaLaunchCooperativeKernelMultiDevice(launchParamsList, numDevices, flags));
1922 size_t dynamicSMemSize) {
1923 return hipCUDAErrorTohipError(cudaOccupancyMaxActiveBlocksPerMultiprocessor(numBlocks, func,
1924 blockSize, dynamicSMemSize));
1929 size_t dynamicSMemSize = 0,
1930 int blockSizeLimit = 0) {
1931 return hipCUDAErrorTohipError(cudaOccupancyMaxPotentialBlockSize(minGridSize, blockSize, func,
1932 dynamicSMemSize, blockSizeLimit));
1936 inline static hipError_t hipOccupancyMaxPotentialBlockSizeWithFlags(
int* minGridSize,
int* blockSize, T func,
1937 size_t dynamicSMemSize = 0,
1938 int blockSizeLimit = 0,
unsigned int flags = 0) {
1939 return hipCUDAErrorTohipError(cudaOccupancyMaxPotentialBlockSize(minGridSize, blockSize, func,
1940 dynamicSMemSize, blockSizeLimit, flags));
1945 int blockSize,
size_t dynamicSMemSize,
unsigned int flags) {
1946 return hipCUDAErrorTohipError(cudaOccupancyMaxActiveBlocksPerMultiprocessorWithFlags(numBlocks, func,
1947 blockSize, dynamicSMemSize, flags));
1950 template <
class T,
int dim, enum cudaTextureReadMode readMode>
1951 inline static hipError_t hipBindTexture(
size_t* offset,
const struct texture<T, dim, readMode>& tex,
1952 const void* devPtr,
size_t size = UINT_MAX) {
1953 return hipCUDAErrorTohipError(cudaBindTexture(offset, tex, devPtr, size));
1956 template <
class T,
int dim, enum cudaTextureReadMode readMode>
1957 inline static hipError_t hipBindTexture(
size_t* offset,
struct texture<T, dim, readMode>& tex,
1959 size_t size = UINT_MAX) {
1960 return hipCUDAErrorTohipError(cudaBindTexture(offset, tex, devPtr, desc, size));
1963 template <
class T,
int dim, enum cudaTextureReadMode readMode>
1964 __HIP_DEPRECATED
inline static hipError_t hipUnbindTexture(
struct texture<T, dim, readMode>* tex) {
1965 return hipCUDAErrorTohipError(cudaUnbindTexture(tex));
1968 template <
class T,
int dim, enum cudaTextureReadMode readMode>
1969 __HIP_DEPRECATED
inline static hipError_t hipUnbindTexture(
struct texture<T, dim, readMode>& tex) {
1970 return hipCUDAErrorTohipError(cudaUnbindTexture(tex));
1973 template <
class T,
int dim, enum cudaTextureReadMode readMode>
1974 __HIP_DEPRECATED
inline static hipError_t hipBindTextureToArray(
1977 return hipCUDAErrorTohipError(cudaBindTextureToArray(tex, array, desc));
1980 template <
class T,
int dim, enum cudaTextureReadMode readMode>
1981 __HIP_DEPRECATED
inline static hipError_t hipBindTextureToArray(
1984 return hipCUDAErrorTohipError(cudaBindTextureToArray(tex, array, desc));
1987 template <
class T,
int dim, enum cudaTextureReadMode readMode>
1988 __HIP_DEPRECATED
inline static hipError_t hipBindTextureToArray(
1990 return hipCUDAErrorTohipError(cudaBindTextureToArray(tex, array));
1995 return cudaCreateChannelDesc<T>();
1999 inline static hipError_t hipLaunchCooperativeKernel(T f,
dim3 gridDim,
dim3 blockDim,
2000 void** kernelParams,
unsigned int sharedMemBytes,
hipStream_t stream) {
2001 return hipCUDAErrorTohipError(
2002 cudaLaunchCooperativeKernel(f, gridDim, blockDim, kernelParams, sharedMemBytes, stream));
2005 inline static hipError_t hipTexRefSetAddressMode(hipTexRef hTexRef,
int dim, hipAddress_mode am){
2006 return hipCUResultTohipError(cuTexRefSetAddressMode(hTexRef,dim,am));
2009 inline static hipError_t hipTexRefSetFilterMode(hipTexRef hTexRef, hipFilter_mode fm){
2010 return hipCUResultTohipError(cuTexRefSetFilterMode(hTexRef,fm));
2013 inline static hipError_t hipTexRefSetAddress(
size_t *ByteOffset, hipTexRef hTexRef, hipDeviceptr_t dptr,
size_t bytes){
2014 return hipCUResultTohipError(cuTexRefSetAddress(ByteOffset,hTexRef,dptr,bytes));
2017 inline static hipError_t hipTexRefSetAddress2D(hipTexRef hTexRef,
const CUDA_ARRAY_DESCRIPTOR *desc, hipDeviceptr_t dptr,
size_t Pitch){
2018 return hipCUResultTohipError(cuTexRefSetAddress2D(hTexRef,desc,dptr,Pitch));
2021 inline static hipError_t hipTexRefSetFormat(hipTexRef hTexRef, hipArray_Format fmt,
int NumPackedComponents){
2022 return hipCUResultTohipError(cuTexRefSetFormat(hTexRef,fmt,NumPackedComponents));
2025 inline static hipError_t hipTexRefSetFlags(hipTexRef hTexRef,
unsigned int Flags){
2026 return hipCUResultTohipError(cuTexRefSetFlags(hTexRef,Flags));
2029 inline static hipError_t hipTexRefSetArray(hipTexRef hTexRef,
hiparray hArray,
unsigned int Flags){
2030 return hipCUResultTohipError(cuTexRefSetArray(hTexRef,hArray,Flags));
2034 return hipCUResultTohipError(cuArrayCreate(pHandle, pAllocateArray));
2037 inline static hipError_t hipArrayDestroy(
hiparray hArray){
2038 return hipCUResultTohipError(cuArrayDestroy(hArray));
2043 #endif // HIP_INCLUDE_HIP_NVCC_DETAIL_HIP_RUNTIME_API_H
Definition: hip_runtime_api.h:128
hipError_t hipCtxSynchronize(void)
Blocks until the default context has completed all preceding requested tasks.
Definition: hip_context.cpp:249
hipError_t hipPointerGetAttributes(hipPointerAttribute_t *attributes, const void *ptr)
Return attributes for the specified pointer.
Definition: hip_memory.cpp:617
@ hipDeviceAttributeMaxPitch
Maximum pitch in bytes allowed by memory copies.
Definition: hip_runtime_api.h:343
hipError_t hipMemset3DAsync(hipPitchedPtr pitchedDevPtr, int value, hipExtent extent, hipStream_t stream __dparm(0))
Asynchronously fills the memory area pointed to by pitchedDevPtr with the constant value.
hipError_t hipMemcpy3D(const struct hipMemcpy3DParms *p)
Copies data between host and device.
Definition: hip_memory.cpp:1712
hipError_t hipCtxGetCurrent(hipCtx_t *ctx)
Get the handle of the current/default context.
Definition: hip_context.cpp:167
int regsPerBlock
Registers per block.
Definition: hip_runtime_api.h:88
hipError_t hipMallocPitch(void **ptr, size_t *pitch, size_t width, size_t height)
Definition: hip_memory.cpp:851
hipError_t hipSetDevice(int deviceId)
Set default device to be used for subsequent hip API calls from this thread.
Definition: hip_device.cpp:132
hipError_t hipDeviceGetP2PAttribute(int *value, hipDeviceP2PAttr attr, int srcDevice, int dstDevice)
Returns a value for attr of link between two devices.
hipError_t hipMemsetD16Async(hipDeviceptr_t dest, unsigned short value, size_t count, hipStream_t stream __dparm(0))
Fills the first sizeBytes bytes of the memory area pointed to by dest with the constant short value v...
@ hipDeviceAttributeMemoryBusWidth
Global memory bus width in bits.
Definition: hip_runtime_api.h:314
const char * hipGetErrorString(hipError_t hipError)
Return handy text string message to explain the error which occurred.
Definition: hip_error.cpp:54
hipError_t hipGetDeviceFlags(unsigned int *flags)
Gets the flags set for current device.
hipError_t hipDeviceGetByPCIBusId(int *device, const char *pciBusId)
Returns a handle to a compute device.
Definition: hip_device.cpp:492
hipErrorInvalidMemcpyDirection
Invalid memory copy direction.
Definition: hip_runtime_api.h:220
hipError_t hipMalloc3DArray(hipArray **array, const struct hipChannelFormatDesc *desc, struct hipExtent extent, unsigned int flags)
Allocate an array on the device.
Definition: hip_memory.cpp:1091
unsigned hasGlobalInt64Atomics
64-bit integer atomics for global memory.
Definition: hip_runtime_api.h:54
int minor
Definition: hip_runtime_api.h:100
@ hipDeviceAttributeMaxBlockDimX
Maximum x-dimension of a block.
Definition: hip_runtime_api.h:298
hipErrorInvalidDevicePointer
Invalid Device Pointer.
Definition: hip_runtime_api.h:219
hipError_t hipChooseDevice(int *device, const hipDeviceProp_t *prop)
Device which matches hipDeviceProp_t is returned.
Definition: hip_device.cpp:518
hipError_t hipIpcCloseMemHandle(void *devPtr)
Close memory mapped with hipIpcOpenMemHandle.
Definition: hip_memory.cpp:2539
hipError_t hipMemcpy2DAsync(void *dst, size_t dpitch, const void *src, size_t spitch, size_t width, size_t height, hipMemcpyKind kind, hipStream_t stream __dparm(0))
Copies data between host and device.
hipError_t hipLaunchKernel(const void *function_address, dim3 numBlocks, dim3 dimBlocks, void **args, size_t sharedMemBytes __dparm(0), hipStream_t stream __dparm(0))
C compliant kernel launch API.
hipError_t hipMemsetD32(hipDeviceptr_t dest, int value, size_t count)
Fills the memory area pointed to by dest with the constant integer value for specified number of time...
Definition: hip_memory.cpp:2281
size_t texturePitchAlignment
Pitch alignment requirement for texture references bound to pitched memory.
Definition: hip_runtime_api.h:129
@ hipDeviceAttributeMaxGridDimX
Maximum x-dimension of a grid.
Definition: hip_runtime_api.h:301
unsigned hasThreadFenceSystem
__threadfence_system.
Definition: hip_runtime_api.h:67
hipError_t hipStreamCreate(hipStream_t *stream)
Create an asynchronous stream.
Definition: hip_stream.cpp:106
hipError_t hipOccupancyMaxActiveBlocksPerMultiprocessorWithFlags(int *numBlocks, const void *f, int blockSize, size_t dynSharedMemPerBlk, unsigned int flags __dparm(hipOccupancyDefault))
Returns occupancy for a device function.
hipError_t hipDeviceGetStreamPriorityRange(int *leastPriority, int *greatestPriority)
Returns numerical values that correspond to the least and greatest stream priority.
Definition: hip_stream.cpp:122
Definition: hip_runtime_api.h:120
int maxTexture3D[3]
Maximum dimensions (width, height, depth) of 3D images, in image elements.
Definition: hip_runtime_api.h:124
hipError_t hipStreamCreateWithPriority(hipStream_t *stream, unsigned int flags, int priority)
Create an asynchronous stream with the specified priority.
Definition: hip_stream.cpp:113
hipError_t hipCtxPushCurrent(hipCtx_t ctx)
Push the context to be set as the current/default context.
Definition: hip_context.cpp:154
hipError_t hipCtxGetDevice(hipDevice_t *device)
Get the handle of the device associated with current/default context.
Definition: hip_context.cpp:191
hipFuncCache_t
Definition: hip_runtime_api.h:308
int cooperativeMultiDeviceUnmatchedBlockDim
Definition: hip_runtime_api.h:137
hipError_t hipPeekAtLastError(void)
Return last error returned by any HIP runtime API call.
Definition: hip_error.cpp:41
hipError_t hipMemcpy3DAsync(const struct hipMemcpy3DParms *p, hipStream_t stream __dparm(0))
Copies data between host and device asynchronously.
hipError_t hipDeviceGetPCIBusId(char *pciBusId, int len, int device)
Returns a PCI Bus Id string for the device, overloaded to take int device ID.
Definition: hip_device.cpp:460
hipError_t hipHostGetFlags(unsigned int *flagsPtr, void *hostPtr)
Return flags associated with host pointer.
Definition: hip_memory.cpp:1133
hipErrorHostMemoryNotRegistered
Produced when trying to unlock memory that is not page-locked.
Definition: hip_runtime_api.h:271
hipErrorRuntimeOther
Definition: hip_runtime_api.h:284
@ hipDeviceAttributeClockRate
Peak clock frequency in kilohertz.
Definition: hip_runtime_api.h:312
hipError_t hipMemGetAddressRange(hipDeviceptr_t *pbase, size_t *psize, hipDeviceptr_t dptr)
Get information on memory allocations.
Definition: hip_memory.cpp:2437
unsigned long long hipSurfaceObject_t
Definition: hip_surface_types.h:36
hipError_t hipStreamWaitEvent(hipStream_t stream, hipEvent_t event, unsigned int flags)
Make the specified compute stream wait for an event.
Definition: hip_stream.cpp:130
@ hipDeviceAttributeMaxGridDimZ
Maximum z-dimension of a grid.
Definition: hip_runtime_api.h:303
hipError_t hipGetDevice(int *deviceId)
Return the default device id for the calling host thread.
Definition: hip_device.cpp:32
hipError_t hipModuleOccupancyMaxPotentialBlockSizeWithFlags(int *gridSize, int *blockSize, hipFunction_t f, size_t dynSharedMemPerBlk, int blockSizeLimit, unsigned int flags)
Determines the grid and block sizes that achieve maximum occupancy for a kernel.
Definition: hip_module.cpp:1672
hipError_t hipMallocArray(hipArray **array, const hipChannelFormatDesc *desc, size_t width, size_t height __dparm(0), unsigned int flags __dparm(hipArrayDefault))
Allocate an array on the device.
hipError_t hipMemcpyToArray(hipArray *dst, size_t wOffset, size_t hOffset, const void *src, size_t count, hipMemcpyKind kind)
Copies data between host and device.
Definition: hip_memory.cpp:1494
hipError_t hipModuleLoadData(hipModule_t *module, const void *image)
Builds a module from a code object that resides in host memory; image is a pointer to that location.
Definition: hip_module.cpp:1508
@ hipMemoryTypeDevice
Definition: hip_runtime_api.h:157
@ hipDeviceAttributeMaxRegistersPerBlock
Definition: hip_runtime_api.h:308
hipError_t hipMemcpyDtoDAsync(hipDeviceptr_t dst, hipDeviceptr_t src, size_t sizeBytes, hipStream_t stream)
Copy data from Device to Device asynchronously.
Definition: hip_memory.cpp:1429
hipErrorNoDevice
Call to hipGetDeviceCount returned 0 devices.
Definition: hip_runtime_api.h:225
@ hipDeviceAttributeComputeCapabilityMinor
Minor compute capability version number.
Definition: hip_runtime_api.h:322
hipError_t hipModuleLaunchKernel(hipFunction_t f, unsigned int gridDimX, unsigned int gridDimY, unsigned int gridDimZ, unsigned int blockDimX, unsigned int blockDimY, unsigned int blockDimZ, unsigned int sharedMemBytes, hipStream_t stream, void **kernelParams, void **extra)
launches kernel f with launch parameters and shared memory on stream with arguments passed to kernelp...
int l2CacheSize
L2 cache size.
Definition: hip_runtime_api.h:104
hipError_t hipDevicePrimaryCtxRelease(hipDevice_t dev)
Release the primary context on the GPU.
Definition: hip_context.cpp:285
hipError_t hipCtxGetApiVersion(hipCtx_t ctx, int *apiVersion)
Returns the approximate HIP api version.
Definition: hip_context.cpp:207
size_t textureAlignment
Alignment requirement for textures.
Definition: hip_runtime_api.h:128
hipError_t hipHostMalloc(void **ptr, size_t size, unsigned int flags)
Allocate device accessible page locked host memory.
Definition: hip_memory.cpp:762
@ hipDeviceAttributeKernelExecTimeout
Run time limit for kernels executed on the device.
Definition: hip_runtime_api.h:346
@ hipDeviceAttributeL2CacheSize
Definition: hip_runtime_api.h:317
hipError_t hipDeviceGetName(char *name, int len, hipDevice_t device)
Returns an identifier string for the device.
Definition: hip_device.cpp:446
@ hipDeviceAttributeMaxTexture3DWidth
Maximum dimension width of 3D images in image elements.
Definition: hip_runtime_api.h:336
unsigned hasSurfaceFuncs
Surface functions.
Definition: hip_runtime_api.h:71
@ hipDeviceAttributeIntegrated
iGPU
Definition: hip_runtime_api.h:330
int isMultiGpuBoard
1 if device is on a multi-GPU board, 0 if not.
Definition: hip_runtime_api.h:115
hipError_t hipMemcpyParam2DAsync(const hip_Memcpy2D *pCopy, hipStream_t stream __dparm(0))
Copies memory for 2D arrays.
hipError_t hipModuleUnload(hipModule_t module)
Frees the module.
Definition: hip_module.cpp:1244
@ hipDeviceAttributeMaxGridDimY
Maximum y-dimension of a grid.
Definition: hip_runtime_api.h:302
@ hipMemoryTypeHost
Memory is physically located on host.
Definition: hip_runtime_api.h:156
hipError_t hipDeviceEnablePeerAccess(int peerDeviceId, unsigned int flags)
Enable direct access from current device's virtual address space to memory allocations physically loc...
Definition: hip_peer.cpp:200
hipErrorInvalidContext
Produced when input context is invalid.
Definition: hip_runtime_api.h:228
unsigned hasSharedInt64Atomics
64-bit integer atomics for shared memory.
Definition: hip_runtime_api.h:55
int computeMode
Compute mode.
Definition: hip_runtime_api.h:106
@ hipDeviceAttributeIsMultiGpuBoard
Multiple GPU devices.
Definition: hip_runtime_api.h:329
hipSharedMemConfig
Definition: hip_runtime_api.h:318
int clockRate
Max clock frequency of the multiprocessors in kHz.
Definition: hip_runtime_api.h:93
hipErrorPeerAccessNotEnabled
Peer access was never enabled from the current device.
Definition: hip_runtime_api.h:265
hipError_t hipFuncGetAttribute(int *value, hipFunction_attribute attrib, hipFunction_t hfunc)
Find out a specific attribute for a given function.
Definition: hip_module.cpp:1427
hipError_t hipDeviceComputeCapability(int *major, int *minor, hipDevice_t device)
Returns the compute capability of the device.
Definition: hip_device.cpp:434
hipError_t hipModuleOccupancyMaxPotentialBlockSize(int *gridSize, int *blockSize, hipFunction_t f, size_t dynSharedMemPerBlk, int blockSizeLimit)
Determine the grid and block sizes to achieve maximum occupancy for a kernel.
Definition: hip_module.cpp:1662
void(* hipStreamCallback_t)(hipStream_t stream, hipError_t status, void *userData)
Definition: hip_runtime_api.h:972
unsigned hasDynamicParallelism
Dynamic parallelism.
Definition: hip_runtime_api.h:73
Definition: driver_types.h:91
int canMapHostMemory
Check whether HIP can map host memory.
Definition: hip_runtime_api.h:116
size_t sharedMemPerBlock
Size of shared memory region (in bytes).
Definition: hip_runtime_api.h:87
hipError_t hipCtxGetFlags(unsigned int *flags)
Return flags used for creating default context.
Definition: hip_context.cpp:254
hipError_t hipDevicePrimaryCtxGetState(hipDevice_t dev, unsigned int *flags, int *active)
Get the state of the primary context.
Definition: hip_context.cpp:263
@ hipDeviceAttributeCooperativeMultiDeviceLaunch
Support cooperative launch on multiple devices.
Definition: hip_runtime_api.h:332
int maxThreadsPerMultiProcessor
Maximum resident threads per multi-processor.
Definition: hip_runtime_api.h:105
hipError_t hipDeviceSetCacheConfig(hipFuncCache_t cacheConfig)
Set L1/Shared cache partition.
Definition: hip_device.cpp:74
hipError_t hipCtxDestroy(hipCtx_t ctx)
Destroy a HIP context.
Definition: hip_context.cpp:109
hipError_t hipCtxEnablePeerAccess(hipCtx_t peerCtx, unsigned int flags)
Enables direct access to memory allocations in a peer context.
Definition: hip_peer.cpp:221
int major
Definition: hip_runtime_api.h:97
@ hipDeviceAttributeMaxSharedMemoryPerBlock
Definition: hip_runtime_api.h:304
hipError_t hipMemcpyAtoH(void *dst, hipArray *srcArray, size_t srcOffset, size_t count)
Copies data between host and device.
Definition: hip_memory.cpp:1544
hipError_t hipGetDeviceCount(int *count)
Return number of compute-capable devices.
Definition: hip_device.cpp:69
hipSuccess
Successful completion.
Definition: hip_runtime_api.h:203
hipError_t hipHostUnregister(void *hostPtr)
Un-register host pointer.
Definition: hip_memory.cpp:1233
hipError_t hipStreamGetFlags(hipStream_t stream, unsigned int *flags)
Return flags associated with this stream.
Definition: hip_stream.cpp:223
hipError_t hipMemsetD8Async(hipDeviceptr_t dest, unsigned char value, size_t count, hipStream_t stream __dparm(0))
Fills the first sizeBytes bytes of the memory area pointed to by dest with the constant byte value va...
@ hipDeviceAttributeMaxThreadsPerBlock
Maximum number of threads per block.
Definition: hip_runtime_api.h:297
int gcnArch
AMD GCN Arch Value. Eg: 803, 701.
Definition: hip_runtime_api.h:117
hipError_t hipStreamSynchronize(hipStream_t stream)
Wait for all commands in stream to complete.
Definition: hip_stream.cpp:184
const char * hipGetErrorName(hipError_t hip_error)
Return name of the specified error code in text form.
Definition: hip_error.cpp:48
int kernelExecTimeoutEnabled
Run time limit for kernels executed on the device.
Definition: hip_runtime_api.h:130
hipError_t hipDeviceGet(hipDevice_t *device, int ordinal)
Returns a handle to a compute device.
Definition: hip_context.cpp:70
hipError_t hipMemcpyDtoD(hipDeviceptr_t dst, hipDeviceptr_t src, size_t sizeBytes)
Copy data from Device to Device.
Definition: hip_memory.cpp:1390
int maxTexture1D
Maximum number of elements in 1D images.
Definition: hip_runtime_api.h:122
Definition: driver_types.h:383
@ hipDeviceAttributeMaxBlockDimZ
Maximum z-dimension of a block.
Definition: hip_runtime_api.h:300
hipError_t hipMallocManaged(void **dev_ptr, size_t size, unsigned int flags __dparm(hipMemAttachGlobal))
Allocates memory that will be automatically managed by AMD HMM.
hipError_t hipMemcpyHtoD(hipDeviceptr_t dst, void *src, size_t sizeBytes)
Copy data from Host to Device.
Definition: hip_memory.cpp:1374
hipError_t hipDriverGetVersion(int *driverVersion)
Returns the approximate HIP driver version.
Definition: hip_context.cpp:85
unsigned hasDoubles
Double-precision floating point.
Definition: hip_runtime_api.h:58
hipErrorInvalidKernelFile
In CUDA DRV, it is CUDA_ERROR_INVALID_PTX.
Definition: hip_runtime_api.h:245
int maxThreadsPerBlock
Max work items per work group or workgroup max size.
Definition: hip_runtime_api.h:90
@ hipDeviceAttributeMaxBlockDimY
Maximum y-dimension of a block.
Definition: hip_runtime_api.h:299
hipError_t hipMemcpy2DToArray(hipArray *dst, size_t wOffset, size_t hOffset, const void *src, size_t spitch, size_t width, size_t height, hipMemcpyKind kind)
Copies data between host and device.
Definition: hip_memory.cpp:1444
hipError_t hipMemAllocPitch(hipDeviceptr_t *dptr, size_t *pitch, size_t widthInBytes, size_t height, unsigned int elementSizeBytes)
Definition: hip_memory.cpp:862
Definition: hip_runtime_api.h:84
hipError_t hipMemAllocHost(void **ptr, size_t size)
Allocate pinned host memory [Deprecated].
Definition: hip_runtime_api.h:881
hipError_t hipMallocHost(void **ptr, size_t size)
Allocate pinned host memory [Deprecated].
Definition: hip_runtime_api.h:875
@ hipDeviceAttributeMaxTexture2DHeight
Maximum dimension height of 2D images in image elements.
Definition: hip_runtime_api.h:335
unsigned hasSharedInt32Atomics
32-bit integer atomics for shared memory.
Definition: hip_runtime_api.h:49
hipError_t hipFuncSetCacheConfig(const void *func, hipFuncCache_t config)
Set Cache configuration for a specific function.
Definition: hip_device.cpp:108
hipErrorInvalidValue
Definition: hip_runtime_api.h:204
size_t memPitch
Maximum pitch in bytes allowed by memory copies.
Definition: hip_runtime_api.h:127
hipError_t hipMemsetD32Async(hipDeviceptr_t dst, int value, size_t count, hipStream_t stream __dparm(0))
Fills the memory area pointed to by dev with the constant integer value for specified number of times...
int pciBusID
PCI Bus ID.
Definition: hip_runtime_api.h:112
hipError_t hipRuntimeGetVersion(int *runtimeVersion)
Returns the approximate HIP Runtime version.
Definition: hip_context.cpp:97
@ hipDeviceAttributeComputeCapabilityMajor
Major compute capability version number.
Definition: hip_runtime_api.h:321
hipError_t hipEventQuery(hipEvent_t event)
Query event status.
Definition: hip_event.cpp:394
@ hipDeviceAttributeMaxTexture3DDepth
Maximum dimensions depth of 3D images in image elements.
Definition: hip_runtime_api.h:338
Definition: hip_hcc_internal.h:938
hipErrorRuntimeMemory
Definition: hip_runtime_api.h:282
@ hipDeviceAttributeMaxThreadsPerMultiProcessor
Definition: hip_runtime_api.h:319
hipError_t hipStreamGetPriority(hipStream_t stream, int *priority)
Query the priority of a stream.
Definition: hip_stream.cpp:238
hipDeviceArch_t arch
Architectural feature flags. New for HIP.
Definition: hip_runtime_api.h:109
hipError_t hipEventSynchronize(hipEvent_t event)
Wait for an event to complete.
Definition: hip_event.cpp:300
hipError_t hipOccupancyMaxActiveBlocksPerMultiprocessor(int *numBlocks, const void *f, int blockSize, size_t dynSharedMemPerBlk)
Returns occupancy for a device function.
Definition: hip_module.cpp:1683
hipError_t hipHostFree(void *ptr)
Free memory allocated by the hcc hip host memory allocation API. This API performs an implicit hipDevi...
Definition: hip_memory.cpp:2396
@ hipDeviceAttributePciBusId
PCI Bus ID.
Definition: hip_runtime_api.h:325
hipError_t hipIpcOpenMemHandle(void **devPtr, hipIpcMemHandle_t handle, unsigned int flags)
Opens an interprocess memory handle exported from another process and returns a device pointer usable...
Definition: hip_memory.cpp:2494
hipError_t hipMemsetD16(hipDeviceptr_t dest, unsigned short value, size_t count)
Fills the first sizeBytes bytes of the memory area pointed to by dest with the constant short value v...
Definition: hip_memory.cpp:2271
int tccDriver
1 if the device is a Tesla device using the TCC driver, else 0.
Definition: hip_runtime_api.h:132
Definition: hip_hcc_internal.h:759
hipError_t hipDeviceGetLimit(size_t *pValue, enum hipLimit_t limit)
Get Resource limits of current device.
Definition: hip_device.cpp:94
hipError_t hipMalloc(void **ptr, size_t size)
Allocate memory on the default accelerator.
Definition: hip_memory.cpp:695
Definition: hip_runtime_api.h:111
hipError_t hipEventElapsedTime(float *ms, hipEvent_t start, hipEvent_t stop)
Return the elapsed time between two events.
Definition: hip_event.cpp:344
hipError_t hipGetLastError(void)
Return last error returned by any HIP runtime API call and resets the stored error code to hipSuccess...
Definition: hip_error.cpp:32
hipError_t hipInit(unsigned int flags)
Explicitly initializes the HIP runtime.
Definition: hip_context.cpp:39
Definition: hip_hcc_internal.h:580
@ hipDeviceAttributeTexturePitchAlignment
Pitch alignment requirement for 2D texture references bound to pitched memory.
Definition: hip_runtime_api.h:345
@ hipDeviceAttributeWarpSize
Warp size in threads.
Definition: hip_runtime_api.h:307
unsigned hasGlobalInt32Atomics
32-bit integer atomics for global memory.
Definition: hip_runtime_api.h:47
Definition: driver_types.h:78
unsigned hasSyncThreadsExt
__syncthreads_count, __syncthreads_and, __syncthreads_or.
Definition: hip_runtime_api.h:68
hipError_t hipIpcGetMemHandle(hipIpcMemHandle_t *handle, void *devPtr)
Gets an interprocess memory handle for an existing device memory allocation.
Definition: hip_memory.cpp:2458
hipErrorInvalidDevice
DeviceID must be in range 0...#compute-devices.
Definition: hip_runtime_api.h:226
unsigned hasFunnelShift
Funnel two words into one with shift&mask caps.
Definition: hip_runtime_api.h:64
hipError_t hipCtxDisablePeerAccess(hipCtx_t peerCtx)
Disable direct access from current context's virtual address space to memory allocations physically l...
Definition: hip_peer.cpp:227
@ hipDeviceAttributeMaxTexture3DHeight
Maximum dimensions height of 3D images in image elements.
Definition: hip_runtime_api.h:337
@ hipDeviceAttributeMemoryClockRate
Peak memory clock frequency in kilohertz.
Definition: hip_runtime_api.h:313
hipErrorNotReady
Definition: hip_runtime_api.h:256
hipError_t hipHostGetDevicePointer(void **devPtr, void *hstPtr, unsigned int flags)
Get Device pointer from Host Pointer allocated through hipHostMalloc.
hipError_t hipMemGetInfo(size_t *free, size_t *total)
Query memory info. Return snapshot of free memory, and total allocatable memory on the device.
Definition: hip_memory.cpp:2296
hipError_t hipEventDestroy(hipEvent_t event)
Destroy the specified event.
Definition: hip_event.cpp:278
hipError_t hipDeviceSetSharedMemConfig(hipSharedMemConfig config)
The bank width of shared memory on current device is set.
Definition: hip_device.cpp:116
hipError_t hipDeviceReset(void)
The state of current device is discarded and updated to a fresh state.
Definition: hip_device.cpp:148
int maxGridSize[3]
Max grid dimensions (XYZ).
Definition: hip_runtime_api.h:92
@ hipDeviceAttributeComputeMode
Compute mode that device is currently in.
Definition: hip_runtime_api.h:316
hipError_t hipSetDeviceFlags(unsigned flags)
The current device behavior is changed according to the flags passed.
@ hipDeviceAttributePciDeviceId
PCI Device ID.
Definition: hip_runtime_api.h:326
size_t maxSharedMemoryPerMultiProcessor
Maximum Shared Memory Per Multiprocessor.
Definition: hip_runtime_api.h:114
int clockInstructionRate
Definition: hip_runtime_api.h:107
Definition: hip_runtime_api.h:330
hipError_t hipStreamQuery(hipStream_t stream)
Return hipSuccess if all of the operations in the specified stream have completed,...
Definition: hip_stream.cpp:161
hipError_t hipDevicePrimaryCtxSetFlags(hipDevice_t dev, unsigned int flags)
Set flags for the primary context.
Definition: hip_context.cpp:321
Definition: hip_runtime_api.h:168
@ hipDeviceAttributeTotalConstantMemory
Constant memory size in bytes.
Definition: hip_runtime_api.h:306
hipError_t hipFree(void *ptr)
Free memory allocated by the hcc hip memory allocation API. This API performs an implicit hipDeviceSy...
Definition: hip_memory.cpp:2344
unsigned hasWarpShuffle
Warp shuffle operations. (__shfl_*).
Definition: hip_runtime_api.h:63
#define hipArrayDefault
Default HIP array allocation flag.
Definition: hip_runtime_api.h:221
hipError_t hipDevicePrimaryCtxRetain(hipCtx_t *pctx, hipDevice_t dev)
Retain the primary context on the GPU.
Definition: hip_context.cpp:296
hipError_t hipOccupancyMaxPotentialBlockSize(int *gridSize, int *blockSize, const void *f, size_t dynSharedMemPerBlk, int blockSizeLimit)
Determine the grid and block sizes to achieve maximum occupancy for a kernel.
hipError_t hipModuleLoad(hipModule_t *module, const char *fname)
Loads code object from file into a hipModule_t.
Definition: hip_module.cpp:1513
hipError_t hipModuleOccupancyMaxActiveBlocksPerMultiprocessorWithFlags(int *numBlocks, hipFunction_t f, int blockSize, size_t dynSharedMemPerBlk, unsigned int flags)
Returns occupancy for a device function.
Definition: hip_module.cpp:1714
hipError_t hipFreeHost(void *ptr)
Free memory allocated by the hcc hip host memory allocation API. [Deprecated].
Definition: hip_runtime_api.h:932
hipError_t hipMemcpyHtoA(hipArray *dstArray, size_t dstOffset, const void *srcHost, size_t count)
Copies data between host and device.
Definition: hip_memory.cpp:1528
hipError_t hipModuleGetFunction(hipFunction_t *function, hipModule_t module, const char *kname)
Function with kname will be extracted if present in module.
Definition: hip_module.cpp:1309
int memoryBusWidth
Global memory bus width in bits.
Definition: hip_runtime_api.h:95
hipError_t hipStreamAddCallback(hipStream_t stream, hipStreamCallback_t callback, void *userData, unsigned int flags)
Adds a callback to be called on the host after all currently enqueued items in the stream have comple...
Definition: hip_stream.cpp:258
unsigned hasWarpVote
Warp vote instructions (__any, __all).
Definition: hip_runtime_api.h:61
char name[256]
Device name.
Definition: hip_runtime_api.h:85
hipError_t hipMemcpyDtoHAsync(void *dst, hipDeviceptr_t src, size_t sizeBytes, hipStream_t stream)
Copy data from Device to Host asynchronously.
Definition: hip_memory.cpp:1437
unsigned hasGlobalFloatAtomicExch
32-bit float atomic exch for global memory.
Definition: hip_runtime_api.h:48
int concurrentKernels
Device can possibly execute multiple kernels concurrently.
Definition: hip_runtime_api.h:110
unsigned hasWarpBallot
Warp ballot instructions (__ballot).
Definition: hip_runtime_api.h:62
size_t totalGlobalMem
Size of global memory region (in bytes).
Definition: hip_runtime_api.h:86
@ hipDeviceAttributeTextureAlignment
Alignment requirement for textures.
Definition: hip_runtime_api.h:344
hipError_t hipFuncGetAttributes(struct hipFuncAttributes *attr, const void *func)
Find out attributes for a given function.
Definition: hip_module.cpp:1393
hipError_t hipEventRecord(hipEvent_t event, hipStream_t stream)
Record an event in the specified stream.
Definition: hip_event.cpp:213
hipError_t hipMemcpy2D(void *dst, size_t dpitch, const void *src, size_t spitch, size_t width, size_t height, hipMemcpyKind kind)
Copies data between host and device.
Definition: hip_memory.cpp:2020
Definition: driver_types.h:370
Definition: driver_types.h:363
hipError_t hipModuleGetGlobal(void **, size_t *, hipModule_t, const char *)
Returns the device memory pointer and size of the kernel present in the module with the given symbol name.
Definition: hip_module.cpp:1113
hipError_t hipMemset2D(void *dst, size_t pitch, int value, size_t width, size_t height)
Fills the memory area pointed to by dst with the constant value.
Definition: hip_memory.cpp:2251
hipError_t hipMemset3D(hipPitchedPtr pitchedDevPtr, int value, hipExtent extent)
Fills synchronously the memory area pointed to by pitchedDevPtr with the constant value.
Definition: hip_memory.cpp:2286
hipError_t hipStreamCreateWithFlags(hipStream_t *stream, unsigned int flags)
Create an asynchronous stream.
Definition: hip_stream.cpp:97
hipError_t hipDeviceGetAttribute(int *pi, hipDeviceAttribute_t attr, int deviceId)
Query for a specific device attribute.
Definition: hip_device.cpp:354
hipError_t hipMemcpyFromArray(void *dst, hipArray_const_t srcArray, size_t wOffset, size_t hOffset, size_t count, hipMemcpyKind kind)
Copies data between host and device.
Definition: hip_memory.cpp:1511
@ hipDeviceAttributeCanMapHostMemory
Device can map host memory into device address space.
Definition: hip_runtime_api.h:347
int maxThreadsDim[3]
Max number of threads in each dimension (XYZ) of a block.
Definition: hip_runtime_api.h:91
Definition: hip_module.cpp:108
hipError_t hipMemcpyPeerAsync(void *dst, int dstDeviceId, const void *src, int srcDevice, size_t sizeBytes, hipStream_t stream __dparm(0))
Copies memory from one device to memory on another device.
hipError_t hipMemcpyHtoDAsync(hipDeviceptr_t dst, void *src, size_t sizeBytes, hipStream_t stream)
Copy data from Host to Device asynchronously.
Definition: hip_memory.cpp:1422
int cooperativeMultiDeviceLaunch
HIP device supports cooperative launch on multiple devices.
Definition: hip_runtime_api.h:121
hipError_t hipMemcpyDtoH(void *dst, hipDeviceptr_t src, size_t sizeBytes)
Copy data from Device to Host.
Definition: hip_memory.cpp:1382
unsigned has3dGrid
Grid and group dims are 3D (rather than 2D).
Definition: hip_runtime_api.h:72
hipError_t hipDeviceGetCacheConfig(hipFuncCache_t *cacheConfig)
Get the cache configuration for the current device.
Definition: hip_device.cpp:82
hipError_t hipMemcpyPeer(void *dst, int dstDeviceId, const void *src, int srcDeviceId, size_t sizeBytes)
Copies memory from one device to memory on another device.
Definition: hip_peer.cpp:207
@ hipDeviceAttributeMaxTexture1DWidth
Maximum number of elements in 1D images.
Definition: hip_runtime_api.h:333
@ hipDeviceAttributeCooperativeLaunch
Support cooperative launch.
Definition: hip_runtime_api.h:331
@ hipDeviceAttributeMultiprocessorCount
Number of multiprocessors on the device.
Definition: hip_runtime_api.h:315
int pciDeviceID
PCI Device ID.
Definition: hip_runtime_api.h:113
hipError_t hipGetDeviceProperties(hipDeviceProp_t *prop, int deviceId)
Returns device properties.
Definition: hip_device.cpp:381
hipError_t hipMemcpy(void *dst, const void *src, size_t sizeBytes, hipMemcpyKind kind)
Copy data from src to dst.
Definition: hip_memory.cpp:1367
int memoryClockRate
Max global memory clock frequency in kHz.
Definition: hip_runtime_api.h:94
hipError_t hipEventCreateWithFlags(hipEvent_t *event, unsigned flags)
Create an event with the specified flags.
Definition: hip_event.cpp:201
hipErrorCooperativeLaunchTooLarge
Definition: hip_runtime_api.h:275
int warpSize
Warp size.
Definition: hip_runtime_api.h:89
hipError_t hipCtxGetSharedMemConfig(hipSharedMemConfig *pConfig)
Get Shared memory bank configuration.
Definition: hip_context.cpp:241
hipError_t hipDeviceTotalMem(size_t *bytes, hipDevice_t device)
Returns the total amount of memory on the device.
Definition: hip_device.cpp:480
hipError_t hipFreeArray(hipArray *array)
Frees an array on the device.
Definition: hip_memory.cpp:2409
hipErrorAssert
Produced when the kernel calls assert.
Definition: hip_runtime_api.h:268
Definition: texture_types.h:74
hipError_t hipCtxPopCurrent(hipCtx_t *ctx)
Pop the current/default context and return the popped context.
Definition: hip_context.cpp:133
int cooperativeMultiDeviceUnmatchedFunc
Definition: hip_runtime_api.h:133
int cooperativeMultiDeviceUnmatchedGridDim
Definition: hip_runtime_api.h:135
hipError_t hipDeviceCanAccessPeer(int *canAccessPeer, int deviceId, int peerDeviceId)
Determine if a device can access a peer's memory.
Definition: hip_peer.cpp:186
unsigned hasFloatAtomicAdd
32-bit float atomic add in global and shared memory.
Definition: hip_runtime_api.h:51
hipError_t hipCtxSetCurrent(hipCtx_t ctx)
Set the passed context as current/default.
Definition: hip_context.cpp:178
int cooperativeLaunch
HIP device supports cooperative launch.
Definition: hip_runtime_api.h:120
unsigned hasSharedFloatAtomicExch
32-bit float atomic exch for shared memory.
Definition: hip_runtime_api.h:50
Definition: texture_types.h:95
Definition: driver_types.h:323
int multiProcessorCount
Number of multi-processors (compute units).
Definition: hip_runtime_api.h:103
int integrated
APU vs dGPU.
Definition: hip_runtime_api.h:119
hipError_t hipMemsetD8(hipDeviceptr_t dest, unsigned char value, size_t count)
Fills the first sizeBytes bytes of the memory area pointed to by dest with the constant byte value va...
Definition: hip_memory.cpp:2261
hipError_t hipCtxSetCacheConfig(hipFuncCache_t cacheConfig)
Set L1/Shared cache partition.
Definition: hip_context.cpp:225
hipError_t hipMemset2DAsync(void *dst, size_t pitch, int value, size_t width, size_t height, hipStream_t stream __dparm(0))
Fills asynchronously the memory area pointed to by dst with the constant value.
int ECCEnabled
Device has ECC support enabled.
Definition: hip_runtime_api.h:131
Definition: driver_types.h:62
hipError_t hipCtxCreate(hipCtx_t *ctx, unsigned int flags, hipDevice_t device)
Create a context and set it as the current/default context.
Definition: hip_context.cpp:52
size_t totalConstMem
Size of constant memory region (in bytes).
Definition: hip_runtime_api.h:96
int maxTexture2D[2]
Maximum dimensions (width, height) of 2D images, in image elements.
Definition: hip_runtime_api.h:123
Definition: hip_runtime_api.h:339
hipErrorHostMemoryAlreadyRegistered
Produced when trying to lock a page-locked memory.
Definition: hip_runtime_api.h:269
hipFuncAttribute
Definition: hip_runtime_api.h:299
hipError_t hipCtxSetSharedMemConfig(hipSharedMemConfig config)
Set Shared memory bank configuration.
Definition: hip_context.cpp:233
hipError_t hipModuleOccupancyMaxActiveBlocksPerMultiprocessor(int *numBlocks, hipFunction_t f, int blockSize, size_t dynSharedMemPerBlk)
Returns occupancy for a device function.
Definition: hip_module.cpp:1693
hipDeviceAttribute_t
Definition: hip_runtime_api.h:296
hipError_t hipFuncSetSharedMemConfig(const void *func, hipSharedMemConfig config)
Set shared memory configuration for a specific function.
Definition: hip_module.cpp:1419
Definition: driver_types.h:262
hipErrorLaunchFailure
An exception occurred on the device while executing a kernel.
Definition: hip_runtime_api.h:273
Definition: hip_hcc_internal.h:415
hipError_t hipDeviceSynchronize(void)
Waits on all active streams on current device.
Definition: hip_device.cpp:143
@ hipDeviceAttributeConcurrentKernels
Definition: hip_runtime_api.h:323
int cooperativeMultiDeviceUnmatchedSharedMem
Definition: hip_runtime_api.h:139
hipError_t hipProfilerStart()
Start recording of profiling information When using this API, start the profiler with profiling disab...
Definition: hip_hcc.cpp:2496
hipError_t hipDeviceGetSharedMemConfig(hipSharedMemConfig *pConfig)
Returns bank width of shared memory for current device.
Definition: hip_device.cpp:124
hipErrorNotSupported
Produced when the hip API is not supported/implemented.
Definition: hip_runtime_api.h:279
hipError_t hipMemcpyAsync(void *dst, const void *src, size_t sizeBytes, hipMemcpyKind kind, hipStream_t stream __dparm(0))
Copies sizeBytes bytes from the memory area pointed to by src to the memory area pointed to by offset...
hipErrorLaunchOutOfResources
Out of resources error.
Definition: hip_runtime_api.h:261
hipError_t hipStreamDestroy(hipStream_t stream)
Destroys the specified stream.
Definition: hip_stream.cpp:195
hipError_t hipHostRegister(void *hostPtr, size_t sizeBytes, unsigned int flags)
Register host memory so it can be accessed from the current device.
Definition: hip_memory.cpp:1158
hipError_t hipFuncSetAttribute(const void *func, hipFuncAttribute attr, int value)
Set attribute for a specific function.
Definition: hip_module.cpp:1411
hipError_t hipProfilerStop()
Stop recording of profiling information. When using this API, start the profiler with profiling disab...
Definition: hip_hcc.cpp:2502
hipError_t hipModuleLoadDataEx(hipModule_t *module, const void *image, unsigned int numOptions, hipJitOption *options, void **optionValues)
Builds a module from a code object which resides in host memory. Image is a pointer to that location....
Definition: hip_module.cpp:1527
hipError_t hipEventCreate(hipEvent_t *event)
Definition: hip_event.cpp:207
hipError_t hipMemsetAsync(void *dst, int value, size_t sizeBytes, hipStream_t stream __dparm(0))
Fills the first sizeBytes bytes of the memory area pointed to by dev with the constant byte value val...
@ hipDeviceAttributeMaxTexture2DWidth
Maximum dimension width of 2D images in image elements.
Definition: hip_runtime_api.h:334
int pciDomainID
PCI Domain ID.
Definition: hip_runtime_api.h:111
hipError_t hipCtxGetCacheConfig(hipFuncCache_t *cacheConfig)
Get the cache configuration for the current context.
Definition: hip_context.cpp:217
hipError_t hipMemcpyParam2D(const hip_Memcpy2D *pCopy)
Copies memory for 2D arrays.
Definition: hip_memory.cpp:2144
hipError_t hipHostAlloc(void **ptr, size_t size, unsigned int flags)
Allocate device accessible page locked host memory [Deprecated].
Definition: hip_runtime_api.h:887
hipError_t hipMemset(void *dst, int value, size_t sizeBytes)
Fills the first sizeBytes bytes of the memory area pointed to by dest with the constant byte value va...
Definition: hip_memory.cpp:2220
hipError_t hipDeviceDisablePeerAccess(int peerDeviceId)
Disable direct access from current device's virtual address space to memory allocations physically lo...
Definition: hip_peer.cpp:193
@ hipDeviceAttributeEccEnabled
Device has ECC support enabled.
Definition: hip_runtime_api.h:348
@ hipDeviceAttributeMaxSharedMemoryPerMultiprocessor
Definition: hip_runtime_api.h:327
hipError_t hipDevicePrimaryCtxReset(hipDevice_t dev)
Resets the primary context on the GPU.
Definition: hip_context.cpp:308
hipErrorPeerAccessAlreadyEnabled
Peer access was already enabled from the current device.
Definition: hip_runtime_api.h:263