HIP: Heterogeneous-computing Interface for Portability
|
23 #ifndef HIP_INCLUDE_HIP_NVCC_DETAIL_HIP_RUNTIME_API_H
24 #define HIP_INCLUDE_HIP_NVCC_DETAIL_HIP_RUNTIME_API_H
26 #include <cuda_runtime_api.h>
28 #include <cuda_profiler_api.h>
// Expands to `= x`, so a parameter written as `T p __dparm(v)` gets v as its default argument
// (used further down in this header, e.g. `hipStream_t stream __dparm(0)`).
35 #define __dparm(x) = x
41 #if defined(__DOXYGEN_ONLY__) || defined(HIP_ENABLE_DEPRECATED)
42 #define __HIP_DEPRECATED
43 #elif defined(_MSC_VER)
44 #define __HIP_DEPRECATED __declspec(deprecated)
45 #elif defined(__GNUC__)
46 #define __HIP_DEPRECATED __attribute__((deprecated))
48 #define __HIP_DEPRECATED
57 typedef enum hipMemcpyKind {
59 hipMemcpyHostToDevice,
60 hipMemcpyDeviceToHost,
61 hipMemcpyDeviceToDevice,
// Data-type tags: hipDataType is a macro alias for cudaDataType, and each HIP_R_*/HIP_C_* name
// expands to the CUDA_R_*/CUDA_C_* name with the same suffix.
66 #define hipDataType cudaDataType
67 #define HIP_R_16F CUDA_R_16F
68 #define HIP_R_32F CUDA_R_32F
69 #define HIP_R_64F CUDA_R_64F
70 #define HIP_C_16F CUDA_C_16F
71 #define HIP_C_32F CUDA_C_32F
72 #define HIP_C_64F CUDA_C_64F
// Library-property names: the HIP spellings expand to the unprefixed libraryPropertyType /
// MAJOR_VERSION / MINOR_VERSION / PATCH_LEVEL identifiers.
75 #define hipLibraryPropertyType libraryPropertyType
76 #define HIP_LIBRARY_MAJOR_VERSION MAJOR_VERSION
77 #define HIP_LIBRARY_MINOR_VERSION MINOR_VERSION
78 #define HIP_LIBRARY_PATCH_LEVEL PATCH_LEVEL
// Texture address modes: hipTextureAddressMode is a typedef of the CUDA enum, and every
// hipAddressMode* macro expands to the cudaAddressMode* enumerator of the same suffix.
81 typedef enum cudaTextureAddressMode hipTextureAddressMode;
82 #define hipAddressModeWrap cudaAddressModeWrap
83 #define hipAddressModeClamp cudaAddressModeClamp
84 #define hipAddressModeMirror cudaAddressModeMirror
85 #define hipAddressModeBorder cudaAddressModeBorder
// Texture filter modes: typedef of the CUDA enum plus macro aliases for its Point and Linear
// enumerators.
88 typedef enum cudaTextureFilterMode hipTextureFilterMode;
89 #define hipFilterModePoint cudaFilterModePoint
90 #define hipFilterModeLinear cudaFilterModeLinear
// Texture read modes: typedef of the CUDA enum plus macro aliases for its ElementType and
// NormalizedFloat enumerators.
93 typedef enum cudaTextureReadMode hipTextureReadMode;
94 #define hipReadModeElementType cudaReadModeElementType
95 #define hipReadModeNormalizedFloat cudaReadModeNormalizedFloat
// Channel format kinds: typedef of the CUDA enum plus macro aliases for the Signed, Unsigned,
// Float and None enumerators.
98 typedef enum cudaChannelFormatKind hipChannelFormatKind;
99 #define hipChannelFormatKindSigned cudaChannelFormatKindSigned
100 #define hipChannelFormatKindUnsigned cudaChannelFormatKindUnsigned
101 #define hipChannelFormatKindFloat cudaChannelFormatKindFloat
102 #define hipChannelFormatKindNone cudaChannelFormatKindNone
// Surface boundary modes: unlike the texture enums above, the type name itself is a macro alias
// (not a typedef); the Zero/Trap/Clamp values alias the cudaBoundaryMode* names.
104 #define hipSurfaceBoundaryMode cudaSurfaceBoundaryMode
105 #define hipBoundaryModeZero cudaBoundaryModeZero
106 #define hipBoundaryModeTrap cudaBoundaryModeTrap
107 #define hipBoundaryModeClamp cudaBoundaryModeClamp
// Cache-preference values: each hipFuncCachePrefer* name expands to the cudaFuncCachePrefer*
// name with the same suffix.
110 #define hipFuncCachePreferNone cudaFuncCachePreferNone
111 #define hipFuncCachePreferShared cudaFuncCachePreferShared
112 #define hipFuncCachePreferL1 cudaFuncCachePreferL1
113 #define hipFuncCachePreferEqual cudaFuncCachePreferEqual
// Resource types: hipResourceType and its Array / MipmappedArray / Linear / Pitch2D values are
// macro aliases of the cudaResourceType* names.
116 #define hipResourceType cudaResourceType
117 #define hipResourceTypeArray cudaResourceTypeArray
118 #define hipResourceTypeMipmappedArray cudaResourceTypeMipmappedArray
119 #define hipResourceTypeLinear cudaResourceTypeLinear
120 #define hipResourceTypePitch2D cudaResourceTypePitch2D
// Event flags. The first four expand to the cudaEvent* flag of the same suffix.
// hipEventReleaseToDevice and hipEventReleaseToSystem are both defined as 0 in this header, so
// OR-ing either into a flag word leaves it unchanged.
126 #define hipEventDefault cudaEventDefault
127 #define hipEventBlockingSync cudaEventBlockingSync
128 #define hipEventDisableTiming cudaEventDisableTiming
129 #define hipEventInterprocess cudaEventInterprocess
130 #define hipEventReleaseToDevice 0
131 #define hipEventReleaseToSystem 0
// Host allocation flags. Note the spelling change: hipHostMalloc* expands to cudaHostAlloc*.
// hipHostMallocCoherent and hipHostMallocNonCoherent are both 0x0 here, so they contribute no
// bits when combined with the other flags.
134 #define hipHostMallocDefault cudaHostAllocDefault
135 #define hipHostMallocPortable cudaHostAllocPortable
136 #define hipHostMallocMapped cudaHostAllocMapped
137 #define hipHostMallocWriteCombined cudaHostAllocWriteCombined
138 #define hipHostMallocCoherent 0x0
139 #define hipHostMallocNonCoherent 0x0
// Memory-attach flags: aliases for cudaMemAttachGlobal and cudaMemAttachHost.
141 #define hipMemAttachGlobal cudaMemAttachGlobal
142 #define hipMemAttachHost cudaMemAttachHost
// Host-register flags: each hipHostRegister* name expands to the cudaHostRegister* name with the
// same suffix.
144 #define hipHostRegisterDefault cudaHostRegisterDefault
145 #define hipHostRegisterPortable cudaHostRegisterPortable
146 #define hipHostRegisterMapped cudaHostRegisterMapped
147 #define hipHostRegisterIoMemory cudaHostRegisterIoMemory
// Mixed group of aliases: the HIP_LAUNCH_PARAM_* markers expand to CU_LAUNCH_PARAM_* names,
// while the limit, IPC and occupancy names expand to cuda*-prefixed names.
149 #define HIP_LAUNCH_PARAM_BUFFER_POINTER CU_LAUNCH_PARAM_BUFFER_POINTER
150 #define HIP_LAUNCH_PARAM_BUFFER_SIZE CU_LAUNCH_PARAM_BUFFER_SIZE
151 #define HIP_LAUNCH_PARAM_END CU_LAUNCH_PARAM_END
152 #define hipLimitMallocHeapSize cudaLimitMallocHeapSize
153 #define hipIpcMemLazyEnablePeerAccess cudaIpcMemLazyEnablePeerAccess
155 #define hipOccupancyDefault cudaOccupancyDefault
// Multi-device cooperative-launch flags: NoPreSync / NoPostSync aliases for the cuda* names.
// Each definition is split across two lines with a backslash continuation.
157 #define hipCooperativeLaunchMultiDeviceNoPreSync \
158 cudaCooperativeLaunchMultiDeviceNoPreSync
159 #define hipCooperativeLaunchMultiDeviceNoPostSync \
160 cudaCooperativeLaunchMultiDeviceNoPostSync
// JIT option names: each hipJitOption* macro expands to a CU_JIT_* name.
// Two names are not a mechanical case conversion of each other:
//   hipJitOptionTargetFromContext -> CU_JIT_TARGET_FROM_CUCONTEXT
//   hipJitOptionSm3xOpt           -> CU_JIT_NEW_SM3X_OPT
164 #define hipJitOptionMaxRegisters CU_JIT_MAX_REGISTERS
165 #define hipJitOptionThreadsPerBlock CU_JIT_THREADS_PER_BLOCK
166 #define hipJitOptionWallTime CU_JIT_WALL_TIME
167 #define hipJitOptionInfoLogBuffer CU_JIT_INFO_LOG_BUFFER
168 #define hipJitOptionInfoLogBufferSizeBytes CU_JIT_INFO_LOG_BUFFER_SIZE_BYTES
169 #define hipJitOptionErrorLogBuffer CU_JIT_ERROR_LOG_BUFFER
170 #define hipJitOptionErrorLogBufferSizeBytes CU_JIT_ERROR_LOG_BUFFER_SIZE_BYTES
171 #define hipJitOptionOptimizationLevel CU_JIT_OPTIMIZATION_LEVEL
172 #define hipJitOptionTargetFromContext CU_JIT_TARGET_FROM_CUCONTEXT
173 #define hipJitOptionTarget CU_JIT_TARGET
174 #define hipJitOptionFallbackStrategy CU_JIT_FALLBACK_STRATEGY
175 #define hipJitOptionGenerateDebugInfo CU_JIT_GENERATE_DEBUG_INFO
176 #define hipJitOptionLogVerbose CU_JIT_LOG_VERBOSE
177 #define hipJitOptionGenerateLineInfo CU_JIT_GENERATE_LINE_INFO
178 #define hipJitOptionCacheMode CU_JIT_CACHE_MODE
179 #define hipJitOptionSm3xOpt CU_JIT_NEW_SM3X_OPT
180 #define hipJitOptionFastCompile CU_JIT_FAST_COMPILE
181 #define hipJitOptionNumOptions CU_JIT_NUM_OPTIONS
// HIP type names introduced as typedefs: hipLimit_t and hipDeviceP2PAttr alias cuda* enums;
// hipFuncCache, hipJitOption, hipDevice_t and hipDeviceptr_t alias CU*-prefixed types.
187 typedef enum cudaLimit hipLimit_t;
192 typedef CUfunc_cache hipFuncCache;
193 typedef CUjit_option hipJitOption;
194 typedef CUdevice hipDevice_t;
195 typedef enum cudaDeviceP2PAttr hipDeviceP2PAttr;
198 typedef CUdeviceptr hipDeviceptr_t;
// Macro aliases for three type names (hipFunction_attribute, hip_Memcpy2D, hipMemcpy3DParms)
// followed by the hipArray* flag names, which expand to the cudaArray* names of the same suffix.
204 #define hipFunction_attribute CUfunction_attribute
205 #define hip_Memcpy2D CUDA_MEMCPY2D
206 #define hipMemcpy3DParms cudaMemcpy3DParms
207 #define hipArrayDefault cudaArrayDefault
208 #define hipArrayLayered cudaArrayLayered
209 #define hipArraySurfaceLoadStore cudaArraySurfaceLoadStore
210 #define hipArrayCubemap cudaArrayCubemap
211 #define hipArrayTextureGather cudaArrayTextureGather
// hipTextureObject_t is a typedef of cudaTextureObject_t; the hipTextureType* macros expand to
// the cudaTextureType* names, and hipDeviceMapHost expands to cudaDeviceMapHost.
213 typedef cudaTextureObject_t hipTextureObject_t;
215 #define hipTextureType1D cudaTextureType1D
216 #define hipTextureType1DLayered cudaTextureType1DLayered
217 #define hipTextureType2D cudaTextureType2D
218 #define hipTextureType2DLayered cudaTextureType2DLayered
219 #define hipTextureType3D cudaTextureType3D
220 #define hipDeviceMapHost cudaDeviceMapHost
// Constructor-style helpers: make_hipExtent / make_hipPos / make_hipPitchedPtr are plain macro
// renames of the make_cuda* functions, so arguments pass through untouched.
224 #define make_hipExtent make_cudaExtent
225 #define make_hipPos make_cudaPos
226 #define make_hipPitchedPtr make_cudaPitchedPtr
// Stream flags: aliases for cudaStreamDefault and cudaStreamNonBlocking.
228 #define hipStreamDefault cudaStreamDefault
229 #define hipStreamNonBlocking cudaStreamNonBlocking
// Shared-memory bank-size values: Default / FourByte / EightByte aliases for the
// cudaSharedMemBankSize* names.
236 #define hipSharedMemBankSizeDefault cudaSharedMemBankSizeDefault
237 #define hipSharedMemBankSizeFourByte cudaSharedMemBankSizeFourByte
238 #define hipSharedMemBankSizeEightByte cudaSharedMemBankSizeEightByte
// Function-attribute selectors: every HIP_FUNC_ATTRIBUTE_* name expands to the
// CU_FUNC_ATTRIBUTE_* name with the same suffix (including the _MAX sentinel).
241 #define HIP_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK CU_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK
242 #define HIP_FUNC_ATTRIBUTE_SHARED_SIZE_BYTES CU_FUNC_ATTRIBUTE_SHARED_SIZE_BYTES
243 #define HIP_FUNC_ATTRIBUTE_CONST_SIZE_BYTES CU_FUNC_ATTRIBUTE_CONST_SIZE_BYTES
244 #define HIP_FUNC_ATTRIBUTE_LOCAL_SIZE_BYTES CU_FUNC_ATTRIBUTE_LOCAL_SIZE_BYTES
245 #define HIP_FUNC_ATTRIBUTE_NUM_REGS CU_FUNC_ATTRIBUTE_NUM_REGS
246 #define HIP_FUNC_ATTRIBUTE_PTX_VERSION CU_FUNC_ATTRIBUTE_PTX_VERSION
247 #define HIP_FUNC_ATTRIBUTE_BINARY_VERSION CU_FUNC_ATTRIBUTE_BINARY_VERSION
248 #define HIP_FUNC_ATTRIBUTE_CACHE_MODE_CA CU_FUNC_ATTRIBUTE_CACHE_MODE_CA
249 #define HIP_FUNC_ATTRIBUTE_MAX_DYNAMIC_SHARED_SIZE_BYTES CU_FUNC_ATTRIBUTE_MAX_DYNAMIC_SHARED_SIZE_BYTES
250 #define HIP_FUNC_ATTRIBUTE_PREFERRED_SHARED_MEMORY_CARVEOUT CU_FUNC_ATTRIBUTE_PREFERRED_SHARED_MEMORY_CARVEOUT
251 #define HIP_FUNC_ATTRIBUTE_MAX CU_FUNC_ATTRIBUTE_MAX
// Shuffle compatibility shims, active only when CUDA_VERSION >= 9000: the mask-less names
// __shfl, __shfl_up, __shfl_down and __shfl_xor are defined as variadic macros that forward
// their arguments to the matching *_sync intrinsic with the mask fixed at 0xffffffff.
// NOTE(review): a hard-coded all-ones mask presumably assumes every lane of the warp reaches
// the call together — confirm for call sites that shuffle inside divergent code.
253 #if CUDA_VERSION >= 9000
254 #define __shfl(...) __shfl_sync(0xffffffff, __VA_ARGS__)
255 #define __shfl_up(...) __shfl_up_sync(0xffffffff, __VA_ARGS__)
256 #define __shfl_down(...) __shfl_down_sync(0xffffffff, __VA_ARGS__)
257 #define __shfl_xor(...) __shfl_xor_sync(0xffffffff, __VA_ARGS__)
258 #endif // CUDA_VERSION >= 9000
260 inline static hipError_t hipCUDAErrorTohipError(cudaError_t cuError) {
264 case cudaErrorProfilerDisabled:
265 return hipErrorProfilerDisabled;
266 case cudaErrorProfilerNotInitialized:
267 return hipErrorProfilerNotInitialized;
268 case cudaErrorProfilerAlreadyStarted:
269 return hipErrorProfilerAlreadyStarted;
270 case cudaErrorProfilerAlreadyStopped:
271 return hipErrorProfilerAlreadyStopped;
272 case cudaErrorInsufficientDriver:
273 return hipErrorInsufficientDriver;
274 case cudaErrorUnsupportedLimit:
275 return hipErrorUnsupportedLimit;
276 case cudaErrorPeerAccessUnsupported:
277 return hipErrorPeerAccessUnsupported;
278 case cudaErrorInvalidGraphicsContext:
279 return hipErrorInvalidGraphicsContext;
280 case cudaErrorSharedObjectSymbolNotFound:
281 return hipErrorSharedObjectSymbolNotFound;
282 case cudaErrorSharedObjectInitFailed:
283 return hipErrorSharedObjectInitFailed;
284 case cudaErrorOperatingSystem:
285 return hipErrorOperatingSystem;
286 case cudaErrorSetOnActiveProcess:
287 return hipErrorSetOnActiveProcess;
288 case cudaErrorIllegalAddress:
289 return hipErrorIllegalAddress;
290 case cudaErrorInvalidSymbol:
291 return hipErrorInvalidSymbol;
292 case cudaErrorMissingConfiguration:
293 return hipErrorMissingConfiguration;
294 case cudaErrorMemoryAllocation:
295 return hipErrorOutOfMemory;
296 case cudaErrorInitializationError:
297 return hipErrorNotInitialized;
298 case cudaErrorLaunchFailure:
300 case cudaErrorCooperativeLaunchTooLarge:
302 case cudaErrorPriorLaunchFailure:
303 return hipErrorPriorLaunchFailure;
304 case cudaErrorLaunchOutOfResources:
306 case cudaErrorInvalidDeviceFunction:
307 return hipErrorInvalidDeviceFunction;
308 case cudaErrorInvalidConfiguration:
309 return hipErrorInvalidConfiguration;
310 case cudaErrorInvalidDevice:
312 case cudaErrorInvalidValue:
314 case cudaErrorInvalidDevicePointer:
316 case cudaErrorInvalidMemcpyDirection:
318 case cudaErrorInvalidResourceHandle:
319 return hipErrorInvalidHandle;
320 case cudaErrorNotReady:
322 case cudaErrorNoDevice:
324 case cudaErrorPeerAccessAlreadyEnabled:
326 case cudaErrorPeerAccessNotEnabled:
328 case cudaErrorHostMemoryAlreadyRegistered:
330 case cudaErrorHostMemoryNotRegistered:
332 case cudaErrorMapBufferObjectFailed:
333 return hipErrorMapFailed;
334 case cudaErrorAssert:
336 case cudaErrorNotSupported:
338 case cudaErrorCudartUnloading:
339 return hipErrorDeinitialized;
340 case cudaErrorInvalidKernelImage:
341 return hipErrorInvalidImage;
342 case cudaErrorUnmapBufferObjectFailed:
343 return hipErrorUnmapFailed;
344 case cudaErrorNoKernelImageForDevice:
345 return hipErrorNoBinaryForGpu;
346 case cudaErrorECCUncorrectable:
347 return hipErrorECCNotCorrectable;
348 case cudaErrorDeviceAlreadyInUse:
349 return hipErrorContextAlreadyInUse;
350 case cudaErrorInvalidPtx:
352 case cudaErrorLaunchTimeout:
353 return hipErrorLaunchTimeOut;
354 #if CUDA_VERSION >= 10010
355 case cudaErrorInvalidSource:
356 return hipErrorInvalidSource;
357 case cudaErrorFileNotFound:
358 return hipErrorFileNotFound;
359 case cudaErrorSymbolNotFound:
360 return hipErrorNotFound;
361 case cudaErrorArrayIsMapped:
362 return hipErrorArrayIsMapped;
363 case cudaErrorNotMappedAsPointer:
364 return hipErrorNotMappedAsPointer;
365 case cudaErrorNotMappedAsArray:
366 return hipErrorNotMappedAsArray;
367 case cudaErrorNotMapped:
368 return hipErrorNotMapped;
369 case cudaErrorAlreadyAcquired:
370 return hipErrorAlreadyAcquired;
371 case cudaErrorAlreadyMapped:
372 return hipErrorAlreadyMapped;
374 #if CUDA_VERSION >= 10020
375 case cudaErrorDeviceUninitialized:
378 case cudaErrorUnknown:
380 return hipErrorUnknown;
384 inline static hipError_t hipCUResultTohipError(CUresult cuError) {
388 case CUDA_ERROR_OUT_OF_MEMORY:
389 return hipErrorOutOfMemory;
390 case CUDA_ERROR_INVALID_VALUE:
392 case CUDA_ERROR_INVALID_DEVICE:
394 case CUDA_ERROR_DEINITIALIZED:
395 return hipErrorDeinitialized;
396 case CUDA_ERROR_NO_DEVICE:
398 case CUDA_ERROR_INVALID_CONTEXT:
400 case CUDA_ERROR_NOT_INITIALIZED:
401 return hipErrorNotInitialized;
402 case CUDA_ERROR_INVALID_HANDLE:
403 return hipErrorInvalidHandle;
404 case CUDA_ERROR_MAP_FAILED:
405 return hipErrorMapFailed;
406 case CUDA_ERROR_PROFILER_DISABLED:
407 return hipErrorProfilerDisabled;
408 case CUDA_ERROR_PROFILER_NOT_INITIALIZED:
409 return hipErrorProfilerNotInitialized;
410 case CUDA_ERROR_PROFILER_ALREADY_STARTED:
411 return hipErrorProfilerAlreadyStarted;
412 case CUDA_ERROR_PROFILER_ALREADY_STOPPED:
413 return hipErrorProfilerAlreadyStopped;
414 case CUDA_ERROR_INVALID_IMAGE:
415 return hipErrorInvalidImage;
416 case CUDA_ERROR_CONTEXT_ALREADY_CURRENT:
417 return hipErrorContextAlreadyCurrent;
418 case CUDA_ERROR_UNMAP_FAILED:
419 return hipErrorUnmapFailed;
420 case CUDA_ERROR_ARRAY_IS_MAPPED:
421 return hipErrorArrayIsMapped;
422 case CUDA_ERROR_ALREADY_MAPPED:
423 return hipErrorAlreadyMapped;
424 case CUDA_ERROR_NO_BINARY_FOR_GPU:
425 return hipErrorNoBinaryForGpu;
426 case CUDA_ERROR_ALREADY_ACQUIRED:
427 return hipErrorAlreadyAcquired;
428 case CUDA_ERROR_NOT_MAPPED:
429 return hipErrorNotMapped;
430 case CUDA_ERROR_NOT_MAPPED_AS_ARRAY:
431 return hipErrorNotMappedAsArray;
432 case CUDA_ERROR_NOT_MAPPED_AS_POINTER:
433 return hipErrorNotMappedAsPointer;
434 case CUDA_ERROR_ECC_UNCORRECTABLE:
435 return hipErrorECCNotCorrectable;
436 case CUDA_ERROR_UNSUPPORTED_LIMIT:
437 return hipErrorUnsupportedLimit;
438 case CUDA_ERROR_CONTEXT_ALREADY_IN_USE:
439 return hipErrorContextAlreadyInUse;
440 case CUDA_ERROR_PEER_ACCESS_UNSUPPORTED:
441 return hipErrorPeerAccessUnsupported;
442 case CUDA_ERROR_INVALID_PTX:
444 case CUDA_ERROR_INVALID_GRAPHICS_CONTEXT:
445 return hipErrorInvalidGraphicsContext;
446 case CUDA_ERROR_INVALID_SOURCE:
447 return hipErrorInvalidSource;
448 case CUDA_ERROR_FILE_NOT_FOUND:
449 return hipErrorFileNotFound;
450 case CUDA_ERROR_SHARED_OBJECT_SYMBOL_NOT_FOUND:
451 return hipErrorSharedObjectSymbolNotFound;
452 case CUDA_ERROR_SHARED_OBJECT_INIT_FAILED:
453 return hipErrorSharedObjectInitFailed;
454 case CUDA_ERROR_OPERATING_SYSTEM:
455 return hipErrorOperatingSystem;
456 case CUDA_ERROR_NOT_FOUND:
457 return hipErrorNotFound;
458 case CUDA_ERROR_NOT_READY:
460 case CUDA_ERROR_ILLEGAL_ADDRESS:
461 return hipErrorIllegalAddress;
462 case CUDA_ERROR_LAUNCH_OUT_OF_RESOURCES:
464 case CUDA_ERROR_LAUNCH_TIMEOUT:
465 return hipErrorLaunchTimeOut;
466 case CUDA_ERROR_PEER_ACCESS_ALREADY_ENABLED:
468 case CUDA_ERROR_PEER_ACCESS_NOT_ENABLED:
470 case CUDA_ERROR_PRIMARY_CONTEXT_ACTIVE:
471 return hipErrorSetOnActiveProcess;
472 case CUDA_ERROR_ASSERT:
474 case CUDA_ERROR_HOST_MEMORY_ALREADY_REGISTERED:
476 case CUDA_ERROR_HOST_MEMORY_NOT_REGISTERED:
478 case CUDA_ERROR_LAUNCH_FAILED:
480 case CUDA_ERROR_COOPERATIVE_LAUNCH_TOO_LARGE:
482 case CUDA_ERROR_NOT_SUPPORTED:
484 case CUDA_ERROR_UNKNOWN:
486 return hipErrorUnknown;
490 inline static cudaError_t hipErrorToCudaError(hipError_t hError) {
494 case hipErrorOutOfMemory:
495 return cudaErrorMemoryAllocation;
496 case hipErrorProfilerDisabled:
497 return cudaErrorProfilerDisabled;
498 case hipErrorProfilerNotInitialized:
499 return cudaErrorProfilerNotInitialized;
500 case hipErrorProfilerAlreadyStarted:
501 return cudaErrorProfilerAlreadyStarted;
502 case hipErrorProfilerAlreadyStopped:
503 return cudaErrorProfilerAlreadyStopped;
504 case hipErrorInvalidConfiguration:
505 return cudaErrorInvalidConfiguration;
507 return cudaErrorLaunchOutOfResources;
509 return cudaErrorInvalidValue;
510 case hipErrorInvalidHandle:
511 return cudaErrorInvalidResourceHandle;
513 return cudaErrorInvalidDevice;
515 return cudaErrorInvalidMemcpyDirection;
517 return cudaErrorInvalidDevicePointer;
518 case hipErrorNotInitialized:
519 return cudaErrorInitializationError;
521 return cudaErrorNoDevice;
523 return cudaErrorNotReady;
525 return cudaErrorPeerAccessNotEnabled;
527 return cudaErrorPeerAccessAlreadyEnabled;
529 return cudaErrorHostMemoryAlreadyRegistered;
531 return cudaErrorHostMemoryNotRegistered;
532 case hipErrorDeinitialized:
533 return cudaErrorCudartUnloading;
534 case hipErrorInvalidSymbol:
535 return cudaErrorInvalidSymbol;
536 case hipErrorInsufficientDriver:
537 return cudaErrorInsufficientDriver;
538 case hipErrorMissingConfiguration:
539 return cudaErrorMissingConfiguration;
540 case hipErrorPriorLaunchFailure:
541 return cudaErrorPriorLaunchFailure;
542 case hipErrorInvalidDeviceFunction:
543 return cudaErrorInvalidDeviceFunction;
544 case hipErrorInvalidImage:
545 return cudaErrorInvalidKernelImage;
547 #if CUDA_VERSION >= 10020
548 return cudaErrorDeviceUninitialized;
550 return cudaErrorUnknown;
552 case hipErrorMapFailed:
553 return cudaErrorMapBufferObjectFailed;
554 case hipErrorUnmapFailed:
555 return cudaErrorUnmapBufferObjectFailed;
556 case hipErrorArrayIsMapped:
557 #if CUDA_VERSION >= 10010
558 return cudaErrorArrayIsMapped;
560 return cudaErrorUnknown;
562 case hipErrorAlreadyMapped:
563 #if CUDA_VERSION >= 10010
564 return cudaErrorAlreadyMapped;
566 return cudaErrorUnknown;
568 case hipErrorNoBinaryForGpu:
569 return cudaErrorNoKernelImageForDevice;
570 case hipErrorAlreadyAcquired:
571 #if CUDA_VERSION >= 10010
572 return cudaErrorAlreadyAcquired;
574 return cudaErrorUnknown;
576 case hipErrorNotMapped:
577 #if CUDA_VERSION >= 10010
578 return cudaErrorNotMapped;
580 return cudaErrorUnknown;
582 case hipErrorNotMappedAsArray:
583 #if CUDA_VERSION >= 10010
584 return cudaErrorNotMappedAsArray;
586 return cudaErrorUnknown;
588 case hipErrorNotMappedAsPointer:
589 #if CUDA_VERSION >= 10010
590 return cudaErrorNotMappedAsPointer;
592 return cudaErrorUnknown;
594 case hipErrorECCNotCorrectable:
595 return cudaErrorECCUncorrectable;
596 case hipErrorUnsupportedLimit:
597 return cudaErrorUnsupportedLimit;
598 case hipErrorContextAlreadyInUse:
599 return cudaErrorDeviceAlreadyInUse;
600 case hipErrorPeerAccessUnsupported:
601 return cudaErrorPeerAccessUnsupported;
603 return cudaErrorInvalidPtx;
604 case hipErrorInvalidGraphicsContext:
605 return cudaErrorInvalidGraphicsContext;
606 case hipErrorInvalidSource:
607 #if CUDA_VERSION >= 10010
608 return cudaErrorInvalidSource;
610 return cudaErrorUnknown;
612 case hipErrorFileNotFound:
613 #if CUDA_VERSION >= 10010
614 return cudaErrorFileNotFound;
616 return cudaErrorUnknown;
618 case hipErrorSharedObjectSymbolNotFound:
619 return cudaErrorSharedObjectSymbolNotFound;
620 case hipErrorSharedObjectInitFailed:
621 return cudaErrorSharedObjectInitFailed;
622 case hipErrorOperatingSystem:
623 return cudaErrorOperatingSystem;
624 case hipErrorNotFound:
625 #if CUDA_VERSION >= 10010
626 return cudaErrorSymbolNotFound;
628 return cudaErrorUnknown;
630 case hipErrorIllegalAddress:
631 return cudaErrorIllegalAddress;
632 case hipErrorLaunchTimeOut:
633 return cudaErrorLaunchTimeout;
634 case hipErrorSetOnActiveProcess:
635 return cudaErrorSetOnActiveProcess;
637 return cudaErrorLaunchFailure;
639 return cudaErrorCooperativeLaunchTooLarge;
641 return cudaErrorNotSupported;
646 case hipErrorUnknown:
649 return cudaErrorUnknown;
653 inline static enum cudaMemcpyKind hipMemcpyKindToCudaMemcpyKind(hipMemcpyKind kind) {
655 case hipMemcpyHostToHost:
656 return cudaMemcpyHostToHost;
657 case hipMemcpyHostToDevice:
658 return cudaMemcpyHostToDevice;
659 case hipMemcpyDeviceToHost:
660 return cudaMemcpyDeviceToHost;
661 case hipMemcpyDeviceToDevice:
662 return cudaMemcpyDeviceToDevice;
664 return cudaMemcpyDefault;
668 inline static enum cudaTextureAddressMode hipTextureAddressModeToCudaTextureAddressMode(
669 hipTextureAddressMode kind) {
671 case hipAddressModeWrap:
672 return cudaAddressModeWrap;
673 case hipAddressModeClamp:
674 return cudaAddressModeClamp;
675 case hipAddressModeMirror:
676 return cudaAddressModeMirror;
677 case hipAddressModeBorder:
678 return cudaAddressModeBorder;
680 return cudaAddressModeWrap;
684 inline static enum cudaTextureFilterMode hipTextureFilterModeToCudaTextureFilterMode(
685 hipTextureFilterMode kind) {
687 case hipFilterModePoint:
688 return cudaFilterModePoint;
689 case hipFilterModeLinear:
690 return cudaFilterModeLinear;
692 return cudaFilterModePoint;
696 inline static enum cudaTextureReadMode hipTextureReadModeToCudaTextureReadMode(hipTextureReadMode kind) {
698 case hipReadModeElementType:
699 return cudaReadModeElementType;
700 case hipReadModeNormalizedFloat:
701 return cudaReadModeNormalizedFloat;
703 return cudaReadModeElementType;
707 inline static enum cudaChannelFormatKind hipChannelFormatKindToCudaChannelFormatKind(
708 hipChannelFormatKind kind) {
710 case hipChannelFormatKindSigned:
711 return cudaChannelFormatKindSigned;
712 case hipChannelFormatKindUnsigned:
713 return cudaChannelFormatKindUnsigned;
714 case hipChannelFormatKindFloat:
715 return cudaChannelFormatKindFloat;
716 case hipChannelFormatKindNone:
717 return cudaChannelFormatKindNone;
719 return cudaChannelFormatKindNone;
// HIPRT_CB expands to CUDART_CB.
// NOTE(review): presumably the calling-convention marker for runtime callbacks — confirm
// against the CUDA runtime headers.
726 #define HIPRT_CB CUDART_CB
728 inline static hipError_t
hipInit(
unsigned int flags) {
729 return hipCUResultTohipError(cuInit(flags));
732 inline static hipError_t
hipDeviceReset() {
return hipCUDAErrorTohipError(cudaDeviceReset()); }
734 inline static hipError_t
hipGetLastError() {
return hipCUDAErrorTohipError(cudaGetLastError()); }
737 return hipCUDAErrorTohipError(cudaPeekAtLastError());
740 inline static hipError_t
hipMalloc(
void** ptr,
size_t size) {
741 return hipCUDAErrorTohipError(cudaMalloc(ptr, size));
744 inline static hipError_t
hipMallocPitch(
void** ptr,
size_t* pitch,
size_t width,
size_t height) {
745 return hipCUDAErrorTohipError(cudaMallocPitch(ptr, pitch, width, height));
748 inline static hipError_t
hipMemAllocPitch(hipDeviceptr_t* dptr,
size_t* pitch,
size_t widthInBytes,
size_t height,
unsigned int elementSizeBytes){
749 return hipCUResultTohipError(cuMemAllocPitch(dptr,pitch,widthInBytes,height,elementSizeBytes));
753 return hipCUDAErrorTohipError(cudaMalloc3D(pitchedDevPtr, extent));
756 inline static hipError_t
hipFree(
void* ptr) {
return hipCUDAErrorTohipError(cudaFree(ptr)); }
// Declaration only: tags hipMallocHost as deprecated, with a diagnostic message directing
// callers to hipHostMalloc.
// NOTE(review): the attribute is spelled in GCC/Clang syntax directly instead of going through
// the __HIP_DEPRECATED macro defined near the top of this header — confirm that is intentional.
758 inline static hipError_t
hipMallocHost(
void** ptr,
size_t size)
759 __attribute__((deprecated(
"use hipHostMalloc instead")));
761 return hipCUDAErrorTohipError(cudaMallocHost(ptr, size));
765 __attribute__((deprecated(
"use hipHostMalloc instead")));
767 return hipCUResultTohipError(cuMemAllocHost(ptr, size));
// Declaration only: tags hipHostAlloc as deprecated, with a diagnostic message directing
// callers to hipHostMalloc. The definition follows immediately after this declaration.
// NOTE(review): the attribute is spelled in GCC/Clang syntax directly instead of going through
// the __HIP_DEPRECATED macro defined near the top of this header — confirm that is intentional.
770 inline static hipError_t
hipHostAlloc(
void** ptr,
size_t size,
unsigned int flags)
771 __attribute__((deprecated(
"use hipHostMalloc instead")));
772 inline static hipError_t
hipHostAlloc(
void** ptr,
size_t size,
unsigned int flags) {
773 return hipCUDAErrorTohipError(cudaHostAlloc(ptr, size, flags));
776 inline static hipError_t
hipHostMalloc(
void** ptr,
size_t size,
unsigned int flags) {
777 return hipCUDAErrorTohipError(cudaHostAlloc(ptr, size, flags));
780 inline static hipError_t
hipMallocManaged(
void** ptr,
size_t size,
unsigned int flags) {
781 return hipCUDAErrorTohipError(cudaMallocManaged(ptr, size, flags));
785 size_t width,
size_t height,
787 return hipCUDAErrorTohipError(cudaMallocArray(array, desc, width, height, flags));
792 return hipCUDAErrorTohipError(cudaMalloc3DArray(array, desc, extent, flags));
796 return hipCUDAErrorTohipError(cudaFreeArray(array));
800 return hipCUDAErrorTohipError(cudaHostGetDevicePointer(devPtr, hostPtr, flags));
803 inline static hipError_t
hipHostGetFlags(
unsigned int* flagsPtr,
void* hostPtr) {
804 return hipCUDAErrorTohipError(cudaHostGetFlags(flagsPtr, hostPtr));
807 inline static hipError_t
hipHostRegister(
void* ptr,
size_t size,
unsigned int flags) {
808 return hipCUDAErrorTohipError(cudaHostRegister(ptr, size, flags));
812 return hipCUDAErrorTohipError(cudaHostUnregister(ptr));
816 __attribute__((deprecated(
"use hipHostFree instead")));
818 return hipCUDAErrorTohipError(cudaFreeHost(ptr));
822 return hipCUDAErrorTohipError(cudaFreeHost(ptr));
826 return hipCUDAErrorTohipError(cudaSetDevice(device));
830 struct cudaDeviceProp cdprop;
831 memset(&cdprop, 0x0,
sizeof(
struct cudaDeviceProp));
832 cdprop.major = prop->
major;
833 cdprop.minor = prop->
minor;
848 return hipCUDAErrorTohipError(cudaChooseDevice(device, &cdprop));
851 inline static hipError_t
hipMemcpyHtoD(hipDeviceptr_t dst,
void* src,
size_t size) {
852 return hipCUResultTohipError(cuMemcpyHtoD(dst, src, size));
855 inline static hipError_t
hipMemcpyDtoH(
void* dst, hipDeviceptr_t src,
size_t size) {
856 return hipCUResultTohipError(cuMemcpyDtoH(dst, src, size));
859 inline static hipError_t
hipMemcpyDtoD(hipDeviceptr_t dst, hipDeviceptr_t src,
size_t size) {
860 return hipCUResultTohipError(cuMemcpyDtoD(dst, src, size));
863 inline static hipError_t
hipMemcpyHtoDAsync(hipDeviceptr_t dst,
void* src,
size_t size,
865 return hipCUResultTohipError(cuMemcpyHtoDAsync(dst, src, size, stream));
868 inline static hipError_t
hipMemcpyDtoHAsync(
void* dst, hipDeviceptr_t src,
size_t size,
870 return hipCUResultTohipError(cuMemcpyDtoHAsync(dst, src, size, stream));
873 inline static hipError_t
hipMemcpyDtoDAsync(hipDeviceptr_t dst, hipDeviceptr_t src,
size_t size,
875 return hipCUResultTohipError(cuMemcpyDtoDAsync(dst, src, size, stream));
878 inline static hipError_t
hipMemcpy(
void* dst,
const void* src,
size_t sizeBytes,
879 hipMemcpyKind copyKind) {
880 return hipCUDAErrorTohipError(
881 cudaMemcpy(dst, src, sizeBytes, hipMemcpyKindToCudaMemcpyKind(copyKind)));
885 inline static hipError_t hipMemcpyWithStream(
void* dst,
const void* src,
886 size_t sizeBytes, hipMemcpyKind copyKind,
888 cudaError_t error = cudaMemcpyAsync(dst, src, sizeBytes,
889 hipMemcpyKindToCudaMemcpyKind(copyKind),
892 if (error != cudaSuccess)
return hipCUDAErrorTohipError(error);
894 return hipCUDAErrorTohipError(cudaStreamSynchronize(stream));
897 inline static hipError_t
hipMemcpyAsync(
void* dst,
const void* src,
size_t sizeBytes,
898 hipMemcpyKind copyKind,
hipStream_t stream __dparm(0)) {
899 return hipCUDAErrorTohipError(
900 cudaMemcpyAsync(dst, src, sizeBytes, hipMemcpyKindToCudaMemcpyKind(copyKind), stream));
903 inline static hipError_t hipMemcpyToSymbol(
const void* symbol,
const void* src,
size_t sizeBytes,
904 size_t offset __dparm(0),
905 hipMemcpyKind copyType __dparm(hipMemcpyHostToDevice)) {
906 return hipCUDAErrorTohipError(cudaMemcpyToSymbol(symbol, src, sizeBytes, offset,
907 hipMemcpyKindToCudaMemcpyKind(copyType)));
910 inline static hipError_t hipMemcpyToSymbolAsync(
const void* symbol,
const void* src,
911 size_t sizeBytes,
size_t offset,
912 hipMemcpyKind copyType,
914 return hipCUDAErrorTohipError(cudaMemcpyToSymbolAsync(
915 symbol, src, sizeBytes, offset, hipMemcpyKindToCudaMemcpyKind(copyType), stream));
918 inline static hipError_t hipMemcpyFromSymbol(
void* dst,
const void* symbolName,
size_t sizeBytes,
919 size_t offset __dparm(0),
920 hipMemcpyKind kind __dparm(hipMemcpyDeviceToHost)) {
921 return hipCUDAErrorTohipError(cudaMemcpyFromSymbol(dst, symbolName, sizeBytes, offset,
922 hipMemcpyKindToCudaMemcpyKind(kind)));
925 inline static hipError_t hipMemcpyFromSymbolAsync(
void* dst,
const void* symbolName,
926 size_t sizeBytes,
size_t offset,
929 return hipCUDAErrorTohipError(cudaMemcpyFromSymbolAsync(
930 dst, symbolName, sizeBytes, offset, hipMemcpyKindToCudaMemcpyKind(kind), stream));
933 inline static hipError_t hipGetSymbolAddress(
void** devPtr,
const void* symbolName) {
934 return hipCUDAErrorTohipError(cudaGetSymbolAddress(devPtr, symbolName));
937 inline static hipError_t hipGetSymbolSize(
size_t* size,
const void* symbolName) {
938 return hipCUDAErrorTohipError(cudaGetSymbolSize(size, symbolName));
941 inline static hipError_t
hipMemcpy2D(
void* dst,
size_t dpitch,
const void* src,
size_t spitch,
942 size_t width,
size_t height, hipMemcpyKind kind) {
943 return hipCUDAErrorTohipError(
944 cudaMemcpy2D(dst, dpitch, src, spitch, width, height, hipMemcpyKindToCudaMemcpyKind(kind)));
948 return hipCUResultTohipError(cuMemcpy2D(pCopy));
952 return hipCUResultTohipError(cuMemcpy2DAsync(pCopy, stream));
957 return hipCUDAErrorTohipError(cudaMemcpy3D(p));
962 return hipCUDAErrorTohipError(cudaMemcpy3DAsync(p, stream));
965 inline static hipError_t
hipMemcpy2DAsync(
void* dst,
size_t dpitch,
const void* src,
size_t spitch,
966 size_t width,
size_t height, hipMemcpyKind kind,
968 return hipCUDAErrorTohipError(cudaMemcpy2DAsync(dst, dpitch, src, spitch, width, height,
969 hipMemcpyKindToCudaMemcpyKind(kind), stream));
973 const void* src,
size_t spitch,
size_t width,
974 size_t height, hipMemcpyKind kind) {
975 return hipCUDAErrorTohipError(cudaMemcpy2DToArray(dst, wOffset, hOffset, src, spitch, width,
976 height, hipMemcpyKindToCudaMemcpyKind(kind)));
980 size_t hOffset,
const void* src,
981 size_t count, hipMemcpyKind kind) {
982 return hipCUDAErrorTohipError(
983 cudaMemcpyToArray(dst, wOffset, hOffset, src, count, hipMemcpyKindToCudaMemcpyKind(kind)));
987 size_t wOffset,
size_t hOffset,
988 size_t count, hipMemcpyKind kind) {
989 return hipCUDAErrorTohipError(cudaMemcpyFromArray(dst, srcArray, wOffset, hOffset, count,
990 hipMemcpyKindToCudaMemcpyKind(kind)));
995 return hipCUResultTohipError(cuMemcpyAtoH(dst, (CUarray)srcArray, srcOffset, count));
1000 return hipCUResultTohipError(cuMemcpyHtoA((CUarray)dstArray, dstOffset, srcHost, count));
1004 return hipCUDAErrorTohipError(cudaDeviceSynchronize());
1008 return hipCUDAErrorTohipError(cudaDeviceGetCacheConfig(pCacheConfig));
1012 return hipCUDAErrorTohipError(cudaFuncSetAttribute(func, attr, value));
1016 return hipCUDAErrorTohipError(cudaDeviceSetCacheConfig(cacheConfig));
1020 return hipCUDAErrorTohipError(cudaFuncSetSharedMemConfig(func, config));
1024 return cudaGetErrorString(hipErrorToCudaError(error));
1028 return cudaGetErrorName(hipErrorToCudaError(error));
1032 return hipCUDAErrorTohipError(cudaGetDeviceCount(count));
1036 return hipCUDAErrorTohipError(cudaGetDevice(device));
1040 return hipCUDAErrorTohipError(cudaIpcCloseMemHandle(devPtr));
1044 return hipCUDAErrorTohipError(cudaIpcGetEventHandle(handle, event));
1048 return hipCUDAErrorTohipError(cudaIpcGetMemHandle(handle, devPtr));
1052 return hipCUDAErrorTohipError(cudaIpcOpenEventHandle(event, handle));
1056 unsigned int flags) {
1057 return hipCUDAErrorTohipError(cudaIpcOpenMemHandle(devPtr, handle, flags));
1060 inline static hipError_t
hipMemset(
void* devPtr,
int value,
size_t count) {
1061 return hipCUDAErrorTohipError(cudaMemset(devPtr, value, count));
1064 inline static hipError_t
hipMemsetD32(hipDeviceptr_t devPtr,
int value,
size_t count) {
1065 return hipCUResultTohipError(cuMemsetD32(devPtr, value, count));
1068 inline static hipError_t
hipMemsetAsync(
void* devPtr,
int value,
size_t count,
1070 return hipCUDAErrorTohipError(cudaMemsetAsync(devPtr, value, count, stream));
1073 inline static hipError_t
hipMemsetD32Async(hipDeviceptr_t devPtr,
int value,
size_t count,
1075 return hipCUResultTohipError(cuMemsetD32Async(devPtr, value, count, stream));
1078 inline static hipError_t
hipMemsetD8(hipDeviceptr_t dest,
unsigned char value,
size_t sizeBytes) {
1079 return hipCUResultTohipError(cuMemsetD8(dest, value, sizeBytes));
1082 inline static hipError_t
hipMemsetD8Async(hipDeviceptr_t dest,
unsigned char value,
size_t sizeBytes,
1084 return hipCUResultTohipError(cuMemsetD8Async(dest, value, sizeBytes, stream));
1087 inline static hipError_t
hipMemsetD16(hipDeviceptr_t dest,
unsigned short value,
size_t sizeBytes) {
1088 return hipCUResultTohipError(cuMemsetD16(dest, value, sizeBytes));
1091 inline static hipError_t
hipMemsetD16Async(hipDeviceptr_t dest,
unsigned short value,
size_t sizeBytes,
1093 return hipCUResultTohipError(cuMemsetD16Async(dest, value, sizeBytes, stream));
1096 inline static hipError_t
hipMemset2D(
void* dst,
size_t pitch,
int value,
size_t width,
size_t height) {
1097 return hipCUDAErrorTohipError(cudaMemset2D(dst, pitch, value, width, height));
1100 inline static hipError_t
hipMemset2DAsync(
void* dst,
size_t pitch,
int value,
size_t width,
size_t height,
hipStream_t stream __dparm(0)) {
1101 return hipCUDAErrorTohipError(cudaMemset2DAsync(dst, pitch, value, width, height, stream));
1105 return hipCUDAErrorTohipError(cudaMemset3D(pitchedDevPtr, value, extent));
1109 return hipCUDAErrorTohipError(cudaMemset3DAsync(pitchedDevPtr, value, extent, stream));
1113 struct cudaDeviceProp cdprop;
1115 cerror = cudaGetDeviceProperties(&cdprop, device);
1117 strncpy(p_prop->
name, cdprop.name, 256);
1121 p_prop->
warpSize = cdprop.warpSize;
1123 for (
int i = 0; i < 3; i++) {
1131 p_prop->
major = cdprop.major;
1132 p_prop->
minor = cdprop.minor;
1139 int ccVers = p_prop->
major * 100 + p_prop->
minor * 10;
1160 p_prop->
pciBusID = cdprop.pciBusID;
1181 p_prop->
memPitch = cdprop.memPitch;
1188 return hipCUDAErrorTohipError(cerror);
1192 enum cudaDeviceAttr cdattr;
1197 cdattr = cudaDevAttrMaxThreadsPerBlock;
1200 cdattr = cudaDevAttrMaxBlockDimX;
1203 cdattr = cudaDevAttrMaxBlockDimY;
1206 cdattr = cudaDevAttrMaxBlockDimZ;
1209 cdattr = cudaDevAttrMaxGridDimX;
1212 cdattr = cudaDevAttrMaxGridDimY;
1215 cdattr = cudaDevAttrMaxGridDimZ;
1218 cdattr = cudaDevAttrMaxSharedMemoryPerBlock;
1221 cdattr = cudaDevAttrTotalConstantMemory;
1224 cdattr = cudaDevAttrWarpSize;
1227 cdattr = cudaDevAttrMaxRegistersPerBlock;
1230 cdattr = cudaDevAttrClockRate;
1233 cdattr = cudaDevAttrMemoryClockRate;
1236 cdattr = cudaDevAttrGlobalMemoryBusWidth;
1239 cdattr = cudaDevAttrMultiProcessorCount;
1242 cdattr = cudaDevAttrComputeMode;
1245 cdattr = cudaDevAttrL2CacheSize;
1248 cdattr = cudaDevAttrMaxThreadsPerMultiProcessor;
1251 cdattr = cudaDevAttrComputeCapabilityMajor;
1254 cdattr = cudaDevAttrComputeCapabilityMinor;
1257 cdattr = cudaDevAttrConcurrentKernels;
1260 cdattr = cudaDevAttrPciBusId;
1263 cdattr = cudaDevAttrPciDeviceId;
1266 cdattr = cudaDevAttrMaxSharedMemoryPerMultiprocessor;
1269 cdattr = cudaDevAttrIsMultiGpuBoard;
1272 cdattr = cudaDevAttrIntegrated;
1275 cdattr = cudaDevAttrMaxTexture1DWidth;
1278 cdattr = cudaDevAttrMaxTexture2DWidth;
1281 cdattr = cudaDevAttrMaxTexture2DHeight;
1284 cdattr = cudaDevAttrMaxTexture3DWidth;
1287 cdattr = cudaDevAttrMaxTexture3DHeight;
1290 cdattr = cudaDevAttrMaxTexture3DDepth;
1293 cdattr = cudaDevAttrMaxPitch;
1296 cdattr = cudaDevAttrTextureAlignment;
1299 cdattr = cudaDevAttrTexturePitchAlignment;
1302 cdattr = cudaDevAttrKernelExecTimeout;
1305 cdattr = cudaDevAttrCanMapHostMemory;
1308 cdattr = cudaDevAttrEccEnabled;
1311 cdattr = cudaDevAttrCooperativeLaunch;
1314 cdattr = cudaDevAttrCooperativeMultiDeviceLaunch;
1317 return hipCUDAErrorTohipError(cudaErrorInvalidValue);
1320 cerror = cudaDeviceGetAttribute(pi, cdattr, device);
1322 return hipCUDAErrorTohipError(cerror);
1328 size_t dynamicSMemSize) {
1329 return hipCUDAErrorTohipError(cudaOccupancyMaxActiveBlocksPerMultiprocessor(numBlocks, func,
1330 blockSize, dynamicSMemSize));
1336 size_t dynamicSMemSize,
1337 unsigned int flags) {
1338 return hipCUDAErrorTohipError(cudaOccupancyMaxActiveBlocksPerMultiprocessorWithFlags(numBlocks, func,
1339 blockSize, dynamicSMemSize, flags));
1345 size_t dynamicSMemSize ){
1346 return hipCUResultTohipError(cuOccupancyMaxActiveBlocksPerMultiprocessor(numBlocks, f,
1347 blockSize, dynamicSMemSize));
1353 size_t dynamicSMemSize,
1354 unsigned int flags ) {
1355 return hipCUResultTohipError(cuOccupancyMaxActiveBlocksPerMultiprocessorWithFlags(numBlocks,f,
1356 blockSize, dynamicSMemSize, flags));
1362 int blockSizeLimit){
1363 return hipCUResultTohipError(cuOccupancyMaxPotentialBlockSize(gridSize, blockSize, f, NULL,
1364 dynSharedMemPerBlk, blockSizeLimit));
1370 int blockSizeLimit,
unsigned int flags){
1371 return hipCUResultTohipError(cuOccupancyMaxPotentialBlockSizeWithFlags(gridSize, blockSize, f, NULL,
1372 dynSharedMemPerBlk, blockSizeLimit, flags));
1376 struct cudaPointerAttributes cPA;
1377 hipError_t err = hipCUDAErrorTohipError(cudaPointerGetAttributes(&cPA, ptr));
1379 #if (CUDART_VERSION >= 11000)
1380 auto memType = cPA.type;
1382 unsigned memType = cPA.memoryType;
1385 case cudaMemoryTypeDevice:
1388 case cudaMemoryTypeHost:
1392 return hipErrorUnknown;
1394 attributes->device = cPA.device;
1395 attributes->devicePointer = cPA.devicePointer;
1396 attributes->hostPointer = cPA.hostPointer;
1397 attributes->isManaged = 0;
1398 attributes->allocationFlags = 0;
1403 inline static hipError_t
hipMemGetInfo(
size_t* free,
size_t* total) {
1404 return hipCUDAErrorTohipError(cudaMemGetInfo(free, total));
1408 return hipCUDAErrorTohipError(cudaEventCreate(event));
1412 return hipCUDAErrorTohipError(cudaEventRecord(event, stream));
1416 return hipCUDAErrorTohipError(cudaEventSynchronize(event));
1420 return hipCUDAErrorTohipError(cudaEventElapsedTime(ms, start, stop));
1424 return hipCUDAErrorTohipError(cudaEventDestroy(event));
1428 return hipCUDAErrorTohipError(cudaStreamCreateWithFlags(stream, flags));
1432 return hipCUDAErrorTohipError(cudaStreamCreateWithPriority(stream, flags, priority));
1436 return hipCUDAErrorTohipError(cudaDeviceGetStreamPriorityRange(leastPriority, greatestPriority));
1440 return hipCUDAErrorTohipError(cudaStreamCreate(stream));
1444 return hipCUDAErrorTohipError(cudaStreamSynchronize(stream));
1448 return hipCUDAErrorTohipError(cudaStreamDestroy(stream));
1452 return hipCUDAErrorTohipError(cudaStreamGetFlags(stream, flags));
1456 return hipCUDAErrorTohipError(cudaStreamGetPriority(stream, priority));
1460 unsigned int flags) {
1461 return hipCUDAErrorTohipError(cudaStreamWaitEvent(stream, event, flags));
1465 return hipCUDAErrorTohipError(cudaStreamQuery(stream));
1469 void* userData,
unsigned int flags) {
1470 return hipCUDAErrorTohipError(
1471 cudaStreamAddCallback(stream, (cudaStreamCallback_t)callback, userData, flags));
1475 cudaError_t err = cudaDriverGetVersion(driverVersion);
1480 return hipCUDAErrorTohipError(err);
1484 return hipCUDAErrorTohipError(cudaRuntimeGetVersion(runtimeVersion));
1488 return hipCUDAErrorTohipError(cudaDeviceCanAccessPeer(canAccessPeer, device, peerDevice));
1492 return hipCUDAErrorTohipError(cudaDeviceDisablePeerAccess(peerDevice));
1496 return hipCUDAErrorTohipError(cudaDeviceEnablePeerAccess(peerDevice, flags));
1500 return hipCUResultTohipError(cuCtxDisablePeerAccess(peerCtx));
1504 return hipCUResultTohipError(cuCtxEnablePeerAccess(peerCtx, flags));
1509 return hipCUResultTohipError(cuDevicePrimaryCtxGetState(dev, flags, active));
1513 return hipCUResultTohipError(cuDevicePrimaryCtxRelease(dev));
1517 return hipCUResultTohipError(cuDevicePrimaryCtxRetain(pctx, dev));
1521 return hipCUResultTohipError(cuDevicePrimaryCtxReset(dev));
1525 return hipCUResultTohipError(cuDevicePrimaryCtxSetFlags(dev, flags));
1529 hipDeviceptr_t dptr) {
1530 return hipCUResultTohipError(cuMemGetAddressRange(pbase, psize, dptr));
1533 inline static hipError_t
hipMemcpyPeer(
void* dst,
int dstDevice,
const void* src,
int srcDevice,
1535 return hipCUDAErrorTohipError(cudaMemcpyPeer(dst, dstDevice, src, srcDevice, count));
1538 inline static hipError_t
hipMemcpyPeerAsync(
void* dst,
int dstDevice,
const void* src,
1539 int srcDevice,
size_t count,
1541 return hipCUDAErrorTohipError(
1542 cudaMemcpyPeerAsync(dst, dstDevice, src, srcDevice, count, stream));
1546 inline static hipError_t
hipProfilerStart() {
return hipCUDAErrorTohipError(cudaProfilerStart()); }
1548 inline static hipError_t
hipProfilerStop() {
return hipCUDAErrorTohipError(cudaProfilerStop()); }
1551 return hipCUDAErrorTohipError(cudaGetDeviceFlags(flags));
1555 return hipCUDAErrorTohipError(cudaSetDeviceFlags(flags));
1559 return hipCUDAErrorTohipError(cudaEventCreateWithFlags(event, flags));
1563 return hipCUDAErrorTohipError(cudaEventQuery(event));
1567 return hipCUResultTohipError(cuCtxCreate(ctx, flags, device));
1571 return hipCUResultTohipError(cuCtxDestroy(ctx));
1575 return hipCUResultTohipError(cuCtxPopCurrent(ctx));
1579 return hipCUResultTohipError(cuCtxPushCurrent(ctx));
1583 return hipCUResultTohipError(cuCtxSetCurrent(ctx));
1587 return hipCUResultTohipError(cuCtxGetCurrent(ctx));
1591 return hipCUResultTohipError(cuCtxGetDevice(device));
1595 return hipCUResultTohipError(cuCtxGetApiVersion(ctx, (
unsigned int*)apiVersion));
1599 return hipCUResultTohipError(cuCtxGetCacheConfig(cacheConfig));
1603 return hipCUResultTohipError(cuCtxSetCacheConfig(cacheConfig));
1607 return hipCUResultTohipError(cuCtxSetSharedMemConfig((CUsharedconfig)config));
1611 return hipCUResultTohipError(cuCtxGetSharedMemConfig((CUsharedconfig*)pConfig));
1615 return hipCUResultTohipError(cuCtxSynchronize());
1619 return hipCUResultTohipError(cuCtxGetFlags(flags));
1622 inline static hipError_t hipCtxDetach(
hipCtx_t ctx) {
1623 return hipCUResultTohipError(cuCtxDetach(ctx));
1626 inline static hipError_t
hipDeviceGet(hipDevice_t* device,
int ordinal) {
1627 return hipCUResultTohipError(cuDeviceGet(device, ordinal));
1631 return hipCUResultTohipError(cuDeviceComputeCapability(major, minor, device));
1634 inline static hipError_t
hipDeviceGetName(
char* name,
int len, hipDevice_t device) {
1635 return hipCUResultTohipError(cuDeviceGetName(name, len, device));
1639 int srcDevice,
int dstDevice) {
1640 return hipCUDAErrorTohipError(cudaDeviceGetP2PAttribute(value, attr, srcDevice, dstDevice));
1644 return hipCUDAErrorTohipError(cudaDeviceGetPCIBusId(pciBusId, len, device));
1648 return hipCUDAErrorTohipError(cudaDeviceGetByPCIBusId(device, pciBusId));
1652 return hipCUDAErrorTohipError(cudaDeviceGetSharedMemConfig(config));
1656 return hipCUDAErrorTohipError(cudaDeviceSetSharedMemConfig(config));
1660 return hipCUDAErrorTohipError(cudaDeviceGetLimit(pValue, limit));
1664 return hipCUResultTohipError(cuDeviceTotalMem(bytes, device));
1668 return hipCUResultTohipError(cuModuleLoad(module, fname));
1672 return hipCUResultTohipError(cuModuleUnload(hmod));
1676 const char* kname) {
1677 return hipCUResultTohipError(cuModuleGetFunction(
function, module, kname));
1681 return hipCUDAErrorTohipError(cudaFuncGetAttributes(attr, func));
1685 return hipCUResultTohipError(cuFuncGetAttribute(value, attrib, hfunc));
1690 return hipCUResultTohipError(cuModuleGetGlobal(dptr, bytes, hmod, name));
1694 return hipCUResultTohipError(cuModuleLoadData(module, image));
1698 unsigned int numOptions, hipJitOption* options,
1699 void** optionValues) {
1700 return hipCUResultTohipError(
1701 cuModuleLoadDataEx(module, image, numOptions, options, optionValues));
1705 dim3 dimBlocks,
void** args,
size_t sharedMemBytes,
1708 return hipCUDAErrorTohipError(cudaLaunchKernel(function_address,numBlocks,dimBlocks,args,sharedMemBytes,stream));
1712 unsigned int gridDimY,
unsigned int gridDimZ,
1713 unsigned int blockDimX,
unsigned int blockDimY,
1714 unsigned int blockDimZ,
unsigned int sharedMemBytes,
1717 return hipCUResultTohipError(cuLaunchKernel(f, gridDimX, gridDimY, gridDimZ, blockDimX,
1718 blockDimY, blockDimZ, sharedMemBytes, stream,
1719 kernelParams, extra));
1723 return hipCUDAErrorTohipError(cudaFuncSetCacheConfig(func, cacheConfig));
1726 __HIP_DEPRECATED
inline static hipError_t hipBindTexture(
size_t* offset,
1730 size_t size __dparm(UINT_MAX)) {
1731 return hipCUDAErrorTohipError(cudaBindTexture(offset, tex, devPtr, desc, size));
1734 __HIP_DEPRECATED
inline static hipError_t hipBindTexture2D(
1737 return hipCUDAErrorTohipError(cudaBindTexture2D(offset, tex, devPtr, desc, width, height, pitch));
1741 hipChannelFormatKind f) {
1742 return cudaCreateChannelDesc(x, y, z, w, hipChannelFormatKindToCudaChannelFormatKind(f));
1745 inline static hipError_t hipCreateTextureObject(hipTextureObject_t* pTexObject,
1749 return hipCUDAErrorTohipError(
1750 cudaCreateTextureObject(pTexObject, pResDesc, pTexDesc, pResViewDesc));
1753 inline static hipError_t hipDestroyTextureObject(hipTextureObject_t textureObject) {
1754 return hipCUDAErrorTohipError(cudaDestroyTextureObject(textureObject));
1759 return hipCUDAErrorTohipError(cudaCreateSurfaceObject(pSurfObject, pResDesc));
1763 return hipCUDAErrorTohipError(cudaDestroySurfaceObject(surfaceObject));
1766 inline static hipError_t hipGetTextureObjectResourceDesc(
hipResourceDesc* pResDesc,
1767 hipTextureObject_t textureObject) {
1768 return hipCUDAErrorTohipError(cudaGetTextureObjectResourceDesc( pResDesc, textureObject));
1771 __HIP_DEPRECATED
inline static hipError_t hipGetTextureAlignmentOffset(
1773 return hipCUDAErrorTohipError(cudaGetTextureAlignmentOffset(offset,texref));
1778 return hipCUDAErrorTohipError(cudaGetChannelDesc(desc,array));
1781 inline static hipError_t hipLaunchCooperativeKernel(
const void* f,
dim3 gridDim,
dim3 blockDim,
1782 void** kernelParams,
unsigned int sharedMemBytes,
1784 return hipCUDAErrorTohipError(
1785 cudaLaunchCooperativeKernel(f, gridDim, blockDim, kernelParams, sharedMemBytes, stream));
1788 inline static hipError_t hipLaunchCooperativeKernelMultiDevice(
hipLaunchParams* launchParamsList,
1789 int numDevices,
unsigned int flags) {
1790 return hipCUDAErrorTohipError(cudaLaunchCooperativeKernelMultiDevice(launchParamsList, numDevices, flags));
1803 size_t dynamicSMemSize) {
1804 return hipCUDAErrorTohipError(cudaOccupancyMaxActiveBlocksPerMultiprocessor(numBlocks, func,
1805 blockSize, dynamicSMemSize));
1810 size_t dynamicSMemSize = 0,
1811 int blockSizeLimit = 0) {
1812 return hipCUDAErrorTohipError(cudaOccupancyMaxPotentialBlockSize(minGridSize, blockSize, func,
1813 dynamicSMemSize, blockSizeLimit));
1817 inline static hipError_t hipOccupancyMaxPotentialBlockSizeWithFlags(
int* minGridSize,
int* blockSize, T func,
1818 size_t dynamicSMemSize = 0,
1819 int blockSizeLimit = 0,
unsigned int flags = 0) {
1820 return hipCUDAErrorTohipError(cudaOccupancyMaxPotentialBlockSize(minGridSize, blockSize, func,
1821 dynamicSMemSize, blockSizeLimit, flags));
1826 int blockSize,
size_t dynamicSMemSize,
unsigned int flags) {
1827 return hipCUDAErrorTohipError(cudaOccupancyMaxActiveBlocksPerMultiprocessorWithFlags(numBlocks, func,
1828 blockSize, dynamicSMemSize, flags));
1831 template <
class T,
int dim, enum cudaTextureReadMode readMode>
1832 inline static hipError_t hipBindTexture(
size_t* offset,
const struct texture<T, dim, readMode>& tex,
1833 const void* devPtr,
size_t size = UINT_MAX) {
1834 return hipCUDAErrorTohipError(cudaBindTexture(offset, tex, devPtr, size));
1837 template <
class T,
int dim, enum cudaTextureReadMode readMode>
1838 inline static hipError_t hipBindTexture(
size_t* offset,
struct texture<T, dim, readMode>& tex,
1840 size_t size = UINT_MAX) {
1841 return hipCUDAErrorTohipError(cudaBindTexture(offset, tex, devPtr, desc, size));
1844 template <
class T,
int dim, enum cudaTextureReadMode readMode>
1845 __HIP_DEPRECATED
inline static hipError_t hipUnbindTexture(
struct texture<T, dim, readMode>* tex) {
1846 return hipCUDAErrorTohipError(cudaUnbindTexture(tex));
1849 template <
class T,
int dim, enum cudaTextureReadMode readMode>
1850 __HIP_DEPRECATED
inline static hipError_t hipUnbindTexture(
struct texture<T, dim, readMode>& tex) {
1851 return hipCUDAErrorTohipError(cudaUnbindTexture(tex));
1854 template <
class T,
int dim, enum cudaTextureReadMode readMode>
1855 __HIP_DEPRECATED
inline static hipError_t hipBindTextureToArray(
1858 return hipCUDAErrorTohipError(cudaBindTextureToArray(tex, array, desc));
1861 template <
class T,
int dim, enum cudaTextureReadMode readMode>
1862 __HIP_DEPRECATED
inline static hipError_t hipBindTextureToArray(
1865 return hipCUDAErrorTohipError(cudaBindTextureToArray(tex, array, desc));
1868 template <
class T,
int dim, enum cudaTextureReadMode readMode>
1869 __HIP_DEPRECATED
inline static hipError_t hipBindTextureToArray(
1871 return hipCUDAErrorTohipError(cudaBindTextureToArray(tex, array));
1876 return cudaCreateChannelDesc<T>();
1880 inline static hipError_t hipLaunchCooperativeKernel(T f,
dim3 gridDim,
dim3 blockDim,
1881 void** kernelParams,
unsigned int sharedMemBytes,
hipStream_t stream) {
1882 return hipCUDAErrorTohipError(
1883 cudaLaunchCooperativeKernel(f, gridDim, blockDim, kernelParams, sharedMemBytes, stream));
1888 #endif // HIP_INCLUDE_HIP_NVCC_DETAIL_HIP_RUNTIME_API_H
Definition: hip_runtime_api.h:128
hipError_t hipCtxSynchronize(void)
Blocks until the default context has completed all preceding requested tasks.
Definition: hip_context.cpp:249
hipError_t hipPointerGetAttributes(hipPointerAttribute_t *attributes, const void *ptr)
Return attributes for the specified pointer.
Definition: hip_memory.cpp:617
@ hipDeviceAttributeMaxPitch
Maximum pitch in bytes allowed by memory copies.
Definition: hip_runtime_api.h:337
hipError_t hipMemset3DAsync(hipPitchedPtr pitchedDevPtr, int value, hipExtent extent, hipStream_t stream __dparm(0))
Fills asynchronously the memory area pointed to by pitchedDevPtr with the constant value.
hipError_t hipMemcpy3D(const struct hipMemcpy3DParms *p)
Copies data between host and device.
Definition: hip_memory.cpp:1712
hipError_t hipCtxGetCurrent(hipCtx_t *ctx)
Get the handle of the current/default context.
Definition: hip_context.cpp:167
int regsPerBlock
Registers per block.
Definition: hip_runtime_api.h:88
hipError_t hipMallocPitch(void **ptr, size_t *pitch, size_t width, size_t height)
Definition: hip_memory.cpp:851
hipError_t hipSetDevice(int deviceId)
Set default device to be used for subsequent hip API calls from this thread.
Definition: hip_device.cpp:132
hipError_t hipDeviceGetP2PAttribute(int *value, hipDeviceP2PAttr attr, int srcDevice, int dstDevice)
Returns a value for attr of link between two devices.
hipError_t hipMemsetD16Async(hipDeviceptr_t dest, unsigned short value, size_t count, hipStream_t stream __dparm(0))
Fills the first sizeBytes bytes of the memory area pointed to by dest with the constant short value v...
@ hipDeviceAttributeMemoryBusWidth
Global memory bus width in bits.
Definition: hip_runtime_api.h:308
const char * hipGetErrorString(hipError_t hipError)
Return handy text string message to explain the error which occurred.
Definition: hip_error.cpp:54
hipError_t hipGetDeviceFlags(unsigned int *flags)
Gets the flags set for current device.
hipError_t hipDeviceGetByPCIBusId(int *device, const char *pciBusId)
Returns a handle to a compute device.
Definition: hip_device.cpp:492
hipErrorInvalidMemcpyDirection
Invalid memory copy direction.
Definition: hip_runtime_api.h:214
hipError_t hipMalloc3DArray(hipArray **array, const struct hipChannelFormatDesc *desc, struct hipExtent extent, unsigned int flags)
Allocate an array on the device.
Definition: hip_memory.cpp:1091
unsigned hasGlobalInt64Atomics
64-bit integer atomics for global memory.
Definition: hip_runtime_api.h:54
int minor
Definition: hip_runtime_api.h:100
@ hipDeviceAttributeMaxBlockDimX
Maximum x-dimension of a block.
Definition: hip_runtime_api.h:292
hipErrorInvalidDevicePointer
Invalid Device Pointer.
Definition: hip_runtime_api.h:213
hipError_t hipChooseDevice(int *device, const hipDeviceProp_t *prop)
Device which matches hipDeviceProp_t is returned.
Definition: hip_device.cpp:518
hipError_t hipIpcCloseMemHandle(void *devPtr)
Close memory mapped with hipIpcOpenMemHandle.
Definition: hip_memory.cpp:2539
hipError_t hipMemcpy2DAsync(void *dst, size_t dpitch, const void *src, size_t spitch, size_t width, size_t height, hipMemcpyKind kind, hipStream_t stream __dparm(0))
Copies data between host and device.
hipError_t hipLaunchKernel(const void *function_address, dim3 numBlocks, dim3 dimBlocks, void **args, size_t sharedMemBytes __dparm(0), hipStream_t stream __dparm(0))
C compliant kernel launch API.
hipError_t hipMemsetD32(hipDeviceptr_t dest, int value, size_t count)
Fills the memory area pointed to by dest with the constant integer value for specified number of time...
Definition: hip_memory.cpp:2281
size_t texturePitchAlignment
Pitch alignment requirement for texture references bound to pitched memory.
Definition: hip_runtime_api.h:129
@ hipDeviceAttributeMaxGridDimX
Maximum x-dimension of a grid.
Definition: hip_runtime_api.h:295
unsigned hasThreadFenceSystem
__threadfence_system.
Definition: hip_runtime_api.h:67
hipError_t hipStreamCreate(hipStream_t *stream)
Create an asynchronous stream.
Definition: hip_stream.cpp:106
hipError_t hipOccupancyMaxActiveBlocksPerMultiprocessorWithFlags(int *numBlocks, const void *f, int blockSize, size_t dynSharedMemPerBlk, unsigned int flags __dparm(hipOccupancyDefault))
Returns occupancy for a device function.
hipError_t hipDeviceGetStreamPriorityRange(int *leastPriority, int *greatestPriority)
Returns numerical values that correspond to the least and greatest stream priority.
Definition: hip_stream.cpp:122
Definition: hip_runtime_api.h:120
int maxTexture3D[3]
Maximum dimensions (width, height, depth) of 3D images, in image elements.
Definition: hip_runtime_api.h:124
hipError_t hipStreamCreateWithPriority(hipStream_t *stream, unsigned int flags, int priority)
Create an asynchronous stream with the specified priority.
Definition: hip_stream.cpp:113
hipError_t hipCtxPushCurrent(hipCtx_t ctx)
Push the context to be set as the current/default context.
Definition: hip_context.cpp:154
hipError_t hipCtxGetDevice(hipDevice_t *device)
Get the handle of the device associated with current/default context.
Definition: hip_context.cpp:191
hipFuncCache_t
Definition: hip_runtime_api.h:308
int cooperativeMultiDeviceUnmatchedBlockDim
Definition: hip_runtime_api.h:137
hipError_t hipPeekAtLastError(void)
Return last error returned by any HIP runtime API call.
Definition: hip_error.cpp:41
hipError_t hipMemcpy3DAsync(const struct hipMemcpy3DParms *p, hipStream_t stream __dparm(0))
Copies data between host and device asynchronously.
hipError_t hipDeviceGetPCIBusId(char *pciBusId, int len, int device)
Returns a PCI Bus Id string for the device, overloaded to take int device ID.
Definition: hip_device.cpp:460
hipError_t hipHostGetFlags(unsigned int *flagsPtr, void *hostPtr)
Return flags associated with host pointer.
Definition: hip_memory.cpp:1133
hipErrorHostMemoryNotRegistered
Produced when trying to unlock a non-page-locked memory.
Definition: hip_runtime_api.h:265
hipErrorRuntimeOther
Definition: hip_runtime_api.h:278
@ hipDeviceAttributeClockRate
Peak clock frequency in kilohertz.
Definition: hip_runtime_api.h:306
hipError_t hipMemGetAddressRange(hipDeviceptr_t *pbase, size_t *psize, hipDeviceptr_t dptr)
Get information on memory allocations.
Definition: hip_memory.cpp:2437
unsigned long long hipSurfaceObject_t
Definition: hip_surface_types.h:36
hipError_t hipStreamWaitEvent(hipStream_t stream, hipEvent_t event, unsigned int flags)
Make the specified compute stream wait for an event.
Definition: hip_stream.cpp:130
@ hipDeviceAttributeMaxGridDimZ
Maximum z-dimension of a grid.
Definition: hip_runtime_api.h:297
hipError_t hipGetDevice(int *deviceId)
Return the default device id for the calling host thread.
Definition: hip_device.cpp:32
hipError_t hipModuleOccupancyMaxPotentialBlockSizeWithFlags(int *gridSize, int *blockSize, hipFunction_t f, size_t dynSharedMemPerBlk, int blockSizeLimit, unsigned int flags)
Determine the grid and block sizes that achieve maximum occupancy for a kernel.
Definition: hip_module.cpp:1672
hipError_t hipMallocArray(hipArray **array, const hipChannelFormatDesc *desc, size_t width, size_t height __dparm(0), unsigned int flags __dparm(hipArrayDefault))
Allocate an array on the device.
hipError_t hipMemcpyToArray(hipArray *dst, size_t wOffset, size_t hOffset, const void *src, size_t count, hipMemcpyKind kind)
Copies data between host and device.
Definition: hip_memory.cpp:1494
hipError_t hipModuleLoadData(hipModule_t *module, const void *image)
Builds a module from a code object that resides in host memory; image is a pointer to that location.
Definition: hip_module.cpp:1508
@ hipMemoryTypeDevice
Definition: hip_runtime_api.h:151
@ hipDeviceAttributeMaxRegistersPerBlock
Definition: hip_runtime_api.h:302
hipError_t hipMemcpyDtoDAsync(hipDeviceptr_t dst, hipDeviceptr_t src, size_t sizeBytes, hipStream_t stream)
Copy data from Device to Device asynchronously.
Definition: hip_memory.cpp:1429
hipErrorNoDevice
Call to hipGetDeviceCount returned 0 devices.
Definition: hip_runtime_api.h:219
@ hipDeviceAttributeComputeCapabilityMinor
Minor compute capability version number.
Definition: hip_runtime_api.h:316
hipError_t hipModuleLaunchKernel(hipFunction_t f, unsigned int gridDimX, unsigned int gridDimY, unsigned int gridDimZ, unsigned int blockDimX, unsigned int blockDimY, unsigned int blockDimZ, unsigned int sharedMemBytes, hipStream_t stream, void **kernelParams, void **extra)
launches kernel f with launch parameters and shared memory on stream with arguments passed to kernelp...
int l2CacheSize
L2 cache size.
Definition: hip_runtime_api.h:104
hipError_t hipDevicePrimaryCtxRelease(hipDevice_t dev)
Release the primary context on the GPU.
Definition: hip_context.cpp:285
hipError_t hipCtxGetApiVersion(hipCtx_t ctx, int *apiVersion)
Returns the approximate HIP api version.
Definition: hip_context.cpp:207
size_t textureAlignment
Alignment requirement for textures.
Definition: hip_runtime_api.h:128
hipError_t hipHostMalloc(void **ptr, size_t size, unsigned int flags)
Allocate device accessible page locked host memory.
Definition: hip_memory.cpp:762
@ hipDeviceAttributeKernelExecTimeout
Run time limit for kernels executed on the device.
Definition: hip_runtime_api.h:340
@ hipDeviceAttributeL2CacheSize
Definition: hip_runtime_api.h:311
hipError_t hipDeviceGetName(char *name, int len, hipDevice_t device)
Returns an identifier string for the device.
Definition: hip_device.cpp:446
@ hipDeviceAttributeMaxTexture3DWidth
Maximum dimension width of 3D images in image elements.
Definition: hip_runtime_api.h:330
unsigned hasSurfaceFuncs
Surface functions.
Definition: hip_runtime_api.h:71
@ hipDeviceAttributeIntegrated
iGPU
Definition: hip_runtime_api.h:324
int isMultiGpuBoard
1 if device is on a multi-GPU board, 0 if not.
Definition: hip_runtime_api.h:115
hipError_t hipMemcpyParam2DAsync(const hip_Memcpy2D *pCopy, hipStream_t stream __dparm(0))
Copies memory for 2D arrays.
hipError_t hipModuleUnload(hipModule_t module)
Frees the module.
Definition: hip_module.cpp:1244
@ hipDeviceAttributeMaxGridDimY
Maximum y-dimension of a grid.
Definition: hip_runtime_api.h:296
@ hipMemoryTypeHost
Memory is physically located on host.
Definition: hip_runtime_api.h:150
hipError_t hipDeviceEnablePeerAccess(int peerDeviceId, unsigned int flags)
Enable direct access from current device's virtual address space to memory allocations physically loc...
Definition: hip_peer.cpp:200
hipErrorInvalidContext
Produced when input context is invalid.
Definition: hip_runtime_api.h:222
unsigned hasSharedInt64Atomics
64-bit integer atomics for shared memory.
Definition: hip_runtime_api.h:55
int computeMode
Compute mode.
Definition: hip_runtime_api.h:106
@ hipDeviceAttributeIsMultiGpuBoard
Multiple GPU devices.
Definition: hip_runtime_api.h:323
hipSharedMemConfig
Definition: hip_runtime_api.h:318
int clockRate
Max clock frequency of the multiprocessors in kHz.
Definition: hip_runtime_api.h:93
hipErrorPeerAccessNotEnabled
Peer access was never enabled from the current device.
Definition: hip_runtime_api.h:259
hipError_t hipFuncGetAttribute(int *value, hipFunction_attribute attrib, hipFunction_t hfunc)
Find out a specific attribute for a given function.
Definition: hip_module.cpp:1427
hipError_t hipDeviceComputeCapability(int *major, int *minor, hipDevice_t device)
Returns the compute capability of the device.
Definition: hip_device.cpp:434
hipError_t hipModuleOccupancyMaxPotentialBlockSize(int *gridSize, int *blockSize, hipFunction_t f, size_t dynSharedMemPerBlk, int blockSizeLimit)
Determine the grid and block sizes that achieve maximum occupancy for a kernel.
Definition: hip_module.cpp:1662
void(* hipStreamCallback_t)(hipStream_t stream, hipError_t status, void *userData)
Definition: hip_runtime_api.h:972
unsigned hasDynamicParallelism
Dynamic parallelism.
Definition: hip_runtime_api.h:73
Definition: driver_types.h:91
int canMapHostMemory
Check whether HIP can map host memory.
Definition: hip_runtime_api.h:116
size_t sharedMemPerBlock
Size of shared memory region (in bytes).
Definition: hip_runtime_api.h:87
hipError_t hipCtxGetFlags(unsigned int *flags)
Return flags used for creating default context.
Definition: hip_context.cpp:254
hipError_t hipDevicePrimaryCtxGetState(hipDevice_t dev, unsigned int *flags, int *active)
Get the state of the primary context.
Definition: hip_context.cpp:263
@ hipDeviceAttributeCooperativeMultiDeviceLaunch
Support cooperative launch on multiple devices.
Definition: hip_runtime_api.h:326
int maxThreadsPerMultiProcessor
Maximum resident threads per multi-processor.
Definition: hip_runtime_api.h:105
hipError_t hipDeviceSetCacheConfig(hipFuncCache_t cacheConfig)
Set L1/Shared cache partition.
Definition: hip_device.cpp:74
hipError_t hipCtxDestroy(hipCtx_t ctx)
Destroy a HIP context.
Definition: hip_context.cpp:109
hipError_t hipCtxEnablePeerAccess(hipCtx_t peerCtx, unsigned int flags)
Enables direct access to memory allocations in a peer context.
Definition: hip_peer.cpp:221
int major
Definition: hip_runtime_api.h:97
@ hipDeviceAttributeMaxSharedMemoryPerBlock
Definition: hip_runtime_api.h:298
hipError_t hipMemcpyAtoH(void *dst, hipArray *srcArray, size_t srcOffset, size_t count)
Copies data between host and device.
Definition: hip_memory.cpp:1544
hipError_t hipGetDeviceCount(int *count)
Return number of compute-capable devices.
Definition: hip_device.cpp:69
hipSuccess
Successful completion.
Definition: hip_runtime_api.h:197
hipError_t hipHostUnregister(void *hostPtr)
Un-register host pointer.
Definition: hip_memory.cpp:1233
hipError_t hipStreamGetFlags(hipStream_t stream, unsigned int *flags)
Return flags associated with this stream.
Definition: hip_stream.cpp:223
hipError_t hipMemsetD8Async(hipDeviceptr_t dest, unsigned char value, size_t count, hipStream_t stream __dparm(0))
Fills the first sizeBytes bytes of the memory area pointed to by dest with the constant byte value va...
@ hipDeviceAttributeMaxThreadsPerBlock
Maximum number of threads per block.
Definition: hip_runtime_api.h:291
int gcnArch
AMD GCN Arch Value. Eg: 803, 701.
Definition: hip_runtime_api.h:117
hipError_t hipStreamSynchronize(hipStream_t stream)
Wait for all commands in stream to complete.
Definition: hip_stream.cpp:184
const char * hipGetErrorName(hipError_t hip_error)
Return name of the specified error code in text form.
Definition: hip_error.cpp:48
int kernelExecTimeoutEnabled
Run time limit for kernels executed on the device.
Definition: hip_runtime_api.h:130
hipError_t hipDeviceGet(hipDevice_t *device, int ordinal)
Returns a handle to a compute device.
Definition: hip_context.cpp:70
hipError_t hipMemcpyDtoD(hipDeviceptr_t dst, hipDeviceptr_t src, size_t sizeBytes)
Copy data from Device to Device.
Definition: hip_memory.cpp:1390
int maxTexture1D
Maximum number of elements in 1D images.
Definition: hip_runtime_api.h:122
Definition: driver_types.h:383
@ hipDeviceAttributeMaxBlockDimZ
Maximum z-dimension of a block.
Definition: hip_runtime_api.h:294
hipError_t hipMallocManaged(void **dev_ptr, size_t size, unsigned int flags __dparm(hipMemAttachGlobal))
Allocates memory that will be automatically managed by AMD HMM.
hipError_t hipMemcpyHtoD(hipDeviceptr_t dst, void *src, size_t sizeBytes)
Copy data from Host to Device.
Definition: hip_memory.cpp:1374
hipError_t hipDriverGetVersion(int *driverVersion)
Returns the approximate HIP driver version.
Definition: hip_context.cpp:85
unsigned hasDoubles
Double-precision floating point.
Definition: hip_runtime_api.h:58
hipErrorInvalidKernelFile
In CUDA DRV, it is CUDA_ERROR_INVALID_PTX.
Definition: hip_runtime_api.h:239
int maxThreadsPerBlock
Max work items per work group or workgroup max size.
Definition: hip_runtime_api.h:90
@ hipDeviceAttributeMaxBlockDimY
Maximum y-dimension of a block.
Definition: hip_runtime_api.h:293
hipError_t hipMemcpy2DToArray(hipArray *dst, size_t wOffset, size_t hOffset, const void *src, size_t spitch, size_t width, size_t height, hipMemcpyKind kind)
Copies data between host and device.
Definition: hip_memory.cpp:1444
hipError_t hipMemAllocPitch(hipDeviceptr_t *dptr, size_t *pitch, size_t widthInBytes, size_t height, unsigned int elementSizeBytes)
Definition: hip_memory.cpp:862
Definition: hip_runtime_api.h:84
hipError_t hipMemAllocHost(void **ptr, size_t size)
Allocate pinned host memory [Deprecated].
Definition: hip_runtime_api.h:766
hipError_t hipMallocHost(void **ptr, size_t size)
Allocate pinned host memory [Deprecated].
Definition: hip_runtime_api.h:760
@ hipDeviceAttributeMaxTexture2DHeight
Maximum dimension height of 2D images in image elements.
Definition: hip_runtime_api.h:329
unsigned hasSharedInt32Atomics
32-bit integer atomics for shared memory.
Definition: hip_runtime_api.h:49
hipError_t hipFuncSetCacheConfig(const void *func, hipFuncCache_t config)
Set Cache configuration for a specific function.
Definition: hip_device.cpp:108
hipErrorInvalidValue
Definition: hip_runtime_api.h:198
size_t memPitch
Maximum pitch in bytes allowed by memory copies.
Definition: hip_runtime_api.h:127
hipError_t hipMemsetD32Async(hipDeviceptr_t dst, int value, size_t count, hipStream_t stream __dparm(0))
Fills the memory area pointed to by dev with the constant integer value for specified number of times...
int pciBusID
PCI Bus ID.
Definition: hip_runtime_api.h:112
hipError_t hipRuntimeGetVersion(int *runtimeVersion)
Returns the approximate HIP Runtime version.
Definition: hip_context.cpp:97
@ hipDeviceAttributeComputeCapabilityMajor
Major compute capability version number.
Definition: hip_runtime_api.h:315
hipError_t hipEventQuery(hipEvent_t event)
Query event status.
Definition: hip_event.cpp:394
@ hipDeviceAttributeMaxTexture3DDepth
Maximum dimensions depth of 3D images in image elements.
Definition: hip_runtime_api.h:332
Definition: hip_hcc_internal.h:938
hipErrorRuntimeMemory
Definition: hip_runtime_api.h:276
@ hipDeviceAttributeMaxThreadsPerMultiProcessor
Definition: hip_runtime_api.h:313
hipError_t hipStreamGetPriority(hipStream_t stream, int *priority)
Query the priority of a stream.
Definition: hip_stream.cpp:238
hipDeviceArch_t arch
Architectural feature flags. New for HIP.
Definition: hip_runtime_api.h:109
hipError_t hipEventSynchronize(hipEvent_t event)
Wait for an event to complete.
Definition: hip_event.cpp:300
hipError_t hipOccupancyMaxActiveBlocksPerMultiprocessor(int *numBlocks, const void *f, int blockSize, size_t dynSharedMemPerBlk)
Returns occupancy for a device function.
Definition: hip_module.cpp:1683
hipError_t hipHostFree(void *ptr)
Free memory allocated by the hcc hip host memory allocation API. This API performs an implicit hipDevi...
Definition: hip_memory.cpp:2396
@ hipDeviceAttributePciBusId
PCI Bus ID.
Definition: hip_runtime_api.h:319
hipError_t hipIpcOpenMemHandle(void **devPtr, hipIpcMemHandle_t handle, unsigned int flags)
Opens an interprocess memory handle exported from another process and returns a device pointer usable...
Definition: hip_memory.cpp:2494
hipError_t hipMemsetD16(hipDeviceptr_t dest, unsigned short value, size_t count)
Fills the first sizeBytes bytes of the memory area pointed to by dest with the constant short value v...
Definition: hip_memory.cpp:2271
int tccDriver
1 if the device is a Tesla device using the TCC driver, else 0.
Definition: hip_runtime_api.h:132
Definition: hip_hcc_internal.h:759
hipError_t hipDeviceGetLimit(size_t *pValue, enum hipLimit_t limit)
Get Resource limits of current device.
Definition: hip_device.cpp:94
hipError_t hipMalloc(void **ptr, size_t size)
Allocate memory on the default accelerator.
Definition: hip_memory.cpp:695
Definition: hip_runtime_api.h:111
hipError_t hipEventElapsedTime(float *ms, hipEvent_t start, hipEvent_t stop)
Return the elapsed time between two events.
Definition: hip_event.cpp:344
hipError_t hipGetLastError(void)
Return last error returned by any HIP runtime API call and resets the stored error code to hipSuccess...
Definition: hip_error.cpp:32
hipError_t hipInit(unsigned int flags)
Explicitly initializes the HIP runtime.
Definition: hip_context.cpp:39
Definition: hip_hcc_internal.h:580
@ hipDeviceAttributeTexturePitchAlignment
Pitch alignment requirement for 2D texture references bound to pitched memory.
Definition: hip_runtime_api.h:339
@ hipDeviceAttributeWarpSize
Warp size in threads.
Definition: hip_runtime_api.h:301
unsigned hasGlobalInt32Atomics
32-bit integer atomics for global memory.
Definition: hip_runtime_api.h:47
Definition: driver_types.h:78
unsigned hasSyncThreadsExt
__syncthreads_count, __syncthreads_and, __syncthreads_or.
Definition: hip_runtime_api.h:68
hipError_t hipIpcGetMemHandle(hipIpcMemHandle_t *handle, void *devPtr)
Gets an interprocess memory handle for an existing device memory allocation.
Definition: hip_memory.cpp:2458
hipErrorInvalidDevice
DeviceID must be in range 0...#compute-devices.
Definition: hip_runtime_api.h:220
unsigned hasFunnelShift
Funnel two words into one with shift&mask caps.
Definition: hip_runtime_api.h:64
hipError_t hipCtxDisablePeerAccess(hipCtx_t peerCtx)
Disable direct access from current context's virtual address space to memory allocations physically l...
Definition: hip_peer.cpp:227
@ hipDeviceAttributeMaxTexture3DHeight
Maximum dimensions height of 3D images in image elements.
Definition: hip_runtime_api.h:331
@ hipDeviceAttributeMemoryClockRate
Peak memory clock frequency in kilohertz.
Definition: hip_runtime_api.h:307
hipErrorNotReady
Definition: hip_runtime_api.h:250
hipError_t hipHostGetDevicePointer(void **devPtr, void *hstPtr, unsigned int flags)
Get Device pointer from Host Pointer allocated through hipHostMalloc.
hipError_t hipMemGetInfo(size_t *free, size_t *total)
Query memory info. Return snapshot of free memory, and total allocatable memory on the device.
Definition: hip_memory.cpp:2296
hipError_t hipEventDestroy(hipEvent_t event)
Destroy the specified event.
Definition: hip_event.cpp:278
hipError_t hipDeviceSetSharedMemConfig(hipSharedMemConfig config)
The bank width of shared memory on current device is set.
Definition: hip_device.cpp:116
hipError_t hipDeviceReset(void)
The state of current device is discarded and updated to a fresh state.
Definition: hip_device.cpp:148
int maxGridSize[3]
Max grid dimensions (XYZ).
Definition: hip_runtime_api.h:92
@ hipDeviceAttributeComputeMode
Compute mode that device is currently in.
Definition: hip_runtime_api.h:310
hipError_t hipSetDeviceFlags(unsigned flags)
The current device behavior is changed according to the flags passed.
@ hipDeviceAttributePciDeviceId
PCI Device ID.
Definition: hip_runtime_api.h:320
size_t maxSharedMemoryPerMultiProcessor
Maximum Shared Memory Per Multiprocessor.
Definition: hip_runtime_api.h:114
int clockInstructionRate
Definition: hip_runtime_api.h:107
Definition: hip_runtime_api.h:330
hipError_t hipStreamQuery(hipStream_t stream)
Return hipSuccess if all of the operations in the specified stream have completed,...
Definition: hip_stream.cpp:161
hipError_t hipDevicePrimaryCtxSetFlags(hipDevice_t dev, unsigned int flags)
Set flags for the primary context.
Definition: hip_context.cpp:321
Definition: hip_runtime_api.h:162
@ hipDeviceAttributeTotalConstantMemory
Constant memory size in bytes.
Definition: hip_runtime_api.h:300
hipError_t hipFree(void *ptr)
Free memory allocated by the hcc hip memory allocation API. This API performs an implicit hipDeviceSy...
Definition: hip_memory.cpp:2344
unsigned hasWarpShuffle
Warp shuffle operations. (__shfl_*).
Definition: hip_runtime_api.h:63
#define hipArrayDefault
Default HIP array allocation flag.
Definition: hip_runtime_api.h:221
hipError_t hipDevicePrimaryCtxRetain(hipCtx_t *pctx, hipDevice_t dev)
Retain the primary context on the GPU.
Definition: hip_context.cpp:296
hipError_t hipOccupancyMaxPotentialBlockSize(int *gridSize, int *blockSize, const void *f, size_t dynSharedMemPerBlk, int blockSizeLimit)
Determine the grid and block sizes to achieve maximum occupancy for a kernel.
hipError_t hipModuleLoad(hipModule_t *module, const char *fname)
Loads code object from file into a hipModule_t.
Definition: hip_module.cpp:1513
hipError_t hipModuleOccupancyMaxActiveBlocksPerMultiprocessorWithFlags(int *numBlocks, hipFunction_t f, int blockSize, size_t dynSharedMemPerBlk, unsigned int flags)
Returns occupancy for a device function.
Definition: hip_module.cpp:1714
hipError_t hipFreeHost(void *ptr)
Free memory allocated by the hcc hip host memory allocation API. [Deprecated].
Definition: hip_runtime_api.h:817
hipError_t hipMemcpyHtoA(hipArray *dstArray, size_t dstOffset, const void *srcHost, size_t count)
Copies data between host and device.
Definition: hip_memory.cpp:1528
hipError_t hipModuleGetFunction(hipFunction_t *function, hipModule_t module, const char *kname)
Function with kname will be extracted if present in module.
Definition: hip_module.cpp:1309
int memoryBusWidth
Global memory bus width in bits.
Definition: hip_runtime_api.h:95
hipError_t hipStreamAddCallback(hipStream_t stream, hipStreamCallback_t callback, void *userData, unsigned int flags)
Adds a callback to be called on the host after all currently enqueued items in the stream have comple...
Definition: hip_stream.cpp:258
unsigned hasWarpVote
Warp vote instructions (__any, __all).
Definition: hip_runtime_api.h:61
char name[256]
Device name.
Definition: hip_runtime_api.h:85
hipError_t hipMemcpyDtoHAsync(void *dst, hipDeviceptr_t src, size_t sizeBytes, hipStream_t stream)
Copy data from Device to Host asynchronously.
Definition: hip_memory.cpp:1437
unsigned hasGlobalFloatAtomicExch
32-bit float atomic exch for global memory.
Definition: hip_runtime_api.h:48
int concurrentKernels
Device can possibly execute multiple kernels concurrently.
Definition: hip_runtime_api.h:110
unsigned hasWarpBallot
Warp ballot instructions (__ballot).
Definition: hip_runtime_api.h:62
size_t totalGlobalMem
Size of global memory region (in bytes).
Definition: hip_runtime_api.h:86
@ hipDeviceAttributeTextureAlignment
Alignment requirement for textures.
Definition: hip_runtime_api.h:338
hipError_t hipFuncGetAttributes(struct hipFuncAttributes *attr, const void *func)
Find out attributes for a given function.
Definition: hip_module.cpp:1393
hipError_t hipEventRecord(hipEvent_t event, hipStream_t stream)
Record an event in the specified stream.
Definition: hip_event.cpp:213
hipError_t hipMemcpy2D(void *dst, size_t dpitch, const void *src, size_t spitch, size_t width, size_t height, hipMemcpyKind kind)
Copies data between host and device.
Definition: hip_memory.cpp:2020
Definition: driver_types.h:370
Definition: driver_types.h:363
hipError_t hipModuleGetGlobal(void **, size_t *, hipModule_t, const char *)
Returns device memory pointer and size of the kernel present in the module with symbol name.
Definition: hip_module.cpp:1113
hipError_t hipMemset2D(void *dst, size_t pitch, int value, size_t width, size_t height)
Fills the memory area pointed to by dst with the constant value.
Definition: hip_memory.cpp:2251
hipError_t hipMemset3D(hipPitchedPtr pitchedDevPtr, int value, hipExtent extent)
Fills synchronously the memory area pointed to by pitchedDevPtr with the constant value.
Definition: hip_memory.cpp:2286
hipError_t hipStreamCreateWithFlags(hipStream_t *stream, unsigned int flags)
Create an asynchronous stream.
Definition: hip_stream.cpp:97
hipError_t hipDeviceGetAttribute(int *pi, hipDeviceAttribute_t attr, int deviceId)
Query for a specific device attribute.
Definition: hip_device.cpp:354
hipError_t hipMemcpyFromArray(void *dst, hipArray_const_t srcArray, size_t wOffset, size_t hOffset, size_t count, hipMemcpyKind kind)
Copies data between host and device.
Definition: hip_memory.cpp:1511
@ hipDeviceAttributeCanMapHostMemory
Device can map host memory into device address space.
Definition: hip_runtime_api.h:341
int maxThreadsDim[3]
Max number of threads in each dimension (XYZ) of a block.
Definition: hip_runtime_api.h:91
Definition: hip_module.cpp:108
hipError_t hipMemcpyPeerAsync(void *dst, int dstDeviceId, const void *src, int srcDevice, size_t sizeBytes, hipStream_t stream __dparm(0))
Copies memory from one device to memory on another device.
hipError_t hipMemcpyHtoDAsync(hipDeviceptr_t dst, void *src, size_t sizeBytes, hipStream_t stream)
Copy data from Host to Device asynchronously.
Definition: hip_memory.cpp:1422
int cooperativeMultiDeviceLaunch
HIP device supports cooperative launch on multiple devices.
Definition: hip_runtime_api.h:121
hipError_t hipMemcpyDtoH(void *dst, hipDeviceptr_t src, size_t sizeBytes)
Copy data from Device to Host.
Definition: hip_memory.cpp:1382
unsigned has3dGrid
Grid and group dims are 3D (rather than 2D).
Definition: hip_runtime_api.h:72
hipError_t hipDeviceGetCacheConfig(hipFuncCache_t *cacheConfig)
Get Cache configuration for the current device.
Definition: hip_device.cpp:82
hipError_t hipMemcpyPeer(void *dst, int dstDeviceId, const void *src, int srcDeviceId, size_t sizeBytes)
Copies memory from one device to memory on another device.
Definition: hip_peer.cpp:207
@ hipDeviceAttributeMaxTexture1DWidth
Maximum number of elements in 1D images.
Definition: hip_runtime_api.h:327
@ hipDeviceAttributeCooperativeLaunch
Support cooperative launch.
Definition: hip_runtime_api.h:325
@ hipDeviceAttributeMultiprocessorCount
Number of multiprocessors on the device.
Definition: hip_runtime_api.h:309
int pciDeviceID
PCI Device ID.
Definition: hip_runtime_api.h:113
hipError_t hipGetDeviceProperties(hipDeviceProp_t *prop, int deviceId)
Returns device properties.
Definition: hip_device.cpp:381
hipError_t hipMemcpy(void *dst, const void *src, size_t sizeBytes, hipMemcpyKind kind)
Copy data from src to dst.
Definition: hip_memory.cpp:1367
int memoryClockRate
Max global memory clock frequency in kHz.
Definition: hip_runtime_api.h:94
hipError_t hipEventCreateWithFlags(hipEvent_t *event, unsigned flags)
Create an event with the specified flags.
Definition: hip_event.cpp:201
hipErrorCooperativeLaunchTooLarge
Definition: hip_runtime_api.h:269
int warpSize
Warp size.
Definition: hip_runtime_api.h:89
hipError_t hipCtxGetSharedMemConfig(hipSharedMemConfig *pConfig)
Get Shared memory bank configuration.
Definition: hip_context.cpp:241
hipError_t hipDeviceTotalMem(size_t *bytes, hipDevice_t device)
Returns the total amount of memory on the device.
Definition: hip_device.cpp:480
hipError_t hipFreeArray(hipArray *array)
Frees an array on the device.
Definition: hip_memory.cpp:2409
hipErrorAssert
Produced when the kernel calls assert.
Definition: hip_runtime_api.h:262
Definition: texture_types.h:74
hipError_t hipCtxPopCurrent(hipCtx_t *ctx)
Pop the current/default context and return the popped context.
Definition: hip_context.cpp:133
int cooperativeMultiDeviceUnmatchedFunc
Definition: hip_runtime_api.h:133
int cooperativeMultiDeviceUnmatchedGridDim
Definition: hip_runtime_api.h:135
hipError_t hipDeviceCanAccessPeer(int *canAccessPeer, int deviceId, int peerDeviceId)
Determine if a device can access a peer's memory.
Definition: hip_peer.cpp:186
unsigned hasFloatAtomicAdd
32-bit float atomic add in global and shared memory.
Definition: hip_runtime_api.h:51
hipError_t hipCtxSetCurrent(hipCtx_t ctx)
Set the passed context as current/default.
Definition: hip_context.cpp:178
int cooperativeLaunch
HIP device supports cooperative launch.
Definition: hip_runtime_api.h:120
unsigned hasSharedFloatAtomicExch
32-bit float atomic exch for shared memory.
Definition: hip_runtime_api.h:50
Definition: texture_types.h:95
Definition: driver_types.h:323
int multiProcessorCount
Number of multi-processors (compute units).
Definition: hip_runtime_api.h:103
int integrated
APU vs dGPU.
Definition: hip_runtime_api.h:119
hipError_t hipMemsetD8(hipDeviceptr_t dest, unsigned char value, size_t count)
Fills the first sizeBytes bytes of the memory area pointed to by dest with the constant byte value va...
Definition: hip_memory.cpp:2261
hipError_t hipCtxSetCacheConfig(hipFuncCache_t cacheConfig)
Set L1/Shared cache partition.
Definition: hip_context.cpp:225
hipError_t hipMemset2DAsync(void *dst, size_t pitch, int value, size_t width, size_t height, hipStream_t stream __dparm(0))
Fills asynchronously the memory area pointed to by dst with the constant value.
int ECCEnabled
Device has ECC support enabled.
Definition: hip_runtime_api.h:131
hipError_t hipCtxCreate(hipCtx_t *ctx, unsigned int flags, hipDevice_t device)
Create a context and set it as current/default context.
Definition: hip_context.cpp:52
size_t totalConstMem
Size of constant memory region (in bytes).
Definition: hip_runtime_api.h:96
int maxTexture2D[2]
Maximum dimensions (width, height) of 2D images, in image elements.
Definition: hip_runtime_api.h:123
Definition: hip_runtime_api.h:339
hipErrorHostMemoryAlreadyRegistered
Produced when trying to lock a page-locked memory.
Definition: hip_runtime_api.h:263
hipFuncAttribute
Definition: hip_runtime_api.h:299
hipError_t hipCtxSetSharedMemConfig(hipSharedMemConfig config)
Set Shared memory bank configuration.
Definition: hip_context.cpp:233
hipError_t hipModuleOccupancyMaxActiveBlocksPerMultiprocessor(int *numBlocks, hipFunction_t f, int blockSize, size_t dynSharedMemPerBlk)
Returns occupancy for a device function.
Definition: hip_module.cpp:1693
hipDeviceAttribute_t
Definition: hip_runtime_api.h:290
hipError_t hipFuncSetSharedMemConfig(const void *func, hipSharedMemConfig config)
Set shared memory configuration for a specific function.
Definition: hip_module.cpp:1419
Definition: driver_types.h:262
hipErrorLaunchFailure
An exception occurred on the device while executing a kernel.
Definition: hip_runtime_api.h:267
Definition: hip_hcc_internal.h:415
hipError_t hipDeviceSynchronize(void)
Waits on all active streams on current device.
Definition: hip_device.cpp:143
@ hipDeviceAttributeConcurrentKernels
Definition: hip_runtime_api.h:317
int cooperativeMultiDeviceUnmatchedSharedMem
Definition: hip_runtime_api.h:139
hipError_t hipProfilerStart()
Start recording of profiling information. When using this API, start the profiler with profiling disab...
Definition: hip_hcc.cpp:2496
hipError_t hipDeviceGetSharedMemConfig(hipSharedMemConfig *pConfig)
Returns bank width of shared memory for current device.
Definition: hip_device.cpp:124
hipErrorNotSupported
Produced when the hip API is not supported/implemented.
Definition: hip_runtime_api.h:273
hipError_t hipMemcpyAsync(void *dst, const void *src, size_t sizeBytes, hipMemcpyKind kind, hipStream_t stream __dparm(0))
Copies sizeBytes bytes from the memory area pointed to by src to the memory area pointed to by offset...
hipErrorLaunchOutOfResources
Out of resources error.
Definition: hip_runtime_api.h:255
hipError_t hipStreamDestroy(hipStream_t stream)
Destroys the specified stream.
Definition: hip_stream.cpp:195
hipError_t hipHostRegister(void *hostPtr, size_t sizeBytes, unsigned int flags)
Register host memory so it can be accessed from the current device.
Definition: hip_memory.cpp:1158
hipError_t hipFuncSetAttribute(const void *func, hipFuncAttribute attr, int value)
Set attribute for a specific function.
Definition: hip_module.cpp:1411
hipError_t hipProfilerStop()
Stop recording of profiling information. When using this API, start the profiler with profiling disab...
Definition: hip_hcc.cpp:2502
hipError_t hipModuleLoadDataEx(hipModule_t *module, const void *image, unsigned int numOptions, hipJitOption *options, void **optionValues)
Builds module from code object which resides in host memory. Image is pointer to that location....
Definition: hip_module.cpp:1527
hipError_t hipEventCreate(hipEvent_t *event)
Definition: hip_event.cpp:207
hipError_t hipMemsetAsync(void *dst, int value, size_t sizeBytes, hipStream_t stream __dparm(0))
Fills the first sizeBytes bytes of the memory area pointed to by dev with the constant byte value val...
@ hipDeviceAttributeMaxTexture2DWidth
Maximum dimension width of 2D images in image elements.
Definition: hip_runtime_api.h:328
int pciDomainID
PCI Domain ID.
Definition: hip_runtime_api.h:111
hipError_t hipCtxGetCacheConfig(hipFuncCache_t *cacheConfig)
Get Cache configuration for the current context.
Definition: hip_context.cpp:217
hipError_t hipMemcpyParam2D(const hip_Memcpy2D *pCopy)
Copies memory for 2D arrays.
Definition: hip_memory.cpp:2144
hipError_t hipHostAlloc(void **ptr, size_t size, unsigned int flags)
Allocate device accessible page locked host memory [Deprecated].
Definition: hip_runtime_api.h:772
hipError_t hipMemset(void *dst, int value, size_t sizeBytes)
Fills the first sizeBytes bytes of the memory area pointed to by dest with the constant byte value va...
Definition: hip_memory.cpp:2220
hipError_t hipDeviceDisablePeerAccess(int peerDeviceId)
Disable direct access from current device's virtual address space to memory allocations physically lo...
Definition: hip_peer.cpp:193
@ hipDeviceAttributeEccEnabled
Device has ECC support enabled.
Definition: hip_runtime_api.h:342
@ hipDeviceAttributeMaxSharedMemoryPerMultiprocessor
Definition: hip_runtime_api.h:321
hipError_t hipDevicePrimaryCtxReset(hipDevice_t dev)
Resets the primary context on the GPU.
Definition: hip_context.cpp:308
hipErrorPeerAccessAlreadyEnabled
Peer access was already enabled from the current device.
Definition: hip_runtime_api.h:257