HIP: Heterogeneous-computing Interface for Portability
hip_runtime_api.h
1 /*
2 Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved.
3 
4 Permission is hereby granted, free of charge, to any person obtaining a copy
5 of this software and associated documentation files (the "Software"), to deal
6 in the Software without restriction, including without limitation the rights
7 to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
8 copies of the Software, and to permit persons to whom the Software is
9 furnished to do so, subject to the following conditions:
10 
11 The above copyright notice and this permission notice shall be included in
12 all copies or substantial portions of the Software.
13 
14 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
17 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
19 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
20 THE SOFTWARE.
21 */
22 
23 #ifndef HIP_INCLUDE_HIP_NVIDIA_DETAIL_HIP_RUNTIME_API_H
24 #define HIP_INCLUDE_HIP_NVIDIA_DETAIL_HIP_RUNTIME_API_H
25 
26 #include <cuda_runtime_api.h>
27 #include <cuda.h>
28 #include <cuda_profiler_api.h>
29 #include <cuda_fp16.h>
30 
31 #ifdef __cplusplus
32 extern "C" {
33 #endif
34 
35 #ifdef __cplusplus
36 #define __dparm(x) = x
37 #else
38 #define __dparm(x)
39 #endif
40 
41 // Add Deprecated Support for CUDA Mapped HIP APIs
42 #if defined(__DOXYGEN_ONLY__) || defined(HIP_ENABLE_DEPRECATED)
43 #define __HIP_DEPRECATED
44 #elif defined(_MSC_VER)
45 #define __HIP_DEPRECATED __declspec(deprecated)
46 #elif defined(__GNUC__)
47 #define __HIP_DEPRECATED __attribute__((deprecated))
48 #else
49 #define __HIP_DEPRECATED
50 #endif
51 
52 
53 // TODO -move to include/hip_runtime_api.h as a common implementation.
// Direction of a hipMemcpy* transfer. On the NVIDIA platform these values are
// translated to the CUDA runtime's cudaMemcpyKind by
// hipMemcpyKindToCudaMemcpyKind() later in this header.
typedef enum hipMemcpyKind {
    hipMemcpyHostToHost,       // copy within host memory
    hipMemcpyHostToDevice,     // copy from host memory to device memory
    hipMemcpyDeviceToHost,     // copy from device memory to host memory
    hipMemcpyDeviceToDevice,   // copy within device memory
    hipMemcpyDefault           // maps to cudaMemcpyDefault (direction inferred by the runtime)
} hipMemcpyKind;
65 
66 typedef enum hipMemoryAdvise {
74 
75 // hipDataType
76 #define hipDataType cudaDataType
77 #define HIP_R_16F CUDA_R_16F
78 #define HIP_R_32F CUDA_R_32F
79 #define HIP_R_64F CUDA_R_64F
80 #define HIP_C_16F CUDA_C_16F
81 #define HIP_C_32F CUDA_C_32F
82 #define HIP_C_64F CUDA_C_64F
83 
84 // hipLibraryPropertyType
85 #define hipLibraryPropertyType libraryPropertyType
86 #define HIP_LIBRARY_MAJOR_VERSION MAJOR_VERSION
87 #define HIP_LIBRARY_MINOR_VERSION MINOR_VERSION
88 #define HIP_LIBRARY_PATCH_LEVEL PATCH_LEVEL
89 
90 #define HIP_ARRAY_DESCRIPTOR CUDA_ARRAY_DESCRIPTOR
91 #define HIP_ARRAY3D_DESCRIPTOR CUDA_ARRAY3D_DESCRIPTOR
92 
93 //hipArray_Format
94 #define HIP_AD_FORMAT_UNSIGNED_INT8 CU_AD_FORMAT_UNSIGNED_INT8
95 #define HIP_AD_FORMAT_UNSIGNED_INT16 CU_AD_FORMAT_UNSIGNED_INT16
96 #define HIP_AD_FORMAT_UNSIGNED_INT32 CU_AD_FORMAT_UNSIGNED_INT32
97 #define HIP_AD_FORMAT_SIGNED_INT8 CU_AD_FORMAT_SIGNED_INT8
98 #define HIP_AD_FORMAT_SIGNED_INT16 CU_AD_FORMAT_SIGNED_INT16
99 #define HIP_AD_FORMAT_SIGNED_INT32 CU_AD_FORMAT_SIGNED_INT32
100 #define HIP_AD_FORMAT_HALF CU_AD_FORMAT_HALF
101 #define HIP_AD_FORMAT_FLOAT CU_AD_FORMAT_FLOAT
102 
103 // hipArray_Format
104 #define hipArray_Format CUarray_format
105 
106 inline static CUarray_format hipArray_FormatToCUarray_format(
107  hipArray_Format format) {
108  switch (format) {
109  case HIP_AD_FORMAT_UNSIGNED_INT8:
110  return CU_AD_FORMAT_UNSIGNED_INT8;
111  case HIP_AD_FORMAT_UNSIGNED_INT16:
112  return CU_AD_FORMAT_UNSIGNED_INT16;
113  case HIP_AD_FORMAT_UNSIGNED_INT32:
114  return CU_AD_FORMAT_UNSIGNED_INT32;
115  case HIP_AD_FORMAT_SIGNED_INT8:
116  return CU_AD_FORMAT_SIGNED_INT8;
117  case HIP_AD_FORMAT_SIGNED_INT16:
118  return CU_AD_FORMAT_SIGNED_INT16;
119  case HIP_AD_FORMAT_SIGNED_INT32:
120  return CU_AD_FORMAT_SIGNED_INT32;
121  case HIP_AD_FORMAT_HALF:
122  return CU_AD_FORMAT_HALF;
123  case HIP_AD_FORMAT_FLOAT:
124  return CU_AD_FORMAT_FLOAT;
125  default:
126  return CU_AD_FORMAT_UNSIGNED_INT8;
127  }
128 }
129 
130 #define HIP_TR_ADDRESS_MODE_WRAP CU_TR_ADDRESS_MODE_WRAP
131 #define HIP_TR_ADDRESS_MODE_CLAMP CU_TR_ADDRESS_MODE_CLAMP
132 #define HIP_TR_ADDRESS_MODE_MIRROR CU_TR_ADDRESS_MODE_MIRROR
133 #define HIP_TR_ADDRESS_MODE_BORDER CU_TR_ADDRESS_MODE_BORDER
134 
135 // hipAddress_mode
136 #define hipAddress_mode CUaddress_mode
137 
138 inline static CUaddress_mode hipAddress_modeToCUaddress_mode(
139  hipAddress_mode mode) {
140  switch (mode) {
141  case HIP_TR_ADDRESS_MODE_WRAP:
142  return CU_TR_ADDRESS_MODE_WRAP;
143  case HIP_TR_ADDRESS_MODE_CLAMP:
144  return CU_TR_ADDRESS_MODE_CLAMP;
145  case HIP_TR_ADDRESS_MODE_MIRROR:
146  return CU_TR_ADDRESS_MODE_MIRROR;
147  case HIP_TR_ADDRESS_MODE_BORDER:
148  return CU_TR_ADDRESS_MODE_BORDER;
149  default:
150  return CU_TR_ADDRESS_MODE_WRAP;
151  }
152 }
153 
154 #define HIP_TR_FILTER_MODE_POINT CU_TR_FILTER_MODE_POINT
155 #define HIP_TR_FILTER_MODE_LINEAR CU_TR_FILTER_MODE_LINEAR
156 
157 // hipFilter_mode
158 #define hipFilter_mode CUfilter_mode
159 
160 inline static CUfilter_mode hipFilter_mode_enumToCUfilter_mode(
161  hipFilter_mode mode) {
162  switch (mode) {
163  case HIP_TR_FILTER_MODE_POINT:
164  return CU_TR_FILTER_MODE_POINT;
165  case HIP_TR_FILTER_MODE_LINEAR:
166  return CU_TR_FILTER_MODE_LINEAR;
167  default:
168  return CU_TR_FILTER_MODE_POINT;
169  }
170 }
171 
172 //hipResourcetype
173 #define HIP_RESOURCE_TYPE_ARRAY CU_RESOURCE_TYPE_ARRAY
174 #define HIP_RESOURCE_TYPE_MIPMAPPED_ARRAY CU_RESOURCE_TYPE_MIPMAPPED_ARRAY
175 #define HIP_RESOURCE_TYPE_LINEAR CU_RESOURCE_TYPE_LINEAR
176 #define HIP_RESOURCE_TYPE_PITCH2D CU_RESOURCE_TYPE_PITCH2D
177 
178 // hipResourcetype
179 #define hipResourcetype CUresourcetype
180 
181 inline static CUresourcetype hipResourcetype_enumToCUresourcetype(
182  hipResourcetype resType) {
183  switch (resType) {
184  case HIP_RESOURCE_TYPE_ARRAY:
185  return CU_RESOURCE_TYPE_ARRAY;
186  case HIP_RESOURCE_TYPE_MIPMAPPED_ARRAY:
187  return CU_RESOURCE_TYPE_MIPMAPPED_ARRAY;
188  case HIP_RESOURCE_TYPE_LINEAR:
189  return CU_RESOURCE_TYPE_LINEAR;
190  case HIP_RESOURCE_TYPE_PITCH2D:
191  return CU_RESOURCE_TYPE_PITCH2D;
192  default:
193  return CU_RESOURCE_TYPE_ARRAY;
194  }
195 }
196 
197 #define hipTexRef CUtexref
198 #define hiparray CUarray
199 
200 // hipTextureAddressMode
201 typedef enum cudaTextureAddressMode hipTextureAddressMode;
202 #define hipAddressModeWrap cudaAddressModeWrap
203 #define hipAddressModeClamp cudaAddressModeClamp
204 #define hipAddressModeMirror cudaAddressModeMirror
205 #define hipAddressModeBorder cudaAddressModeBorder
206 
207 // hipTextureFilterMode
208 typedef enum cudaTextureFilterMode hipTextureFilterMode;
209 #define hipFilterModePoint cudaFilterModePoint
210 #define hipFilterModeLinear cudaFilterModeLinear
211 
212 // hipTextureReadMode
213 typedef enum cudaTextureReadMode hipTextureReadMode;
214 #define hipReadModeElementType cudaReadModeElementType
215 #define hipReadModeNormalizedFloat cudaReadModeNormalizedFloat
216 
217 // hipChannelFormatKind
218 typedef enum cudaChannelFormatKind hipChannelFormatKind;
219 #define hipChannelFormatKindSigned cudaChannelFormatKindSigned
220 #define hipChannelFormatKindUnsigned cudaChannelFormatKindUnsigned
221 #define hipChannelFormatKindFloat cudaChannelFormatKindFloat
222 #define hipChannelFormatKindNone cudaChannelFormatKindNone
223 
224 // hipMemRangeAttribute
225 typedef enum cudaMemRangeAttribute hipMemRangeAttribute;
226 #define hipMemRangeAttributeReadMostly cudaMemRangeAttributeReadMostly
227 #define hipMemRangeAttributePreferredLocation cudaMemRangeAttributePreferredLocation
228 #define hipMemRangeAttributeAccessedBy cudaMemRangeAttributeAccessedBy
229 #define hipMemRangeAttributeLastPrefetchLocation cudaMemRangeAttributeLastPrefetchLocation
230 
231 #define hipSurfaceBoundaryMode cudaSurfaceBoundaryMode
232 #define hipBoundaryModeZero cudaBoundaryModeZero
233 #define hipBoundaryModeTrap cudaBoundaryModeTrap
234 #define hipBoundaryModeClamp cudaBoundaryModeClamp
235 
236 // hipFuncCache
237 #define hipFuncCachePreferNone cudaFuncCachePreferNone
238 #define hipFuncCachePreferShared cudaFuncCachePreferShared
239 #define hipFuncCachePreferL1 cudaFuncCachePreferL1
240 #define hipFuncCachePreferEqual cudaFuncCachePreferEqual
241 
242 // hipResourceType
243 #define hipResourceType cudaResourceType
244 #define hipResourceTypeArray cudaResourceTypeArray
245 #define hipResourceTypeMipmappedArray cudaResourceTypeMipmappedArray
246 #define hipResourceTypeLinear cudaResourceTypeLinear
247 #define hipResourceTypePitch2D cudaResourceTypePitch2D
248 //
249 // hipErrorNoDevice.
250 
251 
253 #define hipEventDefault cudaEventDefault
254 #define hipEventBlockingSync cudaEventBlockingSync
255 #define hipEventDisableTiming cudaEventDisableTiming
256 #define hipEventInterprocess cudaEventInterprocess
257 #define hipEventReleaseToDevice 0 /* no-op on CUDA platform */
258 #define hipEventReleaseToSystem 0 /* no-op on CUDA platform */
259 
260 
261 #define hipHostMallocDefault cudaHostAllocDefault
262 #define hipHostMallocPortable cudaHostAllocPortable
263 #define hipHostMallocMapped cudaHostAllocMapped
264 #define hipHostMallocWriteCombined cudaHostAllocWriteCombined
265 #define hipHostMallocCoherent 0x0
266 #define hipHostMallocNonCoherent 0x0
267 
268 #define hipMemAttachGlobal cudaMemAttachGlobal
269 #define hipMemAttachHost cudaMemAttachHost
270 #define hipMemAttachSingle cudaMemAttachSingle
271 
272 #define hipHostRegisterDefault cudaHostRegisterDefault
273 #define hipHostRegisterPortable cudaHostRegisterPortable
274 #define hipHostRegisterMapped cudaHostRegisterMapped
275 #define hipHostRegisterIoMemory cudaHostRegisterIoMemory
276 
277 #define HIP_LAUNCH_PARAM_BUFFER_POINTER CU_LAUNCH_PARAM_BUFFER_POINTER
278 #define HIP_LAUNCH_PARAM_BUFFER_SIZE CU_LAUNCH_PARAM_BUFFER_SIZE
279 #define HIP_LAUNCH_PARAM_END CU_LAUNCH_PARAM_END
280 #define hipLimitMallocHeapSize cudaLimitMallocHeapSize
281 #define hipIpcMemLazyEnablePeerAccess cudaIpcMemLazyEnablePeerAccess
282 
283 #define hipOccupancyDefault cudaOccupancyDefault
284 
285 #define hipCooperativeLaunchMultiDeviceNoPreSync \
286  cudaCooperativeLaunchMultiDeviceNoPreSync
287 #define hipCooperativeLaunchMultiDeviceNoPostSync \
288  cudaCooperativeLaunchMultiDeviceNoPostSync
289 
290 
291 // enum CUjit_option redefines
292 #define hipJitOptionMaxRegisters CU_JIT_MAX_REGISTERS
293 #define hipJitOptionThreadsPerBlock CU_JIT_THREADS_PER_BLOCK
294 #define hipJitOptionWallTime CU_JIT_WALL_TIME
295 #define hipJitOptionInfoLogBuffer CU_JIT_INFO_LOG_BUFFER
296 #define hipJitOptionInfoLogBufferSizeBytes CU_JIT_INFO_LOG_BUFFER_SIZE_BYTES
297 #define hipJitOptionErrorLogBuffer CU_JIT_ERROR_LOG_BUFFER
298 #define hipJitOptionErrorLogBufferSizeBytes CU_JIT_ERROR_LOG_BUFFER_SIZE_BYTES
299 #define hipJitOptionOptimizationLevel CU_JIT_OPTIMIZATION_LEVEL
300 #define hipJitOptionTargetFromContext CU_JIT_TARGET_FROM_CUCONTEXT
301 #define hipJitOptionTarget CU_JIT_TARGET
302 #define hipJitOptionFallbackStrategy CU_JIT_FALLBACK_STRATEGY
303 #define hipJitOptionGenerateDebugInfo CU_JIT_GENERATE_DEBUG_INFO
304 #define hipJitOptionLogVerbose CU_JIT_LOG_VERBOSE
305 #define hipJitOptionGenerateLineInfo CU_JIT_GENERATE_LINE_INFO
306 #define hipJitOptionCacheMode CU_JIT_CACHE_MODE
307 #define hipJitOptionSm3xOpt CU_JIT_NEW_SM3X_OPT
308 #define hipJitOptionFastCompile CU_JIT_FAST_COMPILE
309 #define hipJitOptionNumOptions CU_JIT_NUM_OPTIONS
310 
311 typedef cudaEvent_t hipEvent_t;
312 typedef cudaStream_t hipStream_t;
313 typedef cudaIpcEventHandle_t hipIpcEventHandle_t;
314 typedef cudaIpcMemHandle_t hipIpcMemHandle_t;
315 typedef enum cudaLimit hipLimit_t;
316 typedef enum cudaFuncAttribute hipFuncAttribute;
317 typedef enum cudaFuncCache hipFuncCache_t;
318 typedef CUcontext hipCtx_t;
319 typedef enum cudaSharedMemConfig hipSharedMemConfig;
320 typedef CUfunc_cache hipFuncCache;
321 typedef CUjit_option hipJitOption;
322 typedef CUdevice hipDevice_t;
323 typedef enum cudaDeviceP2PAttr hipDeviceP2PAttr;
324 #define hipDevP2PAttrPerformanceRank cudaDevP2PAttrPerformanceRank
325 #define hipDevP2PAttrAccessSupported cudaDevP2PAttrAccessSupported
326 #define hipDevP2PAttrNativeAtomicSupported cudaDevP2PAttrNativeAtomicSupported
327 #define hipDevP2PAttrHipArrayAccessSupported cudaDevP2PAttrCudaArrayAccessSupported
328 #define hipFuncAttributeMaxDynamicSharedMemorySize cudaFuncAttributeMaxDynamicSharedMemorySize
329 #define hipFuncAttributePreferredSharedMemoryCarveout cudaFuncAttributePreferredSharedMemoryCarveout
330 
331 typedef CUmodule hipModule_t;
332 typedef CUfunction hipFunction_t;
333 typedef CUdeviceptr hipDeviceptr_t;
334 typedef struct cudaArray hipArray;
335 typedef struct cudaArray* hipArray_t;
336 typedef struct cudaArray* hipArray_const_t;
337 typedef struct cudaFuncAttributes hipFuncAttributes;
338 typedef struct cudaLaunchParams hipLaunchParams;
339 #define hipFunction_attribute CUfunction_attribute
340 #define hip_Memcpy2D CUDA_MEMCPY2D
341 #define HIP_MEMCPY3D CUDA_MEMCPY3D
342 #define hipMemcpy3DParms cudaMemcpy3DParms
343 #define hipArrayDefault cudaArrayDefault
344 #define hipArrayLayered cudaArrayLayered
345 #define hipArraySurfaceLoadStore cudaArraySurfaceLoadStore
346 #define hipArrayCubemap cudaArrayCubemap
347 #define hipArrayTextureGather cudaArrayTextureGather
348 
349 typedef cudaTextureObject_t hipTextureObject_t;
350 typedef cudaSurfaceObject_t hipSurfaceObject_t;
351 #define hipTextureType1D cudaTextureType1D
352 #define hipTextureType1DLayered cudaTextureType1DLayered
353 #define hipTextureType2D cudaTextureType2D
354 #define hipTextureType2DLayered cudaTextureType2DLayered
355 #define hipTextureType3D cudaTextureType3D
356 
357 #define hipDeviceScheduleAuto cudaDeviceScheduleAuto
358 #define hipDeviceScheduleSpin cudaDeviceScheduleSpin
359 #define hipDeviceScheduleYield cudaDeviceScheduleYield
360 #define hipDeviceScheduleBlockingSync cudaDeviceScheduleBlockingSync
361 #define hipDeviceScheduleMask cudaDeviceScheduleMask
362 #define hipDeviceMapHost cudaDeviceMapHost
363 #define hipDeviceLmemResizeToMax cudaDeviceLmemResizeToMax
364 
365 #define hipCpuDeviceId cudaCpuDeviceId
366 #define hipInvalidDeviceId cudaInvalidDeviceId
367 typedef struct cudaExtent hipExtent;
368 typedef struct cudaPitchedPtr hipPitchedPtr;
369 #define make_hipExtent make_cudaExtent
370 #define make_hipPos make_cudaPos
371 #define make_hipPitchedPtr make_cudaPitchedPtr
372 // Flags that can be used with hipStreamCreateWithFlags
373 #define hipStreamDefault cudaStreamDefault
374 #define hipStreamNonBlocking cudaStreamNonBlocking
375 
376 typedef struct cudaChannelFormatDesc hipChannelFormatDesc;
377 typedef struct cudaResourceDesc hipResourceDesc;
378 typedef struct cudaTextureDesc hipTextureDesc;
379 typedef struct cudaResourceViewDesc hipResourceViewDesc;
380 // adding code for hipmemSharedConfig
381 #define hipSharedMemBankSizeDefault cudaSharedMemBankSizeDefault
382 #define hipSharedMemBankSizeFourByte cudaSharedMemBankSizeFourByte
383 #define hipSharedMemBankSizeEightByte cudaSharedMemBankSizeEightByte
384 
385 //Function Attributes
386 #define HIP_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK CU_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK
387 #define HIP_FUNC_ATTRIBUTE_SHARED_SIZE_BYTES CU_FUNC_ATTRIBUTE_SHARED_SIZE_BYTES
388 #define HIP_FUNC_ATTRIBUTE_CONST_SIZE_BYTES CU_FUNC_ATTRIBUTE_CONST_SIZE_BYTES
389 #define HIP_FUNC_ATTRIBUTE_LOCAL_SIZE_BYTES CU_FUNC_ATTRIBUTE_LOCAL_SIZE_BYTES
390 #define HIP_FUNC_ATTRIBUTE_NUM_REGS CU_FUNC_ATTRIBUTE_NUM_REGS
391 #define HIP_FUNC_ATTRIBUTE_PTX_VERSION CU_FUNC_ATTRIBUTE_PTX_VERSION
392 #define HIP_FUNC_ATTRIBUTE_BINARY_VERSION CU_FUNC_ATTRIBUTE_BINARY_VERSION
393 #define HIP_FUNC_ATTRIBUTE_CACHE_MODE_CA CU_FUNC_ATTRIBUTE_CACHE_MODE_CA
394 #define HIP_FUNC_ATTRIBUTE_MAX_DYNAMIC_SHARED_SIZE_BYTES CU_FUNC_ATTRIBUTE_MAX_DYNAMIC_SHARED_SIZE_BYTES
395 #define HIP_FUNC_ATTRIBUTE_PREFERRED_SHARED_MEMORY_CARVEOUT CU_FUNC_ATTRIBUTE_PREFERRED_SHARED_MEMORY_CARVEOUT
396 #define HIP_FUNC_ATTRIBUTE_MAX CU_FUNC_ATTRIBUTE_MAX
397 
398 #if CUDA_VERSION >= 9000
399 #define __shfl(...) __shfl_sync(0xffffffff, __VA_ARGS__)
400 #define __shfl_up(...) __shfl_up_sync(0xffffffff, __VA_ARGS__)
401 #define __shfl_down(...) __shfl_down_sync(0xffffffff, __VA_ARGS__)
402 #define __shfl_xor(...) __shfl_xor_sync(0xffffffff, __VA_ARGS__)
403 #endif // CUDA_VERSION >= 9000
404 
// Translate a CUDA runtime error code (cudaError_t) into the equivalent HIP
// error code. cudaErrorUnknown and any unrecognized code collapse to
// hipErrorUnknown.
// NOTE(review): several `case` labels below have no `return` of their own and
// fall through to the next case. Each of these normally has a dedicated
// return in the upstream header (e.g. hipErrorCooperativeLaunchTooLarge);
// lines appear to have been lost from this copy — verify against upstream
// before relying on the exact mapping of those codes.
inline static hipError_t hipCUDAErrorTohipError(cudaError_t cuError) {
    switch (cuError) {
        case cudaSuccess:
            return hipSuccess;
        case cudaErrorProfilerDisabled:
            return hipErrorProfilerDisabled;
        case cudaErrorProfilerNotInitialized:
            return hipErrorProfilerNotInitialized;
        case cudaErrorProfilerAlreadyStarted:
            return hipErrorProfilerAlreadyStarted;
        case cudaErrorProfilerAlreadyStopped:
            return hipErrorProfilerAlreadyStopped;
        case cudaErrorInsufficientDriver:
            return hipErrorInsufficientDriver;
        case cudaErrorUnsupportedLimit:
            return hipErrorUnsupportedLimit;
        case cudaErrorPeerAccessUnsupported:
            return hipErrorPeerAccessUnsupported;
        case cudaErrorInvalidGraphicsContext:
            return hipErrorInvalidGraphicsContext;
        case cudaErrorSharedObjectSymbolNotFound:
            return hipErrorSharedObjectSymbolNotFound;
        case cudaErrorSharedObjectInitFailed:
            return hipErrorSharedObjectInitFailed;
        case cudaErrorOperatingSystem:
            return hipErrorOperatingSystem;
        case cudaErrorSetOnActiveProcess:
            return hipErrorSetOnActiveProcess;
        case cudaErrorIllegalAddress:
            return hipErrorIllegalAddress;
        case cudaErrorInvalidSymbol:
            return hipErrorInvalidSymbol;
        case cudaErrorMissingConfiguration:
            return hipErrorMissingConfiguration;
        case cudaErrorMemoryAllocation:
            return hipErrorOutOfMemory;
        case cudaErrorInitializationError:
            return hipErrorNotInitialized;
        case cudaErrorLaunchFailure:
            return hipErrorLaunchFailure;
        case cudaErrorCooperativeLaunchTooLarge:
            // NOTE(review): falls through — dedicated return likely lost.
        case cudaErrorPriorLaunchFailure:
            return hipErrorPriorLaunchFailure;
        case cudaErrorLaunchOutOfResources:
            // NOTE(review): falls through — dedicated return likely lost.
        case cudaErrorInvalidDeviceFunction:
            return hipErrorInvalidDeviceFunction;
        case cudaErrorInvalidConfiguration:
            return hipErrorInvalidConfiguration;
        case cudaErrorInvalidDevice:
            return hipErrorInvalidDevice;
        case cudaErrorInvalidValue:
            return hipErrorInvalidValue;
        case cudaErrorInvalidDevicePointer:
        case cudaErrorInvalidMemcpyDirection:
            // NOTE(review): the two labels above fall through — returns likely lost.
        case cudaErrorInvalidResourceHandle:
            return hipErrorInvalidHandle;
        case cudaErrorNotReady:
            return hipErrorNotReady;
        case cudaErrorNoDevice:
            return hipErrorNoDevice;
        case cudaErrorPeerAccessAlreadyEnabled:
        case cudaErrorPeerAccessNotEnabled:
        case cudaErrorHostMemoryAlreadyRegistered:
        case cudaErrorHostMemoryNotRegistered:
            // NOTE(review): the four labels above fall through — returns likely lost.
        case cudaErrorMapBufferObjectFailed:
            return hipErrorMapFailed;
        case cudaErrorAssert:
            return hipErrorAssert;
        case cudaErrorNotSupported:
            return hipErrorNotSupported;
        case cudaErrorCudartUnloading:
            return hipErrorDeinitialized;
        case cudaErrorInvalidKernelImage:
            return hipErrorInvalidImage;
        case cudaErrorUnmapBufferObjectFailed:
            return hipErrorUnmapFailed;
        case cudaErrorNoKernelImageForDevice:
            return hipErrorNoBinaryForGpu;
        case cudaErrorECCUncorrectable:
            return hipErrorECCNotCorrectable;
        case cudaErrorDeviceAlreadyInUse:
            return hipErrorContextAlreadyInUse;
        case cudaErrorInvalidPtx:
            // NOTE(review): falls through — dedicated return likely lost.
        case cudaErrorLaunchTimeout:
            return hipErrorLaunchTimeOut;
#if CUDA_VERSION >= 10010
        // These runtime error codes only exist from CUDA 10.1 onward.
        case cudaErrorInvalidSource:
            return hipErrorInvalidSource;
        case cudaErrorFileNotFound:
            return hipErrorFileNotFound;
        case cudaErrorSymbolNotFound:
            return hipErrorNotFound;
        case cudaErrorArrayIsMapped:
            return hipErrorArrayIsMapped;
        case cudaErrorNotMappedAsPointer:
            return hipErrorNotMappedAsPointer;
        case cudaErrorNotMappedAsArray:
            return hipErrorNotMappedAsArray;
        case cudaErrorNotMapped:
            return hipErrorNotMapped;
        case cudaErrorAlreadyAcquired:
            return hipErrorAlreadyAcquired;
        case cudaErrorAlreadyMapped:
            return hipErrorAlreadyMapped;
#endif
#if CUDA_VERSION >= 10020
        case cudaErrorDeviceUninitialized:
            return hipErrorInvalidContext;
#endif
        case cudaErrorUnknown:
        default:
            return hipErrorUnknown;  // Note - translated error.
    }
}
528 
// Translate a CUDA driver API status code (CUresult) into the equivalent HIP
// error code. CUDA_ERROR_UNKNOWN and any unrecognized code collapse to
// hipErrorUnknown.
// NOTE(review): a few `case` labels below have no `return` of their own and
// fall through to the next case; their dedicated returns appear to have been
// lost from this copy — verify against the upstream header.
inline static hipError_t hipCUResultTohipError(CUresult cuError) {
    switch (cuError) {
        case CUDA_SUCCESS:
            return hipSuccess;
        case CUDA_ERROR_OUT_OF_MEMORY:
            return hipErrorOutOfMemory;
        case CUDA_ERROR_INVALID_VALUE:
            return hipErrorInvalidValue;
        case CUDA_ERROR_INVALID_DEVICE:
            return hipErrorInvalidDevice;
        case CUDA_ERROR_DEINITIALIZED:
            return hipErrorDeinitialized;
        case CUDA_ERROR_NO_DEVICE:
            return hipErrorNoDevice;
        case CUDA_ERROR_INVALID_CONTEXT:
            return hipErrorInvalidContext;
        case CUDA_ERROR_NOT_INITIALIZED:
            return hipErrorNotInitialized;
        case CUDA_ERROR_INVALID_HANDLE:
            return hipErrorInvalidHandle;
        case CUDA_ERROR_MAP_FAILED:
            return hipErrorMapFailed;
        case CUDA_ERROR_PROFILER_DISABLED:
            return hipErrorProfilerDisabled;
        case CUDA_ERROR_PROFILER_NOT_INITIALIZED:
            return hipErrorProfilerNotInitialized;
        case CUDA_ERROR_PROFILER_ALREADY_STARTED:
            return hipErrorProfilerAlreadyStarted;
        case CUDA_ERROR_PROFILER_ALREADY_STOPPED:
            return hipErrorProfilerAlreadyStopped;
        case CUDA_ERROR_INVALID_IMAGE:
            return hipErrorInvalidImage;
        case CUDA_ERROR_CONTEXT_ALREADY_CURRENT:
            return hipErrorContextAlreadyCurrent;
        case CUDA_ERROR_UNMAP_FAILED:
            return hipErrorUnmapFailed;
        case CUDA_ERROR_ARRAY_IS_MAPPED:
            return hipErrorArrayIsMapped;
        case CUDA_ERROR_ALREADY_MAPPED:
            return hipErrorAlreadyMapped;
        case CUDA_ERROR_NO_BINARY_FOR_GPU:
            return hipErrorNoBinaryForGpu;
        case CUDA_ERROR_ALREADY_ACQUIRED:
            return hipErrorAlreadyAcquired;
        case CUDA_ERROR_NOT_MAPPED:
            return hipErrorNotMapped;
        case CUDA_ERROR_NOT_MAPPED_AS_ARRAY:
            return hipErrorNotMappedAsArray;
        case CUDA_ERROR_NOT_MAPPED_AS_POINTER:
            return hipErrorNotMappedAsPointer;
        case CUDA_ERROR_ECC_UNCORRECTABLE:
            return hipErrorECCNotCorrectable;
        case CUDA_ERROR_UNSUPPORTED_LIMIT:
            return hipErrorUnsupportedLimit;
        case CUDA_ERROR_CONTEXT_ALREADY_IN_USE:
            return hipErrorContextAlreadyInUse;
        case CUDA_ERROR_PEER_ACCESS_UNSUPPORTED:
            return hipErrorPeerAccessUnsupported;
        case CUDA_ERROR_INVALID_PTX:
            // NOTE(review): falls through — dedicated return likely lost.
        case CUDA_ERROR_INVALID_GRAPHICS_CONTEXT:
            return hipErrorInvalidGraphicsContext;
        case CUDA_ERROR_INVALID_SOURCE:
            return hipErrorInvalidSource;
        case CUDA_ERROR_FILE_NOT_FOUND:
            return hipErrorFileNotFound;
        case CUDA_ERROR_SHARED_OBJECT_SYMBOL_NOT_FOUND:
            return hipErrorSharedObjectSymbolNotFound;
        case CUDA_ERROR_SHARED_OBJECT_INIT_FAILED:
            return hipErrorSharedObjectInitFailed;
        case CUDA_ERROR_OPERATING_SYSTEM:
            return hipErrorOperatingSystem;
        case CUDA_ERROR_NOT_FOUND:
            return hipErrorNotFound;
        case CUDA_ERROR_NOT_READY:
            return hipErrorNotReady;
        case CUDA_ERROR_ILLEGAL_ADDRESS:
            return hipErrorIllegalAddress;
        case CUDA_ERROR_LAUNCH_OUT_OF_RESOURCES:
            // NOTE(review): falls through — dedicated return likely lost.
        case CUDA_ERROR_LAUNCH_TIMEOUT:
            return hipErrorLaunchTimeOut;
        case CUDA_ERROR_PEER_ACCESS_ALREADY_ENABLED:
        case CUDA_ERROR_PEER_ACCESS_NOT_ENABLED:
            // NOTE(review): the two labels above fall through — returns likely lost.
        case CUDA_ERROR_PRIMARY_CONTEXT_ACTIVE:
            return hipErrorSetOnActiveProcess;
        case CUDA_ERROR_ASSERT:
            return hipErrorAssert;
        case CUDA_ERROR_HOST_MEMORY_ALREADY_REGISTERED:
        case CUDA_ERROR_HOST_MEMORY_NOT_REGISTERED:
            // NOTE(review): the two labels above fall through — returns likely lost.
        case CUDA_ERROR_LAUNCH_FAILED:
            return hipErrorLaunchFailure;
        case CUDA_ERROR_COOPERATIVE_LAUNCH_TOO_LARGE:
            // NOTE(review): falls through — dedicated return likely lost.
        case CUDA_ERROR_NOT_SUPPORTED:
            return hipErrorNotSupported;
        case CUDA_ERROR_UNKNOWN:
        default:
            return hipErrorUnknown;  // Note - translated error.
    }
}
634 
// Translate a HIP error code back into a CUDA runtime error code (the inverse
// of hipCUDAErrorTohipError). hipErrorUnknown, hipErrorTbd and anything
// unrecognized map to cudaErrorUnknown. HIP codes whose CUDA counterpart only
// exists from a given toolkit version map to cudaErrorUnknown on older CUDA.
// NOTE(review): several `case` labels appear to have been lost from this copy —
// each bare, unreachable `return` statement below marks where one belonged.
// Restore the labels from the upstream header before relying on this mapping.
inline static cudaError_t hipErrorToCudaError(hipError_t hError) {
    switch (hError) {
        case hipSuccess:
            return cudaSuccess;
        case hipErrorOutOfMemory:
            return cudaErrorMemoryAllocation;
        case hipErrorProfilerDisabled:
            return cudaErrorProfilerDisabled;
        case hipErrorProfilerNotInitialized:
            return cudaErrorProfilerNotInitialized;
        case hipErrorProfilerAlreadyStarted:
            return cudaErrorProfilerAlreadyStarted;
        case hipErrorProfilerAlreadyStopped:
            return cudaErrorProfilerAlreadyStopped;
        case hipErrorInvalidConfiguration:
            return cudaErrorInvalidConfiguration;
            // NOTE(review): missing case label before each of the next two returns.
            return cudaErrorLaunchOutOfResources;
            return cudaErrorInvalidValue;
        case hipErrorInvalidHandle:
            return cudaErrorInvalidResourceHandle;
            // NOTE(review): missing case label before each of the next three returns.
            return cudaErrorInvalidDevice;
            return cudaErrorInvalidMemcpyDirection;
            return cudaErrorInvalidDevicePointer;
        case hipErrorNotInitialized:
            return cudaErrorInitializationError;
        case hipErrorNoDevice:
            return cudaErrorNoDevice;
        case hipErrorNotReady:
            return cudaErrorNotReady;
            // NOTE(review): missing case label before each of the next four returns.
            return cudaErrorPeerAccessNotEnabled;
            return cudaErrorPeerAccessAlreadyEnabled;
            return cudaErrorHostMemoryAlreadyRegistered;
            return cudaErrorHostMemoryNotRegistered;
        case hipErrorDeinitialized:
            return cudaErrorCudartUnloading;
        case hipErrorInvalidSymbol:
            return cudaErrorInvalidSymbol;
        case hipErrorInsufficientDriver:
            return cudaErrorInsufficientDriver;
        case hipErrorMissingConfiguration:
            return cudaErrorMissingConfiguration;
        case hipErrorPriorLaunchFailure:
            return cudaErrorPriorLaunchFailure;
        case hipErrorInvalidDeviceFunction:
            return cudaErrorInvalidDeviceFunction;
        case hipErrorInvalidImage:
            return cudaErrorInvalidKernelImage;
            // NOTE(review): missing case label before this #if block
            // (likely hipErrorInvalidContext) — the returns below are unreachable.
#if CUDA_VERSION >= 10020
            return cudaErrorDeviceUninitialized;
#else
            return cudaErrorUnknown;
#endif
        case hipErrorMapFailed:
            return cudaErrorMapBufferObjectFailed;
        case hipErrorUnmapFailed:
            return cudaErrorUnmapBufferObjectFailed;
        case hipErrorArrayIsMapped:
#if CUDA_VERSION >= 10010
            return cudaErrorArrayIsMapped;
#else
            return cudaErrorUnknown;
#endif
        case hipErrorAlreadyMapped:
#if CUDA_VERSION >= 10010
            return cudaErrorAlreadyMapped;
#else
            return cudaErrorUnknown;
#endif
        case hipErrorNoBinaryForGpu:
            return cudaErrorNoKernelImageForDevice;
        case hipErrorAlreadyAcquired:
#if CUDA_VERSION >= 10010
            return cudaErrorAlreadyAcquired;
#else
            return cudaErrorUnknown;
#endif
        case hipErrorNotMapped:
#if CUDA_VERSION >= 10010
            return cudaErrorNotMapped;
#else
            return cudaErrorUnknown;
#endif
        case hipErrorNotMappedAsArray:
#if CUDA_VERSION >= 10010
            return cudaErrorNotMappedAsArray;
#else
            return cudaErrorUnknown;
#endif
        case hipErrorNotMappedAsPointer:
#if CUDA_VERSION >= 10010
            return cudaErrorNotMappedAsPointer;
#else
            return cudaErrorUnknown;
#endif
        case hipErrorECCNotCorrectable:
            return cudaErrorECCUncorrectable;
        case hipErrorUnsupportedLimit:
            return cudaErrorUnsupportedLimit;
        case hipErrorContextAlreadyInUse:
            return cudaErrorDeviceAlreadyInUse;
        case hipErrorPeerAccessUnsupported:
            return cudaErrorPeerAccessUnsupported;
            // NOTE(review): missing case label before this return.
            return cudaErrorInvalidPtx;
        case hipErrorInvalidGraphicsContext:
            return cudaErrorInvalidGraphicsContext;
        case hipErrorInvalidSource:
#if CUDA_VERSION >= 10010
            return cudaErrorInvalidSource;
#else
            return cudaErrorUnknown;
#endif
        case hipErrorFileNotFound:
#if CUDA_VERSION >= 10010
            return cudaErrorFileNotFound;
#else
            return cudaErrorUnknown;
#endif
        case hipErrorSharedObjectSymbolNotFound:
            return cudaErrorSharedObjectSymbolNotFound;
        case hipErrorSharedObjectInitFailed:
            return cudaErrorSharedObjectInitFailed;
        case hipErrorOperatingSystem:
            return cudaErrorOperatingSystem;
        case hipErrorNotFound:
#if CUDA_VERSION >= 10010
            return cudaErrorSymbolNotFound;
#else
            return cudaErrorUnknown;
#endif
        case hipErrorIllegalAddress:
            return cudaErrorIllegalAddress;
        case hipErrorLaunchTimeOut:
            return cudaErrorLaunchTimeout;
        case hipErrorSetOnActiveProcess:
            return cudaErrorSetOnActiveProcess;
            // NOTE(review): missing case label before each of the next three returns.
            return cudaErrorLaunchFailure;
            return cudaErrorCooperativeLaunchTooLarge;
            return cudaErrorNotSupported;
        // HSA: does not exist in CUDA
        // HSA: does not exist in CUDA
        case hipErrorUnknown:
        case hipErrorTbd:
        default:
            return cudaErrorUnknown;  // Note - translated error.
    }
}
797 
798 inline static enum cudaMemcpyKind hipMemcpyKindToCudaMemcpyKind(hipMemcpyKind kind) {
799  switch (kind) {
800  case hipMemcpyHostToHost:
801  return cudaMemcpyHostToHost;
802  case hipMemcpyHostToDevice:
803  return cudaMemcpyHostToDevice;
804  case hipMemcpyDeviceToHost:
805  return cudaMemcpyDeviceToHost;
806  case hipMemcpyDeviceToDevice:
807  return cudaMemcpyDeviceToDevice;
808  default:
809  return cudaMemcpyDefault;
810  }
811 }
812 
813 inline static enum cudaTextureAddressMode hipTextureAddressModeToCudaTextureAddressMode(
814  hipTextureAddressMode kind) {
815  switch (kind) {
816  case hipAddressModeWrap:
817  return cudaAddressModeWrap;
818  case hipAddressModeClamp:
819  return cudaAddressModeClamp;
820  case hipAddressModeMirror:
821  return cudaAddressModeMirror;
822  case hipAddressModeBorder:
823  return cudaAddressModeBorder;
824  default:
825  return cudaAddressModeWrap;
826  }
827 }
828 
// Translate hipMemRangeAttribute to the CUDA runtime's cudaMemRangeAttribute.
// NOTE(review): the `case` labels for this switch appear to have been lost
// from this copy — every `return` before `default:` is unreachable, so all
// inputs currently yield cudaMemRangeAttributeReadMostly. Restore the labels
// from the upstream header before relying on this mapping.
inline static enum cudaMemRangeAttribute hipMemRangeAttributeToCudaMemRangeAttribute(
    hipMemRangeAttribute kind) {
    switch (kind) {
            return cudaMemRangeAttributeReadMostly;
            return cudaMemRangeAttributePreferredLocation;
            return cudaMemRangeAttributeAccessedBy;
            return cudaMemRangeAttributeLastPrefetchLocation;
        default:
            return cudaMemRangeAttributeReadMostly;
    }
}
844 
// Translate hipMemoryAdvise to the CUDA runtime's cudaMemoryAdvise.
// NOTE(review): the `case` labels for this switch appear to have been lost
// from this copy — every `return` before `default:` is unreachable, so all
// inputs currently yield cudaMemAdviseSetReadMostly. (The hipMemoryAdvise
// enum definition earlier in this header is truncated as well.) Restore from
// the upstream header before relying on this mapping.
inline static enum cudaMemoryAdvise hipMemoryAdviseTocudaMemoryAdvise(
    hipMemoryAdvise kind) {
    switch (kind) {
            return cudaMemAdviseSetReadMostly;
            return cudaMemAdviseUnsetReadMostly;
            return cudaMemAdviseSetPreferredLocation;
            return cudaMemAdviseUnsetPreferredLocation;
            return cudaMemAdviseSetAccessedBy;
            return cudaMemAdviseUnsetAccessedBy;
        default:
            return cudaMemAdviseSetReadMostly;
    }
}
864 
865 inline static enum cudaTextureFilterMode hipTextureFilterModeToCudaTextureFilterMode(
866  hipTextureFilterMode kind) {
867  switch (kind) {
868  case hipFilterModePoint:
869  return cudaFilterModePoint;
870  case hipFilterModeLinear:
871  return cudaFilterModeLinear;
872  default:
873  return cudaFilterModePoint;
874  }
875 }
876 
877 inline static enum cudaTextureReadMode hipTextureReadModeToCudaTextureReadMode(hipTextureReadMode kind) {
878  switch (kind) {
879  case hipReadModeElementType:
880  return cudaReadModeElementType;
881  case hipReadModeNormalizedFloat:
882  return cudaReadModeNormalizedFloat;
883  default:
884  return cudaReadModeElementType;
885  }
886 }
887 
888 inline static enum cudaChannelFormatKind hipChannelFormatKindToCudaChannelFormatKind(
889  hipChannelFormatKind kind) {
890  switch (kind) {
891  case hipChannelFormatKindSigned:
892  return cudaChannelFormatKindSigned;
893  case hipChannelFormatKindUnsigned:
894  return cudaChannelFormatKindUnsigned;
895  case hipChannelFormatKindFloat:
896  return cudaChannelFormatKindFloat;
897  case hipChannelFormatKindNone:
898  return cudaChannelFormatKindNone;
899  default:
900  return cudaChannelFormatKindNone;
901  }
902 }
903 
// HIPRT_CB carries CUDA's stream-callback calling convention through to HIP.
#define HIPRT_CB CUDART_CB
// Host callback type used with hipStreamAddCallback (stream, completion status, user data).
typedef void(HIPRT_CB* hipStreamCallback_t)(hipStream_t stream, hipError_t status, void* userData);
// Initializes the HIP runtime by forwarding to the CUDA driver's cuInit.
inline static hipError_t hipInit(unsigned int flags) {
    return hipCUResultTohipError(cuInit(flags));
}
912 
// Resets the current device, forwarding to cudaDeviceReset.
inline static hipError_t hipDeviceReset() { return hipCUDAErrorTohipError(cudaDeviceReset()); }
914 
// Returns and clears the last runtime error (cudaGetLastError semantics).
inline static hipError_t hipGetLastError() { return hipCUDAErrorTohipError(cudaGetLastError()); }
916 
// Returns the last runtime error without clearing it.
inline static hipError_t hipPeekAtLastError() {
    return hipCUDAErrorTohipError(cudaPeekAtLastError());
}
920 
// Allocates `size` bytes of device memory; forwards to cudaMalloc.
inline static hipError_t hipMalloc(void** ptr, size_t size) {
    return hipCUDAErrorTohipError(cudaMalloc(ptr, size));
}
924 
// Allocates pitched 2D device memory; the returned pitch must be used for row indexing.
inline static hipError_t hipMallocPitch(void** ptr, size_t* pitch, size_t width, size_t height) {
    return hipCUDAErrorTohipError(cudaMallocPitch(ptr, pitch, width, height));
}
928 
// Driver-API pitched allocation; forwards to cuMemAllocPitch.
inline static hipError_t hipMemAllocPitch(hipDeviceptr_t* dptr,size_t* pitch,size_t widthInBytes,size_t height,unsigned int elementSizeBytes){
    return hipCUResultTohipError(cuMemAllocPitch(dptr,pitch,widthInBytes,height,elementSizeBytes));
}
932 
// Allocates a pitched 3D region; forwards to cudaMalloc3D.
inline static hipError_t hipMalloc3D(hipPitchedPtr* pitchedDevPtr, hipExtent extent) {
    return hipCUDAErrorTohipError(cudaMalloc3D(pitchedDevPtr, extent));
}
936 
// Frees device memory previously allocated via hipMalloc*; forwards to cudaFree.
inline static hipError_t hipFree(void* ptr) { return hipCUDAErrorTohipError(cudaFree(ptr)); }
938 
// Deprecated: allocates page-locked host memory via cudaMallocHost.
// Prefer hipHostMalloc.
inline static hipError_t hipMallocHost(void** ptr, size_t size)
    __attribute__((deprecated("use hipHostMalloc instead")));
inline static hipError_t hipMallocHost(void** ptr, size_t size) {
    return hipCUDAErrorTohipError(cudaMallocHost(ptr, size));
}
944 
// Deprecated: allocates page-locked host memory via the driver API (cuMemAllocHost).
// Prefer hipHostMalloc.
inline static hipError_t hipMemAllocHost(void** ptr, size_t size)
    __attribute__((deprecated("use hipHostMalloc instead")));
inline static hipError_t hipMemAllocHost(void** ptr, size_t size) {
    return hipCUResultTohipError(cuMemAllocHost(ptr, size));
}
950 
// Deprecated: allocates page-locked host memory with flags via cudaHostAlloc.
// Prefer hipHostMalloc (same underlying call).
inline static hipError_t hipHostAlloc(void** ptr, size_t size, unsigned int flags)
    __attribute__((deprecated("use hipHostMalloc instead")));
inline static hipError_t hipHostAlloc(void** ptr, size_t size, unsigned int flags) {
    return hipCUDAErrorTohipError(cudaHostAlloc(ptr, size, flags));
}
956 
// Allocates page-locked (pinned) host memory with allocation flags; forwards to cudaHostAlloc.
inline static hipError_t hipHostMalloc(void** ptr, size_t size, unsigned int flags) {
    return hipCUDAErrorTohipError(cudaHostAlloc(ptr, size, flags));
}
960 
// Applies a managed-memory usage hint to [dev_ptr, dev_ptr+count), converting
// the HIP advice enum to CUDA's before forwarding to cudaMemAdvise.
inline static hipError_t hipMemAdvise(const void* dev_ptr, size_t count, hipMemoryAdvise advice,
                                      int device) {
    return hipCUDAErrorTohipError(cudaMemAdvise(dev_ptr, count,
                                  hipMemoryAdviseTocudaMemoryAdvise(advice), device));
}
966 
// Asynchronously prefetches managed memory to `device` on `stream` (default: null stream).
inline static hipError_t hipMemPrefetchAsync(const void* dev_ptr, size_t count, int device,
                                             hipStream_t stream __dparm(0)) {
    return hipCUDAErrorTohipError(cudaMemPrefetchAsync(dev_ptr, count, device, stream));
}
971 
// Queries one attribute of a managed-memory range, converting the HIP
// attribute enum to CUDA's before forwarding.
inline static hipError_t hipMemRangeGetAttribute(void* data, size_t data_size,
                                                 hipMemRangeAttribute attribute,
                                                 const void* dev_ptr, size_t count) {
    return hipCUDAErrorTohipError(cudaMemRangeGetAttribute(data, data_size,
                                  hipMemRangeAttributeToCudaMemRangeAttribute(attribute), dev_ptr, count));
}
978 
// Queries multiple attributes of a managed-memory range.
// NOTE(review): unlike hipMemRangeGetAttribute above, the `attributes` array is
// passed through without per-element conversion — presumably the HIP and CUDA
// enum values coincide; confirm against the enum definitions.
inline static hipError_t hipMemRangeGetAttributes(void** data, size_t* data_sizes,
                                                  hipMemRangeAttribute* attributes,
                                                  size_t num_attributes, const void* dev_ptr,
                                                  size_t count) {
    return hipCUDAErrorTohipError(cudaMemRangeGetAttributes(data, data_sizes, attributes,
                                  num_attributes, dev_ptr, count));
}
986 
// Attaches managed memory to a stream; defaults: length 0 (whole allocation per
// CUDA semantics), flags hipMemAttachSingle.
inline static hipError_t hipStreamAttachMemAsync(hipStream_t stream, hipDeviceptr_t* dev_ptr,
                                                 size_t length __dparm(0),
                                                 unsigned int flags __dparm(hipMemAttachSingle)) {
    return hipCUDAErrorTohipError(cudaStreamAttachMemAsync(stream, dev_ptr, length, flags));
}
992 
// Allocates unified (managed) memory; forwards to cudaMallocManaged.
inline static hipError_t hipMallocManaged(void** ptr, size_t size, unsigned int flags) {
    return hipCUDAErrorTohipError(cudaMallocManaged(ptr, size, flags));
}
996 
// Allocates a CUDA array with the given channel descriptor (flags default to hipArrayDefault).
inline static hipError_t hipMallocArray(hipArray** array, const hipChannelFormatDesc* desc,
                                        size_t width, size_t height,
                                        unsigned int flags __dparm(hipArrayDefault)) {
    return hipCUDAErrorTohipError(cudaMallocArray(array, desc, width, height, flags));
}
1002 
// Allocates a 3D CUDA array; forwards to cudaMalloc3DArray.
inline static hipError_t hipMalloc3DArray(hipArray** array, const hipChannelFormatDesc* desc,
                                          hipExtent extent, unsigned int flags) {
    return hipCUDAErrorTohipError(cudaMalloc3DArray(array, desc, extent, flags));
}
1007 
// Frees a CUDA array allocated via hipMallocArray/hipMalloc3DArray.
inline static hipError_t hipFreeArray(hipArray* array) {
    return hipCUDAErrorTohipError(cudaFreeArray(array));
}
1011 
// Returns the device pointer aliasing a mapped, page-locked host allocation.
inline static hipError_t hipHostGetDevicePointer(void** devPtr, void* hostPtr, unsigned int flags) {
    return hipCUDAErrorTohipError(cudaHostGetDevicePointer(devPtr, hostPtr, flags));
}
1015 
// Retrieves the allocation flags of a page-locked host pointer.
inline static hipError_t hipHostGetFlags(unsigned int* flagsPtr, void* hostPtr) {
    return hipCUDAErrorTohipError(cudaHostGetFlags(flagsPtr, hostPtr));
}
1019 
// Page-locks an existing host memory range; forwards to cudaHostRegister.
inline static hipError_t hipHostRegister(void* ptr, size_t size, unsigned int flags) {
    return hipCUDAErrorTohipError(cudaHostRegister(ptr, size, flags));
}
1023 
// Unpins a host memory range previously registered with hipHostRegister.
inline static hipError_t hipHostUnregister(void* ptr) {
    return hipCUDAErrorTohipError(cudaHostUnregister(ptr));
}
1027 
// Deprecated: frees page-locked host memory via cudaFreeHost. Prefer hipHostFree.
inline static hipError_t hipFreeHost(void* ptr)
    __attribute__((deprecated("use hipHostFree instead")));
inline static hipError_t hipFreeHost(void* ptr) {
    return hipCUDAErrorTohipError(cudaFreeHost(ptr));
}
1033 
// Frees page-locked host memory allocated with hipHostMalloc; forwards to cudaFreeHost.
inline static hipError_t hipHostFree(void* ptr) {
    return hipCUDAErrorTohipError(cudaFreeHost(ptr));
}
1037 
// Selects the device used by subsequent runtime calls on this host thread.
inline static hipError_t hipSetDevice(int device) {
    return hipCUDAErrorTohipError(cudaSetDevice(device));
}
1041 
// Selects the device best matching `prop` by building a cudaDeviceProp from the
// subset of HIP properties that have direct CUDA counterparts (fields not
// listed below are left zeroed and thus ignored by cudaChooseDevice).
inline static hipError_t hipChooseDevice(int* device, const hipDeviceProp_t* prop) {
    struct cudaDeviceProp cdprop;
    memset(&cdprop, 0x0, sizeof(struct cudaDeviceProp));
    cdprop.major = prop->major;
    cdprop.minor = prop->minor;
    cdprop.totalGlobalMem = prop->totalGlobalMem;
    cdprop.sharedMemPerBlock = prop->sharedMemPerBlock;
    cdprop.regsPerBlock = prop->regsPerBlock;
    cdprop.warpSize = prop->warpSize;
    cdprop.maxThreadsPerBlock = prop->maxThreadsPerBlock;
    cdprop.clockRate = prop->clockRate;
    cdprop.totalConstMem = prop->totalConstMem;
    cdprop.multiProcessorCount = prop->multiProcessorCount;
    cdprop.l2CacheSize = prop->l2CacheSize;
    cdprop.maxThreadsPerMultiProcessor = prop->maxThreadsPerMultiProcessor;
    cdprop.computeMode = prop->computeMode;
    cdprop.canMapHostMemory = prop->canMapHostMemory;
    cdprop.memoryClockRate = prop->memoryClockRate;
    cdprop.memoryBusWidth = prop->memoryBusWidth;
    return hipCUDAErrorTohipError(cudaChooseDevice(device, &cdprop));
}
1063 
// Synchronous host-to-device copy via the driver API (cuMemcpyHtoD).
inline static hipError_t hipMemcpyHtoD(hipDeviceptr_t dst, void* src, size_t size) {
    return hipCUResultTohipError(cuMemcpyHtoD(dst, src, size));
}
1067 
// Synchronous device-to-host copy via the driver API (cuMemcpyDtoH).
inline static hipError_t hipMemcpyDtoH(void* dst, hipDeviceptr_t src, size_t size) {
    return hipCUResultTohipError(cuMemcpyDtoH(dst, src, size));
}
1071 
// Synchronous device-to-device copy via the driver API (cuMemcpyDtoD).
inline static hipError_t hipMemcpyDtoD(hipDeviceptr_t dst, hipDeviceptr_t src, size_t size) {
    return hipCUResultTohipError(cuMemcpyDtoD(dst, src, size));
}
1075 
// Asynchronous host-to-device copy on `stream` via cuMemcpyHtoDAsync.
inline static hipError_t hipMemcpyHtoDAsync(hipDeviceptr_t dst, void* src, size_t size,
                                            hipStream_t stream) {
    return hipCUResultTohipError(cuMemcpyHtoDAsync(dst, src, size, stream));
}
1080 
// Asynchronous device-to-host copy on `stream` via cuMemcpyDtoHAsync.
inline static hipError_t hipMemcpyDtoHAsync(void* dst, hipDeviceptr_t src, size_t size,
                                            hipStream_t stream) {
    return hipCUResultTohipError(cuMemcpyDtoHAsync(dst, src, size, stream));
}
1085 
// Asynchronous device-to-device copy on `stream` via cuMemcpyDtoDAsync.
inline static hipError_t hipMemcpyDtoDAsync(hipDeviceptr_t dst, hipDeviceptr_t src, size_t size,
                                            hipStream_t stream) {
    return hipCUResultTohipError(cuMemcpyDtoDAsync(dst, src, size, stream));
}
1090 
// General synchronous copy; the HIP copy kind is converted to CUDA's before forwarding.
inline static hipError_t hipMemcpy(void* dst, const void* src, size_t sizeBytes,
                                   hipMemcpyKind copyKind) {
    return hipCUDAErrorTohipError(
        cudaMemcpy(dst, src, sizeBytes, hipMemcpyKindToCudaMemcpyKind(copyKind)));
}
1096 
1097 
1098 inline static hipError_t hipMemcpyWithStream(void* dst, const void* src,
1099  size_t sizeBytes, hipMemcpyKind copyKind,
1100  hipStream_t stream) {
1101  cudaError_t error = cudaMemcpyAsync(dst, src, sizeBytes,
1102  hipMemcpyKindToCudaMemcpyKind(copyKind),
1103  stream);
1104 
1105  if (error != cudaSuccess) return hipCUDAErrorTohipError(error);
1106 
1107  return hipCUDAErrorTohipError(cudaStreamSynchronize(stream));
1108 }
1109 
// Asynchronous copy on `stream` (default: null stream); kind converted to CUDA's.
inline static hipError_t hipMemcpyAsync(void* dst, const void* src, size_t sizeBytes,
                                        hipMemcpyKind copyKind, hipStream_t stream __dparm(0)) {
    return hipCUDAErrorTohipError(
        cudaMemcpyAsync(dst, src, sizeBytes, hipMemcpyKindToCudaMemcpyKind(copyKind), stream));
}
1115 
// Copies to a device symbol (defaults: offset 0, host-to-device).
inline static hipError_t hipMemcpyToSymbol(const void* symbol, const void* src, size_t sizeBytes,
                                           size_t offset __dparm(0),
                                           hipMemcpyKind copyType __dparm(hipMemcpyHostToDevice)) {
    return hipCUDAErrorTohipError(cudaMemcpyToSymbol(symbol, src, sizeBytes, offset,
                                                     hipMemcpyKindToCudaMemcpyKind(copyType)));
}
1122 
// Asynchronous copy to a device symbol on `stream` (default: null stream).
inline static hipError_t hipMemcpyToSymbolAsync(const void* symbol, const void* src,
                                                size_t sizeBytes, size_t offset,
                                                hipMemcpyKind copyType,
                                                hipStream_t stream __dparm(0)) {
    return hipCUDAErrorTohipError(cudaMemcpyToSymbolAsync(
        symbol, src, sizeBytes, offset, hipMemcpyKindToCudaMemcpyKind(copyType), stream));
}
1130 
// Copies from a device symbol (defaults: offset 0, device-to-host).
inline static hipError_t hipMemcpyFromSymbol(void* dst, const void* symbolName, size_t sizeBytes,
                                             size_t offset __dparm(0),
                                             hipMemcpyKind kind __dparm(hipMemcpyDeviceToHost)) {
    return hipCUDAErrorTohipError(cudaMemcpyFromSymbol(dst, symbolName, sizeBytes, offset,
                                                       hipMemcpyKindToCudaMemcpyKind(kind)));
}
1137 
// Asynchronous copy from a device symbol on `stream` (default: null stream).
inline static hipError_t hipMemcpyFromSymbolAsync(void* dst, const void* symbolName,
                                                  size_t sizeBytes, size_t offset,
                                                  hipMemcpyKind kind,
                                                  hipStream_t stream __dparm(0)) {
    return hipCUDAErrorTohipError(cudaMemcpyFromSymbolAsync(
        dst, symbolName, sizeBytes, offset, hipMemcpyKindToCudaMemcpyKind(kind), stream));
}
1145 
// Resolves the device address of a device symbol.
inline static hipError_t hipGetSymbolAddress(void** devPtr, const void* symbolName) {
    return hipCUDAErrorTohipError(cudaGetSymbolAddress(devPtr, symbolName));
}
1149 
// Retrieves the size in bytes of a device symbol.
inline static hipError_t hipGetSymbolSize(size_t* size, const void* symbolName) {
    return hipCUDAErrorTohipError(cudaGetSymbolSize(size, symbolName));
}
1153 
// Synchronous pitched 2D copy; kind converted to CUDA's before forwarding.
inline static hipError_t hipMemcpy2D(void* dst, size_t dpitch, const void* src, size_t spitch,
                                     size_t width, size_t height, hipMemcpyKind kind) {
    return hipCUDAErrorTohipError(
        cudaMemcpy2D(dst, dpitch, src, spitch, width, height, hipMemcpyKindToCudaMemcpyKind(kind)));
}
1159 
// Driver-API descriptor-based 2D copy (hip_Memcpy2D mirrors CUDA_MEMCPY2D).
inline static hipError_t hipMemcpyParam2D(const hip_Memcpy2D* pCopy) {
    return hipCUResultTohipError(cuMemcpy2D(pCopy));
}
1163 
// Asynchronous descriptor-based 2D copy on `stream` (default: null stream).
inline static hipError_t hipMemcpyParam2DAsync(const hip_Memcpy2D* pCopy, hipStream_t stream __dparm(0)) {
    return hipCUResultTohipError(cuMemcpy2DAsync(pCopy, stream));
}
1167 
// Synchronous 3D copy described by hipMemcpy3DParms (mirrors cudaMemcpy3DParms).
inline static hipError_t hipMemcpy3D(const struct hipMemcpy3DParms *p) {
    return hipCUDAErrorTohipError(cudaMemcpy3D(p));
}
1171 
// Asynchronous 3D copy on `stream`.
inline static hipError_t hipMemcpy3DAsync(const struct hipMemcpy3DParms *p, hipStream_t stream) {
    return hipCUDAErrorTohipError(cudaMemcpy3DAsync(p, stream));
}
1175 
// Driver-API 3D copy described by HIP_MEMCPY3D (mirrors CUDA_MEMCPY3D).
inline static hipError_t hipDrvMemcpy3D(const HIP_MEMCPY3D* pCopy) {
    return hipCUResultTohipError(cuMemcpy3D(pCopy));
}
1179 
// Asynchronous driver-API 3D copy on `stream`.
inline static hipError_t hipDrvMemcpy3DAsync(const HIP_MEMCPY3D* pCopy, hipStream_t stream) {
    return hipCUResultTohipError(cuMemcpy3DAsync(pCopy, stream));
}
1183 
// Asynchronous pitched 2D copy on `stream`.
inline static hipError_t hipMemcpy2DAsync(void* dst, size_t dpitch, const void* src, size_t spitch,
                                          size_t width, size_t height, hipMemcpyKind kind,
                                          hipStream_t stream) {
    return hipCUDAErrorTohipError(cudaMemcpy2DAsync(dst, dpitch, src, spitch, width, height,
                                                    hipMemcpyKindToCudaMemcpyKind(kind), stream));
}
1190 
// Synchronous 2D copy out of a CUDA array into pitched linear memory.
inline static hipError_t hipMemcpy2DFromArray(void* dst, size_t dpitch, hipArray* src,
                                              size_t wOffset, size_t hOffset, size_t width,
                                              size_t height, hipMemcpyKind kind) {
    return hipCUDAErrorTohipError(cudaMemcpy2DFromArray(dst, dpitch, src, wOffset, hOffset, width,
                                                        height,
                                                        hipMemcpyKindToCudaMemcpyKind(kind)));
}
1198 
// Asynchronous 2D copy out of a CUDA array on `stream`.
inline static hipError_t hipMemcpy2DFromArrayAsync(void* dst, size_t dpitch, hipArray* src,
                                                   size_t wOffset, size_t hOffset, size_t width,
                                                   size_t height, hipMemcpyKind kind,
                                                   hipStream_t stream) {
    return hipCUDAErrorTohipError(cudaMemcpy2DFromArrayAsync(dst, dpitch, src, wOffset, hOffset,
                                                             width, height,
                                                             hipMemcpyKindToCudaMemcpyKind(kind),
                                                             stream));
}
1208 
// Synchronous 2D copy from pitched linear memory into a CUDA array.
inline static hipError_t hipMemcpy2DToArray(hipArray* dst, size_t wOffset, size_t hOffset,
                                            const void* src, size_t spitch, size_t width,
                                            size_t height, hipMemcpyKind kind) {
    return hipCUDAErrorTohipError(cudaMemcpy2DToArray(dst, wOffset, hOffset, src, spitch, width,
                                                      height, hipMemcpyKindToCudaMemcpyKind(kind)));
}
1215 
// Asynchronous 2D copy into a CUDA array on `stream`.
inline static hipError_t hipMemcpy2DToArrayAsync(hipArray* dst, size_t wOffset, size_t hOffset,
                                                 const void* src, size_t spitch, size_t width,
                                                 size_t height, hipMemcpyKind kind,
                                                 hipStream_t stream) {
    return hipCUDAErrorTohipError(cudaMemcpy2DToArrayAsync(dst, wOffset, hOffset, src, spitch,
                                                           width, height,
                                                           hipMemcpyKindToCudaMemcpyKind(kind),
                                                           stream));
}
1225 
// Deprecated (mirrors CUDA's deprecation of cudaMemcpyToArray): linear copy into a CUDA array.
__HIP_DEPRECATED inline static hipError_t hipMemcpyToArray(hipArray* dst, size_t wOffset,
                                                           size_t hOffset, const void* src,
                                                           size_t count, hipMemcpyKind kind) {
    return hipCUDAErrorTohipError(
        cudaMemcpyToArray(dst, wOffset, hOffset, src, count, hipMemcpyKindToCudaMemcpyKind(kind)));
}
1232 
// Deprecated (mirrors CUDA's deprecation of cudaMemcpyFromArray): linear copy out of a CUDA array.
__HIP_DEPRECATED inline static hipError_t hipMemcpyFromArray(void* dst, hipArray_const_t srcArray,
                                                             size_t wOffset, size_t hOffset,
                                                             size_t count, hipMemcpyKind kind) {
    return hipCUDAErrorTohipError(cudaMemcpyFromArray(dst, srcArray, wOffset, hOffset, count,
                                                      hipMemcpyKindToCudaMemcpyKind(kind)));
}
1239 
// Driver-API copy from array to host; hipArray* is reinterpreted as CUarray.
inline static hipError_t hipMemcpyAtoH(void* dst, hipArray* srcArray, size_t srcOffset,
                                       size_t count) {
    return hipCUResultTohipError(cuMemcpyAtoH(dst, (CUarray)srcArray, srcOffset, count));
}
1244 
// Driver-API copy from host to array; hipArray* is reinterpreted as CUarray.
inline static hipError_t hipMemcpyHtoA(hipArray* dstArray, size_t dstOffset, const void* srcHost,
                                       size_t count) {
    return hipCUResultTohipError(cuMemcpyHtoA((CUarray)dstArray, dstOffset, srcHost, count));
}
1249 
// Blocks until all preceding work on the current device has completed.
inline static hipError_t hipDeviceSynchronize() {
    return hipCUDAErrorTohipError(cudaDeviceSynchronize());
}
1253 
// Queries the device's preferred L1/shared cache configuration.
inline static hipError_t hipDeviceGetCacheConfig(hipFuncCache_t* pCacheConfig) {
    return hipCUDAErrorTohipError(cudaDeviceGetCacheConfig(pCacheConfig));
}
1257 
// Sets a per-function attribute (e.g. max dynamic shared memory) on a kernel.
inline static hipError_t hipFuncSetAttribute(const void* func, hipFuncAttribute attr, int value) {
    return hipCUDAErrorTohipError(cudaFuncSetAttribute(func, attr, value));
}
1261 
// Sets the device's preferred L1/shared cache configuration.
inline static hipError_t hipDeviceSetCacheConfig(hipFuncCache_t cacheConfig) {
    return hipCUDAErrorTohipError(cudaDeviceSetCacheConfig(cacheConfig));
}
1265 
// Sets the shared memory bank-size configuration for a kernel.
inline static hipError_t hipFuncSetSharedMemConfig(const void* func, hipSharedMemConfig config) {
    return hipCUDAErrorTohipError(cudaFuncSetSharedMemConfig(func, config));
}
1269 
// Returns CUDA's human-readable message for the HIP error (after enum conversion).
inline static const char* hipGetErrorString(hipError_t error) {
    return cudaGetErrorString(hipErrorToCudaError(error));
}
1273 
// Returns CUDA's symbolic name for the HIP error (after enum conversion).
inline static const char* hipGetErrorName(hipError_t error) {
    return cudaGetErrorName(hipErrorToCudaError(error));
}
1277 
// Stores the number of visible devices in *count.
inline static hipError_t hipGetDeviceCount(int* count) {
    return hipCUDAErrorTohipError(cudaGetDeviceCount(count));
}
1281 
// Stores the currently selected device ordinal in *device.
inline static hipError_t hipGetDevice(int* device) {
    return hipCUDAErrorTohipError(cudaGetDevice(device));
}
1285 
// Unmaps memory opened from another process via hipIpcOpenMemHandle.
inline static hipError_t hipIpcCloseMemHandle(void* devPtr) {
    return hipCUDAErrorTohipError(cudaIpcCloseMemHandle(devPtr));
}
1289 
// Exports an event as an IPC handle usable by another process.
inline static hipError_t hipIpcGetEventHandle(hipIpcEventHandle_t* handle, hipEvent_t event) {
    return hipCUDAErrorTohipError(cudaIpcGetEventHandle(handle, event));
}
1293 
// Exports a device allocation as an IPC handle usable by another process.
inline static hipError_t hipIpcGetMemHandle(hipIpcMemHandle_t* handle, void* devPtr) {
    return hipCUDAErrorTohipError(cudaIpcGetMemHandle(handle, devPtr));
}
1297 
// Imports an event exported by another process via hipIpcGetEventHandle.
inline static hipError_t hipIpcOpenEventHandle(hipEvent_t* event, hipIpcEventHandle_t handle) {
    return hipCUDAErrorTohipError(cudaIpcOpenEventHandle(event, handle));
}
1301 
// Imports a device allocation exported by another process via hipIpcGetMemHandle.
inline static hipError_t hipIpcOpenMemHandle(void** devPtr, hipIpcMemHandle_t handle,
                                             unsigned int flags) {
    return hipCUDAErrorTohipError(cudaIpcOpenMemHandle(devPtr, handle, flags));
}
1306 
// Byte-wise fill of `count` bytes of device memory with `value`.
inline static hipError_t hipMemset(void* devPtr, int value, size_t count) {
    return hipCUDAErrorTohipError(cudaMemset(devPtr, value, count));
}
1310 
// Fills `count` 32-bit words of device memory with `value` (driver API).
inline static hipError_t hipMemsetD32(hipDeviceptr_t devPtr, int value, size_t count) {
    return hipCUResultTohipError(cuMemsetD32(devPtr, value, count));
}
1314 
// Asynchronous byte-wise device fill on `stream` (default: null stream).
inline static hipError_t hipMemsetAsync(void* devPtr, int value, size_t count,
                                        hipStream_t stream __dparm(0)) {
    return hipCUDAErrorTohipError(cudaMemsetAsync(devPtr, value, count, stream));
}
1319 
// Asynchronous 32-bit-word device fill on `stream` (driver API).
inline static hipError_t hipMemsetD32Async(hipDeviceptr_t devPtr, int value, size_t count,
                                           hipStream_t stream __dparm(0)) {
    return hipCUResultTohipError(cuMemsetD32Async(devPtr, value, count, stream));
}
1324 
// Fills `sizeBytes` bytes of device memory with `value` (driver API).
inline static hipError_t hipMemsetD8(hipDeviceptr_t dest, unsigned char value, size_t sizeBytes) {
    return hipCUResultTohipError(cuMemsetD8(dest, value, sizeBytes));
}
1328 
// Asynchronous byte fill on `stream` (driver API).
inline static hipError_t hipMemsetD8Async(hipDeviceptr_t dest, unsigned char value, size_t sizeBytes,
                                          hipStream_t stream __dparm(0)) {
    return hipCUResultTohipError(cuMemsetD8Async(dest, value, sizeBytes, stream));
}
1333 
// Fills device memory with 16-bit `value`; sizeBytes counts 16-bit elements per cuMemsetD16.
inline static hipError_t hipMemsetD16(hipDeviceptr_t dest, unsigned short value, size_t sizeBytes) {
    return hipCUResultTohipError(cuMemsetD16(dest, value, sizeBytes));
}
1337 
// Asynchronous 16-bit fill on `stream` (driver API).
inline static hipError_t hipMemsetD16Async(hipDeviceptr_t dest, unsigned short value, size_t sizeBytes,
                                           hipStream_t stream __dparm(0)) {
    return hipCUResultTohipError(cuMemsetD16Async(dest, value, sizeBytes, stream));
}
1342 
// Byte-wise fill of a pitched 2D region.
inline static hipError_t hipMemset2D(void* dst, size_t pitch, int value, size_t width, size_t height) {
    return hipCUDAErrorTohipError(cudaMemset2D(dst, pitch, value, width, height));
}
1346 
// Asynchronous byte-wise fill of a pitched 2D region on `stream`.
inline static hipError_t hipMemset2DAsync(void* dst, size_t pitch, int value, size_t width, size_t height, hipStream_t stream __dparm(0)) {
    return hipCUDAErrorTohipError(cudaMemset2DAsync(dst, pitch, value, width, height, stream));
}
1350 
// Byte-wise fill of a pitched 3D region.
inline static hipError_t hipMemset3D(hipPitchedPtr pitchedDevPtr, int value, hipExtent extent ){
    return hipCUDAErrorTohipError(cudaMemset3D(pitchedDevPtr, value, extent));
}
1354 
// Asynchronous byte-wise fill of a pitched 3D region on `stream`.
inline static hipError_t hipMemset3DAsync(hipPitchedPtr pitchedDevPtr, int value, hipExtent extent, hipStream_t stream __dparm(0) ){
    return hipCUDAErrorTohipError(cudaMemset3DAsync(pitchedDevPtr, value, extent, stream));
}
1358 
1359 inline static hipError_t hipGetDeviceProperties(hipDeviceProp_t* p_prop, int device) {
1360  struct cudaDeviceProp cdprop;
1361  cudaError_t cerror;
1362  cerror = cudaGetDeviceProperties(&cdprop, device);
1363 
1364  strncpy(p_prop->name, cdprop.name, 256);
1365  p_prop->totalGlobalMem = cdprop.totalGlobalMem;
1366  p_prop->sharedMemPerBlock = cdprop.sharedMemPerBlock;
1367  p_prop->regsPerBlock = cdprop.regsPerBlock;
1368  p_prop->warpSize = cdprop.warpSize;
1369  p_prop->maxThreadsPerBlock = cdprop.maxThreadsPerBlock;
1370  for (int i = 0; i < 3; i++) {
1371  p_prop->maxThreadsDim[i] = cdprop.maxThreadsDim[i];
1372  p_prop->maxGridSize[i] = cdprop.maxGridSize[i];
1373  }
1374  p_prop->clockRate = cdprop.clockRate;
1375  p_prop->memoryClockRate = cdprop.memoryClockRate;
1376  p_prop->memoryBusWidth = cdprop.memoryBusWidth;
1377  p_prop->totalConstMem = cdprop.totalConstMem;
1378  p_prop->major = cdprop.major;
1379  p_prop->minor = cdprop.minor;
1380  p_prop->multiProcessorCount = cdprop.multiProcessorCount;
1381  p_prop->l2CacheSize = cdprop.l2CacheSize;
1382  p_prop->maxThreadsPerMultiProcessor = cdprop.maxThreadsPerMultiProcessor;
1383  p_prop->computeMode = cdprop.computeMode;
1384  p_prop->clockInstructionRate = cdprop.clockRate; // Same as clock-rate:
1385 
1386  int ccVers = p_prop->major * 100 + p_prop->minor * 10;
1387  p_prop->arch.hasGlobalInt32Atomics = (ccVers >= 110);
1388  p_prop->arch.hasGlobalFloatAtomicExch = (ccVers >= 110);
1389  p_prop->arch.hasSharedInt32Atomics = (ccVers >= 120);
1390  p_prop->arch.hasSharedFloatAtomicExch = (ccVers >= 120);
1391  p_prop->arch.hasFloatAtomicAdd = (ccVers >= 200);
1392  p_prop->arch.hasGlobalInt64Atomics = (ccVers >= 120);
1393  p_prop->arch.hasSharedInt64Atomics = (ccVers >= 110);
1394  p_prop->arch.hasDoubles = (ccVers >= 130);
1395  p_prop->arch.hasWarpVote = (ccVers >= 120);
1396  p_prop->arch.hasWarpBallot = (ccVers >= 200);
1397  p_prop->arch.hasWarpShuffle = (ccVers >= 300);
1398  p_prop->arch.hasFunnelShift = (ccVers >= 350);
1399  p_prop->arch.hasThreadFenceSystem = (ccVers >= 200);
1400  p_prop->arch.hasSyncThreadsExt = (ccVers >= 200);
1401  p_prop->arch.hasSurfaceFuncs = (ccVers >= 200);
1402  p_prop->arch.has3dGrid = (ccVers >= 200);
1403  p_prop->arch.hasDynamicParallelism = (ccVers >= 350);
1404 
1405  p_prop->concurrentKernels = cdprop.concurrentKernels;
1406  p_prop->pciDomainID = cdprop.pciDomainID;
1407  p_prop->pciBusID = cdprop.pciBusID;
1408  p_prop->pciDeviceID = cdprop.pciDeviceID;
1409  p_prop->maxSharedMemoryPerMultiProcessor = cdprop.sharedMemPerMultiprocessor;
1410  p_prop->isMultiGpuBoard = cdprop.isMultiGpuBoard;
1411  p_prop->canMapHostMemory = cdprop.canMapHostMemory;
1412  p_prop->gcnArch = 0; // Not a GCN arch
1413  p_prop->integrated = cdprop.integrated;
1414  p_prop->cooperativeLaunch = cdprop.cooperativeLaunch;
1415  p_prop->cooperativeMultiDeviceLaunch = cdprop.cooperativeMultiDeviceLaunch;
1420 
1421  p_prop->maxTexture1D = cdprop.maxTexture1D;
1422  p_prop->maxTexture2D[0] = cdprop.maxTexture2D[0];
1423  p_prop->maxTexture2D[1] = cdprop.maxTexture2D[1];
1424  p_prop->maxTexture3D[0] = cdprop.maxTexture3D[0];
1425  p_prop->maxTexture3D[1] = cdprop.maxTexture3D[1];
1426  p_prop->maxTexture3D[2] = cdprop.maxTexture3D[2];
1427 
1428  p_prop->memPitch = cdprop.memPitch;
1429  p_prop->textureAlignment = cdprop.textureAlignment;
1430  p_prop->texturePitchAlignment = cdprop.texturePitchAlignment;
1431  p_prop->kernelExecTimeoutEnabled = cdprop.kernelExecTimeoutEnabled;
1432  p_prop->ECCEnabled = cdprop.ECCEnabled;
1433  p_prop->tccDriver = cdprop.tccDriver;
1434 
1435  return hipCUDAErrorTohipError(cerror);
1436 }
1437 
1438 inline static hipError_t hipDeviceGetAttribute(int* pi, hipDeviceAttribute_t attr, int device) {
1439  enum cudaDeviceAttr cdattr;
1440  cudaError_t cerror;
1441 
1442  switch (attr) {
1444  cdattr = cudaDevAttrMaxThreadsPerBlock;
1445  break;
1447  cdattr = cudaDevAttrMaxBlockDimX;
1448  break;
1450  cdattr = cudaDevAttrMaxBlockDimY;
1451  break;
1453  cdattr = cudaDevAttrMaxBlockDimZ;
1454  break;
1456  cdattr = cudaDevAttrMaxGridDimX;
1457  break;
1459  cdattr = cudaDevAttrMaxGridDimY;
1460  break;
1462  cdattr = cudaDevAttrMaxGridDimZ;
1463  break;
1465  cdattr = cudaDevAttrMaxSharedMemoryPerBlock;
1466  break;
1468  cdattr = cudaDevAttrTotalConstantMemory;
1469  break;
1471  cdattr = cudaDevAttrWarpSize;
1472  break;
1474  cdattr = cudaDevAttrMaxRegistersPerBlock;
1475  break;
1477  cdattr = cudaDevAttrClockRate;
1478  break;
1480  cdattr = cudaDevAttrMemoryClockRate;
1481  break;
1483  cdattr = cudaDevAttrGlobalMemoryBusWidth;
1484  break;
1486  cdattr = cudaDevAttrMultiProcessorCount;
1487  break;
1489  cdattr = cudaDevAttrComputeMode;
1490  break;
1492  cdattr = cudaDevAttrL2CacheSize;
1493  break;
1495  cdattr = cudaDevAttrMaxThreadsPerMultiProcessor;
1496  break;
1498  cdattr = cudaDevAttrComputeCapabilityMajor;
1499  break;
1501  cdattr = cudaDevAttrComputeCapabilityMinor;
1502  break;
1504  cdattr = cudaDevAttrConcurrentKernels;
1505  break;
1507  cdattr = cudaDevAttrPciBusId;
1508  break;
1510  cdattr = cudaDevAttrPciDeviceId;
1511  break;
1513  cdattr = cudaDevAttrMaxSharedMemoryPerMultiprocessor;
1514  break;
1516  cdattr = cudaDevAttrIsMultiGpuBoard;
1517  break;
1519  cdattr = cudaDevAttrIntegrated;
1520  break;
1522  cdattr = cudaDevAttrMaxTexture1DWidth;
1523  break;
1525  cdattr = cudaDevAttrMaxTexture2DWidth;
1526  break;
1528  cdattr = cudaDevAttrMaxTexture2DHeight;
1529  break;
1531  cdattr = cudaDevAttrMaxTexture3DWidth;
1532  break;
1534  cdattr = cudaDevAttrMaxTexture3DHeight;
1535  break;
1537  cdattr = cudaDevAttrMaxTexture3DDepth;
1538  break;
1540  cdattr = cudaDevAttrMaxPitch;
1541  break;
1543  cdattr = cudaDevAttrTextureAlignment;
1544  break;
1546  cdattr = cudaDevAttrTexturePitchAlignment;
1547  break;
1549  cdattr = cudaDevAttrKernelExecTimeout;
1550  break;
1552  cdattr = cudaDevAttrCanMapHostMemory;
1553  break;
1555  cdattr = cudaDevAttrEccEnabled;
1556  break;
1558  cdattr = cudaDevAttrCooperativeLaunch;
1559  break;
1561  cdattr = cudaDevAttrCooperativeMultiDeviceLaunch;
1562  break;
1564  cdattr = cudaDevAttrConcurrentManagedAccess;
1565  break;
1567  cdattr = cudaDevAttrManagedMemory;
1568  break;
1570  cdattr = cudaDevAttrPageableMemoryAccessUsesHostPageTables;
1571  break;
1573  cdattr = cudaDevAttrPageableMemoryAccess;
1574  break;
1576  cdattr = cudaDevAttrDirectManagedMemAccessFromHost;
1577  break;
1578  default:
1579  return hipCUDAErrorTohipError(cudaErrorInvalidValue);
1580  }
1581 
1582  cerror = cudaDeviceGetAttribute(pi, cdattr, device);
1583 
1584  return hipCUDAErrorTohipError(cerror);
1585 }
1586 
// Reports the max resident blocks per SM for `func` at the given launch shape.
inline static hipError_t hipOccupancyMaxActiveBlocksPerMultiprocessor(int* numBlocks,
                                                                      const void* func,
                                                                      int blockSize,
                                                                      size_t dynamicSMemSize) {
    return hipCUDAErrorTohipError(cudaOccupancyMaxActiveBlocksPerMultiprocessor(numBlocks, func,
                                                                                blockSize, dynamicSMemSize));
}
1594 
// Flags variant of the occupancy query (e.g. to disable caching-behavior assumptions).
inline static hipError_t hipOccupancyMaxActiveBlocksPerMultiprocessorWithFlags(int* numBlocks,
                                                                               const void* func,
                                                                               int blockSize,
                                                                               size_t dynamicSMemSize,
                                                                               unsigned int flags) {
    return hipCUDAErrorTohipError(cudaOccupancyMaxActiveBlocksPerMultiprocessorWithFlags(numBlocks, func,
                                                                                         blockSize, dynamicSMemSize, flags));
}
1603 
// Driver-API occupancy query for a hipFunction_t (module-loaded kernel).
inline static hipError_t hipModuleOccupancyMaxActiveBlocksPerMultiprocessor(int* numBlocks,
                                                                            hipFunction_t f,
                                                                            int blockSize,
                                                                            size_t dynamicSMemSize ){
    return hipCUResultTohipError(cuOccupancyMaxActiveBlocksPerMultiprocessor(numBlocks, f,
                                                                             blockSize, dynamicSMemSize));
}
1611 
// Flags variant of the driver-API occupancy query for a hipFunction_t.
inline static hipError_t hipModuleOccupancyMaxActiveBlocksPerMultiprocessorWithFlags(int* numBlocks,
                                                                                     hipFunction_t f,
                                                                                     int blockSize,
                                                                                     size_t dynamicSMemSize,
                                                                                     unsigned int flags ) {
    return hipCUResultTohipError(cuOccupancyMaxActiveBlocksPerMultiprocessorWithFlags(numBlocks,f,
                                                                                      blockSize, dynamicSMemSize, flags));
}
1620 
//TODO - Match CUoccupancyB2DSize
// Suggests a grid/block size maximizing occupancy for `f`; the CUoccupancyB2DSize
// callback is passed as NULL, so per-block-size dynamic shared memory is not modeled.
inline static hipError_t hipModuleOccupancyMaxPotentialBlockSize(int* gridSize, int* blockSize,
                                                                 hipFunction_t f, size_t dynSharedMemPerBlk,
                                                                 int blockSizeLimit){
    return hipCUResultTohipError(cuOccupancyMaxPotentialBlockSize(gridSize, blockSize, f, NULL,
                                                                  dynSharedMemPerBlk, blockSizeLimit));
}
1628 
//TODO - Match CUoccupancyB2DSize
// Flags variant of the potential-block-size query; B2D-size callback is NULL (see above).
inline static hipError_t hipModuleOccupancyMaxPotentialBlockSizeWithFlags(int* gridSize, int* blockSize,
                                                                          hipFunction_t f, size_t dynSharedMemPerBlk,
                                                                          int blockSizeLimit, unsigned int flags){
    return hipCUResultTohipError(cuOccupancyMaxPotentialBlockSizeWithFlags(gridSize, blockSize, f, NULL,
                                                                           dynSharedMemPerBlk, blockSizeLimit, flags));
}
1636 
// Fills a hipPointerAttribute_t for `ptr` from cudaPointerGetAttributes.
// Only device- and host-resident pointers are mapped; any other memory type
// yields hipErrorUnknown. isManaged/allocationFlags are always reported as 0.
inline static hipError_t hipPointerGetAttributes(hipPointerAttribute_t* attributes, const void* ptr) {
    struct cudaPointerAttributes cPA;
    hipError_t err = hipCUDAErrorTohipError(cudaPointerGetAttributes(&cPA, ptr));
    if (err == hipSuccess) {
        // CUDA 11 renamed the field from memoryType to type.
#if (CUDART_VERSION >= 11000)
        auto memType = cPA.type;
#else
        unsigned memType = cPA.memoryType; // No auto because cuda 10.2 doesnt force c++11
#endif
        switch (memType) {
            case cudaMemoryTypeDevice:
                attributes->memoryType = hipMemoryTypeDevice;
                break;
            case cudaMemoryTypeHost:
                attributes->memoryType = hipMemoryTypeHost;
                break;
            default:
                return hipErrorUnknown;
        }
        attributes->device = cPA.device;
        attributes->devicePointer = cPA.devicePointer;
        attributes->hostPointer = cPA.hostPointer;
        attributes->isManaged = 0;
        attributes->allocationFlags = 0;
    }
    return err;
}
1664 
// Reports free and total device memory in bytes.
inline static hipError_t hipMemGetInfo(size_t* free, size_t* total) {
    return hipCUDAErrorTohipError(cudaMemGetInfo(free, total));
}
1668 
// Creates an event with default flags.
inline static hipError_t hipEventCreate(hipEvent_t* event) {
    return hipCUDAErrorTohipError(cudaEventCreate(event));
}
1672 
// Records `event` into `stream` (default: null stream).
inline static hipError_t hipEventRecord(hipEvent_t event, hipStream_t stream __dparm(NULL)) {
    return hipCUDAErrorTohipError(cudaEventRecord(event, stream));
}
1676 
// Blocks the host until `event` has completed.
inline static hipError_t hipEventSynchronize(hipEvent_t event) {
    return hipCUDAErrorTohipError(cudaEventSynchronize(event));
}
1680 
// Stores in *ms the elapsed milliseconds between two recorded events.
inline static hipError_t hipEventElapsedTime(float* ms, hipEvent_t start, hipEvent_t stop) {
    return hipCUDAErrorTohipError(cudaEventElapsedTime(ms, start, stop));
}
1684 
// Destroys an event created with hipEventCreate.
inline static hipError_t hipEventDestroy(hipEvent_t event) {
    return hipCUDAErrorTohipError(cudaEventDestroy(event));
}
1688 
// Creates a stream with the given creation flags.
inline static hipError_t hipStreamCreateWithFlags(hipStream_t* stream, unsigned int flags) {
    return hipCUDAErrorTohipError(cudaStreamCreateWithFlags(stream, flags));
}
1692 
// Creates a stream with the given flags and scheduling priority.
inline static hipError_t hipStreamCreateWithPriority(hipStream_t* stream, unsigned int flags, int priority) {
    return hipCUDAErrorTohipError(cudaStreamCreateWithPriority(stream, flags, priority));
}
1696 
// Reports the [least, greatest] stream priority range of the current device.
inline static hipError_t hipDeviceGetStreamPriorityRange(int* leastPriority, int* greatestPriority) {
    return hipCUDAErrorTohipError(cudaDeviceGetStreamPriorityRange(leastPriority, greatestPriority));
}
1700 
// Creates a stream with default flags.
inline static hipError_t hipStreamCreate(hipStream_t* stream) {
    return hipCUDAErrorTohipError(cudaStreamCreate(stream));
}
1704 
1705 inline static hipError_t hipStreamSynchronize(hipStream_t stream) {
1706  return hipCUDAErrorTohipError(cudaStreamSynchronize(stream));
1707 }
1708 
1709 inline static hipError_t hipStreamDestroy(hipStream_t stream) {
1710  return hipCUDAErrorTohipError(cudaStreamDestroy(stream));
1711 }
1712 
1713 inline static hipError_t hipStreamGetFlags(hipStream_t stream, unsigned int *flags) {
1714  return hipCUDAErrorTohipError(cudaStreamGetFlags(stream, flags));
1715 }
1716 
1717 inline static hipError_t hipStreamGetPriority(hipStream_t stream, int *priority) {
1718  return hipCUDAErrorTohipError(cudaStreamGetPriority(stream, priority));
1719 }
1720 
1721 inline static hipError_t hipStreamWaitEvent(hipStream_t stream, hipEvent_t event,
1722  unsigned int flags) {
1723  return hipCUDAErrorTohipError(cudaStreamWaitEvent(stream, event, flags));
1724 }
1725 
1726 inline static hipError_t hipStreamQuery(hipStream_t stream) {
1727  return hipCUDAErrorTohipError(cudaStreamQuery(stream));
1728 }
1729 
1730 inline static hipError_t hipStreamAddCallback(hipStream_t stream, hipStreamCallback_t callback,
1731  void* userData, unsigned int flags) {
1732  return hipCUDAErrorTohipError(
1733  cudaStreamAddCallback(stream, (cudaStreamCallback_t)callback, userData, flags));
1734 }
1735 
1736 inline static hipError_t hipDriverGetVersion(int* driverVersion) {
1737  cudaError_t err = cudaDriverGetVersion(driverVersion);
1738 
1739  // Override driver version to match version reported on HCC side.
1740  *driverVersion = 4;
1741 
1742  return hipCUDAErrorTohipError(err);
1743 }
1744 
1745 inline static hipError_t hipRuntimeGetVersion(int* runtimeVersion) {
1746  return hipCUDAErrorTohipError(cudaRuntimeGetVersion(runtimeVersion));
1747 }
1748 
1749 inline static hipError_t hipDeviceCanAccessPeer(int* canAccessPeer, int device, int peerDevice) {
1750  return hipCUDAErrorTohipError(cudaDeviceCanAccessPeer(canAccessPeer, device, peerDevice));
1751 }
1752 
1753 inline static hipError_t hipDeviceDisablePeerAccess(int peerDevice) {
1754  return hipCUDAErrorTohipError(cudaDeviceDisablePeerAccess(peerDevice));
1755 }
1756 
1757 inline static hipError_t hipDeviceEnablePeerAccess(int peerDevice, unsigned int flags) {
1758  return hipCUDAErrorTohipError(cudaDeviceEnablePeerAccess(peerDevice, flags));
1759 }
1760 
1761 inline static hipError_t hipCtxDisablePeerAccess(hipCtx_t peerCtx) {
1762  return hipCUResultTohipError(cuCtxDisablePeerAccess(peerCtx));
1763 }
1764 
1765 inline static hipError_t hipCtxEnablePeerAccess(hipCtx_t peerCtx, unsigned int flags) {
1766  return hipCUResultTohipError(cuCtxEnablePeerAccess(peerCtx, flags));
1767 }
1768 
1769 inline static hipError_t hipDevicePrimaryCtxGetState(hipDevice_t dev, unsigned int* flags,
1770  int* active) {
1771  return hipCUResultTohipError(cuDevicePrimaryCtxGetState(dev, flags, active));
1772 }
1773 
1774 inline static hipError_t hipDevicePrimaryCtxRelease(hipDevice_t dev) {
1775  return hipCUResultTohipError(cuDevicePrimaryCtxRelease(dev));
1776 }
1777 
1778 inline static hipError_t hipDevicePrimaryCtxRetain(hipCtx_t* pctx, hipDevice_t dev) {
1779  return hipCUResultTohipError(cuDevicePrimaryCtxRetain(pctx, dev));
1780 }
1781 
1782 inline static hipError_t hipDevicePrimaryCtxReset(hipDevice_t dev) {
1783  return hipCUResultTohipError(cuDevicePrimaryCtxReset(dev));
1784 }
1785 
1786 inline static hipError_t hipDevicePrimaryCtxSetFlags(hipDevice_t dev, unsigned int flags) {
1787  return hipCUResultTohipError(cuDevicePrimaryCtxSetFlags(dev, flags));
1788 }
1789 
1790 inline static hipError_t hipMemGetAddressRange(hipDeviceptr_t* pbase, size_t* psize,
1791  hipDeviceptr_t dptr) {
1792  return hipCUResultTohipError(cuMemGetAddressRange(pbase, psize, dptr));
1793 }
1794 
1795 inline static hipError_t hipMemcpyPeer(void* dst, int dstDevice, const void* src, int srcDevice,
1796  size_t count) {
1797  return hipCUDAErrorTohipError(cudaMemcpyPeer(dst, dstDevice, src, srcDevice, count));
1798 }
1799 
1800 inline static hipError_t hipMemcpyPeerAsync(void* dst, int dstDevice, const void* src,
1801  int srcDevice, size_t count,
1802  hipStream_t stream __dparm(0)) {
1803  return hipCUDAErrorTohipError(
1804  cudaMemcpyPeerAsync(dst, dstDevice, src, srcDevice, count, stream));
1805 }
1806 
1807 // Profile APIs:
1808 inline static hipError_t hipProfilerStart() { return hipCUDAErrorTohipError(cudaProfilerStart()); }
1809 
1810 inline static hipError_t hipProfilerStop() { return hipCUDAErrorTohipError(cudaProfilerStop()); }
1811 
1812 inline static hipError_t hipGetDeviceFlags(unsigned int* flags) {
1813  return hipCUDAErrorTohipError(cudaGetDeviceFlags(flags));
1814 }
1815 
1816 inline static hipError_t hipSetDeviceFlags(unsigned int flags) {
1817  return hipCUDAErrorTohipError(cudaSetDeviceFlags(flags));
1818 }
1819 
1820 inline static hipError_t hipEventCreateWithFlags(hipEvent_t* event, unsigned int flags) {
1821  return hipCUDAErrorTohipError(cudaEventCreateWithFlags(event, flags));
1822 }
1823 
1824 inline static hipError_t hipEventQuery(hipEvent_t event) {
1825  return hipCUDAErrorTohipError(cudaEventQuery(event));
1826 }
1827 
1828 inline static hipError_t hipCtxCreate(hipCtx_t* ctx, unsigned int flags, hipDevice_t device) {
1829  return hipCUResultTohipError(cuCtxCreate(ctx, flags, device));
1830 }
1831 
1832 inline static hipError_t hipCtxDestroy(hipCtx_t ctx) {
1833  return hipCUResultTohipError(cuCtxDestroy(ctx));
1834 }
1835 
1836 inline static hipError_t hipCtxPopCurrent(hipCtx_t* ctx) {
1837  return hipCUResultTohipError(cuCtxPopCurrent(ctx));
1838 }
1839 
1840 inline static hipError_t hipCtxPushCurrent(hipCtx_t ctx) {
1841  return hipCUResultTohipError(cuCtxPushCurrent(ctx));
1842 }
1843 
1844 inline static hipError_t hipCtxSetCurrent(hipCtx_t ctx) {
1845  return hipCUResultTohipError(cuCtxSetCurrent(ctx));
1846 }
1847 
1848 inline static hipError_t hipCtxGetCurrent(hipCtx_t* ctx) {
1849  return hipCUResultTohipError(cuCtxGetCurrent(ctx));
1850 }
1851 
1852 inline static hipError_t hipCtxGetDevice(hipDevice_t* device) {
1853  return hipCUResultTohipError(cuCtxGetDevice(device));
1854 }
1855 
1856 inline static hipError_t hipCtxGetApiVersion(hipCtx_t ctx, int* apiVersion) {
1857  return hipCUResultTohipError(cuCtxGetApiVersion(ctx, (unsigned int*)apiVersion));
1858 }
1859 
1860 inline static hipError_t hipCtxGetCacheConfig(hipFuncCache* cacheConfig) {
1861  return hipCUResultTohipError(cuCtxGetCacheConfig(cacheConfig));
1862 }
1863 
1864 inline static hipError_t hipCtxSetCacheConfig(hipFuncCache cacheConfig) {
1865  return hipCUResultTohipError(cuCtxSetCacheConfig(cacheConfig));
1866 }
1867 
1868 inline static hipError_t hipCtxSetSharedMemConfig(hipSharedMemConfig config) {
1869  return hipCUResultTohipError(cuCtxSetSharedMemConfig((CUsharedconfig)config));
1870 }
1871 
1872 inline static hipError_t hipCtxGetSharedMemConfig(hipSharedMemConfig* pConfig) {
1873  return hipCUResultTohipError(cuCtxGetSharedMemConfig((CUsharedconfig*)pConfig));
1874 }
1875 
1876 inline static hipError_t hipCtxSynchronize(void) {
1877  return hipCUResultTohipError(cuCtxSynchronize());
1878 }
1879 
1880 inline static hipError_t hipCtxGetFlags(unsigned int* flags) {
1881  return hipCUResultTohipError(cuCtxGetFlags(flags));
1882 }
1883 
1884 inline static hipError_t hipCtxDetach(hipCtx_t ctx) {
1885  return hipCUResultTohipError(cuCtxDetach(ctx));
1886 }
1887 
1888 inline static hipError_t hipDeviceGet(hipDevice_t* device, int ordinal) {
1889  return hipCUResultTohipError(cuDeviceGet(device, ordinal));
1890 }
1891 
1892 inline static hipError_t hipDeviceComputeCapability(int* major, int* minor, hipDevice_t device) {
1893  return hipCUResultTohipError(cuDeviceComputeCapability(major, minor, device));
1894 }
1895 
1896 inline static hipError_t hipDeviceGetName(char* name, int len, hipDevice_t device) {
1897  return hipCUResultTohipError(cuDeviceGetName(name, len, device));
1898 }
1899 
1900 inline static hipError_t hipDeviceGetP2PAttribute(int* value, hipDeviceP2PAttr attr,
1901  int srcDevice, int dstDevice) {
1902  return hipCUDAErrorTohipError(cudaDeviceGetP2PAttribute(value, attr, srcDevice, dstDevice));
1903 }
1904 
1905 inline static hipError_t hipDeviceGetPCIBusId(char* pciBusId, int len, hipDevice_t device) {
1906  return hipCUDAErrorTohipError(cudaDeviceGetPCIBusId(pciBusId, len, device));
1907 }
1908 
1909 inline static hipError_t hipDeviceGetByPCIBusId(int* device, const char* pciBusId) {
1910  return hipCUDAErrorTohipError(cudaDeviceGetByPCIBusId(device, pciBusId));
1911 }
1912 
1913 inline static hipError_t hipDeviceGetSharedMemConfig(hipSharedMemConfig* config) {
1914  return hipCUDAErrorTohipError(cudaDeviceGetSharedMemConfig(config));
1915 }
1916 
1917 inline static hipError_t hipDeviceSetSharedMemConfig(hipSharedMemConfig config) {
1918  return hipCUDAErrorTohipError(cudaDeviceSetSharedMemConfig(config));
1919 }
1920 
1921 inline static hipError_t hipDeviceGetLimit(size_t* pValue, hipLimit_t limit) {
1922  return hipCUDAErrorTohipError(cudaDeviceGetLimit(pValue, limit));
1923 }
1924 
1925 inline static hipError_t hipDeviceTotalMem(size_t* bytes, hipDevice_t device) {
1926  return hipCUResultTohipError(cuDeviceTotalMem(bytes, device));
1927 }
1928 
1929 inline static hipError_t hipModuleLoad(hipModule_t* module, const char* fname) {
1930  return hipCUResultTohipError(cuModuleLoad(module, fname));
1931 }
1932 
1933 inline static hipError_t hipModuleUnload(hipModule_t hmod) {
1934  return hipCUResultTohipError(cuModuleUnload(hmod));
1935 }
1936 
1937 inline static hipError_t hipModuleGetFunction(hipFunction_t* function, hipModule_t module,
1938  const char* kname) {
1939  return hipCUResultTohipError(cuModuleGetFunction(function, module, kname));
1940 }
1941 
1942 inline static hipError_t hipModuleGetTexRef(hipTexRef* pTexRef, hipModule_t hmod, const char* name){
1943  hipCUResultTohipError(cuModuleGetTexRef(pTexRef, hmod, name));
1944 }
1945 
1946 inline static hipError_t hipFuncGetAttributes(hipFuncAttributes* attr, const void* func) {
1947  return hipCUDAErrorTohipError(cudaFuncGetAttributes(attr, func));
1948 }
1949 
1950 inline static hipError_t hipFuncGetAttribute (int* value, hipFunction_attribute attrib, hipFunction_t hfunc) {
1951  return hipCUResultTohipError(cuFuncGetAttribute(value, attrib, hfunc));
1952 }
1953 
1954 inline static hipError_t hipModuleGetGlobal(hipDeviceptr_t* dptr, size_t* bytes, hipModule_t hmod,
1955  const char* name) {
1956  return hipCUResultTohipError(cuModuleGetGlobal(dptr, bytes, hmod, name));
1957 }
1958 
1959 inline static hipError_t hipModuleLoadData(hipModule_t* module, const void* image) {
1960  return hipCUResultTohipError(cuModuleLoadData(module, image));
1961 }
1962 
1963 inline static hipError_t hipModuleLoadDataEx(hipModule_t* module, const void* image,
1964  unsigned int numOptions, hipJitOption* options,
1965  void** optionValues) {
1966  return hipCUResultTohipError(
1967  cuModuleLoadDataEx(module, image, numOptions, options, optionValues));
1968 }
1969 
1970 inline static hipError_t hipLaunchKernel(const void* function_address, dim3 numBlocks,
1971  dim3 dimBlocks, void** args, size_t sharedMemBytes,
1972  hipStream_t stream)
1973 {
1974  return hipCUDAErrorTohipError(cudaLaunchKernel(function_address,numBlocks,dimBlocks,args,sharedMemBytes,stream));
1975 }
1976 
1977 inline static hipError_t hipModuleLaunchKernel(hipFunction_t f, unsigned int gridDimX,
1978  unsigned int gridDimY, unsigned int gridDimZ,
1979  unsigned int blockDimX, unsigned int blockDimY,
1980  unsigned int blockDimZ, unsigned int sharedMemBytes,
1981  hipStream_t stream, void** kernelParams,
1982  void** extra) {
1983  return hipCUResultTohipError(cuLaunchKernel(f, gridDimX, gridDimY, gridDimZ, blockDimX,
1984  blockDimY, blockDimZ, sharedMemBytes, stream,
1985  kernelParams, extra));
1986 }
1987 
1988 inline static hipError_t hipFuncSetCacheConfig(const void* func, hipFuncCache_t cacheConfig) {
1989  return hipCUDAErrorTohipError(cudaFuncSetCacheConfig(func, cacheConfig));
1990 }
1991 
1992 __HIP_DEPRECATED inline static hipError_t hipBindTexture(size_t* offset,
1993  struct textureReference* tex,
1994  const void* devPtr,
1995  const hipChannelFormatDesc* desc,
1996  size_t size __dparm(UINT_MAX)) {
1997  return hipCUDAErrorTohipError(cudaBindTexture(offset, tex, devPtr, desc, size));
1998 }
1999 
2000 __HIP_DEPRECATED inline static hipError_t hipBindTexture2D(
2001  size_t* offset, struct textureReference* tex, const void* devPtr,
2002  const hipChannelFormatDesc* desc, size_t width, size_t height, size_t pitch) {
2003  return hipCUDAErrorTohipError(cudaBindTexture2D(offset, tex, devPtr, desc, width, height, pitch));
2004 }
2005 
2006 inline static hipChannelFormatDesc hipCreateChannelDesc(int x, int y, int z, int w,
2007  hipChannelFormatKind f) {
2008  return cudaCreateChannelDesc(x, y, z, w, hipChannelFormatKindToCudaChannelFormatKind(f));
2009 }
2010 
2011 inline static hipError_t hipCreateTextureObject(hipTextureObject_t* pTexObject,
2012  const hipResourceDesc* pResDesc,
2013  const hipTextureDesc* pTexDesc,
2014  const hipResourceViewDesc* pResViewDesc) {
2015  return hipCUDAErrorTohipError(
2016  cudaCreateTextureObject(pTexObject, pResDesc, pTexDesc, pResViewDesc));
2017 }
2018 
2019 inline static hipError_t hipDestroyTextureObject(hipTextureObject_t textureObject) {
2020  return hipCUDAErrorTohipError(cudaDestroyTextureObject(textureObject));
2021 }
2022 
2023 inline static hipError_t hipCreateSurfaceObject(hipSurfaceObject_t* pSurfObject,
2024  const hipResourceDesc* pResDesc) {
2025  return hipCUDAErrorTohipError(cudaCreateSurfaceObject(pSurfObject, pResDesc));
2026 }
2027 
2028 inline static hipError_t hipDestroySurfaceObject(hipSurfaceObject_t surfaceObject) {
2029  return hipCUDAErrorTohipError(cudaDestroySurfaceObject(surfaceObject));
2030 }
2031 
2032 inline static hipError_t hipGetTextureObjectResourceDesc(hipResourceDesc* pResDesc,
2033  hipTextureObject_t textureObject) {
2034  return hipCUDAErrorTohipError(cudaGetTextureObjectResourceDesc( pResDesc, textureObject));
2035 }
2036 
2037 __HIP_DEPRECATED inline static hipError_t hipGetTextureAlignmentOffset(
2038  size_t* offset, const struct textureReference* texref) {
2039  return hipCUDAErrorTohipError(cudaGetTextureAlignmentOffset(offset,texref));
2040 }
2041 
2042 inline static hipError_t hipGetChannelDesc(hipChannelFormatDesc* desc, hipArray_const_t array)
2043 {
2044  return hipCUDAErrorTohipError(cudaGetChannelDesc(desc,array));
2045 }
2046 
2047 inline static hipError_t hipLaunchCooperativeKernel(const void* f, dim3 gridDim, dim3 blockDim,
2048  void** kernelParams, unsigned int sharedMemBytes,
2049  hipStream_t stream) {
2050  return hipCUDAErrorTohipError(
2051  cudaLaunchCooperativeKernel(f, gridDim, blockDim, kernelParams, sharedMemBytes, stream));
2052 }
2053 
2054 inline static hipError_t hipLaunchCooperativeKernelMultiDevice(hipLaunchParams* launchParamsList,
2055  int numDevices, unsigned int flags) {
2056  return hipCUDAErrorTohipError(cudaLaunchCooperativeKernelMultiDevice(launchParamsList, numDevices, flags));
2057 }
2058 
2059 #ifdef __cplusplus
2060 }
2061 #endif
2062 
2063 #ifdef __CUDACC__
2064 
2065 template<class T>
2066 inline static hipError_t hipOccupancyMaxActiveBlocksPerMultiprocessor(int* numBlocks,
2067  T func,
2068  int blockSize,
2069  size_t dynamicSMemSize) {
2070  return hipCUDAErrorTohipError(cudaOccupancyMaxActiveBlocksPerMultiprocessor(numBlocks, func,
2071  blockSize, dynamicSMemSize));
2072 }
2073 
2074 template <class T>
2075 inline static hipError_t hipOccupancyMaxPotentialBlockSize(int* minGridSize, int* blockSize, T func,
2076  size_t dynamicSMemSize = 0,
2077  int blockSizeLimit = 0) {
2078  return hipCUDAErrorTohipError(cudaOccupancyMaxPotentialBlockSize(minGridSize, blockSize, func,
2079  dynamicSMemSize, blockSizeLimit));
2080 }
2081 
2082 template <class T>
2083 inline static hipError_t hipOccupancyMaxPotentialBlockSizeWithFlags(int* minGridSize, int* blockSize, T func,
2084  size_t dynamicSMemSize = 0,
2085  int blockSizeLimit = 0, unsigned int flags = 0) {
2086  return hipCUDAErrorTohipError(cudaOccupancyMaxPotentialBlockSize(minGridSize, blockSize, func,
2087  dynamicSMemSize, blockSizeLimit, flags));
2088 }
2089 
2090 template <class T>
2091 inline static hipError_t hipOccupancyMaxActiveBlocksPerMultiprocessorWithFlags( int* numBlocks, T func,
2092  int blockSize, size_t dynamicSMemSize,unsigned int flags) {
2093  return hipCUDAErrorTohipError(cudaOccupancyMaxActiveBlocksPerMultiprocessorWithFlags(numBlocks, func,
2094  blockSize, dynamicSMemSize, flags));
2095 }
2096 
2097 template <class T, int dim, enum cudaTextureReadMode readMode>
2098 inline static hipError_t hipBindTexture(size_t* offset, const struct texture<T, dim, readMode>& tex,
2099  const void* devPtr, size_t size = UINT_MAX) {
2100  return hipCUDAErrorTohipError(cudaBindTexture(offset, tex, devPtr, size));
2101 }
2102 
2103 template <class T, int dim, enum cudaTextureReadMode readMode>
2104 inline static hipError_t hipBindTexture(size_t* offset, struct texture<T, dim, readMode>& tex,
2105  const void* devPtr, const hipChannelFormatDesc& desc,
2106  size_t size = UINT_MAX) {
2107  return hipCUDAErrorTohipError(cudaBindTexture(offset, tex, devPtr, desc, size));
2108 }
2109 
2110 template <class T, int dim, enum cudaTextureReadMode readMode>
2111 __HIP_DEPRECATED inline static hipError_t hipUnbindTexture(struct texture<T, dim, readMode>* tex) {
2112  return hipCUDAErrorTohipError(cudaUnbindTexture(tex));
2113 }
2114 
2115 template <class T, int dim, enum cudaTextureReadMode readMode>
2116 __HIP_DEPRECATED inline static hipError_t hipUnbindTexture(struct texture<T, dim, readMode>& tex) {
2117  return hipCUDAErrorTohipError(cudaUnbindTexture(tex));
2118 }
2119 
2120 template <class T, int dim, enum cudaTextureReadMode readMode>
2121 __HIP_DEPRECATED inline static hipError_t hipBindTextureToArray(
2122  struct texture<T, dim, readMode>& tex, hipArray_const_t array,
2123  const hipChannelFormatDesc& desc) {
2124  return hipCUDAErrorTohipError(cudaBindTextureToArray(tex, array, desc));
2125 }
2126 
2127 template <class T, int dim, enum cudaTextureReadMode readMode>
2128 __HIP_DEPRECATED inline static hipError_t hipBindTextureToArray(
2129  struct texture<T, dim, readMode>* tex, hipArray_const_t array,
2130  const hipChannelFormatDesc* desc) {
2131  return hipCUDAErrorTohipError(cudaBindTextureToArray(tex, array, desc));
2132 }
2133 
2134 template <class T, int dim, enum cudaTextureReadMode readMode>
2135 __HIP_DEPRECATED inline static hipError_t hipBindTextureToArray(
2136  struct texture<T, dim, readMode>& tex, hipArray_const_t array) {
2137  return hipCUDAErrorTohipError(cudaBindTextureToArray(tex, array));
2138 }
2139 
2140 template <class T>
2141 inline static hipChannelFormatDesc hipCreateChannelDesc() {
2142  return cudaCreateChannelDesc<T>();
2143 }
2144 
2145 template <class T>
2146 inline static hipError_t hipLaunchCooperativeKernel(T f, dim3 gridDim, dim3 blockDim,
2147  void** kernelParams, unsigned int sharedMemBytes, hipStream_t stream) {
2148  return hipCUDAErrorTohipError(
2149  cudaLaunchCooperativeKernel(reinterpret_cast<const void*>(f), gridDim, blockDim, kernelParams, sharedMemBytes, stream));
2150 }
2151 
2152 inline static hipError_t hipTexRefSetAddressMode(hipTexRef hTexRef, int dim, hipAddress_mode am){
2153  return hipCUResultTohipError(cuTexRefSetAddressMode(hTexRef,dim,am));
2154 }
2155 
2156 inline static hipError_t hipTexRefSetFilterMode(hipTexRef hTexRef, hipFilter_mode fm){
2157  return hipCUResultTohipError(cuTexRefSetFilterMode(hTexRef,fm));
2158 }
2159 
2160 inline static hipError_t hipTexRefSetAddress(size_t *ByteOffset, hipTexRef hTexRef, hipDeviceptr_t dptr, size_t bytes){
2161  return hipCUResultTohipError(cuTexRefSetAddress(ByteOffset,hTexRef,dptr,bytes));
2162 }
2163 
2164 inline static hipError_t hipTexRefSetAddress2D(hipTexRef hTexRef, const CUDA_ARRAY_DESCRIPTOR *desc, hipDeviceptr_t dptr, size_t Pitch){
2165  return hipCUResultTohipError(cuTexRefSetAddress2D(hTexRef,desc,dptr,Pitch));
2166 }
2167 
2168 inline static hipError_t hipTexRefSetFormat(hipTexRef hTexRef, hipArray_Format fmt, int NumPackedComponents){
2169  return hipCUResultTohipError(cuTexRefSetFormat(hTexRef,fmt,NumPackedComponents));
2170 }
2171 
2172 inline static hipError_t hipTexRefSetFlags(hipTexRef hTexRef, unsigned int Flags){
2173  return hipCUResultTohipError(cuTexRefSetFlags(hTexRef,Flags));
2174 }
2175 
2176 inline static hipError_t hipTexRefSetArray(hipTexRef hTexRef, hiparray hArray, unsigned int Flags){
2177  return hipCUResultTohipError(cuTexRefSetArray(hTexRef,hArray,Flags));
2178 }
2179 
2180 inline static hipError_t hipArrayCreate(hiparray* pHandle, const HIP_ARRAY_DESCRIPTOR* pAllocateArray){
2181  return hipCUResultTohipError(cuArrayCreate(pHandle, pAllocateArray));
2182 }
2183 
2184 inline static hipError_t hipArrayDestroy(hiparray hArray){
2185  return hipCUResultTohipError(cuArrayDestroy(hArray));
2186 }
2187 
2188 inline static hipError_t hipArray3DCreate(hiparray* pHandle,
2189  const HIP_ARRAY3D_DESCRIPTOR* pAllocateArray){
2190  return hipCUResultTohipError(cuArray3DCreate(pHandle, pAllocateArray));
2191 }
2192 
2193 #endif //__CUDACC__
2194 
2195 #endif // HIP_INCLUDE_HIP_NVIDIA_DETAIL_HIP_RUNTIME_API_H
hipFuncAttributes
Definition: hip_runtime_api.h:109
hipPointerGetAttributes
hipError_t hipPointerGetAttributes(hipPointerAttribute_t *attributes, const void *ptr)
Return attributes for the specified pointer.
hipDeviceAttributeMaxPitch
@ hipDeviceAttributeMaxPitch
Maximum pitch in bytes allowed by memory copies.
Definition: hip_runtime_api.h:368
hipMemset3DAsync
hipError_t hipMemset3DAsync(hipPitchedPtr pitchedDevPtr, int value, hipExtent extent, hipStream_t stream __dparm(0))
Fills asynchronously the memory area pointed to by pitchedDevPtr with the constant value.
hipMemAdviseUnsetAccessedBy
@ hipMemAdviseUnsetAccessedBy
Definition: hip_runtime_api.h:240
hipMemcpy3D
hipError_t hipMemcpy3D(const struct hipMemcpy3DParms *p)
Copies data between host and device.
hipIpcOpenMemHandle
hipError_t hipIpcOpenMemHandle(void **devPtr, hipIpcMemHandle_t handle, unsigned int flags)
Opens an interprocess memory handle exported from another process and returns a device pointer usable...
hipCtxEnablePeerAccess
hipError_t hipCtxEnablePeerAccess(hipCtx_t peerCtx, unsigned int flags)
Enables direct access to memory allocations in a peer context.
hipDeviceProp_t::regsPerBlock
int regsPerBlock
Registers per block.
Definition: hip_runtime_api.h:88
hipMallocPitch
hipError_t hipMallocPitch(void **ptr, size_t *pitch, size_t width, size_t height)
hipSetDevice
hipError_t hipSetDevice(int deviceId)
Set default device to be used for subsequent hip API calls from this thread.
hipDeviceAttributeManagedMemory
@ hipDeviceAttributeManagedMemory
Device supports allocating managed memory on this system.
Definition: hip_runtime_api.h:384
hipDeviceGetP2PAttribute
hipError_t hipDeviceGetP2PAttribute(int *value, hipDeviceP2PAttr attr, int srcDevice, int dstDevice)
Returns a value for attr of link between two devices.
hipMemsetD16Async
hipError_t hipMemsetD16Async(hipDeviceptr_t dest, unsigned short value, size_t count, hipStream_t stream __dparm(0))
Fills the first sizeBytes bytes of the memory area pointed to by dest with the constant short value v...
hipMemcpy2DFromArrayAsync
hipError_t hipMemcpy2DFromArrayAsync(void *dst, size_t dpitch, hipArray_const_t src, size_t wOffset, size_t hOffset, size_t width, size_t height, hipMemcpyKind kind, hipStream_t stream __dparm(0))
Copies data between host and device asynchronously.
hipDeviceAttributeMemoryBusWidth
@ hipDeviceAttributeMemoryBusWidth
Global memory bus width in bits.
Definition: hip_runtime_api.h:339
hipDeviceAttributePageableMemoryAccessUsesHostPageTables
@ hipDeviceAttributePageableMemoryAccessUsesHostPageTables
Definition: hip_runtime_api.h:391
hipGetErrorString
const char * hipGetErrorString(hipError_t hipError)
Return handy text string message to explain the error which occurred.
hipMemRangeGetAttributes
hipError_t hipMemRangeGetAttributes(void **data, size_t *data_sizes, hipMemRangeAttribute *attributes, size_t num_attributes, const void *dev_ptr, size_t count)
Query attributes of a given memory range in AMD HMM.
hipGetDeviceFlags
hipError_t hipGetDeviceFlags(unsigned int *flags)
Gets the flags set for current device.
hipDeviceGetByPCIBusId
hipError_t hipDeviceGetByPCIBusId(int *device, const char *pciBusId)
Returns a handle to a compute device.
hipErrorInvalidMemcpyDirection
hipErrorInvalidMemcpyDirection
Invalid memory copy direction.
Definition: hip_runtime_api.h:222
hipMalloc3DArray
hipError_t hipMalloc3DArray(hipArray **array, const struct hipChannelFormatDesc *desc, struct hipExtent extent, unsigned int flags)
Allocate an array on the device.
hipDeviceArch_t::hasGlobalInt64Atomics
unsigned hasGlobalInt64Atomics
64-bit integer atomics for global memory.
Definition: hip_runtime_api.h:54
hipDeviceProp_t::minor
int minor
Definition: hip_runtime_api.h:100
hipDeviceAttributeMaxBlockDimX
@ hipDeviceAttributeMaxBlockDimX
Maximum x-dimension of a block.
Definition: hip_runtime_api.h:323
hipErrorInvalidDevicePointer
hipErrorInvalidDevicePointer
Invalid Device Pointer.
Definition: hip_runtime_api.h:221
hipChooseDevice
hipError_t hipChooseDevice(int *device, const hipDeviceProp_t *prop)
Device which matches hipDeviceProp_t is returned.
hipMemcpy2DAsync
hipError_t hipMemcpy2DAsync(void *dst, size_t dpitch, const void *src, size_t spitch, size_t width, size_t height, hipMemcpyKind kind, hipStream_t stream __dparm(0))
Copies data between host and device.
hipLaunchKernel
hipError_t hipLaunchKernel(const void *function_address, dim3 numBlocks, dim3 dimBlocks, void **args, size_t sharedMemBytes __dparm(0), hipStream_t stream __dparm(0))
C compliant kernel launch API.
hipMemsetD32
hipError_t hipMemsetD32(hipDeviceptr_t dest, int value, size_t count)
Fills the memory area pointed to by dest with the constant integer value for specified number of time...
hipDeviceProp_t::texturePitchAlignment
size_t texturePitchAlignment
Pitch alignment requirement for texture references bound to pitched memory.
Definition: hip_runtime_api.h:130
hipDeviceAttributeMaxGridDimX
@ hipDeviceAttributeMaxGridDimX
Maximum x-dimension of a grid.
Definition: hip_runtime_api.h:326
hipDeviceArch_t::hasThreadFenceSystem
unsigned hasThreadFenceSystem
__threadfence_system.
Definition: hip_runtime_api.h:67
hipStreamCreate
hipError_t hipStreamCreate(hipStream_t *stream)
Create an asynchronous stream.
hipDeviceGetStreamPriorityRange
hipError_t hipDeviceGetStreamPriorityRange(int *leastPriority, int *greatestPriority)
Returns numerical values that correspond to the least and greatest stream priority.
hipIpcEventHandle_st
Definition: hip_runtime_api.h:101
hipDeviceProp_t::maxTexture3D
int maxTexture3D[3]
Maximum dimensions (width, height, depth) of 3D images, in image elements.
Definition: hip_runtime_api.h:125
hipStreamCreateWithPriority
hipError_t hipStreamCreateWithPriority(hipStream_t *stream, unsigned int flags, int priority)
Create an asynchronous stream with the specified priority.
hipMemAdviseUnsetReadMostly
@ hipMemAdviseUnsetReadMostly
Undo the effect of hipMemAdviseSetReadMostly.
Definition: hip_runtime_api.h:234
hipFuncCache_t
hipFuncCache_t
Definition: hip_runtime_api.h:296
hipDeviceProp_t::cooperativeMultiDeviceUnmatchedBlockDim
int cooperativeMultiDeviceUnmatchedBlockDim
Definition: hip_runtime_api.h:138
hipPeekAtLastError
hipError_t hipPeekAtLastError(void)
Return last error returned by any HIP runtime API call.
hipOccupancyMaxActiveBlocksPerMultiprocessorWithFlags
hipError_t hipOccupancyMaxActiveBlocksPerMultiprocessorWithFlags(int *numBlocks, const void *f, int blockSize, size_t dynSharedMemPerBlk, unsigned int flags __dparm(hipOccupancyDefault))
Returns occupancy for a device function.
hipModuleGetTexRef
hipError_t hipModuleGetTexRef(textureReference **texRef, hipModule_t hmod, const char *name)
returns the handle of the texture reference with the name from the module.
hipMemcpy3DAsync
hipError_t hipMemcpy3DAsync(const struct hipMemcpy3DParms *p, hipStream_t stream __dparm(0))
Copies data between host and device asynchronously.
hipDeviceGetPCIBusId
hipError_t hipDeviceGetPCIBusId(char *pciBusId, int len, int device)
Returns a PCI Bus Id string for the device, overloaded to take int device ID.
hipHostGetFlags
hipError_t hipHostGetFlags(unsigned int *flagsPtr, void *hostPtr)
Return flags associated with host pointer.
hipErrorHostMemoryNotRegistered
hipErrorHostMemoryNotRegistered
Produced when trying to unlock a non-page-locked memory.
Definition: hip_runtime_api.h:274
hipErrorRuntimeOther
hipErrorRuntimeOther
Definition: hip_runtime_api.h:309
hipDeviceAttributeClockRate
@ hipDeviceAttributeClockRate
Peak clock frequency in kilohertz.
Definition: hip_runtime_api.h:337
hipMemGetAddressRange
hipError_t hipMemGetAddressRange(hipDeviceptr_t *pbase, size_t *psize, hipDeviceptr_t dptr)
Get information on memory allocations.
hipDeviceAttributeDirectManagedMemAccessFromHost
@ hipDeviceAttributeDirectManagedMemAccessFromHost
Definition: hip_runtime_api.h:385
hipSurfaceObject_t
unsigned long long hipSurfaceObject_t
Definition: hip_surface_types.h:36
hipStreamWaitEvent
hipError_t hipStreamWaitEvent(hipStream_t stream, hipEvent_t event, unsigned int flags)
Make the specified compute stream wait for an event.
hipDeviceAttributeMaxGridDimZ
@ hipDeviceAttributeMaxGridDimZ
Maximum z-dimension of a grid.
Definition: hip_runtime_api.h:328
hipGetDevice
hipError_t hipGetDevice(int *deviceId)
Return the default device id for the calling host thread.
hipMallocArray
hipError_t hipMallocArray(hipArray **array, const hipChannelFormatDesc *desc, size_t width, size_t height __dparm(0), unsigned int flags __dparm(hipArrayDefault))
Allocate an array on the device.
hipCtxSetCurrent
hipError_t hipCtxSetCurrent(hipCtx_t ctx)
Set the passed context as current/default.
hipMemcpyToArray
hipError_t hipMemcpyToArray(hipArray *dst, size_t wOffset, size_t hOffset, const void *src, size_t count, hipMemcpyKind kind)
Copies data between host and device.
HIP_MEMCPY3D
Definition: driver_types.h:406
hipMemoryTypeDevice
@ hipMemoryTypeDevice
Definition: hip_runtime_api.h:158
hipDeviceAttributeMaxRegistersPerBlock
@ hipDeviceAttributeMaxRegistersPerBlock
Definition: hip_runtime_api.h:333
hipMemcpyDtoDAsync
hipError_t hipMemcpyDtoDAsync(hipDeviceptr_t dst, hipDeviceptr_t src, size_t sizeBytes, hipStream_t stream)
Copy data from Device to Device asynchronously.
hipErrorNoDevice
hipErrorNoDevice
Call to hipGetDeviceCount returned 0 devices.
Definition: hip_runtime_api.h:227
hipDeviceAttributeComputeCapabilityMinor
@ hipDeviceAttributeComputeCapabilityMinor
Minor compute capability version number.
Definition: hip_runtime_api.h:347
hipMemcpy2DFromArray
hipError_t hipMemcpy2DFromArray(void *dst, size_t dpitch, hipArray_const_t src, size_t wOffset, size_t hOffset, size_t width, size_t height, hipMemcpyKind kind)
Copies data between host and device.
hipModuleOccupancyMaxPotentialBlockSizeWithFlags
hipError_t hipModuleOccupancyMaxPotentialBlockSizeWithFlags(int *gridSize, int *blockSize, hipFunction_t f, size_t dynSharedMemPerBlk, int blockSizeLimit, unsigned int flags)
determine the grid and block sizes to achieves maximum occupancy for a kernel
hipDeviceProp_t::l2CacheSize
int l2CacheSize
L2 cache size.
Definition: hip_runtime_api.h:104
hipDevicePrimaryCtxRelease
hipError_t hipDevicePrimaryCtxRelease(hipDevice_t dev)
Release the primary context on the GPU.
hipDeviceProp_t::textureAlignment
size_t textureAlignment
Alignment requirement for textures.
Definition: hip_runtime_api.h:129
hipHostMalloc
hipError_t hipHostMalloc(void **ptr, size_t size, unsigned int flags)
Allocate device accessible page locked host memory.
hipDeviceAttributeKernelExecTimeout
@ hipDeviceAttributeKernelExecTimeout
Run time limit for kernels executed on the device.
Definition: hip_runtime_api.h:371
hipDeviceAttributeL2CacheSize
@ hipDeviceAttributeL2CacheSize
Definition: hip_runtime_api.h:342
hipMemRangeGetAttribute
hipError_t hipMemRangeGetAttribute(void *data, size_t data_size, hipMemRangeAttribute attribute, const void *dev_ptr, size_t count)
Query an attribute of a given memory range in AMD HMM.
hipDeviceGetName
hipError_t hipDeviceGetName(char *name, int len, hipDevice_t device)
Returns an identifer string for the device.
hipDeviceAttributeMaxTexture3DWidth
@ hipDeviceAttributeMaxTexture3DWidth
Maximum dimension width of 3D images in image elements.
Definition: hip_runtime_api.h:361
hipDeviceArch_t::hasSurfaceFuncs
unsigned hasSurfaceFuncs
Surface functions.
Definition: hip_runtime_api.h:71
hipDeviceAttributeIntegrated
@ hipDeviceAttributeIntegrated
iGPU
Definition: hip_runtime_api.h:355
hipDeviceProp_t::isMultiGpuBoard
int isMultiGpuBoard
1 if device is on a multi-GPU board, 0 if not.
Definition: hip_runtime_api.h:115
hipMemcpyParam2DAsync
hipError_t hipMemcpyParam2DAsync(const hip_Memcpy2D *pCopy, hipStream_t stream __dparm(0))
Copies memory for 2D arrays.
hipMemAdvise
hipError_t hipMemAdvise(const void *dev_ptr, size_t count, hipMemoryAdvise advice, int device)
Advise about the usage of a given memory range to AMD HMM.
hipDeviceAttributeMaxGridDimY
@ hipDeviceAttributeMaxGridDimY
Maximum y-dimension of a grid.
Definition: hip_runtime_api.h:327
hipMemoryTypeHost
@ hipMemoryTypeHost
Memory is physically located on host.
Definition: hip_runtime_api.h:157
hipDeviceEnablePeerAccess
hipError_t hipDeviceEnablePeerAccess(int peerDeviceId, unsigned int flags)
Enable direct access from current device's virtual address space to memory allocations physically loc...
hipCtxSetCacheConfig
hipError_t hipCtxSetCacheConfig(hipFuncCache_t cacheConfig)
Set L1/Shared cache partition.
hipErrorInvalidContext
hipErrorInvalidContext
Produced when input context is invalid.
Definition: hip_runtime_api.h:230
hipDeviceArch_t::hasSharedInt64Atomics
unsigned hasSharedInt64Atomics
64-bit integer atomics for shared memory.
Definition: hip_runtime_api.h:55
hipDeviceProp_t::computeMode
int computeMode
Compute mode.
Definition: hip_runtime_api.h:106
hipCtxPushCurrent
hipError_t hipCtxPushCurrent(hipCtx_t ctx)
Push the context to be set as current/ default context.
hipDeviceAttributeIsMultiGpuBoard
@ hipDeviceAttributeIsMultiGpuBoard
Multiple GPU devices.
Definition: hip_runtime_api.h:354
hipSharedMemConfig
hipSharedMemConfig
Definition: hip_runtime_api.h:306
hipDrvMemcpy3D
hipError_t hipDrvMemcpy3D(const HIP_MEMCPY3D *pCopy)
Copies data between host and device.
hipDeviceProp_t::clockRate
int clockRate
Max clock frequency of the multiProcessors in khz.
Definition: hip_runtime_api.h:93
hipErrorPeerAccessNotEnabled
hipErrorPeerAccessNotEnabled
Peer access was never enabled from the current device.
Definition: hip_runtime_api.h:267
hipDeviceComputeCapability
hipError_t hipDeviceComputeCapability(int *major, int *minor, hipDevice_t device)
Returns the compute capability of the device.
hipStreamCallback_t
void(* hipStreamCallback_t)(hipStream_t stream, hipError_t status, void *userData)
Definition: hip_runtime_api.h:1236
hipDeviceArch_t::hasDynamicParallelism
unsigned hasDynamicParallelism
Dynamic parallelism.
Definition: hip_runtime_api.h:73
hipMemoryAdvise
hipMemoryAdvise
Definition: hip_runtime_api.h:231
hip_Memcpy2D
Definition: driver_types.h:98
hipDeviceProp_t::canMapHostMemory
int canMapHostMemory
Check whether HIP can map host memory.
Definition: hip_runtime_api.h:116
hipDeviceProp_t::sharedMemPerBlock
size_t sharedMemPerBlock
Size of shared memory region (in bytes).
Definition: hip_runtime_api.h:87
hipModuleOccupancyMaxPotentialBlockSize
hipError_t hipModuleOccupancyMaxPotentialBlockSize(int *gridSize, int *blockSize, hipFunction_t f, size_t dynSharedMemPerBlk, int blockSizeLimit)
determine the grid and block sizes to achieves maximum occupancy for a kernel
hipIpcCloseMemHandle
hipError_t hipIpcCloseMemHandle(void *devPtr)
Close memory mapped with hipIpcOpenMemHandle.
hipDeviceAttributeConcurrentManagedAccess
@ hipDeviceAttributeConcurrentManagedAccess
Definition: hip_runtime_api.h:387
hipDevicePrimaryCtxGetState
hipError_t hipDevicePrimaryCtxGetState(hipDevice_t dev, unsigned int *flags, int *active)
Get the state of the primary context.
hipDeviceAttributeCooperativeMultiDeviceLaunch
@ hipDeviceAttributeCooperativeMultiDeviceLaunch
Support cooperative launch on multiple devices.
Definition: hip_runtime_api.h:357
hipLaunchCooperativeKernelMultiDevice
hipError_t hipLaunchCooperativeKernelMultiDevice(hipLaunchParams *launchParamsList, int numDevices, unsigned int flags)
Launches kernels on multiple devices where thread blocks can cooperate and synchronize as they execut...
hipDeviceProp_t::maxThreadsPerMultiProcessor
int maxThreadsPerMultiProcessor
Maximum resident threads per multi-processor.
Definition: hip_runtime_api.h:105
hipDeviceSetCacheConfig
hipError_t hipDeviceSetCacheConfig(hipFuncCache_t cacheConfig)
Set L1/Shared cache partition.
hipDeviceProp_t::major
int major
Definition: hip_runtime_api.h:97
hipDeviceAttributeMaxSharedMemoryPerBlock
@ hipDeviceAttributeMaxSharedMemoryPerBlock
Definition: hip_runtime_api.h:329
hipMemcpyAtoH
hipError_t hipMemcpyAtoH(void *dst, hipArray *srcArray, size_t srcOffset, size_t count)
Copies data between host and device.
hipGetDeviceCount
hipError_t hipGetDeviceCount(int *count)
Return number of compute-capable devices.
hipSuccess
hipSuccess
Successful completion.
Definition: hip_runtime_api.h:204
hipModuleOccupancyMaxActiveBlocksPerMultiprocessorWithFlags
hipError_t hipModuleOccupancyMaxActiveBlocksPerMultiprocessorWithFlags(int *numBlocks, hipFunction_t f, int blockSize, size_t dynSharedMemPerBlk, unsigned int flags)
Returns occupancy for a device function.
hipHostUnregister
hipError_t hipHostUnregister(void *hostPtr)
Un-register host pointer.
hipStreamGetFlags
hipError_t hipStreamGetFlags(hipStream_t stream, unsigned int *flags)
Return flags associated with this stream.
hipMemsetD8Async
hipError_t hipMemsetD8Async(hipDeviceptr_t dest, unsigned char value, size_t count, hipStream_t stream __dparm(0))
Fills the first sizeBytes bytes of the memory area pointed to by dest with the constant byte value va...
hipDeviceAttributeMaxThreadsPerBlock
@ hipDeviceAttributeMaxThreadsPerBlock
Maximum number of threads per block.
Definition: hip_runtime_api.h:322
hipDeviceProp_t::gcnArch
int gcnArch
DEPRECATED: use gcnArchName instead.
Definition: hip_runtime_api.h:117
hipStreamSynchronize
hipError_t hipStreamSynchronize(hipStream_t stream)
Wait for all commands in stream to complete.
hipGetErrorName
const char * hipGetErrorName(hipError_t hip_error)
Return name of the specified error code in text form.
hipDeviceProp_t::kernelExecTimeoutEnabled
int kernelExecTimeoutEnabled
Run time limit for kernels executed on the device.
Definition: hip_runtime_api.h:131
hipDeviceGet
hipError_t hipDeviceGet(hipDevice_t *device, int ordinal)
Returns a handle to a compute device.
hipMemcpyDtoD
hipError_t hipMemcpyDtoD(hipDeviceptr_t dst, hipDeviceptr_t src, size_t sizeBytes)
Copy data from Device to Device.
hipDeviceProp_t::maxTexture1D
int maxTexture1D
Maximum number of elements in 1D images.
Definition: hip_runtime_api.h:123
hipMemcpy3DParms
Definition: driver_types.h:395
hipDeviceAttributeMaxBlockDimZ
@ hipDeviceAttributeMaxBlockDimZ
Maximum z-dimension of a block.
Definition: hip_runtime_api.h:325
hipIpcGetMemHandle
hipError_t hipIpcGetMemHandle(hipIpcMemHandle_t *handle, void *devPtr)
Gets an interprocess memory handle for an existing device memory allocation.
hipMemcpyHtoD
hipError_t hipMemcpyHtoD(hipDeviceptr_t dst, void *src, size_t sizeBytes)
Copy data from Host to Device.
hipDriverGetVersion
hipError_t hipDriverGetVersion(int *driverVersion)
Returns the approximate HIP driver version.
hipDeviceArch_t::hasDoubles
unsigned hasDoubles
Double-precision floating point.
Definition: hip_runtime_api.h:58
hipErrorInvalidKernelFile
hipErrorInvalidKernelFile
In CUDA DRV, it is CUDA_ERROR_INVALID_PTX.
Definition: hip_runtime_api.h:247
hipDeviceProp_t::maxThreadsPerBlock
int maxThreadsPerBlock
Max work items per work group or workgroup max size.
Definition: hip_runtime_api.h:90
hipCtxGetFlags
hipError_t hipCtxGetFlags(unsigned int *flags)
Return flags used for creating default context.
hipDeviceAttributeMaxBlockDimY
@ hipDeviceAttributeMaxBlockDimY
Maximum y-dimension of a block.
Definition: hip_runtime_api.h:324
hipMemcpy2DToArray
hipError_t hipMemcpy2DToArray(hipArray *dst, size_t wOffset, size_t hOffset, const void *src, size_t spitch, size_t width, size_t height, hipMemcpyKind kind)
Copies data between host and device.
hipMemAllocPitch
hipError_t hipMemAllocPitch(hipDeviceptr_t *dptr, size_t *pitch, size_t widthInBytes, size_t height, unsigned int elementSizeBytes)
hipDeviceProp_t
Definition: hip_runtime_api.h:84
hipMemAllocHost
hipError_t hipMemAllocHost(void **ptr, size_t size)
Allocate pinned host memory [Deprecated].
Definition: hip_runtime_api.h:947
hipMallocHost
hipError_t hipMallocHost(void **ptr, size_t size)
Allocate pinned host memory [Deprecated].
Definition: hip_runtime_api.h:941
hipDeviceAttributeMaxTexture2DHeight
@ hipDeviceAttributeMaxTexture2DHeight
Maximum dimension height of 2D images in image elements.
Definition: hip_runtime_api.h:360
hipDeviceArch_t::hasSharedInt32Atomics
unsigned hasSharedInt32Atomics
32-bit integer atomics for shared memory.
Definition: hip_runtime_api.h:49
hipErrorInvalidValue
hipErrorInvalidValue
Definition: hip_runtime_api.h:205
hipDeviceProp_t::memPitch
size_t memPitch
Maximum pitch in bytes allowed by memory copies.
Definition: hip_runtime_api.h:128
hipMemsetD32Async
hipError_t hipMemsetD32Async(hipDeviceptr_t dst, int value, size_t count, hipStream_t stream __dparm(0))
Fills the memory area pointed to by dev with the constant integer value for specified number of times...
hipDeviceProp_t::pciBusID
int pciBusID
PCI Bus ID.
Definition: hip_runtime_api.h:112
hipRuntimeGetVersion
hipError_t hipRuntimeGetVersion(int *runtimeVersion)
Returns the approximate HIP Runtime version.
hipDeviceAttributeComputeCapabilityMajor
@ hipDeviceAttributeComputeCapabilityMajor
Major compute capability version number.
Definition: hip_runtime_api.h:346
hipLaunchCooperativeKernel
hipError_t hipLaunchCooperativeKernel(const void *f, dim3 gridDim, dim3 blockDimX, void **kernelParams, unsigned int sharedMemBytes, hipStream_t stream)
launches kernel f with launch parameters and shared memory on stream with arguments passed to kernelp...
hipEventQuery
hipError_t hipEventQuery(hipEvent_t event)
Query event status.
hipDeviceAttributeMaxTexture3DDepth
@ hipDeviceAttributeMaxTexture3DDepth
Maximum dimensions depth of 3D images in image elements.
Definition: hip_runtime_api.h:363
hipErrorRuntimeMemory
hipErrorRuntimeMemory
Definition: hip_runtime_api.h:307
hipDeviceAttributeMaxThreadsPerMultiProcessor
@ hipDeviceAttributeMaxThreadsPerMultiProcessor
Definition: hip_runtime_api.h:344
hipStreamGetPriority
hipError_t hipStreamGetPriority(hipStream_t stream, int *priority)
Query the priority of a stream.
hipOccupancyMaxActiveBlocksPerMultiprocessor
hipError_t hipOccupancyMaxActiveBlocksPerMultiprocessor(int *numBlocks, const void *f, int blockSize, size_t dynSharedMemPerBlk)
Returns occupancy for a device function.
hipDeviceProp_t::arch
hipDeviceArch_t arch
Architectural feature flags. New for HIP.
Definition: hip_runtime_api.h:109
hipCtxGetApiVersion
hipError_t hipCtxGetApiVersion(hipCtx_t ctx, int *apiVersion)
Returns the approximate HIP api version.
hipEventSynchronize
hipError_t hipEventSynchronize(hipEvent_t event)
Wait for an event to complete.
hipCtxCreate
hipError_t hipCtxCreate(hipCtx_t *ctx, unsigned int flags, hipDevice_t device)
Create a context and set it as current/ default context.
hipHostFree
hipError_t hipHostFree(void *ptr)
Free memory allocated by the hcc hip host memory allocation API This API performs an implicit hipDevi...
hipDeviceAttributePciBusId
@ hipDeviceAttributePciBusId
PCI Bus ID.
Definition: hip_runtime_api.h:350
hipMemsetD16
hipError_t hipMemsetD16(hipDeviceptr_t dest, unsigned short value, size_t count)
Fills the first sizeBytes bytes of the memory area pointed to by dest with the constant short value v...
hipDeviceProp_t::tccDriver
int tccDriver
1:If device is Tesla device using TCC driver, else 0
Definition: hip_runtime_api.h:133
hipMemRangeAttributeAccessedBy
@ hipMemRangeAttributeAccessedBy
Definition: hip_runtime_api.h:253
hipDeviceGetLimit
hipError_t hipDeviceGetLimit(size_t *pValue, enum hipLimit_t limit)
Get Resource limits of current device.
hipMalloc
hipError_t hipMalloc(void **ptr, size_t size)
Allocate memory on the default accelerator.
hipIpcMemHandle_st
Definition: hip_runtime_api.h:97
hipEventElapsedTime
hipError_t hipEventElapsedTime(float *ms, hipEvent_t start, hipEvent_t stop)
Return the elapsed time between two events.
hipGetLastError
hipError_t hipGetLastError(void)
Return last error returned by any HIP runtime API call and resets the stored error code to hipSuccess...
hipInit
hipError_t hipInit(unsigned int flags)
Explicitly initializes the HIP runtime.
hipDeviceAttributeTexturePitchAlignment
@ hipDeviceAttributeTexturePitchAlignment
Pitch alignment requirement for 2D texture references bound to pitched memory;.
Definition: hip_runtime_api.h:370
hipDeviceAttributeWarpSize
@ hipDeviceAttributeWarpSize
Warp size in threads.
Definition: hip_runtime_api.h:332
hipDeviceArch_t::hasGlobalInt32Atomics
unsigned hasGlobalInt32Atomics
32-bit integer atomics for global memory.
Definition: hip_runtime_api.h:47
hipFuncSetCacheConfig
hipError_t hipFuncSetCacheConfig(const void *func, hipFuncCache_t config)
Set Cache configuration for a specific function.
hipCtxPopCurrent
hipError_t hipCtxPopCurrent(hipCtx_t *ctx)
Pop the current/default context and return the popped context.
hipArray
Definition: driver_types.h:84
hipDeviceArch_t::hasSyncThreadsExt
unsigned hasSyncThreadsExt
__syncthreads_count, syncthreads_and, syncthreads_or.
Definition: hip_runtime_api.h:68
hipErrorInvalidDevice
hipErrorInvalidDevice
DeviceID must be in range 0...#compute-devices.
Definition: hip_runtime_api.h:228
hipDeviceArch_t::hasFunnelShift
unsigned hasFunnelShift
Funnel two words into one with shift&mask caps.
Definition: hip_runtime_api.h:64
hipDeviceAttributeMaxTexture3DHeight
@ hipDeviceAttributeMaxTexture3DHeight
Maximum dimensions height of 3D images in image elements.
Definition: hip_runtime_api.h:362
hipDeviceAttributeMemoryClockRate
@ hipDeviceAttributeMemoryClockRate
Peak memory clock frequency in kilohertz.
Definition: hip_runtime_api.h:338
hipErrorNotReady
hipErrorNotReady
Definition: hip_runtime_api.h:258
hipHostGetDevicePointer
hipError_t hipHostGetDevicePointer(void **devPtr, void *hstPtr, unsigned int flags)
Get Device pointer from Host Pointer allocated through hipHostMalloc.
hipMemGetInfo
hipError_t hipMemGetInfo(size_t *free, size_t *total)
Query memory info. Return snapshot of free memory, and total allocatable memory on the device.
hipEventDestroy
hipError_t hipEventDestroy(hipEvent_t event)
Destroy the specified event.
hipDeviceSetSharedMemConfig
hipError_t hipDeviceSetSharedMemConfig(hipSharedMemConfig config)
The bank width of shared memory on current device is set.
hipDeviceReset
hipError_t hipDeviceReset(void)
The state of current device is discarded and updated to a fresh state.
hipDeviceProp_t::maxGridSize
int maxGridSize[3]
Max grid dimensions (XYZ).
Definition: hip_runtime_api.h:92
hipDeviceAttributeComputeMode
@ hipDeviceAttributeComputeMode
Compute mode that device is currently in.
Definition: hip_runtime_api.h:341
hipSetDeviceFlags
hipError_t hipSetDeviceFlags(unsigned flags)
The current device behavior is changed according the flags passed.
hipCtxGetCurrent
hipError_t hipCtxGetCurrent(hipCtx_t *ctx)
Get the handle of the current/ default context.
hipDeviceAttributePciDeviceId
@ hipDeviceAttributePciDeviceId
PCI Device ID.
Definition: hip_runtime_api.h:351
hipFuncGetAttributes
hipError_t hipFuncGetAttributes(struct hipFuncAttributes *attr, const void *func)
Find out attributes for a given function.
HIP_ARRAY3D_DESCRIPTOR
Definition: driver_types.h:75
hipFuncGetAttribute
hipError_t hipFuncGetAttribute(int *value, hipFunction_attribute attrib, hipFunction_t hfunc)
Find out a specific attribute for a given function.
hipDeviceProp_t::maxSharedMemoryPerMultiProcessor
size_t maxSharedMemoryPerMultiProcessor
Maximum Shared Memory Per Multiprocessor.
Definition: hip_runtime_api.h:114
hipDeviceProp_t::clockInstructionRate
int clockInstructionRate
Definition: hip_runtime_api.h:107
dim3
Definition: hip_runtime_api.h:318
hipStreamQuery
hipError_t hipStreamQuery(hipStream_t stream)
Return hipSuccess if all of the operations in the specified stream have completed,...
hipStreamAttachMemAsync
hipError_t hipStreamAttachMemAsync(hipStream_t stream, hipDeviceptr_t *dev_ptr, size_t length __dparm(0), unsigned int flags __dparm(hipMemAttachSingle))
Attach memory to a stream asynchronously in AMD HMM.
hipMemcpy2DToArrayAsync
hipError_t hipMemcpy2DToArrayAsync(hipArray *dst, size_t wOffset, size_t hOffset, const void *src, size_t spitch, size_t width, size_t height, hipMemcpyKind kind, hipStream_t stream __dparm(0))
Copies data between host and device.
hipDevicePrimaryCtxSetFlags
hipError_t hipDevicePrimaryCtxSetFlags(hipDevice_t dev, unsigned int flags)
Set flags for the primary context.
hipPointerAttribute_t
Definition: hip_runtime_api.h:169
hipDeviceAttributeTotalConstantMemory
@ hipDeviceAttributeTotalConstantMemory
Constant memory size in bytes.
Definition: hip_runtime_api.h:331
hipFree
hipError_t hipFree(void *ptr)
Free memory allocated by the hcc hip memory allocation API. This API performs an implicit hipDeviceSy...
hipDeviceArch_t::hasWarpShuffle
unsigned hasWarpShuffle
Warp shuffle operations. (__shfl_*).
Definition: hip_runtime_api.h:63
hipArrayDefault
#define hipArrayDefault
Default HIP array allocation flag.
Definition: hip_runtime_api.h:203
hipDevicePrimaryCtxRetain
hipError_t hipDevicePrimaryCtxRetain(hipCtx_t *pctx, hipDevice_t dev)
Retain the primary context on the GPU.
hipCtxSynchronize
hipError_t hipCtxSynchronize(void)
Blocks until the default context has completed all preceding requested tasks.
hipFreeHost
hipError_t hipFreeHost(void *ptr)
Free memory allocated by the hcc hip host memory allocation API. [Deprecated].
Definition: hip_runtime_api.h:1030
hipMemcpyHtoA
hipError_t hipMemcpyHtoA(hipArray *dstArray, size_t dstOffset, const void *srcHost, size_t count)
Copies data between host and device.
hipDeviceProp_t::memoryBusWidth
int memoryBusWidth
Global memory bus width in bits.
Definition: hip_runtime_api.h:95
hipStreamAddCallback
hipError_t hipStreamAddCallback(hipStream_t stream, hipStreamCallback_t callback, void *userData, unsigned int flags)
Adds a callback to be called on the host after all currently enqueued items in the stream have comple...
hipDeviceArch_t::hasWarpVote
unsigned hasWarpVote
Warp vote instructions (__any, __all).
Definition: hip_runtime_api.h:61
hipDeviceProp_t::name
char name[256]
Device name.
Definition: hip_runtime_api.h:85
hipMemcpyDtoHAsync
hipError_t hipMemcpyDtoHAsync(void *dst, hipDeviceptr_t src, size_t sizeBytes, hipStream_t stream)
Copy data from Device to Host asynchronously.
hipDeviceArch_t::hasGlobalFloatAtomicExch
unsigned hasGlobalFloatAtomicExch
32-bit float atomic exch for global memory.
Definition: hip_runtime_api.h:48
hipDeviceProp_t::concurrentKernels
int concurrentKernels
Device can possibly execute multiple kernels concurrently.
Definition: hip_runtime_api.h:110
hipMemRangeAttributeReadMostly
@ hipMemRangeAttributeReadMostly
Definition: hip_runtime_api.h:250
hipDeviceArch_t::hasWarpBallot
unsigned hasWarpBallot
Warp ballot instructions (__ballot).
Definition: hip_runtime_api.h:62
hipMemRangeAttributeLastPrefetchLocation
@ hipMemRangeAttributeLastPrefetchLocation
The last location to which the range was prefetched.
Definition: hip_runtime_api.h:255
hipDeviceProp_t::totalGlobalMem
size_t totalGlobalMem
Size of global memory region (in bytes).
Definition: hip_runtime_api.h:86
hipDeviceAttributeTextureAlignment
@ hipDeviceAttributeTextureAlignment
Alignment requirement for textures.
Definition: hip_runtime_api.h:369
hipEventRecord
hipError_t hipEventRecord(hipEvent_t event, hipStream_t stream)
Record an event in the specified stream.
hipDrvMemcpy3DAsync
hipError_t hipDrvMemcpy3DAsync(const HIP_MEMCPY3D *pCopy, hipStream_t stream)
Copies data between host and device asynchronously.
hipMemcpy2D
hipError_t hipMemcpy2D(void *dst, size_t dpitch, const void *src, size_t spitch, size_t width, size_t height, hipMemcpyKind kind)
Copies data between host and device.
hipExtent
Definition: driver_types.h:382
hipPitchedPtr
Definition: driver_types.h:375
hipMemAdviseSetReadMostly
@ hipMemAdviseSetReadMostly
Definition: hip_runtime_api.h:232
hipMemset2D
hipError_t hipMemset2D(void *dst, size_t pitch, int value, size_t width, size_t height)
Fills the memory area pointed to by dst with the constant value.
hipMemset3D
hipError_t hipMemset3D(hipPitchedPtr pitchedDevPtr, int value, hipExtent extent)
Fills synchronously the memory area pointed to by pitchedDevPtr with the constant value.
hipStreamCreateWithFlags
hipError_t hipStreamCreateWithFlags(hipStream_t *stream, unsigned int flags)
Create an asynchronous stream.
hipDeviceGetAttribute
hipError_t hipDeviceGetAttribute(int *pi, hipDeviceAttribute_t attr, int deviceId)
Query for a specific device attribute.
hipMemcpyFromArray
hipError_t hipMemcpyFromArray(void *dst, hipArray_const_t srcArray, size_t wOffset, size_t hOffset, size_t count, hipMemcpyKind kind)
Copies data between host and device.
hipDeviceAttributeCanMapHostMemory
@ hipDeviceAttributeCanMapHostMemory
Device can map host memory into device address space.
Definition: hip_runtime_api.h:372
hipDeviceProp_t::maxThreadsDim
int maxThreadsDim[3]
Max number of threads in each dimension (XYZ) of a block.
Definition: hip_runtime_api.h:91
hipMemcpyPeerAsync
hipError_t hipMemcpyPeerAsync(void *dst, int dstDeviceId, const void *src, int srcDevice, size_t sizeBytes, hipStream_t stream __dparm(0))
Copies memory from one device to memory on another device.
hipMemcpyHtoDAsync
hipError_t hipMemcpyHtoDAsync(hipDeviceptr_t dst, void *src, size_t sizeBytes, hipStream_t stream)
Copy data from Host to Device asynchronously.
hipDeviceProp_t::cooperativeMultiDeviceLaunch
int cooperativeMultiDeviceLaunch
HIP device supports cooperative launch on multiple devices.
Definition: hip_runtime_api.h:121
hipMemcpyDtoH
hipError_t hipMemcpyDtoH(void *dst, hipDeviceptr_t src, size_t sizeBytes)
Copy data from Device to Host.
hipDeviceArch_t::has3dGrid
unsigned has3dGrid
Grid and group dims are 3D (rather than 2D).
Definition: hip_runtime_api.h:72
hipDeviceGetCacheConfig
hipError_t hipDeviceGetCacheConfig(hipFuncCache_t *cacheConfig)
Set Cache configuration for a specific function.
hipMemcpyPeer
hipError_t hipMemcpyPeer(void *dst, int dstDeviceId, const void *src, int srcDeviceId, size_t sizeBytes)
Copies memory from one device to memory on another device.
hipDeviceAttributeMaxTexture1DWidth
@ hipDeviceAttributeMaxTexture1DWidth
Maximum number of elements in 1D images.
Definition: hip_runtime_api.h:358
hipDeviceAttributeCooperativeLaunch
@ hipDeviceAttributeCooperativeLaunch
Support cooperative launch.
Definition: hip_runtime_api.h:356
hipModuleLoadDataEx
hipError_t hipModuleLoadDataEx(hipModule_t *module, const void *image, unsigned int numOptions, hipJitOption *options, void **optionValues)
builds module from code object which resides in host memory. Image is pointer to that location....
hipDeviceAttributeMultiprocessorCount
@ hipDeviceAttributeMultiprocessorCount
Number of multiprocessors on the device.
Definition: hip_runtime_api.h:340
hipDeviceProp_t::pciDeviceID
int pciDeviceID
PCI Device ID.
Definition: hip_runtime_api.h:113
hipGetDeviceProperties
hipError_t hipGetDeviceProperties(hipDeviceProp_t *prop, int deviceId)
Returns device properties.
hipMemcpy
hipError_t hipMemcpy(void *dst, const void *src, size_t sizeBytes, hipMemcpyKind kind)
Copy data from src to dst.
hipDeviceProp_t::memoryClockRate
int memoryClockRate
Max global memory clock frequency in khz.
Definition: hip_runtime_api.h:94
hipEventCreateWithFlags
hipError_t hipEventCreateWithFlags(hipEvent_t *event, unsigned flags)
Create an event with the specified flags.
hipErrorCooperativeLaunchTooLarge
hipErrorCooperativeLaunchTooLarge
Definition: hip_runtime_api.h:278
hipDeviceProp_t::warpSize
int warpSize
Warp size.
Definition: hip_runtime_api.h:89
hipDeviceTotalMem
hipError_t hipDeviceTotalMem(size_t *bytes, hipDevice_t device)
Returns the total amount of memory on the device.
hipFreeArray
hipError_t hipFreeArray(hipArray *array)
Frees an array on the device.
hipMallocManaged
hipError_t hipMallocManaged(void **dev_ptr, size_t size, unsigned int flags __dparm(hipMemAttachGlobal))
Allocates memory that will be automatically managed by AMD HMM.
hipErrorAssert
hipErrorAssert
Produced when the kernel calls assert.
Definition: hip_runtime_api.h:271
textureReference
Definition: texture_types.h:74
hipDeviceProp_t::cooperativeMultiDeviceUnmatchedFunc
int cooperativeMultiDeviceUnmatchedFunc
Definition: hip_runtime_api.h:134
hipCtxGetSharedMemConfig
hipError_t hipCtxGetSharedMemConfig(hipSharedMemConfig *pConfig)
Get Shared memory bank configuration.
hipDeviceProp_t::cooperativeMultiDeviceUnmatchedGridDim
int cooperativeMultiDeviceUnmatchedGridDim
Definition: hip_runtime_api.h:136
hipDeviceCanAccessPeer
hipError_t hipDeviceCanAccessPeer(int *canAccessPeer, int deviceId, int peerDeviceId)
Determine if a device can access a peer's memory.
hipModuleGetFunction
hipError_t hipModuleGetFunction(hipFunction_t *function, hipModule_t module, const char *kname)
Function with kname will be extracted if present in module.
hipDeviceArch_t::hasFloatAtomicAdd
unsigned hasFloatAtomicAdd
32-bit float atomic add in global and shared memory.
Definition: hip_runtime_api.h:51
hipMemAdviseUnsetPreferredLocation
@ hipMemAdviseUnsetPreferredLocation
Clear the preferred location for the data.
Definition: hip_runtime_api.h:237
hipMemPrefetchAsync
hipError_t hipMemPrefetchAsync(const void *dev_ptr, size_t count, int device, hipStream_t stream __dparm(0))
Prefetches memory to the specified destination device using AMD HMM.
hipCtxDisablePeerAccess
hipError_t hipCtxDisablePeerAccess(hipCtx_t peerCtx)
Disable direct access from current context's virtual address space to memory allocations physically l...
hipDeviceProp_t::cooperativeLaunch
int cooperativeLaunch
HIP device supports cooperative launch.
Definition: hip_runtime_api.h:120
hipMemAdviseSetPreferredLocation
@ hipMemAdviseSetPreferredLocation
Definition: hip_runtime_api.h:235
hipDeviceArch_t::hasSharedFloatAtomicExch
unsigned hasSharedFloatAtomicExch
32-bit float atomic exch for shared memory.
Definition: hip_runtime_api.h:50
hipTextureDesc
Definition: texture_types.h:95
hipResourceViewDesc
Definition: driver_types.h:334
hipDeviceProp_t::multiProcessorCount
int multiProcessorCount
Number of multi-processors (compute units).
Definition: hip_runtime_api.h:103
hipCtxSetSharedMemConfig
hipError_t hipCtxSetSharedMemConfig(hipSharedMemConfig config)
Set Shared memory bank configuration.
hipDeviceProp_t::integrated
int integrated
APU vs dGPU.
Definition: hip_runtime_api.h:119
hipMemsetD8
hipError_t hipMemsetD8(hipDeviceptr_t dest, unsigned char value, size_t count)
Fills the first sizeBytes bytes of the memory area pointed to by dest with the constant byte value va...
hipMemset2DAsync
hipError_t hipMemset2DAsync(void *dst, size_t pitch, int value, size_t width, size_t height, hipStream_t stream __dparm(0))
Fills asynchronously the memory area pointed to by dst with the constant value.
hipDeviceProp_t::ECCEnabled
int ECCEnabled
Device has ECC support enabled.
Definition: hip_runtime_api.h:132
HIP_ARRAY_DESCRIPTOR
Definition: driver_types.h:68
hipCtxGetDevice
hipError_t hipCtxGetDevice(hipDevice_t *device)
Get the handle of the device associated with current/default context.
hipDeviceProp_t::totalConstMem
size_t totalConstMem
Size of shared memory region (in bytes).
Definition: hip_runtime_api.h:96
hipDeviceProp_t::maxTexture2D
int maxTexture2D[2]
Maximum dimensions (width, height) of 2D images, in image elements.
Definition: hip_runtime_api.h:124
hipLaunchParams_t
Definition: hip_runtime_api.h:327
hipMemRangeAttributePreferredLocation
@ hipMemRangeAttributePreferredLocation
The preferred location of the range.
Definition: hip_runtime_api.h:252
hipErrorHostMemoryAlreadyRegistered
hipErrorHostMemoryAlreadyRegistered
Produced when trying to lock a page-locked memory.
Definition: hip_runtime_api.h:272
hipFuncAttribute
hipFuncAttribute
Definition: hip_runtime_api.h:287
hipDeviceAttribute_t
hipDeviceAttribute_t
Definition: hip_runtime_api.h:321
hipFuncSetSharedMemConfig
hipError_t hipFuncSetSharedMemConfig(const void *func, hipSharedMemConfig config)
Set shared memory configuation for a specific function.
hipResourceDesc
Definition: driver_types.h:273
hipErrorLaunchFailure
hipErrorLaunchFailure
An exception occurred on the device while executing a kernel.
Definition: hip_runtime_api.h:276
hipDeviceSynchronize
hipError_t hipDeviceSynchronize(void)
Waits on all active streams on current device.
hipCtxGetCacheConfig
hipError_t hipCtxGetCacheConfig(hipFuncCache_t *cacheConfig)
Set Cache configuration for a specific function.
hipCtxDestroy
hipError_t hipCtxDestroy(hipCtx_t ctx)
Destroy a HIP context.
hipModuleLaunchKernel
hipError_t hipModuleLaunchKernel(hipFunction_t f, unsigned int gridDimX, unsigned int gridDimY, unsigned int gridDimZ, unsigned int blockDimX, unsigned int blockDimY, unsigned int blockDimZ, unsigned int sharedMemBytes, hipStream_t stream, void **kernelParams, void **extra)
launches kernel f with launch parameters and shared memory on stream with arguments passed to kernelp...
hipDeviceAttributeConcurrentKernels
@ hipDeviceAttributeConcurrentKernels
Definition: hip_runtime_api.h:348
hipDeviceProp_t::cooperativeMultiDeviceUnmatchedSharedMem
int cooperativeMultiDeviceUnmatchedSharedMem
Definition: hip_runtime_api.h:140
hipProfilerStart
hipError_t hipProfilerStart()
Start recording of profiling information When using this API, start the profiler with profiling disab...
hipDeviceGetSharedMemConfig
hipError_t hipDeviceGetSharedMemConfig(hipSharedMemConfig *pConfig)
Returns bank width of shared memory for current device.
hipErrorNotSupported
hipErrorNotSupported
Produced when the hip API is not supported/implemented.
Definition: hip_runtime_api.h:282
hipMemcpyAsync
hipError_t hipMemcpyAsync(void *dst, const void *src, size_t sizeBytes, hipMemcpyKind kind, hipStream_t stream __dparm(0))
Copy data from src to dst asynchronously.
hipErrorLaunchOutOfResources
hipErrorLaunchOutOfResources
Out of resources error.
Definition: hip_runtime_api.h:263
hipOccupancyMaxPotentialBlockSize
hipError_t hipOccupancyMaxPotentialBlockSize(int *gridSize, int *blockSize, const void *f, size_t dynSharedMemPerBlk, int blockSizeLimit)
determine the grid and block sizes to achieves maximum occupancy for a kernel
hipStreamDestroy
hipError_t hipStreamDestroy(hipStream_t stream)
Destroys the specified stream.
hipHostRegister
hipError_t hipHostRegister(void *hostPtr, size_t sizeBytes, unsigned int flags)
Register host memory so it can be accessed from the current device.
hipModuleLoad
hipError_t hipModuleLoad(hipModule_t *module, const char *fname)
Loads code object from file into a hipModule_t.
hipProfilerStop
hipError_t hipProfilerStop()
Stop recording of profiling information. When using this API, start the profiler with profiling disab...
hipEventCreate
hipError_t hipEventCreate(hipEvent_t *event)
hipDeviceAttributePageableMemoryAccess
@ hipDeviceAttributePageableMemoryAccess
Definition: hip_runtime_api.h:389
hipMemsetAsync
hipError_t hipMemsetAsync(void *dst, int value, size_t sizeBytes, hipStream_t stream __dparm(0))
Fills the first sizeBytes bytes of the memory area pointed to by dev with the constant byte value value.
hipDeviceAttributeMaxTexture2DWidth
@ hipDeviceAttributeMaxTexture2DWidth
Maximum dimension width of 2D images in image elements.
Definition: hip_runtime_api.h:359
hipDeviceProp_t::pciDomainID
int pciDomainID
PCI Domain ID.
Definition: hip_runtime_api.h:111
hipModuleLoadData
hipError_t hipModuleLoadData(hipModule_t *module, const void *image)
builds module from code object which resides in host memory. Image is pointer to that location.
hipMemAttachSingle
#define hipMemAttachSingle
the associated device
Definition: hip_runtime_api.h:174
hipMemcpyParam2D
hipError_t hipMemcpyParam2D(const hip_Memcpy2D *pCopy)
Copies memory for 2D arrays.
hipHostAlloc
hipError_t hipHostAlloc(void **ptr, size_t size, unsigned int flags)
Allocate device accessible page locked host memory [Deprecated].
Definition: hip_runtime_api.h:953
hipMemset
hipError_t hipMemset(void *dst, int value, size_t sizeBytes)
Fills the first sizeBytes bytes of the memory area pointed to by dest with the constant byte value value.
hipDeviceDisablePeerAccess
hipError_t hipDeviceDisablePeerAccess(int peerDeviceId)
Disable direct access from current device's virtual address space to memory allocations physically located on the peer device.
hipModuleOccupancyMaxActiveBlocksPerMultiprocessor
hipError_t hipModuleOccupancyMaxActiveBlocksPerMultiprocessor(int *numBlocks, hipFunction_t f, int blockSize, size_t dynSharedMemPerBlk)
Returns occupancy for a device function.
hipDeviceAttributeEccEnabled
@ hipDeviceAttributeEccEnabled
Device has ECC support enabled.
Definition: hip_runtime_api.h:373
hipFuncSetAttribute
hipError_t hipFuncSetAttribute(const void *func, hipFuncAttribute attr, int value)
Set attribute for a specific function.
hipDeviceAttributeMaxSharedMemoryPerMultiprocessor
@ hipDeviceAttributeMaxSharedMemoryPerMultiprocessor
Definition: hip_runtime_api.h:352
hipDevicePrimaryCtxReset
hipError_t hipDevicePrimaryCtxReset(hipDevice_t dev)
Resets the primary context on the GPU.
hipChannelFormatDesc
Definition: driver_types.h:44
hipModuleUnload
hipError_t hipModuleUnload(hipModule_t module)
Frees the module.
hipMemAdviseSetAccessedBy
@ hipMemAdviseSetAccessedBy
Definition: hip_runtime_api.h:238
hipMemRangeAttribute
hipMemRangeAttribute
Definition: hip_runtime_api.h:249
hipErrorPeerAccessAlreadyEnabled
hipErrorPeerAccessAlreadyEnabled
Peer access was already enabled from the current device.
Definition: hip_runtime_api.h:265