HIP: Heterogeneous-computing Interface for Portability
hip_runtime_api.h
1 /*
2 Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved.
3 
4 Permission is hereby granted, free of charge, to any person obtaining a copy
5 of this software and associated documentation files (the "Software"), to deal
6 in the Software without restriction, including without limitation the rights
7 to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
8 copies of the Software, and to permit persons to whom the Software is
9 furnished to do so, subject to the following conditions:
10 
11 The above copyright notice and this permission notice shall be included in
12 all copies or substantial portions of the Software.
13 
14 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
17 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
19 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
20 THE SOFTWARE.
21 */
22 
23 #ifndef HIP_INCLUDE_HIP_NVCC_DETAIL_HIP_RUNTIME_API_H
24 #define HIP_INCLUDE_HIP_NVCC_DETAIL_HIP_RUNTIME_API_H
25 
26 #include <cuda_runtime_api.h>
27 #include <cuda.h>
28 #include <cuda_profiler_api.h>
29 #include <cuda_fp16.h>
30 
31 #ifdef __cplusplus
32 extern "C" {
33 #endif
34 
// __dparm(x) appends "= x" to a parameter declaration when compiled as C++
// (giving it a default argument) and expands to nothing when compiled as C.
35 #ifdef __cplusplus
36 #define __dparm(x) = x
37 #else
38 #define __dparm(x)
39 #endif
40 
41 // Deprecation marker for CUDA-mapped HIP APIs.
// It expands to nothing when __DOXYGEN_ONLY__ or HIP_ENABLE_DEPRECATED is
// defined, to __declspec(deprecated) under _MSC_VER, to
// __attribute__((deprecated)) under __GNUC__, and to nothing otherwise.
42 #if defined(__DOXYGEN_ONLY__) || defined(HIP_ENABLE_DEPRECATED)
43 #define __HIP_DEPRECATED
44 #elif defined(_MSC_VER)
45 #define __HIP_DEPRECATED __declspec(deprecated)
46 #elif defined(__GNUC__)
47 #define __HIP_DEPRECATED __attribute__((deprecated))
48 #else
49 #define __HIP_DEPRECATED
50 #endif
51 
52 
53 // TODO -move to include/hip_runtime_api.h as a common implementation.
// Copy-direction selector for HIP memcpy calls. It is a distinct HIP enum
// rather than an alias of the CUDA one; hipMemcpyKindToCudaMemcpyKind in this
// header converts it to enum cudaMemcpyKind.
58 typedef enum hipMemcpyKind {
59  hipMemcpyHostToHost,
60  hipMemcpyHostToDevice,
61  hipMemcpyDeviceToHost,
62  hipMemcpyDeviceToDevice,
63  hipMemcpyDefault
64 } hipMemcpyKind;
65 
66 // hipDataType: textual alias of cudaDataType and its real (R) / complex (C) enumerators.
67 #define hipDataType cudaDataType
68 #define HIP_R_16F CUDA_R_16F
69 #define HIP_R_32F CUDA_R_32F
70 #define HIP_R_64F CUDA_R_64F
71 #define HIP_C_16F CUDA_C_16F
72 #define HIP_C_32F CUDA_C_32F
73 #define HIP_C_64F CUDA_C_64F
74 
75 // hipLibraryPropertyType: textual alias of libraryPropertyType and its enumerators.
76 #define hipLibraryPropertyType libraryPropertyType
77 #define HIP_LIBRARY_MAJOR_VERSION MAJOR_VERSION
78 #define HIP_LIBRARY_MINOR_VERSION MINOR_VERSION
79 #define HIP_LIBRARY_PATCH_LEVEL PATCH_LEVEL
80 
// HIP_ARRAY_DESCRIPTOR is the driver-API CUDA_ARRAY_DESCRIPTOR under a HIP name.
81 #define HIP_ARRAY_DESCRIPTOR CUDA_ARRAY_DESCRIPTOR
82 
83 // hipArray_Format enumerators: each HIP_AD_FORMAT_* name expands to the matching CU_AD_FORMAT_* value.
84 #define HIP_AD_FORMAT_UNSIGNED_INT8 CU_AD_FORMAT_UNSIGNED_INT8
85 #define HIP_AD_FORMAT_UNSIGNED_INT16 CU_AD_FORMAT_UNSIGNED_INT16
86 #define HIP_AD_FORMAT_UNSIGNED_INT32 CU_AD_FORMAT_UNSIGNED_INT32
87 #define HIP_AD_FORMAT_SIGNED_INT8 CU_AD_FORMAT_SIGNED_INT8
88 #define HIP_AD_FORMAT_SIGNED_INT16 CU_AD_FORMAT_SIGNED_INT16
89 #define HIP_AD_FORMAT_SIGNED_INT32 CU_AD_FORMAT_SIGNED_INT32
90 #define HIP_AD_FORMAT_HALF CU_AD_FORMAT_HALF
91 #define HIP_AD_FORMAT_FLOAT CU_AD_FORMAT_FLOAT
92 
93 // hipArray_Format: the type itself is a textual alias of CUarray_format.
94 #define hipArray_Format CUarray_format
95 
96 inline static CUarray_format hipArray_FormatToCUarray_format(
97  hipArray_Format format) {
98  switch (format) {
99  case HIP_AD_FORMAT_UNSIGNED_INT8:
100  return CU_AD_FORMAT_UNSIGNED_INT8;
101  case HIP_AD_FORMAT_UNSIGNED_INT16:
102  return CU_AD_FORMAT_UNSIGNED_INT16;
103  case HIP_AD_FORMAT_UNSIGNED_INT32:
104  return CU_AD_FORMAT_UNSIGNED_INT32;
105  case HIP_AD_FORMAT_SIGNED_INT8:
106  return CU_AD_FORMAT_SIGNED_INT8;
107  case HIP_AD_FORMAT_SIGNED_INT16:
108  return CU_AD_FORMAT_SIGNED_INT16;
109  case HIP_AD_FORMAT_SIGNED_INT32:
110  return CU_AD_FORMAT_SIGNED_INT32;
111  case HIP_AD_FORMAT_HALF:
112  return CU_AD_FORMAT_HALF;
113  case HIP_AD_FORMAT_FLOAT:
114  return CU_AD_FORMAT_FLOAT;
115  default:
116  return CU_AD_FORMAT_UNSIGNED_INT8;
117  }
118 }
119 
// Driver-API texture address modes: each HIP_TR_ADDRESS_MODE_* name expands
// to the matching CU_TR_ADDRESS_MODE_* value.
120 #define HIP_TR_ADDRESS_MODE_WRAP CU_TR_ADDRESS_MODE_WRAP
121 #define HIP_TR_ADDRESS_MODE_CLAMP CU_TR_ADDRESS_MODE_CLAMP
122 #define HIP_TR_ADDRESS_MODE_MIRROR CU_TR_ADDRESS_MODE_MIRROR
123 #define HIP_TR_ADDRESS_MODE_BORDER CU_TR_ADDRESS_MODE_BORDER
124 
125 // hipAddress_mode: textual alias of CUaddress_mode.
126 #define hipAddress_mode CUaddress_mode
127 
128 inline static CUaddress_mode hipAddress_modeToCUaddress_mode(
129  hipAddress_mode mode) {
130  switch (mode) {
131  case HIP_TR_ADDRESS_MODE_WRAP:
132  return CU_TR_ADDRESS_MODE_WRAP;
133  case HIP_TR_ADDRESS_MODE_CLAMP:
134  return CU_TR_ADDRESS_MODE_CLAMP;
135  case HIP_TR_ADDRESS_MODE_MIRROR:
136  return CU_TR_ADDRESS_MODE_MIRROR;
137  case HIP_TR_ADDRESS_MODE_BORDER:
138  return CU_TR_ADDRESS_MODE_BORDER;
139  default:
140  return CU_TR_ADDRESS_MODE_WRAP;
141  }
142 }
143 
// Driver-API texture filter modes: each HIP_TR_FILTER_MODE_* name expands to
// the matching CU_TR_FILTER_MODE_* value.
144 #define HIP_TR_FILTER_MODE_POINT CU_TR_FILTER_MODE_POINT
145 #define HIP_TR_FILTER_MODE_LINEAR CU_TR_FILTER_MODE_LINEAR
146 
147 // hipFilter_mode: textual alias of CUfilter_mode.
148 #define hipFilter_mode CUfilter_mode
149 
150 inline static CUfilter_mode hipFilter_mode_enumToCUfilter_mode(
151  hipFilter_mode mode) {
152  switch (mode) {
153  case HIP_TR_FILTER_MODE_POINT:
154  return CU_TR_FILTER_MODE_POINT;
155  case HIP_TR_FILTER_MODE_LINEAR:
156  return CU_TR_FILTER_MODE_LINEAR;
157  default:
158  return CU_TR_FILTER_MODE_POINT;
159  }
160 }
161 
162 // hipResourcetype enumerators: each HIP_RESOURCE_TYPE_* name expands to the matching CU_RESOURCE_TYPE_* value.
163 #define HIP_RESOURCE_TYPE_ARRAY CU_RESOURCE_TYPE_ARRAY
164 #define HIP_RESOURCE_TYPE_MIPMAPPED_ARRAY CU_RESOURCE_TYPE_MIPMAPPED_ARRAY
165 #define HIP_RESOURCE_TYPE_LINEAR CU_RESOURCE_TYPE_LINEAR
166 #define HIP_RESOURCE_TYPE_PITCH2D CU_RESOURCE_TYPE_PITCH2D
167 
168 // hipResourcetype: textual alias of the driver-API CUresourcetype.
169 #define hipResourcetype CUresourcetype
170 
171 inline static CUresourcetype hipResourcetype_enumToCUresourcetype(
172  hipResourcetype resType) {
173  switch (resType) {
174  case HIP_RESOURCE_TYPE_ARRAY:
175  return CU_RESOURCE_TYPE_ARRAY;
176  case HIP_RESOURCE_TYPE_MIPMAPPED_ARRAY:
177  return CU_RESOURCE_TYPE_MIPMAPPED_ARRAY;
178  case HIP_RESOURCE_TYPE_LINEAR:
179  return CU_RESOURCE_TYPE_LINEAR;
180  case HIP_RESOURCE_TYPE_PITCH2D:
181  return CU_RESOURCE_TYPE_PITCH2D;
182  default:
183  return CU_RESOURCE_TYPE_ARRAY;
184  }
185 }
186 
// Driver-API texture reference and array handles under HIP names.
187 #define hipTexRef CUtexref
188 #define hiparray CUarray
189 
190 // hipTextureAddressMode: typedef of the runtime-API enum cudaTextureAddressMode; enumerators are macro aliases.
191 typedef enum cudaTextureAddressMode hipTextureAddressMode;
192 #define hipAddressModeWrap cudaAddressModeWrap
193 #define hipAddressModeClamp cudaAddressModeClamp
194 #define hipAddressModeMirror cudaAddressModeMirror
195 #define hipAddressModeBorder cudaAddressModeBorder
196 
197 // hipTextureFilterMode: typedef of enum cudaTextureFilterMode; enumerators are macro aliases.
198 typedef enum cudaTextureFilterMode hipTextureFilterMode;
199 #define hipFilterModePoint cudaFilterModePoint
200 #define hipFilterModeLinear cudaFilterModeLinear
201 
202 // hipTextureReadMode: typedef of enum cudaTextureReadMode; enumerators are macro aliases.
203 typedef enum cudaTextureReadMode hipTextureReadMode;
204 #define hipReadModeElementType cudaReadModeElementType
205 #define hipReadModeNormalizedFloat cudaReadModeNormalizedFloat
206 
207 // hipChannelFormatKind: typedef of enum cudaChannelFormatKind; enumerators are macro aliases.
208 typedef enum cudaChannelFormatKind hipChannelFormatKind;
209 #define hipChannelFormatKindSigned cudaChannelFormatKindSigned
210 #define hipChannelFormatKindUnsigned cudaChannelFormatKindUnsigned
211 #define hipChannelFormatKindFloat cudaChannelFormatKindFloat
212 #define hipChannelFormatKindNone cudaChannelFormatKindNone
213 
// hipSurfaceBoundaryMode: macro alias of cudaSurfaceBoundaryMode and its enumerators.
214 #define hipSurfaceBoundaryMode cudaSurfaceBoundaryMode
215 #define hipBoundaryModeZero cudaBoundaryModeZero
216 #define hipBoundaryModeTrap cudaBoundaryModeTrap
217 #define hipBoundaryModeClamp cudaBoundaryModeClamp
218 
219 // hipFuncCache: cache-preference enumerators aliased to the cudaFuncCachePrefer* values.
220 #define hipFuncCachePreferNone cudaFuncCachePreferNone
221 #define hipFuncCachePreferShared cudaFuncCachePreferShared
222 #define hipFuncCachePreferL1 cudaFuncCachePreferL1
223 #define hipFuncCachePreferEqual cudaFuncCachePreferEqual
224 
225 // hipResourceType: runtime-API resource type (distinct from the driver-API hipResourcetype alias).
226 #define hipResourceType cudaResourceType
227 #define hipResourceTypeArray cudaResourceTypeArray
228 #define hipResourceTypeMipmappedArray cudaResourceTypeMipmappedArray
229 #define hipResourceTypeLinear cudaResourceTypeLinear
230 #define hipResourceTypePitch2D cudaResourceTypePitch2D
231 //
232 // hipErrorNoDevice.
233 
234 
// Event flags, aliased to the cudaEvent* flags. The two hipEventReleaseTo*
// flags are defined as 0 on this platform (see the inline comments).
236 #define hipEventDefault cudaEventDefault
237 #define hipEventBlockingSync cudaEventBlockingSync
238 #define hipEventDisableTiming cudaEventDisableTiming
239 #define hipEventInterprocess cudaEventInterprocess
240 #define hipEventReleaseToDevice 0 /* no-op on CUDA platform */
241 #define hipEventReleaseToSystem 0 /* no-op on CUDA platform */
242 
243 
// Host allocation flags, aliased to the cudaHostAlloc* flags. The coherent /
// non-coherent flags are both 0x0 here, so OR-ing them in changes nothing.
244 #define hipHostMallocDefault cudaHostAllocDefault
245 #define hipHostMallocPortable cudaHostAllocPortable
246 #define hipHostMallocMapped cudaHostAllocMapped
247 #define hipHostMallocWriteCombined cudaHostAllocWriteCombined
248 #define hipHostMallocCoherent 0x0
249 #define hipHostMallocNonCoherent 0x0
250 
// Managed-memory attach flags, aliased to cudaMemAttach*.
251 #define hipMemAttachGlobal cudaMemAttachGlobal
252 #define hipMemAttachHost cudaMemAttachHost
253 
// Host registration flags, aliased to cudaHostRegister*.
254 #define hipHostRegisterDefault cudaHostRegisterDefault
255 #define hipHostRegisterPortable cudaHostRegisterPortable
256 #define hipHostRegisterMapped cudaHostRegisterMapped
257 #define hipHostRegisterIoMemory cudaHostRegisterIoMemory
258 
// Driver-API launch-parameter markers plus assorted runtime constants.
259 #define HIP_LAUNCH_PARAM_BUFFER_POINTER CU_LAUNCH_PARAM_BUFFER_POINTER
260 #define HIP_LAUNCH_PARAM_BUFFER_SIZE CU_LAUNCH_PARAM_BUFFER_SIZE
261 #define HIP_LAUNCH_PARAM_END CU_LAUNCH_PARAM_END
262 #define hipLimitMallocHeapSize cudaLimitMallocHeapSize
263 #define hipIpcMemLazyEnablePeerAccess cudaIpcMemLazyEnablePeerAccess
264 
265 #define hipOccupancyDefault cudaOccupancyDefault
266 
// Multi-device cooperative launch flags, aliased to the CUDA equivalents.
267 #define hipCooperativeLaunchMultiDeviceNoPreSync \
268  cudaCooperativeLaunchMultiDeviceNoPreSync
269 #define hipCooperativeLaunchMultiDeviceNoPostSync \
270  cudaCooperativeLaunchMultiDeviceNoPostSync
271 
272 
273 // enum CUjit_option redefines: each hipJitOption* name expands to a CU_JIT_* enumerator.
274 #define hipJitOptionMaxRegisters CU_JIT_MAX_REGISTERS
275 #define hipJitOptionThreadsPerBlock CU_JIT_THREADS_PER_BLOCK
276 #define hipJitOptionWallTime CU_JIT_WALL_TIME
277 #define hipJitOptionInfoLogBuffer CU_JIT_INFO_LOG_BUFFER
278 #define hipJitOptionInfoLogBufferSizeBytes CU_JIT_INFO_LOG_BUFFER_SIZE_BYTES
279 #define hipJitOptionErrorLogBuffer CU_JIT_ERROR_LOG_BUFFER
280 #define hipJitOptionErrorLogBufferSizeBytes CU_JIT_ERROR_LOG_BUFFER_SIZE_BYTES
281 #define hipJitOptionOptimizationLevel CU_JIT_OPTIMIZATION_LEVEL
282 #define hipJitOptionTargetFromContext CU_JIT_TARGET_FROM_CUCONTEXT
283 #define hipJitOptionTarget CU_JIT_TARGET
284 #define hipJitOptionFallbackStrategy CU_JIT_FALLBACK_STRATEGY
285 #define hipJitOptionGenerateDebugInfo CU_JIT_GENERATE_DEBUG_INFO
286 #define hipJitOptionLogVerbose CU_JIT_LOG_VERBOSE
287 #define hipJitOptionGenerateLineInfo CU_JIT_GENERATE_LINE_INFO
288 #define hipJitOptionCacheMode CU_JIT_CACHE_MODE
289 #define hipJitOptionSm3xOpt CU_JIT_NEW_SM3X_OPT
290 #define hipJitOptionFastCompile CU_JIT_FAST_COMPILE
291 #define hipJitOptionNumOptions CU_JIT_NUM_OPTIONS
292 
// Core handle and enum types: HIP names typedef'd onto the CUDA runtime
// (cuda*) or driver (CU*) types.
293 typedef cudaEvent_t hipEvent_t;
294 typedef cudaStream_t hipStream_t;
295 typedef cudaIpcEventHandle_t hipIpcEventHandle_t;
296 typedef cudaIpcMemHandle_t hipIpcMemHandle_t;
297 typedef enum cudaLimit hipLimit_t;
298 typedef enum cudaFuncAttribute hipFuncAttribute;
299 typedef enum cudaFuncCache hipFuncCache_t;
300 typedef CUcontext hipCtx_t;
301 typedef enum cudaSharedMemConfig hipSharedMemConfig;
302 typedef CUfunc_cache hipFuncCache;
303 typedef CUjit_option hipJitOption;
304 typedef CUdevice hipDevice_t;
305 typedef enum cudaDeviceP2PAttr hipDeviceP2PAttr;
306 #define hipDevP2PAttrPerformanceRank cudaDevP2PAttrPerformanceRank
307 #define hipDevP2PAttrAccessSupported cudaDevP2PAttrAccessSupported
308 #define hipDevP2PAttrNativeAtomicSupported cudaDevP2PAttrNativeAtomicSupported
309 #define hipDevP2PAttrHipArrayAccessSupported cudaDevP2PAttrCudaArrayAccessSupported
310 
// Module, function, device-pointer and array types.
// NOTE(review): hipArray_const_t is declared without a const qualifier, so it
// is the same type as hipArray_t here — confirm whether that is intended.
311 typedef CUmodule hipModule_t;
312 typedef CUfunction hipFunction_t;
313 typedef CUdeviceptr hipDeviceptr_t;
314 typedef struct cudaArray hipArray;
315 typedef struct cudaArray* hipArray_t;
316 typedef struct cudaArray* hipArray_const_t;
317 typedef struct cudaFuncAttributes hipFuncAttributes;
318 typedef struct cudaLaunchParams hipLaunchParams;
319 #define hipFunction_attribute CUfunction_attribute
320 #define hip_Memcpy2D CUDA_MEMCPY2D
321 #define hipMemcpy3DParms cudaMemcpy3DParms
322 #define hipArrayDefault cudaArrayDefault
323 #define hipArrayLayered cudaArrayLayered
324 #define hipArraySurfaceLoadStore cudaArraySurfaceLoadStore
325 #define hipArrayCubemap cudaArrayCubemap
326 #define hipArrayTextureGather cudaArrayTextureGather
327 
// Texture/surface object handles and texture-type constants.
328 typedef cudaTextureObject_t hipTextureObject_t;
329 typedef cudaSurfaceObject_t hipSurfaceObject_t;
330 #define hipTextureType1D cudaTextureType1D
331 #define hipTextureType1DLayered cudaTextureType1DLayered
332 #define hipTextureType2D cudaTextureType2D
333 #define hipTextureType2DLayered cudaTextureType2DLayered
334 #define hipTextureType3D cudaTextureType3D
335 #define hipDeviceMapHost cudaDeviceMapHost
336 
// 3D extent / pitched-pointer types and their constructor helpers.
337 typedef struct cudaExtent hipExtent;
338 typedef struct cudaPitchedPtr hipPitchedPtr;
339 #define make_hipExtent make_cudaExtent
340 #define make_hipPos make_cudaPos
341 #define make_hipPitchedPtr make_cudaPitchedPtr
342 // Flags that can be used with hipStreamCreateWithFlags
343 #define hipStreamDefault cudaStreamDefault
344 #define hipStreamNonBlocking cudaStreamNonBlocking
345 
// Texture/resource descriptor structs.
346 typedef struct cudaChannelFormatDesc hipChannelFormatDesc;
347 typedef struct cudaResourceDesc hipResourceDesc;
348 typedef struct cudaTextureDesc hipTextureDesc;
349 typedef struct cudaResourceViewDesc hipResourceViewDesc;
350 // Enumerators for hipSharedMemConfig (shared-memory bank size), aliased to cudaSharedMemBankSize*.
351 #define hipSharedMemBankSizeDefault cudaSharedMemBankSizeDefault
352 #define hipSharedMemBankSizeFourByte cudaSharedMemBankSizeFourByte
353 #define hipSharedMemBankSizeEightByte cudaSharedMemBankSizeEightByte
354 
355 // Function attributes: each HIP_FUNC_ATTRIBUTE_* name expands to the driver-API CU_FUNC_ATTRIBUTE_* enumerator.
356 #define HIP_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK CU_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK
357 #define HIP_FUNC_ATTRIBUTE_SHARED_SIZE_BYTES CU_FUNC_ATTRIBUTE_SHARED_SIZE_BYTES
358 #define HIP_FUNC_ATTRIBUTE_CONST_SIZE_BYTES CU_FUNC_ATTRIBUTE_CONST_SIZE_BYTES
359 #define HIP_FUNC_ATTRIBUTE_LOCAL_SIZE_BYTES CU_FUNC_ATTRIBUTE_LOCAL_SIZE_BYTES
360 #define HIP_FUNC_ATTRIBUTE_NUM_REGS CU_FUNC_ATTRIBUTE_NUM_REGS
361 #define HIP_FUNC_ATTRIBUTE_PTX_VERSION CU_FUNC_ATTRIBUTE_PTX_VERSION
362 #define HIP_FUNC_ATTRIBUTE_BINARY_VERSION CU_FUNC_ATTRIBUTE_BINARY_VERSION
363 #define HIP_FUNC_ATTRIBUTE_CACHE_MODE_CA CU_FUNC_ATTRIBUTE_CACHE_MODE_CA
364 #define HIP_FUNC_ATTRIBUTE_MAX_DYNAMIC_SHARED_SIZE_BYTES CU_FUNC_ATTRIBUTE_MAX_DYNAMIC_SHARED_SIZE_BYTES
365 #define HIP_FUNC_ATTRIBUTE_PREFERRED_SHARED_MEMORY_CARVEOUT CU_FUNC_ATTRIBUTE_PREFERRED_SHARED_MEMORY_CARVEOUT
366 #define HIP_FUNC_ATTRIBUTE_MAX CU_FUNC_ATTRIBUTE_MAX
367 
// When CUDA_VERSION >= 9000, redirect the mask-less shuffle names to the
// *_sync intrinsics, always passing the full-warp mask 0xffffffff.
// NOTE(review): the fixed mask names all 32 lanes as participants; callers
// that shuffle from divergent code should confirm this is what they want.
368 #if CUDA_VERSION >= 9000
369 #define __shfl(...) __shfl_sync(0xffffffff, __VA_ARGS__)
370 #define __shfl_up(...) __shfl_up_sync(0xffffffff, __VA_ARGS__)
371 #define __shfl_down(...) __shfl_down_sync(0xffffffff, __VA_ARGS__)
372 #define __shfl_xor(...) __shfl_xor_sync(0xffffffff, __VA_ARGS__)
373 #endif // CUDA_VERSION >= 9000
374 
375 inline static hipError_t hipCUDAErrorTohipError(cudaError_t cuError) {
376  switch (cuError) {
377  case cudaSuccess:
378  return hipSuccess;
379  case cudaErrorProfilerDisabled:
380  return hipErrorProfilerDisabled;
381  case cudaErrorProfilerNotInitialized:
382  return hipErrorProfilerNotInitialized;
383  case cudaErrorProfilerAlreadyStarted:
384  return hipErrorProfilerAlreadyStarted;
385  case cudaErrorProfilerAlreadyStopped:
386  return hipErrorProfilerAlreadyStopped;
387  case cudaErrorInsufficientDriver:
388  return hipErrorInsufficientDriver;
389  case cudaErrorUnsupportedLimit:
390  return hipErrorUnsupportedLimit;
391  case cudaErrorPeerAccessUnsupported:
392  return hipErrorPeerAccessUnsupported;
393  case cudaErrorInvalidGraphicsContext:
394  return hipErrorInvalidGraphicsContext;
395  case cudaErrorSharedObjectSymbolNotFound:
396  return hipErrorSharedObjectSymbolNotFound;
397  case cudaErrorSharedObjectInitFailed:
398  return hipErrorSharedObjectInitFailed;
399  case cudaErrorOperatingSystem:
400  return hipErrorOperatingSystem;
401  case cudaErrorSetOnActiveProcess:
402  return hipErrorSetOnActiveProcess;
403  case cudaErrorIllegalAddress:
404  return hipErrorIllegalAddress;
405  case cudaErrorInvalidSymbol:
406  return hipErrorInvalidSymbol;
407  case cudaErrorMissingConfiguration:
408  return hipErrorMissingConfiguration;
409  case cudaErrorMemoryAllocation:
410  return hipErrorOutOfMemory;
411  case cudaErrorInitializationError:
412  return hipErrorNotInitialized;
413  case cudaErrorLaunchFailure:
414  return hipErrorLaunchFailure;
415  case cudaErrorCooperativeLaunchTooLarge:
417  case cudaErrorPriorLaunchFailure:
418  return hipErrorPriorLaunchFailure;
419  case cudaErrorLaunchOutOfResources:
421  case cudaErrorInvalidDeviceFunction:
422  return hipErrorInvalidDeviceFunction;
423  case cudaErrorInvalidConfiguration:
424  return hipErrorInvalidConfiguration;
425  case cudaErrorInvalidDevice:
426  return hipErrorInvalidDevice;
427  case cudaErrorInvalidValue:
428  return hipErrorInvalidValue;
429  case cudaErrorInvalidDevicePointer:
431  case cudaErrorInvalidMemcpyDirection:
433  case cudaErrorInvalidResourceHandle:
434  return hipErrorInvalidHandle;
435  case cudaErrorNotReady:
436  return hipErrorNotReady;
437  case cudaErrorNoDevice:
438  return hipErrorNoDevice;
439  case cudaErrorPeerAccessAlreadyEnabled:
441  case cudaErrorPeerAccessNotEnabled:
443  case cudaErrorHostMemoryAlreadyRegistered:
445  case cudaErrorHostMemoryNotRegistered:
447  case cudaErrorMapBufferObjectFailed:
448  return hipErrorMapFailed;
449  case cudaErrorAssert:
450  return hipErrorAssert;
451  case cudaErrorNotSupported:
452  return hipErrorNotSupported;
453  case cudaErrorCudartUnloading:
454  return hipErrorDeinitialized;
455  case cudaErrorInvalidKernelImage:
456  return hipErrorInvalidImage;
457  case cudaErrorUnmapBufferObjectFailed:
458  return hipErrorUnmapFailed;
459  case cudaErrorNoKernelImageForDevice:
460  return hipErrorNoBinaryForGpu;
461  case cudaErrorECCUncorrectable:
462  return hipErrorECCNotCorrectable;
463  case cudaErrorDeviceAlreadyInUse:
464  return hipErrorContextAlreadyInUse;
465  case cudaErrorInvalidPtx:
467  case cudaErrorLaunchTimeout:
468  return hipErrorLaunchTimeOut;
469 #if CUDA_VERSION >= 10010
470  case cudaErrorInvalidSource:
471  return hipErrorInvalidSource;
472  case cudaErrorFileNotFound:
473  return hipErrorFileNotFound;
474  case cudaErrorSymbolNotFound:
475  return hipErrorNotFound;
476  case cudaErrorArrayIsMapped:
477  return hipErrorArrayIsMapped;
478  case cudaErrorNotMappedAsPointer:
479  return hipErrorNotMappedAsPointer;
480  case cudaErrorNotMappedAsArray:
481  return hipErrorNotMappedAsArray;
482  case cudaErrorNotMapped:
483  return hipErrorNotMapped;
484  case cudaErrorAlreadyAcquired:
485  return hipErrorAlreadyAcquired;
486  case cudaErrorAlreadyMapped:
487  return hipErrorAlreadyMapped;
488 #endif
489 #if CUDA_VERSION >= 10020
490  case cudaErrorDeviceUninitialized:
491  return hipErrorInvalidContext;
492 #endif
493  case cudaErrorUnknown:
494  default:
495  return hipErrorUnknown; // Note - translated error.
496  }
497 }
498 
499 inline static hipError_t hipCUResultTohipError(CUresult cuError) {
500  switch (cuError) {
501  case CUDA_SUCCESS:
502  return hipSuccess;
503  case CUDA_ERROR_OUT_OF_MEMORY:
504  return hipErrorOutOfMemory;
505  case CUDA_ERROR_INVALID_VALUE:
506  return hipErrorInvalidValue;
507  case CUDA_ERROR_INVALID_DEVICE:
508  return hipErrorInvalidDevice;
509  case CUDA_ERROR_DEINITIALIZED:
510  return hipErrorDeinitialized;
511  case CUDA_ERROR_NO_DEVICE:
512  return hipErrorNoDevice;
513  case CUDA_ERROR_INVALID_CONTEXT:
514  return hipErrorInvalidContext;
515  case CUDA_ERROR_NOT_INITIALIZED:
516  return hipErrorNotInitialized;
517  case CUDA_ERROR_INVALID_HANDLE:
518  return hipErrorInvalidHandle;
519  case CUDA_ERROR_MAP_FAILED:
520  return hipErrorMapFailed;
521  case CUDA_ERROR_PROFILER_DISABLED:
522  return hipErrorProfilerDisabled;
523  case CUDA_ERROR_PROFILER_NOT_INITIALIZED:
524  return hipErrorProfilerNotInitialized;
525  case CUDA_ERROR_PROFILER_ALREADY_STARTED:
526  return hipErrorProfilerAlreadyStarted;
527  case CUDA_ERROR_PROFILER_ALREADY_STOPPED:
528  return hipErrorProfilerAlreadyStopped;
529  case CUDA_ERROR_INVALID_IMAGE:
530  return hipErrorInvalidImage;
531  case CUDA_ERROR_CONTEXT_ALREADY_CURRENT:
532  return hipErrorContextAlreadyCurrent;
533  case CUDA_ERROR_UNMAP_FAILED:
534  return hipErrorUnmapFailed;
535  case CUDA_ERROR_ARRAY_IS_MAPPED:
536  return hipErrorArrayIsMapped;
537  case CUDA_ERROR_ALREADY_MAPPED:
538  return hipErrorAlreadyMapped;
539  case CUDA_ERROR_NO_BINARY_FOR_GPU:
540  return hipErrorNoBinaryForGpu;
541  case CUDA_ERROR_ALREADY_ACQUIRED:
542  return hipErrorAlreadyAcquired;
543  case CUDA_ERROR_NOT_MAPPED:
544  return hipErrorNotMapped;
545  case CUDA_ERROR_NOT_MAPPED_AS_ARRAY:
546  return hipErrorNotMappedAsArray;
547  case CUDA_ERROR_NOT_MAPPED_AS_POINTER:
548  return hipErrorNotMappedAsPointer;
549  case CUDA_ERROR_ECC_UNCORRECTABLE:
550  return hipErrorECCNotCorrectable;
551  case CUDA_ERROR_UNSUPPORTED_LIMIT:
552  return hipErrorUnsupportedLimit;
553  case CUDA_ERROR_CONTEXT_ALREADY_IN_USE:
554  return hipErrorContextAlreadyInUse;
555  case CUDA_ERROR_PEER_ACCESS_UNSUPPORTED:
556  return hipErrorPeerAccessUnsupported;
557  case CUDA_ERROR_INVALID_PTX:
559  case CUDA_ERROR_INVALID_GRAPHICS_CONTEXT:
560  return hipErrorInvalidGraphicsContext;
561  case CUDA_ERROR_INVALID_SOURCE:
562  return hipErrorInvalidSource;
563  case CUDA_ERROR_FILE_NOT_FOUND:
564  return hipErrorFileNotFound;
565  case CUDA_ERROR_SHARED_OBJECT_SYMBOL_NOT_FOUND:
566  return hipErrorSharedObjectSymbolNotFound;
567  case CUDA_ERROR_SHARED_OBJECT_INIT_FAILED:
568  return hipErrorSharedObjectInitFailed;
569  case CUDA_ERROR_OPERATING_SYSTEM:
570  return hipErrorOperatingSystem;
571  case CUDA_ERROR_NOT_FOUND:
572  return hipErrorNotFound;
573  case CUDA_ERROR_NOT_READY:
574  return hipErrorNotReady;
575  case CUDA_ERROR_ILLEGAL_ADDRESS:
576  return hipErrorIllegalAddress;
577  case CUDA_ERROR_LAUNCH_OUT_OF_RESOURCES:
579  case CUDA_ERROR_LAUNCH_TIMEOUT:
580  return hipErrorLaunchTimeOut;
581  case CUDA_ERROR_PEER_ACCESS_ALREADY_ENABLED:
583  case CUDA_ERROR_PEER_ACCESS_NOT_ENABLED:
585  case CUDA_ERROR_PRIMARY_CONTEXT_ACTIVE:
586  return hipErrorSetOnActiveProcess;
587  case CUDA_ERROR_ASSERT:
588  return hipErrorAssert;
589  case CUDA_ERROR_HOST_MEMORY_ALREADY_REGISTERED:
591  case CUDA_ERROR_HOST_MEMORY_NOT_REGISTERED:
593  case CUDA_ERROR_LAUNCH_FAILED:
594  return hipErrorLaunchFailure;
595  case CUDA_ERROR_COOPERATIVE_LAUNCH_TOO_LARGE:
597  case CUDA_ERROR_NOT_SUPPORTED:
598  return hipErrorNotSupported;
599  case CUDA_ERROR_UNKNOWN:
600  default:
601  return hipErrorUnknown; // Note - translated error.
602  }
603 }
604 
605 inline static cudaError_t hipErrorToCudaError(hipError_t hError) {
606  switch (hError) {
607  case hipSuccess:
608  return cudaSuccess;
609  case hipErrorOutOfMemory:
610  return cudaErrorMemoryAllocation;
611  case hipErrorProfilerDisabled:
612  return cudaErrorProfilerDisabled;
613  case hipErrorProfilerNotInitialized:
614  return cudaErrorProfilerNotInitialized;
615  case hipErrorProfilerAlreadyStarted:
616  return cudaErrorProfilerAlreadyStarted;
617  case hipErrorProfilerAlreadyStopped:
618  return cudaErrorProfilerAlreadyStopped;
619  case hipErrorInvalidConfiguration:
620  return cudaErrorInvalidConfiguration;
622  return cudaErrorLaunchOutOfResources;
624  return cudaErrorInvalidValue;
625  case hipErrorInvalidHandle:
626  return cudaErrorInvalidResourceHandle;
628  return cudaErrorInvalidDevice;
630  return cudaErrorInvalidMemcpyDirection;
632  return cudaErrorInvalidDevicePointer;
633  case hipErrorNotInitialized:
634  return cudaErrorInitializationError;
635  case hipErrorNoDevice:
636  return cudaErrorNoDevice;
637  case hipErrorNotReady:
638  return cudaErrorNotReady;
640  return cudaErrorPeerAccessNotEnabled;
642  return cudaErrorPeerAccessAlreadyEnabled;
644  return cudaErrorHostMemoryAlreadyRegistered;
646  return cudaErrorHostMemoryNotRegistered;
647  case hipErrorDeinitialized:
648  return cudaErrorCudartUnloading;
649  case hipErrorInvalidSymbol:
650  return cudaErrorInvalidSymbol;
651  case hipErrorInsufficientDriver:
652  return cudaErrorInsufficientDriver;
653  case hipErrorMissingConfiguration:
654  return cudaErrorMissingConfiguration;
655  case hipErrorPriorLaunchFailure:
656  return cudaErrorPriorLaunchFailure;
657  case hipErrorInvalidDeviceFunction:
658  return cudaErrorInvalidDeviceFunction;
659  case hipErrorInvalidImage:
660  return cudaErrorInvalidKernelImage;
662 #if CUDA_VERSION >= 10020
663  return cudaErrorDeviceUninitialized;
664 #else
665  return cudaErrorUnknown;
666 #endif
667  case hipErrorMapFailed:
668  return cudaErrorMapBufferObjectFailed;
669  case hipErrorUnmapFailed:
670  return cudaErrorUnmapBufferObjectFailed;
671  case hipErrorArrayIsMapped:
672 #if CUDA_VERSION >= 10010
673  return cudaErrorArrayIsMapped;
674 #else
675  return cudaErrorUnknown;
676 #endif
677  case hipErrorAlreadyMapped:
678 #if CUDA_VERSION >= 10010
679  return cudaErrorAlreadyMapped;
680 #else
681  return cudaErrorUnknown;
682 #endif
683  case hipErrorNoBinaryForGpu:
684  return cudaErrorNoKernelImageForDevice;
685  case hipErrorAlreadyAcquired:
686 #if CUDA_VERSION >= 10010
687  return cudaErrorAlreadyAcquired;
688 #else
689  return cudaErrorUnknown;
690 #endif
691  case hipErrorNotMapped:
692 #if CUDA_VERSION >= 10010
693  return cudaErrorNotMapped;
694 #else
695  return cudaErrorUnknown;
696 #endif
697  case hipErrorNotMappedAsArray:
698 #if CUDA_VERSION >= 10010
699  return cudaErrorNotMappedAsArray;
700 #else
701  return cudaErrorUnknown;
702 #endif
703  case hipErrorNotMappedAsPointer:
704 #if CUDA_VERSION >= 10010
705  return cudaErrorNotMappedAsPointer;
706 #else
707  return cudaErrorUnknown;
708 #endif
709  case hipErrorECCNotCorrectable:
710  return cudaErrorECCUncorrectable;
711  case hipErrorUnsupportedLimit:
712  return cudaErrorUnsupportedLimit;
713  case hipErrorContextAlreadyInUse:
714  return cudaErrorDeviceAlreadyInUse;
715  case hipErrorPeerAccessUnsupported:
716  return cudaErrorPeerAccessUnsupported;
718  return cudaErrorInvalidPtx;
719  case hipErrorInvalidGraphicsContext:
720  return cudaErrorInvalidGraphicsContext;
721  case hipErrorInvalidSource:
722 #if CUDA_VERSION >= 10010
723  return cudaErrorInvalidSource;
724 #else
725  return cudaErrorUnknown;
726 #endif
727  case hipErrorFileNotFound:
728 #if CUDA_VERSION >= 10010
729  return cudaErrorFileNotFound;
730 #else
731  return cudaErrorUnknown;
732 #endif
733  case hipErrorSharedObjectSymbolNotFound:
734  return cudaErrorSharedObjectSymbolNotFound;
735  case hipErrorSharedObjectInitFailed:
736  return cudaErrorSharedObjectInitFailed;
737  case hipErrorOperatingSystem:
738  return cudaErrorOperatingSystem;
739  case hipErrorNotFound:
740 #if CUDA_VERSION >= 10010
741  return cudaErrorSymbolNotFound;
742 #else
743  return cudaErrorUnknown;
744 #endif
745  case hipErrorIllegalAddress:
746  return cudaErrorIllegalAddress;
747  case hipErrorLaunchTimeOut:
748  return cudaErrorLaunchTimeout;
749  case hipErrorSetOnActiveProcess:
750  return cudaErrorSetOnActiveProcess;
752  return cudaErrorLaunchFailure;
754  return cudaErrorCooperativeLaunchTooLarge;
756  return cudaErrorNotSupported;
757  // HSA: does not exist in CUDA
759  // HSA: does not exist in CUDA
761  case hipErrorUnknown:
762  case hipErrorTbd:
763  default:
764  return cudaErrorUnknown; // Note - translated error.
765  }
766 }
767 
768 inline static enum cudaMemcpyKind hipMemcpyKindToCudaMemcpyKind(hipMemcpyKind kind) {
769  switch (kind) {
770  case hipMemcpyHostToHost:
771  return cudaMemcpyHostToHost;
772  case hipMemcpyHostToDevice:
773  return cudaMemcpyHostToDevice;
774  case hipMemcpyDeviceToHost:
775  return cudaMemcpyDeviceToHost;
776  case hipMemcpyDeviceToDevice:
777  return cudaMemcpyDeviceToDevice;
778  default:
779  return cudaMemcpyDefault;
780  }
781 }
782 
783 inline static enum cudaTextureAddressMode hipTextureAddressModeToCudaTextureAddressMode(
784  hipTextureAddressMode kind) {
785  switch (kind) {
786  case hipAddressModeWrap:
787  return cudaAddressModeWrap;
788  case hipAddressModeClamp:
789  return cudaAddressModeClamp;
790  case hipAddressModeMirror:
791  return cudaAddressModeMirror;
792  case hipAddressModeBorder:
793  return cudaAddressModeBorder;
794  default:
795  return cudaAddressModeWrap;
796  }
797 }
798 
799 inline static enum cudaTextureFilterMode hipTextureFilterModeToCudaTextureFilterMode(
800  hipTextureFilterMode kind) {
801  switch (kind) {
802  case hipFilterModePoint:
803  return cudaFilterModePoint;
804  case hipFilterModeLinear:
805  return cudaFilterModeLinear;
806  default:
807  return cudaFilterModePoint;
808  }
809 }
810 
811 inline static enum cudaTextureReadMode hipTextureReadModeToCudaTextureReadMode(hipTextureReadMode kind) {
812  switch (kind) {
813  case hipReadModeElementType:
814  return cudaReadModeElementType;
815  case hipReadModeNormalizedFloat:
816  return cudaReadModeNormalizedFloat;
817  default:
818  return cudaReadModeElementType;
819  }
820 }
821 
822 inline static enum cudaChannelFormatKind hipChannelFormatKindToCudaChannelFormatKind(
823  hipChannelFormatKind kind) {
824  switch (kind) {
825  case hipChannelFormatKindSigned:
826  return cudaChannelFormatKindSigned;
827  case hipChannelFormatKindUnsigned:
828  return cudaChannelFormatKindUnsigned;
829  case hipChannelFormatKindFloat:
830  return cudaChannelFormatKindFloat;
831  case hipChannelFormatKindNone:
832  return cudaChannelFormatKindNone;
833  default:
834  return cudaChannelFormatKindNone;
835  }
836 }
837 
// HIPRT_CB aliases CUDART_CB; it decorates the hipStreamCallback_t
// function-pointer type declared on the next line.
841 #define HIPRT_CB CUDART_CB
// Stream callback signature: receives the stream, a hipError_t status and the
// caller-supplied userData pointer, and returns nothing.
842 typedef void(HIPRT_CB* hipStreamCallback_t)(hipStream_t stream, hipError_t status, void* userData);
843 inline static hipError_t hipInit(unsigned int flags) {
844  return hipCUResultTohipError(cuInit(flags));
845 }
846 
847 inline static hipError_t hipDeviceReset() { return hipCUDAErrorTohipError(cudaDeviceReset()); }
848 
849 inline static hipError_t hipGetLastError() { return hipCUDAErrorTohipError(cudaGetLastError()); }
850 
851 inline static hipError_t hipPeekAtLastError() {
852  return hipCUDAErrorTohipError(cudaPeekAtLastError());
853 }
854 
855 inline static hipError_t hipMalloc(void** ptr, size_t size) {
856  return hipCUDAErrorTohipError(cudaMalloc(ptr, size));
857 }
858 
859 inline static hipError_t hipMallocPitch(void** ptr, size_t* pitch, size_t width, size_t height) {
860  return hipCUDAErrorTohipError(cudaMallocPitch(ptr, pitch, width, height));
861 }
862 
863 inline static hipError_t hipMemAllocPitch(hipDeviceptr_t* dptr,size_t* pitch,size_t widthInBytes,size_t height,unsigned int elementSizeBytes){
864  return hipCUResultTohipError(cuMemAllocPitch(dptr,pitch,widthInBytes,height,elementSizeBytes));
865 }
866 
867 inline static hipError_t hipMalloc3D(hipPitchedPtr* pitchedDevPtr, hipExtent extent) {
868  return hipCUDAErrorTohipError(cudaMalloc3D(pitchedDevPtr, extent));
869 }
870 
871 inline static hipError_t hipFree(void* ptr) { return hipCUDAErrorTohipError(cudaFree(ptr)); }
872 
873 inline static hipError_t hipMallocHost(void** ptr, size_t size)
874  __attribute__((deprecated("use hipHostMalloc instead")));
875 inline static hipError_t hipMallocHost(void** ptr, size_t size) {
876  return hipCUDAErrorTohipError(cudaMallocHost(ptr, size));
877 }
878 
879 inline static hipError_t hipMemAllocHost(void** ptr, size_t size)
880  __attribute__((deprecated("use hipHostMalloc instead")));
881 inline static hipError_t hipMemAllocHost(void** ptr, size_t size) {
882  return hipCUResultTohipError(cuMemAllocHost(ptr, size));
883 }
884 
885 inline static hipError_t hipHostAlloc(void** ptr, size_t size, unsigned int flags)
886  __attribute__((deprecated("use hipHostMalloc instead")));
887 inline static hipError_t hipHostAlloc(void** ptr, size_t size, unsigned int flags) {
888  return hipCUDAErrorTohipError(cudaHostAlloc(ptr, size, flags));
889 }
890 
891 inline static hipError_t hipHostMalloc(void** ptr, size_t size, unsigned int flags) {
892  return hipCUDAErrorTohipError(cudaHostAlloc(ptr, size, flags));
893 }
894 
895 inline static hipError_t hipMallocManaged(void** ptr, size_t size, unsigned int flags) {
896  return hipCUDAErrorTohipError(cudaMallocManaged(ptr, size, flags));
897 }
898 
899 inline static hipError_t hipMallocArray(hipArray** array, const hipChannelFormatDesc* desc,
900  size_t width, size_t height,
901  unsigned int flags __dparm(hipArrayDefault)) {
902  return hipCUDAErrorTohipError(cudaMallocArray(array, desc, width, height, flags));
903 }
904 
905 inline static hipError_t hipMalloc3DArray(hipArray** array, const hipChannelFormatDesc* desc,
906  hipExtent extent, unsigned int flags) {
907  return hipCUDAErrorTohipError(cudaMalloc3DArray(array, desc, extent, flags));
908 }
909 
910 inline static hipError_t hipFreeArray(hipArray* array) {
911  return hipCUDAErrorTohipError(cudaFreeArray(array));
912 }
913 
914 inline static hipError_t hipHostGetDevicePointer(void** devPtr, void* hostPtr, unsigned int flags) {
915  return hipCUDAErrorTohipError(cudaHostGetDevicePointer(devPtr, hostPtr, flags));
916 }
917 
918 inline static hipError_t hipHostGetFlags(unsigned int* flagsPtr, void* hostPtr) {
919  return hipCUDAErrorTohipError(cudaHostGetFlags(flagsPtr, hostPtr));
920 }
921 
922 inline static hipError_t hipHostRegister(void* ptr, size_t size, unsigned int flags) {
923  return hipCUDAErrorTohipError(cudaHostRegister(ptr, size, flags));
924 }
925 
926 inline static hipError_t hipHostUnregister(void* ptr) {
927  return hipCUDAErrorTohipError(cudaHostUnregister(ptr));
928 }
929 
930 inline static hipError_t hipFreeHost(void* ptr)
931  __attribute__((deprecated("use hipHostFree instead")));
932 inline static hipError_t hipFreeHost(void* ptr) {
933  return hipCUDAErrorTohipError(cudaFreeHost(ptr));
934 }
935 
936 inline static hipError_t hipHostFree(void* ptr) {
937  return hipCUDAErrorTohipError(cudaFreeHost(ptr));
938 }
939 
940 inline static hipError_t hipSetDevice(int device) {
941  return hipCUDAErrorTohipError(cudaSetDevice(device));
942 }
943 
944 inline static hipError_t hipChooseDevice(int* device, const hipDeviceProp_t* prop) {
945  struct cudaDeviceProp cdprop;
946  memset(&cdprop, 0x0, sizeof(struct cudaDeviceProp));
947  cdprop.major = prop->major;
948  cdprop.minor = prop->minor;
949  cdprop.totalGlobalMem = prop->totalGlobalMem;
950  cdprop.sharedMemPerBlock = prop->sharedMemPerBlock;
951  cdprop.regsPerBlock = prop->regsPerBlock;
952  cdprop.warpSize = prop->warpSize;
953  cdprop.maxThreadsPerBlock = prop->maxThreadsPerBlock;
954  cdprop.clockRate = prop->clockRate;
955  cdprop.totalConstMem = prop->totalConstMem;
956  cdprop.multiProcessorCount = prop->multiProcessorCount;
957  cdprop.l2CacheSize = prop->l2CacheSize;
958  cdprop.maxThreadsPerMultiProcessor = prop->maxThreadsPerMultiProcessor;
959  cdprop.computeMode = prop->computeMode;
960  cdprop.canMapHostMemory = prop->canMapHostMemory;
961  cdprop.memoryClockRate = prop->memoryClockRate;
962  cdprop.memoryBusWidth = prop->memoryBusWidth;
963  return hipCUDAErrorTohipError(cudaChooseDevice(device, &cdprop));
964 }
965 
966 inline static hipError_t hipMemcpyHtoD(hipDeviceptr_t dst, void* src, size_t size) {
967  return hipCUResultTohipError(cuMemcpyHtoD(dst, src, size));
968 }
969 
970 inline static hipError_t hipMemcpyDtoH(void* dst, hipDeviceptr_t src, size_t size) {
971  return hipCUResultTohipError(cuMemcpyDtoH(dst, src, size));
972 }
973 
974 inline static hipError_t hipMemcpyDtoD(hipDeviceptr_t dst, hipDeviceptr_t src, size_t size) {
975  return hipCUResultTohipError(cuMemcpyDtoD(dst, src, size));
976 }
977 
978 inline static hipError_t hipMemcpyHtoDAsync(hipDeviceptr_t dst, void* src, size_t size,
979  hipStream_t stream) {
980  return hipCUResultTohipError(cuMemcpyHtoDAsync(dst, src, size, stream));
981 }
982 
983 inline static hipError_t hipMemcpyDtoHAsync(void* dst, hipDeviceptr_t src, size_t size,
984  hipStream_t stream) {
985  return hipCUResultTohipError(cuMemcpyDtoHAsync(dst, src, size, stream));
986 }
987 
988 inline static hipError_t hipMemcpyDtoDAsync(hipDeviceptr_t dst, hipDeviceptr_t src, size_t size,
989  hipStream_t stream) {
990  return hipCUResultTohipError(cuMemcpyDtoDAsync(dst, src, size, stream));
991 }
992 
993 inline static hipError_t hipMemcpy(void* dst, const void* src, size_t sizeBytes,
994  hipMemcpyKind copyKind) {
995  return hipCUDAErrorTohipError(
996  cudaMemcpy(dst, src, sizeBytes, hipMemcpyKindToCudaMemcpyKind(copyKind)));
997 }
998 
999 
1000 inline static hipError_t hipMemcpyWithStream(void* dst, const void* src,
1001  size_t sizeBytes, hipMemcpyKind copyKind,
1002  hipStream_t stream) {
1003  cudaError_t error = cudaMemcpyAsync(dst, src, sizeBytes,
1004  hipMemcpyKindToCudaMemcpyKind(copyKind),
1005  stream);
1006 
1007  if (error != cudaSuccess) return hipCUDAErrorTohipError(error);
1008 
1009  return hipCUDAErrorTohipError(cudaStreamSynchronize(stream));
1010 }
1011 
1012 inline static hipError_t hipMemcpyAsync(void* dst, const void* src, size_t sizeBytes,
1013  hipMemcpyKind copyKind, hipStream_t stream __dparm(0)) {
1014  return hipCUDAErrorTohipError(
1015  cudaMemcpyAsync(dst, src, sizeBytes, hipMemcpyKindToCudaMemcpyKind(copyKind), stream));
1016 }
1017 
1018 inline static hipError_t hipMemcpyToSymbol(const void* symbol, const void* src, size_t sizeBytes,
1019  size_t offset __dparm(0),
1020  hipMemcpyKind copyType __dparm(hipMemcpyHostToDevice)) {
1021  return hipCUDAErrorTohipError(cudaMemcpyToSymbol(symbol, src, sizeBytes, offset,
1022  hipMemcpyKindToCudaMemcpyKind(copyType)));
1023 }
1024 
1025 inline static hipError_t hipMemcpyToSymbolAsync(const void* symbol, const void* src,
1026  size_t sizeBytes, size_t offset,
1027  hipMemcpyKind copyType,
1028  hipStream_t stream __dparm(0)) {
1029  return hipCUDAErrorTohipError(cudaMemcpyToSymbolAsync(
1030  symbol, src, sizeBytes, offset, hipMemcpyKindToCudaMemcpyKind(copyType), stream));
1031 }
1032 
1033 inline static hipError_t hipMemcpyFromSymbol(void* dst, const void* symbolName, size_t sizeBytes,
1034  size_t offset __dparm(0),
1035  hipMemcpyKind kind __dparm(hipMemcpyDeviceToHost)) {
1036  return hipCUDAErrorTohipError(cudaMemcpyFromSymbol(dst, symbolName, sizeBytes, offset,
1037  hipMemcpyKindToCudaMemcpyKind(kind)));
1038 }
1039 
1040 inline static hipError_t hipMemcpyFromSymbolAsync(void* dst, const void* symbolName,
1041  size_t sizeBytes, size_t offset,
1042  hipMemcpyKind kind,
1043  hipStream_t stream __dparm(0)) {
1044  return hipCUDAErrorTohipError(cudaMemcpyFromSymbolAsync(
1045  dst, symbolName, sizeBytes, offset, hipMemcpyKindToCudaMemcpyKind(kind), stream));
1046 }
1047 
1048 inline static hipError_t hipGetSymbolAddress(void** devPtr, const void* symbolName) {
1049  return hipCUDAErrorTohipError(cudaGetSymbolAddress(devPtr, symbolName));
1050 }
1051 
1052 inline static hipError_t hipGetSymbolSize(size_t* size, const void* symbolName) {
1053  return hipCUDAErrorTohipError(cudaGetSymbolSize(size, symbolName));
1054 }
1055 
1056 inline static hipError_t hipMemcpy2D(void* dst, size_t dpitch, const void* src, size_t spitch,
1057  size_t width, size_t height, hipMemcpyKind kind) {
1058  return hipCUDAErrorTohipError(
1059  cudaMemcpy2D(dst, dpitch, src, spitch, width, height, hipMemcpyKindToCudaMemcpyKind(kind)));
1060 }
1061 
1062 inline static hipError_t hipMemcpyParam2D(const hip_Memcpy2D* pCopy) {
1063  return hipCUResultTohipError(cuMemcpy2D(pCopy));
1064 }
1065 
1066 inline static hipError_t hipMemcpyParam2DAsync(const hip_Memcpy2D* pCopy, hipStream_t stream __dparm(0)) {
1067  return hipCUResultTohipError(cuMemcpy2DAsync(pCopy, stream));
1068 }
1069 
1070 inline static hipError_t hipMemcpy3D(const struct hipMemcpy3DParms *p)
1071 {
1072  return hipCUDAErrorTohipError(cudaMemcpy3D(p));
1073 }
1074 
1075 inline static hipError_t hipMemcpy3DAsync(const struct hipMemcpy3DParms *p, hipStream_t stream)
1076 {
1077  return hipCUDAErrorTohipError(cudaMemcpy3DAsync(p, stream));
1078 }
1079 
1080 inline static hipError_t hipMemcpy2DAsync(void* dst, size_t dpitch, const void* src, size_t spitch,
1081  size_t width, size_t height, hipMemcpyKind kind,
1082  hipStream_t stream) {
1083  return hipCUDAErrorTohipError(cudaMemcpy2DAsync(dst, dpitch, src, spitch, width, height,
1084  hipMemcpyKindToCudaMemcpyKind(kind), stream));
1085 }
1086 
1087 inline static hipError_t hipMemcpy2DToArray(hipArray* dst, size_t wOffset, size_t hOffset,
1088  const void* src, size_t spitch, size_t width,
1089  size_t height, hipMemcpyKind kind) {
1090  return hipCUDAErrorTohipError(cudaMemcpy2DToArray(dst, wOffset, hOffset, src, spitch, width,
1091  height, hipMemcpyKindToCudaMemcpyKind(kind)));
1092 }
1093 
1094 __HIP_DEPRECATED inline static hipError_t hipMemcpyToArray(hipArray* dst, size_t wOffset,
1095  size_t hOffset, const void* src,
1096  size_t count, hipMemcpyKind kind) {
1097  return hipCUDAErrorTohipError(
1098  cudaMemcpyToArray(dst, wOffset, hOffset, src, count, hipMemcpyKindToCudaMemcpyKind(kind)));
1099 }
1100 
1101 __HIP_DEPRECATED inline static hipError_t hipMemcpyFromArray(void* dst, hipArray_const_t srcArray,
1102  size_t wOffset, size_t hOffset,
1103  size_t count, hipMemcpyKind kind) {
1104  return hipCUDAErrorTohipError(cudaMemcpyFromArray(dst, srcArray, wOffset, hOffset, count,
1105  hipMemcpyKindToCudaMemcpyKind(kind)));
1106 }
1107 
1108 inline static hipError_t hipMemcpyAtoH(void* dst, hipArray* srcArray, size_t srcOffset,
1109  size_t count) {
1110  return hipCUResultTohipError(cuMemcpyAtoH(dst, (CUarray)srcArray, srcOffset, count));
1111 }
1112 
1113 inline static hipError_t hipMemcpyHtoA(hipArray* dstArray, size_t dstOffset, const void* srcHost,
1114  size_t count) {
1115  return hipCUResultTohipError(cuMemcpyHtoA((CUarray)dstArray, dstOffset, srcHost, count));
1116 }
1117 
1118 inline static hipError_t hipDeviceSynchronize() {
1119  return hipCUDAErrorTohipError(cudaDeviceSynchronize());
1120 }
1121 
1122 inline static hipError_t hipDeviceGetCacheConfig(hipFuncCache_t* pCacheConfig) {
1123  return hipCUDAErrorTohipError(cudaDeviceGetCacheConfig(pCacheConfig));
1124 }
1125 
1126 inline static hipError_t hipFuncSetAttribute(const void* func, hipFuncAttribute attr, int value) {
1127  return hipCUDAErrorTohipError(cudaFuncSetAttribute(func, attr, value));
1128 }
1129 
1130 inline static hipError_t hipDeviceSetCacheConfig(hipFuncCache_t cacheConfig) {
1131  return hipCUDAErrorTohipError(cudaDeviceSetCacheConfig(cacheConfig));
1132 }
1133 
1134 inline static hipError_t hipFuncSetSharedMemConfig(const void* func, hipSharedMemConfig config) {
1135  return hipCUDAErrorTohipError(cudaFuncSetSharedMemConfig(func, config));
1136 }
1137 
1138 inline static const char* hipGetErrorString(hipError_t error) {
1139  return cudaGetErrorString(hipErrorToCudaError(error));
1140 }
1141 
1142 inline static const char* hipGetErrorName(hipError_t error) {
1143  return cudaGetErrorName(hipErrorToCudaError(error));
1144 }
1145 
1146 inline static hipError_t hipGetDeviceCount(int* count) {
1147  return hipCUDAErrorTohipError(cudaGetDeviceCount(count));
1148 }
1149 
1150 inline static hipError_t hipGetDevice(int* device) {
1151  return hipCUDAErrorTohipError(cudaGetDevice(device));
1152 }
1153 
1154 inline static hipError_t hipIpcCloseMemHandle(void* devPtr) {
1155  return hipCUDAErrorTohipError(cudaIpcCloseMemHandle(devPtr));
1156 }
1157 
1158 inline static hipError_t hipIpcGetEventHandle(hipIpcEventHandle_t* handle, hipEvent_t event) {
1159  return hipCUDAErrorTohipError(cudaIpcGetEventHandle(handle, event));
1160 }
1161 
1162 inline static hipError_t hipIpcGetMemHandle(hipIpcMemHandle_t* handle, void* devPtr) {
1163  return hipCUDAErrorTohipError(cudaIpcGetMemHandle(handle, devPtr));
1164 }
1165 
1166 inline static hipError_t hipIpcOpenEventHandle(hipEvent_t* event, hipIpcEventHandle_t handle) {
1167  return hipCUDAErrorTohipError(cudaIpcOpenEventHandle(event, handle));
1168 }
1169 
1170 inline static hipError_t hipIpcOpenMemHandle(void** devPtr, hipIpcMemHandle_t handle,
1171  unsigned int flags) {
1172  return hipCUDAErrorTohipError(cudaIpcOpenMemHandle(devPtr, handle, flags));
1173 }
1174 
1175 inline static hipError_t hipMemset(void* devPtr, int value, size_t count) {
1176  return hipCUDAErrorTohipError(cudaMemset(devPtr, value, count));
1177 }
1178 
1179 inline static hipError_t hipMemsetD32(hipDeviceptr_t devPtr, int value, size_t count) {
1180  return hipCUResultTohipError(cuMemsetD32(devPtr, value, count));
1181 }
1182 
1183 inline static hipError_t hipMemsetAsync(void* devPtr, int value, size_t count,
1184  hipStream_t stream __dparm(0)) {
1185  return hipCUDAErrorTohipError(cudaMemsetAsync(devPtr, value, count, stream));
1186 }
1187 
1188 inline static hipError_t hipMemsetD32Async(hipDeviceptr_t devPtr, int value, size_t count,
1189  hipStream_t stream __dparm(0)) {
1190  return hipCUResultTohipError(cuMemsetD32Async(devPtr, value, count, stream));
1191 }
1192 
1193 inline static hipError_t hipMemsetD8(hipDeviceptr_t dest, unsigned char value, size_t sizeBytes) {
1194  return hipCUResultTohipError(cuMemsetD8(dest, value, sizeBytes));
1195 }
1196 
1197 inline static hipError_t hipMemsetD8Async(hipDeviceptr_t dest, unsigned char value, size_t sizeBytes,
1198  hipStream_t stream __dparm(0)) {
1199  return hipCUResultTohipError(cuMemsetD8Async(dest, value, sizeBytes, stream));
1200 }
1201 
1202 inline static hipError_t hipMemsetD16(hipDeviceptr_t dest, unsigned short value, size_t sizeBytes) {
1203  return hipCUResultTohipError(cuMemsetD16(dest, value, sizeBytes));
1204 }
1205 
1206 inline static hipError_t hipMemsetD16Async(hipDeviceptr_t dest, unsigned short value, size_t sizeBytes,
1207  hipStream_t stream __dparm(0)) {
1208  return hipCUResultTohipError(cuMemsetD16Async(dest, value, sizeBytes, stream));
1209 }
1210 
1211 inline static hipError_t hipMemset2D(void* dst, size_t pitch, int value, size_t width, size_t height) {
1212  return hipCUDAErrorTohipError(cudaMemset2D(dst, pitch, value, width, height));
1213 }
1214 
1215 inline static hipError_t hipMemset2DAsync(void* dst, size_t pitch, int value, size_t width, size_t height, hipStream_t stream __dparm(0)) {
1216  return hipCUDAErrorTohipError(cudaMemset2DAsync(dst, pitch, value, width, height, stream));
1217 }
1218 
1219 inline static hipError_t hipMemset3D(hipPitchedPtr pitchedDevPtr, int value, hipExtent extent ){
1220  return hipCUDAErrorTohipError(cudaMemset3D(pitchedDevPtr, value, extent));
1221 }
1222 
1223 inline static hipError_t hipMemset3DAsync(hipPitchedPtr pitchedDevPtr, int value, hipExtent extent, hipStream_t stream __dparm(0) ){
1224  return hipCUDAErrorTohipError(cudaMemset3DAsync(pitchedDevPtr, value, extent, stream));
1225 }
1226 
1227 inline static hipError_t hipGetDeviceProperties(hipDeviceProp_t* p_prop, int device) {
1228  struct cudaDeviceProp cdprop;
1229  cudaError_t cerror;
1230  cerror = cudaGetDeviceProperties(&cdprop, device);
1231 
1232  strncpy(p_prop->name, cdprop.name, 256);
1233  p_prop->totalGlobalMem = cdprop.totalGlobalMem;
1234  p_prop->sharedMemPerBlock = cdprop.sharedMemPerBlock;
1235  p_prop->regsPerBlock = cdprop.regsPerBlock;
1236  p_prop->warpSize = cdprop.warpSize;
1237  p_prop->maxThreadsPerBlock = cdprop.maxThreadsPerBlock;
1238  for (int i = 0; i < 3; i++) {
1239  p_prop->maxThreadsDim[i] = cdprop.maxThreadsDim[i];
1240  p_prop->maxGridSize[i] = cdprop.maxGridSize[i];
1241  }
1242  p_prop->clockRate = cdprop.clockRate;
1243  p_prop->memoryClockRate = cdprop.memoryClockRate;
1244  p_prop->memoryBusWidth = cdprop.memoryBusWidth;
1245  p_prop->totalConstMem = cdprop.totalConstMem;
1246  p_prop->major = cdprop.major;
1247  p_prop->minor = cdprop.minor;
1248  p_prop->multiProcessorCount = cdprop.multiProcessorCount;
1249  p_prop->l2CacheSize = cdprop.l2CacheSize;
1250  p_prop->maxThreadsPerMultiProcessor = cdprop.maxThreadsPerMultiProcessor;
1251  p_prop->computeMode = cdprop.computeMode;
1252  p_prop->clockInstructionRate = cdprop.clockRate; // Same as clock-rate:
1253 
1254  int ccVers = p_prop->major * 100 + p_prop->minor * 10;
1255  p_prop->arch.hasGlobalInt32Atomics = (ccVers >= 110);
1256  p_prop->arch.hasGlobalFloatAtomicExch = (ccVers >= 110);
1257  p_prop->arch.hasSharedInt32Atomics = (ccVers >= 120);
1258  p_prop->arch.hasSharedFloatAtomicExch = (ccVers >= 120);
1259  p_prop->arch.hasFloatAtomicAdd = (ccVers >= 200);
1260  p_prop->arch.hasGlobalInt64Atomics = (ccVers >= 120);
1261  p_prop->arch.hasSharedInt64Atomics = (ccVers >= 110);
1262  p_prop->arch.hasDoubles = (ccVers >= 130);
1263  p_prop->arch.hasWarpVote = (ccVers >= 120);
1264  p_prop->arch.hasWarpBallot = (ccVers >= 200);
1265  p_prop->arch.hasWarpShuffle = (ccVers >= 300);
1266  p_prop->arch.hasFunnelShift = (ccVers >= 350);
1267  p_prop->arch.hasThreadFenceSystem = (ccVers >= 200);
1268  p_prop->arch.hasSyncThreadsExt = (ccVers >= 200);
1269  p_prop->arch.hasSurfaceFuncs = (ccVers >= 200);
1270  p_prop->arch.has3dGrid = (ccVers >= 200);
1271  p_prop->arch.hasDynamicParallelism = (ccVers >= 350);
1272 
1273  p_prop->concurrentKernels = cdprop.concurrentKernels;
1274  p_prop->pciDomainID = cdprop.pciDomainID;
1275  p_prop->pciBusID = cdprop.pciBusID;
1276  p_prop->pciDeviceID = cdprop.pciDeviceID;
1277  p_prop->maxSharedMemoryPerMultiProcessor = cdprop.sharedMemPerMultiprocessor;
1278  p_prop->isMultiGpuBoard = cdprop.isMultiGpuBoard;
1279  p_prop->canMapHostMemory = cdprop.canMapHostMemory;
1280  p_prop->gcnArch = 0; // Not a GCN arch
1281  p_prop->integrated = cdprop.integrated;
1282  p_prop->cooperativeLaunch = cdprop.cooperativeLaunch;
1283  p_prop->cooperativeMultiDeviceLaunch = cdprop.cooperativeMultiDeviceLaunch;
1288 
1289  p_prop->maxTexture1D = cdprop.maxTexture1D;
1290  p_prop->maxTexture2D[0] = cdprop.maxTexture2D[0];
1291  p_prop->maxTexture2D[1] = cdprop.maxTexture2D[1];
1292  p_prop->maxTexture3D[0] = cdprop.maxTexture3D[0];
1293  p_prop->maxTexture3D[1] = cdprop.maxTexture3D[1];
1294  p_prop->maxTexture3D[2] = cdprop.maxTexture3D[2];
1295 
1296  p_prop->memPitch = cdprop.memPitch;
1297  p_prop->textureAlignment = cdprop.textureAlignment;
1298  p_prop->texturePitchAlignment = cdprop.texturePitchAlignment;
1299  p_prop->kernelExecTimeoutEnabled = cdprop.kernelExecTimeoutEnabled;
1300  p_prop->ECCEnabled = cdprop.ECCEnabled;
1301  p_prop->tccDriver = cdprop.tccDriver;
1302 
1303  return hipCUDAErrorTohipError(cerror);
1304 }
1305 
1306 inline static hipError_t hipDeviceGetAttribute(int* pi, hipDeviceAttribute_t attr, int device) {
1307  enum cudaDeviceAttr cdattr;
1308  cudaError_t cerror;
1309 
1310  switch (attr) {
1312  cdattr = cudaDevAttrMaxThreadsPerBlock;
1313  break;
1315  cdattr = cudaDevAttrMaxBlockDimX;
1316  break;
1318  cdattr = cudaDevAttrMaxBlockDimY;
1319  break;
1321  cdattr = cudaDevAttrMaxBlockDimZ;
1322  break;
1324  cdattr = cudaDevAttrMaxGridDimX;
1325  break;
1327  cdattr = cudaDevAttrMaxGridDimY;
1328  break;
1330  cdattr = cudaDevAttrMaxGridDimZ;
1331  break;
1333  cdattr = cudaDevAttrMaxSharedMemoryPerBlock;
1334  break;
1336  cdattr = cudaDevAttrTotalConstantMemory;
1337  break;
1339  cdattr = cudaDevAttrWarpSize;
1340  break;
1342  cdattr = cudaDevAttrMaxRegistersPerBlock;
1343  break;
1345  cdattr = cudaDevAttrClockRate;
1346  break;
1348  cdattr = cudaDevAttrMemoryClockRate;
1349  break;
1351  cdattr = cudaDevAttrGlobalMemoryBusWidth;
1352  break;
1354  cdattr = cudaDevAttrMultiProcessorCount;
1355  break;
1357  cdattr = cudaDevAttrComputeMode;
1358  break;
1360  cdattr = cudaDevAttrL2CacheSize;
1361  break;
1363  cdattr = cudaDevAttrMaxThreadsPerMultiProcessor;
1364  break;
1366  cdattr = cudaDevAttrComputeCapabilityMajor;
1367  break;
1369  cdattr = cudaDevAttrComputeCapabilityMinor;
1370  break;
1372  cdattr = cudaDevAttrConcurrentKernels;
1373  break;
1375  cdattr = cudaDevAttrPciBusId;
1376  break;
1378  cdattr = cudaDevAttrPciDeviceId;
1379  break;
1381  cdattr = cudaDevAttrMaxSharedMemoryPerMultiprocessor;
1382  break;
1384  cdattr = cudaDevAttrIsMultiGpuBoard;
1385  break;
1387  cdattr = cudaDevAttrIntegrated;
1388  break;
1390  cdattr = cudaDevAttrMaxTexture1DWidth;
1391  break;
1393  cdattr = cudaDevAttrMaxTexture2DWidth;
1394  break;
1396  cdattr = cudaDevAttrMaxTexture2DHeight;
1397  break;
1399  cdattr = cudaDevAttrMaxTexture3DWidth;
1400  break;
1402  cdattr = cudaDevAttrMaxTexture3DHeight;
1403  break;
1405  cdattr = cudaDevAttrMaxTexture3DDepth;
1406  break;
1408  cdattr = cudaDevAttrMaxPitch;
1409  break;
1411  cdattr = cudaDevAttrTextureAlignment;
1412  break;
1414  cdattr = cudaDevAttrTexturePitchAlignment;
1415  break;
1417  cdattr = cudaDevAttrKernelExecTimeout;
1418  break;
1420  cdattr = cudaDevAttrCanMapHostMemory;
1421  break;
1423  cdattr = cudaDevAttrEccEnabled;
1424  break;
1426  cdattr = cudaDevAttrCooperativeLaunch;
1427  break;
1429  cdattr = cudaDevAttrCooperativeMultiDeviceLaunch;
1430  break;
1431  default:
1432  return hipCUDAErrorTohipError(cudaErrorInvalidValue);
1433  }
1434 
1435  cerror = cudaDeviceGetAttribute(pi, cdattr, device);
1436 
1437  return hipCUDAErrorTohipError(cerror);
1438 }
1439 
1440 inline static hipError_t hipOccupancyMaxActiveBlocksPerMultiprocessor(int* numBlocks,
1441  const void* func,
1442  int blockSize,
1443  size_t dynamicSMemSize) {
1444  return hipCUDAErrorTohipError(cudaOccupancyMaxActiveBlocksPerMultiprocessor(numBlocks, func,
1445  blockSize, dynamicSMemSize));
1446 }
1447 
1448 inline static hipError_t hipOccupancyMaxActiveBlocksPerMultiprocessorWithFlags(int* numBlocks,
1449  const void* func,
1450  int blockSize,
1451  size_t dynamicSMemSize,
1452  unsigned int flags) {
1453  return hipCUDAErrorTohipError(cudaOccupancyMaxActiveBlocksPerMultiprocessorWithFlags(numBlocks, func,
1454  blockSize, dynamicSMemSize, flags));
1455 }
1456 
1457 inline static hipError_t hipModuleOccupancyMaxActiveBlocksPerMultiprocessor(int* numBlocks,
1458  hipFunction_t f,
1459  int blockSize,
1460  size_t dynamicSMemSize ){
1461  return hipCUResultTohipError(cuOccupancyMaxActiveBlocksPerMultiprocessor(numBlocks, f,
1462  blockSize, dynamicSMemSize));
1463 }
1464 
1465 inline static hipError_t hipModuleOccupancyMaxActiveBlocksPerMultiprocessorWithFlags(int* numBlocks,
1466  hipFunction_t f,
1467  int blockSize,
1468  size_t dynamicSMemSize,
1469  unsigned int flags ) {
1470  return hipCUResultTohipError(cuOccupancyMaxActiveBlocksPerMultiprocessorWithFlags(numBlocks,f,
1471  blockSize, dynamicSMemSize, flags));
1472 }
1473 
1474 //TODO - Match CUoccupancyB2DSize
1475 inline static hipError_t hipModuleOccupancyMaxPotentialBlockSize(int* gridSize, int* blockSize,
1476  hipFunction_t f, size_t dynSharedMemPerBlk,
1477  int blockSizeLimit){
1478  return hipCUResultTohipError(cuOccupancyMaxPotentialBlockSize(gridSize, blockSize, f, NULL,
1479  dynSharedMemPerBlk, blockSizeLimit));
1480 }
1481 
1482 //TODO - Match CUoccupancyB2DSize
1483 inline static hipError_t hipModuleOccupancyMaxPotentialBlockSizeWithFlags(int* gridSize, int* blockSize,
1484  hipFunction_t f, size_t dynSharedMemPerBlk,
1485  int blockSizeLimit, unsigned int flags){
1486  return hipCUResultTohipError(cuOccupancyMaxPotentialBlockSizeWithFlags(gridSize, blockSize, f, NULL,
1487  dynSharedMemPerBlk, blockSizeLimit, flags));
1488 }
1489 
1490 inline static hipError_t hipPointerGetAttributes(hipPointerAttribute_t* attributes, const void* ptr) {
1491  struct cudaPointerAttributes cPA;
1492  hipError_t err = hipCUDAErrorTohipError(cudaPointerGetAttributes(&cPA, ptr));
1493  if (err == hipSuccess) {
1494 #if (CUDART_VERSION >= 11000)
1495  auto memType = cPA.type;
1496 #else
1497  unsigned memType = cPA.memoryType; // No auto because cuda 10.2 doesnt force c++11
1498 #endif
1499  switch (memType) {
1500  case cudaMemoryTypeDevice:
1501  attributes->memoryType = hipMemoryTypeDevice;
1502  break;
1503  case cudaMemoryTypeHost:
1504  attributes->memoryType = hipMemoryTypeHost;
1505  break;
1506  default:
1507  return hipErrorUnknown;
1508  }
1509  attributes->device = cPA.device;
1510  attributes->devicePointer = cPA.devicePointer;
1511  attributes->hostPointer = cPA.hostPointer;
1512  attributes->isManaged = 0;
1513  attributes->allocationFlags = 0;
1514  }
1515  return err;
1516 }
1517 
1518 inline static hipError_t hipMemGetInfo(size_t* free, size_t* total) {
1519  return hipCUDAErrorTohipError(cudaMemGetInfo(free, total));
1520 }
1521 
1522 inline static hipError_t hipEventCreate(hipEvent_t* event) {
1523  return hipCUDAErrorTohipError(cudaEventCreate(event));
1524 }
1525 
1526 inline static hipError_t hipEventRecord(hipEvent_t event, hipStream_t stream __dparm(NULL)) {
1527  return hipCUDAErrorTohipError(cudaEventRecord(event, stream));
1528 }
1529 
1530 inline static hipError_t hipEventSynchronize(hipEvent_t event) {
1531  return hipCUDAErrorTohipError(cudaEventSynchronize(event));
1532 }
1533 
1534 inline static hipError_t hipEventElapsedTime(float* ms, hipEvent_t start, hipEvent_t stop) {
1535  return hipCUDAErrorTohipError(cudaEventElapsedTime(ms, start, stop));
1536 }
1537 
1538 inline static hipError_t hipEventDestroy(hipEvent_t event) {
1539  return hipCUDAErrorTohipError(cudaEventDestroy(event));
1540 }
1541 
1542 inline static hipError_t hipStreamCreateWithFlags(hipStream_t* stream, unsigned int flags) {
1543  return hipCUDAErrorTohipError(cudaStreamCreateWithFlags(stream, flags));
1544 }
1545 
1546 inline static hipError_t hipStreamCreateWithPriority(hipStream_t* stream, unsigned int flags, int priority) {
1547  return hipCUDAErrorTohipError(cudaStreamCreateWithPriority(stream, flags, priority));
1548 }
1549 
1550 inline static hipError_t hipDeviceGetStreamPriorityRange(int* leastPriority, int* greatestPriority) {
1551  return hipCUDAErrorTohipError(cudaDeviceGetStreamPriorityRange(leastPriority, greatestPriority));
1552 }
1553 
1554 inline static hipError_t hipStreamCreate(hipStream_t* stream) {
1555  return hipCUDAErrorTohipError(cudaStreamCreate(stream));
1556 }
1557 
1558 inline static hipError_t hipStreamSynchronize(hipStream_t stream) {
1559  return hipCUDAErrorTohipError(cudaStreamSynchronize(stream));
1560 }
1561 
1562 inline static hipError_t hipStreamDestroy(hipStream_t stream) {
1563  return hipCUDAErrorTohipError(cudaStreamDestroy(stream));
1564 }
1565 
1566 inline static hipError_t hipStreamGetFlags(hipStream_t stream, unsigned int *flags) {
1567  return hipCUDAErrorTohipError(cudaStreamGetFlags(stream, flags));
1568 }
1569 
1570 inline static hipError_t hipStreamGetPriority(hipStream_t stream, int *priority) {
1571  return hipCUDAErrorTohipError(cudaStreamGetPriority(stream, priority));
1572 }
1573 
1574 inline static hipError_t hipStreamWaitEvent(hipStream_t stream, hipEvent_t event,
1575  unsigned int flags) {
1576  return hipCUDAErrorTohipError(cudaStreamWaitEvent(stream, event, flags));
1577 }
1578 
1579 inline static hipError_t hipStreamQuery(hipStream_t stream) {
1580  return hipCUDAErrorTohipError(cudaStreamQuery(stream));
1581 }
1582 
1583 inline static hipError_t hipStreamAddCallback(hipStream_t stream, hipStreamCallback_t callback,
1584  void* userData, unsigned int flags) {
1585  return hipCUDAErrorTohipError(
1586  cudaStreamAddCallback(stream, (cudaStreamCallback_t)callback, userData, flags));
1587 }
1588 
1589 inline static hipError_t hipDriverGetVersion(int* driverVersion) {
1590  cudaError_t err = cudaDriverGetVersion(driverVersion);
1591 
1592  // Override driver version to match version reported on HCC side.
1593  *driverVersion = 4;
1594 
1595  return hipCUDAErrorTohipError(err);
1596 }
1597 
1598 inline static hipError_t hipRuntimeGetVersion(int* runtimeVersion) {
1599  return hipCUDAErrorTohipError(cudaRuntimeGetVersion(runtimeVersion));
1600 }
1601 
1602 inline static hipError_t hipDeviceCanAccessPeer(int* canAccessPeer, int device, int peerDevice) {
1603  return hipCUDAErrorTohipError(cudaDeviceCanAccessPeer(canAccessPeer, device, peerDevice));
1604 }
1605 
1606 inline static hipError_t hipDeviceDisablePeerAccess(int peerDevice) {
1607  return hipCUDAErrorTohipError(cudaDeviceDisablePeerAccess(peerDevice));
1608 }
1609 
1610 inline static hipError_t hipDeviceEnablePeerAccess(int peerDevice, unsigned int flags) {
1611  return hipCUDAErrorTohipError(cudaDeviceEnablePeerAccess(peerDevice, flags));
1612 }
1613 
1614 inline static hipError_t hipCtxDisablePeerAccess(hipCtx_t peerCtx) {
1615  return hipCUResultTohipError(cuCtxDisablePeerAccess(peerCtx));
1616 }
1617 
1618 inline static hipError_t hipCtxEnablePeerAccess(hipCtx_t peerCtx, unsigned int flags) {
1619  return hipCUResultTohipError(cuCtxEnablePeerAccess(peerCtx, flags));
1620 }
1621 
1622 inline static hipError_t hipDevicePrimaryCtxGetState(hipDevice_t dev, unsigned int* flags,
1623  int* active) {
1624  return hipCUResultTohipError(cuDevicePrimaryCtxGetState(dev, flags, active));
1625 }
1626 
1627 inline static hipError_t hipDevicePrimaryCtxRelease(hipDevice_t dev) {
1628  return hipCUResultTohipError(cuDevicePrimaryCtxRelease(dev));
1629 }
1630 
1631 inline static hipError_t hipDevicePrimaryCtxRetain(hipCtx_t* pctx, hipDevice_t dev) {
1632  return hipCUResultTohipError(cuDevicePrimaryCtxRetain(pctx, dev));
1633 }
1634 
1635 inline static hipError_t hipDevicePrimaryCtxReset(hipDevice_t dev) {
1636  return hipCUResultTohipError(cuDevicePrimaryCtxReset(dev));
1637 }
1638 
1639 inline static hipError_t hipDevicePrimaryCtxSetFlags(hipDevice_t dev, unsigned int flags) {
1640  return hipCUResultTohipError(cuDevicePrimaryCtxSetFlags(dev, flags));
1641 }
1642 
1643 inline static hipError_t hipMemGetAddressRange(hipDeviceptr_t* pbase, size_t* psize,
1644  hipDeviceptr_t dptr) {
1645  return hipCUResultTohipError(cuMemGetAddressRange(pbase, psize, dptr));
1646 }
1647 
1648 inline static hipError_t hipMemcpyPeer(void* dst, int dstDevice, const void* src, int srcDevice,
1649  size_t count) {
1650  return hipCUDAErrorTohipError(cudaMemcpyPeer(dst, dstDevice, src, srcDevice, count));
1651 }
1652 
1653 inline static hipError_t hipMemcpyPeerAsync(void* dst, int dstDevice, const void* src,
1654  int srcDevice, size_t count,
1655  hipStream_t stream __dparm(0)) {
1656  return hipCUDAErrorTohipError(
1657  cudaMemcpyPeerAsync(dst, dstDevice, src, srcDevice, count, stream));
1658 }
1659 
1660 // Profile APIs:
1661 inline static hipError_t hipProfilerStart() { return hipCUDAErrorTohipError(cudaProfilerStart()); }
1662 
1663 inline static hipError_t hipProfilerStop() { return hipCUDAErrorTohipError(cudaProfilerStop()); }
1664 
1665 inline static hipError_t hipGetDeviceFlags(unsigned int* flags) {
1666  return hipCUDAErrorTohipError(cudaGetDeviceFlags(flags));
1667 }
1668 
1669 inline static hipError_t hipSetDeviceFlags(unsigned int flags) {
1670  return hipCUDAErrorTohipError(cudaSetDeviceFlags(flags));
1671 }
1672 
1673 inline static hipError_t hipEventCreateWithFlags(hipEvent_t* event, unsigned int flags) {
1674  return hipCUDAErrorTohipError(cudaEventCreateWithFlags(event, flags));
1675 }
1676 
1677 inline static hipError_t hipEventQuery(hipEvent_t event) {
1678  return hipCUDAErrorTohipError(cudaEventQuery(event));
1679 }
1680 
1681 inline static hipError_t hipCtxCreate(hipCtx_t* ctx, unsigned int flags, hipDevice_t device) {
1682  return hipCUResultTohipError(cuCtxCreate(ctx, flags, device));
1683 }
1684 
1685 inline static hipError_t hipCtxDestroy(hipCtx_t ctx) {
1686  return hipCUResultTohipError(cuCtxDestroy(ctx));
1687 }
1688 
1689 inline static hipError_t hipCtxPopCurrent(hipCtx_t* ctx) {
1690  return hipCUResultTohipError(cuCtxPopCurrent(ctx));
1691 }
1692 
1693 inline static hipError_t hipCtxPushCurrent(hipCtx_t ctx) {
1694  return hipCUResultTohipError(cuCtxPushCurrent(ctx));
1695 }
1696 
1697 inline static hipError_t hipCtxSetCurrent(hipCtx_t ctx) {
1698  return hipCUResultTohipError(cuCtxSetCurrent(ctx));
1699 }
1700 
1701 inline static hipError_t hipCtxGetCurrent(hipCtx_t* ctx) {
1702  return hipCUResultTohipError(cuCtxGetCurrent(ctx));
1703 }
1704 
1705 inline static hipError_t hipCtxGetDevice(hipDevice_t* device) {
1706  return hipCUResultTohipError(cuCtxGetDevice(device));
1707 }
1708 
1709 inline static hipError_t hipCtxGetApiVersion(hipCtx_t ctx, int* apiVersion) {
1710  return hipCUResultTohipError(cuCtxGetApiVersion(ctx, (unsigned int*)apiVersion));
1711 }
1712 
1713 inline static hipError_t hipCtxGetCacheConfig(hipFuncCache* cacheConfig) {
1714  return hipCUResultTohipError(cuCtxGetCacheConfig(cacheConfig));
1715 }
1716 
1717 inline static hipError_t hipCtxSetCacheConfig(hipFuncCache cacheConfig) {
1718  return hipCUResultTohipError(cuCtxSetCacheConfig(cacheConfig));
1719 }
1720 
1721 inline static hipError_t hipCtxSetSharedMemConfig(hipSharedMemConfig config) {
1722  return hipCUResultTohipError(cuCtxSetSharedMemConfig((CUsharedconfig)config));
1723 }
1724 
1725 inline static hipError_t hipCtxGetSharedMemConfig(hipSharedMemConfig* pConfig) {
1726  return hipCUResultTohipError(cuCtxGetSharedMemConfig((CUsharedconfig*)pConfig));
1727 }
1728 
1729 inline static hipError_t hipCtxSynchronize(void) {
1730  return hipCUResultTohipError(cuCtxSynchronize());
1731 }
1732 
1733 inline static hipError_t hipCtxGetFlags(unsigned int* flags) {
1734  return hipCUResultTohipError(cuCtxGetFlags(flags));
1735 }
1736 
1737 inline static hipError_t hipCtxDetach(hipCtx_t ctx) {
1738  return hipCUResultTohipError(cuCtxDetach(ctx));
1739 }
1740 
1741 inline static hipError_t hipDeviceGet(hipDevice_t* device, int ordinal) {
1742  return hipCUResultTohipError(cuDeviceGet(device, ordinal));
1743 }
1744 
1745 inline static hipError_t hipDeviceComputeCapability(int* major, int* minor, hipDevice_t device) {
1746  return hipCUResultTohipError(cuDeviceComputeCapability(major, minor, device));
1747 }
1748 
1749 inline static hipError_t hipDeviceGetName(char* name, int len, hipDevice_t device) {
1750  return hipCUResultTohipError(cuDeviceGetName(name, len, device));
1751 }
1752 
1753 inline static hipError_t hipDeviceGetP2PAttribute(int* value, hipDeviceP2PAttr attr,
1754  int srcDevice, int dstDevice) {
1755  return hipCUDAErrorTohipError(cudaDeviceGetP2PAttribute(value, attr, srcDevice, dstDevice));
1756 }
1757 
1758 inline static hipError_t hipDeviceGetPCIBusId(char* pciBusId, int len, hipDevice_t device) {
1759  return hipCUDAErrorTohipError(cudaDeviceGetPCIBusId(pciBusId, len, device));
1760 }
1761 
1762 inline static hipError_t hipDeviceGetByPCIBusId(int* device, const char* pciBusId) {
1763  return hipCUDAErrorTohipError(cudaDeviceGetByPCIBusId(device, pciBusId));
1764 }
1765 
1766 inline static hipError_t hipDeviceGetSharedMemConfig(hipSharedMemConfig* config) {
1767  return hipCUDAErrorTohipError(cudaDeviceGetSharedMemConfig(config));
1768 }
1769 
1770 inline static hipError_t hipDeviceSetSharedMemConfig(hipSharedMemConfig config) {
1771  return hipCUDAErrorTohipError(cudaDeviceSetSharedMemConfig(config));
1772 }
1773 
1774 inline static hipError_t hipDeviceGetLimit(size_t* pValue, hipLimit_t limit) {
1775  return hipCUDAErrorTohipError(cudaDeviceGetLimit(pValue, limit));
1776 }
1777 
1778 inline static hipError_t hipDeviceTotalMem(size_t* bytes, hipDevice_t device) {
1779  return hipCUResultTohipError(cuDeviceTotalMem(bytes, device));
1780 }
1781 
1782 inline static hipError_t hipModuleLoad(hipModule_t* module, const char* fname) {
1783  return hipCUResultTohipError(cuModuleLoad(module, fname));
1784 }
1785 
1786 inline static hipError_t hipModuleUnload(hipModule_t hmod) {
1787  return hipCUResultTohipError(cuModuleUnload(hmod));
1788 }
1789 
1790 inline static hipError_t hipModuleGetFunction(hipFunction_t* function, hipModule_t module,
1791  const char* kname) {
1792  return hipCUResultTohipError(cuModuleGetFunction(function, module, kname));
1793 }
1794 
1795 inline static hipError_t hipModuleGetTexRef(hipTexRef* pTexRef, hipModule_t hmod, const char* name){
1796  hipCUResultTohipError(cuModuleGetTexRef(pTexRef, hmod, name));
1797 }
1798 
1799 inline static hipError_t hipFuncGetAttributes(hipFuncAttributes* attr, const void* func) {
1800  return hipCUDAErrorTohipError(cudaFuncGetAttributes(attr, func));
1801 }
1802 
1803 inline static hipError_t hipFuncGetAttribute (int* value, hipFunction_attribute attrib, hipFunction_t hfunc) {
1804  return hipCUResultTohipError(cuFuncGetAttribute(value, attrib, hfunc));
1805 }
1806 
1807 inline static hipError_t hipModuleGetGlobal(hipDeviceptr_t* dptr, size_t* bytes, hipModule_t hmod,
1808  const char* name) {
1809  return hipCUResultTohipError(cuModuleGetGlobal(dptr, bytes, hmod, name));
1810 }
1811 
1812 inline static hipError_t hipModuleLoadData(hipModule_t* module, const void* image) {
1813  return hipCUResultTohipError(cuModuleLoadData(module, image));
1814 }
1815 
1816 inline static hipError_t hipModuleLoadDataEx(hipModule_t* module, const void* image,
1817  unsigned int numOptions, hipJitOption* options,
1818  void** optionValues) {
1819  return hipCUResultTohipError(
1820  cuModuleLoadDataEx(module, image, numOptions, options, optionValues));
1821 }
1822 
1823 inline static hipError_t hipLaunchKernel(const void* function_address, dim3 numBlocks,
1824  dim3 dimBlocks, void** args, size_t sharedMemBytes,
1825  hipStream_t stream)
1826 {
1827  return hipCUDAErrorTohipError(cudaLaunchKernel(function_address,numBlocks,dimBlocks,args,sharedMemBytes,stream));
1828 }
1829 
1830 inline static hipError_t hipModuleLaunchKernel(hipFunction_t f, unsigned int gridDimX,
1831  unsigned int gridDimY, unsigned int gridDimZ,
1832  unsigned int blockDimX, unsigned int blockDimY,
1833  unsigned int blockDimZ, unsigned int sharedMemBytes,
1834  hipStream_t stream, void** kernelParams,
1835  void** extra) {
1836  return hipCUResultTohipError(cuLaunchKernel(f, gridDimX, gridDimY, gridDimZ, blockDimX,
1837  blockDimY, blockDimZ, sharedMemBytes, stream,
1838  kernelParams, extra));
1839 }
1840 
1841 inline static hipError_t hipFuncSetCacheConfig(const void* func, hipFuncCache_t cacheConfig) {
1842  return hipCUDAErrorTohipError(cudaFuncSetCacheConfig(func, cacheConfig));
1843 }
1844 
1845 __HIP_DEPRECATED inline static hipError_t hipBindTexture(size_t* offset,
1846  struct textureReference* tex,
1847  const void* devPtr,
1848  const hipChannelFormatDesc* desc,
1849  size_t size __dparm(UINT_MAX)) {
1850  return hipCUDAErrorTohipError(cudaBindTexture(offset, tex, devPtr, desc, size));
1851 }
1852 
1853 __HIP_DEPRECATED inline static hipError_t hipBindTexture2D(
1854  size_t* offset, struct textureReference* tex, const void* devPtr,
1855  const hipChannelFormatDesc* desc, size_t width, size_t height, size_t pitch) {
1856  return hipCUDAErrorTohipError(cudaBindTexture2D(offset, tex, devPtr, desc, width, height, pitch));
1857 }
1858 
1859 inline static hipChannelFormatDesc hipCreateChannelDesc(int x, int y, int z, int w,
1860  hipChannelFormatKind f) {
1861  return cudaCreateChannelDesc(x, y, z, w, hipChannelFormatKindToCudaChannelFormatKind(f));
1862 }
1863 
1864 inline static hipError_t hipCreateTextureObject(hipTextureObject_t* pTexObject,
1865  const hipResourceDesc* pResDesc,
1866  const hipTextureDesc* pTexDesc,
1867  const hipResourceViewDesc* pResViewDesc) {
1868  return hipCUDAErrorTohipError(
1869  cudaCreateTextureObject(pTexObject, pResDesc, pTexDesc, pResViewDesc));
1870 }
1871 
1872 inline static hipError_t hipDestroyTextureObject(hipTextureObject_t textureObject) {
1873  return hipCUDAErrorTohipError(cudaDestroyTextureObject(textureObject));
1874 }
1875 
1876 inline static hipError_t hipCreateSurfaceObject(hipSurfaceObject_t* pSurfObject,
1877  const hipResourceDesc* pResDesc) {
1878  return hipCUDAErrorTohipError(cudaCreateSurfaceObject(pSurfObject, pResDesc));
1879 }
1880 
1881 inline static hipError_t hipDestroySurfaceObject(hipSurfaceObject_t surfaceObject) {
1882  return hipCUDAErrorTohipError(cudaDestroySurfaceObject(surfaceObject));
1883 }
1884 
1885 inline static hipError_t hipGetTextureObjectResourceDesc(hipResourceDesc* pResDesc,
1886  hipTextureObject_t textureObject) {
1887  return hipCUDAErrorTohipError(cudaGetTextureObjectResourceDesc( pResDesc, textureObject));
1888 }
1889 
1890 __HIP_DEPRECATED inline static hipError_t hipGetTextureAlignmentOffset(
1891  size_t* offset, const struct textureReference* texref) {
1892  return hipCUDAErrorTohipError(cudaGetTextureAlignmentOffset(offset,texref));
1893 }
1894 
1895 inline static hipError_t hipGetChannelDesc(hipChannelFormatDesc* desc, hipArray_const_t array)
1896 {
1897  return hipCUDAErrorTohipError(cudaGetChannelDesc(desc,array));
1898 }
1899 
1900 inline static hipError_t hipLaunchCooperativeKernel(const void* f, dim3 gridDim, dim3 blockDim,
1901  void** kernelParams, unsigned int sharedMemBytes,
1902  hipStream_t stream) {
1903  return hipCUDAErrorTohipError(
1904  cudaLaunchCooperativeKernel(f, gridDim, blockDim, kernelParams, sharedMemBytes, stream));
1905 }
1906 
1907 inline static hipError_t hipLaunchCooperativeKernelMultiDevice(hipLaunchParams* launchParamsList,
1908  int numDevices, unsigned int flags) {
1909  return hipCUDAErrorTohipError(cudaLaunchCooperativeKernelMultiDevice(launchParamsList, numDevices, flags));
1910 }
1911 
1912 #ifdef __cplusplus
1913 }
1914 #endif
1915 
1916 #ifdef __CUDACC__
1917 
1918 template<class T>
1919 inline static hipError_t hipOccupancyMaxActiveBlocksPerMultiprocessor(int* numBlocks,
1920  T func,
1921  int blockSize,
1922  size_t dynamicSMemSize) {
1923  return hipCUDAErrorTohipError(cudaOccupancyMaxActiveBlocksPerMultiprocessor(numBlocks, func,
1924  blockSize, dynamicSMemSize));
1925 }
1926 
1927 template <class T>
1928 inline static hipError_t hipOccupancyMaxPotentialBlockSize(int* minGridSize, int* blockSize, T func,
1929  size_t dynamicSMemSize = 0,
1930  int blockSizeLimit = 0) {
1931  return hipCUDAErrorTohipError(cudaOccupancyMaxPotentialBlockSize(minGridSize, blockSize, func,
1932  dynamicSMemSize, blockSizeLimit));
1933 }
1934 
1935 template <class T>
1936 inline static hipError_t hipOccupancyMaxPotentialBlockSizeWithFlags(int* minGridSize, int* blockSize, T func,
1937  size_t dynamicSMemSize = 0,
1938  int blockSizeLimit = 0, unsigned int flags = 0) {
1939  return hipCUDAErrorTohipError(cudaOccupancyMaxPotentialBlockSize(minGridSize, blockSize, func,
1940  dynamicSMemSize, blockSizeLimit, flags));
1941 }
1942 
1943 template <class T>
1944 inline static hipError_t hipOccupancyMaxActiveBlocksPerMultiprocessorWithFlags( int* numBlocks, T func,
1945  int blockSize, size_t dynamicSMemSize,unsigned int flags) {
1946  return hipCUDAErrorTohipError(cudaOccupancyMaxActiveBlocksPerMultiprocessorWithFlags(numBlocks, func,
1947  blockSize, dynamicSMemSize, flags));
1948 }
1949 
1950 template <class T, int dim, enum cudaTextureReadMode readMode>
1951 inline static hipError_t hipBindTexture(size_t* offset, const struct texture<T, dim, readMode>& tex,
1952  const void* devPtr, size_t size = UINT_MAX) {
1953  return hipCUDAErrorTohipError(cudaBindTexture(offset, tex, devPtr, size));
1954 }
1955 
1956 template <class T, int dim, enum cudaTextureReadMode readMode>
1957 inline static hipError_t hipBindTexture(size_t* offset, struct texture<T, dim, readMode>& tex,
1958  const void* devPtr, const hipChannelFormatDesc& desc,
1959  size_t size = UINT_MAX) {
1960  return hipCUDAErrorTohipError(cudaBindTexture(offset, tex, devPtr, desc, size));
1961 }
1962 
1963 template <class T, int dim, enum cudaTextureReadMode readMode>
1964 __HIP_DEPRECATED inline static hipError_t hipUnbindTexture(struct texture<T, dim, readMode>* tex) {
1965  return hipCUDAErrorTohipError(cudaUnbindTexture(tex));
1966 }
1967 
1968 template <class T, int dim, enum cudaTextureReadMode readMode>
1969 __HIP_DEPRECATED inline static hipError_t hipUnbindTexture(struct texture<T, dim, readMode>& tex) {
1970  return hipCUDAErrorTohipError(cudaUnbindTexture(tex));
1971 }
1972 
1973 template <class T, int dim, enum cudaTextureReadMode readMode>
1974 __HIP_DEPRECATED inline static hipError_t hipBindTextureToArray(
1975  struct texture<T, dim, readMode>& tex, hipArray_const_t array,
1976  const hipChannelFormatDesc& desc) {
1977  return hipCUDAErrorTohipError(cudaBindTextureToArray(tex, array, desc));
1978 }
1979 
1980 template <class T, int dim, enum cudaTextureReadMode readMode>
1981 __HIP_DEPRECATED inline static hipError_t hipBindTextureToArray(
1982  struct texture<T, dim, readMode>* tex, hipArray_const_t array,
1983  const hipChannelFormatDesc* desc) {
1984  return hipCUDAErrorTohipError(cudaBindTextureToArray(tex, array, desc));
1985 }
1986 
1987 template <class T, int dim, enum cudaTextureReadMode readMode>
1988 __HIP_DEPRECATED inline static hipError_t hipBindTextureToArray(
1989  struct texture<T, dim, readMode>& tex, hipArray_const_t array) {
1990  return hipCUDAErrorTohipError(cudaBindTextureToArray(tex, array));
1991 }
1992 
1993 template <class T>
1994 inline static hipChannelFormatDesc hipCreateChannelDesc() {
1995  return cudaCreateChannelDesc<T>();
1996 }
1997 
1998 template <class T>
1999 inline static hipError_t hipLaunchCooperativeKernel(T f, dim3 gridDim, dim3 blockDim,
2000  void** kernelParams, unsigned int sharedMemBytes, hipStream_t stream) {
2001  return hipCUDAErrorTohipError(
2002  cudaLaunchCooperativeKernel(f, gridDim, blockDim, kernelParams, sharedMemBytes, stream));
2003 }
2004 
2005 inline static hipError_t hipTexRefSetAddressMode(hipTexRef hTexRef, int dim, hipAddress_mode am){
2006  return hipCUResultTohipError(cuTexRefSetAddressMode(hTexRef,dim,am));
2007 }
2008 
2009 inline static hipError_t hipTexRefSetFilterMode(hipTexRef hTexRef, hipFilter_mode fm){
2010  return hipCUResultTohipError(cuTexRefSetFilterMode(hTexRef,fm));
2011 }
2012 
2013 inline static hipError_t hipTexRefSetAddress(size_t *ByteOffset, hipTexRef hTexRef, hipDeviceptr_t dptr, size_t bytes){
2014  return hipCUResultTohipError(cuTexRefSetAddress(ByteOffset,hTexRef,dptr,bytes));
2015 }
2016 
2017 inline static hipError_t hipTexRefSetAddress2D(hipTexRef hTexRef, const CUDA_ARRAY_DESCRIPTOR *desc, hipDeviceptr_t dptr, size_t Pitch){
2018  return hipCUResultTohipError(cuTexRefSetAddress2D(hTexRef,desc,dptr,Pitch));
2019 }
2020 
2021 inline static hipError_t hipTexRefSetFormat(hipTexRef hTexRef, hipArray_Format fmt, int NumPackedComponents){
2022  return hipCUResultTohipError(cuTexRefSetFormat(hTexRef,fmt,NumPackedComponents));
2023 }
2024 
2025 inline static hipError_t hipTexRefSetFlags(hipTexRef hTexRef, unsigned int Flags){
2026  return hipCUResultTohipError(cuTexRefSetFlags(hTexRef,Flags));
2027 }
2028 
2029 inline static hipError_t hipTexRefSetArray(hipTexRef hTexRef, hiparray hArray, unsigned int Flags){
2030  return hipCUResultTohipError(cuTexRefSetArray(hTexRef,hArray,Flags));
2031 }
2032 
2033 inline static hipError_t hipArrayCreate(hiparray* pHandle, const HIP_ARRAY_DESCRIPTOR* pAllocateArray){
2034  return hipCUResultTohipError(cuArrayCreate(pHandle, pAllocateArray));
2035 }
2036 
2037 inline static hipError_t hipArrayDestroy(hiparray hArray){
2038  return hipCUResultTohipError(cuArrayDestroy(hArray));
2039 }
2040 
2041 #endif //__CUDACC__
2042 
2043 #endif // HIP_INCLUDE_HIP_NVCC_DETAIL_HIP_RUNTIME_API_H
hipFuncAttributes
Definition: hip_runtime_api.h:128
hipCtxSynchronize
hipError_t hipCtxSynchronize(void)
Blocks until the default context has completed all preceding requested tasks.
Definition: hip_context.cpp:249
hipPointerGetAttributes
hipError_t hipPointerGetAttributes(hipPointerAttribute_t *attributes, const void *ptr)
Return attributes for the specified pointer.
Definition: hip_memory.cpp:617
hipDeviceAttributeMaxPitch
@ hipDeviceAttributeMaxPitch
Maximum pitch in bytes allowed by memory copies.
Definition: hip_runtime_api.h:343
hipMemset3DAsync
hipError_t hipMemset3DAsync(hipPitchedPtr pitchedDevPtr, int value, hipExtent extent, hipStream_t stream __dparm(0))
Fills asynchronously the memory area pointed to by pitchedDevPtr with the constant value.
hipMemcpy3D
hipError_t hipMemcpy3D(const struct hipMemcpy3DParms *p)
Copies data between host and device.
Definition: hip_memory.cpp:1712
hipCtxGetCurrent
hipError_t hipCtxGetCurrent(hipCtx_t *ctx)
Get the handle of the current/default context.
Definition: hip_context.cpp:167
hipDeviceProp_t::regsPerBlock
int regsPerBlock
Registers per block.
Definition: hip_runtime_api.h:88
hipMallocPitch
hipError_t hipMallocPitch(void **ptr, size_t *pitch, size_t width, size_t height)
Definition: hip_memory.cpp:851
hipSetDevice
hipError_t hipSetDevice(int deviceId)
Set default device to be used for subsequent hip API calls from this thread.
Definition: hip_device.cpp:132
hipDeviceGetP2PAttribute
hipError_t hipDeviceGetP2PAttribute(int *value, hipDeviceP2PAttr attr, int srcDevice, int dstDevice)
Returns a value for attr of link between two devices.
hipMemsetD16Async
hipError_t hipMemsetD16Async(hipDeviceptr_t dest, unsigned short value, size_t count, hipStream_t stream __dparm(0))
Fills the first sizeBytes bytes of the memory area pointed to by dest with the constant short value v...
hipDeviceAttributeMemoryBusWidth
@ hipDeviceAttributeMemoryBusWidth
Global memory bus width in bits.
Definition: hip_runtime_api.h:314
hipGetErrorString
const char * hipGetErrorString(hipError_t hipError)
Return handy text string message to explain the error which occurred.
Definition: hip_error.cpp:54
hipGetDeviceFlags
hipError_t hipGetDeviceFlags(unsigned int *flags)
Gets the flags set for current device.
hipDeviceGetByPCIBusId
hipError_t hipDeviceGetByPCIBusId(int *device, const char *pciBusId)
Returns a handle to a compute device.
Definition: hip_device.cpp:492
hipErrorInvalidMemcpyDirection
hipErrorInvalidMemcpyDirection
Invalid memory copy direction.
Definition: hip_runtime_api.h:220
hipMalloc3DArray
hipError_t hipMalloc3DArray(hipArray **array, const struct hipChannelFormatDesc *desc, struct hipExtent extent, unsigned int flags)
Allocate an array on the device.
Definition: hip_memory.cpp:1091
hipDeviceArch_t::hasGlobalInt64Atomics
unsigned hasGlobalInt64Atomics
64-bit integer atomics for global memory.
Definition: hip_runtime_api.h:54
hipDeviceProp_t::minor
int minor
Definition: hip_runtime_api.h:100
hipDeviceAttributeMaxBlockDimX
@ hipDeviceAttributeMaxBlockDimX
Maximum x-dimension of a block.
Definition: hip_runtime_api.h:298
hipErrorInvalidDevicePointer
hipErrorInvalidDevicePointer
Invalid Device Pointer.
Definition: hip_runtime_api.h:219
hipChooseDevice
hipError_t hipChooseDevice(int *device, const hipDeviceProp_t *prop)
Device which matches hipDeviceProp_t is returned.
Definition: hip_device.cpp:518
hipIpcCloseMemHandle
hipError_t hipIpcCloseMemHandle(void *devPtr)
Close memory mapped with hipIpcOpenMemHandle.
Definition: hip_memory.cpp:2539
hipMemcpy2DAsync
hipError_t hipMemcpy2DAsync(void *dst, size_t dpitch, const void *src, size_t spitch, size_t width, size_t height, hipMemcpyKind kind, hipStream_t stream __dparm(0))
Copies data between host and device.
hipLaunchKernel
hipError_t hipLaunchKernel(const void *function_address, dim3 numBlocks, dim3 dimBlocks, void **args, size_t sharedMemBytes __dparm(0), hipStream_t stream __dparm(0))
C compliant kernel launch API.
hipMemsetD32
hipError_t hipMemsetD32(hipDeviceptr_t dest, int value, size_t count)
Fills the memory area pointed to by dest with the constant integer value for specified number of time...
Definition: hip_memory.cpp:2281
hipDeviceProp_t::texturePitchAlignment
size_t texturePitchAlignment
Pitch alignment requirement for texture references bound to pitched memory.
Definition: hip_runtime_api.h:129
hipDeviceAttributeMaxGridDimX
@ hipDeviceAttributeMaxGridDimX
Maximum x-dimension of a grid.
Definition: hip_runtime_api.h:301
hipDeviceArch_t::hasThreadFenceSystem
unsigned hasThreadFenceSystem
__threadfence_system.
Definition: hip_runtime_api.h:67
hipStreamCreate
hipError_t hipStreamCreate(hipStream_t *stream)
Create an asynchronous stream.
Definition: hip_stream.cpp:106
hipOccupancyMaxActiveBlocksPerMultiprocessorWithFlags
hipError_t hipOccupancyMaxActiveBlocksPerMultiprocessorWithFlags(int *numBlocks, const void *f, int blockSize, size_t dynSharedMemPerBlk, unsigned int flags __dparm(hipOccupancyDefault))
Returns occupancy for a device function.
hipDeviceGetStreamPriorityRange
hipError_t hipDeviceGetStreamPriorityRange(int *leastPriority, int *greatestPriority)
Returns numerical values that correspond to the least and greatest stream priority.
Definition: hip_stream.cpp:122
hipIpcEventHandle_st
Definition: hip_runtime_api.h:120
hipDeviceProp_t::maxTexture3D
int maxTexture3D[3]
Maximum dimensions (width, height, depth) of 3D images, in image elements.
Definition: hip_runtime_api.h:124
hipStreamCreateWithPriority
hipError_t hipStreamCreateWithPriority(hipStream_t *stream, unsigned int flags, int priority)
Create an asynchronous stream with the specified priority.
Definition: hip_stream.cpp:113
hipCtxPushCurrent
hipError_t hipCtxPushCurrent(hipCtx_t ctx)
Push the context to be set as the current/default context.
Definition: hip_context.cpp:154
hipCtxGetDevice
hipError_t hipCtxGetDevice(hipDevice_t *device)
Get the handle of the device associated with current/default context.
Definition: hip_context.cpp:191
hipFuncCache_t
hipFuncCache_t
Definition: hip_runtime_api.h:308
hipDeviceProp_t::cooperativeMultiDeviceUnmatchedBlockDim
int cooperativeMultiDeviceUnmatchedBlockDim
Definition: hip_runtime_api.h:137
hipPeekAtLastError
hipError_t hipPeekAtLastError(void)
Return last error returned by any HIP runtime API call.
Definition: hip_error.cpp:41
hipMemcpy3DAsync
hipError_t hipMemcpy3DAsync(const struct hipMemcpy3DParms *p, hipStream_t stream __dparm(0))
Copies data between host and device asynchronously.
hipDeviceGetPCIBusId
hipError_t hipDeviceGetPCIBusId(char *pciBusId, int len, int device)
Returns a PCI Bus Id string for the device, overloaded to take int device ID.
Definition: hip_device.cpp:460
hipHostGetFlags
hipError_t hipHostGetFlags(unsigned int *flagsPtr, void *hostPtr)
Return flags associated with host pointer.
Definition: hip_memory.cpp:1133
hipErrorHostMemoryNotRegistered
hipErrorHostMemoryNotRegistered
Produced when trying to unlock a non-page-locked memory.
Definition: hip_runtime_api.h:271
hipErrorRuntimeOther
hipErrorRuntimeOther
Definition: hip_runtime_api.h:284
hipDeviceAttributeClockRate
@ hipDeviceAttributeClockRate
Peak clock frequency in kilohertz.
Definition: hip_runtime_api.h:312
hipMemGetAddressRange
hipError_t hipMemGetAddressRange(hipDeviceptr_t *pbase, size_t *psize, hipDeviceptr_t dptr)
Get information on memory allocations.
Definition: hip_memory.cpp:2437
hipSurfaceObject_t
unsigned long long hipSurfaceObject_t
Definition: hip_surface_types.h:36
hipStreamWaitEvent
hipError_t hipStreamWaitEvent(hipStream_t stream, hipEvent_t event, unsigned int flags)
Make the specified compute stream wait for an event.
Definition: hip_stream.cpp:130
hipDeviceAttributeMaxGridDimZ
@ hipDeviceAttributeMaxGridDimZ
Maximum z-dimension of a grid.
Definition: hip_runtime_api.h:303
hipGetDevice
hipError_t hipGetDevice(int *deviceId)
Return the default device id for the calling host thread.
Definition: hip_device.cpp:32
hipModuleOccupancyMaxPotentialBlockSizeWithFlags
hipError_t hipModuleOccupancyMaxPotentialBlockSizeWithFlags(int *gridSize, int *blockSize, hipFunction_t f, size_t dynSharedMemPerBlk, int blockSizeLimit, unsigned int flags)
Determines the grid and block sizes that achieve maximum occupancy for a kernel.
Definition: hip_module.cpp:1672
hipMallocArray
hipError_t hipMallocArray(hipArray **array, const hipChannelFormatDesc *desc, size_t width, size_t height __dparm(0), unsigned int flags __dparm(hipArrayDefault))
Allocate an array on the device.
hipMemcpyToArray
hipError_t hipMemcpyToArray(hipArray *dst, size_t wOffset, size_t hOffset, const void *src, size_t count, hipMemcpyKind kind)
Copies data between host and device.
Definition: hip_memory.cpp:1494
hipModuleLoadData
hipError_t hipModuleLoadData(hipModule_t *module, const void *image)
Builds a module from a code object that resides in host memory; image is a pointer to that location.
Definition: hip_module.cpp:1508
hipMemoryTypeDevice
@ hipMemoryTypeDevice
Definition: hip_runtime_api.h:157
hipDeviceAttributeMaxRegistersPerBlock
@ hipDeviceAttributeMaxRegistersPerBlock
Definition: hip_runtime_api.h:308
hipMemcpyDtoDAsync
hipError_t hipMemcpyDtoDAsync(hipDeviceptr_t dst, hipDeviceptr_t src, size_t sizeBytes, hipStream_t stream)
Copy data from Device to Device asynchronously.
Definition: hip_memory.cpp:1429
hipErrorNoDevice
hipErrorNoDevice
Call to hipGetDeviceCount returned 0 devices.
Definition: hip_runtime_api.h:225
hipDeviceAttributeComputeCapabilityMinor
@ hipDeviceAttributeComputeCapabilityMinor
Minor compute capability version number.
Definition: hip_runtime_api.h:322
hipModuleLaunchKernel
hipError_t hipModuleLaunchKernel(hipFunction_t f, unsigned int gridDimX, unsigned int gridDimY, unsigned int gridDimZ, unsigned int blockDimX, unsigned int blockDimY, unsigned int blockDimZ, unsigned int sharedMemBytes, hipStream_t stream, void **kernelParams, void **extra)
launches kernel f with launch parameters and shared memory on stream with arguments passed to kernelp...
hipDeviceProp_t::l2CacheSize
int l2CacheSize
L2 cache size.
Definition: hip_runtime_api.h:104
hipDevicePrimaryCtxRelease
hipError_t hipDevicePrimaryCtxRelease(hipDevice_t dev)
Release the primary context on the GPU.
Definition: hip_context.cpp:285
hipCtxGetApiVersion
hipError_t hipCtxGetApiVersion(hipCtx_t ctx, int *apiVersion)
Returns the approximate HIP api version.
Definition: hip_context.cpp:207
hipDeviceProp_t::textureAlignment
size_t textureAlignment
Alignment requirement for textures.
Definition: hip_runtime_api.h:128
hipHostMalloc
hipError_t hipHostMalloc(void **ptr, size_t size, unsigned int flags)
Allocate device accessible page locked host memory.
Definition: hip_memory.cpp:762
hipDeviceAttributeKernelExecTimeout
@ hipDeviceAttributeKernelExecTimeout
Run time limit for kernels executed on the device.
Definition: hip_runtime_api.h:346
hipDeviceAttributeL2CacheSize
@ hipDeviceAttributeL2CacheSize
Definition: hip_runtime_api.h:317
hipDeviceGetName
hipError_t hipDeviceGetName(char *name, int len, hipDevice_t device)
Returns an identifier string for the device.
Definition: hip_device.cpp:446
hipDeviceAttributeMaxTexture3DWidth
@ hipDeviceAttributeMaxTexture3DWidth
Maximum dimension width of 3D images in image elements.
Definition: hip_runtime_api.h:336
hipDeviceArch_t::hasSurfaceFuncs
unsigned hasSurfaceFuncs
Surface functions.
Definition: hip_runtime_api.h:71
hipDeviceAttributeIntegrated
@ hipDeviceAttributeIntegrated
iGPU
Definition: hip_runtime_api.h:330
hipDeviceProp_t::isMultiGpuBoard
int isMultiGpuBoard
1 if device is on a multi-GPU board, 0 if not.
Definition: hip_runtime_api.h:115
hipMemcpyParam2DAsync
hipError_t hipMemcpyParam2DAsync(const hip_Memcpy2D *pCopy, hipStream_t stream __dparm(0))
Copies memory for 2D arrays.
hipModuleUnload
hipError_t hipModuleUnload(hipModule_t module)
Frees the module.
Definition: hip_module.cpp:1244
hipDeviceAttributeMaxGridDimY
@ hipDeviceAttributeMaxGridDimY
Maximum y-dimension of a grid.
Definition: hip_runtime_api.h:302
hipMemoryTypeHost
@ hipMemoryTypeHost
Memory is physically located on host.
Definition: hip_runtime_api.h:156
hipDeviceEnablePeerAccess
hipError_t hipDeviceEnablePeerAccess(int peerDeviceId, unsigned int flags)
Enable direct access from current device's virtual address space to memory allocations physically loc...
Definition: hip_peer.cpp:200
hipErrorInvalidContext
hipErrorInvalidContext
Produced when input context is invalid.
Definition: hip_runtime_api.h:228
hipDeviceArch_t::hasSharedInt64Atomics
unsigned hasSharedInt64Atomics
64-bit integer atomics for shared memory.
Definition: hip_runtime_api.h:55
hipDeviceProp_t::computeMode
int computeMode
Compute mode.
Definition: hip_runtime_api.h:106
hipDeviceAttributeIsMultiGpuBoard
@ hipDeviceAttributeIsMultiGpuBoard
Multiple GPU devices.
Definition: hip_runtime_api.h:329
hipSharedMemConfig
hipSharedMemConfig
Definition: hip_runtime_api.h:318
hipDeviceProp_t::clockRate
int clockRate
Max clock frequency of the multiprocessors in kHz.
Definition: hip_runtime_api.h:93
hipErrorPeerAccessNotEnabled
hipErrorPeerAccessNotEnabled
Peer access was never enabled from the current device.
Definition: hip_runtime_api.h:265
hipFuncGetAttribute
hipError_t hipFuncGetAttribute(int *value, hipFunction_attribute attrib, hipFunction_t hfunc)
Find out a specific attribute for a given function.
Definition: hip_module.cpp:1427
hipDeviceComputeCapability
hipError_t hipDeviceComputeCapability(int *major, int *minor, hipDevice_t device)
Returns the compute capability of the device.
Definition: hip_device.cpp:434
hipModuleOccupancyMaxPotentialBlockSize
hipError_t hipModuleOccupancyMaxPotentialBlockSize(int *gridSize, int *blockSize, hipFunction_t f, size_t dynSharedMemPerBlk, int blockSizeLimit)
Determines the grid and block sizes that achieve maximum occupancy for a kernel.
Definition: hip_module.cpp:1662
hipStreamCallback_t
void(* hipStreamCallback_t)(hipStream_t stream, hipError_t status, void *userData)
Definition: hip_runtime_api.h:972
hipDeviceArch_t::hasDynamicParallelism
unsigned hasDynamicParallelism
Dynamic parallelism.
Definition: hip_runtime_api.h:73
hip_Memcpy2D
Definition: driver_types.h:91
hipDeviceProp_t::canMapHostMemory
int canMapHostMemory
Check whether HIP can map host memory.
Definition: hip_runtime_api.h:116
hipDeviceProp_t::sharedMemPerBlock
size_t sharedMemPerBlock
Size of shared memory region (in bytes).
Definition: hip_runtime_api.h:87
hipCtxGetFlags
hipError_t hipCtxGetFlags(unsigned int *flags)
Return flags used for creating default context.
Definition: hip_context.cpp:254
hipDevicePrimaryCtxGetState
hipError_t hipDevicePrimaryCtxGetState(hipDevice_t dev, unsigned int *flags, int *active)
Get the state of the primary context.
Definition: hip_context.cpp:263
hipDeviceAttributeCooperativeMultiDeviceLaunch
@ hipDeviceAttributeCooperativeMultiDeviceLaunch
Support cooperative launch on multiple devices.
Definition: hip_runtime_api.h:332
hipDeviceProp_t::maxThreadsPerMultiProcessor
int maxThreadsPerMultiProcessor
Maximum resident threads per multi-processor.
Definition: hip_runtime_api.h:105
hipDeviceSetCacheConfig
hipError_t hipDeviceSetCacheConfig(hipFuncCache_t cacheConfig)
Set L1/Shared cache partition.
Definition: hip_device.cpp:74
hipCtxDestroy
hipError_t hipCtxDestroy(hipCtx_t ctx)
Destroy a HIP context.
Definition: hip_context.cpp:109
hipCtxEnablePeerAccess
hipError_t hipCtxEnablePeerAccess(hipCtx_t peerCtx, unsigned int flags)
Enables direct access to memory allocations in a peer context.
Definition: hip_peer.cpp:221
hipDeviceProp_t::major
int major
Definition: hip_runtime_api.h:97
hipDeviceAttributeMaxSharedMemoryPerBlock
@ hipDeviceAttributeMaxSharedMemoryPerBlock
Definition: hip_runtime_api.h:304
hipMemcpyAtoH
hipError_t hipMemcpyAtoH(void *dst, hipArray *srcArray, size_t srcOffset, size_t count)
Copies data between host and device.
Definition: hip_memory.cpp:1544
hipGetDeviceCount
hipError_t hipGetDeviceCount(int *count)
Return number of compute-capable devices.
Definition: hip_device.cpp:69
hipSuccess
hipSuccess
Successful completion.
Definition: hip_runtime_api.h:203
hipHostUnregister
hipError_t hipHostUnregister(void *hostPtr)
Un-register host pointer.
Definition: hip_memory.cpp:1233
hipStreamGetFlags
hipError_t hipStreamGetFlags(hipStream_t stream, unsigned int *flags)
Return flags associated with this stream.
Definition: hip_stream.cpp:223
hipMemsetD8Async
hipError_t hipMemsetD8Async(hipDeviceptr_t dest, unsigned char value, size_t count, hipStream_t stream __dparm(0))
Fills the first sizeBytes bytes of the memory area pointed to by dest with the constant byte value va...
hipDeviceAttributeMaxThreadsPerBlock
@ hipDeviceAttributeMaxThreadsPerBlock
Maximum number of threads per block.
Definition: hip_runtime_api.h:297
hipDeviceProp_t::gcnArch
int gcnArch
AMD GCN Arch Value. Eg: 803, 701.
Definition: hip_runtime_api.h:117
hipStreamSynchronize
hipError_t hipStreamSynchronize(hipStream_t stream)
Wait for all commands in stream to complete.
Definition: hip_stream.cpp:184
hipGetErrorName
const char * hipGetErrorName(hipError_t hip_error)
Return name of the specified error code in text form.
Definition: hip_error.cpp:48
hipDeviceProp_t::kernelExecTimeoutEnabled
int kernelExecTimeoutEnabled
Run time limit for kernels executed on the device.
Definition: hip_runtime_api.h:130
hipDeviceGet
hipError_t hipDeviceGet(hipDevice_t *device, int ordinal)
Returns a handle to a compute device.
Definition: hip_context.cpp:70
hipMemcpyDtoD
hipError_t hipMemcpyDtoD(hipDeviceptr_t dst, hipDeviceptr_t src, size_t sizeBytes)
Copy data from Device to Device.
Definition: hip_memory.cpp:1390
hipDeviceProp_t::maxTexture1D
int maxTexture1D
Maximum number of elements in 1D images.
Definition: hip_runtime_api.h:122
hipMemcpy3DParms
Definition: driver_types.h:383
hipDeviceAttributeMaxBlockDimZ
@ hipDeviceAttributeMaxBlockDimZ
Maximum z-dimension of a block.
Definition: hip_runtime_api.h:300
hipMallocManaged
hipError_t hipMallocManaged(void **dev_ptr, size_t size, unsigned int flags __dparm(hipMemAttachGlobal))
Allocates memory that will be automatically managed by AMD HMM.
hipMemcpyHtoD
hipError_t hipMemcpyHtoD(hipDeviceptr_t dst, void *src, size_t sizeBytes)
Copy data from Host to Device.
Definition: hip_memory.cpp:1374
hipDriverGetVersion
hipError_t hipDriverGetVersion(int *driverVersion)
Returns the approximate HIP driver version.
Definition: hip_context.cpp:85
hipDeviceArch_t::hasDoubles
unsigned hasDoubles
Double-precision floating point.
Definition: hip_runtime_api.h:58
hipErrorInvalidKernelFile
hipErrorInvalidKernelFile
In CUDA DRV, it is CUDA_ERROR_INVALID_PTX.
Definition: hip_runtime_api.h:245
hipDeviceProp_t::maxThreadsPerBlock
int maxThreadsPerBlock
Max work items per work group or workgroup max size.
Definition: hip_runtime_api.h:90
hipDeviceAttributeMaxBlockDimY
@ hipDeviceAttributeMaxBlockDimY
Maximum y-dimension of a block.
Definition: hip_runtime_api.h:299
hipMemcpy2DToArray
hipError_t hipMemcpy2DToArray(hipArray *dst, size_t wOffset, size_t hOffset, const void *src, size_t spitch, size_t width, size_t height, hipMemcpyKind kind)
Copies data between host and device.
Definition: hip_memory.cpp:1444
hipMemAllocPitch
hipError_t hipMemAllocPitch(hipDeviceptr_t *dptr, size_t *pitch, size_t widthInBytes, size_t height, unsigned int elementSizeBytes)
Definition: hip_memory.cpp:862
hipDeviceProp_t
Definition: hip_runtime_api.h:84
hipMemAllocHost
hipError_t hipMemAllocHost(void **ptr, size_t size)
Allocate pinned host memory [Deprecated].
Definition: hip_runtime_api.h:881
hipMallocHost
hipError_t hipMallocHost(void **ptr, size_t size)
Allocate pinned host memory [Deprecated].
Definition: hip_runtime_api.h:875
hipDeviceAttributeMaxTexture2DHeight
@ hipDeviceAttributeMaxTexture2DHeight
Maximum dimension height of 2D images in image elements.
Definition: hip_runtime_api.h:335
hipDeviceArch_t::hasSharedInt32Atomics
unsigned hasSharedInt32Atomics
32-bit integer atomics for shared memory.
Definition: hip_runtime_api.h:49
hipFuncSetCacheConfig
hipError_t hipFuncSetCacheConfig(const void *func, hipFuncCache_t config)
Set Cache configuration for a specific function.
Definition: hip_device.cpp:108
hipErrorInvalidValue
hipErrorInvalidValue
Definition: hip_runtime_api.h:204
hipDeviceProp_t::memPitch
size_t memPitch
Maximum pitch in bytes allowed by memory copies.
Definition: hip_runtime_api.h:127
hipMemsetD32Async
hipError_t hipMemsetD32Async(hipDeviceptr_t dst, int value, size_t count, hipStream_t stream __dparm(0))
Fills the memory area pointed to by dev with the constant integer value for specified number of times...
hipDeviceProp_t::pciBusID
int pciBusID
PCI Bus ID.
Definition: hip_runtime_api.h:112
hipRuntimeGetVersion
hipError_t hipRuntimeGetVersion(int *runtimeVersion)
Returns the approximate HIP Runtime version.
Definition: hip_context.cpp:97
hipDeviceAttributeComputeCapabilityMajor
@ hipDeviceAttributeComputeCapabilityMajor
Major compute capability version number.
Definition: hip_runtime_api.h:321
hipEventQuery
hipError_t hipEventQuery(hipEvent_t event)
Query event status.
Definition: hip_event.cpp:394
hipDeviceAttributeMaxTexture3DDepth
@ hipDeviceAttributeMaxTexture3DDepth
Maximum dimension depth of 3D images in image elements.
Definition: hip_runtime_api.h:338
ihipCtx_t
Definition: hip_hcc_internal.h:938
hipErrorRuntimeMemory
hipErrorRuntimeMemory
Definition: hip_runtime_api.h:282
hipDeviceAttributeMaxThreadsPerMultiProcessor
@ hipDeviceAttributeMaxThreadsPerMultiProcessor
Definition: hip_runtime_api.h:319
hipStreamGetPriority
hipError_t hipStreamGetPriority(hipStream_t stream, int *priority)
Query the priority of a stream.
Definition: hip_stream.cpp:238
hipDeviceProp_t::arch
hipDeviceArch_t arch
Architectural feature flags. New for HIP.
Definition: hip_runtime_api.h:109
hipEventSynchronize
hipError_t hipEventSynchronize(hipEvent_t event)
Wait for an event to complete.
Definition: hip_event.cpp:300
hipOccupancyMaxActiveBlocksPerMultiprocessor
hipError_t hipOccupancyMaxActiveBlocksPerMultiprocessor(int *numBlocks, const void *f, int blockSize, size_t dynSharedMemPerBlk)
Returns occupancy for a device function.
Definition: hip_module.cpp:1683
hipHostFree
hipError_t hipHostFree(void *ptr)
Free memory allocated by the hcc hip host memory allocation API. This API performs an implicit hipDevi...
Definition: hip_memory.cpp:2396
hipDeviceAttributePciBusId
@ hipDeviceAttributePciBusId
PCI Bus ID.
Definition: hip_runtime_api.h:325
hipIpcOpenMemHandle
hipError_t hipIpcOpenMemHandle(void **devPtr, hipIpcMemHandle_t handle, unsigned int flags)
Opens an interprocess memory handle exported from another process and returns a device pointer usable...
Definition: hip_memory.cpp:2494
hipMemsetD16
hipError_t hipMemsetD16(hipDeviceptr_t dest, unsigned short value, size_t count)
Fills the first sizeBytes bytes of the memory area pointed to by dest with the constant short value v...
Definition: hip_memory.cpp:2271
hipDeviceProp_t::tccDriver
int tccDriver
1 if the device is a Tesla device using the TCC driver, else 0.
Definition: hip_runtime_api.h:132
ihipEvent_t
Definition: hip_hcc_internal.h:759
hipDeviceGetLimit
hipError_t hipDeviceGetLimit(size_t *pValue, enum hipLimit_t limit)
Get Resource limits of current device.
Definition: hip_device.cpp:94
hipMalloc
hipError_t hipMalloc(void **ptr, size_t size)
Allocate memory on the default accelerator.
Definition: hip_memory.cpp:695
hipIpcMemHandle_st
Definition: hip_runtime_api.h:111
hipEventElapsedTime
hipError_t hipEventElapsedTime(float *ms, hipEvent_t start, hipEvent_t stop)
Return the elapsed time between two events.
Definition: hip_event.cpp:344
hipGetLastError
hipError_t hipGetLastError(void)
Return last error returned by any HIP runtime API call and resets the stored error code to hipSuccess...
Definition: hip_error.cpp:32
hipInit
hipError_t hipInit(unsigned int flags)
Explicitly initializes the HIP runtime.
Definition: hip_context.cpp:39
ihipStream_t
Definition: hip_hcc_internal.h:580
hipDeviceAttributeTexturePitchAlignment
@ hipDeviceAttributeTexturePitchAlignment
Pitch alignment requirement for 2D texture references bound to pitched memory.
Definition: hip_runtime_api.h:345
hipDeviceAttributeWarpSize
@ hipDeviceAttributeWarpSize
Warp size in threads.
Definition: hip_runtime_api.h:307
hipDeviceArch_t::hasGlobalInt32Atomics
unsigned hasGlobalInt32Atomics
32-bit integer atomics for global memory.
Definition: hip_runtime_api.h:47
hipArray
Definition: driver_types.h:78
hipDeviceArch_t::hasSyncThreadsExt
unsigned hasSyncThreadsExt
__syncthreads_count, __syncthreads_and, __syncthreads_or.
Definition: hip_runtime_api.h:68
hipIpcGetMemHandle
hipError_t hipIpcGetMemHandle(hipIpcMemHandle_t *handle, void *devPtr)
Gets an interprocess memory handle for an existing device memory allocation.
Definition: hip_memory.cpp:2458
hipErrorInvalidDevice
hipErrorInvalidDevice
DeviceID must be in range 0...#compute-devices.
Definition: hip_runtime_api.h:226
hipDeviceArch_t::hasFunnelShift
unsigned hasFunnelShift
Funnel two words into one with shift&mask caps.
Definition: hip_runtime_api.h:64
hipCtxDisablePeerAccess
hipError_t hipCtxDisablePeerAccess(hipCtx_t peerCtx)
Disable direct access from current context's virtual address space to memory allocations physically l...
Definition: hip_peer.cpp:227
hipDeviceAttributeMaxTexture3DHeight
@ hipDeviceAttributeMaxTexture3DHeight
Maximum dimension height of 3D images in image elements.
Definition: hip_runtime_api.h:337
hipDeviceAttributeMemoryClockRate
@ hipDeviceAttributeMemoryClockRate
Peak memory clock frequency in kilohertz.
Definition: hip_runtime_api.h:313
hipErrorNotReady
hipErrorNotReady
Definition: hip_runtime_api.h:256
hipHostGetDevicePointer
hipError_t hipHostGetDevicePointer(void **devPtr, void *hstPtr, unsigned int flags)
Get Device pointer from Host Pointer allocated through hipHostMalloc.
hipMemGetInfo
hipError_t hipMemGetInfo(size_t *free, size_t *total)
Query memory info. Return snapshot of free memory, and total allocatable memory on the device.
Definition: hip_memory.cpp:2296
hipEventDestroy
hipError_t hipEventDestroy(hipEvent_t event)
Destroy the specified event.
Definition: hip_event.cpp:278
hipDeviceSetSharedMemConfig
hipError_t hipDeviceSetSharedMemConfig(hipSharedMemConfig config)
The bank width of shared memory on current device is set.
Definition: hip_device.cpp:116
hipDeviceReset
hipError_t hipDeviceReset(void)
The state of current device is discarded and updated to a fresh state.
Definition: hip_device.cpp:148
hipDeviceProp_t::maxGridSize
int maxGridSize[3]
Max grid dimensions (XYZ).
Definition: hip_runtime_api.h:92
hipDeviceAttributeComputeMode
@ hipDeviceAttributeComputeMode
Compute mode that device is currently in.
Definition: hip_runtime_api.h:316
hipSetDeviceFlags
hipError_t hipSetDeviceFlags(unsigned flags)
The current device behavior is changed according to the flags passed.
hipDeviceAttributePciDeviceId
@ hipDeviceAttributePciDeviceId
PCI Device ID.
Definition: hip_runtime_api.h:326
hipDeviceProp_t::maxSharedMemoryPerMultiProcessor
size_t maxSharedMemoryPerMultiProcessor
Maximum Shared Memory Per Multiprocessor.
Definition: hip_runtime_api.h:114
hipDeviceProp_t::clockInstructionRate
int clockInstructionRate
Definition: hip_runtime_api.h:107
dim3
Definition: hip_runtime_api.h:330
hipStreamQuery
hipError_t hipStreamQuery(hipStream_t stream)
Return hipSuccess if all of the operations in the specified stream have completed,...
Definition: hip_stream.cpp:161
hipDevicePrimaryCtxSetFlags
hipError_t hipDevicePrimaryCtxSetFlags(hipDevice_t dev, unsigned int flags)
Set flags for the primary context.
Definition: hip_context.cpp:321
hipPointerAttribute_t
Definition: hip_runtime_api.h:168
hipDeviceAttributeTotalConstantMemory
@ hipDeviceAttributeTotalConstantMemory
Constant memory size in bytes.
Definition: hip_runtime_api.h:306
hipFree
hipError_t hipFree(void *ptr)
Free memory allocated by the hcc hip memory allocation API. This API performs an implicit hipDeviceSy...
Definition: hip_memory.cpp:2344
hipDeviceArch_t::hasWarpShuffle
unsigned hasWarpShuffle
Warp shuffle operations. (__shfl_*).
Definition: hip_runtime_api.h:63
hipArrayDefault
#define hipArrayDefault
Default HIP array allocation flag.
Definition: hip_runtime_api.h:221
hipDevicePrimaryCtxRetain
hipError_t hipDevicePrimaryCtxRetain(hipCtx_t *pctx, hipDevice_t dev)
Retain the primary context on the GPU.
Definition: hip_context.cpp:296
hipOccupancyMaxPotentialBlockSize
hipError_t hipOccupancyMaxPotentialBlockSize(int *gridSize, int *blockSize, const void *f, size_t dynSharedMemPerBlk, int blockSizeLimit)
Determine the grid and block sizes to achieve maximum occupancy for a kernel.
hipModuleLoad
hipError_t hipModuleLoad(hipModule_t *module, const char *fname)
Loads code object from file into a hipModule_t.
Definition: hip_module.cpp:1513
hipModuleOccupancyMaxActiveBlocksPerMultiprocessorWithFlags
hipError_t hipModuleOccupancyMaxActiveBlocksPerMultiprocessorWithFlags(int *numBlocks, hipFunction_t f, int blockSize, size_t dynSharedMemPerBlk, unsigned int flags)
Returns occupancy for a device function.
Definition: hip_module.cpp:1714
hipFreeHost
hipError_t hipFreeHost(void *ptr)
Free memory allocated by the hcc hip host memory allocation API. [Deprecated].
Definition: hip_runtime_api.h:932
hipMemcpyHtoA
hipError_t hipMemcpyHtoA(hipArray *dstArray, size_t dstOffset, const void *srcHost, size_t count)
Copies data between host and device.
Definition: hip_memory.cpp:1528
hipModuleGetFunction
hipError_t hipModuleGetFunction(hipFunction_t *function, hipModule_t module, const char *kname)
Function with kname will be extracted if present in module.
Definition: hip_module.cpp:1309
hipDeviceProp_t::memoryBusWidth
int memoryBusWidth
Global memory bus width in bits.
Definition: hip_runtime_api.h:95
hipStreamAddCallback
hipError_t hipStreamAddCallback(hipStream_t stream, hipStreamCallback_t callback, void *userData, unsigned int flags)
Adds a callback to be called on the host after all currently enqueued items in the stream have comple...
Definition: hip_stream.cpp:258
hipDeviceArch_t::hasWarpVote
unsigned hasWarpVote
Warp vote instructions (__any, __all).
Definition: hip_runtime_api.h:61
hipDeviceProp_t::name
char name[256]
Device name.
Definition: hip_runtime_api.h:85
hipMemcpyDtoHAsync
hipError_t hipMemcpyDtoHAsync(void *dst, hipDeviceptr_t src, size_t sizeBytes, hipStream_t stream)
Copy data from Device to Host asynchronously.
Definition: hip_memory.cpp:1437
hipDeviceArch_t::hasGlobalFloatAtomicExch
unsigned hasGlobalFloatAtomicExch
32-bit float atomic exch for global memory.
Definition: hip_runtime_api.h:48
hipDeviceProp_t::concurrentKernels
int concurrentKernels
Device can possibly execute multiple kernels concurrently.
Definition: hip_runtime_api.h:110
hipDeviceArch_t::hasWarpBallot
unsigned hasWarpBallot
Warp ballot instructions (__ballot).
Definition: hip_runtime_api.h:62
hipDeviceProp_t::totalGlobalMem
size_t totalGlobalMem
Size of global memory region (in bytes).
Definition: hip_runtime_api.h:86
hipDeviceAttributeTextureAlignment
@ hipDeviceAttributeTextureAlignment
Alignment requirement for textures.
Definition: hip_runtime_api.h:344
hipFuncGetAttributes
hipError_t hipFuncGetAttributes(struct hipFuncAttributes *attr, const void *func)
Find out attributes for a given function.
Definition: hip_module.cpp:1393
hipEventRecord
hipError_t hipEventRecord(hipEvent_t event, hipStream_t stream)
Record an event in the specified stream.
Definition: hip_event.cpp:213
hipMemcpy2D
hipError_t hipMemcpy2D(void *dst, size_t dpitch, const void *src, size_t spitch, size_t width, size_t height, hipMemcpyKind kind)
Copies data between host and device.
Definition: hip_memory.cpp:2020
hipExtent
Definition: driver_types.h:370
hipPitchedPtr
Definition: driver_types.h:363
hipModuleGetGlobal
hipError_t hipModuleGetGlobal(void **, size_t *, hipModule_t, const char *)
Returns the device memory pointer and size of the kernel present in the module with the given symbol name.
Definition: hip_module.cpp:1113
hipMemset2D
hipError_t hipMemset2D(void *dst, size_t pitch, int value, size_t width, size_t height)
Fills the memory area pointed to by dst with the constant value.
Definition: hip_memory.cpp:2251
hipMemset3D
hipError_t hipMemset3D(hipPitchedPtr pitchedDevPtr, int value, hipExtent extent)
Fills synchronously the memory area pointed to by pitchedDevPtr with the constant value.
Definition: hip_memory.cpp:2286
hipStreamCreateWithFlags
hipError_t hipStreamCreateWithFlags(hipStream_t *stream, unsigned int flags)
Create an asynchronous stream.
Definition: hip_stream.cpp:97
hipDeviceGetAttribute
hipError_t hipDeviceGetAttribute(int *pi, hipDeviceAttribute_t attr, int deviceId)
Query for a specific device attribute.
Definition: hip_device.cpp:354
hipMemcpyFromArray
hipError_t hipMemcpyFromArray(void *dst, hipArray_const_t srcArray, size_t wOffset, size_t hOffset, size_t count, hipMemcpyKind kind)
Copies data between host and device.
Definition: hip_memory.cpp:1511
hipDeviceAttributeCanMapHostMemory
@ hipDeviceAttributeCanMapHostMemory
Device can map host memory into device address space.
Definition: hip_runtime_api.h:347
hipDeviceProp_t::maxThreadsDim
int maxThreadsDim[3]
Max number of threads in each dimension (XYZ) of a block.
Definition: hip_runtime_api.h:91
ihipModuleSymbol_t
Definition: hip_module.cpp:108
hipMemcpyPeerAsync
hipError_t hipMemcpyPeerAsync(void *dst, int dstDeviceId, const void *src, int srcDevice, size_t sizeBytes, hipStream_t stream __dparm(0))
Copies memory from one device to memory on another device.
hipMemcpyHtoDAsync
hipError_t hipMemcpyHtoDAsync(hipDeviceptr_t dst, void *src, size_t sizeBytes, hipStream_t stream)
Copy data from Host to Device asynchronously.
Definition: hip_memory.cpp:1422
hipDeviceProp_t::cooperativeMultiDeviceLaunch
int cooperativeMultiDeviceLaunch
HIP device supports cooperative launch on multiple devices.
Definition: hip_runtime_api.h:121
hipMemcpyDtoH
hipError_t hipMemcpyDtoH(void *dst, hipDeviceptr_t src, size_t sizeBytes)
Copy data from Device to Host.
Definition: hip_memory.cpp:1382
hipDeviceArch_t::has3dGrid
unsigned has3dGrid
Grid and group dims are 3D (rather than 2D).
Definition: hip_runtime_api.h:72
hipDeviceGetCacheConfig
hipError_t hipDeviceGetCacheConfig(hipFuncCache_t *cacheConfig)
Get the current cache configuration.
Definition: hip_device.cpp:82
hipMemcpyPeer
hipError_t hipMemcpyPeer(void *dst, int dstDeviceId, const void *src, int srcDeviceId, size_t sizeBytes)
Copies memory from one device to memory on another device.
Definition: hip_peer.cpp:207
hipDeviceAttributeMaxTexture1DWidth
@ hipDeviceAttributeMaxTexture1DWidth
Maximum number of elements in 1D images.
Definition: hip_runtime_api.h:333
hipDeviceAttributeCooperativeLaunch
@ hipDeviceAttributeCooperativeLaunch
Support cooperative launch.
Definition: hip_runtime_api.h:331
hipDeviceAttributeMultiprocessorCount
@ hipDeviceAttributeMultiprocessorCount
Number of multiprocessors on the device.
Definition: hip_runtime_api.h:315
hipDeviceProp_t::pciDeviceID
int pciDeviceID
PCI Device ID.
Definition: hip_runtime_api.h:113
hipGetDeviceProperties
hipError_t hipGetDeviceProperties(hipDeviceProp_t *prop, int deviceId)
Returns device properties.
Definition: hip_device.cpp:381
hipMemcpy
hipError_t hipMemcpy(void *dst, const void *src, size_t sizeBytes, hipMemcpyKind kind)
Copy data from src to dst.
Definition: hip_memory.cpp:1367
hipDeviceProp_t::memoryClockRate
int memoryClockRate
Max global memory clock frequency in kHz.
Definition: hip_runtime_api.h:94
hipEventCreateWithFlags
hipError_t hipEventCreateWithFlags(hipEvent_t *event, unsigned flags)
Create an event with the specified flags.
Definition: hip_event.cpp:201
hipErrorCooperativeLaunchTooLarge
hipErrorCooperativeLaunchTooLarge
Definition: hip_runtime_api.h:275
hipDeviceProp_t::warpSize
int warpSize
Warp size.
Definition: hip_runtime_api.h:89
hipCtxGetSharedMemConfig
hipError_t hipCtxGetSharedMemConfig(hipSharedMemConfig *pConfig)
Get Shared memory bank configuration.
Definition: hip_context.cpp:241
hipDeviceTotalMem
hipError_t hipDeviceTotalMem(size_t *bytes, hipDevice_t device)
Returns the total amount of memory on the device.
Definition: hip_device.cpp:480
hipFreeArray
hipError_t hipFreeArray(hipArray *array)
Frees an array on the device.
Definition: hip_memory.cpp:2409
hipErrorAssert
hipErrorAssert
Produced when the kernel calls assert.
Definition: hip_runtime_api.h:268
textureReference
Definition: texture_types.h:74
hipCtxPopCurrent
hipError_t hipCtxPopCurrent(hipCtx_t *ctx)
Pop the current/default context and return the popped context.
Definition: hip_context.cpp:133
hipDeviceProp_t::cooperativeMultiDeviceUnmatchedFunc
int cooperativeMultiDeviceUnmatchedFunc
Definition: hip_runtime_api.h:133
hipDeviceProp_t::cooperativeMultiDeviceUnmatchedGridDim
int cooperativeMultiDeviceUnmatchedGridDim
Definition: hip_runtime_api.h:135
hipDeviceCanAccessPeer
hipError_t hipDeviceCanAccessPeer(int *canAccessPeer, int deviceId, int peerDeviceId)
Determine if a device can access a peer's memory.
Definition: hip_peer.cpp:186
hipDeviceArch_t::hasFloatAtomicAdd
unsigned hasFloatAtomicAdd
32-bit float atomic add in global and shared memory.
Definition: hip_runtime_api.h:51
hipCtxSetCurrent
hipError_t hipCtxSetCurrent(hipCtx_t ctx)
Set the passed context as current/default.
Definition: hip_context.cpp:178
hipDeviceProp_t::cooperativeLaunch
int cooperativeLaunch
HIP device supports cooperative launch.
Definition: hip_runtime_api.h:120
hipDeviceArch_t::hasSharedFloatAtomicExch
unsigned hasSharedFloatAtomicExch
32-bit float atomic exch for shared memory.
Definition: hip_runtime_api.h:50
hipTextureDesc
Definition: texture_types.h:95
hipResourceViewDesc
Definition: driver_types.h:323
hipDeviceProp_t::multiProcessorCount
int multiProcessorCount
Number of multi-processors (compute units).
Definition: hip_runtime_api.h:103
hipDeviceProp_t::integrated
int integrated
APU vs dGPU.
Definition: hip_runtime_api.h:119
hipMemsetD8
hipError_t hipMemsetD8(hipDeviceptr_t dest, unsigned char value, size_t count)
Fills the first sizeBytes bytes of the memory area pointed to by dest with the constant byte value va...
Definition: hip_memory.cpp:2261
hipCtxSetCacheConfig
hipError_t hipCtxSetCacheConfig(hipFuncCache_t cacheConfig)
Set L1/Shared cache partition.
Definition: hip_context.cpp:225
hipMemset2DAsync
hipError_t hipMemset2DAsync(void *dst, size_t pitch, int value, size_t width, size_t height, hipStream_t stream __dparm(0))
Fills asynchronously the memory area pointed to by dst with the constant value.
hipDeviceProp_t::ECCEnabled
int ECCEnabled
Device has ECC support enabled.
Definition: hip_runtime_api.h:131
HIP_ARRAY_DESCRIPTOR
Definition: driver_types.h:62
hipCtxCreate
hipError_t hipCtxCreate(hipCtx_t *ctx, unsigned int flags, hipDevice_t device)
Create a context and set it as current/default context.
Definition: hip_context.cpp:52
hipDeviceProp_t::totalConstMem
size_t totalConstMem
Size of constant memory region (in bytes).
Definition: hip_runtime_api.h:96
hipDeviceProp_t::maxTexture2D
int maxTexture2D[2]
Maximum dimensions (width, height) of 2D images, in image elements.
Definition: hip_runtime_api.h:123
hipLaunchParams_t
Definition: hip_runtime_api.h:339
hipErrorHostMemoryAlreadyRegistered
hipErrorHostMemoryAlreadyRegistered
Produced when trying to lock a page-locked memory.
Definition: hip_runtime_api.h:269
hipFuncAttribute
hipFuncAttribute
Definition: hip_runtime_api.h:299
hipCtxSetSharedMemConfig
hipError_t hipCtxSetSharedMemConfig(hipSharedMemConfig config)
Set Shared memory bank configuration.
Definition: hip_context.cpp:233
hipModuleOccupancyMaxActiveBlocksPerMultiprocessor
hipError_t hipModuleOccupancyMaxActiveBlocksPerMultiprocessor(int *numBlocks, hipFunction_t f, int blockSize, size_t dynSharedMemPerBlk)
Returns occupancy for a device function.
Definition: hip_module.cpp:1693
hipDeviceAttribute_t
hipDeviceAttribute_t
Definition: hip_runtime_api.h:296
hipFuncSetSharedMemConfig
hipError_t hipFuncSetSharedMemConfig(const void *func, hipSharedMemConfig config)
Set shared memory configuration for a specific function.
Definition: hip_module.cpp:1419
hipResourceDesc
Definition: driver_types.h:262
hipErrorLaunchFailure
hipErrorLaunchFailure
An exception occurred on the device while executing a kernel.
Definition: hip_runtime_api.h:273
ihipModule_t
Definition: hip_hcc_internal.h:415
hipDeviceSynchronize
hipError_t hipDeviceSynchronize(void)
Waits on all active streams on current device.
Definition: hip_device.cpp:143
hipDeviceAttributeConcurrentKernels
@ hipDeviceAttributeConcurrentKernels
Definition: hip_runtime_api.h:323
hipDeviceProp_t::cooperativeMultiDeviceUnmatchedSharedMem
int cooperativeMultiDeviceUnmatchedSharedMem
Definition: hip_runtime_api.h:139
hipProfilerStart
hipError_t hipProfilerStart()
Start recording of profiling information. When using this API, start the profiler with profiling disab...
Definition: hip_hcc.cpp:2496
hipDeviceGetSharedMemConfig
hipError_t hipDeviceGetSharedMemConfig(hipSharedMemConfig *pConfig)
Returns bank width of shared memory for current device.
Definition: hip_device.cpp:124
hipErrorNotSupported
hipErrorNotSupported
Produced when the hip API is not supported/implemented.
Definition: hip_runtime_api.h:279
hipMemcpyAsync
hipError_t hipMemcpyAsync(void *dst, const void *src, size_t sizeBytes, hipMemcpyKind kind, hipStream_t stream __dparm(0))
Copies sizeBytes bytes from the memory area pointed to by src to the memory area pointed to by offset...
hipErrorLaunchOutOfResources
hipErrorLaunchOutOfResources
Out of resources error.
Definition: hip_runtime_api.h:261
hipStreamDestroy
hipError_t hipStreamDestroy(hipStream_t stream)
Destroys the specified stream.
Definition: hip_stream.cpp:195
hipHostRegister
hipError_t hipHostRegister(void *hostPtr, size_t sizeBytes, unsigned int flags)
Register host memory so it can be accessed from the current device.
Definition: hip_memory.cpp:1158
hipFuncSetAttribute
hipError_t hipFuncSetAttribute(const void *func, hipFuncAttribute attr, int value)
Set attribute for a specific function.
Definition: hip_module.cpp:1411
hipProfilerStop
hipError_t hipProfilerStop()
Stop recording of profiling information. When using this API, start the profiler with profiling disab...
Definition: hip_hcc.cpp:2502
hipModuleLoadDataEx
hipError_t hipModuleLoadDataEx(hipModule_t *module, const void *image, unsigned int numOptions, hipJitOption *options, void **optionValues)
Builds module from code object which resides in host memory. Image is a pointer to that location....
Definition: hip_module.cpp:1527
hipEventCreate
hipError_t hipEventCreate(hipEvent_t *event)
Definition: hip_event.cpp:207
hipMemsetAsync
hipError_t hipMemsetAsync(void *dst, int value, size_t sizeBytes, hipStream_t stream __dparm(0))
Fills the first sizeBytes bytes of the memory area pointed to by dev with the constant byte value val...
hipDeviceAttributeMaxTexture2DWidth
@ hipDeviceAttributeMaxTexture2DWidth
Maximum dimension width of 2D images in image elements.
Definition: hip_runtime_api.h:334
hipDeviceProp_t::pciDomainID
int pciDomainID
PCI Domain ID.
Definition: hip_runtime_api.h:111
hipCtxGetCacheConfig
hipError_t hipCtxGetCacheConfig(hipFuncCache_t *cacheConfig)
Get the current cache configuration.
Definition: hip_context.cpp:217
hipMemcpyParam2D
hipError_t hipMemcpyParam2D(const hip_Memcpy2D *pCopy)
Copies memory for 2D arrays.
Definition: hip_memory.cpp:2144
hipHostAlloc
hipError_t hipHostAlloc(void **ptr, size_t size, unsigned int flags)
Allocate device accessible page locked host memory [Deprecated].
Definition: hip_runtime_api.h:887
hipMemset
hipError_t hipMemset(void *dst, int value, size_t sizeBytes)
Fills the first sizeBytes bytes of the memory area pointed to by dest with the constant byte value va...
Definition: hip_memory.cpp:2220
hipDeviceDisablePeerAccess
hipError_t hipDeviceDisablePeerAccess(int peerDeviceId)
Disable direct access from current device's virtual address space to memory allocations physically lo...
Definition: hip_peer.cpp:193
hipDeviceAttributeEccEnabled
@ hipDeviceAttributeEccEnabled
Device has ECC support enabled.
Definition: hip_runtime_api.h:348
hipDeviceAttributeMaxSharedMemoryPerMultiprocessor
@ hipDeviceAttributeMaxSharedMemoryPerMultiprocessor
Definition: hip_runtime_api.h:327
hipDevicePrimaryCtxReset
hipError_t hipDevicePrimaryCtxReset(hipDevice_t dev)
Resets the primary context on the GPU.
Definition: hip_context.cpp:308
hipChannelFormatDesc
Definition: driver_types.h:38
hipErrorPeerAccessAlreadyEnabled
hipErrorPeerAccessAlreadyEnabled
Peer access was already enabled from the current device.
Definition: hip_runtime_api.h:263