HIP: Heterogeneous-computing Interface for Portability
|
Classes | |
struct | dim3 |
struct | hipLaunchParams_t |
struct | hipExternalMemoryHandleDesc_st |
struct | hipExternalMemoryBufferDesc_st |
struct | hipExternalSemaphoreHandleDesc_st |
struct | hipExternalSemaphoreSignalParams_st |
struct | hipExternalSemaphoreWaitParams_st |
Macros | |
#define | __HIP_NODISCARD |
#define | hipStreamDefault 0x00 |
#define | hipStreamNonBlocking 0x01 |
#define | hipEventDefault 0x0 |
#define | hipEventBlockingSync 0x1 |
#define | hipEventDisableTiming 0x2 |
#define | hipEventInterprocess 0x4 |
#define | hipEventReleaseToDevice 0x40000000 |
#define | hipEventReleaseToSystem 0x80000000 |
#define | hipHostMallocDefault 0x0 |
#define | hipHostMallocPortable 0x1 |
#define | hipHostMallocMapped 0x2 |
#define | hipHostMallocWriteCombined 0x4 |
#define | hipHostMallocNumaUser 0x20000000 |
#define | hipHostMallocCoherent 0x40000000 |
#define | hipHostMallocNonCoherent 0x80000000 |
#define | hipMemAttachGlobal 0x01 |
#define | hipMemAttachHost 0x02 |
#define | hipMemAttachSingle 0x04 |
#define | hipDeviceMallocDefault 0x0 |
#define | hipDeviceMallocFinegrained 0x1 |
#define | hipMallocSignalMemory 0x2 |
#define | hipHostRegisterDefault 0x0 |
#define | hipHostRegisterPortable 0x1 |
#define | hipHostRegisterMapped 0x2 |
#define | hipHostRegisterIoMemory 0x4 |
#define | hipExtHostRegisterCoarseGrained 0x8 |
#define | hipDeviceScheduleAuto 0x0 |
#define | hipDeviceScheduleSpin 0x1 |
#define | hipDeviceScheduleYield 0x2 |
#define | hipDeviceScheduleBlockingSync 0x4 |
#define | hipDeviceScheduleMask 0x7 |
#define | hipDeviceMapHost 0x8 |
#define | hipDeviceLmemResizeToMax 0x16 |
#define | hipArrayDefault 0x00 |
#define | hipArrayLayered 0x01 |
#define | hipArraySurfaceLoadStore 0x02 |
#define | hipArrayCubemap 0x04 |
#define | hipArrayTextureGather 0x08 |
#define | hipOccupancyDefault 0x00 |
#define | hipCooperativeLaunchMultiDeviceNoPreSync 0x01 |
#define | hipCooperativeLaunchMultiDeviceNoPostSync 0x02 |
#define | hipCpuDeviceId ((int)-1) |
#define | hipInvalidDeviceId ((int)-2) |
#define | hipExtAnyOrderLaunch 0x01 |
#define | hipStreamWaitValueGte 0x0 |
#define | hipStreamWaitValueEq 0x1 |
#define | hipStreamWaitValueAnd 0x2 |
#define | hipStreamWaitValueNor 0x3 |
#define | hipStreamPerThread ((hipStream_t)2) |
Typedefs | |
typedef enum __HIP_NODISCARD hipError_t | hipError_t |
typedef enum hipDeviceAttribute_t | hipDeviceAttribute_t |
typedef enum hipMemoryAdvise | hipMemoryAdvise |
typedef enum hipMemRangeCoherencyMode | hipMemRangeCoherencyMode |
typedef enum hipMemRangeAttribute | hipMemRangeAttribute |
typedef enum hipJitOption | hipJitOption |
typedef enum hipFuncAttribute | hipFuncAttribute |
typedef enum hipFuncCache_t | hipFuncCache_t |
typedef enum hipSharedMemConfig | hipSharedMemConfig |
typedef struct dim3 | dim3 |
typedef struct hipLaunchParams_t | hipLaunchParams |
typedef enum hipExternalMemoryHandleType_enum | hipExternalMemoryHandleType |
typedef struct hipExternalMemoryHandleDesc_st | hipExternalMemoryHandleDesc |
typedef struct hipExternalMemoryBufferDesc_st | hipExternalMemoryBufferDesc |
typedef void * | hipExternalMemory_t |
typedef enum hipExternalSemaphoreHandleType_enum | hipExternalSemaphoreHandleType |
typedef struct hipExternalSemaphoreHandleDesc_st | hipExternalSemaphoreHandleDesc |
typedef void * | hipExternalSemaphore_t |
typedef struct hipExternalSemaphoreSignalParams_st | hipExternalSemaphoreSignalParams |
typedef struct hipExternalSemaphoreWaitParams_st | hipExternalSemaphoreWaitParams |
typedef enum hipGLDeviceList | hipGLDeviceList |
typedef enum hipGraphicsRegisterFlags | hipGraphicsRegisterFlags |
typedef struct _hipGraphicsResource | hipGraphicsResource |
typedef hipGraphicsResource * | hipGraphicsResource_t |
#define hipArrayDefault 0x00 |
Default HIP array allocation flag.
#define hipDeviceMallocFinegrained 0x1 |
Memory is allocated in fine grained region of device.
#define hipDeviceScheduleAuto 0x0 |
Automatically select between Spin and Yield.
#define hipDeviceScheduleSpin 0x1 |
Dedicate a CPU core to spin-wait. Provides lowest latency, but burns a CPU core and may consume more power.
#define hipDeviceScheduleYield 0x2 |
Yield the CPU to the operating system when waiting. May increase latency, but lowers power and is friendlier to other threads in the system.
#define hipEventBlockingSync 0x1 |
Waiting will yield CPU. Power-friendly and usage-friendly but may increase latency.
#define hipEventDefault 0x0 |
Default flags.
#define hipEventDisableTiming 0x2 |
Disable event's capability to record timing information. May improve performance.
#define hipEventInterprocess 0x4 |
Event can support IPC. Warning: It is not supported in HIP.
#define hipEventReleaseToDevice 0x40000000 |
Use a device-scope release when recording this event. This flag is useful to obtain more precise timings of commands between events. The flag is a no-op on CUDA platforms.
#define hipEventReleaseToSystem 0x80000000 |
Use a system-scope release when recording this event. This flag is useful to make non-coherent host memory visible to the host. The flag is a no-op on CUDA platforms.
#define hipExtAnyOrderLaunch 0x01 |
AnyOrderLaunch of kernels.
#define hipExtHostRegisterCoarseGrained 0x8 |
Coarse Grained host memory lock.
#define hipHostMallocCoherent 0x40000000 |
Allocate coherent memory. Overrides HIP_COHERENT_HOST_ALLOC for specific allocation.
#define hipHostMallocDefault 0x0 |
Default pinned memory allocation on the host.
#define hipHostMallocMapped 0x2 |
Map the allocation into the address space for the current device. The device pointer can be obtained with hipHostGetDevicePointer.
#define hipHostMallocNonCoherent 0x80000000 |
Allocate non-coherent memory. Overrides HIP_COHERENT_HOST_ALLOC for specific allocation.
#define hipHostMallocNumaUser 0x20000000 |
Host memory allocation will follow numa policy set by user.
#define hipHostMallocPortable 0x1 |
Memory is considered allocated by all contexts.
#define hipHostMallocWriteCombined 0x4 |
Allocates the memory as write-combined. On some system configurations, write-combined allocation may be transferred faster across the PCI Express bus; however, it could have low read efficiency on most CPUs. It's a good option for data transfer from host to device via mapped pinned memory.
#define hipHostRegisterDefault 0x0 |
Memory is Mapped and Portable.
#define hipHostRegisterIoMemory 0x4 |
Not supported.
#define hipHostRegisterMapped 0x2 |
Map the allocation into the address space for the current device. The device pointer can be obtained with hipHostGetDevicePointer.
#define hipHostRegisterPortable 0x1 |
Memory is considered registered by all contexts.
#define hipMallocSignalMemory 0x2 |
Memory represents a HSA signal.
#define hipMemAttachGlobal 0x01 |
Memory can be accessed by any stream on any device.
#define hipMemAttachHost 0x02 |
Memory cannot be accessed by any stream on any device.
#define hipMemAttachSingle 0x04 |
Memory can only be accessed by a single stream on the associated device.
#define hipStreamDefault 0x00 |
Default stream creation flags. These are used with hipStreamCreate().
#define hipStreamNonBlocking 0x01 |
Stream does not implicitly synchronize with null stream.
#define hipStreamPerThread ((hipStream_t)2) |
Implicit stream per application thread.
typedef struct hipExternalSemaphoreWaitParams_st hipExternalSemaphoreWaitParams |
External semaphore wait parameters, compatible with driver type
typedef enum hipFuncAttribute hipFuncAttribute |
typedef enum hipFuncCache_t hipFuncCache_t |
typedef enum hipSharedMemConfig hipSharedMemConfig |
enum hipDeviceAttribute_t |
Enumerator | |
---|---|
hipDeviceAttributeEccEnabled | Whether ECC support is enabled. |
hipDeviceAttributeAccessPolicyMaxWindowSize | Cuda only. The maximum size of the window policy in bytes. |
hipDeviceAttributeAsyncEngineCount | Cuda only. Asynchronous engines number. |
hipDeviceAttributeCanMapHostMemory | Whether host memory can be mapped into device address space. |
hipDeviceAttributeCanUseHostPointerForRegisteredMem | Cuda only. Device can access host registered memory at the same virtual address as the CPU |
hipDeviceAttributeClockRate | Peak clock frequency in kilohertz. |
hipDeviceAttributeComputeMode | Compute mode that device is currently in. |
hipDeviceAttributeComputePreemptionSupported | Cuda only. Device supports Compute Preemption. |
hipDeviceAttributeConcurrentKernels | Device can possibly execute multiple kernels concurrently. |
hipDeviceAttributeConcurrentManagedAccess | Device can coherently access managed memory concurrently with the CPU. |
hipDeviceAttributeCooperativeLaunch | Support cooperative launch. |
hipDeviceAttributeCooperativeMultiDeviceLaunch | Support cooperative launch on multiple devices. |
hipDeviceAttributeDeviceOverlap | Cuda only. Device can concurrently copy memory and execute a kernel. Deprecated. Use asyncEngineCount instead. |
hipDeviceAttributeDirectManagedMemAccessFromHost | Host can directly access managed memory on the device without migration |
hipDeviceAttributeGlobalL1CacheSupported | Cuda only. Device supports caching globals in L1. |
hipDeviceAttributeHostNativeAtomicSupported | Cuda only. Link between the device and the host supports native atomic operations. |
hipDeviceAttributeIntegrated | Device is integrated GPU. |
hipDeviceAttributeIsMultiGpuBoard | Multiple GPU devices. |
hipDeviceAttributeKernelExecTimeout | Run time limit for kernels executed on the device. |
hipDeviceAttributeL2CacheSize | Size of L2 cache in bytes. 0 if the device doesn't have L2 cache. |
hipDeviceAttributeLocalL1CacheSupported | caching locals in L1 is supported |
hipDeviceAttributeLuid | Cuda only. 8-byte locally unique identifier. Undefined on TCC and non-Windows platforms. |
hipDeviceAttributeLuidDeviceNodeMask | Cuda only. Luid device node mask. Undefined on TCC and non-Windows platforms. |
hipDeviceAttributeComputeCapabilityMajor | Major compute capability version number. |
hipDeviceAttributeManagedMemory | Device supports allocating managed memory on this system. |
hipDeviceAttributeMaxBlocksPerMultiProcessor | Cuda only. Maximum number of blocks per multiprocessor. |
hipDeviceAttributeMaxBlockDimX | Max block size in width. |
hipDeviceAttributeMaxBlockDimY | Max block size in height. |
hipDeviceAttributeMaxBlockDimZ | Max block size in depth. |
hipDeviceAttributeMaxGridDimX | Max grid size in width. |
hipDeviceAttributeMaxGridDimY | Max grid size in height. |
hipDeviceAttributeMaxGridDimZ | Max grid size in depth. |
hipDeviceAttributeMaxSurface1D | Maximum size of 1D surface. |
hipDeviceAttributeMaxSurface1DLayered | Cuda only. Maximum dimensions of 1D layered surface. |
hipDeviceAttributeMaxSurface2D | Maximum dimension (width, height) of 2D surface. |
hipDeviceAttributeMaxSurface2DLayered | Cuda only. Maximum dimensions of 2D layered surface. |
hipDeviceAttributeMaxSurface3D | Maximum dimension (width, height, depth) of 3D surface. |
hipDeviceAttributeMaxSurfaceCubemap | Cuda only. Maximum dimensions of Cubemap surface. |
hipDeviceAttributeMaxSurfaceCubemapLayered | Cuda only. Maximum dimension of Cubemap layered surface. |
hipDeviceAttributeMaxTexture1DWidth | Maximum size of 1D texture. |
hipDeviceAttributeMaxTexture1DLayered | Cuda only. Maximum dimensions of 1D layered texture. |
hipDeviceAttributeMaxTexture1DLinear | Maximum number of elements allocatable in a 1D linear texture. Use cudaDeviceGetTexture1DLinearMaxWidth() instead on Cuda. |
hipDeviceAttributeMaxTexture1DMipmap | Cuda only. Maximum size of 1D mipmapped texture. |
hipDeviceAttributeMaxTexture2DWidth | Maximum dimension width of 2D texture. |
hipDeviceAttributeMaxTexture2DHeight | Maximum dimension height of 2D texture. |
hipDeviceAttributeMaxTexture2DGather | Cuda only. Maximum dimensions of 2D texture if gather operations performed. |
hipDeviceAttributeMaxTexture2DLayered | Cuda only. Maximum dimensions of 2D layered texture. |
hipDeviceAttributeMaxTexture2DLinear | Cuda only. Maximum dimensions (width, height, pitch) of 2D textures bound to pitched memory. |
hipDeviceAttributeMaxTexture2DMipmap | Cuda only. Maximum dimensions of 2D mipmapped texture. |
hipDeviceAttributeMaxTexture3DWidth | Maximum dimension width of 3D texture. |
hipDeviceAttributeMaxTexture3DHeight | Maximum dimension height of 3D texture. |
hipDeviceAttributeMaxTexture3DDepth | Maximum dimension depth of 3D texture. |
hipDeviceAttributeMaxTexture3DAlt | Cuda only. Maximum dimensions of alternate 3D texture. |
hipDeviceAttributeMaxTextureCubemap | Cuda only. Maximum dimensions of Cubemap texture. |
hipDeviceAttributeMaxTextureCubemapLayered | Cuda only. Maximum dimensions of Cubemap layered texture. |
hipDeviceAttributeMaxThreadsDim | Maximum dimension of a block. |
hipDeviceAttributeMaxThreadsPerBlock | Maximum number of threads per block. |
hipDeviceAttributeMaxThreadsPerMultiProcessor | Maximum resident threads per multiprocessor. |
hipDeviceAttributeMaxPitch | Maximum pitch in bytes allowed by memory copies. |
hipDeviceAttributeMemoryBusWidth | Global memory bus width in bits. |
hipDeviceAttributeMemoryClockRate | Peak memory clock frequency in kilohertz. |
hipDeviceAttributeComputeCapabilityMinor | Minor compute capability version number. |
hipDeviceAttributeMultiGpuBoardGroupID | Cuda only. Unique ID of device group on the same multi-GPU board. |
hipDeviceAttributeMultiprocessorCount | Number of multiprocessors on the device. |
hipDeviceAttributeName | Device name. |
hipDeviceAttributePageableMemoryAccess | Device supports coherently accessing pageable memory without calling hipHostRegister on it |
hipDeviceAttributePageableMemoryAccessUsesHostPageTables | Device accesses pageable memory via the host's page tables. |
hipDeviceAttributePciBusId | PCI Bus ID. |
hipDeviceAttributePciDeviceId | PCI Device ID. |
hipDeviceAttributePciDomainID | PCI Domain ID. |
hipDeviceAttributePersistingL2CacheMaxSize | Cuda11 only. Maximum L2 persisting lines capacity in bytes. |
hipDeviceAttributeMaxRegistersPerBlock | 32-bit registers available to a thread block. This number is shared by all thread blocks simultaneously resident on a multiprocessor. |
hipDeviceAttributeMaxRegistersPerMultiprocessor | 32-bit registers available per multiprocessor. |
hipDeviceAttributeReservedSharedMemPerBlock | Cuda11 only. Shared memory reserved by CUDA driver per block. |
hipDeviceAttributeMaxSharedMemoryPerBlock | Maximum shared memory available per block in bytes. |
hipDeviceAttributeSharedMemPerBlockOptin | Cuda only. Maximum shared memory per block usable by special opt in. |
hipDeviceAttributeSharedMemPerMultiprocessor | Cuda only. Shared memory available per multiprocessor. |
hipDeviceAttributeSingleToDoublePrecisionPerfRatio | Cuda only. Performance ratio of single precision to double precision. |
hipDeviceAttributeStreamPrioritiesSupported | Cuda only. Whether to support stream priorities. |
hipDeviceAttributeSurfaceAlignment | Cuda only. Alignment requirement for surfaces. |
hipDeviceAttributeTccDriver | Cuda only. Whether device is a Tesla device using TCC driver. |
hipDeviceAttributeTextureAlignment | Alignment requirement for textures. |
hipDeviceAttributeTexturePitchAlignment | Pitch alignment requirement for 2D texture references bound to pitched memory. |
hipDeviceAttributeTotalConstantMemory | Constant memory size in bytes. |
hipDeviceAttributeTotalGlobalMem | Global memory available on device. |
hipDeviceAttributeUnifiedAddressing | Cuda only. A unified address space shared with the host. |
hipDeviceAttributeUuid | Cuda only. 16-byte unique ID. |
hipDeviceAttributeWarpSize | Warp size in threads. |
hipDeviceAttributeClockInstructionRate | Frequency in kHz of the timer used by the device-side "clock*". |
hipDeviceAttributeArch | Device architecture. |
hipDeviceAttributeMaxSharedMemoryPerMultiprocessor | Maximum shared memory per multiprocessor. |
hipDeviceAttributeGcnArch | Device gcn architecture. |
hipDeviceAttributeGcnArchName | Device gcnArch name in 256 bytes. |
hipDeviceAttributeHdpMemFlushCntl | Address of the HDP_MEM_COHERENCY_FLUSH_CNTL register. |
hipDeviceAttributeHdpRegFlushCntl | Address of the HDP_REG_COHERENCY_FLUSH_CNTL register. |
hipDeviceAttributeCooperativeMultiDeviceUnmatchedFunc | Supports cooperative launch on multiple devices with unmatched functions |
hipDeviceAttributeCooperativeMultiDeviceUnmatchedGridDim | Supports cooperative launch on multiple devices with unmatched grid dimensions |
hipDeviceAttributeCooperativeMultiDeviceUnmatchedBlockDim | Supports cooperative launch on multiple devices with unmatched block dimensions |
hipDeviceAttributeCooperativeMultiDeviceUnmatchedSharedMem | Supports cooperative launch on multiple devices with unmatched shared memories |
hipDeviceAttributeIsLargeBar | Whether it is LargeBar. |
hipDeviceAttributeAsicRevision | Revision of the GPU in this device. |
hipDeviceAttributeCanUseStreamWaitValue | '1' if Device supports hipStreamWaitValue32() and hipStreamWaitValue64(), '0' otherwise. |
hipDeviceAttributeImageSupport | '1' if Device supports image, '0' otherwise. |
hipDeviceAttributePhysicalMultiProcessorCount | All available physical compute units for the device |
enum hipFuncAttribute |
enum hipFuncCache_t |
enum hipGLDeviceList |
enum hipMemoryAdvise |
enum hipMemRangeAttribute |
Enumerator | |
---|---|
hipMemRangeAttributeReadMostly | Whether the range will mostly be read and only occasionally be written to. |
hipMemRangeAttributePreferredLocation | The preferred location of the range. |
hipMemRangeAttributeAccessedBy | Memory range has hipMemAdviseSetAccessedBy set for the specified device |
hipMemRangeAttributeLastPrefetchLocation | The last location to which the range was prefetched. |
hipMemRangeAttributeCoherencyMode | Returns coherency mode hipMemRangeCoherencyMode for the range |
enum hipSharedMemConfig |