25 #include "concepts.hpp"
26 #include "helpers.hpp"
27 #include "program_state.hpp"
28 #include "hip_runtime_api.h"
34 #include <type_traits>
37 hipError_t ihipExtLaunchMultiKernelMultiDevice(
hipLaunchParams* launchParamsList,
int numDevices,
40 hipError_t hipLaunchCooperativeKernel(
const void* f,
dim3 gridDim,
41 dim3 blockDim,
void** args,
45 hipError_t hipLaunchCooperativeKernelMultiDevice(
hipLaunchParams* launchParamsList,
50 #pragma GCC visibility push(hidden)
53 template <
typename T,
typename std::enable_if<std::is_
integral<T>{}>::type* =
nullptr>
54 inline T round_up_to_next_multiple_nonnegative(T x, T y) {
62 typename std::enable_if<n ==
sizeof...(Ts)>::type* =
nullptr>
64 const std::tuple<Ts...>&,
65 const kernargs_size_align&,
73 typename std::enable_if<n !=
sizeof...(Ts)>::type* =
nullptr>
75 const std::tuple<Ts...>& formals,
76 const kernargs_size_align& size_align,
78 using T =
typename std::tuple_element<n, std::tuple<Ts...>>::type;
81 !std::is_reference<T>{},
82 "A __global__ function cannot have a reference as one of its "
84 #if defined(HIP_STRICT)
86 std::is_trivially_copyable<T>{},
87 "Only TriviallyCopyable types can be arguments to a __global__ "
91 kernarg.resize(round_up_to_next_multiple_nonnegative(
92 kernarg.size(), size_align.alignment(n)) + size_align.size(n));
95 kernarg.data() + kernarg.size() - size_align.size(n),
96 &std::get<n>(formals),
98 return make_kernarg<n + 1>(formals, size_align, std::move(kernarg));
101 template <
typename... Formals,
typename... Actuals>
103 void (*kernel)(Formals...), std::tuple<Actuals...> actuals) {
104 static_assert(
sizeof...(Formals) ==
sizeof...(Actuals),
105 "The count of formal arguments must match the count of actuals.");
107 if (
sizeof...(Formals) == 0)
return {};
109 std::tuple<Formals...> to_formals{std::move(actuals)};
111 kernarg.reserve(
sizeof(to_formals));
113 auto& ps = hip_impl::get_program_state();
114 return make_kernarg<0>(to_formals,
115 ps.get_kernargs_size_align(
116 reinterpret_cast<std::uintptr_t
>(kernel)),
121 HIP_INTERNAL_EXPORTED_API hsa_agent_t target_agent(
hipStream_t stream);
124 __attribute__((visibility(
"hidden")))
125 void hipLaunchKernelGGLImpl(
126 std::uintptr_t function_address,
127 const
dim3& numBlocks,
128 const
dim3& dimBlocks,
129 std::uint32_t sharedMemBytes,
133 const auto& kd = hip_impl::get_program_state().kernel_descriptor(function_address,
134 target_agent(stream));
137 dimBlocks.x, dimBlocks.y, dimBlocks.z, sharedMemBytes,
138 stream,
nullptr, kernarg);
146 T kernel,
size_t dynSharedMemPerBlk = 0,
int blockSizeLimit = 0) {
148 using namespace hip_impl;
150 hip_impl::hip_init();
151 auto f = get_program_state().kernel_descriptor(
reinterpret_cast<std::uintptr_t
>(kernel),
155 dynSharedMemPerBlk, blockSizeLimit);
160 hipError_t hipOccupancyMaxPotentialBlockSizeWithFlags(
int* gridSize,
int* blockSize,
161 T kernel,
size_t dynSharedMemPerBlk = 0,
int blockSizeLimit = 0,
unsigned int flags = 0 ) {
163 using namespace hip_impl;
165 hip_impl::hip_init();
167 auto f = get_program_state().kernel_descriptor(
reinterpret_cast<std::uintptr_t
>(kernel),
171 dynSharedMemPerBlk, blockSizeLimit);
174 template <
typename... Args,
typename F = void (*)(Args...)>
176 void hipLaunchKernelGGL(F kernel,
const dim3& numBlocks,
const dim3& dimBlocks,
179 hip_impl::hip_init();
180 auto kernarg = hip_impl::make_kernarg(kernel, std::tuple<Args...>{std::move(args)...});
181 std::size_t kernarg_size =
kernarg.size();
184 HIP_LAUNCH_PARAM_BUFFER_POINTER,
186 HIP_LAUNCH_PARAM_BUFFER_SIZE,
188 HIP_LAUNCH_PARAM_END};
190 hip_impl::hipLaunchKernelGGLImpl(
reinterpret_cast<std::uintptr_t
>(kernel),
191 numBlocks, dimBlocks, sharedMemBytes,
195 template <
typename F>
197 __attribute__((visibility(
"hidden")))
198 hipError_t hipLaunchCooperativeKernel(F f,
dim3 gridDim,
dim3 blockDim,
199 void** args,
size_t sharedMem,
201 hip_impl::hip_init();
202 auto& ps = hip_impl::get_program_state();
203 return hipLaunchCooperativeKernel(
reinterpret_cast<void*
>(f), gridDim,
204 blockDim, args, sharedMem, stream, ps);
208 __attribute__((visibility(
"hidden")))
209 hipError_t hipLaunchCooperativeKernelMultiDevice(
hipLaunchParams* launchParamsList,
211 unsigned int flags) {
213 hip_impl::hip_init();
214 auto& ps = hip_impl::get_program_state();
215 return hipLaunchCooperativeKernelMultiDevice(launchParamsList, numDevices, flags, ps);
218 #pragma GCC visibility pop