25 #include "hip_fp16_math_fwd.h"
26 #include "hip_vector_types.h"
31 #if !defined(__HIPCC_RTC__)
36 #if !__HIP_DEVICE_COMPILE__
42 #endif // !defined(__HIPCC_RTC__)
// Compiled only when both _LIBCPP_VERSION and __HIP__ are nonzero.
// Specializes __numeric_type for _Float16: declares a __test overload that
// takes and returns _Float16, names _Float16 as the member `type`, and sets
// `value` to true. The template header and braces are not part of this excerpt.
44 #if _LIBCPP_VERSION && __HIP__
47 struct __numeric_type<_Float16>
49 static _Float16 __test(_Float16);
51 typedef _Float16 type;
52 static const bool value =
true;
55 #endif // _LIBCPP_VERSION
57 #pragma push_macro("__DEVICE__")
58 #pragma push_macro("__RETURN_TYPE")
60 #define __DEVICE__ static __device__
61 #define __RETURN_TYPE bool
63 #if !__CLANG_HIP_RUNTIME_WRAPPER_INCLUDED__
// Octal digit accumulator: when tmp is in '0'..'7', the running value r is
// multiplied by 8 and the digit's numeric value is added.
// NOTE(review): the loop over tagp, the declarations of r/tmp and the handling
// of non-octal characters are not visible in this excerpt.
66 uint64_t __make_mantissa_base8(
const char* tagp)
72 if (tmp >=
'0' && tmp <=
'7') r = (r * 8u) + tmp -
'0';
// Decimal digit accumulator: when tmp is in '0'..'9', the running value r is
// multiplied by 10 and the digit's numeric value is added.
// NOTE(review): the loop over tagp, the declarations of r/tmp and the handling
// of non-decimal characters are not visible in this excerpt.
83 uint64_t __make_mantissa_base10(
const char* tagp)
89 if (tmp >=
'0' && tmp <=
'9') r = (r * 10u) + tmp -
'0';
// Hexadecimal digit accumulator. For each accepted character the running value
// r is multiplied by 16 and the digit value is added:
//   '0'..'9' -> 0..9, 'a'..'f' -> 10..15, 'A'..'F' -> 10..15.
// NOTE(review): the loop over tagp, the declarations of r/tmp and the handling
// of characters outside these ranges are not visible in this excerpt.
100 uint64_t __make_mantissa_base16(
const char* tagp)
106 if (tmp >=
'0' && tmp <=
'9') r = (r * 16u) + tmp -
'0';
107 else if (tmp >=
'a' && tmp <=
'f') r = (r * 16u) + tmp -
'a' + 10;
108 else if (tmp >=
'A' && tmp <=
'F') r = (r * 16u) + tmp -
'A' + 10;
// Chooses a digit parser for tagp and returns its result.
//   - A null tagp yields 0.
//   - When the inspected character is 'x' or 'X' the base-16 parser is used,
//     otherwise (on that same path) the base-8 parser.
//   - The final fallthrough uses the base-10 parser.
// NOTE(review): the condition guarding the x/X test (presumably a leading '0')
// and any pointer advance are not visible in this excerpt - confirm.
119 uint64_t __make_mantissa(
const char* tagp)
121 if (!tagp)
return 0u;
126 if (*tagp ==
'x' || *tagp ==
'X')
return __make_mantissa_base16(tagp);
127 else return __make_mantissa_base8(tagp);
130 return __make_mantissa_base10(tagp);
132 #endif // !__CLANG_HIP_RUNTIME_WRAPPER_INCLUDED__
135 #if __HIP_CLANG_ONLY__
// amd_mixed_dot overload family. Every overload forwards (a, b, c, saturate)
// to one __ockl_*dot* routine and returns its result; vector arguments are
// passed through their .data member, scalar int/uint arguments directly.
//   short2        -> __ockl_sdot2
//   char4         -> __ockl_sdot4
//   uchar4 / uint -> __ockl_udot4
//   int           -> __ockl_sdot8
//   uint          -> __ockl_udot8
138 int amd_mixed_dot(
short2 a,
short2 b,
int c,
bool saturate) {
139 return __ockl_sdot2(a.data, b.data, c, saturate);
// NOTE(review): the signature for this __ockl_udot2 call is not visible in this
// excerpt.
144 return __ockl_udot2(a.data, b.data, c, saturate);
148 int amd_mixed_dot(
char4 a,
char4 b,
int c,
bool saturate) {
149 return __ockl_sdot4(a.data, b.data, c, saturate);
153 uint amd_mixed_dot(
uchar4 a,
uchar4 b, uint c,
bool saturate) {
154 return __ockl_udot4(a.data, b.data, c, saturate);
158 int amd_mixed_dot(
int a,
int b,
int c,
bool saturate) {
159 return __ockl_sdot8(a, b, c, saturate);
163 uint amd_mixed_dot(uint a, uint b, uint c,
bool saturate) {
164 return __ockl_udot8(a, b, c, saturate);
168 #if !__CLANG_HIP_RUNTIME_WRAPPER_INCLUDED__
// Single-precision wrappers. Each function passes its argument(s) unchanged to
// the __ocml_*_f32 routine named in its body and returns that result.
// abs(float) calls __ocml_fabs_f32, the same routine fabsf uses.
172 float abs(
float x) {
return __ocml_fabs_f32(x); }
175 float acosf(
float x) {
return __ocml_acos_f32(x); }
178 float acoshf(
float x) {
return __ocml_acosh_f32(x); }
181 float asinf(
float x) {
return __ocml_asin_f32(x); }
184 float asinhf(
float x) {
return __ocml_asinh_f32(x); }
// Arguments are forwarded in declaration order (x, y).
187 float atan2f(
float x,
float y) {
return __ocml_atan2_f32(x, y); }
190 float atanf(
float x) {
return __ocml_atan_f32(x); }
193 float atanhf(
float x) {
return __ocml_atanh_f32(x); }
196 float cbrtf(
float x) {
return __ocml_cbrt_f32(x); }
199 float ceilf(
float x) {
return __ocml_ceil_f32(x); }
202 float copysignf(
float x,
float y) {
return __ocml_copysign_f32(x, y); }
// Single-precision wrappers: each forwards x to the named __ocml_*_f32 routine.
// cyl_bessel_i0f / cyl_bessel_i1f map to __ocml_i0_f32 / __ocml_i1_f32.
205 float cosf(
float x) {
return __ocml_cos_f32(x); }
208 float coshf(
float x) {
return __ocml_cosh_f32(x); }
211 float cospif(
float x) {
return __ocml_cospi_f32(x); }
214 float cyl_bessel_i0f(
float x) {
return __ocml_i0_f32(x); }
217 float cyl_bessel_i1f(
float x) {
return __ocml_i1_f32(x); }
// Single-precision erf-family wrappers: each forwards x to the named
// __ocml_erf*_f32 routine and returns its result.
220 float erfcf(
float x) {
return __ocml_erfc_f32(x); }
223 float erfcinvf(
float x) {
return __ocml_erfcinv_f32(x); }
226 float erfcxf(
float x) {
return __ocml_erfcx_f32(x); }
229 float erff(
float x) {
return __ocml_erf_f32(x); }
232 float erfinvf(
float x) {
return __ocml_erfinv_f32(x); }
// Single-precision wrappers: each forwards its argument(s) to the named
// __ocml_*_f32 routine and returns its result.
235 float exp10f(
float x) {
return __ocml_exp10_f32(x); }
238 float exp2f(
float x) {
return __ocml_exp2_f32(x); }
241 float expf(
float x) {
return __ocml_exp_f32(x); }
244 float expm1f(
float x) {
return __ocml_expm1_f32(x); }
247 float fabsf(
float x) {
return __ocml_fabs_f32(x); }
250 float fdimf(
float x,
float y) {
return __ocml_fdim_f32(x, y); }
// Divides x by y with the built-in float division operator and returns the
// quotient. No library routine is involved.
float fdividef(float x, float y)
{
    const float quotient = x / y;
    return quotient;
}
// Single-precision wrappers: each forwards its argument(s), in declaration
// order, to the named __ocml_*_f32 routine and returns its result.
256 float floorf(
float x) {
return __ocml_floor_f32(x); }
259 float fmaf(
float x,
float y,
float z) {
return __ocml_fma_f32(x, y, z); }
262 float fmaxf(
float x,
float y) {
return __ocml_fmax_f32(x, y); }
265 float fminf(
float x,
float y) {
return __ocml_fmin_f32(x, y); }
268 float fmodf(
float x,
float y) {
return __ocml_fmod_f32(x, y); }
// Calls __ocml_frexp_f32 with x and the address of a local `tmp`, cast to an
// int pointer in address space 5.
// NOTE(review): the declaration of tmp, the store to *nptr and the return
// statement are not visible in this excerpt.
271 float frexpf(
float x,
int* nptr)
275 __ocml_frexp_f32(x, (__attribute__((address_space(5)))
int*) &tmp);
// Single-precision wrappers around the named __ocml_*_f32 routines.
// isfinite/isinf/isnan return __RETURN_TYPE, a macro defined elsewhere in the
// file; the OCML result is converted to it by the return statement.
282 float hypotf(
float x,
float y) {
return __ocml_hypot_f32(x, y); }
285 int ilogbf(
float x) {
return __ocml_ilogb_f32(x); }
288 __RETURN_TYPE isfinite(
float x) {
return __ocml_isfinite_f32(x); }
291 __RETURN_TYPE isinf(
float x) {
return __ocml_isinf_f32(x); }
294 __RETURN_TYPE isnan(
float x) {
return __ocml_isnan_f32(x); }
297 float j0f(
float x) {
return __ocml_j0_f32(x); }
300 float j1f(
float x) {
return __ocml_j1_f32(x); }
// Order-n computation built on j0f/j1f. Orders 0 and 1 return j0f(x) and
// j1f(x) directly; for larger n a loop over i = 1 .. n-1 evaluates
// x2 = (2 * i) / x * x1 - x0.
// NOTE(review): the initialisation of x0/x1, the per-iteration update and the
// final return are not visible in this excerpt.
303 float jnf(
int n,
float x)
307 if (n == 0)
return j0f(x);
308 if (n == 1)
return j1f(x);
312 for (
int i = 1; i < n; ++i) {
313 float x2 = (2 * i) / x * x1 - x0;
// Single-precision wrappers around the named __ocml_*_f32 routines.
// llrintf and lrintf both call __ocml_rint_f32; llroundf and lroundf both call
// __ocml_round_f32. In those four, the return statement converts the OCML
// result to the declared integer return type.
322 float ldexpf(
float x,
int e) {
return __ocml_ldexp_f32(x, e); }
325 float lgammaf(
float x) {
return __ocml_lgamma_f32(x); }
328 long long int llrintf(
float x) {
return __ocml_rint_f32(x); }
331 long long int llroundf(
float x) {
return __ocml_round_f32(x); }
334 float log10f(
float x) {
return __ocml_log10_f32(x); }
337 float log1pf(
float x) {
return __ocml_log1p_f32(x); }
340 float log2f(
float x) {
return __ocml_log2_f32(x); }
343 float logbf(
float x) {
return __ocml_logb_f32(x); }
346 float logf(
float x) {
return __ocml_log_f32(x); }
349 long int lrintf(
float x) {
return __ocml_rint_f32(x); }
352 long int lroundf(
float x) {
return __ocml_round_f32(x); }
// Calls __ocml_modf_f32 with x and the address of a local `tmp`, cast to a
// float pointer in address space 5.
// NOTE(review): the declaration of tmp, the store to *iptr and the return
// statement are not visible in this excerpt.
355 float modff(
float x,
float* iptr)
359 __ocml_modf_f32(x, (__attribute__((address_space(5)))
float*) &tmp);
// Fills a bit-field view of a float: a 22-bit `mantissa` field and an 8-bit
// `exponent` field are visible. A static_assert checks that ieee_float has the
// same size as float. The exponent field is set to all ones (~0u, truncated to
// the field width) and the mantissa field receives __make_mantissa(tagp).
// NOTE(review): the remaining fields of the layout and the return statement
// are not visible in this excerpt.
366 float nanf(
const char* tagp)
371 uint32_t mantissa : 22;
373 uint32_t exponent : 8;
377 static_assert(
sizeof(
float) ==
sizeof(ieee_float),
"");
381 tmp.bits.exponent = ~0u;
383 tmp.bits.mantissa = __make_mantissa(tagp);
// Single-precision wrappers around the named __ocml_*_f32 routines.
// norm3df / norm4df map to __ocml_len3_f32 / __ocml_len4_f32;
// normcdff / normcdfinvf map to __ocml_ncdf_f32 / __ocml_ncdfinv_f32.
389 float nearbyintf(
float x) {
return __ocml_nearbyint_f32(x); }
392 float nextafterf(
float x,
float y) {
return __ocml_nextafter_f32(x, y); }
395 float norm3df(
float x,
float y,
float z) {
return __ocml_len3_f32(x, y, z); }
398 float norm4df(
float x,
float y,
float z,
float w)
400 return __ocml_len4_f32(x, y, z, w);
404 float normcdff(
float x) {
return __ocml_ncdf_f32(x); }
407 float normcdfinvf(
float x) {
return __ocml_ncdfinv_f32(x); }
// Sums the squares of `dim` consecutive floats starting at a, advancing a by
// one element per iteration, then returns __ocml_sqrt_f32 of the sum.
// The loop condition is `dim--`, so it runs until dim reaches zero.
// NOTE(review): the declaration/initial value of r is not visible here.
410 float normf(
int dim,
const float* a)
413 while (dim--) { r += a[0] * a[0]; ++a; }
415 return __ocml_sqrt_f32(r);
// Single-precision wrappers around the named __ocml_*_f32 routines.
// powif (integer exponent) maps to __ocml_pown_f32.
419 float powf(
float x,
float y) {
return __ocml_pow_f32(x, y); }
422 float powif(
float base,
int iexp) {
return __ocml_pown_f32(base, iexp); }
425 float rcbrtf(
float x) {
return __ocml_rcbrt_f32(x); }
428 float remainderf(
float x,
float y) {
return __ocml_remainder_f32(x, y); }
// Calls __ocml_remquo_f32 with x, y and the address of a local `tmp`, cast to
// an int pointer in address space 5.
// NOTE(review): the declaration of tmp, the store to *quo and the return
// statement are not visible in this excerpt.
431 float remquof(
float x,
float y,
int* quo)
435 __ocml_remquo_f32(x, y, (__attribute__((address_space(5)))
int*) &tmp);
// Single-precision wrappers around the named __ocml_*_f32 routines.
// rnorm3df / rnorm4df map to __ocml_rlen3_f32 / __ocml_rlen4_f32.
442 float rhypotf(
float x,
float y) {
return __ocml_rhypot_f32(x, y); }
445 float rintf(
float x) {
return __ocml_rint_f32(x); }
448 float rnorm3df(
float x,
float y,
float z)
450 return __ocml_rlen3_f32(x, y, z);
455 float rnorm4df(
float x,
float y,
float z,
float w)
457 return __ocml_rlen4_f32(x, y, z, w);
// Sums the squares of `dim` consecutive floats starting at a, advancing a by
// one element per iteration, then returns __ocml_rsqrt_f32 of the sum.
// NOTE(review): the declaration/initial value of r is not visible here.
461 float rnormf(
int dim,
const float* a)
464 while (dim--) { r += a[0] * a[0]; ++a; }
466 return __ocml_rsqrt_f32(r);
// Single-precision wrappers: each forwards x to the named __ocml_*_f32 routine.
470 float roundf(
float x) {
return __ocml_round_f32(x); }
473 float rsqrtf(
float x) {
return __ocml_rsqrt_f32(x); }
// Picks the OCML routine by the size of n: __ocml_scalbn_f32(x, n) when
// n < INT_MAX, otherwise __ocml_scalb_f32(x, n).
// NOTE(review): n is a long int and only the upper bound is tested here; how
// the callee parameter types treat values outside int range is not visible.
476 float scalblnf(
float x,
long int n)
478 return (n < INT_MAX) ? __ocml_scalbn_f32(x, n) : __ocml_scalb_f32(x, n);
// Single-precision wrappers around the named __ocml_*_f32 routines.
// signbit returns __RETURN_TYPE, a macro defined elsewhere in the file.
482 float scalbnf(
float x,
int n) {
return __ocml_scalbn_f32(x, n); }
485 __RETURN_TYPE signbit(
float x) {
return __ocml_signbit_f32(x); }
// Calls __ocml_sincos_f32 with x and the address of a local `tmp`, cast to a
// float pointer in address space 5.
// NOTE(review): the declaration of tmp and the stores to *sptr / *cptr are not
// visible in this excerpt.
488 void sincosf(
float x,
float* sptr,
float* cptr)
493 __ocml_sincos_f32(x, (__attribute__((address_space(5)))
float*) &tmp);
// Calls __ocml_sincospi_f32 with x and the address of a local `tmp`, cast to a
// float pointer in address space 5.
// NOTE(review): the declaration of tmp and the stores to *sptr / *cptr are not
// visible in this excerpt.
498 void sincospif(
float x,
float* sptr,
float* cptr)
503 __ocml_sincospi_f32(x, (__attribute__((address_space(5)))
float*) &tmp);
// Single-precision wrappers: each forwards x to the named __ocml_*_f32 routine
// and returns its result.
508 float sinf(
float x) {
return __ocml_sin_f32(x); }
511 float sinhf(
float x) {
return __ocml_sinh_f32(x); }
514 float sinpif(
float x) {
return __ocml_sinpi_f32(x); }
517 float sqrtf(
float x) {
return __ocml_sqrt_f32(x); }
520 float tanf(
float x) {
return __ocml_tan_f32(x); }
523 float tanhf(
float x) {
return __ocml_tanh_f32(x); }
526 float tgammaf(
float x) {
return __ocml_tgamma_f32(x); }
529 float truncf(
float x) {
return __ocml_trunc_f32(x); }
532 float y0f(
float x) {
return __ocml_y0_f32(x); }
535 float y1f(
float x) {
return __ocml_y1_f32(x); }
// Order-n computation built on y0f/y1f. Orders 0 and 1 return y0f(x) and
// y1f(x) directly; for larger n a loop over i = 1 .. n-1 evaluates
// x2 = (2 * i) / x * x1 - x0.
// NOTE(review): the initialisation of x0/x1, the per-iteration update and the
// final return are not visible in this excerpt.
538 float ynf(
int n,
float x)
543 if (n == 0)
return y0f(x);
544 if (n == 1)
return y1f(x);
548 for (
int i = 1; i < n; ++i) {
549 float x2 = (2 * i) / x * x1 - x0;
// Double-underscore variants that forward x to the __ocml_native_*_f32
// routines instead of the plain __ocml_*_f32 ones.
560 float __cosf(
float x) {
return __ocml_native_cos_f32(x); }
563 float __exp10f(
float x) {
return __ocml_native_exp10_f32(x); }
566 float __expf(
float x) {
return __ocml_native_exp_f32(x); }
567 #if defined OCML_BASIC_ROUNDED_OPERATIONS
// Forwards (x, y) to __ocml_add_rtn_f32 and returns its result.
570 float __fadd_rd(
float x,
float y) {
return __ocml_add_rtn_f32(x, y); }
// Adds x and y with the built-in float addition operator and returns the sum.
float __fadd_rn(float x, float y)
{
    const float sum = x + y;
    return sum;
}
575 #if defined OCML_BASIC_ROUNDED_OPERATIONS
// Forward (x, y) to the named OCML routines: __fadd_ru -> __ocml_add_rtp_f32,
// __fadd_rz -> __ocml_add_rtz_f32, __fdiv_rd -> __ocml_div_rtn_f32.
578 float __fadd_ru(
float x,
float y) {
return __ocml_add_rtp_f32(x, y); }
581 float __fadd_rz(
float x,
float y) {
return __ocml_add_rtz_f32(x, y); }
584 float __fdiv_rd(
float x,
float y) {
return __ocml_div_rtn_f32(x, y); }
// Divides x by y with the built-in float division operator and returns the
// quotient.
float __fdiv_rn(float x, float y)
{
    const float quotient = x / y;
    return quotient;
}
589 #if defined OCML_BASIC_ROUNDED_OPERATIONS
// Forward (x, y) to the named OCML routines: __fdiv_ru -> __ocml_div_rtp_f32,
// __fdiv_rz -> __ocml_div_rtz_f32.
592 float __fdiv_ru(
float x,
float y) {
return __ocml_div_rtp_f32(x, y); }
595 float __fdiv_rz(
float x,
float y) {
return __ocml_div_rtz_f32(x, y); }
// Divides x by y with the built-in float division operator and returns the
// quotient.
float __fdividef(float x, float y)
{
    const float quotient = x / y;
    return quotient;
}
600 #if defined OCML_BASIC_ROUNDED_OPERATIONS
// Forward their arguments to the named OCML routines:
//   __fmaf_rd -> __ocml_fma_rtn_f32, __fmaf_rn -> __ocml_fma_f32,
//   __fmaf_ru -> __ocml_fma_rtp_f32, __fmaf_rz -> __ocml_fma_rtz_f32,
//   __fmul_rd -> __ocml_mul_rtn_f32.
603 float __fmaf_rd(
float x,
float y,
float z)
605 return __ocml_fma_rtn_f32(x, y, z);
610 float __fmaf_rn(
float x,
float y,
float z)
612 return __ocml_fma_f32(x, y, z);
614 #if defined OCML_BASIC_ROUNDED_OPERATIONS
617 float __fmaf_ru(
float x,
float y,
float z)
619 return __ocml_fma_rtp_f32(x, y, z);
623 float __fmaf_rz(
float x,
float y,
float z)
625 return __ocml_fma_rtz_f32(x, y, z);
629 float __fmul_rd(
float x,
float y) {
return __ocml_mul_rtn_f32(x, y); }
// Multiplies x by y with the built-in float multiplication operator and
// returns the product.
float __fmul_rn(float x, float y)
{
    const float product = x * y;
    return product;
}
634 #if defined OCML_BASIC_ROUNDED_OPERATIONS
// Forward their arguments to the routine named in each body.
// NOTE(review): __frcp_rd, __frcp_rn, __frcp_ru and __frcp_rz all call the
// same __llvm_amdgcn_rcp_f32; no per-variant difference is visible here.
// __frsqrt_rn calls __llvm_amdgcn_rsq_f32 and __fsqrt_rn calls
// __ocml_native_sqrt_f32, while the other __fsqrt_* variants call
// __ocml_sqrt_rt{n,p,z}_f32.
637 float __fmul_ru(
float x,
float y) {
return __ocml_mul_rtp_f32(x, y); }
640 float __fmul_rz(
float x,
float y) {
return __ocml_mul_rtz_f32(x, y); }
643 float __frcp_rd(
float x) {
return __llvm_amdgcn_rcp_f32(x); }
647 float __frcp_rn(
float x) {
return __llvm_amdgcn_rcp_f32(x); }
648 #if defined OCML_BASIC_ROUNDED_OPERATIONS
651 float __frcp_ru(
float x) {
return __llvm_amdgcn_rcp_f32(x); }
654 float __frcp_rz(
float x) {
return __llvm_amdgcn_rcp_f32(x); }
658 float __frsqrt_rn(
float x) {
return __llvm_amdgcn_rsq_f32(x); }
659 #if defined OCML_BASIC_ROUNDED_OPERATIONS
662 float __fsqrt_rd(
float x) {
return __ocml_sqrt_rtn_f32(x); }
666 float __fsqrt_rn(
float x) {
return __ocml_native_sqrt_f32(x); }
667 #if defined OCML_BASIC_ROUNDED_OPERATIONS
670 float __fsqrt_ru(
float x) {
return __ocml_sqrt_rtp_f32(x); }
673 float __fsqrt_rz(
float x) {
return __ocml_sqrt_rtz_f32(x); }
676 float __fsub_rd(
float x,
float y) {
return __ocml_sub_rtn_f32(x, y); }
// Subtracts y from x with the built-in float subtraction operator and returns
// the difference.
float __fsub_rn(float x, float y)
{
    const float difference = x - y;
    return difference;
}
681 #if defined OCML_BASIC_ROUNDED_OPERATIONS
// Forward their arguments to the routine named in each body.
// __log10f/__log2f/__logf use the __ocml_native_*_f32 routines.
// __powf calls __ocml_pow_f32, the same routine powf uses.
684 float __fsub_ru(
float x,
float y) {
return __ocml_sub_rtp_f32(x, y); }
687 float __fsub_rz(
float x,
float y) {
return __ocml_sub_rtz_f32(x, y); }
691 float __log10f(
float x) {
return __ocml_native_log10_f32(x); }
694 float __log2f(
float x) {
return __ocml_native_log2_f32(x); }
697 float __logf(
float x) {
return __ocml_native_log_f32(x); }
700 float __powf(
float x,
float y) {
return __ocml_pow_f32(x, y); }
// Clamps x to the closed interval [0, 1]: values below 0 give 0, values above
// 1 give 1, anything else is returned unchanged. Both comparisons are false
// for an x that compares unordered, so such a value is returned as-is.
float __saturatef(float x)
{
    if (x < 0) {
        return 0;
    }
    if (x > 1) {
        return 1;
    }
    return x;
}
// Writes __ocml_native_sin_f32(x) through sptr and __ocml_native_cos_f32(x)
// through cptr. Neither pointer is checked before the store.
706 void __sincosf(
float x,
float* sptr,
float* cptr)
708 *sptr = __ocml_native_sin_f32(x);
709 *cptr = __ocml_native_cos_f32(x);
// __sinf forwards x to __ocml_native_sin_f32.
// NOTE(review): __tanf forwards to __ocml_tan_f32 (not a native_ routine),
// the same call tanf makes.
713 float __sinf(
float x) {
return __ocml_native_sin_f32(x); }
716 float __tanf(
float x) {
return __ocml_tan_f32(x); }
// Double-precision wrappers. Each function passes its argument(s) unchanged,
// in declaration order, to the __ocml_*_f64 routine named in its body and
// returns that result. abs(double) calls __ocml_fabs_f64, as fabs does.
723 double abs(
double x) {
return __ocml_fabs_f64(x); }
726 double acos(
double x) {
return __ocml_acos_f64(x); }
729 double acosh(
double x) {
return __ocml_acosh_f64(x); }
732 double asin(
double x) {
return __ocml_asin_f64(x); }
735 double asinh(
double x) {
return __ocml_asinh_f64(x); }
738 double atan(
double x) {
return __ocml_atan_f64(x); }
741 double atan2(
double x,
double y) {
return __ocml_atan2_f64(x, y); }
744 double atanh(
double x) {
return __ocml_atanh_f64(x); }
747 double cbrt(
double x) {
return __ocml_cbrt_f64(x); }
750 double ceil(
double x) {
return __ocml_ceil_f64(x); }
753 double copysign(
double x,
double y) {
return __ocml_copysign_f64(x, y); }
// Double-precision wrappers: each forwards x to the named __ocml_*_f64 routine.
// cyl_bessel_i0 / cyl_bessel_i1 map to __ocml_i0_f64 / __ocml_i1_f64.
756 double cos(
double x) {
return __ocml_cos_f64(x); }
759 double cosh(
double x) {
return __ocml_cosh_f64(x); }
762 double cospi(
double x) {
return __ocml_cospi_f64(x); }
765 double cyl_bessel_i0(
double x) {
return __ocml_i0_f64(x); }
768 double cyl_bessel_i1(
double x) {
return __ocml_i1_f64(x); }
// Double-precision erf-family wrappers: each forwards x to the named
// __ocml_erf*_f64 routine and returns its result.
771 double erf(
double x) {
return __ocml_erf_f64(x); }
774 double erfc(
double x) {
return __ocml_erfc_f64(x); }
777 double erfcinv(
double x) {
return __ocml_erfcinv_f64(x); }
780 double erfcx(
double x) {
return __ocml_erfcx_f64(x); }
783 double erfinv(
double x) {
return __ocml_erfinv_f64(x); }
// Double-precision wrappers: each forwards its argument(s) to the named
// __ocml_*_f64 routine and returns its result.
786 double exp(
double x) {
return __ocml_exp_f64(x); }
789 double exp10(
double x) {
return __ocml_exp10_f64(x); }
792 double exp2(
double x) {
return __ocml_exp2_f64(x); }
795 double expm1(
double x) {
return __ocml_expm1_f64(x); }
798 double fabs(
double x) {
return __ocml_fabs_f64(x); }
801 double fdim(
double x,
double y) {
return __ocml_fdim_f64(x, y); }
// Double-precision wrappers: each forwards its argument(s), in declaration
// order, to the named __ocml_*_f64 routine and returns its result.
804 double floor(
double x) {
return __ocml_floor_f64(x); }
807 double fma(
double x,
double y,
double z) {
return __ocml_fma_f64(x, y, z); }
810 double fmax(
double x,
double y) {
return __ocml_fmax_f64(x, y); }
813 double fmin(
double x,
double y) {
return __ocml_fmin_f64(x, y); }
816 double fmod(
double x,
double y) {
return __ocml_fmod_f64(x, y); }
// Calls __ocml_frexp_f64 with x and the address of a local `tmp`, cast to an
// int pointer in address space 5.
// NOTE(review): the declaration of tmp, the store to *nptr and the return
// statement are not visible in this excerpt.
819 double frexp(
double x,
int* nptr)
823 __ocml_frexp_f64(x, (__attribute__((address_space(5)))
int*) &tmp);
// Double-precision wrappers around the named __ocml_*_f64 routines.
// isfinite/isinf/isnan return __RETURN_TYPE, a macro defined elsewhere in the
// file; the OCML result is converted to it by the return statement.
830 double hypot(
double x,
double y) {
return __ocml_hypot_f64(x, y); }
833 int ilogb(
double x) {
return __ocml_ilogb_f64(x); }
836 __RETURN_TYPE isfinite(
double x) {
return __ocml_isfinite_f64(x); }
839 __RETURN_TYPE isinf(
double x) {
return __ocml_isinf_f64(x); }
842 __RETURN_TYPE isnan(
double x) {
return __ocml_isnan_f64(x); }
845 double j0(
double x) {
return __ocml_j0_f64(x); }
848 double j1(
double x) {
return __ocml_j1_f64(x); }
851 double jn(
int n,
double x)
856 if (n == 0)
return j0f(x);
857 if (n == 1)
return j1f(x);
861 for (
int i = 1; i < n; ++i) {
862 double x2 = (2 * i) / x * x1 - x0;
// Double-precision wrappers around the named __ocml_*_f64 routines.
// llrint and lrint both call __ocml_rint_f64; llround and lround both call
// __ocml_round_f64. In those four, the return statement converts the OCML
// result to the declared integer return type.
871 double ldexp(
double x,
int e) {
return __ocml_ldexp_f64(x, e); }
874 double lgamma(
double x) {
return __ocml_lgamma_f64(x); }
877 long long int llrint(
double x) {
return __ocml_rint_f64(x); }
880 long long int llround(
double x) {
return __ocml_round_f64(x); }
883 double log(
double x) {
return __ocml_log_f64(x); }
886 double log10(
double x) {
return __ocml_log10_f64(x); }
889 double log1p(
double x) {
return __ocml_log1p_f64(x); }
892 double log2(
double x) {
return __ocml_log2_f64(x); }
895 double logb(
double x) {
return __ocml_logb_f64(x); }
898 long int lrint(
double x) {
return __ocml_rint_f64(x); }
901 long int lround(
double x) {
return __ocml_round_f64(x); }
// Calls __ocml_modf_f64 with x and the address of a local `tmp`, cast to a
// double pointer in address space 5.
// NOTE(review): the declaration of tmp, the store to *iptr and the return
// statement are not visible in this excerpt.
904 double modf(
double x,
double* iptr)
908 __ocml_modf_f64(x, (__attribute__((address_space(5)))
double*) &tmp);
// Two constructions of the result are visible.
//   1. Bit-field view: a 51-bit `mantissa` field and an 11-bit `exponent`
//      field; a static_assert checks ieee_double has the size of double; the
//      exponent field is set to all ones (~0u, truncated to the field width)
//      and the mantissa field receives __make_mantissa(tagp).
//   2. Raw reinterpretation: a static_assert checks uint64_t has the size of
//      double, then the value from __make_mantissa(tagp) is read back as a
//      double through a reinterpret_cast pointer.
// NOTE(review): the preprocessor condition choosing between the two paths, the
// remaining bit-fields and the first path's return are not visible here.
915 double nan(
const char* tagp)
921 uint64_t mantissa : 51;
923 uint32_t exponent : 11;
926 static_assert(
sizeof(
double) ==
sizeof(ieee_double),
"");
930 tmp.bits.exponent = ~0u;
932 tmp.bits.mantissa = __make_mantissa(tagp);
936 static_assert(
sizeof(uint64_t)==
sizeof(
double));
937 uint64_t val = __make_mantissa(tagp);
939 return *
reinterpret_cast<double*
>(&val);
// Double-precision wrappers: each forwards its argument(s) to the named
// __ocml_*_f64 routine and returns its result.
944 double nearbyint(
double x) {
return __ocml_nearbyint_f64(x); }
947 double nextafter(
double x,
double y) {
return __ocml_nextafter_f64(x, y); }
// Sums the squares of `dim` consecutive doubles starting at a, advancing a by
// one element per iteration, then returns __ocml_sqrt_f64 of the sum.
// NOTE(review): the declaration/initial value of r is not visible here.
950 double norm(
int dim,
const double* a)
953 while (dim--) { r += a[0] * a[0]; ++a; }
955 return __ocml_sqrt_f64(r);
// Double-precision wrappers around the named __ocml_*_f64 routines.
// norm3d / norm4d map to __ocml_len3_f64 / __ocml_len4_f64;
// normcdf / normcdfinv map to __ocml_ncdf_f64 / __ocml_ncdfinv_f64.
959 double norm3d(
double x,
double y,
double z)
961 return __ocml_len3_f64(x, y, z);
965 double norm4d(
double x,
double y,
double z,
double w)
967 return __ocml_len4_f64(x, y, z, w);
971 double normcdf(
double x) {
return __ocml_ncdf_f64(x); }
974 double normcdfinv(
double x) {
return __ocml_ncdfinv_f64(x); }
// Double-precision wrappers around the named __ocml_*_f64 routines.
// powi (integer exponent) maps to __ocml_pown_f64.
977 double pow(
double x,
double y) {
return __ocml_pow_f64(x, y); }
980 double powi(
double base,
int iexp) {
return __ocml_pown_f64(base, iexp); }
983 double rcbrt(
double x) {
return __ocml_rcbrt_f64(x); }
986 double remainder(
double x,
double y) {
return __ocml_remainder_f64(x, y); }
// Calls __ocml_remquo_f64 with x, y and the address of a local `tmp`, cast to
// an int pointer in address space 5.
// NOTE(review): the declaration of tmp, the store to *quo and the return
// statement are not visible in this excerpt.
989 double remquo(
double x,
double y,
int* quo)
993 __ocml_remquo_f64(x, y, (__attribute__((address_space(5)))
int*) &tmp);
// Double-precision wrappers: each forwards its argument(s) to the named
// __ocml_*_f64 routine and returns its result.
1000 double rhypot(
double x,
double y) {
return __ocml_rhypot_f64(x, y); }
1003 double rint(
double x) {
return __ocml_rint_f64(x); }
// Sums the squares of `dim` consecutive doubles starting at a, advancing a by
// one element per iteration, then returns __ocml_rsqrt_f64 of the sum.
// NOTE(review): the declaration/initial value of r is not visible here.
1006 double rnorm(
int dim,
const double* a)
1009 while (dim--) { r += a[0] * a[0]; ++a; }
1011 return __ocml_rsqrt_f64(r);
// Double-precision wrappers around the named __ocml_*_f64 routines.
// rnorm3d / rnorm4d map to __ocml_rlen3_f64 / __ocml_rlen4_f64.
1015 double rnorm3d(
double x,
double y,
double z)
1017 return __ocml_rlen3_f64(x, y, z);
1021 double rnorm4d(
double x,
double y,
double z,
double w)
1023 return __ocml_rlen4_f64(x, y, z, w);
1027 double round(
double x) {
return __ocml_round_f64(x); }
1030 double rsqrt(
double x) {
return __ocml_rsqrt_f64(x); }
// Picks the OCML routine by the size of n: __ocml_scalbn_f64(x, n) when
// n < INT_MAX, otherwise __ocml_scalb_f64(x, n).
// NOTE(review): n is a long int and only the upper bound is tested here; how
// the callee parameter types treat values outside int range is not visible.
1033 double scalbln(
double x,
long int n)
1035 return (n < INT_MAX) ? __ocml_scalbn_f64(x, n) : __ocml_scalb_f64(x, n);
// Double-precision wrappers around the named __ocml_*_f64 routines.
// signbit returns __RETURN_TYPE, a macro defined elsewhere in the file.
1039 double scalbn(
double x,
int n) {
return __ocml_scalbn_f64(x, n); }
1042 __RETURN_TYPE signbit(
double x) {
return __ocml_signbit_f64(x); }
1045 double sin(
double x) {
return __ocml_sin_f64(x); }
// Calls __ocml_sincos_f64 with x and the address of a local `tmp`, cast to a
// double pointer in address space 5.
// NOTE(review): the declaration of tmp and the stores to *sptr / *cptr are not
// visible in this excerpt.
1048 void sincos(
double x,
double* sptr,
double* cptr)
1052 __ocml_sincos_f64(x, (__attribute__((address_space(5)))
double*) &tmp);
// Stores the return value of __ocml_sincospi_f64 through sptr; the call also
// receives the address of a local `tmp`, cast to a double pointer in address
// space 5.
// NOTE(review): the declaration of tmp and the store to *cptr are not visible
// in this excerpt.
1057 void sincospi(
double x,
double* sptr,
double* cptr)
1060 *sptr = __ocml_sincospi_f64(
1061 x, (__attribute__((address_space(5)))
double*) &tmp);
// Double-precision wrappers: each forwards x to the named __ocml_*_f64 routine
// and returns its result.
1066 double sinh(
double x) {
return __ocml_sinh_f64(x); }
1069 double sinpi(
double x) {
return __ocml_sinpi_f64(x); }
1072 double sqrt(
double x) {
return __ocml_sqrt_f64(x); }
1075 double tan(
double x) {
return __ocml_tan_f64(x); }
1078 double tanh(
double x) {
return __ocml_tanh_f64(x); }
1081 double tgamma(
double x) {
return __ocml_tgamma_f64(x); }
1084 double trunc(
double x) {
return __ocml_trunc_f64(x); }
1087 double y0(
double x) {
return __ocml_y0_f64(x); }
1090 double y1(
double x) {
return __ocml_y1_f64(x); }
1093 double yn(
int n,
double x)
1098 if (n == 0)
return j0f(x);
1099 if (n == 1)
return j1f(x);
1103 for (
int i = 1; i < n; ++i) {
1104 double x2 = (2 * i) / x * x1 - x0;
1113 #if defined OCML_BASIC_ROUNDED_OPERATIONS
// Forwards (x, y) to __ocml_add_rtn_f64 and returns its result.
1116 double __dadd_rd(
double x,
double y) {
return __ocml_add_rtn_f64(x, y); }
// Adds x and y with the built-in double addition operator and returns the sum.
double __dadd_rn(double x, double y)
{
    const double sum = x + y;
    return sum;
}
1121 #if defined OCML_BASIC_ROUNDED_OPERATIONS
// Forward (x, y) to the named OCML routines: __dadd_ru -> __ocml_add_rtp_f64,
// __dadd_rz -> __ocml_add_rtz_f64, __ddiv_rd -> __ocml_div_rtn_f64.
1124 double __dadd_ru(
double x,
double y) {
return __ocml_add_rtp_f64(x, y); }
1127 double __dadd_rz(
double x,
double y) {
return __ocml_add_rtz_f64(x, y); }
1130 double __ddiv_rd(
double x,
double y) {
return __ocml_div_rtn_f64(x, y); }
// Divides x by y with the built-in double division operator and returns the
// quotient.
double __ddiv_rn(double x, double y)
{
    const double quotient = x / y;
    return quotient;
}
1135 #if defined OCML_BASIC_ROUNDED_OPERATIONS
// Forward (x, y) to the named OCML routines: __ddiv_ru -> __ocml_div_rtp_f64,
// __ddiv_rz -> __ocml_div_rtz_f64, __dmul_rd -> __ocml_mul_rtn_f64.
1138 double __ddiv_ru(
double x,
double y) {
return __ocml_div_rtp_f64(x, y); }
1141 double __ddiv_rz(
double x,
double y) {
return __ocml_div_rtz_f64(x, y); }
1144 double __dmul_rd(
double x,
double y) {
return __ocml_mul_rtn_f64(x, y); }
// Multiplies x by y with the built-in double multiplication operator and
// returns the product.
double __dmul_rn(double x, double y)
{
    const double product = x * y;
    return product;
}
1149 #if defined OCML_BASIC_ROUNDED_OPERATIONS
// Forward their arguments to the routine named in each body.
// NOTE(review): __drcp_rd, __drcp_rn, __drcp_ru and __drcp_rz all call the
// same __llvm_amdgcn_rcp_f64; no per-variant difference is visible here.
// __dsqrt_rn calls __ocml_sqrt_f64, while the other __dsqrt_* variants call
// __ocml_sqrt_rt{n,p,z}_f64.
1152 double __dmul_ru(
double x,
double y) {
return __ocml_mul_rtp_f64(x, y); }
1155 double __dmul_rz(
double x,
double y) {
return __ocml_mul_rtz_f64(x, y); }
1158 double __drcp_rd(
double x) {
return __llvm_amdgcn_rcp_f64(x); }
1162 double __drcp_rn(
double x) {
return __llvm_amdgcn_rcp_f64(x); }
1163 #if defined OCML_BASIC_ROUNDED_OPERATIONS
1166 double __drcp_ru(
double x) {
return __llvm_amdgcn_rcp_f64(x); }
1169 double __drcp_rz(
double x) {
return __llvm_amdgcn_rcp_f64(x); }
1172 double __dsqrt_rd(
double x) {
return __ocml_sqrt_rtn_f64(x); }
1176 double __dsqrt_rn(
double x) {
return __ocml_sqrt_f64(x); }
1177 #if defined OCML_BASIC_ROUNDED_OPERATIONS
1180 double __dsqrt_ru(
double x) {
return __ocml_sqrt_rtp_f64(x); }
1183 double __dsqrt_rz(
double x) {
return __ocml_sqrt_rtz_f64(x); }
1186 double __dsub_rd(
double x,
double y) {
return __ocml_sub_rtn_f64(x, y); }
// Subtracts y from x with the built-in double subtraction operator and returns
// the difference.
double __dsub_rn(double x, double y)
{
    const double difference = x - y;
    return difference;
}
1191 #if defined OCML_BASIC_ROUNDED_OPERATIONS
// Forward (x, y) to the named OCML routines: __dsub_ru -> __ocml_sub_rtp_f64,
// __dsub_rz -> __ocml_sub_rtz_f64.
1194 double __dsub_ru(
double x,
double y) {
return __ocml_sub_rtp_f64(x, y); }
1197 double __dsub_rz(
double x,
double y) {
return __ocml_sub_rtz_f64(x, y); }
// Forward (x, y, z) to the named OCML routines:
//   __fma_rd -> __ocml_fma_rtn_f64, __fma_rn -> __ocml_fma_f64,
//   __fma_ru -> __ocml_fma_rtp_f64, __fma_rz -> __ocml_fma_rtz_f64.
1200 double __fma_rd(
double x,
double y,
double z)
1202 return __ocml_fma_rtn_f64(x, y, z);
1207 double __fma_rn(
double x,
double y,
double z)
1209 return __ocml_fma_f64(x, y, z);
1211 #if defined OCML_BASIC_ROUNDED_OPERATIONS
1214 double __fma_ru(
double x,
double y,
double z)
1216 return __ocml_fma_rtp_f64(x, y, z);
1220 double __fma_rz(
double x,
double y,
double z)
1222 return __ocml_fma_rtz_f64(x, y, z);
// Branch-free absolute value, repeated for int, long and long long.
// sgn is x shifted right by (bit width - 1); the result is (x ^ sgn) - sgn.
// NOTE(review): this relies on the right shift of a negative value filling
// with the sign bit, giving sgn == -1 for negative x and 0 otherwise.
// The signatures of the int and long variants are not visible in this excerpt.
1233 int sgn = x >> (
sizeof(int) * CHAR_BIT - 1);
1234 return (x ^ sgn) - sgn;
1240 long sgn = x >> (
sizeof(long) * CHAR_BIT - 1);
1241 return (x ^ sgn) - sgn;
1245 long long llabs(
long long x)
1247 long long sgn = x >> (
sizeof(
long long) * CHAR_BIT - 1);
1248 return (x ^ sgn) - sgn;
1251 #if defined(__cplusplus)
// abs overloads for the wider integer types: the long overload delegates to
// labs and the long long overload to llabs.
1254 long abs(
long x) {
return labs(x); }
1257 long long abs(
long long x) {
return llabs(x); }
// fma overloads for narrower types: the _Float16 overload forwards to
// __ocml_fma_f16, the float overload to fmaf.
1262 inline _Float16 fma(_Float16 x, _Float16 y, _Float16 z) {
1263 return __ocml_fma_f16(x, y, z);
1267 inline float fma(
float x,
float y,
float z) {
1268 return fmaf(x, y, z);
1271 #pragma push_macro("__DEF_FLOAT_FUN")
1272 #pragma push_macro("__DEF_FLOAT_FUN2")
1273 #pragma push_macro("__DEF_FLOAT_FUN2I")
1274 #pragma push_macro("__HIP_OVERLOAD")
1275 #pragma push_macro("__HIP_OVERLOAD2")
// Template header taking a bool __B and a type __T that defaults to void.
// NOTE(review): the entity it declares is not visible in this excerpt;
// presumably the __hip_enable_if used by the overload macros - confirm.
1278 template<
bool __B,
class __T =
void>
// Overload-generating macros (several continuation lines are missing from this
// excerpt, so only the visible pieces are described).
//   __HIP_OVERLOAD1(__retty, __fn): template over one type, enabled through
//     __hip_enable_if when std::numeric_limits<__T>::is_integer holds; the
//     body calls ::__fn with the argument cast to double.
//   __HIP_OVERLOAD2(__retty, __fn): template over two types, enabled when
//     std::numeric_limits is specialized for both; the body calls __fn with
//     both arguments cast to double.
//   __DEF_FUN1 / __DEF_FUNI / __DEF_FUN2: define a float overload of `func`
//     that calls func##f, then append the matching __HIP_OVERLOAD1/2.
//     __DEF_FUNI differs from __DEF_FUN1 in returning `retty` rather than float.
1288 #define __HIP_OVERLOAD1(__retty, __fn) \
1289 template <typename __T> \
1291 typename __hip_enable_if<std::numeric_limits<__T>::is_integer, \
1294 return ::__fn((double)__x); \
1300 #define __HIP_OVERLOAD2(__retty, __fn) \
1301 template <typename __T1, typename __T2> \
1302 __DEVICE__ typename __hip_enable_if< \
1303 std::numeric_limits<__T1>::is_specialized && \
1304 std::numeric_limits<__T2>::is_specialized, \
1306 __fn(__T1 __x, __T2 __y) { \
1307 return __fn((double)__x, (double)__y); \
1311 #define __DEF_FUN1(retty, func) \
1314 float func(float x) \
1316 return func##f(x); \
1318 __HIP_OVERLOAD1(retty, func)
1321 #define __DEF_FUNI(retty, func) \
1324 retty func(float x) \
1326 return func##f(x); \
1328 __HIP_OVERLOAD1(retty, func)
1331 #define __DEF_FUN2(retty, func) \
1334 float func(float x, float y) \
1336 return func##f(x, y); \
1338 __HIP_OVERLOAD2(retty, func)
// Instantiations of the overload macros, one per math function.
// __DEF_FUN1 / __DEF_FUN2 / __DEF_FUNI add a float overload plus the templated
// overload; bare __HIP_OVERLOAD1 / __HIP_OVERLOAD2 add only the templated one.
// The first macro argument is the return type handed to the overload macro.
// Some invocations carry a trailing ';' and some do not.
1340 __DEF_FUN1(
double, acos)
1341 __DEF_FUN1(
double, acosh)
1342 __DEF_FUN1(
double, asin)
1343 __DEF_FUN1(
double, asinh)
1344 __DEF_FUN1(
double, atan)
1345 __DEF_FUN2(
double, atan2);
1346 __DEF_FUN1(
double, atanh)
1347 __DEF_FUN1(
double, cbrt)
1348 __DEF_FUN1(
double, ceil)
1349 __DEF_FUN2(
double, copysign);
1350 __DEF_FUN1(
double, cos)
1351 __DEF_FUN1(
double, cosh)
1352 __DEF_FUN1(
double, erf)
1353 __DEF_FUN1(
double, erfc)
1354 __DEF_FUN1(
double, exp)
1355 __DEF_FUN1(
double, exp2)
1356 __DEF_FUN1(
double, expm1)
1357 __DEF_FUN1(
double, fabs)
1358 __DEF_FUN2(
double, fdim);
1359 __DEF_FUN1(
double, floor)
1360 __DEF_FUN2(
double, fmax);
1361 __DEF_FUN2(
double, fmin);
1362 __DEF_FUN2(
double, fmod);
1364 __DEF_FUN2(
double, hypot);
1365 __DEF_FUNI(
int, ilogb)
1366 __HIP_OVERLOAD1(
bool, isfinite)
1367 __HIP_OVERLOAD2(
bool, isgreater);
1368 __HIP_OVERLOAD2(
bool, isgreaterequal);
1369 __HIP_OVERLOAD1(
bool, isinf);
1370 __HIP_OVERLOAD2(
bool, isless);
1371 __HIP_OVERLOAD2(
bool, islessequal);
1372 __HIP_OVERLOAD2(
bool, islessgreater);
1373 __HIP_OVERLOAD1(
bool, isnan);
1375 __HIP_OVERLOAD2(
bool, isunordered);
1376 __DEF_FUN1(
double, lgamma)
1377 __DEF_FUN1(
double, log)
1378 __DEF_FUN1(
double, log10)
1379 __DEF_FUN1(
double, log1p)
1380 __DEF_FUN1(
double, log2)
1381 __DEF_FUN1(
double, logb)
1382 __DEF_FUNI(
long long, llrint)
1383 __DEF_FUNI(
long long, llround)
1384 __DEF_FUNI(
long, lrint)
1385 __DEF_FUNI(
long, lround)
1386 __DEF_FUN1(
double, nearbyint);
1387 __DEF_FUN2(
double, nextafter);
1388 __DEF_FUN2(
double, pow);
1389 __DEF_FUN2(
double, remainder);
1390 __DEF_FUN1(
double, rint);
1391 __DEF_FUN1(
double, round);
1392 __HIP_OVERLOAD1(
bool, signbit)
1393 __DEF_FUN1(
double, sin)
1394 __DEF_FUN1(
double, sinh)
1395 __DEF_FUN1(
double, sqrt)
1396 __DEF_FUN1(
double, tan)
1397 __DEF_FUN1(
double, tanh)
1398 __DEF_FUN1(
double, tgamma)
1399 __DEF_FUN1(
double, trunc);
// __DEF_FLOAT_FUN2I(func): defines float func(float x, int y) that calls
// func##f(x, y). Instantiated below for scalbn and ldexp.
1402 #define __DEF_FLOAT_FUN2I(func) \
1405 float func(float x, int y) \
1407 return func##f(x, y); \
1409 __DEF_FLOAT_FUN2I(scalbn)
1410 __DEF_FLOAT_FUN2I(ldexp)
// Generic min/max over a type T (the template header is not visible in this
// excerpt). Both take their arguments by value and compare with < or >; when
// neither comparison holds, arg2 is returned.
1413 __DEVICE__
inline T min(T arg1, T arg2) {
1414 return (arg1 < arg2) ? arg1 : arg2;
1418 __DEVICE__
inline T max(T arg1, T arg2) {
1419 return (arg1 > arg2) ? arg1 : arg2;
// int overloads of min/max: return arg1 when it is strictly smaller (min) or
// strictly larger (max) than arg2, otherwise arg2.
1422 __DEVICE__
inline int min(
int arg1,
int arg2) {
1423 return (arg1 < arg2) ? arg1 : arg2;
1425 __DEVICE__
inline int max(
int arg1,
int arg2) {
1426 return (arg1 > arg2) ? arg1 : arg2;
// Mixed (uint32_t, int) overloads of min/max that return int.
// NOTE(review): arg1 is unsigned and arg2 is signed, so the comparison is done
// after the usual arithmetic conversions, and the selected value is then
// converted to the int return type.
1429 __DEVICE__
inline int min(uint32_t arg1,
int arg2) {
1430 return (arg1 < arg2) ? arg1 : arg2;
1432 __DEVICE__
inline int max(uint32_t arg1,
int arg2) {
1433 return (arg1 > arg2) ? arg1 : arg2;
// float and double overloads of max/min, followed by the two-type templated
// overloads generated by __HIP_OVERLOAD2 for max and min.
// NOTE(review): the bodies of the four overloads are not visible in this
// excerpt.
1438 float max(
float x,
float y) {
1444 double max(
double x,
double y) {
1450 float min(
float x,
float y) {
1456 double min(
double x,
double y) {
1460 __HIP_OVERLOAD2(
double, max)
1461 __HIP_OVERLOAD2(
double, min)
1463 #if !defined(__HIPCC_RTC__)
// Host-side int min/max that delegate to std::min / std::max. The surrounding
// preprocessor guard compiles them only when __HIPCC_RTC__ is not defined.
1464 __host__ inline static int min(
int arg1,
int arg2) {
1465 return std::min(arg1, arg2);
1468 __host__ inline static int max(
int arg1,
int arg2) {
1469 return std::max(arg1, arg2);
1471 #endif // !defined(__HIPCC_RTC__)
// pow overloads taking an integer exponent: float delegates to powif, double
// to powi, and _Float16 calls __ocml_pown_f16 directly.
1474 inline float pow(
float base,
int iexp) {
1475 return powif(base, iexp);
1479 inline double pow(
double base,
int iexp) {
1480 return powi(base, iexp);
1484 inline _Float16 pow(_Float16 base,
int iexp) {
1485 return __ocml_pown_f16(base, iexp);
1488 #pragma pop_macro("__DEF_FLOAT_FUN")
1489 #pragma pop_macro("__DEF_FLOAT_FUN2")
1490 #pragma pop_macro("__DEF_FLOAT_FUN2I")
1491 #pragma pop_macro("__HIP_OVERLOAD")
1492 #pragma pop_macro("__HIP_OVERLOAD2")
1494 #endif // !__CLANG_HIP_RUNTIME_WRAPPER_INCLUDED__
1496 #pragma pop_macro("__DEVICE__")
1497 #pragma pop_macro("__RETURN_TYPE")