25 #include "hip_fp16_math_fwd.h" 36 #if !__HIP_DEVICE_COMPILE__ 47 #include "kalmar_math.h" 50 #if _LIBCPP_VERSION && __HIP__ 53 struct __numeric_type<_Float16>
55 static _Float16 __test(_Float16);
57 typedef _Float16 type;
58 static const bool value =
true;
61 #endif // _LIBCPP_VERSION 63 #pragma push_macro("__DEVICE__") 64 #pragma push_macro("__RETURN_TYPE") 67 #define __DEVICE__ __device__ 68 #define __RETURN_TYPE int 69 #else // to be consistent with __clang_cuda_math_forward_declares 70 #define __DEVICE__ static __device__ 71 #define __RETURN_TYPE bool 76 uint64_t __make_mantissa_base8(
const char* tagp)
82 if (tmp >=
'0' && tmp <=
'7') r = (r * 8u) + tmp -
'0';
93 uint64_t __make_mantissa_base10(
const char* tagp)
99 if (tmp >=
'0' && tmp <=
'9') r = (r * 10u) + tmp -
'0';
110 uint64_t __make_mantissa_base16(
const char* tagp)
116 if (tmp >=
'0' && tmp <=
'9') r = (r * 16u) + tmp -
'0';
117 else if (tmp >=
'a' && tmp <=
'f') r = (r * 16u) + tmp -
'a' + 10;
118 else if (tmp >=
'A' && tmp <=
'F') r = (r * 16u) + tmp -
'A' + 10;
129 uint64_t __make_mantissa(
const char* tagp)
131 if (!tagp)
return 0u;
136 if (*tagp ==
'x' || *tagp ==
'X')
return __make_mantissa_base16(tagp);
137 else return __make_mantissa_base8(tagp);
140 return __make_mantissa_base10(tagp);
144 #if (__hcc_workweek__ >= 19015) || __HIP_CLANG_ONLY__ 147 int amd_mixed_dot(
short2 a,
short2 b,
int c,
bool saturate) {
148 return __ockl_sdot2(a.data, b.data, c, saturate);
153 return __ockl_udot2(a.data, b.data, c, saturate);
157 int amd_mixed_dot(
char4 a,
char4 b,
int c,
bool saturate) {
158 return __ockl_sdot4(a.data, b.data, c, saturate);
162 uint amd_mixed_dot(
uchar4 a,
uchar4 b, uint c,
bool saturate) {
163 return __ockl_udot4(a.data, b.data, c, saturate);
167 int amd_mixed_dot(
int a,
int b,
int c,
bool saturate) {
168 return __ockl_sdot8(a, b, c, saturate);
172 uint amd_mixed_dot(uint a, uint b, uint c,
bool saturate) {
173 return __ockl_udot8(a, b, c, saturate);
180 float abs(
float x) {
return __ocml_fabs_f32(x); }
183 float acosf(
float x) {
return __ocml_acos_f32(x); }
186 float acoshf(
float x) {
return __ocml_acosh_f32(x); }
189 float asinf(
float x) {
return __ocml_asin_f32(x); }
192 float asinhf(
float x) {
return __ocml_asinh_f32(x); }
195 float atan2f(
float x,
float y) {
return __ocml_atan2_f32(x, y); }
198 float atanf(
float x) {
return __ocml_atan_f32(x); }
201 float atanhf(
float x) {
return __ocml_atanh_f32(x); }
204 float cbrtf(
float x) {
return __ocml_cbrt_f32(x); }
207 float ceilf(
float x) {
return __ocml_ceil_f32(x); }
210 float copysignf(
float x,
float y) {
return __ocml_copysign_f32(x, y); }
213 float cosf(
float x) {
return __ocml_cos_f32(x); }
216 float coshf(
float x) {
return __ocml_cosh_f32(x); }
219 float cospif(
float x) {
return __ocml_cospi_f32(x); }
222 float cyl_bessel_i0f(
float x) {
return __ocml_i0_f32(x); }
225 float cyl_bessel_i1f(
float x) {
return __ocml_i1_f32(x); }
228 float erfcf(
float x) {
return __ocml_erfc_f32(x); }
231 float erfcinvf(
float x) {
return __ocml_erfcinv_f32(x); }
234 float erfcxf(
float x) {
return __ocml_erfcx_f32(x); }
237 float erff(
float x) {
return __ocml_erf_f32(x); }
240 float erfinvf(
float x) {
return __ocml_erfinv_f32(x); }
243 float exp10f(
float x) {
return __ocml_exp10_f32(x); }
246 float exp2f(
float x) {
return __ocml_exp2_f32(x); }
249 float expf(
float x) {
return __ocml_exp_f32(x); }
252 float expm1f(
float x) {
return __ocml_expm1_f32(x); }
255 float fabsf(
float x) {
return __ocml_fabs_f32(x); }
258 float fdimf(
float x,
float y) {
return __ocml_fdim_f32(x, y); }
// Divides x by y with the built-in '/' operator and returns the quotient.
float fdividef(float x, float y)
{
    const float quotient = x / y;
    return quotient;
}
264 float floorf(
float x) {
return __ocml_floor_f32(x); }
267 float fmaf(
float x,
float y,
float z) {
return __ocml_fma_f32(x, y, z); }
270 float fmaxf(
float x,
float y) {
return __ocml_fmax_f32(x, y); }
273 float fminf(
float x,
float y) {
return __ocml_fmin_f32(x, y); }
276 float fmodf(
float x,
float y) {
return __ocml_fmod_f32(x, y); }
279 float frexpf(
float x,
int* nptr)
283 __ocml_frexp_f32(x, (__attribute__((address_space(5)))
int*) &tmp);
290 float hypotf(
float x,
float y) {
return __ocml_hypot_f32(x, y); }
293 int ilogbf(
float x) {
return __ocml_ilogb_f32(x); }
296 __RETURN_TYPE isfinite(
float x) {
return __ocml_isfinite_f32(x); }
299 __RETURN_TYPE isinf(
float x) {
return __ocml_isinf_f32(x); }
302 __RETURN_TYPE isnan(
float x) {
return __ocml_isnan_f32(x); }
305 float j0f(
float x) {
return __ocml_j0_f32(x); }
308 float j1f(
float x) {
return __ocml_j1_f32(x); }
311 float jnf(
int n,
float x)
315 if (n == 0)
return j0f(x);
316 if (n == 1)
return j1f(x);
320 for (
int i = 1; i < n; ++i) {
321 float x2 = (2 * i) / x * x1 - x0;
330 float ldexpf(
float x,
int e) {
return __ocml_ldexp_f32(x, e); }
333 float lgammaf(
float x) {
return __ocml_lgamma_f32(x); }
336 long long int llrintf(
float x) {
return __ocml_rint_f32(x); }
339 long long int llroundf(
float x) {
return __ocml_round_f32(x); }
342 float log10f(
float x) {
return __ocml_log10_f32(x); }
345 float log1pf(
float x) {
return __ocml_log1p_f32(x); }
348 float log2f(
float x) {
return __ocml_log2_f32(x); }
351 float logbf(
float x) {
return __ocml_logb_f32(x); }
354 float logf(
float x) {
return __ocml_log_f32(x); }
357 long int lrintf(
float x) {
return __ocml_rint_f32(x); }
360 long int lroundf(
float x) {
return __ocml_round_f32(x); }
363 float modff(
float x,
float* iptr)
367 __ocml_modf_f32(x, (__attribute__((address_space(5)))
float*) &tmp);
374 float nanf(
const char* tagp)
379 uint32_t mantissa : 22;
381 uint32_t exponent : 8;
385 static_assert(
sizeof(
float) ==
sizeof(ieee_float),
"");
389 tmp.bits.exponent = ~0u;
391 tmp.bits.mantissa = __make_mantissa(tagp);
397 float nearbyintf(
float x) {
return __ocml_nearbyint_f32(x); }
400 float nextafterf(
float x,
float y) {
return __ocml_nextafter_f32(x, y); }
403 float norm3df(
float x,
float y,
float z) {
return __ocml_len3_f32(x, y, z); }
406 float norm4df(
float x,
float y,
float z,
float w)
408 return __ocml_len4_f32(x, y, z, w);
412 float normcdff(
float x) {
return __ocml_ncdf_f32(x); }
415 float normcdfinvf(
float x) {
return __ocml_ncdfinv_f32(x); }
418 float normf(
int dim,
const float* a)
421 while (dim--) { r += a[0] * a[0]; ++a; }
423 return __ocml_sqrt_f32(r);
427 float powf(
float x,
float y) {
return __ocml_pow_f32(x, y); }
430 float rcbrtf(
float x) {
return __ocml_rcbrt_f32(x); }
433 float remainderf(
float x,
float y) {
return __ocml_remainder_f32(x, y); }
436 float remquof(
float x,
float y,
int* quo)
440 __ocml_remquo_f32(x, y, (__attribute__((address_space(5)))
int*) &tmp);
447 float rhypotf(
float x,
float y) {
return __ocml_rhypot_f32(x, y); }
450 float rintf(
float x) {
return __ocml_rint_f32(x); }
453 float rnorm3df(
float x,
float y,
float z)
455 return __ocml_rlen3_f32(x, y, z);
460 float rnorm4df(
float x,
float y,
float z,
float w)
462 return __ocml_rlen4_f32(x, y, z, w);
466 float rnormf(
int dim,
const float* a)
469 while (dim--) { r += a[0] * a[0]; ++a; }
471 return __ocml_rsqrt_f32(r);
475 float roundf(
float x) {
return __ocml_round_f32(x); }
478 float rsqrtf(
float x) {
return __ocml_rsqrt_f32(x); }
481 float scalblnf(
float x,
long int n)
483 return (n < INT_MAX) ? __ocml_scalbn_f32(x, n) : __ocml_scalb_f32(x, n);
487 float scalbnf(
float x,
int n) {
return __ocml_scalbn_f32(x, n); }
490 __RETURN_TYPE signbit(
float x) {
return __ocml_signbit_f32(x); }
493 void sincosf(
float x,
float* sptr,
float* cptr)
498 __ocml_sincos_f32(x, (__attribute__((address_space(5)))
float*) &tmp);
503 void sincospif(
float x,
float* sptr,
float* cptr)
508 __ocml_sincospi_f32(x, (__attribute__((address_space(5)))
float*) &tmp);
513 float sinf(
float x) {
return __ocml_sin_f32(x); }
516 float sinhf(
float x) {
return __ocml_sinh_f32(x); }
519 float sinpif(
float x) {
return __ocml_sinpi_f32(x); }
522 float sqrtf(
float x) {
return __ocml_sqrt_f32(x); }
525 float tanf(
float x) {
return __ocml_tan_f32(x); }
528 float tanhf(
float x) {
return __ocml_tanh_f32(x); }
531 float tgammaf(
float x) {
return __ocml_tgamma_f32(x); }
534 float truncf(
float x) {
return __ocml_trunc_f32(x); }
537 float y0f(
float x) {
return __ocml_y0_f32(x); }
540 float y1f(
float x) {
return __ocml_y1_f32(x); }
543 float ynf(
int n,
float x)
548 if (n == 0)
return y0f(x);
549 if (n == 1)
return y1f(x);
553 for (
int i = 1; i < n; ++i) {
554 float x2 = (2 * i) / x * x1 - x0;
565 float __cosf(
float x) {
return __ocml_native_cos_f32(x); }
568 float __exp10f(
float x) {
return __ocml_native_exp10_f32(x); }
571 float __expf(
float x) {
return __ocml_native_exp_f32(x); }
572 #if defined OCML_BASIC_ROUNDED_OPERATIONS 575 float __fadd_rd(
float x,
float y) {
return __ocml_add_rtn_f32(x, y); }
// Adds x and y with the built-in '+' operator and returns the sum.
float __fadd_rn(float x, float y)
{
    const float sum = x + y;
    return sum;
}
580 #if defined OCML_BASIC_ROUNDED_OPERATIONS 583 float __fadd_ru(
float x,
float y) {
return __ocml_add_rtp_f32(x, y); }
586 float __fadd_rz(
float x,
float y) {
return __ocml_add_rtz_f32(x, y); }
589 float __fdiv_rd(
float x,
float y) {
return __ocml_div_rtn_f32(x, y); }
// Divides x by y with the built-in '/' operator and returns the quotient.
float __fdiv_rn(float x, float y)
{
    const float quotient = x / y;
    return quotient;
}
594 #if defined OCML_BASIC_ROUNDED_OPERATIONS 597 float __fdiv_ru(
float x,
float y) {
return __ocml_div_rtp_f32(x, y); }
600 float __fdiv_rz(
float x,
float y) {
return __ocml_div_rtz_f32(x, y); }
// Divides x by y with the built-in '/' operator and returns the quotient.
float __fdividef(float x, float y)
{
    const float quotient = x / y;
    return quotient;
}
605 #if defined OCML_BASIC_ROUNDED_OPERATIONS 608 float __fmaf_rd(
float x,
float y,
float z)
610 return __ocml_fma_rtn_f32(x, y, z);
615 float __fmaf_rn(
float x,
float y,
float z)
617 return __ocml_fma_f32(x, y, z);
619 #if defined OCML_BASIC_ROUNDED_OPERATIONS 622 float __fmaf_ru(
float x,
float y,
float z)
624 return __ocml_fma_rtp_f32(x, y, z);
628 float __fmaf_rz(
float x,
float y,
float z)
630 return __ocml_fma_rtz_f32(x, y, z);
634 float __fmul_rd(
float x,
float y) {
return __ocml_mul_rtn_f32(x, y); }
// Multiplies x by y with the built-in '*' operator and returns the product.
float __fmul_rn(float x, float y)
{
    const float product = x * y;
    return product;
}
639 #if defined OCML_BASIC_ROUNDED_OPERATIONS 642 float __fmul_ru(
float x,
float y) {
return __ocml_mul_rtp_f32(x, y); }
645 float __fmul_rz(
float x,
float y) {
return __ocml_mul_rtz_f32(x, y); }
648 float __frcp_rd(
float x) {
return __llvm_amdgcn_rcp_f32(x); }
652 float __frcp_rn(
float x) {
return __llvm_amdgcn_rcp_f32(x); }
653 #if defined OCML_BASIC_ROUNDED_OPERATIONS 656 float __frcp_ru(
float x) {
return __llvm_amdgcn_rcp_f32(x); }
659 float __frcp_rz(
float x) {
return __llvm_amdgcn_rcp_f32(x); }
663 float __frsqrt_rn(
float x) {
return __llvm_amdgcn_rsq_f32(x); }
664 #if defined OCML_BASIC_ROUNDED_OPERATIONS 667 float __fsqrt_rd(
float x) {
return __ocml_sqrt_rtn_f32(x); }
671 float __fsqrt_rn(
float x) {
return __ocml_native_sqrt_f32(x); }
672 #if defined OCML_BASIC_ROUNDED_OPERATIONS 675 float __fsqrt_ru(
float x) {
return __ocml_sqrt_rtp_f32(x); }
678 float __fsqrt_rz(
float x) {
return __ocml_sqrt_rtz_f32(x); }
681 float __fsub_rd(
float x,
float y) {
return __ocml_sub_rtn_f32(x, y); }
// Subtracts y from x with the built-in '-' operator and returns the difference.
float __fsub_rn(float x, float y)
{
    const float difference = x - y;
    return difference;
}
686 #if defined OCML_BASIC_ROUNDED_OPERATIONS 689 float __fsub_ru(
float x,
float y) {
return __ocml_sub_rtp_f32(x, y); }
692 float __fsub_rz(
float x,
float y) {
return __ocml_sub_rtz_f32(x, y); }
696 float __log10f(
float x) {
return __ocml_native_log10_f32(x); }
699 float __log2f(
float x) {
return __ocml_native_log2_f32(x); }
702 float __logf(
float x) {
return __ocml_native_log_f32(x); }
705 float __powf(
float x,
float y) {
return __ocml_pow_f32(x, y); }
// Clamps x to the closed interval [+0, 1].
//
// Returns 0 when x is not greater than zero, 1 when x is greater than one,
// and x itself otherwise.
//
// The lower bound is tested as !(x > 0) rather than (x < 0): every ordered
// comparison with NaN is false, so the negated form maps a NaN input to 0
// instead of returning it unchanged. It also maps -0.0 to +0.
float __saturatef(float x)
{
    if (!(x > 0.0f)) {
        return 0.0f;
    }
    return (x > 1.0f) ? 1.0f : x;
}
711 void __sincosf(
float x,
float* sptr,
float* cptr)
713 *sptr = __ocml_native_sin_f32(x);
714 *cptr = __ocml_native_cos_f32(x);
718 float __sinf(
float x) {
return __ocml_native_sin_f32(x); }
721 float __tanf(
float x) {
return __ocml_tan_f32(x); }
728 double abs(
double x) {
return __ocml_fabs_f64(x); }
731 double acos(
double x) {
return __ocml_acos_f64(x); }
734 double acosh(
double x) {
return __ocml_acosh_f64(x); }
737 double asin(
double x) {
return __ocml_asin_f64(x); }
740 double asinh(
double x) {
return __ocml_asinh_f64(x); }
743 double atan(
double x) {
return __ocml_atan_f64(x); }
746 double atan2(
double x,
double y) {
return __ocml_atan2_f64(x, y); }
749 double atanh(
double x) {
return __ocml_atanh_f64(x); }
752 double cbrt(
double x) {
return __ocml_cbrt_f64(x); }
755 double ceil(
double x) {
return __ocml_ceil_f64(x); }
758 double copysign(
double x,
double y) {
return __ocml_copysign_f64(x, y); }
761 double cos(
double x) {
return __ocml_cos_f64(x); }
764 double cosh(
double x) {
return __ocml_cosh_f64(x); }
767 double cospi(
double x) {
return __ocml_cospi_f64(x); }
770 double cyl_bessel_i0(
double x) {
return __ocml_i0_f64(x); }
773 double cyl_bessel_i1(
double x) {
return __ocml_i1_f64(x); }
776 double erf(
double x) {
return __ocml_erf_f64(x); }
779 double erfc(
double x) {
return __ocml_erfc_f64(x); }
782 double erfcinv(
double x) {
return __ocml_erfcinv_f64(x); }
785 double erfcx(
double x) {
return __ocml_erfcx_f64(x); }
788 double erfinv(
double x) {
return __ocml_erfinv_f64(x); }
791 double exp(
double x) {
return __ocml_exp_f64(x); }
794 double exp10(
double x) {
return __ocml_exp10_f64(x); }
797 double exp2(
double x) {
return __ocml_exp2_f64(x); }
800 double expm1(
double x) {
return __ocml_expm1_f64(x); }
803 double fabs(
double x) {
return __ocml_fabs_f64(x); }
806 double fdim(
double x,
double y) {
return __ocml_fdim_f64(x, y); }
809 double floor(
double x) {
return __ocml_floor_f64(x); }
812 double fma(
double x,
double y,
double z) {
return __ocml_fma_f64(x, y, z); }
815 double fmax(
double x,
double y) {
return __ocml_fmax_f64(x, y); }
818 double fmin(
double x,
double y) {
return __ocml_fmin_f64(x, y); }
821 double fmod(
double x,
double y) {
return __ocml_fmod_f64(x, y); }
824 double frexp(
double x,
int* nptr)
828 __ocml_frexp_f64(x, (__attribute__((address_space(5)))
int*) &tmp);
835 double hypot(
double x,
double y) {
return __ocml_hypot_f64(x, y); }
838 int ilogb(
double x) {
return __ocml_ilogb_f64(x); }
841 __RETURN_TYPE isfinite(
double x) {
return __ocml_isfinite_f64(x); }
844 __RETURN_TYPE isinf(
double x) {
return __ocml_isinf_f64(x); }
847 __RETURN_TYPE isnan(
double x) {
return __ocml_isnan_f64(x); }
850 double j0(
double x) {
return __ocml_j0_f64(x); }
853 double j1(
double x) {
return __ocml_j1_f64(x); }
856 double jn(
int n,
double x)
861 if (n == 0)
return j0f(x);
862 if (n == 1)
return j1f(x);
866 for (
int i = 1; i < n; ++i) {
867 double x2 = (2 * i) / x * x1 - x0;
876 double ldexp(
double x,
int e) {
return __ocml_ldexp_f64(x, e); }
879 double lgamma(
double x) {
return __ocml_lgamma_f64(x); }
882 long long int llrint(
double x) {
return __ocml_rint_f64(x); }
885 long long int llround(
double x) {
return __ocml_round_f64(x); }
888 double log(
double x) {
return __ocml_log_f64(x); }
891 double log10(
double x) {
return __ocml_log10_f64(x); }
894 double log1p(
double x) {
return __ocml_log1p_f64(x); }
897 double log2(
double x) {
return __ocml_log2_f64(x); }
900 double logb(
double x) {
return __ocml_logb_f64(x); }
903 long int lrint(
double x) {
return __ocml_rint_f64(x); }
906 long int lround(
double x) {
return __ocml_round_f64(x); }
909 double modf(
double x,
double* iptr)
913 __ocml_modf_f64(x, (__attribute__((address_space(5)))
double*) &tmp);
920 double nan(
const char* tagp)
926 uint64_t mantissa : 51;
928 uint32_t exponent : 11;
931 static_assert(
sizeof(
double) ==
sizeof(ieee_double),
"");
935 tmp.bits.exponent = ~0u;
937 tmp.bits.mantissa = __make_mantissa(tagp);
941 static_assert(
sizeof(uint64_t)==
sizeof(
double));
942 uint64_t val = __make_mantissa(tagp);
944 return *
reinterpret_cast<double*
>(&val);
949 double nearbyint(
double x) {
return __ocml_nearbyint_f64(x); }
952 double nextafter(
double x,
double y) {
return __ocml_nextafter_f64(x, y); }
955 double norm(
int dim,
const double* a)
958 while (dim--) { r += a[0] * a[0]; ++a; }
960 return __ocml_sqrt_f64(r);
964 double norm3d(
double x,
double y,
double z)
966 return __ocml_len3_f64(x, y, z);
970 double norm4d(
double x,
double y,
double z,
double w)
972 return __ocml_len4_f64(x, y, z, w);
976 double normcdf(
double x) {
return __ocml_ncdf_f64(x); }
979 double normcdfinv(
double x) {
return __ocml_ncdfinv_f64(x); }
982 double pow(
double x,
double y) {
return __ocml_pow_f64(x, y); }
985 double rcbrt(
double x) {
return __ocml_rcbrt_f64(x); }
988 double remainder(
double x,
double y) {
return __ocml_remainder_f64(x, y); }
991 double remquo(
double x,
double y,
int* quo)
995 __ocml_remquo_f64(x, y, (__attribute__((address_space(5)))
int*) &tmp);
1002 double rhypot(
double x,
double y) {
return __ocml_rhypot_f64(x, y); }
1005 double rint(
double x) {
return __ocml_rint_f64(x); }
1008 double rnorm(
int dim,
const double* a)
1011 while (dim--) { r += a[0] * a[0]; ++a; }
1013 return __ocml_rsqrt_f64(r);
1017 double rnorm3d(
double x,
double y,
double z)
1019 return __ocml_rlen3_f64(x, y, z);
1023 double rnorm4d(
double x,
double y,
double z,
double w)
1025 return __ocml_rlen4_f64(x, y, z, w);
1029 double round(
double x) {
return __ocml_round_f64(x); }
1032 double rsqrt(
double x) {
return __ocml_rsqrt_f64(x); }
1035 double scalbln(
double x,
long int n)
1037 return (n < INT_MAX) ? __ocml_scalbn_f64(x, n) : __ocml_scalb_f64(x, n);
1041 double scalbn(
double x,
int n) {
return __ocml_scalbn_f64(x, n); }
1044 __RETURN_TYPE signbit(
double x) {
return __ocml_signbit_f64(x); }
1047 double sin(
double x) {
return __ocml_sin_f64(x); }
1050 void sincos(
double x,
double* sptr,
double* cptr)
1054 __ocml_sincos_f64(x, (__attribute__((address_space(5)))
double*) &tmp);
1059 void sincospi(
double x,
double* sptr,
double* cptr)
1062 *sptr = __ocml_sincospi_f64(
1063 x, (__attribute__((address_space(5)))
double*) &tmp);
1068 double sinh(
double x) {
return __ocml_sinh_f64(x); }
1071 double sinpi(
double x) {
return __ocml_sinpi_f64(x); }
1074 double sqrt(
double x) {
return __ocml_sqrt_f64(x); }
1077 double tan(
double x) {
return __ocml_tan_f64(x); }
1080 double tanh(
double x) {
return __ocml_tanh_f64(x); }
1083 double tgamma(
double x) {
return __ocml_tgamma_f64(x); }
1086 double trunc(
double x) {
return __ocml_trunc_f64(x); }
1089 double y0(
double x) {
return __ocml_y0_f64(x); }
1092 double y1(
double x) {
return __ocml_y1_f64(x); }
1095 double yn(
int n,
double x)
1100 if (n == 0)
return j0f(x);
1101 if (n == 1)
return j1f(x);
1105 for (
int i = 1; i < n; ++i) {
1106 double x2 = (2 * i) / x * x1 - x0;
1115 #if defined OCML_BASIC_ROUNDED_OPERATIONS 1118 double __dadd_rd(
double x,
double y) {
return __ocml_add_rtn_f64(x, y); }
// Adds x and y with the built-in '+' operator and returns the sum.
double __dadd_rn(double x, double y)
{
    const double sum = x + y;
    return sum;
}
1123 #if defined OCML_BASIC_ROUNDED_OPERATIONS 1126 double __dadd_ru(
double x,
double y) {
return __ocml_add_rtp_f64(x, y); }
1129 double __dadd_rz(
double x,
double y) {
return __ocml_add_rtz_f64(x, y); }
1132 double __ddiv_rd(
double x,
double y) {
return __ocml_div_rtn_f64(x, y); }
// Divides x by y with the built-in '/' operator and returns the quotient.
double __ddiv_rn(double x, double y)
{
    const double quotient = x / y;
    return quotient;
}
1137 #if defined OCML_BASIC_ROUNDED_OPERATIONS 1140 double __ddiv_ru(
double x,
double y) {
return __ocml_div_rtp_f64(x, y); }
1143 double __ddiv_rz(
double x,
double y) {
return __ocml_div_rtz_f64(x, y); }
1146 double __dmul_rd(
double x,
double y) {
return __ocml_mul_rtn_f64(x, y); }
// Multiplies x by y with the built-in '*' operator and returns the product.
double __dmul_rn(double x, double y)
{
    const double product = x * y;
    return product;
}
1151 #if defined OCML_BASIC_ROUNDED_OPERATIONS 1154 double __dmul_ru(
double x,
double y) {
return __ocml_mul_rtp_f64(x, y); }
1157 double __dmul_rz(
double x,
double y) {
return __ocml_mul_rtz_f64(x, y); }
1160 double __drcp_rd(
double x) {
return __llvm_amdgcn_rcp_f64(x); }
1164 double __drcp_rn(
double x) {
return __llvm_amdgcn_rcp_f64(x); }
1165 #if defined OCML_BASIC_ROUNDED_OPERATIONS 1168 double __drcp_ru(
double x) {
return __llvm_amdgcn_rcp_f64(x); }
1171 double __drcp_rz(
double x) {
return __llvm_amdgcn_rcp_f64(x); }
1174 double __dsqrt_rd(
double x) {
return __ocml_sqrt_rtn_f64(x); }
1178 double __dsqrt_rn(
double x) {
return __ocml_sqrt_f64(x); }
1179 #if defined OCML_BASIC_ROUNDED_OPERATIONS 1182 double __dsqrt_ru(
double x) {
return __ocml_sqrt_rtp_f64(x); }
1185 double __dsqrt_rz(
double x) {
return __ocml_sqrt_rtz_f64(x); }
1188 double __dsub_rd(
double x,
double y) {
return __ocml_sub_rtn_f64(x, y); }
// Subtracts y from x with the built-in '-' operator and returns the difference.
double __dsub_rn(double x, double y)
{
    const double difference = x - y;
    return difference;
}
1193 #if defined OCML_BASIC_ROUNDED_OPERATIONS 1196 double __dsub_ru(
double x,
double y) {
return __ocml_sub_rtp_f64(x, y); }
1199 double __dsub_rz(
double x,
double y) {
return __ocml_sub_rtz_f64(x, y); }
1202 double __fma_rd(
double x,
double y,
double z)
1204 return __ocml_fma_rtn_f64(x, y, z);
1209 double __fma_rn(
double x,
double y,
double z)
1211 return __ocml_fma_f64(x, y, z);
1213 #if defined OCML_BASIC_ROUNDED_OPERATIONS 1216 double __fma_ru(
double x,
double y,
double z)
1218 return __ocml_fma_rtp_f64(x, y, z);
1222 double __fma_rz(
double x,
double y,
double z)
1224 return __ocml_fma_rtz_f64(x, y, z);
1235 int sgn = x >> (
sizeof(int) * CHAR_BIT - 1);
1236 return (x ^ sgn) - sgn;
1242 long sgn = x >> (
sizeof(long) * CHAR_BIT - 1);
1243 return (x ^ sgn) - sgn;
// Branch-free absolute value of a long long.
//
// `mask` is x shifted right by (bit width - 1): 0 for non-negative x and,
// where >> on a negative value is an arithmetic shift, all ones (-1) for
// negative x. (x + mask) ^ mask then leaves x untouched when mask is 0 and
// computes ~(x - 1) == -x when mask is -1.
long long llabs(long long x)
{
    const unsigned top_bit = sizeof(long long) * CHAR_BIT - 1;
    const long long mask = x >> top_bit;
    return (x + mask) ^ mask;
}
1253 #if defined(__cplusplus) 1256 long abs(
long x) {
return labs(x); }
1259 long long abs(
long long x) {
return llabs(x); }
1264 inline _Float16 fma(_Float16 x, _Float16 y, _Float16 z) {
1265 return __ocml_fma_f16(x, y, z);
1269 inline float fma(
float x,
float y,
float z) {
1270 return fmaf(x, y, z);
1273 #pragma push_macro("__DEF_FLOAT_FUN") 1274 #pragma push_macro("__DEF_FLOAT_FUN2") 1275 #pragma push_macro("__DEF_FLOAT_FUN2I") 1276 #pragma push_macro("__HIP_OVERLOAD") 1277 #pragma push_macro("__HIP_OVERLOAD2") 1280 template<
bool __B,
class __T =
void>
1290 #define __HIP_OVERLOAD1(__retty, __fn) \ 1291 template <typename __T> \ 1293 typename __hip_enable_if<std::numeric_limits<__T>::is_integer, \ 1296 return ::__fn((double)__x); \ 1302 #define __HIP_OVERLOAD2(__retty, __fn) \ 1303 template <typename __T1, typename __T2> \ 1304 __DEVICE__ typename __hip_enable_if< \ 1305 std::numeric_limits<__T1>::is_specialized && \ 1306 std::numeric_limits<__T2>::is_specialized, \ 1308 __fn(__T1 __x, __T2 __y) { \ 1309 return __fn((double)__x, (double)__y); \ 1313 #define __DEF_FUN1(retty, func) \ 1316 float func(float x) \ 1318 return func##f(x); \ 1320 __HIP_OVERLOAD1(retty, func) 1323 #define __DEF_FUNI(retty, func) \ 1326 retty func(float x) \ 1328 return func##f(x); \ 1330 __HIP_OVERLOAD1(retty, func) 1333 #define __DEF_FUN2(retty, func) \ 1336 float func(float x, float y) \ 1338 return func##f(x, y); \ 1340 __HIP_OVERLOAD2(retty, func) 1342 __DEF_FUN1(
double, acos)
1343 __DEF_FUN1(
double, acosh)
1344 __DEF_FUN1(
double, asin)
1345 __DEF_FUN1(
double, asinh)
1346 __DEF_FUN1(
double, atan)
1347 __DEF_FUN2(
double, atan2);
1348 __DEF_FUN1(
double, atanh)
1349 __DEF_FUN1(
double, cbrt)
1350 __DEF_FUN1(
double, ceil)
1351 __DEF_FUN2(
double, copysign);
1352 __DEF_FUN1(
double, cos)
1353 __DEF_FUN1(
double, cosh)
1354 __DEF_FUN1(
double, erf)
1355 __DEF_FUN1(
double, erfc)
1356 __DEF_FUN1(
double, exp)
1357 __DEF_FUN1(
double, exp2)
1358 __DEF_FUN1(
double, expm1)
1359 __DEF_FUN1(
double, fabs)
1360 __DEF_FUN2(
double, fdim);
1361 __DEF_FUN1(
double, floor)
1362 __DEF_FUN2(
double, fmax);
1363 __DEF_FUN2(
double, fmin);
1364 __DEF_FUN2(
double, fmod);
1366 __DEF_FUN2(
double, hypot);
1367 __DEF_FUNI(
int, ilogb)
1368 __HIP_OVERLOAD1(
bool, isfinite)
1369 __HIP_OVERLOAD2(
bool, isgreater);
1370 __HIP_OVERLOAD2(
bool, isgreaterequal);
1371 __HIP_OVERLOAD1(
bool, isinf);
1372 __HIP_OVERLOAD2(
bool, isless);
1373 __HIP_OVERLOAD2(
bool, islessequal);
1374 __HIP_OVERLOAD2(
bool, islessgreater);
1375 __HIP_OVERLOAD1(
bool, isnan);
1377 __HIP_OVERLOAD2(
bool, isunordered);
1378 __DEF_FUN1(
double, lgamma)
1379 __DEF_FUN1(
double, log)
1380 __DEF_FUN1(
double, log10)
1381 __DEF_FUN1(
double, log1p)
1382 __DEF_FUN1(
double, log2)
1383 __DEF_FUN1(
double, logb)
1384 __DEF_FUNI(
long long, llrint)
1385 __DEF_FUNI(
long long, llround)
1386 __DEF_FUNI(
long, lrint)
1387 __DEF_FUNI(
long, lround)
1388 __DEF_FUN1(
double, nearbyint);
1389 __DEF_FUN2(
double, nextafter);
1390 __DEF_FUN2(
double, pow);
1391 __DEF_FUN2(
double, remainder);
1392 __DEF_FUN1(
double, rint);
1393 __DEF_FUN1(
double, round);
1394 __HIP_OVERLOAD1(
bool, signbit)
1395 __DEF_FUN1(
double, sin)
1396 __DEF_FUN1(
double, sinh)
1397 __DEF_FUN1(
double, sqrt)
1398 __DEF_FUN1(
double, tan)
1399 __DEF_FUN1(
double, tanh)
1400 __DEF_FUN1(
double, tgamma)
1401 __DEF_FUN1(
double, trunc);
1404 #define __DEF_FLOAT_FUN2I(func) \ 1407 float func(float x, int y) \ 1409 return func##f(x, y); \ 1411 __DEF_FLOAT_FUN2I(scalbn)
1414 __DEVICE__
inline static T min(T arg1, T arg2) {
1415 return (arg1 < arg2) ? arg1 : arg2;
1419 __DEVICE__
inline static T max(T arg1, T arg2) {
1420 return (arg1 > arg2) ? arg1 : arg2;
1425 __DEVICE__
inline static uint32_t min(uint32_t arg1, int32_t arg2) {
1426 return min(arg1, (uint32_t) arg2);
1446 __DEVICE__
inline static uint32_t max(uint32_t arg1, int32_t arg2) {
1447 return max(arg1, (uint32_t) arg2);
1449 __DEVICE__
inline static uint32_t max(int32_t arg1, uint32_t arg2) {
1450 return max((uint32_t) arg1, arg2);
1467 __DEVICE__
inline int min(
int arg1,
int arg2) {
1468 return (arg1 < arg2) ? arg1 : arg2;
1470 __DEVICE__
inline int max(
int arg1,
int arg2) {
1471 return (arg1 > arg2) ? arg1 : arg2;
1476 float max(
float x,
float y) {
1482 double max(
double x,
double y) {
1488 float min(
float x,
float y) {
1494 double min(
double x,
double y) {
1498 __HIP_OVERLOAD2(
double, max)
1499 __HIP_OVERLOAD2(
double, min)
1503 __host__ inline static int min(
int arg1,
int arg2) {
1504 return std::min(arg1, arg2);
1507 __host__ inline static int max(
int arg1,
int arg2) {
1508 return std::max(arg1, arg2);
1512 #pragma pop_macro("__DEF_FLOAT_FUN") 1513 #pragma pop_macro("__DEF_FLOAT_FUN2") 1514 #pragma pop_macro("__DEF_FLOAT_FUN2I") 1515 #pragma pop_macro("__HIP_OVERLOAD") 1516 #pragma pop_macro("__HIP_OVERLOAD2") 1517 #pragma pop_macro("__DEVICE__") 1518 #pragma pop_macro("__RETURN_TYPE") Definition: hip_vector_types.h:1369
#define __host__
Definition: host_defines.h:41
Definition: hip_vector_types.h:1356
Contains definitions of APIs for HIP runtime.
Defines the different new vector types for HIP runtime.
Definition: hip_vector_types.h:1376
Definition: hip_vector_types.h:1363
Definition: math_functions.h:1281