25 #include "hip_fp16_math_fwd.h" 40 #include "kalmar_math.h" 43 #pragma push_macro("__DEVICE__") 44 #pragma push_macro("__RETURN_TYPE") 47 #define __DEVICE__ __device__ 48 #define __RETURN_TYPE int 49 #else // to be consistent with __clang_cuda_math_forward_declares 50 #define __DEVICE__ static __device__ 51 #define __RETURN_TYPE bool 56 uint64_t __make_mantissa_base8(
const char* tagp)
62 if (tmp >=
'0' && tmp <=
'7') r = (r * 8u) + tmp -
'0';
73 uint64_t __make_mantissa_base10(
const char* tagp)
79 if (tmp >=
'0' && tmp <=
'9') r = (r * 10u) + tmp -
'0';
90 uint64_t __make_mantissa_base16(
const char* tagp)
96 if (tmp >=
'0' && tmp <=
'9') r = (r * 16u) + tmp -
'0';
97 else if (tmp >=
'a' && tmp <=
'f') r = (r * 16u) + tmp -
'a' + 10;
98 else if (tmp >=
'A' && tmp <=
'F') r = (r * 16u) + tmp -
'A' + 10;
109 uint64_t __make_mantissa(
const char* tagp)
111 if (!tagp)
return 0u;
116 if (*tagp ==
'x' || *tagp ==
'X')
return __make_mantissa_base16(tagp);
117 else return __make_mantissa_base8(tagp);
120 return __make_mantissa_base10(tagp);
124 #if (__hcc_workweek__ >= 19015) || __HIP_CLANG_ONLY__ 127 int amd_mixed_dot(short2 a, short2 b,
int c,
bool saturate) {
128 return __ockl_sdot2(a.data, b.data, c, saturate);
132 uint amd_mixed_dot(ushort2 a, ushort2 b, uint c,
bool saturate) {
133 return __ockl_udot2(a.data, b.data, c, saturate);
137 int amd_mixed_dot(char4 a, char4 b,
int c,
bool saturate) {
138 return __ockl_sdot4(a.data, b.data, c, saturate);
142 uint amd_mixed_dot(uchar4 a, uchar4 b, uint c,
bool saturate) {
143 return __ockl_udot4(a.data, b.data, c, saturate);
147 int amd_mixed_dot(
int a,
int b,
int c,
bool saturate) {
148 return __ockl_sdot8(a, b, c, saturate);
152 uint amd_mixed_dot(uint a, uint b, uint c,
bool saturate) {
153 return __ockl_udot8(a, b, c, saturate);
160 float abs(
float x) {
return __ocml_fabs_f32(x); }
163 float acosf(
float x) {
return __ocml_acos_f32(x); }
166 float acoshf(
float x) {
return __ocml_acosh_f32(x); }
169 float asinf(
float x) {
return __ocml_asin_f32(x); }
172 float asinhf(
float x) {
return __ocml_asinh_f32(x); }
175 float atan2f(
float x,
float y) {
return __ocml_atan2_f32(x, y); }
178 float atanf(
float x) {
return __ocml_atan_f32(x); }
181 float atanhf(
float x) {
return __ocml_atanh_f32(x); }
184 float cbrtf(
float x) {
return __ocml_cbrt_f32(x); }
187 float ceilf(
float x) {
return __ocml_ceil_f32(x); }
190 float copysignf(
float x,
float y) {
return __ocml_copysign_f32(x, y); }
193 float cosf(
float x) {
return __ocml_cos_f32(x); }
196 float coshf(
float x) {
return __ocml_cosh_f32(x); }
199 float cospif(
float x) {
return __ocml_cospi_f32(x); }
202 float cyl_bessel_i0f(
float x) {
return __ocml_i0_f32(x); }
205 float cyl_bessel_i1f(
float x) {
return __ocml_i1_f32(x); }
208 float erfcf(
float x) {
return __ocml_erfc_f32(x); }
211 float erfcinvf(
float x) {
return __ocml_erfcinv_f32(x); }
214 float erfcxf(
float x) {
return __ocml_erfcx_f32(x); }
217 float erff(
float x) {
return __ocml_erf_f32(x); }
220 float erfinvf(
float x) {
return __ocml_erfinv_f32(x); }
223 float exp10f(
float x) {
return __ocml_exp10_f32(x); }
226 float exp2f(
float x) {
return __ocml_exp2_f32(x); }
229 float expf(
float x) {
return __ocml_exp_f32(x); }
232 float expm1f(
float x) {
return __ocml_expm1_f32(x); }
235 float fabsf(
float x) {
return __ocml_fabs_f32(x); }
238 float fdimf(
float x,
float y) {
return __ocml_fdim_f32(x, y); }
241 float fdividef(
float x,
float y) {
return x / y; }
244 float floorf(
float x) {
return __ocml_floor_f32(x); }
247 float fmaf(
float x,
float y,
float z) {
return __ocml_fma_f32(x, y, z); }
250 float fmaxf(
float x,
float y) {
return __ocml_fmax_f32(x, y); }
253 float fminf(
float x,
float y) {
return __ocml_fmin_f32(x, y); }
256 float fmodf(
float x,
float y) {
return __ocml_fmod_f32(x, y); }
259 float frexpf(
float x,
int* nptr)
263 __ocml_frexp_f32(x, (
__attribute__((address_space(5)))
int*) &tmp);
270 float hypotf(
float x,
float y) {
return __ocml_hypot_f32(x, y); }
273 int ilogbf(
float x) {
return __ocml_ilogb_f32(x); }
276 __RETURN_TYPE isfinite(
float x) {
return __ocml_isfinite_f32(x); }
279 __RETURN_TYPE isinf(
float x) {
return __ocml_isinf_f32(x); }
282 __RETURN_TYPE isnan(
float x) {
return __ocml_isnan_f32(x); }
285 float j0f(
float x) {
return __ocml_j0_f32(x); }
288 float j1f(
float x) {
return __ocml_j1_f32(x); }
291 float jnf(
int n,
float x)
295 if (n == 0)
return j0f(x);
296 if (n == 1)
return j1f(x);
300 for (
int i = 1; i < n; ++i) {
301 float x2 = (2 * i) / x * x1 - x0;
310 float ldexpf(
float x,
int e) {
return __ocml_ldexp_f32(x, e); }
313 float lgammaf(
float x) {
return __ocml_lgamma_f32(x); }
316 long long int llrintf(
float x) {
return __ocml_rint_f32(x); }
319 long long int llroundf(
float x) {
return __ocml_round_f32(x); }
322 float log10f(
float x) {
return __ocml_log10_f32(x); }
325 float log1pf(
float x) {
return __ocml_log1p_f32(x); }
328 float log2f(
float x) {
return __ocml_log2_f32(x); }
331 float logbf(
float x) {
return __ocml_logb_f32(x); }
334 float logf(
float x) {
return __ocml_log_f32(x); }
337 long int lrintf(
float x) {
return __ocml_rint_f32(x); }
340 long int lroundf(
float x) {
return __ocml_round_f32(x); }
343 float modff(
float x,
float* iptr)
347 __ocml_modf_f32(x, (
__attribute__((address_space(5)))
float*) &tmp);
354 float nanf(
const char* tagp)
359 uint32_t mantissa : 22;
361 uint32_t exponent : 8;
365 static_assert(
sizeof(
float) ==
sizeof(ieee_float),
"");
369 tmp.bits.exponent = ~0u;
371 tmp.bits.mantissa = __make_mantissa(tagp);
377 float nearbyintf(
float x) {
return __ocml_nearbyint_f32(x); }
380 float nextafterf(
float x,
float y) {
return __ocml_nextafter_f32(x, y); }
383 float norm3df(
float x,
float y,
float z) {
return __ocml_len3_f32(x, y, z); }
386 float norm4df(
float x,
float y,
float z,
float w)
388 return __ocml_len4_f32(x, y, z, w);
392 float normcdff(
float x) {
return __ocml_ncdf_f32(x); }
395 float normcdfinvf(
float x) {
return __ocml_ncdfinv_f32(x); }
398 float normf(
int dim,
const float* a)
401 while (dim--) { r += a[0] * a[0]; ++a; }
403 return __ocml_sqrt_f32(r);
407 float powf(
float x,
float y) {
return __ocml_pow_f32(x, y); }
410 float rcbrtf(
float x) {
return __ocml_rcbrt_f32(x); }
413 float remainderf(
float x,
float y) {
return __ocml_remainder_f32(x, y); }
416 float remquof(
float x,
float y,
int* quo)
420 __ocml_remquo_f32(x, y, (
__attribute__((address_space(5)))
int*) &tmp);
427 float rhypotf(
float x,
float y) {
return __ocml_rhypot_f32(x, y); }
430 float rintf(
float x) {
return __ocml_rint_f32(x); }
433 float rnorm3df(
float x,
float y,
float z)
435 return __ocml_rlen3_f32(x, y, z);
440 float rnorm4df(
float x,
float y,
float z,
float w)
442 return __ocml_rlen4_f32(x, y, z, w);
446 float rnormf(
int dim,
const float* a)
449 while (dim--) { r += a[0] * a[0]; ++a; }
451 return __ocml_rsqrt_f32(r);
455 float roundf(
float x) {
return __ocml_round_f32(x); }
458 float rsqrtf(
float x) {
return __ocml_rsqrt_f32(x); }
461 float scalblnf(
float x,
long int n)
463 return (n < INT_MAX) ? __ocml_scalbn_f32(x, n) : __ocml_scalb_f32(x, n);
467 float scalbnf(
float x,
int n) {
return __ocml_scalbn_f32(x, n); }
470 __RETURN_TYPE signbit(
float x) {
return __ocml_signbit_f32(x); }
473 void sincosf(
float x,
float* sptr,
float* cptr)
478 __ocml_sincos_f32(x, (
__attribute__((address_space(5)))
float*) &tmp);
483 void sincospif(
float x,
float* sptr,
float* cptr)
488 __ocml_sincospi_f32(x, (
__attribute__((address_space(5)))
float*) &tmp);
493 float sinf(
float x) {
return __ocml_sin_f32(x); }
496 float sinhf(
float x) {
return __ocml_sinh_f32(x); }
499 float sinpif(
float x) {
return __ocml_sinpi_f32(x); }
502 float sqrtf(
float x) {
return __ocml_sqrt_f32(x); }
505 float tanf(
float x) {
return __ocml_tan_f32(x); }
508 float tanhf(
float x) {
return __ocml_tanh_f32(x); }
511 float tgammaf(
float x) {
return __ocml_tgamma_f32(x); }
514 float truncf(
float x) {
return __ocml_trunc_f32(x); }
517 float y0f(
float x) {
return __ocml_y0_f32(x); }
520 float y1f(
float x) {
return __ocml_y1_f32(x); }
523 float ynf(
int n,
float x)
528 if (n == 0)
return y0f(x);
529 if (n == 1)
return y1f(x);
533 for (
int i = 1; i < n; ++i) {
534 float x2 = (2 * i) / x * x1 - x0;
545 float __cosf(
float x) {
return __ocml_native_cos_f32(x); }
548 float __exp10f(
float x) {
return __ocml_native_exp10_f32(x); }
551 float __expf(
float x) {
return __ocml_native_exp_f32(x); }
552 #if defined OCML_BASIC_ROUNDED_OPERATIONS 555 float __fadd_rd(
float x,
float y) {
return __ocml_add_rtn_f32(x, y); }
559 float __fadd_rn(
float x,
float y) {
return x + y; }
560 #if defined OCML_BASIC_ROUNDED_OPERATIONS 563 float __fadd_ru(
float x,
float y) {
return __ocml_add_rtp_f32(x, y); }
566 float __fadd_rz(
float x,
float y) {
return __ocml_add_rtz_f32(x, y); }
569 float __fdiv_rd(
float x,
float y) {
return __ocml_div_rtn_f32(x, y); }
573 float __fdiv_rn(
float x,
float y) {
return x / y; }
574 #if defined OCML_BASIC_ROUNDED_OPERATIONS 577 float __fdiv_ru(
float x,
float y) {
return __ocml_div_rtp_f32(x, y); }
580 float __fdiv_rz(
float x,
float y) {
return __ocml_div_rtz_f32(x, y); }
584 float __fdividef(
float x,
float y) {
return x / y; }
585 #if defined OCML_BASIC_ROUNDED_OPERATIONS 588 float __fmaf_rd(
float x,
float y,
float z)
590 return __ocml_fma_rtn_f32(x, y, z);
595 float __fmaf_rn(
float x,
float y,
float z)
597 return __ocml_fma_f32(x, y, z);
599 #if defined OCML_BASIC_ROUNDED_OPERATIONS 602 float __fmaf_ru(
float x,
float y,
float z)
604 return __ocml_fma_rtp_f32(x, y, z);
608 float __fmaf_rz(
float x,
float y,
float z)
610 return __ocml_fma_rtz_f32(x, y, z);
614 float __fmul_rd(
float x,
float y) {
return __ocml_mul_rtn_f32(x, y); }
618 float __fmul_rn(
float x,
float y) {
return x * y; }
619 #if defined OCML_BASIC_ROUNDED_OPERATIONS 622 float __fmul_ru(
float x,
float y) {
return __ocml_mul_rtp_f32(x, y); }
625 float __fmul_rz(
float x,
float y) {
return __ocml_mul_rtz_f32(x, y); }
628 float __frcp_rd(
float x) {
return __llvm_amdgcn_rcp_f32(x); }
632 float __frcp_rn(
float x) {
return __llvm_amdgcn_rcp_f32(x); }
633 #if defined OCML_BASIC_ROUNDED_OPERATIONS 636 float __frcp_ru(
float x) {
return __llvm_amdgcn_rcp_f32(x); }
639 float __frcp_rz(
float x) {
return __llvm_amdgcn_rcp_f32(x); }
643 float __frsqrt_rn(
float x) {
return __llvm_amdgcn_rsq_f32(x); }
644 #if defined OCML_BASIC_ROUNDED_OPERATIONS 647 float __fsqrt_rd(
float x) {
return __ocml_sqrt_rtn_f32(x); }
651 float __fsqrt_rn(
float x) {
return __ocml_native_sqrt_f32(x); }
652 #if defined OCML_BASIC_ROUNDED_OPERATIONS 655 float __fsqrt_ru(
float x) {
return __ocml_sqrt_rtp_f32(x); }
658 float __fsqrt_rz(
float x) {
return __ocml_sqrt_rtz_f32(x); }
661 float __fsub_rd(
float x,
float y) {
return __ocml_sub_rtn_f32(x, y); }
665 float __fsub_rn(
float x,
float y) {
return x - y; }
666 #if defined OCML_BASIC_ROUNDED_OPERATIONS 669 float __fsub_ru(
float x,
float y) {
return __ocml_sub_rtp_f32(x, y); }
672 float __fsub_rz(
float x,
float y) {
return __ocml_sub_rtz_f32(x, y); }
676 float __log10f(
float x) {
return __ocml_native_log10_f32(x); }
679 float __log2f(
float x) {
return __ocml_native_log2_f32(x); }
682 float __logf(
float x) {
return __ocml_native_log_f32(x); }
685 float __powf(
float x,
float y) {
return __ocml_pow_f32(x, y); }
// Clamps x to the closed interval [+0.0, 1.0].
//
// Returns 1 for x >= 1, x itself for 0 < x < 1, and +0.0 otherwise.
// "Otherwise" deliberately includes NaN and -0.0: the tests are written as
// positive comparisons, which NaN always fails, so NaN falls through to the
// final return. The previous form returned NaN unchanged and preserved the
// sign of -0.0.
// NOTE(review): NaN -> +0.0 is intended to match the CUDA __saturatef
// contract; confirm against the CUDA Math API reference.
float __saturatef(float x)
{
    if (x >= 1.0f) {
        return 1.0f;
    }
    if (x > 0.0f) {
        return x;
    }
    return 0.0f;
}
691 void __sincosf(
float x,
float* sptr,
float* cptr)
693 *sptr = __ocml_native_sin_f32(x);
694 *cptr = __ocml_native_cos_f32(x);
698 float __sinf(
float x) {
return __ocml_native_sin_f32(x); }
701 float __tanf(
float x) {
return __ocml_tan_f32(x); }
708 double abs(
double x) {
return __ocml_fabs_f64(x); }
711 double acos(
double x) {
return __ocml_acos_f64(x); }
714 double acosh(
double x) {
return __ocml_acosh_f64(x); }
717 double asin(
double x) {
return __ocml_asin_f64(x); }
720 double asinh(
double x) {
return __ocml_asinh_f64(x); }
723 double atan(
double x) {
return __ocml_atan_f64(x); }
726 double atan2(
double x,
double y) {
return __ocml_atan2_f64(x, y); }
729 double atanh(
double x) {
return __ocml_atanh_f64(x); }
732 double cbrt(
double x) {
return __ocml_cbrt_f64(x); }
735 double ceil(
double x) {
return __ocml_ceil_f64(x); }
738 double copysign(
double x,
double y) {
return __ocml_copysign_f64(x, y); }
741 double cos(
double x) {
return __ocml_cos_f64(x); }
744 double cosh(
double x) {
return __ocml_cosh_f64(x); }
747 double cospi(
double x) {
return __ocml_cospi_f64(x); }
750 double cyl_bessel_i0(
double x) {
return __ocml_i0_f64(x); }
753 double cyl_bessel_i1(
double x) {
return __ocml_i1_f64(x); }
756 double erf(
double x) {
return __ocml_erf_f64(x); }
759 double erfc(
double x) {
return __ocml_erfc_f64(x); }
762 double erfcinv(
double x) {
return __ocml_erfcinv_f64(x); }
765 double erfcx(
double x) {
return __ocml_erfcx_f64(x); }
768 double erfinv(
double x) {
return __ocml_erfinv_f64(x); }
771 double exp(
double x) {
return __ocml_exp_f64(x); }
774 double exp10(
double x) {
return __ocml_exp10_f64(x); }
777 double exp2(
double x) {
return __ocml_exp2_f64(x); }
780 double expm1(
double x) {
return __ocml_expm1_f64(x); }
783 double fabs(
double x) {
return __ocml_fabs_f64(x); }
786 double fdim(
double x,
double y) {
return __ocml_fdim_f64(x, y); }
789 double floor(
double x) {
return __ocml_floor_f64(x); }
792 double fma(
double x,
double y,
double z) {
return __ocml_fma_f64(x, y, z); }
795 double fmax(
double x,
double y) {
return __ocml_fmax_f64(x, y); }
798 double fmin(
double x,
double y) {
return __ocml_fmin_f64(x, y); }
801 double fmod(
double x,
double y) {
return __ocml_fmod_f64(x, y); }
804 double frexp(
double x,
int* nptr)
808 __ocml_frexp_f64(x, (
__attribute__((address_space(5)))
int*) &tmp);
815 double hypot(
double x,
double y) {
return __ocml_hypot_f64(x, y); }
818 int ilogb(
double x) {
return __ocml_ilogb_f64(x); }
821 __RETURN_TYPE isfinite(
double x) {
return __ocml_isfinite_f64(x); }
824 __RETURN_TYPE isinf(
double x) {
return __ocml_isinf_f64(x); }
827 __RETURN_TYPE isnan(
double x) {
return __ocml_isnan_f64(x); }
830 double j0(
double x) {
return __ocml_j0_f64(x); }
833 double j1(
double x) {
return __ocml_j1_f64(x); }
836 double jn(
int n,
double x)
841 if (n == 0)
return j0f(x);
842 if (n == 1)
return j1f(x);
846 for (
int i = 1; i < n; ++i) {
847 double x2 = (2 * i) / x * x1 - x0;
856 double ldexp(
double x,
int e) {
return __ocml_ldexp_f64(x, e); }
859 double lgamma(
double x) {
return __ocml_lgamma_f64(x); }
862 long long int llrint(
double x) {
return __ocml_rint_f64(x); }
865 long long int llround(
double x) {
return __ocml_round_f64(x); }
868 double log(
double x) {
return __ocml_log_f64(x); }
871 double log10(
double x) {
return __ocml_log10_f64(x); }
874 double log1p(
double x) {
return __ocml_log1p_f64(x); }
877 double log2(
double x) {
return __ocml_log2_f64(x); }
880 double logb(
double x) {
return __ocml_logb_f64(x); }
883 long int lrint(
double x) {
return __ocml_rint_f64(x); }
886 long int lround(
double x) {
return __ocml_round_f64(x); }
889 double modf(
double x,
double* iptr)
893 __ocml_modf_f64(x, (
__attribute__((address_space(5)))
double*) &tmp);
900 double nan(
const char* tagp)
906 uint64_t mantissa : 51;
908 uint32_t exponent : 11;
911 static_assert(
sizeof(
double) ==
sizeof(ieee_double),
"");
915 tmp.bits.exponent = ~0u;
917 tmp.bits.mantissa = __make_mantissa(tagp);
921 static_assert(
sizeof(uint64_t)==
sizeof(
double));
922 uint64_t val = __make_mantissa(tagp);
924 return *
reinterpret_cast<double*
>(&val);
929 double nearbyint(
double x) {
return __ocml_nearbyint_f64(x); }
932 double nextafter(
double x,
double y) {
return __ocml_nextafter_f64(x, y); }
935 double norm(
int dim,
const double* a)
938 while (dim--) { r += a[0] * a[0]; ++a; }
940 return __ocml_sqrt_f64(r);
944 double norm3d(
double x,
double y,
double z)
946 return __ocml_len3_f64(x, y, z);
950 double norm4d(
double x,
double y,
double z,
double w)
952 return __ocml_len4_f64(x, y, z, w);
956 double normcdf(
double x) {
return __ocml_ncdf_f64(x); }
959 double normcdfinv(
double x) {
return __ocml_ncdfinv_f64(x); }
962 double pow(
double x,
double y) {
return __ocml_pow_f64(x, y); }
965 double rcbrt(
double x) {
return __ocml_rcbrt_f64(x); }
968 double remainder(
double x,
double y) {
return __ocml_remainder_f64(x, y); }
971 double remquo(
double x,
double y,
int* quo)
975 __ocml_remquo_f64(x, y, (
__attribute__((address_space(5)))
int*) &tmp);
982 double rhypot(
double x,
double y) {
return __ocml_rhypot_f64(x, y); }
985 double rint(
double x) {
return __ocml_rint_f64(x); }
988 double rnorm(
int dim,
const double* a)
991 while (dim--) { r += a[0] * a[0]; ++a; }
993 return __ocml_rsqrt_f64(r);
997 double rnorm3d(
double x,
double y,
double z)
999 return __ocml_rlen3_f64(x, y, z);
1003 double rnorm4d(
double x,
double y,
double z,
double w)
1005 return __ocml_rlen4_f64(x, y, z, w);
1009 double round(
double x) {
return __ocml_round_f64(x); }
1012 double rsqrt(
double x) {
return __ocml_rsqrt_f64(x); }
1015 double scalbln(
double x,
long int n)
1017 return (n < INT_MAX) ? __ocml_scalbn_f64(x, n) : __ocml_scalb_f64(x, n);
1021 double scalbn(
double x,
int n) {
return __ocml_scalbn_f64(x, n); }
1024 __RETURN_TYPE signbit(
double x) {
return __ocml_signbit_f64(x); }
1027 double sin(
double x) {
return __ocml_sin_f64(x); }
1030 void sincos(
double x,
double* sptr,
double* cptr)
1034 __ocml_sincos_f64(x, (
__attribute__((address_space(5)))
double*) &tmp);
1039 void sincospi(
double x,
double* sptr,
double* cptr)
1042 *sptr = __ocml_sincospi_f64(
1048 double sinh(
double x) {
return __ocml_sinh_f64(x); }
1051 double sinpi(
double x) {
return __ocml_sinpi_f64(x); }
1054 double sqrt(
double x) {
return __ocml_sqrt_f64(x); }
1057 double tan(
double x) {
return __ocml_tan_f64(x); }
1060 double tanh(
double x) {
return __ocml_tanh_f64(x); }
1063 double tgamma(
double x) {
return __ocml_tgamma_f64(x); }
1066 double trunc(
double x) {
return __ocml_trunc_f64(x); }
1069 double y0(
double x) {
return __ocml_y0_f64(x); }
1072 double y1(
double x) {
return __ocml_y1_f64(x); }
1075 double yn(
int n,
double x)
1080 if (n == 0)
return j0f(x);
1081 if (n == 1)
return j1f(x);
1085 for (
int i = 1; i < n; ++i) {
1086 double x2 = (2 * i) / x * x1 - x0;
1095 #if defined OCML_BASIC_ROUNDED_OPERATIONS 1098 double __dadd_rd(
double x,
double y) {
return __ocml_add_rtn_f64(x, y); }
1102 double __dadd_rn(
double x,
double y) {
return x + y; }
1103 #if defined OCML_BASIC_ROUNDED_OPERATIONS 1106 double __dadd_ru(
double x,
double y) {
return __ocml_add_rtp_f64(x, y); }
1109 double __dadd_rz(
double x,
double y) {
return __ocml_add_rtz_f64(x, y); }
1112 double __ddiv_rd(
double x,
double y) {
return __ocml_div_rtn_f64(x, y); }
1116 double __ddiv_rn(
double x,
double y) {
return x / y; }
1117 #if defined OCML_BASIC_ROUNDED_OPERATIONS 1120 double __ddiv_ru(
double x,
double y) {
return __ocml_div_rtp_f64(x, y); }
1123 double __ddiv_rz(
double x,
double y) {
return __ocml_div_rtz_f64(x, y); }
1126 double __dmul_rd(
double x,
double y) {
return __ocml_mul_rtn_f64(x, y); }
1130 double __dmul_rn(
double x,
double y) {
return x * y; }
1131 #if defined OCML_BASIC_ROUNDED_OPERATIONS 1134 double __dmul_ru(
double x,
double y) {
return __ocml_mul_rtp_f64(x, y); }
1137 double __dmul_rz(
double x,
double y) {
return __ocml_mul_rtz_f64(x, y); }
1140 double __drcp_rd(
double x) {
return __llvm_amdgcn_rcp_f64(x); }
1144 double __drcp_rn(
double x) {
return __llvm_amdgcn_rcp_f64(x); }
1145 #if defined OCML_BASIC_ROUNDED_OPERATIONS 1148 double __drcp_ru(
double x) {
return __llvm_amdgcn_rcp_f64(x); }
1151 double __drcp_rz(
double x) {
return __llvm_amdgcn_rcp_f64(x); }
1154 double __dsqrt_rd(
double x) {
return __ocml_sqrt_rtn_f64(x); }
1158 double __dsqrt_rn(
double x) {
return __ocml_sqrt_f64(x); }
1159 #if defined OCML_BASIC_ROUNDED_OPERATIONS 1162 double __dsqrt_ru(
double x) {
return __ocml_sqrt_rtp_f64(x); }
1165 double __dsqrt_rz(
double x) {
return __ocml_sqrt_rtz_f64(x); }
1168 double __dsub_rd(
double x,
double y) {
return __ocml_sub_rtn_f64(x, y); }
1172 double __dsub_rn(
double x,
double y) {
return x - y; }
1173 #if defined OCML_BASIC_ROUNDED_OPERATIONS 1176 double __dsub_ru(
double x,
double y) {
return __ocml_sub_rtp_f64(x, y); }
1179 double __dsub_rz(
double x,
double y) {
return __ocml_sub_rtz_f64(x, y); }
1182 double __fma_rd(
double x,
double y,
double z)
1184 return __ocml_fma_rtn_f64(x, y, z);
1189 double __fma_rn(
double x,
double y,
double z)
1191 return __ocml_fma_f64(x, y, z);
1193 #if defined OCML_BASIC_ROUNDED_OPERATIONS 1196 double __fma_ru(
double x,
double y,
double z)
1198 return __ocml_fma_rtp_f64(x, y, z);
1202 double __fma_rz(
double x,
double y,
double z)
1204 return __ocml_fma_rtz_f64(x, y, z);
1215 int sgn = x >> (
sizeof(int) * CHAR_BIT - 1);
1216 return (x ^ sgn) - sgn;
1222 long sgn = x >> (
sizeof(long) * CHAR_BIT - 1);
1223 return (x ^ sgn) - sgn;
1227 long long llabs(
long long x)
1229 long long sgn = x >> (
sizeof(
long long) * CHAR_BIT - 1);
1230 return (x ^ sgn) - sgn;
1233 #if defined(__cplusplus) 1236 long abs(
long x) {
return labs(x); }
1239 long long abs(
long long x) {
return llabs(x); }
1244 inline _Float16 fma(_Float16 x, _Float16 y, _Float16 z) {
1245 return __ocml_fma_f16(x, y, z);
1249 inline float fma(
float x,
float y,
float z) {
1250 return fmaf(x, y, z);
1253 #pragma push_macro("__DEF_FLOAT_FUN") 1254 #pragma push_macro("__DEF_FLOAT_FUN2") 1255 #pragma push_macro("__DEF_FLOAT_FUN2I") 1256 #pragma push_macro("__HIP_OVERLOAD") 1257 #pragma push_macro("__HIP_OVERLOAD2") 1260 template<
bool __B,
class __T =
void>
1270 #define __HIP_OVERLOAD1(__retty, __fn) \ 1271 template <typename __T> \ 1273 typename __hip_enable_if<std::numeric_limits<__T>::is_integer, \ 1276 return ::__fn((double)__x); \ 1282 #define __HIP_OVERLOAD2(__retty, __fn) \ 1283 template <typename __T1, typename __T2> \ 1284 __DEVICE__ typename __hip_enable_if< \ 1285 std::numeric_limits<__T1>::is_specialized && \ 1286 std::numeric_limits<__T2>::is_specialized, \ 1288 __fn(__T1 __x, __T2 __y) { \ 1289 return __fn((double)__x, (double)__y); \ 1293 #define __DEF_FUN1(retty, func) \ 1296 float func(float x) \ 1298 return func##f(x); \ 1300 __HIP_OVERLOAD1(retty, func) 1303 #define __DEF_FUNI(retty, func) \ 1306 retty func(float x) \ 1308 return func##f(x); \ 1310 __HIP_OVERLOAD1(retty, func) 1313 #define __DEF_FUN2(retty, func) \ 1316 float func(float x, float y) \ 1318 return func##f(x, y); \ 1320 __HIP_OVERLOAD2(retty, func) 1322 __DEF_FUN1(
double, acos)
1323 __DEF_FUN1(
double, acosh)
1324 __DEF_FUN1(
double, asin)
1325 __DEF_FUN1(
double, asinh)
1326 __DEF_FUN1(
double, atan)
1327 __DEF_FUN2(
double, atan2);
1328 __DEF_FUN1(
double, atanh)
1329 __DEF_FUN1(
double, cbrt)
1330 __DEF_FUN1(
double, ceil)
1331 __DEF_FUN2(
double, copysign);
1332 __DEF_FUN1(
double, cos)
1333 __DEF_FUN1(
double, cosh)
1334 __DEF_FUN1(
double, erf)
1335 __DEF_FUN1(
double, erfc)
1336 __DEF_FUN1(
double, exp)
1337 __DEF_FUN1(
double, exp2)
1338 __DEF_FUN1(
double, expm1)
1339 __DEF_FUN1(
double, fabs)
1340 __DEF_FUN2(
double, fdim);
1341 __DEF_FUN1(
double, floor)
1342 __DEF_FUN2(
double, fmax);
1343 __DEF_FUN2(
double, fmin);
1344 __DEF_FUN2(
double, fmod);
1346 __DEF_FUN2(
double, hypot);
1347 __DEF_FUNI(
int, ilogb)
1348 __HIP_OVERLOAD1(
bool, isfinite)
1349 __HIP_OVERLOAD2(
bool, isgreater);
1350 __HIP_OVERLOAD2(
bool, isgreaterequal);
1351 __HIP_OVERLOAD1(
bool, isinf);
1352 __HIP_OVERLOAD2(
bool, isless);
1353 __HIP_OVERLOAD2(
bool, islessequal);
1354 __HIP_OVERLOAD2(
bool, islessgreater);
1355 __HIP_OVERLOAD1(
bool, isnan);
1357 __HIP_OVERLOAD2(
bool, isunordered);
1358 __DEF_FUN1(
double, lgamma)
1359 __DEF_FUN1(
double, log)
1360 __DEF_FUN1(
double, log10)
1361 __DEF_FUN1(
double, log1p)
1362 __DEF_FUN1(
double, log2)
1363 __DEF_FUN1(
double, logb)
1364 __DEF_FUNI(
long long, llrint)
1365 __DEF_FUNI(
long long, llround)
1366 __DEF_FUNI(
long, lrint)
1367 __DEF_FUNI(
long, lround)
1368 __DEF_FUN1(
double, nearbyint);
1369 __DEF_FUN2(
double, nextafter);
1370 __DEF_FUN2(
double, pow);
1371 __DEF_FUN2(
double, remainder);
1372 __DEF_FUN1(
double, rint);
1373 __DEF_FUN1(
double, round);
1374 __HIP_OVERLOAD1(
bool, signbit)
1375 __DEF_FUN1(
double, sin)
1376 __DEF_FUN1(
double, sinh)
1377 __DEF_FUN1(
double, sqrt)
1378 __DEF_FUN1(
double, tan)
1379 __DEF_FUN1(
double, tanh)
1380 __DEF_FUN1(
double, tgamma)
1381 __DEF_FUN1(
double, trunc);
1384 #define __DEF_FLOAT_FUN2I(func) \ 1387 float func(float x, int y) \ 1389 return func##f(x, y); \ 1391 __DEF_FLOAT_FUN2I(scalbn)
1395 __DEVICE__
inline static T min(T arg1, T arg2) {
1396 return (arg1 < arg2) ? arg1 : arg2;
1399 __DEVICE__
inline static uint32_t min(uint32_t arg1, int32_t arg2) {
1400 return min(arg1, (uint32_t) arg2);
1421 __DEVICE__
inline static T max(T arg1, T arg2) {
1422 return (arg1 > arg2) ? arg1 : arg2;
1425 __DEVICE__
inline static uint32_t max(uint32_t arg1, int32_t arg2) {
1426 return max(arg1, (uint32_t) arg2);
1428 __DEVICE__
inline static uint32_t max(int32_t arg1, uint32_t arg2) {
1429 return max((uint32_t) arg1, arg2);
1446 __DEVICE__
inline int min(
int arg1,
int arg2) {
1447 return (arg1 < arg2) ? arg1 : arg2;
1449 __DEVICE__
inline int max(
int arg1,
int arg2) {
1450 return (arg1 > arg2) ? arg1 : arg2;
1455 float max(
float x,
float y) {
1461 double max(
double x,
double y) {
1467 float min(
float x,
float y) {
1473 double min(
double x,
double y) {
1477 __HIP_OVERLOAD2(
double, max)
1478 __HIP_OVERLOAD2(
double, min)
1482 __host__ inline static int min(
int arg1,
int arg2) {
1483 return std::min(arg1, arg2);
1486 __host__ inline static int max(
int arg1,
int arg2) {
1487 return std::max(arg1, arg2);
1491 #pragma pop_macro("__DEF_FLOAT_FUN") 1492 #pragma pop_macro("__DEF_FLOAT_FUN2") 1493 #pragma pop_macro("__DEF_FLOAT_FUN2I") 1494 #pragma pop_macro("__HIP_OVERLOAD") 1495 #pragma pop_macro("__HIP_OVERLOAD2") 1496 #pragma pop_macro("__DEVICE__") 1497 #pragma pop_macro("__RETURN_TYPE")
_Float16 __2f16 __attribute__((ext_vector_type(2)))
Copies the memory address of symbol symbolName to devPtr.
Definition: hip_fp16_math_fwd.h:53
#define __host__
Definition: host_defines.h:41
Contains definitions of APIs for HIP runtime.
Defines the different new vector types for HIP runtime.
Definition: math_functions.h:1261