25 #include "hip_fp16_math_fwd.h" 39 #include "kalmar_math.h" 42 #pragma push_macro("__DEVICE__") 43 #pragma push_macro("__RETURN_TYPE") 46 #define __DEVICE__ __device__ 47 #define __RETURN_TYPE int 48 #else // to be consistent with __clang_cuda_math_forward_declares 49 #define __DEVICE__ static __device__ 50 #define __RETURN_TYPE bool 55 uint64_t __make_mantissa_base8(
const char* tagp)
61 if (tmp >=
'0' && tmp <=
'7') r = (r * 8u) + tmp -
'0';
72 uint64_t __make_mantissa_base10(
const char* tagp)
78 if (tmp >=
'0' && tmp <=
'9') r = (r * 10u) + tmp -
'0';
89 uint64_t __make_mantissa_base16(
const char* tagp)
95 if (tmp >=
'0' && tmp <=
'9') r = (r * 16u) + tmp -
'0';
96 else if (tmp >=
'a' && tmp <=
'f') r = (r * 16u) + tmp -
'a' + 10;
97 else if (tmp >=
'A' && tmp <=
'F') r = (r * 16u) + tmp -
'A' + 10;
108 uint64_t __make_mantissa(
const char* tagp)
110 if (!tagp)
return 0u;
115 if (*tagp ==
'x' || *tagp ==
'X')
return __make_mantissa_base16(tagp);
116 else return __make_mantissa_base8(tagp);
119 return __make_mantissa_base10(tagp);
125 float abs(
float x) {
return __ocml_fabs_f32(x); }
128 float acosf(
float x) {
return __ocml_acos_f32(x); }
131 float acoshf(
float x) {
return __ocml_acosh_f32(x); }
134 float asinf(
float x) {
return __ocml_asin_f32(x); }
137 float asinhf(
float x) {
return __ocml_asinh_f32(x); }
140 float atan2f(
float x,
float y) {
return __ocml_atan2_f32(x, y); }
143 float atanf(
float x) {
return __ocml_atan_f32(x); }
146 float atanhf(
float x) {
return __ocml_atanh_f32(x); }
149 float cbrtf(
float x) {
return __ocml_cbrt_f32(x); }
152 float ceilf(
float x) {
return __ocml_ceil_f32(x); }
155 float copysignf(
float x,
float y) {
return __ocml_copysign_f32(x, y); }
158 float cosf(
float x) {
return __ocml_cos_f32(x); }
161 float coshf(
float x) {
return __ocml_cosh_f32(x); }
164 float cospif(
float x) {
return __ocml_cospi_f32(x); }
167 float cyl_bessel_i0f(
float x) {
return __ocml_i0_f32(x); }
170 float cyl_bessel_i1f(
float x) {
return __ocml_i1_f32(x); }
173 float erfcf(
float x) {
return __ocml_erfc_f32(x); }
176 float erfcinvf(
float x) {
return __ocml_erfcinv_f32(x); }
179 float erfcxf(
float x) {
return __ocml_erfcx_f32(x); }
182 float erff(
float x) {
return __ocml_erf_f32(x); }
185 float erfinvf(
float x) {
return __ocml_erfinv_f32(x); }
188 float exp10f(
float x) {
return __ocml_exp10_f32(x); }
191 float exp2f(
float x) {
return __ocml_exp2_f32(x); }
194 float expf(
float x) {
return __ocml_exp_f32(x); }
197 float expm1f(
float x) {
return __ocml_expm1_f32(x); }
200 float fabsf(
float x) {
return __ocml_fabs_f32(x); }
203 float fdimf(
float x,
float y) {
return __ocml_fdim_f32(x, y); }
206 float fdividef(
float x,
float y) {
return x / y; }
209 float floorf(
float x) {
return __ocml_floor_f32(x); }
212 float fmaf(
float x,
float y,
float z) {
return __ocml_fma_f32(x, y, z); }
215 float fmaxf(
float x,
float y) {
return __ocml_fmax_f32(x, y); }
218 float fminf(
float x,
float y) {
return __ocml_fmin_f32(x, y); }
221 float fmodf(
float x,
float y) {
return __ocml_fmod_f32(x, y); }
224 float frexpf(
float x,
int* nptr)
228 __ocml_frexp_f32(x, (__attribute__((address_space(5)))
int*) &tmp);
235 float hypotf(
float x,
float y) {
return __ocml_hypot_f32(x, y); }
238 int ilogbf(
float x) {
return __ocml_ilogb_f32(x); }
241 __RETURN_TYPE isfinite(
float x) {
return __ocml_isfinite_f32(x); }
244 __RETURN_TYPE isinf(
float x) {
return __ocml_isinf_f32(x); }
247 __RETURN_TYPE isnan(
float x) {
return __ocml_isnan_f32(x); }
250 float j0f(
float x) {
return __ocml_j0_f32(x); }
253 float j1f(
float x) {
return __ocml_j1_f32(x); }
256 float jnf(
int n,
float x)
260 if (n == 0)
return j0f(x);
261 if (n == 1)
return j1f(x);
265 for (
int i = 1; i < n; ++i) {
266 float x2 = (2 * i) / x * x1 - x0;
275 float ldexpf(
float x,
int e) {
return __ocml_ldexp_f32(x, e); }
278 float lgammaf(
float x) {
return __ocml_lgamma_f32(x); }
281 long long int llrintf(
float x) {
return __ocml_rint_f32(x); }
284 long long int llroundf(
float x) {
return __ocml_round_f32(x); }
287 float log10f(
float x) {
return __ocml_log10_f32(x); }
290 float log1pf(
float x) {
return __ocml_log1p_f32(x); }
293 float log2f(
float x) {
return __ocml_log2_f32(x); }
296 float logbf(
float x) {
return __ocml_logb_f32(x); }
299 float logf(
float x) {
return __ocml_log_f32(x); }
302 long int lrintf(
float x) {
return __ocml_rint_f32(x); }
305 long int lroundf(
float x) {
return __ocml_round_f32(x); }
308 float modff(
float x,
float* iptr)
312 __ocml_modf_f32(x, (__attribute__((address_space(5)))
float*) &tmp);
319 float nanf(
const char* tagp)
324 uint32_t mantissa : 22;
326 uint32_t exponent : 8;
330 static_assert(
sizeof(
float) ==
sizeof(ieee_float),
"");
334 tmp.bits.exponent = ~0u;
336 tmp.bits.mantissa = __make_mantissa(tagp);
342 float nearbyintf(
float x) {
return __ocml_nearbyint_f32(x); }
345 float nextafterf(
float x,
float y) {
return __ocml_nextafter_f32(x, y); }
348 float norm3df(
float x,
float y,
float z) {
return __ocml_len3_f32(x, y, z); }
351 float norm4df(
float x,
float y,
float z,
float w)
353 return __ocml_len4_f32(x, y, z, w);
357 float normcdff(
float x) {
return __ocml_ncdf_f32(x); }
360 float normcdfinvf(
float x) {
return __ocml_ncdfinv_f32(x); }
363 float normf(
int dim,
const float* a)
366 while (dim--) { r += a[0] * a[0]; ++a; }
368 return __ocml_sqrt_f32(r);
372 float powf(
float x,
float y) {
return __ocml_pow_f32(x, y); }
375 float rcbrtf(
float x) {
return __ocml_rcbrt_f32(x); }
378 float remainderf(
float x,
float y) {
return __ocml_remainder_f32(x, y); }
381 float remquof(
float x,
float y,
int* quo)
385 __ocml_remquo_f32(x, y, (__attribute__((address_space(5)))
int*) &tmp);
392 float rhypotf(
float x,
float y) {
return __ocml_rhypot_f32(x, y); }
395 float rintf(
float x) {
return __ocml_rint_f32(x); }
398 float rnorm3df(
float x,
float y,
float z)
400 return __ocml_rlen3_f32(x, y, z);
405 float rnorm4df(
float x,
float y,
float z,
float w)
407 return __ocml_rlen4_f32(x, y, z, w);
411 float rnormf(
int dim,
const float* a)
414 while (dim--) { r += a[0] * a[0]; ++a; }
416 return __ocml_rsqrt_f32(r);
420 float roundf(
float x) {
return __ocml_round_f32(x); }
423 float rsqrtf(
float x) {
return __ocml_rsqrt_f32(x); }
426 float scalblnf(
float x,
long int n)
428 return (n < INT_MAX) ? __ocml_scalbn_f32(x, n) : __ocml_scalb_f32(x, n);
432 float scalbnf(
float x,
int n) {
return __ocml_scalbn_f32(x, n); }
435 __RETURN_TYPE signbit(
float x) {
return __ocml_signbit_f32(x); }
438 void sincosf(
float x,
float* sptr,
float* cptr)
443 __ocml_sincos_f32(x, (__attribute__((address_space(5)))
float*) &tmp);
448 void sincospif(
float x,
float* sptr,
float* cptr)
453 __ocml_sincospi_f32(x, (__attribute__((address_space(5)))
float*) &tmp);
458 float sinf(
float x) {
return __ocml_sin_f32(x); }
461 float sinhf(
float x) {
return __ocml_sinh_f32(x); }
464 float sinpif(
float x) {
return __ocml_sinpi_f32(x); }
467 float sqrtf(
float x) {
return __ocml_sqrt_f32(x); }
470 float tanf(
float x) {
return __ocml_tan_f32(x); }
473 float tanhf(
float x) {
return __ocml_tanh_f32(x); }
476 float tgammaf(
float x) {
return __ocml_tgamma_f32(x); }
479 float truncf(
float x) {
return __ocml_trunc_f32(x); }
482 float y0f(
float x) {
return __ocml_y0_f32(x); }
485 float y1f(
float x) {
return __ocml_y1_f32(x); }
488 float ynf(
int n,
float x)
493 if (n == 0)
return y0f(x);
494 if (n == 1)
return y1f(x);
498 for (
int i = 1; i < n; ++i) {
499 float x2 = (2 * i) / x * x1 - x0;
510 float __cosf(
float x) {
return __ocml_native_cos_f32(x); }
513 float __exp10f(
float x) {
return __ocml_native_exp10_f32(x); }
516 float __expf(
float x) {
return __ocml_native_exp_f32(x); }
517 #if defined OCML_BASIC_ROUNDED_OPERATIONS 520 float __fadd_rd(
float x,
float y) {
return __ocml_add_rtn_f32(x, y); }
524 float __fadd_rn(
float x,
float y) {
return x + y; }
525 #if defined OCML_BASIC_ROUNDED_OPERATIONS 528 float __fadd_ru(
float x,
float y) {
return __ocml_add_rtp_f32(x, y); }
531 float __fadd_rz(
float x,
float y) {
return __ocml_add_rtz_f32(x, y); }
534 float __fdiv_rd(
float x,
float y) {
return __ocml_div_rtn_f32(x, y); }
538 float __fdiv_rn(
float x,
float y) {
return x / y; }
539 #if defined OCML_BASIC_ROUNDED_OPERATIONS 542 float __fdiv_ru(
float x,
float y) {
return __ocml_div_rtp_f32(x, y); }
545 float __fdiv_rz(
float x,
float y) {
return __ocml_div_rtz_f32(x, y); }
549 float __fdividef(
float x,
float y) {
return x / y; }
550 #if defined OCML_BASIC_ROUNDED_OPERATIONS 553 float __fmaf_rd(
float x,
float y,
float z)
555 return __ocml_fma_rtn_f32(x, y, z);
560 float __fmaf_rn(
float x,
float y,
float z)
562 return __ocml_fma_f32(x, y, z);
564 #if defined OCML_BASIC_ROUNDED_OPERATIONS 567 float __fmaf_ru(
float x,
float y,
float z)
569 return __ocml_fma_rtp_f32(x, y, z);
573 float __fmaf_rz(
float x,
float y,
float z)
575 return __ocml_fma_rtz_f32(x, y, z);
579 float __fmul_rd(
float x,
float y) {
return __ocml_mul_rtn_f32(x, y); }
583 float __fmul_rn(
float x,
float y) {
return x * y; }
584 #if defined OCML_BASIC_ROUNDED_OPERATIONS 587 float __fmul_ru(
float x,
float y) {
return __ocml_mul_rtp_f32(x, y); }
590 float __fmul_rz(
float x,
float y) {
return __ocml_mul_rtz_f32(x, y); }
593 float __frcp_rd(
float x) {
return __llvm_amdgcn_rcp_f32(x); }
597 float __frcp_rn(
float x) {
return __llvm_amdgcn_rcp_f32(x); }
598 #if defined OCML_BASIC_ROUNDED_OPERATIONS 601 float __frcp_ru(
float x) {
return __llvm_amdgcn_rcp_f32(x); }
604 float __frcp_rz(
float x) {
return __llvm_amdgcn_rcp_f32(x); }
608 float __frsqrt_rn(
float x) {
return __llvm_amdgcn_rsq_f32(x); }
609 #if defined OCML_BASIC_ROUNDED_OPERATIONS 612 float __fsqrt_rd(
float x) {
return __ocml_sqrt_rtn_f32(x); }
616 float __fsqrt_rn(
float x) {
return __ocml_native_sqrt_f32(x); }
617 #if defined OCML_BASIC_ROUNDED_OPERATIONS 620 float __fsqrt_ru(
float x) {
return __ocml_sqrt_rtp_f32(x); }
623 float __fsqrt_rz(
float x) {
return __ocml_sqrt_rtz_f32(x); }
626 float __fsub_rd(
float x,
float y) {
return __ocml_sub_rtn_f32(x, y); }
630 float __fsub_rn(
float x,
float y) {
return x - y; }
631 #if defined OCML_BASIC_ROUNDED_OPERATIONS 634 float __fsub_ru(
float x,
float y) {
return __ocml_sub_rtp_f32(x, y); }
637 float __fsub_rz(
float x,
float y) {
return __ocml_sub_rtz_f32(x, y); }
641 float __log10f(
float x) {
return __ocml_native_log10_f32(x); }
644 float __log2f(
float x) {
return __ocml_native_log2_f32(x); }
647 float __logf(
float x) {
return __ocml_native_log_f32(x); }
650 float __powf(
float x,
float y) {
return __ocml_pow_f32(x, y); }
/**
 * Clamps x to the range [0, 1].
 *
 * Values below 0 yield 0, values above 1 yield 1, and every other input is
 * returned unchanged. Both comparisons are false for NaN, so NaN is passed
 * through as-is.
 */
float __saturatef(float x)
{
    if (x < 0) {
        return 0;
    }
    if (x > 1) {
        return 1;
    }
    return x;
}
656 void __sincosf(
float x,
float* sptr,
float* cptr)
658 *sptr = __ocml_native_sin_f32(x);
659 *cptr = __ocml_native_cos_f32(x);
663 float __sinf(
float x) {
return __ocml_native_sin_f32(x); }
666 float __tanf(
float x) {
return __ocml_tan_f32(x); }
673 double abs(
double x) {
return __ocml_fabs_f64(x); }
676 double acos(
double x) {
return __ocml_acos_f64(x); }
679 double acosh(
double x) {
return __ocml_acosh_f64(x); }
682 double asin(
double x) {
return __ocml_asin_f64(x); }
685 double asinh(
double x) {
return __ocml_asinh_f64(x); }
688 double atan(
double x) {
return __ocml_atan_f64(x); }
691 double atan2(
double x,
double y) {
return __ocml_atan2_f64(x, y); }
694 double atanh(
double x) {
return __ocml_atanh_f64(x); }
697 double cbrt(
double x) {
return __ocml_cbrt_f64(x); }
700 double ceil(
double x) {
return __ocml_ceil_f64(x); }
703 double copysign(
double x,
double y) {
return __ocml_copysign_f64(x, y); }
706 double cos(
double x) {
return __ocml_cos_f64(x); }
709 double cosh(
double x) {
return __ocml_cosh_f64(x); }
712 double cospi(
double x) {
return __ocml_cospi_f64(x); }
715 double cyl_bessel_i0(
double x) {
return __ocml_i0_f64(x); }
718 double cyl_bessel_i1(
double x) {
return __ocml_i1_f64(x); }
721 double erf(
double x) {
return __ocml_erf_f64(x); }
724 double erfc(
double x) {
return __ocml_erfc_f64(x); }
727 double erfcinv(
double x) {
return __ocml_erfcinv_f64(x); }
730 double erfcx(
double x) {
return __ocml_erfcx_f64(x); }
733 double erfinv(
double x) {
return __ocml_erfinv_f64(x); }
736 double exp(
double x) {
return __ocml_exp_f64(x); }
739 double exp10(
double x) {
return __ocml_exp10_f64(x); }
742 double exp2(
double x) {
return __ocml_exp2_f64(x); }
745 double expm1(
double x) {
return __ocml_expm1_f64(x); }
748 double fabs(
double x) {
return __ocml_fabs_f64(x); }
751 double fdim(
double x,
double y) {
return __ocml_fdim_f64(x, y); }
754 double floor(
double x) {
return __ocml_floor_f64(x); }
757 double fma(
double x,
double y,
double z) {
return __ocml_fma_f64(x, y, z); }
760 double fmax(
double x,
double y) {
return __ocml_fmax_f64(x, y); }
763 double fmin(
double x,
double y) {
return __ocml_fmin_f64(x, y); }
766 double fmod(
double x,
double y) {
return __ocml_fmod_f64(x, y); }
769 double frexp(
double x,
int* nptr)
773 __ocml_frexp_f64(x, (__attribute__((address_space(5)))
int*) &tmp);
780 double hypot(
double x,
double y) {
return __ocml_hypot_f64(x, y); }
783 int ilogb(
double x) {
return __ocml_ilogb_f64(x); }
786 __RETURN_TYPE isfinite(
double x) {
return __ocml_isfinite_f64(x); }
789 __RETURN_TYPE isinf(
double x) {
return __ocml_isinf_f64(x); }
792 __RETURN_TYPE isnan(
double x) {
return __ocml_isnan_f64(x); }
795 double j0(
double x) {
return __ocml_j0_f64(x); }
798 double j1(
double x) {
return __ocml_j1_f64(x); }
801 double jn(
int n,
double x)
806 if (n == 0)
return j0f(x);
807 if (n == 1)
return j1f(x);
811 for (
int i = 1; i < n; ++i) {
812 double x2 = (2 * i) / x * x1 - x0;
821 double ldexp(
double x,
int e) {
return __ocml_ldexp_f64(x, e); }
824 double lgamma(
double x) {
return __ocml_lgamma_f64(x); }
827 long long int llrint(
double x) {
return __ocml_rint_f64(x); }
830 long long int llround(
double x) {
return __ocml_round_f64(x); }
833 double log(
double x) {
return __ocml_log_f64(x); }
836 double log10(
double x) {
return __ocml_log10_f64(x); }
839 double log1p(
double x) {
return __ocml_log1p_f64(x); }
842 double log2(
double x) {
return __ocml_log2_f64(x); }
845 double logb(
double x) {
return __ocml_logb_f64(x); }
848 long int lrint(
double x) {
return __ocml_rint_f64(x); }
851 long int lround(
double x) {
return __ocml_round_f64(x); }
854 double modf(
double x,
double* iptr)
858 __ocml_modf_f64(x, (__attribute__((address_space(5)))
double*) &tmp);
865 double nan(
const char* tagp)
870 uint64_t mantissa : 51;
872 uint32_t exponent : 11;
876 static_assert(
sizeof(
double) ==
sizeof(ieee_double),
"");
880 tmp.bits.exponent = ~0u;
882 tmp.bits.mantissa = __make_mantissa(tagp);
888 double nearbyint(
double x) {
return __ocml_nearbyint_f64(x); }
891 double nextafter(
double x,
double y) {
return __ocml_nextafter_f64(x, y); }
894 double norm(
int dim,
const double* a)
897 while (dim--) { r += a[0] * a[0]; ++a; }
899 return __ocml_sqrt_f64(r);
903 double norm3d(
double x,
double y,
double z)
905 return __ocml_len3_f64(x, y, z);
909 double norm4d(
double x,
double y,
double z,
double w)
911 return __ocml_len4_f64(x, y, z, w);
915 double normcdf(
double x) {
return __ocml_ncdf_f64(x); }
918 double normcdfinv(
double x) {
return __ocml_ncdfinv_f64(x); }
921 double pow(
double x,
double y) {
return __ocml_pow_f64(x, y); }
924 double rcbrt(
double x) {
return __ocml_rcbrt_f64(x); }
927 double remainder(
double x,
double y) {
return __ocml_remainder_f64(x, y); }
930 double remquo(
double x,
double y,
int* quo)
934 __ocml_remquo_f64(x, y, (__attribute__((address_space(5)))
int*) &tmp);
941 double rhypot(
double x,
double y) {
return __ocml_rhypot_f64(x, y); }
944 double rint(
double x) {
return __ocml_rint_f64(x); }
947 double rnorm(
int dim,
const double* a)
950 while (dim--) { r += a[0] * a[0]; ++a; }
952 return __ocml_rsqrt_f64(r);
956 double rnorm3d(
double x,
double y,
double z)
958 return __ocml_rlen3_f64(x, y, z);
962 double rnorm4d(
double x,
double y,
double z,
double w)
964 return __ocml_rlen4_f64(x, y, z, w);
968 double round(
double x) {
return __ocml_round_f64(x); }
971 double rsqrt(
double x) {
return __ocml_rsqrt_f64(x); }
974 double scalbln(
double x,
long int n)
976 return (n < INT_MAX) ? __ocml_scalbn_f64(x, n) : __ocml_scalb_f64(x, n);
980 double scalbn(
double x,
int n) {
return __ocml_scalbn_f64(x, n); }
983 __RETURN_TYPE signbit(
double x) {
return __ocml_signbit_f64(x); }
986 double sin(
double x) {
return __ocml_sin_f64(x); }
989 void sincos(
double x,
double* sptr,
double* cptr)
993 __ocml_sincos_f64(x, (__attribute__((address_space(5)))
double*) &tmp);
998 void sincospi(
double x,
double* sptr,
double* cptr)
1001 *sptr = __ocml_sincospi_f64(
1002 x, (__attribute__((address_space(5)))
double*) &tmp);
1007 double sinh(
double x) {
return __ocml_sinh_f64(x); }
1010 double sinpi(
double x) {
return __ocml_sinpi_f64(x); }
1013 double sqrt(
double x) {
return __ocml_sqrt_f64(x); }
1016 double tan(
double x) {
return __ocml_tan_f64(x); }
1019 double tanh(
double x) {
return __ocml_tanh_f64(x); }
1022 double tgamma(
double x) {
return __ocml_tgamma_f64(x); }
1025 double trunc(
double x) {
return __ocml_trunc_f64(x); }
1028 double y0(
double x) {
return __ocml_y0_f64(x); }
1031 double y1(
double x) {
return __ocml_y1_f64(x); }
1034 double yn(
int n,
double x)
1039 if (n == 0)
return j0f(x);
1040 if (n == 1)
return j1f(x);
1044 for (
int i = 1; i < n; ++i) {
1045 double x2 = (2 * i) / x * x1 - x0;
1054 #if defined OCML_BASIC_ROUNDED_OPERATIONS 1057 double __dadd_rd(
double x,
double y) {
return __ocml_add_rtn_f64(x, y); }
1061 double __dadd_rn(
double x,
double y) {
return x + y; }
1062 #if defined OCML_BASIC_ROUNDED_OPERATIONS 1065 double __dadd_ru(
double x,
double y) {
return __ocml_add_rtp_f64(x, y); }
1068 double __dadd_rz(
double x,
double y) {
return __ocml_add_rtz_f64(x, y); }
1071 double __ddiv_rd(
double x,
double y) {
return __ocml_div_rtn_f64(x, y); }
1075 double __ddiv_rn(
double x,
double y) {
return x / y; }
1076 #if defined OCML_BASIC_ROUNDED_OPERATIONS 1079 double __ddiv_ru(
double x,
double y) {
return __ocml_div_rtp_f64(x, y); }
1082 double __ddiv_rz(
double x,
double y) {
return __ocml_div_rtz_f64(x, y); }
1085 double __dmul_rd(
double x,
double y) {
return __ocml_mul_rtn_f64(x, y); }
1089 double __dmul_rn(
double x,
double y) {
return x * y; }
1090 #if defined OCML_BASIC_ROUNDED_OPERATIONS 1093 double __dmul_ru(
double x,
double y) {
return __ocml_mul_rtp_f64(x, y); }
1096 double __dmul_rz(
double x,
double y) {
return __ocml_mul_rtz_f64(x, y); }
1099 double __drcp_rd(
double x) {
return __llvm_amdgcn_rcp_f64(x); }
1103 double __drcp_rn(
double x) {
return __llvm_amdgcn_rcp_f64(x); }
1104 #if defined OCML_BASIC_ROUNDED_OPERATIONS 1107 double __drcp_ru(
double x) {
return __llvm_amdgcn_rcp_f64(x); }
1110 double __drcp_rz(
double x) {
return __llvm_amdgcn_rcp_f64(x); }
1113 double __dsqrt_rd(
double x) {
return __ocml_sqrt_rtn_f64(x); }
1117 double __dsqrt_rn(
double x) {
return __ocml_sqrt_f64(x); }
1118 #if defined OCML_BASIC_ROUNDED_OPERATIONS 1121 double __dsqrt_ru(
double x) {
return __ocml_sqrt_rtp_f64(x); }
1124 double __dsqrt_rz(
double x) {
return __ocml_sqrt_rtz_f64(x); }
1127 double __dsub_rd(
double x,
double y) {
return __ocml_sub_rtn_f64(x, y); }
1131 double __dsub_rn(
double x,
double y) {
return x - y; }
1132 #if defined OCML_BASIC_ROUNDED_OPERATIONS 1135 double __dsub_ru(
double x,
double y) {
return __ocml_sub_rtp_f64(x, y); }
1138 double __dsub_rz(
double x,
double y) {
return __ocml_sub_rtz_f64(x, y); }
1141 double __fma_rd(
double x,
double y,
double z)
1143 return __ocml_fma_rtn_f64(x, y, z);
1148 double __fma_rn(
double x,
double y,
double z)
1150 return __ocml_fma_f64(x, y, z);
1152 #if defined OCML_BASIC_ROUNDED_OPERATIONS 1155 double __fma_ru(
double x,
double y,
double z)
1157 return __ocml_fma_rtp_f64(x, y, z);
1161 double __fma_rz(
double x,
double y,
double z)
1163 return __ocml_fma_rtz_f64(x, y, z);
1174 int sgn = x >> (
sizeof(int) * CHAR_BIT - 1);
1175 return (x ^ sgn) - sgn;
1181 long sgn = x >> (
sizeof(long) * CHAR_BIT - 1);
1182 return (x ^ sgn) - sgn;
1186 long long llabs(
long long x)
1188 long long sgn = x >> (
sizeof(
long long) * CHAR_BIT - 1);
1189 return (x ^ sgn) - sgn;
1192 #if defined(__cplusplus) 1195 long abs(
long x) {
return labs(x); }
1198 long long abs(
long long x) {
return llabs(x); }
1203 inline _Float16 fma(_Float16 x, _Float16 y, _Float16 z) {
1204 return __ocml_fma_f16(x, y, z);
1208 inline float fma(
float x,
float y,
float z) {
1209 return fmaf(x, y, z);
1212 #pragma push_macro("__DEF_FLOAT_FUN") 1213 #pragma push_macro("__DEF_FLOAT_FUN2") 1214 #pragma push_macro("__DEF_FLOAT_FUN2I") 1215 #pragma push_macro("__HIP_OVERLOAD") 1216 #pragma push_macro("__HIP_OVERLOAD2") 1219 template<
bool __B,
class __T =
void>
1229 #define __HIP_OVERLOAD1(__retty, __fn) \ 1230 template <typename __T> \ 1232 typename __hip_enable_if<std::numeric_limits<__T>::is_integer, \ 1235 return ::__fn((double)__x); \ 1241 #define __HIP_OVERLOAD2(__retty, __fn) \ 1242 template <typename __T1, typename __T2> \ 1243 __DEVICE__ typename __hip_enable_if< \ 1244 std::numeric_limits<__T1>::is_specialized && \ 1245 std::numeric_limits<__T2>::is_specialized, \ 1247 __fn(__T1 __x, __T2 __y) { \ 1248 return __fn((double)__x, (double)__y); \ 1252 #define __DEF_FUN1(retty, func) \ 1255 float func(float x) \ 1257 return func##f(x); \ 1259 __HIP_OVERLOAD1(retty, func) 1262 #define __DEF_FUNI(retty, func) \ 1265 retty func(float x) \ 1267 return func##f(x); \ 1269 __HIP_OVERLOAD1(retty, func) 1272 #define __DEF_FUN2(retty, func) \ 1275 float func(float x, float y) \ 1277 return func##f(x, y); \ 1279 __HIP_OVERLOAD2(retty, func) 1281 __DEF_FUN1(
double, acos)
1282 __DEF_FUN1(
double, acosh)
1283 __DEF_FUN1(
double, asin)
1284 __DEF_FUN1(
double, asinh)
1285 __DEF_FUN1(
double, atan)
1286 __DEF_FUN2(
double, atan2);
1287 __DEF_FUN1(
double, atanh)
1288 __DEF_FUN1(
double, cbrt)
1289 __DEF_FUN1(
double, ceil)
1290 __DEF_FUN2(
double, copysign);
1291 __DEF_FUN1(
double, cos)
1292 __DEF_FUN1(
double, cosh)
1293 __DEF_FUN1(
double, erf)
1294 __DEF_FUN1(
double, erfc)
1295 __DEF_FUN1(
double, exp)
1296 __DEF_FUN1(
double, exp2)
1297 __DEF_FUN1(
double, expm1)
1298 __DEF_FUN1(
double, fabs)
1299 __DEF_FUN2(
double, fdim);
1300 __DEF_FUN1(
double, floor)
1301 __DEF_FUN2(
double, fmax);
1302 __DEF_FUN2(
double, fmin);
1303 __DEF_FUN2(
double, fmod);
1305 __DEF_FUN2(
double, hypot);
1306 __DEF_FUNI(
int, ilogb)
1307 __HIP_OVERLOAD1(
bool, isfinite)
1308 __HIP_OVERLOAD2(
bool, isgreater);
1309 __HIP_OVERLOAD2(
bool, isgreaterequal);
1310 __HIP_OVERLOAD1(
bool, isinf);
1311 __HIP_OVERLOAD2(
bool, isless);
1312 __HIP_OVERLOAD2(
bool, islessequal);
1313 __HIP_OVERLOAD2(
bool, islessgreater);
1314 __HIP_OVERLOAD1(
bool, isnan);
1316 __HIP_OVERLOAD2(
bool, isunordered);
1317 __DEF_FUN1(
double, lgamma)
1318 __DEF_FUN1(
double, log)
1319 __DEF_FUN1(
double, log10)
1320 __DEF_FUN1(
double, log1p)
1321 __DEF_FUN1(
double, log2)
1322 __DEF_FUN1(
double, logb)
1323 __DEF_FUNI(
long long, llrint)
1324 __DEF_FUNI(
long long, llround)
1325 __DEF_FUNI(
long, lrint)
1326 __DEF_FUNI(
long, lround)
1327 __DEF_FUN1(
double, nearbyint);
1328 __DEF_FUN2(
double, nextafter);
1329 __DEF_FUN2(
double, pow);
1330 __DEF_FUN2(
double, remainder);
1331 __DEF_FUN1(
double, rint);
1332 __DEF_FUN1(
double, round);
1333 __HIP_OVERLOAD1(
bool, signbit)
1334 __DEF_FUN1(
double, sin)
1335 __DEF_FUN1(
double, sinh)
1336 __DEF_FUN1(
double, sqrt)
1337 __DEF_FUN1(
double, tan)
1338 __DEF_FUN1(
double, tanh)
1339 __DEF_FUN1(
double, tgamma)
1340 __DEF_FUN1(
double, trunc);
1343 #define __DEF_FLOAT_FUN2I(func) \ 1346 float func(float x, int y) \ 1348 return func##f(x, y); \ 1350 __DEF_FLOAT_FUN2I(scalbn)
1354 __DEVICE__
inline static T min(T arg1, T arg2) {
1355 return (arg1 < arg2) ? arg1 : arg2;
1358 __DEVICE__
inline static uint32_t min(uint32_t arg1, int32_t arg2) {
1359 return min(arg1, (uint32_t) arg2);
1380 __DEVICE__
inline static T max(T arg1, T arg2) {
1381 return (arg1 > arg2) ? arg1 : arg2;
1384 __DEVICE__
inline static uint32_t max(uint32_t arg1, int32_t arg2) {
1385 return max(arg1, (uint32_t) arg2);
1387 __DEVICE__
inline static uint32_t max(int32_t arg1, uint32_t arg2) {
1388 return max((uint32_t) arg1, arg2);
1405 __DEVICE__
inline int min(
int arg1,
int arg2) {
1406 return (arg1 < arg2) ? arg1 : arg2;
1408 __DEVICE__
inline int max(
int arg1,
int arg2) {
1409 return (arg1 > arg2) ? arg1 : arg2;
1414 float max(
float x,
float y) {
1420 double max(
double x,
double y) {
1426 float min(
float x,
float y) {
1432 double min(
double x,
double y) {
1436 __HIP_OVERLOAD2(
double, max)
1437 __HIP_OVERLOAD2(
double, min)
1441 __host__ inline static int min(
int arg1,
int arg2) {
1442 return std::min(arg1, arg2);
1445 __host__ inline static int max(
int arg1,
int arg2) {
1446 return std::max(arg1, arg2);
1450 #pragma pop_macro("__DEF_FLOAT_FUN") 1451 #pragma pop_macro("__DEF_FLOAT_FUN2") 1452 #pragma pop_macro("__DEF_FLOAT_FUN2I") 1453 #pragma pop_macro("__HIP_OVERLOAD") 1454 #pragma pop_macro("__HIP_OVERLOAD2") 1455 #pragma pop_macro("__DEVICE__") 1456 #pragma pop_macro("__RETURN_TYPE")
#define __host__
Definition: host_defines.h:41
Contains definitions of APIs for HIP runtime.
Definition: math_functions.h:1220