HIP: Heterogenous-computing Interface for Portability
math_functions.h
1 /*
2 Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved.
3 
4 Permission is hereby granted, free of charge, to any person obtaining a copy
5 of this software and associated documentation files (the "Software"), to deal
6 in the Software without restriction, including without limitation the rights
7 to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
8 copies of the Software, and to permit persons to whom the Software is
9 furnished to do so, subject to the following conditions:
10 
11 The above copyright notice and this permission notice shall be included in
12 all copies or substantial portions of the Software.
13 
14 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
17 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
19 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
20 THE SOFTWARE.
21 */
22 
23 #pragma once
24 
25 #include "hip_fp16_math_fwd.h"
26 #include "hip_vector_types.h"
27 #include "math_fwd.h"
28 
30 
31 #include <algorithm>
32 
33 // assert.h is only for the host version of assert.
34 // The device version of assert is implemented in hip/hcc_detail/hip_runtime.h.
35 // Users should include hip_runtime.h for the device version of assert.
36 #if !__HIP_DEVICE_COMPILE__
37 #include <assert.h>
38 #endif
39 
40 #include <limits.h>
41 #include <limits>
42 #include <stdint.h>
43 
44 // HCC's own math functions should be included first, otherwise there will
45 // be conflicts when hip/math_functions.h is included before hip/hip_runtime.h.
46 #ifdef __HCC__
47 #include "kalmar_math.h"
48 #endif
49 
50 #if _LIBCPP_VERSION && __HIP__
51 namespace std {
52 template <>
53 struct __numeric_type<_Float16>
54 {
55  static _Float16 __test(_Float16);
56 
57  typedef _Float16 type;
58  static const bool value = true;
59 };
60 }
61 #endif // _LIBCPP_VERSION
62 
63 #pragma push_macro("__DEVICE__")
64 #pragma push_macro("__RETURN_TYPE")
65 
66 #ifdef __HCC__
67 #define __DEVICE__ __device__
68 #define __RETURN_TYPE int
69 #else // to be consistent with __clang_cuda_math_forward_declares
70 #define __DEVICE__ static __device__
71 #define __RETURN_TYPE bool
72 #endif
73 
74 __DEVICE__
75 inline
76 uint64_t __make_mantissa_base8(const char* tagp)
77 {
78  uint64_t r = 0;
79  while (tagp) {
80  char tmp = *tagp;
81 
82  if (tmp >= '0' && tmp <= '7') r = (r * 8u) + tmp - '0';
83  else return 0;
84 
85  ++tagp;
86  }
87 
88  return r;
89 }
90 
91 __DEVICE__
92 inline
93 uint64_t __make_mantissa_base10(const char* tagp)
94 {
95  uint64_t r = 0;
96  while (tagp) {
97  char tmp = *tagp;
98 
99  if (tmp >= '0' && tmp <= '9') r = (r * 10u) + tmp - '0';
100  else return 0;
101 
102  ++tagp;
103  }
104 
105  return r;
106 }
107 
108 __DEVICE__
109 inline
110 uint64_t __make_mantissa_base16(const char* tagp)
111 {
112  uint64_t r = 0;
113  while (tagp) {
114  char tmp = *tagp;
115 
116  if (tmp >= '0' && tmp <= '9') r = (r * 16u) + tmp - '0';
117  else if (tmp >= 'a' && tmp <= 'f') r = (r * 16u) + tmp - 'a' + 10;
118  else if (tmp >= 'A' && tmp <= 'F') r = (r * 16u) + tmp - 'A' + 10;
119  else return 0;
120 
121  ++tagp;
122  }
123 
124  return r;
125 }
126 
127 __DEVICE__
128 inline
129 uint64_t __make_mantissa(const char* tagp)
130 {
131  if (!tagp) return 0u;
132 
133  if (*tagp == '0') {
134  ++tagp;
135 
136  if (*tagp == 'x' || *tagp == 'X') return __make_mantissa_base16(tagp);
137  else return __make_mantissa_base8(tagp);
138  }
139 
140  return __make_mantissa_base10(tagp);
141 }
142 
143 // DOT FUNCTIONS
144 #if (__hcc_workweek__ >= 19015) || __HIP_CLANG_ONLY__
145 __DEVICE__
146 inline
147 int amd_mixed_dot(short2 a, short2 b, int c, bool saturate) {
148  return __ockl_sdot2(a.data, b.data, c, saturate);
149 }
150 __DEVICE__
151 inline
152 uint amd_mixed_dot(ushort2 a, ushort2 b, uint c, bool saturate) {
153  return __ockl_udot2(a.data, b.data, c, saturate);
154 }
155 __DEVICE__
156 inline
157 int amd_mixed_dot(char4 a, char4 b, int c, bool saturate) {
158  return __ockl_sdot4(a.data, b.data, c, saturate);
159 }
160 __DEVICE__
161 inline
162 uint amd_mixed_dot(uchar4 a, uchar4 b, uint c, bool saturate) {
163  return __ockl_udot4(a.data, b.data, c, saturate);
164 }
165 __DEVICE__
166 inline
167 int amd_mixed_dot(int a, int b, int c, bool saturate) {
168  return __ockl_sdot8(a, b, c, saturate);
169 }
170 __DEVICE__
171 inline
172 uint amd_mixed_dot(uint a, uint b, uint c, bool saturate) {
173  return __ockl_udot8(a, b, c, saturate);
174 }
175 #endif
176 
177 // BEGIN FLOAT
178 __DEVICE__
179 inline
180 float abs(float x) { return __ocml_fabs_f32(x); }
181 __DEVICE__
182 inline
183 float acosf(float x) { return __ocml_acos_f32(x); }
184 __DEVICE__
185 inline
186 float acoshf(float x) { return __ocml_acosh_f32(x); }
187 __DEVICE__
188 inline
189 float asinf(float x) { return __ocml_asin_f32(x); }
190 __DEVICE__
191 inline
192 float asinhf(float x) { return __ocml_asinh_f32(x); }
193 __DEVICE__
194 inline
195 float atan2f(float x, float y) { return __ocml_atan2_f32(x, y); }
196 __DEVICE__
197 inline
198 float atanf(float x) { return __ocml_atan_f32(x); }
199 __DEVICE__
200 inline
201 float atanhf(float x) { return __ocml_atanh_f32(x); }
202 __DEVICE__
203 inline
204 float cbrtf(float x) { return __ocml_cbrt_f32(x); }
205 __DEVICE__
206 inline
207 float ceilf(float x) { return __ocml_ceil_f32(x); }
208 __DEVICE__
209 inline
210 float copysignf(float x, float y) { return __ocml_copysign_f32(x, y); }
211 __DEVICE__
212 inline
213 float cosf(float x) { return __ocml_cos_f32(x); }
214 __DEVICE__
215 inline
216 float coshf(float x) { return __ocml_cosh_f32(x); }
217 __DEVICE__
218 inline
219 float cospif(float x) { return __ocml_cospi_f32(x); }
220 __DEVICE__
221 inline
222 float cyl_bessel_i0f(float x) { return __ocml_i0_f32(x); }
223 __DEVICE__
224 inline
225 float cyl_bessel_i1f(float x) { return __ocml_i1_f32(x); }
226 __DEVICE__
227 inline
228 float erfcf(float x) { return __ocml_erfc_f32(x); }
229 __DEVICE__
230 inline
231 float erfcinvf(float x) { return __ocml_erfcinv_f32(x); }
232 __DEVICE__
233 inline
234 float erfcxf(float x) { return __ocml_erfcx_f32(x); }
235 __DEVICE__
236 inline
237 float erff(float x) { return __ocml_erf_f32(x); }
238 __DEVICE__
239 inline
240 float erfinvf(float x) { return __ocml_erfinv_f32(x); }
241 __DEVICE__
242 inline
243 float exp10f(float x) { return __ocml_exp10_f32(x); }
244 __DEVICE__
245 inline
246 float exp2f(float x) { return __ocml_exp2_f32(x); }
247 __DEVICE__
248 inline
249 float expf(float x) { return __ocml_exp_f32(x); }
250 __DEVICE__
251 inline
252 float expm1f(float x) { return __ocml_expm1_f32(x); }
253 __DEVICE__
254 inline
255 float fabsf(float x) { return __ocml_fabs_f32(x); }
256 __DEVICE__
257 inline
258 float fdimf(float x, float y) { return __ocml_fdim_f32(x, y); }
259 __DEVICE__
260 inline
261 float fdividef(float x, float y) { return x / y; }
262 __DEVICE__
263 inline
264 float floorf(float x) { return __ocml_floor_f32(x); }
265 __DEVICE__
266 inline
267 float fmaf(float x, float y, float z) { return __ocml_fma_f32(x, y, z); }
268 __DEVICE__
269 inline
270 float fmaxf(float x, float y) { return __ocml_fmax_f32(x, y); }
271 __DEVICE__
272 inline
273 float fminf(float x, float y) { return __ocml_fmin_f32(x, y); }
274 __DEVICE__
275 inline
276 float fmodf(float x, float y) { return __ocml_fmod_f32(x, y); }
277 __DEVICE__
278 inline
279 float frexpf(float x, int* nptr)
280 {
281  int tmp;
282  float r =
283  __ocml_frexp_f32(x, (__attribute__((address_space(5))) int*) &tmp);
284  *nptr = tmp;
285 
286  return r;
287 }
288 __DEVICE__
289 inline
290 float hypotf(float x, float y) { return __ocml_hypot_f32(x, y); }
291 __DEVICE__
292 inline
293 int ilogbf(float x) { return __ocml_ilogb_f32(x); }
294 __DEVICE__
295 inline
296 __RETURN_TYPE isfinite(float x) { return __ocml_isfinite_f32(x); }
297 __DEVICE__
298 inline
299 __RETURN_TYPE isinf(float x) { return __ocml_isinf_f32(x); }
300 __DEVICE__
301 inline
302 __RETURN_TYPE isnan(float x) { return __ocml_isnan_f32(x); }
303 __DEVICE__
304 inline
305 float j0f(float x) { return __ocml_j0_f32(x); }
306 __DEVICE__
307 inline
308 float j1f(float x) { return __ocml_j1_f32(x); }
309 __DEVICE__
310 inline
311 float jnf(int n, float x)
312 { // TODO: we could use Ahmes multiplication and the Miller & Brown algorithm
313  // for linear recurrences to get O(log n) steps, but it's unclear if
314  // it'd be beneficial in this case.
315  if (n == 0) return j0f(x);
316  if (n == 1) return j1f(x);
317 
318  float x0 = j0f(x);
319  float x1 = j1f(x);
320  for (int i = 1; i < n; ++i) {
321  float x2 = (2 * i) / x * x1 - x0;
322  x0 = x1;
323  x1 = x2;
324  }
325 
326  return x1;
327 }
328 __DEVICE__
329 inline
330 float ldexpf(float x, int e) { return __ocml_ldexp_f32(x, e); }
331 __DEVICE__
332 inline
333 float lgammaf(float x) { return __ocml_lgamma_f32(x); }
334 __DEVICE__
335 inline
336 long long int llrintf(float x) { return __ocml_rint_f32(x); }
337 __DEVICE__
338 inline
339 long long int llroundf(float x) { return __ocml_round_f32(x); }
340 __DEVICE__
341 inline
342 float log10f(float x) { return __ocml_log10_f32(x); }
343 __DEVICE__
344 inline
345 float log1pf(float x) { return __ocml_log1p_f32(x); }
346 __DEVICE__
347 inline
348 float log2f(float x) { return __ocml_log2_f32(x); }
349 __DEVICE__
350 inline
351 float logbf(float x) { return __ocml_logb_f32(x); }
352 __DEVICE__
353 inline
354 float logf(float x) { return __ocml_log_f32(x); }
355 __DEVICE__
356 inline
357 long int lrintf(float x) { return __ocml_rint_f32(x); }
358 __DEVICE__
359 inline
360 long int lroundf(float x) { return __ocml_round_f32(x); }
361 __DEVICE__
362 inline
363 float modff(float x, float* iptr)
364 {
365  float tmp;
366  float r =
367  __ocml_modf_f32(x, (__attribute__((address_space(5))) float*) &tmp);
368  *iptr = tmp;
369 
370  return r;
371 }
372 __DEVICE__
373 inline
374 float nanf(const char* tagp)
375 {
376  union {
377  float val;
378  struct ieee_float {
379  uint32_t mantissa : 22;
380  uint32_t quiet : 1;
381  uint32_t exponent : 8;
382  uint32_t sign : 1;
383  } bits;
384 
385  static_assert(sizeof(float) == sizeof(ieee_float), "");
386  } tmp;
387 
388  tmp.bits.sign = 0u;
389  tmp.bits.exponent = ~0u;
390  tmp.bits.quiet = 1u;
391  tmp.bits.mantissa = __make_mantissa(tagp);
392 
393  return tmp.val;
394 }
395 __DEVICE__
396 inline
397 float nearbyintf(float x) { return __ocml_nearbyint_f32(x); }
398 __DEVICE__
399 inline
400 float nextafterf(float x, float y) { return __ocml_nextafter_f32(x, y); }
401 __DEVICE__
402 inline
403 float norm3df(float x, float y, float z) { return __ocml_len3_f32(x, y, z); }
404 __DEVICE__
405 inline
406 float norm4df(float x, float y, float z, float w)
407 {
408  return __ocml_len4_f32(x, y, z, w);
409 }
410 __DEVICE__
411 inline
412 float normcdff(float x) { return __ocml_ncdf_f32(x); }
413 __DEVICE__
414 inline
415 float normcdfinvf(float x) { return __ocml_ncdfinv_f32(x); }
416 __DEVICE__
417 inline
418 float normf(int dim, const float* a)
419 { // TODO: placeholder until OCML adds support.
420  float r = 0;
421  while (dim--) { r += a[0] * a[0]; ++a; }
422 
423  return __ocml_sqrt_f32(r);
424 }
425 __DEVICE__
426 inline
427 float powf(float x, float y) { return __ocml_pow_f32(x, y); }
428 __DEVICE__
429 inline
430 float rcbrtf(float x) { return __ocml_rcbrt_f32(x); }
431 __DEVICE__
432 inline
433 float remainderf(float x, float y) { return __ocml_remainder_f32(x, y); }
434 __DEVICE__
435 inline
436 float remquof(float x, float y, int* quo)
437 {
438  int tmp;
439  float r =
440  __ocml_remquo_f32(x, y, (__attribute__((address_space(5))) int*) &tmp);
441  *quo = tmp;
442 
443  return r;
444 }
445 __DEVICE__
446 inline
447 float rhypotf(float x, float y) { return __ocml_rhypot_f32(x, y); }
448 __DEVICE__
449 inline
450 float rintf(float x) { return __ocml_rint_f32(x); }
451 __DEVICE__
452 inline
453 float rnorm3df(float x, float y, float z)
454 {
455  return __ocml_rlen3_f32(x, y, z);
456 }
457 
458 __DEVICE__
459 inline
460 float rnorm4df(float x, float y, float z, float w)
461 {
462  return __ocml_rlen4_f32(x, y, z, w);
463 }
464 __DEVICE__
465 inline
466 float rnormf(int dim, const float* a)
467 { // TODO: placeholder until OCML adds support.
468  float r = 0;
469  while (dim--) { r += a[0] * a[0]; ++a; }
470 
471  return __ocml_rsqrt_f32(r);
472 }
473 __DEVICE__
474 inline
475 float roundf(float x) { return __ocml_round_f32(x); }
476 __DEVICE__
477 inline
478 float rsqrtf(float x) { return __ocml_rsqrt_f32(x); }
479 __DEVICE__
480 inline
481 float scalblnf(float x, long int n)
482 {
483  return (n < INT_MAX) ? __ocml_scalbn_f32(x, n) : __ocml_scalb_f32(x, n);
484 }
485 __DEVICE__
486 inline
487 float scalbnf(float x, int n) { return __ocml_scalbn_f32(x, n); }
488 __DEVICE__
489 inline
490 __RETURN_TYPE signbit(float x) { return __ocml_signbit_f32(x); }
491 __DEVICE__
492 inline
493 void sincosf(float x, float* sptr, float* cptr)
494 {
495  float tmp;
496 
497  *sptr =
498  __ocml_sincos_f32(x, (__attribute__((address_space(5))) float*) &tmp);
499  *cptr = tmp;
500 }
501 __DEVICE__
502 inline
503 void sincospif(float x, float* sptr, float* cptr)
504 {
505  float tmp;
506 
507  *sptr =
508  __ocml_sincospi_f32(x, (__attribute__((address_space(5))) float*) &tmp);
509  *cptr = tmp;
510 }
511 __DEVICE__
512 inline
513 float sinf(float x) { return __ocml_sin_f32(x); }
514 __DEVICE__
515 inline
516 float sinhf(float x) { return __ocml_sinh_f32(x); }
517 __DEVICE__
518 inline
519 float sinpif(float x) { return __ocml_sinpi_f32(x); }
520 __DEVICE__
521 inline
522 float sqrtf(float x) { return __ocml_sqrt_f32(x); }
523 __DEVICE__
524 inline
525 float tanf(float x) { return __ocml_tan_f32(x); }
526 __DEVICE__
527 inline
528 float tanhf(float x) { return __ocml_tanh_f32(x); }
529 __DEVICE__
530 inline
531 float tgammaf(float x) { return __ocml_tgamma_f32(x); }
532 __DEVICE__
533 inline
534 float truncf(float x) { return __ocml_trunc_f32(x); }
535 __DEVICE__
536 inline
537 float y0f(float x) { return __ocml_y0_f32(x); }
538 __DEVICE__
539 inline
540 float y1f(float x) { return __ocml_y1_f32(x); }
541 __DEVICE__
542 inline
543 float ynf(int n, float x)
544 { // TODO: we could use Ahmes multiplication and the Miller & Brown algorithm
545  // for linear recurrences to get O(log n) steps, but it's unclear if
546  // it'd be beneficial in this case. Placeholder until OCML adds
547  // support.
548  if (n == 0) return y0f(x);
549  if (n == 1) return y1f(x);
550 
551  float x0 = y0f(x);
552  float x1 = y1f(x);
553  for (int i = 1; i < n; ++i) {
554  float x2 = (2 * i) / x * x1 - x0;
555  x0 = x1;
556  x1 = x2;
557  }
558 
559  return x1;
560 }
561 
562 // BEGIN INTRINSICS
563 __DEVICE__
564 inline
565 float __cosf(float x) { return __ocml_native_cos_f32(x); }
566 __DEVICE__
567 inline
568 float __exp10f(float x) { return __ocml_native_exp10_f32(x); }
569 __DEVICE__
570 inline
571 float __expf(float x) { return __ocml_native_exp_f32(x); }
572 #if defined OCML_BASIC_ROUNDED_OPERATIONS
573 __DEVICE__
574 inline
575 float __fadd_rd(float x, float y) { return __ocml_add_rtn_f32(x, y); }
576 #endif
577 __DEVICE__
578 inline
579 float __fadd_rn(float x, float y) { return x + y; }
580 #if defined OCML_BASIC_ROUNDED_OPERATIONS
581 __DEVICE__
582 inline
583 float __fadd_ru(float x, float y) { return __ocml_add_rtp_f32(x, y); }
584 __DEVICE__
585 inline
586 float __fadd_rz(float x, float y) { return __ocml_add_rtz_f32(x, y); }
587 __DEVICE__
588 inline
589 float __fdiv_rd(float x, float y) { return __ocml_div_rtn_f32(x, y); }
590 #endif
591 __DEVICE__
592 inline
593 float __fdiv_rn(float x, float y) { return x / y; }
594 #if defined OCML_BASIC_ROUNDED_OPERATIONS
595 __DEVICE__
596 inline
597 float __fdiv_ru(float x, float y) { return __ocml_div_rtp_f32(x, y); }
598 __DEVICE__
599 inline
600 float __fdiv_rz(float x, float y) { return __ocml_div_rtz_f32(x, y); }
601 #endif
602 __DEVICE__
603 inline
604 float __fdividef(float x, float y) { return x / y; }
605 #if defined OCML_BASIC_ROUNDED_OPERATIONS
606 __DEVICE__
607 inline
608 float __fmaf_rd(float x, float y, float z)
609 {
610  return __ocml_fma_rtn_f32(x, y, z);
611 }
612 #endif
613 __DEVICE__
614 inline
615 float __fmaf_rn(float x, float y, float z)
616 {
617  return __ocml_fma_f32(x, y, z);
618 }
619 #if defined OCML_BASIC_ROUNDED_OPERATIONS
620 __DEVICE__
621 inline
622 float __fmaf_ru(float x, float y, float z)
623 {
624  return __ocml_fma_rtp_f32(x, y, z);
625 }
626 __DEVICE__
627 inline
628 float __fmaf_rz(float x, float y, float z)
629 {
630  return __ocml_fma_rtz_f32(x, y, z);
631 }
632 __DEVICE__
633 inline
634 float __fmul_rd(float x, float y) { return __ocml_mul_rtn_f32(x, y); }
635 #endif
636 __DEVICE__
637 inline
638 float __fmul_rn(float x, float y) { return x * y; }
639 #if defined OCML_BASIC_ROUNDED_OPERATIONS
640 __DEVICE__
641 inline
642 float __fmul_ru(float x, float y) { return __ocml_mul_rtp_f32(x, y); }
643 __DEVICE__
644 inline
645 float __fmul_rz(float x, float y) { return __ocml_mul_rtz_f32(x, y); }
646 __DEVICE__
647 inline
648 float __frcp_rd(float x) { return __llvm_amdgcn_rcp_f32(x); }
649 #endif
650 __DEVICE__
651 inline
652 float __frcp_rn(float x) { return __llvm_amdgcn_rcp_f32(x); }
653 #if defined OCML_BASIC_ROUNDED_OPERATIONS
654 __DEVICE__
655 inline
656 float __frcp_ru(float x) { return __llvm_amdgcn_rcp_f32(x); }
657 __DEVICE__
658 inline
659 float __frcp_rz(float x) { return __llvm_amdgcn_rcp_f32(x); }
660 #endif
661 __DEVICE__
662 inline
663 float __frsqrt_rn(float x) { return __llvm_amdgcn_rsq_f32(x); }
664 #if defined OCML_BASIC_ROUNDED_OPERATIONS
665 __DEVICE__
666 inline
667 float __fsqrt_rd(float x) { return __ocml_sqrt_rtn_f32(x); }
668 #endif
669 __DEVICE__
670 inline
671 float __fsqrt_rn(float x) { return __ocml_native_sqrt_f32(x); }
672 #if defined OCML_BASIC_ROUNDED_OPERATIONS
673 __DEVICE__
674 inline
675 float __fsqrt_ru(float x) { return __ocml_sqrt_rtp_f32(x); }
676 __DEVICE__
677 inline
678 float __fsqrt_rz(float x) { return __ocml_sqrt_rtz_f32(x); }
679 __DEVICE__
680 inline
681 float __fsub_rd(float x, float y) { return __ocml_sub_rtn_f32(x, y); }
682 #endif
683 __DEVICE__
684 inline
685 float __fsub_rn(float x, float y) { return x - y; }
686 #if defined OCML_BASIC_ROUNDED_OPERATIONS
687 __DEVICE__
688 inline
689 float __fsub_ru(float x, float y) { return __ocml_sub_rtp_f32(x, y); }
690 __DEVICE__
691 inline
692 float __fsub_rz(float x, float y) { return __ocml_sub_rtz_f32(x, y); }
693 #endif
694 __DEVICE__
695 inline
696 float __log10f(float x) { return __ocml_native_log10_f32(x); }
697 __DEVICE__
698 inline
699 float __log2f(float x) { return __ocml_native_log2_f32(x); }
700 __DEVICE__
701 inline
702 float __logf(float x) { return __ocml_native_log_f32(x); }
703 __DEVICE__
704 inline
705 float __powf(float x, float y) { return __ocml_pow_f32(x, y); }
706 __DEVICE__
707 inline
708 float __saturatef(float x) { return (x < 0) ? 0 : ((x > 1) ? 1 : x); }
709 __DEVICE__
710 inline
711 void __sincosf(float x, float* sptr, float* cptr)
712 {
713  *sptr = __ocml_native_sin_f32(x);
714  *cptr = __ocml_native_cos_f32(x);
715 }
716 __DEVICE__
717 inline
718 float __sinf(float x) { return __ocml_native_sin_f32(x); }
719 __DEVICE__
720 inline
721 float __tanf(float x) { return __ocml_tan_f32(x); }
722 // END INTRINSICS
723 // END FLOAT
724 
725 // BEGIN DOUBLE
726 __DEVICE__
727 inline
728 double abs(double x) { return __ocml_fabs_f64(x); }
729 __DEVICE__
730 inline
731 double acos(double x) { return __ocml_acos_f64(x); }
732 __DEVICE__
733 inline
734 double acosh(double x) { return __ocml_acosh_f64(x); }
735 __DEVICE__
736 inline
737 double asin(double x) { return __ocml_asin_f64(x); }
738 __DEVICE__
739 inline
740 double asinh(double x) { return __ocml_asinh_f64(x); }
741 __DEVICE__
742 inline
743 double atan(double x) { return __ocml_atan_f64(x); }
744 __DEVICE__
745 inline
746 double atan2(double x, double y) { return __ocml_atan2_f64(x, y); }
747 __DEVICE__
748 inline
749 double atanh(double x) { return __ocml_atanh_f64(x); }
750 __DEVICE__
751 inline
752 double cbrt(double x) { return __ocml_cbrt_f64(x); }
753 __DEVICE__
754 inline
755 double ceil(double x) { return __ocml_ceil_f64(x); }
756 __DEVICE__
757 inline
758 double copysign(double x, double y) { return __ocml_copysign_f64(x, y); }
759 __DEVICE__
760 inline
761 double cos(double x) { return __ocml_cos_f64(x); }
762 __DEVICE__
763 inline
764 double cosh(double x) { return __ocml_cosh_f64(x); }
765 __DEVICE__
766 inline
767 double cospi(double x) { return __ocml_cospi_f64(x); }
768 __DEVICE__
769 inline
770 double cyl_bessel_i0(double x) { return __ocml_i0_f64(x); }
771 __DEVICE__
772 inline
773 double cyl_bessel_i1(double x) { return __ocml_i1_f64(x); }
774 __DEVICE__
775 inline
776 double erf(double x) { return __ocml_erf_f64(x); }
777 __DEVICE__
778 inline
779 double erfc(double x) { return __ocml_erfc_f64(x); }
780 __DEVICE__
781 inline
782 double erfcinv(double x) { return __ocml_erfcinv_f64(x); }
783 __DEVICE__
784 inline
785 double erfcx(double x) { return __ocml_erfcx_f64(x); }
786 __DEVICE__
787 inline
788 double erfinv(double x) { return __ocml_erfinv_f64(x); }
789 __DEVICE__
790 inline
791 double exp(double x) { return __ocml_exp_f64(x); }
792 __DEVICE__
793 inline
794 double exp10(double x) { return __ocml_exp10_f64(x); }
795 __DEVICE__
796 inline
797 double exp2(double x) { return __ocml_exp2_f64(x); }
798 __DEVICE__
799 inline
800 double expm1(double x) { return __ocml_expm1_f64(x); }
801 __DEVICE__
802 inline
803 double fabs(double x) { return __ocml_fabs_f64(x); }
804 __DEVICE__
805 inline
806 double fdim(double x, double y) { return __ocml_fdim_f64(x, y); }
807 __DEVICE__
808 inline
809 double floor(double x) { return __ocml_floor_f64(x); }
810 __DEVICE__
811 inline
812 double fma(double x, double y, double z) { return __ocml_fma_f64(x, y, z); }
813 __DEVICE__
814 inline
815 double fmax(double x, double y) { return __ocml_fmax_f64(x, y); }
816 __DEVICE__
817 inline
818 double fmin(double x, double y) { return __ocml_fmin_f64(x, y); }
819 __DEVICE__
820 inline
821 double fmod(double x, double y) { return __ocml_fmod_f64(x, y); }
822 __DEVICE__
823 inline
824 double frexp(double x, int* nptr)
825 {
826  int tmp;
827  double r =
828  __ocml_frexp_f64(x, (__attribute__((address_space(5))) int*) &tmp);
829  *nptr = tmp;
830 
831  return r;
832 }
833 __DEVICE__
834 inline
835 double hypot(double x, double y) { return __ocml_hypot_f64(x, y); }
836 __DEVICE__
837 inline
838 int ilogb(double x) { return __ocml_ilogb_f64(x); }
839 __DEVICE__
840 inline
841 __RETURN_TYPE isfinite(double x) { return __ocml_isfinite_f64(x); }
842 __DEVICE__
843 inline
844 __RETURN_TYPE isinf(double x) { return __ocml_isinf_f64(x); }
845 __DEVICE__
846 inline
847 __RETURN_TYPE isnan(double x) { return __ocml_isnan_f64(x); }
848 __DEVICE__
849 inline
850 double j0(double x) { return __ocml_j0_f64(x); }
851 __DEVICE__
852 inline
853 double j1(double x) { return __ocml_j1_f64(x); }
854 __DEVICE__
855 inline
856 double jn(int n, double x)
857 { // TODO: we could use Ahmes multiplication and the Miller & Brown algorithm
858  // for linear recurrences to get O(log n) steps, but it's unclear if
859  // it'd be beneficial in this case. Placeholder until OCML adds
860  // support.
861  if (n == 0) return j0f(x);
862  if (n == 1) return j1f(x);
863 
864  double x0 = j0f(x);
865  double x1 = j1f(x);
866  for (int i = 1; i < n; ++i) {
867  double x2 = (2 * i) / x * x1 - x0;
868  x0 = x1;
869  x1 = x2;
870  }
871 
872  return x1;
873 }
874 __DEVICE__
875 inline
876 double ldexp(double x, int e) { return __ocml_ldexp_f64(x, e); }
877 __DEVICE__
878 inline
879 double lgamma(double x) { return __ocml_lgamma_f64(x); }
880 __DEVICE__
881 inline
882 long long int llrint(double x) { return __ocml_rint_f64(x); }
883 __DEVICE__
884 inline
885 long long int llround(double x) { return __ocml_round_f64(x); }
886 __DEVICE__
887 inline
888 double log(double x) { return __ocml_log_f64(x); }
889 __DEVICE__
890 inline
891 double log10(double x) { return __ocml_log10_f64(x); }
892 __DEVICE__
893 inline
894 double log1p(double x) { return __ocml_log1p_f64(x); }
895 __DEVICE__
896 inline
897 double log2(double x) { return __ocml_log2_f64(x); }
898 __DEVICE__
899 inline
900 double logb(double x) { return __ocml_logb_f64(x); }
901 __DEVICE__
902 inline
903 long int lrint(double x) { return __ocml_rint_f64(x); }
904 __DEVICE__
905 inline
906 long int lround(double x) { return __ocml_round_f64(x); }
907 __DEVICE__
908 inline
909 double modf(double x, double* iptr)
910 {
911  double tmp;
912  double r =
913  __ocml_modf_f64(x, (__attribute__((address_space(5))) double*) &tmp);
914  *iptr = tmp;
915 
916  return r;
917 }
918 __DEVICE__
919 inline
920 double nan(const char* tagp)
921 {
922 #if !_WIN32
923  union {
924  double val;
925  struct ieee_double {
926  uint64_t mantissa : 51;
927  uint32_t quiet : 1;
928  uint32_t exponent : 11;
929  uint32_t sign : 1;
930  } bits;
931  static_assert(sizeof(double) == sizeof(ieee_double), "");
932  } tmp;
933 
934  tmp.bits.sign = 0u;
935  tmp.bits.exponent = ~0u;
936  tmp.bits.quiet = 1u;
937  tmp.bits.mantissa = __make_mantissa(tagp);
938 
939  return tmp.val;
940 #else
941  static_assert(sizeof(uint64_t)==sizeof(double));
942  uint64_t val = __make_mantissa(tagp);
943  val |= 0xFFF << 51;
944  return *reinterpret_cast<double*>(&val);
945 #endif
946 }
947 __DEVICE__
948 inline
949 double nearbyint(double x) { return __ocml_nearbyint_f64(x); }
950 __DEVICE__
951 inline
952 double nextafter(double x, double y) { return __ocml_nextafter_f64(x, y); }
953 __DEVICE__
954 inline
955 double norm(int dim, const double* a)
956 { // TODO: placeholder until OCML adds support.
957  double r = 0;
958  while (dim--) { r += a[0] * a[0]; ++a; }
959 
960  return __ocml_sqrt_f64(r);
961 }
962 __DEVICE__
963 inline
964 double norm3d(double x, double y, double z)
965 {
966  return __ocml_len3_f64(x, y, z);
967 }
968 __DEVICE__
969 inline
970 double norm4d(double x, double y, double z, double w)
971 {
972  return __ocml_len4_f64(x, y, z, w);
973 }
974 __DEVICE__
975 inline
976 double normcdf(double x) { return __ocml_ncdf_f64(x); }
977 __DEVICE__
978 inline
979 double normcdfinv(double x) { return __ocml_ncdfinv_f64(x); }
980 __DEVICE__
981 inline
982 double pow(double x, double y) { return __ocml_pow_f64(x, y); }
983 __DEVICE__
984 inline
985 double rcbrt(double x) { return __ocml_rcbrt_f64(x); }
986 __DEVICE__
987 inline
988 double remainder(double x, double y) { return __ocml_remainder_f64(x, y); }
989 __DEVICE__
990 inline
991 double remquo(double x, double y, int* quo)
992 {
993  int tmp;
994  double r =
995  __ocml_remquo_f64(x, y, (__attribute__((address_space(5))) int*) &tmp);
996  *quo = tmp;
997 
998  return r;
999 }
1000 __DEVICE__
1001 inline
1002 double rhypot(double x, double y) { return __ocml_rhypot_f64(x, y); }
1003 __DEVICE__
1004 inline
1005 double rint(double x) { return __ocml_rint_f64(x); }
1006 __DEVICE__
1007 inline
1008 double rnorm(int dim, const double* a)
1009 { // TODO: placeholder until OCML adds support.
1010  double r = 0;
1011  while (dim--) { r += a[0] * a[0]; ++a; }
1012 
1013  return __ocml_rsqrt_f64(r);
1014 }
1015 __DEVICE__
1016 inline
1017 double rnorm3d(double x, double y, double z)
1018 {
1019  return __ocml_rlen3_f64(x, y, z);
1020 }
1021 __DEVICE__
1022 inline
1023 double rnorm4d(double x, double y, double z, double w)
1024 {
1025  return __ocml_rlen4_f64(x, y, z, w);
1026 }
1027 __DEVICE__
1028 inline
1029 double round(double x) { return __ocml_round_f64(x); }
1030 __DEVICE__
1031 inline
1032 double rsqrt(double x) { return __ocml_rsqrt_f64(x); }
1033 __DEVICE__
1034 inline
1035 double scalbln(double x, long int n)
1036 {
1037  return (n < INT_MAX) ? __ocml_scalbn_f64(x, n) : __ocml_scalb_f64(x, n);
1038 }
1039 __DEVICE__
1040 inline
1041 double scalbn(double x, int n) { return __ocml_scalbn_f64(x, n); }
1042 __DEVICE__
1043 inline
1044 __RETURN_TYPE signbit(double x) { return __ocml_signbit_f64(x); }
1045 __DEVICE__
1046 inline
1047 double sin(double x) { return __ocml_sin_f64(x); }
1048 __DEVICE__
1049 inline
1050 void sincos(double x, double* sptr, double* cptr)
1051 {
1052  double tmp;
1053  *sptr =
1054  __ocml_sincos_f64(x, (__attribute__((address_space(5))) double*) &tmp);
1055  *cptr = tmp;
1056 }
1057 __DEVICE__
1058 inline
1059 void sincospi(double x, double* sptr, double* cptr)
1060 {
1061  double tmp;
1062  *sptr = __ocml_sincospi_f64(
1063  x, (__attribute__((address_space(5))) double*) &tmp);
1064  *cptr = tmp;
1065 }
1066 __DEVICE__
1067 inline
1068 double sinh(double x) { return __ocml_sinh_f64(x); }
1069 __DEVICE__
1070 inline
1071 double sinpi(double x) { return __ocml_sinpi_f64(x); }
1072 __DEVICE__
1073 inline
1074 double sqrt(double x) { return __ocml_sqrt_f64(x); }
1075 __DEVICE__
1076 inline
1077 double tan(double x) { return __ocml_tan_f64(x); }
1078 __DEVICE__
1079 inline
1080 double tanh(double x) { return __ocml_tanh_f64(x); }
1081 __DEVICE__
1082 inline
1083 double tgamma(double x) { return __ocml_tgamma_f64(x); }
1084 __DEVICE__
1085 inline
1086 double trunc(double x) { return __ocml_trunc_f64(x); }
1087 __DEVICE__
1088 inline
1089 double y0(double x) { return __ocml_y0_f64(x); }
1090 __DEVICE__
1091 inline
1092 double y1(double x) { return __ocml_y1_f64(x); }
1093 __DEVICE__
1094 inline
1095 double yn(int n, double x)
1096 { // TODO: we could use Ahmes multiplication and the Miller & Brown algorithm
1097  // for linear recurrences to get O(log n) steps, but it's unclear if
1098  // it'd be beneficial in this case. Placeholder until OCML adds
1099  // support.
1100  if (n == 0) return j0f(x);
1101  if (n == 1) return j1f(x);
1102 
1103  double x0 = j0f(x);
1104  double x1 = j1f(x);
1105  for (int i = 1; i < n; ++i) {
1106  double x2 = (2 * i) / x * x1 - x0;
1107  x0 = x1;
1108  x1 = x2;
1109  }
1110 
1111  return x1;
1112 }
1113 
1114 // BEGIN INTRINSICS
1115 #if defined OCML_BASIC_ROUNDED_OPERATIONS
1116 __DEVICE__
1117 inline
1118 double __dadd_rd(double x, double y) { return __ocml_add_rtn_f64(x, y); }
1119 #endif
1120 __DEVICE__
1121 inline
1122 double __dadd_rn(double x, double y) { return x + y; }
1123 #if defined OCML_BASIC_ROUNDED_OPERATIONS
1124 __DEVICE__
1125 inline
1126 double __dadd_ru(double x, double y) { return __ocml_add_rtp_f64(x, y); }
1127 __DEVICE__
1128 inline
1129 double __dadd_rz(double x, double y) { return __ocml_add_rtz_f64(x, y); }
1130 __DEVICE__
1131 inline
1132 double __ddiv_rd(double x, double y) { return __ocml_div_rtn_f64(x, y); }
1133 #endif
1134 __DEVICE__
1135 inline
1136 double __ddiv_rn(double x, double y) { return x / y; }
1137 #if defined OCML_BASIC_ROUNDED_OPERATIONS
1138 __DEVICE__
1139 inline
1140 double __ddiv_ru(double x, double y) { return __ocml_div_rtp_f64(x, y); }
1141 __DEVICE__
1142 inline
1143 double __ddiv_rz(double x, double y) { return __ocml_div_rtz_f64(x, y); }
1144 __DEVICE__
1145 inline
1146 double __dmul_rd(double x, double y) { return __ocml_mul_rtn_f64(x, y); }
1147 #endif
1148 __DEVICE__
1149 inline
1150 double __dmul_rn(double x, double y) { return x * y; }
1151 #if defined OCML_BASIC_ROUNDED_OPERATIONS
1152 __DEVICE__
1153 inline
1154 double __dmul_ru(double x, double y) { return __ocml_mul_rtp_f64(x, y); }
1155 __DEVICE__
1156 inline
1157 double __dmul_rz(double x, double y) { return __ocml_mul_rtz_f64(x, y); }
1158 __DEVICE__
1159 inline
1160 double __drcp_rd(double x) { return __llvm_amdgcn_rcp_f64(x); }
1161 #endif
1162 __DEVICE__
1163 inline
1164 double __drcp_rn(double x) { return __llvm_amdgcn_rcp_f64(x); }
1165 #if defined OCML_BASIC_ROUNDED_OPERATIONS
1166 __DEVICE__
1167 inline
1168 double __drcp_ru(double x) { return __llvm_amdgcn_rcp_f64(x); }
1169 __DEVICE__
1170 inline
1171 double __drcp_rz(double x) { return __llvm_amdgcn_rcp_f64(x); }
1172 __DEVICE__
1173 inline
1174 double __dsqrt_rd(double x) { return __ocml_sqrt_rtn_f64(x); }
1175 #endif
1176 __DEVICE__
1177 inline
1178 double __dsqrt_rn(double x) { return __ocml_sqrt_f64(x); }
1179 #if defined OCML_BASIC_ROUNDED_OPERATIONS
1180 __DEVICE__
1181 inline
1182 double __dsqrt_ru(double x) { return __ocml_sqrt_rtp_f64(x); }
1183 __DEVICE__
1184 inline
1185 double __dsqrt_rz(double x) { return __ocml_sqrt_rtz_f64(x); }
1186 __DEVICE__
1187 inline
1188 double __dsub_rd(double x, double y) { return __ocml_sub_rtn_f64(x, y); }
1189 #endif
1190 __DEVICE__
1191 inline
1192 double __dsub_rn(double x, double y) { return x - y; }
1193 #if defined OCML_BASIC_ROUNDED_OPERATIONS
1194 __DEVICE__
1195 inline
1196 double __dsub_ru(double x, double y) { return __ocml_sub_rtp_f64(x, y); }
1197 __DEVICE__
1198 inline
1199 double __dsub_rz(double x, double y) { return __ocml_sub_rtz_f64(x, y); }
1200 __DEVICE__
1201 inline
1202 double __fma_rd(double x, double y, double z)
1203 {
1204  return __ocml_fma_rtn_f64(x, y, z);
1205 }
1206 #endif
1207 __DEVICE__
1208 inline
1209 double __fma_rn(double x, double y, double z)
1210 {
1211  return __ocml_fma_f64(x, y, z);
1212 }
1213 #if defined OCML_BASIC_ROUNDED_OPERATIONS
1214 __DEVICE__
1215 inline
1216 double __fma_ru(double x, double y, double z)
1217 {
1218  return __ocml_fma_rtp_f64(x, y, z);
1219 }
1220 __DEVICE__
1221 inline
1222 double __fma_rz(double x, double y, double z)
1223 {
1224  return __ocml_fma_rtz_f64(x, y, z);
1225 }
1226 #endif
1227 // END INTRINSICS
1228 // END DOUBLE
1229 
1230 // BEGIN INTEGER
1231 __DEVICE__
1232 inline
1233 int abs(int x)
1234 {
1235  int sgn = x >> (sizeof(int) * CHAR_BIT - 1);
1236  return (x ^ sgn) - sgn;
1237 }
1238 __DEVICE__
1239 inline
1240 long labs(long x)
1241 {
1242  long sgn = x >> (sizeof(long) * CHAR_BIT - 1);
1243  return (x ^ sgn) - sgn;
1244 }
1245 __DEVICE__
1246 inline
1247 long long llabs(long long x)
1248 {
1249  long long sgn = x >> (sizeof(long long) * CHAR_BIT - 1);
1250  return (x ^ sgn) - sgn;
1251 }
1252 
1253 #if defined(__cplusplus)
1254  __DEVICE__
1255  inline
1256  long abs(long x) { return labs(x); }
1257  __DEVICE__
1258  inline
1259  long long abs(long long x) { return llabs(x); }
1260 #endif
1261 // END INTEGER
1262 
1263 __DEVICE__
1264 inline _Float16 fma(_Float16 x, _Float16 y, _Float16 z) {
1265  return __ocml_fma_f16(x, y, z);
1266 }
1267 
1268 __DEVICE__
1269 inline float fma(float x, float y, float z) {
1270  return fmaf(x, y, z);
1271 }
1272 
1273 #pragma push_macro("__DEF_FLOAT_FUN")
1274 #pragma push_macro("__DEF_FLOAT_FUN2")
1275 #pragma push_macro("__DEF_FLOAT_FUN2I")
1276 #pragma push_macro("__HIP_OVERLOAD")
1277 #pragma push_macro("__HIP_OVERLOAD2")
1278 
1279 // __hip_enable_if::type is a type function which returns __T if __B is true.
1280 template<bool __B, class __T = void>
1282 
1283 template <class __T> struct __hip_enable_if<true, __T> {
1284  typedef __T type;
1285 };
1286 
1287 // __HIP_OVERLOAD1 is used to resolve function calls with integer argument to
1288 // avoid compilation error due to ambibuity. e.g. floor(5) is resolved with
1289 // floor(double).
1290 #define __HIP_OVERLOAD1(__retty, __fn) \
1291  template <typename __T> \
1292  __DEVICE__ \
1293  typename __hip_enable_if<std::numeric_limits<__T>::is_integer, \
1294  __retty>::type \
1295  __fn(__T __x) { \
1296  return ::__fn((double)__x); \
1297  }
1298 
1299 // __HIP_OVERLOAD2 is used to resolve function calls with mixed float/double
1300 // or integer argument to avoid compilation error due to ambibuity. e.g.
1301 // max(5.0f, 6.0) is resolved with max(double, double).
1302 #define __HIP_OVERLOAD2(__retty, __fn) \
1303  template <typename __T1, typename __T2> \
1304  __DEVICE__ typename __hip_enable_if< \
1305  std::numeric_limits<__T1>::is_specialized && \
1306  std::numeric_limits<__T2>::is_specialized, \
1307  __retty>::type \
1308  __fn(__T1 __x, __T2 __y) { \
1309  return __fn((double)__x, (double)__y); \
1310  }
1311 
1312 // Define cmath functions with float argument and returns float.
1313 #define __DEF_FUN1(retty, func) \
1314 __DEVICE__ \
1315 inline \
1316 float func(float x) \
1317 { \
1318  return func##f(x); \
1319 } \
1320 __HIP_OVERLOAD1(retty, func)
1321 
1322 // Define cmath functions with float argument and returns retty.
1323 #define __DEF_FUNI(retty, func) \
1324 __DEVICE__ \
1325 inline \
1326 retty func(float x) \
1327 { \
1328  return func##f(x); \
1329 } \
1330 __HIP_OVERLOAD1(retty, func)
1331 
1332 // define cmath functions with two float arguments.
1333 #define __DEF_FUN2(retty, func) \
1334 __DEVICE__ \
1335 inline \
1336 float func(float x, float y) \
1337 { \
1338  return func##f(x, y); \
1339 } \
1340 __HIP_OVERLOAD2(retty, func)
1341 
1342 __DEF_FUN1(double, acos)
1343 __DEF_FUN1(double, acosh)
1344 __DEF_FUN1(double, asin)
1345 __DEF_FUN1(double, asinh)
1346 __DEF_FUN1(double, atan)
1347 __DEF_FUN2(double, atan2);
1348 __DEF_FUN1(double, atanh)
1349 __DEF_FUN1(double, cbrt)
1350 __DEF_FUN1(double, ceil)
1351 __DEF_FUN2(double, copysign);
1352 __DEF_FUN1(double, cos)
1353 __DEF_FUN1(double, cosh)
1354 __DEF_FUN1(double, erf)
1355 __DEF_FUN1(double, erfc)
1356 __DEF_FUN1(double, exp)
1357 __DEF_FUN1(double, exp2)
1358 __DEF_FUN1(double, expm1)
1359 __DEF_FUN1(double, fabs)
1360 __DEF_FUN2(double, fdim);
1361 __DEF_FUN1(double, floor)
1362 __DEF_FUN2(double, fmax);
1363 __DEF_FUN2(double, fmin);
1364 __DEF_FUN2(double, fmod);
1365 //__HIP_OVERLOAD1(int, fpclassify)
1366 __DEF_FUN2(double, hypot);
1367 __DEF_FUNI(int, ilogb)
1368 __HIP_OVERLOAD1(bool, isfinite)
1369 __HIP_OVERLOAD2(bool, isgreater);
1370 __HIP_OVERLOAD2(bool, isgreaterequal);
1371 __HIP_OVERLOAD1(bool, isinf);
1372 __HIP_OVERLOAD2(bool, isless);
1373 __HIP_OVERLOAD2(bool, islessequal);
1374 __HIP_OVERLOAD2(bool, islessgreater);
1375 __HIP_OVERLOAD1(bool, isnan);
1376 //__HIP_OVERLOAD1(bool, isnormal)
1377 __HIP_OVERLOAD2(bool, isunordered);
1378 __DEF_FUN1(double, lgamma)
1379 __DEF_FUN1(double, log)
1380 __DEF_FUN1(double, log10)
1381 __DEF_FUN1(double, log1p)
1382 __DEF_FUN1(double, log2)
1383 __DEF_FUN1(double, logb)
1384 __DEF_FUNI(long long, llrint)
1385 __DEF_FUNI(long long, llround)
1386 __DEF_FUNI(long, lrint)
1387 __DEF_FUNI(long, lround)
1388 __DEF_FUN1(double, nearbyint);
1389 __DEF_FUN2(double, nextafter);
1390 __DEF_FUN2(double, pow);
1391 __DEF_FUN2(double, remainder);
1392 __DEF_FUN1(double, rint);
1393 __DEF_FUN1(double, round);
1394 __HIP_OVERLOAD1(bool, signbit)
1395 __DEF_FUN1(double, sin)
1396 __DEF_FUN1(double, sinh)
1397 __DEF_FUN1(double, sqrt)
1398 __DEF_FUN1(double, tan)
1399 __DEF_FUN1(double, tanh)
1400 __DEF_FUN1(double, tgamma)
1401 __DEF_FUN1(double, trunc);
1402 
1403 // define cmath functions with a float and an integer argument.
1404 #define __DEF_FLOAT_FUN2I(func) \
1405 __DEVICE__ \
1406 inline \
1407 float func(float x, int y) \
1408 { \
1409  return func##f(x, y); \
1410 }
1411 __DEF_FLOAT_FUN2I(scalbn)
1412 
1413 template<class T>
1414 __DEVICE__ inline static T min(T arg1, T arg2) {
1415  return (arg1 < arg2) ? arg1 : arg2;
1416 }
1417 
1418 template<class T>
1419 __DEVICE__ inline static T max(T arg1, T arg2) {
1420  return (arg1 > arg2) ? arg1 : arg2;
1421 }
1422 
1423 #if __HCC__
1424 
1425 __DEVICE__ inline static uint32_t min(uint32_t arg1, int32_t arg2) {
1426  return min(arg1, (uint32_t) arg2);
1427 }
1428 /*__DEVICE__ inline static uint32_t min(int32_t arg1, uint32_t arg2) {
1429  return min((uint32_t) arg1, arg2);
1430 }
1431 
1432 __DEVICE__ inline static uint64_t min(uint64_t arg1, int64_t arg2) {
1433  return min(arg1, (uint64_t) arg2);
1434 }
1435 __DEVICE__ inline static uint64_t min(int64_t arg1, uint64_t arg2) {
1436  return min((uint64_t) arg1, arg2);
1437 }
1438 
1439 __DEVICE__ inline static unsigned long long min(unsigned long long arg1, long long arg2) {
1440  return min(arg1, (unsigned long long) arg2);
1441 }
1442 __DEVICE__ inline static unsigned long long min(long long arg1, unsigned long long arg2) {
1443  return min((unsigned long long) arg1, arg2);
1444 }*/
1445 
1446 __DEVICE__ inline static uint32_t max(uint32_t arg1, int32_t arg2) {
1447  return max(arg1, (uint32_t) arg2);
1448 }
1449 __DEVICE__ inline static uint32_t max(int32_t arg1, uint32_t arg2) {
1450  return max((uint32_t) arg1, arg2);
1451 }
1452 
1453 /*__DEVICE__ inline static uint64_t max(uint64_t arg1, int64_t arg2) {
1454  return max(arg1, (uint64_t) arg2);
1455 }
1456 __DEVICE__ inline static uint64_t max(int64_t arg1, uint64_t arg2) {
1457  return max((uint64_t) arg1, arg2);
1458 }
1459 
1460 __DEVICE__ inline static unsigned long long max(unsigned long long arg1, long long arg2) {
1461  return max(arg1, (unsigned long long) arg2);
1462 }
1463 __DEVICE__ inline static unsigned long long max(long long arg1, unsigned long long arg2) {
1464  return max((unsigned long long) arg1, arg2);
1465 }*/
1466 #else
1467 __DEVICE__ inline int min(int arg1, int arg2) {
1468  return (arg1 < arg2) ? arg1 : arg2;
1469 }
1470 __DEVICE__ inline int max(int arg1, int arg2) {
1471  return (arg1 > arg2) ? arg1 : arg2;
1472 }
1473 
1474 __DEVICE__
1475 inline
1476 float max(float x, float y) {
1477  return fmaxf(x, y);
1478 }
1479 
1480 __DEVICE__
1481 inline
1482 double max(double x, double y) {
1483  return fmax(x, y);
1484 }
1485 
1486 __DEVICE__
1487 inline
1488 float min(float x, float y) {
1489  return fminf(x, y);
1490 }
1491 
1492 __DEVICE__
1493 inline
1494 double min(double x, double y) {
1495  return fmin(x, y);
1496 }
1497 
1498 __HIP_OVERLOAD2(double, max)
1499 __HIP_OVERLOAD2(double, min)
1500 
1501 #endif
1502 
1503 __host__ inline static int min(int arg1, int arg2) {
1504  return std::min(arg1, arg2);
1505 }
1506 
1507 __host__ inline static int max(int arg1, int arg2) {
1508  return std::max(arg1, arg2);
1509 }
1510 
1511 
1512 #pragma pop_macro("__DEF_FLOAT_FUN")
1513 #pragma pop_macro("__DEF_FLOAT_FUN2")
1514 #pragma pop_macro("__DEF_FLOAT_FUN2I")
1515 #pragma pop_macro("__HIP_OVERLOAD")
1516 #pragma pop_macro("__HIP_OVERLOAD2")
1517 #pragma pop_macro("__DEVICE__")
1518 #pragma pop_macro("__RETURN_TYPE")
1519 
1520 // For backward compatibility.
1521 // There are HIP applications e.g. TensorFlow, expecting __HIP_ARCH_* macros
1522 // defined after including math_functions.h.
Definition: hip_vector_types.h:1369
TODO-doc.
#define __host__
Definition: host_defines.h:41
Definition: hip_vector_types.h:1356
Contains definitions of APIs for HIP runtime.
Defines the different newt vector types for HIP runtime.
Definition: hip_vector_types.h:1376
Definition: hip_vector_types.h:1363
Definition: math_functions.h:1281