HIP: Heterogenous-computing Interface for Portability
math_functions.h
1 /*
2 Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved.
3 
4 Permission is hereby granted, free of charge, to any person obtaining a copy
5 of this software and associated documentation files (the "Software"), to deal
6 in the Software without restriction, including without limitation the rights
7 to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
8 copies of the Software, and to permit persons to whom the Software is
9 furnished to do so, subject to the following conditions:
10 
11 The above copyright notice and this permission notice shall be included in
12 all copies or substantial portions of the Software.
13 
14 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
17 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
19 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
20 THE SOFTWARE.
21 */
22 
23 #pragma once
24 
25 #include "hip_fp16_math_fwd.h"
26 #include "math_fwd.h"
27 
29 
30 #include <algorithm>
31 #include <assert.h>
32 #include <limits.h>
33 #include <limits>
34 #include <stdint.h>
35 
36 // HCC's own math functions should be included first, otherwise there will
37 // be conflicts when hip/math_functions.h is included before hip/hip_runtime.h.
38 #ifdef __HCC__
39 #include "kalmar_math.h"
40 #endif
41 
42 #pragma push_macro("__DEVICE__")
43 #pragma push_macro("__RETURN_TYPE")
44 
45 #ifdef __HCC__
46 #define __DEVICE__ __device__
47 #define __RETURN_TYPE int
48 #else // to be consistent with __clang_cuda_math_forward_declares
49 #define __DEVICE__ static __device__
50 #define __RETURN_TYPE bool
51 #endif
52 
53 __DEVICE__
54 inline
55 uint64_t __make_mantissa_base8(const char* tagp)
56 {
57  uint64_t r = 0;
58  while (tagp) {
59  char tmp = *tagp;
60 
61  if (tmp >= '0' && tmp <= '7') r = (r * 8u) + tmp - '0';
62  else return 0;
63 
64  ++tagp;
65  }
66 
67  return r;
68 }
69 
70 __DEVICE__
71 inline
72 uint64_t __make_mantissa_base10(const char* tagp)
73 {
74  uint64_t r = 0;
75  while (tagp) {
76  char tmp = *tagp;
77 
78  if (tmp >= '0' && tmp <= '9') r = (r * 10u) + tmp - '0';
79  else return 0;
80 
81  ++tagp;
82  }
83 
84  return r;
85 }
86 
87 __DEVICE__
88 inline
89 uint64_t __make_mantissa_base16(const char* tagp)
90 {
91  uint64_t r = 0;
92  while (tagp) {
93  char tmp = *tagp;
94 
95  if (tmp >= '0' && tmp <= '9') r = (r * 16u) + tmp - '0';
96  else if (tmp >= 'a' && tmp <= 'f') r = (r * 16u) + tmp - 'a' + 10;
97  else if (tmp >= 'A' && tmp <= 'F') r = (r * 16u) + tmp - 'A' + 10;
98  else return 0;
99 
100  ++tagp;
101  }
102 
103  return r;
104 }
105 
106 __DEVICE__
107 inline
108 uint64_t __make_mantissa(const char* tagp)
109 {
110  if (!tagp) return 0u;
111 
112  if (*tagp == '0') {
113  ++tagp;
114 
115  if (*tagp == 'x' || *tagp == 'X') return __make_mantissa_base16(tagp);
116  else return __make_mantissa_base8(tagp);
117  }
118 
119  return __make_mantissa_base10(tagp);
120 }
121 
122 // BEGIN FLOAT
123 __DEVICE__
124 inline
125 float abs(float x) { return __ocml_fabs_f32(x); }
126 __DEVICE__
127 inline
128 float acosf(float x) { return __ocml_acos_f32(x); }
129 __DEVICE__
130 inline
131 float acoshf(float x) { return __ocml_acosh_f32(x); }
132 __DEVICE__
133 inline
134 float asinf(float x) { return __ocml_asin_f32(x); }
135 __DEVICE__
136 inline
137 float asinhf(float x) { return __ocml_asinh_f32(x); }
138 __DEVICE__
139 inline
140 float atan2f(float x, float y) { return __ocml_atan2_f32(x, y); }
141 __DEVICE__
142 inline
143 float atanf(float x) { return __ocml_atan_f32(x); }
144 __DEVICE__
145 inline
146 float atanhf(float x) { return __ocml_atanh_f32(x); }
147 __DEVICE__
148 inline
149 float cbrtf(float x) { return __ocml_cbrt_f32(x); }
150 __DEVICE__
151 inline
152 float ceilf(float x) { return __ocml_ceil_f32(x); }
153 __DEVICE__
154 inline
155 float copysignf(float x, float y) { return __ocml_copysign_f32(x, y); }
156 __DEVICE__
157 inline
158 float cosf(float x) { return __ocml_cos_f32(x); }
159 __DEVICE__
160 inline
161 float coshf(float x) { return __ocml_cosh_f32(x); }
162 __DEVICE__
163 inline
164 float cospif(float x) { return __ocml_cospi_f32(x); }
165 __DEVICE__
166 inline
167 float cyl_bessel_i0f(float x) { return __ocml_i0_f32(x); }
168 __DEVICE__
169 inline
170 float cyl_bessel_i1f(float x) { return __ocml_i1_f32(x); }
171 __DEVICE__
172 inline
173 float erfcf(float x) { return __ocml_erfc_f32(x); }
174 __DEVICE__
175 inline
176 float erfcinvf(float x) { return __ocml_erfcinv_f32(x); }
177 __DEVICE__
178 inline
179 float erfcxf(float x) { return __ocml_erfcx_f32(x); }
180 __DEVICE__
181 inline
182 float erff(float x) { return __ocml_erf_f32(x); }
183 __DEVICE__
184 inline
185 float erfinvf(float x) { return __ocml_erfinv_f32(x); }
186 __DEVICE__
187 inline
188 float exp10f(float x) { return __ocml_exp10_f32(x); }
189 __DEVICE__
190 inline
191 float exp2f(float x) { return __ocml_exp2_f32(x); }
192 __DEVICE__
193 inline
194 float expf(float x) { return __ocml_exp_f32(x); }
195 __DEVICE__
196 inline
197 float expm1f(float x) { return __ocml_expm1_f32(x); }
198 __DEVICE__
199 inline
200 float fabsf(float x) { return __ocml_fabs_f32(x); }
201 __DEVICE__
202 inline
203 float fdimf(float x, float y) { return __ocml_fdim_f32(x, y); }
204 __DEVICE__
205 inline
206 float fdividef(float x, float y) { return x / y; }
207 __DEVICE__
208 inline
209 float floorf(float x) { return __ocml_floor_f32(x); }
210 __DEVICE__
211 inline
212 float fmaf(float x, float y, float z) { return __ocml_fma_f32(x, y, z); }
213 __DEVICE__
214 inline
215 float fmaxf(float x, float y) { return __ocml_fmax_f32(x, y); }
216 __DEVICE__
217 inline
218 float fminf(float x, float y) { return __ocml_fmin_f32(x, y); }
219 __DEVICE__
220 inline
221 float fmodf(float x, float y) { return __ocml_fmod_f32(x, y); }
222 __DEVICE__
223 inline
224 float frexpf(float x, int* nptr)
225 {
226  int tmp;
227  float r =
228  __ocml_frexp_f32(x, (__attribute__((address_space(5))) int*) &tmp);
229  *nptr = tmp;
230 
231  return r;
232 }
233 __DEVICE__
234 inline
235 float hypotf(float x, float y) { return __ocml_hypot_f32(x, y); }
236 __DEVICE__
237 inline
238 int ilogbf(float x) { return __ocml_ilogb_f32(x); }
239 __DEVICE__
240 inline
241 __RETURN_TYPE isfinite(float x) { return __ocml_isfinite_f32(x); }
242 __DEVICE__
243 inline
244 __RETURN_TYPE isinf(float x) { return __ocml_isinf_f32(x); }
245 __DEVICE__
246 inline
247 __RETURN_TYPE isnan(float x) { return __ocml_isnan_f32(x); }
248 __DEVICE__
249 inline
250 float j0f(float x) { return __ocml_j0_f32(x); }
251 __DEVICE__
252 inline
253 float j1f(float x) { return __ocml_j1_f32(x); }
254 __DEVICE__
255 inline
256 float jnf(int n, float x)
257 { // TODO: we could use Ahmes multiplication and the Miller & Brown algorithm
258  // for linear recurrences to get O(log n) steps, but it's unclear if
259  // it'd be beneficial in this case.
260  if (n == 0) return j0f(x);
261  if (n == 1) return j1f(x);
262 
263  float x0 = j0f(x);
264  float x1 = j1f(x);
265  for (int i = 1; i < n; ++i) {
266  float x2 = (2 * i) / x * x1 - x0;
267  x0 = x1;
268  x1 = x2;
269  }
270 
271  return x1;
272 }
273 __DEVICE__
274 inline
275 float ldexpf(float x, int e) { return __ocml_ldexp_f32(x, e); }
276 __DEVICE__
277 inline
278 float lgammaf(float x) { return __ocml_lgamma_f32(x); }
279 __DEVICE__
280 inline
281 long long int llrintf(float x) { return __ocml_rint_f32(x); }
282 __DEVICE__
283 inline
284 long long int llroundf(float x) { return __ocml_round_f32(x); }
285 __DEVICE__
286 inline
287 float log10f(float x) { return __ocml_log10_f32(x); }
288 __DEVICE__
289 inline
290 float log1pf(float x) { return __ocml_log1p_f32(x); }
291 __DEVICE__
292 inline
293 float log2f(float x) { return __ocml_log2_f32(x); }
294 __DEVICE__
295 inline
296 float logbf(float x) { return __ocml_logb_f32(x); }
297 __DEVICE__
298 inline
299 float logf(float x) { return __ocml_log_f32(x); }
300 __DEVICE__
301 inline
302 long int lrintf(float x) { return __ocml_rint_f32(x); }
303 __DEVICE__
304 inline
305 long int lroundf(float x) { return __ocml_round_f32(x); }
306 __DEVICE__
307 inline
308 float modff(float x, float* iptr)
309 {
310  float tmp;
311  float r =
312  __ocml_modf_f32(x, (__attribute__((address_space(5))) float*) &tmp);
313  *iptr = tmp;
314 
315  return r;
316 }
317 __DEVICE__
318 inline
319 float nanf(const char* tagp)
320 {
321  union {
322  float val;
323  struct ieee_float {
324  uint32_t mantissa : 22;
325  uint32_t quiet : 1;
326  uint32_t exponent : 8;
327  uint32_t sign : 1;
328  } bits;
329 
330  static_assert(sizeof(float) == sizeof(ieee_float), "");
331  } tmp;
332 
333  tmp.bits.sign = 0u;
334  tmp.bits.exponent = ~0u;
335  tmp.bits.quiet = 1u;
336  tmp.bits.mantissa = __make_mantissa(tagp);
337 
338  return tmp.val;
339 }
340 __DEVICE__
341 inline
342 float nearbyintf(float x) { return __ocml_nearbyint_f32(x); }
343 __DEVICE__
344 inline
345 float nextafterf(float x, float y) { return __ocml_nextafter_f32(x, y); }
346 __DEVICE__
347 inline
348 float norm3df(float x, float y, float z) { return __ocml_len3_f32(x, y, z); }
349 __DEVICE__
350 inline
351 float norm4df(float x, float y, float z, float w)
352 {
353  return __ocml_len4_f32(x, y, z, w);
354 }
355 __DEVICE__
356 inline
357 float normcdff(float x) { return __ocml_ncdf_f32(x); }
358 __DEVICE__
359 inline
360 float normcdfinvf(float x) { return __ocml_ncdfinv_f32(x); }
361 __DEVICE__
362 inline
363 float normf(int dim, const float* a)
364 { // TODO: placeholder until OCML adds support.
365  float r = 0;
366  while (dim--) { r += a[0] * a[0]; ++a; }
367 
368  return __ocml_sqrt_f32(r);
369 }
370 __DEVICE__
371 inline
372 float powf(float x, float y) { return __ocml_pow_f32(x, y); }
373 __DEVICE__
374 inline
375 float rcbrtf(float x) { return __ocml_rcbrt_f32(x); }
376 __DEVICE__
377 inline
378 float remainderf(float x, float y) { return __ocml_remainder_f32(x, y); }
379 __DEVICE__
380 inline
381 float remquof(float x, float y, int* quo)
382 {
383  int tmp;
384  float r =
385  __ocml_remquo_f32(x, y, (__attribute__((address_space(5))) int*) &tmp);
386  *quo = tmp;
387 
388  return r;
389 }
390 __DEVICE__
391 inline
392 float rhypotf(float x, float y) { return __ocml_rhypot_f32(x, y); }
393 __DEVICE__
394 inline
395 float rintf(float x) { return __ocml_rint_f32(x); }
396 __DEVICE__
397 inline
398 float rnorm3df(float x, float y, float z)
399 {
400  return __ocml_rlen3_f32(x, y, z);
401 }
402 
403 __DEVICE__
404 inline
405 float rnorm4df(float x, float y, float z, float w)
406 {
407  return __ocml_rlen4_f32(x, y, z, w);
408 }
409 __DEVICE__
410 inline
411 float rnormf(int dim, const float* a)
412 { // TODO: placeholder until OCML adds support.
413  float r = 0;
414  while (dim--) { r += a[0] * a[0]; ++a; }
415 
416  return __ocml_rsqrt_f32(r);
417 }
418 __DEVICE__
419 inline
420 float roundf(float x) { return __ocml_round_f32(x); }
421 __DEVICE__
422 inline
423 float rsqrtf(float x) { return __ocml_rsqrt_f32(x); }
424 __DEVICE__
425 inline
426 float scalblnf(float x, long int n)
427 {
428  return (n < INT_MAX) ? __ocml_scalbn_f32(x, n) : __ocml_scalb_f32(x, n);
429 }
430 __DEVICE__
431 inline
432 float scalbnf(float x, int n) { return __ocml_scalbn_f32(x, n); }
433 __DEVICE__
434 inline
435 __RETURN_TYPE signbit(float x) { return __ocml_signbit_f32(x); }
436 __DEVICE__
437 inline
438 void sincosf(float x, float* sptr, float* cptr)
439 {
440  float tmp;
441 
442  *sptr =
443  __ocml_sincos_f32(x, (__attribute__((address_space(5))) float*) &tmp);
444  *cptr = tmp;
445 }
446 __DEVICE__
447 inline
448 void sincospif(float x, float* sptr, float* cptr)
449 {
450  float tmp;
451 
452  *sptr =
453  __ocml_sincospi_f32(x, (__attribute__((address_space(5))) float*) &tmp);
454  *cptr = tmp;
455 }
456 __DEVICE__
457 inline
458 float sinf(float x) { return __ocml_sin_f32(x); }
459 __DEVICE__
460 inline
461 float sinhf(float x) { return __ocml_sinh_f32(x); }
462 __DEVICE__
463 inline
464 float sinpif(float x) { return __ocml_sinpi_f32(x); }
465 __DEVICE__
466 inline
467 float sqrtf(float x) { return __ocml_sqrt_f32(x); }
468 __DEVICE__
469 inline
470 float tanf(float x) { return __ocml_tan_f32(x); }
471 __DEVICE__
472 inline
473 float tanhf(float x) { return __ocml_tanh_f32(x); }
474 __DEVICE__
475 inline
476 float tgammaf(float x) { return __ocml_tgamma_f32(x); }
477 __DEVICE__
478 inline
479 float truncf(float x) { return __ocml_trunc_f32(x); }
480 __DEVICE__
481 inline
482 float y0f(float x) { return __ocml_y0_f32(x); }
483 __DEVICE__
484 inline
485 float y1f(float x) { return __ocml_y1_f32(x); }
486 __DEVICE__
487 inline
488 float ynf(int n, float x)
489 { // TODO: we could use Ahmes multiplication and the Miller & Brown algorithm
490  // for linear recurrences to get O(log n) steps, but it's unclear if
491  // it'd be beneficial in this case. Placeholder until OCML adds
492  // support.
493  if (n == 0) return y0f(x);
494  if (n == 1) return y1f(x);
495 
496  float x0 = y0f(x);
497  float x1 = y1f(x);
498  for (int i = 1; i < n; ++i) {
499  float x2 = (2 * i) / x * x1 - x0;
500  x0 = x1;
501  x1 = x2;
502  }
503 
504  return x1;
505 }
506 
507 // BEGIN INTRINSICS
508 __DEVICE__
509 inline
510 float __cosf(float x) { return __ocml_native_cos_f32(x); }
511 __DEVICE__
512 inline
513 float __exp10f(float x) { return __ocml_native_exp10_f32(x); }
514 __DEVICE__
515 inline
516 float __expf(float x) { return __ocml_native_exp_f32(x); }
517 #if defined OCML_BASIC_ROUNDED_OPERATIONS
518 __DEVICE__
519 inline
520 float __fadd_rd(float x, float y) { return __ocml_add_rtn_f32(x, y); }
521 #endif
522 __DEVICE__
523 inline
524 float __fadd_rn(float x, float y) { return x + y; }
525 #if defined OCML_BASIC_ROUNDED_OPERATIONS
526 __DEVICE__
527 inline
528 float __fadd_ru(float x, float y) { return __ocml_add_rtp_f32(x, y); }
529 __DEVICE__
530 inline
531 float __fadd_rz(float x, float y) { return __ocml_add_rtz_f32(x, y); }
532 __DEVICE__
533 inline
534 float __fdiv_rd(float x, float y) { return __ocml_div_rtn_f32(x, y); }
535 #endif
536 __DEVICE__
537 inline
538 float __fdiv_rn(float x, float y) { return x / y; }
539 #if defined OCML_BASIC_ROUNDED_OPERATIONS
540 __DEVICE__
541 inline
542 float __fdiv_ru(float x, float y) { return __ocml_div_rtp_f32(x, y); }
543 __DEVICE__
544 inline
545 float __fdiv_rz(float x, float y) { return __ocml_div_rtz_f32(x, y); }
546 #endif
547 __DEVICE__
548 inline
549 float __fdividef(float x, float y) { return x / y; }
550 #if defined OCML_BASIC_ROUNDED_OPERATIONS
551 __DEVICE__
552 inline
553 float __fmaf_rd(float x, float y, float z)
554 {
555  return __ocml_fma_rtn_f32(x, y, z);
556 }
557 #endif
558 __DEVICE__
559 inline
560 float __fmaf_rn(float x, float y, float z)
561 {
562  return __ocml_fma_f32(x, y, z);
563 }
564 #if defined OCML_BASIC_ROUNDED_OPERATIONS
565 __DEVICE__
566 inline
567 float __fmaf_ru(float x, float y, float z)
568 {
569  return __ocml_fma_rtp_f32(x, y, z);
570 }
571 __DEVICE__
572 inline
573 float __fmaf_rz(float x, float y, float z)
574 {
575  return __ocml_fma_rtz_f32(x, y, z);
576 }
577 __DEVICE__
578 inline
579 float __fmul_rd(float x, float y) { return __ocml_mul_rtn_f32(x, y); }
580 #endif
581 __DEVICE__
582 inline
583 float __fmul_rn(float x, float y) { return x * y; }
584 #if defined OCML_BASIC_ROUNDED_OPERATIONS
585 __DEVICE__
586 inline
587 float __fmul_ru(float x, float y) { return __ocml_mul_rtp_f32(x, y); }
588 __DEVICE__
589 inline
590 float __fmul_rz(float x, float y) { return __ocml_mul_rtz_f32(x, y); }
591 __DEVICE__
592 inline
593 float __frcp_rd(float x) { return __llvm_amdgcn_rcp_f32(x); }
594 #endif
595 __DEVICE__
596 inline
597 float __frcp_rn(float x) { return __llvm_amdgcn_rcp_f32(x); }
598 #if defined OCML_BASIC_ROUNDED_OPERATIONS
599 __DEVICE__
600 inline
601 float __frcp_ru(float x) { return __llvm_amdgcn_rcp_f32(x); }
602 __DEVICE__
603 inline
604 float __frcp_rz(float x) { return __llvm_amdgcn_rcp_f32(x); }
605 #endif
606 __DEVICE__
607 inline
608 float __frsqrt_rn(float x) { return __llvm_amdgcn_rsq_f32(x); }
609 #if defined OCML_BASIC_ROUNDED_OPERATIONS
610 __DEVICE__
611 inline
612 float __fsqrt_rd(float x) { return __ocml_sqrt_rtn_f32(x); }
613 #endif
614 __DEVICE__
615 inline
616 float __fsqrt_rn(float x) { return __ocml_native_sqrt_f32(x); }
617 #if defined OCML_BASIC_ROUNDED_OPERATIONS
618 __DEVICE__
619 inline
620 float __fsqrt_ru(float x) { return __ocml_sqrt_rtp_f32(x); }
621 __DEVICE__
622 inline
623 float __fsqrt_rz(float x) { return __ocml_sqrt_rtz_f32(x); }
624 __DEVICE__
625 inline
626 float __fsub_rd(float x, float y) { return __ocml_sub_rtn_f32(x, y); }
627 #endif
628 __DEVICE__
629 inline
630 float __fsub_rn(float x, float y) { return x - y; }
631 #if defined OCML_BASIC_ROUNDED_OPERATIONS
632 __DEVICE__
633 inline
634 float __fsub_ru(float x, float y) { return __ocml_sub_rtp_f32(x, y); }
635 __DEVICE__
636 inline
637 float __fsub_rz(float x, float y) { return __ocml_sub_rtz_f32(x, y); }
638 #endif
639 __DEVICE__
640 inline
641 float __log10f(float x) { return __ocml_native_log10_f32(x); }
642 __DEVICE__
643 inline
644 float __log2f(float x) { return __ocml_native_log2_f32(x); }
645 __DEVICE__
646 inline
647 float __logf(float x) { return __ocml_native_log_f32(x); }
648 __DEVICE__
649 inline
650 float __powf(float x, float y) { return __ocml_pow_f32(x, y); }
651 __DEVICE__
652 inline
653 float __saturatef(float x) { return (x < 0) ? 0 : ((x > 1) ? 1 : x); }
654 __DEVICE__
655 inline
656 void __sincosf(float x, float* sptr, float* cptr)
657 {
658  *sptr = __ocml_native_sin_f32(x);
659  *cptr = __ocml_native_cos_f32(x);
660 }
661 __DEVICE__
662 inline
663 float __sinf(float x) { return __ocml_native_sin_f32(x); }
664 __DEVICE__
665 inline
666 float __tanf(float x) { return __ocml_tan_f32(x); }
667 // END INTRINSICS
668 // END FLOAT
669 
670 // BEGIN DOUBLE
671 __DEVICE__
672 inline
673 double abs(double x) { return __ocml_fabs_f64(x); }
674 __DEVICE__
675 inline
676 double acos(double x) { return __ocml_acos_f64(x); }
677 __DEVICE__
678 inline
679 double acosh(double x) { return __ocml_acosh_f64(x); }
680 __DEVICE__
681 inline
682 double asin(double x) { return __ocml_asin_f64(x); }
683 __DEVICE__
684 inline
685 double asinh(double x) { return __ocml_asinh_f64(x); }
686 __DEVICE__
687 inline
688 double atan(double x) { return __ocml_atan_f64(x); }
689 __DEVICE__
690 inline
691 double atan2(double x, double y) { return __ocml_atan2_f64(x, y); }
692 __DEVICE__
693 inline
694 double atanh(double x) { return __ocml_atanh_f64(x); }
695 __DEVICE__
696 inline
697 double cbrt(double x) { return __ocml_cbrt_f64(x); }
698 __DEVICE__
699 inline
700 double ceil(double x) { return __ocml_ceil_f64(x); }
701 __DEVICE__
702 inline
703 double copysign(double x, double y) { return __ocml_copysign_f64(x, y); }
704 __DEVICE__
705 inline
706 double cos(double x) { return __ocml_cos_f64(x); }
707 __DEVICE__
708 inline
709 double cosh(double x) { return __ocml_cosh_f64(x); }
710 __DEVICE__
711 inline
712 double cospi(double x) { return __ocml_cospi_f64(x); }
713 __DEVICE__
714 inline
715 double cyl_bessel_i0(double x) { return __ocml_i0_f64(x); }
716 __DEVICE__
717 inline
718 double cyl_bessel_i1(double x) { return __ocml_i1_f64(x); }
719 __DEVICE__
720 inline
721 double erf(double x) { return __ocml_erf_f64(x); }
722 __DEVICE__
723 inline
724 double erfc(double x) { return __ocml_erfc_f64(x); }
725 __DEVICE__
726 inline
727 double erfcinv(double x) { return __ocml_erfcinv_f64(x); }
728 __DEVICE__
729 inline
730 double erfcx(double x) { return __ocml_erfcx_f64(x); }
731 __DEVICE__
732 inline
733 double erfinv(double x) { return __ocml_erfinv_f64(x); }
734 __DEVICE__
735 inline
736 double exp(double x) { return __ocml_exp_f64(x); }
737 __DEVICE__
738 inline
739 double exp10(double x) { return __ocml_exp10_f64(x); }
740 __DEVICE__
741 inline
742 double exp2(double x) { return __ocml_exp2_f64(x); }
743 __DEVICE__
744 inline
745 double expm1(double x) { return __ocml_expm1_f64(x); }
746 __DEVICE__
747 inline
748 double fabs(double x) { return __ocml_fabs_f64(x); }
749 __DEVICE__
750 inline
751 double fdim(double x, double y) { return __ocml_fdim_f64(x, y); }
752 __DEVICE__
753 inline
754 double floor(double x) { return __ocml_floor_f64(x); }
755 __DEVICE__
756 inline
757 double fma(double x, double y, double z) { return __ocml_fma_f64(x, y, z); }
758 __DEVICE__
759 inline
760 double fmax(double x, double y) { return __ocml_fmax_f64(x, y); }
761 __DEVICE__
762 inline
763 double fmin(double x, double y) { return __ocml_fmin_f64(x, y); }
764 __DEVICE__
765 inline
766 double fmod(double x, double y) { return __ocml_fmod_f64(x, y); }
767 __DEVICE__
768 inline
769 double frexp(double x, int* nptr)
770 {
771  int tmp;
772  double r =
773  __ocml_frexp_f64(x, (__attribute__((address_space(5))) int*) &tmp);
774  *nptr = tmp;
775 
776  return r;
777 }
778 __DEVICE__
779 inline
780 double hypot(double x, double y) { return __ocml_hypot_f64(x, y); }
781 __DEVICE__
782 inline
783 int ilogb(double x) { return __ocml_ilogb_f64(x); }
784 __DEVICE__
785 inline
786 __RETURN_TYPE isfinite(double x) { return __ocml_isfinite_f64(x); }
787 __DEVICE__
788 inline
789 __RETURN_TYPE isinf(double x) { return __ocml_isinf_f64(x); }
790 __DEVICE__
791 inline
792 __RETURN_TYPE isnan(double x) { return __ocml_isnan_f64(x); }
793 __DEVICE__
794 inline
795 double j0(double x) { return __ocml_j0_f64(x); }
796 __DEVICE__
797 inline
798 double j1(double x) { return __ocml_j1_f64(x); }
799 __DEVICE__
800 inline
801 double jn(int n, double x)
802 { // TODO: we could use Ahmes multiplication and the Miller & Brown algorithm
803  // for linear recurrences to get O(log n) steps, but it's unclear if
804  // it'd be beneficial in this case. Placeholder until OCML adds
805  // support.
806  if (n == 0) return j0f(x);
807  if (n == 1) return j1f(x);
808 
809  double x0 = j0f(x);
810  double x1 = j1f(x);
811  for (int i = 1; i < n; ++i) {
812  double x2 = (2 * i) / x * x1 - x0;
813  x0 = x1;
814  x1 = x2;
815  }
816 
817  return x1;
818 }
819 __DEVICE__
820 inline
821 double ldexp(double x, int e) { return __ocml_ldexp_f64(x, e); }
822 __DEVICE__
823 inline
824 double lgamma(double x) { return __ocml_lgamma_f64(x); }
825 __DEVICE__
826 inline
827 long long int llrint(double x) { return __ocml_rint_f64(x); }
828 __DEVICE__
829 inline
830 long long int llround(double x) { return __ocml_round_f64(x); }
831 __DEVICE__
832 inline
833 double log(double x) { return __ocml_log_f64(x); }
834 __DEVICE__
835 inline
836 double log10(double x) { return __ocml_log10_f64(x); }
837 __DEVICE__
838 inline
839 double log1p(double x) { return __ocml_log1p_f64(x); }
840 __DEVICE__
841 inline
842 double log2(double x) { return __ocml_log2_f64(x); }
843 __DEVICE__
844 inline
845 double logb(double x) { return __ocml_logb_f64(x); }
846 __DEVICE__
847 inline
848 long int lrint(double x) { return __ocml_rint_f64(x); }
849 __DEVICE__
850 inline
851 long int lround(double x) { return __ocml_round_f64(x); }
852 __DEVICE__
853 inline
854 double modf(double x, double* iptr)
855 {
856  double tmp;
857  double r =
858  __ocml_modf_f64(x, (__attribute__((address_space(5))) double*) &tmp);
859  *iptr = tmp;
860 
861  return r;
862 }
863 __DEVICE__
864 inline
865 double nan(const char* tagp)
866 {
867  union {
868  double val;
869  struct ieee_double {
870  uint64_t mantissa : 51;
871  uint32_t quiet : 1;
872  uint32_t exponent : 11;
873  uint32_t sign : 1;
874  } bits;
875 
876  static_assert(sizeof(double) == sizeof(ieee_double), "");
877  } tmp;
878 
879  tmp.bits.sign = 0u;
880  tmp.bits.exponent = ~0u;
881  tmp.bits.quiet = 1u;
882  tmp.bits.mantissa = __make_mantissa(tagp);
883 
884  return tmp.val;
885 }
886 __DEVICE__
887 inline
888 double nearbyint(double x) { return __ocml_nearbyint_f64(x); }
889 __DEVICE__
890 inline
891 double nextafter(double x, double y) { return __ocml_nextafter_f64(x, y); }
892 __DEVICE__
893 inline
894 double norm(int dim, const double* a)
895 { // TODO: placeholder until OCML adds support.
896  double r = 0;
897  while (dim--) { r += a[0] * a[0]; ++a; }
898 
899  return __ocml_sqrt_f64(r);
900 }
901 __DEVICE__
902 inline
903 double norm3d(double x, double y, double z)
904 {
905  return __ocml_len3_f64(x, y, z);
906 }
907 __DEVICE__
908 inline
909 double norm4d(double x, double y, double z, double w)
910 {
911  return __ocml_len4_f64(x, y, z, w);
912 }
913 __DEVICE__
914 inline
915 double normcdf(double x) { return __ocml_ncdf_f64(x); }
916 __DEVICE__
917 inline
918 double normcdfinv(double x) { return __ocml_ncdfinv_f64(x); }
919 __DEVICE__
920 inline
921 double pow(double x, double y) { return __ocml_pow_f64(x, y); }
922 __DEVICE__
923 inline
924 double rcbrt(double x) { return __ocml_rcbrt_f64(x); }
925 __DEVICE__
926 inline
927 double remainder(double x, double y) { return __ocml_remainder_f64(x, y); }
928 __DEVICE__
929 inline
930 double remquo(double x, double y, int* quo)
931 {
932  int tmp;
933  double r =
934  __ocml_remquo_f64(x, y, (__attribute__((address_space(5))) int*) &tmp);
935  *quo = tmp;
936 
937  return r;
938 }
939 __DEVICE__
940 inline
941 double rhypot(double x, double y) { return __ocml_rhypot_f64(x, y); }
942 __DEVICE__
943 inline
944 double rint(double x) { return __ocml_rint_f64(x); }
945 __DEVICE__
946 inline
947 double rnorm(int dim, const double* a)
948 { // TODO: placeholder until OCML adds support.
949  double r = 0;
950  while (dim--) { r += a[0] * a[0]; ++a; }
951 
952  return __ocml_rsqrt_f64(r);
953 }
954 __DEVICE__
955 inline
956 double rnorm3d(double x, double y, double z)
957 {
958  return __ocml_rlen3_f64(x, y, z);
959 }
960 __DEVICE__
961 inline
962 double rnorm4d(double x, double y, double z, double w)
963 {
964  return __ocml_rlen4_f64(x, y, z, w);
965 }
966 __DEVICE__
967 inline
968 double round(double x) { return __ocml_round_f64(x); }
969 __DEVICE__
970 inline
971 double rsqrt(double x) { return __ocml_rsqrt_f64(x); }
972 __DEVICE__
973 inline
974 double scalbln(double x, long int n)
975 {
976  return (n < INT_MAX) ? __ocml_scalbn_f64(x, n) : __ocml_scalb_f64(x, n);
977 }
978 __DEVICE__
979 inline
980 double scalbn(double x, int n) { return __ocml_scalbn_f64(x, n); }
981 __DEVICE__
982 inline
983 __RETURN_TYPE signbit(double x) { return __ocml_signbit_f64(x); }
984 __DEVICE__
985 inline
986 double sin(double x) { return __ocml_sin_f64(x); }
987 __DEVICE__
988 inline
989 void sincos(double x, double* sptr, double* cptr)
990 {
991  double tmp;
992  *sptr =
993  __ocml_sincos_f64(x, (__attribute__((address_space(5))) double*) &tmp);
994  *cptr = tmp;
995 }
996 __DEVICE__
997 inline
998 void sincospi(double x, double* sptr, double* cptr)
999 {
1000  double tmp;
1001  *sptr = __ocml_sincospi_f64(
1002  x, (__attribute__((address_space(5))) double*) &tmp);
1003  *cptr = tmp;
1004 }
1005 __DEVICE__
1006 inline
1007 double sinh(double x) { return __ocml_sinh_f64(x); }
1008 __DEVICE__
1009 inline
1010 double sinpi(double x) { return __ocml_sinpi_f64(x); }
1011 __DEVICE__
1012 inline
1013 double sqrt(double x) { return __ocml_sqrt_f64(x); }
1014 __DEVICE__
1015 inline
1016 double tan(double x) { return __ocml_tan_f64(x); }
1017 __DEVICE__
1018 inline
1019 double tanh(double x) { return __ocml_tanh_f64(x); }
1020 __DEVICE__
1021 inline
1022 double tgamma(double x) { return __ocml_tgamma_f64(x); }
1023 __DEVICE__
1024 inline
1025 double trunc(double x) { return __ocml_trunc_f64(x); }
1026 __DEVICE__
1027 inline
1028 double y0(double x) { return __ocml_y0_f64(x); }
1029 __DEVICE__
1030 inline
1031 double y1(double x) { return __ocml_y1_f64(x); }
1032 __DEVICE__
1033 inline
1034 double yn(int n, double x)
1035 { // TODO: we could use Ahmes multiplication and the Miller & Brown algorithm
1036  // for linear recurrences to get O(log n) steps, but it's unclear if
1037  // it'd be beneficial in this case. Placeholder until OCML adds
1038  // support.
1039  if (n == 0) return j0f(x);
1040  if (n == 1) return j1f(x);
1041 
1042  double x0 = j0f(x);
1043  double x1 = j1f(x);
1044  for (int i = 1; i < n; ++i) {
1045  double x2 = (2 * i) / x * x1 - x0;
1046  x0 = x1;
1047  x1 = x2;
1048  }
1049 
1050  return x1;
1051 }
1052 
1053 // BEGIN INTRINSICS
1054 #if defined OCML_BASIC_ROUNDED_OPERATIONS
1055 __DEVICE__
1056 inline
1057 double __dadd_rd(double x, double y) { return __ocml_add_rtn_f64(x, y); }
1058 #endif
1059 __DEVICE__
1060 inline
1061 double __dadd_rn(double x, double y) { return x + y; }
1062 #if defined OCML_BASIC_ROUNDED_OPERATIONS
1063 __DEVICE__
1064 inline
1065 double __dadd_ru(double x, double y) { return __ocml_add_rtp_f64(x, y); }
1066 __DEVICE__
1067 inline
1068 double __dadd_rz(double x, double y) { return __ocml_add_rtz_f64(x, y); }
1069 __DEVICE__
1070 inline
1071 double __ddiv_rd(double x, double y) { return __ocml_div_rtn_f64(x, y); }
1072 #endif
1073 __DEVICE__
1074 inline
1075 double __ddiv_rn(double x, double y) { return x / y; }
1076 #if defined OCML_BASIC_ROUNDED_OPERATIONS
1077 __DEVICE__
1078 inline
1079 double __ddiv_ru(double x, double y) { return __ocml_div_rtp_f64(x, y); }
1080 __DEVICE__
1081 inline
1082 double __ddiv_rz(double x, double y) { return __ocml_div_rtz_f64(x, y); }
1083 __DEVICE__
1084 inline
1085 double __dmul_rd(double x, double y) { return __ocml_mul_rtn_f64(x, y); }
1086 #endif
1087 __DEVICE__
1088 inline
1089 double __dmul_rn(double x, double y) { return x * y; }
1090 #if defined OCML_BASIC_ROUNDED_OPERATIONS
1091 __DEVICE__
1092 inline
1093 double __dmul_ru(double x, double y) { return __ocml_mul_rtp_f64(x, y); }
1094 __DEVICE__
1095 inline
1096 double __dmul_rz(double x, double y) { return __ocml_mul_rtz_f64(x, y); }
1097 __DEVICE__
1098 inline
1099 double __drcp_rd(double x) { return __llvm_amdgcn_rcp_f64(x); }
1100 #endif
1101 __DEVICE__
1102 inline
1103 double __drcp_rn(double x) { return __llvm_amdgcn_rcp_f64(x); }
1104 #if defined OCML_BASIC_ROUNDED_OPERATIONS
1105 __DEVICE__
1106 inline
1107 double __drcp_ru(double x) { return __llvm_amdgcn_rcp_f64(x); }
1108 __DEVICE__
1109 inline
1110 double __drcp_rz(double x) { return __llvm_amdgcn_rcp_f64(x); }
1111 __DEVICE__
1112 inline
1113 double __dsqrt_rd(double x) { return __ocml_sqrt_rtn_f64(x); }
1114 #endif
1115 __DEVICE__
1116 inline
1117 double __dsqrt_rn(double x) { return __ocml_sqrt_f64(x); }
1118 #if defined OCML_BASIC_ROUNDED_OPERATIONS
1119 __DEVICE__
1120 inline
1121 double __dsqrt_ru(double x) { return __ocml_sqrt_rtp_f64(x); }
1122 __DEVICE__
1123 inline
1124 double __dsqrt_rz(double x) { return __ocml_sqrt_rtz_f64(x); }
1125 __DEVICE__
1126 inline
1127 double __dsub_rd(double x, double y) { return __ocml_sub_rtn_f64(x, y); }
1128 #endif
1129 __DEVICE__
1130 inline
1131 double __dsub_rn(double x, double y) { return x - y; }
1132 #if defined OCML_BASIC_ROUNDED_OPERATIONS
1133 __DEVICE__
1134 inline
1135 double __dsub_ru(double x, double y) { return __ocml_sub_rtp_f64(x, y); }
1136 __DEVICE__
1137 inline
1138 double __dsub_rz(double x, double y) { return __ocml_sub_rtz_f64(x, y); }
1139 __DEVICE__
1140 inline
1141 double __fma_rd(double x, double y, double z)
1142 {
1143  return __ocml_fma_rtn_f64(x, y, z);
1144 }
1145 #endif
1146 __DEVICE__
1147 inline
1148 double __fma_rn(double x, double y, double z)
1149 {
1150  return __ocml_fma_f64(x, y, z);
1151 }
1152 #if defined OCML_BASIC_ROUNDED_OPERATIONS
1153 __DEVICE__
1154 inline
1155 double __fma_ru(double x, double y, double z)
1156 {
1157  return __ocml_fma_rtp_f64(x, y, z);
1158 }
1159 __DEVICE__
1160 inline
1161 double __fma_rz(double x, double y, double z)
1162 {
1163  return __ocml_fma_rtz_f64(x, y, z);
1164 }
1165 #endif
1166 // END INTRINSICS
1167 // END DOUBLE
1168 
1169 // BEGIN INTEGER
1170 __DEVICE__
1171 inline
1172 int abs(int x)
1173 {
1174  int sgn = x >> (sizeof(int) * CHAR_BIT - 1);
1175  return (x ^ sgn) - sgn;
1176 }
1177 __DEVICE__
1178 inline
1179 long labs(long x)
1180 {
1181  long sgn = x >> (sizeof(long) * CHAR_BIT - 1);
1182  return (x ^ sgn) - sgn;
1183 }
1184 __DEVICE__
1185 inline
1186 long long llabs(long long x)
1187 {
1188  long long sgn = x >> (sizeof(long long) * CHAR_BIT - 1);
1189  return (x ^ sgn) - sgn;
1190 }
1191 
1192 #if defined(__cplusplus)
1193  __DEVICE__
1194  inline
1195  long abs(long x) { return labs(x); }
1196  __DEVICE__
1197  inline
1198  long long abs(long long x) { return llabs(x); }
1199 #endif
1200 // END INTEGER
1201 
1202 __DEVICE__
1203 inline _Float16 fma(_Float16 x, _Float16 y, _Float16 z) {
1204  return __ocml_fma_f16(x, y, z);
1205 }
1206 
1207 __DEVICE__
1208 inline float fma(float x, float y, float z) {
1209  return fmaf(x, y, z);
1210 }
1211 
1212 #pragma push_macro("__DEF_FLOAT_FUN")
1213 #pragma push_macro("__DEF_FLOAT_FUN2")
1214 #pragma push_macro("__DEF_FLOAT_FUN2I")
1215 #pragma push_macro("__HIP_OVERLOAD")
1216 #pragma push_macro("__HIP_OVERLOAD2")
1217 
1218 // __hip_enable_if::type is a type function which returns __T if __B is true.
1219 template<bool __B, class __T = void>
1221 
1222 template <class __T> struct __hip_enable_if<true, __T> {
1223  typedef __T type;
1224 };
1225 
1226 // __HIP_OVERLOAD1 is used to resolve function calls with integer argument to
1227 // avoid compilation error due to ambibuity. e.g. floor(5) is resolved with
1228 // floor(double).
1229 #define __HIP_OVERLOAD1(__retty, __fn) \
1230  template <typename __T> \
1231  __DEVICE__ \
1232  typename __hip_enable_if<std::numeric_limits<__T>::is_integer, \
1233  __retty>::type \
1234  __fn(__T __x) { \
1235  return ::__fn((double)__x); \
1236  }
1237 
1238 // __HIP_OVERLOAD2 is used to resolve function calls with mixed float/double
1239 // or integer argument to avoid compilation error due to ambibuity. e.g.
1240 // max(5.0f, 6.0) is resolved with max(double, double).
1241 #define __HIP_OVERLOAD2(__retty, __fn) \
1242  template <typename __T1, typename __T2> \
1243  __DEVICE__ typename __hip_enable_if< \
1244  std::numeric_limits<__T1>::is_specialized && \
1245  std::numeric_limits<__T2>::is_specialized, \
1246  __retty>::type \
1247  __fn(__T1 __x, __T2 __y) { \
1248  return __fn((double)__x, (double)__y); \
1249  }
1250 
1251 // Define cmath functions with float argument and returns float.
1252 #define __DEF_FUN1(retty, func) \
1253 __DEVICE__ \
1254 inline \
1255 float func(float x) \
1256 { \
1257  return func##f(x); \
1258 } \
1259 __HIP_OVERLOAD1(retty, func)
1260 
1261 // Define cmath functions with float argument and returns retty.
1262 #define __DEF_FUNI(retty, func) \
1263 __DEVICE__ \
1264 inline \
1265 retty func(float x) \
1266 { \
1267  return func##f(x); \
1268 } \
1269 __HIP_OVERLOAD1(retty, func)
1270 
1271 // define cmath functions with two float arguments.
1272 #define __DEF_FUN2(retty, func) \
1273 __DEVICE__ \
1274 inline \
1275 float func(float x, float y) \
1276 { \
1277  return func##f(x, y); \
1278 } \
1279 __HIP_OVERLOAD2(retty, func)
1280 
1281 __DEF_FUN1(double, acos)
1282 __DEF_FUN1(double, acosh)
1283 __DEF_FUN1(double, asin)
1284 __DEF_FUN1(double, asinh)
1285 __DEF_FUN1(double, atan)
1286 __DEF_FUN2(double, atan2);
1287 __DEF_FUN1(double, atanh)
1288 __DEF_FUN1(double, cbrt)
1289 __DEF_FUN1(double, ceil)
1290 __DEF_FUN2(double, copysign);
1291 __DEF_FUN1(double, cos)
1292 __DEF_FUN1(double, cosh)
1293 __DEF_FUN1(double, erf)
1294 __DEF_FUN1(double, erfc)
1295 __DEF_FUN1(double, exp)
1296 __DEF_FUN1(double, exp2)
1297 __DEF_FUN1(double, expm1)
1298 __DEF_FUN1(double, fabs)
1299 __DEF_FUN2(double, fdim);
1300 __DEF_FUN1(double, floor)
1301 __DEF_FUN2(double, fmax);
1302 __DEF_FUN2(double, fmin);
1303 __DEF_FUN2(double, fmod);
1304 //__HIP_OVERLOAD1(int, fpclassify)
1305 __DEF_FUN2(double, hypot);
1306 __DEF_FUNI(int, ilogb)
1307 __HIP_OVERLOAD1(bool, isfinite)
1308 __HIP_OVERLOAD2(bool, isgreater);
1309 __HIP_OVERLOAD2(bool, isgreaterequal);
1310 __HIP_OVERLOAD1(bool, isinf);
1311 __HIP_OVERLOAD2(bool, isless);
1312 __HIP_OVERLOAD2(bool, islessequal);
1313 __HIP_OVERLOAD2(bool, islessgreater);
1314 __HIP_OVERLOAD1(bool, isnan);
1315 //__HIP_OVERLOAD1(bool, isnormal)
1316 __HIP_OVERLOAD2(bool, isunordered);
1317 __DEF_FUN1(double, lgamma)
1318 __DEF_FUN1(double, log)
1319 __DEF_FUN1(double, log10)
1320 __DEF_FUN1(double, log1p)
1321 __DEF_FUN1(double, log2)
1322 __DEF_FUN1(double, logb)
1323 __DEF_FUNI(long long, llrint)
1324 __DEF_FUNI(long long, llround)
1325 __DEF_FUNI(long, lrint)
1326 __DEF_FUNI(long, lround)
1327 __DEF_FUN1(double, nearbyint);
1328 __DEF_FUN2(double, nextafter);
1329 __DEF_FUN2(double, pow);
1330 __DEF_FUN2(double, remainder);
1331 __DEF_FUN1(double, rint);
1332 __DEF_FUN1(double, round);
1333 __HIP_OVERLOAD1(bool, signbit)
1334 __DEF_FUN1(double, sin)
1335 __DEF_FUN1(double, sinh)
1336 __DEF_FUN1(double, sqrt)
1337 __DEF_FUN1(double, tan)
1338 __DEF_FUN1(double, tanh)
1339 __DEF_FUN1(double, tgamma)
1340 __DEF_FUN1(double, trunc);
1341 
1342 // define cmath functions with a float and an integer argument.
1343 #define __DEF_FLOAT_FUN2I(func) \
1344 __DEVICE__ \
1345 inline \
1346 float func(float x, int y) \
1347 { \
1348  return func##f(x, y); \
1349 }
1350 __DEF_FLOAT_FUN2I(scalbn)
1351 
1352 #if __HCC__
1353 template<class T>
1354 __DEVICE__ inline static T min(T arg1, T arg2) {
1355  return (arg1 < arg2) ? arg1 : arg2;
1356 }
1357 
1358 __DEVICE__ inline static uint32_t min(uint32_t arg1, int32_t arg2) {
1359  return min(arg1, (uint32_t) arg2);
1360 }
1361 /*__DEVICE__ inline static uint32_t min(int32_t arg1, uint32_t arg2) {
1362  return min((uint32_t) arg1, arg2);
1363 }
1364 
1365 __DEVICE__ inline static uint64_t min(uint64_t arg1, int64_t arg2) {
1366  return min(arg1, (uint64_t) arg2);
1367 }
1368 __DEVICE__ inline static uint64_t min(int64_t arg1, uint64_t arg2) {
1369  return min((uint64_t) arg1, arg2);
1370 }
1371 
1372 __DEVICE__ inline static unsigned long long min(unsigned long long arg1, long long arg2) {
1373  return min(arg1, (unsigned long long) arg2);
1374 }
1375 __DEVICE__ inline static unsigned long long min(long long arg1, unsigned long long arg2) {
1376  return min((unsigned long long) arg1, arg2);
1377 }*/
1378 
1379 template<class T>
1380 __DEVICE__ inline static T max(T arg1, T arg2) {
1381  return (arg1 > arg2) ? arg1 : arg2;
1382 }
1383 
1384 __DEVICE__ inline static uint32_t max(uint32_t arg1, int32_t arg2) {
1385  return max(arg1, (uint32_t) arg2);
1386 }
1387 __DEVICE__ inline static uint32_t max(int32_t arg1, uint32_t arg2) {
1388  return max((uint32_t) arg1, arg2);
1389 }
1390 
1391 /*__DEVICE__ inline static uint64_t max(uint64_t arg1, int64_t arg2) {
1392  return max(arg1, (uint64_t) arg2);
1393 }
1394 __DEVICE__ inline static uint64_t max(int64_t arg1, uint64_t arg2) {
1395  return max((uint64_t) arg1, arg2);
1396 }
1397 
1398 __DEVICE__ inline static unsigned long long max(unsigned long long arg1, long long arg2) {
1399  return max(arg1, (unsigned long long) arg2);
1400 }
1401 __DEVICE__ inline static unsigned long long max(long long arg1, unsigned long long arg2) {
1402  return max((unsigned long long) arg1, arg2);
1403 }*/
1404 #else
1405 __DEVICE__ inline int min(int arg1, int arg2) {
1406  return (arg1 < arg2) ? arg1 : arg2;
1407 }
1408 __DEVICE__ inline int max(int arg1, int arg2) {
1409  return (arg1 > arg2) ? arg1 : arg2;
1410 }
1411 
1412 __DEVICE__
1413 inline
1414 float max(float x, float y) {
1415  return fmaxf(x, y);
1416 }
1417 
1418 __DEVICE__
1419 inline
1420 double max(double x, double y) {
1421  return fmax(x, y);
1422 }
1423 
1424 __DEVICE__
1425 inline
1426 float min(float x, float y) {
1427  return fminf(x, y);
1428 }
1429 
1430 __DEVICE__
1431 inline
1432 double min(double x, double y) {
1433  return fmin(x, y);
1434 }
1435 
1436 __HIP_OVERLOAD2(double, max)
1437 __HIP_OVERLOAD2(double, min)
1438 
1439 #endif
1440 
1441 __host__ inline static int min(int arg1, int arg2) {
1442  return std::min(arg1, arg2);
1443 }
1444 
1445 __host__ inline static int max(int arg1, int arg2) {
1446  return std::max(arg1, arg2);
1447 }
1448 
1449 
1450 #pragma pop_macro("__DEF_FLOAT_FUN")
1451 #pragma pop_macro("__DEF_FLOAT_FUN2")
1452 #pragma pop_macro("__DEF_FLOAT_FUN2I")
1453 #pragma pop_macro("__HIP_OVERLOAD")
1454 #pragma pop_macro("__HIP_OVERLOAD2")
1455 #pragma pop_macro("__DEVICE__")
1456 #pragma pop_macro("__RETURN_TYPE")
1457 
1458 // For backward compatibility.
1459 // There are HIP applications e.g. TensorFlow, expecting __HIP_ARCH_* macros
1460 // defined after including math_functions.h.
TODO-doc.
#define __host__
Definition: host_defines.h:41
Contains definitions of APIs for HIP runtime.
Definition: math_functions.h:1220