HIP: Heterogenous-computing Interface for Portability
math_functions.h
1 /*
2 Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved.
3 
4 Permission is hereby granted, free of charge, to any person obtaining a copy
5 of this software and associated documentation files (the "Software"), to deal
6 in the Software without restriction, including without limitation the rights
7 to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
8 copies of the Software, and to permit persons to whom the Software is
9 furnished to do so, subject to the following conditions:
10 
11 The above copyright notice and this permission notice shall be included in
12 all copies or substantial portions of the Software.
13 
14 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
17 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
19 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
20 THE SOFTWARE.
21 */
22 
23 #pragma once
24 
25 #include <assert.h>
26 #include <limits.h>
27 #include <limits>
28 #include <stdint.h>
29 #include <algorithm>
30 
32 
33 #include "hip_fp16_math_fwd.h"
34 #include "math_fwd.h"
35 
36 // HCC's own math functions should be included first, otherwise there will
37 // be conflicts when hip/math_functions.h is included before hip/hip_runtime.h.
38 #ifdef __HCC__
39 #include "kalmar_math.h"
40 #endif
41 
42 #pragma push_macro("__DEVICE__")
43 #pragma push_macro("__RETURN_TYPE")
44 
45 #ifdef __HCC__
46 #define __DEVICE__ __device__
47 #define __RETURN_TYPE int
48 #else // to be consistent with __clang_cuda_math_forward_declares
49 #define __DEVICE__ static __device__
50 #define __RETURN_TYPE bool
51 #endif
52 
53 __DEVICE__
54 inline
55 uint64_t __make_mantissa_base8(const char* tagp)
56 {
57  uint64_t r = 0;
58  while (tagp) {
59  char tmp = *tagp;
60 
61  if (tmp >= '0' && tmp <= '7') r = (r * 8u) + tmp - '0';
62  else return 0;
63 
64  ++tagp;
65  }
66 
67  return r;
68 }
69 
70 __DEVICE__
71 inline
72 uint64_t __make_mantissa_base10(const char* tagp)
73 {
74  uint64_t r = 0;
75  while (tagp) {
76  char tmp = *tagp;
77 
78  if (tmp >= '0' && tmp <= '9') r = (r * 10u) + tmp - '0';
79  else return 0;
80 
81  ++tagp;
82  }
83 
84  return r;
85 }
86 
87 __DEVICE__
88 inline
89 uint64_t __make_mantissa_base16(const char* tagp)
90 {
91  uint64_t r = 0;
92  while (tagp) {
93  char tmp = *tagp;
94 
95  if (tmp >= '0' && tmp <= '9') r = (r * 16u) + tmp - '0';
96  else if (tmp >= 'a' && tmp <= 'f') r = (r * 16u) + tmp - 'a' + 10;
97  else if (tmp >= 'A' && tmp <= 'F') r = (r * 16u) + tmp - 'A' + 10;
98  else return 0;
99 
100  ++tagp;
101  }
102 
103  return r;
104 }
105 
106 __DEVICE__
107 inline
108 uint64_t __make_mantissa(const char* tagp)
109 {
110  if (!tagp) return 0u;
111 
112  if (*tagp == '0') {
113  ++tagp;
114 
115  if (*tagp == 'x' || *tagp == 'X') return __make_mantissa_base16(tagp);
116  else return __make_mantissa_base8(tagp);
117  }
118 
119  return __make_mantissa_base10(tagp);
120 }
121 
122 // BEGIN FLOAT
123 __DEVICE__
124 inline
125 float abs(float x) { return __ocml_fabs_f32(x); }
126 __DEVICE__
127 inline
128 float acosf(float x) { return __ocml_acos_f32(x); }
129 __DEVICE__
130 inline
131 float acoshf(float x) { return __ocml_acosh_f32(x); }
132 __DEVICE__
133 inline
134 float asinf(float x) { return __ocml_asin_f32(x); }
135 __DEVICE__
136 inline
137 float asinhf(float x) { return __ocml_asinh_f32(x); }
138 __DEVICE__
139 inline
140 float atan2f(float x, float y) { return __ocml_atan2_f32(x, y); }
141 __DEVICE__
142 inline
143 float atanf(float x) { return __ocml_atan_f32(x); }
144 __DEVICE__
145 inline
146 float atanhf(float x) { return __ocml_atanh_f32(x); }
147 __DEVICE__
148 inline
149 float cbrtf(float x) { return __ocml_cbrt_f32(x); }
150 __DEVICE__
151 inline
152 float ceilf(float x) { return __ocml_ceil_f32(x); }
153 __DEVICE__
154 inline
155 float copysignf(float x, float y) { return __ocml_copysign_f32(x, y); }
156 __DEVICE__
157 inline
158 float cosf(float x) { return __ocml_cos_f32(x); }
159 __DEVICE__
160 inline
161 float coshf(float x) { return __ocml_cosh_f32(x); }
162 __DEVICE__
163 inline
164 float cospif(float x) { return __ocml_cospi_f32(x); }
165 __DEVICE__
166 inline
167 float cyl_bessel_i0f(float x) { return __ocml_i0_f32(x); }
168 __DEVICE__
169 inline
170 float cyl_bessel_i1f(float x) { return __ocml_i1_f32(x); }
171 __DEVICE__
172 inline
173 float erfcf(float x) { return __ocml_erfc_f32(x); }
174 __DEVICE__
175 inline
176 float erfcinvf(float x) { return __ocml_erfcinv_f32(x); }
177 __DEVICE__
178 inline
179 float erfcxf(float x) { return __ocml_erfcx_f32(x); }
180 __DEVICE__
181 inline
182 float erff(float x) { return __ocml_erf_f32(x); }
183 __DEVICE__
184 inline
185 float erfinvf(float x) { return __ocml_erfinv_f32(x); }
186 __DEVICE__
187 inline
188 float exp10f(float x) { return __ocml_exp10_f32(x); }
189 __DEVICE__
190 inline
191 float exp2f(float x) { return __ocml_exp2_f32(x); }
192 __DEVICE__
193 inline
194 float expf(float x) { return __ocml_exp_f32(x); }
195 __DEVICE__
196 inline
197 float expm1f(float x) { return __ocml_expm1_f32(x); }
198 __DEVICE__
199 inline
200 float fabsf(float x) { return __ocml_fabs_f32(x); }
201 __DEVICE__
202 inline
203 float fdimf(float x, float y) { return __ocml_fdim_f32(x, y); }
204 __DEVICE__
205 inline
206 float fdividef(float x, float y) { return x / y; }
207 __DEVICE__
208 inline
209 float floorf(float x) { return __ocml_floor_f32(x); }
210 __DEVICE__
211 inline
212 float fmaf(float x, float y, float z) { return __ocml_fma_f32(x, y, z); }
213 __DEVICE__
214 inline
215 float fmaxf(float x, float y) { return __ocml_fmax_f32(x, y); }
216 __DEVICE__
217 inline
218 float fminf(float x, float y) { return __ocml_fmin_f32(x, y); }
219 __DEVICE__
220 inline
221 float fmodf(float x, float y) { return __ocml_fmod_f32(x, y); }
222 __DEVICE__
223 inline
224 float frexpf(float x, int* nptr)
225 {
226  int tmp;
227  float r =
228  __ocml_frexp_f32(x, (__attribute__((address_space(5))) int*) &tmp);
229  *nptr = tmp;
230 
231  return r;
232 }
233 __DEVICE__
234 inline
235 float hypotf(float x, float y) { return __ocml_hypot_f32(x, y); }
236 __DEVICE__
237 inline
238 int ilogbf(float x) { return __ocml_ilogb_f32(x); }
239 __DEVICE__
240 inline
241 __RETURN_TYPE isfinite(float x) { return __ocml_isfinite_f32(x); }
242 __DEVICE__
243 inline
244 __RETURN_TYPE isinf(float x) { return __ocml_isinf_f32(x); }
245 __DEVICE__
246 inline
247 __RETURN_TYPE isnan(float x) { return __ocml_isnan_f32(x); }
248 __DEVICE__
249 inline
250 float j0f(float x) { return __ocml_j0_f32(x); }
251 __DEVICE__
252 inline
253 float j1f(float x) { return __ocml_j1_f32(x); }
254 __DEVICE__
255 inline
256 float jnf(int n, float x)
257 { // TODO: we could use Ahmes multiplication and the Miller & Brown algorithm
258  // for linear recurrences to get O(log n) steps, but it's unclear if
259  // it'd be beneficial in this case.
260  if (n == 0) return j0f(x);
261  if (n == 1) return j1f(x);
262 
263  float x0 = j0f(x);
264  float x1 = j1f(x);
265  for (int i = 1; i < n; ++i) {
266  float x2 = (2 * i) / x * x1 - x0;
267  x0 = x1;
268  x1 = x2;
269  }
270 
271  return x1;
272 }
273 __DEVICE__
274 inline
275 float ldexpf(float x, int e) { return __ocml_ldexp_f32(x, e); }
276 __DEVICE__
277 inline
278 float lgammaf(float x) { return __ocml_lgamma_f32(x); }
279 __DEVICE__
280 inline
281 long long int llrintf(float x) { return __ocml_rint_f32(x); }
282 __DEVICE__
283 inline
284 long long int llroundf(float x) { return __ocml_round_f32(x); }
285 __DEVICE__
286 inline
287 float log10f(float x) { return __ocml_log10_f32(x); }
288 __DEVICE__
289 inline
290 float log1pf(float x) { return __ocml_log1p_f32(x); }
291 __DEVICE__
292 inline
293 float log2f(float x) { return __ocml_log2_f32(x); }
294 __DEVICE__
295 inline
296 float logbf(float x) { return __ocml_logb_f32(x); }
297 __DEVICE__
298 inline
299 float logf(float x) { return __ocml_log_f32(x); }
300 __DEVICE__
301 inline
302 long int lrintf(float x) { return __ocml_rint_f32(x); }
303 __DEVICE__
304 inline
305 long int lroundf(float x) { return __ocml_round_f32(x); }
306 __DEVICE__
307 inline
308 float modff(float x, float* iptr)
309 {
310  float tmp;
311  float r =
312  __ocml_modf_f32(x, (__attribute__((address_space(5))) float*) &tmp);
313  *iptr = tmp;
314 
315  return r;
316 }
317 __DEVICE__
318 inline
319 float nanf(const char* tagp)
320 {
321  union {
322  float val;
323  struct ieee_float {
324  uint32_t mantissa : 22;
325  uint32_t quiet : 1;
326  uint32_t exponent : 8;
327  uint32_t sign : 1;
328  } bits;
329 
330  static_assert(sizeof(float) == sizeof(ieee_float), "");
331  } tmp;
332 
333  tmp.bits.sign = 0u;
334  tmp.bits.exponent = ~0u;
335  tmp.bits.quiet = 1u;
336  tmp.bits.mantissa = __make_mantissa(tagp);
337 
338  return tmp.val;
339 }
340 __DEVICE__
341 inline
342 float nearbyintf(float x) { return __ocml_nearbyint_f32(x); }
343 __DEVICE__
344 inline
345 float nextafterf(float x, float y) { return __ocml_nextafter_f32(x, y); }
346 __DEVICE__
347 inline
348 float norm3df(float x, float y, float z) { return __ocml_len3_f32(x, y, z); }
349 __DEVICE__
350 inline
351 float norm4df(float x, float y, float z, float w)
352 {
353  return __ocml_len4_f32(x, y, z, w);
354 }
355 __DEVICE__
356 inline
357 float normcdff(float x) { return __ocml_ncdf_f32(x); }
358 __DEVICE__
359 inline
360 float normcdfinvf(float x) { return __ocml_ncdfinv_f32(x); }
361 __DEVICE__
362 inline
363 float normf(int dim, const float* a)
364 { // TODO: placeholder until OCML adds support.
365  float r = 0;
366  while (dim--) { r += a[0] * a[0]; ++a; }
367 
368  return __ocml_sqrt_f32(r);
369 }
370 __DEVICE__
371 inline
372 float powf(float x, float y) { return __ocml_pow_f32(x, y); }
373 __DEVICE__
374 inline
375 float rcbrtf(float x) { return __ocml_rcbrt_f32(x); }
376 __DEVICE__
377 inline
378 float remainderf(float x, float y) { return __ocml_remainder_f32(x, y); }
379 __DEVICE__
380 inline
381 float remquof(float x, float y, int* quo)
382 {
383  int tmp;
384  float r =
385  __ocml_remquo_f32(x, y, (__attribute__((address_space(5))) int*) &tmp);
386  *quo = tmp;
387 
388  return r;
389 }
390 __DEVICE__
391 inline
392 float rhypotf(float x, float y) { return __ocml_rhypot_f32(x, y); }
393 __DEVICE__
394 inline
395 float rintf(float x) { return __ocml_rint_f32(x); }
396 __DEVICE__
397 inline
398 float rnorm3df(float x, float y, float z)
399 {
400  return __ocml_rlen3_f32(x, y, z);
401 }
402 
403 __DEVICE__
404 inline
405 float rnorm4df(float x, float y, float z, float w)
406 {
407  return __ocml_rlen4_f32(x, y, z, w);
408 }
409 __DEVICE__
410 inline
411 float rnormf(int dim, const float* a)
412 { // TODO: placeholder until OCML adds support.
413  float r = 0;
414  while (dim--) { r += a[0] * a[0]; ++a; }
415 
416  return __ocml_rsqrt_f32(r);
417 }
418 __DEVICE__
419 inline
420 float roundf(float x) { return __ocml_round_f32(x); }
421 __DEVICE__
422 inline
423 float rsqrtf(float x) { return __ocml_rsqrt_f32(x); }
424 __DEVICE__
425 inline
426 float scalblnf(float x, long int n)
427 {
428  return (n < INT_MAX) ? __ocml_scalbn_f32(x, n) : __ocml_scalb_f32(x, n);
429 }
430 __DEVICE__
431 inline
432 float scalbnf(float x, int n) { return __ocml_scalbn_f32(x, n); }
433 __DEVICE__
434 inline
435 __RETURN_TYPE signbit(float x) { return __ocml_signbit_f32(x); }
436 __DEVICE__
437 inline
438 void sincosf(float x, float* sptr, float* cptr)
439 {
440  float tmp;
441 
442  *sptr =
443  __ocml_sincos_f32(x, (__attribute__((address_space(5))) float*) &tmp);
444  *cptr = tmp;
445 }
446 __DEVICE__
447 inline
448 void sincospif(float x, float* sptr, float* cptr)
449 {
450  float tmp;
451 
452  *sptr =
453  __ocml_sincospi_f32(x, (__attribute__((address_space(5))) float*) &tmp);
454  *cptr = tmp;
455 }
456 __DEVICE__
457 inline
458 float sinf(float x) { return __ocml_sin_f32(x); }
459 __DEVICE__
460 inline
461 float sinhf(float x) { return __ocml_sinh_f32(x); }
462 __DEVICE__
463 inline
464 float sinpif(float x) { return __ocml_sinpi_f32(x); }
465 __DEVICE__
466 inline
467 float sqrtf(float x) { return __ocml_sqrt_f32(x); }
468 __DEVICE__
469 inline
470 float tanf(float x) { return __ocml_tan_f32(x); }
471 __DEVICE__
472 inline
473 float tanhf(float x) { return __ocml_tanh_f32(x); }
474 __DEVICE__
475 inline
476 float tgammaf(float x) { return __ocml_tgamma_f32(x); }
477 __DEVICE__
478 inline
479 float truncf(float x) { return __ocml_trunc_f32(x); }
480 __DEVICE__
481 inline
482 float y0f(float x) { return __ocml_y0_f32(x); }
483 __DEVICE__
484 inline
485 float y1f(float x) { return __ocml_y1_f32(x); }
486 __DEVICE__
487 inline
488 float ynf(int n, float x)
489 { // TODO: we could use Ahmes multiplication and the Miller & Brown algorithm
490  // for linear recurrences to get O(log n) steps, but it's unclear if
491  // it'd be beneficial in this case. Placeholder until OCML adds
492  // support.
493  if (n == 0) return y0f(x);
494  if (n == 1) return y1f(x);
495 
496  float x0 = y0f(x);
497  float x1 = y1f(x);
498  for (int i = 1; i < n; ++i) {
499  float x2 = (2 * i) / x * x1 - x0;
500  x0 = x1;
501  x1 = x2;
502  }
503 
504  return x1;
505 }
506 
507 // BEGIN INTRINSICS
508 __DEVICE__
509 inline
510 float __cosf(float x) { return __llvm_amdgcn_cos_f32(x); }
511 __DEVICE__
512 inline
513 float __exp10f(float x) { return __ocml_exp10_f32(x); }
514 __DEVICE__
515 inline
516 float __expf(float x) { return __ocml_exp_f32(x); }
517 __DEVICE__
518 inline
519 float __fadd_rd(float x, float y) { return __ocml_add_rtp_f32(x, y); }
520 __DEVICE__
521 inline
522 float __fadd_rn(float x, float y) { return __ocml_add_rte_f32(x, y); }
523 __DEVICE__
524 inline
525 float __fadd_ru(float x, float y) { return __ocml_add_rtn_f32(x, y); }
526 __DEVICE__
527 inline
528 float __fadd_rz(float x, float y) { return __ocml_add_rtz_f32(x, y); }
529 __DEVICE__
530 inline
531 float __fdiv_rd(float x, float y) { return x / y; }
532 __DEVICE__
533 inline
534 float __fdiv_rn(float x, float y) { return x / y; }
535 __DEVICE__
536 inline
537 float __fdiv_ru(float x, float y) { return x / y; }
538 __DEVICE__
539 inline
540 float __fdiv_rz(float x, float y) { return x / y; }
541 __DEVICE__
542 inline
543 float __fdividef(float x, float y) { return x / y; }
544 __DEVICE__
545 inline
546 float __fmaf_rd(float x, float y, float z)
547 {
548  return __ocml_fma_rtp_f32(x, y, z);
549 }
550 __DEVICE__
551 inline
552 float __fmaf_rn(float x, float y, float z)
553 {
554  return __ocml_fma_rte_f32(x, y, z);
555 }
556 __DEVICE__
557 inline
558 float __fmaf_ru(float x, float y, float z)
559 {
560  return __ocml_fma_rtn_f32(x, y, z);
561 }
562 __DEVICE__
563 inline
564 float __fmaf_rz(float x, float y, float z)
565 {
566  return __ocml_fma_rtz_f32(x, y, z);
567 }
568 __DEVICE__
569 inline
570 float __fmul_rd(float x, float y) { return __ocml_mul_rtp_f32(x, y); }
571 __DEVICE__
572 inline
573 float __fmul_rn(float x, float y) { return __ocml_mul_rte_f32(x, y); }
574 __DEVICE__
575 inline
576 float __fmul_ru(float x, float y) { return __ocml_mul_rtn_f32(x, y); }
577 __DEVICE__
578 inline
579 float __fmul_rz(float x, float y) { return __ocml_mul_rtz_f32(x, y); }
580 __DEVICE__
581 inline
582 float __frcp_rd(float x) { return __llvm_amdgcn_rcp_f32(x); }
583 __DEVICE__
584 inline
585 float __frcp_rn(float x) { return __llvm_amdgcn_rcp_f32(x); }
586 __DEVICE__
587 inline
588 float __frcp_ru(float x) { return __llvm_amdgcn_rcp_f32(x); }
589 __DEVICE__
590 inline
591 float __frcp_rz(float x) { return __llvm_amdgcn_rcp_f32(x); }
592 __DEVICE__
593 inline
594 float __frsqrt_rn(float x) { return __llvm_amdgcn_rsq_f32(x); }
595 __DEVICE__
596 inline
597 float __fsqrt_rd(float x) { return __ocml_sqrt_rtp_f32(x); }
598 __DEVICE__
599 inline
600 float __fsqrt_rn(float x) { return __ocml_sqrt_rte_f32(x); }
601 __DEVICE__
602 inline
603 float __fsqrt_ru(float x) { return __ocml_sqrt_rtn_f32(x); }
604 __DEVICE__
605 inline
606 float __fsqrt_rz(float x) { return __ocml_sqrt_rtz_f32(x); }
607 __DEVICE__
608 inline
609 float __fsub_rd(float x, float y) { return __ocml_sub_rtp_f32(x, y); }
610 __DEVICE__
611 inline
612 float __fsub_rn(float x, float y) { return __ocml_sub_rte_f32(x, y); }
613 __DEVICE__
614 inline
615 float __fsub_ru(float x, float y) { return __ocml_sub_rtn_f32(x, y); }
616 __DEVICE__
617 inline
618 float __fsub_rz(float x, float y) { return __ocml_sub_rtz_f32(x, y); }
619 __DEVICE__
620 inline
621 float __log10f(float x) { return __ocml_log10_f32(x); }
622 __DEVICE__
623 inline
624 float __log2f(float x) { return __ocml_log2_f32(x); }
625 __DEVICE__
626 inline
627 float __logf(float x) { return __ocml_log_f32(x); }
628 __DEVICE__
629 inline
630 float __powf(float x, float y) { return __ocml_pow_f32(x, y); }
631 __DEVICE__
632 inline
633 float __saturatef(float x) { return (x < 0) ? 0 : ((x > 1) ? 1 : x); }
634 __DEVICE__
635 inline
636 void __sincosf(float x, float* sptr, float* cptr)
637 {
638  float tmp;
639 
640  *sptr =
641  __ocml_sincos_f32(x, (__attribute__((address_space(5))) float*) &tmp);
642  *cptr = tmp;
643 }
644 __DEVICE__
645 inline
646 float __sinf(float x) { return __llvm_amdgcn_sin_f32(x); }
647 __DEVICE__
648 inline
649 float __tanf(float x) { return __ocml_tan_f32(x); }
650 // END INTRINSICS
651 // END FLOAT
652 
653 // BEGIN DOUBLE
654 __DEVICE__
655 inline
656 double abs(double x) { return __ocml_fabs_f64(x); }
657 __DEVICE__
658 inline
659 double acos(double x) { return __ocml_acos_f64(x); }
660 __DEVICE__
661 inline
662 double acosh(double x) { return __ocml_acosh_f64(x); }
663 __DEVICE__
664 inline
665 double asin(double x) { return __ocml_asin_f64(x); }
666 __DEVICE__
667 inline
668 double asinh(double x) { return __ocml_asinh_f64(x); }
669 __DEVICE__
670 inline
671 double atan(double x) { return __ocml_atan_f64(x); }
672 __DEVICE__
673 inline
674 double atan2(double x, double y) { return __ocml_atan2_f64(x, y); }
675 __DEVICE__
676 inline
677 double atanh(double x) { return __ocml_atanh_f64(x); }
678 __DEVICE__
679 inline
680 double cbrt(double x) { return __ocml_cbrt_f64(x); }
681 __DEVICE__
682 inline
683 double ceil(double x) { return __ocml_ceil_f64(x); }
684 __DEVICE__
685 inline
686 double copysign(double x, double y) { return __ocml_copysign_f64(x, y); }
687 __DEVICE__
688 inline
689 double cos(double x) { return __ocml_cos_f64(x); }
690 __DEVICE__
691 inline
692 double cosh(double x) { return __ocml_cosh_f64(x); }
693 __DEVICE__
694 inline
695 double cospi(double x) { return __ocml_cospi_f64(x); }
696 __DEVICE__
697 inline
698 double cyl_bessel_i0(double x) { return __ocml_i0_f64(x); }
699 __DEVICE__
700 inline
701 double cyl_bessel_i1(double x) { return __ocml_i1_f64(x); }
702 __DEVICE__
703 inline
704 double erf(double x) { return __ocml_erf_f64(x); }
705 __DEVICE__
706 inline
707 double erfc(double x) { return __ocml_erfc_f64(x); }
708 __DEVICE__
709 inline
710 double erfcinv(double x) { return __ocml_erfcinv_f64(x); }
711 __DEVICE__
712 inline
713 double erfcx(double x) { return __ocml_erfcx_f64(x); }
714 __DEVICE__
715 inline
716 double erfinv(double x) { return __ocml_erfinv_f64(x); }
717 __DEVICE__
718 inline
719 double exp(double x) { return __ocml_exp_f64(x); }
720 __DEVICE__
721 inline
722 double exp10(double x) { return __ocml_exp10_f64(x); }
723 __DEVICE__
724 inline
725 double exp2(double x) { return __ocml_exp2_f64(x); }
726 __DEVICE__
727 inline
728 double expm1(double x) { return __ocml_expm1_f64(x); }
729 __DEVICE__
730 inline
731 double fabs(double x) { return __ocml_fabs_f64(x); }
732 __DEVICE__
733 inline
734 double fdim(double x, double y) { return __ocml_fdim_f64(x, y); }
735 __DEVICE__
736 inline
737 double floor(double x) { return __ocml_floor_f64(x); }
738 __DEVICE__
739 inline
740 double fma(double x, double y, double z) { return __ocml_fma_f64(x, y, z); }
741 __DEVICE__
742 inline
743 double fmax(double x, double y) { return __ocml_fmax_f64(x, y); }
744 __DEVICE__
745 inline
746 double fmin(double x, double y) { return __ocml_fmin_f64(x, y); }
747 __DEVICE__
748 inline
749 double fmod(double x, double y) { return __ocml_fmod_f64(x, y); }
750 __DEVICE__
751 inline
752 double frexp(double x, int* nptr)
753 {
754  int tmp;
755  double r =
756  __ocml_frexp_f64(x, (__attribute__((address_space(5))) int*) &tmp);
757  *nptr = tmp;
758 
759  return r;
760 }
761 __DEVICE__
762 inline
763 double hypot(double x, double y) { return __ocml_hypot_f64(x, y); }
764 __DEVICE__
765 inline
766 int ilogb(double x) { return __ocml_ilogb_f64(x); }
767 __DEVICE__
768 inline
769 __RETURN_TYPE isfinite(double x) { return __ocml_isfinite_f64(x); }
770 __DEVICE__
771 inline
772 __RETURN_TYPE isinf(double x) { return __ocml_isinf_f64(x); }
773 __DEVICE__
774 inline
775 __RETURN_TYPE isnan(double x) { return __ocml_isnan_f64(x); }
776 __DEVICE__
777 inline
778 double j0(double x) { return __ocml_j0_f64(x); }
779 __DEVICE__
780 inline
781 double j1(double x) { return __ocml_j1_f64(x); }
782 __DEVICE__
783 inline
784 double jn(int n, double x)
785 { // TODO: we could use Ahmes multiplication and the Miller & Brown algorithm
786  // for linear recurrences to get O(log n) steps, but it's unclear if
787  // it'd be beneficial in this case. Placeholder until OCML adds
788  // support.
789  if (n == 0) return j0f(x);
790  if (n == 1) return j1f(x);
791 
792  double x0 = j0f(x);
793  double x1 = j1f(x);
794  for (int i = 1; i < n; ++i) {
795  double x2 = (2 * i) / x * x1 - x0;
796  x0 = x1;
797  x1 = x2;
798  }
799 
800  return x1;
801 }
802 __DEVICE__
803 inline
804 double ldexp(double x, int e) { return __ocml_ldexp_f64(x, e); }
805 __DEVICE__
806 inline
807 double lgamma(double x) { return __ocml_lgamma_f64(x); }
808 __DEVICE__
809 inline
810 long long int llrint(double x) { return __ocml_rint_f64(x); }
811 __DEVICE__
812 inline
813 long long int llround(double x) { return __ocml_round_f64(x); }
814 __DEVICE__
815 inline
816 double log(double x) { return __ocml_log_f64(x); }
817 __DEVICE__
818 inline
819 double log10(double x) { return __ocml_log10_f64(x); }
820 __DEVICE__
821 inline
822 double log1p(double x) { return __ocml_log1p_f64(x); }
823 __DEVICE__
824 inline
825 double log2(double x) { return __ocml_log2_f64(x); }
826 __DEVICE__
827 inline
828 double logb(double x) { return __ocml_logb_f64(x); }
829 __DEVICE__
830 inline
831 long int lrint(double x) { return __ocml_rint_f64(x); }
832 __DEVICE__
833 inline
834 long int lround(double x) { return __ocml_round_f64(x); }
835 __DEVICE__
836 inline
837 double modf(double x, double* iptr)
838 {
839  double tmp;
840  double r =
841  __ocml_modf_f64(x, (__attribute__((address_space(5))) double*) &tmp);
842  *iptr = tmp;
843 
844  return r;
845 }
846 __DEVICE__
847 inline
848 double nan(const char* tagp)
849 {
850  union {
851  double val;
852  struct ieee_double {
853  uint64_t mantissa : 51;
854  uint32_t quiet : 1;
855  uint32_t exponent : 11;
856  uint32_t sign : 1;
857  } bits;
858 
859  static_assert(sizeof(double) == sizeof(ieee_double), "");
860  } tmp;
861 
862  tmp.bits.sign = 0u;
863  tmp.bits.exponent = ~0u;
864  tmp.bits.quiet = 1u;
865  tmp.bits.mantissa = __make_mantissa(tagp);
866 
867  return tmp.val;
868 }
869 __DEVICE__
870 inline
871 double nearbyint(double x) { return __ocml_nearbyint_f64(x); }
872 __DEVICE__
873 inline
874 double nextafter(double x, double y) { return __ocml_nextafter_f64(x, y); }
875 __DEVICE__
876 inline
877 double norm(int dim, const double* a)
878 { // TODO: placeholder until OCML adds support.
879  double r = 0;
880  while (dim--) { r += a[0] * a[0]; ++a; }
881 
882  return __ocml_sqrt_f64(r);
883 }
884 __DEVICE__
885 inline
886 double norm3d(double x, double y, double z)
887 {
888  return __ocml_len3_f64(x, y, z);
889 }
890 __DEVICE__
891 inline
892 double norm4d(double x, double y, double z, double w)
893 {
894  return __ocml_len4_f64(x, y, z, w);
895 }
896 __DEVICE__
897 inline
898 double normcdf(double x) { return __ocml_ncdf_f64(x); }
899 __DEVICE__
900 inline
901 double normcdfinv(double x) { return __ocml_ncdfinv_f64(x); }
902 __DEVICE__
903 inline
904 double pow(double x, double y) { return __ocml_pow_f64(x, y); }
905 __DEVICE__
906 inline
907 double rcbrt(double x) { return __ocml_rcbrt_f64(x); }
908 __DEVICE__
909 inline
910 double remainder(double x, double y) { return __ocml_remainder_f64(x, y); }
911 __DEVICE__
912 inline
913 double remquo(double x, double y, int* quo)
914 {
915  int tmp;
916  double r =
917  __ocml_remquo_f64(x, y, (__attribute__((address_space(5))) int*) &tmp);
918  *quo = tmp;
919 
920  return r;
921 }
922 __DEVICE__
923 inline
924 double rhypot(double x, double y) { return __ocml_rhypot_f64(x, y); }
925 __DEVICE__
926 inline
927 double rint(double x) { return __ocml_rint_f64(x); }
928 __DEVICE__
929 inline
930 double rnorm(int dim, const double* a)
931 { // TODO: placeholder until OCML adds support.
932  double r = 0;
933  while (dim--) { r += a[0] * a[0]; ++a; }
934 
935  return __ocml_rsqrt_f64(r);
936 }
937 __DEVICE__
938 inline
939 double rnorm3d(double x, double y, double z)
940 {
941  return __ocml_rlen3_f64(x, y, z);
942 }
943 __DEVICE__
944 inline
945 double rnorm4d(double x, double y, double z, double w)
946 {
947  return __ocml_rlen4_f64(x, y, z, w);
948 }
949 __DEVICE__
950 inline
951 double round(double x) { return __ocml_round_f64(x); }
952 __DEVICE__
953 inline
954 double rsqrt(double x) { return __ocml_rsqrt_f64(x); }
955 __DEVICE__
956 inline
957 double scalbln(double x, long int n)
958 {
959  return (n < INT_MAX) ? __ocml_scalbn_f64(x, n) : __ocml_scalb_f64(x, n);
960 }
961 __DEVICE__
962 inline
963 double scalbn(double x, int n) { return __ocml_scalbn_f64(x, n); }
964 __DEVICE__
965 inline
966 __RETURN_TYPE signbit(double x) { return __ocml_signbit_f64(x); }
967 __DEVICE__
968 inline
969 double sin(double x) { return __ocml_sin_f64(x); }
970 __DEVICE__
971 inline
972 void sincos(double x, double* sptr, double* cptr)
973 {
974  double tmp;
975  *sptr =
976  __ocml_sincos_f64(x, (__attribute__((address_space(5))) double*) &tmp);
977  *cptr = tmp;
978 }
979 __DEVICE__
980 inline
981 void sincospi(double x, double* sptr, double* cptr)
982 {
983  double tmp;
984  *sptr = __ocml_sincospi_f64(
985  x, (__attribute__((address_space(5))) double*) &tmp);
986  *cptr = tmp;
987 }
988 __DEVICE__
989 inline
990 double sinh(double x) { return __ocml_sinh_f64(x); }
991 __DEVICE__
992 inline
993 double sinpi(double x) { return __ocml_sinpi_f64(x); }
994 __DEVICE__
995 inline
996 double sqrt(double x) { return __ocml_sqrt_f64(x); }
997 __DEVICE__
998 inline
999 double tan(double x) { return __ocml_tan_f64(x); }
1000 __DEVICE__
1001 inline
1002 double tanh(double x) { return __ocml_tanh_f64(x); }
1003 __DEVICE__
1004 inline
1005 double tgamma(double x) { return __ocml_tgamma_f64(x); }
1006 __DEVICE__
1007 inline
1008 double trunc(double x) { return __ocml_trunc_f64(x); }
1009 __DEVICE__
1010 inline
1011 double y0(double x) { return __ocml_y0_f64(x); }
1012 __DEVICE__
1013 inline
1014 double y1(double x) { return __ocml_y1_f64(x); }
1015 __DEVICE__
1016 inline
1017 double yn(int n, double x)
1018 { // TODO: we could use Ahmes multiplication and the Miller & Brown algorithm
1019  // for linear recurrences to get O(log n) steps, but it's unclear if
1020  // it'd be beneficial in this case. Placeholder until OCML adds
1021  // support.
1022  if (n == 0) return j0f(x);
1023  if (n == 1) return j1f(x);
1024 
1025  double x0 = j0f(x);
1026  double x1 = j1f(x);
1027  for (int i = 1; i < n; ++i) {
1028  double x2 = (2 * i) / x * x1 - x0;
1029  x0 = x1;
1030  x1 = x2;
1031  }
1032 
1033  return x1;
1034 }
1035 
1036 // BEGIN INTRINSICS
1037 __DEVICE__
1038 inline
1039 double __dadd_rd(double x, double y) { return __ocml_add_rtp_f64(x, y); }
1040 __DEVICE__
1041 inline
1042 double __dadd_rn(double x, double y) { return __ocml_add_rte_f64(x, y); }
1043 __DEVICE__
1044 inline
1045 double __dadd_ru(double x, double y) { return __ocml_add_rtn_f64(x, y); }
1046 __DEVICE__
1047 inline
1048 double __dadd_rz(double x, double y) { return __ocml_add_rtz_f64(x, y); }
1049 __DEVICE__
1050 inline
1051 double __ddiv_rd(double x, double y) { return x / y; }
1052 __DEVICE__
1053 inline
1054 double __ddiv_rn(double x, double y) { return x / y; }
1055 __DEVICE__
1056 inline
1057 double __ddiv_ru(double x, double y) { return x / y; }
1058 __DEVICE__
1059 inline
1060 double __ddiv_rz(double x, double y) { return x / y; }
1061 __DEVICE__
1062 inline
1063 double __dmul_rd(double x, double y) { return __ocml_mul_rtp_f64(x, y); }
1064 __DEVICE__
1065 inline
1066 double __dmul_rn(double x, double y) { return __ocml_mul_rte_f64(x, y); }
1067 __DEVICE__
1068 inline
1069 double __dmul_ru(double x, double y) { return __ocml_mul_rtn_f64(x, y); }
1070 __DEVICE__
1071 inline
1072 double __dmul_rz(double x, double y) { return __ocml_mul_rtz_f64(x, y); }
1073 __DEVICE__
1074 inline
1075 double __drcp_rd(double x) { return __llvm_amdgcn_rcp_f64(x); }
1076 __DEVICE__
1077 inline
1078 double __drcp_rn(double x) { return __llvm_amdgcn_rcp_f64(x); }
1079 __DEVICE__
1080 inline
1081 double __drcp_ru(double x) { return __llvm_amdgcn_rcp_f64(x); }
1082 __DEVICE__
1083 inline
1084 double __drcp_rz(double x) { return __llvm_amdgcn_rcp_f64(x); }
1085 __DEVICE__
1086 inline
1087 double __dsqrt_rd(double x) { return __ocml_sqrt_rtp_f64(x); }
1088 __DEVICE__
1089 inline
1090 double __dsqrt_rn(double x) { return __ocml_sqrt_rte_f64(x); }
1091 __DEVICE__
1092 inline
1093 double __dsqrt_ru(double x) { return __ocml_sqrt_rtn_f64(x); }
1094 __DEVICE__
1095 inline
1096 double __dsqrt_rz(double x) { return __ocml_sqrt_rtz_f64(x); }
1097 __DEVICE__
1098 inline
1099 double __dsub_rd(double x, double y) { return __ocml_sub_rtp_f64(x, y); }
1100 __DEVICE__
1101 inline
1102 double __dsub_rn(double x, double y) { return __ocml_sub_rte_f64(x, y); }
1103 __DEVICE__
1104 inline
1105 double __dsub_ru(double x, double y) { return __ocml_sub_rtn_f64(x, y); }
1106 __DEVICE__
1107 inline
1108 double __dsub_rz(double x, double y) { return __ocml_sub_rtz_f64(x, y); }
1109 __DEVICE__
1110 inline
1111 double __fma_rd(double x, double y, double z)
1112 {
1113  return __ocml_fma_rtp_f64(x, y, z);
1114 }
1115 __DEVICE__
1116 inline
1117 double __fma_rn(double x, double y, double z)
1118 {
1119  return __ocml_fma_rte_f64(x, y, z);
1120 }
1121 __DEVICE__
1122 inline
1123 double __fma_ru(double x, double y, double z)
1124 {
1125  return __ocml_fma_rtn_f64(x, y, z);
1126 }
1127 __DEVICE__
1128 inline
1129 double __fma_rz(double x, double y, double z)
1130 {
1131  return __ocml_fma_rtz_f64(x, y, z);
1132 }
1133 // END INTRINSICS
1134 // END DOUBLE
1135 
1136 // BEGIN INTEGER
1137 __DEVICE__
1138 inline
1139 int abs(int x)
1140 {
1141  int sgn = x >> (sizeof(int) * CHAR_BIT - 1);
1142  return (x ^ sgn) - sgn;
1143 }
1144 __DEVICE__
1145 inline
1146 long labs(long x)
1147 {
1148  long sgn = x >> (sizeof(long) * CHAR_BIT - 1);
1149  return (x ^ sgn) - sgn;
1150 }
1151 __DEVICE__
1152 inline
1153 long long llabs(long long x)
1154 {
1155  long long sgn = x >> (sizeof(long long) * CHAR_BIT - 1);
1156  return (x ^ sgn) - sgn;
1157 }
1158 
1159 #if defined(__cplusplus)
1160  __DEVICE__
1161  inline
1162  long abs(long x) { return labs(x); }
1163  __DEVICE__
1164  inline
1165  long long abs(long long x) { return llabs(x); }
1166 #endif
1167 // END INTEGER
1168 
1169 __DEVICE__
1170 inline _Float16 fma(_Float16 x, _Float16 y, _Float16 z) {
1171  return __ocml_fma_f16(x, y, z);
1172 }
1173 
1174 __DEVICE__
1175 inline float fma(float x, float y, float z) {
1176  return fmaf(x, y, z);
1177 }
1178 
1179 #pragma push_macro("__DEF_FLOAT_FUN")
1180 #pragma push_macro("__DEF_FLOAT_FUN2")
1181 #pragma push_macro("__DEF_FLOAT_FUN2I")
1182 #pragma push_macro("__HIP_OVERLOAD")
1183 #pragma push_macro("__HIP_OVERLOAD2")
1184 
1185 // __hip_enable_if::type is a type function which returns __T if __B is true.
1186 template<bool __B, class __T = void>
1188 
1189 template <class __T> struct __hip_enable_if<true, __T> {
1190  typedef __T type;
1191 };
1192 
1193 // __HIP_OVERLOAD1 is used to resolve function calls with integer argument to
1194 // avoid compilation error due to ambibuity. e.g. floor(5) is resolved with
1195 // floor(double).
1196 #define __HIP_OVERLOAD1(__retty, __fn) \
1197  template <typename __T> \
1198  __DEVICE__ \
1199  typename __hip_enable_if<std::numeric_limits<__T>::is_integer, \
1200  __retty>::type \
1201  __fn(__T __x) { \
1202  return ::__fn((double)__x); \
1203  }
1204 
1205 // __HIP_OVERLOAD2 is used to resolve function calls with mixed float/double
1206 // or integer argument to avoid compilation error due to ambibuity. e.g.
1207 // max(5.0f, 6.0) is resolved with max(double, double).
1208 #define __HIP_OVERLOAD2(__retty, __fn) \
1209  template <typename __T1, typename __T2> \
1210  __DEVICE__ typename __hip_enable_if< \
1211  std::numeric_limits<__T1>::is_specialized && \
1212  std::numeric_limits<__T2>::is_specialized, \
1213  __retty>::type \
1214  __fn(__T1 __x, __T2 __y) { \
1215  return __fn((double)__x, (double)__y); \
1216  }
1217 
1218 // Define cmath functions with float argument and returns float.
1219 #define __DEF_FUN1(retty, func) \
1220 __DEVICE__ \
1221 inline \
1222 float func(float x) \
1223 { \
1224  return func##f(x); \
1225 } \
1226 __HIP_OVERLOAD1(retty, func)
1227 
1228 // Define cmath functions with float argument and returns retty.
1229 #define __DEF_FUNI(retty, func) \
1230 __DEVICE__ \
1231 inline \
1232 retty func(float x) \
1233 { \
1234  return func##f(x); \
1235 } \
1236 __HIP_OVERLOAD1(retty, func)
1237 
1238 // define cmath functions with two float arguments.
1239 #define __DEF_FUN2(retty, func) \
1240 __DEVICE__ \
1241 inline \
1242 float func(float x, float y) \
1243 { \
1244  return func##f(x, y); \
1245 } \
1246 __HIP_OVERLOAD2(retty, func)
1247 
1248 __DEF_FUN1(double, acos)
1249 __DEF_FUN1(double, acosh)
1250 __DEF_FUN1(double, asin)
1251 __DEF_FUN1(double, asinh)
1252 __DEF_FUN1(double, atan)
1253 __DEF_FUN2(double, atan2);
1254 __DEF_FUN1(double, atanh)
1255 __DEF_FUN1(double, cbrt)
1256 __DEF_FUN1(double, ceil)
1257 __DEF_FUN2(double, copysign);
1258 __DEF_FUN1(double, cos)
1259 __DEF_FUN1(double, cosh)
1260 __DEF_FUN1(double, erf)
1261 __DEF_FUN1(double, erfc)
1262 __DEF_FUN1(double, exp)
1263 __DEF_FUN1(double, exp2)
1264 __DEF_FUN1(double, expm1)
1265 __DEF_FUN1(double, fabs)
1266 __DEF_FUN2(double, fdim);
1267 __DEF_FUN1(double, floor)
1268 __DEF_FUN2(double, fmax);
1269 __DEF_FUN2(double, fmin);
1270 __DEF_FUN2(double, fmod);
1271 //__HIP_OVERLOAD1(int, fpclassify)
1272 __DEF_FUN2(double, hypot);
1273 __DEF_FUNI(int, ilogb)
1274 __HIP_OVERLOAD1(bool, isfinite)
1275 __HIP_OVERLOAD2(bool, isgreater);
1276 __HIP_OVERLOAD2(bool, isgreaterequal);
1277 __HIP_OVERLOAD1(bool, isinf);
1278 __HIP_OVERLOAD2(bool, isless);
1279 __HIP_OVERLOAD2(bool, islessequal);
1280 __HIP_OVERLOAD2(bool, islessgreater);
1281 __HIP_OVERLOAD1(bool, isnan);
1282 //__HIP_OVERLOAD1(bool, isnormal)
1283 __HIP_OVERLOAD2(bool, isunordered);
1284 __DEF_FUN1(double, lgamma)
1285 __DEF_FUN1(double, log)
1286 __DEF_FUN1(double, log10)
1287 __DEF_FUN1(double, log1p)
1288 __DEF_FUN1(double, log2)
1289 __DEF_FUN1(double, logb)
1290 __DEF_FUNI(long long, llrint)
1291 __DEF_FUNI(long long, llround)
1292 __DEF_FUNI(long, lrint)
1293 __DEF_FUNI(long, lround)
1294 __DEF_FUN1(double, nearbyint);
1295 __DEF_FUN2(double, nextafter);
1296 __DEF_FUN2(double, pow);
1297 __DEF_FUN2(double, remainder);
1298 __DEF_FUN1(double, rint);
1299 __DEF_FUN1(double, round);
1300 __HIP_OVERLOAD1(bool, signbit)
1301 __DEF_FUN1(double, sin)
1302 __DEF_FUN1(double, sinh)
1303 __DEF_FUN1(double, sqrt)
1304 __DEF_FUN1(double, tan)
1305 __DEF_FUN1(double, tanh)
1306 __DEF_FUN1(double, tgamma)
1307 __DEF_FUN1(double, trunc);
1308 
1309 // define cmath functions with a float and an integer argument.
1310 #define __DEF_FLOAT_FUN2I(func) \
1311 __DEVICE__ \
1312 inline \
1313 float func(float x, int y) \
1314 { \
1315  return func##f(x, y); \
1316 }
1317 __DEF_FLOAT_FUN2I(scalbn)
1318 
1319 #if __HCC__
1320 template<class T>
1321 __DEVICE__ inline static T min(T arg1, T arg2) {
1322  return (arg1 < arg2) ? arg1 : arg2;
1323 }
1324 
1325 __DEVICE__ inline static uint32_t min(uint32_t arg1, int32_t arg2) {
1326  return min(arg1, (uint32_t) arg2);
1327 }
1328 /*__DEVICE__ inline static uint32_t min(int32_t arg1, uint32_t arg2) {
1329  return min((uint32_t) arg1, arg2);
1330 }
1331 
1332 __DEVICE__ inline static uint64_t min(uint64_t arg1, int64_t arg2) {
1333  return min(arg1, (uint64_t) arg2);
1334 }
1335 __DEVICE__ inline static uint64_t min(int64_t arg1, uint64_t arg2) {
1336  return min((uint64_t) arg1, arg2);
1337 }
1338 
1339 __DEVICE__ inline static unsigned long long min(unsigned long long arg1, long long arg2) {
1340  return min(arg1, (unsigned long long) arg2);
1341 }
1342 __DEVICE__ inline static unsigned long long min(long long arg1, unsigned long long arg2) {
1343  return min((unsigned long long) arg1, arg2);
1344 }*/
1345 
1346 template<class T>
1347 __DEVICE__ inline static T max(T arg1, T arg2) {
1348  return (arg1 > arg2) ? arg1 : arg2;
1349 }
1350 
1351 __DEVICE__ inline static uint32_t max(uint32_t arg1, int32_t arg2) {
1352  return max(arg1, (uint32_t) arg2);
1353 }
1354 __DEVICE__ inline static uint32_t max(int32_t arg1, uint32_t arg2) {
1355  return max((uint32_t) arg1, arg2);
1356 }
1357 
1358 /*__DEVICE__ inline static uint64_t max(uint64_t arg1, int64_t arg2) {
1359  return max(arg1, (uint64_t) arg2);
1360 }
1361 __DEVICE__ inline static uint64_t max(int64_t arg1, uint64_t arg2) {
1362  return max((uint64_t) arg1, arg2);
1363 }
1364 
1365 __DEVICE__ inline static unsigned long long max(unsigned long long arg1, long long arg2) {
1366  return max(arg1, (unsigned long long) arg2);
1367 }
1368 __DEVICE__ inline static unsigned long long max(long long arg1, unsigned long long arg2) {
1369  return max((unsigned long long) arg1, arg2);
1370 }*/
1371 #else
1372 __DEVICE__ inline int min(int arg1, int arg2) {
1373  return (arg1 < arg2) ? arg1 : arg2;
1374 }
1375 __DEVICE__ inline int max(int arg1, int arg2) {
1376  return (arg1 > arg2) ? arg1 : arg2;
1377 }
1378 
1379 __DEVICE__
1380 inline
1381 float max(float x, float y) {
1382  return fmaxf(x, y);
1383 }
1384 
1385 __DEVICE__
1386 inline
1387 double max(double x, double y) {
1388  return fmax(x, y);
1389 }
1390 
1391 __DEVICE__
1392 inline
1393 float min(float x, float y) {
1394  return fminf(x, y);
1395 }
1396 
1397 __DEVICE__
1398 inline
1399 double min(double x, double y) {
1400  return fmin(x, y);
1401 }
1402 
1403 __HIP_OVERLOAD2(double, max)
1404 __HIP_OVERLOAD2(double, min)
1405 
1406 #endif
1407 
1408 __host__ inline static int min(int arg1, int arg2) {
1409  return std::min(arg1, arg2);
1410 }
1411 
1412 __host__ inline static int max(int arg1, int arg2) {
1413  return std::max(arg1, arg2);
1414 }
1415 
1416 
1417 #pragma pop_macro("__DEF_FLOAT_FUN")
1418 #pragma pop_macro("__DEF_FLOAT_FUN2")
1419 #pragma pop_macro("__DEF_FLOAT_FUN2I")
1420 #pragma pop_macro("__HIP_OVERLOAD")
1421 #pragma pop_macro("__HIP_OVERLOAD2")
1422 #pragma pop_macro("__DEVICE__")
1423 #pragma pop_macro("__RETURN_TYPE")
1424 
1425 // For backward compatibility.
1426 // There are HIP applications e.g. TensorFlow, expecting __HIP_ARCH_* macros
1427 // defined after including math_functions.h.
TODO-doc.
#define __host__
Definition: host_defines.h:41
Contains definitions of APIs for HIP runtime.
Definition: math_functions.h:1187