HIP: Heterogenous-computing Interface for Portability
math_functions.h
1 /*
2 Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved.
3 
4 Permission is hereby granted, free of charge, to any person obtaining a copy
5 of this software and associated documentation files (the "Software"), to deal
6 in the Software without restriction, including without limitation the rights
7 to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
8 copies of the Software, and to permit persons to whom the Software is
9 furnished to do so, subject to the following conditions:
10 
11 The above copyright notice and this permission notice shall be included in
12 all copies or substantial portions of the Software.
13 
14 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
17 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
19 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
20 THE SOFTWARE.
21 */
22 
23 #pragma once
24 
25 #include "hip_fp16_math_fwd.h"
26 #include "hip_vector_types.h"
27 #include "math_fwd.h"
28 
30 
31 #include <algorithm>
32 #include <assert.h>
33 #include <limits.h>
34 #include <limits>
35 #include <stdint.h>
36 
37 // HCC's own math functions should be included first, otherwise there will
38 // be conflicts when hip/math_functions.h is included before hip/hip_runtime.h.
39 #ifdef __HCC__
40 #include "kalmar_math.h"
41 #endif
42 
43 #pragma push_macro("__DEVICE__")
44 #pragma push_macro("__RETURN_TYPE")
45 
46 #ifdef __HCC__
47 #define __DEVICE__ __device__
48 #define __RETURN_TYPE int
49 #else // to be consistent with __clang_cuda_math_forward_declares
50 #define __DEVICE__ static __device__
51 #define __RETURN_TYPE bool
52 #endif
53 
54 __DEVICE__
55 inline
56 uint64_t __make_mantissa_base8(const char* tagp)
57 {
58  uint64_t r = 0;
59  while (tagp) {
60  char tmp = *tagp;
61 
62  if (tmp >= '0' && tmp <= '7') r = (r * 8u) + tmp - '0';
63  else return 0;
64 
65  ++tagp;
66  }
67 
68  return r;
69 }
70 
71 __DEVICE__
72 inline
73 uint64_t __make_mantissa_base10(const char* tagp)
74 {
75  uint64_t r = 0;
76  while (tagp) {
77  char tmp = *tagp;
78 
79  if (tmp >= '0' && tmp <= '9') r = (r * 10u) + tmp - '0';
80  else return 0;
81 
82  ++tagp;
83  }
84 
85  return r;
86 }
87 
88 __DEVICE__
89 inline
90 uint64_t __make_mantissa_base16(const char* tagp)
91 {
92  uint64_t r = 0;
93  while (tagp) {
94  char tmp = *tagp;
95 
96  if (tmp >= '0' && tmp <= '9') r = (r * 16u) + tmp - '0';
97  else if (tmp >= 'a' && tmp <= 'f') r = (r * 16u) + tmp - 'a' + 10;
98  else if (tmp >= 'A' && tmp <= 'F') r = (r * 16u) + tmp - 'A' + 10;
99  else return 0;
100 
101  ++tagp;
102  }
103 
104  return r;
105 }
106 
107 __DEVICE__
108 inline
109 uint64_t __make_mantissa(const char* tagp)
110 {
111  if (!tagp) return 0u;
112 
113  if (*tagp == '0') {
114  ++tagp;
115 
116  if (*tagp == 'x' || *tagp == 'X') return __make_mantissa_base16(tagp);
117  else return __make_mantissa_base8(tagp);
118  }
119 
120  return __make_mantissa_base10(tagp);
121 }
122 
123 // DOT FUNCTIONS
124 #if (__hcc_workweek__ >= 19015) || __HIP_CLANG_ONLY__
125 __DEVICE__
126 inline
127 int amd_mixed_dot(short2 a, short2 b, int c, bool saturate) {
128  return __ockl_sdot2(a.data, b.data, c, saturate);
129 }
130 __DEVICE__
131 inline
132 uint amd_mixed_dot(ushort2 a, ushort2 b, uint c, bool saturate) {
133  return __ockl_udot2(a.data, b.data, c, saturate);
134 }
135 __DEVICE__
136 inline
137 int amd_mixed_dot(char4 a, char4 b, int c, bool saturate) {
138  return __ockl_sdot4(a.data, b.data, c, saturate);
139 }
140 __DEVICE__
141 inline
142 uint amd_mixed_dot(uchar4 a, uchar4 b, uint c, bool saturate) {
143  return __ockl_udot4(a.data, b.data, c, saturate);
144 }
145 __DEVICE__
146 inline
147 int amd_mixed_dot(int a, int b, int c, bool saturate) {
148  return __ockl_sdot8(a, b, c, saturate);
149 }
150 __DEVICE__
151 inline
152 uint amd_mixed_dot(uint a, uint b, uint c, bool saturate) {
153  return __ockl_udot8(a, b, c, saturate);
154 }
155 #endif
156 
157 // BEGIN FLOAT
158 __DEVICE__
159 inline
160 float abs(float x) { return __ocml_fabs_f32(x); }
161 __DEVICE__
162 inline
163 float acosf(float x) { return __ocml_acos_f32(x); }
164 __DEVICE__
165 inline
166 float acoshf(float x) { return __ocml_acosh_f32(x); }
167 __DEVICE__
168 inline
169 float asinf(float x) { return __ocml_asin_f32(x); }
170 __DEVICE__
171 inline
172 float asinhf(float x) { return __ocml_asinh_f32(x); }
173 __DEVICE__
174 inline
175 float atan2f(float x, float y) { return __ocml_atan2_f32(x, y); }
176 __DEVICE__
177 inline
178 float atanf(float x) { return __ocml_atan_f32(x); }
179 __DEVICE__
180 inline
181 float atanhf(float x) { return __ocml_atanh_f32(x); }
182 __DEVICE__
183 inline
184 float cbrtf(float x) { return __ocml_cbrt_f32(x); }
185 __DEVICE__
186 inline
187 float ceilf(float x) { return __ocml_ceil_f32(x); }
188 __DEVICE__
189 inline
190 float copysignf(float x, float y) { return __ocml_copysign_f32(x, y); }
191 __DEVICE__
192 inline
193 float cosf(float x) { return __ocml_cos_f32(x); }
194 __DEVICE__
195 inline
196 float coshf(float x) { return __ocml_cosh_f32(x); }
197 __DEVICE__
198 inline
199 float cospif(float x) { return __ocml_cospi_f32(x); }
200 __DEVICE__
201 inline
202 float cyl_bessel_i0f(float x) { return __ocml_i0_f32(x); }
203 __DEVICE__
204 inline
205 float cyl_bessel_i1f(float x) { return __ocml_i1_f32(x); }
206 __DEVICE__
207 inline
208 float erfcf(float x) { return __ocml_erfc_f32(x); }
209 __DEVICE__
210 inline
211 float erfcinvf(float x) { return __ocml_erfcinv_f32(x); }
212 __DEVICE__
213 inline
214 float erfcxf(float x) { return __ocml_erfcx_f32(x); }
215 __DEVICE__
216 inline
217 float erff(float x) { return __ocml_erf_f32(x); }
218 __DEVICE__
219 inline
220 float erfinvf(float x) { return __ocml_erfinv_f32(x); }
221 __DEVICE__
222 inline
223 float exp10f(float x) { return __ocml_exp10_f32(x); }
224 __DEVICE__
225 inline
226 float exp2f(float x) { return __ocml_exp2_f32(x); }
227 __DEVICE__
228 inline
229 float expf(float x) { return __ocml_exp_f32(x); }
230 __DEVICE__
231 inline
232 float expm1f(float x) { return __ocml_expm1_f32(x); }
233 __DEVICE__
234 inline
235 float fabsf(float x) { return __ocml_fabs_f32(x); }
236 __DEVICE__
237 inline
238 float fdimf(float x, float y) { return __ocml_fdim_f32(x, y); }
239 __DEVICE__
240 inline
241 float fdividef(float x, float y) { return x / y; }
242 __DEVICE__
243 inline
244 float floorf(float x) { return __ocml_floor_f32(x); }
245 __DEVICE__
246 inline
247 float fmaf(float x, float y, float z) { return __ocml_fma_f32(x, y, z); }
248 __DEVICE__
249 inline
250 float fmaxf(float x, float y) { return __ocml_fmax_f32(x, y); }
251 __DEVICE__
252 inline
253 float fminf(float x, float y) { return __ocml_fmin_f32(x, y); }
254 __DEVICE__
255 inline
256 float fmodf(float x, float y) { return __ocml_fmod_f32(x, y); }
257 __DEVICE__
258 inline
259 float frexpf(float x, int* nptr)
260 {
261  int tmp;
262  float r =
263  __ocml_frexp_f32(x, (__attribute__((address_space(5))) int*) &tmp);
264  *nptr = tmp;
265 
266  return r;
267 }
268 __DEVICE__
269 inline
270 float hypotf(float x, float y) { return __ocml_hypot_f32(x, y); }
271 __DEVICE__
272 inline
273 int ilogbf(float x) { return __ocml_ilogb_f32(x); }
274 __DEVICE__
275 inline
276 __RETURN_TYPE isfinite(float x) { return __ocml_isfinite_f32(x); }
277 __DEVICE__
278 inline
279 __RETURN_TYPE isinf(float x) { return __ocml_isinf_f32(x); }
280 __DEVICE__
281 inline
282 __RETURN_TYPE isnan(float x) { return __ocml_isnan_f32(x); }
283 __DEVICE__
284 inline
285 float j0f(float x) { return __ocml_j0_f32(x); }
286 __DEVICE__
287 inline
288 float j1f(float x) { return __ocml_j1_f32(x); }
289 __DEVICE__
290 inline
291 float jnf(int n, float x)
292 { // TODO: we could use Ahmes multiplication and the Miller & Brown algorithm
293  // for linear recurrences to get O(log n) steps, but it's unclear if
294  // it'd be beneficial in this case.
295  if (n == 0) return j0f(x);
296  if (n == 1) return j1f(x);
297 
298  float x0 = j0f(x);
299  float x1 = j1f(x);
300  for (int i = 1; i < n; ++i) {
301  float x2 = (2 * i) / x * x1 - x0;
302  x0 = x1;
303  x1 = x2;
304  }
305 
306  return x1;
307 }
308 __DEVICE__
309 inline
310 float ldexpf(float x, int e) { return __ocml_ldexp_f32(x, e); }
311 __DEVICE__
312 inline
313 float lgammaf(float x) { return __ocml_lgamma_f32(x); }
314 __DEVICE__
315 inline
316 long long int llrintf(float x) { return __ocml_rint_f32(x); }
317 __DEVICE__
318 inline
319 long long int llroundf(float x) { return __ocml_round_f32(x); }
320 __DEVICE__
321 inline
322 float log10f(float x) { return __ocml_log10_f32(x); }
323 __DEVICE__
324 inline
325 float log1pf(float x) { return __ocml_log1p_f32(x); }
326 __DEVICE__
327 inline
328 float log2f(float x) { return __ocml_log2_f32(x); }
329 __DEVICE__
330 inline
331 float logbf(float x) { return __ocml_logb_f32(x); }
332 __DEVICE__
333 inline
334 float logf(float x) { return __ocml_log_f32(x); }
335 __DEVICE__
336 inline
337 long int lrintf(float x) { return __ocml_rint_f32(x); }
338 __DEVICE__
339 inline
340 long int lroundf(float x) { return __ocml_round_f32(x); }
341 __DEVICE__
342 inline
343 float modff(float x, float* iptr)
344 {
345  float tmp;
346  float r =
347  __ocml_modf_f32(x, (__attribute__((address_space(5))) float*) &tmp);
348  *iptr = tmp;
349 
350  return r;
351 }
352 __DEVICE__
353 inline
354 float nanf(const char* tagp)
355 {
356  union {
357  float val;
358  struct ieee_float {
359  uint32_t mantissa : 22;
360  uint32_t quiet : 1;
361  uint32_t exponent : 8;
362  uint32_t sign : 1;
363  } bits;
364 
365  static_assert(sizeof(float) == sizeof(ieee_float), "");
366  } tmp;
367 
368  tmp.bits.sign = 0u;
369  tmp.bits.exponent = ~0u;
370  tmp.bits.quiet = 1u;
371  tmp.bits.mantissa = __make_mantissa(tagp);
372 
373  return tmp.val;
374 }
375 __DEVICE__
376 inline
377 float nearbyintf(float x) { return __ocml_nearbyint_f32(x); }
378 __DEVICE__
379 inline
380 float nextafterf(float x, float y) { return __ocml_nextafter_f32(x, y); }
381 __DEVICE__
382 inline
383 float norm3df(float x, float y, float z) { return __ocml_len3_f32(x, y, z); }
384 __DEVICE__
385 inline
386 float norm4df(float x, float y, float z, float w)
387 {
388  return __ocml_len4_f32(x, y, z, w);
389 }
390 __DEVICE__
391 inline
392 float normcdff(float x) { return __ocml_ncdf_f32(x); }
393 __DEVICE__
394 inline
395 float normcdfinvf(float x) { return __ocml_ncdfinv_f32(x); }
396 __DEVICE__
397 inline
398 float normf(int dim, const float* a)
399 { // TODO: placeholder until OCML adds support.
400  float r = 0;
401  while (dim--) { r += a[0] * a[0]; ++a; }
402 
403  return __ocml_sqrt_f32(r);
404 }
405 __DEVICE__
406 inline
407 float powf(float x, float y) { return __ocml_pow_f32(x, y); }
408 __DEVICE__
409 inline
410 float rcbrtf(float x) { return __ocml_rcbrt_f32(x); }
411 __DEVICE__
412 inline
413 float remainderf(float x, float y) { return __ocml_remainder_f32(x, y); }
414 __DEVICE__
415 inline
416 float remquof(float x, float y, int* quo)
417 {
418  int tmp;
419  float r =
420  __ocml_remquo_f32(x, y, (__attribute__((address_space(5))) int*) &tmp);
421  *quo = tmp;
422 
423  return r;
424 }
425 __DEVICE__
426 inline
427 float rhypotf(float x, float y) { return __ocml_rhypot_f32(x, y); }
428 __DEVICE__
429 inline
430 float rintf(float x) { return __ocml_rint_f32(x); }
431 __DEVICE__
432 inline
433 float rnorm3df(float x, float y, float z)
434 {
435  return __ocml_rlen3_f32(x, y, z);
436 }
437 
438 __DEVICE__
439 inline
440 float rnorm4df(float x, float y, float z, float w)
441 {
442  return __ocml_rlen4_f32(x, y, z, w);
443 }
444 __DEVICE__
445 inline
446 float rnormf(int dim, const float* a)
447 { // TODO: placeholder until OCML adds support.
448  float r = 0;
449  while (dim--) { r += a[0] * a[0]; ++a; }
450 
451  return __ocml_rsqrt_f32(r);
452 }
453 __DEVICE__
454 inline
455 float roundf(float x) { return __ocml_round_f32(x); }
456 __DEVICE__
457 inline
458 float rsqrtf(float x) { return __ocml_rsqrt_f32(x); }
459 __DEVICE__
460 inline
461 float scalblnf(float x, long int n)
462 {
463  return (n < INT_MAX) ? __ocml_scalbn_f32(x, n) : __ocml_scalb_f32(x, n);
464 }
465 __DEVICE__
466 inline
467 float scalbnf(float x, int n) { return __ocml_scalbn_f32(x, n); }
468 __DEVICE__
469 inline
470 __RETURN_TYPE signbit(float x) { return __ocml_signbit_f32(x); }
471 __DEVICE__
472 inline
473 void sincosf(float x, float* sptr, float* cptr)
474 {
475  float tmp;
476 
477  *sptr =
478  __ocml_sincos_f32(x, (__attribute__((address_space(5))) float*) &tmp);
479  *cptr = tmp;
480 }
481 __DEVICE__
482 inline
483 void sincospif(float x, float* sptr, float* cptr)
484 {
485  float tmp;
486 
487  *sptr =
488  __ocml_sincospi_f32(x, (__attribute__((address_space(5))) float*) &tmp);
489  *cptr = tmp;
490 }
491 __DEVICE__
492 inline
493 float sinf(float x) { return __ocml_sin_f32(x); }
494 __DEVICE__
495 inline
496 float sinhf(float x) { return __ocml_sinh_f32(x); }
497 __DEVICE__
498 inline
499 float sinpif(float x) { return __ocml_sinpi_f32(x); }
500 __DEVICE__
501 inline
502 float sqrtf(float x) { return __ocml_sqrt_f32(x); }
503 __DEVICE__
504 inline
505 float tanf(float x) { return __ocml_tan_f32(x); }
506 __DEVICE__
507 inline
508 float tanhf(float x) { return __ocml_tanh_f32(x); }
509 __DEVICE__
510 inline
511 float tgammaf(float x) { return __ocml_tgamma_f32(x); }
512 __DEVICE__
513 inline
514 float truncf(float x) { return __ocml_trunc_f32(x); }
515 __DEVICE__
516 inline
517 float y0f(float x) { return __ocml_y0_f32(x); }
518 __DEVICE__
519 inline
520 float y1f(float x) { return __ocml_y1_f32(x); }
521 __DEVICE__
522 inline
523 float ynf(int n, float x)
524 { // TODO: we could use Ahmes multiplication and the Miller & Brown algorithm
525  // for linear recurrences to get O(log n) steps, but it's unclear if
526  // it'd be beneficial in this case. Placeholder until OCML adds
527  // support.
528  if (n == 0) return y0f(x);
529  if (n == 1) return y1f(x);
530 
531  float x0 = y0f(x);
532  float x1 = y1f(x);
533  for (int i = 1; i < n; ++i) {
534  float x2 = (2 * i) / x * x1 - x0;
535  x0 = x1;
536  x1 = x2;
537  }
538 
539  return x1;
540 }
541 
542 // BEGIN INTRINSICS
543 __DEVICE__
544 inline
545 float __cosf(float x) { return __ocml_native_cos_f32(x); }
546 __DEVICE__
547 inline
548 float __exp10f(float x) { return __ocml_native_exp10_f32(x); }
549 __DEVICE__
550 inline
551 float __expf(float x) { return __ocml_native_exp_f32(x); }
552 #if defined OCML_BASIC_ROUNDED_OPERATIONS
553 __DEVICE__
554 inline
555 float __fadd_rd(float x, float y) { return __ocml_add_rtn_f32(x, y); }
556 #endif
557 __DEVICE__
558 inline
559 float __fadd_rn(float x, float y) { return x + y; }
560 #if defined OCML_BASIC_ROUNDED_OPERATIONS
561 __DEVICE__
562 inline
563 float __fadd_ru(float x, float y) { return __ocml_add_rtp_f32(x, y); }
564 __DEVICE__
565 inline
566 float __fadd_rz(float x, float y) { return __ocml_add_rtz_f32(x, y); }
567 __DEVICE__
568 inline
569 float __fdiv_rd(float x, float y) { return __ocml_div_rtn_f32(x, y); }
570 #endif
571 __DEVICE__
572 inline
573 float __fdiv_rn(float x, float y) { return x / y; }
574 #if defined OCML_BASIC_ROUNDED_OPERATIONS
575 __DEVICE__
576 inline
577 float __fdiv_ru(float x, float y) { return __ocml_div_rtp_f32(x, y); }
578 __DEVICE__
579 inline
580 float __fdiv_rz(float x, float y) { return __ocml_div_rtz_f32(x, y); }
581 #endif
582 __DEVICE__
583 inline
584 float __fdividef(float x, float y) { return x / y; }
585 #if defined OCML_BASIC_ROUNDED_OPERATIONS
586 __DEVICE__
587 inline
588 float __fmaf_rd(float x, float y, float z)
589 {
590  return __ocml_fma_rtn_f32(x, y, z);
591 }
592 #endif
593 __DEVICE__
594 inline
595 float __fmaf_rn(float x, float y, float z)
596 {
597  return __ocml_fma_f32(x, y, z);
598 }
599 #if defined OCML_BASIC_ROUNDED_OPERATIONS
600 __DEVICE__
601 inline
602 float __fmaf_ru(float x, float y, float z)
603 {
604  return __ocml_fma_rtp_f32(x, y, z);
605 }
606 __DEVICE__
607 inline
608 float __fmaf_rz(float x, float y, float z)
609 {
610  return __ocml_fma_rtz_f32(x, y, z);
611 }
612 __DEVICE__
613 inline
614 float __fmul_rd(float x, float y) { return __ocml_mul_rtn_f32(x, y); }
615 #endif
616 __DEVICE__
617 inline
618 float __fmul_rn(float x, float y) { return x * y; }
619 #if defined OCML_BASIC_ROUNDED_OPERATIONS
620 __DEVICE__
621 inline
622 float __fmul_ru(float x, float y) { return __ocml_mul_rtp_f32(x, y); }
623 __DEVICE__
624 inline
625 float __fmul_rz(float x, float y) { return __ocml_mul_rtz_f32(x, y); }
626 __DEVICE__
627 inline
628 float __frcp_rd(float x) { return __llvm_amdgcn_rcp_f32(x); }
629 #endif
630 __DEVICE__
631 inline
632 float __frcp_rn(float x) { return __llvm_amdgcn_rcp_f32(x); }
633 #if defined OCML_BASIC_ROUNDED_OPERATIONS
634 __DEVICE__
635 inline
636 float __frcp_ru(float x) { return __llvm_amdgcn_rcp_f32(x); }
637 __DEVICE__
638 inline
639 float __frcp_rz(float x) { return __llvm_amdgcn_rcp_f32(x); }
640 #endif
641 __DEVICE__
642 inline
643 float __frsqrt_rn(float x) { return __llvm_amdgcn_rsq_f32(x); }
644 #if defined OCML_BASIC_ROUNDED_OPERATIONS
645 __DEVICE__
646 inline
647 float __fsqrt_rd(float x) { return __ocml_sqrt_rtn_f32(x); }
648 #endif
649 __DEVICE__
650 inline
651 float __fsqrt_rn(float x) { return __ocml_native_sqrt_f32(x); }
652 #if defined OCML_BASIC_ROUNDED_OPERATIONS
653 __DEVICE__
654 inline
655 float __fsqrt_ru(float x) { return __ocml_sqrt_rtp_f32(x); }
656 __DEVICE__
657 inline
658 float __fsqrt_rz(float x) { return __ocml_sqrt_rtz_f32(x); }
659 __DEVICE__
660 inline
661 float __fsub_rd(float x, float y) { return __ocml_sub_rtn_f32(x, y); }
662 #endif
663 __DEVICE__
664 inline
665 float __fsub_rn(float x, float y) { return x - y; }
666 #if defined OCML_BASIC_ROUNDED_OPERATIONS
667 __DEVICE__
668 inline
669 float __fsub_ru(float x, float y) { return __ocml_sub_rtp_f32(x, y); }
670 __DEVICE__
671 inline
672 float __fsub_rz(float x, float y) { return __ocml_sub_rtz_f32(x, y); }
673 #endif
674 __DEVICE__
675 inline
676 float __log10f(float x) { return __ocml_native_log10_f32(x); }
677 __DEVICE__
678 inline
679 float __log2f(float x) { return __ocml_native_log2_f32(x); }
680 __DEVICE__
681 inline
682 float __logf(float x) { return __ocml_native_log_f32(x); }
683 __DEVICE__
684 inline
685 float __powf(float x, float y) { return __ocml_pow_f32(x, y); }
686 __DEVICE__
687 inline
688 float __saturatef(float x) { return (x < 0) ? 0 : ((x > 1) ? 1 : x); }
689 __DEVICE__
690 inline
691 void __sincosf(float x, float* sptr, float* cptr)
692 {
693  *sptr = __ocml_native_sin_f32(x);
694  *cptr = __ocml_native_cos_f32(x);
695 }
696 __DEVICE__
697 inline
698 float __sinf(float x) { return __ocml_native_sin_f32(x); }
699 __DEVICE__
700 inline
701 float __tanf(float x) { return __ocml_tan_f32(x); }
702 // END INTRINSICS
703 // END FLOAT
704 
705 // BEGIN DOUBLE
706 __DEVICE__
707 inline
708 double abs(double x) { return __ocml_fabs_f64(x); }
709 __DEVICE__
710 inline
711 double acos(double x) { return __ocml_acos_f64(x); }
712 __DEVICE__
713 inline
714 double acosh(double x) { return __ocml_acosh_f64(x); }
715 __DEVICE__
716 inline
717 double asin(double x) { return __ocml_asin_f64(x); }
718 __DEVICE__
719 inline
720 double asinh(double x) { return __ocml_asinh_f64(x); }
721 __DEVICE__
722 inline
723 double atan(double x) { return __ocml_atan_f64(x); }
724 __DEVICE__
725 inline
726 double atan2(double x, double y) { return __ocml_atan2_f64(x, y); }
727 __DEVICE__
728 inline
729 double atanh(double x) { return __ocml_atanh_f64(x); }
730 __DEVICE__
731 inline
732 double cbrt(double x) { return __ocml_cbrt_f64(x); }
733 __DEVICE__
734 inline
735 double ceil(double x) { return __ocml_ceil_f64(x); }
736 __DEVICE__
737 inline
738 double copysign(double x, double y) { return __ocml_copysign_f64(x, y); }
739 __DEVICE__
740 inline
741 double cos(double x) { return __ocml_cos_f64(x); }
742 __DEVICE__
743 inline
744 double cosh(double x) { return __ocml_cosh_f64(x); }
745 __DEVICE__
746 inline
747 double cospi(double x) { return __ocml_cospi_f64(x); }
748 __DEVICE__
749 inline
750 double cyl_bessel_i0(double x) { return __ocml_i0_f64(x); }
751 __DEVICE__
752 inline
753 double cyl_bessel_i1(double x) { return __ocml_i1_f64(x); }
754 __DEVICE__
755 inline
756 double erf(double x) { return __ocml_erf_f64(x); }
757 __DEVICE__
758 inline
759 double erfc(double x) { return __ocml_erfc_f64(x); }
760 __DEVICE__
761 inline
762 double erfcinv(double x) { return __ocml_erfcinv_f64(x); }
763 __DEVICE__
764 inline
765 double erfcx(double x) { return __ocml_erfcx_f64(x); }
766 __DEVICE__
767 inline
768 double erfinv(double x) { return __ocml_erfinv_f64(x); }
769 __DEVICE__
770 inline
771 double exp(double x) { return __ocml_exp_f64(x); }
772 __DEVICE__
773 inline
774 double exp10(double x) { return __ocml_exp10_f64(x); }
775 __DEVICE__
776 inline
777 double exp2(double x) { return __ocml_exp2_f64(x); }
778 __DEVICE__
779 inline
780 double expm1(double x) { return __ocml_expm1_f64(x); }
781 __DEVICE__
782 inline
783 double fabs(double x) { return __ocml_fabs_f64(x); }
784 __DEVICE__
785 inline
786 double fdim(double x, double y) { return __ocml_fdim_f64(x, y); }
787 __DEVICE__
788 inline
789 double floor(double x) { return __ocml_floor_f64(x); }
790 __DEVICE__
791 inline
792 double fma(double x, double y, double z) { return __ocml_fma_f64(x, y, z); }
793 __DEVICE__
794 inline
795 double fmax(double x, double y) { return __ocml_fmax_f64(x, y); }
796 __DEVICE__
797 inline
798 double fmin(double x, double y) { return __ocml_fmin_f64(x, y); }
799 __DEVICE__
800 inline
801 double fmod(double x, double y) { return __ocml_fmod_f64(x, y); }
802 __DEVICE__
803 inline
804 double frexp(double x, int* nptr)
805 {
806  int tmp;
807  double r =
808  __ocml_frexp_f64(x, (__attribute__((address_space(5))) int*) &tmp);
809  *nptr = tmp;
810 
811  return r;
812 }
813 __DEVICE__
814 inline
815 double hypot(double x, double y) { return __ocml_hypot_f64(x, y); }
816 __DEVICE__
817 inline
818 int ilogb(double x) { return __ocml_ilogb_f64(x); }
819 __DEVICE__
820 inline
821 __RETURN_TYPE isfinite(double x) { return __ocml_isfinite_f64(x); }
822 __DEVICE__
823 inline
824 __RETURN_TYPE isinf(double x) { return __ocml_isinf_f64(x); }
825 __DEVICE__
826 inline
827 __RETURN_TYPE isnan(double x) { return __ocml_isnan_f64(x); }
828 __DEVICE__
829 inline
830 double j0(double x) { return __ocml_j0_f64(x); }
831 __DEVICE__
832 inline
833 double j1(double x) { return __ocml_j1_f64(x); }
834 __DEVICE__
835 inline
836 double jn(int n, double x)
837 { // TODO: we could use Ahmes multiplication and the Miller & Brown algorithm
838  // for linear recurrences to get O(log n) steps, but it's unclear if
839  // it'd be beneficial in this case. Placeholder until OCML adds
840  // support.
841  if (n == 0) return j0f(x);
842  if (n == 1) return j1f(x);
843 
844  double x0 = j0f(x);
845  double x1 = j1f(x);
846  for (int i = 1; i < n; ++i) {
847  double x2 = (2 * i) / x * x1 - x0;
848  x0 = x1;
849  x1 = x2;
850  }
851 
852  return x1;
853 }
854 __DEVICE__
855 inline
856 double ldexp(double x, int e) { return __ocml_ldexp_f64(x, e); }
857 __DEVICE__
858 inline
859 double lgamma(double x) { return __ocml_lgamma_f64(x); }
860 __DEVICE__
861 inline
862 long long int llrint(double x) { return __ocml_rint_f64(x); }
863 __DEVICE__
864 inline
865 long long int llround(double x) { return __ocml_round_f64(x); }
866 __DEVICE__
867 inline
868 double log(double x) { return __ocml_log_f64(x); }
869 __DEVICE__
870 inline
871 double log10(double x) { return __ocml_log10_f64(x); }
872 __DEVICE__
873 inline
874 double log1p(double x) { return __ocml_log1p_f64(x); }
875 __DEVICE__
876 inline
877 double log2(double x) { return __ocml_log2_f64(x); }
878 __DEVICE__
879 inline
880 double logb(double x) { return __ocml_logb_f64(x); }
881 __DEVICE__
882 inline
883 long int lrint(double x) { return __ocml_rint_f64(x); }
884 __DEVICE__
885 inline
886 long int lround(double x) { return __ocml_round_f64(x); }
887 __DEVICE__
888 inline
889 double modf(double x, double* iptr)
890 {
891  double tmp;
892  double r =
893  __ocml_modf_f64(x, (__attribute__((address_space(5))) double*) &tmp);
894  *iptr = tmp;
895 
896  return r;
897 }
898 __DEVICE__
899 inline
900 double nan(const char* tagp)
901 {
902 #if !_WIN32
903  union {
904  double val;
905  struct ieee_double {
906  uint64_t mantissa : 51;
907  uint32_t quiet : 1;
908  uint32_t exponent : 11;
909  uint32_t sign : 1;
910  } bits;
911  static_assert(sizeof(double) == sizeof(ieee_double), "");
912  } tmp;
913 
914  tmp.bits.sign = 0u;
915  tmp.bits.exponent = ~0u;
916  tmp.bits.quiet = 1u;
917  tmp.bits.mantissa = __make_mantissa(tagp);
918 
919  return tmp.val;
920 #else
921  static_assert(sizeof(uint64_t)==sizeof(double));
922  uint64_t val = __make_mantissa(tagp);
923  val |= 0xFFF << 51;
924  return *reinterpret_cast<double*>(&val);
925 #endif
926 }
927 __DEVICE__
928 inline
929 double nearbyint(double x) { return __ocml_nearbyint_f64(x); }
930 __DEVICE__
931 inline
932 double nextafter(double x, double y) { return __ocml_nextafter_f64(x, y); }
933 __DEVICE__
934 inline
935 double norm(int dim, const double* a)
936 { // TODO: placeholder until OCML adds support.
937  double r = 0;
938  while (dim--) { r += a[0] * a[0]; ++a; }
939 
940  return __ocml_sqrt_f64(r);
941 }
942 __DEVICE__
943 inline
944 double norm3d(double x, double y, double z)
945 {
946  return __ocml_len3_f64(x, y, z);
947 }
948 __DEVICE__
949 inline
950 double norm4d(double x, double y, double z, double w)
951 {
952  return __ocml_len4_f64(x, y, z, w);
953 }
954 __DEVICE__
955 inline
956 double normcdf(double x) { return __ocml_ncdf_f64(x); }
957 __DEVICE__
958 inline
959 double normcdfinv(double x) { return __ocml_ncdfinv_f64(x); }
960 __DEVICE__
961 inline
962 double pow(double x, double y) { return __ocml_pow_f64(x, y); }
963 __DEVICE__
964 inline
965 double rcbrt(double x) { return __ocml_rcbrt_f64(x); }
966 __DEVICE__
967 inline
968 double remainder(double x, double y) { return __ocml_remainder_f64(x, y); }
969 __DEVICE__
970 inline
971 double remquo(double x, double y, int* quo)
972 {
973  int tmp;
974  double r =
975  __ocml_remquo_f64(x, y, (__attribute__((address_space(5))) int*) &tmp);
976  *quo = tmp;
977 
978  return r;
979 }
980 __DEVICE__
981 inline
982 double rhypot(double x, double y) { return __ocml_rhypot_f64(x, y); }
983 __DEVICE__
984 inline
985 double rint(double x) { return __ocml_rint_f64(x); }
986 __DEVICE__
987 inline
988 double rnorm(int dim, const double* a)
989 { // TODO: placeholder until OCML adds support.
990  double r = 0;
991  while (dim--) { r += a[0] * a[0]; ++a; }
992 
993  return __ocml_rsqrt_f64(r);
994 }
995 __DEVICE__
996 inline
997 double rnorm3d(double x, double y, double z)
998 {
999  return __ocml_rlen3_f64(x, y, z);
1000 }
1001 __DEVICE__
1002 inline
1003 double rnorm4d(double x, double y, double z, double w)
1004 {
1005  return __ocml_rlen4_f64(x, y, z, w);
1006 }
1007 __DEVICE__
1008 inline
1009 double round(double x) { return __ocml_round_f64(x); }
1010 __DEVICE__
1011 inline
1012 double rsqrt(double x) { return __ocml_rsqrt_f64(x); }
1013 __DEVICE__
1014 inline
1015 double scalbln(double x, long int n)
1016 {
1017  return (n < INT_MAX) ? __ocml_scalbn_f64(x, n) : __ocml_scalb_f64(x, n);
1018 }
1019 __DEVICE__
1020 inline
1021 double scalbn(double x, int n) { return __ocml_scalbn_f64(x, n); }
1022 __DEVICE__
1023 inline
1024 __RETURN_TYPE signbit(double x) { return __ocml_signbit_f64(x); }
1025 __DEVICE__
1026 inline
1027 double sin(double x) { return __ocml_sin_f64(x); }
1028 __DEVICE__
1029 inline
1030 void sincos(double x, double* sptr, double* cptr)
1031 {
1032  double tmp;
1033  *sptr =
1034  __ocml_sincos_f64(x, (__attribute__((address_space(5))) double*) &tmp);
1035  *cptr = tmp;
1036 }
1037 __DEVICE__
1038 inline
1039 void sincospi(double x, double* sptr, double* cptr)
1040 {
1041  double tmp;
1042  *sptr = __ocml_sincospi_f64(
1043  x, (__attribute__((address_space(5))) double*) &tmp);
1044  *cptr = tmp;
1045 }
1046 __DEVICE__
1047 inline
1048 double sinh(double x) { return __ocml_sinh_f64(x); }
1049 __DEVICE__
1050 inline
1051 double sinpi(double x) { return __ocml_sinpi_f64(x); }
1052 __DEVICE__
1053 inline
1054 double sqrt(double x) { return __ocml_sqrt_f64(x); }
1055 __DEVICE__
1056 inline
1057 double tan(double x) { return __ocml_tan_f64(x); }
1058 __DEVICE__
1059 inline
1060 double tanh(double x) { return __ocml_tanh_f64(x); }
1061 __DEVICE__
1062 inline
1063 double tgamma(double x) { return __ocml_tgamma_f64(x); }
1064 __DEVICE__
1065 inline
1066 double trunc(double x) { return __ocml_trunc_f64(x); }
1067 __DEVICE__
1068 inline
1069 double y0(double x) { return __ocml_y0_f64(x); }
1070 __DEVICE__
1071 inline
1072 double y1(double x) { return __ocml_y1_f64(x); }
1073 __DEVICE__
1074 inline
1075 double yn(int n, double x)
1076 { // TODO: we could use Ahmes multiplication and the Miller & Brown algorithm
1077  // for linear recurrences to get O(log n) steps, but it's unclear if
1078  // it'd be beneficial in this case. Placeholder until OCML adds
1079  // support.
1080  if (n == 0) return j0f(x);
1081  if (n == 1) return j1f(x);
1082 
1083  double x0 = j0f(x);
1084  double x1 = j1f(x);
1085  for (int i = 1; i < n; ++i) {
1086  double x2 = (2 * i) / x * x1 - x0;
1087  x0 = x1;
1088  x1 = x2;
1089  }
1090 
1091  return x1;
1092 }
1093 
1094 // BEGIN INTRINSICS
1095 #if defined OCML_BASIC_ROUNDED_OPERATIONS
1096 __DEVICE__
1097 inline
1098 double __dadd_rd(double x, double y) { return __ocml_add_rtn_f64(x, y); }
1099 #endif
1100 __DEVICE__
1101 inline
1102 double __dadd_rn(double x, double y) { return x + y; }
1103 #if defined OCML_BASIC_ROUNDED_OPERATIONS
1104 __DEVICE__
1105 inline
1106 double __dadd_ru(double x, double y) { return __ocml_add_rtp_f64(x, y); }
1107 __DEVICE__
1108 inline
1109 double __dadd_rz(double x, double y) { return __ocml_add_rtz_f64(x, y); }
1110 __DEVICE__
1111 inline
1112 double __ddiv_rd(double x, double y) { return __ocml_div_rtn_f64(x, y); }
1113 #endif
1114 __DEVICE__
1115 inline
1116 double __ddiv_rn(double x, double y) { return x / y; }
1117 #if defined OCML_BASIC_ROUNDED_OPERATIONS
1118 __DEVICE__
1119 inline
1120 double __ddiv_ru(double x, double y) { return __ocml_div_rtp_f64(x, y); }
1121 __DEVICE__
1122 inline
1123 double __ddiv_rz(double x, double y) { return __ocml_div_rtz_f64(x, y); }
1124 __DEVICE__
1125 inline
1126 double __dmul_rd(double x, double y) { return __ocml_mul_rtn_f64(x, y); }
1127 #endif
1128 __DEVICE__
1129 inline
1130 double __dmul_rn(double x, double y) { return x * y; }
1131 #if defined OCML_BASIC_ROUNDED_OPERATIONS
1132 __DEVICE__
1133 inline
1134 double __dmul_ru(double x, double y) { return __ocml_mul_rtp_f64(x, y); }
1135 __DEVICE__
1136 inline
1137 double __dmul_rz(double x, double y) { return __ocml_mul_rtz_f64(x, y); }
1138 __DEVICE__
1139 inline
1140 double __drcp_rd(double x) { return __llvm_amdgcn_rcp_f64(x); }
1141 #endif
1142 __DEVICE__
1143 inline
1144 double __drcp_rn(double x) { return __llvm_amdgcn_rcp_f64(x); }
1145 #if defined OCML_BASIC_ROUNDED_OPERATIONS
1146 __DEVICE__
1147 inline
1148 double __drcp_ru(double x) { return __llvm_amdgcn_rcp_f64(x); }
1149 __DEVICE__
1150 inline
1151 double __drcp_rz(double x) { return __llvm_amdgcn_rcp_f64(x); }
1152 __DEVICE__
1153 inline
1154 double __dsqrt_rd(double x) { return __ocml_sqrt_rtn_f64(x); }
1155 #endif
1156 __DEVICE__
1157 inline
1158 double __dsqrt_rn(double x) { return __ocml_sqrt_f64(x); }
1159 #if defined OCML_BASIC_ROUNDED_OPERATIONS
1160 __DEVICE__
1161 inline
1162 double __dsqrt_ru(double x) { return __ocml_sqrt_rtp_f64(x); }
1163 __DEVICE__
1164 inline
1165 double __dsqrt_rz(double x) { return __ocml_sqrt_rtz_f64(x); }
1166 __DEVICE__
1167 inline
1168 double __dsub_rd(double x, double y) { return __ocml_sub_rtn_f64(x, y); }
1169 #endif
1170 __DEVICE__
1171 inline
1172 double __dsub_rn(double x, double y) { return x - y; }
1173 #if defined OCML_BASIC_ROUNDED_OPERATIONS
1174 __DEVICE__
1175 inline
1176 double __dsub_ru(double x, double y) { return __ocml_sub_rtp_f64(x, y); }
1177 __DEVICE__
1178 inline
1179 double __dsub_rz(double x, double y) { return __ocml_sub_rtz_f64(x, y); }
1180 __DEVICE__
1181 inline
1182 double __fma_rd(double x, double y, double z)
1183 {
1184  return __ocml_fma_rtn_f64(x, y, z);
1185 }
1186 #endif
1187 __DEVICE__
1188 inline
1189 double __fma_rn(double x, double y, double z)
1190 {
1191  return __ocml_fma_f64(x, y, z);
1192 }
1193 #if defined OCML_BASIC_ROUNDED_OPERATIONS
1194 __DEVICE__
1195 inline
1196 double __fma_ru(double x, double y, double z)
1197 {
1198  return __ocml_fma_rtp_f64(x, y, z);
1199 }
1200 __DEVICE__
1201 inline
1202 double __fma_rz(double x, double y, double z)
1203 {
1204  return __ocml_fma_rtz_f64(x, y, z);
1205 }
1206 #endif
1207 // END INTRINSICS
1208 // END DOUBLE
1209 
1210 // BEGIN INTEGER
1211 __DEVICE__
1212 inline
1213 int abs(int x)
1214 {
1215  int sgn = x >> (sizeof(int) * CHAR_BIT - 1);
1216  return (x ^ sgn) - sgn;
1217 }
1218 __DEVICE__
1219 inline
1220 long labs(long x)
1221 {
1222  long sgn = x >> (sizeof(long) * CHAR_BIT - 1);
1223  return (x ^ sgn) - sgn;
1224 }
1225 __DEVICE__
1226 inline
1227 long long llabs(long long x)
1228 {
1229  long long sgn = x >> (sizeof(long long) * CHAR_BIT - 1);
1230  return (x ^ sgn) - sgn;
1231 }
1232 
1233 #if defined(__cplusplus)
1234  __DEVICE__
1235  inline
1236  long abs(long x) { return labs(x); }
1237  __DEVICE__
1238  inline
1239  long long abs(long long x) { return llabs(x); }
1240 #endif
1241 // END INTEGER
1242 
1243 __DEVICE__
1244 inline _Float16 fma(_Float16 x, _Float16 y, _Float16 z) {
1245  return __ocml_fma_f16(x, y, z);
1246 }
1247 
1248 __DEVICE__
1249 inline float fma(float x, float y, float z) {
1250  return fmaf(x, y, z);
1251 }
1252 
1253 #pragma push_macro("__DEF_FLOAT_FUN")
1254 #pragma push_macro("__DEF_FLOAT_FUN2")
1255 #pragma push_macro("__DEF_FLOAT_FUN2I")
1256 #pragma push_macro("__HIP_OVERLOAD")
1257 #pragma push_macro("__HIP_OVERLOAD2")
1258 
1259 // __hip_enable_if::type is a type function which returns __T if __B is true.
1260 template<bool __B, class __T = void>
1262 
1263 template <class __T> struct __hip_enable_if<true, __T> {
1264  typedef __T type;
1265 };
1266 
1267 // __HIP_OVERLOAD1 is used to resolve function calls with integer argument to
1268 // avoid compilation error due to ambibuity. e.g. floor(5) is resolved with
1269 // floor(double).
1270 #define __HIP_OVERLOAD1(__retty, __fn) \
1271  template <typename __T> \
1272  __DEVICE__ \
1273  typename __hip_enable_if<std::numeric_limits<__T>::is_integer, \
1274  __retty>::type \
1275  __fn(__T __x) { \
1276  return ::__fn((double)__x); \
1277  }
1278 
1279 // __HIP_OVERLOAD2 is used to resolve function calls with mixed float/double
1280 // or integer argument to avoid compilation error due to ambibuity. e.g.
1281 // max(5.0f, 6.0) is resolved with max(double, double).
1282 #define __HIP_OVERLOAD2(__retty, __fn) \
1283  template <typename __T1, typename __T2> \
1284  __DEVICE__ typename __hip_enable_if< \
1285  std::numeric_limits<__T1>::is_specialized && \
1286  std::numeric_limits<__T2>::is_specialized, \
1287  __retty>::type \
1288  __fn(__T1 __x, __T2 __y) { \
1289  return __fn((double)__x, (double)__y); \
1290  }
1291 
1292 // Define cmath functions with float argument and returns float.
1293 #define __DEF_FUN1(retty, func) \
1294 __DEVICE__ \
1295 inline \
1296 float func(float x) \
1297 { \
1298  return func##f(x); \
1299 } \
1300 __HIP_OVERLOAD1(retty, func)
1301 
1302 // Define cmath functions with float argument and returns retty.
1303 #define __DEF_FUNI(retty, func) \
1304 __DEVICE__ \
1305 inline \
1306 retty func(float x) \
1307 { \
1308  return func##f(x); \
1309 } \
1310 __HIP_OVERLOAD1(retty, func)
1311 
1312 // define cmath functions with two float arguments.
1313 #define __DEF_FUN2(retty, func) \
1314 __DEVICE__ \
1315 inline \
1316 float func(float x, float y) \
1317 { \
1318  return func##f(x, y); \
1319 } \
1320 __HIP_OVERLOAD2(retty, func)
1321 
1322 __DEF_FUN1(double, acos)
1323 __DEF_FUN1(double, acosh)
1324 __DEF_FUN1(double, asin)
1325 __DEF_FUN1(double, asinh)
1326 __DEF_FUN1(double, atan)
1327 __DEF_FUN2(double, atan2);
1328 __DEF_FUN1(double, atanh)
1329 __DEF_FUN1(double, cbrt)
1330 __DEF_FUN1(double, ceil)
1331 __DEF_FUN2(double, copysign);
1332 __DEF_FUN1(double, cos)
1333 __DEF_FUN1(double, cosh)
1334 __DEF_FUN1(double, erf)
1335 __DEF_FUN1(double, erfc)
1336 __DEF_FUN1(double, exp)
1337 __DEF_FUN1(double, exp2)
1338 __DEF_FUN1(double, expm1)
1339 __DEF_FUN1(double, fabs)
1340 __DEF_FUN2(double, fdim);
1341 __DEF_FUN1(double, floor)
1342 __DEF_FUN2(double, fmax);
1343 __DEF_FUN2(double, fmin);
1344 __DEF_FUN2(double, fmod);
1345 //__HIP_OVERLOAD1(int, fpclassify)
1346 __DEF_FUN2(double, hypot);
1347 __DEF_FUNI(int, ilogb)
1348 __HIP_OVERLOAD1(bool, isfinite)
1349 __HIP_OVERLOAD2(bool, isgreater);
1350 __HIP_OVERLOAD2(bool, isgreaterequal);
1351 __HIP_OVERLOAD1(bool, isinf);
1352 __HIP_OVERLOAD2(bool, isless);
1353 __HIP_OVERLOAD2(bool, islessequal);
1354 __HIP_OVERLOAD2(bool, islessgreater);
1355 __HIP_OVERLOAD1(bool, isnan);
1356 //__HIP_OVERLOAD1(bool, isnormal)
1357 __HIP_OVERLOAD2(bool, isunordered);
1358 __DEF_FUN1(double, lgamma)
1359 __DEF_FUN1(double, log)
1360 __DEF_FUN1(double, log10)
1361 __DEF_FUN1(double, log1p)
1362 __DEF_FUN1(double, log2)
1363 __DEF_FUN1(double, logb)
1364 __DEF_FUNI(long long, llrint)
1365 __DEF_FUNI(long long, llround)
1366 __DEF_FUNI(long, lrint)
1367 __DEF_FUNI(long, lround)
1368 __DEF_FUN1(double, nearbyint);
1369 __DEF_FUN2(double, nextafter);
1370 __DEF_FUN2(double, pow);
1371 __DEF_FUN2(double, remainder);
1372 __DEF_FUN1(double, rint);
1373 __DEF_FUN1(double, round);
1374 __HIP_OVERLOAD1(bool, signbit)
1375 __DEF_FUN1(double, sin)
1376 __DEF_FUN1(double, sinh)
1377 __DEF_FUN1(double, sqrt)
1378 __DEF_FUN1(double, tan)
1379 __DEF_FUN1(double, tanh)
1380 __DEF_FUN1(double, tgamma)
1381 __DEF_FUN1(double, trunc);
1382 
1383 // define cmath functions with a float and an integer argument.
1384 #define __DEF_FLOAT_FUN2I(func) \
1385 __DEVICE__ \
1386 inline \
1387 float func(float x, int y) \
1388 { \
1389  return func##f(x, y); \
1390 }
1391 __DEF_FLOAT_FUN2I(scalbn)
1392 
1393 #if __HCC__
1394 template<class T>
1395 __DEVICE__ inline static T min(T arg1, T arg2) {
1396  return (arg1 < arg2) ? arg1 : arg2;
1397 }
1398 
1399 __DEVICE__ inline static uint32_t min(uint32_t arg1, int32_t arg2) {
1400  return min(arg1, (uint32_t) arg2);
1401 }
1402 /*__DEVICE__ inline static uint32_t min(int32_t arg1, uint32_t arg2) {
1403  return min((uint32_t) arg1, arg2);
1404 }
1405 
1406 __DEVICE__ inline static uint64_t min(uint64_t arg1, int64_t arg2) {
1407  return min(arg1, (uint64_t) arg2);
1408 }
1409 __DEVICE__ inline static uint64_t min(int64_t arg1, uint64_t arg2) {
1410  return min((uint64_t) arg1, arg2);
1411 }
1412 
1413 __DEVICE__ inline static unsigned long long min(unsigned long long arg1, long long arg2) {
1414  return min(arg1, (unsigned long long) arg2);
1415 }
1416 __DEVICE__ inline static unsigned long long min(long long arg1, unsigned long long arg2) {
1417  return min((unsigned long long) arg1, arg2);
1418 }*/
1419 
1420 template<class T>
1421 __DEVICE__ inline static T max(T arg1, T arg2) {
1422  return (arg1 > arg2) ? arg1 : arg2;
1423 }
1424 
1425 __DEVICE__ inline static uint32_t max(uint32_t arg1, int32_t arg2) {
1426  return max(arg1, (uint32_t) arg2);
1427 }
1428 __DEVICE__ inline static uint32_t max(int32_t arg1, uint32_t arg2) {
1429  return max((uint32_t) arg1, arg2);
1430 }
1431 
1432 /*__DEVICE__ inline static uint64_t max(uint64_t arg1, int64_t arg2) {
1433  return max(arg1, (uint64_t) arg2);
1434 }
1435 __DEVICE__ inline static uint64_t max(int64_t arg1, uint64_t arg2) {
1436  return max((uint64_t) arg1, arg2);
1437 }
1438 
1439 __DEVICE__ inline static unsigned long long max(unsigned long long arg1, long long arg2) {
1440  return max(arg1, (unsigned long long) arg2);
1441 }
1442 __DEVICE__ inline static unsigned long long max(long long arg1, unsigned long long arg2) {
1443  return max((unsigned long long) arg1, arg2);
1444 }*/
1445 #else
1446 __DEVICE__ inline int min(int arg1, int arg2) {
1447  return (arg1 < arg2) ? arg1 : arg2;
1448 }
1449 __DEVICE__ inline int max(int arg1, int arg2) {
1450  return (arg1 > arg2) ? arg1 : arg2;
1451 }
1452 
1453 __DEVICE__
1454 inline
1455 float max(float x, float y) {
1456  return fmaxf(x, y);
1457 }
1458 
1459 __DEVICE__
1460 inline
1461 double max(double x, double y) {
1462  return fmax(x, y);
1463 }
1464 
1465 __DEVICE__
1466 inline
1467 float min(float x, float y) {
1468  return fminf(x, y);
1469 }
1470 
1471 __DEVICE__
1472 inline
1473 double min(double x, double y) {
1474  return fmin(x, y);
1475 }
1476 
1477 __HIP_OVERLOAD2(double, max)
1478 __HIP_OVERLOAD2(double, min)
1479 
1480 #endif
1481 
1482 __host__ inline static int min(int arg1, int arg2) {
1483  return std::min(arg1, arg2);
1484 }
1485 
1486 __host__ inline static int max(int arg1, int arg2) {
1487  return std::max(arg1, arg2);
1488 }
1489 
1490 
1491 #pragma pop_macro("__DEF_FLOAT_FUN")
1492 #pragma pop_macro("__DEF_FLOAT_FUN2")
1493 #pragma pop_macro("__DEF_FLOAT_FUN2I")
1494 #pragma pop_macro("__HIP_OVERLOAD")
1495 #pragma pop_macro("__HIP_OVERLOAD2")
1496 #pragma pop_macro("__DEVICE__")
1497 #pragma pop_macro("__RETURN_TYPE")
1498 
1499 // For backward compatibility.
1500 // There are HIP applications e.g. TensorFlow, expecting __HIP_ARCH_* macros
1501 // defined after including math_functions.h.
TODO-doc.
_Float16 __2f16 __attribute__((ext_vector_type(2)))
Copies the memory address of symbol symbolName to devPtr.
Definition: hip_fp16_math_fwd.h:53
#define __host__
Definition: host_defines.h:41
Contains definitions of APIs for HIP runtime.
Defines the different newt vector types for HIP runtime.
Definition: math_functions.h:1261