HIP: Heterogenous-computing Interface for Portability
math_functions.h
1 /*
2 Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved.
3 
4 Permission is hereby granted, free of charge, to any person obtaining a copy
5 of this software and associated documentation files (the "Software"), to deal
6 in the Software without restriction, including without limitation the rights
7 to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
8 copies of the Software, and to permit persons to whom the Software is
9 furnished to do so, subject to the following conditions:
10 
11 The above copyright notice and this permission notice shall be included in
12 all copies or substantial portions of the Software.
13 
14 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
17 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
19 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
20 THE SOFTWARE.
21 */
22 
23 #pragma once
24 
25 #include "hip_fp16_math_fwd.h"
26 #include "hip_vector_types.h"
27 #include "math_fwd.h"
28 
30 
31 #include <algorithm>
32 
33 // assert.h is only for the host version of assert.
34 // The device version of assert is implemented in hip/amd_detail/hip_runtime.h.
35 // Users should include hip_runtime.h for the device version of assert.
36 #if !__HIP_DEVICE_COMPILE__
37 #include <assert.h>
38 #endif
39 
40 #include <limits.h>
41 #include <limits>
42 #include <stdint.h>
43 
44 #if _LIBCPP_VERSION && __HIP__
45 namespace std {
46 template <>
47 struct __numeric_type<_Float16>
48 {
49  static _Float16 __test(_Float16);
50 
51  typedef _Float16 type;
52  static const bool value = true;
53 };
54 }
55 #endif // _LIBCPP_VERSION
56 
57 #pragma push_macro("__DEVICE__")
58 #pragma push_macro("__RETURN_TYPE")
59 
60 #define __DEVICE__ static __device__
61 #define __RETURN_TYPE bool
62 
63 #if !__CLANG_HIP_RUNTIME_WRAPPER_INCLUDED__
64 __DEVICE__
65 inline
66 uint64_t __make_mantissa_base8(const char* tagp)
67 {
68  uint64_t r = 0;
69  while (tagp) {
70  char tmp = *tagp;
71 
72  if (tmp >= '0' && tmp <= '7') r = (r * 8u) + tmp - '0';
73  else return 0;
74 
75  ++tagp;
76  }
77 
78  return r;
79 }
80 
81 __DEVICE__
82 inline
83 uint64_t __make_mantissa_base10(const char* tagp)
84 {
85  uint64_t r = 0;
86  while (tagp) {
87  char tmp = *tagp;
88 
89  if (tmp >= '0' && tmp <= '9') r = (r * 10u) + tmp - '0';
90  else return 0;
91 
92  ++tagp;
93  }
94 
95  return r;
96 }
97 
98 __DEVICE__
99 inline
100 uint64_t __make_mantissa_base16(const char* tagp)
101 {
102  uint64_t r = 0;
103  while (tagp) {
104  char tmp = *tagp;
105 
106  if (tmp >= '0' && tmp <= '9') r = (r * 16u) + tmp - '0';
107  else if (tmp >= 'a' && tmp <= 'f') r = (r * 16u) + tmp - 'a' + 10;
108  else if (tmp >= 'A' && tmp <= 'F') r = (r * 16u) + tmp - 'A' + 10;
109  else return 0;
110 
111  ++tagp;
112  }
113 
114  return r;
115 }
116 
117 __DEVICE__
118 inline
119 uint64_t __make_mantissa(const char* tagp)
120 {
121  if (!tagp) return 0u;
122 
123  if (*tagp == '0') {
124  ++tagp;
125 
126  if (*tagp == 'x' || *tagp == 'X') return __make_mantissa_base16(tagp);
127  else return __make_mantissa_base8(tagp);
128  }
129 
130  return __make_mantissa_base10(tagp);
131 }
132 #endif // !__CLANG_HIP_RUNTIME_WRAPPER_INCLUDED__
133 
134 // DOT FUNCTIONS
135 #if __HIP_CLANG_ONLY__
136 __DEVICE__
137 inline
138 int amd_mixed_dot(short2 a, short2 b, int c, bool saturate) {
139  return __ockl_sdot2(a.data, b.data, c, saturate);
140 }
141 __DEVICE__
142 inline
143 uint amd_mixed_dot(ushort2 a, ushort2 b, uint c, bool saturate) {
144  return __ockl_udot2(a.data, b.data, c, saturate);
145 }
146 __DEVICE__
147 inline
148 int amd_mixed_dot(char4 a, char4 b, int c, bool saturate) {
149  return __ockl_sdot4(a.data, b.data, c, saturate);
150 }
151 __DEVICE__
152 inline
153 uint amd_mixed_dot(uchar4 a, uchar4 b, uint c, bool saturate) {
154  return __ockl_udot4(a.data, b.data, c, saturate);
155 }
156 __DEVICE__
157 inline
158 int amd_mixed_dot(int a, int b, int c, bool saturate) {
159  return __ockl_sdot8(a, b, c, saturate);
160 }
161 __DEVICE__
162 inline
163 uint amd_mixed_dot(uint a, uint b, uint c, bool saturate) {
164  return __ockl_udot8(a, b, c, saturate);
165 }
166 #endif
167 
168 #if !__CLANG_HIP_RUNTIME_WRAPPER_INCLUDED__
169 // BEGIN FLOAT
170 __DEVICE__
171 inline
172 float abs(float x) { return __ocml_fabs_f32(x); }
173 __DEVICE__
174 inline
175 float acosf(float x) { return __ocml_acos_f32(x); }
176 __DEVICE__
177 inline
178 float acoshf(float x) { return __ocml_acosh_f32(x); }
179 __DEVICE__
180 inline
181 float asinf(float x) { return __ocml_asin_f32(x); }
182 __DEVICE__
183 inline
184 float asinhf(float x) { return __ocml_asinh_f32(x); }
185 __DEVICE__
186 inline
187 float atan2f(float x, float y) { return __ocml_atan2_f32(x, y); }
188 __DEVICE__
189 inline
190 float atanf(float x) { return __ocml_atan_f32(x); }
191 __DEVICE__
192 inline
193 float atanhf(float x) { return __ocml_atanh_f32(x); }
194 __DEVICE__
195 inline
196 float cbrtf(float x) { return __ocml_cbrt_f32(x); }
197 __DEVICE__
198 inline
199 float ceilf(float x) { return __ocml_ceil_f32(x); }
200 __DEVICE__
201 inline
202 float copysignf(float x, float y) { return __ocml_copysign_f32(x, y); }
203 __DEVICE__
204 inline
205 float cosf(float x) { return __ocml_cos_f32(x); }
206 __DEVICE__
207 inline
208 float coshf(float x) { return __ocml_cosh_f32(x); }
209 __DEVICE__
210 inline
211 float cospif(float x) { return __ocml_cospi_f32(x); }
212 __DEVICE__
213 inline
214 float cyl_bessel_i0f(float x) { return __ocml_i0_f32(x); }
215 __DEVICE__
216 inline
217 float cyl_bessel_i1f(float x) { return __ocml_i1_f32(x); }
218 __DEVICE__
219 inline
220 float erfcf(float x) { return __ocml_erfc_f32(x); }
221 __DEVICE__
222 inline
223 float erfcinvf(float x) { return __ocml_erfcinv_f32(x); }
224 __DEVICE__
225 inline
226 float erfcxf(float x) { return __ocml_erfcx_f32(x); }
227 __DEVICE__
228 inline
229 float erff(float x) { return __ocml_erf_f32(x); }
230 __DEVICE__
231 inline
232 float erfinvf(float x) { return __ocml_erfinv_f32(x); }
233 __DEVICE__
234 inline
235 float exp10f(float x) { return __ocml_exp10_f32(x); }
236 __DEVICE__
237 inline
238 float exp2f(float x) { return __ocml_exp2_f32(x); }
239 __DEVICE__
240 inline
241 float expf(float x) { return __ocml_exp_f32(x); }
242 __DEVICE__
243 inline
244 float expm1f(float x) { return __ocml_expm1_f32(x); }
245 __DEVICE__
246 inline
247 float fabsf(float x) { return __ocml_fabs_f32(x); }
248 __DEVICE__
249 inline
250 float fdimf(float x, float y) { return __ocml_fdim_f32(x, y); }
251 __DEVICE__
252 inline
253 float fdividef(float x, float y) { return x / y; }
254 __DEVICE__
255 inline
256 float floorf(float x) { return __ocml_floor_f32(x); }
257 __DEVICE__
258 inline
259 float fmaf(float x, float y, float z) { return __ocml_fma_f32(x, y, z); }
260 __DEVICE__
261 inline
262 float fmaxf(float x, float y) { return __ocml_fmax_f32(x, y); }
263 __DEVICE__
264 inline
265 float fminf(float x, float y) { return __ocml_fmin_f32(x, y); }
266 __DEVICE__
267 inline
268 float fmodf(float x, float y) { return __ocml_fmod_f32(x, y); }
269 __DEVICE__
270 inline
271 float frexpf(float x, int* nptr)
272 {
273  int tmp;
274  float r =
275  __ocml_frexp_f32(x, (__attribute__((address_space(5))) int*) &tmp);
276  *nptr = tmp;
277 
278  return r;
279 }
280 __DEVICE__
281 inline
282 float hypotf(float x, float y) { return __ocml_hypot_f32(x, y); }
283 __DEVICE__
284 inline
285 int ilogbf(float x) { return __ocml_ilogb_f32(x); }
286 __DEVICE__
287 inline
288 __RETURN_TYPE isfinite(float x) { return __ocml_isfinite_f32(x); }
289 __DEVICE__
290 inline
291 __RETURN_TYPE isinf(float x) { return __ocml_isinf_f32(x); }
292 __DEVICE__
293 inline
294 __RETURN_TYPE isnan(float x) { return __ocml_isnan_f32(x); }
295 __DEVICE__
296 inline
297 float j0f(float x) { return __ocml_j0_f32(x); }
298 __DEVICE__
299 inline
300 float j1f(float x) { return __ocml_j1_f32(x); }
301 __DEVICE__
302 inline
303 float jnf(int n, float x)
304 { // TODO: we could use Ahmes multiplication and the Miller & Brown algorithm
305  // for linear recurrences to get O(log n) steps, but it's unclear if
306  // it'd be beneficial in this case.
307  if (n == 0) return j0f(x);
308  if (n == 1) return j1f(x);
309 
310  float x0 = j0f(x);
311  float x1 = j1f(x);
312  for (int i = 1; i < n; ++i) {
313  float x2 = (2 * i) / x * x1 - x0;
314  x0 = x1;
315  x1 = x2;
316  }
317 
318  return x1;
319 }
320 __DEVICE__
321 inline
322 float ldexpf(float x, int e) { return __ocml_ldexp_f32(x, e); }
323 __DEVICE__
324 inline
325 float lgammaf(float x) { return __ocml_lgamma_f32(x); }
326 __DEVICE__
327 inline
328 long long int llrintf(float x) { return __ocml_rint_f32(x); }
329 __DEVICE__
330 inline
331 long long int llroundf(float x) { return __ocml_round_f32(x); }
332 __DEVICE__
333 inline
334 float log10f(float x) { return __ocml_log10_f32(x); }
335 __DEVICE__
336 inline
337 float log1pf(float x) { return __ocml_log1p_f32(x); }
338 __DEVICE__
339 inline
340 float log2f(float x) { return __ocml_log2_f32(x); }
341 __DEVICE__
342 inline
343 float logbf(float x) { return __ocml_logb_f32(x); }
344 __DEVICE__
345 inline
346 float logf(float x) { return __ocml_log_f32(x); }
347 __DEVICE__
348 inline
349 long int lrintf(float x) { return __ocml_rint_f32(x); }
350 __DEVICE__
351 inline
352 long int lroundf(float x) { return __ocml_round_f32(x); }
353 __DEVICE__
354 inline
355 float modff(float x, float* iptr)
356 {
357  float tmp;
358  float r =
359  __ocml_modf_f32(x, (__attribute__((address_space(5))) float*) &tmp);
360  *iptr = tmp;
361 
362  return r;
363 }
364 __DEVICE__
365 inline
366 float nanf(const char* tagp)
367 {
368  union {
369  float val;
370  struct ieee_float {
371  uint32_t mantissa : 22;
372  uint32_t quiet : 1;
373  uint32_t exponent : 8;
374  uint32_t sign : 1;
375  } bits;
376 
377  static_assert(sizeof(float) == sizeof(ieee_float), "");
378  } tmp;
379 
380  tmp.bits.sign = 0u;
381  tmp.bits.exponent = ~0u;
382  tmp.bits.quiet = 1u;
383  tmp.bits.mantissa = __make_mantissa(tagp);
384 
385  return tmp.val;
386 }
387 __DEVICE__
388 inline
389 float nearbyintf(float x) { return __ocml_nearbyint_f32(x); }
390 __DEVICE__
391 inline
392 float nextafterf(float x, float y) { return __ocml_nextafter_f32(x, y); }
393 __DEVICE__
394 inline
395 float norm3df(float x, float y, float z) { return __ocml_len3_f32(x, y, z); }
396 __DEVICE__
397 inline
398 float norm4df(float x, float y, float z, float w)
399 {
400  return __ocml_len4_f32(x, y, z, w);
401 }
402 __DEVICE__
403 inline
404 float normcdff(float x) { return __ocml_ncdf_f32(x); }
405 __DEVICE__
406 inline
407 float normcdfinvf(float x) { return __ocml_ncdfinv_f32(x); }
408 __DEVICE__
409 inline
410 float normf(int dim, const float* a)
411 { // TODO: placeholder until OCML adds support.
412  float r = 0;
413  while (dim--) { r += a[0] * a[0]; ++a; }
414 
415  return __ocml_sqrt_f32(r);
416 }
417 __DEVICE__
418 inline
419 float powf(float x, float y) { return __ocml_pow_f32(x, y); }
420 __DEVICE__
421 inline
422 float powif(float base, int iexp) { return __ocml_pown_f32(base, iexp); }
423 __DEVICE__
424 inline
425 float rcbrtf(float x) { return __ocml_rcbrt_f32(x); }
426 __DEVICE__
427 inline
428 float remainderf(float x, float y) { return __ocml_remainder_f32(x, y); }
429 __DEVICE__
430 inline
431 float remquof(float x, float y, int* quo)
432 {
433  int tmp;
434  float r =
435  __ocml_remquo_f32(x, y, (__attribute__((address_space(5))) int*) &tmp);
436  *quo = tmp;
437 
438  return r;
439 }
440 __DEVICE__
441 inline
442 float rhypotf(float x, float y) { return __ocml_rhypot_f32(x, y); }
443 __DEVICE__
444 inline
445 float rintf(float x) { return __ocml_rint_f32(x); }
446 __DEVICE__
447 inline
448 float rnorm3df(float x, float y, float z)
449 {
450  return __ocml_rlen3_f32(x, y, z);
451 }
452 
453 __DEVICE__
454 inline
455 float rnorm4df(float x, float y, float z, float w)
456 {
457  return __ocml_rlen4_f32(x, y, z, w);
458 }
459 __DEVICE__
460 inline
461 float rnormf(int dim, const float* a)
462 { // TODO: placeholder until OCML adds support.
463  float r = 0;
464  while (dim--) { r += a[0] * a[0]; ++a; }
465 
466  return __ocml_rsqrt_f32(r);
467 }
468 __DEVICE__
469 inline
470 float roundf(float x) { return __ocml_round_f32(x); }
471 __DEVICE__
472 inline
473 float rsqrtf(float x) { return __ocml_rsqrt_f32(x); }
474 __DEVICE__
475 inline
476 float scalblnf(float x, long int n)
477 {
478  return (n < INT_MAX) ? __ocml_scalbn_f32(x, n) : __ocml_scalb_f32(x, n);
479 }
480 __DEVICE__
481 inline
482 float scalbnf(float x, int n) { return __ocml_scalbn_f32(x, n); }
483 __DEVICE__
484 inline
485 __RETURN_TYPE signbit(float x) { return __ocml_signbit_f32(x); }
486 __DEVICE__
487 inline
488 void sincosf(float x, float* sptr, float* cptr)
489 {
490  float tmp;
491 
492  *sptr =
493  __ocml_sincos_f32(x, (__attribute__((address_space(5))) float*) &tmp);
494  *cptr = tmp;
495 }
496 __DEVICE__
497 inline
498 void sincospif(float x, float* sptr, float* cptr)
499 {
500  float tmp;
501 
502  *sptr =
503  __ocml_sincospi_f32(x, (__attribute__((address_space(5))) float*) &tmp);
504  *cptr = tmp;
505 }
506 __DEVICE__
507 inline
508 float sinf(float x) { return __ocml_sin_f32(x); }
509 __DEVICE__
510 inline
511 float sinhf(float x) { return __ocml_sinh_f32(x); }
512 __DEVICE__
513 inline
514 float sinpif(float x) { return __ocml_sinpi_f32(x); }
515 __DEVICE__
516 inline
517 float sqrtf(float x) { return __ocml_sqrt_f32(x); }
518 __DEVICE__
519 inline
520 float tanf(float x) { return __ocml_tan_f32(x); }
521 __DEVICE__
522 inline
523 float tanhf(float x) { return __ocml_tanh_f32(x); }
524 __DEVICE__
525 inline
526 float tgammaf(float x) { return __ocml_tgamma_f32(x); }
527 __DEVICE__
528 inline
529 float truncf(float x) { return __ocml_trunc_f32(x); }
530 __DEVICE__
531 inline
532 float y0f(float x) { return __ocml_y0_f32(x); }
533 __DEVICE__
534 inline
535 float y1f(float x) { return __ocml_y1_f32(x); }
536 __DEVICE__
537 inline
538 float ynf(int n, float x)
539 { // TODO: we could use Ahmes multiplication and the Miller & Brown algorithm
540  // for linear recurrences to get O(log n) steps, but it's unclear if
541  // it'd be beneficial in this case. Placeholder until OCML adds
542  // support.
543  if (n == 0) return y0f(x);
544  if (n == 1) return y1f(x);
545 
546  float x0 = y0f(x);
547  float x1 = y1f(x);
548  for (int i = 1; i < n; ++i) {
549  float x2 = (2 * i) / x * x1 - x0;
550  x0 = x1;
551  x1 = x2;
552  }
553 
554  return x1;
555 }
556 
557 // BEGIN INTRINSICS
558 __DEVICE__
559 inline
560 float __cosf(float x) { return __ocml_native_cos_f32(x); }
561 __DEVICE__
562 inline
563 float __exp10f(float x) { return __ocml_native_exp10_f32(x); }
564 __DEVICE__
565 inline
566 float __expf(float x) { return __ocml_native_exp_f32(x); }
567 #if defined OCML_BASIC_ROUNDED_OPERATIONS
568 __DEVICE__
569 inline
570 float __fadd_rd(float x, float y) { return __ocml_add_rtn_f32(x, y); }
571 #endif
572 __DEVICE__
573 inline
574 float __fadd_rn(float x, float y) { return x + y; }
575 #if defined OCML_BASIC_ROUNDED_OPERATIONS
576 __DEVICE__
577 inline
578 float __fadd_ru(float x, float y) { return __ocml_add_rtp_f32(x, y); }
579 __DEVICE__
580 inline
581 float __fadd_rz(float x, float y) { return __ocml_add_rtz_f32(x, y); }
582 __DEVICE__
583 inline
584 float __fdiv_rd(float x, float y) { return __ocml_div_rtn_f32(x, y); }
585 #endif
586 __DEVICE__
587 inline
588 float __fdiv_rn(float x, float y) { return x / y; }
589 #if defined OCML_BASIC_ROUNDED_OPERATIONS
590 __DEVICE__
591 inline
592 float __fdiv_ru(float x, float y) { return __ocml_div_rtp_f32(x, y); }
593 __DEVICE__
594 inline
595 float __fdiv_rz(float x, float y) { return __ocml_div_rtz_f32(x, y); }
596 #endif
597 __DEVICE__
598 inline
599 float __fdividef(float x, float y) { return x / y; }
600 #if defined OCML_BASIC_ROUNDED_OPERATIONS
601 __DEVICE__
602 inline
603 float __fmaf_rd(float x, float y, float z)
604 {
605  return __ocml_fma_rtn_f32(x, y, z);
606 }
607 #endif
608 __DEVICE__
609 inline
610 float __fmaf_rn(float x, float y, float z)
611 {
612  return __ocml_fma_f32(x, y, z);
613 }
614 #if defined OCML_BASIC_ROUNDED_OPERATIONS
615 __DEVICE__
616 inline
617 float __fmaf_ru(float x, float y, float z)
618 {
619  return __ocml_fma_rtp_f32(x, y, z);
620 }
621 __DEVICE__
622 inline
623 float __fmaf_rz(float x, float y, float z)
624 {
625  return __ocml_fma_rtz_f32(x, y, z);
626 }
627 __DEVICE__
628 inline
629 float __fmul_rd(float x, float y) { return __ocml_mul_rtn_f32(x, y); }
630 #endif
631 __DEVICE__
632 inline
633 float __fmul_rn(float x, float y) { return x * y; }
634 #if defined OCML_BASIC_ROUNDED_OPERATIONS
635 __DEVICE__
636 inline
637 float __fmul_ru(float x, float y) { return __ocml_mul_rtp_f32(x, y); }
638 __DEVICE__
639 inline
640 float __fmul_rz(float x, float y) { return __ocml_mul_rtz_f32(x, y); }
641 __DEVICE__
642 inline
643 float __frcp_rd(float x) { return __llvm_amdgcn_rcp_f32(x); }
644 #endif
645 __DEVICE__
646 inline
647 float __frcp_rn(float x) { return __llvm_amdgcn_rcp_f32(x); }
648 #if defined OCML_BASIC_ROUNDED_OPERATIONS
649 __DEVICE__
650 inline
651 float __frcp_ru(float x) { return __llvm_amdgcn_rcp_f32(x); }
652 __DEVICE__
653 inline
654 float __frcp_rz(float x) { return __llvm_amdgcn_rcp_f32(x); }
655 #endif
656 __DEVICE__
657 inline
658 float __frsqrt_rn(float x) { return __llvm_amdgcn_rsq_f32(x); }
659 #if defined OCML_BASIC_ROUNDED_OPERATIONS
660 __DEVICE__
661 inline
662 float __fsqrt_rd(float x) { return __ocml_sqrt_rtn_f32(x); }
663 #endif
664 __DEVICE__
665 inline
666 float __fsqrt_rn(float x) { return __ocml_native_sqrt_f32(x); }
667 #if defined OCML_BASIC_ROUNDED_OPERATIONS
668 __DEVICE__
669 inline
670 float __fsqrt_ru(float x) { return __ocml_sqrt_rtp_f32(x); }
671 __DEVICE__
672 inline
673 float __fsqrt_rz(float x) { return __ocml_sqrt_rtz_f32(x); }
674 __DEVICE__
675 inline
676 float __fsub_rd(float x, float y) { return __ocml_sub_rtn_f32(x, y); }
677 #endif
678 __DEVICE__
679 inline
680 float __fsub_rn(float x, float y) { return x - y; }
681 #if defined OCML_BASIC_ROUNDED_OPERATIONS
682 __DEVICE__
683 inline
684 float __fsub_ru(float x, float y) { return __ocml_sub_rtp_f32(x, y); }
685 __DEVICE__
686 inline
687 float __fsub_rz(float x, float y) { return __ocml_sub_rtz_f32(x, y); }
688 #endif
689 __DEVICE__
690 inline
691 float __log10f(float x) { return __ocml_native_log10_f32(x); }
692 __DEVICE__
693 inline
694 float __log2f(float x) { return __ocml_native_log2_f32(x); }
695 __DEVICE__
696 inline
697 float __logf(float x) { return __ocml_native_log_f32(x); }
698 __DEVICE__
699 inline
700 float __powf(float x, float y) { return __ocml_pow_f32(x, y); }
701 __DEVICE__
702 inline
703 float __saturatef(float x) { return (x < 0) ? 0 : ((x > 1) ? 1 : x); }
704 __DEVICE__
705 inline
706 void __sincosf(float x, float* sptr, float* cptr)
707 {
708  *sptr = __ocml_native_sin_f32(x);
709  *cptr = __ocml_native_cos_f32(x);
710 }
711 __DEVICE__
712 inline
713 float __sinf(float x) { return __ocml_native_sin_f32(x); }
714 __DEVICE__
715 inline
716 float __tanf(float x) { return __ocml_tan_f32(x); }
717 // END INTRINSICS
718 // END FLOAT
719 
720 // BEGIN DOUBLE
721 __DEVICE__
722 inline
723 double abs(double x) { return __ocml_fabs_f64(x); }
724 __DEVICE__
725 inline
726 double acos(double x) { return __ocml_acos_f64(x); }
727 __DEVICE__
728 inline
729 double acosh(double x) { return __ocml_acosh_f64(x); }
730 __DEVICE__
731 inline
732 double asin(double x) { return __ocml_asin_f64(x); }
733 __DEVICE__
734 inline
735 double asinh(double x) { return __ocml_asinh_f64(x); }
736 __DEVICE__
737 inline
738 double atan(double x) { return __ocml_atan_f64(x); }
739 __DEVICE__
740 inline
741 double atan2(double x, double y) { return __ocml_atan2_f64(x, y); }
742 __DEVICE__
743 inline
744 double atanh(double x) { return __ocml_atanh_f64(x); }
745 __DEVICE__
746 inline
747 double cbrt(double x) { return __ocml_cbrt_f64(x); }
748 __DEVICE__
749 inline
750 double ceil(double x) { return __ocml_ceil_f64(x); }
751 __DEVICE__
752 inline
753 double copysign(double x, double y) { return __ocml_copysign_f64(x, y); }
754 __DEVICE__
755 inline
756 double cos(double x) { return __ocml_cos_f64(x); }
757 __DEVICE__
758 inline
759 double cosh(double x) { return __ocml_cosh_f64(x); }
760 __DEVICE__
761 inline
762 double cospi(double x) { return __ocml_cospi_f64(x); }
763 __DEVICE__
764 inline
765 double cyl_bessel_i0(double x) { return __ocml_i0_f64(x); }
766 __DEVICE__
767 inline
768 double cyl_bessel_i1(double x) { return __ocml_i1_f64(x); }
769 __DEVICE__
770 inline
771 double erf(double x) { return __ocml_erf_f64(x); }
772 __DEVICE__
773 inline
774 double erfc(double x) { return __ocml_erfc_f64(x); }
775 __DEVICE__
776 inline
777 double erfcinv(double x) { return __ocml_erfcinv_f64(x); }
778 __DEVICE__
779 inline
780 double erfcx(double x) { return __ocml_erfcx_f64(x); }
781 __DEVICE__
782 inline
783 double erfinv(double x) { return __ocml_erfinv_f64(x); }
784 __DEVICE__
785 inline
786 double exp(double x) { return __ocml_exp_f64(x); }
787 __DEVICE__
788 inline
789 double exp10(double x) { return __ocml_exp10_f64(x); }
790 __DEVICE__
791 inline
792 double exp2(double x) { return __ocml_exp2_f64(x); }
793 __DEVICE__
794 inline
795 double expm1(double x) { return __ocml_expm1_f64(x); }
796 __DEVICE__
797 inline
798 double fabs(double x) { return __ocml_fabs_f64(x); }
799 __DEVICE__
800 inline
801 double fdim(double x, double y) { return __ocml_fdim_f64(x, y); }
802 __DEVICE__
803 inline
804 double floor(double x) { return __ocml_floor_f64(x); }
805 __DEVICE__
806 inline
807 double fma(double x, double y, double z) { return __ocml_fma_f64(x, y, z); }
808 __DEVICE__
809 inline
810 double fmax(double x, double y) { return __ocml_fmax_f64(x, y); }
811 __DEVICE__
812 inline
813 double fmin(double x, double y) { return __ocml_fmin_f64(x, y); }
814 __DEVICE__
815 inline
816 double fmod(double x, double y) { return __ocml_fmod_f64(x, y); }
817 __DEVICE__
818 inline
819 double frexp(double x, int* nptr)
820 {
821  int tmp;
822  double r =
823  __ocml_frexp_f64(x, (__attribute__((address_space(5))) int*) &tmp);
824  *nptr = tmp;
825 
826  return r;
827 }
828 __DEVICE__
829 inline
830 double hypot(double x, double y) { return __ocml_hypot_f64(x, y); }
831 __DEVICE__
832 inline
833 int ilogb(double x) { return __ocml_ilogb_f64(x); }
834 __DEVICE__
835 inline
836 __RETURN_TYPE isfinite(double x) { return __ocml_isfinite_f64(x); }
837 __DEVICE__
838 inline
839 __RETURN_TYPE isinf(double x) { return __ocml_isinf_f64(x); }
840 __DEVICE__
841 inline
842 __RETURN_TYPE isnan(double x) { return __ocml_isnan_f64(x); }
843 __DEVICE__
844 inline
845 double j0(double x) { return __ocml_j0_f64(x); }
846 __DEVICE__
847 inline
848 double j1(double x) { return __ocml_j1_f64(x); }
849 __DEVICE__
850 inline
851 double jn(int n, double x)
852 { // TODO: we could use Ahmes multiplication and the Miller & Brown algorithm
853  // for linear recurrences to get O(log n) steps, but it's unclear if
854  // it'd be beneficial in this case. Placeholder until OCML adds
855  // support.
856  if (n == 0) return j0f(x);
857  if (n == 1) return j1f(x);
858 
859  double x0 = j0f(x);
860  double x1 = j1f(x);
861  for (int i = 1; i < n; ++i) {
862  double x2 = (2 * i) / x * x1 - x0;
863  x0 = x1;
864  x1 = x2;
865  }
866 
867  return x1;
868 }
869 __DEVICE__
870 inline
871 double ldexp(double x, int e) { return __ocml_ldexp_f64(x, e); }
872 __DEVICE__
873 inline
874 double lgamma(double x) { return __ocml_lgamma_f64(x); }
875 __DEVICE__
876 inline
877 long long int llrint(double x) { return __ocml_rint_f64(x); }
878 __DEVICE__
879 inline
880 long long int llround(double x) { return __ocml_round_f64(x); }
881 __DEVICE__
882 inline
883 double log(double x) { return __ocml_log_f64(x); }
884 __DEVICE__
885 inline
886 double log10(double x) { return __ocml_log10_f64(x); }
887 __DEVICE__
888 inline
889 double log1p(double x) { return __ocml_log1p_f64(x); }
890 __DEVICE__
891 inline
892 double log2(double x) { return __ocml_log2_f64(x); }
893 __DEVICE__
894 inline
895 double logb(double x) { return __ocml_logb_f64(x); }
896 __DEVICE__
897 inline
898 long int lrint(double x) { return __ocml_rint_f64(x); }
899 __DEVICE__
900 inline
901 long int lround(double x) { return __ocml_round_f64(x); }
902 __DEVICE__
903 inline
904 double modf(double x, double* iptr)
905 {
906  double tmp;
907  double r =
908  __ocml_modf_f64(x, (__attribute__((address_space(5))) double*) &tmp);
909  *iptr = tmp;
910 
911  return r;
912 }
913 __DEVICE__
914 inline
915 double nan(const char* tagp)
916 {
917 #if !_WIN32
918  union {
919  double val;
920  struct ieee_double {
921  uint64_t mantissa : 51;
922  uint32_t quiet : 1;
923  uint32_t exponent : 11;
924  uint32_t sign : 1;
925  } bits;
926  static_assert(sizeof(double) == sizeof(ieee_double), "");
927  } tmp;
928 
929  tmp.bits.sign = 0u;
930  tmp.bits.exponent = ~0u;
931  tmp.bits.quiet = 1u;
932  tmp.bits.mantissa = __make_mantissa(tagp);
933 
934  return tmp.val;
935 #else
936  static_assert(sizeof(uint64_t)==sizeof(double));
937  uint64_t val = __make_mantissa(tagp);
938  val |= 0xFFF << 51;
939  return *reinterpret_cast<double*>(&val);
940 #endif
941 }
942 __DEVICE__
943 inline
944 double nearbyint(double x) { return __ocml_nearbyint_f64(x); }
945 __DEVICE__
946 inline
947 double nextafter(double x, double y) { return __ocml_nextafter_f64(x, y); }
948 __DEVICE__
949 inline
950 double norm(int dim, const double* a)
951 { // TODO: placeholder until OCML adds support.
952  double r = 0;
953  while (dim--) { r += a[0] * a[0]; ++a; }
954 
955  return __ocml_sqrt_f64(r);
956 }
957 __DEVICE__
958 inline
959 double norm3d(double x, double y, double z)
960 {
961  return __ocml_len3_f64(x, y, z);
962 }
963 __DEVICE__
964 inline
965 double norm4d(double x, double y, double z, double w)
966 {
967  return __ocml_len4_f64(x, y, z, w);
968 }
969 __DEVICE__
970 inline
971 double normcdf(double x) { return __ocml_ncdf_f64(x); }
972 __DEVICE__
973 inline
974 double normcdfinv(double x) { return __ocml_ncdfinv_f64(x); }
975 __DEVICE__
976 inline
977 double pow(double x, double y) { return __ocml_pow_f64(x, y); }
978 __DEVICE__
979 inline
980 double powi(double base, int iexp) { return __ocml_pown_f64(base, iexp); }
981 __DEVICE__
982 inline
983 double rcbrt(double x) { return __ocml_rcbrt_f64(x); }
984 __DEVICE__
985 inline
986 double remainder(double x, double y) { return __ocml_remainder_f64(x, y); }
987 __DEVICE__
988 inline
989 double remquo(double x, double y, int* quo)
990 {
991  int tmp;
992  double r =
993  __ocml_remquo_f64(x, y, (__attribute__((address_space(5))) int*) &tmp);
994  *quo = tmp;
995 
996  return r;
997 }
998 __DEVICE__
999 inline
1000 double rhypot(double x, double y) { return __ocml_rhypot_f64(x, y); }
1001 __DEVICE__
1002 inline
1003 double rint(double x) { return __ocml_rint_f64(x); }
1004 __DEVICE__
1005 inline
1006 double rnorm(int dim, const double* a)
1007 { // TODO: placeholder until OCML adds support.
1008  double r = 0;
1009  while (dim--) { r += a[0] * a[0]; ++a; }
1010 
1011  return __ocml_rsqrt_f64(r);
1012 }
1013 __DEVICE__
1014 inline
1015 double rnorm3d(double x, double y, double z)
1016 {
1017  return __ocml_rlen3_f64(x, y, z);
1018 }
1019 __DEVICE__
1020 inline
1021 double rnorm4d(double x, double y, double z, double w)
1022 {
1023  return __ocml_rlen4_f64(x, y, z, w);
1024 }
1025 __DEVICE__
1026 inline
1027 double round(double x) { return __ocml_round_f64(x); }
1028 __DEVICE__
1029 inline
1030 double rsqrt(double x) { return __ocml_rsqrt_f64(x); }
1031 __DEVICE__
1032 inline
1033 double scalbln(double x, long int n)
1034 {
1035  return (n < INT_MAX) ? __ocml_scalbn_f64(x, n) : __ocml_scalb_f64(x, n);
1036 }
1037 __DEVICE__
1038 inline
1039 double scalbn(double x, int n) { return __ocml_scalbn_f64(x, n); }
1040 __DEVICE__
1041 inline
1042 __RETURN_TYPE signbit(double x) { return __ocml_signbit_f64(x); }
1043 __DEVICE__
1044 inline
1045 double sin(double x) { return __ocml_sin_f64(x); }
1046 __DEVICE__
1047 inline
1048 void sincos(double x, double* sptr, double* cptr)
1049 {
1050  double tmp;
1051  *sptr =
1052  __ocml_sincos_f64(x, (__attribute__((address_space(5))) double*) &tmp);
1053  *cptr = tmp;
1054 }
1055 __DEVICE__
1056 inline
1057 void sincospi(double x, double* sptr, double* cptr)
1058 {
1059  double tmp;
1060  *sptr = __ocml_sincospi_f64(
1061  x, (__attribute__((address_space(5))) double*) &tmp);
1062  *cptr = tmp;
1063 }
1064 __DEVICE__
1065 inline
1066 double sinh(double x) { return __ocml_sinh_f64(x); }
1067 __DEVICE__
1068 inline
1069 double sinpi(double x) { return __ocml_sinpi_f64(x); }
1070 __DEVICE__
1071 inline
1072 double sqrt(double x) { return __ocml_sqrt_f64(x); }
1073 __DEVICE__
1074 inline
1075 double tan(double x) { return __ocml_tan_f64(x); }
1076 __DEVICE__
1077 inline
1078 double tanh(double x) { return __ocml_tanh_f64(x); }
1079 __DEVICE__
1080 inline
1081 double tgamma(double x) { return __ocml_tgamma_f64(x); }
1082 __DEVICE__
1083 inline
1084 double trunc(double x) { return __ocml_trunc_f64(x); }
1085 __DEVICE__
1086 inline
1087 double y0(double x) { return __ocml_y0_f64(x); }
1088 __DEVICE__
1089 inline
1090 double y1(double x) { return __ocml_y1_f64(x); }
1091 __DEVICE__
1092 inline
1093 double yn(int n, double x)
1094 { // TODO: we could use Ahmes multiplication and the Miller & Brown algorithm
1095  // for linear recurrences to get O(log n) steps, but it's unclear if
1096  // it'd be beneficial in this case. Placeholder until OCML adds
1097  // support.
1098  if (n == 0) return j0f(x);
1099  if (n == 1) return j1f(x);
1100 
1101  double x0 = j0f(x);
1102  double x1 = j1f(x);
1103  for (int i = 1; i < n; ++i) {
1104  double x2 = (2 * i) / x * x1 - x0;
1105  x0 = x1;
1106  x1 = x2;
1107  }
1108 
1109  return x1;
1110 }
1111 
1112 // BEGIN INTRINSICS
1113 #if defined OCML_BASIC_ROUNDED_OPERATIONS
1114 __DEVICE__
1115 inline
1116 double __dadd_rd(double x, double y) { return __ocml_add_rtn_f64(x, y); }
1117 #endif
1118 __DEVICE__
1119 inline
1120 double __dadd_rn(double x, double y) { return x + y; }
1121 #if defined OCML_BASIC_ROUNDED_OPERATIONS
1122 __DEVICE__
1123 inline
1124 double __dadd_ru(double x, double y) { return __ocml_add_rtp_f64(x, y); }
1125 __DEVICE__
1126 inline
1127 double __dadd_rz(double x, double y) { return __ocml_add_rtz_f64(x, y); }
1128 __DEVICE__
1129 inline
1130 double __ddiv_rd(double x, double y) { return __ocml_div_rtn_f64(x, y); }
1131 #endif
1132 __DEVICE__
1133 inline
1134 double __ddiv_rn(double x, double y) { return x / y; }
1135 #if defined OCML_BASIC_ROUNDED_OPERATIONS
1136 __DEVICE__
1137 inline
1138 double __ddiv_ru(double x, double y) { return __ocml_div_rtp_f64(x, y); }
1139 __DEVICE__
1140 inline
1141 double __ddiv_rz(double x, double y) { return __ocml_div_rtz_f64(x, y); }
1142 __DEVICE__
1143 inline
1144 double __dmul_rd(double x, double y) { return __ocml_mul_rtn_f64(x, y); }
1145 #endif
1146 __DEVICE__
1147 inline
1148 double __dmul_rn(double x, double y) { return x * y; }
1149 #if defined OCML_BASIC_ROUNDED_OPERATIONS
1150 __DEVICE__
1151 inline
1152 double __dmul_ru(double x, double y) { return __ocml_mul_rtp_f64(x, y); }
1153 __DEVICE__
1154 inline
1155 double __dmul_rz(double x, double y) { return __ocml_mul_rtz_f64(x, y); }
1156 __DEVICE__
1157 inline
1158 double __drcp_rd(double x) { return __llvm_amdgcn_rcp_f64(x); }
1159 #endif
1160 __DEVICE__
1161 inline
1162 double __drcp_rn(double x) { return __llvm_amdgcn_rcp_f64(x); }
1163 #if defined OCML_BASIC_ROUNDED_OPERATIONS
1164 __DEVICE__
1165 inline
1166 double __drcp_ru(double x) { return __llvm_amdgcn_rcp_f64(x); }
1167 __DEVICE__
1168 inline
1169 double __drcp_rz(double x) { return __llvm_amdgcn_rcp_f64(x); }
1170 __DEVICE__
1171 inline
1172 double __dsqrt_rd(double x) { return __ocml_sqrt_rtn_f64(x); }
1173 #endif
1174 __DEVICE__
1175 inline
1176 double __dsqrt_rn(double x) { return __ocml_sqrt_f64(x); }
1177 #if defined OCML_BASIC_ROUNDED_OPERATIONS
1178 __DEVICE__
1179 inline
1180 double __dsqrt_ru(double x) { return __ocml_sqrt_rtp_f64(x); }
1181 __DEVICE__
1182 inline
1183 double __dsqrt_rz(double x) { return __ocml_sqrt_rtz_f64(x); }
1184 __DEVICE__
1185 inline
1186 double __dsub_rd(double x, double y) { return __ocml_sub_rtn_f64(x, y); }
1187 #endif
1188 __DEVICE__
1189 inline
1190 double __dsub_rn(double x, double y) { return x - y; }
1191 #if defined OCML_BASIC_ROUNDED_OPERATIONS
1192 __DEVICE__
1193 inline
1194 double __dsub_ru(double x, double y) { return __ocml_sub_rtp_f64(x, y); }
1195 __DEVICE__
1196 inline
1197 double __dsub_rz(double x, double y) { return __ocml_sub_rtz_f64(x, y); }
1198 __DEVICE__
1199 inline
1200 double __fma_rd(double x, double y, double z)
1201 {
1202  return __ocml_fma_rtn_f64(x, y, z);
1203 }
1204 #endif
1205 __DEVICE__
1206 inline
1207 double __fma_rn(double x, double y, double z)
1208 {
1209  return __ocml_fma_f64(x, y, z);
1210 }
1211 #if defined OCML_BASIC_ROUNDED_OPERATIONS
1212 __DEVICE__
1213 inline
1214 double __fma_ru(double x, double y, double z)
1215 {
1216  return __ocml_fma_rtp_f64(x, y, z);
1217 }
1218 __DEVICE__
1219 inline
1220 double __fma_rz(double x, double y, double z)
1221 {
1222  return __ocml_fma_rtz_f64(x, y, z);
1223 }
1224 #endif
1225 // END INTRINSICS
1226 // END DOUBLE
1227 
1228 // BEGIN INTEGER
1229 __DEVICE__
1230 inline
1231 int abs(int x)
1232 {
1233  int sgn = x >> (sizeof(int) * CHAR_BIT - 1);
1234  return (x ^ sgn) - sgn;
1235 }
1236 __DEVICE__
1237 inline
1238 long labs(long x)
1239 {
1240  long sgn = x >> (sizeof(long) * CHAR_BIT - 1);
1241  return (x ^ sgn) - sgn;
1242 }
1243 __DEVICE__
1244 inline
1245 long long llabs(long long x)
1246 {
1247  long long sgn = x >> (sizeof(long long) * CHAR_BIT - 1);
1248  return (x ^ sgn) - sgn;
1249 }
1250 
1251 #if defined(__cplusplus)
1252  __DEVICE__
1253  inline
1254  long abs(long x) { return labs(x); }
1255  __DEVICE__
1256  inline
1257  long long abs(long long x) { return llabs(x); }
1258 #endif
1259 // END INTEGER
1260 
1261 __DEVICE__
1262 inline _Float16 fma(_Float16 x, _Float16 y, _Float16 z) {
1263  return __ocml_fma_f16(x, y, z);
1264 }
1265 
1266 __DEVICE__
1267 inline float fma(float x, float y, float z) {
1268  return fmaf(x, y, z);
1269 }
1270 
1271 #pragma push_macro("__DEF_FLOAT_FUN")
1272 #pragma push_macro("__DEF_FLOAT_FUN2")
1273 #pragma push_macro("__DEF_FLOAT_FUN2I")
1274 #pragma push_macro("__HIP_OVERLOAD")
1275 #pragma push_macro("__HIP_OVERLOAD2")
1276 
1277 // __hip_enable_if::type is a type function which returns __T if __B is true.
1278 template<bool __B, class __T = void>
1280 
1281 template <class __T> struct __hip_enable_if<true, __T> {
1282  typedef __T type;
1283 };
1284 
1285 // __HIP_OVERLOAD1 is used to resolve function calls with integer argument to
1286 // avoid compilation error due to ambibuity. e.g. floor(5) is resolved with
1287 // floor(double).
1288 #define __HIP_OVERLOAD1(__retty, __fn) \
1289  template <typename __T> \
1290  __DEVICE__ \
1291  typename __hip_enable_if<std::numeric_limits<__T>::is_integer, \
1292  __retty>::type \
1293  __fn(__T __x) { \
1294  return ::__fn((double)__x); \
1295  }
1296 
1297 // __HIP_OVERLOAD2 is used to resolve function calls with mixed float/double
1298 // or integer argument to avoid compilation error due to ambibuity. e.g.
1299 // max(5.0f, 6.0) is resolved with max(double, double).
1300 #define __HIP_OVERLOAD2(__retty, __fn) \
1301  template <typename __T1, typename __T2> \
1302  __DEVICE__ typename __hip_enable_if< \
1303  std::numeric_limits<__T1>::is_specialized && \
1304  std::numeric_limits<__T2>::is_specialized, \
1305  __retty>::type \
1306  __fn(__T1 __x, __T2 __y) { \
1307  return __fn((double)__x, (double)__y); \
1308  }
1309 
1310 // Define cmath functions with float argument and returns float.
1311 #define __DEF_FUN1(retty, func) \
1312 __DEVICE__ \
1313 inline \
1314 float func(float x) \
1315 { \
1316  return func##f(x); \
1317 } \
1318 __HIP_OVERLOAD1(retty, func)
1319 
1320 // Define cmath functions with float argument and returns retty.
1321 #define __DEF_FUNI(retty, func) \
1322 __DEVICE__ \
1323 inline \
1324 retty func(float x) \
1325 { \
1326  return func##f(x); \
1327 } \
1328 __HIP_OVERLOAD1(retty, func)
1329 
1330 // define cmath functions with two float arguments.
1331 #define __DEF_FUN2(retty, func) \
1332 __DEVICE__ \
1333 inline \
1334 float func(float x, float y) \
1335 { \
1336  return func##f(x, y); \
1337 } \
1338 __HIP_OVERLOAD2(retty, func)
1339 
1340 __DEF_FUN1(double, acos)
1341 __DEF_FUN1(double, acosh)
1342 __DEF_FUN1(double, asin)
1343 __DEF_FUN1(double, asinh)
1344 __DEF_FUN1(double, atan)
1345 __DEF_FUN2(double, atan2);
1346 __DEF_FUN1(double, atanh)
1347 __DEF_FUN1(double, cbrt)
1348 __DEF_FUN1(double, ceil)
1349 __DEF_FUN2(double, copysign);
1350 __DEF_FUN1(double, cos)
1351 __DEF_FUN1(double, cosh)
1352 __DEF_FUN1(double, erf)
1353 __DEF_FUN1(double, erfc)
1354 __DEF_FUN1(double, exp)
1355 __DEF_FUN1(double, exp2)
1356 __DEF_FUN1(double, expm1)
1357 __DEF_FUN1(double, fabs)
1358 __DEF_FUN2(double, fdim);
1359 __DEF_FUN1(double, floor)
1360 __DEF_FUN2(double, fmax);
1361 __DEF_FUN2(double, fmin);
1362 __DEF_FUN2(double, fmod);
1363 //__HIP_OVERLOAD1(int, fpclassify)
1364 __DEF_FUN2(double, hypot);
1365 __DEF_FUNI(int, ilogb)
1366 __HIP_OVERLOAD1(bool, isfinite)
1367 __HIP_OVERLOAD2(bool, isgreater);
1368 __HIP_OVERLOAD2(bool, isgreaterequal);
1369 __HIP_OVERLOAD1(bool, isinf);
1370 __HIP_OVERLOAD2(bool, isless);
1371 __HIP_OVERLOAD2(bool, islessequal);
1372 __HIP_OVERLOAD2(bool, islessgreater);
1373 __HIP_OVERLOAD1(bool, isnan);
1374 //__HIP_OVERLOAD1(bool, isnormal)
1375 __HIP_OVERLOAD2(bool, isunordered);
1376 __DEF_FUN1(double, lgamma)
1377 __DEF_FUN1(double, log)
1378 __DEF_FUN1(double, log10)
1379 __DEF_FUN1(double, log1p)
1380 __DEF_FUN1(double, log2)
1381 __DEF_FUN1(double, logb)
1382 __DEF_FUNI(long long, llrint)
1383 __DEF_FUNI(long long, llround)
1384 __DEF_FUNI(long, lrint)
1385 __DEF_FUNI(long, lround)
1386 __DEF_FUN1(double, nearbyint);
1387 __DEF_FUN2(double, nextafter);
1388 __DEF_FUN2(double, pow);
1389 __DEF_FUN2(double, remainder);
1390 __DEF_FUN1(double, rint);
1391 __DEF_FUN1(double, round);
1392 __HIP_OVERLOAD1(bool, signbit)
1393 __DEF_FUN1(double, sin)
1394 __DEF_FUN1(double, sinh)
1395 __DEF_FUN1(double, sqrt)
1396 __DEF_FUN1(double, tan)
1397 __DEF_FUN1(double, tanh)
1398 __DEF_FUN1(double, tgamma)
1399 __DEF_FUN1(double, trunc);
1400 
1401 // define cmath functions with a float and an integer argument.
1402 #define __DEF_FLOAT_FUN2I(func) \
1403 __DEVICE__ \
1404 inline \
1405 float func(float x, int y) \
1406 { \
1407  return func##f(x, y); \
1408 }
1409 __DEF_FLOAT_FUN2I(scalbn)
1410 __DEF_FLOAT_FUN2I(ldexp)
1411 
1412 template<class T>
1413 __DEVICE__ inline T min(T arg1, T arg2) {
1414  return (arg1 < arg2) ? arg1 : arg2;
1415 }
1416 
1417 template<class T>
1418 __DEVICE__ inline T max(T arg1, T arg2) {
1419  return (arg1 > arg2) ? arg1 : arg2;
1420 }
1421 
1422 __DEVICE__ inline int min(int arg1, int arg2) {
1423  return (arg1 < arg2) ? arg1 : arg2;
1424 }
1425 __DEVICE__ inline int max(int arg1, int arg2) {
1426  return (arg1 > arg2) ? arg1 : arg2;
1427 }
1428 
1429 __DEVICE__ inline int min(uint32_t arg1, int arg2) {
1430  return (arg1 < arg2) ? arg1 : arg2;
1431 }
1432 __DEVICE__ inline int max(uint32_t arg1, int arg2) {
1433  return (arg1 > arg2) ? arg1 : arg2;
1434 }
1435 
1436 __DEVICE__
1437 inline
1438 float max(float x, float y) {
1439  return fmaxf(x, y);
1440 }
1441 
1442 __DEVICE__
1443 inline
1444 double max(double x, double y) {
1445  return fmax(x, y);
1446 }
1447 
1448 __DEVICE__
1449 inline
1450 float min(float x, float y) {
1451  return fminf(x, y);
1452 }
1453 
1454 __DEVICE__
1455 inline
1456 double min(double x, double y) {
1457  return fmin(x, y);
1458 }
1459 
1460 __HIP_OVERLOAD2(double, max)
1461 __HIP_OVERLOAD2(double, min)
1462 
1463 __host__ inline static int min(int arg1, int arg2) {
1464  return std::min(arg1, arg2);
1465 }
1466 
1467 __host__ inline static int max(int arg1, int arg2) {
1468  return std::max(arg1, arg2);
1469 }
1470 
1471 __DEVICE__
1472 inline float pow(float base, int iexp) {
1473  return powif(base, iexp);
1474 }
1475 
1476 __DEVICE__
1477 inline double pow(double base, int iexp) {
1478  return powi(base, iexp);
1479 }
1480 
1481 __DEVICE__
1482 inline _Float16 pow(_Float16 base, int iexp) {
1483  return __ocml_pown_f16(base, iexp);
1484 }
1485 
1486 #pragma pop_macro("__DEF_FLOAT_FUN")
1487 #pragma pop_macro("__DEF_FLOAT_FUN2")
1488 #pragma pop_macro("__DEF_FLOAT_FUN2I")
1489 #pragma pop_macro("__HIP_OVERLOAD")
1490 #pragma pop_macro("__HIP_OVERLOAD2")
1491 
1492 #endif // !__CLANG_HIP_RUNTIME_WRAPPER_INCLUDED__
1493 
1494 #pragma pop_macro("__DEVICE__")
1495 #pragma pop_macro("__RETURN_TYPE")
1496 
1497 // For backward compatibility.
1498 // There are HIP applications e.g. TensorFlow, expecting __HIP_ARCH_* macros
1499 // defined after including math_functions.h.
char4
Definition: hip_vector_types.h:1505
__host__
#define __host__
Definition: host_defines.h:59
hip_runtime.h
Contains definitions of APIs for HIP runtime.
host_defines.h
TODO-doc.
__hip_enable_if
Definition: math_functions.h:1279
uchar4
Definition: hip_vector_types.h:1512
short2
Definition: hip_vector_types.h:1518
ushort2
Definition: hip_vector_types.h:1525