HIP: Heterogenous-computing Interface for Portability
math_functions.h
1 /*
2 Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved.
3 
4 Permission is hereby granted, free of charge, to any person obtaining a copy
5 of this software and associated documentation files (the "Software"), to deal
6 in the Software without restriction, including without limitation the rights
7 to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
8 copies of the Software, and to permit persons to whom the Software is
9 furnished to do so, subject to the following conditions:
10 
11 The above copyright notice and this permission notice shall be included in
12 all copies or substantial portions of the Software.
13 
14 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
17 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
19 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
20 THE SOFTWARE.
21 */
22 
23 #pragma once
24 
25 #include "hip_fp16_math_fwd.h"
26 #include "hip_vector_types.h"
27 #include "math_fwd.h"
28 
30 
31 #include <algorithm>
32 
33 // assert.h is only for the host version of assert.
34 // The device version of assert is implemented in hip/hcc_detail/hip_runtime.h.
35 // Users should include hip_runtime.h for the device version of assert.
36 #if !__HIP_DEVICE_COMPILE__
37 #include <assert.h>
38 #endif
39 
40 #include <limits.h>
41 #include <limits>
42 #include <stdint.h>
43 
44 // HCC's own math functions should be included first, otherwise there will
45 // be conflicts when hip/math_functions.h is included before hip/hip_runtime.h.
46 #ifdef __HCC__
47 #include "kalmar_math.h"
48 #endif
49 
50 #pragma push_macro("__DEVICE__")
51 #pragma push_macro("__RETURN_TYPE")
52 
53 #ifdef __HCC__
54 #define __DEVICE__ __device__
55 #define __RETURN_TYPE int
56 #else // to be consistent with __clang_cuda_math_forward_declares
57 #define __DEVICE__ static __device__
58 #define __RETURN_TYPE bool
59 #endif
60 
61 __DEVICE__
62 inline
63 uint64_t __make_mantissa_base8(const char* tagp)
64 {
65  uint64_t r = 0;
66  while (tagp) {
67  char tmp = *tagp;
68 
69  if (tmp >= '0' && tmp <= '7') r = (r * 8u) + tmp - '0';
70  else return 0;
71 
72  ++tagp;
73  }
74 
75  return r;
76 }
77 
78 __DEVICE__
79 inline
80 uint64_t __make_mantissa_base10(const char* tagp)
81 {
82  uint64_t r = 0;
83  while (tagp) {
84  char tmp = *tagp;
85 
86  if (tmp >= '0' && tmp <= '9') r = (r * 10u) + tmp - '0';
87  else return 0;
88 
89  ++tagp;
90  }
91 
92  return r;
93 }
94 
95 __DEVICE__
96 inline
97 uint64_t __make_mantissa_base16(const char* tagp)
98 {
99  uint64_t r = 0;
100  while (tagp) {
101  char tmp = *tagp;
102 
103  if (tmp >= '0' && tmp <= '9') r = (r * 16u) + tmp - '0';
104  else if (tmp >= 'a' && tmp <= 'f') r = (r * 16u) + tmp - 'a' + 10;
105  else if (tmp >= 'A' && tmp <= 'F') r = (r * 16u) + tmp - 'A' + 10;
106  else return 0;
107 
108  ++tagp;
109  }
110 
111  return r;
112 }
113 
114 __DEVICE__
115 inline
116 uint64_t __make_mantissa(const char* tagp)
117 {
118  if (!tagp) return 0u;
119 
120  if (*tagp == '0') {
121  ++tagp;
122 
123  if (*tagp == 'x' || *tagp == 'X') return __make_mantissa_base16(tagp);
124  else return __make_mantissa_base8(tagp);
125  }
126 
127  return __make_mantissa_base10(tagp);
128 }
129 
130 // DOT FUNCTIONS
131 #if (__hcc_workweek__ >= 19015) || __HIP_CLANG_ONLY__
132 __DEVICE__
133 inline
134 int amd_mixed_dot(short2 a, short2 b, int c, bool saturate) {
135  return __ockl_sdot2(a.data, b.data, c, saturate);
136 }
137 __DEVICE__
138 inline
139 uint amd_mixed_dot(ushort2 a, ushort2 b, uint c, bool saturate) {
140  return __ockl_udot2(a.data, b.data, c, saturate);
141 }
142 __DEVICE__
143 inline
144 int amd_mixed_dot(char4 a, char4 b, int c, bool saturate) {
145  return __ockl_sdot4(a.data, b.data, c, saturate);
146 }
147 __DEVICE__
148 inline
149 uint amd_mixed_dot(uchar4 a, uchar4 b, uint c, bool saturate) {
150  return __ockl_udot4(a.data, b.data, c, saturate);
151 }
152 __DEVICE__
153 inline
154 int amd_mixed_dot(int a, int b, int c, bool saturate) {
155  return __ockl_sdot8(a, b, c, saturate);
156 }
157 __DEVICE__
158 inline
159 uint amd_mixed_dot(uint a, uint b, uint c, bool saturate) {
160  return __ockl_udot8(a, b, c, saturate);
161 }
162 #endif
163 
164 // BEGIN FLOAT
165 __DEVICE__
166 inline
167 float abs(float x) { return __ocml_fabs_f32(x); }
168 __DEVICE__
169 inline
170 float acosf(float x) { return __ocml_acos_f32(x); }
171 __DEVICE__
172 inline
173 float acoshf(float x) { return __ocml_acosh_f32(x); }
174 __DEVICE__
175 inline
176 float asinf(float x) { return __ocml_asin_f32(x); }
177 __DEVICE__
178 inline
179 float asinhf(float x) { return __ocml_asinh_f32(x); }
180 __DEVICE__
181 inline
182 float atan2f(float x, float y) { return __ocml_atan2_f32(x, y); }
183 __DEVICE__
184 inline
185 float atanf(float x) { return __ocml_atan_f32(x); }
186 __DEVICE__
187 inline
188 float atanhf(float x) { return __ocml_atanh_f32(x); }
189 __DEVICE__
190 inline
191 float cbrtf(float x) { return __ocml_cbrt_f32(x); }
192 __DEVICE__
193 inline
194 float ceilf(float x) { return __ocml_ceil_f32(x); }
195 __DEVICE__
196 inline
197 float copysignf(float x, float y) { return __ocml_copysign_f32(x, y); }
198 __DEVICE__
199 inline
200 float cosf(float x) { return __ocml_cos_f32(x); }
201 __DEVICE__
202 inline
203 float coshf(float x) { return __ocml_cosh_f32(x); }
204 __DEVICE__
205 inline
206 float cospif(float x) { return __ocml_cospi_f32(x); }
207 __DEVICE__
208 inline
209 float cyl_bessel_i0f(float x) { return __ocml_i0_f32(x); }
210 __DEVICE__
211 inline
212 float cyl_bessel_i1f(float x) { return __ocml_i1_f32(x); }
213 __DEVICE__
214 inline
215 float erfcf(float x) { return __ocml_erfc_f32(x); }
216 __DEVICE__
217 inline
218 float erfcinvf(float x) { return __ocml_erfcinv_f32(x); }
219 __DEVICE__
220 inline
221 float erfcxf(float x) { return __ocml_erfcx_f32(x); }
222 __DEVICE__
223 inline
224 float erff(float x) { return __ocml_erf_f32(x); }
225 __DEVICE__
226 inline
227 float erfinvf(float x) { return __ocml_erfinv_f32(x); }
228 __DEVICE__
229 inline
230 float exp10f(float x) { return __ocml_exp10_f32(x); }
231 __DEVICE__
232 inline
233 float exp2f(float x) { return __ocml_exp2_f32(x); }
234 __DEVICE__
235 inline
236 float expf(float x) { return __ocml_exp_f32(x); }
237 __DEVICE__
238 inline
239 float expm1f(float x) { return __ocml_expm1_f32(x); }
240 __DEVICE__
241 inline
242 float fabsf(float x) { return __ocml_fabs_f32(x); }
243 __DEVICE__
244 inline
245 float fdimf(float x, float y) { return __ocml_fdim_f32(x, y); }
246 __DEVICE__
247 inline
248 float fdividef(float x, float y) { return x / y; }
249 __DEVICE__
250 inline
251 float floorf(float x) { return __ocml_floor_f32(x); }
252 __DEVICE__
253 inline
254 float fmaf(float x, float y, float z) { return __ocml_fma_f32(x, y, z); }
255 __DEVICE__
256 inline
257 float fmaxf(float x, float y) { return __ocml_fmax_f32(x, y); }
258 __DEVICE__
259 inline
260 float fminf(float x, float y) { return __ocml_fmin_f32(x, y); }
261 __DEVICE__
262 inline
263 float fmodf(float x, float y) { return __ocml_fmod_f32(x, y); }
264 __DEVICE__
265 inline
266 float frexpf(float x, int* nptr)
267 {
268  int tmp;
269  float r =
270  __ocml_frexp_f32(x, (__attribute__((address_space(5))) int*) &tmp);
271  *nptr = tmp;
272 
273  return r;
274 }
275 __DEVICE__
276 inline
277 float hypotf(float x, float y) { return __ocml_hypot_f32(x, y); }
278 __DEVICE__
279 inline
280 int ilogbf(float x) { return __ocml_ilogb_f32(x); }
281 __DEVICE__
282 inline
283 __RETURN_TYPE isfinite(float x) { return __ocml_isfinite_f32(x); }
284 __DEVICE__
285 inline
286 __RETURN_TYPE isinf(float x) { return __ocml_isinf_f32(x); }
287 __DEVICE__
288 inline
289 __RETURN_TYPE isnan(float x) { return __ocml_isnan_f32(x); }
290 __DEVICE__
291 inline
292 float j0f(float x) { return __ocml_j0_f32(x); }
293 __DEVICE__
294 inline
295 float j1f(float x) { return __ocml_j1_f32(x); }
296 __DEVICE__
297 inline
298 float jnf(int n, float x)
299 { // TODO: we could use Ahmes multiplication and the Miller & Brown algorithm
300  // for linear recurrences to get O(log n) steps, but it's unclear if
301  // it'd be beneficial in this case.
302  if (n == 0) return j0f(x);
303  if (n == 1) return j1f(x);
304 
305  float x0 = j0f(x);
306  float x1 = j1f(x);
307  for (int i = 1; i < n; ++i) {
308  float x2 = (2 * i) / x * x1 - x0;
309  x0 = x1;
310  x1 = x2;
311  }
312 
313  return x1;
314 }
315 __DEVICE__
316 inline
317 float ldexpf(float x, int e) { return __ocml_ldexp_f32(x, e); }
318 __DEVICE__
319 inline
320 float lgammaf(float x) { return __ocml_lgamma_f32(x); }
321 __DEVICE__
322 inline
323 long long int llrintf(float x) { return __ocml_rint_f32(x); }
324 __DEVICE__
325 inline
326 long long int llroundf(float x) { return __ocml_round_f32(x); }
327 __DEVICE__
328 inline
329 float log10f(float x) { return __ocml_log10_f32(x); }
330 __DEVICE__
331 inline
332 float log1pf(float x) { return __ocml_log1p_f32(x); }
333 __DEVICE__
334 inline
335 float log2f(float x) { return __ocml_log2_f32(x); }
336 __DEVICE__
337 inline
338 float logbf(float x) { return __ocml_logb_f32(x); }
339 __DEVICE__
340 inline
341 float logf(float x) { return __ocml_log_f32(x); }
342 __DEVICE__
343 inline
344 long int lrintf(float x) { return __ocml_rint_f32(x); }
345 __DEVICE__
346 inline
347 long int lroundf(float x) { return __ocml_round_f32(x); }
348 __DEVICE__
349 inline
350 float modff(float x, float* iptr)
351 {
352  float tmp;
353  float r =
354  __ocml_modf_f32(x, (__attribute__((address_space(5))) float*) &tmp);
355  *iptr = tmp;
356 
357  return r;
358 }
359 __DEVICE__
360 inline
361 float nanf(const char* tagp)
362 {
363  union {
364  float val;
365  struct ieee_float {
366  uint32_t mantissa : 22;
367  uint32_t quiet : 1;
368  uint32_t exponent : 8;
369  uint32_t sign : 1;
370  } bits;
371 
372  static_assert(sizeof(float) == sizeof(ieee_float), "");
373  } tmp;
374 
375  tmp.bits.sign = 0u;
376  tmp.bits.exponent = ~0u;
377  tmp.bits.quiet = 1u;
378  tmp.bits.mantissa = __make_mantissa(tagp);
379 
380  return tmp.val;
381 }
382 __DEVICE__
383 inline
384 float nearbyintf(float x) { return __ocml_nearbyint_f32(x); }
385 __DEVICE__
386 inline
387 float nextafterf(float x, float y) { return __ocml_nextafter_f32(x, y); }
388 __DEVICE__
389 inline
390 float norm3df(float x, float y, float z) { return __ocml_len3_f32(x, y, z); }
391 __DEVICE__
392 inline
393 float norm4df(float x, float y, float z, float w)
394 {
395  return __ocml_len4_f32(x, y, z, w);
396 }
397 __DEVICE__
398 inline
399 float normcdff(float x) { return __ocml_ncdf_f32(x); }
400 __DEVICE__
401 inline
402 float normcdfinvf(float x) { return __ocml_ncdfinv_f32(x); }
403 __DEVICE__
404 inline
405 float normf(int dim, const float* a)
406 { // TODO: placeholder until OCML adds support.
407  float r = 0;
408  while (dim--) { r += a[0] * a[0]; ++a; }
409 
410  return __ocml_sqrt_f32(r);
411 }
412 __DEVICE__
413 inline
414 float powf(float x, float y) { return __ocml_pow_f32(x, y); }
415 __DEVICE__
416 inline
417 float rcbrtf(float x) { return __ocml_rcbrt_f32(x); }
418 __DEVICE__
419 inline
420 float remainderf(float x, float y) { return __ocml_remainder_f32(x, y); }
421 __DEVICE__
422 inline
423 float remquof(float x, float y, int* quo)
424 {
425  int tmp;
426  float r =
427  __ocml_remquo_f32(x, y, (__attribute__((address_space(5))) int*) &tmp);
428  *quo = tmp;
429 
430  return r;
431 }
432 __DEVICE__
433 inline
434 float rhypotf(float x, float y) { return __ocml_rhypot_f32(x, y); }
435 __DEVICE__
436 inline
437 float rintf(float x) { return __ocml_rint_f32(x); }
438 __DEVICE__
439 inline
440 float rnorm3df(float x, float y, float z)
441 {
442  return __ocml_rlen3_f32(x, y, z);
443 }
444 
445 __DEVICE__
446 inline
447 float rnorm4df(float x, float y, float z, float w)
448 {
449  return __ocml_rlen4_f32(x, y, z, w);
450 }
451 __DEVICE__
452 inline
453 float rnormf(int dim, const float* a)
454 { // TODO: placeholder until OCML adds support.
455  float r = 0;
456  while (dim--) { r += a[0] * a[0]; ++a; }
457 
458  return __ocml_rsqrt_f32(r);
459 }
460 __DEVICE__
461 inline
462 float roundf(float x) { return __ocml_round_f32(x); }
463 __DEVICE__
464 inline
465 float rsqrtf(float x) { return __ocml_rsqrt_f32(x); }
466 __DEVICE__
467 inline
468 float scalblnf(float x, long int n)
469 {
470  return (n < INT_MAX) ? __ocml_scalbn_f32(x, n) : __ocml_scalb_f32(x, n);
471 }
472 __DEVICE__
473 inline
474 float scalbnf(float x, int n) { return __ocml_scalbn_f32(x, n); }
475 __DEVICE__
476 inline
477 __RETURN_TYPE signbit(float x) { return __ocml_signbit_f32(x); }
478 __DEVICE__
479 inline
480 void sincosf(float x, float* sptr, float* cptr)
481 {
482  float tmp;
483 
484  *sptr =
485  __ocml_sincos_f32(x, (__attribute__((address_space(5))) float*) &tmp);
486  *cptr = tmp;
487 }
488 __DEVICE__
489 inline
490 void sincospif(float x, float* sptr, float* cptr)
491 {
492  float tmp;
493 
494  *sptr =
495  __ocml_sincospi_f32(x, (__attribute__((address_space(5))) float*) &tmp);
496  *cptr = tmp;
497 }
498 __DEVICE__
499 inline
500 float sinf(float x) { return __ocml_sin_f32(x); }
501 __DEVICE__
502 inline
503 float sinhf(float x) { return __ocml_sinh_f32(x); }
504 __DEVICE__
505 inline
506 float sinpif(float x) { return __ocml_sinpi_f32(x); }
507 __DEVICE__
508 inline
509 float sqrtf(float x) { return __ocml_sqrt_f32(x); }
510 __DEVICE__
511 inline
512 float tanf(float x) { return __ocml_tan_f32(x); }
513 __DEVICE__
514 inline
515 float tanhf(float x) { return __ocml_tanh_f32(x); }
516 __DEVICE__
517 inline
518 float tgammaf(float x) { return __ocml_tgamma_f32(x); }
519 __DEVICE__
520 inline
521 float truncf(float x) { return __ocml_trunc_f32(x); }
522 __DEVICE__
523 inline
524 float y0f(float x) { return __ocml_y0_f32(x); }
525 __DEVICE__
526 inline
527 float y1f(float x) { return __ocml_y1_f32(x); }
528 __DEVICE__
529 inline
530 float ynf(int n, float x)
531 { // TODO: we could use Ahmes multiplication and the Miller & Brown algorithm
532  // for linear recurrences to get O(log n) steps, but it's unclear if
533  // it'd be beneficial in this case. Placeholder until OCML adds
534  // support.
535  if (n == 0) return y0f(x);
536  if (n == 1) return y1f(x);
537 
538  float x0 = y0f(x);
539  float x1 = y1f(x);
540  for (int i = 1; i < n; ++i) {
541  float x2 = (2 * i) / x * x1 - x0;
542  x0 = x1;
543  x1 = x2;
544  }
545 
546  return x1;
547 }
548 
549 // BEGIN INTRINSICS
550 __DEVICE__
551 inline
552 float __cosf(float x) { return __ocml_native_cos_f32(x); }
553 __DEVICE__
554 inline
555 float __exp10f(float x) { return __ocml_native_exp10_f32(x); }
556 __DEVICE__
557 inline
558 float __expf(float x) { return __ocml_native_exp_f32(x); }
559 #if defined OCML_BASIC_ROUNDED_OPERATIONS
560 __DEVICE__
561 inline
562 float __fadd_rd(float x, float y) { return __ocml_add_rtn_f32(x, y); }
563 #endif
564 __DEVICE__
565 inline
566 float __fadd_rn(float x, float y) { return x + y; }
567 #if defined OCML_BASIC_ROUNDED_OPERATIONS
568 __DEVICE__
569 inline
570 float __fadd_ru(float x, float y) { return __ocml_add_rtp_f32(x, y); }
571 __DEVICE__
572 inline
573 float __fadd_rz(float x, float y) { return __ocml_add_rtz_f32(x, y); }
574 __DEVICE__
575 inline
576 float __fdiv_rd(float x, float y) { return __ocml_div_rtn_f32(x, y); }
577 #endif
578 __DEVICE__
579 inline
580 float __fdiv_rn(float x, float y) { return x / y; }
581 #if defined OCML_BASIC_ROUNDED_OPERATIONS
582 __DEVICE__
583 inline
584 float __fdiv_ru(float x, float y) { return __ocml_div_rtp_f32(x, y); }
585 __DEVICE__
586 inline
587 float __fdiv_rz(float x, float y) { return __ocml_div_rtz_f32(x, y); }
588 #endif
589 __DEVICE__
590 inline
591 float __fdividef(float x, float y) { return x / y; }
592 #if defined OCML_BASIC_ROUNDED_OPERATIONS
593 __DEVICE__
594 inline
595 float __fmaf_rd(float x, float y, float z)
596 {
597  return __ocml_fma_rtn_f32(x, y, z);
598 }
599 #endif
600 __DEVICE__
601 inline
602 float __fmaf_rn(float x, float y, float z)
603 {
604  return __ocml_fma_f32(x, y, z);
605 }
606 #if defined OCML_BASIC_ROUNDED_OPERATIONS
607 __DEVICE__
608 inline
609 float __fmaf_ru(float x, float y, float z)
610 {
611  return __ocml_fma_rtp_f32(x, y, z);
612 }
613 __DEVICE__
614 inline
615 float __fmaf_rz(float x, float y, float z)
616 {
617  return __ocml_fma_rtz_f32(x, y, z);
618 }
619 __DEVICE__
620 inline
621 float __fmul_rd(float x, float y) { return __ocml_mul_rtn_f32(x, y); }
622 #endif
623 __DEVICE__
624 inline
625 float __fmul_rn(float x, float y) { return x * y; }
626 #if defined OCML_BASIC_ROUNDED_OPERATIONS
627 __DEVICE__
628 inline
629 float __fmul_ru(float x, float y) { return __ocml_mul_rtp_f32(x, y); }
630 __DEVICE__
631 inline
632 float __fmul_rz(float x, float y) { return __ocml_mul_rtz_f32(x, y); }
633 __DEVICE__
634 inline
635 float __frcp_rd(float x) { return __llvm_amdgcn_rcp_f32(x); }
636 #endif
637 __DEVICE__
638 inline
639 float __frcp_rn(float x) { return __llvm_amdgcn_rcp_f32(x); }
640 #if defined OCML_BASIC_ROUNDED_OPERATIONS
641 __DEVICE__
642 inline
643 float __frcp_ru(float x) { return __llvm_amdgcn_rcp_f32(x); }
644 __DEVICE__
645 inline
646 float __frcp_rz(float x) { return __llvm_amdgcn_rcp_f32(x); }
647 #endif
648 __DEVICE__
649 inline
650 float __frsqrt_rn(float x) { return __llvm_amdgcn_rsq_f32(x); }
651 #if defined OCML_BASIC_ROUNDED_OPERATIONS
652 __DEVICE__
653 inline
654 float __fsqrt_rd(float x) { return __ocml_sqrt_rtn_f32(x); }
655 #endif
656 __DEVICE__
657 inline
658 float __fsqrt_rn(float x) { return __ocml_native_sqrt_f32(x); }
659 #if defined OCML_BASIC_ROUNDED_OPERATIONS
660 __DEVICE__
661 inline
662 float __fsqrt_ru(float x) { return __ocml_sqrt_rtp_f32(x); }
663 __DEVICE__
664 inline
665 float __fsqrt_rz(float x) { return __ocml_sqrt_rtz_f32(x); }
666 __DEVICE__
667 inline
668 float __fsub_rd(float x, float y) { return __ocml_sub_rtn_f32(x, y); }
669 #endif
670 __DEVICE__
671 inline
672 float __fsub_rn(float x, float y) { return x - y; }
673 #if defined OCML_BASIC_ROUNDED_OPERATIONS
674 __DEVICE__
675 inline
676 float __fsub_ru(float x, float y) { return __ocml_sub_rtp_f32(x, y); }
677 __DEVICE__
678 inline
679 float __fsub_rz(float x, float y) { return __ocml_sub_rtz_f32(x, y); }
680 #endif
681 __DEVICE__
682 inline
683 float __log10f(float x) { return __ocml_native_log10_f32(x); }
684 __DEVICE__
685 inline
686 float __log2f(float x) { return __ocml_native_log2_f32(x); }
687 __DEVICE__
688 inline
689 float __logf(float x) { return __ocml_native_log_f32(x); }
690 __DEVICE__
691 inline
692 float __powf(float x, float y) { return __ocml_pow_f32(x, y); }
693 __DEVICE__
694 inline
695 float __saturatef(float x) { return (x < 0) ? 0 : ((x > 1) ? 1 : x); }
696 __DEVICE__
697 inline
698 void __sincosf(float x, float* sptr, float* cptr)
699 {
700  *sptr = __ocml_native_sin_f32(x);
701  *cptr = __ocml_native_cos_f32(x);
702 }
703 __DEVICE__
704 inline
705 float __sinf(float x) { return __ocml_native_sin_f32(x); }
706 __DEVICE__
707 inline
708 float __tanf(float x) { return __ocml_tan_f32(x); }
709 // END INTRINSICS
710 // END FLOAT
711 
712 // BEGIN DOUBLE
713 __DEVICE__
714 inline
715 double abs(double x) { return __ocml_fabs_f64(x); }
716 __DEVICE__
717 inline
718 double acos(double x) { return __ocml_acos_f64(x); }
719 __DEVICE__
720 inline
721 double acosh(double x) { return __ocml_acosh_f64(x); }
722 __DEVICE__
723 inline
724 double asin(double x) { return __ocml_asin_f64(x); }
725 __DEVICE__
726 inline
727 double asinh(double x) { return __ocml_asinh_f64(x); }
728 __DEVICE__
729 inline
730 double atan(double x) { return __ocml_atan_f64(x); }
731 __DEVICE__
732 inline
733 double atan2(double x, double y) { return __ocml_atan2_f64(x, y); }
734 __DEVICE__
735 inline
736 double atanh(double x) { return __ocml_atanh_f64(x); }
737 __DEVICE__
738 inline
739 double cbrt(double x) { return __ocml_cbrt_f64(x); }
740 __DEVICE__
741 inline
742 double ceil(double x) { return __ocml_ceil_f64(x); }
743 __DEVICE__
744 inline
745 double copysign(double x, double y) { return __ocml_copysign_f64(x, y); }
746 __DEVICE__
747 inline
748 double cos(double x) { return __ocml_cos_f64(x); }
749 __DEVICE__
750 inline
751 double cosh(double x) { return __ocml_cosh_f64(x); }
752 __DEVICE__
753 inline
754 double cospi(double x) { return __ocml_cospi_f64(x); }
755 __DEVICE__
756 inline
757 double cyl_bessel_i0(double x) { return __ocml_i0_f64(x); }
758 __DEVICE__
759 inline
760 double cyl_bessel_i1(double x) { return __ocml_i1_f64(x); }
761 __DEVICE__
762 inline
763 double erf(double x) { return __ocml_erf_f64(x); }
764 __DEVICE__
765 inline
766 double erfc(double x) { return __ocml_erfc_f64(x); }
767 __DEVICE__
768 inline
769 double erfcinv(double x) { return __ocml_erfcinv_f64(x); }
770 __DEVICE__
771 inline
772 double erfcx(double x) { return __ocml_erfcx_f64(x); }
773 __DEVICE__
774 inline
775 double erfinv(double x) { return __ocml_erfinv_f64(x); }
776 __DEVICE__
777 inline
778 double exp(double x) { return __ocml_exp_f64(x); }
779 __DEVICE__
780 inline
781 double exp10(double x) { return __ocml_exp10_f64(x); }
782 __DEVICE__
783 inline
784 double exp2(double x) { return __ocml_exp2_f64(x); }
785 __DEVICE__
786 inline
787 double expm1(double x) { return __ocml_expm1_f64(x); }
788 __DEVICE__
789 inline
790 double fabs(double x) { return __ocml_fabs_f64(x); }
791 __DEVICE__
792 inline
793 double fdim(double x, double y) { return __ocml_fdim_f64(x, y); }
794 __DEVICE__
795 inline
796 double floor(double x) { return __ocml_floor_f64(x); }
797 __DEVICE__
798 inline
799 double fma(double x, double y, double z) { return __ocml_fma_f64(x, y, z); }
800 __DEVICE__
801 inline
802 double fmax(double x, double y) { return __ocml_fmax_f64(x, y); }
803 __DEVICE__
804 inline
805 double fmin(double x, double y) { return __ocml_fmin_f64(x, y); }
806 __DEVICE__
807 inline
808 double fmod(double x, double y) { return __ocml_fmod_f64(x, y); }
809 __DEVICE__
810 inline
811 double frexp(double x, int* nptr)
812 {
813  int tmp;
814  double r =
815  __ocml_frexp_f64(x, (__attribute__((address_space(5))) int*) &tmp);
816  *nptr = tmp;
817 
818  return r;
819 }
820 __DEVICE__
821 inline
822 double hypot(double x, double y) { return __ocml_hypot_f64(x, y); }
823 __DEVICE__
824 inline
825 int ilogb(double x) { return __ocml_ilogb_f64(x); }
826 __DEVICE__
827 inline
828 __RETURN_TYPE isfinite(double x) { return __ocml_isfinite_f64(x); }
829 __DEVICE__
830 inline
831 __RETURN_TYPE isinf(double x) { return __ocml_isinf_f64(x); }
832 __DEVICE__
833 inline
834 __RETURN_TYPE isnan(double x) { return __ocml_isnan_f64(x); }
835 __DEVICE__
836 inline
837 double j0(double x) { return __ocml_j0_f64(x); }
838 __DEVICE__
839 inline
840 double j1(double x) { return __ocml_j1_f64(x); }
841 __DEVICE__
842 inline
843 double jn(int n, double x)
844 { // TODO: we could use Ahmes multiplication and the Miller & Brown algorithm
845  // for linear recurrences to get O(log n) steps, but it's unclear if
846  // it'd be beneficial in this case. Placeholder until OCML adds
847  // support.
848  if (n == 0) return j0f(x);
849  if (n == 1) return j1f(x);
850 
851  double x0 = j0f(x);
852  double x1 = j1f(x);
853  for (int i = 1; i < n; ++i) {
854  double x2 = (2 * i) / x * x1 - x0;
855  x0 = x1;
856  x1 = x2;
857  }
858 
859  return x1;
860 }
861 __DEVICE__
862 inline
863 double ldexp(double x, int e) { return __ocml_ldexp_f64(x, e); }
864 __DEVICE__
865 inline
866 double lgamma(double x) { return __ocml_lgamma_f64(x); }
867 __DEVICE__
868 inline
869 long long int llrint(double x) { return __ocml_rint_f64(x); }
870 __DEVICE__
871 inline
872 long long int llround(double x) { return __ocml_round_f64(x); }
873 __DEVICE__
874 inline
875 double log(double x) { return __ocml_log_f64(x); }
876 __DEVICE__
877 inline
878 double log10(double x) { return __ocml_log10_f64(x); }
879 __DEVICE__
880 inline
881 double log1p(double x) { return __ocml_log1p_f64(x); }
882 __DEVICE__
883 inline
884 double log2(double x) { return __ocml_log2_f64(x); }
885 __DEVICE__
886 inline
887 double logb(double x) { return __ocml_logb_f64(x); }
888 __DEVICE__
889 inline
890 long int lrint(double x) { return __ocml_rint_f64(x); }
891 __DEVICE__
892 inline
893 long int lround(double x) { return __ocml_round_f64(x); }
894 __DEVICE__
895 inline
896 double modf(double x, double* iptr)
897 {
898  double tmp;
899  double r =
900  __ocml_modf_f64(x, (__attribute__((address_space(5))) double*) &tmp);
901  *iptr = tmp;
902 
903  return r;
904 }
905 __DEVICE__
906 inline
907 double nan(const char* tagp)
908 {
909 #if !_WIN32
910  union {
911  double val;
912  struct ieee_double {
913  uint64_t mantissa : 51;
914  uint32_t quiet : 1;
915  uint32_t exponent : 11;
916  uint32_t sign : 1;
917  } bits;
918  static_assert(sizeof(double) == sizeof(ieee_double), "");
919  } tmp;
920 
921  tmp.bits.sign = 0u;
922  tmp.bits.exponent = ~0u;
923  tmp.bits.quiet = 1u;
924  tmp.bits.mantissa = __make_mantissa(tagp);
925 
926  return tmp.val;
927 #else
928  static_assert(sizeof(uint64_t)==sizeof(double));
929  uint64_t val = __make_mantissa(tagp);
930  val |= 0xFFF << 51;
931  return *reinterpret_cast<double*>(&val);
932 #endif
933 }
934 __DEVICE__
935 inline
936 double nearbyint(double x) { return __ocml_nearbyint_f64(x); }
937 __DEVICE__
938 inline
939 double nextafter(double x, double y) { return __ocml_nextafter_f64(x, y); }
940 __DEVICE__
941 inline
942 double norm(int dim, const double* a)
943 { // TODO: placeholder until OCML adds support.
944  double r = 0;
945  while (dim--) { r += a[0] * a[0]; ++a; }
946 
947  return __ocml_sqrt_f64(r);
948 }
949 __DEVICE__
950 inline
951 double norm3d(double x, double y, double z)
952 {
953  return __ocml_len3_f64(x, y, z);
954 }
955 __DEVICE__
956 inline
957 double norm4d(double x, double y, double z, double w)
958 {
959  return __ocml_len4_f64(x, y, z, w);
960 }
961 __DEVICE__
962 inline
963 double normcdf(double x) { return __ocml_ncdf_f64(x); }
964 __DEVICE__
965 inline
966 double normcdfinv(double x) { return __ocml_ncdfinv_f64(x); }
967 __DEVICE__
968 inline
969 double pow(double x, double y) { return __ocml_pow_f64(x, y); }
970 __DEVICE__
971 inline
972 double rcbrt(double x) { return __ocml_rcbrt_f64(x); }
973 __DEVICE__
974 inline
975 double remainder(double x, double y) { return __ocml_remainder_f64(x, y); }
976 __DEVICE__
977 inline
978 double remquo(double x, double y, int* quo)
979 {
980  int tmp;
981  double r =
982  __ocml_remquo_f64(x, y, (__attribute__((address_space(5))) int*) &tmp);
983  *quo = tmp;
984 
985  return r;
986 }
987 __DEVICE__
988 inline
989 double rhypot(double x, double y) { return __ocml_rhypot_f64(x, y); }
990 __DEVICE__
991 inline
992 double rint(double x) { return __ocml_rint_f64(x); }
993 __DEVICE__
994 inline
995 double rnorm(int dim, const double* a)
996 { // TODO: placeholder until OCML adds support.
997  double r = 0;
998  while (dim--) { r += a[0] * a[0]; ++a; }
999 
1000  return __ocml_rsqrt_f64(r);
1001 }
1002 __DEVICE__
1003 inline
1004 double rnorm3d(double x, double y, double z)
1005 {
1006  return __ocml_rlen3_f64(x, y, z);
1007 }
1008 __DEVICE__
1009 inline
1010 double rnorm4d(double x, double y, double z, double w)
1011 {
1012  return __ocml_rlen4_f64(x, y, z, w);
1013 }
1014 __DEVICE__
1015 inline
1016 double round(double x) { return __ocml_round_f64(x); }
1017 __DEVICE__
1018 inline
1019 double rsqrt(double x) { return __ocml_rsqrt_f64(x); }
1020 __DEVICE__
1021 inline
1022 double scalbln(double x, long int n)
1023 {
1024  return (n < INT_MAX) ? __ocml_scalbn_f64(x, n) : __ocml_scalb_f64(x, n);
1025 }
1026 __DEVICE__
1027 inline
1028 double scalbn(double x, int n) { return __ocml_scalbn_f64(x, n); }
1029 __DEVICE__
1030 inline
1031 __RETURN_TYPE signbit(double x) { return __ocml_signbit_f64(x); }
1032 __DEVICE__
1033 inline
1034 double sin(double x) { return __ocml_sin_f64(x); }
1035 __DEVICE__
1036 inline
1037 void sincos(double x, double* sptr, double* cptr)
1038 {
1039  double tmp;
1040  *sptr =
1041  __ocml_sincos_f64(x, (__attribute__((address_space(5))) double*) &tmp);
1042  *cptr = tmp;
1043 }
1044 __DEVICE__
1045 inline
1046 void sincospi(double x, double* sptr, double* cptr)
1047 {
1048  double tmp;
1049  *sptr = __ocml_sincospi_f64(
1050  x, (__attribute__((address_space(5))) double*) &tmp);
1051  *cptr = tmp;
1052 }
1053 __DEVICE__
1054 inline
1055 double sinh(double x) { return __ocml_sinh_f64(x); }
1056 __DEVICE__
1057 inline
1058 double sinpi(double x) { return __ocml_sinpi_f64(x); }
1059 __DEVICE__
1060 inline
1061 double sqrt(double x) { return __ocml_sqrt_f64(x); }
1062 __DEVICE__
1063 inline
1064 double tan(double x) { return __ocml_tan_f64(x); }
1065 __DEVICE__
1066 inline
1067 double tanh(double x) { return __ocml_tanh_f64(x); }
1068 __DEVICE__
1069 inline
1070 double tgamma(double x) { return __ocml_tgamma_f64(x); }
1071 __DEVICE__
1072 inline
1073 double trunc(double x) { return __ocml_trunc_f64(x); }
1074 __DEVICE__
1075 inline
1076 double y0(double x) { return __ocml_y0_f64(x); }
1077 __DEVICE__
1078 inline
1079 double y1(double x) { return __ocml_y1_f64(x); }
1080 __DEVICE__
1081 inline
1082 double yn(int n, double x)
1083 { // TODO: we could use Ahmes multiplication and the Miller & Brown algorithm
1084  // for linear recurrences to get O(log n) steps, but it's unclear if
1085  // it'd be beneficial in this case. Placeholder until OCML adds
1086  // support.
1087  if (n == 0) return j0f(x);
1088  if (n == 1) return j1f(x);
1089 
1090  double x0 = j0f(x);
1091  double x1 = j1f(x);
1092  for (int i = 1; i < n; ++i) {
1093  double x2 = (2 * i) / x * x1 - x0;
1094  x0 = x1;
1095  x1 = x2;
1096  }
1097 
1098  return x1;
1099 }
1100 
1101 // BEGIN INTRINSICS
1102 #if defined OCML_BASIC_ROUNDED_OPERATIONS
1103 __DEVICE__
1104 inline
1105 double __dadd_rd(double x, double y) { return __ocml_add_rtn_f64(x, y); }
1106 #endif
1107 __DEVICE__
1108 inline
1109 double __dadd_rn(double x, double y) { return x + y; }
1110 #if defined OCML_BASIC_ROUNDED_OPERATIONS
1111 __DEVICE__
1112 inline
1113 double __dadd_ru(double x, double y) { return __ocml_add_rtp_f64(x, y); }
1114 __DEVICE__
1115 inline
1116 double __dadd_rz(double x, double y) { return __ocml_add_rtz_f64(x, y); }
1117 __DEVICE__
1118 inline
1119 double __ddiv_rd(double x, double y) { return __ocml_div_rtn_f64(x, y); }
1120 #endif
1121 __DEVICE__
1122 inline
1123 double __ddiv_rn(double x, double y) { return x / y; }
1124 #if defined OCML_BASIC_ROUNDED_OPERATIONS
1125 __DEVICE__
1126 inline
1127 double __ddiv_ru(double x, double y) { return __ocml_div_rtp_f64(x, y); }
1128 __DEVICE__
1129 inline
1130 double __ddiv_rz(double x, double y) { return __ocml_div_rtz_f64(x, y); }
1131 __DEVICE__
1132 inline
1133 double __dmul_rd(double x, double y) { return __ocml_mul_rtn_f64(x, y); }
1134 #endif
1135 __DEVICE__
1136 inline
1137 double __dmul_rn(double x, double y) { return x * y; }
1138 #if defined OCML_BASIC_ROUNDED_OPERATIONS
1139 __DEVICE__
1140 inline
1141 double __dmul_ru(double x, double y) { return __ocml_mul_rtp_f64(x, y); }
1142 __DEVICE__
1143 inline
1144 double __dmul_rz(double x, double y) { return __ocml_mul_rtz_f64(x, y); }
1145 __DEVICE__
1146 inline
1147 double __drcp_rd(double x) { return __llvm_amdgcn_rcp_f64(x); }
1148 #endif
1149 __DEVICE__
1150 inline
1151 double __drcp_rn(double x) { return __llvm_amdgcn_rcp_f64(x); }
1152 #if defined OCML_BASIC_ROUNDED_OPERATIONS
1153 __DEVICE__
1154 inline
1155 double __drcp_ru(double x) { return __llvm_amdgcn_rcp_f64(x); }
1156 __DEVICE__
1157 inline
1158 double __drcp_rz(double x) { return __llvm_amdgcn_rcp_f64(x); }
1159 __DEVICE__
1160 inline
1161 double __dsqrt_rd(double x) { return __ocml_sqrt_rtn_f64(x); }
1162 #endif
1163 __DEVICE__
1164 inline
1165 double __dsqrt_rn(double x) { return __ocml_sqrt_f64(x); }
1166 #if defined OCML_BASIC_ROUNDED_OPERATIONS
1167 __DEVICE__
1168 inline
1169 double __dsqrt_ru(double x) { return __ocml_sqrt_rtp_f64(x); }
1170 __DEVICE__
1171 inline
1172 double __dsqrt_rz(double x) { return __ocml_sqrt_rtz_f64(x); }
1173 __DEVICE__
1174 inline
1175 double __dsub_rd(double x, double y) { return __ocml_sub_rtn_f64(x, y); }
1176 #endif
1177 __DEVICE__
1178 inline
1179 double __dsub_rn(double x, double y) { return x - y; }
1180 #if defined OCML_BASIC_ROUNDED_OPERATIONS
1181 __DEVICE__
1182 inline
1183 double __dsub_ru(double x, double y) { return __ocml_sub_rtp_f64(x, y); }
1184 __DEVICE__
1185 inline
1186 double __dsub_rz(double x, double y) { return __ocml_sub_rtz_f64(x, y); }
1187 __DEVICE__
1188 inline
1189 double __fma_rd(double x, double y, double z)
1190 {
1191  return __ocml_fma_rtn_f64(x, y, z);
1192 }
1193 #endif
1194 __DEVICE__
1195 inline
1196 double __fma_rn(double x, double y, double z)
1197 {
1198  return __ocml_fma_f64(x, y, z);
1199 }
1200 #if defined OCML_BASIC_ROUNDED_OPERATIONS
1201 __DEVICE__
1202 inline
1203 double __fma_ru(double x, double y, double z)
1204 {
1205  return __ocml_fma_rtp_f64(x, y, z);
1206 }
1207 __DEVICE__
1208 inline
1209 double __fma_rz(double x, double y, double z)
1210 {
1211  return __ocml_fma_rtz_f64(x, y, z);
1212 }
1213 #endif
1214 // END INTRINSICS
1215 // END DOUBLE
1216 
1217 // BEGIN INTEGER
1218 __DEVICE__
1219 inline
1220 int abs(int x)
1221 {
1222  int sgn = x >> (sizeof(int) * CHAR_BIT - 1);
1223  return (x ^ sgn) - sgn;
1224 }
1225 __DEVICE__
1226 inline
1227 long labs(long x)
1228 {
1229  long sgn = x >> (sizeof(long) * CHAR_BIT - 1);
1230  return (x ^ sgn) - sgn;
1231 }
1232 __DEVICE__
1233 inline
1234 long long llabs(long long x)
1235 {
1236  long long sgn = x >> (sizeof(long long) * CHAR_BIT - 1);
1237  return (x ^ sgn) - sgn;
1238 }
1239 
1240 #if defined(__cplusplus)
1241  __DEVICE__
1242  inline
1243  long abs(long x) { return labs(x); }
1244  __DEVICE__
1245  inline
1246  long long abs(long long x) { return llabs(x); }
1247 #endif
1248 // END INTEGER
1249 
1250 __DEVICE__
1251 inline _Float16 fma(_Float16 x, _Float16 y, _Float16 z) {
1252  return __ocml_fma_f16(x, y, z);
1253 }
1254 
1255 __DEVICE__
1256 inline float fma(float x, float y, float z) {
1257  return fmaf(x, y, z);
1258 }
1259 
1260 #pragma push_macro("__DEF_FLOAT_FUN")
1261 #pragma push_macro("__DEF_FLOAT_FUN2")
1262 #pragma push_macro("__DEF_FLOAT_FUN2I")
1263 #pragma push_macro("__HIP_OVERLOAD")
1264 #pragma push_macro("__HIP_OVERLOAD2")
1265 
1266 // __hip_enable_if::type is a type function which returns __T if __B is true.
1267 template<bool __B, class __T = void>
1269 
1270 template <class __T> struct __hip_enable_if<true, __T> {
1271  typedef __T type;
1272 };
1273 
1274 // __HIP_OVERLOAD1 is used to resolve function calls with integer argument to
1275 // avoid compilation error due to ambibuity. e.g. floor(5) is resolved with
1276 // floor(double).
1277 #define __HIP_OVERLOAD1(__retty, __fn) \
1278  template <typename __T> \
1279  __DEVICE__ \
1280  typename __hip_enable_if<std::numeric_limits<__T>::is_integer, \
1281  __retty>::type \
1282  __fn(__T __x) { \
1283  return ::__fn((double)__x); \
1284  }
1285 
1286 // __HIP_OVERLOAD2 is used to resolve function calls with mixed float/double
1287 // or integer argument to avoid compilation error due to ambibuity. e.g.
1288 // max(5.0f, 6.0) is resolved with max(double, double).
1289 #define __HIP_OVERLOAD2(__retty, __fn) \
1290  template <typename __T1, typename __T2> \
1291  __DEVICE__ typename __hip_enable_if< \
1292  std::numeric_limits<__T1>::is_specialized && \
1293  std::numeric_limits<__T2>::is_specialized, \
1294  __retty>::type \
1295  __fn(__T1 __x, __T2 __y) { \
1296  return __fn((double)__x, (double)__y); \
1297  }
1298 
1299 // Define cmath functions with float argument and returns float.
1300 #define __DEF_FUN1(retty, func) \
1301 __DEVICE__ \
1302 inline \
1303 float func(float x) \
1304 { \
1305  return func##f(x); \
1306 } \
1307 __HIP_OVERLOAD1(retty, func)
1308 
1309 // Define cmath functions with float argument and returns retty.
1310 #define __DEF_FUNI(retty, func) \
1311 __DEVICE__ \
1312 inline \
1313 retty func(float x) \
1314 { \
1315  return func##f(x); \
1316 } \
1317 __HIP_OVERLOAD1(retty, func)
1318 
1319 // define cmath functions with two float arguments.
1320 #define __DEF_FUN2(retty, func) \
1321 __DEVICE__ \
1322 inline \
1323 float func(float x, float y) \
1324 { \
1325  return func##f(x, y); \
1326 } \
1327 __HIP_OVERLOAD2(retty, func)
1328 
1329 __DEF_FUN1(double, acos)
1330 __DEF_FUN1(double, acosh)
1331 __DEF_FUN1(double, asin)
1332 __DEF_FUN1(double, asinh)
1333 __DEF_FUN1(double, atan)
1334 __DEF_FUN2(double, atan2);
1335 __DEF_FUN1(double, atanh)
1336 __DEF_FUN1(double, cbrt)
1337 __DEF_FUN1(double, ceil)
1338 __DEF_FUN2(double, copysign);
1339 __DEF_FUN1(double, cos)
1340 __DEF_FUN1(double, cosh)
1341 __DEF_FUN1(double, erf)
1342 __DEF_FUN1(double, erfc)
1343 __DEF_FUN1(double, exp)
1344 __DEF_FUN1(double, exp2)
1345 __DEF_FUN1(double, expm1)
1346 __DEF_FUN1(double, fabs)
1347 __DEF_FUN2(double, fdim);
1348 __DEF_FUN1(double, floor)
1349 __DEF_FUN2(double, fmax);
1350 __DEF_FUN2(double, fmin);
1351 __DEF_FUN2(double, fmod);
1352 //__HIP_OVERLOAD1(int, fpclassify)
1353 __DEF_FUN2(double, hypot);
1354 __DEF_FUNI(int, ilogb)
1355 __HIP_OVERLOAD1(bool, isfinite)
1356 __HIP_OVERLOAD2(bool, isgreater);
1357 __HIP_OVERLOAD2(bool, isgreaterequal);
1358 __HIP_OVERLOAD1(bool, isinf);
1359 __HIP_OVERLOAD2(bool, isless);
1360 __HIP_OVERLOAD2(bool, islessequal);
1361 __HIP_OVERLOAD2(bool, islessgreater);
1362 __HIP_OVERLOAD1(bool, isnan);
1363 //__HIP_OVERLOAD1(bool, isnormal)
1364 __HIP_OVERLOAD2(bool, isunordered);
1365 __DEF_FUN1(double, lgamma)
1366 __DEF_FUN1(double, log)
1367 __DEF_FUN1(double, log10)
1368 __DEF_FUN1(double, log1p)
1369 __DEF_FUN1(double, log2)
1370 __DEF_FUN1(double, logb)
1371 __DEF_FUNI(long long, llrint)
1372 __DEF_FUNI(long long, llround)
1373 __DEF_FUNI(long, lrint)
1374 __DEF_FUNI(long, lround)
1375 __DEF_FUN1(double, nearbyint);
1376 __DEF_FUN2(double, nextafter);
1377 __DEF_FUN2(double, pow);
1378 __DEF_FUN2(double, remainder);
1379 __DEF_FUN1(double, rint);
1380 __DEF_FUN1(double, round);
1381 __HIP_OVERLOAD1(bool, signbit)
1382 __DEF_FUN1(double, sin)
1383 __DEF_FUN1(double, sinh)
1384 __DEF_FUN1(double, sqrt)
1385 __DEF_FUN1(double, tan)
1386 __DEF_FUN1(double, tanh)
1387 __DEF_FUN1(double, tgamma)
1388 __DEF_FUN1(double, trunc);
1389 
1390 // define cmath functions with a float and an integer argument.
1391 #define __DEF_FLOAT_FUN2I(func) \
1392 __DEVICE__ \
1393 inline \
1394 float func(float x, int y) \
1395 { \
1396  return func##f(x, y); \
1397 }
1398 __DEF_FLOAT_FUN2I(scalbn)
1399 
1400 #if __HCC__
1401 template<class T>
1402 __DEVICE__ inline static T min(T arg1, T arg2) {
1403  return (arg1 < arg2) ? arg1 : arg2;
1404 }
1405 
1406 __DEVICE__ inline static uint32_t min(uint32_t arg1, int32_t arg2) {
1407  return min(arg1, (uint32_t) arg2);
1408 }
1409 /*__DEVICE__ inline static uint32_t min(int32_t arg1, uint32_t arg2) {
1410  return min((uint32_t) arg1, arg2);
1411 }
1412 
1413 __DEVICE__ inline static uint64_t min(uint64_t arg1, int64_t arg2) {
1414  return min(arg1, (uint64_t) arg2);
1415 }
1416 __DEVICE__ inline static uint64_t min(int64_t arg1, uint64_t arg2) {
1417  return min((uint64_t) arg1, arg2);
1418 }
1419 
1420 __DEVICE__ inline static unsigned long long min(unsigned long long arg1, long long arg2) {
1421  return min(arg1, (unsigned long long) arg2);
1422 }
1423 __DEVICE__ inline static unsigned long long min(long long arg1, unsigned long long arg2) {
1424  return min((unsigned long long) arg1, arg2);
1425 }*/
1426 
1427 template<class T>
1428 __DEVICE__ inline static T max(T arg1, T arg2) {
1429  return (arg1 > arg2) ? arg1 : arg2;
1430 }
1431 
1432 __DEVICE__ inline static uint32_t max(uint32_t arg1, int32_t arg2) {
1433  return max(arg1, (uint32_t) arg2);
1434 }
1435 __DEVICE__ inline static uint32_t max(int32_t arg1, uint32_t arg2) {
1436  return max((uint32_t) arg1, arg2);
1437 }
1438 
1439 /*__DEVICE__ inline static uint64_t max(uint64_t arg1, int64_t arg2) {
1440  return max(arg1, (uint64_t) arg2);
1441 }
1442 __DEVICE__ inline static uint64_t max(int64_t arg1, uint64_t arg2) {
1443  return max((uint64_t) arg1, arg2);
1444 }
1445 
1446 __DEVICE__ inline static unsigned long long max(unsigned long long arg1, long long arg2) {
1447  return max(arg1, (unsigned long long) arg2);
1448 }
1449 __DEVICE__ inline static unsigned long long max(long long arg1, unsigned long long arg2) {
1450  return max((unsigned long long) arg1, arg2);
1451 }*/
1452 #else
1453 __DEVICE__ inline int min(int arg1, int arg2) {
1454  return (arg1 < arg2) ? arg1 : arg2;
1455 }
1456 __DEVICE__ inline int max(int arg1, int arg2) {
1457  return (arg1 > arg2) ? arg1 : arg2;
1458 }
1459 
1460 __DEVICE__
1461 inline
1462 float max(float x, float y) {
1463  return fmaxf(x, y);
1464 }
1465 
1466 __DEVICE__
1467 inline
1468 double max(double x, double y) {
1469  return fmax(x, y);
1470 }
1471 
1472 __DEVICE__
1473 inline
1474 float min(float x, float y) {
1475  return fminf(x, y);
1476 }
1477 
1478 __DEVICE__
1479 inline
1480 double min(double x, double y) {
1481  return fmin(x, y);
1482 }
1483 
1484 __HIP_OVERLOAD2(double, max)
1485 __HIP_OVERLOAD2(double, min)
1486 
1487 #endif
1488 
1489 __host__ inline static int min(int arg1, int arg2) {
1490  return std::min(arg1, arg2);
1491 }
1492 
1493 __host__ inline static int max(int arg1, int arg2) {
1494  return std::max(arg1, arg2);
1495 }
1496 
1497 
1498 #pragma pop_macro("__DEF_FLOAT_FUN")
1499 #pragma pop_macro("__DEF_FLOAT_FUN2")
1500 #pragma pop_macro("__DEF_FLOAT_FUN2I")
1501 #pragma pop_macro("__HIP_OVERLOAD")
1502 #pragma pop_macro("__HIP_OVERLOAD2")
1503 #pragma pop_macro("__DEVICE__")
1504 #pragma pop_macro("__RETURN_TYPE")
1505 
1506 // For backward compatibility.
1507 // There are HIP applications e.g. TensorFlow, expecting __HIP_ARCH_* macros
1508 // defined after including math_functions.h.
TODO-doc.
#define __host__
Definition: host_defines.h:41
Contains definitions of APIs for HIP runtime.
Defines the different newt vector types for HIP runtime.
Definition: math_functions.h:1268