28 #ifndef HIP_INCLUDE_HIP_HCC_DETAIL_HIP_VECTOR_TYPES_H 29 #define HIP_INCLUDE_HIP_HCC_DETAIL_HIP_VECTOR_TYPES_H 31 #if defined(__HCC__) && (__hcc_workweek__ < 16032) 32 #error("This version of HIP requires a newer version of HCC."); 37 #if !defined(_MSC_VER) || __clang__ 38 #if defined(__clang__) 39 #define __NATIVE_VECTOR__(n, ...) __attribute__((ext_vector_type(n))) 40 #elif defined(__GNUC__) // N.B.: GCC does not support .xyzw syntax. 41 #define __ROUND_UP_TO_NEXT_POT__(x) \ 42 (1 << (31 - __builtin_clz(x) + (x > (1 << (31 - __builtin_clz(x)))))) 43 #define __NATIVE_VECTOR__(n, T) \ 44 __attribute__((vector_size(__ROUND_UP_TO_NEXT_POT__(n) * sizeof(T)))) 47 #if defined(__cplusplus) 48 #include <type_traits> 50 template<
typename T,
unsigned int n>
struct HIP_vector_base;
53 struct HIP_vector_base<T, 1> {
54 typedef T Native_vec_ __NATIVE_VECTOR__(1, T);
65 struct HIP_vector_base<T, 2> {
66 typedef T Native_vec_ __NATIVE_VECTOR__(2, T);
78 struct HIP_vector_base<T, 3> {
79 typedef T Native_vec_ __NATIVE_VECTOR__(3, T);
92 struct HIP_vector_base<T, 4> {
93 typedef T Native_vec_ __NATIVE_VECTOR__(4, T);
106 template<
typename T,
unsigned int rank>
107 struct HIP_vector_type :
public HIP_vector_base<T, rank> {
108 using HIP_vector_base<T, rank>::data;
109 using typename HIP_vector_base<T, rank>::Native_vec_;
112 HIP_vector_type() =
default;
115 typename std::enable_if<
116 std::is_convertible<U, T>{}>::type* =
nullptr>
118 HIP_vector_type(U x) noexcept
120 for (
auto i = 0u; i != rank; ++i) data[i] = x;
124 typename std::enable_if<
125 (rank > 1) &&
sizeof...(Us) == rank>::type* =
nullptr>
127 HIP_vector_type(Us... xs) noexcept { data = Native_vec_{
static_cast<T
>(xs)...}; }
129 HIP_vector_type(
const HIP_vector_type&) =
default;
131 HIP_vector_type(HIP_vector_type&&) =
default;
133 ~HIP_vector_type() =
default;
136 HIP_vector_type& operator=(
const HIP_vector_type&) =
default;
138 HIP_vector_type& operator=(HIP_vector_type&&) =
default;
142 HIP_vector_type& operator++() noexcept
144 return *
this += HIP_vector_type{1};
147 HIP_vector_type operator++(
int) noexcept
154 HIP_vector_type& operator--() noexcept
156 return *
this -= HIP_vector_type{1};
159 HIP_vector_type operator--(
int) noexcept
166 HIP_vector_type& operator+=(
const HIP_vector_type& x) noexcept
172 HIP_vector_type& operator-=(
const HIP_vector_type& x) noexcept
179 typename std::enable_if<
180 std::is_convertible<U, T>{}>::type* =
nullptr>
182 HIP_vector_type& operator-=(U x) noexcept
184 return *
this -= HIP_vector_type{x};
187 HIP_vector_type& operator*=(
const HIP_vector_type& x) noexcept
193 HIP_vector_type& operator/=(
const HIP_vector_type& x) noexcept
201 typename std::enable_if<std::is_signed<U>{}>::type* =
nullptr>
203 HIP_vector_type operator-() noexcept
206 tmp.data = -tmp.data;
212 typename std::enable_if<std::is_integral<U>{}>::type* =
nullptr>
214 HIP_vector_type operator~() noexcept
216 HIP_vector_type r{*
this};
222 typename std::enable_if<std::is_integral<U>{}>::type* =
nullptr>
224 HIP_vector_type& operator%=(
const HIP_vector_type& x) noexcept
231 typename std::enable_if<std::is_integral<U>{}>::type* =
nullptr>
233 HIP_vector_type& operator^=(
const HIP_vector_type& x) noexcept
240 typename std::enable_if<std::is_integral<U>{}>::type* =
nullptr>
242 HIP_vector_type& operator|=(
const HIP_vector_type& x) noexcept
249 typename std::enable_if<std::is_integral<U>{}>::type* =
nullptr>
251 HIP_vector_type& operator&=(
const HIP_vector_type& x) noexcept
258 typename std::enable_if<std::is_integral<U>{}>::type* =
nullptr>
260 HIP_vector_type& operator>>=(
const HIP_vector_type& x) noexcept
267 typename std::enable_if<std::is_integral<U>{}>::type* =
nullptr>
269 HIP_vector_type& operator<<=(
const HIP_vector_type& x) noexcept
277 template<
typename T,
unsigned int n>
280 HIP_vector_type<T, n> operator+(
281 const HIP_vector_type<T, n>& x,
const HIP_vector_type<T, n>& y) noexcept
283 return HIP_vector_type<T, n>{x} += y;
285 template<
typename T,
unsigned int n,
typename U>
288 HIP_vector_type<T, n> operator+(
289 const HIP_vector_type<T, n>& x, U y) noexcept
291 return HIP_vector_type<T, n>{x} += y;
293 template<
typename T,
unsigned int n,
typename U>
296 HIP_vector_type<T, n> operator+(
297 U x,
const HIP_vector_type<T, n>& y) noexcept
302 template<
typename T,
unsigned int n>
305 HIP_vector_type<T, n> operator-(
306 const HIP_vector_type<T, n>& x,
const HIP_vector_type<T, n>& y) noexcept
308 return HIP_vector_type<T, n>{x} -= y;
310 template<
typename T,
unsigned int n,
typename U>
313 HIP_vector_type<T, n> operator-(
314 const HIP_vector_type<T, n>& x, U y) noexcept
316 return HIP_vector_type<T, n>{x} -= y;
318 template<
typename T,
unsigned int n,
typename U>
321 HIP_vector_type<T, n> operator-(
322 U x,
const HIP_vector_type<T, n>& y) noexcept
324 return HIP_vector_type<T, n>{x} -= y;
327 template<
typename T,
unsigned int n>
330 HIP_vector_type<T, n> operator*(
331 const HIP_vector_type<T, n>& x,
const HIP_vector_type<T, n>& y) noexcept
333 return HIP_vector_type<T, n>{x} *= y;
335 template<
typename T,
unsigned int n,
typename U>
338 HIP_vector_type<T, n> operator*(
339 const HIP_vector_type<T, n>& x, U y) noexcept
341 return HIP_vector_type<T, n>{x} *= y;
343 template<
typename T,
unsigned int n,
typename U>
346 HIP_vector_type<T, n> operator*(
347 U x,
const HIP_vector_type<T, n>& y) noexcept
352 template<
typename T,
unsigned int n>
355 HIP_vector_type<T, n> operator/(
356 const HIP_vector_type<T, n>& x,
const HIP_vector_type<T, n>& y) noexcept
358 return HIP_vector_type<T, n>{x} /= y;
360 template<
typename T,
unsigned int n,
typename U>
363 HIP_vector_type<T, n> operator/(
364 const HIP_vector_type<T, n>& x, U y) noexcept
366 return HIP_vector_type<T, n>{x} /= y;
368 template<
typename T,
unsigned int n,
typename U>
371 HIP_vector_type<T, n> operator/(
372 U x,
const HIP_vector_type<T, n>& y) noexcept
374 return HIP_vector_type<T, n>{x} /= y;
377 template<
typename T,
unsigned int n>
381 const HIP_vector_type<T, n>& x,
const HIP_vector_type<T, n>& y) noexcept
383 auto tmp = x.data == y.data;
384 for (
auto i = 0u; i != n; ++i)
if (tmp[i] == 0)
return false;
387 template<
typename T,
unsigned int n,
typename U>
390 bool operator==(
const HIP_vector_type<T, n>& x, U y) noexcept
392 return x == HIP_vector_type<T, n>{y};
394 template<
typename T,
unsigned int n,
typename U>
397 bool operator==(U x,
const HIP_vector_type<T, n>& y) noexcept
399 return HIP_vector_type<T, n>{x} == y;
402 template<
typename T,
unsigned int n>
406 const HIP_vector_type<T, n>& x,
const HIP_vector_type<T, n>& y) noexcept
410 template<
typename T,
unsigned int n,
typename U>
413 bool operator!=(
const HIP_vector_type<T, n>& x, U y) noexcept
417 template<
typename T,
unsigned int n,
typename U>
420 bool operator!=(U x,
const HIP_vector_type<T, n>& y) noexcept
428 typename std::enable_if<std::is_integral<T>{}>* =
nullptr>
430 HIP_vector_type<T, n> operator%(
431 const HIP_vector_type<T, n>& x,
const HIP_vector_type<T, n>& y) noexcept
433 return HIP_vector_type<T, n>{x} %= y;
439 typename std::enable_if<std::is_integral<T>{}>* =
nullptr>
441 HIP_vector_type<T, n> operator%(
442 const HIP_vector_type<T, n>& x, U y) noexcept
444 return HIP_vector_type<T, n>{x} %= y;
450 typename std::enable_if<std::is_integral<T>{}>* =
nullptr>
452 HIP_vector_type<T, n> operator%(
453 U x,
const HIP_vector_type<T, n>& y) noexcept
455 return HIP_vector_type<T, n>{x} %= y;
461 typename std::enable_if<std::is_integral<T>{}>* =
nullptr>
463 HIP_vector_type<T, n> operator^(
464 const HIP_vector_type<T, n>& x,
const HIP_vector_type<T, n>& y) noexcept
466 return HIP_vector_type<T, n>{x} ^= y;
472 typename std::enable_if<std::is_integral<T>{}>* =
nullptr>
474 HIP_vector_type<T, n> operator^(
475 const HIP_vector_type<T, n>& x, U y) noexcept
477 return HIP_vector_type<T, n>{x} ^= y;
483 typename std::enable_if<std::is_integral<T>{}>* =
nullptr>
485 HIP_vector_type<T, n> operator^(
486 U x,
const HIP_vector_type<T, n>& y) noexcept
488 return HIP_vector_type<T, n>{x} ^= y;
494 typename std::enable_if<std::is_integral<T>{}>* =
nullptr>
496 HIP_vector_type<T, n> operator|(
497 const HIP_vector_type<T, n>& x,
const HIP_vector_type<T, n>& y) noexcept
499 return HIP_vector_type<T, n>{x} |= y;
505 typename std::enable_if<std::is_integral<T>{}>* =
nullptr>
507 HIP_vector_type<T, n> operator|(
508 const HIP_vector_type<T, n>& x, U y) noexcept
510 return HIP_vector_type<T, n>{x} |= y;
516 typename std::enable_if<std::is_integral<T>{}>* =
nullptr>
518 HIP_vector_type<T, n> operator|(
519 U x,
const HIP_vector_type<T, n>& y) noexcept
521 return HIP_vector_type<T, n>{x} |= y;
527 typename std::enable_if<std::is_integral<T>{}>* =
nullptr>
529 HIP_vector_type<T, n> operator&(
530 const HIP_vector_type<T, n>& x,
const HIP_vector_type<T, n>& y) noexcept
532 return HIP_vector_type<T, n>{x} &= y;
538 typename std::enable_if<std::is_integral<T>{}>* =
nullptr>
540 HIP_vector_type<T, n> operator&(
541 const HIP_vector_type<T, n>& x, U y) noexcept
543 return HIP_vector_type<T, n>{x} &= y;
549 typename std::enable_if<std::is_integral<T>{}>* =
nullptr>
551 HIP_vector_type<T, n> operator&(
552 U x,
const HIP_vector_type<T, n>& y) noexcept
554 return HIP_vector_type<T, n>{x} &= y;
560 typename std::enable_if<std::is_integral<T>{}>* =
nullptr>
562 HIP_vector_type<T, n> operator>>(
563 const HIP_vector_type<T, n>& x,
const HIP_vector_type<T, n>& y) noexcept
565 return HIP_vector_type<T, n>{x} >>= y;
571 typename std::enable_if<std::is_integral<T>{}>* =
nullptr>
573 HIP_vector_type<T, n> operator>>(
574 const HIP_vector_type<T, n>& x, U y) noexcept
576 return HIP_vector_type<T, n>{x} >>= y;
582 typename std::enable_if<std::is_integral<T>{}>* =
nullptr>
584 HIP_vector_type<T, n> operator>>(
585 U x,
const HIP_vector_type<T, n>& y) noexcept
587 return HIP_vector_type<T, n>{x} >>= y;
593 typename std::enable_if<std::is_integral<T>{}>* =
nullptr>
595 HIP_vector_type<T, n> operator<<(
596 const HIP_vector_type<T, n>& x,
const HIP_vector_type<T, n>& y) noexcept
598 return HIP_vector_type<T, n>{x} <<= y;
604 typename std::enable_if<std::is_integral<T>{}>* =
nullptr>
606 HIP_vector_type<T, n> operator<<(
607 const HIP_vector_type<T, n>& x, U y) noexcept
609 return HIP_vector_type<T, n>{x} <<= y;
615 typename std::enable_if<std::is_integral<T>{}>* =
nullptr>
617 HIP_vector_type<T, n> operator<<(
618 U x,
const HIP_vector_type<T, n>& y) noexcept
620 return HIP_vector_type<T, n>{x} <<= y;
623 #define __MAKE_VECTOR_TYPE__(CUDA_name, T) \ 624 using CUDA_name##1 = HIP_vector_type<T, 1>;\ 625 using CUDA_name##2 = HIP_vector_type<T, 2>;\ 626 using CUDA_name##3 = HIP_vector_type<T, 3>;\ 627 using CUDA_name##4 = HIP_vector_type<T, 4>; 629 #define __MAKE_VECTOR_TYPE__(CUDA_name, T) \ 630 typedef T CUDA_name##_impl1 __NATIVE_VECTOR__(1, T);\ 631 typedef T CUDA_name##_impl2 __NATIVE_VECTOR__(2, T);\ 632 typedef T CUDA_name##_impl3 __NATIVE_VECTOR__(3, T);\ 633 typedef T CUDA_name##_impl4 __NATIVE_VECTOR__(4, T);\ 636 CUDA_name##_impl1 data;\ 644 CUDA_name##_impl2 data;\ 653 CUDA_name##_impl3 data;\ 663 CUDA_name##_impl4 data;\ 674 __MAKE_VECTOR_TYPE__(uchar,
unsigned char);
675 __MAKE_VECTOR_TYPE__(
char,
char);
676 __MAKE_VECTOR_TYPE__(ushort,
unsigned short);
677 __MAKE_VECTOR_TYPE__(
short,
short);
678 __MAKE_VECTOR_TYPE__(uint,
unsigned int);
679 __MAKE_VECTOR_TYPE__(
int,
int);
680 __MAKE_VECTOR_TYPE__(ulong,
unsigned long);
681 __MAKE_VECTOR_TYPE__(
long,
long);
682 __MAKE_VECTOR_TYPE__(ulonglong,
unsigned long long);
683 __MAKE_VECTOR_TYPE__(longlong,
long long);
684 __MAKE_VECTOR_TYPE__(
float,
float);
685 __MAKE_VECTOR_TYPE__(
double,
double);
687 #define DECLOP_MAKE_ONE_COMPONENT(comp, type) \ 688 __device__ __host__ \ 691 type make_##type(comp x) { type r = {x}; return r; } 693 #define DECLOP_MAKE_TWO_COMPONENT(comp, type) \ 694 __device__ __host__ \ 697 type make_##type(comp x, comp y) { type r = {x, y}; return r; } 699 #define DECLOP_MAKE_THREE_COMPONENT(comp, type) \ 700 __device__ __host__ \ 703 type make_##type(comp x, comp y, comp z) { type r = {x, y, z}; return r; } 705 #define DECLOP_MAKE_FOUR_COMPONENT(comp, type) \ 706 __device__ __host__ \ 709 type make_##type(comp x, comp y, comp z, comp w) { \ 710 type r = {x, y, z, w}; \ 714 DECLOP_MAKE_ONE_COMPONENT(
unsigned char, uchar1);
715 DECLOP_MAKE_TWO_COMPONENT(
unsigned char, uchar2);
716 DECLOP_MAKE_THREE_COMPONENT(
unsigned char, uchar3);
717 DECLOP_MAKE_FOUR_COMPONENT(
unsigned char, uchar4);
719 DECLOP_MAKE_ONE_COMPONENT(
signed char, char1);
720 DECLOP_MAKE_TWO_COMPONENT(
signed char, char2);
721 DECLOP_MAKE_THREE_COMPONENT(
signed char, char3);
722 DECLOP_MAKE_FOUR_COMPONENT(
signed char, char4);
724 DECLOP_MAKE_ONE_COMPONENT(
unsigned short, ushort1);
725 DECLOP_MAKE_TWO_COMPONENT(
unsigned short, ushort2);
726 DECLOP_MAKE_THREE_COMPONENT(
unsigned short, ushort3);
727 DECLOP_MAKE_FOUR_COMPONENT(
unsigned short, ushort4);
729 DECLOP_MAKE_ONE_COMPONENT(
signed short, short1);
730 DECLOP_MAKE_TWO_COMPONENT(
signed short, short2);
731 DECLOP_MAKE_THREE_COMPONENT(
signed short, short3);
732 DECLOP_MAKE_FOUR_COMPONENT(
signed short, short4);
734 DECLOP_MAKE_ONE_COMPONENT(
unsigned int, uint1);
735 DECLOP_MAKE_TWO_COMPONENT(
unsigned int, uint2);
736 DECLOP_MAKE_THREE_COMPONENT(
unsigned int, uint3);
737 DECLOP_MAKE_FOUR_COMPONENT(
unsigned int, uint4);
739 DECLOP_MAKE_ONE_COMPONENT(
signed int, int1);
740 DECLOP_MAKE_TWO_COMPONENT(
signed int, int2);
741 DECLOP_MAKE_THREE_COMPONENT(
signed int, int3);
742 DECLOP_MAKE_FOUR_COMPONENT(
signed int, int4);
744 DECLOP_MAKE_ONE_COMPONENT(
float, float1);
745 DECLOP_MAKE_TWO_COMPONENT(
float, float2);
746 DECLOP_MAKE_THREE_COMPONENT(
float, float3);
747 DECLOP_MAKE_FOUR_COMPONENT(
float, float4);
749 DECLOP_MAKE_ONE_COMPONENT(
double, double1);
750 DECLOP_MAKE_TWO_COMPONENT(
double, double2);
751 DECLOP_MAKE_THREE_COMPONENT(
double, double3);
752 DECLOP_MAKE_FOUR_COMPONENT(
double, double4);
754 DECLOP_MAKE_ONE_COMPONENT(
unsigned long, ulong1);
755 DECLOP_MAKE_TWO_COMPONENT(
unsigned long, ulong2);
756 DECLOP_MAKE_THREE_COMPONENT(
unsigned long, ulong3);
757 DECLOP_MAKE_FOUR_COMPONENT(
unsigned long, ulong4);
759 DECLOP_MAKE_ONE_COMPONENT(
signed long, long1);
760 DECLOP_MAKE_TWO_COMPONENT(
signed long, long2);
761 DECLOP_MAKE_THREE_COMPONENT(
signed long, long3);
762 DECLOP_MAKE_FOUR_COMPONENT(
signed long, long4);
764 DECLOP_MAKE_ONE_COMPONENT(
unsigned long long, ulonglong1);
765 DECLOP_MAKE_TWO_COMPONENT(
unsigned long long, ulonglong2);
766 DECLOP_MAKE_THREE_COMPONENT(
unsigned long long, ulonglong3);
767 DECLOP_MAKE_FOUR_COMPONENT(
unsigned long long, ulonglong4);
769 DECLOP_MAKE_ONE_COMPONENT(
signed long long, longlong1);
770 DECLOP_MAKE_TWO_COMPONENT(
signed long long, longlong2);
771 DECLOP_MAKE_THREE_COMPONENT(
signed long long, longlong3);
772 DECLOP_MAKE_FOUR_COMPONENT(
signed long long, longlong4);
773 #else // defined(_MSC_VER) 774 #include <mmintrin.h> 775 #include <xmmintrin.h> 776 #include <emmintrin.h> 777 #include <immintrin.h> 779 typedef union {
char data; } char1;
780 typedef union {
char data[2]; } char2;
781 typedef union {
char data[4]; } char4;
782 typedef union { char4 data; } char3;
783 typedef union { __m64 data; } char8;
784 typedef union { __m128i data; } char16;
786 typedef union {
unsigned char data; } uchar1;
787 typedef union {
unsigned char data[2]; } uchar2;
788 typedef union {
unsigned char data[4]; } uchar4;
789 typedef union { uchar4 data; } uchar3;
790 typedef union { __m64 data; } uchar8;
791 typedef union { __m128i data; } uchar16;
793 typedef union {
short data; } short1;
794 typedef union {
short data[2]; } short2;
795 typedef union { __m64 data; } short4;
796 typedef union { short4 data; } short3;
797 typedef union { __m128i data; } short8;
798 typedef union { __m128i data[2]; } short16;
800 typedef union {
unsigned short data; } ushort1;
801 typedef union {
unsigned short data[2]; } ushort2;
802 typedef union { __m64 data; } ushort4;
803 typedef union { ushort4 data; } ushort3;
804 typedef union { __m128i data; } ushort8;
805 typedef union { __m128i data[2]; } ushort16;
807 typedef union {
int data; } int1;
808 typedef union { __m64 data; } int2;
809 typedef union { __m128i data; } int4;
810 typedef union { int4 data; } int3;
811 typedef union { __m128i data[2]; } int8;
812 typedef union { __m128i data[4];} int16;
814 typedef union {
unsigned int data; } uint1;
815 typedef union { __m64 data; } uint2;
816 typedef union { __m128i data; } uint4;
817 typedef union { uint4 data; } uint3;
818 typedef union { __m128i data[2]; } uint8;
819 typedef union { __m128i data[4]; } uint16;
822 typedef union {
int data; } long1;
823 typedef union { __m64 data; } long2;
824 typedef union { __m128i data; } long4;
825 typedef union { long4 data; } long3;
826 typedef union { __m128i data[2]; } long8;
827 typedef union { __m128i data[4]; } long16;
829 typedef union {
unsigned int data; } ulong1;
830 typedef union { __m64 data; } ulong2;
831 typedef union { __m128i data; } ulong4;
832 typedef union { ulong4 data; } ulong3;
833 typedef union { __m128i data[2]; } ulong8;
834 typedef union { __m128i data[4]; } ulong16;
835 #else // defined(_WIN64) 836 typedef union { __m64 data; } long1;
837 typedef union { __m128i data; } long2;
838 typedef union { __m128i data[2]; } long4;
839 typedef union { long4 data; } long3;
840 typedef union { __m128i data[4]; } long8;
841 typedef union { __m128i data[8]; } long16;
843 typedef union { __m64 data; } ulong1;
844 typedef union { __m128i data; } ulong2;
845 typedef union { __m128i data[2]; } ulong4;
846 typedef union { ulong4 data; } ulong3;
847 typedef union { __m128i data[4]; } ulong8;
848 typedef union { __m128i data[8]; } ulong16;
849 #endif // defined(_WIN64) 851 typedef union { __m64 data; } longlong1;
852 typedef union { __m128i data; } longlong2;
853 typedef union { __m128i data[2]; } longlong4;
854 typedef union { longlong4 data; } longlong3;
855 typedef union { __m128i data[4]; } longlong8;
856 typedef union { __m128i data[8]; } longlong16;
858 typedef union { __m64 data; } ulonglong1;
859 typedef union { __m128i data; } ulonglong2;
860 typedef union { __m128i data[2]; } ulonglong4;
861 typedef union { ulonglong4 data; } ulonglong3;
862 typedef union { __m128i data[4]; } ulonglong8;
863 typedef union { __m128i data[8]; } ulonglong16;
865 typedef union {
float data; } float1;
866 typedef union { __m64 data; } float2;
867 typedef union { __m128 data; } float4;
868 typedef union { float4 data; } float3;
869 typedef union { __m256 data; } float8;
870 typedef union { __m256 data[2]; } float16;
872 typedef union {
double data; } double1;
873 typedef union { __m128d data; } double2;
874 typedef union { __m256d data; } double4;
875 typedef union { double4 data; } double3;
876 typedef union { __m256d data[2]; } double8;
877 typedef union { __m256d data[4]; } double16;
879 #endif // defined(_MSC_VER)
#define __host__
Definition: host_defines.h:41