// Header preamble: include guard, a check that rejects HCC builds whose
// __hcc_workweek__ is below 16032, and the __NATIVE_VECTOR__ macro, which
// expands to ext_vector_type(n) on clang and to vector_size with the element
// count rounded up to a power of two on GCC. For C++ it then includes
// <type_traits> and forward-declares the HIP_vector_base<T, n> template.
28 #ifndef HIP_INCLUDE_HIP_HCC_DETAIL_HIP_VECTOR_TYPES_H 29 #define HIP_INCLUDE_HIP_HCC_DETAIL_HIP_VECTOR_TYPES_H 31 #if defined(__HCC__) && (__hcc_workweek__ < 16032) 32 #error("This version of HIP requires a newer version of HCC."); 37 #if !defined(_MSC_VER) || __clang__ 38 #if defined(__clang__) 39 #define __NATIVE_VECTOR__(n, ...) __attribute__((ext_vector_type(n))) 40 #elif defined(__GNUC__) // N.B.: GCC does not support .xyzw syntax. 41 #define __ROUND_UP_TO_NEXT_POT__(x) \ 42 (1 << (31 - __builtin_clz(x) + (x > (1 << (31 - __builtin_clz(x)))))) 43 #define __NATIVE_VECTOR__(n, T) \ 44 __attribute__((vector_size(__ROUND_UP_TO_NEXT_POT__(n) * sizeof(T)))) 47 #if defined(__cplusplus) 48 #include <type_traits> 50 template<
typename T,
unsigned int n>
struct HIP_vector_base;
// Rank-1 specialisation: Native_vec_ is T carrying the
// __NATIVE_VECTOR__(1, T) attribute. The rest of this specialisation is not
// present in this listing.
53 struct HIP_vector_base<T, 1> {
54 typedef T Native_vec_ __NATIVE_VECTOR__(1, T);
// Rank-2 specialisation: Native_vec_ is T carrying the
// __NATIVE_VECTOR__(2, T) attribute. The rest of this specialisation is not
// present in this listing.
65 struct HIP_vector_base<T, 2> {
66 typedef T Native_vec_ __NATIVE_VECTOR__(2, T);
// Rank-3 specialisation. Unlike ranks 1, 2 and 4, Native_vec_ is not a typedef
// of a compiler vector here: it is a hand-written type whose members operate
// on three elements held in `d` (the declaration of `d` is not present in
// this listing).
78 struct HIP_vector_base<T, 3> {
// Defaulted default constructor.
84 Native_vec_() =
default;
// Broadcast constructor: all three elements of d are initialised with x.
88 Native_vec_(T x) noexcept : d{x, x, x} {}
// Element-wise constructor: d is initialised as {x, y, z}.
91 Native_vec_(T x, T y, T z) noexcept : d{x, y, z} {}
// Copy/move construction, destruction and copy/move assignment are defaulted.
94 Native_vec_(
const Native_vec_&) =
default;
97 Native_vec_(Native_vec_&&) =
default;
99 ~Native_vec_() =
default;
102 Native_vec_& operator=(
const Native_vec_&) =
default;
104 Native_vec_& operator=(Native_vec_&&) =
default;
// Element access into d with no bounds check: the non-const overload returns
// a reference to d[idx], the const overload returns d[idx] by value.
107 T& operator[](
unsigned int idx) noexcept {
return d[idx]; }
109 T operator[](
unsigned int idx)
const noexcept {
return d[idx]; }
// Compound arithmetic assignment for the rank-3 Native_vec_ (+=, -=, *=, /=).
// Each operator loops over indices 0..2 and applies the scalar operator to
// d[i] using x.d[i]. The enclosing braces and return statements are not
// present in this listing.
112 Native_vec_& operator+=(
const Native_vec_& x) noexcept
114 for (
auto i = 0u; i != 3u; ++i) d[i] += x.d[i];
118 Native_vec_& operator-=(
const Native_vec_& x) noexcept
120 for (
auto i = 0u; i != 3u; ++i) d[i] -= x.d[i];
125 Native_vec_& operator*=(
const Native_vec_& x) noexcept
127 for (
auto i = 0u; i != 3u; ++i) d[i] *= x.d[i];
131 Native_vec_& operator/=(
const Native_vec_& x) noexcept
133 for (
auto i = 0u; i != 3u; ++i) d[i] /= x.d[i];
// Unary operators of the rank-3 Native_vec_; both are const and return a new
// Native_vec_. The declarations of U and of the local r are not present in
// this listing.
// Negation: enabled only when std::is_signed<U> holds; every element of r.d
// is replaced by its negation.
139 typename std::enable_if<std::is_signed<U>{}>::type* =
nullptr>
141 Native_vec_ operator-() const noexcept
144 for (
auto&& x : r.d) x = -x;
// Bitwise complement: enabled only when std::is_integral<U> holds; every
// element of r.d is replaced by its complement.
150 typename std::enable_if<std::is_integral<U>{}>::type* =
nullptr>
152 Native_vec_ operator~() const noexcept
155 for (
auto&& x : r.d) x = ~x;
// Integral-only compound assignment for the rank-3 Native_vec_
// (%=, ^=, |=, &=, >>=, <<=). Each operator is enabled only when
// std::is_integral<U> holds and applies the scalar operator to d[i] using
// x.d[i] for i in 0..2. The declaration of U, the enclosing braces and the
// return statements are not present in this listing.
160 typename std::enable_if<std::is_integral<U>{}>::type* =
nullptr>
162 Native_vec_& operator%=(
const Native_vec_& x) noexcept
164 for (
auto i = 0u; i != 3u; ++i) d[i] %= x.d[i];
169 typename std::enable_if<std::is_integral<U>{}>::type* =
nullptr>
171 Native_vec_& operator^=(
const Native_vec_& x) noexcept
173 for (
auto i = 0u; i != 3u; ++i) d[i] ^= x.d[i];
178 typename std::enable_if<std::is_integral<U>{}>::type* =
nullptr>
180 Native_vec_& operator|=(
const Native_vec_& x) noexcept
182 for (
auto i = 0u; i != 3u; ++i) d[i] |= x.d[i];
187 typename std::enable_if<std::is_integral<U>{}>::type* =
nullptr>
189 Native_vec_& operator&=(
const Native_vec_& x) noexcept
191 for (
auto i = 0u; i != 3u; ++i) d[i] &= x.d[i];
// Shifts are per element: d[i] is shifted by x.d[i].
196 typename std::enable_if<std::is_integral<U>{}>::type* =
nullptr>
198 Native_vec_& operator>>=(
const Native_vec_& x) noexcept
200 for (
auto i = 0u; i != 3u; ++i) d[i] >>= x.d[i];
205 typename std::enable_if<std::is_integral<U>{}>::type* =
nullptr>
207 Native_vec_& operator<<=(
const Native_vec_& x) noexcept
209 for (
auto i = 0u; i != 3u; ++i) d[i] <<= x.d[i];
// Result type of the rank-3 comparison: int carrying the
// __NATIVE_VECTOR__(3, int) attribute.
213 using Vec3_cmp =
int __NATIVE_VECTOR__(3,
int);
// Element-wise equality: lane i of r receives the result of d[i] == x.d[i].
// For arithmetic T that bool result is stored as 1 (equal) or 0 (different).
// The declaration of r and the return statement are not present in this
// listing.
215 Vec3_cmp operator==(
const Native_vec_& x)
const noexcept
218 r[0] = d[0] == x.d[0];
219 r[1] = d[1] == x.d[1];
220 r[2] = d[2] == x.d[2];
// Rank-4 specialisation: Native_vec_ is T carrying the
// __NATIVE_VECTOR__(4, T) attribute. The rest of this specialisation is not
// present in this listing.
236 struct HIP_vector_base<T, 4> {
237 typedef T Native_vec_ __NATIVE_VECTOR__(4, T);
// HIP_vector_type<T, rank>: derives publicly from HIP_vector_base<T, rank>
// and re-exports the base's `data` member and its Native_vec_ type.
250 template<
typename T,
unsigned int rank>
251 struct HIP_vector_type :
public HIP_vector_base<T, rank> {
252 using HIP_vector_base<T, rank>::data;
253 using typename HIP_vector_base<T, rank>::Native_vec_;
256 HIP_vector_type() =
default;
// Broadcast constructor, enabled when U is convertible to T: assigns x to
// each of the `rank` elements of data. (The template header that declares U
// is not present in this listing.)
259 typename std::enable_if<
260 std::is_convertible<U, T>{}>::type* =
nullptr>
262 HIP_vector_type(U x) noexcept
264 for (
auto i = 0u; i != rank; ++i) data[i] = x;
// Element-wise constructor, enabled when rank > 1 and exactly `rank`
// arguments are supplied: each argument is static_cast to T and the pack
// brace-initialises a Native_vec_ that is assigned to data.
268 typename std::enable_if<
269 (rank > 1) &&
sizeof...(Us) == rank>::type* =
nullptr>
271 HIP_vector_type(Us... xs) noexcept { data = Native_vec_{
static_cast<T
>(xs)...}; }
// Copy/move construction, destruction and copy/move assignment are defaulted.
273 HIP_vector_type(
const HIP_vector_type&) =
default;
275 HIP_vector_type(HIP_vector_type&&) =
default;
277 ~HIP_vector_type() =
default;
280 HIP_vector_type& operator=(
const HIP_vector_type&) =
default;
282 HIP_vector_type& operator=(HIP_vector_type&&) =
default;
// Increment and decrement. The prefix forms add or subtract a vector built
// from the value 1 (HIP_vector_type{1}) and return *this. The bodies of the
// postfix forms (the overloads taking a dummy int) are not present in this
// listing.
286 HIP_vector_type& operator++() noexcept
288 return *
this += HIP_vector_type{1};
291 HIP_vector_type operator++(
int) noexcept
299 HIP_vector_type& operator--() noexcept
301 return *
this -= HIP_vector_type{1};
304 HIP_vector_type operator--(
int) noexcept
// Compound arithmetic assignment (+=, -=, *=, /=). Each operator has two
// forms:
//  - vector form, taking const HIP_vector_type&; its body is not present in
//    this listing;
//  - scalar form, enabled when U is convertible to T; it builds
//    HIP_vector_type{x} and forwards to the vector form.
312 HIP_vector_type& operator+=(
const HIP_vector_type& x) noexcept
319 typename std::enable_if<
320 std::is_convertible<U, T>{}>::type* =
nullptr>
322 HIP_vector_type& operator+=(U x) noexcept
324 return *
this += HIP_vector_type{x};
328 HIP_vector_type& operator-=(
const HIP_vector_type& x) noexcept
335 typename std::enable_if<
336 std::is_convertible<U, T>{}>::type* =
nullptr>
338 HIP_vector_type& operator-=(U x) noexcept
340 return *
this -= HIP_vector_type{x};
344 HIP_vector_type& operator*=(
const HIP_vector_type& x) noexcept
351 typename std::enable_if<
352 std::is_convertible<U, T>{}>::type* =
nullptr>
354 HIP_vector_type& operator*=(U x) noexcept
356 return *
this *= HIP_vector_type{x};
360 HIP_vector_type& operator/=(
const HIP_vector_type& x) noexcept
367 typename std::enable_if<
368 std::is_convertible<U, T>{}>::type* =
nullptr>
370 HIP_vector_type& operator/=(U x) noexcept
372 return *
this /= HIP_vector_type{x};
377 typename std::enable_if<std::is_signed<U>{}>::type* =
nullptr>
379 HIP_vector_type operator-() noexcept
382 tmp.data = -tmp.data;
388 typename std::enable_if<std::is_integral<U>{}>::type* =
nullptr>
390 HIP_vector_type operator~() noexcept
392 HIP_vector_type r{*
this};
// Integral-only compound assignment on HIP_vector_type
// (%=, ^=, |=, &=, >>=, <<=). Each is enabled only when std::is_integral<U>
// holds and takes the right-hand vector by const reference. The template
// headers that declare U and all six bodies are not present in this listing.
399 typename std::enable_if<std::is_integral<U>{}>::type* =
nullptr>
401 HIP_vector_type& operator%=(
const HIP_vector_type& x) noexcept
409 typename std::enable_if<std::is_integral<U>{}>::type* =
nullptr>
411 HIP_vector_type& operator^=(
const HIP_vector_type& x) noexcept
419 typename std::enable_if<std::is_integral<U>{}>::type* =
nullptr>
421 HIP_vector_type& operator|=(
const HIP_vector_type& x) noexcept
429 typename std::enable_if<std::is_integral<U>{}>::type* =
nullptr>
431 HIP_vector_type& operator&=(
const HIP_vector_type& x) noexcept
439 typename std::enable_if<std::is_integral<U>{}>::type* =
nullptr>
441 HIP_vector_type& operator>>=(
const HIP_vector_type& x) noexcept
449 typename std::enable_if<std::is_integral<U>{}>::type* =
nullptr>
451 HIP_vector_type& operator<<=(
const HIP_vector_type& x) noexcept
// Binary operator+ for HIP_vector_type, built on the member operator+=.
// vector + vector: copies x, then adds y to the copy.
459 template<
typename T,
unsigned int n>
461 HIP_vector_type<T, n> operator+(
462 const HIP_vector_type<T, n>& x,
const HIP_vector_type<T, n>& y) noexcept
464 return HIP_vector_type<T, n>{x} += y;
// vector + scalar: y is first turned into a vector via HIP_vector_type<T, n>{y}.
466 template<
typename T,
unsigned int n,
typename U>
468 HIP_vector_type<T, n> operator+(
469 const HIP_vector_type<T, n>& x, U y) noexcept
471 return HIP_vector_type<T, n>{x} += HIP_vector_type<T, n>{y};
// scalar + vector: x is turned into a vector, then y is added to it.
473 template<
typename T,
unsigned int n,
typename U>
475 HIP_vector_type<T, n> operator+(
476 U x,
const HIP_vector_type<T, n>& y) noexcept
478 return HIP_vector_type<T, n>{x} += y;
// Binary operator- for HIP_vector_type, built on the member operator-=.
// vector - vector: copies x, then subtracts y from the copy.
481 template<
typename T,
unsigned int n>
483 HIP_vector_type<T, n> operator-(
484 const HIP_vector_type<T, n>& x,
const HIP_vector_type<T, n>& y) noexcept
486 return HIP_vector_type<T, n>{x} -= y;
// vector - scalar: y is first turned into a vector via HIP_vector_type<T, n>{y}.
488 template<
typename T,
unsigned int n,
typename U>
490 HIP_vector_type<T, n> operator-(
491 const HIP_vector_type<T, n>& x, U y) noexcept
493 return HIP_vector_type<T, n>{x} -= HIP_vector_type<T, n>{y};
// scalar - vector: x is turned into a vector, then y is subtracted from it.
495 template<
typename T,
unsigned int n,
typename U>
497 HIP_vector_type<T, n> operator-(
498 U x,
const HIP_vector_type<T, n>& y) noexcept
500 return HIP_vector_type<T, n>{x} -= y;
// Binary operator* for HIP_vector_type, built on the member operator*=.
// vector * vector: copies x, then multiplies the copy by y.
503 template<
typename T,
unsigned int n>
505 HIP_vector_type<T, n> operator*(
506 const HIP_vector_type<T, n>& x,
const HIP_vector_type<T, n>& y) noexcept
508 return HIP_vector_type<T, n>{x} *= y;
// vector * scalar: y is first turned into a vector via HIP_vector_type<T, n>{y}.
510 template<
typename T,
unsigned int n,
typename U>
512 HIP_vector_type<T, n> operator*(
513 const HIP_vector_type<T, n>& x, U y) noexcept
515 return HIP_vector_type<T, n>{x} *= HIP_vector_type<T, n>{y};
// scalar * vector: x is turned into a vector, then multiplied by y.
517 template<
typename T,
unsigned int n,
typename U>
519 HIP_vector_type<T, n> operator*(
520 U x,
const HIP_vector_type<T, n>& y) noexcept
522 return HIP_vector_type<T, n>{x} *= y;
// Binary operator/ for HIP_vector_type, built on the member operator/=.
// vector / vector: copies x, then divides the copy by y.
525 template<
typename T,
unsigned int n>
527 HIP_vector_type<T, n> operator/(
528 const HIP_vector_type<T, n>& x,
const HIP_vector_type<T, n>& y) noexcept
530 return HIP_vector_type<T, n>{x} /= y;
// vector / scalar: y is first turned into a vector via HIP_vector_type<T, n>{y}.
532 template<
typename T,
unsigned int n,
typename U>
534 HIP_vector_type<T, n> operator/(
535 const HIP_vector_type<T, n>& x, U y) noexcept
537 return HIP_vector_type<T, n>{x} /= HIP_vector_type<T, n>{y};
// scalar / vector: x is turned into a vector, then divided by y.
539 template<
typename T,
unsigned int n,
typename U>
541 HIP_vector_type<T, n> operator/(
542 U x,
const HIP_vector_type<T, n>& y) noexcept
544 return HIP_vector_type<T, n>{x} /= y;
// Equality of two vectors: compares x.data with y.data element-wise and
// returns false as soon as one of the n lanes of the comparison result is 0.
// NOTE(review): the line naming this function and the fall-through return are
// missing from this listing; presumably this is the vector/vector operator==.
547 template<
typename T,
unsigned int n>
550 const HIP_vector_type<T, n>& x,
const HIP_vector_type<T, n>& y) noexcept
552 auto tmp = x.data == y.data;
553 for (
auto i = 0u; i != n; ++i)
if (tmp[i] == 0)
return false;
// vector == scalar: compares x with a vector built from y.
556 template<
typename T,
unsigned int n,
typename U>
558 bool operator==(
const HIP_vector_type<T, n>& x, U y) noexcept
560 return x == HIP_vector_type<T, n>{y};
// scalar == vector: compares a vector built from x with y.
562 template<
typename T,
unsigned int n,
typename U>
564 bool operator==(U x,
const HIP_vector_type<T, n>& y) noexcept
566 return HIP_vector_type<T, n>{x} == y;
// Inequality overloads for HIP_vector_type: vector/vector, vector/scalar and
// scalar/vector, each returning bool.
// NOTE(review): the name line of the first overload and the bodies of all
// three are missing from this listing, so their implementation cannot be
// described from here.
569 template<
typename T,
unsigned int n>
572 const HIP_vector_type<T, n>& x,
const HIP_vector_type<T, n>& y) noexcept
576 template<
typename T,
unsigned int n,
typename U>
578 bool operator!=(
const HIP_vector_type<T, n>& x, U y) noexcept
582 template<
typename T,
unsigned int n,
typename U>
584 bool operator!=(U x,
const HIP_vector_type<T, n>& y) noexcept
592 typename std::enable_if<std::is_integral<T>{}>* =
nullptr>
594 HIP_vector_type<T, n> operator%(
595 const HIP_vector_type<T, n>& x,
const HIP_vector_type<T, n>& y) noexcept
597 return HIP_vector_type<T, n>{x} %= y;
603 typename std::enable_if<std::is_integral<T>{}>* =
nullptr>
605 HIP_vector_type<T, n> operator%(
606 const HIP_vector_type<T, n>& x, U y) noexcept
608 return HIP_vector_type<T, n>{x} %= HIP_vector_type<T, n>{y};
614 typename std::enable_if<std::is_integral<T>{}>* =
nullptr>
616 HIP_vector_type<T, n> operator%(
617 U x,
const HIP_vector_type<T, n>& y) noexcept
619 return HIP_vector_type<T, n>{x} %= y;
625 typename std::enable_if<std::is_integral<T>{}>* =
nullptr>
627 HIP_vector_type<T, n> operator^(
628 const HIP_vector_type<T, n>& x,
const HIP_vector_type<T, n>& y) noexcept
630 return HIP_vector_type<T, n>{x} ^= y;
636 typename std::enable_if<std::is_integral<T>{}>* =
nullptr>
638 HIP_vector_type<T, n> operator^(
639 const HIP_vector_type<T, n>& x, U y) noexcept
641 return HIP_vector_type<T, n>{x} ^= HIP_vector_type<T, n>{y};
647 typename std::enable_if<std::is_integral<T>{}>* =
nullptr>
649 HIP_vector_type<T, n> operator^(
650 U x,
const HIP_vector_type<T, n>& y) noexcept
652 return HIP_vector_type<T, n>{x} ^= y;
658 typename std::enable_if<std::is_integral<T>{}>* =
nullptr>
660 HIP_vector_type<T, n> operator|(
661 const HIP_vector_type<T, n>& x,
const HIP_vector_type<T, n>& y) noexcept
663 return HIP_vector_type<T, n>{x} |= y;
669 typename std::enable_if<std::is_integral<T>{}>* =
nullptr>
671 HIP_vector_type<T, n> operator|(
672 const HIP_vector_type<T, n>& x, U y) noexcept
674 return HIP_vector_type<T, n>{x} |= HIP_vector_type<T, n>{y};
680 typename std::enable_if<std::is_integral<T>{}>* =
nullptr>
682 HIP_vector_type<T, n> operator|(
683 U x,
const HIP_vector_type<T, n>& y) noexcept
685 return HIP_vector_type<T, n>{x} |= y;
691 typename std::enable_if<std::is_integral<T>{}>* =
nullptr>
693 HIP_vector_type<T, n> operator&(
694 const HIP_vector_type<T, n>& x,
const HIP_vector_type<T, n>& y) noexcept
696 return HIP_vector_type<T, n>{x} &= y;
702 typename std::enable_if<std::is_integral<T>{}>* =
nullptr>
704 HIP_vector_type<T, n> operator&(
705 const HIP_vector_type<T, n>& x, U y) noexcept
707 return HIP_vector_type<T, n>{x} &= HIP_vector_type<T, n>{y};
713 typename std::enable_if<std::is_integral<T>{}>* =
nullptr>
715 HIP_vector_type<T, n> operator&(
716 U x,
const HIP_vector_type<T, n>& y) noexcept
718 return HIP_vector_type<T, n>{x} &= y;
724 typename std::enable_if<std::is_integral<T>{}>* =
nullptr>
726 HIP_vector_type<T, n> operator>>(
727 const HIP_vector_type<T, n>& x,
const HIP_vector_type<T, n>& y) noexcept
729 return HIP_vector_type<T, n>{x} >>= y;
735 typename std::enable_if<std::is_integral<T>{}>* =
nullptr>
737 HIP_vector_type<T, n> operator>>(
738 const HIP_vector_type<T, n>& x, U y) noexcept
740 return HIP_vector_type<T, n>{x} >>= HIP_vector_type<T, n>{y};
746 typename std::enable_if<std::is_integral<T>{}>* =
nullptr>
748 HIP_vector_type<T, n> operator>>(
749 U x,
const HIP_vector_type<T, n>& y) noexcept
751 return HIP_vector_type<T, n>{x} >>= y;
757 typename std::enable_if<std::is_integral<T>{}>* =
nullptr>
759 HIP_vector_type<T, n> operator<<(
760 const HIP_vector_type<T, n>& x,
const HIP_vector_type<T, n>& y) noexcept
762 return HIP_vector_type<T, n>{x} <<= y;
768 typename std::enable_if<std::is_integral<T>{}>* =
nullptr>
770 HIP_vector_type<T, n> operator<<(
771 const HIP_vector_type<T, n>& x, U y) noexcept
773 return HIP_vector_type<T, n>{x} <<= HIP_vector_type<T, n>{y};
779 typename std::enable_if<std::is_arithmetic<U>::value>::type,
780 typename std::enable_if<std::is_integral<T>{}>* =
nullptr>
782 HIP_vector_type<T, n> operator<<(
783 U x,
const HIP_vector_type<T, n>& y) noexcept
785 return HIP_vector_type<T, n>{x} <<= y;
// __MAKE_VECTOR_TYPE__(CUDA_name, T) declares the CUDA-style names
// CUDA_name1 .. CUDA_name4 for element type T. Two alternative definitions
// appear here:
//  - the first aliases each name to HIP_vector_type<T, 1..4>;
//  - the second typedefs CUDA_name_impl1..4 with __NATIVE_VECTOR__ and uses
//    them as `data` members (most of that expansion is missing here).
// NOTE(review): the preprocessor conditional choosing between the two is not
// present in this listing.
// The invocations that follow instantiate the macro for uchar, char, ushort,
// short, uint, int, ulong, long, ulonglong, longlong, float and double.
788 #define __MAKE_VECTOR_TYPE__(CUDA_name, T) \ 789 using CUDA_name##1 = HIP_vector_type<T, 1>;\ 790 using CUDA_name##2 = HIP_vector_type<T, 2>;\ 791 using CUDA_name##3 = HIP_vector_type<T, 3>;\ 792 using CUDA_name##4 = HIP_vector_type<T, 4>; 794 #define __MAKE_VECTOR_TYPE__(CUDA_name, T) \ 795 typedef T CUDA_name##_impl1 __NATIVE_VECTOR__(1, T);\ 796 typedef T CUDA_name##_impl2 __NATIVE_VECTOR__(2, T);\ 797 typedef T CUDA_name##_impl3 __NATIVE_VECTOR__(3, T);\ 798 typedef T CUDA_name##_impl4 __NATIVE_VECTOR__(4, T);\ 801 CUDA_name##_impl1 data;\ 809 CUDA_name##_impl2 data;\ 828 CUDA_name##_impl4 data;\ 839 __MAKE_VECTOR_TYPE__(uchar,
unsigned char);
840 __MAKE_VECTOR_TYPE__(
char,
char);
841 __MAKE_VECTOR_TYPE__(ushort,
unsigned short);
842 __MAKE_VECTOR_TYPE__(
short,
short);
843 __MAKE_VECTOR_TYPE__(uint,
unsigned int);
844 __MAKE_VECTOR_TYPE__(
int,
int);
845 __MAKE_VECTOR_TYPE__(ulong,
unsigned long);
846 __MAKE_VECTOR_TYPE__(
long,
long);
847 __MAKE_VECTOR_TYPE__(ulonglong,
unsigned long long);
848 __MAKE_VECTOR_TYPE__(longlong,
long long);
849 __MAKE_VECTOR_TYPE__(
float,
float);
850 __MAKE_VECTOR_TYPE__(
double,
double);
// make_<type>(...) factory helpers, generated by the
// DECLOP_MAKE_{ONE,TWO,THREE,FOUR}_COMPONENT(comp, type) macros. Each expands
// to a `static inline __device__ __host__` function taking 1-4 arguments of
// type `comp` and returning a `type`. Two alternative macro families appear
// here:
//  - the first builds the result with brace initialisation (type r{x, ...});
//  - the second default-constructs r and assigns r.x / r.y / r.z / r.w.
// NOTE(review): the preprocessor conditional choosing between the families
// and the tail of both FOUR_COMPONENT macros are not present in this listing.
// Each invocation below defines make_<type> for one (component, vector) pair.
853 #define DECLOP_MAKE_ONE_COMPONENT(comp, type) \ 854 static inline __device__ __host__ \ 855 type make_##type(comp x) { type r{x}; return r; } 857 #define DECLOP_MAKE_TWO_COMPONENT(comp, type) \ 858 static inline __device__ __host__ \ 859 type make_##type(comp x, comp y) { type r{x, y}; return r; } 861 #define DECLOP_MAKE_THREE_COMPONENT(comp, type) \ 862 static inline __device__ __host__ \ 863 type make_##type(comp x, comp y, comp z) { type r{x, y, z}; return r; } 865 #define DECLOP_MAKE_FOUR_COMPONENT(comp, type) \ 866 static inline __device__ __host__ \ 867 type make_##type(comp x, comp y, comp z, comp w) { \ 868 type r{x, y, z, w}; \ 872 #define DECLOP_MAKE_ONE_COMPONENT(comp, type) \ 873 static inline __device__ __host__ \ 874 type make_##type(comp x) { type r; r.x =x; return r; } 876 #define DECLOP_MAKE_TWO_COMPONENT(comp, type) \ 877 static inline __device__ __host__ \ 878 type make_##type(comp x, comp y) { type r; r.x=x; r.y=y; return r; } 880 #define DECLOP_MAKE_THREE_COMPONENT(comp, type) \ 881 static inline __device__ __host__ \ 882 type make_##type(comp x, comp y, comp z) { type r; r.x=x; r.y=y; r.z=z; return r; } 884 #define DECLOP_MAKE_FOUR_COMPONENT(comp, type) \ 885 static inline __device__ __host__ \ 886 type make_##type(comp x, comp y, comp z, comp w) { \ 887 type r; r.x=x; r.y=y; r.z=z; r.w=w; \ 892 DECLOP_MAKE_ONE_COMPONENT(
unsigned char, uchar1);
893 DECLOP_MAKE_TWO_COMPONENT(
unsigned char, uchar2);
894 DECLOP_MAKE_THREE_COMPONENT(
unsigned char, uchar3);
895 DECLOP_MAKE_FOUR_COMPONENT(
unsigned char, uchar4);
// The char1..char4 helpers take `signed char` components.
897 DECLOP_MAKE_ONE_COMPONENT(
signed char, char1);
898 DECLOP_MAKE_TWO_COMPONENT(
signed char, char2);
899 DECLOP_MAKE_THREE_COMPONENT(
signed char, char3);
900 DECLOP_MAKE_FOUR_COMPONENT(
signed char, char4);
902 DECLOP_MAKE_ONE_COMPONENT(
unsigned short, ushort1);
903 DECLOP_MAKE_TWO_COMPONENT(
unsigned short, ushort2);
904 DECLOP_MAKE_THREE_COMPONENT(
unsigned short, ushort3);
905 DECLOP_MAKE_FOUR_COMPONENT(
unsigned short, ushort4);
907 DECLOP_MAKE_ONE_COMPONENT(
signed short, short1);
908 DECLOP_MAKE_TWO_COMPONENT(
signed short, short2);
909 DECLOP_MAKE_THREE_COMPONENT(
signed short, short3);
910 DECLOP_MAKE_FOUR_COMPONENT(
signed short, short4);
912 DECLOP_MAKE_ONE_COMPONENT(
unsigned int, uint1);
913 DECLOP_MAKE_TWO_COMPONENT(
unsigned int, uint2);
914 DECLOP_MAKE_THREE_COMPONENT(
unsigned int, uint3);
915 DECLOP_MAKE_FOUR_COMPONENT(
unsigned int, uint4);
917 DECLOP_MAKE_ONE_COMPONENT(
signed int, int1);
918 DECLOP_MAKE_TWO_COMPONENT(
signed int, int2);
919 DECLOP_MAKE_THREE_COMPONENT(
signed int, int3);
920 DECLOP_MAKE_FOUR_COMPONENT(
signed int, int4);
922 DECLOP_MAKE_ONE_COMPONENT(
float, float1);
923 DECLOP_MAKE_TWO_COMPONENT(
float, float2);
924 DECLOP_MAKE_THREE_COMPONENT(
float, float3);
925 DECLOP_MAKE_FOUR_COMPONENT(
float, float4);
927 DECLOP_MAKE_ONE_COMPONENT(
double, double1);
928 DECLOP_MAKE_TWO_COMPONENT(
double, double2);
929 DECLOP_MAKE_THREE_COMPONENT(
double, double3);
930 DECLOP_MAKE_FOUR_COMPONENT(
double, double4);
932 DECLOP_MAKE_ONE_COMPONENT(
unsigned long, ulong1);
933 DECLOP_MAKE_TWO_COMPONENT(
unsigned long, ulong2);
934 DECLOP_MAKE_THREE_COMPONENT(
unsigned long, ulong3);
935 DECLOP_MAKE_FOUR_COMPONENT(
unsigned long, ulong4);
937 DECLOP_MAKE_ONE_COMPONENT(
signed long, long1);
938 DECLOP_MAKE_TWO_COMPONENT(
signed long, long2);
939 DECLOP_MAKE_THREE_COMPONENT(
signed long, long3);
940 DECLOP_MAKE_FOUR_COMPONENT(
signed long, long4);
942 DECLOP_MAKE_ONE_COMPONENT(
unsigned long long, ulonglong1);
943 DECLOP_MAKE_TWO_COMPONENT(
unsigned long long, ulonglong2);
944 DECLOP_MAKE_THREE_COMPONENT(
unsigned long long, ulonglong3);
945 DECLOP_MAKE_FOUR_COMPONENT(
unsigned long long, ulonglong4);
947 DECLOP_MAKE_ONE_COMPONENT(
signed long long, longlong1);
948 DECLOP_MAKE_TWO_COMPONENT(
signed long long, longlong2);
949 DECLOP_MAKE_THREE_COMPONENT(
signed long long, longlong3);
950 DECLOP_MAKE_FOUR_COMPONENT(
signed long long, longlong4);
// Alternative definitions on the #else branch annotated "defined(_MSC_VER)".
// After including the x86 intrinsics headers, every vector type is declared
// as a union with a single `data` member: a scalar, a small array, or an
// __m64 / __m128 / __m128i / __m128d / __m256 / __m256d register type (or an
// array of them). Each three-element type wraps the corresponding
// four-element type (for example char3 holds a char4).
951 #else // defined(_MSC_VER) 952 #include <mmintrin.h> 953 #include <xmmintrin.h> 954 #include <emmintrin.h> 955 #include <immintrin.h> 957 typedef union {
char data; } char1;
958 typedef union {
char data[2]; } char2;
959 typedef union {
char data[4]; } char4;
960 typedef union { char4 data; } char3;
961 typedef union { __m64 data; } char8;
962 typedef union { __m128i data; } char16;
964 typedef union {
unsigned char data; } uchar1;
965 typedef union {
unsigned char data[2]; } uchar2;
966 typedef union {
unsigned char data[4]; } uchar4;
967 typedef union { uchar4 data; } uchar3;
968 typedef union { __m64 data; } uchar8;
969 typedef union { __m128i data; } uchar16;
971 typedef union {
short data; } short1;
972 typedef union {
short data[2]; } short2;
973 typedef union { __m64 data; } short4;
974 typedef union { short4 data; } short3;
975 typedef union { __m128i data; } short8;
976 typedef union { __m128i data[2]; } short16;
978 typedef union {
unsigned short data; } ushort1;
979 typedef union {
unsigned short data[2]; } ushort2;
980 typedef union { __m64 data; } ushort4;
981 typedef union { ushort4 data; } ushort3;
982 typedef union { __m128i data; } ushort8;
983 typedef union { __m128i data[2]; } ushort16;
985 typedef union {
int data; } int1;
986 typedef union { __m64 data; } int2;
987 typedef union { __m128i data; } int4;
988 typedef union { int4 data; } int3;
989 typedef union { __m128i data[2]; } int8;
990 typedef union { __m128i data[4];} int16;
992 typedef union {
unsigned int data; } uint1;
993 typedef union { __m64 data; } uint2;
994 typedef union { __m128i data; } uint4;
995 typedef union { uint4 data; } uint3;
996 typedef union { __m128i data[2]; } uint8;
997 typedef union { __m128i data[4]; } uint16;
// long / ulong come in two layouts separated by the "#else // defined(_WIN64)"
// line below: this first one uses the same storage as int / uint, the second
// uses the same storage as longlong / ulonglong.
// NOTE(review): the opening #if of that conditional is not present in this
// listing; confirm which branch applies to which target.
1000 typedef union {
int data; } long1;
1001 typedef union { __m64 data; } long2;
1002 typedef union { __m128i data; } long4;
1003 typedef union { long4 data; } long3;
1004 typedef union { __m128i data[2]; } long8;
1005 typedef union { __m128i data[4]; } long16;
1007 typedef union {
unsigned int data; } ulong1;
1008 typedef union { __m64 data; } ulong2;
1009 typedef union { __m128i data; } ulong4;
1010 typedef union { ulong4 data; } ulong3;
1011 typedef union { __m128i data[2]; } ulong8;
1012 typedef union { __m128i data[4]; } ulong16;
1013 #else // defined(_WIN64) 1014 typedef union { __m64 data; } long1;
1015 typedef union { __m128i data; } long2;
1016 typedef union { __m128i data[2]; } long4;
1017 typedef union { long4 data; } long3;
1018 typedef union { __m128i data[4]; } long8;
1019 typedef union { __m128i data[8]; } long16;
1021 typedef union { __m64 data; } ulong1;
1022 typedef union { __m128i data; } ulong2;
1023 typedef union { __m128i data[2]; } ulong4;
1024 typedef union { ulong4 data; } ulong3;
1025 typedef union { __m128i data[4]; } ulong8;
1026 typedef union { __m128i data[8]; } ulong16;
1027 #endif // defined(_WIN64) 1029 typedef union { __m64 data; } longlong1;
1030 typedef union { __m128i data; } longlong2;
1031 typedef union { __m128i data[2]; } longlong4;
1032 typedef union { longlong4 data; } longlong3;
1033 typedef union { __m128i data[4]; } longlong8;
1034 typedef union { __m128i data[8]; } longlong16;
1036 typedef union { __m64 data; } ulonglong1;
1037 typedef union { __m128i data; } ulonglong2;
1038 typedef union { __m128i data[2]; } ulonglong4;
1039 typedef union { ulonglong4 data; } ulonglong3;
1040 typedef union { __m128i data[4]; } ulonglong8;
1041 typedef union { __m128i data[8]; } ulonglong16;
// Floating-point types use the float / double register types.
1043 typedef union {
float data; } float1;
1044 typedef union { __m64 data; } float2;
1045 typedef union { __m128 data; } float4;
1046 typedef union { float4 data; } float3;
1047 typedef union { __m256 data; } float8;
1048 typedef union { __m256 data[2]; } float16;
1050 typedef union {
double data; } double1;
1051 typedef union { __m128d data; } double2;
1052 typedef union { __m256d data; } double4;
1053 typedef union { double4 data; } double3;
1054 typedef union { __m256d data[2]; } double8;
1055 typedef union { __m256d data[4]; } double16;
1057 #endif // defined(_MSC_VER)
// Cross-reference left over from the generated listing (not part of this header):
//   #define __host__  -- Definition: host_defines.h:41