28 #ifndef HIP_INCLUDE_HIP_AMD_DETAIL_HIP_VECTOR_TYPES_H
29 #define HIP_INCLUDE_HIP_AMD_DETAIL_HIP_VECTOR_TYPES_H
33 #if defined(__has_attribute)
34 #if __has_attribute(ext_vector_type)
35 #define __NATIVE_VECTOR__(n, T) T __attribute__((ext_vector_type(n)))
37 #define __NATIVE_VECTOR__(n, T) T[n]
40 #if defined(__cplusplus)
43 #include <type_traits>
46 template<
typename,
typename,
unsigned int>
struct Scalar_accessor;
50 template<
typename T,
typename U,
unsigned int n>
51 struct is_integral<hip_impl::Scalar_accessor<T, U, n>>
53 template<
typename T,
typename U,
unsigned int n>
54 struct is_floating_point<hip_impl::Scalar_accessor<T, U, n>>
55 : is_floating_point<T> {};
59 template<
typename T,
typename Vector,
unsigned int idx>
60 struct Scalar_accessor {
62 const Scalar_accessor* p;
65 operator const T*()
const noexcept {
66 return &
reinterpret_cast<const T*
>(p)[idx];
69 operator const T*()
const volatile noexcept {
70 return &
reinterpret_cast<const T*
>(p)[idx];
73 operator T*() noexcept {
74 return &
reinterpret_cast<T*
>(
75 const_cast<Scalar_accessor*
>(p))[idx];
78 operator T*()
volatile noexcept {
79 return &
reinterpret_cast<T*
>(
80 const_cast<Scalar_accessor*
>(p))[idx];
86 std::ostream& operator<<(std::ostream& os,
87 const Scalar_accessor& x) noexcept {
88 return os << x.data[idx];
92 std::istream& operator>>(std::istream& is,
93 Scalar_accessor& x) noexcept {
105 operator T() const noexcept {
return data[idx]; }
107 operator T() const volatile noexcept {
return data[idx]; }
109 #ifdef __HIP_ENABLE_VECTOR_SCALAR_ACCESSORY_ENUM_CONVERSION__
114 typename std::enable_if<
115 !std::is_same<U, T>{} &&
118 T,
typename std::enable_if<std::is_enum<U>::value, std::underlying_type<U>>::type::type>{}>::type* =
nullptr>
120 operator U() const noexcept {
return static_cast<U
>(data[idx]); }
123 typename std::enable_if<
124 !std::is_same<U, T>{} &&
127 T,
typename std::enable_if<std::is_enum<U>::value, std::underlying_type<U>>::type::type>{}>::type* =
nullptr>
129 operator U() const volatile noexcept {
return static_cast<U
>(data[idx]); }
133 operator T&() noexcept {
134 return reinterpret_cast<
135 T (&)[sizeof(Vector) / sizeof(T)]
>(data)[idx];
138 operator volatile T&()
volatile noexcept {
139 return reinterpret_cast<
140 volatile T (&)[sizeof(Vector) / sizeof(T)]
>(data)[idx];
144 Address operator&() const noexcept {
return Address{
this}; }
147 Scalar_accessor& operator=(
const Scalar_accessor& x) noexcept {
148 data[idx] = x.data[idx];
153 Scalar_accessor& operator=(T x) noexcept {
159 volatile Scalar_accessor& operator=(T x)
volatile noexcept {
166 Scalar_accessor& operator++() noexcept {
171 T operator++(
int) noexcept {
177 Scalar_accessor& operator--() noexcept {
182 T operator--(
int) noexcept {
192 typename std::enable_if<
193 std::is_convertible<U, T>{}>::type* =
nullptr>
195 Scalar_accessor& operator+=(U x) noexcept {
201 typename std::enable_if<
202 std::is_convertible<U, T>{}>::type* =
nullptr>
204 Scalar_accessor& operator-=(U x) noexcept {
211 typename std::enable_if<
212 std::is_convertible<U, T>{}>::type* =
nullptr>
214 Scalar_accessor& operator*=(U x) noexcept {
220 typename std::enable_if<
221 std::is_convertible<U, T>{}>::type* =
nullptr>
223 Scalar_accessor& operator/=(U x) noexcept {
229 typename std::enable_if<std::is_convertible<U, T>{} &&
230 std::is_integral<U>{}>::type* =
nullptr>
232 Scalar_accessor& operator%=(U x) noexcept {
239 typename std::enable_if<std::is_convertible<U, T>{} &&
240 std::is_integral<U>{}>::type* =
nullptr>
242 Scalar_accessor& operator>>=(U x) noexcept {
248 typename std::enable_if<std::is_convertible<U, T>{} &&
249 std::is_integral<U>{}>::type* =
nullptr>
251 Scalar_accessor& operator<<=(U x) noexcept {
257 typename std::enable_if<std::is_convertible<U, T>{} &&
258 std::is_integral<U>{}>::type* =
nullptr>
260 Scalar_accessor& operator&=(U x) noexcept {
266 typename std::enable_if<std::is_convertible<U, T>{} &&
267 std::is_integral<U>{}>::type* =
nullptr>
269 Scalar_accessor& operator|=(U x) noexcept {
275 typename std::enable_if<std::is_convertible<U, T>{} &&
276 std::is_integral<U>{}>::type* =
nullptr>
278 Scalar_accessor& operator^=(U x) noexcept {
286 unsigned int next_pot(
unsigned int x) {
288 return 1u << (32u - __builtin_clz(x - 1u));
292 template<
typename T,
unsigned int n>
struct HIP_vector_base;
295 struct HIP_vector_base<T, 1> {
296 using Native_vec_ = __NATIVE_VECTOR__(1, T);
300 #if __HIP_CLANG_ONLY__
305 hip_impl::Scalar_accessor<T, Native_vec_, 0> x;
309 using value_type = T;
312 HIP_vector_base() =
default;
316 HIP_vector_base(T x_) noexcept : data{x_} {}
319 HIP_vector_base(
const HIP_vector_base&) =
default;
322 HIP_vector_base(HIP_vector_base&&) =
default;
324 ~HIP_vector_base() =
default;
327 HIP_vector_base& operator=(
const HIP_vector_base& x_) noexcept {
328 #if __has_attribute(ext_vector_type)
331 data[0] = x_.data[0];
339 struct HIP_vector_base<T, 2> {
340 using Native_vec_ = __NATIVE_VECTOR__(2, T);
343 #if !__has_attribute(ext_vector_type)
344 alignas(hip_impl::next_pot(2 *
sizeof(T)))
348 #if __HIP_CLANG_ONLY__
354 hip_impl::Scalar_accessor<T, Native_vec_, 0> x;
355 hip_impl::Scalar_accessor<T, Native_vec_, 1> y;
359 using value_type = T;
362 HIP_vector_base() =
default;
366 HIP_vector_base(T x_) noexcept : data{x_, x_} {}
369 HIP_vector_base(T x_, T y_) noexcept : data{x_, y_} {}
372 HIP_vector_base(
const HIP_vector_base&) =
default;
375 HIP_vector_base(HIP_vector_base&&) =
default;
377 ~HIP_vector_base() =
default;
380 HIP_vector_base& operator=(
const HIP_vector_base& x_) noexcept {
381 #if __has_attribute(ext_vector_type)
384 data[0] = x_.data[0];
385 data[1] = x_.data[1];
393 struct HIP_vector_base<T, 3> {
398 Native_vec_() =
default;
403 Native_vec_(T x_) noexcept : d{x_, x_, x_} {}
406 Native_vec_(T x_, T y_, T z_) noexcept : d{x_, y_, z_} {}
409 Native_vec_(
const Native_vec_&) =
default;
412 Native_vec_(Native_vec_&&) =
default;
414 ~Native_vec_() =
default;
417 Native_vec_& operator=(
const Native_vec_&) =
default;
419 Native_vec_& operator=(Native_vec_&&) =
default;
422 T& operator[](
unsigned int idx) noexcept {
return d[idx]; }
424 T operator[](
unsigned int idx)
const noexcept {
return d[idx]; }
427 Native_vec_& operator+=(
const Native_vec_& x_) noexcept
429 for (
auto i = 0u; i != 3u; ++i) d[i] += x_.d[i];
433 Native_vec_& operator-=(
const Native_vec_& x_) noexcept
435 for (
auto i = 0u; i != 3u; ++i) d[i] -= x_.d[i];
440 Native_vec_& operator*=(
const Native_vec_& x_) noexcept
442 for (
auto i = 0u; i != 3u; ++i) d[i] *= x_.d[i];
446 Native_vec_& operator/=(
const Native_vec_& x_) noexcept
448 for (
auto i = 0u; i != 3u; ++i) d[i] /= x_.d[i];
454 typename std::enable_if<std::is_signed<U>{}>::type* =
nullptr>
456 Native_vec_ operator-() const noexcept
459 for (
auto&& x : r.d) x = -x;
465 typename std::enable_if<std::is_integral<U>{}>::type* =
nullptr>
467 Native_vec_ operator~() const noexcept
470 for (
auto&& x : r.d) x = ~x;
475 typename std::enable_if<std::is_integral<U>{}>::type* =
nullptr>
477 Native_vec_& operator%=(
const Native_vec_& x_) noexcept
479 for (
auto i = 0u; i != 3u; ++i) d[i] %= x_.d[i];
484 typename std::enable_if<std::is_integral<U>{}>::type* =
nullptr>
486 Native_vec_& operator^=(
const Native_vec_& x_) noexcept
488 for (
auto i = 0u; i != 3u; ++i) d[i] ^= x_.d[i];
493 typename std::enable_if<std::is_integral<U>{}>::type* =
nullptr>
495 Native_vec_& operator|=(
const Native_vec_& x_) noexcept
497 for (
auto i = 0u; i != 3u; ++i) d[i] |= x_.d[i];
502 typename std::enable_if<std::is_integral<U>{}>::type* =
nullptr>
504 Native_vec_& operator&=(
const Native_vec_& x_) noexcept
506 for (
auto i = 0u; i != 3u; ++i) d[i] &= x_.d[i];
511 typename std::enable_if<std::is_integral<U>{}>::type* =
nullptr>
513 Native_vec_& operator>>=(
const Native_vec_& x_) noexcept
515 for (
auto i = 0u; i != 3u; ++i) d[i] >>= x_.d[i];
520 typename std::enable_if<std::is_integral<U>{}>::type* =
nullptr>
522 Native_vec_& operator<<=(
const Native_vec_& x_) noexcept
524 for (
auto i = 0u; i != 3u; ++i) d[i] <<= x_.d[i];
528 using Vec3_cmp =
int __attribute__((vector_size(4 *
sizeof(
int))));
530 Vec3_cmp operator==(
const Native_vec_& x_)
const noexcept
532 return Vec3_cmp{d[0] == x_.d[0], d[1] == x_.d[1], d[2] == x_.d[2]};
545 using value_type = T;
548 HIP_vector_base() =
default;
552 HIP_vector_base(T x_) noexcept : data{x_, x_, x_} {}
555 HIP_vector_base(T x_, T y_, T z_) noexcept : data{x_, y_, z_} {}
558 HIP_vector_base(
const HIP_vector_base&) =
default;
561 HIP_vector_base(HIP_vector_base&&) =
default;
563 ~HIP_vector_base() =
default;
566 HIP_vector_base& operator=(
const HIP_vector_base&) =
default;
568 HIP_vector_base& operator=(HIP_vector_base&&) =
default;
572 struct HIP_vector_base<T, 4> {
573 using Native_vec_ = __NATIVE_VECTOR__(4, T);
576 #if !__has_attribute(ext_vector_type)
577 alignas(hip_impl::next_pot(4 *
sizeof(T)))
581 #if __HIP_CLANG_ONLY__
589 hip_impl::Scalar_accessor<T, Native_vec_, 0> x;
590 hip_impl::Scalar_accessor<T, Native_vec_, 1> y;
591 hip_impl::Scalar_accessor<T, Native_vec_, 2> z;
592 hip_impl::Scalar_accessor<T, Native_vec_, 3> w;
596 using value_type = T;
599 HIP_vector_base() =
default;
603 HIP_vector_base(T x_) noexcept : data{x_, x_, x_, x_} {}
606 HIP_vector_base(T x_, T y_, T z_, T w_) noexcept : data{x_, y_, z_, w_} {}
609 HIP_vector_base(
const HIP_vector_base&) =
default;
612 HIP_vector_base(HIP_vector_base&&) =
default;
614 ~HIP_vector_base() =
default;
617 HIP_vector_base& operator=(
const HIP_vector_base& x_) noexcept {
618 #if __has_attribute(ext_vector_type)
621 data[0] = x_.data[0];
622 data[1] = x_.data[1];
623 data[2] = x_.data[2];
624 data[3] = x_.data[3];
631 template<
typename T,
unsigned int rank>
632 struct HIP_vector_type :
public HIP_vector_base<T, rank> {
633 using HIP_vector_base<T, rank>::data;
634 using typename HIP_vector_base<T, rank>::Native_vec_;
637 HIP_vector_type() =
default;
640 typename std::enable_if<
641 std::is_convertible<U, T>{}>::type* =
nullptr>
645 HIP_vector_type(U x_) noexcept
646 : HIP_vector_base<T, rank>{
static_cast<T
>(x_)}
650 typename std::enable_if<
651 (rank > 1) &&
sizeof...(Us) == rank>::type* =
nullptr>
654 HIP_vector_type(Us... xs) noexcept
655 : HIP_vector_base<T, rank>{
static_cast<T
>(xs)...}
659 HIP_vector_type(
const HIP_vector_type&) =
default;
662 HIP_vector_type(HIP_vector_type&&) =
default;
664 ~HIP_vector_type() =
default;
667 HIP_vector_type& operator=(
const HIP_vector_type&) =
default;
669 HIP_vector_type& operator=(HIP_vector_type&&) =
default;
673 HIP_vector_type& operator++() noexcept
675 return *
this += HIP_vector_type{1};
678 HIP_vector_type operator++(
int) noexcept
686 HIP_vector_type& operator--() noexcept
688 return *
this -= HIP_vector_type{1};
691 HIP_vector_type operator--(
int) noexcept
699 HIP_vector_type& operator+=(
const HIP_vector_type& x) noexcept
706 typename std::enable_if<
707 std::is_convertible<U, T>{}>::type* =
nullptr>
709 HIP_vector_type& operator+=(U x) noexcept
711 return *
this += HIP_vector_type{x};
715 HIP_vector_type& operator-=(
const HIP_vector_type& x) noexcept
722 typename std::enable_if<
723 std::is_convertible<U, T>{}>::type* =
nullptr>
725 HIP_vector_type& operator-=(U x) noexcept
727 return *
this -= HIP_vector_type{x};
731 HIP_vector_type& operator*=(
const HIP_vector_type& x) noexcept
738 typename std::enable_if<
739 std::is_convertible<U, T>{}>::type* =
nullptr>
741 HIP_vector_type& operator*=(U x) noexcept
743 return *
this *= HIP_vector_type{x};
747 HIP_vector_type& operator/=(
const HIP_vector_type& x) noexcept
754 typename std::enable_if<
755 std::is_convertible<U, T>{}>::type* =
nullptr>
757 HIP_vector_type& operator/=(U x) noexcept
759 return *
this /= HIP_vector_type{x};
764 typename std::enable_if<std::is_signed<U>{}>::type* =
nullptr>
766 HIP_vector_type operator-() const noexcept
769 tmp.data = -tmp.data;
775 typename std::enable_if<std::is_integral<U>{}>::type* =
nullptr>
777 HIP_vector_type operator~() const noexcept
779 HIP_vector_type r{*
this};
786 typename std::enable_if<std::is_integral<U>{}>::type* =
nullptr>
788 HIP_vector_type& operator%=(
const HIP_vector_type& x) noexcept
796 typename std::enable_if<std::is_integral<U>{}>::type* =
nullptr>
798 HIP_vector_type& operator^=(
const HIP_vector_type& x) noexcept
806 typename std::enable_if<std::is_integral<U>{}>::type* =
nullptr>
808 HIP_vector_type& operator|=(
const HIP_vector_type& x) noexcept
816 typename std::enable_if<std::is_integral<U>{}>::type* =
nullptr>
818 HIP_vector_type& operator&=(
const HIP_vector_type& x) noexcept
826 typename std::enable_if<std::is_integral<U>{}>::type* =
nullptr>
828 HIP_vector_type& operator>>=(
const HIP_vector_type& x) noexcept
836 typename std::enable_if<std::is_integral<U>{}>::type* =
nullptr>
838 HIP_vector_type& operator<<=(
const HIP_vector_type& x) noexcept
845 template<
typename T,
unsigned int n>
849 HIP_vector_type<T, n> operator+(
850 const HIP_vector_type<T, n>& x,
const HIP_vector_type<T, n>& y) noexcept
852 return HIP_vector_type<T, n>{x} += y;
854 template<
typename T,
unsigned int n,
typename U>
858 HIP_vector_type<T, n> operator+(
859 const HIP_vector_type<T, n>& x, U y) noexcept
861 return HIP_vector_type<T, n>{x} += HIP_vector_type<T, n>{y};
863 template<
typename T,
unsigned int n,
typename U>
867 HIP_vector_type<T, n> operator+(
868 U x,
const HIP_vector_type<T, n>& y) noexcept
870 return HIP_vector_type<T, n>{x} += y;
873 template<
typename T,
unsigned int n>
877 HIP_vector_type<T, n> operator-(
878 const HIP_vector_type<T, n>& x,
const HIP_vector_type<T, n>& y) noexcept
880 return HIP_vector_type<T, n>{x} -= y;
882 template<
typename T,
unsigned int n,
typename U>
886 HIP_vector_type<T, n> operator-(
887 const HIP_vector_type<T, n>& x, U y) noexcept
889 return HIP_vector_type<T, n>{x} -= HIP_vector_type<T, n>{y};
891 template<
typename T,
unsigned int n,
typename U>
895 HIP_vector_type<T, n> operator-(
896 U x,
const HIP_vector_type<T, n>& y) noexcept
898 return HIP_vector_type<T, n>{x} -= y;
901 template<
typename T,
unsigned int n>
905 HIP_vector_type<T, n> operator*(
906 const HIP_vector_type<T, n>& x,
const HIP_vector_type<T, n>& y) noexcept
908 return HIP_vector_type<T, n>{x} *= y;
910 template<
typename T,
unsigned int n,
typename U>
914 HIP_vector_type<T, n> operator*(
915 const HIP_vector_type<T, n>& x, U y) noexcept
917 return HIP_vector_type<T, n>{x} *= HIP_vector_type<T, n>{y};
919 template<
typename T,
unsigned int n,
typename U>
923 HIP_vector_type<T, n> operator*(
924 U x,
const HIP_vector_type<T, n>& y) noexcept
926 return HIP_vector_type<T, n>{x} *= y;
929 template<
typename T,
unsigned int n>
933 HIP_vector_type<T, n> operator/(
934 const HIP_vector_type<T, n>& x,
const HIP_vector_type<T, n>& y) noexcept
936 return HIP_vector_type<T, n>{x} /= y;
938 template<
typename T,
unsigned int n,
typename U>
942 HIP_vector_type<T, n> operator/(
943 const HIP_vector_type<T, n>& x, U y) noexcept
945 return HIP_vector_type<T, n>{x} /= HIP_vector_type<T, n>{y};
947 template<
typename T,
unsigned int n,
typename U>
951 HIP_vector_type<T, n> operator/(
952 U x,
const HIP_vector_type<T, n>& y) noexcept
954 return HIP_vector_type<T, n>{x} /= y;
961 bool _hip_any_zero(
const V& x,
int n) noexcept
964 (n == -1) ? true : ((x[n] == 0) ?
false : _hip_any_zero(x, n - 1));
967 template<
typename T,
unsigned int n>
972 const HIP_vector_type<T, n>& x,
const HIP_vector_type<T, n>& y) noexcept
974 return _hip_any_zero(x.data == y.data, n - 1);
976 template<
typename T,
unsigned int n,
typename U>
980 bool operator==(
const HIP_vector_type<T, n>& x, U y) noexcept
982 return x == HIP_vector_type<T, n>{y};
984 template<
typename T,
unsigned int n,
typename U>
988 bool operator==(U x,
const HIP_vector_type<T, n>& y) noexcept
990 return HIP_vector_type<T, n>{x} == y;
993 template<
typename T,
unsigned int n>
998 const HIP_vector_type<T, n>& x,
const HIP_vector_type<T, n>& y) noexcept
1002 template<
typename T,
unsigned int n,
typename U>
1006 bool operator!=(
const HIP_vector_type<T, n>& x, U y) noexcept
1010 template<
typename T,
unsigned int n,
typename U>
1014 bool operator!=(U x,
const HIP_vector_type<T, n>& y) noexcept
1022 typename std::enable_if<std::is_integral<T>{}>* =
nullptr>
1026 HIP_vector_type<T, n> operator%(
1027 const HIP_vector_type<T, n>& x,
const HIP_vector_type<T, n>& y) noexcept
1029 return HIP_vector_type<T, n>{x} %= y;
1035 typename std::enable_if<std::is_integral<T>{}>* =
nullptr>
1039 HIP_vector_type<T, n> operator%(
1040 const HIP_vector_type<T, n>& x, U y) noexcept
1042 return HIP_vector_type<T, n>{x} %= HIP_vector_type<T, n>{y};
1048 typename std::enable_if<std::is_integral<T>{}>* =
nullptr>
1052 HIP_vector_type<T, n> operator%(
1053 U x,
const HIP_vector_type<T, n>& y) noexcept
1055 return HIP_vector_type<T, n>{x} %= y;
1061 typename std::enable_if<std::is_integral<T>{}>* =
nullptr>
1065 HIP_vector_type<T, n> operator^(
1066 const HIP_vector_type<T, n>& x,
const HIP_vector_type<T, n>& y) noexcept
1068 return HIP_vector_type<T, n>{x} ^= y;
1074 typename std::enable_if<std::is_integral<T>{}>* =
nullptr>
1078 HIP_vector_type<T, n> operator^(
1079 const HIP_vector_type<T, n>& x, U y) noexcept
1081 return HIP_vector_type<T, n>{x} ^= HIP_vector_type<T, n>{y};
1087 typename std::enable_if<std::is_integral<T>{}>* =
nullptr>
1091 HIP_vector_type<T, n> operator^(
1092 U x,
const HIP_vector_type<T, n>& y) noexcept
1094 return HIP_vector_type<T, n>{x} ^= y;
1100 typename std::enable_if<std::is_integral<T>{}>* =
nullptr>
1104 HIP_vector_type<T, n> operator|(
1105 const HIP_vector_type<T, n>& x,
const HIP_vector_type<T, n>& y) noexcept
1107 return HIP_vector_type<T, n>{x} |= y;
1113 typename std::enable_if<std::is_integral<T>{}>* =
nullptr>
1117 HIP_vector_type<T, n> operator|(
1118 const HIP_vector_type<T, n>& x, U y) noexcept
1120 return HIP_vector_type<T, n>{x} |= HIP_vector_type<T, n>{y};
1126 typename std::enable_if<std::is_integral<T>{}>* =
nullptr>
1130 HIP_vector_type<T, n> operator|(
1131 U x,
const HIP_vector_type<T, n>& y) noexcept
1133 return HIP_vector_type<T, n>{x} |= y;
1139 typename std::enable_if<std::is_integral<T>{}>* =
nullptr>
1143 HIP_vector_type<T, n> operator&(
1144 const HIP_vector_type<T, n>& x,
const HIP_vector_type<T, n>& y) noexcept
1146 return HIP_vector_type<T, n>{x} &= y;
1152 typename std::enable_if<std::is_integral<T>{}>* =
nullptr>
1156 HIP_vector_type<T, n> operator&(
1157 const HIP_vector_type<T, n>& x, U y) noexcept
1159 return HIP_vector_type<T, n>{x} &= HIP_vector_type<T, n>{y};
1165 typename std::enable_if<std::is_integral<T>{}>* =
nullptr>
1169 HIP_vector_type<T, n> operator&(
1170 U x,
const HIP_vector_type<T, n>& y) noexcept
1172 return HIP_vector_type<T, n>{x} &= y;
1178 typename std::enable_if<std::is_integral<T>{}>* =
nullptr>
1182 HIP_vector_type<T, n> operator>>(
1183 const HIP_vector_type<T, n>& x,
const HIP_vector_type<T, n>& y) noexcept
1185 return HIP_vector_type<T, n>{x} >>= y;
1191 typename std::enable_if<std::is_integral<T>{}>* =
nullptr>
1195 HIP_vector_type<T, n> operator>>(
1196 const HIP_vector_type<T, n>& x, U y) noexcept
1198 return HIP_vector_type<T, n>{x} >>= HIP_vector_type<T, n>{y};
1204 typename std::enable_if<std::is_integral<T>{}>* =
nullptr>
1208 HIP_vector_type<T, n> operator>>(
1209 U x,
const HIP_vector_type<T, n>& y) noexcept
1211 return HIP_vector_type<T, n>{x} >>= y;
1217 typename std::enable_if<std::is_integral<T>{}>* =
nullptr>
1221 HIP_vector_type<T, n> operator<<(
1222 const HIP_vector_type<T, n>& x,
const HIP_vector_type<T, n>& y) noexcept
1224 return HIP_vector_type<T, n>{x} <<= y;
1230 typename std::enable_if<std::is_integral<T>{}>* =
nullptr>
1234 HIP_vector_type<T, n> operator<<(
1235 const HIP_vector_type<T, n>& x, U y) noexcept
1237 return HIP_vector_type<T, n>{x} <<= HIP_vector_type<T, n>{y};
1243 typename std::enable_if<std::is_arithmetic<U>::value>::type,
1244 typename std::enable_if<std::is_integral<T>{}>* =
nullptr>
1248 HIP_vector_type<T, n> operator<<(
1249 U x,
const HIP_vector_type<T, n>& y) noexcept
1251 return HIP_vector_type<T, n>{x} <<= y;
1254 #define __MAKE_VECTOR_TYPE__(CUDA_name, T) \
1255 using CUDA_name##1 = HIP_vector_type<T, 1>;\
1256 using CUDA_name##2 = HIP_vector_type<T, 2>;\
1257 using CUDA_name##3 = HIP_vector_type<T, 3>;\
1258 using CUDA_name##4 = HIP_vector_type<T, 4>;
1260 #define __MAKE_VECTOR_TYPE__(CUDA_name, T) \
1281 __MAKE_VECTOR_TYPE__(uchar,
unsigned char);
1282 __MAKE_VECTOR_TYPE__(
char,
char);
1283 __MAKE_VECTOR_TYPE__(ushort,
unsigned short);
1284 __MAKE_VECTOR_TYPE__(
short,
short);
1285 __MAKE_VECTOR_TYPE__(uint,
unsigned int);
1286 __MAKE_VECTOR_TYPE__(
int,
int);
1287 __MAKE_VECTOR_TYPE__(ulong,
unsigned long);
1288 __MAKE_VECTOR_TYPE__(
long,
long);
1289 __MAKE_VECTOR_TYPE__(ulonglong,
unsigned long long);
1290 __MAKE_VECTOR_TYPE__(longlong,
long long);
1291 __MAKE_VECTOR_TYPE__(
float,
float);
1292 __MAKE_VECTOR_TYPE__(
double,
double);
1295 #define DECLOP_MAKE_ONE_COMPONENT(comp, type) \
1296 static inline __device__ __host__ \
1297 type make_##type(comp x) { type r{x}; return r; }
1299 #define DECLOP_MAKE_TWO_COMPONENT(comp, type) \
1300 static inline __device__ __host__ \
1301 type make_##type(comp x, comp y) { type r{x, y}; return r; }
1303 #define DECLOP_MAKE_THREE_COMPONENT(comp, type) \
1304 static inline __device__ __host__ \
1305 type make_##type(comp x, comp y, comp z) { type r{x, y, z}; return r; }
1307 #define DECLOP_MAKE_FOUR_COMPONENT(comp, type) \
1308 static inline __device__ __host__ \
1309 type make_##type(comp x, comp y, comp z, comp w) { \
1310 type r{x, y, z, w}; \
1314 #define DECLOP_MAKE_ONE_COMPONENT(comp, type) \
1315 static inline __device__ __host__ \
1316 type make_##type(comp x) { type r; r.x =x; return r; }
1318 #define DECLOP_MAKE_TWO_COMPONENT(comp, type) \
1319 static inline __device__ __host__ \
1320 type make_##type(comp x, comp y) { type r; r.x=x; r.y=y; return r; }
1322 #define DECLOP_MAKE_THREE_COMPONENT(comp, type) \
1323 static inline __device__ __host__ \
1324 type make_##type(comp x, comp y, comp z) { type r; r.x=x; r.y=y; r.z=z; return r; }
1326 #define DECLOP_MAKE_FOUR_COMPONENT(comp, type) \
1327 static inline __device__ __host__ \
1328 type make_##type(comp x, comp y, comp z, comp w) { \
1329 type r; r.x=x; r.y=y; r.z=z; r.w=w; \
1334 DECLOP_MAKE_ONE_COMPONENT(
unsigned char,
uchar1);
1335 DECLOP_MAKE_TWO_COMPONENT(
unsigned char,
uchar2);
1336 DECLOP_MAKE_THREE_COMPONENT(
unsigned char,
uchar3);
1337 DECLOP_MAKE_FOUR_COMPONENT(
unsigned char,
uchar4);
1339 DECLOP_MAKE_ONE_COMPONENT(
signed char,
char1);
1340 DECLOP_MAKE_TWO_COMPONENT(
signed char,
char2);
1341 DECLOP_MAKE_THREE_COMPONENT(
signed char,
char3);
1342 DECLOP_MAKE_FOUR_COMPONENT(
signed char,
char4);
1344 DECLOP_MAKE_ONE_COMPONENT(
unsigned short,
ushort1);
1345 DECLOP_MAKE_TWO_COMPONENT(
unsigned short,
ushort2);
1346 DECLOP_MAKE_THREE_COMPONENT(
unsigned short,
ushort3);
1347 DECLOP_MAKE_FOUR_COMPONENT(
unsigned short,
ushort4);
1349 DECLOP_MAKE_ONE_COMPONENT(
signed short,
short1);
1350 DECLOP_MAKE_TWO_COMPONENT(
signed short,
short2);
1351 DECLOP_MAKE_THREE_COMPONENT(
signed short,
short3);
1352 DECLOP_MAKE_FOUR_COMPONENT(
signed short,
short4);
1354 DECLOP_MAKE_ONE_COMPONENT(
unsigned int,
uint1);
1355 DECLOP_MAKE_TWO_COMPONENT(
unsigned int,
uint2);
1356 DECLOP_MAKE_THREE_COMPONENT(
unsigned int,
uint3);
1357 DECLOP_MAKE_FOUR_COMPONENT(
unsigned int,
uint4);
1359 DECLOP_MAKE_ONE_COMPONENT(
signed int,
int1);
1360 DECLOP_MAKE_TWO_COMPONENT(
signed int,
int2);
1361 DECLOP_MAKE_THREE_COMPONENT(
signed int,
int3);
1362 DECLOP_MAKE_FOUR_COMPONENT(
signed int,
int4);
1364 DECLOP_MAKE_ONE_COMPONENT(
float,
float1);
1365 DECLOP_MAKE_TWO_COMPONENT(
float,
float2);
1366 DECLOP_MAKE_THREE_COMPONENT(
float,
float3);
1367 DECLOP_MAKE_FOUR_COMPONENT(
float,
float4);
1369 DECLOP_MAKE_ONE_COMPONENT(
double,
double1);
1370 DECLOP_MAKE_TWO_COMPONENT(
double,
double2);
1371 DECLOP_MAKE_THREE_COMPONENT(
double,
double3);
1372 DECLOP_MAKE_FOUR_COMPONENT(
double,
double4);
1374 DECLOP_MAKE_ONE_COMPONENT(
unsigned long,
ulong1);
1375 DECLOP_MAKE_TWO_COMPONENT(
unsigned long,
ulong2);
1376 DECLOP_MAKE_THREE_COMPONENT(
unsigned long,
ulong3);
1377 DECLOP_MAKE_FOUR_COMPONENT(
unsigned long,
ulong4);
1379 DECLOP_MAKE_ONE_COMPONENT(
signed long,
long1);
1380 DECLOP_MAKE_TWO_COMPONENT(
signed long,
long2);
1381 DECLOP_MAKE_THREE_COMPONENT(
signed long,
long3);
1382 DECLOP_MAKE_FOUR_COMPONENT(
signed long,
long4);
1384 DECLOP_MAKE_ONE_COMPONENT(
unsigned long long,
ulonglong1);
1385 DECLOP_MAKE_TWO_COMPONENT(
unsigned long long,
ulonglong2);
1386 DECLOP_MAKE_THREE_COMPONENT(
unsigned long long,
ulonglong3);
1387 DECLOP_MAKE_FOUR_COMPONENT(
unsigned long long,
ulonglong4);
1389 DECLOP_MAKE_ONE_COMPONENT(
signed long long,
longlong1);
1390 DECLOP_MAKE_TWO_COMPONENT(
signed long long,
longlong2);
1391 DECLOP_MAKE_THREE_COMPONENT(
signed long long,
longlong3);
1392 DECLOP_MAKE_FOUR_COMPONENT(
signed long long,
longlong4);
1393 #else // !defined(__has_attribute)
1395 #if defined(_MSC_VER)
1396 #include <mmintrin.h>
1397 #include <xmmintrin.h>
1398 #include <emmintrin.h>
1399 #include <immintrin.h>
1401 typedef union {
char data; }
char1;
1402 typedef union {
char data[2]; }
char2;
1403 typedef union {
char data[4]; }
char4;
1405 typedef union { __m64 data; }
char8;
1406 typedef union { __m128i data; }
char16;
1408 typedef union {
unsigned char data; }
uchar1;
1409 typedef union {
unsigned char data[2]; }
uchar2;
1410 typedef union {
unsigned char data[4]; }
uchar4;
1412 typedef union { __m64 data; }
uchar8;
1413 typedef union { __m128i data; }
uchar16;
1415 typedef union {
short data; }
short1;
1416 typedef union {
short data[2]; }
short2;
1417 typedef union { __m64 data; }
short4;
1419 typedef union { __m128i data; }
short8;
1420 typedef union { __m128i data[2]; }
short16;
1422 typedef union {
unsigned short data; }
ushort1;
1423 typedef union {
unsigned short data[2]; }
ushort2;
1424 typedef union { __m64 data; }
ushort4;
1426 typedef union { __m128i data; }
ushort8;
1427 typedef union { __m128i data[2]; }
ushort16;
1429 typedef union {
int data; }
int1;
1430 typedef union { __m64 data; }
int2;
1431 typedef union { __m128i data; }
int4;
1433 typedef union { __m128i data[2]; }
int8;
1434 typedef union { __m128i data[4];}
int16;
1436 typedef union {
unsigned int data; }
uint1;
1437 typedef union { __m64 data; }
uint2;
1438 typedef union { __m128i data; }
uint4;
1440 typedef union { __m128i data[2]; }
uint8;
1441 typedef union { __m128i data[4]; }
uint16;
1443 #if !defined(_WIN64)
1444 typedef union {
int data; }
long1;
1445 typedef union { __m64 data; }
long2;
1446 typedef union { __m128i data; }
long4;
1448 typedef union { __m128i data[2]; }
long8;
1449 typedef union { __m128i data[4]; }
long16;
1451 typedef union {
unsigned int data; }
ulong1;
1452 typedef union { __m64 data; }
ulong2;
1453 typedef union { __m128i data; }
ulong4;
1455 typedef union { __m128i data[2]; }
ulong8;
1456 typedef union { __m128i data[4]; }
ulong16;
1457 #else // defined(_WIN64)
1458 typedef union { __m64 data; }
long1;
1459 typedef union { __m128i data; }
long2;
1460 typedef union { __m128i data[2]; }
long4;
1462 typedef union { __m128i data[4]; }
long8;
1463 typedef union { __m128i data[8]; }
long16;
1465 typedef union { __m64 data; }
ulong1;
1466 typedef union { __m128i data; }
ulong2;
1467 typedef union { __m128i data[2]; }
ulong4;
1469 typedef union { __m128i data[4]; }
ulong8;
1470 typedef union { __m128i data[8]; }
ulong16;
1471 #endif // defined(_WIN64)
1473 typedef union { __m64 data; }
longlong1;
1474 typedef union { __m128i data; }
longlong2;
1475 typedef union { __m128i data[2]; }
longlong4;
1477 typedef union { __m128i data[4]; }
longlong8;
1478 typedef union { __m128i data[8]; }
longlong16;
1482 typedef union { __m128i data[2]; }
ulonglong4;
1484 typedef union { __m128i data[4]; }
ulonglong8;
1487 typedef union {
float data; }
float1;
1488 typedef union { __m64 data; }
float2;
1489 typedef union { __m128 data; }
float4;
1491 typedef union { __m256 data; }
float8;
1492 typedef union { __m256 data[2]; }
float16;
1494 typedef union {
double data; }
double1;
1495 typedef union { __m128d data; }
double2;
1496 typedef union { __m256d data; }
double4;
1498 typedef union { __m256d data[2]; }
double8;
1499 typedef union { __m256d data[4]; }
double16;
1501 #else // !defined(_MSC_VER)
1510 typedef union {
unsigned char data; }
uchar1;
1511 typedef union {
unsigned char data[2]; }
uchar2;
1512 typedef union {
unsigned char data[4]; }
uchar4;
1513 typedef union {
unsigned char data[8]; }
uchar8;
1514 typedef union {
unsigned char data[16]; }
uchar16;
1525 typedef union {
unsigned short data[2]; }
ushort2;
1526 typedef union {
unsigned short data[4]; }
ushort4;
1527 typedef union {
unsigned short data[8]; }
ushort8;
1532 typedef union {
int data[2]; }
int2;
1533 typedef union {
int data[4]; }
int4;
1534 typedef union {
int data[8]; }
int8;
1538 typedef union {
unsigned int data; }
uint1;
1539 typedef union {
unsigned int data[2]; }
uint2;
1540 typedef union {
unsigned int data[4]; }
uint4;
1541 typedef union {
unsigned int data[8]; }
uint8;
1542 typedef union {
unsigned int data[16]; }
uint16;
1552 typedef union {
unsigned long data; }
ulong1;
1553 typedef union {
unsigned long data[2]; }
ulong2;
1554 typedef union {
unsigned long data[4]; }
ulong4;
1555 typedef union {
unsigned long data[8]; }
ulong8;
1556 typedef union {
unsigned long data[16]; }
ulong16;
1587 #endif // defined(_MSC_VER)
1588 #endif // defined(__has_attribute)