28 #ifndef HIP_INCLUDE_HIP_AMD_DETAIL_HIP_VECTOR_TYPES_H
29 #define HIP_INCLUDE_HIP_AMD_DETAIL_HIP_VECTOR_TYPES_H
33 #if defined(__HIPCC_RTC__)
34 #define __HOST_DEVICE__ __device__
36 #define __HOST_DEVICE__ __host__ __device__
39 #if defined(__has_attribute)
40 #if __has_attribute(ext_vector_type)
41 #define __NATIVE_VECTOR__(n, T) T __attribute__((ext_vector_type(n)))
43 #define __NATIVE_VECTOR__(n, T) T[n]
46 #if defined(__cplusplus)
47 #if !defined(__HIPCC_RTC__)
50 #include <type_traits>
51 #endif // !defined(__HIPCC_RTC__)
54 template<
typename,
typename,
unsigned int>
struct Scalar_accessor;
58 template<
typename T,
typename U,
unsigned int n>
59 struct is_integral<hip_impl::Scalar_accessor<T, U, n>>
61 template<
typename T,
typename U,
unsigned int n>
62 struct is_floating_point<hip_impl::Scalar_accessor<T, U, n>>
63 : is_floating_point<T> {};
67 template<
typename T,
typename Vector,
unsigned int idx>
68 struct Scalar_accessor {
70 const Scalar_accessor* p;
73 operator const T*()
const noexcept {
74 return &
reinterpret_cast<const T*
>(p)[idx];
77 operator const T*()
const volatile noexcept {
78 return &
reinterpret_cast<const T*
>(p)[idx];
81 operator T*() noexcept {
82 return &
reinterpret_cast<T*
>(
83 const_cast<Scalar_accessor*
>(p))[idx];
86 operator T*()
volatile noexcept {
87 return &
reinterpret_cast<T*
>(
88 const_cast<Scalar_accessor*
>(p))[idx];
94 std::ostream& operator<<(std::ostream& os,
95 const Scalar_accessor& x) noexcept {
96 return os << x.data[idx];
100 std::istream& operator>>(std::istream& is,
101 Scalar_accessor& x) noexcept {
113 operator T() const noexcept {
return data[idx]; }
115 operator T() const volatile noexcept {
return data[idx]; }
117 #ifdef __HIP_ENABLE_VECTOR_SCALAR_ACCESSORY_ENUM_CONVERSION__
122 typename std::enable_if<
123 !std::is_same<U, T>{} &&
126 T,
typename std::enable_if<std::is_enum<U>::value, std::underlying_type<U>>::type::type>{}>::type* =
nullptr>
128 operator U() const noexcept {
return static_cast<U
>(data[idx]); }
131 typename std::enable_if<
132 !std::is_same<U, T>{} &&
135 T,
typename std::enable_if<std::is_enum<U>::value, std::underlying_type<U>>::type::type>{}>::type* =
nullptr>
137 operator U() const volatile noexcept {
return static_cast<U
>(data[idx]); }
141 operator T&() noexcept {
142 return reinterpret_cast<
143 T (&)[sizeof(Vector) / sizeof(T)]
>(data)[idx];
146 operator volatile T&()
volatile noexcept {
147 return reinterpret_cast<
148 volatile T (&)[sizeof(Vector) / sizeof(T)]
>(data)[idx];
152 Address operator&() const noexcept {
return Address{
this}; }
155 Scalar_accessor& operator=(
const Scalar_accessor& x) noexcept {
156 data[idx] = x.data[idx];
161 Scalar_accessor& operator=(T x) noexcept {
167 volatile Scalar_accessor& operator=(T x)
volatile noexcept {
174 Scalar_accessor& operator++() noexcept {
179 T operator++(
int) noexcept {
185 Scalar_accessor& operator--() noexcept {
190 T operator--(
int) noexcept {
200 typename std::enable_if<
201 std::is_convertible<U, T>{}>::type* =
nullptr>
203 Scalar_accessor& operator+=(U x) noexcept {
209 typename std::enable_if<
210 std::is_convertible<U, T>{}>::type* =
nullptr>
212 Scalar_accessor& operator-=(U x) noexcept {
219 typename std::enable_if<
220 std::is_convertible<U, T>{}>::type* =
nullptr>
222 Scalar_accessor& operator*=(U x) noexcept {
228 typename std::enable_if<
229 std::is_convertible<U, T>{}>::type* =
nullptr>
231 Scalar_accessor& operator/=(U x) noexcept {
237 typename std::enable_if<std::is_convertible<U, T>{} &&
238 std::is_integral<U>{}>::type* =
nullptr>
240 Scalar_accessor& operator%=(U x) noexcept {
247 typename std::enable_if<std::is_convertible<U, T>{} &&
248 std::is_integral<U>{}>::type* =
nullptr>
250 Scalar_accessor& operator>>=(U x) noexcept {
256 typename std::enable_if<std::is_convertible<U, T>{} &&
257 std::is_integral<U>{}>::type* =
nullptr>
259 Scalar_accessor& operator<<=(U x) noexcept {
265 typename std::enable_if<std::is_convertible<U, T>{} &&
266 std::is_integral<U>{}>::type* =
nullptr>
268 Scalar_accessor& operator&=(U x) noexcept {
274 typename std::enable_if<std::is_convertible<U, T>{} &&
275 std::is_integral<U>{}>::type* =
nullptr>
277 Scalar_accessor& operator|=(U x) noexcept {
283 typename std::enable_if<std::is_convertible<U, T>{} &&
284 std::is_integral<U>{}>::type* =
nullptr>
286 Scalar_accessor& operator^=(U x) noexcept {
294 unsigned int next_pot(
unsigned int x) {
296 return 1u << (32u - __builtin_clz(x - 1u));
300 template<
typename T,
unsigned int n>
struct HIP_vector_base;
303 struct HIP_vector_base<T, 1> {
304 using Native_vec_ = __NATIVE_VECTOR__(1, T);
308 #if __HIP_CLANG_ONLY__
313 hip_impl::Scalar_accessor<T, Native_vec_, 0> x;
317 using value_type = T;
320 HIP_vector_base() =
default;
324 HIP_vector_base(T x_) noexcept : data{x_} {}
327 HIP_vector_base(
const HIP_vector_base&) =
default;
330 HIP_vector_base(HIP_vector_base&&) =
default;
332 ~HIP_vector_base() =
default;
335 HIP_vector_base& operator=(
const HIP_vector_base& x_) noexcept {
336 #if __has_attribute(ext_vector_type)
339 data[0] = x_.data[0];
347 struct HIP_vector_base<T, 2> {
348 using Native_vec_ = __NATIVE_VECTOR__(2, T);
351 #if !__has_attribute(ext_vector_type)
352 alignas(hip_impl::next_pot(2 *
sizeof(T)))
356 #if __HIP_CLANG_ONLY__
362 hip_impl::Scalar_accessor<T, Native_vec_, 0> x;
363 hip_impl::Scalar_accessor<T, Native_vec_, 1> y;
367 using value_type = T;
370 HIP_vector_base() =
default;
374 HIP_vector_base(T x_) noexcept : data{x_, x_} {}
377 HIP_vector_base(T x_, T y_) noexcept : data{x_, y_} {}
380 HIP_vector_base(
const HIP_vector_base&) =
default;
383 HIP_vector_base(HIP_vector_base&&) =
default;
385 ~HIP_vector_base() =
default;
388 HIP_vector_base& operator=(
const HIP_vector_base& x_) noexcept {
389 #if __has_attribute(ext_vector_type)
392 data[0] = x_.data[0];
393 data[1] = x_.data[1];
401 struct HIP_vector_base<T, 3> {
406 Native_vec_() =
default;
411 Native_vec_(T x_) noexcept : d{x_, x_, x_} {}
414 Native_vec_(T x_, T y_, T z_) noexcept : d{x_, y_, z_} {}
417 Native_vec_(
const Native_vec_&) =
default;
420 Native_vec_(Native_vec_&&) =
default;
422 ~Native_vec_() =
default;
425 Native_vec_& operator=(
const Native_vec_&) =
default;
427 Native_vec_& operator=(Native_vec_&&) =
default;
430 T& operator[](
unsigned int idx) noexcept {
return d[idx]; }
432 T operator[](
unsigned int idx)
const noexcept {
return d[idx]; }
435 Native_vec_& operator+=(
const Native_vec_& x_) noexcept
437 for (
auto i = 0u; i != 3u; ++i) d[i] += x_.d[i];
441 Native_vec_& operator-=(
const Native_vec_& x_) noexcept
443 for (
auto i = 0u; i != 3u; ++i) d[i] -= x_.d[i];
448 Native_vec_& operator*=(
const Native_vec_& x_) noexcept
450 for (
auto i = 0u; i != 3u; ++i) d[i] *= x_.d[i];
454 Native_vec_& operator/=(
const Native_vec_& x_) noexcept
456 for (
auto i = 0u; i != 3u; ++i) d[i] /= x_.d[i];
462 typename std::enable_if<std::is_signed<U>{}>::type* =
nullptr>
464 Native_vec_ operator-() const noexcept
467 for (
auto&& x : r.d) x = -x;
473 typename std::enable_if<std::is_integral<U>{}>::type* =
nullptr>
475 Native_vec_ operator~() const noexcept
478 for (
auto&& x : r.d) x = ~x;
483 typename std::enable_if<std::is_integral<U>{}>::type* =
nullptr>
485 Native_vec_& operator%=(
const Native_vec_& x_) noexcept
487 for (
auto i = 0u; i != 3u; ++i) d[i] %= x_.d[i];
492 typename std::enable_if<std::is_integral<U>{}>::type* =
nullptr>
494 Native_vec_& operator^=(
const Native_vec_& x_) noexcept
496 for (
auto i = 0u; i != 3u; ++i) d[i] ^= x_.d[i];
501 typename std::enable_if<std::is_integral<U>{}>::type* =
nullptr>
503 Native_vec_& operator|=(
const Native_vec_& x_) noexcept
505 for (
auto i = 0u; i != 3u; ++i) d[i] |= x_.d[i];
510 typename std::enable_if<std::is_integral<U>{}>::type* =
nullptr>
512 Native_vec_& operator&=(
const Native_vec_& x_) noexcept
514 for (
auto i = 0u; i != 3u; ++i) d[i] &= x_.d[i];
519 typename std::enable_if<std::is_integral<U>{}>::type* =
nullptr>
521 Native_vec_& operator>>=(
const Native_vec_& x_) noexcept
523 for (
auto i = 0u; i != 3u; ++i) d[i] >>= x_.d[i];
528 typename std::enable_if<std::is_integral<U>{}>::type* =
nullptr>
530 Native_vec_& operator<<=(
const Native_vec_& x_) noexcept
532 for (
auto i = 0u; i != 3u; ++i) d[i] <<= x_.d[i];
536 using Vec3_cmp =
int __attribute__((vector_size(4 *
sizeof(
int))));
538 Vec3_cmp operator==(
const Native_vec_& x_)
const noexcept
540 return Vec3_cmp{d[0] == x_.d[0], d[1] == x_.d[1], d[2] == x_.d[2]};
553 using value_type = T;
556 HIP_vector_base() =
default;
560 HIP_vector_base(T x_) noexcept : data{x_, x_, x_} {}
563 HIP_vector_base(T x_, T y_, T z_) noexcept : data{x_, y_, z_} {}
566 HIP_vector_base(
const HIP_vector_base&) =
default;
569 HIP_vector_base(HIP_vector_base&&) =
default;
571 ~HIP_vector_base() =
default;
574 HIP_vector_base& operator=(
const HIP_vector_base&) =
default;
576 HIP_vector_base& operator=(HIP_vector_base&&) =
default;
580 struct HIP_vector_base<T, 4> {
581 using Native_vec_ = __NATIVE_VECTOR__(4, T);
584 #if !__has_attribute(ext_vector_type)
585 alignas(hip_impl::next_pot(4 *
sizeof(T)))
589 #if __HIP_CLANG_ONLY__
597 hip_impl::Scalar_accessor<T, Native_vec_, 0> x;
598 hip_impl::Scalar_accessor<T, Native_vec_, 1> y;
599 hip_impl::Scalar_accessor<T, Native_vec_, 2> z;
600 hip_impl::Scalar_accessor<T, Native_vec_, 3> w;
604 using value_type = T;
607 HIP_vector_base() =
default;
611 HIP_vector_base(T x_) noexcept : data{x_, x_, x_, x_} {}
614 HIP_vector_base(T x_, T y_, T z_, T w_) noexcept : data{x_, y_, z_, w_} {}
617 HIP_vector_base(
const HIP_vector_base&) =
default;
620 HIP_vector_base(HIP_vector_base&&) =
default;
622 ~HIP_vector_base() =
default;
625 HIP_vector_base& operator=(
const HIP_vector_base& x_) noexcept {
626 #if __has_attribute(ext_vector_type)
629 data[0] = x_.data[0];
630 data[1] = x_.data[1];
631 data[2] = x_.data[2];
632 data[3] = x_.data[3];
639 template<
typename T,
unsigned int rank>
640 struct HIP_vector_type :
public HIP_vector_base<T, rank> {
641 using HIP_vector_base<T, rank>::data;
642 using typename HIP_vector_base<T, rank>::Native_vec_;
645 HIP_vector_type() =
default;
648 typename std::enable_if<
649 std::is_convertible<U, T>{}>::type* =
nullptr>
653 HIP_vector_type(U x_) noexcept
654 : HIP_vector_base<T, rank>{
static_cast<T
>(x_)}
658 typename std::enable_if<
659 (rank > 1) &&
sizeof...(Us) == rank>::type* =
nullptr>
662 HIP_vector_type(Us... xs) noexcept
663 : HIP_vector_base<T, rank>{
static_cast<T
>(xs)...}
667 HIP_vector_type(
const HIP_vector_type&) =
default;
670 HIP_vector_type(HIP_vector_type&&) =
default;
672 ~HIP_vector_type() =
default;
675 HIP_vector_type& operator=(
const HIP_vector_type&) =
default;
677 HIP_vector_type& operator=(HIP_vector_type&&) =
default;
681 HIP_vector_type& operator++() noexcept
683 return *
this += HIP_vector_type{1};
686 HIP_vector_type operator++(
int) noexcept
694 HIP_vector_type& operator--() noexcept
696 return *
this -= HIP_vector_type{1};
699 HIP_vector_type operator--(
int) noexcept
707 HIP_vector_type& operator+=(
const HIP_vector_type& x) noexcept
714 typename std::enable_if<
715 std::is_convertible<U, T>{}>::type* =
nullptr>
717 HIP_vector_type& operator+=(U x) noexcept
719 return *
this += HIP_vector_type{x};
723 HIP_vector_type& operator-=(
const HIP_vector_type& x) noexcept
730 typename std::enable_if<
731 std::is_convertible<U, T>{}>::type* =
nullptr>
733 HIP_vector_type& operator-=(U x) noexcept
735 return *
this -= HIP_vector_type{x};
739 HIP_vector_type& operator*=(
const HIP_vector_type& x) noexcept
746 typename std::enable_if<
747 std::is_convertible<U, T>{}>::type* =
nullptr>
749 HIP_vector_type& operator*=(U x) noexcept
751 return *
this *= HIP_vector_type{x};
755 HIP_vector_type& operator/=(
const HIP_vector_type& x) noexcept
762 typename std::enable_if<
763 std::is_convertible<U, T>{}>::type* =
nullptr>
765 HIP_vector_type& operator/=(U x) noexcept
767 return *
this /= HIP_vector_type{x};
772 typename std::enable_if<std::is_signed<U>{}>::type* =
nullptr>
774 HIP_vector_type operator-() const noexcept
777 tmp.data = -tmp.data;
783 typename std::enable_if<std::is_integral<U>{}>::type* =
nullptr>
785 HIP_vector_type operator~() const noexcept
787 HIP_vector_type r{*
this};
794 typename std::enable_if<std::is_integral<U>{}>::type* =
nullptr>
796 HIP_vector_type& operator%=(
const HIP_vector_type& x) noexcept
804 typename std::enable_if<std::is_integral<U>{}>::type* =
nullptr>
806 HIP_vector_type& operator^=(
const HIP_vector_type& x) noexcept
814 typename std::enable_if<std::is_integral<U>{}>::type* =
nullptr>
816 HIP_vector_type& operator|=(
const HIP_vector_type& x) noexcept
824 typename std::enable_if<std::is_integral<U>{}>::type* =
nullptr>
826 HIP_vector_type& operator&=(
const HIP_vector_type& x) noexcept
834 typename std::enable_if<std::is_integral<U>{}>::type* =
nullptr>
836 HIP_vector_type& operator>>=(
const HIP_vector_type& x) noexcept
844 typename std::enable_if<std::is_integral<U>{}>::type* =
nullptr>
846 HIP_vector_type& operator<<=(
const HIP_vector_type& x) noexcept
853 template<
typename T,
unsigned int n>
857 HIP_vector_type<T, n> operator+(
858 const HIP_vector_type<T, n>& x,
const HIP_vector_type<T, n>& y) noexcept
860 return HIP_vector_type<T, n>{x} += y;
862 template<
typename T,
unsigned int n,
typename U>
866 HIP_vector_type<T, n> operator+(
867 const HIP_vector_type<T, n>& x, U y) noexcept
869 return HIP_vector_type<T, n>{x} += HIP_vector_type<T, n>{y};
871 template<
typename T,
unsigned int n,
typename U>
875 HIP_vector_type<T, n> operator+(
876 U x,
const HIP_vector_type<T, n>& y) noexcept
878 return HIP_vector_type<T, n>{x} += y;
881 template<
typename T,
unsigned int n>
885 HIP_vector_type<T, n> operator-(
886 const HIP_vector_type<T, n>& x,
const HIP_vector_type<T, n>& y) noexcept
888 return HIP_vector_type<T, n>{x} -= y;
890 template<
typename T,
unsigned int n,
typename U>
894 HIP_vector_type<T, n> operator-(
895 const HIP_vector_type<T, n>& x, U y) noexcept
897 return HIP_vector_type<T, n>{x} -= HIP_vector_type<T, n>{y};
899 template<
typename T,
unsigned int n,
typename U>
903 HIP_vector_type<T, n> operator-(
904 U x,
const HIP_vector_type<T, n>& y) noexcept
906 return HIP_vector_type<T, n>{x} -= y;
909 template<
typename T,
unsigned int n>
913 HIP_vector_type<T, n> operator*(
914 const HIP_vector_type<T, n>& x,
const HIP_vector_type<T, n>& y) noexcept
916 return HIP_vector_type<T, n>{x} *= y;
918 template<
typename T,
unsigned int n,
typename U>
922 HIP_vector_type<T, n> operator*(
923 const HIP_vector_type<T, n>& x, U y) noexcept
925 return HIP_vector_type<T, n>{x} *= HIP_vector_type<T, n>{y};
927 template<
typename T,
unsigned int n,
typename U>
931 HIP_vector_type<T, n> operator*(
932 U x,
const HIP_vector_type<T, n>& y) noexcept
934 return HIP_vector_type<T, n>{x} *= y;
937 template<
typename T,
unsigned int n>
941 HIP_vector_type<T, n> operator/(
942 const HIP_vector_type<T, n>& x,
const HIP_vector_type<T, n>& y) noexcept
944 return HIP_vector_type<T, n>{x} /= y;
946 template<
typename T,
unsigned int n,
typename U>
950 HIP_vector_type<T, n> operator/(
951 const HIP_vector_type<T, n>& x, U y) noexcept
953 return HIP_vector_type<T, n>{x} /= HIP_vector_type<T, n>{y};
955 template<
typename T,
unsigned int n,
typename U>
959 HIP_vector_type<T, n> operator/(
960 U x,
const HIP_vector_type<T, n>& y) noexcept
962 return HIP_vector_type<T, n>{x} /= y;
969 bool _hip_any_zero(
const V& x,
int n) noexcept
972 (n == -1) ? true : ((x[n] == 0) ?
false : _hip_any_zero(x, n - 1));
975 template<
typename T,
unsigned int n>
980 const HIP_vector_type<T, n>& x,
const HIP_vector_type<T, n>& y) noexcept
982 return _hip_any_zero(x.data == y.data, n - 1);
984 template<
typename T,
unsigned int n,
typename U>
988 bool operator==(
const HIP_vector_type<T, n>& x, U y) noexcept
990 return x == HIP_vector_type<T, n>{y};
992 template<
typename T,
unsigned int n,
typename U>
996 bool operator==(U x,
const HIP_vector_type<T, n>& y) noexcept
998 return HIP_vector_type<T, n>{x} == y;
1001 template<
typename T,
unsigned int n>
1006 const HIP_vector_type<T, n>& x,
const HIP_vector_type<T, n>& y) noexcept
1010 template<
typename T,
unsigned int n,
typename U>
1014 bool operator!=(
const HIP_vector_type<T, n>& x, U y) noexcept
1018 template<
typename T,
unsigned int n,
typename U>
1022 bool operator!=(U x,
const HIP_vector_type<T, n>& y) noexcept
1030 typename std::enable_if<std::is_integral<T>{}>* =
nullptr>
1034 HIP_vector_type<T, n> operator%(
1035 const HIP_vector_type<T, n>& x,
const HIP_vector_type<T, n>& y) noexcept
1037 return HIP_vector_type<T, n>{x} %= y;
1043 typename std::enable_if<std::is_integral<T>{}>* =
nullptr>
1047 HIP_vector_type<T, n> operator%(
1048 const HIP_vector_type<T, n>& x, U y) noexcept
1050 return HIP_vector_type<T, n>{x} %= HIP_vector_type<T, n>{y};
1056 typename std::enable_if<std::is_integral<T>{}>* =
nullptr>
1060 HIP_vector_type<T, n> operator%(
1061 U x,
const HIP_vector_type<T, n>& y) noexcept
1063 return HIP_vector_type<T, n>{x} %= y;
1069 typename std::enable_if<std::is_integral<T>{}>* =
nullptr>
1073 HIP_vector_type<T, n> operator^(
1074 const HIP_vector_type<T, n>& x,
const HIP_vector_type<T, n>& y) noexcept
1076 return HIP_vector_type<T, n>{x} ^= y;
1082 typename std::enable_if<std::is_integral<T>{}>* =
nullptr>
1086 HIP_vector_type<T, n> operator^(
1087 const HIP_vector_type<T, n>& x, U y) noexcept
1089 return HIP_vector_type<T, n>{x} ^= HIP_vector_type<T, n>{y};
1095 typename std::enable_if<std::is_integral<T>{}>* =
nullptr>
1099 HIP_vector_type<T, n> operator^(
1100 U x,
const HIP_vector_type<T, n>& y) noexcept
1102 return HIP_vector_type<T, n>{x} ^= y;
1108 typename std::enable_if<std::is_integral<T>{}>* =
nullptr>
1112 HIP_vector_type<T, n> operator|(
1113 const HIP_vector_type<T, n>& x,
const HIP_vector_type<T, n>& y) noexcept
1115 return HIP_vector_type<T, n>{x} |= y;
1121 typename std::enable_if<std::is_integral<T>{}>* =
nullptr>
1125 HIP_vector_type<T, n> operator|(
1126 const HIP_vector_type<T, n>& x, U y) noexcept
1128 return HIP_vector_type<T, n>{x} |= HIP_vector_type<T, n>{y};
1134 typename std::enable_if<std::is_integral<T>{}>* =
nullptr>
1138 HIP_vector_type<T, n> operator|(
1139 U x,
const HIP_vector_type<T, n>& y) noexcept
1141 return HIP_vector_type<T, n>{x} |= y;
1147 typename std::enable_if<std::is_integral<T>{}>* =
nullptr>
1151 HIP_vector_type<T, n> operator&(
1152 const HIP_vector_type<T, n>& x,
const HIP_vector_type<T, n>& y) noexcept
1154 return HIP_vector_type<T, n>{x} &= y;
1160 typename std::enable_if<std::is_integral<T>{}>* =
nullptr>
1164 HIP_vector_type<T, n> operator&(
1165 const HIP_vector_type<T, n>& x, U y) noexcept
1167 return HIP_vector_type<T, n>{x} &= HIP_vector_type<T, n>{y};
1173 typename std::enable_if<std::is_integral<T>{}>* =
nullptr>
1177 HIP_vector_type<T, n> operator&(
1178 U x,
const HIP_vector_type<T, n>& y) noexcept
1180 return HIP_vector_type<T, n>{x} &= y;
1186 typename std::enable_if<std::is_integral<T>{}>* =
nullptr>
1190 HIP_vector_type<T, n> operator>>(
1191 const HIP_vector_type<T, n>& x,
const HIP_vector_type<T, n>& y) noexcept
1193 return HIP_vector_type<T, n>{x} >>= y;
1199 typename std::enable_if<std::is_integral<T>{}>* =
nullptr>
1203 HIP_vector_type<T, n> operator>>(
1204 const HIP_vector_type<T, n>& x, U y) noexcept
1206 return HIP_vector_type<T, n>{x} >>= HIP_vector_type<T, n>{y};
1212 typename std::enable_if<std::is_integral<T>{}>* =
nullptr>
1216 HIP_vector_type<T, n> operator>>(
1217 U x,
const HIP_vector_type<T, n>& y) noexcept
1219 return HIP_vector_type<T, n>{x} >>= y;
1225 typename std::enable_if<std::is_integral<T>{}>* =
nullptr>
1229 HIP_vector_type<T, n> operator<<(
1230 const HIP_vector_type<T, n>& x,
const HIP_vector_type<T, n>& y) noexcept
1232 return HIP_vector_type<T, n>{x} <<= y;
1238 typename std::enable_if<std::is_integral<T>{}>* =
nullptr>
1242 HIP_vector_type<T, n> operator<<(
1243 const HIP_vector_type<T, n>& x, U y) noexcept
1245 return HIP_vector_type<T, n>{x} <<= HIP_vector_type<T, n>{y};
1251 typename std::enable_if<std::is_arithmetic<U>::value>::type,
1252 typename std::enable_if<std::is_integral<T>{}>* =
nullptr>
1256 HIP_vector_type<T, n> operator<<(
1257 U x,
const HIP_vector_type<T, n>& y) noexcept
1259 return HIP_vector_type<T, n>{x} <<= y;
1262 #define __MAKE_VECTOR_TYPE__(CUDA_name, T) \
1263 using CUDA_name##1 = HIP_vector_type<T, 1>;\
1264 using CUDA_name##2 = HIP_vector_type<T, 2>;\
1265 using CUDA_name##3 = HIP_vector_type<T, 3>;\
1266 using CUDA_name##4 = HIP_vector_type<T, 4>;
1268 #define __MAKE_VECTOR_TYPE__(CUDA_name, T) \
1289 __MAKE_VECTOR_TYPE__(uchar,
unsigned char);
1290 __MAKE_VECTOR_TYPE__(
char,
char);
1291 __MAKE_VECTOR_TYPE__(ushort,
unsigned short);
1292 __MAKE_VECTOR_TYPE__(
short,
short);
1293 __MAKE_VECTOR_TYPE__(uint,
unsigned int);
1294 __MAKE_VECTOR_TYPE__(
int,
int);
1295 __MAKE_VECTOR_TYPE__(ulong,
unsigned long);
1296 __MAKE_VECTOR_TYPE__(
long,
long);
1297 __MAKE_VECTOR_TYPE__(ulonglong,
unsigned long long);
1298 __MAKE_VECTOR_TYPE__(longlong,
long long);
1299 __MAKE_VECTOR_TYPE__(
float,
float);
1300 __MAKE_VECTOR_TYPE__(
double,
double);
1303 #define DECLOP_MAKE_ONE_COMPONENT(comp, type) \
1304 static inline __HOST_DEVICE__ \
1305 type make_##type(comp x) { type r{x}; return r; }
1307 #define DECLOP_MAKE_TWO_COMPONENT(comp, type) \
1308 static inline __HOST_DEVICE__ \
1309 type make_##type(comp x, comp y) { type r{x, y}; return r; }
1311 #define DECLOP_MAKE_THREE_COMPONENT(comp, type) \
1312 static inline __HOST_DEVICE__ \
1313 type make_##type(comp x, comp y, comp z) { type r{x, y, z}; return r; }
1315 #define DECLOP_MAKE_FOUR_COMPONENT(comp, type) \
1316 static inline __HOST_DEVICE__ \
1317 type make_##type(comp x, comp y, comp z, comp w) { \
1318 type r{x, y, z, w}; \
1322 #define DECLOP_MAKE_ONE_COMPONENT(comp, type) \
1323 static inline __HOST_DEVICE__ \
1324 type make_##type(comp x) { type r; r.x =x; return r; }
1326 #define DECLOP_MAKE_TWO_COMPONENT(comp, type) \
1327 static inline __HOST_DEVICE__ \
1328 type make_##type(comp x, comp y) { type r; r.x=x; r.y=y; return r; }
1330 #define DECLOP_MAKE_THREE_COMPONENT(comp, type) \
1331 static inline __HOST_DEVICE__ \
1332 type make_##type(comp x, comp y, comp z) { type r; r.x=x; r.y=y; r.z=z; return r; }
1334 #define DECLOP_MAKE_FOUR_COMPONENT(comp, type) \
1335 static inline __HOST_DEVICE__ \
1336 type make_##type(comp x, comp y, comp z, comp w) { \
1337 type r; r.x=x; r.y=y; r.z=z; r.w=w; \
1342 DECLOP_MAKE_ONE_COMPONENT(
unsigned char,
uchar1);
1343 DECLOP_MAKE_TWO_COMPONENT(
unsigned char,
uchar2);
1344 DECLOP_MAKE_THREE_COMPONENT(
unsigned char,
uchar3);
1345 DECLOP_MAKE_FOUR_COMPONENT(
unsigned char,
uchar4);
1347 DECLOP_MAKE_ONE_COMPONENT(
signed char,
char1);
1348 DECLOP_MAKE_TWO_COMPONENT(
signed char,
char2);
1349 DECLOP_MAKE_THREE_COMPONENT(
signed char,
char3);
1350 DECLOP_MAKE_FOUR_COMPONENT(
signed char,
char4);
1352 DECLOP_MAKE_ONE_COMPONENT(
unsigned short,
ushort1);
1353 DECLOP_MAKE_TWO_COMPONENT(
unsigned short,
ushort2);
1354 DECLOP_MAKE_THREE_COMPONENT(
unsigned short,
ushort3);
1355 DECLOP_MAKE_FOUR_COMPONENT(
unsigned short,
ushort4);
1357 DECLOP_MAKE_ONE_COMPONENT(
signed short,
short1);
1358 DECLOP_MAKE_TWO_COMPONENT(
signed short,
short2);
1359 DECLOP_MAKE_THREE_COMPONENT(
signed short,
short3);
1360 DECLOP_MAKE_FOUR_COMPONENT(
signed short,
short4);
1362 DECLOP_MAKE_ONE_COMPONENT(
unsigned int,
uint1);
1363 DECLOP_MAKE_TWO_COMPONENT(
unsigned int,
uint2);
1364 DECLOP_MAKE_THREE_COMPONENT(
unsigned int,
uint3);
1365 DECLOP_MAKE_FOUR_COMPONENT(
unsigned int,
uint4);
1367 DECLOP_MAKE_ONE_COMPONENT(
signed int,
int1);
1368 DECLOP_MAKE_TWO_COMPONENT(
signed int,
int2);
1369 DECLOP_MAKE_THREE_COMPONENT(
signed int,
int3);
1370 DECLOP_MAKE_FOUR_COMPONENT(
signed int,
int4);
1372 DECLOP_MAKE_ONE_COMPONENT(
float,
float1);
1373 DECLOP_MAKE_TWO_COMPONENT(
float,
float2);
1374 DECLOP_MAKE_THREE_COMPONENT(
float,
float3);
1375 DECLOP_MAKE_FOUR_COMPONENT(
float,
float4);
1377 DECLOP_MAKE_ONE_COMPONENT(
double,
double1);
1378 DECLOP_MAKE_TWO_COMPONENT(
double,
double2);
1379 DECLOP_MAKE_THREE_COMPONENT(
double,
double3);
1380 DECLOP_MAKE_FOUR_COMPONENT(
double,
double4);
1382 DECLOP_MAKE_ONE_COMPONENT(
unsigned long,
ulong1);
1383 DECLOP_MAKE_TWO_COMPONENT(
unsigned long,
ulong2);
1384 DECLOP_MAKE_THREE_COMPONENT(
unsigned long,
ulong3);
1385 DECLOP_MAKE_FOUR_COMPONENT(
unsigned long,
ulong4);
1387 DECLOP_MAKE_ONE_COMPONENT(
signed long,
long1);
1388 DECLOP_MAKE_TWO_COMPONENT(
signed long,
long2);
1389 DECLOP_MAKE_THREE_COMPONENT(
signed long,
long3);
1390 DECLOP_MAKE_FOUR_COMPONENT(
signed long,
long4);
1392 DECLOP_MAKE_ONE_COMPONENT(
unsigned long long,
ulonglong1);
1393 DECLOP_MAKE_TWO_COMPONENT(
unsigned long long,
ulonglong2);
1394 DECLOP_MAKE_THREE_COMPONENT(
unsigned long long,
ulonglong3);
1395 DECLOP_MAKE_FOUR_COMPONENT(
unsigned long long,
ulonglong4);
1397 DECLOP_MAKE_ONE_COMPONENT(
signed long long,
longlong1);
1398 DECLOP_MAKE_TWO_COMPONENT(
signed long long,
longlong2);
1399 DECLOP_MAKE_THREE_COMPONENT(
signed long long,
longlong3);
1400 DECLOP_MAKE_FOUR_COMPONENT(
signed long long,
longlong4);
1401 #else // !defined(__has_attribute)
1403 #if defined(_MSC_VER)
1404 #include <mmintrin.h>
1405 #include <xmmintrin.h>
1406 #include <emmintrin.h>
1407 #include <immintrin.h>
1409 typedef union {
char data; }
char1;
1410 typedef union {
char data[2]; }
char2;
1411 typedef union {
char data[4]; }
char4;
1413 typedef union { __m64 data; }
char8;
1414 typedef union { __m128i data; }
char16;
1416 typedef union {
unsigned char data; }
uchar1;
1417 typedef union {
unsigned char data[2]; }
uchar2;
1418 typedef union {
unsigned char data[4]; }
uchar4;
1420 typedef union { __m64 data; }
uchar8;
1421 typedef union { __m128i data; }
uchar16;
1423 typedef union {
short data; }
short1;
1424 typedef union {
short data[2]; }
short2;
1425 typedef union { __m64 data; }
short4;
1427 typedef union { __m128i data; }
short8;
1428 typedef union { __m128i data[2]; }
short16;
1430 typedef union {
unsigned short data; }
ushort1;
1431 typedef union {
unsigned short data[2]; }
ushort2;
1432 typedef union { __m64 data; }
ushort4;
1434 typedef union { __m128i data; }
ushort8;
1435 typedef union { __m128i data[2]; }
ushort16;
1437 typedef union {
int data; }
int1;
1438 typedef union { __m64 data; }
int2;
1439 typedef union { __m128i data; }
int4;
1441 typedef union { __m128i data[2]; }
int8;
1442 typedef union { __m128i data[4];}
int16;
1444 typedef union {
unsigned int data; }
uint1;
1445 typedef union { __m64 data; }
uint2;
1446 typedef union { __m128i data; }
uint4;
1448 typedef union { __m128i data[2]; }
uint8;
1449 typedef union { __m128i data[4]; }
uint16;
1451 #if !defined(_WIN64)
1452 typedef union {
int data; }
long1;
1453 typedef union { __m64 data; }
long2;
1454 typedef union { __m128i data; }
long4;
1456 typedef union { __m128i data[2]; }
long8;
1457 typedef union { __m128i data[4]; }
long16;
1459 typedef union {
unsigned int data; }
ulong1;
1460 typedef union { __m64 data; }
ulong2;
1461 typedef union { __m128i data; }
ulong4;
1463 typedef union { __m128i data[2]; }
ulong8;
1464 typedef union { __m128i data[4]; }
ulong16;
1465 #else // defined(_WIN64)
1466 typedef union { __m64 data; }
long1;
1467 typedef union { __m128i data; }
long2;
1468 typedef union { __m128i data[2]; }
long4;
1470 typedef union { __m128i data[4]; }
long8;
1471 typedef union { __m128i data[8]; }
long16;
1473 typedef union { __m64 data; }
ulong1;
1474 typedef union { __m128i data; }
ulong2;
1475 typedef union { __m128i data[2]; }
ulong4;
1477 typedef union { __m128i data[4]; }
ulong8;
1478 typedef union { __m128i data[8]; }
ulong16;
1479 #endif // defined(_WIN64)
1481 typedef union { __m64 data; }
longlong1;
1482 typedef union { __m128i data; }
longlong2;
1483 typedef union { __m128i data[2]; }
longlong4;
1485 typedef union { __m128i data[4]; }
longlong8;
1486 typedef union { __m128i data[8]; }
longlong16;
1490 typedef union { __m128i data[2]; }
ulonglong4;
1492 typedef union { __m128i data[4]; }
ulonglong8;
1495 typedef union {
float data; }
float1;
1496 typedef union { __m64 data; }
float2;
1497 typedef union { __m128 data; }
float4;
1499 typedef union { __m256 data; }
float8;
1500 typedef union { __m256 data[2]; }
float16;
1502 typedef union {
double data; }
double1;
1503 typedef union { __m128d data; }
double2;
1504 typedef union { __m256d data; }
double4;
1506 typedef union { __m256d data[2]; }
double8;
1507 typedef union { __m256d data[4]; }
double16;
1509 #else // !defined(_MSC_VER)
1518 typedef union {
unsigned char data; }
uchar1;
1519 typedef union {
unsigned char data[2]; }
uchar2;
1520 typedef union {
unsigned char data[4]; }
uchar4;
1521 typedef union {
unsigned char data[8]; }
uchar8;
1522 typedef union {
unsigned char data[16]; }
uchar16;
1533 typedef union {
unsigned short data[2]; }
ushort2;
1534 typedef union {
unsigned short data[4]; }
ushort4;
1535 typedef union {
unsigned short data[8]; }
ushort8;
1540 typedef union {
int data[2]; }
int2;
1541 typedef union {
int data[4]; }
int4;
1542 typedef union {
int data[8]; }
int8;
1546 typedef union {
unsigned int data; }
uint1;
1547 typedef union {
unsigned int data[2]; }
uint2;
1548 typedef union {
unsigned int data[4]; }
uint4;
1549 typedef union {
unsigned int data[8]; }
uint8;
1550 typedef union {
unsigned int data[16]; }
uint16;
1560 typedef union {
unsigned long data; }
ulong1;
1561 typedef union {
unsigned long data[2]; }
ulong2;
1562 typedef union {
unsigned long data[4]; }
ulong4;
1563 typedef union {
unsigned long data[8]; }
ulong8;
1564 typedef union {
unsigned long data[16]; }
ulong16;
1595 #endif // defined(_MSC_VER)
1596 #endif // defined(__has_attribute)