28 #ifndef HIP_INCLUDE_HIP_HCC_DETAIL_HIP_VECTOR_TYPES_H
29 #define HIP_INCLUDE_HIP_HCC_DETAIL_HIP_VECTOR_TYPES_H
31 #if defined(__HCC__) && (__hcc_workweek__ < 16032)
32 #error("This version of HIP requires a newer version of HCC.");
37 #if defined(__has_attribute)
38 #if __has_attribute(ext_vector_type)
39 #define __NATIVE_VECTOR__(n, T) T __attribute__((ext_vector_type(n)))
41 #define __NATIVE_VECTOR__(n, T) T[n]
44 #if defined(__cplusplus)
47 #include <type_traits>
50 template<
typename,
typename,
unsigned int>
struct Scalar_accessor;
54 template<
typename T,
typename U,
unsigned int n>
55 struct is_integral<hip_impl::Scalar_accessor<T, U, n>>
57 template<
typename T,
typename U,
unsigned int n>
58 struct is_floating_point<hip_impl::Scalar_accessor<T, U, n>>
59 : is_floating_point<T> {};
63 template<
typename T,
typename Vector,
unsigned int idx>
64 struct Scalar_accessor {
66 const Scalar_accessor* p;
69 operator const T*()
const noexcept {
70 return &
reinterpret_cast<const T*
>(p)[idx];
73 operator const T*()
const volatile noexcept {
74 return &
reinterpret_cast<const T*
>(p)[idx];
77 operator T*() noexcept {
78 return &
reinterpret_cast<T*
>(
79 const_cast<Scalar_accessor*
>(p))[idx];
82 operator T*()
volatile noexcept {
83 return &
reinterpret_cast<T*
>(
84 const_cast<Scalar_accessor*
>(p))[idx];
90 std::ostream& operator<<(std::ostream& os,
91 const Scalar_accessor& x) noexcept {
92 return os << x.data[idx];
96 std::istream& operator>>(std::istream& is,
97 Scalar_accessor& x) noexcept {
109 operator T() const noexcept {
return data[idx]; }
111 operator T() const volatile noexcept {
return data[idx]; }
113 #ifdef __HIP_ENABLE_VECTOR_SCALAR_ACCESSORY_ENUM_CONVERSION__
118 typename std::enable_if<
119 !std::is_same<U, T>{} &&
122 T,
typename std::enable_if<std::is_enum<U>::value, std::underlying_type<U>>::type::type>{}>::type* =
nullptr>
124 operator U() const noexcept {
return static_cast<U
>(data[idx]); }
127 typename std::enable_if<
128 !std::is_same<U, T>{} &&
131 T,
typename std::enable_if<std::is_enum<U>::value, std::underlying_type<U>>::type::type>{}>::type* =
nullptr>
133 operator U() const volatile noexcept {
return static_cast<U
>(data[idx]); }
137 operator T&() noexcept {
138 return reinterpret_cast<
139 T (&)[sizeof(Vector) / sizeof(T)]
>(data)[idx];
142 operator volatile T&()
volatile noexcept {
143 return reinterpret_cast<
144 volatile T (&)[sizeof(Vector) / sizeof(T)]
>(data)[idx];
148 Address operator&() const noexcept {
return Address{
this}; }
151 Scalar_accessor& operator=(
const Scalar_accessor& x) noexcept {
152 data[idx] = x.data[idx];
157 Scalar_accessor& operator=(T x) noexcept {
163 volatile Scalar_accessor& operator=(T x)
volatile noexcept {
170 Scalar_accessor& operator++() noexcept {
175 T operator++(
int) noexcept {
181 Scalar_accessor& operator--() noexcept {
186 T operator--(
int) noexcept {
196 typename std::enable_if<
197 std::is_convertible<U, T>{}>::type* =
nullptr>
199 Scalar_accessor& operator+=(U x) noexcept {
205 typename std::enable_if<
206 std::is_convertible<U, T>{}>::type* =
nullptr>
208 Scalar_accessor& operator-=(U x) noexcept {
215 typename std::enable_if<
216 std::is_convertible<U, T>{}>::type* =
nullptr>
218 Scalar_accessor& operator*=(U x) noexcept {
224 typename std::enable_if<
225 std::is_convertible<U, T>{}>::type* =
nullptr>
227 Scalar_accessor& operator/=(U x) noexcept {
233 typename std::enable_if<std::is_convertible<U, T>{} &&
234 std::is_integral<U>{}>::type* =
nullptr>
236 Scalar_accessor& operator%=(U x) noexcept {
243 typename std::enable_if<std::is_convertible<U, T>{} &&
244 std::is_integral<U>{}>::type* =
nullptr>
246 Scalar_accessor& operator>>=(U x) noexcept {
252 typename std::enable_if<std::is_convertible<U, T>{} &&
253 std::is_integral<U>{}>::type* =
nullptr>
255 Scalar_accessor& operator<<=(U x) noexcept {
261 typename std::enable_if<std::is_convertible<U, T>{} &&
262 std::is_integral<U>{}>::type* =
nullptr>
264 Scalar_accessor& operator&=(U x) noexcept {
270 typename std::enable_if<std::is_convertible<U, T>{} &&
271 std::is_integral<U>{}>::type* =
nullptr>
273 Scalar_accessor& operator|=(U x) noexcept {
279 typename std::enable_if<std::is_convertible<U, T>{} &&
280 std::is_integral<U>{}>::type* =
nullptr>
282 Scalar_accessor& operator^=(U x) noexcept {
290 unsigned int next_pot(
unsigned int x) {
292 return 1u << (32u - __builtin_clz(x - 1u));
296 template<
typename T,
unsigned int n>
struct HIP_vector_base;
299 struct HIP_vector_base<T, 1> {
300 using Native_vec_ = __NATIVE_VECTOR__(1, T);
304 #if __HIP_CLANG_ONLY__
309 hip_impl::Scalar_accessor<T, Native_vec_, 0> x;
313 using value_type = T;
316 HIP_vector_base() =
default;
320 HIP_vector_base(T x) noexcept : data{x} {}
323 HIP_vector_base(
const HIP_vector_base&) =
default;
326 HIP_vector_base(HIP_vector_base&&) =
default;
328 ~HIP_vector_base() =
default;
331 HIP_vector_base& operator=(
const HIP_vector_base& x) noexcept {
332 #if __has_attribute(ext_vector_type)
343 struct HIP_vector_base<T, 2> {
344 using Native_vec_ = __NATIVE_VECTOR__(2, T);
347 #if !__has_attribute(ext_vector_type)
348 alignas(hip_impl::next_pot(2 *
sizeof(T)))
352 #if __HIP_CLANG_ONLY__
358 hip_impl::Scalar_accessor<T, Native_vec_, 0> x;
359 hip_impl::Scalar_accessor<T, Native_vec_, 1> y;
363 using value_type = T;
366 HIP_vector_base() =
default;
370 HIP_vector_base(T x) noexcept : data{x, x} {}
373 HIP_vector_base(T x, T y) noexcept : data{x, y} {}
376 HIP_vector_base(
const HIP_vector_base&) =
default;
379 HIP_vector_base(HIP_vector_base&&) =
default;
381 ~HIP_vector_base() =
default;
384 HIP_vector_base& operator=(
const HIP_vector_base& x) noexcept {
385 #if __has_attribute(ext_vector_type)
397 struct HIP_vector_base<T, 3> {
402 Native_vec_() =
default;
407 Native_vec_(T x) noexcept : d{x, x, x} {}
410 Native_vec_(T x, T y, T z) noexcept : d{x, y, z} {}
413 Native_vec_(
const Native_vec_&) =
default;
416 Native_vec_(Native_vec_&&) =
default;
418 ~Native_vec_() =
default;
421 Native_vec_& operator=(
const Native_vec_&) =
default;
423 Native_vec_& operator=(Native_vec_&&) =
default;
426 T& operator[](
unsigned int idx) noexcept {
return d[idx]; }
428 T operator[](
unsigned int idx)
const noexcept {
return d[idx]; }
431 Native_vec_& operator+=(
const Native_vec_& x) noexcept
433 for (
auto i = 0u; i != 3u; ++i) d[i] += x.d[i];
437 Native_vec_& operator-=(
const Native_vec_& x) noexcept
439 for (
auto i = 0u; i != 3u; ++i) d[i] -= x.d[i];
444 Native_vec_& operator*=(
const Native_vec_& x) noexcept
446 for (
auto i = 0u; i != 3u; ++i) d[i] *= x.d[i];
450 Native_vec_& operator/=(
const Native_vec_& x) noexcept
452 for (
auto i = 0u; i != 3u; ++i) d[i] /= x.d[i];
458 typename std::enable_if<std::is_signed<U>{}>::type* =
nullptr>
460 Native_vec_ operator-() const noexcept
463 for (
auto&& x : r.d) x = -x;
469 typename std::enable_if<std::is_integral<U>{}>::type* =
nullptr>
471 Native_vec_ operator~() const noexcept
474 for (
auto&& x : r.d) x = ~x;
479 typename std::enable_if<std::is_integral<U>{}>::type* =
nullptr>
481 Native_vec_& operator%=(
const Native_vec_& x) noexcept
483 for (
auto i = 0u; i != 3u; ++i) d[i] %= x.d[i];
488 typename std::enable_if<std::is_integral<U>{}>::type* =
nullptr>
490 Native_vec_& operator^=(
const Native_vec_& x) noexcept
492 for (
auto i = 0u; i != 3u; ++i) d[i] ^= x.d[i];
497 typename std::enable_if<std::is_integral<U>{}>::type* =
nullptr>
499 Native_vec_& operator|=(
const Native_vec_& x) noexcept
501 for (
auto i = 0u; i != 3u; ++i) d[i] |= x.d[i];
506 typename std::enable_if<std::is_integral<U>{}>::type* =
nullptr>
508 Native_vec_& operator&=(
const Native_vec_& x) noexcept
510 for (
auto i = 0u; i != 3u; ++i) d[i] &= x.d[i];
515 typename std::enable_if<std::is_integral<U>{}>::type* =
nullptr>
517 Native_vec_& operator>>=(
const Native_vec_& x) noexcept
519 for (
auto i = 0u; i != 3u; ++i) d[i] >>= x.d[i];
524 typename std::enable_if<std::is_integral<U>{}>::type* =
nullptr>
526 Native_vec_& operator<<=(
const Native_vec_& x) noexcept
528 for (
auto i = 0u; i != 3u; ++i) d[i] <<= x.d[i];
532 using Vec3_cmp =
int __attribute__((vector_size(4 *
sizeof(
int))));
534 Vec3_cmp operator==(
const Native_vec_& x)
const noexcept
536 return Vec3_cmp{d[0] == x.d[0], d[1] == x.d[1], d[2] == x.d[2]};
549 using value_type = T;
552 HIP_vector_base() =
default;
556 HIP_vector_base(T x) noexcept : data{x, x, x} {}
559 HIP_vector_base(T x, T y, T z) noexcept : data{x, y, z} {}
562 HIP_vector_base(
const HIP_vector_base&) =
default;
565 HIP_vector_base(HIP_vector_base&&) =
default;
567 ~HIP_vector_base() =
default;
570 HIP_vector_base& operator=(
const HIP_vector_base&) =
default;
572 HIP_vector_base& operator=(HIP_vector_base&&) =
default;
576 struct HIP_vector_base<T, 4> {
577 using Native_vec_ = __NATIVE_VECTOR__(4, T);
580 #if !__has_attribute(ext_vector_type)
581 alignas(hip_impl::next_pot(4 *
sizeof(T)))
585 #if __HIP_CLANG_ONLY__
593 hip_impl::Scalar_accessor<T, Native_vec_, 0> x;
594 hip_impl::Scalar_accessor<T, Native_vec_, 1> y;
595 hip_impl::Scalar_accessor<T, Native_vec_, 2> z;
596 hip_impl::Scalar_accessor<T, Native_vec_, 3> w;
600 using value_type = T;
603 HIP_vector_base() =
default;
607 HIP_vector_base(T x) noexcept : data{x, x, x, x} {}
610 HIP_vector_base(T x, T y, T z, T w) noexcept : data{x, y, z, w} {}
613 HIP_vector_base(
const HIP_vector_base&) =
default;
616 HIP_vector_base(HIP_vector_base&&) =
default;
618 ~HIP_vector_base() =
default;
621 HIP_vector_base& operator=(
const HIP_vector_base& x) noexcept {
622 #if __has_attribute(ext_vector_type)
635 template<
typename T,
unsigned int rank>
636 struct HIP_vector_type :
public HIP_vector_base<T, rank> {
637 using HIP_vector_base<T, rank>::data;
638 using typename HIP_vector_base<T, rank>::Native_vec_;
641 HIP_vector_type() =
default;
644 typename std::enable_if<
645 std::is_convertible<U, T>{}>::type* =
nullptr>
649 HIP_vector_type(U x) noexcept
650 : HIP_vector_base<T, rank>{
static_cast<T
>(x)}
654 typename std::enable_if<
655 (rank > 1) &&
sizeof...(Us) == rank>::type* =
nullptr>
658 HIP_vector_type(Us... xs) noexcept
659 : HIP_vector_base<T, rank>{
static_cast<T
>(xs)...}
663 HIP_vector_type(
const HIP_vector_type&) =
default;
666 HIP_vector_type(HIP_vector_type&&) =
default;
668 ~HIP_vector_type() =
default;
671 HIP_vector_type& operator=(
const HIP_vector_type&) =
default;
673 HIP_vector_type& operator=(HIP_vector_type&&) =
default;
677 HIP_vector_type& operator++() noexcept
679 return *
this += HIP_vector_type{1};
682 HIP_vector_type operator++(
int) noexcept
690 HIP_vector_type& operator--() noexcept
692 return *
this -= HIP_vector_type{1};
695 HIP_vector_type operator--(
int) noexcept
703 HIP_vector_type& operator+=(
const HIP_vector_type& x) noexcept
710 typename std::enable_if<
711 std::is_convertible<U, T>{}>::type* =
nullptr>
713 HIP_vector_type& operator+=(U x) noexcept
715 return *
this += HIP_vector_type{x};
719 HIP_vector_type& operator-=(
const HIP_vector_type& x) noexcept
726 typename std::enable_if<
727 std::is_convertible<U, T>{}>::type* =
nullptr>
729 HIP_vector_type& operator-=(U x) noexcept
731 return *
this -= HIP_vector_type{x};
735 HIP_vector_type& operator*=(
const HIP_vector_type& x) noexcept
742 typename std::enable_if<
743 std::is_convertible<U, T>{}>::type* =
nullptr>
745 HIP_vector_type& operator*=(U x) noexcept
747 return *
this *= HIP_vector_type{x};
751 HIP_vector_type& operator/=(
const HIP_vector_type& x) noexcept
758 typename std::enable_if<
759 std::is_convertible<U, T>{}>::type* =
nullptr>
761 HIP_vector_type& operator/=(U x) noexcept
763 return *
this /= HIP_vector_type{x};
768 typename std::enable_if<std::is_signed<U>{}>::type* =
nullptr>
770 HIP_vector_type operator-() const noexcept
773 tmp.data = -tmp.data;
779 typename std::enable_if<std::is_integral<U>{}>::type* =
nullptr>
781 HIP_vector_type operator~() const noexcept
783 HIP_vector_type r{*
this};
790 typename std::enable_if<std::is_integral<U>{}>::type* =
nullptr>
792 HIP_vector_type& operator%=(
const HIP_vector_type& x) noexcept
800 typename std::enable_if<std::is_integral<U>{}>::type* =
nullptr>
802 HIP_vector_type& operator^=(
const HIP_vector_type& x) noexcept
810 typename std::enable_if<std::is_integral<U>{}>::type* =
nullptr>
812 HIP_vector_type& operator|=(
const HIP_vector_type& x) noexcept
820 typename std::enable_if<std::is_integral<U>{}>::type* =
nullptr>
822 HIP_vector_type& operator&=(
const HIP_vector_type& x) noexcept
830 typename std::enable_if<std::is_integral<U>{}>::type* =
nullptr>
832 HIP_vector_type& operator>>=(
const HIP_vector_type& x) noexcept
840 typename std::enable_if<std::is_integral<U>{}>::type* =
nullptr>
842 HIP_vector_type& operator<<=(
const HIP_vector_type& x) noexcept
849 template<
typename T,
unsigned int n>
853 HIP_vector_type<T, n> operator+(
854 const HIP_vector_type<T, n>& x,
const HIP_vector_type<T, n>& y) noexcept
856 return HIP_vector_type<T, n>{x} += y;
858 template<
typename T,
unsigned int n,
typename U>
862 HIP_vector_type<T, n> operator+(
863 const HIP_vector_type<T, n>& x, U y) noexcept
865 return HIP_vector_type<T, n>{x} += HIP_vector_type<T, n>{y};
867 template<
typename T,
unsigned int n,
typename U>
871 HIP_vector_type<T, n> operator+(
872 U x,
const HIP_vector_type<T, n>& y) noexcept
874 return HIP_vector_type<T, n>{x} += y;
877 template<
typename T,
unsigned int n>
881 HIP_vector_type<T, n> operator-(
882 const HIP_vector_type<T, n>& x,
const HIP_vector_type<T, n>& y) noexcept
884 return HIP_vector_type<T, n>{x} -= y;
886 template<
typename T,
unsigned int n,
typename U>
890 HIP_vector_type<T, n> operator-(
891 const HIP_vector_type<T, n>& x, U y) noexcept
893 return HIP_vector_type<T, n>{x} -= HIP_vector_type<T, n>{y};
895 template<
typename T,
unsigned int n,
typename U>
899 HIP_vector_type<T, n> operator-(
900 U x,
const HIP_vector_type<T, n>& y) noexcept
902 return HIP_vector_type<T, n>{x} -= y;
905 template<
typename T,
unsigned int n>
909 HIP_vector_type<T, n> operator*(
910 const HIP_vector_type<T, n>& x,
const HIP_vector_type<T, n>& y) noexcept
912 return HIP_vector_type<T, n>{x} *= y;
914 template<
typename T,
unsigned int n,
typename U>
918 HIP_vector_type<T, n> operator*(
919 const HIP_vector_type<T, n>& x, U y) noexcept
921 return HIP_vector_type<T, n>{x} *= HIP_vector_type<T, n>{y};
923 template<
typename T,
unsigned int n,
typename U>
927 HIP_vector_type<T, n> operator*(
928 U x,
const HIP_vector_type<T, n>& y) noexcept
930 return HIP_vector_type<T, n>{x} *= y;
933 template<
typename T,
unsigned int n>
937 HIP_vector_type<T, n> operator/(
938 const HIP_vector_type<T, n>& x,
const HIP_vector_type<T, n>& y) noexcept
940 return HIP_vector_type<T, n>{x} /= y;
942 template<
typename T,
unsigned int n,
typename U>
946 HIP_vector_type<T, n> operator/(
947 const HIP_vector_type<T, n>& x, U y) noexcept
949 return HIP_vector_type<T, n>{x} /= HIP_vector_type<T, n>{y};
951 template<
typename T,
unsigned int n,
typename U>
955 HIP_vector_type<T, n> operator/(
956 U x,
const HIP_vector_type<T, n>& y) noexcept
958 return HIP_vector_type<T, n>{x} /= y;
965 bool _hip_any_zero(
const V& x,
int n) noexcept
968 (n == -1) ? true : ((x[n] == 0) ?
false : _hip_any_zero(x, n - 1));
971 template<
typename T,
unsigned int n>
976 const HIP_vector_type<T, n>& x,
const HIP_vector_type<T, n>& y) noexcept
978 return _hip_any_zero(x.data == y.data, n - 1);
980 template<
typename T,
unsigned int n,
typename U>
984 bool operator==(
const HIP_vector_type<T, n>& x, U y) noexcept
986 return x == HIP_vector_type<T, n>{y};
988 template<
typename T,
unsigned int n,
typename U>
992 bool operator==(U x,
const HIP_vector_type<T, n>& y) noexcept
994 return HIP_vector_type<T, n>{x} == y;
997 template<
typename T,
unsigned int n>
1002 const HIP_vector_type<T, n>& x,
const HIP_vector_type<T, n>& y) noexcept
1006 template<
typename T,
unsigned int n,
typename U>
1010 bool operator!=(
const HIP_vector_type<T, n>& x, U y) noexcept
1014 template<
typename T,
unsigned int n,
typename U>
1018 bool operator!=(U x,
const HIP_vector_type<T, n>& y) noexcept
1026 typename std::enable_if<std::is_integral<T>{}>* =
nullptr>
1030 HIP_vector_type<T, n> operator%(
1031 const HIP_vector_type<T, n>& x,
const HIP_vector_type<T, n>& y) noexcept
1033 return HIP_vector_type<T, n>{x} %= y;
1039 typename std::enable_if<std::is_integral<T>{}>* =
nullptr>
1043 HIP_vector_type<T, n> operator%(
1044 const HIP_vector_type<T, n>& x, U y) noexcept
1046 return HIP_vector_type<T, n>{x} %= HIP_vector_type<T, n>{y};
1052 typename std::enable_if<std::is_integral<T>{}>* =
nullptr>
1056 HIP_vector_type<T, n> operator%(
1057 U x,
const HIP_vector_type<T, n>& y) noexcept
1059 return HIP_vector_type<T, n>{x} %= y;
1065 typename std::enable_if<std::is_integral<T>{}>* =
nullptr>
1069 HIP_vector_type<T, n> operator^(
1070 const HIP_vector_type<T, n>& x,
const HIP_vector_type<T, n>& y) noexcept
1072 return HIP_vector_type<T, n>{x} ^= y;
1078 typename std::enable_if<std::is_integral<T>{}>* =
nullptr>
1082 HIP_vector_type<T, n> operator^(
1083 const HIP_vector_type<T, n>& x, U y) noexcept
1085 return HIP_vector_type<T, n>{x} ^= HIP_vector_type<T, n>{y};
1091 typename std::enable_if<std::is_integral<T>{}>* =
nullptr>
1095 HIP_vector_type<T, n> operator^(
1096 U x,
const HIP_vector_type<T, n>& y) noexcept
1098 return HIP_vector_type<T, n>{x} ^= y;
1104 typename std::enable_if<std::is_integral<T>{}>* =
nullptr>
1108 HIP_vector_type<T, n> operator|(
1109 const HIP_vector_type<T, n>& x,
const HIP_vector_type<T, n>& y) noexcept
1111 return HIP_vector_type<T, n>{x} |= y;
1117 typename std::enable_if<std::is_integral<T>{}>* =
nullptr>
1121 HIP_vector_type<T, n> operator|(
1122 const HIP_vector_type<T, n>& x, U y) noexcept
1124 return HIP_vector_type<T, n>{x} |= HIP_vector_type<T, n>{y};
1130 typename std::enable_if<std::is_integral<T>{}>* =
nullptr>
1134 HIP_vector_type<T, n> operator|(
1135 U x,
const HIP_vector_type<T, n>& y) noexcept
1137 return HIP_vector_type<T, n>{x} |= y;
1143 typename std::enable_if<std::is_integral<T>{}>* =
nullptr>
1147 HIP_vector_type<T, n> operator&(
1148 const HIP_vector_type<T, n>& x,
const HIP_vector_type<T, n>& y) noexcept
1150 return HIP_vector_type<T, n>{x} &= y;
1156 typename std::enable_if<std::is_integral<T>{}>* =
nullptr>
1160 HIP_vector_type<T, n> operator&(
1161 const HIP_vector_type<T, n>& x, U y) noexcept
1163 return HIP_vector_type<T, n>{x} &= HIP_vector_type<T, n>{y};
1169 typename std::enable_if<std::is_integral<T>{}>* =
nullptr>
1173 HIP_vector_type<T, n> operator&(
1174 U x,
const HIP_vector_type<T, n>& y) noexcept
1176 return HIP_vector_type<T, n>{x} &= y;
1182 typename std::enable_if<std::is_integral<T>{}>* =
nullptr>
1186 HIP_vector_type<T, n> operator>>(
1187 const HIP_vector_type<T, n>& x,
const HIP_vector_type<T, n>& y) noexcept
1189 return HIP_vector_type<T, n>{x} >>= y;
1195 typename std::enable_if<std::is_integral<T>{}>* =
nullptr>
1199 HIP_vector_type<T, n> operator>>(
1200 const HIP_vector_type<T, n>& x, U y) noexcept
1202 return HIP_vector_type<T, n>{x} >>= HIP_vector_type<T, n>{y};
1208 typename std::enable_if<std::is_integral<T>{}>* =
nullptr>
1212 HIP_vector_type<T, n> operator>>(
1213 U x,
const HIP_vector_type<T, n>& y) noexcept
1215 return HIP_vector_type<T, n>{x} >>= y;
1221 typename std::enable_if<std::is_integral<T>{}>* =
nullptr>
1225 HIP_vector_type<T, n> operator<<(
1226 const HIP_vector_type<T, n>& x,
const HIP_vector_type<T, n>& y) noexcept
1228 return HIP_vector_type<T, n>{x} <<= y;
1234 typename std::enable_if<std::is_integral<T>{}>* =
nullptr>
1238 HIP_vector_type<T, n> operator<<(
1239 const HIP_vector_type<T, n>& x, U y) noexcept
1241 return HIP_vector_type<T, n>{x} <<= HIP_vector_type<T, n>{y};
1247 typename std::enable_if<std::is_arithmetic<U>::value>::type,
1248 typename std::enable_if<std::is_integral<T>{}>* =
nullptr>
1252 HIP_vector_type<T, n> operator<<(
1253 U x,
const HIP_vector_type<T, n>& y) noexcept
1255 return HIP_vector_type<T, n>{x} <<= y;
1258 #define __MAKE_VECTOR_TYPE__(CUDA_name, T) \
1259 using CUDA_name##1 = HIP_vector_type<T, 1>;\
1260 using CUDA_name##2 = HIP_vector_type<T, 2>;\
1261 using CUDA_name##3 = HIP_vector_type<T, 3>;\
1262 using CUDA_name##4 = HIP_vector_type<T, 4>;
1264 #define __MAKE_VECTOR_TYPE__(CUDA_name, T) \
1285 __MAKE_VECTOR_TYPE__(uchar,
unsigned char);
1286 __MAKE_VECTOR_TYPE__(
char,
char);
1287 __MAKE_VECTOR_TYPE__(ushort,
unsigned short);
1288 __MAKE_VECTOR_TYPE__(
short,
short);
1289 __MAKE_VECTOR_TYPE__(uint,
unsigned int);
1290 __MAKE_VECTOR_TYPE__(
int,
int);
1291 __MAKE_VECTOR_TYPE__(ulong,
unsigned long);
1292 __MAKE_VECTOR_TYPE__(
long,
long);
1293 __MAKE_VECTOR_TYPE__(ulonglong,
unsigned long long);
1294 __MAKE_VECTOR_TYPE__(longlong,
long long);
1295 __MAKE_VECTOR_TYPE__(
float,
float);
1296 __MAKE_VECTOR_TYPE__(
double,
double);
1299 #define DECLOP_MAKE_ONE_COMPONENT(comp, type) \
1300 static inline __device__ __host__ \
1301 type make_##type(comp x) { type r{x}; return r; }
1303 #define DECLOP_MAKE_TWO_COMPONENT(comp, type) \
1304 static inline __device__ __host__ \
1305 type make_##type(comp x, comp y) { type r{x, y}; return r; }
1307 #define DECLOP_MAKE_THREE_COMPONENT(comp, type) \
1308 static inline __device__ __host__ \
1309 type make_##type(comp x, comp y, comp z) { type r{x, y, z}; return r; }
1311 #define DECLOP_MAKE_FOUR_COMPONENT(comp, type) \
1312 static inline __device__ __host__ \
1313 type make_##type(comp x, comp y, comp z, comp w) { \
1314 type r{x, y, z, w}; \
1318 #define DECLOP_MAKE_ONE_COMPONENT(comp, type) \
1319 static inline __device__ __host__ \
1320 type make_##type(comp x) { type r; r.x =x; return r; }
1322 #define DECLOP_MAKE_TWO_COMPONENT(comp, type) \
1323 static inline __device__ __host__ \
1324 type make_##type(comp x, comp y) { type r; r.x=x; r.y=y; return r; }
1326 #define DECLOP_MAKE_THREE_COMPONENT(comp, type) \
1327 static inline __device__ __host__ \
1328 type make_##type(comp x, comp y, comp z) { type r; r.x=x; r.y=y; r.z=z; return r; }
1330 #define DECLOP_MAKE_FOUR_COMPONENT(comp, type) \
1331 static inline __device__ __host__ \
1332 type make_##type(comp x, comp y, comp z, comp w) { \
1333 type r; r.x=x; r.y=y; r.z=z; r.w=w; \
1338 DECLOP_MAKE_ONE_COMPONENT(
unsigned char,
uchar1);
1339 DECLOP_MAKE_TWO_COMPONENT(
unsigned char,
uchar2);
1340 DECLOP_MAKE_THREE_COMPONENT(
unsigned char,
uchar3);
1341 DECLOP_MAKE_FOUR_COMPONENT(
unsigned char,
uchar4);
1343 DECLOP_MAKE_ONE_COMPONENT(
signed char,
char1);
1344 DECLOP_MAKE_TWO_COMPONENT(
signed char,
char2);
1345 DECLOP_MAKE_THREE_COMPONENT(
signed char,
char3);
1346 DECLOP_MAKE_FOUR_COMPONENT(
signed char,
char4);
1348 DECLOP_MAKE_ONE_COMPONENT(
unsigned short,
ushort1);
1349 DECLOP_MAKE_TWO_COMPONENT(
unsigned short,
ushort2);
1350 DECLOP_MAKE_THREE_COMPONENT(
unsigned short,
ushort3);
1351 DECLOP_MAKE_FOUR_COMPONENT(
unsigned short,
ushort4);
1353 DECLOP_MAKE_ONE_COMPONENT(
signed short,
short1);
1354 DECLOP_MAKE_TWO_COMPONENT(
signed short,
short2);
1355 DECLOP_MAKE_THREE_COMPONENT(
signed short,
short3);
1356 DECLOP_MAKE_FOUR_COMPONENT(
signed short,
short4);
1358 DECLOP_MAKE_ONE_COMPONENT(
unsigned int,
uint1);
1359 DECLOP_MAKE_TWO_COMPONENT(
unsigned int,
uint2);
1360 DECLOP_MAKE_THREE_COMPONENT(
unsigned int,
uint3);
1361 DECLOP_MAKE_FOUR_COMPONENT(
unsigned int,
uint4);
1363 DECLOP_MAKE_ONE_COMPONENT(
signed int,
int1);
1364 DECLOP_MAKE_TWO_COMPONENT(
signed int,
int2);
1365 DECLOP_MAKE_THREE_COMPONENT(
signed int,
int3);
1366 DECLOP_MAKE_FOUR_COMPONENT(
signed int,
int4);
1368 DECLOP_MAKE_ONE_COMPONENT(
float,
float1);
1369 DECLOP_MAKE_TWO_COMPONENT(
float,
float2);
1370 DECLOP_MAKE_THREE_COMPONENT(
float,
float3);
1371 DECLOP_MAKE_FOUR_COMPONENT(
float,
float4);
1373 DECLOP_MAKE_ONE_COMPONENT(
double,
double1);
1374 DECLOP_MAKE_TWO_COMPONENT(
double,
double2);
1375 DECLOP_MAKE_THREE_COMPONENT(
double,
double3);
1376 DECLOP_MAKE_FOUR_COMPONENT(
double,
double4);
1378 DECLOP_MAKE_ONE_COMPONENT(
unsigned long,
ulong1);
1379 DECLOP_MAKE_TWO_COMPONENT(
unsigned long,
ulong2);
1380 DECLOP_MAKE_THREE_COMPONENT(
unsigned long,
ulong3);
1381 DECLOP_MAKE_FOUR_COMPONENT(
unsigned long,
ulong4);
1383 DECLOP_MAKE_ONE_COMPONENT(
signed long,
long1);
1384 DECLOP_MAKE_TWO_COMPONENT(
signed long,
long2);
1385 DECLOP_MAKE_THREE_COMPONENT(
signed long,
long3);
1386 DECLOP_MAKE_FOUR_COMPONENT(
signed long,
long4);
1388 DECLOP_MAKE_ONE_COMPONENT(
unsigned long long,
ulonglong1);
1389 DECLOP_MAKE_TWO_COMPONENT(
unsigned long long,
ulonglong2);
1390 DECLOP_MAKE_THREE_COMPONENT(
unsigned long long,
ulonglong3);
1391 DECLOP_MAKE_FOUR_COMPONENT(
unsigned long long,
ulonglong4);
1393 DECLOP_MAKE_ONE_COMPONENT(
signed long long,
longlong1);
1394 DECLOP_MAKE_TWO_COMPONENT(
signed long long,
longlong2);
1395 DECLOP_MAKE_THREE_COMPONENT(
signed long long,
longlong3);
1396 DECLOP_MAKE_FOUR_COMPONENT(
signed long long,
longlong4);
1397 #else // !defined(__has_attribute)
1399 #if defined(_MSC_VER)
1400 #include <mmintrin.h>
1401 #include <xmmintrin.h>
1402 #include <emmintrin.h>
1403 #include <immintrin.h>
1405 typedef union {
char data; }
char1;
1406 typedef union {
char data[2]; }
char2;
1407 typedef union {
char data[4]; }
char4;
1409 typedef union { __m64 data; }
char8;
1410 typedef union { __m128i data; }
char16;
1412 typedef union {
unsigned char data; }
uchar1;
1413 typedef union {
unsigned char data[2]; }
uchar2;
1414 typedef union {
unsigned char data[4]; }
uchar4;
1416 typedef union { __m64 data; }
uchar8;
1417 typedef union { __m128i data; }
uchar16;
1419 typedef union {
short data; }
short1;
1420 typedef union {
short data[2]; }
short2;
1421 typedef union { __m64 data; }
short4;
1423 typedef union { __m128i data; }
short8;
1424 typedef union { __m128i data[2]; }
short16;
1426 typedef union {
unsigned short data; }
ushort1;
1427 typedef union {
unsigned short data[2]; }
ushort2;
1428 typedef union { __m64 data; }
ushort4;
1430 typedef union { __m128i data; }
ushort8;
1431 typedef union { __m128i data[2]; }
ushort16;
1433 typedef union {
int data; }
int1;
1434 typedef union { __m64 data; }
int2;
1435 typedef union { __m128i data; }
int4;
1437 typedef union { __m128i data[2]; }
int8;
1438 typedef union { __m128i data[4];}
int16;
1440 typedef union {
unsigned int data; }
uint1;
1441 typedef union { __m64 data; }
uint2;
1442 typedef union { __m128i data; }
uint4;
1444 typedef union { __m128i data[2]; }
uint8;
1445 typedef union { __m128i data[4]; }
uint16;
1447 #if !defined(_WIN64)
1448 typedef union {
int data; }
long1;
1449 typedef union { __m64 data; }
long2;
1450 typedef union { __m128i data; }
long4;
1452 typedef union { __m128i data[2]; }
long8;
1453 typedef union { __m128i data[4]; }
long16;
1455 typedef union {
unsigned int data; }
ulong1;
1456 typedef union { __m64 data; }
ulong2;
1457 typedef union { __m128i data; }
ulong4;
1459 typedef union { __m128i data[2]; }
ulong8;
1460 typedef union { __m128i data[4]; }
ulong16;
1461 #else // defined(_WIN64)
1462 typedef union { __m64 data; }
long1;
1463 typedef union { __m128i data; }
long2;
1464 typedef union { __m128i data[2]; }
long4;
1466 typedef union { __m128i data[4]; }
long8;
1467 typedef union { __m128i data[8]; }
long16;
1469 typedef union { __m64 data; }
ulong1;
1470 typedef union { __m128i data; }
ulong2;
1471 typedef union { __m128i data[2]; }
ulong4;
1473 typedef union { __m128i data[4]; }
ulong8;
1474 typedef union { __m128i data[8]; }
ulong16;
1475 #endif // defined(_WIN64)
1477 typedef union { __m64 data; }
longlong1;
1478 typedef union { __m128i data; }
longlong2;
1479 typedef union { __m128i data[2]; }
longlong4;
1481 typedef union { __m128i data[4]; }
longlong8;
1482 typedef union { __m128i data[8]; }
longlong16;
1486 typedef union { __m128i data[2]; }
ulonglong4;
1488 typedef union { __m128i data[4]; }
ulonglong8;
1491 typedef union {
float data; }
float1;
1492 typedef union { __m64 data; }
float2;
1493 typedef union { __m128 data; }
float4;
1495 typedef union { __m256 data; }
float8;
1496 typedef union { __m256 data[2]; }
float16;
1498 typedef union {
double data; }
double1;
1499 typedef union { __m128d data; }
double2;
1500 typedef union { __m256d data; }
double4;
1502 typedef union { __m256d data[2]; }
double8;
1503 typedef union { __m256d data[4]; }
double16;
1505 #else // !defined(_MSC_VER)
1514 typedef union {
unsigned char data; }
uchar1;
1515 typedef union {
unsigned char data[2]; }
uchar2;
1516 typedef union {
unsigned char data[4]; }
uchar4;
1517 typedef union {
unsigned char data[8]; }
uchar8;
1518 typedef union {
unsigned char data[16]; }
uchar16;
1529 typedef union {
unsigned short data[2]; }
ushort2;
1530 typedef union {
unsigned short data[4]; }
ushort4;
1531 typedef union {
unsigned short data[8]; }
ushort8;
1536 typedef union {
int data[2]; }
int2;
1537 typedef union {
int data[4]; }
int4;
1538 typedef union {
int data[8]; }
int8;
1542 typedef union {
unsigned int data; }
uint1;
1543 typedef union {
unsigned int data[2]; }
uint2;
1544 typedef union {
unsigned int data[4]; }
uint4;
1545 typedef union {
unsigned int data[8]; }
uint8;
1546 typedef union {
unsigned int data[16]; }
uint16;
1556 typedef union {
unsigned long data; }
ulong1;
1557 typedef union {
unsigned long data[2]; }
ulong2;
1558 typedef union {
unsigned long data[4]; }
ulong4;
1559 typedef union {
unsigned long data[8]; }
ulong8;
1560 typedef union {
unsigned long data[16]; }
ulong16;
1591 #endif // defined(_MSC_VER)
1592 #endif // defined(__has_attribute)