HIP: Heterogenous-computing Interface for Portability
hip_vector_types.h
Go to the documentation of this file.
1 /*
2 Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved.
3 
4 Permission is hereby granted, free of charge, to any person obtaining a copy
5 of this software and associated documentation files (the "Software"), to deal
6 in the Software without restriction, including without limitation the rights
7 to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
8 copies of the Software, and to permit persons to whom the Software is
9 furnished to do so, subject to the following conditions:
10 
11 The above copyright notice and this permission notice shall be included in
12 all copies or substantial portions of the Software.
13 
14 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
17 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
19 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
20 THE SOFTWARE.
21 */
22 
28 #ifndef HIP_INCLUDE_HIP_AMD_DETAIL_HIP_VECTOR_TYPES_H
29 #define HIP_INCLUDE_HIP_AMD_DETAIL_HIP_VECTOR_TYPES_H
30 
32 
33 #if defined(__has_attribute)
34  #if __has_attribute(ext_vector_type)
35  #define __NATIVE_VECTOR__(n, T) T __attribute__((ext_vector_type(n)))
36  #else
37  #define __NATIVE_VECTOR__(n, T) T[n]
38  #endif
39 
40 #if defined(__cplusplus)
41  #include <array>
42  #include <iosfwd>
43  #include <type_traits>
44 
45  namespace hip_impl {
46  template<typename, typename, unsigned int> struct Scalar_accessor;
47  } // Namespace hip_impl.
48 
49  namespace std {
50  template<typename T, typename U, unsigned int n>
51  struct is_integral<hip_impl::Scalar_accessor<T, U, n>>
52  : is_integral<T> {};
53  template<typename T, typename U, unsigned int n>
54  struct is_floating_point<hip_impl::Scalar_accessor<T, U, n>>
55  : is_floating_point<T> {};
56  } // Namespace std.
57 
58  namespace hip_impl {
59  template<typename T, typename Vector, unsigned int idx>
60  struct Scalar_accessor {
61  struct Address {
62  const Scalar_accessor* p;
63 
64  __host__ __device__
65  operator const T*() const noexcept {
66  return &reinterpret_cast<const T*>(p)[idx];
67  }
68  __host__ __device__
69  operator const T*() const volatile noexcept {
70  return &reinterpret_cast<const T*>(p)[idx];
71  }
72  __host__ __device__
73  operator T*() noexcept {
74  return &reinterpret_cast<T*>(
75  const_cast<Scalar_accessor*>(p))[idx];
76  }
77  __host__ __device__
78  operator T*() volatile noexcept {
79  return &reinterpret_cast<T*>(
80  const_cast<Scalar_accessor*>(p))[idx];
81  }
82  };
83 
84  friend
85  inline
86  std::ostream& operator<<(std::ostream& os,
87  const Scalar_accessor& x) noexcept {
88  return os << x.data[idx];
89  }
90  friend
91  inline
92  std::istream& operator>>(std::istream& is,
93  Scalar_accessor& x) noexcept {
94  T tmp;
95  is >> tmp;
96  x.data[idx] = tmp;
97 
98  return is;
99  }
100 
101  // Idea from https://t0rakka.silvrback.com/simd-scalar-accessor
102  Vector data;
103 
104  __host__ __device__
105  operator T() const noexcept { return data[idx]; }
106  __host__ __device__
107  operator T() const volatile noexcept { return data[idx]; }
108 
109 #ifdef __HIP_ENABLE_VECTOR_SCALAR_ACCESSORY_ENUM_CONVERSION__
110  // The conversions to enum are fairly ghastly, but unfortunately used in
111  // some pre-existing, difficult to modify, code.
112  template<
113  typename U,
114  typename std::enable_if<
115  !std::is_same<U, T>{} &&
116  std::is_enum<U>{} &&
117  std::is_convertible<
118  T, typename std::enable_if<std::is_enum<U>::value, std::underlying_type<U>>::type::type>{}>::type* = nullptr>
119  __host__ __device__
120  operator U() const noexcept { return static_cast<U>(data[idx]); }
121  template<
122  typename U,
123  typename std::enable_if<
124  !std::is_same<U, T>{} &&
125  std::is_enum<U>{} &&
126  std::is_convertible<
127  T, typename std::enable_if<std::is_enum<U>::value, std::underlying_type<U>>::type::type>{}>::type* = nullptr>
128  __host__ __device__
129  operator U() const volatile noexcept { return static_cast<U>(data[idx]); }
130 #endif
131 
132  __host__ __device__
133  operator T&() noexcept {
134  return reinterpret_cast<
135  T (&)[sizeof(Vector) / sizeof(T)]>(data)[idx];
136  }
137  __host__ __device__
138  operator volatile T&() volatile noexcept {
139  return reinterpret_cast<
140  volatile T (&)[sizeof(Vector) / sizeof(T)]>(data)[idx];
141  }
142 
143  __host__ __device__
144  Address operator&() const noexcept { return Address{this}; }
145 
146  __host__ __device__
147  Scalar_accessor& operator=(const Scalar_accessor& x) noexcept {
148  data[idx] = x.data[idx];
149 
150  return *this;
151  }
152  __host__ __device__
153  Scalar_accessor& operator=(T x) noexcept {
154  data[idx] = x;
155 
156  return *this;
157  }
158  __host__ __device__
159  volatile Scalar_accessor& operator=(T x) volatile noexcept {
160  data[idx] = x;
161 
162  return *this;
163  }
164 
165  __host__ __device__
166  Scalar_accessor& operator++() noexcept {
167  ++data[idx];
168  return *this;
169  }
170  __host__ __device__
171  T operator++(int) noexcept {
172  auto r{data[idx]};
173  ++data[idx];
174  return *this;
175  }
176  __host__ __device__
177  Scalar_accessor& operator--() noexcept {
178  --data[idx];
179  return *this;
180  }
181  __host__ __device__
182  T operator--(int) noexcept {
183  auto r{data[idx]};
184  --data[idx];
185  return *this;
186  }
187 
188  // TODO: convertibility is too restrictive, constraint should be on
189  // the operator being invocable with a value of type U.
190  template<
191  typename U,
192  typename std::enable_if<
193  std::is_convertible<U, T>{}>::type* = nullptr>
194  __host__ __device__
195  Scalar_accessor& operator+=(U x) noexcept {
196  data[idx] += x;
197  return *this;
198  }
199  template<
200  typename U,
201  typename std::enable_if<
202  std::is_convertible<U, T>{}>::type* = nullptr>
203  __host__ __device__
204  Scalar_accessor& operator-=(U x) noexcept {
205  data[idx] -= x;
206  return *this;
207  }
208 
209  template<
210  typename U,
211  typename std::enable_if<
212  std::is_convertible<U, T>{}>::type* = nullptr>
213  __host__ __device__
214  Scalar_accessor& operator*=(U x) noexcept {
215  data[idx] *= x;
216  return *this;
217  }
218  template<
219  typename U,
220  typename std::enable_if<
221  std::is_convertible<U, T>{}>::type* = nullptr>
222  __host__ __device__
223  Scalar_accessor& operator/=(U x) noexcept {
224  data[idx] /= x;
225  return *this;
226  }
227  template<
228  typename U = T,
229  typename std::enable_if<std::is_convertible<U, T>{} &&
230  std::is_integral<U>{}>::type* = nullptr>
231  __host__ __device__
232  Scalar_accessor& operator%=(U x) noexcept {
233  data[idx] %= x;
234  return *this;
235  }
236 
237  template<
238  typename U = T,
239  typename std::enable_if<std::is_convertible<U, T>{} &&
240  std::is_integral<U>{}>::type* = nullptr>
241  __host__ __device__
242  Scalar_accessor& operator>>=(U x) noexcept {
243  data[idx] >>= x;
244  return *this;
245  }
246  template<
247  typename U = T,
248  typename std::enable_if<std::is_convertible<U, T>{} &&
249  std::is_integral<U>{}>::type* = nullptr>
250  __host__ __device__
251  Scalar_accessor& operator<<=(U x) noexcept {
252  data[idx] <<= x;
253  return *this;
254  }
255  template<
256  typename U = T,
257  typename std::enable_if<std::is_convertible<U, T>{} &&
258  std::is_integral<U>{}>::type* = nullptr>
259  __host__ __device__
260  Scalar_accessor& operator&=(U x) noexcept {
261  data[idx] &= x;
262  return *this;
263  }
264  template<
265  typename U = T,
266  typename std::enable_if<std::is_convertible<U, T>{} &&
267  std::is_integral<U>{}>::type* = nullptr>
268  __host__ __device__
269  Scalar_accessor& operator|=(U x) noexcept {
270  data[idx] |= x;
271  return *this;
272  }
273  template<
274  typename U = T,
275  typename std::enable_if<std::is_convertible<U, T>{} &&
276  std::is_integral<U>{}>::type* = nullptr>
277  __host__ __device__
278  Scalar_accessor& operator^=(U x) noexcept {
279  data[idx] ^= x;
280  return *this;
281  }
282  };
283 
284  inline
285  constexpr
286  unsigned int next_pot(unsigned int x) {
287  // Precondition: x > 1.
288  return 1u << (32u - __builtin_clz(x - 1u));
289  }
290  } // Namespace hip_impl.
291 
292  template<typename T, unsigned int n> struct HIP_vector_base;
293 
294  template<typename T>
295  struct HIP_vector_base<T, 1> {
296  using Native_vec_ = __NATIVE_VECTOR__(1, T);
297 
298  union {
299  Native_vec_ data;
300 #if __HIP_CLANG_ONLY__
301  struct {
302  T x;
303  };
304 #else
305  hip_impl::Scalar_accessor<T, Native_vec_, 0> x;
306 #endif
307  };
308 
309  using value_type = T;
310 
311  __host__ __device__
312  HIP_vector_base() = default;
313  __host__ __device__
314  explicit
315  constexpr
316  HIP_vector_base(T x_) noexcept : data{x_} {}
317  __host__ __device__
318  constexpr
319  HIP_vector_base(const HIP_vector_base&) = default;
320  __host__ __device__
321  constexpr
322  HIP_vector_base(HIP_vector_base&&) = default;
323  __host__ __device__
324  ~HIP_vector_base() = default;
325 
326  __host__ __device__
327  HIP_vector_base& operator=(const HIP_vector_base& x_) noexcept {
328  #if __has_attribute(ext_vector_type)
329  data = x_.data;
330  #else
331  data[0] = x_.data[0];
332  #endif
333 
334  return *this;
335  }
336  };
337 
338  template<typename T>
339  struct HIP_vector_base<T, 2> {
340  using Native_vec_ = __NATIVE_VECTOR__(2, T);
341 
342  union
343  #if !__has_attribute(ext_vector_type)
344  alignas(hip_impl::next_pot(2 * sizeof(T)))
345  #endif
346  {
347  Native_vec_ data;
348 #if __HIP_CLANG_ONLY__
349  struct {
350  T x;
351  T y;
352  };
353 #else
354  hip_impl::Scalar_accessor<T, Native_vec_, 0> x;
355  hip_impl::Scalar_accessor<T, Native_vec_, 1> y;
356 #endif
357  };
358 
359  using value_type = T;
360 
361  __host__ __device__
362  HIP_vector_base() = default;
363  __host__ __device__
364  explicit
365  constexpr
366  HIP_vector_base(T x_) noexcept : data{x_, x_} {}
367  __host__ __device__
368  constexpr
369  HIP_vector_base(T x_, T y_) noexcept : data{x_, y_} {}
370  __host__ __device__
371  constexpr
372  HIP_vector_base(const HIP_vector_base&) = default;
373  __host__ __device__
374  constexpr
375  HIP_vector_base(HIP_vector_base&&) = default;
376  __host__ __device__
377  ~HIP_vector_base() = default;
378 
379  __host__ __device__
380  HIP_vector_base& operator=(const HIP_vector_base& x_) noexcept {
381  #if __has_attribute(ext_vector_type)
382  data = x_.data;
383  #else
384  data[0] = x_.data[0];
385  data[1] = x_.data[1];
386  #endif
387 
388  return *this;
389  }
390  };
391 
392  template<typename T>
393  struct HIP_vector_base<T, 3> {
394  struct Native_vec_ {
395  T d[3];
396 
397  __host__ __device__
398  Native_vec_() = default;
399 
400  __host__ __device__
401  explicit
402  constexpr
403  Native_vec_(T x_) noexcept : d{x_, x_, x_} {}
404  __host__ __device__
405  constexpr
406  Native_vec_(T x_, T y_, T z_) noexcept : d{x_, y_, z_} {}
407  __host__ __device__
408  constexpr
409  Native_vec_(const Native_vec_&) = default;
410  __host__ __device__
411  constexpr
412  Native_vec_(Native_vec_&&) = default;
413  __host__ __device__
414  ~Native_vec_() = default;
415 
416  __host__ __device__
417  Native_vec_& operator=(const Native_vec_&) = default;
418  __host__ __device__
419  Native_vec_& operator=(Native_vec_&&) = default;
420 
421  __host__ __device__
422  T& operator[](unsigned int idx) noexcept { return d[idx]; }
423  __host__ __device__
424  T operator[](unsigned int idx) const noexcept { return d[idx]; }
425 
426  __host__ __device__
427  Native_vec_& operator+=(const Native_vec_& x_) noexcept
428  {
429  for (auto i = 0u; i != 3u; ++i) d[i] += x_.d[i];
430  return *this;
431  }
432  __host__ __device__
433  Native_vec_& operator-=(const Native_vec_& x_) noexcept
434  {
435  for (auto i = 0u; i != 3u; ++i) d[i] -= x_.d[i];
436  return *this;
437  }
438 
439  __host__ __device__
440  Native_vec_& operator*=(const Native_vec_& x_) noexcept
441  {
442  for (auto i = 0u; i != 3u; ++i) d[i] *= x_.d[i];
443  return *this;
444  }
445  __host__ __device__
446  Native_vec_& operator/=(const Native_vec_& x_) noexcept
447  {
448  for (auto i = 0u; i != 3u; ++i) d[i] /= x_.d[i];
449  return *this;
450  }
451 
452  template<
453  typename U = T,
454  typename std::enable_if<std::is_signed<U>{}>::type* = nullptr>
455  __host__ __device__
456  Native_vec_ operator-() const noexcept
457  {
458  auto r{*this};
459  for (auto&& x : r.d) x = -x;
460  return r;
461  }
462 
463  template<
464  typename U = T,
465  typename std::enable_if<std::is_integral<U>{}>::type* = nullptr>
466  __host__ __device__
467  Native_vec_ operator~() const noexcept
468  {
469  auto r{*this};
470  for (auto&& x : r.d) x = ~x;
471  return r;
472  }
473  template<
474  typename U = T,
475  typename std::enable_if<std::is_integral<U>{}>::type* = nullptr>
476  __host__ __device__
477  Native_vec_& operator%=(const Native_vec_& x_) noexcept
478  {
479  for (auto i = 0u; i != 3u; ++i) d[i] %= x_.d[i];
480  return *this;
481  }
482  template<
483  typename U = T,
484  typename std::enable_if<std::is_integral<U>{}>::type* = nullptr>
485  __host__ __device__
486  Native_vec_& operator^=(const Native_vec_& x_) noexcept
487  {
488  for (auto i = 0u; i != 3u; ++i) d[i] ^= x_.d[i];
489  return *this;
490  }
491  template<
492  typename U = T,
493  typename std::enable_if<std::is_integral<U>{}>::type* = nullptr>
494  __host__ __device__
495  Native_vec_& operator|=(const Native_vec_& x_) noexcept
496  {
497  for (auto i = 0u; i != 3u; ++i) d[i] |= x_.d[i];
498  return *this;
499  }
500  template<
501  typename U = T,
502  typename std::enable_if<std::is_integral<U>{}>::type* = nullptr>
503  __host__ __device__
504  Native_vec_& operator&=(const Native_vec_& x_) noexcept
505  {
506  for (auto i = 0u; i != 3u; ++i) d[i] &= x_.d[i];
507  return *this;
508  }
509  template<
510  typename U = T,
511  typename std::enable_if<std::is_integral<U>{}>::type* = nullptr>
512  __host__ __device__
513  Native_vec_& operator>>=(const Native_vec_& x_) noexcept
514  {
515  for (auto i = 0u; i != 3u; ++i) d[i] >>= x_.d[i];
516  return *this;
517  }
518  template<
519  typename U = T,
520  typename std::enable_if<std::is_integral<U>{}>::type* = nullptr>
521  __host__ __device__
522  Native_vec_& operator<<=(const Native_vec_& x_) noexcept
523  {
524  for (auto i = 0u; i != 3u; ++i) d[i] <<= x_.d[i];
525  return *this;
526  }
527 
528  using Vec3_cmp = int __attribute__((vector_size(4 * sizeof(int))));
529  __host__ __device__
530  Vec3_cmp operator==(const Native_vec_& x_) const noexcept
531  {
532  return Vec3_cmp{d[0] == x_.d[0], d[1] == x_.d[1], d[2] == x_.d[2]};
533  }
534  };
535 
536  union {
537  Native_vec_ data;
538  struct {
539  T x;
540  T y;
541  T z;
542  };
543  };
544 
545  using value_type = T;
546 
547  __host__ __device__
548  HIP_vector_base() = default;
549  __host__ __device__
550  explicit
551  constexpr
552  HIP_vector_base(T x_) noexcept : data{x_, x_, x_} {}
553  __host__ __device__
554  constexpr
555  HIP_vector_base(T x_, T y_, T z_) noexcept : data{x_, y_, z_} {}
556  __host__ __device__
557  constexpr
558  HIP_vector_base(const HIP_vector_base&) = default;
559  __host__ __device__
560  constexpr
561  HIP_vector_base(HIP_vector_base&&) = default;
562  __host__ __device__
563  ~HIP_vector_base() = default;
564 
565  __host__ __device__
566  HIP_vector_base& operator=(const HIP_vector_base&) = default;
567  __host__ __device__
568  HIP_vector_base& operator=(HIP_vector_base&&) = default;
569  };
570 
571  template<typename T>
572  struct HIP_vector_base<T, 4> {
573  using Native_vec_ = __NATIVE_VECTOR__(4, T);
574 
575  union
576  #if !__has_attribute(ext_vector_type)
577  alignas(hip_impl::next_pot(4 * sizeof(T)))
578  #endif
579  {
580  Native_vec_ data;
581 #if __HIP_CLANG_ONLY__
582  struct {
583  T x;
584  T y;
585  T z;
586  T w;
587  };
588 #else
589  hip_impl::Scalar_accessor<T, Native_vec_, 0> x;
590  hip_impl::Scalar_accessor<T, Native_vec_, 1> y;
591  hip_impl::Scalar_accessor<T, Native_vec_, 2> z;
592  hip_impl::Scalar_accessor<T, Native_vec_, 3> w;
593 #endif
594  };
595 
596  using value_type = T;
597 
598  __host__ __device__
599  HIP_vector_base() = default;
600  __host__ __device__
601  explicit
602  constexpr
603  HIP_vector_base(T x_) noexcept : data{x_, x_, x_, x_} {}
604  __host__ __device__
605  constexpr
606  HIP_vector_base(T x_, T y_, T z_, T w_) noexcept : data{x_, y_, z_, w_} {}
607  __host__ __device__
608  constexpr
609  HIP_vector_base(const HIP_vector_base&) = default;
610  __host__ __device__
611  constexpr
612  HIP_vector_base(HIP_vector_base&&) = default;
613  __host__ __device__
614  ~HIP_vector_base() = default;
615 
616  __host__ __device__
617  HIP_vector_base& operator=(const HIP_vector_base& x_) noexcept {
618  #if __has_attribute(ext_vector_type)
619  data = x_.data;
620  #else
621  data[0] = x_.data[0];
622  data[1] = x_.data[1];
623  data[2] = x_.data[2];
624  data[3] = x_.data[3];
625  #endif
626 
627  return *this;
628  }
629  };
630 
631  template<typename T, unsigned int rank>
632  struct HIP_vector_type : public HIP_vector_base<T, rank> {
633  using HIP_vector_base<T, rank>::data;
634  using typename HIP_vector_base<T, rank>::Native_vec_;
635 
636  __host__ __device__
637  HIP_vector_type() = default;
638  template<
639  typename U,
640  typename std::enable_if<
641  std::is_convertible<U, T>{}>::type* = nullptr>
642  __host__ __device__
643  explicit
644  constexpr
645  HIP_vector_type(U x_) noexcept
646  : HIP_vector_base<T, rank>{static_cast<T>(x_)}
647  {}
648  template< // TODO: constrain based on type as well.
649  typename... Us,
650  typename std::enable_if<
651  (rank > 1) && sizeof...(Us) == rank>::type* = nullptr>
652  __host__ __device__
653  constexpr
654  HIP_vector_type(Us... xs) noexcept
655  : HIP_vector_base<T, rank>{static_cast<T>(xs)...}
656  {}
657  __host__ __device__
658  constexpr
659  HIP_vector_type(const HIP_vector_type&) = default;
660  __host__ __device__
661  constexpr
662  HIP_vector_type(HIP_vector_type&&) = default;
663  __host__ __device__
664  ~HIP_vector_type() = default;
665 
666  __host__ __device__
667  HIP_vector_type& operator=(const HIP_vector_type&) = default;
668  __host__ __device__
669  HIP_vector_type& operator=(HIP_vector_type&&) = default;
670 
671  // Operators
672  __host__ __device__
673  HIP_vector_type& operator++() noexcept
674  {
675  return *this += HIP_vector_type{1};
676  }
677  __host__ __device__
678  HIP_vector_type operator++(int) noexcept
679  {
680  auto tmp(*this);
681  ++*this;
682  return tmp;
683  }
684 
685  __host__ __device__
686  HIP_vector_type& operator--() noexcept
687  {
688  return *this -= HIP_vector_type{1};
689  }
690  __host__ __device__
691  HIP_vector_type operator--(int) noexcept
692  {
693  auto tmp(*this);
694  --*this;
695  return tmp;
696  }
697 
698  __host__ __device__
699  HIP_vector_type& operator+=(const HIP_vector_type& x) noexcept
700  {
701  data += x.data;
702  return *this;
703  }
704  template<
705  typename U,
706  typename std::enable_if<
707  std::is_convertible<U, T>{}>::type* = nullptr>
708  __host__ __device__
709  HIP_vector_type& operator+=(U x) noexcept
710  {
711  return *this += HIP_vector_type{x};
712  }
713 
714  __host__ __device__
715  HIP_vector_type& operator-=(const HIP_vector_type& x) noexcept
716  {
717  data -= x.data;
718  return *this;
719  }
720  template<
721  typename U,
722  typename std::enable_if<
723  std::is_convertible<U, T>{}>::type* = nullptr>
724  __host__ __device__
725  HIP_vector_type& operator-=(U x) noexcept
726  {
727  return *this -= HIP_vector_type{x};
728  }
729 
730  __host__ __device__
731  HIP_vector_type& operator*=(const HIP_vector_type& x) noexcept
732  {
733  data *= x.data;
734  return *this;
735  }
736  template<
737  typename U,
738  typename std::enable_if<
739  std::is_convertible<U, T>{}>::type* = nullptr>
740  __host__ __device__
741  HIP_vector_type& operator*=(U x) noexcept
742  {
743  return *this *= HIP_vector_type{x};
744  }
745 
746  __host__ __device__
747  HIP_vector_type& operator/=(const HIP_vector_type& x) noexcept
748  {
749  data /= x.data;
750  return *this;
751  }
752  template<
753  typename U,
754  typename std::enable_if<
755  std::is_convertible<U, T>{}>::type* = nullptr>
756  __host__ __device__
757  HIP_vector_type& operator/=(U x) noexcept
758  {
759  return *this /= HIP_vector_type{x};
760  }
761 
762  template<
763  typename U = T,
764  typename std::enable_if<std::is_signed<U>{}>::type* = nullptr>
765  __host__ __device__
766  HIP_vector_type operator-() const noexcept
767  {
768  auto tmp(*this);
769  tmp.data = -tmp.data;
770  return tmp;
771  }
772 
773  template<
774  typename U = T,
775  typename std::enable_if<std::is_integral<U>{}>::type* = nullptr>
776  __host__ __device__
777  HIP_vector_type operator~() const noexcept
778  {
779  HIP_vector_type r{*this};
780  r.data = ~r.data;
781  return r;
782  }
783 
784  template<
785  typename U = T,
786  typename std::enable_if<std::is_integral<U>{}>::type* = nullptr>
787  __host__ __device__
788  HIP_vector_type& operator%=(const HIP_vector_type& x) noexcept
789  {
790  data %= x.data;
791  return *this;
792  }
793 
794  template<
795  typename U = T,
796  typename std::enable_if<std::is_integral<U>{}>::type* = nullptr>
797  __host__ __device__
798  HIP_vector_type& operator^=(const HIP_vector_type& x) noexcept
799  {
800  data ^= x.data;
801  return *this;
802  }
803 
804  template<
805  typename U = T,
806  typename std::enable_if<std::is_integral<U>{}>::type* = nullptr>
807  __host__ __device__
808  HIP_vector_type& operator|=(const HIP_vector_type& x) noexcept
809  {
810  data |= x.data;
811  return *this;
812  }
813 
814  template<
815  typename U = T,
816  typename std::enable_if<std::is_integral<U>{}>::type* = nullptr>
817  __host__ __device__
818  HIP_vector_type& operator&=(const HIP_vector_type& x) noexcept
819  {
820  data &= x.data;
821  return *this;
822  }
823 
824  template<
825  typename U = T,
826  typename std::enable_if<std::is_integral<U>{}>::type* = nullptr>
827  __host__ __device__
828  HIP_vector_type& operator>>=(const HIP_vector_type& x) noexcept
829  {
830  data >>= x.data;
831  return *this;
832  }
833 
834  template<
835  typename U = T,
836  typename std::enable_if<std::is_integral<U>{}>::type* = nullptr>
837  __host__ __device__
838  HIP_vector_type& operator<<=(const HIP_vector_type& x) noexcept
839  {
840  data <<= x.data;
841  return *this;
842  }
843  };
844 
845  template<typename T, unsigned int n>
846  __host__ __device__
847  inline
848  constexpr
849  HIP_vector_type<T, n> operator+(
850  const HIP_vector_type<T, n>& x, const HIP_vector_type<T, n>& y) noexcept
851  {
852  return HIP_vector_type<T, n>{x} += y;
853  }
854  template<typename T, unsigned int n, typename U>
855  __host__ __device__
856  inline
857  constexpr
858  HIP_vector_type<T, n> operator+(
859  const HIP_vector_type<T, n>& x, U y) noexcept
860  {
861  return HIP_vector_type<T, n>{x} += HIP_vector_type<T, n>{y};
862  }
863  template<typename T, unsigned int n, typename U>
864  __host__ __device__
865  inline
866  constexpr
867  HIP_vector_type<T, n> operator+(
868  U x, const HIP_vector_type<T, n>& y) noexcept
869  {
870  return HIP_vector_type<T, n>{x} += y;
871  }
872 
873  template<typename T, unsigned int n>
874  __host__ __device__
875  inline
876  constexpr
877  HIP_vector_type<T, n> operator-(
878  const HIP_vector_type<T, n>& x, const HIP_vector_type<T, n>& y) noexcept
879  {
880  return HIP_vector_type<T, n>{x} -= y;
881  }
882  template<typename T, unsigned int n, typename U>
883  __host__ __device__
884  inline
885  constexpr
886  HIP_vector_type<T, n> operator-(
887  const HIP_vector_type<T, n>& x, U y) noexcept
888  {
889  return HIP_vector_type<T, n>{x} -= HIP_vector_type<T, n>{y};
890  }
891  template<typename T, unsigned int n, typename U>
892  __host__ __device__
893  inline
894  constexpr
895  HIP_vector_type<T, n> operator-(
896  U x, const HIP_vector_type<T, n>& y) noexcept
897  {
898  return HIP_vector_type<T, n>{x} -= y;
899  }
900 
901  template<typename T, unsigned int n>
902  __host__ __device__
903  inline
904  constexpr
905  HIP_vector_type<T, n> operator*(
906  const HIP_vector_type<T, n>& x, const HIP_vector_type<T, n>& y) noexcept
907  {
908  return HIP_vector_type<T, n>{x} *= y;
909  }
910  template<typename T, unsigned int n, typename U>
911  __host__ __device__
912  inline
913  constexpr
914  HIP_vector_type<T, n> operator*(
915  const HIP_vector_type<T, n>& x, U y) noexcept
916  {
917  return HIP_vector_type<T, n>{x} *= HIP_vector_type<T, n>{y};
918  }
919  template<typename T, unsigned int n, typename U>
920  __host__ __device__
921  inline
922  constexpr
923  HIP_vector_type<T, n> operator*(
924  U x, const HIP_vector_type<T, n>& y) noexcept
925  {
926  return HIP_vector_type<T, n>{x} *= y;
927  }
928 
929  template<typename T, unsigned int n>
930  __host__ __device__
931  inline
932  constexpr
933  HIP_vector_type<T, n> operator/(
934  const HIP_vector_type<T, n>& x, const HIP_vector_type<T, n>& y) noexcept
935  {
936  return HIP_vector_type<T, n>{x} /= y;
937  }
938  template<typename T, unsigned int n, typename U>
939  __host__ __device__
940  inline
941  constexpr
942  HIP_vector_type<T, n> operator/(
943  const HIP_vector_type<T, n>& x, U y) noexcept
944  {
945  return HIP_vector_type<T, n>{x} /= HIP_vector_type<T, n>{y};
946  }
947  template<typename T, unsigned int n, typename U>
948  __host__ __device__
949  inline
950  constexpr
951  HIP_vector_type<T, n> operator/(
952  U x, const HIP_vector_type<T, n>& y) noexcept
953  {
954  return HIP_vector_type<T, n>{x} /= y;
955  }
956 
957  template<typename V>
958  __host__ __device__
959  inline
960  constexpr
961  bool _hip_any_zero(const V& x, int n) noexcept
962  {
963  return
964  (n == -1) ? true : ((x[n] == 0) ? false : _hip_any_zero(x, n - 1));
965  }
966 
967  template<typename T, unsigned int n>
968  __host__ __device__
969  inline
970  constexpr
971  bool operator==(
972  const HIP_vector_type<T, n>& x, const HIP_vector_type<T, n>& y) noexcept
973  {
974  return _hip_any_zero(x.data == y.data, n - 1);
975  }
976  template<typename T, unsigned int n, typename U>
977  __host__ __device__
978  inline
979  constexpr
980  bool operator==(const HIP_vector_type<T, n>& x, U y) noexcept
981  {
982  return x == HIP_vector_type<T, n>{y};
983  }
984  template<typename T, unsigned int n, typename U>
985  __host__ __device__
986  inline
987  constexpr
988  bool operator==(U x, const HIP_vector_type<T, n>& y) noexcept
989  {
990  return HIP_vector_type<T, n>{x} == y;
991  }
992 
993  template<typename T, unsigned int n>
994  __host__ __device__
995  inline
996  constexpr
997  bool operator!=(
998  const HIP_vector_type<T, n>& x, const HIP_vector_type<T, n>& y) noexcept
999  {
1000  return !(x == y);
1001  }
1002  template<typename T, unsigned int n, typename U>
1003  __host__ __device__
1004  inline
1005  constexpr
1006  bool operator!=(const HIP_vector_type<T, n>& x, U y) noexcept
1007  {
1008  return !(x == y);
1009  }
1010  template<typename T, unsigned int n, typename U>
1011  __host__ __device__
1012  inline
1013  constexpr
1014  bool operator!=(U x, const HIP_vector_type<T, n>& y) noexcept
1015  {
1016  return !(x == y);
1017  }
1018 
1019  template<
1020  typename T,
1021  unsigned int n,
1022  typename std::enable_if<std::is_integral<T>{}>* = nullptr>
1023  __host__ __device__
1024  inline
1025  constexpr
1026  HIP_vector_type<T, n> operator%(
1027  const HIP_vector_type<T, n>& x, const HIP_vector_type<T, n>& y) noexcept
1028  {
1029  return HIP_vector_type<T, n>{x} %= y;
1030  }
1031  template<
1032  typename T,
1033  unsigned int n,
1034  typename U,
1035  typename std::enable_if<std::is_integral<T>{}>* = nullptr>
1036  __host__ __device__
1037  inline
1038  constexpr
1039  HIP_vector_type<T, n> operator%(
1040  const HIP_vector_type<T, n>& x, U y) noexcept
1041  {
1042  return HIP_vector_type<T, n>{x} %= HIP_vector_type<T, n>{y};
1043  }
1044  template<
1045  typename T,
1046  unsigned int n,
1047  typename U,
1048  typename std::enable_if<std::is_integral<T>{}>* = nullptr>
1049  __host__ __device__
1050  inline
1051  constexpr
1052  HIP_vector_type<T, n> operator%(
1053  U x, const HIP_vector_type<T, n>& y) noexcept
1054  {
1055  return HIP_vector_type<T, n>{x} %= y;
1056  }
1057 
1058  template<
1059  typename T,
1060  unsigned int n,
1061  typename std::enable_if<std::is_integral<T>{}>* = nullptr>
1062  __host__ __device__
1063  inline
1064  constexpr
1065  HIP_vector_type<T, n> operator^(
1066  const HIP_vector_type<T, n>& x, const HIP_vector_type<T, n>& y) noexcept
1067  {
1068  return HIP_vector_type<T, n>{x} ^= y;
1069  }
1070  template<
1071  typename T,
1072  unsigned int n,
1073  typename U,
1074  typename std::enable_if<std::is_integral<T>{}>* = nullptr>
1075  __host__ __device__
1076  inline
1077  constexpr
1078  HIP_vector_type<T, n> operator^(
1079  const HIP_vector_type<T, n>& x, U y) noexcept
1080  {
1081  return HIP_vector_type<T, n>{x} ^= HIP_vector_type<T, n>{y};
1082  }
1083  template<
1084  typename T,
1085  unsigned int n,
1086  typename U,
1087  typename std::enable_if<std::is_integral<T>{}>* = nullptr>
1088  __host__ __device__
1089  inline
1090  constexpr
1091  HIP_vector_type<T, n> operator^(
1092  U x, const HIP_vector_type<T, n>& y) noexcept
1093  {
1094  return HIP_vector_type<T, n>{x} ^= y;
1095  }
1096 
1097  template<
1098  typename T,
1099  unsigned int n,
1100  typename std::enable_if<std::is_integral<T>{}>* = nullptr>
1101  __host__ __device__
1102  inline
1103  constexpr
1104  HIP_vector_type<T, n> operator|(
1105  const HIP_vector_type<T, n>& x, const HIP_vector_type<T, n>& y) noexcept
1106  {
1107  return HIP_vector_type<T, n>{x} |= y;
1108  }
1109  template<
1110  typename T,
1111  unsigned int n,
1112  typename U,
1113  typename std::enable_if<std::is_integral<T>{}>* = nullptr>
1114  __host__ __device__
1115  inline
1116  constexpr
1117  HIP_vector_type<T, n> operator|(
1118  const HIP_vector_type<T, n>& x, U y) noexcept
1119  {
1120  return HIP_vector_type<T, n>{x} |= HIP_vector_type<T, n>{y};
1121  }
1122  template<
1123  typename T,
1124  unsigned int n,
1125  typename U,
1126  typename std::enable_if<std::is_integral<T>{}>* = nullptr>
1127  __host__ __device__
1128  inline
1129  constexpr
1130  HIP_vector_type<T, n> operator|(
1131  U x, const HIP_vector_type<T, n>& y) noexcept
1132  {
1133  return HIP_vector_type<T, n>{x} |= y;
1134  }
1135 
1136  template<
1137  typename T,
1138  unsigned int n,
1139  typename std::enable_if<std::is_integral<T>{}>* = nullptr>
1140  __host__ __device__
1141  inline
1142  constexpr
1143  HIP_vector_type<T, n> operator&(
1144  const HIP_vector_type<T, n>& x, const HIP_vector_type<T, n>& y) noexcept
1145  {
1146  return HIP_vector_type<T, n>{x} &= y;
1147  }
1148  template<
1149  typename T,
1150  unsigned int n,
1151  typename U,
1152  typename std::enable_if<std::is_integral<T>{}>* = nullptr>
1153  __host__ __device__
1154  inline
1155  constexpr
1156  HIP_vector_type<T, n> operator&(
1157  const HIP_vector_type<T, n>& x, U y) noexcept
1158  {
1159  return HIP_vector_type<T, n>{x} &= HIP_vector_type<T, n>{y};
1160  }
1161  template<
1162  typename T,
1163  unsigned int n,
1164  typename U,
1165  typename std::enable_if<std::is_integral<T>{}>* = nullptr>
1166  __host__ __device__
1167  inline
1168  constexpr
1169  HIP_vector_type<T, n> operator&(
1170  U x, const HIP_vector_type<T, n>& y) noexcept
1171  {
1172  return HIP_vector_type<T, n>{x} &= y;
1173  }
1174 
1175  template<
1176  typename T,
1177  unsigned int n,
1178  typename std::enable_if<std::is_integral<T>{}>* = nullptr>
1179  __host__ __device__
1180  inline
1181  constexpr
1182  HIP_vector_type<T, n> operator>>(
1183  const HIP_vector_type<T, n>& x, const HIP_vector_type<T, n>& y) noexcept
1184  {
1185  return HIP_vector_type<T, n>{x} >>= y;
1186  }
1187  template<
1188  typename T,
1189  unsigned int n,
1190  typename U,
1191  typename std::enable_if<std::is_integral<T>{}>* = nullptr>
1192  __host__ __device__
1193  inline
1194  constexpr
1195  HIP_vector_type<T, n> operator>>(
1196  const HIP_vector_type<T, n>& x, U y) noexcept
1197  {
1198  return HIP_vector_type<T, n>{x} >>= HIP_vector_type<T, n>{y};
1199  }
1200  template<
1201  typename T,
1202  unsigned int n,
1203  typename U,
1204  typename std::enable_if<std::is_integral<T>{}>* = nullptr>
1205  __host__ __device__
1206  inline
1207  constexpr
1208  HIP_vector_type<T, n> operator>>(
1209  U x, const HIP_vector_type<T, n>& y) noexcept
1210  {
1211  return HIP_vector_type<T, n>{x} >>= y;
1212  }
1213 
1214  template<
1215  typename T,
1216  unsigned int n,
1217  typename std::enable_if<std::is_integral<T>{}>* = nullptr>
1218  __host__ __device__
1219  inline
1220  constexpr
1221  HIP_vector_type<T, n> operator<<(
1222  const HIP_vector_type<T, n>& x, const HIP_vector_type<T, n>& y) noexcept
1223  {
1224  return HIP_vector_type<T, n>{x} <<= y;
1225  }
1226  template<
1227  typename T,
1228  unsigned int n,
1229  typename U,
1230  typename std::enable_if<std::is_integral<T>{}>* = nullptr>
1231  __host__ __device__
1232  inline
1233  constexpr
1234  HIP_vector_type<T, n> operator<<(
1235  const HIP_vector_type<T, n>& x, U y) noexcept
1236  {
1237  return HIP_vector_type<T, n>{x} <<= HIP_vector_type<T, n>{y};
1238  }
1239  template<
1240  typename T,
1241  unsigned int n,
1242  typename U,
1243  typename std::enable_if<std::is_arithmetic<U>::value>::type,
1244  typename std::enable_if<std::is_integral<T>{}>* = nullptr>
1245  __host__ __device__
1246  inline
1247  constexpr
1248  HIP_vector_type<T, n> operator<<(
1249  U x, const HIP_vector_type<T, n>& y) noexcept
1250  {
1251  return HIP_vector_type<T, n>{x} <<= y;
1252  }
1253 
1254  #define __MAKE_VECTOR_TYPE__(CUDA_name, T) \
1255  using CUDA_name##1 = HIP_vector_type<T, 1>;\
1256  using CUDA_name##2 = HIP_vector_type<T, 2>;\
1257  using CUDA_name##3 = HIP_vector_type<T, 3>;\
1258  using CUDA_name##4 = HIP_vector_type<T, 4>;
1259 #else
1260  #define __MAKE_VECTOR_TYPE__(CUDA_name, T) \
1261  typedef struct {\
1262  T x;\
1263  } CUDA_name##1;\
1264  typedef struct {\
1265  T x;\
1266  T y;\
1267  } CUDA_name##2;\
1268  typedef struct {\
1269  T x;\
1270  T y;\
1271  T z;\
1272  } CUDA_name##3;\
1273  typedef struct {\
1274  T x;\
1275  T y;\
1276  T z;\
1277  T w;\
1278  } CUDA_name##4;
1279 #endif
1280 
1281 __MAKE_VECTOR_TYPE__(uchar, unsigned char);
1282 __MAKE_VECTOR_TYPE__(char, char);
1283 __MAKE_VECTOR_TYPE__(ushort, unsigned short);
1284 __MAKE_VECTOR_TYPE__(short, short);
1285 __MAKE_VECTOR_TYPE__(uint, unsigned int);
1286 __MAKE_VECTOR_TYPE__(int, int);
1287 __MAKE_VECTOR_TYPE__(ulong, unsigned long);
1288 __MAKE_VECTOR_TYPE__(long, long);
1289 __MAKE_VECTOR_TYPE__(ulonglong, unsigned long long);
1290 __MAKE_VECTOR_TYPE__(longlong, long long);
1291 __MAKE_VECTOR_TYPE__(float, float);
1292 __MAKE_VECTOR_TYPE__(double, double);
1293 
1294 #ifdef __cplusplus
1295 #define DECLOP_MAKE_ONE_COMPONENT(comp, type) \
1296  static inline __device__ __host__ \
1297  type make_##type(comp x) { type r{x}; return r; }
1298 
1299 #define DECLOP_MAKE_TWO_COMPONENT(comp, type) \
1300  static inline __device__ __host__ \
1301  type make_##type(comp x, comp y) { type r{x, y}; return r; }
1302 
1303 #define DECLOP_MAKE_THREE_COMPONENT(comp, type) \
1304  static inline __device__ __host__ \
1305  type make_##type(comp x, comp y, comp z) { type r{x, y, z}; return r; }
1306 
1307 #define DECLOP_MAKE_FOUR_COMPONENT(comp, type) \
1308  static inline __device__ __host__ \
1309  type make_##type(comp x, comp y, comp z, comp w) { \
1310  type r{x, y, z, w}; \
1311  return r; \
1312  }
1313 #else
1314  #define DECLOP_MAKE_ONE_COMPONENT(comp, type) \
1315  static inline __device__ __host__ \
1316  type make_##type(comp x) { type r; r.x =x; return r; }
1317 
1318  #define DECLOP_MAKE_TWO_COMPONENT(comp, type) \
1319  static inline __device__ __host__ \
1320  type make_##type(comp x, comp y) { type r; r.x=x; r.y=y; return r; }
1321 
1322  #define DECLOP_MAKE_THREE_COMPONENT(comp, type) \
1323  static inline __device__ __host__ \
1324  type make_##type(comp x, comp y, comp z) { type r; r.x=x; r.y=y; r.z=z; return r; }
1325 
1326  #define DECLOP_MAKE_FOUR_COMPONENT(comp, type) \
1327  static inline __device__ __host__ \
1328  type make_##type(comp x, comp y, comp z, comp w) { \
1329  type r; r.x=x; r.y=y; r.z=z; r.w=w; \
1330  return r; \
1331  }
1332 #endif
1333 
1334 DECLOP_MAKE_ONE_COMPONENT(unsigned char, uchar1);
1335 DECLOP_MAKE_TWO_COMPONENT(unsigned char, uchar2);
1336 DECLOP_MAKE_THREE_COMPONENT(unsigned char, uchar3);
1337 DECLOP_MAKE_FOUR_COMPONENT(unsigned char, uchar4);
1338 
1339 DECLOP_MAKE_ONE_COMPONENT(signed char, char1);
1340 DECLOP_MAKE_TWO_COMPONENT(signed char, char2);
1341 DECLOP_MAKE_THREE_COMPONENT(signed char, char3);
1342 DECLOP_MAKE_FOUR_COMPONENT(signed char, char4);
1343 
1344 DECLOP_MAKE_ONE_COMPONENT(unsigned short, ushort1);
1345 DECLOP_MAKE_TWO_COMPONENT(unsigned short, ushort2);
1346 DECLOP_MAKE_THREE_COMPONENT(unsigned short, ushort3);
1347 DECLOP_MAKE_FOUR_COMPONENT(unsigned short, ushort4);
1348 
1349 DECLOP_MAKE_ONE_COMPONENT(signed short, short1);
1350 DECLOP_MAKE_TWO_COMPONENT(signed short, short2);
1351 DECLOP_MAKE_THREE_COMPONENT(signed short, short3);
1352 DECLOP_MAKE_FOUR_COMPONENT(signed short, short4);
1353 
1354 DECLOP_MAKE_ONE_COMPONENT(unsigned int, uint1);
1355 DECLOP_MAKE_TWO_COMPONENT(unsigned int, uint2);
1356 DECLOP_MAKE_THREE_COMPONENT(unsigned int, uint3);
1357 DECLOP_MAKE_FOUR_COMPONENT(unsigned int, uint4);
1358 
1359 DECLOP_MAKE_ONE_COMPONENT(signed int, int1);
1360 DECLOP_MAKE_TWO_COMPONENT(signed int, int2);
1361 DECLOP_MAKE_THREE_COMPONENT(signed int, int3);
1362 DECLOP_MAKE_FOUR_COMPONENT(signed int, int4);
1363 
1364 DECLOP_MAKE_ONE_COMPONENT(float, float1);
1365 DECLOP_MAKE_TWO_COMPONENT(float, float2);
1366 DECLOP_MAKE_THREE_COMPONENT(float, float3);
1367 DECLOP_MAKE_FOUR_COMPONENT(float, float4);
1368 
1369 DECLOP_MAKE_ONE_COMPONENT(double, double1);
1370 DECLOP_MAKE_TWO_COMPONENT(double, double2);
1371 DECLOP_MAKE_THREE_COMPONENT(double, double3);
1372 DECLOP_MAKE_FOUR_COMPONENT(double, double4);
1373 
1374 DECLOP_MAKE_ONE_COMPONENT(unsigned long, ulong1);
1375 DECLOP_MAKE_TWO_COMPONENT(unsigned long, ulong2);
1376 DECLOP_MAKE_THREE_COMPONENT(unsigned long, ulong3);
1377 DECLOP_MAKE_FOUR_COMPONENT(unsigned long, ulong4);
1378 
1379 DECLOP_MAKE_ONE_COMPONENT(signed long, long1);
1380 DECLOP_MAKE_TWO_COMPONENT(signed long, long2);
1381 DECLOP_MAKE_THREE_COMPONENT(signed long, long3);
1382 DECLOP_MAKE_FOUR_COMPONENT(signed long, long4);
1383 
1384 DECLOP_MAKE_ONE_COMPONENT(unsigned long long, ulonglong1);
1385 DECLOP_MAKE_TWO_COMPONENT(unsigned long long, ulonglong2);
1386 DECLOP_MAKE_THREE_COMPONENT(unsigned long long, ulonglong3);
1387 DECLOP_MAKE_FOUR_COMPONENT(unsigned long long, ulonglong4);
1388 
1389 DECLOP_MAKE_ONE_COMPONENT(signed long long, longlong1);
1390 DECLOP_MAKE_TWO_COMPONENT(signed long long, longlong2);
1391 DECLOP_MAKE_THREE_COMPONENT(signed long long, longlong3);
1392 DECLOP_MAKE_FOUR_COMPONENT(signed long long, longlong4);
1393 #else // !defined(__has_attribute)
1394 
1395 #if defined(_MSC_VER)
1396 #include <mmintrin.h>
1397 #include <xmmintrin.h>
1398 #include <emmintrin.h>
1399 #include <immintrin.h>
1400 
1401 typedef union { char data; } char1;
1402 typedef union { char data[2]; } char2;
1403 typedef union { char data[4]; } char4;
1404 typedef union { char4 data; } char3;
1405 typedef union { __m64 data; } char8;
1406 typedef union { __m128i data; } char16;
1407 
1408 typedef union { unsigned char data; } uchar1;
1409 typedef union { unsigned char data[2]; } uchar2;
1410 typedef union { unsigned char data[4]; } uchar4;
1411 typedef union { uchar4 data; } uchar3;
1412 typedef union { __m64 data; } uchar8;
1413 typedef union { __m128i data; } uchar16;
1414 
1415 typedef union { short data; } short1;
1416 typedef union { short data[2]; } short2;
1417 typedef union { __m64 data; } short4;
1418 typedef union { short4 data; } short3;
1419 typedef union { __m128i data; } short8;
1420 typedef union { __m128i data[2]; } short16;
1421 
1422 typedef union { unsigned short data; } ushort1;
1423 typedef union { unsigned short data[2]; } ushort2;
1424 typedef union { __m64 data; } ushort4;
1425 typedef union { ushort4 data; } ushort3;
1426 typedef union { __m128i data; } ushort8;
1427 typedef union { __m128i data[2]; } ushort16;
1428 
1429 typedef union { int data; } int1;
1430 typedef union { __m64 data; } int2;
1431 typedef union { __m128i data; } int4;
1432 typedef union { int4 data; } int3;
1433 typedef union { __m128i data[2]; } int8;
1434 typedef union { __m128i data[4];} int16;
1435 
1436 typedef union { unsigned int data; } uint1;
1437 typedef union { __m64 data; } uint2;
1438 typedef union { __m128i data; } uint4;
1439 typedef union { uint4 data; } uint3;
1440 typedef union { __m128i data[2]; } uint8;
1441 typedef union { __m128i data[4]; } uint16;
1442 
1443 #if !defined(_WIN64)
1444 typedef union { int data; } long1;
1445 typedef union { __m64 data; } long2;
1446 typedef union { __m128i data; } long4;
1447 typedef union { long4 data; } long3;
1448 typedef union { __m128i data[2]; } long8;
1449 typedef union { __m128i data[4]; } long16;
1450 
1451 typedef union { unsigned int data; } ulong1;
1452 typedef union { __m64 data; } ulong2;
1453 typedef union { __m128i data; } ulong4;
1454 typedef union { ulong4 data; } ulong3;
1455 typedef union { __m128i data[2]; } ulong8;
1456 typedef union { __m128i data[4]; } ulong16;
1457 #else // defined(_WIN64)
1458 typedef union { __m64 data; } long1;
1459 typedef union { __m128i data; } long2;
1460 typedef union { __m128i data[2]; } long4;
1461 typedef union { long4 data; } long3;
1462 typedef union { __m128i data[4]; } long8;
1463 typedef union { __m128i data[8]; } long16;
1464 
1465 typedef union { __m64 data; } ulong1;
1466 typedef union { __m128i data; } ulong2;
1467 typedef union { __m128i data[2]; } ulong4;
1468 typedef union { ulong4 data; } ulong3;
1469 typedef union { __m128i data[4]; } ulong8;
1470 typedef union { __m128i data[8]; } ulong16;
1471 #endif // defined(_WIN64)
1472 
1473 typedef union { __m64 data; } longlong1;
1474 typedef union { __m128i data; } longlong2;
1475 typedef union { __m128i data[2]; } longlong4;
1476 typedef union { longlong4 data; } longlong3;
1477 typedef union { __m128i data[4]; } longlong8;
1478 typedef union { __m128i data[8]; } longlong16;
1479 
1480 typedef union { __m64 data; } ulonglong1;
1481 typedef union { __m128i data; } ulonglong2;
1482 typedef union { __m128i data[2]; } ulonglong4;
1483 typedef union { ulonglong4 data; } ulonglong3;
1484 typedef union { __m128i data[4]; } ulonglong8;
1485 typedef union { __m128i data[8]; } ulonglong16;
1486 
1487 typedef union { float data; } float1;
1488 typedef union { __m64 data; } float2;
1489 typedef union { __m128 data; } float4;
1490 typedef union { float4 data; } float3;
1491 typedef union { __m256 data; } float8;
1492 typedef union { __m256 data[2]; } float16;
1493 
1494 typedef union { double data; } double1;
1495 typedef union { __m128d data; } double2;
1496 typedef union { __m256d data; } double4;
1497 typedef union { double4 data; } double3;
1498 typedef union { __m256d data[2]; } double8;
1499 typedef union { __m256d data[4]; } double16;
1500 
1501 #else // !defined(_MSC_VER)
1502 
1503 typedef union { char data; } char1;
1504 typedef union { char data[2]; } char2;
1505 typedef union { char data[4]; } char4;
1506 typedef union { char data[8]; } char8;
1507 typedef union { char data[16]; } char16;
1508 typedef union { char4 data; } char3;
1509 
1510 typedef union { unsigned char data; } uchar1;
1511 typedef union { unsigned char data[2]; } uchar2;
1512 typedef union { unsigned char data[4]; } uchar4;
1513 typedef union { unsigned char data[8]; } uchar8;
1514 typedef union { unsigned char data[16]; } uchar16;
1515 typedef union { uchar4 data; } uchar3;
1516 
1517 typedef union { short data; } short1;
1518 typedef union { short data[2]; } short2;
1519 typedef union { short data[4]; } short4;
1520 typedef union { short data[8]; } short8;
1521 typedef union { short data[16]; } short16;
1522 typedef union { short4 data; } short3;
1523 
1524 typedef union { unsigned short data; } ushort1;
1525 typedef union { unsigned short data[2]; } ushort2;
1526 typedef union { unsigned short data[4]; } ushort4;
1527 typedef union { unsigned short data[8]; } ushort8;
1528 typedef union { unsigned short data[16]; } ushort16;
1529 typedef union { ushort4 data; } ushort3;
1530 
1531 typedef union { int data; } int1;
1532 typedef union { int data[2]; } int2;
1533 typedef union { int data[4]; } int4;
1534 typedef union { int data[8]; } int8;
1535 typedef union { int data[16]; } int16;
1536 typedef union { int4 data; } int3;
1537 
1538 typedef union { unsigned int data; } uint1;
1539 typedef union { unsigned int data[2]; } uint2;
1540 typedef union { unsigned int data[4]; } uint4;
1541 typedef union { unsigned int data[8]; } uint8;
1542 typedef union { unsigned int data[16]; } uint16;
1543 typedef union { uint4 data; } uint3;
1544 
1545 typedef union { long data; } long1;
1546 typedef union { long data[2]; } long2;
1547 typedef union { long data[4]; } long4;
1548 typedef union { long data[8]; } long8;
1549 typedef union { long data[16]; } long16;
1550 typedef union { long4 data; } long3;
1551 
1552 typedef union { unsigned long data; } ulong1;
1553 typedef union { unsigned long data[2]; } ulong2;
1554 typedef union { unsigned long data[4]; } ulong4;
1555 typedef union { unsigned long data[8]; } ulong8;
1556 typedef union { unsigned long data[16]; } ulong16;
1557 typedef union { ulong4 data; } ulong3;
1558 
1559 typedef union { long long data; } longlong1;
1560 typedef union { long long data[2]; } longlong2;
1561 typedef union { long long data[4]; } longlong4;
1562 typedef union { long long data[8]; } longlong8;
1563 typedef union { long long data[16]; } longlong16;
1564 typedef union { longlong4 data; } longlong3;
1565 
1566 typedef union { unsigned long long data; } ulonglong1;
1567 typedef union { unsigned long long data[2]; } ulonglong2;
1568 typedef union { unsigned long long data[4]; } ulonglong4;
1569 typedef union { unsigned long long data[8]; } ulonglong8;
1570 typedef union { unsigned long long data[16]; } ulonglong16;
1571 typedef union { ulonglong4 data; } ulonglong3;
1572 
1573 typedef union { float data; } float1;
1574 typedef union { float data[2]; } float2;
1575 typedef union { float data[4]; } float4;
1576 typedef union { float data[8]; } float8;
1577 typedef union { float data[16]; } float16;
1578 typedef union { float4 data; } float3;
1579 
1580 typedef union { double data; } double1;
1581 typedef union { double data[2]; } double2;
1582 typedef union { double data[4]; } double4;
1583 typedef union { double data[8]; } double8;
1584 typedef union { double data[16]; } double16;
1585 typedef union { double4 data; } double3;
1586 
1587 #endif // defined(_MSC_VER)
1588 #endif // defined(__has_attribute)
1589 #endif
uint2
Definition: hip_vector_types.h:1539
longlong2
Definition: hip_vector_types.h:1560
int4
Definition: hip_vector_types.h:1533
double8
Definition: hip_vector_types.h:1583
long2
Definition: hip_vector_types.h:1546
ulong16
Definition: hip_vector_types.h:1556
uchar2
Definition: hip_vector_types.h:1511
ulong4
Definition: hip_vector_types.h:1554
char4
Definition: hip_vector_types.h:1505
ushort1
Definition: hip_vector_types.h:1524
char2
Definition: hip_vector_types.h:1504
int8
Definition: hip_vector_types.h:1534
ulong1
Definition: hip_vector_types.h:1552
double2
Definition: hip_vector_types.h:1581
uint3
Definition: hip_vector_types.h:1543
long3
Definition: hip_vector_types.h:1550
ulong8
Definition: hip_vector_types.h:1555
long16
Definition: hip_vector_types.h:1549
uint1
Definition: hip_vector_types.h:1538
int3
Definition: hip_vector_types.h:1536
long1
Definition: hip_vector_types.h:1545
uint16
Definition: hip_vector_types.h:1542
float4
Definition: hip_vector_types.h:1575
double16
Definition: hip_vector_types.h:1584
char3
Definition: hip_vector_types.h:1508
short3
Definition: hip_vector_types.h:1522
char1
Definition: hip_vector_types.h:1503
longlong3
Definition: hip_vector_types.h:1564
ulong2
Definition: hip_vector_types.h:1553
ulonglong8
Definition: hip_vector_types.h:1569
__host__
#define __host__
Definition: host_defines.h:59
float8
Definition: hip_vector_types.h:1576
int2
Definition: hip_vector_types.h:1532
host_defines.h
TODO-doc.
float3
Definition: hip_vector_types.h:1578
ulonglong16
Definition: hip_vector_types.h:1570
uchar3
Definition: hip_vector_types.h:1515
ulonglong4
Definition: hip_vector_types.h:1568
float2
Definition: hip_vector_types.h:1574
ushort16
Definition: hip_vector_types.h:1528
short4
Definition: hip_vector_types.h:1519
longlong4
Definition: hip_vector_types.h:1561
uchar16
Definition: hip_vector_types.h:1514
ushort8
Definition: hip_vector_types.h:1527
short1
Definition: hip_vector_types.h:1517
int1
Definition: hip_vector_types.h:1531
double3
Definition: hip_vector_types.h:1585
char16
Definition: hip_vector_types.h:1507
ulonglong2
Definition: hip_vector_types.h:1567
short8
Definition: hip_vector_types.h:1520
longlong8
Definition: hip_vector_types.h:1562
uchar4
Definition: hip_vector_types.h:1512
ulonglong3
Definition: hip_vector_types.h:1571
ushort4
Definition: hip_vector_types.h:1526
float16
Definition: hip_vector_types.h:1577
float1
Definition: hip_vector_types.h:1573
short16
Definition: hip_vector_types.h:1521
longlong1
Definition: hip_vector_types.h:1559
uchar1
Definition: hip_vector_types.h:1510
uint8
Definition: hip_vector_types.h:1541
short2
Definition: hip_vector_types.h:1518
long8
Definition: hip_vector_types.h:1548
ulong3
Definition: hip_vector_types.h:1557
uchar8
Definition: hip_vector_types.h:1513
double4
Definition: hip_vector_types.h:1582
longlong16
Definition: hip_vector_types.h:1563
ulonglong1
Definition: hip_vector_types.h:1566
ushort2
Definition: hip_vector_types.h:1525
double1
Definition: hip_vector_types.h:1580
ushort3
Definition: hip_vector_types.h:1529
char8
Definition: hip_vector_types.h:1506
uint4
Definition: hip_vector_types.h:1540
int16
Definition: hip_vector_types.h:1535
long4
Definition: hip_vector_types.h:1547