HIP: Heterogenous-computing Interface for Portability
hip_vector_types.h
Go to the documentation of this file.
1 /*
2 Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved.
3 
4 Permission is hereby granted, free of charge, to any person obtaining a copy
5 of this software and associated documentation files (the "Software"), to deal
6 in the Software without restriction, including without limitation the rights
7 to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
8 copies of the Software, and to permit persons to whom the Software is
9 furnished to do so, subject to the following conditions:
10 
11 The above copyright notice and this permission notice shall be included in
12 all copies or substantial portions of the Software.
13 
14 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
17 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
19 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
20 THE SOFTWARE.
21 */
22 
28 #ifndef HIP_INCLUDE_HIP_HCC_DETAIL_HIP_VECTOR_TYPES_H
29 #define HIP_INCLUDE_HIP_HCC_DETAIL_HIP_VECTOR_TYPES_H
30 
31 #if defined(__HCC__) && (__hcc_workweek__ < 16032)
32 #error("This version of HIP requires a newer version of HCC.");
33 #endif
34 
36 
37 #if !defined(_MSC_VER)|| __clang__
38  #if __has_attribute(ext_vector_type)
39  #define __NATIVE_VECTOR__(n, ...) __attribute__((ext_vector_type(n)))
40  #else
41  #define __NATIVE_VECTOR__(n, ...) [n]
42  #endif
43 
44 #if defined(__cplusplus)
45  #include <array>
46  #include <iosfwd>
47  #include <type_traits>
48 
49  namespace hip_impl {
50  template<typename, typename, unsigned int> struct Scalar_accessor;
51  } // Namespace hip_impl.
52 
53  namespace std {
54  template<typename T, typename U, unsigned int n>
55  struct is_integral<hip_impl::Scalar_accessor<T, U, n>>
56  : is_integral<T> {};
57  template<typename T, typename U, unsigned int n>
58  struct is_floating_point<hip_impl::Scalar_accessor<T, U, n>>
59  : is_floating_point<T> {};
60  } // Namespace std.
61 
62  namespace hip_impl {
63  template<typename T, typename Vector, unsigned int idx>
64  struct Scalar_accessor {
65  struct Address {
66  const Scalar_accessor* p;
67 
68  __host__ __device__
69  operator const T*() const noexcept {
70  return &reinterpret_cast<const T*>(p)[idx];
71  }
72  __host__ __device__
73  operator const T*() const volatile noexcept {
74  return &reinterpret_cast<const T*>(p)[idx];
75  }
76  __host__ __device__
77  operator T*() noexcept {
78  return &reinterpret_cast<T*>(
79  const_cast<Scalar_accessor*>(p))[idx];
80  }
81  __host__ __device__
82  operator T*() volatile noexcept {
83  return &reinterpret_cast<T*>(
84  const_cast<Scalar_accessor*>(p))[idx];
85  }
86  };
87 
88  friend
89  inline
90  std::ostream& operator<<(std::ostream& os,
91  const Scalar_accessor& x) noexcept {
92  return os << x.data[idx];
93  }
94  friend
95  inline
96  std::istream& operator>>(std::istream& is,
97  Scalar_accessor& x) noexcept {
98  T tmp;
99  is >> tmp;
100  x.data[idx] = tmp;
101 
102  return is;
103  }
104 
105  // Idea from https://t0rakka.silvrback.com/simd-scalar-accessor
106  Vector data;
107 
108  __host__ __device__
109  operator T() const noexcept { return data[idx]; }
110  __host__ __device__
111  operator T() const volatile noexcept { return data[idx]; }
112 
113 #ifdef __HIP_ENABLE_VECTOR_SCALAR_ACCESSORY_ENUM_CONVERSION__
114  // The conversions to enum are fairly ghastly, but unfortunately used in
115  // some pre-existing, difficult to modify, code.
116  template<
117  typename U,
118  typename std::enable_if<
119  !std::is_same<U, T>{} &&
120  std::is_enum<U>{} &&
121  std::is_convertible<
122  T, typename std::enable_if<std::is_enum<U>::value, std::underlying_type<U>>::type::type>{}>::type* = nullptr>
123  __host__ __device__
124  operator U() const noexcept { return static_cast<U>(data[idx]); }
125  template<
126  typename U,
127  typename std::enable_if<
128  !std::is_same<U, T>{} &&
129  std::is_enum<U>{} &&
130  std::is_convertible<
131  T, typename std::enable_if<std::is_enum<U>::value, std::underlying_type<U>>::type::type>{}>::type* = nullptr>
132  __host__ __device__
133  operator U() const volatile noexcept { return static_cast<U>(data[idx]); }
134 #endif
135 
136  __host__ __device__
137  operator T&() noexcept {
138  return reinterpret_cast<
139  T (&)[sizeof(Vector) / sizeof(T)]>(data)[idx];
140  }
141  __host__ __device__
142  operator volatile T&() volatile noexcept {
143  return reinterpret_cast<
144  volatile T (&)[sizeof(Vector) / sizeof(T)]>(data)[idx];
145  }
146 
147  __host__ __device__
148  Address operator&() const noexcept { return Address{this}; }
149 
150  __host__ __device__
151  Scalar_accessor& operator=(const Scalar_accessor& x) noexcept {
152  data[idx] = x.data[idx];
153 
154  return *this;
155  }
156  __host__ __device__
157  Scalar_accessor& operator=(T x) noexcept {
158  data[idx] = x;
159 
160  return *this;
161  }
162  __host__ __device__
163  volatile Scalar_accessor& operator=(T x) volatile noexcept {
164  data[idx] = x;
165 
166  return *this;
167  }
168 
169  __host__ __device__
170  Scalar_accessor& operator++() noexcept {
171  ++data[idx];
172  return *this;
173  }
174  __host__ __device__
175  T operator++(int) noexcept {
176  auto r{data[idx]};
177  ++data[idx];
178  return *this;
179  }
180  __host__ __device__
181  Scalar_accessor& operator--() noexcept {
182  --data[idx];
183  return *this;
184  }
185  __host__ __device__
186  T operator--(int) noexcept {
187  auto r{data[idx]};
188  --data[idx];
189  return *this;
190  }
191 
192  // TODO: convertibility is too restrictive, constraint should be on
193  // the operator being invocable with a value of type U.
194  template<
195  typename U,
196  typename std::enable_if<
197  std::is_convertible<U, T>{}>::type* = nullptr>
198  __host__ __device__
199  Scalar_accessor& operator+=(U x) noexcept {
200  data[idx] += x;
201  return *this;
202  }
203  template<
204  typename U,
205  typename std::enable_if<
206  std::is_convertible<U, T>{}>::type* = nullptr>
207  __host__ __device__
208  Scalar_accessor& operator-=(U x) noexcept {
209  data[idx] -= x;
210  return *this;
211  }
212 
213  template<
214  typename U,
215  typename std::enable_if<
216  std::is_convertible<U, T>{}>::type* = nullptr>
217  __host__ __device__
218  Scalar_accessor& operator*=(U x) noexcept {
219  data[idx] *= x;
220  return *this;
221  }
222  template<
223  typename U,
224  typename std::enable_if<
225  std::is_convertible<U, T>{}>::type* = nullptr>
226  __host__ __device__
227  Scalar_accessor& operator/=(U x) noexcept {
228  data[idx] /= x;
229  return *this;
230  }
231  template<
232  typename U = T,
233  typename std::enable_if<std::is_convertible<U, T>{} &&
234  std::is_integral<U>{}>::type* = nullptr>
235  __host__ __device__
236  Scalar_accessor& operator%=(U x) noexcept {
237  data[idx] %= x;
238  return *this;
239  }
240 
241  template<
242  typename U = T,
243  typename std::enable_if<std::is_convertible<U, T>{} &&
244  std::is_integral<U>{}>::type* = nullptr>
245  __host__ __device__
246  Scalar_accessor& operator>>=(U x) noexcept {
247  data[idx] >>= x;
248  return *this;
249  }
250  template<
251  typename U = T,
252  typename std::enable_if<std::is_convertible<U, T>{} &&
253  std::is_integral<U>{}>::type* = nullptr>
254  __host__ __device__
255  Scalar_accessor& operator<<=(U x) noexcept {
256  data[idx] <<= x;
257  return *this;
258  }
259  template<
260  typename U = T,
261  typename std::enable_if<std::is_convertible<U, T>{} &&
262  std::is_integral<U>{}>::type* = nullptr>
263  __host__ __device__
264  Scalar_accessor& operator&=(U x) noexcept {
265  data[idx] &= x;
266  return *this;
267  }
268  template<
269  typename U = T,
270  typename std::enable_if<std::is_convertible<U, T>{} &&
271  std::is_integral<U>{}>::type* = nullptr>
272  __host__ __device__
273  Scalar_accessor& operator|=(U x) noexcept {
274  data[idx] |= x;
275  return *this;
276  }
277  template<
278  typename U = T,
279  typename std::enable_if<std::is_convertible<U, T>{} &&
280  std::is_integral<U>{}>::type* = nullptr>
281  __host__ __device__
282  Scalar_accessor& operator^=(U x) noexcept {
283  data[idx] ^= x;
284  return *this;
285  }
286  };
287  } // Namespace hip_impl.
288 
289  template<typename T, unsigned int n> struct HIP_vector_base;
290 
291  template<typename T>
292  struct HIP_vector_base<T, 1> {
293  using Native_vec_ = T __NATIVE_VECTOR__(1, T);
294 
295  union {
296  Native_vec_ data;
297 #if __HIP_CLANG_ONLY__
298  struct {
299  T x;
300  };
301 #else
302  hip_impl::Scalar_accessor<T, Native_vec_, 0> x;
303 #endif
304  };
305 
306  using value_type = T;
307 
308  __host__ __device__
309  HIP_vector_base& operator=(const HIP_vector_base& x) noexcept {
310  #if __has_attribute(ext_vector_type)
311  data = x.data;
312  #else
313  data[0] = x.data[0];
314  #endif
315 
316  return *this;
317  }
318  };
319 
320  template<typename T>
321  struct HIP_vector_base<T, 2> {
322  using Native_vec_ = T __NATIVE_VECTOR__(2, T);
323 
324  union {
325  Native_vec_ data;
326 #if __HIP_CLANG_ONLY__
327  struct {
328  T x;
329  T y;
330  };
331 #else
332  hip_impl::Scalar_accessor<T, Native_vec_, 0> x;
333  hip_impl::Scalar_accessor<T, Native_vec_, 1> y;
334 #endif
335  };
336 
337  using value_type = T;
338 
339  __host__ __device__
340  HIP_vector_base& operator=(const HIP_vector_base& x) noexcept {
341  #if __has_attribute(ext_vector_type)
342  data = x.data;
343  #else
344  data[0] = x.data[0];
345  data[1] = x.data[1];
346  #endif
347 
348  return *this;
349  }
350  };
351 
352  template<typename T>
353  struct HIP_vector_base<T, 3> {
354  struct Native_vec_ {
355  T d[3];
356 
357  __host__ __device__
358  constexpr
359  Native_vec_() = default;
360  __host__ __device__
361  explicit
362  constexpr
363  Native_vec_(T x) noexcept : d{x, x, x} {}
364  __host__ __device__
365  constexpr
366  Native_vec_(T x, T y, T z) noexcept : d{x, y, z} {}
367  __host__ __device__
368  constexpr
369  Native_vec_(const Native_vec_&) = default;
370  __host__ __device__
371  constexpr
372  Native_vec_(Native_vec_&&) = default;
373  __host__ __device__
374  ~Native_vec_() = default;
375 
376  __host__ __device__
377  Native_vec_& operator=(const Native_vec_&) = default;
378  __host__ __device__
379  Native_vec_& operator=(Native_vec_&&) = default;
380 
381  __host__ __device__
382  T& operator[](unsigned int idx) noexcept { return d[idx]; }
383  __host__ __device__
384  T operator[](unsigned int idx) const noexcept { return d[idx]; }
385 
386  __host__ __device__
387  Native_vec_& operator+=(const Native_vec_& x) noexcept
388  {
389  for (auto i = 0u; i != 3u; ++i) d[i] += x.d[i];
390  return *this;
391  }
392  __host__ __device__
393  Native_vec_& operator-=(const Native_vec_& x) noexcept
394  {
395  for (auto i = 0u; i != 3u; ++i) d[i] -= x.d[i];
396  return *this;
397  }
398 
399  __host__ __device__
400  Native_vec_& operator*=(const Native_vec_& x) noexcept
401  {
402  for (auto i = 0u; i != 3u; ++i) d[i] *= x.d[i];
403  return *this;
404  }
405  __host__ __device__
406  Native_vec_& operator/=(const Native_vec_& x) noexcept
407  {
408  for (auto i = 0u; i != 3u; ++i) d[i] /= x.d[i];
409  return *this;
410  }
411 
412  template<
413  typename U = T,
414  typename std::enable_if<std::is_signed<U>{}>::type* = nullptr>
415  __host__ __device__
416  Native_vec_ operator-() const noexcept
417  {
418  auto r{*this};
419  for (auto&& x : r.d) x = -x;
420  return r;
421  }
422 
423  template<
424  typename U = T,
425  typename std::enable_if<std::is_integral<U>{}>::type* = nullptr>
426  __host__ __device__
427  Native_vec_ operator~() const noexcept
428  {
429  auto r{*this};
430  for (auto&& x : r.d) x = ~x;
431  return r;
432  }
433  template<
434  typename U = T,
435  typename std::enable_if<std::is_integral<U>{}>::type* = nullptr>
436  __host__ __device__
437  Native_vec_& operator%=(const Native_vec_& x) noexcept
438  {
439  for (auto i = 0u; i != 3u; ++i) d[i] %= x.d[i];
440  return *this;
441  }
442  template<
443  typename U = T,
444  typename std::enable_if<std::is_integral<U>{}>::type* = nullptr>
445  __host__ __device__
446  Native_vec_& operator^=(const Native_vec_& x) noexcept
447  {
448  for (auto i = 0u; i != 3u; ++i) d[i] ^= x.d[i];
449  return *this;
450  }
451  template<
452  typename U = T,
453  typename std::enable_if<std::is_integral<U>{}>::type* = nullptr>
454  __host__ __device__
455  Native_vec_& operator|=(const Native_vec_& x) noexcept
456  {
457  for (auto i = 0u; i != 3u; ++i) d[i] |= x.d[i];
458  return *this;
459  }
460  template<
461  typename U = T,
462  typename std::enable_if<std::is_integral<U>{}>::type* = nullptr>
463  __host__ __device__
464  Native_vec_& operator&=(const Native_vec_& x) noexcept
465  {
466  for (auto i = 0u; i != 3u; ++i) d[i] &= x.d[i];
467  return *this;
468  }
469  template<
470  typename U = T,
471  typename std::enable_if<std::is_integral<U>{}>::type* = nullptr>
472  __host__ __device__
473  Native_vec_& operator>>=(const Native_vec_& x) noexcept
474  {
475  for (auto i = 0u; i != 3u; ++i) d[i] >>= x.d[i];
476  return *this;
477  }
478  template<
479  typename U = T,
480  typename std::enable_if<std::is_integral<U>{}>::type* = nullptr>
481  __host__ __device__
482  Native_vec_& operator<<=(const Native_vec_& x) noexcept
483  {
484  for (auto i = 0u; i != 3u; ++i) d[i] <<= x.d[i];
485  return *this;
486  }
487 
488  using Vec3_cmp = int __attribute__((vector_size(4 * sizeof(int))));
489  __host__ __device__
490  Vec3_cmp operator==(const Native_vec_& x) const noexcept
491  {
492  return Vec3_cmp{d[0] == x.d[0], d[1] == x.d[1], d[2] == x.d[2]};
493  }
494  };
495 
496  union {
497  Native_vec_ data;
498  struct {
499  T x;
500  T y;
501  T z;
502  };
503  };
504 
505  using value_type = T;
506  };
507 
508  template<typename T>
509  struct HIP_vector_base<T, 4> {
510  using Native_vec_ = T __NATIVE_VECTOR__(4, T);
511 
512  union {
513  Native_vec_ data;
514 #if __HIP_CLANG_ONLY__
515  struct {
516  T x;
517  T y;
518  T z;
519  T w;
520  };
521 #else
522  hip_impl::Scalar_accessor<T, Native_vec_, 0> x;
523  hip_impl::Scalar_accessor<T, Native_vec_, 1> y;
524  hip_impl::Scalar_accessor<T, Native_vec_, 2> z;
525  hip_impl::Scalar_accessor<T, Native_vec_, 3> w;
526 #endif
527  };
528 
529  using value_type = T;
530 
531  __host__ __device__
532  HIP_vector_base& operator=(const HIP_vector_base& x) noexcept {
533  #if __has_attribute(ext_vector_type)
534  data = x.data;
535  #else
536  data[0] = x.data[0];
537  data[1] = x.data[1];
538  data[2] = x.data[2];
539  data[3] = x.data[3];
540  #endif
541 
542  return *this;
543  }
544  };
545 
546  template<typename T, unsigned int rank>
547  struct HIP_vector_type : public HIP_vector_base<T, rank> {
548  using HIP_vector_base<T, rank>::data;
549  using typename HIP_vector_base<T, rank>::Native_vec_;
550 
551  inline __host__ __device__
552  HIP_vector_type() = default;
553  template<
554  typename U,
555  typename std::enable_if<
556  std::is_convertible<U, T>{}>::type* = nullptr>
557  explicit inline __host__ __device__
558  HIP_vector_type(U x) noexcept
559  {
560  for (auto i = 0u; i != rank; ++i) data[i] = x;
561  }
562  template< // TODO: constrain based on type as well.
563  typename... Us,
564  typename std::enable_if<
565  (rank > 1) && sizeof...(Us) == rank>::type* = nullptr>
566  inline __host__ __device__
567  HIP_vector_type(Us... xs) noexcept
568  {
569  #if __has_attribute(ext_vector_type)
570  new (&data) Native_vec_{static_cast<T>(xs)...};
571  #else
572  new (&data) std::array<T, rank>{static_cast<T>(xs)...};
573  #endif
574  }
575  inline __host__ __device__
576  HIP_vector_type(const HIP_vector_type&) = default;
577  inline __host__ __device__
578  HIP_vector_type(HIP_vector_type&&) = default;
579  inline __host__ __device__
580  ~HIP_vector_type() = default;
581 
582  inline __host__ __device__
583  HIP_vector_type& operator=(const HIP_vector_type&) = default;
584  inline __host__ __device__
585  HIP_vector_type& operator=(HIP_vector_type&&) = default;
586 
587  // Operators
588  inline __host__ __device__
589  HIP_vector_type& operator++() noexcept
590  {
591  return *this += HIP_vector_type{1};
592  }
593  inline __host__ __device__
594  HIP_vector_type operator++(int) noexcept
595  {
596  auto tmp(*this);
597  ++*this;
598  return tmp;
599  }
600 
601  inline __host__ __device__
602  HIP_vector_type& operator--() noexcept
603  {
604  return *this -= HIP_vector_type{1};
605  }
606  inline __host__ __device__
607  HIP_vector_type operator--(int) noexcept
608  {
609  auto tmp(*this);
610  --*this;
611  return tmp;
612  }
613 
614  inline __host__ __device__
615  HIP_vector_type& operator+=(const HIP_vector_type& x) noexcept
616  {
617  data += x.data;
618  return *this;
619  }
620  template<
621  typename U,
622  typename std::enable_if<
623  std::is_convertible<U, T>{}>::type* = nullptr>
624  inline __host__ __device__
625  HIP_vector_type& operator+=(U x) noexcept
626  {
627  return *this += HIP_vector_type{x};
628  }
629 
630  inline __host__ __device__
631  HIP_vector_type& operator-=(const HIP_vector_type& x) noexcept
632  {
633  data -= x.data;
634  return *this;
635  }
636  template<
637  typename U,
638  typename std::enable_if<
639  std::is_convertible<U, T>{}>::type* = nullptr>
640  inline __host__ __device__
641  HIP_vector_type& operator-=(U x) noexcept
642  {
643  return *this -= HIP_vector_type{x};
644  }
645 
646  inline __host__ __device__
647  HIP_vector_type& operator*=(const HIP_vector_type& x) noexcept
648  {
649  data *= x.data;
650  return *this;
651  }
652  template<
653  typename U,
654  typename std::enable_if<
655  std::is_convertible<U, T>{}>::type* = nullptr>
656  inline __host__ __device__
657  HIP_vector_type& operator*=(U x) noexcept
658  {
659  return *this *= HIP_vector_type{x};
660  }
661 
662  inline __host__ __device__
663  HIP_vector_type& operator/=(const HIP_vector_type& x) noexcept
664  {
665  data /= x.data;
666  return *this;
667  }
668  template<
669  typename U,
670  typename std::enable_if<
671  std::is_convertible<U, T>{}>::type* = nullptr>
672  inline __host__ __device__
673  HIP_vector_type& operator/=(U x) noexcept
674  {
675  return *this /= HIP_vector_type{x};
676  }
677 
678  template<
679  typename U = T,
680  typename std::enable_if<std::is_signed<U>{}>::type* = nullptr>
681  inline __host__ __device__
682  HIP_vector_type operator-() noexcept
683  {
684  auto tmp(*this);
685  tmp.data = -tmp.data;
686  return tmp;
687  }
688 
689  template<
690  typename U = T,
691  typename std::enable_if<std::is_integral<U>{}>::type* = nullptr>
692  inline __host__ __device__
693  HIP_vector_type operator~() noexcept
694  {
695  HIP_vector_type r{*this};
696  r.data = ~r.data;
697  return r;
698  }
699 
700  template<
701  typename U = T,
702  typename std::enable_if<std::is_integral<U>{}>::type* = nullptr>
703  inline __host__ __device__
704  HIP_vector_type& operator%=(const HIP_vector_type& x) noexcept
705  {
706  data %= x.data;
707  return *this;
708  }
709 
710  template<
711  typename U = T,
712  typename std::enable_if<std::is_integral<U>{}>::type* = nullptr>
713  inline __host__ __device__
714  HIP_vector_type& operator^=(const HIP_vector_type& x) noexcept
715  {
716  data ^= x.data;
717  return *this;
718  }
719 
720  template<
721  typename U = T,
722  typename std::enable_if<std::is_integral<U>{}>::type* = nullptr>
723  inline __host__ __device__
724  HIP_vector_type& operator|=(const HIP_vector_type& x) noexcept
725  {
726  data |= x.data;
727  return *this;
728  }
729 
730  template<
731  typename U = T,
732  typename std::enable_if<std::is_integral<U>{}>::type* = nullptr>
733  inline __host__ __device__
734  HIP_vector_type& operator&=(const HIP_vector_type& x) noexcept
735  {
736  data &= x.data;
737  return *this;
738  }
739 
740  template<
741  typename U = T,
742  typename std::enable_if<std::is_integral<U>{}>::type* = nullptr>
743  inline __host__ __device__
744  HIP_vector_type& operator>>=(const HIP_vector_type& x) noexcept
745  {
746  data >>= x.data;
747  return *this;
748  }
749 
750  template<
751  typename U = T,
752  typename std::enable_if<std::is_integral<U>{}>::type* = nullptr>
753  inline __host__ __device__
754  HIP_vector_type& operator<<=(const HIP_vector_type& x) noexcept
755  {
756  data <<= x.data;
757  return *this;
758  }
759  };
760 
761  template<typename T, unsigned int n>
762  inline __host__ __device__
763  HIP_vector_type<T, n> operator+(
764  const HIP_vector_type<T, n>& x, const HIP_vector_type<T, n>& y) noexcept
765  {
766  return HIP_vector_type<T, n>{x} += y;
767  }
768  template<typename T, unsigned int n, typename U>
769  inline __host__ __device__
770  HIP_vector_type<T, n> operator+(
771  const HIP_vector_type<T, n>& x, U y) noexcept
772  {
773  return HIP_vector_type<T, n>{x} += HIP_vector_type<T, n>{y};
774  }
775  template<typename T, unsigned int n, typename U>
776  inline __host__ __device__
777  HIP_vector_type<T, n> operator+(
778  U x, const HIP_vector_type<T, n>& y) noexcept
779  {
780  return HIP_vector_type<T, n>{x} += y;
781  }
782 
783  template<typename T, unsigned int n>
784  inline __host__ __device__
785  HIP_vector_type<T, n> operator-(
786  const HIP_vector_type<T, n>& x, const HIP_vector_type<T, n>& y) noexcept
787  {
788  return HIP_vector_type<T, n>{x} -= y;
789  }
790  template<typename T, unsigned int n, typename U>
791  inline __host__ __device__
792  HIP_vector_type<T, n> operator-(
793  const HIP_vector_type<T, n>& x, U y) noexcept
794  {
795  return HIP_vector_type<T, n>{x} -= HIP_vector_type<T, n>{y};
796  }
797  template<typename T, unsigned int n, typename U>
798  inline __host__ __device__
799  HIP_vector_type<T, n> operator-(
800  U x, const HIP_vector_type<T, n>& y) noexcept
801  {
802  return HIP_vector_type<T, n>{x} -= y;
803  }
804 
805  template<typename T, unsigned int n>
806  inline __host__ __device__
807  HIP_vector_type<T, n> operator*(
808  const HIP_vector_type<T, n>& x, const HIP_vector_type<T, n>& y) noexcept
809  {
810  return HIP_vector_type<T, n>{x} *= y;
811  }
812  template<typename T, unsigned int n, typename U>
813  inline __host__ __device__
814  HIP_vector_type<T, n> operator*(
815  const HIP_vector_type<T, n>& x, U y) noexcept
816  {
817  return HIP_vector_type<T, n>{x} *= HIP_vector_type<T, n>{y};
818  }
819  template<typename T, unsigned int n, typename U>
820  inline __host__ __device__
821  HIP_vector_type<T, n> operator*(
822  U x, const HIP_vector_type<T, n>& y) noexcept
823  {
824  return HIP_vector_type<T, n>{x} *= y;
825  }
826 
827  template<typename T, unsigned int n>
828  inline __host__ __device__
829  HIP_vector_type<T, n> operator/(
830  const HIP_vector_type<T, n>& x, const HIP_vector_type<T, n>& y) noexcept
831  {
832  return HIP_vector_type<T, n>{x} /= y;
833  }
834  template<typename T, unsigned int n, typename U>
835  inline __host__ __device__
836  HIP_vector_type<T, n> operator/(
837  const HIP_vector_type<T, n>& x, U y) noexcept
838  {
839  return HIP_vector_type<T, n>{x} /= HIP_vector_type<T, n>{y};
840  }
841  template<typename T, unsigned int n, typename U>
842  inline __host__ __device__
843  HIP_vector_type<T, n> operator/(
844  U x, const HIP_vector_type<T, n>& y) noexcept
845  {
846  return HIP_vector_type<T, n>{x} /= y;
847  }
848 
849  template<typename T, unsigned int n>
850  inline __host__ __device__
851  bool operator==(
852  const HIP_vector_type<T, n>& x, const HIP_vector_type<T, n>& y) noexcept
853  {
854  auto tmp = x.data == y.data;
855  for (auto i = 0u; i != n; ++i) if (tmp[i] == 0) return false;
856  return true;
857  }
858  template<typename T, unsigned int n, typename U>
859  inline __host__ __device__
860  bool operator==(const HIP_vector_type<T, n>& x, U y) noexcept
861  {
862  return x == HIP_vector_type<T, n>{y};
863  }
864  template<typename T, unsigned int n, typename U>
865  inline __host__ __device__
866  bool operator==(U x, const HIP_vector_type<T, n>& y) noexcept
867  {
868  return HIP_vector_type<T, n>{x} == y;
869  }
870 
871  template<typename T, unsigned int n>
872  inline __host__ __device__
873  bool operator!=(
874  const HIP_vector_type<T, n>& x, const HIP_vector_type<T, n>& y) noexcept
875  {
876  return !(x == y);
877  }
878  template<typename T, unsigned int n, typename U>
879  inline __host__ __device__
880  bool operator!=(const HIP_vector_type<T, n>& x, U y) noexcept
881  {
882  return !(x == y);
883  }
884  template<typename T, unsigned int n, typename U>
885  inline __host__ __device__
886  bool operator!=(U x, const HIP_vector_type<T, n>& y) noexcept
887  {
888  return !(x == y);
889  }
890 
891  template<
892  typename T,
893  unsigned int n,
894  typename std::enable_if<std::is_integral<T>{}>* = nullptr>
895  inline __host__ __device__
896  HIP_vector_type<T, n> operator%(
897  const HIP_vector_type<T, n>& x, const HIP_vector_type<T, n>& y) noexcept
898  {
899  return HIP_vector_type<T, n>{x} %= y;
900  }
901  template<
902  typename T,
903  unsigned int n,
904  typename U,
905  typename std::enable_if<std::is_integral<T>{}>* = nullptr>
906  inline __host__ __device__
907  HIP_vector_type<T, n> operator%(
908  const HIP_vector_type<T, n>& x, U y) noexcept
909  {
910  return HIP_vector_type<T, n>{x} %= HIP_vector_type<T, n>{y};
911  }
912  template<
913  typename T,
914  unsigned int n,
915  typename U,
916  typename std::enable_if<std::is_integral<T>{}>* = nullptr>
917  inline __host__ __device__
918  HIP_vector_type<T, n> operator%(
919  U x, const HIP_vector_type<T, n>& y) noexcept
920  {
921  return HIP_vector_type<T, n>{x} %= y;
922  }
923 
924  template<
925  typename T,
926  unsigned int n,
927  typename std::enable_if<std::is_integral<T>{}>* = nullptr>
928  inline __host__ __device__
929  HIP_vector_type<T, n> operator^(
930  const HIP_vector_type<T, n>& x, const HIP_vector_type<T, n>& y) noexcept
931  {
932  return HIP_vector_type<T, n>{x} ^= y;
933  }
934  template<
935  typename T,
936  unsigned int n,
937  typename U,
938  typename std::enable_if<std::is_integral<T>{}>* = nullptr>
939  inline __host__ __device__
940  HIP_vector_type<T, n> operator^(
941  const HIP_vector_type<T, n>& x, U y) noexcept
942  {
943  return HIP_vector_type<T, n>{x} ^= HIP_vector_type<T, n>{y};
944  }
945  template<
946  typename T,
947  unsigned int n,
948  typename U,
949  typename std::enable_if<std::is_integral<T>{}>* = nullptr>
950  inline __host__ __device__
951  HIP_vector_type<T, n> operator^(
952  U x, const HIP_vector_type<T, n>& y) noexcept
953  {
954  return HIP_vector_type<T, n>{x} ^= y;
955  }
956 
957  template<
958  typename T,
959  unsigned int n,
960  typename std::enable_if<std::is_integral<T>{}>* = nullptr>
961  inline __host__ __device__
962  HIP_vector_type<T, n> operator|(
963  const HIP_vector_type<T, n>& x, const HIP_vector_type<T, n>& y) noexcept
964  {
965  return HIP_vector_type<T, n>{x} |= y;
966  }
967  template<
968  typename T,
969  unsigned int n,
970  typename U,
971  typename std::enable_if<std::is_integral<T>{}>* = nullptr>
972  inline __host__ __device__
973  HIP_vector_type<T, n> operator|(
974  const HIP_vector_type<T, n>& x, U y) noexcept
975  {
976  return HIP_vector_type<T, n>{x} |= HIP_vector_type<T, n>{y};
977  }
978  template<
979  typename T,
980  unsigned int n,
981  typename U,
982  typename std::enable_if<std::is_integral<T>{}>* = nullptr>
983  inline __host__ __device__
984  HIP_vector_type<T, n> operator|(
985  U x, const HIP_vector_type<T, n>& y) noexcept
986  {
987  return HIP_vector_type<T, n>{x} |= y;
988  }
989 
990  template<
991  typename T,
992  unsigned int n,
993  typename std::enable_if<std::is_integral<T>{}>* = nullptr>
994  inline __host__ __device__
995  HIP_vector_type<T, n> operator&(
996  const HIP_vector_type<T, n>& x, const HIP_vector_type<T, n>& y) noexcept
997  {
998  return HIP_vector_type<T, n>{x} &= y;
999  }
1000  template<
1001  typename T,
1002  unsigned int n,
1003  typename U,
1004  typename std::enable_if<std::is_integral<T>{}>* = nullptr>
1005  inline __host__ __device__
1006  HIP_vector_type<T, n> operator&(
1007  const HIP_vector_type<T, n>& x, U y) noexcept
1008  {
1009  return HIP_vector_type<T, n>{x} &= HIP_vector_type<T, n>{y};
1010  }
1011  template<
1012  typename T,
1013  unsigned int n,
1014  typename U,
1015  typename std::enable_if<std::is_integral<T>{}>* = nullptr>
1016  inline __host__ __device__
1017  HIP_vector_type<T, n> operator&(
1018  U x, const HIP_vector_type<T, n>& y) noexcept
1019  {
1020  return HIP_vector_type<T, n>{x} &= y;
1021  }
1022 
1023  template<
1024  typename T,
1025  unsigned int n,
1026  typename std::enable_if<std::is_integral<T>{}>* = nullptr>
1027  inline __host__ __device__
1028  HIP_vector_type<T, n> operator>>(
1029  const HIP_vector_type<T, n>& x, const HIP_vector_type<T, n>& y) noexcept
1030  {
1031  return HIP_vector_type<T, n>{x} >>= y;
1032  }
1033  template<
1034  typename T,
1035  unsigned int n,
1036  typename U,
1037  typename std::enable_if<std::is_integral<T>{}>* = nullptr>
1038  inline __host__ __device__
1039  HIP_vector_type<T, n> operator>>(
1040  const HIP_vector_type<T, n>& x, U y) noexcept
1041  {
1042  return HIP_vector_type<T, n>{x} >>= HIP_vector_type<T, n>{y};
1043  }
1044  template<
1045  typename T,
1046  unsigned int n,
1047  typename U,
1048  typename std::enable_if<std::is_integral<T>{}>* = nullptr>
1049  inline __host__ __device__
1050  HIP_vector_type<T, n> operator>>(
1051  U x, const HIP_vector_type<T, n>& y) noexcept
1052  {
1053  return HIP_vector_type<T, n>{x} >>= y;
1054  }
1055 
1056  template<
1057  typename T,
1058  unsigned int n,
1059  typename std::enable_if<std::is_integral<T>{}>* = nullptr>
1060  inline __host__ __device__
1061  HIP_vector_type<T, n> operator<<(
1062  const HIP_vector_type<T, n>& x, const HIP_vector_type<T, n>& y) noexcept
1063  {
1064  return HIP_vector_type<T, n>{x} <<= y;
1065  }
1066  template<
1067  typename T,
1068  unsigned int n,
1069  typename U,
1070  typename std::enable_if<std::is_integral<T>{}>* = nullptr>
1071  inline __host__ __device__
1072  HIP_vector_type<T, n> operator<<(
1073  const HIP_vector_type<T, n>& x, U y) noexcept
1074  {
1075  return HIP_vector_type<T, n>{x} <<= HIP_vector_type<T, n>{y};
1076  }
1077  template<
1078  typename T,
1079  unsigned int n,
1080  typename U,
1081  typename std::enable_if<std::is_arithmetic<U>::value>::type,
1082  typename std::enable_if<std::is_integral<T>{}>* = nullptr>
1083  inline __host__ __device__
1084  HIP_vector_type<T, n> operator<<(
1085  U x, const HIP_vector_type<T, n>& y) noexcept
1086  {
1087  return HIP_vector_type<T, n>{x} <<= y;
1088  }
1089 
1090  #define __MAKE_VECTOR_TYPE__(CUDA_name, T) \
1091  using CUDA_name##1 = HIP_vector_type<T, 1>;\
1092  using CUDA_name##2 = HIP_vector_type<T, 2>;\
1093  using CUDA_name##3 = HIP_vector_type<T, 3>;\
1094  using CUDA_name##4 = HIP_vector_type<T, 4>;
1095 #else
1096  #define __MAKE_VECTOR_TYPE__(CUDA_name, T) \
1097  typedef struct {\
1098  T x;\
1099  } CUDA_name##1;\
1100  typedef struct {\
1101  T x;\
1102  T y;\
1103  } CUDA_name##2;\
1104  typedef struct {\
1105  T x;\
1106  T y;\
1107  T z;\
1108  } CUDA_name##3;\
1109  typedef struct {\
1110  T x;\
1111  T y;\
1112  T z;\
1113  T w;\
1114  } CUDA_name##4;
1115 #endif
1116 
1117 __MAKE_VECTOR_TYPE__(uchar, unsigned char);
1118 __MAKE_VECTOR_TYPE__(char, char);
1119 __MAKE_VECTOR_TYPE__(ushort, unsigned short);
1120 __MAKE_VECTOR_TYPE__(short, short);
1121 __MAKE_VECTOR_TYPE__(uint, unsigned int);
1122 __MAKE_VECTOR_TYPE__(int, int);
1123 __MAKE_VECTOR_TYPE__(ulong, unsigned long);
1124 __MAKE_VECTOR_TYPE__(long, long);
1125 __MAKE_VECTOR_TYPE__(ulonglong, unsigned long long);
1126 __MAKE_VECTOR_TYPE__(longlong, long long);
1127 __MAKE_VECTOR_TYPE__(float, float);
1128 __MAKE_VECTOR_TYPE__(double, double);
1129 
1130 #ifdef __cplusplus
1131 #define DECLOP_MAKE_ONE_COMPONENT(comp, type) \
1132  static inline __device__ __host__ \
1133  type make_##type(comp x) { type r{x}; return r; }
1134 
1135 #define DECLOP_MAKE_TWO_COMPONENT(comp, type) \
1136  static inline __device__ __host__ \
1137  type make_##type(comp x, comp y) { type r{x, y}; return r; }
1138 
1139 #define DECLOP_MAKE_THREE_COMPONENT(comp, type) \
1140  static inline __device__ __host__ \
1141  type make_##type(comp x, comp y, comp z) { type r{x, y, z}; return r; }
1142 
1143 #define DECLOP_MAKE_FOUR_COMPONENT(comp, type) \
1144  static inline __device__ __host__ \
1145  type make_##type(comp x, comp y, comp z, comp w) { \
1146  type r{x, y, z, w}; \
1147  return r; \
1148  }
1149 #else
1150  #define DECLOP_MAKE_ONE_COMPONENT(comp, type) \
1151  static inline __device__ __host__ \
1152  type make_##type(comp x) { type r; r.x =x; return r; }
1153 
1154  #define DECLOP_MAKE_TWO_COMPONENT(comp, type) \
1155  static inline __device__ __host__ \
1156  type make_##type(comp x, comp y) { type r; r.x=x; r.y=y; return r; }
1157 
1158  #define DECLOP_MAKE_THREE_COMPONENT(comp, type) \
1159  static inline __device__ __host__ \
1160  type make_##type(comp x, comp y, comp z) { type r; r.x=x; r.y=y; r.z=z; return r; }
1161 
1162  #define DECLOP_MAKE_FOUR_COMPONENT(comp, type) \
1163  static inline __device__ __host__ \
1164  type make_##type(comp x, comp y, comp z, comp w) { \
1165  type r; r.x=x; r.y=y; r.z=z; r.w=w; \
1166  return r; \
1167  }
1168 #endif
1169 
1170 DECLOP_MAKE_ONE_COMPONENT(unsigned char, uchar1);
1171 DECLOP_MAKE_TWO_COMPONENT(unsigned char, uchar2);
1172 DECLOP_MAKE_THREE_COMPONENT(unsigned char, uchar3);
1173 DECLOP_MAKE_FOUR_COMPONENT(unsigned char, uchar4);
1174 
1175 DECLOP_MAKE_ONE_COMPONENT(signed char, char1);
1176 DECLOP_MAKE_TWO_COMPONENT(signed char, char2);
1177 DECLOP_MAKE_THREE_COMPONENT(signed char, char3);
1178 DECLOP_MAKE_FOUR_COMPONENT(signed char, char4);
1179 
1180 DECLOP_MAKE_ONE_COMPONENT(unsigned short, ushort1);
1181 DECLOP_MAKE_TWO_COMPONENT(unsigned short, ushort2);
1182 DECLOP_MAKE_THREE_COMPONENT(unsigned short, ushort3);
1183 DECLOP_MAKE_FOUR_COMPONENT(unsigned short, ushort4);
1184 
1185 DECLOP_MAKE_ONE_COMPONENT(signed short, short1);
1186 DECLOP_MAKE_TWO_COMPONENT(signed short, short2);
1187 DECLOP_MAKE_THREE_COMPONENT(signed short, short3);
1188 DECLOP_MAKE_FOUR_COMPONENT(signed short, short4);
1189 
1190 DECLOP_MAKE_ONE_COMPONENT(unsigned int, uint1);
1191 DECLOP_MAKE_TWO_COMPONENT(unsigned int, uint2);
1192 DECLOP_MAKE_THREE_COMPONENT(unsigned int, uint3);
1193 DECLOP_MAKE_FOUR_COMPONENT(unsigned int, uint4);
1194 
1195 DECLOP_MAKE_ONE_COMPONENT(signed int, int1);
1196 DECLOP_MAKE_TWO_COMPONENT(signed int, int2);
1197 DECLOP_MAKE_THREE_COMPONENT(signed int, int3);
1198 DECLOP_MAKE_FOUR_COMPONENT(signed int, int4);
1199 
1200 DECLOP_MAKE_ONE_COMPONENT(float, float1);
1201 DECLOP_MAKE_TWO_COMPONENT(float, float2);
1202 DECLOP_MAKE_THREE_COMPONENT(float, float3);
1203 DECLOP_MAKE_FOUR_COMPONENT(float, float4);
1204 
1205 DECLOP_MAKE_ONE_COMPONENT(double, double1);
1206 DECLOP_MAKE_TWO_COMPONENT(double, double2);
1207 DECLOP_MAKE_THREE_COMPONENT(double, double3);
1208 DECLOP_MAKE_FOUR_COMPONENT(double, double4);
1209 
1210 DECLOP_MAKE_ONE_COMPONENT(unsigned long, ulong1);
1211 DECLOP_MAKE_TWO_COMPONENT(unsigned long, ulong2);
1212 DECLOP_MAKE_THREE_COMPONENT(unsigned long, ulong3);
1213 DECLOP_MAKE_FOUR_COMPONENT(unsigned long, ulong4);
1214 
1215 DECLOP_MAKE_ONE_COMPONENT(signed long, long1);
1216 DECLOP_MAKE_TWO_COMPONENT(signed long, long2);
1217 DECLOP_MAKE_THREE_COMPONENT(signed long, long3);
1218 DECLOP_MAKE_FOUR_COMPONENT(signed long, long4);
1219 
1220 DECLOP_MAKE_ONE_COMPONENT(unsigned long long, ulonglong1);
1221 DECLOP_MAKE_TWO_COMPONENT(unsigned long long, ulonglong2);
1222 DECLOP_MAKE_THREE_COMPONENT(unsigned long long, ulonglong3);
1223 DECLOP_MAKE_FOUR_COMPONENT(unsigned long long, ulonglong4);
1224 
1225 DECLOP_MAKE_ONE_COMPONENT(signed long long, longlong1);
1226 DECLOP_MAKE_TWO_COMPONENT(signed long long, longlong2);
1227 DECLOP_MAKE_THREE_COMPONENT(signed long long, longlong3);
1228 DECLOP_MAKE_FOUR_COMPONENT(signed long long, longlong4);
1229 #else // defined(_MSC_VER)
1230 #include <mmintrin.h>
1231 #include <xmmintrin.h>
1232 #include <emmintrin.h>
1233 #include <immintrin.h>
1234 
1235 typedef union { char data; } char1;
1236 typedef union { char data[2]; } char2;
1237 typedef union { char data[4]; } char4;
1238 typedef union { char4 data; } char3;
1239 typedef union { __m64 data; } char8;
1240 typedef union { __m128i data; } char16;
1241 
1242 typedef union { unsigned char data; } uchar1;
1243 typedef union { unsigned char data[2]; } uchar2;
1244 typedef union { unsigned char data[4]; } uchar4;
1245 typedef union { uchar4 data; } uchar3;
1246 typedef union { __m64 data; } uchar8;
1247 typedef union { __m128i data; } uchar16;
1248 
1249 typedef union { short data; } short1;
1250 typedef union { short data[2]; } short2;
1251 typedef union { __m64 data; } short4;
1252 typedef union { short4 data; } short3;
1253 typedef union { __m128i data; } short8;
1254 typedef union { __m128i data[2]; } short16;
1255 
1256 typedef union { unsigned short data; } ushort1;
1257 typedef union { unsigned short data[2]; } ushort2;
1258 typedef union { __m64 data; } ushort4;
1259 typedef union { ushort4 data; } ushort3;
1260 typedef union { __m128i data; } ushort8;
1261 typedef union { __m128i data[2]; } ushort16;
1262 
1263 typedef union { int data; } int1;
1264 typedef union { __m64 data; } int2;
1265 typedef union { __m128i data; } int4;
1266 typedef union { int4 data; } int3;
1267 typedef union { __m128i data[2]; } int8;
1268 typedef union { __m128i data[4];} int16;
1269 
1270 typedef union { unsigned int data; } uint1;
1271 typedef union { __m64 data; } uint2;
1272 typedef union { __m128i data; } uint4;
1273 typedef union { uint4 data; } uint3;
1274 typedef union { __m128i data[2]; } uint8;
1275 typedef union { __m128i data[4]; } uint16;
1276 
1277 #if !defined(_WIN64)
1278 typedef union { int data; } long1;
1279 typedef union { __m64 data; } long2;
1280 typedef union { __m128i data; } long4;
1281 typedef union { long4 data; } long3;
1282 typedef union { __m128i data[2]; } long8;
1283 typedef union { __m128i data[4]; } long16;
1284 
1285 typedef union { unsigned int data; } ulong1;
1286 typedef union { __m64 data; } ulong2;
1287 typedef union { __m128i data; } ulong4;
1288 typedef union { ulong4 data; } ulong3;
1289 typedef union { __m128i data[2]; } ulong8;
1290 typedef union { __m128i data[4]; } ulong16;
1291 #else // defined(_WIN64)
1292 typedef union { __m64 data; } long1;
1293 typedef union { __m128i data; } long2;
1294 typedef union { __m128i data[2]; } long4;
1295 typedef union { long4 data; } long3;
1296 typedef union { __m128i data[4]; } long8;
1297 typedef union { __m128i data[8]; } long16;
1298 
1299 typedef union { __m64 data; } ulong1;
1300 typedef union { __m128i data; } ulong2;
1301 typedef union { __m128i data[2]; } ulong4;
1302 typedef union { ulong4 data; } ulong3;
1303 typedef union { __m128i data[4]; } ulong8;
1304 typedef union { __m128i data[8]; } ulong16;
1305 #endif // defined(_WIN64)
1306 
1307 typedef union { __m64 data; } longlong1;
1308 typedef union { __m128i data; } longlong2;
1309 typedef union { __m128i data[2]; } longlong4;
1310 typedef union { longlong4 data; } longlong3;
1311 typedef union { __m128i data[4]; } longlong8;
1312 typedef union { __m128i data[8]; } longlong16;
1313 
1314 typedef union { __m64 data; } ulonglong1;
1315 typedef union { __m128i data; } ulonglong2;
1316 typedef union { __m128i data[2]; } ulonglong4;
1317 typedef union { ulonglong4 data; } ulonglong3;
1318 typedef union { __m128i data[4]; } ulonglong8;
1319 typedef union { __m128i data[8]; } ulonglong16;
1320 
1321 typedef union { float data; } float1;
1322 typedef union { __m64 data; } float2;
1323 typedef union { __m128 data; } float4;
1324 typedef union { float4 data; } float3;
1325 typedef union { __m256 data; } float8;
1326 typedef union { __m256 data[2]; } float16;
1327 
1328 typedef union { double data; } double1;
1329 typedef union { __m128d data; } double2;
1330 typedef union { __m256d data; } double4;
1331 typedef union { double4 data; } double3;
1332 typedef union { __m256d data[2]; } double8;
1333 typedef union { __m256d data[4]; } double16;
1334 
1335 #endif // defined(_MSC_VER)
1336 #endif
TODO-doc.
#define __host__
Definition: host_defines.h:41
Definition: hip_runtime.h:202