HIP: Heterogenous-computing Interface for Portability
hip_vector_types.h
Go to the documentation of this file.
1 /*
2 Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved.
3 
4 Permission is hereby granted, free of charge, to any person obtaining a copy
5 of this software and associated documentation files (the "Software"), to deal
6 in the Software without restriction, including without limitation the rights
7 to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
8 copies of the Software, and to permit persons to whom the Software is
9 furnished to do so, subject to the following conditions:
10 
11 The above copyright notice and this permission notice shall be included in
12 all copies or substantial portions of the Software.
13 
14 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
17 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
19 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
20 THE SOFTWARE.
21 */
22 
28 #ifndef HIP_INCLUDE_HIP_HCC_DETAIL_HIP_VECTOR_TYPES_H
29 #define HIP_INCLUDE_HIP_HCC_DETAIL_HIP_VECTOR_TYPES_H
30 
31 #if defined(__HCC__) && (__hcc_workweek__ < 16032)
32 #error("This version of HIP requires a newer version of HCC.");
33 #endif
34 
36 
37 #if !defined(_MSC_VER) || __clang__
38  #if __has_attribute(ext_vector_type)
39  #define __NATIVE_VECTOR__(n, T) T __attribute__((ext_vector_type(n)))
40  #else
41  #define __NATIVE_VECTOR__(n, T) T[n]
42  #endif
43 
44 #if defined(__cplusplus)
45  #include <array>
46  #include <iosfwd>
47  #include <type_traits>
48 
49  namespace hip_impl {
50  template<typename, typename, unsigned int> struct Scalar_accessor;
51  } // Namespace hip_impl.
52 
53  namespace std {
54  template<typename T, typename U, unsigned int n>
55  struct is_integral<hip_impl::Scalar_accessor<T, U, n>>
56  : is_integral<T> {};
57  template<typename T, typename U, unsigned int n>
58  struct is_floating_point<hip_impl::Scalar_accessor<T, U, n>>
59  : is_floating_point<T> {};
60  } // Namespace std.
61 
62  namespace hip_impl {
63  template<typename T, typename Vector, unsigned int idx>
64  struct Scalar_accessor {
65  struct Address {
66  const Scalar_accessor* p;
67 
68  __host__ __device__
69  operator const T*() const noexcept {
70  return &reinterpret_cast<const T*>(p)[idx];
71  }
72  __host__ __device__
73  operator const T*() const volatile noexcept {
74  return &reinterpret_cast<const T*>(p)[idx];
75  }
76  __host__ __device__
77  operator T*() noexcept {
78  return &reinterpret_cast<T*>(
79  const_cast<Scalar_accessor*>(p))[idx];
80  }
81  __host__ __device__
82  operator T*() volatile noexcept {
83  return &reinterpret_cast<T*>(
84  const_cast<Scalar_accessor*>(p))[idx];
85  }
86  };
87 
88  friend
89  inline
90  std::ostream& operator<<(std::ostream& os,
91  const Scalar_accessor& x) noexcept {
92  return os << x.data[idx];
93  }
94  friend
95  inline
96  std::istream& operator>>(std::istream& is,
97  Scalar_accessor& x) noexcept {
98  T tmp;
99  is >> tmp;
100  x.data[idx] = tmp;
101 
102  return is;
103  }
104 
105  // Idea from https://t0rakka.silvrback.com/simd-scalar-accessor
106  Vector data;
107 
108  __host__ __device__
109  operator T() const noexcept { return data[idx]; }
110  __host__ __device__
111  operator T() const volatile noexcept { return data[idx]; }
112 
113 #ifdef __HIP_ENABLE_VECTOR_SCALAR_ACCESSORY_ENUM_CONVERSION__
114  // The conversions to enum are fairly ghastly, but unfortunately used in
115  // some pre-existing, difficult to modify, code.
116  template<
117  typename U,
118  typename std::enable_if<
119  !std::is_same<U, T>{} &&
120  std::is_enum<U>{} &&
121  std::is_convertible<
122  T, typename std::enable_if<std::is_enum<U>::value, std::underlying_type<U>>::type::type>{}>::type* = nullptr>
123  __host__ __device__
124  operator U() const noexcept { return static_cast<U>(data[idx]); }
125  template<
126  typename U,
127  typename std::enable_if<
128  !std::is_same<U, T>{} &&
129  std::is_enum<U>{} &&
130  std::is_convertible<
131  T, typename std::enable_if<std::is_enum<U>::value, std::underlying_type<U>>::type::type>{}>::type* = nullptr>
132  __host__ __device__
133  operator U() const volatile noexcept { return static_cast<U>(data[idx]); }
134 #endif
135 
136  __host__ __device__
137  operator T&() noexcept {
138  return reinterpret_cast<
139  T (&)[sizeof(Vector) / sizeof(T)]>(data)[idx];
140  }
141  __host__ __device__
142  operator volatile T&() volatile noexcept {
143  return reinterpret_cast<
144  volatile T (&)[sizeof(Vector) / sizeof(T)]>(data)[idx];
145  }
146 
147  __host__ __device__
148  Address operator&() const noexcept { return Address{this}; }
149 
150  __host__ __device__
151  Scalar_accessor& operator=(const Scalar_accessor& x) noexcept {
152  data[idx] = x.data[idx];
153 
154  return *this;
155  }
156  __host__ __device__
157  Scalar_accessor& operator=(T x) noexcept {
158  data[idx] = x;
159 
160  return *this;
161  }
162  __host__ __device__
163  volatile Scalar_accessor& operator=(T x) volatile noexcept {
164  data[idx] = x;
165 
166  return *this;
167  }
168 
169  __host__ __device__
170  Scalar_accessor& operator++() noexcept {
171  ++data[idx];
172  return *this;
173  }
174  __host__ __device__
175  T operator++(int) noexcept {
176  auto r{data[idx]};
177  ++data[idx];
178  return *this;
179  }
180  __host__ __device__
181  Scalar_accessor& operator--() noexcept {
182  --data[idx];
183  return *this;
184  }
185  __host__ __device__
186  T operator--(int) noexcept {
187  auto r{data[idx]};
188  --data[idx];
189  return *this;
190  }
191 
192  // TODO: convertibility is too restrictive, constraint should be on
193  // the operator being invocable with a value of type U.
194  template<
195  typename U,
196  typename std::enable_if<
197  std::is_convertible<U, T>{}>::type* = nullptr>
198  __host__ __device__
199  Scalar_accessor& operator+=(U x) noexcept {
200  data[idx] += x;
201  return *this;
202  }
203  template<
204  typename U,
205  typename std::enable_if<
206  std::is_convertible<U, T>{}>::type* = nullptr>
207  __host__ __device__
208  Scalar_accessor& operator-=(U x) noexcept {
209  data[idx] -= x;
210  return *this;
211  }
212 
213  template<
214  typename U,
215  typename std::enable_if<
216  std::is_convertible<U, T>{}>::type* = nullptr>
217  __host__ __device__
218  Scalar_accessor& operator*=(U x) noexcept {
219  data[idx] *= x;
220  return *this;
221  }
222  template<
223  typename U,
224  typename std::enable_if<
225  std::is_convertible<U, T>{}>::type* = nullptr>
226  __host__ __device__
227  Scalar_accessor& operator/=(U x) noexcept {
228  data[idx] /= x;
229  return *this;
230  }
231  template<
232  typename U = T,
233  typename std::enable_if<std::is_convertible<U, T>{} &&
234  std::is_integral<U>{}>::type* = nullptr>
235  __host__ __device__
236  Scalar_accessor& operator%=(U x) noexcept {
237  data[idx] %= x;
238  return *this;
239  }
240 
241  template<
242  typename U = T,
243  typename std::enable_if<std::is_convertible<U, T>{} &&
244  std::is_integral<U>{}>::type* = nullptr>
245  __host__ __device__
246  Scalar_accessor& operator>>=(U x) noexcept {
247  data[idx] >>= x;
248  return *this;
249  }
250  template<
251  typename U = T,
252  typename std::enable_if<std::is_convertible<U, T>{} &&
253  std::is_integral<U>{}>::type* = nullptr>
254  __host__ __device__
255  Scalar_accessor& operator<<=(U x) noexcept {
256  data[idx] <<= x;
257  return *this;
258  }
259  template<
260  typename U = T,
261  typename std::enable_if<std::is_convertible<U, T>{} &&
262  std::is_integral<U>{}>::type* = nullptr>
263  __host__ __device__
264  Scalar_accessor& operator&=(U x) noexcept {
265  data[idx] &= x;
266  return *this;
267  }
268  template<
269  typename U = T,
270  typename std::enable_if<std::is_convertible<U, T>{} &&
271  std::is_integral<U>{}>::type* = nullptr>
272  __host__ __device__
273  Scalar_accessor& operator|=(U x) noexcept {
274  data[idx] |= x;
275  return *this;
276  }
277  template<
278  typename U = T,
279  typename std::enable_if<std::is_convertible<U, T>{} &&
280  std::is_integral<U>{}>::type* = nullptr>
281  __host__ __device__
282  Scalar_accessor& operator^=(U x) noexcept {
283  data[idx] ^= x;
284  return *this;
285  }
286  };
287 
288  inline
289  constexpr
290  unsigned int next_pot(unsigned int x) {
291  // Precondition: x > 1.
292  return 1u << (32u - __builtin_clz(x - 1u));
293  }
294  } // Namespace hip_impl.
295 
296  template<typename T, unsigned int n> struct HIP_vector_base;
297 
298  template<typename T>
299  struct HIP_vector_base<T, 1> {
300  using Native_vec_ = __NATIVE_VECTOR__(1, T);
301 
302  union {
303  Native_vec_ data;
304 #if __HIP_CLANG_ONLY__
305  struct {
306  T x;
307  };
308 #else
309  hip_impl::Scalar_accessor<T, Native_vec_, 0> x;
310 #endif
311  };
312 
313  using value_type = T;
314 
315  __host__ __device__
316  HIP_vector_base& operator=(const HIP_vector_base& x) noexcept {
317  #if __has_attribute(ext_vector_type)
318  data = x.data;
319  #else
320  data[0] = x.data[0];
321  #endif
322 
323  return *this;
324  }
325  };
326 
327  template<typename T>
328  struct HIP_vector_base<T, 2> {
329  using Native_vec_ = __NATIVE_VECTOR__(2, T);
330 
331  union
332  #if !__has_attribute(ext_vector_type)
333  alignas(hip_impl::next_pot(2 * sizeof(T)))
334  #endif
335  {
336  Native_vec_ data;
337 #if __HIP_CLANG_ONLY__
338  struct {
339  T x;
340  T y;
341  };
342 #else
343  hip_impl::Scalar_accessor<T, Native_vec_, 0> x;
344  hip_impl::Scalar_accessor<T, Native_vec_, 1> y;
345 #endif
346  };
347 
348  using value_type = T;
349 
350  __host__ __device__
351  HIP_vector_base& operator=(const HIP_vector_base& x) noexcept {
352  #if __has_attribute(ext_vector_type)
353  data = x.data;
354  #else
355  data[0] = x.data[0];
356  data[1] = x.data[1];
357  #endif
358 
359  return *this;
360  }
361  };
362 
363  template<typename T>
364  struct HIP_vector_base<T, 3> {
365  struct Native_vec_ {
366  T d[3];
367 
368  __host__ __device__
369  constexpr
370  Native_vec_() = default;
371  __host__ __device__
372  explicit
373  constexpr
374  Native_vec_(T x) noexcept : d{x, x, x} {}
375  __host__ __device__
376  constexpr
377  Native_vec_(T x, T y, T z) noexcept : d{x, y, z} {}
378  __host__ __device__
379  constexpr
380  Native_vec_(const Native_vec_&) = default;
381  __host__ __device__
382  constexpr
383  Native_vec_(Native_vec_&&) = default;
384  __host__ __device__
385  ~Native_vec_() = default;
386 
387  __host__ __device__
388  Native_vec_& operator=(const Native_vec_&) = default;
389  __host__ __device__
390  Native_vec_& operator=(Native_vec_&&) = default;
391 
392  __host__ __device__
393  T& operator[](unsigned int idx) noexcept { return d[idx]; }
394  __host__ __device__
395  T operator[](unsigned int idx) const noexcept { return d[idx]; }
396 
397  __host__ __device__
398  Native_vec_& operator+=(const Native_vec_& x) noexcept
399  {
400  for (auto i = 0u; i != 3u; ++i) d[i] += x.d[i];
401  return *this;
402  }
403  __host__ __device__
404  Native_vec_& operator-=(const Native_vec_& x) noexcept
405  {
406  for (auto i = 0u; i != 3u; ++i) d[i] -= x.d[i];
407  return *this;
408  }
409 
410  __host__ __device__
411  Native_vec_& operator*=(const Native_vec_& x) noexcept
412  {
413  for (auto i = 0u; i != 3u; ++i) d[i] *= x.d[i];
414  return *this;
415  }
416  __host__ __device__
417  Native_vec_& operator/=(const Native_vec_& x) noexcept
418  {
419  for (auto i = 0u; i != 3u; ++i) d[i] /= x.d[i];
420  return *this;
421  }
422 
423  template<
424  typename U = T,
425  typename std::enable_if<std::is_signed<U>{}>::type* = nullptr>
426  __host__ __device__
427  Native_vec_ operator-() const noexcept
428  {
429  auto r{*this};
430  for (auto&& x : r.d) x = -x;
431  return r;
432  }
433 
434  template<
435  typename U = T,
436  typename std::enable_if<std::is_integral<U>{}>::type* = nullptr>
437  __host__ __device__
438  Native_vec_ operator~() const noexcept
439  {
440  auto r{*this};
441  for (auto&& x : r.d) x = ~x;
442  return r;
443  }
444  template<
445  typename U = T,
446  typename std::enable_if<std::is_integral<U>{}>::type* = nullptr>
447  __host__ __device__
448  Native_vec_& operator%=(const Native_vec_& x) noexcept
449  {
450  for (auto i = 0u; i != 3u; ++i) d[i] %= x.d[i];
451  return *this;
452  }
453  template<
454  typename U = T,
455  typename std::enable_if<std::is_integral<U>{}>::type* = nullptr>
456  __host__ __device__
457  Native_vec_& operator^=(const Native_vec_& x) noexcept
458  {
459  for (auto i = 0u; i != 3u; ++i) d[i] ^= x.d[i];
460  return *this;
461  }
462  template<
463  typename U = T,
464  typename std::enable_if<std::is_integral<U>{}>::type* = nullptr>
465  __host__ __device__
466  Native_vec_& operator|=(const Native_vec_& x) noexcept
467  {
468  for (auto i = 0u; i != 3u; ++i) d[i] |= x.d[i];
469  return *this;
470  }
471  template<
472  typename U = T,
473  typename std::enable_if<std::is_integral<U>{}>::type* = nullptr>
474  __host__ __device__
475  Native_vec_& operator&=(const Native_vec_& x) noexcept
476  {
477  for (auto i = 0u; i != 3u; ++i) d[i] &= x.d[i];
478  return *this;
479  }
480  template<
481  typename U = T,
482  typename std::enable_if<std::is_integral<U>{}>::type* = nullptr>
483  __host__ __device__
484  Native_vec_& operator>>=(const Native_vec_& x) noexcept
485  {
486  for (auto i = 0u; i != 3u; ++i) d[i] >>= x.d[i];
487  return *this;
488  }
489  template<
490  typename U = T,
491  typename std::enable_if<std::is_integral<U>{}>::type* = nullptr>
492  __host__ __device__
493  Native_vec_& operator<<=(const Native_vec_& x) noexcept
494  {
495  for (auto i = 0u; i != 3u; ++i) d[i] <<= x.d[i];
496  return *this;
497  }
498 
499  using Vec3_cmp = int __attribute__((vector_size(4 * sizeof(int))));
500  __host__ __device__
501  Vec3_cmp operator==(const Native_vec_& x) const noexcept
502  {
503  return Vec3_cmp{d[0] == x.d[0], d[1] == x.d[1], d[2] == x.d[2]};
504  }
505  };
506 
507  union {
508  Native_vec_ data;
509  struct {
510  T x;
511  T y;
512  T z;
513  };
514  };
515 
516  using value_type = T;
517  };
518 
519  template<typename T>
520  struct HIP_vector_base<T, 4> {
521  using Native_vec_ = __NATIVE_VECTOR__(4, T);
522 
523  union
524  #if !__has_attribute(ext_vector_type)
525  alignas(hip_impl::next_pot(4 * sizeof(T)))
526  #endif
527  {
528  Native_vec_ data;
529 #if __HIP_CLANG_ONLY__
530  struct {
531  T x;
532  T y;
533  T z;
534  T w;
535  };
536 #else
537  hip_impl::Scalar_accessor<T, Native_vec_, 0> x;
538  hip_impl::Scalar_accessor<T, Native_vec_, 1> y;
539  hip_impl::Scalar_accessor<T, Native_vec_, 2> z;
540  hip_impl::Scalar_accessor<T, Native_vec_, 3> w;
541 #endif
542  };
543 
544  using value_type = T;
545 
546  __host__ __device__
547  HIP_vector_base& operator=(const HIP_vector_base& x) noexcept {
548  #if __has_attribute(ext_vector_type)
549  data = x.data;
550  #else
551  data[0] = x.data[0];
552  data[1] = x.data[1];
553  data[2] = x.data[2];
554  data[3] = x.data[3];
555  #endif
556 
557  return *this;
558  }
559  };
560 
561  template<typename T, unsigned int rank>
562  struct HIP_vector_type : public HIP_vector_base<T, rank> {
563  using HIP_vector_base<T, rank>::data;
564  using typename HIP_vector_base<T, rank>::Native_vec_;
565 
566  inline __host__ __device__
567  HIP_vector_type() = default;
568  template<
569  typename U,
570  typename std::enable_if<
571  std::is_convertible<U, T>{}>::type* = nullptr>
572  explicit inline __host__ __device__
573  HIP_vector_type(U x) noexcept
574  {
575  for (auto i = 0u; i != rank; ++i) data[i] = x;
576  }
577  template< // TODO: constrain based on type as well.
578  typename... Us,
579  typename std::enable_if<
580  (rank > 1) && sizeof...(Us) == rank>::type* = nullptr>
581  inline __host__ __device__
582  HIP_vector_type(Us... xs) noexcept
583  {
584  #if __has_attribute(ext_vector_type)
585  new (&data) Native_vec_{static_cast<T>(xs)...};
586  #else
587  new (&data) std::array<T, rank>{static_cast<T>(xs)...};
588  #endif
589  }
590  inline __host__ __device__
591  HIP_vector_type(const HIP_vector_type&) = default;
592  inline __host__ __device__
593  HIP_vector_type(HIP_vector_type&&) = default;
594  inline __host__ __device__
595  ~HIP_vector_type() = default;
596 
597  inline __host__ __device__
598  HIP_vector_type& operator=(const HIP_vector_type&) = default;
599  inline __host__ __device__
600  HIP_vector_type& operator=(HIP_vector_type&&) = default;
601 
602  // Operators
603  inline __host__ __device__
604  HIP_vector_type& operator++() noexcept
605  {
606  return *this += HIP_vector_type{1};
607  }
608  inline __host__ __device__
609  HIP_vector_type operator++(int) noexcept
610  {
611  auto tmp(*this);
612  ++*this;
613  return tmp;
614  }
615 
616  inline __host__ __device__
617  HIP_vector_type& operator--() noexcept
618  {
619  return *this -= HIP_vector_type{1};
620  }
621  inline __host__ __device__
622  HIP_vector_type operator--(int) noexcept
623  {
624  auto tmp(*this);
625  --*this;
626  return tmp;
627  }
628 
629  inline __host__ __device__
630  HIP_vector_type& operator+=(const HIP_vector_type& x) noexcept
631  {
632  data += x.data;
633  return *this;
634  }
635  template<
636  typename U,
637  typename std::enable_if<
638  std::is_convertible<U, T>{}>::type* = nullptr>
639  inline __host__ __device__
640  HIP_vector_type& operator+=(U x) noexcept
641  {
642  return *this += HIP_vector_type{x};
643  }
644 
645  inline __host__ __device__
646  HIP_vector_type& operator-=(const HIP_vector_type& x) noexcept
647  {
648  data -= x.data;
649  return *this;
650  }
651  template<
652  typename U,
653  typename std::enable_if<
654  std::is_convertible<U, T>{}>::type* = nullptr>
655  inline __host__ __device__
656  HIP_vector_type& operator-=(U x) noexcept
657  {
658  return *this -= HIP_vector_type{x};
659  }
660 
661  inline __host__ __device__
662  HIP_vector_type& operator*=(const HIP_vector_type& x) noexcept
663  {
664  data *= x.data;
665  return *this;
666  }
667  template<
668  typename U,
669  typename std::enable_if<
670  std::is_convertible<U, T>{}>::type* = nullptr>
671  inline __host__ __device__
672  HIP_vector_type& operator*=(U x) noexcept
673  {
674  return *this *= HIP_vector_type{x};
675  }
676 
677  inline __host__ __device__
678  HIP_vector_type& operator/=(const HIP_vector_type& x) noexcept
679  {
680  data /= x.data;
681  return *this;
682  }
683  template<
684  typename U,
685  typename std::enable_if<
686  std::is_convertible<U, T>{}>::type* = nullptr>
687  inline __host__ __device__
688  HIP_vector_type& operator/=(U x) noexcept
689  {
690  return *this /= HIP_vector_type{x};
691  }
692 
693  template<
694  typename U = T,
695  typename std::enable_if<std::is_signed<U>{}>::type* = nullptr>
696  inline __host__ __device__
697  HIP_vector_type operator-() noexcept
698  {
699  auto tmp(*this);
700  tmp.data = -tmp.data;
701  return tmp;
702  }
703 
704  template<
705  typename U = T,
706  typename std::enable_if<std::is_integral<U>{}>::type* = nullptr>
707  inline __host__ __device__
708  HIP_vector_type operator~() noexcept
709  {
710  HIP_vector_type r{*this};
711  r.data = ~r.data;
712  return r;
713  }
714 
715  template<
716  typename U = T,
717  typename std::enable_if<std::is_integral<U>{}>::type* = nullptr>
718  inline __host__ __device__
719  HIP_vector_type& operator%=(const HIP_vector_type& x) noexcept
720  {
721  data %= x.data;
722  return *this;
723  }
724 
725  template<
726  typename U = T,
727  typename std::enable_if<std::is_integral<U>{}>::type* = nullptr>
728  inline __host__ __device__
729  HIP_vector_type& operator^=(const HIP_vector_type& x) noexcept
730  {
731  data ^= x.data;
732  return *this;
733  }
734 
735  template<
736  typename U = T,
737  typename std::enable_if<std::is_integral<U>{}>::type* = nullptr>
738  inline __host__ __device__
739  HIP_vector_type& operator|=(const HIP_vector_type& x) noexcept
740  {
741  data |= x.data;
742  return *this;
743  }
744 
745  template<
746  typename U = T,
747  typename std::enable_if<std::is_integral<U>{}>::type* = nullptr>
748  inline __host__ __device__
749  HIP_vector_type& operator&=(const HIP_vector_type& x) noexcept
750  {
751  data &= x.data;
752  return *this;
753  }
754 
755  template<
756  typename U = T,
757  typename std::enable_if<std::is_integral<U>{}>::type* = nullptr>
758  inline __host__ __device__
759  HIP_vector_type& operator>>=(const HIP_vector_type& x) noexcept
760  {
761  data >>= x.data;
762  return *this;
763  }
764 
765  template<
766  typename U = T,
767  typename std::enable_if<std::is_integral<U>{}>::type* = nullptr>
768  inline __host__ __device__
769  HIP_vector_type& operator<<=(const HIP_vector_type& x) noexcept
770  {
771  data <<= x.data;
772  return *this;
773  }
774  };
775 
776  template<typename T, unsigned int n>
777  inline __host__ __device__
778  HIP_vector_type<T, n> operator+(
779  const HIP_vector_type<T, n>& x, const HIP_vector_type<T, n>& y) noexcept
780  {
781  return HIP_vector_type<T, n>{x} += y;
782  }
783  template<typename T, unsigned int n, typename U>
784  inline __host__ __device__
785  HIP_vector_type<T, n> operator+(
786  const HIP_vector_type<T, n>& x, U y) noexcept
787  {
788  return HIP_vector_type<T, n>{x} += HIP_vector_type<T, n>{y};
789  }
790  template<typename T, unsigned int n, typename U>
791  inline __host__ __device__
792  HIP_vector_type<T, n> operator+(
793  U x, const HIP_vector_type<T, n>& y) noexcept
794  {
795  return HIP_vector_type<T, n>{x} += y;
796  }
797 
798  template<typename T, unsigned int n>
799  inline __host__ __device__
800  HIP_vector_type<T, n> operator-(
801  const HIP_vector_type<T, n>& x, const HIP_vector_type<T, n>& y) noexcept
802  {
803  return HIP_vector_type<T, n>{x} -= y;
804  }
805  template<typename T, unsigned int n, typename U>
806  inline __host__ __device__
807  HIP_vector_type<T, n> operator-(
808  const HIP_vector_type<T, n>& x, U y) noexcept
809  {
810  return HIP_vector_type<T, n>{x} -= HIP_vector_type<T, n>{y};
811  }
812  template<typename T, unsigned int n, typename U>
813  inline __host__ __device__
814  HIP_vector_type<T, n> operator-(
815  U x, const HIP_vector_type<T, n>& y) noexcept
816  {
817  return HIP_vector_type<T, n>{x} -= y;
818  }
819 
820  template<typename T, unsigned int n>
821  inline __host__ __device__
822  HIP_vector_type<T, n> operator*(
823  const HIP_vector_type<T, n>& x, const HIP_vector_type<T, n>& y) noexcept
824  {
825  return HIP_vector_type<T, n>{x} *= y;
826  }
827  template<typename T, unsigned int n, typename U>
828  inline __host__ __device__
829  HIP_vector_type<T, n> operator*(
830  const HIP_vector_type<T, n>& x, U y) noexcept
831  {
832  return HIP_vector_type<T, n>{x} *= HIP_vector_type<T, n>{y};
833  }
834  template<typename T, unsigned int n, typename U>
835  inline __host__ __device__
836  HIP_vector_type<T, n> operator*(
837  U x, const HIP_vector_type<T, n>& y) noexcept
838  {
839  return HIP_vector_type<T, n>{x} *= y;
840  }
841 
842  template<typename T, unsigned int n>
843  inline __host__ __device__
844  HIP_vector_type<T, n> operator/(
845  const HIP_vector_type<T, n>& x, const HIP_vector_type<T, n>& y) noexcept
846  {
847  return HIP_vector_type<T, n>{x} /= y;
848  }
849  template<typename T, unsigned int n, typename U>
850  inline __host__ __device__
851  HIP_vector_type<T, n> operator/(
852  const HIP_vector_type<T, n>& x, U y) noexcept
853  {
854  return HIP_vector_type<T, n>{x} /= HIP_vector_type<T, n>{y};
855  }
856  template<typename T, unsigned int n, typename U>
857  inline __host__ __device__
858  HIP_vector_type<T, n> operator/(
859  U x, const HIP_vector_type<T, n>& y) noexcept
860  {
861  return HIP_vector_type<T, n>{x} /= y;
862  }
863 
864  template<typename T, unsigned int n>
865  inline __host__ __device__
866  bool operator==(
867  const HIP_vector_type<T, n>& x, const HIP_vector_type<T, n>& y) noexcept
868  {
869  auto tmp = x.data == y.data;
870  for (auto i = 0u; i != n; ++i) if (tmp[i] == 0) return false;
871  return true;
872  }
873  template<typename T, unsigned int n, typename U>
874  inline __host__ __device__
875  bool operator==(const HIP_vector_type<T, n>& x, U y) noexcept
876  {
877  return x == HIP_vector_type<T, n>{y};
878  }
879  template<typename T, unsigned int n, typename U>
880  inline __host__ __device__
881  bool operator==(U x, const HIP_vector_type<T, n>& y) noexcept
882  {
883  return HIP_vector_type<T, n>{x} == y;
884  }
885 
886  template<typename T, unsigned int n>
887  inline __host__ __device__
888  bool operator!=(
889  const HIP_vector_type<T, n>& x, const HIP_vector_type<T, n>& y) noexcept
890  {
891  return !(x == y);
892  }
893  template<typename T, unsigned int n, typename U>
894  inline __host__ __device__
895  bool operator!=(const HIP_vector_type<T, n>& x, U y) noexcept
896  {
897  return !(x == y);
898  }
899  template<typename T, unsigned int n, typename U>
900  inline __host__ __device__
901  bool operator!=(U x, const HIP_vector_type<T, n>& y) noexcept
902  {
903  return !(x == y);
904  }
905 
906  template<
907  typename T,
908  unsigned int n,
909  typename std::enable_if<std::is_integral<T>{}>* = nullptr>
910  inline __host__ __device__
911  HIP_vector_type<T, n> operator%(
912  const HIP_vector_type<T, n>& x, const HIP_vector_type<T, n>& y) noexcept
913  {
914  return HIP_vector_type<T, n>{x} %= y;
915  }
916  template<
917  typename T,
918  unsigned int n,
919  typename U,
920  typename std::enable_if<std::is_integral<T>{}>* = nullptr>
921  inline __host__ __device__
922  HIP_vector_type<T, n> operator%(
923  const HIP_vector_type<T, n>& x, U y) noexcept
924  {
925  return HIP_vector_type<T, n>{x} %= HIP_vector_type<T, n>{y};
926  }
927  template<
928  typename T,
929  unsigned int n,
930  typename U,
931  typename std::enable_if<std::is_integral<T>{}>* = nullptr>
932  inline __host__ __device__
933  HIP_vector_type<T, n> operator%(
934  U x, const HIP_vector_type<T, n>& y) noexcept
935  {
936  return HIP_vector_type<T, n>{x} %= y;
937  }
938 
939  template<
940  typename T,
941  unsigned int n,
942  typename std::enable_if<std::is_integral<T>{}>* = nullptr>
943  inline __host__ __device__
944  HIP_vector_type<T, n> operator^(
945  const HIP_vector_type<T, n>& x, const HIP_vector_type<T, n>& y) noexcept
946  {
947  return HIP_vector_type<T, n>{x} ^= y;
948  }
949  template<
950  typename T,
951  unsigned int n,
952  typename U,
953  typename std::enable_if<std::is_integral<T>{}>* = nullptr>
954  inline __host__ __device__
955  HIP_vector_type<T, n> operator^(
956  const HIP_vector_type<T, n>& x, U y) noexcept
957  {
958  return HIP_vector_type<T, n>{x} ^= HIP_vector_type<T, n>{y};
959  }
960  template<
961  typename T,
962  unsigned int n,
963  typename U,
964  typename std::enable_if<std::is_integral<T>{}>* = nullptr>
965  inline __host__ __device__
966  HIP_vector_type<T, n> operator^(
967  U x, const HIP_vector_type<T, n>& y) noexcept
968  {
969  return HIP_vector_type<T, n>{x} ^= y;
970  }
971 
972  template<
973  typename T,
974  unsigned int n,
975  typename std::enable_if<std::is_integral<T>{}>* = nullptr>
976  inline __host__ __device__
977  HIP_vector_type<T, n> operator|(
978  const HIP_vector_type<T, n>& x, const HIP_vector_type<T, n>& y) noexcept
979  {
980  return HIP_vector_type<T, n>{x} |= y;
981  }
982  template<
983  typename T,
984  unsigned int n,
985  typename U,
986  typename std::enable_if<std::is_integral<T>{}>* = nullptr>
987  inline __host__ __device__
988  HIP_vector_type<T, n> operator|(
989  const HIP_vector_type<T, n>& x, U y) noexcept
990  {
991  return HIP_vector_type<T, n>{x} |= HIP_vector_type<T, n>{y};
992  }
993  template<
994  typename T,
995  unsigned int n,
996  typename U,
997  typename std::enable_if<std::is_integral<T>{}>* = nullptr>
998  inline __host__ __device__
999  HIP_vector_type<T, n> operator|(
1000  U x, const HIP_vector_type<T, n>& y) noexcept
1001  {
1002  return HIP_vector_type<T, n>{x} |= y;
1003  }
1004 
1005  template<
1006  typename T,
1007  unsigned int n,
1008  typename std::enable_if<std::is_integral<T>{}>* = nullptr>
1009  inline __host__ __device__
1010  HIP_vector_type<T, n> operator&(
1011  const HIP_vector_type<T, n>& x, const HIP_vector_type<T, n>& y) noexcept
1012  {
1013  return HIP_vector_type<T, n>{x} &= y;
1014  }
1015  template<
1016  typename T,
1017  unsigned int n,
1018  typename U,
1019  typename std::enable_if<std::is_integral<T>{}>* = nullptr>
1020  inline __host__ __device__
1021  HIP_vector_type<T, n> operator&(
1022  const HIP_vector_type<T, n>& x, U y) noexcept
1023  {
1024  return HIP_vector_type<T, n>{x} &= HIP_vector_type<T, n>{y};
1025  }
1026  template<
1027  typename T,
1028  unsigned int n,
1029  typename U,
1030  typename std::enable_if<std::is_integral<T>{}>* = nullptr>
1031  inline __host__ __device__
1032  HIP_vector_type<T, n> operator&(
1033  U x, const HIP_vector_type<T, n>& y) noexcept
1034  {
1035  return HIP_vector_type<T, n>{x} &= y;
1036  }
1037 
1038  template<
1039  typename T,
1040  unsigned int n,
1041  typename std::enable_if<std::is_integral<T>{}>* = nullptr>
1042  inline __host__ __device__
1043  HIP_vector_type<T, n> operator>>(
1044  const HIP_vector_type<T, n>& x, const HIP_vector_type<T, n>& y) noexcept
1045  {
1046  return HIP_vector_type<T, n>{x} >>= y;
1047  }
1048  template<
1049  typename T,
1050  unsigned int n,
1051  typename U,
1052  typename std::enable_if<std::is_integral<T>{}>* = nullptr>
1053  inline __host__ __device__
1054  HIP_vector_type<T, n> operator>>(
1055  const HIP_vector_type<T, n>& x, U y) noexcept
1056  {
1057  return HIP_vector_type<T, n>{x} >>= HIP_vector_type<T, n>{y};
1058  }
1059  template<
1060  typename T,
1061  unsigned int n,
1062  typename U,
1063  typename std::enable_if<std::is_integral<T>{}>* = nullptr>
1064  inline __host__ __device__
1065  HIP_vector_type<T, n> operator>>(
1066  U x, const HIP_vector_type<T, n>& y) noexcept
1067  {
1068  return HIP_vector_type<T, n>{x} >>= y;
1069  }
1070 
1071  template<
1072  typename T,
1073  unsigned int n,
1074  typename std::enable_if<std::is_integral<T>{}>* = nullptr>
1075  inline __host__ __device__
1076  HIP_vector_type<T, n> operator<<(
1077  const HIP_vector_type<T, n>& x, const HIP_vector_type<T, n>& y) noexcept
1078  {
1079  return HIP_vector_type<T, n>{x} <<= y;
1080  }
1081  template<
1082  typename T,
1083  unsigned int n,
1084  typename U,
1085  typename std::enable_if<std::is_integral<T>{}>* = nullptr>
1086  inline __host__ __device__
1087  HIP_vector_type<T, n> operator<<(
1088  const HIP_vector_type<T, n>& x, U y) noexcept
1089  {
1090  return HIP_vector_type<T, n>{x} <<= HIP_vector_type<T, n>{y};
1091  }
1092  template<
1093  typename T,
1094  unsigned int n,
1095  typename U,
1096  typename std::enable_if<std::is_arithmetic<U>::value>::type,
1097  typename std::enable_if<std::is_integral<T>{}>* = nullptr>
1098  inline __host__ __device__
1099  HIP_vector_type<T, n> operator<<(
1100  U x, const HIP_vector_type<T, n>& y) noexcept
1101  {
1102  return HIP_vector_type<T, n>{x} <<= y;
1103  }
1104 
1105  #define __MAKE_VECTOR_TYPE__(CUDA_name, T) \
1106  using CUDA_name##1 = HIP_vector_type<T, 1>;\
1107  using CUDA_name##2 = HIP_vector_type<T, 2>;\
1108  using CUDA_name##3 = HIP_vector_type<T, 3>;\
1109  using CUDA_name##4 = HIP_vector_type<T, 4>;
1110 #else
1111  #define __MAKE_VECTOR_TYPE__(CUDA_name, T) \
1112  typedef struct {\
1113  T x;\
1114  } CUDA_name##1;\
1115  typedef struct {\
1116  T x;\
1117  T y;\
1118  } CUDA_name##2;\
1119  typedef struct {\
1120  T x;\
1121  T y;\
1122  T z;\
1123  } CUDA_name##3;\
1124  typedef struct {\
1125  T x;\
1126  T y;\
1127  T z;\
1128  T w;\
1129  } CUDA_name##4;
1130 #endif
1131 
1132 __MAKE_VECTOR_TYPE__(uchar, unsigned char);
1133 __MAKE_VECTOR_TYPE__(char, char);
1134 __MAKE_VECTOR_TYPE__(ushort, unsigned short);
1135 __MAKE_VECTOR_TYPE__(short, short);
1136 __MAKE_VECTOR_TYPE__(uint, unsigned int);
1137 __MAKE_VECTOR_TYPE__(int, int);
1138 __MAKE_VECTOR_TYPE__(ulong, unsigned long);
1139 __MAKE_VECTOR_TYPE__(long, long);
1140 __MAKE_VECTOR_TYPE__(ulonglong, unsigned long long);
1141 __MAKE_VECTOR_TYPE__(longlong, long long);
1142 __MAKE_VECTOR_TYPE__(float, float);
1143 __MAKE_VECTOR_TYPE__(double, double);
1144 
1145 #ifdef __cplusplus
1146 #define DECLOP_MAKE_ONE_COMPONENT(comp, type) \
1147  static inline __device__ __host__ \
1148  type make_##type(comp x) { type r{x}; return r; }
1149 
1150 #define DECLOP_MAKE_TWO_COMPONENT(comp, type) \
1151  static inline __device__ __host__ \
1152  type make_##type(comp x, comp y) { type r{x, y}; return r; }
1153 
1154 #define DECLOP_MAKE_THREE_COMPONENT(comp, type) \
1155  static inline __device__ __host__ \
1156  type make_##type(comp x, comp y, comp z) { type r{x, y, z}; return r; }
1157 
1158 #define DECLOP_MAKE_FOUR_COMPONENT(comp, type) \
1159  static inline __device__ __host__ \
1160  type make_##type(comp x, comp y, comp z, comp w) { \
1161  type r{x, y, z, w}; \
1162  return r; \
1163  }
1164 #else
1165  #define DECLOP_MAKE_ONE_COMPONENT(comp, type) \
1166  static inline __device__ __host__ \
1167  type make_##type(comp x) { type r; r.x =x; return r; }
1168 
1169  #define DECLOP_MAKE_TWO_COMPONENT(comp, type) \
1170  static inline __device__ __host__ \
1171  type make_##type(comp x, comp y) { type r; r.x=x; r.y=y; return r; }
1172 
1173  #define DECLOP_MAKE_THREE_COMPONENT(comp, type) \
1174  static inline __device__ __host__ \
1175  type make_##type(comp x, comp y, comp z) { type r; r.x=x; r.y=y; r.z=z; return r; }
1176 
1177  #define DECLOP_MAKE_FOUR_COMPONENT(comp, type) \
1178  static inline __device__ __host__ \
1179  type make_##type(comp x, comp y, comp z, comp w) { \
1180  type r; r.x=x; r.y=y; r.z=z; r.w=w; \
1181  return r; \
1182  }
1183 #endif
1184 
1185 DECLOP_MAKE_ONE_COMPONENT(unsigned char, uchar1);
1186 DECLOP_MAKE_TWO_COMPONENT(unsigned char, uchar2);
1187 DECLOP_MAKE_THREE_COMPONENT(unsigned char, uchar3);
1188 DECLOP_MAKE_FOUR_COMPONENT(unsigned char, uchar4);
1189 
1190 DECLOP_MAKE_ONE_COMPONENT(signed char, char1);
1191 DECLOP_MAKE_TWO_COMPONENT(signed char, char2);
1192 DECLOP_MAKE_THREE_COMPONENT(signed char, char3);
1193 DECLOP_MAKE_FOUR_COMPONENT(signed char, char4);
1194 
1195 DECLOP_MAKE_ONE_COMPONENT(unsigned short, ushort1);
1196 DECLOP_MAKE_TWO_COMPONENT(unsigned short, ushort2);
1197 DECLOP_MAKE_THREE_COMPONENT(unsigned short, ushort3);
1198 DECLOP_MAKE_FOUR_COMPONENT(unsigned short, ushort4);
1199 
1200 DECLOP_MAKE_ONE_COMPONENT(signed short, short1);
1201 DECLOP_MAKE_TWO_COMPONENT(signed short, short2);
1202 DECLOP_MAKE_THREE_COMPONENT(signed short, short3);
1203 DECLOP_MAKE_FOUR_COMPONENT(signed short, short4);
1204 
1205 DECLOP_MAKE_ONE_COMPONENT(unsigned int, uint1);
1206 DECLOP_MAKE_TWO_COMPONENT(unsigned int, uint2);
1207 DECLOP_MAKE_THREE_COMPONENT(unsigned int, uint3);
1208 DECLOP_MAKE_FOUR_COMPONENT(unsigned int, uint4);
1209 
1210 DECLOP_MAKE_ONE_COMPONENT(signed int, int1);
1211 DECLOP_MAKE_TWO_COMPONENT(signed int, int2);
1212 DECLOP_MAKE_THREE_COMPONENT(signed int, int3);
1213 DECLOP_MAKE_FOUR_COMPONENT(signed int, int4);
1214 
1215 DECLOP_MAKE_ONE_COMPONENT(float, float1);
1216 DECLOP_MAKE_TWO_COMPONENT(float, float2);
1217 DECLOP_MAKE_THREE_COMPONENT(float, float3);
1218 DECLOP_MAKE_FOUR_COMPONENT(float, float4);
1219 
1220 DECLOP_MAKE_ONE_COMPONENT(double, double1);
1221 DECLOP_MAKE_TWO_COMPONENT(double, double2);
1222 DECLOP_MAKE_THREE_COMPONENT(double, double3);
1223 DECLOP_MAKE_FOUR_COMPONENT(double, double4);
1224 
1225 DECLOP_MAKE_ONE_COMPONENT(unsigned long, ulong1);
1226 DECLOP_MAKE_TWO_COMPONENT(unsigned long, ulong2);
1227 DECLOP_MAKE_THREE_COMPONENT(unsigned long, ulong3);
1228 DECLOP_MAKE_FOUR_COMPONENT(unsigned long, ulong4);
1229 
1230 DECLOP_MAKE_ONE_COMPONENT(signed long, long1);
1231 DECLOP_MAKE_TWO_COMPONENT(signed long, long2);
1232 DECLOP_MAKE_THREE_COMPONENT(signed long, long3);
1233 DECLOP_MAKE_FOUR_COMPONENT(signed long, long4);
1234 
1235 DECLOP_MAKE_ONE_COMPONENT(unsigned long long, ulonglong1);
1236 DECLOP_MAKE_TWO_COMPONENT(unsigned long long, ulonglong2);
1237 DECLOP_MAKE_THREE_COMPONENT(unsigned long long, ulonglong3);
1238 DECLOP_MAKE_FOUR_COMPONENT(unsigned long long, ulonglong4);
1239 
1240 DECLOP_MAKE_ONE_COMPONENT(signed long long, longlong1);
1241 DECLOP_MAKE_TWO_COMPONENT(signed long long, longlong2);
1242 DECLOP_MAKE_THREE_COMPONENT(signed long long, longlong3);
1243 DECLOP_MAKE_FOUR_COMPONENT(signed long long, longlong4);
1244 #else // defined(_MSC_VER)
1245 #include <mmintrin.h>
1246 #include <xmmintrin.h>
1247 #include <emmintrin.h>
1248 #include <immintrin.h>
1249 
1250 typedef union { char data; } char1;
1251 typedef union { char data[2]; } char2;
1252 typedef union { char data[4]; } char4;
1253 typedef union { char4 data; } char3;
1254 typedef union { __m64 data; } char8;
1255 typedef union { __m128i data; } char16;
1256 
1257 typedef union { unsigned char data; } uchar1;
1258 typedef union { unsigned char data[2]; } uchar2;
1259 typedef union { unsigned char data[4]; } uchar4;
1260 typedef union { uchar4 data; } uchar3;
1261 typedef union { __m64 data; } uchar8;
1262 typedef union { __m128i data; } uchar16;
1263 
1264 typedef union { short data; } short1;
1265 typedef union { short data[2]; } short2;
1266 typedef union { __m64 data; } short4;
1267 typedef union { short4 data; } short3;
1268 typedef union { __m128i data; } short8;
1269 typedef union { __m128i data[2]; } short16;
1270 
1271 typedef union { unsigned short data; } ushort1;
1272 typedef union { unsigned short data[2]; } ushort2;
1273 typedef union { __m64 data; } ushort4;
1274 typedef union { ushort4 data; } ushort3;
1275 typedef union { __m128i data; } ushort8;
1276 typedef union { __m128i data[2]; } ushort16;
1277 
1278 typedef union { int data; } int1;
1279 typedef union { __m64 data; } int2;
1280 typedef union { __m128i data; } int4;
1281 typedef union { int4 data; } int3;
1282 typedef union { __m128i data[2]; } int8;
1283 typedef union { __m128i data[4];} int16;
1284 
1285 typedef union { unsigned int data; } uint1;
1286 typedef union { __m64 data; } uint2;
1287 typedef union { __m128i data; } uint4;
1288 typedef union { uint4 data; } uint3;
1289 typedef union { __m128i data[2]; } uint8;
1290 typedef union { __m128i data[4]; } uint16;
1291 
1292 #if !defined(_WIN64)
1293 typedef union { int data; } long1;
1294 typedef union { __m64 data; } long2;
1295 typedef union { __m128i data; } long4;
1296 typedef union { long4 data; } long3;
1297 typedef union { __m128i data[2]; } long8;
1298 typedef union { __m128i data[4]; } long16;
1299 
1300 typedef union { unsigned int data; } ulong1;
1301 typedef union { __m64 data; } ulong2;
1302 typedef union { __m128i data; } ulong4;
1303 typedef union { ulong4 data; } ulong3;
1304 typedef union { __m128i data[2]; } ulong8;
1305 typedef union { __m128i data[4]; } ulong16;
1306 #else // defined(_WIN64)
1307 typedef union { __m64 data; } long1;
1308 typedef union { __m128i data; } long2;
1309 typedef union { __m128i data[2]; } long4;
1310 typedef union { long4 data; } long3;
1311 typedef union { __m128i data[4]; } long8;
1312 typedef union { __m128i data[8]; } long16;
1313 
1314 typedef union { __m64 data; } ulong1;
1315 typedef union { __m128i data; } ulong2;
1316 typedef union { __m128i data[2]; } ulong4;
1317 typedef union { ulong4 data; } ulong3;
1318 typedef union { __m128i data[4]; } ulong8;
1319 typedef union { __m128i data[8]; } ulong16;
1320 #endif // defined(_WIN64)
1321 
1322 typedef union { __m64 data; } longlong1;
1323 typedef union { __m128i data; } longlong2;
1324 typedef union { __m128i data[2]; } longlong4;
1325 typedef union { longlong4 data; } longlong3;
1326 typedef union { __m128i data[4]; } longlong8;
1327 typedef union { __m128i data[8]; } longlong16;
1328 
1329 typedef union { __m64 data; } ulonglong1;
1330 typedef union { __m128i data; } ulonglong2;
1331 typedef union { __m128i data[2]; } ulonglong4;
1332 typedef union { ulonglong4 data; } ulonglong3;
1333 typedef union { __m128i data[4]; } ulonglong8;
1334 typedef union { __m128i data[8]; } ulonglong16;
1335 
1336 typedef union { float data; } float1;
1337 typedef union { __m64 data; } float2;
1338 typedef union { __m128 data; } float4;
1339 typedef union { float4 data; } float3;
1340 typedef union { __m256 data; } float8;
1341 typedef union { __m256 data[2]; } float16;
1342 
1343 typedef union { double data; } double1;
1344 typedef union { __m128d data; } double2;
1345 typedef union { __m256d data; } double4;
1346 typedef union { double4 data; } double3;
1347 typedef union { __m256d data[2]; } double8;
1348 typedef union { __m256d data[4]; } double16;
1349 
1350 #endif // defined(_MSC_VER)
1351 #endif
TODO-doc.
#define __host__
Definition: host_defines.h:41
Definition: hip_runtime.h:202