HIP: Heterogenous-computing Interface for Portability
hip_vector_types.h
Go to the documentation of this file.
1 /*
2 Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved.
3 
4 Permission is hereby granted, free of charge, to any person obtaining a copy
5 of this software and associated documentation files (the "Software"), to deal
6 in the Software without restriction, including without limitation the rights
7 to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
8 copies of the Software, and to permit persons to whom the Software is
9 furnished to do so, subject to the following conditions:
10 
11 The above copyright notice and this permission notice shall be included in
12 all copies or substantial portions of the Software.
13 
14 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
17 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
19 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
20 THE SOFTWARE.
21 */
22 
28 #ifndef HIP_INCLUDE_HIP_HCC_DETAIL_HIP_VECTOR_TYPES_H
29 #define HIP_INCLUDE_HIP_HCC_DETAIL_HIP_VECTOR_TYPES_H
30 
31 #if defined(__HCC__) && (__hcc_workweek__ < 16032)
32 #error("This version of HIP requires a newer version of HCC.");
33 #endif
34 
36 
37 #if !defined(_MSC_VER) || __clang__
38 #if defined(__clang__)
39  #define __NATIVE_VECTOR__(n, ...) __attribute__((ext_vector_type(n)))
40 #elif defined(__GNUC__) // N.B.: GCC does not support .xyzw syntax.
41  #define __ROUND_UP_TO_NEXT_POT__(x) \
42  (1 << (31 - __builtin_clz(x) + (x > (1 << (31 - __builtin_clz(x))))))
43  #define __NATIVE_VECTOR__(n, T) \
44  __attribute__((vector_size(__ROUND_UP_TO_NEXT_POT__(n) * sizeof(T))))
45 #endif
46 
47 #if defined(__cplusplus)
48  #include <type_traits>
49 
50  template<typename T, unsigned int n> struct HIP_vector_base;
51 
52  template<typename T>
53  struct HIP_vector_base<T, 1> {
54  typedef T Native_vec_ __NATIVE_VECTOR__(1, T);
55 
56  union {
57  Native_vec_ data;
58  struct {
59  T x;
60  };
61  };
62  };
63 
64  template<typename T>
65  struct HIP_vector_base<T, 2> {
66  typedef T Native_vec_ __NATIVE_VECTOR__(2, T);
67 
68  union {
69  Native_vec_ data;
70  struct {
71  T x;
72  T y;
73  };
74  };
75  };
76 
77  template<typename T>
78  struct HIP_vector_base<T, 3> {
79  struct Native_vec_ {
80  T d[3];
81 
82  __host__ __device__
83  constexpr
84  Native_vec_() = default;
85  __host__ __device__
86  explicit
87  constexpr
88  Native_vec_(T x) noexcept : d{x, x, x} {}
89  __host__ __device__
90  constexpr
91  Native_vec_(T x, T y, T z) noexcept : d{x, y, z} {}
92  __host__ __device__
93  constexpr
94  Native_vec_(const Native_vec_&) = default;
95  __host__ __device__
96  constexpr
97  Native_vec_(Native_vec_&&) = default;
98  __host__ __device__
99  ~Native_vec_() = default;
100 
101  __host__ __device__
102  Native_vec_& operator=(const Native_vec_&) = default;
103  __host__ __device__
104  Native_vec_& operator=(Native_vec_&&) = default;
105 
106  __host__ __device__
107  T& operator[](unsigned int idx) noexcept { return d[idx]; }
108  __host__ __device__
109  T operator[](unsigned int idx) const noexcept { return d[idx]; }
110 
111  __host__ __device__
112  Native_vec_& operator+=(const Native_vec_& x) noexcept
113  {
114  for (auto i = 0u; i != 3u; ++i) d[i] += x.d[i];
115  return *this;
116  }
117  __host__ __device__
118  Native_vec_& operator-=(const Native_vec_& x) noexcept
119  {
120  for (auto i = 0u; i != 3u; ++i) d[i] -= x.d[i];
121  return *this;
122  }
123 
124  __host__ __device__
125  Native_vec_& operator*=(const Native_vec_& x) noexcept
126  {
127  for (auto i = 0u; i != 3u; ++i) d[i] *= x.d[i];
128  return *this;
129  }
130  __host__ __device__
131  Native_vec_& operator/=(const Native_vec_& x) noexcept
132  {
133  for (auto i = 0u; i != 3u; ++i) d[i] /= x.d[i];
134  return *this;
135  }
136 
137  template<
138  typename U = T,
139  typename std::enable_if<std::is_signed<U>{}>::type* = nullptr>
140  __host__ __device__
141  Native_vec_ operator-() const noexcept
142  {
143  auto r{*this};
144  for (auto&& x : r.d) x = -x;
145  return r;
146  }
147 
148  template<
149  typename U = T,
150  typename std::enable_if<std::is_integral<U>{}>::type* = nullptr>
151  __host__ __device__
152  Native_vec_ operator~() const noexcept
153  {
154  auto r{*this};
155  for (auto&& x : r.d) x = ~x;
156  return r;
157  }
158  template<
159  typename U = T,
160  typename std::enable_if<std::is_integral<U>{}>::type* = nullptr>
161  __host__ __device__
162  Native_vec_& operator%=(const Native_vec_& x) noexcept
163  {
164  for (auto i = 0u; i != 3u; ++i) d[i] %= x.d[i];
165  return *this;
166  }
167  template<
168  typename U = T,
169  typename std::enable_if<std::is_integral<U>{}>::type* = nullptr>
170  __host__ __device__
171  Native_vec_& operator^=(const Native_vec_& x) noexcept
172  {
173  for (auto i = 0u; i != 3u; ++i) d[i] ^= x.d[i];
174  return *this;
175  }
176  template<
177  typename U = T,
178  typename std::enable_if<std::is_integral<U>{}>::type* = nullptr>
179  __host__ __device__
180  Native_vec_& operator|=(const Native_vec_& x) noexcept
181  {
182  for (auto i = 0u; i != 3u; ++i) d[i] |= x.d[i];
183  return *this;
184  }
185  template<
186  typename U = T,
187  typename std::enable_if<std::is_integral<U>{}>::type* = nullptr>
188  __host__ __device__
189  Native_vec_& operator&=(const Native_vec_& x) noexcept
190  {
191  for (auto i = 0u; i != 3u; ++i) d[i] &= x.d[i];
192  return *this;
193  }
194  template<
195  typename U = T,
196  typename std::enable_if<std::is_integral<U>{}>::type* = nullptr>
197  __host__ __device__
198  Native_vec_& operator>>=(const Native_vec_& x) noexcept
199  {
200  for (auto i = 0u; i != 3u; ++i) d[i] >>= x.d[i];
201  return *this;
202  }
203  template<
204  typename U = T,
205  typename std::enable_if<std::is_integral<U>{}>::type* = nullptr>
206  __host__ __device__
207  Native_vec_& operator<<=(const Native_vec_& x) noexcept
208  {
209  for (auto i = 0u; i != 3u; ++i) d[i] <<= x.d[i];
210  return *this;
211  }
212 
213  using Vec3_cmp = int __NATIVE_VECTOR__(3, int);
214  __host__ __device__
215  Vec3_cmp operator==(const Native_vec_& x) const noexcept
216  {
217  Vec3_cmp r;
218  r[0] = d[0] == x.d[0];
219  r[1] = d[1] == x.d[1];
220  r[2] = d[2] == x.d[2];
221  return r;
222  }
223  };
224 
225  union {
226  Native_vec_ data;
227  struct {
228  T x;
229  T y;
230  T z;
231  };
232  };
233  };
234 
235  template<typename T>
236  struct HIP_vector_base<T, 4> {
237  typedef T Native_vec_ __NATIVE_VECTOR__(4, T);
238 
239  union {
240  Native_vec_ data;
241  struct {
242  T x;
243  T y;
244  T z;
245  T w;
246  };
247  };
248  };
249 
250  template<typename T, unsigned int rank>
251  struct HIP_vector_type : public HIP_vector_base<T, rank> {
252  using HIP_vector_base<T, rank>::data;
253  using typename HIP_vector_base<T, rank>::Native_vec_;
254 
255  inline __host__ __device__
256  HIP_vector_type() = default;
257  template<
258  typename U,
259  typename std::enable_if<
260  std::is_convertible<U, T>{}>::type* = nullptr>
261  explicit inline __host__ __device__
262  HIP_vector_type(U x) noexcept
263  {
264  for (auto i = 0u; i != rank; ++i) data[i] = x;
265  }
266  template< // TODO: constrain based on type as well.
267  typename... Us,
268  typename std::enable_if<
269  (rank > 1) && sizeof...(Us) == rank>::type* = nullptr>
270  inline __host__ __device__
271  HIP_vector_type(Us... xs) noexcept { data = Native_vec_{static_cast<T>(xs)...}; }
272  inline __host__ __device__
273  HIP_vector_type(const HIP_vector_type&) = default;
274  inline __host__ __device__
275  HIP_vector_type(HIP_vector_type&&) = default;
276  inline __host__ __device__
277  ~HIP_vector_type() = default;
278 
279  inline __host__ __device__
280  HIP_vector_type& operator=(const HIP_vector_type&) = default;
281  inline __host__ __device__
282  HIP_vector_type& operator=(HIP_vector_type&&) = default;
283 
284  // Operators
285  inline __host__ __device__
286  HIP_vector_type& operator++() noexcept
287  {
288  return *this += HIP_vector_type{1};
289  }
290  inline __host__ __device__
291  HIP_vector_type operator++(int) noexcept
292  {
293  auto tmp(*this);
294  ++*this;
295  return tmp;
296  }
297 
298  inline __host__ __device__
299  HIP_vector_type& operator--() noexcept
300  {
301  return *this -= HIP_vector_type{1};
302  }
303  inline __host__ __device__
304  HIP_vector_type operator--(int) noexcept
305  {
306  auto tmp(*this);
307  --*this;
308  return tmp;
309  }
310 
311  inline __host__ __device__
312  HIP_vector_type& operator+=(const HIP_vector_type& x) noexcept
313  {
314  data += x.data;
315  return *this;
316  }
317  template<
318  typename U,
319  typename std::enable_if<
320  std::is_convertible<U, T>{}>::type* = nullptr>
321  inline __host__ __device__
322  HIP_vector_type& operator+=(U x) noexcept
323  {
324  return *this += HIP_vector_type{x};
325  }
326 
327  inline __host__ __device__
328  HIP_vector_type& operator-=(const HIP_vector_type& x) noexcept
329  {
330  data -= x.data;
331  return *this;
332  }
333  template<
334  typename U,
335  typename std::enable_if<
336  std::is_convertible<U, T>{}>::type* = nullptr>
337  inline __host__ __device__
338  HIP_vector_type& operator-=(U x) noexcept
339  {
340  return *this -= HIP_vector_type{x};
341  }
342 
343  inline __host__ __device__
344  HIP_vector_type& operator*=(const HIP_vector_type& x) noexcept
345  {
346  data *= x.data;
347  return *this;
348  }
349  template<
350  typename U,
351  typename std::enable_if<
352  std::is_convertible<U, T>{}>::type* = nullptr>
353  inline __host__ __device__
354  HIP_vector_type& operator*=(U x) noexcept
355  {
356  return *this *= HIP_vector_type{x};
357  }
358 
359  inline __host__ __device__
360  HIP_vector_type& operator/=(const HIP_vector_type& x) noexcept
361  {
362  data /= x.data;
363  return *this;
364  }
365  template<
366  typename U,
367  typename std::enable_if<
368  std::is_convertible<U, T>{}>::type* = nullptr>
369  inline __host__ __device__
370  HIP_vector_type& operator/=(U x) noexcept
371  {
372  return *this /= HIP_vector_type{x};
373  }
374 
375  template<
376  typename U = T,
377  typename std::enable_if<std::is_signed<U>{}>::type* = nullptr>
378  inline __host__ __device__
379  HIP_vector_type operator-() noexcept
380  {
381  auto tmp(*this);
382  tmp.data = -tmp.data;
383  return tmp;
384  }
385 
386  template<
387  typename U = T,
388  typename std::enable_if<std::is_integral<U>{}>::type* = nullptr>
389  inline __host__ __device__
390  HIP_vector_type operator~() noexcept
391  {
392  HIP_vector_type r{*this};
393  r.data = ~r.data;
394  return r;
395  }
396 
397  template<
398  typename U = T,
399  typename std::enable_if<std::is_integral<U>{}>::type* = nullptr>
400  inline __host__ __device__
401  HIP_vector_type& operator%=(const HIP_vector_type& x) noexcept
402  {
403  data %= x.data;
404  return *this;
405  }
406 
407  template<
408  typename U = T,
409  typename std::enable_if<std::is_integral<U>{}>::type* = nullptr>
410  inline __host__ __device__
411  HIP_vector_type& operator^=(const HIP_vector_type& x) noexcept
412  {
413  data ^= x.data;
414  return *this;
415  }
416 
417  template<
418  typename U = T,
419  typename std::enable_if<std::is_integral<U>{}>::type* = nullptr>
420  inline __host__ __device__
421  HIP_vector_type& operator|=(const HIP_vector_type& x) noexcept
422  {
423  data |= x.data;
424  return *this;
425  }
426 
427  template<
428  typename U = T,
429  typename std::enable_if<std::is_integral<U>{}>::type* = nullptr>
430  inline __host__ __device__
431  HIP_vector_type& operator&=(const HIP_vector_type& x) noexcept
432  {
433  data &= x.data;
434  return *this;
435  }
436 
437  template<
438  typename U = T,
439  typename std::enable_if<std::is_integral<U>{}>::type* = nullptr>
440  inline __host__ __device__
441  HIP_vector_type& operator>>=(const HIP_vector_type& x) noexcept
442  {
443  data >>= x.data;
444  return *this;
445  }
446 
447  template<
448  typename U = T,
449  typename std::enable_if<std::is_integral<U>{}>::type* = nullptr>
450  inline __host__ __device__
451  HIP_vector_type& operator<<=(const HIP_vector_type& x) noexcept
452  {
453  data <<= x.data;
454  return *this;
455  }
456  };
457 
458 
459  template<typename T, unsigned int n>
460  inline __host__ __device__
461  HIP_vector_type<T, n> operator+(
462  const HIP_vector_type<T, n>& x, const HIP_vector_type<T, n>& y) noexcept
463  {
464  return HIP_vector_type<T, n>{x} += y;
465  }
466  template<typename T, unsigned int n, typename U>
467  inline __host__ __device__
468  HIP_vector_type<T, n> operator+(
469  const HIP_vector_type<T, n>& x, U y) noexcept
470  {
471  return HIP_vector_type<T, n>{x} += HIP_vector_type<T, n>{y};
472  }
473  template<typename T, unsigned int n, typename U>
474  inline __host__ __device__
475  HIP_vector_type<T, n> operator+(
476  U x, const HIP_vector_type<T, n>& y) noexcept
477  {
478  return HIP_vector_type<T, n>{x} += y;
479  }
480 
481  template<typename T, unsigned int n>
482  inline __host__ __device__
483  HIP_vector_type<T, n> operator-(
484  const HIP_vector_type<T, n>& x, const HIP_vector_type<T, n>& y) noexcept
485  {
486  return HIP_vector_type<T, n>{x} -= y;
487  }
488  template<typename T, unsigned int n, typename U>
489  inline __host__ __device__
490  HIP_vector_type<T, n> operator-(
491  const HIP_vector_type<T, n>& x, U y) noexcept
492  {
493  return HIP_vector_type<T, n>{x} -= HIP_vector_type<T, n>{y};
494  }
495  template<typename T, unsigned int n, typename U>
496  inline __host__ __device__
497  HIP_vector_type<T, n> operator-(
498  U x, const HIP_vector_type<T, n>& y) noexcept
499  {
500  return HIP_vector_type<T, n>{x} -= y;
501  }
502 
503  template<typename T, unsigned int n>
504  inline __host__ __device__
505  HIP_vector_type<T, n> operator*(
506  const HIP_vector_type<T, n>& x, const HIP_vector_type<T, n>& y) noexcept
507  {
508  return HIP_vector_type<T, n>{x} *= y;
509  }
510  template<typename T, unsigned int n, typename U>
511  inline __host__ __device__
512  HIP_vector_type<T, n> operator*(
513  const HIP_vector_type<T, n>& x, U y) noexcept
514  {
515  return HIP_vector_type<T, n>{x} *= HIP_vector_type<T, n>{y};
516  }
517  template<typename T, unsigned int n, typename U>
518  inline __host__ __device__
519  HIP_vector_type<T, n> operator*(
520  U x, const HIP_vector_type<T, n>& y) noexcept
521  {
522  return HIP_vector_type<T, n>{x} *= y;
523  }
524 
525  template<typename T, unsigned int n>
526  inline __host__ __device__
527  HIP_vector_type<T, n> operator/(
528  const HIP_vector_type<T, n>& x, const HIP_vector_type<T, n>& y) noexcept
529  {
530  return HIP_vector_type<T, n>{x} /= y;
531  }
532  template<typename T, unsigned int n, typename U>
533  inline __host__ __device__
534  HIP_vector_type<T, n> operator/(
535  const HIP_vector_type<T, n>& x, U y) noexcept
536  {
537  return HIP_vector_type<T, n>{x} /= HIP_vector_type<T, n>{y};
538  }
539  template<typename T, unsigned int n, typename U>
540  inline __host__ __device__
541  HIP_vector_type<T, n> operator/(
542  U x, const HIP_vector_type<T, n>& y) noexcept
543  {
544  return HIP_vector_type<T, n>{x} /= y;
545  }
546 
547  template<typename T, unsigned int n>
548  inline __host__ __device__
549  bool operator==(
550  const HIP_vector_type<T, n>& x, const HIP_vector_type<T, n>& y) noexcept
551  {
552  auto tmp = x.data == y.data;
553  for (auto i = 0u; i != n; ++i) if (tmp[i] == 0) return false;
554  return true;
555  }
556  template<typename T, unsigned int n, typename U>
557  inline __host__ __device__
558  bool operator==(const HIP_vector_type<T, n>& x, U y) noexcept
559  {
560  return x == HIP_vector_type<T, n>{y};
561  }
562  template<typename T, unsigned int n, typename U>
563  inline __host__ __device__
564  bool operator==(U x, const HIP_vector_type<T, n>& y) noexcept
565  {
566  return HIP_vector_type<T, n>{x} == y;
567  }
568 
569  template<typename T, unsigned int n>
570  inline __host__ __device__
571  bool operator!=(
572  const HIP_vector_type<T, n>& x, const HIP_vector_type<T, n>& y) noexcept
573  {
574  return !(x == y);
575  }
576  template<typename T, unsigned int n, typename U>
577  inline __host__ __device__
578  bool operator!=(const HIP_vector_type<T, n>& x, U y) noexcept
579  {
580  return !(x == y);
581  }
582  template<typename T, unsigned int n, typename U>
583  inline __host__ __device__
584  bool operator!=(U x, const HIP_vector_type<T, n>& y) noexcept
585  {
586  return !(x == y);
587  }
588 
589  template<
590  typename T,
591  unsigned int n,
592  typename std::enable_if<std::is_integral<T>{}>* = nullptr>
593  inline __host__ __device__
594  HIP_vector_type<T, n> operator%(
595  const HIP_vector_type<T, n>& x, const HIP_vector_type<T, n>& y) noexcept
596  {
597  return HIP_vector_type<T, n>{x} %= y;
598  }
599  template<
600  typename T,
601  unsigned int n,
602  typename U,
603  typename std::enable_if<std::is_integral<T>{}>* = nullptr>
604  inline __host__ __device__
605  HIP_vector_type<T, n> operator%(
606  const HIP_vector_type<T, n>& x, U y) noexcept
607  {
608  return HIP_vector_type<T, n>{x} %= HIP_vector_type<T, n>{y};
609  }
610  template<
611  typename T,
612  unsigned int n,
613  typename U,
614  typename std::enable_if<std::is_integral<T>{}>* = nullptr>
615  inline __host__ __device__
616  HIP_vector_type<T, n> operator%(
617  U x, const HIP_vector_type<T, n>& y) noexcept
618  {
619  return HIP_vector_type<T, n>{x} %= y;
620  }
621 
622  template<
623  typename T,
624  unsigned int n,
625  typename std::enable_if<std::is_integral<T>{}>* = nullptr>
626  inline __host__ __device__
627  HIP_vector_type<T, n> operator^(
628  const HIP_vector_type<T, n>& x, const HIP_vector_type<T, n>& y) noexcept
629  {
630  return HIP_vector_type<T, n>{x} ^= y;
631  }
632  template<
633  typename T,
634  unsigned int n,
635  typename U,
636  typename std::enable_if<std::is_integral<T>{}>* = nullptr>
637  inline __host__ __device__
638  HIP_vector_type<T, n> operator^(
639  const HIP_vector_type<T, n>& x, U y) noexcept
640  {
641  return HIP_vector_type<T, n>{x} ^= HIP_vector_type<T, n>{y};
642  }
643  template<
644  typename T,
645  unsigned int n,
646  typename U,
647  typename std::enable_if<std::is_integral<T>{}>* = nullptr>
648  inline __host__ __device__
649  HIP_vector_type<T, n> operator^(
650  U x, const HIP_vector_type<T, n>& y) noexcept
651  {
652  return HIP_vector_type<T, n>{x} ^= y;
653  }
654 
655  template<
656  typename T,
657  unsigned int n,
658  typename std::enable_if<std::is_integral<T>{}>* = nullptr>
659  inline __host__ __device__
660  HIP_vector_type<T, n> operator|(
661  const HIP_vector_type<T, n>& x, const HIP_vector_type<T, n>& y) noexcept
662  {
663  return HIP_vector_type<T, n>{x} |= y;
664  }
665  template<
666  typename T,
667  unsigned int n,
668  typename U,
669  typename std::enable_if<std::is_integral<T>{}>* = nullptr>
670  inline __host__ __device__
671  HIP_vector_type<T, n> operator|(
672  const HIP_vector_type<T, n>& x, U y) noexcept
673  {
674  return HIP_vector_type<T, n>{x} |= HIP_vector_type<T, n>{y};
675  }
676  template<
677  typename T,
678  unsigned int n,
679  typename U,
680  typename std::enable_if<std::is_integral<T>{}>* = nullptr>
681  inline __host__ __device__
682  HIP_vector_type<T, n> operator|(
683  U x, const HIP_vector_type<T, n>& y) noexcept
684  {
685  return HIP_vector_type<T, n>{x} |= y;
686  }
687 
688  template<
689  typename T,
690  unsigned int n,
691  typename std::enable_if<std::is_integral<T>{}>* = nullptr>
692  inline __host__ __device__
693  HIP_vector_type<T, n> operator&(
694  const HIP_vector_type<T, n>& x, const HIP_vector_type<T, n>& y) noexcept
695  {
696  return HIP_vector_type<T, n>{x} &= y;
697  }
698  template<
699  typename T,
700  unsigned int n,
701  typename U,
702  typename std::enable_if<std::is_integral<T>{}>* = nullptr>
703  inline __host__ __device__
704  HIP_vector_type<T, n> operator&(
705  const HIP_vector_type<T, n>& x, U y) noexcept
706  {
707  return HIP_vector_type<T, n>{x} &= HIP_vector_type<T, n>{y};
708  }
709  template<
710  typename T,
711  unsigned int n,
712  typename U,
713  typename std::enable_if<std::is_integral<T>{}>* = nullptr>
714  inline __host__ __device__
715  HIP_vector_type<T, n> operator&(
716  U x, const HIP_vector_type<T, n>& y) noexcept
717  {
718  return HIP_vector_type<T, n>{x} &= y;
719  }
720 
721  template<
722  typename T,
723  unsigned int n,
724  typename std::enable_if<std::is_integral<T>{}>* = nullptr>
725  inline __host__ __device__
726  HIP_vector_type<T, n> operator>>(
727  const HIP_vector_type<T, n>& x, const HIP_vector_type<T, n>& y) noexcept
728  {
729  return HIP_vector_type<T, n>{x} >>= y;
730  }
731  template<
732  typename T,
733  unsigned int n,
734  typename U,
735  typename std::enable_if<std::is_integral<T>{}>* = nullptr>
736  inline __host__ __device__
737  HIP_vector_type<T, n> operator>>(
738  const HIP_vector_type<T, n>& x, U y) noexcept
739  {
740  return HIP_vector_type<T, n>{x} >>= HIP_vector_type<T, n>{y};
741  }
742  template<
743  typename T,
744  unsigned int n,
745  typename U,
746  typename std::enable_if<std::is_integral<T>{}>* = nullptr>
747  inline __host__ __device__
748  HIP_vector_type<T, n> operator>>(
749  U x, const HIP_vector_type<T, n>& y) noexcept
750  {
751  return HIP_vector_type<T, n>{x} >>= y;
752  }
753 
754  template<
755  typename T,
756  unsigned int n,
757  typename std::enable_if<std::is_integral<T>{}>* = nullptr>
758  inline __host__ __device__
759  HIP_vector_type<T, n> operator<<(
760  const HIP_vector_type<T, n>& x, const HIP_vector_type<T, n>& y) noexcept
761  {
762  return HIP_vector_type<T, n>{x} <<= y;
763  }
764  template<
765  typename T,
766  unsigned int n,
767  typename U,
768  typename std::enable_if<std::is_integral<T>{}>* = nullptr>
769  inline __host__ __device__
770  HIP_vector_type<T, n> operator<<(
771  const HIP_vector_type<T, n>& x, U y) noexcept
772  {
773  return HIP_vector_type<T, n>{x} <<= HIP_vector_type<T, n>{y};
774  }
775  template<
776  typename T,
777  unsigned int n,
778  typename U,
779  typename std::enable_if<std::is_arithmetic<U>::value>::type,
780  typename std::enable_if<std::is_integral<T>{}>* = nullptr>
781  inline __host__ __device__
782  HIP_vector_type<T, n> operator<<(
783  U x, const HIP_vector_type<T, n>& y) noexcept
784  {
785  return HIP_vector_type<T, n>{x} <<= y;
786  }
787 
788  #define __MAKE_VECTOR_TYPE__(CUDA_name, T) \
789  using CUDA_name##1 = HIP_vector_type<T, 1>;\
790  using CUDA_name##2 = HIP_vector_type<T, 2>;\
791  using CUDA_name##3 = HIP_vector_type<T, 3>;\
792  using CUDA_name##4 = HIP_vector_type<T, 4>;
793 #else
794  #define __MAKE_VECTOR_TYPE__(CUDA_name, T) \
795  typedef T CUDA_name##_impl1 __NATIVE_VECTOR__(1, T);\
796  typedef T CUDA_name##_impl2 __NATIVE_VECTOR__(2, T);\
797  typedef T CUDA_name##_impl3 __NATIVE_VECTOR__(3, T);\
798  typedef T CUDA_name##_impl4 __NATIVE_VECTOR__(4, T);\
799  typedef struct {\
800  union {\
801  CUDA_name##_impl1 data;\
802  struct {\
803  T x;\
804  };\
805  };\
806  } CUDA_name##1;\
807  typedef struct {\
808  union {\
809  CUDA_name##_impl2 data;\
810  struct {\
811  T x;\
812  T y;\
813  };\
814  };\
815  } CUDA_name##2;\
816  typedef struct {\
817  union {\
818  T data[3];\
819  struct {\
820  T x;\
821  T y;\
822  T z;\
823  };\
824  };\
825  } CUDA_name##3;\
826  typedef struct {\
827  union {\
828  CUDA_name##_impl4 data;\
829  struct {\
830  T x;\
831  T y;\
832  T z;\
833  T w;\
834  };\
835  };\
836  } CUDA_name##4;
837 #endif
838 
839 __MAKE_VECTOR_TYPE__(uchar, unsigned char);
840 __MAKE_VECTOR_TYPE__(char, char);
841 __MAKE_VECTOR_TYPE__(ushort, unsigned short);
842 __MAKE_VECTOR_TYPE__(short, short);
843 __MAKE_VECTOR_TYPE__(uint, unsigned int);
844 __MAKE_VECTOR_TYPE__(int, int);
845 __MAKE_VECTOR_TYPE__(ulong, unsigned long);
846 __MAKE_VECTOR_TYPE__(long, long);
847 __MAKE_VECTOR_TYPE__(ulonglong, unsigned long long);
848 __MAKE_VECTOR_TYPE__(longlong, long long);
849 __MAKE_VECTOR_TYPE__(float, float);
850 __MAKE_VECTOR_TYPE__(double, double);
851 
852 #ifdef __cplusplus
853 #define DECLOP_MAKE_ONE_COMPONENT(comp, type) \
854  static inline __device__ __host__ \
855  type make_##type(comp x) { type r{x}; return r; }
856 
857 #define DECLOP_MAKE_TWO_COMPONENT(comp, type) \
858  static inline __device__ __host__ \
859  type make_##type(comp x, comp y) { type r{x, y}; return r; }
860 
861 #define DECLOP_MAKE_THREE_COMPONENT(comp, type) \
862  static inline __device__ __host__ \
863  type make_##type(comp x, comp y, comp z) { type r{x, y, z}; return r; }
864 
865 #define DECLOP_MAKE_FOUR_COMPONENT(comp, type) \
866  static inline __device__ __host__ \
867  type make_##type(comp x, comp y, comp z, comp w) { \
868  type r{x, y, z, w}; \
869  return r; \
870  }
871 #else
872  #define DECLOP_MAKE_ONE_COMPONENT(comp, type) \
873  static inline __device__ __host__ \
874  type make_##type(comp x) { type r; r.x =x; return r; }
875 
876  #define DECLOP_MAKE_TWO_COMPONENT(comp, type) \
877  static inline __device__ __host__ \
878  type make_##type(comp x, comp y) { type r; r.x=x; r.y=y; return r; }
879 
880  #define DECLOP_MAKE_THREE_COMPONENT(comp, type) \
881  static inline __device__ __host__ \
882  type make_##type(comp x, comp y, comp z) { type r; r.x=x; r.y=y; r.z=z; return r; }
883 
884  #define DECLOP_MAKE_FOUR_COMPONENT(comp, type) \
885  static inline __device__ __host__ \
886  type make_##type(comp x, comp y, comp z, comp w) { \
887  type r; r.x=x; r.y=y; r.z=z; r.w=w; \
888  return r; \
889  }
890 #endif
891 
892 DECLOP_MAKE_ONE_COMPONENT(unsigned char, uchar1);
893 DECLOP_MAKE_TWO_COMPONENT(unsigned char, uchar2);
894 DECLOP_MAKE_THREE_COMPONENT(unsigned char, uchar3);
895 DECLOP_MAKE_FOUR_COMPONENT(unsigned char, uchar4);
896 
897 DECLOP_MAKE_ONE_COMPONENT(signed char, char1);
898 DECLOP_MAKE_TWO_COMPONENT(signed char, char2);
899 DECLOP_MAKE_THREE_COMPONENT(signed char, char3);
900 DECLOP_MAKE_FOUR_COMPONENT(signed char, char4);
901 
902 DECLOP_MAKE_ONE_COMPONENT(unsigned short, ushort1);
903 DECLOP_MAKE_TWO_COMPONENT(unsigned short, ushort2);
904 DECLOP_MAKE_THREE_COMPONENT(unsigned short, ushort3);
905 DECLOP_MAKE_FOUR_COMPONENT(unsigned short, ushort4);
906 
907 DECLOP_MAKE_ONE_COMPONENT(signed short, short1);
908 DECLOP_MAKE_TWO_COMPONENT(signed short, short2);
909 DECLOP_MAKE_THREE_COMPONENT(signed short, short3);
910 DECLOP_MAKE_FOUR_COMPONENT(signed short, short4);
911 
912 DECLOP_MAKE_ONE_COMPONENT(unsigned int, uint1);
913 DECLOP_MAKE_TWO_COMPONENT(unsigned int, uint2);
914 DECLOP_MAKE_THREE_COMPONENT(unsigned int, uint3);
915 DECLOP_MAKE_FOUR_COMPONENT(unsigned int, uint4);
916 
917 DECLOP_MAKE_ONE_COMPONENT(signed int, int1);
918 DECLOP_MAKE_TWO_COMPONENT(signed int, int2);
919 DECLOP_MAKE_THREE_COMPONENT(signed int, int3);
920 DECLOP_MAKE_FOUR_COMPONENT(signed int, int4);
921 
922 DECLOP_MAKE_ONE_COMPONENT(float, float1);
923 DECLOP_MAKE_TWO_COMPONENT(float, float2);
924 DECLOP_MAKE_THREE_COMPONENT(float, float3);
925 DECLOP_MAKE_FOUR_COMPONENT(float, float4);
926 
927 DECLOP_MAKE_ONE_COMPONENT(double, double1);
928 DECLOP_MAKE_TWO_COMPONENT(double, double2);
929 DECLOP_MAKE_THREE_COMPONENT(double, double3);
930 DECLOP_MAKE_FOUR_COMPONENT(double, double4);
931 
932 DECLOP_MAKE_ONE_COMPONENT(unsigned long, ulong1);
933 DECLOP_MAKE_TWO_COMPONENT(unsigned long, ulong2);
934 DECLOP_MAKE_THREE_COMPONENT(unsigned long, ulong3);
935 DECLOP_MAKE_FOUR_COMPONENT(unsigned long, ulong4);
936 
937 DECLOP_MAKE_ONE_COMPONENT(signed long, long1);
938 DECLOP_MAKE_TWO_COMPONENT(signed long, long2);
939 DECLOP_MAKE_THREE_COMPONENT(signed long, long3);
940 DECLOP_MAKE_FOUR_COMPONENT(signed long, long4);
941 
942 DECLOP_MAKE_ONE_COMPONENT(unsigned long long, ulonglong1);
943 DECLOP_MAKE_TWO_COMPONENT(unsigned long long, ulonglong2);
944 DECLOP_MAKE_THREE_COMPONENT(unsigned long long, ulonglong3);
945 DECLOP_MAKE_FOUR_COMPONENT(unsigned long long, ulonglong4);
946 
947 DECLOP_MAKE_ONE_COMPONENT(signed long long, longlong1);
948 DECLOP_MAKE_TWO_COMPONENT(signed long long, longlong2);
949 DECLOP_MAKE_THREE_COMPONENT(signed long long, longlong3);
950 DECLOP_MAKE_FOUR_COMPONENT(signed long long, longlong4);
951 #else // defined(_MSC_VER)
952 #include <mmintrin.h>
953 #include <xmmintrin.h>
954 #include <emmintrin.h>
955 #include <immintrin.h>
956 
957 typedef union { char data; } char1;
958 typedef union { char data[2]; } char2;
959 typedef union { char data[4]; } char4;
960 typedef union { char4 data; } char3;
961 typedef union { __m64 data; } char8;
962 typedef union { __m128i data; } char16;
963 
964 typedef union { unsigned char data; } uchar1;
965 typedef union { unsigned char data[2]; } uchar2;
966 typedef union { unsigned char data[4]; } uchar4;
967 typedef union { uchar4 data; } uchar3;
968 typedef union { __m64 data; } uchar8;
969 typedef union { __m128i data; } uchar16;
970 
971 typedef union { short data; } short1;
972 typedef union { short data[2]; } short2;
973 typedef union { __m64 data; } short4;
974 typedef union { short4 data; } short3;
975 typedef union { __m128i data; } short8;
976 typedef union { __m128i data[2]; } short16;
977 
978 typedef union { unsigned short data; } ushort1;
979 typedef union { unsigned short data[2]; } ushort2;
980 typedef union { __m64 data; } ushort4;
981 typedef union { ushort4 data; } ushort3;
982 typedef union { __m128i data; } ushort8;
983 typedef union { __m128i data[2]; } ushort16;
984 
985 typedef union { int data; } int1;
986 typedef union { __m64 data; } int2;
987 typedef union { __m128i data; } int4;
988 typedef union { int4 data; } int3;
989 typedef union { __m128i data[2]; } int8;
990 typedef union { __m128i data[4];} int16;
991 
992 typedef union { unsigned int data; } uint1;
993 typedef union { __m64 data; } uint2;
994 typedef union { __m128i data; } uint4;
995 typedef union { uint4 data; } uint3;
996 typedef union { __m128i data[2]; } uint8;
997 typedef union { __m128i data[4]; } uint16;
998 
999 #if !defined(_WIN64)
1000 typedef union { int data; } long1;
1001 typedef union { __m64 data; } long2;
1002 typedef union { __m128i data; } long4;
1003 typedef union { long4 data; } long3;
1004 typedef union { __m128i data[2]; } long8;
1005 typedef union { __m128i data[4]; } long16;
1006 
1007 typedef union { unsigned int data; } ulong1;
1008 typedef union { __m64 data; } ulong2;
1009 typedef union { __m128i data; } ulong4;
1010 typedef union { ulong4 data; } ulong3;
1011 typedef union { __m128i data[2]; } ulong8;
1012 typedef union { __m128i data[4]; } ulong16;
1013 #else // defined(_WIN64)
1014 typedef union { __m64 data; } long1;
1015 typedef union { __m128i data; } long2;
1016 typedef union { __m128i data[2]; } long4;
1017 typedef union { long4 data; } long3;
1018 typedef union { __m128i data[4]; } long8;
1019 typedef union { __m128i data[8]; } long16;
1020 
1021 typedef union { __m64 data; } ulong1;
1022 typedef union { __m128i data; } ulong2;
1023 typedef union { __m128i data[2]; } ulong4;
1024 typedef union { ulong4 data; } ulong3;
1025 typedef union { __m128i data[4]; } ulong8;
1026 typedef union { __m128i data[8]; } ulong16;
1027 #endif // defined(_WIN64)
1028 
1029 typedef union { __m64 data; } longlong1;
1030 typedef union { __m128i data; } longlong2;
1031 typedef union { __m128i data[2]; } longlong4;
1032 typedef union { longlong4 data; } longlong3;
1033 typedef union { __m128i data[4]; } longlong8;
1034 typedef union { __m128i data[8]; } longlong16;
1035 
1036 typedef union { __m64 data; } ulonglong1;
1037 typedef union { __m128i data; } ulonglong2;
1038 typedef union { __m128i data[2]; } ulonglong4;
1039 typedef union { ulonglong4 data; } ulonglong3;
1040 typedef union { __m128i data[4]; } ulonglong8;
1041 typedef union { __m128i data[8]; } ulonglong16;
1042 
1043 typedef union { float data; } float1;
1044 typedef union { __m64 data; } float2;
1045 typedef union { __m128 data; } float4;
1046 typedef union { float4 data; } float3;
1047 typedef union { __m256 data; } float8;
1048 typedef union { __m256 data[2]; } float16;
1049 
1050 typedef union { double data; } double1;
1051 typedef union { __m128d data; } double2;
1052 typedef union { __m256d data; } double4;
1053 typedef union { double4 data; } double3;
1054 typedef union { __m256d data[2]; } double8;
1055 typedef union { __m256d data[4]; } double16;
1056 
1057 #endif // defined(_MSC_VER)
1058 #endif
TODO-doc.
#define __host__
Definition: host_defines.h:41