/home/users/khuck/src/operation_gordon_bell/Vc/include/Vc/common/simdarray.h

Line    % of fetches    Source
1  
/*  This file is part of the Vc library. {{{
2  
Copyright © 2013-2015 Matthias Kretz <kretz@kde.org>
3  
4  
Redistribution and use in source and binary forms, with or without
5  
modification, are permitted provided that the following conditions are met:
6  
    * Redistributions of source code must retain the above copyright
7  
      notice, this list of conditions and the following disclaimer.
8  
    * Redistributions in binary form must reproduce the above copyright
9  
      notice, this list of conditions and the following disclaimer in the
10  
      documentation and/or other materials provided with the distribution.
11  
    * Neither the names of contributing organizations nor the
12  
      names of its contributors may be used to endorse or promote products
13  
      derived from this software without specific prior written permission.
14  
15  
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
16  
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
17  
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
18  
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE FOR ANY
19  
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
20  
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
21  
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
22  
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23  
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
24  
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25  
26  
}}}*/
27  
28  
#ifndef VC_COMMON_SIMDARRAY_H_
29  
#define VC_COMMON_SIMDARRAY_H_
30  
31  
//#define Vc_DEBUG_SIMD_CAST 1
32  
//#define Vc_DEBUG_SORTED 1
33  
#if defined Vc_DEBUG_SIMD_CAST || defined Vc_DEBUG_SORTED
34  
#include <Vc/IO>
35  
#endif
36  
37  
#include <array>
38  
39  
#include "writemaskedvector.h"
40  
#include "simdarrayhelper.h"
41  
#include "simdmaskarray.h"
42  
#include "utility.h"
43  
#include "interleave.h"
44  
#include "indexsequence.h"
45  
#include "transpose.h"
46  
#include "macros.h"
47  
48  
namespace Vc_VERSIONED_NAMESPACE
49  
{
50  
// internal namespace (product & sum helper) {{{1
51  
namespace internal
52  
{
53  
template <typename T> T Vc_INTRINSIC Vc_PURE product_helper_(const T &l, const T &r) { return l * r; }
54  
template <typename T> T Vc_INTRINSIC Vc_PURE sum_helper_(const T &l, const T &r) { return l + r; }
55  
}  // namespace internal
56  
57  
// min & max declarations {{{1
58  
template <typename T, std::size_t N, typename V, std::size_t M>
59  
inline SimdArray<T, N, V, M> min(const SimdArray<T, N, V, M> &x,
60  
                                 const SimdArray<T, N, V, M> &y);
61  
template <typename T, std::size_t N, typename V, std::size_t M>
62  
inline SimdArray<T, N, V, M> max(const SimdArray<T, N, V, M> &x,
63  
                                 const SimdArray<T, N, V, M> &y);
64  
65  
// SimdArray class {{{1
66  
/// \addtogroup SimdArray
67  
/// @{
68  
69  
// atomic SimdArray {{{1
70  
#define Vc_CURRENT_CLASS_NAME SimdArray
71  
/**\internal
72  
 * Specialization of `SimdArray<T, N, VectorType, VectorSize>` for the case where `N ==
73  
 * VectorSize`.
74  
 *
75  
 * This is specialized for implementation purposes: Since the general implementation uses
76  
 * two SimdArray data members it recurses over different SimdArray instantiations. The
77  
 * recursion is ended by this specialization, which has a single \p VectorType_ data
78  
 * member to which all functions are forwarded more or less directly.
79  
 */
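// Decomposition sketch (illustrative only, not part of the original header; assumes an
// AVX target where float_v::size() == 8). A SimdArray<float, 24> would recurse roughly as
//
//   SimdArray<float, 24>            // generic template: two SimdArray data members
//     +- SimdArray<float, 16>       // generic template: two SimdArray data members
//     |    +- SimdArray<float, 8>   // this specialization: one float_v member
//     |    +- SimdArray<float, 8>   // this specialization: one float_v member
//     +- SimdArray<float, 8>        // this specialization: one float_v member
//
// The exact split depends on SimdArrayTraits; this is only meant to show where the
// recursion terminates.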
80  
template <typename T, std::size_t N, typename VectorType_>
81  
class SimdArray<T, N, VectorType_, N>
82  
{
83  
    static_assert(std::is_same<T, double>::value || std::is_same<T, float>::value ||
84  
                      std::is_same<T, int32_t>::value ||
85  
                      std::is_same<T, uint32_t>::value ||
86  
                      std::is_same<T, int16_t>::value ||
87  
                      std::is_same<T, uint16_t>::value,
88  
                  "SimdArray<T, N> may only be used with T = { double, float, int32_t, uint32_t, "
89  
                  "int16_t, uint16_t }");
90  
91  
public:
92  
    using VectorType = VectorType_;
93  
    using vector_type = VectorType;
94  
    using storage_type = vector_type;
95  
    using vectorentry_type = typename vector_type::VectorEntryType;
96  
    using value_type = T;
97  
    using mask_type = SimdMaskArray<T, N, vector_type>;
98  
    using index_type = SimdArray<int, N>;
99  
    static constexpr std::size_t size() { return N; }
100  
    using Mask = mask_type;
101  
    using MaskType = Mask;
102  
    using MaskArgument = const MaskType &;
103  
    using VectorEntryType = vectorentry_type;
104  
    using EntryType = value_type;
105  
    using IndexType = index_type;
106  
    using AsArg = const SimdArray &;
107  
    using reference = Detail::ElementReference<SimdArray>;
108  
    static constexpr std::size_t Size = size();
109  
    static constexpr std::size_t MemoryAlignment = storage_type::MemoryAlignment;
110  
111  
    // zero init
112  
#ifndef Vc_MSVC  // bogus error C2580
113  
    Vc_INTRINSIC SimdArray() = default;
114  
#endif
115  
116  
    // default copy ctor/operator
117  
    Vc_INTRINSIC SimdArray(const SimdArray &) = default;
118  
    Vc_INTRINSIC SimdArray(SimdArray &&) = default;
119  
    Vc_INTRINSIC SimdArray &operator=(const SimdArray &) = default;
120  
121  
    // broadcast
122  
    Vc_INTRINSIC SimdArray(const value_type &a) : data(a) {}
123  
    Vc_INTRINSIC SimdArray(value_type &a) : data(a) {}
124  
    Vc_INTRINSIC SimdArray(value_type &&a) : data(a) {}
125  
    template <
126  
        typename U,
127  
        typename = enable_if<std::is_same<U, int>::value && !std::is_same<int, value_type>::value>>
128  
    Vc_INTRINSIC SimdArray(U a)
129  
        : SimdArray(static_cast<value_type>(a))
130  
    {
131  
    }
132  
133  
    // implicit casts
134  
    template <typename U, typename V>
135  
    Vc_INTRINSIC SimdArray(const SimdArray<U, N, V> &x, enable_if<N == V::Size> = nullarg)
136  
        : data(simd_cast<vector_type>(internal_data(x)))
137  
    {
138  
    }
139  
    template <typename U, typename V>
140  
    Vc_INTRINSIC SimdArray(const SimdArray<U, N, V> &x,
141  
                            enable_if<(N > V::Size && N <= 2 * V::Size)> = nullarg)
142  
        : data(simd_cast<vector_type>(internal_data(internal_data0(x)), internal_data(internal_data1(x))))
143  
    {
144  
    }
145  
    template <typename U, typename V>
146  
    Vc_INTRINSIC SimdArray(const SimdArray<U, N, V> &x,
147  
                            enable_if<(N > 2 * V::Size && N <= 4 * V::Size)> = nullarg)
148  
        : data(simd_cast<vector_type>(internal_data(internal_data0(internal_data0(x))),
149  
                                      internal_data(internal_data1(internal_data0(x))),
150  
                                      internal_data(internal_data0(internal_data1(x))),
151  
                                      internal_data(internal_data1(internal_data1(x)))))
152  
    {
153  
    }
154  
155  
    template <typename V, std::size_t Pieces, std::size_t Index>
156  
    Vc_INTRINSIC SimdArray(Common::Segment<V, Pieces, Index> &&x)
157  
        : data(simd_cast<vector_type, Index>(x.data))
158  
    {
159  
    }
160  
161  
    Vc_INTRINSIC SimdArray(const std::initializer_list<value_type> &init)
162  
        : data(init.begin(), Vc::Unaligned)
163  
    {
164  
#if defined Vc_CXX14 && 0  // doesn't compile yet
165  
        static_assert(init.size() == size(), "The initializer_list argument to "
166  
                                             "SimdArray<T, N> must contain exactly N "
167  
                                             "values.");
168  
#else
169  
        Vc_ASSERT(init.size() == size());
170  
#endif
171  
    }
172  
173  
    // implicit conversion from underlying vector_type
174  
    template <
175  
        typename V,
176  
        typename = enable_if<Traits::is_simd_vector<V>::value && !Traits::isSimdArray<V>::value>>
177  
    explicit Vc_INTRINSIC SimdArray(const V &x)
178  
        : data(simd_cast<vector_type>(x))
179  
    {
180  
    }
181  
182  
    // implicit conversion to Vector<U, AnyAbi> if Vector<U, AnyAbi>::size() == N and
183  
    // T implicitly convertible to U
184  
    template <
185  
        typename U, typename A,
186  
        typename = enable_if<std::is_convertible<T, U>::value && Vector<U, A>::Size == N>>
187  
    Vc_INTRINSIC operator Vector<U, A>() const
188  
    {
189  
        return simd_cast<Vector<U, A>>(data);
190  
    }
191  
192  
#include "gatherinterface.h"
193  
#include "scatterinterface.h"
194  
195  
    // forward all remaining ctors
196  
    template <typename... Args,
197  
              typename = enable_if<!Traits::is_cast_arguments<Args...>::value &&
198  
                                   !Traits::is_gather_signature<Args...>::value &&
199  
                                   !Traits::is_initializer_list<Args...>::value>>
200  
    explicit Vc_INTRINSIC SimdArray(Args &&... args)
201  
        : data(std::forward<Args>(args)...)
202  
    {
203  
    }
204  
205  
    template <std::size_t Offset>
206  
    explicit Vc_INTRINSIC SimdArray(
207  
        Common::AddOffset<VectorSpecialInitializerIndexesFromZero, Offset>)
208  
        : data(Vc::IndexesFromZero)
209  
    {
210  
        data += value_type(Offset);
211  
    }
212  
213  
    Vc_INTRINSIC void setZero() { data.setZero(); }
214  
    Vc_INTRINSIC void setZero(mask_type k) { data.setZero(internal_data(k)); }
215  
    Vc_INTRINSIC void setZeroInverted() { data.setZeroInverted(); }
216  
    Vc_INTRINSIC void setZeroInverted(mask_type k) { data.setZeroInverted(internal_data(k)); }
217  
218  
    Vc_INTRINSIC void setQnan() { data.setQnan(); }
219  
    Vc_INTRINSIC void setQnan(mask_type m) { data.setQnan(internal_data(m)); }
220  
221  
    // internal: execute specified Operation
222  
    template <typename Op, typename... Args>
223  
    static Vc_INTRINSIC SimdArray fromOperation(Op op, Args &&... args)
224  
    {
225  
        SimdArray r;
226  
        Common::unpackArgumentsAuto(op, r.data, std::forward<Args>(args)...);
227  
        return r;
228  
    }
229  
230  
    template <typename Op, typename... Args>
231  
    static Vc_INTRINSIC void callOperation(Op op, Args &&... args)
232  
    {
233  
        Common::unpackArgumentsAuto(op, nullptr, std::forward<Args>(args)...);
234  
    }
235  
236  
    static Vc_INTRINSIC SimdArray Zero()
237  
    {
238  
        return SimdArray(Vc::Zero);
239  
    }
240  
    static Vc_INTRINSIC SimdArray One()
241  
    {
242  
        return SimdArray(Vc::One);
243  
    }
244  
    static Vc_INTRINSIC SimdArray IndexesFromZero()
245  
    {
246  
        return SimdArray(Vc::IndexesFromZero);
247  
    }
248  
    static Vc_INTRINSIC SimdArray Random()
249  
    {
250  
        return fromOperation(Common::Operations::random());
251  
    }
252  
253  
    template <typename... Args> Vc_INTRINSIC void load(Args &&... args)
254  
    {
255  
        data.load(std::forward<Args>(args)...);
256  
    }
257  
258  
    template <typename... Args> Vc_INTRINSIC void store(Args &&... args) const
259  
    {
260  
        data.store(std::forward<Args>(args)...);
261  
    }
262  
263  
    Vc_INTRINSIC mask_type operator!() const
264  
    {
265  
        return {!data};
266  
    }
267  
268  
    Vc_INTRINSIC SimdArray operator-() const
269  
    {
270  
        return {-data};
271  
    }
272  
273  
    /// Returns a copy of itself
274  
    Vc_INTRINSIC SimdArray operator+() const { return *this; }
275  
276  
    Vc_INTRINSIC SimdArray operator~() const
277  
    {
278  
        return {~data};
279  
    }
280  
281  
    template <typename U,
282  
              typename = enable_if<std::is_integral<T>::value && std::is_integral<U>::value>>
283  
    Vc_INTRINSIC Vc_CONST SimdArray operator<<(U x) const
284  
    {
285  
        return {data << x};
286  
    }
287  
    template <typename U,
288  
              typename = enable_if<std::is_integral<T>::value && std::is_integral<U>::value>>
289  
    Vc_INTRINSIC SimdArray &operator<<=(U x)
290  
    {
291  
        data <<= x;
292  
        return *this;
293  
    }
294  
    template <typename U,
295  
              typename = enable_if<std::is_integral<T>::value && std::is_integral<U>::value>>
296  
    Vc_INTRINSIC Vc_CONST SimdArray operator>>(U x) const
297  
    {
298  
        return {data >> x};
299  
    }
300  
    template <typename U,
301  
              typename = enable_if<std::is_integral<T>::value && std::is_integral<U>::value>>
302  
    Vc_INTRINSIC SimdArray &operator>>=(U x)
303  
    {
304  
        data >>= x;
305  
        return *this;
306  
    }
307  
308  
#define Vc_BINARY_OPERATOR_(op)                                                          \
309  
    Vc_INTRINSIC Vc_CONST SimdArray operator op(const SimdArray &rhs) const              \
310  
    {                                                                                    \
311  
        return {data op rhs.data};                                                       \
312  
    }                                                                                    \
313  
    Vc_INTRINSIC SimdArray &operator op##=(const SimdArray &rhs)                         \
314  
    {                                                                                    \
315  
        data op## = rhs.data;                                                            \
316  
        return *this;                                                                    \
317  
    }
318  
    Vc_ALL_ARITHMETICS(Vc_BINARY_OPERATOR_);
319  
    Vc_ALL_BINARY(Vc_BINARY_OPERATOR_);
320  
    Vc_ALL_SHIFTS(Vc_BINARY_OPERATOR_);
321  
#undef Vc_BINARY_OPERATOR_
322  
323  
#define Vc_COMPARES(op)                                                                  \
324  
    Vc_INTRINSIC mask_type operator op(const SimdArray &rhs) const                       \
325  
    {                                                                                    \
326  
        return {data op rhs.data};                                                       \
327  
    }
328  
    Vc_ALL_COMPARES(Vc_COMPARES);
329  
#undef Vc_COMPARES
330  
331  
    /// \copydoc Vector::isNegative
332  
    Vc_DEPRECATED("use isnegative(x) instead") Vc_INTRINSIC MaskType isNegative() const
333  
    {
334  
        return {isnegative(data)};
335  
    }
336  
337  
private:
338  
    friend reference;
339  
    Vc_INTRINSIC static value_type get(const SimdArray &o, int i) noexcept
340  
    {
341  
        return o.data[i];
342  
    }
343  
    template <typename U>
344  
    Vc_INTRINSIC static void set(SimdArray &o, int i, U &&v) noexcept(
345  
        noexcept(std::declval<value_type &>() = v))
346  
    {
347  
        o.data[i] = v;
348  
    }
349  
350  
public:
351  
    Vc_INTRINSIC reference operator[](size_t i) noexcept
352  
    {
353  
        static_assert(noexcept(reference{std::declval<SimdArray &>(), int()}), "");
354  
        return {*this, int(i)};
355  
    }
356  
    Vc_INTRINSIC value_type operator[](size_t i) const noexcept
357  
    {
358  
        return get(*this, int(i));
359  
    }
360  
361  
    Vc_INTRINSIC Common::WriteMaskedVector<SimdArray, mask_type> operator()(const mask_type &k)
362  
    {
363  
        return {*this, k};
364  
    }
365  
366  
    Vc_INTRINSIC void assign(const SimdArray &v, const mask_type &k)
367  
    {
368  
        data.assign(v.data, internal_data(k));
369  
    }
370  
371  
    // reductions ////////////////////////////////////////////////////////
372  
#define Vc_REDUCTION_FUNCTION_(name_)                                                    \
373  
    Vc_INTRINSIC Vc_PURE value_type name_() const { return data.name_(); }               \
374  
    Vc_INTRINSIC Vc_PURE value_type name_(mask_type mask) const                          \
375  
    {                                                                                    \
376  
        return data.name_(internal_data(mask));                                          \
377  
    }                                                                                    \
378  
    Vc_NOTHING_EXPECTING_SEMICOLON
379  
    Vc_REDUCTION_FUNCTION_(min);
380  
    Vc_REDUCTION_FUNCTION_(max);
381  
    Vc_REDUCTION_FUNCTION_(product);
382  
    Vc_REDUCTION_FUNCTION_(sum);
383  
#undef Vc_REDUCTION_FUNCTION_
384  
    Vc_INTRINSIC Vc_PURE SimdArray partialSum() const { return data.partialSum(); }
385  
386  
    template <typename F> Vc_INTRINSIC SimdArray apply(F &&f) const
387  
    {
388  
        return {data.apply(std::forward<F>(f))};
389  
    }
390  
    template <typename F> Vc_INTRINSIC SimdArray apply(F &&f, const mask_type &k) const
391  
    {
392  
        return {data.apply(std::forward<F>(f), k)};
393  
    }
394  
395  
    Vc_INTRINSIC SimdArray shifted(int amount) const
396  
    {
397  
        return {data.shifted(amount)};
398  
    }
399  
400  
    template <std::size_t NN>
401  
    Vc_INTRINSIC SimdArray shifted(int amount, const SimdArray<value_type, NN> &shiftIn)
402  
        const
403  
    {
404  
        return {data.shifted(amount, simd_cast<VectorType>(shiftIn))};
405  
    }
406  
407  
    Vc_INTRINSIC SimdArray rotated(int amount) const
408  
    {
409  
        return {data.rotated(amount)};
410  
    }
411  
412  
    /// \copydoc Vector::exponent
413  
    Vc_DEPRECATED("use exponent(x) instead") Vc_INTRINSIC SimdArray exponent() const
414  
    {
415  
        return {exponent(data)};
416  
    }
417  
418  
    Vc_INTRINSIC SimdArray interleaveLow(SimdArray x) const
419  
    {
420  
        return {data.interleaveLow(x.data)};
421  
    }
422  
    Vc_INTRINSIC SimdArray interleaveHigh(SimdArray x) const
423  
    {
424  
        return {data.interleaveHigh(x.data)};
425  
    }
426  
427  
    Vc_INTRINSIC SimdArray reversed() const
428  
    {
429  
        return {data.reversed()};
430  
    }
431  
432  
    Vc_INTRINSIC SimdArray sorted() const
433  
    {
434  
        return {data.sorted()};
435  
    }
436  
437  
    template <typename G> static Vc_INTRINSIC SimdArray generate(const G &gen)
438  
    {
439  
        return {VectorType::generate(gen)};
440  
    }
441  
442  
    Vc_DEPRECATED("use copysign(x, y) instead") Vc_INTRINSIC SimdArray
443  
        copySign(const SimdArray &reference) const
444  
    {
445  
        return {Vc::copysign(data, reference.data)};
446  
    }
447  
448  
    friend VectorType &internal_data<>(SimdArray &x);
449  
    friend const VectorType &internal_data<>(const SimdArray &x);
450  
451  
    /// \internal
452  
    Vc_INTRINSIC SimdArray(VectorType &&x) : data(std::move(x)) {}
453  
454  
    Vc_FREE_STORE_OPERATORS_ALIGNED(alignof(storage_type));
455  
456  
private:
457  
    // The alignas attribute attached to the class declaration above is ignored by ICC
458  
    // 17.0.0 (at least). So just move the alignas attribute down here where it works for
459  
    // all compilers.
460  
    alignas(static_cast<std::size_t>(
461  
        Common::BoundedAlignment<Common::NextPowerOfTwo<N>::value * sizeof(VectorType_) /
462  
                                 VectorType_::size()>::value)) storage_type data;
463  
};
464  
template <typename T, std::size_t N, typename VectorType> constexpr std::size_t SimdArray<T, N, VectorType, N>::Size;
465  
template <typename T, std::size_t N, typename VectorType>
466  
constexpr std::size_t SimdArray<T, N, VectorType, N>::MemoryAlignment;
467  
template <typename T, std::size_t N, typename VectorType>
468  
#ifndef Vc_MSVC
469  
Vc_INTRINSIC
470  
#endif
471  
VectorType &internal_data(SimdArray<T, N, VectorType, N> &x)
472  
{
473  
    return x.data;
474  
}
475  
template <typename T, std::size_t N, typename VectorType>
476  
#ifndef Vc_MSVC
477  
Vc_INTRINSIC
478  
#endif
479  
const VectorType &internal_data(const SimdArray<T, N, VectorType, N> &x)
480  
{
481  
    return x.data;
482  
}
483  
484  
// unpackIfSegment {{{2
485  
template <typename T> T unpackIfSegment(T &&x) { return std::forward<T>(x); }
486  
template <typename T, size_t Pieces, size_t Index>
487  
auto unpackIfSegment(Common::Segment<T, Pieces, Index> &&x) -> decltype(x.asSimdArray())
488  
{
489  
    return x.asSimdArray();
490  
}
491  
492  
// gatherImplementation {{{2
493  
template <typename T, std::size_t N, typename VectorType>
494  
template <typename MT, typename IT>
495  
inline void SimdArray<T, N, VectorType, N>::gatherImplementation(const MT *mem,
496  
                                                                 IT &&indexes)
497  
{
498  
    data.gather(mem, unpackIfSegment(std::forward<IT>(indexes)));
499  
}
500  
template <typename T, std::size_t N, typename VectorType>
501  
template <typename MT, typename IT>
502  
inline void SimdArray<T, N, VectorType, N>::gatherImplementation(const MT *mem,
503  
                                                                 IT &&indexes,
504  
                                                                 MaskArgument mask)
505  
{
506  
    data.gather(mem, unpackIfSegment(std::forward<IT>(indexes)), mask);
507  
}
508  
509  
// scatterImplementation {{{2
510  
template <typename T, std::size_t N, typename VectorType>
511  
template <typename MT, typename IT>
512  
inline void SimdArray<T, N, VectorType, N>::scatterImplementation(MT *mem,
513  
                                                                 IT &&indexes) const
514  
{
515  
    data.scatter(mem, unpackIfSegment(std::forward<IT>(indexes)));
516  
}
517  
template <typename T, std::size_t N, typename VectorType>
518  
template <typename MT, typename IT>
519  
inline void SimdArray<T, N, VectorType, N>::scatterImplementation(MT *mem,
520  
                                                                 IT &&indexes,
521  
                                                                 MaskArgument mask) const
522  
{
523  
    data.scatter(mem, unpackIfSegment(std::forward<IT>(indexes)), mask);
524  
}
525  
526  
// generic SimdArray {{{1
527  
/**
528  
 * Data-parallel arithmetic type with user-defined number of elements.
529  
 *
530  
 * \tparam T The type of the vector's elements. The supported types currently are limited
531  
 *           to the types supported by Vc::Vector<T>.
532  
 *
533  
 * \tparam N The number of elements to store and process concurrently. You can choose an
534  
 *           arbitrary number, though not every number is a good idea.
535  
 *           Generally, a power of two value or the sum of two power of two values might
536  
 *           work efficiently, though this depends a lot on the target system.
537  
 *
538  
 * \tparam V Don't change the default value unless you really know what you are doing.
539  
 *           This type is set to the underlying native Vc::Vector type used in the
540  
 *           implementation of the type.
541  
 *           Having it as part of the type name guards against some cases of ODR
542  
 *           violations (i.e. linking incompatible translation units / libraries).
543  
 *
544  
 * \tparam Wt Don't ever change the default value.
545  
 *           This parameter is an unfortunate implementation detail shining through.
546  
 *
547  
 * \warning Choosing \p N too large (what “too large” means depends on the target) will
548  
 *          result in excessive compilation times and very high register
549  
 *          pressure, thus potentially negating the improvement from concurrent execution.
550  
 *          As a rule of thumb, keep \p N less than or equal to `2 * float_v::size()`.
551  
 *
552  
 * \warning A special portability concern arises from a current limitation in the MIC
553  
 *          implementation (Intel Knights Corner), where SimdArray types with \p T = \p
554  
 *          (u)short require an \p N either less than short_v::size() or a multiple of
555  
 *          short_v::size().
556  
 *
557  
 * \headerfile simdarray.h <Vc/SimdArray>
558  
 */
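// Usage sketch (illustrative only, not part of the original header; assumes the public
// <Vc/SimdArray> header is included):
//
//   using float8 = Vc::SimdArray<float, 8>;   // always 8 floats, on any target
//
//   float8 x = float8::IndexesFromZero();     // {0, 1, 2, ..., 7}
//   float8 y = x * 2.f + 1.f;                 // element-wise arithmetic with broadcasts
//   float  s = y.sum();                       // horizontal reduction: 64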
559  
template <typename T, size_t N, typename V, size_t Wt> class SimdArray
560  
{
561  
    static_assert(std::is_same<T,   double>::value ||
562  
                  std::is_same<T,    float>::value ||
563  
                  std::is_same<T,  int32_t>::value ||
564  
                  std::is_same<T, uint32_t>::value ||
565  
                  std::is_same<T,  int16_t>::value ||
566  
                  std::is_same<T, uint16_t>::value, "SimdArray<T, N> may only be used with T = { double, float, int32_t, uint32_t, int16_t, uint16_t }");
567  
    static_assert(
568  
        // either the EntryType and VectorEntryType of the main V are equal
569  
        std::is_same<typename V::EntryType, typename V::VectorEntryType>::value ||
570  
            // or N is a multiple of V::size()
571  
            (N % V::size() == 0),
572  
        "SimdArray<(un)signed short, N> on MIC only works correctly for N = k * "
573  
        "MIC::(u)short_v::size(), i.e. k * 16.");
574  
575  
    using my_traits = SimdArrayTraits<T, N>;
576  
    static constexpr std::size_t N0 = my_traits::N0;
577  
    static constexpr std::size_t N1 = my_traits::N1;
578  
    using Split = Common::Split<N0>;
579  
    template <typename U, std::size_t K> using CArray = U[K];
580  
581  
public:
582  
    using storage_type0 = typename my_traits::storage_type0;
583  
    using storage_type1 = typename my_traits::storage_type1;
584  
    static_assert(storage_type0::size() == N0, "");
585  
586  
    /**\internal
587  
     * This type reveals the implementation-specific type used for the data member.
588  
     */
589  
    using vector_type = V;
590  
    using vectorentry_type = typename storage_type0::vectorentry_type;
591  
    typedef vectorentry_type alias_type Vc_MAY_ALIAS;
592  
593  
    /// The type of the elements (i.e.\ \p T)
594  
    using value_type = T;
595  
596  
    /// The type of the mask used for masked operations and returned from comparisons.
597  
    using mask_type = SimdMaskArray<T, N, vector_type>;
598  
599  
    /// The type of the vector used for indexes in gather and scatter operations.
600  
    using index_type = SimdArray<int, N>;
601  
602  
    /**
603  
     * Returns \p N, the number of scalar components in an object of this type.
604  
     *
605  
     * The size of the SimdArray, i.e. the number of scalar elements in the vector. In
606  
     * contrast to Vector::size() you have control over this value via the \p N template
607  
     * parameter of the SimdArray class template.
608  
     *
609  
     * \returns The number of scalar values stored and manipulated concurrently by objects
610  
     * of this type.
611  
     */
612  
    static constexpr std::size_t size() { return N; }
613  
614  
    /// \copydoc mask_type
615  
    using Mask = mask_type;
616  
    /// \copydoc mask_type
617  
    using MaskType = Mask;
618  
    using MaskArgument = const MaskType &;
619  
    using VectorEntryType = vectorentry_type;
620  
    /// \copydoc value_type
621  
    using EntryType = value_type;
622  
    /// \copydoc index_type
623  
    using IndexType = index_type;
624  
    using AsArg = const SimdArray &;
625  
626  
    using reference = Detail::ElementReference<SimdArray>;
627  
628  
    ///\copydoc Vector::MemoryAlignment
629  
    static constexpr std::size_t MemoryAlignment =
630  
        storage_type0::MemoryAlignment > storage_type1::MemoryAlignment
631  
            ? storage_type0::MemoryAlignment
632  
            : storage_type1::MemoryAlignment;
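
    // MemoryAlignment sketch (illustrative, not from the original source): over-align
    // buffers that are used with Vc::Aligned loads and stores.
    //
    //   using V = Vc::SimdArray<float, 8>;
    //   alignas(V::MemoryAlignment) float buf[V::size()];
    //   V(1.f).store(buf, Vc::Aligned);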
633  
634  
    /// \name Generators
635  
    ///@{
636  
637  
    ///\copybrief Vector::Zero
638  
    static Vc_INTRINSIC SimdArray Zero()
639  
    {
640  
        return SimdArray(Vc::Zero);
641  
    }
642  
643  
    ///\copybrief Vector::One
644  
    static Vc_INTRINSIC SimdArray One()
645  
    {
646  
        return SimdArray(Vc::One);
647  
    }
648  
649  
    ///\copybrief Vector::IndexesFromZero
650  
    static Vc_INTRINSIC SimdArray IndexesFromZero()
651  
    {
652  
        return SimdArray(Vc::IndexesFromZero);
653  
    }
654  
655  
    ///\copydoc Vector::Random
656  
    static Vc_INTRINSIC SimdArray Random()
657  
    {
658  
        return fromOperation(Common::Operations::random());
659  
    }
660  
661  
    ///\copybrief Vector::generate
662  
    template <typename G> static Vc_INTRINSIC SimdArray generate(const G &gen) // {{{2
663  
    {
664  
        auto tmp = storage_type0::generate(gen);  // GCC bug: the order of evaluation in
665  
                                                  // an initializer list is well-defined
666  
                                                  // (front to back), but GCC 4.8 doesn't
667  
                                                  // implement this correctly. Therefore
668  
                                                  // we enforce correct order.
669  
        return {std::move(tmp),
670  
                storage_type1::generate([&](std::size_t i) { return gen(i + N0); })};
671  
    }
672  
    ///@}
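
    // generate() sketch (illustrative, not from the original source): the callable is
    // invoked once per element index.
    //
    //   auto squares = Vc::SimdArray<int, 8>::generate(
    //       [](std::size_t i) { return int(i * i); });   // {0, 1, 4, 9, 16, 25, 36, 49}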
673  
674  
    /// \name Compile-Time Constant Initialization
675  
    ///@{
676  
677  
    ///\copydoc Vector::Vector()
678  
#ifndef Vc_MSVC  // bogus error C2580
679  
    SimdArray() = default;
680  
#endif
681  
    ///@}
682  
683  
    /// \name Conversion/Broadcast Constructors
684  
    ///@{
685  
686  
    ///\copydoc Vector::Vector(EntryType)
687  
    Vc_INTRINSIC SimdArray(value_type a) : data0(a), data1(a) {}
688  
    template <
689  
        typename U,
690  
        typename = enable_if<std::is_same<U, int>::value && !std::is_same<int, value_type>::value>>
691  
    SimdArray(U a)
692  
        : SimdArray(static_cast<value_type>(a))
693  
    {
694  
    }
695  
    ///@}
696  
697  
    // default copy ctor/operator
698  
    SimdArray(const SimdArray &) = default;
699  
    SimdArray(SimdArray &&) = default;
700  
    SimdArray &operator=(const SimdArray &) = default;
701  
702  
    // load ctor
703  
    template <typename U,
704  
              typename Flags = DefaultLoadTag,
705  
              typename = enable_if<Traits::is_load_store_flag<Flags>::value>>
706  
    explicit Vc_INTRINSIC SimdArray(const U *mem, Flags f = Flags())
707  
        : data0(mem, f), data1(mem + storage_type0::size(), f)
708  
    {
709  
    }
710  
711  
// MSVC does overload resolution differently and takes the const U *mem overload (I hope)
712  
#ifndef Vc_MSVC
713  
    /**\internal
714  
     * Load from a C-array. This is basically the same function as the load constructor
715  
     * above, except that otherwise the variadic forwarding-reference constructor would win
716  
     * overload resolution and the load constructor above would never be called. It enables loads
717  
     * from C-arrays.
718  
     */
719  
    template <typename U, std::size_t Extent, typename Flags = DefaultLoadTag,
720  
              typename = enable_if<Traits::is_load_store_flag<Flags>::value>>
721  
    explicit Vc_INTRINSIC SimdArray(CArray<U, Extent> &mem, Flags f = Flags())
722  
        : data0(&mem[0], f), data1(&mem[storage_type0::size()], f)
723  
    {
724  
    }
725  
    /**\internal
726  
     * Const overload of the above.
727  
     */
728  
    template <typename U, std::size_t Extent, typename Flags = DefaultLoadTag,
729  
              typename = enable_if<Traits::is_load_store_flag<Flags>::value>>
730  
    explicit Vc_INTRINSIC SimdArray(const CArray<U, Extent> &mem, Flags f = Flags())
731  
        : data0(&mem[0], f), data1(&mem[storage_type0::size()], f)
732  
    {
733  
    }
734  
#endif
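
    // Load-constructor sketch (illustrative, not from the original source):
    //
    //   alignas(Vc::SimdArray<float, 8>::MemoryAlignment) float buf[8] = {};
    //   Vc::SimdArray<float, 8> a(&buf[0], Vc::Aligned);   // pointer + load/store flag
    //   Vc::SimdArray<float, 8> b(buf);                    // C-array overload, default flags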
735  
736  
    // initializer list
737  
    Vc_INTRINSIC SimdArray(const std::initializer_list<value_type> &init)
738  
        : data0(init.begin(), Vc::Unaligned)
739  
        , data1(init.begin() + storage_type0::size(), Vc::Unaligned)
740  
    {
741  
#if defined Vc_CXX14 && 0  // doesn't compile yet
742  
        static_assert(init.size() == size(), "The initializer_list argument to "
743  
                                             "SimdArray<T, N> must contain exactly N "
744  
                                             "values.");
745  
#else
746  
        Vc_ASSERT(init.size() == size());
747  
#endif
748  
    }
749  
750  
#include "gatherinterface.h"
751  
#include "scatterinterface.h"
752  
753  
    // forward all remaining ctors
754  
    template <typename... Args,
755  
              typename = enable_if<!Traits::is_cast_arguments<Args...>::value &&
756  
                                   !Traits::is_initializer_list<Args...>::value &&
757  
                                   !Traits::is_gather_signature<Args...>::value &&
758  
                                   !Traits::is_load_arguments<Args...>::value>>
759  
    explicit Vc_INTRINSIC SimdArray(Args &&... args)
760  
        : data0(Split::lo(args)...)  // no forward here - it could move and thus
761  
                                     // break the next line
762  
        , data1(Split::hi(std::forward<Args>(args))...)
763  
    {
764  
    }
765  
766  
    // explicit casts
767  
    template <typename W>
768  
    Vc_INTRINSIC explicit SimdArray(
769  
        W &&x,
770  
        enable_if<(Traits::is_simd_vector<W>::value && Traits::simd_vector_size<W>::value == N &&
771  
                   !(std::is_convertible<Traits::entry_type_of<W>, T>::value &&
772  
                     Traits::isSimdArray<W>::value))> = nullarg)
773  
        : data0(Split::lo(x)), data1(Split::hi(x))
774  
    {
775  
    }
776  
777  
    // implicit casts
778  
    template <typename W>
779  
    Vc_INTRINSIC SimdArray(
780  
        W &&x,
781  
        enable_if<(Traits::isSimdArray<W>::value && Traits::simd_vector_size<W>::value == N &&
782  
                   std::is_convertible<Traits::entry_type_of<W>, T>::value)> = nullarg)
783  
        : data0(Split::lo(x)), data1(Split::hi(x))
784  
    {
785  
    }
786  
787  
    // implicit conversion to Vector<U, AnyAbi> if Vector<U, AnyAbi>::size() == N and
788  
    // T implicitly convertible to U
789  
    template <
790  
        typename U, typename A,
791  
        typename = enable_if<std::is_convertible<T, U>::value && Vector<U, A>::Size == N>>
792  
    operator Vector<U, A>() const
793  
    {
794  
        return simd_cast<Vector<U, A>>(data0, data1);
795  
    }
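
    // Conversion sketch (illustrative, not from the original source): a SimdArray whose
    // N equals the Vector's size converts implicitly to that Vector type.
    //
    //   Vc::SimdArray<float, Vc::float_v::size()> a(1.f);
    //   Vc::float_v v = a;   // allowed: sizes match and float is convertible to float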
796  
797  
    //////////////////// other functions ///////////////
798  
799  
    Vc_INTRINSIC void setZero()
800  
    {
801  
        data0.setZero();
802  
        data1.setZero();
803  
    }
804  
    Vc_INTRINSIC void setZero(const mask_type &k)
805  
    {
806  
        data0.setZero(Split::lo(k));
807  
        data1.setZero(Split::hi(k));
808  
    }
809  
    Vc_INTRINSIC void setZeroInverted()
810  
    {
811  
        data0.setZeroInverted();
812  
        data1.setZeroInverted();
813  
    }
814  
    Vc_INTRINSIC void setZeroInverted(const mask_type &k)
815  
    {
816  
        data0.setZeroInverted(Split::lo(k));
817  
        data1.setZeroInverted(Split::hi(k));
818  
    }
819  
820  
821  
    Vc_INTRINSIC void setQnan() {
822  
        data0.setQnan();
823  
        data1.setQnan();
824  
    }
825  
    Vc_INTRINSIC void setQnan(const mask_type &m) {
826  
        data0.setQnan(Split::lo(m));
827  
        data1.setQnan(Split::hi(m));
828  
    }
829  
830  
    ///\internal execute specified Operation
831  
    template <typename Op, typename... Args>
832  
    static Vc_INTRINSIC SimdArray fromOperation(Op op, Args &&... args)
833  
    {
834  
        SimdArray r = {
835  
            storage_type0::fromOperation(op, Split::lo(args)...),  // no forward here - it
836  
                                                                   // could move and thus
837  
                                                                   // break the next line
838  
            storage_type1::fromOperation(op, Split::hi(std::forward<Args>(args))...)};
839  
        return r;
840  
    }
841  
842  
    ///\internal
843  
    template <typename Op, typename... Args>
844  
    static Vc_INTRINSIC void callOperation(Op op, Args &&... args)
845  
    {
846  
        storage_type0::callOperation(op, Split::lo(args)...);
847  
        storage_type1::callOperation(op, Split::hi(std::forward<Args>(args))...);
848  
    }
849  
850  
851  
    template <typename U, typename... Args> Vc_INTRINSIC void load(const U *mem, Args &&... args)
852  
    {
853  
        data0.load(mem, Split::lo(args)...);  // no forward here - it could move and thus
854  
                                              // break the next line
855  
        data1.load(mem + storage_type0::size(), Split::hi(std::forward<Args>(args))...);
856  
    }
857  
858  
    template <typename U, typename... Args> Vc_INTRINSIC void store(U *mem, Args &&... args) const
859  
    {
860  
        data0.store(mem, Split::lo(args)...);  // no forward here - it could move and thus
861  
                                               // break the next line
862  
        data1.store(mem + storage_type0::size(), Split::hi(std::forward<Args>(args))...);
863  
    }
864  
865  
    Vc_INTRINSIC mask_type operator!() const
866  
    {
867  
        return {!data0, !data1};
868  
    }
869  
870  
    Vc_INTRINSIC SimdArray operator-() const
871  
    {
872  
        return {-data0, -data1};
873  
    }
874  
875  
    /// Returns a copy of itself
876  
    Vc_INTRINSIC SimdArray operator+() const { return *this; }
877  
878  
    Vc_INTRINSIC SimdArray operator~() const
879  
    {
880  
        return {~data0, ~data1};
881  
    }
882  
883  
    // left/right shift operators {{{2
884  
    template <typename U,
885  
              typename = enable_if<std::is_integral<T>::value && std::is_integral<U>::value>>
886  
    Vc_INTRINSIC Vc_CONST SimdArray operator<<(U x) const
887  
    {
888  
        return {data0 << x, data1 << x};
889  
    }
890  
    template <typename U,
891  
              typename = enable_if<std::is_integral<T>::value && std::is_integral<U>::value>>
892  
    Vc_INTRINSIC SimdArray &operator<<=(U x)
893  
    {
894  
        data0 <<= x;
895  
        data1 <<= x;
896  
        return *this;
897  
    }
898  
    template <typename U,
899  
              typename = enable_if<std::is_integral<T>::value && std::is_integral<U>::value>>
900  
    Vc_INTRINSIC Vc_CONST SimdArray operator>>(U x) const
901  
    {
902  
        return {data0 >> x, data1 >> x};
903  
    }
904  
    template <typename U,
905  
              typename = enable_if<std::is_integral<T>::value && std::is_integral<U>::value>>
906  
    Vc_INTRINSIC SimdArray &operator>>=(U x)
907  
    {
908  
        data0 >>= x;
909  
        data1 >>= x;
910  
        return *this;
911  
    }
912  
913  
    // binary operators {{{2
914  
#define Vc_BINARY_OPERATOR_(op)                                                          \
915  
    Vc_INTRINSIC Vc_CONST SimdArray operator op(const SimdArray &rhs) const              \
916  
    {                                                                                    \
917  
        return {data0 op rhs.data0, data1 op rhs.data1};                                 \
918  
    }                                                                                    \
919  
    Vc_INTRINSIC SimdArray &operator op##=(const SimdArray &rhs)                         \
920  
    {                                                                                    \
921  
        data0 op## = rhs.data0;                                                          \
922  
        data1 op## = rhs.data1;                                                          \
923  
        return *this;                                                                    \
924  
    }
925  
    Vc_ALL_ARITHMETICS(Vc_BINARY_OPERATOR_);
926  
    Vc_ALL_BINARY(Vc_BINARY_OPERATOR_);
927  
    Vc_ALL_SHIFTS(Vc_BINARY_OPERATOR_);
928  
#undef Vc_BINARY_OPERATOR_
929  
930  
#define Vc_COMPARES(op)                                                                  \
931  
    Vc_INTRINSIC mask_type operator op(const SimdArray &rhs) const                       \
932  
    {                                                                                    \
933  
        return {data0 op rhs.data0, data1 op rhs.data1};                                 \
934  
    }
935  
    Vc_ALL_COMPARES(Vc_COMPARES);
936  
#undef Vc_COMPARES
937  
938  
    // operator[] {{{2
939  
    /// \name Scalar Subscript Operators
940  
    ///@{
941  
942  
private:
943  
    friend reference;
944  
    Vc_INTRINSIC static value_type get(const SimdArray &o, int i) noexcept
945  
    {
946  
        return reinterpret_cast<const alias_type *>(&o)[i];
947  
    }
948  
    template <typename U>
949  
    Vc_INTRINSIC static void set(SimdArray &o, int i, U &&v) noexcept(
950  
        noexcept(std::declval<value_type &>() = v))
951  
    {
952 2.3%
        reinterpret_cast<alias_type *>(&o)[i] = v;
953  
    }
954  
955  
public:
956  
    ///\copydoc Vector::operator[](size_t)
957  
    Vc_INTRINSIC reference operator[](size_t i) noexcept
958  
    {
959  
        static_assert(noexcept(reference{std::declval<SimdArray &>(), int()}), "");
960  
        return {*this, int(i)};
961  
    }
962  
963  
    ///\copydoc Vector::operator[](size_t) const
964  
    Vc_INTRINSIC value_type operator[](size_t index) const noexcept
965  
    {
966  
        return get(*this, int(index));
967  
    }
968  
    ///@}
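
    // Subscript sketch (illustrative): the non-const operator[] returns a proxy
    // reference (Detail::ElementReference), not a plain value_type&.
    //
    //   Vc::SimdArray<int, 8> v = Vc::SimdArray<int, 8>::IndexesFromZero();
    //   v[0] = 42;           // write through the proxy
    //   int first = v[0];    // reads back 42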
969  
970  
    // operator(){{{2
971  
    ///\copydoc Vector::operator()(MaskType)
972  
    Vc_INTRINSIC Common::WriteMaskedVector<SimdArray, mask_type> operator()(
973  
        const mask_type &mask)
974  
    {
975  
        return {*this, mask};
976  
    }
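
    // Write-masking sketch (illustrative; the usual Vc idiom): assignments through
    // operator()(mask) only modify the selected elements.
    //
    //   using V = Vc::SimdArray<float, 8>;
    //   V v = V::IndexesFromZero();
    //   v(v > 3.f) = 0.f;   // elements with value > 3 become 0, the rest are unchanged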
977  
978  
    ///\internal
979  
    Vc_INTRINSIC void assign(const SimdArray &v, const mask_type &k) //{{{2
980  
    {
981  
        data0.assign(v.data0, internal_data0(k));
982  
        data1.assign(v.data1, internal_data1(k));
983  
    }
984  
985  
    // reductions {{{2
986  
#define Vc_REDUCTION_FUNCTION_(name_, binary_fun_, scalar_fun_)                          \
987  
private:                                                                                 \
988  
    template <typename ForSfinae = void>                                                 \
989  
    Vc_INTRINSIC enable_if<std::is_same<ForSfinae, void>::value &&                       \
990  
                               storage_type0::Size == storage_type1::Size,           \
991  
                           value_type> name_##_impl() const                              \
992  
    {                                                                                    \
993  
        return binary_fun_(data0, data1).name_();                                        \
994  
    }                                                                                    \
995  
                                                                                         \
996  
    template <typename ForSfinae = void>                                                 \
997  
    Vc_INTRINSIC enable_if<std::is_same<ForSfinae, void>::value &&                       \
998  
                               storage_type0::Size != storage_type1::Size,           \
999  
                           value_type> name_##_impl() const                              \
1000  
    {                                                                                    \
1001  
        return scalar_fun_(data0.name_(), data1.name_());                                \
1002  
    }                                                                                    \
1003  
                                                                                         \
1004  
public:                                                                                  \
1005  
    /**\copybrief Vector::##name_ */                                                     \
1006  
    Vc_INTRINSIC value_type name_() const { return name_##_impl(); }                     \
1007  
    /**\copybrief Vector::##name_ */                                                     \
1008  
    Vc_INTRINSIC value_type name_(const mask_type &mask) const                           \
1009  
    {                                                                                    \
1010  
        if (Vc_IS_UNLIKELY(Split::lo(mask).isEmpty())) {                                 \
1011  
            return data1.name_(Split::hi(mask));                                         \
1012  
        } else if (Vc_IS_UNLIKELY(Split::hi(mask).isEmpty())) {                          \
1013  
            return data0.name_(Split::lo(mask));                                         \
1014  
        } else {                                                                         \
1015  
            return scalar_fun_(data0.name_(Split::lo(mask)),                             \
1016  
                               data1.name_(Split::hi(mask)));                            \
1017  
        }                                                                                \
1018  
    }                                                                                    \
1019  
    Vc_NOTHING_EXPECTING_SEMICOLON
1020  
    Vc_REDUCTION_FUNCTION_(min, Vc::min, std::min);
1021  
    Vc_REDUCTION_FUNCTION_(max, Vc::max, std::max);
1022  
    Vc_REDUCTION_FUNCTION_(product, internal::product_helper_, internal::product_helper_);
1023  
    Vc_REDUCTION_FUNCTION_(sum, internal::sum_helper_, internal::sum_helper_);
1024  
#undef Vc_REDUCTION_FUNCTION_
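    // Reduction sketch (illustrative): min(), max(), product() and sum() reduce over
    // all elements; the masked overloads only consider the selected elements.
    //
    //   Vc::SimdArray<float, 8> v = Vc::SimdArray<float, 8>::IndexesFromZero();
    //   float lo    = v.min();          // 0
    //   float total = v.sum(v > 2.f);   // 3 + 4 + 5 + 6 + 7 == 25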
1025  
    ///\copybrief Vector::partialSum
1026  
    Vc_INTRINSIC Vc_PURE SimdArray partialSum() const //{{{2
1027  
    {
1028  
        auto ps0 = data0.partialSum();
1029  
        auto tmp = data1;
1030  
        tmp[0] += ps0[data0.size() - 1];
1031  
        return {std::move(ps0), tmp.partialSum()};
1032  
    }
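
    // partialSum() sketch (illustrative): the last prefix sum of the first half is
    // carried into the second half via tmp[0] above.
    //
    //   Vc::SimdArray<int, 4> v{1, 2, 3, 4};
    //   auto p = v.partialSum();   // {1, 3, 6, 10}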
1033  
1034  
    // apply {{{2
1035  
    ///\copybrief Vector::apply(F &&) const
1036  
    template <typename F> inline SimdArray apply(F &&f) const
1037  
    {
1038  
        return {data0.apply(f), data1.apply(f)};
1039  
    }
1040  
    ///\copybrief Vector::apply(F &&, MaskType) const
1041  
    template <typename F> inline SimdArray apply(F &&f, const mask_type &k) const
1042  
    {
1043  
        return {data0.apply(f, Split::lo(k)), data1.apply(f, Split::hi(k))};
1044  
    }
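
    // apply() sketch (illustrative): invoke a scalar callable on every element
    // (optionally only where the mask is set) and collect the results.
    //
    //   Vc::SimdArray<float, 8> v(4.f);
    //   auto r = v.apply([](float x) { return x * x; });   // every element becomes 16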
1045  
1046  
    // shifted {{{2
1047  
    ///\copybrief Vector::shifted(int) const
1048  
    inline SimdArray shifted(int amount) const
1049  
    {
1050  
        constexpr int SSize = Size;
1051  
        constexpr int SSize0 = storage_type0::Size;
1052  
        constexpr int SSize1 = storage_type1::Size;
1053  
        if (amount == 0) {
1054  
            return *this;
1055  
        }
1056  
        if (amount < 0) {
1057  
            if (amount > -SSize0) {
1058  
                return {data0.shifted(amount), data1.shifted(amount, data0)};
1059  
            }
1060  
            if (amount == -SSize0) {
1061  
                return {storage_type0::Zero(), simd_cast<storage_type1>(data0)};
1062  
            }
1063  
            if (amount < -SSize0) {
1064  
                return {storage_type0::Zero(), simd_cast<storage_type1>(data0.shifted(
1065  
                                                   amount + SSize0))};
1066  
            }
1067  
            return Zero();
1068  
        } else {
1069  
            if (amount >= SSize) {
1070  
                return Zero();
1071  
            } else if (amount >= SSize0) {
1072  
                return {
1073  
                    simd_cast<storage_type0>(data1).shifted(amount - SSize0),
1074  
                    storage_type1::Zero()};
1075  
            } else if (amount >= SSize1) {
1076  
                return {data0.shifted(amount, data1), storage_type1::Zero()};
1077  
            } else {
1078  
                return {data0.shifted(amount, data1), data1.shifted(amount)};
1079  
            }
1080  
        }
1081  
    }
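
    // shifted() sketch (illustrative; same semantics as Vc::Vector::shifted): a positive
    // amount moves entries towards index 0 and fills with zeros at the end, a negative
    // amount moves them towards the last index.
    //
    //   Vc::SimdArray<int, 4> v{1, 2, 3, 4};
    //   auto a = v.shifted(1);    // {2, 3, 4, 0}
    //   auto b = v.shifted(-1);   // {0, 1, 2, 3}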
1082  
1083  
    template <std::size_t NN>
1084  
    inline enable_if<
1085  
        !(std::is_same<storage_type0, storage_type1>::value &&  // not bisectable
1086  
          N == NN),
1087  
        SimdArray>
1088  
        shifted(int amount, const SimdArray<value_type, NN> &shiftIn) const
1089  
    {
1090  
        constexpr int SSize = Size;
1091  
        if (amount < 0) {
1092  
            return SimdArray::generate([&](int i) -> value_type {
1093  
                i += amount;
1094  
                if (i >= 0) {
1095  
                    return operator[](i);
1096  
                } else if (i >= -SSize) {
1097  
                    return shiftIn[i + SSize];
1098  
                }
1099  
                return 0;
1100  
            });
1101  
        }
1102  
        return SimdArray::generate([&](int i) -> value_type {
1103  
            i += amount;
1104  
            if (i < SSize) {
1105  
                return operator[](i);
1106  
            } else if (i < 2 * SSize) {
1107  
                return shiftIn[i - SSize];
1108  
            }
1109  
            return 0;
1110  
        });
1111  
    }
1112  
1113  
private:
1114  
    // workaround for MSVC not understanding the simpler and shorter expression of the boolean
1115  
    // expression directly in the enable_if below
1116  
    template <std::size_t NN> struct bisectable_shift
1117  
        : public std::integral_constant<bool,
1118  
                                        std::is_same<storage_type0, storage_type1>::value &&  // bisectable
1119  
                                        N == NN>
1120  
    {
1121  
    };
1122  
1123  
public:
1124  
    template <std::size_t NN>
1125  
    inline SimdArray shifted(enable_if<bisectable_shift<NN>::value, int> amount,
1126  
            const SimdArray<value_type, NN> &shiftIn) const
1127  
    {
1128  
        constexpr int SSize = Size;
1129  
        if (amount < 0) {
1130  
            if (amount > -static_cast<int>(storage_type0::Size)) {
1131  
                return {data0.shifted(amount, internal_data1(shiftIn)),
1132  
                        data1.shifted(amount, data0)};
1133  
            }
1134  
            if (amount == -static_cast<int>(storage_type0::Size)) {
1135  
                return {storage_type0(internal_data1(shiftIn)), storage_type1(data0)};
1136  
            }
1137  
            if (amount > -SSize) {
1138  
                return {
1139  
                    internal_data1(shiftIn)
1140  
                        .shifted(amount + static_cast<int>(storage_type0::Size), internal_data0(shiftIn)),
1141  
                    data0.shifted(amount + static_cast<int>(storage_type0::Size), internal_data1(shiftIn))};
1142  
            }
1143  
            if (amount == -SSize) {
1144  
                return shiftIn;
1145  
            }
1146  
            if (amount > -2 * SSize) {
1147  
                return shiftIn.shifted(amount + SSize);
1148  
            }
1149  
        }
1150  
        if (amount == 0) {
1151  
            return *this;
1152  
        }
1153  
        if (amount < static_cast<int>(storage_type0::Size)) {
1154  
            return {data0.shifted(amount, data1),
1155  
                    data1.shifted(amount, internal_data0(shiftIn))};
1156  
        }
1157  
        if (amount == static_cast<int>(storage_type0::Size)) {
1158  
            return {storage_type0(data1), storage_type1(internal_data0(shiftIn))};
1159  
        }
1160  
        if (amount < SSize) {
1161  
            return {data1.shifted(amount - static_cast<int>(storage_type0::Size), internal_data0(shiftIn)),
1162  
                    internal_data0(shiftIn)
1163  
                        .shifted(amount - static_cast<int>(storage_type0::Size), internal_data1(shiftIn))};
1164  
        }
1165  
        if (amount == SSize) {
1166  
            return shiftIn;
1167  
        }
1168  
        if (amount < 2 * SSize) {
1169  
            return shiftIn.shifted(amount - SSize);
1170  
        }
1171  
        return Zero();
1172  
    }
1173  
1174  
    // rotated {{{2
1175  
    ///\copybrief Vector::rotated
1176  
    Vc_INTRINSIC SimdArray rotated(int amount) const
1177  
    {
1178  
        amount %= int(size());
1179  
        if (amount == 0) {
1180  
            return *this;
1181  
        } else if (amount < 0) {
1182  
            amount += size();
1183  
        }
1184  
1185  
#ifdef Vc_MSVC
1186  
        // MSVC fails to find a SimdArray::shifted function with 2 arguments. So use store
1187  
        // ->
1188  
        // load to implement the function instead.
1189  
        alignas(MemoryAlignment) T tmp[N + data0.size()];
1190  
        data0.store(&tmp[0], Vc::Aligned);
1191  
        data1.store(&tmp[data0.size()], Vc::Aligned);
1192  
        data0.store(&tmp[N], Vc::Unaligned);
1193  
        SimdArray r;
1194  
        r.data0.load(&tmp[amount], Vc::Unaligned);
1195  
        r.data1.load(&tmp[(amount + data0.size()) % size()], Vc::Unaligned);
1196  
        return r;
1197  
#else
1198  
        auto &&d0cvtd = simd_cast<storage_type1>(data0);
1199  
        auto &&d1cvtd = simd_cast<storage_type0>(data1);
1200  
        constexpr int size0 = storage_type0::size();
1201  
        constexpr int size1 = storage_type1::size();
1202  
1203  
        if (amount == size0 && std::is_same<storage_type0, storage_type1>::value) {
1204  
            return {std::move(d1cvtd), std::move(d0cvtd)};
1205  
        } else if (amount < size1) {
1206  
            return {data0.shifted(amount, d1cvtd), data1.shifted(amount, d0cvtd)};
1207  
        } else if (amount == size1) {
1208  
            return {data0.shifted(amount, d1cvtd), std::move(d0cvtd)};
1209  
        } else if (int(size()) - amount < size1) {
1210  
            return {data0.shifted(amount - int(size()), d1cvtd.shifted(size1 - size0)),
1211  
                    data1.shifted(amount - int(size()), data0.shifted(size0 - size1))};
1212  
        } else if (int(size()) - amount == size1) {
1213  
            return {data0.shifted(-size1, d1cvtd.shifted(size1 - size0)),
1214  
                    simd_cast<storage_type1>(data0.shifted(size0 - size1))};
1215  
        } else if (amount <= size0) {
1216  
            return {data0.shifted(size1, d1cvtd).shifted(amount - size1, data0),
1217  
                    simd_cast<storage_type1>(data0.shifted(amount - size1))};
1218  
        } else {
1219  
            return {data0.shifted(size1, d1cvtd).shifted(amount - size1, data0),
1220  
                    simd_cast<storage_type1>(data0.shifted(amount - size1, d1cvtd))};
1221  
        }
1222  
        return *this;
1223  
#endif
1224  
    }
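
    // rotated() sketch (illustrative): like shifted(), but the entries wrap around
    // instead of being replaced by zeros.
    //
    //   Vc::SimdArray<int, 4> v{1, 2, 3, 4};
    //   auto r = v.rotated(1);   // {2, 3, 4, 1}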
1225  
1226  
    // interleaveLow/-High {{{2
1227  
    ///\internal \copydoc Vector::interleaveLow
1228  
    Vc_INTRINSIC SimdArray interleaveLow(const SimdArray &x) const
1229  
    {
1230  
        // return data0[0], x.data0[0], data0[1], x.data0[1], ...
1231  
        return {data0.interleaveLow(x.data0),
1232  
                simd_cast<storage_type1>(data0.interleaveHigh(x.data0))};
1233  
    }
1234  
    ///\internal \copydoc Vector::interleaveHigh
1235  
    Vc_INTRINSIC SimdArray interleaveHigh(const SimdArray &x) const
1236  
    {
1237  
        return interleaveHighImpl(
1238  
            x,
1239  
            std::integral_constant<bool, storage_type0::Size == storage_type1::Size>());
1240  
    }
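
    // Interleave sketch (illustrative): interleaveLow()/interleaveHigh() zip two arrays.
    //
    //   Vc::SimdArray<int, 4> a{0, 1, 2, 3};
    //   Vc::SimdArray<int, 4> b{10, 11, 12, 13};
    //   auto lo = a.interleaveLow(b);    // {0, 10, 1, 11}
    //   auto hi = a.interleaveHigh(b);   // {2, 12, 3, 13}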

private:
    ///\internal
    Vc_INTRINSIC SimdArray interleaveHighImpl(const SimdArray &x, std::true_type) const
    {
        return {data1.interleaveLow(x.data1), data1.interleaveHigh(x.data1)};
    }
    ///\internal
    inline SimdArray interleaveHighImpl(const SimdArray &x, std::false_type) const
    {
        return {data0.interleaveHigh(x.data0)
                    .shifted(storage_type1::Size,
                             simd_cast<storage_type0>(data1.interleaveLow(x.data1))),
                data1.interleaveHigh(x.data1)};
    }

public:
    ///\copybrief Vector::reversed
    inline SimdArray reversed() const //{{{2
    {
        if (std::is_same<storage_type0, storage_type1>::value) {
            return {simd_cast<storage_type0>(data1).reversed(),
                    simd_cast<storage_type1>(data0).reversed()};
        } else {
#ifdef Vc_MSVC
            // MSVC fails to find a SimdArray::shifted function with 2 arguments. So use
            // store -> load to implement the function instead.
            alignas(MemoryAlignment) T tmp[N];
            data1.reversed().store(&tmp[0], Vc::Aligned);
            data0.reversed().store(&tmp[data1.size()], Vc::Unaligned);
            return SimdArray{&tmp[0], Vc::Aligned};
#else
            return {data0.shifted(storage_type1::Size, data1).reversed(),
                    simd_cast<storage_type1>(data0.reversed().shifted(
                        storage_type0::Size - storage_type1::Size))};
#endif
        }
    }
    ///\copydoc Vector::sorted
    inline SimdArray sorted() const  //{{{2
    {
        return sortedImpl(
            std::integral_constant<bool, storage_type0::Size == storage_type1::Size>());
    }

    ///\internal
    Vc_INTRINSIC SimdArray sortedImpl(std::true_type) const
    {
#ifdef Vc_DEBUG_SORTED
        std::cerr << "-- " << data0 << data1 << '\n';
#endif
        const auto a = data0.sorted();
        const auto b = data1.sorted().reversed();
        const auto lo = Vc::min(a, b);
        const auto hi = Vc::max(a, b);
        return {lo.sorted(), hi.sorted()};
    }

    ///\internal
    Vc_INTRINSIC SimdArray sortedImpl(std::false_type) const
    {
        using SortableArray =
            SimdArray<value_type, Common::NextPowerOfTwo<size()>::value>;
        auto sortable = simd_cast<SortableArray>(*this);
        for (std::size_t i = Size; i < SortableArray::Size; ++i) {
            using limits = std::numeric_limits<value_type>;
            if (limits::has_infinity) {
                sortable[i] = limits::infinity();
            } else {
                sortable[i] = std::numeric_limits<value_type>::max();
            }
        }
        return simd_cast<SimdArray>(sortable.sorted());

        /* The following implementation appears to be less efficient. But this may need further
         * work.
        const auto a = data0.sorted();
        const auto b = data1.sorted();
#ifdef Vc_DEBUG_SORTED
        std::cerr << "== " << a << b << '\n';
#endif
        auto aIt = Vc::begin(a);
        auto bIt = Vc::begin(b);
        const auto aEnd = Vc::end(a);
        const auto bEnd = Vc::end(b);
        return SimdArray::generate([&](std::size_t) {
            if (aIt == aEnd) {
                return *(bIt++);
            }
            if (bIt == bEnd) {
                return *(aIt++);
            }
            if (*aIt < *bIt) {
                return *(aIt++);
            } else {
                return *(bIt++);
            }
        });
        */
    }
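    // Illustrative sketch (not part of the original sources) of the padding approach
    // above, with hypothetical values: a SimdArray<float, 3> {5, 1, 3} is first widened
    // to the next power of two, SimdArray<float, 4> {5, 1, 3, +inf}, sorted to
    // {1, 3, 5, +inf}, and then narrowed back to SimdArray<float, 3> {1, 3, 5}. Types
    // without an infinity use std::numeric_limits<value_type>::max() as padding instead.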

    /// \name Deprecated Members
    ///@{

    ///\copydoc size
    ///\deprecated Use size() instead.
    static constexpr std::size_t Size = size();

    /// \copydoc Vector::exponent
    Vc_DEPRECATED("use exponent(x) instead") Vc_INTRINSIC SimdArray exponent() const
    {
        return {exponent(data0), exponent(data1)};
    }

    /// \copydoc Vector::isNegative
    Vc_DEPRECATED("use isnegative(x) instead") Vc_INTRINSIC MaskType isNegative() const
    {
        return {isnegative(data0), isnegative(data1)};
    }

    ///\copydoc Vector::copySign
    Vc_DEPRECATED("use copysign(x, y) instead") Vc_INTRINSIC SimdArray
        copySign(const SimdArray &reference) const
    {
        return {Vc::copysign(data0, reference.data0),
                Vc::copysign(data1, reference.data1)};
    }
    ///@}

    // internal_data0/1 {{{2
    friend storage_type0 &internal_data0<>(SimdArray &x);
    friend storage_type1 &internal_data1<>(SimdArray &x);
    friend const storage_type0 &internal_data0<>(const SimdArray &x);
    friend const storage_type1 &internal_data1<>(const SimdArray &x);

    /// \internal
    Vc_INTRINSIC SimdArray(storage_type0 &&x, storage_type1 &&y) //{{{2
        : data0(std::move(x)), data1(std::move(y))
    {
    }

    Vc_FREE_STORE_OPERATORS_ALIGNED(alignof(storage_type0));

private: //{{{2
    // The alignas attribute attached to the class declaration above is ignored by ICC
    // 17.0.0 (at least). So just move the alignas attribute down here where it works for
    // all compilers.
    alignas(static_cast<std::size_t>(
        Common::BoundedAlignment<Common::NextPowerOfTwo<N>::value * sizeof(V) /
                                 V::size()>::value)) storage_type0 data0;
    storage_type1 data1;
};
#undef Vc_CURRENT_CLASS_NAME
template <typename T, std::size_t N, typename V, std::size_t M>
constexpr std::size_t SimdArray<T, N, V, M>::Size;
template <typename T, std::size_t N, typename V, std::size_t M>
constexpr std::size_t SimdArray<T, N, V, M>::MemoryAlignment;

// gatherImplementation {{{2
template <typename T, std::size_t N, typename VectorType, std::size_t M>
template <typename MT, typename IT>
inline void SimdArray<T, N, VectorType, M>::gatherImplementation(const MT *mem,
                                                                 IT &&indexes)
{
    data0.gather(mem, Split::lo(Common::Operations::gather(),
                                indexes));  // don't forward indexes - it could move and
                                            // thus break the next line
    data1.gather(mem, Split::hi(Common::Operations::gather(), std::forward<IT>(indexes)));
}
template <typename T, std::size_t N, typename VectorType, std::size_t M>
template <typename MT, typename IT>
inline void SimdArray<T, N, VectorType, M>::gatherImplementation(const MT *mem,
                                                                 IT &&indexes, MaskArgument mask)
{
    data0.gather(mem, Split::lo(Common::Operations::gather(), indexes),
                 Split::lo(mask));  // don't forward indexes - it could move and
                                    // thus break the next line
    data1.gather(mem, Split::hi(Common::Operations::gather(), std::forward<IT>(indexes)),
                 Split::hi(mask));
}

// scatterImplementation {{{2
template <typename T, std::size_t N, typename VectorType, std::size_t M>
template <typename MT, typename IT>
inline void SimdArray<T, N, VectorType, M>::scatterImplementation(MT *mem,
                                                                 IT &&indexes) const
{
    data0.scatter(mem, Split::lo(Common::Operations::gather(),
                                indexes));  // don't forward indexes - it could move and
                                            // thus break the next line
    data1.scatter(mem, Split::hi(Common::Operations::gather(), std::forward<IT>(indexes)));
}
template <typename T, std::size_t N, typename VectorType, std::size_t M>
template <typename MT, typename IT>
inline void SimdArray<T, N, VectorType, M>::scatterImplementation(MT *mem,
                                                                 IT &&indexes, MaskArgument mask) const
{
    data0.scatter(mem, Split::lo(Common::Operations::gather(), indexes),
                 Split::lo(mask));  // don't forward indexes - it could move and
                                    // thus break the next line
    data1.scatter(mem, Split::hi(Common::Operations::gather(), std::forward<IT>(indexes)),
                 Split::hi(mask));
}

// internal_data0/1 (SimdArray) {{{1
///\internal Returns the first data member of a generic SimdArray
template <typename T, std::size_t N, typename V, std::size_t M>
#ifndef Vc_MSVC
Vc_INTRINSIC
#endif
typename SimdArrayTraits<T, N>::storage_type0 &internal_data0(
    SimdArray<T, N, V, M> &x)
{
    return x.data0;
}
///\internal Returns the second data member of a generic SimdArray
template <typename T, std::size_t N, typename V, std::size_t M>
#ifndef Vc_MSVC
Vc_INTRINSIC
#endif
typename SimdArrayTraits<T, N>::storage_type1 &internal_data1(
    SimdArray<T, N, V, M> &x)
{
    return x.data1;
}
///\internal Returns the first data member of a generic SimdArray (const overload)
template <typename T, std::size_t N, typename V, std::size_t M>
#ifndef Vc_MSVC
Vc_INTRINSIC
#endif
const typename SimdArrayTraits<T, N>::storage_type0 &internal_data0(
    const SimdArray<T, N, V, M> &x)
{
    return x.data0;
}
///\internal Returns the second data member of a generic SimdArray (const overload)
template <typename T, std::size_t N, typename V, std::size_t M>
#ifndef Vc_MSVC
Vc_INTRINSIC
#endif
const typename SimdArrayTraits<T, N>::storage_type1 &internal_data1(
    const SimdArray<T, N, V, M> &x)
{
    return x.data1;
}

// MSVC workaround for SimdArray(storage_type0, storage_type1) ctor{{{1
// MSVC sometimes stores x to data1. By first broadcasting 0 and then assigning y
// in the body the bug is suppressed.
#if defined Vc_MSVC && defined Vc_IMPL_SSE
template <>
Vc_INTRINSIC SimdArray<double, 8, SSE::Vector<double>, 2>::SimdArray(
    SimdArray<double, 4> &&x, SimdArray<double, 4> &&y)
    : data0(x), data1(0)
{
    data1 = y;
}
#endif

// binary operators {{{1
namespace result_vector_type_internal
{
template <typename T>
using type = typename std::remove_cv<typename std::remove_reference<T>::type>::type;

template <typename T>
using is_integer_larger_than_int = std::integral_constant<
    bool, std::is_integral<T>::value &&(sizeof(T) > sizeof(int) ||
                                        std::is_same<T, long>::value ||
                                        std::is_same<T, unsigned long>::value)>;

template <
    typename L, typename R,
    std::size_t N = Traits::isSimdArray<L>::value ? Traits::simd_vector_size<L>::value
                                                  : Traits::simd_vector_size<R>::value,
    bool =
        (Traits::isSimdArray<L>::value ||
         Traits::isSimdArray<R>::value)  // one of the operands must be a SimdArray
        && !std::is_same<type<L>, type<R>>::value  // if the operands are of the same type
                                                   // use the member function
        &&
        ((std::is_arithmetic<type<L>>::value &&
          !is_integer_larger_than_int<type<L>>::value) ||
         (std::is_arithmetic<type<R>>::value &&
          !is_integer_larger_than_int<type<R>>::value)  // one of the operands is a scalar
                                                        // type
         ||
         (  // or one of the operands is Vector<T> with Vector<T>::size() ==
            // SimdArray::size()
             Traits::simd_vector_size<L>::value == Traits::simd_vector_size<R>::value &&
             ((Traits::is_simd_vector<L>::value && !Traits::isSimdArray<L>::value) ||
              (Traits::is_simd_vector<R>::value && !Traits::isSimdArray<R>::value))))>
struct evaluate;

template <typename L, typename R, std::size_t N> struct evaluate<L, R, N, true>
{
private:
    using LScalar = Traits::entry_type_of<L>;
    using RScalar = Traits::entry_type_of<R>;

    template <bool B, typename True, typename False>
    using conditional = typename std::conditional<B, True, False>::type;

public:
    // In principle we want the exact same rules for SimdArray<T> ⨉ SimdArray<U> as the standard
    // defines for T ⨉ U. BUT: short ⨉ short returns int (because all integral types smaller than
    // int are promoted to int before any operation). This would imply that SIMD types with integral
    // types smaller than int are more or less useless - and you could use SimdArray<int> from the
    // start. Therefore we special-case those operations where the scalar type of both operands is
    // integral and smaller than int.
    // In addition to that there is no generic support for 64-bit int SIMD types. Therefore
    // promotion to a 64-bit integral type (including `long` because it can potentially have 64
    // bits) also is not done. But if one of the operands is a scalar type that is larger than int
    // then the operator is disabled altogether. We do not want an implicit demotion.
    using type = SimdArray<
        conditional<(std::is_integral<LScalar>::value &&std::is_integral<RScalar>::value &&
                     sizeof(LScalar) < sizeof(int) &&
                     sizeof(RScalar) < sizeof(int)),
                    conditional<(sizeof(LScalar) == sizeof(RScalar)),
                                conditional<std::is_unsigned<LScalar>::value, LScalar, RScalar>,
                                conditional<(sizeof(LScalar) > sizeof(RScalar)), LScalar, RScalar>>,
                    decltype(std::declval<LScalar>() + std::declval<RScalar>())>,
        N>;
};

}  // namespace result_vector_type_internal

template <typename L, typename R>
using result_vector_type = typename result_vector_type_internal::evaluate<L, R>::type;

static_assert(
    std::is_same<result_vector_type<short int, Vc::SimdArray<short unsigned int, 32ul>>,
                 Vc::SimdArray<short unsigned int, 32ul>>::value,
    "result_vector_type does not work");

#define Vc_BINARY_OPERATORS_(op_)                                                        \
    /*!\brief Applies op_ component-wise and concurrently.  */                           \
    template <typename L, typename R>                                                    \
    Vc_INTRINSIC result_vector_type<L, R> operator op_(L &&lhs, R &&rhs)                 \
    {                                                                                    \
        using Return = result_vector_type<L, R>;                                         \
        return Return(std::forward<L>(lhs)) op_ Return(std::forward<R>(rhs));            \
    }
/**
 * \name Arithmetic and Bitwise Operators
 *
 * Applies the operator component-wise and concurrently on \p lhs and \p rhs and returns
 * a new SimdArray object containing the result values.
 *
 * This operator only participates in overload resolution if:
 * \li At least one of the template parameters \p L or \p R is a SimdArray type.
 * \li Either \p L or \p R is a fundamental arithmetic type but not an integral type
 *     larger than \c int \n
 *     or \n
 *     \p L or \p R is a Vc::Vector type with equal number of elements (Vector::size() ==
 *     SimdArray::size()).
 *
 * The return type of the operator is a SimdArray type using the more precise EntryType of
 * \p L or \p R and the same number of elements as the SimdArray argument(s).
 */
///@{
Vc_ALL_ARITHMETICS(Vc_BINARY_OPERATORS_);
Vc_ALL_BINARY(Vc_BINARY_OPERATORS_);
///@}
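// Illustrative usage sketch (not part of the original sources; the element type and
// width are arbitrary assumptions):
// \code
// Vc::SimdArray<float, 8> x(1.5f);
// auto a = x * 2.f;   // SimdArray<float, 8>
// auto b = x + 1;     // int scalar operand; result is still SimdArray<float, 8>
// \endcode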
#undef Vc_BINARY_OPERATORS_
#define Vc_BINARY_OPERATORS_(op_)                                                        \
    /*!\brief Applies op_ component-wise and concurrently.  */                           \
    template <typename L, typename R>                                                    \
    Vc_INTRINSIC typename result_vector_type<L, R>::mask_type operator op_(L &&lhs,      \
                                                                           R &&rhs)      \
    {                                                                                    \
        using Promote = result_vector_type<L, R>;                                        \
        return Promote(std::forward<L>(lhs)) op_ Promote(std::forward<R>(rhs));          \
    }
/**
 * \name Compare Operators
 *
 * Applies the operator component-wise and concurrently on \p lhs and \p rhs and returns
 * a new SimdMaskArray object containing the result values.
 *
 * This operator only participates in overload resolution if (same rules as above):
 * \li At least one of the template parameters \p L or \p R is a SimdArray type.
 * \li Either \p L or \p R is a fundamental arithmetic type but not an integral type
 *     larger than \c int \n
 *     or \n
 *     \p L or \p R is a Vc::Vector type with equal number of elements (Vector::size() ==
 *     SimdArray::size()).
 *
 * The return type of the operator is a SimdMaskArray type using the more precise EntryType of
 * \p L or \p R and the same number of elements as the SimdArray argument(s).
 */
///@{
Vc_ALL_COMPARES(Vc_BINARY_OPERATORS_);
///@}
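// Illustrative usage sketch (not part of the original sources; the element type and
// width are arbitrary assumptions):
// \code
// Vc::SimdArray<float, 8> x(1.f), y(2.f);
// auto m = x < y;    // SimdMaskArray<float, 8>, all entries true
// auto n = x == 1;   // comparison against an int scalar also yields a SimdMaskArray
// \endcode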
#undef Vc_BINARY_OPERATORS_

// math functions {{{1
#define Vc_FORWARD_UNARY_OPERATOR(name_)                                                 \
    /*!\brief Applies the std::name_ function component-wise and concurrently. */        \
    template <typename T, std::size_t N, typename V, std::size_t M>                      \
    inline SimdArray<T, N, V, M> name_(const SimdArray<T, N, V, M> &x)                   \
    {                                                                                    \
        return SimdArray<T, N, V, M>::fromOperation(                                     \
            Common::Operations::Forward_##name_(), x);                                   \
    }                                                                                    \
    Vc_NOTHING_EXPECTING_SEMICOLON

#define Vc_FORWARD_UNARY_BOOL_OPERATOR(name_)                                            \
    /*!\brief Applies the std::name_ function component-wise and concurrently. */        \
    template <typename T, std::size_t N, typename V, std::size_t M>                      \
    inline SimdMaskArray<T, N, V, M> name_(const SimdArray<T, N, V, M> &x)               \
    {                                                                                    \
        return SimdMaskArray<T, N, V, M>::fromOperation(                                 \
            Common::Operations::Forward_##name_(), x);                                   \
    }                                                                                    \
    Vc_NOTHING_EXPECTING_SEMICOLON

#define Vc_FORWARD_BINARY_OPERATOR(name_)                                                \
    /*!\brief Applies the std::name_ function component-wise and concurrently. */        \
    template <typename T, std::size_t N, typename V, std::size_t M>                      \
    inline SimdArray<T, N, V, M> name_(const SimdArray<T, N, V, M> &x,                   \
                                       const SimdArray<T, N, V, M> &y)                   \
    {                                                                                    \
        return SimdArray<T, N, V, M>::fromOperation(                                     \
            Common::Operations::Forward_##name_(), x, y);                                \
    }                                                                                    \
    Vc_NOTHING_EXPECTING_SEMICOLON

/**
 * \name Math functions
 * These functions evaluate the respective math function component-wise and
 * concurrently on the elements of the SimdArray argument(s).
 */
///@{
Vc_FORWARD_UNARY_OPERATOR(abs);
Vc_FORWARD_UNARY_OPERATOR(asin);
Vc_FORWARD_UNARY_OPERATOR(atan);
Vc_FORWARD_BINARY_OPERATOR(atan2);
Vc_FORWARD_UNARY_OPERATOR(ceil);
Vc_FORWARD_BINARY_OPERATOR(copysign);
Vc_FORWARD_UNARY_OPERATOR(cos);
Vc_FORWARD_UNARY_OPERATOR(exp);
Vc_FORWARD_UNARY_OPERATOR(exponent);
Vc_FORWARD_UNARY_OPERATOR(floor);
/// Applies the std::fma function component-wise and concurrently.
template <typename T, std::size_t N>
inline SimdArray<T, N> fma(const SimdArray<T, N> &a, const SimdArray<T, N> &b,
                           const SimdArray<T, N> &c)
{
    return SimdArray<T, N>::fromOperation(Common::Operations::Forward_fma(), a, b, c);
}
Vc_FORWARD_UNARY_BOOL_OPERATOR(isfinite);
Vc_FORWARD_UNARY_BOOL_OPERATOR(isinf);
Vc_FORWARD_UNARY_BOOL_OPERATOR(isnan);
#if defined Vc_MSVC && defined Vc_IMPL_SSE
inline SimdMaskArray<double, 8, SSE::Vector<double>, 2> isnan(
    const SimdArray<double, 8, SSE::Vector<double>, 2> &x)
{
    using V = SSE::Vector<double>;
    const SimdArray<double, 4, V, 2> &x0 = internal_data0(x);
    const SimdArray<double, 4, V, 2> &x1 = internal_data1(x);
    SimdMaskArray<double, 4, V, 2> r0;
    SimdMaskArray<double, 4, V, 2> r1;
    internal_data(internal_data0(r0)) = isnan(internal_data(internal_data0(x0)));
    internal_data(internal_data1(r0)) = isnan(internal_data(internal_data1(x0)));
    internal_data(internal_data0(r1)) = isnan(internal_data(internal_data0(x1)));
    internal_data(internal_data1(r1)) = isnan(internal_data(internal_data1(x1)));
    return {std::move(r0), std::move(r1)};
}
#endif
Vc_FORWARD_UNARY_BOOL_OPERATOR(isnegative);
/// Applies the std::frexp function component-wise and concurrently.
template <typename T, std::size_t N>
inline SimdArray<T, N> frexp(const SimdArray<T, N> &x, SimdArray<int, N> *e)
{
    return SimdArray<T, N>::fromOperation(Common::Operations::Forward_frexp(), x, e);
}
/// Applies the std::ldexp function component-wise and concurrently.
template <typename T, std::size_t N>
inline SimdArray<T, N> ldexp(const SimdArray<T, N> &x, const SimdArray<int, N> &e)
{
    return SimdArray<T, N>::fromOperation(Common::Operations::Forward_ldexp(), x, e);
}
Vc_FORWARD_UNARY_OPERATOR(log);
Vc_FORWARD_UNARY_OPERATOR(log10);
Vc_FORWARD_UNARY_OPERATOR(log2);
Vc_FORWARD_UNARY_OPERATOR(reciprocal);
Vc_FORWARD_UNARY_OPERATOR(round);
Vc_FORWARD_UNARY_OPERATOR(rsqrt);
Vc_FORWARD_UNARY_OPERATOR(sin);
/// Determines sine and cosine concurrently and component-wise on \p x.
template <typename T, std::size_t N>
void sincos(const SimdArray<T, N> &x, SimdArray<T, N> *sin, SimdArray<T, N> *cos)
{
    SimdArray<T, N>::callOperation(Common::Operations::Forward_sincos(), x, sin, cos);
}
Vc_FORWARD_UNARY_OPERATOR(sqrt);
Vc_FORWARD_UNARY_OPERATOR(trunc);
Vc_FORWARD_BINARY_OPERATOR(min);
Vc_FORWARD_BINARY_OPERATOR(max);
///@}
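// Illustrative usage sketch (not part of the original sources; the element type and
// width are arbitrary assumptions):
// \code
// Vc::SimdArray<float, 7> x(0.5f);
// auto r = Vc::sqrt(x);            // SimdArray<float, 7>
// Vc::SimdArray<float, 7> s, c;
// Vc::sincos(x, &s, &c);           // sine and cosine computed in one call
// auto finite = Vc::isfinite(x);   // SimdMaskArray<float, 7>
// \endcode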
#undef Vc_FORWARD_UNARY_OPERATOR
#undef Vc_FORWARD_UNARY_BOOL_OPERATOR
#undef Vc_FORWARD_BINARY_OPERATOR

// simd_cast {{{1
#ifdef Vc_MSVC
#define Vc_DUMMY_ARG0 , int = 0
#define Vc_DUMMY_ARG1 , long = 0
#define Vc_DUMMY_ARG2 , short = 0
#define Vc_DUMMY_ARG3 , char = '0'
#define Vc_DUMMY_ARG4 , unsigned = 0u
#define Vc_DUMMY_ARG5 , unsigned short = 0u
#else
#define Vc_DUMMY_ARG0
#define Vc_DUMMY_ARG1
#define Vc_DUMMY_ARG2
#define Vc_DUMMY_ARG3
#define Vc_DUMMY_ARG4
#define Vc_DUMMY_ARG5
#endif  // Vc_MSVC

// simd_cast_impl_smaller_input {{{2
// The following function can be implemented without the sizeof...(From) overload.
// However, ICC has a bug (Premier Issue #6000116338) which leads to an ICE. Splitting the
// function in two works around the issue.
template <typename Return, std::size_t N, typename T, typename... From>
Vc_INTRINSIC Vc_CONST enable_if<sizeof...(From) != 0, Return>
simd_cast_impl_smaller_input(const From &... xs, const T &last)
{
    Return r = simd_cast<Return>(xs...);
    for (size_t i = 0; i < N; ++i) {
        r[i + N * sizeof...(From)] = static_cast<typename Return::EntryType>(last[i]);
    }
    return r;
}
template <typename Return, std::size_t N, typename T>
Vc_INTRINSIC Vc_CONST Return simd_cast_impl_smaller_input(const T &last)
{
    Return r = Return();
    for (size_t i = 0; i < N; ++i) {
        r[i] = static_cast<typename Return::EntryType>(last[i]);
    }
    return r;
}
template <typename Return, std::size_t N, typename T, typename... From>
Vc_INTRINSIC Vc_CONST enable_if<sizeof...(From) != 0, Return> simd_cast_impl_larger_input(
    const From &... xs, const T &last)
{
    Return r = simd_cast<Return>(xs...);
    for (size_t i = N * sizeof...(From); i < Return::Size; ++i) {
        r[i] = static_cast<typename Return::EntryType>(last[i - N * sizeof...(From)]);
    }
    return r;
}
template <typename Return, std::size_t N, typename T>
Vc_INTRINSIC Vc_CONST Return simd_cast_impl_larger_input(const T &last)
{
    Return r = Return();
    for (size_t i = 0; i < Return::size(); ++i) {
        r[i] = static_cast<typename Return::EntryType>(last[i]);
    }
    return r;
}
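// Illustrative sketch (not part of the original sources), with hypothetical element
// counts: simd_cast_impl_smaller_input<Return, 3>(x0, x1, x2) with SimdArray<float, 3>
// inputs and Return = SimdArray<int, 16> first simd_casts the leading arguments into r
// and then copies the trailing argument x2 into r[6..8], i.e. each input occupies its
// own N-wide slot of the (larger) return vector. simd_cast_impl_larger_input is the
// converse: the trailing argument only fills the entries that remain after the leading
// arguments, dropping whatever does not fit into Return.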

// simd_cast_without_last (declaration) {{{2
template <typename Return, typename T, typename... From>
Vc_INTRINSIC_L Vc_CONST_L Return
    simd_cast_without_last(const From &... xs, const T &) Vc_INTRINSIC_R Vc_CONST_R;

// are_all_types_equal {{{2
template <typename... Ts> struct are_all_types_equal;
template <typename T>
struct are_all_types_equal<T> : public std::integral_constant<bool, true>
{
};
template <typename T0, typename T1, typename... Ts>
struct are_all_types_equal<T0, T1, Ts...>
    : public std::integral_constant<
          bool, std::is_same<T0, T1>::value && are_all_types_equal<T1, Ts...>::value>
{
};

// simd_cast_interleaved_argument_order (declarations) {{{2
/*! \internal
  The need for simd_cast_interleaved_argument_order stems from a shortcoming in pack
  expansion of variadic templates in C++. For a simd_cast with SimdArray arguments that
  are bisectable (i.e.  \c storage_type0 and \c storage_type1 are equal) the generic
  implementation needs to forward to a simd_cast of the \c internal_data0 and \c
  internal_data1 of the arguments. But the required order of arguments is
  `internal_data0(arg0), internal_data1(arg0), internal_data0(arg1), ...`. This is
  impossible to achieve with pack expansion. It is only possible to write
  `internal_data0(args)..., internal_data1(args)...` and thus have the argument order
  mixed up. The simd_cast_interleaved_argument_order “simply” calls simd_cast with the
  arguments correctly reordered (i.e. interleaved).

  The implementation of simd_cast_interleaved_argument_order is done generically, so that
  it supports any number of arguments. The central idea of the implementation is an
  `extract` function which returns one value of an argument pack determined via an index
  passed as template argument. This index is generated via an index_sequence. The
  `extract` function uses two argument packs (of equal size) to easily return values from
  the front and middle of the argument pack (for doing the deinterleave).
 */
template <typename Return, typename... Ts>
Vc_INTRINSIC Vc_CONST Return
    simd_cast_interleaved_argument_order(const Ts &... a, const Ts &... b);
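// Illustrative sketch (not part of the original sources): for two bisectable
// arguments x and y the call
//   simd_cast_interleaved_argument_order<Return>(internal_data0(x), internal_data0(y),
//                                                internal_data1(x), internal_data1(y))
// forwards to
//   simd_cast<Return>(internal_data0(x), internal_data1(x),
//                     internal_data0(y), internal_data1(y))
// i.e. the two packs are interleaved back into per-argument order.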

// simd_cast_with_offset (declarations and one impl) {{{2
// offset == 0 {{{3
template <typename Return, std::size_t offset, typename From, typename... Froms>
Vc_INTRINSIC Vc_CONST
    enable_if<(are_all_types_equal<From, Froms...>::value && offset == 0), Return>
        simd_cast_with_offset(const From &x, const Froms &... xs);
// offset > 0 && offset divisible by Return::Size {{{3
template <typename Return, std::size_t offset, typename From>
Vc_INTRINSIC Vc_CONST
    enable_if<(From::Size > offset && offset > 0 && offset % Return::Size == 0), Return>
        simd_cast_with_offset(const From &x);
// offset > 0 && offset NOT divisible && Return is non-atomic simd(mask)array {{{3
template <typename Return, std::size_t offset, typename From>
Vc_INTRINSIC Vc_CONST
    enable_if<(From::Size > offset && offset > 0 && offset % Return::Size != 0 &&
               ((Traits::isSimdArray<Return>::value &&
                 !Traits::isAtomicSimdArray<Return>::value) ||
                (Traits::isSimdMaskArray<Return>::value &&
                 !Traits::isAtomicSimdMaskArray<Return>::value))),
              Return>
        simd_cast_with_offset(const From &x);
// offset > 0 && offset NOT divisible && Return is atomic simd(mask)array {{{3
template <typename Return, std::size_t offset, typename From>
Vc_INTRINSIC Vc_CONST
    enable_if<(From::Size > offset && offset > 0 && offset % Return::Size != 0 &&
               ((Traits::isSimdArray<Return>::value &&
                 Traits::isAtomicSimdArray<Return>::value) ||
                (Traits::isSimdMaskArray<Return>::value &&
                 Traits::isAtomicSimdMaskArray<Return>::value))),
              Return>
        simd_cast_with_offset(const From &x);
// offset > first argument (drops first arg) {{{3
template <typename Return, std::size_t offset, typename From, typename... Froms>
Vc_INTRINSIC Vc_CONST enable_if<
    (are_all_types_equal<From, Froms...>::value && From::Size <= offset), Return>
    simd_cast_with_offset(const From &, const Froms &... xs)
{
    return simd_cast_with_offset<Return, offset - From::Size>(xs...);
}

// offset > first and only argument (returns Zero) {{{3
template <typename Return, std::size_t offset, typename From>
Vc_INTRINSIC Vc_CONST enable_if<(From::Size <= offset), Return> simd_cast_with_offset(
    const From &)
{
    return Return::Zero();
}

// first_type_of {{{2
template <typename T, typename... Ts> struct first_type_of_impl
{
    using type = T;
};
template <typename... Ts> using first_type_of = typename first_type_of_impl<Ts...>::type;

// simd_cast_drop_arguments (declarations) {{{2
template <typename Return, typename From>
Vc_INTRINSIC Vc_CONST Return simd_cast_drop_arguments(From x);
template <typename Return, typename... Froms>
Vc_INTRINSIC Vc_CONST
    enable_if<(are_all_types_equal<Froms...>::value &&
               sizeof...(Froms) * first_type_of<Froms...>::Size < Return::Size),
              Return>
        simd_cast_drop_arguments(Froms... xs, first_type_of<Froms...> x);
// The following function can be implemented without the sizeof...(From) overload.
// However, ICC has a bug (Premier Issue #6000116338) which leads to an ICE. Splitting the
// function in two works around the issue.
template <typename Return, typename From, typename... Froms>
Vc_INTRINSIC Vc_CONST enable_if<
    (are_all_types_equal<From, Froms...>::value &&
     (1 + sizeof...(Froms)) * From::Size >= Return::Size && sizeof...(Froms) != 0),
    Return>
simd_cast_drop_arguments(Froms... xs, From x, From);
template <typename Return, typename From>
Vc_INTRINSIC Vc_CONST
    enable_if<(are_all_types_equal<From>::value && From::Size >= Return::Size), Return>
    simd_cast_drop_arguments(From x, From);

namespace
{
#ifdef Vc_DEBUG_SIMD_CAST
void debugDoNothing(const std::initializer_list<void *> &) {}
template <typename T0, typename... Ts>
inline void vc_debug_(const char *prefix, const char *suffix, const T0 &arg0,
                      const Ts &... args)
{
    std::cerr << prefix << arg0;
    debugDoNothing({&(std::cerr << ", " << args)...});
    std::cerr << suffix;
}
#else
template <typename T0, typename... Ts>
Vc_INTRINSIC void vc_debug_(const char *, const char *, const T0 &, const Ts &...)
{
}
#endif
}  // unnamed namespace

// is_less trait{{{2
template <size_t A, size_t B>
struct is_less : public std::integral_constant<bool, (A < B)> {
};

// is_power_of_2 trait{{{2
template <size_t N>
struct is_power_of_2 : public std::integral_constant<bool, ((N - 1) & N) == 0> {
};

// simd_cast<T>(xs...) to SimdArray/-mask {{{2
#define Vc_SIMDARRAY_CASTS(SimdArrayType_, NativeType_)                                  \
    template <typename Return, typename T, typename A, typename... Froms>                \
    Vc_INTRINSIC Vc_CONST enable_if<                                                     \
        (Traits::isAtomic##SimdArrayType_<Return>::value &&                              \
         is_less<NativeType_<T, A>::Size * sizeof...(Froms), Return::Size>::value &&     \
         are_all_types_equal<NativeType_<T, A>, Froms...>::value),                       \
        Return>                                                                          \
    simd_cast(NativeType_<T, A> x, Froms... xs)                                          \
    {                                                                                    \
        vc_debug_("simd_cast{1}(", ")\n", x, xs...);                                     \
        return {simd_cast<typename Return::storage_type>(x, xs...)};                     \
    }                                                                                    \
    template <typename Return, typename T, typename A, typename... Froms>                \
    Vc_INTRINSIC Vc_CONST enable_if<                                                     \
        (Traits::isAtomic##SimdArrayType_<Return>::value &&                              \
         !is_less<NativeType_<T, A>::Size * sizeof...(Froms), Return::Size>::value &&    \
         are_all_types_equal<NativeType_<T, A>, Froms...>::value),                       \
        Return>                                                                          \
    simd_cast(NativeType_<T, A> x, Froms... xs)                                          \
    {                                                                                    \
        vc_debug_("simd_cast{2}(", ")\n", x, xs...);                                     \
        return {simd_cast_without_last<Return, NativeType_<T, A>, Froms...>(x, xs...)};  \
    }                                                                                    \
    template <typename Return, typename T, typename A, typename... Froms>                \
    Vc_INTRINSIC Vc_CONST                                                                \
        enable_if<(Traits::is##SimdArrayType_<Return>::value &&                          \
                   !Traits::isAtomic##SimdArrayType_<Return>::value &&                   \
                   is_less<Common::left_size<Return::Size>(),                            \
                           NativeType_<T, A>::Size *(1 + sizeof...(Froms))>::value &&    \
                   are_all_types_equal<NativeType_<T, A>, Froms...>::value),             \
                  Return>                                                                \
        simd_cast(NativeType_<T, A> x, Froms... xs)                                      \
    {                                                                                    \
        vc_debug_("simd_cast{3}(", ")\n", x, xs...);                                     \
        using R0 = typename Return::storage_type0;                                       \
        using R1 = typename Return::storage_type1;                                       \
        return {simd_cast_drop_arguments<R0, Froms...>(x, xs...),                        \
                simd_cast_with_offset<R1, R0::Size>(x, xs...)};                          \
    }                                                                                    \
    template <typename Return, typename T, typename A, typename... Froms>                \
    Vc_INTRINSIC Vc_CONST                                                                \
        enable_if<(Traits::is##SimdArrayType_<Return>::value &&                          \
                   !Traits::isAtomic##SimdArrayType_<Return>::value &&                   \
                   !is_less<Common::left_size<Return::Size>(),                           \
                            NativeType_<T, A>::Size *(1 + sizeof...(Froms))>::value &&   \
                   are_all_types_equal<NativeType_<T, A>, Froms...>::value),             \
                  Return>                                                                \
        simd_cast(NativeType_<T, A> x, Froms... xs)                                      \
    {                                                                                    \
        vc_debug_("simd_cast{4}(", ")\n", x, xs...);                                     \
        using R0 = typename Return::storage_type0;                                       \
        using R1 = typename Return::storage_type1;                                       \
        return {simd_cast<R0>(x, xs...), R1::Zero()};                                    \
    }                                                                                    \
    Vc_NOTHING_EXPECTING_SEMICOLON

Vc_SIMDARRAY_CASTS(SimdArray, Vc::Vector);
Vc_SIMDARRAY_CASTS(SimdMaskArray, Vc::Mask);
#undef Vc_SIMDARRAY_CASTS
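// Illustrative usage sketch (not part of the original sources; the element types and
// widths depend on the active implementation and are assumptions here):
// \code
// Vc::float_v f = 1.f;   // native SIMD vector
// auto a = Vc::simd_cast<Vc::SimdArray<int, Vc::float_v::size()>>(f);
// auto m = Vc::simd_cast<Vc::SimdMaskArray<int, Vc::float_v::size()>>(f > 0.f);
// \endcode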

// simd_cast<SimdArray/-mask, offset>(V) {{{2
#define Vc_SIMDARRAY_CASTS(SimdArrayType_, NativeType_)                                  \
    /* SIMD Vector/Mask to atomic SimdArray/simdmaskarray */                             \
    template <typename Return, int offset, typename T, typename A>                       \
    Vc_INTRINSIC Vc_CONST                                                                \
        enable_if<Traits::isAtomic##SimdArrayType_<Return>::value, Return>               \
        simd_cast(NativeType_<T, A> x Vc_DUMMY_ARG0)                                     \
    {                                                                                    \
        vc_debug_("simd_cast{offset, atomic}(", ")\n", offset, x);                       \
        return {simd_cast<typename Return::storage_type, offset>(x)};                    \
    }                                                                                    \
    /* both halves of Return array are extracted from argument */                        \
    template <typename Return, int offset, typename T, typename A>                       \
    Vc_INTRINSIC Vc_CONST                                                                \
        enable_if<(Traits::is##SimdArrayType_<Return>::value &&                          \
                   !Traits::isAtomic##SimdArrayType_<Return>::value &&                   \
                   Return::Size * offset + Common::left_size<Return::Size>() <           \
                       NativeType_<T, A>::Size),                                         \
                  Return>                                                                \
        simd_cast(NativeType_<T, A> x Vc_DUMMY_ARG1)                                     \
    {                                                                                    \
        vc_debug_("simd_cast{offset, split Return}(", ")\n", offset, x);                 \
        using R0 = typename Return::storage_type0;                                       \
        constexpr int entries_offset = offset * Return::Size;                            \
        constexpr int entries_offset_right = entries_offset + R0::Size;                  \
        return {                                                                         \
            simd_cast_with_offset<typename Return::storage_type0, entries_offset>(x),    \
            simd_cast_with_offset<typename Return::storage_type1, entries_offset_right>( \
                x)};                                                                     \
    }                                                                                    \
    /* SIMD Vector/Mask to non-atomic SimdArray/simdmaskarray */                         \
    /* right half of Return array is zero */                                             \
    template <typename Return, int offset, typename T, typename A>                       \
    Vc_INTRINSIC Vc_CONST                                                                \
        enable_if<(Traits::is##SimdArrayType_<Return>::value &&                          \
                   !Traits::isAtomic##SimdArrayType_<Return>::value &&                   \
                   Return::Size * offset + Common::left_size<Return::Size>() >=          \
                       NativeType_<T, A>::Size),                                         \
                  Return>                                                                \
        simd_cast(NativeType_<T, A> x Vc_DUMMY_ARG2)                                     \
    {                                                                                    \
        vc_debug_("simd_cast{offset, R1::Zero}(", ")\n", offset, x);                     \
        using R0 = typename Return::storage_type0;                                       \
        using R1 = typename Return::storage_type1;                                       \
        constexpr int entries_offset = offset * Return::Size;                            \
        return {simd_cast_with_offset<R0, entries_offset>(x), R1::Zero()};               \
    }                                                                                    \
    Vc_NOTHING_EXPECTING_SEMICOLON

Vc_SIMDARRAY_CASTS(SimdArray, Vc::Vector);
Vc_SIMDARRAY_CASTS(SimdMaskArray, Vc::Mask);
#undef Vc_SIMDARRAY_CASTS
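// Illustrative sketch (not part of the original sources): the offset template argument
// selects which Return-sized chunk of the input is converted. Assuming a hypothetical
// 8-wide Vc::int_v:
// \code
// Vc::int_v v = Vc::int_v::IndexesFromZero();               // {0, 1, 2, ..., 7}
// auto lo = Vc::simd_cast<Vc::SimdArray<float, 4>, 0>(v);   // {0.f, 1.f, 2.f, 3.f}
// auto hi = Vc::simd_cast<Vc::SimdArray<float, 4>, 1>(v);   // {4.f, 5.f, 6.f, 7.f}
// \endcode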
2069  
2070  
// simd_cast<T>(xs...) from SimdArray/-mask {{{2
2071  
#define Vc_SIMDARRAY_CASTS(SimdArrayType_)                                               \
2072  
    /* indivisible SimdArrayType_ */                                                     \
2073  
    template <typename Return, typename T, std::size_t N, typename V, typename... From>  \
2074  
    Vc_INTRINSIC Vc_CONST                                                                \
2075  
        enable_if<(are_all_types_equal<SimdArrayType_<T, N, V, N>, From...>::value &&    \
2076  
                   (sizeof...(From) == 0 || N * sizeof...(From) < Return::Size) &&       \
2077  
                   !std::is_same<Return, SimdArrayType_<T, N, V, N>>::value),            \
2078  
                  Return>                                                                \
2079  
        simd_cast(const SimdArrayType_<T, N, V, N> &x0, const From &... xs)              \
2080  
    {                                                                                    \
2081  
        vc_debug_("simd_cast{indivisible}(", ")\n", x0, xs...);                          \
2082  
        return simd_cast<Return>(internal_data(x0), internal_data(xs)...);               \
2083  
    }                                                                                    \
2084  
    /* indivisible SimdArrayType_ && can drop arguments from the end */                  \
2085  
    template <typename Return, typename T, std::size_t N, typename V, typename... From>  \
2086  
    Vc_INTRINSIC Vc_CONST                                                                \
2087  
        enable_if<(are_all_types_equal<SimdArrayType_<T, N, V, N>, From...>::value &&    \
2088  
                   (sizeof...(From) > 0 && (N * sizeof...(From) >= Return::Size)) &&     \
2089  
                   !std::is_same<Return, SimdArrayType_<T, N, V, N>>::value),            \
2090  
                  Return>                                                                \
2091  
        simd_cast(const SimdArrayType_<T, N, V, N> &x0, const From &... xs)              \
2092  
    {                                                                                    \
2093  
        vc_debug_("simd_cast{indivisible2}(", ")\n", x0, xs...);                         \
2094  
        return simd_cast_without_last<Return,                                            \
2095  
                                      typename SimdArrayType_<T, N, V, N>::storage_type, \
2096  
                                      typename From::storage_type...>(                   \
2097  
            internal_data(x0), internal_data(xs)...);                                    \
2098  
    }                                                                                    \
2099  
    /* bisectable SimdArrayType_ (N = 2^n) && never too large */                         \
2100  
    template <typename Return, typename T, std::size_t N, typename V, std::size_t M,     \
2101  
              typename... From>                                                          \
2102  
    Vc_INTRINSIC Vc_CONST enable_if<                                                     \
2103  
        (N != M && are_all_types_equal<SimdArrayType_<T, N, V, M>, From...>::value &&    \
2104  
         !std::is_same<Return, SimdArrayType_<T, N, V, M>>::value &&                     \
2105  
         is_less<N * sizeof...(From), Return::Size>::value && is_power_of_2<N>::value),  \
2106  
        Return>                                                                          \
2107  
    simd_cast(const SimdArrayType_<T, N, V, M> &x0, const From &... xs)                  \
2108  
    {                                                                                    \
2109  
        vc_debug_("simd_cast{bisectable}(", ")\n", x0, xs...);                           \
2110  
        return simd_cast_interleaved_argument_order<                                     \
2111  
            Return, typename SimdArrayType_<T, N, V, M>::storage_type0,                  \
2112  
            typename From::storage_type0...>(internal_data0(x0), internal_data0(xs)...,  \
2113  
                                             internal_data1(x0), internal_data1(xs)...); \
2114  
    }                                                                                    \
2115  
    /* bisectable SimdArrayType_ (N = 2^n) && input so large that at least the last      \
2116  
     * input can be dropped */                                                           \
2117  
    template <typename Return, typename T, std::size_t N, typename V, std::size_t M,     \
2118  
              typename... From>                                                          \
2119  
    Vc_INTRINSIC Vc_CONST enable_if<                                                     \
2120  
        (N != M && are_all_types_equal<SimdArrayType_<T, N, V, M>, From...>::value &&    \
2121  
         !is_less<N * sizeof...(From), Return::Size>::value && is_power_of_2<N>::value), \
2122  
        Return>                                                                          \
2123  
    simd_cast(const SimdArrayType_<T, N, V, M> &x0, const From &... xs)                  \
2124  
    {                                                                                    \
2125  
        vc_debug_("simd_cast{bisectable2}(", ")\n", x0, xs...);                          \
2126  
        return simd_cast_without_last<Return, SimdArrayType_<T, N, V, M>, From...>(      \
2127  
            x0, xs...);                                                                  \
2128  
    }                                                                                    \
2129  
    /* remaining SimdArrayType_ input never larger (N != 2^n) */                         \
2130  
    template <typename Return, typename T, std::size_t N, typename V, std::size_t M,     \
2131  
              typename... From>                                                          \
2132  
    Vc_INTRINSIC Vc_CONST enable_if<                                                     \
2133  
        (N != M && are_all_types_equal<SimdArrayType_<T, N, V, M>, From...>::value &&    \
2134  
         N * (1 + sizeof...(From)) <= Return::Size && !is_power_of_2<N>::value),         \
2135  
        Return>                                                                          \
2136  
    simd_cast(const SimdArrayType_<T, N, V, M> &x0, const From &... xs)                  \
2137  
    {                                                                                    \
2138  
        vc_debug_("simd_cast{remaining}(", ")\n", x0, xs...);                            \
2139  
        return simd_cast_impl_smaller_input<Return, N, SimdArrayType_<T, N, V, M>,       \
2140  
                                            From...>(x0, xs...);                         \
2141  
    }                                                                                    \
2142  
    /* remaining SimdArrayType_ input larger (N != 2^n) */                               \
2143  
    template <typename Return, typename T, std::size_t N, typename V, std::size_t M,     \
2144  
              typename... From>                                                          \
2145  
    Vc_INTRINSIC Vc_CONST enable_if<                                                     \
2146  
        (N != M && are_all_types_equal<SimdArrayType_<T, N, V, M>, From...>::value &&    \
2147  
         N * (1 + sizeof...(From)) > Return::Size && !is_power_of_2<N>::value),          \
2148  
        Return>                                                                          \
2149  
    simd_cast(const SimdArrayType_<T, N, V, M> &x0, const From &... xs)                  \
2150  
    {                                                                                    \
2151  
        vc_debug_("simd_cast{remaining2}(", ")\n", x0, xs...);                           \
2152  
        return simd_cast_impl_larger_input<Return, N, SimdArrayType_<T, N, V, M>,        \
2153  
                                           From...>(x0, xs...);                          \
2154  
    }                                                                                    \
2155  
    /* a single bisectable SimdArrayType_ (N = 2^n) too large */                         \
2156  
    template <typename Return, typename T, std::size_t N, typename V, std::size_t M>     \
2157  
    Vc_INTRINSIC Vc_CONST                                                                \
2158  
        enable_if<(N != M && N >= 2 * Return::Size && is_power_of_2<N>::value), Return>  \
2159  
        simd_cast(const SimdArrayType_<T, N, V, M> &x)                                   \
2160  
    {                                                                                    \
2161  
        vc_debug_("simd_cast{single bisectable}(", ")\n", x);                            \
2162  
        return simd_cast<Return>(internal_data0(x));                                     \
2163  
    }                                                                                    \
2164  
    template <typename Return, typename T, std::size_t N, typename V, std::size_t M>     \
2165  
    Vc_INTRINSIC Vc_CONST enable_if<(N != M && N > Return::Size &&                       \
2166  
                                     N < 2 * Return::Size && is_power_of_2<N>::value),   \
2167  
                                    Return>                                              \
2168  
    simd_cast(const SimdArrayType_<T, N, V, M> &x)                                       \
2169  
    {                                                                                    \
2170  
        vc_debug_("simd_cast{single bisectable2}(", ")\n", x);                           \
2171  
        return simd_cast<Return>(internal_data0(x), internal_data1(x));                  \
2172  
    }                                                                                    \
2173  
    Vc_NOTHING_EXPECTING_SEMICOLON
2174  
2175  
Vc_SIMDARRAY_CASTS(SimdArray);
2176  
Vc_SIMDARRAY_CASTS(SimdMaskArray);
2177  
#undef Vc_SIMDARRAY_CASTS

// simd_cast<T, offset>(SimdArray/-mask) {{{2
#define Vc_SIMDARRAY_CASTS(SimdArrayType_)                                               \
    /* offset == 0 is like without offset */                                             \
    template <typename Return, int offset, typename T, std::size_t N, typename V,        \
              std::size_t M>                                                             \
    Vc_INTRINSIC Vc_CONST enable_if<(offset == 0), Return> simd_cast(                    \
        const SimdArrayType_<T, N, V, M> &x Vc_DUMMY_ARG0)                               \
    {                                                                                    \
        vc_debug_("simd_cast{offset == 0}(", ")\n", offset, x);                          \
        return simd_cast<Return>(x);                                                     \
    }                                                                                    \
    /* forward to V */                                                                   \
    template <typename Return, int offset, typename T, std::size_t N, typename V>        \
    Vc_INTRINSIC Vc_CONST enable_if<(offset != 0), Return> simd_cast(                    \
        const SimdArrayType_<T, N, V, N> &x Vc_DUMMY_ARG1)                               \
    {                                                                                    \
        vc_debug_("simd_cast{offset, forward}(", ")\n", offset, x);                      \
        return simd_cast<Return, offset>(internal_data(x));                              \
    }                                                                                    \
    /* convert from right member of SimdArray */                                         \
    template <typename Return, int offset, typename T, std::size_t N, typename V,        \
              std::size_t M>                                                             \
    Vc_INTRINSIC Vc_CONST                                                                \
        enable_if<(N != M && offset * Return::Size >= Common::left_size<N>() &&          \
                   offset != 0 && Common::left_size<N>() % Return::Size == 0),           \
                  Return>                                                                \
        simd_cast(const SimdArrayType_<T, N, V, M> &x Vc_DUMMY_ARG2)                     \
    {                                                                                    \
        vc_debug_("simd_cast{offset, right}(", ")\n", offset, x);                        \
        return simd_cast<Return, offset - Common::left_size<N>() / Return::Size>(        \
            internal_data1(x));                                                          \
    }                                                                                    \
    /* same as above except for odd cases where offset * Return::Size doesn't fit the    \
     * left side of the SimdArray */                                                     \
    template <typename Return, int offset, typename T, std::size_t N, typename V,        \
              std::size_t M>                                                             \
    Vc_INTRINSIC Vc_CONST                                                                \
        enable_if<(N != M && offset * Return::Size >= Common::left_size<N>() &&          \
                   offset != 0 && Common::left_size<N>() % Return::Size != 0),           \
                  Return>                                                                \
        simd_cast(const SimdArrayType_<T, N, V, M> &x Vc_DUMMY_ARG3)                     \
    {                                                                                    \
        vc_debug_("simd_cast{offset, right, nofit}(", ")\n", offset, x);                 \
        return simd_cast_with_offset<Return,                                             \
                                     offset * Return::Size - Common::left_size<N>()>(    \
            internal_data1(x));                                                          \
    }                                                                                    \
    /* convert from left member of SimdArray */                                          \
    template <typename Return, int offset, typename T, std::size_t N, typename V,        \
              std::size_t M>                                                             \
    Vc_INTRINSIC Vc_CONST enable_if<                                                     \
        (N != M && /*offset * Return::Size < Common::left_size<N>() &&*/                 \
         offset != 0 && (offset + 1) * Return::Size <= Common::left_size<N>()),          \
        Return>                                                                          \
    simd_cast(const SimdArrayType_<T, N, V, M> &x Vc_DUMMY_ARG4)                         \
    {                                                                                    \
        vc_debug_("simd_cast{offset, left}(", ")\n", offset, x);                         \
        return simd_cast<Return, offset>(internal_data0(x));                             \
    }                                                                                    \
    /* fallback to copying scalars */                                                    \
    template <typename Return, int offset, typename T, std::size_t N, typename V,        \
              std::size_t M>                                                             \
    Vc_INTRINSIC Vc_CONST                                                                \
        enable_if<(N != M && (offset * Return::Size < Common::left_size<N>()) &&         \
                   offset != 0 && (offset + 1) * Return::Size > Common::left_size<N>()), \
                  Return>                                                                \
        simd_cast(const SimdArrayType_<T, N, V, M> &x Vc_DUMMY_ARG5)                     \
    {                                                                                    \
        vc_debug_("simd_cast{offset, copy scalars}(", ")\n", offset, x);                 \
        using R = typename Return::EntryType;                                            \
        Return r = Return::Zero();                                                       \
        for (std::size_t i = offset * Return::Size;                                      \
             i < std::min(N, (offset + 1) * Return::Size); ++i) {                        \
            r[i - offset * Return::Size] = static_cast<R>(x[i]);                         \
        }                                                                                \
        return r;                                                                        \
    }                                                                                    \
    Vc_NOTHING_EXPECTING_SEMICOLON
Vc_SIMDARRAY_CASTS(SimdArray);
Vc_SIMDARRAY_CASTS(SimdMaskArray);
#undef Vc_SIMDARRAY_CASTS
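// Illustrative use of the offset-taking casts above (a hedged sketch, not part of the
// original header). The offset template argument counts in units of Return::Size
// elements:
//
//   Vc::SimdArray<float, 16> x(1.f);
//   // selects chunk number 2, i.e. elements [8, 12) of x:
//   auto chunk = Vc::simd_cast<Vc::SimdArray<float, 4>, 2>(x);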
// simd_cast_drop_arguments (definitions) {{{2
template <typename Return, typename From>
Vc_INTRINSIC Vc_CONST Return simd_cast_drop_arguments(From x)
{
    return simd_cast<Return>(x);
}
template <typename Return, typename... Froms>
Vc_INTRINSIC Vc_CONST
    enable_if<(are_all_types_equal<Froms...>::value &&
               sizeof...(Froms) * first_type_of<Froms...>::Size < Return::Size),
              Return>
        simd_cast_drop_arguments(Froms... xs, first_type_of<Froms...> x)
{
    return simd_cast<Return>(xs..., x);
}
// The following function can be implemented without the sizeof...(From) overload.
// However, ICC has a bug (Premier Issue #6000116338) which leads to an ICE. Splitting the
// function in two works around the issue.
template <typename Return, typename From, typename... Froms>
Vc_INTRINSIC Vc_CONST enable_if<
    (are_all_types_equal<From, Froms...>::value &&
     (1 + sizeof...(Froms)) * From::Size >= Return::Size && sizeof...(Froms) != 0),
    Return>
simd_cast_drop_arguments(Froms... xs, From x, From)
{
    return simd_cast_drop_arguments<Return, Froms...>(xs..., x);
}
template <typename Return, typename From>
Vc_INTRINSIC Vc_CONST
    enable_if<(are_all_types_equal<From>::value && From::Size >= Return::Size), Return>
    simd_cast_drop_arguments(From x, From)
{
    return simd_cast_drop_arguments<Return>(x);
}
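// simd_cast_drop_arguments<Return>(x0, ..., xn) is an internal helper of the cast
// machinery: it peels off trailing arguments whose elements would lie entirely beyond
// Return::Size and forwards the remaining prefix to simd_cast. E.g. (a hedged sketch
// with hypothetical 4-wide arguments and an 8-wide Return) a call with three arguments
// reduces to simd_cast<Return>(x0, x1), because x0 and x1 already cover all 8 elements.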

// simd_cast_with_offset (definitions) {{{2
template <typename Return, std::size_t offset, typename From>
Vc_INTRINSIC Vc_CONST
enable_if<(From::Size > offset && offset > 0 && offset % Return::Size == 0),
          Return> simd_cast_with_offset(const From &x)
{
    return simd_cast<Return, offset / Return::Size>(x);
}
template <typename Return, std::size_t offset, typename From>
Vc_INTRINSIC Vc_CONST
    enable_if<(From::Size > offset && offset > 0 && offset % Return::Size != 0 &&
               ((Traits::isSimdArray<Return>::value &&
                 !Traits::isAtomicSimdArray<Return>::value) ||
                (Traits::isSimdMaskArray<Return>::value &&
                 !Traits::isAtomicSimdMaskArray<Return>::value))),
              Return>
        simd_cast_with_offset(const From &x)
{
    using R0 = typename Return::storage_type0;
    using R1 = typename Return::storage_type1;
    return {simd_cast_with_offset<R0, offset>(x),
            simd_cast_with_offset<R1, offset + R0::Size>(x)};
}
template <typename Return, std::size_t offset, typename From>
Vc_INTRINSIC Vc_CONST
    enable_if<(From::Size > offset && offset > 0 && offset % Return::Size != 0 &&
               ((Traits::isSimdArray<Return>::value &&
                 Traits::isAtomicSimdArray<Return>::value) ||
                (Traits::isSimdMaskArray<Return>::value &&
                 Traits::isAtomicSimdMaskArray<Return>::value))),
              Return>
        simd_cast_with_offset(const From &x)
{
    return simd_cast<Return, offset / Return::Size>(x.shifted(offset % Return::Size));
}
template <typename Return, std::size_t offset, typename From, typename... Froms>
Vc_INTRINSIC Vc_CONST
    enable_if<(are_all_types_equal<From, Froms...>::value && offset == 0), Return>
        simd_cast_with_offset(const From &x, const Froms &... xs)
{
    return simd_cast<Return>(x, xs...);
}
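// simd_cast_with_offset<Return, offset>(x) is the element-granular variant: unlike
// simd_cast<Return, offset>, its offset counts scalar elements of x rather than chunks
// of Return::Size. Hedged sketch (not part of the original header; widths hypothetical):
//
//   Vc::SimdArray<float, 16> x(1.f);
//   // converts elements [6, 10) of x:
//   auto y = Vc::simd_cast_with_offset<Vc::SimdArray<float, 4>, 6>(x);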

// simd_cast_without_last (definition) {{{2
template <typename Return, typename T, typename... From>
Vc_INTRINSIC Vc_CONST Return simd_cast_without_last(const From &... xs, const T &)
{
    return simd_cast<Return>(xs...);
}
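// simd_cast_without_last<Return, T>(xs..., last) simply ignores its trailing argument
// and forwards the remaining pack to simd_cast<Return>(xs...).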

// simd_cast_interleaved_argument_order (definitions) {{{2

#ifdef Vc_MSVC
// MSVC doesn't see that the Ts pack below can be empty and thus complains when extract_interleaved
// is called with only 2 arguments. These overloads here are *INCORRECT standard C++*, but they make
// MSVC do the right thing.
template <std::size_t I, typename T0>
Vc_INTRINSIC Vc_CONST enable_if<(I == 0), T0> extract_interleaved(const T0 &a0, const T0 &)
{
    return a0;
}
template <std::size_t I, typename T0>
Vc_INTRINSIC Vc_CONST enable_if<(I == 1), T0> extract_interleaved(const T0 &, const T0 &b0)
{
    return b0;
}
#endif  // Vc_MSVC

/// \internal returns the first argument
template <std::size_t I, typename T0, typename... Ts>
Vc_INTRINSIC Vc_CONST enable_if<(I == 0), T0> extract_interleaved(const T0 &a0,
                                                                  const Ts &...,
                                                                  const T0 &,
                                                                  const Ts &...)
{
    return a0;
}
/// \internal returns the center argument
template <std::size_t I, typename T0, typename... Ts>
Vc_INTRINSIC Vc_CONST enable_if<(I == 1), T0> extract_interleaved(const T0 &,
                                                                  const Ts &...,
                                                                  const T0 &b0,
                                                                  const Ts &...)
{
    return b0;
}
/// \internal drops the first and center arguments and recurses
template <std::size_t I, typename T0, typename... Ts>
Vc_INTRINSIC Vc_CONST enable_if<(I > 1), T0> extract_interleaved(const T0 &,
                                                                 const Ts &... a,
                                                                 const T0 &,
                                                                 const Ts &... b)
{
    return extract_interleaved<I - 2, Ts...>(a..., b...);
}
/// \internal calls simd_cast with correct argument order thanks to extract_interleaved
template <typename Return, typename... Ts, std::size_t... Indexes>
Vc_INTRINSIC Vc_CONST Return
    simd_cast_interleaved_argument_order_1(index_sequence<Indexes...>, const Ts &... a,
                                           const Ts &... b)
{
    return simd_cast<Return>(extract_interleaved<Indexes, Ts...>(a..., b...)...);
}
/// \internal constructs the necessary index_sequence to pass it to
/// simd_cast_interleaved_argument_order_1
template <typename Return, typename... Ts>
Vc_INTRINSIC Vc_CONST Return
    simd_cast_interleaved_argument_order(const Ts &... a, const Ts &... b)
{
    using seq = make_index_sequence<sizeof...(Ts)*2>;
    return simd_cast_interleaved_argument_order_1<Return, Ts...>(seq(), a..., b...);
}
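// Hedged sketch of the interleaving helpers above (not part of the original header):
// given two packs (a0, a1) and (b0, b1), the call
//   simd_cast_interleaved_argument_order<Return>(a0, a1, b0, b1)
// expands to
//   simd_cast<Return>(a0, b0, a1, b1)
// because extract_interleaved<I> selects the I-th element of the interleaved sequence
// a0, b0, a1, b1, ...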

// conditional_assign {{{1
#define Vc_CONDITIONAL_ASSIGN(name_, op_)                                                \
    template <Operator O, typename T, std::size_t N, typename V, size_t VN, typename M,  \
              typename U>                                                                \
    Vc_INTRINSIC enable_if<O == Operator::name_, void> conditional_assign(               \
        SimdArray<T, N, V, VN> &lhs, M &&mask, U &&rhs)                                  \
    {                                                                                    \
        lhs(mask) op_ rhs;                                                               \
    }                                                                                    \
    Vc_NOTHING_EXPECTING_SEMICOLON
Vc_CONDITIONAL_ASSIGN(          Assign,  =);
Vc_CONDITIONAL_ASSIGN(      PlusAssign, +=);
Vc_CONDITIONAL_ASSIGN(     MinusAssign, -=);
Vc_CONDITIONAL_ASSIGN(  MultiplyAssign, *=);
Vc_CONDITIONAL_ASSIGN(    DivideAssign, /=);
Vc_CONDITIONAL_ASSIGN( RemainderAssign, %=);
Vc_CONDITIONAL_ASSIGN(       XorAssign, ^=);
Vc_CONDITIONAL_ASSIGN(       AndAssign, &=);
Vc_CONDITIONAL_ASSIGN(        OrAssign, |=);
Vc_CONDITIONAL_ASSIGN( LeftShiftAssign,<<=);
Vc_CONDITIONAL_ASSIGN(RightShiftAssign,>>=);
#undef Vc_CONDITIONAL_ASSIGN

#define Vc_CONDITIONAL_ASSIGN(name_, expr_)                                              \
    template <Operator O, typename T, std::size_t N, typename V, size_t VN, typename M>  \
    Vc_INTRINSIC enable_if<O == Operator::name_, SimdArray<T, N, V, VN>>                 \
    conditional_assign(SimdArray<T, N, V, VN> &lhs, M &&mask)                            \
    {                                                                                    \
        return expr_;                                                                    \
    }                                                                                    \
    Vc_NOTHING_EXPECTING_SEMICOLON
Vc_CONDITIONAL_ASSIGN(PostIncrement, lhs(mask)++);
Vc_CONDITIONAL_ASSIGN( PreIncrement, ++lhs(mask));
Vc_CONDITIONAL_ASSIGN(PostDecrement, lhs(mask)--);
Vc_CONDITIONAL_ASSIGN( PreDecrement, --lhs(mask));
#undef Vc_CONDITIONAL_ASSIGN
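// Illustrative use of the conditional_assign overloads generated above (a hedged sketch,
// not part of the original header; it assumes the Operator enumeration is reachable as
// Vc::Operator):
//
//   Vc::SimdArray<float, 8> v(1.f);
//   const auto mask = v > 0.f;
//   Vc::conditional_assign<Vc::Operator::PlusAssign>(v, mask, 2.f);  // same as v(mask) += 2.f
//   Vc::conditional_assign<Vc::Operator::PreIncrement>(v, mask);     // same as ++v(mask)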
// transpose_impl {{{1
namespace Common
{
template <typename T, size_t N, typename V>
inline void transpose_impl(
    TransposeTag<4, 4>, SimdArray<T, N, V, N> *Vc_RESTRICT r[],
    const TransposeProxy<SimdArray<T, N, V, N>, SimdArray<T, N, V, N>,
                         SimdArray<T, N, V, N>, SimdArray<T, N, V, N>> &proxy)
{
    V *Vc_RESTRICT r2[4] = {&internal_data(*r[0]), &internal_data(*r[1]),
                            &internal_data(*r[2]), &internal_data(*r[3])};
    transpose_impl(TransposeTag<4, 4>(), &r2[0],
                   TransposeProxy<V, V, V, V>{internal_data(std::get<0>(proxy.in)),
                                              internal_data(std::get<1>(proxy.in)),
                                              internal_data(std::get<2>(proxy.in)),
                                              internal_data(std::get<3>(proxy.in))});
}

template <typename T, typename V>
inline void transpose_impl(
    TransposeTag<2, 4>, SimdArray<T, 4, V, 1> *Vc_RESTRICT r[],
    const TransposeProxy<SimdArray<T, 2, V, 1>, SimdArray<T, 2, V, 1>,
                         SimdArray<T, 2, V, 1>, SimdArray<T, 2, V, 1>> &proxy)
{
    auto &lo = *r[0];
    auto &hi = *r[1];
    internal_data0(internal_data0(lo)) = internal_data0(std::get<0>(proxy.in));
    internal_data1(internal_data0(lo)) = internal_data0(std::get<1>(proxy.in));
    internal_data0(internal_data1(lo)) = internal_data0(std::get<2>(proxy.in));
    internal_data1(internal_data1(lo)) = internal_data0(std::get<3>(proxy.in));
    internal_data0(internal_data0(hi)) = internal_data1(std::get<0>(proxy.in));
    internal_data1(internal_data0(hi)) = internal_data1(std::get<1>(proxy.in));
    internal_data0(internal_data1(hi)) = internal_data1(std::get<2>(proxy.in));
    internal_data1(internal_data1(hi)) = internal_data1(std::get<3>(proxy.in));
}

template <typename T, typename V>
inline void transpose_impl(
    TransposeTag<4, 4>, SimdArray<T, 1, V, 1> *Vc_RESTRICT r[],
    const TransposeProxy<SimdArray<T, 1, V, 1>, SimdArray<T, 1, V, 1>,
                         SimdArray<T, 1, V, 1>, SimdArray<T, 1, V, 1>> &proxy)
{
    V *Vc_RESTRICT r2[4] = {&internal_data(*r[0]), &internal_data(*r[1]),
                            &internal_data(*r[2]), &internal_data(*r[3])};
    transpose_impl(TransposeTag<4, 4>(), &r2[0],
                   TransposeProxy<V, V, V, V>{internal_data(std::get<0>(proxy.in)),
                                              internal_data(std::get<1>(proxy.in)),
                                              internal_data(std::get<2>(proxy.in)),
                                              internal_data(std::get<3>(proxy.in))});
}

template <typename T, size_t N, typename V>
inline void transpose_impl(
    TransposeTag<4, 4>, SimdArray<T, N, V, 1> *Vc_RESTRICT r[],
    const TransposeProxy<SimdArray<T, N, V, 1>, SimdArray<T, N, V, 1>,
                         SimdArray<T, N, V, 1>, SimdArray<T, N, V, 1>> &proxy)
{
    SimdArray<T, N, V, 1> *Vc_RESTRICT r0[4 / 2] = {r[0], r[1]};
    SimdArray<T, N, V, 1> *Vc_RESTRICT r1[4 / 2] = {r[2], r[3]};
    using H = SimdArray<T, 2>;
    transpose_impl(TransposeTag<2, 4>(), &r0[0],
                   TransposeProxy<H, H, H, H>{internal_data0(std::get<0>(proxy.in)),
                                              internal_data0(std::get<1>(proxy.in)),
                                              internal_data0(std::get<2>(proxy.in)),
                                              internal_data0(std::get<3>(proxy.in))});
    transpose_impl(TransposeTag<2, 4>(), &r1[0],
                   TransposeProxy<H, H, H, H>{internal_data1(std::get<0>(proxy.in)),
                                              internal_data1(std::get<1>(proxy.in)),
                                              internal_data1(std::get<2>(proxy.in)),
                                              internal_data1(std::get<3>(proxy.in))});
}

/* TODO:
template <typename T, std::size_t N, typename V, std::size_t VSize>
inline enable_if<(N > VSize), void> transpose_impl(
    std::array<SimdArray<T, N, V, VSize> * Vc_RESTRICT, 4> & r,
    const TransposeProxy<SimdArray<T, N, V, VSize>, SimdArray<T, N, V, VSize>,
                         SimdArray<T, N, V, VSize>, SimdArray<T, N, V, VSize>> &proxy)
{
    typedef SimdArray<T, N, V, VSize> SA;
    std::array<typename SA::storage_type0 * Vc_RESTRICT, 4> r0 = {
        {&internal_data0(*r[0]), &internal_data0(*r[1]), &internal_data0(*r[2]),
         &internal_data0(*r[3])}};
    transpose_impl(
        r0, TransposeProxy<typename SA::storage_type0, typename SA::storage_type0,
                           typename SA::storage_type0, typename SA::storage_type0>{
                internal_data0(std::get<0>(proxy.in)),
                internal_data0(std::get<1>(proxy.in)),
                internal_data0(std::get<2>(proxy.in)),
                internal_data0(std::get<3>(proxy.in))});

    std::array<typename SA::storage_type1 * Vc_RESTRICT, 4> r1 = {
        {&internal_data1(*r[0]), &internal_data1(*r[1]), &internal_data1(*r[2]),
         &internal_data1(*r[3])}};
    transpose_impl(
        r1, TransposeProxy<typename SA::storage_type1, typename SA::storage_type1,
                           typename SA::storage_type1, typename SA::storage_type1>{
                internal_data1(std::get<0>(proxy.in)),
                internal_data1(std::get<1>(proxy.in)),
                internal_data1(std::get<2>(proxy.in)),
                internal_data1(std::get<3>(proxy.in))});
}
*/
}  // namespace Common
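// Hedged usage sketch for the transpose_impl overloads above (not part of the original
// header; it assumes the Vc::transpose / Vc::tie helpers declared in transpose.h, which
// build the TransposeProxy consumed here):
//
//   using V = Vc::SimdArray<float, 4>;
//   V r0(0.f), r1(1.f), r2(2.f), r3(3.f);                       // rows of a 4x4 matrix
//   Vc::tie(r0, r1, r2, r3) = Vc::transpose(r0, r1, r2, r3);    // rows become columns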

// Traits static assertions {{{1
static_assert(Traits::has_no_allocated_data<const volatile Vc::SimdArray<int, 4> &>::value, "");
static_assert(Traits::has_no_allocated_data<const volatile Vc::SimdArray<int, 4>>::value, "");
static_assert(Traits::has_no_allocated_data<volatile Vc::SimdArray<int, 4> &>::value, "");
static_assert(Traits::has_no_allocated_data<volatile Vc::SimdArray<int, 4>>::value, "");
static_assert(Traits::has_no_allocated_data<const Vc::SimdArray<int, 4> &>::value, "");
static_assert(Traits::has_no_allocated_data<const Vc::SimdArray<int, 4>>::value, "");
static_assert(Traits::has_no_allocated_data<Vc::SimdArray<int, 4>>::value, "");
static_assert(Traits::has_no_allocated_data<Vc::SimdArray<int, 4> &&>::value, "");
// }}}1
/// @}

} // namespace Vc_VERSIONED_NAMESPACE

// numeric_limits {{{1
namespace std
{
template <typename T, size_t N, typename V, size_t VN>
struct numeric_limits<Vc::SimdArray<T, N, V, VN>> : public numeric_limits<T> {
private:
    using R = Vc::SimdArray<T, N, V, VN>;

public:
    static Vc_ALWAYS_INLINE Vc_CONST R max() noexcept { return numeric_limits<T>::max(); }
    static Vc_ALWAYS_INLINE Vc_CONST R min() noexcept { return numeric_limits<T>::min(); }
    static Vc_ALWAYS_INLINE Vc_CONST R lowest() noexcept
    {
        return numeric_limits<T>::lowest();
    }
    static Vc_ALWAYS_INLINE Vc_CONST R epsilon() noexcept
    {
        return numeric_limits<T>::epsilon();
    }
    static Vc_ALWAYS_INLINE Vc_CONST R round_error() noexcept
    {
        return numeric_limits<T>::round_error();
    }
    static Vc_ALWAYS_INLINE Vc_CONST R infinity() noexcept
    {
        return numeric_limits<T>::infinity();
    }
    static Vc_ALWAYS_INLINE Vc_CONST R quiet_NaN() noexcept
    {
        return numeric_limits<T>::quiet_NaN();
    }
    static Vc_ALWAYS_INLINE Vc_CONST R signaling_NaN() noexcept
    {
        return numeric_limits<T>::signaling_NaN();
    }
    static Vc_ALWAYS_INLINE Vc_CONST R denorm_min() noexcept
    {
        return numeric_limits<T>::denorm_min();
    }
};
}  // namespace std
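// Hedged usage sketch for the numeric_limits specialization above (not part of the
// original header): each query broadcasts the scalar limit of T into every element of
// the SimdArray.
//
//   using V = Vc::SimdArray<float, 8>;
//   V upper = std::numeric_limits<V>::max();      // every element == numeric_limits<float>::max()
//   V eps   = std::numeric_limits<V>::epsilon();  // every element == numeric_limits<float>::epsilon()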
//}}}1

#endif // VC_COMMON_SIMDARRAY_H_

// vim: foldmethod=marker