Line | % of fetches | Source | |
---|---|---|---|
/* This file is part of the Vc library. {{{
Copyright © 2013-2015 Matthias Kretz <kretz@kde.org>

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
    * Redistributions of source code must retain the above copyright
      notice, this list of conditions and the following disclaimer.
    * Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in the
      documentation and/or other materials provided with the distribution.
    * Neither the names of contributing organizations nor the
      names of its contributors may be used to endorse or promote products
      derived from this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE FOR ANY
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

}}}*/
27 | |||
28 | #ifndef VC_COMMON_SIMDARRAY_H_ | ||
29 | #define VC_COMMON_SIMDARRAY_H_ | ||
30 | |||
31 | //#define Vc_DEBUG_SIMD_CAST 1 | ||
32 | //#define Vc_DEBUG_SORTED 1 | ||
33 | #if defined Vc_DEBUG_SIMD_CAST || defined Vc_DEBUG_SORTED | ||
34 | #include <Vc/IO> | ||
35 | #endif | ||
36 | |||
37 | #include <array> | ||
38 | |||
39 | #include "writemaskedvector.h" | ||
40 | #include "simdarrayhelper.h" | ||
41 | #include "simdmaskarray.h" | ||
42 | #include "utility.h" | ||
43 | #include "interleave.h" | ||
44 | #include "indexsequence.h" | ||
45 | #include "transpose.h" | ||
46 | #include "macros.h" | ||
47 | |||
48 | namespace Vc_VERSIONED_NAMESPACE | ||
49 | { | ||
50 | // internal namespace (product & sum helper) {{{1 | ||
51 | namespace internal | ||
52 | { | ||
53 | template <typename T> T Vc_INTRINSIC Vc_PURE product_helper_(const T &l, const T &r) { return l * r; } | ||
54 | template <typename T> T Vc_INTRINSIC Vc_PURE sum_helper_(const T &l, const T &r) { return l + r; } | ||
55 | } // namespace internal | ||
56 | |||
57 | // min & max declarations {{{1 | ||
58 | template <typename T, std::size_t N, typename V, std::size_t M> | ||
59 | inline SimdArray<T, N, V, M> min(const SimdArray<T, N, V, M> &x, | ||
60 | const SimdArray<T, N, V, M> &y); | ||
61 | template <typename T, std::size_t N, typename V, std::size_t M> | ||
62 | inline SimdArray<T, N, V, M> max(const SimdArray<T, N, V, M> &x, | ||
63 | const SimdArray<T, N, V, M> &y); | ||
64 | |||
65 | // SimdArray class {{{1 | ||
66 | /// \addtogroup SimdArray | ||
67 | /// @{ | ||
68 | |||
69 | // atomic SimdArray {{{1 | ||
70 | #define Vc_CURRENT_CLASS_NAME SimdArray | ||
71 | /**\internal | ||
72 | * Specialization of `SimdArray<T, N, VectorType, VectorSize>` for the case where `N == | ||
73 | * VectorSize`. | ||
74 | * | ||
75 | * This is specialized for implementation purposes: Since the general implementation uses | ||
76 | * two SimdArray data members it recurses over different SimdArray instantiations. The | ||
77 | * recursion is ended by this specialization, which has a single \p VectorType_ data | ||
78 | * member to which all functions are forwarded more or less directly. | ||
79 | */ | ||
80 | template <typename T, std::size_t N, typename VectorType_> | ||
81 | class SimdArray<T, N, VectorType_, N> | ||
82 | { | ||
83 | static_assert(std::is_same<T, double>::value || std::is_same<T, float>::value || | ||
84 | std::is_same<T, int32_t>::value || | ||
85 | std::is_same<T, uint32_t>::value || | ||
86 | std::is_same<T, int16_t>::value || | ||
87 | std::is_same<T, uint16_t>::value, | ||
88 | "SimdArray<T, N> may only be used with T = { double, float, int32_t, uint32_t, " | ||
89 | "int16_t, uint16_t }"); | ||
90 | |||
91 | public: | ||
92 | using VectorType = VectorType_; | ||
93 | using vector_type = VectorType; | ||
94 | using storage_type = vector_type; | ||
95 | using vectorentry_type = typename vector_type::VectorEntryType; | ||
96 | using value_type = T; | ||
97 | using mask_type = SimdMaskArray<T, N, vector_type>; | ||
98 | using index_type = SimdArray<int, N>; | ||
99 | static constexpr std::size_t size() { return N; } | ||
100 | using Mask = mask_type; | ||
101 | using MaskType = Mask; | ||
102 | using MaskArgument = const MaskType &; | ||
103 | using VectorEntryType = vectorentry_type; | ||
104 | using EntryType = value_type; | ||
105 | using IndexType = index_type; | ||
106 | using AsArg = const SimdArray &; | ||
107 | using reference = Detail::ElementReference<SimdArray>; | ||
108 | static constexpr std::size_t Size = size(); | ||
109 | static constexpr std::size_t MemoryAlignment = storage_type::MemoryAlignment; | ||
110 | |||
111 | // zero init | ||
112 | #ifndef Vc_MSVC // bogus error C2580 | ||
113 | Vc_INTRINSIC SimdArray() = default; | ||
114 | #endif | ||
115 | |||
116 | // default copy ctor/operator | ||
117 | Vc_INTRINSIC SimdArray(const SimdArray &) = default; | ||
118 | Vc_INTRINSIC SimdArray(SimdArray &&) = default; | ||
119 | Vc_INTRINSIC SimdArray &operator=(const SimdArray &) = default; | ||
120 | |||
121 | // broadcast | ||
122 | Vc_INTRINSIC SimdArray(const value_type &a) : data(a) {} | ||
123 | Vc_INTRINSIC SimdArray(value_type &a) : data(a) {} | ||
124 | Vc_INTRINSIC SimdArray(value_type &&a) : data(a) {} | ||
125 | template < | ||
126 | typename U, | ||
127 | typename = enable_if<std::is_same<U, int>::value && !std::is_same<int, value_type>::value>> | ||
128 | Vc_INTRINSIC SimdArray(U a) | ||
129 | : SimdArray(static_cast<value_type>(a)) | ||
130 | { | ||
131 | } | ||
132 | |||
133 | // implicit casts | ||
134 | template <typename U, typename V> | ||
135 | Vc_INTRINSIC SimdArray(const SimdArray<U, N, V> &x, enable_if<N == V::Size> = nullarg) | ||
136 | : data(simd_cast<vector_type>(internal_data(x))) | ||
137 | { | ||
138 | } | ||
139 | template <typename U, typename V> | ||
140 | Vc_INTRINSIC SimdArray(const SimdArray<U, N, V> &x, | ||
141 | enable_if<(N > V::Size && N <= 2 * V::Size)> = nullarg) | ||
142 | : data(simd_cast<vector_type>(internal_data(internal_data0(x)), internal_data(internal_data1(x)))) | ||
143 | { | ||
144 | } | ||
145 | template <typename U, typename V> | ||
146 | Vc_INTRINSIC SimdArray(const SimdArray<U, N, V> &x, | ||
147 | enable_if<(N > 2 * V::Size && N <= 4 * V::Size)> = nullarg) | ||
148 | : data(simd_cast<vector_type>(internal_data(internal_data0(internal_data0(x))), | ||
149 | internal_data(internal_data1(internal_data0(x))), | ||
150 | internal_data(internal_data0(internal_data1(x))), | ||
151 | internal_data(internal_data1(internal_data1(x))))) | ||
152 | { | ||
153 | } | ||
154 | |||
155 | template <typename V, std::size_t Pieces, std::size_t Index> | ||
156 | Vc_INTRINSIC SimdArray(Common::Segment<V, Pieces, Index> &&x) | ||
157 | : data(simd_cast<vector_type, Index>(x.data)) | ||
158 | { | ||
159 | } | ||
160 | |||
161 | Vc_INTRINSIC SimdArray(const std::initializer_list<value_type> &init) | ||
162 | : data(init.begin(), Vc::Unaligned) | ||
163 | { | ||
164 | #if defined Vc_CXX14 && 0 // doesn't compile yet | ||
165 | static_assert(init.size() == size(), "The initializer_list argument to " | ||
166 | "SimdArray<T, N> must contain exactly N " | ||
167 | "values."); | ||
168 | #else | ||
169 | Vc_ASSERT(init.size() == size()); | ||
170 | #endif | ||
171 | } | ||
172 | |||
173 | // implicit conversion from underlying vector_type | ||
174 | template < | ||
175 | typename V, | ||
176 | typename = enable_if<Traits::is_simd_vector<V>::value && !Traits::isSimdArray<V>::value>> | ||
177 | explicit Vc_INTRINSIC SimdArray(const V &x) | ||
178 | : data(simd_cast<vector_type>(x)) | ||
179 | { | ||
180 | } | ||
181 | |||
182 | // implicit conversion to Vector<U, AnyAbi> for if Vector<U, AnyAbi>::size() == N and | ||
183 | // T implicitly convertible to U | ||
184 | template < | ||
185 | typename U, typename A, | ||
186 | typename = enable_if<std::is_convertible<T, U>::value && Vector<U, A>::Size == N>> | ||
187 | Vc_INTRINSIC operator Vector<U, A>() const | ||
188 | { | ||
189 | return simd_cast<Vector<U, A>>(data); | ||
190 | } | ||
191 | |||
192 | #include "gatherinterface.h" | ||
193 | #include "scatterinterface.h" | ||
194 | |||
195 | // forward all remaining ctors | ||
196 | template <typename... Args, | ||
197 | typename = enable_if<!Traits::is_cast_arguments<Args...>::value && | ||
198 | !Traits::is_gather_signature<Args...>::value && | ||
199 | !Traits::is_initializer_list<Args...>::value>> | ||
200 | explicit Vc_INTRINSIC SimdArray(Args &&... args) | ||
201 | : data(std::forward<Args>(args)...) | ||
202 | { | ||
203 | } | ||
204 | |||
205 | template <std::size_t Offset> | ||
206 | explicit Vc_INTRINSIC SimdArray( | ||
207 | Common::AddOffset<VectorSpecialInitializerIndexesFromZero, Offset>) | ||
208 | : data(Vc::IndexesFromZero) | ||
209 | { | ||
210 | data += value_type(Offset); | ||
211 | } | ||
212 | |||
213 | Vc_INTRINSIC void setZero() { data.setZero(); } | ||
214 | Vc_INTRINSIC void setZero(mask_type k) { data.setZero(internal_data(k)); } | ||
215 | Vc_INTRINSIC void setZeroInverted() { data.setZeroInverted(); } | ||
216 | Vc_INTRINSIC void setZeroInverted(mask_type k) { data.setZeroInverted(internal_data(k)); } | ||
217 | |||
218 | Vc_INTRINSIC void setQnan() { data.setQnan(); } | ||
219 | Vc_INTRINSIC void setQnan(mask_type m) { data.setQnan(internal_data(m)); } | ||
220 | |||
221 | // internal: execute specified Operation | ||
222 | template <typename Op, typename... Args> | ||
223 | static Vc_INTRINSIC SimdArray fromOperation(Op op, Args &&... args) | ||
224 | { | ||
225 | SimdArray r; | ||
226 | Common::unpackArgumentsAuto(op, r.data, std::forward<Args>(args)...); | ||
227 | return r; | ||
228 | } | ||
229 | |||
230 | template <typename Op, typename... Args> | ||
231 | static Vc_INTRINSIC void callOperation(Op op, Args &&... args) | ||
232 | { | ||
233 | Common::unpackArgumentsAuto(op, nullptr, std::forward<Args>(args)...); | ||
234 | } | ||
235 | |||
236 | static Vc_INTRINSIC SimdArray Zero() | ||
237 | { | ||
238 | return SimdArray(Vc::Zero); | ||
239 | } | ||
240 | static Vc_INTRINSIC SimdArray One() | ||
241 | { | ||
242 | return SimdArray(Vc::One); | ||
243 | } | ||
244 | static Vc_INTRINSIC SimdArray IndexesFromZero() | ||
245 | { | ||
246 | return SimdArray(Vc::IndexesFromZero); | ||
247 | } | ||
248 | static Vc_INTRINSIC SimdArray Random() | ||
249 | { | ||
250 | return fromOperation(Common::Operations::random()); | ||
251 | } | ||
252 | |||
253 | template <typename... Args> Vc_INTRINSIC void load(Args &&... args) | ||
254 | { | ||
255 | data.load(std::forward<Args>(args)...); | ||
256 | } | ||
257 | |||
258 | template <typename... Args> Vc_INTRINSIC void store(Args &&... args) const | ||
259 | { | ||
260 | data.store(std::forward<Args>(args)...); | ||
261 | } | ||
262 | |||
263 | Vc_INTRINSIC mask_type operator!() const | ||
264 | { | ||
265 | return {!data}; | ||
266 | } | ||
267 | |||
268 | Vc_INTRINSIC SimdArray operator-() const | ||
269 | { | ||
270 | return {-data}; | ||
271 | } | ||
272 | |||
273 | /// Returns a copy of itself | ||
274 | Vc_INTRINSIC SimdArray operator+() const { return *this; } | ||
275 | |||
276 | Vc_INTRINSIC SimdArray operator~() const | ||
277 | { | ||
278 | return {~data}; | ||
279 | } | ||
280 | |||
281 | template <typename U, | ||
282 | typename = enable_if<std::is_integral<T>::value && std::is_integral<U>::value>> | ||
283 | Vc_INTRINSIC Vc_CONST SimdArray operator<<(U x) const | ||
284 | { | ||
285 | return {data << x}; | ||
286 | } | ||
287 | template <typename U, | ||
288 | typename = enable_if<std::is_integral<T>::value && std::is_integral<U>::value>> | ||
289 | Vc_INTRINSIC SimdArray &operator<<=(U x) | ||
290 | { | ||
291 | data <<= x; | ||
292 | return *this; | ||
293 | } | ||
294 | template <typename U, | ||
295 | typename = enable_if<std::is_integral<T>::value && std::is_integral<U>::value>> | ||
296 | Vc_INTRINSIC Vc_CONST SimdArray operator>>(U x) const | ||
297 | { | ||
298 | return {data >> x}; | ||
299 | } | ||
300 | template <typename U, | ||
301 | typename = enable_if<std::is_integral<T>::value && std::is_integral<U>::value>> | ||
302 | Vc_INTRINSIC SimdArray &operator>>=(U x) | ||
303 | { | ||
304 | data >>= x; | ||
305 | return *this; | ||
306 | } | ||
307 | |||
308 | #define Vc_BINARY_OPERATOR_(op) \ | ||
309 | Vc_INTRINSIC Vc_CONST SimdArray operator op(const SimdArray &rhs) const \ | ||
310 | { \ | ||
311 | return {data op rhs.data}; \ | ||
312 | } \ | ||
313 | Vc_INTRINSIC SimdArray &operator op##=(const SimdArray &rhs) \ | ||
314 | { \ | ||
315 | data op## = rhs.data; \ | ||
316 | return *this; \ | ||
317 | } | ||
318 | Vc_ALL_ARITHMETICS(Vc_BINARY_OPERATOR_); | ||
319 | Vc_ALL_BINARY(Vc_BINARY_OPERATOR_); | ||
320 | Vc_ALL_SHIFTS(Vc_BINARY_OPERATOR_); | ||
321 | #undef Vc_BINARY_OPERATOR_ | ||
322 | |||
323 | #define Vc_COMPARES(op) \ | ||
324 | Vc_INTRINSIC mask_type operator op(const SimdArray &rhs) const \ | ||
325 | { \ | ||
326 | return {data op rhs.data}; \ | ||
327 | } | ||
328 | Vc_ALL_COMPARES(Vc_COMPARES); | ||
329 | #undef Vc_COMPARES | ||
330 | |||
331 | /// \copydoc Vector::isNegative | ||
332 | Vc_DEPRECATED("use isnegative(x) instead") Vc_INTRINSIC MaskType isNegative() const | ||
333 | { | ||
334 | return {isnegative(data)}; | ||
335 | } | ||
336 | |||
337 | private: | ||
338 | friend reference; | ||
339 | Vc_INTRINSIC static value_type get(const SimdArray &o, int i) noexcept | ||
340 | { | ||
341 | return o.data[i]; | ||
342 | } | ||
343 | template <typename U> | ||
344 | Vc_INTRINSIC static void set(SimdArray &o, int i, U &&v) noexcept( | ||
345 | noexcept(std::declval<value_type &>() = v)) | ||
346 | { | ||
347 | o.data[i] = v; | ||
348 | } | ||
349 | |||
350 | public: | ||
351 | Vc_INTRINSIC reference operator[](size_t i) noexcept | ||
352 | { | ||
353 | static_assert(noexcept(reference{std::declval<SimdArray &>(), int()}), ""); | ||
354 | return {*this, int(i)}; | ||
355 | } | ||
356 | Vc_INTRINSIC value_type operator[](size_t i) const noexcept | ||
357 | { | ||
358 | return get(*this, int(i)); | ||
359 | } | ||
360 | |||
361 | Vc_INTRINSIC Common::WriteMaskedVector<SimdArray, mask_type> operator()(const mask_type &k) | ||
362 | { | ||
363 | return {*this, k}; | ||
364 | } | ||
365 | |||
366 | Vc_INTRINSIC void assign(const SimdArray &v, const mask_type &k) | ||
367 | { | ||
368 | data.assign(v.data, internal_data(k)); | ||
369 | } | ||
370 | |||
371 | // reductions //////////////////////////////////////////////////////// | ||
372 | #define Vc_REDUCTION_FUNCTION_(name_) \ | ||
373 | Vc_INTRINSIC Vc_PURE value_type name_() const { return data.name_(); } \ | ||
374 | Vc_INTRINSIC Vc_PURE value_type name_(mask_type mask) const \ | ||
375 | { \ | ||
376 | return data.name_(internal_data(mask)); \ | ||
377 | } \ | ||
378 | Vc_NOTHING_EXPECTING_SEMICOLON | ||
379 | Vc_REDUCTION_FUNCTION_(min); | ||
380 | Vc_REDUCTION_FUNCTION_(max); | ||
381 | Vc_REDUCTION_FUNCTION_(product); | ||
382 | Vc_REDUCTION_FUNCTION_(sum); | ||
383 | #undef Vc_REDUCTION_FUNCTION_ | ||
384 | Vc_INTRINSIC Vc_PURE SimdArray partialSum() const { return data.partialSum(); } | ||
385 | |||
386 | template <typename F> Vc_INTRINSIC SimdArray apply(F &&f) const | ||
387 | { | ||
388 | return {data.apply(std::forward<F>(f))}; | ||
389 | } | ||
390 | template <typename F> Vc_INTRINSIC SimdArray apply(F &&f, const mask_type &k) const | ||
391 | { | ||
392 | return {data.apply(std::forward<F>(f), k)}; | ||
393 | } | ||
394 | |||
395 | Vc_INTRINSIC SimdArray shifted(int amount) const | ||
396 | { | ||
397 | return {data.shifted(amount)}; | ||
398 | } | ||
399 | |||
400 | template <std::size_t NN> | ||
401 | Vc_INTRINSIC SimdArray shifted(int amount, const SimdArray<value_type, NN> &shiftIn) | ||
402 | const | ||
403 | { | ||
404 | return {data.shifted(amount, simd_cast<VectorType>(shiftIn))}; | ||
405 | } | ||
406 | |||
407 | Vc_INTRINSIC SimdArray rotated(int amount) const | ||
408 | { | ||
409 | return {data.rotated(amount)}; | ||
410 | } | ||
411 | |||
412 | /// \copydoc Vector::exponent | ||
413 | Vc_DEPRECATED("use exponent(x) instead") Vc_INTRINSIC SimdArray exponent() const | ||
414 | { | ||
415 | return {exponent(data)}; | ||
416 | } | ||
417 | |||
418 | Vc_INTRINSIC SimdArray interleaveLow(SimdArray x) const | ||
419 | { | ||
420 | return {data.interleaveLow(x.data)}; | ||
421 | } | ||
422 | Vc_INTRINSIC SimdArray interleaveHigh(SimdArray x) const | ||
423 | { | ||
424 | return {data.interleaveHigh(x.data)}; | ||
425 | } | ||
426 | |||
427 | Vc_INTRINSIC SimdArray reversed() const | ||
428 | { | ||
429 | return {data.reversed()}; | ||
430 | } | ||
431 | |||
432 | Vc_INTRINSIC SimdArray sorted() const | ||
433 | { | ||
434 | return {data.sorted()}; | ||
435 | } | ||
436 | |||
437 | template <typename G> static Vc_INTRINSIC SimdArray generate(const G &gen) | ||
438 | { | ||
439 | return {VectorType::generate(gen)}; | ||
440 | } | ||
441 | |||
442 | Vc_DEPRECATED("use copysign(x, y) instead") Vc_INTRINSIC SimdArray | ||
443 | copySign(const SimdArray &reference) const | ||
444 | { | ||
445 | return {Vc::copysign(data, reference.data)}; | ||
446 | } | ||
447 | |||
448 | friend VectorType &internal_data<>(SimdArray &x); | ||
449 | friend const VectorType &internal_data<>(const SimdArray &x); | ||
450 | |||
451 | /// \internal | ||
452 | Vc_INTRINSIC SimdArray(VectorType &&x) : data(std::move(x)) {} | ||
453 | |||
454 | Vc_FREE_STORE_OPERATORS_ALIGNED(alignof(storage_type)); | ||
455 | |||
456 | private: | ||
457 | // The alignas attribute attached to the class declaration above is ignored by ICC | ||
458 | // 17.0.0 (at least). So just move the alignas attribute down here where it works for | ||
459 | // all compilers. | ||
460 | alignas(static_cast<std::size_t>( | ||
461 | Common::BoundedAlignment<Common::NextPowerOfTwo<N>::value * sizeof(VectorType_) / | ||
462 | VectorType_::size()>::value)) storage_type data; | ||
463 | }; | ||
464 | template <typename T, std::size_t N, typename VectorType> constexpr std::size_t SimdArray<T, N, VectorType, N>::Size; | ||
465 | template <typename T, std::size_t N, typename VectorType> | ||
466 | constexpr std::size_t SimdArray<T, N, VectorType, N>::MemoryAlignment; | ||
467 | template <typename T, std::size_t N, typename VectorType> | ||
468 | #ifndef Vc_MSVC | ||
469 | Vc_INTRINSIC | ||
470 | #endif | ||
471 | VectorType &internal_data(SimdArray<T, N, VectorType, N> &x) | ||
472 | { | ||
473 | return x.data; | ||
474 | } | ||
475 | template <typename T, std::size_t N, typename VectorType> | ||
476 | #ifndef Vc_MSVC | ||
477 | Vc_INTRINSIC | ||
478 | #endif | ||
479 | const VectorType &internal_data(const SimdArray<T, N, VectorType, N> &x) | ||
480 | { | ||
481 | return x.data; | ||
482 | } | ||
483 | |||
484 | // unpackIfSegment {{{2 | ||
485 | template <typename T> T unpackIfSegment(T &&x) { return std::forward<T>(x); } | ||
486 | template <typename T, size_t Pieces, size_t Index> | ||
487 | auto unpackIfSegment(Common::Segment<T, Pieces, Index> &&x) -> decltype(x.asSimdArray()) | ||
488 | { | ||
489 | return x.asSimdArray(); | ||
490 | } | ||
491 | |||
492 | // gatherImplementation {{{2 | ||
493 | template <typename T, std::size_t N, typename VectorType> | ||
494 | template <typename MT, typename IT> | ||
495 | inline void SimdArray<T, N, VectorType, N>::gatherImplementation(const MT *mem, | ||
496 | IT &&indexes) | ||
497 | { | ||
498 | data.gather(mem, unpackIfSegment(std::forward<IT>(indexes))); | ||
499 | } | ||
500 | template <typename T, std::size_t N, typename VectorType> | ||
501 | template <typename MT, typename IT> | ||
502 | inline void SimdArray<T, N, VectorType, N>::gatherImplementation(const MT *mem, | ||
503 | IT &&indexes, | ||
504 | MaskArgument mask) | ||
505 | { | ||
506 | data.gather(mem, unpackIfSegment(std::forward<IT>(indexes)), mask); | ||
507 | } | ||
508 | |||
509 | // scatterImplementation {{{2 | ||
510 | template <typename T, std::size_t N, typename VectorType> | ||
511 | template <typename MT, typename IT> | ||
512 | inline void SimdArray<T, N, VectorType, N>::scatterImplementation(MT *mem, | ||
513 | IT &&indexes) const | ||
514 | { | ||
515 | data.scatter(mem, unpackIfSegment(std::forward<IT>(indexes))); | ||
516 | } | ||
517 | template <typename T, std::size_t N, typename VectorType> | ||
518 | template <typename MT, typename IT> | ||
519 | inline void SimdArray<T, N, VectorType, N>::scatterImplementation(MT *mem, | ||
520 | IT &&indexes, | ||
521 | MaskArgument mask) const | ||
522 | { | ||
523 | data.scatter(mem, unpackIfSegment(std::forward<IT>(indexes)), mask); | ||
524 | } | ||
525 | |||
526 | // generic SimdArray {{{1 | ||
527 | /** | ||
528 | * Data-parallel arithmetic type with user-defined number of elements. | ||
529 | * | ||
530 | * \tparam T The type of the vector's elements. The supported types currently are limited | ||
531 | * to the types supported by Vc::Vector<T>. | ||
532 | * | ||
533 | * \tparam N The number of elements to store and process concurrently. You can choose an | ||
534 | * arbitrary number, though not every number is a good idea. | ||
535 | * Generally, a power of two value or the sum of two power of two values might | ||
536 | * work efficiently, though this depends a lot on the target system. | ||
537 | * | ||
538 | * \tparam V Don't change the default value unless you really know what you are doing. | ||
539 | * This type is set to the underlying native Vc::Vector type used in the | ||
540 | * implementation of the type. | ||
541 | * Having it as part of the type name guards against some cases of ODR | ||
542 | * violations (i.e. linking incompatible translation units / libraries). | ||
543 | * | ||
544 | * \tparam Wt Don't ever change the default value. | ||
545 | * This parameter is an unfortunate implementation detail shining through. | ||
546 | * | ||
547 | * \warning Choosing \p N too large (what “too large” means depends on the target) will | ||
548 | * result in excessive compilation times and high (or too high) register | ||
549 | * pressure, thus potentially negating the improvement from concurrent execution. | ||
550 | * As a rule of thumb, keep \p N less or equal to `2 * float_v::size()`. | ||
551 | * | ||
552 | * \warning A special portability concern arises from a current limitation in the MIC | ||
553 | * implementation (Intel Knights Corner), where SimdArray types with \p T = \p | ||
554 | * (u)short require an \p N either less than short_v::size() or a multiple of | ||
555 | * short_v::size(). | ||
556 | * | ||
557 | * \headerfile simdarray.h <Vc/SimdArray> | ||
558 | */ | ||
559 | template <typename T, size_t N, typename V, size_t Wt> class SimdArray | ||
560 | { | ||
561 | static_assert(std::is_same<T, double>::value || | ||
562 | std::is_same<T, float>::value || | ||
563 | std::is_same<T, int32_t>::value || | ||
564 | std::is_same<T, uint32_t>::value || | ||
565 | std::is_same<T, int16_t>::value || | ||
566 | std::is_same<T, uint16_t>::value, "SimdArray<T, N> may only be used with T = { double, float, int32_t, uint32_t, int16_t, uint16_t }"); | ||
567 | static_assert( | ||
568 | // either the EntryType and VectorEntryType of the main V are equal | ||
569 | std::is_same<typename V::EntryType, typename V::VectorEntryType>::value || | ||
570 | // or N is a multiple of V::size() | ||
571 | (N % V::size() == 0), | ||
572 | "SimdArray<(un)signed short, N> on MIC only works correctly for N = k * " | ||
573 | "MIC::(u)short_v::size(), i.e. k * 16."); | ||
574 | |||
575 | using my_traits = SimdArrayTraits<T, N>; | ||
576 | static constexpr std::size_t N0 = my_traits::N0; | ||
577 | static constexpr std::size_t N1 = my_traits::N1; | ||
578 | using Split = Common::Split<N0>; | ||
579 | template <typename U, std::size_t K> using CArray = U[K]; | ||
580 | |||
581 | public: | ||
582 | using storage_type0 = typename my_traits::storage_type0; | ||
583 | using storage_type1 = typename my_traits::storage_type1; | ||
584 | static_assert(storage_type0::size() == N0, ""); | ||
585 | |||
586 | /**\internal | ||
587 | * This type reveals the implementation-specific type used for the data member. | ||
588 | */ | ||
589 | using vector_type = V; | ||
590 | using vectorentry_type = typename storage_type0::vectorentry_type; | ||
591 | typedef vectorentry_type alias_type Vc_MAY_ALIAS; | ||
592 | |||
593 | /// The type of the elements (i.e.\ \p T) | ||
594 | using value_type = T; | ||
595 | |||
596 | /// The type of the mask used for masked operations and returned from comparisons. | ||
597 | using mask_type = SimdMaskArray<T, N, vector_type>; | ||
598 | |||
599 | /// The type of the vector used for indexes in gather and scatter operations. | ||
600 | using index_type = SimdArray<int, N>; | ||
601 | |||
602 | /** | ||
603 | * Returns \p N, the number of scalar components in an object of this type. | ||
604 | * | ||
605 | * The size of the SimdArray, i.e. the number of scalar elements in the vector. In | ||
606 | * contrast to Vector::size() you have control over this value via the \p N template | ||
607 | * parameter of the SimdArray class template. | ||
608 | * | ||
609 | * \returns The number of scalar values stored and manipulated concurrently by objects | ||
610 | * of this type. | ||
611 | */ | ||
612 | static constexpr std::size_t size() { return N; } | ||
613 | |||
614 | /// \copydoc mask_type | ||
615 | using Mask = mask_type; | ||
616 | /// \copydoc mask_type | ||
617 | using MaskType = Mask; | ||
618 | using MaskArgument = const MaskType &; | ||
619 | using VectorEntryType = vectorentry_type; | ||
620 | /// \copydoc value_type | ||
621 | using EntryType = value_type; | ||
622 | /// \copydoc index_type | ||
623 | using IndexType = index_type; | ||
624 | using AsArg = const SimdArray &; | ||
625 | |||
626 | using reference = Detail::ElementReference<SimdArray>; | ||
627 | |||
628 | ///\copydoc Vector::MemoryAlignment | ||
629 | static constexpr std::size_t MemoryAlignment = | ||
630 | storage_type0::MemoryAlignment > storage_type1::MemoryAlignment | ||
631 | ? storage_type0::MemoryAlignment | ||
632 | : storage_type1::MemoryAlignment; | ||
633 | |||
634 | /// \name Generators | ||
635 | ///@{ | ||
636 | |||
637 | ///\copybrief Vector::Zero | ||
638 | static Vc_INTRINSIC SimdArray Zero() | ||
639 | { | ||
640 | return SimdArray(Vc::Zero); | ||
641 | } | ||
642 | |||
643 | ///\copybrief Vector::One | ||
644 | static Vc_INTRINSIC SimdArray One() | ||
645 | { | ||
646 | return SimdArray(Vc::One); | ||
647 | } | ||
648 | |||
649 | ///\copybrief Vector::IndexesFromZero | ||
650 | static Vc_INTRINSIC SimdArray IndexesFromZero() | ||
651 | { | ||
652 | return SimdArray(Vc::IndexesFromZero); | ||
653 | } | ||
654 | |||
655 | ///\copydoc Vector::Random | ||
656 | static Vc_INTRINSIC SimdArray Random() | ||
657 | { | ||
658 | return fromOperation(Common::Operations::random()); | ||
659 | } | ||
660 | |||
661 | ///\copybrief Vector::generate | ||
662 | template <typename G> static Vc_INTRINSIC SimdArray generate(const G &gen) // {{{2 | ||
663 | { | ||
664 | auto tmp = storage_type0::generate(gen); // GCC bug: the order of evaluation in | ||
665 | // an initializer list is well-defined | ||
666 | // (front to back), but GCC 4.8 doesn't | ||
667 | // implement this correctly. Therefore | ||
668 | // we enforce correct order. | ||
669 | return {std::move(tmp), | ||
670 | storage_type1::generate([&](std::size_t i) { return gen(i + N0); })}; | ||
671 | } | ||
672 | ///@} | ||
673 | |||
674 | /// \name Compile-Time Constant Initialization | ||
675 | ///@{ | ||
676 | |||
677 | ///\copydoc Vector::Vector() | ||
678 | #ifndef Vc_MSVC // bogus error C2580 | ||
679 | SimdArray() = default; | ||
680 | #endif | ||
681 | ///@} | ||
682 | |||
683 | /// \name Conversion/Broadcast Constructors | ||
684 | ///@{ | ||
685 | |||
686 | ///\copydoc Vector::Vector(EntryType) | ||
687 | Vc_INTRINSIC SimdArray(value_type a) : data0(a), data1(a) {} | ||
688 | template < | ||
689 | typename U, | ||
690 | typename = enable_if<std::is_same<U, int>::value && !std::is_same<int, value_type>::value>> | ||
691 | SimdArray(U a) | ||
692 | : SimdArray(static_cast<value_type>(a)) | ||
693 | { | ||
694 | } | ||
695 | ///@} | ||
696 | |||
697 | // default copy ctor/operator | ||
698 | SimdArray(const SimdArray &) = default; | ||
699 | SimdArray(SimdArray &&) = default; | ||
700 | SimdArray &operator=(const SimdArray &) = default; | ||
701 | |||
702 | // load ctor | ||
703 | template <typename U, | ||
704 | typename Flags = DefaultLoadTag, | ||
705 | typename = enable_if<Traits::is_load_store_flag<Flags>::value>> | ||
706 | explicit Vc_INTRINSIC SimdArray(const U *mem, Flags f = Flags()) | ||
707 | : data0(mem, f), data1(mem + storage_type0::size(), f) | ||
708 | { | ||
709 | } | ||
710 | |||
711 | // MSVC does overload resolution differently and takes the const U *mem overload (I hope) | ||
712 | #ifndef Vc_MSVC | ||
713 | /**\internal | ||
714 | * Load from a C-array. This is basically the same function as the load constructor | ||
715 | * above, except that the forwarding reference overload would steal the deal and the | ||
716 | * constructor above doesn't get called. This overload is required to enable loads | ||
717 | * from C-arrays. | ||
718 | */ | ||
719 | template <typename U, std::size_t Extent, typename Flags = DefaultLoadTag, | ||
720 | typename = enable_if<Traits::is_load_store_flag<Flags>::value>> | ||
721 | explicit Vc_INTRINSIC SimdArray(CArray<U, Extent> &mem, Flags f = Flags()) | ||
722 | : data0(&mem[0], f), data1(&mem[storage_type0::size()], f) | ||
723 | { | ||
724 | } | ||
725 | /**\internal | ||
726 | * Const overload of the above. | ||
727 | */ | ||
728 | template <typename U, std::size_t Extent, typename Flags = DefaultLoadTag, | ||
729 | typename = enable_if<Traits::is_load_store_flag<Flags>::value>> | ||
730 | explicit Vc_INTRINSIC SimdArray(const CArray<U, Extent> &mem, Flags f = Flags()) | ||
731 | : data0(&mem[0], f), data1(&mem[storage_type0::size()], f) | ||
732 | { | ||
733 | } | ||
734 | #endif | ||
735 | |||
736 | // initializer list | ||
737 | Vc_INTRINSIC SimdArray(const std::initializer_list<value_type> &init) | ||
738 | : data0(init.begin(), Vc::Unaligned) | ||
739 | , data1(init.begin() + storage_type0::size(), Vc::Unaligned) | ||
740 | { | ||
741 | #if defined Vc_CXX14 && 0 // doesn't compile yet | ||
742 | static_assert(init.size() == size(), "The initializer_list argument to " | ||
743 | "SimdArray<T, N> must contain exactly N " | ||
744 | "values."); | ||
745 | #else | ||
746 | Vc_ASSERT(init.size() == size()); | ||
747 | #endif | ||
748 | } | ||
749 | |||
750 | #include "gatherinterface.h" | ||
751 | #include "scatterinterface.h" | ||
752 | |||
    // forward all remaining ctors
    // Catch-all constructor for argument packs not claimed by the cast,
    // initializer-list, gather, or load constructors. Split::lo/hi extract the
    // half of each argument destined for data0/data1 respectively.
    template <typename... Args,
              typename = enable_if<!Traits::is_cast_arguments<Args...>::value &&
                                   !Traits::is_initializer_list<Args...>::value &&
                                   !Traits::is_gather_signature<Args...>::value &&
                                   !Traits::is_load_arguments<Args...>::value>>
    explicit Vc_INTRINSIC SimdArray(Args &&... args)
        : data0(Split::lo(args)...) // no forward here - it could move and thus
                                    // break the next line
        , data1(Split::hi(std::forward<Args>(args))...)
    {
    }
765 | |||
    // explicit casts
    // From any SIMD vector type of the same width N whose entry type does not
    // implicitly convert (or which is not a SimdArray): conversion must be
    // requested explicitly.
    template <typename W>
    Vc_INTRINSIC explicit SimdArray(
        W &&x,
        enable_if<(Traits::is_simd_vector<W>::value && Traits::simd_vector_size<W>::value == N &&
                   !(std::is_convertible<Traits::entry_type_of<W>, T>::value &&
                     Traits::isSimdArray<W>::value))> = nullarg)
        : data0(Split::lo(x)), data1(Split::hi(x))
    {
    }

    // implicit casts
    // From another SimdArray of the same width whose entry type implicitly
    // converts to T: allowed without an explicit cast.
    template <typename W>
    Vc_INTRINSIC SimdArray(
        W &&x,
        enable_if<(Traits::isSimdArray<W>::value && Traits::simd_vector_size<W>::value == N &&
                   std::is_convertible<Traits::entry_type_of<W>, T>::value)> = nullarg)
        : data0(Split::lo(x)), data1(Split::hi(x))
    {
    }
786 | |||
    // implicit conversion to Vector<U, AnyAbi> for if Vector<U, AnyAbi>::size() == N and
    // T implicitly convertible to U
    template <
        typename U, typename A,
        typename = enable_if<std::is_convertible<T, U>::value && Vector<U, A>::Size == N>>
    operator Vector<U, A>() const
    {
        // Fuse the two halves back into a single native vector.
        return simd_cast<Vector<U, A>>(data0, data1);
    }
796 | |||
797 | //////////////////// other functions /////////////// | ||
798 | |||
    /// Sets all entries to zero.
    Vc_INTRINSIC void setZero()
    {
        data0.setZero();
        data1.setZero();
    }
    /// Sets the entries where \p k is true to zero.
    Vc_INTRINSIC void setZero(const mask_type &k)
    {
        data0.setZero(Split::lo(k));
        data1.setZero(Split::hi(k));
    }
    /// Sets all bits to one (i.e. zeroes the inverted value), per half.
    Vc_INTRINSIC void setZeroInverted()
    {
        data0.setZeroInverted();
        data1.setZeroInverted();
    }
    /// Masked variant of setZeroInverted, applied to each half with the
    /// corresponding half of \p k.
    Vc_INTRINSIC void setZeroInverted(const mask_type &k)
    {
        data0.setZeroInverted(Split::lo(k));
        data1.setZeroInverted(Split::hi(k));
    }


    /// Sets all entries to a quiet NaN.
    Vc_INTRINSIC void setQnan() {
        data0.setQnan();
        data1.setQnan();
    }
    /// Sets the entries where \p m is true to a quiet NaN.
    Vc_INTRINSIC void setQnan(const mask_type &m) {
        data0.setQnan(Split::lo(m));
        data1.setQnan(Split::hi(m));
    }
829 | |||
    ///\internal execute specified Operation
    /// Builds a SimdArray by running \p op on each half with the lo/hi split of
    /// the arguments. args must not be forwarded into the first call, since a
    /// move would leave nothing for the second.
    template <typename Op, typename... Args>
    static Vc_INTRINSIC SimdArray fromOperation(Op op, Args &&... args)
    {
        SimdArray r = {
            storage_type0::fromOperation(op, Split::lo(args)...),  // no forward here - it
                                                                   // could move and thus
                                                                   // break the next line
            storage_type1::fromOperation(op, Split::hi(std::forward<Args>(args))...)};
        return r;
    }

    ///\internal Same splitting scheme as fromOperation, but for operations
    /// executed only for their side effects (no result object).
    template <typename Op, typename... Args>
    static Vc_INTRINSIC void callOperation(Op op, Args &&... args)
    {
        storage_type0::callOperation(op, Split::lo(args)...);
        storage_type1::callOperation(op, Split::hi(std::forward<Args>(args))...);
    }
849 | |||
850 | |||
    /// Loads N values starting at \p mem; data1 reads at the offset of the
    /// first half's size. Extra args (flags) are split per half.
    template <typename U, typename... Args> Vc_INTRINSIC void load(const U *mem, Args &&... args)
    {
        data0.load(mem, Split::lo(args)...);  // no forward here - it could move and thus
                                              // break the next line
        data1.load(mem + storage_type0::size(), Split::hi(std::forward<Args>(args))...);
    }

    /// Stores N values to \p mem, mirroring load(): first half at mem, second
    /// half at mem + storage_type0::size().
    template <typename U, typename... Args> Vc_INTRINSIC void store(U *mem, Args &&... args) const
    {
        data0.store(mem, Split::lo(args)...);  // no forward here - it could move and thus
                                               // break the next line
        data1.store(mem + storage_type0::size(), Split::hi(std::forward<Args>(args))...);
    }
864 | |||
865 | Vc_INTRINSIC mask_type operator!() const | ||
866 | { | ||
867 | return {!data0, !data1}; | ||
868 | } | ||
869 | |||
870 | Vc_INTRINSIC SimdArray operator-() const | ||
871 | { | ||
872 | return {-data0, -data1}; | ||
873 | } | ||
874 | |||
875 | /// Returns a copy of itself | ||
876 | Vc_INTRINSIC SimdArray operator+() const { return *this; } | ||
877 | |||
878 | Vc_INTRINSIC SimdArray operator~() const | ||
879 | { | ||
880 | return {~data0, ~data1}; | ||
881 | } | ||
882 | |||
    // left/right shift operators {{{2
    // Shift by a scalar amount; only enabled when both the entry type and the
    // shift-count type are integral. Both halves shift by the same amount.
    template <typename U,
              typename = enable_if<std::is_integral<T>::value && std::is_integral<U>::value>>
    Vc_INTRINSIC Vc_CONST SimdArray operator<<(U x) const
    {
        return {data0 << x, data1 << x};
    }
    /// In-place left shift by scalar \p x.
    template <typename U,
              typename = enable_if<std::is_integral<T>::value && std::is_integral<U>::value>>
    Vc_INTRINSIC SimdArray &operator<<=(U x)
    {
        data0 <<= x;
        data1 <<= x;
        return *this;
    }
    /// Right shift by scalar \p x.
    template <typename U,
              typename = enable_if<std::is_integral<T>::value && std::is_integral<U>::value>>
    Vc_INTRINSIC Vc_CONST SimdArray operator>>(U x) const
    {
        return {data0 >> x, data1 >> x};
    }
    /// In-place right shift by scalar \p x.
    template <typename U,
              typename = enable_if<std::is_integral<T>::value && std::is_integral<U>::value>>
    Vc_INTRINSIC SimdArray &operator>>=(U x)
    {
        data0 >>= x;
        data1 >>= x;
        return *this;
    }
912 | |||
    // binary operators {{{2
    // Each arithmetic/bitwise/shift operator is applied independently to the
    // two halves; the op= form mutates data0/data1 in place. (Comments cannot
    // go inside the macro body: // would swallow the line-continuation '\'.)
#define Vc_BINARY_OPERATOR_(op)                                                          \
    Vc_INTRINSIC Vc_CONST SimdArray operator op(const SimdArray &rhs) const              \
    {                                                                                    \
        return {data0 op rhs.data0, data1 op rhs.data1};                                 \
    }                                                                                    \
    Vc_INTRINSIC SimdArray &operator op##=(const SimdArray &rhs)                         \
    {                                                                                    \
        data0 op## = rhs.data0;                                                          \
        data1 op## = rhs.data1;                                                          \
        return *this;                                                                    \
    }
    Vc_ALL_ARITHMETICS(Vc_BINARY_OPERATOR_);
    Vc_ALL_BINARY(Vc_BINARY_OPERATOR_);
    Vc_ALL_SHIFTS(Vc_BINARY_OPERATOR_);
#undef Vc_BINARY_OPERATOR_

    // Element-wise comparisons: each operator compares the halves separately
    // and the two half-masks combine into mask_type.
#define Vc_COMPARES(op)                                                                  \
    Vc_INTRINSIC mask_type operator op(const SimdArray &rhs) const                       \
    {                                                                                    \
        return {data0 op rhs.data0, data1 op rhs.data1};                                 \
    }
    Vc_ALL_COMPARES(Vc_COMPARES);
#undef Vc_COMPARES
937 | |||
938 | // operator[] {{{2 | ||
939 | /// \name Scalar Subscript Operators | ||
940 | ///@{ | ||
941 | |||
private:
    friend reference;
    // Scalar element read used by the reference proxy. The object is viewed as
    // an array of alias_type (a may_alias entry type) to sidestep strict
    // aliasing when indexing into the SIMD registers' storage.
    Vc_INTRINSIC static value_type get(const SimdArray &o, int i) noexcept
    {
        return reinterpret_cast<const alias_type *>(&o)[i];
    }
    // Scalar element write counterpart; noexcept iff assigning v to a
    // value_type lvalue is noexcept.
    template <typename U>
    Vc_INTRINSIC static void set(SimdArray &o, int i, U &&v) noexcept(
        noexcept(std::declval<value_type &>() = v))
    {
        reinterpret_cast<alias_type *>(&o)[i] = v;
    }
954 | |||
public:
    ///\copydoc Vector::operator[](size_t)
    /// Returns a proxy object; reads/writes through it go via get()/set().
    Vc_INTRINSIC reference operator[](size_t i) noexcept
    {
        static_assert(noexcept(reference{std::declval<SimdArray &>(), int()}), "");
        return {*this, int(i)};
    }

    ///\copydoc Vector::operator[](size_t) const
    /// Const access returns the value directly (no proxy needed).
    Vc_INTRINSIC value_type operator[](size_t index) const noexcept
    {
        return get(*this, int(index));
    }
    ///@}

    // operator(){{{2
    ///\copydoc Vector::operator()(MaskType)
    /// Returns a write-masked view: assignments through it only touch entries
    /// where \p mask is true.
    Vc_INTRINSIC Common::WriteMaskedVector<SimdArray, mask_type> operator()(
        const mask_type &mask)
    {
        return {*this, mask};
    }
977 | |||
    ///\internal Masked assignment: copies entries of \p v where \p k is true,
    /// delegating per half with the matching half of the mask.
    Vc_INTRINSIC void assign(const SimdArray &v, const mask_type &k) //{{{2
    {
        data0.assign(v.data0, internal_data0(k));
        data1.assign(v.data1, internal_data1(k));
    }
984 | |||
    // reductions {{{2
    // Generates min/max/product/sum. If the halves have equal Size the two
    // halves are first combined element-wise with binary_fun_ and reduced once;
    // otherwise each half reduces on its own and scalar_fun_ merges the two
    // scalars. The masked form short-circuits when one half's mask is empty.
    // (No comments inside the macro: // would swallow the continuation '\'.)
#define Vc_REDUCTION_FUNCTION_(name_, binary_fun_, scalar_fun_)                          \
private:                                                                                 \
    template <typename ForSfinae = void>                                                 \
    Vc_INTRINSIC enable_if<std::is_same<ForSfinae, void>::value &&                       \
                               storage_type0::Size == storage_type1::Size,               \
                           value_type> name_##_impl() const                              \
    {                                                                                    \
        return binary_fun_(data0, data1).name_();                                        \
    }                                                                                    \
                                                                                         \
    template <typename ForSfinae = void>                                                 \
    Vc_INTRINSIC enable_if<std::is_same<ForSfinae, void>::value &&                       \
                               storage_type0::Size != storage_type1::Size,               \
                           value_type> name_##_impl() const                              \
    {                                                                                    \
        return scalar_fun_(data0.name_(), data1.name_());                                \
    }                                                                                    \
                                                                                         \
public:                                                                                  \
    /**\copybrief Vector::##name_ */                                                     \
    Vc_INTRINSIC value_type name_() const { return name_##_impl(); }                     \
    /**\copybrief Vector::##name_ */                                                     \
    Vc_INTRINSIC value_type name_(const mask_type &mask) const                           \
    {                                                                                    \
        if (Vc_IS_UNLIKELY(Split::lo(mask).isEmpty())) {                                 \
            return data1.name_(Split::hi(mask));                                         \
        } else if (Vc_IS_UNLIKELY(Split::hi(mask).isEmpty())) {                          \
            return data0.name_(Split::lo(mask));                                         \
        } else {                                                                         \
            return scalar_fun_(data0.name_(Split::lo(mask)),                             \
                               data1.name_(Split::hi(mask)));                            \
        }                                                                                \
    }                                                                                    \
    Vc_NOTHING_EXPECTING_SEMICOLON
    Vc_REDUCTION_FUNCTION_(min, Vc::min, std::min);
    Vc_REDUCTION_FUNCTION_(max, Vc::max, std::max);
    Vc_REDUCTION_FUNCTION_(product, internal::product_helper_, internal::product_helper_);
    Vc_REDUCTION_FUNCTION_(sum, internal::sum_helper_, internal::sum_helper_);
#undef Vc_REDUCTION_FUNCTION_
    ///\copybrief Vector::partialSum
    /// Prefix sum: runs partialSum on each half, then propagates the first
    /// half's last (total) value into the second half before its prefix sum.
    Vc_INTRINSIC Vc_PURE SimdArray partialSum() const //{{{2
    {
        auto ps0 = data0.partialSum();
        auto tmp = data1;
        tmp[0] += ps0[data0.size() - 1]; // carry over the first half's total
        return {std::move(ps0), tmp.partialSum()};
    }
1033 | |||
    // apply {{{2
    ///\copybrief Vector::apply(F &&) const
    /// Applies \p f per half (f is passed as an lvalue to both calls).
    template <typename F> inline SimdArray apply(F &&f) const
    {
        return {data0.apply(f), data1.apply(f)};
    }
    ///\copybrief Vector::apply(F &&, MaskType) const
    /// Masked variant: f only applies where the respective half of \p k is true.
    template <typename F> inline SimdArray apply(F &&f, const mask_type &k) const
    {
        return {data0.apply(f, Split::lo(k)), data1.apply(f, Split::hi(k))};
    }
1045 | |||
    // shifted {{{2
    ///\copybrief Vector::shifted(int) const
    /// Shifts entries by \p amount positions (negative = towards lower
    /// indices), filling vacated entries with zero. Handled case-by-case
    /// depending on how the shift interacts with the half boundary.
    inline SimdArray shifted(int amount) const
    {
        constexpr int SSize = Size;
        constexpr int SSize0 = storage_type0::Size;
        constexpr int SSize1 = storage_type1::Size;
        if (amount == 0) {
            return *this;
        }
        if (amount < 0) {
            if (amount > -SSize0) {
                // shift stays within halves; data1 pulls carry-in from data0
                return {data0.shifted(amount), data1.shifted(amount, data0)};
            }
            if (amount == -SSize0) {
                // exactly one half: data0 moves wholesale into data1's slot
                return {storage_type0::Zero(), simd_cast<storage_type1>(data0)};
            }
            if (amount < -SSize0) {
                // more than one half: the surviving tail of data0 lands in data1
                return {storage_type0::Zero(), simd_cast<storage_type1>(data0.shifted(
                                                   amount + SSize0))};
            }
            // NOTE(review): unreachable — the three conditions above cover all
            // amount < 0; kept for symmetry with the positive branch.
            return Zero();
        } else {
            if (amount >= SSize) {
                return Zero();
            } else if (amount >= SSize0) {
                // data1's surviving part shifts down into data0's slot
                return {
                    simd_cast<storage_type0>(data1).shifted(amount - SSize0),
                    storage_type1::Zero()};
            } else if (amount >= SSize1) {
                return {data0.shifted(amount, data1), storage_type1::Zero()};
            } else {
                return {data0.shifted(amount, data1), data1.shifted(amount)};
            }
        }
    }
1082 | |||
    /// Shift with carry-in from \p shiftIn, generic (non-bisectable) fallback:
    /// only selected when the halves differ in type or NN != N. Implemented
    /// element-wise via generate(), reading out-of-range indices from shiftIn.
    template <std::size_t NN>
    inline enable_if<
        !(std::is_same<storage_type0, storage_type1>::value &&  // not bisectable
          N == NN),
        SimdArray>
        shifted(int amount, const SimdArray<value_type, NN> &shiftIn) const
    {
        constexpr int SSize = Size;
        if (amount < 0) {
            // negative shift: indices below 0 come from the tail of shiftIn
            return SimdArray::generate([&](int i) -> value_type {
                i += amount;
                if (i >= 0) {
                    return operator[](i);
                } else if (i >= -SSize) {
                    return shiftIn[i + SSize];
                }
                return 0;
            });
        }
        // positive shift: indices beyond SSize come from the head of shiftIn
        return SimdArray::generate([&](int i) -> value_type {
            i += amount;
            if (i < SSize) {
                return operator[](i);
            } else if (i < 2 * SSize) {
                return shiftIn[i - SSize];
            }
            return 0;
        });
    }
1112 | |||
private:
    // workaround for MSVC not understanding the simpler and shorter expression of the boolean
    // expression directly in the enable_if below
    // True when both halves share one storage type and the widths match, i.e.
    // the fast bisected shifted() overload below may be used.
    template <std::size_t NN> struct bisectable_shift
        : public std::integral_constant<bool,
                                        std::is_same<storage_type0, storage_type1>::value &&  // bisectable
                                        N == NN>
    {
    };
1122 | |||
public:
    /// Shift with carry-in from \p shiftIn, fast path for bisectable arrays
    /// (identical half types, NN == N). Walks through the possible overlaps of
    /// the shift window with the four half-vectors involved; vacated entries
    /// are filled from shiftIn instead of zero.
    template <std::size_t NN>
    inline SimdArray shifted(enable_if<bisectable_shift<NN>::value, int> amount,
                             const SimdArray<value_type, NN> &shiftIn) const
    {
        constexpr int SSize = Size;
        if (amount < 0) {
            if (amount > -static_cast<int>(storage_type0::Size)) {
                // small negative shift: each half pulls from its right neighbor
                return {data0.shifted(amount, internal_data1(shiftIn)),
                        data1.shifted(amount, data0)};
            }
            if (amount == -static_cast<int>(storage_type0::Size)) {
                // whole halves move one slot to the right
                return {storage_type0(internal_data1(shiftIn)), storage_type1(data0)};
            }
            if (amount > -SSize) {
                return {
                    internal_data1(shiftIn)
                        .shifted(amount + static_cast<int>(storage_type0::Size), internal_data0(shiftIn)),
                    data0.shifted(amount + static_cast<int>(storage_type0::Size), internal_data1(shiftIn))};
            }
            if (amount == -SSize) {
                // shifted out entirely: the result is exactly shiftIn
                return shiftIn;
            }
            if (amount > -2 * SSize) {
                return shiftIn.shifted(amount + SSize);
            }
        }
        if (amount == 0) {
            return *this;
        }
        if (amount < static_cast<int>(storage_type0::Size)) {
            // small positive shift: each half pulls from its left neighbor
            return {data0.shifted(amount, data1),
                    data1.shifted(amount, internal_data0(shiftIn))};
        }
        if (amount == static_cast<int>(storage_type0::Size)) {
            return {storage_type0(data1), storage_type1(internal_data0(shiftIn))};
        }
        if (amount < SSize) {
            return {data1.shifted(amount - static_cast<int>(storage_type0::Size), internal_data0(shiftIn)),
                    internal_data0(shiftIn)
                        .shifted(amount - static_cast<int>(storage_type0::Size), internal_data1(shiftIn))};
        }
        if (amount == SSize) {
            return shiftIn;
        }
        if (amount < 2 * SSize) {
            return shiftIn.shifted(amount - SSize);
        }
        return Zero();
    }
1173 | |||
    // rotated {{{2
    ///\copybrief Vector::rotated
    /// Rotates entries by \p amount positions; the amount is first normalized
    /// into [0, size()).
    Vc_INTRINSIC SimdArray rotated(int amount) const
    {
        amount %= int(size());
        if (amount == 0) {
            return *this;
        } else if (amount < 0) {
            amount += size();
        }

#ifdef Vc_MSVC
        // MSVC fails to find a SimdArray::shifted function with 2 arguments. So use store
        // ->
        // load to implement the function instead.
        // The buffer holds the full array plus a repeated first half so both
        // unaligned reloads stay in bounds.
        alignas(MemoryAlignment) T tmp[N + data0.size()];
        data0.store(&tmp[0], Vc::Aligned);
        data1.store(&tmp[data0.size()], Vc::Aligned);
        data0.store(&tmp[N], Vc::Unaligned);
        SimdArray r;
        r.data0.load(&tmp[amount], Vc::Unaligned);
        r.data1.load(&tmp[(amount + data0.size()) % size()], Vc::Unaligned);
        return r;
#else
        // Cross-casts of each half into the other half's storage type serve as
        // carry-in operands for the pairwise shifts below.
        auto &&d0cvtd = simd_cast<storage_type1>(data0);
        auto &&d1cvtd = simd_cast<storage_type0>(data1);
        constexpr int size0 = storage_type0::size();
        constexpr int size1 = storage_type1::size();

        if (amount == size0 && std::is_same<storage_type0, storage_type1>::value) {
            // exact half rotation: simply swap the halves
            return {std::move(d1cvtd), std::move(d0cvtd)};
        } else if (amount < size1) {
            return {data0.shifted(amount, d1cvtd), data1.shifted(amount, d0cvtd)};
        } else if (amount == size1) {
            return {data0.shifted(amount, d1cvtd), std::move(d0cvtd)};
        } else if (int(size()) - amount < size1) {
            return {data0.shifted(amount - int(size()), d1cvtd.shifted(size1 - size0)),
                    data1.shifted(amount - int(size()), data0.shifted(size0 - size1))};
        } else if (int(size()) - amount == size1) {
            return {data0.shifted(-size1, d1cvtd.shifted(size1 - size0)),
                    simd_cast<storage_type1>(data0.shifted(size0 - size1))};
        } else if (amount <= size0) {
            return {data0.shifted(size1, d1cvtd).shifted(amount - size1, data0),
                    simd_cast<storage_type1>(data0.shifted(amount - size1))};
        } else {
            return {data0.shifted(size1, d1cvtd).shifted(amount - size1, data0),
                    simd_cast<storage_type1>(data0.shifted(amount - size1, d1cvtd))};
        }
        return *this; // unreachable; silences missing-return warnings
#endif
    }
1225 | |||
    // interleaveLow/-High {{{2
    ///\internal \copydoc Vector::interleaveLow
    /// Interleaves the low halves of *this and \p x:
    /// data0[0], x.data0[0], data0[1], x.data0[1], ...
    Vc_INTRINSIC SimdArray interleaveLow(const SimdArray &x) const
    {
        // return data0[0], x.data0[0], data0[1], x.data0[1], ...
        return {data0.interleaveLow(x.data0),
                simd_cast<storage_type1>(data0.interleaveHigh(x.data0))};
    }
    ///\internal \copydoc Vector::interleaveHigh
    /// Dispatches on whether the two halves have equal Size.
    Vc_INTRINSIC SimdArray interleaveHigh(const SimdArray &x) const
    {
        return interleaveHighImpl(
            x,
            std::integral_constant<bool, storage_type0::Size == storage_type1::Size>());
    }

private:
    ///\internal equal-size halves: interleave data1 of both operands directly.
    Vc_INTRINSIC SimdArray interleaveHighImpl(const SimdArray &x, std::true_type) const
    {
        return {data1.interleaveLow(x.data1), data1.interleaveHigh(x.data1)};
    }
    ///\internal unequal-size halves: shift the high interleave of data0 so it
    /// lines up with the interleave of data1.
    inline SimdArray interleaveHighImpl(const SimdArray &x, std::false_type) const
    {
        return {data0.interleaveHigh(x.data0)
                    .shifted(storage_type1::Size,
                             simd_cast<storage_type0>(data1.interleaveLow(x.data1))),
                data1.interleaveHigh(x.data1)};
    }
1256 | |||
public:
    ///\copybrief Vector::reversed
    /// Reverses the order of all entries. For equal half types, reverse each
    /// half and swap them; otherwise align with a carry-in shift first.
    inline SimdArray reversed() const //{{{2
    {
        if (std::is_same<storage_type0, storage_type1>::value) {
            return {simd_cast<storage_type0>(data1).reversed(),
                    simd_cast<storage_type1>(data0).reversed()};
        } else {
#ifdef Vc_MSVC
            // MSVC fails to find a SimdArray::shifted function with 2 arguments. So use
            // store
            // -> load to implement the function instead.
            alignas(MemoryAlignment) T tmp[N];
            data1.reversed().store(&tmp[0], Vc::Aligned);
            data0.reversed().store(&tmp[data1.size()], Vc::Unaligned);
            return SimdArray{&tmp[0], Vc::Aligned};
#else
            return {data0.shifted(storage_type1::Size, data1).reversed(),
                    simd_cast<storage_type1>(data0.reversed().shifted(
                        storage_type0::Size - storage_type1::Size))};
#endif
        }
    }
    ///\copydoc Vector::sorted
    /// Dispatches on whether the halves have equal Size (merge-based fast
    /// path) or not (pad-to-power-of-two fallback).
    inline SimdArray sorted() const //{{{2
    {
        return sortedImpl(
            std::integral_constant<bool, storage_type0::Size == storage_type1::Size>());
    }

    ///\internal Equal-size halves: bitonic-style merge. Sort both halves
    /// (second reversed), take element-wise min/max, and sort each result to
    /// obtain the final lower/upper half.
    Vc_INTRINSIC SimdArray sortedImpl(std::true_type) const
    {
#ifdef Vc_DEBUG_SORTED
        std::cerr << "-- " << data0 << data1 << '\n';
#endif
        const auto a = data0.sorted();
        const auto b = data1.sorted().reversed();
        const auto lo = Vc::min(a, b);
        const auto hi = Vc::max(a, b);
        return {lo.sorted(), hi.sorted()};
    }
1299 | |||
    ///\internal Unequal-size halves: widen to the next power-of-two SimdArray,
    /// pad the extra slots with the largest representable value (+inf where
    /// available) so they sort to the end, sort, and truncate back to N.
    Vc_INTRINSIC SimdArray sortedImpl(std::false_type) const
    {
        using SortableArray =
            SimdArray<value_type, Common::NextPowerOfTwo<size()>::value>;
        auto sortable = simd_cast<SortableArray>(*this);
        for (std::size_t i = Size; i < SortableArray::Size; ++i) {
            using limits = std::numeric_limits<value_type>;
            if (limits::has_infinity) {
                sortable[i] = limits::infinity();
            } else {
                sortable[i] = std::numeric_limits<value_type>::max();
            }
        }
        return simd_cast<SimdArray>(sortable.sorted());

        /* The following implementation appears to be less efficient. But this may need further
         * work.
        const auto a = data0.sorted();
        const auto b = data1.sorted();
#ifdef Vc_DEBUG_SORTED
        std::cerr << "== " << a << b << '\n';
#endif
        auto aIt = Vc::begin(a);
        auto bIt = Vc::begin(b);
        const auto aEnd = Vc::end(a);
        const auto bEnd = Vc::end(b);
        return SimdArray::generate([&](std::size_t) {
            if (aIt == aEnd) {
                return *(bIt++);
            }
            if (bIt == bEnd) {
                return *(aIt++);
            }
            if (*aIt < *bIt) {
                return *(aIt++);
            } else {
                return *(bIt++);
            }
        });
        */
    }
1342 | |||
    /// \name Deprecated Members
    ///@{

    ///\copydoc size
    ///\deprecated Use size() instead.
    static constexpr std::size_t Size = size();

    /// \copydoc Vector::exponent
    /// \deprecated Use the free function exponent(x) instead.
    Vc_DEPRECATED("use exponent(x) instead") Vc_INTRINSIC SimdArray exponent() const
    {
        return {exponent(data0), exponent(data1)};
    }

    /// \copydoc Vector::isNegative
    /// \deprecated Use the free function isnegative(x) instead.
    Vc_DEPRECATED("use isnegative(x) instead") Vc_INTRINSIC MaskType isNegative() const
    {
        return {isnegative(data0), isnegative(data1)};
    }

    ///\copydoc Vector::copySign
    /// \deprecated Use the free function copysign(x, y) instead.
    Vc_DEPRECATED("use copysign(x, y) instead") Vc_INTRINSIC SimdArray
        copySign(const SimdArray &reference) const
    {
        return {Vc::copysign(data0, reference.data0),
                Vc::copysign(data1, reference.data1)};
    }
    ///@}
1370 | |||
    // internal_data0/1 {{{2
    // Free accessor functions (defined below the class) get direct access to
    // the two half vectors.
    friend storage_type0 &internal_data0<>(SimdArray &x);
    friend storage_type1 &internal_data1<>(SimdArray &x);
    friend const storage_type0 &internal_data0<>(const SimdArray &x);
    friend const storage_type1 &internal_data1<>(const SimdArray &x);

    /// \internal Construct directly from the two half vectors (move form).
    Vc_INTRINSIC SimdArray(storage_type0 &&x, storage_type1 &&y) //{{{2
        : data0(std::move(x)), data1(std::move(y))
    {
    }

    Vc_FREE_STORE_OPERATORS_ALIGNED(alignof(storage_type0));

private: //{{{2
    // The alignas attribute attached to the class declaration above is ignored by ICC
    // 17.0.0 (at least). So just move the alignas attribute down here where it works for
    // all compilers.
    alignas(static_cast<std::size_t>(
        Common::BoundedAlignment<Common::NextPowerOfTwo<N>::value * sizeof(V) /
                                 V::size()>::value)) storage_type0 data0;
    storage_type1 data1;
1393 | }; | ||
1394 | #undef Vc_CURRENT_CLASS_NAME | ||
// Out-of-class definitions for the static constexpr data members (required
// when they are ODR-used, prior to C++17 inline variables).
template <typename T, std::size_t N, typename V, std::size_t M>
constexpr std::size_t SimdArray<T, N, V, M>::Size;
template <typename T, std::size_t N, typename V, std::size_t M>
constexpr std::size_t SimdArray<T, N, V, M>::MemoryAlignment;
1399 | |||
// gatherImplementation {{{2
// Unmasked gather: splits the index vector so each half gathers its own part.
template <typename T, std::size_t N, typename VectorType, std::size_t M>
template <typename MT, typename IT>
inline void SimdArray<T, N, VectorType, M>::gatherImplementation(const MT *mem,
                                                                 IT &&indexes)
{
    data0.gather(mem, Split::lo(Common::Operations::gather(),
                                indexes));  // don't forward indexes - it could move and
                                            // thus break the next line
    data1.gather(mem, Split::hi(Common::Operations::gather(), std::forward<IT>(indexes)));
}
// Masked gather: the mask is split alongside the indexes.
template <typename T, std::size_t N, typename VectorType, std::size_t M>
template <typename MT, typename IT>
inline void SimdArray<T, N, VectorType, M>::gatherImplementation(const MT *mem,
                                                                 IT &&indexes, MaskArgument mask)
{
    data0.gather(mem, Split::lo(Common::Operations::gather(), indexes),
                 Split::lo(mask));  // don't forward indexes - it could move and
                                    // thus break the next line
    data1.gather(mem, Split::hi(Common::Operations::gather(), std::forward<IT>(indexes)),
                 Split::hi(mask));
}
1422 | |||
// scatterImplementation {{{2
// Unmasked scatter: splits the index vector so each half scatters its part.
// NOTE(review): Split is invoked with the gather() operation tag here too —
// presumably the tag only steers index splitting; confirm against
// gatherinterface.h/scatterinterface.h before changing it.
template <typename T, std::size_t N, typename VectorType, std::size_t M>
template <typename MT, typename IT>
inline void SimdArray<T, N, VectorType, M>::scatterImplementation(MT *mem,
                                                                 IT &&indexes) const
{
    data0.scatter(mem, Split::lo(Common::Operations::gather(),
                                 indexes));  // don't forward indexes - it could move and
                                             // thus break the next line
    data1.scatter(mem, Split::hi(Common::Operations::gather(), std::forward<IT>(indexes)));
}
// Masked scatter: the mask is split alongside the indexes.
template <typename T, std::size_t N, typename VectorType, std::size_t M>
template <typename MT, typename IT>
inline void SimdArray<T, N, VectorType, M>::scatterImplementation(MT *mem,
                                                                  IT &&indexes, MaskArgument mask) const
{
    data0.scatter(mem, Split::lo(Common::Operations::gather(), indexes),
                  Split::lo(mask));  // don't forward indexes - it could move and
                                     // thus break the next line
    data1.scatter(mem, Split::hi(Common::Operations::gather(), std::forward<IT>(indexes)),
                  Split::hi(mask));
}
1445 | |||
// internal_data0/1 (SimdArray) {{{1
///\internal Returns the first data member of a generic SimdArray
template <typename T, std::size_t N, typename V, std::size_t M>
#ifndef Vc_MSVC
Vc_INTRINSIC
#endif
typename SimdArrayTraits<T, N>::storage_type0 &internal_data0(
    SimdArray<T, N, V, M> &x)
{
    return x.data0;
}
///\internal Returns the second data member of a generic SimdArray
template <typename T, std::size_t N, typename V, std::size_t M>
#ifndef Vc_MSVC
Vc_INTRINSIC
#endif
typename SimdArrayTraits<T, N>::storage_type1 &internal_data1(
    SimdArray<T, N, V, M> &x)
{
    return x.data1;
}
///\internal Returns the first data member of a generic SimdArray (const overload)
template <typename T, std::size_t N, typename V, std::size_t M>
#ifndef Vc_MSVC
Vc_INTRINSIC
#endif
const typename SimdArrayTraits<T, N>::storage_type0 &internal_data0(
    const SimdArray<T, N, V, M> &x)
{
    return x.data0;
}
///\internal Returns the second data member of a generic SimdArray (const overload)
template <typename T, std::size_t N, typename V, std::size_t M>
#ifndef Vc_MSVC
Vc_INTRINSIC
#endif
const typename SimdArrayTraits<T, N>::storage_type1 &internal_data1(
    const SimdArray<T, N, V, M> &x)
{
    return x.data1;
}
1487 | |||
// MSVC workaround for SimdArray(storage_type0, storage_type1) ctor{{{1
// MSVC sometimes stores x to data1. By first broadcasting 0 and then assigning y
// in the body the bug is supressed.
#if defined Vc_MSVC && defined Vc_IMPL_SSE
// Explicit specialization only for the double/8/SSE case where the miscompile
// was observed.
template <>
Vc_INTRINSIC SimdArray<double, 8, SSE::Vector<double>, 2>::SimdArray(
    SimdArray<double, 4> &&x, SimdArray<double, 4> &&y)
    : data0(x), data1(0)
{
    data1 = y;
}
#endif
1500 | |||
// binary operators {{{1
// Machinery to compute the result type of mixed-type binary operators
// involving at least one SimdArray operand.
namespace result_vector_type_internal
{
// Strips cv-qualifiers and references.
template <typename T>
using type = typename std::remove_cv<typename std::remove_reference<T>::type>::type;

// True for integral types that could be (or are guaranteed to be) wider than
// int; such operands disable the mixed operators entirely (no implicit
// demotion, and 64-bit integer SIMD is not generally supported).
template <typename T>
using is_integer_larger_than_int = std::integral_constant<
    bool, std::is_integral<T>::value &&(sizeof(T) > sizeof(int) ||
                                        std::is_same<T, long>::value ||
                                        std::is_same<T, unsigned long>::value)>;

// Primary template: the bool parameter encodes whether the operator should be
// enabled for this L/R combination (see inline comments on each condition).
template <
    typename L, typename R,
    std::size_t N = Traits::isSimdArray<L>::value ? Traits::simd_vector_size<L>::value
                                                  : Traits::simd_vector_size<R>::value,
    bool =
        (Traits::isSimdArray<L>::value ||
         Traits::isSimdArray<R>::value)  // one of the operands must be a SimdArray
        && !std::is_same<type<L>, type<R>>::value  // if the operands are of the same type
                                                   // use the member function
        &&
        ((std::is_arithmetic<type<L>>::value &&
          !is_integer_larger_than_int<type<L>>::value) ||
         (std::is_arithmetic<type<R>>::value &&
          !is_integer_larger_than_int<type<R>>::value)  // one of the operands is a scalar
                                                        // type
         ||
         (  // or one of the operands is Vector<T> with Vector<T>::size() ==
            // SimdArray::size()
             Traits::simd_vector_size<L>::value == Traits::simd_vector_size<R>::value &&
             ((Traits::is_simd_vector<L>::value && !Traits::isSimdArray<L>::value) ||
              (Traits::is_simd_vector<R>::value && !Traits::isSimdArray<R>::value))))>
struct evaluate;

template <typename L, typename R, std::size_t N> struct evaluate<L, R, N, true>
{
private:
    using LScalar = Traits::entry_type_of<L>;
    using RScalar = Traits::entry_type_of<R>;

    template <bool B, typename True, typename False>
    using conditional = typename std::conditional<B, True, False>::type;

public:
    // In principle we want the exact same rules for SimdArray<T> ⨉ SimdArray<U> as the standard
    // defines for T ⨉ U. BUT: short ⨉ short returns int (because all integral types smaller than
    // int are promoted to int before any operation). This would imply that SIMD types with integral
    // types smaller than int are more or less useless - and you could use SimdArray<int> from the
    // start. Therefore we special-case those operations where the scalar type of both operands is
    // integral and smaller than int.
    // In addition to that there is no generic support for 64-bit int SIMD types. Therefore
    // promotion to a 64-bit integral type (including `long` because it can potentially have 64
    // bits) also is not done. But if one of the operands is a scalar type that is larger than int
    // then the operator is disabled altogether. We do not want an implicit demotion.
    using type = SimdArray<
        conditional<(std::is_integral<LScalar>::value &&std::is_integral<RScalar>::value &&
                     sizeof(LScalar) < sizeof(int) &&
                     sizeof(RScalar) < sizeof(int)),
                    conditional<(sizeof(LScalar) == sizeof(RScalar)),
                                conditional<std::is_unsigned<LScalar>::value, LScalar, RScalar>,
                                conditional<(sizeof(LScalar) > sizeof(RScalar)), LScalar, RScalar>>,
                    decltype(std::declval<LScalar>() + std::declval<RScalar>())>,
        N>;
};

}  // namespace result_vector_type_internal
1568 | |||
// \internal The SimdArray type that results from applying a binary operator to L and R.
template <typename L, typename R>
using result_vector_type = typename result_vector_type_internal::evaluate<L, R>::type;

// sanity check: scalar `short` ⨉ SimdArray<ushort, 32> must keep the unsigned
// smaller-than-int entry type and the SimdArray's size
static_assert(
    std::is_same<result_vector_type<short int, Vc::SimdArray<short unsigned int, 32ul>>,
                 Vc::SimdArray<short unsigned int, 32ul>>::value,
    "result_vector_type does not work");
1576 | |||
// Non-member arithmetic/bitwise operators: both operands are converted to the common
// result_vector_type<L, R> and the work is forwarded to that type's member operator.
#define Vc_BINARY_OPERATORS_(op_)                                                        \
    /*!\brief Applies op_ component-wise and concurrently. */                            \
    template <typename L, typename R>                                                    \
    Vc_INTRINSIC result_vector_type<L, R> operator op_(L &&lhs, R &&rhs)                 \
    {                                                                                    \
        using Return = result_vector_type<L, R>;                                         \
        return Return(std::forward<L>(lhs)) op_ Return(std::forward<R>(rhs));            \
    }
/**
 * \name Arithmetic and Bitwise Operators
 *
 * Applies the operator component-wise and concurrently on \p lhs and \p rhs and returns
 * a new SimdArray object containing the result values.
 *
 * This operator only participates in overload resolution if:
 * \li At least one of the template parameters \p L or \p R is a SimdArray type.
 * \li Either \p L or \p R is a fundamental arithmetic type but not an integral type
 *     larger than \c int \n
 *     or \n
 *     \p L or \p R is a Vc::Vector type with equal number of elements (Vector::size() ==
 *     SimdArray::size()).
 *
 * The return type of the operator is a SimdArray type using the more precise EntryType of
 * \p L or \p R and the same number of elements as the SimdArray argument(s).
 */
///@{
Vc_ALL_ARITHMETICS(Vc_BINARY_OPERATORS_);
Vc_ALL_BINARY(Vc_BINARY_OPERATORS_);
///@}
#undef Vc_BINARY_OPERATORS_
// Non-member compare operators: same promotion scheme as above, but the result is the
// promoted type's mask_type (a SimdMaskArray).
#define Vc_BINARY_OPERATORS_(op_)                                                        \
    /*!\brief Applies op_ component-wise and concurrently. */                            \
    template <typename L, typename R>                                                    \
    Vc_INTRINSIC typename result_vector_type<L, R>::mask_type operator op_(L &&lhs,     \
                                                                           R &&rhs)     \
    {                                                                                    \
        using Promote = result_vector_type<L, R>;                                        \
        return Promote(std::forward<L>(lhs)) op_ Promote(std::forward<R>(rhs));          \
    }
/**
 * \name Compare Operators
 *
 * Applies the operator component-wise and concurrently on \p lhs and \p rhs and returns
 * a new SimdMaskArray object containing the result values.
 *
 * This operator only participates in overload resolution if (same rules as above):
 * \li At least one of the template parameters \p L or \p R is a SimdArray type.
 * \li Either \p L or \p R is a fundamental arithmetic type but not an integral type
 *     larger than \c int \n
 *     or \n
 *     \p L or \p R is a Vc::Vector type with equal number of elements (Vector::size() ==
 *     SimdArray::size()).
 *
 * The return type of the operator is a SimdMaskArray type using the more precise EntryType of
 * \p L or \p R and the same number of elements as the SimdArray argument(s).
 */
///@{
Vc_ALL_COMPARES(Vc_BINARY_OPERATORS_);
///@}
#undef Vc_BINARY_OPERATORS_
1637 | |||
// math functions {{{1
// Forwards a unary math function to SimdArray::fromOperation, which applies the
// corresponding Vc implementation piece-wise to the internal storage.
#define Vc_FORWARD_UNARY_OPERATOR(name_)                                                 \
    /*!\brief Applies the std::name_ function component-wise and concurrently. */        \
    template <typename T, std::size_t N, typename V, std::size_t M>                      \
    inline SimdArray<T, N, V, M> name_(const SimdArray<T, N, V, M> &x)                   \
    {                                                                                    \
        return SimdArray<T, N, V, M>::fromOperation(                                     \
            Common::Operations::Forward_##name_(), x);                                   \
    }                                                                                    \
    Vc_NOTHING_EXPECTING_SEMICOLON

// Same as above, but for classification functions whose result is a mask.
#define Vc_FORWARD_UNARY_BOOL_OPERATOR(name_)                                            \
    /*!\brief Applies the std::name_ function component-wise and concurrently. */        \
    template <typename T, std::size_t N, typename V, std::size_t M>                      \
    inline SimdMaskArray<T, N, V, M> name_(const SimdArray<T, N, V, M> &x)               \
    {                                                                                    \
        return SimdMaskArray<T, N, V, M>::fromOperation(                                 \
            Common::Operations::Forward_##name_(), x);                                   \
    }                                                                                    \
    Vc_NOTHING_EXPECTING_SEMICOLON

// Same as above, but for binary math functions taking two SimdArray operands.
#define Vc_FORWARD_BINARY_OPERATOR(name_)                                                \
    /*!\brief Applies the std::name_ function component-wise and concurrently. */        \
    template <typename T, std::size_t N, typename V, std::size_t M>                      \
    inline SimdArray<T, N, V, M> name_(const SimdArray<T, N, V, M> &x,                   \
                                       const SimdArray<T, N, V, M> &y)                   \
    {                                                                                    \
        return SimdArray<T, N, V, M>::fromOperation(                                     \
            Common::Operations::Forward_##name_(), x, y);                                \
    }                                                                                    \
    Vc_NOTHING_EXPECTING_SEMICOLON
1669 | |||
1670 | /** | ||
1671 | * \name Math functions | ||
1672 | * These functions evaluate the | ||
1673 | */ | ||
1674 | ///@{ | ||
1675 | Vc_FORWARD_UNARY_OPERATOR(abs); | ||
1676 | Vc_FORWARD_UNARY_OPERATOR(asin); | ||
1677 | Vc_FORWARD_UNARY_OPERATOR(atan); | ||
1678 | Vc_FORWARD_BINARY_OPERATOR(atan2); | ||
1679 | Vc_FORWARD_UNARY_OPERATOR(ceil); | ||
1680 | Vc_FORWARD_BINARY_OPERATOR(copysign); | ||
1681 | Vc_FORWARD_UNARY_OPERATOR(cos); | ||
1682 | Vc_FORWARD_UNARY_OPERATOR(exp); | ||
1683 | Vc_FORWARD_UNARY_OPERATOR(exponent); | ||
1684 | Vc_FORWARD_UNARY_OPERATOR(floor); | ||
1685 | /// Applies the std::fma function component-wise and concurrently. | ||
1686 | template <typename T, std::size_t N> | ||
1687 | inline SimdArray<T, N> fma(const SimdArray<T, N> &a, const SimdArray<T, N> &b, | ||
1688 | const SimdArray<T, N> &c) | ||
1689 | { | ||
1690 | return SimdArray<T, N>::fromOperation(Common::Operations::Forward_fma(), a, b, c); | ||
1691 | } | ||
Vc_FORWARD_UNARY_BOOL_OPERATOR(isfinite);
Vc_FORWARD_UNARY_BOOL_OPERATOR(isinf);
Vc_FORWARD_UNARY_BOOL_OPERATOR(isnan);
#if defined Vc_MSVC && defined Vc_IMPL_SSE
// Explicit isnan overload for SimdArray<double, 8> on MSVC with SSE: recurses by hand
// into the four SSE::Vector<double> quarters instead of using the generic forwarding
// macro. NOTE(review): presumably a workaround for an MSVC code-generation issue with
// the macro-generated overload — confirm against the upstream changelog.
inline SimdMaskArray<double, 8, SSE::Vector<double>, 2> isnan(
    const SimdArray<double, 8, SSE::Vector<double>, 2> &x)
{
    using V = SSE::Vector<double>;
    const SimdArray<double, 4, V, 2> &x0 = internal_data0(x);
    const SimdArray<double, 4, V, 2> &x1 = internal_data1(x);
    SimdMaskArray<double, 4, V, 2> r0;
    SimdMaskArray<double, 4, V, 2> r1;
    internal_data(internal_data0(r0)) = isnan(internal_data(internal_data0(x0)));
    internal_data(internal_data1(r0)) = isnan(internal_data(internal_data1(x0)));
    internal_data(internal_data0(r1)) = isnan(internal_data(internal_data0(x1)));
    internal_data(internal_data1(r1)) = isnan(internal_data(internal_data1(x1)));
    return {std::move(r0), std::move(r1)};
}
#endif
Vc_FORWARD_UNARY_BOOL_OPERATOR(isnegative);
/// Applies the std::frexp function component-wise and concurrently.
/// The returned SimdArray holds the normalized fractions; the corresponding binary
/// exponents are written to \p e.
template <typename T, std::size_t N>
inline SimdArray<T, N> frexp(const SimdArray<T, N> &x, SimdArray<int, N> *e)
{
    return SimdArray<T, N>::fromOperation(Common::Operations::Forward_frexp(), x, e);
}
/// Applies the std::ldexp function component-wise and concurrently.
/// Each entry of \p x is multiplied by 2 raised to the corresponding entry of \p e.
template <typename T, std::size_t N>
inline SimdArray<T, N> ldexp(const SimdArray<T, N> &x, const SimdArray<int, N> &e)
{
    return SimdArray<T, N>::fromOperation(Common::Operations::Forward_ldexp(), x, e);
}
Vc_FORWARD_UNARY_OPERATOR(log);
Vc_FORWARD_UNARY_OPERATOR(log10);
Vc_FORWARD_UNARY_OPERATOR(log2);
Vc_FORWARD_UNARY_OPERATOR(reciprocal);
Vc_FORWARD_UNARY_OPERATOR(round);
Vc_FORWARD_UNARY_OPERATOR(rsqrt);
Vc_FORWARD_UNARY_OPERATOR(sin);
/// Determines sine and cosine concurrently and component-wise on \p x.
/// The results are written through the \p sin and \p cos output pointers.
template <typename T, std::size_t N>
void sincos(const SimdArray<T, N> &x, SimdArray<T, N> *sin, SimdArray<T, N> *cos)
{
    SimdArray<T, N>::callOperation(Common::Operations::Forward_sincos(), x, sin, cos);
}
Vc_FORWARD_UNARY_OPERATOR(sqrt);
Vc_FORWARD_UNARY_OPERATOR(trunc);
Vc_FORWARD_BINARY_OPERATOR(min);
Vc_FORWARD_BINARY_OPERATOR(max);
///@}
#undef Vc_FORWARD_UNARY_OPERATOR
#undef Vc_FORWARD_UNARY_BOOL_OPERATOR
#undef Vc_FORWARD_BINARY_OPERATOR
1745 | |||
// simd_cast {{{1
#ifdef Vc_MSVC
// Dummy defaulted parameters appended to otherwise similar-looking simd_cast
// signatures below so that MSVC treats them as distinct overloads.
// NOTE(review): presumably works around an MSVC overload-resolution/mangling
// deficiency — verify against the upstream commit history before removing.
#define Vc_DUMMY_ARG0 , int = 0
#define Vc_DUMMY_ARG1 , long = 0
#define Vc_DUMMY_ARG2 , short = 0
#define Vc_DUMMY_ARG3 , char = '0'
#define Vc_DUMMY_ARG4 , unsigned = 0u
#define Vc_DUMMY_ARG5 , unsigned short = 0u
#else
#define Vc_DUMMY_ARG0
#define Vc_DUMMY_ARG1
#define Vc_DUMMY_ARG2
#define Vc_DUMMY_ARG3
#define Vc_DUMMY_ARG4
#define Vc_DUMMY_ARG5
#endif // Vc_MSVC
1762 | |||
// simd_cast_impl_smaller_input {{{2
// The following function can be implemented without the sizeof...(From) overload.
// However, ICC has a bug (Premier Issue #6000116338) which leads to an ICE. Splitting the
// function in two works around the issue.
/// \internal Casts the \p xs pack into the leading entries of the Return object, then
/// copies the N entries of \p last into the slots directly behind them.
template <typename Return, std::size_t N, typename T, typename... From>
Vc_INTRINSIC Vc_CONST enable_if<sizeof...(From) != 0, Return>
simd_cast_impl_smaller_input(const From &... xs, const T &last)
{
    Return r = simd_cast<Return>(xs...);
    for (size_t i = 0; i < N; ++i) {
        r[i + N * sizeof...(From)] = static_cast<typename Return::EntryType>(last[i]);
    }
    return r;
}
/// \internal Single-argument variant: copies the N entries of \p last into a
/// value-initialized Return object (any remaining entries keep their zero value).
template <typename Return, std::size_t N, typename T>
Vc_INTRINSIC Vc_CONST Return simd_cast_impl_smaller_input(const T &last)
{
    Return r = Return();
    for (size_t i = 0; i < N; ++i) {
        r[i] = static_cast<typename Return::EntryType>(last[i]);
    }
    return r;
}
/// \internal Casts the \p xs pack to Return and fills the remaining entries from the
/// front of \p last; entries of \p last beyond Return::Size are dropped.
template <typename Return, std::size_t N, typename T, typename... From>
Vc_INTRINSIC Vc_CONST enable_if<sizeof...(From) != 0, Return> simd_cast_impl_larger_input(
    const From &... xs, const T &last)
{
    Return r = simd_cast<Return>(xs...);
    for (size_t i = N * sizeof...(From); i < Return::Size; ++i) {
        r[i] = static_cast<typename Return::EntryType>(last[i - N * sizeof...(From)]);
    }
    return r;
}
/// \internal Single-argument variant: copies the first Return::size() entries of
/// \p last into a value-initialized Return object.
template <typename Return, std::size_t N, typename T>
Vc_INTRINSIC Vc_CONST Return simd_cast_impl_larger_input(const T &last)
{
    Return r = Return();
    for (size_t i = 0; i < Return::size(); ++i) {
        r[i] = static_cast<typename Return::EntryType>(last[i]);
    }
    return r;
}
1805 | |||
// simd_cast_without_last (declaration) {{{2
/// \internal Casts all arguments except the trailing one to \p Return.
template <typename Return, typename T, typename... From>
Vc_INTRINSIC_L Vc_CONST_L Return
simd_cast_without_last(const From &... xs, const T &) Vc_INTRINSIC_R Vc_CONST_R;
1810 | |||
// are_all_types_equal {{{2
/// \internal Evaluates to \c true iff all types in the pack \p Ts are the same type.
template <typename... Ts> struct are_all_types_equal;
/// \internal Recursion: the pack is homogeneous iff its first two types match and the
/// tail (starting at the second type) is homogeneous as well.
template <typename A, typename B, typename... Rest>
struct are_all_types_equal<A, B, Rest...>
    : public std::integral_constant<bool, std::is_same<A, B>::value &&
                                              are_all_types_equal<B, Rest...>::value>
{
};
/// \internal Base case: a single type is trivially equal to itself.
template <typename T> struct are_all_types_equal<T> : public std::true_type
{
};
1823 | |||
1824 | // simd_cast_interleaved_argument_order (declarations) {{{2 | ||
1825 | /*! \internal | ||
1826 | The need for simd_cast_interleaved_argument_order stems from a shortcoming in pack | ||
1827 | expansion of variadic templates in C++. For a simd_cast with SimdArray arguments that | ||
1828 | are bisectable (i.e. \c storage_type0 and \c storage_type1 are equal) the generic | ||
1829 | implementation needs to forward to a simd_cast of the \c internal_data0 and \c | ||
1830 | internal_data1 of the arguments. But the required order of arguments is | ||
1831 | `internal_data0(arg0), internal_data1(arg0), internal_data0(arg1), ...`. This is | ||
1832 | impossible to achieve with pack expansion. It is only possible to write | ||
1833 | `internal_data0(args)..., internal_data1(args)...` and thus have the argument order | ||
1834 | mixed up. The simd_cast_interleaved_argument_order “simply” calls simd_cast with the | ||
1835 | arguments correctly reordered (i.e. interleaved). | ||
1836 | |||
1837 | The implementation of simd_cast_interleaved_argument_order is done generically, so that | ||
1838 | it supports any number of arguments. The central idea of the implementation is an | ||
1839 | `extract` function which returns one value of an argument pack determined via an index | ||
1840 | passed as template argument. This index is generated via an index_sequence. The | ||
1841 | `extract` function uses two argument packs (of equal size) to easily return values from | ||
1842 | the front and middle of the argument pack (for doing the deinterleave). | ||
1843 | */ | ||
/// \internal Calls simd_cast with the two equally-sized packs \p a and \p b
/// interleaved, i.e. in the order (a0, b0, a1, b1, ...); see the explanation above.
template <typename Return, typename... Ts>
Vc_INTRINSIC Vc_CONST Return
simd_cast_interleaved_argument_order(const Ts &... a, const Ts &... b);
1847 | |||
// simd_cast_with_offset (declarations and one impl) {{{2
// Casts the input entries starting at entry index `offset` (counted over the
// concatenated inputs) to Return. The overloads are dispatched on how `offset`
// relates to the input and Return sizes; only the argument-dropping overloads are
// defined here, the rest further down.
// offset == 0 {{{3
template <typename Return, std::size_t offset, typename From, typename... Froms>
Vc_INTRINSIC Vc_CONST
    enable_if<(are_all_types_equal<From, Froms...>::value && offset == 0), Return>
        simd_cast_with_offset(const From &x, const Froms &... xs);
// offset > 0 && offset divisible by Return::Size {{{3
template <typename Return, std::size_t offset, typename From>
Vc_INTRINSIC Vc_CONST
    enable_if<(From::Size > offset && offset > 0 && offset % Return::Size == 0), Return>
        simd_cast_with_offset(const From &x);
// offset > 0 && offset NOT divisible && Return is non-atomic simd(mask)array {{{3
template <typename Return, std::size_t offset, typename From>
Vc_INTRINSIC Vc_CONST
    enable_if<(From::Size > offset && offset > 0 && offset % Return::Size != 0 &&
               ((Traits::isSimdArray<Return>::value &&
                 !Traits::isAtomicSimdArray<Return>::value) ||
                (Traits::isSimdMaskArray<Return>::value &&
                 !Traits::isAtomicSimdMaskArray<Return>::value))),
              Return>
        simd_cast_with_offset(const From &x);
// offset > 0 && offset NOT divisible && Return is atomic simd(mask)array {{{3
template <typename Return, std::size_t offset, typename From>
Vc_INTRINSIC Vc_CONST
    enable_if<(From::Size > offset && offset > 0 && offset % Return::Size != 0 &&
               ((Traits::isSimdArray<Return>::value &&
                 Traits::isAtomicSimdArray<Return>::value) ||
                (Traits::isSimdMaskArray<Return>::value &&
                 Traits::isAtomicSimdMaskArray<Return>::value))),
              Return>
        simd_cast_with_offset(const From &x);
// offset > first argument (drops first arg) {{{3
// Peels off whole leading arguments until `offset` points inside the first remaining
// argument.
template <typename Return, std::size_t offset, typename From, typename... Froms>
Vc_INTRINSIC Vc_CONST enable_if<
    (are_all_types_equal<From, Froms...>::value && From::Size <= offset), Return>
simd_cast_with_offset(const From &, const Froms &... xs)
{
    return simd_cast_with_offset<Return, offset - From::Size>(xs...);
}

// offset > first and only argument (returns Zero) {{{3
// The requested range lies entirely beyond the available input.
template <typename Return, std::size_t offset, typename From>
Vc_INTRINSIC Vc_CONST enable_if<(From::Size <= offset), Return> simd_cast_with_offset(
    const From &)
{
    return Return::Zero();
}
1895 | |||
// first_type_of {{{2
/// \internal Implementation detail: peels the first type off the parameter pack.
template <typename Head, typename... Tail> struct first_type_of_impl
{
    using type = Head;
};
/// \internal Yields the first type of the (non-empty) parameter pack \p Ts.
template <typename... Ts> using first_type_of = typename first_type_of_impl<Ts...>::type;
1902 | |||
// simd_cast_drop_arguments (declarations) {{{2
// Casts only as many leading arguments as needed to fill Return and ignores the
// trailing argument(s).
template <typename Return, typename From>
Vc_INTRINSIC Vc_CONST Return simd_cast_drop_arguments(From x);
template <typename Return, typename... Froms>
Vc_INTRINSIC Vc_CONST
    enable_if<(are_all_types_equal<Froms...>::value &&
               sizeof...(Froms) * first_type_of<Froms...>::Size < Return::Size),
              Return>
        simd_cast_drop_arguments(Froms... xs, first_type_of<Froms...> x);
// The following function can be implemented without the sizeof...(From) overload.
// However, ICC has a bug (Premier Issue #6000116338) which leads to an ICE. Splitting the
// function in two works around the issue.
template <typename Return, typename From, typename... Froms>
Vc_INTRINSIC Vc_CONST enable_if<
    (are_all_types_equal<From, Froms...>::value &&
     (1 + sizeof...(Froms)) * From::Size >= Return::Size && sizeof...(Froms) != 0),
    Return>
simd_cast_drop_arguments(Froms... xs, From x, From);
template <typename Return, typename From>
Vc_INTRINSIC Vc_CONST
    enable_if<(are_all_types_equal<From>::value && From::Size >= Return::Size), Return>
        simd_cast_drop_arguments(From x, From);
1925 | |||
// NOTE(review): unnamed namespace in a header gives every translation unit its own
// copy of vc_debug_; presumably intentional for this debug-only helper.
namespace
{
#ifdef Vc_DEBUG_SIMD_CAST
// Sink function; the braced-init-list built at the call site forces left-to-right
// evaluation of the streaming expressions passed to it.
void debugDoNothing(const std::initializer_list<void *> &) {}
// Prints prefix, all arguments separated by ", ", and suffix to stderr.
template <typename T0, typename... Ts>
inline void vc_debug_(const char *prefix, const char *suffix, const T0 &arg0,
                      const Ts &... args)
{
    std::cerr << prefix << arg0;
    debugDoNothing({&(std::cerr << ", " << args)...});
    std::cerr << suffix;
}
#else
// Debugging disabled: the call compiles away to nothing.
template <typename T0, typename... Ts>
Vc_INTRINSIC void vc_debug_(const char *, const char *, const T0 &, const Ts &...)
{
}
#endif
}  // unnamed namespace
1945 | |||
// is_less trait{{{2
/// \internal Compile-time `A < B` wrapped in a type, for use inside enable_if
/// conditions and as a dispatch tag.
template <size_t A, size_t B>
struct is_less : public std::integral_constant<bool, (A < B)> {
};
1950 | |||
// is_power_of_2 trait{{{2
/// \internal Evaluates to \c true iff \p N is a power of two.
///
/// `(N - 1) & N` clears the lowest set bit, which is 0 exactly for powers of two.
/// N == 0 must be excluded explicitly: `(0 - 1) & 0` is also 0 (size_t arithmetic
/// wraps), yet 0 is not a power of two.
template <size_t N>
struct is_power_of_2 : public std::integral_constant<bool, (N != 0) && ((N - 1) & N) == 0> {
};
1955 | |||
// simd_cast<T>(xs...) to SimdArray/-mask {{{2
// Generates the simd_cast overloads that convert one or more native Vc::Vector (or
// Vc::Mask) arguments into a SimdArray (or SimdMaskArray). The four overloads cover:
//  {1} atomic Return, inputs cover less than Return::Size: cast everything into the
//      single storage member
//  {2} atomic Return, inputs already cover Return::Size: the superfluous last input
//      is dropped
//  {3} non-atomic Return whose left half still overlaps the inputs: fill both
//      storage halves from the inputs
//  {4} non-atomic Return where the inputs fit entirely into the left half: the
//      right half becomes zero
#define Vc_SIMDARRAY_CASTS(SimdArrayType_, NativeType_)                                  \
    template <typename Return, typename T, typename A, typename... Froms>                \
    Vc_INTRINSIC Vc_CONST enable_if<                                                     \
        (Traits::isAtomic##SimdArrayType_<Return>::value &&                              \
         is_less<NativeType_<T, A>::Size * sizeof...(Froms), Return::Size>::value &&     \
         are_all_types_equal<NativeType_<T, A>, Froms...>::value),                       \
        Return>                                                                          \
    simd_cast(NativeType_<T, A> x, Froms... xs)                                          \
    {                                                                                    \
        vc_debug_("simd_cast{1}(", ")\n", x, xs...);                                     \
        return {simd_cast<typename Return::storage_type>(x, xs...)};                     \
    }                                                                                    \
    template <typename Return, typename T, typename A, typename... Froms>                \
    Vc_INTRINSIC Vc_CONST enable_if<                                                     \
        (Traits::isAtomic##SimdArrayType_<Return>::value &&                              \
         !is_less<NativeType_<T, A>::Size * sizeof...(Froms), Return::Size>::value &&    \
         are_all_types_equal<NativeType_<T, A>, Froms...>::value),                       \
        Return>                                                                          \
    simd_cast(NativeType_<T, A> x, Froms... xs)                                          \
    {                                                                                    \
        vc_debug_("simd_cast{2}(", ")\n", x, xs...);                                     \
        return {simd_cast_without_last<Return, NativeType_<T, A>, Froms...>(x, xs...)};  \
    }                                                                                    \
    template <typename Return, typename T, typename A, typename... Froms>                \
    Vc_INTRINSIC Vc_CONST                                                                \
        enable_if<(Traits::is##SimdArrayType_<Return>::value &&                          \
                   !Traits::isAtomic##SimdArrayType_<Return>::value &&                   \
                   is_less<Common::left_size<Return::Size>(),                            \
                           NativeType_<T, A>::Size *(1 + sizeof...(Froms))>::value &&    \
                   are_all_types_equal<NativeType_<T, A>, Froms...>::value),             \
                  Return>                                                                \
        simd_cast(NativeType_<T, A> x, Froms... xs)                                      \
    {                                                                                    \
        vc_debug_("simd_cast{3}(", ")\n", x, xs...);                                     \
        using R0 = typename Return::storage_type0;                                       \
        using R1 = typename Return::storage_type1;                                       \
        return {simd_cast_drop_arguments<R0, Froms...>(x, xs...),                        \
                simd_cast_with_offset<R1, R0::Size>(x, xs...)};                          \
    }                                                                                    \
    template <typename Return, typename T, typename A, typename... Froms>                \
    Vc_INTRINSIC Vc_CONST                                                                \
        enable_if<(Traits::is##SimdArrayType_<Return>::value &&                          \
                   !Traits::isAtomic##SimdArrayType_<Return>::value &&                   \
                   !is_less<Common::left_size<Return::Size>(),                           \
                            NativeType_<T, A>::Size *(1 + sizeof...(Froms))>::value &&   \
                   are_all_types_equal<NativeType_<T, A>, Froms...>::value),             \
                  Return>                                                                \
        simd_cast(NativeType_<T, A> x, Froms... xs)                                      \
    {                                                                                    \
        vc_debug_("simd_cast{4}(", ")\n", x, xs...);                                     \
        using R0 = typename Return::storage_type0;                                       \
        using R1 = typename Return::storage_type1;                                       \
        return {simd_cast<R0>(x, xs...), R1::Zero()};                                    \
    }                                                                                    \
    Vc_NOTHING_EXPECTING_SEMICOLON

Vc_SIMDARRAY_CASTS(SimdArray, Vc::Vector);
Vc_SIMDARRAY_CASTS(SimdMaskArray, Vc::Mask);
#undef Vc_SIMDARRAY_CASTS
2016 | |||
// simd_cast<SimdArray/-mask, offset>(V) {{{2
// Generates the offset-taking simd_cast overloads from a native Vector/Mask into a
// SimdArray/SimdMaskArray; `offset` is counted in multiples of Return::Size entries.
#define Vc_SIMDARRAY_CASTS(SimdArrayType_, NativeType_)                                  \
    /* SIMD Vector/Mask to atomic SimdArray/simdmaskarray */                             \
    template <typename Return, int offset, typename T, typename A>                       \
    Vc_INTRINSIC Vc_CONST                                                                \
        enable_if<Traits::isAtomic##SimdArrayType_<Return>::value, Return>               \
        simd_cast(NativeType_<T, A> x Vc_DUMMY_ARG0)                                     \
    {                                                                                    \
        vc_debug_("simd_cast{offset, atomic}(", ")\n", offset, x);                       \
        return {simd_cast<typename Return::storage_type, offset>(x)};                    \
    }                                                                                    \
    /* both halves of Return array are extracted from argument */                        \
    template <typename Return, int offset, typename T, typename A>                       \
    Vc_INTRINSIC Vc_CONST                                                                \
        enable_if<(Traits::is##SimdArrayType_<Return>::value &&                          \
                   !Traits::isAtomic##SimdArrayType_<Return>::value &&                   \
                   Return::Size * offset + Common::left_size<Return::Size>() <           \
                       NativeType_<T, A>::Size),                                         \
                  Return>                                                                \
        simd_cast(NativeType_<T, A> x Vc_DUMMY_ARG1)                                     \
    {                                                                                    \
        vc_debug_("simd_cast{offset, split Return}(", ")\n", offset, x);                 \
        using R0 = typename Return::storage_type0;                                       \
        constexpr int entries_offset = offset * Return::Size;                            \
        constexpr int entries_offset_right = entries_offset + R0::Size;                  \
        return {                                                                         \
            simd_cast_with_offset<typename Return::storage_type0, entries_offset>(x),    \
            simd_cast_with_offset<typename Return::storage_type1, entries_offset_right>( \
                x)};                                                                     \
    }                                                                                    \
    /* SIMD Vector/Mask to non-atomic SimdArray/simdmaskarray */                         \
    /* right half of Return array is zero */                                             \
    template <typename Return, int offset, typename T, typename A>                       \
    Vc_INTRINSIC Vc_CONST                                                                \
        enable_if<(Traits::is##SimdArrayType_<Return>::value &&                          \
                   !Traits::isAtomic##SimdArrayType_<Return>::value &&                   \
                   Return::Size * offset + Common::left_size<Return::Size>() >=          \
                       NativeType_<T, A>::Size),                                         \
                  Return>                                                                \
        simd_cast(NativeType_<T, A> x Vc_DUMMY_ARG2)                                     \
    {                                                                                    \
        vc_debug_("simd_cast{offset, R1::Zero}(", ")\n", offset, x);                     \
        using R0 = typename Return::storage_type0;                                       \
        using R1 = typename Return::storage_type1;                                       \
        constexpr int entries_offset = offset * Return::Size;                            \
        return {simd_cast_with_offset<R0, entries_offset>(x), R1::Zero()};               \
    }                                                                                    \
    Vc_NOTHING_EXPECTING_SEMICOLON

Vc_SIMDARRAY_CASTS(SimdArray, Vc::Vector);
Vc_SIMDARRAY_CASTS(SimdMaskArray, Vc::Mask);
#undef Vc_SIMDARRAY_CASTS
2069 | |||
2070 | // simd_cast<T>(xs...) from SimdArray/-mask {{{2 | ||
2071 | #define Vc_SIMDARRAY_CASTS(SimdArrayType_) \ | ||
2072 | /* indivisible SimdArrayType_ */ \ | ||
2073 | template <typename Return, typename T, std::size_t N, typename V, typename... From> \ | ||
2074 | Vc_INTRINSIC Vc_CONST \ | ||
2075 | enable_if<(are_all_types_equal<SimdArrayType_<T, N, V, N>, From...>::value && \ | ||
2076 | (sizeof...(From) == 0 || N * sizeof...(From) < Return::Size) && \ | ||
2077 | !std::is_same<Return, SimdArrayType_<T, N, V, N>>::value), \ | ||
2078 | Return> \ | ||
2079 | simd_cast(const SimdArrayType_<T, N, V, N> &x0, const From &... xs) \ | ||
2080 | { \ | ||
2081 | vc_debug_("simd_cast{indivisible}(", ")\n", x0, xs...); \ | ||
2082 | return simd_cast<Return>(internal_data(x0), internal_data(xs)...); \ | ||
2083 | } \ | ||
2084 | /* indivisible SimdArrayType_ && can drop arguments from the end */ \ | ||
2085 | template <typename Return, typename T, std::size_t N, typename V, typename... From> \ | ||
2086 | Vc_INTRINSIC Vc_CONST \ | ||
2087 | enable_if<(are_all_types_equal<SimdArrayType_<T, N, V, N>, From...>::value && \ | ||
2088 | (sizeof...(From) > 0 && (N * sizeof...(From) >= Return::Size)) && \ | ||
2089 | !std::is_same<Return, SimdArrayType_<T, N, V, N>>::value), \ | ||
2090 | Return> \ | ||
2091 | simd_cast(const SimdArrayType_<T, N, V, N> &x0, const From &... xs) \ | ||
2092 | { \ | ||
2093 | vc_debug_("simd_cast{indivisible2}(", ")\n", x0, xs...); \ | ||
2094 | return simd_cast_without_last<Return, \ | ||
2095 | typename SimdArrayType_<T, N, V, N>::storage_type, \ | ||
2096 | typename From::storage_type...>( \ | ||
2097 | internal_data(x0), internal_data(xs)...); \ | ||
2098 | } \ | ||
2099 | /* bisectable SimdArrayType_ (N = 2^n) && never too large */ \ | ||
2100 | template <typename Return, typename T, std::size_t N, typename V, std::size_t M, \ | ||
2101 | typename... From> \ | ||
2102 | Vc_INTRINSIC Vc_CONST enable_if< \ | ||
2103 | (N != M && are_all_types_equal<SimdArrayType_<T, N, V, M>, From...>::value && \ | ||
2104 | !std::is_same<Return, SimdArrayType_<T, N, V, M>>::value && \ | ||
2105 | is_less<N * sizeof...(From), Return::Size>::value && is_power_of_2<N>::value), \ | ||
2106 | Return> \ | ||
2107 | simd_cast(const SimdArrayType_<T, N, V, M> &x0, const From &... xs) \ | ||
2108 | { \ | ||
2109 | vc_debug_("simd_cast{bisectable}(", ")\n", x0, xs...); \ | ||
2110 | return simd_cast_interleaved_argument_order< \ | ||
2111 | Return, typename SimdArrayType_<T, N, V, M>::storage_type0, \ | ||
2112 | typename From::storage_type0...>(internal_data0(x0), internal_data0(xs)..., \ | ||
2113 | internal_data1(x0), internal_data1(xs)...); \ | ||
2114 | } \ | ||
2115 | /* bisectable SimdArrayType_ (N = 2^n) && input so large that at least the last \ | ||
2116 | * input can be dropped */ \ | ||
2117 | template <typename Return, typename T, std::size_t N, typename V, std::size_t M, \ | ||
2118 | typename... From> \ | ||
2119 | Vc_INTRINSIC Vc_CONST enable_if< \ | ||
2120 | (N != M && are_all_types_equal<SimdArrayType_<T, N, V, M>, From...>::value && \ | ||
2121 | !is_less<N * sizeof...(From), Return::Size>::value && is_power_of_2<N>::value), \ | ||
2122 | Return> \ | ||
2123 | simd_cast(const SimdArrayType_<T, N, V, M> &x0, const From &... xs) \ | ||
2124 | { \ | ||
2125 | vc_debug_("simd_cast{bisectable2}(", ")\n", x0, xs...); \ | ||
2126 | return simd_cast_without_last<Return, SimdArrayType_<T, N, V, M>, From...>( \ | ||
2127 | x0, xs...); \ | ||
2128 | } \ | ||
2129 | /* remaining SimdArrayType_ input never larger (N != 2^n) */ \ | ||
2130 | template <typename Return, typename T, std::size_t N, typename V, std::size_t M, \ | ||
2131 | typename... From> \ | ||
2132 | Vc_INTRINSIC Vc_CONST enable_if< \ | ||
2133 | (N != M && are_all_types_equal<SimdArrayType_<T, N, V, M>, From...>::value && \ | ||
2134 | N * (1 + sizeof...(From)) <= Return::Size && !is_power_of_2<N>::value), \ | ||
2135 | Return> \ | ||
2136 | simd_cast(const SimdArrayType_<T, N, V, M> &x0, const From &... xs) \ | ||
2137 | { \ | ||
2138 | vc_debug_("simd_cast{remaining}(", ")\n", x0, xs...); \ | ||
2139 | return simd_cast_impl_smaller_input<Return, N, SimdArrayType_<T, N, V, M>, \ | ||
2140 | From...>(x0, xs...); \ | ||
2141 | } \ | ||
2142 | /* remaining SimdArrayType_ input larger (N != 2^n) */ \ | ||
2143 | template <typename Return, typename T, std::size_t N, typename V, std::size_t M, \ | ||
2144 | typename... From> \ | ||
2145 | Vc_INTRINSIC Vc_CONST enable_if< \ | ||
2146 | (N != M && are_all_types_equal<SimdArrayType_<T, N, V, M>, From...>::value && \ | ||
2147 | N * (1 + sizeof...(From)) > Return::Size && !is_power_of_2<N>::value), \ | ||
2148 | Return> \ | ||
2149 | simd_cast(const SimdArrayType_<T, N, V, M> &x0, const From &... xs) \ | ||
2150 | { \ | ||
2151 | vc_debug_("simd_cast{remaining2}(", ")\n", x0, xs...); \ | ||
2152 | return simd_cast_impl_larger_input<Return, N, SimdArrayType_<T, N, V, M>, \ | ||
2153 | From...>(x0, xs...); \ | ||
2154 | } \ | ||
2155 | /* a single bisectable SimdArrayType_ (N = 2^n) too large */ \ | ||
2156 | template <typename Return, typename T, std::size_t N, typename V, std::size_t M> \ | ||
2157 | Vc_INTRINSIC Vc_CONST \ | ||
2158 | enable_if<(N != M && N >= 2 * Return::Size && is_power_of_2<N>::value), Return> \ | ||
2159 | simd_cast(const SimdArrayType_<T, N, V, M> &x) \ | ||
2160 | { \ | ||
2161 | vc_debug_("simd_cast{single bisectable}(", ")\n", x); \ | ||
2162 | return simd_cast<Return>(internal_data0(x)); \ | ||
2163 | } \ | ||
2164 | template <typename Return, typename T, std::size_t N, typename V, std::size_t M> \ | ||
2165 | Vc_INTRINSIC Vc_CONST enable_if<(N != M && N > Return::Size && \ | ||
2166 | N < 2 * Return::Size && is_power_of_2<N>::value), \ | ||
2167 | Return> \ | ||
2168 | simd_cast(const SimdArrayType_<T, N, V, M> &x) \ | ||
2169 | { \ | ||
2170 | vc_debug_("simd_cast{single bisectable2}(", ")\n", x); \ | ||
2171 | return simd_cast<Return>(internal_data0(x), internal_data1(x)); \ | ||
2172 | } \ | ||
2173 | Vc_NOTHING_EXPECTING_SEMICOLON | ||
2174 | |||
// Instantiate the simd_cast overloads above for both the value-array and the
// mask-array class templates, then retire the helper macro.
Vc_SIMDARRAY_CASTS(SimdArray);
Vc_SIMDARRAY_CASTS(SimdMaskArray);
#undef Vc_SIMDARRAY_CASTS
2178 | |||
// simd_cast<T, offset>(SimdArray/-mask) {{{2
// These overloads implement simd_cast<Return, offset>(x): convert the slice of x
// that starts at element offset * Return::Size. The enable_if conditions are
// mutually exclusive, so exactly one overload participates per instantiation:
//   1. offset == 0: identical to the offset-free cast above.
//   2. atomic case (N == M): forward to the single internal vector.
//   3. the slice lies in the right member and the left member's size is a
//      multiple of Return::Size: recurse into internal_data1 with an adjusted
//      block offset.
//   4. same, but the left size is not a multiple of Return::Size: recurse via
//      simd_cast_with_offset with an element offset instead.
//   5. the slice fits entirely into the left member: recurse into internal_data0.
//   6. the slice straddles the left/right boundary: fall back to element-wise
//      scalar copies.
#define Vc_SIMDARRAY_CASTS(SimdArrayType_)                                               \
    /* offset == 0 is like without offset */                                             \
    template <typename Return, int offset, typename T, std::size_t N, typename V,        \
              std::size_t M>                                                             \
    Vc_INTRINSIC Vc_CONST enable_if<(offset == 0), Return> simd_cast(                    \
        const SimdArrayType_<T, N, V, M> &x Vc_DUMMY_ARG0)                               \
    {                                                                                    \
        vc_debug_("simd_cast{offset == 0}(", ")\n", offset, x);                          \
        return simd_cast<Return>(x);                                                     \
    }                                                                                    \
    /* forward to V */                                                                   \
    template <typename Return, int offset, typename T, std::size_t N, typename V>        \
    Vc_INTRINSIC Vc_CONST enable_if<(offset != 0), Return> simd_cast(                    \
        const SimdArrayType_<T, N, V, N> &x Vc_DUMMY_ARG1)                               \
    {                                                                                    \
        vc_debug_("simd_cast{offset, forward}(", ")\n", offset, x);                      \
        return simd_cast<Return, offset>(internal_data(x));                              \
    }                                                                                    \
    /* convert from right member of SimdArray */                                         \
    template <typename Return, int offset, typename T, std::size_t N, typename V,        \
              std::size_t M>                                                             \
    Vc_INTRINSIC Vc_CONST                                                                \
        enable_if<(N != M && offset * Return::Size >= Common::left_size<N>() &&          \
                   offset != 0 && Common::left_size<N>() % Return::Size == 0),           \
                  Return>                                                                \
        simd_cast(const SimdArrayType_<T, N, V, M> &x Vc_DUMMY_ARG2)                     \
    {                                                                                    \
        vc_debug_("simd_cast{offset, right}(", ")\n", offset, x);                        \
        return simd_cast<Return, offset - Common::left_size<N>() / Return::Size>(        \
            internal_data1(x));                                                          \
    }                                                                                    \
    /* same as above except for odd cases where offset * Return::Size doesn't fit the \
     * left side of the SimdArray */                                                  \
    template <typename Return, int offset, typename T, std::size_t N, typename V,        \
              std::size_t M>                                                             \
    Vc_INTRINSIC Vc_CONST                                                                \
        enable_if<(N != M && offset * Return::Size >= Common::left_size<N>() &&          \
                   offset != 0 && Common::left_size<N>() % Return::Size != 0),           \
                  Return>                                                                \
        simd_cast(const SimdArrayType_<T, N, V, M> &x Vc_DUMMY_ARG3)                     \
    {                                                                                    \
        vc_debug_("simd_cast{offset, right, nofit}(", ")\n", offset, x);                 \
        return simd_cast_with_offset<Return,                                             \
                                     offset * Return::Size - Common::left_size<N>()>(    \
            internal_data1(x));                                                          \
    }                                                                                    \
    /* convert from left member of SimdArray */                                          \
    template <typename Return, int offset, typename T, std::size_t N, typename V,        \
              std::size_t M>                                                             \
    Vc_INTRINSIC Vc_CONST enable_if<                                                     \
        (N != M && /*offset * Return::Size < Common::left_size<N>() &&*/                 \
         offset != 0 && (offset + 1) * Return::Size <= Common::left_size<N>()),          \
        Return>                                                                          \
        simd_cast(const SimdArrayType_<T, N, V, M> &x Vc_DUMMY_ARG4)                     \
    {                                                                                    \
        vc_debug_("simd_cast{offset, left}(", ")\n", offset, x);                         \
        return simd_cast<Return, offset>(internal_data0(x));                             \
    }                                                                                    \
    /* fallback to copying scalars */                                                    \
    template <typename Return, int offset, typename T, std::size_t N, typename V,        \
              std::size_t M>                                                             \
    Vc_INTRINSIC Vc_CONST                                                                \
        enable_if<(N != M && (offset * Return::Size < Common::left_size<N>()) &&         \
                   offset != 0 && (offset + 1) * Return::Size > Common::left_size<N>()), \
                  Return>                                                                \
        simd_cast(const SimdArrayType_<T, N, V, M> &x Vc_DUMMY_ARG5)                     \
    {                                                                                    \
        vc_debug_("simd_cast{offset, copy scalars}(", ")\n", offset, x);                 \
        using R = typename Return::EntryType;                                            \
        Return r = Return::Zero();                                                       \
        for (std::size_t i = offset * Return::Size;                                      \
             i < std::min(N, (offset + 1) * Return::Size); ++i) {                        \
            r[i - offset * Return::Size] = static_cast<R>(x[i]);                         \
        }                                                                                \
        return r;                                                                        \
    }                                                                                    \
    Vc_NOTHING_EXPECTING_SEMICOLON
Vc_SIMDARRAY_CASTS(SimdArray);
Vc_SIMDARRAY_CASTS(SimdMaskArray);
#undef Vc_SIMDARRAY_CASTS
2260 | // simd_cast_drop_arguments (definitions) {{{2 | ||
2261 | template <typename Return, typename From> | ||
2262 | Vc_INTRINSIC Vc_CONST Return simd_cast_drop_arguments(From x) | ||
2263 | { | ||
2264 | return simd_cast<Return>(x); | ||
2265 | } | ||
/// \internal Enabled while the leading Froms... alone do not yet fill Return:
/// the trailing argument x is still needed, so all arguments are forwarded to
/// simd_cast. Froms... is a non-deduced leading pack — callers name the
/// template arguments explicitly.
template <typename Return, typename... Froms>
Vc_INTRINSIC Vc_CONST
    enable_if<(are_all_types_equal<Froms...>::value &&
               sizeof...(Froms) * first_type_of<Froms...>::Size < Return::Size),
              Return>
    simd_cast_drop_arguments(Froms... xs, first_type_of<Froms...> x)
{
    return simd_cast<Return>(xs..., x);
}
// The following function can be implemented without the sizeof...(From) overload.
// However, ICC has a bug (Premier Issue #6000116338) which leads to an ICE. Splitting the
// function in two works around the issue.
/// \internal The kept arguments already cover at least Return::Size elements:
/// discard the unnamed trailing argument and recurse with one argument fewer.
template <typename Return, typename From, typename... Froms>
Vc_INTRINSIC Vc_CONST enable_if<
    (are_all_types_equal<From, Froms...>::value &&
     (1 + sizeof...(Froms)) * From::Size >= Return::Size && sizeof...(Froms) != 0),
    Return>
simd_cast_drop_arguments(Froms... xs, From x, From)
{
    return simd_cast_drop_arguments<Return, Froms...>(xs..., x);
}
/// \internal Two-argument recursion terminus (the sizeof...(Froms) != 0 split
/// required by the ICC workaround above): the first argument alone suffices,
/// the unnamed second one is dropped.
template <typename Return, typename From>
Vc_INTRINSIC Vc_CONST
    enable_if<(are_all_types_equal<From>::value && From::Size >= Return::Size), Return>
    simd_cast_drop_arguments(From x, From)
{
    return simd_cast_drop_arguments<Return>(x);
}
2294 | |||
2295 | // simd_cast_with_offset (definitions) {{{2 | ||
2296 | template <typename Return, std::size_t offset, typename From> | ||
2297 | Vc_INTRINSIC Vc_CONST | ||
2298 | enable_if<(From::Size > offset && offset > 0 && offset % Return::Size == 0), | ||
2299 | Return> simd_cast_with_offset(const From &x) | ||
2300 | { | ||
2301 | return simd_cast<Return, offset / Return::Size>(x); | ||
2302 | } | ||
2303 | template <typename Return, std::size_t offset, typename From> | ||
2304 | Vc_INTRINSIC Vc_CONST | ||
2305 | enable_if<(From::Size > offset && offset > 0 && offset % Return::Size != 0 && | ||
2306 | ((Traits::isSimdArray<Return>::value && | ||
2307 | !Traits::isAtomicSimdArray<Return>::value) || | ||
2308 | (Traits::isSimdMaskArray<Return>::value && | ||
2309 | !Traits::isAtomicSimdMaskArray<Return>::value))), | ||
2310 | Return> | ||
2311 | simd_cast_with_offset(const From &x) | ||
2312 | { | ||
2313 | using R0 = typename Return::storage_type0; | ||
2314 | using R1 = typename Return::storage_type1; | ||
2315 | return {simd_cast_with_offset<R0, offset>(x), | ||
2316 | simd_cast_with_offset<R1, offset + R0::Size>(x)}; | ||
2317 | } | ||
/// \internal The offset is not a multiple of Return::Size and Return is an
/// atomic SimdArray/SimdMaskArray: shift the input by the remainder so that the
/// remaining offset becomes an exact multiple, then do a block-offset cast.
template <typename Return, std::size_t offset, typename From>
Vc_INTRINSIC Vc_CONST
    enable_if<(From::Size > offset && offset > 0 && offset % Return::Size != 0 &&
               ((Traits::isSimdArray<Return>::value &&
                 Traits::isAtomicSimdArray<Return>::value) ||
                (Traits::isSimdMaskArray<Return>::value &&
                 Traits::isAtomicSimdMaskArray<Return>::value))),
              Return>
    simd_cast_with_offset(const From &x)
{
    return simd_cast<Return, offset / Return::Size>(x.shifted(offset % Return::Size));
}
2330 | template <typename Return, std::size_t offset, typename From, typename... Froms> | ||
2331 | Vc_INTRINSIC Vc_CONST | ||
2332 | enable_if<(are_all_types_equal<From, Froms...>::value && offset == 0), Return> | ||
2333 | simd_cast_with_offset(const From &x, const Froms &... xs) | ||
2334 | { | ||
2335 | return simd_cast<Return>(x, xs...); | ||
2336 | } | ||
2337 | |||
2338 | // simd_cast_without_last (definition) {{{2 | ||
2339 | template <typename Return, typename T, typename... From> | ||
2340 | Vc_INTRINSIC Vc_CONST Return simd_cast_without_last(const From &... xs, const T &) | ||
2341 | { | ||
2342 | return simd_cast<Return>(xs...); | ||
2343 | } | ||
2344 | |||
// simd_cast_interleaved_argument_order (definitions) {{{2

#ifdef Vc_MSVC
// MSVC doesn't see that the Ts pack below can be empty and thus complains when extract_interleaved
// is called with only 2 arguments. These overloads here are *INCORRECT standard C++*, but they make
// MSVC do the right thing.
/// \internal MSVC-only two-argument form: yields the first argument for I == 0.
template <std::size_t I, typename T0>
Vc_INTRINSIC Vc_CONST enable_if<(I == 0), T0> extract_interleaved(const T0 &a0, const T0 &)
{
    return a0;
}
/// \internal MSVC-only two-argument form: yields the second argument for I == 1.
template <std::size_t I, typename T0>
Vc_INTRINSIC Vc_CONST enable_if<(I == 1), T0> extract_interleaved(const T0 &, const T0 &b0)
{
    return b0;
}
#endif  // Vc_MSVC
2362 | |||
/// \internal returns the first argument
// The two Ts... packs here are non-deduced; I and Ts are always supplied
// explicitly (see simd_cast_interleaved_argument_order_1 below).
template <std::size_t I, typename T0, typename... Ts>
Vc_INTRINSIC Vc_CONST enable_if<(I == 0), T0> extract_interleaved(const T0 &a0,
                                                                  const Ts &...,
                                                                  const T0 &,
                                                                  const Ts &...)
{
    return a0;
}
/// \internal returns the center argument
// I == 1 selects the first element of the second half of the interleaved
// argument list (b0 sits right after the first Ts... pack).
template <std::size_t I, typename T0, typename... Ts>
Vc_INTRINSIC Vc_CONST enable_if<(I == 1), T0> extract_interleaved(const T0 &,
                                                                  const Ts &...,
                                                                  const T0 &b0,
                                                                  const Ts &...)
{
    return b0;
}
/// \internal drops the first and center arguments and recurses
// Each step removes the head of both halves, so I shrinks by 2 per recursion
// until one of the I == 0 / I == 1 overloads above terminates it.
template <std::size_t I, typename T0, typename... Ts>
Vc_INTRINSIC Vc_CONST enable_if<(I > 1), T0> extract_interleaved(const T0 &,
                                                                 const Ts &... a,
                                                                 const T0 &,
                                                                 const Ts &... b)
{
    return extract_interleaved<I - 2, Ts...>(a..., b...);
}
/// \internal calls simd_cast with correct argument order thanks to extract_interleaved
// Expanding Indexes = 0, 1, 2, ... yields the sequence a0, b0, a1, b1, ...: the
// i-th cast argument alternates between the a and b packs.
template <typename Return, typename... Ts, std::size_t... Indexes>
Vc_INTRINSIC Vc_CONST Return
simd_cast_interleaved_argument_order_1(index_sequence<Indexes...>, const Ts &... a,
                                       const Ts &... b)
{
    return simd_cast<Return>(extract_interleaved<Indexes, Ts...>(a..., b...)...);
}
2398 | /// \internal constructs the necessary index_sequence to pass it to | ||
2399 | /// simd_cast_interleaved_argument_order_1 | ||
2400 | template <typename Return, typename... Ts> | ||
2401 | Vc_INTRINSIC Vc_CONST Return | ||
2402 | simd_cast_interleaved_argument_order(const Ts &... a, const Ts &... b) | ||
2403 | { | ||
2404 | using seq = make_index_sequence<sizeof...(Ts)*2>; | ||
2405 | return simd_cast_interleaved_argument_order_1<Return, Ts...>(seq(), a..., b...); | ||
2406 | } | ||
2407 | |||
// conditional_assign {{{1
// Write-masked compound assignment: generates one conditional_assign overload
// per operator tag, selected via the Operator enumerator given as template
// argument. The body applies `lhs(mask) op_ rhs`, i.e. the operation only
// affects the lanes selected by mask.
#define Vc_CONDITIONAL_ASSIGN(name_, op_)                                                \
    template <Operator O, typename T, std::size_t N, typename V, size_t VN, typename M,  \
              typename U>                                                                \
    Vc_INTRINSIC enable_if<O == Operator::name_, void> conditional_assign(               \
        SimdArray<T, N, V, VN> &lhs, M &&mask, U &&rhs)                                  \
    {                                                                                    \
        lhs(mask) op_ rhs;                                                               \
    }                                                                                    \
    Vc_NOTHING_EXPECTING_SEMICOLON
Vc_CONDITIONAL_ASSIGN(          Assign,  =);
Vc_CONDITIONAL_ASSIGN(      PlusAssign, +=);
Vc_CONDITIONAL_ASSIGN(     MinusAssign, -=);
Vc_CONDITIONAL_ASSIGN(  MultiplyAssign, *=);
Vc_CONDITIONAL_ASSIGN(    DivideAssign, /=);
Vc_CONDITIONAL_ASSIGN( RemainderAssign, %=);
Vc_CONDITIONAL_ASSIGN(       XorAssign, ^=);
Vc_CONDITIONAL_ASSIGN(       AndAssign, &=);
Vc_CONDITIONAL_ASSIGN(        OrAssign, |=);
Vc_CONDITIONAL_ASSIGN( LeftShiftAssign,<<=);
Vc_CONDITIONAL_ASSIGN(RightShiftAssign,>>=);
#undef Vc_CONDITIONAL_ASSIGN
2430 | |||
// Write-masked increment/decrement: one overload per pre/post in/decrement
// operator, again selected on the Operator enumerator; returns the value of the
// masked expression.
#define Vc_CONDITIONAL_ASSIGN(name_, expr_)                                              \
    template <Operator O, typename T, std::size_t N, typename V, size_t VN, typename M>  \
    Vc_INTRINSIC enable_if<O == Operator::name_, SimdArray<T, N, V, VN>>                 \
    conditional_assign(SimdArray<T, N, V, VN> &lhs, M &&mask)                            \
    {                                                                                    \
        return expr_;                                                                    \
    }                                                                                    \
    Vc_NOTHING_EXPECTING_SEMICOLON
Vc_CONDITIONAL_ASSIGN(PostIncrement, lhs(mask)++);
Vc_CONDITIONAL_ASSIGN( PreIncrement, ++lhs(mask));
Vc_CONDITIONAL_ASSIGN(PostDecrement, lhs(mask)--);
Vc_CONDITIONAL_ASSIGN( PreDecrement, --lhs(mask));
#undef Vc_CONDITIONAL_ASSIGN
// transpose_impl {{{1
namespace Common
{
// 4x4 transpose of SimdArrays that wrap exactly one vector each (VN == N):
// strip the SimdArray layer and forward to the transpose_impl overload for the
// underlying vector type V.
template <typename T, size_t N, typename V>
inline void transpose_impl(
    TransposeTag<4, 4>, SimdArray<T, N, V, N> *Vc_RESTRICT r[],
    const TransposeProxy<SimdArray<T, N, V, N>, SimdArray<T, N, V, N>,
                         SimdArray<T, N, V, N>, SimdArray<T, N, V, N>> &proxy)
{
    V *Vc_RESTRICT r2[4] = {&internal_data(*r[0]), &internal_data(*r[1]),
                            &internal_data(*r[2]), &internal_data(*r[3])};
    transpose_impl(TransposeTag<4, 4>(), &r2[0],
                   TransposeProxy<V, V, V, V>{internal_data(std::get<0>(proxy.in)),
                                              internal_data(std::get<1>(proxy.in)),
                                              internal_data(std::get<2>(proxy.in)),
                                              internal_data(std::get<3>(proxy.in))});
}

// 2x4 case: four 2-element inputs become two 4-element outputs. r[0] collects
// the low element (internal_data0) of every input, r[1] the high element
// (internal_data1), written quarter by quarter into the outputs' nested halves.
template <typename T, typename V>
inline void transpose_impl(
    TransposeTag<2, 4>, SimdArray<T, 4, V, 1> *Vc_RESTRICT r[],
    const TransposeProxy<SimdArray<T, 2, V, 1>, SimdArray<T, 2, V, 1>,
                         SimdArray<T, 2, V, 1>, SimdArray<T, 2, V, 1>> &proxy)
{
    auto &lo = *r[0];
    auto &hi = *r[1];
    internal_data0(internal_data0(lo)) = internal_data0(std::get<0>(proxy.in));
    internal_data1(internal_data0(lo)) = internal_data0(std::get<1>(proxy.in));
    internal_data0(internal_data1(lo)) = internal_data0(std::get<2>(proxy.in));
    internal_data1(internal_data1(lo)) = internal_data0(std::get<3>(proxy.in));
    internal_data0(internal_data0(hi)) = internal_data1(std::get<0>(proxy.in));
    internal_data1(internal_data0(hi)) = internal_data1(std::get<1>(proxy.in));
    internal_data0(internal_data1(hi)) = internal_data1(std::get<2>(proxy.in));
    internal_data1(internal_data1(hi)) = internal_data1(std::get<3>(proxy.in));
}

// 4x4 case for single-element SimdArrays (N == 1, scalar-backed): unwrap to the
// underlying V objects, same as the general VN == N overload above.
template <typename T, typename V>
inline void transpose_impl(
    TransposeTag<4, 4>, SimdArray<T, 1, V, 1> *Vc_RESTRICT r[],
    const TransposeProxy<SimdArray<T, 1, V, 1>, SimdArray<T, 1, V, 1>,
                         SimdArray<T, 1, V, 1>, SimdArray<T, 1, V, 1>> &proxy)
{
    V *Vc_RESTRICT r2[4] = {&internal_data(*r[0]), &internal_data(*r[1]),
                            &internal_data(*r[2]), &internal_data(*r[3])};
    transpose_impl(TransposeTag<4, 4>(), &r2[0],
                   TransposeProxy<V, V, V, V>{internal_data(std::get<0>(proxy.in)),
                                              internal_data(std::get<1>(proxy.in)),
                                              internal_data(std::get<2>(proxy.in)),
                                              internal_data(std::get<3>(proxy.in))});
}

// 4x4 case for non-atomic SimdArrays with VSize 1: split the four outputs into
// two pairs and run two 2x4 transposes, one on the inputs' low halves and one
// on their high halves. NOTE(review): the halves are passed as SimdArray<T, 2>,
// which presumes internal_data0/1 of the inputs have that type (i.e. N == 4) —
// confirm against the instantiations.
template <typename T, size_t N, typename V>
inline void transpose_impl(
    TransposeTag<4, 4>, SimdArray<T, N, V, 1> *Vc_RESTRICT r[],
    const TransposeProxy<SimdArray<T, N, V, 1>, SimdArray<T, N, V, 1>,
                         SimdArray<T, N, V, 1>, SimdArray<T, N, V, 1>> &proxy)
{
    SimdArray<T, N, V, 1> *Vc_RESTRICT r0[4 / 2] = {r[0], r[1]};
    SimdArray<T, N, V, 1> *Vc_RESTRICT r1[4 / 2] = {r[2], r[3]};
    using H = SimdArray<T, 2>;
    transpose_impl(TransposeTag<2, 4>(), &r0[0],
                   TransposeProxy<H, H, H, H>{internal_data0(std::get<0>(proxy.in)),
                                              internal_data0(std::get<1>(proxy.in)),
                                              internal_data0(std::get<2>(proxy.in)),
                                              internal_data0(std::get<3>(proxy.in))});
    transpose_impl(TransposeTag<2, 4>(), &r1[0],
                   TransposeProxy<H, H, H, H>{internal_data1(std::get<0>(proxy.in)),
                                              internal_data1(std::get<1>(proxy.in)),
                                              internal_data1(std::get<2>(proxy.in)),
                                              internal_data1(std::get<3>(proxy.in))});
}

/* TODO:
template <typename T, std::size_t N, typename V, std::size_t VSize>
inline enable_if<(N > VSize), void> transpose_impl(
    std::array<SimdArray<T, N, V, VSize> * Vc_RESTRICT, 4> & r,
    const TransposeProxy<SimdArray<T, N, V, VSize>, SimdArray<T, N, V, VSize>,
                         SimdArray<T, N, V, VSize>, SimdArray<T, N, V, VSize>> &proxy)
{
    typedef SimdArray<T, N, V, VSize> SA;
    std::array<typename SA::storage_type0 * Vc_RESTRICT, 4> r0 = {
        {&internal_data0(*r[0]), &internal_data0(*r[1]), &internal_data0(*r[2]),
         &internal_data0(*r[3])}};
    transpose_impl(
        r0, TransposeProxy<typename SA::storage_type0, typename SA::storage_type0,
                           typename SA::storage_type0, typename SA::storage_type0>{
                internal_data0(std::get<0>(proxy.in)),
                internal_data0(std::get<1>(proxy.in)),
                internal_data0(std::get<2>(proxy.in)),
                internal_data0(std::get<3>(proxy.in))});

    std::array<typename SA::storage_type1 * Vc_RESTRICT, 4> r1 = {
        {&internal_data1(*r[0]), &internal_data1(*r[1]), &internal_data1(*r[2]),
         &internal_data1(*r[3])}};
    transpose_impl(
        r1, TransposeProxy<typename SA::storage_type1, typename SA::storage_type1,
                           typename SA::storage_type1, typename SA::storage_type1>{
                internal_data1(std::get<0>(proxy.in)),
                internal_data1(std::get<1>(proxy.in)),
                internal_data1(std::get<2>(proxy.in)),
                internal_data1(std::get<3>(proxy.in))});
}
*/
}  // namespace Common
2548 | |||
// Traits static assertions {{{1
// Compile-time sanity checks: Traits::has_no_allocated_data must see through
// every combination of const/volatile qualification and (rvalue-)reference
// wrapping applied to SimdArray.
static_assert(Traits::has_no_allocated_data<const volatile Vc::SimdArray<int, 4> &>::value, "");
static_assert(Traits::has_no_allocated_data<const volatile Vc::SimdArray<int, 4>>::value, "");
static_assert(Traits::has_no_allocated_data<volatile Vc::SimdArray<int, 4> &>::value, "");
static_assert(Traits::has_no_allocated_data<volatile Vc::SimdArray<int, 4>>::value, "");
static_assert(Traits::has_no_allocated_data<const Vc::SimdArray<int, 4> &>::value, "");
static_assert(Traits::has_no_allocated_data<const Vc::SimdArray<int, 4>>::value, "");
static_assert(Traits::has_no_allocated_data<Vc::SimdArray<int, 4>>::value, "");
static_assert(Traits::has_no_allocated_data<Vc::SimdArray<int, 4> &&>::value, "");
2558 | // }}}1 | ||
2559 | /// @} | ||
2560 | |||
2561 | } // namespace Vc_VERSIONED_NAMESPACE | ||
2562 | |||
2563 | // numeric_limits {{{1 | ||
2564 | namespace std | ||
2565 | { | ||
2566 | template <typename T, size_t N, typename V, size_t VN> | ||
2567 | struct numeric_limits<Vc::SimdArray<T, N, V, VN>> : public numeric_limits<T> { | ||
2568 | private: | ||
2569 | using R = Vc::SimdArray<T, N, V, VN>; | ||
2570 | |||
2571 | public: | ||
2572 | static Vc_ALWAYS_INLINE Vc_CONST R max() noexcept { return numeric_limits<T>::max(); } | ||
2573 | static Vc_ALWAYS_INLINE Vc_CONST R min() noexcept { return numeric_limits<T>::min(); } | ||
2574 | static Vc_ALWAYS_INLINE Vc_CONST R lowest() noexcept | ||
2575 | { | ||
2576 | return numeric_limits<T>::lowest(); | ||
2577 | } | ||
2578 | static Vc_ALWAYS_INLINE Vc_CONST R epsilon() noexcept | ||
2579 | { | ||
2580 | return numeric_limits<T>::epsilon(); | ||
2581 | } | ||
2582 | static Vc_ALWAYS_INLINE Vc_CONST R round_error() noexcept | ||
2583 | { | ||
2584 | return numeric_limits<T>::round_error(); | ||
2585 | } | ||
2586 | static Vc_ALWAYS_INLINE Vc_CONST R infinity() noexcept | ||
2587 | { | ||
2588 | return numeric_limits<T>::infinity(); | ||
2589 | } | ||
2590 | static Vc_ALWAYS_INLINE Vc_CONST R quiet_NaN() noexcept | ||
2591 | { | ||
2592 | return numeric_limits<T>::quiet_NaN(); | ||
2593 | } | ||
2594 | static Vc_ALWAYS_INLINE Vc_CONST R signaling_NaN() noexcept | ||
2595 | { | ||
2596 | return numeric_limits<T>::signaling_NaN(); | ||
2597 | } | ||
2598 | static Vc_ALWAYS_INLINE Vc_CONST R denorm_min() noexcept | ||
2599 | { | ||
2600 | return numeric_limits<T>::denorm_min(); | ||
2601 | } | ||
2602 | }; | ||
2603 | } // namespace std | ||
2604 | //}}}1 | ||
2605 | |||
2606 | #endif // VC_COMMON_SIMDARRAY_H_ | ||
2607 | |||
2608 | // vim: foldmethod=marker | ||
2609 |
Copyright (c) 2006-2012 Rogue Wave Software, Inc. All Rights Reserved.
Patents pending.