/home/users/khuck/src/octotiger/src/grid_fmm.cpp

Line% of fetchesSource
1  
/*
2  
 * grid_fmm.cpp
3  
 *
4  
 *  Created on: Jun 3, 2015
5  
 *      Author: dmarce1
6  
 */
7  
#include "grid.hpp"
8  
#include "simd.hpp"
9  
#include "profiler.hpp"
10  
#include "options.hpp"
11  
#include "taylor.hpp"
12  
13  
#include <hpx/include/parallel_for_loop.hpp>
14  
15  
#include <cstddef>
16  
#include <utility>
17  
18  
#ifdef USE_GRAV_PAR
19  
const auto for_loop_policy = hpx::parallel::par;
20  
#else
21  
const auto for_loop_policy = hpx::parallel::seq;
22  
#endif
23  
24  
25  
static std::vector<interaction_type> ilist_n;
26  
static std::vector<interaction_type> ilist_d;
27  
static std::vector<interaction_type> ilist_r;
28  
static std::vector<std::vector<boundary_interaction_type>> ilist_d_bnd(geo::direction::count());
29  
static std::vector<std::vector<boundary_interaction_type>> ilist_n_bnd(geo::direction::count());
30  
static taylor<4, real> factor;
31  
extern options opts;
32  
33  
template<class T>
34  
void load_multipole(taylor<4, T>& m, space_vector& c, const gravity_boundary_type& data, integer iter, bool monopole) {
35  
    if (monopole) {
36  
        m = T(0.0);
37  
        m = T((*(data.m))[iter]);
38  
    } else {
39  
        auto const& tmp1 = (*(data.M))[iter];
40  
#pragma GCC ivdep
41  
        for (int i = 0; i != 20; ++i) {
42  
            m[i] = tmp1[i];
43  
        }
44  
        auto const& tmp2 = (*(data.x))[iter];
45  
#pragma GCC ivdep
46  
        for (integer d = 0; d != NDIM; ++d) {
47  
            c[d] = tmp2[d];
48  
        }
49  
    }
50  
}
51  
52  
void find_eigenvectors(real q[3][3], real e[3][3], real lambda[3]) {
53  
    PROF_BEGIN;
54  
    real b0[3], b1[3], A, bdif;
55  
    int iter = 0;
56  
    for (int l = 0; l < 3; l++) {
57  
        b0[0] = b0[1] = b0[2] = 0.0;
58  
        b0[l] = 1.0;
59  
        do {
60  
            iter++;
61  
            b1[0] = b1[1] = b1[2] = 0.0;
62  
            for (int i = 0; i < 3; i++) {
63  
                for (int m = 0; m < 3; m++) {
64  
                    b1[i] += q[i][m] * b0[m];
65  
                }
66  
            }
67  
            A = sqrt(sqr(b1[0]) + sqr(b1[1]) + sqr(b1[2]));
68  
            bdif = 0.0;
69  
            for (int i = 0; i < 3; i++) {
70  
                b1[i] = b1[i] / A;
71  
                bdif += pow(b0[i] - b1[i], 2);
72  
            }
73  
            for (int i = 0; i < 3; i++) {
74  
                b0[i] = b1[i];
75  
            }
76  
77  
        } while (fabs(bdif) > 1.0e-14);
78  
        for (int m = 0; m < 3; m++) {
79  
            e[l][m] = b0[m];
80  
        }
81  
        for (int i = 0; i < 3; i++) {
82  
            A += b0[i] * q[l][i];
83  
        }
84  
        lambda[l] = sqrt(A) / sqrt(sqr(e[l][0]) + sqr(e[l][1]) + sqr(e[l][2]));
85  
    }
86  
    PROF_END;
87  
}
88  
89  
std::pair<space_vector, space_vector> grid::find_axis() const {
90  
    PROF_BEGIN;
91  
    real quad_moment[NDIM][NDIM];
92  
    real eigen[NDIM][NDIM];
93  
    real lambda[NDIM];
94  
    space_vector this_com;
95  
    real mtot = 0.0;
96  
    for (integer i = 0; i != NDIM; ++i) {
97  
        this_com[i] = 0.0;
98  
        for (integer j = 0; j != NDIM; ++j) {
99  
            quad_moment[i][j] = 0.0;
100  
        }
101  
    }
102  
103  
    auto& M = *M_ptr;
104  
    auto& mon = *mon_ptr;
105  
    std::vector<space_vector> const& com0 = *(com_ptr[0]);
106  
    for (integer i = 0; i != G_NX; ++i) {
107  
        for (integer j = 0; j != G_NX; ++j) {
108  
            for (integer k = 0; k != G_NX; ++k) {
109  
                const integer iii1 = gindex(i, j, k);
110  
                const integer iii0 = gindex(i + H_BW , j + H_BW , k + H_BW );
111  
                space_vector const& com0iii1 = com0[iii1];
112  
                multipole const& Miii1 = M[iii1];
113  
                for (integer n = 0; n != NDIM; ++n) {
114  
                    real mass;
115  
                    if (is_leaf) {
116  
                        mass = mon[iii1];
117  
                    } else {
118  
                        mass = Miii1();
119  
                    }
120  
                    this_com[n] += mass * com0iii1[n];
121  
                    mtot += mass;
122  
                    for (integer m = 0; m != NDIM; ++m) {
123  
                        if (!is_leaf) {
124  
                            quad_moment[n][m] += Miii1(n, m);
125  
                        }
126  
                        quad_moment[n][m] += mass * com0iii1[n] * com0iii1[m];
127  
                    }
128  
                }
129  
            }
130  
        }
131  
    }
132  
    for (integer j = 0; j != NDIM; ++j) {
133  
        this_com[j] /= mtot;
134  
    }
135  
136  
    find_eigenvectors(quad_moment, eigen, lambda);
137  
    integer index;
138  
    if (lambda[0] > lambda[1] && lambda[0] > lambda[2]) {
139  
        index = 0;
140  
    } else if (lambda[1] > lambda[2]) {
141  
        index = 1;
142  
    } else {
143  
        index = 2;
144  
    }
145  
    space_vector rc;
146  
    for (integer j = 0; j != NDIM; ++j) {
147  
        rc[j] = eigen[index][j];
148  
    }
149  
    std::pair<space_vector, space_vector> pair{rc, this_com};
150  
    PROF_END;
151  
    return pair;
152  
}
153  
154  
void grid::solve_gravity(gsolve_type type) {
155  
156  
    compute_multipoles(type);
157  
    compute_interactions(type);
158  
    compute_expansions(type);
159  
}
160  
161  
void grid::compute_interactions(gsolve_type type) {
162  
    PROF_BEGIN;
163  
    auto& M = *M_ptr;
164  
    auto& mon = *mon_ptr;
165  
    std::fill(std::begin(L), std::end(L), ZERO);
166  
    std::fill(std::begin(L_c), std::end(L_c), ZERO);
167  
    if (!is_leaf) {
168  
        const auto& this_ilist = is_root ? ilist_r : ilist_n;
169  
        const integer list_size = this_ilist.size();
170  
171  
        std::vector<space_vector> const& com0 = (*(com_ptr[0]));
172  
        hpx::parallel::for_loop_strided(
173  
            for_loop_policy, 0, list_size, simd_len,
174  
            [&com0, &this_ilist, list_size, type, this](std::size_t li) {
175  
176  
                std::array<simd_vector, NDIM> dX;
177  
                std::array<simd_vector, NDIM> X;
178  
                std::array<simd_vector, NDIM> Y;
179  
180  
                taylor<4, simd_vector> m0;
181  
                taylor<4, simd_vector> m1;
182  
                taylor<4, simd_vector> n0;
183  
                taylor<4, simd_vector> n1;
184  
185  
                auto& M = *M_ptr;
186  
                auto& mon = *mon_ptr;
187  
                // FIXME: replace with vector-pack gather-loads
188  
#pragma GCC ivdep
189  
                for (integer i = 0; i != simd_len; ++i) {
190  
                    const auto index = std::min(integer(li + i),integer(list_size-1));
191  
                    const integer iii0 = this_ilist[index].first;
192  
                    const integer iii1 = this_ilist[index].second;
193 2.4%
                    space_vector const& com0iii0 = com0[iii0];
194  
                    space_vector const& com0iii1 = com0[iii1];
195  
                    for (integer d = 0; d < NDIM; ++d) {
196  
                        X[d][i] = com0iii0[d];
197  
                        Y[d][i] = com0iii1[d];
198  
                    }
199  
                    multipole const& Miii0 = M[iii0];
200  
                    multipole const& Miii1 = M[iii1];
201  
                    for (integer j = 0; j != taylor_sizes[3]; ++j) {
202  
                        m0[j][i] = Miii1[j];
203  
                        m1[j][i] = Miii0[j];
204  
                    }
205  
                    if (type == RHO) {
206  
                        real const tmp1 = Miii1() / Miii0();
207  
                        real const tmp2 = Miii0() / Miii1();
208  
                        for (integer j = taylor_sizes[2]; j != taylor_sizes[3]; ++j) {
209  
                            n0[j][i] = Miii1[j] - Miii0[j] * tmp1;
210  
                            n1[j][i] = Miii0[j] - Miii1[j] * tmp2;
211  
                        }
212  
                    }
213  
                }
214  
215  
                if (type != RHO) {
216  
#pragma GCC ivdep
217  
                    for (integer j = taylor_sizes[2]; j != taylor_sizes[3]; ++j) {
218  
                        n0[j] = ZERO;
219  
                        n1[j] = ZERO;
220  
                    }
221  
                }
222  
#pragma GCC ivdep
223  
                for (integer d = 0; d < NDIM; ++d) {
224  
                    dX[d] = X[d] - Y[d];
225  
                }
226  
227  
                taylor<5, simd_vector> D;
228  
                taylor<4, simd_vector> A0, A1;
229  
                std::array<simd_vector, NDIM> B0 = {
230  
                    simd_vector(ZERO), simd_vector(ZERO), simd_vector(ZERO)
231  
                };
232  
                std::array<simd_vector, NDIM> B1 = {
233  
                    simd_vector(ZERO), simd_vector(ZERO), simd_vector(ZERO)
234  
                };
235  
236  
                D.set_basis(dX);
237  
                A0[0] = m0[0] * D[0];
238  
                A1[0] = m1[0] * D[0];
239  
                if (type != RHO) {
240  
                    for (integer i = taylor_sizes[0]; i != taylor_sizes[1]; ++i) {
241  
                        A0[0] -= m0[i] * D[i];
242  
                        A1[0] += m1[i] * D[i];
243  
                    }
244  
                }
245  
                for (integer i = taylor_sizes[1]; i != taylor_sizes[2]; ++i) {
246  
                    const auto tmp = D[i] * (factor[i] / real(2));
247  
                    A0[0] += m0[i] * tmp;
248  
                    A1[0] += m1[i] * tmp;
249  
                }
250  
                for (integer i = taylor_sizes[2]; i != taylor_sizes[3]; ++i) {
251  
                    const auto tmp = D[i] * (factor[i] / real(6));
252  
                    A0[0] -= m0[i] * tmp;
253  
                    A1[0] += m1[i] * tmp;
254  
                }
255  
256  
                for (integer a = 0; a < NDIM; ++a) {
257  
                    A0(a) =  m0() * D(a);
258  
                    A1(a) = -m1() * D(a);
259  
                    for (integer b = 0; b < NDIM; ++b) {
260  
                        if (type != RHO) {
261  
                            A0(a) -= m0(a) * D(a, b);
262  
                            A1(a) -= m1(a) * D(a, b);
263  
                        }
264  
                        for (integer c = b; c < NDIM; ++c) {
265  
                            const auto tmp1 = D(a, b, c) * (factor(c, b) / real(2));
266  
                            A0(a) += m0(c, b) * tmp1;
267  
                            A1(a) -= m1(c, b) * tmp1;
268  
                        }
269  
270  
                    }
271  
                }
272  
273  
                if (type == RHO) {
274  
                    for (integer a = 0; a != NDIM; ++a) {
275  
                        for (integer b = 0; b != NDIM; ++b) {
276  
                            for (integer c = b; c != NDIM; ++c) {
277  
                                for (integer d = c; d != NDIM; ++d) {
278  
                                    const auto tmp = D(a, b, c, d) * (factor(b, c, d) / real(6));
279  
                                    B0[a] -= n0(b, c, d) * tmp;
280  
                                    B1[a] -= n1(b, c, d) * tmp;
281  
                                }
282  
                            }
283  
                        }
284  
285  
                    }
286  
                }
287  
288  
                for (integer a = 0; a < NDIM; ++a) {
289  
                    for (integer b = a; b < NDIM; ++b) {
290  
                        A0(a, b) = m0() * D(a, b);
291  
                        A1(a, b) = m1() * D(a, b);
292  
                        for (integer c = 0; c < NDIM; ++c) {
293  
                            const auto& tmp = D(a, b, c);
294  
                            A0(a, b) -= m0(c) * tmp;
295  
                            A1(a, b) += m1(c) * tmp;
296  
                        }
297  
298  
                    }
299  
                }
300  
301  
                for (integer i = taylor_sizes[2]; i != taylor_sizes[3]; ++i) {
302  
                    const auto& tmp = D[i];
303  
                    A1[i] = -m1[0] * tmp;
304  
                    A0[i] =  m0[0] * tmp;
305  
                }
306  
#ifdef USE_GRAV_PAR
307  
                std::lock_guard<hpx::lcos::local::spinlock> lock(*L_mtx);
308  
#endif
309  
                for (integer i = 0; i != simd_len && i + li < list_size; ++i) {
310  
                    const integer iii0 = this_ilist[li + i].first;
311  
                    const integer iii1 = this_ilist[li + i].second;
312 0.0%
                    expansion& Liii0 = L[iii0];
313 0.0%
                    expansion& Liii1 = L[iii1];
314  
                    for (integer j = 0; j != taylor_sizes[3]; ++j) {
315  
                        Liii0[j] += A0[j][i];
316 1.5%
                        Liii1[j] += A1[j][i];
317  
                    }
318  
319  
                    if (type == RHO) {
320  
                        space_vector& L_ciii0 = L_c[iii0];
321  
                        space_vector& L_ciii1 = L_c[iii1];
322  
                        for (integer j = 0; j != NDIM; ++j) {
323  
                            L_ciii0[j] += B0[j][i];
324 0.3%
                            L_ciii1[j] += B1[j][i];
325  
                        }
326  
                    }
327  
                }
328  
            });
329  
    } else {
330  
#if !defined(HPX_HAVE_DATAPAR)
331  
        const v4sd d0 = { 1.0 / dx, +1.0 / sqr(dx), +1.0 / sqr(dx), +1.0 / sqr(dx) };
332  
        const v4sd d1 = { 1.0 / dx, -1.0 / sqr(dx), -1.0 / sqr(dx), -1.0 / sqr(dx) };
333  
#else
334  
        const std::array<double, 4> di0 = { 1.0 / dx, +1.0 / sqr(dx), +1.0 / sqr(dx), +1.0 / sqr(dx) };
335  
        const v4sd d0(di0.data());
336  
337  
        const std::array<double, 4> di1 = { 1.0 / dx, -1.0 / sqr(dx), -1.0 / sqr(dx), -1.0 / sqr(dx) };
338  
        const v4sd d1(di1.data());
339  
#endif
340  
341  
        const integer dsize = ilist_d.size();
342  
        const integer lev = 0;
343  
        for (integer li = 0; li < dsize; ++li) {
344  
            const auto& ele = ilist_d[li];
345  
            const integer iii0 = ele.first;
346  
            const integer iii1 = ele.second;
347  
#if !defined(HPX_HAVE_DATAPAR)
348  
            v4sd m0, m1;
349  
#pragma GCC ivdep
350  
            for (integer i = 0; i != 4; ++i) {
351  
                m0[i] = mon[iii1];
352  
            }
353  
#pragma GCC ivdep
354  
            for (integer i = 0; i != 4; ++i) {
355  
                m1[i] = mon[iii0];
356  
            }
357  
#else
358 11.2%
            v4sd m0 = mon[iii1];
359  
            v4sd m1 = mon[iii0];
360  
#endif
361  
#ifdef USE_GRAV_PAR
362  
                std::lock_guard<hpx::lcos::local::spinlock> lock(*L_mtx);
363  
#endif
364  
            auto tmp1 =  m0 * ele.four * d0;
365  
            auto tmp2 = m1 * ele.four * d1;
366  
            expansion& Liii0 = L[iii0];
367  
            expansion& Liii1 = L[iii1];
368  
#pragma GCC ivdep
369  
            for(integer i = 0; i != 4; ++i) {
370 0.0%
                Liii0[i] += tmp1[i];
371  
                Liii1[i] += tmp2[i];
372  
            }
373  
        }
374  
    }
375  
    PROF_END;
376  
}
377  
378  
void grid::compute_boundary_interactions(gsolve_type type, const geo::direction& dir, bool is_monopole, const gravity_boundary_type& mpoles) {
379  
    if (!is_leaf) {
380  
        if (!is_monopole) {
381  
            compute_boundary_interactions_multipole_multipole(type, ilist_n_bnd[dir], mpoles);
382  
        } else {
383  
            compute_boundary_interactions_monopole_multipole(type, ilist_d_bnd[dir], mpoles);
384  
        }
385  
    } else {
386  
        if (!is_monopole) {
387  
            compute_boundary_interactions_multipole_monopole(type, ilist_d_bnd[dir], mpoles);
388  
        } else {
389  
            compute_boundary_interactions_monopole_monopole(type, ilist_d_bnd[dir], mpoles);
390  
        }
391  
    }
392  
393  
}
394  
395  
void grid::compute_boundary_interactions_multipole_multipole(gsolve_type type, const std::vector<boundary_interaction_type>& ilist_n_bnd,
396  
    const gravity_boundary_type& mpoles) {
397  
    PROF_BEGIN;
398  
    auto& M = *M_ptr;
399  
    auto& mon = *mon_ptr;
400  
401  
    std::vector<space_vector> const& com0 = *(com_ptr[0]);
402  
    hpx::parallel::for_loop(
403  
        for_loop_policy, 0, ilist_n_bnd.size(),
404  
        [&mpoles, &com0, &ilist_n_bnd, type, this, &M](std::size_t si) {
405  
406  
            taylor<4, simd_vector> m0;
407  
            taylor<4, simd_vector> n0;
408  
            std::array<simd_vector, NDIM> dX;
409  
            std::array<simd_vector, NDIM> X;
410  
            space_vector Y;
411  
412  
            boundary_interaction_type const& bnd = ilist_n_bnd[si];
413  
            integer index = mpoles.is_local ? bnd.second : si;
414  
415  
            load_multipole(m0, Y, mpoles, index, false);
416  
417  
            std::array<simd_vector, NDIM> simdY = {
418  
                simd_vector(Y[0]), simd_vector(Y[1]), simd_vector(Y[2]),
419  
            };
420  
421  
            const integer list_size = bnd.first.size();
422  
            for (integer li = 0; li < list_size; li += simd_len) {
423  
#pragma GCC ivdep
424  
                for (integer i = 0; i != simd_len; ++i) {
425  
                    const auto index = std::min(integer(li + i), integer(list_size-1));
426  
                    const integer iii0 = bnd.first[index];
427  
                    space_vector const& com0iii0 = com0[iii0];
428  
                    for (integer d = 0; d < NDIM; ++d) {
429  
                        X[d][i] = com0iii0[d];
430  
                    }
431  
                    if (type == RHO) {
432  
                        multipole const& Miii0 = M[iii0];
433  
                        real const tmp = m0()[i] / Miii0();
434  
                        for (integer j = taylor_sizes[2]; j != taylor_sizes[3]; ++j) {
435  
                            n0[j][i] = m0[j][i] - Miii0[j] * tmp;
436  
                        }
437  
                    }
438  
                }
439  
                if (type != RHO) {
440  
#pragma GCC ivdep
441  
                    for (integer j = taylor_sizes[2]; j != taylor_sizes[3]; ++j) {
442  
                        n0[j] = ZERO;
443  
                    }
444  
                }
445  
#pragma GCC ivdep
446  
                for (integer d = 0; d < NDIM; ++d) {
447  
                    dX[d] = X[d] - simdY[d];
448  
                }
449  
450  
                taylor<5, simd_vector> D;
451  
                taylor<4, simd_vector> A0;
452  
                std::array<simd_vector, NDIM> B0 = {
453  
                    simd_vector(0.0), simd_vector(0.0), simd_vector(0.0)
454  
                };
455  
456  
                D.set_basis(dX);
457  
                A0[0] = m0[0] * D[0];
458  
                if (type != RHO) {
459  
                    for (integer i = taylor_sizes[0]; i != taylor_sizes[1]; ++i) {
460  
                        A0[0] -= m0[i] * D[i];
461  
                    }
462  
                }
463  
                for (integer i = taylor_sizes[1]; i != taylor_sizes[2]; ++i) {
464  
                    A0[0] += m0[i] * D[i] * (factor[i] / real(2));
465  
                }
466  
                for (integer i = taylor_sizes[2]; i != taylor_sizes[3]; ++i) {
467  
                    A0[0] -= m0[i] * D[i] * (factor[i] / real(6));
468  
                }
469  
470  
                for (integer a = 0; a < NDIM; ++a) {
471  
                    A0(a) = m0() * D(a);
472  
                    for (integer b = 0; b < NDIM; ++b) {
473  
                        if (type != RHO) {
474  
                            A0(a) -= m0(a) * D(a, b);
475  
                        }
476  
                        for (integer c = b; c < NDIM; ++c) {
477  
                            const auto tmp = D(a, b, c) * (factor(c, b) / real(2));
478  
                            A0(a) += m0(c, b) * tmp;
479  
                        }
480  
                    }
481  
                }
482  
483  
                if (type == RHO) {
484  
                    for (integer a = 0; a < NDIM; ++a) {
485  
                        for (integer b = 0; b < NDIM; ++b) {
486  
                            for (integer c = b; c < NDIM; ++c) {
487  
                                for (integer d = c; d < NDIM; ++d) {
488  
                                    const auto tmp = D(a, b, c, d) * (factor(b, c, d) / real(6));
489  
                                    B0[a] -= n0(b, c, d) * tmp;
490  
                                }
491  
                            }
492  
                        }
493  
                    }
494  
                }
495  
496  
                for (integer a = 0; a < NDIM; ++a) {
497  
                    for (integer b = a; b < NDIM; ++b) {
498  
                        A0(a, b) = m0() * D(a, b);
499  
                        for (integer c = 0; c < NDIM; ++c) {
500  
                            A0(a, b) -= m0(c) * D(a, b, c);
501  
                        }
502  
                    }
503  
                }
504  
                for (integer i = taylor_sizes[2]; i != taylor_sizes[3]; ++i) {
505  
                    A0[i] =  m0[0] * D[i];
506  
                }
507  
#ifdef USE_GRAV_PAR
508  
                std::lock_guard<hpx::lcos::local::spinlock> lock(*L_mtx);
509  
#endif
510  
                for (integer i = 0; i != simd_len && i + li < list_size; ++i) {
511  
                    const integer iii0 = bnd.first[li + i];
512  
                    expansion& Liii0 = L[iii0];
513  
514  
#pragma GCC ivdep
515  
                    for (integer j = 0; j != 20; ++j) {
516 3.2%
                        Liii0[j] +=  A0[j][i];
517  
                    }
518  
519  
                    if (type == RHO) {
520  
                        space_vector& L_ciii0 = L_c[iii0];
521  
#pragma GCC ivdep
522  
                        for (integer j = 0; j != NDIM; ++j) {
523  
                            L_ciii0[j] +=  B0[j][i];
524  
                        }
525  
                    }
526  
                }
527  
            }
528  
        });
529  
    PROF_END;
530  
}
531  
532  
void grid::compute_boundary_interactions_multipole_monopole(gsolve_type type, const std::vector<boundary_interaction_type>& ilist_n_bnd,
533  
    const gravity_boundary_type& mpoles) {
534  
    PROF_BEGIN;
535  
    auto& M = *M_ptr;
536  
    auto& mon = *mon_ptr;
537  
538  
    std::vector<space_vector> const& com0 = *(com_ptr[0]);
539  
    hpx::parallel::for_loop(
540  
        for_loop_policy, 0, ilist_n_bnd.size(),
541  
        [&mpoles, &com0, &ilist_n_bnd, type, this](std::size_t si) {
542  
543  
            taylor<4, simd_vector> m0;
544  
            taylor<4, simd_vector> n0;
545  
            std::array<simd_vector, NDIM> dX;
546  
            std::array<simd_vector, NDIM> X;
547  
            space_vector Y;
548  
549  
            boundary_interaction_type const& bnd = ilist_n_bnd[si];
550  
            const integer list_size = bnd.first.size();
551  
            integer index = mpoles.is_local ? bnd.second : si;
552  
            load_multipole(m0, Y, mpoles, index, false);
553  
554  
            std::array<simd_vector, NDIM> simdY = {
555  
                simd_vector(Y[0]), simd_vector(Y[1]), simd_vector(Y[2]),
556  
            };
557  
558  
            if (type == RHO) {
559  
#pragma GCC ivdep
560  
                for (integer j = taylor_sizes[2]; j != taylor_sizes[3]; ++j) {
561  
                    n0[j] = m0[j];
562  
                }
563  
            }
564  
            else {
565  
#pragma GCC ivdep
566  
                for (integer j = taylor_sizes[2]; j != taylor_sizes[3]; ++j) {
567  
                    n0[j] = ZERO;
568  
                }
569  
            }
570  
            for (integer li = 0; li < list_size; li += simd_len) {
571  
#pragma GCC ivdep
572  
                for (integer i = 0; i != simd_len; ++i) {
573  
                    const auto index = std::min(integer(li + i), integer(list_size-1));
574  
                    const integer iii0 = bnd.first[index];
575  
                    space_vector const& com0iii0 = com0[iii0];
576  
                    for (integer d = 0; d < NDIM; ++d) {
577  
                        X[d][i] = com0iii0[d];
578  
                    }
579  
                }
580  
#pragma GCC ivdep
581  
                for (integer d = 0; d < NDIM; ++d) {
582  
                    dX[d] = X[d] - simdY[d];
583  
                }
584  
585  
                taylor<5, simd_vector> D;
586  
                taylor<2, simd_vector> A0;
587  
                std::array<simd_vector, NDIM> B0 = {
588  
                    simd_vector(0.0), simd_vector(0.0), simd_vector(0.0)
589  
                };
590  
591  
                D.set_basis(dX);
592  
                A0[0] = m0[0] * D[0];
593  
                if (type != RHO) {
594  
                    for (integer i = taylor_sizes[0]; i != taylor_sizes[1]; ++i) {
595  
                        A0[0] -= m0[i] * D[i];
596  
                    }
597  
                }
598  
                for (integer i = taylor_sizes[1]; i != taylor_sizes[2]; ++i) {
599  
                    A0[0] += m0[i] * D[i] * (factor[i] / real(2));
600  
                }
601  
                for (integer i = taylor_sizes[2]; i != taylor_sizes[3]; ++i) {
602  
                    A0[0] -= m0[i] * D[i] * (factor[i] / real(6));
603  
                }
604  
605  
                for (integer a = 0; a < NDIM; ++a) {
606  
                    A0(a) = m0() * D(a);
607  
                    for (integer b = 0; b < NDIM; ++b) {
608  
                        if (type != RHO) {
609  
                            A0(a) -= m0(a) * D(a, b);
610  
                        }
611  
                        for (integer c = b; c < NDIM; ++c) {
612  
                            A0(a) += m0(c, b) * D(a, b, c) * (factor(b, c) / real(2));
613  
                        }
614  
                    }
615  
                }
616  
617  
                if (type == RHO) {
618  
                    for (integer a = 0; a < NDIM; ++a) {
619  
                        for (integer b = 0; b < NDIM; ++b) {
620  
                            for (integer c = b; c < NDIM; ++c) {
621  
                                for (integer d = c; d < NDIM; ++d) {
622  
                                    const auto tmp = D(a, b, c, d) * (factor(b, c, d) / real(6));
623  
                                    B0[a] -= n0(b, c, d) * tmp;
624  
                                }
625  
                            }
626  
                        }
627  
                    }
628  
                }
629  
#ifdef USE_GRAV_PAR
630  
                std::lock_guard<hpx::lcos::local::spinlock> lock(*L_mtx);
631  
#endif
632  
                for (integer i = 0; i != simd_len && i + li < list_size; ++i) {
633  
                    const integer iii0 = bnd.first[li + i];
634  
                    expansion& Liii0 = L[iii0];
635  
#pragma GCC ivdep
636  
                    for (integer j = 0; j != 4; ++j) {
637  
                        Liii0[j] +=  A0[j][i];
638  
                    }
639  
                    if (type == RHO) {
640  
                        space_vector& L_ciii0 = L_c[iii0];
641  
#pragma GCC ivdep
642  
                        for (integer j = 0; j != NDIM; ++j) {
643  
                            L_ciii0[j] += B0[j][i];
644  
                        }
645  
                    }
646  
                }
647  
            }
648  
        });
649  
    PROF_END;
650  
}
651  
652  
void grid::compute_boundary_interactions_monopole_multipole(gsolve_type type, const std::vector<boundary_interaction_type>& ilist_n_bnd,
653  
    const gravity_boundary_type& mpoles) {
654  
    PROF_BEGIN;
655  
    auto& M = *M_ptr;
656  
    auto& mon = *mon_ptr;
657  
658  
    std::array<real, NDIM> Xbase = {
659  
        X[0][hindex(H_BW, H_BW, H_BW)],
660  
        X[1][hindex(H_BW, H_BW, H_BW)],
661  
        X[2][hindex(H_BW, H_BW, H_BW)]
662  
    };
663  
664  
    std::vector<space_vector> const& com0 = *(com_ptr[0]);
665  
    hpx::parallel::for_loop(
666  
        for_loop_policy, 0, ilist_n_bnd.size(),
667  
        [&mpoles, &Xbase, &com0, &ilist_n_bnd, type, this, &M](std::size_t si) {
668  
669  
            simd_vector m0;
670  
            taylor<4, simd_vector> n0;
671  
            std::array<simd_vector, NDIM> dX;
672  
            std::array<simd_vector, NDIM> X;
673  
            std::array<simd_vector, NDIM> Y;
674  
675  
            boundary_interaction_type const& bnd = ilist_n_bnd[si];
676  
            integer index = mpoles.is_local ? bnd.second : si;
677  
            const integer list_size = bnd.first.size();
678  
679  
#pragma GCC ivdep
680  
            for (integer d = 0; d != NDIM; ++d) {
681  
                Y[d] = bnd.x[d] * dx + Xbase[d];
682  
            }
683  
684  
            m0 = (*(mpoles.m))[index];
685  
            for (integer li = 0; li < list_size; li += simd_len) {
686  
                for (integer i = 0; i != simd_len && li + i < list_size; ++i) {
687  
                    const integer iii0 = bnd.first[li + i];
688  
                    space_vector const& com0iii0 = com0[iii0];
689  
#pragma GCC ivdep
690  
                    for (integer d = 0; d < NDIM; ++d) {
691  
                        X[d][i] = com0iii0[d];
692  
                    }
693  
                    if (type == RHO) {
694  
                        multipole const& Miii0 = M[iii0];
695  
                        real const tmp = m0[i] / Miii0();
696  
#pragma GCC ivdep
697  
                        for (integer j = taylor_sizes[2]; j != taylor_sizes[3]; ++j) {
698  
                            n0[j][i] = -Miii0[j] * tmp;
699  
                        }
700  
                    }
701  
                }
702  
703  
                if (type != RHO) {
704  
#pragma GCC ivdep
705  
                    for (integer j = taylor_sizes[2]; j != taylor_sizes[3]; ++j) {
706  
                        n0[j] = ZERO;
707  
                    }
708  
                }
709  
#pragma GCC ivdep
710  
                for (integer d = 0; d < NDIM; ++d) {
711  
                    dX[d] = X[d] - Y[d];
712  
                }
713  
714  
                taylor<5, simd_vector> D;
715  
                taylor<4, simd_vector> A0;
716  
                std::array<simd_vector, NDIM> B0 = {
717  
                    simd_vector(0.0), simd_vector(0.0), simd_vector(0.0)
718  
                };
719  
720  
                D.set_basis(dX);
721  
722  
                A0[0] = m0 * D[0];
723  
                for (integer i = taylor_sizes[0]; i != taylor_sizes[2]; ++i) {
724  
                    A0[i] = m0 * D[i];
725  
                }
726  
                if (type == RHO) {
727  
                    for (integer i = taylor_sizes[2]; i != taylor_sizes[3]; ++i) {
728  
                        A0[i] = m0 * D[i];
729  
                    }
730  
                }
731  
732  
                if (type == RHO) {
733  
                    for (integer a = 0; a != NDIM; ++a) {
734  
                        for (integer b = 0; b != NDIM; ++b) {
735  
                            for (integer c = b; c != NDIM; ++c) {
736  
                                for (integer d = c; d != NDIM; ++d) {
737  
                                    const auto tmp = D(a, b, c, d) * (factor(b, c, d) / real(6));
738  
                                    B0[a] -= n0(b, c, d) * tmp;
739  
                                }
740  
                            }
741  
                        }
742  
                    }
743  
                }
744  
#ifdef USE_GRAV_PAR
745  
                std::lock_guard<hpx::lcos::local::spinlock> lock(*L_mtx);
746  
#endif
747  
                for (integer i = 0; i != simd_len && i + li < list_size; ++i) {
748  
                    const integer iii0 = bnd.first[li + i];
749  
                    expansion& Liii0 = L[iii0];
750  
751  
#pragma GCC ivdep
752  
                    for (integer j = 0; j != 20; ++j) {
753 2.2%
                        Liii0[j] +=  A0[j][i];
754  
                    }
755  
756  
                    if (type == RHO) {
757  
                        space_vector& L_ciii0 = L_c[iii0];
758  
#pragma GCC ivdep
759  
                        for (integer j = 0; j != NDIM; ++j) {
760  
                            L_ciii0[j] +=  B0[j][i];
761  
                        }
762  
                    }
763  
                }
764  
            }
765  
        });
766  
    PROF_END;
767  
}
768  
769  
void grid::compute_boundary_interactions_monopole_monopole(gsolve_type type, const std::vector<boundary_interaction_type>& ilist_n_bnd,
770  
    const gravity_boundary_type& mpoles) {
771  
    PROF_BEGIN;
772  
    auto& M = *M_ptr;
773  
    auto& mon = *mon_ptr;
774  
775  
#if !defined(HPX_HAVE_DATAPAR)
776  
    const v4sd d0 = { 1.0 / dx, +1.0 / sqr(dx), +1.0 / sqr(dx), +1.0 / sqr(dx) };
777  
#else
778  
    const std::array<double, 4> di0 = { 1.0 / dx, +1.0 / sqr(dx), +1.0 / sqr(dx), +1.0 / sqr(dx) };
779  
    const v4sd d0(di0.data());
780  
#endif
781  
    hpx::parallel::for_loop(
782  
        for_loop_policy, 0, ilist_n_bnd.size(),
783  
        [&mpoles, &ilist_n_bnd, &d0, this](std::size_t si) {
784  
785  
            boundary_interaction_type const& bnd = ilist_n_bnd[si];
786  
            const integer dsize = bnd.first.size();
787  
            const integer lev = 0;
788  
            integer index = mpoles.is_local ? bnd.second : si;
789  
#if !defined(HPX_HAVE_DATAPAR)
790  
            const auto tmp = (*(mpoles).m)[index];
791  
            v4sd m0;
792  
#pragma GCC ivdep
793  
            for (integer i = 0; i != 4; ++i) {
794  
                m0[i] = tmp;
795  
            }
796  
#else
797  
            v4sd m0 = (*(mpoles).m)[index];
798  
#endif
799  
            m0 *= d0;
800  
#ifdef USE_GRAV_PAR
801  
                std::lock_guard<hpx::lcos::local::spinlock> lock(*L_mtx);
802  
#endif
803  
            for (integer li = 0; li < dsize; ++li) {
804  
                const integer iii0 = bnd.first[li];
805 16.1%
                auto tmp1 =  m0 * bnd.four[li];
806  
                expansion& Liii0 = L[iii0];
807  
#pragma GCC ivdep
808  
                for( integer i = 0; i != 4; ++i) {
809 6.7%
                    Liii0[i] += tmp1[i];
810  
                }
811  
812  
            }
813  
        });
814  
    PROF_END;
815  
}
816  
817  
void compute_ilist() {
818  
819  
    factor = 0.0;
820  
    factor() += 1.0;
821  
    for (integer a = 0; a < NDIM; ++a) {
822  
        factor(a) += 1.0;
823  
        for (integer b = 0; b < NDIM; ++b) {
824  
            factor(a, b) += 1.0;
825  
            for (integer c = 0; c < NDIM; ++c) {
826  
                factor(a, b, c) += 1.0;
827  
            }
828  
        }
829  
    }
830  
831  
    const integer inx = INX;
832  
    const integer nx = inx;
833  
    interaction_type np;
834  
    interaction_type dp;
835  
    std::vector<interaction_type> ilist_r0;
836  
    std::vector<interaction_type> ilist_n0;
837  
    std::vector<interaction_type> ilist_d0;
838  
    std::array < std::vector<interaction_type>, geo::direction::count() > ilist_n0_bnd;
839  
    std::array < std::vector<interaction_type>, geo::direction::count() > ilist_d0_bnd;
840  
    const real theta0 = opts.theta;
841  
    const auto theta = [](integer i0, integer j0, integer k0, integer i1, integer j1, integer k1) {
842  
        real tmp = (sqr(i0-i1) + sqr(j0-j1) + sqr(k0-k1));
843  
        // protect against sqrt(0)
844  
        if( tmp > 0.0 ) {
845  
            return 1.0 / (std::sqrt(tmp));
846  
        } else {
847  
            return 1.0e+10;
848  
        }
849  
    };
850  
    const auto interior = [](integer i, integer j, integer k) {
851  
        bool rc = true;
852  
        if( i < 0 || i >= G_NX ) {
853  
            rc = false;
854  
        }
855  
        else if( j < 0 || j >= G_NX ) {
856  
            rc = false;
857  
        }
858  
        else if( k < 0 || k >= G_NX ) {
859  
            rc = false;
860  
        }
861  
        return rc;
862  
    };
863  
    const auto neighbor_dir = [](integer i, integer j, integer k) {
864  
        integer i0 = 0, j0 = 0, k0 = 0;
865  
        if( i < 0) {
866  
            i0 = -1;
867  
        } else if( i >= G_NX ) {
868  
            i0 = +1;
869  
        }
870  
        if( j < 0) {
871  
            j0 = -1;
872  
        } else if( j >= G_NX ) {
873  
            j0 = +1;
874  
        }
875  
        if( k < 0) {
876  
            k0 = -1;
877  
        } else if( k >= G_NX ) {
878  
            k0 = +1;
879  
        }
880  
        geo::direction d;
881  
        d.set(i0,j0,k0);
882  
        return d;
883  
    };
884  
    integer max_d = 0;
885  
    integer width = INX;
886  
    for (integer i0 = 0; i0 != nx; ++i0) {
887  
        for (integer j0 = 0; j0 != nx; ++j0) {
888  
            for (integer k0 = 0; k0 != nx; ++k0) {
889  
                integer this_d = 0;
890  
                integer ilb = std::min(i0 - width, integer(0));
891  
                integer jlb = std::min(j0 - width, integer(0));
892  
                integer klb = std::min(k0 - width, integer(0));
893  
                integer iub = std::max(i0 + width + 1, G_NX);
894  
                integer jub = std::max(j0 + width + 1, G_NX);
895  
                integer kub = std::max(k0 + width + 1, G_NX);
896  
                for (integer i1 = ilb; i1 < iub; ++i1) {
897  
                    for (integer j1 = jlb; j1 < jub; ++j1) {
898  
                        for (integer k1 = klb; k1 < kub; ++k1) {
899  
                            const real x = i0 - i1;
900  
                            const real y = j0 - j1;
901  
                            const real z = k0 - k1;
902  
                            // protect against sqrt(0)
903  
                            const real tmp = sqr(x) + sqr(y) + sqr(z);
904  
                            const real r = (tmp == 0) ? 0 : std::sqrt(tmp);
905  
                            const real r3 = r * r * r;
906  
                            v4sd four;
907  
                            if (r > 0.0) {
908  
                                four[0] = -1.0 / r;
909  
                                four[1] = x / r3;
910  
                                four[2] = y / r3;
911  
                                four[3] = z / r3;
912  
                            } else {
913  
                                for (integer i = 0; i != 4; ++i) {
914  
                                    four[i] = 0.0;
915  
                                }
916  
                            }
917  
                            if (i0 == i1 && j0 == j1 && k0 == k1) {
918  
                                continue;
919  
                            }
920  
                            const integer i0_c = (i0 + INX) / 2 - INX / 2;
921  
                            const integer j0_c = (j0 + INX) / 2 - INX / 2;
922  
                            const integer k0_c = (k0 + INX) / 2 - INX / 2;
923  
                            const integer i1_c = (i1 + INX) / 2 - INX / 2;
924  
                            const integer j1_c = (j1 + INX) / 2 - INX / 2;
925  
                            const integer k1_c = (k1 + INX) / 2 - INX / 2;
926  
                            const real theta_f = theta(i0, j0, k0, i1, j1, k1);
927  
                            const real theta_c = theta(i0_c, j0_c, k0_c, i1_c, j1_c, k1_c);
928  
                            const integer iii0 = gindex(i0, j0, k0);
929  
                            const integer iii1n = gindex((i1+INX)%INX, (j1+INX)%INX, (k1+INX)%INX);
930  
                            const integer iii1 = gindex(i1, j1, k1);
931  
                            if (theta_c > theta0 && theta_f <= theta0) {
932  
                                np.first = iii0;
933  
                                np.second = iii1n;
934  
                                np.four = four;
935  
                                np.x[XDIM] = i1;
936  
                                np.x[YDIM] = j1;
937  
                                np.x[ZDIM] = k1;
938  
                                if (interior(i1, j1, k1) && interior(i0, j0, k0)) {
939  
                                    if (iii1 > iii0) {
940  
                                        ilist_n0.push_back(np);
941  
                                    }
942  
                                } else if (interior(i0, j0, k0)) {
943  
                                    ilist_n0_bnd[neighbor_dir(i1, j1, k1)].push_back(np);
944  
                                }
945  
                            }
946  
                            if (theta_c > theta0) {
947  
                                ++this_d;
948  
                                dp.first = iii0;
949  
                                dp.second = iii1n;
950  
                                dp.x[XDIM] = i1;
951  
                                dp.x[YDIM] = j1;
952  
                                dp.x[ZDIM] = k1;
953  
                                dp.four = four;
954  
                                if (interior(i1, j1, k1) && interior(i0, j0, k0)) {
955  
                                    if (iii1 > iii0) {
956  
                                        ilist_d0.push_back(dp);
957  
                                    }
958  
                                } else if (interior(i0, j0, k0)) {
959  
                                    ilist_d0_bnd[neighbor_dir(i1, j1, k1)].push_back(dp);
960  
                                }
961  
                            }
962  
                            if (theta_f <= theta0) {
963  
                                np.first = iii0;
964  
                                np.second = iii1n;
965  
                                np.x[XDIM] = i1;
966  
                                np.x[YDIM] = j1;
967  
                                np.x[ZDIM] = k1;
968  
                                np.four = four;
969  
                                if (interior(i1, j1, k1) && interior(i0, j0, k0)) {
970  
                                    if (iii1 > iii0) {
971  
                                        ilist_r0.push_back(np);
972  
                                    }
973  
                                }
974  
                            }
975  
                        }
976  
                    }
977  
                }
978  
                max_d = std::max(max_d, this_d);
979  
            }
980  
        }
981  
    }
982  
    printf("# direct = %i\n", int(max_d));
983  
    ilist_n = std::vector < interaction_type > (ilist_n0.begin(), ilist_n0.end());
984  
    ilist_d = std::vector < interaction_type > (ilist_d0.begin(), ilist_d0.end());
985  
    ilist_r = std::vector < interaction_type > (ilist_r0.begin(), ilist_r0.end());
986  
    for (auto& dir : geo::direction::full_set()) {
987  
        auto& d = ilist_d_bnd[dir];
988  
        auto& d0 = ilist_d0_bnd[dir];
989  
        auto& n = ilist_n_bnd[dir];
990  
        auto& n0 = ilist_n0_bnd[dir];
991  
        for (auto i0 : d0) {
992  
            bool found = false;
993  
            for (auto& i : d) {
994  
                if (i.second == i0.second) {
995  
                    i.first.push_back(i0.first);
996  
                    i.four.push_back(i0.four);
997  
                    found = true;
998  
                    break;
999  
                }
1000  
            }
1001  
            if (!found) {
1002  
                boundary_interaction_type i;
1003  
                i.second = i0.second;
1004  
                i.x = i0.x;
1005  
                n.push_back(i);
1006  
                i.first.push_back(i0.first);
1007  
                i.four.push_back(i0.four);
1008  
                d.push_back(i);
1009  
            }
1010  
        }
1011  
        for (auto i0 : n0) {
1012  
            bool found = false;
1013  
            for (auto& i : n) {
1014  
                if (i.second == i0.second) {
1015  
                    i.first.push_back(i0.first);
1016  
                    i.four.push_back(i0.four);
1017  
                    found = true;
1018  
                    break;
1019  
                }
1020  
            }
1021  
            assert(found);
1022  
        }
1023  
    }
1024  
}
1025  
1026  
expansion_pass_type grid::compute_expansions(gsolve_type type, const expansion_pass_type* parent_expansions) {
1027  
    PROF_BEGIN;
1028  
    expansion_pass_type exp_ret;
1029  
    if (!is_leaf) {
1030  
        exp_ret.first.resize(INX * INX * INX);
1031  
        if (type == RHO) {
1032  
            exp_ret.second.resize(INX * INX * INX);
1033  
        }
1034  
    }
1035  
    const integer inx = INX;
1036  
    const integer nxp = (inx / 2);
1037  
    auto child_index = [=](integer ip, integer jp, integer kp, integer ci, integer bw=0) -> integer {
1038  
        const integer ic = (2 * (ip )+bw) + ((ci >> 0) & 1);
1039  
        const integer jc = (2 * (jp )+bw) + ((ci >> 1) & 1);
1040  
        const integer kc = (2 * (kp )+bw) + ((ci >> 2) & 1);
1041  
        return (inx+2*bw) * (inx+2*bw) * ic + (inx+2*bw) * jc + kc;
1042  
    };
1043  
1044  
    for (integer ip = 0; ip != nxp; ++ip) {
1045  
        for (integer jp = 0; jp != nxp; ++jp) {
1046  
            for (integer kp = 0; kp != nxp; ++kp) {
1047  
                const integer iiip = nxp * nxp * ip + nxp * jp + kp;
1048  
                std::array<simd_vector, NDIM> X;
1049  
                std::array<simd_vector, NDIM> dX;
1050  
                taylor<4, simd_vector> l;
1051  
                std::array<simd_vector, NDIM> lc;
1052  
                if (!is_root) {
1053  
                    const integer index = (INX * INX / 4) * (ip) + (INX / 2) * (jp) + (kp);
1054  
                    for (integer j = 0; j != 20; ++j) {
1055  
                        l[j] = parent_expansions->first[index][j];
1056  
                    }
1057  
                    if (type == RHO) {
1058  
                        for (integer j = 0; j != NDIM; ++j) {
1059  
                            lc[j] = parent_expansions->second[index][j];
1060  
                        }
1061  
                    }
1062  
                } else {
1063  
                    for (integer j = 0; j != 20; ++j) {
1064  
                        l[j] = 0.0;
1065  
                    }
1066  
                    for (integer j = 0; j != NDIM; ++j) {
1067  
                        lc[j] = 0.0;
1068  
                    }
1069  
                }
1070  
                for (integer ci = 0; ci != NCHILD; ++ci) {
1071  
                    const integer iiic = child_index(ip, jp, kp, ci);
1072  
                    for (integer d = 0; d < NDIM; ++d) {
1073  
                        X[d][ci] = (*(com_ptr[0]))[iiic][d];
1074  
                    }
1075  
                }
1076  
                const auto& Y = (*(com_ptr[1]))[iiip];
1077  
                for (integer d = 0; d < NDIM; ++d) {
1078  
                    dX[d] = X[d] - Y[d];
1079  
                }
1080  
                l <<= dX;
1081  
                for (integer ci = 0; ci != NCHILD; ++ci) {
1082  
                    const integer iiic = child_index(ip, jp, kp, ci);
1083  
                    expansion& Liiic = L[iiic];
1084  
                    for (integer j = 0; j != 20; ++j) {
1085 0.5%
                            Liiic[j] += l[j][ci];
1086  
                    }
1087  
1088  
                    space_vector& L_ciiic = L_c[iiic];
1089  
                    if (type == RHO) {
1090  
                        for (integer j = 0; j != NDIM; ++j) {
1091  
                            L_ciiic[j] += lc[j][ci];
1092  
                        }
1093  
                    }
1094  
1095  
                    if (!is_leaf) {
1096  
                        integer index = child_index(ip, jp, kp, ci, 0);
1097  
                        for( integer j = 0; j != 20; ++j) {
1098  
                         	exp_ret.first[index][j] = Liiic[j];
1099  
                        }
1100  
1101  
                        if (type == RHO) {
1102  
                            for( integer j = 0; j != 3; ++j) {
1103  
                                exp_ret.second[index][j] = L_ciiic[j];
1104  
                            }
1105  
                        }
1106  
                    }
1107  
                }
1108  
            }
1109  
        }
1110  
    }
1111  
1112  
    if (is_leaf) {
1113  
        for (integer i = 0; i != G_NX; ++i) {
1114  
            for (integer j = 0; j != G_NX; ++j) {
1115  
                for (integer k = 0; k != G_NX; ++k) {
1116  
                    const integer iii = gindex(i, j, k);
1117  
                    const integer iii0 = h0index(i, j, k);
1118  
                    const integer iiih = hindex(i + H_BW , j + H_BW , k + H_BW);
1119  
                    if (type == RHO) {
1120  
                        G[iii][phi_i] = L[iii]();
1121  
                        for (integer d = 0; d < NDIM; ++d) {
1122  
                            G[iii][gx_i + d] = -L[iii](d);
1123  
                            if (opts.ang_con == true) {
1124  
                                G[iii][gx_i + d] -= L_c[iii][d];
1125  
                            }
1126  
                        }
1127  
                        U[pot_i][iiih] = G[iii][phi_i] * U[rho_i][iiih];
1128  
                    } else {
1129  
                        dphi_dt[iii0] = L[iii]();
1130  
                    }
1131  
                }
1132  
            }
1133  
        }
1134  
    }
1135  
    PROF_END;
1136  
    return exp_ret;
1137  
}
1138  
1139  
multipole_pass_type grid::compute_multipoles(gsolve_type type, const multipole_pass_type* child_poles) {
1140  
1141  
//	if( int(*Muse_counter) > 0)
1142  
//	printf( "%i\n", int(*Muse_counter));
1143  
    PROF_BEGIN;
1144  
    integer lev = 0;
1145  
    const real dx3 = dx * dx * dx;
1146  
    M_ptr = std::make_shared<std::vector<multipole>>();
1147  
    mon_ptr = std::make_shared<std::vector<real>>();
1148  
    if (com_ptr[1] == nullptr) {
1149  
        com_ptr[1] = std::make_shared<std::vector<space_vector>>(G_N3 / 8);
1150  
    }
1151  
    if (type == RHO) {
1152  
        com_ptr[0] = std::make_shared<std::vector<space_vector>>(G_N3);
1153  
    }
1154  
    auto& M = *M_ptr;
1155  
    auto& mon = *mon_ptr;
1156  
    if (is_leaf) {
1157  
        M.resize(0);
1158  
        mon.resize(G_N3);
1159  
    } else {
1160  
        M.resize(G_N3);
1161  
        mon.resize(0);
1162  
    }
1163  
    if (type == RHO) {
1164  
        const integer iii0 = hindex(H_BW, H_BW, H_BW);
1165  
        const std::array<real, NDIM> x0 = { X[XDIM][iii0], X[YDIM][iii0], X[ZDIM][iii0] };
1166  
1167  
//         std::array<integer, 3> i;
1168  
//         for (i[0] = 0; i[0] != G_NX; ++i[0]) {
1169  
//             for (i[1] = 0; i[1] != G_NX; ++i[1]) {
1170  
//                 for (i[2] = 0; i[2] != G_NX; ++i[2]) {
1171  
//                     const integer iii = gindex(i[0], i[1], i[2]);
1172  
//                     for (integer dim = 0; dim != NDIM; ++dim) {
1173  
//                         (*(com_ptr[0]))[iii][dim] = x0[dim] + (i[dim]) * dx;
1174  
//                     }
1175  
//                 }
1176  
//             }
1177  
//         }
1178  
        for (integer i = 0; i != G_NX; ++i) {
1179  
            for (integer j = 0; j != G_NX; ++j) {
1180  
                for (integer k = 0; k != G_NX; ++k) {
1181  
                    auto& com0iii = (*(com_ptr[0]))[gindex(i, j, k)];
1182  
                    // for (integer dim = 0; dim != NDIM; ++dim)
1183  
                    com0iii[0] = x0[0] + i * dx;
1184  
                    com0iii[1] = x0[1] + j * dx;
1185  
                    com0iii[2] = x0[2] + k * dx;
1186  
                }
1187  
            }
1188  
        }
1189  
    }
1190  
1191  
    multipole_pass_type mret;
1192  
    if (!is_root) {
1193  
        mret.first.resize(INX * INX * INX / NCHILD);
1194  
        mret.second.resize(INX * INX * INX / NCHILD);
1195  
    }
1196  
    taylor<4, real> MM;
1197  
    integer index = 0;
1198  
    for (integer inx = INX; (inx >= INX / 2); inx >>= 1) {
1199  
1200  
        const integer nxp = inx;
1201  
        const integer nxc = (2 * inx);
1202  
1203  
        auto child_index = [=](integer ip, integer jp, integer kp, integer ci) -> integer {
1204  
            const integer ic = (2 * ip ) + ((ci >> 0) & 1);
1205  
            const integer jc = (2 * jp ) + ((ci >> 1) & 1);
1206  
            const integer kc = (2 * kp ) + ((ci >> 2) & 1);
1207  
            return nxc * nxc * ic + nxc * jc + kc;
1208  
        };
1209  
1210  
        for (integer ip = 0; ip != nxp; ++ip) {
1211  
            for (integer jp = 0; jp != nxp; ++jp) {
1212  
                for (integer kp = 0; kp != nxp; ++kp) {
1213  
                    const integer iiip = nxp * nxp * ip + nxp * jp + kp;
1214  
                    if (lev != 0) {
1215  
                        if (type == RHO) {
1216  
                            simd_vector mc;
1217  
                            std::array < simd_vector, NDIM > X;
1218  
                            for (integer ci = 0; ci != NCHILD; ++ci) {
1219  
                                const integer iiic = child_index(ip, jp, kp, ci);
1220  
                                if (is_leaf) {
1221  
                                    mc[ci] = mon[iiic];
1222  
                                } else {
1223  
                                    mc[ci] = M[iiic]();
1224  
                                }
1225  
                                for (integer d = 0; d < NDIM; ++d) {
1226  
                                    X[d][ci] = (*(com_ptr[0]))[iiic][d];
1227  
                                }
1228  
                            }
1229  
                            real mtot = mc.sum();
1230  
                            for (integer d = 0; d < NDIM; ++d) {
1231  
                                (*(com_ptr[1]))[iiip][d] = (X[d] * mc).sum() / mtot;
1232  
                            }
1233  
                        }
1234  
                        taylor<4, simd_vector> mc, mp;
1235  
                        std::array<simd_vector, NDIM> x, y, dx;
1236  
                        for (integer ci = 0; ci != NCHILD; ++ci) {
1237  
                            const integer iiic = child_index(ip, jp, kp, ci);
1238  
                            const space_vector& X = (*(com_ptr[lev - 1]))[iiic];
1239  
                            if (is_leaf) {
1240  
                                mc()[ci] = mon[iiic];
1241  
                                for (integer j = 1; j != 20; ++j) {
1242  
                                    mc.ptr()[j][ci] = 0.0;
1243  
                                }
1244  
                            } else {
1245  
                                for (integer j = 0; j != 20; ++j) {
1246  
                                    mc.ptr()[j][ci] = M[iiic].ptr()[j];
1247  
                                }
1248  
                            }
1249  
                            for (integer d = 0; d < NDIM; ++d) {
1250  
                                x[d][ci] = X[d];
1251  
                            }
1252  
                        }
1253  
                        const space_vector& Y = (*(com_ptr[lev]))[iiip];
1254  
                        for (integer d = 0; d < NDIM; ++d) {
1255  
                            dx[d] = x[d] - simd_vector(Y[d]);
1256  
                        }
1257  
                        mp = mc >> dx;
1258  
                        for (integer j = 0; j != 20; ++j) {
1259  
                            MM[j] = mp[j].sum();
1260  
                        }
1261  
                    } else {
1262  
                        if (child_poles == nullptr) {
1263  
                            const integer iiih = hindex(ip + H_BW , jp + H_BW , kp + H_BW );
1264  
                            const integer iii0 = h0index(ip, jp, kp);
1265  
                            if (type == RHO) {
1266  
                                mon[iiip] = U[rho_i][iiih] * dx3;
1267  
                            } else {
1268  
                                mon[iiip] = dUdt[rho_i][iii0] * dx3;
1269  
                            }
1270  
                        } else {
1271  
                            M[iiip] = child_poles->first[index];
1272  
                            if (type == RHO) {
1273  
                                (*(com_ptr)[lev])[iiip] = child_poles->second[index];
1274  
                            }
1275  
                            ++index;
1276  
                        }
1277  
                    }
1278  
                    if (!is_root && (lev == 1)) {
1279  
                        mret.first[index] = MM;
1280  
                        mret.second[index] = (*(com_ptr[lev]))[iiip];
1281  
                        ++index;
1282  
                    }
1283  
                }
1284  
            }
1285  
        }
1286  
        ++lev;
1287  
        index = 0;
1288  
    }
1289  
    PROF_END;
1290  
    return mret;
1291  
}
1292  
1293  
gravity_boundary_type grid::get_gravity_boundary(const geo::direction& dir, bool is_local) {
1294  
    PROF_BEGIN;
1295  
//	std::array<integer, NDIM> lb, ub;
1296  
    gravity_boundary_type data;
1297  
    data.is_local = is_local;
1298  
    auto& M = *M_ptr;
1299  
    auto& mon = *mon_ptr;
1300  
    if (!is_local) {
1301  
        data.allocate();
1302  
        integer iter = 0;
1303  
        const std::vector<boundary_interaction_type>& list = ilist_n_bnd[dir.flip()];
1304  
        if (is_leaf) {
1305  
            data.m->reserve(list.size());
1306  
            for (auto i : list) {
1307  
                const integer iii = i.second;
1308  
                data.m->push_back(mon[iii]);
1309  
            }
1310  
        } else {
1311  
            data.M->reserve(list.size());
1312  
            data.x->reserve(list.size());
1313  
            for (auto i : list) {
1314  
                const integer iii = i.second;
1315  
                const integer top = M[iii].size();
1316  
                data.M->push_back(M[iii]);
1317  
                data.x->push_back((*(com_ptr[0]))[iii]);
1318  
            }
1319  
        }
1320  
    } else {
1321  
        if (is_leaf) {
1322  
            data.m = mon_ptr;
1323  
        } else {
1324  
            data.M = M_ptr;
1325  
            data.x = com_ptr[0];
1326  
        }
1327  
    }
1328  
    PROF_END;
1329  
    return data;
1330  
}
1331  

Copyright (c) 2006-2012 Rogue Wave Software, Inc. All Rights Reserved.
Patents pending.