Line | % of fetches | Source |
1 | | /* |
2 | | * grid_fmm.cpp |
3 | | * |
4 | | * Created on: Jun 3, 2015 |
5 | | * Author: dmarce1 |
6 | | */ |
7 | | #include "grid.hpp" |
8 | | #include "simd.hpp" |
9 | | #include "profiler.hpp" |
10 | | #include "options.hpp" |
11 | | #include "taylor.hpp" |
12 | | |
13 | | #include <hpx/include/parallel_for_loop.hpp> |
14 | | |
15 | | #include <cstddef> |
16 | | #include <utility> |
17 | | |
18 | | #ifdef USE_GRAV_PAR |
19 | | const auto for_loop_policy = hpx::parallel::par; |
20 | | #else |
21 | | const auto for_loop_policy = hpx::parallel::seq; |
22 | | #endif |
23 | | |
24 | | |
25 | | static std::vector<interaction_type> ilist_n; |
26 | | static std::vector<interaction_type> ilist_d; |
27 | | static std::vector<interaction_type> ilist_r; |
28 | | static std::vector<std::vector<boundary_interaction_type>> ilist_d_bnd(geo::direction::count()); |
29 | | static std::vector<std::vector<boundary_interaction_type>> ilist_n_bnd(geo::direction::count()); |
30 | | static taylor<4, real> factor; |
31 | | extern options opts; |
32 | | |
33 | | template<class T> |
34 | | void load_multipole(taylor<4, T>& m, space_vector& c, const gravity_boundary_type& data, integer iter, bool monopole) { |
35 | | if (monopole) { |
36 | | m = T(0.0); |
37 | | m = T((*(data.m))[iter]); |
38 | | } else { |
39 | | auto const& tmp1 = (*(data.M))[iter]; |
40 | | #pragma GCC ivdep |
41 | | for (int i = 0; i != 20; ++i) { |
42 | | m[i] = tmp1[i]; |
43 | | } |
44 | | auto const& tmp2 = (*(data.x))[iter]; |
45 | | #pragma GCC ivdep |
46 | | for (integer d = 0; d != NDIM; ++d) { |
47 | | c[d] = tmp2[d]; |
48 | | } |
49 | | } |
50 | | } |
51 | | |
52 | | void find_eigenvectors(real q[3][3], real e[3][3], real lambda[3]) { |
53 | | PROF_BEGIN; |
54 | | real b0[3], b1[3], A, bdif; |
55 | | int iter = 0; |
56 | | for (int l = 0; l < 3; l++) { |
57 | | b0[0] = b0[1] = b0[2] = 0.0; |
58 | | b0[l] = 1.0; |
59 | | do { |
60 | | iter++; |
61 | | b1[0] = b1[1] = b1[2] = 0.0; |
62 | | for (int i = 0; i < 3; i++) { |
63 | | for (int m = 0; m < 3; m++) { |
64 | | b1[i] += q[i][m] * b0[m]; |
65 | | } |
66 | | } |
67 | | A = sqrt(sqr(b1[0]) + sqr(b1[1]) + sqr(b1[2])); |
68 | | bdif = 0.0; |
69 | | for (int i = 0; i < 3; i++) { |
70 | | b1[i] = b1[i] / A; |
71 | | bdif += pow(b0[i] - b1[i], 2); |
72 | | } |
73 | | for (int i = 0; i < 3; i++) { |
74 | | b0[i] = b1[i]; |
75 | | } |
76 | | |
77 | | } while (fabs(bdif) > 1.0e-14); |
78 | | for (int m = 0; m < 3; m++) { |
79 | | e[l][m] = b0[m]; |
80 | | } |
81 | | for (int i = 0; i < 3; i++) { |
82 | | A += b0[i] * q[l][i]; |
83 | | } |
84 | | lambda[l] = sqrt(A) / sqrt(sqr(e[l][0]) + sqr(e[l][1]) + sqr(e[l][2])); |
85 | | } |
86 | | PROF_END; |
87 | | } |
88 | | |
89 | | std::pair<space_vector, space_vector> grid::find_axis() const { |
90 | | PROF_BEGIN; |
91 | | real quad_moment[NDIM][NDIM]; |
92 | | real eigen[NDIM][NDIM]; |
93 | | real lambda[NDIM]; |
94 | | space_vector this_com; |
95 | | real mtot = 0.0; |
96 | | for (integer i = 0; i != NDIM; ++i) { |
97 | | this_com[i] = 0.0; |
98 | | for (integer j = 0; j != NDIM; ++j) { |
99 | | quad_moment[i][j] = 0.0; |
100 | | } |
101 | | } |
102 | | |
103 | | auto& M = *M_ptr; |
104 | | auto& mon = *mon_ptr; |
105 | | std::vector<space_vector> const& com0 = *(com_ptr[0]); |
106 | | for (integer i = 0; i != G_NX; ++i) { |
107 | | for (integer j = 0; j != G_NX; ++j) { |
108 | | for (integer k = 0; k != G_NX; ++k) { |
109 | | const integer iii1 = gindex(i, j, k); |
110 | | const integer iii0 = gindex(i + H_BW , j + H_BW , k + H_BW ); |
111 | | space_vector const& com0iii1 = com0[iii1]; |
112 | | multipole const& Miii1 = M[iii1]; |
113 | | for (integer n = 0; n != NDIM; ++n) { |
114 | | real mass; |
115 | | if (is_leaf) { |
116 | | mass = mon[iii1]; |
117 | | } else { |
118 | | mass = Miii1(); |
119 | | } |
120 | | this_com[n] += mass * com0iii1[n]; |
121 | | mtot += mass; |
122 | | for (integer m = 0; m != NDIM; ++m) { |
123 | | if (!is_leaf) { |
124 | | quad_moment[n][m] += Miii1(n, m); |
125 | | } |
126 | | quad_moment[n][m] += mass * com0iii1[n] * com0iii1[m]; |
127 | | } |
128 | | } |
129 | | } |
130 | | } |
131 | | } |
132 | | for (integer j = 0; j != NDIM; ++j) { |
133 | | this_com[j] /= mtot; |
134 | | } |
135 | | |
136 | | find_eigenvectors(quad_moment, eigen, lambda); |
137 | | integer index; |
138 | | if (lambda[0] > lambda[1] && lambda[0] > lambda[2]) { |
139 | | index = 0; |
140 | | } else if (lambda[1] > lambda[2]) { |
141 | | index = 1; |
142 | | } else { |
143 | | index = 2; |
144 | | } |
145 | | space_vector rc; |
146 | | for (integer j = 0; j != NDIM; ++j) { |
147 | | rc[j] = eigen[index][j]; |
148 | | } |
149 | | std::pair<space_vector, space_vector> pair{rc, this_com}; |
150 | | PROF_END; |
151 | | return pair; |
152 | | } |
153 | | |
154 | | void grid::solve_gravity(gsolve_type type) { |
155 | | |
156 | | compute_multipoles(type); |
157 | | compute_interactions(type); |
158 | | compute_expansions(type); |
159 | | } |
160 | | |
161 | | void grid::compute_interactions(gsolve_type type) { |
162 | | PROF_BEGIN; |
163 | | auto& M = *M_ptr; |
164 | | auto& mon = *mon_ptr; |
165 | | std::fill(std::begin(L), std::end(L), ZERO); |
166 | | std::fill(std::begin(L_c), std::end(L_c), ZERO); |
167 | | if (!is_leaf) { |
168 | | const auto& this_ilist = is_root ? ilist_r : ilist_n; |
169 | | const integer list_size = this_ilist.size(); |
170 | | |
171 | | std::vector<space_vector> const& com0 = (*(com_ptr[0])); |
172 | | hpx::parallel::for_loop_strided( |
173 | | for_loop_policy, 0, list_size, simd_len, |
174 | | [&com0, &this_ilist, list_size, type, this](std::size_t li) { |
175 | | |
176 | | std::array<simd_vector, NDIM> dX; |
177 | | std::array<simd_vector, NDIM> X; |
178 | | std::array<simd_vector, NDIM> Y; |
179 | | |
180 | | taylor<4, simd_vector> m0; |
181 | | taylor<4, simd_vector> m1; |
182 | | taylor<4, simd_vector> n0; |
183 | | taylor<4, simd_vector> n1; |
184 | | |
185 | | auto& M = *M_ptr; |
186 | | auto& mon = *mon_ptr; |
187 | | // FIXME: replace with vector-pack gather-loads |
188 | | #pragma GCC ivdep |
189 | | for (integer i = 0; i != simd_len; ++i) { |
190 | | const auto index = std::min(integer(li + i),integer(list_size-1)); |
191 | | const integer iii0 = this_ilist[index].first; |
192 | | const integer iii1 = this_ilist[index].second; |
193   | 2.4%
| space_vector const& com0iii0 = com0[iii0]; |
| % of fetches | Miss ratio | Fetch ratio | WB ratio | Fetch Util | WB Util | PC | Type | Issues | 2.4%Thread Id | % of fetches |
---|
Thread Total | 2.4% | 23408 | 0.1% | 23409 | 0.0% | 23410 | 0.0% | 23411 | 0.7% | 23412 | 0.0% | 23413 | 0.0% | 23414 | 0.0% | 23415 | 0.0% | 23416 | 0.0% | 23417 | 0.5% | 23418 | 0.0% | 23419 | 0.0% | 23420 | 0.2% | 23421 | 0.2% | 23422 | 0.0% | 23423 | 0.2% | 23424 | 0.2% | 23425 | 0.0% | 23426 | 0.0% | 23427 | 0.0% | 23428 | 0.2% | 23429 | 0.0% | 23430 | 0.0% | 23431 | 0.0% |
| 92.1%Thread Id | Total Miss ratio | Uncategorized | Replacement | Coherence | Flush |
---|
Thread Average | 92.1% | 0.0% | 92.1% | 0.0% | 0.0% | 23408 | 52.5% | 0.0% | 52.5% | 0.0% | 0.0% | 23409 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23410 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23411 | 97.2% | 0.0% | 97.2% | 0.0% | 0.0% | 23412 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23413 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23414 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23415 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23416 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23417 | 95.4% | 0.0% | 95.4% | 0.0% | 0.0% | 23418 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23419 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23420 | 77.8% | 0.0% | 77.8% | 0.0% | 0.0% | 23421 | 100.0% | 0.0% | 100.0% | 0.0% | 0.0% | 23422 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23423 | 100.0% | 0.0% | 100.0% | 0.0% | 0.0% | 23424 | 100.0% | 0.0% | 100.0% | 0.0% | 0.0% | 23425 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23426 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23427 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23428 | 100.0% | 0.0% | 100.0% | 0.0% | 0.0% | 23429 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23430 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23431 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% |
| 92.1%Thread Id | Total Fetch ratio | Uncategorized | Replacement | Coherence | Flush |
---|
Thread Average | 92.1% | 0.0% | 92.1% | 0.0% | 0.0% | 23408 | 52.5% | 0.0% | 52.5% | 0.0% | 0.0% | 23409 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23410 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23411 | 97.2% | 0.0% | 97.2% | 0.0% | 0.0% | 23412 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23413 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23414 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23415 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23416 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23417 | 95.4% | 0.0% | 95.4% | 0.0% | 0.0% | 23418 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23419 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23420 | 77.8% | 0.0% | 77.8% | 0.0% | 0.0% | 23421 | 100.0% | 0.0% | 100.0% | 0.0% | 0.0% | 23422 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23423 | 100.0% | 0.0% | 100.0% | 0.0% | 0.0% | 23424 | 100.0% | 0.0% | 100.0% | 0.0% | 0.0% | 23425 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23426 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23427 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23428 | 100.0% | 0.0% | 100.0% | 0.0% | 0.0% | 23429 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23430 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23431 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% |
| 0.0%Thread Id | Total Write-back ratio | Uncategorized | Replacement | Coherence | Flush |
---|
Thread Average | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23408 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23409 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23410 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23411 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23412 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23413 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23414 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23415 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23416 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23417 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23418 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23419 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23420 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23421 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23422 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23423 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23424 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23425 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23426 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23427 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23428 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23429 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23430 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23431 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% |
| 5.4%Thread Id | Fetch utilization |
---|
Thread Average | 5.4% | 23408 | 9.1% | 23409 | 0.0% | 23410 | 100.0% | 23411 | 4.2% | 23412 | 48.9% | 23413 | 100.0% | 23414 | 100.0% | 23415 | 100.0% | 23416 | 0.0% | 23417 | 3.1% | 23418 | 100.0% | 23419 | 100.0% | 23420 | 3.1% | 23421 | 3.1% | 23422 | 100.0% | 23423 | 6.2% | 23424 | 3.1% | 23425 | 0.0% | 23426 | 100.0% | 23427 | 100.0% | 23428 | 3.1% | 23429 | 100.0% | 23430 | 100.0% | 23431 | 100.0% |
| 100.0%Thread Id | Write-back utilization |
---|
Thread Average | 100.0% | 23408 | 100.0% | 23409 | 100.0% | 23410 | 100.0% | 23411 | 100.0% | 23412 | 100.0% | 23413 | 100.0% | 23414 | 100.0% | 23415 | 100.0% | 23416 | 100.0% | 23417 | 100.0% | 23418 | 100.0% | 23419 | 100.0% | 23420 | 100.0% | 23421 | 100.0% | 23422 | 100.0% | 23423 | 100.0% | 23424 | 100.0% | 23425 | 100.0% | 23426 | 100.0% | 23427 | 100.0% | 23428 | 100.0% | 23429 | 100.0% | 23430 | 100.0% | 23431 | 100.0% |
| 0x9dbd8d | R |  
|
|
|  | |
194 | | space_vector const& com0iii1 = com0[iii1]; |
195 | | for (integer d = 0; d < NDIM; ++d) { |
196 | | X[d][i] = com0iii0[d]; |
197 | | Y[d][i] = com0iii1[d]; |
198 | | } |
199 | | multipole const& Miii0 = M[iii0]; |
200 | | multipole const& Miii1 = M[iii1]; |
201 | | for (integer j = 0; j != taylor_sizes[3]; ++j) { |
202 | | m0[j][i] = Miii1[j]; |
203 | | m1[j][i] = Miii0[j]; |
204 | | } |
205 | | if (type == RHO) { |
206 | | real const tmp1 = Miii1() / Miii0(); |
207 | | real const tmp2 = Miii0() / Miii1(); |
208 | | for (integer j = taylor_sizes[2]; j != taylor_sizes[3]; ++j) { |
209 | | n0[j][i] = Miii1[j] - Miii0[j] * tmp1; |
210 | | n1[j][i] = Miii0[j] - Miii1[j] * tmp2; |
211 | | } |
212 | | } |
213 | | } |
214 | | |
215 | | if (type != RHO) { |
216 | | #pragma GCC ivdep |
217 | | for (integer j = taylor_sizes[2]; j != taylor_sizes[3]; ++j) { |
218 | | n0[j] = ZERO; |
219 | | n1[j] = ZERO; |
220 | | } |
221 | | } |
222 | | #pragma GCC ivdep |
223 | | for (integer d = 0; d < NDIM; ++d) { |
224 | | dX[d] = X[d] - Y[d]; |
225 | | } |
226 | | |
227 | | taylor<5, simd_vector> D; |
228 | | taylor<4, simd_vector> A0, A1; |
229 | | std::array<simd_vector, NDIM> B0 = { |
230 | | simd_vector(ZERO), simd_vector(ZERO), simd_vector(ZERO) |
231 | | }; |
232 | | std::array<simd_vector, NDIM> B1 = { |
233 | | simd_vector(ZERO), simd_vector(ZERO), simd_vector(ZERO) |
234 | | }; |
235 | | |
236 | | D.set_basis(dX); |
237 | | A0[0] = m0[0] * D[0]; |
238 | | A1[0] = m1[0] * D[0]; |
239 | | if (type != RHO) { |
240 | | for (integer i = taylor_sizes[0]; i != taylor_sizes[1]; ++i) { |
241 | | A0[0] -= m0[i] * D[i]; |
242 | | A1[0] += m1[i] * D[i]; |
243 | | } |
244 | | } |
245 | | for (integer i = taylor_sizes[1]; i != taylor_sizes[2]; ++i) { |
246 | | const auto tmp = D[i] * (factor[i] / real(2)); |
247 | | A0[0] += m0[i] * tmp; |
248 | | A1[0] += m1[i] * tmp; |
249 | | } |
250 | | for (integer i = taylor_sizes[2]; i != taylor_sizes[3]; ++i) { |
251 | | const auto tmp = D[i] * (factor[i] / real(6)); |
252 | | A0[0] -= m0[i] * tmp; |
253 | | A1[0] += m1[i] * tmp; |
254 | | } |
255 | | |
256 | | for (integer a = 0; a < NDIM; ++a) { |
257 | | A0(a) = m0() * D(a); |
258 | | A1(a) = -m1() * D(a); |
259 | | for (integer b = 0; b < NDIM; ++b) { |
260 | | if (type != RHO) { |
261 | | A0(a) -= m0(a) * D(a, b); |
262 | | A1(a) -= m1(a) * D(a, b); |
263 | | } |
264 | | for (integer c = b; c < NDIM; ++c) { |
265 | | const auto tmp1 = D(a, b, c) * (factor(c, b) / real(2)); |
266 | | A0(a) += m0(c, b) * tmp1; |
267 | | A1(a) -= m1(c, b) * tmp1; |
268 | | } |
269 | | |
270 | | } |
271 | | } |
272 | | |
273 | | if (type == RHO) { |
274 | | for (integer a = 0; a != NDIM; ++a) { |
275 | | for (integer b = 0; b != NDIM; ++b) { |
276 | | for (integer c = b; c != NDIM; ++c) { |
277 | | for (integer d = c; d != NDIM; ++d) { |
278 | | const auto tmp = D(a, b, c, d) * (factor(b, c, d) / real(6)); |
279 | | B0[a] -= n0(b, c, d) * tmp; |
280 | | B1[a] -= n1(b, c, d) * tmp; |
281 | | } |
282 | | } |
283 | | } |
284 | | |
285 | | } |
286 | | } |
287 | | |
288 | | for (integer a = 0; a < NDIM; ++a) { |
289 | | for (integer b = a; b < NDIM; ++b) { |
290 | | A0(a, b) = m0() * D(a, b); |
291 | | A1(a, b) = m1() * D(a, b); |
292 | | for (integer c = 0; c < NDIM; ++c) { |
293 | | const auto& tmp = D(a, b, c); |
294 | | A0(a, b) -= m0(c) * tmp; |
295 | | A1(a, b) += m1(c) * tmp; |
296 | | } |
297 | | |
298 | | } |
299 | | } |
300 | | |
301 | | for (integer i = taylor_sizes[2]; i != taylor_sizes[3]; ++i) { |
302 | | const auto& tmp = D[i]; |
303 | | A1[i] = -m1[0] * tmp; |
304 | | A0[i] = m0[0] * tmp; |
305 | | } |
306 | | #ifdef USE_GRAV_PAR |
307 | | std::lock_guard<hpx::lcos::local::spinlock> lock(*L_mtx); |
308 | | #endif |
309 | | for (integer i = 0; i != simd_len && i + li < list_size; ++i) { |
310 | | const integer iii0 = this_ilist[li + i].first; |
311 | | const integer iii1 = this_ilist[li + i].second; |
312   | 0.0%
| expansion& Liii0 = L[iii0]; |
| % of fetches | Miss ratio | Fetch ratio | WB ratio | Fetch Util | WB Util | PC | Type | Issues | 0.0%Thread Id | % of fetches |
---|
Thread Total | 0.0% | 23408 | 0.0% | 23409 | 0.0% | 23410 | 0.0% | 23411 | 0.0% | 23412 | 0.0% | 23413 | 0.0% | 23414 | 0.0% | 23415 | 0.0% | 23416 | 0.0% | 23417 | 0.0% | 23418 | 0.0% | 23419 | 0.0% | 23420 | 0.0% | 23421 | 0.0% | 23422 | 0.0% | 23423 | 0.0% | 23424 | 0.0% | 23425 | 0.0% | 23426 | 0.0% | 23427 | 0.0% | 23428 | 0.0% | 23429 | 0.0% | 23430 | 0.0% | 23431 | 0.0% |
| 0.2%Thread Id | Total Miss ratio | Uncategorized | Replacement | Coherence | Flush |
---|
Thread Average | 0.2% | 0.0% | 0.2% | 0.0% | 0.0% | 23408 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23409 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23410 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23411 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23412 | 0.5% | 0.0% | 0.5% | 0.0% | 0.0% | 23413 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23414 | 0.5% | 0.0% | 0.5% | 0.0% | 0.0% | 23415 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23416 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23417 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23418 | 0.6% | 0.0% | 0.6% | 0.0% | 0.0% | 23419 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23420 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23421 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23422 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23423 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23424 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23425 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23426 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23427 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23428 | 0.2% | 0.0% | 0.2% | 0.0% | 0.0% | 23429 | 0.4% | 0.0% | 0.4% | 0.0% | 0.0% | 23430 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23431 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% |
| 0.2%Thread Id | Total Fetch ratio | Uncategorized | Replacement | Coherence | Flush |
---|
Thread Average | 0.2% | 0.0% | 0.2% | 0.0% | 0.0% | 23408 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23409 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23410 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23411 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23412 | 0.5% | 0.0% | 0.5% | 0.0% | 0.0% | 23413 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23414 | 0.5% | 0.0% | 0.5% | 0.0% | 0.0% | 23415 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23416 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23417 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23418 | 0.6% | 0.0% | 0.6% | 0.0% | 0.0% | 23419 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23420 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23421 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23422 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23423 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23424 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23425 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23426 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23427 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23428 | 0.2% | 0.0% | 0.2% | 0.0% | 0.0% | 23429 | 0.4% | 0.0% | 0.4% | 0.0% | 0.0% | 23430 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23431 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% |
| 0.0%Thread Id | Total Write-back ratio | Uncategorized | Replacement | Coherence | Flush |
---|
Thread Average | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23408 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23409 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23410 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23411 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23412 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23413 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23414 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23415 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23416 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23417 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23418 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23419 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23420 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23421 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23422 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23423 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23424 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23425 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23426 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23427 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23428 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23429 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23430 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23431 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% |
| 8.5%Thread Id | Fetch utilization |
---|
Thread Average | 8.5% | 23408 | 100.0% | 23409 | 100.0% | 23410 | 100.0% | 23411 | 3.1% | 23412 | 6.5% | 23413 | 100.0% | 23414 | 3.1% | 23415 | 0.0% | 23416 | 100.0% | 23417 | 100.0% | 23418 | 3.1% | 23419 | 100.0% | 23420 | 100.0% | 23421 | 0.0% | 23422 | 0.0% | 23423 | 100.0% | 23424 | 100.0% | 23425 | 100.0% | 23426 | 100.0% | 23427 | 100.0% | 23428 | 6.0% | 23429 | 8.6% | 23430 | 3.1% | 23431 | 100.0% |
| 100.0%Thread Id | Write-back utilization |
---|
Thread Average | 100.0% | 23408 | 100.0% | 23409 | 100.0% | 23410 | 100.0% | 23411 | 100.0% | 23412 | 100.0% | 23413 | 100.0% | 23414 | 100.0% | 23415 | 100.0% | 23416 | 100.0% | 23417 | 100.0% | 23418 | 100.0% | 23419 | 100.0% | 23420 | 100.0% | 23421 | 100.0% | 23422 | 100.0% | 23423 | 100.0% | 23424 | 100.0% | 23425 | 100.0% | 23426 | 100.0% | 23427 | 100.0% | 23428 | 100.0% | 23429 | 100.0% | 23430 | 100.0% | 23431 | 100.0% |
| 0x9dc67e | R | 
|
|
| | |
313   | 0.0%
| expansion& Liii1 = L[iii1]; |
| % of fetches | Miss ratio | Fetch ratio | WB ratio | Fetch Util | WB Util | PC | Type | Issues | 0.0%Thread Id | % of fetches |
---|
Thread Total | 0.0% | 23408 | 0.0% | 23409 | 0.0% | 23410 | 0.0% | 23411 | 0.0% | 23412 | 0.0% | 23413 | 0.0% | 23414 | 0.0% | 23415 | 0.0% | 23416 | 0.0% | 23417 | 0.0% | 23418 | 0.0% | 23419 | 0.0% | 23420 | 0.0% | 23421 | 0.0% | 23422 | 0.0% | 23423 | 0.0% | 23424 | 0.0% | 23425 | 0.0% | 23426 | 0.0% | 23427 | 0.0% | 23428 | 0.0% | 23429 | 0.0% | 23430 | 0.0% | 23431 | 0.0% |
| 0.0%Thread Id | Total Miss ratio | Uncategorized | Replacement | Coherence | Flush |
---|
Thread Average | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23408 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23409 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23410 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23411 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23412 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23413 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23414 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23415 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23416 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23417 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23418 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23419 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23420 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23421 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23422 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23423 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23424 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23425 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23426 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23427 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23428 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23429 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23430 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23431 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% |
| 0.0%Thread Id | Total Fetch ratio | Uncategorized | Replacement | Coherence | Flush |
---|
Thread Average | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23408 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23409 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23410 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23411 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23412 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23413 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23414 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23415 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23416 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23417 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23418 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23419 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23420 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23421 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23422 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23423 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23424 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23425 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23426 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23427 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23428 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23429 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23430 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23431 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% |
| 0.0%Thread Id | Total Write-back ratio | Uncategorized | Replacement | Coherence | Flush |
---|
Thread Average | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23408 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23409 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23410 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23411 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23412 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23413 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23414 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23415 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23416 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23417 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23418 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23419 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23420 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23421 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23422 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23423 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23424 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23425 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23426 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23427 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23428 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23429 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23430 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23431 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% |
| 8.5%Thread Id | Fetch utilization |
---|
Thread Average | 8.5% | 23408 | 100.0% | 23409 | 100.0% | 23410 | 100.0% | 23411 | 3.1% | 23412 | 6.5% | 23413 | 100.0% | 23414 | 3.1% | 23415 | 0.0% | 23416 | 100.0% | 23417 | 100.0% | 23418 | 3.1% | 23419 | 100.0% | 23420 | 100.0% | 23421 | 0.0% | 23422 | 0.0% | 23423 | 100.0% | 23424 | 100.0% | 23425 | 100.0% | 23426 | 100.0% | 23427 | 100.0% | 23428 | 6.0% | 23429 | 8.6% | 23430 | 3.1% | 23431 | 100.0% |
| 100.0%Thread Id | Write-back utilization |
---|
Thread Average | 100.0% | 23408 | 100.0% | 23409 | 100.0% | 23410 | 100.0% | 23411 | 100.0% | 23412 | 100.0% | 23413 | 100.0% | 23414 | 100.0% | 23415 | 100.0% | 23416 | 100.0% | 23417 | 100.0% | 23418 | 100.0% | 23419 | 100.0% | 23420 | 100.0% | 23421 | 100.0% | 23422 | 100.0% | 23423 | 100.0% | 23424 | 100.0% | 23425 | 100.0% | 23426 | 100.0% | 23427 | 100.0% | 23428 | 100.0% | 23429 | 100.0% | 23430 | 100.0% | 23431 | 100.0% |
| 0x9dc682 | R | 
|
|
| | |
314 | | for (integer j = 0; j != taylor_sizes[3]; ++j) { |
315 | | Liii0[j] += A0[j][i]; |
316   | 1.5%
| Liii1[j] += A1[j][i]; |
| % of fetches | Miss ratio | Fetch ratio | WB ratio | Fetch Util | WB Util | PC | Type | Issues | 1.5%Thread Id | % of fetches |
---|
Thread Total | 1.5% | 23408 | 0.2% | 23409 | 0.0% | 23410 | 0.5% | 23411 | 0.0% | 23412 | 0.0% | 23413 | 0.0% | 23414 | 0.0% | 23415 | 0.0% | 23416 | 0.0% | 23417 | 0.0% | 23418 | 0.0% | 23419 | 0.0% | 23420 | 0.0% | 23421 | 0.3% | 23422 | 0.0% | 23423 | 0.0% | 23424 | 0.0% | 23425 | 0.2% | 23426 | 0.0% | 23427 | 0.0% | 23428 | 0.0% | 23429 | 0.0% | 23430 | 0.2% | 23431 | 0.0% |
| 2.1%Thread Id | Total Miss ratio | Uncategorized | Replacement | Coherence | Flush |
---|
Thread Average | 2.1% | 0.0% | 0.8% | 1.4% | 0.0% | 23408 | 4.8% | 0.0% | 0.0% | 4.8% | 0.0% | 23409 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23410 | 15.4% | 0.0% | 7.7% | 7.7% | 0.0% | 23411 | 0.8% | 0.0% | 0.8% | 0.0% | 0.0% | 23412 | 0.5% | 0.0% | 0.5% | 0.0% | 0.0% | 23413 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23414 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23415 | 0.6% | 0.0% | 0.6% | 0.0% | 0.0% | 23416 | 0.6% | 0.0% | 0.6% | 0.0% | 0.0% | 23417 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23418 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23419 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23420 | 0.4% | 0.0% | 0.4% | 0.0% | 0.0% | 23421 | 8.5% | 0.0% | 0.8% | 7.7% | 0.0% | 23422 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23423 | 0.2% | 0.0% | 0.2% | 0.0% | 0.0% | 23424 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23425 | 6.5% | 0.0% | 6.5% | 0.0% | 0.0% | 23426 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23427 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23428 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23429 | 0.6% | 0.0% | 0.6% | 0.0% | 0.0% | 23430 | 6.1% | 0.0% | 0.4% | 5.7% | 0.0% | 23431 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% |
| 2.1%Thread Id | Total Fetch ratio | Uncategorized | Replacement | Coherence | Flush |
---|
Thread Average | 2.1% | 0.0% | 0.8% | 1.4% | 0.0% | 23408 | 4.8% | 0.0% | 0.0% | 4.8% | 0.0% | 23409 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23410 | 15.4% | 0.0% | 7.7% | 7.7% | 0.0% | 23411 | 0.8% | 0.0% | 0.8% | 0.0% | 0.0% | 23412 | 0.5% | 0.0% | 0.5% | 0.0% | 0.0% | 23413 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23414 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23415 | 0.6% | 0.0% | 0.6% | 0.0% | 0.0% | 23416 | 0.6% | 0.0% | 0.6% | 0.0% | 0.0% | 23417 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23418 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23419 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23420 | 0.4% | 0.0% | 0.4% | 0.0% | 0.0% | 23421 | 8.5% | 0.0% | 0.8% | 7.7% | 0.0% | 23422 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23423 | 0.2% | 0.0% | 0.2% | 0.0% | 0.0% | 23424 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23425 | 6.5% | 0.0% | 6.5% | 0.0% | 0.0% | 23426 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23427 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23428 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23429 | 0.6% | 0.0% | 0.6% | 0.0% | 0.0% | 23430 | 6.1% | 0.0% | 0.4% | 5.7% | 0.0% | 23431 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% |
| 0.0%Thread Id | Total Write-back ratio | Uncategorized | Replacement | Coherence | Flush |
---|
Thread Average | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23408 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23409 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23410 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23411 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23412 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23413 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23414 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23415 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23416 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23417 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23418 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23419 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23420 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23421 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23422 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23423 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23424 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23425 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23426 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23427 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23428 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23429 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23430 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23431 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% |
| 88.0%Thread Id | Fetch utilization |
---|
Thread Average | 88.0% | 23408 | 19.0% | 23409 | 100.0% | 23410 | 14.6% | 23411 | 100.0% | 23412 | 100.0% | 23413 | 100.0% | 23414 | 100.0% | 23415 | 27.4% | 23416 | 100.0% | 23417 | 100.0% | 23418 | 100.0% | 23419 | 100.0% | 23420 | 100.0% | 23421 | 12.8% | 23422 | 100.0% | 23423 | 100.0% | 23424 | 100.0% | 23425 | 5.8% | 23426 | 100.0% | 23427 | 100.0% | 23428 | 100.0% | 23429 | 100.0% | 23430 | 29.5% | 23431 | 100.0% |
| 100.0%Thread Id | Write-back utilization |
---|
Thread Average | 100.0% | 23408 | 49.4% | 23409 | 100.0% | 23410 | 27.3% | 23411 | 100.0% | 23412 | 100.0% | 23413 | 100.0% | 23414 | 100.0% | 23415 | 100.0% | 23416 | 100.0% | 23417 | 100.0% | 23418 | 100.0% | 23419 | 100.0% | 23420 | 100.0% | 23421 | 24.9% | 23422 | 100.0% | 23423 | 100.0% | 23424 | 100.0% | 23425 | 53.5% | 23426 | 100.0% | 23427 | 100.0% | 23428 | 100.0% | 23429 | 69.4% | 23430 | 31.5% | 23431 | 100.0% |
| 0x9dc6af | R |   
| 0.0%Thread Id | % of fetches |
---|
Thread Total | 0.0% | 23408 | 0.0% | 23409 | 0.0% | 23410 | 0.0% | 23411 | 0.0% | 23412 | 0.0% | 23413 | 0.0% | 23414 | 0.0% | 23415 | 0.0% | 23416 | 0.0% | 23417 | 0.0% | 23418 | 0.0% | 23419 | 0.0% | 23420 | 0.0% | 23421 | 0.0% | 23422 | 0.0% | 23423 | 0.0% | 23424 | 0.0% | 23425 | 0.0% | 23426 | 0.0% | 23427 | 0.0% | 23428 | 0.0% | 23429 | 0.0% | 23430 | 0.0% | 23431 | 0.0% |
| 0.0%Thread Id | Total Miss ratio | Uncategorized | Replacement | Coherence | Flush |
---|
Thread Average | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23408 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23409 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23410 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23411 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23412 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23413 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23414 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23415 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23416 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23417 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23418 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23419 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23420 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23421 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23422 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23423 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23424 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23425 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23426 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23427 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23428 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23429 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23430 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23431 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% |
| 0.0%Thread Id | Total Fetch ratio | Uncategorized | Replacement | Coherence | Flush |
---|
Thread Average | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23408 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23409 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23410 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23411 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23412 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23413 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23414 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23415 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23416 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23417 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23418 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23419 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23420 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23421 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23422 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23423 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23424 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23425 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23426 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23427 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23428 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23429 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23430 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23431 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% |
| 1.8%Thread Id | Total Write-back ratio | Uncategorized | Replacement | Coherence | Flush |
---|
Thread Average | 1.8% | 0.0% | 0.4% | 1.4% | 0.0% | 23408 | 4.8% | 0.0% | 0.0% | 4.8% | 0.0% | 23409 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23410 | 8.3% | 0.0% | 0.0% | 8.3% | 0.0% | 23411 | 0.8% | 0.0% | 0.8% | 0.0% | 0.0% | 23412 | 0.5% | 0.0% | 0.5% | 0.0% | 0.0% | 23413 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23414 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23415 | 0.6% | 0.0% | 0.6% | 0.0% | 0.0% | 23416 | 0.6% | 0.0% | 0.6% | 0.0% | 0.0% | 23417 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23418 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23419 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23420 | 0.4% | 0.0% | 0.4% | 0.0% | 0.0% | 23421 | 8.5% | 0.0% | 0.8% | 7.7% | 0.0% | 23422 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23423 | 0.2% | 0.0% | 0.2% | 0.0% | 0.0% | 23424 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23425 | 6.5% | 0.0% | 6.5% | 0.0% | 0.0% | 23426 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23427 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23428 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23429 | 0.6% | 0.0% | 0.6% | 0.0% | 0.0% | 23430 | 6.1% | 0.0% | 0.4% | 5.7% | 0.0% | 23431 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% |
| 88.0%Thread Id | Fetch utilization |
---|
Thread Average | 88.0% | 23408 | 19.0% | 23409 | 100.0% | 23410 | 14.6% | 23411 | 100.0% | 23412 | 100.0% | 23413 | 100.0% | 23414 | 100.0% | 23415 | 27.4% | 23416 | 100.0% | 23417 | 100.0% | 23418 | 100.0% | 23419 | 100.0% | 23420 | 100.0% | 23421 | 12.8% | 23422 | 100.0% | 23423 | 100.0% | 23424 | 100.0% | 23425 | 5.8% | 23426 | 100.0% | 23427 | 100.0% | 23428 | 100.0% | 23429 | 100.0% | 23430 | 29.5% | 23431 | 100.0% |
| 100.0%Thread Id | Write-back utilization |
---|
Thread Average | 100.0% | 23408 | 49.4% | 23409 | 100.0% | 23410 | 27.3% | 23411 | 100.0% | 23412 | 100.0% | 23413 | 100.0% | 23414 | 100.0% | 23415 | 100.0% | 23416 | 100.0% | 23417 | 100.0% | 23418 | 100.0% | 23419 | 100.0% | 23420 | 100.0% | 23421 | 24.9% | 23422 | 100.0% | 23423 | 100.0% | 23424 | 100.0% | 23425 | 53.5% | 23426 | 100.0% | 23427 | 100.0% | 23428 | 100.0% | 23429 | 69.4% | 23430 | 31.5% | 23431 | 100.0% |
| 0x9dc6ba | W |   
|
|
|   | |
317 | | } |
318 | | |
319 | | if (type == RHO) { |
320 | | space_vector& L_ciii0 = L_c[iii0]; |
321 | | space_vector& L_ciii1 = L_c[iii1]; |
322 | | for (integer j = 0; j != NDIM; ++j) { |
323 | | L_ciii0[j] += B0[j][i]; |
324   | 0.3%
| L_ciii1[j] += B1[j][i]; |
| % of fetches | Miss ratio | Fetch ratio | WB ratio | Fetch Util | WB Util | PC | Type | Issues | 0.3%Thread Id | % of fetches |
---|
Thread Total | 0.3% | 23408 | 0.0% | 23409 | 0.0% | 23410 | 0.0% | 23411 | 0.0% | 23412 | 0.0% | 23413 | 0.0% | 23414 | 0.0% | 23415 | 0.0% | 23416 | 0.0% | 23417 | 0.0% | 23418 | 0.0% | 23419 | 0.0% | 23420 | 0.0% | 23421 | 0.0% | 23422 | 0.0% | 23423 | 0.0% | 23424 | 0.0% | 23425 | 0.0% | 23426 | 0.0% | 23427 | 0.0% | 23428 | 0.0% | 23429 | 0.2% | 23430 | 0.0% | 23431 | 0.0% |
| 2.5%Thread Id | Total Miss ratio | Uncategorized | Replacement | Coherence | Flush |
---|
Thread Average | 2.5% | 0.0% | 0.3% | 2.2% | 0.0% | 23408 | 2.0% | 0.0% | 2.0% | 0.0% | 0.0% | 23409 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23410 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23411 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23412 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23413 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23414 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23415 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23416 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23417 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23418 | 1.5% | 0.0% | 1.5% | 0.0% | 0.0% | 23419 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23420 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23421 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23422 | 3.1% | 0.0% | 3.1% | 0.0% | 0.0% | 23423 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23424 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23425 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23426 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23427 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23428 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23429 | 25.0% | 0.0% | 1.0% | 24.0% | 0.0% | 23430 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23431 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% |
| 2.5%Thread Id | Total Fetch ratio | Uncategorized | Replacement | Coherence | Flush |
---|
Thread Average | 2.5% | 0.0% | 0.3% | 2.2% | 0.0% | 23408 | 2.0% | 0.0% | 2.0% | 0.0% | 0.0% | 23409 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23410 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23411 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23412 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23413 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23414 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23415 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23416 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23417 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23418 | 1.5% | 0.0% | 1.5% | 0.0% | 0.0% | 23419 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23420 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23421 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23422 | 3.1% | 0.0% | 3.1% | 0.0% | 0.0% | 23423 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23424 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23425 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23426 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23427 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23428 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23429 | 25.0% | 0.0% | 1.0% | 24.0% | 0.0% | 23430 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23431 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% |
| 0.0%Thread Id | Total Write-back ratio | Uncategorized | Replacement | Coherence | Flush |
---|
Thread Average | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23408 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23409 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23410 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23411 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23412 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23413 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23414 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23415 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23416 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23417 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23418 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23419 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23420 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23421 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23422 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23423 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23424 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23425 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23426 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23427 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23428 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23429 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23430 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23431 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% |
| 42.6%Thread Id | Fetch utilization |
---|
Thread Average | 42.6% | 23408 | 0.0% | 23409 | 100.0% | 23410 | 100.0% | 23411 | 0.0% | 23412 | 0.0% | 23413 | 100.0% | 23414 | 100.0% | 23415 | 100.0% | 23416 | 1.0% | 23417 | 0.0% | 23418 | 0.0% | 23419 | 100.0% | 23420 | 100.0% | 23421 | 100.0% | 23422 | 100.0% | 23423 | 100.0% | 23424 | 100.0% | 23425 | 0.0% | 23426 | 100.0% | 23427 | 0.0% | 23428 | 100.0% | 23429 | 0.0% | 23430 | 0.0% | 23431 | 100.0% |
| 73.3%Thread Id | Write-back utilization |
---|
Thread Average | 73.3% | 23408 | 12.5% | 23409 | 0.0% | 23410 | 100.0% | 23411 | 100.0% | 23412 | 100.0% | 23413 | 100.0% | 23414 | 100.0% | 23415 | 100.0% | 23416 | 100.0% | 23417 | 100.0% | 23418 | 100.0% | 23419 | 100.0% | 23420 | 100.0% | 23421 | 100.0% | 23422 | 12.5% | 23423 | 29.9% | 23424 | 100.0% | 23425 | 100.0% | 23426 | 100.0% | 23427 | 100.0% | 23428 | 100.0% | 23429 | 1.4% | 23430 | 28.7% | 23431 | 100.0% |
| 0x9dc6fc | R |  
| 0.0%Thread Id | % of fetches |
---|
Thread Total | 0.0% | 23408 | 0.0% | 23409 | 0.0% | 23410 | 0.0% | 23411 | 0.0% | 23412 | 0.0% | 23413 | 0.0% | 23414 | 0.0% | 23415 | 0.0% | 23416 | 0.0% | 23417 | 0.0% | 23418 | 0.0% | 23419 | 0.0% | 23420 | 0.0% | 23421 | 0.0% | 23422 | 0.0% | 23423 | 0.0% | 23424 | 0.0% | 23425 | 0.0% | 23426 | 0.0% | 23427 | 0.0% | 23428 | 0.0% | 23429 | 0.0% | 23430 | 0.0% | 23431 | 0.0% |
| 0.0%Thread Id | Total Miss ratio | Uncategorized | Replacement | Coherence | Flush |
---|
Thread Average | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23408 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23409 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23410 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23411 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23412 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23413 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23414 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23415 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23416 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23417 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23418 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23419 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23420 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23421 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23422 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23423 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23424 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23425 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23426 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23427 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23428 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23429 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23430 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23431 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% |
| 0.0%Thread Id | Total Fetch ratio | Uncategorized | Replacement | Coherence | Flush |
---|
Thread Average | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23408 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23409 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23410 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23411 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23412 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23413 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23414 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23415 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23416 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23417 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23418 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23419 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23420 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23421 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23422 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23423 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23424 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23425 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23426 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23427 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23428 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23429 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23430 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23431 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% |
| 2.5%Thread Id | Total Write-back ratio | Uncategorized | Replacement | Coherence | Flush |
---|
Thread Average | 2.5% | 0.0% | 0.4% | 2.2% | 0.0% | 23408 | 2.0% | 0.0% | 2.0% | 0.0% | 0.0% | 23409 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23410 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23411 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23412 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23413 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23414 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23415 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23416 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23417 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23418 | 1.5% | 0.0% | 1.5% | 0.0% | 0.0% | 23419 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23420 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23421 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23422 | 3.1% | 0.0% | 3.1% | 0.0% | 0.0% | 23423 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23424 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23425 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23426 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23427 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23428 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23429 | 25.0% | 0.0% | 1.1% | 24.0% | 0.0% | 23430 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23431 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% |
| 42.6%Thread Id | Fetch utilization |
---|
Thread Average | 42.6% | 23408 | 0.0% | 23409 | 100.0% | 23410 | 100.0% | 23411 | 0.0% | 23412 | 0.0% | 23413 | 100.0% | 23414 | 100.0% | 23415 | 100.0% | 23416 | 1.0% | 23417 | 0.0% | 23418 | 0.0% | 23419 | 100.0% | 23420 | 100.0% | 23421 | 100.0% | 23422 | 100.0% | 23423 | 100.0% | 23424 | 100.0% | 23425 | 0.0% | 23426 | 100.0% | 23427 | 0.0% | 23428 | 100.0% | 23429 | 0.0% | 23430 | 0.0% | 23431 | 100.0% |
| 73.3%Thread Id | Write-back utilization |
---|
Thread Average | 73.3% | 23408 | 12.5% | 23409 | 0.0% | 23410 | 100.0% | 23411 | 100.0% | 23412 | 100.0% | 23413 | 100.0% | 23414 | 100.0% | 23415 | 100.0% | 23416 | 100.0% | 23417 | 100.0% | 23418 | 100.0% | 23419 | 100.0% | 23420 | 100.0% | 23421 | 100.0% | 23422 | 12.5% | 23423 | 29.9% | 23424 | 100.0% | 23425 | 100.0% | 23426 | 100.0% | 23427 | 100.0% | 23428 | 100.0% | 23429 | 1.4% | 23430 | 28.7% | 23431 | 100.0% |
| 0x9dc707 | W |  
|
|
|  | |
325 | | } |
326 | | } |
327 | | } |
328 | | }); |
329 | | } else { |
330 | | #if !defined(HPX_HAVE_DATAPAR) |
331 | | const v4sd d0 = { 1.0 / dx, +1.0 / sqr(dx), +1.0 / sqr(dx), +1.0 / sqr(dx) }; |
332 | | const v4sd d1 = { 1.0 / dx, -1.0 / sqr(dx), -1.0 / sqr(dx), -1.0 / sqr(dx) }; |
333 | | #else |
334 | | const std::array<double, 4> di0 = { 1.0 / dx, +1.0 / sqr(dx), +1.0 / sqr(dx), +1.0 / sqr(dx) }; |
335 | | const v4sd d0(di0.data()); |
336 | | |
337 | | const std::array<double, 4> di1 = { 1.0 / dx, -1.0 / sqr(dx), -1.0 / sqr(dx), -1.0 / sqr(dx) }; |
338 | | const v4sd d1(di1.data()); |
339 | | #endif |
340 | | |
341 | | const integer dsize = ilist_d.size(); |
342 | | const integer lev = 0; |
343 | | for (integer li = 0; li < dsize; ++li) { |
344 | | const auto& ele = ilist_d[li]; |
345 | | const integer iii0 = ele.first; |
346 | | const integer iii1 = ele.second; |
347 | | #if !defined(HPX_HAVE_DATAPAR) |
348 | | v4sd m0, m1; |
349 | | #pragma GCC ivdep |
350 | | for (integer i = 0; i != 4; ++i) { |
351 | | m0[i] = mon[iii1]; |
352 | | } |
353 | | #pragma GCC ivdep |
354 | | for (integer i = 0; i != 4; ++i) { |
355 | | m1[i] = mon[iii0]; |
356 | | } |
357 | | #else |
358   | 11.2%
| v4sd m0 = mon[iii1]; |
| % of fetches | Miss ratio | Fetch ratio | WB ratio | Fetch Util | WB Util | PC | Type | Issues | 11.2%Thread Id | % of fetches |
---|
Thread Total | 11.2% | 23408 | 0.3% | 23409 | 0.6% | 23410 | 0.4% | 23411 | 0.2% | 23412 | 0.6% | 23413 | 0.7% | 23414 | 0.0% | 23415 | 0.6% | 23416 | 0.6% | 23417 | 0.2% | 23418 | 0.4% | 23419 | 0.2% | 23420 | 0.9% | 23421 | 0.8% | 23422 | 1.2% | 23423 | 0.1% | 23424 | 0.7% | 23425 | 0.3% | 23426 | 0.3% | 23427 | 0.4% | 23428 | 0.2% | 23429 | 0.6% | 23430 | 0.5% | 23431 | 0.3% |
| 41.9%Thread Id | Total Miss ratio | Uncategorized | Replacement | Coherence | Flush |
---|
Thread Average | 41.9% | 0.0% | 41.9% | 0.0% | 0.0% | 23408 | 43.6% | 0.0% | 43.6% | 0.0% | 0.0% | 23409 | 42.5% | 0.0% | 42.5% | 0.0% | 0.0% | 23410 | 31.1% | 0.0% | 31.1% | 0.0% | 0.0% | 23411 | 20.2% | 0.0% | 20.2% | 0.0% | 0.0% | 23412 | 48.9% | 0.0% | 48.9% | 0.0% | 0.0% | 23413 | 51.6% | 0.0% | 51.6% | 0.0% | 0.0% | 23414 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23415 | 28.1% | 0.0% | 28.1% | 0.0% | 0.0% | 23416 | 86.6% | 0.0% | 86.6% | 0.0% | 0.0% | 23417 | 19.4% | 0.0% | 19.4% | 0.0% | 0.0% | 23418 | 45.1% | 0.0% | 45.1% | 0.0% | 0.0% | 23419 | 41.4% | 0.0% | 41.4% | 0.0% | 0.0% | 23420 | 62.4% | 0.0% | 62.4% | 0.0% | 0.0% | 23421 | 68.8% | 0.0% | 68.8% | 0.0% | 0.0% | 23422 | 62.0% | 0.0% | 62.0% | 0.0% | 0.0% | 23423 | 9.3% | 0.0% | 9.3% | 0.0% | 0.0% | 23424 | 39.3% | 0.0% | 39.3% | 0.0% | 0.0% | 23425 | 48.2% | 0.0% | 48.2% | 0.0% | 0.0% | 23426 | 27.6% | 0.0% | 27.6% | 0.0% | 0.0% | 23427 | 25.1% | 0.0% | 25.1% | 0.0% | 0.0% | 23428 | 48.1% | 0.0% | 48.1% | 0.0% | 0.0% | 23429 | 48.1% | 0.0% | 48.1% | 0.0% | 0.0% | 23430 | 41.0% | 0.0% | 41.0% | 0.0% | 0.0% | 23431 | 56.9% | 0.0% | 56.9% | 0.0% | 0.0% |
| 41.9%Thread Id | Total Fetch ratio | Uncategorized | Replacement | Coherence | Flush |
---|
Thread Average | 41.9% | 0.0% | 41.9% | 0.0% | 0.0% | 23408 | 43.6% | 0.0% | 43.6% | 0.0% | 0.0% | 23409 | 42.5% | 0.0% | 42.5% | 0.0% | 0.0% | 23410 | 31.1% | 0.0% | 31.1% | 0.0% | 0.0% | 23411 | 20.2% | 0.0% | 20.2% | 0.0% | 0.0% | 23412 | 48.9% | 0.0% | 48.9% | 0.0% | 0.0% | 23413 | 51.6% | 0.0% | 51.6% | 0.0% | 0.0% | 23414 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23415 | 28.1% | 0.0% | 28.1% | 0.0% | 0.0% | 23416 | 86.6% | 0.0% | 86.6% | 0.0% | 0.0% | 23417 | 19.4% | 0.0% | 19.4% | 0.0% | 0.0% | 23418 | 45.1% | 0.0% | 45.1% | 0.0% | 0.0% | 23419 | 41.4% | 0.0% | 41.4% | 0.0% | 0.0% | 23420 | 62.4% | 0.0% | 62.4% | 0.0% | 0.0% | 23421 | 68.8% | 0.0% | 68.8% | 0.0% | 0.0% | 23422 | 62.0% | 0.0% | 62.0% | 0.0% | 0.0% | 23423 | 9.3% | 0.0% | 9.3% | 0.0% | 0.0% | 23424 | 39.3% | 0.0% | 39.3% | 0.0% | 0.0% | 23425 | 48.2% | 0.0% | 48.2% | 0.0% | 0.0% | 23426 | 27.6% | 0.0% | 27.6% | 0.0% | 0.0% | 23427 | 25.1% | 0.0% | 25.1% | 0.0% | 0.0% | 23428 | 48.1% | 0.0% | 48.1% | 0.0% | 0.0% | 23429 | 48.1% | 0.0% | 48.1% | 0.0% | 0.0% | 23430 | 41.0% | 0.0% | 41.0% | 0.0% | 0.0% | 23431 | 56.9% | 0.0% | 56.9% | 0.0% | 0.0% |
| 0.0%Thread Id | Total Write-back ratio | Uncategorized | Replacement | Coherence | Flush |
---|
Thread Average | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23408 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23409 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23410 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23411 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23412 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23413 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23414 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23415 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23416 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23417 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23418 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23419 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23420 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23421 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23422 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23423 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23424 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23425 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23426 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23427 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23428 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23429 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23430 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23431 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% |
| 54.3%Thread Id | Fetch utilization |
---|
Thread Average | 54.3% | 23408 | 52.7% | 23409 | 50.8% | 23410 | 57.0% | 23411 | 57.5% | 23412 | 50.7% | 23413 | 53.0% | 23414 | 100.0% | 23415 | 51.9% | 23416 | 56.9% | 23417 | 64.8% | 23418 | 53.9% | 23419 | 56.9% | 23420 | 53.3% | 23421 | 52.1% | 23422 | 51.1% | 23423 | 71.4% | 23424 | 53.9% | 23425 | 51.5% | 23426 | 54.3% | 23427 | 54.6% | 23428 | 51.6% | 23429 | 64.4% | 23430 | 52.5% | 23431 | 54.8% |
| 100.0%Thread Id | Write-back utilization |
---|
Thread Average | 100.0% | 23408 | 100.0% | 23409 | 100.0% | 23410 | 100.0% | 23411 | 100.0% | 23412 | 100.0% | 23413 | 100.0% | 23414 | 100.0% | 23415 | 100.0% | 23416 | 100.0% | 23417 | 100.0% | 23418 | 100.0% | 23419 | 100.0% | 23420 | 100.0% | 23421 | 100.0% | 23422 | 100.0% | 23423 | 100.0% | 23424 | 100.0% | 23425 | 100.0% | 23426 | 100.0% | 23427 | 100.0% | 23428 | 100.0% | 23429 | 100.0% | 23430 | 100.0% | 23431 | 100.0% |
| 0x9e73dd | R |  
|
|
|  | |
359 | | v4sd m1 = mon[iii0]; |
360 | | #endif |
361 | | #ifdef USE_GRAV_PAR |
362 | | std::lock_guard<hpx::lcos::local::spinlock> lock(*L_mtx); |
363 | | #endif |
364 | | auto tmp1 = m0 * ele.four * d0; |
365 | | auto tmp2 = m1 * ele.four * d1; |
366 | | expansion& Liii0 = L[iii0]; |
367 | | expansion& Liii1 = L[iii1]; |
368 | | #pragma GCC ivdep |
369 | | for(integer i = 0; i != 4; ++i) { |
370   | 0.0%
| Liii0[i] += tmp1[i]; |
| % of fetches | Miss ratio | Fetch ratio | WB ratio | Fetch Util | WB Util | PC | Type | Issues | 0.0%Thread Id | % of fetches |
---|
Thread Total | 0.0% | 23408 | 0.0% | 23409 | 0.0% | 23410 | 0.0% | 23411 | 0.0% | 23412 | 0.0% | 23413 | 0.0% | 23414 | 0.0% | 23415 | 0.0% | 23416 | 0.0% | 23417 | 0.0% | 23418 | 0.0% | 23419 | 0.0% | 23420 | 0.0% | 23421 | 0.0% | 23422 | 0.0% | 23423 | 0.0% | 23424 | 0.0% | 23425 | 0.0% | 23426 | 0.0% | 23427 | 0.0% | 23428 | 0.0% | 23429 | 0.0% | 23430 | 0.0% | 23431 | 0.0% |
| 0.0%Thread Id | Total Miss ratio | Uncategorized | Replacement | Coherence | Flush |
---|
Thread Average | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23408 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23409 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23410 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23411 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23412 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23413 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23414 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23415 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23416 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23417 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23418 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23419 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23420 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23421 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23422 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23423 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23424 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23425 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23426 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23427 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23428 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23429 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23430 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23431 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% |
| 0.0%Thread Id | Total Fetch ratio | Uncategorized | Replacement | Coherence | Flush |
---|
Thread Average | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23408 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23409 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23410 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23411 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23412 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23413 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23414 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23415 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23416 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23417 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23418 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23419 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23420 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23421 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23422 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23423 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23424 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23425 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23426 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23427 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23428 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23429 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23430 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23431 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% |
| 0.0%Thread Id | Total Write-back ratio | Uncategorized | Replacement | Coherence | Flush |
---|
Thread Average | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23408 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23409 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23410 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23411 | 0.1% | 0.0% | 0.1% | 0.0% | 0.0% | 23412 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23413 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23414 | 0.1% | 0.0% | 0.1% | 0.0% | 0.0% | 23415 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23416 | 0.1% | 0.0% | 0.1% | 0.0% | 0.0% | 23417 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23418 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23419 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23420 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23421 | 0.1% | 0.0% | 0.1% | 0.0% | 0.0% | 23422 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23423 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23424 | 0.1% | 0.0% | 0.1% | 0.0% | 0.0% | 23425 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23426 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23427 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23428 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23429 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23430 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23431 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% |
| 100.0%Thread Id | Fetch utilization |
---|
Thread Average | 100.0% | 23408 | 35.5% | 23409 | 42.6% | 23410 | 99.5% | 23411 | 8.5% | 23412 | 100.0% | 23413 | 100.0% | 23414 | 12.0% | 23415 | 30.0% | 23416 | 36.1% | 23417 | 18.8% | 23418 | 11.3% | 23419 | 5.9% | 23420 | 41.5% | 23421 | 9.4% | 23422 | 35.1% | 23423 | 100.0% | 23424 | 12.0% | 23425 | 80.3% | 23426 | 100.0% | 23427 | 23.9% | 23428 | 88.6% | 23429 | 73.1% | 23430 | 44.3% | 23431 | 41.3% |
| 30.0%Thread Id | Write-back utilization |
---|
Thread Average | 30.0% | 23408 | 47.6% | 23409 | 80.2% | 23410 | 36.5% | 23411 | 33.8% | 23412 | 63.4% | 23413 | 37.4% | 23414 | 21.7% | 23415 | 40.9% | 23416 | 31.5% | 23417 | 36.5% | 23418 | 29.2% | 23419 | 24.9% | 23420 | 37.6% | 23421 | 20.4% | 23422 | 30.1% | 23423 | 31.6% | 23424 | 25.8% | 23425 | 28.1% | 23426 | 24.4% | 23427 | 34.3% | 23428 | 72.3% | 23429 | 37.7% | 23430 | 25.9% | 23431 | 27.7% |
| 0x9e74a4 | W | 
|
|
| | |
371 | | Liii1[i] += tmp2[i]; |
372 | | } |
373 | | } |
374 | | } |
375 | | PROF_END; |
376 | | } |
377 | | |
378 | | void grid::compute_boundary_interactions(gsolve_type type, const geo::direction& dir, bool is_monopole, const gravity_boundary_type& mpoles) { |
379 | | if (!is_leaf) { |
380 | | if (!is_monopole) { |
381 | | compute_boundary_interactions_multipole_multipole(type, ilist_n_bnd[dir], mpoles); |
382 | | } else { |
383 | | compute_boundary_interactions_monopole_multipole(type, ilist_d_bnd[dir], mpoles); |
384 | | } |
385 | | } else { |
386 | | if (!is_monopole) { |
387 | | compute_boundary_interactions_multipole_monopole(type, ilist_d_bnd[dir], mpoles); |
388 | | } else { |
389 | | compute_boundary_interactions_monopole_monopole(type, ilist_d_bnd[dir], mpoles); |
390 | | } |
391 | | } |
392 | | |
393 | | } |
394 | | |
395 | | void grid::compute_boundary_interactions_multipole_multipole(gsolve_type type, const std::vector<boundary_interaction_type>& ilist_n_bnd, |
396 | | const gravity_boundary_type& mpoles) { |
397 | | PROF_BEGIN; |
398 | | auto& M = *M_ptr; |
399 | | auto& mon = *mon_ptr; |
400 | | |
401 | | std::vector<space_vector> const& com0 = *(com_ptr[0]); |
402 | | hpx::parallel::for_loop( |
403 | | for_loop_policy, 0, ilist_n_bnd.size(), |
404 | | [&mpoles, &com0, &ilist_n_bnd, type, this, &M](std::size_t si) { |
405 | | |
406 | | taylor<4, simd_vector> m0; |
407 | | taylor<4, simd_vector> n0; |
408 | | std::array<simd_vector, NDIM> dX; |
409 | | std::array<simd_vector, NDIM> X; |
410 | | space_vector Y; |
411 | | |
412 | | boundary_interaction_type const& bnd = ilist_n_bnd[si]; |
413 | | integer index = mpoles.is_local ? bnd.second : si; |
414 | | |
415 | | load_multipole(m0, Y, mpoles, index, false); |
416 | | |
417 | | std::array<simd_vector, NDIM> simdY = { |
418 | | simd_vector(Y[0]), simd_vector(Y[1]), simd_vector(Y[2]), |
419 | | }; |
420 | | |
421 | | const integer list_size = bnd.first.size(); |
422 | | for (integer li = 0; li < list_size; li += simd_len) { |
423 | | #pragma GCC ivdep |
424 | | for (integer i = 0; i != simd_len; ++i) { |
425 | | const auto index = std::min(integer(li + i), integer(list_size-1)); |
426 | | const integer iii0 = bnd.first[index]; |
427 | | space_vector const& com0iii0 = com0[iii0]; |
428 | | for (integer d = 0; d < NDIM; ++d) { |
429 | | X[d][i] = com0iii0[d]; |
430 | | } |
431 | | if (type == RHO) { |
432 | | multipole const& Miii0 = M[iii0]; |
433 | | real const tmp = m0()[i] / Miii0(); |
434 | | for (integer j = taylor_sizes[2]; j != taylor_sizes[3]; ++j) { |
435 | | n0[j][i] = m0[j][i] - Miii0[j] * tmp; |
436 | | } |
437 | | } |
438 | | } |
439 | | if (type != RHO) { |
440 | | #pragma GCC ivdep |
441 | | for (integer j = taylor_sizes[2]; j != taylor_sizes[3]; ++j) { |
442 | | n0[j] = ZERO; |
443 | | } |
444 | | } |
445 | | #pragma GCC ivdep |
446 | | for (integer d = 0; d < NDIM; ++d) { |
447 | | dX[d] = X[d] - simdY[d]; |
448 | | } |
449 | | |
450 | | taylor<5, simd_vector> D; |
451 | | taylor<4, simd_vector> A0; |
452 | | std::array<simd_vector, NDIM> B0 = { |
453 | | simd_vector(0.0), simd_vector(0.0), simd_vector(0.0) |
454 | | }; |
455 | | |
456 | | D.set_basis(dX); |
457 | | A0[0] = m0[0] * D[0]; |
458 | | if (type != RHO) { |
459 | | for (integer i = taylor_sizes[0]; i != taylor_sizes[1]; ++i) { |
460 | | A0[0] -= m0[i] * D[i]; |
461 | | } |
462 | | } |
463 | | for (integer i = taylor_sizes[1]; i != taylor_sizes[2]; ++i) { |
464 | | A0[0] += m0[i] * D[i] * (factor[i] / real(2)); |
465 | | } |
466 | | for (integer i = taylor_sizes[2]; i != taylor_sizes[3]; ++i) { |
467 | | A0[0] -= m0[i] * D[i] * (factor[i] / real(6)); |
468 | | } |
469 | | |
470 | | for (integer a = 0; a < NDIM; ++a) { |
471 | | A0(a) = m0() * D(a); |
472 | | for (integer b = 0; b < NDIM; ++b) { |
473 | | if (type != RHO) { |
474 | | A0(a) -= m0(a) * D(a, b); |
475 | | } |
476 | | for (integer c = b; c < NDIM; ++c) { |
477 | | const auto tmp = D(a, b, c) * (factor(c, b) / real(2)); |
478 | | A0(a) += m0(c, b) * tmp; |
479 | | } |
480 | | } |
481 | | } |
482 | | |
483 | | if (type == RHO) { |
484 | | for (integer a = 0; a < NDIM; ++a) { |
485 | | for (integer b = 0; b < NDIM; ++b) { |
486 | | for (integer c = b; c < NDIM; ++c) { |
487 | | for (integer d = c; d < NDIM; ++d) { |
488 | | const auto tmp = D(a, b, c, d) * (factor(b, c, d) / real(6)); |
489 | | B0[a] -= n0(b, c, d) * tmp; |
490 | | } |
491 | | } |
492 | | } |
493 | | } |
494 | | } |
495 | | |
496 | | for (integer a = 0; a < NDIM; ++a) { |
497 | | for (integer b = a; b < NDIM; ++b) { |
498 | | A0(a, b) = m0() * D(a, b); |
499 | | for (integer c = 0; c < NDIM; ++c) { |
500 | | A0(a, b) -= m0(c) * D(a, b, c); |
501 | | } |
502 | | } |
503 | | } |
504 | | for (integer i = taylor_sizes[2]; i != taylor_sizes[3]; ++i) { |
505 | | A0[i] = m0[0] * D[i]; |
506 | | } |
507 | | #ifdef USE_GRAV_PAR |
508 | | std::lock_guard<hpx::lcos::local::spinlock> lock(*L_mtx); |
509 | | #endif |
510 | | for (integer i = 0; i != simd_len && i + li < list_size; ++i) { |
511 | | const integer iii0 = bnd.first[li + i]; |
512 | | expansion& Liii0 = L[iii0]; |
513 | | |
514 | | #pragma GCC ivdep |
515 | | for (integer j = 0; j != 20; ++j) { |
516   | 3.2%
| Liii0[j] += A0[j][i]; |
| % of fetches | Miss ratio | Fetch ratio | WB ratio | Fetch Util | WB Util | PC | Type | Issues | 0.0%Thread Id | % of fetches |
---|
Thread Total | 0.0% | 23408 | 0.0% | 23409 | 0.0% | 23410 | 0.0% | 23411 | 0.0% | 23412 | 0.0% | 23413 | 0.0% | 23414 | 0.0% | 23415 | 0.0% | 23416 | 0.0% | 23417 | 0.0% | 23418 | 0.0% | 23419 | 0.0% | 23420 | 0.0% | 23421 | 0.0% | 23422 | 0.0% | 23423 | 0.0% | 23424 | 0.0% | 23425 | 0.0% | 23426 | 0.0% | 23427 | 0.0% | 23428 | 0.0% | 23429 | 0.0% | 23430 | 0.0% | 23431 | 0.0% |
| 0.0%Thread Id | Total Miss ratio | Uncategorized | Replacement | Coherence | Flush |
---|
Thread Average | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23408 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23409 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23410 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23411 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23412 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23413 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23414 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23415 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23416 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23417 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23418 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23419 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23420 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23421 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23422 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23423 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23424 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23425 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23426 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23427 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23428 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23429 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23430 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23431 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% |
| 0.0%Thread Id | Total Fetch ratio | Uncategorized | Replacement | Coherence | Flush |
---|
Thread Average | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23408 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23409 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23410 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23411 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23412 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23413 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23414 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23415 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23416 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23417 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23418 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23419 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23420 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23421 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23422 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23423 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23424 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23425 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23426 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23427 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23428 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23429 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23430 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23431 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% |
| 0.0%Thread Id | Total Write-back ratio | Uncategorized | Replacement | Coherence | Flush |
---|
Thread Average | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23408 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23409 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23410 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23411 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23412 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23413 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23414 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23415 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23416 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23417 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23418 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23419 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23420 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23421 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23422 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23423 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23424 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23425 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23426 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23427 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23428 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23429 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23430 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23431 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% |
| 100.0%Thread Id | Fetch utilization |
---|
Thread Average | 100.0% | 23408 | 100.0% | 23409 | 100.0% | 23410 | 0.0% | 23411 | 100.0% | 23412 | 100.0% | 23413 | 100.0% | 23414 | 100.0% | 23415 | 100.0% | 23416 | 100.0% | 23417 | 100.0% | 23418 | 100.0% | 23419 | 0.0% | 23420 | 100.0% | 23421 | 0.0% | 23422 | 100.0% | 23423 | 100.0% | 23424 | 0.0% | 23425 | 100.0% | 23426 | 100.0% | 23427 | 100.0% | 23428 | 100.0% | 23429 | 0.0% | 23430 | 0.0% | 23431 | 1.9% |
| 100.0%Thread Id | Write-back utilization |
---|
Thread Average | 100.0% | 23408 | 100.0% | 23409 | 100.0% | 23410 | 0.4% | 23411 | 100.0% | 23412 | 100.0% | 23413 | 100.0% | 23414 | 0.0% | 23415 | 100.0% | 23416 | 100.0% | 23417 | 100.0% | 23418 | 0.0% | 23419 | 0.0% | 23420 | 100.0% | 23421 | 100.0% | 23422 | 100.0% | 23423 | 100.0% | 23424 | 100.0% | 23425 | 0.0% | 23426 | 100.0% | 23427 | 100.0% | 23428 | 100.0% | 23429 | 0.0% | 23430 | 0.0% | 23431 | 100.0% |
| 0x9ddf50 | R | 
| 0.0%Thread Id | % of fetches |
---|
Thread Total | 0.0% | 23408 | 0.0% | 23409 | 0.0% | 23410 | 0.0% | 23411 | 0.0% | 23412 | 0.0% | 23413 | 0.0% | 23414 | 0.0% | 23415 | 0.0% | 23416 | 0.0% | 23417 | 0.0% | 23418 | 0.0% | 23419 | 0.0% | 23420 | 0.0% | 23421 | 0.0% | 23422 | 0.0% | 23423 | 0.0% | 23424 | 0.0% | 23425 | 0.0% | 23426 | 0.0% | 23427 | 0.0% | 23428 | 0.0% | 23429 | 0.0% | 23430 | 0.0% | 23431 | 0.0% |
| 0.0%Thread Id | Total Miss ratio | Uncategorized | Replacement | Coherence | Flush |
---|
Thread Average | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23408 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23409 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23410 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23411 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23412 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23413 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23414 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23415 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23416 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23417 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23418 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23419 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23420 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23421 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23422 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23423 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23424 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23425 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23426 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23427 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23428 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23429 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23430 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23431 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% |
| 0.0%Thread Id | Total Fetch ratio | Uncategorized | Replacement | Coherence | Flush |
---|
Thread Average | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23408 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23409 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23410 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23411 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23412 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23413 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23414 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23415 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23416 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23417 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23418 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23419 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23420 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23421 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23422 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23423 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23424 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23425 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23426 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23427 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23428 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23429 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23430 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23431 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% |
| 0.0%Thread Id | Total Write-back ratio | Uncategorized | Replacement | Coherence | Flush |
---|
Thread Average | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23408 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23409 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23410 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23411 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23412 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23413 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23414 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23415 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23416 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23417 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23418 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23419 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23420 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23421 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23422 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23423 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23424 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23425 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23426 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23427 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23428 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23429 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23430 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23431 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% |
| 100.0%Thread Id | Fetch utilization |
---|
Thread Average | 100.0% | 23408 | 100.0% | 23409 | 100.0% | 23410 | 0.0% | 23411 | 100.0% | 23412 | 100.0% | 23413 | 100.0% | 23414 | 100.0% | 23415 | 100.0% | 23416 | 100.0% | 23417 | 100.0% | 23418 | 100.0% | 23419 | 0.0% | 23420 | 100.0% | 23421 | 0.0% | 23422 | 100.0% | 23423 | 100.0% | 23424 | 0.0% | 23425 | 100.0% | 23426 | 100.0% | 23427 | 100.0% | 23428 | 100.0% | 23429 | 0.0% | 23430 | 0.0% | 23431 | 1.9% |
| 100.0%Thread Id | Write-back utilization |
---|
Thread Average | 100.0% | 23408 | 100.0% | 23409 | 100.0% | 23410 | 0.4% | 23411 | 100.0% | 23412 | 100.0% | 23413 | 100.0% | 23414 | 0.0% | 23415 | 100.0% | 23416 | 100.0% | 23417 | 100.0% | 23418 | 0.0% | 23419 | 0.0% | 23420 | 100.0% | 23421 | 100.0% | 23422 | 100.0% | 23423 | 100.0% | 23424 | 100.0% | 23425 | 0.0% | 23426 | 100.0% | 23427 | 100.0% | 23428 | 100.0% | 23429 | 0.0% | 23430 | 0.0% | 23431 | 100.0% |
| 0x9ddf5a | W | 
| 0.5%Thread Id | % of fetches |
---|
Thread Total | 0.5% | 23408 | 0.0% | 23410 | 0.0% | 23412 | 0.0% | 23413 | 0.0% | 23414 | 0.0% | 23416 | 0.0% | 23417 | 0.0% | 23418 | 0.0% | 23421 | 0.0% | 23423 | 0.0% | 23424 | 0.2% | 23425 | 0.0% | 23426 | 0.2% | 23427 | 0.0% | 23428 | 0.0% | 23429 | 0.0% | 23430 | 0.0% | 23431 | 0.0% |
| 33.3%Thread Id | Total Miss ratio | Uncategorized | Replacement | Coherence | Flush |
---|
Thread Average | 33.3% | 0.0% | 0.0% | 33.3% | 0.0% | 23408 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23410 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23412 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23413 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23414 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23416 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23417 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23418 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23421 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23423 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23424 | 100.0% | 0.0% | 0.0% | 100.0% | 0.0% | 23425 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23426 | 100.0% | 0.0% | 0.0% | 100.0% | 0.0% | 23427 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23428 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23429 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23430 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23431 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% |
| 33.3%Thread Id | Total Fetch ratio | Uncategorized | Replacement | Coherence | Flush |
---|
Thread Average | 33.3% | 0.0% | 0.0% | 33.3% | 0.0% | 23408 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23410 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23412 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23413 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23414 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23416 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23417 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23418 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23421 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23423 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23424 | 100.0% | 0.0% | 0.0% | 100.0% | 0.0% | 23425 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23426 | 100.0% | 0.0% | 0.0% | 100.0% | 0.0% | 23427 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23428 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23429 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23430 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23431 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% |
| 0.0%Thread Id | Total Write-back ratio | Uncategorized | Replacement | Coherence | Flush |
---|
Thread Average | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23408 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23410 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23412 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23413 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23414 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23416 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23417 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23418 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23421 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23423 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23424 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23425 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23426 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23427 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23428 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23429 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23430 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23431 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% |
| 0.1%Thread Id | Fetch utilization |
---|
Thread Average | 0.1% | 23408 | 100.0% | 23410 | 100.0% | 23412 | 0.0% | 23413 | 100.0% | 23414 | 100.0% | 23416 | 100.0% | 23417 | 100.0% | 23418 | 100.0% | 23421 | 100.0% | 23423 | 100.0% | 23424 | 0.0% | 23425 | 0.0% | 23426 | 0.0% | 23427 | 0.0% | 23428 | 100.0% | 23429 | 100.0% | 23430 | 100.0% | 23431 | 0.0% |
| 18.7%Thread Id | Write-back utilization |
---|
Thread Average | 18.7% | 23408 | 100.0% | 23410 | 100.0% | 23412 | 0.0% | 23413 | 100.0% | 23414 | 100.0% | 23416 | 100.0% | 23417 | 100.0% | 23418 | 100.0% | 23421 | 100.0% | 23423 | 100.0% | 23424 | 12.5% | 23425 | 0.0% | 23426 | 12.5% | 23427 | 0.0% | 23428 | 100.0% | 23429 | 100.0% | 23430 | 100.0% | 23431 | 0.0% |
| 0x9ddf50 | R | | 2.8%Thread Id | % of fetches |
---|
Thread Total | 2.8% | 23408 | 0.0% | 23409 | 0.2% | 23410 | 0.0% | 23411 | 0.0% | 23412 | 0.0% | 23413 | 0.0% | 23414 | 0.0% | 23415 | 0.0% | 23416 | 0.0% | 23417 | 0.0% | 23418 | 0.0% | 23419 | 0.0% | 23420 | 0.4% | 23421 | 0.0% | 23422 | 0.0% | 23423 | 0.0% | 23424 | 0.3% | 23425 | 0.1% | 23426 | 0.3% | 23427 | 0.2% | 23428 | 0.5% | 23429 | 0.2% | 23430 | 0.0% | 23431 | 0.5% |
| 1.8%Thread Id | Total Miss ratio | Uncategorized | Replacement | Coherence | Flush |
---|
Thread Average | 1.8% | 0.0% | 0.9% | 0.9% | 0.0% | 23408 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23409 | 3.4% | 0.0% | 3.4% | 0.0% | 0.0% | 23410 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23411 | 0.1% | 0.0% | 0.1% | 0.0% | 0.0% | 23412 | 0.2% | 0.0% | 0.2% | 0.0% | 0.0% | 23413 | 0.4% | 0.0% | 0.4% | 0.0% | 0.0% | 23414 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23415 | 0.1% | 0.0% | 0.1% | 0.0% | 0.0% | 23416 | 0.1% | 0.0% | 0.1% | 0.0% | 0.0% | 23417 | 0.2% | 0.0% | 0.2% | 0.0% | 0.0% | 23418 | 0.3% | 0.0% | 0.3% | 0.0% | 0.0% | 23419 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23420 | 6.5% | 0.0% | 6.5% | 0.0% | 0.0% | 23421 | 0.2% | 0.0% | 0.2% | 0.0% | 0.0% | 23422 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23423 | 0.2% | 0.0% | 0.2% | 0.0% | 0.0% | 23424 | 4.1% | 0.0% | 3.9% | 0.2% | 0.0% | 23425 | 1.0% | 0.0% | 1.0% | 0.0% | 0.0% | 23426 | 4.0% | 0.0% | 4.0% | 0.0% | 0.0% | 23427 | 3.8% | 0.0% | 0.0% | 3.8% | 0.0% | 23428 | 8.0% | 0.0% | 0.0% | 8.0% | 0.0% | 23429 | 4.5% | 0.0% | 0.1% | 4.3% | 0.0% | 23430 | 0.1% | 0.0% | 0.1% | 0.0% | 0.0% | 23431 | 6.7% | 0.0% | 0.0% | 6.7% | 0.0% |
| 1.8%Thread Id | Total Fetch ratio | Uncategorized | Replacement | Coherence | Flush |
---|
Thread Average | 1.8% | 0.0% | 0.9% | 0.9% | 0.0% | 23408 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23409 | 3.4% | 0.0% | 3.4% | 0.0% | 0.0% | 23410 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23411 | 0.1% | 0.0% | 0.1% | 0.0% | 0.0% | 23412 | 0.2% | 0.0% | 0.2% | 0.0% | 0.0% | 23413 | 0.4% | 0.0% | 0.4% | 0.0% | 0.0% | 23414 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23415 | 0.1% | 0.0% | 0.1% | 0.0% | 0.0% | 23416 | 0.1% | 0.0% | 0.1% | 0.0% | 0.0% | 23417 | 0.2% | 0.0% | 0.2% | 0.0% | 0.0% | 23418 | 0.3% | 0.0% | 0.3% | 0.0% | 0.0% | 23419 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23420 | 6.5% | 0.0% | 6.5% | 0.0% | 0.0% | 23421 | 0.2% | 0.0% | 0.2% | 0.0% | 0.0% | 23422 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23423 | 0.2% | 0.0% | 0.2% | 0.0% | 0.0% | 23424 | 4.1% | 0.0% | 3.9% | 0.2% | 0.0% | 23425 | 1.0% | 0.0% | 1.0% | 0.0% | 0.0% | 23426 | 4.0% | 0.0% | 4.0% | 0.0% | 0.0% | 23427 | 3.8% | 0.0% | 0.0% | 3.8% | 0.0% | 23428 | 8.0% | 0.0% | 0.0% | 8.0% | 0.0% | 23429 | 4.5% | 0.0% | 0.1% | 4.3% | 0.0% | 23430 | 0.1% | 0.0% | 0.1% | 0.0% | 0.0% | 23431 | 6.7% | 0.0% | 0.0% | 6.7% | 0.0% |
| 0.0%Thread Id | Total Write-back ratio | Uncategorized | Replacement | Coherence | Flush |
---|
Thread Average | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23408 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23409 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23410 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23411 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23412 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23413 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23414 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23415 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23416 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23417 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23418 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23419 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23420 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23421 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23422 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23423 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23424 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23425 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23426 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23427 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23428 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23429 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23430 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23431 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% |
| 89.2%Thread Id | Fetch utilization |
---|
Thread Average | 89.2% | 23408 | 100.0% | 23409 | 100.0% | 23410 | 100.0% | 23411 | 100.0% | 23412 | 100.0% | 23413 | 100.0% | 23414 | 100.0% | 23415 | 100.0% | 23416 | 100.0% | 23417 | 100.0% | 23418 | 100.0% | 23419 | 100.0% | 23420 | 8.8% | 23421 | 100.0% | 23422 | 100.0% | 23423 | 100.0% | 23424 | 25.2% | 23425 | 100.0% | 23426 | 45.6% | 23427 | 40.4% | 23428 | 1.1% | 23429 | 42.1% | 23430 | 100.0% | 23431 | 28.5% |
| 91.6%Thread Id | Write-back utilization |
---|
Thread Average | 91.6% | 23408 | 100.0% | 23409 | 49.9% | 23410 | 100.0% | 23411 | 100.0% | 23412 | 100.0% | 23413 | 100.0% | 23414 | 100.0% | 23415 | 100.0% | 23416 | 100.0% | 23417 | 100.0% | 23418 | 100.0% | 23419 | 100.0% | 23420 | 36.1% | 23421 | 100.0% | 23422 | 100.0% | 23423 | 100.0% | 23424 | 62.0% | 23425 | 100.0% | 23426 | 71.5% | 23427 | 65.7% | 23428 | 34.2% | 23429 | 26.6% | 23430 | 100.0% | 23431 | 38.1% |
| 0x9ddf50 | R |   
| 0.0%Thread Id | % of fetches |
---|
Thread Total | 0.0% | 23408 | 0.0% | 23409 | 0.0% | 23410 | 0.0% | 23411 | 0.0% | 23412 | 0.0% | 23413 | 0.0% | 23414 | 0.0% | 23415 | 0.0% | 23416 | 0.0% | 23417 | 0.0% | 23418 | 0.0% | 23419 | 0.0% | 23420 | 0.0% | 23421 | 0.0% | 23422 | 0.0% | 23423 | 0.0% | 23424 | 0.0% | 23425 | 0.0% | 23426 | 0.0% | 23427 | 0.0% | 23428 | 0.0% | 23429 | 0.0% | 23430 | 0.0% | 23431 | 0.0% |
| 0.0%Thread Id | Total Miss ratio | Uncategorized | Replacement | Coherence | Flush |
---|
Thread Average | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23408 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23409 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23410 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23411 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23412 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23413 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23414 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23415 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23416 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23417 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23418 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23419 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23420 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23421 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23422 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23423 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23424 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23425 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23426 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23427 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23428 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23429 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23430 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23431 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% |
| 0.0%Thread Id | Total Fetch ratio | Uncategorized | Replacement | Coherence | Flush |
---|
Thread Average | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23408 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23409 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23410 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23411 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23412 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23413 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23414 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23415 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23416 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23417 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23418 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23419 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23420 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23421 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23422 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23423 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23424 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23425 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23426 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23427 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23428 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23429 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23430 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23431 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% |
| 1.8%Thread Id | Total Write-back ratio | Uncategorized | Replacement | Coherence | Flush |
---|
Thread Average | 1.8% | 0.0% | 0.9% | 0.9% | 0.0% | 23408 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23409 | 3.4% | 0.0% | 3.4% | 0.0% | 0.0% | 23410 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23411 | 0.1% | 0.0% | 0.1% | 0.0% | 0.0% | 23412 | 0.2% | 0.0% | 0.2% | 0.0% | 0.0% | 23413 | 0.4% | 0.0% | 0.4% | 0.0% | 0.0% | 23414 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23415 | 0.1% | 0.0% | 0.1% | 0.0% | 0.0% | 23416 | 0.1% | 0.0% | 0.1% | 0.0% | 0.0% | 23417 | 0.2% | 0.0% | 0.2% | 0.0% | 0.0% | 23418 | 0.3% | 0.0% | 0.3% | 0.0% | 0.0% | 23419 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23420 | 6.5% | 0.0% | 6.5% | 0.0% | 0.0% | 23421 | 0.2% | 0.0% | 0.2% | 0.0% | 0.0% | 23422 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23423 | 0.2% | 0.0% | 0.2% | 0.0% | 0.0% | 23424 | 4.1% | 0.0% | 3.9% | 0.2% | 0.0% | 23425 | 1.0% | 0.0% | 1.0% | 0.0% | 0.0% | 23426 | 4.0% | 0.0% | 4.0% | 0.0% | 0.0% | 23427 | 3.8% | 0.0% | 0.0% | 3.8% | 0.0% | 23428 | 8.0% | 0.0% | 0.0% | 8.0% | 0.0% | 23429 | 4.5% | 0.0% | 0.1% | 4.3% | 0.0% | 23430 | 0.1% | 0.0% | 0.1% | 0.0% | 0.0% | 23431 | 6.7% | 0.0% | 0.0% | 6.7% | 0.0% |
| 89.2%Thread Id | Fetch utilization |
---|
Thread Average | 89.2% | 23408 | 100.0% | 23409 | 100.0% | 23410 | 100.0% | 23411 | 100.0% | 23412 | 100.0% | 23413 | 100.0% | 23414 | 100.0% | 23415 | 100.0% | 23416 | 100.0% | 23417 | 100.0% | 23418 | 100.0% | 23419 | 100.0% | 23420 | 8.8% | 23421 | 100.0% | 23422 | 100.0% | 23423 | 100.0% | 23424 | 25.2% | 23425 | 100.0% | 23426 | 45.6% | 23427 | 40.4% | 23428 | 1.1% | 23429 | 42.1% | 23430 | 100.0% | 23431 | 28.5% |
| 91.6%Thread Id | Write-back utilization |
---|
Thread Average | 91.6% | 23408 | 100.0% | 23409 | 49.9% | 23410 | 100.0% | 23411 | 100.0% | 23412 | 100.0% | 23413 | 100.0% | 23414 | 100.0% | 23415 | 100.0% | 23416 | 100.0% | 23417 | 100.0% | 23418 | 100.0% | 23419 | 100.0% | 23420 | 36.1% | 23421 | 100.0% | 23422 | 100.0% | 23423 | 100.0% | 23424 | 62.0% | 23425 | 100.0% | 23426 | 71.5% | 23427 | 65.7% | 23428 | 34.2% | 23429 | 26.6% | 23430 | 100.0% | 23431 | 38.1% |
| 0x9ddf5a | W |   
|
|
|    | |
517 | | } |
518 | | |
519 | | if (type == RHO) { |
520 | | space_vector& L_ciii0 = L_c[iii0]; |
521 | | #pragma GCC ivdep |
522 | | for (integer j = 0; j != NDIM; ++j) { |
523 | | L_ciii0[j] += B0[j][i]; |
524 | | } |
525 | | } |
526 | | } |
527 | | } |
528 | | }); |
529 | | PROF_END; |
530 | | } |
531 | | |
532 | | void grid::compute_boundary_interactions_multipole_monopole(gsolve_type type, const std::vector<boundary_interaction_type>& ilist_n_bnd, |
533 | | const gravity_boundary_type& mpoles) { |
534 | | PROF_BEGIN; |
535 | | auto& M = *M_ptr; |
536 | | auto& mon = *mon_ptr; |
537 | | |
538 | | std::vector<space_vector> const& com0 = *(com_ptr[0]); |
539 | | hpx::parallel::for_loop( |
540 | | for_loop_policy, 0, ilist_n_bnd.size(), |
541 | | [&mpoles, &com0, &ilist_n_bnd, type, this](std::size_t si) { |
542 | | |
543 | | taylor<4, simd_vector> m0; |
544 | | taylor<4, simd_vector> n0; |
545 | | std::array<simd_vector, NDIM> dX; |
546 | | std::array<simd_vector, NDIM> X; |
547 | | space_vector Y; |
548 | | |
549 | | boundary_interaction_type const& bnd = ilist_n_bnd[si]; |
550 | | const integer list_size = bnd.first.size(); |
551 | | integer index = mpoles.is_local ? bnd.second : si; |
552 | | load_multipole(m0, Y, mpoles, index, false); |
553 | | |
554 | | std::array<simd_vector, NDIM> simdY = { |
555 | | simd_vector(Y[0]), simd_vector(Y[1]), simd_vector(Y[2]), |
556 | | }; |
557 | | |
558 | | if (type == RHO) { |
559 | | #pragma GCC ivdep |
560 | | for (integer j = taylor_sizes[2]; j != taylor_sizes[3]; ++j) { |
561 | | n0[j] = m0[j]; |
562 | | } |
563 | | } |
564 | | else { |
565 | | #pragma GCC ivdep |
566 | | for (integer j = taylor_sizes[2]; j != taylor_sizes[3]; ++j) { |
567 | | n0[j] = ZERO; |
568 | | } |
569 | | } |
570 | | for (integer li = 0; li < list_size; li += simd_len) { |
571 | | #pragma GCC ivdep |
572 | | for (integer i = 0; i != simd_len; ++i) { |
573 | | const auto index = std::min(integer(li + i), integer(list_size-1)); |
574 | | const integer iii0 = bnd.first[index]; |
575 | | space_vector const& com0iii0 = com0[iii0]; |
576 | | for (integer d = 0; d < NDIM; ++d) { |
577 | | X[d][i] = com0iii0[d]; |
578 | | } |
579 | | } |
580 | | #pragma GCC ivdep |
581 | | for (integer d = 0; d < NDIM; ++d) { |
582 | | dX[d] = X[d] - simdY[d]; |
583 | | } |
584 | | |
585 | | taylor<5, simd_vector> D; |
586 | | taylor<2, simd_vector> A0; |
587 | | std::array<simd_vector, NDIM> B0 = { |
588 | | simd_vector(0.0), simd_vector(0.0), simd_vector(0.0) |
589 | | }; |
590 | | |
591 | | D.set_basis(dX); |
592 | | A0[0] = m0[0] * D[0]; |
593 | | if (type != RHO) { |
594 | | for (integer i = taylor_sizes[0]; i != taylor_sizes[1]; ++i) { |
595 | | A0[0] -= m0[i] * D[i]; |
596 | | } |
597 | | } |
598 | | for (integer i = taylor_sizes[1]; i != taylor_sizes[2]; ++i) { |
599 | | A0[0] += m0[i] * D[i] * (factor[i] / real(2)); |
600 | | } |
601 | | for (integer i = taylor_sizes[2]; i != taylor_sizes[3]; ++i) { |
602 | | A0[0] -= m0[i] * D[i] * (factor[i] / real(6)); |
603 | | } |
604 | | |
605 | | for (integer a = 0; a < NDIM; ++a) { |
606 | | A0(a) = m0() * D(a); |
607 | | for (integer b = 0; b < NDIM; ++b) { |
608 | | if (type != RHO) { |
609 | | A0(a) -= m0(a) * D(a, b); |
610 | | } |
611 | | for (integer c = b; c < NDIM; ++c) { |
612 | | A0(a) += m0(c, b) * D(a, b, c) * (factor(b, c) / real(2)); |
613 | | } |
614 | | } |
615 | | } |
616 | | |
617 | | if (type == RHO) { |
618 | | for (integer a = 0; a < NDIM; ++a) { |
619 | | for (integer b = 0; b < NDIM; ++b) { |
620 | | for (integer c = b; c < NDIM; ++c) { |
621 | | for (integer d = c; d < NDIM; ++d) { |
622 | | const auto tmp = D(a, b, c, d) * (factor(b, c, d) / real(6)); |
623 | | B0[a] -= n0(b, c, d) * tmp; |
624 | | } |
625 | | } |
626 | | } |
627 | | } |
628 | | } |
629 | | #ifdef USE_GRAV_PAR |
630 | | std::lock_guard<hpx::lcos::local::spinlock> lock(*L_mtx); |
631 | | #endif |
632 | | for (integer i = 0; i != simd_len && i + li < list_size; ++i) { |
633 | | const integer iii0 = bnd.first[li + i]; |
634 | | expansion& Liii0 = L[iii0]; |
635 | | #pragma GCC ivdep |
636 | | for (integer j = 0; j != 4; ++j) { |
637 | | Liii0[j] += A0[j][i]; |
638 | | } |
639 | | if (type == RHO) { |
640 | | space_vector& L_ciii0 = L_c[iii0]; |
641 | | #pragma GCC ivdep |
642 | | for (integer j = 0; j != NDIM; ++j) { |
643 | | L_ciii0[j] += B0[j][i]; |
644 | | } |
645 | | } |
646 | | } |
647 | | } |
648 | | }); |
649 | | PROF_END; |
650 | | } |
651 | | |
652 | | void grid::compute_boundary_interactions_monopole_multipole(gsolve_type type, const std::vector<boundary_interaction_type>& ilist_n_bnd, |
653 | | const gravity_boundary_type& mpoles) { |
654 | | PROF_BEGIN; |
655 | | auto& M = *M_ptr; |
656 | | auto& mon = *mon_ptr; |
657 | | |
658 | | std::array<real, NDIM> Xbase = { |
659 | | X[0][hindex(H_BW, H_BW, H_BW)], |
660 | | X[1][hindex(H_BW, H_BW, H_BW)], |
661 | | X[2][hindex(H_BW, H_BW, H_BW)] |
662 | | }; |
663 | | |
664 | | std::vector<space_vector> const& com0 = *(com_ptr[0]); |
665 | | hpx::parallel::for_loop( |
666 | | for_loop_policy, 0, ilist_n_bnd.size(), |
667 | | [&mpoles, &Xbase, &com0, &ilist_n_bnd, type, this, &M](std::size_t si) { |
668 | | |
669 | | simd_vector m0; |
670 | | taylor<4, simd_vector> n0; |
671 | | std::array<simd_vector, NDIM> dX; |
672 | | std::array<simd_vector, NDIM> X; |
673 | | std::array<simd_vector, NDIM> Y; |
674 | | |
675 | | boundary_interaction_type const& bnd = ilist_n_bnd[si]; |
676 | | integer index = mpoles.is_local ? bnd.second : si; |
677 | | const integer list_size = bnd.first.size(); |
678 | | |
679 | | #pragma GCC ivdep |
680 | | for (integer d = 0; d != NDIM; ++d) { |
681 | | Y[d] = bnd.x[d] * dx + Xbase[d]; |
682 | | } |
683 | | |
684 | | m0 = (*(mpoles.m))[index]; |
685 | | for (integer li = 0; li < list_size; li += simd_len) { |
686 | | for (integer i = 0; i != simd_len && li + i < list_size; ++i) { |
687 | | const integer iii0 = bnd.first[li + i]; |
688 | | space_vector const& com0iii0 = com0[iii0]; |
689 | | #pragma GCC ivdep |
690 | | for (integer d = 0; d < NDIM; ++d) { |
691 | | X[d][i] = com0iii0[d]; |
692 | | } |
693 | | if (type == RHO) { |
694 | | multipole const& Miii0 = M[iii0]; |
695 | | real const tmp = m0[i] / Miii0(); |
696 | | #pragma GCC ivdep |
697 | | for (integer j = taylor_sizes[2]; j != taylor_sizes[3]; ++j) { |
698 | | n0[j][i] = -Miii0[j] * tmp; |
699 | | } |
700 | | } |
701 | | } |
702 | | |
703 | | if (type != RHO) { |
704 | | #pragma GCC ivdep |
705 | | for (integer j = taylor_sizes[2]; j != taylor_sizes[3]; ++j) { |
706 | | n0[j] = ZERO; |
707 | | } |
708 | | } |
709 | | #pragma GCC ivdep |
710 | | for (integer d = 0; d < NDIM; ++d) { |
711 | | dX[d] = X[d] - Y[d]; |
712 | | } |
713 | | |
714 | | taylor<5, simd_vector> D; |
715 | | taylor<4, simd_vector> A0; |
716 | | std::array<simd_vector, NDIM> B0 = { |
717 | | simd_vector(0.0), simd_vector(0.0), simd_vector(0.0) |
718 | | }; |
719 | | |
720 | | D.set_basis(dX); |
721 | | |
722 | | A0[0] = m0 * D[0]; |
723 | | for (integer i = taylor_sizes[0]; i != taylor_sizes[2]; ++i) { |
724 | | A0[i] = m0 * D[i]; |
725 | | } |
726 | | if (type == RHO) { |
727 | | for (integer i = taylor_sizes[2]; i != taylor_sizes[3]; ++i) { |
728 | | A0[i] = m0 * D[i]; |
729 | | } |
730 | | } |
731 | | |
732 | | if (type == RHO) { |
733 | | for (integer a = 0; a != NDIM; ++a) { |
734 | | for (integer b = 0; b != NDIM; ++b) { |
735 | | for (integer c = b; c != NDIM; ++c) { |
736 | | for (integer d = c; d != NDIM; ++d) { |
737 | | const auto tmp = D(a, b, c, d) * (factor(b, c, d) / real(6)); |
738 | | B0[a] -= n0(b, c, d) * tmp; |
739 | | } |
740 | | } |
741 | | } |
742 | | } |
743 | | } |
744 | | #ifdef USE_GRAV_PAR |
745 | | std::lock_guard<hpx::lcos::local::spinlock> lock(*L_mtx); |
746 | | #endif |
747 | | for (integer i = 0; i != simd_len && i + li < list_size; ++i) { |
748 | | const integer iii0 = bnd.first[li + i]; |
749 | | expansion& Liii0 = L[iii0]; |
750 | | |
751 | | #pragma GCC ivdep |
752 | | for (integer j = 0; j != 20; ++j) { |
753   | 2.2%
| Liii0[j] += A0[j][i]; |
| % of fetches | Miss ratio | Fetch ratio | WB ratio | Fetch Util | WB Util | PC | Type | Issues | 1.9%Thread Id | % of fetches |
---|
Thread Total | 1.9% | 23408 | 0.0% | 23409 | 0.0% | 23410 | 0.0% | 23411 | 0.0% | 23412 | 0.2% | 23413 | 0.2% | 23414 | 0.2% | 23415 | 0.2% | 23416 | 0.0% | 23417 | 0.0% | 23418 | 0.2% | 23419 | 0.0% | 23420 | 0.0% | 23421 | 0.0% | 23422 | 0.2% | 23423 | 0.0% | 23424 | 0.0% | 23425 | 0.0% | 23426 | 0.2% | 23427 | 0.0% | 23428 | 0.0% | 23429 | 0.0% | 23430 | 0.0% | 23431 | 0.2% |
| 3.9%Thread Id | Total Miss ratio | Uncategorized | Replacement | Coherence | Flush |
---|
Thread Average | 3.9% | 0.0% | 1.0% | 2.9% | 0.0% | 23408 | 0.2% | 0.0% | 0.2% | 0.0% | 0.0% | 23409 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23410 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23411 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23412 | 8.8% | 0.0% | 8.8% | 0.0% | 0.0% | 23413 | 25.0% | 0.0% | 0.0% | 25.0% | 0.0% | 23414 | 10.0% | 0.0% | 0.0% | 10.0% | 0.0% | 23415 | 14.3% | 0.0% | 0.0% | 14.3% | 0.0% | 23416 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23417 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23418 | 9.1% | 0.0% | 0.0% | 9.1% | 0.0% | 23419 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23420 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23421 | 0.3% | 0.0% | 0.3% | 0.0% | 0.0% | 23422 | 10.2% | 0.0% | 0.2% | 10.0% | 0.0% | 23423 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23424 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23425 | 0.2% | 0.0% | 0.2% | 0.0% | 0.0% | 23426 | 8.3% | 0.0% | 8.3% | 0.0% | 0.0% | 23427 | 0.2% | 0.0% | 0.2% | 0.0% | 0.0% | 23428 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23429 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23430 | 0.1% | 0.0% | 0.1% | 0.0% | 0.0% | 23431 | 12.6% | 0.0% | 0.1% | 12.5% | 0.0% |
| 3.9%Thread Id | Total Fetch ratio | Uncategorized | Replacement | Coherence | Flush |
---|
Thread Average | 3.9% | 0.0% | 1.0% | 2.9% | 0.0% | 23408 | 0.2% | 0.0% | 0.2% | 0.0% | 0.0% | 23409 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23410 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23411 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23412 | 8.8% | 0.0% | 8.8% | 0.0% | 0.0% | 23413 | 25.0% | 0.0% | 0.0% | 25.0% | 0.0% | 23414 | 10.0% | 0.0% | 0.0% | 10.0% | 0.0% | 23415 | 14.3% | 0.0% | 0.0% | 14.3% | 0.0% | 23416 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23417 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23418 | 9.1% | 0.0% | 0.0% | 9.1% | 0.0% | 23419 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23420 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23421 | 0.3% | 0.0% | 0.3% | 0.0% | 0.0% | 23422 | 10.2% | 0.0% | 0.2% | 10.0% | 0.0% | 23423 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23424 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23425 | 0.2% | 0.0% | 0.2% | 0.0% | 0.0% | 23426 | 8.3% | 0.0% | 8.3% | 0.0% | 0.0% | 23427 | 0.2% | 0.0% | 0.2% | 0.0% | 0.0% | 23428 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23429 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23430 | 0.1% | 0.0% | 0.1% | 0.0% | 0.0% | 23431 | 12.6% | 0.0% | 0.1% | 12.5% | 0.0% |
| 0.0%Thread Id | Total Write-back ratio | Uncategorized | Replacement | Coherence | Flush |
---|
Thread Average | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23408 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23409 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23410 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23411 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23412 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23413 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23414 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23415 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23416 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23417 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23418 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23419 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23420 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23421 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23422 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23423 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23424 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23425 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23426 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23427 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23428 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23429 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23430 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23431 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% |
| 47.8%Thread Id | Fetch utilization |
---|
Thread Average | 47.8% | 23408 | 100.0% | 23409 | 100.0% | 23410 | 100.0% | 23411 | 100.0% | 23412 | 26.8% | 23413 | 13.0% | 23414 | 0.1% | 23415 | 25.8% | 23416 | 100.0% | 23417 | 100.0% | 23418 | 26.4% | 23419 | 100.0% | 23420 | 100.0% | 23421 | 100.0% | 23422 | 1.0% | 23423 | 100.0% | 23424 | 100.0% | 23425 | 100.0% | 23426 | 13.6% | 23427 | 100.0% | 23428 | 100.0% | 23429 | 100.0% | 23430 | 100.0% | 23431 | 38.1% |
| 50.2%Thread Id | Write-back utilization |
---|
Thread Average | 50.2% | 23408 | 100.0% | 23409 | 100.0% | 23410 | 100.0% | 23411 | 100.0% | 23412 | 26.2% | 23413 | 25.0% | 23414 | 12.7% | 23415 | 12.7% | 23416 | 100.0% | 23417 | 100.0% | 23418 | 36.5% | 23419 | 100.0% | 23420 | 100.0% | 23421 | 90.5% | 23422 | 38.2% | 23423 | 100.0% | 23424 | 100.0% | 23425 | 100.0% | 23426 | 50.8% | 23427 | 13.2% | 23428 | 100.0% | 23429 | 100.0% | 23430 | 100.0% | 23431 | 13.1% |
| 0x9dcf70 | R |   
| 0.0%Thread Id | % of fetches |
---|
Thread Total | 0.0% | 23408 | 0.0% | 23409 | 0.0% | 23410 | 0.0% | 23411 | 0.0% | 23412 | 0.0% | 23413 | 0.0% | 23414 | 0.0% | 23415 | 0.0% | 23416 | 0.0% | 23417 | 0.0% | 23418 | 0.0% | 23419 | 0.0% | 23420 | 0.0% | 23421 | 0.0% | 23422 | 0.0% | 23423 | 0.0% | 23424 | 0.0% | 23425 | 0.0% | 23426 | 0.0% | 23427 | 0.0% | 23428 | 0.0% | 23429 | 0.0% | 23430 | 0.0% | 23431 | 0.0% |
| 0.0%Thread Id | Total Miss ratio | Uncategorized | Replacement | Coherence | Flush |
---|
Thread Average | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23408 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23409 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23410 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23411 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23412 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23413 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23414 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23415 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23416 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23417 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23418 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23419 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23420 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23421 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23422 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23423 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23424 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23425 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23426 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23427 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23428 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23429 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23430 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23431 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% |
| 0.0%Thread Id | Total Fetch ratio | Uncategorized | Replacement | Coherence | Flush |
---|
Thread Average | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23408 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23409 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23410 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23411 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23412 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23413 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23414 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23415 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23416 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23417 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23418 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23419 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23420 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23421 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23422 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23423 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23424 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23425 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23426 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23427 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23428 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23429 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23430 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23431 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% |
| 3.9%Thread Id | Total Write-back ratio | Uncategorized | Replacement | Coherence | Flush |
---|
Thread Average | 3.9% | 0.0% | 1.0% | 2.9% | 0.0% | 23408 | 0.2% | 0.0% | 0.2% | 0.0% | 0.0% | 23409 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23410 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23411 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23412 | 8.8% | 0.0% | 8.8% | 0.0% | 0.0% | 23413 | 25.0% | 0.0% | 0.0% | 25.0% | 0.0% | 23414 | 10.0% | 0.0% | 0.0% | 10.0% | 0.0% | 23415 | 14.3% | 0.0% | 0.0% | 14.3% | 0.0% | 23416 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23417 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23418 | 9.1% | 0.0% | 0.0% | 9.1% | 0.0% | 23419 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23420 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23421 | 0.3% | 0.0% | 0.3% | 0.0% | 0.0% | 23422 | 10.2% | 0.0% | 0.2% | 10.0% | 0.0% | 23423 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23424 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23425 | 0.2% | 0.0% | 0.2% | 0.0% | 0.0% | 23426 | 8.3% | 0.0% | 8.3% | 0.0% | 0.0% | 23427 | 0.2% | 0.0% | 0.2% | 0.0% | 0.0% | 23428 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23429 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23430 | 0.1% | 0.0% | 0.1% | 0.0% | 0.0% | 23431 | 12.6% | 0.0% | 0.1% | 12.5% | 0.0% |
| 47.8%Thread Id | Fetch utilization |
---|
Thread Average | 47.8% | 23408 | 100.0% | 23409 | 100.0% | 23410 | 100.0% | 23411 | 100.0% | 23412 | 26.8% | 23413 | 13.0% | 23414 | 0.1% | 23415 | 25.8% | 23416 | 100.0% | 23417 | 100.0% | 23418 | 26.4% | 23419 | 100.0% | 23420 | 100.0% | 23421 | 100.0% | 23422 | 1.0% | 23423 | 100.0% | 23424 | 100.0% | 23425 | 100.0% | 23426 | 13.6% | 23427 | 100.0% | 23428 | 100.0% | 23429 | 100.0% | 23430 | 100.0% | 23431 | 38.1% |
| 50.2%Thread Id | Write-back utilization |
---|
Thread Average | 50.2% | 23408 | 100.0% | 23409 | 100.0% | 23410 | 100.0% | 23411 | 100.0% | 23412 | 26.2% | 23413 | 25.0% | 23414 | 12.7% | 23415 | 12.7% | 23416 | 100.0% | 23417 | 100.0% | 23418 | 36.5% | 23419 | 100.0% | 23420 | 100.0% | 23421 | 90.5% | 23422 | 38.2% | 23423 | 100.0% | 23424 | 100.0% | 23425 | 100.0% | 23426 | 50.8% | 23427 | 13.2% | 23428 | 100.0% | 23429 | 100.0% | 23430 | 100.0% | 23431 | 13.1% |
| 0x9dcf7a | W |   
| 0.2%Thread Id | % of fetches |
---|
Thread Total | 0.2% | 23408 | 0.0% | 23409 | 0.0% | 23410 | 0.0% | 23411 | 0.0% | 23412 | 0.0% | 23413 | 0.0% | 23414 | 0.0% | 23415 | 0.0% | 23416 | 0.0% | 23417 | 0.0% | 23418 | 0.0% | 23419 | 0.0% | 23420 | 0.0% | 23421 | 0.0% | 23422 | 0.0% | 23423 | 0.0% | 23424 | 0.0% | 23425 | 0.0% | 23426 | 0.0% | 23427 | 0.0% | 23428 | 0.0% | 23429 | 0.0% | 23430 | 0.2% | 23431 | 0.0% |
| 12.5%Thread Id | Total Miss ratio | Uncategorized | Replacement | Coherence | Flush |
---|
Thread Average | 12.5% | 0.0% | 0.0% | 12.5% | 0.0% | 23408 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23409 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23410 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23411 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23412 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23413 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23414 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23415 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23416 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23417 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23418 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23419 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23420 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23421 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23422 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23423 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23424 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23425 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23426 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23427 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23428 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23429 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23430 | 50.0% | 0.0% | 0.0% | 50.0% | 0.0% | 23431 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% |
| 12.5%Thread Id | Total Fetch ratio | Uncategorized | Replacement | Coherence | Flush |
---|
Thread Average | 12.5% | 0.0% | 0.0% | 12.5% | 0.0% | 23408 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23409 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23410 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23411 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23412 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23413 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23414 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23415 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23416 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23417 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23418 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23419 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23420 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23421 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23422 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23423 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23424 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23425 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23426 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23427 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23428 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23429 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23430 | 50.0% | 0.0% | 0.0% | 50.0% | 0.0% | 23431 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% |
| 0.0%Thread Id | Total Write-back ratio | Uncategorized | Replacement | Coherence | Flush |
---|
Thread Average | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23408 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23409 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23410 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23411 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23412 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23413 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23414 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23415 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23416 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23417 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23418 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23419 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23420 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23421 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23422 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23423 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23424 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23425 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23426 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23427 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23428 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23429 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23430 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23431 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% |
| 25.5%Thread Id | Fetch utilization |
---|
Thread Average | 25.5% | 23408 | 100.0% | 23409 | 100.0% | 23410 | 100.0% | 23411 | 100.0% | 23412 | 100.0% | 23413 | 100.0% | 23414 | 100.0% | 23415 | 100.0% | 23416 | 100.0% | 23417 | 100.0% | 23418 | 100.0% | 23419 | 100.0% | 23420 | 100.0% | 23421 | 0.0% | 23422 | 100.0% | 23423 | 100.0% | 23424 | 0.0% | 23425 | 100.0% | 23426 | 100.0% | 23427 | 100.0% | 23428 | 100.0% | 23429 | 0.0% | 23430 | 0.5% | 23431 | 0.0% |
| 25.0%Thread Id | Write-back utilization |
---|
Thread Average | 25.0% | 23408 | 100.0% | 23409 | 100.0% | 23410 | 100.0% | 23411 | 100.0% | 23412 | 100.0% | 23413 | 100.0% | 23414 | 100.0% | 23415 | 100.0% | 23416 | 0.0% | 23417 | 100.0% | 23418 | 100.0% | 23419 | 100.0% | 23420 | 100.0% | 23421 | 0.0% | 23422 | 100.0% | 23423 | 100.0% | 23424 | 0.0% | 23425 | 100.0% | 23426 | 100.0% | 23427 | 100.0% | 23428 | 100.0% | 23429 | 0.0% | 23430 | 12.5% | 23431 | 0.0% |
| 0x9dcf70 | R | 
| 0.0%Thread Id | % of fetches |
---|
Thread Total | 0.0% | 23408 | 0.0% | 23409 | 0.0% | 23410 | 0.0% | 23411 | 0.0% | 23412 | 0.0% | 23413 | 0.0% | 23414 | 0.0% | 23415 | 0.0% | 23416 | 0.0% | 23417 | 0.0% | 23418 | 0.0% | 23419 | 0.0% | 23420 | 0.0% | 23421 | 0.0% | 23422 | 0.0% | 23423 | 0.0% | 23424 | 0.0% | 23425 | 0.0% | 23426 | 0.0% | 23427 | 0.0% | 23428 | 0.0% | 23429 | 0.0% | 23430 | 0.0% | 23431 | 0.0% |
| 0.0%Thread Id | Total Miss ratio | Uncategorized | Replacement | Coherence | Flush |
---|
Thread Average | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23408 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23409 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23410 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23411 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23412 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23413 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23414 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23415 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23416 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23417 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23418 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23419 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23420 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23421 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23422 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23423 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23424 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23425 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23426 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23427 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23428 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23429 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23430 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23431 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% |
| 0.0%Thread Id | Total Fetch ratio | Uncategorized | Replacement | Coherence | Flush |
---|
Thread Average | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23408 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23409 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23410 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23411 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23412 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23413 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23414 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23415 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23416 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23417 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23418 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23419 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23420 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23421 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23422 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23423 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23424 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23425 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23426 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23427 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23428 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23429 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23430 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23431 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% |
| 12.5%Thread Id | Total Write-back ratio | Uncategorized | Replacement | Coherence | Flush |
---|
Thread Average | 12.5% | 0.0% | 0.0% | 12.5% | 0.0% | 23408 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23409 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23410 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23411 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23412 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23413 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23414 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23415 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23416 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23417 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23418 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23419 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23420 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23421 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23422 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23423 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23424 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23425 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23426 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23427 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23428 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23429 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23430 | 50.0% | 0.0% | 0.0% | 50.0% | 0.0% | 23431 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% |
| 25.5%Thread Id | Fetch utilization |
---|
Thread Average | 25.5% | 23408 | 100.0% | 23409 | 100.0% | 23410 | 100.0% | 23411 | 100.0% | 23412 | 100.0% | 23413 | 100.0% | 23414 | 100.0% | 23415 | 100.0% | 23416 | 100.0% | 23417 | 100.0% | 23418 | 100.0% | 23419 | 100.0% | 23420 | 100.0% | 23421 | 0.0% | 23422 | 100.0% | 23423 | 100.0% | 23424 | 0.0% | 23425 | 100.0% | 23426 | 100.0% | 23427 | 100.0% | 23428 | 100.0% | 23429 | 0.0% | 23430 | 0.5% | 23431 | 0.0% |
| 25.0%Thread Id | Write-back utilization |
---|
Thread Average | 25.0% | 23408 | 100.0% | 23409 | 100.0% | 23410 | 100.0% | 23411 | 100.0% | 23412 | 100.0% | 23413 | 100.0% | 23414 | 100.0% | 23415 | 100.0% | 23416 | 0.0% | 23417 | 100.0% | 23418 | 100.0% | 23419 | 100.0% | 23420 | 100.0% | 23421 | 0.0% | 23422 | 100.0% | 23423 | 100.0% | 23424 | 0.0% | 23425 | 100.0% | 23426 | 100.0% | 23427 | 100.0% | 23428 | 100.0% | 23429 | 0.0% | 23430 | 12.5% | 23431 | 0.0% |
| 0x9dcf7a | W | 
|
|
|    | |
754 | | } |
755 | | |
756 | | if (type == RHO) { |
757 | | space_vector& L_ciii0 = L_c[iii0]; |
758 | | #pragma GCC ivdep |
759 | | for (integer j = 0; j != NDIM; ++j) { |
760 | | L_ciii0[j] += B0[j][i]; |
761 | | } |
762 | | } |
763 | | } |
764 | | } |
765 | | }); |
766 | | PROF_END; |
767 | | } |
768 | | |
769 | | void grid::compute_boundary_interactions_monopole_monopole(gsolve_type type, const std::vector<boundary_interaction_type>& ilist_n_bnd, |
770 | | const gravity_boundary_type& mpoles) { |
771 | | PROF_BEGIN; |
772 | | auto& M = *M_ptr; |
773 | | auto& mon = *mon_ptr; |
774 | | |
775 | | #if !defined(HPX_HAVE_DATAPAR) |
776 | | const v4sd d0 = { 1.0 / dx, +1.0 / sqr(dx), +1.0 / sqr(dx), +1.0 / sqr(dx) }; |
777 | | #else |
778 | | const std::array<double, 4> di0 = { 1.0 / dx, +1.0 / sqr(dx), +1.0 / sqr(dx), +1.0 / sqr(dx) }; |
779 | | const v4sd d0(di0.data()); |
780 | | #endif |
781 | | hpx::parallel::for_loop( |
782 | | for_loop_policy, 0, ilist_n_bnd.size(), |
783 | | [&mpoles, &ilist_n_bnd, &d0, this](std::size_t si) { |
784 | | |
785 | | boundary_interaction_type const& bnd = ilist_n_bnd[si]; |
786 | | const integer dsize = bnd.first.size(); |
787 | | const integer lev = 0; |
788 | | integer index = mpoles.is_local ? bnd.second : si; |
789 | | #if !defined(HPX_HAVE_DATAPAR) |
790 | | const auto tmp = (*(mpoles).m)[index]; |
791 | | v4sd m0; |
792 | | #pragma GCC ivdep |
793 | | for (integer i = 0; i != 4; ++i) { |
794 | | m0[i] = tmp; |
795 | | } |
796 | | #else |
797 | | v4sd m0 = (*(mpoles).m)[index]; |
798 | | #endif |
799 | | m0 *= d0; |
800 | | #ifdef USE_GRAV_PAR |
801 | | std::lock_guard<hpx::lcos::local::spinlock> lock(*L_mtx); |
802 | | #endif |
803 | | for (integer li = 0; li < dsize; ++li) { |
804 | | const integer iii0 = bnd.first[li]; |
805   | 16.1%
| auto tmp1 = m0 * bnd.four[li]; |
| % of fetches | Miss ratio | Fetch ratio | WB ratio | Fetch Util | WB Util | PC | Type | Issues | 16.1%Thread Id | % of fetches |
---|
Thread Total | 16.1% | 23408 | 0.6% | 23409 | 0.2% | 23410 | 0.6% | 23411 | 0.7% | 23412 | 0.5% | 23413 | 1.0% | 23414 | 1.0% | 23415 | 0.2% | 23416 | 0.8% | 23417 | 0.2% | 23418 | 0.7% | 23419 | 1.4% | 23420 | 0.7% | 23421 | 1.1% | 23422 | 0.4% | 23423 | 0.3% | 23424 | 0.4% | 23425 | 0.6% | 23426 | 1.4% | 23427 | 0.6% | 23428 | 1.1% | 23429 | 0.5% | 23430 | 0.3% | 23431 | 0.8% |
| 24.0%Thread Id | Total Miss ratio | Uncategorized | Replacement | Coherence | Flush |
---|
Thread Average | 24.0% | 0.0% | 24.0% | 0.0% | 0.0% | 23408 | 20.2% | 0.0% | 20.2% | 0.0% | 0.0% | 23409 | 10.2% | 0.0% | 10.2% | 0.0% | 0.0% | 23410 | 24.3% | 0.0% | 24.3% | 0.0% | 0.0% | 23411 | 22.1% | 0.0% | 22.1% | 0.0% | 0.0% | 23412 | 20.0% | 0.0% | 20.0% | 0.0% | 0.0% | 23413 | 27.3% | 0.0% | 27.3% | 0.0% | 0.0% | 23414 | 26.9% | 0.0% | 26.9% | 0.0% | 0.0% | 23415 | 11.5% | 0.0% | 11.5% | 0.0% | 0.0% | 23416 | 27.4% | 0.0% | 27.4% | 0.0% | 0.0% | 23417 | 10.9% | 0.0% | 10.9% | 0.0% | 0.0% | 23418 | 31.0% | 0.0% | 31.0% | 0.0% | 0.0% | 23419 | 34.8% | 0.0% | 34.8% | 0.0% | 0.0% | 23420 | 21.7% | 0.0% | 21.7% | 0.0% | 0.0% | 23421 | 34.2% | 0.0% | 34.2% | 0.0% | 0.0% | 23422 | 21.9% | 0.0% | 21.9% | 0.0% | 0.0% | 23423 | 13.6% | 0.0% | 13.6% | 0.0% | 0.0% | 23424 | 18.4% | 0.0% | 18.4% | 0.0% | 0.0% | 23425 | 36.0% | 0.0% | 36.0% | 0.0% | 0.0% | 23426 | 34.3% | 0.0% | 34.3% | 0.0% | 0.0% | 23427 | 19.5% | 0.0% | 19.5% | 0.0% | 0.0% | 23428 | 42.7% | 0.0% | 42.7% | 0.0% | 0.0% | 23429 | 23.0% | 0.0% | 23.0% | 0.0% | 0.0% | 23430 | 5.5% | 0.0% | 5.5% | 0.0% | 0.0% | 23431 | 31.9% | 0.0% | 31.9% | 0.0% | 0.0% |
| 24.0%Thread Id | Total Fetch ratio | Uncategorized | Replacement | Coherence | Flush |
---|
Thread Average | 24.0% | 0.0% | 24.0% | 0.0% | 0.0% | 23408 | 20.2% | 0.0% | 20.2% | 0.0% | 0.0% | 23409 | 10.2% | 0.0% | 10.2% | 0.0% | 0.0% | 23410 | 24.3% | 0.0% | 24.3% | 0.0% | 0.0% | 23411 | 22.1% | 0.0% | 22.1% | 0.0% | 0.0% | 23412 | 20.0% | 0.0% | 20.0% | 0.0% | 0.0% | 23413 | 27.3% | 0.0% | 27.3% | 0.0% | 0.0% | 23414 | 26.9% | 0.0% | 26.9% | 0.0% | 0.0% | 23415 | 11.5% | 0.0% | 11.5% | 0.0% | 0.0% | 23416 | 27.4% | 0.0% | 27.4% | 0.0% | 0.0% | 23417 | 10.9% | 0.0% | 10.9% | 0.0% | 0.0% | 23418 | 31.0% | 0.0% | 31.0% | 0.0% | 0.0% | 23419 | 34.8% | 0.0% | 34.8% | 0.0% | 0.0% | 23420 | 21.7% | 0.0% | 21.7% | 0.0% | 0.0% | 23421 | 34.2% | 0.0% | 34.2% | 0.0% | 0.0% | 23422 | 21.9% | 0.0% | 21.9% | 0.0% | 0.0% | 23423 | 13.6% | 0.0% | 13.6% | 0.0% | 0.0% | 23424 | 18.4% | 0.0% | 18.4% | 0.0% | 0.0% | 23425 | 36.0% | 0.0% | 36.0% | 0.0% | 0.0% | 23426 | 34.3% | 0.0% | 34.3% | 0.0% | 0.0% | 23427 | 19.5% | 0.0% | 19.5% | 0.0% | 0.0% | 23428 | 42.7% | 0.0% | 42.7% | 0.0% | 0.0% | 23429 | 23.0% | 0.0% | 23.0% | 0.0% | 0.0% | 23430 | 5.5% | 0.0% | 5.5% | 0.0% | 0.0% | 23431 | 31.9% | 0.0% | 31.9% | 0.0% | 0.0% |
| 0.0%Thread Id | Total Write-back ratio | Uncategorized | Replacement | Coherence | Flush |
---|
Thread Average | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23408 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23409 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23410 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23411 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23412 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23413 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23414 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23415 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23416 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23417 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23418 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23419 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23420 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23421 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23422 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23423 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23424 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23425 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23426 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23427 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23428 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23429 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23430 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23431 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% |
| 66.1%Thread Id | Fetch utilization |
---|
Thread Average | 66.1% | 23408 | 62.4% | 23409 | 89.5% | 23410 | 57.1% | 23411 | 81.8% | 23412 | 84.1% | 23413 | 76.9% | 23414 | 51.0% | 23415 | 100.0% | 23416 | 60.2% | 23417 | 67.2% | 23418 | 62.1% | 23419 | 58.3% | 23420 | 65.3% | 23421 | 61.4% | 23422 | 100.0% | 23423 | 99.9% | 23424 | 70.2% | 23425 | 79.3% | 23426 | 54.8% | 23427 | 78.4% | 23428 | 59.3% | 23429 | 52.8% | 23430 | 64.6% | 23431 | 54.3% |
| 100.0%Thread Id | Write-back utilization |
---|
Thread Average | 100.0% | 23408 | 100.0% | 23409 | 100.0% | 23410 | 100.0% | 23411 | 100.0% | 23412 | 100.0% | 23413 | 100.0% | 23414 | 100.0% | 23415 | 100.0% | 23416 | 100.0% | 23417 | 100.0% | 23418 | 100.0% | 23419 | 100.0% | 23420 | 100.0% | 23421 | 100.0% | 23422 | 100.0% | 23423 | 100.0% | 23424 | 100.0% | 23425 | 100.0% | 23426 | 100.0% | 23427 | 100.0% | 23428 | 100.0% | 23429 | 100.0% | 23430 | 100.0% | 23431 | 100.0% |
| 0x9e409b | R |  
|
|
|  | |
806 | | expansion& Liii0 = L[iii0]; |
807 | | #pragma GCC ivdep |
808 | | for( integer i = 0; i != 4; ++i) { |
809   | 6.7%
| Liii0[i] += tmp1[i]; |
| % of fetches | Miss ratio | Fetch ratio | WB ratio | Fetch Util | WB Util | PC | Type | Issues | 6.4%Thread Id | % of fetches |
---|
Thread Total | 6.4% | 23408 | 0.2% | 23409 | 0.7% | 23410 | 0.0% | 23411 | 0.3% | 23412 | 0.0% | 23413 | 0.0% | 23414 | 0.4% | 23415 | 0.6% | 23416 | 0.0% | 23417 | 0.0% | 23418 | 0.7% | 23419 | 0.5% | 23420 | 0.0% | 23421 | 0.7% | 23422 | 0.0% | 23423 | 0.0% | 23424 | 0.0% | 23425 | 0.5% | 23426 | 0.2% | 23427 | 0.5% | 23428 | 0.5% | 23429 | 0.2% | 23430 | 0.0% | 23431 | 0.2% |
| 2.5%Thread Id | Total Miss ratio | Uncategorized | Replacement | Coherence | Flush |
---|
Thread Average | 2.5% | 0.0% | 1.7% | 0.8% | 0.0% | 23408 | 2.0% | 0.0% | 1.8% | 0.2% | 0.0% | 23409 | 7.5% | 0.0% | 7.5% | 0.0% | 0.0% | 23410 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23411 | 2.4% | 0.0% | 2.4% | 0.0% | 0.0% | 23412 | 0.1% | 0.0% | 0.1% | 0.0% | 0.0% | 23413 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23414 | 3.9% | 0.0% | 3.9% | 0.0% | 0.0% | 23415 | 4.3% | 0.0% | 2.5% | 1.8% | 0.0% | 23416 | 0.1% | 0.0% | 0.1% | 0.0% | 0.0% | 23417 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23418 | 7.8% | 0.0% | 2.8% | 5.0% | 0.0% | 23419 | 3.5% | 0.0% | 1.8% | 1.8% | 0.0% | 23420 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23421 | 7.5% | 0.0% | 4.1% | 3.4% | 0.0% | 23422 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23423 | 0.1% | 0.0% | 0.1% | 0.0% | 0.0% | 23424 | 0.2% | 0.0% | 0.2% | 0.0% | 0.0% | 23425 | 4.6% | 0.0% | 4.4% | 0.2% | 0.0% | 23426 | 2.1% | 0.0% | 2.0% | 0.1% | 0.0% | 23427 | 6.3% | 0.0% | 3.2% | 3.1% | 0.0% | 23428 | 3.9% | 0.0% | 3.0% | 1.0% | 0.0% | 23429 | 3.1% | 0.0% | 0.0% | 3.1% | 0.0% | 23430 | 0.1% | 0.0% | 0.1% | 0.0% | 0.0% | 23431 | 2.5% | 0.0% | 2.2% | 0.2% | 0.0% |
| 2.5%Thread Id | Total Fetch ratio | Uncategorized | Replacement | Coherence | Flush |
---|
Thread Average | 2.5% | 0.0% | 1.7% | 0.8% | 0.0% | 23408 | 2.0% | 0.0% | 1.8% | 0.2% | 0.0% | 23409 | 7.5% | 0.0% | 7.5% | 0.0% | 0.0% | 23410 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23411 | 2.4% | 0.0% | 2.4% | 0.0% | 0.0% | 23412 | 0.1% | 0.0% | 0.1% | 0.0% | 0.0% | 23413 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23414 | 3.9% | 0.0% | 3.9% | 0.0% | 0.0% | 23415 | 4.3% | 0.0% | 2.5% | 1.8% | 0.0% | 23416 | 0.1% | 0.0% | 0.1% | 0.0% | 0.0% | 23417 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23418 | 7.8% | 0.0% | 2.8% | 5.0% | 0.0% | 23419 | 3.5% | 0.0% | 1.8% | 1.8% | 0.0% | 23420 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23421 | 7.5% | 0.0% | 4.1% | 3.4% | 0.0% | 23422 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23423 | 0.1% | 0.0% | 0.1% | 0.0% | 0.0% | 23424 | 0.2% | 0.0% | 0.2% | 0.0% | 0.0% | 23425 | 4.6% | 0.0% | 4.4% | 0.2% | 0.0% | 23426 | 2.1% | 0.0% | 2.0% | 0.1% | 0.0% | 23427 | 6.3% | 0.0% | 3.2% | 3.1% | 0.0% | 23428 | 3.9% | 0.0% | 3.0% | 1.0% | 0.0% | 23429 | 3.1% | 0.0% | 0.0% | 3.1% | 0.0% | 23430 | 0.1% | 0.0% | 0.1% | 0.0% | 0.0% | 23431 | 2.5% | 0.0% | 2.2% | 0.2% | 0.0% |
| 0.0%Thread Id | Total Write-back ratio | Uncategorized | Replacement | Coherence | Flush |
---|
Thread Average | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23408 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23409 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23410 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23411 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23412 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23413 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23414 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23415 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23416 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23417 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23418 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23419 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23420 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23421 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23422 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23423 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23424 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23425 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23426 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23427 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23428 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23429 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23430 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23431 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% |
| 56.3%Thread Id | Fetch utilization |
---|
Thread Average | 56.3% | 23408 | 25.1% | 23409 | 16.5% | 23410 | 100.0% | 23411 | 42.2% | 23412 | 100.0% | 23413 | 100.0% | 23414 | 27.9% | 23415 | 32.3% | 23416 | 100.0% | 23417 | 100.0% | 23418 | 22.7% | 23419 | 13.1% | 23420 | 100.0% | 23421 | 21.0% | 23422 | 100.0% | 23423 | 100.0% | 23424 | 100.0% | 23425 | 37.8% | 23426 | 28.3% | 23427 | 19.1% | 23428 | 37.1% | 23429 | 58.4% | 23430 | 100.0% | 23431 | 100.0% |
| 46.9%Thread Id | Write-back utilization |
---|
Thread Average | 46.9% | 23408 | 73.7% | 23409 | 32.4% | 23410 | 100.0% | 23411 | 34.6% | 23412 | 100.0% | 23413 | 100.0% | 23414 | 67.1% | 23415 | 38.5% | 23416 | 100.0% | 23417 | 100.0% | 23418 | 12.7% | 23419 | 16.2% | 23420 | 100.0% | 23421 | 29.3% | 23422 | 100.0% | 23423 | 100.0% | 23424 | 100.0% | 23425 | 19.6% | 23426 | 17.6% | 23427 | 12.7% | 23428 | 35.3% | 23429 | 37.7% | 23430 | 100.0% | 23431 | 62.3% |
| 0x9e40b1 | R |    
| 0.3%Thread Id | % of fetches |
---|
Thread Total | 0.3% | 23408 | 0.0% | 23409 | 0.0% | 23410 | 0.0% | 23411 | 0.0% | 23412 | 0.0% | 23413 | 0.0% | 23414 | 0.0% | 23415 | 0.2% | 23416 | 0.0% | 23417 | 0.0% | 23418 | 0.0% | 23419 | 0.0% | 23420 | 0.0% | 23421 | 0.0% | 23422 | 0.0% | 23423 | 0.0% | 23424 | 0.0% | 23425 | 0.0% | 23426 | 0.0% | 23427 | 0.0% | 23428 | 0.0% | 23429 | 0.0% | 23430 | 0.0% | 23431 | 0.0% |
| 0.1%Thread Id | Total Miss ratio | Uncategorized | Replacement | Coherence | Flush |
---|
Thread Average | 0.1% | 0.1% | 0.0% | 0.0% | 0.0% | 23408 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23409 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23410 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23411 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23412 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23413 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23414 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23415 | 2.0% | 2.0% | 0.0% | 0.0% | 0.0% | 23416 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23417 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23418 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23419 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23420 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23421 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23422 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23423 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23424 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23425 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23426 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23427 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23428 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23429 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23430 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23431 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% |
| 0.1%Thread Id | Total Fetch ratio | Uncategorized | Replacement | Coherence | Flush |
---|
Thread Average | 0.1% | 0.1% | 0.0% | 0.0% | 0.0% | 23408 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23409 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23410 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23411 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23412 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23413 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23414 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23415 | 2.0% | 2.0% | 0.0% | 0.0% | 0.0% | 23416 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23417 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23418 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23419 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23420 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23421 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23422 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23423 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23424 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23425 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23426 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23427 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23428 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23429 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23430 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23431 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% |
| 0.0%Thread Id | Total Write-back ratio | Uncategorized | Replacement | Coherence | Flush |
---|
Thread Average | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23408 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23409 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23410 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23411 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23412 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23413 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23414 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23415 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23416 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23417 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23418 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23419 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23420 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23421 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23422 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23423 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23424 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23425 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23426 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23427 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23428 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23429 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23430 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23431 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% |
| 9.7%Thread Id | Fetch utilization |
---|
Thread Average | 9.7% | 23408 | 29.1% | 23409 | 100.0% | 23410 | 66.3% | 23411 | 31.5% | 23412 | 100.0% | 23413 | 18.8% | 23414 | 34.1% | 23415 | 0.2% | 23416 | 47.9% | 23417 | 68.0% | 23418 | 23.5% | 23419 | 23.7% | 23420 | 40.0% | 23421 | 41.8% | 23422 | 32.7% | 23423 | 56.3% | 23424 | 42.2% | 23425 | 29.0% | 23426 | 55.1% | 23427 | 56.9% | 23428 | 37.7% | 23429 | 31.9% | 23430 | 32.6% | 23431 | 53.1% |
| 100.0%Thread Id | Write-back utilization |
---|
Thread Average | 100.0% | 23408 | 100.0% | 23409 | 100.0% | 23410 | 100.0% | 23411 | 100.0% | 23412 | 100.0% | 23413 | 100.0% | 23414 | 100.0% | 23415 | 100.0% | 23416 | 100.0% | 23417 | 100.0% | 23418 | 100.0% | 23419 | 100.0% | 23420 | 100.0% | 23421 | 100.0% | 23422 | 100.0% | 23423 | 100.0% | 23424 | 100.0% | 23425 | 100.0% | 23426 | 100.0% | 23427 | 100.0% | 23428 | 100.0% | 23429 | 100.0% | 23430 | 100.0% | 23431 | 100.0% |
| 0x9e40b6 | R | | 0.0%Thread Id | % of fetches |
---|
Thread Total | 0.0% | 23408 | 0.0% | 23409 | 0.0% | 23410 | 0.0% | 23411 | 0.0% | 23412 | 0.0% | 23413 | 0.0% | 23414 | 0.0% | 23415 | 0.0% | 23416 | 0.0% | 23417 | 0.0% | 23418 | 0.0% | 23419 | 0.0% | 23420 | 0.0% | 23421 | 0.0% | 23422 | 0.0% | 23423 | 0.0% | 23424 | 0.0% | 23425 | 0.0% | 23426 | 0.0% | 23427 | 0.0% | 23428 | 0.0% | 23429 | 0.0% | 23430 | 0.0% | 23431 | 0.0% |
| 0.0%Thread Id | Total Miss ratio | Uncategorized | Replacement | Coherence | Flush |
---|
Thread Average | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23408 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23409 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23410 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23411 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23412 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23413 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23414 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23415 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23416 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23417 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23418 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23419 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23420 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23421 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23422 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23423 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23424 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23425 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23426 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23427 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23428 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23429 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23430 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23431 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% |
| 0.0%Thread Id | Total Fetch ratio | Uncategorized | Replacement | Coherence | Flush |
---|
Thread Average | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23408 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23409 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23410 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23411 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23412 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23413 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23414 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23415 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23416 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23417 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23418 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23419 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23420 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23421 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23422 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23423 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23424 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23425 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23426 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23427 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23428 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23429 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23430 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23431 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% |
| 2.5%Thread Id | Total Write-back ratio | Uncategorized | Replacement | Coherence | Flush |
---|
Thread Average | 2.5% | 0.0% | 1.7% | 0.8% | 0.0% | 23408 | 2.0% | 0.0% | 1.8% | 0.2% | 0.0% | 23409 | 7.5% | 0.0% | 7.5% | 0.0% | 0.0% | 23410 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23411 | 2.4% | 0.0% | 2.4% | 0.0% | 0.0% | 23412 | 0.1% | 0.0% | 0.1% | 0.0% | 0.0% | 23413 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23414 | 3.9% | 0.0% | 3.9% | 0.0% | 0.0% | 23415 | 4.3% | 0.0% | 2.5% | 1.8% | 0.0% | 23416 | 0.1% | 0.0% | 0.1% | 0.0% | 0.0% | 23417 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23418 | 7.8% | 0.0% | 2.8% | 5.0% | 0.0% | 23419 | 3.5% | 0.0% | 1.8% | 1.8% | 0.0% | 23420 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23421 | 7.5% | 0.0% | 4.1% | 3.4% | 0.0% | 23422 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23423 | 0.1% | 0.0% | 0.1% | 0.0% | 0.0% | 23424 | 0.2% | 0.0% | 0.2% | 0.0% | 0.0% | 23425 | 4.6% | 0.0% | 4.4% | 0.2% | 0.0% | 23426 | 2.1% | 0.0% | 2.0% | 0.1% | 0.0% | 23427 | 6.3% | 0.0% | 3.2% | 3.1% | 0.0% | 23428 | 3.9% | 0.0% | 3.0% | 1.0% | 0.0% | 23429 | 3.2% | 0.0% | 0.0% | 3.1% | 0.0% | 23430 | 0.1% | 0.0% | 0.1% | 0.0% | 0.0% | 23431 | 2.5% | 0.0% | 2.2% | 0.2% | 0.0% |
| 56.3%Thread Id | Fetch utilization |
---|
Thread Average | 56.3% | 23408 | 25.1% | 23409 | 16.5% | 23410 | 100.0% | 23411 | 42.2% | 23412 | 100.0% | 23413 | 100.0% | 23414 | 27.9% | 23415 | 32.3% | 23416 | 100.0% | 23417 | 100.0% | 23418 | 22.7% | 23419 | 13.1% | 23420 | 100.0% | 23421 | 21.0% | 23422 | 100.0% | 23423 | 100.0% | 23424 | 100.0% | 23425 | 37.8% | 23426 | 28.3% | 23427 | 19.1% | 23428 | 37.1% | 23429 | 58.4% | 23430 | 100.0% | 23431 | 100.0% |
| 46.9%Thread Id | Write-back utilization |
---|
Thread Average | 46.9% | 23408 | 73.7% | 23409 | 32.4% | 23410 | 100.0% | 23411 | 34.6% | 23412 | 100.0% | 23413 | 100.0% | 23414 | 67.1% | 23415 | 38.5% | 23416 | 100.0% | 23417 | 100.0% | 23418 | 12.7% | 23419 | 16.2% | 23420 | 100.0% | 23421 | 29.3% | 23422 | 100.0% | 23423 | 100.0% | 23424 | 100.0% | 23425 | 19.6% | 23426 | 17.6% | 23427 | 12.7% | 23428 | 35.3% | 23429 | 37.7% | 23430 | 100.0% | 23431 | 62.3% |
| 0x9e40bc | W |    
|
|
|    | |
810 | | } |
811 | | |
812 | | } |
813 | | }); |
814 | | PROF_END; |
815 | | } |
816 | | |
817 | | void compute_ilist() { |
818 | | |
819 | | factor = 0.0; |
820 | | factor() += 1.0; |
821 | | for (integer a = 0; a < NDIM; ++a) { |
822 | | factor(a) += 1.0; |
823 | | for (integer b = 0; b < NDIM; ++b) { |
824 | | factor(a, b) += 1.0; |
825 | | for (integer c = 0; c < NDIM; ++c) { |
826 | | factor(a, b, c) += 1.0; |
827 | | } |
828 | | } |
829 | | } |
830 | | |
831 | | const integer inx = INX; |
832 | | const integer nx = inx; |
833 | | interaction_type np; |
834 | | interaction_type dp; |
835 | | std::vector<interaction_type> ilist_r0; |
836 | | std::vector<interaction_type> ilist_n0; |
837 | | std::vector<interaction_type> ilist_d0; |
838 | | std::array < std::vector<interaction_type>, geo::direction::count() > ilist_n0_bnd; |
839 | | std::array < std::vector<interaction_type>, geo::direction::count() > ilist_d0_bnd; |
840 | | const real theta0 = opts.theta; |
841 | | const auto theta = [](integer i0, integer j0, integer k0, integer i1, integer j1, integer k1) { |
842 | | real tmp = (sqr(i0-i1) + sqr(j0-j1) + sqr(k0-k1)); |
843 | | // protect against sqrt(0) |
844 | | if( tmp > 0.0 ) { |
845 | | return 1.0 / (std::sqrt(tmp)); |
846 | | } else { |
847 | | return 1.0e+10; |
848 | | } |
849 | | }; |
850 | | const auto interior = [](integer i, integer j, integer k) { |
851 | | bool rc = true; |
852 | | if( i < 0 || i >= G_NX ) { |
853 | | rc = false; |
854 | | } |
855 | | else if( j < 0 || j >= G_NX ) { |
856 | | rc = false; |
857 | | } |
858 | | else if( k < 0 || k >= G_NX ) { |
859 | | rc = false; |
860 | | } |
861 | | return rc; |
862 | | }; |
863 | | const auto neighbor_dir = [](integer i, integer j, integer k) { |
864 | | integer i0 = 0, j0 = 0, k0 = 0; |
865 | | if( i < 0) { |
866 | | i0 = -1; |
867 | | } else if( i >= G_NX ) { |
868 | | i0 = +1; |
869 | | } |
870 | | if( j < 0) { |
871 | | j0 = -1; |
872 | | } else if( j >= G_NX ) { |
873 | | j0 = +1; |
874 | | } |
875 | | if( k < 0) { |
876 | | k0 = -1; |
877 | | } else if( k >= G_NX ) { |
878 | | k0 = +1; |
879 | | } |
880 | | geo::direction d; |
881 | | d.set(i0,j0,k0); |
882 | | return d; |
883 | | }; |
884 | | integer max_d = 0; |
885 | | integer width = INX; |
886 | | for (integer i0 = 0; i0 != nx; ++i0) { |
887 | | for (integer j0 = 0; j0 != nx; ++j0) { |
888 | | for (integer k0 = 0; k0 != nx; ++k0) { |
889 | | integer this_d = 0; |
890 | | integer ilb = std::min(i0 - width, integer(0)); |
891 | | integer jlb = std::min(j0 - width, integer(0)); |
892 | | integer klb = std::min(k0 - width, integer(0)); |
893 | | integer iub = std::max(i0 + width + 1, G_NX); |
894 | | integer jub = std::max(j0 + width + 1, G_NX); |
895 | | integer kub = std::max(k0 + width + 1, G_NX); |
896 | | for (integer i1 = ilb; i1 < iub; ++i1) { |
897 | | for (integer j1 = jlb; j1 < jub; ++j1) { |
898 | | for (integer k1 = klb; k1 < kub; ++k1) { |
899 | | const real x = i0 - i1; |
900 | | const real y = j0 - j1; |
901 | | const real z = k0 - k1; |
902 | | // protect against sqrt(0) |
903 | | const real tmp = sqr(x) + sqr(y) + sqr(z); |
904 | | const real r = (tmp == 0) ? 0 : std::sqrt(tmp); |
905 | | const real r3 = r * r * r; |
906 | | v4sd four; |
907 | | if (r > 0.0) { |
908 | | four[0] = -1.0 / r; |
909 | | four[1] = x / r3; |
910 | | four[2] = y / r3; |
911 | | four[3] = z / r3; |
912 | | } else { |
913 | | for (integer i = 0; i != 4; ++i) { |
914 | | four[i] = 0.0; |
915 | | } |
916 | | } |
917 | | if (i0 == i1 && j0 == j1 && k0 == k1) { |
918 | | continue; |
919 | | } |
920 | | const integer i0_c = (i0 + INX) / 2 - INX / 2; |
921 | | const integer j0_c = (j0 + INX) / 2 - INX / 2; |
922 | | const integer k0_c = (k0 + INX) / 2 - INX / 2; |
923 | | const integer i1_c = (i1 + INX) / 2 - INX / 2; |
924 | | const integer j1_c = (j1 + INX) / 2 - INX / 2; |
925 | | const integer k1_c = (k1 + INX) / 2 - INX / 2; |
926 | | const real theta_f = theta(i0, j0, k0, i1, j1, k1); |
927 | | const real theta_c = theta(i0_c, j0_c, k0_c, i1_c, j1_c, k1_c); |
928 | | const integer iii0 = gindex(i0, j0, k0); |
929 | | const integer iii1n = gindex((i1+INX)%INX, (j1+INX)%INX, (k1+INX)%INX); |
930 | | const integer iii1 = gindex(i1, j1, k1); |
931 | | if (theta_c > theta0 && theta_f <= theta0) { |
932 | | np.first = iii0; |
933 | | np.second = iii1n; |
934 | | np.four = four; |
935 | | np.x[XDIM] = i1; |
936 | | np.x[YDIM] = j1; |
937 | | np.x[ZDIM] = k1; |
938 | | if (interior(i1, j1, k1) && interior(i0, j0, k0)) { |
939 | | if (iii1 > iii0) { |
940 | | ilist_n0.push_back(np); |
941 | | } |
942 | | } else if (interior(i0, j0, k0)) { |
943 | | ilist_n0_bnd[neighbor_dir(i1, j1, k1)].push_back(np); |
944 | | } |
945 | | } |
946 | | if (theta_c > theta0) { |
947 | | ++this_d; |
948 | | dp.first = iii0; |
949 | | dp.second = iii1n; |
950 | | dp.x[XDIM] = i1; |
951 | | dp.x[YDIM] = j1; |
952 | | dp.x[ZDIM] = k1; |
953 | | dp.four = four; |
954 | | if (interior(i1, j1, k1) && interior(i0, j0, k0)) { |
955 | | if (iii1 > iii0) { |
956 | | ilist_d0.push_back(dp); |
957 | | } |
958 | | } else if (interior(i0, j0, k0)) { |
959 | | ilist_d0_bnd[neighbor_dir(i1, j1, k1)].push_back(dp); |
960 | | } |
961 | | } |
962 | | if (theta_f <= theta0) { |
963 | | np.first = iii0; |
964 | | np.second = iii1n; |
965 | | np.x[XDIM] = i1; |
966 | | np.x[YDIM] = j1; |
967 | | np.x[ZDIM] = k1; |
968 | | np.four = four; |
969 | | if (interior(i1, j1, k1) && interior(i0, j0, k0)) { |
970 | | if (iii1 > iii0) { |
971 | | ilist_r0.push_back(np); |
972 | | } |
973 | | } |
974 | | } |
975 | | } |
976 | | } |
977 | | } |
978 | | max_d = std::max(max_d, this_d); |
979 | | } |
980 | | } |
981 | | } |
982 | | printf("# direct = %i\n", int(max_d)); |
983 | | ilist_n = std::vector < interaction_type > (ilist_n0.begin(), ilist_n0.end()); |
984 | | ilist_d = std::vector < interaction_type > (ilist_d0.begin(), ilist_d0.end()); |
985 | | ilist_r = std::vector < interaction_type > (ilist_r0.begin(), ilist_r0.end()); |
986 | | for (auto& dir : geo::direction::full_set()) { |
987 | | auto& d = ilist_d_bnd[dir]; |
988 | | auto& d0 = ilist_d0_bnd[dir]; |
989 | | auto& n = ilist_n_bnd[dir]; |
990 | | auto& n0 = ilist_n0_bnd[dir]; |
991 | | for (auto i0 : d0) { |
992 | | bool found = false; |
993 | | for (auto& i : d) { |
994 | | if (i.second == i0.second) { |
995 | | i.first.push_back(i0.first); |
996 | | i.four.push_back(i0.four); |
997 | | found = true; |
998 | | break; |
999 | | } |
1000 | | } |
1001 | | if (!found) { |
1002 | | boundary_interaction_type i; |
1003 | | i.second = i0.second; |
1004 | | i.x = i0.x; |
1005 | | n.push_back(i); |
1006 | | i.first.push_back(i0.first); |
1007 | | i.four.push_back(i0.four); |
1008 | | d.push_back(i); |
1009 | | } |
1010 | | } |
1011 | | for (auto i0 : n0) { |
1012 | | bool found = false; |
1013 | | for (auto& i : n) { |
1014 | | if (i.second == i0.second) { |
1015 | | i.first.push_back(i0.first); |
1016 | | i.four.push_back(i0.four); |
1017 | | found = true; |
1018 | | break; |
1019 | | } |
1020 | | } |
1021 | | assert(found); |
1022 | | } |
1023 | | } |
1024 | | } |
1025 | | |
1026 | | expansion_pass_type grid::compute_expansions(gsolve_type type, const expansion_pass_type* parent_expansions) { |
1027 | | PROF_BEGIN; |
1028 | | expansion_pass_type exp_ret; |
1029 | | if (!is_leaf) { |
1030 | | exp_ret.first.resize(INX * INX * INX); |
1031 | | if (type == RHO) { |
1032 | | exp_ret.second.resize(INX * INX * INX); |
1033 | | } |
1034 | | } |
1035 | | const integer inx = INX; |
1036 | | const integer nxp = (inx / 2); |
1037 | | auto child_index = [=](integer ip, integer jp, integer kp, integer ci, integer bw=0) -> integer { |
1038 | | const integer ic = (2 * (ip )+bw) + ((ci >> 0) & 1); |
1039 | | const integer jc = (2 * (jp )+bw) + ((ci >> 1) & 1); |
1040 | | const integer kc = (2 * (kp )+bw) + ((ci >> 2) & 1); |
1041 | | return (inx+2*bw) * (inx+2*bw) * ic + (inx+2*bw) * jc + kc; |
1042 | | }; |
1043 | | |
1044 | | for (integer ip = 0; ip != nxp; ++ip) { |
1045 | | for (integer jp = 0; jp != nxp; ++jp) { |
1046 | | for (integer kp = 0; kp != nxp; ++kp) { |
1047 | | const integer iiip = nxp * nxp * ip + nxp * jp + kp; |
1048 | | std::array<simd_vector, NDIM> X; |
1049 | | std::array<simd_vector, NDIM> dX; |
1050 | | taylor<4, simd_vector> l; |
1051 | | std::array<simd_vector, NDIM> lc; |
1052 | | if (!is_root) { |
1053 | | const integer index = (INX * INX / 4) * (ip) + (INX / 2) * (jp) + (kp); |
1054 | | for (integer j = 0; j != 20; ++j) { |
1055 | | l[j] = parent_expansions->first[index][j]; |
1056 | | } |
1057 | | if (type == RHO) { |
1058 | | for (integer j = 0; j != NDIM; ++j) { |
1059 | | lc[j] = parent_expansions->second[index][j]; |
1060 | | } |
1061 | | } |
1062 | | } else { |
1063 | | for (integer j = 0; j != 20; ++j) { |
1064 | | l[j] = 0.0; |
1065 | | } |
1066 | | for (integer j = 0; j != NDIM; ++j) { |
1067 | | lc[j] = 0.0; |
1068 | | } |
1069 | | } |
1070 | | for (integer ci = 0; ci != NCHILD; ++ci) { |
1071 | | const integer iiic = child_index(ip, jp, kp, ci); |
1072 | | for (integer d = 0; d < NDIM; ++d) { |
1073 | | X[d][ci] = (*(com_ptr[0]))[iiic][d]; |
1074 | | } |
1075 | | } |
1076 | | const auto& Y = (*(com_ptr[1]))[iiip]; |
1077 | | for (integer d = 0; d < NDIM; ++d) { |
1078 | | dX[d] = X[d] - Y[d]; |
1079 | | } |
1080 | | l <<= dX; |
1081 | | for (integer ci = 0; ci != NCHILD; ++ci) { |
1082 | | const integer iiic = child_index(ip, jp, kp, ci); |
1083 | | expansion& Liiic = L[iiic]; |
1084 | | for (integer j = 0; j != 20; ++j) { |
1085   | 0.5%
| Liiic[j] += l[j][ci]; |
| % of fetches | Miss ratio | Fetch ratio | WB ratio | Fetch Util | WB Util | PC | Type | Issues | 0.5%Thread Id | % of fetches |
---|
Thread Total | 0.5% | 23408 | 0.0% | 23409 | 0.0% | 23410 | 0.2% | 23411 | 0.0% | 23412 | 0.0% | 23413 | 0.0% | 23414 | 0.0% | 23415 | 0.0% | 23416 | 0.0% | 23417 | 0.0% | 23418 | 0.2% | 23419 | 0.0% | 23420 | 0.0% | 23421 | 0.0% | 23422 | 0.0% | 23423 | 0.0% | 23424 | 0.0% | 23425 | 0.0% | 23426 | 0.0% | 23427 | 0.0% | 23428 | 0.0% | 23429 | 0.0% | 23430 | 0.0% | 23431 | 0.0% |
| 13.3%Thread Id | Total Miss ratio | Uncategorized | Replacement | Coherence | Flush |
---|
Thread Average | 13.3% | 0.0% | 13.3% | 0.0% | 0.0% | 23408 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23409 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23410 | 100.0% | 0.0% | 100.0% | 0.0% | 0.0% | 23411 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23412 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23413 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23414 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23415 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23416 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23417 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23418 | 50.0% | 0.0% | 50.0% | 0.0% | 0.0% | 23419 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23420 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23421 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23422 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23423 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23424 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23425 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23426 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23427 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23428 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23429 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23430 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23431 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% |
| 13.3%Thread Id | Total Fetch ratio | Uncategorized | Replacement | Coherence | Flush |
---|
Thread Average | 13.3% | 0.0% | 13.3% | 0.0% | 0.0% | 23408 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23409 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23410 | 100.0% | 0.0% | 100.0% | 0.0% | 0.0% | 23411 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23412 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23413 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23414 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23415 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23416 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23417 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23418 | 50.0% | 0.0% | 50.0% | 0.0% | 0.0% | 23419 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23420 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23421 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23422 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23423 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23424 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23425 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23426 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23427 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23428 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23429 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23430 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23431 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% |
| 0.0%Thread Id | Total Write-back ratio | Uncategorized | Replacement | Coherence | Flush |
---|
Thread Average | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23408 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23409 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23410 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23411 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23412 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23413 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23414 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23415 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23416 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23417 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23418 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23419 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23420 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23421 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23422 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23423 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23424 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23425 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23426 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23427 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23428 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23429 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23430 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23431 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% |
| 91.8%Thread Id | Fetch utilization |
---|
Thread Average | 91.8% | 23408 | 100.0% | 23409 | 100.0% | 23410 | 12.5% | 23411 | 100.0% | 23412 | 100.0% | 23413 | 100.0% | 23414 | 100.0% | 23415 | 100.0% | 23416 | 100.0% | 23417 | 0.0% | 23418 | 12.5% | 23419 | 100.0% | 23420 | 0.0% | 23421 | 100.0% | 23422 | 100.0% | 23423 | 100.0% | 23424 | 0.0% | 23425 | 0.0% | 23426 | 100.0% | 23427 | 0.0% | 23428 | 0.0% | 23429 | 0.0% | 23430 | 100.0% | 23431 | 100.0% |
| 100.0%Thread Id | Write-back utilization |
---|
Thread Average | 100.0% | 23408 | 100.0% | 23409 | 100.0% | 23410 | 25.0% | 23411 | 100.0% | 23412 | 100.0% | 23413 | 100.0% | 23414 | 0.0% | 23415 | 100.0% | 23416 | 100.0% | 23417 | 0.0% | 23418 | 12.5% | 23419 | 100.0% | 23420 | 100.0% | 23421 | 100.0% | 23422 | 100.0% | 23423 | 100.0% | 23424 | 0.0% | 23425 | 0.0% | 23426 | 100.0% | 23427 | 0.0% | 23428 | 0.0% | 23429 | 100.0% | 23430 | 100.0% | 23431 | 100.0% |
| 0x9e0990 | R | 
| 0.0%Thread Id | % of fetches |
---|
Thread Total | 0.0% | 23408 | 0.0% | 23409 | 0.0% | 23410 | 0.0% | 23411 | 0.0% | 23412 | 0.0% | 23413 | 0.0% | 23414 | 0.0% | 23415 | 0.0% | 23416 | 0.0% | 23417 | 0.0% | 23418 | 0.0% | 23419 | 0.0% | 23420 | 0.0% | 23421 | 0.0% | 23422 | 0.0% | 23423 | 0.0% | 23424 | 0.0% | 23425 | 0.0% | 23426 | 0.0% | 23427 | 0.0% | 23428 | 0.0% | 23429 | 0.0% | 23430 | 0.0% | 23431 | 0.0% |
| 0.0%Thread Id | Total Miss ratio | Uncategorized | Replacement | Coherence | Flush |
---|
Thread Average | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23408 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23409 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23410 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23411 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23412 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23413 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23414 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23415 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23416 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23417 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23418 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23419 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23420 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23421 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23422 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23423 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23424 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23425 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23426 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23427 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23428 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23429 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23430 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23431 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% |
| 0.0%Thread Id | Total Fetch ratio | Uncategorized | Replacement | Coherence | Flush |
---|
Thread Average | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23408 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23409 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23410 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23411 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23412 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23413 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23414 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23415 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23416 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23417 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23418 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23419 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23420 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23421 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23422 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23423 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23424 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23425 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23426 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23427 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23428 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23429 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23430 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23431 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% |
| 13.3%Thread Id | Total Write-back ratio | Uncategorized | Replacement | Coherence | Flush |
---|
Thread Average | 13.3% | 0.0% | 13.3% | 0.0% | 0.0% | 23408 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23409 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23410 | 100.0% | 0.0% | 100.0% | 0.0% | 0.0% | 23411 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23412 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23413 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23414 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23415 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23416 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23417 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23418 | 50.0% | 0.0% | 50.0% | 0.0% | 0.0% | 23419 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23420 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23421 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23422 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23423 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23424 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23425 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23426 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23427 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23428 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23429 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23430 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 23431 | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% |
| 91.8%Thread Id | Fetch utilization |
---|
Thread Average | 91.8% | 23408 | 100.0% | 23409 | 100.0% | 23410 | 12.5% | 23411 | 100.0% | 23412 | 100.0% | 23413 | 100.0% | 23414 | 100.0% | 23415 | 100.0% | 23416 | 100.0% | 23417 | 0.0% | 23418 | 12.5% | 23419 | 100.0% | 23420 | 0.0% | 23421 | 100.0% | 23422 | 100.0% | 23423 | 100.0% | 23424 | 0.0% | 23425 | 0.0% | 23426 | 100.0% | 23427 | 0.0% | 23428 | 0.0% | 23429 | 0.0% | 23430 | 100.0% | 23431 | 100.0% |
| 100.0%Thread Id | Write-back utilization |
---|
Thread Average | 100.0% | 23408 | 100.0% | 23409 | 100.0% | 23410 | 25.0% | 23411 | 100.0% | 23412 | 100.0% | 23413 | 100.0% | 23414 | 0.0% | 23415 | 100.0% | 23416 | 100.0% | 23417 | 0.0% | 23418 | 12.5% | 23419 | 100.0% | 23420 | 100.0% | 23421 | 100.0% | 23422 | 100.0% | 23423 | 100.0% | 23424 | 0.0% | 23425 | 0.0% | 23426 | 100.0% | 23427 | 0.0% | 23428 | 0.0% | 23429 | 100.0% | 23430 | 100.0% | 23431 | 100.0% |
| 0x9e099a | W | 
|
|
| | |
1086 | | } |
1087 | | |
1088 | | space_vector& L_ciiic = L_c[iiic]; |
1089 | | if (type == RHO) { |
1090 | | for (integer j = 0; j != NDIM; ++j) { |
1091 | | L_ciiic[j] += lc[j][ci]; |
1092 | | } |
1093 | | } |
1094 | | |
1095 | | if (!is_leaf) { |
1096 | | integer index = child_index(ip, jp, kp, ci, 0); |
1097 | | for( integer j = 0; j != 20; ++j) { |
1098 | | exp_ret.first[index][j] = Liiic[j]; |
1099 | | } |
1100 | | |
1101 | | if (type == RHO) { |
1102 | | for( integer j = 0; j != 3; ++j) { |
1103 | | exp_ret.second[index][j] = L_ciiic[j]; |
1104 | | } |
1105 | | } |
1106 | | } |
1107 | | } |
1108 | | } |
1109 | | } |
1110 | | } |
1111 | | |
1112 | | if (is_leaf) { |
1113 | | for (integer i = 0; i != G_NX; ++i) { |
1114 | | for (integer j = 0; j != G_NX; ++j) { |
1115 | | for (integer k = 0; k != G_NX; ++k) { |
1116 | | const integer iii = gindex(i, j, k); |
1117 | | const integer iii0 = h0index(i, j, k); |
1118 | | const integer iiih = hindex(i + H_BW , j + H_BW , k + H_BW); |
1119 | | if (type == RHO) { |
1120 | | G[iii][phi_i] = L[iii](); |
1121 | | for (integer d = 0; d < NDIM; ++d) { |
1122 | | G[iii][gx_i + d] = -L[iii](d); |
1123 | | if (opts.ang_con == true) { |
1124 | | G[iii][gx_i + d] -= L_c[iii][d]; |
1125 | | } |
1126 | | } |
1127 | | U[pot_i][iiih] = G[iii][phi_i] * U[rho_i][iiih]; |
1128 | | } else { |
1129 | | dphi_dt[iii0] = L[iii](); |
1130 | | } |
1131 | | } |
1132 | | } |
1133 | | } |
1134 | | } |
1135 | | PROF_END; |
1136 | | return exp_ret; |
1137 | | } |
1138 | | |
1139 | | multipole_pass_type grid::compute_multipoles(gsolve_type type, const multipole_pass_type* child_poles) { |
1140 | | |
1141 | | // if( int(*Muse_counter) > 0) |
1142 | | // printf( "%i\n", int(*Muse_counter)); |
1143 | | PROF_BEGIN; |
1144 | | integer lev = 0; |
1145 | | const real dx3 = dx * dx * dx; |
1146 | | M_ptr = std::make_shared<std::vector<multipole>>(); |
1147 | | mon_ptr = std::make_shared<std::vector<real>>(); |
1148 | | if (com_ptr[1] == nullptr) { |
1149 | | com_ptr[1] = std::make_shared<std::vector<space_vector>>(G_N3 / 8); |
1150 | | } |
1151 | | if (type == RHO) { |
1152 | | com_ptr[0] = std::make_shared<std::vector<space_vector>>(G_N3); |
1153 | | } |
1154 | | auto& M = *M_ptr; |
1155 | | auto& mon = *mon_ptr; |
1156 | | if (is_leaf) { |
1157 | | M.resize(0); |
1158 | | mon.resize(G_N3); |
1159 | | } else { |
1160 | | M.resize(G_N3); |
1161 | | mon.resize(0); |
1162 | | } |
1163 | | if (type == RHO) { |
1164 | | const integer iii0 = hindex(H_BW, H_BW, H_BW); |
1165 | | const std::array<real, NDIM> x0 = { X[XDIM][iii0], X[YDIM][iii0], X[ZDIM][iii0] }; |
1166 | | |
1167 | | // std::array<integer, 3> i; |
1168 | | // for (i[0] = 0; i[0] != G_NX; ++i[0]) { |
1169 | | // for (i[1] = 0; i[1] != G_NX; ++i[1]) { |
1170 | | // for (i[2] = 0; i[2] != G_NX; ++i[2]) { |
1171 | | // const integer iii = gindex(i[0], i[1], i[2]); |
1172 | | // for (integer dim = 0; dim != NDIM; ++dim) { |
1173 | | // (*(com_ptr[0]))[iii][dim] = x0[dim] + (i[dim]) * dx; |
1174 | | // } |
1175 | | // } |
1176 | | // } |
1177 | | // } |
1178 | | for (integer i = 0; i != G_NX; ++i) { |
1179 | | for (integer j = 0; j != G_NX; ++j) { |
1180 | | for (integer k = 0; k != G_NX; ++k) { |
1181 | | auto& com0iii = (*(com_ptr[0]))[gindex(i, j, k)]; |
1182 | | // for (integer dim = 0; dim != NDIM; ++dim) |
1183 | | com0iii[0] = x0[0] + i * dx; |
1184 | | com0iii[1] = x0[1] + j * dx; |
1185 | | com0iii[2] = x0[2] + k * dx; |
1186 | | } |
1187 | | } |
1188 | | } |
1189 | | } |
1190 | | |
1191 | | multipole_pass_type mret; |
1192 | | if (!is_root) { |
1193 | | mret.first.resize(INX * INX * INX / NCHILD); |
1194 | | mret.second.resize(INX * INX * INX / NCHILD); |
1195 | | } |
1196 | | taylor<4, real> MM; |
1197 | | integer index = 0; |
1198 | | for (integer inx = INX; (inx >= INX / 2); inx >>= 1) { |
1199 | | |
1200 | | const integer nxp = inx; |
1201 | | const integer nxc = (2 * inx); |
1202 | | |
1203 | | auto child_index = [=](integer ip, integer jp, integer kp, integer ci) -> integer { |
1204 | | const integer ic = (2 * ip ) + ((ci >> 0) & 1); |
1205 | | const integer jc = (2 * jp ) + ((ci >> 1) & 1); |
1206 | | const integer kc = (2 * kp ) + ((ci >> 2) & 1); |
1207 | | return nxc * nxc * ic + nxc * jc + kc; |
1208 | | }; |
1209 | | |
1210 | | for (integer ip = 0; ip != nxp; ++ip) { |
1211 | | for (integer jp = 0; jp != nxp; ++jp) { |
1212 | | for (integer kp = 0; kp != nxp; ++kp) { |
1213 | | const integer iiip = nxp * nxp * ip + nxp * jp + kp; |
1214 | | if (lev != 0) { |
1215 | | if (type == RHO) { |
1216 | | simd_vector mc; |
1217 | | std::array < simd_vector, NDIM > X; |
1218 | | for (integer ci = 0; ci != NCHILD; ++ci) { |
1219 | | const integer iiic = child_index(ip, jp, kp, ci); |
1220 | | if (is_leaf) { |
1221 | | mc[ci] = mon[iiic]; |
1222 | | } else { |
1223 | | mc[ci] = M[iiic](); |
1224 | | } |
1225 | | for (integer d = 0; d < NDIM; ++d) { |
1226 | | X[d][ci] = (*(com_ptr[0]))[iiic][d]; |
1227 | | } |
1228 | | } |
1229 | | real mtot = mc.sum(); |
1230 | | for (integer d = 0; d < NDIM; ++d) { |
1231 | | (*(com_ptr[1]))[iiip][d] = (X[d] * mc).sum() / mtot; |
1232 | | } |
1233 | | } |
1234 | | taylor<4, simd_vector> mc, mp; |
1235 | | std::array<simd_vector, NDIM> x, y, dx; |
1236 | | for (integer ci = 0; ci != NCHILD; ++ci) { |
1237 | | const integer iiic = child_index(ip, jp, kp, ci); |
1238 | | const space_vector& X = (*(com_ptr[lev - 1]))[iiic]; |
1239 | | if (is_leaf) { |
1240 | | mc()[ci] = mon[iiic]; |
1241 | | for (integer j = 1; j != 20; ++j) { |
1242 | | mc.ptr()[j][ci] = 0.0; |
1243 | | } |
1244 | | } else { |
1245 | | for (integer j = 0; j != 20; ++j) { |
1246 | | mc.ptr()[j][ci] = M[iiic].ptr()[j]; |
1247 | | } |
1248 | | } |
1249 | | for (integer d = 0; d < NDIM; ++d) { |
1250 | | x[d][ci] = X[d]; |
1251 | | } |
1252 | | } |
1253 | | const space_vector& Y = (*(com_ptr[lev]))[iiip]; |
1254 | | for (integer d = 0; d < NDIM; ++d) { |
1255 | | dx[d] = x[d] - simd_vector(Y[d]); |
1256 | | } |
1257 | | mp = mc >> dx; |
1258 | | for (integer j = 0; j != 20; ++j) { |
1259 | | MM[j] = mp[j].sum(); |
1260 | | } |
1261 | | } else { |
1262 | | if (child_poles == nullptr) { |
1263 | | const integer iiih = hindex(ip + H_BW , jp + H_BW , kp + H_BW ); |
1264 | | const integer iii0 = h0index(ip, jp, kp); |
1265 | | if (type == RHO) { |
1266 | | mon[iiip] = U[rho_i][iiih] * dx3; |
1267 | | } else { |
1268 | | mon[iiip] = dUdt[rho_i][iii0] * dx3; |
1269 | | } |
1270 | | } else { |
1271 | | M[iiip] = child_poles->first[index]; |
1272 | | if (type == RHO) { |
1273 | | (*(com_ptr)[lev])[iiip] = child_poles->second[index]; |
1274 | | } |
1275 | | ++index; |
1276 | | } |
1277 | | } |
1278 | | if (!is_root && (lev == 1)) { |
1279 | | mret.first[index] = MM; |
1280 | | mret.second[index] = (*(com_ptr[lev]))[iiip]; |
1281 | | ++index; |
1282 | | } |
1283 | | } |
1284 | | } |
1285 | | } |
1286 | | ++lev; |
1287 | | index = 0; |
1288 | | } |
1289 | | PROF_END; |
1290 | | return mret; |
1291 | | } |
1292 | | |
1293 | | gravity_boundary_type grid::get_gravity_boundary(const geo::direction& dir, bool is_local) { |
1294 | | PROF_BEGIN; |
1295 | | // std::array<integer, NDIM> lb, ub; |
1296 | | gravity_boundary_type data; |
1297 | | data.is_local = is_local; |
1298 | | auto& M = *M_ptr; |
1299 | | auto& mon = *mon_ptr; |
1300 | | if (!is_local) { |
1301 | | data.allocate(); |
1302 | | integer iter = 0; |
1303 | | const std::vector<boundary_interaction_type>& list = ilist_n_bnd[dir.flip()]; |
1304 | | if (is_leaf) { |
1305 | | data.m->reserve(list.size()); |
1306 | | for (auto i : list) { |
1307 | | const integer iii = i.second; |
1308 | | data.m->push_back(mon[iii]); |
1309 | | } |
1310 | | } else { |
1311 | | data.M->reserve(list.size()); |
1312 | | data.x->reserve(list.size()); |
1313 | | for (auto i : list) { |
1314 | | const integer iii = i.second; |
1315 | | const integer top = M[iii].size(); |
1316 | | data.M->push_back(M[iii]); |
1317 | | data.x->push_back((*(com_ptr[0]))[iii]); |
1318 | | } |
1319 | | } |
1320 | | } else { |
1321 | | if (is_leaf) { |
1322 | | data.m = mon_ptr; |
1323 | | } else { |
1324 | | data.M = M_ptr; |
1325 | | data.x = com_ptr[0]; |
1326 | | } |
1327 | | } |
1328 | | PROF_END; |
1329 | | return data; |
1330 | | } |
1331 | | |
Copyright (c) 2006-2012 Rogue Wave Software, Inc. All Rights Reserved.
Patents pending.