|-> 218.68201 - 100.0000% [1] {min=218.6820, max=218.6820, mean=218.6820, var=0.0000, std dev=0.0000} APEX MAIN | |-> 216.66599 - 99.0781% [1] {min=216.6660, max=216.6660, mean=216.6660, var=0.0000, std dev=0.0000} electron_push | | |-> 163.69737 - 75.5529% [1] {min=163.6974, max=163.6974, mean=163.6974, var=0.0000, std dev=0.0000} Kokkos for, Dev: 50331648, push_diag_op | | | |-> 163.68137 - 99.9902% [1] {min=163.6814, max=163.6814, mean=163.6814, var=0.0000, std dev=0.0000} hipDeviceSynchronize | | | |-> 0.01595 - 0.0097% [1] {min=0.0159, max=0.0159, mean=0.0159, var=0.0000, std dev=0.0000} hipMemcpyToSymbol | | | | |-> 0.00003 - 0.2003% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} hipMemcpy | | | | | |-> 0.00000 - 10.6009% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} GPU: CopyHostToDevice | | | | | Remainder: 0.0000 - 0.1791% | | | | Remainder: 0.0159 - 0.0097% | | | |-> 0.00001 - 0.0000% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} hipLaunchKernel | | | | |-> 163.68096 - 2235162684.5145% [1] {min=163.6810, max=163.6810, mean=163.6810, var=0.0000, std dev=0.0000} GPU: void Kokkos::Experimental::Impl::__device_stub__hip_parallel_launch_local_memory(int, bool, Simulation > const&, Grid > const&, MagneticField > const&, Species > const&, TmpSpecies > const&, ElectricField > const&, PerturbedBField > const&, Sheath > const&, SheathParticles > const&, Neutrals > const&, GeneratePermutation > const&, Diagnostics > const&, Charge > const&, HeatDiagnostics > const&)::{lambda(int)#1}, Kokkos::RangePolicy >, Kokkos::Experimental::HIP>, 1024u, 1u>(Kokkos::Impl::ParallelFor(int, bool, Simulation > const&, Grid > const&, MagneticField > const&, Species > const&, TmpSpecies > const&, ElectricField > const&, PerturbedBField > const&, Sheath > const&, SheathParticles > const&, Neutrals > const&, GeneratePermutation > const&, Diagnostics > const&, Charge > const&, HeatDiagnostics > const&)::{lambda(int)#1}, Kokkos::RangePolicy >, Kokkos::Experimental::HIP> const*) | | | |-> 0.00000 - 0.0000% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} __hipPushCallConfiguration | | | |-> 0.00000 - 0.0000% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} __hipPopCallConfiguration | | | Remainder: 0.0000 - 0.0000% | | |-> 51.27839 - 23.6670% [14] {min=3.6521, max=3.6755, mean=3.6627, var=0.0000, std dev=0.0070} Kokkos for, Dev: 50331648, push_op | | | |-> 51.27797 - 99.9992% [14] {min=3.6520, max=3.6755, mean=3.6627, var=0.0000, std dev=0.0070} hipDeviceSynchronize | | | |-> 0.00006 - 0.0001% [14] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} hipLaunchKernel | | | | |-> 51.27196 - 86772206.3989% [14] {min=3.6516, max=3.6749, mean=3.6623, var=0.0000, std dev=0.0070} GPU: void Kokkos::Experimental::Impl::__device_stub__hip_parallel_launch_local_memory(int, bool, Simulation > const&, Grid > const&, MagneticField > const&, Species > const&, TmpSpecies > const&, ElectricField > const&, PerturbedBField > const&, Sheath > const&, SheathParticles > const&, Neutrals > const&, GeneratePermutation > const&, Diagnostics > const&, Charge > const&, HeatDiagnostics > const&)::{lambda(int)#3}, Kokkos::RangePolicy >, Kokkos::Experimental::HIP>, 1024u, 1u>(Kokkos::Impl::ParallelFor(int, bool, Simulation > const&, Grid > const&, MagneticField > const&, Species > const&, TmpSpecies > const&, ElectricField > const&, PerturbedBField > const&, Sheath > const&, SheathParticles > const&, Neutrals > const&, GeneratePermutation > const&, Diagnostics > const&, Charge > const&, HeatDiagnostics > const&)::{lambda(int)#3}, Kokkos::RangePolicy >, Kokkos::Experimental::HIP> const*) | | | |-> 0.00001 - 0.0000% [14] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} __hipPushCallConfiguration | | | |-> 0.00001 - 0.0000% [14] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} __hipPopCallConfiguration | | | Remainder: 0.0003 - 0.0002% | | |-> 0.45151 - 0.2084% [14] {min=0.0088, max=0.0346, mean=0.0323, var=0.0000, std dev=0.0065} Kokkos for, Dev: 50331648, WritePerm | | | |-> 0.45112 - 99.9133% [14] {min=0.0088, max=0.0346, mean=0.0322, var=0.0000, std dev=0.0065} hipDeviceSynchronize | | | |-> 0.00006 - 0.0132% [14] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} hipLaunchKernel | | | | |-> 0.45063 - 757327.9486% [14] {min=0.0088, max=0.0345, mean=0.0322, var=0.0000, std dev=0.0065} GPU: void Kokkos::Experimental::Impl::__device_stub__hip_parallel_launch_local_memory >(GeneratePermutation > const&, TmpSpecies > const&)::{lambda(int)#6}, Kokkos::RangePolicy >, Kokkos::Experimental::HIP>, 1024u, 1u>(Kokkos::Impl::ParallelFor >(GeneratePermutation > const&, TmpSpecies > const&)::{lambda(int)#6}, Kokkos::RangePolicy >, Kokkos::Experimental::HIP> const*) | | | |-> 0.00001 - 0.0026% [14] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} __hipPopCallConfiguration | | | |-> 0.00001 - 0.0025% [14] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} __hipPushCallConfiguration | | | Remainder: 0.0003 - 0.0001% | | |-> 0.44866 - 0.2071% [14] {min=0.0087, max=0.0344, mean=0.0320, var=0.0000, std dev=0.0065} Kokkos for, Dev: 50331648, FillCount | | | |-> 0.44833 - 99.9253% [14] {min=0.0087, max=0.0344, mean=0.0320, var=0.0000, std dev=0.0065} hipDeviceSynchronize | | | |-> 0.00005 - 0.0114% [14] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} hipLaunchKernel | | | | |-> 0.44789 - 877051.1318% [14] {min=0.0087, max=0.0343, mean=0.0320, var=0.0000, std dev=0.0065} GPU: void Kokkos::Experimental::Impl::__device_stub__hip_parallel_launch_local_memory >(GeneratePermutation > const&, TmpSpecies > const&)::{lambda(int)#2}, Kokkos::RangePolicy >, Kokkos::Experimental::HIP>, 1024u, 1u>(Kokkos::Impl::ParallelFor >(GeneratePermutation > const&, TmpSpecies > const&)::{lambda(int)#2}, Kokkos::RangePolicy >, Kokkos::Experimental::HIP> const*) | | | |-> 0.00001 - 0.0015% [14] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} __hipPopCallConfiguration | | | |-> 0.00001 - 0.0014% [14] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} __hipPushCallConfiguration | | | Remainder: 0.0003 - 0.0001% | | |-> 0.24646 - 0.1138% [14] {min=0.0150, max=0.0251, mean=0.0176, var=0.0000, std dev=0.0023} Kokkos for, Dev: 50331648, ptl_to_sorted_tmp_array_ct_op | | | |-> 0.24610 - 99.8533% [14] {min=0.0150, max=0.0251, mean=0.0176, var=0.0000, std dev=0.0023} hipDeviceSynchronize | | | |-> 0.00006 - 0.0230% [14] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} hipLaunchKernel | | | | |-> 0.24561 - 432893.4786% [14] {min=0.0149, max=0.0250, mean=0.0175, var=0.0000, std dev=0.0023} GPU: void Kokkos::Experimental::Impl::__device_stub__hip_parallel_launch_local_memory > const&)::{lambda(int)#5}, Kokkos::RangePolicy >, Kokkos::Experimental::HIP>, 1024u, 1u>(Kokkos::Impl::ParallelFor > const&)::{lambda(int)#5}, Kokkos::RangePolicy >, Kokkos::Experimental::HIP> const*) | | | |-> 0.00001 - 0.0028% [14] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} __hipPushCallConfiguration | | | |-> 0.00001 - 0.0026% [14] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} __hipPopCallConfiguration | | | |-> 0.00000 - 0.0009% [3] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} hipStreamSynchronize | | | Remainder: 0.0003 - 0.0001% | | |-> 0.17143 - 0.0791% [14] {min=0.0100, max=0.0163, mean=0.0122, var=0.0000, std dev=0.0013} Kokkos for, Dev: 50331648, ptl_to_sorted_tmp_array_ph2_op | | | |-> 0.17108 - 99.7967% [14] {min=0.0099, max=0.0163, mean=0.0122, var=0.0000, std dev=0.0013} hipDeviceSynchronize | | | |-> 0.00005 - 0.0317% [14] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} hipLaunchKernel | | | | |-> 0.17064 - 313828.9101% [14] {min=0.0099, max=0.0162, mean=0.0122, var=0.0000, std dev=0.0013} GPU: void Kokkos::Experimental::Impl::__device_stub__hip_parallel_launch_local_memory > const&)::{lambda(int)#3}, Kokkos::RangePolicy >, Kokkos::Experimental::HIP>, 1024u, 1u>(Kokkos::Impl::ParallelFor > const&)::{lambda(int)#3}, Kokkos::RangePolicy >, Kokkos::Experimental::HIP> const*) | | | |-> 0.00001 - 0.0041% [14] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} __hipPopCallConfiguration | | | |-> 0.00001 - 0.0039% [14] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} __hipPushCallConfiguration | | | Remainder: 0.0003 - 0.0001% | | |-> 0.15157 - 0.0700% [14] {min=0.0095, max=0.0152, mean=0.0108, var=0.0000, std dev=0.0014} Kokkos for, Dev: 50331648, ptl_to_sorted_tmp_array_ph1_op | | | |-> 0.15122 - 99.7711% [14] {min=0.0095, max=0.0151, mean=0.0108, var=0.0000, std dev=0.0014} hipDeviceSynchronize | | | |-> 0.00005 - 0.0360% [14] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} hipLaunchKernel | | | | |-> 0.15075 - 276057.0866% [14] {min=0.0095, max=0.0151, mean=0.0108, var=0.0000, std dev=0.0014} GPU: void Kokkos::Experimental::Impl::__device_stub__hip_parallel_launch_local_memory > const&)::{lambda(int)#1}, Kokkos::RangePolicy >, Kokkos::Experimental::HIP>, 1024u, 1u>(Kokkos::Impl::ParallelFor > const&)::{lambda(int)#1}, Kokkos::RangePolicy >, Kokkos::Experimental::HIP> const*) | | | |-> 0.00001 - 0.0044% [14] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} __hipPushCallConfiguration | | | |-> 0.00001 - 0.0042% [14] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} __hipPopCallConfiguration | | | Remainder: 0.0003 - 0.0001% | | |-> 0.05236 - 0.0242% [14] {min=0.0037, max=0.0038, mean=0.0037, var=0.0000, std dev=0.0000} Kokkos for, Dev: 50331648, tmp_array_to_ptl_ct_op | | | |-> 0.05199 - 99.2822% [14] {min=0.0037, max=0.0037, mean=0.0037, var=0.0000, std dev=0.0000} hipDeviceSynchronize | | | |-> 0.00009 - 0.1651% [14] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} hipLaunchKernel | | | | |-> 0.05161 - 59701.0885% [14] {min=0.0037, max=0.0037, mean=0.0037, var=0.0000, std dev=0.0000} GPU: void Kokkos::Experimental::Impl::__device_stub__hip_parallel_launch_local_memory > const&)::{lambda(int)#6}, Kokkos::RangePolicy >, Kokkos::Experimental::HIP>, 1024u, 1u>(Kokkos::Impl::ParallelFor > const&)::{lambda(int)#6}, Kokkos::RangePolicy >, Kokkos::Experimental::HIP> const*) | | | |-> 0.00001 - 0.0125% [14] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} __hipPopCallConfiguration | | | |-> 0.00001 - 0.0123% [14] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} __hipPushCallConfiguration | | | Remainder: 0.0003 - 0.0001% | | |-> 0.03881 - 0.0179% [14] {min=0.0028, max=0.0028, mean=0.0028, var=0.0000, std dev=0.0000} Kokkos for, Dev: 50331648, tmp_array_to_ptl_ph2_op | | | |-> 0.03847 - 99.1192% [14] {min=0.0027, max=0.0028, mean=0.0027, var=0.0000, std dev=0.0000} hipDeviceSynchronize | | | |-> 0.00005 - 0.1413% [14] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} hipLaunchKernel | | | | |-> 0.03807 - 69432.8105% [14] {min=0.0027, max=0.0027, mean=0.0027, var=0.0000, std dev=0.0000} GPU: void Kokkos::Experimental::Impl::__device_stub__hip_parallel_launch_local_memory > const&)::{lambda(int)#4}, Kokkos::RangePolicy >, Kokkos::Experimental::HIP>, 1024u, 1u>(Kokkos::Impl::ParallelFor > const&)::{lambda(int)#4}, Kokkos::RangePolicy >, Kokkos::Experimental::HIP> const*) | | | |-> 0.00001 - 0.0175% [14] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} __hipPopCallConfiguration | | | |-> 0.00001 - 0.0167% [14] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} __hipPushCallConfiguration | | | Remainder: 0.0003 - 0.0001% | | |-> 0.03880 - 0.0179% [14] {min=0.0028, max=0.0028, mean=0.0028, var=0.0000, std dev=0.0000} Kokkos for, Dev: 50331648, tmp_array_to_ptl_ph1_op | | | |-> 0.03845 - 99.1030% [14] {min=0.0027, max=0.0028, mean=0.0027, var=0.0000, std dev=0.0000} hipDeviceSynchronize | | | |-> 0.00006 - 0.1455% [14] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} hipLaunchKernel | | | | |-> 0.03804 - 67380.0677% [14] {min=0.0027, max=0.0028, mean=0.0027, var=0.0000, std dev=0.0000} GPU: void Kokkos::Experimental::Impl::__device_stub__hip_parallel_launch_local_memory > const&)::{lambda(int)#2}, Kokkos::RangePolicy >, Kokkos::Experimental::HIP>, 1024u, 1u>(Kokkos::Impl::ParallelFor > const&)::{lambda(int)#2}, Kokkos::RangePolicy >, Kokkos::Experimental::HIP> const*) | | | |-> 0.00001 - 0.0171% [14] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} __hipPushCallConfiguration | | | |-> 0.00001 - 0.0164% [14] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} __hipPopCallConfiguration | | | Remainder: 0.0003 - 0.0001% | | |-> 0.03547 - 0.0164% [14] {min=0.0019, max=0.0104, mean=0.0025, var=0.0000, std dev=0.0022} Kokkos for, Dev: 50331648, get_current_triangles_op | | | |-> 0.02747 - 77.4418% [14] {min=0.0018, max=0.0027, mean=0.0020, var=0.0000, std dev=0.0002} hipDeviceSynchronize | | | |-> 0.00759 - 21.3988% [1] {min=0.0076, max=0.0076, mean=0.0076, var=0.0000, std dev=0.0000} hipMemcpyToSymbol | | | | |-> 0.00004 - 0.5778% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} hipMemcpy | | | | | |-> 0.00000 - 8.1918% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} GPU: CopyHostToDevice | | | | | Remainder: 0.0000 - 0.5305% | | | | Remainder: 0.0075 - 21.2752% | | | |-> 0.00008 - 0.2300% [14] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} hipLaunchKernel | | | | |-> 0.02712 - 33239.2681% [14] {min=0.0018, max=0.0027, mean=0.0019, var=0.0000, std dev=0.0002} GPU: void Kokkos::Experimental::Impl::__device_stub__hip_parallel_launch_local_memory > const&, Grid > const&, TmpSpecies > const&, SheathParticles > const&, int)::{lambda(int)#1}, Kokkos::RangePolicy >, Kokkos::Experimental::HIP>, 1024u, 1u>(Kokkos::Impl::ParallelFor > const&, Grid > const&, TmpSpecies > const&, SheathParticles > const&, int)::{lambda(int)#1}, Kokkos::RangePolicy >, Kokkos::Experimental::HIP> const*) | | | |-> 0.00001 - 0.0326% [14] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} __hipPushCallConfiguration | | | |-> 0.00001 - 0.0211% [14] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} __hipPopCallConfiguration | | | Remainder: 0.0003 - 0.0001% | | |-> 0.02034 - 0.0094% [434] {min=0.0000, max=0.0002, mean=0.0000, var=0.0000, std dev=0.0000} Kokkos for, Dev: 50331648, SectCount | | | |-> 0.00928 - 45.6296% [434] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} hipDeviceSynchronize | | | |-> 0.00162 - 7.9509% [434] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} hipLaunchKernel | | | | |-> 0.00314 - 194.4622% [434] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} GPU: void Kokkos::Experimental::Impl::__device_stub__hip_parallel_launch_local_memory >(GeneratePermutation > const&, TmpSpecies > const&)::{lambda(int)#4}, Kokkos::RangePolicy >, Kokkos::Experimental::HIP>, 1024u, 1u>(Kokkos::Impl::ParallelFor >(GeneratePermutation > const&, TmpSpecies > const&)::{lambda(int)#4}, Kokkos::RangePolicy >, Kokkos::Experimental::HIP> const*) | | | |-> 0.00027 - 1.3257% [434] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} __hipPushCallConfiguration | | | |-> 0.00022 - 1.0659% [434] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} __hipPopCallConfiguration | | | |-> 0.00000 - 0.0150% [4] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} hipStreamSynchronize | | | Remainder: 0.0090 - 0.0041% | | |-> 0.01989 - 0.0092% [434] {min=0.0000, max=0.0001, mean=0.0000, var=0.0000, std dev=0.0000} Kokkos for, Dev: 50331648, CountOffset | | | |-> 0.00915 - 46.0061% [434] {min=0.0000, max=0.0001, mean=0.0000, var=0.0000, std dev=0.0000} hipDeviceSynchronize | | | |-> 0.00157 - 7.8916% [434] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} hipLaunchKernel | | | | |-> 0.00302 - 192.2680% [434] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} GPU: void Kokkos::Experimental::Impl::__device_stub__hip_parallel_launch_local_memory >(GeneratePermutation > const&, TmpSpecies > const&)::{lambda(int)#5}, Kokkos::RangePolicy >, Kokkos::Experimental::HIP>, 1024u, 1u>(Kokkos::Impl::ParallelFor >(GeneratePermutation > const&, TmpSpecies > const&)::{lambda(int)#5}, Kokkos::RangePolicy >, Kokkos::Experimental::HIP> const*) | | | |-> 0.00028 - 1.4096% [434] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} __hipPushCallConfiguration | | | |-> 0.00022 - 1.1045% [434] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} __hipPopCallConfiguration | | | |-> 0.00000 - 0.0109% [3] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} hipStreamSynchronize | | | Remainder: 0.0087 - 0.0040% | | |-> 0.00205 - 0.0009% [14] {min=0.0001, max=0.0002, mean=0.0001, var=0.0000, std dev=0.0000} Kokkos for, Dev: 50331648, LocalCount | | | |-> 0.00171 - 83.2288% [14] {min=0.0001, max=0.0001, mean=0.0001, var=0.0000, std dev=0.0000} hipDeviceSynchronize | | | |-> 0.00006 - 2.7387% [14] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} hipLaunchKernel | | | | |-> 0.00147 - 2620.9884% [14] {min=0.0001, max=0.0001, mean=0.0001, var=0.0000, std dev=0.0000} GPU: void Kokkos::Experimental::Impl::__device_stub__hip_parallel_launch_local_memory >(GeneratePermutation > const&, TmpSpecies > const&)::{lambda(int)#3}, Kokkos::RangePolicy >, Kokkos::Experimental::HIP>, 1024u, 1u>(Kokkos::Impl::ParallelFor >(GeneratePermutation > const&, TmpSpecies > const&)::{lambda(int)#3}, Kokkos::RangePolicy >, Kokkos::Experimental::HIP> const*) | | | |-> 0.00001 - 0.3205% [14] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} __hipPushCallConfiguration | | | |-> 0.00001 - 0.3179% [14] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} __hipPopCallConfiguration | | | Remainder: 0.0003 - 0.0001% | | |-> 0.00088 - 0.0004% [1051] {min=0.0000, max=0.0001, mean=0.0000, var=0.0000, std dev=0.0000} hipDeviceSynchronize | | |-> 0.00064 - 0.0003% [14] {min=0.0000, max=0.0001, mean=0.0000, var=0.0000, std dev=0.0000} Kokkos for, Dev: 50331648, FillPermBuffer | | | |-> 0.00029 - 45.1886% [14] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} hipDeviceSynchronize | | | |-> 0.00006 - 9.0965% [14] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} hipLaunchKernel | | | | |-> 0.00008 - 132.7411% [14] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} GPU: void Kokkos::Experimental::Impl::__device_stub__hip_parallel_launch_local_memory >(GeneratePermutation > const&, TmpSpecies > const&)::{lambda(int)#7}, Kokkos::RangePolicy >, Kokkos::Experimental::HIP>, 1024u, 1u>(Kokkos::Impl::ParallelFor >(GeneratePermutation > const&, TmpSpecies > const&)::{lambda(int)#7}, Kokkos::RangePolicy >, Kokkos::Experimental::HIP> const*) | | | |-> 0.00001 - 2.2459% [14] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} __hipPushCallConfiguration | | | |-> 0.00001 - 1.1135% [14] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} __hipPopCallConfiguration | | | Remainder: 0.0003 - 0.0001% | | |-> 0.00062 - 0.0003% [14] {min=0.0000, max=0.0001, mean=0.0000, var=0.0000, std dev=0.0000} Kokkos for, Dev: 50331648, ZeroCount | | | |-> 0.00028 - 44.1999% [14] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} hipDeviceSynchronize | | | |-> 0.00005 - 8.7856% [14] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} hipLaunchKernel | | | | |-> 0.00008 - 142.7370% [14] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} GPU: void Kokkos::Experimental::Impl::__device_stub__hip_parallel_launch_local_memory >(GeneratePermutation > const&, TmpSpecies > const&)::{lambda(int)#1}, Kokkos::RangePolicy >, Kokkos::Experimental::HIP>, 1024u, 1u>(Kokkos::Impl::ParallelFor >(GeneratePermutation > const&, TmpSpecies > const&)::{lambda(int)#1}, Kokkos::RangePolicy >, Kokkos::Experimental::HIP> const*) | | | |-> 0.00001 - 1.1647% [14] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} __hipPushCallConfiguration | | | |-> 0.00001 - 1.0491% [14] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} __hipPopCallConfiguration | | | Remainder: 0.0003 - 0.0001% | | Remainder: 0.0107 - 0.0049% | |-> 0.92086 - 0.4211% [236] {min=0.0000, max=0.6104, mean=0.0039, var=0.0019, std dev=0.0441} hipMemcpy | | |-> 0.00065 - 0.0707% [71] {min=0.0000, max=0.0004, mean=0.0000, var=0.0000, std dev=0.0001} GPU: CopyHostToDevice | | |-> 0.00057 - 0.0623% [165] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} GPU: CopyDeviceToHost | | Remainder: 0.9196 - 0.4205% | |-> 0.05526 - 0.0253% [6] {min=0.0000, max=0.0550, mean=0.0092, var=0.0004, std dev=0.0205} Kokkos for, Dev: 16777216, Kokkos::View::destruction [] | | |-> 0.00001 - 0.0244% [6] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} OpenMP Parallel Region: _ZN6Kokkos4Impl16ViewValueFunctorINS_6OpenMPEN6Cabana3SoAINS3_11MemberTypesIJA6_dA3_dxEEELi32EEELb0EE7executeEb:2889 | | |-> 0.00001 - 0.0148% [6] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} hipDeviceSynchronize | | Remainder: 0.0552 - 0.0253% | |-> 0.02052 - 0.0094% [1] {min=0.0205, max=0.0205, mean=0.0205, var=0.0000, std dev=0.0000} Kokkos for, Dev: 0, Kokkos::View::initialization [rc] | | |-> 0.01981 - 96.5593% [1] {min=0.0198, max=0.0198, mean=0.0198, var=0.0000, std dev=0.0000} hipMemcpyToSymbol | | | |-> 0.00011 - 0.5500% [1] {min=0.0001, max=0.0001, mean=0.0001, var=0.0000, std dev=0.0000} hipMemcpy | | | | |-> 0.00000 - 2.8817% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} GPU: CopyHostToDevice | | | | Remainder: 0.0001 - 0.5341% | | | Remainder: 0.0197 - 96.0283% | | |-> 0.00061 - 2.9752% [1] {min=0.0006, max=0.0006, mean=0.0006, var=0.0000, std dev=0.0000} hipHostMalloc | | |-> 0.00002 - 0.1052% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} hipStreamSynchronize | | |-> 0.00001 - 0.0402% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} hipLaunchKernel | | | |-> 0.00001 - 91.0963% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} GPU: void Kokkos::Experimental::Impl::__device_stub__hip_parallel_launch_local_memory, Kokkos::RangePolicy >, Kokkos::Experimental::HIP>, 1024u, 1u>(Kokkos::Impl::ParallelFor, Kokkos::RangePolicy >, Kokkos::Experimental::HIP> const*) | | | Remainder: 0.0000 - 0.0036% | | |-> 0.00000 - 0.0042% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} __hipPushCallConfiguration | | |-> 0.00000 - 0.0037% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} hipDeviceSynchronize | | |-> 0.00000 - 0.0025% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} __hipPopCallConfiguration | | Remainder: 0.0001 - 0.0000% | |-> 0.00121 - 0.0006% [1] {min=0.0012, max=0.0012, mean=0.0012, var=0.0000, std dev=0.0000} Kokkos for, Dev: 16777216, Kokkos::View::initialization [E_phi_ff_mirror] | | |-> 0.00010 - 8.1395% [1] {min=0.0001, max=0.0001, mean=0.0001, var=0.0000, std dev=0.0000} OpenMP Parallel Region: _ZN6Kokkos4Impl16ViewValueFunctorINS_6OpenMPE17VectorFieldPlanesLb0EE7executeEb:2889 | | |-> 0.00000 - 0.0912% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} hipDeviceSynchronize | | Remainder: 0.0011 - 0.0005% | |-> 0.00110 - 0.0005% [72] {min=0.0000, max=0.0002, mean=0.0000, var=0.0000, std dev=0.0000} hipMalloc | |-> 0.00108 - 0.0005% [67] {min=0.0000, max=0.0001, mean=0.0000, var=0.0000, std dev=0.0000} hipFree | |-> 0.00069 - 0.0003% [1] {min=0.0007, max=0.0007, mean=0.0007, var=0.0000, std dev=0.0000} hipMemcpyToSymbol | | |-> 0.00003 - 3.7433% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} hipMemcpy | | | |-> 0.00000 - 12.1238% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} GPU: CopyHostToDevice | | | Remainder: 0.0000 - 3.2895% | | Remainder: 0.0007 - 0.0003% | |-> 0.00062 - 0.0003% [1] {min=0.0006, max=0.0006, mean=0.0006, var=0.0000, std dev=0.0000} Kokkos deep copy: Host -> HIP guess_list | | |-> 0.00061 - 96.9291% [1] {min=0.0006, max=0.0006, mean=0.0006, var=0.0000, std dev=0.0000} hipMemcpy | | | |-> 0.00010 - 15.7958% [1] {min=0.0001, max=0.0001, mean=0.0001, var=0.0000, std dev=0.0000} GPU: CopyHostToDevice | | | Remainder: 0.0005 - 81.6184% | | |-> 0.00000 - 0.2118% [2] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} hipDeviceSynchronize | | Remainder: 0.0000 - 0.0000% | |-> 0.00060 - 0.0003% [1] {min=0.0006, max=0.0006, mean=0.0006, var=0.0000, std dev=0.0000} Kokkos deep copy: Host -> HIP acoeff_all | | |-> 0.00058 - 96.7614% [1] {min=0.0006, max=0.0006, mean=0.0006, var=0.0000, std dev=0.0000} hipMemcpy | | | |-> 0.00003 - 5.8232% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} GPU: CopyHostToDevice | | | Remainder: 0.0005 - 91.1268% | | |-> 0.00000 - 0.2054% [2] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} hipDeviceSynchronize | | Remainder: 0.0000 - 0.0000% | |-> 0.00036 - 0.0002% [1] {min=0.0004, max=0.0004, mean=0.0004, var=0.0000, std dev=0.0000} Kokkos for, Dev: 0, Kokkos::View::initialization [pot_mirror] | | |-> 0.00000 - 0.3256% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} OpenMP Parallel Region: _ZN6Kokkos4Impl16ViewValueFunctorINS_6OpenMPEdLb1EE27construct_shared_allocationEv:2940 | | |-> 0.00000 - 0.2281% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} hipDeviceSynchronize | | Remainder: 0.0004 - 0.0002% | |-> 0.00016 - 0.0001% [1] {min=0.0002, max=0.0002, mean=0.0002, var=0.0000, std dev=0.0000} Kokkos for, Dev: 50331648, Kokkos::View::destruction [dpot_ff] | | |-> 0.00011 - 71.3444% [1] {min=0.0001, max=0.0001, mean=0.0001, var=0.0000, std dev=0.0000} hipStreamSynchronize | | |-> 0.00001 - 6.1617% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} hipLaunchKernel | | | |-> 0.00001 - 64.9459% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} GPU: void Kokkos::Experimental::Impl::__device_stub__hip_parallel_launch_local_memory, Kokkos::RangePolicy >, Kokkos::Experimental::HIP>, 1024u, 1u>(Kokkos::Impl::ParallelFor, Kokkos::RangePolicy >, Kokkos::Experimental::HIP> const*) | | | Remainder: 0.0000 - 2.1599% | | |-> 0.00000 - 0.8157% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} __hipPushCallConfiguration | | |-> 0.00000 - 0.4630% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} hipDeviceSynchronize | | |-> 0.00000 - 0.3854% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} __hipPopCallConfiguration | | Remainder: 0.0000 - 0.0000% | |-> 0.00015 - 0.0001% [1] {min=0.0002, max=0.0002, mean=0.0002, var=0.0000, std dev=0.0000} Kokkos for, Dev: 0, Kokkos::View::initialization [heat_pv] | | |-> 0.00013 - 80.9551% [1] {min=0.0001, max=0.0001, mean=0.0001, var=0.0000, std dev=0.0000} hipStreamSynchronize | | |-> 0.00000 - 2.3977% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} hipLaunchKernel | | | |-> 0.00011 - 2874.5616% [1] {min=0.0001, max=0.0001, mean=0.0001, var=0.0000, std dev=0.0000} GPU: void Kokkos::Experimental::Impl::__device_stub__hip_parallel_launch_local_memory, Kokkos::RangePolicy >, Kokkos::Experimental::HIP>, 1024u, 1u>(Kokkos::Impl::ParallelFor, Kokkos::RangePolicy >, Kokkos::Experimental::HIP> const*) | | |-> 0.00000 - 0.4081% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} hipDeviceSynchronize | | |-> 0.00000 - 0.3564% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} __hipPopCallConfiguration | | |-> 0.00000 - 0.3176% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} __hipPushCallConfiguration | | Remainder: 0.0000 - 0.0000% | |-> 0.00015 - 0.0001% [1] {min=0.0002, max=0.0002, mean=0.0002, var=0.0000, std dev=0.0000} Kokkos deep copy: Host -> HIP guess_xtable | | |-> 0.00013 - 87.1293% [1] {min=0.0001, max=0.0001, mean=0.0001, var=0.0000, std dev=0.0000} hipMemcpy | | | |-> 0.00006 - 42.8125% [1] {min=0.0001, max=0.0001, mean=0.0001, var=0.0000, std dev=0.0000} GPU: CopyHostToDevice | | | Remainder: 0.0001 - 49.8270% | | |-> 0.00000 - 0.8795% [2] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} hipDeviceSynchronize | | Remainder: 0.0000 - 0.0000% | |-> 0.00014 - 0.0001% [1] {min=0.0001, max=0.0001, mean=0.0001, var=0.0000, std dev=0.0000} Kokkos for, Dev: 50331648, Kokkos::View::destruction [one_d_cub_acoef] | | |-> 0.00010 - 75.8584% [1] {min=0.0001, max=0.0001, mean=0.0001, var=0.0000, std dev=0.0000} hipStreamSynchronize | | |-> 0.00001 - 4.8430% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} hipLaunchKernel | | | |-> 0.00001 - 95.3837% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} GPU: void Kokkos::Experimental::Impl::__device_stub__hip_parallel_launch_local_memory, Kokkos::RangePolicy >, Kokkos::Experimental::HIP>, 1024u, 1u>(Kokkos::Impl::ParallelFor, Kokkos::RangePolicy >, Kokkos::Experimental::HIP> const*) | | | Remainder: 0.0000 - 0.2236% | | |-> 0.00000 - 0.4671% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} hipDeviceSynchronize | | |-> 0.00000 - 0.3857% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} __hipPushCallConfiguration | | |-> 0.00000 - 0.3487% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} __hipPopCallConfiguration | | Remainder: 0.0000 - 0.0000% | |-> 0.00013 - 0.0001% [1] {min=0.0001, max=0.0001, mean=0.0001, var=0.0000, std dev=0.0000} Kokkos deep copy: Host -> HIP guess_count | | |-> 0.00011 - 85.3637% [1] {min=0.0001, max=0.0001, mean=0.0001, var=0.0000, std dev=0.0000} hipMemcpy | | | |-> 0.00006 - 51.2530% [1] {min=0.0001, max=0.0001, mean=0.0001, var=0.0000, std dev=0.0000} GPU: CopyHostToDevice | | | Remainder: 0.0001 - 41.6122% | | |-> 0.00000 - 1.0110% [2] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} hipDeviceSynchronize | | Remainder: 0.0000 - 0.0000% | |-> 0.00012 - 0.0001% [1] {min=0.0001, max=0.0001, mean=0.0001, var=0.0000, std dev=0.0000} Kokkos deep copy: Host pot_mirror -> HIP pot | | |-> 0.00010 - 83.0052% [1] {min=0.0001, max=0.0001, mean=0.0001, var=0.0000, std dev=0.0000} hipMemcpy | | | |-> 0.00000 - 2.0085% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} GPU: CopyHostToDevice | | | Remainder: 0.0001 - 81.3380% | | |-> 0.00000 - 1.1461% [2] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} hipDeviceSynchronize | | Remainder: 0.0000 - 0.0000% | |-> 0.00010 - 0.0000% [1] {min=0.0001, max=0.0001, mean=0.0001, var=0.0000, std dev=0.0000} Kokkos for, Dev: 16777216, Kokkos::View::destruction [E_phi_ff_mirror] | | |-> 0.00001 - 6.2947% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} OpenMP Parallel Region: _ZN6Kokkos4Impl16ViewValueFunctorINS_6OpenMPE17VectorFieldPlanesLb0EE7executeEb:2889 | | |-> 0.00000 - 0.6422% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} hipDeviceSynchronize | | Remainder: 0.0001 - 0.0000% | |-> 0.00009 - 0.0000% [1] {min=0.0001, max=0.0001, mean=0.0001, var=0.0000, std dev=0.0000} Kokkos deep copy: Host E_phi_ff_mirror -> HIP E_phi_ff | | |-> 0.00007 - 75.3095% [1] {min=0.0001, max=0.0001, mean=0.0001, var=0.0000, std dev=0.0000} hipMemcpy | | | |-> 0.00002 - 34.6120% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} GPU: CopyHostToDevice | | | Remainder: 0.0000 - 49.2434% | | |-> 0.00000 - 1.5855% [2] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} hipDeviceSynchronize | | Remainder: 0.0000 - 0.0000% | |-> 0.00008 - 0.0000% [1] {min=0.0001, max=0.0001, mean=0.0001, var=0.0000, std dev=0.0000} Kokkos for, Dev: 50331648, Kokkos::View::initialization [E_phi_ff] | | |-> 0.00003 - 33.1469% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} hipStreamSynchronize | | |-> 0.00002 - 19.6787% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} hipLaunchKernel | | | |-> 0.00001 - 70.0370% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} GPU: _ZN6Kokkos12Experimental4ImplL47__device_stub__hip_parallel_launch_local_memoryINS_4Impl11ParallelForINS3_16ViewValueFunctorINS0_3HIPE17VectorFieldPlanesLb0EEENS_11RangePolicyIJS6_NS_9IndexTypeIlEEEEES6_EELj1024ELj1EEEvPKT_:322 | | | Remainder: 0.0000 - 5.8963% | | |-> 0.00000 - 1.6160% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} __hipPushCallConfiguration | | |-> 0.00000 - 0.7789% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} hipDeviceSynchronize | | |-> 0.00000 - 0.7413% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} __hipPopCallConfiguration | | Remainder: 0.0000 - 0.0000% | |-> 0.00008 - 0.0000% [1] {min=0.0001, max=0.0001, mean=0.0001, var=0.0000, std dev=0.0000} Kokkos deep copy: Host -> HIP mapping | | |-> 0.00005 - 67.3667% [1] {min=0.0001, max=0.0001, mean=0.0001, var=0.0000, std dev=0.0000} hipMemcpy | | | |-> 0.00002 - 40.1705% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} GPU: CopyHostToDevice | | | Remainder: 0.0000 - 40.3051% | | |-> 0.00000 - 1.5812% [2] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} hipDeviceSynchronize | | Remainder: 0.0000 - 0.0000% | |-> 0.00007 - 0.0000% [1] {min=0.0001, max=0.0001, mean=0.0001, var=0.0000, std dev=0.0000} Kokkos deep copy: Host Scalar -> HIP sort_igroup | | |-> 0.00005 - 63.8383% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} Kokkos for, Dev: 50331648, Kokkos::ViewFill-1D | | | |-> 0.00002 - 38.3934% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} hipDeviceSynchronize | | | |-> 0.00001 - 11.7744% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} hipLaunchKernel | | | | |-> 0.00000 - 93.5849% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} GPU: _ZN6Kokkos12Experimental4ImplL47__device_stub__hip_parallel_launch_local_memoryINS_4Impl11ParallelForINS3_8ViewFillINS_4ViewIPiJNS_11LayoutRightENS_6DeviceINS0_3HIPENS_14AnonymousSpaceEEENS_12MemoryTraitsILj0EEEEEES8_SA_Li1EiEENS_11RangePolicyIJSA_NS_9IndexTypeIiEEEEESA_EELj1024ELj1EEEvPKT_:322 | | | | Remainder: 0.0000 - 0.7553% | | | |-> 0.00000 - 1.0019% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} __hipPushCallConfiguration | | | |-> 0.00000 - 0.9797% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} __hipPopCallConfiguration | | | Remainder: 0.0000 - 30.5470% | | |-> 0.00000 - 2.6705% [3] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} hipDeviceSynchronize | | Remainder: 0.0000 - 0.0000% | |-> 0.00007 - 0.0000% [1] {min=0.0001, max=0.0001, mean=0.0001, var=0.0000, std dev=0.0000} Kokkos for, Dev: 0, Kokkos::View::initialization [idensity] | | |-> 0.00003 - 41.1277% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} hipStreamSynchronize | | |-> 0.00001 - 9.4612% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} hipLaunchKernel | | | |-> 0.00001 - 175.4273% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} GPU: void Kokkos::Experimental::Impl::__device_stub__hip_parallel_launch_local_memory, Kokkos::RangePolicy >, Kokkos::Experimental::HIP>, 1024u, 1u>(Kokkos::Impl::ParallelFor, Kokkos::RangePolicy >, Kokkos::Experimental::HIP> const*) | | |-> 0.00000 - 2.0335% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} hipDeviceSynchronize | | |-> 0.00000 - 1.0099% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} __hipPopCallConfiguration | | |-> 0.00000 - 0.9168% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} __hipPushCallConfiguration | | Remainder: 0.0000 - 0.0000% | |-> 0.00006 - 0.0000% [1] {min=0.0001, max=0.0001, mean=0.0001, var=0.0000, std dev=0.0000} Kokkos for, Dev: 50331648, Kokkos::View::initialization [acoeff_all] | | |-> 0.00003 - 52.6550% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} hipStreamSynchronize | | |-> 0.00000 - 7.1745% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} hipLaunchKernel | | | |-> 0.00002 - 425.1458% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} GPU: void Kokkos::Experimental::Impl::__device_stub__hip_parallel_launch_local_memory, Kokkos::RangePolicy >, Kokkos::Experimental::HIP>, 1024u, 1u>(Kokkos::Impl::ParallelFor, Kokkos::RangePolicy >, Kokkos::Experimental::HIP> const*) | | |-> 0.00000 - 1.0555% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} hipDeviceSynchronize | | |-> 0.00000 - 0.6680% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} __hipPopCallConfiguration | | |-> 0.00000 - 0.6510% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} __hipPushCallConfiguration | | Remainder: 0.0000 - 0.0000% | |-> 0.00006 - 0.0000% [2] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} hipMemset | | |-> 0.00001 - 15.9878% [2] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} GPU: FillBuffer | | Remainder: 0.0001 - 0.0000% | |-> 0.00006 - 0.0000% [1] {min=0.0001, max=0.0001, mean=0.0001, var=0.0000, std dev=0.0000} Kokkos for, Dev: 0, Kokkos::View::initialization [psi] | | |-> 0.00002 - 31.6695% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} hipStreamSynchronize | | |-> 0.00001 - 8.5411% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} hipLaunchKernel | | | |-> 0.00001 - 111.5596% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} GPU: void Kokkos::Experimental::Impl::__device_stub__hip_parallel_launch_local_memory, Kokkos::RangePolicy >, Kokkos::Experimental::HIP>, 1024u, 1u>(Kokkos::Impl::ParallelFor, Kokkos::RangePolicy >, Kokkos::Experimental::HIP> const*) | | |-> 0.00000 - 1.0359% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} __hipPopCallConfiguration | | |-> 0.00000 - 1.0046% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} __hipPushCallConfiguration | | |-> 0.00000 - 0.9905% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} hipDeviceSynchronize | | Remainder: 0.0000 - 0.0000% | |-> 0.00006 - 0.0000% [1] {min=0.0001, max=0.0001, mean=0.0001, var=0.0000, std dev=0.0000} Kokkos for, Dev: 50331648, Kokkos::View::initialization [one_d_cub_acoef] | | |-> 0.00003 - 45.6068% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} hipStreamSynchronize | | |-> 0.00001 - 8.7699% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} hipLaunchKernel | | | |-> 0.00001 - 155.5140% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} GPU: void Kokkos::Experimental::Impl::__device_stub__hip_parallel_launch_local_memory, Kokkos::RangePolicy >, Kokkos::Experimental::HIP>, 1024u, 1u>(Kokkos::Impl::ParallelFor, Kokkos::RangePolicy >, Kokkos::Experimental::HIP> const*) | | |-> 0.00000 - 1.0016% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} hipDeviceSynchronize | | |-> 0.00000 - 0.7393% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} __hipPopCallConfiguration | | |-> 0.00000 - 0.7065% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} __hipPushCallConfiguration | | Remainder: 0.0000 - 0.0000% | |-> 0.00006 - 0.0000% [1] {min=0.0001, max=0.0001, mean=0.0001, var=0.0000, std dev=0.0000} Kokkos for, Dev: 0, Kokkos::View::initialization [edensity] | | |-> 0.00002 - 34.9139% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} hipStreamSynchronize | | |-> 0.00000 - 7.6219% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} hipLaunchKernel | | | |-> 0.00001 - 162.1039% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} GPU: void Kokkos::Experimental::Impl::__device_stub__hip_parallel_launch_local_memory, Kokkos::RangePolicy >, Kokkos::Experimental::HIP>, 1024u, 1u>(Kokkos::Impl::ParallelFor, Kokkos::RangePolicy >, Kokkos::Experimental::HIP> const*) | | |-> 0.00000 - 1.3818% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} hipDeviceSynchronize | | |-> 0.00000 - 1.0712% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} __hipPopCallConfiguration | | |-> 0.00000 - 0.7410% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} __hipPushCallConfiguration | | Remainder: 0.0000 - 0.0000% | |-> 0.00006 - 0.0000% [1] {min=0.0001, max=0.0001, mean=0.0001, var=0.0000, std dev=0.0000} Kokkos for, Dev: 0, Kokkos::View::initialization [d1d_df_pv1] | | |-> 0.00002 - 31.6061% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} hipStreamSynchronize | | |-> 0.00000 - 8.4117% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} hipLaunchKernel | | | |-> 0.00001 - 107.5575% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} GPU: void Kokkos::Experimental::Impl::__device_stub__hip_parallel_launch_local_memory, Kokkos::RangePolicy >, Kokkos::Experimental::HIP>, 1024u, 1u>(Kokkos::Impl::ParallelFor, Kokkos::RangePolicy >, Kokkos::Experimental::HIP> const*) | | |-> 0.00000 - 1.4771% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} hipDeviceSynchronize | | |-> 0.00000 - 1.3383% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} __hipPushCallConfiguration | | |-> 0.00000 - 1.1669% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} __hipPopCallConfiguration | | Remainder: 0.0000 - 0.0000% | |-> 0.00006 - 0.0000% [1] {min=0.0001, max=0.0001, mean=0.0001, var=0.0000, std dev=0.0000} Kokkos for, Dev: 0, Kokkos::View::initialization [d1d_f_pv1] | | |-> 0.00002 - 31.9493% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} hipStreamSynchronize | | |-> 0.00000 - 7.1392% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} hipLaunchKernel | | | |-> 0.00001 - 131.4963% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} GPU: void Kokkos::Experimental::Impl::__device_stub__hip_parallel_launch_local_memory, Kokkos::RangePolicy >, Kokkos::Experimental::HIP>, 1024u, 1u>(Kokkos::Impl::ParallelFor, Kokkos::RangePolicy >, Kokkos::Experimental::HIP> const*) | | |-> 0.00000 - 1.4530% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} hipDeviceSynchronize | | |-> 0.00000 - 1.2270% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} __hipPopCallConfiguration | | |-> 0.00000 - 0.9336% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} __hipPushCallConfiguration | | Remainder: 0.0000 - 0.0000% | |-> 0.00006 - 0.0000% [1] {min=0.0001, max=0.0001, mean=0.0001, var=0.0000, std dev=0.0000} Kokkos for, Dev: 0, Kokkos::View::initialization [lost] | | |-> 0.00002 - 31.8630% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} hipStreamSynchronize | | |-> 0.00000 - 7.7092% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} hipLaunchKernel | | | |-> 0.00001 - 147.4820% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} GPU: void Kokkos::Experimental::Impl::__device_stub__hip_parallel_launch_local_memory, Kokkos::RangePolicy >, Kokkos::Experimental::HIP>, 1024u, 1u>(Kokkos::Impl::ParallelFor, Kokkos::RangePolicy >, Kokkos::Experimental::HIP> const*) | | |-> 0.00000 - 2.3970% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} hipDeviceSynchronize | | |-> 0.00000 - 1.1456% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} __hipPushCallConfiguration | | |-> 0.00000 - 0.7643% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} __hipPopCallConfiguration | | Remainder: 0.0000 - 0.0000% | |-> 0.00006 - 0.0000% [1] {min=0.0001, max=0.0001, mean=0.0001, var=0.0000, std dev=0.0000} Kokkos for, Dev: 0, Kokkos::View::initialization [sort_count] | | |-> 0.00002 - 33.9831% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} hipStreamSynchronize | | |-> 0.00001 - 12.9538% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} hipLaunchKernel | | | |-> 0.00001 - 73.0888% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} GPU: void Kokkos::Experimental::Impl::__device_stub__hip_parallel_launch_local_memory, Kokkos::RangePolicy >, Kokkos::Experimental::HIP>, 1024u, 1u>(Kokkos::Impl::ParallelFor, Kokkos::RangePolicy >, Kokkos::Experimental::HIP> const*) | | | Remainder: 0.0000 - 3.4860% | | |-> 0.00000 - 1.4654% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} hipDeviceSynchronize | | |-> 0.00000 - 1.4463% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} __hipPushCallConfiguration | | |-> 0.00000 - 1.0112% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} __hipPopCallConfiguration | | Remainder: 0.0000 - 0.0000% | |-> 0.00006 - 0.0000% [1] {min=0.0001, max=0.0001, mean=0.0001, var=0.0000, std dev=0.0000} Kokkos for, Dev: 0, Kokkos::View::initialization [zc] | | |-> 0.00003 - 47.1611% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} hipStreamSynchronize | | |-> 0.00000 - 6.2587% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} hipLaunchKernel | | | |-> 0.00001 - 219.8261% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} GPU: void Kokkos::Experimental::Impl::__device_stub__hip_parallel_launch_local_memory, Kokkos::RangePolicy >, Kokkos::Experimental::HIP>, 1024u, 1u>(Kokkos::Impl::ParallelFor, Kokkos::RangePolicy >, Kokkos::Experimental::HIP> const*) | | |-> 0.00000 - 1.0548% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} hipDeviceSynchronize | | |-> 0.00000 - 0.7915% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} __hipPushCallConfiguration | | |-> 0.00000 - 0.7564% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} __hipPopCallConfiguration | | Remainder: 0.0000 - 0.0000% | |-> 0.00006 - 0.0000% [1] {min=0.0001, max=0.0001, mean=0.0001, var=0.0000, std dev=0.0000} Kokkos for, Dev: 50331648, Kokkos::View::initialization [dpot_ff] | | |-> 0.00002 - 35.2575% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} hipStreamSynchronize | | |-> 0.00001 - 10.2828% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} hipLaunchKernel | | | |-> 0.00001 - 113.1034% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} GPU: void Kokkos::Experimental::Impl::__device_stub__hip_parallel_launch_local_memory, Kokkos::RangePolicy >, Kokkos::Experimental::HIP>, 1024u, 1u>(Kokkos::Impl::ParallelFor, Kokkos::RangePolicy >, Kokkos::Experimental::HIP> const*) | | |-> 0.00000 - 1.8119% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} hipDeviceSynchronize | | |-> 0.00000 - 0.7996% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} __hipPushCallConfiguration | | |-> 0.00000 - 0.7641% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} __hipPopCallConfiguration | | Remainder: 0.0000 - 0.0000% | |-> 0.00006 - 0.0000% [1] {min=0.0001, max=0.0001, mean=0.0001, var=0.0000, std dev=0.0000} Kokkos for, Dev: 50331648, Kokkos::View::destruction [gx] | | |-> 0.00002 - 39.3940% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} hipStreamSynchronize | | |-> 0.00001 - 9.4498% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} hipLaunchKernel | | | |-> 0.00001 - 112.1212% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} GPU: void Kokkos::Experimental::Impl::__device_stub__hip_parallel_launch_local_memory, Kokkos::RangePolicy >, Kokkos::Experimental::HIP>, 1024u, 1u>(Kokkos::Impl::ParallelFor, Kokkos::RangePolicy >, Kokkos::Experimental::HIP> const*) | | |-> 0.00000 - 1.3638% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} hipDeviceSynchronize | | |-> 0.00000 - 0.8967% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} __hipPushCallConfiguration | | |-> 0.00000 - 0.8609% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} __hipPopCallConfiguration | | Remainder: 0.0000 - 0.0000% | |-> 0.00006 - 0.0000% [1] {min=0.0001, max=0.0001, mean=0.0001, var=0.0000, std dev=0.0000} Kokkos for, Dev: 0, Kokkos::View::initialization [guess_list] | | |-> 0.00002 - 44.7618% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} hipStreamSynchronize | | |-> 0.00000 - 6.7151% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} hipLaunchKernel | | | |-> 0.00001 - 262.5773% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} GPU: void Kokkos::Experimental::Impl::__device_stub__hip_parallel_launch_local_memory, Kokkos::RangePolicy >, Kokkos::Experimental::HIP>, 1024u, 1u>(Kokkos::Impl::ParallelFor, Kokkos::RangePolicy >, Kokkos::Experimental::HIP> const*) | | |-> 0.00000 - 1.3567% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} hipDeviceSynchronize | | |-> 0.00000 - 0.9954% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} __hipPopCallConfiguration | | |-> 0.00000 - 0.8870% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} __hipPushCallConfiguration | | Remainder: 0.0000 - 0.0000% | |-> 0.00005 - 0.0000% [1] {min=0.0001, max=0.0001, mean=0.0001, var=0.0000, std dev=0.0000} Kokkos for, Dev: 0, Kokkos::View::initialization [heat_pv_psi] | | |-> 0.00002 - 44.8392% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} hipStreamSynchronize | | |-> 0.00000 - 8.2049% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} hipLaunchKernel | | | |-> 0.00001 - 195.2085% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} GPU: void Kokkos::Experimental::Impl::__device_stub__hip_parallel_launch_local_memory, Kokkos::RangePolicy >, Kokkos::Experimental::HIP>, 1024u, 1u>(Kokkos::Impl::ParallelFor, Kokkos::RangePolicy >, Kokkos::Experimental::HIP> const*) | | |-> 0.00000 - 1.2213% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} __hipPushCallConfiguration | | |-> 0.00000 - 1.1303% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} hipDeviceSynchronize | | |-> 0.00000 - 0.8190% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} __hipPopCallConfiguration | | Remainder: 0.0000 - 0.0000% | |-> 0.00005 - 0.0000% [1] {min=0.0001, max=0.0001, mean=0.0001, var=0.0000, std dev=0.0000} Kokkos for, Dev: 0, Kokkos::View::initialization [guess_xtable] | | |-> 0.00002 - 44.1714% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} hipStreamSynchronize | | |-> 0.00000 - 7.6302% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} hipLaunchKernel | | | |-> 0.00001 - 200.1443% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} GPU: void Kokkos::Experimental::Impl::__device_stub__hip_parallel_launch_local_memory, Kokkos::RangePolicy >, Kokkos::Experimental::HIP>, 1024u, 1u>(Kokkos::Impl::ParallelFor, Kokkos::RangePolicy >, Kokkos::Experimental::HIP> const*) | | |-> 0.00000 - 1.4354% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} hipDeviceSynchronize | | |-> 0.00000 - 1.2867% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} __hipPopCallConfiguration | | |-> 0.00000 - 0.8829% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} __hipPushCallConfiguration | | Remainder: 0.0000 - 0.0000% | |-> 0.00005 - 0.0000% [1] {min=0.0001, max=0.0001, mean=0.0001, var=0.0000, std dev=0.0000} Kokkos for, Dev: 50331648, Kokkos::View::initialization [mapping] | | |-> 0.00002 - 43.6114% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} hipStreamSynchronize | | |-> 0.00000 - 8.6416% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} hipLaunchKernel | | | |-> 0.00001 - 194.4978% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} GPU: void Kokkos::Experimental::Impl::__device_stub__hip_parallel_launch_local_memory, Kokkos::RangePolicy >, Kokkos::Experimental::HIP>, 1024u, 1u>(Kokkos::Impl::ParallelFor, Kokkos::RangePolicy >, Kokkos::Experimental::HIP> const*) | | |-> 0.00000 - 1.3841% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} hipDeviceSynchronize | | |-> 0.00000 - 0.9049% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} __hipPopCallConfiguration | | |-> 0.00000 - 0.8865% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} __hipPushCallConfiguration | | Remainder: 0.0000 - 0.0000% | |-> 0.00005 - 0.0000% [1] {min=0.0001, max=0.0001, mean=0.0001, var=0.0000, std dev=0.0000} Kokkos for, Dev: 50331648, Kokkos::View::destruction [E_phi_ff] | | |-> 0.00002 - 38.2431% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} hipStreamSynchronize | | |-> 0.00000 - 9.1364% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} hipLaunchKernel | | | |-> 0.00001 - 127.1135% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} GPU: _ZN6Kokkos12Experimental4ImplL47__device_stub__hip_parallel_launch_local_memoryINS_4Impl11ParallelForINS3_16ViewValueFunctorINS0_3HIPE17VectorFieldPlanesLb0EEENS_11RangePolicyIJS6_NS_9IndexTypeIlEEEEES6_EELj1024ELj1EEEvPKT_:322 | | |-> 0.00000 - 1.3977% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} hipDeviceSynchronize | | |-> 0.00000 - 0.8766% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} __hipPushCallConfiguration | | |-> 0.00000 - 0.8394% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} __hipPopCallConfiguration | | Remainder: 0.0000 - 0.0000% | |-> 0.00005 - 0.0000% [1] {min=0.0001, max=0.0001, mean=0.0001, var=0.0000, std dev=0.0000} Kokkos for, Dev: 50331648, Kokkos::View::destruction [mapping] | | |-> 0.00002 - 39.5900% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} hipStreamSynchronize | | |-> 0.00001 - 9.5820% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} hipLaunchKernel | | | |-> 0.00001 - 112.7422% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} GPU: void Kokkos::Experimental::Impl::__device_stub__hip_parallel_launch_local_memory, Kokkos::RangePolicy >, Kokkos::Experimental::HIP>, 1024u, 1u>(Kokkos::Impl::ParallelFor, Kokkos::RangePolicy >, Kokkos::Experimental::HIP> const*) | | |-> 0.00000 - 1.2228% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} hipDeviceSynchronize | | |-> 0.00000 - 0.8440% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} __hipPushCallConfiguration | | |-> 0.00000 - 0.8440% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} __hipPopCallConfiguration | | Remainder: 0.0000 - 0.0000% | |-> 0.00005 - 0.0000% [1] {min=0.0001, max=0.0001, mean=0.0001, var=0.0000, std dev=0.0000} Kokkos for, Dev: 0, Kokkos::View::initialization [sort_place] | | |-> 0.00002 - 37.4171% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} hipStreamSynchronize | | |-> 0.00001 - 14.1261% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} hipLaunchKernel | | | |-> 0.00001 - 77.0672% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} GPU: _ZN6Kokkos12Experimental4ImplL47__device_stub__hip_parallel_launch_local_memoryINS_4Impl11ParallelForINS3_16ViewValueFunctorINS0_3HIPEjLb1EEENS_11RangePolicyIJS6_NS_9IndexTypeIlEEEEES6_EELj1024ELj1EEEvPKT_:322 | | | Remainder: 0.0000 - 3.2395% | | |-> 0.00000 - 1.3060% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} hipDeviceSynchronize | | |-> 0.00000 - 0.8335% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} __hipPushCallConfiguration | | |-> 0.00000 - 0.8335% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} __hipPopCallConfiguration | | Remainder: 0.0000 - 0.0000% | |-> 0.00005 - 0.0000% [1] {min=0.0001, max=0.0001, mean=0.0001, var=0.0000, std dev=0.0000} Kokkos for, Dev: 0, Kokkos::View::initialization [pot] | | |-> 0.00002 - 43.2451% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} hipStreamSynchronize | | |-> 0.00000 - 7.3056% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} hipLaunchKernel | | | |-> 0.00001 - 203.7952% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} GPU: void Kokkos::Experimental::Impl::__device_stub__hip_parallel_launch_local_memory, Kokkos::RangePolicy >, Kokkos::Experimental::HIP>, 1024u, 1u>(Kokkos::Impl::ParallelFor, Kokkos::RangePolicy >, Kokkos::Experimental::HIP> const*) | | |-> 0.00000 - 1.1413% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} hipDeviceSynchronize | | |-> 0.00000 - 0.8945% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} __hipPopCallConfiguration | | |-> 0.00000 - 0.8565% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} __hipPushCallConfiguration | | Remainder: 0.0000 - 0.0000% | |-> 0.00005 - 0.0000% [1] {min=0.0001, max=0.0001, mean=0.0001, var=0.0000, std dev=0.0000} Kokkos for, Dev: 0, Kokkos::View::initialization [basis] | | |-> 0.00002 - 37.2787% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} hipStreamSynchronize | | |-> 0.00001 - 10.8771% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} hipLaunchKernel | | | |-> 0.00001 - 104.3908% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} GPU: void Kokkos::Experimental::Impl::__device_stub__hip_parallel_launch_local_memory, Kokkos::RangePolicy >, Kokkos::Experimental::HIP>, 1024u, 1u>(Kokkos::Impl::ParallelFor, Kokkos::RangePolicy >, Kokkos::Experimental::HIP> const*) | | |-> 0.00000 - 1.3254% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} hipDeviceSynchronize | | |-> 0.00000 - 0.8823% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} __hipPopCallConfiguration | | |-> 0.00000 - 0.8267% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} __hipPushCallConfiguration | | Remainder: 0.0000 - 0.0000% | |-> 0.00005 - 0.0000% [1] {min=0.0001, max=0.0001, mean=0.0001, var=0.0000, std dev=0.0000} Kokkos for, Dev: 0, Kokkos::View::initialization [guess_count] | | |-> 0.00002 - 42.3845% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} hipStreamSynchronize | | |-> 0.00000 - 7.0306% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} hipLaunchKernel | | | |-> 0.00001 - 211.2211% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} GPU: void Kokkos::Experimental::Impl::__device_stub__hip_parallel_launch_local_memory, Kokkos::RangePolicy >, Kokkos::Experimental::HIP>, 1024u, 1u>(Kokkos::Impl::ParallelFor, Kokkos::RangePolicy >, Kokkos::Experimental::HIP> const*) | | |-> 0.00000 - 1.4927% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} hipDeviceSynchronize | | |-> 0.00000 - 1.3555% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} __hipPopCallConfiguration | | |-> 0.00000 - 0.8914% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} __hipPushCallConfiguration | | Remainder: 0.0000 - 0.0000% | |-> 0.00005 - 0.0000% [1] {min=0.0001, max=0.0001, mean=0.0001, var=0.0000, std dev=0.0000} Kokkos for, Dev: 0, Kokkos::View::initialization [ilost] | | |-> 0.00002 - 38.5794% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} hipStreamSynchronize | | |-> 0.00000 - 6.5800% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} hipLaunchKernel | | | |-> 0.00001 - 183.6915% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} GPU: void Kokkos::Experimental::Impl::__device_stub__hip_parallel_launch_local_memory, Kokkos::RangePolicy >, Kokkos::Experimental::HIP>, 1024u, 1u>(Kokkos::Impl::ParallelFor, Kokkos::RangePolicy >, Kokkos::Experimental::HIP> const*) | | |-> 0.00000 - 1.1641% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} hipDeviceSynchronize | | |-> 0.00000 - 1.0286% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} __hipPushCallConfiguration | | |-> 0.00000 - 0.8349% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} __hipPopCallConfiguration | | Remainder: 0.0000 - 0.0000% | |-> 0.00005 - 0.0000% [1] {min=0.0001, max=0.0001, mean=0.0001, var=0.0000, std dev=0.0000} Kokkos for, Dev: 50331648, Kokkos::View::destruction [nodes] | | |-> 0.00002 - 39.1437% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} hipStreamSynchronize | | |-> 0.00000 - 8.9212% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} hipLaunchKernel | | | |-> 0.00001 - 129.3141% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} GPU: void Kokkos::Experimental::Impl::__device_stub__hip_parallel_launch_local_memory, Kokkos::RangePolicy >, Kokkos::Experimental::HIP>, 1024u, 1u>(Kokkos::Impl::ParallelFor, Kokkos::RangePolicy >, Kokkos::Experimental::HIP> const*) | | |-> 0.00000 - 1.3466% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} hipDeviceSynchronize | | |-> 0.00000 - 0.8789% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} __hipPushCallConfiguration | | |-> 0.00000 - 0.8399% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} __hipPopCallConfiguration | | Remainder: 0.0000 - 0.0000% | |-> 0.00005 - 0.0000% [1] {min=0.0001, max=0.0001, mean=0.0001, var=0.0000, std dev=0.0000} Kokkos for, Dev: 50331648, Kokkos::View::destruction [acoeff_all] | | |-> 0.00002 - 40.1395% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} hipStreamSynchronize | | |-> 0.00000 - 9.7221% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} hipLaunchKernel | | | |-> 0.00001 - 125.0752% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} GPU: void Kokkos::Experimental::Impl::__device_stub__hip_parallel_launch_local_memory, Kokkos::RangePolicy >, Kokkos::Experimental::HIP>, 1024u, 1u>(Kokkos::Impl::ParallelFor, Kokkos::RangePolicy >, Kokkos::Experimental::HIP> const*) | | |-> 0.00000 - 1.2296% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} hipDeviceSynchronize | | |-> 0.00000 - 0.8204% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} __hipPushCallConfiguration | | |-> 0.00000 - 0.8185% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} __hipPopCallConfiguration | | Remainder: 0.0000 - 0.0000% | |-> 0.00005 - 0.0000% [1] {min=0.0001, max=0.0001, mean=0.0001, var=0.0000, std dev=0.0000} Kokkos for, Dev: 50331648, Kokkos::View::initialization [nodes] | | |-> 0.00002 - 39.6410% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} hipStreamSynchronize | | |-> 0.00000 - 9.3240% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} hipLaunchKernel | | | |-> 0.00001 - 143.9632% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} GPU: void Kokkos::Experimental::Impl::__device_stub__hip_parallel_launch_local_memory, Kokkos::RangePolicy >, Kokkos::Experimental::HIP>, 1024u, 1u>(Kokkos::Impl::ParallelFor, Kokkos::RangePolicy >, Kokkos::Experimental::HIP> const*) | | |-> 0.00000 - 1.1940% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} hipDeviceSynchronize | | |-> 0.00000 - 0.8799% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} __hipPushCallConfiguration | | |-> 0.00000 - 0.8604% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} __hipPopCallConfiguration | | Remainder: 0.0000 - 0.0000% | |-> 0.00005 - 0.0000% [1] {min=0.0001, max=0.0001, mean=0.0001, var=0.0000, std dev=0.0000} Kokkos for, Dev: 50331648, Kokkos::View::initialization [gx] | | |-> 0.00002 - 40.4075% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} hipStreamSynchronize | | |-> 0.00000 - 9.7110% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} hipLaunchKernel | | | |-> 0.00001 - 133.3604% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} GPU: void Kokkos::Experimental::Impl::__device_stub__hip_parallel_launch_local_memory, Kokkos::RangePolicy >, Kokkos::Experimental::HIP>, 1024u, 1u>(Kokkos::Impl::ParallelFor, Kokkos::RangePolicy >, Kokkos::Experimental::HIP> const*) | | |-> 0.00000 - 1.4846% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} hipDeviceSynchronize | | |-> 0.00000 - 0.8509% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} __hipPushCallConfiguration | | |-> 0.00000 - 0.8509% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} __hipPopCallConfiguration | | Remainder: 0.0000 - 0.0000% | |-> 0.00005 - 0.0000% [1] {min=0.0001, max=0.0001, mean=0.0001, var=0.0000, std dev=0.0000} Kokkos for, Dev: 0, Kokkos::View::initialization [heat_zmax] | | |-> 0.00002 - 37.4513% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} hipStreamSynchronize | | |-> 0.00000 - 7.1950% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} hipLaunchKernel | | | |-> 0.00001 - 140.8141% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} GPU: void Kokkos::Experimental::Impl::__device_stub__hip_parallel_launch_local_memory, Kokkos::RangePolicy >, Kokkos::Experimental::HIP>, 1024u, 1u>(Kokkos::Impl::ParallelFor, Kokkos::RangePolicy >, Kokkos::Experimental::HIP> const*) | | |-> 0.00000 - 1.8027% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} hipDeviceSynchronize | | |-> 0.00000 - 0.8727% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} __hipPopCallConfiguration | | |-> 0.00000 - 0.8509% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} __hipPushCallConfiguration | | Remainder: 0.0000 - 0.0000% | |-> 0.00005 - 0.0000% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} Kokkos for, Dev: 0, Kokkos::View::initialization [params] | | |-> 0.00002 - 38.2543% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} hipStreamSynchronize | | |-> 0.00000 - 10.0386% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} hipLaunchKernel | | | |-> 0.00001 - 104.0862% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} GPU: void Kokkos::Experimental::Impl::__device_stub__hip_parallel_launch_local_memory, Kokkos::RangePolicy >, Kokkos::Experimental::HIP>, 1024u, 1u>(Kokkos::Impl::ParallelFor, Kokkos::RangePolicy >, Kokkos::Experimental::HIP> const*) | | |-> 0.00000 - 1.3102% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} hipDeviceSynchronize | | |-> 0.00000 - 0.9204% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} __hipPopCallConfiguration | | |-> 0.00000 - 0.8592% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} __hipPushCallConfiguration | | Remainder: 0.0000 - 0.0000% | |-> 0.00005 - 0.0000% [4] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} Kokkos for, Dev: 50331648, Kokkos::View::destruction [] | | |-> 0.00000 - 4.9753% [4] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} hipStreamSynchronize | | |-> 0.00000 - 4.6667% [4] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} hipDeviceSynchronize | | Remainder: 0.0000 - 0.0000% | |-> 0.00005 - 0.0000% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} Kokkos for, Dev: 0, Kokkos::View::initialization [rgn] | | |-> 0.00002 - 39.3430% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} hipStreamSynchronize | | |-> 0.00000 - 7.9383% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} hipLaunchKernel | | | |-> 0.00001 - 141.4089% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} GPU: void Kokkos::Experimental::Impl::__device_stub__hip_parallel_launch_local_memory, Kokkos::RangePolicy >, Kokkos::Experimental::HIP>, 1024u, 1u>(Kokkos::Impl::ParallelFor, Kokkos::RangePolicy >, Kokkos::Experimental::HIP> const*) | | |-> 0.00000 - 1.5291% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} hipDeviceSynchronize | | |-> 0.00000 - 1.1370% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} __hipPopCallConfiguration | | |-> 0.00000 - 1.0132% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} __hipPushCallConfiguration | | Remainder: 0.0000 - 0.0000% | |-> 0.00005 - 0.0000% [60] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} hipDeviceSynchronize | |-> 0.00005 - 0.0000% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} Kokkos for, Dev: 0, Kokkos::View::initialization [heat_rmin] | | |-> 0.00002 - 39.6519% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} hipStreamSynchronize | | |-> 0.00000 - 7.0990% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} hipLaunchKernel | | | |-> 0.00001 - 158.7858% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} GPU: void Kokkos::Experimental::Impl::__device_stub__hip_parallel_launch_local_memory, Kokkos::RangePolicy >, Kokkos::Experimental::HIP>, 1024u, 1u>(Kokkos::Impl::ParallelFor, Kokkos::RangePolicy >, Kokkos::Experimental::HIP> const*) | | |-> 0.00000 - 1.6411% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} hipDeviceSynchronize | | |-> 0.00000 - 1.1625% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} __hipPushCallConfiguration | | |-> 0.00000 - 1.1210% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} __hipPopCallConfiguration | | Remainder: 0.0000 - 0.0000% | |-> 0.00005 - 0.0000% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} Kokkos for, Dev: 0, Kokkos::View::initialization [sort_igroup] | | |-> 0.00002 - 39.7486% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} hipStreamSynchronize | | |-> 0.00000 - 7.7005% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} hipLaunchKernel | | | |-> 0.00001 - 151.0656% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} GPU: void Kokkos::Experimental::Impl::__device_stub__hip_parallel_launch_local_memory, Kokkos::RangePolicy >, Kokkos::Experimental::HIP>, 1024u, 1u>(Kokkos::Impl::ParallelFor, Kokkos::RangePolicy >, Kokkos::Experimental::HIP> const*) | | |-> 0.00000 - 1.3315% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} hipDeviceSynchronize | | |-> 0.00000 - 0.9369% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} __hipPopCallConfiguration | | |-> 0.00000 - 0.9140% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} __hipPushCallConfiguration | | Remainder: 0.0000 - 0.0000% | |-> 0.00005 - 0.0000% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} Kokkos for, Dev: 0, Kokkos::View::initialization [heat_rmax] | | |-> 0.00002 - 38.6777% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} hipStreamSynchronize | | |-> 0.00000 - 7.4911% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} hipLaunchKernel | | | |-> 0.00001 - 147.2393% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} GPU: void Kokkos::Experimental::Impl::__device_stub__hip_parallel_launch_local_memory, Kokkos::RangePolicy >, Kokkos::Experimental::HIP>, 1024u, 1u>(Kokkos::Impl::ParallelFor, Kokkos::RangePolicy >, Kokkos::Experimental::HIP> const*) | | |-> 0.00000 - 1.5271% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} hipDeviceSynchronize | | |-> 0.00000 - 1.1719% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} __hipPushCallConfiguration | | |-> 0.00000 - 1.0048% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} __hipPopCallConfiguration | | Remainder: 0.0000 - 0.0000% | |-> 0.00005 - 0.0000% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} Kokkos for, Dev: 0, Kokkos::View::initialization [node_to_wall] | | |-> 0.00002 - 39.3919% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} hipStreamSynchronize | | |-> 0.00000 - 7.5247% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} hipLaunchKernel | | | |-> 0.00001 - 146.8298% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} GPU: void Kokkos::Experimental::Impl::__device_stub__hip_parallel_launch_local_memory, Kokkos::RangePolicy >, Kokkos::Experimental::HIP>, 1024u, 1u>(Kokkos::Impl::ParallelFor, Kokkos::RangePolicy >, Kokkos::Experimental::HIP> const*) | | |-> 0.00000 - 1.2785% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} hipDeviceSynchronize | | |-> 0.00000 - 0.9228% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} __hipPushCallConfiguration | | |-> 0.00000 - 0.9019% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} __hipPopCallConfiguration | | Remainder: 0.0000 - 0.0000% | |-> 0.00005 - 0.0000% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} Kokkos for, Dev: 0, Kokkos::View::initialization [wall_nodes] | | |-> 0.00002 - 39.8932% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} hipStreamSynchronize | | |-> 0.00000 - 7.0716% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} hipLaunchKernel | | | |-> 0.00001 - 151.6588% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} GPU: void Kokkos::Experimental::Impl::__device_stub__hip_parallel_launch_local_memory, Kokkos::RangePolicy >, Kokkos::Experimental::HIP>, 1024u, 1u>(Kokkos::Impl::ParallelFor, Kokkos::RangePolicy >, Kokkos::Experimental::HIP> const*) | | |-> 0.00000 - 1.8475% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} hipDeviceSynchronize | | |-> 0.00000 - 0.9238% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} __hipPopCallConfiguration | | |-> 0.00000 - 0.9217% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} __hipPushCallConfiguration | | Remainder: 0.0000 - 0.0000% | |-> 0.00005 - 0.0000% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} Kokkos for, Dev: 0, Kokkos::View::initialization [heat_pmin] | | |-> 0.00002 - 39.2693% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} hipStreamSynchronize | | |-> 0.00000 - 7.6023% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} hipLaunchKernel | | | |-> 0.00001 - 145.5749% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} GPU: void Kokkos::Experimental::Impl::__device_stub__hip_parallel_launch_local_memory, Kokkos::RangePolicy >, Kokkos::Experimental::HIP>, 1024u, 1u>(Kokkos::Impl::ParallelFor, Kokkos::RangePolicy >, Kokkos::Experimental::HIP> const*) | | |-> 0.00000 - 1.3457% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} hipDeviceSynchronize | | |-> 0.00000 - 0.9872% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} __hipPushCallConfiguration | | |-> 0.00000 - 0.9244% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} __hipPopCallConfiguration | | Remainder: 0.0000 - 0.0000% | |-> 0.00005 - 0.0000% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} Kokkos for, Dev: 0, Kokkos::View::initialization [heat_zmin] | | |-> 0.00002 - 39.4745% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} hipStreamSynchronize | | |-> 0.00000 - 7.5196% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} hipLaunchKernel | | | |-> 0.00001 - 142.7775% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} GPU: void Kokkos::Experimental::Impl::__device_stub__hip_parallel_launch_local_memory, Kokkos::RangePolicy >, Kokkos::Experimental::HIP>, 1024u, 1u>(Kokkos::Impl::ParallelFor, Kokkos::RangePolicy >, Kokkos::Experimental::HIP> const*) | | |-> 0.00000 - 1.5538% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} hipDeviceSynchronize | | |-> 0.00000 - 0.9667% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} __hipPushCallConfiguration | | |-> 0.00000 - 0.9667% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} __hipPopCallConfiguration | | Remainder: 0.0000 - 0.0000% | |-> 0.00005 - 0.0000% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} Kokkos for, Dev: 0, Kokkos::View::initialization [heat_dr] | | |-> 0.00002 - 39.6778% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} hipStreamSynchronize | | |-> 0.00000 - 6.9792% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} hipLaunchKernel | | | |-> 0.00000 - 150.0302% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} GPU: void Kokkos::Experimental::Impl::__device_stub__hip_parallel_launch_local_memory, Kokkos::RangePolicy >, Kokkos::Experimental::HIP>, 1024u, 1u>(Kokkos::Impl::ParallelFor, Kokkos::RangePolicy >, Kokkos::Experimental::HIP> const*) | | |-> 0.00000 - 1.4165% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} hipDeviceSynchronize | | |-> 0.00000 - 0.9099% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} __hipPushCallConfiguration | | |-> 0.00000 - 0.8677% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} __hipPopCallConfiguration | | Remainder: 0.0000 - 0.0000% | |-> 0.00005 - 0.0000% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} Kokkos for, Dev: 0, Kokkos::View::initialization [heat_dz] | | |-> 0.00002 - 40.1988% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} hipStreamSynchronize | | |-> 0.00000 - 6.8803% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} hipLaunchKernel | | | |-> 0.00000 - 152.8035% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} GPU: void Kokkos::Experimental::Impl::__device_stub__hip_parallel_launch_local_memory, Kokkos::RangePolicy >, Kokkos::Experimental::HIP>, 1024u, 1u>(Kokkos::Impl::ParallelFor, Kokkos::RangePolicy >, Kokkos::Experimental::HIP> const*) | | |-> 0.00000 - 1.5728% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} hipDeviceSynchronize | | |-> 0.00000 - 0.9136% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} __hipPushCallConfiguration | | |-> 0.00000 - 0.9136% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} __hipPopCallConfiguration | | Remainder: 0.0000 - 0.0000% | |-> 0.00005 - 0.0000% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} Kokkos for, Dev: 0, Kokkos::View::initialization [heat_dp] | | |-> 0.00002 - 39.8248% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} hipStreamSynchronize | | |-> 0.00000 - 7.0463% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} hipLaunchKernel | | | |-> 0.00001 - 154.8699% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} GPU: void Kokkos::Experimental::Impl::__device_stub__hip_parallel_launch_local_memory, Kokkos::RangePolicy >, Kokkos::Experimental::HIP>, 1024u, 1u>(Kokkos::Impl::ParallelFor, Kokkos::RangePolicy >, Kokkos::Experimental::HIP> const*) | | |-> 0.00000 - 1.3662% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} hipDeviceSynchronize | | |-> 0.00000 - 0.9613% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} __hipPushCallConfiguration | | |-> 0.00000 - 0.9378% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} __hipPopCallConfiguration | | Remainder: 0.0000 - 0.0000% | |-> 0.00004 - 0.0000% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} Kokkos deep copy: Host -> HIP heat_zmin | | |-> 0.00002 - 36.1474% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} hipMemcpy | | | |-> 0.00000 - 13.9794% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} GPU: CopyHostToDevice | | | Remainder: 0.0000 - 31.0942% | | |-> 0.00001 - 24.3968% [2] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} hipDeviceSynchronize | | Remainder: 0.0000 - 0.0000% | |-> 0.00004 - 0.0000% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} Kokkos deep copy: Host -> HIP rc | | |-> 0.00002 - 36.1840% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} hipMemcpy | | | |-> 0.00000 - 19.8700% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} GPU: CopyHostToDevice | | | Remainder: 0.0000 - 28.9942% | | |-> 0.00000 - 2.7827% [2] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} hipDeviceSynchronize | | Remainder: 0.0000 - 0.0000% | |-> 0.00004 - 0.0000% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} Kokkos deep copy: Host -> HIP nodes | | |-> 0.00002 - 55.4330% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} hipMemcpy | | | |-> 0.00001 - 33.9380% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} GPU: CopyHostToDevice | | | Remainder: 0.0000 - 36.6201% | | |-> 0.00000 - 2.8678% [2] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} hipDeviceSynchronize | | Remainder: 0.0000 - 0.0000% | |-> 0.00004 - 0.0000% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} Kokkos deep copy: Host -> HIP heat_dz | | |-> 0.00002 - 40.6573% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} hipMemcpy | | | |-> 0.00000 - 12.6935% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} GPU: CopyHostToDevice | | | Remainder: 0.0000 - 35.4965% | | |-> 0.00000 - 7.3800% [2] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} hipDeviceSynchronize | | Remainder: 0.0000 - 0.0000% | |-> 0.00004 - 0.0000% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} Kokkos deep copy: Host -> HIP heat_pmin | | |-> 0.00002 - 38.6546% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} hipMemcpy | | | |-> 0.00000 - 12.9962% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} GPU: CopyHostToDevice | | | Remainder: 0.0000 - 33.6310% | | |-> 0.00000 - 4.0743% [2] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} hipDeviceSynchronize | | Remainder: 0.0000 - 0.0000% | |-> 0.00004 - 0.0000% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} Kokkos deep copy: Host -> HIP heat_dp | | |-> 0.00002 - 40.1178% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} hipMemcpy | | | |-> 0.00000 - 14.3734% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} GPU: CopyHostToDevice | | | Remainder: 0.0000 - 34.3515% | | |-> 0.00000 - 3.9432% [2] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} hipDeviceSynchronize | | Remainder: 0.0000 - 0.0000% | |-> 0.00004 - 0.0000% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} Kokkos deep copy: Host -> HIP psi | | |-> 0.00002 - 49.2313% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} hipMemcpy | | | |-> 0.00001 - 29.8919% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} GPU: CopyHostToDevice | | | Remainder: 0.0000 - 34.5151% | | |-> 0.00000 - 3.1796% [2] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} hipDeviceSynchronize | | Remainder: 0.0000 - 0.0000% | |-> 0.00004 - 0.0000% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} Kokkos deep copy: Host -> HIP gx | | |-> 0.00002 - 51.2750% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} hipMemcpy | | | |-> 0.00001 - 33.8485% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} GPU: CopyHostToDevice | | | Remainder: 0.0000 - 33.9192% | | |-> 0.00000 - 3.3502% [2] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} hipDeviceSynchronize | | Remainder: 0.0000 - 0.0000% | |-> 0.00004 - 0.0000% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} Kokkos deep copy: Host -> HIP one_d_cub_acoef | | |-> 0.00002 - 41.7664% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} hipMemcpy | | | |-> 0.00000 - 18.1738% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} GPU: CopyHostToDevice | | | Remainder: 0.0000 - 34.1759% | | |-> 0.00000 - 6.9014% [2] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} hipDeviceSynchronize | | Remainder: 0.0000 - 0.0000% | |-> 0.00004 - 0.0000% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} Kokkos deep copy: Host -> HIP heat_dr | | |-> 0.00002 - 45.4954% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} hipMemcpy | | | |-> 0.00000 - 13.4442% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} GPU: CopyHostToDevice | | | Remainder: 0.0000 - 39.3789% | | |-> 0.00000 - 3.8030% [2] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} hipDeviceSynchronize | | Remainder: 0.0000 - 0.0000% | |-> 0.00004 - 0.0000% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} Kokkos deep copy: Host -> HIP node_to_wall | | |-> 0.00002 - 46.4161% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} hipMemcpy | | | |-> 0.00000 - 25.8298% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} GPU: CopyHostToDevice | | | Remainder: 0.0000 - 34.4269% | | |-> 0.00000 - 3.3982% [2] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} hipDeviceSynchronize | | Remainder: 0.0000 - 0.0000% | |-> 0.00004 - 0.0000% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} Kokkos deep copy: Host -> HIP heat_rmin | | |-> 0.00002 - 44.9899% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} hipMemcpy | | | |-> 0.00000 - 13.9794% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} GPU: CopyHostToDevice | | | Remainder: 0.0000 - 38.7006% | | |-> 0.00000 - 3.5258% [2] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} hipDeviceSynchronize | | Remainder: 0.0000 - 0.0000% | |-> 0.00004 - 0.0000% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} Kokkos deep copy: Host -> HIP wall_nodes | | |-> 0.00002 - 48.1099% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} hipMemcpy | | | |-> 0.00000 - 14.4220% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} GPU: CopyHostToDevice | | | Remainder: 0.0000 - 41.1715% | | |-> 0.00000 - 3.6648% [2] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} hipDeviceSynchronize | | Remainder: 0.0000 - 0.0000% | |-> 0.00004 - 0.0000% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} Kokkos deep copy: Host -> HIP rgn | | |-> 0.00002 - 46.9556% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} hipMemcpy | | | |-> 0.00000 - 27.3132% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} GPU: CopyHostToDevice | | | Remainder: 0.0000 - 34.1305% | | |-> 0.00000 - 3.5674% [2] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} hipDeviceSynchronize | | Remainder: 0.0000 - 0.0000% | |-> 0.00004 - 0.0000% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} Kokkos deep copy: Host -> HIP heat_rmax | | |-> 0.00002 - 45.3513% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} hipMemcpy | | | |-> 0.00000 - 13.7672% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} GPU: CopyHostToDevice | | | Remainder: 0.0000 - 39.1077% | | |-> 0.00000 - 3.9505% [2] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} hipDeviceSynchronize | | Remainder: 0.0000 - 0.0000% | |-> 0.00004 - 0.0000% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} Kokkos deep copy: Host -> HIP heat_zmax | | |-> 0.00002 - 45.7346% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} hipMemcpy | | | |-> 0.00000 - 14.1589% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} GPU: CopyHostToDevice | | | Remainder: 0.0000 - 39.2590% | | |-> 0.00000 - 3.7246% [2] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} hipDeviceSynchronize | | Remainder: 0.0000 - 0.0000% | |-> 0.00004 - 0.0000% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} Kokkos deep copy: Host -> HIP basis | | |-> 0.00002 - 45.3007% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} hipMemcpy | | | |-> 0.00000 - 26.9617% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} GPU: CopyHostToDevice | | | Remainder: 0.0000 - 33.0869% | | |-> 0.00000 - 3.4750% [2] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} hipDeviceSynchronize | | Remainder: 0.0000 - 0.0000% | |-> 0.00003 - 0.0000% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} Kokkos deep copy: Host -> HIP params | | |-> 0.00002 - 46.1693% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} hipMemcpy | | | |-> 0.00000 - 12.8970% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} GPU: CopyHostToDevice | | | Remainder: 0.0000 - 40.2148% | | |-> 0.00000 - 3.4999% [2] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} hipDeviceSynchronize | | Remainder: 0.0000 - 0.0000% | |-> 0.00003 - 0.0000% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} Kokkos deep copy: Host -> HIP zc | | |-> 0.00002 - 45.8383% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} hipMemcpy | | | |-> 0.00000 - 18.9823% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} GPU: CopyHostToDevice | | | Remainder: 0.0000 - 37.1371% | | |-> 0.00000 - 3.5520% [2] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} hipDeviceSynchronize | | Remainder: 0.0000 - 0.0000% | |-> 0.00003 - 0.0000% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} OpenMP Parallel Region: Kokkos::OpenMP::impl_initialize(int) | |-> 0.00002 - 0.0000% [2] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} hipLaunchKernel | | |-> 0.00001 - 47.9804% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} GPU: Kokkos::(anonymous namespace)::__device_stub__init_lock_array_kernel_threadid(int) | | |-> 0.00001 - 43.0845% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} GPU: Kokkos::(anonymous namespace)::__device_stub__init_lock_array_kernel_atomic() | | Remainder: 0.0000 - 0.0000% | |-> 0.00001 - 0.0000% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} hipGetDeviceCount | |-> 0.00001 - 0.0000% [4] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} hipGetDeviceProperties | |-> 0.00001 - 0.0000% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} OpenMP Parallel Region: Kokkos::OpenMP::impl_initialize(int) | |-> 0.00001 - 0.0000% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} Kokkos for, Dev: 50331648, Kokkos::ViewCopy-2D | | |-> 0.00000 - 11.7825% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} hipDeviceSynchronize | | Remainder: 0.0000 - 0.0000% | |-> 0.00000 - 0.0000% [2] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} __hipPushCallConfiguration | |-> 0.00000 - 0.0000% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} OpenMP Parallel Region: Kokkos::Impl::OpenMPExec::resize_thread_data(unsigned long, unsigned long, unsigned long, unsigned long) | |-> 0.00000 - 0.0000% [2] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} __hipPopCallConfiguration | |-> 0.00000 - 0.0000% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} hipSetDevice | |-> 0.00000 - 0.0000% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} GPU: FillBuffer | Remainder: 1.0094 - 0.4616%