|-> 13.71998 - 100.0000% [1] {min=13.7200, max=13.7200, mean=13.7200, var=0.0000, std dev=0.0000} APEX MAIN | |-> 1.98565 - 14.4727% [64] {min=0.0268, max=0.0432, mean=0.0310, var=0.0000, std dev=0.0020} Kokkos::parallel_for [Type:HIP, Device: 0] IntegrateStressForElems A | | |-> 1.98130 - 14.4410% [64] {min=0.0268, max=0.0431, mean=0.0310, var=0.0000, std dev=0.0020} hipDeviceSynchronize | | |-> 0.00105 - 0.0076% [64] {min=0.0000, max=0.0001, mean=0.0000, var=0.0000, std dev=0.0000} hipMemcpyToSymbolAsync | | | |-> 0.00065 - 0.0047% [64] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} hipMemcpyAsync | | | | |-> 0.00039 - 0.0028% [64] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} GPU: CopyHostToDevice | | | | Remainder: 0.0003 - 0.0019% | | | Remainder: 0.0004 - 0.0029% | | |-> 0.00051 - 0.0037% [64] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} hipLaunchKernel | | | |-> 1.98100 - 14.4388% [64] {min=0.0268, max=0.0431, mean=0.0310, var=0.0000, std dev=0.0020} GPU: void Kokkos::Experimental::Impl::hip_parallel_launch_constant_memory, Kokkos::RangePolicy> >() | | |-> 0.00043 - 0.0031% [64] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} hipEventRecord | | |-> 0.00013 - 0.0009% [64] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} hipEventSynchronize | | |-> 0.00000 - 0.0000% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} hipFuncGetAttributes | | Remainder: 0.0022 - 0.0162% | |-> 1.34679 - 9.8162% [64] {min=0.0195, max=0.0413, mean=0.0210, var=0.0000, std dev=0.0026} Kokkos::parallel_reduce [Type:HIP, Device: 0] ZL28CalcHourglassControlForElemsR6DomainPddEUliRiE_ | | |-> 1.34000 - 9.7667% [64] {min=0.0195, max=0.0411, mean=0.0209, var=0.0000, std dev=0.0026} hipStreamSynchronize | | |-> 0.00168 - 0.0123% [64] {min=0.0000, max=0.0001, mean=0.0000, var=0.0000, std dev=0.0000} hipMemcpyAsync | | | |-> 0.00035 - 0.0025% [64] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} GPU: CopyDeviceToHost | | | Remainder: 0.0013 - 0.0097% | | |-> 0.00079 - 0.0058% [64] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} hipMemcpyToSymbolAsync | | | |-> 0.00039 - 0.0028% [64] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} hipMemcpyAsync | | | | |-> 0.00039 - 0.0029% [64] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} GPU: CopyHostToDevice | | | Remainder: 0.0004 - 0.0030% | | |-> 0.00049 - 0.0035% [64] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} hipLaunchKernel | | | |-> 1.33912 - 9.7603% [64] {min=0.0194, max=0.0410, mean=0.0209, var=0.0000, std dev=0.0026} GPU: void Kokkos::Experimental::Impl::hip_parallel_launch_constant_memory, Kokkos::InvalidType, Kokkos::RangePolicy> >() | | |-> 0.00042 - 0.0031% [64] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} hipEventRecord | | |-> 0.00012 - 0.0009% [64] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} hipEventSynchronize | | |-> 0.00008 - 0.0006% [64] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} hipDeviceSynchronize | | |-> 0.00000 - 0.0000% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} hipFuncGetAttributes | | Remainder: 0.0032 - 0.0234% | |-> 1.13860 - 8.2989% [2240] {min=0.0001, max=0.2065, mean=0.0005, var=0.0000, std dev=0.0044} Kokkos::parallel_for [Type:HIP, Device: 0] CalcEnergyForElems | | |-> 0.73492 - 5.3566% [2240] {min=0.0000, max=0.0005, mean=0.0003, var=0.0000, std dev=0.0001} hipDeviceSynchronize | | |-> 0.26176 - 1.9079% [2240] {min=0.0000, max=0.2060, mean=0.0001, var=0.0000, std dev=0.0044} hipMemcpyToSymbolAsync | | | |-> 0.02158 - 0.1573% [2240] {min=0.0000, max=0.0001, mean=0.0000, var=0.0000, std dev=0.0000} hipMemcpyAsync | | | | |-> 0.01349 - 0.0984% [2240] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} GPU: CopyHostToDevice | | | | Remainder: 0.0081 - 0.0589% | | | Remainder: 0.2402 - 1.7506% | | |-> 0.03635 - 0.2650% [2240] {min=0.0000, max=0.0001, mean=0.0000, var=0.0000, std dev=0.0000} hipLaunchKernel | | | |-> 0.60690 - 4.4235% [2240] {min=0.0000, max=0.0003, mean=0.0003, var=0.0000, std dev=0.0001} GPU: void Kokkos::Experimental::Impl::hip_parallel_launch_constant_memory, Kokkos::RangePolicy> >() | | |-> 0.01455 - 0.1061% [2240] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} hipEventRecord | | |-> 0.01371 - 0.0999% [2240] {min=0.0000, max=0.0091, mean=0.0000, var=0.0000, std dev=0.0002} hipEventSynchronize | | |-> 0.00000 - 0.0000% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} hipFuncGetAttributes | | Remainder: 0.0773 - 0.5635% | |-> 1.05236 - 7.6703% [2240] {min=0.0001, max=0.0087, mean=0.0005, var=0.0000, std dev=0.0003} Kokkos::parallel_for [Type:HIP, Device: 0] EvalEOSForElems A | | |-> 0.88848 - 6.4758% [2240] {min=0.0001, max=0.0086, mean=0.0004, var=0.0000, std dev=0.0003} hipDeviceSynchronize | | |-> 0.03767 - 0.2746% [2240] {min=0.0000, max=0.0018, mean=0.0000, var=0.0000, std dev=0.0000} hipMemcpyToSymbolAsync | | | |-> 0.02138 - 0.1558% [2240] {min=0.0000, max=0.0001, mean=0.0000, var=0.0000, std dev=0.0000} hipMemcpyAsync | | | | |-> 0.01342 - 0.0978% [2240] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} GPU: CopyHostToDevice | | | | Remainder: 0.0080 - 0.0580% | | | Remainder: 0.0163 - 0.1188% | | |-> 0.03609 - 0.2631% [2240] {min=0.0000, max=0.0002, mean=0.0000, var=0.0000, std dev=0.0000} hipLaunchKernel | | | |-> 0.80173 - 5.8435% [2240] {min=0.0001, max=0.0085, mean=0.0004, var=0.0000, std dev=0.0003} GPU: void Kokkos::Experimental::Impl::hip_parallel_launch_constant_memory, Kokkos::RangePolicy> >() | | |-> 0.01446 - 0.1054% [2240] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} hipEventRecord | | |-> 0.00441 - 0.0321% [2240] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} hipEventSynchronize | | |-> 0.00000 - 0.0000% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} hipFuncGetAttributes | | Remainder: 0.0712 - 0.5193% | |-> 1.03254 - 7.5258% [64] {min=0.0160, max=0.0169, mean=0.0161, var=0.0000, std dev=0.0001} Kokkos::parallel_for [Type:HIP, Device: 0] CalcFBHourglassForceForElems A | | |-> 1.02879 - 7.4985% [64] {min=0.0159, max=0.0168, mean=0.0161, var=0.0000, std dev=0.0001} hipDeviceSynchronize | | |-> 0.00078 - 0.0057% [64] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} hipMemcpyToSymbolAsync | | | |-> 0.00041 - 0.0030% [64] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} hipMemcpyAsync | | | | |-> 0.00040 - 0.0029% [64] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} GPU: CopyHostToDevice | | | | Remainder: 0.0000 - 0.0001% | | | Remainder: 0.0004 - 0.0027% | | |-> 0.00055 - 0.0040% [64] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} hipLaunchKernel | | | |-> 1.02801 - 7.4928% [64] {min=0.0159, max=0.0164, mean=0.0161, var=0.0000, std dev=0.0001} GPU: void Kokkos::Experimental::Impl::hip_parallel_launch_constant_memory >, Kokkos::View >, Kokkos::View >, Kokkos::View >, Kokkos::View >, Kokkos::View >, double, int, int)::{lambda(int const&)#1}, Kokkos::RangePolicy, Kokkos::RangePolicy> >() | | |-> 0.00033 - 0.0024% [64] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} hipEventRecord | | |-> 0.00011 - 0.0008% [64] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} hipEventSynchronize | | |-> 0.00000 - 0.0000% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} hipFuncGetAttributes | | Remainder: 0.0020 - 0.0144% | |-> 0.73756 - 5.3758% [64] {min=0.0108, max=0.0125, mean=0.0115, var=0.0000, std dev=0.0003} Kokkos::parallel_for [Type:HIP, Device: 0] IntegrateStressForElems B | | |-> 0.73315 - 5.3437% [64] {min=0.0108, max=0.0124, mean=0.0115, var=0.0000, std dev=0.0003} hipDeviceSynchronize | | |-> 0.00093 - 0.0068% [64] {min=0.0000, max=0.0001, mean=0.0000, var=0.0000, std dev=0.0000} hipMemcpyToSymbolAsync | | | |-> 0.00049 - 0.0035% [64] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} hipMemcpyAsync | | | | |-> 0.00039 - 0.0029% [64] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} GPU: CopyHostToDevice | | | | Remainder: 0.0001 - 0.0007% | | | Remainder: 0.0004 - 0.0032% | | |-> 0.00058 - 0.0042% [64] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} hipLaunchKernel | | | |-> 0.73323 - 5.3442% [64] {min=0.0108, max=0.0124, mean=0.0115, var=0.0000, std dev=0.0003} GPU: void Kokkos::Experimental::Impl::hip_parallel_launch_constant_memory, Kokkos::Experimental::HIP> >() | | |-> 0.00045 - 0.0033% [64] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} hipEventRecord | | |-> 0.00007 - 0.0005% [64] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} hipEventSynchronize | | |-> 0.00000 - 0.0000% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} hipFuncGetAttributes | | Remainder: 0.0024 - 0.0173% | |-> 0.71381 - 5.2027% [64] {min=0.0103, max=0.0121, mean=0.0112, var=0.0000, std dev=0.0004} Kokkos::parallel_for [Type:HIP, Device: 0] CalcFBHourglassForceForElems B | | |-> 0.70969 - 5.1726% [64] {min=0.0102, max=0.0120, mean=0.0111, var=0.0000, std dev=0.0004} hipDeviceSynchronize | | |-> 0.00068 - 0.0049% [64] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} hipMemcpyToSymbolAsync | | | |-> 0.00031 - 0.0023% [64] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} hipMemcpyAsync | | | | |-> 0.00040 - 0.0029% [64] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} GPU: CopyHostToDevice | | | Remainder: 0.0004 - 0.0026% | | |-> 0.00064 - 0.0047% [64] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} hipLaunchKernel | | | |-> 0.70966 - 5.1724% [64] {min=0.0102, max=0.0120, mean=0.0111, var=0.0000, std dev=0.0003} GPU: void Kokkos::Experimental::Impl::hip_parallel_launch_constant_memory >, Kokkos::View >, Kokkos::View >, Kokkos::View >, Kokkos::View >, Kokkos::View >, double, int, int)::{lambda(Kokkos::Impl::HIPTeamMember const&)#1}, Kokkos::TeamPolicy<>, Kokkos::Experimental::HIP> >() | | |-> 0.00035 - 0.0025% [64] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} hipEventRecord | | |-> 0.00005 - 0.0004% [64] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} hipEventSynchronize | | |-> 0.00002 - 0.0001% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} hipFuncGetAttributes | | Remainder: 0.0024 - 0.0174% | |-> 0.44948 - 3.2761% [704] {min=0.0002, max=0.0063, mean=0.0006, var=0.0000, std dev=0.0003} Kokkos::parallel_for [Type:HIP, Device: 0] CalcMonotonicQRegionForElems | | |-> 0.39177 - 2.8554% [704] {min=0.0001, max=0.0010, mean=0.0006, var=0.0000, std dev=0.0002} hipDeviceSynchronize | | |-> 0.01757 - 0.1280% [704] {min=0.0000, max=0.0058, mean=0.0000, var=0.0000, std dev=0.0002} hipLaunchKernel | | | |-> 0.35165 - 2.5631% [704] {min=0.0001, max=0.0009, mean=0.0005, var=0.0000, std dev=0.0002} GPU: void Kokkos::Experimental::Impl::hip_parallel_launch_constant_memory, Kokkos::RangePolicy> >() | | |-> 0.01098 - 0.0800% [704] {min=0.0000, max=0.0001, mean=0.0000, var=0.0000, std dev=0.0000} hipMemcpyToSymbolAsync | | | |-> 0.00605 - 0.0441% [704] {min=0.0000, max=0.0001, mean=0.0000, var=0.0000, std dev=0.0000} hipMemcpyAsync | | | | |-> 0.00426 - 0.0311% [704] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} GPU: CopyHostToDevice | | | | Remainder: 0.0018 - 0.0131% | | | Remainder: 0.0049 - 0.0359% | | |-> 0.00401 - 0.0292% [704] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} hipEventRecord | | |-> 0.00160 - 0.0117% [704] {min=0.0000, max=0.0002, mean=0.0000, var=0.0000, std dev=0.0000} hipEventSynchronize | | |-> 0.00000 - 0.0000% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} hipFuncGetAttributes | | Remainder: 0.0235 - 0.1716% | |-> 0.34437 - 2.5100% [64] {min=0.0053, max=0.0056, mean=0.0054, var=0.0000, std dev=0.0001} Kokkos::parallel_for [Type:HIP, Device: 0] CalcMonotonicQGradientsForElems | | |-> 0.33977 - 2.4765% [64] {min=0.0052, max=0.0056, mean=0.0053, var=0.0000, std dev=0.0001} hipDeviceSynchronize | | |-> 0.00110 - 0.0080% [64] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} hipLaunchKernel | | | |-> 0.33625 - 2.4508% [64] {min=0.0052, max=0.0053, mean=0.0053, var=0.0000, std dev=0.0000} GPU: void Kokkos::Experimental::Impl::hip_parallel_launch_constant_memory, Kokkos::RangePolicy> >() | | |-> 0.00074 - 0.0054% [64] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} hipMemcpyToSymbolAsync | | | |-> 0.00033 - 0.0024% [64] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} hipMemcpyAsync | | | | |-> 0.00047 - 0.0034% [64] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} GPU: CopyHostToDevice | | | Remainder: 0.0004 - 0.0030% | | |-> 0.00047 - 0.0034% [64] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} hipEventRecord | | |-> 0.00014 - 0.0010% [64] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} hipEventSynchronize | | |-> 0.00000 - 0.0000% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} hipFuncGetAttributes | | Remainder: 0.0022 - 0.0158% | |-> 0.31367 - 2.2862% [29] {min=0.0001, max=0.0432, mean=0.0108, var=0.0001, std dev=0.0115} Kokkos::parallel_for [Type:OpenMP, Device: 0] Kokkos::View::initialization [_mirror] via memset | | |-> 0.00021 - 0.0015% [29] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} hipDeviceSynchronize | | Remainder: 0.3135 - 2.2847% | |-> 0.29509 - 2.1508% [64] {min=0.0015, max=0.1927, mean=0.0046, var=0.0006, std dev=0.0237} Kokkos::parallel_reduce [Type:HIP, Device: 0] ZL20CalcLagrangeElementsR6DomainEUliRiE_ | | |-> 0.09715 - 0.7081% [64] {min=0.0014, max=0.0017, mean=0.0015, var=0.0000, std dev=0.0000} hipStreamSynchronize | | |-> 0.00138 - 0.0100% [64] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} hipMemcpyAsync | | | |-> 0.00024 - 0.0018% [64] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} GPU: CopyDeviceToHost | | | Remainder: 0.0011 - 0.0083% | | |-> 0.00111 - 0.0081% [64] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} hipLaunchKernel | | | |-> 0.09451 - 0.6888% [64] {min=0.0014, max=0.0016, mean=0.0015, var=0.0000, std dev=0.0000} GPU: void Kokkos::Experimental::Impl::hip_parallel_launch_constant_memory, Kokkos::InvalidType, Kokkos::RangePolicy> >() | | |-> 0.00109 - 0.0079% [64] {min=0.0000, max=0.0001, mean=0.0000, var=0.0000, std dev=0.0000} hipMemcpyToSymbolAsync | | | |-> 0.00069 - 0.0050% [64] {min=0.0000, max=0.0001, mean=0.0000, var=0.0000, std dev=0.0000} hipMemcpyAsync | | | | |-> 0.00038 - 0.0028% [64] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} GPU: CopyHostToDevice | | | | Remainder: 0.0003 - 0.0022% | | | Remainder: 0.0004 - 0.0029% | | |-> 0.00039 - 0.0029% [64] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} hipEventRecord | | |-> 0.00012 - 0.0009% [64] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} hipEventSynchronize | | |-> 0.00009 - 0.0007% [64] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} hipDeviceSynchronize | | |-> 0.00000 - 0.0000% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} hipFuncGetAttributes | | Remainder: 0.1938 - 1.4122% | |-> 0.28611 - 2.0854% [64] {min=0.0044, max=0.0046, mean=0.0045, var=0.0000, std dev=0.0000} Kokkos::parallel_for [Type:HIP, Device: 0] CalcKinematicsForElems | | |-> 0.28135 - 2.0507% [64] {min=0.0043, max=0.0045, mean=0.0044, var=0.0000, std dev=0.0000} hipDeviceSynchronize | | |-> 0.00118 - 0.0086% [64] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} hipLaunchKernel | | | |-> 0.27798 - 2.0261% [64] {min=0.0043, max=0.0044, mean=0.0043, var=0.0000, std dev=0.0000} GPU: void Kokkos::Experimental::Impl::hip_parallel_launch_constant_memory, Kokkos::RangePolicy> >() | | |-> 0.00079 - 0.0058% [64] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} hipMemcpyToSymbolAsync | | | |-> 0.00040 - 0.0029% [64] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} hipMemcpyAsync | | | | |-> 0.00038 - 0.0028% [64] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} GPU: CopyHostToDevice | | | | Remainder: 0.0000 - 0.0001% | | | Remainder: 0.0004 - 0.0028% | | |-> 0.00054 - 0.0039% [64] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} hipEventRecord | | |-> 0.00011 - 0.0008% [64] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} hipEventSynchronize | | |-> 0.00001 - 0.0000% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} hipFuncGetAttributes | | Remainder: 0.0021 - 0.0155% | |-> 0.25001 - 1.8222% [942] {min=0.0000, max=0.2087, mean=0.0003, var=0.0000, std dev=0.0068} hipMemcpyAsync | | |-> 0.00147 - 0.0107% [942] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} GPU: CopyHostToDevice | | Remainder: 0.2485 - 1.8115% | |-> 0.19869 - 1.4482% [704] {min=0.0001, max=0.0005, mean=0.0003, var=0.0000, std dev=0.0001} Kokkos::parallel_for [Type:HIP, Device: 0] EvalEOSForElems F | | |-> 0.14928 - 1.0881% [704] {min=0.0000, max=0.0004, mean=0.0002, var=0.0000, std dev=0.0001} hipDeviceSynchronize | | |-> 0.01158 - 0.0844% [704] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} hipLaunchKernel | | | |-> 0.11416 - 0.8321% [704] {min=0.0000, max=0.0003, mean=0.0002, var=0.0000, std dev=0.0001} GPU: void Kokkos::Experimental::Impl::hip_parallel_launch_constant_memory, Kokkos::RangePolicy> >() | | |-> 0.01105 - 0.0805% [704] {min=0.0000, max=0.0001, mean=0.0000, var=0.0000, std dev=0.0000} hipMemcpyToSymbolAsync | | | |-> 0.00644 - 0.0469% [704] {min=0.0000, max=0.0001, mean=0.0000, var=0.0000, std dev=0.0000} hipMemcpyAsync | | | | |-> 0.00418 - 0.0305% [704] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} GPU: CopyHostToDevice | | | | Remainder: 0.0023 - 0.0164% | | | Remainder: 0.0046 - 0.0336% | | |-> 0.00455 - 0.0332% [704] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} hipEventRecord | | |-> 0.00149 - 0.0109% [704] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} hipEventSynchronize | | |-> 0.00000 - 0.0000% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} hipFuncGetAttributes | | Remainder: 0.0207 - 0.1512% | |-> 0.16680 - 1.2158% [704] {min=0.0001, max=0.0006, mean=0.0002, var=0.0000, std dev=0.0001} Kokkos::parallel_for [Type:HIP, Device: 0] CalcSoundSpeedForElems | | |-> 0.11530 - 0.8404% [704] {min=0.0000, max=0.0006, mean=0.0002, var=0.0000, std dev=0.0001} hipDeviceSynchronize | | |-> 0.01141 - 0.0832% [704] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} hipLaunchKernel | | | |-> 0.07789 - 0.5677% [704] {min=0.0000, max=0.0002, mean=0.0001, var=0.0000, std dev=0.0001} GPU: void Kokkos::Experimental::Impl::hip_parallel_launch_constant_memory, Kokkos::RangePolicy> >() | | |-> 0.01127 - 0.0822% [704] {min=0.0000, max=0.0001, mean=0.0000, var=0.0000, std dev=0.0000} hipMemcpyToSymbolAsync | | | |-> 0.00636 - 0.0464% [704] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} hipMemcpyAsync | | | | |-> 0.00423 - 0.0308% [704] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} GPU: CopyHostToDevice | | | | Remainder: 0.0021 - 0.0156% | | | Remainder: 0.0049 - 0.0358% | | |-> 0.00459 - 0.0335% [704] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} hipEventRecord | | |-> 0.00156 - 0.0114% [704] {min=0.0000, max=0.0001, mean=0.0000, var=0.0000, std dev=0.0000} hipEventSynchronize | | |-> 0.00000 - 0.0000% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} hipFuncGetAttributes | | Remainder: 0.0227 - 0.1652% | |-> 0.16476 - 1.2009% [704] {min=0.0001, max=0.0009, mean=0.0002, var=0.0000, std dev=0.0001} Kokkos::parallel_reduce [Type:HIP, Device: 0] ZL29CalcCourantConstraintForElemsR6DomainiidRdEUliR9MinFinderE_ | | |-> 0.08705 - 0.6345% [704] {min=0.0000, max=0.0002, mean=0.0001, var=0.0000, std dev=0.0001} hipStreamSynchronize | | |-> 0.01823 - 0.1329% [704] {min=0.0000, max=0.0001, mean=0.0000, var=0.0000, std dev=0.0000} hipMemcpyAsync | | | |-> 0.00332 - 0.0242% [704] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} GPU: CopyDeviceToHost | | | Remainder: 0.0149 - 0.1087% | | |-> 0.01058 - 0.0771% [704] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} hipLaunchKernel | | | |-> 0.06002 - 0.4375% [704] {min=0.0000, max=0.0001, mean=0.0001, var=0.0000, std dev=0.0000} GPU: void Kokkos::Experimental::Impl::hip_parallel_launch_constant_memory, Kokkos::InvalidType, Kokkos::RangePolicy> >() | | |-> 0.00949 - 0.0692% [704] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} hipMemcpyToSymbolAsync | | | |-> 0.00484 - 0.0353% [704] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} hipMemcpyAsync | | | | |-> 0.00424 - 0.0309% [704] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} GPU: CopyHostToDevice | | | | Remainder: 0.0006 - 0.0044% | | | Remainder: 0.0046 - 0.0338% | | |-> 0.00389 - 0.0284% [704] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} hipEventRecord | | |-> 0.00137 - 0.0100% [704] {min=0.0000, max=0.0003, mean=0.0000, var=0.0000, std dev=0.0000} hipDeviceSynchronize | | |-> 0.00136 - 0.0099% [704] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} hipEventSynchronize | | |-> 0.00000 - 0.0000% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} hipFuncGetAttributes | | Remainder: 0.0328 - 0.2389% | |-> 0.15607 - 1.1375% [704] {min=0.0001, max=0.0005, mean=0.0002, var=0.0000, std dev=0.0001} Kokkos::parallel_reduce [Type:HIP, Device: 0] ZL27CalcHydroConstraintForElemsR6DomainiidRdEUliR9MinFinderE_ | | |-> 0.08036 - 0.5857% [704] {min=0.0000, max=0.0003, mean=0.0001, var=0.0000, std dev=0.0000} hipStreamSynchronize | | |-> 0.01900 - 0.1385% [704] {min=0.0000, max=0.0002, mean=0.0000, var=0.0000, std dev=0.0000} hipMemcpyAsync | | | |-> 0.00352 - 0.0257% [704] {min=0.0000, max=0.0002, mean=0.0000, var=0.0000, std dev=0.0000} GPU: CopyDeviceToHost | | | Remainder: 0.0155 - 0.1128% | | |-> 0.01053 - 0.0768% [704] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} hipLaunchKernel | | | |-> 0.04211 - 0.3069% [704] {min=0.0000, max=0.0001, mean=0.0001, var=0.0000, std dev=0.0000} GPU: void Kokkos::Experimental::Impl::hip_parallel_launch_constant_memory, Kokkos::InvalidType, Kokkos::RangePolicy> >() | | |-> 0.00911 - 0.0664% [704] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} hipMemcpyToSymbolAsync | | | |-> 0.00470 - 0.0342% [704] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} hipMemcpyAsync | | | | |-> 0.00425 - 0.0310% [704] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} GPU: CopyHostToDevice | | | | Remainder: 0.0004 - 0.0033% | | | Remainder: 0.0044 - 0.0321% | | |-> 0.00366 - 0.0267% [704] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} hipEventRecord | | |-> 0.00134 - 0.0098% [704] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} hipEventSynchronize | | |-> 0.00087 - 0.0064% [704] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} hipDeviceSynchronize | | |-> 0.00000 - 0.0000% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} hipFuncGetAttributes | | Remainder: 0.0312 - 0.2274% | |-> 0.15581 - 1.1356% [23] {min=0.0001, max=0.0353, mean=0.0068, var=0.0001, std dev=0.0071} Kokkos deep copy: Host _mirror -> HIP | | |-> 0.15468 - 1.1274% [23] {min=0.0000, max=0.0352, mean=0.0067, var=0.0001, std dev=0.0071} hipMemcpyAsync | | | |-> 0.03133 - 0.2284% [23] {min=0.0000, max=0.0028, mean=0.0014, var=0.0000, std dev=0.0009} GPU: CopyHostToDevice | | | Remainder: 0.1233 - 0.8990% | | |-> 0.00011 - 0.0008% [46] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} hipDeviceSynchronize | | Remainder: 0.0010 - 0.0074% | |-> 0.14388 - 1.0487% [12] {min=0.0038, max=0.0496, mean=0.0120, var=0.0002, std dev=0.0125} Kokkos deep copy: HIP -> Host _mirror | | |-> 0.14337 - 1.0450% [12] {min=0.0037, max=0.0496, mean=0.0119, var=0.0002, std dev=0.0125} hipMemcpyAsync | | | |-> 0.02166 - 0.1579% [12] {min=0.0001, max=0.0043, mean=0.0018, var=0.0000, std dev=0.0013} GPU: CopyDeviceToHost | | | Remainder: 0.1217 - 0.8871% | | |-> 0.00007 - 0.0005% [24] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} hipDeviceSynchronize | | Remainder: 0.0004 - 0.0032% | |-> 0.09574 - 0.6978% [613] {min=0.0000, max=0.0005, mean=0.0002, var=0.0000, std dev=0.0000} Kokkos::parallel_for [Type:HIP, Device: 0] Kokkos::View::initialization [] via memset | | |-> 0.08595 - 0.6265% [613] {min=0.0000, max=0.0005, mean=0.0001, var=0.0000, std dev=0.0000} hipDeviceSynchronize | | |-> 0.00318 - 0.0232% [613] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} hipMemsetAsync | | | |-> 0.07795 - 0.5682% [613] {min=0.0000, max=0.0005, mean=0.0001, var=0.0000, std dev=0.0000} GPU: FillBuffer | | Remainder: 0.0066 - 0.0482% | |-> 0.07914 - 0.5768% [945] {min=0.0000, max=0.0003, mean=0.0001, var=0.0000, std dev=0.0000} hipMalloc | |-> 0.07683 - 0.5600% [64] {min=0.0012, max=0.0013, mean=0.0012, var=0.0000, std dev=0.0000} Kokkos::parallel_for [Type:HIP, Device: 0] CalcVelocityForNodes | | |-> 0.07231 - 0.5271% [64] {min=0.0011, max=0.0012, mean=0.0011, var=0.0000, std dev=0.0000} hipDeviceSynchronize | | |-> 0.00109 - 0.0079% [64] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} hipLaunchKernel | | | |-> 0.06851 - 0.4993% [64] {min=0.0011, max=0.0011, mean=0.0011, var=0.0000, std dev=0.0000} GPU: void Kokkos::Experimental::Impl::hip_parallel_launch_constant_memory, Kokkos::RangePolicy> >() | | |-> 0.00090 - 0.0065% [64] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} hipMemcpyToSymbolAsync | | | |-> 0.00045 - 0.0033% [64] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} hipMemcpyAsync | | | | |-> 0.00039 - 0.0028% [64] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} GPU: CopyHostToDevice | | | | Remainder: 0.0001 - 0.0005% | | | Remainder: 0.0004 - 0.0032% | | |-> 0.00039 - 0.0028% [64] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} hipEventRecord | | |-> 0.00014 - 0.0010% [64] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} hipEventSynchronize | | |-> 0.00000 - 0.0000% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} hipFuncGetAttributes | | Remainder: 0.0020 - 0.0146% | |-> 0.07579 - 0.5524% [64] {min=0.0011, max=0.0013, mean=0.0012, var=0.0000, std dev=0.0001} Kokkos::parallel_for [Type:HIP, Device: 0] CalcPositionForNodes | | |-> 0.07100 - 0.5175% [64] {min=0.0011, max=0.0012, mean=0.0011, var=0.0000, std dev=0.0001} hipDeviceSynchronize | | |-> 0.00137 - 0.0100% [64] {min=0.0000, max=0.0001, mean=0.0000, var=0.0000, std dev=0.0000} hipMemcpyToSymbolAsync | | | |-> 0.00087 - 0.0063% [64] {min=0.0000, max=0.0001, mean=0.0000, var=0.0000, std dev=0.0000} hipMemcpyAsync | | | | |-> 0.00038 - 0.0028% [64] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} GPU: CopyHostToDevice | | | | Remainder: 0.0005 - 0.0036% | | | Remainder: 0.0005 - 0.0036% | | |-> 0.00097 - 0.0071% [64] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} hipLaunchKernel | | | |-> 0.06849 - 0.4992% [64] {min=0.0011, max=0.0011, mean=0.0011, var=0.0000, std dev=0.0000} GPU: void Kokkos::Experimental::Impl::hip_parallel_launch_constant_memory, Kokkos::RangePolicy> >() | | |-> 0.00039 - 0.0028% [64] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} hipEventRecord | | |-> 0.00009 - 0.0007% [64] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} hipEventSynchronize | | |-> 0.00000 - 0.0000% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} hipFuncGetAttributes | | Remainder: 0.0020 - 0.0144% | |-> 0.06343 - 0.4623% [64] {min=0.0009, max=0.0011, mean=0.0010, var=0.0000, std dev=0.0000} Kokkos::parallel_for [Type:HIP, Device: 0] CalcAccelerationForNodes | | |-> 0.05873 - 0.4280% [64] {min=0.0008, max=0.0010, mean=0.0009, var=0.0000, std dev=0.0000} hipDeviceSynchronize | | |-> 0.00115 - 0.0084% [64] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} hipLaunchKernel | | | |-> 0.05317 - 0.3875% [64] {min=0.0008, max=0.0008, mean=0.0008, var=0.0000, std dev=0.0000} GPU: void Kokkos::Experimental::Impl::hip_parallel_launch_constant_memory, Kokkos::RangePolicy> >() | | |-> 0.00102 - 0.0074% [64] {min=0.0000, max=0.0001, mean=0.0000, var=0.0000, std dev=0.0000} hipMemcpyToSymbolAsync | | | |-> 0.00060 - 0.0044% [64] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} hipMemcpyAsync | | | | |-> 0.00047 - 0.0034% [64] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} GPU: CopyHostToDevice | | | | Remainder: 0.0001 - 0.0010% | | | Remainder: 0.0004 - 0.0030% | | |-> 0.00051 - 0.0037% [64] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} hipEventRecord | | |-> 0.00011 - 0.0008% [64] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} hipEventSynchronize | | |-> 0.00001 - 0.0001% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} hipFuncGetAttributes | | Remainder: 0.0019 - 0.0139% | |-> 0.05279 - 0.3847% [939] {min=0.0000, max=0.0014, mean=0.0001, var=0.0000, std dev=0.0000} hipFree | |-> 0.04276 - 0.3117% [1] {min=0.0428, max=0.0428, mean=0.0428, var=0.0000, std dev=0.0000} Kokkos::parallel_for [Type:OpenMP, Device: 0] Kokkos::View::initialization [nodeElemCornerList_mirror] via memset | | |-> 0.00001 - 0.0001% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} hipDeviceSynchronize | | Remainder: 0.0428 - 0.3116% | |-> 0.04049 - 0.2951% [64] {min=0.0006, max=0.0032, mean=0.0006, var=0.0000, std dev=0.0003} Kokkos::parallel_for [Type:HIP, Device: 0] InitStressTermsForElems | | |-> 0.03375 - 0.2460% [64] {min=0.0005, max=0.0006, mean=0.0005, var=0.0000, std dev=0.0000} hipDeviceSynchronize | | |-> 0.00145 - 0.0106% [64] {min=0.0000, max=0.0013, mean=0.0000, var=0.0000, std dev=0.0002} hipEventSynchronize | | |-> 0.00090 - 0.0065% [64] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} hipMemcpyToSymbolAsync | | | |-> 0.00043 - 0.0031% [64] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} hipMemcpyAsync | | | | |-> 0.00039 - 0.0028% [64] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} GPU: CopyHostToDevice | | | | Remainder: 0.0000 - 0.0003% | | | Remainder: 0.0005 - 0.0034% | | |-> 0.00064 - 0.0047% [64] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} hipLaunchKernel | | | |-> 0.03369 - 0.2455% [64] {min=0.0005, max=0.0006, mean=0.0005, var=0.0000, std dev=0.0000} GPU: void Kokkos::Experimental::Impl::hip_parallel_launch_constant_memory, Kokkos::RangePolicy> >() | | |-> 0.00046 - 0.0034% [64] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} hipEventRecord | | |-> 0.00000 - 0.0000% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} hipFuncGetAttributes | | Remainder: 0.0033 - 0.0240% | |-> 0.03369 - 0.2456% [1] {min=0.0337, max=0.0337, mean=0.0337, var=0.0000, std dev=0.0000} Kokkos deep copy: Host nodeElemCornerList_mirror -> HIP nodeElemCornerList | | |-> 0.03348 - 0.2440% [1] {min=0.0335, max=0.0335, mean=0.0335, var=0.0000, std dev=0.0000} hipMemcpyAsync | | | |-> 0.00142 - 0.0104% [1] {min=0.0014, max=0.0014, mean=0.0014, var=0.0000, std dev=0.0000} GPU: CopyHostToDevice | | | Remainder: 0.0321 - 0.2337% | | |-> 0.00003 - 0.0002% [2] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} hipDeviceSynchronize | | Remainder: 0.0002 - 0.0013% | |-> 0.02831 - 0.2063% [64] {min=0.0004, max=0.0038, mean=0.0004, var=0.0000, std dev=0.0004} Kokkos::parallel_for [Type:HIP, Device: 0] CalcForceForNodes | | |-> 0.02095 - 0.1527% [64] {min=0.0003, max=0.0004, mean=0.0003, var=0.0000, std dev=0.0000} hipDeviceSynchronize | | |-> 0.00324 - 0.0236% [1] {min=0.0032, max=0.0032, mean=0.0032, var=0.0000, std dev=0.0000} hipFuncGetAttributes | | |-> 0.00088 - 0.0064% [64] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} hipMemcpyToSymbolAsync | | | |-> 0.00043 - 0.0031% [64] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} hipMemcpyAsync | | | | |-> 0.00039 - 0.0028% [64] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} GPU: CopyHostToDevice | | | | Remainder: 0.0000 - 0.0003% | | | Remainder: 0.0004 - 0.0032% | | |-> 0.00049 - 0.0036% [64] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} hipLaunchKernel | | | |-> 0.02072 - 0.1511% [64] {min=0.0003, max=0.0003, mean=0.0003, var=0.0000, std dev=0.0000} GPU: void Kokkos::Experimental::Impl::hip_parallel_launch_constant_memory, Kokkos::RangePolicy> >() | | |-> 0.00035 - 0.0025% [64] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} hipEventRecord | | |-> 0.00013 - 0.0010% [3] {min=0.0000, max=0.0001, mean=0.0000, var=0.0000, std dev=0.0000} hipMemcpyToSymbol | | | |-> 0.00000 - 0.0000% [3] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} GPU: CopyHostToDevice | | | Remainder: 0.0001 - 0.0009% | | |-> 0.00013 - 0.0010% [64] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} hipEventSynchronize | | Remainder: 0.0021 - 0.0156% | |-> 0.02510 - 0.1829% [64] {min=0.0004, max=0.0004, mean=0.0004, var=0.0000, std dev=0.0000} Kokkos::parallel_for [Type:HIP, Device: 0] UpdateVolumesForElems | | |-> 0.02044 - 0.1490% [64] {min=0.0002, max=0.0003, mean=0.0003, var=0.0000, std dev=0.0000} hipDeviceSynchronize | | |-> 0.00118 - 0.0086% [64] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} hipLaunchKernel | | | |-> 0.01371 - 0.0999% [64] {min=0.0002, max=0.0002, mean=0.0002, var=0.0000, std dev=0.0000} GPU: void Kokkos::Experimental::Impl::hip_parallel_launch_constant_memory, Kokkos::RangePolicy> >() | | |-> 0.00098 - 0.0071% [64] {min=0.0000, max=0.0001, mean=0.0000, var=0.0000, std dev=0.0000} hipMemcpyToSymbolAsync | | | |-> 0.00059 - 0.0043% [64] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} hipMemcpyAsync | | | | |-> 0.00047 - 0.0034% [64] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} GPU: CopyHostToDevice | | | | Remainder: 0.0001 - 0.0009% | | | Remainder: 0.0004 - 0.0028% | | |-> 0.00043 - 0.0032% [64] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} hipEventRecord | | |-> 0.00012 - 0.0009% [64] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} hipEventSynchronize | | |-> 0.00000 - 0.0000% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} hipFuncGetAttributes | | Remainder: 0.0019 - 0.0141% | |-> 0.02074 - 0.1512% [64] {min=0.0002, max=0.0004, mean=0.0003, var=0.0000, std dev=0.0000} Kokkos::parallel_reduce [Type:HIP, Device: 0] ZL31ApplyMaterialPropertiesForElemsR6DomainEUliRiE_ | | |-> 0.01360 - 0.0991% [64] {min=0.0001, max=0.0003, mean=0.0002, var=0.0000, std dev=0.0000} hipStreamSynchronize | | |-> 0.00187 - 0.0136% [64] {min=0.0000, max=0.0001, mean=0.0000, var=0.0000, std dev=0.0000} hipMemcpyAsync | | | |-> 0.00031 - 0.0023% [64] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} GPU: CopyDeviceToHost | | | Remainder: 0.0016 - 0.0113% | | |-> 0.00108 - 0.0079% [64] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} hipLaunchKernel | | | |-> 0.00860 - 0.0627% [64] {min=0.0001, max=0.0001, mean=0.0001, var=0.0000, std dev=0.0000} GPU: void Kokkos::Experimental::Impl::hip_parallel_launch_constant_memory, Kokkos::InvalidType, Kokkos::RangePolicy> >() | | |-> 0.00085 - 0.0062% [64] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} hipMemcpyToSymbolAsync | | | |-> 0.00045 - 0.0033% [64] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} hipMemcpyAsync | | | | |-> 0.00039 - 0.0029% [64] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} GPU: CopyHostToDevice | | | | Remainder: 0.0001 - 0.0004% | | | Remainder: 0.0004 - 0.0029% | | |-> 0.00050 - 0.0037% [64] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} hipEventRecord | | |-> 0.00012 - 0.0009% [64] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} hipEventSynchronize | | |-> 0.00010 - 0.0008% [64] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} hipDeviceSynchronize | | |-> 0.00000 - 0.0000% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} hipFuncGetAttributes | | Remainder: 0.0026 - 0.0191% | |-> 0.01993 - 0.1452% [64] {min=0.0003, max=0.0004, mean=0.0003, var=0.0000, std dev=0.0000} Kokkos::parallel_for [Type:HIP, Device: 0] ApplyMaterialPropertiesForElems A | | |-> 0.01551 - 0.1130% [64] {min=0.0002, max=0.0003, mean=0.0002, var=0.0000, std dev=0.0000} hipDeviceSynchronize | | |-> 0.00110 - 0.0080% [64] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} hipLaunchKernel | | | |-> 0.01358 - 0.0990% [64] {min=0.0002, max=0.0002, mean=0.0002, var=0.0000, std dev=0.0000} GPU: void Kokkos::Experimental::Impl::hip_parallel_launch_constant_memory, Kokkos::RangePolicy> >() | | |-> 0.00079 - 0.0058% [64] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} hipMemcpyToSymbolAsync | | | |-> 0.00042 - 0.0031% [64] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} hipMemcpyAsync | | | | |-> 0.00039 - 0.0029% [64] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} GPU: CopyHostToDevice | | | | Remainder: 0.0000 - 0.0002% | | | Remainder: 0.0004 - 0.0027% | | |-> 0.00052 - 0.0038% [64] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} hipEventRecord | | |-> 0.00011 - 0.0008% [64] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} hipEventSynchronize | | |-> 0.00000 - 0.0000% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} hipFuncGetAttributes | | Remainder: 0.0019 - 0.0138% | |-> 0.01824 - 0.1330% [11709] {min=0.0000, max=0.0046, mean=0.0000, var=0.0000, std dev=0.0000} hipDeviceSynchronize | |-> 0.01746 - 0.1273% [64] {min=0.0002, max=0.0004, mean=0.0003, var=0.0000, std dev=0.0000} Kokkos::parallel_reduce [Type:HIP, Device: 0] ZL13CalcQForElemsR6DomainEUlRKiRiE_ | | |-> 0.01020 - 0.0743% [64] {min=0.0001, max=0.0002, mean=0.0002, var=0.0000, std dev=0.0000} hipStreamSynchronize | | |-> 0.00167 - 0.0122% [64] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} hipMemcpyAsync | | | |-> 0.00032 - 0.0024% [64] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} GPU: CopyDeviceToHost | | | Remainder: 0.0013 - 0.0098% | | |-> 0.00113 - 0.0082% [64] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} hipLaunchKernel | | | |-> 0.00833 - 0.0607% [64] {min=0.0001, max=0.0001, mean=0.0001, var=0.0000, std dev=0.0000} GPU: void Kokkos::Experimental::Impl::hip_parallel_launch_constant_memory, Kokkos::InvalidType, Kokkos::RangePolicy> >() | | |-> 0.00098 - 0.0071% [64] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} hipMemcpyToSymbolAsync | | | |-> 0.00056 - 0.0041% [64] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} hipMemcpyAsync | | | | |-> 0.00047 - 0.0034% [64] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} GPU: CopyHostToDevice | | | | Remainder: 0.0001 - 0.0007% | | | Remainder: 0.0004 - 0.0030% | | |-> 0.00059 - 0.0043% [64] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} hipEventRecord | | |-> 0.00013 - 0.0010% [64] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} hipEventSynchronize | | |-> 0.00007 - 0.0005% [64] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} hipDeviceSynchronize | | |-> 0.00000 - 0.0000% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} hipFuncGetAttributes | | Remainder: 0.0027 - 0.0196% | |-> 0.01213 - 0.0884% [64] {min=0.0002, max=0.0002, mean=0.0002, var=0.0000, std dev=0.0000} Kokkos::parallel_reduce [Type:HIP, Device: 0] ZL23CalcVolumeForceForElemsR6DomainEUliRiE_ | | |-> 0.00873 - 0.0636% [64] {min=0.0001, max=0.0002, mean=0.0001, var=0.0000, std dev=0.0000} hipStreamSynchronize | | |-> 0.00115 - 0.0084% [64] {min=0.0000, max=0.0001, mean=0.0000, var=0.0000, std dev=0.0000} hipMemcpyAsync | | | |-> 0.00020 - 0.0014% [64] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} GPU: CopyDeviceToHost | | | Remainder: 0.0010 - 0.0070% | | |-> 0.00065 - 0.0048% [64] {min=0.0000, max=0.0001, mean=0.0000, var=0.0000, std dev=0.0000} hipLaunchKernel | | | |-> 0.00790 - 0.0576% [64] {min=0.0001, max=0.0001, mean=0.0001, var=0.0000, std dev=0.0000} GPU: void Kokkos::Experimental::Impl::hip_parallel_launch_local_memory, Kokkos::InvalidType, Kokkos::RangePolicy>, 1024u, 1u>(Kokkos::Impl::ParallelReduce, Kokkos::InvalidType, Kokkos::RangePolicy> const*) | | |-> 0.00006 - 0.0004% [64] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} hipDeviceSynchronize | | |-> 0.00000 - 0.0000% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} hipFuncGetAttributes | | Remainder: 0.0015 - 0.0112% | |-> 0.01127 - 0.0821% [2] {min=0.0028, max=0.0084, mean=0.0056, var=0.0000, std dev=0.0028} Kokkos::parallel_for [Type:HIP, Device: 0] Kokkos::View::initialization [Buffer] via memset | | |-> 0.01124 - 0.0819% [2] {min=0.0028, max=0.0084, mean=0.0056, var=0.0000, std dev=0.0028} hipDeviceSynchronize | | |-> 0.00001 - 0.0001% [2] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} hipMemsetAsync | | | |-> 0.01122 - 0.0817% [2] {min=0.0028, max=0.0084, mean=0.0056, var=0.0000, std dev=0.0028} GPU: FillBuffer | | Remainder: 0.0000 - 0.0001% | |-> 0.01125 - 0.0820% [64] {min=0.0001, max=0.0002, mean=0.0002, var=0.0000, std dev=0.0000} Kokkos::parallel_for [Type:HIP, Device: 0] ApplyAccelerationBoundaryConditionsForNodes A | | |-> 0.00703 - 0.0512% [64] {min=0.0000, max=0.0001, mean=0.0001, var=0.0000, std dev=0.0000} hipDeviceSynchronize | | |-> 0.00106 - 0.0077% [64] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} hipLaunchKernel | | | |-> 0.00109 - 0.0079% [64] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} GPU: void Kokkos::Experimental::Impl::hip_parallel_launch_constant_memory, Kokkos::RangePolicy> >() | | |-> 0.00083 - 0.0060% [64] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} hipMemcpyToSymbolAsync | | | |-> 0.00039 - 0.0029% [64] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} hipMemcpyAsync | | | | |-> 0.00039 - 0.0028% [64] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} GPU: CopyHostToDevice | | | | Remainder: 0.0000 - 0.0001% | | | Remainder: 0.0004 - 0.0032% | | |-> 0.00033 - 0.0024% [64] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} hipEventRecord | | |-> 0.00010 - 0.0008% [64] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} hipEventSynchronize | | |-> 0.00000 - 0.0000% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} hipFuncGetAttributes | | Remainder: 0.0019 - 0.0139% | |-> 0.01050 - 0.0766% [64] {min=0.0002, max=0.0002, mean=0.0002, var=0.0000, std dev=0.0000} Kokkos::parallel_for [Type:HIP, Device: 0] ApplyMaterialPropertiesForElems B | | |-> 0.00880 - 0.0641% [64] {min=0.0001, max=0.0001, mean=0.0001, var=0.0000, std dev=0.0000} hipDeviceSynchronize | | |-> 0.00077 - 0.0056% [64] {min=0.0000, max=0.0001, mean=0.0000, var=0.0000, std dev=0.0000} hipLaunchKernel | | | |-> 0.00805 - 0.0587% [64] {min=0.0001, max=0.0001, mean=0.0001, var=0.0000, std dev=0.0000} GPU: void Kokkos::Experimental::Impl::hip_parallel_launch_local_memory, Kokkos::RangePolicy>, 1024u, 1u>(Kokkos::Impl::ParallelFor, Kokkos::RangePolicy> const*) | | |-> 0.00000 - 0.0000% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} hipFuncGetAttributes | | Remainder: 0.0009 - 0.0068% | |-> 0.01029 - 0.0750% [64] {min=0.0002, max=0.0002, mean=0.0002, var=0.0000, std dev=0.0000} Kokkos::parallel_for [Type:HIP, Device: 0] Kokkos::View::initialization [sigxx] via memset | | |-> 0.00907 - 0.0661% [64] {min=0.0001, max=0.0002, mean=0.0001, var=0.0000, std dev=0.0000} hipDeviceSynchronize | | |-> 0.00043 - 0.0032% [64] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} hipMemsetAsync | | | |-> 0.00828 - 0.0604% [64] {min=0.0001, max=0.0001, mean=0.0001, var=0.0000, std dev=0.0000} GPU: FillBuffer | | Remainder: 0.0008 - 0.0058% | |-> 0.01014 - 0.0739% [64] {min=0.0002, max=0.0002, mean=0.0002, var=0.0000, std dev=0.0000} Kokkos::parallel_for [Type:HIP, Device: 0] Kokkos::View::initialization [vnewc] via memset | | |-> 0.00899 - 0.0655% [64] {min=0.0001, max=0.0002, mean=0.0001, var=0.0000, std dev=0.0000} hipDeviceSynchronize | | |-> 0.00040 - 0.0029% [64] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} hipMemsetAsync | | | |-> 0.00821 - 0.0598% [64] {min=0.0001, max=0.0001, mean=0.0001, var=0.0000, std dev=0.0000} GPU: FillBuffer | | Remainder: 0.0008 - 0.0055% | |-> 0.00998 - 0.0727% [64] {min=0.0002, max=0.0002, mean=0.0002, var=0.0000, std dev=0.0000} Kokkos::parallel_for [Type:HIP, Device: 0] ApplyMaterialPropertiesForElems C | | |-> 0.00873 - 0.0636% [64] {min=0.0001, max=0.0001, mean=0.0001, var=0.0000, std dev=0.0000} hipDeviceSynchronize | | |-> 0.00043 - 0.0031% [64] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} hipLaunchKernel | | | |-> 0.00803 - 0.0585% [64] {min=0.0001, max=0.0001, mean=0.0001, var=0.0000, std dev=0.0000} GPU: void Kokkos::Experimental::Impl::hip_parallel_launch_local_memory, Kokkos::RangePolicy>, 1024u, 1u>(Kokkos::Impl::ParallelFor, Kokkos::RangePolicy> const*) | | |-> 0.00000 - 0.0000% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} hipFuncGetAttributes | | Remainder: 0.0008 - 0.0060% | |-> 0.00989 - 0.0721% [64] {min=0.0001, max=0.0002, mean=0.0002, var=0.0000, std dev=0.0000} Kokkos::parallel_for [Type:HIP, Device: 0] Kokkos::View::initialization [sigzz] via memset | | |-> 0.00879 - 0.0641% [64] {min=0.0001, max=0.0001, mean=0.0001, var=0.0000, std dev=0.0000} hipDeviceSynchronize | | |-> 0.00033 - 0.0024% [64] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} hipMemsetAsync | | | |-> 0.00799 - 0.0582% [64] {min=0.0001, max=0.0001, mean=0.0001, var=0.0000, std dev=0.0000} GPU: FillBuffer | | Remainder: 0.0008 - 0.0056% | |-> 0.00987 - 0.0719% [64] {min=0.0001, max=0.0002, mean=0.0002, var=0.0000, std dev=0.0000} Kokkos::parallel_for [Type:HIP, Device: 0] Kokkos::View::initialization [determ] via memset | | |-> 0.00881 - 0.0642% [64] {min=0.0001, max=0.0001, mean=0.0001, var=0.0000, std dev=0.0000} hipDeviceSynchronize | | |-> 0.00030 - 0.0022% [64] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} hipMemsetAsync | | | |-> 0.00800 - 0.0583% [64] {min=0.0001, max=0.0001, mean=0.0001, var=0.0000, std dev=0.0000} GPU: FillBuffer | | Remainder: 0.0008 - 0.0055% | |-> 0.00972 - 0.0708% [64] {min=0.0001, max=0.0002, mean=0.0002, var=0.0000, std dev=0.0000} Kokkos::parallel_for [Type:HIP, Device: 0] Kokkos::View::initialization [sigyy] via memset | | |-> 0.00871 - 0.0635% [64] {min=0.0001, max=0.0001, mean=0.0001, var=0.0000, std dev=0.0000} hipDeviceSynchronize | | |-> 0.00033 - 0.0024% [64] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} hipMemsetAsync | | | |-> 0.00794 - 0.0578% [64] {min=0.0001, max=0.0001, mean=0.0001, var=0.0000, std dev=0.0000} GPU: FillBuffer | | Remainder: 0.0007 - 0.0049% | |-> 0.00923 - 0.0672% [64] {min=0.0001, max=0.0002, mean=0.0001, var=0.0000, std dev=0.0000} Kokkos::parallel_for [Type:HIP, Device: 0] ApplyAccelerationBoundaryConditionsForNodes C | | |-> 0.00468 - 0.0341% [64] {min=0.0000, max=0.0001, mean=0.0001, var=0.0000, std dev=0.0000} hipDeviceSynchronize | | |-> 0.00110 - 0.0080% [64] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} hipLaunchKernel | | | |-> 0.00092 - 0.0067% [64] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} GPU: void Kokkos::Experimental::Impl::hip_parallel_launch_constant_memory, Kokkos::RangePolicy> >() | | | Remainder: 0.0002 - 0.0013% | | |-> 0.00092 - 0.0067% [64] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} hipMemcpyToSymbolAsync | | | |-> 0.00046 - 0.0034% [64] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} hipMemcpyAsync | | | | |-> 0.00039 - 0.0028% [64] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} GPU: CopyHostToDevice | | | | Remainder: 0.0001 - 0.0006% | | | Remainder: 0.0005 - 0.0033% | | |-> 0.00040 - 0.0029% [64] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} hipEventRecord | | |-> 0.00012 - 0.0009% [64] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} hipEventSynchronize | | |-> 0.00000 - 0.0000% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} hipFuncGetAttributes | | Remainder: 0.0020 - 0.0146% | |-> 0.00880 - 0.0642% [64] {min=0.0001, max=0.0002, mean=0.0001, var=0.0000, std dev=0.0000} Kokkos::parallel_for [Type:HIP, Device: 0] ApplyAccelerationBoundaryConditionsForNodes B | | |-> 0.00455 - 0.0332% [64] {min=0.0000, max=0.0001, mean=0.0001, var=0.0000, std dev=0.0000} hipDeviceSynchronize | | |-> 0.00110 - 0.0080% [64] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} hipLaunchKernel | | | |-> 0.00092 - 0.0067% [64] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} GPU: void Kokkos::Experimental::Impl::hip_parallel_launch_constant_memory, Kokkos::RangePolicy> >() | | | Remainder: 0.0002 - 0.0013% | | |-> 0.00091 - 0.0067% [64] {min=0.0000, max=0.0001, mean=0.0000, var=0.0000, std dev=0.0000} hipMemcpyToSymbolAsync | | | |-> 0.00043 - 0.0031% [64] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} hipMemcpyAsync | | | | |-> 0.00039 - 0.0028% [64] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} GPU: CopyHostToDevice | | | | Remainder: 0.0000 - 0.0003% | | | Remainder: 0.0005 - 0.0035% | | |-> 0.00030 - 0.0022% [64] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} hipEventRecord | | |-> 0.00012 - 0.0008% [64] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} hipEventSynchronize | | |-> 0.00000 - 0.0000% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} hipFuncGetAttributes | | Remainder: 0.0018 - 0.0133% | |-> 0.00752 - 0.0548% [1] {min=0.0075, max=0.0075, mean=0.0075, var=0.0000, std dev=0.0000} Kokkos deep copy: Host regElemlist::entries_mirror -> HIP regElemlist::entries | | |-> 0.00745 - 0.0543% [1] {min=0.0074, max=0.0074, mean=0.0074, var=0.0000, std dev=0.0000} hipMemcpyAsync | | | |-> 0.00540 - 0.0394% [1] {min=0.0054, max=0.0054, mean=0.0054, var=0.0000, std dev=0.0000} GPU: CopyHostToDevice | | | Remainder: 0.0020 - 0.0149% | | |-> 0.00001 - 0.0001% [2] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} hipDeviceSynchronize | | Remainder: 0.0001 - 0.0005% | |-> 0.00581 - 0.0424% [612] {min=0.0000, max=0.0023, mean=0.0000, var=0.0000, std dev=0.0001} Kokkos::parallel_for [Type:HIP, Device: 0] Kokkos::ViewCopy-1D | | |-> 0.00228 - 0.0166% [2] {min=0.0000, max=0.0023, mean=0.0011, var=0.0000, std dev=0.0011} hipFuncGetAttributes | | |-> 0.00054 - 0.0039% [612] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} hipDeviceSynchronize | | Remainder: 0.0030 - 0.0218% | |-> 0.00571 - 0.0416% [1] {min=0.0057, max=0.0057, mean=0.0057, var=0.0000, std dev=0.0000} Kokkos::parallel_for [Type:OpenMP, Device: 0] Kokkos::View::initialization [m_nodeElemStart_mirror] via memset | | |-> 0.00001 - 0.0001% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} hipDeviceSynchronize | | Remainder: 0.0057 - 0.0415% | |-> 0.00560 - 0.0408% [1] {min=0.0056, max=0.0056, mean=0.0056, var=0.0000, std dev=0.0000} Kokkos::parallel_for [Type:OpenMP, Device: 0] Kokkos::View::initialization [nodeElemCount] via memset | | |-> 0.00001 - 0.0000% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} hipDeviceSynchronize | | Remainder: 0.0056 - 0.0408% | |-> 0.00543 - 0.0396% [1] {min=0.0054, max=0.0054, mean=0.0054, var=0.0000, std dev=0.0000} Kokkos::parallel_for [Type:OpenMP, Device: 0] Kokkos::View::initialization [regElemlist::entries_mirror] via memset | | |-> 0.00001 - 0.0001% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} hipDeviceSynchronize | | Remainder: 0.0054 - 0.0395% | |-> 0.00332 - 0.0242% [1] {min=0.0033, max=0.0033, mean=0.0033, var=0.0000, std dev=0.0000} Kokkos deep copy: Host m_nodeElemStart_mirror -> HIP m_nodeElemStart | | |-> 0.00329 - 0.0240% [1] {min=0.0033, max=0.0033, mean=0.0033, var=0.0000, std dev=0.0000} hipMemcpyAsync | | | |-> 0.00007 - 0.0005% [1] {min=0.0001, max=0.0001, mean=0.0001, var=0.0000, std dev=0.0000} GPU: CopyHostToDevice | | | Remainder: 0.0032 - 0.0235% | | |-> 0.00000 - 0.0000% [2] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} hipDeviceSynchronize | | Remainder: 0.0000 - 0.0002% | |-> 0.00319 - 0.0233% [2670] {min=0.0000, max=0.0001, mean=0.0000, var=0.0000, std dev=0.0000} hipStreamSynchronize | |-> 0.00281 - 0.0205% [13] {min=0.0001, max=0.0010, mean=0.0002, var=0.0000, std dev=0.0002} Kokkos deep copy: Host Scalar -> HIP | | |-> 0.00253 - 0.0184% [13] {min=0.0001, max=0.0010, mean=0.0002, var=0.0000, std dev=0.0002} Kokkos::parallel_for [Type:HIP, Device: 0] Kokkos::ViewFill-1D | | | |-> 0.00141 - 0.0102% [13] {min=0.0000, max=0.0001, mean=0.0001, var=0.0000, std dev=0.0000} hipDeviceSynchronize | | | |-> 0.00068 - 0.0049% [1] {min=0.0007, max=0.0007, mean=0.0007, var=0.0000, std dev=0.0000} hipHostMalloc | | | |-> 0.00011 - 0.0008% [13] {min=0.0000, max=0.0001, mean=0.0000, var=0.0000, std dev=0.0000} hipLaunchKernel | | | | |-> 0.00124 - 0.0091% [12] {min=0.0001, max=0.0001, mean=0.0001, var=0.0000, std dev=0.0000} GPU: void Kokkos::Experimental::Impl::hip_parallel_launch_local_memory, Kokkos::MemoryTraits<0u> >, Kokkos::LayoutRight, Kokkos::Experimental::HIP, 1, int>, Kokkos::RangePolicy >, Kokkos::Experimental::HIP>, 1024u, 1u>(Kokkos::Impl::ParallelFor, Kokkos::MemoryTraits<0u> >, Kokkos::LayoutRight, Kokkos::Experimental::HIP, 1, int>, Kokkos::RangePolicy >, Kokkos::Experimental::HIP> const*) | | | | |-> 0.00001 - 0.0000% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} GPU: void Kokkos::Experimental::Impl::hip_parallel_launch_local_memory, Kokkos::MemoryTraits<0u> >, Kokkos::LayoutRight, Kokkos::Experimental::HIP, 1, int>, Kokkos::RangePolicy >, Kokkos::Experimental::HIP>, 1024u, 1u>(Kokkos::Impl::ParallelFor, Kokkos::MemoryTraits<0u> >, Kokkos::LayoutRight, Kokkos::Experimental::HIP, 1, int>, Kokkos::RangePolicy >, Kokkos::Experimental::HIP> const*) | | | |-> 0.00006 - 0.0004% [3] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} hipMemcpyToSymbol | | | | |-> 0.00000 - 0.0000% [3] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} GPU: CopyHostToDevice | | | | Remainder: 0.0001 - 0.0004% | | | |-> 0.00004 - 0.0003% [2] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} hipFuncGetAttributes | | | Remainder: 0.0002 - 0.0017% | | |-> 0.00004 - 0.0003% [39] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} hipDeviceSynchronize | | Remainder: 0.0002 - 0.0018% | |-> 0.00114 - 0.0083% [5] {min=0.0000, max=0.0006, mean=0.0002, var=0.0000, std dev=0.0003} hipMemcpyToSymbol | | |-> 0.00001 - 0.0001% [5] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} GPU: CopyHostToDevice | | Remainder: 0.0011 - 0.0082% | |-> 0.00060 - 0.0044% [1] {min=0.0006, max=0.0006, mean=0.0006, var=0.0000, std dev=0.0000} Kokkos::parallel_for [Type:HIP, Device: 0] Kokkos::View::initialization [nodeElemCornerList] via memset | | |-> 0.00056 - 0.0041% [1] {min=0.0006, max=0.0006, mean=0.0006, var=0.0000, std dev=0.0000} hipDeviceSynchronize | | |-> 0.00002 - 0.0001% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} hipMemsetAsync | | | |-> 0.00055 - 0.0040% [1] {min=0.0005, max=0.0005, mean=0.0005, var=0.0000, std dev=0.0000} GPU: FillBuffer | | Remainder: 0.0000 - 0.0002% | |-> 0.00042 - 0.0030% [2] {min=0.0000, max=0.0004, mean=0.0002, var=0.0000, std dev=0.0002} hipHostMalloc | |-> 0.00012 - 0.0009% [1] {min=0.0001, max=0.0001, mean=0.0001, var=0.0000, std dev=0.0000} Kokkos::parallel_for [Type:HIP, Device: 0] Kokkos::View::initialization [m_nodeElemStart] via memset | | |-> 0.00008 - 0.0006% [1] {min=0.0001, max=0.0001, mean=0.0001, var=0.0000, std dev=0.0000} hipDeviceSynchronize | | |-> 0.00002 - 0.0001% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} hipMemsetAsync | | | |-> 0.00007 - 0.0005% [1] {min=0.0001, max=0.0001, mean=0.0001, var=0.0000, std dev=0.0000} GPU: FillBuffer | | Remainder: 0.0000 - 0.0001% | |-> 0.00009 - 0.0006% [1] {min=0.0001, max=0.0001, mean=0.0001, var=0.0000, std dev=0.0000} Kokkos::parallel_for [Type:HIP, Device: 0] Kokkos::View::initialization [regElemlist::entries] via memset | | |-> 0.00008 - 0.0006% [1] {min=0.0001, max=0.0001, mean=0.0001, var=0.0000, std dev=0.0000} hipDeviceSynchronize | | |-> 0.00000 - 0.0000% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} hipMemsetAsync | | | |-> 0.00006 - 0.0005% [1] {min=0.0001, max=0.0001, mean=0.0001, var=0.0000, std dev=0.0000} GPU: FillBuffer | | Remainder: 0.0000 - 0.0001% | |-> 0.00008 - 0.0006% [2] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} hipMemset | | |-> 0.00002 - 0.0002% [2] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} GPU: FillBuffer | | Remainder: 0.0001 - 0.0004% | |-> 0.00005 - 0.0004% [2] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} hipLaunchKernel | | |-> 0.00008 - 0.0006% [1] {min=0.0001, max=0.0001, mean=0.0001, var=0.0000, std dev=0.0000} GPU: desul::(anonymous namespace)::init_lock_arrays_hip_kernel() | | |-> 0.00002 - 0.0001% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} GPU: Kokkos::(anonymous namespace)::init_lock_array_kernel_atomic() | |-> 0.00004 - 0.0003% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} Kokkos::parallel_for [Type:HIP, Device: 0] Kokkos::View::initialization [regElemlist::row_map] via memset | | |-> 0.00002 - 0.0001% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} hipMemsetAsync | | | |-> 0.00001 - 0.0000% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} GPU: FillBuffer | | | Remainder: 0.0000 - 0.0001% | | |-> 0.00001 - 0.0001% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} hipDeviceSynchronize | | Remainder: 0.0000 - 0.0001% | |-> 0.00004 - 0.0003% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} Kokkos deep copy: Host regElemlist::row_map_mirror -> HIP regElemlist::row_map | | |-> 0.00002 - 0.0002% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} hipMemcpyAsync | | | |-> 0.00000 - 0.0000% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} GPU: CopyHostToDevice | | | Remainder: 0.0000 - 0.0001% | | |-> 0.00000 - 0.0000% [2] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} hipDeviceSynchronize | | Remainder: 0.0000 - 0.0001% | |-> 0.00003 - 0.0002% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} hipGetDeviceCount | |-> 0.00001 - 0.0000% [2] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} hipGetDeviceProperties | |-> 0.00001 - 0.0000% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} Kokkos::parallel_for [Type:OpenMP, Device: 0] Kokkos::View::initialization [regBinEnd] via memset | | |-> 0.00000 - 0.0000% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} hipDeviceSynchronize | | Remainder: 0.0000 - 0.0000% | |-> 0.00001 - 0.0000% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} Kokkos::parallel_for [Type:HIP, Device: 0] Kokkos::ViewCopy-2D | | |-> 0.00000 - 0.0000% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} hipDeviceSynchronize | | Remainder: 0.0000 - 0.0000% | |-> 0.00000 - 0.0000% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} Kokkos::parallel_for [Type:OpenMP, Device: 0] Kokkos::View::initialization [regElemlist::row_map_mirror] via memset | | |-> 0.00000 - 0.0000% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} hipDeviceSynchronize | | Remainder: 0.0000 - 0.0000% | |-> 0.00000 - 0.0000% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} hipEventCreate | |-> 0.00000 - 0.0000% [1] {min=0.0000, max=0.0000, mean=0.0000, var=0.0000, std dev=0.0000} hipSetDevice | Remainder: 1.9324 - 14.0847%