Reading Profile files in profile.* NODE 0;CONTEXT 0;THREAD 0: --------------------------------------------------------------------------------------- %Time Exclusive Inclusive #Call #Subrs Inclusive Name msec total msec usec/call --------------------------------------------------------------------------------------- 100.0 15 6,170 1 1 6170600 .TAU application 99.7 205 6,154 1 24183 6154699 .TAU application => int taupreload_main(int, char **, char **) 99.7 205 6,154 1 24183 6154699 int taupreload_main(int, char **, char **) 89.1 5,500 5,500 6 0 916694 .TAU application => int taupreload_main(int, char **, char **) => hipDeviceSynchronize 89.1 5,500 5,500 6 0 916694 hipDeviceSynchronize 4.5 278 278 22 0 12643 .TAU application => int taupreload_main(int, char **, char **) => hipMemcpyHtoD 4.5 278 278 22 0 12643 hipMemcpyHtoD 1.6 97 97 24078 0 4 .TAU application => int taupreload_main(int, char **, char **) => hipLaunchKernel 1.6 97 97 24078 0 4 hipLaunchKernel 1.2 71 71 3 0 23836 .TAU application => int taupreload_main(int, char **, char **) => hipModuleLoadData 1.2 71 71 3 0 23836 hipModuleLoadData 0.0 1 1 12 0 115 .TAU application => int taupreload_main(int, char **, char **) => hipMemcpyDtoH 0.0 1 1 12 0 115 hipMemcpyDtoH 0.0 0.379 0.379 6 0 63 .TAU application => int taupreload_main(int, char **, char **) => hipFree 0.0 0.379 0.379 6 0 63 hipFree 0.0 0.197 0.197 30 0 7 .TAU application => int taupreload_main(int, char **, char **) => hipExtModuleLaunchKernel 0.0 0.197 0.197 30 0 7 hipExtModuleLaunchKernel 0.0 0.147 0.147 8 0 18 .TAU application => int taupreload_main(int, char **, char **) => hipMalloc 0.0 0.147 0.147 8 0 18 hipMalloc 0.0 0.032 0.032 8 0 4 .TAU application => int taupreload_main(int, char **, char **) => hipModuleGetFunction 0.0 0.032 0.032 8 0 4 hipModuleGetFunction 0.0 0.01 0.01 1 0 10 .TAU application => int taupreload_main(int, char **, char **) => hipSetDevice 0.0 0.01 0.01 1 0 10 hipSetDevice 0.0 0.009 0.009 1 0 9 .TAU application => int taupreload_main(int, char **, char **) => hipGetDeviceCount 0.0 0.009 0.009 4 0 2 .TAU application => int taupreload_main(int, char **, char **) => hipGetDeviceProperties 0.0 0.009 0.009 1 0 9 hipGetDeviceCount 0.0 0.009 0.009 4 0 2 hipGetDeviceProperties 0.0 0.008 0.008 3 0 3 .TAU application => int taupreload_main(int, char **, char **) => hipGetDevice 0.0 0.008 0.008 1 0 8 .TAU application => int taupreload_main(int, char **, char **) => pthread_join 0.0 0.008 0.008 3 0 3 hipGetDevice 0.0 0.008 0.008 1 0 8 pthread_join NODE 0;CONTEXT 0;THREAD 1: --------------------------------------------------------------------------------------- %Time Exclusive Inclusive #Call #Subrs Inclusive Name msec total msec usec/call --------------------------------------------------------------------------------------- 100.0 276 5,833 1 24142 5833960 .TAU application 87.2 5,084 5,084 6 0 847401 .TAU application => KernelExecution void bdsqrKernel(int, int, int, int, double*, long, double*, long, double*, int, int, long, double*, int, int, long, double*, int, int, long, int*, int, double, double, double, double, double*, long) 87.2 5,084 5,084 6 0 847401 KernelExecution void bdsqrKernel(int, int, int, int, double*, long, double*, long, double*, int, int, long, double*, int, int, long, double*, int, int, long, int*, int, double, double, double, double, double*, long) 3.3 191 191 1992 0 96 .TAU application => KernelExecution void gemvt_kernel(int, int, double const*, long, double const*, long, int, long, double const*, long, int, long, double const*, long, double*, long, int, long) 3.3 191 191 1992 0 96 KernelExecution void gemvt_kernel(int, int, double const*, long, double const*, long, int, long, double const*, long, int, long, double const*, long, double*, long, int, long) 2.5 143 143 2790 0 52 .TAU application => KernelExecution void ger_kernel<32, 32, 2, false, double, double const*, double const*, double*>(int, int, double const*, long, double const*, long, int, long, double const*, long, int, long, double*, long, int, long) 2.5 143 143 2790 0 52 KernelExecution void ger_kernel<32, 32, 2, false, double, double const*, double const*, double*>(int, int, double const*, long, double const*, long, int, long, double const*, long, int, long, double*, long, int, long) 0.6 32 32 22 0 1462 .TAU application => CopyHostToDevice 0.6 32 32 22 0 1462 CopyHostToDevice 0.4 25 25 3810 0 7 .TAU application => KernelExecution void gemvn_kernel<64, 16, int, double, double const*, double, double>(int, int, double const*, long, double const*, long, int, long, double const*, long, int, long, double const*, long, double*, long, int, long) 0.4 25 25 3810 0 7 KernelExecution void gemvn_kernel<64, 16, int, double, double const*, double, double>(int, int, double const*, long, double const*, long, int, long, double const*, long, int, long, double const*, long, double*, long, int, long) 0.4 23 23 4332 0 5 .TAU application => KernelExecution void rocblas_scal_kernel<256, double, double const*, double*>(int, double const*, long, double*, long, int, long) 0.4 23 23 4332 0 5 KernelExecution void rocblas_scal_kernel<256, double, double const*, double*>(int, double const*, long, double*, long, int, long) 0.2 10 10 2256 0 5 .TAU application => KernelExecution void gemvt_kernel(int, int, double const*, long, double const*, long, int, long, double const*, long, int, long, double const*, long, double*, long, int, long) 0.2 10 10 2256 0 5 KernelExecution void gemvt_kernel(int, int, double const*, long, double const*, long, int, long, double const*, long, int, long, double const*, long, double*, long, int, long) 0.1 8 8 1794 0 5 .TAU application => KernelExecution void set_diag(double*, int, long, double*, int, int, long, int, bool) 0.1 8 8 1794 0 5 KernelExecution void set_diag(double*, int, long, double*, int, int, long, int, bool) 0.1 8 8 1794 0 5 .TAU application => KernelExecution void subtract_tau(int, int, double*, int, int, long, double*, long) 0.1 8 8 1794 0 5 KernelExecution void subtract_tau(int, int, double*, int, int, long, double*, long) 0.1 8 8 1782 0 5 .TAU application => KernelExecution void set_taubeta(double*, long, double*, double*, int, long) 0.1 8 8 1782 0 5 KernelExecution void set_taubeta(double*, long, double*, double*, int, long) 0.1 5 5 1182 0 5 .TAU application => KernelExecution void rocblas_dot_kernel_magsq(int, double const*, long, int, long, double*, double*) 0.1 5 5 1182 0 5 KernelExecution void rocblas_dot_kernel_magsq(int, double const*, long, int, long, double*, double*) 0.1 5 5 1050 0 5 .TAU application => KernelExecution void restore_diag(double*, int, long, double*, int, int, long, int) 0.1 5 5 1050 0 5 KernelExecution void restore_diag(double*, int, long, double*, int, int, long, int) 0.1 4 4 600 0 8 .TAU application => KernelExecution void rocblas_dot_kernel_magsq(int, double const*, long, int, long, double*, double*) 0.1 4 4 600 0 8 KernelExecution void rocblas_dot_kernel_magsq(int, double const*, long, int, long, double*, double*) 0.1 2 2 600 0 5 .TAU application => KernelExecution void rocblas_dot_kernel_reduce<512, 2, double, double>(int, double*, double*) 0.1 2 2 600 0 5 KernelExecution void rocblas_dot_kernel_reduce<512, 2, double, double>(int, double*, double*) 0.0 1 1 6 0 194 .TAU application => KernelExecution Cijk_Ailk_Bljk_DB_MT128x128x8_MI16x16x4x1_SN_1LDSB1_APM1_ABV0_ACED0_AF0EM1_AF1EM1_AMAS3_ASGT_ASAE01_ASCE01_ASEM1_AAC0_BL1_BS1_DTL0_DTVA0_DVO0_ETSP_EPS0_FL0_GRVW2_GSU1_GSUAMB_GLS1_ISA90a_IU1_K1_KLA_LBSPP0_LPA0_LPB0_LDL1_LRVW1_LWPMn1_LDW0_MAC_MIAV0_MDA2_NTC0_NTD0_NEPBS4_NLCA1_NLCB1_ONLL1_OPLV0_PK0_PAP0_PGR2_PLR3_RK0_SIA3_SS1_SU0_SUM0_SUS0_SCIUI1_SPO1_SRVW0_SSO6_SVW2_SNLL0_TT4_64_TLDS0_USFGROn1_VAW1_VSn1_VW2_WSGRA0_WSGRB0_WS64_WG32_8_1_WGM11 0.0 1 1 6 0 194 KernelExecution Cijk_Ailk_Bljk_DB_MT128x128x8_MI16x16x4x1_SN_1LDSB1_APM1_ABV0_ACED0_AF0EM1_AF1EM1_AMAS3_ASGT_ASAE01_ASCE01_ASEM1_AAC0_BL1_BS1_DTL0_DTVA0_DVO0_ETSP_EPS0_FL0_GRVW2_GSU1_GSUAMB_GLS1_ISA90a_IU1_K1_KLA_LBSPP0_LPA0_LPB0_LDL1_LRVW1_LWPMn1_LDW0_MAC_MIAV0_MDA2_NTC0_NTD0_NEPBS4_NLCA1_NLCB1_ONLL1_OPLV0_PK0_PAP0_PGR2_PLR3_RK0_SIA3_SS1_SU0_SUM0_SUS0_SCIUI1_SPO1_SRVW0_SSO6_SVW2_SNLL0_TT4_64_TLDS0_USFGROn1_VAW1_VSn1_VW2_WSGRA0_WSGRB0_WS64_WG32_8_1_WGM11 0.0 0.414 0.414 12 0 34 .TAU application => KernelExecution void org2r_init_ident(int, int, int, double*, int, int, long) 0.0 0.414 0.414 12 0 34 KernelExecution void org2r_init_ident(int, int, int, double*, int, int, long) 0.0 0.112 0.112 12 0 9 .TAU application => KernelExecution Cijk_Ailk_Bjlk_DB_MT16x16x8_MI16x16x4x1_SN_1LDSB1_APM1_ABV0_ACED0_AF0EM1_AF1EM1_AMAS0_ASGT_ASAE01_ASCE01_ASEM1_AAC0_BL1_BS1_DTL0_DTVA0_DVO0_ETSP_EPS0_FL0_GRVW1_GSU1_GSUAMB_GLS1_ISA90a_IU2_K1_KLA_LBSPP0_LPA0_LPB0_LDL1_LRVW1_LWPMn1_LDW0_MAC_MIAV0_MDA2_NTC0_NTD0_NEPBS1_NLCA1_NLCB1_ONLL1_OPLV0_PK0_PAP0_PGR2_PLR1_RK0_SIA3_SS0_SU32_SUM0_SUS256_SCIUI1_SPO1_SRVW0_SSO6_SVW1_SNLL0_TT1_16_TLDS0_USFGROn1_VAW1_VSn1_VW1_WSGRA0_WSGRB0_WS64_WG16_4_1_WGM5 0.0 0.112 0.112 12 0 9 KernelExecution Cijk_Ailk_Bjlk_DB_MT16x16x8_MI16x16x4x1_SN_1LDSB1_APM1_ABV0_ACED0_AF0EM1_AF1EM1_AMAS0_ASGT_ASAE01_ASCE01_ASEM1_AAC0_BL1_BS1_DTL0_DTVA0_DVO0_ETSP_EPS0_FL0_GRVW1_GSU1_GSUAMB_GLS1_ISA90a_IU2_K1_KLA_LBSPP0_LPA0_LPB0_LDL1_LRVW1_LWPMn1_LDW0_MAC_MIAV0_MDA2_NTC0_NTD0_NEPBS1_NLCA1_NLCB1_ONLL1_OPLV0_PK0_PAP0_PGR2_PLR1_RK0_SIA3_SS0_SU32_SUM0_SUS256_SCIUI1_SPO1_SRVW0_SSO6_SVW1_SNLL0_TT1_16_TLDS0_USFGROn1_VAW1_VSn1_VW1_WSGRA0_WSGRB0_WS64_WG16_4_1_WGM5 0.0 0.098 0.098 18 0 5 .TAU application => KernelExecution void restau(int, double*, long) 0.0 0.098 0.098 18 0 5 KernelExecution void restau(int, double*, long) 0.0 0.089 0.089 12 0 7 .TAU application => KernelExecution Cijk_Ailk_Bljk_DB_MT32x16x16_MI16x16x4x1_SN_1LDSB0_APM1_ABV0_ACED0_AF0EM1_AF1EM1_AMAS0_ASGT_ASAE01_ASCE01_ASEM1_AAC0_BL1_BS1_DTL0_DTVA0_DVO0_ETSP_EPS0_FL0_GRVW1_GSU1_GSUAMB_GLS1_ISA90a_IU2_K1_KLA_LBSPP0_LPA0_LPB0_LDL1_LRVW1_LWPMn1_LDW0_MAC_MIAV0_MDA2_NTC0_NTD0_NEPBS4_NLCA1_NLCB1_ONLL1_OPLV0_PK0_PAP0_PGR2_PLR3_RK0_SIA3_SS1_SU0_SUM0_SUS0_SCIUI1_SPO1_SRVW0_SSO6_SVW1_SNLL0_TT1_16_TLDS1_USFGROn1_VAW1_VSn1_VW1_WSGRA0_WSGRB0_WS64_WG32_4_1_WGM11 0.0 0.089 0.089 12 0 7 KernelExecution Cijk_Ailk_Bljk_DB_MT32x16x16_MI16x16x4x1_SN_1LDSB0_APM1_ABV0_ACED0_AF0EM1_AF1EM1_AMAS0_ASGT_ASAE01_ASCE01_ASEM1_AAC0_BL1_BS1_DTL0_DTVA0_DVO0_ETSP_EPS0_FL0_GRVW1_GSU1_GSUAMB_GLS1_ISA90a_IU2_K1_KLA_LBSPP0_LPA0_LPB0_LDL1_LRVW1_LWPMn1_LDW0_MAC_MIAV0_MDA2_NTC0_NTD0_NEPBS4_NLCA1_NLCB1_ONLL1_OPLV0_PK0_PAP0_PGR2_PLR3_RK0_SIA3_SS1_SU0_SUM0_SUS0_SCIUI1_SPO1_SRVW0_SSO6_SVW1_SNLL0_TT1_16_TLDS1_USFGROn1_VAW1_VSn1_VW1_WSGRA0_WSGRB0_WS64_WG32_4_1_WGM11 0.0 0.084 0.084 18 0 5 .TAU application => KernelExecution void reset_batch_info(double*, long, int, int) 0.0 0.084 0.084 18 0 5 KernelExecution void reset_batch_info(double*, long, int, int) 0.0 0.062 0.062 12 0 5 .TAU application => CopyDeviceToHost 0.0 0.062 0.062 12 0 5 .TAU application => KernelExecution void copy_mat(int, int, double*, int, int, long, double*, int, int, long, no_mask, rocblas_fill_, rocblas_diagonal_) 0.0 0.062 0.062 12 0 5 CopyDeviceToHost 0.0 0.062 0.062 12 0 5 KernelExecution void copy_mat(int, int, double*, int, int, long, double*, int, int, long, no_mask, rocblas_fill_, rocblas_diagonal_) 0.0 0.059 0.059 12 0 5 .TAU application => KernelExecution void copyshift_down(bool, int, double*, int, int, long, double*, int, int, long) 0.0 0.059 0.059 12 0 5 KernelExecution void copyshift_down(bool, int, double*, int, int, long, double*, int, int, long) 0.0 0.03 0.03 6 0 5 .TAU application => KernelExecution void set_zero(int, int, double*, int, int, long, rocblas_fill_) 0.0 0.03 0.03 6 0 5 KernelExecution void set_zero(int, int, double*, int, int, long, rocblas_fill_) 0.0 0.028 0.028 6 0 5 .TAU application => KernelExecution void orgl2_init_ident(int, int, int, double*, int, int, long) 0.0 0.028 0.028 6 0 5 KernelExecution void orgl2_init_ident(int, int, int, double*, int, int, long) 0.0 0.027 0.027 6 0 4 .TAU application => KernelExecution void iota_n(double*, unsigned int, double) 0.0 0.027 0.027 6 0 4 KernelExecution void iota_n(double*, unsigned int, double) FUNCTION SUMMARY (total): --------------------------------------------------------------------------------------- %Time Exclusive Inclusive #Call #Subrs Inclusive Name msec total msec usec/call --------------------------------------------------------------------------------------- 100.0 292 12,004 2 24143 6002280 .TAU application 51.3 205 6,154 1 24183 6154699 .TAU application => int taupreload_main(int, char **, char **) 51.3 205 6,154 1 24183 6154699 int taupreload_main(int, char **, char **) 45.8 5,500 5,500 6 0 916694 .TAU application => int taupreload_main(int, char **, char **) => hipDeviceSynchronize 45.8 5,500 5,500 6 0 916694 hipDeviceSynchronize 42.4 5,084 5,084 6 0 847401 .TAU application => KernelExecution void bdsqrKernel(int, int, int, int, double*, long, double*, long, double*, int, int, long, double*, int, int, long, double*, int, int, long, int*, int, double, double, double, double, double*, long) 42.4 5,084 5,084 6 0 847401 KernelExecution void bdsqrKernel(int, int, int, int, double*, long, double*, long, double*, int, int, long, double*, int, int, long, double*, int, int, long, int*, int, double, double, double, double, double*, long) 2.3 278 278 22 0 12643 .TAU application => int taupreload_main(int, char **, char **) => hipMemcpyHtoD 2.3 278 278 22 0 12643 hipMemcpyHtoD 1.6 191 191 1992 0 96 .TAU application => KernelExecution void gemvt_kernel(int, int, double const*, long, double const*, long, int, long, double const*, long, int, long, double const*, long, double*, long, int, long) 1.6 191 191 1992 0 96 KernelExecution void gemvt_kernel(int, int, double const*, long, double const*, long, int, long, double const*, long, int, long, double const*, long, double*, long, int, long) 1.2 143 143 2790 0 52 .TAU application => KernelExecution void ger_kernel<32, 32, 2, false, double, double const*, double const*, double*>(int, int, double const*, long, double const*, long, int, long, double const*, long, int, long, double*, long, int, long) 1.2 143 143 2790 0 52 KernelExecution void ger_kernel<32, 32, 2, false, double, double const*, double const*, double*>(int, int, double const*, long, double const*, long, int, long, double const*, long, int, long, double*, long, int, long) 0.8 97 97 24078 0 4 .TAU application => int taupreload_main(int, char **, char **) => hipLaunchKernel 0.8 97 97 24078 0 4 hipLaunchKernel 0.6 71 71 3 0 23836 .TAU application => int taupreload_main(int, char **, char **) => hipModuleLoadData 0.6 71 71 3 0 23836 hipModuleLoadData 0.3 32 32 22 0 1462 .TAU application => CopyHostToDevice 0.3 32 32 22 0 1462 CopyHostToDevice 0.2 25 25 3810 0 7 .TAU application => KernelExecution void gemvn_kernel<64, 16, int, double, double const*, double, double>(int, int, double const*, long, double const*, long, int, long, double const*, long, int, long, double const*, long, double*, long, int, long) 0.2 25 25 3810 0 7 KernelExecution void gemvn_kernel<64, 16, int, double, double const*, double, double>(int, int, double const*, long, double const*, long, int, long, double const*, long, int, long, double const*, long, double*, long, int, long) 0.2 23 23 4332 0 5 .TAU application => KernelExecution void rocblas_scal_kernel<256, double, double const*, double*>(int, double const*, long, double*, long, int, long) 0.2 23 23 4332 0 5 KernelExecution void rocblas_scal_kernel<256, double, double const*, double*>(int, double const*, long, double*, long, int, long) 0.1 10 10 2256 0 5 .TAU application => KernelExecution void gemvt_kernel(int, int, double const*, long, double const*, long, int, long, double const*, long, int, long, double const*, long, double*, long, int, long) 0.1 10 10 2256 0 5 KernelExecution void gemvt_kernel(int, int, double const*, long, double const*, long, int, long, double const*, long, int, long, double const*, long, double*, long, int, long) 0.1 8 8 1794 0 5 .TAU application => KernelExecution void set_diag(double*, int, long, double*, int, int, long, int, bool) 0.1 8 8 1794 0 5 KernelExecution void set_diag(double*, int, long, double*, int, int, long, int, bool) 0.1 8 8 1794 0 5 .TAU application => KernelExecution void subtract_tau(int, int, double*, int, int, long, double*, long) 0.1 8 8 1794 0 5 KernelExecution void subtract_tau(int, int, double*, int, int, long, double*, long) 0.1 8 8 1782 0 5 .TAU application => KernelExecution void set_taubeta(double*, long, double*, double*, int, long) 0.1 8 8 1782 0 5 KernelExecution void set_taubeta(double*, long, double*, double*, int, long) 0.0 5 5 1182 0 5 .TAU application => KernelExecution void rocblas_dot_kernel_magsq(int, double const*, long, int, long, double*, double*) 0.0 5 5 1182 0 5 KernelExecution void rocblas_dot_kernel_magsq(int, double const*, long, int, long, double*, double*) 0.0 5 5 1050 0 5 .TAU application => KernelExecution void restore_diag(double*, int, long, double*, int, int, long, int) 0.0 5 5 1050 0 5 KernelExecution void restore_diag(double*, int, long, double*, int, int, long, int) 0.0 4 4 600 0 8 .TAU application => KernelExecution void rocblas_dot_kernel_magsq(int, double const*, long, int, long, double*, double*) 0.0 4 4 600 0 8 KernelExecution void rocblas_dot_kernel_magsq(int, double const*, long, int, long, double*, double*) 0.0 2 2 600 0 5 .TAU application => KernelExecution void rocblas_dot_kernel_reduce<512, 2, double, double>(int, double*, double*) 0.0 2 2 600 0 5 KernelExecution void rocblas_dot_kernel_reduce<512, 2, double, double>(int, double*, double*) 0.0 1 1 12 0 115 .TAU application => int taupreload_main(int, char **, char **) => hipMemcpyDtoH 0.0 1 1 12 0 115 hipMemcpyDtoH 0.0 1 1 6 0 194 .TAU application => KernelExecution Cijk_Ailk_Bljk_DB_MT128x128x8_MI16x16x4x1_SN_1LDSB1_APM1_ABV0_ACED0_AF0EM1_AF1EM1_AMAS3_ASGT_ASAE01_ASCE01_ASEM1_AAC0_BL1_BS1_DTL0_DTVA0_DVO0_ETSP_EPS0_FL0_GRVW2_GSU1_GSUAMB_GLS1_ISA90a_IU1_K1_KLA_LBSPP0_LPA0_LPB0_LDL1_LRVW1_LWPMn1_LDW0_MAC_MIAV0_MDA2_NTC0_NTD0_NEPBS4_NLCA1_NLCB1_ONLL1_OPLV0_PK0_PAP0_PGR2_PLR3_RK0_SIA3_SS1_SU0_SUM0_SUS0_SCIUI1_SPO1_SRVW0_SSO6_SVW2_SNLL0_TT4_64_TLDS0_USFGROn1_VAW1_VSn1_VW2_WSGRA0_WSGRB0_WS64_WG32_8_1_WGM11 0.0 1 1 6 0 194 KernelExecution Cijk_Ailk_Bljk_DB_MT128x128x8_MI16x16x4x1_SN_1LDSB1_APM1_ABV0_ACED0_AF0EM1_AF1EM1_AMAS3_ASGT_ASAE01_ASCE01_ASEM1_AAC0_BL1_BS1_DTL0_DTVA0_DVO0_ETSP_EPS0_FL0_GRVW2_GSU1_GSUAMB_GLS1_ISA90a_IU1_K1_KLA_LBSPP0_LPA0_LPB0_LDL1_LRVW1_LWPMn1_LDW0_MAC_MIAV0_MDA2_NTC0_NTD0_NEPBS4_NLCA1_NLCB1_ONLL1_OPLV0_PK0_PAP0_PGR2_PLR3_RK0_SIA3_SS1_SU0_SUM0_SUS0_SCIUI1_SPO1_SRVW0_SSO6_SVW2_SNLL0_TT4_64_TLDS0_USFGROn1_VAW1_VSn1_VW2_WSGRA0_WSGRB0_WS64_WG32_8_1_WGM11 0.0 0.414 0.414 12 0 34 .TAU application => KernelExecution void org2r_init_ident(int, int, int, double*, int, int, long) 0.0 0.414 0.414 12 0 34 KernelExecution void org2r_init_ident(int, int, int, double*, int, int, long) 0.0 0.379 0.379 6 0 63 .TAU application => int taupreload_main(int, char **, char **) => hipFree 0.0 0.379 0.379 6 0 63 hipFree 0.0 0.197 0.197 30 0 7 .TAU application => int taupreload_main(int, char **, char **) => hipExtModuleLaunchKernel 0.0 0.197 0.197 30 0 7 hipExtModuleLaunchKernel 0.0 0.147 0.147 8 0 18 .TAU application => int taupreload_main(int, char **, char **) => hipMalloc 0.0 0.147 0.147 8 0 18 hipMalloc 0.0 0.112 0.112 12 0 9 .TAU application => KernelExecution Cijk_Ailk_Bjlk_DB_MT16x16x8_MI16x16x4x1_SN_1LDSB1_APM1_ABV0_ACED0_AF0EM1_AF1EM1_AMAS0_ASGT_ASAE01_ASCE01_ASEM1_AAC0_BL1_BS1_DTL0_DTVA0_DVO0_ETSP_EPS0_FL0_GRVW1_GSU1_GSUAMB_GLS1_ISA90a_IU2_K1_KLA_LBSPP0_LPA0_LPB0_LDL1_LRVW1_LWPMn1_LDW0_MAC_MIAV0_MDA2_NTC0_NTD0_NEPBS1_NLCA1_NLCB1_ONLL1_OPLV0_PK0_PAP0_PGR2_PLR1_RK0_SIA3_SS0_SU32_SUM0_SUS256_SCIUI1_SPO1_SRVW0_SSO6_SVW1_SNLL0_TT1_16_TLDS0_USFGROn1_VAW1_VSn1_VW1_WSGRA0_WSGRB0_WS64_WG16_4_1_WGM5 0.0 0.112 0.112 12 0 9 KernelExecution Cijk_Ailk_Bjlk_DB_MT16x16x8_MI16x16x4x1_SN_1LDSB1_APM1_ABV0_ACED0_AF0EM1_AF1EM1_AMAS0_ASGT_ASAE01_ASCE01_ASEM1_AAC0_BL1_BS1_DTL0_DTVA0_DVO0_ETSP_EPS0_FL0_GRVW1_GSU1_GSUAMB_GLS1_ISA90a_IU2_K1_KLA_LBSPP0_LPA0_LPB0_LDL1_LRVW1_LWPMn1_LDW0_MAC_MIAV0_MDA2_NTC0_NTD0_NEPBS1_NLCA1_NLCB1_ONLL1_OPLV0_PK0_PAP0_PGR2_PLR1_RK0_SIA3_SS0_SU32_SUM0_SUS256_SCIUI1_SPO1_SRVW0_SSO6_SVW1_SNLL0_TT1_16_TLDS0_USFGROn1_VAW1_VSn1_VW1_WSGRA0_WSGRB0_WS64_WG16_4_1_WGM5 0.0 0.098 0.098 18 0 5 .TAU application => KernelExecution void restau(int, double*, long) 0.0 0.098 0.098 18 0 5 KernelExecution void restau(int, double*, long) 0.0 0.089 0.089 12 0 7 .TAU application => KernelExecution Cijk_Ailk_Bljk_DB_MT32x16x16_MI16x16x4x1_SN_1LDSB0_APM1_ABV0_ACED0_AF0EM1_AF1EM1_AMAS0_ASGT_ASAE01_ASCE01_ASEM1_AAC0_BL1_BS1_DTL0_DTVA0_DVO0_ETSP_EPS0_FL0_GRVW1_GSU1_GSUAMB_GLS1_ISA90a_IU2_K1_KLA_LBSPP0_LPA0_LPB0_LDL1_LRVW1_LWPMn1_LDW0_MAC_MIAV0_MDA2_NTC0_NTD0_NEPBS4_NLCA1_NLCB1_ONLL1_OPLV0_PK0_PAP0_PGR2_PLR3_RK0_SIA3_SS1_SU0_SUM0_SUS0_SCIUI1_SPO1_SRVW0_SSO6_SVW1_SNLL0_TT1_16_TLDS1_USFGROn1_VAW1_VSn1_VW1_WSGRA0_WSGRB0_WS64_WG32_4_1_WGM11 0.0 0.089 0.089 12 0 7 KernelExecution Cijk_Ailk_Bljk_DB_MT32x16x16_MI16x16x4x1_SN_1LDSB0_APM1_ABV0_ACED0_AF0EM1_AF1EM1_AMAS0_ASGT_ASAE01_ASCE01_ASEM1_AAC0_BL1_BS1_DTL0_DTVA0_DVO0_ETSP_EPS0_FL0_GRVW1_GSU1_GSUAMB_GLS1_ISA90a_IU2_K1_KLA_LBSPP0_LPA0_LPB0_LDL1_LRVW1_LWPMn1_LDW0_MAC_MIAV0_MDA2_NTC0_NTD0_NEPBS4_NLCA1_NLCB1_ONLL1_OPLV0_PK0_PAP0_PGR2_PLR3_RK0_SIA3_SS1_SU0_SUM0_SUS0_SCIUI1_SPO1_SRVW0_SSO6_SVW1_SNLL0_TT1_16_TLDS1_USFGROn1_VAW1_VSn1_VW1_WSGRA0_WSGRB0_WS64_WG32_4_1_WGM11 0.0 0.084 0.084 18 0 5 .TAU application => KernelExecution void reset_batch_info(double*, long, int, int) 0.0 0.084 0.084 18 0 5 KernelExecution void reset_batch_info(double*, long, int, int) 0.0 0.062 0.062 12 0 5 .TAU application => CopyDeviceToHost 0.0 0.062 0.062 12 0 5 .TAU application => KernelExecution void copy_mat(int, int, double*, int, int, long, double*, int, int, long, no_mask, rocblas_fill_, rocblas_diagonal_) 0.0 0.062 0.062 12 0 5 CopyDeviceToHost 0.0 0.062 0.062 12 0 5 KernelExecution void copy_mat(int, int, double*, int, int, long, double*, int, int, long, no_mask, rocblas_fill_, rocblas_diagonal_) 0.0 0.059 0.059 12 0 5 .TAU application => KernelExecution void copyshift_down(bool, int, double*, int, int, long, double*, int, int, long) 0.0 0.059 0.059 12 0 5 KernelExecution void copyshift_down(bool, int, double*, int, int, long, double*, int, int, long) 0.0 0.032 0.032 8 0 4 .TAU application => int taupreload_main(int, char **, char **) => hipModuleGetFunction 0.0 0.032 0.032 8 0 4 hipModuleGetFunction 0.0 0.03 0.03 6 0 5 .TAU application => KernelExecution void set_zero(int, int, double*, int, int, long, rocblas_fill_) 0.0 0.03 0.03 6 0 5 KernelExecution void set_zero(int, int, double*, int, int, long, rocblas_fill_) 0.0 0.028 0.028 6 0 5 .TAU application => KernelExecution void orgl2_init_ident(int, int, int, double*, int, int, long) 0.0 0.028 0.028 6 0 5 KernelExecution void orgl2_init_ident(int, int, int, double*, int, int, long) 0.0 0.027 0.027 6 0 4 .TAU application => KernelExecution void iota_n(double*, unsigned int, double) 0.0 0.027 0.027 6 0 4 KernelExecution void iota_n(double*, unsigned int, double) 0.0 0.01 0.01 1 0 10 .TAU application => int taupreload_main(int, char **, char **) => hipSetDevice 0.0 0.01 0.01 1 0 10 hipSetDevice 0.0 0.009 0.009 1 0 9 .TAU application => int taupreload_main(int, char **, char **) => hipGetDeviceCount 0.0 0.009 0.009 4 0 2 .TAU application => int taupreload_main(int, char **, char **) => hipGetDeviceProperties 0.0 0.009 0.009 1 0 9 hipGetDeviceCount 0.0 0.009 0.009 4 0 2 hipGetDeviceProperties 0.0 0.008 0.008 3 0 3 .TAU application => int taupreload_main(int, char **, char **) => hipGetDevice 0.0 0.008 0.008 1 0 8 .TAU application => int taupreload_main(int, char **, char **) => pthread_join 0.0 0.008 0.008 3 0 3 hipGetDevice 0.0 0.008 0.008 1 0 8 pthread_join FUNCTION SUMMARY (mean): --------------------------------------------------------------------------------------- %Time Exclusive Inclusive #Call #Subrs Inclusive Name msec total msec usec/call --------------------------------------------------------------------------------------- 100.0 146 6,002 1 12071.5 6002280 .TAU application 51.3 102 3,077 0.5 12091.5 6154699 .TAU application => int taupreload_main(int, char **, char **) 51.3 102 3,077 0.5 12091.5 6154699 int taupreload_main(int, char **, char **) 45.8 2,750 2,750 3 0 916694 .TAU application => int taupreload_main(int, char **, char **) => hipDeviceSynchronize 45.8 2,750 2,750 3 0 916694 hipDeviceSynchronize 42.4 2,542 2,542 3 0 847401 .TAU application => KernelExecution void bdsqrKernel(int, int, int, int, double*, long, double*, long, double*, int, int, long, double*, int, int, long, double*, int, int, long, int*, int, double, double, double, double, double*, long) 42.4 2,542 2,542 3 0 847401 KernelExecution void bdsqrKernel(int, int, int, int, double*, long, double*, long, double*, int, int, long, double*, int, int, long, double*, int, int, long, int*, int, double, double, double, double, double*, long) 2.3 139 139 11 0 12643 .TAU application => int taupreload_main(int, char **, char **) => hipMemcpyHtoD 2.3 139 139 11 0 12643 hipMemcpyHtoD 1.6 95 95 996 0 96 .TAU application => KernelExecution void gemvt_kernel(int, int, double const*, long, double const*, long, int, long, double const*, long, int, long, double const*, long, double*, long, int, long) 1.6 95 95 996 0 96 KernelExecution void gemvt_kernel(int, int, double const*, long, double const*, long, int, long, double const*, long, int, long, double const*, long, double*, long, int, long) 1.2 71 71 1395 0 52 .TAU application => KernelExecution void ger_kernel<32, 32, 2, false, double, double const*, double const*, double*>(int, int, double const*, long, double const*, long, int, long, double const*, long, int, long, double*, long, int, long) 1.2 71 71 1395 0 52 KernelExecution void ger_kernel<32, 32, 2, false, double, double const*, double const*, double*>(int, int, double const*, long, double const*, long, int, long, double const*, long, int, long, double*, long, int, long) 0.8 48 48 12039 0 4 .TAU application => int taupreload_main(int, char **, char **) => hipLaunchKernel 0.8 48 48 12039 0 4 hipLaunchKernel 0.6 35 35 1.5 0 23836 .TAU application => int taupreload_main(int, char **, char **) => hipModuleLoadData 0.6 35 35 1.5 0 23836 hipModuleLoadData 0.3 16 16 11 0 1462 .TAU application => CopyHostToDevice 0.3 16 16 11 0 1462 CopyHostToDevice 0.2 12 12 1905 0 7 .TAU application => KernelExecution void gemvn_kernel<64, 16, int, double, double const*, double, double>(int, int, double const*, long, double const*, long, int, long, double const*, long, int, long, double const*, long, double*, long, int, long) 0.2 12 12 1905 0 7 KernelExecution void gemvn_kernel<64, 16, int, double, double const*, double, double>(int, int, double const*, long, double const*, long, int, long, double const*, long, int, long, double const*, long, double*, long, int, long) 0.2 11 11 2166 0 5 .TAU application => KernelExecution void rocblas_scal_kernel<256, double, double const*, double*>(int, double const*, long, double*, long, int, long) 0.2 11 11 2166 0 5 KernelExecution void rocblas_scal_kernel<256, double, double const*, double*>(int, double const*, long, double*, long, int, long) 0.1 5 5 1128 0 5 .TAU application => KernelExecution void gemvt_kernel(int, int, double const*, long, double const*, long, int, long, double const*, long, int, long, double const*, long, double*, long, int, long) 0.1 5 5 1128 0 5 KernelExecution void gemvt_kernel(int, int, double const*, long, double const*, long, int, long, double const*, long, int, long, double const*, long, double*, long, int, long) 0.1 4 4 897 0 5 .TAU application => KernelExecution void set_diag(double*, int, long, double*, int, int, long, int, bool) 0.1 4 4 897 0 5 KernelExecution void set_diag(double*, int, long, double*, int, int, long, int, bool) 0.1 4 4 897 0 5 .TAU application => KernelExecution void subtract_tau(int, int, double*, int, int, long, double*, long) 0.1 4 4 897 0 5 KernelExecution void subtract_tau(int, int, double*, int, int, long, double*, long) 0.1 4 4 891 0 5 .TAU application => KernelExecution void set_taubeta(double*, long, double*, double*, int, long) 0.1 4 4 891 0 5 KernelExecution void set_taubeta(double*, long, double*, double*, int, long) 0.0 2 2 591 0 5 .TAU application => KernelExecution void rocblas_dot_kernel_magsq(int, double const*, long, int, long, double*, double*) 0.0 2 2 591 0 5 KernelExecution void rocblas_dot_kernel_magsq(int, double const*, long, int, long, double*, double*) 0.0 2 2 525 0 5 .TAU application => KernelExecution void restore_diag(double*, int, long, double*, int, int, long, int) 0.0 2 2 525 0 5 KernelExecution void restore_diag(double*, int, long, double*, int, int, long, int) 0.0 2 2 300 0 8 .TAU application => KernelExecution void rocblas_dot_kernel_magsq(int, double const*, long, int, long, double*, double*) 0.0 2 2 300 0 8 KernelExecution void rocblas_dot_kernel_magsq(int, double const*, long, int, long, double*, double*) 0.0 1 1 300 0 5 .TAU application => KernelExecution void rocblas_dot_kernel_reduce<512, 2, double, double>(int, double*, double*) 0.0 1 1 300 0 5 KernelExecution void rocblas_dot_kernel_reduce<512, 2, double, double>(int, double*, double*) 0.0 0.69 0.69 6 0 115 .TAU application => int taupreload_main(int, char **, char **) => hipMemcpyDtoH 0.0 0.69 0.69 6 0 115 hipMemcpyDtoH 0.0 0.583 0.583 3 0 194 .TAU application => KernelExecution Cijk_Ailk_Bljk_DB_MT128x128x8_MI16x16x4x1_SN_1LDSB1_APM1_ABV0_ACED0_AF0EM1_AF1EM1_AMAS3_ASGT_ASAE01_ASCE01_ASEM1_AAC0_BL1_BS1_DTL0_DTVA0_DVO0_ETSP_EPS0_FL0_GRVW2_GSU1_GSUAMB_GLS1_ISA90a_IU1_K1_KLA_LBSPP0_LPA0_LPB0_LDL1_LRVW1_LWPMn1_LDW0_MAC_MIAV0_MDA2_NTC0_NTD0_NEPBS4_NLCA1_NLCB1_ONLL1_OPLV0_PK0_PAP0_PGR2_PLR3_RK0_SIA3_SS1_SU0_SUM0_SUS0_SCIUI1_SPO1_SRVW0_SSO6_SVW2_SNLL0_TT4_64_TLDS0_USFGROn1_VAW1_VSn1_VW2_WSGRA0_WSGRB0_WS64_WG32_8_1_WGM11 0.0 0.583 0.583 3 0 194 KernelExecution Cijk_Ailk_Bljk_DB_MT128x128x8_MI16x16x4x1_SN_1LDSB1_APM1_ABV0_ACED0_AF0EM1_AF1EM1_AMAS3_ASGT_ASAE01_ASCE01_ASEM1_AAC0_BL1_BS1_DTL0_DTVA0_DVO0_ETSP_EPS0_FL0_GRVW2_GSU1_GSUAMB_GLS1_ISA90a_IU1_K1_KLA_LBSPP0_LPA0_LPB0_LDL1_LRVW1_LWPMn1_LDW0_MAC_MIAV0_MDA2_NTC0_NTD0_NEPBS4_NLCA1_NLCB1_ONLL1_OPLV0_PK0_PAP0_PGR2_PLR3_RK0_SIA3_SS1_SU0_SUM0_SUS0_SCIUI1_SPO1_SRVW0_SSO6_SVW2_SNLL0_TT4_64_TLDS0_USFGROn1_VAW1_VSn1_VW2_WSGRA0_WSGRB0_WS64_WG32_8_1_WGM11 0.0 0.207 0.207 6 0 34 .TAU application => KernelExecution void org2r_init_ident(int, int, int, double*, int, int, long) 0.0 0.207 0.207 6 0 34 KernelExecution void org2r_init_ident(int, int, int, double*, int, int, long) 0.0 0.19 0.19 3 0 63 .TAU application => int taupreload_main(int, char **, char **) => hipFree 0.0 0.19 0.19 3 0 63 hipFree 0.0 0.0985 0.0985 15 0 7 .TAU application => int taupreload_main(int, char **, char **) => hipExtModuleLaunchKernel 0.0 0.0985 0.0985 15 0 7 hipExtModuleLaunchKernel 0.0 0.0735 0.0735 4 0 18 .TAU application => int taupreload_main(int, char **, char **) => hipMalloc 0.0 0.0735 0.0735 4 0 18 hipMalloc 0.0 0.056 0.056 6 0 9 .TAU application => KernelExecution Cijk_Ailk_Bjlk_DB_MT16x16x8_MI16x16x4x1_SN_1LDSB1_APM1_ABV0_ACED0_AF0EM1_AF1EM1_AMAS0_ASGT_ASAE01_ASCE01_ASEM1_AAC0_BL1_BS1_DTL0_DTVA0_DVO0_ETSP_EPS0_FL0_GRVW1_GSU1_GSUAMB_GLS1_ISA90a_IU2_K1_KLA_LBSPP0_LPA0_LPB0_LDL1_LRVW1_LWPMn1_LDW0_MAC_MIAV0_MDA2_NTC0_NTD0_NEPBS1_NLCA1_NLCB1_ONLL1_OPLV0_PK0_PAP0_PGR2_PLR1_RK0_SIA3_SS0_SU32_SUM0_SUS256_SCIUI1_SPO1_SRVW0_SSO6_SVW1_SNLL0_TT1_16_TLDS0_USFGROn1_VAW1_VSn1_VW1_WSGRA0_WSGRB0_WS64_WG16_4_1_WGM5 0.0 0.056 0.056 6 0 9 KernelExecution Cijk_Ailk_Bjlk_DB_MT16x16x8_MI16x16x4x1_SN_1LDSB1_APM1_ABV0_ACED0_AF0EM1_AF1EM1_AMAS0_ASGT_ASAE01_ASCE01_ASEM1_AAC0_BL1_BS1_DTL0_DTVA0_DVO0_ETSP_EPS0_FL0_GRVW1_GSU1_GSUAMB_GLS1_ISA90a_IU2_K1_KLA_LBSPP0_LPA0_LPB0_LDL1_LRVW1_LWPMn1_LDW0_MAC_MIAV0_MDA2_NTC0_NTD0_NEPBS1_NLCA1_NLCB1_ONLL1_OPLV0_PK0_PAP0_PGR2_PLR1_RK0_SIA3_SS0_SU32_SUM0_SUS256_SCIUI1_SPO1_SRVW0_SSO6_SVW1_SNLL0_TT1_16_TLDS0_USFGROn1_VAW1_VSn1_VW1_WSGRA0_WSGRB0_WS64_WG16_4_1_WGM5 0.0 0.049 0.049 9 0 5 .TAU application => KernelExecution void restau(int, double*, long) 0.0 0.049 0.049 9 0 5 KernelExecution void restau(int, double*, long) 0.0 0.0445 0.0445 6 0 7 .TAU application => KernelExecution Cijk_Ailk_Bljk_DB_MT32x16x16_MI16x16x4x1_SN_1LDSB0_APM1_ABV0_ACED0_AF0EM1_AF1EM1_AMAS0_ASGT_ASAE01_ASCE01_ASEM1_AAC0_BL1_BS1_DTL0_DTVA0_DVO0_ETSP_EPS0_FL0_GRVW1_GSU1_GSUAMB_GLS1_ISA90a_IU2_K1_KLA_LBSPP0_LPA0_LPB0_LDL1_LRVW1_LWPMn1_LDW0_MAC_MIAV0_MDA2_NTC0_NTD0_NEPBS4_NLCA1_NLCB1_ONLL1_OPLV0_PK0_PAP0_PGR2_PLR3_RK0_SIA3_SS1_SU0_SUM0_SUS0_SCIUI1_SPO1_SRVW0_SSO6_SVW1_SNLL0_TT1_16_TLDS1_USFGROn1_VAW1_VSn1_VW1_WSGRA0_WSGRB0_WS64_WG32_4_1_WGM11 0.0 0.0445 0.0445 6 0 7 KernelExecution Cijk_Ailk_Bljk_DB_MT32x16x16_MI16x16x4x1_SN_1LDSB0_APM1_ABV0_ACED0_AF0EM1_AF1EM1_AMAS0_ASGT_ASAE01_ASCE01_ASEM1_AAC0_BL1_BS1_DTL0_DTVA0_DVO0_ETSP_EPS0_FL0_GRVW1_GSU1_GSUAMB_GLS1_ISA90a_IU2_K1_KLA_LBSPP0_LPA0_LPB0_LDL1_LRVW1_LWPMn1_LDW0_MAC_MIAV0_MDA2_NTC0_NTD0_NEPBS4_NLCA1_NLCB1_ONLL1_OPLV0_PK0_PAP0_PGR2_PLR3_RK0_SIA3_SS1_SU0_SUM0_SUS0_SCIUI1_SPO1_SRVW0_SSO6_SVW1_SNLL0_TT1_16_TLDS1_USFGROn1_VAW1_VSn1_VW1_WSGRA0_WSGRB0_WS64_WG32_4_1_WGM11 0.0 0.042 0.042 9 0 5 .TAU application => KernelExecution void reset_batch_info(double*, long, int, int) 0.0 0.042 0.042 9 0 5 KernelExecution void reset_batch_info(double*, long, int, int) 0.0 0.031 0.031 6 0 5 .TAU application => CopyDeviceToHost 0.0 0.031 0.031 6 0 5 .TAU application => KernelExecution void copy_mat(int, int, double*, int, int, long, double*, int, int, long, no_mask, rocblas_fill_, rocblas_diagonal_) 0.0 0.031 0.031 6 0 5 CopyDeviceToHost 0.0 0.031 0.031 6 0 5 KernelExecution void copy_mat(int, int, double*, int, int, long, double*, int, int, long, no_mask, rocblas_fill_, rocblas_diagonal_) 0.0 0.0295 0.0295 6 0 5 .TAU application => KernelExecution void copyshift_down(bool, int, double*, int, int, long, double*, int, int, long) 0.0 0.0295 0.0295 6 0 5 KernelExecution void copyshift_down(bool, int, double*, int, int, long, double*, int, int, long) 0.0 0.016 0.016 4 0 4 .TAU application => int taupreload_main(int, char **, char **) => hipModuleGetFunction 0.0 0.016 0.016 4 0 4 hipModuleGetFunction 0.0 0.015 0.015 3 0 5 .TAU application => KernelExecution void set_zero(int, int, double*, int, int, long, rocblas_fill_) 0.0 0.015 0.015 3 0 5 KernelExecution void set_zero(int, int, double*, int, int, long, rocblas_fill_) 0.0 0.014 0.014 3 0 5 .TAU application => KernelExecution void orgl2_init_ident(int, int, int, double*, int, int, long) 0.0 0.014 0.014 3 0 5 KernelExecution void orgl2_init_ident(int, int, int, double*, int, int, long) 0.0 0.0135 0.0135 3 0 4 .TAU application => KernelExecution void iota_n(double*, unsigned int, double) 0.0 0.0135 0.0135 3 0 4 KernelExecution void iota_n(double*, unsigned int, double) 0.0 0.005 0.005 0.5 0 10 .TAU application => int taupreload_main(int, char **, char **) => hipSetDevice 0.0 0.005 0.005 0.5 0 10 hipSetDevice 0.0 0.0045 0.0045 0.5 0 9 .TAU application => int taupreload_main(int, char **, char **) => hipGetDeviceCount 0.0 0.0045 0.0045 2 0 2 .TAU application => int taupreload_main(int, char **, char **) => hipGetDeviceProperties 0.0 0.0045 0.0045 0.5 0 9 hipGetDeviceCount 0.0 0.0045 0.0045 2 0 2 hipGetDeviceProperties 0.0 0.004 0.004 1.5 0 3 .TAU application => int taupreload_main(int, char **, char **) => hipGetDevice 0.0 0.004 0.004 0.5 0 8 .TAU application => int taupreload_main(int, char **, char **) => pthread_join 0.0 0.004 0.004 1.5 0 3 hipGetDevice 0.0 0.004 0.004 0.5 0 8 pthread_join