from edu.uoregon.tau.perfdmf import * from edu.uoregon.tau.perfexplorer.rules import * from edu.uoregon.tau.perfexplorer.glue import * from edu.uoregon.tau.perfexplorer.client import PerfExplorerModel from java.util import * from java.lang import * import math tauData = "" masterMeans = None masterMaxs = None iterationPrefix = "Iteration" vectorT_i = [] vectorT = [] ################################################################### def trunc(s,min_pos=0,max_pos=75,ellipsis=True): # Sentinel value -1 returned by String function rfind NOT_FOUND = -1 # Error message for max smaller than min positional error ERR_MAXMIN = 'Minimum position cannot be greater than maximum position' # If the minimum position value is greater than max, throw an exception if max_pos < min_pos: raise ValueError(ERR_MAXMIN) # Change the ellipsis characters here if you want a true ellipsis if ellipsis: suffix = '...' else: suffix = '' # Case 1: Return string if it is shorter (or equal to) than the limit length = len(s) if length <= max_pos: return s + suffix else: # Case 2: Return it to nearest period if possible try: end = s.rindex('.',min_pos,max_pos) except ValueError: # Case 3: Return string to nearest space end = s.rfind(' ',min_pos,max_pos) if end == NOT_FOUND: end = max_pos return s[0:end] + suffix ################################################################### def getParameters(): global parameterMap global tauData global iterationPrefix parameterMap = PerfExplorerModel.getModel().getScriptParameters() keys = parameterMap.keySet() #for key in keys: #print key, parameterMap.get(key) tmp = parameterMap.get("tauData") if tmp != None: tauData = tmp print "Performance data: " + tauData else: print "TAU profile data path not specified... using current directory of profile.x.x.x files." tmp = parameterMap.get("prefix") if tmp != None: iterationPrefix = tmp print "Iteration Prefix: " + iterationPrefix else: print "Iteration Prefix not specified... using", iterationPrefix def loadFile(fileName): global gprof # load the trial files = [] files.append(fileName) input = None if fileName.endswith("gprof.out"): input = DataSourceResult(DataSourceResult.GPROF, files, True) gprof = True elif fileName.endswith("ppk"): input = DataSourceResult(DataSourceResult.PPK, files, False) elif fileName.endswith("xml"): input = DataSourceResult(DataSourceResult.SNAP, files, False) else: input = DataSourceResult(DataSourceResult.TAUPROFILE, files, False) return input ################################################################### def computeLoadBalance(trial, callpath, numPhases): global masterMeans global masterMaxs global iterationPrefix global vectorT_i global vectorT # extract the non-callpath events from the trial trial.setIgnoreWarnings(True) extracted = trial #if not callpath: #extractor = ExtractNonCallpathEventOperation(trial) #extracted = extractor.processData().get(0) mainEventLong = extracted.getMainEvent() mainEvent = mainEventLong if not callpath: mainEvent = Utilities.shortenEventName(mainEventLong) #print "Main Event: ", mainEvent # compute the load imbalance splitter = LoadImbalanceOperation(extracted) splitter.setPercentage(False) loadBalance = splitter.processData() thread = 0 metric = trial.getTimeMetric() conversion = 1.0 / 1000000.0 event = LoadImbalanceOperation.COMPUTATION means = loadBalance.get(LoadImbalanceOperation.MEAN) maxs = loadBalance.get(LoadImbalanceOperation.MAX) mins = loadBalance.get(LoadImbalanceOperation.MIN) stddevs = loadBalance.get(LoadImbalanceOperation.STDDEV) ratios = loadBalance.get(LoadImbalanceOperation.LOAD_BALANCE) mean = means.getExclusive(thread, event, metric) * conversion max = maxs.getExclusive(thread, event, metric) * conversion min = mins.getExclusive(thread, event, metric) * conversion stddev = stddevs.getExclusive(thread, event, metric) * conversion ratio = ratios.getExclusive(thread, event, metric) #if callpath: #for event in extracted.getEvents(): #print event #print mean, max, min, stddev, ratio #for event in extracted.getEvents(): #print event, means.getInclusive(thread, mainEvent, metric) * conversion #inclusive = means.getInclusive(thread, mainEventLong, metric) * conversion #inclusive = masterMeans.getInclusive(0, mainEventLong, metric) * conversion inclusive = masterMaxs.getInclusive(0, mainEventLong, metric) * conversion threads = trial.getThreads().size() if callpath: if numPhases < 100: print "%s\t %d\t %.2f\t %s\t %.2f\t %.2f\t %.2f\t %.2f\t %.4f\t %.4f" % (trunc(mainEvent, max_pos=10), threads, inclusive, event, mean, max, min, stddev, max/inclusive, ratio) #print "%s\t %d\t %s\t %.2f%%\t %.2f%%\t %.2f%%\t %.2f%%\t %.2f%%\t" % (mainEvent, trial.getThreads().size(), event, mean*100, 100, 100, 100, 100) else: print "%d\t %.2f\t %s\t %.2f\t %.2f\t %.2f\t %.2f\t %.4f\t %.4f" % (threads, inclusive, event, mean, max, min, stddev, max/inclusive, ratio) splits = loadBalance.get(LoadImbalanceOperation.COMPUTATION_SPLITS) for thread in splits.getThreads(): vectorT_i[thread] = splits.getExclusive(thread, event, metric) * conversion vectorT[thread] = extracted.getInclusive(thread, mainEventLong, metric) * conversion return mean, max, min, stddev, inclusive, max/inclusive ################################################################### def myMax(a, b): if a > b: return a return b def myMin(a, b): if a < b: return a return b def main(): global tauData global masterMeans global masterMaxs global iterationPrefix global vectorT_i global vectorT print "--------------- JPython test script start ------------" print "--- Looking for load imbalances --- " # get the parameters getParameters() # load the data trial = loadFile(tauData) trial.setIgnoreWarnings(True) print "Getting basic statistics..." statter = BasicStatisticsOperation(trial) masterStats = statter.processData() masterMeans = masterStats.get(BasicStatisticsOperation.MEAN) masterMaxs = masterStats.get(BasicStatisticsOperation.MAX) totalVectorT_i = [] totalVectorT = [] for thread in trial.getThreads(): totalVectorT_i.append(0) vectorT_i.append(0) totalVectorT.append(0) vectorT.append(0) #print "Procs\t Incl.\t Type\t\t AVG\t MAX\t MIN\t STDEV\t AVG/MAX" #computeLoadBalance(trial, False, 1) print splitter = SplitTrialPhasesOperation(trial, iterationPrefix) phases = splitter.processData() totalMean = 0.0 totalInclusive = 0.0 totalCommEff = 0.0 avgMax = 0.0 avgMin = 1.0 totalMax = 0.0 totalMin = 1.0 totalStddev = 0.0 totalRatio = 0.0 print "LoopID\t\t Procs\t Incl.\t Type\t\t AVG\t MAX\t MIN\t STDEV\t CommEff AVG/MAX" print "------------------------------------------------------------------------------------------------" for phase in phases: mean, max, min, stddev, inclusive, commEff = computeLoadBalance(phase, True, phases.size()) if mean == max == min == stddev == 0: continue totalMean = totalMean + mean avgMax = myMax(avgMax, max) avgMin = myMin(avgMin, min) totalMax = totalMax + max totalMin = totalMin + min totalStddev = totalStddev + (stddev * stddev) totalInclusive = totalInclusive + inclusive totalCommEff = totalCommEff + commEff for index, item in enumerate(vectorT_i): totalVectorT_i[index] = totalVectorT_i[index] + item for index, item in enumerate(vectorT): totalVectorT[index] = totalVectorT[index] + item avgMean = totalMean / phases.size() avgMax = totalMax / phases.size() avgMin = totalMin / phases.size() avgStddev = math.sqrt(totalStddev / phases.size()) avgRatio = avgMean / avgMax avgInclusive = totalInclusive / phases.size() avgCommEff = totalCommEff / phases.size() maxT_i = 0 T = 0 maxEff = 0 totalEff = 0 totalT_i = 0 for value in totalVectorT: T = myMax(T, value) for value in totalVectorT_i: maxT_i = myMax(maxT_i, value) maxEff = myMax(maxEff, value/T) totalEff = totalEff + value/T totalT_i = totalT_i + value commEff = maxEff avgEff = totalEff / len(totalVectorT_i) LB = avgEff / maxEff avgT_i = totalT_i / len(totalVectorT_i) event = LoadImbalanceOperation.COMPUTATION #print "%s\t\t %d\t %ls\t %.2f%%\t %.2f%%\t %.2f%%\t %.2f%%\t %.2f%%\t" % ("Average", trial.getThreads().size(), event, avgMean*100, avgMax*100, avgMin*100, avgStddev*100, avgRatio*100) print "------------------------------------------------------------------------------------------------" print "%s\t\t %d\t %.2f\t %s\t %.2f\t %.2f\t %.2f\t %.2f\t %.2f\t %.2f" % ("Totals", trial.getThreads().size(), totalInclusive, event, totalMean, totalMax, totalMin, math.sqrt(totalStddev), totalCommEff, totalMean / totalMax) print "%s\t\t %d\t %.2f\t %s\t %.2f\t %.2f\t %.2f\t %.2f\t %.4f\t %.4f" % ("Average", trial.getThreads().size(), avgInclusive, event, avgMean, avgMax, avgMin, avgStddev, avgCommEff, avgRatio) # the total time spent in the loop. Essentially, for each # iteration of the loop, get the total time for each process. Accumulate # that vector over the whole loop. The process with the longest time spent # computing (aggregated over all iterations) is the T. print "\nT:\t\t", T # the total time spent computing, collapsed. Essentially, for each # iteration of the loop, get the computing time for each process. Accumulate # that vector over the whole loop. The process with the longest time spent # computing (aggregated over all iterations) is the max(T_i). print "max(T_i):\t", maxT_i print "avg(T_i):\t", avgT_i print "maxEff:\t\t", maxEff print "CommEff:\t", commEff, "(should be same as maxEff)" # the load balance for the loop. This is the sum of all efficiencies for # all processes, divided by the number of processes times the maxiumum # efficiency. This can be (and is) simplified, by summing the mean # computing times, and dividing by the max computing times. print "avgEff:\t\t", avgEff print "LB:\t\t", LB # the total time spent computing in the loop, serialized. Essentially, for each # iteration of the loop, get the max computing time in that loop. Add # those together. Because of overlapping iterations, this can be larger # than the actual time in the loop. If there were # no time spent in communication, this is how long the loop should take. print "T ideal:\t", totalMax # the micro load balance is the process with the highest computation time # divided by the ideal total loop execution time. print "microLB:\t", maxT_i / totalMax # the transfer term is the total time spent in the ideal loop divided by # the actual time spent in the loop. print "Transfer:\t", totalMax / T # finally, compute the efficiency. == LB * microLB * Transfer * IPC print "n:\t\t", LB * (maxT_i / totalMax) * (totalMax / T) * 1.0, "\n" print "---------------- JPython test script end -------------" if __name__ == "__main__": main()