from edu.uoregon.tau.perfdmf import Trial
from edu.uoregon.tau.perfexplorer.rules import *
from edu.uoregon.tau.perfexplorer.glue import *
from java.util import *

###################################################################

import re


def sort_nicely(l):
    """Sort the list ``l`` in place in natural ("human") order and return it.

    Each string is cut into digit and non-digit runs; digit runs are
    compared as integers, so "loop 2" is ordered before "loop 10".
    """
    def natural_key(item):
        # re.split with a capturing group keeps the digit runs in the result.
        key = []
        for piece in re.split('([0-9]+)', item):
            if piece.isdigit():
                key.append(int(piece))
            else:
                key.append(piece)
        return key

    l.sort(key=natural_key)
    return l

###################################################################

# Labels of the synthetic events the script stores in its derived result.
nonMPI = "Computation"
MPI = "MPI"
kernNonMPI = "Kernel Computation"
kernMPI = "Kernel MPI"
mapping = {
    kernNonMPI: "Communication Efficiency",
    nonMPI: "Communication Efficiency",
}

# Metric names whose per-thread totals are accumulated into the counters below.
instructionsMetric = "PAPI_TOT_INS"
cyclesMetric = "PAPI_TOT_CYC"

# Running sums over all threads; the "kern" variants exclude the time
# subtracted for MPI_Init / MPI_Finalize.
instructions = 0
cycles = 0
kernInstructions = 0
kernCycles = 0

###################################################################

# A parenthesised single-argument print(...) is parsed by Jython 2 as the
# print statement applied to one expression, so the output is unchanged.
print("--------------- JPython test script start ------------")
print("--- Looking for load imbalances --- ")

# load the trial
print("loading the data...")
# NOTE(review): this file's indentation was lost; the nesting below is
# reconstructed from which names each statement needs (thread / metric /
# event) -- confirm against the original script.
# A parenthesised single-argument print(...) is parsed by Jython 2 as the
# print statement applied to one expression, so the output is unchanged.

Utilities.setSession("local")
files = ["justtime.ppk"]
trial = DataSourceResult(DataSourceResult.PPK, files, False)
print("\nProcs\t Type\t\t\t AVG\t MAX\t MIN\t STDEV\t AVG/MAX")
trial.setIgnoreWarnings(True)

# extract the non-callpath events from the trial
extractor = ExtractNonCallpathEventOperation(trial)
extracted = extractor.processData().get(0)
mainEvent = extracted.getMainEvent()

# Derived result holding the four synthetic events
# (Computation / MPI / Kernel Computation / Kernel MPI).
split = DefaultResult(trial, False)

# Events starting with these prefixes are left out of the "kernel" figures.
init = "MPI_Init"
final = "MPI_Finalize"
kernelExcluded = (init, final)

syntheticEvents = (nonMPI, MPI, kernNonMPI, kernMPI)

for thread in extracted.getThreads():
    # initialize call / subroutine counts of every synthetic event
    for label in syntheticEvents:
        split.putCalls(thread, label, 1)
        split.putSubroutines(thread, label, 0)

    for metric in extracted.getMetrics():
        # start every synthetic event at zero for this metric
        for label in syntheticEvents:
            split.putExclusive(thread, label, metric, 0.0)
            split.putInclusive(thread, label, metric, 0.0)

        # inclusive value of the main event = total for this thread
        total = extracted.getInclusive(thread, mainEvent, metric)
        kernTotal = total

        for event in extracted.getEvents():
            # exclusive value of this event
            value = extracted.getExclusive(thread, event, metric)
            if not event.startswith(MPI):
                continue

            # every MPI event feeds the overall MPI running total
            mpiSoFar = value + split.getExclusive(thread, MPI, metric)
            split.putExclusive(thread, MPI, metric, mpiSoFar)
            split.putInclusive(thread, MPI, metric, mpiSoFar)

            if event.startswith(kernelExcluded):
                # init / finalize: shrink the kernel total instead
                kernTotal -= value
                continue

            kernMpiSoFar = value + split.getExclusive(thread, kernMPI, metric)
            split.putExclusive(thread, kernMPI, metric, kernMpiSoFar)
            split.putInclusive(thread, kernMPI, metric, kernMpiSoFar)

        # Replace the accumulated sums by fractions of the relevant total:
        # first over all functions, then ignoring init / finalize.
        for computeLabel, commLabel, span in (
                (nonMPI, MPI, total),
                (kernNonMPI, kernMPI, kernTotal)):
            communication = split.getExclusive(thread, commLabel, metric)
            computeShare = (span - communication) / span
            commShare = communication / span
            split.putInclusive(thread, computeLabel, metric, computeShare)
            split.putExclusive(thread, computeLabel, metric, computeShare)
            split.putInclusive(thread, commLabel, metric, commShare)
            split.putExclusive(thread, commLabel, metric, commShare)

        if metric == instructionsMetric:
            kernInstructions += kernTotal
            instructions += total
        if metric == cyclesMetric:
            kernCycles += kernTotal
            cycles += total

threadCount = extracted.getThreads().size()
avgInstructions = instructions / threadCount
kernAvgInstructions = kernInstructions / threadCount

# get basic statistics over the derived result
metric = "TIME"
statMaker = BasicStatisticsOperation(split, False)
stats = statMaker.processData()
stddevs = stats.get(BasicStatisticsOperation.STDDEV)
means = stats.get(BasicStatisticsOperation.MEAN)
totals = stats.get(BasicStatisticsOperation.TOTAL)
maxs = stats.get(BasicStatisticsOperation.MAX)
mins = stats.get(BasicStatisticsOperation.MIN)

# ratio of mean to max -- the AVG/MAX column of the table
ratioMaker = RatioOperation(means, maxs)
ratios = ratioMaker.processData().get(0)

# The statistics results are read at thread index 0.
thread = 0
reportRows = (
    (nonMPI, "%d\t %s\t\t %.2f%%\t %.2f%%\t %.2f%%\t %.2f%%\t %.2f%%"),
    (kernNonMPI, "%d\t %s\t %.2f%%\t %.2f%%\t %.2f%%\t %.2f%%\t %.2f%%\n"),
)
for event, rowFormat in reportRows:
    meanShare = means.getExclusive(thread, event, metric)
    maxShare = maxs.getExclusive(thread, event, metric)
    minShare = mins.getExclusive(thread, event, metric)
    stddevShare = stddevs.getExclusive(thread, event, metric)
    ratio = ratios.getExclusive(thread, event, metric)
    print(rowFormat % (trial.getThreads().size(), event, meanShare*100,
                       maxShare*100, minShare*100, stddevShare*100,
                       ratio*100))

# After the loop the values are those of the kernel-only row.
print("Communication Efficiency (kernel only):\t%.3f" % maxShare)
print("Load Balance (kernel only):\t\t%.3f" % ratio)
print("Total Instructions(kernel only):\t%.3f" % kernAvgInstructions)

print("\nNext Step: Computing Micro Load Imbalance.\n")

print("Searching for loop events...")

# collect the names of all flat events that contain the loop marker
loopPrefix = "loop ["
index = 0  # not read anywhere in the visible script
loopNames = set(name for name in extracted.getEvents()
                if name.find(loopPrefix) > -1)

print("Extracting callpath events...")

# extract the callpath events (rebinds ``extracted``)
extractor = ExtractCallpathEventOperation(trial)
extracted = extractor.processData().get(0)

print("Generating Statistics...")
# Micro load imbalance: for every loop event found earlier, derive
#   RealCommEff = (loopTime - commTime) / loopTime
#   uLB         = (loopTime - commTime) / maxTi
#   CommEff     = RealCommEff * uLB
# from the callpath statistics, then print the same figures over all loops.
#
# NOTE(review): this file's indentation was lost; the per-loop summary is
# placed after the event scan so that all MPI time of the loop has been
# accumulated before it is used -- confirm against the original script.
# A parenthesised single-argument print(...) is parsed by Jython 2 as the
# print statement applied to one expression, so the output is unchanged.

statMaker = BasicStatisticsOperation(extracted, False)
stats = statMaker.processData()
stddevs = stats.get(BasicStatisticsOperation.STDDEV)
means = stats.get(BasicStatisticsOperation.MEAN)
totals = stats.get(BasicStatisticsOperation.TOTAL)
maxs = stats.get(BasicStatisticsOperation.MAX)
mins = stats.get(BasicStatisticsOperation.MIN)


def _safe_ratio(numerator, denominator):
    """Return numerator / denominator, or 0.0 when the denominator is zero."""
    if denominator == 0:
        return 0.0
    return numerator / denominator


print("Iterating over main loop...")

print("Loop ID:\t RealCommEff\t uLB\t\t CommEff")

# Sums over all reported loops, used for the final "Total" row.
totalLoopTimeIdeal = 0.0
totalLoopTime = 0.0
totalMaxTi = 0.0
loopsReported = 0

for loopName in sort_nicely(list(loopNames)):
    # Reset per loop so a loop without a matching callpath event can neither
    # hit an unbound name nor reuse the previous loop's timings.
    communicationTime = 0
    loopTime = None
    maxTi = None

    # scan every callpath event whose path mentions this loop
    for event in extracted.getEvents():
        if event.find(loopName) < 0:
            continue
        tokens = event.split("=>")
        if len(tokens) < 2:
            continue
        leaf = tokens[-1]
        if leaf.find("MPI_") > -1:
            # MPI leaf under the loop: add its minimum exclusive value
            communicationTime = communicationTime + mins.getExclusive(0, event, metric)
        elif leaf.strip().find(loopName) > -1:
            # the path ends in the loop itself: its mean / max inclusive value
            # (if several paths match, the last one seen wins)
            loopTime = means.getInclusive(0, event, metric)
            maxTi = maxs.getInclusive(0, event, metric)

    if loopTime is None:
        print("%s:\t (no callpath event ends in this loop; skipped)" % loopName)
        continue

    loopTimeIdeal = loopTime - communicationTime
    realCommEff = _safe_ratio(loopTimeIdeal, loopTime)
    uLB = _safe_ratio(loopTimeIdeal, maxTi)
    commEff = realCommEff * uLB
    print("%s:\t %.5f\t %.5f\t %.5f" % (loopName, realCommEff, uLB, commEff))

    totalLoopTimeIdeal = totalLoopTimeIdeal + loopTimeIdeal
    totalLoopTime = totalLoopTime + loopTime
    totalMaxTi = totalMaxTi + maxTi
    loopsReported = loopsReported + 1

if loopsReported == 0:
    # Nothing was summed, so the aggregate ratios would be 0 / 0.
    print("\n\nNo loop events were reported; skipping the Total row.\n")
else:
    realCommEff = _safe_ratio(totalLoopTimeIdeal, totalLoopTime)
    uLB = _safe_ratio(totalLoopTimeIdeal, totalMaxTi)
    commEff = realCommEff * uLB
    print("\n\nLoop ID:\t RealCommEff\t uLB\t\t CommEff")
    print("Total: \t\t %.5f\t %.5f\t %.5f\n" % (realCommEff, uLB, commEff))

print("---------------- JPython test script end -------------")