from edu.uoregon.tau.perfexplorer.glue import * from edu.uoregon.tau.perfexplorer.client import PerfExplorerModel from java.util import * threshold = 10.0 callsCutoff = 1000.0 tauData = "" def getParameters(): global tauData global threshold global callsCutoff parameterMap = PerfExplorerModel.getModel().getScriptParameters() keys = parameterMap.keySet() tmp = parameterMap.get("tauData") if tmp != None: tauData = tmp print "Performance data: " + tauData else: print "TAU profile data path not specified... using current directory of profile.x.x.x files." tmp = parameterMap.get("threshold") if tmp != None: threshold = float(tmp) tmp = parameterMap.get("callsCutoff") if tmp != None: callsCutoff = float(tmp) print "Max Functions Threshold:\t", threshold print "Max Calls Threshold:\t", callsCutoff def loadFile(fileName): # load the trial files = [] files.append(fileName) input = None if fileName.endswith("ppk"): input = DataSourceResult(DataSourceResult.PPK, files, False) elif fileName.endswith("gprof"): input = DataSourceResult(DataSourceResult.GPROF, files, False) elif fileName.endswith("xml"): input = DataSourceResult(DataSourceResult.SNAP, files, False) else: input = DataSourceResult(DataSourceResult.TAUPROFILE, files, False) return input def main(): global filename global tauData global threshold global callsCutoff print "--------------- JPython test script start ------------" print "doing cluster test" # get the parameters getParameters() # load the data result = loadFile(tauData) result.setIgnoreWarnings(True) # set the metric, type we are interested in metric = result.getTimeMetric() type = result.EXCLUSIVE # split communication and computation splitter = SplitCommunicationComputationOperation(result) outputs = splitter.processData() computation = outputs.get(SplitCommunicationComputationOperation.COMPUTATION) communication = outputs.get(SplitCommunicationComputationOperation.COMMUNICATION) #computation = result # do some basic statistics first stats = BasicStatisticsOperation(computation) means = stats.processData().get(BasicStatisticsOperation.MEAN) # then, using the stats, find the top X event names reducer = TopXEvents(means, metric, type, 20) reduced = reducer.processData().get(0) # then, extract those events from the actual data tmpEvents = ArrayList(reduced.getEvents()) reducer = ExtractEventOperation(computation, tmpEvents) reduced = reducer.processData().get(0) # cluster the data clusterer = DBSCANOperation(reduced, metric, type, 1.0) clusterResult = clusterer.processData() print "Estimated value for k:", str(clusterResult.get(0).getThreads().size()) clusterIDs = clusterResult.get(4) # split the trial into the clusters splitter = SplitTrialClusters(result, clusterResult) clusters = splitter.processData() functions = "function-list.txt" gprof = False functionSet = set() for input in clusters: # extract the non-callpath data print "Extracting non-callpath data..." input.setIgnoreWarnings(True) extractor = ExtractNonCallpathEventOperation(input) extracted = extractor.processData().get(0) # extract computation code (remove MPI) myEvents = ArrayList() print "Filtering out MPI calls..." #print "And functions called less than 1000 times..." for event in extracted.getEvents(): if not event.startswith("MPI_"): #if extracted.getCalls(extracted.getThreads().first(), event) > 999: myEvents.add(event) extractor = ExtractEventOperation(extracted, myEvents) extracted = extractor.processData().get(0) # generate statistics print "Generating stats..." doStats = BasicStatisticsOperation(extracted, False) mean = doStats.processData().get(BasicStatisticsOperation.MEAN) for type in AbstractResult.EXCLUSIVE, AbstractResult.INCLUSIVE: # get the top X events print "Extracting top events..." mean.setIgnoreWarnings(True) topper = TopXEvents(mean, mean.getTimeMetric(), type, threshold) topped = topper.processData().get(0) # put the top X events names in a file for event in topped.getEvents(): shortEvent = event # fix gprof names if gprof: shortEvent = shortEvent.upper() if shortEvent.startswith("__MODULE"): shortEvent = shortEvent.replace("__MODULE","MODULE") shortEvent = shortEvent.replace("_NMOD_","::") # fix TAU names else: shortEvent = Utilities.shortenEventName(event) percentage = topped.getDataPoint(0,event,topped.getTimeMetric(),type) / mean.getInclusive(0,mean.getMainEvent(),mean.getTimeMetric()) * 100.0 calls = topped.getCalls(0,event) if calls < callsCutoff: if calls == 0.0: print "%00.2f%%\t %d\t %0.5f%%\t %s" % (percentage, calls, 0.0, shortEvent) else: print "%00.2f%%\t %d\t %0.5f%%\t %s" % (percentage, calls, percentage / float(calls), shortEvent) functionSet.add(shortEvent) myFile = open(functions, 'w') for shortEvent in functionSet: myFile.write(shortEvent + "\n") myFile.close() print "---------------- JPython test script end -------------" if __name__ == "__main__": main()