| Line | Source | % of fetches |
|---|---|---|
| 1 | // Copyright (c) 2014 University of Oregon | |
| 2 | // | |
| 3 | // Distributed under the Boost Software License, Version 1.0. (See accompanying | |
| 4 | // file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) | |
| 5 | ||
| 6 | #pragma once | |
| 7 | ||
| 8 | #include <iostream> | |
| 9 | #include <sstream> | |
| 10 | #include <math.h> | |
| 11 | #include "apex_options.hpp" | |
| 12 | #include "apex_types.h" | |
| 13 | #include <chrono> | |
| 14 | #include "task_identifier.hpp" | |
| 15 | #if defined(APEX_HAVE_HPX) | |
| 16 | #include <hpx/util/hardware/timestamp.hpp> | |
| 17 | #endif | |
| 18 | ||
| 19 | #ifdef __INTEL_COMPILER | |
| 20 | #define CLOCK_TYPE high_resolution_clock | |
| 21 | #else | |
| 22 | #define CLOCK_TYPE steady_clock | |
| 23 | #endif | |
| 24 | ||
| 25 | namespace apex { | |
| 26 | ||
| 27 | enum struct reset_type { | |
| 28 | NONE, // not a reset event | |
| 29 | CURRENT, // reset the specified counter | |
| 30 | ALL // reset all counters | |
| 31 | }; | |
| 32 | ||
| 33 | class disabled_profiler_exception : public std::exception { | |
| 34 | virtual const char* what() const throw() { | |
| 35 | return "Disabled profiler."; | |
| 36 | } | |
| 37 | }; | |
| 38 | ||
| 39 | #ifndef APEX_USE_CLOCK_TIMESTAMP | |
| 40 | template<std::intmax_t clock_freq> | |
| 41 | struct rdtsc_clock { | |
| 42 | typedef unsigned long long rep; | |
| 43 | typedef std::ratio<1, clock_freq> period; | |
| 44 | typedef std::chrono::duration<rep, period> duration; | |
| 45 | typedef std::chrono::time_point<rdtsc_clock> time_point; | |
| 46 | static const bool is_steady = true; | |
| 47 | static time_point now() noexcept { | |
| 48 | #if defined(APEX_HAVE_HPX) | |
| 49 | return time_point(duration(hpx::util::hardware::timestamp())); | |
| 50 | #else | |
| 51 | unsigned lo, hi; | |
| 52 | asm volatile("rdtsc" : "=a" (lo), "=d" (hi)); | |
| 53 | return time_point(duration(static_cast<rep>(hi) << 32 | lo)); | |
| 54 | #endif | |
| 55 | } | |
| 56 | }; | |
| 57 | #endif | |
| 58 | ||
| 59 | #ifdef APEX_USE_CLOCK_TIMESTAMP | |
| 60 | #define MYCLOCK std::chrono::CLOCK_TYPE | |
| 61 | #else | |
| 62 | typedef rdtsc_clock<1> OneHzClock; | |
| 63 | #define MYCLOCK OneHzClock | |
| 64 | #endif | |
| 65 | ||
| 66 | class profiler { | |
| 67 | public: | |
| 68 | MYCLOCK::time_point start; | |
| 69 | MYCLOCK::time_point end; | |
| 70 | #if APEX_HAVE_PAPI | |
| 71 | long long papi_start_values[8]; | |
| 72 | long long papi_stop_values[8]; | |
| 73 | #endif | |
| 74 | double value; | |
| 75 | double children_value; | |
| 76 | //apex_function_address action_address; | |
| 77 | //std::string * timer_name; | |
| 78 | //bool have_name; | |
| 79 | task_identifier * task_id; | |
| 80 | bool is_counter; | |
| 81 | bool is_resume; // for yield or resume | |
| 82 | reset_type is_reset; | |
| 83 | bool stopped; | |
| 84 | profiler(task_identifier * id, | |
| 85 | bool resume = false, | |
| 86 | reset_type reset = reset_type::NONE) : | |
| 87 | start(MYCLOCK::now()), | |
| 88 | #if APEX_HAVE_PAPI | |
| 89 | papi_start_values{0,0,0,0,0,0,0,0}, | |
| 90 | papi_stop_values{0,0,0,0,0,0,0,0}, | |
| 91 | #endif | |
| 92 | value(0.0), | |
| 93 | children_value(0.0), | |
| 94 | task_id(id), | |
| 95 | is_counter(false), | |
| 96 | is_resume(resume), | |
| 97 | is_reset(reset), stopped(false) {}; | |
| 98 | profiler(task_identifier * id, double value_) : | |
| 99 | start(MYCLOCK::now()), | |
| 100 | #if APEX_HAVE_PAPI | |
| 101 | papi_start_values{0,0,0,0,0,0,0,0}, | |
| 102 | papi_stop_values{0,0,0,0,0,0,0,0}, | |
| 103 | #endif | |
| 104 | value(value_), | |
| 105 | children_value(0.0), | |
| 106 | task_id(id), | |
| 107 | is_counter(true), | |
| 108 | is_resume(false), | |
| 109 | is_reset(reset_type::NONE), stopped(true) { }; | |
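| 110 | // copy-like constructor: takes a pointer to an existing profiler (not a const reference), allocates a new copy of its task_id, and stores in->elapsed() as value | |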
| 111 | profiler(profiler* in) : start(in->start), end(in->end) { | |
| 112 | #if APEX_HAVE_PAPI | |
| 113 | for (int i = 0 ; i < 8 ; i++) { | |
| 114 | papi_start_values[i] = in->papi_start_values[i]; | |
| 115 | papi_stop_values[i] = in->papi_stop_values[i]; | |
| 116 | } | |
| 117 | #endif | |
| 118 | value = in->elapsed(); | |
| 119 | children_value = in->children_value; | |
| 120 | task_id = new task_identifier(*in->task_id); | |
| 121 | is_counter = in->is_counter; | |
| 122 | is_resume = in->is_resume; // for yield or resume | |
| 123 | is_reset = in->is_reset; | |
| 124 | stopped = in->stopped; | |
| 125 | } | |
| 126 | ~profiler(void) { if (task_id != nullptr) delete task_id; }; | |
| 127 | // for "yield" support | |
| 128 | void stop(bool is_resume) { | |
| 129 | this->is_resume = is_resume; | |
| 130 | end = MYCLOCK::now(); | |
| 131 | stopped = true; | |
| 132 | }; | |
| 133 | void stop() { | |
| 134 | end = MYCLOCK::now(); | |
| 135 | stopped = true; | |
| 136 | }; | |
| 137 | double elapsed(void) { | |
| 138 | if(is_counter) { | |
| 139 | return value; | |
| 140 | } else { | |
| 141 | std::chrono::duration<double> time_span = std::chrono::duration_cast<std::chrono::duration<double>>(end - start); | |
| 142 | return time_span.count(); | |
| 143 | } | |
| 144 | } | |
| 145 | double exclusive_elapsed(void) { | |
| 146 | return elapsed() - children_value; | |
| 147 | } | |
| 148 | ||
| 149 | static inline profiler* get_disabled_profiler(void) { | |
| 150 | return disabled_profiler; | |
| 151 | } | |
| 152 | // default constructor for the dummy profiler | |
| 153 | profiler(void) {}; | |
| 154 | // dummy profiler to indicate that stop/yield should resume immediately | |
| 155 | static profiler* disabled_profiler; // initialized in profiler_listener.cpp | |
| 156 | ||
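| 157 | /* This function returns 1/X, where "X" is the measured tick frequency of MYCLOCK in Hz (not MHz, despite the name), i.e. seconds per tick. It returns 1.0 when APEX_USE_CLOCK_TIMESTAMP is defined. */ | |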
| 158 | static double get_cpu_mhz () { | |
| 159 | #ifdef APEX_USE_CLOCK_TIMESTAMP | |
| 160 | return 1.0; | |
| 161 | #else | |
| 162 | static double ticks_per_period = 0.0; | |
| 163 | if (ticks_per_period == 0.0) { | |
| 164 | typedef std::chrono::duration<double, typename MYCLOCK::period> CycleA; | |
| 165 | typedef std::chrono::duration<double, typename std::chrono::CLOCK_TYPE::period> CycleB; | |
| 166 | const int N = 100000000; | |
| 167 | auto t0a = MYCLOCK::now(); | |
| 168 | auto t0b = std::chrono::CLOCK_TYPE::now(); | |
| 169 | for (int j = 0; j < N; ++j) { | |
| 170 | #if !defined(_MSC_VER) | |
| 171 | asm volatile(""); | |
| 172 | #endif | |
| 173 | } | |
| 174 | auto t1a = MYCLOCK::now(); | |
| 175 | auto t1b = std::chrono::CLOCK_TYPE::now(); | |
| 176 | // Get the clock ticks per time period | |
| 177 | //std::cout << CycleA(t1a-t0a).count() << " 1MHz ticks seen." << std::endl; | |
| 178 | //std::cout << std::chrono::duration_cast<std::chrono::seconds>(CycleB(t1b-t0b)).count() << " Seconds? seen." << std::endl; | |
| 179 | ticks_per_period = CycleB(t1b-t0b)/CycleA(t1a-t0a); | |
| 180 | if (apex_options::use_screen_output()) { | |
| 181 | std::cout << "CPU is " << (1.0/ticks_per_period) << " Hz." << std::endl; | |
| 182 | } | |
| 183 | } | |
| 184 | return ticks_per_period; | |
| 185 | #endif | |
| 186 | } | |
| 187 | ||
| 188 | /* this is for OTF2 tracing. | |
| 189 | * We want a timestamp for the start of the trace. | |
| 190 | * We will also need one for the end of the trace. */ | |
| 191 | static MYCLOCK::time_point get_global_start(void) { | |
| 192 | static MYCLOCK::time_point global_now = MYCLOCK::now(); | |
| 193 | return global_now; | |
| 194 | } | |
| 195 | /* this is for getting the endpoint of the trace. */ | |
| 196 | static MYCLOCK::time_point get_global_end(void) { | |
| 197 | return MYCLOCK::now(); | |
| 198 | } | |
| 199 | static uint64_t time_point_to_nanoseconds(MYCLOCK::time_point tp) { | |
| 200 | auto value = tp.time_since_epoch(); | |
| 201 | uint64_t duration = std::chrono::duration_cast<std::chrono::nanoseconds>(value).count(); | |
| 202 | return duration; | |
| 203 | } | |
| 204 | double normalized_timestamp(void) { | |
| 205 | if(is_counter) { | |
| 206 | return value; | |
| 207 | } else { | |
| 208 | std::chrono::duration<double> time_span = std::chrono::duration_cast<std::chrono::duration<double>>(start - get_global_start()); | |
| 209 | return time_span.count()*get_cpu_mhz(); | |
| 210 | } | |
| 211 | } | |
| 212 | }; | |
| 213 | ||
| 214 | } | |
| 215 | ||
| 216 |
Copyright (c) 2006-2012 Rogue Wave Software, Inc. All Rights Reserved.
Patents pending.