Line | % of fetches | Source |
---|---|---|
1 | // Copyright (c) 2014 University of Oregon | |
2 | // | |
3 | // Distributed under the Boost Software License, Version 1.0. (See accompanying | |
4 | // file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) | |
5 | ||
6 | #pragma once | |
7 | ||
8 | #include <iostream> | |
9 | #include <sstream> | |
10 | #include <math.h> | |
11 | #include "apex_options.hpp" | |
12 | #include "apex_types.h" | |
13 | #include <chrono> | |
14 | #include "task_identifier.hpp" | |
15 | #if defined(APEX_HAVE_HPX) | |
16 | #include <hpx/util/hardware/timestamp.hpp> | |
17 | #endif | |
18 | ||
// Name of the std::chrono clock used as the wall-clock reference:
// high_resolution_clock when building with the Intel compiler,
// steady_clock with every other compiler.
#if defined(__INTEL_COMPILER)
#  define CLOCK_TYPE high_resolution_clock
#else
#  define CLOCK_TYPE steady_clock
#endif
24 | ||
25 | namespace apex { | |
26 | ||
/** Kind of reset request carried by a profiler object. */
enum class reset_type {
    /** Not a reset event. */
    NONE = 0,
    /** Reset the specified counter. */
    CURRENT = 1,
    /** Reset all counters. */
    ALL = 2
};
32 | ||
/**
 * Exception type whose message is the fixed string "Disabled profiler.".
 *
 * what() is public so the message can be read from the concrete type as
 * well as through a std::exception reference, and it is declared
 * `noexcept override` instead of the deprecated `throw()` specification.
 */
class disabled_profiler_exception : public std::exception {
public:
    /** @return the constant, NUL-terminated message "Disabled profiler." */
    const char* what() const noexcept override {
        return "Disabled profiler.";
    }
};
38 | ||
39 | #ifndef APEX_USE_CLOCK_TIMESTAMP | |
/**
 * Clock with the member layout of a std::chrono clock (rep, period,
 * duration, time_point, is_steady, now()) whose tick count is read from
 * the CPU timestamp counter.
 *
 * @tparam clock_freq denominator of the tick period: one tick is
 *         interpreted as 1/clock_freq seconds.
 */
template<std::intmax_t clock_freq>
struct rdtsc_clock {
    using rep = unsigned long long;
    using period = std::ratio<1, clock_freq>;
    using duration = std::chrono::duration<rep, period>;
    using time_point = std::chrono::time_point<rdtsc_clock>;
    static const bool is_steady = true;

    /** @return the current counter value wrapped in a time_point. */
    static time_point now() noexcept {
#if defined(APEX_HAVE_HPX)
        // With HPX available, use its hardware timestamp helper.
        return time_point(duration(hpx::util::hardware::timestamp()));
#else
        // rdtsc returns the 64-bit counter split across EDX:EAX.
        unsigned low_bits, high_bits;
        asm volatile("rdtsc" : "=a" (low_bits), "=d" (high_bits));
        const rep ticks = (static_cast<rep>(high_bits) << 32) | low_bits;
        return time_point(duration(ticks));
#endif
    }
};
57 | #endif | |
58 | ||
59 | #ifdef APEX_USE_CLOCK_TIMESTAMP | |
60 | #define MYCLOCK std::chrono::CLOCK_TYPE | |
61 | #else | |
62 | typedef rdtsc_clock<1> OneHzClock; | |
63 | #define MYCLOCK OneHzClock | |
64 | #endif | |
65 | ||
66 | class profiler { | |
67 | public: | |
68 | MYCLOCK::time_point start; | |
69 | MYCLOCK::time_point end; | |
70 | #if APEX_HAVE_PAPI | |
71 | long long papi_start_values[8]; | |
72 | long long papi_stop_values[8]; | |
73 | #endif | |
74 | double value; | |
75 | double children_value; | |
76 | //apex_function_address action_address; | |
77 | //std::string * timer_name; | |
78 | //bool have_name; | |
79 | task_identifier * task_id; | |
80 | bool is_counter; | |
81 | bool is_resume; // for yield or resume | |
82 | reset_type is_reset; | |
83 | bool stopped; | |
84 | profiler(task_identifier * id, | |
85 | bool resume = false, | |
86 | reset_type reset = reset_type::NONE) : | |
87 | start(MYCLOCK::now()), | |
88 | #if APEX_HAVE_PAPI | |
89 | papi_start_values{0,0,0,0,0,0,0,0}, | |
90 | papi_stop_values{0,0,0,0,0,0,0,0}, | |
91 | #endif | |
92 | value(0.0), | |
93 | children_value(0.0), | |
94 | task_id(id), | |
95 | is_counter(false), | |
96 | is_resume(resume), | |
97 | is_reset(reset), stopped(false) {}; | |
98 | profiler(task_identifier * id, double value_) : | |
99 | start(MYCLOCK::now()), | |
100 | #if APEX_HAVE_PAPI | |
101 | papi_start_values{0,0,0,0,0,0,0,0}, | |
102 | papi_stop_values{0,0,0,0,0,0,0,0}, | |
103 | #endif | |
104 | value(value_), | |
105 | children_value(0.0), | |
106 | task_id(id), | |
107 | is_counter(true), | |
108 | is_resume(false), | |
109 | is_reset(reset_type::NONE), stopped(true) { }; | |
110 | //copy constructor | |
111 | profiler(profiler* in) : start(in->start), end(in->end) { | |
112 | #if APEX_HAVE_PAPI | |
113 | for (int i = 0 ; i < 8 ; i++) { | |
114 | papi_start_values[i] = in->papi_start_values[i]; | |
115 | papi_stop_values[i] = in->papi_stop_values[i]; | |
116 | } | |
117 | #endif | |
118 | value = in->elapsed(); | |
119 | children_value = in->children_value; | |
120 | task_id = new task_identifier(*in->task_id); | |
121 | is_counter = in->is_counter; | |
122 | is_resume = in->is_resume; // for yield or resume | |
123 | is_reset = in->is_reset; | |
124 | stopped = in->stopped; | |
125 | } | |
126 | ~profiler(void) { if (task_id != nullptr) delete task_id; }; | |
127 | // for "yield" support | |
128 | void stop(bool is_resume) { | |
129 | this->is_resume = is_resume; | |
130 | end = MYCLOCK::now(); | |
131 | stopped = true; | |
132 | }; | |
133 | void stop() { | |
134 | end = MYCLOCK::now(); | |
135 | stopped = true; | |
136 | }; | |
137 | double elapsed(void) { | |
138 | if(is_counter) { | |
139 | return value; | |
140 | } else { | |
141 | std::chrono::duration<double> time_span = std::chrono::duration_cast<std::chrono::duration<double>>(end - start); | |
142 | return time_span.count(); | |
143 | } | |
144 | } | |
145 | double exclusive_elapsed(void) { | |
146 | return elapsed() - children_value; | |
147 | } | |
148 | ||
149 | static inline profiler* get_disabled_profiler(void) { | |
150 | return disabled_profiler; | |
151 | } | |
152 | // default constructor for the dummy profiler | |
153 | profiler(void) {}; | |
154 | // dummy profiler to indicate that stop/yield should resume immediately | |
155 | static profiler* disabled_profiler; // initialized in profiler_listener.cpp | |
156 | ||
157 | /* This function returns 1/X, where "X" is the MHz rating of the CPU. */ | |
158 | static double get_cpu_mhz () { | |
159 | #ifdef APEX_USE_CLOCK_TIMESTAMP | |
160 | return 1.0; | |
161 | #else | |
162 | static double ticks_per_period = 0.0; | |
163 | if (ticks_per_period == 0.0) { | |
164 | typedef std::chrono::duration<double, typename MYCLOCK::period> CycleA; | |
165 | typedef std::chrono::duration<double, typename std::chrono::CLOCK_TYPE::period> CycleB; | |
166 | const int N = 100000000; | |
167 | auto t0a = MYCLOCK::now(); | |
168 | auto t0b = std::chrono::CLOCK_TYPE::now(); | |
169 | for (int j = 0; j < N; ++j) { | |
170 | #if !defined(_MSC_VER) | |
171 | asm volatile(""); | |
172 | #endif | |
173 | } | |
174 | auto t1a = MYCLOCK::now(); | |
175 | auto t1b = std::chrono::CLOCK_TYPE::now(); | |
176 | // Get the clock ticks per time period | |
177 | //std::cout << CycleA(t1a-t0a).count() << " 1MHz ticks seen." << std::endl; | |
178 | //std::cout << std::chrono::duration_cast<std::chrono::seconds>(CycleB(t1b-t0b)).count() << " Seconds? seen." << std::endl; | |
179 | ticks_per_period = CycleB(t1b-t0b)/CycleA(t1a-t0a); | |
180 | if (apex_options::use_screen_output()) { | |
181 | std::cout << "CPU is " << (1.0/ticks_per_period) << " Hz." << std::endl; | |
182 | } | |
183 | } | |
184 | return ticks_per_period; | |
185 | #endif | |
186 | } | |
187 | ||
188 | /* this is for OTF2 tracing. | |
189 | * We want a timestamp for the start of the trace. | |
190 | * We will also need one for the end of the trace. */ | |
191 | static MYCLOCK::time_point get_global_start(void) { | |
192 | static MYCLOCK::time_point global_now = MYCLOCK::now(); | |
193 | return global_now; | |
194 | } | |
195 | /* this is for getting the endpoint of the trace. */ | |
196 | static MYCLOCK::time_point get_global_end(void) { | |
197 | return MYCLOCK::now(); | |
198 | } | |
199 | static uint64_t time_point_to_nanoseconds(MYCLOCK::time_point tp) { | |
200 | auto value = tp.time_since_epoch(); | |
201 | uint64_t duration = std::chrono::duration_cast<std::chrono::nanoseconds>(value).count(); | |
202 | return duration; | |
203 | } | |
204 | double normalized_timestamp(void) { | |
205 | if(is_counter) { | |
206 | return value; | |
207 | } else { | |
208 | std::chrono::duration<double> time_span = std::chrono::duration_cast<std::chrono::duration<double>>(start - get_global_start()); | |
209 | return time_span.count()*get_cpu_mhz(); | |
210 | } | |
211 | } | |
212 | }; | |
213 | ||
214 | } | |
215 | ||
216 |
Copyright (c) 2006-2012 Rogue Wave Software, Inc. All Rights Reserved.
Patents pending.