benchmark
1.6.1
Toggle main menu visibility
Loading...
Searching...
No Matches
cycleclock.h
1
// ----------------------------------------------------------------------
2
// CycleClock
3
// A CycleClock tells you the current time in Cycles. The "time"
4
// is actually time since power-on. This is like time() but doesn't
5
// involve a system call and is much more precise.
6
//
7
// NOTE: Not all cpu/platform/kernel combinations guarantee that this
8
// clock increments at a constant rate or is synchronized across all logical
9
// cpus in a system.
10
//
11
// If you need the above guarantees, please consider using a different
12
// API. There are efforts to provide an interface which provides a millisecond
13
// granularity and is implemented as a memory read. A memory read is generally
14
// cheaper than the CycleClock for many architectures.
15
//
16
// Also, in some out of order CPU implementations, the CycleClock is not
17
// serializing. So if you're trying to count at cycles granularity, your
18
// data might be inaccurate due to out of order instruction execution.
19
// ----------------------------------------------------------------------
20
21
#ifndef BENCHMARK_CYCLECLOCK_H_
22
#define BENCHMARK_CYCLECLOCK_H_
23
24
#include <cstdint>
25
26
#include "benchmark/benchmark.h"
27
#include "internal_macros.h"
28
29
#if defined(BENCHMARK_OS_MACOSX)
30
#include <mach/mach_time.h>
31
#endif
32
// For MSVC, we want to use '_asm rdtsc' when possible (since it works
33
// with even ancient MSVC compilers), and when not possible the
34
// __rdtsc intrinsic, declared in <intrin.h>. Unfortunately, in some
35
// environments, <windows.h> and <intrin.h> have conflicting
36
// declarations of some other intrinsics, breaking compilation.
37
// Therefore, we simply declare __rdtsc ourselves. See also
38
// http://connect.microsoft.com/VisualStudio/feedback/details/262047
39
#if defined(COMPILER_MSVC) && !defined(_M_IX86) && !defined(_M_ARM64)
40
extern
"C"
uint64_t __rdtsc();
41
#pragma intrinsic(__rdtsc)
42
#endif
43
44
#if !defined(BENCHMARK_OS_WINDOWS) || defined(BENCHMARK_OS_MINGW)
45
#include <sys/time.h>
46
#include <time.h>
47
#endif
48
49
#ifdef BENCHMARK_OS_EMSCRIPTEN
50
#include <emscripten.h>
51
#endif
52
53
namespace
benchmark {
54
// NOTE: only i386 and x86_64 have been well tested.
55
// PPC, sparc, alpha, and ia64 are based on
56
// http://peter.kuscsik.com/wordpress/?p=14
57
// with modifications by m3b. See also
58
// https://setisvn.ssl.berkeley.edu/svn/lib/fftw-3.0.1/kernel/cycle.h
59
namespace
cycleclock {
60
// This should return the number of cycles since power-on. Thread-safe.
61
inline
BENCHMARK_ALWAYS_INLINE int64_t Now() {
62
#if defined(BENCHMARK_OS_MACOSX)
63
// this goes at the top because we need ALL Macs, regardless of
64
// architecture, to return the number of "mach time units" that
65
// have passed since startup. See sysinfo.cc where
66
// InitializeSystemInfo() sets the supposed cpu clock frequency of
67
// macs to the number of mach time units per second, not actual
68
// CPU clock frequency (which can change in the face of CPU
69
// frequency scaling). Also note that when the Mac sleeps, this
70
// counter pauses; it does not continue counting, nor does it
71
// reset to zero.
72
return
mach_absolute_time();
73
#elif defined(BENCHMARK_OS_EMSCRIPTEN)
74
// this goes above x86-specific code because old versions of Emscripten
75
// define __x86_64__, although they have nothing to do with it.
76
return
static_cast<
int64_t
>
(emscripten_get_now() * 1e+6);
77
#elif defined(__i386__)
78
int64_t ret;
79
__asm__
volatile
(
"rdtsc"
:
"=A"
(ret));
80
return
ret;
81
#elif defined(__x86_64__) || defined(__amd64__)
82
uint64_t low, high;
83
__asm__
volatile
(
"rdtsc"
:
"=a"
(low),
"=d"
(high));
84
return
(high << 32) | low;
85
#elif defined(__powerpc__) || defined(__ppc__)
86
// This returns a time-base, which is not always precisely a cycle-count.
87
#if defined(__powerpc64__) || defined(__ppc64__)
88
int64_t tb;
89
asm
volatile
(
"mfspr %0, 268"
:
"=r"
(tb));
90
return
tb;
91
#else
92
uint32_t tbl, tbu0, tbu1;
93
asm
volatile
(
94
"mftbu %0\n"
95
"mftb %1\n"
96
"mftbu %2"
97
:
"=r"
(tbu0),
"=r"
(tbl),
"=r"
(tbu1));
98
tbl &= -
static_cast<
int32_t
>
(tbu0 == tbu1);
99
// high 32 bits in tbu1; low 32 bits in tbl (tbu0 is no longer needed)
100
return
(
static_cast<
uint64_t
>
(tbu1) << 32) | tbl;
101
#endif
102
#elif defined(__sparc__)
103
int64_t tick;
104
asm
(
".byte 0x83, 0x41, 0x00, 0x00"
);
105
asm
(
"mov %%g1, %0"
:
"=r"
(tick));
106
return
tick;
107
#elif defined(__ia64__)
108
int64_t itc;
109
asm
(
"mov %0 = ar.itc"
:
"=r"
(itc));
110
return
itc;
111
#elif defined(COMPILER_MSVC) && defined(_M_IX86)
112
// Older MSVC compilers (like 7.x) don't seem to support the
113
// __rdtsc intrinsic properly, so I prefer to use _asm instead
114
// when I know it will work. Otherwise, I'll use __rdtsc and hope
115
// the code is being compiled with a non-ancient compiler.
116
_asm rdtsc
117
#elif defined(COMPILER_MSVC) && defined(_M_ARM64)
118
// See // https://docs.microsoft.com/en-us/cpp/intrinsics/arm64-intrinsics
119
// and https://reviews.llvm.org/D53115
120
int64_t virtual_timer_value;
121
virtual_timer_value = _ReadStatusReg(ARM64_CNTVCT);
122
return
virtual_timer_value;
123
#elif defined(COMPILER_MSVC)
124
return
__rdtsc();
125
#elif defined(BENCHMARK_OS_NACL)
126
// Native Client validator on x86/x86-64 allows RDTSC instructions,
127
// and this case is handled above. Native Client validator on ARM
128
// rejects MRC instructions (used in the ARM-specific sequence below),
129
// so we handle it here. Portable Native Client compiles to
130
// architecture-agnostic bytecode, which doesn't provide any
131
// cycle counter access mnemonics.
132
133
// Native Client does not provide any API to access cycle counter.
134
// Use clock_gettime(CLOCK_MONOTONIC, ...) instead of gettimeofday
135
// because is provides nanosecond resolution (which is noticable at
136
// least for PNaCl modules running on x86 Mac & Linux).
137
// Initialize to always return 0 if clock_gettime fails.
138
struct
timespec ts = {0, 0};
139
clock_gettime(CLOCK_MONOTONIC, &ts);
140
return
static_cast<
int64_t
>
(ts.tv_sec) * 1000000000 + ts.tv_nsec;
141
#elif defined(__aarch64__)
142
// System timer of ARMv8 runs at a different frequency than the CPU's.
143
// The frequency is fixed, typically in the range 1-50MHz. It can be
144
// read at CNTFRQ special register. We assume the OS has set up
145
// the virtual timer properly.
146
int64_t virtual_timer_value;
147
asm
volatile
(
"mrs %0, cntvct_el0"
:
"=r"
(virtual_timer_value));
148
return
virtual_timer_value;
149
#elif defined(__ARM_ARCH)
150
// V6 is the earliest arch that has a standard cyclecount
151
// Native Client validator doesn't allow MRC instructions.
152
#if (__ARM_ARCH >= 6)
153
uint32_t pmccntr;
154
uint32_t pmuseren;
155
uint32_t pmcntenset;
156
// Read the user mode perf monitor counter access permissions.
157
asm
volatile
(
"mrc p15, 0, %0, c9, c14, 0"
:
"=r"
(pmuseren));
158
if
(pmuseren & 1) {
// Allows reading perfmon counters for user mode code.
159
asm
volatile
(
"mrc p15, 0, %0, c9, c12, 1"
:
"=r"
(pmcntenset));
160
if
(pmcntenset & 0x80000000ul) {
// Is it counting?
161
asm
volatile
(
"mrc p15, 0, %0, c9, c13, 0"
:
"=r"
(pmccntr));
162
// The counter is set up to count every 64th cycle
163
return
static_cast<
int64_t
>
(pmccntr) * 64;
// Should optimize to << 6
164
}
165
}
166
#endif
167
struct
timeval tv;
168
gettimeofday(&tv,
nullptr
);
169
return
static_cast<
int64_t
>
(tv.tv_sec) * 1000000 + tv.tv_usec;
170
#elif defined(__mips__) || defined(__m68k__)
171
// mips apparently only allows rdtsc for superusers, so we fall
172
// back to gettimeofday. It's possible clock_gettime would be better.
173
struct
timeval tv;
174
gettimeofday(&tv,
nullptr
);
175
return
static_cast<
int64_t
>
(tv.tv_sec) * 1000000 + tv.tv_usec;
176
#elif defined(__loongarch__)
177
struct
timeval tv;
178
gettimeofday(&tv,
nullptr
);
179
return
static_cast<
int64_t
>
(tv.tv_sec) * 1000000 + tv.tv_usec;
180
#elif defined(__s390__)
// Covers both s390 and s390x.
181
// Return the CPU clock.
182
uint64_t tsc;
183
#if defined(BENCHMARK_OS_ZOS) && defined(COMPILER_IBMXL)
184
// z/OS XL compiler HLASM syntax.
185
asm
(
" stck %0"
:
"=m"
(tsc) : :
"cc"
);
186
#else
187
asm
(
"stck %0"
:
"=Q"
(tsc) : :
"cc"
);
188
#endif
189
return
tsc;
190
#elif defined(__riscv)
// RISC-V
191
// Use RDTIME (and RDTIMEH on riscv32).
192
// RDCYCLE is a privileged instruction since Linux 6.6.
193
#if __riscv_xlen == 32
194
uint32_t cycles_lo, cycles_hi0, cycles_hi1;
195
// This asm also includes the PowerPC overflow handling strategy, as above.
196
// Implemented in assembly because Clang insisted on branching.
197
asm
volatile
(
198
"rdtimeh %0\n"
199
"rdtime %1\n"
200
"rdtimeh %2\n"
201
"sub %0, %0, %2\n"
202
"seqz %0, %0\n"
203
"sub %0, zero, %0\n"
204
"and %1, %1, %0\n"
205
:
"=r"
(cycles_hi0),
"=r"
(cycles_lo),
"=r"
(cycles_hi1));
206
return
(
static_cast<
uint64_t
>
(cycles_hi1) << 32) | cycles_lo;
207
#else
208
uint64_t cycles;
209
asm
volatile
(
"rdtime %0"
:
"=r"
(cycles));
210
return
cycles;
211
#endif
212
#elif defined(__e2k__) || defined(__elbrus__)
213
struct
timeval tv;
214
gettimeofday(&tv,
nullptr
);
215
return
static_cast<
int64_t
>
(tv.tv_sec) * 1000000 + tv.tv_usec;
216
#else
217
// The soft failover to a generic implementation is automatic only for ARM.
218
// For other platforms the developer is expected to make an attempt to create
219
// a fast implementation and use generic version if nothing better is available.
220
#error You need to define CycleTimer for your OS and CPU
221
#endif
222
}
223
}
// end namespace cycleclock
224
}
// end namespace benchmark
225
226
#endif
// BENCHMARK_CYCLECLOCK_H_
src
cycleclock.h
Generated by
1.17.0