Cycles: A precise performance profiling tool using rdtsc

Signed-off-by: Haomai Wang <haomaiwang@gmail.com>
This commit is contained in:
Haomai Wang 2014-12-01 16:06:17 +08:00
parent 50de36edff
commit dba078a165
4 changed files with 400 additions and 2 deletions

221
src/common/Cycles.cc Normal file
View File

@ -0,0 +1,221 @@
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
// vim: ts=8 sw=2 smarttab
/*
* Ceph - scalable distributed file system
*
* Copyright (C) 2014 UnitedStack <haomai@unitedstack.com>
*
* Author: Haomai Wang <haomaiwang@gmail.com>
*
* This is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License version 2.1, as published by the Free Software
* Foundation. See file COPYING.
*
*/
/* Copyright (c) 2011-2014 Stanford University
*
* Permission to use, copy, modify, and distribute this software for any
* purpose with or without fee is hereby granted, provided that the above
* copyright notice and this permission notice appear in all copies.
*
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR(S) DISCLAIM ALL WARRANTIES
* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL AUTHORS BE LIABLE FOR
* ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
*/
#include <errno.h>
#include <sys/time.h>
#include "errno.h"
#include "debug.h"
#include "Initialize.h"
#include "Cycles.h"
// Calibrated cycle-counter frequency, in cycles per second. The initial
// value 0 is the "not yet calibrated" marker that Cycles::init() tests
// before doing any work.
double Cycles::cycles_per_sec = 0;
// Constructing this file-scope Initialize object invokes Cycles::init(),
// so calibration runs while this translation unit's statics are being
// initialized.
static Initialize _(Cycles::init);
/**
 * Perform once-only overall initialization for the Cycles class: calibrate
 * cycles_per_sec by timing the cycle counter against gettimeofday().
 * This method is invoked automatically during static initialization, but it
 * may also be invoked explicitly by other modules to ensure that calibration
 * has happened before those modules initialize themselves. Calls made after
 * cycles_per_sec has become non-zero return immediately.
 */
void Cycles::init()
{
  if (cycles_per_sec != 0)
    return;

  // Compute the frequency of the fine-grained CPU timer: to do this,
  // take parallel time readings using both rdtsc and gettimeofday.
  // After 10ms have elapsed, take the ratio between these readings.
  struct timeval start_time, stop_time;
  uint64_t start_cycles, stop_cycles;
  // Signed on purpose: gettimeofday() reports wall-clock time, which can be
  // stepped backwards. An unsigned difference would wrap to a huge value
  // and be mistaken for "more than 10ms elapsed".
  int64_t micros;

  // There is one tricky aspect, which is that we could get interrupted
  // between calling gettimeofday and reading the cycle counter, in which
  // case we won't have corresponding readings. To handle this (unlikely)
  // case, compute the overall result repeatedly, and wait until we get
  // two successive calculations that are within 0.1% of each other.
  double old_cycles = 0;
  while (1) {
    if (gettimeofday(&start_time, NULL) != 0) {
      assert(0 == "couldn't read clock");
    }
    start_cycles = rdtsc();
    while (1) {
      if (gettimeofday(&stop_time, NULL) != 0) {
        assert(0 == "couldn't read clock");
      }
      stop_cycles = rdtsc();
      micros = (static_cast<int64_t>(stop_time.tv_sec) -
                static_cast<int64_t>(start_time.tv_sec)) * 1000000 +
               (static_cast<int64_t>(stop_time.tv_usec) -
                static_cast<int64_t>(start_time.tv_usec));
      if (micros < 0) {
        // The wall clock went backwards during this interval, so the pair
        // of readings is unusable. Restart the interval from the newest
        // reading instead of waiting for the clock to catch up.
        start_time = stop_time;
        start_cycles = stop_cycles;
        continue;
      }
      if (micros > 10000) {
        cycles_per_sec = 1000000.0 *
                         static_cast<double>(stop_cycles - start_cycles) /
                         static_cast<double>(micros);
        break;
      }
    }
    // Accept the estimate once it agrees with the previous one to 0.1%.
    double delta = cycles_per_sec/1000.0;
    if ((old_cycles > (cycles_per_sec - delta)) &&
        (old_cycles < (cycles_per_sec + delta))) {
      return;
    }
    old_cycles = cycles_per_sec;
  }
}
/**
 * Report the frequency of the cycle counter.
 * \return
 *      The number of cycles per second, as supplied by get_cycles_per_sec().
 */
double Cycles::per_second()
{
  const double frequency = get_cycles_per_sec();
  return frequency;
}
/**
 * Convert an elapsed cycle count into seconds.
 * \param cycles
 *      Difference between the results of two calls to rdtsc.
 * \param cycles_per_sec
 *      Frequency of the counter that #cycles was read from; useful when
 *      the count came from a remote machine. Passing 0 (the default)
 *      selects the locally computed counter frequency.
 * \return
 *      The time in seconds corresponding to #cycles, as a floating-point
 *      value.
 */
double Cycles::to_seconds(uint64_t cycles, double cycles_per_sec)
{
  const double frequency =
      (cycles_per_sec == 0) ? get_cycles_per_sec() : cycles_per_sec;
  return static_cast<double>(cycles) / frequency;
}
/**
 * Convert a duration in seconds into a cycle count.
 * \param seconds
 *      Time in seconds.
 * \param cycles_per_sec
 *      Frequency of the counter the result is intended for. Passing 0
 *      (the default) selects the locally computed counter frequency.
 * \return
 *      The approximate number of cycles corresponding to #seconds; 0.5 is
 *      added before the conversion to an integer.
 */
uint64_t Cycles::from_seconds(double seconds, double cycles_per_sec)
{
  const double frequency =
      (cycles_per_sec == 0) ? get_cycles_per_sec() : cycles_per_sec;
  return static_cast<uint64_t>(seconds * frequency + 0.5);
}
/**
 * Convert an elapsed cycle count into whole microseconds.
 * The value is obtained from to_nanoseconds() and then divided by 1000
 * using integer division, so any fractional microsecond is discarded.
 * Note: to_seconds() is faster than this method.
 * \param cycles
 *      Difference between the results of two calls to rdtsc.
 * \param cycles_per_sec
 *      Frequency of the counter that #cycles was read from; forwarded
 *      unchanged to to_nanoseconds(). Passing 0 (the default) selects the
 *      locally computed counter frequency.
 * \return
 *      The time in microseconds corresponding to #cycles.
 */
uint64_t Cycles::to_microseconds(uint64_t cycles, double cycles_per_sec)
{
  const uint64_t nanos = to_nanoseconds(cycles, cycles_per_sec);
  return nanos / 1000;
}
/**
 * Convert an elapsed cycle count into nanoseconds.
 * Note: to_seconds() is faster than this method.
 * \param cycles
 *      Difference between the results of two calls to rdtsc.
 * \param cycles_per_sec
 *      Frequency of the counter that #cycles was read from; useful when
 *      the count came from a remote machine. Passing 0 (the default)
 *      selects the locally computed counter frequency.
 * \return
 *      The time in nanoseconds corresponding to #cycles; 0.5 is added
 *      before the conversion to an integer.
 */
uint64_t Cycles::to_nanoseconds(uint64_t cycles, double cycles_per_sec)
{
  const double frequency =
      (cycles_per_sec == 0) ? get_cycles_per_sec() : cycles_per_sec;
  return static_cast<uint64_t>(
      1e09 * static_cast<double>(cycles) / frequency + 0.5);
}
/**
 * Convert a duration in nanoseconds into an approximate cycle count.
 * \param ns
 *      Number of nanoseconds.
 * \param cycles_per_sec
 *      Frequency of the counter the result is intended for. Passing 0
 *      (the default) selects the locally computed counter frequency.
 * \return
 *      The approximate number of cycles spanning the same length of time;
 *      0.5 is added before the conversion to an integer.
 */
uint64_t
Cycles::from_nanoseconds(uint64_t ns, double cycles_per_sec)
{
  const double frequency =
      (cycles_per_sec == 0) ? get_cycles_per_sec() : cycles_per_sec;
  return static_cast<uint64_t>(
      static_cast<double>(ns) * frequency / 1e09 + 0.5);
}
/**
 * Busy-wait for a given number of microseconds.
 * Callers taking accurate measurements should generally prefer this to
 * usleep. Calling usleep may put the processor into a low-power/sleep
 * state that reduces the clock frequency; each time the thread wakes up it
 * then takes some time to ramp back up to maximum frequency, so
 * measurements often show higher latencies.
 * \param us
 *      Number of microseconds to spin for.
 */
void
Cycles::sleep(uint64_t us)
{
  // Translate the delay into a target counter value, then poll the
  // counter until that value has been reached.
  const uint64_t wait_cycles = Cycles::from_nanoseconds(1000*us);
  const uint64_t deadline = Cycles::rdtsc() + wait_cycles;
  for (;;) {
    if (Cycles::rdtsc() >= deadline)
      break;
  }
}

79
src/common/Cycles.h Normal file
View File

@ -0,0 +1,79 @@
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
// vim: ts=8 sw=2 smarttab
/*
* Ceph - scalable distributed file system
*
* Copyright (C) 2014 UnitedStack <haomai@unitedstack.com>
*
* Author: Haomai Wang <haomaiwang@gmail.com>
*
* This is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License version 2.1, as published by the Free Software
* Foundation. See file COPYING.
*
*/
/* Copyright (c) 2011-2014 Stanford University
*
* Permission to use, copy, modify, and distribute this software for any
* purpose with or without fee is hereby granted, provided that the above
* copyright notice and this permission notice appear in all copies.
*
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR(S) DISCLAIM ALL WARRANTIES
* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL AUTHORS BE LIABLE FOR
* ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
*/
#ifndef CEPH_CYCLES_H
#define CEPH_CYCLES_H
#include <stdint.h>

/**
 * This class provides static methods that read the fine-grain CPU
 * cycle counter and translate between cycle-level times and absolute
 * times. It holds only static state and cannot be instantiated.
 */
class Cycles {
 public:
  /// Calibrate cycles_per_sec; does nothing once it is non-zero.
  static void init();

  /**
   * Return the current value of the fine-grain CPU cycle counter.
   * On x86 this is the time-stamp counter read with the RDTSC instruction;
   * on aarch64 it is the virtual counter register CNTVCT_EL0. Neither read
   * is serialized against surrounding instructions. Other architectures
   * are rejected at compile time rather than failing inside the inline
   * assembly.
   */
  static __inline __attribute__((always_inline)) uint64_t rdtsc() {
#if defined(__i386__) || defined(__x86_64__) || defined(__amd64__)
    uint32_t lo, hi;
    // RDTSC returns the low 32 bits in EAX and the high 32 bits in EDX.
    __asm__ __volatile__("rdtsc" : "=a" (lo), "=d" (hi));
    return (((uint64_t)hi << 32) | lo);
#elif defined(__aarch64__)
    uint64_t ticks;
    __asm__ __volatile__("mrs %0, cntvct_el0" : "=r" (ticks));
    return ticks;
#else
#error "Cycles::rdtsc() has no cycle counter implementation for this architecture"
#endif
  }

  /// Return the number of counter cycles per second.
  static double per_second();
  /// Convert a cycle count to seconds; cycles_per_sec == 0 selects the
  /// locally computed frequency (likewise for the conversions below).
  static double to_seconds(uint64_t cycles, double cycles_per_sec = 0);
  /// Convert seconds to an approximate cycle count.
  static uint64_t from_seconds(double seconds, double cycles_per_sec = 0);
  /// Convert a cycle count to microseconds.
  static uint64_t to_microseconds(uint64_t cycles, double cycles_per_sec = 0);
  /// Convert a cycle count to nanoseconds.
  static uint64_t to_nanoseconds(uint64_t cycles, double cycles_per_sec = 0);
  /// Convert nanoseconds to an approximate cycle count.
  static uint64_t from_nanoseconds(uint64_t ns, double cycles_per_sec = 0);
  /// Busy-wait for the given number of microseconds.
  static void sleep(uint64_t us);

 private:
  /// Declared private so that no Cycles object can be created.
  Cycles();

  /// Conversion factor between cycles and seconds; computed by
  /// Cycles::init.
  static double cycles_per_sec;

  /**
   * Returns the conversion factor between cycles and seconds, i.e. the
   * current value of cycles_per_sec.
   */
  static __inline __attribute__((always_inline)) double get_cycles_per_sec() {
    return cycles_per_sec;
  }
};
#endif // CEPH_CYCLES_H

96
src/common/Initialize.h Normal file
View File

@ -0,0 +1,96 @@
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
/*
* Ceph - scalable distributed file system
*
* Copyright (C) 2014 UnitedStack <haomai@unitedstack.com>
*
* Author: Haomai Wang <haomaiwang@gmail.com>
*
* This is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License version 2.1, as published by the Free Software
* Foundation. See file COPYING.
*
*/
/* Copyright (c) 2011 Stanford University
*
* Permission to use, copy, modify, and distribute this software for any
* purpose with or without fee is hereby granted, provided that the above
* copyright notice and this permission notice appear in all copies.
*
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR(S) DISCLAIM ALL WARRANTIES
* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL AUTHORS BE LIABLE FOR
* ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
*/
#ifndef CEPH_INITIALIZE_H
#define CEPH_INITIALIZE_H
/**
 * Helper for once-only initialization that must happen before main() is
 * invoked, such as the creation of static variables. It also gives classes
 * a way to express ordering dependencies between their initializers.
 *
 * Typical use: give a class a static initialization method, say
 * Foo::init(), and declare a static Initialize object for it:
 *     "static Initialize initializer(Foo::init);".
 * Foo::init then runs when that object is constructed (before main() is
 * invoked) and can create static objects or do any other once-only setup.
 * A class that needs Foo to be ready first can simply call Foo::init
 * itself, so Foo::init should guard against doing its work twice.
 *
 * A second constructor form allocates a new object and stores it in a
 * pointer instead of calling a function. Objects created this way are
 * never destructed, which sidesteps destruction-order problems.
 */
class Initialize {
 public:
  /**
   * Invoke #func immediately, during construction. With a static
   * Initialize object this makes #func run before main().
   *
   * \param func
   *      Function called with no arguments when the object is
   *      constructed. It will typically create static objects and/or
   *      call other initialization functions, and should normally guard
   *      itself so that only its first invocation does any work.
   */
  Initialize(void (*func)()) {
    func();
  }

  /**
   * Make sure #p refers to an object, creating one with the no-argument
   * form of new if necessary. With a static Initialize object the
   * allocation happens before main() runs.
   *
   * \param p
   *      Pointer to an object of any type. If it is NULL it is replaced
   *      with a pointer to a newly allocated object of that type;
   *      otherwise it is left untouched.
   */
  template<typename T>
  explicit Initialize(T*& p) {
    if (p != NULL) {
      return;
    }
    p = new T;
  }
};
#endif // CEPH_INITIALIZE_H

View File

@ -73,7 +73,8 @@ libcommon_la_SOURCES = \
common/bloom_filter.cc \
common/linux_version.c \
common/module.c \
common/Readahead.cc
common/Readahead.cc \
common/Cycles.cc
# these should go out of libcommon
libcommon_la_SOURCES += \
@ -207,7 +208,8 @@ noinst_HEADERS += \
common/linux_version.h \
common/module.h \
common/Continuation.h \
common/Readahead.h
common/Readahead.h \
common/Cycles.h
noinst_LTLIBRARIES += libcommon.la