mirror of
https://github.com/ceph/ceph
synced 2025-02-22 02:27:29 +00:00
Cycles: A precise performance profile tool using rdtsc
Signed-off-by: Haomai Wang <haomaiwang@gmail.com>
This commit is contained in:
parent
50de36edff
commit
dba078a165
221
src/common/Cycles.cc
Normal file
221
src/common/Cycles.cc
Normal file
@ -0,0 +1,221 @@
|
||||
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
|
||||
// vim: ts=8 sw=2 smarttab
|
||||
/*
|
||||
* Ceph - scalable distributed file system
|
||||
*
|
||||
* Copyright (C) 2014 UnitedStack <haomai@unitedstack.com>
|
||||
*
|
||||
* Author: Haomai Wang <haomaiwang@gmail.com>
|
||||
*
|
||||
* This is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU Lesser General Public
|
||||
* License version 2.1, as published by the Free Software
|
||||
* Foundation. See file COPYING.
|
||||
*
|
||||
*/
|
||||
/* Copyright (c) 2011-2014 Stanford University
|
||||
*
|
||||
* Permission to use, copy, modify, and distribute this software for any
|
||||
* purpose with or without fee is hereby granted, provided that the above
|
||||
* copyright notice and this permission notice appear in all copies.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR(S) DISCLAIM ALL WARRANTIES
|
||||
* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
|
||||
* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL AUTHORS BE LIABLE FOR
|
||||
* ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
|
||||
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
|
||||
* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
|
||||
* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
|
||||
*/
|
||||
|
||||
|
||||
#include <errno.h>
|
||||
#include <sys/time.h>
|
||||
|
||||
#include "errno.h"
|
||||
#include "debug.h"
|
||||
#include "Initialize.h"
|
||||
#include "Cycles.h"
|
||||
|
||||
double Cycles::cycles_per_sec = 0;
|
||||
static Initialize _(Cycles::init);
|
||||
|
||||
/**
|
||||
* Perform once-only overall initialization for the Cycles class, such
|
||||
* as calibrating the clock frequency. This method is invoked automatically
|
||||
* during initialization, but it may be invoked explicitly by other modules
|
||||
* to ensure that initialization occurs before those modules initialize
|
||||
* themselves.
|
||||
*/
|
||||
void Cycles::init()
|
||||
{
|
||||
if (cycles_per_sec != 0)
|
||||
return;
|
||||
|
||||
// Compute the frequency of the fine-grained CPU timer: to do this,
|
||||
// take parallel time readings using both rdtsc and gettimeofday.
|
||||
// After 10ms have elapsed, take the ratio between these readings.
|
||||
|
||||
struct timeval start_time, stop_time;
|
||||
uint64_t start_cycles, stop_cycles, micros;
|
||||
double old_cycles;
|
||||
|
||||
// There is one tricky aspect, which is that we could get interrupted
|
||||
// between calling gettimeofday and reading the cycle counter, in which
|
||||
// case we won't have corresponding readings. To handle this (unlikely)
|
||||
// case, compute the overall result repeatedly, and wait until we get
|
||||
// two successive calculations that are within 0.1% of each other.
|
||||
old_cycles = 0;
|
||||
while (1) {
|
||||
if (gettimeofday(&start_time, NULL) != 0) {
|
||||
assert(0 == "couldn't read clock");
|
||||
}
|
||||
start_cycles = rdtsc();
|
||||
while (1) {
|
||||
if (gettimeofday(&stop_time, NULL) != 0) {
|
||||
assert(0 == "couldn't read clock");
|
||||
}
|
||||
stop_cycles = rdtsc();
|
||||
micros = (stop_time.tv_usec - start_time.tv_usec) +
|
||||
(stop_time.tv_sec - start_time.tv_sec)*1000000;
|
||||
if (micros > 10000) {
|
||||
cycles_per_sec = static_cast<double>(stop_cycles - start_cycles);
|
||||
cycles_per_sec = 1000000.0*cycles_per_sec/ static_cast<double>(micros);
|
||||
break;
|
||||
}
|
||||
}
|
||||
double delta = cycles_per_sec/1000.0;
|
||||
if ((old_cycles > (cycles_per_sec - delta)) &&
|
||||
(old_cycles < (cycles_per_sec + delta))) {
|
||||
return;
|
||||
}
|
||||
old_cycles = cycles_per_sec;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Return the number of CPU cycles per second.
|
||||
*/
|
||||
double Cycles::per_second()
|
||||
{
|
||||
return get_cycles_per_sec();
|
||||
}
|
||||
|
||||
/**
|
||||
* Given an elapsed time measured in cycles, return a floating-point number
|
||||
* giving the corresponding time in seconds.
|
||||
* \param cycles
|
||||
* Difference between the results of two calls to rdtsc.
|
||||
* \param cycles_per_sec
|
||||
* Optional parameter to specify the frequency of the counter that #cycles
|
||||
* was taken from. Useful when converting a remote machine's tick counter
|
||||
* to seconds. The default value of 0 will use the local processor's
|
||||
* computed counter frequency.
|
||||
* \return
|
||||
* The time in seconds corresponding to cycles.
|
||||
*/
|
||||
double Cycles::to_seconds(uint64_t cycles, double cycles_per_sec)
|
||||
{
|
||||
if (cycles_per_sec == 0)
|
||||
cycles_per_sec = get_cycles_per_sec();
|
||||
return static_cast<double>(cycles)/cycles_per_sec;
|
||||
}
|
||||
|
||||
/**
|
||||
* Given a time in seconds, return the number of cycles that it
|
||||
* corresponds to.
|
||||
* \param seconds
|
||||
* Time in seconds.
|
||||
* \param cycles_per_sec
|
||||
* Optional parameter to specify the frequency of the counter that #cycles
|
||||
* was taken from. Useful when converting a remote machine's tick counter
|
||||
* to seconds. The default value of 0 will use the local processor's
|
||||
* computed counter frequency.
|
||||
* \return
|
||||
* The approximate number of cycles corresponding to #seconds.
|
||||
*/
|
||||
uint64_t Cycles::from_seconds(double seconds, double cycles_per_sec)
|
||||
{
|
||||
if (cycles_per_sec == 0)
|
||||
cycles_per_sec = get_cycles_per_sec();
|
||||
return (uint64_t) (seconds*cycles_per_sec + 0.5);
|
||||
}
|
||||
|
||||
/**
|
||||
* Given an elapsed time measured in cycles, return an integer
|
||||
* giving the corresponding time in microseconds. Note: to_seconds()
|
||||
* is faster than this method.
|
||||
* \param cycles
|
||||
* Difference between the results of two calls to rdtsc.
|
||||
* \param cycles_per_sec
|
||||
* Optional parameter to specify the frequency of the counter that #cycles
|
||||
* was taken from. Useful when converting a remote machine's tick counter
|
||||
* to seconds. The default value of 0 will use the local processor's
|
||||
* computed counter frequency.
|
||||
* \return
|
||||
* The time in microseconds corresponding to cycles (rounded).
|
||||
*/
|
||||
uint64_t Cycles::to_microseconds(uint64_t cycles, double cycles_per_sec)
|
||||
{
|
||||
return to_nanoseconds(cycles, cycles_per_sec) / 1000;
|
||||
}
|
||||
|
||||
/**
|
||||
* Given an elapsed time measured in cycles, return an integer
|
||||
* giving the corresponding time in nanoseconds. Note: to_seconds()
|
||||
* is faster than this method.
|
||||
* \param cycles
|
||||
* Difference between the results of two calls to rdtsc.
|
||||
* \param cycles_per_sec
|
||||
* Optional parameter to specify the frequency of the counter that #cycles
|
||||
* was taken from. Useful when converting a remote machine's tick counter
|
||||
* to seconds. The default value of 0 will use the local processor's
|
||||
* computed counter frequency.
|
||||
* \return
|
||||
* The time in nanoseconds corresponding to cycles (rounded).
|
||||
*/
|
||||
uint64_t Cycles::to_nanoseconds(uint64_t cycles, double cycles_per_sec)
|
||||
{
|
||||
if (cycles_per_sec == 0)
|
||||
cycles_per_sec = get_cycles_per_sec();
|
||||
return (uint64_t) (1e09*static_cast<double>(cycles)/cycles_per_sec + 0.5);
|
||||
}
|
||||
|
||||
/**
|
||||
* Given a number of nanoseconds, return an approximate number of
|
||||
* cycles for an equivalent time length.
|
||||
* \param ns
|
||||
* Number of nanoseconds.
|
||||
* \param cycles_per_sec
|
||||
* Optional parameter to specify the frequency of the counter that #cycles
|
||||
* was taken from. Useful when converting a remote machine's tick counter
|
||||
* to seconds. The default value of 0 will use the local processor's
|
||||
* computed counter frequency.
|
||||
* \return
|
||||
* The approximate number of cycles for the same time length.
|
||||
*/
|
||||
uint64_t
|
||||
Cycles::from_nanoseconds(uint64_t ns, double cycles_per_sec)
|
||||
{
|
||||
if (cycles_per_sec == 0)
|
||||
cycles_per_sec = get_cycles_per_sec();
|
||||
return (uint64_t) (static_cast<double>(ns)*cycles_per_sec/1e09 + 0.5);
|
||||
}
|
||||
|
||||
/**
|
||||
* Busy wait for a given number of microseconds.
|
||||
* Callers should use this method in most reasonable cases as opposed to
|
||||
* usleep for accurate measurements. Calling usleep may put the processor
|
||||
* in a low power mode/sleep state which reduces the clock frequency.
|
||||
* So, each time the process/thread wakes up from usleep, it takes some time
|
||||
* to ramp up to maximum frequency. Thus measurements often incur higher
|
||||
* latencies.
|
||||
* \param us
|
||||
* Number of microseconds.
|
||||
*/
|
||||
void
|
||||
Cycles::sleep(uint64_t us)
|
||||
{
|
||||
uint64_t stop = Cycles::rdtsc() + Cycles::from_nanoseconds(1000*us);
|
||||
while (Cycles::rdtsc() < stop);
|
||||
}
|
79
src/common/Cycles.h
Normal file
79
src/common/Cycles.h
Normal file
@ -0,0 +1,79 @@
|
||||
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
|
||||
// vim: ts=8 sw=2 smarttab
|
||||
/*
|
||||
* Ceph - scalable distributed file system
|
||||
*
|
||||
* Copyright (C) 2014 UnitedStack <haomai@unitedstack.com>
|
||||
*
|
||||
* Author: Haomai Wang <haomaiwang@gmail.com>
|
||||
*
|
||||
* This is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU Lesser General Public
|
||||
* License version 2.1, as published by the Free Software
|
||||
* Foundation. See file COPYING.
|
||||
*
|
||||
*/
|
||||
/* Copyright (c) 2011-2014 Stanford University
|
||||
*
|
||||
* Permission to use, copy, modify, and distribute this software for any
|
||||
* purpose with or without fee is hereby granted, provided that the above
|
||||
* copyright notice and this permission notice appear in all copies.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR(S) DISCLAIM ALL WARRANTIES
|
||||
* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
|
||||
* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL AUTHORS BE LIABLE FOR
|
||||
* ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
|
||||
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
|
||||
* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
|
||||
* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
|
||||
*/
|
||||
|
||||
|
||||
#ifndef CEPH_CYCLES_H
|
||||
#define CEPH_CYCLES_H
|
||||
|
||||
|
||||
/**
|
||||
* This class provides static methods that read the fine-grain CPU
|
||||
* cycle counter and translate between cycle-level times and absolute
|
||||
* times.
|
||||
*/
|
||||
class Cycles {
 public:
  /// One-time calibration of the counter frequency (defined in Cycles.cc).
  static void init();

  /**
   * Return the current value of the fine-grain CPU cycle counter.
   *
   * On x86 (32- and 64-bit) this executes the RDTSC instruction and
   * combines EDX:EAX into a 64-bit value. On aarch64 it reads the
   * CNTVCT_EL0 counter register. Any other architecture is rejected at
   * compile time instead of failing later inside the assembler.
   */
  static __inline __attribute__((always_inline)) uint64_t rdtsc() {
#if defined(__i386__) || defined(__x86_64__) || defined(__amd64__)
    uint32_t lo, hi;
    __asm__ __volatile__("rdtsc" : "=a" (lo), "=d" (hi));
    return (((uint64_t)hi << 32) | lo);
#elif defined(__aarch64__)
    uint64_t cntvct;
    // The barriers keep the counter read from being reordered with
    // respect to the surrounding instructions.
    __asm__ __volatile__("isb; mrs %0, cntvct_el0; isb"
                         : "=r" (cntvct) : : "memory");
    return cntvct;
#else
#error "Cycles::rdtsc(): no cycle counter implementation for this architecture"
#endif
  }

  // Conversion helpers. For every function that takes cycles_per_sec, the
  // default of 0 means "use the frequency stored in this class".
  static double per_second();
  static double to_seconds(uint64_t cycles, double cycles_per_sec = 0);
  static uint64_t from_seconds(double seconds, double cycles_per_sec = 0);
  static uint64_t to_microseconds(uint64_t cycles, double cycles_per_sec = 0);
  static uint64_t to_nanoseconds(uint64_t cycles, double cycles_per_sec = 0);
  static uint64_t from_nanoseconds(uint64_t ns, double cycles_per_sec = 0);

  /// Busy-wait for the given number of microseconds.
  static void sleep(uint64_t us);

 private:
  /// Private constructor: the class only exposes static members.
  Cycles();

  /// Conversion factor between cycles and seconds; computed by
  /// Cycles::init.
  static double cycles_per_sec;

  /**
   * Returns the conversion factor between cycles and seconds, i.e. the
   * current value of cycles_per_sec.
   */
  static __inline __attribute__((always_inline)) double get_cycles_per_sec() {
    return cycles_per_sec;
  }
};
|
||||
|
||||
#endif // CEPH_CYCLES_H
|
96
src/common/Initialize.h
Normal file
96
src/common/Initialize.h
Normal file
@ -0,0 +1,96 @@
|
||||
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
|
||||
/*
|
||||
* Ceph - scalable distributed file system
|
||||
*
|
||||
* Copyright (C) 2014 UnitedStack <haomai@unitedstack.com>
|
||||
*
|
||||
* Author: Haomai Wang <haomaiwang@gmail.com>
|
||||
*
|
||||
* This is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU Lesser General Public
|
||||
* License version 2.1, as published by the Free Software
|
||||
* Foundation. See file COPYING.
|
||||
*
|
||||
*/
|
||||
/* Copyright (c) 2011 Stanford University
|
||||
*
|
||||
* Permission to use, copy, modify, and distribute this software for any
|
||||
* purpose with or without fee is hereby granted, provided that the above
|
||||
* copyright notice and this permission notice appear in all copies.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR(S) DISCLAIM ALL WARRANTIES
|
||||
* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
|
||||
* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL AUTHORS BE LIABLE FOR
|
||||
* ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
|
||||
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
|
||||
* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
|
||||
* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
|
||||
*/
|
||||
|
||||
#ifndef CEPH_INITIALIZE_H
|
||||
#define CEPH_INITIALIZE_H
|
||||
|
||||
/**
|
||||
* This class is used to manage once-only initialization that should occur
|
||||
* before main() is invoked, such as the creation of static variables. It
|
||||
* also provides a mechanism for handling dependencies (where one class
|
||||
* needs to perform its once-only initialization before another).
|
||||
*
|
||||
* The simplest way to use an Initialize object is to define a static
|
||||
* initialization method for a class, say Foo::init(). Then, declare
|
||||
* a static Initialize object in the class:
|
||||
* "static Initialize initializer(Foo::init);".
|
||||
* The result is that Foo::init will be invoked when the object is
|
||||
* constructed (before main() is invoked). Foo::init can create static
|
||||
* objects and perform any other once-only initialization needed by the
|
||||
* class. Furthermore, if some other class needs to ensure that Foo has
|
||||
* been initialized (e.g. as part of its own initialization) it can invoke
|
||||
* Foo::init directly (Foo::init should contain an internal guard so that
|
||||
* it only performs its functions once, even if invoked several times).
|
||||
*
|
||||
* There is also a second form of constructor for Initialize that causes a
|
||||
* new object to be dynamically allocated and assigned to a pointer, instead
|
||||
* of invoking a function. This form allows for the creation of static objects
|
||||
* that are never destructed (thereby avoiding issues with the order of
|
||||
* destruction).
|
||||
*/
|
||||
class Initialize {
 public:
  /**
   * Function form: calls #func once, immediately, while this object is
   * being constructed. Declaring a static Initialize object with this
   * constructor therefore runs #func during static initialization.
   *
   * \param func
   *      Function taking no arguments and returning nothing. Because it
   *      may also be invoked directly from elsewhere, it should normally
   *      guard itself so that its work is only done on the first call.
   */
  Initialize(void (*func)()) {
    func();
  }

  /**
   * Pointer form: makes sure #p refers to an object. If #p is NULL a new
   * T is allocated with "new T" and stored in #p; a non-NULL #p is left
   * untouched. This class never deletes the allocated object.
   *
   * \param p
   *      Reference to a pointer of any object type; updated in place
   *      when it is NULL on entry.
   */
  template<typename T>
  explicit Initialize(T*& p) {
    if (!p) {
      p = new T;
    }
  }
};
|
||||
|
||||
#endif // CEPH_INITIALIZE_H
|
@ -73,7 +73,8 @@ libcommon_la_SOURCES = \
|
||||
common/bloom_filter.cc \
|
||||
common/linux_version.c \
|
||||
common/module.c \
|
||||
common/Readahead.cc
|
||||
common/Readahead.cc \
|
||||
common/Cycles.cc
|
||||
|
||||
# these should go out of libcommon
|
||||
libcommon_la_SOURCES += \
|
||||
@ -207,7 +208,8 @@ noinst_HEADERS += \
|
||||
common/linux_version.h \
|
||||
common/module.h \
|
||||
common/Continuation.h \
|
||||
common/Readahead.h
|
||||
common/Readahead.h \
|
||||
common/Cycles.h
|
||||
|
||||
noinst_LTLIBRARIES += libcommon.la
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user