bluestore: Revert "os/bluestore: Add CoDel to BlueStore for Bufferbloat mitigation"

Merge caused: https://tracker.ceph.com/issues/55433

Signed-off-by: Samuel Just <sjust@redhat.com>
This commit is contained in:
Samuel Just 2022-04-25 12:29:49 -07:00 committed by Samuel Just
parent a9cb584d3f
commit f6a9ee72d7
15 changed files with 0 additions and 1037 deletions

View File

@ -1,41 +0,0 @@
============================================
BlueStore Bufferbloat Mitigation Using CoDel
============================================
Introduction
------------
Bufferbloat happens when a frontend buffers too much data for a backend.
This can introduce latency spikes to the backend and compromise the
request schedulability of the frontend.
BlueStore has the bufferbloat problem due to its large queue. All
write requests are submitted immediately to BlueStore to achieve high
performance. However, this can compromise request schedulability in OSD.
As a solution, the CoDel algorithm is implemented in BlueStore as
an admission control system to control the number of transactions
submitted to BlueStore. This mechanism will negatively impact the
throughput of BlueStore. However, a tradeoff parameter has been introduced
to control BlueStore throughput loss versus BlueStore latency decrease.
Configurations
--------------
CoDel can be enabled using "*bluestore_codel*" config. The other important
config that needs to be set is "*bluestore_codel_throughput_latency_tradeoff*".
This config adjusts the tradeoff between BlueStore throughput loss and
BlueStore latency decrease. This parameter defines the amount of throughput
loss in MB/s for one ms decrease in BlueStore latency. For example, a value
of 5 means that we are willing to lose a maximum of 5 MB/s of throughput for
every 1 ms decrease in BlueStore latency.
Experiments
-----------
For measuring the impact of BlueStore CoDel on BlueStore, we measured the
transaction latency inside the BlueStore (BlueStore latency) and BlueStore
throughput. We compared these measurements with measurements from vanilla BlueStore.
These experiments show that:
1. BlueStore CoDel can decrease the BlueStore latency with a small and controllable
impact on throughput.
2. BlueStore CoDel can react to workload changes to keep the desired tradeoff
between latency and throughput.

View File

@ -1,45 +0,0 @@
overrides:
thrashosds:
bdev_inject_crash: 2
bdev_inject_crash_probability: .5
ceph:
fs: xfs
conf:
osd:
osd objectstore: bluestore
bluestore block size: 96636764160
debug bluestore: 20
debug bluefs: 20
debug rocksdb: 10
bluestore fsck on mount: true
bluestore allocator: bitmap
# lower the full ratios since we can fill up a 100gb osd so quickly
mon osd full ratio: .9
mon osd backfillfull_ratio: .85
mon osd nearfull ratio: .8
osd failsafe full ratio: .95
# this doesn't work with failures bc the log writes are not atomic across the two backends
# bluestore bluefs env mirror: true
bdev enable discard: true
bdev async discard: true
bluestore codel: true
ceph-deploy:
fs: xfs
bluestore: yes
conf:
osd:
osd objectstore: bluestore
bluestore block size: 96636764160
debug bluestore: 20
debug bluefs: 20
debug rocksdb: 10
bluestore fsck on mount: true
# lower the full ratios since we can fill up a 100gb osd so quickly
mon osd full ratio: .9
mon osd backfillfull_ratio: .85
mon osd nearfull ratio: .8
osd failsafe full ratio: .95
bdev enable discard: true
bdev async discard: true
bluestore codel: true

View File

@ -6312,72 +6312,3 @@ options:
default: 0
services:
- mgr
- name: bluestore_codel
type: bool
level: advanced
desc: enable/disable bluestore SlowFastCodel
default: false
with_legacy: true
- name: bluestore_codel_throughput_latency_tradeoff
type: float
level: advanced
desc: adjust the tradeoff between throughput and bluestore latency in SlowFastCodel
long_desc: This parameter defines the amount of throughput loss (MB/s) for one ms
decrease in bluestore latency. (a value of 5 means that we are willing to lose
maximum of 5 MB/s of throughput for every 1 ms decrease in bluestore latency)
default: 5
with_legacy: true
- name: bluestore_codel_initial_target_latency
type: float
level: advanced
desc: initial target latency for SlowFastCodel in ms
default: 5.0
with_legacy: true
- name: bluestore_codel_slow_interval
type: float
level: advanced
desc: the interval of the slow loop in SlowFastCodel in ms (this parameter should be larger than 'bluestore_codel_fast_interval')
default: 500.0
with_legacy: true
- name: bluestore_codel_fast_interval
type: float
level: advanced
desc: the interval of the fast loop in SlowFastCodel in ms
default: 50.0
with_legacy: true
- name: bluestore_codel_min_target_latency
type: float
level: advanced
desc: the minimum possible target latency in SlowFastCodel in ms
default: 1.0
with_legacy: true
- name: bluestore_codel_max_target_latency
type: float
level: advanced
desc: the maximum possible target latency in SlowFastCodel in ms
default: 1000.0
with_legacy: true
- name: bluestore_codel_initial_budget_bytes
type: size
level: advanced
desc: the initial bluestore throttle budget in SlowFastCodel
default: 100_K
with_legacy: true
- name: bluestore_codel_min_budget_bytes
type: size
level: advanced
desc: the minimum bluestore throttle budget in SlowFastCodel
default: 100_K
with_legacy: true
- name: bluestore_codel_budget_increment_bytes
type: size
level: advanced
desc: the increment size for opening the bluestore throttle in SlowFastCodel
default: 10_K
with_legacy: true
- name: bluestore_codel_regression_history_size
type: int
level: advanced
desc: number of the slow interval throughput and latency samples that SlowFastCodel keeps for regression
default: 100
with_legacy: true

View File

@ -1,127 +0,0 @@
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
#pragma once
#include <iostream>
#include <vector>
#include <cmath>
#include <boost/numeric/ublas/matrix.hpp>
#define Z_P 2.33 // z score for 99th percentile
namespace ceph {
/***
 * Calculate the inverse of a 2x2 matrix with the closed-form adjugate formula
 *
 *   | a b |^-1        1      |  d -b |
 *   | c d |    =  ---------  | -c  a |
 *                 ad - bc
 *
 * The input is only read, so it is taken by const reference; callers may pass
 * const matrices or temporaries.
 *
 * @param m a square 2x2 matrix
 * @return the inverse of m (m^-1)
 * @note No check is made for a singular matrix: when ad - bc == 0 the
 *       division below produces non-finite entries.
 */
static boost::numeric::ublas::matrix<double>
matrix_inverse(const boost::numeric::ublas::matrix<double> &m) {
  assert(m.size1() == m.size2() &&
         "Can only calculate the inverse of square matrices");
  assert(m.size1() == 2 && m.size2() == 2 && "Only for 2x2 matrices");
  const double a = m(0, 0);
  const double b = m(0, 1);
  const double c = m(1, 0);
  const double d = m(1, 1);
  // reciprocal of the determinant, shared by all four entries
  const double inv_determinant = 1.0 / ((a * d) - (b * c));
  boost::numeric::ublas::matrix<double> m_inverse(2, 2);
  m_inverse(0, 0) = d * inv_determinant;
  m_inverse(0, 1) = -b * inv_determinant;
  m_inverse(1, 0) = -c * inv_determinant;
  m_inverse(1, 1) = a * inv_determinant;
  return m_inverse;
}
/***
* Find a logarithmic function in form of "y = a + b * ln(x)" which fits
* the given points (x_values and y_values).
* @param std::vector<double> x_values, x values for sample points
* @param std::vector<double> y_values, y values for sample points
* @param double theta[2], holds the a and b as output (theta[0] = a and theta[1] = b)
*/
static void regression(
const std::vector<double> &x_values,
const std::vector<double> &y_values,
double theta[2]) {
assert(x_values.size() == y_values.size() &&
"x and y values vectors should have a same size.");
const int n = x_values.size();
boost::numeric::ublas::matrix<double> y_m(n, 1);
for (int i = 0; i < n; i++) {
y_m(i, 0) = y_values[i];
}
boost::numeric::ublas::scalar_matrix<double> sm(n, 2, 1);
boost::numeric::ublas::matrix<double> x_new_m(sm);
for (int i = 0; i < n; i++) {
x_new_m(i, 0) = 1;
x_new_m(i, 1) = std::log(x_values[i]);
}
boost::numeric::ublas::matrix<double> x_new_trans_m = boost::numeric::ublas::trans(
x_new_m);
boost::numeric::ublas::matrix<double> x_new_trans_dot_x_new_m = boost::numeric::ublas::prod(
x_new_trans_m, x_new_m);
boost::numeric::ublas::matrix<double> temp_1_m = matrix_inverse(
x_new_trans_dot_x_new_m);
boost::numeric::ublas::matrix<double> temp_2_m = boost::numeric::ublas::prod(
x_new_trans_m, y_m);
boost::numeric::ublas::matrix<double> theta_m = boost::numeric::ublas::prod(
temp_1_m, temp_2_m);
theta[0] = theta_m(0, 0);
theta[1] = theta_m(1, 0);
}
/***
 * Fit "y = a + b * ln(x)" to the sample points and report where on that
 * curve the derivative equals target_slope. Since dy/dx = b / x, the
 * answer is x = b / target_slope.
 * @param x_values, x values for sample points
 * @param y_values, y values for sample points
 * @param target_slope, the slope that we are looking for (must be > 0)
 * @return the x location where the slope of the curve is target_slope
 */
static double find_slope_on_curve(
  const std::vector<double> &x_values,
  const std::vector<double> &y_values,
  double target_slope) {
  assert(x_values.size() == y_values.size() &&
         "x and y values vectors should have a same size.");
  assert(target_slope != 0 &&
         "The target slope of zero will result to a inf x, try a nonzero value.");
  assert(target_slope >= 0 &&
         "The target slope for a logarithmic function should be positive.");
  // coefficients[0] + coefficients[1] * ln(x) is the fitted curve
  double coefficients[2];
  regression(x_values, y_values, coefficients);
  // derivative of the fitted curve is coefficients[1] / x
  return coefficients[1] / target_slope;
}
/***
 * Derive the mu and std parameters of a lognormal distribution from its mode
 * and its x boundaries. Both mode and max_x are shifted by min_x first, so
 * the distribution is described relative to the lower boundary.
 * @param mode, the mode of the distribution.
 * @param min_x, x lower boundary of distribution (zero percentile)
 * @param max_x, x upper boundary of distribution (99th percentile)
 * @param params, holds the calculated distribution parameters (mu and std) as
 * output (params[0] = mu and params[1] = std)
 * @note The assertion admits mode == min_x, in which case std::log(0) is
 *       evaluated below.
 */
static void
find_log_normal_dist_params(double mode, double min_x, double max_x,
                            double params[2]) {
  assert(min_x < max_x && "The min_x should be smaller than max_x");
  assert(mode >= min_x && mode < max_x &&
         "The mode should be between min_x and max_x");
  const double log_upper = std::log(max_x - min_x);
  const double log_mode = std::log(mode - min_x);
  // positive root of the quadratic in sigma that ties mode and upper bound
  const double under_root = Z_P * Z_P + 4 * log_upper - 4 * log_mode;
  const double sigma = (-Z_P + std::sqrt(under_root)) / 2;
  params[0] = log_upper - Z_P * sigma;
  params[1] = sigma;
}
}

View File

@ -55,7 +55,6 @@ set(alien_store_srcs
${PROJECT_SOURCE_DIR}/src/os/bluestore/BlueStore.cc
${PROJECT_SOURCE_DIR}/src/os/bluestore/simple_bitmap.cc
${PROJECT_SOURCE_DIR}/src/os/bluestore/bluestore_types.cc
${PROJECT_SOURCE_DIR}/src/os/bluestore/BlueStoreSlowFastCoDel.cc
${PROJECT_SOURCE_DIR}/src/os/bluestore/fastbmap_allocator_impl.cc
${PROJECT_SOURCE_DIR}/src/os/bluestore/FreelistManager.cc
${PROJECT_SOURCE_DIR}/src/os/bluestore/HybridAllocator.cc

View File

@ -29,7 +29,6 @@ if(WITH_BLUESTORE)
bluestore/BlueStore.cc
bluestore/simple_bitmap.cc
bluestore/bluestore_types.cc
bluestore/BlueStoreSlowFastCoDel.cc
bluestore/fastbmap_allocator_impl.cc
bluestore/FreelistManager.cc
bluestore/StupidAllocator.cc

View File

@ -4601,15 +4601,6 @@ BlueStore::BlueStore(CephContext *cct,
_init_logger();
cct->_conf.add_observer(this);
set_cache_shards(1);
if ( cct->_conf->bluestore_codel) {
codel = std::make_unique<BlueStoreSlowFastCoDel>(
cct, [this](int64_t x) mutable {
this->throttle.reset_kv_throttle_max(x);
},
[this]() mutable {
return this->throttle.get_kv_throttle_current();
});
}
}
BlueStore::~BlueStore()
@ -4675,17 +4666,6 @@ const char **BlueStore::get_tracked_conf_keys() const
"bluestore_warn_on_no_per_pool_omap",
"bluestore_warn_on_no_per_pg_omap",
"bluestore_max_defer_interval",
"bluestore_codel",
"bluestore_codel_slow_interval",
"bluestore_codel_fast_interval",
"bluestore_codel_initial_target_latency",
"bluestore_codel_min_target_latency",
"bluestore_codel_max_target_latency",
"bluestore_codel_throughput_latency_tradeoff",
"bluestore_codel_initial_budget_bytes",
"bluestore_codel_min_budget_bytes",
"bluestore_codel_budget_increment_bytes",
"bluestore_codel_regression_history_size",
NULL
};
return KEYS;
@ -4744,9 +4724,6 @@ void BlueStore::handle_conf_change(const ConfigProxy& conf,
changed.count("bluestore_throttle_deferred_bytes") ||
changed.count("bluestore_throttle_trace_rate")) {
throttle.reset_throttle(conf);
if (codel) {
codel->reset_bluestore_budget();
}
}
if (changed.count("bluestore_max_defer_interval")) {
if (bdev) {
@ -4759,21 +4736,6 @@ void BlueStore::handle_conf_change(const ConfigProxy& conf,
changed.count("osd_memory_expected_fragmentation")) {
_update_osd_memory_options();
}
if (changed.count("bluestore_codel") ||
changed.count("bluestore_codel_slow_interval") ||
changed.count("bluestore_codel_fast_interval") ||
changed.count("bluestore_codel_initial_target_latency") ||
changed.count("bluestore_codel_min_target_latency") ||
changed.count("bluestore_codel_max_target_latency") ||
changed.count("bluestore_codel_throughput_latency_tradeoff") ||
changed.count("bluestore_codel_initial_budget_bytes") ||
changed.count("bluestore_codel_min_budget_bytes") ||
changed.count("bluestore_codel_budget_increment_bytes") ||
changed.count("bluestore_codel_regression_history_size")) {
if (codel) {
codel->on_config_changed(cct);
}
}
}
void BlueStore::_set_compression()
@ -12612,9 +12574,6 @@ void BlueStore::_txc_state_proc(TransContext *txc)
case TransContext::STATE_KV_DONE:
throttle.log_state_latency(*txc, logger, l_bluestore_state_kv_done_lat);
if (codel) {
codel->update_from_txc_info(txc->txc_state_proc_start, txc->bytes);
}
if (txc->deferred_txn) {
txc->set_state(TransContext::STATE_DEFERRED_QUEUED);
_deferred_queue(txc);
@ -14086,7 +14045,6 @@ int BlueStore::queue_transactions(
logger->inc(l_bluestore_txc);
// execute (start)
txc->txc_state_proc_start = mono_clock::now();
_txc_state_proc(txc);
if (bdev->is_smr()) {

View File

@ -51,7 +51,6 @@
#include "bluestore_types.h"
#include "BlueFS.h"
#include "common/EventTrace.h"
#include "BlueStoreSlowFastCoDel.h"
#ifdef WITH_BLKIN
#include "common/zipkin_trace.h"
@ -1725,7 +1724,6 @@ public:
uint64_t seq = 0;
ceph::mono_clock::time_point start;
ceph::mono_clock::time_point last_stamp;
ceph::mono_clock::time_point txc_state_proc_start;
uint64_t last_nid = 0; ///< if non-zero, highest new nid we allocated
uint64_t last_blobid = 0; ///< if non-zero, highest new blobid we allocated
@ -1902,16 +1900,8 @@ public:
trace_period_mcs = rate > 0 ? floor((1/rate) * 1000000.0) : 0;
#endif
}
// Returns whatever throttle_bytes.get_current() reports.
int64_t get_kv_throttle_current() {
return throttle_bytes.get_current();
}
// Forwards the new maximum m to throttle_bytes.reset_max().
void reset_kv_throttle_max(int64_t m) {
throttle_bytes.reset_max(m);
}
} throttle;
std::unique_ptr<BlueStoreSlowFastCoDel> codel;
typedef boost::intrusive::list<
TransContext,
boost::intrusive::member_hook<

View File

@ -1,272 +0,0 @@
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
#include "BlueStoreSlowFastCoDel.h"
#include "common/regression_utils.h"
/**
 * Bind each timer to its own lock, store the two throttle hooks and then
 * load the configuration, which also kicks off both interval loops.
 *
 * @param _cct context used for the timers and for reading the config
 * @param _bluestore_budget_reset_callback invoked with the new budget
 * @param _get_kv_throttle_current queried for the current throttle usage
 */
BlueStoreSlowFastCoDel::BlueStoreSlowFastCoDel(
  CephContext *_cct,
  std::function<void(int64_t)> _bluestore_budget_reset_callback,
  std::function<int64_t()> _get_kv_throttle_current)
  : fast_timer(_cct, fast_timer_lock),
    slow_timer(_cct, slow_timer_lock),
    bluestore_budget_reset_callback(_bluestore_budget_reset_callback),
    get_kv_throttle_current(_get_kv_throttle_current)
{
  on_config_changed(_cct);
}
/**
 * Stop the fast loop, then the slow loop (each under its own timer lock),
 * and finally drop the regression history.
 */
BlueStoreSlowFastCoDel::~BlueStoreSlowFastCoDel() {
  // same teardown sequence for both timers: cancel pending events, shut down
  const auto stop_timer = [](ceph::mutex &timer_lock, SafeTimer &timer) {
    std::lock_guard guard{timer_lock};
    timer.cancel_all_events();
    timer.shutdown();
  };
  stop_timer(fast_timer_lock, fast_timer);
  stop_timer(slow_timer_lock, slow_timer);

  regression_throughput_history.clear();
  regression_target_latency_history.clear();
}
/**
 * Record one finished transaction: track the largest observed throttle usage,
 * the smallest latency of the current fast interval, and the byte and
 * transaction counters of the current slow interval.
 *
 * @param txc_start_time time point at which the transaction started
 * @param txc_bytes size of the transaction in bytes
 */
void BlueStoreSlowFastCoDel::update_from_txc_info(
  ceph::mono_clock::time_point txc_start_time,
  uint64_t txc_bytes) {
  std::lock_guard l(register_lock);
  const ceph::mono_clock::time_point finished_at = ceph::mono_clock::now();
  const int64_t elapsed_ns =
    std::chrono::nanoseconds(finished_at - txc_start_time).count();

  if (activated) {
    if (max_queue_length < get_kv_throttle_current()) {
      max_queue_length = get_kv_throttle_current();
    }
  }

  // first sample of the interval, or a new minimum
  const bool no_sample_yet = (min_latency == INITIAL_LATENCY_VALUE);
  if (no_sample_yet || elapsed_ns < min_latency) {
    min_latency = elapsed_ns;
  }

  slow_interval_txc_cnt++;
  slow_interval_registered_bytes += txc_bytes;
}
/**
 * Shrink the budget after a latency violation. The reduction factor is the
 * relative distance between target and minimum latency, capped at 50%, and
 * the result never drops below min_bluestore_budget.
 */
void BlueStoreSlowFastCoDel::on_min_latency_violation() {
  if (target_latency <= 0) {
    return;
  }
  const double gap = (double) (target_latency - min_latency);
  // never cut more than half of the budget in one step
  const auto shrink_ratio = std::min(std::abs(gap) / min_latency, 0.5);
  bluestore_budget = std::max(bluestore_budget * (1 - shrink_ratio),
                              min_bluestore_budget * 1.0);
}
/**
 * Open the throttle by one increment, unless the budget already reached
 * 1.5 times the largest queue length seen in the current slow interval.
 */
void BlueStoreSlowFastCoDel::on_no_violation() {
  const bool at_ceiling = !(bluestore_budget < max_queue_length * 1.5);
  if (at_ceiling) {
    return;
  }
  bluestore_budget += bluestore_budget_increment;
}
/**
 * Reload every CoDel tunable from the configuration, reset the controller
 * state, and (re)start the fast and the slow loop.
 *
 * Fixes relative to the previous version:
 *  - millisecond options are floats; they are now scaled to nanoseconds
 *    before being truncated to an integer, so fractional values such as
 *    0.5 ms no longer collapse to 0;
 *  - 'bluestore_codel_min_budget_bytes' is honoured instead of being
 *    overwritten by the initial budget right after it was read.
 *
 * @param cct context whose configuration is read
 */
void BlueStoreSlowFastCoDel::on_config_changed(CephContext *cct) {
  // scale first, truncate second
  const auto ms_to_ns = [](double ms) {
    return static_cast<int64_t>(ms * 1000 * 1000);
  };
  {
    std::lock_guard l(register_lock);

    activated = cct->_conf->bluestore_codel;
    target_slope = cct->_conf->bluestore_codel_throughput_latency_tradeoff;
    slow_interval = ms_to_ns(cct->_conf->bluestore_codel_slow_interval);
    initial_fast_interval =
      ms_to_ns(cct->_conf->bluestore_codel_fast_interval);
    initial_target_latency =
      ms_to_ns(cct->_conf->bluestore_codel_initial_target_latency);
    min_target_latency =
      ms_to_ns(cct->_conf->bluestore_codel_min_target_latency);
    max_target_latency =
      ms_to_ns(cct->_conf->bluestore_codel_max_target_latency);
    initial_bluestore_budget = cct->_conf->bluestore_codel_initial_budget_bytes;
    min_bluestore_budget = cct->_conf->bluestore_codel_min_budget_bytes;
    bluestore_budget_increment =
      cct->_conf->bluestore_codel_budget_increment_bytes;
    regression_history_size =
      cct->_conf->bluestore_codel_regression_history_size;

    // restart the controller from its initial state
    bluestore_budget = initial_bluestore_budget;
    max_queue_length = min_bluestore_budget;
    fast_interval = initial_fast_interval;
    target_latency = initial_target_latency;
    min_latency = INITIAL_LATENCY_VALUE;
    slow_interval_registered_bytes = 0;
    regression_throughput_history.clear();
    regression_target_latency_history.clear();
    slow_interval_start = ceph::mono_clock::zero();
  }

  // NOTE(review): init() runs on every config change and the first tick of
  // each loop is scheduled below without the timer lock held -- confirm both
  // against SafeTimer's contract.
  {
    std::lock_guard l1{fast_timer_lock};
    fast_timer.cancel_all_events();
    fast_timer.init();
  }
  _fast_interval_process();
  {
    std::lock_guard l2{slow_timer_lock};
    slow_timer.cancel_all_events();
    slow_timer.init();
  }
  _slow_interval_process();
}
/**
 * Re-apply the current budget to the throttle. Does nothing while CoDel is
 * deactivated; otherwise the budget is first raised to min_bluestore_budget
 * if it is below it.
 */
void BlueStoreSlowFastCoDel::reset_bluestore_budget() {
  if (!activated) {
    return;
  }
  bluestore_budget = std::max(min_bluestore_budget, bluestore_budget);
  bluestore_budget_reset_callback(bluestore_budget);
}
/**
 * One tick of the fast loop. If a latency sample was collected during the
 * interval, the budget is adjusted (only while activated), the sample is
 * cleared and the subclass hook is called. The next tick is always
 * scheduled, fast_interval nanoseconds from now.
 */
void BlueStoreSlowFastCoDel::_fast_interval_process() {
  std::lock_guard l(register_lock);

  const bool interval_has_sample =
    target_latency != INITIAL_LATENCY_VALUE &&
    min_latency != INITIAL_LATENCY_VALUE;
  if (interval_has_sample) {
    if (activated) {
      const bool violated = _check_latency_violation();
      if (!violated) {
        // back to the base interval and open the throttle a bit
        violation_count = 0;
        fast_interval = initial_fast_interval;
        on_no_violation();
      } else {
        // consecutive violations shorten the interval and cut the budget
        violation_count++;
        _update_interval();
        on_min_latency_violation();
      }
      bluestore_budget = std::max(min_bluestore_budget, bluestore_budget);
      bluestore_budget_reset_callback(bluestore_budget);
    }
    // start the next interval without a sample
    min_latency = INITIAL_LATENCY_VALUE;
    on_fast_interval_finished();
  }

  const auto next_tick_in = std::chrono::nanoseconds(fast_interval);
  fast_timer.add_event_after(
    next_tick_in,
    new LambdaContext([this](int) { _fast_interval_process(); }));
}
/**
 * One tick of the slow loop.
 *
 * When activated and at least one transaction was registered since the last
 * tick, this
 *  1. records (target latency in ms, throughput in MB/s) for the interval,
 *     keeping at most regression_history_size samples;
 *  2. once the history is full, picks the target latency at which the fitted
 *     throughput curve has slope target_slope; otherwise uses the initial
 *     target latency;
 *  3. clamps that value to [min_target_latency, max_target_latency];
 *  4. draws the actual target from a lognormal distribution around it and
 *     clamps again.
 * The slow-interval counters are then reset and the next tick is scheduled.
 *
 * The previously declared but never used local vectors were removed.
 */
void BlueStoreSlowFastCoDel::_slow_interval_process() {
  std::lock_guard l(register_lock);
  ceph::mono_clock::time_point now = ceph::mono_clock::now();
  if (activated && !ceph::mono_clock::is_zero(slow_interval_start)
      && slow_interval_txc_cnt > 0) {
    double time_sec = nanosec_to_sec(
      std::chrono::nanoseconds(now - slow_interval_start).count());

    // bytes per second -> MB per second
    double slow_interval_throughput =
      (slow_interval_registered_bytes * 1.0) / time_sec;
    slow_interval_throughput = slow_interval_throughput / (1024.0 * 1024.0);
    regression_target_latency_history.push_back(
      nanosec_to_millisec(target_latency));
    regression_throughput_history.push_back(slow_interval_throughput);
    // drop the oldest sample once the window is exceeded
    if (regression_target_latency_history.size() > regression_history_size) {
      regression_target_latency_history.erase(
        regression_target_latency_history.begin());
      regression_throughput_history.erase(
        regression_throughput_history.begin());
    }
    double target_ms = nanosec_to_millisec(initial_target_latency);
    // If there is sufficient number of points, use the regression to find the
    // target_ms. Otherwise, target_ms will be initial_target_latency
    if (regression_target_latency_history.size() >= regression_history_size) {
      target_ms = ceph::find_slope_on_curve(
        regression_target_latency_history,
        regression_throughput_history,
        target_slope);
    }
    target_latency_without_noise = millisec_to_nanosec(target_ms);
    target_latency_without_noise = std::max(target_latency_without_noise,
                                            min_target_latency);
    target_latency_without_noise = std::min(target_latency_without_noise,
                                            max_target_latency);
    target_ms = nanosec_to_millisec(target_latency_without_noise);

    // add log_normal noise
    // NOTE(review): when the clamp above lands exactly on min_target_latency,
    // mode equals min_x in the call below and the helper evaluates log(0).
    unsigned seed = std::chrono::system_clock::now().time_since_epoch().count();
    std::default_random_engine generator(seed);
    double dist_params[2];
    double rnd_std_dev = 5;
    ceph::find_log_normal_dist_params(
      target_ms,
      nanosec_to_millisec(min_target_latency),
      target_ms * rnd_std_dev,
      dist_params);
    std::lognormal_distribution<double> distribution(dist_params[0],
                                                     dist_params[1]);
    // the distribution is relative to min_target_latency, so shift it back
    target_latency = millisec_to_nanosec(distribution(generator));
    target_latency += min_target_latency;
    if (target_latency < millisec_to_nanosec(target_ms)) {
      std::uniform_real_distribution<> distr(0, 0.5);
      target_latency = target_latency +
                       (target_latency - millisec_to_nanosec(target_ms)) *
                       distr(generator);
    }
    if (target_latency != INITIAL_LATENCY_VALUE) {
      target_latency = std::max(target_latency, min_target_latency);
      target_latency = std::min(target_latency, max_target_latency);
    }
    on_slow_interval_finished();
  }

  // start a fresh slow interval
  slow_interval_start = ceph::mono_clock::now();
  slow_interval_registered_bytes = 0;
  slow_interval_txc_cnt = 0;
  max_queue_length = min_bluestore_budget;

  auto codel_ctx = new LambdaContext(
    [this](int r) {
      _slow_interval_process();
    });
  auto interval_duration = std::chrono::nanoseconds(slow_interval);
  slow_timer.add_event_after(interval_duration, codel_ctx);
}
/**
 * Tell whether the smallest latency seen in this fast interval exceeded the
 * target. Without a target or without a sample there is no violation.
 * @return true if min latency violate the target, false otherwise
 */
bool BlueStoreSlowFastCoDel::_check_latency_violation() {
  const bool comparable = target_latency != INITIAL_LATENCY_VALUE &&
                          min_latency != INITIAL_LATENCY_VALUE;
  return comparable && min_latency > target_latency;
}
/**
 * Shorten the fast interval to initial_fast_interval divided by the rounded
 * square root of violation_count; a non-positive result is replaced by 1000.
 */
void BlueStoreSlowFastCoDel::_update_interval() {
  const auto divisor = (int) std::round(std::sqrt(violation_count));
  fast_interval = initial_fast_interval / divisor;
  if (fast_interval > 0) {
    return;
  }
  fast_interval = 1000;
}
int64_t BlueStoreSlowFastCoDel::get_bluestore_budget() {
return bluestore_budget;
}
int64_t BlueStoreSlowFastCoDel::get_target_latency() {
return target_latency;
}

View File

@ -1,128 +0,0 @@
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
#pragma once
#include <iostream>
#include "include/Context.h"
#include "common/Timer.h"
#include "common/ceph_time.h"
// Admission controller that adapts a throttle budget from observed
// transaction latency. It runs two timer-driven loops: a fast loop that
// compares the minimum latency of each interval against a target, and a slow
// loop that recomputes that target. The throttle itself is reached only
// through the two callbacks passed to the constructor.
class BlueStoreSlowFastCoDel {
public:
// Stores the callbacks, binds the timers to their locks and loads the config.
BlueStoreSlowFastCoDel(
CephContext *_cct,
std::function<void(int64_t)> _bluestore_budget_reset_callback,
std::function<int64_t()> _get_kv_throttle_current);
virtual ~BlueStoreSlowFastCoDel();
// Re-read all tunables from cct and restart both loops.
void on_config_changed(CephContext *cct);
// Push the current budget to the throttle through the reset callback.
void reset_bluestore_budget();
// Register one finished transaction (start time and size in bytes).
void update_from_txc_info(
ceph::mono_clock::time_point txc_start_time,
uint64_t txc_bytes);
int64_t get_bluestore_budget();
int64_t get_target_latency();
bool is_activated();
protected:
// Sentinel stored in the latency/interval fields until a real value is set.
static const int64_t INITIAL_LATENCY_VALUE = -1;
/* config values */
// Config value 'bluestore_codel',true if SlowFastCodel is activated
bool activated = false;
// Config value 'bluestore_codel_fast_interval', Initial interval for fast loop
int64_t initial_fast_interval = INITIAL_LATENCY_VALUE;
// Config value 'bluestore_codel_initial_target_latency', Initial target latency
// to start the algorithm
int64_t initial_target_latency = INITIAL_LATENCY_VALUE;
// Config value 'bluestore_codel_slow_interval', the interval for the slow loop
int64_t slow_interval = INITIAL_LATENCY_VALUE;
// Config value 'bluestore_codel_min_target_latency', min possible value for target
int64_t min_target_latency = INITIAL_LATENCY_VALUE; // in ns
// Config value 'bluestore_codel_max_target_latency', max possible value for target
int64_t max_target_latency = INITIAL_LATENCY_VALUE; // in ns
// Config value 'bluestore_codel_throughput_latency_tradeoff', define the
// tradeoff between throughput and latency (MB/s loss for every 1ms latency drop)
double target_slope = 5;
// Config value 'bluestore_codel_regression_history_size', regression history size
int64_t regression_history_size = 100;
// Config value 'bluestore_codel_min_budget_bytes', the minimum bluestore
// throttle budget
int64_t min_bluestore_budget = 102400;
// Config value 'bluestore_codel_initial_budget_bytes', the initial bluestore
// throttle budget
int64_t initial_bluestore_budget = 102400;
// Config value 'bluestore_codel_budget_increment_bytes', the increment size
// for opening the bluestore throttle
int64_t bluestore_budget_increment = 102400;
/* internal state variables */
// current interval for the fast loop
int64_t fast_interval = INITIAL_LATENCY_VALUE;
// current target latency that fast loop is using
int64_t target_latency = INITIAL_LATENCY_VALUE;
// target latency chosen by the slow loop before random noise is added
int64_t target_latency_without_noise = INITIAL_LATENCY_VALUE;
// min latency in the current fast interval
int64_t min_latency = INITIAL_LATENCY_VALUE;
// number of consecutive fast intervals that violated the target
int64_t violation_count = 0;
// The locks are declared before the timers because each timer is
// constructed with a reference to its lock.
ceph::mutex fast_timer_lock = ceph::make_mutex("CoDel::fast_timer_lock");
ceph::mutex slow_timer_lock = ceph::make_mutex("CoDel::slow_timer_lock");
// taken by update_from_txc_info, on_config_changed and both interval loops
ceph::mutex register_lock = ceph::make_mutex("CoDel::register_lock");
SafeTimer fast_timer; // fast loop timer
SafeTimer slow_timer; // slow loop timer
// marks the start of the current slow interval
ceph::mono_clock::time_point slow_interval_start = ceph::mono_clock::zero();
// amount of bytes that has been processed in current slow interval
int64_t slow_interval_registered_bytes = 0;
// number of transactions that has been processed in current slow interval
int64_t slow_interval_txc_cnt = 0;
// target latency history for regression
std::vector<double> regression_target_latency_history;
// throughput history for regression
std::vector<double> regression_throughput_history;
int64_t bluestore_budget = 102400; // current bluestore throttle budget
// maximum amount of inflight data in current slow interval
int64_t max_queue_length = 102400;
// called with the new budget whenever it should be applied to the throttle
std::function<void(int64_t)> bluestore_budget_reset_callback;
// queried for the current throttle usage
std::function<int64_t(void)> get_kv_throttle_current;
// budget adjustment after a fast interval that violated the target
void on_min_latency_violation();
// budget adjustment after a fast interval without violation
void on_no_violation();
// Hooks for subclasses, called at the end of a processed fast/slow interval.
virtual void on_fast_interval_finished() {}
virtual void on_slow_interval_finished() {}
private:
bool _check_latency_violation();
void _update_interval();
void _fast_interval_process();
void _slow_interval_process();
// Unit conversion helpers; all return double.
template<typename T>
double millisec_to_nanosec(T ms) {
return ms * 1000.0 * 1000.0;
}
template<typename T>
double nanosec_to_millisec(T ns) {
return ns / (1000.0 * 1000.0);
}
template<typename T>
double nanosec_to_sec(T ns) {
return ns / (1000.0 * 1000.0 * 1000.0);
}
};

View File

@ -155,11 +155,6 @@ add_executable(unittest_random
add_ceph_unittest(unittest_random)
target_link_libraries(unittest_random Boost::random)
# unittest_regression_utils
add_executable(unittest_regression_utils test_regression_utils.cc)
add_ceph_unittest(unittest_regression_utils)
target_link_libraries(unittest_regression_utils ceph-common global)
# unittest_throttle
add_executable(unittest_throttle
Throttle.cc

View File

@ -1,89 +0,0 @@
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
// vim: ts=8 sw=2 smarttab
#include <gtest/gtest.h>
#include <vector>
#include <cmath>
#include "common/regression_utils.h"
#include <boost/numeric/ublas/matrix.hpp>
#include <boost/numeric/ublas/io.hpp>
using namespace boost::numeric::ublas;
// Produce `size` pseudo-random whole numbers in [0, max_value), stored as
// doubles. The generator is re-seeded from the wall clock on every call.
std::vector<double> generate_rand_vector(int size, int max_value) {
  std::srand(std::time(0));
  std::vector<double> values;
  for (int remaining = size; remaining > 0; --remaining) {
    values.push_back(std::rand() % max_value);
  }
  return values;
}
// Build a size1 x size2 matrix filled row by row with pseudo-random whole
// numbers in [0, max_value). The generator is re-seeded on every call.
matrix<double> generate_rand_matrix(int size1, int size2, int max_value) {
  std::srand(std::time(0));
  matrix<double> result(size1, size2);
  for (int row = 0; row < size1; ++row) {
    for (int col = 0; col < size2; ++col) {
      result(row, col) = std::rand() % max_value;
    }
  }
  return result;
}
// Return a vector holding e^v for every element v of x, in the same order.
// x is received by value, so it is transformed in place and returned; this
// avoids both the signed/unsigned index comparison and a second allocation.
std::vector<double> exp_vector(std::vector<double> x) {
  for (double &value : x) {
    value = std::exp(value);
  }
  return x;
}
// True when x1 and x2 differ by strictly less than precision.
bool is_almost_equal(double x1, double x2, double precision) {
  const double distance = std::abs(x1 - x2);
  return distance < precision;
}
// Inverting a random 2x2 matrix and multiplying it with the original must
// give the identity matrix (within 1e-9).
// NOTE(review): the input is random, so a singular matrix is possible and
// would make this test fail spuriously.
TEST(matrix_op, matrix_inverse) {
  int matrix_size = 2; // has to be 2x2
  matrix<double> random_square_m = generate_rand_matrix(matrix_size, matrix_size, 1000);
  matrix<double> random_square_m_inv = ceph::matrix_inverse(random_square_m);
  // the inverse matrix should have the same size
  ASSERT_EQ(random_square_m_inv.size1(), random_square_m.size1());
  ASSERT_EQ(random_square_m_inv.size2(), random_square_m.size2());
  matrix<double> matrix_prod = prod(random_square_m, random_square_m_inv);
  // the product should be an identity matrix; indices use size_t so they
  // compare cleanly against size1()/size2()
  for (std::size_t i = 0; i < matrix_prod.size1(); i++) {
    for (std::size_t j = 0; j < matrix_prod.size2(); j++) {
      const double expected = (i == j) ? 1 : 0; // diagonal -> 1, else 0
      ASSERT_TRUE(is_almost_equal(matrix_prod(i, j), expected, 1e-9));
    }
  }
}
// Points taken from y = ln(x) must be fitted as a = 0 and b = 1.
TEST(regression, log_regression) {
  // pick y at random and derive x = e^y, so y = ln(x) holds for every point
  const std::vector<double> log_values = generate_rand_vector(200, 100);
  const std::vector<double> samples = exp_vector(log_values);
  double coeffs[2]; // y = coeffs[0] + coeffs[1] * ln(x)
  ceph::regression(samples, log_values, coeffs);
  ASSERT_TRUE(is_almost_equal(coeffs[0], 0, 1e-9)); // intercept
  ASSERT_TRUE(is_almost_equal(coeffs[1], 1, 1e-9)); // log coefficient
}
// For y = ln(x) the derivative is 1/x, so slope 5 is reached at x = 0.2.
TEST(regression, find_slope_location) {
  const std::vector<double> log_values = generate_rand_vector(200, 100);
  const std::vector<double> samples = exp_vector(log_values);
  const double wanted_slope = 5;
  const double location =
    ceph::find_slope_on_curve(samples, log_values, wanted_slope);
  ASSERT_TRUE(is_almost_equal(location, 0.2, 1e-9));
}

View File

@ -5,4 +5,3 @@ add_executable(unittest_lfnindex
add_ceph_unittest(unittest_lfnindex)
target_link_libraries(unittest_lfnindex os global)
add_subdirectory(bluestore)

View File

@ -1,15 +0,0 @@
# unittest_slow_fast_codel
add_executable(unittest_slow_fast_codel
TestBlueStoreSlowFastCoDel.cc
)
add_ceph_unittest(unittest_slow_fast_codel)
target_link_libraries(unittest_slow_fast_codel
rados_test_stub
librados
global
radostest-cxx
GTest::GTest
ceph-common
os
global
)

View File

@ -1,191 +0,0 @@
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
// vim: ts=8 sw=2 smarttab
#include <filesystem>
#include <iostream>
#include <unistd.h>
#include <mutex>
#include <cmath>
#include <vector>
#include <condition_variable>
#include <cmath>
#include <cstdlib>
#include "gtest/gtest.h"
#include "include/Context.h"
#include "common/ceph_time.h"
#include "os/bluestore/BlueStoreSlowFastCoDel.h"
// Convert whole milliseconds to nanoseconds.
// Pure integer arithmetic: the previous version multiplied by 1000.0 and so
// went through double, which cannot represent every int64_t result exactly.
static int64_t milliseconds_to_nanoseconds(int64_t ms) {
  return ms * 1000 * 1000;
}
// Convert nanoseconds to (fractional) milliseconds.
// The parameter used to be called `ms` although it carries nanoseconds.
static double nanoseconds_to_milliseconds(int64_t ns) {
  return ns / (1000.0 * 1000.0);
}
// Test double for BlueStoreSlowFastCoDel. It replaces the config-derived
// tunables with values supplied by the test, signals a condition variable
// after every processed fast interval and records the target latency after
// every processed slow interval.
class BlueStoreSlowFastCoDelMock : public BlueStoreSlowFastCoDel {
public:
// The base constructor runs first; init_test() then overrides its settings
// with the values given here.
BlueStoreSlowFastCoDelMock(
CephContext *_cct,
std::function<void(int64_t)> _bluestore_budget_reset_callback,
std::function<int64_t()> _get_kv_throttle_current,
std::mutex &_iteration_mutex,
std::condition_variable &_iteration_cond,
int64_t _target_latency,
int64_t _fast_interval,
int64_t _slow_interval,
double _target_slope
) : BlueStoreSlowFastCoDel(_cct, _bluestore_budget_reset_callback,
_get_kv_throttle_current),
iteration_mutex(_iteration_mutex), iteration_cond(_iteration_cond),
test_target_latency(_target_latency), test_fast_interval(_fast_interval),
test_slow_interval(_slow_interval), test_target_slope(_target_slope) {
init_test();
}
// Force the controller into a known state, under register_lock.
void init_test() {
std::lock_guard l(register_lock);
activated = true;
target_slope = test_target_slope;
slow_interval = test_slow_interval;
initial_fast_interval = test_fast_interval;
min_target_latency = milliseconds_to_nanoseconds(1);
initial_target_latency = test_target_latency;
max_target_latency = milliseconds_to_nanoseconds(500);
initial_bluestore_budget = 100 * 1024;
// NOTE(review): this value is overwritten a few lines below by
// initial_bluestore_budget -- confirm which one is intended.
min_bluestore_budget = 10 * 1024;
bluestore_budget_increment = 1024;
regression_history_size = 5;
bluestore_budget = initial_bluestore_budget;
min_bluestore_budget = initial_bluestore_budget;
max_queue_length = min_bluestore_budget;
fast_interval = initial_fast_interval;
target_latency = initial_target_latency;
min_latency = INITIAL_LATENCY_VALUE;
slow_interval_registered_bytes = 0;
regression_throughput_history.clear();
regression_target_latency_history.clear();
slow_interval_start = ceph::mono_clock::zero();
}
// target latency recorded at the end of each processed slow interval
std::vector <int64_t> target_latency_vector;
protected:
// owned by the test fixture; used to signal finished fast intervals
std::mutex &iteration_mutex;
std::condition_variable &iteration_cond;
// values handed in by the test and applied in init_test()
int64_t test_target_latency;
int64_t test_fast_interval;
int64_t test_slow_interval;
double test_target_slope;
// wake up one waiter on iteration_cond
void on_fast_interval_finished() override {
std::unique_lock <std::mutex> locker(iteration_mutex);
iteration_cond.notify_one();
}
// remember the target latency chosen for the next slow interval
void on_slow_interval_finished() override {
target_latency_vector.push_back(target_latency);
}
};
/**
 * gtest fixture that feeds synthetic transactions into a
 * BlueStoreSlowFastCoDelMock and checks how the throttle budget reacts.
 *
 * The "throttle" is simply the test_throttle_budget member: the mock receives
 * one callback that stores into it and one that reads it back.
 */
class TestSlowFastCoDel : public ::testing::Test {
public:
  CephContext *ceph_context = nullptr;
  BlueStoreSlowFastCoDelMock *slow_fast_codel = nullptr;

  // Written and read through the callbacks handed to the mock.
  int64_t test_throttle_budget = 0;

  // Passed by reference to the mock, which notifies iteration_cond from its
  // on_fast_interval_finished() override.
  std::mutex iteration_mutex;
  std::condition_variable iteration_cond;

  // Parameters handed to the mock; the three durations are in nanoseconds.
  int64_t target_latency = milliseconds_to_nanoseconds(50);
  int64_t fast_interval = milliseconds_to_nanoseconds(100);
  int64_t slow_interval = milliseconds_to_nanoseconds(400);
  double target_slope = 1;

  // Per-iteration trace filled in by test_codel().
  std::vector<int64_t> target_latency_vector;
  std::vector<int64_t> txc_size_vector;

  TestSlowFastCoDel() {}

  ~TestSlowFastCoDel() {}

  static void SetUpTestCase() {}

  static void TearDownTestCase() {}

  // Create the CephContext used by the mock.
  // NOTE(review): nothing in this fixture releases ceph_context again --
  // confirm whether a matching put() is required.
  void SetUp() override {
    auto *fresh_context = new CephContext(CEPH_ENTITY_TYPE_ANY);
    ceph_context = fresh_context->get();
  }

  // Build the mock with callbacks bound to test_throttle_budget and with the
  // fixture's synchronisation objects and parameters.
  void create_bluestore_slow_fast_codel() {
    auto store_budget = [this](int64_t x) mutable {
      this->test_throttle_budget = x;
    };
    auto load_budget = [this]() mutable {
      return this->test_throttle_budget;
    };

    slow_fast_codel = new BlueStoreSlowFastCoDelMock(
      ceph_context,
      store_budget,
      load_budget,
      iteration_mutex,
      iteration_cond,
      target_latency,
      fast_interval,
      slow_interval,
      target_slope);
  }

  // Destroy the mock if one was created.
  void TearDown() override {
    if (slow_fast_codel != nullptr) {
      delete slow_fast_codel;
    }
  }

  /**
   * Run 50 iterations.  Each iteration submits txc_num transactions and then
   * waits (at most one second) for iteration_cond to be notified.  On odd
   * iterations the transaction start time is moved into the past by the
   * current target latency plus 1000..1999 ms, and the budget is asserted to
   * have dropped; on even iterations the start time is "now" and the budget
   * is asserted to have grown.
   *
   * NOTE(review): wait_for() is used without a predicate, so a spurious
   * wake-up would be treated as a finished interval -- confirm acceptable.
   */
  void test_codel() {
    int64_t max_iterations = 50;
    int iteration_timeout = 1; // 1 sec
    int txc_num = 4;

    for (int iteration = 0; iteration < max_iterations; ++iteration) {
      std::unique_lock<std::mutex> iteration_lock(iteration_mutex);

      const bool inject_violation = (iteration % 2) != 0;
      auto budget_tmp = test_throttle_budget;
      auto current_target = slow_fast_codel->get_target_latency();

      // Synthetic throughput: slope * target latency [ms] * ln(current
      // target [ms]).
      const double slope_times_latency =
        target_slope * nanoseconds_to_milliseconds(target_latency);
      const double target_throughput =
        slope_times_latency *
        std::log(nanoseconds_to_milliseconds(current_target) * 1.0);

      // Size of a single transaction.  The quotient is truncated to an
      // integer first and only then scaled by 1024 * 1024.
      const int64_t fast_per_slow = slow_interval / fast_interval;
      const double scaled_throughput =
        nanoseconds_to_milliseconds(slow_interval) * target_throughput;
      int64_t txc_size = static_cast<int64_t>(
        scaled_throughput / (1000 * txc_num * fast_per_slow));
      txc_size *= 1024 * 1024;

      txc_size_vector.push_back(txc_size);
      target_latency_vector.push_back(current_target);

      for (int txc = 0; txc < txc_num; ++txc) {
        auto txc_start = ceph::mono_clock::now();
        if (inject_violation) {
          // Backdate the start time beyond the current target latency.
          int extra_delay_ms = std::rand() % 1000 + 1000;
          int64_t extra_delay_ns = milliseconds_to_nanoseconds(extra_delay_ms);
          txc_start = txc_start -
            std::chrono::nanoseconds(current_target + extra_delay_ns);
        }
        slow_fast_codel->update_from_txc_info(txc_start, txc_size);
      }

      const auto wait_result = iteration_cond.wait_for(
        iteration_lock, std::chrono::seconds(iteration_timeout));
      if (wait_result == std::cv_status::timeout) {
        ASSERT_TRUE(false) << "Test timeout.";
        return;
      }

      if (inject_violation) {
        ASSERT_LT(test_throttle_budget, budget_tmp);
      } else {
        ASSERT_GT(test_throttle_budget, budget_tmp);
      }
    }

    ASSERT_TRUE(slow_fast_codel->target_latency_vector.size() > 0);
  }
};
// Build the mocked controller with the fixture's default parameters, then run
// the alternating violation / no-violation scenario in test_codel().
TEST_F(TestSlowFastCoDel, test1) {
  this->create_bluestore_slow_fast_codel();
  this->test_codel();
}