os/bluestore: SlowFastCoDel added to the BlueStore

Signed-off-by: Esmaeil Mirvakili <smirvaki@ucsc.edu>
This commit is contained in:
Esmaeil Mirvakili 2022-02-01 02:18:51 -08:00 committed by esmaeil-mirvakili
parent 9671f179be
commit b4268f5e1a
10 changed files with 736 additions and 0 deletions

View File

@ -0,0 +1,41 @@
============================================
BlueStore Bufferbloat Mitigation Using CoDel
============================================
Introduction
------------
Bufferbloat happens when a frontend buffer too much data to a backend.
This can introduce latency spikes to the backend and compromise the
request schedulability of the frontend.
BlueStore has the bufferbloat problem due to its large queue. All
write requests are submitted immediately to BlueStore to achieve high
performance. However, this can compromise request schedulability in OSD.
As a solution, the CoDel algorithm is implemented in the BlueStore as
an admission control system to control the amount of transaction
submitted to BlueStore. This mechanism will negatively impact the
throughput of BlueStore. However, a tradeoff parameter has been introduced
to control BlueStore throughput loss versus BlueStore latency decrease.
Configurations
--------------
CoDel can be enabled using "*bluestore_codel*" config. The other important
config that needs to be set is "*bluestore_codel_throughput_latency_tradeoff*".
This config adjust the tradeoff between BlueStore throughput loss and
BlueStore latency decrease. This parameter defines the amount of throughput
loss in MB/s for one ms decrease in BlueStore latency. For example, a value
of 5 means that we are willing to lose maximum of 5 MB/s of throughput for
every 1 ms decrease in BlueStore latency.
Experiments
-----------
For measuring the impact of BlueStore CoDel on BlueStore, we measured the
transaction latency inside the BlueStore (BlueStore latency) and BlueStore
throughput. We compared this measurements with measurements from Vanilla BlueStore.
These experiments shows that:
1. The BlueStore CoDel can decrease the BlueStore latency by small and controllable
impact on throughput.
2. The BlueStore CoDel can react to workload changes to keep the desired tradeoff
between latency and throughput.

View File

@ -0,0 +1,45 @@
overrides:
thrashosds:
bdev_inject_crash: 2
bdev_inject_crash_probability: .5
ceph:
fs: xfs
conf:
osd:
osd objectstore: bluestore
bluestore block size: 96636764160
debug bluestore: 20
debug bluefs: 20
debug rocksdb: 10
bluestore fsck on mount: true
bluestore allocator: bitmap
# lower the full ratios since we can fill up a 100gb osd so quickly
mon osd full ratio: .9
mon osd backfillfull_ratio: .85
mon osd nearfull ratio: .8
osd failsafe full ratio: .95
# this doesn't work with failures bc the log writes are not atomic across the two backends
# bluestore bluefs env mirror: true
bdev enable discard: true
bdev async discard: true
bluestore codel: true
ceph-deploy:
fs: xfs
bluestore: yes
conf:
osd:
osd objectstore: bluestore
bluestore block size: 96636764160
debug bluestore: 20
debug bluefs: 20
debug rocksdb: 10
bluestore fsck on mount: true
# lower the full ratios since we can fill up a 100gb osd so quickly
mon osd full ratio: .9
mon osd backfillfull_ratio: .85
mon osd nearfull ratio: .8
osd failsafe full ratio: .95
bdev enable discard: true
bdev async discard: true
bluestore codel: true

View File

@ -6312,3 +6312,72 @@ options:
default: 0
services:
- mgr
- name: bluestore_codel
type: bool
level: advanced
desc: enable/disable bluestore SlowFastCodel
default: false
with_legacy: true
- name: bluestore_codel_throughput_latency_tradeoff
type: float
level: advanced
desc: adjust the tradeoff between throughput and bluestore latency in SlowFastCodel
long_desc: This parameter defines the amount of throughput loss (MB/s) for one ms
decrease in bluestore latency. (a value of 5 means that we are willing to lose
maximum of 5 MB/s of throughput for every 1 ms decrease in bluestore latency)
default: 5
with_legacy: true
- name: bluestore_codel_initial_target_latency
type: float
level: advanced
desc: initial target latency for SlowFastCodel in ms
default: 5.0
with_legacy: true
- name: bluestore_codel_slow_interval
type: float
level: advanced
desc: the interval of slow loop in SlowFastCodel in ms (this parameter should be larger that 'bluestore_codel_fast_interval')
default: 500.0
with_legacy: true
- name: bluestore_codel_fast_interval
type: float
level: advanced
desc: the interval of the fast loop in SlowFastCodel in ms
default: 50.0
with_legacy: true
- name: bluestore_codel_min_target_latency
type: float
level: advanced
desc: the minimum possible target latency in SlowFastCodel in ms
default: 1.0
with_legacy: true
- name: bluestore_codel_max_target_latency
type: float
level: advanced
desc: the maximum possible target latency in SlowFastCodel in ms
default: 1000.0
with_legacy: true
- name: bluestore_codel_initial_budget_bytes
type: size
level: advanced
desc: the initial bluestore throttle budget in SlowFastCodel
default: 100_K
with_legacy: true
- name: bluestore_codel_min_budget_bytes
type: size
level: advanced
desc: the minimum bluestore throttle budget in SlowFastCodel
default: 100_K
with_legacy: true
- name: bluestore_codel_budget_increment_bytes
type: size
level: advanced
desc: the increment size for opening the bluestore throttle in SlowFastCodel
default: 10_K
with_legacy: true
- name: bluestore_codel_regression_history_size
type: int
level: advanced
desc: number of the slow interval throughput and latency samples that SlowFastCodel keeps for regression
default: 100
with_legacy: true

View File

@ -0,0 +1,127 @@
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
#pragma once
#include <iostream>
#include <vector>
#include <cmath>
#include <boost/numeric/ublas/matrix.hpp>
#define Z_P 2.33 // z score for 99th percentile
namespace ceph {
/***
* Calculate the inverse of a 2x2 matrix.
* @param matrix<double>& m, an square 2x2 matrix
* @return the inverse of the m (m^-1)
*/
static boost::numeric::ublas::matrix<double>
matrix_inverse(boost::numeric::ublas::matrix<double> &m) {
assert(m.size1() == m.size2() &&
"Can only calculate the inverse of square matrices");
assert(m.size1() == 2 && m.size2() == 2 && "Only for 2x2 matrices");
boost::numeric::ublas::matrix<double> m_inverse(2, 2);
const double a = m(0, 0);
const double b = m(0, 1);
const double c = m(1, 0);
const double d = m(1, 1);
const double determinant = 1.0 / ((a * d) - (b * c));
m_inverse(0, 0) = d * determinant;
m_inverse(0, 1) = -b * determinant;
m_inverse(1, 0) = -c * determinant;
m_inverse(1, 1) = a * determinant;
return m_inverse;
}
/***
* Find a logarithmic function in form of "y = a + b * ln(x)" which fits
* the given points (x_values and y_values).
* @param std::vector<double> x_values, x values for sample points
* @param std::vector<double> y_values, y values for sample points
* @param double theta[2], holds the a and b as output (theta[0] = a and theta[1] = b)
*/
static void regression(
const std::vector<double> &x_values,
const std::vector<double> &y_values,
double theta[2]) {
assert(x_values.size() == y_values.size() &&
"x and y values vectors should have a same size.");
const int n = x_values.size();
boost::numeric::ublas::matrix<double> y_m(n, 1);
for (int i = 0; i < n; i++) {
y_m(i, 0) = y_values[i];
}
boost::numeric::ublas::scalar_matrix<double> sm(n, 2, 1);
boost::numeric::ublas::matrix<double> x_new_m(sm);
for (int i = 0; i < n; i++) {
x_new_m(i, 0) = 1;
x_new_m(i, 1) = std::log(x_values[i]);
}
boost::numeric::ublas::matrix<double> x_new_trans_m = boost::numeric::ublas::trans(
x_new_m);
boost::numeric::ublas::matrix<double> x_new_trans_dot_x_new_m = boost::numeric::ublas::prod(
x_new_trans_m, x_new_m);
boost::numeric::ublas::matrix<double> temp_1_m = matrix_inverse(
x_new_trans_dot_x_new_m);
boost::numeric::ublas::matrix<double> temp_2_m = boost::numeric::ublas::prod(
x_new_trans_m, y_m);
boost::numeric::ublas::matrix<double> theta_m = boost::numeric::ublas::prod(
temp_1_m, temp_2_m);
theta[0] = theta_m(0, 0);
theta[1] = theta_m(1, 0);
}
/***
* Finds the x location on a fitted logarithmic curve on sample points where
* the slope is equal to target_slope
* @param x_values, x values for sample points
* @param y_values, y values for sample points
* @param target_slope, the slope that we are looking for
* @return the x location where the slope of the curve is target_slope
*/
static double find_slope_on_curve(
const std::vector<double> &x_values,
const std::vector<double> &y_values,
double target_slope) {
assert(x_values.size() == y_values.size() &&
"x and y values vectors should have a same size.");
assert(target_slope != 0 &&
"The target slope of zero will result to a inf x, try a nonzero value.");
assert(target_slope >= 0 &&
"The target slope for a logarithmic function should be positive.");
double theta[2]; // theta[0] + theta[1] * ln(x)
regression(x_values, y_values,
theta); // find the logarithmic function using regression
double target_x = theta[1] /
target_slope; // find the x where the slope is close to target_slope
return target_x;
}
/***
* Finds the mu and std parameters of the lognormal distribution from its mode
* and x boundaries.
* @param mode, the mode of the distribution.
* @param min_x, x lower boundary of distribution (zero percentile)
* @param max_x, x upper boundary of distribution (99th percentile)
* @param params, holds the calculated distribution parameters (mu and std) as
* output (params[0] = mu and params[1] = std)
*/
static void
find_log_normal_dist_params(double mode, double min_x, double max_x,
double params[2]) {
assert(min_x < max_x && "The min_x should be smaller than max_x");
assert(mode >= min_x && mode < max_x &&
"The mode should be between min_x and max_x");
double max_x_normalized = max_x - min_x;
double mode_normalized = mode - min_x;
double std_dev = (-Z_P + std::sqrt(
Z_P * Z_P + 4 * std::log(max_x_normalized) -
4 * std::log(mode_normalized))) / 2;
double mu = std::log(max_x_normalized) - Z_P * std_dev;
params[0] = mu;
params[1] = std_dev;
}
}

View File

@ -55,6 +55,7 @@ set(alien_store_srcs
${PROJECT_SOURCE_DIR}/src/os/bluestore/BlueStore.cc
${PROJECT_SOURCE_DIR}/src/os/bluestore/simple_bitmap.cc
${PROJECT_SOURCE_DIR}/src/os/bluestore/bluestore_types.cc
${PROJECT_SOURCE_DIR}/src/os/bluestore/BlueStoreSlowFastCoDel.cc
${PROJECT_SOURCE_DIR}/src/os/bluestore/fastbmap_allocator_impl.cc
${PROJECT_SOURCE_DIR}/src/os/bluestore/FreelistManager.cc
${PROJECT_SOURCE_DIR}/src/os/bluestore/HybridAllocator.cc

View File

@ -29,6 +29,7 @@ if(WITH_BLUESTORE)
bluestore/BlueStore.cc
bluestore/simple_bitmap.cc
bluestore/bluestore_types.cc
bluestore/BlueStoreSlowFastCoDel.cc
bluestore/fastbmap_allocator_impl.cc
bluestore/FreelistManager.cc
bluestore/StupidAllocator.cc

View File

@ -4601,6 +4601,15 @@ BlueStore::BlueStore(CephContext *cct,
_init_logger();
cct->_conf.add_observer(this);
set_cache_shards(1);
if ( cct->_conf->bluestore_codel) {
codel = std::make_unique<BlueStoreSlowFastCoDel>(
cct, [this](int64_t x) mutable {
this->throttle.reset_kv_throttle_max(x);
},
[this]() mutable {
return this->throttle.get_kv_throttle_current();
});
}
}
BlueStore::~BlueStore()
@ -4666,6 +4675,17 @@ const char **BlueStore::get_tracked_conf_keys() const
"bluestore_warn_on_no_per_pool_omap",
"bluestore_warn_on_no_per_pg_omap",
"bluestore_max_defer_interval",
"bluestore_codel",
"bluestore_codel_slow_interval",
"bluestore_codel_fast_interval",
"bluestore_codel_initial_target_latency",
"bluestore_codel_min_target_latency",
"bluestore_codel_max_target_latency",
"bluestore_codel_throughput_latency_tradeoff",
"bluestore_codel_initial_budget_bytes",
"bluestore_codel_min_budget_bytes",
"bluestore_codel_budget_increment_bytes",
"bluestore_codel_regression_history_size",
NULL
};
return KEYS;
@ -4724,6 +4744,9 @@ void BlueStore::handle_conf_change(const ConfigProxy& conf,
changed.count("bluestore_throttle_deferred_bytes") ||
changed.count("bluestore_throttle_trace_rate")) {
throttle.reset_throttle(conf);
if (codel) {
codel->reset_bluestore_budget();
}
}
if (changed.count("bluestore_max_defer_interval")) {
if (bdev) {
@ -4736,6 +4759,21 @@ void BlueStore::handle_conf_change(const ConfigProxy& conf,
changed.count("osd_memory_expected_fragmentation")) {
_update_osd_memory_options();
}
if (changed.count("bluestore_codel") ||
changed.count("bluestore_codel_slow_interval") ||
changed.count("bluestore_codel_fast_interval") ||
changed.count("bluestore_codel_initial_target_latency") ||
changed.count("bluestore_codel_min_target_latency") ||
changed.count("bluestore_codel_max_target_latency") ||
changed.count("bluestore_codel_throughput_latency_tradeoff") ||
changed.count("bluestore_codel_initial_budget_bytes") ||
changed.count("bluestore_codel_min_budget_bytes") ||
changed.count("bluestore_codel_budget_increment_bytes") ||
changed.count("bluestore_codel_regression_history_size")) {
if (codel) {
codel->on_config_changed(cct);
}
}
}
void BlueStore::_set_compression()
@ -12561,6 +12599,9 @@ void BlueStore::_txc_state_proc(TransContext *txc)
case TransContext::STATE_KV_DONE:
throttle.log_state_latency(*txc, logger, l_bluestore_state_kv_done_lat);
if (codel) {
codel->update_from_txc_info(txc->txc_state_proc_start, txc->bytes);
}
if (txc->deferred_txn) {
txc->set_state(TransContext::STATE_DEFERRED_QUEUED);
_deferred_queue(txc);
@ -14032,6 +14073,7 @@ int BlueStore::queue_transactions(
logger->inc(l_bluestore_txc);
// execute (start)
txc->txc_state_proc_start = mono_clock::now();
_txc_state_proc(txc);
if (bdev->is_smr()) {

View File

@ -51,6 +51,7 @@
#include "bluestore_types.h"
#include "BlueFS.h"
#include "common/EventTrace.h"
#include "BlueStoreSlowFastCoDel.h"
#ifdef WITH_BLKIN
#include "common/zipkin_trace.h"
@ -1724,6 +1725,7 @@ public:
uint64_t seq = 0;
ceph::mono_clock::time_point start;
ceph::mono_clock::time_point last_stamp;
ceph::mono_clock::time_point txc_state_proc_start;
uint64_t last_nid = 0; ///< if non-zero, highest new nid we allocated
uint64_t last_blobid = 0; ///< if non-zero, highest new blobid we allocated
@ -1900,8 +1902,16 @@ public:
trace_period_mcs = rate > 0 ? floor((1/rate) * 1000000.0) : 0;
#endif
}
int64_t get_kv_throttle_current() {
return throttle_bytes.get_current();
}
void reset_kv_throttle_max(int64_t m) {
throttle_bytes.reset_max(m);
}
} throttle;
std::unique_ptr<BlueStoreSlowFastCoDel> codel;
typedef boost::intrusive::list<
TransContext,
boost::intrusive::member_hook<

View File

@ -0,0 +1,272 @@
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
#include "BlueStoreSlowFastCoDel.h"
#include "common/regression_utils.h"
BlueStoreSlowFastCoDel::BlueStoreSlowFastCoDel(
CephContext *_cct,
std::function<void(int64_t)> _bluestore_budget_reset_callback,
std::function<int64_t()> _get_kv_throttle_current) :
fast_timer(_cct, fast_timer_lock),
slow_timer(_cct, slow_timer_lock),
bluestore_budget_reset_callback(_bluestore_budget_reset_callback),
get_kv_throttle_current(_get_kv_throttle_current) {
on_config_changed(_cct);
}
BlueStoreSlowFastCoDel::~BlueStoreSlowFastCoDel() {
{
std::lock_guard l1{fast_timer_lock};
fast_timer.cancel_all_events();
fast_timer.shutdown();
}
{
std::lock_guard l2{slow_timer_lock};
slow_timer.cancel_all_events();
slow_timer.shutdown();
}
regression_throughput_history.clear();
regression_target_latency_history.clear();
}
void BlueStoreSlowFastCoDel::update_from_txc_info(
ceph::mono_clock::time_point txc_start_time,
uint64_t txc_bytes) {
std::lock_guard l(register_lock);
ceph::mono_clock::time_point now = ceph::mono_clock::now();
int64_t latency = std::chrono::nanoseconds(now - txc_start_time).count();
if (activated && max_queue_length < get_kv_throttle_current()) {
max_queue_length = get_kv_throttle_current();
}
if (min_latency == INITIAL_LATENCY_VALUE || latency < min_latency) {
min_latency = latency;
}
slow_interval_txc_cnt++;
slow_interval_registered_bytes += txc_bytes;
}
void BlueStoreSlowFastCoDel::on_min_latency_violation() {
if (target_latency > 0) {
double diff = (double) (target_latency - min_latency);
auto error_ratio = std::abs(diff) / min_latency;
if (error_ratio > 0.5) {
error_ratio = 0.5;
}
bluestore_budget = std::max(bluestore_budget * (1 - error_ratio),
min_bluestore_budget * 1.0);
}
}
void BlueStoreSlowFastCoDel::on_no_violation() {
if (bluestore_budget < max_queue_length * 1.5) {
bluestore_budget = bluestore_budget + bluestore_budget_increment;
}
}
void BlueStoreSlowFastCoDel::on_config_changed(CephContext *cct) {
{
std::lock_guard l(register_lock);
activated = cct->_conf->bluestore_codel;
target_slope = cct->_conf->bluestore_codel_throughput_latency_tradeoff;
slow_interval = ((int64_t) cct->_conf->bluestore_codel_slow_interval) *
1000 * 1000;
initial_fast_interval = ((int64_t)
cct->_conf->bluestore_codel_fast_interval) * 1000 * 1000;
initial_target_latency = ((int64_t)
cct->_conf->bluestore_codel_initial_target_latency) * 1000 * 1000;
min_target_latency = ((int64_t)
cct->_conf->bluestore_codel_min_target_latency) * 1000 * 1000;
max_target_latency = ((int64_t)
cct->_conf->bluestore_codel_max_target_latency) * 1000 * 1000;
initial_bluestore_budget = cct->_conf->bluestore_codel_initial_budget_bytes;
min_bluestore_budget = cct->_conf->bluestore_codel_min_budget_bytes;
bluestore_budget_increment =
cct->_conf->bluestore_codel_budget_increment_bytes;
regression_history_size =
cct->_conf->bluestore_codel_regression_history_size;
bluestore_budget = initial_bluestore_budget;
min_bluestore_budget = initial_bluestore_budget;
max_queue_length = min_bluestore_budget;
fast_interval = initial_fast_interval;
target_latency = initial_target_latency;
min_latency = INITIAL_LATENCY_VALUE;
slow_interval_registered_bytes = 0;
regression_throughput_history.clear();
regression_target_latency_history.clear();
slow_interval_start = ceph::mono_clock::zero();
}
{
std::lock_guard l1{fast_timer_lock};
fast_timer.cancel_all_events();
fast_timer.init();
}
_fast_interval_process();
{
std::lock_guard l2{slow_timer_lock};
slow_timer.cancel_all_events();
slow_timer.init();
}
_slow_interval_process();
}
void BlueStoreSlowFastCoDel::reset_bluestore_budget() {
if (activated) {
bluestore_budget = std::max(min_bluestore_budget, bluestore_budget);
bluestore_budget_reset_callback(bluestore_budget);
}
}
void BlueStoreSlowFastCoDel::_fast_interval_process() {
std::lock_guard l(register_lock);
if (target_latency != INITIAL_LATENCY_VALUE &&
min_latency != INITIAL_LATENCY_VALUE) {
if (activated) {
if (_check_latency_violation()) {
// min latency violation
violation_count++;
_update_interval();
on_min_latency_violation(); // handle the violation
} else {
// no latency violation
violation_count = 0;
fast_interval = initial_fast_interval;
on_no_violation();
}
bluestore_budget = std::max(min_bluestore_budget, bluestore_budget);
bluestore_budget_reset_callback(bluestore_budget);
}
// reset interval
min_latency = INITIAL_LATENCY_VALUE;
on_fast_interval_finished();
}
auto codel_ctx = new LambdaContext(
[this](int r) {
_fast_interval_process();
});
auto interval_duration = std::chrono::nanoseconds(fast_interval);
fast_timer.add_event_after(interval_duration, codel_ctx);
}
void BlueStoreSlowFastCoDel::_slow_interval_process() {
std::lock_guard l(register_lock);
ceph::mono_clock::time_point now = ceph::mono_clock::now();
if (activated && !ceph::mono_clock::is_zero(slow_interval_start)
&& slow_interval_txc_cnt > 0) {
double time_sec = nanosec_to_sec(
std::chrono::nanoseconds(now - slow_interval_start).count());
double slow_interval_throughput =
(slow_interval_registered_bytes * 1.0) / time_sec;
slow_interval_throughput = slow_interval_throughput / (1024.0 * 1024.0);
regression_target_latency_history.push_back(
nanosec_to_millisec(target_latency));
regression_throughput_history.push_back(slow_interval_throughput);
if (regression_target_latency_history.size() > regression_history_size) {
regression_target_latency_history.erase(
regression_target_latency_history.begin());
regression_throughput_history.erase(
regression_throughput_history.begin());
}
std::vector<double> targets;
std::vector<double> throughputs;
double target_ms = nanosec_to_millisec(initial_target_latency);
// If there is sufficient number of points, use the regression to find the
// target_ms. Otherwise, target_ms will be initial_target_latency
if (regression_target_latency_history.size() >= regression_history_size) {
target_ms = ceph::find_slope_on_curve(
regression_target_latency_history,
regression_throughput_history,
target_slope);
}
target_latency_without_noise = millisec_to_nanosec(target_ms);
target_latency_without_noise = std::max(target_latency_without_noise,
min_target_latency);
target_latency_without_noise = std::min(target_latency_without_noise,
max_target_latency);
target_ms = nanosec_to_millisec(target_latency_without_noise);
// add log_normal noise
unsigned seed = std::chrono::system_clock::now().time_since_epoch().count();
std::default_random_engine generator(seed);
double dist_params[2];
double rnd_std_dev = 5;
ceph::find_log_normal_dist_params(
target_ms,
nanosec_to_millisec(min_target_latency),
target_ms * rnd_std_dev,
dist_params);
std::lognormal_distribution<double> distribution(dist_params[0],
dist_params[1]);
target_latency = millisec_to_nanosec(distribution(generator));
target_latency += min_target_latency;
if (target_latency < millisec_to_nanosec(target_ms)) {
std::uniform_real_distribution<> distr(0, 0.5);
target_latency = target_latency +
(target_latency - millisec_to_nanosec(target_ms)) *
distr(generator);
}
if (target_latency != INITIAL_LATENCY_VALUE) {
target_latency = std::max(target_latency, min_target_latency);
target_latency = std::min(target_latency, max_target_latency);
}
on_slow_interval_finished();
}
slow_interval_start = ceph::mono_clock::now();
slow_interval_registered_bytes = 0;
slow_interval_txc_cnt = 0;
max_queue_length = min_bluestore_budget;
auto codel_ctx = new LambdaContext(
[this](int r) {
_slow_interval_process();
});
auto interval_duration = std::chrono::nanoseconds(slow_interval);
slow_timer.add_event_after(interval_duration, codel_ctx);
}
/**
* check if the min latency violate the target
* @return true if min latency violate the target, false otherwise
*/
bool BlueStoreSlowFastCoDel::_check_latency_violation() {
if (target_latency != INITIAL_LATENCY_VALUE &&
min_latency != INITIAL_LATENCY_VALUE) {
if (min_latency > target_latency) {
return true;
}
}
return false;
}
void BlueStoreSlowFastCoDel::_update_interval() {
auto sqrt = (int) std::round(std::sqrt(violation_count));
fast_interval = initial_fast_interval / sqrt;
if (fast_interval <= 0) {
fast_interval = 1000;
}
}
int64_t BlueStoreSlowFastCoDel::get_bluestore_budget() {
return bluestore_budget;
}
int64_t BlueStoreSlowFastCoDel::get_target_latency() {
return target_latency;
}

View File

@ -0,0 +1,128 @@
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
#pragma once
#include <iostream>
#include "include/Context.h"
#include "common/Timer.h"
#include "common/ceph_time.h"
class BlueStoreSlowFastCoDel {
public:
BlueStoreSlowFastCoDel(
CephContext *_cct,
std::function<void(int64_t)> _bluestore_budget_reset_callback,
std::function<int64_t()> _get_kv_throttle_current);
virtual ~BlueStoreSlowFastCoDel();
void on_config_changed(CephContext *cct);
void reset_bluestore_budget();
void update_from_txc_info(
ceph::mono_clock::time_point txc_start_time,
uint64_t txc_bytes);
int64_t get_bluestore_budget();
int64_t get_target_latency();
bool is_activated();
protected:
static const int64_t INITIAL_LATENCY_VALUE = -1;
/* config values */
// Config value 'bluestore_codel',true if SlowFastCodel is activated
bool activated = false;
// Config value 'bluestore_codel_fast_interval', Initial interval for fast loop
int64_t initial_fast_interval = INITIAL_LATENCY_VALUE;
// Config value 'bluestore_codel_initial_target_latency', Initial target latency
// to start the algorithm
int64_t initial_target_latency = INITIAL_LATENCY_VALUE;
// Config value 'bluestore_codel_slow_interval', the interval for the slow loop
int64_t slow_interval = INITIAL_LATENCY_VALUE;
// Config value 'bluestore_codel_min_target_latency', min possible value for target
int64_t min_target_latency = INITIAL_LATENCY_VALUE; // in ns
// Config value 'bluestore_codel_max_target_latency', max possible value for target
int64_t max_target_latency = INITIAL_LATENCY_VALUE; // in ns
// Config value 'bluestore_codel_throughput_latency_tradeoff', define the
// tradeoff between throughput and latency (MB/s loss for every 1ms latency drop)
double target_slope = 5;
// Config value 'bluestore_codel_regression_history_size', regression history size
int64_t regression_history_size = 100;
// Config value 'bluestore_codel_min_budget_bytes', the minimum bluestore
// throttle budget
int64_t min_bluestore_budget = 102400;
// Config value 'bluestore_codel_initial_budget_bytes', the initial bluestore
// throttle budget
int64_t initial_bluestore_budget = 102400;
// Config value 'bluestore_codel_budget_increment_bytes', the increment size
// for opening the bluestore throttle
int64_t bluestore_budget_increment = 102400;
/* internal state variables */
// current interval for the fast loop
int64_t fast_interval = INITIAL_LATENCY_VALUE;
// current target latency that fast loop is using
int64_t target_latency = INITIAL_LATENCY_VALUE;
int64_t target_latency_without_noise = INITIAL_LATENCY_VALUE;
// min latency in the current fast interval
int64_t min_latency = INITIAL_LATENCY_VALUE;
int64_t violation_count = 0;
ceph::mutex fast_timer_lock = ceph::make_mutex("CoDel::fast_timer_lock");
ceph::mutex slow_timer_lock = ceph::make_mutex("CoDel::slow_timer_lock");
ceph::mutex register_lock = ceph::make_mutex("CoDel::register_lock");
SafeTimer fast_timer; // fast loop timer
SafeTimer slow_timer; // slow loop timer
// marks the start of the current slow interval
ceph::mono_clock::time_point slow_interval_start = ceph::mono_clock::zero();
// amount of bytes that has been processed in current slow interval
int64_t slow_interval_registered_bytes = 0;
// number of transactions that has been processed in current slow interval
int64_t slow_interval_txc_cnt = 0;
// target latency history for regression
std::vector<double> regression_target_latency_history;
// throughput history for regression
std::vector<double> regression_throughput_history;
int64_t bluestore_budget = 102400; // current bluestore throttle budget
// maximum amount of inflight data in current slow interval
int64_t max_queue_length = 102400;
std::function<void(int64_t)> bluestore_budget_reset_callback;
std::function<int64_t(void)> get_kv_throttle_current;
void on_min_latency_violation();
void on_no_violation();
virtual void on_fast_interval_finished() {}
virtual void on_slow_interval_finished() {}
private:
bool _check_latency_violation();
void _update_interval();
void _fast_interval_process();
void _slow_interval_process();
template<typename T>
double millisec_to_nanosec(T ms) {
return ms * 1000.0 * 1000.0;
}
template<typename T>
double nanosec_to_millisec(T ns) {
return ns / (1000.0 * 1000.0);
}
template<typename T>
double nanosec_to_sec(T ns) {
return ns / (1000.0 * 1000.0 * 1000.0);
}
};