mirror of https://github.com/schoebel/mars
infra: fix IOPS and other computations
This commit is contained in:
parent
1238dcaf64
commit
c575f28fbe
|
@ -28,8 +28,18 @@
|
|||
#include <linux/kernel.h>
|
||||
#include <linux/module.h>
|
||||
|
||||
/* For precisions, _internal_ time is in multiples of the following basic time units */
|
||||
|
||||
#define LIMITER_TIME_RESOLUTION NSEC_PER_SEC
|
||||
|
||||
#define DEFAULT_MIN_WINDOW (LIMITER_TIME_RESOLUTION * 1)
|
||||
#define DEFAULT_MAX_WINDOW (LIMITER_TIME_RESOLUTION * 4)
|
||||
|
||||
#define MAX_DIVIDER (DEFAULT_MIN_WINDOW / 10)
|
||||
|
||||
#define MS_TO_TR(x) ((__s64)(x) * (LIMITER_TIME_RESOLUTION / 1000))
|
||||
#define TR_TO_MS(x) ((x) / (LIMITER_TIME_RESOLUTION / 1000))
|
||||
|
||||
int mars_limit(struct mars_limiter *lim, int amount)
|
||||
{
|
||||
int delay = 0;
|
||||
|
@ -45,17 +55,22 @@ int mars_limit(struct mars_limiter *lim, int amount)
|
|||
*/
|
||||
while (lim != NULL) {
|
||||
struct lamport_time diff = lamport_time_sub(now, lim->lim_stamp);
|
||||
s64 window = lamport_time_to_ns(&diff);
|
||||
__s64 window = lamport_time_to_ns(&diff);
|
||||
__s64 rate_raw;
|
||||
int rate;
|
||||
int max_rate;
|
||||
|
||||
/* Sometimes, raw CPU clocks may do weired things...
|
||||
* Smaller windows in the denominator than 1s could fake unrealistic rates.
|
||||
* Small windows in the denominator could fake unrealistic rates.
|
||||
* Do not divide by too small numbers.
|
||||
*/
|
||||
if (unlikely(lim->lim_min_window <= 0))
|
||||
lim->lim_min_window = 1000;
|
||||
if (unlikely(lim->lim_max_window <= lim->lim_min_window))
|
||||
lim->lim_max_window = lim->lim_min_window + 8000;
|
||||
if (unlikely(window < (long long)lim->lim_min_window * (LIMITER_TIME_RESOLUTION / 1000)))
|
||||
window = (long long)lim->lim_min_window * (LIMITER_TIME_RESOLUTION / 1000);
|
||||
if (window < MAX_DIVIDER)
|
||||
window = MAX_DIVIDER;
|
||||
|
||||
if (unlikely(lim->lim_min_window_ms <= TR_TO_MS(MAX_DIVIDER)))
|
||||
lim->lim_min_window_ms = TR_TO_MS(DEFAULT_MIN_WINDOW);
|
||||
if (unlikely(lim->lim_max_window_ms <= lim->lim_min_window_ms))
|
||||
lim->lim_max_window_ms = lim->lim_min_window_ms + TR_TO_MS(DEFAULT_MAX_WINDOW);
|
||||
|
||||
/* Update total statistics.
|
||||
* They will intentionally wrap around.
|
||||
|
@ -69,90 +84,101 @@ int mars_limit(struct mars_limiter *lim, int amount)
|
|||
/* Only use incremental accumulation at repeated calls, but
|
||||
* never after longer pauses.
|
||||
*/
|
||||
if (likely(lim->lim_stamp.tv_sec &&
|
||||
window < (long long)lim->lim_max_window * (LIMITER_TIME_RESOLUTION / 1000))) {
|
||||
long long rate_raw;
|
||||
int rate;
|
||||
int max_rate;
|
||||
|
||||
/* Races are possible, but taken into account.
|
||||
* There is no real harm from rarely lost updates.
|
||||
if (!lim->lim_stamp.tv_sec ||
|
||||
window > MS_TO_TR(lim->lim_max_window_ms)) {
|
||||
/* reset, start over with new measurement cycle */
|
||||
memset(&diff, 0, sizeof(diff));
|
||||
lim->lim_stamp = now;
|
||||
lim->lim_ops_accu = 0;
|
||||
lim->lim_amount_accu = 0;
|
||||
lim->lim_ops_rate = 0;
|
||||
lim->lim_amount_rate = 0;
|
||||
window = MAX_DIVIDER;
|
||||
} else {
|
||||
__s64 diff_window;
|
||||
|
||||
/* Try to keep the window between min_window and 2 * min_window.
|
||||
* We wait until min_window has been exceeded _twice_,
|
||||
* and then reduce the window by only 1 * min_window.
|
||||
*/
|
||||
if (likely(amount > 0)) {
|
||||
lim->lim_amount_accu += amount;
|
||||
lim->lim_amount_cumul += amount;
|
||||
lim->lim_ops_accu++;
|
||||
lim->lim_ops_cumul++;
|
||||
}
|
||||
diff_window = window - MS_TO_TR(lim->lim_min_window_ms);
|
||||
if (diff_window > MS_TO_TR(lim->lim_min_window_ms)) {
|
||||
__s64 used_up;
|
||||
__s64 add_window = 0;
|
||||
|
||||
/* compute amount values */
|
||||
rate_raw = lim->lim_amount_accu * LIMITER_TIME_RESOLUTION / window;
|
||||
rate = rate_raw;
|
||||
if (unlikely(rate_raw > INT_MAX)) {
|
||||
rate = INT_MAX;
|
||||
}
|
||||
lim->lim_amount_rate = rate;
|
||||
|
||||
/* amount limit exceeded? */
|
||||
max_rate = lim->lim_max_amount_rate;
|
||||
if (max_rate > 0 && rate > max_rate) {
|
||||
int this_delay = (window * rate / max_rate - window) / (LIMITER_TIME_RESOLUTION / 1000);
|
||||
// compute maximum
|
||||
if (this_delay > delay && this_delay > 0)
|
||||
delay = this_delay;
|
||||
}
|
||||
|
||||
/* compute ops values */
|
||||
rate_raw = lim->lim_ops_accu * LIMITER_TIME_RESOLUTION / window;
|
||||
rate = rate_raw;
|
||||
if (unlikely(rate_raw > INT_MAX)) {
|
||||
rate = INT_MAX;
|
||||
}
|
||||
lim->lim_ops_rate = rate;
|
||||
|
||||
/* ops limit exceeded? */
|
||||
max_rate = lim->lim_max_ops_rate;
|
||||
if (max_rate > 0 && rate > max_rate) {
|
||||
int this_delay = (window * rate / max_rate - window) / (LIMITER_TIME_RESOLUTION / 1000);
|
||||
|
||||
// compute maximum
|
||||
if (this_delay > delay && this_delay > 0)
|
||||
delay = this_delay;
|
||||
}
|
||||
|
||||
/* Try to keep the next window below min_window
|
||||
*/
|
||||
window -= lim->lim_min_window * (LIMITER_TIME_RESOLUTION / 1000);
|
||||
if (window > 0) {
|
||||
long long used_up = (long long)lim->lim_amount_rate * window / LIMITER_TIME_RESOLUTION;
|
||||
used_up = lim->lim_amount_accu * diff_window / window;
|
||||
if (used_up > 0) {
|
||||
lamport_time_add_ns(&lim->lim_stamp, window);
|
||||
add_window = diff_window;
|
||||
lim->lim_amount_accu -= used_up;
|
||||
if (unlikely(lim->lim_amount_accu < 0))
|
||||
lim->lim_amount_accu = 0;
|
||||
}
|
||||
used_up = (long long)lim->lim_ops_rate * window / LIMITER_TIME_RESOLUTION;
|
||||
|
||||
used_up = lim->lim_ops_accu * diff_window / window;
|
||||
if (used_up > 0) {
|
||||
lamport_time_add_ns(&lim->lim_stamp, window);
|
||||
if (diff_window > add_window)
|
||||
add_window = diff_window;
|
||||
lim->lim_ops_accu -= used_up;
|
||||
if (unlikely(lim->lim_ops_accu < 0))
|
||||
lim->lim_ops_accu = 0;
|
||||
}
|
||||
|
||||
if (add_window > 0) {
|
||||
lamport_time_add_ns(&lim->lim_stamp, add_window);
|
||||
/* recompute the new window */
|
||||
diff = lamport_time_sub(now, lim->lim_stamp);
|
||||
window = lamport_time_to_ns(&diff);
|
||||
}
|
||||
}
|
||||
} else { // reset, start over with new measurement cycle
|
||||
struct lamport_time sub = ns_to_lamport_time(lim->lim_min_window * (LIMITER_TIME_RESOLUTION / 1000));
|
||||
|
||||
if (unlikely(amount < 0))
|
||||
amount = 0;
|
||||
lim->lim_ops_accu = 1;
|
||||
lim->lim_amount_accu = amount;
|
||||
lim->lim_stamp = lamport_time_sub(now, sub);
|
||||
lim->lim_ops_rate = 0;
|
||||
lim->lim_amount_rate = 0;
|
||||
}
|
||||
|
||||
/* Races are possible, but taken into account.
|
||||
* There is no real harm from rarely lost updates.
|
||||
*/
|
||||
if (likely(amount > 0)) {
|
||||
lim->lim_amount_accu += amount;
|
||||
lim->lim_ops_accu++;
|
||||
}
|
||||
|
||||
/* compute amount values */
|
||||
rate_raw = lim->lim_amount_accu * LIMITER_TIME_RESOLUTION / window;
|
||||
rate = rate_raw;
|
||||
if (unlikely(rate_raw > INT_MAX)) {
|
||||
rate = INT_MAX;
|
||||
}
|
||||
lim->lim_amount_rate = rate;
|
||||
|
||||
/* amount limit exceeded? */
|
||||
max_rate = lim->lim_max_amount_rate;
|
||||
if (max_rate > 0 && rate > max_rate) {
|
||||
int this_delay = (window * rate / max_rate - window);
|
||||
|
||||
// compute maximum
|
||||
if (this_delay > delay && this_delay > 0)
|
||||
delay = this_delay;
|
||||
}
|
||||
|
||||
/* compute ops values */
|
||||
rate_raw = lim->lim_ops_accu * LIMITER_TIME_RESOLUTION / window;
|
||||
rate = rate_raw;
|
||||
if (unlikely(rate_raw > INT_MAX)) {
|
||||
rate = INT_MAX;
|
||||
}
|
||||
lim->lim_ops_rate = rate;
|
||||
|
||||
/* ops limit exceeded? */
|
||||
max_rate = lim->lim_max_ops_rate;
|
||||
if (max_rate > 0 && rate > max_rate) {
|
||||
int this_delay = (window * rate / max_rate - window);
|
||||
|
||||
// compute maximum
|
||||
if (this_delay > delay && this_delay > 0)
|
||||
delay = this_delay;
|
||||
}
|
||||
|
||||
lim = lim->lim_father;
|
||||
}
|
||||
return delay;
|
||||
return TR_TO_MS(delay);
|
||||
}
|
||||
|
||||
void mars_limit_sleep(struct mars_limiter *lim, int amount)
|
||||
|
@ -160,10 +186,10 @@ void mars_limit_sleep(struct mars_limiter *lim, int amount)
|
|||
int sleep = mars_limit(lim, amount);
|
||||
|
||||
if (sleep > 0) {
|
||||
if (unlikely(lim->lim_max_delay <= 0))
|
||||
lim->lim_max_delay = 1000;
|
||||
if (sleep > lim->lim_max_delay)
|
||||
sleep = lim->lim_max_delay;
|
||||
if (unlikely(lim->lim_max_delay_ms <= 0))
|
||||
lim->lim_max_delay_ms = 1000;
|
||||
if (sleep > lim->lim_max_delay_ms)
|
||||
sleep = lim->lim_max_delay_ms;
|
||||
brick_msleep(sleep);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -29,26 +29,29 @@
|
|||
|
||||
#include <linux/utsname.h>
|
||||
|
||||
/* Units:
|
||||
* Time is in ms
|
||||
* Rates are in units / s
|
||||
*/
|
||||
|
||||
struct mars_limiter {
|
||||
/* hierarchy tree */
|
||||
struct mars_limiter *lim_father;
|
||||
/* tunables */
|
||||
int lim_max_ops_rate;
|
||||
int lim_max_amount_rate;
|
||||
int lim_max_delay;
|
||||
int lim_min_window;
|
||||
int lim_max_window;
|
||||
int lim_max_delay_ms;
|
||||
int lim_min_window_ms;
|
||||
int lim_max_window_ms;
|
||||
/* readable */
|
||||
int lim_ops_rate;
|
||||
int lim_amount_rate;
|
||||
int lim_ops_cumul;
|
||||
int lim_amount_cumul;
|
||||
int lim_total_ops;
|
||||
int lim_total_amount;
|
||||
unsigned long lim_total_ops;
|
||||
unsigned long lim_total_amount;
|
||||
struct lamport_time lim_stamp;
|
||||
/* internal */
|
||||
long long lim_ops_accu;
|
||||
long long lim_amount_accu;
|
||||
__s64 lim_ops_accu;
|
||||
__s64 lim_amount_accu;
|
||||
};
|
||||
|
||||
extern int mars_limit(struct mars_limiter *lim, int amount);
|
||||
|
|
|
@ -256,30 +256,37 @@ done:
|
|||
#define _CTL_STRATEGY(handler) /*empty*/
|
||||
#endif
|
||||
|
||||
#define VEC_ENTRY(NAME,VAR,MODE,COUNT) \
|
||||
#define _VEC_ENTRY(NAME,VAR,TYPE,HANDLER,MODE,COUNT) \
|
||||
{ \
|
||||
_CTL_NAME \
|
||||
.procname = NAME, \
|
||||
.data = &(VAR), \
|
||||
.maxlen = sizeof(int) * (COUNT),\
|
||||
.maxlen = sizeof(TYPE) * (COUNT),\
|
||||
.mode = MODE, \
|
||||
.proc_handler = &proc_dointvec, \
|
||||
.proc_handler = &HANDLER, \
|
||||
_CTL_STRATEGY(sysctl_intvec) \
|
||||
}
|
||||
|
||||
#define VEC_INT_ENTRY(NAME,VAR,MODE,COUNT) \
|
||||
_VEC_ENTRY(NAME,VAR,int,proc_dointvec,MODE,COUNT)
|
||||
|
||||
#define INT_ENTRY(NAME,VAR,MODE) \
|
||||
VEC_ENTRY(NAME, VAR, MODE, 1)
|
||||
VEC_INT_ENTRY(NAME, VAR, MODE, 1)
|
||||
|
||||
#define VEC_ULONG_ENTRY(NAME,VAR,MODE,COUNT) \
|
||||
_VEC_ENTRY(NAME,VAR,unsigned long,proc_doulongvec_minmax,MODE,COUNT)
|
||||
|
||||
#define ULONG_ENTRY(NAME,VAR,MODE) \
|
||||
VEC_ULONG_ENTRY(NAME, VAR, MODE, 1)
|
||||
|
||||
#define LIMITER_ENTRIES(VAR, PREFIX, SUFFIX) \
|
||||
INT_ENTRY(PREFIX "_total_ops", (VAR)->lim_total_ops, 0400), \
|
||||
INT_ENTRY(PREFIX "_total_" SUFFIX, (VAR)->lim_total_amount, 0400), \
|
||||
ULONG_ENTRY(PREFIX "_total_ops", (VAR)->lim_total_ops, 0400), \
|
||||
ULONG_ENTRY(PREFIX "_total_" SUFFIX, (VAR)->lim_total_amount, 0400), \
|
||||
INT_ENTRY(PREFIX "_ratelimit_ops", (VAR)->lim_max_ops_rate, 0600), \
|
||||
INT_ENTRY(PREFIX "_ratelimit_" SUFFIX, (VAR)->lim_max_amount_rate, 0600), \
|
||||
INT_ENTRY(PREFIX "_maxdelay_ms", (VAR)->lim_max_delay,0600), \
|
||||
INT_ENTRY(PREFIX "_minwindow_ms", (VAR)->lim_min_window,0600), \
|
||||
INT_ENTRY(PREFIX "_maxwindow_ms", (VAR)->lim_max_window,0600), \
|
||||
INT_ENTRY(PREFIX "_cumul_ops", (VAR)->lim_ops_cumul, 0600), \
|
||||
INT_ENTRY(PREFIX "_cumul_" SUFFIX, (VAR)->lim_amount_cumul, 0600), \
|
||||
INT_ENTRY(PREFIX "_maxdelay_ms", (VAR)->lim_max_delay_ms, 0600), \
|
||||
INT_ENTRY(PREFIX "_minwindow_ms", (VAR)->lim_min_window_ms, 0600), \
|
||||
INT_ENTRY(PREFIX "_maxwindow_ms", (VAR)->lim_max_window_ms, 0600), \
|
||||
INT_ENTRY(PREFIX "_rate_ops", (VAR)->lim_ops_rate, 0400), \
|
||||
INT_ENTRY(PREFIX "_rate_" SUFFIX, (VAR)->lim_amount_rate, 0400) \
|
||||
|
||||
|
@ -400,9 +407,9 @@ struct ctl_table mars_table[] = {
|
|||
INT_ENTRY("mem_used_raw_kb", brick_global_block_used,0400),
|
||||
#ifdef CONFIG_MARS_MEM_PREALLOC
|
||||
INT_ENTRY("mem_allow_freelist", brick_allow_freelist, 0600),
|
||||
VEC_ENTRY("mem_freelist_max", brick_mem_freelist_max, 0600, BRICK_MAX_ORDER+1),
|
||||
VEC_ENTRY("mem_alloc_count", brick_mem_alloc_count, 0400, BRICK_MAX_ORDER+1),
|
||||
VEC_ENTRY("mem_alloc_max", brick_mem_alloc_count, 0600, BRICK_MAX_ORDER+1),
|
||||
VEC_INT_ENTRY("mem_freelist_max", brick_mem_freelist_max, 0600, BRICK_MAX_ORDER+1),
|
||||
VEC_INT_ENTRY("mem_alloc_count", brick_mem_alloc_count, 0400, BRICK_MAX_ORDER+1),
|
||||
VEC_INT_ENTRY("mem_alloc_max", brick_mem_alloc_count, 0600, BRICK_MAX_ORDER+1),
|
||||
#endif
|
||||
INT_ENTRY("io_flying_count", mars_global_io_flying, 0400),
|
||||
INT_ENTRY("copy_overlap", mars_copy_overlap, 0600),
|
||||
|
|
Loading…
Reference in New Issue