From c575f28fbe769361e29625c84e911620ce5cab59 Mon Sep 17 00:00:00 2001 From: Thomas Schoebel-Theuer Date: Mon, 17 Aug 2020 18:03:29 +0200 Subject: [PATCH] infra: fix IOPS and other computations --- kernel/lib_limiter.c | 186 ++++++++++++++++++++++---------------- kernel/lib_limiter.h | 21 +++-- kernel/sy_old/mars_proc.c | 35 ++++--- 3 files changed, 139 insertions(+), 103 deletions(-) diff --git a/kernel/lib_limiter.c b/kernel/lib_limiter.c index fe9b627c..8fd55b4a 100644 --- a/kernel/lib_limiter.c +++ b/kernel/lib_limiter.c @@ -28,8 +28,18 @@ #include #include +/* For precision, _internal_ time is counted in ticks of the following resolution (ticks per second); the *_ms tunables stay in ms and are converted via MS_TO_TR() / TR_TO_MS() */ + #define LIMITER_TIME_RESOLUTION NSEC_PER_SEC +#define DEFAULT_MIN_WINDOW (LIMITER_TIME_RESOLUTION * 1) +#define DEFAULT_MAX_WINDOW (LIMITER_TIME_RESOLUTION * 4) + +#define MAX_DIVIDER (DEFAULT_MIN_WINDOW / 10) + +#define MS_TO_TR(x) ((__s64)(x) * (LIMITER_TIME_RESOLUTION / 1000)) +#define TR_TO_MS(x) ((x) / (LIMITER_TIME_RESOLUTION / 1000)) + int mars_limit(struct mars_limiter *lim, int amount) { int delay = 0; @@ -45,17 +55,22 @@ int mars_limit(struct mars_limiter *lim, int amount) */ while (lim != NULL) { struct lamport_time diff = lamport_time_sub(now, lim->lim_stamp); - s64 window = lamport_time_to_ns(&diff); + __s64 window = lamport_time_to_ns(&diff); + __s64 rate_raw; + int rate; + int max_rate; /* Sometimes, raw CPU clocks may do weired things... - * Smaller windows in the denominator than 1s could fake unrealistic rates. + * Small windows in the denominator could fake unrealistic rates. + * Do not divide by too small numbers. 
*/ - if (unlikely(lim->lim_min_window <= 0)) - lim->lim_min_window = 1000; - if (unlikely(lim->lim_max_window <= lim->lim_min_window)) - lim->lim_max_window = lim->lim_min_window + 8000; - if (unlikely(window < (long long)lim->lim_min_window * (LIMITER_TIME_RESOLUTION / 1000))) - window = (long long)lim->lim_min_window * (LIMITER_TIME_RESOLUTION / 1000); + if (window < MAX_DIVIDER) + window = MAX_DIVIDER; + + if (unlikely(lim->lim_min_window_ms <= TR_TO_MS(MAX_DIVIDER))) + lim->lim_min_window_ms = TR_TO_MS(DEFAULT_MIN_WINDOW); + if (unlikely(lim->lim_max_window_ms <= lim->lim_min_window_ms)) + lim->lim_max_window_ms = lim->lim_min_window_ms + TR_TO_MS(DEFAULT_MAX_WINDOW); /* Update total statistics. * They will intentionally wrap around. @@ -69,90 +84,101 @@ int mars_limit(struct mars_limiter *lim, int amount) /* Only use incremental accumulation at repeated calls, but * never after longer pauses. */ - if (likely(lim->lim_stamp.tv_sec && - window < (long long)lim->lim_max_window * (LIMITER_TIME_RESOLUTION / 1000))) { - long long rate_raw; - int rate; - int max_rate; - - /* Races are possible, but taken into account. - * There is no real harm from rarely lost updates. + if (!lim->lim_stamp.tv_sec || + window > MS_TO_TR(lim->lim_max_window_ms)) { + /* reset, start over with new measurement cycle */ + memset(&diff, 0, sizeof(diff)); + lim->lim_stamp = now; + lim->lim_ops_accu = 0; + lim->lim_amount_accu = 0; + lim->lim_ops_rate = 0; + lim->lim_amount_rate = 0; + window = MAX_DIVIDER; + } else { + __s64 diff_window; + + /* Try to keep the window between min_window and 2 * min_window. + * We wait until min_window has been exceeded _twice_, + * and then reduce the window by only 1 * min_window. 
*/ - if (likely(amount > 0)) { - lim->lim_amount_accu += amount; - lim->lim_amount_cumul += amount; - lim->lim_ops_accu++; - lim->lim_ops_cumul++; - } + diff_window = window - MS_TO_TR(lim->lim_min_window_ms); + if (diff_window > MS_TO_TR(lim->lim_min_window_ms)) { + __s64 used_up; + __s64 add_window = 0; - /* compute amount values */ - rate_raw = lim->lim_amount_accu * LIMITER_TIME_RESOLUTION / window; - rate = rate_raw; - if (unlikely(rate_raw > INT_MAX)) { - rate = INT_MAX; - } - lim->lim_amount_rate = rate; - - /* amount limit exceeded? */ - max_rate = lim->lim_max_amount_rate; - if (max_rate > 0 && rate > max_rate) { - int this_delay = (window * rate / max_rate - window) / (LIMITER_TIME_RESOLUTION / 1000); - // compute maximum - if (this_delay > delay && this_delay > 0) - delay = this_delay; - } - - /* compute ops values */ - rate_raw = lim->lim_ops_accu * LIMITER_TIME_RESOLUTION / window; - rate = rate_raw; - if (unlikely(rate_raw > INT_MAX)) { - rate = INT_MAX; - } - lim->lim_ops_rate = rate; - - /* ops limit exceeded? 
*/ - max_rate = lim->lim_max_ops_rate; - if (max_rate > 0 && rate > max_rate) { - int this_delay = (window * rate / max_rate - window) / (LIMITER_TIME_RESOLUTION / 1000); - - // compute maximum - if (this_delay > delay && this_delay > 0) - delay = this_delay; - } - - /* Try to keep the next window below min_window - */ - window -= lim->lim_min_window * (LIMITER_TIME_RESOLUTION / 1000); - if (window > 0) { - long long used_up = (long long)lim->lim_amount_rate * window / LIMITER_TIME_RESOLUTION; + used_up = lim->lim_amount_accu * diff_window / window; if (used_up > 0) { - lamport_time_add_ns(&lim->lim_stamp, window); + add_window = diff_window; lim->lim_amount_accu -= used_up; if (unlikely(lim->lim_amount_accu < 0)) lim->lim_amount_accu = 0; } - used_up = (long long)lim->lim_ops_rate * window / LIMITER_TIME_RESOLUTION; + + used_up = lim->lim_ops_accu * diff_window / window; if (used_up > 0) { - lamport_time_add_ns(&lim->lim_stamp, window); + if (diff_window > add_window) + add_window = diff_window; lim->lim_ops_accu -= used_up; if (unlikely(lim->lim_ops_accu < 0)) lim->lim_ops_accu = 0; } + + if (add_window > 0) { + lamport_time_add_ns(&lim->lim_stamp, add_window); + /* recompute the new window */ + diff = lamport_time_sub(now, lim->lim_stamp); + window = lamport_time_to_ns(&diff); + } } - } else { // reset, start over with new measurement cycle - struct lamport_time sub = ns_to_lamport_time(lim->lim_min_window * (LIMITER_TIME_RESOLUTION / 1000)); - - if (unlikely(amount < 0)) - amount = 0; - lim->lim_ops_accu = 1; - lim->lim_amount_accu = amount; - lim->lim_stamp = lamport_time_sub(now, sub); - lim->lim_ops_rate = 0; - lim->lim_amount_rate = 0; } + + /* Races are possible, but taken into account. + * There is no real harm from rarely lost updates. 
+ */ + if (likely(amount > 0)) { + lim->lim_amount_accu += amount; + lim->lim_ops_accu++; + } + + /* compute amount values */ + rate_raw = lim->lim_amount_accu * LIMITER_TIME_RESOLUTION / window; + rate = rate_raw; + if (unlikely(rate_raw > INT_MAX)) { + rate = INT_MAX; + } + lim->lim_amount_rate = rate; + + /* amount limit exceeded? */ + max_rate = lim->lim_max_amount_rate; + if (max_rate > 0 && rate > max_rate) { + int this_delay = (window * rate / max_rate - window); /* NOTE(review): kept in internal time units here and converted by TR_TO_MS() only at return; a delay above INT_MAX units (about 2.1 s at NSEC_PER_SEC resolution) does not fit into int - confirm, consider __s64 */ + + // compute maximum + if (this_delay > delay && this_delay > 0) + delay = this_delay; + } + + /* compute ops values */ + rate_raw = lim->lim_ops_accu * LIMITER_TIME_RESOLUTION / window; + rate = rate_raw; + if (unlikely(rate_raw > INT_MAX)) { + rate = INT_MAX; + } + lim->lim_ops_rate = rate; + + /* ops limit exceeded? */ + max_rate = lim->lim_max_ops_rate; + if (max_rate > 0 && rate > max_rate) { + int this_delay = (window * rate / max_rate - window); /* NOTE(review): same int range caveat as for the amount delay */ + + // compute maximum + if (this_delay > delay && this_delay > 0) + delay = this_delay; + } + lim = lim->lim_father; } - return delay; + return TR_TO_MS(delay); } void mars_limit_sleep(struct mars_limiter *lim, int amount) @@ -160,10 +186,10 @@ void mars_limit_sleep(struct mars_limiter *lim, int amount) int sleep = mars_limit(lim, amount); if (sleep > 0) { - if (unlikely(lim->lim_max_delay <= 0)) - lim->lim_max_delay = 1000; - if (sleep > lim->lim_max_delay) - sleep = lim->lim_max_delay; + if (unlikely(lim->lim_max_delay_ms <= 0)) + lim->lim_max_delay_ms = 1000; + if (sleep > lim->lim_max_delay_ms) + sleep = lim->lim_max_delay_ms; brick_msleep(sleep); } } diff --git a/kernel/lib_limiter.h b/kernel/lib_limiter.h index 960c3837..4dd1310b 100644 --- a/kernel/lib_limiter.h +++ 
b/kernel/lib_limiter.h @@ -29,26 +29,29 @@ #include +/* Units: + * Times (the *_ms tunables) are in ms + * Rates are in units / s + */ + struct mars_limiter { /* hierarchy tree */ struct mars_limiter *lim_father; /* tunables */ int lim_max_ops_rate; int lim_max_amount_rate; - int lim_max_delay; - int 
lim_min_window; - int lim_max_window; + int lim_max_delay_ms; + int lim_min_window_ms; + int lim_max_window_ms; /* readable */ int lim_ops_rate; int lim_amount_rate; - int lim_ops_cumul; - int lim_amount_cumul; - int lim_total_ops; - int lim_total_amount; + unsigned long lim_total_ops; + unsigned long lim_total_amount; struct lamport_time lim_stamp; /* internal */ - long long lim_ops_accu; - long long lim_amount_accu; + __s64 lim_ops_accu; + __s64 lim_amount_accu; }; extern int mars_limit(struct mars_limiter *lim, int amount); diff --git a/kernel/sy_old/mars_proc.c b/kernel/sy_old/mars_proc.c index 9917ba8e..f9d8b706 100644 --- a/kernel/sy_old/mars_proc.c +++ b/kernel/sy_old/mars_proc.c @@ -256,30 +256,37 @@ done: #define _CTL_STRATEGY(handler) /*empty*/ #endif -#define VEC_ENTRY(NAME,VAR,MODE,COUNT) \ +#define _VEC_ENTRY(NAME,VAR,TYPE,HANDLER,MODE,COUNT) \ { \ _CTL_NAME \ .procname = NAME, \ .data = &(VAR), \ - .maxlen = sizeof(int) * (COUNT),\ + .maxlen = sizeof(TYPE) * (COUNT),\ .mode = MODE, \ - .proc_handler = &proc_dointvec, \ + .proc_handler = &HANDLER, \ _CTL_STRATEGY(sysctl_intvec) \ } +#define VEC_INT_ENTRY(NAME,VAR,MODE,COUNT) \ + _VEC_ENTRY(NAME,VAR,int,proc_dointvec,MODE,COUNT) + #define INT_ENTRY(NAME,VAR,MODE) \ - VEC_ENTRY(NAME, VAR, MODE, 1) + VEC_INT_ENTRY(NAME, VAR, MODE, 1) + +#define VEC_ULONG_ENTRY(NAME,VAR,MODE,COUNT) \ + _VEC_ENTRY(NAME,VAR,unsigned long,proc_doulongvec_minmax,MODE,COUNT) + +#define ULONG_ENTRY(NAME,VAR,MODE) \ + VEC_ULONG_ENTRY(NAME, VAR, MODE, 1) #define LIMITER_ENTRIES(VAR, PREFIX, SUFFIX) \ - INT_ENTRY(PREFIX "_total_ops", (VAR)->lim_total_ops, 0400), \ - INT_ENTRY(PREFIX "_total_" SUFFIX, (VAR)->lim_total_amount, 0400), \ + ULONG_ENTRY(PREFIX "_total_ops", (VAR)->lim_total_ops, 0400), \ + ULONG_ENTRY(PREFIX "_total_" SUFFIX, (VAR)->lim_total_amount, 0400), \ INT_ENTRY(PREFIX "_ratelimit_ops", (VAR)->lim_max_ops_rate, 0600), \ INT_ENTRY(PREFIX "_ratelimit_" SUFFIX, (VAR)->lim_max_amount_rate, 0600), \ - INT_ENTRY(PREFIX 
"_maxdelay_ms", (VAR)->lim_max_delay,0600), \ - INT_ENTRY(PREFIX "_minwindow_ms", (VAR)->lim_min_window,0600), \ - INT_ENTRY(PREFIX "_maxwindow_ms", (VAR)->lim_max_window,0600), \ - INT_ENTRY(PREFIX "_cumul_ops", (VAR)->lim_ops_cumul, 0600), \ - INT_ENTRY(PREFIX "_cumul_" SUFFIX, (VAR)->lim_amount_cumul, 0600), \ + INT_ENTRY(PREFIX "_maxdelay_ms", (VAR)->lim_max_delay_ms, 0600), \ + INT_ENTRY(PREFIX "_minwindow_ms", (VAR)->lim_min_window_ms, 0600), \ + INT_ENTRY(PREFIX "_maxwindow_ms", (VAR)->lim_max_window_ms, 0600), \ INT_ENTRY(PREFIX "_rate_ops", (VAR)->lim_ops_rate, 0400), \ INT_ENTRY(PREFIX "_rate_" SUFFIX, (VAR)->lim_amount_rate, 0400) \ @@ -400,9 +407,9 @@ struct ctl_table mars_table[] = { INT_ENTRY("mem_used_raw_kb", brick_global_block_used,0400), #ifdef CONFIG_MARS_MEM_PREALLOC INT_ENTRY("mem_allow_freelist", brick_allow_freelist, 0600), - VEC_ENTRY("mem_freelist_max", brick_mem_freelist_max, 0600, BRICK_MAX_ORDER+1), - VEC_ENTRY("mem_alloc_count", brick_mem_alloc_count, 0400, BRICK_MAX_ORDER+1), - VEC_ENTRY("mem_alloc_max", brick_mem_alloc_count, 0600, BRICK_MAX_ORDER+1), + VEC_INT_ENTRY("mem_freelist_max", brick_mem_freelist_max, 0600, BRICK_MAX_ORDER+1), + VEC_INT_ENTRY("mem_alloc_count", brick_mem_alloc_count, 0400, BRICK_MAX_ORDER+1), + VEC_INT_ENTRY("mem_alloc_max", brick_mem_alloc_count, 0600, BRICK_MAX_ORDER+1), /* NOTE(review): exports brick_mem_alloc_count, the same variable as the mem_alloc_count entry above (unchanged from the removed line) - confirm this is intended */ #endif INT_ENTRY("io_flying_count", mars_global_io_flying, 0400), INT_ENTRY("copy_overlap", mars_copy_overlap, 0600),