mirror of https://github.com/schoebel/mars
all: IO scheduling improvements, tuning
Drastically boost random write performance on RAID controllers with BBUs. Writeback is only performed when there is no IO contention / starvation. The old IO contention controller was suited to workstations. Now server loads are well-controlled even when BBUs are present.
This commit is contained in:
parent
d510dd1a3b
commit
c319230fa1
|
@ -11,7 +11,6 @@
|
|||
atomic_t q_total; \
|
||||
/* tunables */ \
|
||||
int q_batchlen; \
|
||||
int q_max_flying; \
|
||||
int q_io_prio; \
|
||||
bool q_ordering; \
|
||||
/* private */ \
|
||||
|
|
23
lib_rank.c
23
lib_rank.c
|
@ -11,28 +11,27 @@
|
|||
#include "mars.h"
|
||||
#include "lib_rank.h"
|
||||
|
||||
void ranking_compute(struct rank_data *rkd, const struct rank_info rki[], int rki_count, int x)
|
||||
void ranking_compute(struct rank_data *rkd, const struct rank_info rki[], int x)
|
||||
{
|
||||
int i;
|
||||
|
||||
MARS_IO("rki_count = %d at x = %d\n", rki_count, x);
|
||||
|
||||
BUG_ON(rki_count < 2);
|
||||
|
||||
rki_count--;
|
||||
for (i = 0; i < rki_count; i++) {
|
||||
for (i = 0; ; i++) {
|
||||
int x0 = rki[i].rki_x;
|
||||
int x1;
|
||||
int y0;
|
||||
int y1;
|
||||
int points;
|
||||
|
||||
if (x0 == RKI_DUMMY)
|
||||
break;
|
||||
|
||||
if (x < x0 && i+1 < rki_count)
|
||||
if (x < x0)
|
||||
continue;
|
||||
|
||||
x1 = rki[i+1].rki_x;
|
||||
|
||||
BUG_ON(x1 == x0);
|
||||
if (x1 == RKI_DUMMY)
|
||||
break;
|
||||
|
||||
y0 = rki[i].rki_y;
|
||||
y1 = rki[i+1].rki_y;
|
||||
|
@ -59,7 +58,7 @@ int ranking_select(struct rank_data rkd[], int rkd_count)
|
|||
int rest = tmp->rkd_current_points;
|
||||
if (rest <= 0)
|
||||
continue;
|
||||
rest -= tmp->rkd_got;
|
||||
//rest -= tmp->rkd_got;
|
||||
if (rest > max) {
|
||||
max = rest;
|
||||
res = i;
|
||||
|
@ -69,9 +68,9 @@ int ranking_select(struct rank_data rkd[], int rkd_count)
|
|||
* and reset the "clocks" after each round of
|
||||
* weighted round-robin selection.
|
||||
*/
|
||||
if (max <= 0 && res >= 0) {
|
||||
if (max < 0 && res >= 0) {
|
||||
for (i = 0; i < rkd_count; i++)
|
||||
rkd[i].rkd_got -= rkd[i].rkd_current_points;
|
||||
rkd[i].rkd_got += max;
|
||||
}
|
||||
MARS_IO("res = %d\n", res);
|
||||
return res;
|
||||
|
|
16
lib_rank.h
16
lib_rank.h
|
@ -7,6 +7,8 @@
|
|||
/* Generic round-robin scheduler based on ranking information.
|
||||
*/
|
||||
|
||||
#define RKI_DUMMY INT_MIN
|
||||
|
||||
struct rank_info {
|
||||
int rki_x;
|
||||
int rki_y;
|
||||
|
@ -41,8 +43,10 @@ struct rank_data {
|
|||
* Important: the rki[] array describes a ranking function at some
|
||||
* example points (x_i,y_i) which must be ordered according to x_i
|
||||
* in ascending order. And, of course, you need to supply at least
|
||||
* rki_count >= 2 sample points (otherwise a linear function cannot
|
||||
* two sample points (otherwise a linear function cannot
|
||||
* be described).
|
||||
* The array _must_ always end with a dummy record where the x_i has the
|
||||
* value RKI_DUMMY.
|
||||
*/
|
||||
|
||||
extern inline
|
||||
|
@ -54,7 +58,7 @@ void ranking_start(struct rank_data rkd[], int rkd_count)
|
|||
}
|
||||
}
|
||||
|
||||
extern void ranking_compute(struct rank_data *rkd, const struct rank_info rki[], int rki_count, int x);
|
||||
extern void ranking_compute(struct rank_data *rkd, const struct rank_info rki[], int x);
|
||||
|
||||
/* This may be used to (exceptionally) add some extra salt...
|
||||
*/
|
||||
|
@ -105,9 +109,11 @@ extern int ranking_select(struct rank_data rkd[], int rkd_count);
|
|||
extern inline
|
||||
void ranking_select_done(struct rank_data rkd[], int winner, int win_points)
|
||||
{
|
||||
if (win_points < 1)
|
||||
win_points = 1;
|
||||
rkd[winner].rkd_got += win_points;
|
||||
if (winner >= 0) {
|
||||
if (win_points < 1)
|
||||
win_points = 1;
|
||||
rkd[winner].rkd_got += win_points;
|
||||
}
|
||||
}
|
||||
|
||||
#endif
|
||||
|
|
8
mars.h
8
mars.h
|
@ -16,6 +16,7 @@
|
|||
|
||||
#include "brick.h"
|
||||
#include "brick_mem.h"
|
||||
#include "lib_timing.h"
|
||||
|
||||
#define GFP_MARS GFP_BRICK
|
||||
|
||||
|
@ -264,6 +265,13 @@ extern void (*_mars_remote_trigger)(void);
|
|||
|
||||
/////////////////////////////////////////////////////////////////////////
|
||||
|
||||
/* Some global stuff.
|
||||
*/
|
||||
|
||||
extern struct banning mars_global_ban;
|
||||
|
||||
/////////////////////////////////////////////////////////////////////////
|
||||
|
||||
/* Some special brick types for avoidance of cyclic references.
|
||||
*
|
||||
* The client/server network bricks use this for independent instantiation
|
||||
|
|
213
mars_aio.c
213
mars_aio.c
|
@ -18,48 +18,47 @@
|
|||
#include "mars.h"
|
||||
#include "lib_timing.h"
|
||||
|
||||
#include "mars_aio.h"
|
||||
|
||||
#define MARS_MAX_AIO 1024
|
||||
#define MARS_MAX_AIO_READ 32
|
||||
|
||||
#define MEASURE_SYNC 8
|
||||
static struct timing_stats timings[3] = {};
|
||||
|
||||
static struct timing_stats timings[2] = {};
|
||||
struct threshold aio_submit_threshold = {
|
||||
.thr_ban = &mars_global_ban,
|
||||
.thr_limit = AIO_SUBMIT_MAX_LATENCY,
|
||||
.thr_factor = 10,
|
||||
.thr_plus = 10000,
|
||||
};
|
||||
EXPORT_SYMBOL_GPL(aio_submit_threshold);
|
||||
|
||||
struct threshold aio_io_threshold[2] = {
|
||||
[0] = {
|
||||
.thr_ban = &mars_global_ban,
|
||||
.thr_limit = AIO_IO_R_MAX_LATENCY,
|
||||
.thr_factor = 100,
|
||||
.thr_plus = 0,
|
||||
},
|
||||
[1] = {
|
||||
.thr_ban = &mars_global_ban,
|
||||
.thr_limit = AIO_IO_W_MAX_LATENCY,
|
||||
.thr_factor = 100,
|
||||
.thr_plus = 0,
|
||||
},
|
||||
};
|
||||
EXPORT_SYMBOL_GPL(aio_io_threshold);
|
||||
|
||||
struct threshold aio_sync_threshold = {
|
||||
.thr_ban = &mars_global_ban,
|
||||
.thr_limit = AIO_SYNC_MAX_LATENCY,
|
||||
.thr_factor = 100,
|
||||
.thr_plus = 0,
|
||||
};
|
||||
EXPORT_SYMBOL_GPL(aio_sync_threshold);
|
||||
|
||||
///////////////////////// own type definitions ////////////////////////
|
||||
|
||||
#include "mars_aio.h"
|
||||
|
||||
#ifdef MEASURE_SYNC
|
||||
static int sync_ticks[MEASURE_SYNC] = {};
|
||||
|
||||
static void measure_sync(int ticks)
|
||||
{
|
||||
int order = ticks;
|
||||
if (ticks > 1) {
|
||||
order = MEASURE_SYNC - 1;
|
||||
while (order > 0 && (1 << (order-1)) >= ticks) {
|
||||
order--;
|
||||
}
|
||||
order++;
|
||||
}
|
||||
sync_ticks[order]++;
|
||||
}
|
||||
|
||||
static char *show_sync(void)
|
||||
{
|
||||
char *res = brick_string_alloc(0);
|
||||
int i;
|
||||
int pos = 0;
|
||||
if (!res)
|
||||
return NULL;
|
||||
for (i = 0; i < MEASURE_SYNC; i++) {
|
||||
pos += snprintf(res + pos, 256, "%d: %d ", i, sync_ticks[i]);
|
||||
}
|
||||
return res;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
////////////////// some helpers //////////////////
|
||||
|
||||
static inline
|
||||
|
@ -77,6 +76,8 @@ void _enqueue(struct aio_threadinfo *tinfo, struct aio_mref_aspect *mref_a, int
|
|||
prio = 0;
|
||||
#endif
|
||||
|
||||
mref_a->enqueue_stamp = cpu_clock(raw_smp_processor_id());
|
||||
|
||||
traced_lock(&tinfo->lock, flags);
|
||||
|
||||
if (at_end) {
|
||||
|
@ -84,38 +85,45 @@ void _enqueue(struct aio_threadinfo *tinfo, struct aio_mref_aspect *mref_a, int
|
|||
} else {
|
||||
list_add(&mref_a->io_head, &tinfo->mref_list[prio]);
|
||||
}
|
||||
tinfo->queued[prio]++;
|
||||
tinfo->queued_sum++;
|
||||
|
||||
traced_unlock(&tinfo->lock, flags);
|
||||
|
||||
atomic_inc(&tinfo->total_enqueue_count);
|
||||
|
||||
wake_up_interruptible_all(&tinfo->event);
|
||||
}
|
||||
|
||||
static inline
|
||||
struct aio_mref_aspect *_dequeue(struct aio_threadinfo *tinfo, bool do_remove)
|
||||
struct aio_mref_aspect *_dequeue(struct aio_threadinfo *tinfo)
|
||||
{
|
||||
struct aio_mref_aspect *mref_a = NULL;
|
||||
int prio;
|
||||
unsigned long flags = 0;
|
||||
|
||||
if (do_remove)
|
||||
traced_lock(&tinfo->lock, flags);
|
||||
traced_lock(&tinfo->lock, flags);
|
||||
|
||||
for (prio = 0; prio < MARS_PRIO_NR; prio++) {
|
||||
struct list_head *start = &tinfo->mref_list[prio];
|
||||
struct list_head *tmp = start->next;
|
||||
if (tmp != start) {
|
||||
if (do_remove) {
|
||||
list_del_init(tmp);
|
||||
atomic_inc(&tinfo->total_dequeue_count);
|
||||
}
|
||||
list_del_init(tmp);
|
||||
tinfo->queued[prio]--;
|
||||
tinfo->queued_sum--;
|
||||
mref_a = container_of(tmp, struct aio_mref_aspect, io_head);
|
||||
goto done;
|
||||
}
|
||||
}
|
||||
|
||||
done:
|
||||
if (do_remove)
|
||||
traced_unlock(&tinfo->lock, flags);
|
||||
traced_unlock(&tinfo->lock, flags);
|
||||
|
||||
if (likely(mref_a && mref_a->object)) {
|
||||
unsigned long long latency;
|
||||
latency = cpu_clock(raw_smp_processor_id()) - mref_a->enqueue_stamp;
|
||||
threshold_check(&aio_io_threshold[mref_a->object->ref_rw & 1], latency);
|
||||
}
|
||||
return mref_a;
|
||||
}
|
||||
|
||||
|
@ -272,8 +280,6 @@ static void aio_ref_io(struct aio_output *output, struct mref_object *mref)
|
|||
}
|
||||
|
||||
_enqueue(tinfo, mref_a, mref->ref_prio, true);
|
||||
|
||||
wake_up_interruptible_all(&tinfo->event);
|
||||
return;
|
||||
|
||||
done:
|
||||
|
@ -295,16 +301,20 @@ static int aio_submit(struct aio_output *output, struct aio_mref_aspect *mref_a,
|
|||
// .aio_reqprio = something(mref->ref_prio) field exists, but not yet implemented in kernelspace :(
|
||||
};
|
||||
struct iocb *iocbp = &iocb;
|
||||
unsigned long long latency;
|
||||
|
||||
mars_trace(mref, "aio_submit");
|
||||
|
||||
oldfs = get_fs();
|
||||
set_fs(get_ds());
|
||||
TIME_STATS(&timings[mref->ref_rw & 1], res = sys_io_submit(output->ctxp, 1, &iocbp));
|
||||
latency = TIME_STATS(&timings[mref->ref_rw & 1], res = sys_io_submit(output->ctxp, 1, &iocbp));
|
||||
set_fs(oldfs);
|
||||
|
||||
threshold_check(&aio_submit_threshold, latency);
|
||||
|
||||
if (res < 0 && res != -EAGAIN)
|
||||
MARS_ERR("error = %d\n", res);
|
||||
|
||||
return res;
|
||||
}
|
||||
|
||||
|
@ -327,10 +337,12 @@ static int aio_submit_dummy(struct aio_output *output)
|
|||
}
|
||||
|
||||
static
|
||||
int aio_start_thread(struct aio_output *output, int i, int(*fn)(void*))
|
||||
int aio_start_thread(
|
||||
struct aio_output *output,
|
||||
struct aio_threadinfo *tinfo,
|
||||
int(*fn)(void*),
|
||||
char class)
|
||||
{
|
||||
static int index = 0;
|
||||
struct aio_threadinfo *tinfo = &output->tinfo[i];
|
||||
int j;
|
||||
|
||||
for (j = 0; j < MARS_PRIO_NR; j++) {
|
||||
|
@ -339,8 +351,9 @@ int aio_start_thread(struct aio_output *output, int i, int(*fn)(void*))
|
|||
tinfo->output = output;
|
||||
spin_lock_init(&tinfo->lock);
|
||||
init_waitqueue_head(&tinfo->event);
|
||||
init_waitqueue_head(&tinfo->terminate_event);
|
||||
tinfo->terminated = false;
|
||||
tinfo->thread = kthread_create(fn, tinfo, "mars_%daio%d", i, index++);
|
||||
tinfo->thread = kthread_create(fn, tinfo, "mars_aio_%c%d", class, output->index);
|
||||
if (IS_ERR(tinfo->thread)) {
|
||||
int err = PTR_ERR(tinfo->thread);
|
||||
MARS_ERR("cannot create thread\n");
|
||||
|
@ -415,7 +428,7 @@ void aio_stop_thread(struct aio_output *output, int i, bool do_submit_dummy)
|
|||
// wait for termination
|
||||
MARS_INF("waiting for thread %d ...\n", i);
|
||||
wait_event_interruptible_timeout(
|
||||
tinfo->event,
|
||||
tinfo->terminate_event,
|
||||
tinfo->terminated,
|
||||
(60 - i * 2) * HZ);
|
||||
if (likely(tinfo->terminated)) {
|
||||
|
@ -434,29 +447,28 @@ static
|
|||
int aio_sync(struct file *file)
|
||||
{
|
||||
int err;
|
||||
#ifdef MEASURE_SYNC
|
||||
long long old_jiffies = jiffies;
|
||||
#endif
|
||||
|
||||
err = filemap_write_and_wait_range(file->f_mapping, 0, LLONG_MAX);
|
||||
|
||||
|
||||
#ifdef MEASURE_SYNC
|
||||
measure_sync(jiffies - old_jiffies);
|
||||
#endif
|
||||
return err;
|
||||
}
|
||||
|
||||
static
|
||||
void aio_sync_all(struct aio_output *output, struct list_head *tmp_list)
|
||||
{
|
||||
unsigned long long latency;
|
||||
int err;
|
||||
|
||||
output->fdsync_active = true;
|
||||
atomic_inc(&output->total_fdsync_count);
|
||||
|
||||
err = aio_sync(output->filp);
|
||||
latency = TIME_STATS(
|
||||
&timings[2],
|
||||
err = aio_sync(output->filp)
|
||||
);
|
||||
|
||||
threshold_check(&aio_sync_threshold, latency);
|
||||
|
||||
output->fdsync_active = false;
|
||||
wake_up_interruptible_all(&output->fdsync_event);
|
||||
if (err < 0) {
|
||||
|
@ -529,7 +541,7 @@ int aio_sync_thread(void *data)
|
|||
MARS_INF("kthread has started on '%s'.\n", output->brick->brick_path);
|
||||
//set_user_nice(current, -20);
|
||||
|
||||
while (!kthread_should_stop()) {
|
||||
while (!kthread_should_stop() || tinfo->queued_sum > 0) {
|
||||
LIST_HEAD(tmp_list);
|
||||
unsigned long flags;
|
||||
int i;
|
||||
|
@ -539,9 +551,8 @@ int aio_sync_thread(void *data)
|
|||
|
||||
wait_event_interruptible_timeout(
|
||||
tinfo->event,
|
||||
kthread_should_stop() ||
|
||||
_dequeue(tinfo, false),
|
||||
1 * HZ);
|
||||
tinfo->queued_sum > 0,
|
||||
HZ / 4);
|
||||
|
||||
traced_lock(&tinfo->lock, flags);
|
||||
for (i = 0; i < MARS_PRIO_NR; i++) {
|
||||
|
@ -549,6 +560,8 @@ int aio_sync_thread(void *data)
|
|||
if (!list_empty(start)) {
|
||||
// move over the whole list
|
||||
list_replace_init(start, &tmp_list);
|
||||
tinfo->queued_sum -= tinfo->queued[i];
|
||||
tinfo->queued[i] = 0;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
@ -568,7 +581,7 @@ int aio_sync_thread(void *data)
|
|||
|
||||
MARS_INF("kthread has stopped.\n");
|
||||
tinfo->terminated = true;
|
||||
wake_up_interruptible_all(&tinfo->event);
|
||||
wake_up_interruptible_all(&tinfo->terminate_event);
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
@ -586,17 +599,16 @@ static int aio_event_thread(void *data)
|
|||
if (!current->mm)
|
||||
goto err;
|
||||
|
||||
err = aio_start_thread(output, 2, aio_sync_thread);
|
||||
err = aio_start_thread(output, &output->tinfo[2], aio_sync_thread, 'y');
|
||||
if (unlikely(err < 0))
|
||||
goto err;
|
||||
|
||||
while (!kthread_should_stop()) {
|
||||
while (!kthread_should_stop() || tinfo->queued_sum > 0) {
|
||||
mm_segment_t oldfs;
|
||||
int count;
|
||||
int bounced;
|
||||
int i;
|
||||
struct timespec timeout = {
|
||||
.tv_sec = 10,
|
||||
.tv_sec = 1,
|
||||
};
|
||||
struct io_event events[MARS_MAX_AIO_READ];
|
||||
|
||||
|
@ -609,7 +621,6 @@ static int aio_event_thread(void *data)
|
|||
set_fs(oldfs);
|
||||
|
||||
//MARS_INF("count = %d\n", count);
|
||||
bounced = 0;
|
||||
for (i = 0; i < count; i++) {
|
||||
struct aio_mref_aspect *mref_a = (void*)events[i].data;
|
||||
struct mref_object *mref;
|
||||
|
@ -631,7 +642,6 @@ static int aio_event_thread(void *data)
|
|||
if (!output->filp->f_op->aio_fsync) {
|
||||
mars_trace(mref, "aio_fsync");
|
||||
_enqueue(other, mref_a, mref->ref_prio, true);
|
||||
bounced++;
|
||||
continue;
|
||||
}
|
||||
err = aio_submit(output, mref_a, true);
|
||||
|
@ -642,8 +652,6 @@ static int aio_event_thread(void *data)
|
|||
_complete(output, mref, err);
|
||||
|
||||
}
|
||||
if (bounced)
|
||||
wake_up_interruptible_all(&other->event);
|
||||
}
|
||||
err = 0;
|
||||
|
||||
|
@ -655,7 +663,7 @@ static int aio_event_thread(void *data)
|
|||
unuse_fake_mm();
|
||||
|
||||
tinfo->terminated = true;
|
||||
wake_up_interruptible_all(&tinfo->event);
|
||||
wake_up_interruptible_all(&tinfo->terminate_event);
|
||||
return err;
|
||||
}
|
||||
|
||||
|
@ -759,11 +767,11 @@ static int aio_submit_thread(void *data)
|
|||
if (unlikely(err < 0))
|
||||
goto cleanup_mm;
|
||||
|
||||
err = aio_start_thread(output, 1, aio_event_thread);
|
||||
err = aio_start_thread(output, &output->tinfo[1], aio_event_thread, 'e');
|
||||
if (unlikely(err < 0))
|
||||
goto cleanup_ctxp;
|
||||
|
||||
while (!kthread_should_stop() || atomic_read(&output->read_count) > 0 || atomic_read(&output->write_count) > 0) {
|
||||
while (!kthread_should_stop() || atomic_read(&output->read_count) + atomic_read(&output->write_count) + tinfo->queued_sum > 0) {
|
||||
struct aio_mref_aspect *mref_a;
|
||||
struct mref_object *mref;
|
||||
int sleeptime;
|
||||
|
@ -773,11 +781,10 @@ static int aio_submit_thread(void *data)
|
|||
|
||||
wait_event_interruptible_timeout(
|
||||
tinfo->event,
|
||||
kthread_should_stop() ||
|
||||
_dequeue(tinfo, false),
|
||||
HZ);
|
||||
tinfo->queued_sum > 0,
|
||||
HZ / 4);
|
||||
|
||||
mref_a = _dequeue(tinfo, true);
|
||||
mref_a = _dequeue(tinfo);
|
||||
if (!mref_a) {
|
||||
continue;
|
||||
}
|
||||
|
@ -803,7 +810,7 @@ static int aio_submit_thread(void *data)
|
|||
mref_a->start_jiffies = jiffies;
|
||||
}
|
||||
if ((long long)jiffies - mref_a->start_jiffies <= mref->ref_timeout) {
|
||||
if (!_dequeue(tinfo, false)) {
|
||||
if (!tinfo->queued_sum) {
|
||||
atomic_inc(&output->total_msleep_count);
|
||||
brick_msleep(1000 * 4 / HZ);
|
||||
}
|
||||
|
@ -816,9 +823,9 @@ static int aio_submit_thread(void *data)
|
|||
}
|
||||
}
|
||||
|
||||
sleeptime = 1000 / HZ;
|
||||
sleeptime = 1;
|
||||
for (;;) {
|
||||
/* This is just a test. Don't use it for performance reasons.
|
||||
/* This is just a test. Don't enable it for performance reasons.
|
||||
*/
|
||||
if (output->brick->wait_during_fdsync && mref->ref_rw != READ) {
|
||||
if (output->fdsync_active) {
|
||||
|
@ -826,7 +833,7 @@ static int aio_submit_thread(void *data)
|
|||
atomic_inc(&output->total_fdsync_wait_count);
|
||||
__wait_event_interruptible_timeout(
|
||||
output->fdsync_event,
|
||||
!output->fdsync_active || kthread_should_stop(),
|
||||
!output->fdsync_active,
|
||||
delay);
|
||||
}
|
||||
|
||||
|
@ -842,7 +849,7 @@ static int aio_submit_thread(void *data)
|
|||
atomic_inc(&output->total_delay_count);
|
||||
brick_msleep(sleeptime);
|
||||
if (sleeptime < 100) {
|
||||
sleeptime += 1000 / HZ;
|
||||
sleeptime++;
|
||||
}
|
||||
}
|
||||
err:
|
||||
|
@ -876,7 +883,7 @@ cleanup_fd:
|
|||
done:
|
||||
MARS_DBG("status = %d\n", err);
|
||||
tinfo->terminated = true;
|
||||
wake_up_interruptible_all(&tinfo->event);
|
||||
wake_up_interruptible_all(&tinfo->terminate_event);
|
||||
return err;
|
||||
}
|
||||
|
||||
|
@ -904,12 +911,9 @@ char *aio_statistics(struct aio_brick *brick, int verbose)
|
|||
if (!res)
|
||||
return NULL;
|
||||
|
||||
#ifdef MEASURE_SYNC
|
||||
sync = show_sync();
|
||||
#endif
|
||||
|
||||
pos += report_timing(&timings[0], res + pos, 4096 - pos);
|
||||
pos += report_timing(&timings[1], res + pos, 4096 - pos);
|
||||
pos += report_timing(&timings[2], res + pos, 4096 - pos);
|
||||
|
||||
snprintf(res + pos, 4096 - pos,
|
||||
"total "
|
||||
|
@ -927,10 +931,14 @@ char *aio_statistics(struct aio_brick *brick, int verbose)
|
|||
"flying reads = %d "
|
||||
"writes = %d "
|
||||
"allocs = %d "
|
||||
"q0 = %d (%d - %d) "
|
||||
"q1 = %d (%d - %d) "
|
||||
"q2 = %d (%d - %d) |"
|
||||
" %s\n",
|
||||
"q0 = %d "
|
||||
"q1 = %d "
|
||||
"q2 = %d "
|
||||
"| total "
|
||||
"q0 = %d "
|
||||
"q1 = %d "
|
||||
"q2 = %d "
|
||||
"%s\n",
|
||||
atomic_read(&output->total_read_count),
|
||||
atomic_read(&output->total_write_count),
|
||||
atomic_read(&output->total_alloc_count),
|
||||
|
@ -945,12 +953,12 @@ char *aio_statistics(struct aio_brick *brick, int verbose)
|
|||
atomic_read(&output->read_count),
|
||||
atomic_read(&output->write_count),
|
||||
atomic_read(&output->alloc_count),
|
||||
atomic_read(&output->tinfo[0].total_enqueue_count) - atomic_read(&output->tinfo[0].total_dequeue_count),
|
||||
atomic_read(&output->tinfo[0].total_enqueue_count), atomic_read(&output->tinfo[0].total_dequeue_count),
|
||||
atomic_read(&output->tinfo[1].total_enqueue_count) - atomic_read(&output->tinfo[1].total_dequeue_count),
|
||||
atomic_read(&output->tinfo[1].total_enqueue_count), atomic_read(&output->tinfo[1].total_dequeue_count),
|
||||
atomic_read(&output->tinfo[2].total_enqueue_count) - atomic_read(&output->tinfo[2].total_dequeue_count),
|
||||
atomic_read(&output->tinfo[2].total_enqueue_count), atomic_read(&output->tinfo[2].total_dequeue_count),
|
||||
output->tinfo[0].queued_sum,
|
||||
output->tinfo[1].queued_sum,
|
||||
output->tinfo[2].queued_sum,
|
||||
atomic_read(&output->tinfo[0].total_enqueue_count),
|
||||
atomic_read(&output->tinfo[1].total_enqueue_count),
|
||||
atomic_read(&output->tinfo[2].total_enqueue_count),
|
||||
sync ? sync : "");
|
||||
|
||||
if (sync)
|
||||
|
@ -975,7 +983,6 @@ void aio_reset_statistics(struct aio_brick *brick)
|
|||
for (i = 0; i < 3; i++) {
|
||||
struct aio_threadinfo *tinfo = &output->tinfo[i];
|
||||
atomic_set(&tinfo->total_enqueue_count, 0);
|
||||
atomic_set(&tinfo->total_dequeue_count, 0);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -1006,6 +1013,7 @@ static int aio_brick_construct(struct aio_brick *brick)
|
|||
|
||||
static int aio_switch(struct aio_brick *brick)
|
||||
{
|
||||
static int index;
|
||||
struct aio_output *output = brick->outputs[0];
|
||||
const char *path = output->brick->brick_path;
|
||||
int flags = O_CREAT | O_RDWR | O_LARGEFILE;
|
||||
|
@ -1052,7 +1060,8 @@ static int aio_switch(struct aio_brick *brick)
|
|||
}
|
||||
#endif
|
||||
|
||||
err = aio_start_thread(output, 0, aio_submit_thread);
|
||||
output->index = ++index;
|
||||
err = aio_start_thread(output, &output->tinfo[0], aio_submit_thread, 's');
|
||||
if (err < 0)
|
||||
goto err;
|
||||
|
||||
|
|
17
mars_aio.h
17
mars_aio.h
|
@ -5,6 +5,15 @@
|
|||
#include <linux/aio.h>
|
||||
#include <linux/syscalls.h>
|
||||
|
||||
#define AIO_SUBMIT_MAX_LATENCY 1000 // 1 ms
|
||||
#define AIO_IO_R_MAX_LATENCY 50000 // 50 ms
|
||||
#define AIO_IO_W_MAX_LATENCY 150000 // 150 ms
|
||||
#define AIO_SYNC_MAX_LATENCY 150000 // 150 ms
|
||||
|
||||
extern struct threshold aio_submit_threshold;
|
||||
extern struct threshold aio_io_threshold[2];
|
||||
extern struct threshold aio_sync_threshold;
|
||||
|
||||
//#define USE_CLEVER_SYNC // TODO: NYI (should result in better write performance)
|
||||
#ifdef USE_CLEVER_SYNC
|
||||
|
||||
|
@ -24,6 +33,7 @@ struct aio_mref_aspect {
|
|||
struct pairing_heap_sync heap_head;
|
||||
#endif
|
||||
struct list_head io_head;
|
||||
unsigned long long enqueue_stamp;
|
||||
long long start_jiffies;
|
||||
int resubmit;
|
||||
int alloc_len;
|
||||
|
@ -50,10 +60,12 @@ struct aio_threadinfo {
|
|||
struct aio_output *output;
|
||||
struct task_struct *thread;
|
||||
wait_queue_head_t event;
|
||||
wait_queue_head_t terminate_event;
|
||||
spinlock_t lock;
|
||||
bool terminated;
|
||||
int queued[MARS_PRIO_NR];
|
||||
int queued_sum;
|
||||
atomic_t total_enqueue_count;
|
||||
atomic_t total_dequeue_count;
|
||||
bool terminated;
|
||||
};
|
||||
|
||||
struct aio_output {
|
||||
|
@ -68,6 +80,7 @@ struct aio_output {
|
|||
wait_queue_head_t fdsync_event;
|
||||
bool fdsync_active;
|
||||
// statistics
|
||||
int index;
|
||||
atomic_t total_read_count;
|
||||
atomic_t total_write_count;
|
||||
atomic_t total_alloc_count;
|
||||
|
|
100
mars_bio.c
100
mars_bio.c
|
@ -19,11 +19,35 @@
|
|||
#include "mars.h"
|
||||
#include "lib_timing.h"
|
||||
|
||||
#include "mars_bio.h"
|
||||
|
||||
static struct timing_stats timings[2] = {};
|
||||
|
||||
///////////////////////// own type definitions ////////////////////////
|
||||
struct threshold bio_submit_threshold = {
|
||||
.thr_ban = &mars_global_ban,
|
||||
.thr_limit = BIO_SUBMIT_MAX_LATENCY,
|
||||
.thr_factor = 100,
|
||||
.thr_plus = 0,
|
||||
};
|
||||
EXPORT_SYMBOL_GPL(bio_submit_threshold);
|
||||
|
||||
#include "mars_bio.h"
|
||||
struct threshold bio_io_threshold[2] = {
|
||||
[0] = {
|
||||
.thr_ban = &mars_global_ban,
|
||||
.thr_limit = BIO_IO_R_MAX_LATENCY,
|
||||
.thr_factor = 10,
|
||||
.thr_plus = 10000,
|
||||
},
|
||||
[1] = {
|
||||
.thr_ban = &mars_global_ban,
|
||||
.thr_limit = BIO_IO_W_MAX_LATENCY,
|
||||
.thr_factor = 10,
|
||||
.thr_plus = 10000,
|
||||
},
|
||||
};
|
||||
EXPORT_SYMBOL_GPL(bio_io_threshold);
|
||||
|
||||
///////////////////////// own type definitions ////////////////////////
|
||||
|
||||
static void bio_ref_put(struct bio_output *output, struct mref_object *mref);
|
||||
|
||||
|
@ -46,10 +70,9 @@ void bio_callback(struct bio *bio, int code)
|
|||
mref_a->status_code = code;
|
||||
|
||||
spin_lock_irqsave(&brick->lock, flags);
|
||||
if (list_empty(&mref_a->io_head)) {
|
||||
list_add_tail(&mref_a->io_head, &brick->completed_list);
|
||||
atomic_inc(&brick->completed_count);
|
||||
}
|
||||
list_del(&mref_a->io_head);
|
||||
list_add_tail(&mref_a->io_head, &brick->completed_list);
|
||||
atomic_inc(&brick->completed_count);
|
||||
spin_unlock_irqrestore(&brick->lock, flags);
|
||||
|
||||
wake_up_interruptible(&brick->response_event);
|
||||
|
@ -303,6 +326,8 @@ void _bio_ref_io(struct bio_output *output, struct mref_object *mref, bool cork)
|
|||
struct bio_brick *brick = output->brick;
|
||||
struct bio_mref_aspect *mref_a = bio_mref_get_aspect(output->brick, mref);
|
||||
struct bio *bio;
|
||||
unsigned long long latency;
|
||||
unsigned long flags;
|
||||
int rw;
|
||||
int status = -EINVAL;
|
||||
|
||||
|
@ -338,13 +363,23 @@ void _bio_ref_io(struct bio_output *output, struct mref_object *mref, bool cork)
|
|||
MARS_IO("starting IO rw = %d prio 0 %d fly = %d\n", rw, mref->ref_prio, atomic_read(&brick->fly_count[PRIO_INDEX(mref)]));
|
||||
mars_trace(mref, "bio_submit");
|
||||
|
||||
mref_a->start_stamp = cpu_clock(raw_smp_processor_id());
|
||||
spin_lock_irqsave(&brick->lock, flags);
|
||||
list_add_tail(&mref_a->io_head, &brick->submitted_list[rw & 1]);
|
||||
spin_unlock_irqrestore(&brick->lock, flags);
|
||||
|
||||
#ifdef FAKE_IO
|
||||
bio->bi_end_io(bio, 0);
|
||||
#else
|
||||
bio->bi_rw = rw;
|
||||
TIME_STATS(&timings[rw & 1], submit_bio(rw, bio));
|
||||
latency = TIME_STATS(
|
||||
&timings[rw & 1],
|
||||
submit_bio(rw, bio)
|
||||
);
|
||||
#endif
|
||||
|
||||
threshold_check(&bio_submit_threshold, latency);
|
||||
|
||||
status = 0;
|
||||
if (unlikely(bio_flagged(bio, BIO_EOPNOTSUPP)))
|
||||
status = -EOPNOTSUPP;
|
||||
|
@ -401,16 +436,30 @@ int bio_response_thread(void *data)
|
|||
for (;;) {
|
||||
LIST_HEAD(tmp_list);
|
||||
unsigned long flags;
|
||||
int thr_limit;
|
||||
int sleeptime;
|
||||
int count;
|
||||
int i;
|
||||
|
||||
thr_limit = bio_io_threshold[0].thr_limit;
|
||||
if (bio_io_threshold[1].thr_limit < thr_limit)
|
||||
thr_limit = bio_io_threshold[1].thr_limit;
|
||||
|
||||
sleeptime = HZ / 10;
|
||||
if (thr_limit > 0) {
|
||||
sleeptime = thr_limit / (1000000 * 2 / HZ);
|
||||
if (unlikely(sleeptime < 2))
|
||||
sleeptime = 2;
|
||||
}
|
||||
|
||||
#ifdef IO_DEBUGGING
|
||||
round++;
|
||||
MARS_IO("%d sleeping...\n", round);
|
||||
MARS_IO("%d sleeping %d...\n", round, sleeptime);
|
||||
#endif
|
||||
wait_event_interruptible_timeout(
|
||||
brick->response_event,
|
||||
atomic_read(&brick->completed_count) > 0,
|
||||
HZ);
|
||||
sleeptime);
|
||||
|
||||
MARS_IO("%d woken up, completed_count = %d fly_count[0] = %d fly_count[1] = %d fly_count[2] = %d\n",
|
||||
round,
|
||||
|
@ -428,6 +477,7 @@ int bio_response_thread(void *data)
|
|||
struct list_head *tmp;
|
||||
struct bio_mref_aspect *mref_a;
|
||||
struct mref_object *mref;
|
||||
unsigned long long latency;
|
||||
int code;
|
||||
|
||||
if (list_empty(&tmp_list)) {
|
||||
|
@ -439,16 +489,20 @@ int bio_response_thread(void *data)
|
|||
tmp = tmp_list.next;
|
||||
list_del_init(tmp);
|
||||
atomic_dec(&brick->completed_count);
|
||||
|
||||
mref_a = container_of(tmp, struct bio_mref_aspect, io_head);
|
||||
mref = mref_a->object;
|
||||
|
||||
|
||||
latency = cpu_clock(raw_smp_processor_id()) - mref_a->start_stamp;
|
||||
threshold_check(&bio_io_threshold[mref->ref_rw & 1], latency);
|
||||
|
||||
code = mref_a->status_code;
|
||||
#ifdef IO_DEBUGGING
|
||||
round++;
|
||||
MARS_IO("%d completed , status = %d\n", round, code);
|
||||
#endif
|
||||
|
||||
mref = mref_a->object;
|
||||
|
||||
mars_trace(mref, "bio_endio");
|
||||
|
||||
if (code < 0) {
|
||||
|
@ -473,6 +527,26 @@ int bio_response_thread(void *data)
|
|||
}
|
||||
bio_ref_put(mref_a->output, mref);
|
||||
}
|
||||
|
||||
/* Try to detect slow requests as early as possible,
|
||||
* even before they have completed.
|
||||
*/
|
||||
for (i = 0; i < 2; i++) {
|
||||
unsigned long long eldest = 0;
|
||||
|
||||
spin_lock_irqsave(&brick->lock, flags);
|
||||
if (!list_empty(&brick->submitted_list[i])) {
|
||||
struct bio_mref_aspect *mref_a;
|
||||
mref_a = container_of(brick->submitted_list[i].next, struct bio_mref_aspect, io_head);
|
||||
eldest = mref_a->start_stamp;
|
||||
}
|
||||
spin_unlock_irqrestore(&brick->lock, flags);
|
||||
|
||||
if (eldest) {
|
||||
threshold_check(&bio_io_threshold[i], cpu_clock(raw_smp_processor_id()) - eldest);
|
||||
}
|
||||
}
|
||||
|
||||
if (count) {
|
||||
brick->submitted = true;
|
||||
wake_up_interruptible(&brick->submit_event);
|
||||
|
@ -510,7 +584,7 @@ int bio_submit_thread(void *data)
|
|||
wait_event_interruptible_timeout(
|
||||
brick->submit_event,
|
||||
brick->submitted,
|
||||
HZ);
|
||||
HZ / 2);
|
||||
|
||||
brick->submitted = false;
|
||||
|
||||
|
@ -719,6 +793,8 @@ static int bio_brick_construct(struct bio_brick *brick)
|
|||
INIT_LIST_HEAD(&brick->queue_list[0]);
|
||||
INIT_LIST_HEAD(&brick->queue_list[1]);
|
||||
INIT_LIST_HEAD(&brick->queue_list[2]);
|
||||
INIT_LIST_HEAD(&brick->submitted_list[0]);
|
||||
INIT_LIST_HEAD(&brick->submitted_list[1]);
|
||||
INIT_LIST_HEAD(&brick->completed_list);
|
||||
init_waitqueue_head(&brick->submit_event);
|
||||
init_waitqueue_head(&brick->response_event);
|
||||
|
|
|
@ -2,6 +2,13 @@
|
|||
#ifndef MARS_BIO_H
|
||||
#define MARS_BIO_H
|
||||
|
||||
#define BIO_SUBMIT_MAX_LATENCY 250 // 250 us
|
||||
#define BIO_IO_R_MAX_LATENCY 40000 // 40 ms
|
||||
#define BIO_IO_W_MAX_LATENCY 100000 // 100 ms
|
||||
|
||||
extern struct threshold bio_submit_threshold;
|
||||
extern struct threshold bio_io_threshold[2];
|
||||
|
||||
#include <linux/blkdev.h>
|
||||
|
||||
struct bio_mref_aspect {
|
||||
|
@ -9,6 +16,7 @@ struct bio_mref_aspect {
|
|||
struct list_head io_head;
|
||||
struct bio *bio;
|
||||
struct bio_output *output;
|
||||
unsigned long long start_stamp;
|
||||
int status_code;
|
||||
int hash_pos;
|
||||
int alloc_len;
|
||||
|
@ -33,6 +41,7 @@ struct bio_brick {
|
|||
// private
|
||||
spinlock_t lock;
|
||||
struct list_head queue_list[MARS_PRIO_NR];
|
||||
struct list_head submitted_list[2];
|
||||
struct list_head completed_list;
|
||||
wait_queue_head_t submit_event;
|
||||
wait_queue_head_t response_event;
|
||||
|
|
|
@ -18,6 +18,9 @@
|
|||
|
||||
// infrastructure
|
||||
|
||||
struct banning mars_global_ban = {};
|
||||
EXPORT_SYMBOL_GPL(mars_global_ban);
|
||||
|
||||
static char *id = NULL;
|
||||
|
||||
/* TODO: better use MAC addresses (or motherboard IDs where available).
|
||||
|
|
|
@ -11,12 +11,12 @@
|
|||
|
||||
// variants
|
||||
#define KEEP_UNIQUE
|
||||
//#define LATER
|
||||
#define DELAY_CALLERS // this is _needed_ for production systems
|
||||
//#define WB_COPY // unnecessary (only costs performance)
|
||||
//#define LATE_COMPLETE // unnecessary (only costs performance)
|
||||
//#define EARLY_COMPLETION
|
||||
//#define OLD_POSCOMPLETE
|
||||
#define SHORTCUT_1_to_3 // when possible, queue 1 executes phase3_startio() directly without intermediate queueing into queue 3 => may be irritating, but has better performance. NOTICE: when some day the IO scheduling should be different between queue 1 and 3, you MUST disable this in order to distinguish between them!
|
||||
|
||||
// commenting this out is dangerous for data integrity! use only for testing!
|
||||
#define USE_MEMCPY
|
||||
|
@ -33,23 +33,79 @@
|
|||
#include "lib_rank.h"
|
||||
#include "lib_limiter.h"
|
||||
|
||||
#include "mars_trans_logger.h"
|
||||
|
||||
#ifdef REPLAY_DEBUGGING
|
||||
#define MARS_RPL(_fmt, _args...) _MARS_MSG(false, "REPLAY ", _fmt, ##_args)
|
||||
#else
|
||||
#define MARS_RPL(_args...) /*empty*/
|
||||
#endif
|
||||
|
||||
#if 0
|
||||
#define inline noinline
|
||||
#endif
|
||||
|
||||
///////////////////////// global tuning ////////////////////////
|
||||
|
||||
int trans_logger_mem_usage; // in KB
|
||||
EXPORT_SYMBOL_GPL(trans_logger_mem_usage);
|
||||
|
||||
struct writeback_group global_writeback = {
|
||||
.lock = __RW_LOCK_UNLOCKED(global_writeback.lock),
|
||||
.group_anchor = LIST_HEAD_INIT(global_writeback.group_anchor),
|
||||
.until_percent = 30,
|
||||
};
|
||||
EXPORT_SYMBOL_GPL(global_writeback);
|
||||
|
||||
static
|
||||
void add_to_group(struct writeback_group *gr, struct trans_logger_brick *brick)
|
||||
{
|
||||
write_lock(&gr->lock);
|
||||
list_add_tail(&brick->group_head, &gr->group_anchor);
|
||||
write_unlock(&gr->lock);
|
||||
}
|
||||
|
||||
static
|
||||
void remove_from_group(struct writeback_group *gr, struct trans_logger_brick *brick)
|
||||
{
|
||||
write_lock(&gr->lock);
|
||||
list_del_init(&brick->group_head);
|
||||
gr->leader = NULL;
|
||||
write_unlock(&gr->lock);
|
||||
}
|
||||
|
||||
static
|
||||
struct trans_logger_brick *elect_leader(struct writeback_group *gr)
|
||||
{
|
||||
struct trans_logger_brick *res = gr->leader;
|
||||
struct list_head *tmp;
|
||||
|
||||
if (res && gr->until_percent >= 0) {
|
||||
loff_t used = atomic64_read(&res->shadow_mem_used);
|
||||
if (used > gr->biggest * gr->until_percent / 100)
|
||||
goto done;
|
||||
}
|
||||
|
||||
read_lock(&gr->lock);
|
||||
for (tmp = gr->group_anchor.next; tmp != &gr->group_anchor; tmp = tmp->next) {
|
||||
struct trans_logger_brick *test = container_of(tmp, struct trans_logger_brick, group_head);
|
||||
loff_t new_used = atomic64_read(&test->shadow_mem_used);
|
||||
|
||||
if (!res || new_used > atomic64_read(&res->shadow_mem_used)) {
|
||||
res = test;
|
||||
gr->biggest = new_used;
|
||||
}
|
||||
}
|
||||
read_unlock(&gr->lock);
|
||||
|
||||
gr->leader = res;
|
||||
|
||||
done:
|
||||
return res;
|
||||
}
|
||||
|
||||
///////////////////////// own type definitions ////////////////////////
|
||||
|
||||
#include "mars_trans_logger.h"
|
||||
|
||||
#if 1
|
||||
#define inline noinline
|
||||
#endif
|
||||
|
||||
static inline
|
||||
int lh_cmp(loff_t *a, loff_t *b)
|
||||
{
|
||||
|
@ -1587,7 +1643,7 @@ bool phase1_startio(struct trans_logger_mref_aspect *orig_mref_a)
|
|||
qq_inc_flying(&brick->q_phase[1]);
|
||||
fire_writeback(&wb->w_sub_read_list, false);
|
||||
} else { // shortcut
|
||||
#ifdef LATER
|
||||
#ifndef SHORTCUT_1_to_3
|
||||
qq_wb_insert(&brick->q_phase[3], wb);
|
||||
wake_up_interruptible_all(&brick->worker_event);
|
||||
#else
|
||||
|
@ -1817,9 +1873,10 @@ bool phase3_startio(struct writeback_info *wb)
|
|||
*/
|
||||
|
||||
static noinline
|
||||
int run_mref_queue(struct logger_queue *q, bool (*startio)(struct trans_logger_mref_aspect *sub_mref_a), int max)
|
||||
int run_mref_queue(struct logger_queue *q, bool (*startio)(struct trans_logger_mref_aspect *sub_mref_a), int max, bool do_limit)
|
||||
{
|
||||
struct trans_logger_brick *brick = q->q_brick;
|
||||
int total_len = 0;
|
||||
bool found = false;
|
||||
bool ok;
|
||||
int res = 0;
|
||||
|
@ -1830,6 +1887,9 @@ int run_mref_queue(struct logger_queue *q, bool (*startio)(struct trans_logger_m
|
|||
if (!mref_a)
|
||||
goto done;
|
||||
|
||||
if (do_limit && likely(mref_a->object))
|
||||
total_len += mref_a->object->ref_len;
|
||||
|
||||
ok = startio(mref_a);
|
||||
if (unlikely(!ok)) {
|
||||
qq_mref_pushback(q, mref_a);
|
||||
|
@ -1842,6 +1902,7 @@ int run_mref_queue(struct logger_queue *q, bool (*startio)(struct trans_logger_m
|
|||
|
||||
done:
|
||||
if (found) {
|
||||
mars_limit(&global_writeback.limiter, (total_len - 1) / 1024 + 1);
|
||||
wake_up_interruptible_all(&brick->worker_event);
|
||||
}
|
||||
return res;
|
||||
|
@ -1851,6 +1912,7 @@ static noinline
|
|||
int run_wb_queue(struct logger_queue *q, bool (*startio)(struct writeback_info *wb), int max)
|
||||
{
|
||||
struct trans_logger_brick *brick = q->q_brick;
|
||||
int total_len = 0;
|
||||
bool found = false;
|
||||
bool ok;
|
||||
int res = 0;
|
||||
|
@ -1861,6 +1923,8 @@ int run_wb_queue(struct logger_queue *q, bool (*startio)(struct writeback_info *
|
|||
if (!wb)
|
||||
goto done;
|
||||
|
||||
total_len += wb->w_len;
|
||||
|
||||
ok = startio(wb);
|
||||
if (unlikely(!ok)) {
|
||||
qq_wb_pushback(q, wb);
|
||||
|
@ -1872,6 +1936,7 @@ int run_wb_queue(struct logger_queue *q, bool (*startio)(struct writeback_info *
|
|||
|
||||
done:
|
||||
if (found) {
|
||||
mars_limit(&global_writeback.limiter, (total_len - 1) / 1024 + 1);
|
||||
wake_up_interruptible_all(&brick->worker_event);
|
||||
}
|
||||
return res;
|
||||
|
@ -1890,58 +1955,166 @@ int _congested(struct trans_logger_brick *brick)
|
|||
|| atomic_read(&brick->q_phase[3].q_flying);
|
||||
}
|
||||
|
||||
static const
|
||||
struct rank_info rank0[] = {
|
||||
{ 0, 100 },
|
||||
{ 100, 200 },
|
||||
};
|
||||
|
||||
static const
|
||||
struct rank_info rank1[] = {
|
||||
{ 0, 10 },
|
||||
{ 100, 20 },
|
||||
};
|
||||
|
||||
static const
|
||||
struct rank_info rank2[] = {
|
||||
{ 0, 10 },
|
||||
{ 100, 20 },
|
||||
};
|
||||
|
||||
static const
|
||||
struct rank_info rank3[] = {
|
||||
{ 0, 10 },
|
||||
{ 100, 20 },
|
||||
};
|
||||
|
||||
/* In general, each individual ranking table may have a different length.
|
||||
/* Ranking tables.
|
||||
*/
|
||||
static const
|
||||
struct rank_info *ranks[LOGGER_QUEUES] = {
|
||||
[0] = rank0,
|
||||
[1] = rank1,
|
||||
[2] = rank2,
|
||||
[3] = rank3,
|
||||
static
|
||||
struct rank_info float_queue_rank_log[] = {
|
||||
{ 0, 0 },
|
||||
{ 1, 100 },
|
||||
{ 10000, 100 },
|
||||
{ RKI_DUMMY }
|
||||
};
|
||||
|
||||
static const
|
||||
int rank_counts[LOGGER_QUEUES] = {
|
||||
[0] = sizeof(rank0) / sizeof(struct rank_info),
|
||||
[1] = sizeof(rank1) / sizeof(struct rank_info),
|
||||
[2] = sizeof(rank2) / sizeof(struct rank_info),
|
||||
[3] = sizeof(rank3) / sizeof(struct rank_info),
|
||||
static
|
||||
struct rank_info float_queue_rank_io[] = {
|
||||
{ 0, 0 },
|
||||
{ 1, 1 },
|
||||
{ 10000, 1 },
|
||||
{ RKI_DUMMY }
|
||||
};
|
||||
|
||||
static
|
||||
struct rank_info float_fly_rank_log[] = {
|
||||
{ 0, 0 },
|
||||
{ 32, 10 },
|
||||
{ 10000, 10 },
|
||||
{ RKI_DUMMY }
|
||||
};
|
||||
|
||||
static
|
||||
struct rank_info float_fly_rank_io[] = {
|
||||
{ 0, 0 },
|
||||
{ 1, 10 },
|
||||
{ 2, -20 },
|
||||
{ 10000, -100 },
|
||||
{ RKI_DUMMY }
|
||||
};
|
||||
|
||||
|
||||
static
|
||||
struct rank_info nofloat_queue_rank_log[] = {
|
||||
{ 0, 0 },
|
||||
{ 1, 100 },
|
||||
{ 100, 10 },
|
||||
{ 10000, 10 },
|
||||
{ RKI_DUMMY }
|
||||
};
|
||||
|
||||
static
|
||||
struct rank_info nofloat_queue_rank_io[] = {
|
||||
{ 0, 0 },
|
||||
{ 1, 10 },
|
||||
{ 100, 100 },
|
||||
{ 10000, 200 },
|
||||
{ RKI_DUMMY }
|
||||
};
|
||||
|
||||
static
|
||||
struct rank_info nofloat_fly_rank_log[] = {
|
||||
{ 0, 0 },
|
||||
{ 1, 1 },
|
||||
{ 32, 10 },
|
||||
{ 10000, 1 },
|
||||
{ RKI_DUMMY }
|
||||
};
|
||||
|
||||
static
|
||||
struct rank_info nofloat_fly_rank_io[] = {
|
||||
{ 0, 0 },
|
||||
{ 1, 10 },
|
||||
{ 128, 8 },
|
||||
{ 129, -100 },
|
||||
{ 10000, -200 },
|
||||
{ RKI_DUMMY }
|
||||
};
|
||||
|
||||
|
||||
static
|
||||
struct rank_info *queue_ranks[2][LOGGER_QUEUES] = {
|
||||
[0] = {
|
||||
[0] = float_queue_rank_log,
|
||||
[1] = float_queue_rank_io,
|
||||
[2] = float_queue_rank_io,
|
||||
[3] = float_queue_rank_io,
|
||||
},
|
||||
[1] = {
|
||||
[0] = nofloat_queue_rank_log,
|
||||
[1] = nofloat_queue_rank_io,
|
||||
[2] = nofloat_queue_rank_io,
|
||||
[3] = nofloat_queue_rank_io,
|
||||
},
|
||||
};
|
||||
static
|
||||
struct rank_info *fly_ranks[2][LOGGER_QUEUES] = {
|
||||
[0] = {
|
||||
[0] = float_fly_rank_log,
|
||||
[1] = float_fly_rank_io,
|
||||
[2] = float_fly_rank_io,
|
||||
[3] = float_fly_rank_io,
|
||||
},
|
||||
[1] = {
|
||||
[0] = nofloat_fly_rank_log,
|
||||
[1] = nofloat_fly_rank_io,
|
||||
[2] = nofloat_fly_rank_io,
|
||||
[3] = nofloat_fly_rank_io,
|
||||
},
|
||||
};
|
||||
|
||||
static noinline
|
||||
int _do_ranking(struct trans_logger_brick *brick, struct rank_data rkd[])
|
||||
{
|
||||
int i;
|
||||
#ifdef DELAY_CALLERS
|
||||
int floating_mode;
|
||||
bool delay_callers;
|
||||
#endif
|
||||
|
||||
ranking_start(rkd, LOGGER_QUEUES);
|
||||
|
||||
// check the memory situation...
|
||||
delay_callers = false;
|
||||
floating_mode = 1;
|
||||
if (brick_global_memlimit >= 1024) {
|
||||
struct rank_info full_punish_global[] = {
|
||||
{ 0, 0 },
|
||||
{ brick_global_memlimit * 3 / 4, 0 },
|
||||
{ brick_global_memlimit, -1000 },
|
||||
{ RKI_DUMMY }
|
||||
};
|
||||
int global_mem_used = atomic64_read(&global_mshadow_used) / 1024;
|
||||
trans_logger_mem_usage = global_mem_used;
|
||||
|
||||
floating_mode = (global_mem_used < brick_global_memlimit / 2) ? 0 : 1;
|
||||
|
||||
if (global_mem_used >= brick_global_memlimit)
|
||||
delay_callers = true;
|
||||
|
||||
MARS_IO("global_mem_used = %d\n", global_mem_used);
|
||||
ranking_compute(&rkd[0], full_punish_global, global_mem_used);
|
||||
} else if (brick->shadow_mem_limit >= 8) {
|
||||
struct rank_info full_punish_local[] = {
|
||||
{ 0, 0 },
|
||||
{ brick->shadow_mem_limit * 3 / 4, 0 },
|
||||
{ brick->shadow_mem_limit, -1000 },
|
||||
{ RKI_DUMMY }
|
||||
};
|
||||
int local_mem_used = atomic64_read(&brick->shadow_mem_used) / 1024;
|
||||
|
||||
floating_mode = (local_mem_used < brick->shadow_mem_limit / 2) ? 0 : 1;
|
||||
|
||||
if (local_mem_used >= brick->shadow_mem_limit)
|
||||
delay_callers = true;
|
||||
|
||||
MARS_IO("local_mem_used = %d\n", local_mem_used);
|
||||
ranking_compute(&rkd[0], full_punish_local, local_mem_used);
|
||||
}
|
||||
if (delay_callers) {
|
||||
if (!brick->delay_callers) {
|
||||
brick->delay_callers = true;
|
||||
atomic_inc(&brick->total_delay_count);
|
||||
}
|
||||
} else {
|
||||
brick->delay_callers = false;
|
||||
}
|
||||
|
||||
// obey the basic rules...
|
||||
for (i = 0; i < LOGGER_QUEUES; i++) {
|
||||
int queued = atomic_read(&brick->q_phase[i].q_queued);
|
||||
|
@ -1949,62 +2122,44 @@ int _do_ranking(struct trans_logger_brick *brick, struct rank_data rkd[])
|
|||
|
||||
MARS_IO("i = %d queued = %d\n", i, queued);
|
||||
|
||||
/* This must come first.
|
||||
* When a queue is empty, you must not credit any positive points.
|
||||
* Otherwise, (almost) infinite selection of untreatable
|
||||
* queues may occur.
|
||||
*/
|
||||
if (queued <= 0)
|
||||
continue;
|
||||
|
||||
flying = atomic_read(&brick->q_phase[i].q_flying);
|
||||
if (flying >= brick->q_phase[i].q_max_flying && brick->q_phase[i].q_max_flying > 0)
|
||||
continue;
|
||||
if (i == 1 && !floating_mode) {
|
||||
int lim;
|
||||
|
||||
MARS_IO("i = %d queued = %d\n", i, queued);
|
||||
ranking_compute(&rkd[i], ranks[i], rank_counts[i], queued);
|
||||
if (atomic_read(&brick->q_phase[0].q_queued) + atomic_read(&brick->q_phase[0].q_flying) > 0) {
|
||||
break;
|
||||
}
|
||||
|
||||
// ... and the contention rule for queue 0 ...
|
||||
if (i == 0 && brick->q_phase[0].q_max_flying > 0) {
|
||||
struct rank_info contention[] = {
|
||||
{ 0, 0 },
|
||||
{ brick->q_phase[0].q_max_flying, 300 },
|
||||
};
|
||||
|
||||
MARS_IO("flying = %d\n", flying);
|
||||
ranking_compute(&rkd[0], contention, 2, flying);
|
||||
if (elect_leader(&global_writeback) != brick) {
|
||||
break;
|
||||
}
|
||||
|
||||
if (banning_is_hit(&mars_global_ban)) {
|
||||
break;
|
||||
}
|
||||
|
||||
lim = mars_limit(&global_writeback.limiter, 0);
|
||||
if (lim > 0) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
ranking_compute(&rkd[i], queue_ranks[floating_mode][i], queued);
|
||||
|
||||
flying = atomic_read(&brick->q_phase[i].q_flying);
|
||||
|
||||
MARS_IO("i = %d queued = %d flying = %d\n", i, queued, flying);
|
||||
|
||||
ranking_compute(&rkd[i], fly_ranks[floating_mode][i], flying);
|
||||
}
|
||||
|
||||
// ... and now the exceptions from the rules ...
|
||||
#ifdef DELAY_CALLERS
|
||||
delay_callers = false;
|
||||
if (brick_global_memlimit >= 1024) {
|
||||
struct rank_info full_punish_global[] = {
|
||||
{ 0, 0 },
|
||||
{ brick_global_memlimit * 7 / 8, 0 },
|
||||
{ brick_global_memlimit, -1000 },
|
||||
};
|
||||
int global_mem_used = atomic64_read(&global_mshadow_used) / 1024;
|
||||
trans_logger_mem_usage = global_mem_used;
|
||||
|
||||
if (global_mem_used >= brick_global_memlimit)
|
||||
delay_callers = true;
|
||||
|
||||
MARS_IO("global_mem_used = %d\n", global_mem_used);
|
||||
ranking_compute(&rkd[0], full_punish_global, 3, global_mem_used);
|
||||
} else if (brick->shadow_mem_limit >= 8) {
|
||||
struct rank_info full_punish_local[] = {
|
||||
{ 0, 0 },
|
||||
{ brick->shadow_mem_limit * 7 / 8, 0 },
|
||||
{ brick->shadow_mem_limit, -1000 },
|
||||
};
|
||||
int local_mem_used = atomic64_read(&brick->shadow_mem_used) / 1024;
|
||||
|
||||
if (local_mem_used >= brick->shadow_mem_limit)
|
||||
delay_callers = true;
|
||||
|
||||
MARS_IO("local_mem_used = %d\n", local_mem_used);
|
||||
ranking_compute(&rkd[0], full_punish_local, 3, local_mem_used);
|
||||
}
|
||||
brick->delay_callers = delay_callers;
|
||||
#endif
|
||||
|
||||
// finalize it
|
||||
ranking_stop(rkd, LOGGER_QUEUES);
|
||||
|
||||
|
@ -2137,10 +2292,10 @@ void trans_logger_log(struct trans_logger_brick *brick)
|
|||
|
||||
switch (winner) {
|
||||
case 0:
|
||||
nr = run_mref_queue(&brick->q_phase[0], prep_phase_startio, brick->q_phase[0].q_batchlen);
|
||||
nr = run_mref_queue(&brick->q_phase[0], prep_phase_startio, brick->q_phase[0].q_batchlen, true);
|
||||
goto done;
|
||||
case 1:
|
||||
nr = run_mref_queue(&brick->q_phase[1], phase1_startio, brick->q_phase[1].q_batchlen);
|
||||
nr = run_mref_queue(&brick->q_phase[1], phase1_startio, brick->q_phase[1].q_batchlen, true);
|
||||
goto done;
|
||||
case 2:
|
||||
nr = run_wb_queue(&brick->q_phase[2], phase2_startio, brick->q_phase[2].q_batchlen);
|
||||
|
@ -2711,6 +2866,7 @@ int trans_logger_brick_construct(struct trans_logger_brick *brick)
|
|||
atomic_set(&brick->hash_count, 0);
|
||||
spin_lock_init(&brick->replay_lock);
|
||||
INIT_LIST_HEAD(&brick->replay_list);
|
||||
INIT_LIST_HEAD(&brick->group_head);
|
||||
init_waitqueue_head(&brick->worker_event);
|
||||
init_waitqueue_head(&brick->caller_event);
|
||||
qq_init(&brick->q_phase[0], brick);
|
||||
|
@ -2732,6 +2888,20 @@ int trans_logger_brick_construct(struct trans_logger_brick *brick)
|
|||
brick->new_input_nr = TL_INPUT_LOG1;
|
||||
brick->log_input_nr = TL_INPUT_LOG1;
|
||||
brick->old_input_nr = TL_INPUT_LOG1;
|
||||
add_to_group(&global_writeback, brick);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static noinline
|
||||
int trans_logger_brick_destruct(struct trans_logger_brick *brick)
|
||||
{
|
||||
int i;
|
||||
for (i = 0; i < TRANS_HASH_MAX; i++) {
|
||||
struct hash_anchor *start = &brick->hash_table[i];
|
||||
CHECK_HEAD_EMPTY(&start->hash_anchor);
|
||||
}
|
||||
CHECK_HEAD_EMPTY(&brick->replay_list);
|
||||
remove_from_group(&global_writeback, brick);
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
@ -2810,6 +2980,7 @@ const struct trans_logger_brick_type trans_logger_brick_type = {
|
|||
.default_input_types = trans_logger_input_types,
|
||||
.default_output_types = trans_logger_output_types,
|
||||
.brick_construct = &trans_logger_brick_construct,
|
||||
.brick_destruct = &trans_logger_brick_destruct,
|
||||
};
|
||||
EXPORT_SYMBOL_GPL(trans_logger_brick_type);
|
||||
|
||||
|
|
|
@ -10,12 +10,27 @@
|
|||
|
||||
#include <linux/time.h>
|
||||
|
||||
#include "mars.h"
|
||||
#include "lib_log.h"
|
||||
#include "lib_pairing_heap.h"
|
||||
#include "lib_queue.h"
|
||||
|
||||
///////////////////////// global tuning ////////////////////////
|
||||
|
||||
extern int trans_logger_mem_usage; // in KB
|
||||
|
||||
struct writeback_group {
|
||||
rwlock_t lock;
|
||||
struct trans_logger_brick *leader;
|
||||
loff_t biggest;
|
||||
struct list_head group_anchor;
|
||||
// tuning
|
||||
struct mars_limiter limiter;
|
||||
int until_percent;
|
||||
};
|
||||
|
||||
extern struct writeback_group global_writeback;
|
||||
|
||||
////////////////////////////////////////////////////////////////////
|
||||
|
||||
_PAIRING_HEAP_TYPEDEF(logger,)
|
||||
|
@ -133,6 +148,7 @@ struct trans_logger_brick {
|
|||
int old_input_nr; // where old IO requests may be on the fly
|
||||
int replay_code; // replay errors (if any)
|
||||
// private
|
||||
struct list_head group_head;
|
||||
loff_t old_margin;
|
||||
spinlock_t replay_lock;
|
||||
struct list_head replay_list;
|
||||
|
|
|
@ -120,7 +120,7 @@ struct mars_rotate {
|
|||
|
||||
// TUNING
|
||||
|
||||
int mars_mem_percent = 0;
|
||||
int mars_mem_percent = 20;
|
||||
EXPORT_SYMBOL_GPL(mars_mem_percent);
|
||||
|
||||
#define CONF_TRANS_SHADOW_LIMIT (1024 * 128) // don't fill the hashtable too much
|
||||
|
@ -132,15 +132,13 @@ EXPORT_SYMBOL_GPL(mars_mem_percent);
|
|||
//#define TRANS_FAKE
|
||||
|
||||
#define CONF_TRANS_BATCHLEN 64
|
||||
#define CONF_TRANS_FLYING 256
|
||||
#define CONF_TRANS_PRIO MARS_PRIO_HIGH
|
||||
#define CONF_TRANS_LOG_READS false
|
||||
//#define CONF_TRANS_LOG_READS true
|
||||
//#define CONF_TRANS_COMPLETION_SEMANTICS 2
|
||||
#define CONF_TRANS_COMPLETION_SEMANTICS 0
|
||||
|
||||
#define CONF_ALL_BATCHLEN 4
|
||||
#define CONF_ALL_FLYING 32
|
||||
#define CONF_ALL_BATCHLEN 1
|
||||
#define CONF_ALL_PRIO MARS_PRIO_NORMAL
|
||||
|
||||
#define IF_SKIP_SYNC true
|
||||
|
@ -186,11 +184,6 @@ int _set_trans_params(struct mars_brick *_brick, void *private)
|
|||
trans_brick->q_phase[2].q_batchlen = CONF_ALL_BATCHLEN;
|
||||
trans_brick->q_phase[3].q_batchlen = CONF_ALL_BATCHLEN;
|
||||
|
||||
trans_brick->q_phase[0].q_max_flying = CONF_TRANS_FLYING;
|
||||
trans_brick->q_phase[1].q_max_flying = CONF_ALL_FLYING;
|
||||
trans_brick->q_phase[2].q_max_flying = CONF_ALL_FLYING;
|
||||
trans_brick->q_phase[3].q_max_flying = CONF_ALL_FLYING;
|
||||
|
||||
trans_brick->q_phase[0].q_io_prio = CONF_TRANS_PRIO;
|
||||
trans_brick->q_phase[1].q_io_prio = CONF_ALL_PRIO;
|
||||
trans_brick->q_phase[2].q_io_prio = CONF_ALL_PRIO;
|
||||
|
@ -3857,7 +3850,7 @@ static int light_thread(void *data)
|
|||
mars_mem_percent = 0;
|
||||
if (mars_mem_percent > 70)
|
||||
mars_mem_percent = 70;
|
||||
brick_global_memlimit = brick_global_memavail * mars_mem_percent / 100;
|
||||
brick_global_memlimit = (long long)brick_global_memavail * mars_mem_percent / 100;
|
||||
|
||||
brick_msleep(100);
|
||||
|
||||
|
@ -3918,6 +3911,8 @@ static int light_thread(void *data)
|
|||
_show_statist(&_global);
|
||||
#endif
|
||||
|
||||
MARS_DBG("ban_count = %d ban_renew_count = %d\n", mars_global_ban.ban_count, mars_global_ban.ban_renew_count);
|
||||
|
||||
brick_msleep(500);
|
||||
|
||||
wait_event_interruptible_timeout(_global.main_event, _global.main_trigger, CONFIG_MARS_SCAN_INTERVAL * HZ);
|
||||
|
|
|
@ -13,6 +13,8 @@
|
|||
|
||||
#include "strategy.h"
|
||||
#include "mars_proc.h"
|
||||
#include "../mars_bio.h"
|
||||
#include "../mars_aio.h"
|
||||
#include "../mars_client.h"
|
||||
#include "../mars_server.h"
|
||||
#include "../mars_trans_logger.h"
|
||||
|
@ -161,6 +163,44 @@ EXPORT_SYMBOL_GPL(mars_max_loadavg);
|
|||
#define _CTL_STRATEGY(handler) /*empty*/
|
||||
#endif
|
||||
|
||||
#define INT_ENTRY(NAME,VAR,MODE) \
|
||||
{ \
|
||||
_CTL_NAME \
|
||||
.procname = NAME, \
|
||||
.data = &(VAR), \
|
||||
.maxlen = sizeof(int), \
|
||||
.mode = MODE, \
|
||||
.proc_handler = &proc_dointvec, \
|
||||
_CTL_STRATEGY(sysctl_intvec) \
|
||||
}
|
||||
|
||||
#define LIMITER_ENTRIES(VAR, PREFIX, SUFFIX) \
|
||||
INT_ENTRY(PREFIX "_limit_" SUFFIX, (VAR)->lim_max_rate, 0600), \
|
||||
INT_ENTRY(PREFIX "_rate_" SUFFIX, (VAR)->lim_rate, 0600) \
|
||||
|
||||
#define THRESHOLD_ENTRIES(VAR, PREFIX) \
|
||||
INT_ENTRY(PREFIX "_threshold_us", (VAR)->thr_limit, 0600), \
|
||||
INT_ENTRY(PREFIX "_factor_percent", (VAR)->thr_factor, 0600), \
|
||||
INT_ENTRY(PREFIX "_plus_us", (VAR)->thr_plus, 0600), \
|
||||
INT_ENTRY(PREFIX "_triggered", (VAR)->thr_triggered, 0600), \
|
||||
INT_ENTRY(PREFIX "_true_hit", (VAR)->thr_true_hit, 0600) \
|
||||
|
||||
static
|
||||
ctl_table tuning_table[] = {
|
||||
LIMITER_ENTRIES(&client_limiter, "network_traffic", "kb"),
|
||||
LIMITER_ENTRIES(&server_limiter, "server_io", "kb"),
|
||||
LIMITER_ENTRIES(&global_writeback.limiter, "writeback", "kb"),
|
||||
INT_ENTRY("writeback_until_percent", global_writeback.until_percent, 0600),
|
||||
THRESHOLD_ENTRIES(&bio_submit_threshold, "bio_submit"),
|
||||
THRESHOLD_ENTRIES(&bio_io_threshold[0], "bio_io_r"),
|
||||
THRESHOLD_ENTRIES(&bio_io_threshold[1], "bio_io_w"),
|
||||
THRESHOLD_ENTRIES(&aio_submit_threshold, "aio_submit"),
|
||||
THRESHOLD_ENTRIES(&aio_io_threshold[0], "aio_io_r"),
|
||||
THRESHOLD_ENTRIES(&aio_io_threshold[1], "aio_io_w"),
|
||||
THRESHOLD_ENTRIES(&aio_sync_threshold, "aio_sync"),
|
||||
{}
|
||||
};
|
||||
|
||||
static
|
||||
ctl_table mars_table[] = {
|
||||
{
|
||||
|
@ -181,106 +221,20 @@ ctl_table mars_table[] = {
|
|||
.mode = 0400,
|
||||
.proc_handler = &errors_sysctl_handler,
|
||||
},
|
||||
{
|
||||
_CTL_NAME
|
||||
.procname = "percent_mem_limit_kb",
|
||||
.data = &mars_mem_percent,
|
||||
.maxlen = sizeof(int),
|
||||
.mode = 0600,
|
||||
.proc_handler = &proc_dointvec,
|
||||
_CTL_STRATEGY(sysctl_intvec)
|
||||
},
|
||||
{
|
||||
_CTL_NAME
|
||||
.procname = "mem_used_kb",
|
||||
.data = &trans_logger_mem_usage,
|
||||
.maxlen = sizeof(int),
|
||||
.mode = 0400,
|
||||
.proc_handler = &proc_dointvec,
|
||||
_CTL_STRATEGY(sysctl_intvec)
|
||||
},
|
||||
{
|
||||
_CTL_NAME
|
||||
.procname = "logrot_auto_gb",
|
||||
.data = &global_logrot_auto,
|
||||
.maxlen = sizeof(int),
|
||||
.mode = 0600,
|
||||
.proc_handler = &proc_dointvec,
|
||||
_CTL_STRATEGY(sysctl_intvec)
|
||||
},
|
||||
{
|
||||
_CTL_NAME
|
||||
.procname = "logdel_auto_gb",
|
||||
.data = &global_logdel_auto,
|
||||
.maxlen = sizeof(int),
|
||||
.mode = 0600,
|
||||
.proc_handler = &proc_dointvec,
|
||||
_CTL_STRATEGY(sysctl_intvec)
|
||||
},
|
||||
{
|
||||
_CTL_NAME
|
||||
.procname = "free_space_mb",
|
||||
.data = &global_free_space,
|
||||
.maxlen = sizeof(int),
|
||||
.mode = 0600,
|
||||
.proc_handler = &proc_dointvec,
|
||||
_CTL_STRATEGY(sysctl_intvec)
|
||||
},
|
||||
INT_ENTRY("percent_mem_limit_kb", mars_mem_percent, 0600),
|
||||
INT_ENTRY("mem_used_kb", trans_logger_mem_usage, 0400),
|
||||
INT_ENTRY("logrot_auto_gb", global_logrot_auto, 0600),
|
||||
INT_ENTRY("logdel_auto_gb", global_logdel_auto, 0600),
|
||||
INT_ENTRY("free_space_mb", global_free_space, 0600),
|
||||
#ifdef CONFIG_MARS_LOADAVG_LIMIT
|
||||
{
|
||||
_CTL_NAME
|
||||
.procname = "loadavg_limit",
|
||||
.data = &mars_max_loadavg,
|
||||
.maxlen = sizeof(int),
|
||||
.mode = 0600,
|
||||
.proc_handler = &proc_dointvec,
|
||||
_CTL_STRATEGY(sysctl_intvec)
|
||||
},
|
||||
INT_ENTRY("loadavg_limit", mars_max_loadavg, 0600),
|
||||
#endif
|
||||
INT_ENTRY("network_io_timeout", global_net_io_timeout, 0600),
|
||||
{
|
||||
_CTL_NAME
|
||||
.procname = "network_io_timeout",
|
||||
.data = &global_net_io_timeout,
|
||||
.maxlen = sizeof(int),
|
||||
.mode = 0600,
|
||||
.proc_handler = &proc_dointvec,
|
||||
_CTL_STRATEGY(sysctl_intvec)
|
||||
},
|
||||
{
|
||||
_CTL_NAME
|
||||
.procname = "network_traffic_limit_kb",
|
||||
.data = &client_limiter.lim_max_rate,
|
||||
.maxlen = sizeof(int),
|
||||
.mode = 0600,
|
||||
.proc_handler = &proc_dointvec,
|
||||
_CTL_STRATEGY(sysctl_intvec)
|
||||
},
|
||||
{
|
||||
_CTL_NAME
|
||||
.procname = "network_traffic_rate_kb",
|
||||
.data = &client_limiter.lim_rate,
|
||||
.maxlen = sizeof(int),
|
||||
.mode = 0400,
|
||||
.proc_handler = &proc_dointvec,
|
||||
_CTL_STRATEGY(sysctl_intvec)
|
||||
},
|
||||
{
|
||||
_CTL_NAME
|
||||
.procname = "server_io_limit_mb",
|
||||
.data = &server_limiter.lim_max_rate,
|
||||
.maxlen = sizeof(int),
|
||||
.mode = 0600,
|
||||
.proc_handler = &proc_dointvec,
|
||||
_CTL_STRATEGY(sysctl_intvec)
|
||||
},
|
||||
{
|
||||
_CTL_NAME
|
||||
.procname = "server_io_rate_mb",
|
||||
.data = &server_limiter.lim_rate,
|
||||
.maxlen = sizeof(int),
|
||||
.mode = 0400,
|
||||
.proc_handler = &proc_dointvec,
|
||||
_CTL_STRATEGY(sysctl_intvec)
|
||||
.procname = "tuning",
|
||||
.mode = 0500,
|
||||
.child = tuning_table,
|
||||
},
|
||||
{}
|
||||
};
|
||||
|
|
Loading…
Reference in New Issue