mirror of https://github.com/schoebel/mars
improved debugging
This commit is contained in:
parent
72298768eb
commit
0223a5c946
|
@ -336,7 +336,7 @@ int aio_start_thread(struct aio_output *output, int i, int(*fn)(void*))
|
|||
spin_lock_init(&tinfo->lock);
|
||||
init_waitqueue_head(&tinfo->event);
|
||||
tinfo->terminated = false;
|
||||
tinfo->thread = kthread_create(fn, tinfo, "mars_aio%d", index++);
|
||||
tinfo->thread = kthread_create(fn, tinfo, "mars_%daio%d", i, index++);
|
||||
if (IS_ERR(tinfo->thread)) {
|
||||
int err = PTR_ERR(tinfo->thread);
|
||||
MARS_ERR("cannot create thread\n");
|
||||
|
@ -849,11 +849,14 @@ char *aio_statistics(struct aio_brick *brick, int verbose)
|
|||
|
||||
// FIXME: check for allocation overflows
|
||||
|
||||
snprintf(res, 1024, "total reads = %d writes = %d allocs = %d delays = %d msleeps = %d fdsyncs = %d fdsync_waits = %d | flying reads = %d writes = %d allocs = %d q0 = %d/%d q1 = %d/%d q2 = %d/%d | %s\n",
|
||||
snprintf(res, 1024, "total reads = %d writes = %d allocs = %d delays = %d msleeps = %d fdsyncs = %d fdsync_waits = %d | flying reads = %d writes = %d allocs = %d q0 = %d (%d - %d) q1 = %d (%d - %d) q2 = %d (%d - %d) | %s\n",
|
||||
atomic_read(&output->total_read_count), atomic_read(&output->total_write_count), atomic_read(&output->total_alloc_count), atomic_read(&output->total_delay_count), atomic_read(&output->total_msleep_count), atomic_read(&output->total_fdsync_count), atomic_read(&output->total_fdsync_wait_count),
|
||||
atomic_read(&output->read_count), atomic_read(&output->write_count), atomic_read(&output->alloc_count),
|
||||
atomic_read(&output->tinfo[0].total_enqueue_count) - atomic_read(&output->tinfo[0].total_dequeue_count),
|
||||
atomic_read(&output->tinfo[0].total_enqueue_count), atomic_read(&output->tinfo[0].total_dequeue_count),
|
||||
atomic_read(&output->tinfo[1].total_enqueue_count), atomic_read(&output->tinfo[2].total_dequeue_count),
|
||||
atomic_read(&output->tinfo[1].total_enqueue_count) - atomic_read(&output->tinfo[1].total_dequeue_count),
|
||||
atomic_read(&output->tinfo[1].total_enqueue_count), atomic_read(&output->tinfo[1].total_dequeue_count),
|
||||
atomic_read(&output->tinfo[2].total_enqueue_count) - atomic_read(&output->tinfo[2].total_dequeue_count),
|
||||
atomic_read(&output->tinfo[2].total_enqueue_count), atomic_read(&output->tinfo[2].total_dequeue_count),
|
||||
sync ? sync : "");
|
||||
|
||||
|
|
|
@ -8,13 +8,13 @@
|
|||
//#define REPLAY_DEBUGGING
|
||||
//#define STAT_DEBUGGING // here means: display full statistics
|
||||
//#define HASH_DEBUGGING
|
||||
//#define REFCOUNT_BUG // FIXME!!!
|
||||
|
||||
// variants
|
||||
#define KEEP_UNIQUE
|
||||
//#define WB_COPY
|
||||
#define LATER
|
||||
#define DELAY_CALLERS // this is _needed_
|
||||
//#define EARLY_COMPLETION
|
||||
|
||||
// commenting this out is dangerous for data integrity! use only for testing!
|
||||
#define USE_MEMCPY
|
||||
|
@ -617,8 +617,8 @@ int trans_logger_ref_get(struct trans_logger_output *output, struct mref_object
|
|||
|
||||
/* FIXME: THIS IS PROVISIONARY (use event instead)
|
||||
*/
|
||||
while (unlikely(!output->brick->power.led_on)) {
|
||||
msleep(HZ);
|
||||
while (unlikely(!brick->power.led_on)) {
|
||||
msleep(HZ / 10);
|
||||
}
|
||||
|
||||
return _write_ref_get(output, mref_a);
|
||||
|
@ -911,7 +911,7 @@ void free_writeback(struct writeback_info *wb)
|
|||
#if 1
|
||||
while (!orig_mref_a->is_completed) {
|
||||
MARS_ERR("request %lld (len = %d) was not completed, cleanup_count = %d\n", orig_mref->ref_pos, orig_mref->ref_len, cleanup_count);
|
||||
msleep(10000);
|
||||
msleep(3000);
|
||||
}
|
||||
#endif
|
||||
if (likely(wb->w_error >= 0)) {
|
||||
|
@ -1294,7 +1294,7 @@ void phase1_preio(void *private)
|
|||
// signal completion to the upper layer
|
||||
// FIXME: immediate error signalling is impossible here, but some delayed signalling should be possible as a workaround. Think!
|
||||
CHECK_ATOMIC(&orig_mref_a->object->ref_count, 1);
|
||||
#ifdef REFCOUNT_BUG // FIXME!!!
|
||||
#ifdef EARLY_COMPLETION
|
||||
_complete(brick, orig_mref_a, 0, true);
|
||||
CHECK_ATOMIC(&orig_mref_a->object->ref_count, 1);
|
||||
#endif
|
||||
|
@ -1932,7 +1932,7 @@ void _init_inputs(struct trans_logger_brick *brick)
|
|||
int nr = brick->new_input_nr;
|
||||
|
||||
if (brick->log_input_nr != brick->old_input_nr) {
|
||||
MARS_DBG("nothing to do, new_input_nr = %d log_input_nr = &d old_input_nr = %d\n", brick->new_input_nr, brick->log_input_nr, brick->old_input_nr);
|
||||
MARS_DBG("nothing to do, new_input_nr = %d log_input_nr = %d old_input_nr = %d\n", brick->new_input_nr, brick->log_input_nr, brick->old_input_nr);
|
||||
goto done;
|
||||
}
|
||||
if (unlikely(nr < TL_INPUT_LOG1 || nr > TL_INPUT_LOG2)) {
|
||||
|
@ -2134,7 +2134,7 @@ void trans_logger_log(struct trans_logger_brick *brick)
|
|||
|
||||
delay_callers = LIMIT_FN(1, 1);
|
||||
if (delay_callers != brick->delay_callers) {
|
||||
MARS_DBG("mshadow_count = %d/%d global_mem = %lld/%lld stalling %d -> %d\n", atomic_read(&brick->mshadow_count), brick->shadow_mem_limit, atomic64_read(&brick->shadow_mem_used), brick_global_memlimit, brick->delay_callers, delay_callers);
|
||||
MARS_DBG("mshadow_count = %d/%d global_mem = %ld/%lld stalling %d -> %d\n", atomic_read(&brick->mshadow_count), brick->shadow_mem_limit, atomic64_read(&brick->shadow_mem_used), brick_global_memlimit, brick->delay_callers, delay_callers);
|
||||
brick->delay_callers = delay_callers;
|
||||
wake_up_interruptible_all(&brick->worker_event);
|
||||
wake_up_interruptible_all(&brick->caller_event);
|
||||
|
@ -2150,7 +2150,7 @@ void trans_logger_log(struct trans_logger_brick *brick)
|
|||
brick->q_phase2.q_unlimited = unlimited;
|
||||
brick->q_phase3.q_unlimited = unlimited;
|
||||
brick->q_phase4.q_unlimited = unlimited;
|
||||
MARS_DBG("mshadow_count = %d/%d global_mem = %lld/%lld unlimited %d -> %d\n", atomic_read(&brick->mshadow_count), brick->shadow_mem_limit, atomic64_read(&brick->shadow_mem_used), brick_global_memlimit, old_unlimited, unlimited);
|
||||
MARS_DBG("mshadow_count = %d/%d global_mem = %ld/%lld unlimited %d -> %d\n", atomic_read(&brick->mshadow_count), brick->shadow_mem_limit, atomic64_read(&brick->shadow_mem_used), brick_global_memlimit, old_unlimited, unlimited);
|
||||
old_unlimited = unlimited;
|
||||
wake_up_interruptible_all(&brick->worker_event);
|
||||
wake_up_interruptible_all(&brick->caller_event);
|
||||
|
@ -2325,6 +2325,7 @@ void trans_logger_replay(struct trans_logger_brick *brick)
|
|||
loff_t start_pos;
|
||||
loff_t finished_pos;
|
||||
long long old_jiffies = jiffies;
|
||||
int backoff = 0;
|
||||
int status = 0;
|
||||
|
||||
brick->replay_code = 0; // indicates "running"
|
||||
|
@ -2357,12 +2358,17 @@ void trans_logger_replay(struct trans_logger_brick *brick)
|
|||
status = log_read(&input->logst, &lh, &buf, &len);
|
||||
if (status == -EAGAIN) {
|
||||
MARS_DBG("got -EAGAIN\n");
|
||||
msleep(100);
|
||||
msleep(backoff);
|
||||
if (backoff < 3000) {
|
||||
backoff += 100;
|
||||
} else {
|
||||
MARS_WRN("logfile replay not possible at position %lld (end_pos = %lld, remaining = %lld), please check/repair your logfile in userspace by some tool!\n", finished_pos, brick->replay_end_pos, brick->replay_end_pos - finished_pos);
|
||||
}
|
||||
continue;
|
||||
}
|
||||
if (unlikely(status < 0)) {
|
||||
brick->replay_code = status;
|
||||
MARS_ERR("cannot read logfile data, status = %d\n", status);
|
||||
MARS_WRN("cannot read logfile data, status = %d\n", status);
|
||||
break;
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in New Issue