mirror of https://github.com/schoebel/mars
Merge branch 'mars0.1.y' into mars0.1a.y
This commit is contained in:
commit
26ddf5e68c
10
ChangeLog
10
ChangeLog
|
@ -296,6 +296,16 @@ Hint: branch 0.1a will get a merge from here, and then get the
|
|||
(except Football related ones) will then go to 0.1b.
|
||||
Finally, when 0.1a is stable, I will close this branch.
|
||||
|
||||
mars0.1stable63
|
||||
* Minor fix: when compiling for some newer kernels (only there),
|
||||
schedule() could be called while waiting for some condition,
|
||||
worsening performance unnecessarily.
|
||||
* Minor improvement: starting join-resource in batches
|
||||
was slow because each was waiting for cluster communication.
|
||||
Use a manual "marsadm wait-cluster" before starting batches
|
||||
of join-resource operations.
|
||||
* Doc: some clarifications on BigCluster scalability behaviour.
|
||||
|
||||
mars0.1stable62
|
||||
* Minor fix: race between join-resource and log-rotate.
|
||||
* Minor fix: report split brain logfile amount only when
|
||||
|
|
5786
docu/mars-manual.lyx
5786
docu/mars-manual.lyx
File diff suppressed because it is too large
Load Diff
Binary file not shown.
|
@ -0,0 +1,69 @@
|
|||
/*
|
||||
* MARS Long Distance Replication Software
|
||||
*
|
||||
* This file is part of MARS project: http://schoebel.github.io/mars/
|
||||
*
|
||||
* Copyright (C) 2010-2014 Thomas Schoebel-Theuer
|
||||
* Copyright (C) 2011-2014 1&1 Internet AG
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License along
|
||||
* with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
*/
|
||||
|
||||
#ifndef BRICK_WAIT_H
|
||||
#define BRICK_WAIT_H
|
||||
|
||||
/* Compatibility with older kernels.
 *
 * Kernels whose headers do not define ___wait_cond_timeout() are treated
 * as lacking prepare_to_wait_event() too, so both are emulated here with
 * the semantics brick_wait() relies on.
 */
#ifndef ___wait_cond_timeout

/* Evaluates to true when the wait has to end: either the condition holds,
 * or the remaining timeout kept in the local variable __ret of the
 * expanding scope has reached 0.
 * When the condition holds exactly at expiry, __ret is set to 1 so that
 * the result of the wait differs from the "timed out" value 0.
 */
#define ___wait_cond_timeout(condition)					\
({									\
	bool __cond = (condition);					\
									\
	if (__cond && !__ret)						\
		__ret = 1;						\
	__cond || !__ret;						\
})

/* Evaluates to -ERESTARTSYS, without queueing anything, when a signal is
 * pending for the given sleep state.
 * Otherwise fills in the owner and the wake function of the wait entry
 * (plain prepare_to_wait() does not do that for an entry which was not
 * set up via DEFINE_WAIT() / init_wait()), queues it, and evaluates to 0.
 */
#define prepare_to_wait_event(q, wait, state)				\
({									\
	wait_queue_t *__pwe_wait = (wait);				\
	long __pwe_ret = 0;						\
									\
	if (signal_pending_state((state), current)) {			\
		__pwe_ret = -ERESTARTSYS;				\
	} else {							\
		__pwe_wait->private = current;				\
		__pwe_wait->func = autoremove_wake_function;		\
		prepare_to_wait((q), __pwe_wait, (state));		\
	}								\
	__pwe_ret;							\
})

#endif
|
||||
|
||||
/* Interruptible wait with timeout, adapted from include/linux/wait.h.
 *
 * @wq:        wait queue head (an lvalue, not a pointer) to sleep on
 * @condition: expression to wait for; it is only evaluated while the
 *             task state is TASK_RUNNING
 * @timeout:   maximum time to wait, as passed to schedule_timeout()
 *
 * Value of the whole expression:
 *  - @timeout unchanged when @condition already holds on entry,
 *  - the non-zero result of prepare_to_wait_event() when that reports
 *    one (e.g. an error code),
 *  - otherwise the remaining time as maintained by schedule_timeout()
 *    and ___wait_cond_timeout().
 *
 * @wq is evaluated at most once, and not at all on the fast path.
 *
 * NOTE(review): @condition is tested before the wait entry is (re)queued
 * and the task state is changed. A wake-up arriving between that test and
 * prepare_to_wait_event() therefore looks like it is only noticed after
 * the timeout has expired -- confirm that all callers tolerate this.
 */
#define brick_wait(wq, condition, timeout)				\
({									\
	/* Keep the name __ret: ___wait_cond_timeout() of newer kernels	\
	 * accesses a variable of exactly this name in the expanding	\
	 * scope (explicit shadow).					\
	 */								\
	long __ret = (timeout);						\
									\
	might_sleep();							\
	/* check in advance to avoid spinlocks in fastpath */		\
	if (!(condition)) {						\
		wait_queue_head_t *__wq = &(wq);			\
		wait_queue_t __wait;					\
									\
		INIT_LIST_HEAD(&__wait.task_list);			\
		__wait.flags = 0;					\
									\
		for (;;) {						\
			long __intr = prepare_to_wait_event(__wq, &__wait, TASK_INTERRUPTIBLE); \
									\
			if (__intr) {					\
				__ret = __intr;				\
				break;					\
			}						\
									\
			__ret = schedule_timeout(__ret);		\
									\
			/* evaluate the condition as a runnable task */	\
			__set_current_state(TASK_RUNNING);		\
			if (___wait_cond_timeout(condition))		\
				break;					\
		}							\
		finish_wait(__wq, &__wait);				\
	}								\
	__ret;								\
})
|
||||
|
||||
|
||||
#endif
|
|
@ -245,27 +245,19 @@ void copy_endio(struct generic_callback *cb)
|
|||
error = -EINVAL;
|
||||
goto exit;
|
||||
}
|
||||
st->active[queue] = false;
|
||||
if (unlikely(st->table[queue])) {
|
||||
if (unlikely(st->table[queue] != mref)) {
|
||||
MARS_ERR("table corruption at %d %d (%p => %p)\n", index, queue, st->table[queue], mref);
|
||||
error = -EEXIST;
|
||||
goto exit;
|
||||
}
|
||||
if (unlikely(cb->cb_error < 0)) {
|
||||
error = cb->cb_error;
|
||||
__clear_mref(brick, mref, queue);
|
||||
/* This is racy, but does no harm.
|
||||
* Worst case just produces more error output.
|
||||
*/
|
||||
if (!brick->copy_error_count++) {
|
||||
MARS_WRN("IO error %d on index %d, old state = %d\n", cb->cb_error, index, st->state);
|
||||
}
|
||||
} else {
|
||||
if (unlikely(st->table[queue])) {
|
||||
MARS_ERR("overwriting index %d, state = %d\n", index, st->state);
|
||||
_clear_mref(brick, index, queue);
|
||||
}
|
||||
st->table[queue] = mref;
|
||||
}
|
||||
|
||||
exit:
|
||||
|
@ -273,6 +265,7 @@ exit:
|
|||
st->error = error;
|
||||
_clash(brick);
|
||||
}
|
||||
st->active[queue] = false;
|
||||
if (mref->ref_rw) {
|
||||
atomic_dec(&brick->copy_write_flight);
|
||||
atomic_dec(&global_copy_write_flight);
|
||||
|
@ -294,6 +287,7 @@ int _make_mref(struct copy_brick *brick, int index, int queue, void *data, loff_
|
|||
struct mref_object *mref;
|
||||
struct copy_mref_aspect *mref_a;
|
||||
struct copy_input *input;
|
||||
struct copy_state *st;
|
||||
int offset;
|
||||
int len;
|
||||
int status = -EAGAIN;
|
||||
|
@ -353,7 +347,10 @@ int _make_mref(struct copy_brick *brick, int index, int queue, void *data, loff_
|
|||
|
||||
//MARS_IO("queue = %d index = %d pos = %lld len = %d rw = %d\n", queue, index, mref->ref_pos, mref->ref_len, rw);
|
||||
|
||||
GET_STATE(brick, index).active[queue] = true;
|
||||
st = &GET_STATE(brick, index);
|
||||
st->table[queue] = mref;
|
||||
st->active[queue] = true;
|
||||
|
||||
if (rw) {
|
||||
atomic_inc(&brick->copy_write_flight);
|
||||
atomic_inc(&global_copy_write_flight);
|
||||
|
@ -442,8 +439,6 @@ restart:
|
|||
goto idle;
|
||||
}
|
||||
|
||||
_clear_mref(brick, index, 1);
|
||||
_clear_mref(brick, index, 0);
|
||||
st->writeout = false;
|
||||
st->error = 0;
|
||||
|
||||
|
@ -475,17 +470,16 @@ restart:
|
|||
next_state = COPY_STATE_READ2;
|
||||
/* fallthrough */
|
||||
case COPY_STATE_READ2:
|
||||
mref1 = st->table[1];
|
||||
if (!mref1) { // idempotence: wait by unchanged state
|
||||
if (st->active[1]) { // idempotence: wait by unchanged state
|
||||
goto idle;
|
||||
}
|
||||
/* fallthrough => wait for both mrefs to appear */
|
||||
case COPY_STATE_READ1:
|
||||
case COPY_STATE_READ3:
|
||||
mref0 = st->table[0];
|
||||
if (!mref0) { // idempotence: wait by unchanged state
|
||||
if (st->active[0]) { // idempotence: wait by unchanged state
|
||||
goto idle;
|
||||
}
|
||||
mref0 = st->table[0];
|
||||
if (brick->copy_limiter) {
|
||||
int amount = (mref0->ref_len - 1) / 1024 + 1;
|
||||
mars_limit_sleep(brick->copy_limiter, amount);
|
||||
|
@ -565,6 +559,11 @@ restart:
|
|||
progress = -EILSEQ;
|
||||
break;
|
||||
}
|
||||
if (unlikely(st->active[0])) {
|
||||
MARS_ERR("src buffer for write is active, state %d at index %d\n", state, index);
|
||||
progress = -EILSEQ;
|
||||
break;
|
||||
}
|
||||
if (unlikely(brick->is_aborting)) {
|
||||
progress = -EINTR;
|
||||
break;
|
||||
|
@ -587,8 +586,7 @@ restart:
|
|||
next_state = COPY_STATE_WRITTEN;
|
||||
/* fallthrough */
|
||||
case COPY_STATE_WRITTEN:
|
||||
mref1 = st->table[1];
|
||||
if (!mref1) { // idempotence: wait by unchanged state
|
||||
if (st->active[1]) { // idempotence: wait by unchanged state
|
||||
MARS_IO("irrelevant\n");
|
||||
goto idle;
|
||||
}
|
||||
|
|
|
@ -36,6 +36,7 @@
|
|||
#include <linux/string.h>
|
||||
#include <linux/bio.h>
|
||||
|
||||
#include "brick_wait.h"
|
||||
#include "mars.h"
|
||||
#include "lib_limiter.h"
|
||||
|
||||
|
@ -735,7 +736,7 @@ int _write_ref_get(struct trans_logger_output *output, struct trans_logger_mref_
|
|||
|
||||
#ifdef DELAY_CALLERS
|
||||
// delay in case of too many master shadows / memory shortage
|
||||
wait_event_interruptible_timeout(brick->caller_event,
|
||||
brick_wait(brick->caller_event,
|
||||
!brick->delay_callers &&
|
||||
(brick_global_memlimit < 1024 || atomic64_read(&global_mshadow_used) / 1024 < brick_global_memlimit),
|
||||
HZ / 2);
|
||||
|
@ -2300,7 +2301,7 @@ int _do_ranking(struct trans_logger_brick *brick)
|
|||
}
|
||||
} else if (brick->delay_callers) {
|
||||
brick->delay_callers = false;
|
||||
wake_up_interruptible(&brick->caller_event);
|
||||
wake_up_interruptible_all(&brick->caller_event);
|
||||
}
|
||||
|
||||
// global limit for flying mrefs
|
||||
|
@ -2605,7 +2606,7 @@ void trans_logger_log(struct trans_logger_brick *brick)
|
|||
int winner;
|
||||
int nr;
|
||||
|
||||
wait_event_interruptible_timeout(
|
||||
brick_wait(
|
||||
brick->worker_event,
|
||||
({
|
||||
winner = _do_ranking(brick);
|
||||
|
@ -2771,7 +2772,7 @@ void wait_replay(struct trans_logger_brick *brick, struct trans_logger_mref_aspe
|
|||
bool ok = false;
|
||||
bool was_empty;
|
||||
|
||||
wait_event_interruptible_timeout(brick->worker_event,
|
||||
brick_wait(brick->worker_event,
|
||||
atomic_read(&brick->replay_count) < max
|
||||
&& (_has_conflict(brick, mref_a) ? conflicts++ : (ok = true), ok),
|
||||
60 * HZ);
|
||||
|
|
|
@ -1697,7 +1697,6 @@ sub try_to_avoid_splitbrain {
|
|||
my $old_timeout = $timeout;
|
||||
$timeout = $window if $timeout < 0;
|
||||
$old_primary = "" if $old_primary eq "(none)";
|
||||
wait_cluster($cmd, $res, $old_primary);
|
||||
if (!detect_splitbrain($res, 0)) {
|
||||
lwarn "ATTENTION: you are starting a non-forced primary switchover in a split brain situation.\n";
|
||||
lwarn "ATTENTION: that's no good idea.\n";
|
||||
|
|
Loading…
Reference in New Issue