mirror of
https://github.com/schoebel/mars
synced 2025-01-28 18:23:25 +00:00
light: fix becoming primary when logfiles are damaged
When logfile replay aborts with an error, becoming primary would be impossible. Without this patch, repair would only be possible by completely destroying the resource. A previous version of this patch introduced /proc/sys/mars/allow_primary_when_damaged, which would complicate the sysadmin interface; people would be unsure what to do.
This commit is contained in:
parent
69386b33d9
commit
d67336420d
@ -572,6 +572,7 @@ struct mars_rotate {
|
||||
bool has_emergency;
|
||||
bool wants_sync;
|
||||
bool gets_sync;
|
||||
bool log_is_really_damaged;
|
||||
spinlock_t inf_lock;
|
||||
bool infs_is_dirty[MAX_INFOS];
|
||||
struct trans_logger_info infs[MAX_INFOS];
|
||||
@ -3154,7 +3155,9 @@ int _check_logging_status(struct mars_rotate *rot, int *log_nr, long long *oldpo
|
||||
|
||||
status = 0;
|
||||
if (rot->aio_info.current_size > *oldpos_start) {
|
||||
if (rot->aio_info.current_size - *oldpos_start < REPLAY_TOLERANCE &&
|
||||
if ((rot->aio_info.current_size - *oldpos_start < REPLAY_TOLERANCE ||
|
||||
(rot->log_is_really_damaged &&
|
||||
rot->todo_primary)) &&
|
||||
(rot->todo_primary ||
|
||||
(rot->relevant_log &&
|
||||
rot->next_relevant_log &&
|
||||
@ -3235,7 +3238,7 @@ int _make_logging_status(struct mars_rotate *rot)
|
||||
* Allow switching over to a new logfile.
|
||||
*/
|
||||
if (!trans_brick->power.button && !trans_brick->power.led_on && trans_brick->power.led_off) {
|
||||
if (rot->next_relevant_log) {
|
||||
if (rot->next_relevant_log && !rot->log_is_really_damaged) {
|
||||
int replay_tolerance = _get_tolerance(rot);
|
||||
bool skip_new = !!rot->todo_primary;
|
||||
MARS_DBG("check switchover from '%s' to '%s' (size = %lld, skip_new = %d, replay_tolerance = %d)\n", dent->d_path, rot->next_relevant_log->d_path, rot->next_relevant_log->new_stat.size, skip_new, replay_tolerance);
|
||||
@ -3678,7 +3681,7 @@ int make_log_finalize(struct mars_global *global, struct mars_dent *dent)
|
||||
make_rot_msg(rot, "wrn-space-low", "EMERGENCY: the space on /mars/ is becoming low.");
|
||||
}
|
||||
|
||||
|
||||
rot->log_is_really_damaged = false;
|
||||
if (trans_brick->replay_mode) {
|
||||
if (trans_brick->replay_code > 0) {
|
||||
MARS_INF_TO(rot->log_say, "logfile replay ended successfully at position %lld\n", trans_brick->replay_current_pos);
|
||||
@ -3688,6 +3691,7 @@ int make_log_finalize(struct mars_global *global, struct mars_dent *dent)
|
||||
} else if (trans_brick->replay_code < 0) {
|
||||
MARS_ERR_TO(rot->log_say, "logfile replay stopped with error = %d at position %lld\n", trans_brick->replay_code, trans_brick->replay_current_pos);
|
||||
make_rot_msg(rot, "err-replay-stop", "logfile replay stopped with error = %d at position %lld", trans_brick->replay_code, trans_brick->replay_current_pos);
|
||||
rot->log_is_really_damaged = true;
|
||||
}
|
||||
}
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user