mirror of
https://github.com/schoebel/mars
synced 2024-12-27 00:52:21 +00:00
Merge branch 'mars0.1.y' into mars0.1b.y
This commit is contained in:
commit
4ff7dfe7a4
@ -200,6 +200,15 @@ mars0.1balpha0
|
||||
-----------------------------------
|
||||
Changelog for series 0.1:
|
||||
|
||||
mars0.1stable43
|
||||
--------
|
||||
* Major fix, only relevant for k >= 3 replicas:
|
||||
Logfile fetch did not switch over to another alive peer
|
||||
upon _specific_ network problems with the _current_
|
||||
peer. As a consequence, an unaffected replica could
|
||||
hang. Workaround was possible by pause-fetch /
|
||||
resume-fetch or by fixing the network :)
|
||||
|
||||
mars0.1stable42
|
||||
--------
|
||||
* Minor fix: ssh IPs and port numbers are automatically probed
|
||||
|
@ -592,6 +592,7 @@ struct mars_rotate {
|
||||
struct if_brick *if_brick;
|
||||
const char *fetch_path;
|
||||
const char *fetch_peer;
|
||||
const char *avoid_peer;
|
||||
const char *preferred_peer;
|
||||
const char *parent_path;
|
||||
const char *parent_rest;
|
||||
@ -616,6 +617,7 @@ struct mars_rotate {
|
||||
int fetch_next_is_available;
|
||||
int relevant_serial;
|
||||
int replay_code;
|
||||
int avoid_count;
|
||||
bool has_symlinks;
|
||||
bool peer_activated;
|
||||
bool res_shutdown;
|
||||
@ -638,6 +640,7 @@ struct mars_rotate {
|
||||
struct key_value_pair msgs[sizeof(rot_keys) / sizeof(char*)];
|
||||
};
|
||||
|
||||
static struct rw_semaphore rot_sem = __RWSEM_INITIALIZER(rot_sem);
|
||||
static LIST_HEAD(rot_anchor);
|
||||
|
||||
///////////////////////////////////////////////////////////////////////
|
||||
@ -1959,14 +1962,16 @@ int check_logfile(const char *peer, struct mars_dent *remote_dent, struct mars_d
|
||||
} else if (!rot->fetch_serial && rot->allow_update &&
|
||||
!rot->is_primary && !rot->old_is_primary &&
|
||||
(!rot->preferred_peer || !strcmp(rot->preferred_peer, peer)) &&
|
||||
(!rot->avoid_peer || strcmp(peer, rot->avoid_peer) || rot->avoid_count-- <= 0) &&
|
||||
(!rot->split_brain_serial || remote_dent->d_serial < rot->split_brain_serial) &&
|
||||
(dst_size < src_size || !local_dent)) {
|
||||
(dst_size < src_size || !local_dent)) {
|
||||
// start copy brick instance
|
||||
status = _update_file(parent, switch_path, rot->fetch_path, remote_dent->d_path, peer, src_size);
|
||||
MARS_DBG("update '%s' from peer '%s' status = %d\n", remote_dent->d_path, peer, status);
|
||||
if (likely(status >= 0)) {
|
||||
rot->fetch_serial = remote_dent->d_serial;
|
||||
rot->fetch_next_is_available = 0;
|
||||
brick_string_free(rot->avoid_peer);
|
||||
brick_string_free(rot->fetch_peer);
|
||||
rot->fetch_peer = brick_strdup(peer);
|
||||
}
|
||||
@ -2934,12 +2939,15 @@ void rot_destruct(void *_rot)
|
||||
{
|
||||
struct mars_rotate *rot = _rot;
|
||||
if (likely(rot)) {
|
||||
down_write(&rot_sem);
|
||||
list_del_init(&rot->rot_head);
|
||||
up_write(&rot_sem);
|
||||
write_info_links(rot);
|
||||
del_channel(rot->log_say);
|
||||
rot->log_say = NULL;
|
||||
brick_string_free(rot->fetch_path);
|
||||
brick_string_free(rot->fetch_peer);
|
||||
brick_string_free(rot->avoid_peer);
|
||||
brick_string_free(rot->preferred_peer);
|
||||
brick_string_free(rot->parent_path);
|
||||
brick_string_free(rot->parent_rest);
|
||||
@ -3002,8 +3010,11 @@ int make_log_init(void *buf, struct mars_dent *dent)
|
||||
rot->global = global;
|
||||
parent->d_private = rot;
|
||||
parent->d_private_destruct = rot_destruct;
|
||||
list_add_tail(&rot->rot_head, &rot_anchor);
|
||||
assign_keys(rot->msgs, rot_keys);
|
||||
|
||||
down_write(&rot_sem);
|
||||
list_add_tail(&rot->rot_head, &rot_anchor);
|
||||
up_write(&rot_sem);
|
||||
}
|
||||
|
||||
rot->replay_link = NULL;
|
||||
@ -4070,13 +4081,26 @@ done:
|
||||
if (fetch_brick &&
|
||||
(fetch_brick->power.led_off ||
|
||||
fetch_brick->power.force_off ||
|
||||
fetch_brick->copy_error ||
|
||||
!global->global_power.button ||
|
||||
!_check_allow(global, parent, "connect") ||
|
||||
!_check_allow(global, parent, "attach") ||
|
||||
(fetch_brick->copy_last == fetch_brick->copy_end &&
|
||||
(rot->fetch_next_is_available > 0 ||
|
||||
rot->fetch_round++ > 3)))) {
|
||||
int i;
|
||||
|
||||
for (i = 0; i < 4; i++) {
|
||||
if (fetch_brick->inputs[i] && fetch_brick->inputs[i]->brick)
|
||||
fetch_brick->inputs[i]->brick->power.io_timeout = 1;
|
||||
}
|
||||
if (fetch_brick->copy_error && !rot->avoid_peer && rot->fetch_peer) {
|
||||
rot->avoid_peer = brick_strdup(rot->fetch_peer);
|
||||
rot->avoid_count = 3;
|
||||
}
|
||||
fetch_brick = (void *)_kill_brick((void *)fetch_brick);
|
||||
if (!fetch_brick)
|
||||
mars_trigger();
|
||||
}
|
||||
rot->fetch_next_is_available = 0;
|
||||
rot->fetch_brick = fetch_brick;
|
||||
@ -4500,7 +4524,7 @@ int _update_syncstatus(struct mars_rotate *rot, struct copy_brick *copy, char *p
|
||||
if (rot->sync_finish_stamp.tv_sec) {
|
||||
struct kstat peer_time_stat = {};
|
||||
|
||||
peer_time_path = path_make("/mars/tree-%s", peer);
|
||||
peer_time_path = path_make("/mars/alive-%s", peer);
|
||||
status = mars_stat(peer_time_path, &peer_time_stat, true);
|
||||
if (unlikely(status < 0)) {
|
||||
MARS_ERR("cannot stat '%s'\n", peer_time_path);
|
||||
|
Loading…
Reference in New Issue
Block a user