all: crash testing hardening infrastructure

This is important for further hardening of MARS.
Simulate crashes at the "wrong moment", typically with
IO requests in flight, or just before a symlink update.

Only for debugging. Never use in production.
This commit is contained in:
Thomas Schoebel-Theuer 2016-02-06 23:35:04 +01:00 committed by Thomas Schoebel-Theuer
parent 401f87aeee
commit 90653476f6
4 changed files with 51 additions and 0 deletions

View File

@ -459,6 +459,20 @@ extern void mref_checksum(struct mref_object *mref);
/////////////////////////////////////////////////////////////////////////
/* Crash-testing instrumentation.
 * Only for debugging. Never use this in production.
 * Simulate a crash at the "wrong moment".
 *
 * Call sites pass a fixed numeric mode identifying the crash point.
 * With CONFIG_MARS_DEBUG, _crashme() compares that mode against
 * mars_crash_mode and, on a match, restarts the machine (optionally
 * syncing first when do_sync is true).
 *
 * Without CONFIG_MARS_DEBUG, _crashme() is an empty stub so that call
 * sites need no #ifdef of their own.
 */
#ifdef CONFIG_MARS_DEBUG
extern int mars_crash_mode;
extern void _crashme(int mode, bool do_sync);
#else
/* Must be "static inline", not "extern inline": a header stub declared
 * "extern inline" either emits an external definition in every
 * translation unit (C99/gnu11 semantics -> duplicate symbols) or emits
 * none at all (gnu89 semantics -> unresolved symbol when not inlined).
 */
static inline void _crashme(int mode, bool do_sync) {}
#endif
/////////////////////////////////////////////////////////////////////////
// init
extern int init_mars(void);

View File

@ -966,6 +966,8 @@ void _trans_logger_endio(struct generic_callback *cb)
struct trans_logger_mref_aspect *mref_a;
struct trans_logger_brick *brick;
_crashme(20, false);
mref_a = cb->cb_private;
CHECK_PTR(mref_a, err);
if (unlikely(&mref_a->cb != cb)) {
@ -1146,6 +1148,8 @@ void wb_endio(struct generic_callback *cb)
void (**_endio)(struct generic_callback *cb);
void (*endio)(struct generic_callback *cb);
_crashme(21, false);
LAST_CALLBACK(cb);
sub_mref_a = cb->cb_private;
CHECK_PTR(sub_mref_a, err);
@ -2698,6 +2702,8 @@ void replay_endio(struct generic_callback *cb)
bool ok;
unsigned long flags;
_crashme(22, false);
LAST_CALLBACK(cb);
CHECK_PTR(mref_a, err);
brick = mref_a->my_brick;

View File

@ -179,6 +179,23 @@ EXPORT_SYMBOL_GPL(mars_reset_emergency);
int mars_keep_msg = 10;
EXPORT_SYMBOL_GPL(mars_keep_msg);
#ifdef CONFIG_MARS_DEBUG

#include <linux/reboot.h>

/* Crash point currently armed; compared against the mode argument
 * that each _crashme() call site passes.
 */
int mars_crash_mode = 0;
EXPORT_SYMBOL_GPL(mars_crash_mode);

/* Debugging aid: deliberately restart the machine at a chosen point.
 *
 * @mode:    identifier of the calling crash point
 * @do_sync: when true, call mars_sync() before restarting
 *
 * Does nothing unless @mode equals mars_crash_mode.
 */
void _crashme(int mode, bool do_sync)
{
	/* Not the armed crash point: carry on normally. */
	if (mode != mars_crash_mode)
		return;

	if (do_sync)
		mars_sync();

	emergency_restart();
}

#endif
#define MARS_SYMLINK_MAX 1023
struct key_value_pair {
@ -1130,6 +1147,8 @@ int _update_replay_link(struct mars_rotate *rot, struct trans_logger_info *inf)
goto out;
}
_crashme(1, true);
res = _update_link_when_necessary(rot, "replay", old, new);
out:
@ -1224,6 +1243,8 @@ int _update_version_link(struct mars_rotate *rot, struct trans_logger_info *inf)
goto out;
}
_crashme(2, true);
res = _update_link_when_necessary(rot , "version", old, new);
out:
@ -2477,6 +2498,7 @@ void _create_new_logfile(const char *path)
} else {
MARS_DBG("created empty logfile '%s'\n", path);
mars_sync();
_crashme(10, false);
filp_close(f, NULL);
mars_trigger();
}
@ -4276,6 +4298,8 @@ int _update_syncstatus(struct mars_rotate *rot, struct copy_brick *copy, char *p
goto done;
}
_crashme(3, true);
status = _update_link_when_necessary(rot, "syncpos", peer_replay_link, syncpos_path);
/* Sync is only marked as finished when the syncpos
* production was successful and timestamps are recent enough.
@ -4296,6 +4320,8 @@ int _update_syncstatus(struct mars_rotate *rot, struct copy_brick *copy, char *p
src = path_make("%lld", copy->copy_last);
dst = path_make("%s/syncstatus-%s", rot->parent_path, my_id());
_crashme(4, true);
status = _update_link_when_necessary(rot, "syncstatus", src, dst);
brick_string_free(src);
@ -4303,6 +4329,8 @@ int _update_syncstatus(struct mars_rotate *rot, struct copy_brick *copy, char *p
src = path_make("%lld,%lld", copy->verify_ok_count, copy->verify_error_count);
dst = path_make("%s/verifystatus-%s", rot->parent_path, my_id());
_crashme(5, true);
(void)_update_link_when_necessary(rot, "verifystatus", src, dst);
memset(&rot->sync_finish_stamp, 0, sizeof(rot->sync_finish_stamp));

View File

@ -292,6 +292,9 @@ ctl_table mars_table[] = {
INT_ENTRY("show_statistics_server", server_show_statist, 0600),
INT_ENTRY("show_connections", global_show_connections, 0600),
INT_ENTRY("aio_sync_mode", aio_sync_mode, 0600),
#ifdef CONFIG_MARS_DEBUG
INT_ENTRY("debug_crash_mode", mars_crash_mode, 0600),
#endif
INT_ENTRY("logger_completion_semantics", trans_logger_completion_semantics, 0600),
INT_ENTRY("logger_do_crc", trans_logger_do_crc, 0600),
INT_ENTRY("syslog_min_class", brick_say_syslog_min, 0600),