From fd62940ed142a57ef4056c1f32b49681b2f5b892 Mon Sep 17 00:00:00 2001 From: Thomas Schoebel-Theuer Date: Sat, 17 Sep 2022 08:45:21 +0200 Subject: [PATCH] main: fix recovery upon defective logfile without successor Workaround was possible by manually providing an empty next logfile, forcing an internal rotate operation. However, IMHO this should not be exposed to userspace, but functionally migrated to kernelspace in the long term. Otherwise, possible races between userspace and kernelspace may explode. Control over logfiles content is not enough; even the sheer existence should be the responsibility of kernel code (as much as possible). Future fixes and improvements should go in this direction. Thus I don't add an extra marsadm command for creation of empty logfiles. The current marsadm code in cron (and siblings) should be decommissioned some day, after the kernel is fully responsible, even for create-resource and so on. --- kernel/sy_old/mars_main.c | 23 ++++++++++++++++++++++- 1 file changed, 22 insertions(+), 1 deletion(-) diff --git a/kernel/sy_old/mars_main.c b/kernel/sy_old/mars_main.c index 446be8d2..73362034 100644 --- a/kernel/sy_old/mars_main.c +++ b/kernel/sy_old/mars_main.c @@ -1737,6 +1737,15 @@ void _update_info(struct trans_logger_info *inf) done:; } +static +bool _is_trans_input_initialized(struct trans_logger_input *trans_input) +{ + if (!trans_input || + !trans_input->is_operating) + return false; + return true; +} + static bool _is_trans_input_fully_working(struct trans_logger_input *trans_input) { @@ -4549,7 +4558,19 @@ int make_log_init(struct mars_dent *dent) if (rot->trans_brick) { struct trans_logger_input *trans_input = rot->trans_brick->inputs[rot->trans_brick->old_input_nr]; - if (_is_trans_input_fully_working(trans_input)) { + /* There may be initialized inputs, but not yet fully working + * (e.g. after a defective logfile after a crash), or + * not even connected (e.g. 
due to a non-existing logfile + * or an unusable /mars/ filesystem, etc). + * Prefer an initialized input, for further setup steps. + * When they cannot be connected for a longer time (for + * whatever reason), they should be + * decommissioned in their current role. + * When nothing helps, the last resort in designated primary + * role may be creation of an empty new logfile. + * If even this fails, we have no chance :( + */ + if (_is_trans_input_initialized(trans_input)) { aio_path = path_make("%s/log-%09d-%s", parent_path, trans_input->inf.inf_sequence,