/*
 * MARS Long Distance Replication Software
 *
 * This file is part of MARS project: http://schoebel.github.io/mars/
 *
 * Copyright (C) 2010-2014 Thomas Schoebel-Theuer
 * Copyright (C) 2011-2014 1&1 Internet AG
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License along
 * with this program; if not, write to the Free Software Foundation, Inc.,
 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 */
|
|
|
|
|
2010-08-08 20:51:20 +00:00
|
|
|
#ifndef MARS_TRANS_LOGGER_H
|
|
|
|
#define MARS_TRANS_LOGGER_H
|
|
|
|
|
2011-06-30 13:15:52 +00:00
|
|
|
/*
 * A region spans 2^REGION_SIZE_BITS bytes, i.e. 16 pages
 * (PAGE_SHIFT + 4 bits).
 */
#define REGION_SIZE_BITS (PAGE_SHIFT + 4)
#define REGION_SIZE (1 << REGION_SIZE_BITS)

/* Number of logger queues; sizes the per-brick q_phase[] and rkd[] arrays. */
#define LOGGER_QUEUES 4
|
2010-08-08 20:51:20 +00:00
|
|
|
|
2010-08-11 16:02:08 +00:00
|
|
|
#include <linux/time.h>
|
2011-04-18 09:23:04 +00:00
|
|
|
|
2012-10-15 14:35:36 +00:00
|
|
|
#include "mars.h"
|
2011-04-18 09:23:04 +00:00
|
|
|
#include "lib_log.h"
|
|
|
|
#include "lib_pairing_heap.h"
|
2011-04-18 14:14:16 +00:00
|
|
|
#include "lib_queue.h"
|
2012-12-30 06:50:47 +00:00
|
|
|
#include "lib_timing.h"
|
2015-12-10 07:24:04 +00:00
|
|
|
#include "lib_rank.h"
|
2010-08-08 20:51:20 +00:00
|
|
|
|
2019-03-12 10:38:35 +00:00
|
|
|
/*
 * The extra statistics counters in struct trans_logger_brick are only
 * compiled in for debug builds.
 */
#ifdef CONFIG_MARS_DEBUG
#define ADDITIONAL_COUNTERS
#endif
|
|
|
|
|
2012-10-15 14:35:36 +00:00
|
|
|
///////////////////////// global tuning ////////////////////////
|
|
|
|
|
2012-12-30 20:48:44 +00:00
|
|
|
/* 0 = early completion of all writes
|
|
|
|
* 1 = early completion of non-sync
|
|
|
|
* 2 = late completion
|
|
|
|
*/
|
|
|
|
extern int trans_logger_completion_semantics;
|
2019-07-25 08:27:43 +00:00
|
|
|
extern int trans_logger_allow_compress;
|
2012-09-26 09:22:36 +00:00
|
|
|
extern int trans_logger_mem_usage; // in KB
|
2019-04-04 08:45:29 +00:00
|
|
|
extern int trans_logger_pressure_limit;
|
2019-06-25 06:06:35 +00:00
|
|
|
extern int trans_logger_disable_pressure; /* only for testing */
|
2019-06-25 05:29:57 +00:00
|
|
|
extern int trans_logger_report_interval;
|
2019-06-24 13:33:58 +00:00
|
|
|
extern int trans_logger_writeback_maxage;
|
2013-04-18 14:25:04 +00:00
|
|
|
extern int trans_logger_max_interleave;
|
2013-04-22 07:06:27 +00:00
|
|
|
extern int trans_logger_resume;
|
2013-07-03 06:27:42 +00:00
|
|
|
extern int trans_logger_replay_timeout; // in s
|
2012-11-13 09:59:00 +00:00
|
|
|
extern atomic_t global_mshadow_count;
|
|
|
|
extern atomic64_t global_mshadow_used;
|
2012-09-26 09:22:36 +00:00
|
|
|
|
2012-10-15 14:35:36 +00:00
|
|
|
struct writeback_group {
|
2017-12-10 21:37:21 +00:00
|
|
|
struct rw_semaphore mutex;
|
2012-10-15 14:35:36 +00:00
|
|
|
struct trans_logger_brick *leader;
|
|
|
|
loff_t biggest;
|
|
|
|
struct list_head group_anchor;
|
|
|
|
// tuning
|
|
|
|
struct mars_limiter limiter;
|
|
|
|
int until_percent;
|
|
|
|
};
|
|
|
|
|
|
|
|
extern struct writeback_group global_writeback;
|
|
|
|
|
2010-08-11 16:02:08 +00:00
|
|
|
////////////////////////////////////////////////////////////////////
|
|
|
|
|
2011-04-18 14:14:16 +00:00
|
|
|
/*
 * Instantiate the pairing-heap type for the "logger" name.
 * NOTE(review): presumably this declares struct pairing_heap_logger, which
 * is embedded in the structs below (see lib_pairing_heap.h) -- confirm.
 */
_PAIRING_HEAP_TYPEDEF(logger,)
|
2010-11-12 11:18:40 +00:00
|
|
|
|
2010-08-11 16:02:08 +00:00
|
|
|
struct logger_queue {
|
2011-04-18 14:14:16 +00:00
|
|
|
QUEUE_ANCHOR(logger,loff_t,logger);
|
2011-04-29 09:36:10 +00:00
|
|
|
struct trans_logger_brick *q_brick;
|
2011-03-27 15:18:38 +00:00
|
|
|
const char *q_insert_info;
|
|
|
|
const char *q_pushback_info;
|
|
|
|
const char *q_fetch_info;
|
2012-12-30 06:50:47 +00:00
|
|
|
struct banning q_banning;
|
2012-12-29 22:26:17 +00:00
|
|
|
int no_progress_count;
|
|
|
|
int pushback_count;
|
2011-04-18 14:14:16 +00:00
|
|
|
};
|
|
|
|
|
|
|
|
struct logger_head {
|
|
|
|
struct list_head lh_head;
|
|
|
|
loff_t *lh_pos;
|
|
|
|
struct pairing_heap_logger ph;
|
2010-08-08 20:51:20 +00:00
|
|
|
};
|
|
|
|
|
2010-08-11 16:02:08 +00:00
|
|
|
////////////////////////////////////////////////////////////////////
|
|
|
|
|
2011-11-03 11:17:59 +00:00
|
|
|
/*
 * Input indices of a trans_logger brick.
 *
 * Reading and writeback share input 0. With CONFIG_MARS_LOGROT there are
 * two distinct log inputs (1 and 2); without it, both log names alias
 * input 1.
 */
#define TL_INPUT_READ 0
#define TL_INPUT_WRITEBACK 0
#define TL_INPUT_LOG1 1

#ifdef CONFIG_MARS_LOGROT

#define TL_INPUT_LOG2 2
#define TL_INPUT_NR 3

#else

#define TL_INPUT_LOG2 1
#define TL_INPUT_NR 2

#endif
|
|
|
|
|
2018-04-09 14:15:00 +00:00
|
|
|
/*
 * Replay status codes.
 * NOTE(review): presumably the non-error values of
 * trans_logger_brick.replay_code -- confirm.
 */
#define TL_REPLAY_RUNNING 0
#define TL_REPLAY_FINISHED 1
#define TL_REPLAY_INCOMPLETE 2
|
|
|
|
|
2011-04-08 09:52:46 +00:00
|
|
|
struct writeback_info {
|
2011-10-03 17:31:02 +00:00
|
|
|
struct trans_logger_brick *w_brick;
|
2011-04-18 14:14:16 +00:00
|
|
|
struct logger_head w_lh;
|
2011-04-08 09:52:46 +00:00
|
|
|
loff_t w_pos;
|
|
|
|
int w_len;
|
2011-04-18 14:14:16 +00:00
|
|
|
int w_error;
|
2011-04-08 09:52:46 +00:00
|
|
|
struct list_head w_collect_list; // list of collected orig requests
|
|
|
|
struct list_head w_sub_read_list; // for saving the old data before overwrite
|
|
|
|
struct list_head w_sub_write_list; // for overwriting
|
2011-04-10 16:59:06 +00:00
|
|
|
atomic_t w_sub_read_count;
|
|
|
|
atomic_t w_sub_write_count;
|
2011-04-19 14:46:38 +00:00
|
|
|
atomic_t w_sub_log_count;
|
2011-04-10 16:59:06 +00:00
|
|
|
void (*read_endio)(struct generic_callback *cb);
|
|
|
|
void (*write_endio)(struct generic_callback *cb);
|
2011-04-08 09:52:46 +00:00
|
|
|
};
|
|
|
|
|
2010-12-15 12:13:18 +00:00
|
|
|
struct trans_logger_mref_aspect {
|
|
|
|
GENERIC_ASPECT(mref);
|
2011-10-03 17:31:02 +00:00
|
|
|
struct trans_logger_brick *my_brick;
|
2011-04-29 09:36:10 +00:00
|
|
|
struct trans_logger_input *my_input;
|
2011-05-13 11:19:28 +00:00
|
|
|
struct trans_logger_input *log_input;
|
2011-04-18 14:14:16 +00:00
|
|
|
struct logger_head lh;
|
2010-08-11 16:02:08 +00:00
|
|
|
struct list_head hash_head;
|
2011-04-18 14:14:16 +00:00
|
|
|
//struct list_head q_head;
|
2011-03-20 17:38:08 +00:00
|
|
|
struct list_head pos_head;
|
2011-04-08 09:52:46 +00:00
|
|
|
struct list_head replay_head;
|
|
|
|
struct list_head collect_head;
|
2011-04-18 14:14:16 +00:00
|
|
|
struct pairing_heap_logger ph;
|
2010-12-15 12:13:18 +00:00
|
|
|
struct trans_logger_mref_aspect *shadow_ref;
|
2011-05-13 11:19:28 +00:00
|
|
|
struct trans_logger_mref_aspect *orig_mref_a;
|
2011-04-08 09:52:46 +00:00
|
|
|
void *shadow_data;
|
2019-03-14 20:11:51 +00:00
|
|
|
int orig_flags;
|
2012-02-08 15:44:53 +00:00
|
|
|
int wb_error;
|
2011-04-01 11:18:32 +00:00
|
|
|
bool do_dealloc;
|
2011-04-08 09:52:46 +00:00
|
|
|
bool do_buffered;
|
2011-03-08 16:45:52 +00:00
|
|
|
bool is_hashed;
|
2013-01-01 11:35:33 +00:00
|
|
|
bool is_stable;
|
2011-04-08 09:52:46 +00:00
|
|
|
bool is_dirty;
|
2011-04-10 16:59:06 +00:00
|
|
|
bool is_collected;
|
2012-02-08 15:26:37 +00:00
|
|
|
bool is_fired;
|
2011-05-26 14:32:32 +00:00
|
|
|
bool is_completed;
|
2015-04-27 06:59:43 +00:00
|
|
|
bool is_endio;
|
2013-01-01 21:36:33 +00:00
|
|
|
bool is_persistent;
|
2014-03-10 15:22:44 +00:00
|
|
|
bool is_emergency;
|
2019-02-19 09:18:29 +00:00
|
|
|
struct lamport_time stamp;
|
2011-03-20 17:38:08 +00:00
|
|
|
loff_t log_pos;
|
2010-08-11 16:02:08 +00:00
|
|
|
struct generic_callback cb;
|
2011-04-10 16:59:06 +00:00
|
|
|
struct writeback_info *wb;
|
2011-04-08 09:52:46 +00:00
|
|
|
struct list_head sub_list;
|
|
|
|
struct list_head sub_head;
|
|
|
|
int total_sub_count;
|
2011-06-30 13:15:52 +00:00
|
|
|
int alloc_len;
|
2011-04-08 09:52:46 +00:00
|
|
|
atomic_t current_sub_count;
|
2010-08-11 16:02:08 +00:00
|
|
|
};
|
|
|
|
|
2013-04-12 09:55:52 +00:00
|
|
|
struct trans_logger_hash_anchor;
|
|
|
|
|
2010-08-11 16:02:08 +00:00
|
|
|
struct trans_logger_brick {
|
|
|
|
MARS_BRICK(trans_logger);
|
2010-08-23 05:06:06 +00:00
|
|
|
// parameters
|
2012-09-25 15:41:07 +00:00
|
|
|
struct mars_limiter *replay_limiter;
|
2011-06-30 13:15:52 +00:00
|
|
|
int shadow_mem_limit; // max # master shadows
|
2012-08-03 08:42:51 +00:00
|
|
|
bool replay_mode; // mode of operation
|
|
|
|
bool continuous_replay_mode; // mode of operation
|
2011-02-23 20:48:06 +00:00
|
|
|
bool log_reads; // additionally log pre-images
|
2012-07-23 07:17:18 +00:00
|
|
|
bool cease_logging; // direct IO without logging (only in case of EMERGENCY)
|
2011-03-18 13:15:40 +00:00
|
|
|
bool debug_shortcut; // only for testing! never use in production!
|
2011-04-08 09:52:46 +00:00
|
|
|
loff_t replay_start_pos; // where to start replay
|
|
|
|
loff_t replay_end_pos; // end of replay
|
2011-11-03 11:17:59 +00:00
|
|
|
int new_input_nr; // whereto we should switchover ASAP
|
2013-07-01 08:17:38 +00:00
|
|
|
int replay_tolerance; // how many bytes to ignore at truncated logfiles
|
2011-02-23 20:48:06 +00:00
|
|
|
// readonly from outside
|
2013-07-08 05:23:03 +00:00
|
|
|
loff_t replay_current_pos; // end of replay
|
2011-11-03 11:17:59 +00:00
|
|
|
int log_input_nr; // where we are currently logging to
|
|
|
|
int old_input_nr; // where old IO requests may be on the fly
|
2011-04-08 09:52:46 +00:00
|
|
|
int replay_code; // replay errors (if any)
|
2022-06-10 15:57:39 +00:00
|
|
|
int mars_error_code; /* MARS-specific error information */
|
|
|
|
int byte_code; /* repeated byte pattern */
|
2013-04-22 07:06:27 +00:00
|
|
|
bool stopped_logging; // direct IO without logging (only in case of EMERGENCY)
|
2011-03-20 17:38:08 +00:00
|
|
|
// private
|
2014-04-03 17:37:20 +00:00
|
|
|
int disk_io_error; // replay errors from callbacks
|
2013-04-12 09:55:52 +00:00
|
|
|
struct trans_logger_hash_anchor **hash_table;
|
2012-10-15 14:35:36 +00:00
|
|
|
struct list_head group_head;
|
2011-04-08 09:52:46 +00:00
|
|
|
loff_t old_margin;
|
2017-12-10 21:43:28 +00:00
|
|
|
struct rw_semaphore replay_mutex;
|
2011-04-08 09:52:46 +00:00
|
|
|
struct list_head replay_list;
|
2011-04-29 09:36:10 +00:00
|
|
|
struct task_struct *thread;
|
2011-06-30 13:15:52 +00:00
|
|
|
wait_queue_head_t worker_event;
|
|
|
|
wait_queue_head_t caller_event;
|
2011-04-29 09:36:10 +00:00
|
|
|
// statistics
|
2011-06-30 13:15:52 +00:00
|
|
|
atomic64_t shadow_mem_used;
|
2011-03-27 15:18:38 +00:00
|
|
|
atomic_t replay_count;
|
2013-01-02 17:37:43 +00:00
|
|
|
atomic_t any_fly_count;
|
|
|
|
atomic_t log_fly_count;
|
2019-03-12 10:38:35 +00:00
|
|
|
#ifdef ADDITIONAL_COUNTERS
|
2011-03-10 11:40:06 +00:00
|
|
|
atomic_t hash_count;
|
2011-03-08 16:45:52 +00:00
|
|
|
atomic_t mshadow_count;
|
|
|
|
atomic_t sshadow_count;
|
2011-03-10 11:40:06 +00:00
|
|
|
atomic_t outer_balance_count;
|
|
|
|
atomic_t inner_balance_count;
|
|
|
|
atomic_t sub_balance_count;
|
2011-04-10 16:59:06 +00:00
|
|
|
atomic_t wb_balance_count;
|
2012-02-12 15:36:34 +00:00
|
|
|
atomic_t total_hash_insert_count;
|
|
|
|
atomic_t total_hash_find_count;
|
|
|
|
atomic_t total_hash_extend_count;
|
2011-06-10 13:57:52 +00:00
|
|
|
atomic_t total_replay_count;
|
2012-02-25 20:36:52 +00:00
|
|
|
atomic_t total_replay_conflict_count;
|
2011-04-29 09:36:10 +00:00
|
|
|
atomic_t total_cb_count;
|
2011-03-18 13:15:40 +00:00
|
|
|
atomic_t total_read_count;
|
|
|
|
atomic_t total_write_count;
|
2011-05-13 11:19:28 +00:00
|
|
|
atomic_t total_flush_count;
|
2011-03-18 13:15:40 +00:00
|
|
|
atomic_t total_writeback_count;
|
2011-05-13 11:19:28 +00:00
|
|
|
atomic_t total_writeback_cluster_count;
|
2011-03-18 13:15:40 +00:00
|
|
|
atomic_t total_shortcut_count;
|
2011-04-01 11:18:32 +00:00
|
|
|
atomic_t total_mshadow_count;
|
|
|
|
atomic_t total_sshadow_count;
|
2012-02-12 15:36:34 +00:00
|
|
|
atomic_t total_mshadow_buffered_count;
|
|
|
|
atomic_t total_sshadow_buffered_count;
|
2011-05-13 11:19:28 +00:00
|
|
|
atomic_t total_round_count;
|
|
|
|
atomic_t total_restart_count;
|
2011-12-09 12:54:30 +00:00
|
|
|
atomic_t total_delay_count;
|
2019-03-12 10:38:35 +00:00
|
|
|
#endif
|
2021-02-05 08:46:22 +00:00
|
|
|
int pressure_mode;
|
2010-08-11 16:02:08 +00:00
|
|
|
// queues
|
2012-02-11 19:01:16 +00:00
|
|
|
struct logger_queue q_phase[LOGGER_QUEUES];
|
2015-12-10 07:24:04 +00:00
|
|
|
struct rank_data rkd[LOGGER_QUEUES];
|
2021-02-07 09:46:31 +00:00
|
|
|
#ifdef CONFIG_MARS_DEBUG
|
|
|
|
int bail[LOGGER_QUEUES];
|
|
|
|
int selected;
|
|
|
|
#endif
|
2020-01-29 08:42:43 +00:00
|
|
|
int caller_flag;
|
|
|
|
int worker_flag;
|
2011-06-30 13:15:52 +00:00
|
|
|
bool delay_callers;
|
2020-01-28 05:56:46 +00:00
|
|
|
/* termination */
|
|
|
|
bool terminate;
|
|
|
|
bool terminated;
|
2010-08-11 16:02:08 +00:00
|
|
|
};
|
|
|
|
|
2011-04-29 09:36:10 +00:00
|
|
|
struct trans_logger_output {
|
|
|
|
MARS_OUTPUT(trans_logger);
|
|
|
|
};
|
|
|
|
|
2013-01-07 16:43:14 +00:00
|
|
|
/* Size in bytes of the inf_host[] buffer in struct trans_logger_info. */
#define MAX_HOST_LEN 32
|
|
|
|
|
2012-12-17 07:25:17 +00:00
|
|
|
struct trans_logger_info {
|
2012-12-30 22:44:48 +00:00
|
|
|
// to be maintained / initialized from outside
|
2012-12-17 07:25:17 +00:00
|
|
|
void (*inf_callback)(struct trans_logger_info *inf);
|
|
|
|
void *inf_private;
|
2013-01-07 16:43:14 +00:00
|
|
|
char inf_host[MAX_HOST_LEN];
|
2020-06-28 16:00:44 +00:00
|
|
|
int inf_index;
|
2012-12-17 07:25:17 +00:00
|
|
|
int inf_sequence; // logfile sequence number
|
|
|
|
|
|
|
|
// maintained by trans_logger
|
|
|
|
loff_t inf_min_pos; // current replay position (both in replay mode and in logging mode)
|
|
|
|
loff_t inf_max_pos; // dito, indicating the "dirty" area which could be potentially "inconsistent"
|
|
|
|
loff_t inf_log_pos; // position of transaction logging (may be ahead of replay position)
|
2019-02-19 09:18:29 +00:00
|
|
|
struct lamport_time inf_min_pos_stamp; // when the data has been _successfully_ overwritten
|
|
|
|
struct lamport_time inf_max_pos_stamp; // when the data has _started_ overwrite (maybe "trashed" in case of errors / aborts)
|
|
|
|
struct lamport_time inf_log_pos_stamp; // stamp from transaction log
|
2014-03-01 00:48:28 +00:00
|
|
|
bool inf_is_replaying;
|
2012-12-17 07:25:17 +00:00
|
|
|
bool inf_is_logging;
|
|
|
|
};
|
|
|
|
|
2010-08-11 16:02:08 +00:00
|
|
|
struct trans_logger_input {
|
|
|
|
MARS_INPUT(trans_logger);
|
2011-05-13 11:19:28 +00:00
|
|
|
// parameters
|
2011-10-05 14:59:29 +00:00
|
|
|
// informational
|
2012-12-17 07:25:17 +00:00
|
|
|
struct trans_logger_info inf;
|
2011-05-13 11:19:28 +00:00
|
|
|
// readonly from outside
|
2012-12-31 09:33:47 +00:00
|
|
|
atomic_t log_ref_count;
|
2013-01-02 21:43:50 +00:00
|
|
|
atomic_t pos_count;
|
2012-09-11 13:14:07 +00:00
|
|
|
bool is_operating;
|
|
|
|
long long last_jiffies;
|
2011-05-13 11:19:28 +00:00
|
|
|
|
|
|
|
// private
|
2011-04-29 09:36:10 +00:00
|
|
|
struct log_status logst;
|
2011-11-03 11:17:59 +00:00
|
|
|
struct list_head pos_list;
|
2019-06-24 13:33:58 +00:00
|
|
|
long long inf_min_jiffies;
|
|
|
|
long long inf_min_old;
|
2012-12-17 07:25:17 +00:00
|
|
|
long long inf_last_jiffies;
|
2012-12-30 22:44:48 +00:00
|
|
|
struct semaphore inf_mutex;
|
2010-08-08 20:51:20 +00:00
|
|
|
};
|
|
|
|
|
|
|
|
/*
 * NOTE(review): presumably expands to the brick-framework type glue for the
 * trans_logger brick/input/output structs (macro defined outside this
 * file) -- confirm.
 */
MARS_TYPES(trans_logger);
|
|
|
|
|
|
|
|
#endif
|