mars/kernel/xio_bricks/xio_trans_logger.h
2015-12-31 10:41:48 +01:00

270 lines
7.8 KiB
C

/*
* MARS Long Distance Replication Software
*
* This file is part of MARS project: http://schoebel.github.io/mars/
*
* Copyright (C) 2010-2014 Thomas Schoebel-Theuer
* Copyright (C) 2011-2014 1&1 Internet AG
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License along
* with this program; if not, write to the Free Software Foundation, Inc.,
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
*/
#ifndef XIO_TRANS_LOGGER_H
#define XIO_TRANS_LOGGER_H
/* Region geometry: REGION_SIZE is 2^REGION_SIZE_BITS, i.e. 16 times
 * (1 << PAGE_SHIFT). PAGE_SHIFT is not defined at this point of the header;
 * because macros are expanded at their point of use, it only has to be
 * visible wherever REGION_SIZE_BITS / REGION_SIZE are actually used.
 */
#define REGION_SIZE_BITS (PAGE_SHIFT + 4)
#define REGION_SIZE (1 << REGION_SIZE_BITS)
/* Number of entries in trans_logger_brick::q_phase[]. */
#define LOGGER_QUEUES 4
#include <linux/time.h>
#include "xio.h"
#include "lib_log.h"
#include "../lib/lib_pairing_heap.h"
#include "../lib/lib_queue.h"
#include "../lib/lib_timing.h"
/************************ global tuning ***********************/
/* The variables below are only declared here; their definitions live
 * outside this header.
 */
/* Completion semantics selector:
 * 0 = early completion of all writes
 * 1 = early completion of non-sync
 * 2 = late completion
 */
extern int trans_logger_completion_semantics;
extern int trans_logger_do_crc;
extern int trans_logger_mem_usage; /* in KB */
extern int trans_logger_max_interleave;
extern int trans_logger_resume;
extern int trans_logger_replay_timeout; /* in seconds */
/* Global shadow accounting.
 * NOTE(review): the per-brick counterparts appear to be
 * trans_logger_brick::mshadow_count and ::shadow_mem_used - confirm.
 */
extern atomic_t global_mshadow_count;
extern atomic64_t global_mshadow_used;
/* Writeback state, published through the global_writeback instance
 * declared right after the struct.
 * NOTE(review): group_anchor presumably anchors the list that bricks join
 * via trans_logger_brick::group_head, with "leader" pointing at one of
 * them - confirm in the implementation.
 */
struct writeback_group {
rwlock_t lock;
struct trans_logger_brick *leader;
loff_t biggest;
struct list_head group_anchor;
/* tuning */
struct rate_limiter limiter;
int until_percent;
};
/* Defined outside this header. */
extern struct writeback_group global_writeback;
/******************************************************************/
/* Instantiate the pairing-heap declarations for the "logger" flavour.
 * NOTE(review): this presumably provides struct pairing_heap_logger, which
 * is embedded in logger_head and trans_logger_aio_aspect - see
 * ../lib/lib_pairing_heap.h for the macro definition.
 */
_PAIRING_HEAP_TYPEDEF(logger, /*empty*/)
/* One queue of a trans_logger brick; trans_logger_brick embeds
 * LOGGER_QUEUES of these as q_phase[].
 */
struct logger_queue {
/* NOTE(review): QUEUE_ANCHOR presumably expands to the generic queue
 * bookkeeping members (see ../lib/lib_queue.h) - confirm.
 */
QUEUE_ANCHOR(logger, loff_t, logger);
struct trans_logger_brick *q_brick; /* NOTE(review): presumably the owning brick - confirm */
/* Informational strings, one each for insert / pushback / fetch. */
const char *q_insert_info;
const char *q_pushback_info;
const char *q_fetch_info;
struct banning q_banning;
int no_progress_count;
int pushback_count;
};
/* Header embedded in writeback_info (w_lh) and trans_logger_aio_aspect (lh).
 * It bundles a list link, a pointer to a loff_t position and a
 * struct pairing_heap_logger member.
 * NOTE(review): lh_pos presumably points at the position used as the
 * ordering key - confirm.
 */
struct logger_head {
struct list_head lh_head;
loff_t *lh_pos;
struct pairing_heap_logger ph;
};
/******************************************************************/
/* Input indices of a trans_logger brick.
 * TL_INPUT_READ and TL_INPUT_WRITEBACK have the same value (0), so both
 * names refer to the same input. TL_INPUT_NR is the number of distinct
 * indices (0..2).
 */
#define TL_INPUT_READ 0
#define TL_INPUT_WRITEBACK 0
#define TL_INPUT_LOG1 1
#define TL_INPUT_LOG2 2
#define TL_INPUT_NR 3
/* Bookkeeping for a writeback: the brick it belongs to, a position and
 * length, the collected original requests, the sub-request lists with their
 * counters, and the completion callbacks for the read and write sides.
 * NOTE(review): w_pos / w_len presumably describe the range being written
 * back - confirm units and semantics in the implementation.
 */
struct writeback_info {
struct trans_logger_brick *w_brick;
struct logger_head w_lh;
loff_t w_pos;
int w_len;
int w_error;
struct list_head w_collect_list; /* list of collected orig requests */
struct list_head w_sub_read_list; /* for saving the old data before overwrite */
struct list_head w_sub_write_list; /* for overwriting */
/* Counters, one each for sub-reads, sub-writes and log sub-requests. */
atomic_t w_sub_read_count;
atomic_t w_sub_write_count;
atomic_t w_sub_log_count;
/* Callbacks taking a generic_callback, one for reads and one for writes. */
void (*read_endio)(struct generic_callback *cb);
void (*write_endio)(struct generic_callback *cb);
};
/* Per-request private data of trans_logger.
 * NOTE(review): GENERIC_ASPECT(aio) presumably supplies the common aspect
 * header that links this struct to an aio object - confirm in xio.h.
 */
struct trans_logger_aio_aspect {
GENERIC_ASPECT(aio);
/* Back pointers to the brick and inputs this request is associated with. */
struct trans_logger_brick *my_brick;
struct trans_logger_input *my_input;
struct trans_logger_input *log_input;
struct logger_head lh;
/* List links, one per list this request can be a member of. */
struct list_head hash_head;
struct list_head pos_head;
struct list_head replay_head;
struct list_head collect_head;
struct pairing_heap_logger ph;
/* Links to related aspects and the shadow data buffer. */
struct trans_logger_aio_aspect *shadow_aio;
struct trans_logger_aio_aspect *orig_aio_a;
void *shadow_data;
int orig_rw;
int wb_error;
/* State flags. */
bool do_dealloc;
bool do_buffered;
bool is_hashed;
bool is_stable;
bool is_dirty;
bool is_collected;
bool is_fired;
bool is_completed;
bool is_endio;
bool is_persistent;
bool is_emergency;
struct timespec stamp;
loff_t log_pos;
struct generic_callback cb;
struct writeback_info *wb;
/* Sub-request bookkeeping. */
struct list_head sub_list;
struct list_head sub_head;
int total_sub_count;
int alloc_len;
atomic_t current_sub_count;
};
/* Only used through pointers in this header, so a forward declaration is
 * sufficient here.
 */
struct trans_logger_hash_anchor;
/* Per-instance state of a trans_logger brick. The members are grouped into
 * parameters, fields that are read-only for outside code, private
 * state, statistics counters and the work queues.
 */
struct trans_logger_brick {
XIO_BRICK(trans_logger);
/* parameters */
struct rate_limiter *replay_limiter;
int shadow_mem_limit; /* max # master shadows */
bool replay_mode; /* mode of operation */
bool continuous_replay_mode; /* mode of operation */
bool log_reads; /* additionally log pre-images */
bool cease_logging; /* direct IO without logging (only in case of EMERGENCY) */
loff_t replay_start_pos; /* where to start replay */
loff_t replay_end_pos; /* end of replay */
int new_input_nr; /* where to switch over to ASAP */
int replay_tolerance; /* how many bytes to ignore at truncated logfiles */
/* readonly from outside */
/* NOTE(review): presumably the current replay position; the former comment
 * "end of replay" duplicated the one on replay_end_pos - confirm.
 */
loff_t replay_current_pos;
int log_input_nr; /* where we are currently logging to */
int old_input_nr; /* where old IO requests may be on the fly */
int replay_code; /* replay errors (if any) */
/* NOTE(review): presumably reports that logging has actually stopped
 * (cf. the cease_logging parameter); the former comment was a copy of
 * the cease_logging one - confirm.
 */
bool stopped_logging;
/* private */
int disk_io_error; /* replay errors from callbacks */
struct trans_logger_hash_anchor **hash_table;
struct list_head group_head;
loff_t old_margin;
spinlock_t replay_lock;
struct list_head replay_list;
struct task_struct *thread;
wait_queue_head_t worker_event;
wait_queue_head_t caller_event;
/* statistics */
atomic64_t shadow_mem_used;
atomic_t replay_count;
atomic_t any_fly_count;
atomic_t log_fly_count;
atomic_t hash_count;
atomic_t mshadow_count;
atomic_t sshadow_count;
atomic_t outer_balance_count;
atomic_t inner_balance_count;
atomic_t sub_balance_count;
atomic_t wb_balance_count;
atomic_t total_hash_insert_count;
atomic_t total_hash_find_count;
atomic_t total_hash_extend_count;
atomic_t total_replay_count;
atomic_t total_replay_conflict_count;
atomic_t total_cb_count;
atomic_t total_read_count;
atomic_t total_write_count;
atomic_t total_flush_count;
atomic_t total_writeback_count;
atomic_t total_writeback_cluster_count;
atomic_t total_shortcut_count;
atomic_t total_mshadow_count;
atomic_t total_sshadow_count;
atomic_t total_mshadow_buffered_count;
atomic_t total_sshadow_buffered_count;
atomic_t total_round_count;
atomic_t total_restart_count;
atomic_t total_delay_count;
/* queues */
struct logger_queue q_phase[LOGGER_QUEUES];
bool delay_callers;
};
/* Output side of a trans_logger brick. It has no members of its own beyond
 * whatever XIO_OUTPUT(trans_logger) expands to.
 */
struct trans_logger_output {
XIO_OUTPUT(trans_logger);
};
/* Size of trans_logger_info::inf_host in chars. */
#define MAX_HOST_LEN 32
/* Status record embedded in each trans_logger_input (member "inf").
 * The first group of members is set up by outside code, the second group
 * is maintained by trans_logger itself.
 */
struct trans_logger_info {
/* to be maintained / initialized from outside */
void (*inf_callback)(struct trans_logger_info *inf);
void *inf_private;
char inf_host[MAX_HOST_LEN];
int inf_sequence; /* logfile sequence number */
/* maintained by trans_logger */
loff_t inf_min_pos; /* current replay position (both in replay mode and in logging mode) */
loff_t inf_max_pos; /* ditto, indicating the "dirty" area which could be potentially "inconsistent" */
loff_t inf_log_pos; /* position of transaction logging (may be ahead of replay position) */
struct timespec inf_min_pos_stamp; /* when the data has been _successfully_ overwritten */
/* when the data has _started_ overwrite (maybe "trashed" in case of errors / aborts) */
struct timespec inf_max_pos_stamp;
struct timespec inf_log_pos_stamp; /* stamp from transaction log */
bool inf_is_replaying;
bool inf_is_logging;
};
/* Input side of a trans_logger brick; the TL_INPUT_* constants are the
 * indices of these inputs.
 */
struct trans_logger_input {
XIO_INPUT(trans_logger);
/* parameters */
/* (none at present) */
/* informational */
struct trans_logger_info inf;
/* readonly from outside */
atomic_t log_obj_count;
atomic_t pos_count;
bool is_operating;
long long last_jiffies;
/* private */
struct log_status logst;
struct list_head pos_list;
long long inf_last_jiffies;
/* NOTE(review): presumably serializes updates of "inf" - confirm. */
struct semaphore inf_mutex;
};
/* NOTE(review): XIO_TYPES presumably emits the remaining type declarations
 * tying together the trans_logger brick / input / output structs defined in
 * this header - see xio.h for the macro definition.
 */
XIO_TYPES(trans_logger);
#endif