2014-11-21 10:51:34 +00:00
/*
* MARS Long Distance Replication Software
*
* This file is part of MARS project : http : //schoebel.github.io/mars/
*
* Copyright ( C ) 2010 - 2014 Thomas Schoebel - Theuer
* Copyright ( C ) 2011 - 2014 1 & 1 Internet AG
*
* This program is free software ; you can redistribute it and / or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation ; either version 2 of the License , or
* ( at your option ) any later version .
*
* This program is distributed in the hope that it will be useful ,
* but WITHOUT ANY WARRANTY ; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE . See the
* GNU General Public License for more details .
*
* You should have received a copy of the GNU General Public License along
* with this program ; if not , write to the Free Software Foundation , Inc . ,
* 51 Franklin Street , Fifth Floor , Boston , MA 02110 - 1301 USA .
*/
2010-08-08 20:51:20 +00:00
2011-02-23 20:48:06 +00:00
// Trans_Logger brick
2010-08-08 20:51:20 +00:00
//#define BRICK_DEBUGGING
2013-01-20 22:09:09 +00:00
# define MARS_DEBUGGING
2011-04-01 11:18:32 +00:00
//#define IO_DEBUGGING
2011-07-28 11:41:06 +00:00
//#define REPLAY_DEBUGGING
2013-01-20 22:09:09 +00:00
# define STAT_DEBUGGING // here means: display full statistics
2011-06-30 13:15:52 +00:00
//#define HASH_DEBUGGING
2010-08-08 20:51:20 +00:00
2013-01-09 09:35:39 +00:00
# include <linux/kernel.h>
# include <linux/module.h>
# include <linux/string.h>
# include <linux/bio.h>
2018-10-09 05:01:05 +00:00
# include "brick_wait.h"
2013-01-09 09:35:39 +00:00
# include "mars.h"
# include "lib_limiter.h"
# include "mars_trans_logger.h"
2011-04-18 09:23:04 +00:00
// variants
# define KEEP_UNIQUE
2012-02-11 22:32:21 +00:00
# define DELAY_CALLERS // this is _needed_ for production systems
2012-10-15 14:35:36 +00:00
# define SHORTCUT_1_to_3 // when possible, queue 1 executes phase3_startio() directly without intermediate queueing into queue 3 => may be irritating, but has better performance. NOTICE: when some day the IO scheduling should be different between queue 1 and 3, you MUST disable this in order to distinguish between them!
2011-04-18 09:23:04 +00:00
2011-06-30 13:15:52 +00:00
// commenting this out is dangerous for data integrity! use only for testing!
2011-04-08 09:52:46 +00:00
# define USE_MEMCPY
2011-07-15 10:12:06 +00:00
# define DO_WRITEBACK // otherwise FAKE IO
2014-03-01 00:48:28 +00:00
# define REPLAY_DATA
2011-03-08 16:45:52 +00:00
2013-01-04 13:05:00 +00:00
// tuning
2013-01-09 09:35:39 +00:00
/* Chunk size; two pages smaller when BRICK_DEBUG_MEM is defined.
 * NOTE(review): the reason for the reduction is not visible here — presumably
 * room for debugging overhead of the memory allocator; confirm.
 */
# ifdef BRICK_DEBUG_MEM
# define CONF_TRANS_CHUNKSIZE (128 * 1024 - PAGE_SIZE * 2)
# else
# define CONF_TRANS_CHUNKSIZE (128 * 1024)
# endif
2013-01-04 13:05:00 +00:00
/* Upper bound for mref->ref_len; trans_logger_ref_get() clamps larger requests to it. */
# define CONF_TRANS_MAX_MREF_SIZE PAGE_SIZE
//#define CONF_TRANS_ALIGN PAGE_SIZE // FIXME: does not work
# define CONF_TRANS_ALIGN 0
2011-07-28 11:41:06 +00:00
/* Replay tracing: forwards to _MARS_MSG() with a "REPLAY " prefix when
 * REPLAY_DEBUGGING is defined, otherwise expands to nothing.
 */
# ifdef REPLAY_DEBUGGING
# define MARS_RPL(_fmt, _args...) _MARS_MSG(false, "REPLAY ", _fmt, ##_args)
# else
# define MARS_RPL(_args...) /*empty*/
# endif
2012-10-15 14:35:36 +00:00
#if 0
# define inline noinline
# endif
2013-04-12 09:55:52 +00:00
/* One hash bucket: a list of hashed elements plus the rw-semaphore
 * that the hash_*() functions take around accesses to that list.
 */
struct trans_logger_hash_anchor {
struct rw_semaphore hash_mutex ; /* down_read() for lookups, down_write() for insert / removal */
struct list_head hash_anchor ; /* list head; elements are linked via their hash_head member */
} ;
/* Hash table geometry: the table is split into sub-tables which are
 * addressed as brick->hash_table[hash / HASH_PER_PAGE][hash % HASH_PER_PAGE].
 */
# define NR_HASH_PAGES 64
/* number of anchor pointers fitting into one page */
# define MAX_HASH_PAGES (PAGE_SIZE / sizeof(struct trans_logger_hash_anchor*))
/* number of anchors (buckets) fitting into one page */
# define HASH_PER_PAGE (PAGE_SIZE / sizeof(struct trans_logger_hash_anchor))
/* total number of buckets; hash_fn() reduces modulo this value */
# define HASH_TOTAL (NR_HASH_PAGES * HASH_PER_PAGE)
2012-10-15 14:35:36 +00:00
///////////////////////// global tuning ////////////////////////
2012-12-30 20:48:44 +00:00
/* Global tunables, all exported via EXPORT_SYMBOL_GPL().
 * NOTE(review): their consumers are not visible in this part of the file;
 * the per-variable remarks below only state what the initializers show.
 */
int trans_logger_completion_semantics = 1 ;
EXPORT_SYMBOL_GPL ( trans_logger_completion_semantics ) ;
2012-12-27 10:23:38 +00:00
/* default depends on the build: true with CONFIG_MARS_DEBUG, false otherwise */
int trans_logger_do_crc =
# ifdef CONFIG_MARS_DEBUG
true ;
# else
false ;
# endif
EXPORT_SYMBOL_GPL ( trans_logger_do_crc ) ;
2012-09-26 09:22:36 +00:00
int trans_logger_mem_usage ; // in KB
EXPORT_SYMBOL_GPL ( trans_logger_mem_usage ) ;
2011-07-28 11:41:06 +00:00
2013-04-18 14:25:04 +00:00
/* NOTE(review): -1 presumably means "no limit" — confirm against the user of this variable */
int trans_logger_max_interleave = - 1 ;
EXPORT_SYMBOL_GPL ( trans_logger_max_interleave ) ;
2014-03-11 09:53:09 +00:00
int trans_logger_resume = 1 ;
2013-04-22 07:06:27 +00:00
EXPORT_SYMBOL_GPL ( trans_logger_resume ) ;
2013-07-03 06:27:42 +00:00
int trans_logger_replay_timeout = 1 ; // in s
EXPORT_SYMBOL_GPL ( trans_logger_replay_timeout ) ;
2012-10-15 14:35:36 +00:00
/* Statically initialised writeback group with an empty member list;
 * exported for use by other modules.
 */
struct writeback_group global_writeback = {
2017-12-10 21:37:21 +00:00
. mutex = __RWSEM_INITIALIZER ( global_writeback . mutex ) ,
2012-10-15 14:35:36 +00:00
. group_anchor = LIST_HEAD_INIT ( global_writeback . group_anchor ) ,
/* elect_leader() keeps the current leader while its shadow memory usage
 * exceeds this percentage of the group's recorded "biggest" value
 */
. until_percent = 30 ,
} ;
EXPORT_SYMBOL_GPL ( global_writeback ) ;
2010-08-08 20:51:20 +00:00
2012-10-15 14:35:36 +00:00
static
void add_to_group ( struct writeback_group * gr , struct trans_logger_brick * brick )
{
2017-12-10 21:37:21 +00:00
down_write ( & gr - > mutex ) ;
2012-10-15 14:35:36 +00:00
list_add_tail ( & brick - > group_head , & gr - > group_anchor ) ;
2017-12-10 21:37:21 +00:00
up_write ( & gr - > mutex ) ;
2012-10-15 14:35:36 +00:00
}
2010-08-08 20:51:20 +00:00
2012-10-15 14:35:36 +00:00
static
void remove_from_group ( struct writeback_group * gr , struct trans_logger_brick * brick )
{
2017-12-10 21:37:21 +00:00
down_write ( & gr - > mutex ) ;
2012-10-15 14:35:36 +00:00
list_del_init ( & brick - > group_head ) ;
gr - > leader = NULL ;
2017-12-10 21:37:21 +00:00
up_write ( & gr - > mutex ) ;
2012-10-15 14:35:36 +00:00
}
static
struct trans_logger_brick * elect_leader ( struct writeback_group * gr )
{
struct trans_logger_brick * res = gr - > leader ;
struct list_head * tmp ;
if ( res & & gr - > until_percent > = 0 ) {
loff_t used = atomic64_read ( & res - > shadow_mem_used ) ;
if ( used > gr - > biggest * gr - > until_percent / 100 )
goto done ;
}
2017-12-10 21:37:21 +00:00
/* FIXME: use O(log n) data structure instead */
down_read ( & gr - > mutex ) ;
2012-10-15 14:35:36 +00:00
for ( tmp = gr - > group_anchor . next ; tmp ! = & gr - > group_anchor ; tmp = tmp - > next ) {
struct trans_logger_brick * test = container_of ( tmp , struct trans_logger_brick , group_head ) ;
loff_t new_used = atomic64_read ( & test - > shadow_mem_used ) ;
if ( ! res | | new_used > atomic64_read ( & res - > shadow_mem_used ) ) {
res = test ;
gr - > biggest = new_used ;
}
}
2017-12-10 21:37:21 +00:00
up_read ( & gr - > mutex ) ;
2012-10-15 14:35:36 +00:00
gr - > leader = res ;
done :
return res ;
}
///////////////////////// own type definitions ////////////////////////
2011-04-08 09:52:46 +00:00
static inline
2011-04-18 14:14:16 +00:00
int lh_cmp ( loff_t * a , loff_t * b )
2010-11-12 11:18:40 +00:00
{
2011-04-18 14:14:16 +00:00
if ( * a < * b )
return - 1 ;
if ( * a > * b )
return 1 ;
return 0 ;
2010-11-12 11:18:40 +00:00
}
2011-04-08 09:52:46 +00:00
static inline
2011-04-18 14:14:16 +00:00
int tr_cmp ( struct pairing_heap_logger * _a , struct pairing_heap_logger * _b )
2010-08-11 16:02:08 +00:00
{
2011-04-18 14:14:16 +00:00
struct logger_head * a = container_of ( _a , struct logger_head , ph ) ;
struct logger_head * b = container_of ( _b , struct logger_head , ph ) ;
return lh_cmp ( a - > lh_pos , b - > lh_pos ) ;
2010-08-11 16:02:08 +00:00
}
2011-04-18 14:14:16 +00:00
/* Instantiate the pairing heap helpers for "logger", ordered by tr_cmp().
 * NOTE(review): presumably this generates static functions operating on
 * struct pairing_heap_logger — confirm in the header defining the macro.
 */
_PAIRING_HEAP_FUNCTIONS ( static , logger , tr_cmp ) ;
2010-08-11 16:02:08 +00:00
2011-04-08 09:52:46 +00:00
static inline
2011-04-18 14:14:16 +00:00
loff_t * lh_get ( struct logger_head * th )
2010-08-20 10:58:24 +00:00
{
2011-04-18 14:14:16 +00:00
return th - > lh_pos ;
}
2010-08-20 10:58:24 +00:00
2011-04-18 14:14:16 +00:00
/* Instantiate the queue helpers for struct logger_head, using lh_get() as
 * key accessor and lh_cmp() as key comparison.
 * NOTE(review): presumably this is where the q_logger_*() functions used by
 * the qq_*() wrappers below are generated — confirm in the macro's header.
 */
QUEUE_FUNCTIONS ( logger , struct logger_head , lh_head , lh_get , lh_cmp , logger ) ;
2010-08-20 10:58:24 +00:00
2011-04-18 14:14:16 +00:00
////////////////////////// logger queue handling ////////////////////////
2010-08-20 10:58:24 +00:00
2011-04-08 09:52:46 +00:00
static inline
2011-04-29 09:36:10 +00:00
void qq_init ( struct logger_queue * q , struct trans_logger_brick * brick )
2010-08-11 16:02:08 +00:00
{
2011-04-18 14:14:16 +00:00
q_logger_init ( q ) ;
2011-04-29 09:36:10 +00:00
q - > q_brick = brick ;
}
2017-06-05 21:18:20 +00:00
/* Counterpart of qq_deactivate(): forwards a delta of +1 to q_logger_activate(). */
static inline
void qq_activate(struct logger_queue *q)
{
	const int delta = 1;

	q_logger_activate(q, delta);
}
/* Counterpart of qq_activate(): forwards a delta of -1 to q_logger_activate(). */
static inline
void qq_deactivate(struct logger_queue *q)
{
	const int delta = -1;

	q_logger_activate(q, delta);
}
2011-04-18 09:23:04 +00:00
static inline
2011-04-19 14:46:38 +00:00
void qq_mref_insert ( struct logger_queue * q , struct trans_logger_mref_aspect * mref_a )
2011-04-18 09:23:04 +00:00
{
struct mref_object * mref = mref_a - > object ;
2012-12-10 09:31:28 +00:00
_mref_get ( mref ) ; // must be paired with __trans_logger_ref_put()
2011-04-29 09:36:10 +00:00
atomic_inc ( & q - > q_brick - > inner_balance_count ) ;
2011-04-18 09:23:04 +00:00
mars_trace ( mref , q - > q_insert_info ) ;
2011-04-18 14:14:16 +00:00
q_logger_insert ( q , & mref_a - > lh ) ;
2011-04-18 09:23:04 +00:00
}
2011-04-18 14:14:16 +00:00
static inline
2011-04-19 14:46:38 +00:00
void qq_wb_insert ( struct logger_queue * q , struct writeback_info * wb )
{
q_logger_insert ( q , & wb - > w_lh ) ;
}
static inline
void qq_mref_pushback ( struct logger_queue * q , struct trans_logger_mref_aspect * mref_a )
2011-04-18 14:14:16 +00:00
{
2012-12-10 09:31:28 +00:00
_mref_check ( mref_a - > object ) ;
2011-04-18 14:14:16 +00:00
mars_trace ( mref_a - > object , q - > q_pushback_info ) ;
2012-12-29 22:26:17 +00:00
q - > pushback_count + + ;
2011-04-18 14:14:16 +00:00
q_logger_pushback ( q , & mref_a - > lh ) ;
}
static inline
2011-04-19 14:46:38 +00:00
void qq_wb_pushback ( struct logger_queue * q , struct writeback_info * wb )
{
2012-12-29 22:26:17 +00:00
q - > pushback_count + + ;
2011-04-19 14:46:38 +00:00
q_logger_pushback ( q , & wb - > w_lh ) ;
}
static inline
struct trans_logger_mref_aspect * qq_mref_fetch ( struct logger_queue * q )
2011-04-18 14:14:16 +00:00
{
struct logger_head * test ;
struct trans_logger_mref_aspect * mref_a = NULL ;
test = q_logger_fetch ( q ) ;
if ( test ) {
mref_a = container_of ( test , struct trans_logger_mref_aspect , lh ) ;
2012-12-10 09:31:28 +00:00
_mref_check ( mref_a - > object ) ;
2011-04-18 14:14:16 +00:00
mars_trace ( mref_a - > object , q - > q_fetch_info ) ;
}
return mref_a ;
}
2011-04-18 09:23:04 +00:00
2011-04-19 14:46:38 +00:00
/* Fetch the next element from @q and convert it to its writeback_info.
 * Returns NULL when q_logger_fetch() delivers nothing.
 */
static inline
struct writeback_info *qq_wb_fetch(struct logger_queue *q)
{
	struct logger_head *head = q_logger_fetch(q);

	if (!head)
		return NULL;

	return container_of(head, struct writeback_info, w_lh);
}
2010-08-08 20:51:20 +00:00
///////////////////////// own helper functions ////////////////////////
2011-04-08 09:52:46 +00:00
static inline
2011-04-12 15:31:08 +00:00
int hash_fn ( loff_t pos )
2010-08-08 20:51:20 +00:00
{
// simple and stupid
2013-04-12 09:55:52 +00:00
long base_index = pos > > REGION_SIZE_BITS ;
base_index + = base_index / HASH_TOTAL / 7 ;
return base_index % HASH_TOTAL ;
2010-08-08 20:51:20 +00:00
}
2011-04-12 15:31:08 +00:00
static inline
2013-01-01 14:56:32 +00:00
struct trans_logger_mref_aspect * _hash_find ( struct list_head * start , loff_t pos , int * max_len , bool use_collect_head , bool find_unstable )
2010-08-08 20:51:20 +00:00
{
struct list_head * tmp ;
2010-12-15 12:13:18 +00:00
struct trans_logger_mref_aspect * res = NULL ;
2011-03-31 16:16:00 +00:00
int len = * max_len ;
2011-06-30 13:15:52 +00:00
# ifdef HASH_DEBUGGING
2011-04-01 11:18:32 +00:00
int count = 0 ;
# endif
2011-04-12 15:31:08 +00:00
2011-03-31 16:16:00 +00:00
/* The lists are always sorted according to age (newest first).
2010-08-10 17:39:30 +00:00
* Caution : there may be duplicates in the list , some of them
* overlapping with the search area in many different ways .
2010-08-08 20:51:20 +00:00
*/
2011-04-12 15:31:08 +00:00
for ( tmp = start - > next ; tmp ! = start ; tmp = tmp - > next ) {
2011-04-01 11:18:32 +00:00
struct trans_logger_mref_aspect * test_a ;
struct mref_object * test ;
int diff ;
2011-06-30 13:15:52 +00:00
# ifdef HASH_DEBUGGING
2010-08-08 20:51:20 +00:00
static int max = 0 ;
if ( + + count > max ) {
max = count ;
2011-06-30 13:15:52 +00:00
if ( ! ( max % 100 ) ) {
MARS_INF ( " hash max=%d hash=%d (pos=%lld) \n " , max , hash_fn ( pos ) , pos ) ;
2010-08-08 20:51:20 +00:00
}
}
# endif
2011-04-12 15:31:08 +00:00
if ( use_collect_head ) {
test_a = container_of ( tmp , struct trans_logger_mref_aspect , collect_head ) ;
} else {
test_a = container_of ( tmp , struct trans_logger_mref_aspect , hash_head ) ;
}
2010-08-08 20:51:20 +00:00
test = test_a - > object ;
2011-04-12 15:31:08 +00:00
2012-12-10 09:31:28 +00:00
_mref_check ( test ) ;
2011-04-01 11:18:32 +00:00
2010-08-09 16:57:56 +00:00
// are the regions overlapping?
2011-04-12 15:31:08 +00:00
if ( pos > = test - > ref_pos + test - > ref_len | | pos + len < = test - > ref_pos ) {
2011-04-01 11:18:32 +00:00
continue ; // not relevant
}
2011-04-12 15:31:08 +00:00
2013-01-01 11:35:33 +00:00
// searching for unstable elements (only in special cases)
if ( find_unstable & & test_a - > is_stable )
break ;
2011-04-01 11:18:32 +00:00
diff = test - > ref_pos - pos ;
if ( diff < = 0 ) {
int restlen = test - > ref_len + diff ;
res = test_a ;
2011-03-31 16:16:00 +00:00
if ( restlen < len ) {
len = restlen ;
2010-08-08 20:51:20 +00:00
}
2011-04-01 11:18:32 +00:00
break ;
}
if ( diff < len ) {
len = diff ;
2010-08-08 20:51:20 +00:00
}
}
2011-04-12 15:31:08 +00:00
* max_len = len ;
return res ;
}
static noinline
2013-01-01 11:35:33 +00:00
struct trans_logger_mref_aspect * hash_find ( struct trans_logger_brick * brick , loff_t pos , int * max_len , bool find_unstable )
2011-04-12 15:31:08 +00:00
{
2011-04-29 09:36:10 +00:00
2011-04-12 15:31:08 +00:00
int hash = hash_fn ( pos ) ;
2013-04-12 09:55:52 +00:00
struct trans_logger_hash_anchor * sub_table = brick - > hash_table [ hash / HASH_PER_PAGE ] ;
struct trans_logger_hash_anchor * start = & sub_table [ hash % HASH_PER_PAGE ] ;
2011-04-12 15:31:08 +00:00
struct trans_logger_mref_aspect * res ;
2011-06-30 13:15:52 +00:00
//unsigned int flags;
2011-04-12 15:31:08 +00:00
2012-02-12 15:36:34 +00:00
atomic_inc ( & brick - > total_hash_find_count ) ;
2011-06-30 13:15:52 +00:00
down_read ( & start - > hash_mutex ) ;
2011-04-12 15:31:08 +00:00
2013-01-01 14:56:32 +00:00
res = _hash_find ( & start - > hash_anchor , pos , max_len , false , find_unstable ) ;
2011-04-12 15:31:08 +00:00
2012-12-10 16:42:47 +00:00
/* Ensure the found mref can't go away...
*/
if ( res & & res - > object )
_mref_get ( res - > object ) ;
2011-06-30 13:15:52 +00:00
up_read ( & start - > hash_mutex ) ;
2010-08-08 20:51:20 +00:00
return res ;
}
2011-04-08 09:52:46 +00:00
static noinline
2011-04-29 09:36:10 +00:00
void hash_insert ( struct trans_logger_brick * brick , struct trans_logger_mref_aspect * elem_a )
2010-08-08 20:51:20 +00:00
{
2011-04-12 15:31:08 +00:00
int hash = hash_fn ( elem_a - > object - > ref_pos ) ;
2013-04-12 09:55:52 +00:00
struct trans_logger_hash_anchor * sub_table = brick - > hash_table [ hash / HASH_PER_PAGE ] ;
struct trans_logger_hash_anchor * start = & sub_table [ hash % HASH_PER_PAGE ] ;
2011-06-30 13:15:52 +00:00
//unsigned int flags;
2010-08-08 20:51:20 +00:00
2010-08-20 10:58:24 +00:00
# if 1
CHECK_HEAD_EMPTY ( & elem_a - > hash_head ) ;
2012-12-10 09:31:28 +00:00
_mref_check ( elem_a - > object ) ;
2010-08-20 10:58:24 +00:00
# endif
2011-03-10 11:40:06 +00:00
// only for statistics:
2011-04-29 09:36:10 +00:00
atomic_inc ( & brick - > hash_count ) ;
2012-02-12 15:36:34 +00:00
atomic_inc ( & brick - > total_hash_insert_count ) ;
2011-03-10 11:40:06 +00:00
2011-06-30 13:15:52 +00:00
down_write ( & start - > hash_mutex ) ;
2011-03-10 11:40:06 +00:00
2011-04-08 09:52:46 +00:00
list_add ( & elem_a - > hash_head , & start - > hash_anchor ) ;
elem_a - > is_hashed = true ;
2011-06-30 13:15:52 +00:00
up_write ( & start - > hash_mutex ) ;
2011-04-08 09:52:46 +00:00
}
/* Find the transitive closure of overlapping requests
* and collect them into a list .
*/
static noinline
2013-01-01 14:56:32 +00:00
void hash_extend ( struct trans_logger_brick * brick , loff_t * _pos , int * _len , struct list_head * collect_list )
2011-04-08 09:52:46 +00:00
{
loff_t pos = * _pos ;
int len = * _len ;
2011-04-12 15:31:08 +00:00
int hash = hash_fn ( pos ) ;
2013-04-12 09:55:52 +00:00
struct trans_logger_hash_anchor * sub_table = brick - > hash_table [ hash / HASH_PER_PAGE ] ;
struct trans_logger_hash_anchor * start = & sub_table [ hash % HASH_PER_PAGE ] ;
2011-04-12 15:31:08 +00:00
struct list_head * tmp ;
2011-04-08 09:52:46 +00:00
bool extended ;
2011-06-30 13:15:52 +00:00
//unsigned int flags;
# ifdef HASH_DEBUGGING
int count = 0 ;
static int max = 0 ;
# endif
2011-04-08 09:52:46 +00:00
if ( collect_list ) {
CHECK_HEAD_EMPTY ( collect_list ) ;
}
2012-02-12 15:36:34 +00:00
atomic_inc ( & brick - > total_hash_extend_count ) ;
2011-06-30 13:15:52 +00:00
down_read ( & start - > hash_mutex ) ;
2011-04-08 09:52:46 +00:00
do {
extended = false ;
2011-03-18 13:15:40 +00:00
for ( tmp = start - > hash_anchor . next ; tmp ! = & start - > hash_anchor ; tmp = tmp - > next ) {
2011-04-08 09:52:46 +00:00
struct trans_logger_mref_aspect * test_a ;
struct mref_object * test ;
2011-04-12 15:31:08 +00:00
loff_t diff ;
2011-06-30 13:15:52 +00:00
# ifdef HASH_DEBUGGING
count + + ;
# endif
2011-04-08 09:52:46 +00:00
2011-03-18 13:15:40 +00:00
test_a = container_of ( tmp , struct trans_logger_mref_aspect , hash_head ) ;
test = test_a - > object ;
2011-04-08 09:52:46 +00:00
2012-12-10 09:31:28 +00:00
_mref_check ( test ) ;
2011-04-15 10:13:22 +00:00
2011-04-08 09:52:46 +00:00
// are the regions overlapping?
2013-01-01 14:37:09 +00:00
if ( pos > = test - > ref_pos + test - > ref_len | | pos + len < = test - > ref_pos ) {
2011-04-08 09:52:46 +00:00
continue ; // not relevant
}
2011-04-12 15:31:08 +00:00
2013-01-01 14:37:09 +00:00
// collision detection
if ( test_a - > is_collected )
goto collision ;
2013-01-01 21:36:33 +00:00
// no writeback of non-persistent data
2013-07-03 08:01:04 +00:00
if ( ! ( test_a - > is_persistent & test_a - > is_completed ) )
2013-01-01 21:36:33 +00:00
goto collision ;
2011-04-08 09:52:46 +00:00
// extend the search region when necessary
2011-04-12 15:31:08 +00:00
diff = pos - test - > ref_pos ;
if ( diff > 0 ) {
len + = diff ;
2011-04-08 09:52:46 +00:00
pos = test - > ref_pos ;
extended = true ;
}
2011-04-12 15:31:08 +00:00
diff = ( test - > ref_pos + test - > ref_len ) - ( pos + len ) ;
if ( diff > 0 ) {
len + = diff ;
2011-04-08 09:52:46 +00:00
extended = true ;
2011-03-18 13:15:40 +00:00
}
}
2011-04-08 09:52:46 +00:00
} while ( extended ) ; // start over for transitive closure
2010-08-08 20:51:20 +00:00
2011-04-08 09:52:46 +00:00
* _pos = pos ;
* _len = len ;
2010-08-08 20:51:20 +00:00
2011-06-30 13:15:52 +00:00
# ifdef HASH_DEBUGGING
if ( count > max + 100 ) {
int i = 0 ;
max = count ;
MARS_INF ( " iterations max=%d hash=%d (pos=%lld len=%d) \n " , max , hash , pos , len ) ;
for ( tmp = start - > hash_anchor . next ; tmp ! = & start - > hash_anchor ; tmp = tmp - > next ) {
struct trans_logger_mref_aspect * test_a ;
struct mref_object * test ;
test_a = container_of ( tmp , struct trans_logger_mref_aspect , hash_head ) ;
test = test_a - > object ;
MARS_INF ( " %03d pos = %lld len = %d collected = %d \n " , i + + , test - > ref_pos , test - > ref_len , test_a - > is_collected ) ;
}
MARS_INF ( " ---------------- \n " ) ;
}
# endif
2011-04-12 15:31:08 +00:00
for ( tmp = start - > hash_anchor . next ; tmp ! = & start - > hash_anchor ; tmp = tmp - > next ) {
struct trans_logger_mref_aspect * test_a ;
struct mref_object * test ;
test_a = container_of ( tmp , struct trans_logger_mref_aspect , hash_head ) ;
test = test_a - > object ;
// are the regions overlapping?
2013-01-01 14:37:09 +00:00
if ( pos > = test - > ref_pos + test - > ref_len | | pos + len < = test - > ref_pos ) {
2011-04-12 15:31:08 +00:00
continue ; // not relevant
}
// collect
CHECK_HEAD_EMPTY ( & test_a - > collect_head ) ;
2013-01-01 14:37:09 +00:00
if ( unlikely ( test_a - > is_collected ) ) {
MARS_ERR ( " collision detection did not work \n " ) ;
}
2011-04-12 15:31:08 +00:00
test_a - > is_collected = true ;
2012-12-10 09:31:28 +00:00
_mref_check ( test ) ;
2011-04-12 15:31:08 +00:00
list_add_tail ( & test_a - > collect_head , collect_list ) ;
2010-08-11 16:02:08 +00:00
}
2010-08-08 20:51:20 +00:00
2013-01-01 14:37:09 +00:00
collision :
2011-06-30 13:15:52 +00:00
up_read ( & start - > hash_mutex ) ;
2010-08-08 20:51:20 +00:00
}
2011-04-11 13:40:06 +00:00
/* Atomically put all elements from the list.
* All elements must reside in the same collision list .
*/
static inline
2011-04-29 09:36:10 +00:00
void hash_put_all ( struct trans_logger_brick * brick , struct list_head * list )
2011-04-11 13:40:06 +00:00
{
struct list_head * tmp ;
2013-04-12 09:55:52 +00:00
struct trans_logger_hash_anchor * start = NULL ;
2011-04-12 15:31:08 +00:00
int first_hash = - 1 ;
2011-06-30 13:15:52 +00:00
//unsigned int flags;
2011-04-11 13:40:06 +00:00
for ( tmp = list - > next ; tmp ! = list ; tmp = tmp - > next ) {
struct trans_logger_mref_aspect * elem_a ;
struct mref_object * elem ;
int hash ;
2011-04-12 15:31:08 +00:00
elem_a = container_of ( tmp , struct trans_logger_mref_aspect , collect_head ) ;
2011-04-11 13:40:06 +00:00
elem = elem_a - > object ;
2012-02-08 08:40:12 +00:00
CHECK_PTR ( elem , err ) ;
2012-12-10 09:31:28 +00:00
_mref_check ( elem ) ;
2011-04-15 10:13:22 +00:00
2011-04-12 15:31:08 +00:00
hash = hash_fn ( elem - > ref_pos ) ;
2011-04-11 13:40:06 +00:00
if ( ! start ) {
2013-04-12 09:55:52 +00:00
struct trans_logger_hash_anchor * sub_table = brick - > hash_table [ hash / HASH_PER_PAGE ] ;
start = & sub_table [ hash % HASH_PER_PAGE ] ;
2011-04-12 15:31:08 +00:00
first_hash = hash ;
2011-06-30 13:15:52 +00:00
down_write ( & start - > hash_mutex ) ;
2011-04-12 15:31:08 +00:00
} else if ( unlikely ( hash ! = first_hash ) ) {
MARS_ERR ( " oops, different hashes: %d != %d \n " , hash , first_hash ) ;
2011-04-11 13:40:06 +00:00
}
if ( ! elem_a - > is_hashed ) {
continue ;
}
list_del_init ( & elem_a - > hash_head ) ;
elem_a - > is_hashed = false ;
2011-04-29 09:36:10 +00:00
atomic_dec ( & brick - > hash_count ) ;
2011-04-11 13:40:06 +00:00
}
2012-02-08 08:40:12 +00:00
err :
2011-04-11 13:40:06 +00:00
if ( start ) {
2011-06-30 13:15:52 +00:00
up_write ( & start - > hash_mutex ) ;
2011-04-11 13:40:06 +00:00
}
}
2013-01-01 11:35:33 +00:00
static inline
void hash_ensure_stableness ( struct trans_logger_brick * brick , struct trans_logger_mref_aspect * mref_a )
{
if ( ! mref_a - > is_stable ) {
struct mref_object * mref = mref_a - > object ;
int hash = hash_fn ( mref - > ref_pos ) ;
2013-04-12 09:55:52 +00:00
struct trans_logger_hash_anchor * sub_table = brick - > hash_table [ hash / HASH_PER_PAGE ] ;
struct trans_logger_hash_anchor * start = & sub_table [ hash % HASH_PER_PAGE ] ;
2013-01-01 11:35:33 +00:00
down_write ( & start - > hash_mutex ) ;
mref_a - > is_stable = true ;
up_write ( & start - > hash_mutex ) ;
}
}
2012-12-17 07:25:17 +00:00
/* Invoke the info callback of @input, rate-limited to once per 4 seconds.
 *
 * @input: the input whose inf.inf_callback shall be called
 * @force: when true, the rate limit is bypassed
 *
 * The callback is only invoked when it is set and the input is operating;
 * otherwise a debug message is emitted.  inf_last_jiffies is updated both
 * before and after the call.  A value of 0 in inf_last_jiffies means that
 * no rate limit applies.
 */
static
void _inf_callback(struct trans_logger_input *input, bool force)
{
	if (!force && input->inf_last_jiffies) {
		/* Compare via the signed difference of the raw counters, so
		 * that the test also works across a wraparound of jiffies.
		 * Comparing the absolute values would suppress all
		 * non-forced callbacks after a wrap until jiffies has caught up.
		 */
		unsigned long elapsed = (unsigned long)jiffies - (unsigned long)input->inf_last_jiffies;

		if ((long)elapsed < (long)(4 * HZ))
			return;
	}

	if (input->inf.inf_callback && input->is_operating) {
		input->inf_last_jiffies = jiffies;

		input->inf.inf_callback(&input->inf);

		input->inf_last_jiffies = jiffies;
	} else {
		MARS_DBG(" %p skipped callback, callback = %p is_operating = %d \n ", input, input->inf.inf_callback, input->is_operating);
	}
}
2013-04-22 07:06:27 +00:00
static inline
int _congested ( struct trans_logger_brick * brick )
{
2017-06-05 21:10:03 +00:00
return
2017-06-05 21:18:20 +00:00
brick - > q_phase [ 0 ] . q_active | |
brick - > q_phase [ 1 ] . q_active | |
brick - > q_phase [ 2 ] . q_active | |
brick - > q_phase [ 3 ] . q_active ;
2013-04-22 07:06:27 +00:00
}
2010-08-08 20:51:20 +00:00
////////////////// own brick / input / output operations //////////////////
2012-11-13 09:59:00 +00:00
/* Global accounting of master shadows over all bricks (exported).
 * _write_ref_get() increments the count by one and adds mref->ref_len
 * to the used amount for each master shadow it creates.
 */
atomic_t global_mshadow_count = ATOMIC_INIT ( 0 ) ;
EXPORT_SYMBOL_GPL ( global_mshadow_count ) ;
/* compared (divided by 1024) against brick_global_memlimit when delaying writers */
atomic64_t global_mshadow_used = ATOMIC64_INIT ( 0 ) ;
EXPORT_SYMBOL_GPL ( global_mshadow_used ) ;
2011-03-29 14:40:40 +00:00
2011-04-08 09:52:46 +00:00
static noinline
int trans_logger_get_info ( struct trans_logger_output * output , struct mars_info * info )
2010-08-08 20:51:20 +00:00
{
2011-04-29 09:36:10 +00:00
struct trans_logger_input * input = output - > brick - > inputs [ TL_INPUT_READ ] ;
2010-08-08 20:51:20 +00:00
return GENERIC_INPUT_CALL ( input , mars_get_info , info ) ;
}
2011-04-08 09:52:46 +00:00
static noinline
int _make_sshadow ( struct trans_logger_output * output , struct trans_logger_mref_aspect * mref_a , struct trans_logger_mref_aspect * mshadow_a )
{
2011-04-29 09:36:10 +00:00
struct trans_logger_brick * brick = output - > brick ;
2011-04-08 09:52:46 +00:00
struct mref_object * mref = mref_a - > object ;
struct mref_object * mshadow ;
int diff ;
mshadow = mshadow_a - > object ;
# if 1
if ( unlikely ( mref - > ref_len > mshadow - > ref_len ) ) {
MARS_ERR ( " oops %d -> %d \n " , mref - > ref_len , mshadow - > ref_len ) ;
mref - > ref_len = mshadow - > ref_len ;
}
if ( unlikely ( mshadow_a = = mref_a ) ) {
MARS_ERR ( " oops %p == %p \n " , mshadow_a , mref_a ) ;
return - EINVAL ;
}
# endif
diff = mref - > ref_pos - mshadow - > ref_pos ;
# if 1
if ( unlikely ( diff < 0 ) ) {
MARS_ERR ( " oops diff = %d \n " , diff ) ;
return - EINVAL ;
}
# endif
/* Attach mref to the existing shadow ("slave shadow").
*/
mref_a - > shadow_data = mshadow_a - > shadow_data + diff ;
mref_a - > do_dealloc = false ;
if ( ! mref - > ref_data ) { // buffered IO
mref - > ref_data = mref_a - > shadow_data ;
mref_a - > do_buffered = true ;
2012-02-12 15:36:34 +00:00
atomic_inc ( & brick - > total_sshadow_buffered_count ) ;
2011-04-08 09:52:46 +00:00
}
mref - > ref_flags = mshadow - > ref_flags ;
mref_a - > shadow_ref = mshadow_a ;
2011-10-03 17:31:02 +00:00
mref_a - > my_brick = brick ;
2011-04-14 14:21:26 +00:00
2011-04-18 09:23:04 +00:00
/* Get an ordinary internal reference
*/
2012-12-10 09:31:28 +00:00
_mref_get_first ( mref ) ; // must be paired with __trans_logger_ref_put()
2011-04-29 09:36:10 +00:00
atomic_inc ( & brick - > inner_balance_count ) ;
2011-04-14 14:21:26 +00:00
2012-12-10 16:42:47 +00:00
/* The internal reference from slave to master is already
* present due to hash_find ( ) ,
2011-04-18 09:23:04 +00:00
* such that the master cannot go away before the slave .
2012-12-10 16:42:47 +00:00
* It is compensated by master transition in __trans_logger_ref_put ( )
2011-04-18 09:23:04 +00:00
*/
2011-04-29 09:36:10 +00:00
atomic_inc ( & brick - > inner_balance_count ) ;
2011-04-18 09:23:04 +00:00
2011-04-29 09:36:10 +00:00
atomic_inc ( & brick - > sshadow_count ) ;
atomic_inc ( & brick - > total_sshadow_count ) ;
2011-04-08 09:52:46 +00:00
# if 1
if ( unlikely ( mref - > ref_len < = 0 ) ) {
MARS_ERR ( " oops, len = %d \n " , mref - > ref_len ) ;
return - EINVAL ;
}
# endif
return mref - > ref_len ;
}
2010-08-20 10:58:24 +00:00
2011-04-08 09:52:46 +00:00
static noinline
int _read_ref_get ( struct trans_logger_output * output , struct trans_logger_mref_aspect * mref_a )
2010-08-08 20:51:20 +00:00
{
2011-04-29 09:36:10 +00:00
struct trans_logger_brick * brick = output - > brick ;
2010-12-15 12:13:18 +00:00
struct mref_object * mref = mref_a - > object ;
2011-04-29 09:36:10 +00:00
struct trans_logger_input * input = brick - > inputs [ TL_INPUT_READ ] ;
2011-04-01 11:18:32 +00:00
struct trans_logger_mref_aspect * mshadow_a ;
2010-08-08 20:51:20 +00:00
2010-08-11 16:02:08 +00:00
/* Look if there is a newer version on the fly, shadowing
* the old one .
* When a shadow is found , use it as buffer for the mref .
*/
2013-01-01 11:35:33 +00:00
mshadow_a = hash_find ( brick , mref - > ref_pos , & mref - > ref_len , false ) ;
2011-04-08 09:52:46 +00:00
if ( ! mshadow_a ) {
return GENERIC_INPUT_CALL ( input , mref_get , mref ) ;
2010-08-11 16:02:08 +00:00
}
2011-04-14 14:21:26 +00:00
2011-04-08 09:52:46 +00:00
return _make_sshadow ( output , mref_a , mshadow_a ) ;
}
2010-08-11 16:02:08 +00:00
2011-04-08 09:52:46 +00:00
static noinline
int _write_ref_get ( struct trans_logger_output * output , struct trans_logger_mref_aspect * mref_a )
2010-08-11 16:02:08 +00:00
{
2011-04-29 09:36:10 +00:00
struct trans_logger_brick * brick = output - > brick ;
2010-12-15 12:13:18 +00:00
struct mref_object * mref = mref_a - > object ;
2011-04-08 09:52:46 +00:00
void * data ;
# ifdef KEEP_UNIQUE
struct trans_logger_mref_aspect * mshadow_a ;
2012-12-05 07:29:39 +00:00
# endif
# ifdef CONFIG_MARS_DEBUG
if ( unlikely ( mref - > ref_len < = 0 ) ) {
MARS_ERR ( " oops, ref_len = %d \n " , mref - > ref_len ) ;
return - EINVAL ;
}
# endif
# ifdef KEEP_UNIQUE
2013-01-01 11:35:33 +00:00
mshadow_a = hash_find ( brick , mref - > ref_pos , & mref - > ref_len , true ) ;
2011-04-08 09:52:46 +00:00
if ( mshadow_a ) {
return _make_sshadow ( output , mref_a , mshadow_a ) ;
}
# endif
2011-11-14 14:21:15 +00:00
# ifdef DELAY_CALLERS
2011-06-30 13:15:52 +00:00
// delay in case of too many master shadows / memory shortage
2018-10-09 05:28:43 +00:00
brick_wait ( brick - > caller_event ,
2019-02-11 18:45:47 +00:00
brick - > caller_flag ,
2012-12-05 07:29:39 +00:00
! brick - > delay_callers & &
2013-04-02 13:35:13 +00:00
( brick_global_memlimit < 1024 | | atomic64_read ( & global_mshadow_used ) / 1024 < brick_global_memlimit ) ,
2012-12-05 07:29:39 +00:00
HZ / 2 ) ;
2011-11-14 14:21:15 +00:00
# endif
2011-06-30 13:15:52 +00:00
2011-04-08 09:52:46 +00:00
// create a new master shadow
2011-08-12 11:09:48 +00:00
data = brick_block_alloc ( mref - > ref_pos , ( mref_a - > alloc_len = mref - > ref_len ) ) ;
2011-04-08 09:52:46 +00:00
if ( unlikely ( ! data ) ) {
return - ENOMEM ;
}
2011-06-30 13:15:52 +00:00
atomic64_add ( mref - > ref_len , & brick - > shadow_mem_used ) ;
2012-02-08 11:44:42 +00:00
# ifdef CONFIG_MARS_DEBUG
2011-04-08 09:52:46 +00:00
memset ( data , 0x11 , mref - > ref_len ) ;
# endif
mref_a - > shadow_data = data ;
mref_a - > do_dealloc = true ;
2012-02-12 15:36:34 +00:00
if ( ! mref - > ref_data ) { // buffered IO
2011-04-01 11:18:32 +00:00
mref - > ref_data = data ;
2011-04-08 09:52:46 +00:00
mref_a - > do_buffered = true ;
2012-02-12 15:36:34 +00:00
atomic_inc ( & brick - > total_mshadow_buffered_count ) ;
2010-08-11 16:02:08 +00:00
}
2011-10-03 17:31:02 +00:00
mref_a - > my_brick = brick ;
2011-03-31 16:16:00 +00:00
mref - > ref_flags = 0 ;
2011-03-08 16:45:52 +00:00
mref_a - > shadow_ref = mref_a ; // cyclic self-reference => indicates master shadow
2011-04-14 14:21:26 +00:00
2011-04-29 09:36:10 +00:00
atomic_inc ( & brick - > mshadow_count ) ;
atomic_inc ( & brick - > total_mshadow_count ) ;
2011-04-08 09:52:46 +00:00
atomic_inc ( & global_mshadow_count ) ;
2012-02-11 22:32:21 +00:00
atomic64_add ( mref - > ref_len , & global_mshadow_used ) ;
2011-04-08 09:52:46 +00:00
2012-12-10 09:31:28 +00:00
atomic_inc ( & brick - > inner_balance_count ) ;
_mref_get_first ( mref ) ; // must be paired with __trans_logger_ref_put()
2010-08-11 16:02:08 +00:00
return mref - > ref_len ;
}
2011-04-08 09:52:46 +00:00
static noinline
int trans_logger_ref_get ( struct trans_logger_output * output , struct mref_object * mref )
2010-08-11 16:02:08 +00:00
{
2012-12-07 10:30:33 +00:00
struct trans_logger_brick * brick ;
2010-12-15 12:13:18 +00:00
struct trans_logger_mref_aspect * mref_a ;
2010-08-27 15:42:10 +00:00
loff_t base_offset ;
2010-08-11 16:02:08 +00:00
2010-08-23 05:06:06 +00:00
CHECK_PTR ( output , err ) ;
2012-12-07 10:30:33 +00:00
brick = output - > brick ;
CHECK_PTR ( brick , err ) ;
CHECK_PTR ( mref , err ) ;
2010-08-23 05:06:06 +00:00
2011-04-08 09:52:46 +00:00
MARS_IO ( " pos = %lld len = %d \n " , mref - > ref_pos , mref - > ref_len ) ;
2012-12-07 10:30:33 +00:00
mref_a = trans_logger_mref_get_aspect ( brick , mref ) ;
CHECK_PTR ( mref_a , err ) ;
CHECK_ASPECT ( mref_a , mref , err ) ;
2011-11-03 11:22:50 +00:00
2011-04-29 09:36:10 +00:00
atomic_inc ( & brick - > outer_balance_count ) ;
2011-03-10 11:40:06 +00:00
2012-12-10 09:31:28 +00:00
if ( mref - > ref_initialized ) { // setup already performed
MARS_IO ( " again %d \n " , atomic_read ( & mref - > ref_count . ta_atomic ) ) ;
_mref_check ( mref ) ;
_mref_get ( mref ) ; // must be paired with __trans_logger_ref_put()
2011-03-08 16:45:52 +00:00
return mref - > ref_len ;
}
2012-02-07 08:22:51 +00:00
get_lamport ( & mref_a - > stamp ) ;
2013-01-04 13:05:00 +00:00
if ( mref - > ref_len > CONF_TRANS_MAX_MREF_SIZE & & CONF_TRANS_MAX_MREF_SIZE > 0 )
mref - > ref_len = CONF_TRANS_MAX_MREF_SIZE ;
2012-12-07 10:30:33 +00:00
2011-04-12 15:31:08 +00:00
// ensure that REGION_SIZE boundaries are obeyed by hashing
2010-08-27 15:42:10 +00:00
base_offset = mref - > ref_pos & ( loff_t ) ( REGION_SIZE - 1 ) ;
2011-04-12 15:31:08 +00:00
if ( mref - > ref_len > REGION_SIZE - base_offset ) {
2010-08-08 20:51:20 +00:00
mref - > ref_len = REGION_SIZE - base_offset ;
2011-04-12 15:31:08 +00:00
}
2010-08-08 20:51:20 +00:00
2013-04-22 07:06:27 +00:00
if ( mref - > ref_may_write = = READ ) {
return _read_ref_get ( output , mref_a ) ;
}
if ( unlikely ( brick - > stopped_logging ) ) { // only in EMERGENCY mode
2014-03-10 15:22:44 +00:00
mref_a - > is_emergency = true ;
2013-04-22 07:06:27 +00:00
/* Wait until writeback has finished.
* We have to this because writeback is out - of - order .
* Otherwise consistency could be violated for some time .
*/
while ( _congested ( brick ) ) {
// in case of emergency, busy-wait should be acceptable
brick_msleep ( HZ / 10 ) ;
}
2010-08-11 16:02:08 +00:00
return _read_ref_get ( output , mref_a ) ;
2010-08-08 20:51:20 +00:00
}
2011-02-23 20:48:06 +00:00
/* FIXME: THIS IS PROVISIONARY (use event instead)
*/
2012-02-06 10:41:15 +00:00
while ( unlikely ( ! brick - > power . led_on ) ) {
2012-09-17 10:11:25 +00:00
brick_msleep ( HZ / 10 ) ;
2011-02-23 20:48:06 +00:00
}
2010-08-11 16:02:08 +00:00
return _write_ref_get ( output , mref_a ) ;
2010-08-23 05:06:06 +00:00
err :
return - EINVAL ;
2010-08-08 20:51:20 +00:00
}
2012-02-08 15:44:53 +00:00
/* Forward declaration: pos_complete() is defined further below in this
 * file, but is already called by __trans_logger_ref_put().
 */
static noinline
void pos_complete ( struct trans_logger_mref_aspect * orig_mref_a ) ;
2011-04-08 09:52:46 +00:00
static noinline
2011-10-03 17:31:02 +00:00
void __trans_logger_ref_put ( struct trans_logger_brick * brick , struct trans_logger_mref_aspect * mref_a )
2010-08-08 20:51:20 +00:00
{
2011-04-12 15:31:08 +00:00
struct mref_object * mref ;
2010-12-15 12:13:18 +00:00
struct trans_logger_mref_aspect * shadow_a ;
2010-08-20 10:58:24 +00:00
struct trans_logger_input * input ;
2010-08-11 16:02:08 +00:00
2011-03-08 16:45:52 +00:00
restart :
2012-02-08 08:40:12 +00:00
CHECK_PTR ( mref_a , err ) ;
2011-04-12 15:31:08 +00:00
mref = mref_a - > object ;
2012-02-08 08:40:12 +00:00
CHECK_PTR ( mref , err ) ;
2011-07-15 10:12:06 +00:00
MARS_IO ( " pos = %lld len = %d \n " , mref - > ref_pos , mref - > ref_len ) ;
2012-12-10 09:31:28 +00:00
_mref_check ( mref ) ;
2010-12-15 11:58:22 +00:00
2011-04-14 14:21:26 +00:00
// are we a shadow (whether master or slave)?
2010-08-20 10:58:24 +00:00
shadow_a = mref_a - > shadow_ref ;
if ( shadow_a ) {
2011-03-08 16:45:52 +00:00
bool finished ;
2011-04-12 15:31:08 +00:00
2012-02-08 08:40:12 +00:00
CHECK_PTR ( shadow_a , err ) ;
2012-12-10 09:31:28 +00:00
CHECK_PTR ( shadow_a - > object , err ) ;
_mref_check ( shadow_a - > object ) ;
2012-02-08 08:40:12 +00:00
2012-12-10 09:31:28 +00:00
finished = _mref_put ( mref ) ;
2011-04-29 09:36:10 +00:00
atomic_dec ( & brick - > inner_balance_count ) ;
2011-04-12 15:31:08 +00:00
if ( unlikely ( finished & & mref_a - > is_hashed ) ) {
MARS_ERR ( " trying to put a hashed mref, pos = %lld len = %d \n " , mref - > ref_pos , mref - > ref_len ) ;
finished = false ; // leaves a memleak
2011-03-08 16:45:52 +00:00
}
2011-04-12 15:31:08 +00:00
2011-03-08 16:45:52 +00:00
if ( ! finished ) {
return ;
}
2011-04-15 10:13:22 +00:00
2011-04-18 14:14:16 +00:00
CHECK_HEAD_EMPTY ( & mref_a - > lh . lh_head ) ;
2011-04-12 15:31:08 +00:00
CHECK_HEAD_EMPTY ( & mref_a - > hash_head ) ;
CHECK_HEAD_EMPTY ( & mref_a - > replay_head ) ;
CHECK_HEAD_EMPTY ( & mref_a - > collect_head ) ;
CHECK_HEAD_EMPTY ( & mref_a - > sub_list ) ;
CHECK_HEAD_EMPTY ( & mref_a - > sub_head ) ;
2012-02-08 15:44:53 +00:00
if ( mref_a - > is_collected & & likely ( mref_a - > wb_error > = 0 ) ) {
pos_complete ( mref_a ) ;
}
2011-04-14 14:21:26 +00:00
CHECK_HEAD_EMPTY ( & mref_a - > pos_head ) ;
2011-04-15 10:13:22 +00:00
2010-08-20 10:58:24 +00:00
if ( shadow_a ! = mref_a ) { // we are a slave shadow
2011-04-08 09:52:46 +00:00
//MARS_DBG("slave\n");
2011-04-29 09:36:10 +00:00
atomic_dec ( & brick - > sshadow_count ) ;
2010-08-20 10:58:24 +00:00
CHECK_HEAD_EMPTY ( & mref_a - > hash_head ) ;
2011-03-08 16:45:52 +00:00
trans_logger_free_mref ( mref ) ;
// now put the master shadow
2011-04-12 15:31:08 +00:00
mref_a = shadow_a ;
2011-03-08 16:45:52 +00:00
goto restart ;
2010-08-20 10:58:24 +00:00
}
2011-03-08 16:45:52 +00:00
// we are a master shadow
2011-04-08 09:52:46 +00:00
CHECK_PTR ( mref_a - > shadow_data , err ) ;
2011-04-01 11:18:32 +00:00
if ( mref_a - > do_dealloc ) {
2011-08-12 11:09:48 +00:00
brick_block_free ( mref_a - > shadow_data , mref_a - > alloc_len ) ;
2015-12-10 12:50:36 +00:00
atomic64_sub ( mref_a - > alloc_len , & brick - > shadow_mem_used ) ;
2011-04-08 09:52:46 +00:00
mref_a - > shadow_data = NULL ;
mref_a - > do_dealloc = false ;
}
if ( mref_a - > do_buffered ) {
2011-04-01 11:18:32 +00:00
mref - > ref_data = NULL ;
}
2011-04-29 09:36:10 +00:00
atomic_dec ( & brick - > mshadow_count ) ;
2011-03-29 14:40:40 +00:00
atomic_dec ( & global_mshadow_count ) ;
2012-02-11 22:32:21 +00:00
atomic64_sub ( mref - > ref_len , & global_mshadow_used ) ;
2011-03-08 16:45:52 +00:00
trans_logger_free_mref ( mref ) ;
2010-08-11 16:02:08 +00:00
return ;
}
2011-04-29 09:36:10 +00:00
// only READ is allowed on non-shadow buffers
2014-03-10 15:22:44 +00:00
if ( unlikely ( mref - > ref_rw ! = READ & & ! mref_a - > is_emergency ) ) {
2011-04-29 09:36:10 +00:00
MARS_FAT ( " bad operation %d on non-shadow \n " , mref - > ref_rw ) ;
}
// no shadow => call through
input = brick - > inputs [ TL_INPUT_READ ] ;
2012-02-08 08:40:12 +00:00
CHECK_PTR ( input , err ) ;
2010-12-15 12:13:18 +00:00
GENERIC_INPUT_CALL ( input , mref_put , mref ) ;
2012-02-08 08:40:12 +00:00
err : ;
2010-08-08 20:51:20 +00:00
}
2011-04-08 09:52:46 +00:00
static noinline
2011-04-12 15:31:08 +00:00
void _trans_logger_ref_put ( struct trans_logger_output * output , struct mref_object * mref )
2011-03-10 11:40:06 +00:00
{
2011-04-12 15:31:08 +00:00
struct trans_logger_mref_aspect * mref_a ;
2011-10-03 17:31:02 +00:00
mref_a = trans_logger_mref_get_aspect ( output - > brick , mref ) ;
2011-04-12 15:31:08 +00:00
CHECK_PTR ( mref_a , err ) ;
2012-12-07 10:35:32 +00:00
CHECK_ASPECT ( mref_a , mref , err ) ;
2011-04-12 15:31:08 +00:00
2011-10-03 17:31:02 +00:00
__trans_logger_ref_put ( output - > brick , mref_a ) ;
2011-04-12 15:31:08 +00:00
return ;
err :
MARS_FAT ( " giving up... \n " ) ;
2011-03-10 11:40:06 +00:00
}
2011-04-08 09:52:46 +00:00
static noinline
2011-04-12 15:31:08 +00:00
void trans_logger_ref_put ( struct trans_logger_output * output , struct mref_object * mref )
2011-03-10 11:40:06 +00:00
{
2011-04-29 09:36:10 +00:00
struct trans_logger_brick * brick = output - > brick ;
atomic_dec ( & brick - > outer_balance_count ) ;
2011-04-12 15:31:08 +00:00
_trans_logger_ref_put ( output , mref ) ;
2011-03-10 11:40:06 +00:00
}
2011-04-08 09:52:46 +00:00
static noinline
void _trans_logger_endio ( struct generic_callback * cb )
2010-08-26 17:12:30 +00:00
{
2010-12-15 12:13:18 +00:00
struct trans_logger_mref_aspect * mref_a ;
2011-04-29 09:36:10 +00:00
struct trans_logger_brick * brick ;
2010-12-15 11:58:22 +00:00
2016-02-06 22:35:04 +00:00
_crashme ( 20 , false ) ;
2010-08-26 17:12:30 +00:00
mref_a = cb - > cb_private ;
CHECK_PTR ( mref_a , err ) ;
if ( unlikely ( & mref_a - > cb ! = cb ) ) {
MARS_FAT ( " bad callback -- hanging up \n " ) ;
goto err ;
}
2011-10-03 17:31:02 +00:00
brick = mref_a - > my_brick ;
2011-04-29 09:36:10 +00:00
CHECK_PTR ( brick , err ) ;
2010-08-26 17:12:30 +00:00
2011-10-04 11:34:18 +00:00
NEXT_CHECKED_CALLBACK ( cb , err ) ;
2011-03-10 11:40:06 +00:00
2013-01-02 17:37:43 +00:00
atomic_dec ( & brick - > any_fly_count ) ;
2011-04-29 09:36:10 +00:00
atomic_inc ( & brick - > total_cb_count ) ;
2019-02-11 18:45:47 +00:00
brick_wake ( & brick - > worker_event , brick - > worker_flag ) ;
2011-04-29 09:36:10 +00:00
return ;
2011-03-10 11:40:06 +00:00
2011-04-29 09:36:10 +00:00
err :
MARS_FAT ( " cannot handle callback \n " ) ;
2010-08-26 17:12:30 +00:00
}
2011-04-08 09:52:46 +00:00
static noinline
void trans_logger_ref_io ( struct trans_logger_output * output , struct mref_object * mref )
2010-08-08 20:51:20 +00:00
{
2011-04-29 09:36:10 +00:00
struct trans_logger_brick * brick = output - > brick ;
2010-12-15 12:13:18 +00:00
struct trans_logger_mref_aspect * mref_a ;
2011-04-01 11:18:32 +00:00
struct trans_logger_mref_aspect * shadow_a ;
2011-04-29 09:36:10 +00:00
struct trans_logger_input * input ;
2010-08-11 16:02:08 +00:00
2012-12-10 09:31:28 +00:00
_mref_check ( mref ) ;
2010-08-20 10:58:24 +00:00
2011-10-03 17:31:02 +00:00
mref_a = trans_logger_mref_get_aspect ( brick , mref ) ;
2010-08-23 05:06:06 +00:00
CHECK_PTR ( mref_a , err ) ;
2012-12-07 10:35:32 +00:00
CHECK_ASPECT ( mref_a , mref , err ) ;
2010-08-11 16:02:08 +00:00
2011-04-08 09:52:46 +00:00
MARS_IO ( " pos = %lld len = %d \n " , mref - > ref_pos , mref - > ref_len ) ;
2011-03-18 13:15:40 +00:00
// statistics
if ( mref - > ref_rw ) {
2011-04-29 09:36:10 +00:00
atomic_inc ( & brick - > total_write_count ) ;
2011-03-18 13:15:40 +00:00
} else {
2011-04-29 09:36:10 +00:00
atomic_inc ( & brick - > total_read_count ) ;
2011-03-18 13:15:40 +00:00
}
2010-08-11 16:02:08 +00:00
// is this a shadow buffer?
2011-04-01 11:18:32 +00:00
shadow_a = mref_a - > shadow_ref ;
if ( shadow_a ) {
2010-08-20 10:58:24 +00:00
# if 1
2011-04-18 14:14:16 +00:00
CHECK_HEAD_EMPTY ( & mref_a - > lh . lh_head ) ;
2011-04-08 09:52:46 +00:00
CHECK_HEAD_EMPTY ( & mref_a - > hash_head ) ;
CHECK_HEAD_EMPTY ( & mref_a - > pos_head ) ;
2011-04-01 11:18:32 +00:00
# endif
2012-12-10 09:31:28 +00:00
_mref_get ( mref ) ; // must be paired with __trans_logger_ref_put()
2011-04-29 09:36:10 +00:00
atomic_inc ( & brick - > inner_balance_count ) ;
2011-04-08 09:52:46 +00:00
2012-02-11 19:01:16 +00:00
qq_mref_insert ( & brick - > q_phase [ 0 ] , mref_a ) ;
2019-02-11 18:45:47 +00:00
brick_wake ( & brick - > worker_event , brick - > worker_flag ) ;
2010-08-11 16:02:08 +00:00
return ;
}
// only READ is allowed on non-shadow buffers
2014-03-10 15:22:44 +00:00
if ( unlikely ( mref - > ref_rw ! = READ & & ! mref_a - > is_emergency ) ) {
2011-04-08 09:52:46 +00:00
MARS_FAT ( " bad operation %d on non-shadow \n " , mref - > ref_rw ) ;
2010-08-11 16:02:08 +00:00
}
2013-01-02 17:37:43 +00:00
atomic_inc ( & brick - > any_fly_count ) ;
2011-03-10 11:40:06 +00:00
2011-10-03 17:31:02 +00:00
mref_a - > my_brick = brick ;
2011-10-04 11:34:18 +00:00
INSERT_CALLBACK ( mref , & mref_a - > cb , _trans_logger_endio , mref_a ) ;
2010-08-26 17:12:30 +00:00
2011-04-29 09:36:10 +00:00
input = output - > brick - > inputs [ TL_INPUT_READ ] ;
2010-12-15 12:13:18 +00:00
GENERIC_INPUT_CALL ( input , mref_io , mref ) ;
2011-04-08 09:52:46 +00:00
return ;
err :
MARS_FAT ( " cannot handle IO \n " ) ;
2010-08-08 20:51:20 +00:00
}
2011-04-10 16:59:06 +00:00
////////////////////////////// writeback info //////////////////////////////
2011-05-13 11:19:28 +00:00
/* save final completion status when necessary
*/
2011-04-15 10:13:22 +00:00
static noinline
void pos_complete ( struct trans_logger_mref_aspect * orig_mref_a )
{
2011-10-03 17:31:02 +00:00
struct trans_logger_brick * brick = orig_mref_a - > my_brick ;
2011-11-03 11:17:59 +00:00
struct trans_logger_input * log_input = orig_mref_a - > log_input ;
2012-12-17 07:25:17 +00:00
loff_t finished ;
2011-04-15 10:13:22 +00:00
struct list_head * tmp ;
2012-12-17 07:25:17 +00:00
CHECK_PTR ( brick , err ) ;
2011-11-03 11:17:59 +00:00
CHECK_PTR ( log_input , err ) ;
2011-05-13 11:19:28 +00:00
atomic_inc ( & brick - > total_writeback_count ) ;
2011-04-15 10:13:22 +00:00
tmp = & orig_mref_a - > pos_head ;
2011-05-13 11:19:28 +00:00
2012-12-30 22:44:48 +00:00
down ( & log_input - > inf_mutex ) ;
2012-12-17 07:25:17 +00:00
finished = orig_mref_a - > log_pos ;
2011-05-13 11:19:28 +00:00
// am I the first member? (means "youngest" list entry)
2011-11-03 11:17:59 +00:00
if ( tmp = = log_input - > pos_list . next ) {
2012-12-17 07:25:17 +00:00
MARS_IO ( " first_finished = %lld \n " , finished ) ;
2012-12-31 09:33:47 +00:00
if ( unlikely ( finished < = log_input - > inf . inf_min_pos ) ) {
2012-12-17 07:25:17 +00:00
MARS_ERR ( " backskip in log writeback: %lld -> %lld \n " , log_input - > inf . inf_min_pos , finished ) ;
2011-05-19 11:36:00 +00:00
}
2012-12-31 09:33:47 +00:00
if ( unlikely ( finished > log_input - > inf . inf_max_pos ) ) {
MARS_ERR ( " min_pos > max_pos: %lld > %lld \n " , finished , log_input - > inf . inf_max_pos ) ;
}
2012-12-17 07:25:17 +00:00
log_input - > inf . inf_min_pos = finished ;
get_lamport ( & log_input - > inf . inf_min_pos_stamp ) ;
2012-12-30 22:44:48 +00:00
_inf_callback ( log_input , false ) ;
2011-05-19 11:36:00 +00:00
} else {
struct trans_logger_mref_aspect * prev_mref_a ;
prev_mref_a = container_of ( tmp - > prev , struct trans_logger_mref_aspect , pos_head ) ;
2012-12-17 07:25:17 +00:00
if ( unlikely ( finished < = prev_mref_a - > log_pos ) ) {
MARS_ERR ( " backskip: %lld -> %lld \n " , finished , prev_mref_a - > log_pos ) ;
2011-05-19 11:36:00 +00:00
} else {
2011-11-03 11:17:59 +00:00
/* Transitively transfer log_pos to the predecessor
2011-05-19 11:36:00 +00:00
* to correctly reflect the committed region .
*/
2012-12-17 07:25:17 +00:00
prev_mref_a - > log_pos = finished ;
2011-04-15 10:13:22 +00:00
}
}
2012-12-30 22:44:48 +00:00
2011-04-15 10:13:22 +00:00
list_del_init ( tmp ) ;
2013-01-02 21:43:50 +00:00
atomic_dec ( & log_input - > pos_count ) ;
2012-12-17 07:25:17 +00:00
2012-12-30 22:44:48 +00:00
up ( & log_input - > inf_mutex ) ;
2011-11-03 11:17:59 +00:00
err : ;
2011-04-15 10:13:22 +00:00
}
2011-04-10 16:59:06 +00:00
static noinline
void free_writeback ( struct writeback_info * wb )
{
2011-04-11 13:40:06 +00:00
struct list_head * tmp ;
2011-04-18 14:14:16 +00:00
if ( unlikely ( wb - > w_error < 0 ) ) {
MARS_ERR ( " writeback error = %d at pos = %lld len = %d, writeback is incomplete \n " , wb - > w_error , wb - > w_pos , wb - > w_len ) ;
}
/* Now complete the original requests.
*/
2011-04-11 13:40:06 +00:00
while ( ( tmp = wb - > w_collect_list . next ) ! = & wb - > w_collect_list ) {
struct trans_logger_mref_aspect * orig_mref_a ;
struct mref_object * orig_mref ;
list_del_init ( tmp ) ;
orig_mref_a = container_of ( tmp , struct trans_logger_mref_aspect , collect_head ) ;
orig_mref = orig_mref_a - > object ;
2012-12-10 09:31:28 +00:00
_mref_check ( orig_mref ) ;
2012-02-08 15:44:53 +00:00
if ( unlikely ( ! orig_mref_a - > is_collected ) ) {
MARS_ERR ( " request %lld (len = %d) was not collected \n " , orig_mref - > ref_pos , orig_mref - > ref_len ) ;
}
if ( unlikely ( wb - > w_error < 0 ) ) {
orig_mref_a - > wb_error = wb - > w_error ;
}
2011-04-15 10:13:22 +00:00
2011-10-03 17:31:02 +00:00
__trans_logger_ref_put ( orig_mref_a - > my_brick , orig_mref_a ) ;
2011-04-11 13:40:06 +00:00
}
2011-04-18 14:14:16 +00:00
2011-08-12 11:09:48 +00:00
brick_mem_free ( wb ) ;
2011-04-10 16:59:06 +00:00
}
2011-04-18 14:14:16 +00:00
/* Generic endio() for writeback_info
*/
2011-04-10 16:59:06 +00:00
static noinline
void wb_endio ( struct generic_callback * cb )
{
struct trans_logger_mref_aspect * sub_mref_a ;
struct mref_object * sub_mref ;
2011-04-29 09:36:10 +00:00
struct trans_logger_brick * brick ;
2011-04-10 16:59:06 +00:00
struct writeback_info * wb ;
int rw ;
atomic_t * dec ;
2011-05-19 11:36:00 +00:00
void ( * * _endio ) ( struct generic_callback * cb ) ;
2011-04-10 16:59:06 +00:00
void ( * endio ) ( struct generic_callback * cb ) ;
2016-02-06 22:35:04 +00:00
_crashme ( 21 , false ) ;
2014-03-23 09:57:56 +00:00
LAST_CALLBACK ( cb ) ;
2011-04-10 16:59:06 +00:00
sub_mref_a = cb - > cb_private ;
CHECK_PTR ( sub_mref_a , err ) ;
sub_mref = sub_mref_a - > object ;
CHECK_PTR ( sub_mref , err ) ;
wb = sub_mref_a - > wb ;
CHECK_PTR ( wb , err ) ;
2011-10-03 17:31:02 +00:00
brick = wb - > w_brick ;
2011-04-29 09:36:10 +00:00
CHECK_PTR ( brick , err ) ;
2011-04-10 16:59:06 +00:00
2011-04-18 14:14:16 +00:00
if ( cb - > cb_error < 0 ) {
wb - > w_error = cb - > cb_error ;
}
2012-02-08 08:52:00 +00:00
atomic_dec ( & brick - > wb_balance_count ) ;
rw = sub_mref_a - > orig_rw ;
2011-04-10 16:59:06 +00:00
dec = rw ? & wb - > w_sub_write_count : & wb - > w_sub_read_count ;
2011-04-11 13:40:06 +00:00
CHECK_ATOMIC ( dec , 1 ) ;
if ( ! atomic_dec_and_test ( dec ) ) {
2011-11-14 14:21:15 +00:00
goto done ;
2011-04-10 16:59:06 +00:00
}
2011-05-19 11:36:00 +00:00
_endio = rw ? & wb - > write_endio : & wb - > read_endio ;
endio = * _endio ;
* _endio = NULL ;
2011-04-18 14:14:16 +00:00
if ( likely ( endio ) ) {
2011-04-10 16:59:06 +00:00
endio ( cb ) ;
2011-05-19 11:36:00 +00:00
} else {
MARS_ERR ( " internal: no endio defined \n " ) ;
2011-04-10 16:59:06 +00:00
}
2012-12-31 09:33:47 +00:00
2011-11-14 14:21:15 +00:00
done :
2019-02-11 18:45:47 +00:00
brick_wake ( & brick - > worker_event , brick - > worker_flag ) ;
2011-04-10 16:59:06 +00:00
return ;
err :
MARS_FAT ( " hanging up.... \n " ) ;
}
2011-04-18 14:14:16 +00:00
/* Atomically create writeback info, based on "snapshot" of current hash
* state .
* Notice that the hash can change during writeback IO , thus we need
* struct writeback_info to precisely catch that information at a single
* point in time .
*/
2011-04-10 16:59:06 +00:00
static noinline
2013-01-01 14:56:32 +00:00
struct writeback_info * make_writeback ( struct trans_logger_brick * brick , loff_t pos , int len )
2011-04-10 16:59:06 +00:00
{
2011-05-13 11:19:28 +00:00
struct writeback_info * wb ;
2011-04-29 09:36:10 +00:00
struct trans_logger_input * read_input ;
struct trans_logger_input * write_input ;
int write_input_nr ;
2011-05-13 11:19:28 +00:00
/* Allocate structure representing a bunch of adjacent writebacks
*/
2011-08-12 11:09:48 +00:00
wb = brick_zmem_alloc ( sizeof ( struct writeback_info ) ) ;
2011-04-10 16:59:06 +00:00
if ( ! wb ) {
goto err ;
}
2011-04-12 15:31:08 +00:00
if ( unlikely ( len < 0 ) ) {
MARS_ERR ( " len = %d \n " , len ) ;
}
2011-04-10 16:59:06 +00:00
2011-10-03 17:31:02 +00:00
wb - > w_brick = brick ;
2011-04-19 14:46:38 +00:00
wb - > w_pos = pos ;
wb - > w_len = len ;
wb - > w_lh . lh_pos = & wb - > w_pos ;
INIT_LIST_HEAD ( & wb - > w_lh . lh_head ) ;
INIT_LIST_HEAD ( & wb - > w_collect_list ) ;
INIT_LIST_HEAD ( & wb - > w_sub_read_list ) ;
INIT_LIST_HEAD ( & wb - > w_sub_write_list ) ;
2011-04-18 14:14:16 +00:00
/* Atomically fetch transitive closure on all requests
* overlapping with the current search region .
*/
2013-01-01 14:56:32 +00:00
hash_extend ( brick , & wb - > w_pos , & wb - > w_len , & wb - > w_collect_list ) ;
2011-04-10 16:59:06 +00:00
2013-01-01 14:37:09 +00:00
if ( list_empty ( & wb - > w_collect_list ) ) {
goto collision ;
}
2011-04-10 16:59:06 +00:00
pos = wb - > w_pos ;
len = wb - > w_len ;
2011-04-12 15:31:08 +00:00
if ( unlikely ( len < 0 ) ) {
MARS_ERR ( " len = %d \n " , len ) ;
}
2011-05-13 11:19:28 +00:00
/* Determine the "channels" we want to operate on
*/
2011-04-29 09:36:10 +00:00
read_input = brick - > inputs [ TL_INPUT_READ ] ;
write_input_nr = TL_INPUT_WRITEBACK ;
write_input = brick - > inputs [ write_input_nr ] ;
if ( ! write_input - > connect ) {
write_input_nr = TL_INPUT_READ ;
write_input = read_input ;
}
2012-02-12 15:41:22 +00:00
/* Create sub_mrefs for read of old disk version (phase1)
2011-04-18 14:14:16 +00:00
*/
if ( brick - > log_reads ) {
while ( len > 0 ) {
struct trans_logger_mref_aspect * sub_mref_a ;
struct mref_object * sub_mref ;
2012-12-31 09:33:47 +00:00
struct trans_logger_input * log_input ;
2011-04-18 14:14:16 +00:00
int this_len ;
int status ;
2012-02-02 15:25:43 +00:00
sub_mref = trans_logger_alloc_mref ( brick ) ;
2011-04-18 14:14:16 +00:00
if ( unlikely ( ! sub_mref ) ) {
MARS_FAT ( " cannot alloc sub_mref \n " ) ;
goto err ;
}
sub_mref - > ref_pos = pos ;
sub_mref - > ref_len = len ;
sub_mref - > ref_may_write = READ ;
sub_mref - > ref_rw = READ ;
sub_mref - > ref_data = NULL ;
2011-10-03 17:31:02 +00:00
sub_mref_a = trans_logger_mref_get_aspect ( brick , sub_mref ) ;
2011-04-18 14:14:16 +00:00
CHECK_PTR ( sub_mref_a , err ) ;
2012-12-07 10:35:32 +00:00
CHECK_ASPECT ( sub_mref_a , sub_mref , err ) ;
2011-04-18 14:14:16 +00:00
2011-04-29 09:36:10 +00:00
sub_mref_a - > my_input = read_input ;
2012-12-31 09:33:47 +00:00
log_input = brick - > inputs [ brick - > log_input_nr ] ;
2011-11-03 11:17:59 +00:00
sub_mref_a - > log_input = log_input ;
2012-12-31 09:33:47 +00:00
atomic_inc ( & log_input - > log_ref_count ) ;
2011-10-03 17:31:02 +00:00
sub_mref_a - > my_brick = brick ;
2012-02-08 08:52:00 +00:00
sub_mref_a - > orig_rw = READ ;
2011-04-18 14:14:16 +00:00
sub_mref_a - > wb = wb ;
2011-04-29 09:36:10 +00:00
status = GENERIC_INPUT_CALL ( read_input , mref_get , sub_mref ) ;
2011-04-18 14:14:16 +00:00
if ( unlikely ( status < 0 ) ) {
MARS_FAT ( " cannot get sub_ref, status = %d \n " , status ) ;
goto err ;
}
list_add_tail ( & sub_mref_a - > sub_head , & wb - > w_sub_read_list ) ;
atomic_inc ( & wb - > w_sub_read_count ) ;
2011-04-29 09:36:10 +00:00
atomic_inc ( & brick - > wb_balance_count ) ;
2011-04-18 14:14:16 +00:00
this_len = sub_mref - > ref_len ;
pos + = this_len ;
len - = this_len ;
}
/* Re-init for startover
*/
pos = wb - > w_pos ;
len = wb - > w_len ;
}
2012-02-12 15:41:22 +00:00
/* Always create sub_mrefs for writeback (phase3)
2011-04-18 14:14:16 +00:00
*/
2011-04-10 16:59:06 +00:00
while ( len > 0 ) {
struct trans_logger_mref_aspect * sub_mref_a ;
struct mref_object * sub_mref ;
2011-05-13 11:19:28 +00:00
struct trans_logger_mref_aspect * orig_mref_a ;
struct mref_object * orig_mref ;
2012-12-31 09:33:47 +00:00
struct trans_logger_input * log_input ;
2011-04-10 16:59:06 +00:00
void * data ;
int this_len = len ;
int diff ;
int status ;
2012-02-12 15:36:34 +00:00
atomic_inc ( & brick - > total_hash_find_count ) ;
2013-01-01 14:56:32 +00:00
orig_mref_a = _hash_find ( & wb - > w_collect_list , pos , & this_len , true , false ) ;
2011-05-13 11:19:28 +00:00
if ( unlikely ( ! orig_mref_a ) ) {
2011-04-10 16:59:06 +00:00
MARS_FAT ( " could not find data \n " ) ;
goto err ;
}
2011-05-13 11:19:28 +00:00
orig_mref = orig_mref_a - > object ;
diff = pos - orig_mref - > ref_pos ;
2011-04-10 16:59:06 +00:00
if ( unlikely ( diff < 0 ) ) {
MARS_FAT ( " bad diff %d \n " , diff ) ;
goto err ;
}
2011-05-13 11:19:28 +00:00
data = orig_mref_a - > shadow_data + diff ;
2011-04-10 16:59:06 +00:00
2012-02-02 15:25:43 +00:00
sub_mref = trans_logger_alloc_mref ( brick ) ;
2011-04-10 16:59:06 +00:00
if ( unlikely ( ! sub_mref ) ) {
MARS_FAT ( " cannot alloc sub_mref \n " ) ;
goto err ;
}
sub_mref - > ref_pos = pos ;
sub_mref - > ref_len = this_len ;
sub_mref - > ref_may_write = WRITE ;
sub_mref - > ref_rw = WRITE ;
sub_mref - > ref_data = data ;
2011-10-03 17:31:02 +00:00
sub_mref_a = trans_logger_mref_get_aspect ( brick , sub_mref ) ;
2011-04-10 16:59:06 +00:00
CHECK_PTR ( sub_mref_a , err ) ;
2012-12-07 10:35:32 +00:00
CHECK_ASPECT ( sub_mref_a , sub_mref , err ) ;
2011-04-10 16:59:06 +00:00
2011-05-13 11:19:28 +00:00
sub_mref_a - > orig_mref_a = orig_mref_a ;
2011-04-29 09:36:10 +00:00
sub_mref_a - > my_input = write_input ;
2012-12-31 09:33:47 +00:00
log_input = orig_mref_a - > log_input ;
2011-05-13 11:19:28 +00:00
sub_mref_a - > log_input = log_input ;
2012-12-31 09:33:47 +00:00
atomic_inc ( & log_input - > log_ref_count ) ;
2011-10-03 17:31:02 +00:00
sub_mref_a - > my_brick = brick ;
2012-02-08 08:52:00 +00:00
sub_mref_a - > orig_rw = WRITE ;
2011-04-10 16:59:06 +00:00
sub_mref_a - > wb = wb ;
2011-04-29 09:36:10 +00:00
status = GENERIC_INPUT_CALL ( write_input , mref_get , sub_mref ) ;
2011-04-10 16:59:06 +00:00
if ( unlikely ( status < 0 ) ) {
MARS_FAT ( " cannot get sub_ref, status = %d \n " , status ) ;
2013-01-01 14:37:09 +00:00
wb - > w_error = status ;
2011-04-10 16:59:06 +00:00
goto err ;
}
list_add_tail ( & sub_mref_a - > sub_head , & wb - > w_sub_write_list ) ;
atomic_inc ( & wb - > w_sub_write_count ) ;
2011-04-29 09:36:10 +00:00
atomic_inc ( & brick - > wb_balance_count ) ;
2011-04-10 16:59:06 +00:00
this_len = sub_mref - > ref_len ;
pos + = this_len ;
len - = this_len ;
}
2011-04-12 15:31:08 +00:00
2011-04-10 16:59:06 +00:00
return wb ;
err :
2011-04-11 13:40:06 +00:00
MARS_ERR ( " cleaning up... \n " ) ;
2013-01-01 14:37:09 +00:00
collision :
2011-04-10 16:59:06 +00:00
if ( wb ) {
free_writeback ( wb ) ;
}
return NULL ;
}
2011-04-19 14:46:38 +00:00
static inline
2012-02-08 15:26:37 +00:00
void _fire_one ( struct list_head * tmp , bool do_update )
2011-04-19 14:46:38 +00:00
{
struct trans_logger_mref_aspect * sub_mref_a ;
struct mref_object * sub_mref ;
2011-04-29 09:36:10 +00:00
struct trans_logger_input * sub_input ;
2011-04-19 14:46:38 +00:00
sub_mref_a = container_of ( tmp , struct trans_logger_mref_aspect , sub_head ) ;
sub_mref = sub_mref_a - > object ;
2012-02-08 15:26:37 +00:00
if ( unlikely ( sub_mref_a - > is_fired ) ) {
MARS_ERR ( " trying to fire twice \n " ) ;
return ;
}
sub_mref_a - > is_fired = true ;
2011-10-04 11:34:18 +00:00
SETUP_CALLBACK ( sub_mref , wb_endio , sub_mref_a ) ;
2011-04-19 14:46:38 +00:00
2012-12-31 09:33:47 +00:00
sub_input = sub_mref_a - > my_input ;
2011-06-30 13:15:52 +00:00
# ifdef DO_WRITEBACK
2011-04-19 14:46:38 +00:00
GENERIC_INPUT_CALL ( sub_input , mref_io , sub_mref ) ;
2011-06-30 13:15:52 +00:00
# else
2012-02-25 20:36:52 +00:00
SIMPLE_CALLBACK ( sub_mref , 0 ) ;
2011-06-30 13:15:52 +00:00
# endif
2015-09-30 13:17:29 +00:00
if ( do_update ) { // CHECK: shouldn't we do this always?
2011-04-19 14:46:38 +00:00
GENERIC_INPUT_CALL ( sub_input , mref_put , sub_mref ) ;
}
}
2011-04-29 09:36:10 +00:00
static inline
2012-02-08 15:26:37 +00:00
void fire_writeback ( struct list_head * start , bool do_update )
2011-05-13 11:19:28 +00:00
{
struct list_head * tmp ;
2012-02-08 15:26:37 +00:00
/* Caution! The wb structure may get deallocated
* during _fire_one ( ) in some cases ( e . g . when the
* callback is directly called by the mref_io operation ) .
* Ensure that no ptr dereferencing can take
* place after working on the last list member .
*/
tmp = start - > next ;
while ( tmp ! = start ) {
struct list_head * next = tmp - > next ;
2011-05-13 11:19:28 +00:00
list_del_init ( tmp ) ;
2012-02-08 15:26:37 +00:00
_fire_one ( tmp , do_update ) ;
tmp = next ;
2011-05-13 11:19:28 +00:00
}
}
2011-04-11 13:40:06 +00:00
2013-01-03 07:22:34 +00:00
static inline
void update_max_pos ( struct trans_logger_mref_aspect * orig_mref_a )
{
loff_t max_pos = orig_mref_a - > log_pos ;
struct trans_logger_input * log_input = orig_mref_a - > log_input ;
CHECK_PTR ( log_input , done ) ;
down ( & log_input - > inf_mutex ) ;
if ( unlikely ( max_pos < log_input - > inf . inf_min_pos ) ) {
MARS_ERR ( " new max_pos < min_pos: %lld < %lld \n " , max_pos , log_input - > inf . inf_min_pos ) ;
}
if ( log_input - > inf . inf_max_pos < max_pos ) {
log_input - > inf . inf_max_pos = max_pos ;
get_lamport ( & log_input - > inf . inf_max_pos_stamp ) ;
_inf_callback ( log_input , false ) ;
}
up ( & log_input - > inf_mutex ) ;
done : ;
}
static inline
void update_writeback_info ( struct writeback_info * wb )
{
struct list_head * start = & wb - > w_collect_list ;
struct list_head * tmp ;
/* Notice: in case of log rotation, each list member
* may belong to a different log_input .
*/
for ( tmp = start - > next ; tmp ! = start ; tmp = tmp - > next ) {
struct trans_logger_mref_aspect * orig_mref_a ;
orig_mref_a = container_of ( tmp , struct trans_logger_mref_aspect , collect_head ) ;
update_max_pos ( orig_mref_a ) ;
}
}
2010-08-11 16:02:08 +00:00
////////////////////////////// worker thread //////////////////////////////
2010-08-20 10:58:24 +00:00
/*********************************************************************
2012-07-23 07:24:16 +00:00
* Phase 0 : write transaction log entry for the original write request .
2010-08-20 10:58:24 +00:00
*/
2011-05-26 14:32:32 +00:00
2011-04-08 09:52:46 +00:00
static noinline
2011-05-26 14:32:32 +00:00
void _complete ( struct trans_logger_brick * brick , struct trans_logger_mref_aspect * orig_mref_a , int error , bool pre_io )
2010-08-11 16:02:08 +00:00
{
2010-12-15 12:13:18 +00:00
struct mref_object * orig_mref ;
2010-08-20 10:58:24 +00:00
orig_mref = orig_mref_a - > object ;
2010-08-23 05:06:06 +00:00
CHECK_PTR ( orig_mref , err ) ;
2010-08-11 16:02:08 +00:00
2011-05-26 14:32:32 +00:00
if ( orig_mref_a - > is_completed | |
2012-12-30 20:48:44 +00:00
( pre_io & &
( trans_logger_completion_semantics > = 2 | |
( trans_logger_completion_semantics > = 1 & & ! orig_mref - > ref_skip_sync ) ) ) ) {
2011-05-26 14:32:32 +00:00
goto done ;
}
2011-04-29 09:36:10 +00:00
2012-12-30 19:47:39 +00:00
if ( cmpxchg ( & orig_mref_a - > is_completed , false , true ) )
goto done ;
2013-01-02 17:37:43 +00:00
atomic_dec ( & brick - > log_fly_count ) ;
2011-10-04 11:34:18 +00:00
if ( likely ( error > = 0 ) ) {
2012-08-01 10:09:49 +00:00
mref_checksum ( orig_mref ) ;
2010-12-15 12:13:18 +00:00
orig_mref - > ref_flags & = ~ MREF_WRITING ;
orig_mref - > ref_flags | = MREF_UPTODATE ;
2010-08-20 10:58:24 +00:00
}
2011-10-04 11:34:18 +00:00
CHECKED_CALLBACK ( orig_mref , error , err ) ;
2010-08-11 16:02:08 +00:00
2016-12-13 13:13:08 +00:00
update_max_pos ( orig_mref_a ) ;
2011-05-26 14:32:32 +00:00
done :
return ;
err :
MARS_ERR ( " giving up... \n " ) ;
}
static noinline
2012-02-12 15:41:22 +00:00
void phase0_preio ( void * private )
2011-05-26 14:32:32 +00:00
{
struct trans_logger_mref_aspect * orig_mref_a ;
struct trans_logger_brick * brick ;
orig_mref_a = private ;
CHECK_PTR ( orig_mref_a , err ) ;
2012-12-10 09:31:28 +00:00
CHECK_PTR ( orig_mref_a - > object , err ) ;
2011-10-03 17:31:02 +00:00
brick = orig_mref_a - > my_brick ;
2011-05-26 14:32:32 +00:00
CHECK_PTR ( brick , err ) ;
// signal completion to the upper layer
// FIXME: immediate error signalling is impossible here, but some delayed signalling should be possible as a workaround. Think!
2012-12-10 09:31:28 +00:00
_mref_check ( orig_mref_a - > object ) ;
2011-05-26 14:32:32 +00:00
_complete ( brick , orig_mref_a , 0 , true ) ;
2012-12-10 09:31:28 +00:00
_mref_check ( orig_mref_a - > object ) ;
2011-05-26 14:32:32 +00:00
return ;
err :
MARS_ERR ( " giving up... \n " ) ;
}
static noinline
2012-02-12 15:41:22 +00:00
void phase0_endio ( void * private , int error )
2011-05-26 14:32:32 +00:00
{
2012-02-08 10:18:04 +00:00
struct mref_object * orig_mref ;
2011-05-26 14:32:32 +00:00
struct trans_logger_mref_aspect * orig_mref_a ;
struct trans_logger_brick * brick ;
orig_mref_a = private ;
CHECK_PTR ( orig_mref_a , err ) ;
2015-04-27 06:59:43 +00:00
// remove_this
if ( unlikely ( cmpxchg ( & orig_mref_a - > is_endio , false , true ) ) ) {
MARS_ERR ( " Sigh this should not happen %p %p \n " ,
orig_mref_a , orig_mref_a - > object ) ;
return ;
}
// end_remove_this
2011-10-03 17:31:02 +00:00
brick = orig_mref_a - > my_brick ;
2011-05-26 14:32:32 +00:00
CHECK_PTR ( brick , err ) ;
2012-02-08 10:18:04 +00:00
orig_mref = orig_mref_a - > object ;
CHECK_PTR ( orig_mref , err ) ;
2011-05-26 14:32:32 +00:00
2013-01-01 21:36:33 +00:00
orig_mref_a - > is_persistent = true ;
2011-05-26 14:32:32 +00:00
2012-02-08 10:18:04 +00:00
_CHECK ( orig_mref_a - > shadow_ref , err ) ;
2011-07-20 13:11:44 +00:00
2011-05-26 14:32:32 +00:00
// signal completion to the upper layer
_complete ( brick , orig_mref_a , error , false ) ;
2012-02-08 10:18:04 +00:00
/* Queue up for the next phase.
*/
2012-02-11 19:01:16 +00:00
qq_mref_insert ( & brick - > q_phase [ 1 ] , orig_mref_a ) ;
2012-02-08 10:18:04 +00:00
/* Undo the above pinning
*/
__trans_logger_ref_put ( brick , orig_mref_a ) ;
2013-01-01 14:52:54 +00:00
banning_reset ( & brick - > q_phase [ 0 ] . q_banning ) ;
2017-06-05 21:18:20 +00:00
qq_deactivate ( & brick - > q_phase [ 0 ] ) ;
2019-02-11 18:45:47 +00:00
brick_wake ( & brick - > worker_event , brick - > worker_flag ) ;
2011-04-12 15:31:08 +00:00
return ;
err :
MARS_ERR ( " giving up... \n " ) ;
2010-08-11 16:02:08 +00:00
}
2011-04-08 09:52:46 +00:00
static noinline
2012-02-12 15:41:22 +00:00
bool phase0_startio ( struct trans_logger_mref_aspect * orig_mref_a )
2010-08-11 16:02:08 +00:00
{
2010-12-15 12:13:18 +00:00
struct mref_object * orig_mref ;
2010-12-15 11:58:22 +00:00
struct trans_logger_brick * brick ;
2011-11-03 11:17:59 +00:00
struct trans_logger_input * input ;
2011-04-29 09:36:10 +00:00
struct log_status * logst ;
2012-12-17 07:25:17 +00:00
loff_t log_pos ;
2010-08-20 10:58:24 +00:00
void * data ;
bool ok ;
2010-08-23 05:06:06 +00:00
CHECK_PTR ( orig_mref_a , err ) ;
orig_mref = orig_mref_a - > object ;
CHECK_PTR ( orig_mref , err ) ;
2011-10-03 17:31:02 +00:00
brick = orig_mref_a - > my_brick ;
2010-12-15 11:58:22 +00:00
CHECK_PTR ( brick , err ) ;
2012-12-31 09:33:47 +00:00
input = orig_mref_a - > log_input ;
2011-11-03 11:17:59 +00:00
CHECK_PTR ( input , err ) ;
logst = & input - > logst ;
2012-12-27 10:23:38 +00:00
logst - > do_crc = trans_logger_do_crc ;
2010-08-23 05:06:06 +00:00
{
struct log_header l = {
. l_stamp = orig_mref_a - > stamp ,
. l_pos = orig_mref - > ref_pos ,
. l_len = orig_mref - > ref_len ,
. l_code = CODE_WRITE_NEW ,
} ;
2011-04-29 09:36:10 +00:00
data = log_reserve ( logst , & l ) ;
2010-08-23 05:06:06 +00:00
}
2010-08-20 10:58:24 +00:00
if ( unlikely ( ! data ) ) {
2010-08-23 05:06:06 +00:00
goto err ;
2010-08-20 10:58:24 +00:00
}
2013-01-01 11:35:33 +00:00
hash_ensure_stableness ( brick , orig_mref_a ) ;
2011-04-08 09:52:46 +00:00
memcpy ( data , orig_mref_a - > shadow_data , orig_mref - > ref_len ) ;
2010-08-20 10:58:24 +00:00
2015-04-27 06:54:59 +00:00
/* Pin mref->ref_count so it can't go away
* after _complete ( ) .
* This may happen rather early in phase0_preio ( ) .
*/
_mref_get ( orig_mref ) ; // must be paired with __trans_logger_ref_put()
atomic_inc ( & brick - > inner_balance_count ) ;
2013-01-02 17:37:43 +00:00
atomic_inc ( & brick - > log_fly_count ) ;
2013-01-03 09:12:20 +00:00
ok = log_finalize ( logst , orig_mref - > ref_len , phase0_endio , orig_mref_a ) ;
2010-08-20 10:58:24 +00:00
if ( unlikely ( ! ok ) ) {
2013-01-02 17:37:43 +00:00
atomic_dec ( & brick - > log_fly_count ) ;
2010-08-23 05:06:06 +00:00
goto err ;
2010-08-20 10:58:24 +00:00
}
2012-12-17 07:25:17 +00:00
log_pos = logst - > log_pos + logst - > offset ;
orig_mref_a - > log_pos = log_pos ;
// update new log_pos in the symlinks
2012-12-30 22:44:48 +00:00
down ( & input - > inf_mutex ) ;
2012-12-17 07:25:17 +00:00
input - > inf . inf_log_pos = log_pos ;
memcpy ( & input - > inf . inf_log_pos_stamp , & logst - > log_pos_stamp , sizeof ( input - > inf . inf_log_pos_stamp ) ) ;
_inf_callback ( input , false ) ;
2011-03-20 17:38:08 +00:00
2012-12-17 07:25:17 +00:00
# ifdef CONFIG_MARS_DEBUG
2011-11-03 11:17:59 +00:00
if ( ! list_empty ( & input - > pos_list ) ) {
2011-05-13 11:19:28 +00:00
struct trans_logger_mref_aspect * last_mref_a ;
2011-11-03 11:17:59 +00:00
last_mref_a = container_of ( input - > pos_list . prev , struct trans_logger_mref_aspect , pos_head ) ;
2011-05-13 11:19:28 +00:00
if ( last_mref_a - > log_pos > = orig_mref_a - > log_pos ) {
MARS_ERR ( " backskip in pos_list, %lld >= %lld \n " , last_mref_a - > log_pos , orig_mref_a - > log_pos ) ;
}
}
# endif
2011-11-03 11:17:59 +00:00
list_add_tail ( & orig_mref_a - > pos_head , & input - > pos_list ) ;
2013-01-02 21:43:50 +00:00
atomic_inc ( & input - > pos_count ) ;
2012-12-30 22:44:48 +00:00
up ( & input - > inf_mutex ) ;
2011-02-23 20:48:06 +00:00
2013-01-03 09:12:20 +00:00
phase0_preio ( orig_mref_a ) ;
2010-08-20 10:58:24 +00:00
return true ;
2010-08-23 05:06:06 +00:00
err :
return false ;
2010-08-20 10:58:24 +00:00
}
2011-04-08 09:52:46 +00:00
static noinline
2012-02-12 15:41:22 +00:00
bool prep_phase_startio ( struct trans_logger_mref_aspect * mref_a )
2011-04-08 09:52:46 +00:00
{
struct mref_object * mref = mref_a - > object ;
struct trans_logger_mref_aspect * shadow_a ;
2011-04-29 09:36:10 +00:00
struct trans_logger_brick * brick ;
2011-04-08 09:52:46 +00:00
CHECK_PTR ( mref , err ) ;
shadow_a = mref_a - > shadow_ref ;
CHECK_PTR ( shadow_a , err ) ;
2011-10-03 17:31:02 +00:00
brick = mref_a - > my_brick ;
2011-04-29 09:36:10 +00:00
CHECK_PTR ( brick , err ) ;
2011-04-08 09:52:46 +00:00
MARS_IO ( " pos = %lld len = %d rw = %d \n " , mref - > ref_pos , mref - > ref_len , mref - > ref_rw ) ;
if ( mref - > ref_rw = = READ ) {
// nothing to do: directly signal success.
struct mref_object * shadow = shadow_a - > object ;
if ( unlikely ( shadow = = mref ) ) {
MARS_ERR ( " oops, we should be a slave shadow, but are a master one \n " ) ;
}
# ifdef USE_MEMCPY
if ( mref_a - > shadow_data ! = mref - > ref_data ) {
if ( unlikely ( mref - > ref_len < = 0 | | mref - > ref_len > PAGE_SIZE ) ) {
MARS_ERR ( " implausible ref_len = %d \n " , mref - > ref_len ) ;
}
MARS_IO ( " read memcpy to = %p from = %p len = %d \n " , mref - > ref_data , mref_a - > shadow_data , mref - > ref_len ) ;
memcpy ( mref - > ref_data , mref_a - > shadow_data , mref - > ref_len ) ;
}
# endif
mref - > ref_flags | = MREF_UPTODATE ;
2011-10-04 11:34:18 +00:00
CHECKED_CALLBACK ( mref , 0 , err ) ;
2011-04-08 09:52:46 +00:00
2011-10-03 17:31:02 +00:00
__trans_logger_ref_put ( brick , mref_a ) ;
2011-04-08 09:52:46 +00:00
2017-06-05 21:18:20 +00:00
qq_deactivate ( & brick - > q_phase [ 0 ] ) ;
2011-04-08 09:52:46 +00:00
return true ;
}
// else WRITE
# if 1
2011-04-18 14:14:16 +00:00
CHECK_HEAD_EMPTY ( & mref_a - > lh . lh_head ) ;
2011-04-08 09:52:46 +00:00
CHECK_HEAD_EMPTY ( & mref_a - > hash_head ) ;
if ( unlikely ( mref - > ref_flags & ( MREF_READING | MREF_WRITING ) ) ) {
MARS_ERR ( " bad flags %d \n " , mref - > ref_flags ) ;
}
# endif
/* In case of non-buffered IO, the buffer is
* under control of the user . In particular , he
* may change it without telling us .
* Therefore we make a copy ( or " snapshot " ) here .
*/
mref - > ref_flags | = MREF_WRITING ;
# ifdef USE_MEMCPY
if ( mref_a - > shadow_data ! = mref - > ref_data ) {
if ( unlikely ( mref - > ref_len < = 0 | | mref - > ref_len > PAGE_SIZE ) ) {
MARS_ERR ( " implausible ref_len = %d \n " , mref - > ref_len ) ;
}
MARS_IO ( " write memcpy to = %p from = %p len = %d \n " , mref_a - > shadow_data , mref - > ref_data , mref - > ref_len ) ;
memcpy ( mref_a - > shadow_data , mref - > ref_data , mref - > ref_len ) ;
}
# endif
mref_a - > is_dirty = true ;
mref_a - > shadow_ref - > is_dirty = true ;
# ifndef KEEP_UNIQUE
if ( unlikely ( mref_a - > shadow_ref ! = mref_a ) ) {
MARS_ERR ( " something is wrong: %p != %p \n " , mref_a - > shadow_ref , mref_a ) ;
}
# endif
2012-02-12 15:36:34 +00:00
if ( likely ( ! mref_a - > is_hashed ) ) {
2012-12-31 09:33:47 +00:00
struct trans_logger_input * log_input ;
log_input = brick - > inputs [ brick - > log_input_nr ] ;
2012-02-06 13:05:50 +00:00
MARS_IO ( " hashing %d at %lld \n " , mref - > ref_len , mref - > ref_pos ) ;
2012-12-31 09:33:47 +00:00
mref_a - > log_input = log_input ;
atomic_inc ( & log_input - > log_ref_count ) ;
2011-04-29 09:36:10 +00:00
hash_insert ( brick , mref_a ) ;
2012-02-12 15:36:34 +00:00
} else {
MARS_ERR ( " tried to hash twice \n " ) ;
2011-04-08 09:52:46 +00:00
}
2012-02-12 15:41:22 +00:00
return phase0_startio ( mref_a ) ;
2011-04-08 09:52:46 +00:00
err :
MARS_ERR ( " cannot work \n " ) ;
2012-09-17 10:11:25 +00:00
brick_msleep ( 1000 ) ;
2011-04-08 09:52:46 +00:00
return false ;
}
2010-08-20 10:58:24 +00:00
/*********************************************************************
 * Phase 1: read original version of data.
 * This happens _after_ phase 0, deliberately.
 * We are explicitly dealing with old and new versions.
 * The new version is hashed in memory all the time (such that parallel
 * READs will see it), so we have plenty of time for getting the
 * old version from disk at some later time, e.g. when IO contention is low.
 */
2011-04-10 16:59:06 +00:00
static noinline
2012-02-12 15:41:22 +00:00
void phase1_endio ( struct generic_callback * cb )
2011-04-10 16:59:06 +00:00
{
struct trans_logger_mref_aspect * sub_mref_a ;
struct writeback_info * wb ;
2011-04-29 09:36:10 +00:00
struct trans_logger_brick * brick ;
2011-04-10 16:59:06 +00:00
CHECK_PTR ( cb , err ) ;
sub_mref_a = cb - > cb_private ;
CHECK_PTR ( sub_mref_a , err ) ;
wb = sub_mref_a - > wb ;
CHECK_PTR ( wb , err ) ;
2011-10-03 17:31:02 +00:00
brick = wb - > w_brick ;
2011-04-29 09:36:10 +00:00
CHECK_PTR ( brick , err ) ;
2011-04-10 16:59:06 +00:00
if ( unlikely ( cb - > cb_error < 0 ) ) {
MARS_FAT ( " IO error %d \n " , cb - > cb_error ) ;
goto err ;
}
2013-01-01 14:52:54 +00:00
banning_reset ( & brick - > q_phase [ 1 ] . q_banning ) ;
2011-04-18 14:14:16 +00:00
// queue up for the next phase
2012-02-11 19:01:16 +00:00
qq_wb_insert ( & brick - > q_phase [ 2 ] , wb ) ;
2017-06-05 21:18:20 +00:00
qq_deactivate ( & brick - > q_phase [ 1 ] ) ;
2019-02-11 18:45:47 +00:00
brick_wake ( & brick - > worker_event , brick - > worker_flag ) ;
2011-04-10 16:59:06 +00:00
return ;
err :
MARS_FAT ( " hanging up.... \n " ) ;
}
2011-04-18 14:14:16 +00:00
static noinline
2012-02-12 15:41:22 +00:00
void phase3_endio ( struct generic_callback * cb ) ;
2011-04-19 14:46:38 +00:00
static noinline
2012-02-12 15:41:22 +00:00
bool phase3_startio ( struct writeback_info * wb ) ;
2011-04-18 14:14:16 +00:00
2011-04-10 16:59:06 +00:00
static noinline
2012-02-12 15:41:22 +00:00
bool phase1_startio ( struct trans_logger_mref_aspect * orig_mref_a )
2011-04-10 16:59:06 +00:00
{
struct mref_object * orig_mref ;
2011-04-29 09:36:10 +00:00
struct trans_logger_brick * brick ;
2012-02-08 08:40:12 +00:00
struct writeback_info * wb = NULL ;
2011-04-10 16:59:06 +00:00
CHECK_PTR ( orig_mref_a , err ) ;
orig_mref = orig_mref_a - > object ;
CHECK_PTR ( orig_mref , err ) ;
2011-10-03 17:31:02 +00:00
brick = orig_mref_a - > my_brick ;
2011-04-29 09:36:10 +00:00
CHECK_PTR ( brick , err ) ;
2011-04-10 16:59:06 +00:00
if ( orig_mref_a - > is_collected ) {
MARS_IO ( " already collected, pos = %lld len = %d \n " , orig_mref - > ref_pos , orig_mref - > ref_len ) ;
2017-06-05 21:18:20 +00:00
qq_deactivate ( & brick - > q_phase [ 1 ] ) ;
2011-04-10 16:59:06 +00:00
goto done ;
}
2011-04-11 13:40:06 +00:00
if ( ! orig_mref_a - > is_hashed ) {
MARS_IO ( " AHA not hashed, pos = %lld len = %d \n " , orig_mref - > ref_pos , orig_mref - > ref_len ) ;
2017-06-05 21:18:20 +00:00
qq_deactivate ( & brick - > q_phase [ 1 ] ) ;
2011-04-11 13:40:06 +00:00
goto done ;
}
2012-02-08 08:40:12 +00:00
2013-01-01 14:56:32 +00:00
wb = make_writeback ( brick , orig_mref - > ref_pos , orig_mref - > ref_len ) ;
2011-04-11 13:40:06 +00:00
if ( unlikely ( ! wb ) ) {
2013-01-01 14:37:09 +00:00
goto collision ;
2011-04-10 16:59:06 +00:00
}
2011-04-12 15:31:08 +00:00
if ( unlikely ( list_empty ( & wb - > w_sub_write_list ) ) ) {
2013-01-01 14:37:09 +00:00
MARS_ERR ( " sub_write_list is empty, orig pos = %lld len = %d (collected=%d), extended pos = %lld len = %d \n " , orig_mref - > ref_pos , orig_mref - > ref_len , ( int ) orig_mref_a - > is_collected , wb - > w_pos , wb - > w_len ) ;
2012-02-08 08:40:12 +00:00
goto err ;
2011-04-11 13:40:06 +00:00
}
2011-04-12 15:31:08 +00:00
2012-02-12 15:41:22 +00:00
wb - > read_endio = phase1_endio ;
wb - > write_endio = phase3_endio ;
2011-04-19 14:46:38 +00:00
atomic_set ( & wb - > w_sub_log_count , atomic_read ( & wb - > w_sub_read_count ) ) ;
2011-10-03 17:31:02 +00:00
if ( brick - > log_reads ) {
2012-02-08 15:26:37 +00:00
fire_writeback ( & wb - > w_sub_read_list , false ) ;
2011-04-19 14:46:38 +00:00
} else { // shortcut
2017-06-05 21:18:20 +00:00
# ifdef SHORTCUT_1_to_3
bool res ;
/* speculate that next phase can be immediately started */
qq_activate ( & brick - > q_phase [ 3 ] ) ;
res = phase3_startio ( wb ) ;
if ( likely ( res ) ) {
qq_deactivate ( & brick - > q_phase [ 1 ] ) ;
goto done ;
}
/* speculation was wrong: no shortcutting */
qq_deactivate ( & brick - > q_phase [ 3 ] ) ;
# endif
2012-02-11 19:01:16 +00:00
qq_wb_insert ( & brick - > q_phase [ 3 ] , wb ) ;
2017-06-05 21:18:20 +00:00
qq_deactivate ( & brick - > q_phase [ 1 ] ) ;
2019-02-11 18:45:47 +00:00
brick_wake ( & brick - > worker_event , brick - > worker_flag ) ;
2011-04-19 14:46:38 +00:00
}
2011-04-10 16:59:06 +00:00
done :
return true ;
err :
2012-02-08 08:40:12 +00:00
if ( wb ) {
free_writeback ( wb ) ;
}
2013-01-01 14:37:09 +00:00
collision :
2011-04-10 16:59:06 +00:00
return false ;
}
2010-08-20 10:58:24 +00:00
/*********************************************************************
 * Phase 2: log the old disk version.
 */
2011-04-19 14:46:38 +00:00
static inline
2012-02-12 15:41:22 +00:00
void _phase2_endio ( struct writeback_info * wb )
2011-04-08 09:52:46 +00:00
{
2011-10-03 17:31:02 +00:00
struct trans_logger_brick * brick = wb - > w_brick ;
2011-04-08 09:52:46 +00:00
// queue up for the next phase
2012-02-11 19:01:16 +00:00
qq_wb_insert ( & brick - > q_phase [ 3 ] , wb ) ;
2019-02-11 18:45:47 +00:00
brick_wake ( & brick - > worker_event , brick - > worker_flag ) ;
2011-04-08 09:52:46 +00:00
return ;
}
static noinline
2012-02-12 15:41:22 +00:00
void phase2_endio ( void * private , int error )
2010-08-11 16:02:08 +00:00
{
2010-12-15 12:13:18 +00:00
struct trans_logger_mref_aspect * sub_mref_a ;
2011-04-29 09:36:10 +00:00
struct trans_logger_brick * brick ;
2011-04-19 14:46:38 +00:00
struct writeback_info * wb ;
2010-08-23 05:06:06 +00:00
2011-03-11 13:57:54 +00:00
sub_mref_a = private ;
2010-08-23 05:06:06 +00:00
CHECK_PTR ( sub_mref_a , err ) ;
2011-04-19 14:46:38 +00:00
wb = sub_mref_a - > wb ;
CHECK_PTR ( wb , err ) ;
2011-10-03 17:31:02 +00:00
brick = wb - > w_brick ;
2011-04-29 09:36:10 +00:00
CHECK_PTR ( brick , err ) ;
2011-04-08 09:52:46 +00:00
2011-03-11 13:57:54 +00:00
if ( unlikely ( error < 0 ) ) {
MARS_FAT ( " IO error %d \n " , error ) ;
goto err ; // FIXME: this leads to hanging requests. do better.
2010-08-20 10:58:24 +00:00
}
2010-08-11 16:02:08 +00:00
2011-04-19 14:46:38 +00:00
CHECK_ATOMIC ( & wb - > w_sub_log_count , 1 ) ;
if ( atomic_dec_and_test ( & wb - > w_sub_log_count ) ) {
2013-01-01 14:52:54 +00:00
banning_reset ( & brick - > q_phase [ 2 ] . q_banning ) ;
2012-02-12 15:41:22 +00:00
_phase2_endio ( wb ) ;
2011-04-08 09:52:46 +00:00
}
2017-06-05 21:18:20 +00:00
qq_deactivate ( & brick - > q_phase [ 2 ] ) ;
2011-04-08 09:52:46 +00:00
return ;
err :
MARS_FAT ( " hanging up.... \n " ) ;
2010-08-11 16:02:08 +00:00
}
2011-04-08 09:52:46 +00:00
static noinline
2012-02-12 15:41:22 +00:00
bool _phase2_startio ( struct trans_logger_mref_aspect * sub_mref_a )
2010-08-11 16:02:08 +00:00
{
2011-04-20 14:26:44 +00:00
struct mref_object * sub_mref = NULL ;
2011-04-29 09:36:10 +00:00
struct writeback_info * wb ;
struct trans_logger_input * input ;
2010-12-15 11:58:22 +00:00
struct trans_logger_brick * brick ;
2011-04-29 09:36:10 +00:00
struct log_status * logst ;
2010-08-11 16:02:08 +00:00
void * data ;
bool ok ;
2010-08-23 05:06:06 +00:00
CHECK_PTR ( sub_mref_a , err ) ;
sub_mref = sub_mref_a - > object ;
CHECK_PTR ( sub_mref , err ) ;
2011-04-29 09:36:10 +00:00
wb = sub_mref_a - > wb ;
CHECK_PTR ( wb , err ) ;
2011-10-03 17:31:02 +00:00
brick = wb - > w_brick ;
2010-12-15 11:58:22 +00:00
CHECK_PTR ( brick , err ) ;
2011-11-03 11:17:59 +00:00
input = sub_mref_a - > log_input ;
CHECK_PTR ( input , err ) ;
2011-04-29 09:36:10 +00:00
logst = & input - > logst ;
2012-12-27 10:23:38 +00:00
logst - > do_crc = trans_logger_do_crc ;
2010-08-23 05:06:06 +00:00
{
struct log_header l = {
. l_stamp = sub_mref_a - > stamp ,
. l_pos = sub_mref - > ref_pos ,
. l_len = sub_mref - > ref_len ,
. l_code = CODE_WRITE_OLD ,
} ;
2011-04-29 09:36:10 +00:00
data = log_reserve ( logst , & l ) ;
2010-08-23 05:06:06 +00:00
}
2010-08-11 16:02:08 +00:00
if ( unlikely ( ! data ) ) {
2010-08-23 05:06:06 +00:00
goto err ;
2010-08-11 16:02:08 +00:00
}
2010-08-20 10:58:24 +00:00
memcpy ( data , sub_mref - > ref_data , sub_mref - > ref_len ) ;
2010-08-11 16:02:08 +00:00
2013-01-03 09:12:20 +00:00
ok = log_finalize ( logst , sub_mref - > ref_len , phase2_endio , sub_mref_a ) ;
2010-08-11 16:02:08 +00:00
if ( unlikely ( ! ok ) ) {
2010-08-23 05:06:06 +00:00
goto err ;
2010-08-11 16:02:08 +00:00
}
2011-04-20 14:26:44 +00:00
2010-08-11 16:02:08 +00:00
return true ;
2010-08-23 05:06:06 +00:00
err :
2011-04-20 14:26:44 +00:00
MARS_FAT ( " cannot log old data, pos = %lld len = %d \n " , sub_mref ? sub_mref - > ref_pos : 0 , sub_mref ? sub_mref - > ref_len : 0 ) ;
2010-08-23 05:06:06 +00:00
return false ;
2010-08-11 16:02:08 +00:00
}
2011-04-08 09:52:46 +00:00
static noinline
2012-02-12 15:41:22 +00:00
bool phase2_startio ( struct writeback_info * wb )
2011-04-08 09:52:46 +00:00
{
2011-04-19 14:46:38 +00:00
struct trans_logger_brick * brick ;
bool ok = true ;
2011-04-08 09:52:46 +00:00
2011-04-19 14:46:38 +00:00
CHECK_PTR ( wb , err ) ;
2011-10-03 17:31:02 +00:00
brick = wb - > w_brick ;
2011-04-19 14:46:38 +00:00
CHECK_PTR ( brick , err ) ;
2011-04-08 09:52:46 +00:00
2011-04-29 09:36:10 +00:00
if ( brick - > log_reads & & atomic_read ( & wb - > w_sub_log_count ) > 0 ) {
2011-04-19 14:46:38 +00:00
struct list_head * start ;
2011-04-08 09:52:46 +00:00
struct list_head * tmp ;
2011-04-19 14:46:38 +00:00
start = & wb - > w_sub_read_list ;
2011-04-20 14:26:44 +00:00
for ( tmp = start - > next ; tmp ! = start ; tmp = tmp - > next ) {
2011-04-08 09:52:46 +00:00
struct trans_logger_mref_aspect * sub_mref_a ;
struct mref_object * sub_mref ;
2011-04-19 14:46:38 +00:00
2011-04-08 09:52:46 +00:00
sub_mref_a = container_of ( tmp , struct trans_logger_mref_aspect , sub_head ) ;
sub_mref = sub_mref_a - > object ;
2011-04-19 14:46:38 +00:00
2011-04-08 09:52:46 +00:00
mars_trace ( sub_mref , " sub_log " ) ;
2011-04-19 14:46:38 +00:00
2012-02-12 15:41:22 +00:00
if ( ! _phase2_startio ( sub_mref_a ) ) {
2011-04-19 14:46:38 +00:00
ok = false ;
}
2011-04-08 09:52:46 +00:00
}
2019-02-11 18:45:47 +00:00
brick_wake ( & brick - > worker_event , brick - > worker_flag ) ;
2011-04-08 09:52:46 +00:00
} else {
2012-02-12 15:41:22 +00:00
_phase2_endio ( wb ) ;
2011-04-08 09:52:46 +00:00
}
2011-04-19 14:46:38 +00:00
return ok ;
2011-04-08 09:52:46 +00:00
err :
return false ;
}
2010-08-20 10:58:24 +00:00
/*********************************************************************
 * Phase 3: overwrite old disk version with new version.
 */
2011-04-18 14:14:16 +00:00
static noinline
2012-02-12 15:41:22 +00:00
void phase3_endio ( struct generic_callback * cb )
2011-04-18 14:14:16 +00:00
{
struct trans_logger_mref_aspect * sub_mref_a ;
struct writeback_info * wb ;
2011-04-29 09:36:10 +00:00
struct trans_logger_brick * brick ;
2011-04-18 14:14:16 +00:00
CHECK_PTR ( cb , err ) ;
sub_mref_a = cb - > cb_private ;
CHECK_PTR ( sub_mref_a , err ) ;
wb = sub_mref_a - > wb ;
CHECK_PTR ( wb , err ) ;
2011-10-03 17:31:02 +00:00
brick = wb - > w_brick ;
2011-04-29 09:36:10 +00:00
CHECK_PTR ( brick , err ) ;
2011-04-18 14:14:16 +00:00
if ( unlikely ( cb - > cb_error < 0 ) ) {
MARS_FAT ( " IO error %d \n " , cb - > cb_error ) ;
goto err ;
}
2011-04-29 09:36:10 +00:00
hash_put_all ( brick , & wb - > w_collect_list ) ;
2011-04-18 14:14:16 +00:00
2011-05-13 11:19:28 +00:00
atomic_inc ( & brick - > total_writeback_cluster_count ) ;
2011-03-20 17:38:08 +00:00
2011-04-19 14:46:38 +00:00
free_writeback ( wb ) ;
2010-08-20 10:58:24 +00:00
2013-01-01 14:52:54 +00:00
banning_reset ( & brick - > q_phase [ 3 ] . q_banning ) ;
2017-06-05 21:18:20 +00:00
qq_deactivate ( & brick - > q_phase [ 3 ] ) ;
2019-02-11 18:45:47 +00:00
brick_wake ( & brick - > worker_event , brick - > worker_flag ) ;
2011-03-20 17:38:08 +00:00
2011-04-08 09:52:46 +00:00
return ;
2010-08-23 05:06:06 +00:00
2011-04-08 09:52:46 +00:00
err :
MARS_FAT ( " hanging up.... \n " ) ;
2010-08-20 10:58:24 +00:00
}
2011-04-08 09:52:46 +00:00
static noinline
2012-02-12 15:41:22 +00:00
bool phase3_startio ( struct writeback_info * wb )
2011-04-08 09:52:46 +00:00
{
2011-04-20 14:26:44 +00:00
struct list_head * start = & wb - > w_sub_read_list ;
struct list_head * tmp ;
/* Cleanup read requests (if they exist from previous phases)
*/
while ( ( tmp = start - > next ) ! = start ) {
struct trans_logger_mref_aspect * sub_mref_a ;
struct mref_object * sub_mref ;
2011-04-29 09:36:10 +00:00
struct trans_logger_input * sub_input ;
2011-04-20 14:26:44 +00:00
list_del_init ( tmp ) ;
sub_mref_a = container_of ( tmp , struct trans_logger_mref_aspect , sub_head ) ;
sub_mref = sub_mref_a - > object ;
2011-04-29 09:36:10 +00:00
sub_input = sub_mref_a - > my_input ;
2011-04-20 14:26:44 +00:00
GENERIC_INPUT_CALL ( sub_input , mref_put , sub_mref ) ;
}
2013-01-03 07:22:34 +00:00
update_writeback_info ( wb ) ;
2011-04-20 14:26:44 +00:00
/* Start writeback IO
*/
2012-02-08 15:26:37 +00:00
fire_writeback ( & wb - > w_sub_write_list , true ) ;
2011-04-08 09:52:46 +00:00
return true ;
}
2011-04-19 14:46:38 +00:00
/*********************************************************************
 * The logger thread.
 * There is only a single instance, dealing with all requests in parallel.
 */
2011-04-08 09:52:46 +00:00
static noinline
2012-10-15 14:35:36 +00:00
int run_mref_queue ( struct logger_queue * q , bool ( * startio ) ( struct trans_logger_mref_aspect * sub_mref_a ) , int max , bool do_limit )
2010-08-20 10:58:24 +00:00
{
2011-04-29 09:36:10 +00:00
struct trans_logger_brick * brick = q - > q_brick ;
2012-10-15 14:35:36 +00:00
int total_len = 0 ;
2011-04-19 14:46:38 +00:00
bool found = false ;
2011-04-08 09:52:46 +00:00
bool ok ;
2012-02-11 22:32:21 +00:00
int res = 0 ;
2010-08-20 10:58:24 +00:00
2011-04-29 09:36:10 +00:00
do {
struct trans_logger_mref_aspect * mref_a ;
2011-04-19 14:46:38 +00:00
mref_a = qq_mref_fetch ( q ) ;
if ( ! mref_a )
goto done ;
2011-03-29 14:40:40 +00:00
2012-10-15 14:35:36 +00:00
if ( do_limit & & likely ( mref_a - > object ) )
total_len + = mref_a - > object - > ref_len ;
2011-04-19 14:46:38 +00:00
ok = startio ( mref_a ) ;
if ( unlikely ( ! ok ) ) {
qq_mref_pushback ( q , mref_a ) ;
goto done ;
}
2012-02-11 22:32:21 +00:00
res + + ;
2011-04-29 09:36:10 +00:00
found = true ;
2011-10-03 17:31:02 +00:00
__trans_logger_ref_put ( mref_a - > my_brick , mref_a ) ;
2011-04-29 09:36:10 +00:00
} while ( - - max > 0 ) ;
2010-08-20 10:58:24 +00:00
2011-04-19 14:46:38 +00:00
done :
if ( found ) {
2012-10-15 14:35:36 +00:00
mars_limit ( & global_writeback . limiter , ( total_len - 1 ) / 1024 + 1 ) ;
2019-02-11 18:45:47 +00:00
brick_wake ( & brick - > worker_event , brick - > worker_flag ) ;
2011-04-08 09:52:46 +00:00
}
2011-04-19 14:46:38 +00:00
return res ;
2011-04-08 09:52:46 +00:00
}
static noinline
2011-04-19 14:46:38 +00:00
int run_wb_queue ( struct logger_queue * q , bool ( * startio ) ( struct writeback_info * wb ) , int max )
2010-08-23 05:06:06 +00:00
{
2011-04-29 09:36:10 +00:00
struct trans_logger_brick * brick = q - > q_brick ;
2012-10-15 14:35:36 +00:00
int total_len = 0 ;
2011-03-11 13:57:54 +00:00
bool found = false ;
2010-08-23 05:06:06 +00:00
bool ok ;
2012-02-11 22:32:21 +00:00
int res = 0 ;
2010-08-23 05:06:06 +00:00
2011-04-29 09:36:10 +00:00
do {
struct writeback_info * wb ;
2011-04-19 14:46:38 +00:00
wb = qq_wb_fetch ( q ) ;
if ( ! wb )
2011-03-11 13:57:54 +00:00
goto done ;
2012-10-15 14:35:36 +00:00
total_len + = wb - > w_len ;
2011-04-19 14:46:38 +00:00
ok = startio ( wb ) ;
2010-08-23 05:06:06 +00:00
if ( unlikely ( ! ok ) ) {
2011-04-19 14:46:38 +00:00
qq_wb_pushback ( q , wb ) ;
2011-03-11 13:57:54 +00:00
goto done ;
2010-08-23 05:06:06 +00:00
}
2012-02-11 22:32:21 +00:00
res + + ;
2011-04-29 09:36:10 +00:00
found = true ;
} while ( - - max > 0 ) ;
2011-03-11 13:57:54 +00:00
done :
if ( found ) {
2012-10-15 14:35:36 +00:00
mars_limit ( & global_writeback . limiter , ( total_len - 1 ) / 1024 + 1 ) ;
2019-02-11 18:45:47 +00:00
brick_wake ( & brick - > worker_event , brick - > worker_flag ) ;
2011-03-11 13:57:54 +00:00
}
return res ;
2010-08-23 05:06:06 +00:00
}
2012-10-15 14:35:36 +00:00
/* Ranking tables.
 *
 * Each table is an array of struct rank_info terminated by
 * { RKI_DUMMY }.  _do_ranking() passes them to ranking_compute()
 * together with a measured value (queue length, number of flying
 * requests, ...).
 * NOTE(review): presumably every entry is a pair { value, points }
 * -- confirm against the definition of struct rank_info.
 *
 * queue_ranks[][] and fly_ranks[][] are indexed by
 * [floating_mode][queue number]; index 0 selects the float_* tables,
 * index 1 the nofloat_* tables.
 */

static
struct rank_info float_queue_rank_log[] = {
	{ 0, 0 },
	{ 1, 100 },
	{ RKI_DUMMY }
};

static
struct rank_info float_queue_rank_io[] = {
	{ 0, 0 },
	{ 1, 1 },
	{ RKI_DUMMY }
};

static
struct rank_info float_fly_rank_log[] = {
	{ 0, 0 },
	{ 1, 1 },
	{ 32, 10 },
	{ RKI_DUMMY }
};

static
struct rank_info float_fly_rank_io[] = {
	{ 0, 0 },
	{ 1, 10 },
	{ 2, -10 },
	{ 10000, -200 },
	{ RKI_DUMMY }
};

static
struct rank_info nofloat_queue_rank_log[] = {
	{ 0, 0 },
	{ 1, 10 },
	{ RKI_DUMMY }
};

static
struct rank_info nofloat_queue_rank_io[] = {
	{ 0, 0 },
	{ 1, 10 },
	{ 100, 100 },
	{ RKI_DUMMY }
};

/* the log queue uses the same fly table in both modes */
#define nofloat_fly_rank_log float_fly_rank_log

static
struct rank_info nofloat_fly_rank_io[] = {
	{ 0, 0 },
	{ 1, 10 },
	{ 128, 8 },
	{ 129, -200 },
	{ RKI_DUMMY }
};

static
struct rank_info *queue_ranks[2][LOGGER_QUEUES] = {
	[0] = {
		[0] = float_queue_rank_log,
		[1] = float_queue_rank_io,
		[2] = float_queue_rank_io,
		[3] = float_queue_rank_io,
	},
	[1] = {
		[0] = nofloat_queue_rank_log,
		[1] = nofloat_queue_rank_io,
		[2] = nofloat_queue_rank_io,
		[3] = nofloat_queue_rank_io,
	},
};

static
struct rank_info *fly_ranks[2][LOGGER_QUEUES] = {
	[0] = {
		[0] = float_fly_rank_log,
		[1] = float_fly_rank_io,
		[2] = float_fly_rank_io,
		[3] = float_fly_rank_io,
	},
	[1] = {
		[0] = nofloat_fly_rank_log,
		[1] = nofloat_fly_rank_io,
		[2] = nofloat_fly_rank_io,
		[3] = nofloat_fly_rank_io,
	},
};

/* Applied to queue 0, fed with the number of flying log mrefs of this brick. */
static
struct rank_info extra_rank_mref_flying[] = {
	{ 0, 0 },
	{ 1, 10 },
	{ 16, 30 },
	{ 31, 0 },
	{ 32, -200 },
	{ RKI_DUMMY }
};

/* Applied to queue 0, fed with the global number of flying mrefs. */
static
struct rank_info global_rank_mref_flying[] = {
	{ 0, 0 },
	{ 63, 0 },
	{ 64, -200 },
	{ RKI_DUMMY }
};
2011-04-29 09:36:10 +00:00
static noinline
2015-12-10 07:24:04 +00:00
int _do_ranking ( struct trans_logger_brick * brick )
2012-02-11 22:32:21 +00:00
{
2015-12-10 07:24:04 +00:00
struct rank_data * rkd = brick - > rkd ;
2012-12-04 09:16:38 +00:00
int res ;
2012-02-11 22:32:21 +00:00
int i ;
2012-10-15 14:35:36 +00:00
int floating_mode ;
2012-12-11 14:05:47 +00:00
int mref_flying ;
2012-02-11 22:32:21 +00:00
bool delay_callers ;
ranking_start ( rkd , LOGGER_QUEUES ) ;
2012-10-15 14:35:36 +00:00
// check the memory situation...
2012-02-11 22:32:21 +00:00
delay_callers = false ;
2012-10-15 14:35:36 +00:00
floating_mode = 1 ;
2012-08-08 09:16:52 +00:00
if ( brick_global_memlimit > = 1024 ) {
2012-02-11 22:32:21 +00:00
int global_mem_used = atomic64_read ( & global_mshadow_used ) / 1024 ;
2012-09-26 09:22:36 +00:00
trans_logger_mem_usage = global_mem_used ;
2012-02-11 22:32:21 +00:00
2012-10-15 14:35:36 +00:00
floating_mode = ( global_mem_used < brick_global_memlimit / 2 ) ? 0 : 1 ;
2012-02-11 22:32:21 +00:00
if ( global_mem_used > = brick_global_memlimit )
delay_callers = true ;
MARS_IO ( " global_mem_used = %d \n " , global_mem_used ) ;
2012-08-08 09:16:52 +00:00
} else if ( brick - > shadow_mem_limit > = 8 ) {
int local_mem_used = atomic64_read ( & brick - > shadow_mem_used ) / 1024 ;
2012-10-15 14:35:36 +00:00
floating_mode = ( local_mem_used < brick - > shadow_mem_limit / 2 ) ? 0 : 1 ;
2012-08-08 09:16:52 +00:00
if ( local_mem_used > = brick - > shadow_mem_limit )
delay_callers = true ;
MARS_IO ( " local_mem_used = %d \n " , local_mem_used ) ;
2012-10-15 14:35:36 +00:00
}
if ( delay_callers ) {
if ( ! brick - > delay_callers ) {
brick - > delay_callers = true ;
atomic_inc ( & brick - > total_delay_count ) ;
}
2012-12-05 07:29:39 +00:00
} else if ( brick - > delay_callers ) {
2012-10-15 14:35:36 +00:00
brick - > delay_callers = false ;
2019-02-11 18:45:47 +00:00
brick_wake ( & brick - > caller_event , brick - > caller_flag ) ;
2012-10-15 14:35:36 +00:00
}
2012-12-29 20:11:41 +00:00
// global limit for flying mrefs
ranking_compute ( & rkd [ 0 ] , global_rank_mref_flying , atomic_read ( & global_mref_flying ) ) ;
// local limit for flying mrefs
2012-12-11 14:05:47 +00:00
mref_flying = 0 ;
for ( i = TL_INPUT_LOG1 ; i < = TL_INPUT_LOG2 ; i + + ) {
struct trans_logger_input * input = brick - > inputs [ i ] ;
mref_flying + = atomic_read ( & input - > logst . mref_flying ) ;
}
2012-10-15 14:35:36 +00:00
// obey the basic rules...
for ( i = 0 ; i < LOGGER_QUEUES ; i + + ) {
2017-06-05 21:10:03 +00:00
int queued = brick - > q_phase [ i ] . q_queued ;
2012-10-15 14:35:36 +00:00
int flying ;
MARS_IO ( " i = %d queued = %d \n " , i , queued ) ;
/* This must come first.
* When a queue is empty , you must not credit any positive points .
* Otherwise , ( almost ) infinite selection of untreatable
* queues may occur .
*/
if ( queued < = 0 )
continue ;
2012-12-30 06:50:47 +00:00
if ( banning_is_hit ( & brick - > q_phase [ i ] . q_banning ) ) {
# ifdef IO_DEBUGGING
unsigned long long now = cpu_clock ( raw_smp_processor_id ( ) ) ;
MARS_IO ( " BAILOUT queue = %d via banning now = %lld last_hit = %lld diff = %lld renew_count = %d count = %d \n " ,
i ,
now ,
now - brick - > q_phase [ i ] . q_banning . ban_last_hit ,
brick - > q_phase [ i ] . q_banning . ban_last_hit ,
brick - > q_phase [ i ] . q_banning . ban_renew_count ,
brick - > q_phase [ i ] . q_banning . ban_count ) ;
# endif
break ;
}
2012-12-11 09:33:10 +00:00
if ( i = = 0 ) {
// limit mref IO parallelism on transaction log
ranking_compute ( & rkd [ 0 ] , extra_rank_mref_flying , mref_flying ) ;
} else if ( i = = 1 & & ! floating_mode ) {
2012-12-04 15:01:50 +00:00
struct trans_logger_brick * leader ;
2012-10-15 14:35:36 +00:00
int lim ;
2017-06-05 21:10:03 +00:00
if ( ! mref_flying & & brick - > q_phase [ 0 ] . q_queued > 0 ) {
2017-06-06 05:23:31 +00:00
MARS_IO ( " BAILOUT phase_[0]queued = %d phase_[0]active = %d \n " ,
2017-06-05 21:10:03 +00:00
brick - > q_phase [ 0 ] . q_queued ,
2017-06-06 05:23:31 +00:00
brick - > q_phase [ 0 ] . q_active ) ;
2012-10-15 14:35:36 +00:00
break ;
}
2012-12-04 15:01:50 +00:00
if ( ( leader = elect_leader ( & global_writeback ) ) ! = brick ) {
MARS_IO ( " BAILOUT leader=%p brick=%p \n " , leader , brick ) ;
2012-10-15 14:35:36 +00:00
break ;
}
if ( banning_is_hit ( & mars_global_ban ) ) {
2012-12-04 15:01:50 +00:00
# ifdef IO_DEBUGGING
unsigned long long now = cpu_clock ( raw_smp_processor_id ( ) ) ;
MARS_IO ( " BAILOUT via banning now = %lld last_hit = %lld diff = %lld renew_count = %d count = %d \n " ,
now ,
now - mars_global_ban . ban_last_hit ,
mars_global_ban . ban_last_hit ,
mars_global_ban . ban_renew_count ,
mars_global_ban . ban_count ) ;
# endif
2012-10-15 14:35:36 +00:00
break ;
}
lim = mars_limit ( & global_writeback . limiter , 0 ) ;
if ( lim > 0 ) {
2012-12-04 15:01:50 +00:00
MARS_IO ( " BAILOUT via limiter %d \n " , lim ) ;
2012-10-15 14:35:36 +00:00
break ;
}
}
ranking_compute ( & rkd [ i ] , queue_ranks [ floating_mode ] [ i ] , queued ) ;
2017-06-06 05:23:31 +00:00
flying = brick - > q_phase [ i ] . q_active - brick - > q_phase [ i ] . q_active ;
2012-10-15 14:35:36 +00:00
MARS_IO ( " i = %d queued = %d flying = %d \n " , i , queued , flying ) ;
ranking_compute ( & rkd [ i ] , fly_ranks [ floating_mode ] [ i ] , flying ) ;
2012-02-11 22:32:21 +00:00
}
// finalize it
ranking_stop ( rkd , LOGGER_QUEUES ) ;
2012-12-04 09:16:38 +00:00
res = ranking_select ( rkd , LOGGER_QUEUES ) ;
# ifdef IO_DEBUGGING
for ( i = 0 ; i < LOGGER_QUEUES ; i + + ) {
2012-12-04 10:52:51 +00:00
MARS_IO ( " rkd[%d]: points = %lld tmp = %lld got = %lld \n " , i , rkd [ i ] . rkd_current_points , rkd [ i ] . rkd_tmp , rkd [ i ] . rkd_got ) ;
2012-12-04 09:16:38 +00:00
}
MARS_IO ( " res = %d \n " , res ) ;
# endif
return res ;
2011-02-23 20:48:06 +00:00
}
2011-11-03 11:17:59 +00:00
static
2014-06-03 08:11:00 +00:00
void _init_input ( struct trans_logger_input * input , loff_t start_pos , loff_t end_pos )
2011-11-03 11:17:59 +00:00
{
struct trans_logger_brick * brick = input - > brick ;
struct log_status * logst = & input - > logst ;
2014-06-03 08:11:00 +00:00
init_logst ( logst , ( void * ) input , start_pos , end_pos ) ;
2012-12-11 15:33:26 +00:00
logst - > signal_event = & brick - > worker_event ;
2019-02-11 21:39:47 +00:00
logst - > signal_flag = & brick - > worker_flag ;
2013-01-04 13:05:00 +00:00
logst - > align_size = CONF_TRANS_ALIGN ;
logst - > chunk_size = CONF_TRANS_CHUNKSIZE ;
logst - > max_size = CONF_TRANS_MAX_MREF_SIZE ;
2011-11-03 11:17:59 +00:00
2012-12-17 07:25:17 +00:00
input - > inf . inf_min_pos = start_pos ;
2014-06-03 08:11:00 +00:00
input - > inf . inf_max_pos = end_pos ;
2012-12-17 07:25:17 +00:00
get_lamport ( & input - > inf . inf_max_pos_stamp ) ;
memcpy ( & input - > inf . inf_min_pos_stamp , & input - > inf . inf_max_pos_stamp , sizeof ( input - > inf . inf_min_pos_stamp ) ) ;
2011-11-03 11:17:59 +00:00
logst - > log_pos = start_pos ;
2012-12-17 07:25:17 +00:00
input - > inf . inf_log_pos = start_pos ;
2013-01-04 08:53:34 +00:00
input - > inf_last_jiffies = jiffies ;
2014-03-01 00:48:28 +00:00
input - > inf . inf_is_replaying = false ;
2013-01-05 21:46:13 +00:00
input - > inf . inf_is_logging = false ;
2012-12-17 07:25:17 +00:00
2011-11-03 11:17:59 +00:00
input - > is_operating = true ;
}
static
2012-12-17 07:25:17 +00:00
void _init_inputs ( struct trans_logger_brick * brick , bool is_first )
2011-11-03 11:17:59 +00:00
{
struct trans_logger_input * input ;
2013-01-04 08:53:34 +00:00
int old_nr = brick - > old_input_nr ;
int log_nr = brick - > log_input_nr ;
int new_nr = brick - > new_input_nr ;
if ( ! is_first & &
( new_nr = = log_nr | |
log_nr ! = old_nr ) ) {
MARS_IO ( " nothing to do, new_input_nr = %d log_input_nr = %d old_input_nr = %d \n " , new_nr , log_nr , old_nr ) ;
2011-11-03 11:17:59 +00:00
goto done ;
}
2013-01-04 08:53:34 +00:00
if ( unlikely ( new_nr < TL_INPUT_LOG1 | | new_nr > TL_INPUT_LOG2 ) ) {
MARS_ERR ( " bad new_input_nr = %d \n " , new_nr ) ;
2011-11-03 11:17:59 +00:00
goto done ;
}
2013-01-04 08:53:34 +00:00
input = brick - > inputs [ new_nr ] ;
2011-11-03 11:17:59 +00:00
CHECK_PTR ( input , done ) ;
if ( input - > is_operating | | ! input - > connect ) {
2013-03-31 18:29:40 +00:00
MARS_IO ( " cannot yet switch over to %d (is_operating = %d connect = %p) \n " , new_nr , input - > is_operating , input - > connect ) ;
2011-11-03 11:17:59 +00:00
goto done ;
}
2012-12-30 22:44:48 +00:00
down ( & input - > inf_mutex ) ;
2014-06-03 08:11:00 +00:00
_init_input ( input , 0 , 0 ) ;
2012-12-17 07:25:17 +00:00
input - > inf . inf_is_logging = is_first ;
2013-01-04 08:53:34 +00:00
// from now on, new requests should go to the new input
brick - > log_input_nr = new_nr ;
2013-01-04 21:04:28 +00:00
MARS_INF ( " switched over to new logfile %d (old = %d) \n " , new_nr , old_nr ) ;
2013-01-04 08:53:34 +00:00
/* Flush the old log buffer and update its symlinks.
* Notice : for some short time , _both_ logfiles may grow
* due to ( harmless ) races with log_flush ( ) .
*/
if ( likely ( ! is_first ) ) {
struct trans_logger_input * other_input = brick - > inputs [ old_nr ] ;
down ( & other_input - > inf_mutex ) ;
log_flush ( & other_input - > logst ) ;
_inf_callback ( other_input , true ) ;
up ( & other_input - > inf_mutex ) ;
}
2012-12-17 07:25:17 +00:00
_inf_callback ( input , true ) ;
2012-12-30 22:44:48 +00:00
up ( & input - > inf_mutex ) ;
2011-11-03 11:17:59 +00:00
done : ;
}
2012-02-11 22:32:21 +00:00
static
int _nr_flying_inputs ( struct trans_logger_brick * brick )
{
int count = 0 ;
int i ;
for ( i = TL_INPUT_LOG1 ; i < = TL_INPUT_LOG2 ; i + + ) {
struct trans_logger_input * input = brick - > inputs [ i ] ;
struct log_status * logst = & input - > logst ;
if ( input - > is_operating ) {
count + = logst - > count ;
}
}
return count ;
}
2011-11-03 11:17:59 +00:00
static
void _flush_inputs ( struct trans_logger_brick * brick )
{
int i ;
for ( i = TL_INPUT_LOG1 ; i < = TL_INPUT_LOG2 ; i + + ) {
struct trans_logger_input * input = brick - > inputs [ i ] ;
struct log_status * logst = & input - > logst ;
if ( input - > is_operating & & logst - > count > 0 ) {
atomic_inc ( & brick - > total_flush_count ) ;
log_flush ( logst ) ;
}
}
}
/* Shut down log inputs which are no longer needed.
 *
 * An operating input is cleaned up when it has lost its connection, or
 * unconditionally when force is set.  When the input being cleaned up
 * is the old one (and not the current log input), old_input_nr is
 * advanced to log_input_nr and the replaying / logging flags of the
 * cleaned-up input are handed over to the current log input.
 */
static
void _exit_inputs(struct trans_logger_brick *brick, bool force)
{
	int nr;

	for (nr = TL_INPUT_LOG1; nr <= TL_INPUT_LOG2; nr++) {
		struct trans_logger_input *input = brick->inputs[nr];
		struct trans_logger_input *successor;
		bool was_replaying;
		bool was_logging;

		if (!input->is_operating)
			continue;
		if (!force && input->connect)
			continue;

		was_replaying = input->inf.inf_is_replaying;
		was_logging = input->inf.inf_is_logging;

		MARS_DBG(" cleaning up input %d (log = %d old = %d), old_replaying = %d old_logging = %d \n ", nr, brick->log_input_nr, brick->old_input_nr, was_replaying, was_logging);

		exit_logst(&input->logst);
		// no locking here: we should be the only thread doing this.
		_inf_callback(input, true);
		input->inf_last_jiffies = 0;
		input->inf.inf_is_replaying = false;
		input->inf.inf_is_logging = false;
		input->is_operating = false;

		// only the old input hands its state over to the current one
		if (nr != brick->old_input_nr || nr == brick->log_input_nr)
			continue;

		successor = brick->inputs[brick->log_input_nr];
		down(&successor->inf_mutex);
		brick->old_input_nr = brick->log_input_nr;
		successor->inf.inf_is_replaying = was_replaying;
		successor->inf.inf_is_logging = was_logging;
		_inf_callback(successor, true);
		up(&successor->inf_mutex);
	}
}
2013-04-18 14:25:04 +00:00
/* Performance-critical:
* Calling log_flush ( ) too often may result in
* increased overhead ( and thus in lower throughput ) .
* Call it only when the IO scheduler need not do anything else .
* OTOH , calling it too seldom may hold back
* IO completion for the end user for too long time .
*
* Be careful to flush any leftovers in the log buffer , at least after
* some short delay .
*
* Description of flush_mode :
* 0 = flush unconditionally
* 1 = flush only when nothing can be appended to the transaction log
* 2 = see 1 & & flush only when the user is waiting for an answer
* 3 = see 1 & & not 2 & & flush only when there is no other activity ( background mode )
* Notice : 3 makes only sense for leftovers where the user is _not_ waiting for
*/
static inline
void flush_inputs ( struct trans_logger_brick * brick , int flush_mode )
{
if ( flush_mode < 1 | |
// there is nothing to append any more
2017-06-05 21:10:03 +00:00
( brick - > q_phase [ 0 ] . q_queued < = 0 & &
2013-04-18 14:25:04 +00:00
// and the user is waiting for an answer
( flush_mode < 2 | |
atomic_read ( & brick - > log_fly_count ) > 0 | |
// else flush any leftovers in background, when there is no writeback activity
( flush_mode = = 3 & &
2017-06-06 05:23:31 +00:00
brick - > q_phase [ 1 ] . q_active - brick - > q_phase [ 1 ] . q_queued +
brick - > q_phase [ 3 ] . q_active - brick - > q_phase [ 3 ] . q_queued < = 0 ) ) ) ) {
2013-04-18 14:25:04 +00:00
_flush_inputs ( brick ) ;
}
}
2017-06-01 07:26:58 +00:00
/* Counts the trans_logger_log() invocations which are currently in
 * progress.  The first one to start and the last one to finish reset
 * global_writeback.limiter.
 */
static atomic_t logger_count = ATOMIC_INIT ( 0 ) ;
2011-04-08 09:52:46 +00:00
/* Main loop of the logger thread in logging mode.
 *
 * Called from trans_logger_thread() when brick->replay_mode is not set.
 * Each round selects a queue via _do_ranking(), runs a batch from that
 * queue, and keeps the log inputs up to date (switchover, flushing,
 * periodic info callbacks, cleanup).  The loop only terminates once the
 * thread has been asked to stop _and_ the brick is no longer congested.
 * Afterwards all inputs are forcibly shut down.
 */
static noinline
2011-11-03 11:17:59 +00:00
void trans_logger_log ( struct trans_logger_brick * brick )
2011-02-23 20:48:06 +00:00
{
2012-12-17 07:25:17 +00:00
// timestamp of the last periodic info callback during pauses
long long old_jiffies = jiffies ;
2013-04-18 14:25:04 +00:00
// timestamp of the last round where the ranking found some work
long long work_jiffies = jiffies ;
// requests run from queues 1 and 3 since the last reset; reset by queues 0 and 2, by idle rounds, and after a background flush
int interleave = 0 ;
2012-12-03 13:55:54 +00:00
int nr_flying ;
2011-04-29 09:36:10 +00:00
2015-12-10 07:24:04 +00:00
memset ( brick - > rkd , 0 , sizeof ( brick - > rkd ) ) ;
2018-04-09 14:15:00 +00:00
brick - > replay_code = TL_REPLAY_RUNNING ;
2014-04-03 17:37:20 +00:00
brick - > disk_io_error = 0 ;
2012-12-04 08:26:19 +00:00
2012-12-17 07:25:17 +00:00
// bring up the initial log input
_init_inputs ( brick , true ) ;
2017-06-01 07:26:58 +00:00
// the first running logger resets the global writeback limiter
if ( atomic_inc_return ( & logger_count ) = = 1 )
mars_limit_reset ( & global_writeback . limiter ) ;
2011-04-08 09:52:46 +00:00
2011-03-29 14:40:40 +00:00
mars_power_led_on ( ( void * ) brick , true ) ;
2012-11-13 16:01:37 +00:00
// keep going after a stop request for as long as the brick is congested
while ( ! brick_thread_should_stop ( ) | | _congested ( brick ) ) {
2012-02-11 22:32:21 +00:00
int winner ;
int nr ;
2011-04-29 09:36:10 +00:00
2018-10-09 05:01:05 +00:00
/* Wait (with timeout HZ / 10) until the ranking selects a queue.
 * The condition has side effects: it stores the winner, and when
 * there is no winner it calls flush_inputs() with a flush_mode
 * which starts at 2 and drops by one for every HZ * 2 jiffies
 * elapsed since work_jiffies.
 */
brick_wait (
2011-06-30 13:15:52 +00:00
brick - > worker_event ,
2019-02-11 18:45:47 +00:00
brick - > worker_flag ,
2013-04-18 14:25:04 +00:00
( {
2015-12-10 07:24:04 +00:00
winner = _do_ranking ( brick ) ;
2013-04-18 14:25:04 +00:00
MARS_IO ( " winner = %d \n " , winner ) ;
if ( winner < 0 ) { // no more work to do
int flush_mode = 2 - ( ( int ) ( jiffies - work_jiffies ) ) / ( HZ * 2 ) ;
flush_inputs ( brick , flush_mode ) ;
interleave = 0 ;
} else { // reset the timer whenever something is to do
work_jiffies = jiffies ;
}
winner > = 0 ;
} ) ,
HZ / 10 ) ;
2011-03-11 13:57:54 +00:00
2011-05-13 11:19:28 +00:00
atomic_inc ( & brick - > total_round_count ) ;
2013-04-22 07:06:27 +00:00
// stopped_logging follows cease_logging, but is only cleared again once the brick is no longer congested
if ( brick - > cease_logging ) {
brick - > stopped_logging = true ;
} else if ( brick - > stopped_logging & & ! _congested ( brick ) ) {
brick - > stopped_logging = false ;
}
2012-12-17 07:25:17 +00:00
// switch over to a new log input when one has been requested
_init_inputs ( brick , false ) ;
2011-11-03 11:17:59 +00:00
2012-02-11 22:32:21 +00:00
/* Run one batch from the winning queue.
 * Cases 0..2 jump to the done label located inside case 3,
 * case 3 reaches it by falling through its own code.
 * A negative winner (timeout without work) ends up in default,
 * where nr is never read.
 */
switch ( winner ) {
case 0 :
2013-04-18 14:25:04 +00:00
interleave = 0 ;
2012-10-15 14:35:36 +00:00
nr = run_mref_queue ( & brick - > q_phase [ 0 ] , prep_phase_startio , brick - > q_phase [ 0 ] . q_batchlen , true ) ;
2012-02-11 22:32:21 +00:00
goto done ;
case 1 :
2013-04-18 14:25:04 +00:00
// a negative trans_logger_max_interleave disables this background flush
if ( interleave > = trans_logger_max_interleave & & trans_logger_max_interleave > = 0 ) {
interleave = 0 ;
flush_inputs ( brick , 3 ) ;
}
2012-10-15 14:35:36 +00:00
nr = run_mref_queue ( & brick - > q_phase [ 1 ] , phase1_startio , brick - > q_phase [ 1 ] . q_batchlen , true ) ;
2013-04-18 14:25:04 +00:00
interleave + = nr ;
2012-02-11 22:32:21 +00:00
goto done ;
case 2 :
2013-04-18 14:25:04 +00:00
interleave = 0 ;
2012-02-11 22:32:21 +00:00
nr = run_wb_queue ( & brick - > q_phase [ 2 ] , phase2_startio , brick - > q_phase [ 2 ] . q_batchlen ) ;
goto done ;
case 3 :
2013-04-18 14:25:04 +00:00
// same interleave handling as in case 1
if ( interleave > = trans_logger_max_interleave & & trans_logger_max_interleave > = 0 ) {
interleave = 0 ;
flush_inputs ( brick , 3 ) ;
}
2012-02-11 22:32:21 +00:00
nr = run_wb_queue ( & brick - > q_phase [ 3 ] , phase3_startio , brick - > q_phase [ 3 ] . q_batchlen ) ;
2013-04-18 14:25:04 +00:00
interleave + = nr ;
2012-02-11 22:32:21 +00:00
done :
2012-12-29 22:26:17 +00:00
if ( unlikely ( nr < = 0 ) ) {
/* This should not happen!
* However , in error situations , the ranking
* algorithm cannot foresee anything .
*/
// count the stall, ban the queue (NOTE(review): unit of 10000 is not visible here) and flush unconditionally
brick - > q_phase [ winner ] . no_progress_count + + ;
2013-04-18 14:25:04 +00:00
banning_hit ( & brick - > q_phase [ winner ] . q_banning , 10000 ) ;
flush_inputs ( brick , 0 ) ;
2012-12-29 22:26:17 +00:00
}
2015-12-10 07:24:04 +00:00
ranking_select_done ( brick - > rkd , winner , nr ) ;
2012-02-11 22:32:21 +00:00
break ;
2011-03-18 13:15:40 +00:00
2013-01-03 11:39:48 +00:00
default :
2013-04-18 14:25:04 +00:00
;
2011-03-11 13:57:54 +00:00
}
2011-04-29 09:36:10 +00:00
2012-12-17 07:25:17 +00:00
/* Update symlinks even during pauses.
*/
// done at most once per HZ jiffies, and only in rounds without a winner
if ( winner < 0 & & ( ( long long ) jiffies ) - old_jiffies > = HZ ) {
int i ;
old_jiffies = jiffies ;
for ( i = TL_INPUT_LOG1 ; i < = TL_INPUT_LOG2 ; i + + ) {
struct trans_logger_input * input = brick - > inputs [ i ] ;
2012-12-30 22:44:48 +00:00
down ( & input - > inf_mutex ) ;
2012-12-17 07:25:17 +00:00
_inf_callback ( input , false ) ;
2012-12-30 22:44:48 +00:00
up ( & input - > inf_mutex ) ;
2012-12-17 07:25:17 +00:00
}
}
2011-11-03 11:17:59 +00:00
// clean up inputs which have lost their connection
_exit_inputs ( brick , false ) ;
2010-08-20 10:58:24 +00:00
}
2012-12-03 13:55:54 +00:00
2012-12-17 07:25:17 +00:00
// final shutdown: force all inputs down and retry until _nr_flying_inputs() reports nothing left
for ( ; ; ) {
2012-12-03 13:55:54 +00:00
_exit_inputs ( brick , true ) ;
2012-12-17 07:25:17 +00:00
nr_flying = _nr_flying_inputs ( brick ) ;
if ( nr_flying < = 0 )
break ;
MARS_INF ( " %d inputs are operating \n " , nr_flying ) ;
2012-12-03 13:55:54 +00:00
brick_msleep ( 1000 ) ;
}
2017-06-01 07:26:58 +00:00
// the last running logger resets the global writeback limiter again
if ( ! atomic_dec_return ( & logger_count ) )
mars_limit_reset ( & global_writeback . limiter ) ;
2011-02-23 20:48:06 +00:00
}
2011-04-08 09:52:46 +00:00
////////////////////////////// log replay //////////////////////////////
2011-03-27 15:18:38 +00:00
2011-04-08 09:52:46 +00:00
static noinline
2011-03-27 15:18:38 +00:00
void replay_endio ( struct generic_callback * cb )
{
struct trans_logger_mref_aspect * mref_a = cb - > cb_private ;
2011-04-08 09:52:46 +00:00
struct trans_logger_brick * brick ;
2014-04-03 17:06:02 +00:00
bool ok ;
2011-03-27 15:18:38 +00:00
2016-02-06 22:35:04 +00:00
_crashme ( 22 , false ) ;
2014-03-23 09:57:56 +00:00
LAST_CALLBACK ( cb ) ;
2011-03-27 15:18:38 +00:00
CHECK_PTR ( mref_a , err ) ;
2011-10-03 17:31:02 +00:00
brick = mref_a - > my_brick ;
2011-04-08 09:52:46 +00:00
CHECK_PTR ( brick , err ) ;
2011-03-27 15:18:38 +00:00
2012-02-25 20:36:52 +00:00
if ( unlikely ( cb - > cb_error < 0 ) ) {
2014-04-03 17:37:20 +00:00
brick - > disk_io_error = cb - > cb_error ;
2012-02-25 20:36:52 +00:00
MARS_ERR ( " IO error = %d \n " , cb - > cb_error ) ;
}
2017-12-10 21:43:28 +00:00
down_write ( & brick - > replay_mutex ) ;
2014-04-03 17:06:02 +00:00
ok = ! list_empty ( & mref_a - > replay_head ) ;
2011-04-08 09:52:46 +00:00
list_del_init ( & mref_a - > replay_head ) ;
2017-12-10 21:43:28 +00:00
up_write ( & brick - > replay_mutex ) ;
2011-04-08 09:52:46 +00:00
2014-04-03 17:06:02 +00:00
if ( likely ( ok ) ) {
atomic_dec ( & brick - > replay_count ) ;
} else {
MARS_ERR ( " callback with empty replay_head (replay_count=%d) \n " , atomic_read ( & brick - > replay_count ) ) ;
}
2019-02-11 18:45:47 +00:00
brick_wake ( & brick - > worker_event , brick - > worker_flag ) ;
2011-03-27 15:18:38 +00:00
return ;
err :
MARS_FAT ( " cannot handle replay IO \n " ) ;
}
2011-04-08 09:52:46 +00:00
static noinline
2011-04-29 09:36:10 +00:00
bool _has_conflict ( struct trans_logger_brick * brick , struct trans_logger_mref_aspect * mref_a )
2011-03-27 15:18:38 +00:00
{
2011-04-08 09:52:46 +00:00
struct mref_object * mref = mref_a - > object ;
struct list_head * tmp ;
bool res = false ;
2017-12-10 21:43:28 +00:00
down_read ( & brick - > replay_mutex ) ;
2011-04-08 09:52:46 +00:00
for ( tmp = brick - > replay_list . next ; tmp ! = & brick - > replay_list ; tmp = tmp - > next ) {
struct trans_logger_mref_aspect * tmp_a ;
struct mref_object * tmp_mref ;
tmp_a = container_of ( tmp , struct trans_logger_mref_aspect , replay_head ) ;
tmp_mref = tmp_a - > object ;
2012-02-25 20:36:52 +00:00
if ( tmp_mref - > ref_pos + tmp_mref - > ref_len > mref - > ref_pos & & tmp_mref - > ref_pos < mref - > ref_pos + mref - > ref_len ) {
2011-04-08 09:52:46 +00:00
res = true ;
break ;
}
}
2017-12-10 21:43:28 +00:00
up_read ( & brick - > replay_mutex ) ;
2011-04-08 09:52:46 +00:00
return res ;
}
static noinline
2011-04-29 09:36:10 +00:00
void wait_replay ( struct trans_logger_brick * brick , struct trans_logger_mref_aspect * mref_a )
2011-04-08 09:52:46 +00:00
{
2012-02-25 20:36:52 +00:00
const int max = 512 ; // limit parallelism somewhat
int conflicts = 0 ;
bool ok = false ;
2014-04-03 17:06:02 +00:00
bool was_empty ;
2011-04-08 09:52:46 +00:00
2018-10-09 05:01:05 +00:00
brick_wait ( brick - > worker_event ,
2019-02-11 18:45:47 +00:00
brick - > worker_flag ,
2012-02-25 20:36:52 +00:00
atomic_read ( & brick - > replay_count ) < max
& & ( _has_conflict ( brick , mref_a ) ? conflicts + + : ( ok = true ) , ok ) ,
2011-04-08 09:52:46 +00:00
60 * HZ ) ;
2011-06-10 13:57:52 +00:00
atomic_inc ( & brick - > total_replay_count ) ;
2012-02-25 20:36:52 +00:00
if ( conflicts )
atomic_inc ( & brick - > total_replay_conflict_count ) ;
2011-06-10 13:57:52 +00:00
2017-12-10 21:43:28 +00:00
down_write ( & brick - > replay_mutex ) ;
2014-04-03 17:06:02 +00:00
was_empty = ! ! list_empty ( & mref_a - > replay_head ) ;
if ( likely ( was_empty ) ) {
atomic_inc ( & brick - > replay_count ) ;
} else {
list_del ( & mref_a - > replay_head ) ;
}
2011-04-08 09:52:46 +00:00
list_add ( & mref_a - > replay_head , & brick - > replay_list ) ;
2017-12-10 21:43:28 +00:00
up_write ( & brick - > replay_mutex ) ;
2014-04-03 17:06:02 +00:00
if ( unlikely ( ! was_empty ) ) {
MARS_ERR ( " replay_head was already used (ok=%d, conflicts=%d, replay_count=%d) \n " , ok , conflicts , atomic_read ( & brick - > replay_count ) ) ;
}
2011-04-08 09:52:46 +00:00
}
/* Write one logged data record back to the underlying device.
 *
 * The len bytes at buf are written to position pos through the
 * TL_INPUT_WRITEBACK input, or through TL_INPUT_READ when the writeback
 * input is not connected.  The record may be split over several mrefs
 * when mref_get delivers less than requested.  Completion is handled
 * asynchronously by replay_endio().
 *
 * Returns 0 on success, or a negative error code.
 * When REPLAY_DATA is not defined, the whole IO loop is compiled out
 * and 0 is returned without doing anything.
 */
static noinline
2014-03-01 00:48:28 +00:00
int replay_data ( struct trans_logger_brick * brick , loff_t pos , void * buf , int len )
2011-04-08 09:52:46 +00:00
{
2011-04-29 09:36:10 +00:00
struct trans_logger_input * input = brick - > inputs [ TL_INPUT_WRITEBACK ] ;
2011-03-27 15:18:38 +00:00
int status ;
2011-04-08 09:52:46 +00:00
MARS_IO ( " got data, pos = %lld, len = %d \n " , pos , len ) ;
2011-03-27 15:18:38 +00:00
2011-04-29 09:36:10 +00:00
// fall back to the read input when there is no writeback connection
if ( ! input - > connect ) {
input = brick - > inputs [ TL_INPUT_READ ] ;
}
2011-03-27 15:18:38 +00:00
/* TODO for better efficiency:
* Instead of starting IO here , just put the data into the hashes
* and queues such that ordinary IO will be corrected .
* Writeback will be lazy then .
* The switch infrastructure must be changed before this
2011-04-08 09:52:46 +00:00
* becomes possible .
2011-03-27 15:18:38 +00:00
*/
2014-03-01 00:48:28 +00:00
# ifdef REPLAY_DATA
2011-03-27 15:18:38 +00:00
/* One iteration per mref.
 * NOTE(review): none of the error exits in this loop releases the
 * mref before jumping to done -- confirm whether that is intended.
 */
while ( len > 0 ) {
struct mref_object * mref ;
struct trans_logger_mref_aspect * mref_a ;
status = - ENOMEM ;
2012-02-02 15:25:43 +00:00
mref = trans_logger_alloc_mref ( brick ) ;
2011-03-27 15:18:38 +00:00
if ( unlikely ( ! mref ) ) {
MARS_ERR ( " no memory \n " ) ;
goto done ;
}
2011-10-03 17:31:02 +00:00
mref_a = trans_logger_mref_get_aspect ( brick , mref ) ;
2011-03-27 15:18:38 +00:00
CHECK_PTR ( mref_a , done ) ;
2012-12-07 10:35:32 +00:00
CHECK_ASPECT ( mref_a , mref , done ) ;
2011-03-27 15:18:38 +00:00
2011-04-08 09:52:46 +00:00
// request a write of the remaining data; ref_data is expected to be filled in by mref_get
mref - > ref_pos = pos ;
mref - > ref_data = NULL ;
2011-03-27 15:18:38 +00:00
mref - > ref_len = len ;
mref - > ref_may_write = WRITE ;
mref - > ref_rw = WRITE ;
status = GENERIC_INPUT_CALL ( input , mref_get , mref ) ;
if ( unlikely ( status < 0 ) ) {
MARS_ERR ( " cannot get mref, status = %d \n " , status ) ;
goto done ;
}
2011-04-08 09:52:46 +00:00
if ( unlikely ( ! mref - > ref_data ) ) {
status = - ENOMEM ;
MARS_ERR ( " cannot get mref, status = %d \n " , status ) ;
goto done ;
}
// the granted length may be shorter than requested, but never longer
if ( unlikely ( mref - > ref_len < = 0 | | mref - > ref_len > len ) ) {
status = - EINVAL ;
MARS_ERR ( " bad ref len = %d (requested = %d) \n " , mref - > ref_len , len ) ;
goto done ;
}
2011-03-27 15:18:38 +00:00
mars_trace ( mref , " replay_start " ) ;
2011-04-08 09:52:46 +00:00
2011-04-29 09:36:10 +00:00
// wait for overlapping replay requests to finish, then register this one
wait_replay ( brick , mref_a ) ;
2011-04-08 09:52:46 +00:00
mars_trace ( mref , " replay_io " ) ;
memcpy ( mref - > ref_data , buf , mref - > ref_len ) ;
2011-10-04 11:34:18 +00:00
SETUP_CALLBACK ( mref , replay_endio , mref_a ) ;
2011-10-03 17:31:02 +00:00
// replay_endio() finds the brick via my_brick
mref_a - > my_brick = brick ;
2012-02-25 20:36:52 +00:00
2011-03-27 15:18:38 +00:00
GENERIC_INPUT_CALL ( input , mref_io , mref ) ;
2011-04-08 09:52:46 +00:00
if ( unlikely ( mref - > ref_len < = 0 ) ) {
status = - EINVAL ;
MARS_ERR ( " bad ref len = %d (requested = %d) \n " , mref - > ref_len , len ) ;
goto done ;
}
// advance by the amount covered by this mref
pos + = mref - > ref_len ;
2011-03-27 15:18:38 +00:00
buf + = mref - > ref_len ;
len - = mref - > ref_len ;
GENERIC_INPUT_CALL ( input , mref_put , mref ) ;
}
# endif
status = 0 ;
done :
return status ;
}
2011-04-08 09:52:46 +00:00
/* Main function of the logger thread in replay mode.
 *
 * Called from trans_logger_thread() when brick->replay_mode is set.
 * Reads log records from the current log input, starting at
 * brick->replay_start_pos, and writes the data of CODE_WRITE_NEW
 * records back via replay_data().  Without continuous_replay_mode the
 * replay ends at brick->replay_end_pos.
 *
 * The outcome is reported in brick->replay_code (TL_REPLAY_FINISHED,
 * TL_REPLAY_INCOMPLETE, or a negative error code), and the position
 * reached is reported in brick->replay_current_pos.  After that, the
 * function idles until the thread is asked to stop.
 */
static noinline
2011-11-03 11:17:59 +00:00
void trans_logger_replay ( struct trans_logger_brick * brick )
2011-02-23 20:48:06 +00:00
{
2011-11-03 11:17:59 +00:00
struct trans_logger_input * input = brick - > inputs [ brick - > log_input_nr ] ;
2012-12-17 07:25:17 +00:00
struct log_header lh = { } ;
2011-05-13 11:19:28 +00:00
loff_t start_pos ;
2014-06-03 08:11:00 +00:00
loff_t end_pos ;
2013-06-18 08:44:32 +00:00
// position up to which records were handed to replay_data() successfully; -1 = nothing yet
loff_t finished_pos = - 1 ;
2014-03-26 09:52:12 +00:00
// position after the most recent log_read(), whether or not it has been replayed
loff_t new_finished_pos = - 1 ;
2011-06-30 13:15:52 +00:00
long long old_jiffies = jiffies ;
2012-12-03 13:55:54 +00:00
int nr_flying ;
2012-02-06 10:41:15 +00:00
// sleep time for retries after -EAGAIN, grows by 100 per retry
int backoff = 0 ;
2011-06-10 13:57:52 +00:00
int status = 0 ;
2011-04-08 09:52:46 +00:00
2018-04-09 14:15:00 +00:00
brick - > replay_code = TL_REPLAY_RUNNING ;
2014-04-03 17:37:20 +00:00
brick - > disk_io_error = 0 ;
2011-02-23 20:48:06 +00:00
2011-07-28 11:41:06 +00:00
start_pos = brick - > replay_start_pos ;
2014-06-03 08:11:00 +00:00
end_pos = brick - > replay_end_pos ;
2014-03-26 09:52:12 +00:00
brick - > replay_current_pos = start_pos ;
2012-12-17 07:25:17 +00:00
2014-06-03 08:11:00 +00:00
_init_input ( input , start_pos , end_pos ) ;
2013-01-04 21:04:28 +00:00
2012-12-17 07:25:17 +00:00
// adjust the info set up by _init_input(): log position at the end, replaying flagged
input - > inf . inf_min_pos = start_pos ;
2014-06-03 08:11:00 +00:00
input - > inf . inf_max_pos = end_pos ;
input - > inf . inf_log_pos = end_pos ;
2014-03-01 00:48:28 +00:00
input - > inf . inf_is_replaying = true ;
2012-12-17 07:25:17 +00:00
input - > inf . inf_is_logging = false ;
2017-06-01 07:26:58 +00:00
// NOTE(review): called without the NULL check used further below for replay_limiter -- presumably mars_limit_reset() tolerates NULL; confirm
mars_limit_reset ( brick - > replay_limiter ) ;
2011-02-23 20:48:06 +00:00
2014-06-03 08:11:00 +00:00
MARS_INF ( " starting replay from %lld to %lld \n " , start_pos , end_pos ) ;
2011-07-28 11:41:06 +00:00
2011-04-08 09:52:46 +00:00
mars_power_led_on ( ( void * ) brick , true ) ;
// one iteration per log record
for ( ; ; ) {
2011-03-27 15:18:38 +00:00
void * buf = NULL ;
int len = 0 ;
2012-11-13 16:01:37 +00:00
// regular termination: stop request, or end position reached (non-continuous mode only)
if ( brick_thread_should_stop ( ) | |
2012-08-03 08:42:51 +00:00
( ! brick - > continuous_replay_mode & & finished_pos > = brick - > replay_end_pos ) ) {
2011-07-15 10:12:06 +00:00
status = 0 ; // treat as EOF
2011-03-18 13:15:40 +00:00
break ;
}
2013-06-05 08:31:36 +00:00
status = log_read ( & input - > logst , false , & lh , & buf , & len ) ;
2013-07-01 08:17:38 +00:00
new_finished_pos = input - > logst . log_pos + input - > logst . offset ;
MARS_RPL ( " read %lld %lld \n " , finished_pos , new_finished_pos ) ;
2011-06-10 13:57:52 +00:00
/* -EAGAIN from log_read():
 * Give up at once when the missing rest is smaller than
 * replay_tolerance.  Otherwise retry with growing backoff, and
 * give up when backoff has reached
 * trans_logger_replay_timeout * 1000.
 */
if ( status = = - EAGAIN ) {
2013-07-01 08:17:38 +00:00
loff_t remaining = brick - > replay_end_pos - new_finished_pos ;
MARS_DBG ( " got -EAGAIN, remaining = %lld \n " , remaining ) ;
if ( brick - > replay_tolerance > 0 & & remaining < brick - > replay_tolerance ) {
MARS_WRN ( " logfile is truncated at position %lld (end_pos = %lld, remaining = %lld, tolerance = %d) \n " ,
new_finished_pos ,
brick - > replay_end_pos ,
remaining ,
brick - > replay_tolerance ) ;
finished_pos = new_finished_pos ;
brick - > replay_code = status ;
break ;
}
2012-09-17 10:11:25 +00:00
brick_msleep ( backoff ) ;
2013-07-03 06:27:42 +00:00
if ( backoff < trans_logger_replay_timeout * 1000 ) {
2012-02-06 10:41:15 +00:00
backoff + = 100 ;
} else {
2013-07-01 08:17:38 +00:00
MARS_WRN ( " logfile replay not possible at position %lld (end_pos = %lld, remaining = %lld), please check/repair your logfile in userspace by some tool! \n " ,
new_finished_pos ,
brick - > replay_end_pos ,
remaining ) ;
brick - > replay_code = status ;
break ;
2012-02-06 10:41:15 +00:00
}
2011-06-10 13:57:52 +00:00
continue ;
}
2011-04-08 09:52:46 +00:00
// any other read error terminates the replay
if ( unlikely ( status < 0 ) ) {
brick - > replay_code = status ;
2012-02-06 10:41:15 +00:00
MARS_WRN ( " cannot read logfile data, status = %d \n " , status ) ;
2011-03-27 15:18:38 +00:00
break ;
}
2011-07-28 11:41:06 +00:00
2011-07-15 10:12:06 +00:00
// nothing was delivered, or the record reaches beyond the end position
if ( ( ! status & & len < = 0 ) | |
2012-11-13 16:01:37 +00:00
new_finished_pos > brick - > replay_end_pos ) { // EOF -> wait until brick_thread_should_stop()
2011-07-15 10:12:06 +00:00
MARS_DBG ( " EOF at %lld (old = %lld, end_pos = %lld) \n " , new_finished_pos , finished_pos , brick - > replay_end_pos ) ;
2012-08-03 08:42:51 +00:00
if ( ! brick - > continuous_replay_mode ) {
2011-07-15 10:12:06 +00:00
// notice: finished_pos remains at old value here!
break ;
2011-06-10 13:57:52 +00:00
}
2012-09-17 10:11:25 +00:00
brick_msleep ( 1000 ) ;
2011-07-15 10:12:06 +00:00
continue ;
2011-03-29 14:40:40 +00:00
}
2011-03-27 15:18:38 +00:00
2011-04-29 09:36:10 +00:00
// only CODE_WRITE_NEW records carry data to be replayed; note that finished_pos is not advanced for ignored records
if ( lh . l_code ! = CODE_WRITE_NEW ) {
MARS_IO ( " ignoring pos = %lld len = %d code = %d \n " , lh . l_pos , lh . l_len , lh . l_code ) ;
2014-04-03 17:37:20 +00:00
// an IO error reported by replay_endio() in the meantime aborts the replay
} else if ( unlikely ( brick - > disk_io_error ) ) {
status = brick - > disk_io_error ;
brick - > replay_code = status ;
MARS_ERR ( " IO error %d \n " , status ) ;
break ;
2011-07-15 10:12:06 +00:00
} else if ( likely ( buf & & len ) ) {
2012-09-25 15:41:07 +00:00
// throttle by the record size, rounded up to units of 1024 bytes
if ( brick - > replay_limiter )
2013-07-15 10:14:36 +00:00
mars_limit_sleep ( brick - > replay_limiter , ( len - 1 ) / 1024 + 1 ) ;
2014-03-01 00:48:28 +00:00
status = replay_data ( brick , lh . l_pos , buf , len ) ;
MARS_RPL ( " replay %lld %lld (pos=%lld status=%d) \n " , finished_pos , new_finished_pos , lh . l_pos , status ) ;
2011-04-08 09:52:46 +00:00
if ( unlikely ( status < 0 ) ) {
brick - > replay_code = status ;
2014-03-01 00:48:28 +00:00
MARS_ERR ( " cannot replay data at pos = %lld len = %d, status = %d \n " , lh . l_pos , len , status ) ;
2011-04-08 09:52:46 +00:00
break ;
2011-07-15 10:12:06 +00:00
} else {
finished_pos = new_finished_pos ;
2011-04-08 09:52:46 +00:00
}
}
// do this _after_ any opportunities for errors...
2013-06-18 08:44:32 +00:00
/* Publish progress when no replay IO is in flight, or when
 * HZ * 3 jiffies have passed since the last update -- but only
 * after something has been replayed at all.
 */
if ( ( atomic_read ( & brick - > replay_count ) < = 0 | |
( ( long long ) jiffies ) - old_jiffies > = HZ * 3 ) & &
finished_pos > = 0 ) {
2013-06-18 09:17:34 +00:00
// for safety, wait until the IO queue has drained.
2019-02-11 18:45:47 +00:00
brick_wait ( brick - > worker_event ,
brick - > worker_flag ,
atomic_read ( & brick - > replay_count ) < = 0 ,
30 * HZ ) ;
2014-04-03 17:37:20 +00:00
if ( unlikely ( brick - > disk_io_error ) ) {
status = brick - > disk_io_error ;
brick - > replay_code = status ;
MARS_ERR ( " IO error %d \n " , status ) ;
break ;
}
2013-06-18 09:17:34 +00:00
2012-12-30 22:44:48 +00:00
down ( & input - > inf_mutex ) ;
2012-12-17 07:25:17 +00:00
input - > inf . inf_min_pos = finished_pos ;
get_lamport ( & input - > inf . inf_min_pos_stamp ) ;
2011-06-30 13:15:52 +00:00
old_jiffies = jiffies ;
2012-12-17 07:25:17 +00:00
_inf_callback ( input , false ) ;
2012-12-30 22:44:48 +00:00
up ( & input - > inf_mutex ) ;
2011-03-18 13:15:40 +00:00
}
2011-11-03 11:17:59 +00:00
// clean up inputs which have lost their connection
_exit_inputs ( brick , false ) ;
2011-03-18 13:15:40 +00:00
}
2011-03-27 15:18:38 +00:00
2011-06-30 13:15:52 +00:00
MARS_INF ( " waiting for finish... \n " ) ;
2019-02-11 18:45:47 +00:00
// let the outstanding replay IO complete (timeout 60 * HZ)
brick_wait ( brick - > worker_event ,
brick - > worker_flag ,
atomic_read ( & brick - > replay_count ) < = 0 ,
60 * HZ ) ;
2011-03-27 15:18:38 +00:00
2011-07-15 10:12:06 +00:00
if ( unlikely ( finished_pos > brick - > replay_end_pos ) ) {
MARS_ERR ( " finished_pos too large: %lld + %d = %lld > %lld \n " , input - > logst . log_pos , input - > logst . offset , finished_pos , brick - > replay_end_pos ) ;
}
2013-06-18 08:44:32 +00:00
2014-04-03 17:37:20 +00:00
// the reached position is only published when no disk IO error occurred
if ( finished_pos > = 0 & & ! brick - > disk_io_error ) {
2012-12-17 07:25:17 +00:00
input - > inf . inf_min_pos = finished_pos ;
2013-07-08 05:23:03 +00:00
brick - > replay_current_pos = finished_pos ;
2011-06-10 13:57:52 +00:00
}
2011-03-27 15:18:38 +00:00
2013-06-18 08:44:32 +00:00
get_lamport ( & input - > inf . inf_min_pos_stamp ) ;
2011-06-10 13:57:52 +00:00
/* Determine the final replay_code:
 *  - exactly at the end position without error => FINISHED
 *  - -EAGAIN within the tolerance              => INCOMPLETE
 *  - any other error                           => the error code
 *  - stopped before the end without error      => INCOMPLETE
 */
if ( status > = 0 & & finished_pos = = brick - > replay_end_pos ) {
2011-05-13 11:19:28 +00:00
MARS_INF ( " replay finished at %lld \n " , finished_pos ) ;
2018-04-09 14:15:00 +00:00
brick - > replay_code = TL_REPLAY_FINISHED ;
2013-07-08 05:23:03 +00:00
} else if ( status = = - EAGAIN & & finished_pos + brick - > replay_tolerance > brick - > replay_end_pos ) {
2014-03-13 11:27:53 +00:00
MARS_INF ( " TOLERANCE: logfile is incomplete at %lld (of %lld) \n " , finished_pos , brick - > replay_end_pos ) ;
2018-04-09 14:15:00 +00:00
brick - > replay_code = TL_REPLAY_INCOMPLETE ;
2014-03-13 11:27:53 +00:00
} else if ( status < 0 ) {
2014-03-26 09:52:12 +00:00
// when nothing was replayed at all, report the position of the last read
if ( finished_pos < 0 )
finished_pos = new_finished_pos ;
// within tolerance only the log level differs; the error code is reported either way
if ( finished_pos + brick - > replay_tolerance > brick - > replay_end_pos ) {
MARS_INF ( " TOLERANCE: logfile is incomplete at %lld (of %lld), status = %d \n " , finished_pos , brick - > replay_end_pos , status ) ;
} else {
MARS_ERR ( " replay error %d at %lld (of %lld) \n " , status , finished_pos , brick - > replay_end_pos ) ;
}
2013-07-08 05:23:03 +00:00
brick - > replay_code = status ;
2011-03-27 15:18:38 +00:00
} else {
2011-05-13 11:19:28 +00:00
MARS_INF ( " replay stopped prematurely at %lld (of %lld) \n " , finished_pos , brick - > replay_end_pos ) ;
2018-04-09 14:15:00 +00:00
brick - > replay_code = TL_REPLAY_INCOMPLETE ;
2011-04-08 09:52:46 +00:00
}
2012-12-17 07:25:17 +00:00
// force all inputs down and retry until _nr_flying_inputs() reports nothing left
for ( ; ; ) {
2012-12-03 13:55:54 +00:00
_exit_inputs ( brick , true ) ;
2012-12-17 07:25:17 +00:00
nr_flying = _nr_flying_inputs ( brick ) ;
if ( nr_flying < = 0 )
break ;
MARS_INF ( " %d inputs are operating \n " , nr_flying ) ;
2012-12-03 13:55:54 +00:00
brick_msleep ( 1000 ) ;
}
2011-11-03 11:17:59 +00:00
2011-07-20 13:11:44 +00:00
mars_trigger ( ) ;
2012-11-13 16:01:37 +00:00
// the thread does not exit on its own: idle here until asked to stop
while ( ! brick_thread_should_stop ( ) ) {
2012-09-17 10:11:25 +00:00
brick_msleep ( 500 ) ;
2011-02-23 20:48:06 +00:00
}
2017-06-01 07:26:58 +00:00
mars_limit_reset ( brick - > replay_limiter ) ;
2011-02-23 20:48:06 +00:00
}
2011-03-27 15:18:38 +00:00
///////////////////////// logger thread / switching /////////////////////////
2011-04-08 09:52:46 +00:00
static noinline
2011-02-23 20:48:06 +00:00
int trans_logger_thread ( void * data )
{
struct trans_logger_output * output = data ;
struct trans_logger_brick * brick = output - > brick ;
MARS_INF ( " ........... logger has started. \n " ) ;
2012-08-03 08:42:51 +00:00
if ( brick - > replay_mode ) {
2011-11-03 11:17:59 +00:00
trans_logger_replay ( brick ) ;
2011-02-23 20:48:06 +00:00
} else {
2011-11-03 11:17:59 +00:00
trans_logger_log ( brick ) ;
2011-02-23 20:48:06 +00:00
}
MARS_INF ( " ........... logger has stopped. \n " ) ;
2011-03-29 14:40:40 +00:00
mars_power_led_on ( ( void * ) brick , false ) ;
2011-02-23 20:48:06 +00:00
mars_power_led_off ( ( void * ) brick , true ) ;
return 0 ;
}
2011-04-08 09:52:46 +00:00
static noinline
2011-02-23 20:48:06 +00:00
int trans_logger_switch ( struct trans_logger_brick * brick )
{
static int index = 0 ;
struct trans_logger_output * output = brick - > outputs [ 0 ] ;
if ( brick - > power . button ) {
2011-04-29 09:36:10 +00:00
if ( ! brick - > thread & & brick - > power . led_off ) {
2011-03-18 13:15:40 +00:00
mars_power_led_off ( ( void * ) brick , false ) ;
2012-11-13 16:01:37 +00:00
brick - > thread = brick_thread_create ( trans_logger_thread , output , " mars_logger%d " , index + + ) ;
if ( unlikely ( ! brick - > thread ) ) {
MARS_ERR ( " cannot create logger thread \n " ) ;
return - ENOENT ;
2011-02-23 20:48:06 +00:00
}
}
} else {
mars_power_led_on ( ( void * ) brick , false ) ;
2011-04-29 09:36:10 +00:00
if ( brick - > thread ) {
2011-08-25 10:16:32 +00:00
MARS_INF ( " stopping thread... \n " ) ;
2012-11-13 16:01:37 +00:00
brick_thread_stop ( brick - > thread ) ;
2013-09-25 15:07:19 +00:00
brick - > thread = NULL ;
2011-02-23 20:48:06 +00:00
}
}
2010-08-20 10:58:24 +00:00
return 0 ;
}
2011-04-12 15:31:08 +00:00
2011-04-01 11:18:32 +00:00
//////////////// informational / statistics ///////////////
2011-04-08 09:52:46 +00:00
static noinline
2011-04-01 11:18:32 +00:00
char * trans_logger_statistics ( struct trans_logger_brick * brick , int verbose )
{
2011-11-03 11:17:59 +00:00
char * res = brick_string_alloc ( 1024 ) ;
2011-04-01 11:18:32 +00:00
if ( ! res )
return NULL ;
2012-02-12 11:19:57 +00:00
snprintf ( res , 1023 ,
" mode replay=%d "
" continuous=%d "
" replay_code=%d "
2014-04-03 17:37:20 +00:00
" disk_io_error=%d "
2012-02-12 11:19:57 +00:00
" log_reads=%d | "
2014-03-10 15:22:44 +00:00
" cease_logging=%d "
" stopped_logging=%d "
" congested=%d | "
2012-02-12 11:19:57 +00:00
" replay_start_pos = %lld "
" replay_end_pos = %lld | "
" new_input_nr = %d "
" log_input_nr = %d "
" (old = %d) "
2012-12-17 07:25:17 +00:00
" inf_min_pos1 = %lld "
" inf_max_pos1 = %lld "
" inf_min_pos2 = %lld "
" inf_max_pos2 = %lld | "
2012-02-12 15:36:34 +00:00
" total hash_insert=%d "
" hash_find=%d "
" hash_extend=%d "
" replay=%d "
2012-02-25 20:36:52 +00:00
" replay_conflict=%d (%d%%) "
2012-02-12 11:19:57 +00:00
" callbacks=%d "
" reads=%d "
" writes=%d "
" flushes=%d (%d%%) "
" wb_clusters=%d "
" writebacks=%d (%d%%) "
" shortcut=%d (%d%%) "
" mshadow=%d "
" sshadow=%d "
2012-02-12 15:36:34 +00:00
" mshadow_buffered=%d sshadow_buffered=%d "
2012-02-12 11:19:57 +00:00
" rounds=%d "
" restarts=%d "
2017-06-05 21:02:42 +00:00
" delays=%d | "
2012-02-12 11:19:57 +00:00
" current #mrefs = %d "
" shadow_mem_used=%ld/%lld "
2012-02-25 20:36:52 +00:00
" replay_count=%d "
2012-02-12 11:19:57 +00:00
" mshadow=%d/%d "
" sshadow=%d "
" hash_count=%d "
" balance=%d/%d/%d/%d "
2013-01-02 21:43:50 +00:00
" pos_count1=%d "
" pos_count2=%d "
2012-12-31 09:33:47 +00:00
" log_refs1=%d "
" log_refs2=%d "
2013-01-02 17:37:43 +00:00
" any_fly=%d "
" log_fly=%d "
2012-12-11 12:48:28 +00:00
" mref_flying1=%d "
" mref_flying2=%d "
2017-06-06 05:23:31 +00:00
" phase0=%d-%d <%d/%d> "
" phase1=%d-%d <%d/%d> "
" phase2=%d-%d <%d/%d> "
" phase3=%d-%d <%d/%d> \n " ,
2012-08-03 08:42:51 +00:00
brick - > replay_mode ,
brick - > continuous_replay_mode ,
2012-02-12 11:19:57 +00:00
brick - > replay_code ,
2014-04-03 17:37:20 +00:00
brick - > disk_io_error ,
2012-02-12 11:19:57 +00:00
brick - > log_reads ,
2014-03-10 15:22:44 +00:00
brick - > cease_logging ,
brick - > stopped_logging ,
_congested ( brick ) ,
2012-02-12 11:19:57 +00:00
brick - > replay_start_pos ,
brick - > replay_end_pos ,
brick - > new_input_nr ,
brick - > log_input_nr ,
brick - > old_input_nr ,
2012-12-17 07:25:17 +00:00
brick - > inputs [ TL_INPUT_LOG1 ] - > inf . inf_min_pos ,
brick - > inputs [ TL_INPUT_LOG1 ] - > inf . inf_max_pos ,
brick - > inputs [ TL_INPUT_LOG2 ] - > inf . inf_min_pos ,
brick - > inputs [ TL_INPUT_LOG2 ] - > inf . inf_max_pos ,
2012-02-12 15:36:34 +00:00
atomic_read ( & brick - > total_hash_insert_count ) ,
atomic_read ( & brick - > total_hash_find_count ) ,
atomic_read ( & brick - > total_hash_extend_count ) ,
2012-02-12 11:19:57 +00:00
atomic_read ( & brick - > total_replay_count ) ,
2012-02-25 20:36:52 +00:00
atomic_read ( & brick - > total_replay_conflict_count ) ,
atomic_read ( & brick - > total_replay_count ) ? atomic_read ( & brick - > total_replay_conflict_count ) * 100 / atomic_read ( & brick - > total_replay_count ) : 0 ,
2012-02-12 11:19:57 +00:00
atomic_read ( & brick - > total_cb_count ) ,
atomic_read ( & brick - > total_read_count ) ,
atomic_read ( & brick - > total_write_count ) ,
atomic_read ( & brick - > total_flush_count ) ,
atomic_read ( & brick - > total_write_count ) ? atomic_read ( & brick - > total_flush_count ) * 100 / atomic_read ( & brick - > total_write_count ) : 0 ,
atomic_read ( & brick - > total_writeback_cluster_count ) ,
atomic_read ( & brick - > total_writeback_count ) ,
atomic_read ( & brick - > total_writeback_cluster_count ) ? atomic_read ( & brick - > total_writeback_count ) * 100 / atomic_read ( & brick - > total_writeback_cluster_count ) : 0 ,
atomic_read ( & brick - > total_shortcut_count ) ,
atomic_read ( & brick - > total_writeback_count ) ? atomic_read ( & brick - > total_shortcut_count ) * 100 / atomic_read ( & brick - > total_writeback_count ) : 0 ,
atomic_read ( & brick - > total_mshadow_count ) ,
atomic_read ( & brick - > total_sshadow_count ) ,
2012-02-12 15:36:34 +00:00
atomic_read ( & brick - > total_mshadow_buffered_count ) ,
atomic_read ( & brick - > total_sshadow_buffered_count ) ,
2012-02-12 11:19:57 +00:00
atomic_read ( & brick - > total_round_count ) ,
atomic_read ( & brick - > total_restart_count ) ,
atomic_read ( & brick - > total_delay_count ) ,
2012-02-02 15:25:43 +00:00
atomic_read ( & brick - > mref_object_layout . alloc_count ) ,
2012-08-08 09:16:52 +00:00
atomic64_read ( & brick - > shadow_mem_used ) / 1024 ,
2012-02-12 15:36:34 +00:00
brick_global_memlimit ,
atomic_read ( & brick - > replay_count ) ,
2012-02-12 11:19:57 +00:00
atomic_read ( & brick - > mshadow_count ) ,
brick - > shadow_mem_limit ,
atomic_read ( & brick - > sshadow_count ) ,
atomic_read ( & brick - > hash_count ) ,
atomic_read ( & brick - > sub_balance_count ) ,
atomic_read ( & brick - > inner_balance_count ) ,
atomic_read ( & brick - > outer_balance_count ) ,
atomic_read ( & brick - > wb_balance_count ) ,
2013-01-02 21:43:50 +00:00
atomic_read ( & brick - > inputs [ TL_INPUT_LOG1 ] - > pos_count ) ,
atomic_read ( & brick - > inputs [ TL_INPUT_LOG2 ] - > pos_count ) ,
2012-12-31 09:33:47 +00:00
atomic_read ( & brick - > inputs [ TL_INPUT_LOG1 ] - > log_ref_count ) ,
atomic_read ( & brick - > inputs [ TL_INPUT_LOG2 ] - > log_ref_count ) ,
2013-01-02 17:37:43 +00:00
atomic_read ( & brick - > any_fly_count ) ,
atomic_read ( & brick - > log_fly_count ) ,
2012-12-11 12:48:28 +00:00
atomic_read ( & brick - > inputs [ TL_INPUT_LOG1 ] - > logst . mref_flying ) ,
atomic_read ( & brick - > inputs [ TL_INPUT_LOG2 ] - > logst . mref_flying ) ,
2017-06-06 05:23:31 +00:00
brick - > q_phase [ 0 ] . q_active ,
2017-06-05 21:10:03 +00:00
brick - > q_phase [ 0 ] . q_queued ,
2012-12-29 22:26:17 +00:00
brick - > q_phase [ 0 ] . pushback_count ,
brick - > q_phase [ 0 ] . no_progress_count ,
2017-06-06 05:23:31 +00:00
brick - > q_phase [ 1 ] . q_active ,
2017-06-05 21:10:03 +00:00
brick - > q_phase [ 1 ] . q_queued ,
2012-12-29 22:26:17 +00:00
brick - > q_phase [ 1 ] . pushback_count ,
brick - > q_phase [ 1 ] . no_progress_count ,
2017-06-06 05:23:31 +00:00
brick - > q_phase [ 2 ] . q_active ,
2017-06-05 21:10:03 +00:00
brick - > q_phase [ 2 ] . q_queued ,
2012-12-29 22:26:17 +00:00
brick - > q_phase [ 2 ] . pushback_count ,
brick - > q_phase [ 2 ] . no_progress_count ,
2017-06-06 05:23:31 +00:00
brick - > q_phase [ 3 ] . q_active ,
2017-06-05 21:10:03 +00:00
brick - > q_phase [ 3 ] . q_queued ,
2012-12-29 22:26:17 +00:00
brick - > q_phase [ 3 ] . pushback_count ,
brick - > q_phase [ 3 ] . no_progress_count ) ;
2011-04-01 11:18:32 +00:00
return res ;
}
2011-04-08 09:52:46 +00:00
static noinline
2011-04-01 11:18:32 +00:00
void trans_logger_reset_statistics ( struct trans_logger_brick * brick )
{
2012-02-12 15:36:34 +00:00
atomic_set ( & brick - > total_hash_insert_count , 0 ) ;
atomic_set ( & brick - > total_hash_find_count , 0 ) ;
atomic_set ( & brick - > total_hash_extend_count , 0 ) ;
2011-06-10 13:57:52 +00:00
atomic_set ( & brick - > total_replay_count , 0 ) ;
2012-02-25 20:36:52 +00:00
atomic_set ( & brick - > total_replay_conflict_count , 0 ) ;
2011-04-29 09:36:10 +00:00
atomic_set ( & brick - > total_cb_count , 0 ) ;
atomic_set ( & brick - > total_read_count , 0 ) ;
atomic_set ( & brick - > total_write_count , 0 ) ;
2011-05-13 11:19:28 +00:00
atomic_set ( & brick - > total_flush_count , 0 ) ;
2011-04-29 09:36:10 +00:00
atomic_set ( & brick - > total_writeback_count , 0 ) ;
2011-05-13 11:19:28 +00:00
atomic_set ( & brick - > total_writeback_cluster_count , 0 ) ;
2011-04-29 09:36:10 +00:00
atomic_set ( & brick - > total_shortcut_count , 0 ) ;
atomic_set ( & brick - > total_mshadow_count , 0 ) ;
atomic_set ( & brick - > total_sshadow_count , 0 ) ;
2012-02-12 15:36:34 +00:00
atomic_set ( & brick - > total_mshadow_buffered_count , 0 ) ;
atomic_set ( & brick - > total_sshadow_buffered_count , 0 ) ;
2011-05-13 11:19:28 +00:00
atomic_set ( & brick - > total_round_count , 0 ) ;
atomic_set ( & brick - > total_restart_count , 0 ) ;
2011-12-09 12:54:30 +00:00
atomic_set ( & brick - > total_delay_count , 0 ) ;
2011-04-01 11:18:32 +00:00
}
2010-08-20 10:58:24 +00:00
2010-08-08 20:51:20 +00:00
//////////////// object / aspect constructors / destructors ///////////////
2011-04-08 09:52:46 +00:00
static noinline
2011-10-03 17:31:02 +00:00
int trans_logger_mref_aspect_init_fn ( struct generic_aspect * _ini )
2010-08-08 20:51:20 +00:00
{
2010-12-15 12:13:18 +00:00
struct trans_logger_mref_aspect * ini = ( void * ) _ini ;
2011-04-18 14:14:16 +00:00
ini - > lh . lh_pos = & ini - > object - > ref_pos ;
INIT_LIST_HEAD ( & ini - > lh . lh_head ) ;
2010-08-08 20:51:20 +00:00
INIT_LIST_HEAD ( & ini - > hash_head ) ;
2011-03-20 17:38:08 +00:00
INIT_LIST_HEAD ( & ini - > pos_head ) ;
2011-04-08 09:52:46 +00:00
INIT_LIST_HEAD ( & ini - > replay_head ) ;
INIT_LIST_HEAD ( & ini - > collect_head ) ;
INIT_LIST_HEAD ( & ini - > sub_list ) ;
INIT_LIST_HEAD ( & ini - > sub_head ) ;
2010-08-08 20:51:20 +00:00
return 0 ;
}
2011-04-08 09:52:46 +00:00
static noinline
2011-10-03 17:31:02 +00:00
void trans_logger_mref_aspect_exit_fn ( struct generic_aspect * _ini )
2010-08-08 20:51:20 +00:00
{
2010-12-15 12:13:18 +00:00
struct trans_logger_mref_aspect * ini = ( void * ) _ini ;
2011-04-18 14:14:16 +00:00
CHECK_HEAD_EMPTY ( & ini - > lh . lh_head ) ;
2010-08-08 20:51:20 +00:00
CHECK_HEAD_EMPTY ( & ini - > hash_head ) ;
2011-04-08 09:52:46 +00:00
CHECK_HEAD_EMPTY ( & ini - > pos_head ) ;
CHECK_HEAD_EMPTY ( & ini - > replay_head ) ;
CHECK_HEAD_EMPTY ( & ini - > collect_head ) ;
CHECK_HEAD_EMPTY ( & ini - > sub_list ) ;
CHECK_HEAD_EMPTY ( & ini - > sub_head ) ;
2012-12-31 09:33:47 +00:00
if ( ini - > log_input ) {
atomic_dec ( & ini - > log_input - > log_ref_count ) ;
}
2010-08-08 20:51:20 +00:00
}
/*
 * NOTE(review): presumably this macro generates the per-brick-type static
 * boilerplate that is referenced further below but not defined in this
 * part of the file (e.g. trans_logger_aspect_types and
 * trans_logger_register_brick_type()) -- confirm against the macro
 * definition.
 */
MARS_MAKE_STATICS ( trans_logger ) ;
////////////////////// brick constructors / destructors ////////////////////
2013-04-12 09:55:52 +00:00
static
void _free_pages ( struct trans_logger_brick * brick )
{
int i ;
for ( i = 0 ; i < NR_HASH_PAGES ; i + + ) {
struct trans_logger_hash_anchor * sub_table = brick - > hash_table [ i ] ;
int j ;
if ( ! sub_table ) {
continue ;
}
for ( j = 0 ; j < HASH_PER_PAGE ; j + + ) {
struct trans_logger_hash_anchor * start = & sub_table [ j ] ;
CHECK_HEAD_EMPTY ( & start - > hash_anchor ) ;
}
brick_block_free ( sub_table , PAGE_SIZE ) ;
}
brick_block_free ( brick - > hash_table , PAGE_SIZE ) ;
}
2011-04-08 09:52:46 +00:00
static noinline
int trans_logger_brick_construct ( struct trans_logger_brick * brick )
2010-08-08 20:51:20 +00:00
{
2011-04-29 09:36:10 +00:00
int i ;
2013-04-12 09:55:52 +00:00
brick - > hash_table = brick_block_alloc ( 0 , PAGE_SIZE ) ;
if ( unlikely ( ! brick - > hash_table ) ) {
MARS_ERR ( " cannot allocate hash directory table. \n " ) ;
return - ENOMEM ;
2011-04-29 09:36:10 +00:00
}
2013-04-12 09:55:52 +00:00
memset ( brick - > hash_table , 0 , PAGE_SIZE ) ;
for ( i = 0 ; i < NR_HASH_PAGES ; i + + ) {
struct trans_logger_hash_anchor * sub_table ;
int j ;
// this should be usually optimized away as dead code
if ( unlikely ( i > = MAX_HASH_PAGES ) ) {
MARS_ERR ( " sorry, subtable index %d is too large. \n " , i ) ;
_free_pages ( brick ) ;
return - EINVAL ;
}
sub_table = brick_block_alloc ( 0 , PAGE_SIZE ) ;
brick - > hash_table [ i ] = sub_table ;
if ( unlikely ( ! sub_table ) ) {
MARS_ERR ( " cannot allocate hash subtable %d. \n " , i ) ;
_free_pages ( brick ) ;
return - ENOMEM ;
}
memset ( sub_table , 0 , PAGE_SIZE ) ;
for ( j = 0 ; j < HASH_PER_PAGE ; j + + ) {
struct trans_logger_hash_anchor * start = & sub_table [ j ] ;
init_rwsem ( & start - > hash_mutex ) ;
INIT_LIST_HEAD ( & start - > hash_anchor ) ;
}
}
2011-04-29 09:36:10 +00:00
atomic_set ( & brick - > hash_count , 0 ) ;
2017-12-10 21:43:28 +00:00
init_rwsem ( & brick - > replay_mutex ) ;
2011-04-08 09:52:46 +00:00
INIT_LIST_HEAD ( & brick - > replay_list ) ;
2012-10-15 14:35:36 +00:00
INIT_LIST_HEAD ( & brick - > group_head ) ;
2011-06-30 13:15:52 +00:00
init_waitqueue_head ( & brick - > worker_event ) ;
init_waitqueue_head ( & brick - > caller_event ) ;
2012-02-11 19:01:16 +00:00
qq_init ( & brick - > q_phase [ 0 ] , brick ) ;
qq_init ( & brick - > q_phase [ 1 ] , brick ) ;
qq_init ( & brick - > q_phase [ 2 ] , brick ) ;
qq_init ( & brick - > q_phase [ 3 ] , brick ) ;
brick - > q_phase [ 0 ] . q_insert_info = " q0_ins " ;
brick - > q_phase [ 0 ] . q_pushback_info = " q0_push " ;
brick - > q_phase [ 0 ] . q_fetch_info = " q0_fetch " ;
brick - > q_phase [ 1 ] . q_insert_info = " q1_ins " ;
brick - > q_phase [ 1 ] . q_pushback_info = " q1_push " ;
brick - > q_phase [ 1 ] . q_fetch_info = " q1_fetch " ;
brick - > q_phase [ 2 ] . q_insert_info = " q2_ins " ;
brick - > q_phase [ 2 ] . q_pushback_info = " q2_push " ;
brick - > q_phase [ 2 ] . q_fetch_info = " q2_fetch " ;
brick - > q_phase [ 3 ] . q_insert_info = " q3_ins " ;
brick - > q_phase [ 3 ] . q_pushback_info = " q3_push " ;
brick - > q_phase [ 3 ] . q_fetch_info = " q3_fetch " ;
2011-11-03 11:17:59 +00:00
brick - > new_input_nr = TL_INPUT_LOG1 ;
brick - > log_input_nr = TL_INPUT_LOG1 ;
brick - > old_input_nr = TL_INPUT_LOG1 ;
2012-10-15 14:35:36 +00:00
add_to_group ( & global_writeback , brick ) ;
return 0 ;
}
static noinline
int trans_logger_brick_destruct ( struct trans_logger_brick * brick )
{
2013-04-12 09:55:52 +00:00
_free_pages ( brick ) ;
2012-10-15 14:35:36 +00:00
CHECK_HEAD_EMPTY ( & brick - > replay_list ) ;
remove_from_group ( & global_writeback , brick ) ;
2010-08-08 20:51:20 +00:00
return 0 ;
}
2011-04-08 09:52:46 +00:00
/*
 * Constructor of a trans_logger output.
 * There is nothing to set up; the function just reports success (0).
 */
static noinline
int trans_logger_output_construct(struct trans_logger_output *output)
{
	return 0;
}
2011-04-08 09:52:46 +00:00
static noinline
int trans_logger_input_construct ( struct trans_logger_input * input )
2010-08-11 16:02:08 +00:00
{
2011-11-03 11:17:59 +00:00
INIT_LIST_HEAD ( & input - > pos_list ) ;
2012-12-30 22:44:48 +00:00
sema_init ( & input - > inf_mutex , 1 ) ;
2011-11-03 11:17:59 +00:00
return 0 ;
}
static noinline
int trans_logger_input_destruct ( struct trans_logger_input * input )
{
CHECK_HEAD_EMPTY ( & input - > pos_list ) ;
2010-08-08 20:51:20 +00:00
return 0 ;
}
///////////////////////// static structs ////////////////////////
static struct trans_logger_brick_ops trans_logger_brick_ops = {
2011-02-23 20:48:06 +00:00
. brick_switch = trans_logger_switch ,
2011-04-01 11:18:32 +00:00
. brick_statistics = trans_logger_statistics ,
. reset_statistics = trans_logger_reset_statistics ,
2010-08-08 20:51:20 +00:00
} ;
static struct trans_logger_output_ops trans_logger_output_ops = {
. mars_get_info = trans_logger_get_info ,
2010-12-15 12:13:18 +00:00
. mref_get = trans_logger_ref_get ,
. mref_put = trans_logger_ref_put ,
. mref_io = trans_logger_ref_io ,
2010-08-08 20:51:20 +00:00
} ;
2010-08-10 17:39:30 +00:00
const struct trans_logger_input_type trans_logger_input_type = {
2010-08-08 20:51:20 +00:00
. type_name = " trans_logger_input " ,
. input_size = sizeof ( struct trans_logger_input ) ,
2010-08-11 16:02:08 +00:00
. input_construct = & trans_logger_input_construct ,
2011-11-03 11:17:59 +00:00
. input_destruct = & trans_logger_input_destruct ,
2010-08-08 20:51:20 +00:00
} ;
/*
 * Default input types of the brick: six slots, all of the same type.
 * NOTE(review): presumably the number of entries has to agree with
 * TL_INPUT_NR (used as .max_inputs of the brick type) -- verify.
 */
static const struct trans_logger_input_type *trans_logger_input_types[] = {
	&trans_logger_input_type,
	&trans_logger_input_type,
	&trans_logger_input_type,
	&trans_logger_input_type,
	&trans_logger_input_type,
	&trans_logger_input_type,
};
2010-08-10 17:39:30 +00:00
const struct trans_logger_output_type trans_logger_output_type = {
2010-08-08 20:51:20 +00:00
. type_name = " trans_logger_output " ,
. output_size = sizeof ( struct trans_logger_output ) ,
. master_ops = & trans_logger_output_ops ,
. output_construct = & trans_logger_output_construct ,
} ;
/* Default output types of the brick: a single output slot. */
static const struct trans_logger_output_type *trans_logger_output_types[] = {
	&trans_logger_output_type,
};
const struct trans_logger_brick_type trans_logger_brick_type = {
. type_name = " trans_logger_brick " ,
. brick_size = sizeof ( struct trans_logger_brick ) ,
2011-05-13 11:19:28 +00:00
. max_inputs = TL_INPUT_NR ,
2010-08-08 20:51:20 +00:00
. max_outputs = 1 ,
. master_ops = & trans_logger_brick_ops ,
2011-10-03 17:31:02 +00:00
. aspect_types = trans_logger_aspect_types ,
2010-08-08 20:51:20 +00:00
. default_input_types = trans_logger_input_types ,
. default_output_types = trans_logger_output_types ,
. brick_construct = & trans_logger_brick_construct ,
2012-10-15 14:35:36 +00:00
. brick_destruct = & trans_logger_brick_destruct ,
2010-08-08 20:51:20 +00:00
} ;
EXPORT_SYMBOL_GPL ( trans_logger_brick_type ) ;
////////////////// module init stuff /////////////////////////
2011-08-25 10:16:32 +00:00
/*
 * Init hook of the trans_logger brick type.
 *
 * Logs a message and registers the brick type. The result of
 * trans_logger_register_brick_type() is passed through to the caller.
 */
int __init init_mars_trans_logger(void)
{
	int status;

	MARS_INF(" init_trans_logger() \n ");

	status = trans_logger_register_brick_type();
	return status;
}
2014-04-23 11:16:26 +00:00
/*
 * Exit hook of the trans_logger brick type.
 *
 * Logs a message and unregisters the brick type.
 */
void exit_mars_trans_logger(void)
{
	MARS_INF(" exit_trans_logger() \n ");

	trans_logger_unregister_brick_type();
}