mirror of https://github.com/schoebel/mars
310 lines
8.5 KiB
C
310 lines
8.5 KiB
C
/*
 * MARS Long Distance Replication Software
 *
 * This file is part of MARS project: http://schoebel.github.io/mars/
 *
 * Copyright (C) 2010-2014 Thomas Schoebel-Theuer
 * Copyright (C) 2011-2014 1&1 Internet AG
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License along
 * with this program; if not, write to the Free Software Foundation, Inc.,
 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 */
|
|
|
|
|
|
/* Definitions for the logfile format.
 *
 * This is meant to be shared between different transaction logger variants,
 * and/or with userspace tools (e.g. logfile analyzers).
 * TODO: factor out some remaining kernelspace issues.
 */
|
|
|
|
#ifndef LIB_LOG_H
|
|
#define LIB_LOG_H
|
|
|
|
#ifdef __KERNEL__
|
|
#include "mars.h"
|
|
|
|
extern atomic_t global_mref_flying;
|
|
#endif
|
|
|
|
/* The following structure is memory-only.
 * Transfers to disk happen indirectly, via the
 * format conversion functions below.
 * The advantage is that even newer disk formats can be parsed
 * by old code (of course, not all information / features will be
 * available then).
 */
|
|
#define log_header log_header_v1
|
|
|
|
struct log_header_v1 {
|
|
struct lamport_time l_stamp;
|
|
struct lamport_time l_written;
|
|
loff_t l_pos;
|
|
short l_len;
|
|
short l_code;
|
|
unsigned int l_seq_nr;
|
|
int l_crc;
|
|
};
|
|
|
|
/* Version of the disk format; currently there is no other one. */
#define FORMAT_VERSION		1

/* Values for the l_code field of a log header. */
#define CODE_UNKNOWN		0
#define CODE_WRITE_NEW		1
#define CODE_WRITE_OLD		2

/* 64-bit markers delimiting a record: START_MAGIC is the very first
 * field, END_MAGIC directly follows the payload.
 */
#define START_MAGIC		0xa8f7e908d9177957ll
#define END_MAGIC		0x74941fb74ab5726dll
|
|
|
|
/* Number of bytes a record occupies ahead of its payload.
 * The annotations name what log_scan() reads at each position.
 * Note that the result has an unsigned type (sum of sizeof values).
 */
#define START_OVERHEAD							\
	(								\
		sizeof(START_MAGIC) +		/* start_magic */	\
		sizeof(char) +			/* format_version */	\
		sizeof(char) +			/* valid_flag */	\
		sizeof(short) +			/* total_len */		\
		sizeof(struct lamport_time) +	/* l_stamp */		\
		sizeof(loff_t) +		/* l_pos */		\
		sizeof(int) +			/* l_len + 2 spare */	\
		sizeof(int) +			/* 4 spare */		\
		sizeof(short) +			/* l_code */		\
		sizeof(short) +			/* 2 spare */		\
		0							\
	)
|
|
|
|
/* Number of bytes a record occupies behind its payload.
 * The annotations name what log_scan() reads at each position.
 * Note that the result has an unsigned type (sum of sizeof values).
 */
#define END_OVERHEAD							\
	(								\
		sizeof(END_MAGIC) +		/* end_magic */		\
		sizeof(int) +			/* l_crc */		\
		sizeof(char) +			/* valid_copy */	\
		3 + 4 /*spare*/ +		/* 3 spare, then the slot log_scan() reads l_seq_nr from */ \
		sizeof(struct lamport_time) +	/* l_written */		\
		0							\
	)

/* Total per-record framing overhead, i.e. the size of an empty record. */
#define OVERHEAD (START_OVERHEAD + END_OVERHEAD)
|
|
|
|
// TODO: make this bytesex-aware.
|
|
/* Store val at byte position offset within data and advance offset by
 * sizeof(val).
 *
 * data:   pointer to the start of the buffer
 * offset: integer lvalue holding the current byte position; it is
 *         expanded twice, so it must not have side effects
 * val:    value to store; its type determines the number of bytes written
 *
 * The value is written in host byte order through a casted pointer, so
 * the target position has to be accessible for that type.
 * __typeof__ is used instead of typeof so that the macro also works when
 * the including code is compiled in a strict ISO mode (e.g. -std=c11).
 */
#define DATA_PUT(data,offset,val)					\
	do {								\
		*((__typeof__(val)*)((data)+(offset))) = (val);		\
		(offset) += sizeof(val);				\
	} while (0)
|
|
|
|
/* Load val from byte position offset within data and advance offset by
 * sizeof(val).
 *
 * data:   pointer to the start of the buffer
 * offset: integer lvalue holding the current byte position; it is
 *         expanded twice, so it must not have side effects
 * val:    lvalue receiving the result; its type determines the number
 *         of bytes read
 *
 * The value is read in host byte order through a casted pointer, so the
 * source position has to be accessible for that type.
 * __typeof__ is used instead of typeof so that the macro also works when
 * the including code is compiled in a strict ISO mode (e.g. -std=c11).
 */
#define DATA_GET(data,offset,val)					\
	do {								\
		(val) = *((__typeof__(val)*)((data)+(offset)));		\
		(offset) += sizeof(val);				\
	} while (0)
|
|
|
|
#define SCAN_TXT "at file_pos = %lld file_offset = %d scan_offset = %d (%lld) test_offset = %d (%lld) restlen = %d: "
|
|
#define SCAN_PAR file_pos, file_offset, offset, file_pos + file_offset + offset, i, file_pos + file_offset + i, restlen
|
|
|
|
static inline
|
|
int log_scan(void *buf, int len, loff_t file_pos, int file_offset, bool sloppy, struct log_header *lh, void **payload, int *payload_len, unsigned int *seq_nr)
|
|
{
|
|
bool dirty = false;
|
|
int offset;
|
|
int i;
|
|
|
|
*payload = NULL;
|
|
*payload_len = 0;
|
|
|
|
for (i = 0; i < len && i <= len - OVERHEAD; i += sizeof(long)) {
|
|
long long start_magic;
|
|
char format_version;
|
|
char valid_flag;
|
|
short total_len;
|
|
long long end_magic;
|
|
char valid_copy;
|
|
|
|
int restlen = 0;
|
|
int found_offset;
|
|
|
|
offset = i;
|
|
if (unlikely(i > 0 && !sloppy)) {
|
|
MARS_ERR(SCAN_TXT "detected a hole / bad data\n", SCAN_PAR);
|
|
return -EBADMSG;
|
|
}
|
|
|
|
DATA_GET(buf, offset, start_magic);
|
|
if (unlikely(start_magic != START_MAGIC)) {
|
|
if (start_magic != 0)
|
|
dirty = true;
|
|
continue;
|
|
}
|
|
|
|
restlen = len - i;
|
|
if (unlikely(restlen < START_OVERHEAD)) {
|
|
MARS_WRN(SCAN_TXT "magic found, but restlen is too small\n", SCAN_PAR);
|
|
return -EAGAIN;
|
|
}
|
|
|
|
DATA_GET(buf, offset, format_version);
|
|
if (unlikely(format_version != FORMAT_VERSION)) {
|
|
MARS_ERR(SCAN_TXT "found unknown data format %d\n", SCAN_PAR, (int)format_version);
|
|
return -EBADMSG;
|
|
}
|
|
DATA_GET(buf, offset, valid_flag);
|
|
if (unlikely(!valid_flag)) {
|
|
MARS_WRN(SCAN_TXT "data is explicitly marked invalid (was there a short write?)\n", SCAN_PAR);
|
|
continue;
|
|
}
|
|
DATA_GET(buf, offset, total_len);
|
|
if (unlikely(total_len > restlen)) {
|
|
MARS_WRN(SCAN_TXT "total_len = %d but available data restlen = %d. Was the logfile truncated?\n", SCAN_PAR, total_len, restlen);
|
|
return -EAGAIN;
|
|
}
|
|
|
|
memset(lh, 0, sizeof(struct log_header));
|
|
|
|
DATA_GET(buf, offset, lh->l_stamp.tv_sec);
|
|
DATA_GET(buf, offset, lh->l_stamp.tv_nsec);
|
|
DATA_GET(buf, offset, lh->l_pos);
|
|
DATA_GET(buf, offset, lh->l_len);
|
|
offset += 2; // skip spare
|
|
offset += 4; // skip spare
|
|
DATA_GET(buf, offset, lh->l_code);
|
|
offset += 2; // skip spare
|
|
|
|
found_offset = offset;
|
|
offset += lh->l_len;
|
|
|
|
restlen = len - offset;
|
|
if (unlikely(restlen < END_OVERHEAD)) {
|
|
MARS_WRN(SCAN_TXT "restlen %d is too small\n", SCAN_PAR, restlen);
|
|
return -EAGAIN;
|
|
}
|
|
|
|
DATA_GET(buf, offset, end_magic);
|
|
if (unlikely(end_magic != END_MAGIC)) {
|
|
MARS_WRN(SCAN_TXT "bad end_magic 0x%llx, is the logfile truncated?\n", SCAN_PAR, end_magic);
|
|
return -EBADMSG;
|
|
}
|
|
DATA_GET(buf, offset, lh->l_crc);
|
|
DATA_GET(buf, offset, valid_copy);
|
|
|
|
if (unlikely(valid_copy != 1)) {
|
|
MARS_WRN(SCAN_TXT "found data marked as uncompleted / invalid, len = %d, valid_flag = %d\n", SCAN_PAR, lh->l_len, (int)valid_copy);
|
|
return -EBADMSG;
|
|
}
|
|
|
|
// skip spares
|
|
offset += 3;
|
|
|
|
DATA_GET(buf, offset, lh->l_seq_nr);
|
|
DATA_GET(buf, offset, lh->l_written.tv_sec);
|
|
DATA_GET(buf, offset, lh->l_written.tv_nsec);
|
|
|
|
if (unlikely(lh->l_seq_nr > *seq_nr + 1 && lh->l_seq_nr && *seq_nr)) {
|
|
MARS_ERR(SCAN_TXT "record sequence number %u mismatch, expected was %u\n", SCAN_PAR, lh->l_seq_nr, *seq_nr + 1);
|
|
return -EBADMSG;
|
|
} else if (unlikely(lh->l_seq_nr != *seq_nr + 1 && lh->l_seq_nr && *seq_nr)) {
|
|
MARS_WRN(SCAN_TXT "record sequence number %u mismatch, expected was %u\n", SCAN_PAR, lh->l_seq_nr, *seq_nr + 1);
|
|
}
|
|
*seq_nr = lh->l_seq_nr;
|
|
|
|
if (lh->l_crc) {
|
|
unsigned char checksum[mars_digest_size];
|
|
mars_digest(checksum, buf + found_offset, lh->l_len);
|
|
if (unlikely(*(int*)checksum != lh->l_crc)) {
|
|
MARS_ERR(SCAN_TXT "data checksumming mismatch, length = %d\n", SCAN_PAR, lh->l_len);
|
|
return -EBADMSG;
|
|
}
|
|
}
|
|
|
|
// last check
|
|
if (unlikely(total_len != offset - i)) {
|
|
MARS_ERR(SCAN_TXT "internal size mismatch: %d != %d\n", SCAN_PAR, total_len, offset - i);
|
|
return -EBADMSG;
|
|
}
|
|
|
|
// Success...
|
|
*payload = buf + found_offset;
|
|
*payload_len = lh->l_len;
|
|
|
|
// don't cry when nullbytes have been skipped
|
|
if (i > 0 && dirty) {
|
|
MARS_WRN(SCAN_TXT "skipped %d dirty bytes to find valid data\n", SCAN_PAR, i);
|
|
}
|
|
|
|
return offset;
|
|
}
|
|
|
|
MARS_ERR("could not find any useful data within len=%d bytes\n", len);
|
|
return -EAGAIN;
|
|
}
|
|
|
|
////////////////////////////////////////////////////////////////////////////
|
|
|
|
#ifdef __KERNEL__
|
|
|
|
/* Bookkeeping status between calls
|
|
*/
|
|
struct log_status {
|
|
// interfacing
|
|
wait_queue_head_t *signal_event;
|
|
bool *signal_flag;
|
|
// tunables
|
|
loff_t start_pos;
|
|
loff_t end_pos;
|
|
int align_size; // alignment between requests
|
|
int chunk_size; // must be at least 8K (better 64k)
|
|
int max_size; // max payload length
|
|
int io_prio;
|
|
bool do_crc;
|
|
// informational
|
|
atomic_t mref_flying;
|
|
int count;
|
|
loff_t log_pos;
|
|
struct lamport_time log_pos_stamp;
|
|
// internal
|
|
struct lamport_time tmp_pos_stamp;
|
|
struct mars_input *input;
|
|
struct mars_brick *brick;
|
|
struct mars_info info;
|
|
int offset;
|
|
int validflag_offset;
|
|
int reallen_offset;
|
|
int payload_offset;
|
|
int payload_len;
|
|
unsigned int seq_nr;
|
|
struct mref_object *log_mref;
|
|
struct mref_object *read_mref;
|
|
wait_queue_head_t event;
|
|
int error_code;
|
|
bool got;
|
|
bool do_free;
|
|
void *private;
|
|
};
|
|
|
|
void init_logst(struct log_status *logst, struct mars_input *input, loff_t start_pos, loff_t end_pos);
|
|
void exit_logst(struct log_status *logst);
|
|
|
|
void log_flush(struct log_status *logst);
|
|
|
|
void *log_reserve(struct log_status *logst, struct log_header *lh);
|
|
|
|
bool log_finalize(struct log_status *logst, int len, void (*endio)(void *private, int error), void *private);
|
|
|
|
int log_read(struct log_status *logst, bool sloppy, struct log_header *lh, void **payload, int *payload_len);
|
|
|
|
/////////////////////////////////////////////////////////////////////////
|
|
|
|
// init
|
|
|
|
/* Setup and teardown hooks of this module.  The int result of
 * init_log_format() is presumably an error code -- TODO confirm.
 */
extern int  init_log_format(void);
extern void exit_log_format(void);
|
|
|
|
#endif
|
|
#endif
|