mirror of https://github.com/schoebel/mars
lib_log et al: report scan problems
This commit is contained in:
parent
58d7aa98ea
commit
f5f758bf69
116
kernel/lib_log.c
116
kernel/lib_log.c
|
@ -515,7 +515,7 @@ void log_read_endio(struct generic_callback *cb)
|
|||
|
||||
LAST_CALLBACK(cb);
|
||||
CHECK_PTR(logst, err);
|
||||
logst->error_code = cb->cb_error;
|
||||
logst->posix_error_code = cb->cb_error;
|
||||
logst->got = true;
|
||||
wake_up_interruptible(&logst->event);
|
||||
return;
|
||||
|
@ -590,7 +590,7 @@ restart:
|
|||
status = -ETIME;
|
||||
if (!logst->got)
|
||||
goto done_put;
|
||||
status = logst->error_code;
|
||||
status = logst->posix_error_code;
|
||||
if (status < 0)
|
||||
goto done_put;
|
||||
if (mref->ref_len < this_len) {
|
||||
|
@ -611,11 +611,16 @@ restart:
|
|||
payload,
|
||||
payload_len,
|
||||
dealloc,
|
||||
&logst->seq_nr);
|
||||
&logst->seq_nr,
|
||||
&logst->mars_error_code,
|
||||
&logst->byte_code);
|
||||
|
||||
if (unlikely(status == 0)) {
|
||||
MARS_ERR("bad logfile scan\n");
|
||||
status = -EINVAL;
|
||||
if (!logst->mars_error_code)
|
||||
logst->mars_error_code = -MARS_ERR_NOSCAN;
|
||||
goto done_put;
|
||||
}
|
||||
if (unlikely(status < 0)) {
|
||||
goto done_put;
|
||||
|
@ -663,7 +668,8 @@ int _check_crc(struct log_header *lh,
|
|||
void *crc,
|
||||
void *crc_buf,
|
||||
int crc_len,
|
||||
__u32 check_flags)
|
||||
__u32 check_flags,
|
||||
int *mars_error_code)
|
||||
{
|
||||
__u32 invalid_check_flags;
|
||||
bool is_invalid = false;
|
||||
|
@ -742,16 +748,21 @@ int _check_crc(struct log_header *lh,
|
|||
did_iterative_retry);
|
||||
}
|
||||
if (is_invalid) {
|
||||
if (!*mars_error_code)
|
||||
*mars_error_code = -MARS_ERR_CRC_FLAGS;
|
||||
MARS_ERR("Found invalid crc flags=0x%x retried=%d+%d\n",
|
||||
invalid_check_flags,
|
||||
did_simple_retry, did_iterative_retry);
|
||||
}
|
||||
} else if (res < 0 && !*mars_error_code)
|
||||
*mars_error_code = -MARS_ERR_CRC_MISMATCH;
|
||||
|
||||
return res;
|
||||
}
|
||||
|
||||
static
|
||||
int classify_bad_magic(unsigned long long this_magic,
|
||||
unsigned char *found_byte_code)
|
||||
unsigned char *found_byte_code,
|
||||
int *mars_error_code)
|
||||
{
|
||||
unsigned long long shifter;
|
||||
unsigned char pattern;
|
||||
|
@ -779,17 +790,27 @@ int classify_bad_magic(unsigned long long this_magic,
|
|||
for (j = 1; j < sizeof(shifter); j++) {
|
||||
shifter >>= 8;
|
||||
byte = shifter;
|
||||
if (byte != pattern) {
|
||||
*found_byte_code = byte;
|
||||
return -EBADMSG;
|
||||
}
|
||||
if (byte != pattern)
|
||||
goto no_repeat;
|
||||
}
|
||||
*found_byte_code = byte;
|
||||
if (!*mars_error_code)
|
||||
*mars_error_code = -MARS_ERR_MAGIC_REPEATED;
|
||||
return -EBADMSG;
|
||||
|
||||
no_repeat:
|
||||
/* Last resort:
|
||||
* not enough knowledge what might have happened before.
|
||||
*/
|
||||
if (!*mars_error_code)
|
||||
*mars_error_code = -MARS_ERR_MAGIC_BAD;
|
||||
return -EBADRQC;
|
||||
}
|
||||
|
||||
/* Store a MARS-specific error code through the pointer named
 * `mars_error_code` that must be in scope at the point of use.
 * An already recorded (non-zero) code is never overwritten, so the
 * first problem found is the one that gets reported.
 *
 * The do { } while (0) wrapper makes the expansion exactly one
 * statement, so the macro cannot capture an `else` that follows it.
 */
#define RECORD_ERR(code)						\
	do {								\
		if (!*mars_error_code)					\
			*mars_error_code = (code);			\
	} while (0)
|
||||
|
||||
int log_scan(void *buf,
|
||||
int len,
|
||||
loff_t file_pos,
|
||||
|
@ -798,7 +819,9 @@ int log_scan(void *buf,
|
|||
struct log_header *lh,
|
||||
void **payload, int *payload_len,
|
||||
void **dealloc,
|
||||
unsigned int *seq_nr)
|
||||
unsigned int *seq_nr,
|
||||
int *mars_error_code,
|
||||
int *byte_code)
|
||||
{
|
||||
bool dirty = false;
|
||||
unsigned char start_byte_code = 0;
|
||||
|
@ -829,7 +852,9 @@ int log_scan(void *buf,
|
|||
|
||||
offset = i;
|
||||
if (unlikely(i > 0 && !sloppy)) {
|
||||
MARS_ERR(SCAN_TXT "detected a hole / bad data\n", SCAN_PAR);
|
||||
MARS_ERR(SCAN_TXT "detected a hole / bad data >= %d\n",
|
||||
SCAN_PAR, i);
|
||||
RECORD_ERR(-MARS_ERR_SCAN_HOLE);
|
||||
return -EBADMSG;
|
||||
}
|
||||
|
||||
|
@ -841,7 +866,15 @@ int log_scan(void *buf,
|
|||
*/
|
||||
if (start_magic != 0) {
|
||||
dirty = true;
|
||||
classify_bad_magic(start_magic, &start_byte_code);
|
||||
classify_bad_magic(start_magic,
|
||||
&start_byte_code,
|
||||
mars_error_code);
|
||||
if (start_byte_code && !*byte_code) {
|
||||
*byte_code = start_byte_code;
|
||||
MARS_WRN(SCAN_TXT "found repeated byte pattern 0x%02x",
|
||||
SCAN_PAR,
|
||||
start_byte_code);
|
||||
}
|
||||
}
|
||||
continue;
|
||||
}
|
||||
|
@ -854,27 +887,37 @@ int log_scan(void *buf,
|
|||
|
||||
DATA_GET(buf, offset, format_version);
|
||||
if (unlikely(format_version != FORMAT_VERSION)) {
|
||||
MARS_ERR(SCAN_TXT "found unknown data format %d\n", SCAN_PAR, (int)format_version);
|
||||
MARS_ERR(SCAN_TXT "found unknown data format %d\n",
|
||||
SCAN_PAR,
|
||||
(int)format_version);
|
||||
RECORD_ERR(-MARS_ERR_SCAN_FORMAT);
|
||||
return -EBADMSG;
|
||||
}
|
||||
DATA_GET(buf, offset, valid_flag);
|
||||
if (unlikely(!valid_flag)) {
|
||||
MARS_WRN(SCAN_TXT "data is explicitly marked invalid (was there a short write?)\n", SCAN_PAR);
|
||||
MARS_WRN(SCAN_TXT "data is explicitly marked invalid (was there a short write?)\n",
|
||||
SCAN_PAR);
|
||||
RECORD_ERR(-MARS_ERR_SCAN_INVAL);
|
||||
continue;
|
||||
}
|
||||
DATA_GET(buf, offset, total_len);
|
||||
if (unlikely(!total_len)) {
|
||||
MARS_WRN(SCAN_TXT "advance is zero\n",
|
||||
SCAN_PAR);
|
||||
RECORD_ERR(-MARS_ERR_SCAN_ZERO);
|
||||
return -ERESTART;
|
||||
}
|
||||
if (unlikely(total_len < 0)) {
|
||||
MARS_WRN(SCAN_TXT "advance %d is wrong\n",
|
||||
SCAN_PAR, total_len);
|
||||
RECORD_ERR(-MARS_ERR_SCAN_NEGATIVE);
|
||||
return -EBADMSG;
|
||||
}
|
||||
if (unlikely(total_len > restlen)) {
|
||||
MARS_WRN(SCAN_TXT "total_len = %d but available data restlen = %d. Was the logfile truncated?\n", SCAN_PAR, total_len, restlen);
|
||||
MARS_WRN(SCAN_TXT "total_len = %d but available data restlen = %d. Was the logfile truncated?\n",
|
||||
SCAN_PAR,
|
||||
total_len, restlen);
|
||||
RECORD_ERR(-MARS_ERR_SCAN_BADLEN);
|
||||
return -EAGAIN;
|
||||
}
|
||||
|
||||
|
@ -894,6 +937,7 @@ int log_scan(void *buf,
|
|||
if (unlikely(advance <= 0)) {
|
||||
MARS_WRN(SCAN_TXT "advance %d is too small\n",
|
||||
SCAN_PAR, advance);
|
||||
RECORD_ERR(-MARS_ERR_SCAN_ADVANCE);
|
||||
return -ERESTART;
|
||||
}
|
||||
offset += advance;
|
||||
|
@ -901,6 +945,7 @@ int log_scan(void *buf,
|
|||
restlen = len - offset;
|
||||
if (unlikely(restlen < END_OVERHEAD)) {
|
||||
MARS_WRN(SCAN_TXT "restlen %d is too small\n", SCAN_PAR, restlen);
|
||||
RECORD_ERR(-MARS_ERR_SCAN_RESTLEN);
|
||||
return -EAGAIN;
|
||||
}
|
||||
|
||||
|
@ -912,8 +957,11 @@ int log_scan(void *buf,
|
|||
MARS_WRN(SCAN_TXT "bad end_magic 0x%llx, is the logfile truncated?\n",
|
||||
SCAN_PAR,
|
||||
end_magic);
|
||||
err = classify_bad_magic(end_magic, &end_byte_code);
|
||||
if (end_byte_code) {
|
||||
err = classify_bad_magic(end_magic,
|
||||
&end_byte_code,
|
||||
mars_error_code);
|
||||
if (end_byte_code && !*byte_code) {
|
||||
*byte_code = end_byte_code;
|
||||
MARS_WRN(SCAN_TXT "found repeated byte pattern 0x%02x",
|
||||
SCAN_PAR,
|
||||
end_byte_code);
|
||||
|
@ -924,7 +972,10 @@ int log_scan(void *buf,
|
|||
DATA_GET(buf, offset, valid_copy);
|
||||
|
||||
if (unlikely(valid_copy != 1)) {
|
||||
MARS_WRN(SCAN_TXT "found data marked as uncompleted / invalid, len = %d, valid_flag = %d\n", SCAN_PAR, lh->l_len, (int)valid_copy);
|
||||
MARS_WRN(SCAN_TXT "found data marked as uncompleted / invalid, len = %d, valid_flag = %d\n",
|
||||
SCAN_PAR,
|
||||
lh->l_len, (int)valid_copy);
|
||||
RECORD_ERR(-MARS_ERR_SCAN_INCOMPL);
|
||||
return -EBADMSG;
|
||||
}
|
||||
|
||||
|
@ -937,10 +988,22 @@ int log_scan(void *buf,
|
|||
offset += LOG_CHKSUM_SIZE;
|
||||
|
||||
if (unlikely(lh->l_seq_nr > *seq_nr + 1 && lh->l_seq_nr && *seq_nr)) {
|
||||
MARS_ERR(SCAN_TXT "record sequence number %u mismatch, expected was %u\n", SCAN_PAR, lh->l_seq_nr, *seq_nr + 1);
|
||||
MARS_ERR(SCAN_TXT "record sequence number %u mismatch, expected was %u\n",
|
||||
SCAN_PAR,
|
||||
lh->l_seq_nr, *seq_nr + 1);
|
||||
RECORD_ERR(-MARS_ERR_SCAN_SEQ_FW);
|
||||
return -EBADMSG;
|
||||
} else if (unlikely(lh->l_seq_nr != *seq_nr + 1 && lh->l_seq_nr && *seq_nr)) {
|
||||
MARS_WRN(SCAN_TXT "record sequence number %u mismatch, expected was %u\n", SCAN_PAR, lh->l_seq_nr, *seq_nr + 1);
|
||||
MARS_WRN(SCAN_TXT "record sequence number %u mismatch, expected was %u\n",
|
||||
SCAN_PAR,
|
||||
lh->l_seq_nr, *seq_nr + 1);
|
||||
/* Theoretically, a backskip of sequence number
|
||||
* can be tolerated, if it is really true.
|
||||
* We may use this in future, e.g. for propagation
|
||||
* of updated records.
|
||||
* It may also happen in some future record patterns.
|
||||
*/
|
||||
RECORD_ERR(-MARS_ERR_SCAN_SEQ_BW);
|
||||
}
|
||||
*seq_nr = lh->l_seq_nr;
|
||||
|
||||
|
@ -966,11 +1029,13 @@ int log_scan(void *buf,
|
|||
(decompr_len % 512) != 0)) {
|
||||
MARS_ERR(SCAN_TXT "implausible decompr_len: %d ~~ %d\n",
|
||||
SCAN_PAR, decompr_len, crc_len);
|
||||
RECORD_ERR(-MARS_ERR_DECOMPR_BADLEN);
|
||||
return -EBADMSG;
|
||||
}
|
||||
if (unlikely(crc_len > MARS_MAX_COMPR_SIZE)) {
|
||||
MARS_ERR(SCAN_TXT "implausible crc_len: %d > %ld\n",
|
||||
SCAN_PAR, crc_len, MARS_MAX_COMPR_SIZE);
|
||||
RECORD_ERR(-MARS_ERR_DECOMPR_TOOBIG);
|
||||
return -EBADMSG;
|
||||
}
|
||||
crc_buf = buf + found_offset;
|
||||
|
@ -988,6 +1053,7 @@ int log_scan(void *buf,
|
|||
MARS_ERR(SCAN_TXT "decompression 0x%x failure len=%d/%d\n",
|
||||
SCAN_PAR, check_flags,
|
||||
crc_len, decompr_len);
|
||||
RECORD_ERR(-MARS_ERR_DECOMPR_FAIL);
|
||||
return -EBADMSG;
|
||||
}
|
||||
}
|
||||
|
@ -997,7 +1063,8 @@ int log_scan(void *buf,
|
|||
crc,
|
||||
crc_buf,
|
||||
crc_len,
|
||||
check_flags);
|
||||
check_flags,
|
||||
mars_error_code);
|
||||
if (crc_status) {
|
||||
MARS_ERR(SCAN_TXT
|
||||
"data checksumming mismatch, flags=0x%x len=%d/%d err=%d\n",
|
||||
|
@ -1011,6 +1078,7 @@ int log_scan(void *buf,
|
|||
// last check
|
||||
if (unlikely(total_len != offset - i)) {
|
||||
MARS_ERR(SCAN_TXT "internal size mismatch: %d != %d\n", SCAN_PAR, total_len, offset - i);
|
||||
RECORD_ERR(-MARS_ERR_SCAN_SIZE);
|
||||
return -EBADMSG;
|
||||
}
|
||||
|
||||
|
@ -1021,7 +1089,8 @@ int log_scan(void *buf,
|
|||
// don't cry when nullbytes have been skipped
|
||||
if (i > 0 && dirty) {
|
||||
MARS_WRN(SCAN_TXT "skipped %d dirty bytes to find valid data\n", SCAN_PAR, i);
|
||||
if (start_byte_code) {
|
||||
if (start_byte_code && !*byte_code) {
|
||||
*byte_code = start_byte_code;
|
||||
MARS_WRN("found repeated byte pattern 0x%02x",
|
||||
start_byte_code);
|
||||
}
|
||||
|
@ -1031,6 +1100,7 @@ int log_scan(void *buf,
|
|||
}
|
||||
|
||||
MARS_ERR("could not find any useful data within len=%d bytes\n", len);
|
||||
RECORD_ERR(-MARS_ERR_SCAN_GARBAGE);
|
||||
return -EAGAIN;
|
||||
}
|
||||
|
||||
|
|
|
@ -32,6 +32,8 @@
|
|||
#ifndef LIB_LOG_H
|
||||
#define LIB_LOG_H
|
||||
|
||||
#include "mars_errno.h"
|
||||
|
||||
#ifdef __KERNEL__
|
||||
#include "mars.h"
|
||||
|
||||
|
@ -128,7 +130,9 @@ extern int log_scan(void *buf,
|
|||
struct log_header *lh,
|
||||
void **payload, int *payload_len,
|
||||
void **dealloc,
|
||||
unsigned int *seq_nr);
|
||||
unsigned int *seq_nr,
|
||||
int *mars_error_code,
|
||||
int *byte_code);
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
|
@ -169,7 +173,9 @@ struct log_status {
|
|||
struct mref_object *log_mref;
|
||||
struct mref_object *read_mref;
|
||||
wait_queue_head_t event;
|
||||
int error_code;
|
||||
int posix_error_code;
|
||||
int mars_error_code;
|
||||
int byte_code;
|
||||
bool got;
|
||||
bool do_free;
|
||||
void *private;
|
||||
|
|
|
@ -0,0 +1,74 @@
|
|||
/*
|
||||
* MARS Long Distance Replication Software
|
||||
*
|
||||
* This file is part of MARS project: http://schoebel.github.io/mars/
|
||||
*
|
||||
* Copyright (C) 2022 Thomas Schoebel-Theuer
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License along
|
||||
* with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
*/
|
||||
#ifndef MARS_ERRNO_H
|
||||
#define MARS_ERRNO_H
|
||||
|
||||
|
||||
/* MARS needs to report many issues to scripts and to humans.
|
||||
* Many problems are not covered by POSIX and other errno definitions.
|
||||
*
|
||||
* IMPORTANT: keep each of the following namespaces unique within itself.
|
||||
* Keep MARS-specific namespace parts distinct from POSIX and others:
|
||||
* (a) Integer (primary key)
|
||||
* (b) C preprocessor macro names
|
||||
* (c) Cleartext intended for humans.
|
||||
* Also, make any entry a 1:1:1 correspondence.
|
||||
*
|
||||
* While (a) and (b) are only defined here, the full
|
||||
* relationship (a) <=> (b) <=> (c) should reside in marsadm.
|
||||
*/
|
||||
|
||||
#include <uapi/asm-generic/errno-base.h>
|
||||
#include <uapi/asm-generic/errno.h>
|
||||
|
||||
/* logfile scanning problems */
#define MARS_ERR_NOSCAN			10000 /* Logfile scan was not possible */
#define MARS_ERR_SCAN_TIMEOUT		10001 /* Logfile scan timed out */
#define MARS_ERR_SCAN_HOLE		10002 /* Detected a hole in logfile */
#define MARS_ERR_SCAN_FORMAT		10003 /* Unknown record format in logfile */
#define MARS_ERR_SCAN_ZERO		10004 /* Zero-length record encountered */
#define MARS_ERR_SCAN_NEGATIVE		10005 /* Negative record length */
#define MARS_ERR_SCAN_BADLEN		10006 /* Mismatch in record length */
#define MARS_ERR_SCAN_ADVANCE		10007 /* Positional advance too small */
#define MARS_ERR_SCAN_RESTLEN		10008 /* Record restlen not sufficient */
#define MARS_ERR_SCAN_INCOMPL		10009 /* Record explicitly marked as incomplete */
#define MARS_ERR_SCAN_SEQ_FW		10010 /* Illegal record sequence skip forward */
#define MARS_ERR_SCAN_SEQ_BW		10011 /* Record sequence skip backwards */
#define MARS_ERR_SCAN_SIZE		10012 /* Internal size mismatch */
/* Formerly 10003, which collided with MARS_ERR_SCAN_FORMAT and made the
 * two conditions indistinguishable. Every integer here must stay unique;
 * keep the errno table in marsadm in sync with these values.
 */
#define MARS_ERR_SCAN_INVAL		10013 /* Record marked as invalid */

#define MARS_ERR_MAGIC_BAD		10020 /* Bad magic */
#define MARS_ERR_MAGIC_REPEATED		10021 /* Bad magic has repeated pattern */
#define MARS_ERR_SCAN_GARBAGE		10029 /* Scanning found garbage */

/* CRC errors */
#define MARS_ERR_CRC_FLAGS		10100 /* Bad CRC flags found */
#define MARS_ERR_CRC_MISMATCH		10101 /* CRC mismatch found */

/* Compression errors */
#define MARS_ERR_DECOMPR_FAIL		10150 /* Decompression failure */
#define MARS_ERR_DECOMPR_TOOBIG		10151 /* Cannot handle decompression length */
#define MARS_ERR_DECOMPR_BADLEN		10152 /* Implausible decompression length */
|
||||
|
||||
|
||||
|
||||
#endif
|
|
@ -3083,6 +3083,12 @@ void trans_logger_replay(struct trans_logger_brick *brick)
|
|||
MARS_INF("starting replay from %lld to %lld\n", start_pos, end_pos);
|
||||
|
||||
brick->replay_code = replay_code;
|
||||
brick->mars_error_code = 0;
|
||||
brick->byte_code = 0;
|
||||
input->logst.posix_error_code = 0;
|
||||
input->logst.mars_error_code = 0;
|
||||
input->logst.byte_code = 0;
|
||||
|
||||
mars_power_led_on((void *)brick, true);
|
||||
|
||||
for (;;) {
|
||||
|
@ -3100,6 +3106,16 @@ void trans_logger_replay(struct trans_logger_brick *brick)
|
|||
status = log_read(&input->logst, false, &lh,
|
||||
&buf, &len, &dealloc);
|
||||
|
||||
/* only catch the very first mars_error_code */
|
||||
if (status < 0 &&
|
||||
!brick->mars_error_code &&
|
||||
input->logst.mars_error_code) {
|
||||
brick->mars_error_code =
|
||||
input->logst.mars_error_code;
|
||||
brick->byte_code =
|
||||
input->logst.byte_code;
|
||||
}
|
||||
|
||||
new_finished_pos = input->logst.log_pos + input->logst.offset;
|
||||
MARS_RPL("read %lld %lld\n", finished_pos, new_finished_pos);
|
||||
|
||||
|
@ -3131,7 +3147,10 @@ void trans_logger_replay(struct trans_logger_brick *brick)
|
|||
}
|
||||
if (unlikely(status < 0)) {
|
||||
replay_code = status;
|
||||
MARS_WRN("cannot read logfile data, status = %d\n", status);
|
||||
MARS_WRN("cannot read logfile data, err=%d,%d,%d\n",
|
||||
status,
|
||||
brick->mars_error_code,
|
||||
brick->byte_code);
|
||||
break;
|
||||
}
|
||||
|
||||
|
@ -3151,16 +3170,27 @@ void trans_logger_replay(struct trans_logger_brick *brick)
|
|||
} else if (unlikely(brick->disk_io_error)) {
|
||||
status = brick->disk_io_error;
|
||||
replay_code = status;
|
||||
MARS_ERR("IO error %d\n", status);
|
||||
MARS_ERR("IO error %d,%d,%d\n",
|
||||
status,
|
||||
brick->mars_error_code,
|
||||
brick->byte_code);
|
||||
break;
|
||||
} else if (likely(buf && len)) {
|
||||
if (brick->replay_limiter)
|
||||
mars_limit_sleep(brick->replay_limiter, (len - 1) / 1024 + 1);
|
||||
status = replay_data(brick, lh.l_pos, buf, len);
|
||||
MARS_RPL("replay %lld %lld (pos=%lld status=%d)\n", finished_pos, new_finished_pos, lh.l_pos, status);
|
||||
MARS_RPL("replay %lld %lld (pos=%lld err=%d,%d,%d)\n",
|
||||
finished_pos, new_finished_pos, lh.l_pos,
|
||||
status,
|
||||
input->logst.mars_error_code,
|
||||
input->logst.byte_code);
|
||||
if (unlikely(status < 0)) {
|
||||
replay_code = status;
|
||||
MARS_ERR("cannot replay data at pos = %lld len = %d, status = %d\n", lh.l_pos, len, status);
|
||||
MARS_ERR("cannot replay data at pos=%lld len=%d, err=%d,%d,%d\n",
|
||||
lh.l_pos, len,
|
||||
status,
|
||||
brick->mars_error_code,
|
||||
brick->byte_code);
|
||||
break;
|
||||
} else {
|
||||
finished_pos = new_finished_pos;
|
||||
|
@ -3181,7 +3211,10 @@ void trans_logger_replay(struct trans_logger_brick *brick)
|
|||
if (unlikely(brick->disk_io_error)) {
|
||||
status = brick->disk_io_error;
|
||||
replay_code = status;
|
||||
MARS_ERR("IO error %d\n", status);
|
||||
MARS_ERR("IO error %d,%d,%d\n",
|
||||
status,
|
||||
brick->mars_error_code,
|
||||
brick->byte_code);
|
||||
break;
|
||||
}
|
||||
|
||||
|
@ -3218,6 +3251,8 @@ void trans_logger_replay(struct trans_logger_brick *brick)
|
|||
if (status >= 0 && finished_pos == brick->replay_end_pos) {
|
||||
MARS_INF("replay finished at %lld\n", finished_pos);
|
||||
replay_code = TL_REPLAY_FINISHED;
|
||||
brick->mars_error_code = 0;
|
||||
brick->byte_code = 0;
|
||||
} else if (status == -EAGAIN && finished_pos + brick->replay_tolerance > brick->replay_end_pos) {
|
||||
MARS_INF("TOLERANCE: logfile is incomplete at %lld (of %lld)\n", finished_pos, brick->replay_end_pos);
|
||||
replay_code = TL_REPLAY_INCOMPLETE;
|
||||
|
@ -3225,9 +3260,17 @@ void trans_logger_replay(struct trans_logger_brick *brick)
|
|||
if (finished_pos < 0)
|
||||
finished_pos = new_finished_pos;
|
||||
if (finished_pos + brick->replay_tolerance > brick->replay_end_pos) {
|
||||
MARS_INF("TOLERANCE: logfile is incomplete at %lld (of %lld), status = %d\n", finished_pos, brick->replay_end_pos, status);
|
||||
MARS_INF("TOLERANCE: logfile is incomplete at %lld (of %lld), err=%d,%d,%d\n",
|
||||
finished_pos, brick->replay_end_pos,
|
||||
status,
|
||||
input->logst.mars_error_code,
|
||||
input->logst.byte_code);
|
||||
} else {
|
||||
MARS_ERR("replay error %d at %lld (of %lld)\n", status, finished_pos, brick->replay_end_pos);
|
||||
MARS_ERR("replay error %d,%d,%d at %lld (of %lld)\n",
|
||||
status,
|
||||
input->logst.mars_error_code,
|
||||
input->logst.byte_code,
|
||||
finished_pos, brick->replay_end_pos);
|
||||
}
|
||||
replay_code = status;
|
||||
} else {
|
||||
|
|
|
@ -193,6 +193,8 @@ struct trans_logger_brick {
|
|||
int log_input_nr; // where we are currently logging to
|
||||
int old_input_nr; // where old IO requests may be on the fly
|
||||
int replay_code; // replay errors (if any)
|
||||
int mars_error_code; /* MARS-specific error information */
|
||||
int byte_code; /* repeated byte pattern */
|
||||
bool stopped_logging; // direct IO without logging (only in case of EMERGENCY)
|
||||
// private
|
||||
int disk_io_error; // replay errors from callbacks
|
||||
|
|
|
@ -902,6 +902,8 @@ struct mars_rotate {
|
|||
int fetch_next_is_available;
|
||||
int relevant_serial;
|
||||
int replay_code;
|
||||
int mars_error_code;
|
||||
int byte_code;
|
||||
int avoid_count;
|
||||
int old_open_count;
|
||||
bool is_attached;
|
||||
|
@ -1887,6 +1889,33 @@ done:
|
|||
return status;
|
||||
}
|
||||
|
||||
static
|
||||
int __show_actual3(const char *path,
|
||||
const char *name,
|
||||
int val1, int val2, int val3)
|
||||
{
|
||||
char *src;
|
||||
char *dst = NULL;
|
||||
int status = -EINVAL;
|
||||
|
||||
if (val3)
|
||||
src = path_make("%d,%d,0x%02x", val1, val2, val3);
|
||||
else
|
||||
src = path_make("%d,%d", val1, val2);
|
||||
dst = path_make("%s/actual-%s/%s", path, my_id(), name);
|
||||
status = -ENOMEM;
|
||||
if (!dst)
|
||||
goto done;
|
||||
|
||||
MARS_DBG("symlink '%s' -> '%s'\n", dst, src);
|
||||
status = ordered_symlink(src, dst, NULL);
|
||||
|
||||
done:
|
||||
brick_string_free(src);
|
||||
brick_string_free(dst);
|
||||
return status;
|
||||
}
|
||||
|
||||
static
|
||||
int __show_stamp(const char *path, const char *name, struct lamport_time *stamp)
|
||||
{
|
||||
|
@ -5383,6 +5412,8 @@ void _rotate_trans(struct mars_rotate *rot)
|
|||
trans_brick->new_input_nr = next_nr;
|
||||
MARS_INF_TO(rot->log_say, "started logrotate switchover from '%s' to '%s'\n", rot->relevant_log->d_path, rot->next_relevant_log->d_path);
|
||||
rot->replay_code = TL_REPLAY_RUNNING;
|
||||
rot->mars_error_code = 0;
|
||||
rot->byte_code = 0;
|
||||
}
|
||||
done: ;
|
||||
}
|
||||
|
@ -5512,6 +5543,8 @@ int _start_trans(struct mars_rotate *rot)
|
|||
trans_brick->replay_tolerance = REPLAY_TOLERANCE;
|
||||
_init_trans_input(trans_input, rot->relevant_log, nr, rot);
|
||||
rot->replay_code = TL_REPLAY_RUNNING;
|
||||
rot->mars_error_code = 0;
|
||||
rot->byte_code = 0;
|
||||
|
||||
/* Connect to new transaction log
|
||||
*/
|
||||
|
@ -5737,15 +5770,26 @@ int make_log_finalize(struct mars_dent *dent)
|
|||
(trans_brick->replay_code == TL_REPLAY_INCOMPLETE ||
|
||||
trans_brick->replay_end_pos - trans_brick->replay_current_pos < trans_brick->replay_tolerance))) {
|
||||
if (trans_brick->replay_code < 0) {
|
||||
if (trans_brick->mars_error_code < 0 &&
|
||||
!rot->mars_error_code) {
|
||||
rot->mars_error_code =
|
||||
trans_brick->mars_error_code;
|
||||
rot->byte_code =
|
||||
trans_brick->byte_code;
|
||||
}
|
||||
MARS_ERR_TO(rot->log_say,
|
||||
"logfile replay stopped with error = %d at position %lld + %lld\n",
|
||||
"logfile replay stopped with error=%d,%d,%d at position %lld + %lld\n",
|
||||
trans_brick->replay_code,
|
||||
trans_brick->mars_error_code,
|
||||
trans_brick->byte_code,
|
||||
trans_brick->replay_current_pos,
|
||||
trans_brick->replay_end_pos - trans_brick->replay_current_pos);
|
||||
}
|
||||
make_rot_msg(rot, "err-replay-stop",
|
||||
"logfile replay stopped with error = %d at position %lld + %lld",
|
||||
"logfile replay stopped with error=%d,%d,%d at position %lld + %lld",
|
||||
trans_brick->replay_code,
|
||||
trans_brick->mars_error_code,
|
||||
trans_brick->byte_code,
|
||||
trans_brick->replay_current_pos,
|
||||
trans_brick->replay_end_pos - trans_brick->replay_current_pos);
|
||||
rot->replay_code = trans_brick->replay_code;
|
||||
|
@ -5810,7 +5854,16 @@ int make_log_finalize(struct mars_dent *dent)
|
|||
rot->retry_recovery = 0;
|
||||
|
||||
skip_retry_recovery:
|
||||
__show_actual(parent->d_path, "replay-code", rot->replay_code);
|
||||
if (rot->replay_code && rot->mars_error_code)
|
||||
__show_actual3(parent->d_path,
|
||||
"replay-code",
|
||||
rot->replay_code,
|
||||
rot->mars_error_code,
|
||||
rot->byte_code);
|
||||
else
|
||||
__show_actual(parent->d_path,
|
||||
"replay-code",
|
||||
rot->replay_code);
|
||||
|
||||
/* Stopping is also possible in case of errors
|
||||
*/
|
||||
|
|
|
@ -4423,6 +4423,37 @@ my %errno2names =
|
|||
# disjoint from the official POSIX errno codes.
|
||||
9998 => ["EXAMPLE_MARS_NAME", "ClearText"],
|
||||
9999 => "EXAMPLE_ANY_OTHER_NAME_WITHOUT_CLEARTEXT",
|
||||
|
||||
# /* logfile scanning problems */
|
||||
10000 => [ "MARS_ERR_NOSCAN", "Logfile scan was not possible" ],
10001 => [ "MARS_ERR_SCAN_TIMEOUT", "Logfile scan timed out" ],
10002 => [ "MARS_ERR_SCAN_HOLE", "Detected a hole in logfile" ],
10003 => [ "MARS_ERR_SCAN_FORMAT", "Unknown record format in logfile" ],
10004 => [ "MARS_ERR_SCAN_ZERO", "Zero-length record encountered" ],
10005 => [ "MARS_ERR_SCAN_NEGATIVE", "Negative record length" ],
10006 => [ "MARS_ERR_SCAN_BADLEN", "Mismatch in record length" ],
10007 => [ "MARS_ERR_SCAN_ADVANCE", "Positional advance too small" ],
10008 => [ "MARS_ERR_SCAN_RESTLEN", "Record restlen not sufficient" ],
10009 => [ "MARS_ERR_SCAN_INCOMPL", "Record explicitly marked as incomplete" ],
10010 => [ "MARS_ERR_SCAN_SEQ_FW", "Illegal record sequence skip forward" ],
10011 => [ "MARS_ERR_SCAN_SEQ_BW", "Record sequence skip backwards" ],
10012 => [ "MARS_ERR_SCAN_SIZE", "Internal size mismatch" ],
# Needs a key of its own: when a key is listed twice in a hash
# initializer, only the last entry survives. This entry used to share
# 10003 with MARS_ERR_SCAN_FORMAT. The key must equal the kernel-side
# value of MARS_ERR_SCAN_INVAL.
10013 => [ "MARS_ERR_SCAN_INVAL", "Record marked as invalid" ],
|
||||
|
||||
10020 => [ "MARS_ERR_MAGIC_BAD", "Bad magic " ],
|
||||
10021 => [ "MARS_ERR_MAGIC_REPEATED", "Bad magic has repeated pattern" ],
|
||||
10029 => [ "MARS_ERR_SCAN_GARBAGE", "Scanning found garbage" ],
|
||||
|
||||
# /* CRC errors */
|
||||
10100 => [ "MARS_ERR_CRC_FLAGS", "Bad CRC flags found" ],
|
||||
10101 => [ "MARS_ERR_CRC_MISMATCH", "CRC mismatch found" ],
|
||||
|
||||
# /* Compression errors */
|
||||
10150 => [ "MARS_ERR_DECOMPR_FAIL", "Decompression failure" ],
|
||||
10151 => [ "MARS_ERR_DECOMPR_TOOBIG", "Cannot handle decompression length" ],
|
||||
10152 => [ "MARS_ERR_DECOMPR_BADLEN", "Implausible decompression length" ],
|
||||
|
||||
|
||||
);
|
||||
|
||||
sub __conv_errno_to_names {
|
||||
|
|
Loading…
Reference in New Issue