mirror of https://github.com/schoebel/mars
594 lines
14 KiB
C
594 lines
14 KiB
C
// (c) 2010 Thomas Schoebel-Theuer / 1&1 Internet AG
|
|
|
|
#include <linux/kernel.h>
|
|
#include <linux/module.h>
|
|
#include <linux/bio.h>
|
|
|
|
#include "lib_log.h"
|
|
|
|
bool is_log_ready(struct log_status *logst)
|
|
{
|
|
return logst->max_flying <= 0 ||
|
|
atomic_read(&logst->mref_flying) < logst->max_flying;
|
|
}
|
|
EXPORT_SYMBOL_GPL(is_log_ready);
|
|
|
|
void exit_logst(struct log_status *logst)
|
|
{
|
|
int count = 0;
|
|
log_flush(logst);
|
|
while (atomic_read(&logst->mref_flying) > 0) {
|
|
if (!count++)
|
|
MARS_DBG("waiting for IO terminating...");
|
|
msleep(500);
|
|
}
|
|
exit_generic_object_layout(&logst->ref_object_layout);
|
|
}
|
|
EXPORT_SYMBOL_GPL(exit_logst);
|
|
|
|
void init_logst(struct log_status *logst, struct mars_input *input, loff_t start_pos)
|
|
{
|
|
exit_logst(logst);
|
|
|
|
memset(logst, 0, sizeof(struct log_status));
|
|
|
|
logst->input = input;
|
|
logst->brick = input->brick;
|
|
logst->log_pos = start_pos;
|
|
init_waitqueue_head(&logst->event);
|
|
}
|
|
EXPORT_SYMBOL_GPL(init_logst);
|
|
|
|
#define MARS_LOG_CB_MAX 32
|
|
|
|
struct log_cb_info {
|
|
struct mref_object *mref;
|
|
struct log_status *logst;
|
|
struct semaphore mutex;
|
|
atomic_t refcount;
|
|
int nr_cb;
|
|
void (*preios[MARS_LOG_CB_MAX])(void *private);
|
|
void (*endios[MARS_LOG_CB_MAX])(void *private, int error);
|
|
void *privates[MARS_LOG_CB_MAX];
|
|
};
|
|
|
|
static
|
|
void put_log_cb_info(struct log_cb_info *cb_info)
|
|
{
|
|
if (atomic_dec_and_test(&cb_info->refcount)) {
|
|
brick_mem_free(cb_info);
|
|
}
|
|
}
|
|
|
|
static
|
|
void log_write_endio(struct generic_callback *cb)
|
|
{
|
|
struct log_cb_info *cb_info = cb->cb_private;
|
|
struct log_status *logst;
|
|
int i;
|
|
|
|
CHECK_PTR(cb_info, err);
|
|
|
|
if (cb_info->mref) {
|
|
mars_trace(cb_info->mref, "log_endio");
|
|
mars_log_trace(cb_info->mref);
|
|
}
|
|
|
|
logst = cb_info->logst;
|
|
CHECK_PTR(logst, done);
|
|
atomic_dec(&logst->mref_flying);
|
|
|
|
MARS_IO("nr_cb = %d\n", cb_info->nr_cb);
|
|
|
|
down(&cb_info->mutex);
|
|
for (i = 0; i < cb_info->nr_cb; i++) {
|
|
void (*cbf)(void *private, int error) = cb_info->endios[i];
|
|
if (cbf) {
|
|
cbf(cb_info->privates[i], cb->cb_error);
|
|
}
|
|
}
|
|
cb_info->nr_cb = 0; // prevent late preio() callbacks
|
|
up(&cb_info->mutex);
|
|
|
|
done:
|
|
put_log_cb_info(cb_info);
|
|
return;
|
|
|
|
err:
|
|
MARS_FAT("internal pointer corruption\n");
|
|
}
|
|
|
|
void log_flush(struct log_status *logst)
|
|
{
|
|
struct mref_object *mref = logst->log_mref;
|
|
struct log_cb_info *cb_info;
|
|
int gap;
|
|
int i;
|
|
|
|
if (!mref || !logst->count)
|
|
return;
|
|
|
|
gap = 0;
|
|
if (logst->align_size > 0) {
|
|
// round up to next alignment border
|
|
int align_offset = logst->offset & (logst->align_size-1);
|
|
if (align_offset > 0) {
|
|
int restlen = mref->ref_len - logst->offset;
|
|
gap = logst->align_size - align_offset;
|
|
if (gap > restlen) {
|
|
gap = restlen;
|
|
}
|
|
}
|
|
}
|
|
if (gap > 0) {
|
|
// don't leak information from kernelspace
|
|
memset(mref->ref_data + logst->offset, 0, gap);
|
|
logst->offset += gap;
|
|
}
|
|
mref->ref_len = logst->offset;
|
|
logst->log_pos += logst->offset;
|
|
|
|
cb_info = logst->private;
|
|
logst->private = NULL;
|
|
SETUP_CALLBACK(mref, log_write_endio, cb_info);
|
|
cb_info->logst = logst;
|
|
mref->ref_rw = 1;
|
|
|
|
mars_trace(mref, "log_flush");
|
|
|
|
atomic_inc(&logst->mref_flying);
|
|
GENERIC_INPUT_CALL(logst->input, mref_io, mref);
|
|
GENERIC_INPUT_CALL(logst->input, mref_put, mref);
|
|
|
|
logst->offset = 0;
|
|
logst->count = 0;
|
|
logst->log_mref = NULL;
|
|
|
|
if (cb_info->nr_cb > 0) {
|
|
down(&cb_info->mutex);
|
|
for (i = 0; i < cb_info->nr_cb; i++) {
|
|
void (*cbf)(void *private) = cb_info->preios[i];
|
|
if (cbf) {
|
|
cbf(cb_info->privates[i]);
|
|
}
|
|
}
|
|
up(&cb_info->mutex);
|
|
}
|
|
put_log_cb_info(cb_info);
|
|
}
|
|
EXPORT_SYMBOL_GPL(log_flush);
|
|
|
|
void *log_reserve(struct log_status *logst, struct log_header *lh)
|
|
{
|
|
struct log_cb_info *cb_info = logst->private;
|
|
struct mref_object *mref;
|
|
void *data;
|
|
short total_len = lh->l_len + OVERHEAD;
|
|
int offset;
|
|
int status;
|
|
|
|
MARS_DBG("reserving %d bytes at %lld\n", lh->l_len, logst->log_pos);
|
|
|
|
mref = logst->log_mref;
|
|
if ((mref && total_len > mref->ref_len - logst->offset)
|
|
|| !cb_info || cb_info->nr_cb >= MARS_LOG_CB_MAX) {
|
|
log_flush(logst);
|
|
}
|
|
|
|
mref = logst->log_mref;
|
|
if (!mref) {
|
|
if (unlikely(logst->private)) {
|
|
MARS_ERR("oops\n");
|
|
brick_mem_free(logst->private);
|
|
}
|
|
logst->private = brick_zmem_alloc(sizeof(struct log_cb_info));
|
|
if (unlikely(!logst->private)) {
|
|
MARS_ERR("no memory\n");
|
|
goto err;
|
|
}
|
|
cb_info = logst->private;
|
|
sema_init(&cb_info->mutex, 1);
|
|
atomic_set(&cb_info->refcount, 2);
|
|
|
|
mref = mars_alloc_mref(logst->brick, &logst->ref_object_layout);
|
|
if (unlikely(!mref)) {
|
|
MARS_ERR("no mref\n");
|
|
goto err;
|
|
}
|
|
cb_info->mref = mref;
|
|
|
|
mref->ref_pos = logst->log_pos;
|
|
mref->ref_len = total_len;
|
|
mref->ref_may_write = WRITE;
|
|
mref->ref_prio = logst->io_prio;
|
|
if (logst->chunk_size > 0) {
|
|
int chunk_offset;
|
|
int chunk_rest;
|
|
chunk_offset = logst->log_pos & (loff_t)(logst->chunk_size - 1);
|
|
chunk_rest = logst->chunk_size - chunk_offset;
|
|
while (chunk_rest < total_len) {
|
|
chunk_rest += logst->chunk_size;
|
|
}
|
|
mref->ref_len = chunk_rest;
|
|
}
|
|
|
|
for (;;) {
|
|
status = GENERIC_INPUT_CALL(logst->input, mref_get, mref);
|
|
if (likely(status >= 0)) {
|
|
break;
|
|
}
|
|
if (status != -ENOMEM && status != -EAGAIN) {
|
|
MARS_ERR("mref_get() failed, status = %d\n", status);
|
|
goto err_free;
|
|
}
|
|
msleep(100);
|
|
}
|
|
|
|
mars_trace(mref, "log_start");
|
|
|
|
if (unlikely(mref->ref_len < total_len)) {
|
|
MARS_ERR("ref_len = %d total_len = %d\n", mref->ref_len, total_len);
|
|
goto put;
|
|
}
|
|
|
|
logst->offset = 0;
|
|
logst->log_mref = mref;
|
|
}
|
|
|
|
offset = logst->offset;
|
|
data = mref->ref_data;
|
|
DATA_PUT(data, offset, START_MAGIC);
|
|
DATA_PUT(data, offset, (char)FORMAT_VERSION);
|
|
logst->validflag_offset = offset;
|
|
DATA_PUT(data, offset, (char)0); // valid_flag
|
|
DATA_PUT(data, offset, total_len); // start of next header
|
|
DATA_PUT(data, offset, lh->l_stamp.tv_sec);
|
|
DATA_PUT(data, offset, lh->l_stamp.tv_nsec);
|
|
DATA_PUT(data, offset, lh->l_pos);
|
|
logst->reallen_offset = offset;
|
|
DATA_PUT(data, offset, lh->l_len);
|
|
DATA_PUT(data, offset, lh->l_extra_len);
|
|
DATA_PUT(data, offset, lh->l_code);
|
|
DATA_PUT(data, offset, lh->l_extra);
|
|
|
|
logst->payload_offset = offset;
|
|
logst->payload_len = lh->l_len;
|
|
|
|
return data + offset;
|
|
|
|
put:
|
|
GENERIC_INPUT_CALL(logst->input, mref_put, mref);
|
|
logst->log_mref = NULL;
|
|
return NULL;
|
|
|
|
err_free:
|
|
mars_free_mref(mref);
|
|
if (logst->private) {
|
|
// TODO: if callbacks are already registered, call them here with some error code
|
|
brick_mem_free(logst->private);
|
|
logst->private = NULL;
|
|
}
|
|
err:
|
|
return NULL;
|
|
}
|
|
EXPORT_SYMBOL_GPL(log_reserve);
|
|
|
|
bool log_finalize(struct log_status *logst, int len, void (*preio)(void *private), void (*endio)(void *private, int error), void *private)
|
|
{
|
|
struct mref_object *mref = logst->log_mref;
|
|
struct log_cb_info *cb_info = logst->private;
|
|
struct timespec now;
|
|
void *data;
|
|
int offset;
|
|
int restlen;
|
|
int nr_cb;
|
|
bool ok = false;
|
|
|
|
CHECK_PTR(mref, err);
|
|
|
|
if (unlikely(len > logst->payload_len)) {
|
|
MARS_ERR("trying to write more than reserved (%d > %d)\n", len, logst->payload_len);
|
|
goto err;
|
|
}
|
|
restlen = mref->ref_len - logst->offset;
|
|
if (unlikely(len + END_OVERHEAD > restlen)) {
|
|
MARS_ERR("trying to write more than available (%d > %d)\n", len, (int)(restlen - END_OVERHEAD));
|
|
goto err;
|
|
}
|
|
if (unlikely(!cb_info || cb_info->nr_cb >= MARS_LOG_CB_MAX)) {
|
|
MARS_ERR("too many endio() calls\n");
|
|
goto err;
|
|
}
|
|
|
|
data = mref->ref_data;
|
|
|
|
/* Correct the length in the header.
|
|
*/
|
|
offset = logst->reallen_offset;
|
|
DATA_PUT(data, offset, len);
|
|
|
|
/* Write the trailer.
|
|
*/
|
|
offset = logst->payload_offset + len;
|
|
DATA_PUT(data, offset, END_MAGIC);
|
|
DATA_PUT(data, offset, (int)0); // crc
|
|
DATA_PUT(data, offset, (char)1); // valid_flag copy
|
|
DATA_PUT(data, offset, (char)0); // spare
|
|
DATA_PUT(data, offset, (short)0); // spare
|
|
DATA_PUT(data, offset, (int)0); // spare
|
|
get_lamport(&now); // when the log entry was ready.
|
|
DATA_PUT(data, offset, now.tv_sec);
|
|
DATA_PUT(data, offset, now.tv_nsec);
|
|
|
|
if (unlikely(offset > mref->ref_len)) {
|
|
MARS_ERR("length calculation was wrong: %d > %d\n", offset, mref->ref_len);
|
|
goto err;
|
|
}
|
|
logst->offset = offset;
|
|
|
|
/* This must come last. In case of incomplete
|
|
* or even overlapping disk transfers, this indicates
|
|
* the completeness / integrity of the payload at
|
|
* the time of starting the transfer.
|
|
*/
|
|
offset = logst->validflag_offset;
|
|
DATA_PUT(data, offset, (char)1);
|
|
|
|
nr_cb = cb_info->nr_cb++;
|
|
cb_info->preios[nr_cb] = preio;
|
|
cb_info->endios[nr_cb] = endio;
|
|
cb_info->privates[nr_cb] = private;
|
|
|
|
logst->count++;
|
|
ok = true;
|
|
|
|
err:
|
|
return ok;
|
|
}
|
|
EXPORT_SYMBOL_GPL(log_finalize);
|
|
|
|
|
|
static
|
|
int log_scan(void *buf, int len, struct log_header *lh, void **payload, int *payload_len)
|
|
{
|
|
bool dirty = false;
|
|
int offset;
|
|
int i;
|
|
|
|
*payload = NULL;
|
|
*payload_len = 0;
|
|
|
|
for (i = 0; i < len; i += sizeof(long)) {
|
|
long long start_magic;
|
|
char format_version;
|
|
char valid_flag;
|
|
short total_len;
|
|
long long end_magic;
|
|
char valid_copy;
|
|
|
|
int restlen;
|
|
int found_offset;
|
|
|
|
offset = i;
|
|
DATA_GET(buf, offset, start_magic);
|
|
if (start_magic != START_MAGIC) {
|
|
if (start_magic != 0)
|
|
dirty = true;
|
|
continue;
|
|
}
|
|
|
|
restlen = len - i;
|
|
if (restlen < START_OVERHEAD) {
|
|
MARS_WRN("magic found at offset %d, but restlen %d is too small\n", i, restlen);
|
|
return -EBADMSG;
|
|
}
|
|
|
|
DATA_GET(buf, offset, format_version);
|
|
if (format_version != FORMAT_VERSION) {
|
|
MARS_WRN("found unknown data format %d at offset %d\n", (int)format_version, i);
|
|
continue;
|
|
}
|
|
DATA_GET(buf, offset, valid_flag);
|
|
if (!valid_flag) {
|
|
MARS_WRN("data at offset %d is marked invalid (was there a short write?)\n", i);
|
|
continue;
|
|
}
|
|
DATA_GET(buf, offset, total_len);
|
|
if (total_len > restlen) {
|
|
return -EAGAIN;
|
|
}
|
|
|
|
memset(lh, 0, sizeof(struct log_header));
|
|
|
|
DATA_GET(buf, offset, lh->l_stamp.tv_sec);
|
|
DATA_GET(buf, offset, lh->l_stamp.tv_nsec);
|
|
DATA_GET(buf, offset, lh->l_pos);
|
|
DATA_GET(buf, offset, lh->l_len);
|
|
DATA_GET(buf, offset, lh->l_extra_len);
|
|
DATA_GET(buf, offset, lh->l_code);
|
|
DATA_GET(buf, offset, lh->l_extra);
|
|
|
|
found_offset = offset;
|
|
offset += lh->l_len;
|
|
|
|
restlen = len - offset;
|
|
if (restlen < END_OVERHEAD) {
|
|
MARS_WRN("magic found at offset %d, but restlen %d is too small\n", i, restlen);
|
|
continue;
|
|
}
|
|
|
|
DATA_GET(buf, offset, end_magic);
|
|
if (end_magic != END_MAGIC) {
|
|
MARS_WRN("bad end_magic 0x%llx\n", end_magic);
|
|
continue;
|
|
}
|
|
DATA_GET(buf, offset, lh->l_crc);
|
|
DATA_GET(buf, offset, valid_copy);
|
|
if (valid_copy != 1) {
|
|
MARS_WRN("found uncompleted / invalid data at %d len = %d (valid_flag = %d)\n", i, lh->l_len, (int)valid_copy);
|
|
continue;
|
|
}
|
|
// skip spares
|
|
offset += 3 + 4;
|
|
DATA_GET(buf, offset, lh->l_written.tv_sec);
|
|
DATA_GET(buf, offset, lh->l_written.tv_nsec);
|
|
|
|
// last check
|
|
if (total_len != offset - i) {
|
|
MARS_WRN("size mismatch at offset %d: %d != %d\n", i, total_len, offset - i);
|
|
// just warn, but no consequences: better use the data, it has been checked by lots of magics
|
|
}
|
|
|
|
// Success...
|
|
*payload = buf + found_offset;
|
|
*payload_len = lh->l_len;
|
|
goto done;
|
|
}
|
|
offset = i;
|
|
|
|
done:
|
|
// don't cry when nullbytes have been skipped
|
|
if (i > 0 && dirty) {
|
|
MARS_WRN("skipped %d dirty bytes at offset %d to find valid data\n", i, offset);
|
|
}
|
|
return offset;
|
|
}
|
|
|
|
static
|
|
void log_read_endio(struct generic_callback *cb)
|
|
{
|
|
struct log_status *logst = cb->cb_private;
|
|
|
|
CHECK_PTR(logst, err);
|
|
logst->error_code = cb->cb_error;
|
|
logst->got = true;
|
|
wake_up_interruptible(&logst->event);
|
|
return;
|
|
|
|
err:
|
|
MARS_FAT("internal pointer corruption\n");
|
|
}
|
|
|
|
|
|
int log_read(struct log_status *logst, struct log_header *lh, void **payload, int *payload_len)
|
|
{
|
|
struct mref_object *mref;
|
|
int status;
|
|
|
|
restart:
|
|
status = 0;
|
|
mref = logst->read_mref;
|
|
if (!mref || logst->do_free) {
|
|
int chunk_offset;
|
|
int chunk_rest;
|
|
|
|
if (mref) {
|
|
logst->log_pos += logst->offset;
|
|
GENERIC_INPUT_CALL(logst->input, mref_put, mref);
|
|
logst->read_mref = NULL;
|
|
logst->offset = 0;
|
|
}
|
|
|
|
mref = mars_alloc_mref(logst->brick, &logst->ref_object_layout);
|
|
if (unlikely(!mref)) {
|
|
MARS_ERR("no mref\n");
|
|
goto done;
|
|
}
|
|
mref->ref_pos = logst->log_pos;
|
|
chunk_offset = logst->log_pos & (loff_t)(logst->chunk_size - 1);
|
|
chunk_rest = logst->chunk_size - chunk_offset;
|
|
mref->ref_len = chunk_rest + logst->chunk_size * 4;
|
|
mref->ref_prio = logst->io_prio;
|
|
|
|
status = GENERIC_INPUT_CALL(logst->input, mref_get, mref);
|
|
if (unlikely(status < 0)) {
|
|
if (status != -ENODATA) {
|
|
MARS_ERR("mref_get() failed, status = %d\n", status);
|
|
}
|
|
goto done_free;
|
|
}
|
|
|
|
SETUP_CALLBACK(mref, log_read_endio, logst);
|
|
mref->ref_rw = READ;
|
|
logst->offset = 0;
|
|
logst->got = false;
|
|
logst->do_free = false;
|
|
|
|
GENERIC_INPUT_CALL(logst->input, mref_io, mref);
|
|
|
|
wait_event_interruptible_timeout(logst->event, logst->got, 60 * HZ);
|
|
status = -EIO;
|
|
if (!logst->got)
|
|
goto done_put;
|
|
status = logst->error_code;
|
|
if (status < 0)
|
|
goto done_put;
|
|
logst->read_mref = mref;
|
|
}
|
|
|
|
status = log_scan(mref->ref_data + logst->offset, mref->ref_len - logst->offset, lh, payload, payload_len);
|
|
if (unlikely(status == 0)) {
|
|
MARS_ERR("bad logfile scan\n");
|
|
status = -EINVAL;
|
|
}
|
|
if (unlikely(status < 0)) {
|
|
goto done_put;
|
|
}
|
|
|
|
// memorize success
|
|
logst->offset += status;
|
|
if (logst->offset > mref->ref_len - logst->chunk_size) {
|
|
logst->do_free = true;
|
|
}
|
|
|
|
done:
|
|
if (status == -ENODATA) {
|
|
status = 0; // indicates EOF
|
|
}
|
|
return status;
|
|
|
|
done_put:
|
|
if (mref) {
|
|
logst->log_pos += logst->offset;
|
|
GENERIC_INPUT_CALL(logst->input, mref_put, mref);
|
|
logst->read_mref = NULL;
|
|
logst->offset = 0;
|
|
}
|
|
if (status == -EAGAIN && logst->offset > 0) {
|
|
goto restart;
|
|
}
|
|
goto done;
|
|
|
|
done_free:
|
|
if (mref) {
|
|
mars_free_mref(mref);
|
|
}
|
|
logst->read_mref = NULL;
|
|
goto done;
|
|
|
|
}
|
|
EXPORT_SYMBOL_GPL(log_read);
|
|
|
|
|
|
////////////////// module init stuff /////////////////////////
|
|
|
|
int __init init_log_format(void)
|
|
{
|
|
MARS_INF("init_log_format()\n");
|
|
return 0;
|
|
}
|
|
|
|
void __exit exit_log_format(void)
|
|
{
|
|
MARS_INF("exit_log_format()\n");
|
|
}
|
|
|
|
#ifndef CONFIG_MARS_HAVE_BIGMODULE
|
|
MODULE_DESCRIPTION("MARS log_format infrastucture");
|
|
MODULE_AUTHOR("Thomas Schoebel-Theuer <tst@1und1.de>");
|
|
MODULE_LICENSE("GPL");
|
|
|
|
module_init(init_log_format);
|
|
module_exit(exit_log_format);
|
|
#endif
|