qio: new brick, replacing historic aio

This commit is contained in:
Thomas Schoebel-Theuer 2022-12-02 22:49:15 +01:00 committed by Thomas Schoebel-Theuer
parent 812cfa685e
commit 26d5f34fbd
13 changed files with 2243 additions and 68 deletions

View File

@ -57,6 +57,8 @@ mars-objs := \
mars_net.o \
mars_server.o \
mars_client.o \
mars_qio.o \
lib_qio_rw.o \
mars_aio.o \
mars_sio.o \
mars_bio.o \

View File

@ -1,6 +1,9 @@
#
# MARS configuration
#
# This version uses new features like $(shell,$command) etc.
# An old version without such features can be found in the plain old
# Kconfig file.
config MARS
tristate "block storage replication MARS"
@ -434,5 +437,47 @@ config MARS_PREFER_SIO
depends on MARS
default n
help
BAD_PERFORMANCE
Normally OFF for production systems.
Only use as alternative for testing.
config MARS_CANNOT_USE_AIO_ANYMORE
bool
# AFAICS the old aio brick cannot be used anymore, starting with
# LTS kernels >= 5.10.
# Maybe that certain Frankenstein kernels can play a different game,
# but suchalike is not my game.
# So I exclude the historic AIO right here, as best as I can at the moment.
# Maybe there is a bug at Kconfig level I did not notice yet.
# Hopefully this will continue to work with future upstream kernel releases.
# When it fails with old / Frankenstein kernel, only a warning should appear,
# which should then be ignored.
default y if $(shell,/bin/sh -c "if grep -q -s QUEUE_FLAG_STABLE_WRITES $abs_srctree/include/linux/blkdev.h; then echo y; else echo n; fi")
config MARS_PREFER_QIO
bool "prefer future qio brick instead of historic aio"
depends on MARS
depends on !MARS_CANNOT_USE_AIO_ANYMORE
depends on $(shell,/bin/sh -c "if grep -q -s QUEUE_FLAG_BIDI $abs_srctree/include/linux/blkdev.h; then echo NONE_NONE; else echo MARS; fi")
default n
help
EXPERIMENTAL
Normally OFF for kernels < 5.10.x.
Only use as alternative for testing with those
kernels where it MAY work.
At the moment, this has not yet the maturity
you might expect.
AFAICS this is not usable before 8b3238cabd50e2715b6544e724e74685209b190a
(and maybe some more upstream patches, dto for Frankenstein kernels)
config MARS_TESTING_QIO_UNSAFE_PERFORMANCE
bool "DANGEROUS - only for testing"
depends on MARS_PREFER_QIO
default n
help
EXPERIMENTAL
Please do not enable this unless you know exactly what you are doing.
Although it passes all my other tests, a primary crash will
easily lead to corrupt transaction logfiles. So you will always
need a forced primary somewhere, losing data.
NEVER FOR PRODUCTION!

416
kernel/lib_qio_rw.c Normal file
View File

@ -0,0 +1,416 @@
/*
* MARS Long Distance Replication Software
*
* This file is part of MARS project: http://schoebel.github.io/mars/
*
* Copyright (C) 2010-2014 Thomas Schoebel-Theuer
* Copyright (C) 2011-2014 1&1 Internet AG
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License along
* with this program; if not, write to the Free Software Foundation, Inc.,
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
*/
#include <linux/module.h>
/* Needed since 9a07000400c853c
* detected via d3d1e320d43a7ba
*/
#include <linux/sched.h>
#ifndef __sched
#include <linux/sched/xacct.h>
#endif
#include "mars.h"
#ifdef ENABLE_MARS_QIO
#include "lib_qio_rw.h"
#include <linux/backing-dev.h>
#define MARS_MAX_TRANSFER (128 * 1024)
#define MARS_MAX_RW_COUNT (MAX_RW_COUNT > MARS_MAX_TRANSFER ? \
MARS_MAX_TRANSFER :\
MAX_RW_COUNT)
#define MARS_NOT_STARTED (-EHWPOISON)
/******** CONVENTIONS ****/
//#define KONVENTION_RET_LEN
/* Read path, modelled on ext4_file_read_iter() in fs/ext4/file.c and
 * cut down to what qio_rw needs.
 * TODO: keep in sync with upstream kernel changes.
 *
 * @iocb: prepared kernel I/O control block
 * @to:   destination iterator
 *
 * Returns 0 without touching the file when the iterator is empty
 * (this also skips the atime update), otherwise the result of
 * generic_file_read_iter().
 */
static
ssize_t qio_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
{
	return iov_iter_count(to) ? generic_file_read_iter(iocb, to) : 0;
}
/* Buffered write path, modelled on ext4_file_write_iter() =>
 * ext4_buffered_write_iter() in fs/ext4/file.c and cut down to what
 * qio_rw needs.
 * TODO: keep in sync with upstream kernel changes.
 *
 * @iocb:          prepared kernel I/O control block; ki_pos is advanced
 *                 by the number of bytes written
 * @from:          source iterator
 * @wait_for_sync: when true, generic_write_sync() is called after a
 *                 successful write
 *
 * Returns -EOPNOTSUPP when IOCB_NOWAIT is requested, otherwise the
 * result of generic_perform_write() or, if syncing was requested and
 * data was written, the result of generic_write_sync().
 */
static
ssize_t qio_file_write_iter(struct kiocb *iocb, struct iov_iter *from,
			    bool wait_for_sync)
{
	struct inode *host = file_inode(iocb->ki_filp);
	ssize_t written;

	if (iocb->ki_flags & IOCB_NOWAIT)
		return -EOPNOTSUPP;

	/* The write itself runs under the inode lock. */
	inode_lock(host);
	current->backing_dev_info = inode_to_bdi(host);
	written = generic_perform_write(iocb->ki_filp, from, iocb->ki_pos);
	current->backing_dev_info = NULL;
	inode_unlock(host);

	/* Nothing written, or an error: hand it back unchanged. */
	if (unlikely(written <= 0))
		return written;

	iocb->ki_pos += written;
	if (!wait_for_sync)
		return written;

	return generic_write_sync(iocb, written);
}
#ifndef KONVENTION_RET_LEN /* only for elder kernels */
/* Pull the pages covering the byte range [pos, pos + len) of @file
 * into the page cache, one page at a time.
 *
 * @file: file whose mapping is populated
 * @pos:  start offset in bytes
 * @len:  length in bytes; nothing is done for len <= 0
 *
 * -ENOMEM from the page cache is retried after a short congestion wait.
 * Any other error does not stop the loop; the first such error is
 * remembered and returned.
 *
 * Returns 0 when all pages could be read, otherwise the first
 * non-ENOMEM error code.
 */
static
int __fake_readahead(struct file *file,
		     loff_t pos, int len)
{
	int err = 0;
	pgoff_t index;
	pgoff_t last;

	if (len <= 0)
		return 0;

	last = (pos + len - 1) / PAGE_SIZE;
	for (index = pos / PAGE_SIZE; index <= last; index++) {
		struct page *tmp_page;

		/* Stolen from fs/f2fs/super.c f2fs_quota_read()
		 */
repeat:
		tmp_page =
			read_cache_page_gfp(file->f_mapping,
					    index,
					    GFP_NOFS);
		if (IS_ERR(tmp_page)) {
			if (PTR_ERR(tmp_page) == -ENOMEM) {
				congestion_wait(BLK_RW_ASYNC, HZ/50);
				goto repeat;
			}
			if (!err)
				err = PTR_ERR(tmp_page);
			continue;
		}
		/* read_cache_page_gfp() hands out a referenced page.
		 * We only wanted it in the cache, so drop our reference
		 * again; otherwise the page would stay pinned forever.
		 */
		put_page(tmp_page);
	}
	return err;
}
#endif
#define QIO_MAX_KVEC (MARS_MAX_RW_COUNT / PAGE_SIZE + 2)
/* Split the virtually contiguous buffer [buf, buf + count) into kvec
 * segments that never cross a page boundary.
 *
 * @iov:   output array; the caller must provide enough entries
 *         (QIO_MAX_KVEC suffices for count <= MARS_MAX_RW_COUNT)
 * @buf:   start of the buffer, need not be page-aligned
 * @count: number of bytes to describe
 *
 * The first segment runs from @buf up to the end of its page (or is
 * shorter when @count is smaller), all following segments start on a
 * page boundary and are at most PAGE_SIZE long.
 *
 * Returns the number of segments filled in (0 for count == 0).
 */
int qio_init_kvec(struct kvec iov[],
		  void *buf, size_t count)
{
	/* Bytes left in the page that @buf points into. */
	size_t this_len = PAGE_SIZE - ((unsigned long)buf % PAGE_SIZE);
	int nr = 0;

	while (count > 0) {
		if (this_len > count)
			this_len = count;

		iov[nr].iov_base = buf;
		iov[nr].iov_len = this_len;
		nr++;

		buf += this_len;
		count -= this_len;
		/* From now on we are page-aligned. */
		this_len = PAGE_SIZE;
	}
	return nr;
}
/* Some code stolen from fs/read_write.c __kernel_read()
 * and adapted to qio_rw.
 * TODO: adapt this to any upstream kernel changes.
 *
 * Submit a read of up to MARS_MAX_RW_COUNT bytes from @file at *@pos
 * into @buf.
 *
 * @file:    file to read from; must be opened for reading and provide
 *           ->read_iter (but not ->read)
 * @buf:     destination buffer
 * @count:   requested length; silently capped to MARS_MAX_RW_COUNT
 * @pos:     in/out file position; advanced only on success
 * @private: must point to a struct qio_rw; its kiocb and iov_iter are
 *           (re)initialized here
 * @nowait:  when true, IOCB_NOWAIT is added to the kiocb flags
 *
 * Return value (default build, KONVENTION_RET_LEN not defined):
 *  > 0  number of bytes read; also stored in submission_ret
 *  == 0 nothing was read; submission_ret stays at MARS_NOT_STARTED
 *       and *@pos is not advanced
 *  < 0  error code; also stored in submission_ret, except for the
 *       early -EINVAL / -EOPNOTSUPP checks which return before
 *       *@private is touched
 *
 * NOTE(review): the kvec array lives on this function's stack while
 * the iov_iter stored in *@private refers to it; the iterator must
 * therefore not be used after this function has returned.
 */
static
ssize_t qio_read_start(struct file *file,
		       void *buf, size_t count,
		       loff_t *pos,
		       void *private,
		       bool nowait)
{
	const size_t real_count = min_t(size_t, count, MARS_MAX_RW_COUNT);
	struct qio_rw *qio_rw = private;
	struct kiocb *kiocb = &qio_rw->kiocb;
	struct iov_iter *iter = &qio_rw->from_to;
	ssize_t ret;
	int nr_segs;
	struct kvec iov[QIO_MAX_KVEC];
	if (WARN_ON_ONCE(!(file->f_mode & FMODE_READ)))
		return -EINVAL;
	if (!(file->f_mode & FMODE_CAN_READ))
		return -EINVAL;
	/* Describe the (capped) buffer as a list of kvec segments. */
	nr_segs = qio_init_kvec(iov, buf, real_count);
	/*
	 * Also fail if ->read_iter and ->read are both wired up as that
	 * implies very convoluted semantics.
	 */
	if (unlikely(!file->f_op->read_iter || file->f_op->read))
		return -EOPNOTSUPP;
	/* From here on the request counts as started; both result
	 * fields are reset to the "not started" marker first.
	 */
	qio_rw->qio_phase = QIO_RW_RD_STARTED;
	qio_rw->submission_ret = MARS_NOT_STARTED;
	qio_rw->completion_ret = MARS_NOT_STARTED;
	iov_iter_kvec(iter, READ, iov, nr_segs, real_count);
	init_sync_kiocb(kiocb, file);
	kiocb->ki_pos = *pos;
	kiocb->ki_flags = iocb_flags(file);
	/* Some important commentary from mm/filemap.c generic_file_read_iter()
	 */
	/**
	 * generic_file_read_iter - generic filesystem read routine
	 * @iocb: kernel I/O control block
	 * @iter: destination for the data read
	 *
	 * This is the "read_iter()" routine for all filesystems
	 * that can use the page cache directly.
	 *
	 * The IOCB_NOWAIT flag in iocb->ki_flags indicates that -EAGAIN shall
	 * be returned when no data can be read without waiting for I/O requests
	 * to complete; it doesn't prevent readahead.
	 *
	 * The IOCB_NOIO flag in iocb->ki_flags indicates that no new I/O
	 * requests shall be made for the read or for readahead. When no data
	 * can be read, -EAGAIN shall be returned. When readahead would be
	 * triggered, a partial, possibly empty read shall be returned.
	 *
	 * Return:
	 * * number of bytes copied, even for partial reads
	 * * negative error code (or 0 if IOCB_NOIO) if nothing was read
	 */
	if (nowait) {
		kiocb->ki_flags |= IOCB_NOWAIT;
		/* AFAICS the following is not necessary (but not tested
		 * for all combinations of kernels with QIO):
		 * kiocb->ki_flags |= IOCB_NOIO;
		 */
	}
	/* In place of file->f_op->read_iter()
	 * Our qio version should not block unnecessarily.
	 */
	ret = qio_file_read_iter(kiocb, iter);
#ifndef KONVENTION_RET_LEN
	/* only for older kernels */
	/* The read would have blocked: start populating the page cache
	 * for the requested range. -EAGAIN is still reported to the
	 * caller below; the readahead status is currently ignored.
	 */
	if (ret == -EAGAIN) {
		int ahead_status;
		ahead_status = __fake_readahead(file, *pos, real_count);
		/* CHECK: can we improve on this? */
	}
#endif
	if (ret < 0) {
		qio_rw->submission_ret = ret;
		goto done;
	}
#ifdef KONVENTION_RET_LEN
	/* Strict convention: a short read is reported as -EAGAIN,
	 * an over-long result as -EIO.
	 */
	if (ret < real_count && ret > 0) {
		ret = -EAGAIN;
		qio_rw->submission_ret = ret;
		goto done;
	}
	if (ret > real_count) {
		ret = -EIO;
		qio_rw->submission_ret = ret;
		goto done;
	}
#else
	/* Adaptation of calling conventions */
	/* Hmm, how can we deal with any short reads?
	 * This type of workaround might cause problems.
	 */
	/* A zero-length result leaves submission_ret and *pos untouched. */
	if (!ret)
		goto done;
#endif
	/* Success. */
	qio_rw->submission_ret = ret;
	*pos += ret;
	add_rchar(current, ret);
	inc_syscr(current);
done:
	return ret;
}
/* Some code stolen from fs/read_write.c __kernel_write()
 * and adapted to qio_rw.
 * TODO: adapt this to any upstream kernel changes.
 *
 * Submit a write of up to MARS_MAX_RW_COUNT bytes from @buf to @file
 * at *@pos.
 *
 * @file:          file to write to; must be opened for writing and
 *                 provide ->write_iter (but not ->write)
 * @buf:           source buffer
 * @count:         requested length; silently capped to MARS_MAX_RW_COUNT
 * @pos:           in/out file position; set to the kiocb position
 *                 after a successful write
 * @private:       must point to a struct qio_rw; its kiocb and
 *                 iov_iter are (re)initialized here
 * @wait_for_sync: passed on to qio_file_write_iter()
 *
 * Returns the result of qio_file_write_iter() (bytes written or a
 * negative error code), which is also stored in submission_ret. The
 * early -EBADF / -EINVAL / -EOPNOTSUPP checks return before *@private
 * is touched.
 *
 * NOTE(review): the kvec lives on this function's stack while the
 * iov_iter stored in *@private refers to it; the iterator must
 * therefore not be used after this function has returned.
 */
static
ssize_t qio_write_start(struct file *file,
			void *buf, size_t count,
			loff_t *pos,
			void *private,
			bool wait_for_sync)
{
	const size_t real_count = min_t(size_t, count, MARS_MAX_RW_COUNT);
	struct qio_rw *qio_rw = private;
	struct kvec iov = {
		.iov_base = (void *)buf,
		.iov_len = real_count,
	};
	struct kiocb *kiocb = &qio_rw->kiocb;
	struct iov_iter *iter = &qio_rw->from_to;
	ssize_t ret;
	if (WARN_ON_ONCE(!(file->f_mode & FMODE_WRITE)))
		return -EBADF;
	if (!(file->f_mode & FMODE_CAN_WRITE))
		return -EINVAL;
	/*
	 * Also fail if ->write_iter and ->write are both wired up as that
	 * implies very convoluted semantics.
	 */
	if (unlikely(!file->f_op->write_iter || file->f_op->write))
		return -EOPNOTSUPP;
	/* From here on the request counts as started; both result
	 * fields are reset to the "not started" marker first.
	 */
	qio_rw->qio_phase = QIO_RW_WR_STARTED;
	qio_rw->submission_ret = MARS_NOT_STARTED;
	qio_rw->completion_ret = MARS_NOT_STARTED;
	iov_iter_kvec(iter, WRITE, &iov, 1, iov.iov_len);
	init_sync_kiocb(kiocb, file);
	kiocb->ki_pos = *pos;
	kiocb->ki_flags = iocb_flags(file);
#if 0 // only for devel testing, to DISAPPEAR
	kiocb->ki_waitq = &qio_rw->wait;
	kiocb->ki_flags |= IOCB_WAITQ;
#endif
#ifndef CONFIG_MARS_TESTING_QIO_UNSAFE_PERFORMANCE
	/* Full kiocb->ki_flags |= IOCB_SYNC; not needed AFAIK */
	/* But the following is ABSOLUTELY needed: */
	kiocb->ki_flags |= IOCB_DSYNC;
#else
#warning DANGEROUS test code - this will ensure CORRUPTED DATA!
#endif
	kiocb->ki_flags |= IOCB_APPEND;
	/* In place of file->f_op->write_iter()
	 * Our qio version should not block unnecessarily.
	 */
	ret = qio_file_write_iter(kiocb, iter, wait_for_sync);
	if (ret < 0) {
		qio_rw->submission_ret = ret;
		goto done;
	}
#ifdef KONVENTION_RET_LEN
	/* Strict convention: a short write is reported as -EAGAIN,
	 * an over-long result as -EIO.
	 */
	if (ret < real_count && ret > 0) {
		ret = -EAGAIN;
		qio_rw->submission_ret = ret;
		goto done;
	}
	if (ret > real_count) {
		ret = -EIO;
		qio_rw->submission_ret = ret;
		goto done;
	}
#else
	/* Adaptation of calling conventions */
	/* NOTE(review): iov.iov_len is initialized to real_count and not
	 * modified anywhere in this function, so this expression appears
	 * to leave ret at 0 - confirm the intention.
	 */
	if (!ret) {
		ret = real_count - iov.iov_len;
	}
#endif
	/* Success. */
	qio_rw->submission_ret = ret;
	*pos = kiocb->ki_pos;
	add_wchar(current, ret);
	inc_syscw(current);
done:
	return ret;
}
/* Deliver the final result of a request previously submitted via
 * qio_read_start() / qio_write_start().
 *
 * @file:             unused here
 * @is_write:         unused here
 * @needs_write_sync: when true and the submission transferred data,
 *                    generic_write_sync() is called on the kiocb
 * @private:          must point to the struct qio_rw of the request
 *
 * Returns the stored submission result, or the result of
 * generic_write_sync() when syncing was requested and performed.
 * A failed or never started submission is returned as is: syncing
 * makes no sense then, would pass a negative byte count to
 * generic_write_sync(), and could mask the original error code.
 */
static
ssize_t qio_rw_wait(struct file *file,
		    bool is_write,
		    bool needs_write_sync,
		    void *private)
{
	struct qio_rw *qio_rw = private;
	ssize_t ret;

	/* Corresponds to (not everywhere implemented) IOCB_NOWAIT
	 * thus we need to return the final result right now.
	 */
	ret = qio_rw->submission_ret;
	if (needs_write_sync && ret > 0)
		ret = generic_write_sync(&qio_rw->kiocb, ret);

	return ret;
}
/* Constructor hook for a struct qio_rw.
 * This adaptor keeps no state that needs setting up, so the body is
 * intentionally empty.
 */
void init_qio_rw(struct qio_rw *qio_rw)
{
}
/* Destructor hook for a struct qio_rw.
 * This adaptor holds no resources that need releasing, so the body is
 * intentionally empty.
 */
void exit_qio_rw(struct qio_rw *qio_rw)
{
}
/* Operations table exporting the page-cache based adaptor implemented
 * in this file.
 */
const struct qio_rw_operations qio_rw_operations = {
	.qio_read_start = qio_read_start,
	.qio_write_start = qio_write_start,
	.qio_rw_wait = qio_rw_wait,
};
#endif /* ENABLE_MARS_QIO */
/* Module init hook of this library.
 * There is nothing to set up; always reports success (0).
 */
int __init init_lib_qio_rw(void)
{
	return 0;
}
/* Module exit hook of this library.
 * There is nothing to tear down, so the body is intentionally empty.
 */
void exit_lib_qio_rw(void)
{
}

98
kernel/lib_qio_rw.h Normal file
View File

@ -0,0 +1,98 @@
/*
* MARS Long Distance Replication Software
*
* This file is part of MARS project: http://schoebel.github.io/mars/
*
* Copyright (C) 2010-2022 Thomas Schoebel-Theuer
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License along
* with this program; if not, write to the Free Software Foundation, Inc.,
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
*/
#ifndef MARS_LIB_QIO_RW_H
#define MARS_LIB_QIO_RW_H
#include "brick.h"
#include "lamport.h"
#include <linux/fs.h>
#include <linux/uio.h>
#include <linux/pagemap.h>
/* Minor abstraction to/from the kernel IO interface.
* (a) hopefully this interface will remain stable until MARS
* can go upstream. Only the implementation in lib_qio_rw.c
* should need to cope with kernel versions.
* (b) keep the door open for alternative kernel adaptors, maybe even
* more direct access to the page cache, or some future developments.
*
* Architectural differences to kernel_{read,write}():
* (1) The interface is as similar as possible, for long-term
* code maintenance.
* (2) Submission should work as non-blocking as possible ;)
* (3) *_wait() operations are supposed to block any (separate)
* kernel threads.
* Any information transfer between submission and (internal)
* completion path(s) goes via *private, which should always
* start with prefix "struct qio_rw_...", but may append further
* external items.
*/
/* Progress marker of a single qio_rw request.
 * lib_qio_rw.c only ever assigns the two *_STARTED values; the
 * remaining values are presumably meant for other users of
 * struct qio_rw - TODO confirm.
 */
enum qio_rw_phase {
	QIO_RW_NOTSTARTED,
	QIO_RW_RD_STARTED,	/* set by qio_read_start() */
	QIO_RW_WR_STARTED,	/* set by qio_write_start() */
	QIO_RW_WAITING,
	QIO_RW_COMPLETED,
};
/* Per-request state passed as *private to the qio_rw_operations.
 * The members are wrapped into an anonymous struct inside an anonymous
 * union, so they are accessed directly as qio_rw->member.
 */
struct qio_rw {
	union {
		struct {
			/* from general kernel infrastructure */
			struct kiocb kiocb;
			/* iterator over the data buffer; initialized by
			 * the *_start() operations
			 */
			struct iov_iter from_to;
			/* from io_uring */
			/* currently unused: struct io_async_rw rw; */
			/* private */
			/* result of the *_start() operation: bytes
			 * transferred or negative error code
			 */
			ssize_t submission_ret;
			/* only reset by the *_start() operations in
			 * lib_qio_rw.c, not evaluated there
			 */
			ssize_t completion_ret;
			enum qio_rw_phase qio_phase;
		};
	};
};
/* Adaptor interface between MARS and the kernel IO layer.
 * In all operations, *private is the per-request state and is
 * expected to start with a struct qio_rw.
 */
struct qio_rw_operations {
	/* Submit a read of count bytes at *pos into buf.
	 * nowait requests non-blocking behaviour from the implementation.
	 */
	ssize_t (*qio_read_start)(struct file *file,
				  void *buf, size_t count,
				  loff_t *pos,
				  void *private,
				  bool nowait);
	/* Submit a write of count bytes from buf at *pos.
	 * wait_for_sync asks for syncing as part of the submission.
	 */
	ssize_t (*qio_write_start)(struct file *file,
				   void *buf, size_t count,
				   loff_t *pos,
				   void *private,
				   bool wait_for_sync);
	/* Obtain the final result of a previously submitted request. */
	ssize_t (*qio_rw_wait)(struct file *file,
			       bool is_write,
			       bool needs_write_sync,
			       void *private);
};
extern const struct qio_rw_operations qio_rw_operations;
extern void init_qio_rw(struct qio_rw *qio_rw);
extern void exit_qio_rw(struct qio_rw *qio_rw);
#endif

View File

@ -469,22 +469,42 @@ extern int mars_throttle_end;
*/
extern const struct generic_brick_type *_client_brick_type;
extern const struct generic_brick_type *_bio_brick_type;
extern const struct generic_brick_type *_qio_brick_type;
extern const struct generic_brick_type *_aio_brick_type;
extern const struct generic_brick_type *_sio_brick_type;
#if !defined(CONFIG_MARS_PREFER_SIO) && (defined(MARS_HAS_PREPATCH) || defined(MARS_HAS_PREPATCH_V2))
#if defined(IOCB_NOWAIT) && \
/* see dde0c2e79848298cc25621ad080d47f94dbd7cce */ \
defined(IOCB_DSYNC) && \
/* temporary CONFIG option, only for kernels ~5.1 to ~5.7, to disappear after 5.10 goes EOL */ \
defined(CONFIG_MARS_PREFER_QIO) && \
!defined(CONFIG_MARS_PREFER_SIO) && \
1
#define ENABLE_MARS_QIO
#else
#if !defined(CONFIG_MARS_PREFER_SIO) && \
(defined(MARS_HAS_PREPATCH) || \
defined(MARS_HAS_PREPATCH_V2) || \
defined(MARS_HAS_PREPATCH_V3a) || \
1)
#define ENABLE_MARS_AIO
#endif
#endif
#if defined(ENABLE_MARS_QIO)
# define any_io_brick_type qio_brick_type
#elif defined(ENABLE_MARS_AIO)
# define any_io_brick_type aio_brick_type
#else
# define any_io_brick_type sio_brick_type
#endif
#ifdef ENABLE_MARS_AIO
/* Kludge: our kernel threads will have no mm context, but need one
* for stuff like ioctx_alloc() / aio_setup_ring() etc
* which expect userspace resources.
* We fake one.
* TODO: factor out the userspace stuff from AIO such that
* this fake is no longer necessary.
* Even better: replace do_mmap() in AIO stuff by something
* more friendly to kernelspace apps.
* TODO: REMOVE THIS in favor of QIO
*/
#include <linux/mmu_context.h>

View File

@ -21,6 +21,8 @@
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
*/
/* HISTORIC - please avoid this ASAP
*/
//#define BRICK_DEBUGGING
#define MARS_DEBUGGING

View File

@ -21,6 +21,8 @@
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
*/
/* HISTORIC - please avoid this ASAP
*/
#ifndef MARS_AIO_H
#define MARS_AIO_H

1330
kernel/mars_qio.c Normal file

File diff suppressed because it is too large Load Diff

111
kernel/mars_qio.h Normal file
View File

@ -0,0 +1,111 @@
/*
* MARS Long Distance Replication Software
*
* This file is part of MARS project: http://schoebel.github.io/mars/
*
* Copyright (C) 2010-2014 Thomas Schoebel-Theuer
* Copyright (C) 2011-2014 1&1 Internet AG
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License along
* with this program; if not, write to the Free Software Foundation, Inc.,
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
*/
#ifndef MARS_QIO_H
#define MARS_QIO_H
#include <linux/fs.h>
#include "lib_qio_rw.h"
#include "lib_mapfree.h"
#define QIO_IO_R_MAX_LATENCY 50000 // 50 ms
#define QIO_IO_W_MAX_LATENCY 150000 // 150 ms
extern int mars_qio_hang_timeout_s;
/* Bookkeeping for one worker thread of a qio brick.
 * struct qio_brick embeds two of these (thread_anch[2]).
 * NOTE(review): the users of these fields live in mars_qio.c, which
 * is not visible here; the field comments below are assumptions
 * derived from the names - TODO confirm.
 */
struct qio_anchors {
	struct mutex prio_mutex;
	struct list_head submitted_list; /* presumably links qio_mref_aspect.io_head */
	wait_queue_head_t event;	/* presumably what the thread sleeps on */
	struct task_struct *thread;
	struct qio_brick *brick;	/* back pointer to the owning brick */
	int anch_prio;
	bool should_wake_now;
	bool should_terminate;
	bool has_terminated;
	bool all_have_expired;
};
/* Per-request (mref) private data of the qio brick.
 * NOTE(review): the users of these fields live in mars_qio.c, which
 * is not visible here; the field comments below are assumptions
 * derived from the names - TODO confirm.
 */
struct qio_mref_aspect {
	GENERIC_ASPECT(mref);
	struct list_head io_head;	/* list linkage of this request */
	/* timestamps, presumably taken along the request's life cycle */
	struct lamport_time started_stamp;
	struct lamport_time enqueue_stamp;
	struct lamport_time dequeue_stamp;
	struct lamport_time completion_stamp;
	struct qio_anchors *anch;
	struct qio_rw qio_rw;		/* state handed to the qio_rw_operations */
	int alloc_len;
	int qio_error;
	int nr_requeue;
	bool do_dealloc;
	bool is_write;
	bool is_active;
	bool use_nowait;
	bool has_expired;
};
/* Instance data of a qio brick. */
struct qio_brick {
	MARS_BRICK(qio);
	/* in parameters */
	bool o_creat;	/* set by the _set_*qio_params() callbacks of the strategy layer */
	/* out parameters */
	int error;	/* reported as "disk-error" by make_bio() in mars_main.c */
	/* private */
	struct file *file;
	struct mapfree_info *mf;
	struct qio_anchors thread_anch[2]; /* NOTE(review): meaning of the two slots not visible here */
	/* statistics */
	struct lamport_time last_started_stamp;
	struct lamport_time last_submitted_stamp;
#ifdef CONFIG_MARS_DEBUG
	struct lamport_time last_dequeue_stamp;
#endif
	struct lamport_time last_completion_stamp;
	struct lamport_time last_hanging_stamp;
	atomic_t flying_reads;
	atomic_t flying_writes;
#ifdef CONFIG_MARS_DEBUG
	/* debugging aids, only present in debug builds */
	loff_t submit_pos;
	loff_t complet_pos;
	int submit_len;
	int complet_len;
#endif
};
/* Input side of a qio brick; carries nothing beyond the generic
 * MARS_INPUT() members.
 */
struct qio_input {
	MARS_INPUT(qio);
	/* parameters */
	/* private */
};
/* Output side of a qio brick; carries nothing beyond the generic
 * MARS_OUTPUT() members.
 */
struct qio_output {
	MARS_OUTPUT(qio);
	/* parameters */
	/* private */
};
MARS_TYPES(qio);
#endif

View File

@ -37,6 +37,7 @@
#define _STRATEGY
#include "mars.h"
#include "mars_bio.h"
#include "mars_qio.h"
#include "mars_aio.h"
#include "mars_sio.h"
@ -367,11 +368,13 @@ void _clean_list(struct server_brick *brick, struct list_head *start, bool was_w
}
}
#ifdef CONFIG_MARS_PREFER_SIO /* Historic, to disappear, ONLY FOR TESTING */
static
int _set_server_sio_params(struct mars_brick *_brick, void *private)
{
struct sio_brick *sio_brick = (void*)_brick;
if (_brick->type != (void*)_sio_brick_type) {
struct sio_brick *sio_brick = (void *)_brick;
if (_brick->type != (void *)_sio_brick_type) {
MARS_ERR("bad brick type\n");
return -EINVAL;
}
@ -380,15 +383,27 @@ int _set_server_sio_params(struct mars_brick *_brick, void *private)
MARS_INF("name = '%s' path = '%s'\n", _brick->brick_name, _brick->brick_path);
return 1;
}
#endif
#ifdef ENABLE_MARS_QIO
static
int _set_server_qio_params(struct mars_brick *_brick, void *private)
{
struct qio_brick *qio_brick = (void *)_brick;
qio_brick->o_creat = false;
MARS_INF("name = '%s' path = '%s'\n", _brick->brick_name, _brick->brick_path);
return 1;
}
#endif
#ifdef ENABLE_MARS_AIO
static
int _set_server_aio_params(struct mars_brick *_brick, void *private)
{
struct aio_brick *aio_brick = (void*)_brick;
if (_brick->type == (void*)_sio_brick_type) {
return _set_server_sio_params(_brick, private);
}
if (_brick->type != (void*)_aio_brick_type) {
struct aio_brick *aio_brick = (void *)_brick;
if (_brick->type != (void *)_aio_brick_type) {
MARS_ERR("bad brick type\n");
return -EINVAL;
}
@ -398,22 +413,33 @@ int _set_server_aio_params(struct mars_brick *_brick, void *private)
MARS_INF("name = '%s' path = '%s'\n", _brick->brick_name, _brick->brick_path);
return 1;
}
#endif
static
int _set_server_bio_params(struct mars_brick *_brick, void *private)
{
struct bio_brick *bio_brick;
if (_brick->type == (void*)_aio_brick_type) {
return _set_server_aio_params(_brick, private);
}
#ifdef CONFIG_MARS_PREFER_SIO /* Historic, to disappear, ONLY FOR TESTING */
if (_brick->type == (void*)_sio_brick_type) {
return _set_server_sio_params(_brick, private);
}
if (_brick->type != (void*)_bio_brick_type) {
#endif
#ifdef ENABLE_MARS_QIO
if (_brick->type == (void *)_qio_brick_type) {
return _set_server_qio_params(_brick, private);
}
#endif
#ifdef ENABLE_MARS_AIO
if (_brick->type == (void *)_aio_brick_type) {
return _set_server_aio_params(_brick, private);
}
#endif
if (_brick->type != (void *)_bio_brick_type) {
MARS_ERR("bad brick type\n");
return -EINVAL;
}
bio_brick = (void*)_brick;
bio_brick = (void *)_brick;
bio_brick->ra_pages = 0;
bio_brick->do_sync = true;
bio_brick->do_unplug = true;
@ -619,6 +645,9 @@ int handler_thread(void *data)
status = -EINVAL;
CHECK_PTR(path, err);
CHECK_PTR_NULL(_bio_brick_type, err);
#ifdef ENABLE_MARS_QIO
CHECK_PTR_NULL(_qio_brick_type, err);
#endif
prev = make_brick_all(
handler_global,
@ -626,8 +655,8 @@ int handler_thread(void *data)
_set_server_bio_params,
NULL,
path,
(const struct generic_brick_type*)_bio_brick_type,
(const struct generic_brick_type*[]){},
(const struct generic_brick_type *)_bio_brick_type,
(const struct generic_brick_type *[]){},
2, // start always
path,
(const char *[]){},

View File

@ -486,7 +486,7 @@ void sio_ref_io(struct sio_output *output, struct mref_object *mref)
list_add_tail(&mref_a->io_head, &tinfo->mref_list);
traced_unlock(&tinfo->lock, flags);
wake_up_interruptible(&tinfo->event);
brick_wake_smp(&tinfo->event);
}
static int sio_thread(void *data)
@ -502,7 +502,7 @@ static int sio_thread(void *data)
struct sio_mref_aspect *mref_a;
unsigned long flags;
wait_event_interruptible_timeout(
brick_wait_smp(
tinfo->event,
!list_empty(&tinfo->mref_list) || brick_thread_should_stop(),
HZ);

View File

@ -65,6 +65,7 @@
#include "../mars_copy.h"
#include "../mars_bio.h"
#include "../mars_sio.h"
#include "../mars_qio.h"
#include "../mars_aio.h"
#include "../mars_trans_logger.h"
#include "../mars_if.h"
@ -1020,8 +1021,9 @@ int mars_mem_gb = 16;
static
int _set_trans_params(struct mars_brick *_brick, void *private)
{
struct trans_logger_brick *trans_brick = (void*)_brick;
if (_brick->type != (void*)&trans_logger_brick_type) {
struct trans_logger_brick *trans_brick = (void *)_brick;
if (_brick->type != (void *)&trans_logger_brick_type) {
MARS_ERR("bad brick type\n");
return -EINVAL;
}
@ -1059,8 +1061,9 @@ struct client_cookie {
static
int _set_client_params(struct mars_brick *_brick, void *private)
{
struct client_brick *client_brick = (void*)_brick;
struct client_brick *client_brick = (void *)_brick;
struct client_cookie *clc = private;
client_brick->limit_mode = clc ? clc->limit_mode : false;
client_brick->killme = true;
MARS_INF("name = '%s' path = '%s'\n", _brick->brick_name, _brick->brick_path);
@ -1070,11 +1073,12 @@ int _set_client_params(struct mars_brick *_brick, void *private)
static
int _set_sio_params(struct mars_brick *_brick, void *private)
{
struct sio_brick *sio_brick = (void*)_brick;
if (_brick->type == (void*)&client_brick_type) {
struct sio_brick *sio_brick = (void *)_brick;
if (_brick->type == (void *)&client_brick_type) {
return _set_client_params(_brick, private);
}
if (_brick->type != (void*)&sio_brick_type) {
if (_brick->type != (void *)&sio_brick_type) {
MARS_ERR("bad brick type\n");
return -EINVAL;
}
@ -1085,18 +1089,25 @@ int _set_sio_params(struct mars_brick *_brick, void *private)
return 1;
}
#ifdef ENABLE_MARS_AIO
static
int _set_aio_params(struct mars_brick *_brick, void *private)
{
struct aio_brick *aio_brick = (void*)_brick;
struct aio_brick *aio_brick = (void *)_brick;
struct client_cookie *clc = private;
if (_brick->type == (void*)&client_brick_type) {
if (_brick->type == (void *)&client_brick_type) {
return _set_client_params(_brick, private);
}
if (_brick->type == (void*)&sio_brick_type) {
#ifdef ENABLE_MARS_QIO
if (_brick->type == (void *)&qio_brick_type) {
return _set_qio_params(_brick, private);
}
#endif
if (_brick->type == (void *)&sio_brick_type) {
return _set_sio_params(_brick, private);
}
if (_brick->type != (void*)&aio_brick_type) {
if (_brick->type != (void *)&aio_brick_type) {
MARS_ERR("bad brick type\n");
return -EINVAL;
}
@ -1107,25 +1118,68 @@ int _set_aio_params(struct mars_brick *_brick, void *private)
MARS_INF("name = '%s' path = '%s'\n", _brick->brick_name, _brick->brick_path);
return 1;
}
#else
#define _set_aio_params _set_sio_params
#endif
#ifdef ENABLE_MARS_QIO
static
int _set_qio_params(struct mars_brick *_brick, void *private)
{
struct qio_brick *qio_brick = (void *)_brick;
struct client_cookie *clc = private;
if (_brick->type == (void *)&client_brick_type) {
return _set_client_params(_brick, private);
}
#ifdef ENABLE_MARS_AIO
if (_brick->type == (void *)&aio_brick_type) {
return _set_aio_params(_brick, private);
}
#endif
if (_brick->type == (void *)&sio_brick_type) {
return _set_sio_params(_brick, private);
}
if (_brick->type != (void *)&qio_brick_type) {
MARS_ERR("bad qio brick type\n");
return -EINVAL;
}
qio_brick->o_creat = clc && clc->create_mode;
qio_brick->killme = true;
MARS_INF("name = '%s' path = '%s'\n",
_brick->brick_name, _brick->brick_path);
return 1;
}
#else
#define _set_qio_params _set_aio_params
#endif
static
int _set_bio_params(struct mars_brick *_brick, void *private)
{
struct bio_brick *bio_brick;
if (_brick->type == (void*)&client_brick_type) {
if (_brick->type == (void *)&client_brick_type) {
return _set_client_params(_brick, private);
}
if (_brick->type == (void*)&aio_brick_type) {
#ifdef ENABLE_MARS_QIO
if (_brick->type == (void *)&qio_brick_type) {
return _set_qio_params(_brick, private);
}
#endif
#ifdef ENABLE_MARS_AIO
if (_brick->type == (void *)&aio_brick_type) {
return _set_aio_params(_brick, private);
}
if (_brick->type == (void*)&sio_brick_type) {
#endif
if (_brick->type == (void *)&sio_brick_type) {
return _set_sio_params(_brick, private);
}
if (_brick->type != (void*)&bio_brick_type) {
if (_brick->type != (void *)&bio_brick_type) {
MARS_ERR("bad brick type\n");
return -EINVAL;
}
bio_brick = (void*)_brick;
bio_brick = (void *)_brick;
bio_brick->ra_pages = BIO_READAHEAD;
bio_brick->do_sync = BIO_SYNC;
bio_brick->do_unplug = BIO_UNPLUG;
@ -1137,9 +1191,10 @@ int _set_bio_params(struct mars_brick *_brick, void *private)
static
int _set_if_params(struct mars_brick *_brick, void *private)
{
struct if_brick *if_brick = (void*)_brick;
struct if_brick *if_brick = (void *)_brick;
struct mars_rotate *rot = private;
if (_brick->type != (void*)&if_brick_type) {
if (_brick->type != (void *)&if_brick_type) {
MARS_ERR("bad brick type\n");
return -EINVAL;
}
@ -1179,11 +1234,11 @@ struct copy_cookie {
static
int _set_copy_params(struct mars_brick *_brick, void *private)
{
struct copy_brick *copy_brick = (void*)_brick;
struct copy_brick *copy_brick = (void *)_brick;
struct copy_cookie *cc = private;
int status = 1;
if (_brick->type != (void*)&copy_brick_type) {
if (_brick->type != (void *)&copy_brick_type) {
MARS_ERR("bad brick type\n");
status = -EINVAL;
goto done;
@ -2104,8 +2159,8 @@ int __make_copy(struct mars_dent *belongs,
_set_bio_params,
&clc[i],
NULL,
(const struct generic_brick_type*)&bio_brick_type,
(const struct generic_brick_type*[]){},
(const struct generic_brick_type *)&bio_brick_type,
(const struct generic_brick_type *[]){},
switch_copy || (copy && !copy->power.led_off) ? 2 : -1,
cc.fullpath[i],
(const char *[]){},
@ -4637,11 +4692,11 @@ int make_log_init(struct mars_dent *dent)
aio_brick =
make_brick_all(mars_global,
aio_dent,
_set_aio_params,
_set_qio_params,
NULL,
aio_path,
(const struct generic_brick_type*)&aio_brick_type,
(const struct generic_brick_type*[]){},
(const struct generic_brick_type *)&any_io_brick_type,
(const struct generic_brick_type *[]){},
switch_on ||
(rot->trans_brick &&
!rot->trans_brick->power.led_off) ? 2 : -1,
@ -4664,7 +4719,8 @@ int make_log_init(struct mars_dent *dent)
output = aio_brick->outputs[0];
status = output->ops->mars_get_info(output, &rot->aio_info);
if (status < 0) {
MARS_ERR("cannot get info on '%s'\n", aio_path);
MARS_ERR("cannot get info on '%s', status=%d\n",
aio_path, status);
goto done;
}
MARS_DBG("logfile '%s' size = %lld\n", aio_path, rot->aio_info.current_size);
@ -5406,11 +5462,11 @@ void _rotate_trans(struct mars_rotate *rot)
rot->next_relevant_brick =
make_brick_all(mars_global,
rot->next_relevant_log,
_set_aio_params,
_set_qio_params,
NULL,
rot->next_relevant_log->d_path,
(const struct generic_brick_type*)&aio_brick_type,
(const struct generic_brick_type*[]){},
(const struct generic_brick_type *)&any_io_brick_type,
(const struct generic_brick_type *[]){},
2, // create + activate
rot->next_relevant_log->d_path,
(const char *[]){},
@ -5554,11 +5610,11 @@ int _start_trans(struct mars_rotate *rot)
rot->relevant_brick =
make_brick_all(mars_global,
rot->relevant_log,
_set_aio_params,
_set_qio_params,
NULL,
rot->relevant_log->d_path,
(const struct generic_brick_type*)&aio_brick_type,
(const struct generic_brick_type*[]){},
(const struct generic_brick_type *)&any_io_brick_type,
(const struct generic_brick_type *[]){},
2, // start always
rot->relevant_log->d_path,
(const char *[]){},
@ -6180,10 +6236,18 @@ int make_bio(struct mars_dent *dent)
__show_actual(rot->parent_path,
"disk-error",
((struct bio_brick *)brick)->error);
#ifdef ENABLE_MARS_QIO
else if (brick->type == (void *)&qio_brick_type)
__show_actual(rot->parent_path,
"disk-error",
((struct qio_brick *)brick)->error);
#endif
#ifdef ENABLE_MARS_AIO
else if (brick->type == (void *)&aio_brick_type && brick->outputs[0])
__show_actual(rot->parent_path,
"disk-error",
((struct aio_brick *)brick)->outputs[0]->error);
#endif
else if (brick->type == (void *)&sio_brick_type && brick->outputs[0])
__show_actual(rot->parent_path,
"disk-error",
@ -7777,7 +7841,12 @@ static int _main_thread(void *data)
static const struct mars_brick_type *type_list[] = {
(void *)&copy_brick_type,
(void *)&client_brick_type,
#ifdef ENABLE_MARS_QIO
(void *)&qio_brick_type,
#endif
#ifdef ENABLE_MARS_AIO
(void *)&aio_brick_type,
#endif
(void *)&sio_brick_type,
(void *)&bio_brick_type,
NULL
@ -8219,7 +8288,10 @@ static int __init init_main(void)
#endif
DO_INIT(mars_net);
DO_INIT(mars_client);
DO_INIT(mars_qio);
#ifdef ENABLE_MARS_AIO
DO_INIT(mars_aio);
#endif
DO_INIT(mars_sio);
DO_INIT(mars_bio);
DO_INIT(mars_server);
@ -8283,7 +8355,9 @@ MODULE_INFO(prepatch, "has_prepatch_v1");
#else
MODULE_INFO(prepatch, "no_prepatch");
#endif
#ifdef ENABLE_MARS_AIO
#if defined(ENABLE_MARS_QIO)
MODULE_INFO(io_driver, "qio");
#elif defined(ENABLE_MARS_AIO)
MODULE_INFO(io_driver, "aio");
#else
MODULE_INFO(io_driver, "sio");
@ -8297,7 +8371,9 @@ MODULE_INFO(io_driver, "sio");
* sysadmin informations. Sysadmins should not react
* surprised, as far as possible.
*/
#if defined(CONFIG_KASAN)
#if defined(CONFIG_MARS_TESTING_QIO_UNSAFE_PERFORMANCE)
#define KERN_DEBUG_INFO "DAMGEROUS_not_for_production_leads_to_CORRUPTED_DATA"
#elif defined(CONFIG_KASAN)
#define KERN_DEBUG_INFO "CONFIG_KASAN"
#elif defined(CONFIG_DEBUG_PAGEALLOC)
#define KERN_DEBUG_INFO "CONFIG_DEBUG_PAGEALLOC"

View File

@ -50,8 +50,6 @@
#include <linux/kthread.h>
#include <linux/statfs.h>
#define SKIP_BIO false
#include "../brick_wait.h"
#include <linux/version.h>
@ -3292,6 +3290,8 @@ const struct generic_brick_type *_client_brick_type = NULL;
EXPORT_SYMBOL_GPL(_client_brick_type);
const struct generic_brick_type *_bio_brick_type = NULL;
EXPORT_SYMBOL_GPL(_bio_brick_type);
const struct generic_brick_type *_qio_brick_type = NULL;
EXPORT_SYMBOL_GPL(_qio_brick_type);
const struct generic_brick_type *_aio_brick_type = NULL;
EXPORT_SYMBOL_GPL(_aio_brick_type);
const struct generic_brick_type *_sio_brick_type = NULL;
@ -3413,7 +3413,10 @@ struct mars_brick *make_brick_all(
// some generic brick replacements (better performance / network functionality)
brick = NULL;
if ((new_brick_type == _bio_brick_type || new_brick_type == _aio_brick_type)
if ((new_brick_type == _bio_brick_type ||
new_brick_type == _qio_brick_type ||
new_brick_type == _aio_brick_type ||
false)
&& _client_brick_type != NULL) {
char *remote = strchr(new_name, '@');
if (remote) {
@ -3427,20 +3430,61 @@ struct mars_brick *make_brick_all(
}
}
}
if (!brick && new_brick_type == _bio_brick_type && _aio_brick_type) {
/* Important: never use bio on a non-blockdev.
* Only logfiles are subject to [qas]io.
* Qio is the new default, when present.
*/
if (!brick) {
struct kstat test = {};
int status = mars_stat(new_path, &test, false);
if (SKIP_BIO || status < 0 || !S_ISBLK(test.mode)) {
new_brick_type = _aio_brick_type;
MARS_DBG("substitute bio by aio\n");
int test_status = mars_stat(new_path, &test, false);
bool subst_file =
(test_status < 0 || !S_ISBLK(test.mode));
if (test_status >= 0) {
if (S_ISREG(test.mode))
subst_file = true;
if (S_ISLNK(test.mode))
subst_file = false;
} else if (!strstr(new_path, "/log-")) {
/* We cannot stat, thus we need to assume that
* only log-* is a regular file where qio
* (or the historic aio / sio) can work on.
*/
subst_file = false;
}
#if defined(ENABLE_MARS_AIO)
/* Old variant, to disappear.
*/
if (subst_file &&
_aio_brick_type &&
new_brick_type != _aio_brick_type) {
MARS_DBG("substitute %s by aio\n",
new_brick_type->type_name);
new_brick_type = _aio_brick_type;
}
}
#ifndef ENABLE_MARS_AIO
if (!brick && new_brick_type == _aio_brick_type && _sio_brick_type) {
new_brick_type = _sio_brick_type;
MARS_DBG("substitute aio by sio\n");
}
#endif
#ifdef ENABLE_MARS_QIO
if (subst_file &&
_qio_brick_type &&
new_brick_type != _qio_brick_type) {
MARS_DBG("substitute %s by qio\n",
new_brick_type->type_name);
new_brick_type = _qio_brick_type;
}
#endif
#ifdef CONFIG_MARS_PREFER_SIO /* Historic, to disappear, ONLY FOR TESTING */
if (subst_file &&
_sio_brick_type &&
new_brick_type != _sio_brick_type) {
MARS_DBG("substitute any (%s) by sio for TESTING\n",
new_brick_type->type_name);
new_brick_type = _sio_brick_type;
}
if (subst_file && new_brick_type != _sio_brick_type) {
MARS_ERR("CONFIG_MARS_PREFER_SIO: cannot substitute anything by sio\n");
}
#endif /* CONFIG_MARS_PREFER_SIO, ONLY FOR TESTING */
}
// create it...
if (!brick)