mars/kernel/mars_buf.c

1151 lines
28 KiB
C
Raw Normal View History

2014-11-21 10:51:34 +00:00
/*
* MARS Long Distance Replication Software
*
* This file is part of MARS project: http://schoebel.github.io/mars/
*
* Copyright (C) 2010-2014 Thomas Schoebel-Theuer
* Copyright (C) 2011-2014 1&1 Internet AG
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License along
* with this program; if not, write to the Free Software Foundation, Inc.,
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
*/
2010-07-07 14:09:16 +00:00
2010-07-30 11:50:20 +00:00
// Buf brick
2010-07-07 14:09:16 +00:00
/* FIXME: this code has been unused for a long time, it is unlikely
 * to work at all.
 */
2010-07-30 05:46:22 +00:00
//#define BRICK_DEBUGGING
2010-07-23 11:55:18 +00:00
//#define MARS_DEBUGGING
//#define IO_DEBUGGING
//#define STAT_DEBUGGING
2010-07-23 11:55:18 +00:00
2010-07-07 14:09:16 +00:00
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/string.h>
#include <linux/blkdev.h>
2010-07-23 11:55:18 +00:00
#include <linux/delay.h>
2010-07-07 14:09:16 +00:00
#include "mars.h"
2010-08-09 16:57:56 +00:00
//#define USE_VMALLOC
2010-12-10 17:40:20 +00:00
//#define FAKE_IO // only for testing
//#define FAKE_READS // only for testing
//#define FAKE_WRITES // only for testing
//#define OPTIMIZE_FULL_WRITES // does not work currently!
2010-08-09 16:57:56 +00:00
2010-07-07 14:09:16 +00:00
///////////////////////// own type definitions ////////////////////////
#include "mars_buf.h"
2010-08-07 07:59:34 +00:00
#define PRE_ALLOC 8
2010-07-07 14:09:16 +00:00
///////////////////////// own helper functions ////////////////////////
2010-12-10 17:40:20 +00:00
static inline
2021-03-01 11:08:04 +00:00
unsigned int buf_hash_fn(loff_t base_index)
2010-07-07 14:09:16 +00:00
{
2010-08-07 15:02:16 +00:00
// simple and stupid
2021-03-01 11:08:04 +00:00
unsigned long long tmp;
2010-08-07 15:02:16 +00:00
tmp = base_index ^ (base_index / MARS_BUF_HASH_MAX);
2010-12-10 17:40:20 +00:00
//tmp ^= tmp / (MARS_BUF_HASH_MAX * MARS_BUF_HASH_MAX);
return ((unsigned)tmp) % MARS_BUF_HASH_MAX;
2010-07-07 14:09:16 +00:00
}
2010-12-10 17:40:20 +00:00
/* Look up base_index in its hash chain, optionally inserting a new element.
 *
 * On a hit, bf_hash_count of the found element is incremented (the caller
 * must balance this with _bf_put()) and the element is returned.
 * On a miss, NULL is returned; if new is non-NULL it has been linked into
 * the chain beforehand, with its bf_hash_count incremented.
 * The whole operation runs under the per-slot hash_lock.
 */
static
struct buf_head *_hash_find_insert(struct buf_brick *brick, loff_t base_index, struct buf_head *new)
{
	unsigned int hash = buf_hash_fn(base_index);
	spinlock_t *lock = &brick->cache_anchors[hash].hash_lock;
	struct list_head *start = &brick->cache_anchors[hash].hash_anchor;
	struct list_head *pos;
	struct buf_head *found = NULL;
	int count = 0;
	unsigned long flags;

	traced_lock(lock, flags);

	pos = start->next;
	while (pos != start) {
		struct buf_head *cand;

		/* Paranoia: a NULL link means the chain is corrupted. */
		if (!pos) {
			MARS_ERR("tmp is NULL! brick = %p base_index = %lld hash = %d new = %p\n", brick, base_index, hash, new);
			goto unlock;
		}

		/* Debugging aid: report whenever the longest chain seen so far
		 * reaches a new multiple of 10.
		 */
		{
			static int max = 0;

			if (++count > max) {
				max = count;
				if (!(max % 10)) {
					MARS_INF("hash maxlen=%d hash=%d base_index=%llu\n", max, hash, base_index);
				}
			}
		}

		cand = container_of(pos, struct buf_head, bf_hash_head);
		if (cand->bf_base_index == base_index) {
			/* Found. This must be paired with _bf_put(). */
			atomic_inc(&cand->bf_hash_count);
			found = cand;
			goto unlock;
		}

		pos = pos->next;
	}

	/* Nothing found: link in the caller-supplied element, if any. */
	if (new) {
		_CHECK_ATOMIC(&new->bf_hash_count, !=, 0);
		atomic_inc(&new->bf_hash_count);
		atomic_inc(&brick->hashed_count);
		CHECK_HEAD_EMPTY(&new->bf_hash_head);
		list_add(&new->bf_hash_head, start);
	}

unlock:
	traced_unlock(lock, flags);
	return found;
}
2010-12-10 17:40:20 +00:00
/* Try to remove bf from the hash.
* When bf is in use, do nothing.
2010-07-07 14:09:16 +00:00
*/
2010-12-10 17:40:20 +00:00
static inline
bool _remove_hash(struct buf_brick *brick, struct buf_head *bf)
2010-08-06 11:29:06 +00:00
{
2021-03-01 11:08:04 +00:00
unsigned int hash;
2010-08-09 16:57:56 +00:00
spinlock_t *lock;
2010-08-23 05:06:06 +00:00
unsigned long flags;
2010-12-10 17:40:20 +00:00
bool success = false;
2010-08-09 16:57:56 +00:00
hash = buf_hash_fn(bf->bf_base_index);
lock = &brick->cache_anchors[hash].hash_lock;
2010-08-23 05:06:06 +00:00
traced_lock(lock, flags);
2010-08-09 16:57:56 +00:00
2010-12-10 17:40:20 +00:00
if (likely(!atomic_read(&bf->bf_hash_count) && !atomic_read(&bf->bf_mref_count) && !atomic_read(&bf->bf_io_count))) {
success = true;
if (likely(!list_empty(&bf->bf_hash_head))) {
list_del_init(&bf->bf_hash_head);
atomic_dec(&brick->hashed_count);
}
2010-08-09 16:57:56 +00:00
}
2010-08-23 05:06:06 +00:00
traced_unlock(lock, flags);
2010-12-10 17:40:20 +00:00
return success;
2010-08-23 05:06:06 +00:00
}
2010-12-10 17:40:20 +00:00
static inline
void _add_bf_list(struct buf_brick *brick, struct buf_head *bf, int nr, bool at_end)
2010-08-23 05:06:06 +00:00
{
2010-12-10 17:40:20 +00:00
unsigned long flags;
2010-08-23 05:06:06 +00:00
2010-12-10 17:40:20 +00:00
#if 1
if (nr < 0 || nr >= LIST_MAX)
MARS_FAT("bad nr %d\n", nr);
#endif
2010-08-23 05:06:06 +00:00
2010-12-10 17:40:20 +00:00
traced_lock(&brick->brick_lock, flags);
2010-08-23 05:06:06 +00:00
2010-12-10 17:40:20 +00:00
atomic_inc(&brick->list_count[nr]);
if (!list_empty(&bf->bf_list_head)) {
atomic_dec(&brick->list_count[bf->bf_member]);
list_del(&bf->bf_list_head);
}
if (at_end) {
list_add_tail(&bf->bf_list_head, &brick->list_anchor[nr]);
} else {
list_add(&bf->bf_list_head, &brick->list_anchor[nr]);
}
bf->bf_member = nr;
bf->bf_jiffies = jiffies;
2010-08-23 05:06:06 +00:00
2010-12-10 17:40:20 +00:00
traced_unlock(&brick->brick_lock, flags);
}
2010-08-09 16:57:56 +00:00
2010-12-10 17:40:20 +00:00
static inline
struct buf_head *_fetch_bf_list(struct buf_brick *brick, int nr, unsigned long age)
{
struct buf_head *bf = NULL;
unsigned long flags;
2010-08-23 15:52:42 +00:00
2010-12-10 17:40:20 +00:00
#if 1
if (nr < 0 || nr >= LIST_MAX)
MARS_FAT("bad nr %d\n", nr);
#endif
2010-08-09 16:57:56 +00:00
2010-12-10 17:40:20 +00:00
traced_lock(&brick->brick_lock, flags);
2010-08-09 16:57:56 +00:00
2010-12-10 17:40:20 +00:00
if (!list_empty(&brick->list_anchor[nr])) {
bf = container_of(brick->list_anchor[nr].prev, struct buf_head, bf_list_head);
#if 1
if (age != 0 && jiffies - bf->bf_jiffies < age) {
traced_unlock(&brick->brick_lock, flags);
return NULL;
}
#endif
atomic_dec(&brick->list_count[nr]);
list_del_init(&bf->bf_list_head);
2010-08-23 15:52:42 +00:00
}
2010-08-06 11:29:06 +00:00
2010-12-10 17:40:20 +00:00
traced_unlock(&brick->brick_lock, flags);
2010-08-06 11:29:06 +00:00
2010-12-10 17:40:20 +00:00
return bf;
2010-07-07 14:09:16 +00:00
}
2010-12-10 17:40:20 +00:00
static inline
void _remove_bf_list(struct buf_brick *brick, struct buf_head *bf)
2010-07-07 14:09:16 +00:00
{
2010-12-10 17:40:20 +00:00
unsigned long flags;
#if 1
if (bf->bf_member < 0 || bf->bf_member >= LIST_MAX)
MARS_FAT("bad nr %d\n", bf->bf_member);
#endif
traced_lock(&brick->brick_lock, flags);
if (!list_empty(&bf->bf_list_head)) {
list_del_init(&bf->bf_list_head);
atomic_dec(&brick->list_count[bf->bf_member]);
}
traced_unlock(&brick->brick_lock, flags);
2010-07-07 14:09:16 +00:00
}
2010-12-10 17:40:20 +00:00
static inline
struct buf_head *_alloc_bf(struct buf_brick *brick)
2010-08-06 11:29:06 +00:00
{
2011-08-12 11:09:48 +00:00
struct buf_head *bf = brick_zmem_alloc(sizeof(struct buf_head));
2010-12-10 17:40:20 +00:00
if (unlikely(!bf))
2010-08-06 11:29:06 +00:00
goto done;
2010-08-09 16:57:56 +00:00
#ifdef USE_VMALLOC
bf->bf_data = vmalloc(brick->backing_size);
#else
2010-08-06 11:29:06 +00:00
bf->bf_data = (void*)__get_free_pages(GFP_MARS, brick->backing_order);
2010-08-09 16:57:56 +00:00
#endif
2010-08-06 11:29:06 +00:00
if (unlikely(!bf->bf_data)) {
2011-08-12 11:09:48 +00:00
brick_mem_free(bf);
2010-12-10 17:40:20 +00:00
goto done;
2010-08-06 11:29:06 +00:00
}
2010-08-09 16:57:56 +00:00
atomic_inc(&brick->alloc_count);
2010-08-07 15:02:16 +00:00
2010-08-06 11:29:06 +00:00
done:
return bf;
}
2010-12-10 17:40:20 +00:00
/* Release a buf_head and its data area for good.
 *
 * Before freeing, the usage counters and all list heads of bf are run
 * through the sanity-check macros. Afterwards alloc_count of the brick
 * is decremented.
 */
static inline
void _dealloc_bf(struct buf_brick *brick, struct buf_head *bf)
{
	MARS_INF("really freeing bf=%p\n", bf);

	/* sanity checks on the usage counters */
	_CHECK_ATOMIC(&bf->bf_hash_count, !=, 0);
	_CHECK_ATOMIC(&bf->bf_mref_count, !=, 0);
	_CHECK_ATOMIC(&bf->bf_io_count, !=, 0);

	/* sanity checks on the list memberships */
	CHECK_HEAD_EMPTY(&bf->bf_list_head);
	CHECK_HEAD_EMPTY(&bf->bf_hash_head);
	CHECK_HEAD_EMPTY(&bf->bf_io_pending_anchor);
	CHECK_HEAD_EMPTY(&bf->bf_postpone_anchor);

	/* data area first, then the head itself */
#ifdef USE_VMALLOC
	vfree(bf->bf_data);
#else
	free_pages((unsigned long)bf->bf_data, brick->backing_order);
#endif
	brick_mem_free(bf);

	atomic_dec(&brick->alloc_count);
}
/* Shrink the cache until at most max_count buffers remain allocated.
 *
 * The lists are drained in ascending list number. Elements taken from
 * any list other than list 0 are unhashed first; when an element turns
 * out to be in use it is skipped (it stays off the list).
 * Pruning stops early when all lists are exhausted.
 */
static inline
void _prune_cache(struct buf_brick *brick, int max_count)
{
	struct buf_head *bf;
	int i;

	for (i = 0; i < LIST_MAX; i++) {
		while (atomic_read(&brick->alloc_count) > max_count) {
			bf = _fetch_bf_list(brick, i, 0);
			if (!bf) {
				/* This list has nothing more to offer. Without
				 * leaving the inner loop here, alloc_count
				 * would never change and we would spin forever.
				 */
				break;
			}
			if (i > 0) {
				bool status = _remove_hash(brick, bf);

				if (unlikely(!status)) {
					MARS_INF("bf %p is in use\n", bf);
					continue;
				}
			}
			_dealloc_bf(brick, bf);
		}
	}
}
/* Obtain a buf_head for reuse.
 *
 * All lists are tried in ascending list number, each with its minimum
 * age from the table below (only LIST_FORGET has a non-zero one).
 * When no list yields an element, a fresh one is allocated.
 * An element taken from a list other than list 0 is unhashed first;
 * if it turns out to be in use, it is dropped and the search restarts.
 *
 * The loop only terminates with a non-NULL result: a failed allocation
 * leads to another round.
 */
static inline
struct buf_head *_fetch_bf(struct buf_brick *brick)
{
	static const int ages[LIST_MAX] = {
		[LIST_FORGET] = HZ,
	};

	for (;;) {
		struct buf_head *bf = NULL;
		int i;

		for (i = 0; i < LIST_MAX; i++) {
			bf = _fetch_bf_list(brick, i, ages[i]);
			if (bf)
				break;
		}

		if (!bf) {
			/* nothing recyclable: try a fresh allocation */
			bf = _alloc_bf(brick);
			if (bf)
				return bf;
			continue;
		}

		if (i > 0) {
			bool status = _remove_hash(brick, bf);

			if (unlikely(!status)) {
				MARS_INF("bf %p is in use\n", bf);
				// forget it => _bf_put() must fix it
				continue;
			}
		}
		return bf;
	}
}
static
void __pre_alloc_bf(struct buf_brick *brick, int max)
2010-08-06 11:29:06 +00:00
{
while (max-- > 0) {
struct buf_head *bf = _alloc_bf(brick);
if (unlikely(!bf))
break;
2010-12-10 17:40:20 +00:00
INIT_LIST_HEAD(&bf->bf_list_head);
_add_bf_list(brick, bf, LIST_FREE, true);
}
}
static inline
void _bf_put(struct buf_head *bf)
{
int list;
bool at_end;
if (!atomic_dec_and_test(&bf->bf_hash_count))
return;
#if 1
MARS_DBG("ZERO_COUNT %p %d\n", bf, at_end);
if (unlikely(!list_empty(&bf->bf_io_pending_anchor))) {
MARS_ERR("bf_io_pending_anchor is not empty!\n");
}
if (unlikely(!list_empty(&bf->bf_postpone_anchor))) {
MARS_ERR("bf_postpone_anchor is not empty!\n");
}
#endif
2010-08-06 11:29:06 +00:00
2010-12-10 17:40:20 +00:00
list = LIST_LRU;
2010-12-15 12:13:18 +00:00
at_end = !(bf->bf_flags & MREF_UPTODATE);
2010-12-10 17:40:20 +00:00
if (bf->bf_chain_detected) {
list = LIST_FORGET;
at_end = false;
}
_add_bf_list(bf->bf_brick, bf, list, at_end);
}
2010-08-06 11:29:06 +00:00
2010-12-10 17:40:20 +00:00
/////////////////////////////////////////////////////////////////////////
2010-08-06 11:29:06 +00:00
2010-12-10 17:40:20 +00:00
/* Routines for the relation bf <-> mref
*/
static inline
2010-12-15 12:13:18 +00:00
void _mref_assign(struct buf_head *bf, struct buf_mref_aspect *mref_a)
2010-12-10 17:40:20 +00:00
{
if (mref_a->rfa_bf) {
return;
2010-08-06 11:29:06 +00:00
}
2010-12-10 17:40:20 +00:00
mref_a->rfa_bf = bf;
atomic_inc(&bf->bf_mref_count);
}
static inline
2010-12-15 12:13:18 +00:00
bool _mref_remove(struct buf_head *bf, struct buf_mref_aspect *mref_a)
2010-12-10 17:40:20 +00:00
{
2010-12-15 12:13:18 +00:00
//struct mref_object *mref;
2010-12-10 17:40:20 +00:00
bool status;
if (!mref_a->rfa_bf) {
return false;
}
mref_a->rfa_bf = NULL;
CHECK_ATOMIC(&bf->bf_mref_count, 1);
status = atomic_dec_and_test(&bf->bf_mref_count);
return status;
}
/////////////////////////////////////////////////////////////////////////
/* Query the first input for its mars_info and cache the answer in
 * brick->base_info. got_info is set once a query returned a
 * non-negative status. Returns the status of the query.
 */
static inline int _get_info(struct buf_brick *brick)
{
	struct buf_input *in = brick->inputs[0];
	int status;

	status = GENERIC_INPUT_CALL(in, mars_get_info, &brick->base_info);
	if (status < 0)
		return status;

	brick->got_info = true;
	return status;
}
2010-07-07 14:09:16 +00:00
////////////////// own brick / input / output operations //////////////////
static int buf_get_info(struct buf_output *output, struct mars_info *info)
{
struct buf_input *input = output->brick->inputs[0];
return GENERIC_INPUT_CALL(input, mars_get_info, info);
}
2010-12-15 12:13:18 +00:00
static int buf_ref_get(struct buf_output *output, struct mref_object *mref)
2010-07-07 14:09:16 +00:00
{
struct buf_brick *brick = output->brick;
2010-12-15 12:13:18 +00:00
struct buf_mref_aspect *mref_a;
2010-07-07 14:09:16 +00:00
struct buf_head *bf;
2010-08-09 16:57:56 +00:00
struct buf_head *new = NULL;
2010-07-07 14:09:16 +00:00
loff_t base_pos;
2010-07-30 05:46:22 +00:00
int base_offset;
2010-08-02 16:31:10 +00:00
int max_len;
2010-07-07 14:09:16 +00:00
int status = -EILSEQ;
2010-08-03 16:03:32 +00:00
might_sleep();
2010-12-10 17:40:20 +00:00
#if 0
if (!brick->got_info)
_get_info(brick);
#endif
2010-08-02 16:31:10 +00:00
2010-08-07 07:59:34 +00:00
#ifdef PRE_ALLOC
2010-08-09 16:57:56 +00:00
if (unlikely(atomic_read(&brick->alloc_count) < brick->max_count)) {
2010-08-06 11:29:06 +00:00
// grab all memory in one go => avoid memory fragmentation
2010-08-09 16:57:56 +00:00
__pre_alloc_bf(brick, brick->max_count + PRE_ALLOC - atomic_read(&brick->alloc_count));
2010-08-06 11:29:06 +00:00
}
2010-08-07 07:59:34 +00:00
#endif
2010-08-05 15:54:48 +00:00
/* Grab reference.
*/
_mref_get(mref);
2010-08-05 15:54:48 +00:00
2010-11-26 13:45:10 +00:00
/* shortcut in case of unbuffered IO
*/
if (mref->ref_data) {
/* Note: unbuffered IO is later indicated by rfa_bf == NULL
*/
return 0;
}
mref_a = buf_mref_get_aspect(brick, mref);
2010-08-05 15:54:48 +00:00
if (unlikely(!mref_a))
2010-08-03 16:03:32 +00:00
goto done;
2010-07-07 14:09:16 +00:00
2010-08-05 15:54:48 +00:00
base_pos = mref->ref_pos & ~(loff_t)(brick->backing_size - 1);
base_offset = (mref->ref_pos - base_pos);
2010-08-02 16:31:10 +00:00
if (unlikely(base_offset < 0 || base_offset >= brick->backing_size)) {
MARS_ERR("bad base_offset %d\n", base_offset);
}
max_len = brick->backing_size - base_offset;
2010-08-05 15:54:48 +00:00
if (mref->ref_len > max_len)
mref->ref_len = max_len;
2010-07-07 14:09:16 +00:00
2010-08-06 11:29:06 +00:00
again:
2010-12-10 17:40:20 +00:00
bf = _hash_find_insert(brick, base_pos >> (brick->backing_order + PAGE_SHIFT), new);
2010-08-05 15:54:48 +00:00
if (bf) {
2010-08-26 17:12:30 +00:00
#if 1
loff_t end_pos = bf->bf_pos + brick->backing_size;
if (mref->ref_pos < bf->bf_pos || mref->ref_pos >= end_pos) {
2010-11-26 13:45:10 +00:00
MARS_ERR("hash corruption. %lld not in (%lld ... %lld)\n", mref->ref_pos, bf->bf_pos, end_pos);
2010-08-26 17:12:30 +00:00
}
#endif
2010-12-10 17:40:20 +00:00
_remove_bf_list(brick, bf);
2010-08-10 17:39:30 +00:00
atomic_inc(&brick->hit_count);
if (unlikely(new)) {
atomic_inc(&brick->nr_collisions);
2010-12-10 17:40:20 +00:00
MARS_DBG("race detected: alias appeared in the meantime\n");
_add_bf_list(brick, new, LIST_FREE, true);
2010-08-09 16:57:56 +00:00
new = NULL;
}
} else if (new) {
2010-08-10 17:39:30 +00:00
atomic_inc(&brick->miss_count);
2010-08-09 16:57:56 +00:00
MARS_DBG("new elem added\n");
bf = new;
new = NULL;
2010-12-10 17:40:20 +00:00
bf->bf_chain_detected = false;
2010-08-05 15:54:48 +00:00
} else {
2010-07-23 11:55:18 +00:00
MARS_DBG("buf_get() hash nothing found\n");
2010-08-09 16:57:56 +00:00
2010-12-10 17:40:20 +00:00
new = _fetch_bf(brick);
if (!new)
goto done;
#if 1
// dont initialize new->bf_data
memset(((void*)new) + sizeof(void*), 0, sizeof(struct buf_head) - sizeof(void*));
#else
2010-08-09 16:57:56 +00:00
new->bf_flags = 0;
2010-12-10 17:40:20 +00:00
new->bf_error = 0;
atomic_set(&new->bf_hash_count, 0);
atomic_set(&new->bf_mfu_stat, 0);
atomic_set(&new->bf_chain_len, 0);
new->bf_chain_detected = false;
#endif
spin_lock_init(&new->bf_lock);
new->bf_brick = brick;
new->bf_pos = base_pos;
new->bf_base_index = base_pos >> (brick->backing_order + PAGE_SHIFT);
#ifdef OPTIMIZE_FULL_WRITES
/* Important optimization: treat whole buffer as uptodate
* upon full write.
2010-08-20 10:58:24 +00:00
*/
if ((mref->ref_flags & MREF_MAY_WRITE) &&
2010-12-10 17:40:20 +00:00
((!base_offset && mref->ref_len >= brick->backing_size) ||
(mref->ref_pos >= brick->base_info.current_size && brick->base_info.current_size > 0))) {
2010-12-15 12:13:18 +00:00
new->bf_flags |= MREF_UPTODATE;
2010-12-10 17:40:20 +00:00
atomic_inc(&brick->opt_count);
2010-08-20 10:58:24 +00:00
}
2010-12-10 17:40:20 +00:00
#endif
2010-08-09 16:57:56 +00:00
//INIT_LIST_HEAD(&new->bf_mref_anchor);
2010-12-10 17:40:20 +00:00
INIT_LIST_HEAD(&new->bf_list_head);
2010-08-09 16:57:56 +00:00
INIT_LIST_HEAD(&new->bf_hash_head);
INIT_LIST_HEAD(&new->bf_io_pending_anchor);
INIT_LIST_HEAD(&new->bf_postpone_anchor);
2010-12-10 17:40:20 +00:00
/* Statistics for read-ahead chain detection
*/
if (brick->optimize_chains) {
struct buf_head *prev_bf;
prev_bf = _hash_find_insert(brick, new->bf_base_index - 1, NULL);
if (prev_bf) {
int chainlen = atomic_read(&prev_bf->bf_chain_len);
atomic_set(&new->bf_chain_len, chainlen + 1);
atomic_inc(&brick->chain_count);
prev_bf->bf_chain_detected = true;
_bf_put(prev_bf);
}
}
2010-08-09 16:57:56 +00:00
/* Check for races against us...
*/
goto again;
2010-07-07 14:09:16 +00:00
}
2010-12-10 17:40:20 +00:00
_mref_assign(bf, mref_a);
2010-08-07 07:59:34 +00:00
2019-03-14 12:44:16 +00:00
MARS_DBG("bf=%p index = %lld flags = %x\n", bf, bf->bf_base_index, bf->bf_flags);
2010-07-07 14:09:16 +00:00
2010-08-05 15:54:48 +00:00
mref->ref_flags = bf->bf_flags;
mref->ref_data = bf->bf_data + base_offset;
2010-07-07 14:09:16 +00:00
_mref_check(mref);
2010-12-10 17:40:20 +00:00
CHECK_ATOMIC(&bf->bf_hash_count, 1);
CHECK_ATOMIC(&bf->bf_mref_count, 1);
2010-08-05 15:54:48 +00:00
2010-11-26 13:45:10 +00:00
status = 0;
2010-07-07 14:09:16 +00:00
2010-08-03 16:03:32 +00:00
done:
2010-07-07 14:09:16 +00:00
return status;
}
2010-12-15 12:13:18 +00:00
static void _buf_ref_put(struct buf_output *output, struct buf_mref_aspect *mref_a)
2010-07-07 14:09:16 +00:00
{
2010-12-15 12:13:18 +00:00
struct mref_object *mref = mref_a->object;
2010-07-07 14:09:16 +00:00
struct buf_head *bf;
2010-08-02 16:31:10 +00:00
2010-11-26 13:45:10 +00:00
/* shortcut in case of unbuffered IO
*/
bf = mref_a->rfa_bf;
if (!bf) {
struct buf_brick *brick = output->brick;
2021-06-29 08:52:11 +00:00
GENERIC_INPUT_CALL_VOID(brick->inputs[0], mref_put, mref);
2010-11-26 13:45:10 +00:00
return;
}
if (!_mref_put(mref))
2010-08-03 09:38:12 +00:00
return;
2010-07-07 14:09:16 +00:00
2019-03-14 12:44:16 +00:00
MARS_DBG("buf_ref_put() mref=%p mref_a=%p bf=%p flags=%x\n", mref, mref_a, bf, bf->bf_flags);
2010-12-10 17:40:20 +00:00
_mref_remove(bf, mref_a);
2010-12-15 12:13:18 +00:00
buf_free_mref(mref);
2010-12-10 17:40:20 +00:00
_bf_put(bf); // paired with _hash_find_insert()
2010-07-23 11:55:18 +00:00
}
2010-07-07 14:09:16 +00:00
2010-12-15 12:13:18 +00:00
static void buf_ref_put(struct buf_output *output, struct mref_object *mref)
2010-07-23 11:55:18 +00:00
{
2010-12-15 12:13:18 +00:00
struct buf_mref_aspect *mref_a;
mref_a = buf_mref_get_aspect(output->brick, mref);
2010-08-05 15:54:48 +00:00
if (unlikely(!mref_a)) {
2010-08-04 17:32:04 +00:00
MARS_FAT("cannot get aspect\n");
return;
}
2010-11-26 13:45:10 +00:00
_buf_ref_put(output, mref_a);
2010-08-04 17:32:04 +00:00
}
2010-11-26 13:45:10 +00:00
static void _buf_endio(struct generic_callback *cb);
2010-08-08 14:02:54 +00:00
/* Start IO on (a part of) the data area of bf via the first input.
 *
 * @start_data: first byte to transfer, inside the data area of bf
 * @start_pos:  position corresponding to start_data
 * @start_len:  number of bytes to transfer, must be > 0
 * @ref_flags:  flags copied into each sub-request
 *
 * The range must lie completely within [bf_pos, bf_pos + backing_size).
 * One sub-request is created per loop round; each round advances by the
 * ref_len found in the sub-request after the input's mref_get. Every
 * fired-off sub-request is counted in bf_io_count and completes
 * via _buf_endio().
 *
 * Returns a negative error code on failure (always negative, also for
 * parameter validation failures and for resource shortage in a later
 * round), otherwise the status of the last mref_get call.
 *
 * NOTE(review): when the input's mref_get fails, the freshly allocated
 * sub-request is not freed here - confirm whether that is a leak.
 */
static int _buf_make_io(struct buf_brick *brick, struct buf_head *bf, void *start_data,
			loff_t start_pos, int start_len, __u32 ref_flags)
{
	struct buf_input *input;
	/* must be negative: callers can only recognise errors by sign */
	int status = -EINVAL;
	loff_t bf_end = bf->bf_pos + brick->backing_size;
	loff_t end_pos;

	/* parameter validation */
	if (start_pos < bf->bf_pos || start_pos >= bf_end) {
		MARS_ERR("bad start_pos %llu (%llu ... %llu)\n", start_pos, bf->bf_pos, bf_end);
		goto done;
	}
	end_pos = start_pos + start_len;
	if (end_pos <= bf->bf_pos || end_pos > bf_end) {
		MARS_ERR("bad end_pos %llu (%llu ... %llu)\n", end_pos, bf->bf_pos, bf_end);
		goto done;
	}
	if (!start_data) {
		MARS_ERR("bad start_data\n");
		goto done;
	}
	if (start_len <= 0) {
		MARS_ERR("bad start_len %d\n", start_len);
		goto done;
	}

	MARS_DBG("bf = %p %ux start = %lld len = %d flags = %x\n", bf,
		 ref_flags, start_pos, start_len, bf->bf_flags);

	atomic_set(&bf->bf_io_count, 0);

	input = brick->inputs[0];

	while (start_len > 0) {
		struct mref_object *mref;
		struct buf_mref_aspect *mref_a;
		int len;

		mref = buf_alloc_mref(brick);
		if (unlikely(!mref)) {
			/* do not report the success of a previous round */
			status = -ENOMEM;
			break;
		}

		mref_a = buf_mref_get_aspect(brick, mref);
		if (unlikely(!mref_a)) {
			buf_free_mref(mref);
			/* do not report the success of a previous round */
			status = -EILSEQ;
			break;
		}

		mref_a->rfa_bf = bf;
		SETUP_CALLBACK(mref, _buf_endio, mref_a);

		mref->ref_pos = start_pos;
		mref->ref_len = start_len;
		mref->ref_flags = ref_flags;
		mref->ref_data = start_data;

		status = GENERIC_INPUT_CALL(input, mref_get, mref);
		if (status < 0) {
			MARS_ERR("status = %d\n", status);
			goto done;
		}

		/* Remember number of fired-off mrefs */
		atomic_inc(&bf->bf_io_count);

		/* the input may have shortened the request */
		len = mref->ref_len;

#ifndef FAKE_IO
		GENERIC_INPUT_CALL_VOID(input, mref_io, mref);
#else
		// fake IO for testing
		mref_a->cb.cb_error = status;
		mref_a->cb.cb_fn(&mref_a->cb);
#endif

		GENERIC_INPUT_CALL_VOID(input, mref_put, mref);

		start_data += len;
		start_pos += len;
		start_len -= len;
#if 1
		if (start_len > 0)
			MARS_ERR("cannot submit request in one go, rest=%d\n", start_len);
#endif
	}
done:
	return status;
}
2010-11-26 13:45:10 +00:00
static void _buf_endio(struct generic_callback *cb)
2010-07-23 11:55:18 +00:00
{
2010-12-15 12:13:18 +00:00
struct buf_mref_aspect *bf_mref_a = cb->cb_private;
struct mref_object *bf_mref;
2010-07-23 11:55:18 +00:00
struct buf_head *bf;
struct buf_brick *brick;
2010-11-26 13:45:10 +00:00
LIST_HEAD(tmp);
2019-03-14 12:44:16 +00:00
__u32 old_flags;
2010-11-26 13:45:10 +00:00
unsigned long flags;
2010-07-23 11:55:18 +00:00
void *start_data = NULL;
loff_t start_pos = 0;
int start_len = 0;
2010-11-26 13:45:10 +00:00
int error = cb->cb_error;
2010-08-03 16:03:32 +00:00
#if 1
int count = 0;
#endif
2010-07-23 11:55:18 +00:00
LAST_CALLBACK(cb);
2010-11-26 13:45:10 +00:00
CHECK_PTR(bf_mref_a, err);
bf_mref = bf_mref_a->object;
CHECK_PTR(bf_mref, err);
bf = bf_mref_a->rfa_bf;
CHECK_PTR(bf, err);
brick = bf->bf_brick;
CHECK_PTR(brick, err);
2010-07-23 11:55:18 +00:00
2019-03-14 12:44:16 +00:00
MARS_DBG("_buf_endio() bf_mref_a=%p bf_mref=%p bf=%p flags=%x\n", bf_mref_a, bf_mref, bf, bf->bf_flags);
2010-12-10 17:40:20 +00:00
2010-11-26 13:45:10 +00:00
if (error < 0)
bf->bf_error = error;
2010-07-23 11:55:18 +00:00
2010-11-26 13:45:10 +00:00
// wait until all IO on this bf is completed.
if (!atomic_dec_and_test(&bf->bf_io_count))
2010-07-23 11:55:18 +00:00
return;
2010-12-10 17:40:20 +00:00
MARS_DBG("_buf_endio() ZERO bf=%p\n", bf);
2010-08-03 09:38:12 +00:00
// get an extra reference, to avoid freeing bf underneath during callbacks
2010-12-10 17:40:20 +00:00
CHECK_ATOMIC(&bf->bf_hash_count, 1);
atomic_inc(&bf->bf_hash_count);
2010-08-03 09:38:12 +00:00
2010-08-07 15:02:16 +00:00
traced_lock(&bf->bf_lock, flags);
2010-07-23 11:55:18 +00:00
2010-08-02 16:31:10 +00:00
// update flags. this must be done before the callbacks.
old_flags = bf->bf_flags;
2010-12-15 12:13:18 +00:00
if (bf->bf_error >= 0 && (old_flags & MREF_READING)) {
bf->bf_flags |= MREF_UPTODATE;
2010-08-02 16:31:10 +00:00
}
2010-12-10 17:40:20 +00:00
2010-08-02 16:31:10 +00:00
// clear the flags, callbacks must not see them. may be re-enabled later.
2010-12-15 12:13:18 +00:00
bf->bf_flags &= ~(MREF_READING | MREF_WRITING);
2010-08-03 09:38:12 +00:00
/* Remember current version of pending list.
* This is necessary because later the callbacks might
2010-08-02 16:31:10 +00:00
* change it underneath.
*/
if (!list_empty(&bf->bf_io_pending_anchor)) {
2010-08-03 09:38:12 +00:00
struct list_head *next = bf->bf_io_pending_anchor.next;
list_del_init(&bf->bf_io_pending_anchor);
list_add_tail(&tmp, next);
2010-07-23 11:55:18 +00:00
}
2010-08-03 09:38:12 +00:00
/* Move pending jobs to work.
* This is in essence an automatic restart mechanism.
* do this before the callbacks, because they may start
* new IOs. If not done in the right order, this could violate
* IO ordering semantics.
2010-07-23 11:55:18 +00:00
*/
2010-08-07 15:02:16 +00:00
while (!list_empty(&bf->bf_postpone_anchor)) {
2010-12-15 12:13:18 +00:00
struct buf_mref_aspect *mref_a = container_of(bf->bf_postpone_anchor.next, struct buf_mref_aspect, rfa_pending_head);
struct mref_object *mref = mref_a->object;
2010-11-26 13:45:10 +00:00
2010-08-07 15:02:16 +00:00
if (mref_a->rfa_bf != bf) {
MARS_ERR("bad pointers %p != %p\n", mref_a->rfa_bf, bf);
2010-07-30 05:46:22 +00:00
}
2010-08-03 16:03:32 +00:00
#if 1
2010-08-06 11:29:06 +00:00
if (!(++count % 1000)) {
2010-08-03 16:03:32 +00:00
MARS_ERR("endless loop 1\n");
}
#endif
2010-08-07 15:02:16 +00:00
list_del_init(&mref_a->rfa_pending_head);
list_add_tail(&mref_a->rfa_pending_head, &bf->bf_io_pending_anchor);
2010-08-03 09:38:12 +00:00
2010-12-10 17:40:20 +00:00
MARS_DBG("postponed mref=%p\n", mref);
2010-07-23 11:55:18 +00:00
// re-enable flags
2010-12-15 12:13:18 +00:00
bf->bf_flags |= MREF_WRITING;
2010-11-26 13:45:10 +00:00
bf->bf_error = 0;
2010-07-23 11:55:18 +00:00
if (!start_len) {
2010-08-02 16:31:10 +00:00
// first time: only flush the affected area
2010-08-05 15:54:48 +00:00
start_data = mref->ref_data;
start_pos = mref->ref_pos;
start_len = mref->ref_len;
} else if (start_data != mref->ref_data ||
start_pos != mref->ref_pos ||
start_len != mref->ref_len) {
2010-08-27 13:17:04 +00:00
// another time: flush larger parts
loff_t start_diff = mref->ref_pos - start_pos;
loff_t end_diff;
if (start_diff < 0) {
start_data += start_diff;
start_pos += start_diff;
start_len -= start_diff;
}
end_diff = (mref->ref_pos + mref->ref_len) - (start_pos + start_len);
if (end_diff > 0) {
start_len += end_diff;
}
2010-07-23 11:55:18 +00:00
}
}
2010-08-07 15:02:16 +00:00
traced_unlock(&bf->bf_lock, flags);
2010-07-23 11:55:18 +00:00
2010-08-03 09:38:12 +00:00
/* Signal success by calling all callbacks.
* Thanks to the tmp list, we can do this outside the spinlock.
*/
2010-08-06 11:29:06 +00:00
count = 0;
2010-08-03 09:38:12 +00:00
while (!list_empty(&tmp)) {
2010-12-15 12:13:18 +00:00
struct buf_mref_aspect *mref_a = container_of(tmp.next, struct buf_mref_aspect, rfa_pending_head);
struct mref_object *mref = mref_a->object;
2010-08-03 09:38:12 +00:00
2010-08-07 15:02:16 +00:00
if (mref_a->rfa_bf != bf) {
MARS_ERR("bad pointers %p != %p\n", mref_a->rfa_bf, bf);
2010-08-03 09:38:12 +00:00
}
2010-08-03 16:03:32 +00:00
#if 1
2010-08-06 11:29:06 +00:00
if (!(++count % 1000)) {
2010-08-03 16:03:32 +00:00
MARS_ERR("endless loop 2\n");
}
#endif
_mref_check(mref);
2010-08-07 15:02:16 +00:00
/* It should be safe to do this without locking, because
* tmp is on the stack, so there is no concurrency.
*/
list_del_init(&mref_a->rfa_pending_head);
2010-08-03 09:38:12 +00:00
// update infos for callbacks, they may inspect it.
2010-08-05 15:54:48 +00:00
mref->ref_flags = bf->bf_flags;
CHECKED_CALLBACK(mref, bf->bf_error, err);
2010-08-03 09:38:12 +00:00
atomic_dec(&brick->nr_io_pending);
2010-08-03 09:38:12 +00:00
2010-11-26 13:45:10 +00:00
_buf_ref_put(brick->outputs[0], mref_a);
2010-08-03 09:38:12 +00:00
}
2010-07-23 11:55:18 +00:00
if (start_len) {
2010-11-26 13:45:10 +00:00
MARS_DBG("ATTENTION restart %d\n", start_len);
_buf_make_io(brick, bf, start_data, start_pos, start_len, MREF_WRITE);
2010-07-23 11:55:18 +00:00
}
2010-08-03 09:38:12 +00:00
// drop the extra reference from above
2010-12-10 17:40:20 +00:00
_bf_put(bf);
return;
2010-11-26 13:45:10 +00:00
err:
MARS_FAT("giving up.\n");
2010-07-23 11:55:18 +00:00
}
2010-12-15 12:13:18 +00:00
static void buf_ref_io(struct buf_output *output, struct mref_object *mref)
2010-07-23 11:55:18 +00:00
{
struct buf_brick *brick = output->brick;
2010-12-15 12:13:18 +00:00
struct buf_mref_aspect *mref_a;
2010-07-23 11:55:18 +00:00
struct buf_head *bf;
void *start_data = NULL;
loff_t start_pos = 0;
int start_len = 0;
2010-08-03 09:38:12 +00:00
int status = -EINVAL;
2010-08-05 15:54:48 +00:00
bool delay = false;
2010-07-30 05:46:22 +00:00
unsigned long flags;
2010-07-23 11:55:18 +00:00
2010-08-05 15:54:48 +00:00
if (unlikely(!mref)) {
MARS_FAT("internal problem: forgotten to supply mref\n");
goto fatal;
2010-07-23 11:55:18 +00:00
}
mref_a = buf_mref_get_aspect(brick, mref);
2010-08-05 15:54:48 +00:00
if (unlikely(!mref_a)) {
MARS_ERR("internal problem: mref aspect does not work\n");
goto fatal;
2010-07-23 11:55:18 +00:00
}
2010-11-26 13:45:10 +00:00
/* shortcut in case of unbuffered IO
*/
bf = mref_a->rfa_bf;
if (!bf) {
2021-06-29 08:52:11 +00:00
GENERIC_INPUT_CALL_VOID(brick->inputs[0], mref_io, mref);
2010-11-26 13:45:10 +00:00
return;
}
2010-08-05 15:54:48 +00:00
/* Grab an extra reference.
2010-11-26 13:45:10 +00:00
* This will be released later in _bf_endio() after
2010-08-05 15:54:48 +00:00
* calling the callbacks.
*/
_mref_get(mref);
2010-12-10 17:40:20 +00:00
CHECK_ATOMIC(&bf->bf_hash_count, 1);
MARS_DBG("IO mref=%p %d bf=%p flags=%x\n", mref, mref->ref_flags,
bf, bf->bf_flags);
2010-07-23 11:55:18 +00:00
if (mref->ref_flags & MREF_WRITE) {
2010-08-20 10:58:24 +00:00
loff_t end;
if (unlikely(!(mref->ref_flags & MREF_MAY_WRITE))) {
2010-11-26 13:45:10 +00:00
MARS_ERR("sorry, you have forgotten to set ref_may_write\n");
2010-08-03 09:38:12 +00:00
goto callback;
2010-08-02 16:31:10 +00:00
}
2010-08-20 10:58:24 +00:00
end = mref->ref_pos + mref->ref_len;
//FIXME: race condition :(
2010-12-10 17:40:20 +00:00
if (!brick->got_info)
_get_info(brick);
2010-08-20 10:58:24 +00:00
if (end > brick->base_info.current_size) {
brick->base_info.current_size = end;
}
2010-08-02 16:31:10 +00:00
}
2010-08-05 15:54:48 +00:00
#if 1
if (jiffies - brick->last_jiffies >= 30 * HZ) {
2010-08-09 16:57:56 +00:00
unsigned long hit = atomic_read(&brick->hit_count);
unsigned long miss = atomic_read(&brick->miss_count);
unsigned long perc = hit * 100 * 100 / (hit + miss);
2010-08-05 15:54:48 +00:00
brick->last_jiffies = jiffies;
2010-12-10 17:40:20 +00:00
MARS_INF("BUF %p STATISTICS: alloc=%d hashed=%d free=%d forget=%d lru=%d io_pending=%d hit=%lu (%lu.%02lu%%) miss=%lu collisions=%d opt=%d chain=%d post=%d write=%d io=%d\n", brick, atomic_read(&brick->alloc_count), atomic_read(&brick->hashed_count), atomic_read(&brick->list_count[LIST_FREE]), atomic_read(&brick->list_count[LIST_FORGET]), atomic_read(&brick->list_count[LIST_LRU]), atomic_read(&brick->nr_io_pending), hit, perc / 100, perc % 100, miss, atomic_read(&brick->nr_collisions), atomic_read(&brick->opt_count), atomic_read(&brick->chain_count), atomic_read(&brick->post_count), atomic_read(&brick->write_count), atomic_read(&brick->io_count));
2010-08-05 15:54:48 +00:00
}
#endif
2010-08-07 15:02:16 +00:00
traced_lock(&bf->bf_lock, flags);
2010-08-07 07:59:34 +00:00
2010-08-07 15:02:16 +00:00
if (!list_empty(&mref_a->rfa_pending_head)) {
2010-08-05 15:54:48 +00:00
MARS_ERR("trying to start IO on an already started mref\n");
goto already_done;
}
if (mref->ref_flags & MREF_WRITE) {
2010-12-10 17:40:20 +00:00
#ifdef FAKE_WRITES
2010-12-15 12:13:18 +00:00
bf->bf_flags |= MREF_UPTODATE;
2010-12-10 17:40:20 +00:00
goto already_done;
#endif
2010-12-15 12:13:18 +00:00
if (bf->bf_flags & MREF_READING) {
2019-03-14 12:44:16 +00:00
MARS_ERR("bad bf_flags %x\n", bf->bf_flags);
2010-07-30 05:46:22 +00:00
}
2010-12-15 12:13:18 +00:00
if (!(bf->bf_flags & MREF_WRITING)) {
2010-12-10 17:40:20 +00:00
#if 0
2010-08-07 15:02:16 +00:00
// by definition, a writeout buffer is always uptodate
2010-12-15 12:13:18 +00:00
bf->bf_flags |= (MREF_WRITING | MREF_UPTODATE);
2010-12-10 17:40:20 +00:00
#else // wirklich???
2010-12-15 12:13:18 +00:00
bf->bf_flags |= MREF_WRITING;
2010-12-10 17:40:20 +00:00
#endif
2010-11-26 13:45:10 +00:00
bf->bf_error = 0;
2010-08-03 09:38:12 +00:00
#if 1
2010-08-05 15:54:48 +00:00
start_data = mref->ref_data;
start_pos = mref->ref_pos;
start_len = mref->ref_len;
2010-08-03 09:38:12 +00:00
#else // only for testing: write the full buffer
2010-08-05 15:54:48 +00:00
start_data = (void*)((unsigned long)mref->ref_data & ~(unsigned long)(brick->backing_size - 1));
start_pos = mref->ref_pos & ~(loff_t)(brick->backing_size - 1);
2010-07-30 05:46:22 +00:00
start_len = brick->backing_size;
#endif
2010-08-07 15:02:16 +00:00
list_add(&mref_a->rfa_pending_head, &bf->bf_io_pending_anchor);
2010-08-05 15:54:48 +00:00
delay = true;
2010-07-23 11:55:18 +00:00
} else {
2010-08-07 15:02:16 +00:00
list_add(&mref_a->rfa_pending_head, &bf->bf_postpone_anchor);
2010-12-10 17:40:20 +00:00
atomic_inc(&brick->post_count);
2010-08-05 15:54:48 +00:00
delay = true;
MARS_DBG("postponing %lld %d\n", mref->ref_pos, mref->ref_len);
2010-07-23 11:55:18 +00:00
}
} else { // READ
2010-12-10 17:40:20 +00:00
#ifdef FAKE_READS
2010-12-15 12:13:18 +00:00
bf->bf_flags |= MREF_UPTODATE;
2010-12-10 17:40:20 +00:00
goto already_done;
#endif
#if 0
2010-12-15 12:13:18 +00:00
if (bf->bf_flags & (MREF_UPTODATE | MREF_WRITING))
2010-12-10 17:40:20 +00:00
#else
2010-12-15 12:13:18 +00:00
if (bf->bf_flags & MREF_UPTODATE)
2010-12-10 17:40:20 +00:00
#endif
2010-07-30 05:46:22 +00:00
goto already_done;
2010-12-15 12:13:18 +00:00
if (!(bf->bf_flags & MREF_READING)) {
bf->bf_flags |= MREF_READING;
2010-11-26 13:45:10 +00:00
bf->bf_error = 0;
2010-08-03 09:38:12 +00:00
2010-08-03 16:03:32 +00:00
// always read the whole buffer.
2010-08-05 15:54:48 +00:00
start_data = (void*)((unsigned long)mref->ref_data & ~(unsigned long)(brick->backing_size - 1));
start_pos = mref->ref_pos & ~(loff_t)(brick->backing_size - 1);
2010-07-23 11:55:18 +00:00
start_len = brick->backing_size;
}
2010-08-07 15:02:16 +00:00
list_add(&mref_a->rfa_pending_head, &bf->bf_io_pending_anchor);
2010-08-05 15:54:48 +00:00
delay = true;
2010-07-23 11:55:18 +00:00
}
2010-08-05 15:54:48 +00:00
if (likely(delay)) {
atomic_inc(&brick->nr_io_pending);
atomic_inc(&brick->io_count);
if (mref->ref_flags & MREF_WRITE)
2010-12-10 17:40:20 +00:00
atomic_inc(&brick->write_count);
2010-08-05 15:54:48 +00:00
}
2010-08-03 09:38:12 +00:00
2010-08-07 15:02:16 +00:00
traced_unlock(&bf->bf_lock, flags);
2010-07-30 05:46:22 +00:00
2010-08-02 16:31:10 +00:00
if (!start_len) {
2010-08-03 16:03:32 +00:00
// nothing to start, IO is already started.
2010-08-05 15:54:48 +00:00
goto no_callback;
2010-08-02 16:31:10 +00:00
}
status = _buf_make_io(brick, bf, start_data, start_pos, start_len, mref->ref_flags);
2010-08-03 09:38:12 +00:00
if (likely(status >= 0)) {
2010-08-03 16:03:32 +00:00
/* No immediate callback, this time.
2010-11-26 13:45:10 +00:00
* Callbacks will be called later from _bf_endio().
2010-08-03 09:38:12 +00:00
*/
2010-08-05 15:54:48 +00:00
goto no_callback;
2010-08-02 16:31:10 +00:00
}
2010-08-03 09:38:12 +00:00
2010-08-05 15:54:48 +00:00
MARS_ERR("error %d during buf_ref_io()\n", status);
buf_ref_put(output, mref);
2010-08-03 09:38:12 +00:00
goto callback;
2010-07-30 05:46:22 +00:00
already_done:
2010-11-26 13:45:10 +00:00
status = bf->bf_error;
2010-08-03 09:38:12 +00:00
2010-08-07 15:02:16 +00:00
traced_unlock(&bf->bf_lock, flags);
2010-08-03 09:38:12 +00:00
callback:
2010-12-10 17:40:20 +00:00
mref->ref_flags = bf->bf_flags;
CHECKED_CALLBACK(mref, status, fatal);
2010-08-05 15:54:48 +00:00
no_callback:
if (!delay) {
buf_ref_put(output, mref);
2010-12-10 17:40:20 +00:00
} // else the ref_put() will be carried out upon IO completion.
2010-08-05 15:54:48 +00:00
2010-12-10 17:40:20 +00:00
return;
2010-08-05 15:54:48 +00:00
fatal: // no chance to call callback: may produce hanging tasks :(
2010-12-10 17:40:20 +00:00
MARS_FAT("no chance to call callback, tasks may hang.\n");
2010-07-23 11:55:18 +00:00
}
2010-07-07 14:09:16 +00:00
//////////////// object / aspect constructors / destructors ///////////////
static int buf_mref_aspect_init_fn(struct generic_aspect *_ini)
2010-07-07 14:09:16 +00:00
{
2010-12-15 12:13:18 +00:00
struct buf_mref_aspect *ini = (void*)_ini;
2010-08-07 15:02:16 +00:00
ini->rfa_bf = NULL;
INIT_LIST_HEAD(&ini->rfa_pending_head);
2010-12-10 17:40:20 +00:00
//INIT_LIST_HEAD(&ini->tmp_head);
2010-07-07 14:09:16 +00:00
return 0;
}
static void buf_mref_aspect_exit_fn(struct generic_aspect *_ini)
2010-08-08 09:03:42 +00:00
{
2010-12-15 12:13:18 +00:00
struct buf_mref_aspect *ini = (void*)_ini;
2010-08-08 09:03:42 +00:00
(void)ini;
#if 1
CHECK_HEAD_EMPTY(&ini->rfa_pending_head);
2010-12-10 17:40:20 +00:00
//CHECK_HEAD_EMPTY(&ini->tmp_head);
2010-08-08 09:03:42 +00:00
#endif
}
2010-07-23 11:55:18 +00:00
/* Instantiate the static boilerplate for the "buf" brick type.
 * NOTE(review): the macro is defined outside this file. Presumably it is
 * what provides buf_aspect_types and buf_{register,unregister}_brick_type(),
 * which are referenced further down but not defined in this part of the
 * file -- confirm against the macro definition.
 */
MARS_MAKE_STATICS(buf);
2010-07-07 14:09:16 +00:00
////////////////////// brick constructors / destructors ////////////////////
static int buf_brick_construct(struct buf_brick *brick)
{
int i;
2010-11-26 13:45:10 +00:00
brick->backing_order = 0;
2010-12-10 17:40:20 +00:00
brick->backing_size = PAGE_SIZE;
2010-11-26 13:45:10 +00:00
brick->max_count = 32;
2010-08-07 07:59:34 +00:00
spin_lock_init(&brick->brick_lock);
2010-12-10 17:40:20 +00:00
for (i = 0; i < LIST_MAX; i++) {
INIT_LIST_HEAD(&brick->list_anchor[i]);
}
2010-07-07 14:09:16 +00:00
for (i = 0; i < MARS_BUF_HASH_MAX; i++) {
2010-08-09 16:57:56 +00:00
spin_lock_init(&brick->cache_anchors[i].hash_lock);
INIT_LIST_HEAD(&brick->cache_anchors[i].hash_anchor);
2010-07-07 14:09:16 +00:00
}
return 0;
}
/* Output constructor for the buf brick.
 *
 * The output is not touched here.
 *
 * Always returns 0.
 */
static int buf_output_construct(struct buf_output *output)
{
	(void)output;	/* nothing to initialise */

	return 0;
}
2010-07-30 05:46:22 +00:00
static int buf_brick_destruct(struct buf_brick *brick)
{
2010-08-23 05:06:06 +00:00
int i;
2010-07-30 05:46:22 +00:00
2010-07-30 11:50:20 +00:00
brick->max_count = 0;
2010-12-10 17:40:20 +00:00
_prune_cache(brick, 0);
2010-08-23 05:06:06 +00:00
2010-12-10 17:40:20 +00:00
for (i = 0; i < LIST_MAX; i++) {
CHECK_HEAD_EMPTY(&brick->list_anchor[i]);
}
2010-08-23 05:06:06 +00:00
for (i = 0; i < MARS_BUF_HASH_MAX; i++) {
CHECK_HEAD_EMPTY(&brick->cache_anchors[i].hash_anchor);
}
2010-07-30 05:46:22 +00:00
return 0;
}
2010-07-07 14:09:16 +00:00
///////////////////////// static structs ////////////////////////
/* Brick-level operations table; no member is initialised here. */
static struct buf_brick_ops buf_brick_ops = {
};
static struct buf_output_ops buf_output_ops = {
.mars_get_info = buf_get_info,
2010-12-15 12:13:18 +00:00
.mref_get = buf_ref_get,
.mref_put = buf_ref_put,
.mref_io = buf_ref_io,
2010-07-07 14:09:16 +00:00
};
2010-08-10 17:39:30 +00:00
const struct buf_input_type buf_input_type = {
2010-07-07 14:09:16 +00:00
.type_name = "buf_input",
.input_size = sizeof(struct buf_input),
};
2010-07-23 11:55:18 +00:00
static const struct buf_input_type *buf_input_types[] = {
2010-07-07 14:09:16 +00:00
&buf_input_type,
};
2010-08-10 17:39:30 +00:00
const struct buf_output_type buf_output_type = {
2010-07-07 14:09:16 +00:00
.type_name = "buf_output",
.output_size = sizeof(struct buf_output),
.master_ops = &buf_output_ops,
.output_construct = &buf_output_construct,
};
2010-07-23 11:55:18 +00:00
static const struct buf_output_type *buf_output_types[] = {
2010-07-07 14:09:16 +00:00
&buf_output_type,
};
2010-07-23 11:55:18 +00:00
const struct buf_brick_type buf_brick_type = {
2010-07-07 14:09:16 +00:00
.type_name = "buf_brick",
.brick_size = sizeof(struct buf_brick),
.max_inputs = 1,
.max_outputs = 1,
.master_ops = &buf_brick_ops,
.aspect_types = buf_aspect_types,
2010-07-07 14:09:16 +00:00
.default_input_types = buf_input_types,
.default_output_types = buf_output_types,
.brick_construct = &buf_brick_construct,
2010-07-30 05:46:22 +00:00
.brick_destruct = &buf_brick_destruct,
2010-07-07 14:09:16 +00:00
};
EXPORT_SYMBOL_GPL(buf_brick_type);
////////////////// module init stuff /////////////////////////
/* Init entry point of the buf brick: logs a message and registers the
 * brick type.
 *
 * Returns the result of buf_register_brick_type() unchanged.
 */
int __init init_mars_buf(void)
{
	int status;

	MARS_INF("init_buf()\n");
	status = buf_register_brick_type();

	return status;
}
2014-04-23 11:16:26 +00:00
/* Exit entry point of the buf brick: logs a message and unregisters the
 * brick type.
 */
void exit_mars_buf(void)
{
	MARS_INF("exit_buf()\n");

	buf_unregister_brick_type();
}