// mars/mars_if.c
// (c) 2010 Thomas Schoebel-Theuer / 1&1 Internet AG
/* Interface to a Linux device.
* 1 Input, 0 Outputs.
*/
2010-07-30 05:46:22 +00:00
/* Debugging switches, all disabled. IO_DEBUGGING additionally enables the
 * statistics dumps guarded by #ifdef IO_DEBUGGING further down.
 */
//#define BRICK_DEBUGGING
//#define MARS_DEBUGGING
//#define IO_DEBUGGING

/* REQUEST_MERGING enables the hash lookup in if_make_request() which
 * appends a bio segment to an already plugged mref.
 */
#define REQUEST_MERGING

/* ALWAYS_UNPLUG is or-ed into the unplug decision of if_make_request();
 * ALWAYS_UNPLUG_FROM_EXTERNAL does the same for if_unplug().
 */
//#define ALWAYS_UNPLUG false // FIXME: does not work! single requests left over!
#define ALWAYS_UNPLUG true
#define ALWAYS_UNPLUG_FROM_EXTERNAL true

/* Base length from which if_make_request() derives the length requested
 * for a freshly allocated mref.
 */
#define PREFETCH_LEN PAGE_SIZE
//#define FRONT_MERGE // FIXME: this does not work.
//#define MODIFY_READAHEAD // don't use it, otherwise sequential IO will suffer

// low-level device parameters
/* Each USE_* macro below gates the corresponding queue setup call in
 * if_switch().
 */
#define USE_MAX_SECTORS (MARS_MAX_SEGMENT_SIZE >> 9)
#define USE_MAX_PHYS_SEGMENTS (MARS_MAX_SEGMENT_SIZE >> 9)
#define USE_MAX_SEGMENT_SIZE MARS_MAX_SEGMENT_SIZE
#define USE_LOGICAL_BLOCK_SIZE 512
#define USE_SEGMENT_BOUNDARY (PAGE_SIZE-1)
#define USE_CONGESTED_FN
#define USE_MERGE_BVEC
/* When defined, if_make_request() completes read-ahead bios with
 * -EWOULDBLOCK instead of submitting them.
 */
//#define DENY_READA
2011-03-18 13:15:40 +00:00
2010-06-14 14:27:40 +00:00
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/string.h>
#include <linux/major.h>
#include <linux/genhd.h>
#include <linux/blkdev.h>
#include "mars.h"
///////////////////////// own type definitions ////////////////////////
2011-03-07 10:27:38 +00:00
#include "mars_if.h"
2010-06-14 14:27:40 +00:00
///////////////////////// own static definitions ////////////////////////
2011-03-31 16:16:00 +00:00
// TODO: check bounds, ensure that free minor numbers are recycled
2010-06-14 14:27:40 +00:00
/* Next minor number to hand out. if_switch() post-increments it without
 * locking whenever it creates a gendisk; values are never given back.
 */
static int device_minor = 0;
2010-06-22 13:21:42 +00:00
//////////////// object / aspect constructors / destructors ///////////////
2010-06-14 14:27:40 +00:00
///////////////////////// linux operations ////////////////////////
/* callback
*/
2011-03-11 13:57:54 +00:00
static
void if_endio(struct generic_callback *cb)
2010-06-14 14:27:40 +00:00
{
2011-03-07 10:27:38 +00:00
struct if_mref_aspect *mref_a = cb->cb_private;
2011-03-11 13:57:54 +00:00
struct if_input *input;
2011-03-31 16:16:00 +00:00
int k;
int rw;
2010-08-04 17:32:04 +00:00
int error;
2010-11-26 13:45:10 +00:00
if (unlikely(!mref_a || !mref_a->object)) {
MARS_FAT("mref_a = %p mref = %p, something is very wrong here!\n", mref_a, mref_a->object);
2010-11-26 13:45:10 +00:00
return;
}
2011-03-27 15:18:38 +00:00
mars_trace(mref_a->object, "if_endio");
mars_log_trace(mref_a->object);
rw = mref_a->object->ref_rw;
MARS_IO("rw = %d bio_count = %d\n", rw, mref_a->bio_count);
2010-12-10 17:40:20 +00:00
for (k = 0; k < mref_a->bio_count; k++) {
struct bio_wrapper *biow;
struct bio *bio;
biow = mref_a->orig_biow[k];
mref_a->orig_biow[k] = NULL;
CHECK_PTR(biow, err);
CHECK_ATOMIC(&biow->bi_comp_cnt, 1);
if (!atomic_dec_and_test(&biow->bi_comp_cnt)) {
2010-12-10 17:40:20 +00:00
continue;
}
2010-11-26 13:45:10 +00:00
bio = biow->bio;
CHECK_PTR_NULL(bio, err);
#if 0
2011-04-21 16:03:04 +00:00
if (mref_a->is_kmapped) {
2011-03-31 16:16:00 +00:00
struct bio_vec *bvec;
int i;
2011-03-18 13:15:40 +00:00
bio_for_each_segment(bvec, bio, i) {
MARS_IO("kunmap %p\n", bvec->bv_page);
kunmap(bvec->bv_page);
}
2010-12-10 17:40:20 +00:00
}
2011-03-31 16:16:00 +00:00
#endif
2010-12-10 17:40:20 +00:00
error = CALLBACK_ERROR(mref_a->object);
2010-12-10 17:40:20 +00:00
if (unlikely(error < 0)) {
MARS_ERR("NYI: error=%d RETRY LOGIC %u\n", error, bio->bi_size);
} else { // bio conventions are slightly different...
error = 0;
bio->bi_size = 0;
}
MARS_IO("calling end_io() rw = %d error = %d\n", rw, error);
2010-12-10 17:40:20 +00:00
bio_endio(bio, error);
2011-03-07 05:55:10 +00:00
bio_put(bio);
brick_mem_free(biow);
2010-11-26 13:45:10 +00:00
}
2011-03-11 13:57:54 +00:00
input = mref_a->input;
if (input) {
2011-05-19 11:36:00 +00:00
atomic_dec(&input->flying_count);
if (rw) {
atomic_dec(&input->write_flying_count);
} else {
atomic_dec(&input->read_flying_count);
}
#ifdef IO_DEBUGGING
{
struct if_brick *brick = input->brick;
char *txt = brick->ops->brick_statistics(brick, false);
MARS_IO("%s", txt);
brick_string_free(txt);
}
#endif
2011-03-11 13:57:54 +00:00
}
MARS_IO("finished.\n");
return;
err:
MARS_FAT("error in callback, giving up\n");
2010-12-10 17:40:20 +00:00
}
2010-11-26 13:45:10 +00:00
2010-12-10 17:40:20 +00:00
/* Kick off plugged mrefs
*/
2011-07-15 10:12:06 +00:00
static
void _if_unplug(struct if_input *input)
2010-12-10 17:40:20 +00:00
{
2011-03-27 15:18:38 +00:00
//struct if_brick *brick = input->brick;
2010-12-10 17:40:20 +00:00
LIST_HEAD(tmp_list);
unsigned long flags;
#ifdef CONFIG_MARS_DEBUG
2010-12-10 17:40:20 +00:00
might_sleep();
#endif
2010-12-10 17:40:20 +00:00
2011-12-08 15:38:04 +00:00
MARS_IO("plugged_count = %d\n", atomic_read(&input->plugged_count));
2010-12-10 17:40:20 +00:00
down(&input->kick_sem);
traced_lock(&input->req_lock, flags);
2011-07-15 10:12:06 +00:00
#ifdef USE_TIMER
del_timer(&input->timer);
#endif
2010-12-10 17:40:20 +00:00
if (!list_empty(&input->plug_anchor)) {
// move over the whole list
list_replace_init(&input->plug_anchor, &tmp_list);
2011-03-30 12:02:50 +00:00
atomic_set(&input->plugged_count, 0);
2010-08-04 17:32:04 +00:00
}
2010-12-10 17:40:20 +00:00
traced_unlock(&input->req_lock, flags);
up(&input->kick_sem);
2010-11-26 13:45:10 +00:00
2010-12-10 17:40:20 +00:00
while (!list_empty(&tmp_list)) {
2011-03-07 10:27:38 +00:00
struct if_mref_aspect *mref_a;
2010-12-15 12:13:18 +00:00
struct mref_object *mref;
2011-03-30 12:02:50 +00:00
int hash_index;
unsigned long flags;
2011-03-07 10:27:38 +00:00
mref_a = container_of(tmp_list.next, struct if_mref_aspect, plug_head);
2010-12-10 17:40:20 +00:00
list_del_init(&mref_a->plug_head);
2011-04-21 16:03:04 +00:00
2011-03-30 12:02:50 +00:00
hash_index = mref_a->hash_index;
traced_lock(&input->hash_lock[hash_index], flags);
list_del_init(&mref_a->hash_head);
traced_unlock(&input->hash_lock[hash_index], flags);
2010-12-10 17:40:20 +00:00
mref = mref_a->object;
2010-11-26 13:45:10 +00:00
2011-04-21 16:03:04 +00:00
if (unlikely(mref_a->current_len > mref_a->max_len)) {
MARS_ERR("request len %d > %d\n", mref_a->current_len, mref_a->max_len);
}
mref->ref_len = mref_a->current_len;
2011-03-27 15:18:38 +00:00
mars_trace(mref, "if_unplug");
2011-03-24 16:05:46 +00:00
2011-05-19 11:36:00 +00:00
atomic_inc(&input->flying_count);
if (mref->ref_rw) {
atomic_inc(&input->write_flying_count);
} else {
atomic_inc(&input->read_flying_count);
}
2010-12-15 12:13:18 +00:00
GENERIC_INPUT_CALL(input, mref_io, mref);
GENERIC_INPUT_CALL(input, mref_put, mref);
2010-12-10 17:40:20 +00:00
}
#ifdef IO_DEBUGGING
{
struct if_brick *brick = input->brick;
char *txt = brick->ops->brick_statistics(brick, false);
MARS_IO("%s", txt);
brick_string_free(txt);
}
#endif
2010-06-14 14:27:40 +00:00
}
2012-09-18 06:13:33 +00:00
#if !defined(BLK_MAX_REQUEST_COUNT) && defined(USE_TIMER)
/* Timer callback: data carries the if_input whose plugged mrefs shall be
 * submitted.
 */
static
void if_timer(unsigned long data)
{
	struct if_input *input = (void*)data;

	MARS_IO("\n");
	_if_unplug(input);
}
#endif // !BLK_MAX_REQUEST_COUNT && USE_TIMER
2011-07-15 10:12:06 +00:00
2010-11-26 13:45:10 +00:00
/* accept a linux bio, convert to mref and call buf_io() on it.
2010-06-14 14:27:40 +00:00
*/
2012-09-18 06:13:33 +00:00
static
#ifdef BIO_CPU_AFFINE
int
#else
void
#endif
if_make_request(struct request_queue *q, struct bio *bio)
2010-06-14 14:27:40 +00:00
{
2012-02-26 15:23:57 +00:00
struct if_input *input = q->queuedata;
struct if_brick *brick = input->brick;
/* Original flags of the source bio
*/
const int rw = bio_data_dir(bio);
const int sectors = bio_sectors(bio);
2012-09-18 06:13:33 +00:00
#ifdef BIO_RW_RQ_MASK
2012-02-26 15:23:57 +00:00
const bool ahead = bio_rw_flagged(bio, BIO_RW_AHEAD) && rw == READ;
const bool barrier = bio_rw_flagged(bio, BIO_RW_BARRIER);
const bool syncio = bio_rw_flagged(bio, BIO_RW_SYNCIO);
const bool unplug = bio_rw_flagged(bio, BIO_RW_UNPLUG);
const bool meta = bio_rw_flagged(bio, BIO_RW_META);
const bool discard = bio_rw_flagged(bio, BIO_RW_DISCARD);
const bool noidle = bio_rw_flagged(bio, BIO_RW_NOIDLE);
2012-09-18 06:13:33 +00:00
#else
const bool ahead = bio_flagged(bio, __REQ_RAHEAD) && rw == READ;
const bool barrier = bio_flagged(bio, __REQ_FLUSH);
const bool syncio = bio_flagged(bio, __REQ_SYNC);
const bool unplug = false;
const bool meta = bio_flagged(bio, __REQ_META);
const bool discard = bio_flagged(bio, __REQ_DISCARD);
const bool noidle = bio_flagged(bio, __REQ_THROTTLED);
#endif
2012-02-26 15:23:57 +00:00
const int prio = bio_prio(bio);
/* Transform into MARS flags
*/
const int ref_prio =
(prio == IOPRIO_CLASS_RT || (meta | syncio)) ?
MARS_PRIO_HIGH :
(prio == IOPRIO_CLASS_IDLE) ?
MARS_PRIO_LOW :
MARS_PRIO_NORMAL;
const bool do_unplug = ALWAYS_UNPLUG | unplug | noidle;
const bool do_skip_sync = brick->skip_sync && !(barrier | syncio);
struct bio_wrapper *biow;
2010-12-15 12:13:18 +00:00
struct mref_object *mref = NULL;
2011-03-07 10:27:38 +00:00
struct if_mref_aspect *mref_a;
2010-11-26 13:45:10 +00:00
struct bio_vec *bvec;
int i;
2010-12-10 17:40:20 +00:00
bool assigned = false;
2010-11-26 13:45:10 +00:00
loff_t pos = ((loff_t)bio->bi_sector) << 9; // TODO: make dynamic
2011-04-08 09:52:46 +00:00
int total_len = bio->bi_size;
2010-08-04 17:32:04 +00:00
int error = -ENOSYS;
2010-06-14 14:27:40 +00:00
2012-02-26 15:23:57 +00:00
MARS_IO("bio %p "
"size = %d "
"rw = %d "
"sectors = %d "
"ahead = %d "
"barrier = %d "
"syncio = %d "
"unplug = %d "
"meta = %d "
"discard = %d "
"noidle = %d "
"prio = %d "
"pos = %lldd "
"total_len = %d\n",
bio,
bio->bi_size,
rw,
sectors,
ahead,
barrier,
syncio,
unplug,
meta,
discard,
noidle,
prio,
pos,
total_len);
2010-06-16 13:21:30 +00:00
2010-12-10 17:40:20 +00:00
might_sleep();
2012-02-26 15:23:57 +00:00
if (unlikely(!sectors)) {
2011-07-15 10:12:06 +00:00
_if_unplug(input);
/* THINK: usually this happens only at write barriers.
* We have no "barrier" operation in MARS, since
* callback semantics should always denote
* "writethrough accomplished".
* In case of exceptional semantics, we need to do
* something here. For now, we do just nothing.
*/
bio_endio(bio, 0);
2012-09-18 06:13:33 +00:00
error = 0;
goto done;
2011-07-15 10:12:06 +00:00
}
2010-06-14 14:27:40 +00:00
#ifdef DENY_READA // provisinary -- we should introduce an equivalent of READA also to the MARS infrastructure
2012-02-26 15:23:57 +00:00
if (ahead) {
2011-08-05 09:26:24 +00:00
atomic_inc(&input->total_reada_count);
bio_endio(bio, -EWOULDBLOCK);
2012-09-18 06:13:33 +00:00
error = 0;
goto done;
2011-08-05 09:26:24 +00:00
}
2012-02-26 15:23:57 +00:00
#else
(void)ahead; // shut up gcc
2011-08-05 09:26:24 +00:00
#endif
2012-02-26 15:23:57 +00:00
if (unlikely(discard)) { // NYI
bio_endio(bio, 0);
2012-09-18 06:13:33 +00:00
error = 0;
goto done;
2012-02-26 15:23:57 +00:00
}
biow = brick_mem_alloc(sizeof(struct bio_wrapper));
CHECK_PTR(biow, err);
biow->bio = bio;
atomic_set(&biow->bi_comp_cnt, 0);
2011-03-31 16:16:00 +00:00
if (rw) {
2011-05-19 11:36:00 +00:00
atomic_inc(&input->total_write_count);
2011-03-31 16:16:00 +00:00
} else {
2011-05-19 11:36:00 +00:00
atomic_inc(&input->total_read_count);
2011-03-31 16:16:00 +00:00
}
2010-12-10 17:40:20 +00:00
/* Get a reference to the bio.
* Will be released after bio_endio().
*/
atomic_inc(&bio->bi_cnt);
2011-02-23 20:48:06 +00:00
/* FIXME: THIS IS PROVISIONARY (use event instead)
2010-08-10 17:39:30 +00:00
*/
2011-02-23 20:48:06 +00:00
while (unlikely(!brick->power.led_on)) {
brick_msleep(100);
2010-08-10 17:39:30 +00:00
}
2010-12-10 17:40:20 +00:00
down(&input->kick_sem);
2010-11-26 13:45:10 +00:00
bio_for_each_segment(bvec, bio, i) {
2011-03-18 13:15:40 +00:00
struct page *page = bvec->bv_page;
2011-03-31 04:41:42 +00:00
int bv_len = bvec->bv_len;
2011-03-18 13:15:40 +00:00
int offset = bvec->bv_offset;
2011-03-31 04:41:42 +00:00
void *data;
2011-03-18 13:15:40 +00:00
data = kmap(page);
MARS_IO("page = %p data = %p\n", page, data);
2011-04-08 09:52:46 +00:00
error = -EINVAL;
if (unlikely(!data))
break;
2011-03-18 13:15:40 +00:00
data += offset;
2010-11-26 13:45:10 +00:00
while (bv_len > 0) {
2010-12-10 17:40:20 +00:00
struct list_head *tmp;
2011-03-30 12:02:50 +00:00
int hash_index;
2011-03-31 04:41:42 +00:00
int this_len = 0;
2010-12-10 17:40:20 +00:00
unsigned long flags;
mref = NULL;
mref_a = NULL;
2011-03-18 13:15:40 +00:00
MARS_IO("rw = %d i = %d pos = %lld bv_page = %p bv_offset = %d data = %p bv_len = %d\n", rw, i, pos, bvec->bv_page, bvec->bv_offset, data, bv_len);
2010-12-10 17:40:20 +00:00
2011-03-30 12:02:50 +00:00
hash_index = (pos / IF_HASH_CHUNK) % IF_HASH_MAX;
2011-03-31 04:41:42 +00:00
#ifdef REQUEST_MERGING
2011-03-30 12:02:50 +00:00
traced_lock(&input->hash_lock[hash_index], flags);
for (tmp = input->hash_table[hash_index].next; tmp != &input->hash_table[hash_index]; tmp = tmp->next) {
2011-03-07 10:27:38 +00:00
struct if_mref_aspect *tmp_a;
2011-03-31 04:41:42 +00:00
struct mref_object *tmp_mref;
int i;
2011-03-18 13:15:40 +00:00
2011-03-31 04:41:42 +00:00
tmp_a = container_of(tmp, struct if_mref_aspect, hash_head);
tmp_mref = tmp_a->object;
2011-04-21 16:03:04 +00:00
if (tmp_a->orig_page != page || tmp_mref->ref_rw != rw || tmp_a->bio_count >= MAX_BIO || tmp_a->current_len + bv_len > tmp_a->max_len) {
2010-12-10 17:40:20 +00:00
continue;
2011-03-31 04:41:42 +00:00
}
2010-12-10 17:40:20 +00:00
2011-04-21 16:03:04 +00:00
if (tmp_mref->ref_data + tmp_a->current_len == data) {
2011-03-31 04:41:42 +00:00
goto merge_end;
2011-04-21 16:03:04 +00:00
#ifdef FRONT_MERGE // FIXME: this cannot work. ref_data must never be changed. pre-allocate from offset 0 instead.
2011-03-31 04:41:42 +00:00
} else if (data + bv_len == tmp_mref->ref_data) {
goto merge_front;
#endif
}
continue;
2011-04-21 16:03:04 +00:00
#ifdef FRONT_MERGE // FIXME: this cannot work. ref_data must never be changed. pre-allocate from offset 0 instead.
2011-03-31 04:41:42 +00:00
merge_front:
tmp_mref->ref_data = data;
2011-04-21 16:03:04 +00:00
#endif
2011-03-31 04:41:42 +00:00
merge_end:
2011-04-21 16:03:04 +00:00
tmp_a->current_len += bv_len;
2011-03-31 04:41:42 +00:00
mref = tmp_mref;
mref_a = tmp_a;
this_len = bv_len;
2012-02-26 15:23:57 +00:00
if (!do_skip_sync) {
2011-04-08 09:52:46 +00:00
mref->ref_skip_sync = false;
}
2011-03-31 04:41:42 +00:00
for (i = 0; i < mref_a->bio_count; i++) {
if (mref_a->orig_biow[i]->bio == bio) {
2011-03-31 04:41:42 +00:00
goto unlock;
2011-03-24 16:05:46 +00:00
}
2011-03-31 04:41:42 +00:00
}
2010-12-10 17:40:20 +00:00
CHECK_ATOMIC(&biow->bi_comp_cnt, 0);
atomic_inc(&biow->bi_comp_cnt);
mref_a->orig_biow[mref_a->bio_count++] = biow;
2011-03-31 04:41:42 +00:00
assigned = true;
goto unlock;
} // foreach hash collision list member
unlock:
2011-03-30 12:02:50 +00:00
traced_unlock(&input->hash_lock[hash_index], flags);
2010-11-26 13:45:10 +00:00
#endif
if (!mref) {
2011-04-08 09:52:46 +00:00
int prefetch_len;
2010-11-26 13:45:10 +00:00
error = -ENOMEM;
2012-02-02 15:25:43 +00:00
mref = if_alloc_mref(brick);
2010-12-10 17:40:20 +00:00
if (unlikely(!mref)) {
up(&input->kick_sem);
2010-11-26 13:45:10 +00:00
goto err;
2010-12-10 17:40:20 +00:00
}
mref_a = if_mref_get_aspect(brick, mref);
2010-12-10 17:40:20 +00:00
if (unlikely(!mref_a)) {
up(&input->kick_sem);
2010-11-26 13:45:10 +00:00
goto err;
2010-12-10 17:40:20 +00:00
}
2011-04-08 09:52:46 +00:00
#ifdef PREFETCH_LEN
prefetch_len = PREFETCH_LEN - offset;
2011-04-21 16:03:04 +00:00
#if 1
// TODO: this restriction is too strong to be useful for performance boosts. Do better.
if (prefetch_len > total_len) {
prefetch_len = total_len;
}
#endif
if (pos + prefetch_len > brick->dev_size) {
prefetch_len = brick->dev_size - pos;
2011-04-21 16:03:04 +00:00
}
if (prefetch_len < bv_len) {
2011-04-08 09:52:46 +00:00
prefetch_len = bv_len;
2011-04-21 16:03:04 +00:00
}
2011-04-08 09:52:46 +00:00
#else
prefetch_len = bv_len;
#endif
SETUP_CALLBACK(mref, if_endio, mref_a);
2010-11-26 13:45:10 +00:00
mref_a->input = input;
mref->ref_rw = mref->ref_may_write = rw;
mref->ref_pos = pos;
2011-04-08 09:52:46 +00:00
mref->ref_len = prefetch_len;
2010-12-10 17:40:20 +00:00
mref->ref_data = data; // direct IO
2012-02-26 15:23:57 +00:00
mref->ref_prio = ref_prio;
2011-04-21 16:03:04 +00:00
mref_a->orig_page = page;
mref_a->is_kmapped = true;
2010-12-10 17:40:20 +00:00
2010-12-15 12:13:18 +00:00
error = GENERIC_INPUT_CALL(input, mref_get, mref);
2010-12-10 17:40:20 +00:00
if (unlikely(error < 0)) {
up(&input->kick_sem);
2010-11-26 13:45:10 +00:00
goto err;
2010-12-10 17:40:20 +00:00
}
2010-11-26 13:45:10 +00:00
2011-03-27 15:18:38 +00:00
mars_trace(mref, "if_start");
2011-04-08 09:52:46 +00:00
2011-04-21 16:03:04 +00:00
this_len = mref->ref_len; // now may be shorter than originally requested.
mref_a->max_len = this_len;
2011-04-08 09:52:46 +00:00
if (this_len > bv_len) {
2011-04-21 16:03:04 +00:00
this_len = bv_len;
2011-04-08 09:52:46 +00:00
}
2011-04-21 16:03:04 +00:00
mref_a->current_len = this_len;
2011-03-31 16:16:00 +00:00
if (rw) {
2011-05-19 11:36:00 +00:00
atomic_inc(&input->total_mref_write_count);
2011-03-31 16:16:00 +00:00
} else {
2011-05-19 11:36:00 +00:00
atomic_inc(&input->total_mref_read_count);
2011-03-31 16:16:00 +00:00
}
2011-03-27 15:18:38 +00:00
CHECK_ATOMIC(&biow->bi_comp_cnt, 0);
atomic_inc(&biow->bi_comp_cnt);
mref_a->orig_biow[0] = biow;
2010-12-10 17:40:20 +00:00
mref_a->bio_count = 1;
assigned = true;
2010-11-26 13:45:10 +00:00
2012-02-26 15:23:57 +00:00
if (do_skip_sync) {
2011-03-24 16:05:46 +00:00
mref->ref_skip_sync = true;
}
2011-03-30 12:02:50 +00:00
atomic_inc(&input->plugged_count);
mref_a->hash_index = hash_index;
traced_lock(&input->hash_lock[hash_index], flags);
list_add_tail(&mref_a->hash_head, &input->hash_table[hash_index]);
traced_unlock(&input->hash_lock[hash_index], flags);
2011-03-24 16:05:46 +00:00
2010-12-10 17:40:20 +00:00
traced_lock(&input->req_lock, flags);
list_add_tail(&mref_a->plug_head, &input->plug_anchor);
traced_unlock(&input->req_lock, flags);
2011-03-31 04:41:42 +00:00
} // !mref
pos += this_len;
data += this_len;
bv_len -= this_len;
2011-04-08 09:52:46 +00:00
total_len -= this_len;
2010-11-26 13:45:10 +00:00
} // while bv_len > 0
} // foreach bvec
2010-12-10 17:40:20 +00:00
up(&input->kick_sem);
2011-04-08 09:52:46 +00:00
if (likely(!total_len)) {
error = 0;
} else {
MARS_ERR("bad rest len = %d\n", total_len);
}
2010-06-14 14:27:40 +00:00
err:
2010-12-10 17:40:20 +00:00
#ifdef IO_DEBUGGING
{
char *txt = brick->ops->brick_statistics(brick, false);
MARS_IO("%s", txt);
brick_string_free(txt);
}
#endif
2010-11-26 13:45:10 +00:00
if (error < 0) {
2011-03-31 04:41:42 +00:00
MARS_ERR("cannot submit request from bio, status=%d\n", error);
2010-12-10 17:40:20 +00:00
if (assigned) {
2011-03-31 04:41:42 +00:00
//... cleanup the mess NYI
2010-12-10 17:40:20 +00:00
} else {
bio_endio(bio, error);
}
2010-11-26 13:45:10 +00:00
}
2012-02-26 15:23:57 +00:00
if (do_unplug ||
2011-03-30 12:02:50 +00:00
(brick && brick->max_plugged > 0 && atomic_read(&input->plugged_count) > brick->max_plugged)) {
2011-03-07 10:27:38 +00:00
_if_unplug(input);
2010-11-26 13:45:10 +00:00
}
2011-07-15 10:12:06 +00:00
#ifdef USE_TIMER
else {
unsigned long flags;
traced_lock(&input->req_lock, flags);
if (timer_pending(&input->timer)) {
del_timer(&input->timer);
}
input->timer.function = if_timer;
input->timer.data = (unsigned long)input;
2011-12-08 15:38:04 +00:00
input->timer.expires = jiffies + USE_TIMER;
2011-07-15 10:12:06 +00:00
add_timer(&input->timer);
traced_unlock(&input->req_lock, flags);
}
#endif
2010-12-10 17:40:20 +00:00
2012-09-18 06:13:33 +00:00
done:
#ifdef BIO_CPU_AFFINE
2010-08-04 17:32:04 +00:00
return error;
2012-09-18 06:13:33 +00:00
#else
return;
#endif
2010-06-14 14:27:40 +00:00
}
2012-09-18 06:13:33 +00:00
#ifndef BLK_MAX_REQUEST_COUNT
2011-03-18 13:15:40 +00:00
//static
void if_unplug(struct request_queue *q)
2010-06-17 16:57:10 +00:00
{
2011-12-08 15:38:04 +00:00
struct if_input *input = q->queuedata;
2011-05-06 10:25:52 +00:00
int was_plugged = 1;
#if 1
spin_lock_irq(q->queue_lock);
was_plugged = blk_remove_plug(q);
spin_unlock_irq(q->queue_lock);
#else
2010-06-17 16:57:10 +00:00
queue_flag_clear_unlocked(QUEUE_FLAG_PLUGGED, q);
2011-05-06 10:25:52 +00:00
#endif
2011-12-08 15:38:04 +00:00
was_plugged += atomic_read(&input->plugged_count);
MARS_IO("block layer called UNPLUG was_plugged = %d\n", was_plugged);
2011-12-08 15:38:04 +00:00
if (ALWAYS_UNPLUG_FROM_EXTERNAL || was_plugged) {
2011-05-06 10:25:52 +00:00
_if_unplug(input);
}
2010-06-17 16:57:10 +00:00
}
2012-09-18 06:13:33 +00:00
#endif
2010-06-17 16:57:10 +00:00
2011-03-18 13:15:40 +00:00
//static
int mars_congested(void *data, int bdi_bits)
{
struct if_input *input = data;
int ret = 0;
if (bdi_bits & (1 << BDI_sync_congested) &&
2011-05-19 11:36:00 +00:00
atomic_read(&input->flying_count) > 0) {
2011-03-18 13:15:40 +00:00
ret |= (1 << BDI_sync_congested);
}
return ret;
}
/* merge_bvec callback installed via blk_queue_merge_bvec() in if_switch().
 * An empty bio (bvm->bi_size == 0) accepts the whole segment; otherwise
 * the fixed value 128 is returned.
 */
static
int mars_merge_bvec(struct request_queue *q, struct bvec_merge_data *bvm, struct bio_vec *bvec)
{
	const unsigned int already_in_bio = bvm->bi_size;

	if (already_in_bio) {
		return 128;
	}
	return bvec->bv_len;
}
/* Returns the device capacity in units of 512 bytes (dev_size >> 9).
 * When brick->dev_size is not yet positive, it is first fetched from the
 * input via mars_get_info; if that fails, 0 is returned and dev_size is
 * left untouched.
 */
static
unsigned long compute_capacity(struct if_brick *brick)
{
	struct mars_info info = {};
	struct if_input *input;
	int status;

	if (brick->dev_size > 0) {
		return brick->dev_size >> 9; // TODO: make this dynamic
	}

	input = brick->inputs[0];
	status = GENERIC_INPUT_CALL(input, mars_get_info, &info);
	if (status < 0) {
		MARS_ERR("cannot get device info, status=%d\n", status);
		return 0;
	}
	brick->dev_size = info.current_size;

	return brick->dev_size >> 9; // TODO: make this dynamic
}
/* Forward declaration: if_switch() stores the address in disk->fops, the
 * initializer follows after if_open() / if_release().
 */
static const struct block_device_operations if_blkdev_ops;
2010-06-14 14:27:40 +00:00
2011-03-07 10:27:38 +00:00
static int if_switch(struct if_brick *brick)
2010-06-14 14:27:40 +00:00
{
2011-03-07 10:27:38 +00:00
struct if_input *input = brick->inputs[0];
2010-06-14 14:27:40 +00:00
struct request_queue *q;
struct gendisk *disk;
int minor;
2010-08-26 17:12:30 +00:00
unsigned long capacity;
int status = 0;
2010-06-14 14:27:40 +00:00
down(&brick->switch_sem);
if (brick->power.button && brick->power.led_off) {
2011-02-23 20:48:06 +00:00
mars_power_led_off((void*)brick, false);
capacity = compute_capacity(brick);
status = -ENOMEM;
2011-02-23 20:48:06 +00:00
q = blk_alloc_queue(GFP_MARS);
if (!q) {
MARS_ERR("cannot allocate device request queue\n");
goto is_down;
2011-02-23 20:48:06 +00:00
}
q->queuedata = input;
input->q = q;
disk = alloc_disk(1);
if (!disk) {
MARS_ERR("cannot allocate gendisk\n");
goto is_down;
2011-02-23 20:48:06 +00:00
}
2010-06-14 14:27:40 +00:00
2011-02-23 20:48:06 +00:00
minor = device_minor++; //TODO: protect against races (e.g. atomic_t)
2012-09-18 06:13:33 +00:00
set_disk_ro(disk, true);
2011-02-23 20:48:06 +00:00
disk->queue = q;
disk->major = MARS_MAJOR; //TODO: make this dynamic for >256 devices
disk->first_minor = minor;
2011-03-07 10:27:38 +00:00
disk->fops = &if_blkdev_ops;
2011-02-23 20:48:06 +00:00
snprintf(disk->disk_name, sizeof(disk->disk_name), "mars/%s", brick->brick_name);
2012-09-18 06:13:33 +00:00
MARS_DBG("created device name %s, capacity=%lu\n", disk->disk_name, capacity);
2011-02-23 20:48:06 +00:00
disk->private_data = input;
input->capacity = capacity;
2011-02-23 20:48:06 +00:00
set_capacity(disk, capacity);
2011-03-07 10:27:38 +00:00
blk_queue_make_request(q, if_make_request);
2011-03-18 13:15:40 +00:00
#ifdef USE_MAX_SECTORS
2012-09-18 06:13:33 +00:00
#ifdef MAX_SEGMENT_SIZE
MARS_DBG("blk_queue_max_sectors()\n");
2011-03-18 13:15:40 +00:00
blk_queue_max_sectors(q, USE_MAX_SECTORS);
2012-09-18 06:13:33 +00:00
#else
MARS_DBG("blk_queue_max_hw_sectors()\n");
blk_queue_max_hw_sectors(q, USE_MAX_SECTORS);
#endif
2011-03-18 13:15:40 +00:00
#endif
#ifdef USE_MAX_PHYS_SEGMENTS
2012-09-18 06:13:33 +00:00
#ifdef MAX_SEGMENT_SIZE
MARS_DBG("blk_queue_max_phys_segments()\n");
2011-03-18 13:15:40 +00:00
blk_queue_max_phys_segments(q, USE_MAX_PHYS_SEGMENTS);
2012-09-18 06:13:33 +00:00
#else
MARS_DBG("blk_queue_max_segments()\n");
blk_queue_max_segments(q, USE_MAX_PHYS_SEGMENTS);
#endif
2011-03-18 13:15:40 +00:00
#endif
#ifdef USE_MAX_HW_SEGMENTS
2012-09-18 06:13:33 +00:00
MARS_DBG("blk_queue_max_hw_segments()\n");
2011-03-18 13:15:40 +00:00
blk_queue_max_hw_segments(q, USE_MAX_HW_SEGMENTS);
#endif
#ifdef USE_MAX_SEGMENT_SIZE
2012-09-18 06:13:33 +00:00
MARS_DBG("blk_queue_max_segment_size()\n");
2011-03-18 13:15:40 +00:00
blk_queue_max_segment_size(q, USE_MAX_SEGMENT_SIZE);
#endif
#ifdef USE_LOGICAL_BLOCK_SIZE
2012-09-18 06:13:33 +00:00
MARS_DBG("blk_queue_logical_block_size()\n");
2011-03-18 13:15:40 +00:00
blk_queue_logical_block_size(q, USE_LOGICAL_BLOCK_SIZE);
#endif
#ifdef USE_SEGMENT_BOUNDARY
2012-09-18 06:13:33 +00:00
MARS_DBG("blk_queue_segment_boundary()\n");
2011-03-18 13:15:40 +00:00
blk_queue_segment_boundary(q, USE_SEGMENT_BOUNDARY);
#endif
2012-09-18 06:13:33 +00:00
#ifdef QUEUE_ORDERED_DRAIN
MARS_DBG("blk_queue_ordered()\n");
blk_queue_ordered(q, QUEUE_ORDERED_DRAIN, NULL);
2011-03-18 13:15:40 +00:00
#endif
2012-09-18 06:13:33 +00:00
MARS_DBG("blk_queue_bounce_limit()\n");
2011-02-23 20:48:06 +00:00
blk_queue_bounce_limit(q, BLK_BOUNCE_ANY);
2012-09-18 06:13:33 +00:00
#ifndef BLK_MAX_REQUEST_COUNT
MARS_DBG("unplug_fn\n");
2011-03-07 10:27:38 +00:00
q->unplug_fn = if_unplug;
2012-09-18 06:13:33 +00:00
#endif
MARS_DBG("queue_lock\n");
2011-02-23 20:48:06 +00:00
q->queue_lock = &input->req_lock; // needed!
input->bdev = bdget(MKDEV(disk->major, minor));
/* we have no partitions. we contain only ourselves. */
input->bdev->bd_contains = input->bdev;
2010-06-14 14:27:40 +00:00
2011-12-08 15:38:04 +00:00
#ifdef MODIFY_READAHEAD
2011-03-11 13:57:54 +00:00
MARS_INF("ra_pages OLD = %lu NEW = %d\n", q->backing_dev_info.ra_pages, brick->readahead);
q->backing_dev_info.ra_pages = brick->readahead;
2011-08-05 09:26:24 +00:00
#endif
2011-03-18 13:15:40 +00:00
#ifdef USE_CONGESTED_FN
2012-09-18 06:13:33 +00:00
MARS_DBG("congested_fn\n");
2011-02-23 20:48:06 +00:00
q->backing_dev_info.congested_fn = mars_congested;
q->backing_dev_info.congested_data = input;
2010-06-14 14:27:40 +00:00
#endif
2011-03-18 13:15:40 +00:00
#ifdef USE_MERGE_BVEC
2012-09-18 06:13:33 +00:00
MARS_DBG("blk_queue_merge_bvec()\n");
2011-02-23 20:48:06 +00:00
blk_queue_merge_bvec(q, mars_merge_bvec);
2010-06-14 14:27:40 +00:00
#endif
2011-02-23 20:48:06 +00:00
// point of no return
2012-09-18 06:13:33 +00:00
MARS_DBG("add_disk()\n");
2011-02-23 20:48:06 +00:00
add_disk(disk);
input->disk = disk;
2012-09-18 06:13:33 +00:00
#if 1
set_disk_ro(disk, false);
#else
set_device_ro(input->bdev, 0); // TODO: implement modes
#endif
status = 0;
}
if (brick->power.button) {
2011-02-23 20:48:06 +00:00
mars_power_led_on((void*)brick, true);
status = 0;
} else if (!brick->power.led_off) {
2011-02-23 20:48:06 +00:00
mars_power_led_on((void*)brick, false);
2011-03-08 16:45:52 +00:00
disk = input->disk;
if (!disk)
2011-03-23 17:58:02 +00:00
goto is_down;
#if 0
q = disk->queue;
if (q) {
blk_cleanup_queue(q);
input->q = NULL;
}
#endif
2011-03-08 16:45:52 +00:00
if (atomic_read(&input->open_count) > 0) {
MARS_INF("device '%s' is open %d times, cannot shutdown\n", disk->disk_name, atomic_read(&input->open_count));
status = -EBUSY;
goto done; // don't indicate "off" status
2011-03-08 16:45:52 +00:00
}
2011-02-23 20:48:06 +00:00
if (input->bdev) {
bdput(input->bdev);
input->bdev = NULL;
}
2011-03-23 17:58:02 +00:00
del_gendisk(input->disk);
put_disk(input->disk);
input->disk = NULL;
status = 0;
2011-03-23 17:58:02 +00:00
is_down:
2011-02-23 20:48:06 +00:00
mars_power_led_off((void*)brick, true);
}
done:
up(&brick->switch_sem);
return status;
}
//////////////// interface to the outer world (kernel) ///////////////
/* open() of the block device: counts the opener in input->open_count.
 * Always returns 0.
 */
static int if_open(struct block_device *bdev, fmode_t mode)
{
	struct gendisk *gd = bdev->bd_disk;
	struct if_input *input = gd->private_data;

	atomic_inc(&input->open_count);
	MARS_INF("----------------------- OPEN %d ------------------------------\n", atomic_read(&input->open_count));

	return 0;
}
/* release() of the block device: waits (polling every 3000 ms) until no
 * requests are flying, then drops one opener.  When the last opener is
 * gone, if_switch() is invoked and mars_trigger() is called.
 * Always returns 0.
 */
static int if_release(struct gendisk *gd, fmode_t mode)
{
	struct if_input *input = gd->private_data;
	int nr;

	MARS_INF("----------------------- CLOSE %d ------------------------------\n", atomic_read(&input->open_count));

	for (;;) {
		nr = atomic_read(&input->flying_count);
		if (nr <= 0) {
			break;
		}
		MARS_INF("%d IO requests not yet completed\n", nr);
		brick_msleep(3000);
	}

	if (!atomic_dec_and_test(&input->open_count)) {
		return 0;
	}

	if_switch(input->brick);
	mars_trigger();
	return 0;
}
/* Block device operations of the MARS device; assigned to disk->fops in
 * if_switch().  Only open and release are provided.
 */
static const struct block_device_operations if_blkdev_ops = {
.owner = THIS_MODULE,
.open = if_open,
.release = if_release,
};
2011-03-31 16:16:00 +00:00
//////////////// informational / statistics ///////////////
static
char *if_statistics(struct if_brick *brick, int verbose)
{
struct if_input *input = brick->inputs[0];
2012-02-12 11:19:57 +00:00
char *res = brick_string_alloc(512);
2011-08-05 09:26:24 +00:00
int tmp0 = atomic_read(&input->total_reada_count);
2011-05-19 11:36:00 +00:00
int tmp1 = atomic_read(&input->total_read_count);
int tmp2 = atomic_read(&input->total_mref_read_count);
int tmp3 = atomic_read(&input->total_write_count);
int tmp4 = atomic_read(&input->total_mref_write_count);
2012-08-16 13:07:05 +00:00
#if 1 // HACK!
unsigned long capacity;
capacity = compute_capacity(brick);
if (capacity > 0 && capacity != input->capacity) {
MARS_INF("changing capacity from %lld to %lld\n", (long long)input->capacity * 2, (long long)capacity * 2);
input->capacity = capacity;
set_capacity(input->disk, capacity);
}
#endif
2011-03-31 16:16:00 +00:00
if (!res)
return NULL;
2012-02-12 11:19:57 +00:00
snprintf(res, 512,
"total reada = %d "
"reads = %d "
"mref_reads = %d (%d%%) "
"writes = %d "
"mref_writes = %d (%d%%) "
"empty = %d | "
"plugged = %d "
"flying = %d "
"(reads = %d writes = %d)\n",
tmp0,
tmp1,
tmp2,
tmp1 ? tmp2 * 100 / tmp1 : 0,
tmp3,
tmp4,
tmp3 ? tmp4 * 100 / tmp3 : 0,
atomic_read(&input->total_empty_count),
atomic_read(&input->plugged_count),
atomic_read(&input->flying_count),
atomic_read(&input->read_flying_count),
atomic_read(&input->write_flying_count));
2011-03-31 16:16:00 +00:00
return res;
}
2011-05-19 11:36:00 +00:00
/* Resets all cumulative ("total_*") counters reported by if_statistics().
 * The plugged / flying counters describe live state and are left alone.
 */
static
void if_reset_statistics(struct if_brick *brick)
{
	struct if_input *input = brick->inputs[0];

	/* total_reada_count is printed as "total reada" by if_statistics()
	 * and therefore has to be reset together with the other totals.
	 */
	atomic_set(&input->total_reada_count, 0);
	atomic_set(&input->total_read_count, 0);
	atomic_set(&input->total_write_count, 0);
	atomic_set(&input->total_empty_count, 0);
	atomic_set(&input->total_mref_read_count, 0);
	atomic_set(&input->total_mref_write_count, 0);
}
2011-03-08 16:45:52 +00:00
////////////////// own brick / input / output operations //////////////////
// none
//////////////// object / aspect constructors / destructors ///////////////
static int if_mref_aspect_init_fn(struct generic_aspect *_ini)
2011-03-08 16:45:52 +00:00
{
struct if_mref_aspect *ini = (void*)_ini;
INIT_LIST_HEAD(&ini->plug_head);
2011-03-30 12:02:50 +00:00
INIT_LIST_HEAD(&ini->hash_head);
2011-03-08 16:45:52 +00:00
return 0;
}
static void if_mref_aspect_exit_fn(struct generic_aspect *_ini)
2011-03-08 16:45:52 +00:00
{
struct if_mref_aspect *ini = (void*)_ini;
CHECK_HEAD_EMPTY(&ini->plug_head);
2011-03-30 12:02:50 +00:00
CHECK_HEAD_EMPTY(&ini->hash_head);
2011-03-08 16:45:52 +00:00
}
/* NOTE(review): the macro is defined outside this file. It presumably
 * generates the if_* helpers referenced here without a visible definition
 * (if_alloc_mref, if_mref_get_aspect, if_aspect_types,
 * if_register_brick_type, if_unregister_brick_type) -- confirm.
 */
MARS_MAKE_STATICS(if);
2011-07-15 10:12:06 +00:00
//////////////////////// constructors / destructors ////////////////////////
2011-03-08 16:45:52 +00:00
/* Brick constructor: sets up switch_sem with an initial count of 1; it
 * serializes if_switch().  Always returns 0.
 */
static int if_brick_construct(struct if_brick *brick)
{
	struct semaphore *sem = &brick->switch_sem;

	sema_init(sem, 1);
	return 0;
}
/* Brick destructor: nothing to release. Always returns 0. */
static int if_brick_destruct(struct if_brick *brick)
{
return 0;
}
2011-03-07 10:27:38 +00:00
static int if_input_construct(struct if_input *input)
2010-08-26 17:12:30 +00:00
{
2011-03-30 12:02:50 +00:00
int i;
for (i = 0; i < IF_HASH_MAX; i++) {
spin_lock_init(&input->hash_lock[i]);
INIT_LIST_HEAD(&input->hash_table[i]);
}
2011-03-08 16:45:52 +00:00
INIT_LIST_HEAD(&input->plug_anchor);
sema_init(&input->kick_sem, 1);
spin_lock_init(&input->req_lock);
atomic_set(&input->open_count, 0);
2011-05-19 11:36:00 +00:00
atomic_set(&input->flying_count, 0);
atomic_set(&input->read_flying_count, 0);
atomic_set(&input->write_flying_count, 0);
2011-03-30 12:02:50 +00:00
atomic_set(&input->plugged_count, 0);
2011-07-15 10:12:06 +00:00
#ifdef USE_TIMER
init_timer(&input->timer);
#endif
2010-06-14 14:27:40 +00:00
return 0;
}
2011-03-07 10:27:38 +00:00
/* Input destructor: nothing to release.  Always returns 0. */
static int if_input_destruct(struct if_input *input)
{
	return 0;
}
2011-03-07 10:27:38 +00:00
/* Output constructor: nothing to set up.  Always returns 0. */
static int if_output_construct(struct if_output *output)
{
	return 0;
}
2010-06-14 14:27:40 +00:00
///////////////////////// static structs ////////////////////////
2011-03-07 10:27:38 +00:00
static struct if_brick_ops if_brick_ops = {
.brick_switch = if_switch,
2011-03-31 16:16:00 +00:00
.brick_statistics = if_statistics,
2011-05-19 11:36:00 +00:00
.reset_statistics = if_reset_statistics,
2010-06-14 14:27:40 +00:00
};
2011-03-07 10:27:38 +00:00
/* No output operations are provided. */
static struct if_output_ops if_output_ops = {};
2011-03-07 10:27:38 +00:00
/* Type descriptor of the single input. */
const struct if_input_type if_input_type = {
	.type_name       = "if_input",
	.input_size      = sizeof(struct if_input),
	.input_destruct  = &if_input_destruct,
	.input_construct = &if_input_construct,
};
2011-03-07 10:27:38 +00:00
/* Default input types, referenced by if_brick_type. */
static const struct if_input_type *if_input_types[] = { &if_input_type };
2011-03-07 10:27:38 +00:00
/* Type descriptor for outputs; note that if_brick_type sets max_outputs
 * to 0.
 */
const struct if_output_type if_output_type = {
	.type_name        = "if_output",
	.output_size      = sizeof(struct if_output),
	.output_construct = &if_output_construct,
	.master_ops       = &if_output_ops,
};
2011-03-23 17:58:02 +00:00
/* Default output types, referenced by if_brick_type. */
static const struct if_output_type *if_output_types[] = {
&if_output_type,
};
2011-03-07 10:27:38 +00:00
const struct if_brick_type if_brick_type = {
.type_name = "if_brick",
.brick_size = sizeof(struct if_brick),
2010-06-14 14:27:40 +00:00
.max_inputs = 1,
.max_outputs = 0,
2011-03-07 10:27:38 +00:00
.master_ops = &if_brick_ops,
.aspect_types = if_aspect_types,
2011-03-07 10:27:38 +00:00
.default_input_types = if_input_types,
2011-03-23 17:58:02 +00:00
.default_output_types = if_output_types,
2011-03-07 10:27:38 +00:00
.brick_construct = &if_brick_construct,
.brick_destruct = &if_brick_destruct,
2010-06-14 14:27:40 +00:00
};
2011-03-07 10:27:38 +00:00
EXPORT_SYMBOL_GPL(if_brick_type);
2010-06-14 14:27:40 +00:00
////////////////// module init stuff /////////////////////////
2011-08-25 10:16:32 +00:00
/* Module exit: unregisters the brick type, then the block major.  The
 * result of if_unregister_brick_type() is not evaluated.
 */
void __exit exit_mars_if(void)
{
	MARS_INF("exit_if()\n");
	(void)if_unregister_brick_type();
	unregister_blkdev(MARS_MAJOR, "mars");
}
2011-08-25 10:16:32 +00:00
/* Module init: registers the block major MARS_MAJOR under the name
 * "mars" and then the brick type.
 *
 * Returns 0 on success, otherwise the status of the failing
 * registration.  When the brick type cannot be registered, the block
 * major is released again.
 */
int __init init_mars_if(void)
{
	int status;

	(void)if_aspect_types; // not used, shut up gcc

	MARS_INF("init_if()\n");

	status = register_blkdev(MARS_MAJOR, "mars");
	if (status)
		return status;

	status = if_register_brick_type();
	if (status)
		goto err_device;

	return status;

err_device:
	MARS_ERR("init_if() status=%d\n", status);
	/* Undo only what succeeded.  Do not call exit_mars_if() here: it
	 * lives in the __exit section, which must not be referenced from
	 * __init code, and it would unregister a brick type whose
	 * registration has just failed.
	 */
	unregister_blkdev(MARS_MAJOR, "mars");
	return status;
}
2011-08-25 10:16:32 +00:00
/* Stand-alone module glue; omitted when CONFIG_MARS_HAVE_BIGMODULE is
 * defined.
 */
#ifndef CONFIG_MARS_HAVE_BIGMODULE
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Thomas Schoebel-Theuer <tst@1und1.de>");
MODULE_DESCRIPTION("MARS if");

module_init(init_mars_if);
module_exit(exit_mars_if);
#endif