// (c) 2010 Thomas Schoebel-Theuer / 1&1 Internet AG //#define BRICK_DEBUGGING //#define MARS_DEBUGGING #include #include #include #include #include #include #include #include #include #include #include #include "mars.h" ///////////////////////// own type definitions //////////////////////// #include "mars_device_sio.h" ////////////////// own brick / input / output operations ////////////////// // some code borrowed from the loopback driver static int transfer_none(int cmd, struct page *raw_page, unsigned raw_off, struct page *loop_page, unsigned loop_off, int size) { char *raw_buf = kmap_atomic(raw_page, KM_USER0) + raw_off; char *loop_buf = kmap_atomic(loop_page, KM_USER1) + loop_off; if (unlikely(!raw_buf || !loop_buf)) { MARS_ERR("transfer NULL: %p %p\n", raw_buf, loop_buf); return -EFAULT; } #if 1 if (cmd == READ) memcpy(loop_buf, raw_buf, size); else memcpy(raw_buf, loop_buf, size); #endif kunmap_atomic(raw_buf, KM_USER0); kunmap_atomic(loop_buf, KM_USER1); cond_resched(); return 0; } static void write_aops(struct device_sio_output *output, struct mars_ref_object *mref) { struct bio *bio = mref->orig_bio; loff_t pos = ((loff_t)bio->bi_sector << 9); struct file *file = output->filp; struct address_space *mapping = file->f_mapping; struct bio_vec *bvec; int i; int ret = 0; MARS_DBG("write_aops pos=%llu len=%d\n", pos, bio->bi_size); mutex_lock(&mapping->host->i_mutex); bio_for_each_segment(bvec, bio, i) { //pgoff_t index; unsigned offset, bv_offs; int len; //index = pos >> PAGE_CACHE_SHIFT; offset = pos & ((pgoff_t)PAGE_CACHE_SIZE - 1); bv_offs = bvec->bv_offset; len = bvec->bv_len; while (len > 0) { int transfer_result; unsigned size, copied; struct page *page; void *fsdata; size = PAGE_CACHE_SIZE - offset; if (size > len) size = len; ret = pagecache_write_begin(file, mapping, pos, size, 0, &page, &fsdata); if (ret) { MARS_ERR("cannot start pagecache_write_begin() error=%d\n", ret); if (ret >= 0) ret = -EIO; goto fail; } //file_update_time(file); transfer_result = transfer_none(WRITE, page, offset, bvec->bv_page, bv_offs, size); copied = size; if (transfer_result) { MARS_ERR("transfer error %d\n", transfer_result); copied = 0; } ret = pagecache_write_end(file, mapping, pos, size, copied, page, fsdata); if (ret < 0 || ret != copied || transfer_result) { MARS_ERR("write error %d\n", ret); if (ret >= 0) ret = -EIO; goto fail; } bv_offs += copied; len -= copied; offset = 0; //index++; pos += copied; } ret = 0; } fail: mutex_unlock(&mapping->host->i_mutex); mref->ref_cb->cb_error = ret; #if 1 blk_run_address_space(mapping); #endif } struct cookie_data { struct device_sio_output *output; struct mars_ref_object *mref; struct bio_vec *bvec; unsigned int offset; }; static int device_sio_splice_actor(struct pipe_inode_info *pipe, struct pipe_buffer *buf, struct splice_desc *sd) { struct cookie_data *p = sd->u.data; struct page *page = buf->page; sector_t IV; int size, ret; ret = buf->ops->confirm(pipe, buf); if (unlikely(ret)) return ret; IV = ((sector_t) page->index << (PAGE_CACHE_SHIFT - 9)) + (buf->offset >> 9); size = sd->len; if (size > p->bvec->bv_len) size = p->bvec->bv_len; if (transfer_none(READ, page, buf->offset, p->bvec->bv_page, p->offset, size)) { MARS_ERR("transfer error block %ld\n", p->bvec->bv_page->index); size = -EINVAL; } flush_dcache_page(p->bvec->bv_page); if (size > 0) p->offset += size; return size; } static int device_sio_direct_splice_actor(struct pipe_inode_info *pipe, struct splice_desc *sd) { return __splice_from_pipe(pipe, sd, device_sio_splice_actor); } static void read_aops(struct device_sio_output *output, struct mars_ref_object *mref) { struct bio *bio = mref->orig_bio; loff_t pos = ((loff_t)bio->bi_sector << 9); // TODO: make dynamic struct bio_vec *bvec; int i; int ret = -EIO; bio_for_each_segment(bvec, bio, i) { struct cookie_data cookie = { .output = output, .mref = mref, .bvec = bvec, .offset = bvec->bv_offset, }; struct splice_desc sd = { .len = 0, .total_len = bvec->bv_len, .flags = 0, .pos = pos, .u.data = &cookie, }; MARS_DBG("start splice %p %p %p %p\n", output, mref, bio, bvec); ret = 0; ret = splice_direct_to_actor(output->filp, &sd, device_sio_direct_splice_actor); if (unlikely(ret < 0)) { MARS_ERR("splice %p %p %p %p status=%d\n", output, mref, bio, bvec, ret); break; } pos += bvec->bv_len; bio->bi_size -= bvec->bv_len; } if (unlikely(bio->bi_size)) { MARS_ERR("unhandled rest size %d on bio %p\n", bio->bi_size, bio); } mref->ref_cb->cb_error = ret; } static void sync_file(struct device_sio_output *output) { struct file *file = output->filp; int ret; #if 1 ret = vfs_fsync(file, file->f_path.dentry, 0); if (unlikely(ret)) { MARS_ERR("syncing pages failed: %d\n", ret); } return; #endif } static void device_sio_ref_io(struct device_sio_output *output, struct mars_ref_object *mref, int rw) { struct bio *bio = mref->orig_bio; struct generic_callback *cb = mref->ref_cb; bool barrier = (rw != READ && bio_rw_flagged(bio, BIO_RW_BARRIER)); int test; if (barrier) { MARS_INF("got barrier request\n"); sync_file(output); } if (unlikely(!output->filp)) { cb->cb_error = -EINVAL; goto done; } if (rw == READ) { read_aops(output, mref); } else { write_aops(output, mref); if (barrier) sync_file(output); } done: #if 1 if (cb->cb_error < 0) MARS_ERR("IO error %d\n", cb->cb_error); #endif cb->cb_fn(cb); test = atomic_read(&mref->ref_count); if (test <= 0) { MARS_ERR("ref_count UNDERRUN %d\n", test); atomic_set(&mref->ref_count, 1); } if (!atomic_dec_and_test(&mref->ref_count)) return; device_sio_free_mars_ref(mref); } static void device_sio_mars_queue(struct device_sio_output *output, struct mars_ref_object *mref, int rw) { int index = 0; struct sio_threadinfo *tinfo; struct device_sio_mars_ref_aspect *mref_a; struct generic_callback *cb = mref->ref_cb; unsigned long flags; if (rw == READ) { traced_lock(&output->g_lock, flags); index = output->index++; traced_unlock(&output->g_lock, flags); index = (index % WITH_THREAD) + 1; } mref_a = device_sio_mars_ref_get_aspect(output, mref); if (unlikely(!mref_a)) { MARS_FAT("cannot get aspect\n"); cb->cb_error = -EINVAL; cb->cb_fn(cb); return; } tinfo = &output->tinfo[index]; MARS_DBG("queueing %p on %d\n", mref, index); traced_lock(&tinfo->lock, flags); mref->ref_rw = rw; list_add_tail(&mref_a->io_head, &tinfo->mref_list); traced_unlock(&tinfo->lock, flags); wake_up(&tinfo->event); } static int device_sio_thread(void *data) { struct sio_threadinfo *tinfo = data; struct device_sio_output *output = tinfo->output; MARS_INF("kthread has started.\n"); //set_user_nice(current, -20); while (!kthread_should_stop()) { struct list_head *tmp = NULL; struct mars_ref_object *mref; struct device_sio_mars_ref_aspect *mref_a; unsigned long flags; wait_event_interruptible_timeout( tinfo->event, !list_empty(&tinfo->mref_list) || kthread_should_stop(), HZ); tinfo->last_jiffies = jiffies; traced_lock(&tinfo->lock, flags); if (!list_empty(&tinfo->mref_list)) { tmp = tinfo->mref_list.next; list_del_init(tmp); } traced_unlock(&tinfo->lock, flags); if (!tmp) continue; mref_a = container_of(tmp, struct device_sio_mars_ref_aspect, io_head); mref = mref_a->object; MARS_DBG("got %p %p\n", mref_a, mref); device_sio_ref_io(output, mref, mref->ref_rw); } MARS_INF("kthread has stopped.\n"); return 0; } static int device_sio_watchdog(void *data) { struct device_sio_output *output = data; MARS_INF("watchdog has started.\n"); while (!kthread_should_stop()) { int i; msleep_interruptible(5000); for (i = 0; i <= WITH_THREAD; i++) { struct sio_threadinfo *tinfo = &output->tinfo[i]; unsigned long now = jiffies; unsigned long elapsed = now - tinfo->last_jiffies; if (elapsed > 10 * HZ) { tinfo->last_jiffies = now; MARS_ERR("thread %d is dead for more than 10 seconds.\n", i); } } } return 0; } static int device_sio_get_info(struct device_sio_output *output, struct mars_info *info) { struct file *file = output->filp; info->current_size = i_size_read(file->f_mapping->host); info->backing_file = file; return 0; } //////////////// object / aspect constructors / destructors /////////////// static int device_sio_mars_ref_aspect_init_fn(struct generic_aspect *_ini, void *_init_data) { struct device_sio_mars_ref_aspect *ini = (void*)_ini; INIT_LIST_HEAD(&ini->io_head); return 0; } static void device_sio_mars_ref_aspect_exit_fn(struct generic_aspect *_ini, void *_init_data) { struct device_sio_mars_ref_aspect *ini = (void*)_ini; (void)ini; #if 1 CHECK_HEAD_EMPTY(&ini->io_head); #endif } MARS_MAKE_STATICS(device_sio); ////////////////////// brick constructors / destructors //////////////////// static int device_sio_brick_construct(struct device_sio_brick *brick) { return 0; } static int device_sio_output_construct(struct device_sio_output *output) { mm_segment_t oldfs; int flags = O_CREAT | O_RDWR | O_LARGEFILE; int prot = 0600; char *path = "/tmp/testfile.img"; struct task_struct *watchdog; int index; oldfs = get_fs(); set_fs(get_ds()); output->filp = filp_open(path, flags, prot); set_fs(oldfs); if (IS_ERR(output->filp)) { int err = PTR_ERR(output->filp); MARS_ERR("can't open file '%s' status=%d\n", path, err); output->filp = NULL; return err; } #if 0 { struct address_space *mapping = output->filp->f_mapping; int old_gfp_mask = mapping_gfp_mask(mapping); mapping_set_gfp_mask(mapping, old_gfp_mask & ~(__GFP_IO|__GFP_FS)); } #endif spin_lock_init(&output->g_lock); output->index = 0; for (index = 0; index <= WITH_THREAD; index++) { struct sio_threadinfo *tinfo = &output->tinfo[index]; tinfo->output = output; spin_lock_init(&tinfo->lock); init_waitqueue_head(&tinfo->event); INIT_LIST_HEAD(&tinfo->mref_list); tinfo->last_jiffies = jiffies; tinfo->thread = kthread_create(device_sio_thread, tinfo, "mars_sio%d", index); if (IS_ERR(tinfo->thread)) { int error = PTR_ERR(tinfo->thread); MARS_ERR("cannot create thread, status=%d\n", error); filp_close(output->filp, NULL); return error; } wake_up_process(tinfo->thread); } watchdog = kthread_create(device_sio_watchdog, output, "mars_watchdog%d", 0); if (!IS_ERR(watchdog)) { wake_up_process(watchdog); } return 0; } static int device_sio_output_destruct(struct device_sio_output *output) { int index; for (index = 0; index <= WITH_THREAD; index++) { kthread_stop(output->tinfo[index].thread); output->tinfo[index].thread = NULL; } if (output->filp) { filp_close(output->filp, NULL); output->filp = NULL; } return 0; } ///////////////////////// static structs //////////////////////// static struct device_sio_brick_ops device_sio_brick_ops = { }; static struct device_sio_output_ops device_sio_output_ops = { .make_object_layout = device_sio_make_object_layout, .mars_ref_io = device_sio_mars_queue, .mars_get_info = device_sio_get_info, }; const struct device_sio_output_type device_sio_output_type = { .type_name = "device_sio_output", .output_size = sizeof(struct device_sio_output), .master_ops = &device_sio_output_ops, .output_construct = &device_sio_output_construct, .output_destruct = &device_sio_output_destruct, .aspect_types = device_sio_aspect_types, .layout_code = { [BRICK_OBJ_MARS_REF] = LAYOUT_NONE, } }; static const struct device_sio_output_type *device_sio_output_types[] = { &device_sio_output_type, }; const struct device_sio_brick_type device_sio_brick_type = { .type_name = "device_sio_brick", .brick_size = sizeof(struct device_sio_brick), .max_inputs = 0, .max_outputs = 1, .master_ops = &device_sio_brick_ops, .default_output_types = device_sio_output_types, .brick_construct = &device_sio_brick_construct, }; EXPORT_SYMBOL_GPL(device_sio_brick_type); ////////////////// module init stuff ///////////////////////// static int __init init_device_sio(void) { MARS_INF("init_device_sio()\n"); return device_sio_register_brick_type(); } static void __exit exit_device_sio(void) { MARS_INF("exit_device_sio()\n"); device_sio_unregister_brick_type(); } MODULE_DESCRIPTION("MARS device_sio brick"); MODULE_AUTHOR("Thomas Schoebel-Theuer "); MODULE_LICENSE("GPL"); module_init(init_device_sio); module_exit(exit_device_sio);