From 409536c0109d621ef47560aa4626b68a17bcdb96 Mon Sep 17 00:00:00 2001 From: Thomas Schoebel-Theuer Date: Wed, 30 Nov 2011 15:23:44 +0100 Subject: [PATCH] new pre-patch for kernel 2.6.32 + adaptions --- mars_aio.c | 42 +++++ mars_if.c | 57 +++--- mars_if.h | 11 +- pre-patches/README | 21 +++ pre-patches/mars-prepatch-2.6.32.patch | 235 +++++++++++++++++++++++++ sy_old/mars_light.c | 3 +- 6 files changed, 343 insertions(+), 26 deletions(-) create mode 100644 pre-patches/README create mode 100644 pre-patches/mars-prepatch-2.6.32.patch diff --git a/mars_aio.c b/mars_aio.c index 07517cb6..a6a83182 100644 --- a/mars_aio.c +++ b/mars_aio.c @@ -348,6 +348,48 @@ int aio_start_thread(struct aio_output *output, int i, int(*fn)(void*)) return 0; } +#if 1 +/* The following _could_ go to kernel/kthread.c. + * However, we need it only for a workaround here. + * This has some conceptual shortcomings, so I will not + * force that. + */ +#if 1 // remove this for migration to kernel/kthread.c +struct kthread { + int should_stop; + struct completion exited; +}; +#define to_kthread(tsk) \ + container_of((tsk)->vfork_done, struct kthread, exited) +#endif +/** + * kthread_stop_nowait - like kthread_stop(), but don't wait for termination. + * @k: thread created by kthread_create(). + * + * If threadfn() may call do_exit() itself, the caller must ensure + * task_struct can't go away. + * + * Therefore, you must not call this twice (or after kthread_stop()), at least + * if you don't get_task_struct() yourself. 
+ */ +void kthread_stop_nowait(struct task_struct *k) +{ + struct kthread *kthread; + +#if 0 // enable this after migration to kernel/kthread.c + trace_sched_kthread_stop(k); +#endif + + kthread = to_kthread(k); + barrier(); /* it might have exited */ + if (k->vfork_done != NULL) { + kthread->should_stop = 1; + wake_up_process(k); + } +} +//EXPORT_SYMBOL(kthread_stop_nowait); +#endif + static void aio_stop_thread(struct aio_output *output, int i, bool do_submit_dummy) { diff --git a/mars_if.c b/mars_if.c index 6e1a86f1..e23459c9 100644 --- a/mars_if.c +++ b/mars_if.c @@ -56,7 +56,6 @@ void if_endio(struct generic_callback *cb) { struct if_mref_aspect *mref_a = cb->cb_private; struct if_input *input; - struct bio *bio; int k; int rw = 0; int error; @@ -72,19 +71,23 @@ void if_endio(struct generic_callback *cb) MARS_IO("bio_count = %d\n", mref_a->bio_count); for (k = 0; k < mref_a->bio_count; k++) { - bio = mref_a->orig_bio[k]; - mref_a->orig_bio[k] = NULL; - if (unlikely(!bio)) { - MARS_FAT("callback with no bio called (k = %d). 
something is very wrong here!\n", k); + struct bio_wrapper *biow; + struct bio *bio; + + biow = mref_a->orig_biow[k]; + mref_a->orig_biow[k] = NULL; + CHECK_PTR(biow, err); + + CHECK_ATOMIC(&biow->bi_comp_cnt, 1); + if (!atomic_dec_and_test(&biow->bi_comp_cnt)) { continue; } + bio = biow->bio; + CHECK_PTR_NULL(bio, err); + rw = bio->bi_rw & 1; - CHECK_ATOMIC(&bio->bi_comp_cnt, 1); - if (!atomic_dec_and_test(&bio->bi_comp_cnt)) { - continue; - } #if 1 if (mref_a->is_kmapped) { struct bio_vec *bvec; @@ -106,6 +109,7 @@ void if_endio(struct generic_callback *cb) MARS_IO("calling end_io() rw = %d error = %d\n", rw, error); bio_endio(bio, error); bio_put(bio); + brick_mem_free(biow); } input = mref_a->input; if (input) { @@ -125,6 +129,10 @@ void if_endio(struct generic_callback *cb) #endif } MARS_IO("finished.\n"); + return; + +err: + MARS_FAT("error in callback, giving up\n"); } /* Kick off plugged mrefs @@ -208,6 +216,7 @@ void if_timer(unsigned long data) */ static int if_make_request(struct request_queue *q, struct bio *bio) { + struct bio_wrapper *biow; struct if_input *input; struct if_brick *brick = NULL; struct mref_object *mref = NULL; @@ -250,16 +259,21 @@ static int if_make_request(struct request_queue *q, struct bio *bio) return 0; } #endif + + brick = input->brick; + CHECK_PTR(brick, err); + + biow = brick_mem_alloc(sizeof(struct bio_wrapper)); + CHECK_PTR(biow, err); + biow->bio = bio; + atomic_set(&biow->bi_comp_cnt, 0); + if (rw) { atomic_inc(&input->total_write_count); } else { atomic_inc(&input->total_read_count); } - brick = input->brick; - if (unlikely(!brick)) - goto err; - /* Get a reference to the bio. * Will be released after bio_endio(). 
*/ @@ -271,9 +285,6 @@ static int if_make_request(struct request_queue *q, struct bio *bio) msleep(100); } - _CHECK_ATOMIC(&bio->bi_comp_cnt, !=, 0); - atomic_set(&bio->bi_comp_cnt, 0); - #ifdef IO_DEBUGGING { const unsigned short prio = bio_prio(bio); @@ -347,14 +358,14 @@ static int if_make_request(struct request_queue *q, struct bio *bio) } for (i = 0; i < mref_a->bio_count; i++) { - if (mref_a->orig_bio[i] == bio) { + if (mref_a->orig_biow[i]->bio == bio) { goto unlock; } } - CHECK_ATOMIC(&bio->bi_comp_cnt, 0); - atomic_inc(&bio->bi_comp_cnt); - mref_a->orig_bio[mref_a->bio_count++] = bio; + CHECK_ATOMIC(&biow->bi_comp_cnt, 0); + atomic_inc(&biow->bi_comp_cnt); + mref_a->orig_biow[mref_a->bio_count++] = biow; assigned = true; goto unlock; } // foreach hash collision list member @@ -424,9 +435,9 @@ static int if_make_request(struct request_queue *q, struct bio *bio) atomic_inc(&input->total_mref_read_count); } - CHECK_ATOMIC(&bio->bi_comp_cnt, 0); - atomic_inc(&bio->bi_comp_cnt); - mref_a->orig_bio[0] = bio; + CHECK_ATOMIC(&biow->bi_comp_cnt, 0); + atomic_inc(&biow->bi_comp_cnt); + mref_a->orig_biow[0] = biow; mref_a->bio_count = 1; assigned = true; diff --git a/mars_if.h b/mars_if.h index 53b734c5..c2684cdf 100644 --- a/mars_if.h +++ b/mars_if.h @@ -14,6 +14,15 @@ //#define USE_TIMER +/* I don't want to enhance / intrude into struct bio for compatibility reasons + * (support for a variety of kernel versions). + * The following is just a silly workaround which could be removed again. 
+ */ +struct bio_wrapper { + struct bio *bio; + atomic_t bi_comp_cnt; +}; + struct if_mref_aspect { GENERIC_ASPECT(mref); struct list_head plug_head; @@ -24,7 +33,7 @@ struct if_mref_aspect { int max_len; bool is_kmapped; struct page *orig_page; - struct bio *orig_bio[MAX_BIO]; + struct bio_wrapper *orig_biow[MAX_BIO]; struct if_input *input; }; diff --git a/pre-patches/README b/pre-patches/README new file mode 100644 index 00000000..2fa4a29d --- /dev/null +++ b/pre-patches/README @@ -0,0 +1,21 @@ +The patches in this directory have to be applied to the +pristine kernel sources, _before_ MARS can be compiled. + +The main reason is that MARS needs some additional EXPORT_SYMBOL +statements not present in most kernels. + +Therefore the pre-patches are rather simple and lightweight. + +Usage: + +cd /path/to/your/kernel/source +patch -p1 < /path/to/matching/pre-patch.patch +cd block/ +git clone git://url/to/mars.git +# (or another way of putting the mars sources into block/ ) + +# afterwards, configure and compile your kernel as usual, e.g. +cd .. # go back to the root of the kernel sources +make oldconfig +make +# ... and so on diff --git a/pre-patches/mars-prepatch-2.6.32.patch b/pre-patches/mars-prepatch-2.6.32.patch new file mode 100644 index 00000000..25e59852 --- /dev/null +++ b/pre-patches/mars-prepatch-2.6.32.patch @@ -0,0 +1,235 @@ +diff -uri linux-2.6.32.14.orig/block/Kconfig linux-2.6.32.14.new.00/block/Kconfig +--- linux-2.6.32.14.orig/block/Kconfig 2010-05-26 23:29:57.000000000 +0200 ++++ linux-2.6.32.14.new.00/block/Kconfig 2010-06-11 17:05:53.000000000 +0200 +@@ -77,6 +77,8 @@ + T10/SCSI Data Integrity Field or the T13/ATA External Path + Protection. If in doubt, say N. 
+ ++source block/mars/Kconfig ++ + endif # BLOCK + + config BLOCK_COMPAT +diff -uri linux-2.6.32.14.orig/block/Makefile linux-2.6.32.14.new.00/block/Makefile +--- linux-2.6.32.14.orig/block/Makefile 2010-05-26 23:29:57.000000000 +0200 ++++ linux-2.6.32.14.new.00/block/Makefile 2010-06-11 17:10:46.000000000 +0200 +@@ -15,3 +15,5 @@ + + obj-$(CONFIG_BLOCK_COMPAT) += compat_ioctl.o + obj-$(CONFIG_BLK_DEV_INTEGRITY) += blk-integrity.o ++ ++obj-$(CONFIG_MARS) += mars/ +\ No newline at end of file +--- linux-2.6.32.14.orig/fs/aio.c 2010-05-31 14:09:40.000000000 +0200 ++++ linux-2.6.32.14.new.00/fs/aio.c 2011-01-26 16:02:44.000000000 +0100 +@@ -1267,6 +1268,7 @@ + out: + return ret; + } ++EXPORT_SYMBOL(sys_io_setup); + + /* sys_io_destroy: + * Destroy the aio_context specified. May cancel any outstanding +@@ -1284,6 +1286,7 @@ + pr_debug("EINVAL: io_destroy: invalid context id\n"); + return -EINVAL; + } ++EXPORT_SYMBOL(sys_io_destroy); + + static void aio_advance_iovec(struct kiocb *iocb, ssize_t ret) + { +@@ -1674,6 +1677,7 @@ + put_ioctx(ctx); + return i ? i : ret; + } ++EXPORT_SYMBOL(sys_io_submit); + + /* lookup_kiocb + * Finds a given iocb for cancellation. 
+@@ -1753,6 +1757,7 @@ + + return ret; + } ++EXPORT_SYMBOL(sys_io_cancel); + + /* io_getevents: + * Attempts to read at least min_nr events and up to nr events from +@@ -1784,3 +1789,4 @@ + asmlinkage_protect(5, ret, ctx_id, min_nr, nr, events, timeout); + return ret; + } ++EXPORT_SYMBOL(sys_io_getevents); +diff -uri linux-2.6.32.14.orig/fs/namei.c linux-2.6.32.14.new.00/fs/namei.c +--- linux-2.6.32.14.orig/fs/namei.c 2010-05-31 14:09:40.000000000 +0200 ++++ linux-2.6.32.14.new.00/fs/namei.c 2011-02-21 15:14:05.000000000 +0100 +@@ -3105,12 +3105,16 @@ + EXPORT_SYMBOL(vfs_follow_link); + EXPORT_SYMBOL(vfs_link); + EXPORT_SYMBOL(vfs_mkdir); ++EXPORT_SYMBOL(sys_mkdir); + EXPORT_SYMBOL(vfs_mknod); + EXPORT_SYMBOL(generic_permission); + EXPORT_SYMBOL(vfs_readlink); + EXPORT_SYMBOL(vfs_rename); ++EXPORT_SYMBOL(sys_rename); + EXPORT_SYMBOL(vfs_rmdir); + EXPORT_SYMBOL(vfs_symlink); ++EXPORT_SYMBOL(sys_symlink); + EXPORT_SYMBOL(vfs_unlink); ++EXPORT_SYMBOL(sys_unlink); + EXPORT_SYMBOL(dentry_unhash); + EXPORT_SYMBOL(generic_readlink); +diff -uri linux-2.6.32.14.orig/fs/open.c linux-2.6.32.14.new.00/fs/open.c +--- linux-2.6.32.14.orig/fs/open.c 2010-05-31 14:09:40.000000000 +0200 ++++ linux-2.6.32.14.new.00/fs/open.c 2011-02-28 12:36:20.000000000 +0100 +@@ -719,6 +719,7 @@ + { + return sys_fchmodat(AT_FDCWD, filename, mode); + } ++EXPORT_SYMBOL(sys_chmod); + + static int chown_common(struct dentry * dentry, uid_t user, gid_t group, struct vfsmount *mnt) + { +@@ -810,6 +811,7 @@ + out: + return error; + } ++EXPORT_SYMBOL(sys_lchown); + + SYSCALL_DEFINE3(fchown, unsigned int, fd, uid_t, user, gid_t, group) + { +diff -uri linux-2.6.32.14.orig/fs/utimes.c linux-2.6.32.14.new.00/fs/utimes.c +--- linux-2.6.32.14.orig/fs/utimes.c 2010-05-31 14:09:40.000000000 +0200 ++++ linux-2.6.32.14.new.00/fs/utimes.c 2011-02-25 12:40:00.000000000 +0100 +@@ -1,3 +1,4 @@ ++#include + #include + #include + #include +@@ -176,6 +177,7 @@ + out: + return error; + } ++EXPORT_SYMBOL(do_utimes); + + 
SYSCALL_DEFINE4(utimensat, int, dfd, char __user *, filename, + struct timespec __user *, utimes, int, flags) +diff -uri linux-2.6.32.14.orig/include/linux/major.h linux-2.6.32.14.new.00/include/linux/major.h +--- linux-2.6.32.14.orig/include/linux/major.h 2010-05-26 23:29:57.000000000 +0200 ++++ linux-2.6.32.14.new.00/include/linux/major.h 2010-06-13 10:08:53.000000000 +0200 +@@ -146,6 +146,7 @@ + #define UNIX98_PTY_SLAVE_MAJOR (UNIX98_PTY_MASTER_MAJOR+UNIX98_PTY_MAJOR_COUNT) + + #define DRBD_MAJOR 147 ++#define MARS_MAJOR 148 + #define RTF_MAJOR 150 + #define RAW_MAJOR 162 + +diff -uri linux-2.6.32.14.orig/include/linux/sched.h linux-2.6.32.14.new.00/include/linux/sched.h +--- linux-2.6.32.14.orig/include/linux/sched.h 2010-05-31 14:09:46.000000000 +0200 ++++ linux-2.6.32.14.new.00/include/linux/sched.h 2010-07-29 15:37:05.000000000 +0200 +@@ -1429,6 +1429,9 @@ + /* mutex deadlock detection */ + struct mutex_waiter *blocked_on; + #endif ++#ifdef CONFIG_DEBUG_SPINLOCK ++ atomic_t lock_count; ++#endif + #ifdef CONFIG_TRACE_IRQFLAGS + unsigned int irq_events; + int hardirqs_enabled; +diff -uri linux-2.6.32.14.orig/kernel/fork.c linux-2.6.32.14.new.00/kernel/fork.c +--- linux-2.6.32.14.orig/kernel/fork.c 2010-05-31 14:09:46.000000000 +0200 ++++ linux-2.6.32.14.new.00/kernel/fork.c 2011-02-17 10:37:57.000000000 +0100 +@@ -169,6 +169,7 @@ + if (!profile_handoff_task(tsk)) + free_task(tsk); + } ++EXPORT_SYMBOL(__put_task_struct); + + /* + * macro override instead of weak attribute alias, to workaround +diff -uri linux-2.6.32.14.orig/lib/debug_locks.c linux-2.6.32.14.new.00/lib/debug_locks.c +--- linux-2.6.32.14.orig/lib/debug_locks.c 2010-05-26 23:29:57.000000000 +0200 ++++ linux-2.6.32.14.new.00/lib/debug_locks.c 2010-12-10 17:08:44.000000000 +0100 +@@ -45,3 +45,4 @@ + } + return 0; + } ++EXPORT_SYMBOL(debug_locks_off); +diff -uri linux-2.6.32.14.orig/mm/init-mm.c linux-2.6.32.14.new.00/mm/init-mm.c +--- linux-2.6.32.14.orig/mm/init-mm.c 2010-05-26 23:29:57.000000000 
+0200 ++++ linux-2.6.32.14.new.00/mm/init-mm.c 2011-01-26 16:53:17.000000000 +0100 +@@ -4,6 +4,8 @@ + #include + #include + #include ++#include ++#include + + #include + #include +@@ -17,4 +19,10 @@ + .page_table_lock = __SPIN_LOCK_UNLOCKED(init_mm.page_table_lock), + .mmlist = LIST_HEAD_INIT(init_mm.mmlist), + .cpu_vm_mask = CPU_MASK_ALL, ++#ifdef CONFIG_AIO ++ .ioctx_lock = __SPIN_LOCK_UNLOCKED(init_mm.ioctx_lock), ++ .ioctx_list = HLIST_HEAD_INIT, ++ .get_unmapped_area = arch_get_unmapped_area, ++#endif + }; ++EXPORT_SYMBOL(init_mm); +diff -uri linux-2.6.32.14.orig/mm/mmu_context.c linux-2.6.32.14.new.00/mm/mmu_context.c +--- linux-2.6.32.14.orig/mm/mmu_context.c 2010-05-26 23:29:57.000000000 +0200 ++++ linux-2.6.32.14.new.00/mm/mmu_context.c 2010-11-16 17:12:01.000000000 +0100 +@@ -6,6 +6,7 @@ + #include + #include + #include ++#include + + #include + +@@ -37,6 +38,7 @@ + if (active_mm != mm) + mmdrop(active_mm); + } ++EXPORT_SYMBOL(use_mm); + + /* + * unuse_mm +@@ -56,3 +58,4 @@ + enter_lazy_tlb(mm, tsk); + task_unlock(tsk); + } ++EXPORT_SYMBOL(unuse_mm); +diff -uri linux-2.6.32.14.orig/mm/page_alloc.c linux-2.6.32.14.new.00/mm/page_alloc.c +--- linux-2.6.32.14.orig/mm/page_alloc.c 2010-05-31 14:09:46.000000000 +0200 ++++ linux-2.6.32.14.new.00/mm/page_alloc.c 2011-03-07 14:47:16.000000000 +0100 +@@ -122,6 +122,7 @@ + }; + + int min_free_kbytes = 1024; ++EXPORT_SYMBOL(min_free_kbytes); + + static unsigned long __meminitdata nr_kernel_pages; + static unsigned long __meminitdata nr_all_pages; +@@ -4769,6 +4770,7 @@ + /* update totalreserve_pages */ + calculate_totalreserve_pages(); + } ++EXPORT_SYMBOL(setup_per_zone_wmarks); + + /* + * The inactive anon list should be small enough that the VM never has to +--- a/mm/swap_state.c~ 2010-12-09 22:29:45.000000000 +0100 ++++ b/mm/swap_state.c 2011-06-27 14:45:43.000000000 +0200 +@@ -46,6 +46,7 @@ + .i_mmap_nonlinear = LIST_HEAD_INIT(swapper_space.i_mmap_nonlinear), + .backing_dev_info = &swap_backing_dev_info, + }; 
++EXPORT_SYMBOL(swapper_space); + + #define INC_CACHE_INFO(x) do { swap_cache_info.x++; } while (0) + diff --git a/sy_old/mars_light.c b/sy_old/mars_light.c index 3017e523..4277aab7 100644 --- a/sy_old/mars_light.c +++ b/sy_old/mars_light.c @@ -3386,11 +3386,10 @@ static void __exit exit_light(void) if (thread) { main_thread = NULL; MARS_DBG("=== stopping light thread...\n"); - kthread_stop_nowait(thread); - mars_trigger(); MARS_INF("stopping thread...\n"); kthread_stop(thread); put_task_struct(thread); + mars_trigger(); } brick_allow_freelist = false;