diff --git a/pre-patches/vanilla-4.19/0001-mars-SPECIAL-for-in-tree-build.patch b/pre-patches/vanilla-4.19/0001-mars-SPECIAL-for-in-tree-build.patch new file mode 100644 index 00000000..af0f13ee --- /dev/null +++ b/pre-patches/vanilla-4.19/0001-mars-SPECIAL-for-in-tree-build.patch @@ -0,0 +1,36 @@ +From aa34ea4ff232877673e28b27222a89a4a3e93498 Mon Sep 17 00:00:00 2001 +From: Thomas Schoebel-Theuer +Date: Fri, 28 Jun 2013 11:03:14 +0200 +Subject: [PATCH] mars: SPECIAL for in-tree build + +--- + block/Kconfig | 2 ++ + block/Makefile | 2 ++ + 2 files changed, 4 insertions(+) + +diff --git a/block/Kconfig b/block/Kconfig +index 1f2469a0123c..7c0b776276aa 100644 +--- a/block/Kconfig ++++ b/block/Kconfig +@@ -206,6 +206,8 @@ source "block/partitions/Kconfig" + + endmenu + ++source block/mars/kernel/Kconfig ++ + endif # BLOCK + + config BLOCK_COMPAT +diff --git a/block/Makefile b/block/Makefile +index 572b33f32c07..a8115cdc4112 100644 +--- a/block/Makefile ++++ b/block/Makefile +@@ -37,3 +37,5 @@ obj-$(CONFIG_BLK_WBT) += blk-wbt.o + obj-$(CONFIG_BLK_DEBUG_FS) += blk-mq-debugfs.o + obj-$(CONFIG_BLK_DEBUG_FS_ZONED)+= blk-mq-debugfs-zoned.o + obj-$(CONFIG_BLK_SED_OPAL) += sed-opal.o ++ ++obj-$(CONFIG_MARS) += mars/kernel/ +-- +2.26.2 + diff --git a/pre-patches/vanilla-4.19/0001-mars-v2-minimum-pre-patch-for-mars.patch b/pre-patches/vanilla-4.19/0001-mars-v2-minimum-pre-patch-for-mars.patch new file mode 100644 index 00000000..32952258 --- /dev/null +++ b/pre-patches/vanilla-4.19/0001-mars-v2-minimum-pre-patch-for-mars.patch @@ -0,0 +1,205 @@ +From 5feb058b6e23e4400a99fec82c8d158b6017597f Mon Sep 17 00:00:00 2001 +From: Thomas Schoebel-Theuer +Date: Mon, 25 Jan 2021 13:03:54 +0100 +Subject: [PATCH] mars: v2 minimum pre-patch for mars + +--- + fs/aio.c | 45 ++++++++++++++++++++++++++++++++++---- + fs/utimes.c | 2 ++ + include/linux/aio.h | 1 + + include/linux/syscalls.h | 10 +++++++++ + include/uapi/linux/major.h | 1 + + mm/page_alloc.c | 2 ++ + 6 files changed, 57 
insertions(+), 4 deletions(-) + +diff --git a/fs/aio.c b/fs/aio.c +index 413ec289bfa1..b21db36fdb82 100644 +--- a/fs/aio.c ++++ b/fs/aio.c +@@ -216,6 +216,7 @@ struct aio_kiocb { + static DEFINE_SPINLOCK(aio_nr_lock); + unsigned long aio_nr; /* current system wide number of aio requests */ + unsigned long aio_max_nr = 0x10000; /* system wide maximum number of aio requests */ ++EXPORT_SYMBOL_GPL(aio_max_nr); + /*----end sysctl variables---*/ + + static struct kmem_cache *kiocb_cachep; +@@ -1301,7 +1302,7 @@ static long read_events(struct kioctx *ctx, long min_nr, long nr, + * pointer is passed for ctxp. Will fail with -ENOSYS if not + * implemented. + */ +-SYSCALL_DEFINE2(io_setup, unsigned, nr_events, aio_context_t __user *, ctxp) ++long ksys_io_setup(unsigned nr_events, aio_context_t *ctxp) + { + struct kioctx *ioctx = NULL; + unsigned long ctx; +@@ -1330,6 +1331,12 @@ SYSCALL_DEFINE2(io_setup, unsigned, nr_events, aio_context_t __user *, ctxp) + out: + return ret; + } ++EXPORT_SYMBOL_GPL(ksys_io_setup); ++ ++SYSCALL_DEFINE2(io_setup, unsigned, nr_events, aio_context_t __user *, ctxp) ++{ ++ return ksys_io_setup(nr_events, ctxp); ++} + + #ifdef CONFIG_COMPAT + COMPAT_SYSCALL_DEFINE2(io_setup, unsigned, nr_events, u32 __user *, ctx32p) +@@ -1370,7 +1377,7 @@ COMPAT_SYSCALL_DEFINE2(io_setup, unsigned, nr_events, u32 __user *, ctx32p) + * implemented. May fail with -EINVAL if the context pointed to + * is invalid. 
+ */ +-SYSCALL_DEFINE1(io_destroy, aio_context_t, ctx) ++long ksys_io_destroy(aio_context_t ctx) + { + struct kioctx *ioctx = lookup_ioctx(ctx); + if (likely(NULL != ioctx)) { +@@ -1399,6 +1406,12 @@ SYSCALL_DEFINE1(io_destroy, aio_context_t, ctx) + pr_debug("EINVAL: invalid context id\n"); + return -EINVAL; + } ++EXPORT_SYMBOL_GPL(ksys_io_destroy); ++ ++SYSCALL_DEFINE1(io_destroy, aio_context_t, ctx) ++{ ++ return ksys_io_destroy(ctx); ++} + + static void aio_remove_iocb(struct aio_kiocb *iocb) + { +@@ -1921,8 +1934,8 @@ static int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb, + * are available to queue any iocbs. Will return 0 if nr is 0. Will + * fail with -ENOSYS if not implemented. + */ +-SYSCALL_DEFINE3(io_submit, aio_context_t, ctx_id, long, nr, +- struct iocb __user * __user *, iocbpp) ++long ksys_io_submit(aio_context_t ctx_id, long nr, ++ struct iocb __user *__user *iocbpp) + { + struct kioctx *ctx; + long ret = 0; +@@ -1959,6 +1972,13 @@ SYSCALL_DEFINE3(io_submit, aio_context_t, ctx_id, long, nr, + percpu_ref_put(&ctx->users); + return i ? i : ret; + } ++EXPORT_SYMBOL_GPL(ksys_io_submit); ++ ++SYSCALL_DEFINE3(io_submit, aio_context_t, ctx_id, long, nr, ++ struct iocb __user * __user *, iocbpp) ++{ ++ return ksys_io_submit(ctx_id, nr, iocbpp); ++} + + #ifdef CONFIG_COMPAT + COMPAT_SYSCALL_DEFINE3(io_submit, compat_aio_context_t, ctx_id, +@@ -2073,6 +2093,23 @@ static long do_io_getevents(aio_context_t ctx_id, + return ret; + } + ++long ksys_io_getevents(aio_context_t ctx_id, ++ long min_nr, ++ long nr, ++ struct io_event __user * events, ++ struct timespec __user * timeout) ++{ ++ struct timespec64 ts; ++ ++ if (timeout) { ++ if (unlikely(get_timespec64(&ts, timeout))) ++ return -EFAULT; ++ } ++ ++ return do_io_getevents(ctx_id, min_nr, nr, events, timeout ? 
&ts : NULL); ++} ++EXPORT_SYMBOL_GPL(ksys_io_getevents); ++ + /* io_getevents: + * Attempts to read at least min_nr events and up to nr events from + * the completion queue for the aio_context specified by ctx_id. If +diff --git a/fs/utimes.c b/fs/utimes.c +index 69d4b6ba1bfb..0215521c5dbe 100644 +--- a/fs/utimes.c ++++ b/fs/utimes.c +@@ -1,4 +1,5 @@ + // SPDX-License-Identifier: GPL-2.0 ++#include + #include + #include + #include +@@ -164,6 +165,7 @@ long do_utimes(int dfd, const char __user *filename, struct timespec64 *times, + out: + return error; + } ++EXPORT_SYMBOL(do_utimes); + + SYSCALL_DEFINE4(utimensat, int, dfd, const char __user *, filename, + struct timespec __user *, utimes, int, flags) +diff --git a/include/linux/aio.h b/include/linux/aio.h +index b83e68dd006f..62061e975682 100644 +--- a/include/linux/aio.h ++++ b/include/linux/aio.h +@@ -12,6 +12,7 @@ typedef int (kiocb_cancel_fn)(struct kiocb *); + + /* prototypes */ + #ifdef CONFIG_AIO ++#define HAS_AIO_MAX + extern void exit_aio(struct mm_struct *mm); + void kiocb_set_cancel_fn(struct kiocb *req, kiocb_cancel_fn *cancel); + #else +diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h +index 2ff814c92f7f..73daac4b9207 100644 +--- a/include/linux/syscalls.h ++++ b/include/linux/syscalls.h +@@ -1126,6 +1126,16 @@ asmlinkage long sys_ni_syscall(void); + * the ksys_xyzyyz() functions prototyped below. 
+ */ + ++long ksys_io_submit(aio_context_t ctx_id, long nr, ++ struct iocb __user *__user *iocbpp); ++long ksys_io_getevents(aio_context_t ctx_id, ++ long min_nr, ++ long nr, ++ struct io_event __user * events, ++ struct timespec __user * timeout); ++long ksys_io_setup(unsigned nr_events, aio_context_t *ctxp); ++long ksys_io_destroy(aio_context_t ctx); ++ + int ksys_mount(char __user *dev_name, char __user *dir_name, char __user *type, + unsigned long flags, void __user *data); + int ksys_umount(char __user *name, int flags); +diff --git a/include/uapi/linux/major.h b/include/uapi/linux/major.h +index 7e5fa8e15c43..edfbce01e4ac 100644 +--- a/include/uapi/linux/major.h ++++ b/include/uapi/linux/major.h +@@ -149,6 +149,7 @@ + #define UNIX98_PTY_SLAVE_MAJOR (UNIX98_PTY_MASTER_MAJOR+UNIX98_PTY_MAJOR_COUNT) + + #define DRBD_MAJOR 147 ++#define MARS_MAJOR 148 + #define RTF_MAJOR 150 + #define RAW_MAJOR 162 + +diff --git a/mm/page_alloc.c b/mm/page_alloc.c +index 4446a523e684..0f5271b870dd 100644 +--- a/mm/page_alloc.c ++++ b/mm/page_alloc.c +@@ -263,6 +263,7 @@ compound_page_dtor * const compound_page_dtors[] = { + }; + + int min_free_kbytes = 1024; ++EXPORT_SYMBOL(min_free_kbytes); + int user_min_free_kbytes = -1; + int watermark_scale_factor = 10; + +@@ -7337,6 +7338,7 @@ static void __setup_per_zone_wmarks(void) + /* update totalreserve_pages */ + calculate_totalreserve_pages(); + } ++EXPORT_SYMBOL(setup_per_zone_wmarks); + + /** + * setup_per_zone_wmarks - called when min_free_kbytes changes +-- +2.26.2 + diff --git a/pre-patches/vanilla-4.19/0001-sched-wait-fix-endless-kthread-loop-at-timeout.patch b/pre-patches/vanilla-4.19/0001-sched-wait-fix-endless-kthread-loop-at-timeout.patch new file mode 100644 index 00000000..77fbe380 --- /dev/null +++ b/pre-patches/vanilla-4.19/0001-sched-wait-fix-endless-kthread-loop-at-timeout.patch @@ -0,0 +1,104 @@ +From aef8a9c9917a83b94ed30bcd4ec400e51cfea937 Mon Sep 17 00:00:00 2001 +From: Thomas Schoebel-Theuer +Date: Fri, 8 Mar 
2019 08:21:15 +0100 +Subject: [PATCH] sched/wait: fix endless kthread loop at timeout + +Scenario, possible since kernel 4.11.x and later: + +1) kthread calls a waiting function with a timeout, and blocks. +2) kthread_stop() is called by somebody else. +3) kthread makes some progress, but neither finishes nor times out. +4) Nothing happens => normally the timeout would be reached by the kthread. + +However, the condition in wait_woken() now prevents the call to +schedule_timeout(). +As a consequence, the timeout value will never be decreased, resulting +in an endless loop. + +This fix ensures the following semantics: kthread_should_stop() is treated +as equivalent to a timeout. This is beneficial because most users do not +want to wait for the timeout, but to stop the kthread as soon as possible. +It appears that this semantics was probably intended (otherwise the check +is_kthread_should_stop() would not make much sense), but just went wrong +due to the bug. + +Here is an example, triggered by external kernel module MARS on a +production kernel. However, the problem can also be triggered by other +kthreads and on newer kernels, and also in very different scenarios, +not only during tcp_recvmsg(). + +In the following example, the kthread simply waits for network packets +to arrive, but in the test scenario the network had been blocked +underneath by a firewall rule in order to trigger the bug: + +Mar 08 07:40:08 icpu5133 kernel: watchdog: BUG: soft lockup - CPU#29 stuck for 23s! 
[mars_receiver8.:8139] +Mar 08 07:40:08 icpu5133 kernel: Modules linked in: mars(-) ip6table_mangle ip6table_raw iptable_raw ip_set_bitmap_port xt_DSCP xt_multiport ip_set_hash_ip xt_own +Mar 08 07:40:08 icpu5133 kernel: irq event stamp: 300719885 +Mar 08 07:40:08 icpu5133 kernel: hardirqs last enabled at (300719883): [] _raw_spin_unlock_irqrestore+0x3d/0x4f +Mar 08 07:40:08 icpu5133 kernel: hardirqs last disabled at (300719885): [] apic_timer_interrupt+0x82/0x90 +Mar 08 07:40:08 icpu5133 kernel: softirqs last enabled at (300719878): [] lock_sock_nested+0x50/0x98 +Mar 08 07:40:08 icpu5133 kernel: softirqs last disabled at (300719884): [] release_sock+0x16/0xda +Mar 08 07:40:08 icpu5133 kernel: CPU: 29 PID: 8139 Comm: mars_receiver8. Not tainted 4.14.104+ #121 +Mar 08 07:40:08 icpu5133 kernel: Hardware name: Dell Inc. PowerEdge R630/02C2CP, BIOS 2.5.5 08/16/2017 +Mar 08 07:40:08 icpu5133 kernel: task: ffff88bf82764fc0 task.stack: ffffc90012430000 +Mar 08 07:40:08 icpu5133 kernel: RIP: 0010:arch_local_irq_restore+0x2/0x8 +Mar 08 07:40:08 icpu5133 kernel: RSP: 0018:ffffc90012433b78 EFLAGS: 00000246 ORIG_RAX: ffffffffffffff10 +Mar 08 07:40:08 icpu5133 kernel: RAX: 0000000000000000 RBX: ffff88bf82764fc0 RCX: 00000000fec792b4 +Mar 08 07:40:08 icpu5133 kernel: RDX: 00000000c18b50d3 RSI: 0000000000000000 RDI: 0000000000000246 +Mar 08 07:40:08 icpu5133 kernel: RBP: 0000000000000001 R08: 0000000000000001 R09: 0000000000000000 +Mar 08 07:40:08 icpu5133 kernel: R10: ffffc90012433b08 R11: ffffc90012433ba8 R12: 0000000000000246 +Mar 08 07:40:08 icpu5133 kernel: R13: ffffffff819df735 R14: 0000000000000001 R15: ffff88bf82765818 +Mar 08 07:40:08 icpu5133 kernel: FS: 0000000000000000(0000) GS:ffff88c05fb80000(0000) knlGS:0000000000000000 +Mar 08 07:40:08 icpu5133 kernel: CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 +Mar 08 07:40:08 icpu5133 kernel: CR2: 000055abd12eb688 CR3: 000000000241e006 CR4: 00000000001606e0 +Mar 08 07:40:08 icpu5133 kernel: Call Trace: +Mar 08 07:40:08 
icpu5133 kernel: lock_release+0x32f/0x33b +Mar 08 07:40:08 icpu5133 kernel: release_sock+0x90/0xda +Mar 08 07:40:08 icpu5133 kernel: sk_wait_data+0x7f/0x13f +Mar 08 07:40:08 icpu5133 kernel: ? prepare_to_wait_exclusive+0xc1/0xc1 +Mar 08 07:40:08 icpu5133 kernel: tcp_recvmsg+0x4e6/0x91a +Mar 08 07:40:08 icpu5133 kernel: ? flush_signals+0x2b/0x6a +Mar 08 07:40:08 icpu5133 kernel: ? lock_acquire+0x20a/0x25a +Mar 08 07:40:08 icpu5133 kernel: inet_recvmsg+0x8d/0xc0 +Mar 08 07:40:08 icpu5133 kernel: kernel_recvmsg+0x8f/0xaa +Mar 08 07:40:08 icpu5133 kernel: ? ___might_sleep+0xf2/0x256 +Mar 08 07:40:08 icpu5133 kernel: mars_recv_raw+0x22a/0x4da [mars] +Mar 08 07:40:08 icpu5133 kernel: desc_recv_struct+0x40/0x375 [mars] +Mar 08 07:40:08 icpu5133 kernel: receiver_thread+0xa2/0x61a [mars] +Mar 08 07:40:08 icpu5133 kernel: ? _hash_insert+0x160/0x160 [mars] +Mar 08 07:40:08 icpu5133 kernel: ? kthread+0x1a6/0x1ae +Mar 08 07:40:08 icpu5133 kernel: kthread+0x1a6/0x1ae +Mar 08 07:40:08 icpu5133 kernel: ? __list_del_entry+0x60/0x60 +Mar 08 07:40:08 icpu5133 kernel: ret_from_fork+0x3a/0x50 +Mar 08 07:40:08 icpu5133 kernel: Code: ee e8 c5 17 00 00 48 85 db 75 0e 31 f6 48 c7 c7 c0 5f 53 82 e8 68 b9 58 00 48 89 5b 58 58 5b 5d c3 9c 58 0f 1f 44 00 00 c3 + +Signed-off-by: Thomas Schoebel-Theuer +--- + kernel/sched/wait.c | 11 +++++++++-- + 1 file changed, 9 insertions(+), 2 deletions(-) + +diff --git a/kernel/sched/wait.c b/kernel/sched/wait.c +index 5dd47f1103d1..3e3fa1d319cf 100644 +--- a/kernel/sched/wait.c ++++ b/kernel/sched/wait.c +@@ -415,8 +415,15 @@ long wait_woken(struct wait_queue_entry *wq_entry, unsigned mode, long timeout) + * or woken_wake_function() sees our store to current->state. + */ + set_current_state(mode); /* A */ +- if (!(wq_entry->flags & WQ_FLAG_WOKEN) && !is_kthread_should_stop()) +- timeout = schedule_timeout(timeout); ++ if (!(wq_entry->flags & WQ_FLAG_WOKEN)) { ++ /* ++ * Treat kthread stopping as equivalent to a timeout. 
++ */ ++ if (is_kthread_should_stop()) ++ timeout = 0; ++ else ++ timeout = schedule_timeout(timeout); ++ } + __set_current_state(TASK_RUNNING); + + /* +-- +2.26.2 + diff --git a/pre-patches/vanilla-5.4/0001-mars-SPECIAL-for-in-tree-build.patch b/pre-patches/vanilla-5.4/0001-mars-SPECIAL-for-in-tree-build.patch new file mode 100644 index 00000000..b87ccdb6 --- /dev/null +++ b/pre-patches/vanilla-5.4/0001-mars-SPECIAL-for-in-tree-build.patch @@ -0,0 +1,36 @@ +From f23cc6f8e7d224ee74a4f9d67b91587343e6de60 Mon Sep 17 00:00:00 2001 +From: Thomas Schoebel-Theuer +Date: Fri, 28 Jun 2013 11:03:14 +0200 +Subject: [PATCH] mars: SPECIAL for in-tree build + +--- + block/Kconfig | 2 ++ + block/Makefile | 2 ++ + 2 files changed, 4 insertions(+) + +diff --git a/block/Kconfig b/block/Kconfig +index 41c0917ce622..ccf181798cb7 100644 +--- a/block/Kconfig ++++ b/block/Kconfig +@@ -183,6 +183,8 @@ source "block/partitions/Kconfig" + + endmenu + ++source "block/mars/kernel/Kconfig" ++ + endif # BLOCK + + config BLOCK_COMPAT +diff --git a/block/Makefile b/block/Makefile +index 9ef57ace90d4..c074adff6d3a 100644 +--- a/block/Makefile ++++ b/block/Makefile +@@ -36,3 +36,5 @@ obj-$(CONFIG_BLK_DEBUG_FS) += blk-mq-debugfs.o + obj-$(CONFIG_BLK_DEBUG_FS_ZONED)+= blk-mq-debugfs-zoned.o + obj-$(CONFIG_BLK_SED_OPAL) += sed-opal.o + obj-$(CONFIG_BLK_PM) += blk-pm.o ++ ++obj-$(CONFIG_MARS) += mars/kernel/ +-- +2.26.2 + diff --git a/pre-patches/vanilla-5.4/0001-mars-v2-minimum-pre-patch-for-mars.patch b/pre-patches/vanilla-5.4/0001-mars-v2-minimum-pre-patch-for-mars.patch new file mode 100644 index 00000000..f4f1ee93 --- /dev/null +++ b/pre-patches/vanilla-5.4/0001-mars-v2-minimum-pre-patch-for-mars.patch @@ -0,0 +1,214 @@ +From 716c3f0a4bc57d0bc045e299463cd191e3560cb5 Mon Sep 17 00:00:00 2001 +From: Thomas Schoebel-Theuer +Date: Mon, 25 Jan 2021 13:03:54 +0100 +Subject: [PATCH] mars: v2 minimum pre-patch for mars + +--- + fs/aio.c | 47 ++++++++++++++++++++++++++++++-------- + fs/utimes.c | 2 ++ + 
include/linux/aio.h | 1 + + include/linux/syscalls.h | 9 ++++++++ + include/uapi/linux/major.h | 1 + + mm/page_alloc.c | 2 ++ + 6 files changed, 53 insertions(+), 9 deletions(-) + +diff --git a/fs/aio.c b/fs/aio.c +index 47bb7b5685ba..f7ff812892ca 100644 +--- a/fs/aio.c ++++ b/fs/aio.c +@@ -223,6 +223,7 @@ struct aio_kiocb { + static DEFINE_SPINLOCK(aio_nr_lock); + unsigned long aio_nr; /* current system wide number of aio requests */ + unsigned long aio_max_nr = 0x10000; /* system wide maximum number of aio requests */ ++EXPORT_SYMBOL_GPL(aio_max_nr); + /*----end sysctl variables---*/ + + static struct kmem_cache *kiocb_cachep; +@@ -1310,7 +1311,7 @@ static long read_events(struct kioctx *ctx, long min_nr, long nr, + * pointer is passed for ctxp. Will fail with -ENOSYS if not + * implemented. + */ +-SYSCALL_DEFINE2(io_setup, unsigned, nr_events, aio_context_t __user *, ctxp) ++long ksys_io_setup(unsigned nr_events, aio_context_t *ctxp) + { + struct kioctx *ioctx = NULL; + unsigned long ctx; +@@ -1339,6 +1340,12 @@ SYSCALL_DEFINE2(io_setup, unsigned, nr_events, aio_context_t __user *, ctxp) + out: + return ret; + } ++EXPORT_SYMBOL_GPL(ksys_io_setup); ++ ++SYSCALL_DEFINE2(io_setup, unsigned, nr_events, aio_context_t __user *, ctxp) ++{ ++ return ksys_io_setup(nr_events, ctxp); ++} + + #ifdef CONFIG_COMPAT + COMPAT_SYSCALL_DEFINE2(io_setup, unsigned, nr_events, u32 __user *, ctx32p) +@@ -1379,7 +1386,7 @@ COMPAT_SYSCALL_DEFINE2(io_setup, unsigned, nr_events, u32 __user *, ctx32p) + * implemented. May fail with -EINVAL if the context pointed to + * is invalid. 
+ */ +-SYSCALL_DEFINE1(io_destroy, aio_context_t, ctx) ++long ksys_io_destroy(aio_context_t ctx) + { + struct kioctx *ioctx = lookup_ioctx(ctx); + if (likely(NULL != ioctx)) { +@@ -1408,6 +1415,12 @@ SYSCALL_DEFINE1(io_destroy, aio_context_t, ctx) + pr_debug("EINVAL: invalid context id\n"); + return -EINVAL; + } ++EXPORT_SYMBOL_GPL(ksys_io_destroy); ++ ++SYSCALL_DEFINE1(io_destroy, aio_context_t, ctx) ++{ ++ return ksys_io_destroy(ctx); ++} + + static void aio_remove_iocb(struct aio_kiocb *iocb) + { +@@ -1912,8 +1925,8 @@ static int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb, + * are available to queue any iocbs. Will return 0 if nr is 0. Will + * fail with -ENOSYS if not implemented. + */ +-SYSCALL_DEFINE3(io_submit, aio_context_t, ctx_id, long, nr, +- struct iocb __user * __user *, iocbpp) ++long ksys_io_submit(aio_context_t ctx_id, long nr, ++ struct iocb __user *__user *iocbpp) + { + struct kioctx *ctx; + long ret = 0; +@@ -1952,6 +1965,13 @@ SYSCALL_DEFINE3(io_submit, aio_context_t, ctx_id, long, nr, + percpu_ref_put(&ctx->users); + return i ? 
i : ret; + } ++EXPORT_SYMBOL_GPL(ksys_io_submit); ++ ++SYSCALL_DEFINE3(io_submit, aio_context_t, ctx_id, long, nr, ++ struct iocb __user * __user *, iocbpp) ++{ ++ return ksys_io_submit(ctx_id, nr, iocbpp); ++} + + #ifdef CONFIG_COMPAT + COMPAT_SYSCALL_DEFINE3(io_submit, compat_aio_context_t, ctx_id, +@@ -2082,11 +2102,11 @@ static long do_io_getevents(aio_context_t ctx_id, + */ + #if !defined(CONFIG_64BIT_TIME) || defined(CONFIG_64BIT) + +-SYSCALL_DEFINE5(io_getevents, aio_context_t, ctx_id, +- long, min_nr, +- long, nr, +- struct io_event __user *, events, +- struct __kernel_timespec __user *, timeout) ++int ksys_io_getevents(aio_context_t ctx_id, ++ long min_nr, ++ long nr, ++ struct io_event __user * events, ++ struct __kernel_timespec __user * timeout) + { + struct timespec64 ts; + int ret; +@@ -2099,7 +2119,16 @@ SYSCALL_DEFINE5(io_getevents, aio_context_t, ctx_id, + ret = -EINTR; + return ret; + } ++EXPORT_SYMBOL_GPL(ksys_io_getevents); + ++SYSCALL_DEFINE5(io_getevents, aio_context_t, ctx_id, ++ long, min_nr, ++ long, nr, ++ struct io_event __user *, events, ++ struct __kernel_timespec __user *, timeout) ++{ ++ return ksys_io_getevents(ctx_id, min_nr, nr, events, timeout); ++} + #endif + + struct __aio_sigset { +diff --git a/fs/utimes.c b/fs/utimes.c +index 090739322463..43a2f990c61f 100644 +--- a/fs/utimes.c ++++ b/fs/utimes.c +@@ -1,4 +1,5 @@ + // SPDX-License-Identifier: GPL-2.0 ++#include + #include + #include + #include +@@ -133,6 +134,7 @@ long do_utimes(int dfd, const char __user *filename, struct timespec64 *times, + out: + return error; + } ++EXPORT_SYMBOL(do_utimes); + + SYSCALL_DEFINE4(utimensat, int, dfd, const char __user *, filename, + struct __kernel_timespec __user *, utimes, int, flags) +diff --git a/include/linux/aio.h b/include/linux/aio.h +index b83e68dd006f..62061e975682 100644 +--- a/include/linux/aio.h ++++ b/include/linux/aio.h +@@ -12,6 +12,7 @@ typedef int (kiocb_cancel_fn)(struct kiocb *); + + /* prototypes */ + #ifdef CONFIG_AIO 
++#define HAS_AIO_MAX + extern void exit_aio(struct mm_struct *mm); + void kiocb_set_cancel_fn(struct kiocb *req, kiocb_cancel_fn *cancel); + #else +diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h +index f7c561c4dcdd..22bf0a5e2aa6 100644 +--- a/include/linux/syscalls.h ++++ b/include/linux/syscalls.h +@@ -1230,6 +1230,15 @@ asmlinkage long sys_ni_syscall(void); + * Instead, use one of the functions which work equivalently, such as + * the ksys_xyzyyz() functions prototyped below. + */ ++long ksys_io_submit(aio_context_t ctx_id, long nr, ++ struct iocb __user *__user *iocbpp); ++int ksys_io_getevents(aio_context_t ctx_id, ++ long min_nr, ++ long nr, ++ struct io_event __user * events, ++ struct __kernel_timespec __user * timeout); ++long ksys_io_setup(unsigned nr_events, aio_context_t *ctxp); ++long ksys_io_destroy(aio_context_t ctx); + + int ksys_mount(const char __user *dev_name, const char __user *dir_name, + const char __user *type, unsigned long flags, void __user *data); +diff --git a/include/uapi/linux/major.h b/include/uapi/linux/major.h +index 7e5fa8e15c43..edfbce01e4ac 100644 +--- a/include/uapi/linux/major.h ++++ b/include/uapi/linux/major.h +@@ -149,6 +149,7 @@ + #define UNIX98_PTY_SLAVE_MAJOR (UNIX98_PTY_MASTER_MAJOR+UNIX98_PTY_MAJOR_COUNT) + + #define DRBD_MAJOR 147 ++#define MARS_MAJOR 148 + #define RTF_MAJOR 150 + #define RAW_MAJOR 162 + +diff --git a/mm/page_alloc.c b/mm/page_alloc.c +index 4357f5475a50..abbb0bd169d4 100644 +--- a/mm/page_alloc.c ++++ b/mm/page_alloc.c +@@ -315,6 +315,7 @@ compound_page_dtor * const compound_page_dtors[] = { + }; + + int min_free_kbytes = 1024; ++EXPORT_SYMBOL(min_free_kbytes); + int user_min_free_kbytes = -1; + #ifdef CONFIG_DISCONTIGMEM + /* +@@ -7826,6 +7827,7 @@ static void __setup_per_zone_wmarks(void) + /* update totalreserve_pages */ + calculate_totalreserve_pages(); + } ++EXPORT_SYMBOL(setup_per_zone_wmarks); + + /** + * setup_per_zone_wmarks - called when min_free_kbytes changes +-- +2.26.2 
+ diff --git a/pre-patches/vanilla-5.4/0001-sched-wait-fix-endless-kthread-loop-at-timeout.patch b/pre-patches/vanilla-5.4/0001-sched-wait-fix-endless-kthread-loop-at-timeout.patch new file mode 100644 index 00000000..f997bed4 --- /dev/null +++ b/pre-patches/vanilla-5.4/0001-sched-wait-fix-endless-kthread-loop-at-timeout.patch @@ -0,0 +1,104 @@ +From ffa9a3c42d05ec2af33756bfabea7717a2299fef Mon Sep 17 00:00:00 2001 +From: Thomas Schoebel-Theuer +Date: Fri, 8 Mar 2019 08:21:15 +0100 +Subject: [PATCH] sched/wait: fix endless kthread loop at timeout + +Scenario, possible since kernel 4.11.x and later: + +1) kthread calls a waiting function with a timeout, and blocks. +2) kthread_stop() is called by somebody else. +3) kthread makes some progress, but neither finishes nor times out. +4) Nothing happens => normally the timeout would be reached by the kthread. + +However, the condition in wait_woken() now prevents the call to +schedule_timeout(). +As a consequence, the timeout value will never be decreased, resulting +in an endless loop. + +This fix ensures the following semantics: kthread_should_stop() is treated +as equivalent to a timeout. This is beneficial because most users do not +want to wait for the timeout, but to stop the kthread as soon as possible. +It appears that this semantics was probably intended (otherwise the check +is_kthread_should_stop() would not make much sense), but just went wrong +due to the bug. + +Here is an example, triggered by external kernel module MARS on a +production kernel. However, the problem can also be triggered by other +kthreads and on newer kernels, and also in very different scenarios, +not only during tcp_recvmsg(). + +In the following example, the kthread simply waits for network packets +to arrive, but in the test scenario the network had been blocked +underneath by a firewall rule in order to trigger the bug: + +Mar 08 07:40:08 icpu5133 kernel: watchdog: BUG: soft lockup - CPU#29 stuck for 23s! 
[mars_receiver8.:8139] +Mar 08 07:40:08 icpu5133 kernel: Modules linked in: mars(-) ip6table_mangle ip6table_raw iptable_raw ip_set_bitmap_port xt_DSCP xt_multiport ip_set_hash_ip xt_own +Mar 08 07:40:08 icpu5133 kernel: irq event stamp: 300719885 +Mar 08 07:40:08 icpu5133 kernel: hardirqs last enabled at (300719883): [] _raw_spin_unlock_irqrestore+0x3d/0x4f +Mar 08 07:40:08 icpu5133 kernel: hardirqs last disabled at (300719885): [] apic_timer_interrupt+0x82/0x90 +Mar 08 07:40:08 icpu5133 kernel: softirqs last enabled at (300719878): [] lock_sock_nested+0x50/0x98 +Mar 08 07:40:08 icpu5133 kernel: softirqs last disabled at (300719884): [] release_sock+0x16/0xda +Mar 08 07:40:08 icpu5133 kernel: CPU: 29 PID: 8139 Comm: mars_receiver8. Not tainted 4.14.104+ #121 +Mar 08 07:40:08 icpu5133 kernel: Hardware name: Dell Inc. PowerEdge R630/02C2CP, BIOS 2.5.5 08/16/2017 +Mar 08 07:40:08 icpu5133 kernel: task: ffff88bf82764fc0 task.stack: ffffc90012430000 +Mar 08 07:40:08 icpu5133 kernel: RIP: 0010:arch_local_irq_restore+0x2/0x8 +Mar 08 07:40:08 icpu5133 kernel: RSP: 0018:ffffc90012433b78 EFLAGS: 00000246 ORIG_RAX: ffffffffffffff10 +Mar 08 07:40:08 icpu5133 kernel: RAX: 0000000000000000 RBX: ffff88bf82764fc0 RCX: 00000000fec792b4 +Mar 08 07:40:08 icpu5133 kernel: RDX: 00000000c18b50d3 RSI: 0000000000000000 RDI: 0000000000000246 +Mar 08 07:40:08 icpu5133 kernel: RBP: 0000000000000001 R08: 0000000000000001 R09: 0000000000000000 +Mar 08 07:40:08 icpu5133 kernel: R10: ffffc90012433b08 R11: ffffc90012433ba8 R12: 0000000000000246 +Mar 08 07:40:08 icpu5133 kernel: R13: ffffffff819df735 R14: 0000000000000001 R15: ffff88bf82765818 +Mar 08 07:40:08 icpu5133 kernel: FS: 0000000000000000(0000) GS:ffff88c05fb80000(0000) knlGS:0000000000000000 +Mar 08 07:40:08 icpu5133 kernel: CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 +Mar 08 07:40:08 icpu5133 kernel: CR2: 000055abd12eb688 CR3: 000000000241e006 CR4: 00000000001606e0 +Mar 08 07:40:08 icpu5133 kernel: Call Trace: +Mar 08 07:40:08 
icpu5133 kernel: lock_release+0x32f/0x33b +Mar 08 07:40:08 icpu5133 kernel: release_sock+0x90/0xda +Mar 08 07:40:08 icpu5133 kernel: sk_wait_data+0x7f/0x13f +Mar 08 07:40:08 icpu5133 kernel: ? prepare_to_wait_exclusive+0xc1/0xc1 +Mar 08 07:40:08 icpu5133 kernel: tcp_recvmsg+0x4e6/0x91a +Mar 08 07:40:08 icpu5133 kernel: ? flush_signals+0x2b/0x6a +Mar 08 07:40:08 icpu5133 kernel: ? lock_acquire+0x20a/0x25a +Mar 08 07:40:08 icpu5133 kernel: inet_recvmsg+0x8d/0xc0 +Mar 08 07:40:08 icpu5133 kernel: kernel_recvmsg+0x8f/0xaa +Mar 08 07:40:08 icpu5133 kernel: ? ___might_sleep+0xf2/0x256 +Mar 08 07:40:08 icpu5133 kernel: mars_recv_raw+0x22a/0x4da [mars] +Mar 08 07:40:08 icpu5133 kernel: desc_recv_struct+0x40/0x375 [mars] +Mar 08 07:40:08 icpu5133 kernel: receiver_thread+0xa2/0x61a [mars] +Mar 08 07:40:08 icpu5133 kernel: ? _hash_insert+0x160/0x160 [mars] +Mar 08 07:40:08 icpu5133 kernel: ? kthread+0x1a6/0x1ae +Mar 08 07:40:08 icpu5133 kernel: kthread+0x1a6/0x1ae +Mar 08 07:40:08 icpu5133 kernel: ? __list_del_entry+0x60/0x60 +Mar 08 07:40:08 icpu5133 kernel: ret_from_fork+0x3a/0x50 +Mar 08 07:40:08 icpu5133 kernel: Code: ee e8 c5 17 00 00 48 85 db 75 0e 31 f6 48 c7 c7 c0 5f 53 82 e8 68 b9 58 00 48 89 5b 58 58 5b 5d c3 9c 58 0f 1f 44 00 00 c3 + +Signed-off-by: Thomas Schoebel-Theuer +--- + kernel/sched/wait.c | 11 +++++++++-- + 1 file changed, 9 insertions(+), 2 deletions(-) + +diff --git a/kernel/sched/wait.c b/kernel/sched/wait.c +index c1e566a114ca..08f121154a91 100644 +--- a/kernel/sched/wait.c ++++ b/kernel/sched/wait.c +@@ -412,8 +412,15 @@ long wait_woken(struct wait_queue_entry *wq_entry, unsigned mode, long timeout) + * or woken_wake_function() sees our store to current->state. + */ + set_current_state(mode); /* A */ +- if (!(wq_entry->flags & WQ_FLAG_WOKEN) && !is_kthread_should_stop()) +- timeout = schedule_timeout(timeout); ++ if (!(wq_entry->flags & WQ_FLAG_WOKEN)) { ++ /* ++ * Treat kthread stopping as equivalent to a timeout. 
++ */ ++ if (is_kthread_should_stop()) ++ timeout = 0; ++ else ++ timeout = schedule_timeout(timeout); ++ } + __set_current_state(TASK_RUNNING); + + /* +-- +2.26.2 +