diff --git a/target/linux/generic/backport-6.6/600-v6.10-net-Remove-conditional-threaded-NAPI-wakeup-based-on.patch b/target/linux/generic/backport-6.6/600-v6.10-net-Remove-conditional-threaded-NAPI-wakeup-based-on.patch new file mode 100644 index 0000000000..ef7963b601 --- /dev/null +++ b/target/linux/generic/backport-6.6/600-v6.10-net-Remove-conditional-threaded-NAPI-wakeup-based-on.patch @@ -0,0 +1,75 @@ +From 56364c910691f6d10ba88c964c9041b9ab777bd6 Mon Sep 17 00:00:00 2001 +From: Sebastian Andrzej Siewior +Date: Mon, 25 Mar 2024 08:40:28 +0100 +Subject: [PATCH 1/4] net: Remove conditional threaded-NAPI wakeup based on + task state. + +A NAPI thread is scheduled by first setting NAPI_STATE_SCHED bit. If +successful (the bit was not yet set) then the NAPI_STATE_SCHED_THREADED +is set but only if thread's state is not TASK_INTERRUPTIBLE (is +TASK_RUNNING) followed by task wakeup. + +If the task is idle (TASK_INTERRUPTIBLE) then the +NAPI_STATE_SCHED_THREADED bit is not set. The thread is no relying on +the bit but always leaving the wait-loop after returning from schedule() +because there must have been a wakeup. + +The smpboot-threads implementation for per-CPU threads requires an +explicit condition and does not support "if we get out of schedule() +then there must be something to do". + +Removing this optimisation simplifies the following integration. + +Set NAPI_STATE_SCHED_THREADED unconditionally on wakeup and rely on it +in the wait path by removing the `woken' condition. + +Acked-by: Jakub Kicinski +Signed-off-by: Sebastian Andrzej Siewior +Signed-off-by: Paolo Abeni +--- + net/core/dev.c | 14 ++------------ + 1 file changed, 2 insertions(+), 12 deletions(-) + +--- a/net/core/dev.c ++++ b/net/core/dev.c +@@ -4473,13 +4473,7 @@ static inline void ____napi_schedule(str + */ + thread = READ_ONCE(napi->thread); + if (thread) { +- /* Avoid doing set_bit() if the thread is in +- * INTERRUPTIBLE state, cause napi_thread_wait() +- * makes sure to proceed with napi polling +- * if the thread is explicitly woken from here. +- */ +- if (READ_ONCE(thread->__state) != TASK_INTERRUPTIBLE) +- set_bit(NAPI_STATE_SCHED_THREADED, &napi->state); ++ set_bit(NAPI_STATE_SCHED_THREADED, &napi->state); + wake_up_process(thread); + return; + } +@@ -6635,8 +6629,6 @@ static int napi_poll(struct napi_struct + + static int napi_thread_wait(struct napi_struct *napi) + { +- bool woken = false; +- + set_current_state(TASK_INTERRUPTIBLE); + + while (!kthread_should_stop()) { +@@ -6645,15 +6637,13 @@ static int napi_thread_wait(struct napi_ + * Testing SCHED bit is not enough because SCHED bit might be + * set by some other busy poll thread or by napi_disable(). + */ +- if (test_bit(NAPI_STATE_SCHED_THREADED, &napi->state) || woken) { ++ if (test_bit(NAPI_STATE_SCHED_THREADED, &napi->state)) { + WARN_ON(!list_empty(&napi->poll_list)); + __set_current_state(TASK_RUNNING); + return 0; + } + + schedule(); +- /* woken being true indicates this thread owns this napi. */ +- woken = true; + set_current_state(TASK_INTERRUPTIBLE); + } + __set_current_state(TASK_RUNNING); diff --git a/target/linux/generic/backport-6.6/601-v6.10-net-Allow-to-use-SMP-threads-for-backlog-NAPI.patch b/target/linux/generic/backport-6.6/601-v6.10-net-Allow-to-use-SMP-threads-for-backlog-NAPI.patch new file mode 100644 index 0000000000..3a7962fd88 --- /dev/null +++ b/target/linux/generic/backport-6.6/601-v6.10-net-Allow-to-use-SMP-threads-for-backlog-NAPI.patch @@ -0,0 +1,330 @@ +From dad6b97702639fba27a2bd3e986982ad6f0db3a7 Mon Sep 17 00:00:00 2001 +From: Sebastian Andrzej Siewior +Date: Mon, 25 Mar 2024 08:40:29 +0100 +Subject: [PATCH 2/4] net: Allow to use SMP threads for backlog NAPI. + +Backlog NAPI is a per-CPU NAPI struct only (with no device behind it) +used by drivers which don't do NAPI them self, RPS and parts of the +stack which need to avoid recursive deadlocks while processing a packet. + +The non-NAPI driver use the CPU local backlog NAPI. If RPS is enabled +then a flow for the skb is computed and based on the flow the skb can be +enqueued on a remote CPU. Scheduling/ raising the softirq (for backlog's +NAPI) on the remote CPU isn't trivial because the softirq is only +scheduled on the local CPU and performed after the hardirq is done. +In order to schedule a softirq on the remote CPU, an IPI is sent to the +remote CPU which schedules the backlog-NAPI on the then local CPU. + +On PREEMPT_RT interrupts are force-threaded. The soft interrupts are +raised within the interrupt thread and processed after the interrupt +handler completed still within the context of the interrupt thread. The +softirq is handled in the context where it originated. + +With force-threaded interrupts enabled, ksoftirqd is woken up if a +softirq is raised from hardirq context. This is the case if it is raised +from an IPI. Additionally there is a warning on PREEMPT_RT if the +softirq is raised from the idle thread. +This was done for two reasons: +- With threaded interrupts the processing should happen in thread + context (where it originated) and ksoftirqd is the only thread for + this context if raised from hardirq. Using the currently running task + instead would "punish" a random task. +- Once ksoftirqd is active it consumes all further softirqs until it + stops running. This changed recently and is no longer the case. + +Instead of keeping the backlog NAPI in ksoftirqd (in force-threaded/ +PREEMPT_RT setups) I am proposing NAPI-threads for backlog. +The "proper" setup with threaded-NAPI is not doable because the threads +are not pinned to an individual CPU and can be modified by the user. +Additionally a dummy network device would have to be assigned. Also +CPU-hotplug has to be considered if additional CPUs show up. +All this can be probably done/ solved but the smpboot-threads already +provide this infrastructure. + +Sending UDP packets over loopback expects that the packet is processed +within the call. Delaying it by handing it over to the thread hurts +performance. It is not beneficial to the outcome if the context switch +happens immediately after enqueue or after a while to process a few +packets in a batch. +There is no need to always use the thread if the backlog NAPI is +requested on the local CPU. This restores the loopback throuput. The +performance drops mostly to the same value after enabling RPS on the +loopback comparing the IPI and the tread result. + +Create NAPI-threads for backlog if request during boot. The thread runs +the inner loop from napi_threaded_poll(), the wait part is different. It +checks for NAPI_STATE_SCHED (the backlog NAPI can not be disabled). + +The NAPI threads for backlog are optional, it has to be enabled via the boot +argument "thread_backlog_napi". It is mandatory for PREEMPT_RT to avoid the +wakeup of ksoftirqd from the IPI. + +Acked-by: Jakub Kicinski +Signed-off-by: Sebastian Andrzej Siewior +Signed-off-by: Paolo Abeni +--- + net/core/dev.c | 148 +++++++++++++++++++++++++++++++++++++------------ + 1 file changed, 113 insertions(+), 35 deletions(-) + +--- a/net/core/dev.c ++++ b/net/core/dev.c +@@ -78,6 +78,7 @@ + #include + #include + #include ++#include + #include + #include + #include +@@ -217,6 +218,31 @@ static inline struct hlist_head *dev_ind + return &net->dev_index_head[ifindex & (NETDEV_HASHENTRIES - 1)]; + } + ++#ifndef CONFIG_PREEMPT_RT ++ ++static DEFINE_STATIC_KEY_FALSE(use_backlog_threads_key); ++ ++static int __init setup_backlog_napi_threads(char *arg) ++{ ++ static_branch_enable(&use_backlog_threads_key); ++ return 0; ++} ++early_param("thread_backlog_napi", setup_backlog_napi_threads); ++ ++static bool use_backlog_threads(void) ++{ ++ return static_branch_unlikely(&use_backlog_threads_key); ++} ++ ++#else ++ ++static bool use_backlog_threads(void) ++{ ++ return true; ++} ++ ++#endif ++ + static inline void rps_lock_irqsave(struct softnet_data *sd, + unsigned long *flags) + { +@@ -4441,6 +4467,7 @@ EXPORT_SYMBOL(__dev_direct_xmit); + /************************************************************************* + * Receiver routines + *************************************************************************/ ++static DEFINE_PER_CPU(struct task_struct *, backlog_napi); + + int netdev_max_backlog __read_mostly = 1000; + EXPORT_SYMBOL(netdev_max_backlog); +@@ -4473,12 +4500,16 @@ static inline void ____napi_schedule(str + */ + thread = READ_ONCE(napi->thread); + if (thread) { ++ if (use_backlog_threads() && thread == raw_cpu_read(backlog_napi)) ++ goto use_local_napi; ++ + set_bit(NAPI_STATE_SCHED_THREADED, &napi->state); + wake_up_process(thread); + return; + } + } + ++use_local_napi: + list_add_tail(&napi->poll_list, &sd->poll_list); + WRITE_ONCE(napi->list_owner, smp_processor_id()); + /* If not called from net_rx_action() +@@ -4724,6 +4755,11 @@ static void napi_schedule_rps(struct sof + + #ifdef CONFIG_RPS + if (sd != mysd) { ++ if (use_backlog_threads()) { ++ __napi_schedule_irqoff(&sd->backlog); ++ return; ++ } ++ + sd->rps_ipi_next = mysd->rps_ipi_list; + mysd->rps_ipi_list = sd; + +@@ -5947,7 +5983,7 @@ static void net_rps_action_and_irq_enabl + #ifdef CONFIG_RPS + struct softnet_data *remsd = sd->rps_ipi_list; + +- if (remsd) { ++ if (!use_backlog_threads() && remsd) { + sd->rps_ipi_list = NULL; + + local_irq_enable(); +@@ -5962,7 +5998,7 @@ static void net_rps_action_and_irq_enabl + static bool sd_has_rps_ipi_waiting(struct softnet_data *sd) + { + #ifdef CONFIG_RPS +- return sd->rps_ipi_list != NULL; ++ return !use_backlog_threads() && sd->rps_ipi_list; + #else + return false; + #endif +@@ -6006,7 +6042,7 @@ static int process_backlog(struct napi_s + * We can use a plain write instead of clear_bit(), + * and we dont need an smp_mb() memory barrier. + */ +- napi->state = 0; ++ napi->state &= NAPIF_STATE_THREADED; + again = false; + } else { + skb_queue_splice_tail_init(&sd->input_pkt_queue, +@@ -6672,43 +6708,48 @@ static void skb_defer_free_flush(struct + } + } + +-static int napi_threaded_poll(void *data) ++static void napi_threaded_poll_loop(struct napi_struct *napi) + { +- struct napi_struct *napi = data; + struct softnet_data *sd; +- void *have; ++ unsigned long last_qs = jiffies; + +- while (!napi_thread_wait(napi)) { +- unsigned long last_qs = jiffies; ++ for (;;) { ++ bool repoll = false; ++ void *have; + +- for (;;) { +- bool repoll = false; ++ local_bh_disable(); ++ sd = this_cpu_ptr(&softnet_data); ++ sd->in_napi_threaded_poll = true; + +- local_bh_disable(); +- sd = this_cpu_ptr(&softnet_data); +- sd->in_napi_threaded_poll = true; +- +- have = netpoll_poll_lock(napi); +- __napi_poll(napi, &repoll); +- netpoll_poll_unlock(have); +- +- sd->in_napi_threaded_poll = false; +- barrier(); +- +- if (sd_has_rps_ipi_waiting(sd)) { +- local_irq_disable(); +- net_rps_action_and_irq_enable(sd); +- } +- skb_defer_free_flush(sd); +- local_bh_enable(); ++ have = netpoll_poll_lock(napi); ++ __napi_poll(napi, &repoll); ++ netpoll_poll_unlock(have); ++ ++ sd->in_napi_threaded_poll = false; ++ barrier(); ++ ++ if (sd_has_rps_ipi_waiting(sd)) { ++ local_irq_disable(); ++ net_rps_action_and_irq_enable(sd); ++ } ++ skb_defer_free_flush(sd); ++ local_bh_enable(); + +- if (!repoll) +- break; ++ if (!repoll) ++ break; + +- rcu_softirq_qs_periodic(last_qs); +- cond_resched(); +- } ++ rcu_softirq_qs_periodic(last_qs); ++ cond_resched(); + } ++} ++ ++static int napi_threaded_poll(void *data) ++{ ++ struct napi_struct *napi = data; ++ ++ while (!napi_thread_wait(napi)) ++ napi_threaded_poll_loop(napi); ++ + return 0; + } + +@@ -11288,7 +11329,7 @@ static int dev_cpu_dead(unsigned int old + + list_del_init(&napi->poll_list); + if (napi->poll == process_backlog) +- napi->state = 0; ++ napi->state &= NAPIF_STATE_THREADED; + else + ____napi_schedule(sd, napi); + } +@@ -11296,12 +11337,14 @@ static int dev_cpu_dead(unsigned int old + raise_softirq_irqoff(NET_TX_SOFTIRQ); + local_irq_enable(); + ++ if (!use_backlog_threads()) { + #ifdef CONFIG_RPS +- remsd = oldsd->rps_ipi_list; +- oldsd->rps_ipi_list = NULL; ++ remsd = oldsd->rps_ipi_list; ++ oldsd->rps_ipi_list = NULL; + #endif +- /* send out pending IPI's on offline CPU */ +- net_rps_send_ipi(remsd); ++ /* send out pending IPI's on offline CPU */ ++ net_rps_send_ipi(remsd); ++ } + + /* Process offline CPU's input_pkt_queue */ + while ((skb = __skb_dequeue(&oldsd->process_queue))) { +@@ -11564,6 +11607,38 @@ static struct pernet_operations __net_in + * + */ + ++static int backlog_napi_should_run(unsigned int cpu) ++{ ++ struct softnet_data *sd = per_cpu_ptr(&softnet_data, cpu); ++ struct napi_struct *napi = &sd->backlog; ++ ++ return test_bit(NAPI_STATE_SCHED_THREADED, &napi->state); ++} ++ ++static void run_backlog_napi(unsigned int cpu) ++{ ++ struct softnet_data *sd = per_cpu_ptr(&softnet_data, cpu); ++ ++ napi_threaded_poll_loop(&sd->backlog); ++} ++ ++static void backlog_napi_setup(unsigned int cpu) ++{ ++ struct softnet_data *sd = per_cpu_ptr(&softnet_data, cpu); ++ struct napi_struct *napi = &sd->backlog; ++ ++ napi->thread = this_cpu_read(backlog_napi); ++ set_bit(NAPI_STATE_THREADED, &napi->state); ++} ++ ++static struct smp_hotplug_thread backlog_threads = { ++ .store = &backlog_napi, ++ .thread_should_run = backlog_napi_should_run, ++ .thread_fn = run_backlog_napi, ++ .thread_comm = "backlog_napi/%u", ++ .setup = backlog_napi_setup, ++}; ++ + /* + * This is called single threaded during boot, so no need + * to take the rtnl semaphore. +@@ -11614,7 +11689,10 @@ static int __init net_dev_init(void) + init_gro_hash(&sd->backlog); + sd->backlog.poll = process_backlog; + sd->backlog.weight = weight_p; ++ INIT_LIST_HEAD(&sd->backlog.poll_list); + } ++ if (use_backlog_threads()) ++ smpboot_register_percpu_thread(&backlog_threads); + + dev_boot_phase = 0; + diff --git a/target/linux/generic/backport-6.6/602-v6.10-net-Use-backlog-NAPI-to-clean-up-the-defer_list.patch b/target/linux/generic/backport-6.6/602-v6.10-net-Use-backlog-NAPI-to-clean-up-the-defer_list.patch new file mode 100644 index 0000000000..6a9c113124 --- /dev/null +++ b/target/linux/generic/backport-6.6/602-v6.10-net-Use-backlog-NAPI-to-clean-up-the-defer_list.patch @@ -0,0 +1,121 @@ +From 80d2eefcb4c84aa9018b2a997ab3a4c567bc821a Mon Sep 17 00:00:00 2001 +From: Sebastian Andrzej Siewior +Date: Mon, 25 Mar 2024 08:40:30 +0100 +Subject: [PATCH 3/4] net: Use backlog-NAPI to clean up the defer_list. + +The defer_list is a per-CPU list which is used to free skbs outside of +the socket lock and on the CPU on which they have been allocated. +The list is processed during NAPI callbacks so ideally the list is +cleaned up. +Should the amount of skbs on the list exceed a certain water mark then +the softirq is triggered remotely on the target CPU by invoking a remote +function call. The raise of the softirqs via a remote function call +leads to waking the ksoftirqd on PREEMPT_RT which is undesired. +The backlog-NAPI threads already provide the infrastructure which can be +utilized to perform the cleanup of the defer_list. + +The NAPI state is updated with the input_pkt_queue.lock acquired. It +order not to break the state, it is needed to also wake the backlog-NAPI +thread with the lock held. This requires to acquire the use the lock in +rps_lock_irq*() if the backlog-NAPI threads are used even with RPS +disabled. + +Move the logic of remotely starting softirqs to clean up the defer_list +into kick_defer_list_purge(). Make sure a lock is held in +rps_lock_irq*() if backlog-NAPI threads are used. Schedule backlog-NAPI +for defer_list cleanup if backlog-NAPI is available. + +Acked-by: Jakub Kicinski +Signed-off-by: Sebastian Andrzej Siewior +Signed-off-by: Paolo Abeni +--- + include/linux/netdevice.h | 1 + + net/core/dev.c | 25 +++++++++++++++++++++---- + net/core/skbuff.c | 4 ++-- + 3 files changed, 24 insertions(+), 6 deletions(-) + +--- a/include/linux/netdevice.h ++++ b/include/linux/netdevice.h +@@ -3300,6 +3300,7 @@ static inline void dev_xmit_recursion_de + __this_cpu_dec(softnet_data.xmit.recursion); + } + ++void kick_defer_list_purge(struct softnet_data *sd, unsigned int cpu); + void __netif_schedule(struct Qdisc *q); + void netif_schedule_queue(struct netdev_queue *txq); + +--- a/net/core/dev.c ++++ b/net/core/dev.c +@@ -246,7 +246,7 @@ static bool use_backlog_threads(void) + static inline void rps_lock_irqsave(struct softnet_data *sd, + unsigned long *flags) + { +- if (IS_ENABLED(CONFIG_RPS)) ++ if (IS_ENABLED(CONFIG_RPS) || use_backlog_threads()) + spin_lock_irqsave(&sd->input_pkt_queue.lock, *flags); + else if (!IS_ENABLED(CONFIG_PREEMPT_RT)) + local_irq_save(*flags); +@@ -254,7 +254,7 @@ static inline void rps_lock_irqsave(stru + + static inline void rps_lock_irq_disable(struct softnet_data *sd) + { +- if (IS_ENABLED(CONFIG_RPS)) ++ if (IS_ENABLED(CONFIG_RPS) || use_backlog_threads()) + spin_lock_irq(&sd->input_pkt_queue.lock); + else if (!IS_ENABLED(CONFIG_PREEMPT_RT)) + local_irq_disable(); +@@ -263,7 +263,7 @@ static inline void rps_lock_irq_disable( + static inline void rps_unlock_irq_restore(struct softnet_data *sd, + unsigned long *flags) + { +- if (IS_ENABLED(CONFIG_RPS)) ++ if (IS_ENABLED(CONFIG_RPS) || use_backlog_threads()) + spin_unlock_irqrestore(&sd->input_pkt_queue.lock, *flags); + else if (!IS_ENABLED(CONFIG_PREEMPT_RT)) + local_irq_restore(*flags); +@@ -271,7 +271,7 @@ static inline void rps_unlock_irq_restor + + static inline void rps_unlock_irq_enable(struct softnet_data *sd) + { +- if (IS_ENABLED(CONFIG_RPS)) ++ if (IS_ENABLED(CONFIG_RPS) || use_backlog_threads()) + spin_unlock_irq(&sd->input_pkt_queue.lock); + else if (!IS_ENABLED(CONFIG_PREEMPT_RT)) + local_irq_enable(); +@@ -4774,6 +4774,23 @@ static void napi_schedule_rps(struct sof + __napi_schedule_irqoff(&mysd->backlog); + } + ++void kick_defer_list_purge(struct softnet_data *sd, unsigned int cpu) ++{ ++ unsigned long flags; ++ ++ if (use_backlog_threads()) { ++ rps_lock_irqsave(sd, &flags); ++ ++ if (!__test_and_set_bit(NAPI_STATE_SCHED, &sd->backlog.state)) ++ __napi_schedule_irqoff(&sd->backlog); ++ ++ rps_unlock_irq_restore(sd, &flags); ++ ++ } else if (!cmpxchg(&sd->defer_ipi_scheduled, 0, 1)) { ++ smp_call_function_single_async(cpu, &sd->defer_csd); ++ } ++} ++ + #ifdef CONFIG_NET_FLOW_LIMIT + int netdev_flow_limit_table_len __read_mostly = (1 << 12); + #endif +--- a/net/core/skbuff.c ++++ b/net/core/skbuff.c +@@ -6863,8 +6863,8 @@ nodefer: __kfree_skb(skb); + /* Make sure to trigger NET_RX_SOFTIRQ on the remote CPU + * if we are unlucky enough (this seems very unlikely). + */ +- if (unlikely(kick) && !cmpxchg(&sd->defer_ipi_scheduled, 0, 1)) +- smp_call_function_single_async(cpu, &sd->defer_csd); ++ if (unlikely(kick)) ++ kick_defer_list_purge(sd, cpu); + } + + static void skb_splice_csum_page(struct sk_buff *skb, struct page *page, diff --git a/target/linux/generic/backport-6.6/603-v6.10-net-Rename-rps_lock-to-backlog_lock.patch b/target/linux/generic/backport-6.6/603-v6.10-net-Rename-rps_lock-to-backlog_lock.patch new file mode 100644 index 0000000000..801067287b --- /dev/null +++ b/target/linux/generic/backport-6.6/603-v6.10-net-Rename-rps_lock-to-backlog_lock.patch @@ -0,0 +1,164 @@ +From 765b11f8f4e20b7433e4ba4a3e9106a0d59501ed Mon Sep 17 00:00:00 2001 +From: Sebastian Andrzej Siewior +Date: Mon, 25 Mar 2024 08:40:31 +0100 +Subject: [PATCH 4/4] net: Rename rps_lock to backlog_lock. + +The rps_lock.*() functions use the inner lock of a sk_buff_head for +locking. This lock is used if RPS is enabled, otherwise the list is +accessed lockless and disabling interrupts is enough for the +synchronisation because it is only accessed CPU local. Not only the list +is protected but also the NAPI state protected. +With the addition of backlog threads, the lock is also needed because of +the cross CPU access even without RPS. The clean up of the defer_list +list is also done via backlog threads (if enabled). + +It has been suggested to rename the locking function since it is no +longer just RPS. + +Rename the rps_lock*() functions to backlog_lock*(). + +Suggested-by: Jakub Kicinski +Acked-by: Jakub Kicinski +Signed-off-by: Sebastian Andrzej Siewior +Signed-off-by: Paolo Abeni +--- + net/core/dev.c | 34 +++++++++++++++++----------------- + 1 file changed, 17 insertions(+), 17 deletions(-) + +--- a/net/core/dev.c ++++ b/net/core/dev.c +@@ -243,8 +243,8 @@ static bool use_backlog_threads(void) + + #endif + +-static inline void rps_lock_irqsave(struct softnet_data *sd, +- unsigned long *flags) ++static inline void backlog_lock_irq_save(struct softnet_data *sd, ++ unsigned long *flags) + { + if (IS_ENABLED(CONFIG_RPS) || use_backlog_threads()) + spin_lock_irqsave(&sd->input_pkt_queue.lock, *flags); +@@ -252,7 +252,7 @@ static inline void rps_lock_irqsave(stru + local_irq_save(*flags); + } + +-static inline void rps_lock_irq_disable(struct softnet_data *sd) ++static inline void backlog_lock_irq_disable(struct softnet_data *sd) + { + if (IS_ENABLED(CONFIG_RPS) || use_backlog_threads()) + spin_lock_irq(&sd->input_pkt_queue.lock); +@@ -260,8 +260,8 @@ static inline void rps_lock_irq_disable( + local_irq_disable(); + } + +-static inline void rps_unlock_irq_restore(struct softnet_data *sd, +- unsigned long *flags) ++static inline void backlog_unlock_irq_restore(struct softnet_data *sd, ++ unsigned long *flags) + { + if (IS_ENABLED(CONFIG_RPS) || use_backlog_threads()) + spin_unlock_irqrestore(&sd->input_pkt_queue.lock, *flags); +@@ -269,7 +269,7 @@ static inline void rps_unlock_irq_restor + local_irq_restore(*flags); + } + +-static inline void rps_unlock_irq_enable(struct softnet_data *sd) ++static inline void backlog_unlock_irq_enable(struct softnet_data *sd) + { + if (IS_ENABLED(CONFIG_RPS) || use_backlog_threads()) + spin_unlock_irq(&sd->input_pkt_queue.lock); +@@ -4779,12 +4779,12 @@ void kick_defer_list_purge(struct softne + unsigned long flags; + + if (use_backlog_threads()) { +- rps_lock_irqsave(sd, &flags); ++ backlog_lock_irq_save(sd, &flags); + + if (!__test_and_set_bit(NAPI_STATE_SCHED, &sd->backlog.state)) + __napi_schedule_irqoff(&sd->backlog); + +- rps_unlock_irq_restore(sd, &flags); ++ backlog_unlock_irq_restore(sd, &flags); + + } else if (!cmpxchg(&sd->defer_ipi_scheduled, 0, 1)) { + smp_call_function_single_async(cpu, &sd->defer_csd); +@@ -4846,7 +4846,7 @@ static int enqueue_to_backlog(struct sk_ + reason = SKB_DROP_REASON_NOT_SPECIFIED; + sd = &per_cpu(softnet_data, cpu); + +- rps_lock_irqsave(sd, &flags); ++ backlog_lock_irq_save(sd, &flags); + if (!netif_running(skb->dev)) + goto drop; + qlen = skb_queue_len(&sd->input_pkt_queue); +@@ -4855,7 +4855,7 @@ static int enqueue_to_backlog(struct sk_ + enqueue: + __skb_queue_tail(&sd->input_pkt_queue, skb); + input_queue_tail_incr_save(sd, qtail); +- rps_unlock_irq_restore(sd, &flags); ++ backlog_unlock_irq_restore(sd, &flags); + return NET_RX_SUCCESS; + } + +@@ -4870,7 +4870,7 @@ enqueue: + + drop: + sd->dropped++; +- rps_unlock_irq_restore(sd, &flags); ++ backlog_unlock_irq_restore(sd, &flags); + + dev_core_stats_rx_dropped_inc(skb->dev); + kfree_skb_reason(skb, reason); +@@ -5901,7 +5901,7 @@ static void flush_backlog(struct work_st + local_bh_disable(); + sd = this_cpu_ptr(&softnet_data); + +- rps_lock_irq_disable(sd); ++ backlog_lock_irq_disable(sd); + skb_queue_walk_safe(&sd->input_pkt_queue, skb, tmp) { + if (skb->dev->reg_state == NETREG_UNREGISTERING) { + __skb_unlink(skb, &sd->input_pkt_queue); +@@ -5909,7 +5909,7 @@ static void flush_backlog(struct work_st + input_queue_head_incr(sd); + } + } +- rps_unlock_irq_enable(sd); ++ backlog_unlock_irq_enable(sd); + + skb_queue_walk_safe(&sd->process_queue, skb, tmp) { + if (skb->dev->reg_state == NETREG_UNREGISTERING) { +@@ -5927,14 +5927,14 @@ static bool flush_required(int cpu) + struct softnet_data *sd = &per_cpu(softnet_data, cpu); + bool do_flush; + +- rps_lock_irq_disable(sd); ++ backlog_lock_irq_disable(sd); + + /* as insertion into process_queue happens with the rps lock held, + * process_queue access may race only with dequeue + */ + do_flush = !skb_queue_empty(&sd->input_pkt_queue) || + !skb_queue_empty_lockless(&sd->process_queue); +- rps_unlock_irq_enable(sd); ++ backlog_unlock_irq_enable(sd); + + return do_flush; + #endif +@@ -6049,7 +6049,7 @@ static int process_backlog(struct napi_s + + } + +- rps_lock_irq_disable(sd); ++ backlog_lock_irq_disable(sd); + if (skb_queue_empty(&sd->input_pkt_queue)) { + /* + * Inline a custom version of __napi_complete(). +@@ -6065,7 +6065,7 @@ static int process_backlog(struct napi_s + skb_queue_splice_tail_init(&sd->input_pkt_queue, + &sd->process_queue); + } +- rps_unlock_irq_enable(sd); ++ backlog_unlock_irq_enable(sd); + } + + return work; diff --git a/target/linux/generic/backport-6.6/770-net-introduce-napi_is_scheduled-helper.patch b/target/linux/generic/backport-6.6/770-net-introduce-napi_is_scheduled-helper.patch index 821fd60a2d..6449cd6a3a 100644 --- a/target/linux/generic/backport-6.6/770-net-introduce-napi_is_scheduled-helper.patch +++ b/target/linux/generic/backport-6.6/770-net-introduce-napi_is_scheduled-helper.patch @@ -85,7 +85,7 @@ Signed-off-by: Paolo Abeni /** --- a/net/core/dev.c +++ b/net/core/dev.c -@@ -6555,7 +6555,7 @@ static int __napi_poll(struct napi_struc +@@ -6602,7 +6602,7 @@ static int __napi_poll(struct napi_struc * accidentally calling ->poll() when NAPI is not scheduled. */ work = 0; diff --git a/target/linux/generic/hack-6.6/721-net-add-packet-mangeling.patch b/target/linux/generic/hack-6.6/721-net-add-packet-mangeling.patch index b51a324027..e1d4367a8f 100644 --- a/target/linux/generic/hack-6.6/721-net-add-packet-mangeling.patch +++ b/target/linux/generic/hack-6.6/721-net-add-packet-mangeling.patch @@ -19,7 +19,7 @@ Signed-off-by: Felix Fietkau --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h -@@ -1759,6 +1759,7 @@ enum netdev_priv_flags { +@@ -1758,6 +1758,7 @@ enum netdev_priv_flags { IFF_TX_SKB_NO_LINEAR = BIT_ULL(31), IFF_CHANGE_PROTO_DOWN = BIT_ULL(32), IFF_SEE_ALL_HWTSTAMP_REQUESTS = BIT_ULL(33), @@ -27,7 +27,7 @@ Signed-off-by: Felix Fietkau }; #define IFF_802_1Q_VLAN IFF_802_1Q_VLAN -@@ -1792,6 +1793,7 @@ enum netdev_priv_flags { +@@ -1791,6 +1792,7 @@ enum netdev_priv_flags { #define IFF_FAILOVER_SLAVE IFF_FAILOVER_SLAVE #define IFF_L3MDEV_RX_HANDLER IFF_L3MDEV_RX_HANDLER #define IFF_TX_SKB_NO_LINEAR IFF_TX_SKB_NO_LINEAR @@ -35,7 +35,7 @@ Signed-off-by: Felix Fietkau /* Specifies the type of the struct net_device::ml_priv pointer */ enum netdev_ml_priv_type { -@@ -2184,6 +2186,11 @@ struct net_device { +@@ -2183,6 +2185,11 @@ struct net_device { const struct tlsdev_ops *tlsdev_ops; #endif @@ -47,7 +47,7 @@ Signed-off-by: Felix Fietkau const struct header_ops *header_ops; unsigned char operstate; -@@ -2257,6 +2264,10 @@ struct net_device { +@@ -2256,6 +2263,10 @@ struct net_device { struct mctp_dev __rcu *mctp_ptr; #endif @@ -105,7 +105,7 @@ Signed-off-by: Felix Fietkau help --- a/net/core/dev.c +++ b/net/core/dev.c -@@ -3571,6 +3571,11 @@ static int xmit_one(struct sk_buff *skb, +@@ -3597,6 +3597,11 @@ static int xmit_one(struct sk_buff *skb, if (dev_nit_active(dev)) dev_queue_xmit_nit(skb, dev); diff --git a/target/linux/generic/pending-6.6/760-net-core-add-optional-threading-for-backlog-processi.patch b/target/linux/generic/pending-6.6/760-net-core-add-optional-threading-for-backlog-processi.patch deleted file mode 100644 index 8a9066bf12..0000000000 --- a/target/linux/generic/pending-6.6/760-net-core-add-optional-threading-for-backlog-processi.patch +++ /dev/null @@ -1,227 +0,0 @@ -From: Felix Fietkau -Date: Thu, 16 Feb 2023 18:39:04 +0100 -Subject: [PATCH] net/core: add optional threading for backlog processing - -When dealing with few flows or an imbalance on CPU utilization, static RPS -CPU assignment can be too inflexible. Add support for enabling threaded NAPI -for backlog processing in order to allow the scheduler to better balance -processing. This helps better spread the load across idle CPUs. - -Signed-off-by: Felix Fietkau ---- - ---- a/include/linux/netdevice.h -+++ b/include/linux/netdevice.h -@@ -558,6 +558,7 @@ static inline bool napi_complete(struct - } - - int dev_set_threaded(struct net_device *dev, bool threaded); -+int backlog_set_threaded(bool threaded); - - /** - * napi_disable - prevent NAPI from scheduling -@@ -3236,6 +3237,7 @@ struct softnet_data { - /* stats */ - unsigned int processed; - unsigned int time_squeeze; -+ unsigned int process_queue_empty; - #ifdef CONFIG_RPS - struct softnet_data *rps_ipi_list; - #endif ---- a/net/core/dev.c -+++ b/net/core/dev.c -@@ -4729,7 +4729,7 @@ static void napi_schedule_rps(struct sof - struct softnet_data *mysd = this_cpu_ptr(&softnet_data); - - #ifdef CONFIG_RPS -- if (sd != mysd) { -+ if (sd != mysd && !test_bit(NAPI_STATE_THREADED, &sd->backlog.state)) { - sd->rps_ipi_next = mysd->rps_ipi_list; - mysd->rps_ipi_list = sd; - -@@ -5848,6 +5848,8 @@ static DEFINE_PER_CPU(struct work_struct - /* Network device is going away, flush any packets still pending */ - static void flush_backlog(struct work_struct *work) - { -+ unsigned int process_queue_empty; -+ bool threaded, flush_processq; - struct sk_buff *skb, *tmp; - struct softnet_data *sd; - -@@ -5862,8 +5864,17 @@ static void flush_backlog(struct work_st - input_queue_head_incr(sd); - } - } -+ -+ threaded = test_bit(NAPI_STATE_THREADED, &sd->backlog.state); -+ flush_processq = threaded && -+ !skb_queue_empty_lockless(&sd->process_queue); -+ if (flush_processq) -+ process_queue_empty = sd->process_queue_empty; - rps_unlock_irq_enable(sd); - -+ if (threaded) -+ goto out; -+ - skb_queue_walk_safe(&sd->process_queue, skb, tmp) { - if (skb->dev->reg_state == NETREG_UNREGISTERING) { - __skb_unlink(skb, &sd->process_queue); -@@ -5871,7 +5882,16 @@ static void flush_backlog(struct work_st - input_queue_head_incr(sd); - } - } -+ -+out: - local_bh_enable(); -+ -+ while (flush_processq) { -+ msleep(1); -+ rps_lock_irq_disable(sd); -+ flush_processq = process_queue_empty == sd->process_queue_empty; -+ rps_unlock_irq_enable(sd); -+ } - } - - static bool flush_required(int cpu) -@@ -6003,6 +6023,7 @@ static int process_backlog(struct napi_s - } - - rps_lock_irq_disable(sd); -+ sd->process_queue_empty++; - if (skb_queue_empty(&sd->input_pkt_queue)) { - /* - * Inline a custom version of __napi_complete(). -@@ -6012,7 +6033,8 @@ static int process_backlog(struct napi_s - * We can use a plain write instead of clear_bit(), - * and we dont need an smp_mb() memory barrier. - */ -- napi->state = 0; -+ napi->state &= ~(NAPIF_STATE_SCHED | -+ NAPIF_STATE_SCHED_THREADED); - again = false; - } else { - skb_queue_splice_tail_init(&sd->input_pkt_queue, -@@ -6426,6 +6448,55 @@ int dev_set_threaded(struct net_device * - } - EXPORT_SYMBOL(dev_set_threaded); - -+int backlog_set_threaded(bool threaded) -+{ -+ static bool backlog_threaded; -+ int err = 0; -+ int i; -+ -+ if (backlog_threaded == threaded) -+ return 0; -+ -+ for_each_possible_cpu(i) { -+ struct softnet_data *sd = &per_cpu(softnet_data, i); -+ struct napi_struct *n = &sd->backlog; -+ -+ if (n->thread) -+ continue; -+ n->thread = kthread_run(napi_threaded_poll, n, "napi/backlog-%d", i); -+ if (IS_ERR(n->thread)) { -+ err = PTR_ERR(n->thread); -+ pr_err("kthread_run failed with err %d\n", err); -+ n->thread = NULL; -+ threaded = false; -+ break; -+ } -+ -+ } -+ -+ backlog_threaded = threaded; -+ -+ /* Make sure kthread is created before THREADED bit -+ * is set. -+ */ -+ smp_mb__before_atomic(); -+ -+ for_each_possible_cpu(i) { -+ struct softnet_data *sd = &per_cpu(softnet_data, i); -+ struct napi_struct *n = &sd->backlog; -+ unsigned long flags; -+ -+ rps_lock_irqsave(sd, &flags); -+ if (threaded) -+ n->state |= NAPIF_STATE_THREADED; -+ else -+ n->state &= ~NAPIF_STATE_THREADED; -+ rps_unlock_irq_restore(sd, &flags); -+ } -+ -+ return err; -+} -+ - void netif_napi_add_weight(struct net_device *dev, struct napi_struct *napi, - int (*poll)(struct napi_struct *, int), int weight) - { -@@ -11307,6 +11378,9 @@ static int dev_cpu_dead(unsigned int old - raise_softirq_irqoff(NET_TX_SOFTIRQ); - local_irq_enable(); - -+ if (test_bit(NAPI_STATE_THREADED, &oldsd->backlog.state)) -+ return 0; -+ - #ifdef CONFIG_RPS - remsd = oldsd->rps_ipi_list; - oldsd->rps_ipi_list = NULL; -@@ -11622,6 +11696,7 @@ static int __init net_dev_init(void) - INIT_CSD(&sd->defer_csd, trigger_rx_softirq, sd); - spin_lock_init(&sd->defer_lock); - -+ INIT_LIST_HEAD(&sd->backlog.poll_list); - init_gro_hash(&sd->backlog); - sd->backlog.poll = process_backlog; - sd->backlog.weight = weight_p; ---- a/net/core/sysctl_net_core.c -+++ b/net/core/sysctl_net_core.c -@@ -30,6 +30,7 @@ static int int_3600 = 3600; - static int min_sndbuf = SOCK_MIN_SNDBUF; - static int min_rcvbuf = SOCK_MIN_RCVBUF; - static int max_skb_frags = MAX_SKB_FRAGS; -+static int backlog_threaded; - static int min_mem_pcpu_rsv = SK_MEMORY_PCPU_RESERVE; - - static int net_msg_warn; /* Unused, but still a sysctl */ -@@ -189,6 +190,23 @@ static int rps_sock_flow_sysctl(struct c - } - #endif /* CONFIG_RPS */ - -+static int backlog_threaded_sysctl(struct ctl_table *table, int write, -+ void *buffer, size_t *lenp, loff_t *ppos) -+{ -+ static DEFINE_MUTEX(backlog_threaded_mutex); -+ int ret; -+ -+ mutex_lock(&backlog_threaded_mutex); -+ -+ ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos); -+ if (write && !ret) -+ ret = backlog_set_threaded(backlog_threaded); -+ -+ mutex_unlock(&backlog_threaded_mutex); -+ -+ return ret; -+} -+ - #ifdef CONFIG_NET_FLOW_LIMIT - static DEFINE_MUTEX(flow_limit_update_mutex); - -@@ -541,6 +559,15 @@ static struct ctl_table net_core_table[] - .proc_handler = rps_sock_flow_sysctl - }, - #endif -+ { -+ .procname = "backlog_threaded", -+ .data = &backlog_threaded, -+ .maxlen = sizeof(unsigned int), -+ .mode = 0644, -+ .proc_handler = backlog_threaded_sysctl, -+ .extra1 = SYSCTL_ZERO, -+ .extra2 = SYSCTL_ONE -+ }, - #ifdef CONFIG_NET_FLOW_LIMIT - { - .procname = "flow_limit_cpu_bitmap",