From ba72ed537c4a063e1143b9efae579112fee7c42a Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Sun, 6 Sep 2020 13:29:48 +0200 Subject: [PATCH] kernel: backport GRO improvements Improves network performance Signed-off-by: Felix Fietkau --- ...ast-GRO-for-skbs-with-Ethernet-heade.patch | 78 +++++++++++++++++++ ...tified-Rx-for-GRO_NORMAL-in-napi_gro.patch | 51 ++++++++++++ ...T-skip-GRO-for-foreign-MAC-addresses.patch | 2 +- 3 files changed, 130 insertions(+), 1 deletion(-) create mode 100644 target/linux/generic/backport-5.4/700-v5.5-net-core-allow-fast-GRO-for-skbs-with-Ethernet-heade.patch create mode 100644 target/linux/generic/backport-5.4/701-v5.5-net-core-use-listified-Rx-for-GRO_NORMAL-in-napi_gro.patch diff --git a/target/linux/generic/backport-5.4/700-v5.5-net-core-allow-fast-GRO-for-skbs-with-Ethernet-heade.patch b/target/linux/generic/backport-5.4/700-v5.5-net-core-allow-fast-GRO-for-skbs-with-Ethernet-heade.patch new file mode 100644 index 0000000000..f864a7899c --- /dev/null +++ b/target/linux/generic/backport-5.4/700-v5.5-net-core-allow-fast-GRO-for-skbs-with-Ethernet-heade.patch @@ -0,0 +1,78 @@ +From: Alexander Lobakin +Date: Fri, 15 Nov 2019 12:11:35 +0300 +Subject: [PATCH] net: core: allow fast GRO for skbs with Ethernet header in + head + +Commit 78d3fd0b7de8 ("gro: Only use skb_gro_header for completely +non-linear packets") back in May'09 (v2.6.31-rc1) has changed the +original condition '!skb_headlen(skb)' to +'skb->mac_header == skb->tail' in gro_reset_offset() saying: "Since +the drivers that need this optimisation all provide completely +non-linear packets" (note that this condition has become the current +'skb_mac_header(skb) == skb_tail_pointer(skb)' later with commit +ced14f6804a9 ("net: Correct comparisons and calculations using +skb->tail and skb-transport_header") without any functional changes). 
+ +For now, we have the following rough statistics for v5.4-rc7: +1) napi_gro_frags: 14 +2) napi_gro_receive with skb->head containing (most of) payload: 83 +3) napi_gro_receive with skb->head containing all the headers: 20 +4) napi_gro_receive with skb->head containing only Ethernet header: 2 + +With the current condition, fast GRO with the usage of +NAPI_GRO_CB(skb)->frag0 is available only in the [1] case. +Packets pushed by [2] and [3] go through the 'slow' path, but +it's not a problem for them as they already contain all the needed +headers in skb->head, so pskb_may_pull() only moves skb->data. + +The layout of skbs in the fourth [4] case at the moment of +dev_gro_receive() is identical to skbs that have come through [1], +as napi_frags_skb() pulls Ethernet header to skb->head. The only +difference is that the mentioned condition is always false for them, +because skb_put() and friends irreversibly alter the tail pointer. +They also go through the 'slow' path, but now every single +pskb_may_pull() in every single .gro_receive() will call the *really* +slow __pskb_pull_tail() to pull headers to head. This significantly +decreases the overall performance for no visible reasons. + +The only two users of method [4] are: +* drivers/staging/qlge +* drivers/net/wireless/iwlwifi (all three variants: dvm, mvm, mvm-mq) + +Note that in case with wireless drivers we can't use [1] +(napi_gro_frags()) at least for now and mac80211 stack always +performs pushes and pulls anyways, so performance hit is unavoidable. + +At the moment of v2.6.31 the mentioned change was necessary (that's +why I don't add the "Fixes:" tag), but it became obsolete since +skb_gro_mac_header() has gone in commit a50e233c50db ("net-gro: +restore frag0 optimization"), so we can simply revert the condition +in gro_reset_offset() to allow skbs from [4] go through the 'fast' +path just like in case [1]. 
+ +This was tested on a 600 MHz MIPS CPU and a custom driver and this +patch gave boosts up to 40 Mbps to method [4] in both directions +comparing to net-next, which made overall performance relatively +close to [1] (without it, [4] is the slowest). + +v2: +- Add more references and explanations to commit message +- Fix some typos ibid +- No functional changes + +Signed-off-by: Alexander Lobakin +Signed-off-by: David S. Miller +--- + +--- a/net/core/dev.c ++++ b/net/core/dev.c +@@ -5403,8 +5403,7 @@ static void skb_gro_reset_offset(struct + NAPI_GRO_CB(skb)->frag0 = NULL; + NAPI_GRO_CB(skb)->frag0_len = 0; + +- if (skb_mac_header(skb) == skb_tail_pointer(skb) && +- pinfo->nr_frags && ++ if (!skb_headlen(skb) && pinfo->nr_frags && + !PageHighMem(skb_frag_page(frag0))) { + NAPI_GRO_CB(skb)->frag0 = skb_frag_address(frag0); + NAPI_GRO_CB(skb)->frag0_len = min_t(unsigned int, diff --git a/target/linux/generic/backport-5.4/701-v5.5-net-core-use-listified-Rx-for-GRO_NORMAL-in-napi_gro.patch b/target/linux/generic/backport-5.4/701-v5.5-net-core-use-listified-Rx-for-GRO_NORMAL-in-napi_gro.patch new file mode 100644 index 0000000000..7be25f5338 --- /dev/null +++ b/target/linux/generic/backport-5.4/701-v5.5-net-core-use-listified-Rx-for-GRO_NORMAL-in-napi_gro.patch @@ -0,0 +1,51 @@ +From: Alexander Lobakin +Date: Mon, 14 Oct 2019 11:00:33 +0300 +Subject: [PATCH] net: core: use listified Rx for GRO_NORMAL in + napi_gro_receive() + +Commit 323ebb61e32b4 ("net: use listified RX for handling GRO_NORMAL +skbs") made use of listified skb processing for the users of +napi_gro_frags(). +The same technique can be used in a way more common napi_gro_receive() +to speed up non-merged (GRO_NORMAL) skbs for a wide range of drivers +including gro_cells and mac80211 users. +This slightly changes the return value in cases where skb is being +dropped by the core stack, but it seems to have no impact on related +drivers' functionality. 
+gro_normal_batch is left untouched as it's very individual for every +single system configuration and might be tuned in manual order to +achieve an optimal performance. + +Signed-off-by: Alexander Lobakin +Acked-by: Edward Cree +Signed-off-by: David S. Miller +--- + +--- a/net/core/dev.c ++++ b/net/core/dev.c +@@ -5601,12 +5601,13 @@ static void napi_skb_free_stolen_head(st + kmem_cache_free(skbuff_head_cache, skb); + } + +-static gro_result_t napi_skb_finish(gro_result_t ret, struct sk_buff *skb) ++static gro_result_t napi_skb_finish(struct napi_struct *napi, ++ struct sk_buff *skb, ++ gro_result_t ret) + { + switch (ret) { + case GRO_NORMAL: +- if (netif_receive_skb_internal(skb)) +- ret = GRO_DROP; ++ gro_normal_one(napi, skb); + break; + + case GRO_DROP: +@@ -5638,7 +5639,7 @@ gro_result_t napi_gro_receive(struct nap + + skb_gro_reset_offset(skb); + +- ret = napi_skb_finish(dev_gro_receive(napi, skb), skb); ++ ret = napi_skb_finish(napi, skb, dev_gro_receive(napi, skb)); + trace_napi_gro_receive_exit(ret); + + return ret; diff --git a/target/linux/generic/pending-5.4/680-NET-skip-GRO-for-foreign-MAC-addresses.patch b/target/linux/generic/pending-5.4/680-NET-skip-GRO-for-foreign-MAC-addresses.patch index 5acadeceb1..5ba7f3e693 100644 --- a/target/linux/generic/pending-5.4/680-NET-skip-GRO-for-foreign-MAC-addresses.patch +++ b/target/linux/generic/pending-5.4/680-NET-skip-GRO-for-foreign-MAC-addresses.patch @@ -32,7 +32,7 @@ Signed-off-by: Felix Fietkau __u16 tc_index; /* traffic control index */ --- a/net/core/dev.c +++ b/net/core/dev.c -@@ -5469,6 +5469,9 @@ static enum gro_result dev_gro_receive(s +@@ -5468,6 +5468,9 @@ static enum gro_result dev_gro_receive(s int same_flow; int grow;