kernel: backport GRO improvements
Improves network performance.
Signed-off-by: Felix Fietkau <nbd@nbd.name>
This commit is contained in:
parent
63b6b10670
commit
ba72ed537c
|
@ -0,0 +1,78 @@
|
||||||
|
From: Alexander Lobakin <alobakin@dlink.ru>
|
||||||
|
Date: Fri, 15 Nov 2019 12:11:35 +0300
|
||||||
|
Subject: [PATCH] net: core: allow fast GRO for skbs with Ethernet header in
|
||||||
|
head
|
||||||
|
|
||||||
|
Commit 78d3fd0b7de8 ("gro: Only use skb_gro_header for completely
|
||||||
|
non-linear packets") back in May'09 (v2.6.31-rc1) has changed the
|
||||||
|
original condition '!skb_headlen(skb)' to
|
||||||
|
'skb->mac_header == skb->tail' in gro_reset_offset() saying: "Since
|
||||||
|
the drivers that need this optimisation all provide completely
|
||||||
|
non-linear packets" (note that this condition has become the current
|
||||||
|
'skb_mac_header(skb) == skb_tail_pointer(skb)' later with commit
|
||||||
|
ced14f6804a9 ("net: Correct comparisons and calculations using
|
||||||
|
skb->tail and skb-transport_header") without any functional changes).
|
||||||
|
|
||||||
|
For now, we have the following rough statistics for v5.4-rc7:
|
||||||
|
1) napi_gro_frags: 14
|
||||||
|
2) napi_gro_receive with skb->head containing (most of) payload: 83
|
||||||
|
3) napi_gro_receive with skb->head containing all the headers: 20
|
||||||
|
4) napi_gro_receive with skb->head containing only Ethernet header: 2
|
||||||
|
|
||||||
|
With the current condition, fast GRO with the usage of
|
||||||
|
NAPI_GRO_CB(skb)->frag0 is available only in the [1] case.
|
||||||
|
Packets pushed by [2] and [3] go through the 'slow' path, but
|
||||||
|
it's not a problem for them as they already contain all the needed
|
||||||
|
headers in skb->head, so pskb_may_pull() only moves skb->data.
|
||||||
|
|
||||||
|
The layout of skbs in the fourth [4] case at the moment of
|
||||||
|
dev_gro_receive() is identical to skbs that have come through [1],
|
||||||
|
as napi_frags_skb() pulls Ethernet header to skb->head. The only
|
||||||
|
difference is that the mentioned condition is always false for them,
|
||||||
|
because skb_put() and friends irreversibly alter the tail pointer.
|
||||||
|
They also go through the 'slow' path, but now every single
|
||||||
|
pskb_may_pull() in every single .gro_receive() will call the *really*
|
||||||
|
slow __pskb_pull_tail() to pull headers to head. This significantly
|
||||||
|
decreases the overall performance for no visible reasons.
|
||||||
|
|
||||||
|
The only two users of method [4] are:
|
||||||
|
* drivers/staging/qlge
|
||||||
|
* drivers/net/wireless/iwlwifi (all three variants: dvm, mvm, mvm-mq)
|
||||||
|
|
||||||
|
Note that in case with wireless drivers we can't use [1]
|
||||||
|
(napi_gro_frags()) at least for now and mac80211 stack always
|
||||||
|
performs pushes and pulls anyways, so performance hit is unavoidable.
|
||||||
|
|
||||||
|
At the moment of v2.6.31 the mentioned change was necessary (that's
|
||||||
|
why I don't add the "Fixes:" tag), but it became obsolete since
|
||||||
|
skb_gro_mac_header() has gone in commit a50e233c50db ("net-gro:
|
||||||
|
restore frag0 optimization"), so we can simply revert the condition
|
||||||
|
in gro_reset_offset() to allow skbs from [4] to go through the 'fast'
|
||||||
|
path just like in case [1].
|
||||||
|
|
||||||
|
This was tested on a 600 MHz MIPS CPU and a custom driver and this
|
||||||
|
patch gave boosts up to 40 Mbps to method [4] in both directions
|
||||||
|
compared to net-next, which made overall performance relatively
|
||||||
|
close to [1] (without it, [4] is the slowest).
|
||||||
|
|
||||||
|
v2:
|
||||||
|
- Add more references and explanations to commit message
|
||||||
|
- Fix some typos ibid
|
||||||
|
- No functional changes
|
||||||
|
|
||||||
|
Signed-off-by: Alexander Lobakin <alobakin@dlink.ru>
|
||||||
|
Signed-off-by: David S. Miller <davem@davemloft.net>
|
||||||
|
---
|
||||||
|
|
||||||
|
--- a/net/core/dev.c
|
||||||
|
+++ b/net/core/dev.c
|
||||||
|
@@ -5403,8 +5403,7 @@ static void skb_gro_reset_offset(struct
|
||||||
|
NAPI_GRO_CB(skb)->frag0 = NULL;
|
||||||
|
NAPI_GRO_CB(skb)->frag0_len = 0;
|
||||||
|
|
||||||
|
- if (skb_mac_header(skb) == skb_tail_pointer(skb) &&
|
||||||
|
- pinfo->nr_frags &&
|
||||||
|
+ if (!skb_headlen(skb) && pinfo->nr_frags &&
|
||||||
|
!PageHighMem(skb_frag_page(frag0))) {
|
||||||
|
NAPI_GRO_CB(skb)->frag0 = skb_frag_address(frag0);
|
||||||
|
NAPI_GRO_CB(skb)->frag0_len = min_t(unsigned int,
|
|
@ -0,0 +1,51 @@
|
||||||
|
From: Alexander Lobakin <alobakin@dlink.ru>
|
||||||
|
Date: Mon, 14 Oct 2019 11:00:33 +0300
|
||||||
|
Subject: [PATCH] net: core: use listified Rx for GRO_NORMAL in
|
||||||
|
napi_gro_receive()
|
||||||
|
|
||||||
|
Commit 323ebb61e32b4 ("net: use listified RX for handling GRO_NORMAL
|
||||||
|
skbs") made use of listified skb processing for the users of
|
||||||
|
napi_gro_frags().
|
||||||
|
The same technique can be used in a way more common napi_gro_receive()
|
||||||
|
to speed up non-merged (GRO_NORMAL) skbs for a wide range of drivers
|
||||||
|
including gro_cells and mac80211 users.
|
||||||
|
This slightly changes the return value in cases where skb is being
|
||||||
|
dropped by the core stack, but it seems to have no impact on related
|
||||||
|
drivers' functionality.
|
||||||
|
gro_normal_batch is left untouched as it's very individual for every
|
||||||
|
single system configuration and might be tuned manually to
|
||||||
|
achieve an optimal performance.
|
||||||
|
|
||||||
|
Signed-off-by: Alexander Lobakin <alobakin@dlink.ru>
|
||||||
|
Acked-by: Edward Cree <ecree@solarflare.com>
|
||||||
|
Signed-off-by: David S. Miller <davem@davemloft.net>
|
||||||
|
---
|
||||||
|
|
||||||
|
--- a/net/core/dev.c
|
||||||
|
+++ b/net/core/dev.c
|
||||||
|
@@ -5601,12 +5601,13 @@ static void napi_skb_free_stolen_head(st
|
||||||
|
kmem_cache_free(skbuff_head_cache, skb);
|
||||||
|
}
|
||||||
|
|
||||||
|
-static gro_result_t napi_skb_finish(gro_result_t ret, struct sk_buff *skb)
|
||||||
|
+static gro_result_t napi_skb_finish(struct napi_struct *napi,
|
||||||
|
+ struct sk_buff *skb,
|
||||||
|
+ gro_result_t ret)
|
||||||
|
{
|
||||||
|
switch (ret) {
|
||||||
|
case GRO_NORMAL:
|
||||||
|
- if (netif_receive_skb_internal(skb))
|
||||||
|
- ret = GRO_DROP;
|
||||||
|
+ gro_normal_one(napi, skb);
|
||||||
|
break;
|
||||||
|
|
||||||
|
case GRO_DROP:
|
||||||
|
@@ -5638,7 +5639,7 @@ gro_result_t napi_gro_receive(struct nap
|
||||||
|
|
||||||
|
skb_gro_reset_offset(skb);
|
||||||
|
|
||||||
|
- ret = napi_skb_finish(dev_gro_receive(napi, skb), skb);
|
||||||
|
+ ret = napi_skb_finish(napi, skb, dev_gro_receive(napi, skb));
|
||||||
|
trace_napi_gro_receive_exit(ret);
|
||||||
|
|
||||||
|
return ret;
|
|
@ -32,7 +32,7 @@ Signed-off-by: Felix Fietkau <nbd@nbd.name>
|
||||||
__u16 tc_index; /* traffic control index */
|
__u16 tc_index; /* traffic control index */
|
||||||
--- a/net/core/dev.c
|
--- a/net/core/dev.c
|
||||||
+++ b/net/core/dev.c
|
+++ b/net/core/dev.c
|
||||||
@@ -5469,6 +5469,9 @@ static enum gro_result dev_gro_receive(s
|
@@ -5468,6 +5468,9 @@ static enum gro_result dev_gro_receive(s
|
||||||
int same_flow;
|
int same_flow;
|
||||||
int grow;
|
int grow;
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue