ppp: Fix high softirq utilization with pppoa

Users of the Geos platform are reporting high CPU utilization. This seems to be rooted in a problem with the TX queue restart in PPP. Signed-off-by: Philip Prindeville <philipp@redfish-solutions.com> SVN-Revision: 31096
2012-03-27 17:47:44 +00:00 · 2012-03-27 17:47:44 +00:00 · af2c5df66a
commit af2c5df66a
parent 8de1475a27
2 changed files with 164 additions and 0 deletions
--- a/target/linux/generic/patches-3.2/120-ppp_txqueue_restart.patch
+++ b/target/linux/generic/patches-3.2/120-ppp_txqueue_restart.patch
@ -0,0 +1,82 @@
+For every transmitted packet, ppp_start_xmit() will stop the netdev
+queue and then, if appropriate, restart it. This causes the TX softirq
+to run, entirely gratuitously.
+
+This is "only" a waste of CPU time in the normal case, but it's actively
+harmful when the PPP device is a TEQL slave — the wakeup will cause the
+offending device to receive the next TX packet from the TEQL queue, when
+it *should* have gone to the next slave in the list. We end up seeing
+large bursts of packets on just *one* slave device, rather than using
+the full available bandwidth over all slaves.
+
+This patch fixes the problem by *not* unconditionally stopping the queue
+in ppp_start_xmit(). It adds a return value from ppp_xmit_process()
+which indicates whether the queue should be stopped or not.
+
+It *doesn't* remove the call to netif_wake_queue() from
+ppp_xmit_process(), because other code paths (especially from
+ppp_output_wakeup()) need it there and it's messy to push it out to the
+other callers to do it based on the return value. So we leave it in
+place — it's a no-op in the case where the queue wasn't stopped, so it's
+harmless in the TX path.
+
+Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>
+
+--- a/drivers/net/ppp/ppp_generic.c~	2012-01-26 00:39:32.000000000 +0000
+++ b/drivers/net/ppp/ppp_generic.c	2012-03-26 10:32:31.286744147 +0100
+@@ -235,7 +235,7 @@ struct ppp_net {
+ /* Prototypes. */
+ static int ppp_unattached_ioctl(struct net *net, struct ppp_file *pf,
+ 			struct file *file, unsigned int cmd, unsigned long arg);
+-static void ppp_xmit_process(struct ppp *ppp);
+static int ppp_xmit_process(struct ppp *ppp);
+ static void ppp_send_frame(struct ppp *ppp, struct sk_buff *skb);
+ static void ppp_push(struct ppp *ppp);
+ static void ppp_channel_push(struct channel *pch);
+@@ -968,9 +968,9 @@ ppp_start_xmit(struct sk_buff *skb, stru
+ 	proto = npindex_to_proto[npi];
+ 	put_unaligned_be16(proto, pp);
+ 
+-	netif_stop_queue(dev);
+ 	skb_queue_tail(&ppp->file.xq, skb);
+-	ppp_xmit_process(ppp);
+	if (!ppp_xmit_process(ppp))
+		netif_stop_queue(dev);
+ 	return NETDEV_TX_OK;
+ 
+  outf:
+@@ -1048,10 +1048,11 @@ static void ppp_setup(struct net_device
+  * Called to do any work queued up on the transmit side
+  * that can now be done.
+  */
+-static void
+static int
+ ppp_xmit_process(struct ppp *ppp)
+ {
+ 	struct sk_buff *skb;
+	int ret = 0;
+ 
+ 	ppp_xmit_lock(ppp);
+ 	if (!ppp->closing) {
+@@ -1061,10 +1062,13 @@ ppp_xmit_process(struct ppp *ppp)
+ 			ppp_send_frame(ppp, skb);
+ 		/* If there's no work left to do, tell the core net
+ 		   code that we can accept some more. */
+-		if (!ppp->xmit_pending && !skb_peek(&ppp->file.xq))
+		if (!ppp->xmit_pending && !skb_peek(&ppp->file.xq)) {
+ 			netif_wake_queue(ppp->dev);
+			ret = 1;
+		}
+ 	}
+ 	ppp_xmit_unlock(ppp);
+	return ret;
+ }
+ 
+ static inline struct sk_buff *
+
+-- 
+David Woodhouse                            Open Source Technology Centre
+David.Woodhouse@intel.com                              Intel Corporation
+
+
+
--- a/target/linux/generic/patches-3.3/120-ppp_txqueue_restart.patch
+++ b/target/linux/generic/patches-3.3/120-ppp_txqueue_restart.patch
@ -0,0 +1,82 @@
+For every transmitted packet, ppp_start_xmit() will stop the netdev
+queue and then, if appropriate, restart it. This causes the TX softirq
+to run, entirely gratuitously.
+
+This is "only" a waste of CPU time in the normal case, but it's actively
+harmful when the PPP device is a TEQL slave — the wakeup will cause the
+offending device to receive the next TX packet from the TEQL queue, when
+it *should* have gone to the next slave in the list. We end up seeing
+large bursts of packets on just *one* slave device, rather than using
+the full available bandwidth over all slaves.
+
+This patch fixes the problem by *not* unconditionally stopping the queue
+in ppp_start_xmit(). It adds a return value from ppp_xmit_process()
+which indicates whether the queue should be stopped or not.
+
+It *doesn't* remove the call to netif_wake_queue() from
+ppp_xmit_process(), because other code paths (especially from
+ppp_output_wakeup()) need it there and it's messy to push it out to the
+other callers to do it based on the return value. So we leave it in
+place — it's a no-op in the case where the queue wasn't stopped, so it's
+harmless in the TX path.
+
+Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>
+
+--- a/drivers/net/ppp/ppp_generic.c~	2012-01-26 00:39:32.000000000 +0000
+++ b/drivers/net/ppp/ppp_generic.c	2012-03-26 10:32:31.286744147 +0100
+@@ -235,7 +235,7 @@ struct ppp_net {
+ /* Prototypes. */
+ static int ppp_unattached_ioctl(struct net *net, struct ppp_file *pf,
+ 			struct file *file, unsigned int cmd, unsigned long arg);
+-static void ppp_xmit_process(struct ppp *ppp);
+static int ppp_xmit_process(struct ppp *ppp);
+ static void ppp_send_frame(struct ppp *ppp, struct sk_buff *skb);
+ static void ppp_push(struct ppp *ppp);
+ static void ppp_channel_push(struct channel *pch);
+@@ -968,9 +968,9 @@ ppp_start_xmit(struct sk_buff *skb, stru
+ 	proto = npindex_to_proto[npi];
+ 	put_unaligned_be16(proto, pp);
+ 
+-	netif_stop_queue(dev);
+ 	skb_queue_tail(&ppp->file.xq, skb);
+-	ppp_xmit_process(ppp);
+	if (!ppp_xmit_process(ppp))
+		netif_stop_queue(dev);
+ 	return NETDEV_TX_OK;
+ 
+  outf:
+@@ -1048,10 +1048,11 @@ static void ppp_setup(struct net_device
+  * Called to do any work queued up on the transmit side
+  * that can now be done.
+  */
+-static void
+static int
+ ppp_xmit_process(struct ppp *ppp)
+ {
+ 	struct sk_buff *skb;
+	int ret = 0;
+ 
+ 	ppp_xmit_lock(ppp);
+ 	if (!ppp->closing) {
+@@ -1061,10 +1062,13 @@ ppp_xmit_process(struct ppp *ppp)
+ 			ppp_send_frame(ppp, skb);
+ 		/* If there's no work left to do, tell the core net
+ 		   code that we can accept some more. */
+-		if (!ppp->xmit_pending && !skb_peek(&ppp->file.xq))
+		if (!ppp->xmit_pending && !skb_peek(&ppp->file.xq)) {
+ 			netif_wake_queue(ppp->dev);
+			ret = 1;
+		}
+ 	}
+ 	ppp_xmit_unlock(ppp);
+	return ret;
+ }
+ 
+ static inline struct sk_buff *
+
+-- 
+David Woodhouse                            Open Source Technology Centre
+David.Woodhouse@intel.com                              Intel Corporation
+
+
+