tcp: fix tcp_ack() performance problem

Author:     Eric Dumazet <edumazet@google.com>
AuthorDate: Sat, 11 Oct 2014 22:17:29 +0000 (15:17 -0700)
Committer:  David S. Miller <davem@davemloft.net>
CommitDate: Tue, 14 Oct 2014 19:59:37 +0000 (15:59 -0400)

diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 00a41499d52c89c0961f6229d79f58022c25ba46..a12b455928e52211efdc6b471ef54de6218f5df0 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -68,6 +68,7 @@
  #include <linux/module.h>
  #include <linux/sysctl.h>
  #include <linux/kernel.h>
+#include <linux/prefetch.h>
  #include <net/dst.h>
  #include <net/tcp.h>
  #include <net/inet_common.h>
@@ -3029,6 +3030,21 @@ static u32 tcp_tso_acked(struct sock *sk, struct sk_buff *skb)
         return packets_acked;
  }
  
+static void tcp_ack_tstamp(struct sock *sk, struct sk_buff *skb,
+                          u32 prior_snd_una)
+{
+       const struct skb_shared_info *shinfo;
+
+       /* Fast path: when SOF_TIMESTAMPING_TX_ACK is not set in sk->sk_tsflags, return before touching skb_shinfo() and shinfo->tx_flags (avoids cache line misses) */
+       if (likely(!(sk->sk_tsflags & SOF_TIMESTAMPING_TX_ACK)))
+               return;
+
+       shinfo = skb_shinfo(skb);
+       if ((shinfo->tx_flags & SKBTX_ACK_TSTAMP) &&
+           between(shinfo->tskey, prior_snd_una, tcp_sk(sk)->snd_una - 1))
+               __skb_tstamp_tx(skb, NULL, sk, SCM_TSTAMP_ACK);
+}
+
  /* Remove acknowledged frames from the retransmission queue. If our packet
   * is before the ack sequence we can discard it as it's confirmed to have
   * arrived at the other end.
@@ -3052,14 +3068,11 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets,
         first_ackt.v64 = 0;
  
         while ((skb = tcp_write_queue_head(sk)) && skb != tcp_send_head(sk)) {
-               struct skb_shared_info *shinfo = skb_shinfo(skb);
                 struct tcp_skb_cb *scb = TCP_SKB_CB(skb);
                 u8 sacked = scb->sacked;
                 u32 acked_pcount;
  
-               if (unlikely(shinfo->tx_flags & SKBTX_ACK_TSTAMP) &&
-                   between(shinfo->tskey, prior_snd_una, tp->snd_una - 1))
-                       __skb_tstamp_tx(skb, NULL, sk, SCM_TSTAMP_ACK);
+               tcp_ack_tstamp(sk, skb, prior_snd_una);
  
                 /* Determine how many packets and what bytes were acked, tso and else */
                 if (after(scb->end_seq, tp->snd_una)) {
@@ -3073,10 +3086,12 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets,
  
                         fully_acked = false;
                 } else {
+                       /* Speedup tcp_unlink_write_queue() and next loop */
+                       prefetchw(skb->next);
                         acked_pcount = tcp_skb_pcount(skb);
                 }
  
-               if (sacked & TCPCB_RETRANS) {
+               if (unlikely(sacked & TCPCB_RETRANS)) {
                         if (sacked & TCPCB_SACKED_RETRANS)
                                 tp->retrans_out -= acked_pcount;
                         flag |= FLAG_RETRANS_DATA_ACKED;
@@ -3107,7 +3122,7 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets,
                  * connection startup slow start one packet too
                  * quickly.  This is severely frowned upon behavior.
                  */
-               if (!(scb->tcp_flags & TCPHDR_SYN)) {
+               if (likely(!(scb->tcp_flags & TCPHDR_SYN))) {
                         flag |= FLAG_DATA_ACKED;
                 } else {
                         flag |= FLAG_SYN_ACKED;
@@ -3119,9 +3134,9 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets,
  
                 tcp_unlink_write_queue(skb, sk);
                 sk_wmem_free_skb(sk, skb);
-               if (skb == tp->retransmit_skb_hint)
+               if (unlikely(skb == tp->retransmit_skb_hint))
                         tp->retransmit_skb_hint = NULL;
-               if (skb == tp->lost_skb_hint)
+               if (unlikely(skb == tp->lost_skb_hint))
                         tp->lost_skb_hint = NULL;
         }
  
@@ -3132,7 +3147,7 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets,
                 flag |= FLAG_SACK_RENEGING;
  
         skb_mstamp_get(&now);
-       if (first_ackt.v64) {
+       if (likely(first_ackt.v64)) {
                 seq_rtt_us = skb_mstamp_us_delta(&now, &first_ackt);
                 ca_seq_rtt_us = skb_mstamp_us_delta(&now, &last_ackt);
         }
@@ -3394,6 +3409,9 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
         int acked = 0; /* Number of packets newly acked */
         long sack_rtt_us = -1L;
  
+       /* We very likely will need to access write queue head. */
+       prefetchw(sk->sk_write_queue.next);
+
         /* If the ack is older than previous acks
          * then we can probably ignore it.
          */
author	Eric Dumazet <edumazet@google.com>
	Sat, 11 Oct 2014 22:17:29 +0000 (15:17 -0700)
committer	David S. Miller <davem@davemloft.net>
	Tue, 14 Oct 2014 19:59:37 +0000 (15:59 -0400)