Merge remote-tracking branch 'ipsec/master'

[karo-tx-linux.git] / net / ipv4 / tcp_output.c
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c

index 7c83cb8bf1378022ba3a68c56403ef40801cd3ae..d46f2143305c2632e7703f2677310dd177d67208 100644 (file)
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -637,6 +637,8 @@ static unsigned int tcp_established_options(struct sock *sk, struct sk_buff *skb
         unsigned int size = 0;
         unsigned int eff_sacks;
  
+       opts->options = 0;
+
  #ifdef CONFIG_TCP_MD5SIG
         *md5 = tp->af_specific->md5_lookup(sk, sk);
         if (unlikely(*md5)) {
@@ -895,8 +897,7 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
  
         skb_orphan(skb);
         skb->sk = sk;
-       skb->destructor = (sysctl_tcp_limit_output_bytes > 0) ?
-                         tcp_wfree : sock_wfree;
+       skb->destructor = tcp_wfree;
         atomic_add(skb->truesize, &sk->sk_wmem_alloc);
  
         /* Build TCP header and checksum it. */
@@ -985,8 +986,10 @@ static void tcp_queue_skb(struct sock *sk, struct sk_buff *skb)
  static void tcp_set_skb_tso_segs(const struct sock *sk, struct sk_buff *skb,
                                  unsigned int mss_now)
  {
-       if (skb->len <= mss_now || !sk_can_gso(sk) ||
-           skb->ip_summed == CHECKSUM_NONE) {
+       /* Make sure we own this skb before messing gso_size/gso_segs */
+       WARN_ON_ONCE(skb_cloned(skb));
+
+       if (skb->len <= mss_now || skb->ip_summed == CHECKSUM_NONE) {
                 /* Avoid the costly divide in the normal
                  * non-TSO case.
                  */
@@ -1066,9 +1069,7 @@ int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len,
         if (nsize < 0)
                 nsize = 0;
  
-       if (skb_cloned(skb) &&
-           skb_is_nonlinear(skb) &&
-           pskb_expand_head(skb, 0, 0, GFP_ATOMIC))
+       if (skb_unclone(skb, GFP_ATOMIC))
                 return -ENOMEM;
  
         /* Get a new skb... force flag on. */
@@ -1840,7 +1841,6 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
         while ((skb = tcp_send_head(sk))) {
                 unsigned int limit;
  
-
                 tso_segs = tcp_init_tso_segs(sk, skb, mss_now);
                 BUG_ON(!tso_segs);
  
@@ -1869,13 +1869,20 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
                                 break;
                 }
  
-               /* TSQ : sk_wmem_alloc accounts skb truesize,
-                * including skb overhead. But thats OK.
+               /* TCP Small Queues :
+                * Limit the data queued in qdisc/devices to two packets or ~1 ms worth.
+                * This allows for :
+                *  - better RTT estimation and ACK scheduling
+                *  - faster recovery
+                *  - high rates
                  */
-               if (atomic_read(&sk->sk_wmem_alloc) >= sysctl_tcp_limit_output_bytes) {
+               limit = max(skb->truesize, sk->sk_pacing_rate >> 10);
+
+               if (atomic_read(&sk->sk_wmem_alloc) > limit) {
                         set_bit(TSQ_THROTTLED, &tp->tsq_flags);
                         break;
                 }
+
                 limit = mss_now;
                 if (tso_segs > 1 && !tcp_urg_mode(tp))
                         limit = tcp_mss_split_point(sk, skb, mss_now,
@@ -2337,6 +2344,8 @@ int __tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb)
                 int oldpcount = tcp_skb_pcount(skb);
  
                 if (unlikely(oldpcount > 1)) {
+                       if (skb_unclone(skb, GFP_ATOMIC))
+                               return -ENOMEM;
                         tcp_init_tso_segs(sk, skb, cur_mss);
                         tcp_adjust_pcount(sk, skb, oldpcount - tcp_skb_pcount(skb));
                 }