git.kernelconcepts.de Git - karo-tx-linux.git/blobdiff - net/ipv6/ip6_output.c
Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net
[karo-tx-linux.git] / net / ipv6 / ip6_output.c
index 92b1aa38f121507b662e2c964423952bf995b81e..0c89671e0767e5debe909d654cd9d089bcf8fa19 100644 (file)
@@ -55,8 +55,9 @@
 #include <net/xfrm.h>
 #include <net/checksum.h>
 #include <linux/mroute6.h>
+#include <net/l3mdev.h>
 
-static int ip6_finish_output2(struct sock *sk, struct sk_buff *skb)
+static int ip6_finish_output2(struct net *net, struct sock *sk, struct sk_buff *skb)
 {
        struct dst_entry *dst = skb_dst(skb);
        struct net_device *dev = dst->dev;
@@ -71,7 +72,7 @@ static int ip6_finish_output2(struct sock *sk, struct sk_buff *skb)
                struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));
 
                if (!(dev->flags & IFF_LOOPBACK) && sk_mc_loop(sk) &&
-                   ((mroute6_socket(dev_net(dev), skb) &&
+                   ((mroute6_socket(net, skb) &&
                     !(IP6CB(skb)->flags & IP6SKB_FORWARDED)) ||
                     ipv6_chk_mcast_addr(dev, &ipv6_hdr(skb)->daddr,
                                         &ipv6_hdr(skb)->saddr))) {
@@ -82,19 +83,18 @@ static int ip6_finish_output2(struct sock *sk, struct sk_buff *skb)
                         */
                        if (newskb)
                                NF_HOOK(NFPROTO_IPV6, NF_INET_POST_ROUTING,
-                                       sk, newskb, NULL, newskb->dev,
+                                       net, sk, newskb, NULL, newskb->dev,
                                        dev_loopback_xmit);
 
                        if (ipv6_hdr(skb)->hop_limit == 0) {
-                               IP6_INC_STATS(dev_net(dev), idev,
+                               IP6_INC_STATS(net, idev,
                                              IPSTATS_MIB_OUTDISCARDS);
                                kfree_skb(skb);
                                return 0;
                        }
                }
 
-               IP6_UPD_PO_STATS(dev_net(dev), idev, IPSTATS_MIB_OUTMCAST,
-                               skb->len);
+               IP6_UPD_PO_STATS(net, idev, IPSTATS_MIB_OUTMCAST, skb->len);
 
                if (IPV6_ADDR_MC_SCOPE(&ipv6_hdr(skb)->daddr) <=
                    IPV6_ADDR_SCOPE_NODELOCAL &&
@@ -116,48 +116,49 @@ static int ip6_finish_output2(struct sock *sk, struct sk_buff *skb)
        }
        rcu_read_unlock_bh();
 
-       IP6_INC_STATS(dev_net(dst->dev),
-                     ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES);
+       IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES);
        kfree_skb(skb);
        return -EINVAL;
 }
 
-static int ip6_finish_output(struct sock *sk, struct sk_buff *skb)
+static int ip6_finish_output(struct net *net, struct sock *sk, struct sk_buff *skb)
 {
        if ((skb->len > ip6_skb_dst_mtu(skb) && !skb_is_gso(skb)) ||
            dst_allfrag(skb_dst(skb)) ||
            (IP6CB(skb)->frag_max_size && skb->len > IP6CB(skb)->frag_max_size))
-               return ip6_fragment(sk, skb, ip6_finish_output2);
+               return ip6_fragment(net, sk, skb, ip6_finish_output2);
        else
-               return ip6_finish_output2(sk, skb);
+               return ip6_finish_output2(net, sk, skb);
 }
 
-int ip6_output(struct sock *sk, struct sk_buff *skb)
+int ip6_output(struct net *net, struct sock *sk, struct sk_buff *skb)
 {
        struct net_device *dev = skb_dst(skb)->dev;
        struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));
+
        if (unlikely(idev->cnf.disable_ipv6)) {
-               IP6_INC_STATS(dev_net(dev), idev,
-                             IPSTATS_MIB_OUTDISCARDS);
+               IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTDISCARDS);
                kfree_skb(skb);
                return 0;
        }
 
-       return NF_HOOK_COND(NFPROTO_IPV6, NF_INET_POST_ROUTING, sk, skb,
-                           NULL, dev,
+       return NF_HOOK_COND(NFPROTO_IPV6, NF_INET_POST_ROUTING,
+                           net, sk, skb, NULL, dev,
                            ip6_finish_output,
                            !(IP6CB(skb)->flags & IP6SKB_REROUTED));
 }
 
 /*
- *     xmit an sk_buff (used by TCP, SCTP and DCCP)
+ * xmit an sk_buff (used by TCP, SCTP and DCCP)
+ * Note: the socket lock is not held for SYNACK packets, but the socket might
+ * still be modified by calls to skb_set_owner_w() and ipv6_local_error(),
+ * which use proper atomic operations or spinlocks.
  */
-
-int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6,
+int ip6_xmit(const struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6,
             struct ipv6_txoptions *opt, int tclass)
 {
        struct net *net = sock_net(sk);
-       struct ipv6_pinfo *np = inet6_sk(sk);
+       const struct ipv6_pinfo *np = inet6_sk(sk);
        struct in6_addr *first_hop = &fl6->daddr;
        struct dst_entry *dst = skb_dst(skb);
        struct ipv6hdr *hdr;
@@ -186,7 +187,10 @@ int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6,
                        }
                        consume_skb(skb);
                        skb = skb2;
-                       skb_set_owner_w(skb, sk);
+                       /* skb_set_owner_w() changes sk->sk_wmem_alloc atomically,
+                        * it is safe to call in our context (socket lock not held)
+                        */
+                       skb_set_owner_w(skb, (struct sock *)sk);
                }
                if (opt->opt_flen)
                        ipv6_push_frag_opts(skb, opt, &proto);
@@ -224,12 +228,20 @@ int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6,
        if ((skb->len <= mtu) || skb->ignore_df || skb_is_gso(skb)) {
                IP6_UPD_PO_STATS(net, ip6_dst_idev(skb_dst(skb)),
                              IPSTATS_MIB_OUT, skb->len);
-               return NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT, sk, skb,
-                              NULL, dst->dev, dst_output_sk);
+               /* hooks should never assume socket lock is held.
+                * we promote our socket to non const
+                */
+               return NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT,
+                              net, (struct sock *)sk, skb, NULL, dst->dev,
+                              dst_output);
        }
 
        skb->dev = dst->dev;
-       ipv6_local_error(sk, EMSGSIZE, fl6, mtu);
+       /* ipv6_local_error() does not require socket lock,
+        * we promote our socket to non const
+        */
+       ipv6_local_error((struct sock *)sk, EMSGSIZE, fl6, mtu);
+
        IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_FRAGFAILS);
        kfree_skb(skb);
        return -EMSGSIZE;
@@ -317,10 +329,11 @@ static int ip6_forward_proxy_check(struct sk_buff *skb)
        return 0;
 }
 
-static inline int ip6_forward_finish(struct sock *sk, struct sk_buff *skb)
+static inline int ip6_forward_finish(struct net *net, struct sock *sk,
+                                    struct sk_buff *skb)
 {
        skb_sender_cpu_clear(skb);
-       return dst_output_sk(sk, skb);
+       return dst_output(net, sk, skb);
 }
 
 static unsigned int ip6_dst_mtu_forward(const struct dst_entry *dst)
@@ -376,6 +389,9 @@ int ip6_forward(struct sk_buff *skb)
        if (skb->pkt_type != PACKET_HOST)
                goto drop;
 
+       if (unlikely(skb->sk))
+               goto drop;
+
        if (skb_warn_if_lro(skb))
                goto drop;
 
@@ -512,8 +528,8 @@ int ip6_forward(struct sk_buff *skb)
 
        IP6_INC_STATS_BH(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTFORWDATAGRAMS);
        IP6_ADD_STATS_BH(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTOCTETS, skb->len);
-       return NF_HOOK(NFPROTO_IPV6, NF_INET_FORWARD, NULL, skb,
-                      skb->dev, dst->dev,
+       return NF_HOOK(NFPROTO_IPV6, NF_INET_FORWARD,
+                      net, NULL, skb, skb->dev, dst->dev,
                       ip6_forward_finish);
 
 error:
@@ -540,8 +556,8 @@ static void ip6_copy_metadata(struct sk_buff *to, struct sk_buff *from)
        skb_copy_secmark(to, from);
 }
 
-int ip6_fragment(struct sock *sk, struct sk_buff *skb,
-                int (*output)(struct sock *, struct sk_buff *))
+int ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb,
+                int (*output)(struct net *, struct sock *, struct sk_buff *))
 {
        struct sk_buff *frag;
        struct rt6_info *rt = (struct rt6_info *)skb_dst(skb);
@@ -554,7 +570,6 @@ int ip6_fragment(struct sock *sk, struct sk_buff *skb,
        __be32 frag_id;
        int ptr, offset = 0, err = 0;
        u8 *prevhdr, nexthdr = 0;
-       struct net *net = dev_net(skb_dst(skb)->dev);
 
        hlen = ip6_find_1stfragopt(skb, &prevhdr);
        nexthdr = *prevhdr;
@@ -674,7 +689,7 @@ int ip6_fragment(struct sock *sk, struct sk_buff *skb,
                                ip6_copy_metadata(frag, skb);
                        }
 
-                       err = output(sk, skb);
+                       err = output(net, sk, skb);
                        if (!err)
                                IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
                                              IPSTATS_MIB_FRAGCREATES);
@@ -802,7 +817,7 @@ slow_path:
                /*
                 *      Put this fragment into the sending queue.
                 */
-               err = output(sk, frag);
+               err = output(net, sk, frag);
                if (err)
                        goto fail;
 
@@ -874,7 +889,8 @@ static struct dst_entry *ip6_sk_dst_check(struct sock *sk,
 #ifdef CONFIG_IPV6_SUBTREES
            ip6_rt_check(&rt->rt6i_src, &fl6->saddr, np->saddr_cache) ||
 #endif
-           (fl6->flowi6_oif && fl6->flowi6_oif != dst->dev->ifindex)) {
+          (!(fl6->flowi6_flags & FLOWI_FLAG_SKIP_NH_OIF) &&
+             (fl6->flowi6_oif && fl6->flowi6_oif != dst->dev->ifindex))) {
                dst_release(dst);
                dst = NULL;
        }
@@ -883,7 +899,7 @@ out:
        return dst;
 }
 
-static int ip6_dst_lookup_tail(struct net *net, struct sock *sk,
+static int ip6_dst_lookup_tail(struct net *net, const struct sock *sk,
                               struct dst_entry **dst, struct flowi6 *fl6)
 {
 #ifdef CONFIG_IPV6_OPTIMISTIC_DAD
@@ -1014,7 +1030,7 @@ EXPORT_SYMBOL_GPL(ip6_dst_lookup);
  *     It returns a valid dst pointer on success, or a pointer encoded
  *     error code.
  */
-struct dst_entry *ip6_dst_lookup_flow(struct sock *sk, struct flowi6 *fl6,
+struct dst_entry *ip6_dst_lookup_flow(const struct sock *sk, struct flowi6 *fl6,
                                      const struct in6_addr *final_dst)
 {
        struct dst_entry *dst = NULL;
@@ -1026,7 +1042,7 @@ struct dst_entry *ip6_dst_lookup_flow(struct sock *sk, struct flowi6 *fl6,
        if (final_dst)
                fl6->daddr = *final_dst;
        if (!fl6->flowi6_oif)
-               fl6->flowi6_oif = dst->dev->ifindex;
+               fl6->flowi6_oif = l3mdev_fib_oif(dst->dev);
 
        return xfrm_lookup_route(sock_net(sk), dst, flowi6_to_flowi(fl6), sk, 0);
 }
@@ -1680,7 +1696,7 @@ int ip6_send_skb(struct sk_buff *skb)
        struct rt6_info *rt = (struct rt6_info *)skb_dst(skb);
        int err;
 
-       err = ip6_local_out(skb);
+       err = ip6_local_out(net, skb->sk, skb);
        if (err) {
                if (err > 0)
                        err = net_xmit_errno(err);