git.kernelconcepts.de Git - karo-tx-linux.git/blobdiff - net/ipv4/ip_output.c
[TCP]: Honour sk_bound_dev_if in tcp_v4_send_ack
[karo-tx-linux.git] / net / ipv4 / ip_output.c
index 1abc48899f2dfaf4bc7b7375cd21daf37229dcc5..34ea4547ebbea10cda26bca8a6f05307218b9518 100644 (file)
@@ -160,9 +160,15 @@ EXPORT_SYMBOL_GPL(ip_build_and_send_pkt);
 static inline int ip_finish_output2(struct sk_buff *skb)
 {
        struct dst_entry *dst = skb->dst;
+       struct rtable *rt = (struct rtable *)dst;
        struct net_device *dev = dst->dev;
        int hh_len = LL_RESERVED_SPACE(dev);
 
+       if (rt->rt_type == RTN_MULTICAST)
+               IP_INC_STATS(IPSTATS_MIB_OUTMCASTPKTS);
+       else if (rt->rt_type == RTN_BROADCAST)
+               IP_INC_STATS(IPSTATS_MIB_OUTBCASTPKTS);
+
        /* Be paranoid, rather than too clever. */
        if (unlikely(skb_headroom(skb) < hh_len && dev->hard_header)) {
                struct sk_buff *skb2;
@@ -189,6 +195,14 @@ static inline int ip_finish_output2(struct sk_buff *skb)
        return -EINVAL;
 }
 
+static inline int ip_skb_dst_mtu(struct sk_buff *skb)
+{
+       struct inet_sock *inet = skb->sk ? inet_sk(skb->sk) : NULL;
+
+       return (inet && inet->pmtudisc == IP_PMTUDISC_PROBE) ?
+              skb->dst->dev->mtu : dst_mtu(skb->dst);
+}
+
 static inline int ip_finish_output(struct sk_buff *skb)
 {
 #if defined(CONFIG_NETFILTER) && defined(CONFIG_XFRM)
@@ -198,7 +212,7 @@ static inline int ip_finish_output(struct sk_buff *skb)
                return dst_output(skb);
        }
 #endif
-       if (skb->len > dst_mtu(skb->dst) && !skb_is_gso(skb))
+       if (skb->len > ip_skb_dst_mtu(skb) && !skb_is_gso(skb))
                return ip_fragment(skb, ip_finish_output2);
        else
                return ip_finish_output2(skb);
@@ -384,20 +398,9 @@ static void ip_copy_metadata(struct sk_buff *to, struct sk_buff *from)
 #ifdef CONFIG_NET_SCHED
        to->tc_index = from->tc_index;
 #endif
-#ifdef CONFIG_NETFILTER
-       /* Connection association is same as pre-frag packet */
-       nf_conntrack_put(to->nfct);
-       to->nfct = from->nfct;
-       nf_conntrack_get(to->nfct);
-       to->nfctinfo = from->nfctinfo;
+       nf_copy(to, from);
 #if defined(CONFIG_IP_VS) || defined(CONFIG_IP_VS_MODULE)
        to->ipvs_property = from->ipvs_property;
-#endif
-#ifdef CONFIG_BRIDGE_NETFILTER
-       nf_bridge_put(to->nf_bridge);
-       to->nf_bridge = from->nf_bridge;
-       nf_bridge_get(to->nf_bridge);
-#endif
 #endif
        skb_copy_secmark(to, from);
 }
@@ -433,7 +436,7 @@ int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff*))
        if (unlikely((iph->frag_off & htons(IP_DF)) && !skb->local_df)) {
                IP_INC_STATS(IPSTATS_MIB_FRAGFAILS);
                icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED,
-                         htonl(dst_mtu(&rt->u.dst)));
+                         htonl(ip_skb_dst_mtu(skb)));
                kfree_skb(skb);
                return -EMSGSIZE;
        }
@@ -500,7 +503,7 @@ int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff*))
                         * before previous one went down. */
                        if (frag) {
                                frag->ip_summed = CHECKSUM_NONE;
-                               frag->h.raw = frag->data;
+                               skb_reset_transport_header(frag);
                                __skb_push(frag, hlen);
                                skb_reset_network_header(frag);
                                memcpy(skb_network_header(frag), iph, hlen);
@@ -593,7 +596,7 @@ slow_path:
                skb_reserve(skb2, ll_rs);
                skb_put(skb2, len + hlen);
                skb_reset_network_header(skb2);
-               skb2->h.raw = skb2->data + hlen;
+               skb2->transport_header = skb2->network_header + hlen;
 
                /*
                 *      Charge the memory for the fragment to any owner
@@ -607,12 +610,12 @@ slow_path:
                 *      Copy the packet header into the new buffer.
                 */
 
-               memcpy(skb_network_header(skb2), skb->data, hlen);
+               skb_copy_from_linear_data(skb, skb_network_header(skb2), hlen);
 
                /*
                 *      Copy a block of the IP datagram.
                 */
-               if (skb_copy_bits(skb, ptr, skb2->h.raw, len))
+               if (skb_copy_bits(skb, ptr, skb_transport_header(skb2), len))
                        BUG();
                left -= len;
 
@@ -724,7 +727,7 @@ static inline int ip_ufo_append_data(struct sock *sk,
                skb_reset_network_header(skb);
 
                /* initialize protocol header pointer */
-               skb->h.raw = skb->data + fragheaderlen;
+               skb->transport_header = skb->network_header + fragheaderlen;
 
                skb->ip_summed = CHECKSUM_PARTIAL;
                skb->csum = 0;
@@ -798,7 +801,9 @@ int ip_append_data(struct sock *sk,
                        inet->cork.addr = ipc->addr;
                }
                dst_hold(&rt->u.dst);
-               inet->cork.fragsize = mtu = dst_mtu(rt->u.dst.path);
+               inet->cork.fragsize = mtu = inet->pmtudisc == IP_PMTUDISC_PROBE ?
+                                           rt->u.dst.dev->mtu :
+                                           dst_mtu(rt->u.dst.path);
                inet->cork.rt = rt;
                inet->cork.length = 0;
                sk->sk_sndmsg_page = NULL;
@@ -929,7 +934,8 @@ alloc_new_skb:
                         */
                        data = skb_put(skb, fraglen);
                        skb_set_network_header(skb, exthdrlen);
-                       skb->h.raw = skb->nh.raw + fragheaderlen;
+                       skb->transport_header = (skb->network_header +
+                                                fragheaderlen);
                        data += fragheaderlen;
 
                        if (fraggap) {
@@ -1099,8 +1105,6 @@ ssize_t   ip_append_page(struct sock *sk, struct page *page,
                }
                if (len <= 0) {
                        struct sk_buff *skb_prev;
-                       char *data;
-                       struct iphdr *iph;
                        int alloclen;
 
                        skb_prev = skb;
@@ -1123,16 +1127,15 @@ ssize_t ip_append_page(struct sock *sk, struct page *page,
                        /*
                         *      Find where to start putting bytes.
                         */
-                       data = skb_put(skb, fragheaderlen + fraggap);
+                       skb_put(skb, fragheaderlen + fraggap);
                        skb_reset_network_header(skb);
-                       iph = ip_hdr(skb);
-                       data += fragheaderlen;
-                       skb->h.raw = data;
-
+                       skb->transport_header = (skb->network_header +
+                                                fragheaderlen);
                        if (fraggap) {
-                               skb->csum = skb_copy_and_csum_bits(
-                                       skb_prev, maxfraglen,
-                                       data, fraggap, 0);
+                               skb->csum = skb_copy_and_csum_bits(skb_prev,
+                                                                  maxfraglen,
+                                                   skb_transport_header(skb),
+                                                                  fraggap, 0);
                                skb_prev->csum = csum_sub(skb_prev->csum,
                                                          skb->csum);
                                pskb_trim_unique(skb_prev, maxfraglen);
@@ -1201,7 +1204,7 @@ int ip_push_pending_frames(struct sock *sk)
        if (skb->data < skb_network_header(skb))
                __skb_pull(skb, skb_network_offset(skb));
        while ((tmp_skb = __skb_dequeue(&sk->sk_write_queue)) != NULL) {
-               __skb_pull(tmp_skb, skb->h.raw - skb->nh.raw);
+               __skb_pull(tmp_skb, skb_network_header_len(skb));
                *tail_skb = tmp_skb;
                tail_skb = &(tmp_skb->next);
                skb->len += tmp_skb->len;
@@ -1216,13 +1219,13 @@ int ip_push_pending_frames(struct sock *sk)
         * to fragment the frame generated here. No matter, what transforms
         * how transforms change size of the packet, it will come out.
         */
-       if (inet->pmtudisc != IP_PMTUDISC_DO)
+       if (inet->pmtudisc < IP_PMTUDISC_DO)
                skb->local_df = 1;
 
        /* DF bit is set when we want to see DF on outgoing frames.
         * If local_df is set too, we still allow to fragment this frame
         * locally. */
-       if (inet->pmtudisc == IP_PMTUDISC_DO ||
+       if (inet->pmtudisc >= IP_PMTUDISC_DO ||
            (skb->len <= dst_mtu(&rt->u.dst) &&
             ip_dont_fragment(sk, &rt->u.dst)))
                df = htons(IP_DF);
@@ -1349,14 +1352,15 @@ void ip_send_reply(struct sock *sk, struct sk_buff *skb, struct ip_reply_arg *ar
        }
 
        {
-               struct flowi fl = { .nl_u = { .ip4_u =
+               struct flowi fl = { .oif = arg->bound_dev_if,
+                                   .nl_u = { .ip4_u =
                                              { .daddr = daddr,
                                                .saddr = rt->rt_spec_dst,
                                                .tos = RT_TOS(ip_hdr(skb)->tos) } },
                                    /* Not quite clean, but right. */
                                    .uli_u = { .ports =
-                                              { .sport = skb->h.th->dest,
-                                                .dport = skb->h.th->source } },
+                                              { .sport = tcp_hdr(skb)->dest,
+                                                .dport = tcp_hdr(skb)->source } },
                                    .proto = sk->sk_protocol };
                security_skb_classify_flow(skb, &fl);
                if (ip_route_output_key(&rt, &fl))
@@ -1373,11 +1377,14 @@ void ip_send_reply(struct sock *sk, struct sk_buff *skb, struct ip_reply_arg *ar
        inet->tos = ip_hdr(skb)->tos;
        sk->sk_priority = skb->priority;
        sk->sk_protocol = ip_hdr(skb)->protocol;
+       sk->sk_bound_dev_if = arg->bound_dev_if;
        ip_append_data(sk, ip_reply_glue_bits, arg->iov->iov_base, len, 0,
                       &ipc, rt, MSG_DONTWAIT);
        if ((skb = skb_peek(&sk->sk_write_queue)) != NULL) {
                if (arg->csumoffset >= 0)
-                       *((__sum16 *)skb->h.raw + arg->csumoffset) = csum_fold(csum_add(skb->csum, arg->csum));
+                       *((__sum16 *)skb_transport_header(skb) +
+                         arg->csumoffset) = csum_fold(csum_add(skb->csum,
+                                                               arg->csum));
                skb->ip_summed = CHECKSUM_NONE;
                ip_push_pending_frames(sk);
        }