git.kernelconcepts.de Git - karo-tx-linux.git/commitdiff
Merge git://git.kernel.org/pub/scm/linux/kernel/git/pablo/nf
author: David S. Miller <davem@davemloft.net>
Wed, 3 May 2017 14:11:26 +0000 (10:11 -0400)
committer: David S. Miller <davem@davemloft.net>
Wed, 3 May 2017 14:11:26 +0000 (10:11 -0400)
Pablo Neira Ayuso says:

====================
Netfilter/IPVS/OVS fixes for net

The following patchset contains a rather large batch of Netfilter, IPVS
and OVS fixes for your net tree. This includes fixes for ctnetlink, the
userspace conntrack helper infrastructure, conntrack OVS support,
ebtables DNAT target, and several leaks in error paths, among others. More
specifically, they are:

1) Fix reference count leak in the CT target error path, from Gao Feng.

2) Remove conntrack entry clashing with a matching expectation, patch
   from Jarno Rajahalme.

3) Fix bogus EEXIST when registering two different userspace helpers,
   from Liping Zhang.

4) Don't leak dummy elements in the new bitmap set type in nf_tables,
   from Liping Zhang.

5) Get rid of module autoload from conntrack update path in ctnetlink,
   we don't need autoload at this late stage and it is happening with
   rcu read lock held which is not good. From Liping Zhang.

6) Fix deadlock due to double-acquire of the expect_lock from conntrack
   update path, this fixes a bug that was introduced when the central
   spinlock got removed. Again from Liping Zhang.

7) Make ct->status updates from the ctnetlink path safe, from Liping Zhang. The expect_lock
   protection that was selected when the central spinlock was removed was
   not really protecting anything at all.

8) Protect sequence adjustment under ct->lock.

9) Missing socket match with IPv6, from Peter Tirsek.

10) Adjust skb->pkt_type of DNAT'ed frames from ebtables, from
    Linus Luessing.

11) Don't give up on evaluating the expression on new entries added via
    dynset expression in nf_tables, from Liping Zhang.

12) Use skb_checksum() when mangling icmpv6 in IPv6 NAT as this deals
    with non-linear skbuffs.

13) Don't allow IPv6 service in IPVS if no IPv6 support is available,
    from Paolo Abeni.

14) Missing mutex release in error path of xt_find_table_lock(), from
    Dan Carpenter.

15) Update the Netfilter section of the MAINTAINERS file. Add Florian to the
    file, refer to nftables.org and change project status from Supported
    to Maintained.

16) Bail out on mismatching extensions in element updates in nf_tables.
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
14 files changed:
MAINTAINERS
include/uapi/linux/netfilter/nf_conntrack_common.h
net/bridge/netfilter/ebt_dnat.c
net/ipv6/netfilter/nf_nat_l3proto_ipv6.c
net/netfilter/ipvs/ip_vs_ctl.c
net/netfilter/nf_conntrack_helper.c
net/netfilter/nf_conntrack_netlink.c
net/netfilter/nf_tables_api.c
net/netfilter/nft_dynset.c
net/netfilter/nft_set_bitmap.c
net/netfilter/x_tables.c
net/netfilter/xt_CT.c
net/netfilter/xt_socket.c
net/openvswitch/conntrack.c

index 45b173ab0463f5d99a9e8ba7b6917fbf6e05a996..e5b802a227a147732b3d826905b891ec9a45ccde 100644 (file)
@@ -8747,14 +8747,16 @@ F:      drivers/net/ethernet/neterion/
 NETFILTER
 M:     Pablo Neira Ayuso <pablo@netfilter.org>
 M:     Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
+M:     Florian Westphal <fw@strlen.de>
 L:     netfilter-devel@vger.kernel.org
 L:     coreteam@netfilter.org
 W:     http://www.netfilter.org/
 W:     http://www.iptables.org/
+W:     http://www.nftables.org/
 Q:     http://patchwork.ozlabs.org/project/netfilter-devel/list/
 T:     git git://git.kernel.org/pub/scm/linux/kernel/git/pablo/nf.git
 T:     git git://git.kernel.org/pub/scm/linux/kernel/git/pablo/nf-next.git
-S:     Supported
+S:     Maintained
 F:     include/linux/netfilter*
 F:     include/linux/netfilter/
 F:     include/net/netfilter/
index a8072cc7fa0ba6e4b4acedca37a25a4316a24159..dc947e59d03a62c5dd6fccf27c1db0a88270ea4f 100644 (file)
@@ -84,10 +84,6 @@ enum ip_conntrack_status {
        IPS_DYING_BIT = 9,
        IPS_DYING = (1 << IPS_DYING_BIT),
 
-       /* Bits that cannot be altered from userland. */
-       IPS_UNCHANGEABLE_MASK = (IPS_NAT_DONE_MASK | IPS_NAT_MASK |
-                                IPS_EXPECTED | IPS_CONFIRMED | IPS_DYING),
-
        /* Connection has fixed timeout. */
        IPS_FIXED_TIMEOUT_BIT = 10,
        IPS_FIXED_TIMEOUT = (1 << IPS_FIXED_TIMEOUT_BIT),
@@ -103,6 +99,15 @@ enum ip_conntrack_status {
        /* Conntrack got a helper explicitly attached via CT target. */
        IPS_HELPER_BIT = 13,
        IPS_HELPER = (1 << IPS_HELPER_BIT),
+
+       /* Be careful here, modifying these bits can make things messy,
+        * so don't let users modify them directly.
+        */
+       IPS_UNCHANGEABLE_MASK = (IPS_NAT_DONE_MASK | IPS_NAT_MASK |
+                                IPS_EXPECTED | IPS_CONFIRMED | IPS_DYING |
+                                IPS_SEQ_ADJUST | IPS_TEMPLATE),
+
+       __IPS_MAX_BIT = 14,
 };
 
 /* Connection tracking event types */
index 4e0b0c3593250bd8a1be0cdafca49ce7e4684f94..e0bb624c3845eff5d756830504709eeb3d5bf960 100644 (file)
@@ -9,6 +9,7 @@
  */
 #include <linux/module.h>
 #include <net/sock.h>
+#include "../br_private.h"
 #include <linux/netfilter.h>
 #include <linux/netfilter/x_tables.h>
 #include <linux/netfilter_bridge/ebtables.h>
@@ -18,11 +19,30 @@ static unsigned int
 ebt_dnat_tg(struct sk_buff *skb, const struct xt_action_param *par)
 {
        const struct ebt_nat_info *info = par->targinfo;
+       struct net_device *dev;
 
        if (!skb_make_writable(skb, 0))
                return EBT_DROP;
 
        ether_addr_copy(eth_hdr(skb)->h_dest, info->mac);
+
+       if (is_multicast_ether_addr(info->mac)) {
+               if (is_broadcast_ether_addr(info->mac))
+                       skb->pkt_type = PACKET_BROADCAST;
+               else
+                       skb->pkt_type = PACKET_MULTICAST;
+       } else {
+               if (xt_hooknum(par) != NF_BR_BROUTING)
+                       dev = br_port_get_rcu(xt_in(par))->br->dev;
+               else
+                       dev = xt_in(par);
+
+               if (ether_addr_equal(info->mac, dev->dev_addr))
+                       skb->pkt_type = PACKET_HOST;
+               else
+                       skb->pkt_type = PACKET_OTHERHOST;
+       }
+
        return info->target;
 }
 
index bf3ad3e7b6479aeca80b24faefe381d1922ea290..b2b4f031b3a16b1f9f374221396ad02ccc79744e 100644 (file)
@@ -235,7 +235,7 @@ int nf_nat_icmpv6_reply_translation(struct sk_buff *skb,
                inside->icmp6.icmp6_cksum =
                        csum_ipv6_magic(&ipv6h->saddr, &ipv6h->daddr,
                                        skb->len - hdrlen, IPPROTO_ICMPV6,
-                                       csum_partial(&inside->icmp6,
+                                       skb_checksum(skb, hdrlen,
                                                     skb->len - hdrlen, 0));
        }
 
index 668d9643f0cc7a9e410a73f961ec1730fc57033d..1fa3c2307b6ea0173bbcf13c3d0b5cca8c68cba9 100644 (file)
@@ -3078,6 +3078,17 @@ nla_put_failure:
        return skb->len;
 }
 
+static bool ip_vs_is_af_valid(int af)
+{
+       if (af == AF_INET)
+               return true;
+#ifdef CONFIG_IP_VS_IPV6
+       if (af == AF_INET6 && ipv6_mod_enabled())
+               return true;
+#endif
+       return false;
+}
+
 static int ip_vs_genl_parse_service(struct netns_ipvs *ipvs,
                                    struct ip_vs_service_user_kern *usvc,
                                    struct nlattr *nla, int full_entry,
@@ -3105,11 +3116,7 @@ static int ip_vs_genl_parse_service(struct netns_ipvs *ipvs,
        memset(usvc, 0, sizeof(*usvc));
 
        usvc->af = nla_get_u16(nla_af);
-#ifdef CONFIG_IP_VS_IPV6
-       if (usvc->af != AF_INET && usvc->af != AF_INET6)
-#else
-       if (usvc->af != AF_INET)
-#endif
+       if (!ip_vs_is_af_valid(usvc->af))
                return -EAFNOSUPPORT;
 
        if (nla_fwmark) {
@@ -3612,6 +3619,11 @@ static int ip_vs_genl_set_cmd(struct sk_buff *skb, struct genl_info *info)
                if (udest.af == 0)
                        udest.af = svc->af;
 
+               if (!ip_vs_is_af_valid(udest.af)) {
+                       ret = -EAFNOSUPPORT;
+                       goto out;
+               }
+
                if (udest.af != svc->af && cmd != IPVS_CMD_DEL_DEST) {
                        /* The synchronization protocol is incompatible
                         * with mixed family services
index 4b9dfe3eef6241756d6474e1f6caad568a942b63..3a60efa7799b2e4569af35ce943c67fc354dc68a 100644 (file)
@@ -385,7 +385,7 @@ int nf_conntrack_helper_register(struct nf_conntrack_helper *me)
        struct nf_conntrack_tuple_mask mask = { .src.u.all = htons(0xFFFF) };
        unsigned int h = helper_hash(&me->tuple);
        struct nf_conntrack_helper *cur;
-       int ret = 0;
+       int ret = 0, i;
 
        BUG_ON(me->expect_policy == NULL);
        BUG_ON(me->expect_class_max >= NF_CT_MAX_EXPECT_CLASSES);
@@ -395,10 +395,26 @@ int nf_conntrack_helper_register(struct nf_conntrack_helper *me)
                return -EINVAL;
 
        mutex_lock(&nf_ct_helper_mutex);
-       hlist_for_each_entry(cur, &nf_ct_helper_hash[h], hnode) {
-               if (nf_ct_tuple_src_mask_cmp(&cur->tuple, &me->tuple, &mask)) {
-                       ret = -EEXIST;
-                       goto out;
+       for (i = 0; i < nf_ct_helper_hsize; i++) {
+               hlist_for_each_entry(cur, &nf_ct_helper_hash[i], hnode) {
+                       if (!strcmp(cur->name, me->name) &&
+                           (cur->tuple.src.l3num == NFPROTO_UNSPEC ||
+                            cur->tuple.src.l3num == me->tuple.src.l3num) &&
+                           cur->tuple.dst.protonum == me->tuple.dst.protonum) {
+                               ret = -EEXIST;
+                               goto out;
+                       }
+               }
+       }
+
+       /* avoid unpredictable behaviour for auto_assign_helper */
+       if (!(me->flags & NF_CT_HELPER_F_USERSPACE)) {
+               hlist_for_each_entry(cur, &nf_ct_helper_hash[h], hnode) {
+                       if (nf_ct_tuple_src_mask_cmp(&cur->tuple, &me->tuple,
+                                                    &mask)) {
+                               ret = -EEXIST;
+                               goto out;
+                       }
                }
        }
        hlist_add_head_rcu(&me->hnode, &nf_ct_helper_hash[h]);
index 5f6f2f388928130a2717911fef15c72325ebe2f3..dcf561b5c97a47e627ee00649d756635db0e6fb3 100644 (file)
@@ -417,8 +417,7 @@ nla_put_failure:
        return -1;
 }
 
-static int ctnetlink_dump_ct_seq_adj(struct sk_buff *skb,
-                                    const struct nf_conn *ct)
+static int ctnetlink_dump_ct_seq_adj(struct sk_buff *skb, struct nf_conn *ct)
 {
        struct nf_conn_seqadj *seqadj = nfct_seqadj(ct);
        struct nf_ct_seqadj *seq;
@@ -426,15 +425,20 @@ static int ctnetlink_dump_ct_seq_adj(struct sk_buff *skb,
        if (!(ct->status & IPS_SEQ_ADJUST) || !seqadj)
                return 0;
 
+       spin_lock_bh(&ct->lock);
        seq = &seqadj->seq[IP_CT_DIR_ORIGINAL];
        if (dump_ct_seq_adj(skb, seq, CTA_SEQ_ADJ_ORIG) == -1)
-               return -1;
+               goto err;
 
        seq = &seqadj->seq[IP_CT_DIR_REPLY];
        if (dump_ct_seq_adj(skb, seq, CTA_SEQ_ADJ_REPLY) == -1)
-               return -1;
+               goto err;
 
+       spin_unlock_bh(&ct->lock);
        return 0;
+err:
+       spin_unlock_bh(&ct->lock);
+       return -1;
 }
 
 static int ctnetlink_dump_id(struct sk_buff *skb, const struct nf_conn *ct)
@@ -1417,6 +1421,24 @@ ctnetlink_parse_nat_setup(struct nf_conn *ct,
 }
 #endif
 
+static void
+__ctnetlink_change_status(struct nf_conn *ct, unsigned long on,
+                         unsigned long off)
+{
+       unsigned int bit;
+
+       /* Ignore these unchangable bits */
+       on &= ~IPS_UNCHANGEABLE_MASK;
+       off &= ~IPS_UNCHANGEABLE_MASK;
+
+       for (bit = 0; bit < __IPS_MAX_BIT; bit++) {
+               if (on & (1 << bit))
+                       set_bit(bit, &ct->status);
+               else if (off & (1 << bit))
+                       clear_bit(bit, &ct->status);
+       }
+}
+
 static int
 ctnetlink_change_status(struct nf_conn *ct, const struct nlattr * const cda[])
 {
@@ -1436,10 +1458,7 @@ ctnetlink_change_status(struct nf_conn *ct, const struct nlattr * const cda[])
                /* ASSURED bit can only be set */
                return -EBUSY;
 
-       /* Be careful here, modifying NAT bits can screw up things,
-        * so don't let users modify them directly if they don't pass
-        * nf_nat_range. */
-       ct->status |= status & ~(IPS_NAT_DONE_MASK | IPS_NAT_MASK);
+       __ctnetlink_change_status(ct, status, 0);
        return 0;
 }
 
@@ -1508,23 +1527,11 @@ static int ctnetlink_change_helper(struct nf_conn *ct,
                return 0;
        }
 
+       rcu_read_lock();
        helper = __nf_conntrack_helper_find(helpname, nf_ct_l3num(ct),
                                            nf_ct_protonum(ct));
        if (helper == NULL) {
-#ifdef CONFIG_MODULES
-               spin_unlock_bh(&nf_conntrack_expect_lock);
-
-               if (request_module("nfct-helper-%s", helpname) < 0) {
-                       spin_lock_bh(&nf_conntrack_expect_lock);
-                       return -EOPNOTSUPP;
-               }
-
-               spin_lock_bh(&nf_conntrack_expect_lock);
-               helper = __nf_conntrack_helper_find(helpname, nf_ct_l3num(ct),
-                                                   nf_ct_protonum(ct));
-               if (helper)
-                       return -EAGAIN;
-#endif
+               rcu_read_unlock();
                return -EOPNOTSUPP;
        }
 
@@ -1533,13 +1540,16 @@ static int ctnetlink_change_helper(struct nf_conn *ct,
                        /* update private helper data if allowed. */
                        if (helper->from_nlattr)
                                helper->from_nlattr(helpinfo, ct);
-                       return 0;
+                       err = 0;
                } else
-                       return -EBUSY;
+                       err = -EBUSY;
+       } else {
+               /* we cannot set a helper for an existing conntrack */
+               err = -EOPNOTSUPP;
        }
 
-       /* we cannot set a helper for an existing conntrack */
-       return -EOPNOTSUPP;
+       rcu_read_unlock();
+       return err;
 }
 
 static int ctnetlink_change_timeout(struct nf_conn *ct,
@@ -1630,25 +1640,30 @@ ctnetlink_change_seq_adj(struct nf_conn *ct,
        if (!seqadj)
                return 0;
 
+       spin_lock_bh(&ct->lock);
        if (cda[CTA_SEQ_ADJ_ORIG]) {
                ret = change_seq_adj(&seqadj->seq[IP_CT_DIR_ORIGINAL],
                                     cda[CTA_SEQ_ADJ_ORIG]);
                if (ret < 0)
-                       return ret;
+                       goto err;
 
-               ct->status |= IPS_SEQ_ADJUST;
+               set_bit(IPS_SEQ_ADJUST_BIT, &ct->status);
        }
 
        if (cda[CTA_SEQ_ADJ_REPLY]) {
                ret = change_seq_adj(&seqadj->seq[IP_CT_DIR_REPLY],
                                     cda[CTA_SEQ_ADJ_REPLY]);
                if (ret < 0)
-                       return ret;
+                       goto err;
 
-               ct->status |= IPS_SEQ_ADJUST;
+               set_bit(IPS_SEQ_ADJUST_BIT, &ct->status);
        }
 
+       spin_unlock_bh(&ct->lock);
        return 0;
+err:
+       spin_unlock_bh(&ct->lock);
+       return ret;
 }
 
 static int
@@ -1959,9 +1974,7 @@ static int ctnetlink_new_conntrack(struct net *net, struct sock *ctnl,
        err = -EEXIST;
        ct = nf_ct_tuplehash_to_ctrack(h);
        if (!(nlh->nlmsg_flags & NLM_F_EXCL)) {
-               spin_lock_bh(&nf_conntrack_expect_lock);
                err = ctnetlink_change_conntrack(ct, cda);
-               spin_unlock_bh(&nf_conntrack_expect_lock);
                if (err == 0) {
                        nf_conntrack_eventmask_report((1 << IPCT_REPLY) |
                                                      (1 << IPCT_ASSURED) |
@@ -2294,10 +2307,10 @@ ctnetlink_update_status(struct nf_conn *ct, const struct nlattr * const cda[])
        /* This check is less strict than ctnetlink_change_status()
         * because callers often flip IPS_EXPECTED bits when sending
         * an NFQA_CT attribute to the kernel.  So ignore the
-        * unchangeable bits but do not error out.
+        * unchangeable bits but do not error out. Also user programs
+        * are allowed to clear the bits that they are allowed to change.
         */
-       ct->status = (status & ~IPS_UNCHANGEABLE_MASK) |
-                    (ct->status & IPS_UNCHANGEABLE_MASK);
+       __ctnetlink_change_status(ct, status, ~status);
        return 0;
 }
 
@@ -2351,11 +2364,7 @@ ctnetlink_glue_parse(const struct nlattr *attr, struct nf_conn *ct)
        if (ret < 0)
                return ret;
 
-       spin_lock_bh(&nf_conntrack_expect_lock);
-       ret = ctnetlink_glue_parse_ct((const struct nlattr **)cda, ct);
-       spin_unlock_bh(&nf_conntrack_expect_lock);
-
-       return ret;
+       return ctnetlink_glue_parse_ct((const struct nlattr **)cda, ct);
 }
 
 static int ctnetlink_glue_exp_parse(const struct nlattr * const *cda,
index 1c6482d2c4dcfe9299a8094a138d446fc0d9e5c8..5592250297402fe6e272f3213efa7e02ab230485 100644 (file)
@@ -3778,6 +3778,11 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set,
        err = set->ops->insert(ctx->net, set, &elem, &ext2);
        if (err) {
                if (err == -EEXIST) {
+                       if (nft_set_ext_exists(ext, NFT_SET_EXT_DATA) ^
+                           nft_set_ext_exists(ext2, NFT_SET_EXT_DATA) ||
+                           nft_set_ext_exists(ext, NFT_SET_EXT_OBJREF) ^
+                           nft_set_ext_exists(ext2, NFT_SET_EXT_OBJREF))
+                               return -EBUSY;
                        if ((nft_set_ext_exists(ext, NFT_SET_EXT_DATA) &&
                             nft_set_ext_exists(ext2, NFT_SET_EXT_DATA) &&
                             memcmp(nft_set_ext_data(ext),
index 3948da380259538c2fd4823f65a9407241f8af4e..66221ad891a9f281c1cf8e723c6a8c302e1f3741 100644 (file)
@@ -82,8 +82,7 @@ static void nft_dynset_eval(const struct nft_expr *expr,
                    nft_set_ext_exists(ext, NFT_SET_EXT_EXPIRATION)) {
                        timeout = priv->timeout ? : set->timeout;
                        *nft_set_ext_expiration(ext) = jiffies + timeout;
-               } else if (sexpr == NULL)
-                       goto out;
+               }
 
                if (sexpr != NULL)
                        sexpr->ops->eval(sexpr, regs, pkt);
@@ -92,7 +91,7 @@ static void nft_dynset_eval(const struct nft_expr *expr,
                        regs->verdict.code = NFT_BREAK;
                return;
        }
-out:
+
        if (!priv->invert)
                regs->verdict.code = NFT_BREAK;
 }
index 8ebbc2940f4c593d393c65bd5674d90feb585d98..b988162b5b15b9442b496abf2571a9cf7dbc66f3 100644 (file)
@@ -257,6 +257,11 @@ static int nft_bitmap_init(const struct nft_set *set,
 
 static void nft_bitmap_destroy(const struct nft_set *set)
 {
+       struct nft_bitmap *priv = nft_set_priv(set);
+       struct nft_bitmap_elem *be, *n;
+
+       list_for_each_entry_safe(be, n, &priv->list, head)
+               nft_set_elem_destroy(set, be, true);
 }
 
 static bool nft_bitmap_estimate(const struct nft_set_desc *desc, u32 features,
index 14857afc9937d30cae604fe839029593c1006944..f134d384852ff2e9795cb8108b1387cfb3966f73 100644 (file)
@@ -1051,8 +1051,10 @@ struct xt_table *xt_find_table_lock(struct net *net, u_int8_t af,
        list_for_each_entry(t, &init_net.xt.tables[af], list) {
                if (strcmp(t->name, name))
                        continue;
-               if (!try_module_get(t->me))
+               if (!try_module_get(t->me)) {
+                       mutex_unlock(&xt[af].mutex);
                        return NULL;
+               }
 
                mutex_unlock(&xt[af].mutex);
                if (t->table_init(net) != 0) {
index 3cbe1bcf6a742c6d74fa455daa7f1cb7bf56ba57..bb7ad82dcd5603e810db8fba35f81d3f2c03a2b7 100644 (file)
@@ -168,8 +168,10 @@ xt_ct_set_timeout(struct nf_conn *ct, const struct xt_tgchk_param *par,
                goto err_put_timeout;
        }
        timeout_ext = nf_ct_timeout_ext_add(ct, timeout, GFP_ATOMIC);
-       if (timeout_ext == NULL)
+       if (!timeout_ext) {
                ret = -ENOMEM;
+               goto err_put_timeout;
+       }
 
        rcu_read_unlock();
        return ret;
@@ -201,6 +203,7 @@ static int xt_ct_tg_check(const struct xt_tgchk_param *par,
                          struct xt_ct_target_info_v1 *info)
 {
        struct nf_conntrack_zone zone;
+       struct nf_conn_help *help;
        struct nf_conn *ct;
        int ret = -EOPNOTSUPP;
 
@@ -249,7 +252,7 @@ static int xt_ct_tg_check(const struct xt_tgchk_param *par,
        if (info->timeout[0]) {
                ret = xt_ct_set_timeout(ct, par, info->timeout);
                if (ret < 0)
-                       goto err3;
+                       goto err4;
        }
        __set_bit(IPS_CONFIRMED_BIT, &ct->status);
        nf_conntrack_get(&ct->ct_general);
@@ -257,6 +260,10 @@ out:
        info->ct = ct;
        return 0;
 
+err4:
+       help = nfct_help(ct);
+       if (help)
+               module_put(help->helper->me);
 err3:
        nf_ct_tmpl_free(ct);
 err2:
index 770bbec878f149f5688584982a552440ff5351fd..e75ef39669c5a9a5b72c9a1cec8b72020600eae1 100644 (file)
@@ -152,7 +152,7 @@ static int socket_mt_enable_defrag(struct net *net, int family)
        switch (family) {
        case NFPROTO_IPV4:
                return nf_defrag_ipv4_enable(net);
-#ifdef XT_SOCKET_HAVE_IPV6
+#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES)
        case NFPROTO_IPV6:
                return nf_defrag_ipv6_enable(net);
 #endif
index 42a95919df094ba0f21cf4e4c1ade6c570f10284..bf602e33c40af4896240c9cc0566fa10126cf662 100644 (file)
@@ -516,10 +516,38 @@ ovs_ct_expect_find(struct net *net, const struct nf_conntrack_zone *zone,
                   u16 proto, const struct sk_buff *skb)
 {
        struct nf_conntrack_tuple tuple;
+       struct nf_conntrack_expect *exp;
 
        if (!nf_ct_get_tuplepr(skb, skb_network_offset(skb), proto, net, &tuple))
                return NULL;
-       return __nf_ct_expect_find(net, zone, &tuple);
+
+       exp = __nf_ct_expect_find(net, zone, &tuple);
+       if (exp) {
+               struct nf_conntrack_tuple_hash *h;
+
+               /* Delete existing conntrack entry, if it clashes with the
+                * expectation.  This can happen since conntrack ALGs do not
+                * check for clashes between (new) expectations and existing
+                * conntrack entries.  nf_conntrack_in() will check the
+                * expectations only if a conntrack entry can not be found,
+                * which can lead to OVS finding the expectation (here) in the
+                * init direction, but which will not be removed by the
+                * nf_conntrack_in() call, if a matching conntrack entry is
+                * found instead.  In this case all init direction packets
+                * would be reported as new related packets, while reply
+                * direction packets would be reported as un-related
+                * established packets.
+                */
+               h = nf_conntrack_find_get(net, zone, &tuple);
+               if (h) {
+                       struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(h);
+
+                       nf_ct_delete(ct, 0, 0);
+                       nf_conntrack_put(&ct->ct_general);
+               }
+       }
+
+       return exp;
 }
 
 /* This replicates logic from nf_conntrack_core.c that is not exported. */