NETFILTER
M: Pablo Neira Ayuso <pablo@netfilter.org>
M: Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
+M: Florian Westphal <fw@strlen.de>
L: netfilter-devel@vger.kernel.org
L: coreteam@netfilter.org
W: http://www.netfilter.org/
W: http://www.iptables.org/
+W: http://www.nftables.org/
Q: http://patchwork.ozlabs.org/project/netfilter-devel/list/
T: git git://git.kernel.org/pub/scm/linux/kernel/git/pablo/nf.git
T: git git://git.kernel.org/pub/scm/linux/kernel/git/pablo/nf-next.git
-S: Supported
+S: Maintained
F: include/linux/netfilter*
F: include/linux/netfilter/
F: include/net/netfilter/
IPS_DYING_BIT = 9,
IPS_DYING = (1 << IPS_DYING_BIT),
- /* Bits that cannot be altered from userland. */
- IPS_UNCHANGEABLE_MASK = (IPS_NAT_DONE_MASK | IPS_NAT_MASK |
- IPS_EXPECTED | IPS_CONFIRMED | IPS_DYING),
-
/* Connection has fixed timeout. */
IPS_FIXED_TIMEOUT_BIT = 10,
IPS_FIXED_TIMEOUT = (1 << IPS_FIXED_TIMEOUT_BIT),
/* Conntrack got a helper explicitly attached via CT target. */
IPS_HELPER_BIT = 13,
IPS_HELPER = (1 << IPS_HELPER_BIT),
+
+ /* Be careful here, modifying these bits can make things messy,
+ * so don't let users modify them directly.
+ */
+ IPS_UNCHANGEABLE_MASK = (IPS_NAT_DONE_MASK | IPS_NAT_MASK |
+ IPS_EXPECTED | IPS_CONFIRMED | IPS_DYING |
+ IPS_SEQ_ADJUST | IPS_TEMPLATE),
+
+ __IPS_MAX_BIT = 14,
};
/* Connection tracking event types */
*/
#include <linux/module.h>
#include <net/sock.h>
+#include "../br_private.h"
#include <linux/netfilter.h>
#include <linux/netfilter/x_tables.h>
#include <linux/netfilter_bridge/ebtables.h>
ebt_dnat_tg(struct sk_buff *skb, const struct xt_action_param *par)
{
const struct ebt_nat_info *info = par->targinfo;
+ struct net_device *dev;
if (!skb_make_writable(skb, 0))
return EBT_DROP;
ether_addr_copy(eth_hdr(skb)->h_dest, info->mac);
+
+ if (is_multicast_ether_addr(info->mac)) {
+ if (is_broadcast_ether_addr(info->mac))
+ skb->pkt_type = PACKET_BROADCAST;
+ else
+ skb->pkt_type = PACKET_MULTICAST;
+ } else {
+ if (xt_hooknum(par) != NF_BR_BROUTING)
+ dev = br_port_get_rcu(xt_in(par))->br->dev;
+ else
+ dev = xt_in(par);
+
+ if (ether_addr_equal(info->mac, dev->dev_addr))
+ skb->pkt_type = PACKET_HOST;
+ else
+ skb->pkt_type = PACKET_OTHERHOST;
+ }
+
return info->target;
}
inside->icmp6.icmp6_cksum =
csum_ipv6_magic(&ipv6h->saddr, &ipv6h->daddr,
skb->len - hdrlen, IPPROTO_ICMPV6,
- csum_partial(&inside->icmp6,
+ skb_checksum(skb, hdrlen,
skb->len - hdrlen, 0));
}
return skb->len;
}
+static bool ip_vs_is_af_valid(int af)
+{
+ if (af == AF_INET)
+ return true;
+#ifdef CONFIG_IP_VS_IPV6
+ if (af == AF_INET6 && ipv6_mod_enabled())
+ return true;
+#endif
+ return false;
+}
+
static int ip_vs_genl_parse_service(struct netns_ipvs *ipvs,
struct ip_vs_service_user_kern *usvc,
struct nlattr *nla, int full_entry,
memset(usvc, 0, sizeof(*usvc));
usvc->af = nla_get_u16(nla_af);
-#ifdef CONFIG_IP_VS_IPV6
- if (usvc->af != AF_INET && usvc->af != AF_INET6)
-#else
- if (usvc->af != AF_INET)
-#endif
+ if (!ip_vs_is_af_valid(usvc->af))
return -EAFNOSUPPORT;
if (nla_fwmark) {
if (udest.af == 0)
udest.af = svc->af;
+ if (!ip_vs_is_af_valid(udest.af)) {
+ ret = -EAFNOSUPPORT;
+ goto out;
+ }
+
if (udest.af != svc->af && cmd != IPVS_CMD_DEL_DEST) {
/* The synchronization protocol is incompatible
* with mixed family services
struct nf_conntrack_tuple_mask mask = { .src.u.all = htons(0xFFFF) };
unsigned int h = helper_hash(&me->tuple);
struct nf_conntrack_helper *cur;
- int ret = 0;
+ int ret = 0, i;
BUG_ON(me->expect_policy == NULL);
BUG_ON(me->expect_class_max >= NF_CT_MAX_EXPECT_CLASSES);
return -EINVAL;
mutex_lock(&nf_ct_helper_mutex);
- hlist_for_each_entry(cur, &nf_ct_helper_hash[h], hnode) {
- if (nf_ct_tuple_src_mask_cmp(&cur->tuple, &me->tuple, &mask)) {
- ret = -EEXIST;
- goto out;
+ for (i = 0; i < nf_ct_helper_hsize; i++) {
+ hlist_for_each_entry(cur, &nf_ct_helper_hash[i], hnode) {
+ if (!strcmp(cur->name, me->name) &&
+ (cur->tuple.src.l3num == NFPROTO_UNSPEC ||
+ cur->tuple.src.l3num == me->tuple.src.l3num) &&
+ cur->tuple.dst.protonum == me->tuple.dst.protonum) {
+ ret = -EEXIST;
+ goto out;
+ }
+ }
+ }
+
+ /* avoid unpredictable behaviour for auto_assign_helper */
+ if (!(me->flags & NF_CT_HELPER_F_USERSPACE)) {
+ hlist_for_each_entry(cur, &nf_ct_helper_hash[h], hnode) {
+ if (nf_ct_tuple_src_mask_cmp(&cur->tuple, &me->tuple,
+ &mask)) {
+ ret = -EEXIST;
+ goto out;
+ }
}
}
hlist_add_head_rcu(&me->hnode, &nf_ct_helper_hash[h]);
return -1;
}
-static int ctnetlink_dump_ct_seq_adj(struct sk_buff *skb,
- const struct nf_conn *ct)
+static int ctnetlink_dump_ct_seq_adj(struct sk_buff *skb, struct nf_conn *ct)
{
struct nf_conn_seqadj *seqadj = nfct_seqadj(ct);
struct nf_ct_seqadj *seq;
if (!(ct->status & IPS_SEQ_ADJUST) || !seqadj)
return 0;
+ spin_lock_bh(&ct->lock);
seq = &seqadj->seq[IP_CT_DIR_ORIGINAL];
if (dump_ct_seq_adj(skb, seq, CTA_SEQ_ADJ_ORIG) == -1)
- return -1;
+ goto err;
seq = &seqadj->seq[IP_CT_DIR_REPLY];
if (dump_ct_seq_adj(skb, seq, CTA_SEQ_ADJ_REPLY) == -1)
- return -1;
+ goto err;
+ spin_unlock_bh(&ct->lock);
return 0;
+err:
+ spin_unlock_bh(&ct->lock);
+ return -1;
}
static int ctnetlink_dump_id(struct sk_buff *skb, const struct nf_conn *ct)
}
#endif
+static void
+__ctnetlink_change_status(struct nf_conn *ct, unsigned long on,
+ unsigned long off)
+{
+ unsigned int bit;
+
+ /* Ignore these unchangable bits */
+ on &= ~IPS_UNCHANGEABLE_MASK;
+ off &= ~IPS_UNCHANGEABLE_MASK;
+
+ for (bit = 0; bit < __IPS_MAX_BIT; bit++) {
+ if (on & (1 << bit))
+ set_bit(bit, &ct->status);
+ else if (off & (1 << bit))
+ clear_bit(bit, &ct->status);
+ }
+}
+
static int
ctnetlink_change_status(struct nf_conn *ct, const struct nlattr * const cda[])
{
/* ASSURED bit can only be set */
return -EBUSY;
- /* Be careful here, modifying NAT bits can screw up things,
- * so don't let users modify them directly if they don't pass
- * nf_nat_range. */
- ct->status |= status & ~(IPS_NAT_DONE_MASK | IPS_NAT_MASK);
+ __ctnetlink_change_status(ct, status, 0);
return 0;
}
return 0;
}
+ rcu_read_lock();
helper = __nf_conntrack_helper_find(helpname, nf_ct_l3num(ct),
nf_ct_protonum(ct));
if (helper == NULL) {
-#ifdef CONFIG_MODULES
- spin_unlock_bh(&nf_conntrack_expect_lock);
-
- if (request_module("nfct-helper-%s", helpname) < 0) {
- spin_lock_bh(&nf_conntrack_expect_lock);
- return -EOPNOTSUPP;
- }
-
- spin_lock_bh(&nf_conntrack_expect_lock);
- helper = __nf_conntrack_helper_find(helpname, nf_ct_l3num(ct),
- nf_ct_protonum(ct));
- if (helper)
- return -EAGAIN;
-#endif
+ rcu_read_unlock();
return -EOPNOTSUPP;
}
/* update private helper data if allowed. */
if (helper->from_nlattr)
helper->from_nlattr(helpinfo, ct);
- return 0;
+ err = 0;
} else
- return -EBUSY;
+ err = -EBUSY;
+ } else {
+ /* we cannot set a helper for an existing conntrack */
+ err = -EOPNOTSUPP;
}
- /* we cannot set a helper for an existing conntrack */
- return -EOPNOTSUPP;
+ rcu_read_unlock();
+ return err;
}
static int ctnetlink_change_timeout(struct nf_conn *ct,
if (!seqadj)
return 0;
+ spin_lock_bh(&ct->lock);
if (cda[CTA_SEQ_ADJ_ORIG]) {
ret = change_seq_adj(&seqadj->seq[IP_CT_DIR_ORIGINAL],
cda[CTA_SEQ_ADJ_ORIG]);
if (ret < 0)
- return ret;
+ goto err;
- ct->status |= IPS_SEQ_ADJUST;
+ set_bit(IPS_SEQ_ADJUST_BIT, &ct->status);
}
if (cda[CTA_SEQ_ADJ_REPLY]) {
ret = change_seq_adj(&seqadj->seq[IP_CT_DIR_REPLY],
cda[CTA_SEQ_ADJ_REPLY]);
if (ret < 0)
- return ret;
+ goto err;
- ct->status |= IPS_SEQ_ADJUST;
+ set_bit(IPS_SEQ_ADJUST_BIT, &ct->status);
}
+ spin_unlock_bh(&ct->lock);
return 0;
+err:
+ spin_unlock_bh(&ct->lock);
+ return ret;
}
static int
err = -EEXIST;
ct = nf_ct_tuplehash_to_ctrack(h);
if (!(nlh->nlmsg_flags & NLM_F_EXCL)) {
- spin_lock_bh(&nf_conntrack_expect_lock);
err = ctnetlink_change_conntrack(ct, cda);
- spin_unlock_bh(&nf_conntrack_expect_lock);
if (err == 0) {
nf_conntrack_eventmask_report((1 << IPCT_REPLY) |
(1 << IPCT_ASSURED) |
/* This check is less strict than ctnetlink_change_status()
* because callers often flip IPS_EXPECTED bits when sending
* an NFQA_CT attribute to the kernel. So ignore the
- * unchangeable bits but do not error out.
+ * unchangeable bits but do not error out. Also user programs
+ * are allowed to clear the bits that they are allowed to change.
*/
- ct->status = (status & ~IPS_UNCHANGEABLE_MASK) |
- (ct->status & IPS_UNCHANGEABLE_MASK);
+ __ctnetlink_change_status(ct, status, ~status);
return 0;
}
if (ret < 0)
return ret;
- spin_lock_bh(&nf_conntrack_expect_lock);
- ret = ctnetlink_glue_parse_ct((const struct nlattr **)cda, ct);
- spin_unlock_bh(&nf_conntrack_expect_lock);
-
- return ret;
+ return ctnetlink_glue_parse_ct((const struct nlattr **)cda, ct);
}
static int ctnetlink_glue_exp_parse(const struct nlattr * const *cda,
err = set->ops->insert(ctx->net, set, &elem, &ext2);
if (err) {
if (err == -EEXIST) {
+ if (nft_set_ext_exists(ext, NFT_SET_EXT_DATA) ^
+ nft_set_ext_exists(ext2, NFT_SET_EXT_DATA) ||
+ nft_set_ext_exists(ext, NFT_SET_EXT_OBJREF) ^
+ nft_set_ext_exists(ext2, NFT_SET_EXT_OBJREF))
+ return -EBUSY;
if ((nft_set_ext_exists(ext, NFT_SET_EXT_DATA) &&
nft_set_ext_exists(ext2, NFT_SET_EXT_DATA) &&
memcmp(nft_set_ext_data(ext),
nft_set_ext_exists(ext, NFT_SET_EXT_EXPIRATION)) {
timeout = priv->timeout ? : set->timeout;
*nft_set_ext_expiration(ext) = jiffies + timeout;
- } else if (sexpr == NULL)
- goto out;
+ }
if (sexpr != NULL)
sexpr->ops->eval(sexpr, regs, pkt);
regs->verdict.code = NFT_BREAK;
return;
}
-out:
+
if (!priv->invert)
regs->verdict.code = NFT_BREAK;
}
static void nft_bitmap_destroy(const struct nft_set *set)
{
+ struct nft_bitmap *priv = nft_set_priv(set);
+ struct nft_bitmap_elem *be, *n;
+
+ list_for_each_entry_safe(be, n, &priv->list, head)
+ nft_set_elem_destroy(set, be, true);
}
static bool nft_bitmap_estimate(const struct nft_set_desc *desc, u32 features,
list_for_each_entry(t, &init_net.xt.tables[af], list) {
if (strcmp(t->name, name))
continue;
- if (!try_module_get(t->me))
+ if (!try_module_get(t->me)) {
+ mutex_unlock(&xt[af].mutex);
return NULL;
+ }
mutex_unlock(&xt[af].mutex);
if (t->table_init(net) != 0) {
goto err_put_timeout;
}
timeout_ext = nf_ct_timeout_ext_add(ct, timeout, GFP_ATOMIC);
- if (timeout_ext == NULL)
+ if (!timeout_ext) {
ret = -ENOMEM;
+ goto err_put_timeout;
+ }
rcu_read_unlock();
return ret;
struct xt_ct_target_info_v1 *info)
{
struct nf_conntrack_zone zone;
+ struct nf_conn_help *help;
struct nf_conn *ct;
int ret = -EOPNOTSUPP;
if (info->timeout[0]) {
ret = xt_ct_set_timeout(ct, par, info->timeout);
if (ret < 0)
- goto err3;
+ goto err4;
}
__set_bit(IPS_CONFIRMED_BIT, &ct->status);
nf_conntrack_get(&ct->ct_general);
info->ct = ct;
return 0;
+err4:
+ help = nfct_help(ct);
+ if (help)
+ module_put(help->helper->me);
err3:
nf_ct_tmpl_free(ct);
err2:
switch (family) {
case NFPROTO_IPV4:
return nf_defrag_ipv4_enable(net);
-#ifdef XT_SOCKET_HAVE_IPV6
+#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES)
case NFPROTO_IPV6:
return nf_defrag_ipv6_enable(net);
#endif
u16 proto, const struct sk_buff *skb)
{
struct nf_conntrack_tuple tuple;
+ struct nf_conntrack_expect *exp;
if (!nf_ct_get_tuplepr(skb, skb_network_offset(skb), proto, net, &tuple))
return NULL;
- return __nf_ct_expect_find(net, zone, &tuple);
+
+ exp = __nf_ct_expect_find(net, zone, &tuple);
+ if (exp) {
+ struct nf_conntrack_tuple_hash *h;
+
+ /* Delete existing conntrack entry, if it clashes with the
+ * expectation. This can happen since conntrack ALGs do not
+ * check for clashes between (new) expectations and existing
+ * conntrack entries. nf_conntrack_in() will check the
+ * expectations only if a conntrack entry can not be found,
+ * which can lead to OVS finding the expectation (here) in the
+ * init direction, but which will not be removed by the
+ * nf_conntrack_in() call, if a matching conntrack entry is
+ * found instead. In this case all init direction packets
+ * would be reported as new related packets, while reply
+ * direction packets would be reported as un-related
+ * established packets.
+ */
+ h = nf_conntrack_find_get(net, zone, &tuple);
+ if (h) {
+ struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(h);
+
+ nf_ct_delete(ct, 0, 0);
+ nf_conntrack_put(&ct->ct_general);
+ }
+ }
+
+ return exp;
}
/* This replicates logic from nf_conntrack_core.c that is not exported. */