Merge branch 'master' of git://git.kernel.org/pub/scm/linux/kernel/git/kaber/nf-next-2.6
author     David S. Miller <davem@davemloft.net>
           Tue, 19 Apr 2011 18:24:06 +0000 (11:24 -0700)
committer  David S. Miller <davem@davemloft.net>
           Tue, 19 Apr 2011 18:24:06 +0000 (11:24 -0700)
12 files changed:
include/linux/netfilter/ipset/ip_set_getport.h
include/linux/netfilter/x_tables.h
net/ipv4/netfilter/arp_tables.c
net/ipv4/netfilter/ip_tables.c
net/ipv6/netfilter/ip6_tables.c
net/netfilter/ipset/ip_set_getport.c
net/netfilter/ipset/ip_set_hash_ipport.c
net/netfilter/ipset/ip_set_hash_ipportip.c
net/netfilter/ipset/ip_set_hash_ipportnet.c
net/netfilter/ipset/ip_set_hash_netport.c
net/netfilter/ipvs/ip_vs_ctl.c
net/netfilter/x_tables.c

diff --git a/include/linux/netfilter/ipset/ip_set_getport.h b/include/linux/netfilter/ipset/ip_set_getport.h
index 5aebd170f899f76b9f3231d3aaf5c1066c73870a..90d09300e9541381cffc5aee6a6d7b03e49e607f 100644
@@ -22,7 +22,9 @@ static inline bool ip_set_proto_with_ports(u8 proto)
 {
        switch (proto) {
        case IPPROTO_TCP:
+       case IPPROTO_SCTP:
        case IPPROTO_UDP:
+       case IPPROTO_UDPLITE:
                return true;
        }
        return false;
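
With this hunk applied, the helper reads as follows (reconstructed from the context lines above; the closing brace is implied by the hunk). SCTP and UDPLITE keep their 16-bit source/destination ports at the same leading offsets as TCP and UDP, which is what lets the port-aware set types treat all four protocols uniformly:

	/* Result of the hunk above, shown whole for reference. */
	static inline bool ip_set_proto_with_ports(u8 proto)
	{
		switch (proto) {
		case IPPROTO_TCP:
		case IPPROTO_SCTP:
		case IPPROTO_UDP:
		case IPPROTO_UDPLITE:
			return true;
		}
		return false;
	}
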
diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h
index 37219525ff6fbe7a5298f5586bd00c88f7024351..32cddf78b13e35907c9ee76ee555b86d45d76b34 100644
@@ -456,72 +456,60 @@ extern void xt_proto_fini(struct net *net, u_int8_t af);
 extern struct xt_table_info *xt_alloc_table_info(unsigned int size);
 extern void xt_free_table_info(struct xt_table_info *info);
 
-/*
- * Per-CPU spinlock associated with per-cpu table entries, and
- * with a counter for the "reading" side that allows a recursive
- * reader to avoid taking the lock and deadlocking.
- *
- * "reading" is used by ip/arp/ip6 tables rule processing which runs per-cpu.
- * It needs to ensure that the rules are not being changed while the packet
- * is being processed. In some cases, the read lock will be acquired
- * twice on the same CPU; this is okay because of the count.
- *
- * "writing" is used when reading counters.
- *  During replace any readers that are using the old tables have to complete
- *  before freeing the old table. This is handled by the write locking
- *  necessary for reading the counters.
+/**
+ * xt_recseq - recursive seqcount for netfilter use
+ * 
+ * Packet processing changes the seqcount only if no recursion happened
+ * get_counters() can use read_seqcount_begin()/read_seqcount_retry(),
+ * because we use the normal seqcount convention :
+ * Low order bit set to 1 if a writer is active.
  */
-struct xt_info_lock {
-       seqlock_t lock;
-       unsigned char readers;
-};
-DECLARE_PER_CPU(struct xt_info_lock, xt_info_locks);
+DECLARE_PER_CPU(seqcount_t, xt_recseq);
 
-/*
- * Note: we need to ensure that preemption is disabled before acquiring
- * the per-cpu-variable, so we do it as a two step process rather than
- * using "spin_lock_bh()".
- *
- * We _also_ need to disable bottom half processing before updating our
- * nesting count, to make sure that the only kind of re-entrancy is this
- * code being called by itself: since the count+lock is not an atomic
- * operation, we can allow no races.
+/**
+ * xt_write_recseq_begin - start of a write section
  *
- * _Only_ that special combination of being per-cpu and never getting
- * re-entered asynchronously means that the count is safe.
+ * Begin packet processing : all readers must wait the end
+ * 1) Must be called with preemption disabled
+ * 2) softirqs must be disabled too (or we should use irqsafe_cpu_add())
+ * Returns :
+ *  1 if no recursion on this cpu
+ *  0 if recursion detected
  */
-static inline void xt_info_rdlock_bh(void)
+static inline unsigned int xt_write_recseq_begin(void)
 {
-       struct xt_info_lock *lock;
+       unsigned int addend;
 
-       local_bh_disable();
-       lock = &__get_cpu_var(xt_info_locks);
-       if (likely(!lock->readers++))
-               write_seqlock(&lock->lock);
-}
+       /*
+        * Low order bit of sequence is set if we already
+        * called xt_write_recseq_begin().
+        */
+       addend = (__this_cpu_read(xt_recseq.sequence) + 1) & 1;
 
-static inline void xt_info_rdunlock_bh(void)
-{
-       struct xt_info_lock *lock = &__get_cpu_var(xt_info_locks);
+       /*
+        * This is kind of a write_seqcount_begin(), but addend is 0 or 1
+        * We dont check addend value to avoid a test and conditional jump,
+        * since addend is most likely 1
+        */
+       __this_cpu_add(xt_recseq.sequence, addend);
+       smp_wmb();
 
-       if (likely(!--lock->readers))
-               write_sequnlock(&lock->lock);
-       local_bh_enable();
+       return addend;
 }
 
-/*
- * The "writer" side needs to get exclusive access to the lock,
- * regardless of readers.  This must be called with bottom half
- * processing (and thus also preemption) disabled.
+/**
+ * xt_write_recseq_end - end of a write section
+ * @addend: return value from previous xt_write_recseq_begin()
+ *
+ * End packet processing : all readers can proceed
+ * 1) Must be called with preemption disabled
+ * 2) softirqs must be disabled too (or we should use irqsafe_cpu_add())
  */
-static inline void xt_info_wrlock(unsigned int cpu)
-{
-       write_seqlock(&per_cpu(xt_info_locks, cpu).lock);
-}
-
-static inline void xt_info_wrunlock(unsigned int cpu)
+static inline void xt_write_recseq_end(unsigned int addend)
 {
-       write_sequnlock(&per_cpu(xt_info_locks, cpu).lock);
+       /* this is kind of a write_seqcount_end(), but addend is 0 or 1 */
+       smp_wmb();
+       __this_cpu_add(xt_recseq.sequence, addend);
 }
 
 /*
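
Taken together, the new helpers replace the old xt_info_rdlock_bh()/xt_info_wrlock() pair with a single per-cpu, recursion-aware seqcount. A minimal sketch of the calling pattern that the *_do_table() hunks below adopt (example_do_table is an illustrative name, not part of the patch):

	#include <linux/netfilter.h>
	#include <linux/netfilter/x_tables.h>

	static unsigned int example_do_table(struct sk_buff *skb,
					     const struct xt_table *table)
	{
		unsigned int addend;

		local_bh_disable();			/* keep softirqs out while we hold the seqcount */
		addend = xt_write_recseq_begin();	/* 1 normally, 0 if we recursed on this cpu */

		/* ... walk this CPU's copy of the ruleset, updating per-rule counters ... */

		xt_write_recseq_end(addend);		/* undo exactly what _begin() did */
		local_bh_enable();
		return NF_ACCEPT;
	}

A nested entry on the same CPU sees the low-order bit already set, gets addend == 0, and therefore leaves the sequence untouched; only the outermost run toggles it, which is exactly the state the counter readers below wait for.
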
diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c
index 89bc7e66d598ff3c1fbc414ee6d78558c8107edc..fd7a3f68917f1c53e4e8f6051eaa1e4ed728c2b3 100644
@@ -260,6 +260,7 @@ unsigned int arpt_do_table(struct sk_buff *skb,
        void *table_base;
        const struct xt_table_info *private;
        struct xt_action_param acpar;
+       unsigned int addend;
 
        if (!pskb_may_pull(skb, arp_hdr_len(skb->dev)))
                return NF_DROP;
@@ -267,7 +268,8 @@ unsigned int arpt_do_table(struct sk_buff *skb,
        indev = in ? in->name : nulldevname;
        outdev = out ? out->name : nulldevname;
 
-       xt_info_rdlock_bh();
+       local_bh_disable();
+       addend = xt_write_recseq_begin();
        private = table->private;
        table_base = private->entries[smp_processor_id()];
 
@@ -338,7 +340,8 @@ unsigned int arpt_do_table(struct sk_buff *skb,
                        /* Verdict */
                        break;
        } while (!acpar.hotdrop);
-       xt_info_rdunlock_bh();
+       xt_write_recseq_end(addend);
+       local_bh_enable();
 
        if (acpar.hotdrop)
                return NF_DROP;
@@ -712,7 +715,7 @@ static void get_counters(const struct xt_table_info *t,
        unsigned int i;
 
        for_each_possible_cpu(cpu) {
-               seqlock_t *lock = &per_cpu(xt_info_locks, cpu).lock;
+               seqcount_t *s = &per_cpu(xt_recseq, cpu);
 
                i = 0;
                xt_entry_foreach(iter, t->entries[cpu], t->size) {
@@ -720,10 +723,10 @@ static void get_counters(const struct xt_table_info *t,
                        unsigned int start;
 
                        do {
-                               start = read_seqbegin(lock);
+                               start = read_seqcount_begin(s);
                                bcnt = iter->counters.bcnt;
                                pcnt = iter->counters.pcnt;
-                       } while (read_seqretry(lock, start));
+                       } while (read_seqcount_retry(s, start));
 
                        ADD_COUNTER(counters[i], bcnt, pcnt);
                        ++i;
@@ -1115,6 +1118,7 @@ static int do_add_counters(struct net *net, const void __user *user,
        int ret = 0;
        void *loc_cpu_entry;
        struct arpt_entry *iter;
+       unsigned int addend;
 #ifdef CONFIG_COMPAT
        struct compat_xt_counters_info compat_tmp;
 
@@ -1171,12 +1175,12 @@ static int do_add_counters(struct net *net, const void __user *user,
        /* Choose the copy that is on our node */
        curcpu = smp_processor_id();
        loc_cpu_entry = private->entries[curcpu];
-       xt_info_wrlock(curcpu);
+       addend = xt_write_recseq_begin();
        xt_entry_foreach(iter, loc_cpu_entry, private->size) {
                ADD_COUNTER(iter->counters, paddc[i].bcnt, paddc[i].pcnt);
                ++i;
        }
-       xt_info_wrunlock(curcpu);
+       xt_write_recseq_end(addend);
  unlock_up_free:
        local_bh_enable();
        xt_table_unlock(t);
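
The get_counters() loop above relies only on the standard seqcount reader convention. As a standalone illustration (not part of the patch; the demo_* names are invented), the writer makes the count odd while counters move, and the reader retries any snapshot that raced with it:

	#include <linux/seqlock.h>
	#include <linux/types.h>

	static seqcount_t demo_seq;
	static u64 demo_bcnt, demo_pcnt;

	static void demo_init(void)
	{
		seqcount_init(&demo_seq);
	}

	/* Packet path (per cpu): sequence is odd while the counters move. */
	static void demo_update(u64 bytes)
	{
		write_seqcount_begin(&demo_seq);
		demo_bcnt += bytes;
		demo_pcnt++;
		write_seqcount_end(&demo_seq);
	}

	/* Counter reader: retry until a snapshot did not race with an update. */
	static void demo_snapshot(u64 *bcnt, u64 *pcnt)
	{
		unsigned int start;

		do {
			start = read_seqcount_begin(&demo_seq);
			*bcnt = demo_bcnt;
			*pcnt = demo_pcnt;
		} while (read_seqcount_retry(&demo_seq, start));
	}
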
diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c
index 70491502800937a70626ae0fbb1356b8679d51b4..7647438435030a4fa29ba98539ec423921086b16 100644
@@ -68,15 +68,6 @@ void *ipt_alloc_initial_table(const struct xt_table *info)
 }
 EXPORT_SYMBOL_GPL(ipt_alloc_initial_table);
 
-/*
-   We keep a set of rules for each CPU, so we can avoid write-locking
-   them in the softirq when updating the counters and therefore
-   only need to read-lock in the softirq; doing a write_lock_bh() in user
-   context stops packets coming through and allows user context to read
-   the counters or update the rules.
-
-   Hence the start of any table is given by get_table() below.  */
-
 /* Returns whether matches rule or not. */
 /* Performance critical - called for every packet */
 static inline bool
@@ -311,6 +302,7 @@ ipt_do_table(struct sk_buff *skb,
        unsigned int *stackptr, origptr, cpu;
        const struct xt_table_info *private;
        struct xt_action_param acpar;
+       unsigned int addend;
 
        /* Initialization */
        ip = ip_hdr(skb);
@@ -331,7 +323,8 @@ ipt_do_table(struct sk_buff *skb,
        acpar.hooknum = hook;
 
        IP_NF_ASSERT(table->valid_hooks & (1 << hook));
-       xt_info_rdlock_bh();
+       local_bh_disable();
+       addend = xt_write_recseq_begin();
        private = table->private;
        cpu        = smp_processor_id();
        table_base = private->entries[cpu];
@@ -430,7 +423,9 @@ ipt_do_table(struct sk_buff *skb,
        pr_debug("Exiting %s; resetting sp from %u to %u\n",
                 __func__, *stackptr, origptr);
        *stackptr = origptr;
-       xt_info_rdunlock_bh();
+       xt_write_recseq_end(addend);
+       local_bh_enable();
+
 #ifdef DEBUG_ALLOW_ALL
        return NF_ACCEPT;
 #else
@@ -886,7 +881,7 @@ get_counters(const struct xt_table_info *t,
        unsigned int i;
 
        for_each_possible_cpu(cpu) {
-               seqlock_t *lock = &per_cpu(xt_info_locks, cpu).lock;
+               seqcount_t *s = &per_cpu(xt_recseq, cpu);
 
                i = 0;
                xt_entry_foreach(iter, t->entries[cpu], t->size) {
@@ -894,10 +889,10 @@ get_counters(const struct xt_table_info *t,
                        unsigned int start;
 
                        do {
-                               start = read_seqbegin(lock);
+                               start = read_seqcount_begin(s);
                                bcnt = iter->counters.bcnt;
                                pcnt = iter->counters.pcnt;
-                       } while (read_seqretry(lock, start));
+                       } while (read_seqcount_retry(s, start));
 
                        ADD_COUNTER(counters[i], bcnt, pcnt);
                        ++i; /* macro does multi eval of i */
@@ -1312,6 +1307,7 @@ do_add_counters(struct net *net, const void __user *user,
        int ret = 0;
        void *loc_cpu_entry;
        struct ipt_entry *iter;
+       unsigned int addend;
 #ifdef CONFIG_COMPAT
        struct compat_xt_counters_info compat_tmp;
 
@@ -1368,12 +1364,12 @@ do_add_counters(struct net *net, const void __user *user,
        /* Choose the copy that is on our node */
        curcpu = smp_processor_id();
        loc_cpu_entry = private->entries[curcpu];
-       xt_info_wrlock(curcpu);
+       addend = xt_write_recseq_begin();
        xt_entry_foreach(iter, loc_cpu_entry, private->size) {
                ADD_COUNTER(iter->counters, paddc[i].bcnt, paddc[i].pcnt);
                ++i;
        }
-       xt_info_wrunlock(curcpu);
+       xt_write_recseq_end(addend);
  unlock_up_free:
        local_bh_enable();
        xt_table_unlock(t);
diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c
index 4c1492ff473c7869cad0908496ac038201788337..94874b0bdcdcf9835fe0b0b53fd088c12db93305 100644
@@ -340,6 +340,7 @@ ip6t_do_table(struct sk_buff *skb,
        unsigned int *stackptr, origptr, cpu;
        const struct xt_table_info *private;
        struct xt_action_param acpar;
+       unsigned int addend;
 
        /* Initialization */
        indev = in ? in->name : nulldevname;
@@ -358,7 +359,8 @@ ip6t_do_table(struct sk_buff *skb,
 
        IP_NF_ASSERT(table->valid_hooks & (1 << hook));
 
-       xt_info_rdlock_bh();
+       local_bh_disable();
+       addend = xt_write_recseq_begin();
        private = table->private;
        cpu        = smp_processor_id();
        table_base = private->entries[cpu];
@@ -442,7 +444,9 @@ ip6t_do_table(struct sk_buff *skb,
        } while (!acpar.hotdrop);
 
        *stackptr = origptr;
-       xt_info_rdunlock_bh();
+
+       xt_write_recseq_end(addend);
+       local_bh_enable();
 
 #ifdef DEBUG_ALLOW_ALL
        return NF_ACCEPT;
@@ -899,7 +903,7 @@ get_counters(const struct xt_table_info *t,
        unsigned int i;
 
        for_each_possible_cpu(cpu) {
-               seqlock_t *lock = &per_cpu(xt_info_locks, cpu).lock;
+               seqcount_t *s = &per_cpu(xt_recseq, cpu);
 
                i = 0;
                xt_entry_foreach(iter, t->entries[cpu], t->size) {
@@ -907,10 +911,10 @@ get_counters(const struct xt_table_info *t,
                        unsigned int start;
 
                        do {
-                               start = read_seqbegin(lock);
+                               start = read_seqcount_begin(s);
                                bcnt = iter->counters.bcnt;
                                pcnt = iter->counters.pcnt;
-                       } while (read_seqretry(lock, start));
+                       } while (read_seqcount_retry(s, start));
 
                        ADD_COUNTER(counters[i], bcnt, pcnt);
                        ++i;
@@ -1325,6 +1329,7 @@ do_add_counters(struct net *net, const void __user *user, unsigned int len,
        int ret = 0;
        const void *loc_cpu_entry;
        struct ip6t_entry *iter;
+       unsigned int addend;
 #ifdef CONFIG_COMPAT
        struct compat_xt_counters_info compat_tmp;
 
@@ -1381,13 +1386,13 @@ do_add_counters(struct net *net, const void __user *user, unsigned int len,
        i = 0;
        /* Choose the copy that is on our node */
        curcpu = smp_processor_id();
-       xt_info_wrlock(curcpu);
+       addend = xt_write_recseq_begin();
        loc_cpu_entry = private->entries[curcpu];
        xt_entry_foreach(iter, loc_cpu_entry, private->size) {
                ADD_COUNTER(iter->counters, paddc[i].bcnt, paddc[i].pcnt);
                ++i;
        }
-       xt_info_wrunlock(curcpu);
+       xt_write_recseq_end(addend);
 
  unlock_up_free:
        local_bh_enable();
diff --git a/net/netfilter/ipset/ip_set_getport.c b/net/netfilter/ipset/ip_set_getport.c
index 8d52272126867c5637fe644158338a63f98216b6..757143b2240af36395e71f6561e3809f3394e688 100644
@@ -11,6 +11,7 @@
 #include <linux/skbuff.h>
 #include <linux/icmp.h>
 #include <linux/icmpv6.h>
+#include <linux/sctp.h>
 #include <linux/netfilter_ipv6/ip6_tables.h>
 #include <net/ip.h>
 #include <net/ipv6.h>
@@ -35,7 +36,20 @@ get_port(const struct sk_buff *skb, int protocol, unsigned int protooff,
                *port = src ? th->source : th->dest;
                break;
        }
-       case IPPROTO_UDP: {
+       case IPPROTO_SCTP: {
+               sctp_sctphdr_t _sh;
+               const sctp_sctphdr_t *sh;
+
+               sh = skb_header_pointer(skb, protooff, sizeof(_sh), &_sh);
+               if (sh == NULL)
+                       /* No choice either */
+                       return false;
+
+               *port = src ? sh->source : sh->dest;
+               break;
+       }
+       case IPPROTO_UDP:
+       case IPPROTO_UDPLITE: {
                struct udphdr _udph;
                const struct udphdr *uh;
 
diff --git a/net/netfilter/ipset/ip_set_hash_ipport.c b/net/netfilter/ipset/ip_set_hash_ipport.c
index b9214145d357ff7b4cfd8731f62f21970d7c8463..14281b6b8074142aab9e300b408fc7f4b9b86ae9 100644
@@ -491,7 +491,7 @@ static struct ip_set_type hash_ipport_type __read_mostly = {
        .features       = IPSET_TYPE_IP | IPSET_TYPE_PORT,
        .dimension      = IPSET_DIM_TWO,
        .family         = AF_UNSPEC,
-       .revision       = 0,
+       .revision       = 1,
        .create         = hash_ipport_create,
        .create_policy  = {
                [IPSET_ATTR_HASHSIZE]   = { .type = NLA_U32 },
diff --git a/net/netfilter/ipset/ip_set_hash_ipportip.c b/net/netfilter/ipset/ip_set_hash_ipportip.c
index 4642872df6e131818a9486ad99aa215a0b997fc8..401c8a2531dbf471677ba4cb13923fda61e6d7cd 100644
@@ -509,7 +509,7 @@ static struct ip_set_type hash_ipportip_type __read_mostly = {
        .features       = IPSET_TYPE_IP | IPSET_TYPE_PORT | IPSET_TYPE_IP2,
        .dimension      = IPSET_DIM_THREE,
        .family         = AF_UNSPEC,
-       .revision       = 0,
+       .revision       = 1,
        .create         = hash_ipportip_create,
        .create_policy  = {
                [IPSET_ATTR_HASHSIZE]   = { .type = NLA_U32 },
diff --git a/net/netfilter/ipset/ip_set_hash_ipportnet.c b/net/netfilter/ipset/ip_set_hash_ipportnet.c
index 2cb84a54b7adbc9d651ed4f8117ad9119419de08..4743e5402522fb6793c2022ade4bed020b240295 100644
@@ -574,7 +574,7 @@ static struct ip_set_type hash_ipportnet_type __read_mostly = {
        .features       = IPSET_TYPE_IP | IPSET_TYPE_PORT | IPSET_TYPE_IP2,
        .dimension      = IPSET_DIM_THREE,
        .family         = AF_UNSPEC,
-       .revision       = 0,
+       .revision       = 1,
        .create         = hash_ipportnet_create,
        .create_policy  = {
                [IPSET_ATTR_HASHSIZE]   = { .type = NLA_U32 },
diff --git a/net/netfilter/ipset/ip_set_hash_netport.c b/net/netfilter/ipset/ip_set_hash_netport.c
index 8598676f2a053724f523d800410f72857700f305..d2a40362dd3aadb6510ff1603a42e132eeeb5c7a 100644
@@ -526,7 +526,7 @@ static struct ip_set_type hash_netport_type __read_mostly = {
        .features       = IPSET_TYPE_IP | IPSET_TYPE_PORT,
        .dimension      = IPSET_DIM_TWO,
        .family         = AF_UNSPEC,
-       .revision       = 0,
+       .revision       = 1,
        .create         = hash_netport_create,
        .create_policy  = {
                [IPSET_ATTR_HASHSIZE]   = { .type = NLA_U32 },
diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c
index ae47090bf45fe1fdfbf99192950aacb951cb1af1..9930f340908a6a25ed7830fb2612958b3dcb856e 100644
@@ -1984,9 +1984,6 @@ static const struct file_operations ip_vs_info_fops = {
        .release = seq_release_private,
 };
 
-#endif
-
-#ifdef CONFIG_PROC_FS
 static int ip_vs_stats_show(struct seq_file *seq, void *v)
 {
        struct net *net = seq_file_single_net(seq);
diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c
index a9adf4c6b299ee35a72c4617e0d3e565031de940..52959efca858318ccfc2dcdf26d02e47b7956d60 100644
@@ -762,8 +762,8 @@ void xt_compat_unlock(u_int8_t af)
 EXPORT_SYMBOL_GPL(xt_compat_unlock);
 #endif
 
-DEFINE_PER_CPU(struct xt_info_lock, xt_info_locks);
-EXPORT_PER_CPU_SYMBOL_GPL(xt_info_locks);
+DEFINE_PER_CPU(seqcount_t, xt_recseq);
+EXPORT_PER_CPU_SYMBOL_GPL(xt_recseq);
 
 static int xt_jumpstack_alloc(struct xt_table_info *i)
 {
@@ -1362,10 +1362,7 @@ static int __init xt_init(void)
        int rv;
 
        for_each_possible_cpu(i) {
-               struct xt_info_lock *lock = &per_cpu(xt_info_locks, i);
-
-               seqlock_init(&lock->lock);
-               lock->readers = 0;
+               seqcount_init(&per_cpu(xt_recseq, i));
        }
 
        xt = kmalloc(sizeof(struct xt_af) * NFPROTO_NUMPROTO, GFP_KERNEL);