Merge remote-tracking branch 'rcu/rcu/next'

author Stephen Rothwell <sfr@canb.auug.org.au>

Tue, 13 Sep 2011 04:55:35 +0000 (14:55 +1000)

committer Stephen Rothwell <sfr@canb.auug.org.au>

Tue, 13 Sep 2011 04:55:35 +0000 (14:55 +1000)
author Stephen Rothwell <sfr@canb.auug.org.au>
Tue, 13 Sep 2011 04:55:35 +0000 (14:55 +1000)
committer Stephen Rothwell <sfr@canb.auug.org.au>
Tue, 13 Sep 2011 04:55:35 +0000 (14:55 +1000)
diff --combined arch/powerpc/platforms/pseries/lpar.c

index c9a29dae8c0538359e60a3a377484277770c1bc3,668f30060b9fc3479ab3bb3efdd1bd9810f04503..83abd09b856ceea228ed9755f1a366ccfc2b007a
--- 1/arch/powerpc/platforms/pseries/lpar.c
--- 2/arch/powerpc/platforms/pseries/lpar.c
+++ b/arch/powerpc/platforms/pseries/lpar.c
@@@ -52,6 -52,197 +52,6 @@@ EXPORT_SYMBOL(plpar_hcall_norets)
   
   extern void pSeries_find_serial_port(void);
   
- -
- -static int vtermno;   /* virtual terminal# for udbg  */
- -
- -#define __ALIGNED__ __attribute__((__aligned__(sizeof(long))))
- -static void udbg_hvsi_putc(char c)
- -{
- -      /* packet's seqno isn't used anyways */
- -      uint8_t packet[] __ALIGNED__ = { 0xff, 5, 0, 0, c };
- -      int rc;
- -
- -      if (c == '\n')
- -              udbg_hvsi_putc('\r');
- -
- -      do {
- -              rc = plpar_put_term_char(vtermno, sizeof(packet), packet);
- -      } while (rc == H_BUSY);
- -}
- -
- -static long hvsi_udbg_buf_len;
- -static uint8_t hvsi_udbg_buf[256];
- -
- -static int udbg_hvsi_getc_poll(void)
- -{
- -      unsigned char ch;
- -      int rc, i;
- -
- -      if (hvsi_udbg_buf_len == 0) {
- -              rc = plpar_get_term_char(vtermno, &hvsi_udbg_buf_len, hvsi_udbg_buf);
- -              if (rc != H_SUCCESS || hvsi_udbg_buf[0] != 0xff) {
- -                      /* bad read or non-data packet */
- -                      hvsi_udbg_buf_len = 0;
- -              } else {
- -                      /* remove the packet header */
- -                      for (i = 4; i < hvsi_udbg_buf_len; i++)
- -                              hvsi_udbg_buf[i-4] = hvsi_udbg_buf[i];
- -                      hvsi_udbg_buf_len -= 4;
- -              }
- -      }
- -
- -      if (hvsi_udbg_buf_len <= 0 || hvsi_udbg_buf_len > 256) {
- -              /* no data ready */
- -              hvsi_udbg_buf_len = 0;
- -              return -1;
- -      }
- -
- -      ch = hvsi_udbg_buf[0];
- -      /* shift remaining data down */
- -      for (i = 1; i < hvsi_udbg_buf_len; i++) {
- -              hvsi_udbg_buf[i-1] = hvsi_udbg_buf[i];
- -      }
- -      hvsi_udbg_buf_len--;
- -
- -      return ch;
- -}
- -
- -static int udbg_hvsi_getc(void)
- -{
- -      int ch;
- -      for (;;) {
- -              ch = udbg_hvsi_getc_poll();
- -              if (ch == -1) {
- -                      /* This shouldn't be needed...but... */
- -                      volatile unsigned long delay;
- -                      for (delay=0; delay < 2000000; delay++)
- -                              ;
- -              } else {
- -                      return ch;
- -              }
- -      }
- -}
- -
- -static void udbg_putcLP(char c)
- -{
- -      char buf[16];
- -      unsigned long rc;
- -
- -      if (c == '\n')
- -              udbg_putcLP('\r');
- -
- -      buf[0] = c;
- -      do {
- -              rc = plpar_put_term_char(vtermno, 1, buf);
- -      } while(rc == H_BUSY);
- -}
- -
- -/* Buffered chars getc */
- -static long inbuflen;
- -static long inbuf[2]; /* must be 2 longs */
- -
- -static int udbg_getc_pollLP(void)
- -{
- -      /* The interface is tricky because it may return up to 16 chars.
- -       * We save them statically for future calls to udbg_getc().
- -       */
- -      char ch, *buf = (char *)inbuf;
- -      int i;
- -      long rc;
- -      if (inbuflen == 0) {
- -              /* get some more chars. */
- -              inbuflen = 0;
- -              rc = plpar_get_term_char(vtermno, &inbuflen, buf);
- -              if (rc != H_SUCCESS)
- -                      inbuflen = 0;   /* otherwise inbuflen is garbage */
- -      }
- -      if (inbuflen <= 0 || inbuflen > 16) {
- -              /* Catch error case as well as other oddities (corruption) */
- -              inbuflen = 0;
- -              return -1;
- -      }
- -      ch = buf[0];
- -      for (i = 1; i < inbuflen; i++)  /* shuffle them down. */
- -              buf[i-1] = buf[i];
- -      inbuflen--;
- -      return ch;
- -}
- -
- -static int udbg_getcLP(void)
- -{
- -      int ch;
- -      for (;;) {
- -              ch = udbg_getc_pollLP();
- -              if (ch == -1) {
- -                      /* This shouldn't be needed...but... */
- -                      volatile unsigned long delay;
- -                      for (delay=0; delay < 2000000; delay++)
- -                              ;
- -              } else {
- -                      return ch;
- -              }
- -      }
- -}
- -
- -/* call this from early_init() for a working debug console on
- - * vterm capable LPAR machines
- - */
- -void __init udbg_init_debug_lpar(void)
- -{
- -      vtermno = 0;
- -      udbg_putc = udbg_putcLP;
- -      udbg_getc = udbg_getcLP;
- -      udbg_getc_poll = udbg_getc_pollLP;
- -
- -      register_early_udbg_console();
- -}
- -
- -/* returns 0 if couldn't find or use /chosen/stdout as console */
- -void __init find_udbg_vterm(void)
- -{
- -      struct device_node *stdout_node;
- -      const u32 *termno;
- -      const char *name;
- -
- -      /* find the boot console from /chosen/stdout */
- -      if (!of_chosen)
- -              return;
- -      name = of_get_property(of_chosen, "linux,stdout-path", NULL);
- -      if (name == NULL)
- -              return;
- -      stdout_node = of_find_node_by_path(name);
- -      if (!stdout_node)
- -              return;
- -      name = of_get_property(stdout_node, "name", NULL);
- -      if (!name) {
- -              printk(KERN_WARNING "stdout node missing 'name' property!\n");
- -              goto out;
- -      }
- -
- -      /* Check if it's a virtual terminal */
- -      if (strncmp(name, "vty", 3) != 0)
- -              goto out;
- -      termno = of_get_property(stdout_node, "reg", NULL);
- -      if (termno == NULL)
- -              goto out;
- -      vtermno = termno[0];
- -
- -      if (of_device_is_compatible(stdout_node, "hvterm1")) {
- -              udbg_putc = udbg_putcLP;
- -              udbg_getc = udbg_getcLP;
- -              udbg_getc_poll = udbg_getc_pollLP;
- -              add_preferred_console("hvc", termno[0] & 0xff, NULL);
- -      } else if (of_device_is_compatible(stdout_node, "hvterm-protocol")) {
- -              vtermno = termno[0];
- -              udbg_putc = udbg_hvsi_putc;
- -              udbg_getc = udbg_hvsi_getc;
- -              udbg_getc_poll = udbg_hvsi_getc_poll;
- -              add_preferred_console("hvsi", termno[0] & 0xff, NULL);
- -      }
- -out:
- -      of_node_put(stdout_node);
- -}
- -
   void vpa_init(int cpu)
   {
         int hwcpu = get_hard_smp_processor_id(cpu);
@@@ -67,8 -258,9 +67,8 @@@
         ret = register_vpa(hwcpu, addr);
   
         if (ret) {
- -              printk(KERN_ERR "WARNING: vpa_init: VPA registration for "
- -                              "cpu %d (hw %d) of area %lx returns %ld\n",
- -                              cpu, hwcpu, addr, ret);
+ +              pr_err("WARNING: VPA registration for cpu %d (hw %d) of area "
+ +                     "%lx failed with %ld\n", cpu, hwcpu, addr, ret);
                 return;
         }
         /*
@@@ -79,9 -271,10 +79,9 @@@
         if (firmware_has_feature(FW_FEATURE_SPLPAR)) {
                 ret = register_slb_shadow(hwcpu, addr);
                 if (ret)
- -                      printk(KERN_ERR
- -                             "WARNING: vpa_init: SLB shadow buffer "
- -                             "registration for cpu %d (hw %d) of area %lx "
- -                             "returns %ld\n", cpu, hwcpu, addr, ret);
+ +                      pr_err("WARNING: SLB shadow buffer registration for "
+ +                             "cpu %d (hw %d) of area %lx failed with %ld\n",
+ +                             cpu, hwcpu, addr, ret);
         }
   
         /*
@@@ -98,9 -291,8 +98,9 @@@
                 dtl->enqueue_to_dispatch_time = DISPATCH_LOG_BYTES;
                 ret = register_dtl(hwcpu, __pa(dtl));
                 if (ret)
- -                      pr_warn("DTL registration failed for cpu %d (%ld)\n",
- -                              cpu, ret);
+ +                      pr_err("WARNING: DTL registration of cpu %d (hw %d) "
+ +                             "failed with %ld\n", smp_processor_id(),
+ +                             hwcpu, ret);
                 lppaca_of(cpu).dtl_enable_mask = 2;
         }
   }
@@@ -203,7 -395,7 +203,7 @@@ static void pSeries_lpar_hptab_clear(vo
                 unsigned long ptel;
         } ptes[4];
         long lpar_rc;
- -      int i, j;
+ +      unsigned long i, j;
   
         /* Read in batches of 4,
          * invalidate only valid entries not in the VRMA
@@@ -523,12 -715,14 +523,14 @@@ EXPORT_SYMBOL(arch_free_page)
   /* NB: reg/unreg are called while guarded with the tracepoints_mutex */
   extern long hcall_tracepoint_refcount;
   
+ #if 0 /* work around buggy use of RCU from dyntick-idle mode */
   /* 
    * Since the tracing code might execute hcalls we need to guard against
    * recursion. One example of this are spinlocks calling H_YIELD on
    * shared processor partitions.
    */
   static DEFINE_PER_CPU(unsigned int, hcall_trace_depth);
+ #endif /* #if 0 work around buggy use of RCU from dyntick-idle mode */
   
   void hcall_tracepoint_regfunc(void)
   {
@@@ -542,6 -736,7 +544,7 @@@ void hcall_tracepoint_unregfunc(void
   
   void __trace_hcall_entry(unsigned long opcode, unsigned long *args)
   {
+ #if 0 /* work around buggy use of RCU from dyntick-idle mode */
         unsigned long flags;
         unsigned int *depth;
   
@@@ -558,11 -753,13 +561,13 @@@
   
   out:
         local_irq_restore(flags);
+ #endif /* #if 0 work around buggy use of RCU from dyntick-idle mode */
   }
   
   void __trace_hcall_exit(long opcode, unsigned long retval,
                         unsigned long *retbuf)
   {
+ #if 0 /* work around buggy use of RCU from dyntick-idle mode */
         unsigned long flags;
         unsigned int *depth;
   
@@@ -579,6 -776,7 +584,7 @@@
   
   out:
         local_irq_restore(flags);
+ #endif /* #if 0 work around buggy use of RCU from dyntick-idle mode */
   }
   #endif
   
diff --combined include/linux/rcupdate.h

index 8f4f881a0ad8153dd82150745aee03881448a3b0,9d40e425d46c8206019d57566bcb49c7b2406d97..ea9bc708ccef2a6e5448c52cb372a56f4f7160f2
--- 1/include/linux/rcupdate.h
--- 2/include/linux/rcupdate.h
+++ b/include/linux/rcupdate.h
@@@ -33,6 -33,7 +33,7 @@@
   #ifndef __LINUX_RCUPDATE_H
   #define __LINUX_RCUPDATE_H
   
+ #include <linux/types.h>
   #include <linux/cache.h>
   #include <linux/spinlock.h>
   #include <linux/threads.h>
@@@ -64,32 -65,74 +65,74 @@@ static inline void rcutorture_record_pr
   #define ULONG_CMP_GE(a, b)    (ULONG_MAX / 2 >= (a) - (b))
   #define ULONG_CMP_LT(a, b)    (ULONG_MAX / 2 < (a) - (b))
   
+ /* Exported common interfaces */
+ 
+ #ifdef CONFIG_PREEMPT_RCU
+ 
   /**
-  * struct rcu_head - callback structure for use with RCU
-  * @next: next update requests in a list
-  * @func: actual update function to call after the grace period.
+  * call_rcu() - Queue an RCU callback for invocation after a grace period.
+  * @head: structure to be used for queueing the RCU updates.
+  * @func: actual callback function to be invoked after the grace period
+  *
+  * The callback function will be invoked some time after a full grace
+  * period elapses, in other words after all pre-existing RCU read-side
+  * critical sections have completed.  However, the callback function
+  * might well execute concurrently with RCU read-side critical sections
+  * that started after call_rcu() was invoked.  RCU read-side critical
+  * sections are delimited by rcu_read_lock() and rcu_read_unlock(),
+  * and may be nested.
    */
- struct rcu_head {
-       struct rcu_head *next;
-       void (*func)(struct rcu_head *head);
- };
+ extern void call_rcu(struct rcu_head *head,
+                             void (*func)(struct rcu_head *head));
   
- /* Exported common interfaces */
+ #else /* #ifdef CONFIG_PREEMPT_RCU */
+ 
+ /* In classic RCU, call_rcu() is just call_rcu_sched(). */
+ #define       call_rcu        call_rcu_sched
+ 
+ #endif /* #else #ifdef CONFIG_PREEMPT_RCU */
+ 
+ /**
+  * call_rcu_bh() - Queue an RCU for invocation after a quicker grace period.
+  * @head: structure to be used for queueing the RCU updates.
+  * @func: actual callback function to be invoked after the grace period
+  *
+  * The callback function will be invoked some time after a full grace
+  * period elapses, in other words after all currently executing RCU
+  * read-side critical sections have completed. call_rcu_bh() assumes
+  * that the read-side critical sections end on completion of a softirq
+  * handler. This means that read-side critical sections in process
+  * context must not be interrupted by softirqs. This interface is to be
+  * used when most of the read-side critical sections are in softirq context.
+  * RCU read-side critical sections are delimited by :
+  *  - rcu_read_lock() and  rcu_read_unlock(), if in interrupt context.
+  *  OR
+  *  - rcu_read_lock_bh() and rcu_read_unlock_bh(), if in process context.
+  *  These may be nested.
+  */
+ extern void call_rcu_bh(struct rcu_head *head,
+                       void (*func)(struct rcu_head *head));
+ 
+ /**
+  * call_rcu_sched() - Queue an RCU for invocation after sched grace period.
+  * @head: structure to be used for queueing the RCU updates.
+  * @func: actual callback function to be invoked after the grace period
+  *
+  * The callback function will be invoked some time after a full grace
+  * period elapses, in other words after all currently executing RCU
+  * read-side critical sections have completed. call_rcu_sched() assumes
+  * that the read-side critical sections end on enabling of preemption
+  * or on voluntary preemption.
+  * RCU read-side critical sections are delimited by :
+  *  - rcu_read_lock_sched() and  rcu_read_unlock_sched(),
+  *  OR
+  *  anything that disables preemption.
+  *  These may be nested.
+  */
   extern void call_rcu_sched(struct rcu_head *head,
                            void (*func)(struct rcu_head *rcu));
- extern void synchronize_sched(void);
- extern void rcu_barrier_bh(void);
- extern void rcu_barrier_sched(void);
   
- static inline void __rcu_read_lock_bh(void)
- {
-       local_bh_disable();
- }
- 
- static inline void __rcu_read_unlock_bh(void)
- {
-       local_bh_enable();
- }
+ extern void synchronize_sched(void);
   
   #ifdef CONFIG_PREEMPT_RCU
   
@@@ -152,6 -195,15 +195,15 @@@ static inline void rcu_exit_nohz(void
   
   #endif /* #else #ifdef CONFIG_NO_HZ */
   
+ /*
+  * Infrastructure to implement the synchronize_() primitives in
+  * TREE_RCU and rcu_barrier_() primitives in TINY_RCU.
+  */
+ 
+ typedef void call_rcu_func_t(struct rcu_head *head,
+                            void (*func)(struct rcu_head *head));
+ void wait_rcu_gp(call_rcu_func_t crf);
+ 
   #if defined(CONFIG_TREE_RCU) || defined(CONFIG_TREE_PREEMPT_RCU)
   #include <linux/rcutree.h>
   #elif defined(CONFIG_TINY_RCU) || defined(CONFIG_TINY_PREEMPT_RCU)
@@@ -179,24 -231,33 +231,33 @@@ static inline void destroy_rcu_head_on_
   }
   #endif        /* #else !CONFIG_DEBUG_OBJECTS_RCU_HEAD */
   
+ 
+ #if defined(CONFIG_PROVE_RCU) && defined(CONFIG_NO_HZ)
+ extern bool rcu_check_extended_qs(void);
+ #else
+ static inline bool rcu_check_extended_qs(void) { return false; }
+ #endif
+ 
+ 
   #ifdef CONFIG_DEBUG_LOCK_ALLOC
   
- extern struct lockdep_map rcu_lock_map;
- # define rcu_read_acquire() \
-               lock_acquire(&rcu_lock_map, 0, 0, 2, 1, NULL, _THIS_IP_)
- # define rcu_read_release()   lock_release(&rcu_lock_map, 1, _THIS_IP_)
+ #define PROVE_RCU(a) a
   
- extern struct lockdep_map rcu_bh_lock_map;
- # define rcu_read_acquire_bh() \
-               lock_acquire(&rcu_bh_lock_map, 0, 0, 2, 1, NULL, _THIS_IP_)
- # define rcu_read_release_bh()        lock_release(&rcu_bh_lock_map, 1, _THIS_IP_)
+ static inline void rcu_lock_acquire(struct lockdep_map *map)
+ {
+       WARN_ON_ONCE(rcu_check_extended_qs());
+       lock_acquire(map, 0, 0, 2, 1, NULL, _THIS_IP_);
+ }
   
- extern struct lockdep_map rcu_sched_lock_map;
- # define rcu_read_acquire_sched() \
-               lock_acquire(&rcu_sched_lock_map, 0, 0, 2, 1, NULL, _THIS_IP_)
- # define rcu_read_release_sched() \
-               lock_release(&rcu_sched_lock_map, 1, _THIS_IP_)
+ static inline void rcu_lock_release(struct lockdep_map *map)
+ {
+       WARN_ON_ONCE(rcu_check_extended_qs());
+       lock_release(map, 1, _THIS_IP_);
+ }
   
+ extern struct lockdep_map rcu_lock_map;
+ extern struct lockdep_map rcu_bh_lock_map;
+ extern struct lockdep_map rcu_sched_lock_map;
   extern int debug_lockdep_rcu_enabled(void);
   
   /**
@@@ -210,11 -271,25 +271,25 @@@
    *
    * Checks debug_lockdep_rcu_enabled() to prevent false positives during boot
    * and while lockdep is disabled.
+  *
+  * Note that if the CPU is in an extended quiescent state, for example,
+  * if the CPU is in dyntick-idle mode, then rcu_read_lock_held() returns
+  * false even if the CPU did an rcu_read_lock().  The reason for this is
+  * that RCU ignores CPUs that are in extended quiescent states, so such
+  * a CPU is effectively never in an RCU read-side critical section
+  * regardless of what RCU primitives it invokes.  This state of affairs
+  * is required -- RCU would otherwise need to periodically wake up
+  * dyntick-idle CPUs, which would defeat the whole purpose of dyntick-idle
+  * mode.
    */
   static inline int rcu_read_lock_held(void)
   {
         if (!debug_lockdep_rcu_enabled())
                 return 1;
+ 
+       if (rcu_check_extended_qs())
+               return 0;
+ 
         return lock_is_held(&rcu_lock_map);
   }
   
@@@ -238,33 -313,44 +313,44 @@@ extern int rcu_read_lock_bh_held(void)
    *
    * Check debug_lockdep_rcu_enabled() to prevent false positives during boot
    * and while lockdep is disabled.
+  *
+  * Note that if the CPU is in an extended quiescent state, for example,
+  * if the CPU is in dyntick-idle mode, then rcu_read_lock_sched_held()
+  * returns false even after rcu_read_lock_sched().  The reason for this is
+  * that RCU ignores CPUs that are in extended quiescent states, so such
+  * a CPU is effectively never in an RCU read-side critical section
+  * regardless of what RCU primitives it invokes.  This state of affairs
+  * is required -- RCU would otherwise need to periodically wake up
+  * dyntick-idle CPUs, which would defeat the whole purpose of dyntick-idle
+  * mode.
    */
- -#ifdef CONFIG_PREEMPT
+ +#ifdef CONFIG_PREEMPT_COUNT
   static inline int rcu_read_lock_sched_held(void)
   {
         int lockdep_opinion = 0;
   
         if (!debug_lockdep_rcu_enabled())
                 return 1;
+ 
+       if (rcu_check_extended_qs())
+               return 0;
+ 
         if (debug_locks)
                 lockdep_opinion = lock_is_held(&rcu_sched_lock_map);
         return lockdep_opinion || preempt_count() != 0 || irqs_disabled();
   }
- -#else /* #ifdef CONFIG_PREEMPT */
+ +#else /* #ifdef CONFIG_PREEMPT_COUNT */
   static inline int rcu_read_lock_sched_held(void)
   {
         return 1;
   }
- -#endif /* #else #ifdef CONFIG_PREEMPT */
+ +#endif /* #else #ifdef CONFIG_PREEMPT_COUNT */
   
   #else /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */
   
- # define rcu_read_acquire()           do { } while (0)
- # define rcu_read_release()           do { } while (0)
- # define rcu_read_acquire_bh()                do { } while (0)
- # define rcu_read_release_bh()                do { } while (0)
- # define rcu_read_acquire_sched()     do { } while (0)
- # define rcu_read_release_sched()     do { } while (0)
+ # define PROVE_RCU(a)                 do { } while (0)
+ # define rcu_lock_acquire(a)          do { } while (0)
+ # define rcu_lock_release(a)          do { } while (0)
   
   static inline int rcu_read_lock_held(void)
   {
@@@ -276,17 -362,17 +362,17 @@@ static inline int rcu_read_lock_bh_held
         return 1;
   }
   
- -#ifdef CONFIG_PREEMPT
+ +#ifdef CONFIG_PREEMPT_COUNT
   static inline int rcu_read_lock_sched_held(void)
   {
         return preempt_count() != 0 || irqs_disabled();
   }
- -#else /* #ifdef CONFIG_PREEMPT */
+ +#else /* #ifdef CONFIG_PREEMPT_COUNT */
   static inline int rcu_read_lock_sched_held(void)
   {
         return 1;
   }
- -#endif /* #else #ifdef CONFIG_PREEMPT */
+ +#endif /* #else #ifdef CONFIG_PREEMPT_COUNT */
   
   #endif /* #else #ifdef CONFIG_DEBUG_LOCK_ALLOC */
   
@@@ -297,19 -383,31 +383,31 @@@ extern int rcu_my_thread_group_empty(vo
   /**
    * rcu_lockdep_assert - emit lockdep splat if specified condition not met
    * @c: condition to check
+  * @s: informative message
    */
- #define rcu_lockdep_assert(c)                                         \
+ #define rcu_lockdep_assert(c, s)                                      \
         do {                                                            \
                 static bool __warned;                                   \
                 if (debug_lockdep_rcu_enabled() && !__warned && !(c)) { \
                         __warned = true;                                \
-                       lockdep_rcu_dereference(__FILE__, __LINE__);    \
+                       lockdep_rcu_suspicious(__FILE__, __LINE__, s);  \
                 }                                                       \
         } while (0)
   
+ #define rcu_sleep_check()                                             \
+       do {                                                            \
+               rcu_lockdep_assert(!lock_is_held(&rcu_bh_lock_map),     \
+                                  "Illegal context switch in RCU-bh"   \
+                                  " read-side critical section");      \
+               rcu_lockdep_assert(!lock_is_held(&rcu_sched_lock_map),  \
+                                  "Illegal context switch in RCU-sched"\
+                                  " read-side critical section");      \
+       } while (0)
+ 
   #else /* #ifdef CONFIG_PROVE_RCU */
   
- #define rcu_lockdep_assert(c) do { } while (0)
+ #define rcu_lockdep_assert(c, s) do { } while (0)
+ #define rcu_sleep_check() do { } while (0)
   
   #endif /* #else #ifdef CONFIG_PROVE_RCU */
   
@@@ -338,14 -436,16 +436,16 @@@
   #define __rcu_dereference_check(p, c, space) \
         ({ \
                 typeof(*p) *_________p1 = (typeof(*p)*__force )ACCESS_ONCE(p); \
-               rcu_lockdep_assert(c); \
+               rcu_lockdep_assert(c, "suspicious rcu_dereference_check()" \
+                                     " usage"); \
                 rcu_dereference_sparse(p, space); \
                 smp_read_barrier_depends(); \
                 ((typeof(*p) __force __kernel *)(_________p1)); \
         })
   #define __rcu_dereference_protected(p, c, space) \
         ({ \
-               rcu_lockdep_assert(c); \
+               rcu_lockdep_assert(c, "suspicious rcu_dereference_protected()" \
+                                     " usage"); \
                 rcu_dereference_sparse(p, space); \
                 ((typeof(*p) __force __kernel *)(p)); \
         })
@@@ -359,15 -459,15 +459,15 @@@
   #define __rcu_dereference_index_check(p, c) \
         ({ \
                 typeof(p) _________p1 = ACCESS_ONCE(p); \
-               rcu_lockdep_assert(c); \
+               rcu_lockdep_assert(c, \
+                                  "suspicious rcu_dereference_index_check()" \
+                                  " usage"); \
                 smp_read_barrier_depends(); \
                 (_________p1); \
         })
   #define __rcu_assign_pointer(p, v, space) \
         ({ \
-               if (!__builtin_constant_p(v) || \
-                   ((v) != NULL)) \
-                       smp_wmb(); \
+               smp_wmb(); \
                 (p) = (typeof(*v) __force space *)(v); \
         })
   
@@@ -500,26 -600,6 +600,6 @@@
   #define rcu_dereference_protected(p, c) \
         __rcu_dereference_protected((p), (c), __rcu)
   
- /**
-  * rcu_dereference_bh_protected() - fetch RCU-bh pointer when updates prevented
-  * @p: The pointer to read, prior to dereferencing
-  * @c: The conditions under which the dereference will take place
-  *
-  * This is the RCU-bh counterpart to rcu_dereference_protected().
-  */
- #define rcu_dereference_bh_protected(p, c) \
-       __rcu_dereference_protected((p), (c), __rcu)
- 
- /**
-  * rcu_dereference_sched_protected() - fetch RCU-sched pointer when updates prevented
-  * @p: The pointer to read, prior to dereferencing
-  * @c: The conditions under which the dereference will take place
-  *
-  * This is the RCU-sched counterpart to rcu_dereference_protected().
-  */
- #define rcu_dereference_sched_protected(p, c) \
-       __rcu_dereference_protected((p), (c), __rcu)
- 
   
   /**
    * rcu_dereference() - fetch RCU-protected pointer for dereferencing
@@@ -591,7 -671,7 +671,7 @@@ static inline void rcu_read_lock(void
   {
         __rcu_read_lock();
         __acquire(RCU);
-       rcu_read_acquire();
+       rcu_lock_acquire(&rcu_lock_map);
   }
   
   /*
@@@ -611,7 -691,7 +691,7 @@@
    */
   static inline void rcu_read_unlock(void)
   {
-       rcu_read_release();
+       rcu_lock_release(&rcu_lock_map);
         __release(RCU);
         __rcu_read_unlock();
   }
@@@ -630,9 -710,9 +710,9 @@@
    */
   static inline void rcu_read_lock_bh(void)
   {
-       __rcu_read_lock_bh();
+       local_bh_disable();
         __acquire(RCU_BH);
-       rcu_read_acquire_bh();
+       rcu_lock_acquire(&rcu_bh_lock_map);
   }
   
   /*
@@@ -642,9 -722,9 +722,9 @@@
    */
   static inline void rcu_read_unlock_bh(void)
   {
-       rcu_read_release_bh();
+       rcu_lock_release(&rcu_bh_lock_map);
         __release(RCU_BH);
-       __rcu_read_unlock_bh();
+       local_bh_enable();
   }
   
   /**
@@@ -659,7 -739,7 +739,7 @@@ static inline void rcu_read_lock_sched(
   {
         preempt_disable();
         __acquire(RCU_SCHED);
-       rcu_read_acquire_sched();
+       rcu_lock_acquire(&rcu_sched_lock_map);
   }
   
   /* Used by lockdep and tracing: cannot be traced, cannot call lockdep. */
@@@ -676,7 -756,7 +756,7 @@@ static inline notrace void rcu_read_loc
    */
   static inline void rcu_read_unlock_sched(void)
   {
-       rcu_read_release_sched();
+       rcu_lock_release(&rcu_sched_lock_map);
         __release(RCU_SCHED);
         preempt_enable();
   }
@@@ -698,11 -778,18 +778,18 @@@ static inline notrace void rcu_read_unl
    * any prior initialization.  Returns the value assigned.
    *
    * Inserts memory barriers on architectures that require them
-  * (pretty much all of them other than x86), and also prevents
-  * the compiler from reordering the code that initializes the
-  * structure after the pointer assignment.  More importantly, this
-  * call documents which pointers will be dereferenced by RCU read-side
-  * code.
+  * (which is most of them), and also prevents the compiler from
+  * reordering the code that initializes the structure after the pointer
+  * assignment.  More importantly, this call documents which pointers
+  * will be dereferenced by RCU read-side code.
+  *
+  * In some special cases, you may use RCU_INIT_POINTER() instead
+  * of rcu_assign_pointer().  RCU_INIT_POINTER() is a bit faster due
+  * to the fact that it does not constrain either the CPU or the compiler.
+  * That said, using RCU_INIT_POINTER() when you should have used
+  * rcu_assign_pointer() is a very bad thing that results in
+  * impossible-to-diagnose memory corruption.  So please be careful.
+  * See the RCU_INIT_POINTER() comment header for details.
    */
   #define rcu_assign_pointer(p, v) \
         __rcu_assign_pointer((p), (v), __rcu)
@@@ -710,105 -797,38 +797,38 @@@
   /**
    * RCU_INIT_POINTER() - initialize an RCU protected pointer
    *
-  * Initialize an RCU-protected pointer in such a way to avoid RCU-lockdep
-  * splats.
+  * Initialize an RCU-protected pointer in special cases where readers
+  * do not need ordering constraints on the CPU or the compiler.  These
+  * special cases are:
+  *
+  * 1. This use of RCU_INIT_POINTER() is NULLing out the pointer -or-
+  * 2. The caller has taken whatever steps are required to prevent
+  *    RCU readers from concurrently accessing this pointer -or-
+  * 3. The referenced data structure has already been exposed to
+  *    readers either at compile time or via rcu_assign_pointer() -and-
+  *    a.      You have not made -any- reader-visible changes to
+  *            this structure since then -or-
+  *    b.      It is OK for readers accessing this structure from its
+  *            new location to see the old state of the structure.  (For
+  *            example, the changes were to statistical counters or to
+  *            other state where exact synchronization is not required.)
+  *
+  * Failure to follow these rules governing use of RCU_INIT_POINTER() will
+  * result in impossible-to-diagnose memory corruption.  That is, the structures
+  * will look OK in crash dumps, but any concurrent RCU readers might
+  * see pre-initialized values of the referenced data structure.  So
+  * please be very careful how you use RCU_INIT_POINTER()!!!
+  *
+  * If you are creating an RCU-protected linked structure that is accessed
+  * by a single external-to-structure RCU-protected pointer, then you may
+  * use RCU_INIT_POINTER() to initialize the internal RCU-protected
+  * pointers, but you must use rcu_assign_pointer() to initialize the
+  * external-to-structure pointer -after- you have completely initialized
+  * the reader-accessible portions of the linked structure.
    */
   #define RCU_INIT_POINTER(p, v) \
                 p = (typeof(*v) __force __rcu *)(v)
   
- /* Infrastructure to implement the synchronize_() primitives. */
- 
- struct rcu_synchronize {
-       struct rcu_head head;
-       struct completion completion;
- };
- 
- extern void wakeme_after_rcu(struct rcu_head  *head);
- 
- #ifdef CONFIG_PREEMPT_RCU
- 
- /**
-  * call_rcu() - Queue an RCU callback for invocation after a grace period.
-  * @head: structure to be used for queueing the RCU updates.
-  * @func: actual callback function to be invoked after the grace period
-  *
-  * The callback function will be invoked some time after a full grace
-  * period elapses, in other words after all pre-existing RCU read-side
-  * critical sections have completed.  However, the callback function
-  * might well execute concurrently with RCU read-side critical sections
-  * that started after call_rcu() was invoked.  RCU read-side critical
-  * sections are delimited by rcu_read_lock() and rcu_read_unlock(),
-  * and may be nested.
-  */
- extern void call_rcu(struct rcu_head *head,
-                             void (*func)(struct rcu_head *head));
- 
- #else /* #ifdef CONFIG_PREEMPT_RCU */
- 
- /* In classic RCU, call_rcu() is just call_rcu_sched(). */
- #define       call_rcu        call_rcu_sched
- 
- #endif /* #else #ifdef CONFIG_PREEMPT_RCU */
- 
- /**
-  * call_rcu_bh() - Queue an RCU for invocation after a quicker grace period.
-  * @head: structure to be used for queueing the RCU updates.
-  * @func: actual callback function to be invoked after the grace period
-  *
-  * The callback function will be invoked some time after a full grace
-  * period elapses, in other words after all currently executing RCU
-  * read-side critical sections have completed. call_rcu_bh() assumes
-  * that the read-side critical sections end on completion of a softirq
-  * handler. This means that read-side critical sections in process
-  * context must not be interrupted by softirqs. This interface is to be
-  * used when most of the read-side critical sections are in softirq context.
-  * RCU read-side critical sections are delimited by :
-  *  - rcu_read_lock() and  rcu_read_unlock(), if in interrupt context.
-  *  OR
-  *  - rcu_read_lock_bh() and rcu_read_unlock_bh(), if in process context.
-  *  These may be nested.
-  */
- extern void call_rcu_bh(struct rcu_head *head,
-                       void (*func)(struct rcu_head *head));
- 
- /*
-  * debug_rcu_head_queue()/debug_rcu_head_unqueue() are used internally
-  * by call_rcu() and rcu callback execution, and are therefore not part of the
-  * RCU API. Leaving in rcupdate.h because they are used by all RCU flavors.
-  */
- 
- #ifdef CONFIG_DEBUG_OBJECTS_RCU_HEAD
- # define STATE_RCU_HEAD_READY 0
- # define STATE_RCU_HEAD_QUEUED        1
- 
- extern struct debug_obj_descr rcuhead_debug_descr;
- 
- static inline void debug_rcu_head_queue(struct rcu_head *head)
- {
-       WARN_ON_ONCE((unsigned long)head & 0x3);
-       debug_object_activate(head, &rcuhead_debug_descr);
-       debug_object_active_state(head, &rcuhead_debug_descr,
-                                 STATE_RCU_HEAD_READY,
-                                 STATE_RCU_HEAD_QUEUED);
- }
- 
- static inline void debug_rcu_head_unqueue(struct rcu_head *head)
- {
-       debug_object_active_state(head, &rcuhead_debug_descr,
-                                 STATE_RCU_HEAD_QUEUED,
-                                 STATE_RCU_HEAD_READY);
-       debug_object_deactivate(head, &rcuhead_debug_descr);
- }
- #else /* !CONFIG_DEBUG_OBJECTS_RCU_HEAD */
- static inline void debug_rcu_head_queue(struct rcu_head *head)
- {
- }
- 
- static inline void debug_rcu_head_unqueue(struct rcu_head *head)
- {
- }
- #endif        /* #else !CONFIG_DEBUG_OBJECTS_RCU_HEAD */
- 
   static __always_inline bool __is_kfree_rcu_offset(unsigned long offset)
   {
         return offset < 4096;
@@@ -827,18 -847,6 +847,6 @@@ void __kfree_rcu(struct rcu_head *head
         call_rcu(head, (rcu_callback)offset);
   }
   
- extern void kfree(const void *);
- 
- static inline void __rcu_reclaim(struct rcu_head *head)
- {
-       unsigned long offset = (unsigned long)head->func;
- 
-       if (__is_kfree_rcu_offset(offset))
-               kfree((void *)head - offset);
-       else
-               head->func(head);
- }
- 
   /**
    * kfree_rcu() - kfree an object after a grace period.
    * @ptr:      pointer to kfree
diff --combined include/linux/sched.h

index 1bb33561f46ec07bc0e25ef65e99ccb48f1cafba,90b02be2a1655c866fab528567b4cf3d13ab8402..0c3d889b73da0020d2a4024f128d332ef4ae92d5
--- 1/include/linux/sched.h
--- 2/include/linux/sched.h
+++ b/include/linux/sched.h
@@@ -219,7 -219,7 +219,7 @@@ extern char ___assert_task_state[1 - 2*
                         ((task->state & (__TASK_STOPPED | __TASK_TRACED)) != 0)
   #define task_contributes_to_load(task)        \
                                 ((task->state & TASK_UNINTERRUPTIBLE) != 0 && \
- -                               (task->flags & PF_FREEZING) == 0)
+ +                               (task->flags & PF_FROZEN) == 0)
   
   #define __set_task_state(tsk, state_value)            \
         do { (tsk)->state = (state_value); } while (0)
@@@ -270,7 -270,6 +270,6 @@@ extern void init_idle_bootup_task(struc
   
   extern int runqueue_is_locked(int cpu);
   
- extern cpumask_var_t nohz_cpu_mask;
   #if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ)
   extern void select_nohz_load_balancer(int stop_tick);
   extern int get_nohz_timer_target(void);
@@@ -1260,9 -1259,6 +1259,6 @@@ struct task_struct 
   #ifdef CONFIG_PREEMPT_RCU
         int rcu_read_lock_nesting;
         char rcu_read_unlock_special;
- #if defined(CONFIG_RCU_BOOST) && defined(CONFIG_TREE_PREEMPT_RCU)
-       int rcu_boosted;
- #endif /* #if defined(CONFIG_RCU_BOOST) && defined(CONFIG_TREE_PREEMPT_RCU) */
         struct list_head rcu_node_entry;
   #endif /* #ifdef CONFIG_PREEMPT_RCU */
   #ifdef CONFIG_TREE_PREEMPT_RCU
@@@ -1292,7 -1288,7 +1288,7 @@@
         int exit_state;
         int exit_code, exit_signal;
         int pdeath_signal;  /*  The signal sent when the parent dies  */
- -      unsigned int group_stop;        /* GROUP_STOP_*, siglock protected */
+ +      unsigned int jobctl;    /* JOBCTL_*, siglock protected */
         /* ??? */
         unsigned int personality;
         unsigned did_exec:1;
@@@ -1512,6 -1508,7 +1508,6 @@@
         short il_next;
         short pref_node_fork;
   #endif
- -      atomic_t fs_excl;       /* holding fs exclusive resources */
         struct rcu_head rcu;
   
         /*
@@@ -1767,8 -1764,8 +1763,8 @@@ extern void thread_group_times(struct t
   #define PF_DUMPCORE   0x00000200      /* dumped core */
   #define PF_SIGNALED   0x00000400      /* killed by a signal */
   #define PF_MEMALLOC   0x00000800      /* Allocating memory */
+ +#define PF_NPROC_EXCEEDED 0x00001000  /* set_user noticed that RLIMIT_NPROC was exceeded */
   #define PF_USED_MATH  0x00002000      /* if unset the fpu must be initialized before use */
- -#define PF_FREEZING   0x00004000      /* freeze in progress. do not account to load */
   #define PF_NOFREEZE   0x00008000      /* this thread should not be frozen */
   #define PF_FROZEN     0x00010000      /* frozen for system suspend */
   #define PF_FSTRANS    0x00020000      /* inside a filesystem transaction */
@@@ -1812,34 -1809,15 +1808,34 @@@
   #define used_math() tsk_used_math(current)
   
   /*
- - * task->group_stop flags
+ + * task->jobctl flags
    */
- -#define GROUP_STOP_SIGMASK    0xffff    /* signr of the last group stop */
- -#define GROUP_STOP_PENDING    (1 << 16) /* task should stop for group stop */
- -#define GROUP_STOP_CONSUME    (1 << 17) /* consume group stop count */
- -#define GROUP_STOP_TRAPPING   (1 << 18) /* switching from STOPPED to TRACED */
- -#define GROUP_STOP_DEQUEUED   (1 << 19) /* stop signal dequeued */
- -
- -extern void task_clear_group_stop_pending(struct task_struct *task);
+ +#define JOBCTL_STOP_SIGMASK   0xffff  /* signr of the last group stop */
+ +
+ +#define JOBCTL_STOP_DEQUEUED_BIT 16   /* stop signal dequeued */
+ +#define JOBCTL_STOP_PENDING_BIT       17      /* task should stop for group stop */
+ +#define JOBCTL_STOP_CONSUME_BIT       18      /* consume group stop count */
+ +#define JOBCTL_TRAP_STOP_BIT  19      /* trap for STOP */
+ +#define JOBCTL_TRAP_NOTIFY_BIT        20      /* trap for NOTIFY */
+ +#define JOBCTL_TRAPPING_BIT   21      /* switching to TRACED */
+ +#define JOBCTL_LISTENING_BIT  22      /* ptracer is listening for events */
+ +
+ +#define JOBCTL_STOP_DEQUEUED  (1 << JOBCTL_STOP_DEQUEUED_BIT)
+ +#define JOBCTL_STOP_PENDING   (1 << JOBCTL_STOP_PENDING_BIT)
+ +#define JOBCTL_STOP_CONSUME   (1 << JOBCTL_STOP_CONSUME_BIT)
+ +#define JOBCTL_TRAP_STOP      (1 << JOBCTL_TRAP_STOP_BIT)
+ +#define JOBCTL_TRAP_NOTIFY    (1 << JOBCTL_TRAP_NOTIFY_BIT)
+ +#define JOBCTL_TRAPPING               (1 << JOBCTL_TRAPPING_BIT)
+ +#define JOBCTL_LISTENING      (1 << JOBCTL_LISTENING_BIT)
+ +
+ +#define JOBCTL_TRAP_MASK      (JOBCTL_TRAP_STOP | JOBCTL_TRAP_NOTIFY)
+ +#define JOBCTL_PENDING_MASK   (JOBCTL_STOP_PENDING | JOBCTL_TRAP_MASK)
+ +
+ +extern bool task_set_jobctl_pending(struct task_struct *task,
+ +                                  unsigned int mask);
+ +extern void task_clear_jobctl_trapping(struct task_struct *task);
+ +extern void task_clear_jobctl_pending(struct task_struct *task,
+ +                                    unsigned int mask);
   
   #ifdef CONFIG_PREEMPT_RCU
   
@@@ -2154,7 -2132,7 +2150,7 @@@ static inline int dequeue_signal_lock(s
         spin_unlock_irqrestore(&tsk->sighand->siglock, flags);
   
         return ret;
- -}     
+ +}
   
   extern void block_all_signals(int (*notifier)(void *priv), void *priv,
                               sigset_t *mask);
@@@ -2169,7 -2147,7 +2165,7 @@@ extern int kill_pid_info_as_uid(int, st
   extern int kill_pgrp(struct pid *pid, int sig, int priv);
   extern int kill_pid(struct pid *pid, int sig, int priv);
   extern int kill_proc_info(int, struct siginfo *, pid_t);
- -extern int do_notify_parent(struct task_struct *, int);
+ +extern __must_check bool do_notify_parent(struct task_struct *, int);
   extern void __wake_up_parent(struct task_struct *p, struct task_struct *parent);
   extern void force_sig(int, struct task_struct *);
   extern int send_sig(int, struct task_struct *, int);
@@@ -2293,10 -2271,8 +2289,10 @@@ static inline int get_nr_threads(struc
         return tsk->signal->nr_threads;
   }
   
- -/* de_thread depends on thread_group_leader not being a pid based check */
- -#define thread_group_leader(p)        (p == p->group_leader)
+ +static inline bool thread_group_leader(struct task_struct *p)
+ +{
+ +      return p->exit_signal >= 0;
+ +}
   
   /* Do to the insanities of de_thread it is possible for a process
    * to have the pid of the thread group leader without actually being
@@@ -2329,6 -2305,11 +2325,6 @@@ static inline int thread_group_empty(st
   #define delay_group_leader(p) \
                 (thread_group_leader(p) && !thread_group_empty(p))
   
- -static inline int task_detached(struct task_struct *p)
- -{
- -      return p->exit_signal == -1;
- -}
- -
   /*
    * Protects ->fs, ->files, ->mm, ->group_info, ->comm, keyring
    * subscriptions and synchronises with wait4().  Also used in procfs.  Also
@@@ -2525,7 -2506,7 +2521,7 @@@ extern int _cond_resched(void)
   
   extern int __cond_resched_lock(spinlock_t *lock);
   
- -#ifdef CONFIG_PREEMPT
+ +#ifdef CONFIG_PREEMPT_COUNT
   #define PREEMPT_LOCK_OFFSET   PREEMPT_OFFSET
   #else
   #define PREEMPT_LOCK_OFFSET   0
diff --combined init/Kconfig

index d62778390e5556af22a95cc61d6b5e0cdeeb262e,e22a6910dc5f1226ae2b740034ad39fefb661aff..dc7e27bf89a860a5290d656a34b0369f0600d3b0
--- 1/init/Kconfig
--- 2/init/Kconfig
+++ b/init/Kconfig
@@@ -391,7 -391,7 +391,7 @@@ config TREE_RC
   
   config TREE_PREEMPT_RCU
         bool "Preemptible tree-based hierarchical RCU"
-       depends on PREEMPT
+       depends on PREEMPT && SMP
         help
           This option selects the RCU implementation that is
           designed for very large SMP systems with hundreds or
@@@ -401,7 -401,7 +401,7 @@@
   
   config TINY_RCU
         bool "UP-only small-memory-footprint RCU"
-       depends on !SMP
+       depends on !PREEMPT && !SMP
         help
           This option selects the RCU implementation that is
           designed for UP systems from which real-time response
@@@ -410,7 -410,7 +410,7 @@@
   
   config TINY_PREEMPT_RCU
         bool "Preemptible UP-only small-memory-footprint RCU"
-       depends on !SMP && PREEMPT
+       depends on PREEMPT && !SMP
         help
           This option selects the RCU implementation that is designed
           for real-time UP systems.  This option greatly reduces the
@@@ -673,7 -673,7 +673,7 @@@ config CGROUP_MEM_RES_CTLR_SWA
           be careful about enabling this. When memory resource controller
           is disabled by boot option, this will be automatically disabled and
           there will be no overhead from this. Even when you set this config=y,
- -        if boot option "noswapaccount" is set, swap will not be accounted.
+ +        if boot option "swapaccount=0" is set, swap will not be accounted.
           Now, memory usage of swap_cgroup is 2 bytes per entry. If swap page
           size is 4096bytes, 512k per 1Gbytes of swap.
   config CGROUP_MEM_RES_CTLR_SWAP_ENABLED
@@@ -688,7 -688,7 +688,7 @@@
           parameter should have this option unselected.
           For those who want to have the feature enabled by default should
           select this option (if, for some reason, they need to disable it
- -        then noswapaccount does the trick).
+ +        then swapaccount=0 does the trick).
   
   config CGROUP_PERF
         bool "Enable perf_event per-cpu per-container group (cgroup) monitoring"
@@@ -917,8 -917,6 +917,8 @@@ config ANON_INODE
   
   menuconfig EXPERT
         bool "Configure standard kernel features (expert users)"
+ +      # Unhide debug options, to make the on-by-default options visible
+ +      select DEBUG_KERNEL
         help
           This option allows certain base kernel options and settings
             to be disabled or tweaked. This is for specialized
@@@ -1009,19 -1007,14 +1009,19 @@@ config ELF_COR
         help
           Enable support for generating core dumps. Disabling saves about 4k.
   
+ +
   config PCSPKR_PLATFORM
         bool "Enable PC-Speaker support" if EXPERT
- -      depends on ALPHA || X86 || MIPS || PPC_PREP || PPC_CHRP || PPC_PSERIES
+ +      depends on HAVE_PCSPKR_PLATFORM
+ +      select I8253_LOCK
         default y
         help
             This option allows to disable the internal PC-Speaker
             support, saving some memory.
   
+ +config HAVE_PCSPKR_PLATFORM
+ +      bool
+ +
   config BASE_FULL
         default y
         bool "Enable full-sized data structures for core" if EXPERT
diff --combined kernel/lockdep.c

index 91d67ce3a8d520a5cdc43d7abe6534c59710e79d,ef5dd692ac49f7ddf85c7a1ee4b0fd417fdbf990..cec261c956c472f88ada58cb90ad5fc0df769cba
--- 1/kernel/lockdep.c
--- 2/kernel/lockdep.c
+++ b/kernel/lockdep.c
@@@ -1129,10 -1129,11 +1129,11 @@@ print_circular_bug_header(struct lock_l
         if (debug_locks_silent)
                 return 0;
   
-       printk("\n=======================================================\n");
-       printk(  "[ INFO: possible circular locking dependency detected ]\n");
+       printk("\n");
+       printk("======================================================\n");
+       printk("[ INFO: possible circular locking dependency detected ]\n");
         print_kernel_version();
-       printk(  "-------------------------------------------------------\n");
+       printk("-------------------------------------------------------\n");
         printk("%s/%d is trying to acquire lock:\n",
                 curr->comm, task_pid_nr(curr));
         print_lock(check_src);
@@@ -1463,11 -1464,12 +1464,12 @@@ print_bad_irq_dependency(struct task_st
         if (!debug_locks_off_graph_unlock() || debug_locks_silent)
                 return 0;
   
-       printk("\n======================================================\n");
-       printk(  "[ INFO: %s-safe -> %s-unsafe lock order detected ]\n",
+       printk("\n");
+       printk("======================================================\n");
+       printk("[ INFO: %s-safe -> %s-unsafe lock order detected ]\n",
                 irqclass, irqclass);
         print_kernel_version();
-       printk(  "------------------------------------------------------\n");
+       printk("------------------------------------------------------\n");
         printk("%s/%d [HC%u[%lu]:SC%u[%lu]:HE%u:SE%u] is trying to acquire:\n",
                 curr->comm, task_pid_nr(curr),
                 curr->hardirq_context, hardirq_count() >> HARDIRQ_SHIFT,
@@@ -1692,10 -1694,11 +1694,11 @@@ print_deadlock_bug(struct task_struct *
         if (!debug_locks_off_graph_unlock() || debug_locks_silent)
                 return 0;
   
-       printk("\n=============================================\n");
-       printk(  "[ INFO: possible recursive locking detected ]\n");
+       printk("\n");
+       printk("=============================================\n");
+       printk("[ INFO: possible recursive locking detected ]\n");
         print_kernel_version();
-       printk(  "---------------------------------------------\n");
+       printk("---------------------------------------------\n");
         printk("%s/%d is trying to acquire lock:\n",
                 curr->comm, task_pid_nr(curr));
         print_lock(next);
@@@ -2177,10 -2180,11 +2180,11 @@@ print_usage_bug(struct task_struct *cur
         if (!debug_locks_off_graph_unlock() || debug_locks_silent)
                 return 0;
   
-       printk("\n=================================\n");
-       printk(  "[ INFO: inconsistent lock state ]\n");
+       printk("\n");
+       printk("=================================\n");
+       printk("[ INFO: inconsistent lock state ]\n");
         print_kernel_version();
-       printk(  "---------------------------------\n");
+       printk("---------------------------------\n");
   
         printk("inconsistent {%s} -> {%s} usage.\n",
                 usage_str[prev_bit], usage_str[new_bit]);
@@@ -2241,10 -2245,11 +2245,11 @@@ print_irq_inversion_bug(struct task_str
         if (!debug_locks_off_graph_unlock() || debug_locks_silent)
                 return 0;
   
-       printk("\n=========================================================\n");
-       printk(  "[ INFO: possible irq lock inversion dependency detected ]\n");
+       printk("\n");
+       printk("=========================================================\n");
+       printk("[ INFO: possible irq lock inversion dependency detected ]\n");
         print_kernel_version();
-       printk(  "---------------------------------------------------------\n");
+       printk("---------------------------------------------------------\n");
         printk("%s/%d just changed the state of lock:\n",
                 curr->comm, task_pid_nr(curr));
         print_lock(this);
@@@ -2468,9 -2473,6 +2473,9 @@@ mark_held_locks(struct task_struct *cur
   
                 BUG_ON(usage_bit >= LOCK_USAGE_STATES);
   
+ +              if (hlock_class(hlock)->key == __lockdep_no_validate__.subkeys)
+ +                      continue;
+ +
                 if (!mark_lock(curr, hlock, usage_bit))
                         return 0;
         }
@@@ -2481,13 -2483,34 +2486,13 @@@
   /*
    * Hardirqs will be enabled:
    */
- -void trace_hardirqs_on_caller(unsigned long ip)
+ +static void __trace_hardirqs_on_caller(unsigned long ip)
   {
         struct task_struct *curr = current;
   
- -      time_hardirqs_on(CALLER_ADDR0, ip);
- -
- -      if (unlikely(!debug_locks || current->lockdep_recursion))
- -              return;
- -
- -      if (DEBUG_LOCKS_WARN_ON(unlikely(early_boot_irqs_disabled)))
- -              return;
- -
- -      if (unlikely(curr->hardirqs_enabled)) {
- -              /*
- -               * Neither irq nor preemption are disabled here
- -               * so this is racy by nature but losing one hit
- -               * in a stat is not a big deal.
- -               */
- -              __debug_atomic_inc(redundant_hardirqs_on);
- -              return;
- -      }
         /* we'll do an OFF -> ON transition: */
         curr->hardirqs_enabled = 1;
   
- -      if (DEBUG_LOCKS_WARN_ON(!irqs_disabled()))
- -              return;
- -      if (DEBUG_LOCKS_WARN_ON(current->hardirq_context))
- -              return;
         /*
          * We are going to turn hardirqs on, so set the
          * usage bit for all held locks:
@@@ -2507,37 -2530,6 +2512,37 @@@
         curr->hardirq_enable_event = ++curr->irq_events;
         debug_atomic_inc(hardirqs_on_events);
   }
+ +
+ +void trace_hardirqs_on_caller(unsigned long ip)
+ +{
+ +      time_hardirqs_on(CALLER_ADDR0, ip);
+ +
+ +      if (unlikely(!debug_locks || current->lockdep_recursion))
+ +              return;
+ +
+ +      if (unlikely(current->hardirqs_enabled)) {
+ +              /*
+ +               * Neither irq nor preemption are disabled here
+ +               * so this is racy by nature but losing one hit
+ +               * in a stat is not a big deal.
+ +               */
+ +              __debug_atomic_inc(redundant_hardirqs_on);
+ +              return;
+ +      }
+ +
+ +      if (DEBUG_LOCKS_WARN_ON(!irqs_disabled()))
+ +              return;
+ +
+ +      if (DEBUG_LOCKS_WARN_ON(unlikely(early_boot_irqs_disabled)))
+ +              return;
+ +
+ +      if (DEBUG_LOCKS_WARN_ON(current->hardirq_context))
+ +              return;
+ +
+ +      current->lockdep_recursion = 1;
+ +      __trace_hardirqs_on_caller(ip);
+ +      current->lockdep_recursion = 0;
+ +}
   EXPORT_SYMBOL(trace_hardirqs_on_caller);
   
   void trace_hardirqs_on(void)
@@@ -2587,7 -2579,7 +2592,7 @@@ void trace_softirqs_on(unsigned long ip
   {
         struct task_struct *curr = current;
   
- -      if (unlikely(!debug_locks))
+ +      if (unlikely(!debug_locks || current->lockdep_recursion))
                 return;
   
         if (DEBUG_LOCKS_WARN_ON(!irqs_disabled()))
@@@ -2598,7 -2590,6 +2603,7 @@@
                 return;
         }
   
+ +      current->lockdep_recursion = 1;
         /*
          * We'll do an OFF -> ON transition:
          */
@@@ -2613,7 -2604,6 +2618,7 @@@
          */
         if (curr->hardirqs_enabled)
                 mark_held_locks(curr, SOFTIRQ);
+ +      current->lockdep_recursion = 0;
   }
   
   /*
@@@ -2623,7 -2613,7 +2628,7 @@@ void trace_softirqs_off(unsigned long i
   {
         struct task_struct *curr = current;
   
- -      if (unlikely(!debug_locks))
+ +      if (unlikely(!debug_locks || current->lockdep_recursion))
                 return;
   
         if (DEBUG_LOCKS_WARN_ON(!irqs_disabled()))
@@@ -2874,7 -2864,10 +2879,7 @@@ static int mark_lock(struct task_struc
   void lockdep_init_map(struct lockdep_map *lock, const char *name,
                       struct lock_class_key *key, int subclass)
   {
- -      int i;
- -
- -      for (i = 0; i < NR_LOCKDEP_CACHING_CLASSES; i++)
- -              lock->class_cache[i] = NULL;
+ +      memset(lock, 0, sizeof(*lock));
   
   #ifdef CONFIG_LOCK_STAT
         lock->cpu = raw_smp_processor_id();
@@@ -3065,9 -3058,10 +3070,10 @@@ print_unlock_inbalance_bug(struct task_
         if (debug_locks_silent)
                 return 0;
   
-       printk("\n=====================================\n");
-       printk(  "[ BUG: bad unlock balance detected! ]\n");
-       printk(  "-------------------------------------\n");
+       printk("\n");
+       printk("=====================================\n");
+       printk("[ BUG: bad unlock balance detected! ]\n");
+       printk("-------------------------------------\n");
         printk("%s/%d is trying to release lock (",
                 curr->comm, task_pid_nr(curr));
         print_lockdep_cache(lock);
@@@ -3111,13 -3105,7 +3117,13 @@@ static int match_held_lock(struct held_
                 if (!class)
                         class = look_up_lock_class(lock, 0);
   
- -              if (DEBUG_LOCKS_WARN_ON(!class))
+ +              /*
+ +               * If look_up_lock_class() failed to find a class, we're trying
+ +               * to test if we hold a lock that has never yet been acquired.
+ +               * Clearly if the lock hasn't been acquired _ever_, we're not
+ +               * holding it either, so report failure.
+ +               */
+ +              if (!class)
                         return 0;
   
                 if (DEBUG_LOCKS_WARN_ON(!hlock->nest_lock))
@@@ -3478,9 -3466,10 +3484,10 @@@ print_lock_contention_bug(struct task_s
         if (debug_locks_silent)
                 return 0;
   
-       printk("\n=================================\n");
-       printk(  "[ BUG: bad contention detected! ]\n");
-       printk(  "---------------------------------\n");
+       printk("\n");
+       printk("=================================\n");
+       printk("[ BUG: bad contention detected! ]\n");
+       printk("---------------------------------\n");
         printk("%s/%d is trying to contend lock (",
                 curr->comm, task_pid_nr(curr));
         print_lockdep_cache(lock);
@@@ -3839,9 -3828,10 +3846,10 @@@ print_freed_lock_bug(struct task_struc
         if (debug_locks_silent)
                 return;
   
-       printk("\n=========================\n");
-       printk(  "[ BUG: held lock freed! ]\n");
-       printk(  "-------------------------\n");
+       printk("\n");
+       printk("=========================\n");
+       printk("[ BUG: held lock freed! ]\n");
+       printk("-------------------------\n");
         printk("%s/%d is freeing memory %p-%p, with a lock still held there!\n",
                 curr->comm, task_pid_nr(curr), mem_from, mem_to-1);
         print_lock(hlock);
@@@ -3895,9 -3885,10 +3903,10 @@@ static void print_held_locks_bug(struc
         if (debug_locks_silent)
                 return;
   
-       printk("\n=====================================\n");
-       printk(  "[ BUG: lock held at task exit time! ]\n");
-       printk(  "-------------------------------------\n");
+       printk("\n");
+       printk("=====================================\n");
+       printk("[ BUG: lock held at task exit time! ]\n");
+       printk("-------------------------------------\n");
         printk("%s/%d is exiting with locks still held!\n",
                 curr->comm, task_pid_nr(curr));
         lockdep_print_held_locks(curr);
@@@ -3991,16 -3982,17 +4000,17 @@@ void lockdep_sys_exit(void
         if (unlikely(curr->lockdep_depth)) {
                 if (!debug_locks_off())
                         return;
-               printk("\n================================================\n");
-               printk(  "[ BUG: lock held when returning to user space! ]\n");
-               printk(  "------------------------------------------------\n");
+               printk("\n");
+               printk("================================================\n");
+               printk("[ BUG: lock held when returning to user space! ]\n");
+               printk("------------------------------------------------\n");
                 printk("%s/%d is leaving the kernel with locks still held!\n",
                                 curr->comm, curr->pid);
                 lockdep_print_held_locks(curr);
         }
   }
   
- void lockdep_rcu_dereference(const char *file, const int line)
+ void lockdep_rcu_suspicious(const char *file, const int line, const char *s)
   {
         struct task_struct *curr = current;
   
@@@ -4009,15 -4001,35 +4019,35 @@@
                 return;
   #endif /* #ifdef CONFIG_PROVE_RCU_REPEATEDLY */
         /* Note: the following can be executed concurrently, so be careful. */
-       printk("\n===================================================\n");
-       printk(  "[ INFO: suspicious rcu_dereference_check() usage. ]\n");
-       printk(  "---------------------------------------------------\n");
-       printk("%s:%d invoked rcu_dereference_check() without protection!\n",
-                       file, line);
+       printk("\n");
+       printk("===============================\n");
+       printk("[ INFO: suspicious RCU usage. ]\n");
+       printk("-------------------------------\n");
+       printk("%s:%d %s!\n", file, line, s);
         printk("\nother info that might help us debug this:\n\n");
         printk("\nrcu_scheduler_active = %d, debug_locks = %d\n", rcu_scheduler_active, debug_locks);
+ 
+       /*
+        * If a CPU is in dyntick-idle mode (CONFIG_NO_HZ), then RCU
+        * considers that CPU to be in an "extended quiescent state",
+        * which means that RCU will be completely ignoring that CPU.
+        * Therefore, rcu_read_lock() and friends have absolutely no
+        * effect on a dyntick-idle CPU.  In other words, even if a
+        * dyntick-idle CPU has called rcu_read_lock(), RCU might well
+        * delete data structures out from under it.  RCU really has no
+        * choice here: if it were to consult the CPU, that would wake
+        * the CPU up, and the whole point of dyntick-idle mode is to
+        * allow CPUs to enter extremely deep sleep states.
+        *
+        * So complain bitterly if someone does call rcu_read_lock(),
+        * rcu_read_lock_bh() and so on from extended quiescent states
+        * such as dyntick-idle mode.
+        */
+       if (rcu_check_extended_qs())
+               printk("RCU used illegally from extended quiescent state!\n");
+ 
         lockdep_print_held_locks(curr);
         printk("\nstack backtrace:\n");
         dump_stack();
   }
- EXPORT_SYMBOL_GPL(lockdep_rcu_dereference);
+ EXPORT_SYMBOL_GPL(lockdep_rcu_suspicious);
diff --combined kernel/pid.c

index e432057f3b2147873f0de30ad00b16bbaecbedb1,a7577b3887c6bdd21fdeaf4cfb81db8057a74205..8cafe7e72ad2b83ff0bc7f7d0e12b710e6f9a519
--- 1/kernel/pid.c
--- 2/kernel/pid.c
+++ b/kernel/pid.c
@@@ -405,6 -405,7 +405,6 @@@ struct task_struct *pid_task(struct pi
         if (pid) {
                 struct hlist_node *first;
                 first = rcu_dereference_check(hlist_first_rcu(&pid->tasks[type]),
- -                                            rcu_read_lock_held() ||
                                               lockdep_tasklist_lock_is_held());
                 if (first)
                         result = hlist_entry(first, struct task_struct, pids[(type)].node);
@@@ -418,7 -419,9 +418,9 @@@ EXPORT_SYMBOL(pid_task)
    */
   struct task_struct *find_task_by_pid_ns(pid_t nr, struct pid_namespace *ns)
   {
-       rcu_lockdep_assert(rcu_read_lock_held());
+       rcu_lockdep_assert(rcu_read_lock_held(),
+                          "find_task_by_pid_ns() needs rcu_read_lock()"
+                          " protection");
         return pid_task(find_pid_ns(nr, ns), PIDTYPE_PID);
   }
   
diff --combined kernel/rcupdate.c

index ddddb320be61ee463cc4e7ea163a379c908745de,e4d8a986c095151e0216cb0e42d9656c5d3c02ce..1a491edd9c09b39a42afb90561b230c00bef32d5
--- 1/kernel/rcupdate.c
--- 2/kernel/rcupdate.c
+++ b/kernel/rcupdate.c
@@@ -37,7 -37,7 +37,7 @@@
   #include <linux/smp.h>
   #include <linux/interrupt.h>
   #include <linux/sched.h>
- -#include <asm/atomic.h>
+ +#include <linux/atomic.h>
   #include <linux/bitops.h>
   #include <linux/percpu.h>
   #include <linux/notifier.h>
@@@ -46,6 -46,11 +46,11 @@@
   #include <linux/module.h>
   #include <linux/hardirq.h>
   
+ #define CREATE_TRACE_POINTS
+ #include <trace/events/rcu.h>
+ 
+ #include "rcu.h"
+ 
   #ifdef CONFIG_DEBUG_LOCK_ALLOC
   static struct lock_class_key rcu_lock_key;
   struct lockdep_map rcu_lock_map =
@@@ -82,23 -87,43 +87,43 @@@ EXPORT_SYMBOL_GPL(debug_lockdep_rcu_ena
    * that require that they be called within an RCU read-side critical
    * section.
    *
-  * Check debug_lockdep_rcu_enabled() to prevent false positives during boot.
+  * Check debug_lockdep_rcu_enabled() to prevent false positives during boot
+  * and while lockdep is disabled.
+  *
+  * Note that if the CPU is in an extended quiescent state, for example,
+  * if the CPU is in dyntick-idle mode, then rcu_read_lock_held() returns
+  * false even if the CPU did an rcu_read_lock().  The reason for this is
+  * that RCU ignores CPUs that are in extended quiescent states, so such
+  * a CPU is effectively never in an RCU read-side critical section
+  * regardless of what RCU primitives it invokes.  This state of affairs
+  * is required -- RCU would otherwise need to periodically wake up
+  * dyntick-idle CPUs, which would defeat the whole purpose of dyntick-idle
+  * mode.
    */
   int rcu_read_lock_bh_held(void)
   {
         if (!debug_lockdep_rcu_enabled())
                 return 1;
+ 
+       if (rcu_check_extended_qs())
+               return 0;
+ 
         return in_softirq() || irqs_disabled();
   }
   EXPORT_SYMBOL_GPL(rcu_read_lock_bh_held);
   
   #endif /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */
   
+ struct rcu_synchronize {
+       struct rcu_head head;
+       struct completion completion;
+ };
+ 
   /*
    * Awaken the corresponding synchronize_rcu() instance now that a
    * grace period has elapsed.
    */
- void wakeme_after_rcu(struct rcu_head  *head)
+ static void wakeme_after_rcu(struct rcu_head  *head)
   {
         struct rcu_synchronize *rcu;
   
@@@ -106,6 -131,20 +131,20 @@@
         complete(&rcu->completion);
   }
   
+ void wait_rcu_gp(call_rcu_func_t crf)
+ {
+       struct rcu_synchronize rcu;
+ 
+       init_rcu_head_on_stack(&rcu.head);
+       init_completion(&rcu.completion);
+       /* Will wake me after RCU finished. */
+       crf(&rcu.head, wakeme_after_rcu);
+       /* Wait for it. */
+       wait_for_completion(&rcu.completion);
+       destroy_rcu_head_on_stack(&rcu.head);
+ }
+ EXPORT_SYMBOL_GPL(wait_rcu_gp);
+ 
   #ifdef CONFIG_PROVE_RCU
   /*
    * wrapper function to avoid #include problems.
diff --combined kernel/rcutorture.c

index 98f51b13bb7ec659b3a40f22a4f52b7bb62dfae3,76fe8936c32eb6e5058ed1af0ecf6f98fc6e1d66..764825c2685c494b6839acaac1b25e500db610c5
--- 1/kernel/rcutorture.c
--- 2/kernel/rcutorture.c
+++ b/kernel/rcutorture.c
@@@ -33,7 -33,7 +33,7 @@@
   #include <linux/rcupdate.h>
   #include <linux/interrupt.h>
   #include <linux/sched.h>
- -#include <asm/atomic.h>
+ +#include <linux/atomic.h>
   #include <linux/bitops.h>
   #include <linux/completion.h>
   #include <linux/moduleparam.h>
@@@ -73,7 -73,7 +73,7 @@@ module_param(nreaders, int, 0444)
   MODULE_PARM_DESC(nreaders, "Number of RCU reader threads");
   module_param(nfakewriters, int, 0444);
   MODULE_PARM_DESC(nfakewriters, "Number of RCU fake writer threads");
- module_param(stat_interval, int, 0444);
+ module_param(stat_interval, int, 0644);
   MODULE_PARM_DESC(stat_interval, "Number of seconds between stats printk()s");
   module_param(verbose, bool, 0444);
   MODULE_PARM_DESC(verbose, "Enable verbose debugging printk()s");
@@@ -480,30 -480,6 +480,6 @@@ static void rcu_bh_torture_deferred_fre
         call_rcu_bh(&p->rtort_rcu, rcu_torture_cb);
   }
   
- struct rcu_bh_torture_synchronize {
-       struct rcu_head head;
-       struct completion completion;
- };
- 
- static void rcu_bh_torture_wakeme_after_cb(struct rcu_head *head)
- {
-       struct rcu_bh_torture_synchronize *rcu;
- 
-       rcu = container_of(head, struct rcu_bh_torture_synchronize, head);
-       complete(&rcu->completion);
- }
- 
- static void rcu_bh_torture_synchronize(void)
- {
-       struct rcu_bh_torture_synchronize rcu;
- 
-       init_rcu_head_on_stack(&rcu.head);
-       init_completion(&rcu.completion);
-       call_rcu_bh(&rcu.head, rcu_bh_torture_wakeme_after_cb);
-       wait_for_completion(&rcu.completion);
-       destroy_rcu_head_on_stack(&rcu.head);
- }
- 
   static struct rcu_torture_ops rcu_bh_ops = {
         .init           = NULL,
         .cleanup        = NULL,
@@@ -512,7 -488,7 +488,7 @@@
         .readunlock     = rcu_bh_torture_read_unlock,
         .completed      = rcu_bh_torture_completed,
         .deferred_free  = rcu_bh_torture_deferred_free,
-       .sync           = rcu_bh_torture_synchronize,
+       .sync           = synchronize_rcu_bh,
         .cb_barrier     = rcu_barrier_bh,
         .fqs            = rcu_bh_force_quiescent_state,
         .stats          = NULL,
@@@ -528,7 -504,7 +504,7 @@@ static struct rcu_torture_ops rcu_bh_sy
         .readunlock     = rcu_bh_torture_read_unlock,
         .completed      = rcu_bh_torture_completed,
         .deferred_free  = rcu_sync_torture_deferred_free,
-       .sync           = rcu_bh_torture_synchronize,
+       .sync           = synchronize_rcu_bh,
         .cb_barrier     = NULL,
         .fqs            = rcu_bh_force_quiescent_state,
         .stats          = NULL,
@@@ -536,6 -512,22 +512,22 @@@
         .name           = "rcu_bh_sync"
   };
   
+ static struct rcu_torture_ops rcu_bh_expedited_ops = {
+       .init           = rcu_sync_torture_init,
+       .cleanup        = NULL,
+       .readlock       = rcu_bh_torture_read_lock,
+       .read_delay     = rcu_read_delay,  /* just reuse rcu's version. */
+       .readunlock     = rcu_bh_torture_read_unlock,
+       .completed      = rcu_bh_torture_completed,
+       .deferred_free  = rcu_sync_torture_deferred_free,
+       .sync           = synchronize_rcu_bh_expedited,
+       .cb_barrier     = NULL,
+       .fqs            = rcu_bh_force_quiescent_state,
+       .stats          = NULL,
+       .irq_capable    = 1,
+       .name           = "rcu_bh_expedited"
+ };
+ 
   /*
    * Definitions for srcu torture testing.
    */
@@@ -659,11 -651,6 +651,6 @@@ static void rcu_sched_torture_deferred_
         call_rcu_sched(&p->rtort_rcu, rcu_torture_cb);
   }
   
- static void sched_torture_synchronize(void)
- {
-       synchronize_sched();
- }
- 
   static struct rcu_torture_ops sched_ops = {
         .init           = rcu_sync_torture_init,
         .cleanup        = NULL,
@@@ -672,7 -659,7 +659,7 @@@
         .readunlock     = sched_torture_read_unlock,
         .completed      = rcu_no_completed,
         .deferred_free  = rcu_sched_torture_deferred_free,
-       .sync           = sched_torture_synchronize,
+       .sync           = synchronize_sched,
         .cb_barrier     = rcu_barrier_sched,
         .fqs            = rcu_sched_force_quiescent_state,
         .stats          = NULL,
@@@ -688,7 -675,7 +675,7 @@@ static struct rcu_torture_ops sched_syn
         .readunlock     = sched_torture_read_unlock,
         .completed      = rcu_no_completed,
         .deferred_free  = rcu_sync_torture_deferred_free,
-       .sync           = sched_torture_synchronize,
+       .sync           = synchronize_sched,
         .cb_barrier     = NULL,
         .fqs            = rcu_sched_force_quiescent_state,
         .stats          = NULL,
@@@ -754,7 -741,7 +741,7 @@@ static int rcu_torture_boost(void *arg
         do {
                 /* Wait for the next test interval. */
                 oldstarttime = boost_starttime;
-               while (jiffies - oldstarttime > ULONG_MAX / 2) {
+               while (ULONG_CMP_LT(jiffies, oldstarttime)) {
                         schedule_timeout_uninterruptible(1);
                         rcu_stutter_wait("rcu_torture_boost");
                         if (kthread_should_stop() ||
@@@ -765,7 -752,7 +752,7 @@@
                 /* Do one boost-test interval. */
                 endtime = oldstarttime + test_boost_duration * HZ;
                 call_rcu_time = jiffies;
-               while (jiffies - endtime > ULONG_MAX / 2) {
+               while (ULONG_CMP_LT(jiffies, endtime)) {
                         /* If we don't have a callback in flight, post one. */
                         if (!rbi.inflight) {
                                 smp_mb(); /* RCU core before ->inflight = 1. */
@@@ -792,7 -779,8 +779,8 @@@
                  * interval.  Besides, we are running at RT priority,
                  * so delays should be relatively rare.
                  */
-               while (oldstarttime == boost_starttime) {
+               while (oldstarttime == boost_starttime &&
+                      !kthread_should_stop()) {
                         if (mutex_trylock(&boost_mutex)) {
                                 boost_starttime = jiffies +
                                                   test_boost_interval * HZ;
@@@ -809,11 -797,11 +797,11 @@@ checkwait:      rcu_stutter_wait("rcu_tortur
   
         /* Clean up and exit. */
         VERBOSE_PRINTK_STRING("rcu_torture_boost task stopping");
-       destroy_rcu_head_on_stack(&rbi.rcu);
         rcutorture_shutdown_absorb("rcu_torture_boost");
         while (!kthread_should_stop() || rbi.inflight)
                 schedule_timeout_uninterruptible(1);
         smp_mb(); /* order accesses to ->inflight before stack-frame death. */
+       destroy_rcu_head_on_stack(&rbi.rcu);
         return 0;
   }
   
@@@ -831,11 -819,13 +819,13 @@@ rcu_torture_fqs(void *arg
         VERBOSE_PRINTK_STRING("rcu_torture_fqs task started");
         do {
                 fqs_resume_time = jiffies + fqs_stutter * HZ;
-               while (jiffies - fqs_resume_time > LONG_MAX) {
+               while (ULONG_CMP_LT(jiffies, fqs_resume_time) &&
+                      !kthread_should_stop()) {
                         schedule_timeout_interruptible(1);
                 }
                 fqs_burst_remaining = fqs_duration;
-               while (fqs_burst_remaining > 0) {
+               while (fqs_burst_remaining > 0 &&
+                      !kthread_should_stop()) {
                         cur_ops->fqs();
                         udelay(fqs_holdoff);
                         fqs_burst_remaining -= fqs_holdoff;
@@@ -941,6 -931,7 +931,6 @@@ static void rcu_torture_timer(unsigned 
         idx = cur_ops->readlock();
         completed = cur_ops->completed();
         p = rcu_dereference_check(rcu_torture_current,
- -                                rcu_read_lock_held() ||
                                   rcu_read_lock_bh_held() ||
                                   rcu_read_lock_sched_held() ||
                                   srcu_read_lock_held(&srcu_ctl));
@@@ -1001,6 -992,7 +991,6 @@@ rcu_torture_reader(void *arg
                 idx = cur_ops->readlock();
                 completed = cur_ops->completed();
                 p = rcu_dereference_check(rcu_torture_current,
- -                                        rcu_read_lock_held() ||
                                           rcu_read_lock_bh_held() ||
                                           rcu_read_lock_sched_held() ||
                                           srcu_read_lock_held(&srcu_ctl));
@@@ -1280,8 -1272,9 +1270,9 @@@ static int rcutorture_booster_init(int 
         /* Don't allow time recalculation while creating a new task. */
         mutex_lock(&boost_mutex);
         VERBOSE_PRINTK_STRING("Creating rcu_torture_boost task");
-       boost_tasks[cpu] = kthread_create(rcu_torture_boost, NULL,
-                                         "rcu_torture_boost");
+       boost_tasks[cpu] = kthread_create_on_node(rcu_torture_boost, NULL,
+                                                 cpu_to_node(cpu),
+                                                 "rcu_torture_boost");
         if (IS_ERR(boost_tasks[cpu])) {
                 retval = PTR_ERR(boost_tasks[cpu]);
                 VERBOSE_PRINTK_STRING("rcu_torture_boost task create failed");
@@@ -1424,7 -1417,7 +1415,7 @@@ rcu_torture_init(void
         int firsterr = 0;
         static struct rcu_torture_ops *torture_ops[] =
                 { &rcu_ops, &rcu_sync_ops, &rcu_expedited_ops,
-                 &rcu_bh_ops, &rcu_bh_sync_ops,
+                 &rcu_bh_ops, &rcu_bh_sync_ops, &rcu_bh_expedited_ops,
                   &srcu_ops, &srcu_expedited_ops,
                   &sched_ops, &sched_sync_ops, &sched_expedited_ops, };
   
diff --combined kernel/rcutree_trace.c

index 3b0c0986afc0f0ec6d190176aa9695ba4d998957,3f739cf3fb4599c2e9de1901988b88ee986e13d4..9feffa4c069567909452e276633f25f512191f58
--- 1/kernel/rcutree_trace.c
--- 2/kernel/rcutree_trace.c
+++ b/kernel/rcutree_trace.c
@@@ -31,7 -31,7 +31,7 @@@
   #include <linux/rcupdate.h>
   #include <linux/interrupt.h>
   #include <linux/sched.h>
- -#include <asm/atomic.h>
+ +#include <linux/atomic.h>
   #include <linux/bitops.h>
   #include <linux/module.h>
   #include <linux/completion.h>
@@@ -48,11 -48,6 +48,6 @@@
   
   #ifdef CONFIG_RCU_BOOST
   
- DECLARE_PER_CPU(unsigned int, rcu_cpu_kthread_status);
- DECLARE_PER_CPU(unsigned int, rcu_cpu_kthread_cpu);
- DECLARE_PER_CPU(unsigned int, rcu_cpu_kthread_loops);
- DECLARE_PER_CPU(char, rcu_cpu_has_work);
- 
   static char convert_kthread_status(unsigned int kthread_status)
   {
         if (kthread_status > RCU_KTHREAD_MAX)
@@@ -66,11 -61,11 +61,11 @@@ static void print_one_rcu_data(struct s
   {
         if (!rdp->beenonline)
                 return;
-       seq_printf(m, "%3d%cc=%lu g=%lu pq=%d pqc=%lu qp=%d",
+       seq_printf(m, "%3d%cc=%lu g=%lu pq=%d pgp=%lu qp=%d",
                    rdp->cpu,
                    cpu_is_offline(rdp->cpu) ? '!' : ' ',
                    rdp->completed, rdp->gpnum,
-                  rdp->passed_quiesc, rdp->passed_quiesc_completed,
+                  rdp->passed_quiesce, rdp->passed_quiesce_gpnum,
                    rdp->qs_pending);
   #ifdef CONFIG_NO_HZ
         seq_printf(m, " dt=%d/%d/%d df=%lu",
@@@ -144,7 -139,7 +139,7 @@@ static void print_one_rcu_data_csv(stru
                    rdp->cpu,
                    cpu_is_offline(rdp->cpu) ? "\"N\"" : "\"Y\"",
                    rdp->completed, rdp->gpnum,
-                  rdp->passed_quiesc, rdp->passed_quiesc_completed,
+                  rdp->passed_quiesce, rdp->passed_quiesce_gpnum,
                    rdp->qs_pending);
   #ifdef CONFIG_NO_HZ
         seq_printf(m, ",%d,%d,%d,%lu",
@@@ -175,7 -170,7 +170,7 @@@
   
   static int show_rcudata_csv(struct seq_file *m, void *unused)
   {
-       seq_puts(m, "\"CPU\",\"Online?\",\"c\",\"g\",\"pq\",\"pqc\",\"pq\",");
+       seq_puts(m, "\"CPU\",\"Online?\",\"c\",\"g\",\"pq\",\"pgp\",\"pq\",");
   #ifdef CONFIG_NO_HZ
         seq_puts(m, "\"dt\",\"dt nesting\",\"dt NMI nesting\",\"df\",");
   #endif /* #ifdef CONFIG_NO_HZ */
diff --combined kernel/rtmutex.c

index 255e1662acdb496b77c50205daa455d404f7749b,2548f4487eee6baa84a8370a3a3787f71ac1b80b..5e8d9cce7470dad6fd2427fd6c2349ddea6aeb9e
--- 1/kernel/rtmutex.c
--- 2/kernel/rtmutex.c
+++ b/kernel/rtmutex.c
@@@ -579,6 -579,7 +579,7 @@@ __rt_mutex_slowlock(struct rt_mutex *lo
                     struct rt_mutex_waiter *waiter)
   {
         int ret = 0;
+       int was_disabled;
   
         for (;;) {
                 /* Try to acquire the lock: */
@@@ -601,10 -602,17 +602,17 @@@
   
                 raw_spin_unlock(&lock->wait_lock);
   
+               was_disabled = irqs_disabled();
+               if (was_disabled)
+                       local_irq_enable();
+ 
                 debug_rt_mutex_print_deadlock(waiter);
   
                 schedule_rt_mutex(lock);
   
+               if (was_disabled)
+                       local_irq_disable();
+ 
                 raw_spin_lock(&lock->wait_lock);
                 set_current_state(state);
         }
@@@ -890,7 -898,7 +898,7 @@@ void __rt_mutex_init(struct rt_mutex *l
   {
         lock->owner = NULL;
         raw_spin_lock_init(&lock->wait_lock);
- -      plist_head_init_raw(&lock->wait_list, &lock->wait_lock);
+ +      plist_head_init(&lock->wait_list);
   
         debug_rt_mutex_init(lock, name);
   }
diff --combined kernel/sched.c

index ec5f472bc5b9cec2a5c43ab7f8a054ecca75b39a,313c0f63e29a9b36b47b11e7f284f8482fd3580f..3e552563045909ba881e1fb19d004619f8f88a27
--- 1/kernel/sched.c
--- 2/kernel/sched.c
+++ b/kernel/sched.c
@@@ -75,9 -75,6 +75,9 @@@
   #include <asm/tlb.h>
   #include <asm/irq_regs.h>
   #include <asm/mutex.h>
+ +#ifdef CONFIG_PARAVIRT
+ +#include <asm/paravirt.h>
+ +#endif
   
   #include "sched_cpupri.h"
   #include "workqueue_sched.h"
@@@ -127,7 -124,7 +127,7 @@@
   
   static inline int rt_policy(int policy)
   {
- -      if (unlikely(policy == SCHED_FIFO || policy == SCHED_RR))
+ +      if (policy == SCHED_FIFO || policy == SCHED_RR)
                 return 1;
         return 0;
   }
@@@ -425,7 -422,6 +425,7 @@@ struct rt_rq 
    */
   struct root_domain {
         atomic_t refcount;
+ +      atomic_t rto_count;
         struct rcu_head rcu;
         cpumask_var_t span;
         cpumask_var_t online;
@@@ -435,6 -431,7 +435,6 @@@
          * one runnable RT task.
          */
         cpumask_var_t rto_mask;
- -      atomic_t rto_count;
         struct cpupri cpupri;
   };
   
@@@ -531,12 -528,6 +531,12 @@@ struct rq 
   #ifdef CONFIG_IRQ_TIME_ACCOUNTING
         u64 prev_irq_time;
   #endif
+ +#ifdef CONFIG_PARAVIRT
+ +      u64 prev_steal_time;
+ +#endif
+ +#ifdef CONFIG_PARAVIRT_TIME_ACCOUNTING
+ +      u64 prev_steal_time_rq;
+ +#endif
   
         /* calc_load related fields */
         unsigned long calc_load_update;
@@@ -590,6 -581,7 +590,6 @@@ static inline int cpu_of(struct rq *rq
   
   #define rcu_dereference_check_sched_domain(p) \
         rcu_dereference_check((p), \
- -                            rcu_read_lock_held() || \
                               lockdep_is_held(&sched_domains_mutex))
   
   /*
@@@ -1576,6 -1568,38 +1576,6 @@@ static unsigned long cpu_avg_load_per_t
         return rq->avg_load_per_task;
   }
   
- -#ifdef CONFIG_FAIR_GROUP_SCHED
- -
- -/*
- - * Compute the cpu's hierarchical load factor for each task group.
- - * This needs to be done in a top-down fashion because the load of a child
- - * group is a fraction of its parents load.
- - */
- -static int tg_load_down(struct task_group *tg, void *data)
- -{
- -      unsigned long load;
- -      long cpu = (long)data;
- -
- -      if (!tg->parent) {
- -              load = cpu_rq(cpu)->load.weight;
- -      } else {
- -              load = tg->parent->cfs_rq[cpu]->h_load;
- -              load *= tg->se[cpu]->load.weight;
- -              load /= tg->parent->cfs_rq[cpu]->load.weight + 1;
- -      }
- -
- -      tg->cfs_rq[cpu]->h_load = load;
- -
- -      return 0;
- -}
- -
- -static void update_h_load(long cpu)
- -{
- -      walk_tg_tree(tg_load_down, tg_nop, (void *)cpu);
- -}
- -
- -#endif
- -
   #ifdef CONFIG_PREEMPT
   
   static void double_rq_lock(struct rq *rq1, struct rq *rq2);
@@@ -1929,28 -1953,10 +1929,28 @@@ void account_system_vtime(struct task_s
   }
   EXPORT_SYMBOL_GPL(account_system_vtime);
   
- -static void update_rq_clock_task(struct rq *rq, s64 delta)
+ +#endif /* CONFIG_IRQ_TIME_ACCOUNTING */
+ +
+ +#ifdef CONFIG_PARAVIRT
+ +static inline u64 steal_ticks(u64 steal)
   {
- -      s64 irq_delta;
+ +      if (unlikely(steal > NSEC_PER_SEC))
+ +              return div_u64(steal, TICK_NSEC);
+ +
+ +      return __iter_div_u64_rem(steal, TICK_NSEC, &steal);
+ +}
+ +#endif
   
+ +static void update_rq_clock_task(struct rq *rq, s64 delta)
+ +{
+ +/*
+ + * In theory, the compile should just see 0 here, and optimize out the call
+ + * to sched_rt_avg_update. But I don't trust it...
+ + */
+ +#if defined(CONFIG_IRQ_TIME_ACCOUNTING) || defined(CONFIG_PARAVIRT_TIME_ACCOUNTING)
+ +      s64 steal = 0, irq_delta = 0;
+ +#endif
+ +#ifdef CONFIG_IRQ_TIME_ACCOUNTING
         irq_delta = irq_time_read(cpu_of(rq)) - rq->prev_irq_time;
   
         /*
@@@ -1973,35 -1979,12 +1973,35 @@@
   
         rq->prev_irq_time += irq_delta;
         delta -= irq_delta;
+ +#endif
+ +#ifdef CONFIG_PARAVIRT_TIME_ACCOUNTING
+ +      if (static_branch((&paravirt_steal_rq_enabled))) {
+ +              u64 st;
+ +
+ +              steal = paravirt_steal_clock(cpu_of(rq));
+ +              steal -= rq->prev_steal_time_rq;
+ +
+ +              if (unlikely(steal > delta))
+ +                      steal = delta;
+ +
+ +              st = steal_ticks(steal);
+ +              steal = st * TICK_NSEC;
+ +
+ +              rq->prev_steal_time_rq += steal;
+ +
+ +              delta -= steal;
+ +      }
+ +#endif
+ +
         rq->clock_task += delta;
   
- -      if (irq_delta && sched_feat(NONIRQ_POWER))
- -              sched_rt_avg_update(rq, irq_delta);
+ +#if defined(CONFIG_IRQ_TIME_ACCOUNTING) || defined(CONFIG_PARAVIRT_TIME_ACCOUNTING)
+ +      if ((irq_delta + steal) && sched_feat(NONTASK_POWER))
+ +              sched_rt_avg_update(rq, irq_delta + steal);
+ +#endif
   }
   
+ +#ifdef CONFIG_IRQ_TIME_ACCOUNTING
   static int irqtime_account_hi_update(void)
   {
         struct cpu_usage_stat *cpustat = &kstat_this_cpu.cpustat;
@@@ -2036,7 -2019,12 +2036,7 @@@ static int irqtime_account_si_update(vo
   
   #define sched_clock_irqtime   (0)
   
- -static void update_rq_clock_task(struct rq *rq, s64 delta)
- -{
- -      rq->clock_task += delta;
- -}
- -
- -#endif /* CONFIG_IRQ_TIME_ACCOUNTING */
+ +#endif
   
   #include "sched_idletask.c"
   #include "sched_fair.c"
@@@ -2232,7 -2220,7 +2232,7 @@@ void set_task_cpu(struct task_struct *p
   
         if (task_cpu(p) != new_cpu) {
                 p->se.nr_migrations++;
- -              perf_sw_event(PERF_COUNT_SW_CPU_MIGRATIONS, 1, 1, NULL, 0);
+ +              perf_sw_event(PERF_COUNT_SW_CPU_MIGRATIONS, 1, NULL, 0);
         }
   
         __set_task_cpu(p, new_cpu);
@@@ -2509,7 -2497,7 +2509,7 @@@ ttwu_do_wakeup(struct rq *rq, struct ta
         if (p->sched_class->task_woken)
                 p->sched_class->task_woken(rq, p);
   
- -      if (unlikely(rq->idle_stamp)) {
+ +      if (rq->idle_stamp) {
                 u64 delta = rq->clock - rq->idle_stamp;
                 u64 max = 2*sysctl_sched_migration_cost;
   
@@@ -2898,7 -2886,7 +2898,7 @@@ void sched_fork(struct task_struct *p
   #if defined(CONFIG_SMP)
         p->on_cpu = 0;
   #endif
- -#ifdef CONFIG_PREEMPT
+ +#ifdef CONFIG_PREEMPT_COUNT
         /* Want to start with kernel preemption disabled. */
         task_thread_info(p)->preempt_count = 1;
   #endif
@@@ -3065,7 -3053,7 +3065,7 @@@ static void finish_task_switch(struct r
   #ifdef __ARCH_WANT_INTERRUPTS_ON_CTXSW
         local_irq_disable();
   #endif /* __ARCH_WANT_INTERRUPTS_ON_CTXSW */
- -      perf_event_task_sched_in(current);
+ +      perf_event_task_sched_in(prev, current);
   #ifdef __ARCH_WANT_INTERRUPTS_ON_CTXSW
         local_irq_enable();
   #endif /* __ARCH_WANT_INTERRUPTS_ON_CTXSW */
@@@ -3889,25 -3877,6 +3889,25 @@@ void account_idle_time(cputime_t cputim
                 cpustat->idle = cputime64_add(cpustat->idle, cputime64);
   }
   
+ +static __always_inline bool steal_account_process_tick(void)
+ +{
+ +#ifdef CONFIG_PARAVIRT
+ +      if (static_branch(&paravirt_steal_enabled)) {
+ +              u64 steal, st = 0;
+ +
+ +              steal = paravirt_steal_clock(smp_processor_id());
+ +              steal -= this_rq()->prev_steal_time;
+ +
+ +              st = steal_ticks(steal);
+ +              this_rq()->prev_steal_time += st * TICK_NSEC;
+ +
+ +              account_steal_time(st);
+ +              return st;
+ +      }
+ +#endif
+ +      return false;
+ +}
+ +
   #ifndef CONFIG_VIRT_CPU_ACCOUNTING
   
   #ifdef CONFIG_IRQ_TIME_ACCOUNTING
@@@ -3939,9 -3908,6 +3939,9 @@@ static void irqtime_account_process_tic
         cputime64_t tmp = cputime_to_cputime64(cputime_one_jiffy);
         struct cpu_usage_stat *cpustat = &kstat_this_cpu.cpustat;
   
+ +      if (steal_account_process_tick())
+ +              return;
+ +
         if (irqtime_account_hi_update()) {
                 cpustat->irq = cputime64_add(cpustat->irq, tmp);
         } else if (irqtime_account_si_update()) {
@@@ -3995,9 -3961,6 +3995,9 @@@ void account_process_tick(struct task_s
                 return;
         }
   
+ +      if (steal_account_process_tick())
+ +              return;
+ +
         if (user_tick)
                 account_user_time(p, cputime_one_jiffy, one_jiffy_scaled);
         else if ((p != rq->idle) || (irq_count() != HARDIRQ_OFFSET))
@@@ -4237,6 -4200,7 +4237,7 @@@ static inline void schedule_debug(struc
          */
         if (unlikely(in_atomic_preempt_off() && !prev->exit_state))
                 __schedule_bug(prev);
+       rcu_sleep_check();
   
         profile_hit(SCHED_PROFILING, __builtin_return_address(0));
   
@@@ -4279,9 -4243,9 +4280,9 @@@ pick_next_task(struct rq *rq
   }
   
   /*
- - * schedule() is the main scheduler function.
+ + * __schedule() is the main scheduler function.
    */
- -asmlinkage void __sched schedule(void)
+ +static void __sched __schedule(void)
   {
         struct task_struct *prev, *next;
         unsigned long *switch_count;
@@@ -4322,6 -4286,16 +4323,6 @@@ need_resched
                                 if (to_wakeup)
                                         try_to_wake_up_local(to_wakeup);
                         }
- -
- -                      /*
- -                       * If we are going to sleep and we have plugged IO
- -                       * queued, make sure to submit it to avoid deadlocks.
- -                       */
- -                      if (blk_needs_flush_plug(prev)) {
- -                              raw_spin_unlock(&rq->lock);
- -                              blk_schedule_flush_plug(prev);
- -                              raw_spin_lock(&rq->lock);
- -                      }
                 }
                 switch_count = &prev->nvcsw;
         }
@@@ -4359,34 -4333,17 +4360,34 @@@
         if (need_resched())
                 goto need_resched;
   }
+ +
+ +static inline void sched_submit_work(struct task_struct *tsk)
+ +{
+ +      if (!tsk->state)
+ +              return;
+ +      /*
+ +       * If we are going to sleep and we have plugged IO queued,
+ +       * make sure to submit it to avoid deadlocks.
+ +       */
+ +      if (blk_needs_flush_plug(tsk))
+ +              blk_schedule_flush_plug(tsk);
+ +}
+ +
+ +asmlinkage void schedule(void)
+ +{
+ +      struct task_struct *tsk = current;
+ +
+ +      sched_submit_work(tsk);
+ +      __schedule();
+ +}
   EXPORT_SYMBOL(schedule);
   
   #ifdef CONFIG_MUTEX_SPIN_ON_OWNER
   
   static inline bool owner_running(struct mutex *lock, struct task_struct *owner)
   {
- -      bool ret = false;
- -
- -      rcu_read_lock();
         if (lock->owner != owner)
- -              goto fail;
+ +              return false;
   
         /*
          * Ensure we emit the owner->on_cpu, dereference _after_ checking
@@@ -4396,7 -4353,11 +4397,7 @@@
          */
         barrier();
   
- -      ret = owner->on_cpu;
- -fail:
- -      rcu_read_unlock();
- -
- -      return ret;
+ +      return owner->on_cpu;
   }
   
   /*
@@@ -4408,21 -4369,21 +4409,21 @@@ int mutex_spin_on_owner(struct mutex *l
         if (!sched_feat(OWNER_SPIN))
                 return 0;
   
+ +      rcu_read_lock();
         while (owner_running(lock, owner)) {
                 if (need_resched())
- -                      return 0;
+ +                      break;
   
                 arch_mutex_cpu_relax();
         }
+ +      rcu_read_unlock();
   
         /*
- -       * If the owner changed to another task there is likely
- -       * heavy contention, stop spinning.
+ +       * We break out the loop above on need_resched() and when the
+ +       * owner changed, which is a sign for heavy contention. Return
+ +       * success only when lock->owner is NULL.
          */
- -      if (lock->owner)
- -              return 0;
- -
- -      return 1;
+ +      return lock->owner == NULL;
   }
   #endif
   
@@@ -4445,7 -4406,7 +4446,7 @@@ asmlinkage void __sched notrace preempt
   
         do {
                 add_preempt_count_notrace(PREEMPT_ACTIVE);
- -              schedule();
+ +              __schedule();
                 sub_preempt_count_notrace(PREEMPT_ACTIVE);
   
                 /*
@@@ -4473,7 -4434,7 +4474,7 @@@ asmlinkage void __sched preempt_schedul
         do {
                 add_preempt_count(PREEMPT_ACTIVE);
                 local_irq_enable();
- -              schedule();
+ +              __schedule();
                 local_irq_disable();
                 sub_preempt_count(PREEMPT_ACTIVE);
   
@@@ -5598,7 -5559,7 +5599,7 @@@ static inline int should_resched(void
   static void __cond_resched(void)
   {
         add_preempt_count(PREEMPT_ACTIVE);
- -      schedule();
+ +      __schedule();
         sub_preempt_count(PREEMPT_ACTIVE);
   }
   
@@@ -5978,15 -5939,6 +5979,6 @@@ void __cpuinit init_idle(struct task_st
         ftrace_graph_init_idle_task(idle, cpu);
   }
   
- /*
-  * In a system that switches off the HZ timer nohz_cpu_mask
-  * indicates which cpus entered this state. This is used
-  * in the rcu update to wait only for active cpus. For system
-  * which do not switch off the HZ timer nohz_cpu_mask should
-  * always be CPU_BITS_NONE.
-  */
- cpumask_var_t nohz_cpu_mask;
- 
   /*
    * Increase the granularity value when there are more CPUs,
    * because with more CPUs the 'effective latency' as visible
@@@ -7453,7 -7405,6 +7445,7 @@@ static void __sdt_free(const struct cpu
                         struct sched_domain *sd = *per_cpu_ptr(sdd->sd, j);
                         if (sd && (sd->flags & SD_OVERLAP))
                                 free_sched_groups(sd->groups, 0);
+ +                      kfree(*per_cpu_ptr(sdd->sd, j));
                         kfree(*per_cpu_ptr(sdd->sg, j));
                         kfree(*per_cpu_ptr(sdd->sgp, j));
                 }
@@@ -7939,10 -7890,17 +7931,10 @@@ int in_sched_functions(unsigned long ad
                 && addr < (unsigned long)__sched_text_end);
   }
   
- -static void init_cfs_rq(struct cfs_rq *cfs_rq, struct rq *rq)
+ +static void init_cfs_rq(struct cfs_rq *cfs_rq)
   {
         cfs_rq->tasks_timeline = RB_ROOT;
         INIT_LIST_HEAD(&cfs_rq->tasks);
- -#ifdef CONFIG_FAIR_GROUP_SCHED
- -      cfs_rq->rq = rq;
- -      /* allow initial update_cfs_load() to truncate */
- -#ifdef CONFIG_SMP
- -      cfs_rq->load_stamp = 1;
- -#endif
- -#endif
         cfs_rq->min_vruntime = (u64)(-(1LL << 20));
   #ifndef CONFIG_64BIT
         cfs_rq->min_vruntime_copy = cfs_rq->min_vruntime;
@@@ -7962,18 -7920,27 +7954,18 @@@ static void init_rt_rq(struct rt_rq *rt
         /* delimiter for bitsearch: */
         __set_bit(MAX_RT_PRIO, array->bitmap);
   
- -#if defined CONFIG_SMP || defined CONFIG_RT_GROUP_SCHED
+ +#if defined CONFIG_SMP
         rt_rq->highest_prio.curr = MAX_RT_PRIO;
- -#ifdef CONFIG_SMP
         rt_rq->highest_prio.next = MAX_RT_PRIO;
- -#endif
- -#endif
- -#ifdef CONFIG_SMP
         rt_rq->rt_nr_migratory = 0;
         rt_rq->overloaded = 0;
- -      plist_head_init_raw(&rt_rq->pushable_tasks, &rq->lock);
+ +      plist_head_init(&rt_rq->pushable_tasks);
   #endif
   
         rt_rq->rt_time = 0;
         rt_rq->rt_throttled = 0;
         rt_rq->rt_runtime = 0;
         raw_spin_lock_init(&rt_rq->rt_runtime_lock);
- -
- -#ifdef CONFIG_RT_GROUP_SCHED
- -      rt_rq->rt_nr_boosted = 0;
- -      rt_rq->rq = rq;
- -#endif
   }
   
   #ifdef CONFIG_FAIR_GROUP_SCHED
@@@ -7982,17 -7949,11 +7974,17 @@@ static void init_tg_cfs_entry(struct ta
                                 struct sched_entity *parent)
   {
         struct rq *rq = cpu_rq(cpu);
- -      tg->cfs_rq[cpu] = cfs_rq;
- -      init_cfs_rq(cfs_rq, rq);
+ +
         cfs_rq->tg = tg;
+ +      cfs_rq->rq = rq;
+ +#ifdef CONFIG_SMP
+ +      /* allow initial update_cfs_load() to truncate */
+ +      cfs_rq->load_stamp = 1;
+ +#endif
   
+ +      tg->cfs_rq[cpu] = cfs_rq;
         tg->se[cpu] = se;
+ +
         /* se could be NULL for root_task_group */
         if (!se)
                 return;
@@@ -8015,14 -7976,12 +8007,14 @@@ static void init_tg_rt_entry(struct tas
   {
         struct rq *rq = cpu_rq(cpu);
   
- -      tg->rt_rq[cpu] = rt_rq;
- -      init_rt_rq(rt_rq, rq);
+ +      rt_rq->highest_prio.curr = MAX_RT_PRIO;
+ +      rt_rq->rt_nr_boosted = 0;
+ +      rt_rq->rq = rq;
         rt_rq->tg = tg;
- -      rt_rq->rt_runtime = tg->rt_bandwidth.rt_runtime;
   
+ +      tg->rt_rq[cpu] = rt_rq;
         tg->rt_se[cpu] = rt_se;
+ +
         if (!rt_se)
                 return;
   
@@@ -8104,7 -8063,7 +8096,7 @@@ void __init sched_init(void
                 rq->nr_running = 0;
                 rq->calc_load_active = 0;
                 rq->calc_load_update = jiffies + LOAD_FREQ;
- -              init_cfs_rq(&rq->cfs, rq);
+ +              init_cfs_rq(&rq->cfs);
                 init_rt_rq(&rq->rt, rq);
   #ifdef CONFIG_FAIR_GROUP_SCHED
                 root_task_group.shares = root_task_group_load;
@@@ -8175,7 -8134,7 +8167,7 @@@
   #endif
   
   #ifdef CONFIG_RT_MUTEXES
- -      plist_head_init_raw(&init_task.pi_waiters, &init_task.pi_lock);
+ +      plist_head_init(&init_task.pi_waiters);
   #endif
   
         /*
@@@ -8199,8 -8158,6 +8191,6 @@@
          */
         current->sched_class = &fair_sched_class;
   
-       /* Allocate the nohz_cpu_mask if CONFIG_CPUMASK_OFFSTACK */
-       zalloc_cpumask_var(&nohz_cpu_mask, GFP_NOWAIT);
   #ifdef CONFIG_SMP
         zalloc_cpumask_var(&sched_domains_tmpmask, GFP_NOWAIT);
   #ifdef CONFIG_NO_HZ
@@@ -8218,7 -8175,7 +8208,7 @@@
         scheduler_running = 1;
   }
   
- -#ifdef CONFIG_DEBUG_SPINLOCK_SLEEP
+ +#ifdef CONFIG_DEBUG_ATOMIC_SLEEP
   static inline int preempt_count_equals(int preempt_offset)
   {
         int nested = (preempt_count() & ~PREEMPT_ACTIVE) + rcu_preempt_depth();
@@@ -8228,8 -8185,10 +8218,9 @@@
   
   void __might_sleep(const char *file, int line, int preempt_offset)
   {
- -#ifdef in_atomic
         static unsigned long prev_jiffy;        /* ratelimiting */
   
+       rcu_sleep_check(); /* WARN_ON_ONCE() by default, no rate limit reqd. */
         if ((preempt_count_equals(preempt_offset) && !irqs_disabled()) ||
             system_state != SYSTEM_RUNNING || oops_in_progress)
                 return;
@@@ -8249,6 -8208,7 +8240,6 @@@
         if (irqs_disabled())
                 print_irqtrace_events(current);
         dump_stack();
- -#endif
   }
   EXPORT_SYMBOL(__might_sleep);
   #endif
@@@ -8407,7 -8367,6 +8398,7 @@@ int alloc_fair_sched_group(struct task_
                 if (!se)
                         goto err_free_rq;
   
+ +              init_cfs_rq(cfs_rq);
                 init_tg_cfs_entry(tg, cfs_rq, se, i, parent->se[i]);
         }
   
@@@ -8435,7 -8394,7 +8426,7 @@@ static inline void unregister_fair_sche
         list_del_leaf_cfs_rq(tg->cfs_rq[cpu]);
         raw_spin_unlock_irqrestore(&rq->lock, flags);
   }
- -#else /* !CONFG_FAIR_GROUP_SCHED */
+ +#else /* !CONFIG_FAIR_GROUP_SCHED */
   static inline void free_fair_sched_group(struct task_group *tg)
   {
   }
@@@ -8456,8 -8415,7 +8447,8 @@@ static void free_rt_sched_group(struct 
   {
         int i;
   
- -      destroy_rt_bandwidth(&tg->rt_bandwidth);
+ +      if (tg->rt_se)
+ +              destroy_rt_bandwidth(&tg->rt_bandwidth);
   
         for_each_possible_cpu(i) {
                 if (tg->rt_rq)
@@@ -8498,8 -8456,6 +8489,8 @@@ int alloc_rt_sched_group(struct task_gr
                 if (!rt_se)
                         goto err_free_rq;
   
+ +              init_rt_rq(rt_rq, cpu_rq(i));
+ +              rt_rq->rt_runtime = tg->rt_bandwidth.rt_runtime;
                 init_tg_rt_entry(tg, rt_rq, rt_se, i, parent->rt_se[i]);
         }
author	Stephen Rothwell <sfr@canb.auug.org.au>
	Tue, 13 Sep 2011 04:55:35 +0000 (14:55 +1000)
committer	Stephen Rothwell <sfr@canb.auug.org.au>
	Tue, 13 Sep 2011 04:55:35 +0000 (14:55 +1000)
		1	2
arch/powerpc/platforms/pseries/lpar.c	patch \|	diff1 \|	diff2 \|	blob \| history
include/linux/rcupdate.h	patch \|	diff1 \|	diff2 \|	blob \| history
include/linux/sched.h	patch \|	diff1 \|	diff2 \|	blob \| history
init/Kconfig	patch \|	diff1 \|	diff2 \|	blob \| history
kernel/lockdep.c	patch \|	diff1 \|	diff2 \|	blob \| history
kernel/pid.c	patch \|	diff1 \|	diff2 \|	blob \| history
kernel/rcupdate.c	patch \|	diff1 \|	diff2 \|	blob \| history
kernel/rcutorture.c	patch \|	diff1 \|	diff2 \|	blob \| history
kernel/rcutree_trace.c	patch \|	diff1 \|	diff2 \|	blob \| history
kernel/rtmutex.c	patch \|	diff1 \|	diff2 \|	blob \| history
kernel/sched.c	patch \|	diff1 \|	diff2 \|	blob \| history