Merge tag 'random_for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso...

author Linus Torvalds <torvalds@linux-foundation.org>

Sat, 15 Jul 2017 19:44:02 +0000 (12:44 -0700)

committer Linus Torvalds <torvalds@linux-foundation.org>

Sat, 15 Jul 2017 19:44:02 +0000 (12:44 -0700)
author Linus Torvalds <torvalds@linux-foundation.org>
Sat, 15 Jul 2017 19:44:02 +0000 (12:44 -0700)
committer Linus Torvalds <torvalds@linux-foundation.org>
Sat, 15 Jul 2017 19:44:02 +0000 (12:44 -0700)
diff --combined drivers/char/random.c

index 23cab7a8c1c19141726d419f518946c41d546df1,05d255e1c112fef87c4c8c6da5cb810fea1999cf..afa3ce7d3e729a1ad1485d129aa1d26646292f74
--- 1/drivers/char/random.c
--- 2/drivers/char/random.c
+++ b/drivers/char/random.c
@@@ -288,7 -288,6 +288,6 @@@
   #define SEC_XFER_SIZE         512
   #define EXTRACT_SIZE          10
   
- #define DEBUG_RANDOM_BOOT 0
   
   #define LONGS(x) (((x) + sizeof(unsigned long) - 1)/sizeof(unsigned long))
   
@@@ -437,6 -436,7 +436,7 @@@ static void _extract_crng(struct crng_s
   static void _crng_backtrack_protect(struct crng_state *crng,
                                     __u8 tmp[CHACHA20_BLOCK_SIZE], int used);
   static void process_random_ready_list(void);
+ static void _get_random_bytes(void *buf, int nbytes);
   
   /**********************************************************************
    *
@@@ -777,7 -777,7 +777,7 @@@ static void crng_initialize(struct crng
                 _extract_entropy(&input_pool, &crng->state[4],
                                  sizeof(__u32) * 12, 0);
         else
-               get_random_bytes(&crng->state[4], sizeof(__u32) * 12);
+               _get_random_bytes(&crng->state[4], sizeof(__u32) * 12);
         for (i = 4; i < 16; i++) {
                 if (!arch_get_random_seed_long(&rv) &&
                     !arch_get_random_long(&rv))
@@@ -851,11 -851,6 +851,6 @@@ static void crng_reseed(struct crng_sta
         }
   }
   
- static inline void crng_wait_ready(void)
- {
-       wait_event_interruptible(crng_init_wait, crng_ready());
- }
- 
   static void _extract_crng(struct crng_state *crng,
                           __u8 out[CHACHA20_BLOCK_SIZE])
   {
@@@ -987,11 -982,6 +982,11 @@@ void add_device_randomness(const void *
         unsigned long time = random_get_entropy() ^ jiffies;
         unsigned long flags;
   
+ +      if (!crng_ready()) {
+ +              crng_fast_load(buf, size);
+ +              return;
+ +      }
+ +
         trace_add_device_randomness(size, _RET_IP_);
         spin_lock_irqsave(&input_pool.lock, flags);
         _mix_pool_bytes(&input_pool, buf, size);
@@@ -1477,22 -1467,44 +1472,44 @@@ static ssize_t extract_entropy_user(str
         return ret;
   }
   
+ #define warn_unseeded_randomness(previous) \
+       _warn_unseeded_randomness(__func__, (void *) _RET_IP_, (previous))
+ 
+ static void _warn_unseeded_randomness(const char *func_name, void *caller,
+                                     void **previous)
+ {
+ #ifdef CONFIG_WARN_ALL_UNSEEDED_RANDOM
+       const bool print_once = false;
+ #else
+       static bool print_once __read_mostly;
+ #endif
+ 
+       if (print_once ||
+           crng_ready() ||
+           (previous && (caller == READ_ONCE(*previous))))
+               return;
+       WRITE_ONCE(*previous, caller);
+ #ifndef CONFIG_WARN_ALL_UNSEEDED_RANDOM
+       print_once = true;
+ #endif
+       pr_notice("random: %s called from %pF with crng_init=%d\n",
+                 func_name, caller, crng_init);
+ }
+ 
   /*
    * This function is the exported kernel interface.  It returns some
    * number of good random numbers, suitable for key generation, seeding
    * TCP sequence numbers, etc.  It does not rely on the hardware random
    * number generator.  For random bytes direct from the hardware RNG
-  * (when available), use get_random_bytes_arch().
+  * (when available), use get_random_bytes_arch(). In order to ensure
+  * that the randomness provided by this function is okay, the function
+  * wait_for_random_bytes() should be called and return 0 at least once
+  * at any point prior.
    */
- void get_random_bytes(void *buf, int nbytes)
+ static void _get_random_bytes(void *buf, int nbytes)
   {
         __u8 tmp[CHACHA20_BLOCK_SIZE];
   
- #if DEBUG_RANDOM_BOOT > 0
-       if (!crng_ready())
-               printk(KERN_NOTICE "random: %pF get_random_bytes called "
-                      "with crng_init = %d\n", (void *) _RET_IP_, crng_init);
- #endif
         trace_get_random_bytes(nbytes, _RET_IP_);
   
         while (nbytes >= CHACHA20_BLOCK_SIZE) {
@@@ -1509,8 -1521,34 +1526,34 @@@
                 crng_backtrack_protect(tmp, CHACHA20_BLOCK_SIZE);
         memzero_explicit(tmp, sizeof(tmp));
   }
+ 
+ void get_random_bytes(void *buf, int nbytes)
+ {
+       static void *previous;
+ 
+       warn_unseeded_randomness(&previous);
+       _get_random_bytes(buf, nbytes);
+ }
   EXPORT_SYMBOL(get_random_bytes);
   
+ /*
+  * Wait for the urandom pool to be seeded and thus guaranteed to supply
+  * cryptographically secure random numbers. This applies to: the /dev/urandom
+  * device, the get_random_bytes function, and the get_random_{u32,u64,int,long}
+  * family of functions. Using any of these functions without first calling
+  * this function forfeits the guarantee of security.
+  *
+  * Returns: 0 if the urandom pool has been seeded.
+  *          -ERESTARTSYS if the function was interrupted by a signal.
+  */
+ int wait_for_random_bytes(void)
+ {
+       if (likely(crng_ready()))
+               return 0;
+       return wait_event_interruptible(crng_init_wait, crng_ready());
+ }
+ EXPORT_SYMBOL(wait_for_random_bytes);
+ 
   /*
    * Add a callback function that will be invoked when the nonblocking
    * pool is initialised.
@@@ -1865,6 -1903,8 +1908,8 @@@ const struct file_operations urandom_fo
   SYSCALL_DEFINE3(getrandom, char __user *, buf, size_t, count,
                 unsigned int, flags)
   {
+       int ret;
+ 
         if (flags & ~(GRND_NONBLOCK|GRND_RANDOM))
                 return -EINVAL;
   
@@@ -1877,9 -1917,9 +1922,9 @@@
         if (!crng_ready()) {
                 if (flags & GRND_NONBLOCK)
                         return -EAGAIN;
-               crng_wait_ready();
-               if (signal_pending(current))
-                       return -ERESTARTSYS;
+               ret = wait_for_random_bytes();
+               if (unlikely(ret))
+                       return ret;
         }
         return urandom_read(NULL, buf, count, NULL);
   }
@@@ -2040,15 -2080,19 +2085,19 @@@ static rwlock_t batched_entropy_reset_l
   /*
    * Get a random word for internal kernel use only. The quality of the random
    * number is either as good as RDRAND or as good as /dev/urandom, with the
-  * goal of being quite fast and not depleting entropy.
+  * goal of being quite fast and not depleting entropy. In order to ensure
+  * that the randomness provided by this function is okay, the function
+  * wait_for_random_bytes() should be called and return 0 at least once
+  * at any point prior.
    */
   static DEFINE_PER_CPU(struct batched_entropy, batched_entropy_u64);
   u64 get_random_u64(void)
   {
         u64 ret;
-       bool use_lock = READ_ONCE(crng_init) < 2;
+       bool use_lock;
         unsigned long flags = 0;
         struct batched_entropy *batch;
+       static void *previous;
   
   #if BITS_PER_LONG == 64
         if (arch_get_random_long((unsigned long *)&ret))
@@@ -2059,6 -2103,9 +2108,9 @@@
             return ret;
   #endif
   
+       warn_unseeded_randomness(&previous);
+ 
+       use_lock = READ_ONCE(crng_init) < 2;
         batch = &get_cpu_var(batched_entropy_u64);
         if (use_lock)
                 read_lock_irqsave(&batched_entropy_reset_lock, flags);
@@@ -2078,13 -2125,17 +2130,17 @@@ static DEFINE_PER_CPU(struct batched_en
   u32 get_random_u32(void)
   {
         u32 ret;
-       bool use_lock = READ_ONCE(crng_init) < 2;
+       bool use_lock;
         unsigned long flags = 0;
         struct batched_entropy *batch;
+       static void *previous;
   
         if (arch_get_random_int(&ret))
                 return ret;
   
+       warn_unseeded_randomness(&previous);
+ 
+       use_lock = READ_ONCE(crng_init) < 2;
         batch = &get_cpu_var(batched_entropy_u32);
         if (use_lock)
                 read_lock_irqsave(&batched_entropy_reset_lock, flags);
diff --combined drivers/target/iscsi/iscsi_target_login.c

index 92b96b51d5068e77c45d85a5a4d16efc9ffa5a93,5ef028c117385e142d41106f9078db9327de7370..e9bdc8b86e7d1d71d77cf4388370af6fcf3fded7
--- 1/drivers/target/iscsi/iscsi_target_login.c
--- 2/drivers/target/iscsi/iscsi_target_login.c
+++ b/drivers/target/iscsi/iscsi_target_login.c
@@@ -245,22 -245,26 +245,26 @@@ int iscsi_check_for_session_reinstateme
         return 0;
   }
   
- static void iscsi_login_set_conn_values(
+ static int iscsi_login_set_conn_values(
         struct iscsi_session *sess,
         struct iscsi_conn *conn,
         __be16 cid)
   {
+       int ret;
         conn->sess              = sess;
         conn->cid               = be16_to_cpu(cid);
         /*
          * Generate a random Status sequence number (statsn) for the new
          * iSCSI connection.
          */
-       get_random_bytes(&conn->stat_sn, sizeof(u32));
+       ret = get_random_bytes_wait(&conn->stat_sn, sizeof(u32));
+       if (unlikely(ret))
+               return ret;
   
         mutex_lock(&auth_id_lock);
         conn->auth_id           = iscsit_global->auth_id++;
         mutex_unlock(&auth_id_lock);
+       return 0;
   }
   
   __printf(2, 3) int iscsi_change_param_sprintf(
@@@ -306,7 -310,11 +310,11 @@@ static int iscsi_login_zero_tsih_s1
                 return -ENOMEM;
         }
   
-       iscsi_login_set_conn_values(sess, conn, pdu->cid);
+       ret = iscsi_login_set_conn_values(sess, conn, pdu->cid);
+       if (unlikely(ret)) {
+               kfree(sess);
+               return ret;
+       }
         sess->init_task_tag     = pdu->itt;
         memcpy(&sess->isid, pdu->isid, 6);
         sess->exp_cmd_sn        = be32_to_cpu(pdu->cmdsn);
@@@ -497,8 -505,7 +505,7 @@@ static int iscsi_login_non_zero_tsih_s1
   {
         struct iscsi_login_req *pdu = (struct iscsi_login_req *)buf;
   
-       iscsi_login_set_conn_values(NULL, conn, pdu->cid);
-       return 0;
+       return iscsi_login_set_conn_values(NULL, conn, pdu->cid);
   }
   
   /*
@@@ -554,9 -561,8 +561,8 @@@ static int iscsi_login_non_zero_tsih_s2
                 atomic_set(&sess->session_continuation, 1);
         spin_unlock_bh(&sess->conn_lock);
   
-       iscsi_login_set_conn_values(sess, conn, pdu->cid);
- 
-       if (iscsi_copy_param_list(&conn->param_list,
+       if (iscsi_login_set_conn_values(sess, conn, pdu->cid) < 0 ||
+           iscsi_copy_param_list(&conn->param_list,
                         conn->tpg->param_list, 0) < 0) {
                 iscsit_tx_login_rsp(conn, ISCSI_STATUS_CLS_TARGET_ERR,
                                 ISCSI_LOGIN_STATUS_NO_RESOURCES);
@@@ -1464,9 -1470,5 +1470,9 @@@ int iscsi_target_login_thread(void *arg
                         break;
         }
   
+ +      while (!kthread_should_stop()) {
+ +              msleep(100);
+ +      }
+ +
         return 0;
   }
diff --combined fs/cifs/cifsfs.c

index 556f480c6936a9ca5cd6d73a0feb674d10797f15,fe0c8dcc7dc7c7fe24d7a5a175515837e8b1c3b4..180b3356ff8612dfc7f3cd702e350bb284ca1a0d
--- 1/fs/cifs/cifsfs.c
--- 2/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@@ -51,7 -51,9 +51,7 @@@
   #include <linux/key-type.h>
   #include "cifs_spnego.h"
   #include "fscache.h"
- -#ifdef CONFIG_CIFS_SMB2
   #include "smb2pdu.h"
- -#endif
   
   int cifsFYI = 0;
   bool traceSMB;
@@@ -275,8 -277,9 +275,8 @@@ cifs_alloc_inode(struct super_block *sb
         cifs_inode->uniqueid = 0;
         cifs_inode->createtime = 0;
         cifs_inode->epoch = 0;
- -#ifdef CONFIG_CIFS_SMB2
         generate_random_uuid(cifs_inode->lease_key);
- -#endif
+ +
         /*
          * Can not set i_flags here - they get immediately overwritten to zero
          * by the VFS.
@@@ -1210,12 -1213,14 +1210,12 @@@ cifs_destroy_inodecache(void
   static int
   cifs_init_request_bufs(void)
   {
- -      size_t max_hdr_size = MAX_CIFS_HDR_SIZE;
- -#ifdef CONFIG_CIFS_SMB2
         /*
          * SMB2 maximum header size is bigger than CIFS one - no problems to
          * allocate some more bytes for CIFS.
          */
- -      max_hdr_size = MAX_SMB2_HDR_SIZE;
- -#endif
+ +      size_t max_hdr_size = MAX_SMB2_HDR_SIZE;
+ +
         if (CIFSMaxBufSize < 8192) {
         /* Buffer size can not be smaller than 2 * PATH_MAX since maximum
         Unicode path name has to fit in any SMB/CIFS path based frames */
@@@ -1354,7 -1359,7 +1354,7 @@@ init_cifs(void
         spin_lock_init(&cifs_tcp_ses_lock);
         spin_lock_init(&GlobalMid_Lock);
   
-       get_random_bytes(&cifs_lock_secret, sizeof(cifs_lock_secret));
+       cifs_lock_secret = get_random_u32();
   
         if (cifs_max_pending < 2) {
                 cifs_max_pending = 2;
@@@ -1471,10 -1476,12 +1471,10 @@@ MODULE_SOFTDEP("pre: hmac")
   MODULE_SOFTDEP("pre: md4");
   MODULE_SOFTDEP("pre: md5");
   MODULE_SOFTDEP("pre: nls");
- -#ifdef CONFIG_CIFS_SMB2
   MODULE_SOFTDEP("pre: aes");
   MODULE_SOFTDEP("pre: cmac");
   MODULE_SOFTDEP("pre: sha256");
   MODULE_SOFTDEP("pre: aead2");
   MODULE_SOFTDEP("pre: ccm");
- -#endif /* CONFIG_CIFS_SMB2 */
   module_init(init_cifs)
   module_exit(exit_cifs)
diff --combined include/linux/random.h

index 1fa0dc880bd7878e70200264e2486a579dfe1332,4aecc339558d5ea1a30e73b8e9889b0cee34f961..eafea6a09361f8787bad4aa22d906dc17177d03f
--- 1/include/linux/random.h
--- 2/include/linux/random.h
+++ b/include/linux/random.h
@@@ -34,6 -34,7 +34,7 @@@ extern void add_input_randomness(unsign
   extern void add_interrupt_randomness(int irq, int irq_flags) __latent_entropy;
   
   extern void get_random_bytes(void *buf, int nbytes);
+ extern int wait_for_random_bytes(void);
   extern int add_random_ready_callback(struct random_ready_callback *rdy);
   extern void del_random_ready_callback(struct random_ready_callback *rdy);
   extern void get_random_bytes_arch(void *buf, int nbytes);
@@@ -57,27 -58,31 +58,52 @@@ static inline unsigned long get_random_
   #endif
   }
   
+ +/*
+ + * On 64-bit architectures, protect against non-terminated C string overflows
+ + * by zeroing out the first byte of the canary; this leaves 56 bits of entropy.
+ + */
+ +#ifdef CONFIG_64BIT
+ +# ifdef __LITTLE_ENDIAN
+ +#  define CANARY_MASK 0xffffffffffffff00UL
+ +# else /* big endian, 64 bits: */
+ +#  define CANARY_MASK 0x00ffffffffffffffUL
+ +# endif
+ +#else /* 32 bits: */
+ +# define CANARY_MASK 0xffffffffUL
+ +#endif
+ +
+ +static inline unsigned long get_random_canary(void)
+ +{
+ +      unsigned long val = get_random_long();
+ +
+ +      return val & CANARY_MASK;
+ +}
+ +
+ /* Calls wait_for_random_bytes() and then calls get_random_bytes(buf, nbytes).
+  * Returns the result of the call to wait_for_random_bytes. */
+ static inline int get_random_bytes_wait(void *buf, int nbytes)
+ {
+       int ret = wait_for_random_bytes();
+       if (unlikely(ret))
+               return ret;
+       get_random_bytes(buf, nbytes);
+       return 0;
+ }
+ 
+ #define declare_get_random_var_wait(var) \
+       static inline int get_random_ ## var ## _wait(var *out) { \
+               int ret = wait_for_random_bytes(); \
+               if (unlikely(ret)) \
+                       return ret; \
+               *out = get_random_ ## var(); \
+               return 0; \
+       }
+ declare_get_random_var_wait(u32)
+ declare_get_random_var_wait(u64)
+ declare_get_random_var_wait(int)
+ declare_get_random_var_wait(long)
+ #undef declare_get_random_var_wait /* must name the macro defined above so the helper does not leak to includers */
+ 
   unsigned long randomize_page(unsigned long start, unsigned long range);
   
   u32 prandom_u32(void);
diff --combined lib/Kconfig.debug

index 789c6e9e5e011f968f2958da42a8109fdbc5c9c1,9d0a244074b9c7ba88e293492fc9696c19c55af1..98fe715522e8d1834083e608d32a78ed0600deb9
--- 1/lib/Kconfig.debug
--- 2/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@@ -286,7 -286,7 +286,7 @@@ config DEBUG_F
           write to these files.
   
           For detailed documentation on the debugfs API, see
- -        Documentation/DocBook/filesystems.
+ +        Documentation/filesystems/.
   
           If unsure, say N.
   
@@@ -778,45 -778,34 +778,45 @@@ config DEBUG_SHIR
   menu "Debug Lockups and Hangs"
   
   config LOCKUP_DETECTOR
- -      bool "Detect Hard and Soft Lockups"
+ +      bool
+ +
+ +config SOFTLOCKUP_DETECTOR
+ +      bool "Detect Soft Lockups"
         depends on DEBUG_KERNEL && !S390
+ +      select LOCKUP_DETECTOR
         help
           Say Y here to enable the kernel to act as a watchdog to detect
- -        hard and soft lockups.
+ +        soft lockups.
   
           Softlockups are bugs that cause the kernel to loop in kernel
           mode for more than 20 seconds, without giving other tasks a
           chance to run.  The current stack trace is displayed upon
           detection and the system will stay locked up.
   
+ +config HARDLOCKUP_DETECTOR_PERF
+ +      bool
+ +      select SOFTLOCKUP_DETECTOR
+ +
+ +#
+ +# arch/ can define HAVE_HARDLOCKUP_DETECTOR_ARCH to provide their own hard
+ +# lockup detector rather than the perf based detector.
+ +#
+ +config HARDLOCKUP_DETECTOR
+ +      bool "Detect Hard Lockups"
+ +      depends on DEBUG_KERNEL && !S390
+ +      depends on HAVE_HARDLOCKUP_DETECTOR_PERF || HAVE_HARDLOCKUP_DETECTOR_ARCH
+ +      select LOCKUP_DETECTOR
+ +      select HARDLOCKUP_DETECTOR_PERF if HAVE_HARDLOCKUP_DETECTOR_PERF
+ +      select HARDLOCKUP_DETECTOR_ARCH if HAVE_HARDLOCKUP_DETECTOR_ARCH
+ +      help
+ +        Say Y here to enable the kernel to act as a watchdog to detect
+ +        hard lockups.
+ +
           Hardlockups are bugs that cause the CPU to loop in kernel mode
           for more than 10 seconds, without letting other interrupts have a
           chance to run.  The current stack trace is displayed upon detection
           and the system will stay locked up.
   
- -        The overhead should be minimal.  A periodic hrtimer runs to
- -        generate interrupts and kick the watchdog task every 4 seconds.
- -        An NMI is generated every 10 seconds or so to check for hardlockups.
- -
- -        The frequency of hrtimer and NMI events and the soft and hard lockup
- -        thresholds can be controlled through the sysctl watchdog_thresh.
- -
- -config HARDLOCKUP_DETECTOR
- -      def_bool y
- -      depends on LOCKUP_DETECTOR && !HAVE_NMI_WATCHDOG
- -      depends on PERF_EVENTS && HAVE_PERF_EVENTS_NMI
- -
   config BOOTPARAM_HARDLOCKUP_PANIC
         bool "Panic (Reboot) On Hard Lockups"
         depends on HARDLOCKUP_DETECTOR
@@@ -837,7 -826,7 +837,7 @@@ config BOOTPARAM_HARDLOCKUP_PANIC_VALU
   
   config BOOTPARAM_SOFTLOCKUP_PANIC
         bool "Panic (Reboot) On Soft Lockups"
- -      depends on LOCKUP_DETECTOR
+ +      depends on SOFTLOCKUP_DETECTOR
         help
           Say Y here to enable the kernel to panic on "soft lockups",
           which are bugs that cause the kernel to loop in kernel
@@@ -854,7 -843,7 +854,7 @@@
   
   config BOOTPARAM_SOFTLOCKUP_PANIC_VALUE
         int
- -      depends on LOCKUP_DETECTOR
+ +      depends on SOFTLOCKUP_DETECTOR
         range 0 1
         default 0 if !BOOTPARAM_SOFTLOCKUP_PANIC
         default 1 if BOOTPARAM_SOFTLOCKUP_PANIC
@@@ -862,7 -851,7 +862,7 @@@
   config DETECT_HUNG_TASK
         bool "Detect Hung Tasks"
         depends on DEBUG_KERNEL
- -      default LOCKUP_DETECTOR
+ +      default SOFTLOCKUP_DETECTOR
         help
           Say Y here to enable the kernel to detect "hung tasks",
           which are bugs that cause the task to be stuck in
@@@ -1063,7 -1052,6 +1063,7 @@@ config DEBUG_LOCK_ALLO
         depends on DEBUG_KERNEL && TRACE_IRQFLAGS_SUPPORT && STACKTRACE_SUPPORT && LOCKDEP_SUPPORT
         select DEBUG_SPINLOCK
         select DEBUG_MUTEXES
+ +      select DEBUG_RT_MUTEXES if RT_MUTEXES
         select LOCKDEP
         help
          This feature will check whether any held lock (spinlock, rwlock,
@@@ -1079,7 -1067,6 +1079,7 @@@ config PROVE_LOCKIN
         select LOCKDEP
         select DEBUG_SPINLOCK
         select DEBUG_MUTEXES
+ +      select DEBUG_RT_MUTEXES if RT_MUTEXES
         select DEBUG_LOCK_ALLOC
         select TRACE_IRQFLAGS
         default n
@@@ -1134,7 -1121,6 +1134,7 @@@ config LOCK_STA
         select LOCKDEP
         select DEBUG_SPINLOCK
         select DEBUG_MUTEXES
+ +      select DEBUG_RT_MUTEXES if RT_MUTEXES
         select DEBUG_LOCK_ALLOC
         default n
         help
@@@ -1223,6 -1209,34 +1223,34 @@@ config STACKTRAC
           It is also used by various kernel debugging features that require
           stack trace generation.
   
+ config WARN_ALL_UNSEEDED_RANDOM
+       bool "Warn for all uses of unseeded randomness"
+       default n
+       help
+         Some parts of the kernel contain bugs relating to their use of
+         cryptographically secure random numbers before it's actually possible
+         to generate those numbers securely. This setting ensures that these
+         flaws don't go unnoticed, by enabling a message, should this ever
+         occur. This will allow people with obscure setups to know when things
+         are going wrong, so that they might contact developers about fixing
+         it.
+ 
+         Unfortunately, on some models of some architectures getting
+         a fully seeded CRNG is extremely difficult, and so this can
+         result in dmesg getting spammed for a surprisingly long
+         time.  This is really bad from a security perspective, and
+         so architecture maintainers really need to do what they can
+         to get the CRNG seeded sooner after the system is booted.
+         However, since users can not do anything actionable to
+         address this, by default the kernel will issue only a single
+         warning for the first use of unseeded randomness.
+ 
+         Say Y here if you want to receive warnings for all uses of
+         unseeded randomness.  This will be of use primarily for
+         those developers interested in improving the security of
+         Linux kernels running on their architecture (or
+         subarchitecture).
+ 
   config DEBUG_KOBJECT
         bool "kobject debugging"
         depends on DEBUG_KERNEL
@@@ -1315,7 -1329,189 +1343,7 @@@ config DEBUG_CREDENTIAL
   
           If unsure, say N.
   
- -menu "RCU Debugging"
- -
- -config PROVE_RCU
- -      def_bool PROVE_LOCKING
- -
- -config PROVE_RCU_REPEATEDLY
- -      bool "RCU debugging: don't disable PROVE_RCU on first splat"
- -      depends on PROVE_RCU
- -      default n
- -      help
- -       By itself, PROVE_RCU will disable checking upon issuing the
- -       first warning (or "splat").  This feature prevents such
- -       disabling, allowing multiple RCU-lockdep warnings to be printed
- -       on a single reboot.
- -
- -       Say Y to allow multiple RCU-lockdep warnings per boot.
- -
- -       Say N if you are unsure.
- -
- -config SPARSE_RCU_POINTER
- -      bool "RCU debugging: sparse-based checks for pointer usage"
- -      default n
- -      help
- -       This feature enables the __rcu sparse annotation for
- -       RCU-protected pointers.  This annotation will cause sparse
- -       to flag any non-RCU used of annotated pointers.  This can be
- -       helpful when debugging RCU usage.  Please note that this feature
- -       is not intended to enforce code cleanliness; it is instead merely
- -       a debugging aid.
- -
- -       Say Y to make sparse flag questionable use of RCU-protected pointers
- -
- -       Say N if you are unsure.
- -
- -config TORTURE_TEST
- -      tristate
- -      default n
- -
- -config RCU_PERF_TEST
- -      tristate "performance tests for RCU"
- -      depends on DEBUG_KERNEL
- -      select TORTURE_TEST
- -      select SRCU
- -      select TASKS_RCU
- -      default n
- -      help
- -        This option provides a kernel module that runs performance
- -        tests on the RCU infrastructure.  The kernel module may be built
- -        after the fact on the running kernel to be tested, if desired.
- -
- -        Say Y here if you want RCU performance tests to be built into
- -        the kernel.
- -        Say M if you want the RCU performance tests to build as a module.
- -        Say N if you are unsure.
- -
- -config RCU_TORTURE_TEST
- -      tristate "torture tests for RCU"
- -      depends on DEBUG_KERNEL
- -      select TORTURE_TEST
- -      select SRCU
- -      select TASKS_RCU
- -      default n
- -      help
- -        This option provides a kernel module that runs torture tests
- -        on the RCU infrastructure.  The kernel module may be built
- -        after the fact on the running kernel to be tested, if desired.
- -
- -        Say Y here if you want RCU torture tests to be built into
- -        the kernel.
- -        Say M if you want the RCU torture tests to build as a module.
- -        Say N if you are unsure.
- -
- -config RCU_TORTURE_TEST_SLOW_PREINIT
- -      bool "Slow down RCU grace-period pre-initialization to expose races"
- -      depends on RCU_TORTURE_TEST
- -      help
- -        This option delays grace-period pre-initialization (the
- -        propagation of CPU-hotplug changes up the rcu_node combining
- -        tree) for a few jiffies between initializing each pair of
- -        consecutive rcu_node structures.  This helps to expose races
- -        involving grace-period pre-initialization, in other words, it
- -        makes your kernel less stable.  It can also greatly increase
- -        grace-period latency, especially on systems with large numbers
- -        of CPUs.  This is useful when torture-testing RCU, but in
- -        almost no other circumstance.
- -
- -        Say Y here if you want your system to crash and hang more often.
- -        Say N if you want a sane system.
- -
- -config RCU_TORTURE_TEST_SLOW_PREINIT_DELAY
- -      int "How much to slow down RCU grace-period pre-initialization"
- -      range 0 5
- -      default 3
- -      depends on RCU_TORTURE_TEST_SLOW_PREINIT
- -      help
- -        This option specifies the number of jiffies to wait between
- -        each rcu_node structure pre-initialization step.
- -
- -config RCU_TORTURE_TEST_SLOW_INIT
- -      bool "Slow down RCU grace-period initialization to expose races"
- -      depends on RCU_TORTURE_TEST
- -      help
- -        This option delays grace-period initialization for a few
- -        jiffies between initializing each pair of consecutive
- -        rcu_node structures.  This helps to expose races involving
- -        grace-period initialization, in other words, it makes your
- -        kernel less stable.  It can also greatly increase grace-period
- -        latency, especially on systems with large numbers of CPUs.
- -        This is useful when torture-testing RCU, but in almost no
- -        other circumstance.
- -
- -        Say Y here if you want your system to crash and hang more often.
- -        Say N if you want a sane system.
- -
- -config RCU_TORTURE_TEST_SLOW_INIT_DELAY
- -      int "How much to slow down RCU grace-period initialization"
- -      range 0 5
- -      default 3
- -      depends on RCU_TORTURE_TEST_SLOW_INIT
- -      help
- -        This option specifies the number of jiffies to wait between
- -        each rcu_node structure initialization.
- -
- -config RCU_TORTURE_TEST_SLOW_CLEANUP
- -      bool "Slow down RCU grace-period cleanup to expose races"
- -      depends on RCU_TORTURE_TEST
- -      help
- -        This option delays grace-period cleanup for a few jiffies
- -        between cleaning up each pair of consecutive rcu_node
- -        structures.  This helps to expose races involving grace-period
- -        cleanup, in other words, it makes your kernel less stable.
- -        It can also greatly increase grace-period latency, especially
- -        on systems with large numbers of CPUs.  This is useful when
- -        torture-testing RCU, but in almost no other circumstance.
- -
- -        Say Y here if you want your system to crash and hang more often.
- -        Say N if you want a sane system.
- -
- -config RCU_TORTURE_TEST_SLOW_CLEANUP_DELAY
- -      int "How much to slow down RCU grace-period cleanup"
- -      range 0 5
- -      default 3
- -      depends on RCU_TORTURE_TEST_SLOW_CLEANUP
- -      help
- -        This option specifies the number of jiffies to wait between
- -        each rcu_node structure cleanup operation.
- -
- -config RCU_CPU_STALL_TIMEOUT
- -      int "RCU CPU stall timeout in seconds"
- -      depends on RCU_STALL_COMMON
- -      range 3 300
- -      default 21
- -      help
- -        If a given RCU grace period extends more than the specified
- -        number of seconds, a CPU stall warning is printed.  If the
- -        RCU grace period persists, additional CPU stall warnings are
- -        printed at more widely spaced intervals.
- -
- -config RCU_TRACE
- -      bool "Enable tracing for RCU"
- -      depends on DEBUG_KERNEL
- -      default y if TREE_RCU
- -      select TRACE_CLOCK
- -      help
- -        This option provides tracing in RCU which presents stats
- -        in debugfs for debugging RCU implementation.  It also enables
- -        additional tracepoints for ftrace-style event tracing.
- -
- -        Say Y here if you want to enable RCU tracing
- -        Say N if you are unsure.
- -
- -config RCU_EQS_DEBUG
- -      bool "Provide debugging asserts for adding NO_HZ support to an arch"
- -      depends on DEBUG_KERNEL
- -      help
- -        This option provides consistency checks in RCU's handling of
- -        NO_HZ.  These checks have proven quite helpful in detecting
- -        bugs in arch-specific NO_HZ code.
- -
- -        Say N here if you need ultimate kernel/user switch latencies
- -        Say Y if you are unsure
- -
- -endmenu # "RCU Debugging"
+ +source "kernel/rcu/Kconfig.debug"
   
   config DEBUG_WQ_FORCE_RR_CPU
         bool "Force round-robin CPU selection for unbound work items"
@@@ -1605,7 -1801,7 +1633,7 @@@ config RBTREE_TES
   
   config INTERVAL_TREE_TEST
         tristate "Interval tree test"
- -      depends on m && DEBUG_KERNEL
+ +      depends on DEBUG_KERNEL
         select INTERVAL_TREE
         help
           A benchmark measuring the performance of the interval tree library
@@@ -1796,17 -1992,6 +1824,17 @@@ config TEST_FIRMWAR
   
           If unsure, say N.
   
+ +config TEST_SYSCTL
+ +      tristate "sysctl test driver"
+ +      default n
+ +      depends on PROC_SYSCTL
+ +      help
+ +        This builds the "test_sysctl" module. This driver enables to test the
+ +        proc sysctl interfaces available to drivers safely without affecting
+ +        production knobs which might alter system functionality.
+ +
+ +        If unsure, say N.
+ +
   config TEST_UDELAY
         tristate "udelay test driver"
         default n
@@@ -1847,33 -2032,6 +1875,33 @@@ config BUG_ON_DATA_CORRUPTIO
   
           If unsure, say N.
   
+ +config TEST_KMOD
+ +      tristate "kmod stress tester"
+ +      default n
+ +      depends on m
+ +      depends on BLOCK && (64BIT || LBDAF)      # for XFS, BTRFS
+ +      depends on NETDEVICES && NET_CORE && INET # for TUN
+ +      select TEST_LKM
+ +      select XFS_FS
+ +      select TUN
+ +      select BTRFS_FS
+ +      help
+ +        Test the kernel's module loading mechanism: kmod. kmod implements
+ +        support to load modules using the Linux kernel's usermode helper.
+ +        This test provides a series of tests against kmod.
+ +
+ +        Although technically you can either build test_kmod as a module or
+ +        into the kernel we disallow building it into the kernel since
+ +        it stress tests request_module() and this will very likely cause
+ +        some issues by taking over precious threads available from other
+ +        module load requests, ultimately this could be fatal.
+ +
+ +        To run tests run:
+ +
+ +        tools/testing/selftests/kmod/kmod.sh --help
+ +
+ +        If unsure, say N.
+ +
   source "samples/Kconfig"
   
   source "lib/Kconfig.kgdb"
diff --combined lib/rhashtable.c

index 42466c167257cc08183357116e0c26c20f7dec3e,a1eb7c947f4670deb67135f8dcdb9353f5991212..707ca5d677c676a599442604d918c215d7709138
--- 1/lib/rhashtable.c
--- 2/lib/rhashtable.c
+++ b/lib/rhashtable.c
@@@ -211,10 -211,11 +211,10 @@@ static struct bucket_table *bucket_tabl
         int i;
   
         size = sizeof(*tbl) + nbuckets * sizeof(tbl->buckets[0]);
- -      if (size <= (PAGE_SIZE << PAGE_ALLOC_COSTLY_ORDER) ||
- -          gfp != GFP_KERNEL)
+ +      if (gfp != GFP_KERNEL)
                 tbl = kzalloc(size, gfp | __GFP_NOWARN | __GFP_NORETRY);
- -      if (tbl == NULL && gfp == GFP_KERNEL)
- -              tbl = vzalloc(size);
+ +      else
+ +              tbl = kvzalloc(size, gfp);
   
         size = nbuckets;
   
@@@ -234,7 -235,7 +234,7 @@@
   
         INIT_LIST_HEAD(&tbl->walkers);
   
-       get_random_bytes(&tbl->hash_rnd, sizeof(tbl->hash_rnd));
+       tbl->hash_rnd = get_random_u32();
   
         for (i = 0; i < nbuckets; i++)
                 INIT_RHT_NULLS_HEAD(tbl->buckets[i], ht, i);
diff --combined net/ceph/ceph_common.c

index 3d265c5cb6d0bdd909884ecd77357d5632fef33e,26ab58665f772177c81baf24a09107de08696a1b..5c036d2f401e25b42ece6d7cc6c4fc30c00dea43
--- 1/net/ceph/ceph_common.c
--- 2/net/ceph/ceph_common.c
+++ b/net/ceph/ceph_common.c
@@@ -56,6 -56,19 +56,6 @@@ static const struct kernel_param_ops pa
   module_param_cb(supported_features, &param_ops_supported_features, NULL,
                 S_IRUGO);
   
- -/*
- - * find filename portion of a path (/foo/bar/baz -> baz)
- - */
- -const char *ceph_file_part(const char *s, int len)
- -{
- -      const char *e = s + len;
- -
- -      while (e != s && *(e-1) != '/')
- -              e--;
- -      return e;
- -}
- -EXPORT_SYMBOL(ceph_file_part);
- -
   const char *ceph_msg_type_name(int type)
   {
         switch (type) {
@@@ -85,7 -98,6 +85,7 @@@
         case CEPH_MSG_OSD_OP: return "osd_op";
         case CEPH_MSG_OSD_OPREPLY: return "osd_opreply";
         case CEPH_MSG_WATCH_NOTIFY: return "watch_notify";
+ +      case CEPH_MSG_OSD_BACKOFF: return "osd_backoff";
         default: return "unknown";
         }
   }
@@@ -599,7 -611,11 +599,11 @@@ struct ceph_client *ceph_create_client(
   {
         struct ceph_client *client;
         struct ceph_entity_addr *myaddr = NULL;
-       int err = -ENOMEM;
+       int err;
+ 
+       err = wait_for_random_bytes();
+       if (err < 0)
+               return ERR_PTR(err);
   
         client = kzalloc(sizeof(*client), GFP_KERNEL);
         if (client == NULL)
diff --combined net/core/neighbour.c

index e31fc11a80001503a6c6224f8289378e4246d310,9784133b0cdb0574c3e4f0fc19776aa896caf52b..d0713627deb61623524c4b5cd26454abe92fc6d9
--- 1/net/core/neighbour.c
--- 2/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@@ -118,50 -118,6 +118,50 @@@ unsigned long neigh_rand_reach_time(uns
   EXPORT_SYMBOL(neigh_rand_reach_time);
   
   
+ +static bool neigh_del(struct neighbour *n, __u8 state,
+ +                    struct neighbour __rcu **np, struct neigh_table *tbl)
+ +{
+ +      bool retval = false;
+ +
+ +      write_lock(&n->lock);
+ +      if (refcount_read(&n->refcnt) == 1 && !(n->nud_state & state)) {
+ +              struct neighbour *neigh;
+ +
+ +              neigh = rcu_dereference_protected(n->next,
+ +                                                lockdep_is_held(&tbl->lock));
+ +              rcu_assign_pointer(*np, neigh);
+ +              n->dead = 1;
+ +              retval = true;
+ +      }
+ +      write_unlock(&n->lock);
+ +      if (retval)
+ +              neigh_cleanup_and_release(n);
+ +      return retval;
+ +}
+ +
+ +bool neigh_remove_one(struct neighbour *ndel, struct neigh_table *tbl)
+ +{
+ +      struct neigh_hash_table *nht;
+ +      void *pkey = ndel->primary_key;
+ +      u32 hash_val;
+ +      struct neighbour *n;
+ +      struct neighbour __rcu **np;
+ +
+ +      nht = rcu_dereference_protected(tbl->nht,
+ +                                      lockdep_is_held(&tbl->lock));
+ +      hash_val = tbl->hash(pkey, ndel->dev, nht->hash_rnd);
+ +      hash_val = hash_val >> (32 - nht->hash_shift);
+ +
+ +      np = &nht->hash_buckets[hash_val];
+ +      while ((n = rcu_dereference_protected(*np,
+ +                                            lockdep_is_held(&tbl->lock)))) {
+ +              if (n == ndel)
+ +                      return neigh_del(n, 0, np, tbl);
+ +              np = &n->next;
+ +      }
+ +      return false;
+ +}
+ +
   static int neigh_forced_gc(struct neigh_table *tbl)
   {
         int shrunk = 0;
@@@ -184,10 -140,19 +184,10 @@@
                          * - nobody refers to it.
                          * - it is not permanent
                          */
- -                      write_lock(&n->lock);
- -                      if (atomic_read(&n->refcnt) == 1 &&
- -                          !(n->nud_state & NUD_PERMANENT)) {
- -                              rcu_assign_pointer(*np,
- -                                      rcu_dereference_protected(n->next,
- -                                                lockdep_is_held(&tbl->lock)));
- -                              n->dead = 1;
- -                              shrunk  = 1;
- -                              write_unlock(&n->lock);
- -                              neigh_cleanup_and_release(n);
+ +                      if (neigh_del(n, NUD_PERMANENT, np, tbl)) {
+ +                              shrunk = 1;
                                 continue;
                         }
- -                      write_unlock(&n->lock);
                         np = &n->next;
                 }
         }
@@@ -254,7 -219,7 +254,7 @@@ static void neigh_flush_dev(struct neig
                         neigh_del_timer(n);
                         n->dead = 1;
   
- -                      if (atomic_read(&n->refcnt) != 1) {
+ +                      if (refcount_read(&n->refcnt) != 1) {
                                 /* The most unpleasant situation.
                                    We must destroy neighbour entry,
                                    but someone still uses it.
@@@ -335,7 -300,7 +335,7 @@@ static struct neighbour *neigh_alloc(st
   
         NEIGH_CACHE_STAT_INC(tbl, allocs);
         n->tbl            = tbl;
- -      atomic_set(&n->refcnt, 1);
+ +      refcount_set(&n->refcnt, 1);
         n->dead           = 1;
   out:
         return n;
@@@ -347,8 -312,7 +347,7 @@@ out_entries
   
   static void neigh_get_hash_rnd(u32 *x)
   {
-       get_random_bytes(x, sizeof(*x));
-       *x |= 1;
+       *x = get_random_u32() | 1;
   }
   
   static struct neigh_hash_table *neigh_hash_alloc(unsigned int shift)
@@@ -444,7 -408,7 +443,7 @@@ struct neighbour *neigh_lookup(struct n
         rcu_read_lock_bh();
         n = __neigh_lookup_noref(tbl, pkey, dev);
         if (n) {
- -              if (!atomic_inc_not_zero(&n->refcnt))
+ +              if (!refcount_inc_not_zero(&n->refcnt))
                         n = NULL;
                 NEIGH_CACHE_STAT_INC(tbl, hits);
         }
@@@ -473,7 -437,7 +472,7 @@@ struct neighbour *neigh_lookup_nodev(st
              n = rcu_dereference_bh(n->next)) {
                 if (!memcmp(n->primary_key, pkey, key_len) &&
                     net_eq(dev_net(n->dev), net)) {
- -                      if (!atomic_inc_not_zero(&n->refcnt))
+ +                      if (!refcount_inc_not_zero(&n->refcnt))
                                 n = NULL;
                         NEIGH_CACHE_STAT_INC(tbl, hits);
                         break;
@@@ -709,7 -673,7 +708,7 @@@ static void neigh_parms_destroy(struct 
   
   static inline void neigh_parms_put(struct neigh_parms *parms)
   {
- -      if (atomic_dec_and_test(&parms->refcnt))
+ +      if (refcount_dec_and_test(&parms->refcnt))
                 neigh_parms_destroy(parms);
   }
   
@@@ -821,7 -785,7 +820,7 @@@ static void neigh_periodic_work(struct 
                         if (time_before(n->used, n->confirmed))
                                 n->used = n->confirmed;
   
- -                      if (atomic_read(&n->refcnt) == 1 &&
+ +                      if (refcount_read(&n->refcnt) == 1 &&
                             (state == NUD_FAILED ||
                              time_after(jiffies, n->used + NEIGH_VAR(n->parms, GC_STALETIME)))) {
                                 *np = n->next;
@@@ -1479,7 -1443,7 +1478,7 @@@ struct neigh_parms *neigh_parms_alloc(s
         p = kmemdup(&tbl->parms, sizeof(*p), GFP_KERNEL);
         if (p) {
                 p->tbl            = tbl;
- -              atomic_set(&p->refcnt, 1);
+ +              refcount_set(&p->refcnt, 1);
                 p->reachable_time =
                                 neigh_rand_reach_time(NEIGH_VAR(p, BASE_REACHABLE_TIME));
                 dev_hold(dev);
@@@ -1542,7 -1506,7 +1541,7 @@@ void neigh_table_init(int index, struc
         INIT_LIST_HEAD(&tbl->parms_list);
         list_add(&tbl->parms.list, &tbl->parms_list);
         write_pnet(&tbl->parms.net, &init_net);
- -      atomic_set(&tbl->parms.refcnt, 1);
+ +      refcount_set(&tbl->parms.refcnt, 1);
         tbl->parms.reachable_time =
                           neigh_rand_reach_time(NEIGH_VAR(&tbl->parms, BASE_REACHABLE_TIME));
   
@@@ -1684,10 -1648,7 +1683,10 @@@ static int neigh_delete(struct sk_buff 
                            NEIGH_UPDATE_F_OVERRIDE |
                            NEIGH_UPDATE_F_ADMIN,
                            NETLINK_CB(skb).portid);
+ +      write_lock_bh(&tbl->lock);
         neigh_release(neigh);
+ +      neigh_remove_one(neigh, tbl);
+ +      write_unlock_bh(&tbl->lock);
   
   out:
         return err;
@@@ -1796,7 -1757,7 +1795,7 @@@ static int neightbl_fill_parms(struct s
   
         if ((parms->dev &&
              nla_put_u32(skb, NDTPA_IFINDEX, parms->dev->ifindex)) ||
- -          nla_put_u32(skb, NDTPA_REFCNT, atomic_read(&parms->refcnt)) ||
+ +          nla_put_u32(skb, NDTPA_REFCNT, refcount_read(&parms->refcnt)) ||
             nla_put_u32(skb, NDTPA_QUEUE_LENBYTES,
                         NEIGH_VAR(parms, QUEUE_LEN_BYTES)) ||
             /* approximative value for deprecated QUEUE_LEN (in packets) */
@@@ -2234,7 -2195,7 +2233,7 @@@ static int neigh_fill_info(struct sk_bu
         ci.ndm_used      = jiffies_to_clock_t(now - neigh->used);
         ci.ndm_confirmed = jiffies_to_clock_t(now - neigh->confirmed);
         ci.ndm_updated   = jiffies_to_clock_t(now - neigh->updated);
- -      ci.ndm_refcnt    = atomic_read(&neigh->refcnt) - 1;
+ +      ci.ndm_refcnt    = refcount_read(&neigh->refcnt) - 1;
         read_unlock_bh(&neigh->lock);
   
         if (nla_put_u32(skb, NDA_PROBES, atomic_read(&neigh->probes)) ||
diff --combined net/ipv4/route.c

index c816cd53f7fc26372d79a8ce063ca2e4da5d5705,11e001a420942517f210e082304f312f19ca3b4e..0383e66f59bcef3bd6b8627edae9aa2d34139f5d
--- 1/net/ipv4/route.c
--- 2/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@@ -114,8 -114,6 +114,8 @@@
   #include <net/ip_tunnels.h>
   #include <net/l3mdev.h>
   
+ +#include "fib_lookup.h"
+ +
   #define RT_FL_TOS(oldflp4) \
         ((oldflp4)->flowi4_tos & (IPTOS_RT_MASK | RTO_ONLINK))
   
@@@ -589,6 -587,11 +589,6 @@@ static void ip_rt_build_flow_key(struc
                 build_sk_flow_key(fl4, sk);
   }
   
- -static inline void rt_free(struct rtable *rt)
- -{
- -      call_rcu(&rt->dst.rcu_head, dst_rcu_free);
- -}
- -
   static DEFINE_SPINLOCK(fnhe_lock);
   
   static void fnhe_flush_routes(struct fib_nh_exception *fnhe)
@@@ -598,14 -601,12 +598,14 @@@
         rt = rcu_dereference(fnhe->fnhe_rth_input);
         if (rt) {
                 RCU_INIT_POINTER(fnhe->fnhe_rth_input, NULL);
- -              rt_free(rt);
+ +              dst_dev_put(&rt->dst);
+ +              dst_release(&rt->dst);
         }
         rt = rcu_dereference(fnhe->fnhe_rth_output);
         if (rt) {
                 RCU_INIT_POINTER(fnhe->fnhe_rth_output, NULL);
- -              rt_free(rt);
+ +              dst_dev_put(&rt->dst);
+ +              dst_release(&rt->dst);
         }
   }
   
@@@ -1299,7 -1300,7 +1299,7 @@@ static struct fib_nh_exception *find_ex
   }
   
   static bool rt_bind_exception(struct rtable *rt, struct fib_nh_exception *fnhe,
- -                            __be32 daddr)
+ +                            __be32 daddr, const bool do_cache)
   {
         bool ret = false;
   
@@@ -1328,13 -1329,10 +1328,13 @@@
                 if (!rt->rt_gateway)
                         rt->rt_gateway = daddr;
   
- -              if (!(rt->dst.flags & DST_NOCACHE)) {
+ +              if (do_cache) {
+ +                      dst_hold(&rt->dst);
                         rcu_assign_pointer(*porig, rt);
- -                      if (orig)
- -                              rt_free(orig);
+ +                      if (orig) {
+ +                              dst_dev_put(&orig->dst);
+ +                              dst_release(&orig->dst);
+ +                      }
                         ret = true;
                 }
   
@@@ -1357,20 -1355,12 +1357,20 @@@ static bool rt_cache_route(struct fib_n
         }
         orig = *p;
   
+ +      /* hold dst before doing cmpxchg() to avoid race condition
+ +       * on this dst
+ +       */
+ +      dst_hold(&rt->dst);
         prev = cmpxchg(p, orig, rt);
         if (prev == orig) {
- -              if (orig)
- -                      rt_free(orig);
- -      } else
+ +              if (orig) {
+ +                      dst_dev_put(&orig->dst);
+ +                      dst_release(&orig->dst);
+ +              }
+ +      } else {
+ +              dst_release(&rt->dst);
                 ret = false;
+ +      }
   
         return ret;
   }
@@@ -1395,12 -1385,8 +1395,12 @@@ static void rt_add_uncached_list(struc
   
   static void ipv4_dst_destroy(struct dst_entry *dst)
   {
+ +      struct dst_metrics *p = (struct dst_metrics *)DST_METRICS_PTR(dst);
         struct rtable *rt = (struct rtable *) dst;
   
+ +      if (p != &dst_default_metrics && atomic_dec_and_test(&p->refcnt))
+ +              kfree(p);
+ +
         if (!list_empty(&rt->rt_uncached)) {
                 struct uncached_list *ul = rt->rt_uncached_list;
   
@@@ -1441,8 -1427,7 +1441,8 @@@ static bool rt_cache_valid(const struc
   static void rt_set_nexthop(struct rtable *rt, __be32 daddr,
                            const struct fib_result *res,
                            struct fib_nh_exception *fnhe,
- -                         struct fib_info *fi, u16 type, u32 itag)
+ +                         struct fib_info *fi, u16 type, u32 itag,
+ +                         const bool do_cache)
   {
         bool cached = false;
   
@@@ -1453,18 -1438,14 +1453,18 @@@
                         rt->rt_gateway = nh->nh_gw;
                         rt->rt_uses_gateway = 1;
                 }
- -              dst_init_metrics(&rt->dst, fi->fib_metrics, true);
+ +              dst_init_metrics(&rt->dst, fi->fib_metrics->metrics, true);
+ +              if (fi->fib_metrics != &dst_default_metrics) {
+ +                      rt->dst._metrics |= DST_METRICS_REFCOUNTED;
+ +                      atomic_inc(&fi->fib_metrics->refcnt);
+ +              }
   #ifdef CONFIG_IP_ROUTE_CLASSID
                 rt->dst.tclassid = nh->nh_tclassid;
   #endif
                 rt->dst.lwtstate = lwtstate_get(nh->nh_lwtstate);
                 if (unlikely(fnhe))
- -                      cached = rt_bind_exception(rt, fnhe, daddr);
- -              else if (!(rt->dst.flags & DST_NOCACHE))
+ +                      cached = rt_bind_exception(rt, fnhe, daddr, do_cache);
+ +              else if (do_cache)
                         cached = rt_cache_route(nh, rt);
                 if (unlikely(!cached)) {
                         /* Routes we intend to cache in nexthop exception or
@@@ -1472,6 -1453,7 +1472,6 @@@
                          * However, if we are unsuccessful at storing this
                          * route into the cache we really need to set it.
                          */
- -                      rt->dst.flags |= DST_NOCACHE;
                         if (!rt->rt_gateway)
                                 rt->rt_gateway = daddr;
                         rt_add_uncached_list(rt);
@@@ -1494,7 -1476,7 +1494,7 @@@ struct rtable *rt_dst_alloc(struct net_
         struct rtable *rt;
   
         rt = dst_alloc(&ipv4_dst_ops, dev, 1, DST_OBSOLETE_FORCE_CHK,
- -                     (will_cache ? 0 : (DST_HOST | DST_NOCACHE)) |
+ +                     (will_cache ? 0 : DST_HOST) |
                        (nopolicy ? DST_NOPOLICY : 0) |
                        (noxfrm ? DST_NOXFRM : 0));
   
@@@ -1738,8 -1720,7 +1738,8 @@@ rt_cache
   
         rth->dst.input = ip_forward;
   
- -      rt_set_nexthop(rth, daddr, res, fnhe, res->fi, res->type, itag);
+ +      rt_set_nexthop(rth, daddr, res, fnhe, res->fi, res->type, itag,
+ +                     do_cache);
         set_lwt_redirect(rth);
         skb_dst_set(skb, &rth->dst);
   out:
@@@ -1871,9 -1852,9 +1871,9 @@@ static int ip_mkroute_input(struct sk_b
    */
   
   static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr,
- -                             u8 tos, struct net_device *dev)
+ +                             u8 tos, struct net_device *dev,
+ +                             struct fib_result *res)
   {
- -      struct fib_result res;
         struct in_device *in_dev = __in_dev_get_rcu(dev);
         struct ip_tunnel_info *tun_info;
         struct flowi4   fl4;
@@@ -1903,8 -1884,8 +1903,8 @@@
         if (ipv4_is_multicast(saddr) || ipv4_is_lbcast(saddr))
                 goto martian_source;
   
- -      res.fi = NULL;
- -      res.table = NULL;
+ +      res->fi = NULL;
+ +      res->table = NULL;
         if (ipv4_is_lbcast(daddr) || (saddr == 0 && daddr == 0))
                 goto brd_input;
   
@@@ -1940,17 -1921,17 +1940,17 @@@
         fl4.daddr = daddr;
         fl4.saddr = saddr;
         fl4.flowi4_uid = sock_net_uid(net, NULL);
- -      err = fib_lookup(net, &fl4, &res, 0);
+ +      err = fib_lookup(net, &fl4, res, 0);
         if (err != 0) {
                 if (!IN_DEV_FORWARD(in_dev))
                         err = -EHOSTUNREACH;
                 goto no_route;
         }
   
- -      if (res.type == RTN_BROADCAST)
+ +      if (res->type == RTN_BROADCAST)
                 goto brd_input;
   
- -      if (res.type == RTN_LOCAL) {
+ +      if (res->type == RTN_LOCAL) {
                 err = fib_validate_source(skb, saddr, daddr, tos,
                                           0, dev, in_dev, &itag);
                 if (err < 0)
@@@ -1962,10 -1943,10 +1962,10 @@@
                 err = -EHOSTUNREACH;
                 goto no_route;
         }
- -      if (res.type != RTN_UNICAST)
+ +      if (res->type != RTN_UNICAST)
                 goto martian_destination;
   
- -      err = ip_mkroute_input(skb, &res, in_dev, daddr, saddr, tos);
+ +      err = ip_mkroute_input(skb, res, in_dev, daddr, saddr, tos);
   out:  return err;
   
   brd_input:
@@@ -1979,14 -1960,14 +1979,14 @@@
                         goto martian_source;
         }
         flags |= RTCF_BROADCAST;
- -      res.type = RTN_BROADCAST;
+ +      res->type = RTN_BROADCAST;
         RT_CACHE_STAT_INC(in_brd);
   
   local_input:
         do_cache = false;
- -      if (res.fi) {
+ +      if (res->fi) {
                 if (!itag) {
- -                      rth = rcu_dereference(FIB_RES_NH(res).nh_rth_input);
+ +                      rth = rcu_dereference(FIB_RES_NH(*res).nh_rth_input);
                         if (rt_cache_valid(rth)) {
                                 skb_dst_set_noref(skb, &rth->dst);
                                 err = 0;
@@@ -1997,7 -1978,7 +1997,7 @@@
         }
   
         rth = rt_dst_alloc(l3mdev_master_dev_rcu(dev) ? : net->loopback_dev,
- -                         flags | RTCF_LOCAL, res.type,
+ +                         flags | RTCF_LOCAL, res->type,
                            IN_DEV_CONF_GET(in_dev, NOPOLICY), false, do_cache);
         if (!rth)
                 goto e_nobufs;
@@@ -2007,18 -1988,18 +2007,18 @@@
         rth->dst.tclassid = itag;
   #endif
         rth->rt_is_input = 1;
- -      if (res.table)
- -              rth->rt_table_id = res.table->tb_id;
+ +      if (res->table)
+ +              rth->rt_table_id = res->table->tb_id;
   
         RT_CACHE_STAT_INC(in_slow_tot);
- -      if (res.type == RTN_UNREACHABLE) {
+ +      if (res->type == RTN_UNREACHABLE) {
                 rth->dst.input= ip_error;
                 rth->dst.error= -err;
                 rth->rt_flags   &= ~RTCF_LOCAL;
         }
   
         if (do_cache) {
- -              struct fib_nh *nh = &FIB_RES_NH(res);
+ +              struct fib_nh *nh = &FIB_RES_NH(*res);
   
                 rth->dst.lwtstate = lwtstate_get(nh->nh_lwtstate);
                 if (lwtunnel_input_redirect(rth->dst.lwtstate)) {
@@@ -2027,8 -2008,10 +2027,8 @@@
                         rth->dst.input = lwtunnel_input;
                 }
   
- -              if (unlikely(!rt_cache_route(nh, rth))) {
- -                      rth->dst.flags |= DST_NOCACHE;
+ +              if (unlikely(!rt_cache_route(nh, rth)))
                         rt_add_uncached_list(rth);
- -              }
         }
         skb_dst_set(skb, &rth->dst);
         err = 0;
@@@ -2036,9 -2019,9 +2036,9 @@@
   
   no_route:
         RT_CACHE_STAT_INC(in_no_route);
- -      res.type = RTN_UNREACHABLE;
- -      res.fi = NULL;
- -      res.table = NULL;
+ +      res->type = RTN_UNREACHABLE;
+ +      res->fi = NULL;
+ +      res->table = NULL;
         goto local_input;
   
         /*
@@@ -2068,22 -2051,11 +2068,22 @@@ martian_source
   int ip_route_input_noref(struct sk_buff *skb, __be32 daddr, __be32 saddr,
                          u8 tos, struct net_device *dev)
   {
- -      int res;
+ +      struct fib_result res;
+ +      int err;
   
         tos &= IPTOS_RT_MASK;
         rcu_read_lock();
+ +      err = ip_route_input_rcu(skb, daddr, saddr, tos, dev, &res);
+ +      rcu_read_unlock();
   
+ +      return err;
+ +}
+ +EXPORT_SYMBOL(ip_route_input_noref);
+ +
+ +/* called with rcu_read_lock held */
+ +int ip_route_input_rcu(struct sk_buff *skb, __be32 daddr, __be32 saddr,
+ +                     u8 tos, struct net_device *dev, struct fib_result *res)
+ +{
         /* Multicast recognition logic is moved from route cache to here.
            The problem was that too many Ethernet cards have broken/missing
            hardware multicast filters :-( As result the host on multicasting
@@@ -2098,7 -2070,6 +2098,7 @@@
         if (ipv4_is_multicast(daddr)) {
                 struct in_device *in_dev = __in_dev_get_rcu(dev);
                 int our = 0;
+ +              int err = -EINVAL;
   
                 if (in_dev)
                         our = ip_check_mc_rcu(in_dev, daddr, saddr,
@@@ -2114,6 -2085,7 +2114,6 @@@
                                                       ip_hdr(skb)->protocol);
                 }
   
- -              res = -EINVAL;
                 if (our
   #ifdef CONFIG_IP_MROUTE
                         ||
@@@ -2121,14 -2093,17 +2121,14 @@@
                      IN_DEV_MFORWARD(in_dev))
   #endif
                    ) {
- -                      res = ip_route_input_mc(skb, daddr, saddr,
+ +                      err = ip_route_input_mc(skb, daddr, saddr,
                                                 tos, dev, our);
                 }
- -              rcu_read_unlock();
- -              return res;
+ +              return err;
         }
- -      res = ip_route_input_slow(skb, daddr, saddr, tos, dev);
- -      rcu_read_unlock();
- -      return res;
+ +
+ +      return ip_route_input_slow(skb, daddr, saddr, tos, dev, res);
   }
- -EXPORT_SYMBOL(ip_route_input_noref);
   
   /* called with rcu_read_lock() */
   static struct rtable *__mkroute_output(const struct fib_result *res,
@@@ -2224,8 -2199,10 +2224,8 @@@
                 rth = rcu_dereference(*prth);
   
   rt_cache:
- -              if (rt_cache_valid(rth)) {
- -                      dst_hold(&rth->dst);
+ +              if (rt_cache_valid(rth) && dst_hold_safe(&rth->dst))
                         return rth;
- -              }
         }
   
   add:
@@@ -2259,7 -2236,7 +2259,7 @@@
   #endif
         }
   
- -      rt_set_nexthop(rth, fl4->daddr, res, fnhe, fi, type, 0);
+ +      rt_set_nexthop(rth, fl4->daddr, res, fnhe, fi, type, 0, do_cache);
         set_lwt_redirect(rth);
   
         return rth;
@@@ -2269,40 -2246,29 +2269,40 @@@
    * Major route resolver routine.
    */
   
- -struct rtable *__ip_route_output_key_hash(struct net *net, struct flowi4 *fl4,
- -                                        const struct sk_buff *skb)
+ +struct rtable *ip_route_output_key_hash(struct net *net, struct flowi4 *fl4,
+ +                                      const struct sk_buff *skb)
   {
- -      struct net_device *dev_out = NULL;
         __u8 tos = RT_FL_TOS(fl4);
- -      unsigned int flags = 0;
         struct fib_result res;
         struct rtable *rth;
- -      int orig_oif;
- -      int err = -ENETUNREACH;
   
         res.tclassid    = 0;
         res.fi          = NULL;
         res.table       = NULL;
   
- -      orig_oif = fl4->flowi4_oif;
- -
         fl4->flowi4_iif = LOOPBACK_IFINDEX;
         fl4->flowi4_tos = tos & IPTOS_RT_MASK;
         fl4->flowi4_scope = ((tos & RTO_ONLINK) ?
                          RT_SCOPE_LINK : RT_SCOPE_UNIVERSE);
   
         rcu_read_lock();
+ +      rth = ip_route_output_key_hash_rcu(net, fl4, &res, skb);
+ +      rcu_read_unlock();
+ +
+ +      return rth;
+ +}
+ +EXPORT_SYMBOL_GPL(ip_route_output_key_hash);
+ +
+ +struct rtable *ip_route_output_key_hash_rcu(struct net *net, struct flowi4 *fl4,
+ +                                          struct fib_result *res,
+ +                                          const struct sk_buff *skb)
+ +{
+ +      struct net_device *dev_out = NULL;
+ +      int orig_oif = fl4->flowi4_oif;
+ +      unsigned int flags = 0;
+ +      struct rtable *rth;
+ +      int err = -ENETUNREACH;
+ +
         if (fl4->saddr) {
                 rth = ERR_PTR(-EINVAL);
                 if (ipv4_is_multicast(fl4->saddr) ||
@@@ -2388,15 -2354,15 +2388,15 @@@
                         fl4->daddr = fl4->saddr = htonl(INADDR_LOOPBACK);
                 dev_out = net->loopback_dev;
                 fl4->flowi4_oif = LOOPBACK_IFINDEX;
- -              res.type = RTN_LOCAL;
+ +              res->type = RTN_LOCAL;
                 flags |= RTCF_LOCAL;
                 goto make_route;
         }
   
- -      err = fib_lookup(net, fl4, &res, 0);
+ +      err = fib_lookup(net, fl4, res, 0);
         if (err) {
- -              res.fi = NULL;
- -              res.table = NULL;
+ +              res->fi = NULL;
+ +              res->table = NULL;
                 if (fl4->flowi4_oif &&
                     (ipv4_is_multicast(fl4->daddr) ||
                     !netif_index_is_l3_master(net, fl4->flowi4_oif))) {
@@@ -2421,41 -2387,43 +2421,41 @@@
                         if (fl4->saddr == 0)
                                 fl4->saddr = inet_select_addr(dev_out, 0,
                                                               RT_SCOPE_LINK);
- -                      res.type = RTN_UNICAST;
+ +                      res->type = RTN_UNICAST;
                         goto make_route;
                 }
                 rth = ERR_PTR(err);
                 goto out;
         }
   
- -      if (res.type == RTN_LOCAL) {
+ +      if (res->type == RTN_LOCAL) {
                 if (!fl4->saddr) {
- -                      if (res.fi->fib_prefsrc)
- -                              fl4->saddr = res.fi->fib_prefsrc;
+ +                      if (res->fi->fib_prefsrc)
+ +                              fl4->saddr = res->fi->fib_prefsrc;
                         else
                                 fl4->saddr = fl4->daddr;
                 }
   
                 /* L3 master device is the loopback for that domain */
- -              dev_out = l3mdev_master_dev_rcu(FIB_RES_DEV(res)) ? :
+ +              dev_out = l3mdev_master_dev_rcu(FIB_RES_DEV(*res)) ? :
                         net->loopback_dev;
                 fl4->flowi4_oif = dev_out->ifindex;
                 flags |= RTCF_LOCAL;
                 goto make_route;
         }
   
- -      fib_select_path(net, &res, fl4, skb);
+ +      fib_select_path(net, res, fl4, skb);
   
- -      dev_out = FIB_RES_DEV(res);
+ +      dev_out = FIB_RES_DEV(*res);
         fl4->flowi4_oif = dev_out->ifindex;
   
   
   make_route:
- -      rth = __mkroute_output(&res, fl4, orig_oif, dev_out, flags);
+ +      rth = __mkroute_output(res, fl4, orig_oif, dev_out, flags);
   
   out:
- -      rcu_read_unlock();
         return rth;
   }
- -EXPORT_SYMBOL_GPL(__ip_route_output_key_hash);
   
   static struct dst_entry *ipv4_blackhole_dst_check(struct dst_entry *dst, u32 cookie)
   {
@@@ -2509,7 -2477,7 +2509,7 @@@ struct dst_entry *ipv4_blackhole_route(
                 new->input = dst_discard;
                 new->output = dst_discard_out;
   
- -              new->dev = ort->dst.dev;
+ +              new->dev = net->loopback_dev;
                 if (new->dev)
                         dev_hold(new->dev);
   
@@@ -2524,6 -2492,7 +2524,6 @@@
                 rt->rt_uses_gateway = ort->rt_uses_gateway;
   
                 INIT_LIST_HEAD(&rt->rt_uncached);
- -              dst_free(new);
         }
   
         dst_release(dst_orig);
@@@ -2548,10 -2517,9 +2548,10 @@@ struct rtable *ip_route_output_flow(str
   }
   EXPORT_SYMBOL_GPL(ip_route_output_flow);
   
+ +/* called with rcu_read_lock held */
   static int rt_fill_info(struct net *net,  __be32 dst, __be32 src, u32 table_id,
                         struct flowi4 *fl4, struct sk_buff *skb, u32 portid,
- -                      u32 seq, int event)
+ +                      u32 seq)
   {
         struct rtable *rt = skb_rtable(skb);
         struct rtmsg *r;
@@@ -2560,7 -2528,7 +2560,7 @@@
         u32 error;
         u32 metrics[RTAX_MAX];
   
- -      nlh = nlmsg_put(skb, portid, seq, event, sizeof(*r), 0);
+ +      nlh = nlmsg_put(skb, portid, seq, RTM_NEWROUTE, sizeof(*r), 0);
         if (!nlh)
                 return -EMSGSIZE;
   
@@@ -2668,7 -2636,6 +2668,7 @@@ static int inet_rtm_getroute(struct sk_
         struct net *net = sock_net(in_skb->sk);
         struct rtmsg *rtm;
         struct nlattr *tb[RTA_MAX+1];
+ +      struct fib_result res = {};
         struct rtable *rt = NULL;
         struct flowi4 fl4;
         __be32 dst = 0;
@@@ -2725,12 -2692,10 +2725,12 @@@
         fl4.flowi4_mark = mark;
         fl4.flowi4_uid = uid;
   
+ +      rcu_read_lock();
+ +
         if (iif) {
                 struct net_device *dev;
   
- -              dev = __dev_get_by_index(net, iif);
+ +              dev = dev_get_by_index_rcu(net, iif);
                 if (!dev) {
                         err = -ENODEV;
                         goto errout_free;
@@@ -2739,14 -2704,14 +2739,14 @@@
                 skb->protocol   = htons(ETH_P_IP);
                 skb->dev        = dev;
                 skb->mark       = mark;
- -              err = ip_route_input(skb, dst, src, rtm->rtm_tos, dev);
+ +              err = ip_route_input_rcu(skb, dst, src, rtm->rtm_tos,
+ +                                       dev, &res);
   
                 rt = skb_rtable(skb);
                 if (err == 0 && rt->dst.error)
                         err = -rt->dst.error;
         } else {
- -              rt = ip_route_output_key(net, &fl4);
- -
+ +              rt = ip_route_output_key_hash_rcu(net, &fl4, &res, skb);
                 err = 0;
                 if (IS_ERR(rt))
                         err = PTR_ERR(rt);
@@@ -2762,25 -2727,17 +2762,25 @@@
         if (rtm->rtm_flags & RTM_F_LOOKUP_TABLE)
                 table_id = rt->rt_table_id;
   
- -      err = rt_fill_info(net, dst, src, table_id, &fl4, skb,
- -                         NETLINK_CB(in_skb).portid, nlh->nlmsg_seq,
- -                         RTM_NEWROUTE);
+ +      if (rtm->rtm_flags & RTM_F_FIB_MATCH)
+ +              err = fib_dump_info(skb, NETLINK_CB(in_skb).portid,
+ +                                  nlh->nlmsg_seq, RTM_NEWROUTE, table_id,
+ +                                  rt->rt_type, res.prefix, res.prefixlen,
+ +                                  fl4.flowi4_tos, res.fi, 0);
+ +      else
+ +              err = rt_fill_info(net, dst, src, table_id, &fl4, skb,
+ +                                 NETLINK_CB(in_skb).portid, nlh->nlmsg_seq);
         if (err < 0)
                 goto errout_free;
   
+ +      rcu_read_unlock();
+ +
         err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
   errout:
         return err;
   
   errout_free:
+ +      rcu_read_unlock();
         kfree_skb(skb);
         goto errout;
   }
@@@ -2979,8 -2936,7 +2979,7 @@@ static __net_init int rt_genid_init(str
   {
         atomic_set(&net->ipv4.rt_genid, 0);
         atomic_set(&net->fnhe_genid, 0);
-       get_random_bytes(&net->ipv4.dev_addr_genid,
-                        sizeof(net->ipv4.dev_addr_genid));
+       atomic_set(&net->ipv4.dev_addr_genid, get_random_int());
         return 0;
   }
author	Linus Torvalds <torvalds@linux-foundation.org>
	Sat, 15 Jul 2017 19:44:02 +0000 (12:44 -0700)
committer	Linus Torvalds <torvalds@linux-foundation.org>
	Sat, 15 Jul 2017 19:44:02 +0000 (12:44 -0700)
		1	2
drivers/char/random.c	patch \|	diff1 \|	diff2 \|	blob \| history
drivers/target/iscsi/iscsi_target_login.c	patch \|	diff1 \|	diff2 \|	blob \| history
fs/cifs/cifsfs.c	patch \|	diff1 \|	diff2 \|	blob \| history
include/linux/random.h	patch \|	diff1 \|	diff2 \|	blob \| history
lib/Kconfig.debug	patch \|	diff1 \|	diff2 \|	blob \| history
lib/rhashtable.c	patch \|	diff1 \|	diff2 \|	blob \| history
net/ceph/ceph_common.c	patch \|	diff1 \|	diff2 \|	blob \| history
net/core/neighbour.c	patch \|	diff1 \|	diff2 \|	blob \| history
net/ipv4/route.c	patch \|	diff1 \|	diff2 \|	blob \| history