git.kernelconcepts.de Git - karo-tx-linux.git/commitdiff
Merge branch 'core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip...
author     Linus Torvalds <torvalds@linux-foundation.org>
           Wed, 31 Dec 2008 00:10:19 +0000 (16:10 -0800)
committer  Linus Torvalds <torvalds@linux-foundation.org>
           Wed, 31 Dec 2008 00:10:19 +0000 (16:10 -0800)
* 'core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip: (63 commits)
  stacktrace: provide save_stack_trace_tsk() weak alias
  rcu: provide RCU options on non-preempt architectures too
  printk: fix discarding message when recursion_bug
  futex: clean up futex_(un)lock_pi fault handling
  "Tree RCU": scalable classic RCU implementation
  futex: rename field in futex_q to clarify single waiter semantics
  x86/swiotlb: add default swiotlb_arch_range_needs_mapping
  x86/swiotlb: add default phys<->bus conversion
  x86: unify pci iommu setup and allow swiotlb to compile for 32 bit
  x86: add swiotlb allocation functions
  swiotlb: consolidate swiotlb info message printing
  swiotlb: support bouncing of HighMem pages
  swiotlb: factor out copy to/from device
  swiotlb: add arch hook to force mapping
  swiotlb: allow architectures to override phys<->bus<->phys conversions
  swiotlb: add comment where we handle the overflow of a dma mask on 32 bit
  rcu: fix rcutorture behavior during reboot
  resources: skip sanity check of busy resources
  swiotlb: move some definitions to header
  swiotlb: allow architectures to override swiotlb pool allocation
  ...

Fix up trivial conflicts in
  arch/x86/kernel/Makefile
  arch/x86/mm/init_32.c
  include/linux/hardirq.h
as per Ingo's suggestions.
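
For context: in the combined diff below, lines prefixed with "++" were introduced by the
merge commit itself, i.e. by the conflict resolution. The include/linux/hardirq.h conflict
is a typical example: mainline already carried the multi-line nmi_enter()/nmi_exit() macros
with the ftrace hooks, while this branch added rcu_nmi_enter()/rcu_nmi_exit() to the old
one-line versions. An illustrative reconstruction of roughly what git would have left in
the working tree (not the literal conflict text) looks like this; the resolution simply
keeps both sets of calls:

/* Illustrative reconstruction only; not the literal conflict hunk. */
<<<<<<< HEAD
#define nmi_enter()                           \
      do {                                    \
              ftrace_nmi_enter();             \
              lockdep_off();                  \
              __irq_enter();                  \
      } while (0)
=======
#define nmi_enter()           do { lockdep_off(); rcu_nmi_enter(); __irq_enter(); } while (0)
>>>>>>> core-for-linus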

20 files changed:
arch/x86/include/asm/dma-mapping.h
arch/x86/include/asm/iommu.h
arch/x86/include/asm/pci.h
arch/x86/include/asm/uaccess.h
arch/x86/kernel/Makefile
arch/x86/kernel/pci-dma.c
arch/x86/mm/init_32.c
include/asm-generic/bug.h
include/linux/hardirq.h
include/linux/kernel.h
include/linux/rcupdate.h
init/Kconfig
kernel/Makefile
kernel/exit.c
kernel/extable.c
kernel/futex.c
kernel/lockdep.c
kernel/sched.c
kernel/sys.c
mm/memory.c

diff --combined arch/x86/include/asm/dma-mapping.h
index dc22c0733282b9ce631250d69cbcc00e0cd46617,3b43a65894c4ac44c5c96e6a736d12ec1ceeb664..4035357f5b9d6b830a16b09f39d30e90810aae9e
@@@ -65,16 -65,18 +65,16 @@@ static inline struct dma_mapping_ops *g
                return dma_ops;
        else
                return dev->archdata.dma_ops;
- #endif /* _ASM_X86_DMA_MAPPING_H */
+ #endif
  }
  
  /* Make sure we keep the same behaviour */
  static inline int dma_mapping_error(struct device *dev, dma_addr_t dma_addr)
  {
 -#ifdef CONFIG_X86_64
        struct dma_mapping_ops *ops = get_dma_ops(dev);
        if (ops->mapping_error)
                return ops->mapping_error(dev, dma_addr);
  
 -#endif
        return (dma_addr == bad_dma_address);
  }
  
diff --combined arch/x86/include/asm/iommu.h
index 295b13193f4df09b05aad53ec09c81c601a17a9f,35276ec5925b0ee7dcaad9aaadcb7d1a1382cb27..a6ee9e6f530f89cc2e86a5607ceff6597c9d1757
@@@ -7,9 -7,40 +7,7 @@@ extern struct dma_mapping_ops nommu_dma
  extern int force_iommu, no_iommu;
  extern int iommu_detected;
  
- extern unsigned long iommu_nr_pages(unsigned long addr, unsigned long len);
  /* 10 seconds */
  #define DMAR_OPERATION_TIMEOUT ((cycles_t) tsc_khz*10*1000)
  
 -#ifdef CONFIG_GART_IOMMU
 -extern int gart_iommu_aperture;
 -extern int gart_iommu_aperture_allowed;
 -extern int gart_iommu_aperture_disabled;
 -
 -extern void early_gart_iommu_check(void);
 -extern void gart_iommu_init(void);
 -extern void gart_iommu_shutdown(void);
 -extern void __init gart_parse_options(char *);
 -extern void gart_iommu_hole_init(void);
 -
 -#else
 -#define gart_iommu_aperture            0
 -#define gart_iommu_aperture_allowed    0
 -#define gart_iommu_aperture_disabled   1
 -
 -static inline void early_gart_iommu_check(void)
 -{
 -}
 -static inline void gart_iommu_init(void)
 -{
 -}
 -static inline void gart_iommu_shutdown(void)
 -{
 -}
 -static inline void gart_parse_options(char *options)
 -{
 -}
 -static inline void gart_iommu_hole_init(void)
 -{
 -}
 -#endif
 -
  #endif /* _ASM_X86_IOMMU_H */
diff --combined arch/x86/include/asm/pci.h
index 647781298e7ef7cbc98b14ad75f2552b0092507f,50ac542c9382ec371aa336e937bd6bc779a52581..66834c41c0493eccf1b117b1565443c10ec706b6
@@@ -19,8 -19,6 +19,8 @@@ struct pci_sysdata 
  };
  
  extern int pci_routeirq;
 +extern int noioapicquirk;
 +extern int noioapicreroute;
  
  /* scan a bus after allocating a pci_sysdata for it */
  extern struct pci_bus *pci_scan_bus_on_node(int busno, struct pci_ops *ops,
@@@ -84,6 -82,8 +84,8 @@@ static inline void pci_dma_burst_advice
  static inline void early_quirks(void) { }
  #endif
  
+ extern void pci_iommu_alloc(void);
  #endif  /* __KERNEL__ */
  
  #ifdef CONFIG_X86_32
diff --combined arch/x86/include/asm/uaccess.h
index 580c3ee6c58c4d0479dbce9ea491c78049255e04,99192bb55a53bf68afc30efeae89b9b40b5f4b4d..4340055b755918fb8fd777df62f811ba5e26820c
@@@ -157,6 -157,7 +157,7 @@@ extern int __get_user_bad(void)
        int __ret_gu;                                                   \
        unsigned long __val_gu;                                         \
        __chk_user_ptr(ptr);                                            \
+       might_fault();                                                  \
        switch (sizeof(*(ptr))) {                                       \
        case 1:                                                         \
                __get_user_x(1, __ret_gu, __val_gu, ptr);               \
@@@ -241,6 -242,7 +242,7 @@@ extern void __put_user_8(void)
        int __ret_pu;                                           \
        __typeof__(*(ptr)) __pu_val;                            \
        __chk_user_ptr(ptr);                                    \
+       might_fault();                                          \
        __pu_val = x;                                           \
        switch (sizeof(*(ptr))) {                               \
        case 1:                                                 \
@@@ -350,14 -352,14 +352,14 @@@ do {                                                                    
  
  #define __put_user_nocheck(x, ptr, size)                      \
  ({                                                            \
 -      long __pu_err;                                          \
 +      int __pu_err;                                           \
        __put_user_size((x), (ptr), (size), __pu_err, -EFAULT); \
        __pu_err;                                               \
  })
  
  #define __get_user_nocheck(x, ptr, size)                              \
  ({                                                                    \
 -      long __gu_err;                                                  \
 +      int __gu_err;                                                   \
        unsigned long __gu_val;                                         \
        __get_user_size(__gu_val, (ptr), (size), __gu_err, -EFAULT);    \
        (x) = (__force __typeof__(*(ptr)))__gu_val;                     \
diff --combined arch/x86/kernel/Makefile
index 88dd768eab6d34980d14aff41178115ed881045e,a9c656f2d6613c0339d05c4fb100022d80c1a755..d364df03c1d6419ce4473e23b420d86167343775
@@@ -12,7 -12,6 +12,7 @@@ CFLAGS_REMOVE_tsc.o = -p
  CFLAGS_REMOVE_rtc.o = -pg
  CFLAGS_REMOVE_paravirt-spinlocks.o = -pg
  CFLAGS_REMOVE_ftrace.o = -pg
 +CFLAGS_REMOVE_early_printk.o = -pg
  endif
  
  #
@@@ -24,9 -23,9 +24,9 @@@ CFLAGS_vsyscall_64.o  := $(PROFILING) -g
  CFLAGS_hpet.o         := $(nostackp)
  CFLAGS_tsc.o          := $(nostackp)
  
 -obj-y                 := process_$(BITS).o signal_$(BITS).o entry_$(BITS).o
 +obj-y                 := process_$(BITS).o signal.o entry_$(BITS).o
  obj-y                 += traps.o irq.o irq_$(BITS).o dumpstack_$(BITS).o
 -obj-y                 += time_$(BITS).o ioport.o ldt.o
 +obj-y                 += time_$(BITS).o ioport.o ldt.o dumpstack.o
  obj-y                 += setup.o i8259.o irqinit_$(BITS).o setup_percpu.o
  obj-$(CONFIG_X86_VISWS)       += visws_quirks.o
  obj-$(CONFIG_X86_32)  += probe_roms_32.o
@@@ -66,7 -65,6 +66,7 @@@ obj-$(CONFIG_X86_LOCAL_APIC)  += apic.o 
  obj-$(CONFIG_X86_IO_APIC)     += io_apic.o
  obj-$(CONFIG_X86_REBOOTFIXUPS)        += reboot_fixups_32.o
  obj-$(CONFIG_DYNAMIC_FTRACE)  += ftrace.o
 +obj-$(CONFIG_FUNCTION_GRAPH_TRACER)   += ftrace.o
  obj-$(CONFIG_KEXEC)           += machine_kexec_$(BITS).o
  obj-$(CONFIG_KEXEC)           += relocate_kernel_$(BITS).o crash.o
  obj-$(CONFIG_CRASH_DUMP)      += crash_dump_$(BITS).o
@@@ -107,8 -105,8 +107,10 @@@ microcode-$(CONFIG_MICROCODE_INTEL)       += 
  microcode-$(CONFIG_MICROCODE_AMD)     += microcode_amd.o
  obj-$(CONFIG_MICROCODE)                       += microcode.o
  
 +obj-$(CONFIG_X86_CHECK_BIOS_CORRUPTION) += check.o
 +
+ obj-$(CONFIG_SWIOTLB)                 += pci-swiotlb_64.o # NB rename without _64
  ###
  # 64 bit specific files
  ifeq ($(CONFIG_X86_64),y)
          obj-$(CONFIG_GART_IOMMU)      += pci-gart_64.o aperture_64.o
          obj-$(CONFIG_CALGARY_IOMMU)   += pci-calgary_64.o tce_64.o
          obj-$(CONFIG_AMD_IOMMU)               += amd_iommu_init.o amd_iommu.o
-         obj-$(CONFIG_SWIOTLB)         += pci-swiotlb_64.o
  
          obj-$(CONFIG_PCI_MMCONFIG)    += mmconf-fam10h_64.o
  endif
diff --combined arch/x86/kernel/pci-dma.c
index 7a3dfceb90e47a341295ca3f1201c232b5beb814,00e07447a5bd4881f474e843f5a93abdf52fcb49..19a1044a0cd94fbdc64b6ffd1e3df8ecde4530c9
@@@ -6,7 -6,6 +6,7 @@@
  #include <asm/proto.h>
  #include <asm/dma.h>
  #include <asm/iommu.h>
 +#include <asm/gart.h>
  #include <asm/calgary.h>
  #include <asm/amd_iommu.h>
  
@@@ -31,6 -30,11 +31,6 @@@ int no_iommu __read_mostly
  /* Set this to 1 if there is a HW IOMMU in the system */
  int iommu_detected __read_mostly = 0;
  
 -/* This tells the BIO block layer to assume merging. Default to off
 -   because we cannot guarantee merging later. */
 -int iommu_bio_merge __read_mostly = 0;
 -EXPORT_SYMBOL(iommu_bio_merge);
 -
  dma_addr_t bad_dma_address __read_mostly = 0;
  EXPORT_SYMBOL(bad_dma_address);
  
@@@ -101,11 -105,15 +101,15 @@@ static void __init dma32_free_bootmem(v
        dma32_bootmem_ptr = NULL;
        dma32_bootmem_size = 0;
  }
+ #endif
  
  void __init pci_iommu_alloc(void)
  {
+ #ifdef CONFIG_X86_64
        /* free the range so iommu could get some range less than 4G */
        dma32_free_bootmem();
+ #endif
        /*
         * The order of these functions is important for
         * fall-back/fail-over reasons
        pci_swiotlb_init();
  }
  
- unsigned long iommu_nr_pages(unsigned long addr, unsigned long len)
- {
-       unsigned long size = roundup((addr & ~PAGE_MASK) + len, PAGE_SIZE);
-       return size >> PAGE_SHIFT;
- }
- EXPORT_SYMBOL(iommu_nr_pages);
- #endif
  void *dma_generic_alloc_coherent(struct device *dev, size_t size,
                                 dma_addr_t *dma_addr, gfp_t flag)
  {
@@@ -184,6 -183,7 +179,6 @@@ static __init int iommu_setup(char *p
                }
  
                if (!strncmp(p, "biomerge", 8)) {
 -                      iommu_bio_merge = 4096;
                        iommu_merge = 1;
                        force_iommu = 1;
                }
@@@ -295,8 -295,8 +290,8 @@@ fs_initcall(pci_iommu_init)
  static __devinit void via_no_dac(struct pci_dev *dev)
  {
        if ((dev->class >> 8) == PCI_CLASS_BRIDGE_PCI && forbid_dac == 0) {
 -              printk(KERN_INFO "PCI: VIA PCI bridge detected."
 -                               "Disabling DAC.\n");
 +              printk(KERN_INFO
 +                      "PCI: VIA PCI bridge detected. Disabling DAC.\n");
                forbid_dac = 1;
        }
  }
diff --combined arch/x86/mm/init_32.c
index 800e1d94c1b5580e627ddb0a2ef81cfd10d031cd,2b4b14fc0c048ba683776f59282b5d64574c4bd9..8655b5bb0963f807a0fe49dcd3da367db19379f0
@@@ -21,6 -21,7 +21,7 @@@
  #include <linux/init.h>
  #include <linux/highmem.h>
  #include <linux/pagemap.h>
+ #include <linux/pci.h>
  #include <linux/pfn.h>
  #include <linux/poison.h>
  #include <linux/bootmem.h>
@@@ -67,7 -68,7 +68,7 @@@ static unsigned long __meminitdata tabl
  
  static int __initdata after_init_bootmem;
  
 -static __init void *alloc_low_page(unsigned long *phys)
 +static __init void *alloc_low_page(void)
  {
        unsigned long pfn = table_end++;
        void *adr;
@@@ -77,6 -78,7 +78,6 @@@
  
        adr = __va(pfn * PAGE_SIZE);
        memset(adr, 0, PAGE_SIZE);
 -      *phys  = pfn * PAGE_SIZE;
        return adr;
  }
  
@@@ -91,17 -93,16 +92,17 @@@ static pmd_t * __init one_md_table_init
        pmd_t *pmd_table;
  
  #ifdef CONFIG_X86_PAE
 -      unsigned long phys;
        if (!(pgd_val(*pgd) & _PAGE_PRESENT)) {
                if (after_init_bootmem)
                        pmd_table = (pmd_t *)alloc_bootmem_low_pages(PAGE_SIZE);
                else
 -                      pmd_table = (pmd_t *)alloc_low_page(&phys);
 +                      pmd_table = (pmd_t *)alloc_low_page();
                paravirt_alloc_pmd(&init_mm, __pa(pmd_table) >> PAGE_SHIFT);
                set_pgd(pgd, __pgd(__pa(pmd_table) | _PAGE_PRESENT));
                pud = pud_offset(pgd, 0);
                BUG_ON(pmd_table != pmd_offset(pud, 0));
 +
 +              return pmd_table;
        }
  #endif
        pud = pud_offset(pgd, 0);
@@@ -126,8 -127,10 +127,8 @@@ static pte_t * __init one_page_table_in
                        if (!page_table)
                                page_table =
                                (pte_t *)alloc_bootmem_low_pages(PAGE_SIZE);
 -              } else {
 -                      unsigned long phys;
 -                      page_table = (pte_t *)alloc_low_page(&phys);
 -              }
 +              } else
 +                      page_table = (pte_t *)alloc_low_page();
  
                paravirt_alloc_pte(&init_mm, __pa(page_table) >> PAGE_SHIFT);
                set_pmd(pmd, __pmd(__pa(page_table) | _PAGE_TABLE));
@@@ -967,6 -970,10 +968,8 @@@ void __init mem_init(void
        int codesize, reservedpages, datasize, initsize;
        int tmp;
  
 -      start_periodic_check_for_corruption();
 -
+       pci_iommu_alloc();
  #ifdef CONFIG_FLATMEM
        BUG_ON(!mem_map);
  #endif
                (unsigned long)&_text, (unsigned long)&_etext,
                ((unsigned long)&_etext - (unsigned long)&_text) >> 10);
  
 +      /*
 +       * Check boundaries twice: Some fundamental inconsistencies can
 +       * be detected at build time already.
 +       */
 +#define __FIXADDR_TOP (-PAGE_SIZE)
 +#ifdef CONFIG_HIGHMEM
 +      BUILD_BUG_ON(PKMAP_BASE + LAST_PKMAP*PAGE_SIZE  > FIXADDR_START);
 +      BUILD_BUG_ON(VMALLOC_END                        > PKMAP_BASE);
 +#endif
 +#define high_memory (-128UL << 20)
 +      BUILD_BUG_ON(VMALLOC_START                      >= VMALLOC_END);
 +#undef high_memory
 +#undef __FIXADDR_TOP
 +
  #ifdef CONFIG_HIGHMEM
        BUG_ON(PKMAP_BASE + LAST_PKMAP*PAGE_SIZE        > FIXADDR_START);
        BUG_ON(VMALLOC_END                              > PKMAP_BASE);
  #endif
 -      BUG_ON(VMALLOC_START                            > VMALLOC_END);
 +      BUG_ON(VMALLOC_START                            >= VMALLOC_END);
        BUG_ON((unsigned long)high_memory               > VMALLOC_START);
  
        if (boot_cpu_data.wp_works_ok < 0)
diff --combined include/asm-generic/bug.h
index 4c794d73fb8484e47fb0708beb7a470724707046,b8ba6941f587c7d12ac850f621826efd00107c35..8af276361bf26c662bd268fcec2c1431254ce03d
@@@ -8,17 -8,9 +8,17 @@@
  #ifdef CONFIG_GENERIC_BUG
  #ifndef __ASSEMBLY__
  struct bug_entry {
 +#ifndef CONFIG_GENERIC_BUG_RELATIVE_POINTERS
        unsigned long   bug_addr;
 +#else
 +      signed int      bug_addr_disp;
 +#endif
  #ifdef CONFIG_DEBUG_BUGVERBOSE
 +#ifndef CONFIG_GENERIC_BUG_RELATIVE_POINTERS
        const char      *file;
 +#else
 +      signed int      file_disp;
 +#endif
        unsigned short  line;
  #endif
        unsigned short  flags;
  
  #ifndef __WARN
  #ifndef __ASSEMBLY__
- extern void warn_on_slowpath(const char *file, const int line);
  extern void warn_slowpath(const char *file, const int line,
                const char *fmt, ...) __attribute__((format(printf, 3, 4)));
  #define WANT_WARN_ON_SLOWPATH
  #endif
- #define __WARN() warn_on_slowpath(__FILE__, __LINE__)
- #define __WARN_printf(arg...) warn_slowpath(__FILE__, __LINE__, arg)
+ #define __WARN()              warn_slowpath(__FILE__, __LINE__, NULL)
+ #define __WARN_printf(arg...) warn_slowpath(__FILE__, __LINE__, arg)
  #else
- #define __WARN_printf(arg...) do { printk(arg); __WARN(); } while (0)
+ #define __WARN_printf(arg...) do { printk(arg); __WARN(); } while (0)
  #endif
  
  #ifndef WARN_ON
diff --combined include/linux/hardirq.h
index 89a56d79e4c6c4987531a10ad8fed17f3d597bf7,9b70b9231693fc07a446f892220d2c91a6300672..f83288347dda3455e2deaa057112707accacaa93
@@@ -4,7 -4,6 +4,7 @@@
  #include <linux/preempt.h>
  #include <linux/smp_lock.h>
  #include <linux/lockdep.h>
 +#include <linux/ftrace_irq.h>
  #include <asm/hardirq.h>
  #include <asm/system.h>
  
@@@ -119,13 -118,17 +119,17 @@@ static inline void account_system_vtime
  }
  #endif
  
- #if defined(CONFIG_PREEMPT_RCU) && defined(CONFIG_NO_HZ)
+ #if defined(CONFIG_NO_HZ) && !defined(CONFIG_CLASSIC_RCU)
  extern void rcu_irq_enter(void);
  extern void rcu_irq_exit(void);
+ extern void rcu_nmi_enter(void);
+ extern void rcu_nmi_exit(void);
  #else
  # define rcu_irq_enter() do { } while (0)
  # define rcu_irq_exit() do { } while (0)
- #endif /* CONFIG_PREEMPT_RCU */
+ # define rcu_nmi_enter() do { } while (0)
+ # define rcu_nmi_exit() do { } while (0)
+ #endif /* #if defined(CONFIG_NO_HZ) && !defined(CONFIG_CLASSIC_RCU) */
  
  /*
   * It is safe to do non-atomic ops on ->hardirq_context,
   */
  #define __irq_enter()                                 \
        do {                                            \
-               rcu_irq_enter();                        \
                account_system_vtime(current);          \
                add_preempt_count(HARDIRQ_OFFSET);      \
                trace_hardirq_enter();                  \
@@@ -154,7 -156,6 +157,6 @@@ extern void irq_enter(void)
                trace_hardirq_exit();                   \
                account_system_vtime(current);          \
                sub_preempt_count(HARDIRQ_OFFSET);      \
-               rcu_irq_exit();                         \
        } while (0)
  
  /*
   */
  extern void irq_exit(void);
  
 -#define nmi_enter()           do { lockdep_off(); rcu_nmi_enter(); __irq_enter(); } while (0)
 -#define nmi_exit()            do { __irq_exit(); rcu_nmi_exit(); lockdep_on(); } while (0)
 +#define nmi_enter()                           \
 +      do {                                    \
 +              ftrace_nmi_enter();             \
 +              lockdep_off();                  \
++              rcu_nmi_enter();                \
 +              __irq_enter();                  \
 +      } while (0)
++
 +#define nmi_exit()                            \
 +      do {                                    \
 +              __irq_exit();                   \
++              rcu_nmi_exit();                 \
 +              lockdep_on();                   \
 +              ftrace_nmi_exit();              \
 +      } while (0)
  
  #endif /* LINUX_HARDIRQ_H */
diff --combined include/linux/kernel.h
index 6002ae76785c9aeba493c0960aa5de508b139b49,269df5a17b30af1b7349c131da05abec8aa95046..ca9ff6411dfa778fed80eae8360b6136529f8f11
@@@ -141,6 -141,15 +141,15 @@@ extern int _cond_resched(void)
                (__x < 0) ? -__x : __x;         \
        })
  
+ #ifdef CONFIG_PROVE_LOCKING
+ void might_fault(void);
+ #else
+ static inline void might_fault(void)
+ {
+       might_sleep();
+ }
+ #endif
  extern struct atomic_notifier_head panic_notifier_list;
  extern long (*panic_blink)(long time);
  NORET_TYPE void panic(const char * fmt, ...)
@@@ -188,6 -197,8 +197,8 @@@ extern unsigned long long memparse(cons
  extern int core_kernel_text(unsigned long addr);
  extern int __kernel_text_address(unsigned long addr);
  extern int kernel_text_address(unsigned long addr);
+ extern int func_ptr_is_kernel_text(void *ptr);
  struct pid;
  extern struct pid *session_of_pgrp(struct pid *pgrp);
  
@@@ -361,6 -372,18 +372,6 @@@ static inline char *pack_hex_byte(char 
        ((unsigned char *)&addr)[3]
  #define NIPQUAD_FMT "%u.%u.%u.%u"
  
 -#define NIP6(addr) \
 -      ntohs((addr).s6_addr16[0]), \
 -      ntohs((addr).s6_addr16[1]), \
 -      ntohs((addr).s6_addr16[2]), \
 -      ntohs((addr).s6_addr16[3]), \
 -      ntohs((addr).s6_addr16[4]), \
 -      ntohs((addr).s6_addr16[5]), \
 -      ntohs((addr).s6_addr16[6]), \
 -      ntohs((addr).s6_addr16[7])
 -#define NIP6_FMT "%04x:%04x:%04x:%04x:%04x:%04x:%04x:%04x"
 -#define NIP6_SEQFMT "%04x%04x%04x%04x%04x%04x%04x%04x"
 -
  #if defined(__LITTLE_ENDIAN)
  #define HIPQUAD(addr) \
        ((unsigned char *)&addr)[3], \
diff --combined include/linux/rcupdate.h
index 895dc9c1088c767ce4706814b806c20248423241,bfd289aff5766bdd2921314fdbea014a7bf84a7f..1168fbcea8d4bc6a42d4abccf5b05e47266aa418
@@@ -52,11 -52,15 +52,15 @@@ struct rcu_head 
        void (*func)(struct rcu_head *head);
  };
  
- #ifdef CONFIG_CLASSIC_RCU
+ #if defined(CONFIG_CLASSIC_RCU)
  #include <linux/rcuclassic.h>
- #else /* #ifdef CONFIG_CLASSIC_RCU */
+ #elif defined(CONFIG_TREE_RCU)
+ #include <linux/rcutree.h>
+ #elif defined(CONFIG_PREEMPT_RCU)
  #include <linux/rcupreempt.h>
- #endif /* #else #ifdef CONFIG_CLASSIC_RCU */
+ #else
+ #error "Unknown RCU implementation specified to kernel configuration"
+ #endif /* #else #if defined(CONFIG_CLASSIC_RCU) */
  
  #define RCU_HEAD_INIT         { .next = NULL, .func = NULL }
  #define RCU_HEAD(head) struct rcu_head head = RCU_HEAD_INIT
   * on the write-side to insure proper synchronization.
   */
  #define rcu_read_lock_sched() preempt_disable()
 +#define rcu_read_lock_sched_notrace() preempt_disable_notrace()
  
  /*
   * rcu_read_unlock_sched - marks the end of a RCU-classic critical section
   * See rcu_read_lock_sched for more information.
   */
  #define rcu_read_unlock_sched() preempt_enable()
 +#define rcu_read_unlock_sched_notrace() preempt_enable_notrace()
  
  
  
diff --combined init/Kconfig
index 8a63c404ef449d391a08ffd5760f46325f76babf,6b0fdedf3596a44aabe5015af2afffc6b3c6e7ab..13627191a60d194de08aaa4b410aa752cfd7cb21
@@@ -588,13 -588,6 +588,13 @@@ config KALLSYMS_AL
  
           Say N.
  
 +config KALLSYMS_STRIP_GENERATED
 +      bool "Strip machine generated symbols from kallsyms"
 +      depends on KALLSYMS_ALL
 +      default y
 +      help
 +        Say N if you want kallsyms to retain even machine generated symbols.
 +
  config KALLSYMS_EXTRA_PASS
        bool "Do an extra kallsyms pass"
        depends on KALLSYMS
@@@ -815,7 -808,6 +815,7 @@@ config TRACEPOINT
  
  config MARKERS
        bool "Activate markers"
 +      depends on TRACEPOINTS
        help
          Place an empty function call at each marker site. Can be
          dynamically changed for a probe function.
@@@ -936,10 -928,90 +936,90 @@@ source "block/Kconfig
  config PREEMPT_NOTIFIERS
        bool
  
+ choice
+       prompt "RCU Implementation"
+       default CLASSIC_RCU
  config CLASSIC_RCU
-       def_bool !PREEMPT_RCU
+       bool "Classic RCU"
        help
          This option selects the classic RCU implementation that is
          designed for best read-side performance on non-realtime
-         systems.  Classic RCU is the default.  Note that the
-         PREEMPT_RCU symbol is used to select/deselect this option.
+         systems.
+         Select this option if you are unsure.
+ config TREE_RCU
+       bool "Tree-based hierarchical RCU"
+       help
+         This option selects the RCU implementation that is
+         designed for very large SMP system with hundreds or
+         thousands of CPUs.
+ config PREEMPT_RCU
+       bool "Preemptible RCU"
+       depends on PREEMPT
+       help
+         This option reduces the latency of the kernel by making certain
+         RCU sections preemptible. Normally RCU code is non-preemptible, if
+         this option is selected then read-only RCU sections become
+         preemptible. This helps latency, but may expose bugs due to
+         now-naive assumptions about each RCU read-side critical section
+         remaining on a given CPU through its execution.
+ endchoice
+ config RCU_TRACE
+       bool "Enable tracing for RCU"
+       depends on TREE_RCU || PREEMPT_RCU
+       help
+         This option provides tracing in RCU which presents stats
+         in debugfs for debugging RCU implementation.
+         Say Y here if you want to enable RCU tracing
+         Say N if you are unsure.
+ config RCU_FANOUT
+       int "Tree-based hierarchical RCU fanout value"
+       range 2 64 if 64BIT
+       range 2 32 if !64BIT
+       depends on TREE_RCU
+       default 64 if 64BIT
+       default 32 if !64BIT
+       help
+         This option controls the fanout of hierarchical implementations
+         of RCU, allowing RCU to work efficiently on machines with
+         large numbers of CPUs.  This value must be at least the cube
+         root of NR_CPUS, which allows NR_CPUS up to 32,768 for 32-bit
+         systems and up to 262,144 for 64-bit systems.
+         Select a specific number if testing RCU itself.
+         Take the default if unsure.
+ config RCU_FANOUT_EXACT
+       bool "Disable tree-based hierarchical RCU auto-balancing"
+       depends on TREE_RCU
+       default n
+       help
+         This option forces use of the exact RCU_FANOUT value specified,
+         regardless of imbalances in the hierarchy.  This is useful for
+         testing RCU itself, and might one day be useful on systems with
+         strong NUMA behavior.
+         Without RCU_FANOUT_EXACT, the code will balance the hierarchy.
+         Say N if unsure.
+ config TREE_RCU_TRACE
+       def_bool RCU_TRACE && TREE_RCU
+       select DEBUG_FS
+       help
+         This option provides tracing for the TREE_RCU implementation,
+         permitting Makefile to trivially select kernel/rcutree_trace.c.
+ config PREEMPT_RCU_TRACE
+       def_bool RCU_TRACE && PREEMPT_RCU
+       select DEBUG_FS
+       help
+         This option provides tracing for the PREEMPT_RCU implementation,
+         permitting Makefile to trivially select kernel/rcupreempt_trace.c.
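
As a sanity check on the RCU_FANOUT help text above (which requires the fanout to be at
least the cube root of NR_CPUS), a three-level tree with fanout F covers F * F * F CPUs.
The stand-alone sketch below is not part of this merge; it just reproduces the numbers
quoted in the help text:

#include <stdio.h>

/* Not kernel code; a user-space sketch of the RCU_FANOUT arithmetic:
 * with three levels in the hierarchy, fanout F covers F * F * F CPUs. */
int main(void)
{
	long fanout;

	for (fanout = 32; fanout <= 64; fanout *= 2)
		printf("RCU_FANOUT=%ld covers up to %ld CPUs\n",
		       fanout, fanout * fanout * fanout);
	return 0;	/* prints 32768 (32-bit default) and 262144 (64-bit default) */
}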
diff --combined kernel/Makefile
index 027edda6351137b5f8c1ee085c2c5320d204e904,b4fdbbff5ec069feeaa21250c1ebd2f6cc28464b..e1c5bf3365c0a4cdee8e0279f31b3c6742a9c6ef
@@@ -9,7 -9,7 +9,7 @@@ obj-y     = sched.o fork.o exec_domain.
            rcupdate.o extable.o params.o posix-timers.o \
            kthread.o wait.o kfifo.o sys_ni.o posix-cpu-timers.o mutex.o \
            hrtimer.o rwsem.o nsproxy.o srcu.o semaphore.o \
 -          notifier.o ksysfs.o pm_qos_params.o sched_clock.o
 +          notifier.o ksysfs.o pm_qos_params.o sched_clock.o cred.o
  
  ifdef CONFIG_FUNCTION_TRACER
  # Do not trace debug files and internal ftrace files
@@@ -19,6 -19,7 +19,6 @@@ CFLAGS_REMOVE_mutex-debug.o = -p
  CFLAGS_REMOVE_rtmutex-debug.o = -pg
  CFLAGS_REMOVE_cgroup-debug.o = -pg
  CFLAGS_REMOVE_sched_clock.o = -pg
 -CFLAGS_REMOVE_sched.o = -pg
  endif
  
  obj-$(CONFIG_FREEZER) += freezer.o
@@@ -73,10 -74,10 +73,10 @@@ obj-$(CONFIG_GENERIC_HARDIRQS) += irq
  obj-$(CONFIG_SECCOMP) += seccomp.o
  obj-$(CONFIG_RCU_TORTURE_TEST) += rcutorture.o
  obj-$(CONFIG_CLASSIC_RCU) += rcuclassic.o
+ obj-$(CONFIG_TREE_RCU) += rcutree.o
  obj-$(CONFIG_PREEMPT_RCU) += rcupreempt.o
- ifeq ($(CONFIG_PREEMPT_RCU),y)
- obj-$(CONFIG_RCU_TRACE) += rcupreempt_trace.o
- endif
+ obj-$(CONFIG_TREE_RCU_TRACE) += rcutree_trace.o
+ obj-$(CONFIG_PREEMPT_RCU_TRACE) += rcupreempt_trace.o
  obj-$(CONFIG_RELAY) += relay.o
  obj-$(CONFIG_SYSCTL) += utsname_sysctl.o
  obj-$(CONFIG_TASK_DELAY_ACCT) += delayacct.o
@@@ -89,7 -90,7 +89,7 @@@ obj-$(CONFIG_FUNCTION_TRACER) += trace
  obj-$(CONFIG_TRACING) += trace/
  obj-$(CONFIG_SMP) += sched_cpupri.o
  
 -ifneq ($(CONFIG_SCHED_NO_NO_OMIT_FRAME_POINTER),y)
 +ifneq ($(CONFIG_SCHED_OMIT_FRAME_POINTER),y)
  # According to Alan Modra <alan@linuxcare.com.au>, the -fno-omit-frame-pointer is
  # needed for x86 only.  Why this used to be enabled for all architectures is beyond
  # me.  I suspect most platforms don't need this, but until we know that for sure
diff --combined kernel/exit.c
index c7422ca920382b47445ace263bdba0fe6fbf6bf8,30fcdf16737a2bb013a78b62bc6b7f1eb6eb165b..a946221879d7c03b2263ae4842e2de15b00e30a9
  #include <linux/blkdev.h>
  #include <linux/task_io_accounting_ops.h>
  #include <linux/tracehook.h>
 +#include <linux/init_task.h>
  #include <trace/sched.h>
  
  #include <asm/uaccess.h>
  #include <asm/unistd.h>
  #include <asm/pgtable.h>
  #include <asm/mmu_context.h>
 +#include "cred-internals.h"
 +
 +DEFINE_TRACE(sched_process_free);
 +DEFINE_TRACE(sched_process_exit);
 +DEFINE_TRACE(sched_process_wait);
  
  static void exit_mm(struct task_struct * tsk);
  
@@@ -170,10 -164,7 +170,10 @@@ void release_task(struct task_struct * 
        int zap_leader;
  repeat:
        tracehook_prepare_release_task(p);
 -      atomic_dec(&p->user->processes);
 +      /* don't need to get the RCU readlock here - the process is dead and
 +       * can't be modifying its own credentials */
 +      atomic_dec(&__task_cred(p)->user->processes);
 +
        proc_flush_task(p);
        write_lock_irq(&tasklist_lock);
        tracehook_finish_release_task(p);
@@@ -348,12 -339,12 +348,12 @@@ static void reparent_to_kthreadd(void
        /* cpus_allowed? */
        /* rt_priority? */
        /* signals? */
 -      security_task_reparent_to_init(current);
        memcpy(current->signal->rlim, init_task.signal->rlim,
               sizeof(current->signal->rlim));
 -      atomic_inc(&(INIT_USER->__count));
 +
 +      atomic_inc(&init_cred.usage);
 +      commit_creds(&init_cred);
        write_unlock_irq(&tasklist_lock);
 -      switch_uid(INIT_USER);
  }
  
  void __set_special_pids(struct pid *pid)
@@@ -1087,6 -1078,7 +1087,6 @@@ NORET_TYPE void do_exit(long code
        check_stack_usage();
        exit_thread();
        cgroup_exit(tsk, 1);
 -      exit_keys(tsk);
  
        if (group_dead && tsk->signal->leader)
                disassociate_ctty(1);
        preempt_disable();
        /* causes final put_task_struct in finish_task_switch(). */
        tsk->state = TASK_DEAD;
 -
        schedule();
        BUG();
        /* Avoid "noreturn function does return".  */
@@@ -1270,12 -1263,12 +1270,12 @@@ static int wait_task_zombie(struct task
        unsigned long state;
        int retval, status, traced;
        pid_t pid = task_pid_vnr(p);
 +      uid_t uid = __task_cred(p)->uid;
  
        if (!likely(options & WEXITED))
                return 0;
  
        if (unlikely(options & WNOWAIT)) {
 -              uid_t uid = p->uid;
                int exit_code = p->exit_code;
                int why, status;
  
                 * group, which consolidates times for all threads in the
                 * group including the group leader.
                 */
+               thread_group_cputime(p, &cputime);
                spin_lock_irq(&p->parent->sighand->siglock);
                psig = p->parent->signal;
                sig = p->signal;
-               thread_group_cputime(p, &cputime);
                psig->cutime =
                        cputime_add(psig->cutime,
                        cputime_add(cputime.utime,
        if (!retval && infop)
                retval = put_user(pid, &infop->si_pid);
        if (!retval && infop)
 -              retval = put_user(p->uid, &infop->si_uid);
 +              retval = put_user(uid, &infop->si_uid);
        if (!retval)
                retval = pid;
  
@@@ -1461,8 -1454,7 +1461,8 @@@ static int wait_task_stopped(int ptrace
        if (!unlikely(options & WNOWAIT))
                p->exit_code = 0;
  
 -      uid = p->uid;
 +      /* don't need the RCU readlock here as we're holding a spinlock */
 +      uid = __task_cred(p)->uid;
  unlock_sig:
        spin_unlock_irq(&p->sighand->siglock);
        if (!exit_code)
@@@ -1536,10 -1528,10 +1536,10 @@@ static int wait_task_continued(struct t
        }
        if (!unlikely(options & WNOWAIT))
                p->signal->flags &= ~SIGNAL_STOP_CONTINUED;
 +      uid = __task_cred(p)->uid;
        spin_unlock_irq(&p->sighand->siglock);
  
        pid = task_pid_vnr(p);
 -      uid = p->uid;
        get_task_struct(p);
        read_unlock(&tasklist_lock);
  
diff --combined kernel/extable.c
index feb0317cf09ab5960d0288a31ec00d03ae5a6335,adf0cc9c02d6873d50c48edf4c34b44a32bf48b9..e136ed8d82ba56ab81283a48fe3d6cfb41c23c79
@@@ -17,7 -17,6 +17,7 @@@
  */
  #include <linux/module.h>
  #include <linux/init.h>
 +#include <linux/ftrace.h>
  #include <asm/uaccess.h>
  #include <asm/sections.h>
  
@@@ -41,7 -40,7 +41,7 @@@ const struct exception_table_entry *sea
        return e;
  }
  
 -int core_kernel_text(unsigned long addr)
 +__notrace_funcgraph int core_kernel_text(unsigned long addr)
  {
        if (addr >= (unsigned long)_stext &&
            addr <= (unsigned long)_etext)
@@@ -54,7 -53,7 +54,7 @@@
        return 0;
  }
  
 -int __kernel_text_address(unsigned long addr)
 +__notrace_funcgraph int __kernel_text_address(unsigned long addr)
  {
        if (core_kernel_text(addr))
                return 1;
@@@ -67,3 -66,19 +67,19 @@@ int kernel_text_address(unsigned long a
                return 1;
        return module_text_address(addr) != NULL;
  }
+ /*
+  * On some architectures (PPC64, IA64) function pointers
+  * are actually only tokens to some data that then holds the
+  * real function address. As a result, to find if a function
+  * pointer is part of the kernel text, we need to do some
+  * special dereferencing first.
+  */
+ int func_ptr_is_kernel_text(void *ptr)
+ {
+       unsigned long addr;
+       addr = (unsigned long) dereference_function_descriptor(ptr);
+       if (core_kernel_text(addr))
+               return 1;
+       return module_text_address(addr) != NULL;
+ }
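
A minimal, hypothetical usage sketch of the new helper (the caller name validate_callback
is made up here and not part of this merge): a subsystem handed a callback pointer can use
func_ptr_is_kernel_text() to confirm the pointer resolves to kernel or module text even on
descriptor-based architectures such as PPC64 and IA64.

/* Hypothetical caller, not taken from this merge: reject a callback
 * whose function pointer does not resolve to kernel or module text. */
static int validate_callback(void *fn)
{
	if (!func_ptr_is_kernel_text(fn))
		return -EINVAL;
	return 0;
}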
diff --combined kernel/futex.c
index 4fe790e89d0f34af1cc24359d32bca3b58e970ca,b4f87bac91c1afb824404fe8fb6ce90d43669d41..7c6cbabe52b3c0368e800790b638e82eb00aa657
@@@ -92,11 -92,12 +92,12 @@@ struct futex_pi_state 
   * A futex_q has a woken state, just like tasks have TASK_RUNNING.
   * It is considered woken when plist_node_empty(&q->list) || q->lock_ptr == 0.
   * The order of wakup is always to make the first condition true, then
-  * wake up q->waiters, then make the second condition true.
+  * wake up q->waiter, then make the second condition true.
   */
  struct futex_q {
        struct plist_node list;
-       wait_queue_head_t waiters;
+       /* There can only be a single waiter */
+       wait_queue_head_t waiter;
  
        /* Which hash list lock to use: */
        spinlock_t *lock_ptr;
@@@ -122,24 -123,6 +123,6 @@@ struct futex_hash_bucket 
  
  static struct futex_hash_bucket futex_queues[1<<FUTEX_HASHBITS];
  
- /*
-  * Take mm->mmap_sem, when futex is shared
-  */
- static inline void futex_lock_mm(struct rw_semaphore *fshared)
- {
-       if (fshared)
-               down_read(fshared);
- }
- /*
-  * Release mm->mmap_sem, when the futex is shared
-  */
- static inline void futex_unlock_mm(struct rw_semaphore *fshared)
- {
-       if (fshared)
-               up_read(fshared);
- }
  /*
   * We hash on the keys returned from get_futex_key (see below).
   */
@@@ -161,6 -144,45 +144,45 @@@ static inline int match_futex(union fut
                && key1->both.offset == key2->both.offset);
  }
  
+ /*
+  * Take a reference to the resource addressed by a key.
+  * Can be called while holding spinlocks.
+  *
+  */
+ static void get_futex_key_refs(union futex_key *key)
+ {
+       if (!key->both.ptr)
+               return;
+       switch (key->both.offset & (FUT_OFF_INODE|FUT_OFF_MMSHARED)) {
+       case FUT_OFF_INODE:
+               atomic_inc(&key->shared.inode->i_count);
+               break;
+       case FUT_OFF_MMSHARED:
+               atomic_inc(&key->private.mm->mm_count);
+               break;
+       }
+ }
+ /*
+  * Drop a reference to the resource addressed by a key.
+  * The hash bucket spinlock must not be held.
+  */
+ static void drop_futex_key_refs(union futex_key *key)
+ {
+       if (!key->both.ptr)
+               return;
+       switch (key->both.offset & (FUT_OFF_INODE|FUT_OFF_MMSHARED)) {
+       case FUT_OFF_INODE:
+               iput(key->shared.inode);
+               break;
+       case FUT_OFF_MMSHARED:
+               mmdrop(key->private.mm);
+               break;
+       }
+ }
  /**
   * get_futex_key - Get parameters which are the keys for a futex.
   * @uaddr: virtual address of the futex
   * For other futexes, it points to &current->mm->mmap_sem and
   * caller must have taken the reader lock. but NOT any spinlocks.
   */
- static int get_futex_key(u32 __user *uaddr, struct rw_semaphore *fshared,
-                        union futex_key *key)
+ static int get_futex_key(u32 __user *uaddr, int fshared, union futex_key *key)
  {
        unsigned long address = (unsigned long)uaddr;
        struct mm_struct *mm = current->mm;
-       struct vm_area_struct *vma;
        struct page *page;
        int err;
  
                        return -EFAULT;
                key->private.mm = mm;
                key->private.address = address;
+               get_futex_key_refs(key);
                return 0;
        }
-       /*
-        * The futex is hashed differently depending on whether
-        * it's in a shared or private mapping.  So check vma first.
-        */
-       vma = find_extend_vma(mm, address);
-       if (unlikely(!vma))
-               return -EFAULT;
  
-       /*
-        * Permissions.
-        */
-       if (unlikely((vma->vm_flags & (VM_IO|VM_READ)) != VM_READ))
-               return (vma->vm_flags & VM_IO) ? -EPERM : -EACCES;
+ again:
+       err = get_user_pages_fast(address, 1, 0, &page);
+       if (err < 0)
+               return err;
+       lock_page(page);
+       if (!page->mapping) {
+               unlock_page(page);
+               put_page(page);
+               goto again;
+       }
  
        /*
         * Private mappings are handled in a simple way.
         *
         * NOTE: When userspace waits on a MAP_SHARED mapping, even if
         * it's a read-only handle, it's expected that futexes attach to
-        * the object not the particular process.  Therefore we use
-        * VM_MAYSHARE here, not VM_SHARED which is restricted to shared
-        * mappings of _writable_ handles.
+        * the object not the particular process.
         */
-       if (likely(!(vma->vm_flags & VM_MAYSHARE))) {
-               key->both.offset |= FUT_OFF_MMSHARED; /* reference taken on mm */
+       if (PageAnon(page)) {
+               key->both.offset |= FUT_OFF_MMSHARED; /* ref taken on mm */
                key->private.mm = mm;
                key->private.address = address;
-               return 0;
+       } else {
+               key->both.offset |= FUT_OFF_INODE; /* inode-based key */
+               key->shared.inode = page->mapping->host;
+               key->shared.pgoff = page->index;
        }
  
-       /*
-        * Linear file mappings are also simple.
-        */
-       key->shared.inode = vma->vm_file->f_path.dentry->d_inode;
-       key->both.offset |= FUT_OFF_INODE; /* inode-based key. */
-       if (likely(!(vma->vm_flags & VM_NONLINEAR))) {
-               key->shared.pgoff = (((address - vma->vm_start) >> PAGE_SHIFT)
-                                    + vma->vm_pgoff);
-               return 0;
-       }
+       get_futex_key_refs(key);
  
-       /*
-        * We could walk the page table to read the non-linear
-        * pte, and get the page index without fetching the page
-        * from swap.  But that's a lot of code to duplicate here
-        * for a rare case, so we simply fetch the page.
-        */
-       err = get_user_pages(current, mm, address, 1, 0, 0, &page, NULL);
-       if (err >= 0) {
-               key->shared.pgoff =
-                       page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT);
-               put_page(page);
-               return 0;
-       }
-       return err;
- }
- /*
-  * Take a reference to the resource addressed by a key.
-  * Can be called while holding spinlocks.
-  *
-  */
- static void get_futex_key_refs(union futex_key *key)
- {
-       if (key->both.ptr == NULL)
-               return;
-       switch (key->both.offset & (FUT_OFF_INODE|FUT_OFF_MMSHARED)) {
-               case FUT_OFF_INODE:
-                       atomic_inc(&key->shared.inode->i_count);
-                       break;
-               case FUT_OFF_MMSHARED:
-                       atomic_inc(&key->private.mm->mm_count);
-                       break;
-       }
+       unlock_page(page);
+       put_page(page);
+       return 0;
  }
  
- /*
-  * Drop a reference to the resource addressed by a key.
-  * The hash bucket spinlock must not be held.
-  */
- static void drop_futex_key_refs(union futex_key *key)
+ static inline
+ void put_futex_key(int fshared, union futex_key *key)
  {
-       if (!key->both.ptr)
-               return;
-       switch (key->both.offset & (FUT_OFF_INODE|FUT_OFF_MMSHARED)) {
-               case FUT_OFF_INODE:
-                       iput(key->shared.inode);
-                       break;
-               case FUT_OFF_MMSHARED:
-                       mmdrop(key->private.mm);
-                       break;
-       }
+       drop_futex_key_refs(key);
  }
  
  static u32 cmpxchg_futex_value_locked(u32 __user *uaddr, u32 uval, u32 newval)
@@@ -328,10 -298,8 +298,8 @@@ static int get_futex_value_locked(u32 *
  
  /*
   * Fault handling.
-  * if fshared is non NULL, current->mm->mmap_sem is already held
   */
- static int futex_handle_fault(unsigned long address,
-                             struct rw_semaphore *fshared, int attempt)
+ static int futex_handle_fault(unsigned long address, int attempt)
  {
        struct vm_area_struct * vma;
        struct mm_struct *mm = current->mm;
        if (attempt > 2)
                return ret;
  
-       if (!fshared)
-               down_read(&mm->mmap_sem);
+       down_read(&mm->mmap_sem);
        vma = find_vma(mm, address);
        if (vma && address >= vma->vm_start &&
            (vma->vm_flags & VM_WRITE)) {
                                current->min_flt++;
                }
        }
-       if (!fshared)
-               up_read(&mm->mmap_sem);
+       up_read(&mm->mmap_sem);
        return ret;
  }
  
@@@ -385,6 -351,7 +351,7 @@@ static int refill_pi_state_cache(void
        /* pi_mutex gets initialized later */
        pi_state->owner = NULL;
        atomic_set(&pi_state->refcount, 1);
+       pi_state->key = FUTEX_KEY_INIT;
  
        current->pi_state_cache = pi_state;
  
@@@ -439,20 -406,13 +406,20 @@@ static void free_pi_state(struct futex_
  static struct task_struct * futex_find_get_task(pid_t pid)
  {
        struct task_struct *p;
 +      const struct cred *cred = current_cred(), *pcred;
  
        rcu_read_lock();
        p = find_task_by_vpid(pid);
 -      if (!p || ((current->euid != p->euid) && (current->euid != p->uid)))
 +      if (!p) {
                p = ERR_PTR(-ESRCH);
 -      else
 -              get_task_struct(p);
 +      } else {
 +              pcred = __task_cred(p);
 +              if (cred->euid != pcred->euid &&
 +                  cred->euid != pcred->uid)
 +                      p = ERR_PTR(-ESRCH);
 +              else
 +                      get_task_struct(p);
 +      }
  
        rcu_read_unlock();
  
@@@ -469,7 -429,7 +436,7 @@@ void exit_pi_state_list(struct task_str
        struct list_head *next, *head = &curr->pi_state_list;
        struct futex_pi_state *pi_state;
        struct futex_hash_bucket *hb;
-       union futex_key key;
+       union futex_key key = FUTEX_KEY_INIT;
  
        if (!futex_cmpxchg_enabled)
                return;
@@@ -614,7 -574,7 +581,7 @@@ static void wake_futex(struct futex_q *
         * The lock in wake_up_all() is a crucial memory barrier after the
         * plist_del() and also before assigning to q->lock_ptr.
         */
-       wake_up_all(&q->waiters);
+       wake_up(&q->waiter);
        /*
         * The waiting task can free the futex_q as soon as this is written,
         * without taking any locks.  This must come last.
@@@ -726,20 -686,17 +693,17 @@@ double_lock_hb(struct futex_hash_bucke
   * Wake up all waiters hashed on the physical page that is mapped
   * to this virtual address:
   */
- static int futex_wake(u32 __user *uaddr, struct rw_semaphore *fshared,
-                     int nr_wake, u32 bitset)
+ static int futex_wake(u32 __user *uaddr, int fshared, int nr_wake, u32 bitset)
  {
        struct futex_hash_bucket *hb;
        struct futex_q *this, *next;
        struct plist_head *head;
-       union futex_key key;
+       union futex_key key = FUTEX_KEY_INIT;
        int ret;
  
        if (!bitset)
                return -EINVAL;
  
-       futex_lock_mm(fshared);
        ret = get_futex_key(uaddr, fshared, &key);
        if (unlikely(ret != 0))
                goto out;
  
        spin_unlock(&hb->lock);
  out:
-       futex_unlock_mm(fshared);
+       put_futex_key(fshared, &key);
        return ret;
  }
  
   * to this virtual address:
   */
  static int
- futex_wake_op(u32 __user *uaddr1, struct rw_semaphore *fshared,
-             u32 __user *uaddr2,
+ futex_wake_op(u32 __user *uaddr1, int fshared, u32 __user *uaddr2,
              int nr_wake, int nr_wake2, int op)
  {
-       union futex_key key1, key2;
+       union futex_key key1 = FUTEX_KEY_INIT, key2 = FUTEX_KEY_INIT;
        struct futex_hash_bucket *hb1, *hb2;
        struct plist_head *head;
        struct futex_q *this, *next;
        int ret, op_ret, attempt = 0;
  
  retryfull:
-       futex_lock_mm(fshared);
        ret = get_futex_key(uaddr1, fshared, &key1);
        if (unlikely(ret != 0))
                goto out;
@@@ -833,18 -787,12 +794,12 @@@ retry
                 */
                if (attempt++) {
                        ret = futex_handle_fault((unsigned long)uaddr2,
-                                                fshared, attempt);
+                                                attempt);
                        if (ret)
                                goto out;
                        goto retry;
                }
  
-               /*
-                * If we would have faulted, release mmap_sem,
-                * fault it in and start all over again.
-                */
-               futex_unlock_mm(fshared);
                ret = get_user(dummy, uaddr2);
                if (ret)
                        return ret;
        if (hb1 != hb2)
                spin_unlock(&hb2->lock);
  out:
-       futex_unlock_mm(fshared);
+       put_futex_key(fshared, &key2);
+       put_futex_key(fshared, &key1);
  
        return ret;
  }
   * Requeue all waiters hashed on one physical page to another
   * physical page.
   */
- static int futex_requeue(u32 __user *uaddr1, struct rw_semaphore *fshared,
-                        u32 __user *uaddr2,
+ static int futex_requeue(u32 __user *uaddr1, int fshared, u32 __user *uaddr2,
                         int nr_wake, int nr_requeue, u32 *cmpval)
  {
-       union futex_key key1, key2;
+       union futex_key key1 = FUTEX_KEY_INIT, key2 = FUTEX_KEY_INIT;
        struct futex_hash_bucket *hb1, *hb2;
        struct plist_head *head1;
        struct futex_q *this, *next;
        int ret, drop_count = 0;
  
   retry:
-       futex_lock_mm(fshared);
        ret = get_futex_key(uaddr1, fshared, &key1);
        if (unlikely(ret != 0))
                goto out;
                        if (hb1 != hb2)
                                spin_unlock(&hb2->lock);
  
-                       /*
-                        * If we would have faulted, release mmap_sem, fault
-                        * it in and start all over again.
-                        */
-                       futex_unlock_mm(fshared);
                        ret = get_user(curval, uaddr1);
  
                        if (!ret)
@@@ -981,7 -921,8 +928,8 @@@ out_unlock
                drop_futex_key_refs(&key1);
  
  out:
-       futex_unlock_mm(fshared);
+       put_futex_key(fshared, &key2);
+       put_futex_key(fshared, &key1);
        return ret;
  }
  
@@@ -990,7 -931,7 +938,7 @@@ static inline struct futex_hash_bucket 
  {
        struct futex_hash_bucket *hb;
  
-       init_waitqueue_head(&q->waiters);
+       init_waitqueue_head(&q->waiter);
  
        get_futex_key_refs(&q->key);
        hb = hash_futex(&q->key);
@@@ -1103,8 -1044,7 +1051,7 @@@ static void unqueue_me_pi(struct futex_
   * private futexes.
   */
  static int fixup_pi_state_owner(u32 __user *uaddr, struct futex_q *q,
-                               struct task_struct *newowner,
-                               struct rw_semaphore *fshared)
+                               struct task_struct *newowner, int fshared)
  {
        u32 newtid = task_pid_vnr(newowner) | FUTEX_WAITERS;
        struct futex_pi_state *pi_state = q->pi_state;
@@@ -1183,7 -1123,7 +1130,7 @@@ retry
  handle_fault:
        spin_unlock(q->lock_ptr);
  
-       ret = futex_handle_fault((unsigned long)uaddr, fshared, attempt++);
+       ret = futex_handle_fault((unsigned long)uaddr, attempt++);
  
        spin_lock(q->lock_ptr);
  
   * In case we must use restart_block to restart a futex_wait,
   * we encode in the 'flags' shared capability
   */
- #define FLAGS_SHARED  1
+ #define FLAGS_SHARED          0x01
+ #define FLAGS_CLOCKRT         0x02
  
  static long futex_wait_restart(struct restart_block *restart);
  
- static int futex_wait(u32 __user *uaddr, struct rw_semaphore *fshared,
-                     u32 val, ktime_t *abs_time, u32 bitset)
+ static int futex_wait(u32 __user *uaddr, int fshared,
+                     u32 val, ktime_t *abs_time, u32 bitset, int clockrt)
  {
        struct task_struct *curr = current;
        DECLARE_WAITQUEUE(wait, curr);
        q.pi_state = NULL;
        q.bitset = bitset;
   retry:
-       futex_lock_mm(fshared);
+       q.key = FUTEX_KEY_INIT;
        ret = get_futex_key(uaddr, fshared, &q.key);
        if (unlikely(ret != 0))
                goto out_release_sem;
        if (unlikely(ret)) {
                queue_unlock(&q, hb);
  
-               /*
-                * If we would have faulted, release mmap_sem, fault it in and
-                * start all over again.
-                */
-               futex_unlock_mm(fshared);
                ret = get_user(uval, uaddr);
  
                if (!ret)
        /* Only actually queue if *uaddr contained val.  */
        queue_me(&q, hb);
  
-       /*
-        * Now the futex is queued and we have checked the data, we
-        * don't want to hold mmap_sem while we sleep.
-        */
-       futex_unlock_mm(fshared);
        /*
         * There might have been scheduling since the queue_me(), as we
         * cannot hold a spinlock across the get_user() in case it
  
        /* add_wait_queue is the barrier after __set_current_state. */
        __set_current_state(TASK_INTERRUPTIBLE);
-       add_wait_queue(&q.waiters, &wait);
+       add_wait_queue(&q.waiter, &wait);
        /*
         * !plist_node_empty() is safe here without any lock.
         * q.lock_ptr != 0 is not safe, because of ordering against wakeup.
                        slack = current->timer_slack_ns;
                        if (rt_task(current))
                                slack = 0;
-                       hrtimer_init_on_stack(&t.timer, CLOCK_MONOTONIC,
-                                               HRTIMER_MODE_ABS);
+                       hrtimer_init_on_stack(&t.timer,
+                                             clockrt ? CLOCK_REALTIME :
+                                             CLOCK_MONOTONIC,
+                                             HRTIMER_MODE_ABS);
                        hrtimer_init_sleeper(&t, current);
                        hrtimer_set_expires_range_ns(&t.timer, *abs_time, slack);
  
  
                if (fshared)
                        restart->futex.flags |= FLAGS_SHARED;
+               if (clockrt)
+                       restart->futex.flags |= FLAGS_CLOCKRT;
                return -ERESTART_RESTARTBLOCK;
        }
  
        queue_unlock(&q, hb);
  
   out_release_sem:
-       futex_unlock_mm(fshared);
+       put_futex_key(fshared, &q.key);
        return ret;
  }
  
  static long futex_wait_restart(struct restart_block *restart)
  {
        u32 __user *uaddr = (u32 __user *)restart->futex.uaddr;
-       struct rw_semaphore *fshared = NULL;
+       int fshared = 0;
        ktime_t t;
  
        t.tv64 = restart->futex.time;
        restart->fn = do_no_restart_syscall;
        if (restart->futex.flags & FLAGS_SHARED)
-               fshared = &current->mm->mmap_sem;
+               fshared = 1;
        return (long)futex_wait(uaddr, fshared, restart->futex.val, &t,
-                               restart->futex.bitset);
+                               restart->futex.bitset,
+                               restart->futex.flags & FLAGS_CLOCKRT);
  }
  
  
   * if there are waiters then it will block, it does PI, etc. (Due to
   * races the kernel might see a 0 value of the futex too.)
   */
- static int futex_lock_pi(u32 __user *uaddr, struct rw_semaphore *fshared,
+ static int futex_lock_pi(u32 __user *uaddr, int fshared,
                         int detect, ktime_t *time, int trylock)
  {
        struct hrtimer_sleeper timeout, *to = NULL;
  
        q.pi_state = NULL;
   retry:
-       futex_lock_mm(fshared);
+       q.key = FUTEX_KEY_INIT;
        ret = get_futex_key(uaddr, fshared, &q.key);
        if (unlikely(ret != 0))
                goto out_release_sem;
                         * exit to complete.
                         */
                        queue_unlock(&q, hb);
-                       futex_unlock_mm(fshared);
                        cond_resched();
                        goto retry;
  
         */
        queue_me(&q, hb);
  
-       /*
-        * Now the futex is queued and we have checked the data, we
-        * don't want to hold mmap_sem while we sleep.
-        */
-       futex_unlock_mm(fshared);
        WARN_ON(!q.pi_state);
        /*
         * Block on the PI mutex:
                ret = ret ? 0 : -EWOULDBLOCK;
        }
  
-       futex_lock_mm(fshared);
        spin_lock(q.lock_ptr);
  
        if (!ret) {
  
        /* Unqueue and drop the lock */
        unqueue_me_pi(&q);
-       futex_unlock_mm(fshared);
  
        if (to)
                destroy_hrtimer_on_stack(&to->timer);
        queue_unlock(&q, hb);
  
   out_release_sem:
-       futex_unlock_mm(fshared);
+       put_futex_key(fshared, &q.key);
        if (to)
                destroy_hrtimer_on_stack(&to->timer);
        return ret;
  
   uaddr_faulted:
        /*
-        * We have to r/w  *(int __user *)uaddr, but we can't modify it
-        * non-atomically.  Therefore, if get_user below is not
-        * enough, we need to handle the fault ourselves, while
-        * still holding the mmap_sem.
-        *
-        * ... and hb->lock. :-) --ANK
+        * We have to r/w  *(int __user *)uaddr, and we have to modify it
+        * atomically.  Therefore, if we continue to fault after get_user()
+        * below, we need to handle the fault ourselves, while still holding
+        * the mmap_sem.  This can occur if the uaddr is under contention as
+        * we have to drop the mmap_sem in order to call get_user().
         */
        queue_unlock(&q, hb);
  
        if (attempt++) {
-               ret = futex_handle_fault((unsigned long)uaddr, fshared,
-                                        attempt);
+               ret = futex_handle_fault((unsigned long)uaddr, attempt);
                if (ret)
                        goto out_release_sem;
                goto retry_unlocked;
        }
  
-       futex_unlock_mm(fshared);
        ret = get_user(uval, uaddr);
-       if (!ret && (uval != -EFAULT))
+       if (!ret)
                goto retry;
  
        if (to)
   * This is the in-kernel slowpath: we look up the PI state (if any),
   * and do the rt-mutex unlock.
   */
- static int futex_unlock_pi(u32 __user *uaddr, struct rw_semaphore *fshared)
+ static int futex_unlock_pi(u32 __user *uaddr, int fshared)
  {
        struct futex_hash_bucket *hb;
        struct futex_q *this, *next;
        u32 uval;
        struct plist_head *head;
-       union futex_key key;
+       union futex_key key = FUTEX_KEY_INIT;
        int ret, attempt = 0;
  
  retry:
         */
        if ((uval & FUTEX_TID_MASK) != task_pid_vnr(current))
                return -EPERM;
-       /*
-        * First take all the futex related locks:
-        */
-       futex_lock_mm(fshared);
  
        ret = get_futex_key(uaddr, fshared, &key);
        if (unlikely(ret != 0))
@@@ -1754,34 -1669,30 +1676,30 @@@ retry_unlocked
  out_unlock:
        spin_unlock(&hb->lock);
  out:
-       futex_unlock_mm(fshared);
+       put_futex_key(fshared, &key);
  
        return ret;
  
  pi_faulted:
        /*
-        * We have to r/w  *(int __user *)uaddr, but we can't modify it
-        * non-atomically.  Therefore, if get_user below is not
-        * enough, we need to handle the fault ourselves, while
-        * still holding the mmap_sem.
-        *
-        * ... and hb->lock. --ANK
+        * We have to r/w  *(int __user *)uaddr, and we have to modify it
+        * atomically.  Therefore, if we continue to fault after get_user()
+        * below, we need to handle the fault ourselves, while still holding
+        * the mmap_sem.  This can occur if the uaddr is under contention as
+        * we have to drop the mmap_sem in order to call get_user().
         */
        spin_unlock(&hb->lock);
  
        if (attempt++) {
-               ret = futex_handle_fault((unsigned long)uaddr, fshared,
-                                        attempt);
+               ret = futex_handle_fault((unsigned long)uaddr, attempt);
                if (ret)
                        goto out;
                uval = 0;
                goto retry_unlocked;
        }
  
-       futex_unlock_mm(fshared);
        ret = get_user(uval, uaddr);
-       if (!ret && (uval != -EFAULT))
+       if (!ret)
                goto retry;
  
        return ret;
@@@ -1836,7 -1747,6 +1754,7 @@@ sys_get_robust_list(int pid, struct rob
  {
        struct robust_list_head __user *head;
        unsigned long ret;
 +      const struct cred *cred = current_cred(), *pcred;
  
        if (!futex_cmpxchg_enabled)
                return -ENOSYS;
                if (!p)
                        goto err_unlock;
                ret = -EPERM;
 -              if ((current->euid != p->euid) && (current->euid != p->uid) &&
 -                              !capable(CAP_SYS_PTRACE))
 +              pcred = __task_cred(p);
 +              if (cred->euid != pcred->euid &&
 +                  cred->euid != pcred->uid &&
 +                  !capable(CAP_SYS_PTRACE))
                        goto err_unlock;
                head = p->robust_list;
                rcu_read_unlock();
@@@ -1908,8 -1816,7 +1826,7 @@@ retry
                 * PI futexes happens in exit_pi_state():
                 */
                if (!pi && (uval & FUTEX_WAITERS))
-                       futex_wake(uaddr, &curr->mm->mmap_sem, 1,
-                                  FUTEX_BITSET_MATCH_ANY);
+                       futex_wake(uaddr, 1, 1, FUTEX_BITSET_MATCH_ANY);
        }
        return 0;
  }
@@@ -2003,18 -1910,22 +1920,22 @@@ void exit_robust_list(struct task_struc
  long do_futex(u32 __user *uaddr, int op, u32 val, ktime_t *timeout,
                u32 __user *uaddr2, u32 val2, u32 val3)
  {
-       int ret = -ENOSYS;
+       int clockrt, ret = -ENOSYS;
        int cmd = op & FUTEX_CMD_MASK;
-       struct rw_semaphore *fshared = NULL;
+       int fshared = 0;
  
        if (!(op & FUTEX_PRIVATE_FLAG))
-               fshared = &current->mm->mmap_sem;
+               fshared = 1;
+       clockrt = op & FUTEX_CLOCK_REALTIME;
+       if (clockrt && cmd != FUTEX_WAIT_BITSET)
+               return -ENOSYS;
  
        switch (cmd) {
        case FUTEX_WAIT:
                val3 = FUTEX_BITSET_MATCH_ANY;
        case FUTEX_WAIT_BITSET:
-               ret = futex_wait(uaddr, fshared, val, timeout, val3);
+               ret = futex_wait(uaddr, fshared, val, timeout, val3, clockrt);
                break;
        case FUTEX_WAKE:
                val3 = FUTEX_BITSET_MATCH_ANY;
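The kernel/futex.c hunks above replace the old rw_semaphore-based fshared handling (mmap_sem) with a plain int flag plus reference-counted keys: every get_futex_key() is now balanced by a put_futex_key(), and faults on the futex word are serviced by get_user() with no locks held before the lookup is retried. The following is a minimal illustrative sketch of that pin/use/unpin shape, not code from the patch; it only assumes the get_futex_key()/put_futex_key() signatures visible in the hunks above.

	/* Illustrative sketch only -- mirrors the lookup shape of futex_lock_pi() above. */
	static int futex_key_pattern(u32 __user *uaddr, int fshared)
	{
		union futex_key key = FUTEX_KEY_INIT;
		u32 uval;
		int ret;

		ret = get_futex_key(uaddr, fshared, &key);	/* pin the key; mmap_sem is not held across the op */
		if (unlikely(ret != 0))
			return ret;

		ret = get_user(uval, uaddr);			/* may sleep and fault the page in: no hb->lock held */
		if (!ret) {
			/* ... act on uval, queue the waiter, etc. ... */
		}

		put_futex_key(fshared, &key);			/* always balance the key reference */
		return ret;
	}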
diff --combined kernel/lockdep.c
index 74b1878b8bb8170a21957e74600465437aa225bd,4fa6eeb4e8a7f0c6111a0cb5d67fa6f9671bad5b..06b0c3568f0b230a8c6b669d055c8c442a7eb790
@@@ -25,7 -25,6 +25,7 @@@
   * Thanks to Arjan van de Ven for coming up with the initial idea of
   * mapping lock dependencies runtime.
   */
 +#define DISABLE_BRANCH_PROFILING
  #include <linux/mutex.h>
  #include <linux/sched.h>
  #include <linux/delay.h>
@@@ -137,16 -136,16 +137,16 @@@ static inline struct lock_class *hlock_
  #ifdef CONFIG_LOCK_STAT
  static DEFINE_PER_CPU(struct lock_class_stats[MAX_LOCKDEP_KEYS], lock_stats);
  
- static int lock_contention_point(struct lock_class *class, unsigned long ip)
+ static int lock_point(unsigned long points[], unsigned long ip)
  {
        int i;
  
-       for (i = 0; i < ARRAY_SIZE(class->contention_point); i++) {
-               if (class->contention_point[i] == 0) {
-                       class->contention_point[i] = ip;
+       for (i = 0; i < LOCKSTAT_POINTS; i++) {
+               if (points[i] == 0) {
+                       points[i] = ip;
                        break;
                }
-               if (class->contention_point[i] == ip)
+               if (points[i] == ip)
                        break;
        }
  
@@@ -186,6 -185,9 +186,9 @@@ struct lock_class_stats lock_stats(stru
                for (i = 0; i < ARRAY_SIZE(stats.contention_point); i++)
                        stats.contention_point[i] += pcs->contention_point[i];
  
+               for (i = 0; i < ARRAY_SIZE(stats.contending_point); i++)
+                       stats.contending_point[i] += pcs->contending_point[i];
                lock_time_add(&pcs->read_waittime, &stats.read_waittime);
                lock_time_add(&pcs->write_waittime, &stats.write_waittime);
  
@@@ -210,6 -212,7 +213,7 @@@ void clear_lock_stats(struct lock_clas
                memset(cpu_stats, 0, sizeof(struct lock_class_stats));
        }
        memset(class->contention_point, 0, sizeof(class->contention_point));
+       memset(class->contending_point, 0, sizeof(class->contending_point));
  }
  
  static struct lock_class_stats *get_lock_stats(struct lock_class *class)
@@@ -288,14 -291,12 +292,12 @@@ void lockdep_off(void
  {
        current->lockdep_recursion++;
  }
  EXPORT_SYMBOL(lockdep_off);
  
  void lockdep_on(void)
  {
        current->lockdep_recursion--;
  }
  EXPORT_SYMBOL(lockdep_on);
  
  /*
@@@ -577,7 -578,8 +579,8 @@@ static void print_lock_class_header(str
  /*
   * printk all lock dependencies starting at <entry>:
   */
- static void print_lock_dependencies(struct lock_class *class, int depth)
+ static void __used
+ print_lock_dependencies(struct lock_class *class, int depth)
  {
        struct lock_list *entry;
  
@@@ -2509,7 -2511,6 +2512,6 @@@ void lockdep_init_map(struct lockdep_ma
        if (subclass)
                register_lock_class(lock, subclass, 1);
  }
  EXPORT_SYMBOL_GPL(lockdep_init_map);
  
  /*
@@@ -2690,8 -2691,9 +2692,9 @@@ static int check_unlock(struct task_str
  }
  
  static int
- __lock_set_subclass(struct lockdep_map *lock,
-                   unsigned int subclass, unsigned long ip)
+ __lock_set_class(struct lockdep_map *lock, const char *name,
+                struct lock_class_key *key, unsigned int subclass,
+                unsigned long ip)
  {
        struct task_struct *curr = current;
        struct held_lock *hlock, *prev_hlock;
        return print_unlock_inbalance_bug(curr, lock, ip);
  
  found_it:
+       lockdep_init_map(lock, name, key, 0);
        class = register_lock_class(lock, subclass, 0);
        hlock->class_idx = class - lock_classes + 1;
  
@@@ -2902,9 -2905,9 +2906,9 @@@ static void check_flags(unsigned long f
  #endif
  }
  
- void lock_set_subclass(struct lockdep_map *lock,
-                 unsigned int subclass, unsigned long ip)
+ void lock_set_class(struct lockdep_map *lock, const char *name,
+                   struct lock_class_key *key, unsigned int subclass,
+                   unsigned long ip)
  {
        unsigned long flags;
  
        raw_local_irq_save(flags);
        current->lockdep_recursion = 1;
        check_flags(flags);
-       if (__lock_set_subclass(lock, subclass, ip))
+       if (__lock_set_class(lock, name, key, subclass, ip))
                check_chain_key(current);
        current->lockdep_recursion = 0;
        raw_local_irq_restore(flags);
  }
- EXPORT_SYMBOL_GPL(lock_set_subclass);
+ EXPORT_SYMBOL_GPL(lock_set_class);
  
  /*
   * We are not always called with irqs disabled - do that here,
@@@ -2944,7 -2946,6 +2947,6 @@@ void lock_acquire(struct lockdep_map *l
        current->lockdep_recursion = 0;
        raw_local_irq_restore(flags);
  }
  EXPORT_SYMBOL_GPL(lock_acquire);
  
  void lock_release(struct lockdep_map *lock, int nested,
        current->lockdep_recursion = 0;
        raw_local_irq_restore(flags);
  }
  EXPORT_SYMBOL_GPL(lock_release);
  
  #ifdef CONFIG_LOCK_STAT
@@@ -3000,7 -3000,7 +3001,7 @@@ __lock_contended(struct lockdep_map *lo
        struct held_lock *hlock, *prev_hlock;
        struct lock_class_stats *stats;
        unsigned int depth;
-       int i, point;
+       int i, contention_point, contending_point;
  
        depth = curr->lockdep_depth;
        if (DEBUG_LOCKS_WARN_ON(!depth))
  found_it:
        hlock->waittime_stamp = sched_clock();
  
-       point = lock_contention_point(hlock_class(hlock), ip);
+       contention_point = lock_point(hlock_class(hlock)->contention_point, ip);
+       contending_point = lock_point(hlock_class(hlock)->contending_point,
+                                     lock->ip);
  
        stats = get_lock_stats(hlock_class(hlock));
-       if (point < ARRAY_SIZE(stats->contention_point))
-               stats->contention_point[point]++;
+       if (contention_point < LOCKSTAT_POINTS)
+               stats->contention_point[contention_point]++;
+       if (contending_point < LOCKSTAT_POINTS)
+               stats->contending_point[contending_point]++;
        if (lock->cpu != smp_processor_id())
                stats->bounces[bounce_contended + !!hlock->read]++;
        put_lock_stats(stats);
  }
  
  static void
- __lock_acquired(struct lockdep_map *lock)
+ __lock_acquired(struct lockdep_map *lock, unsigned long ip)
  {
        struct task_struct *curr = current;
        struct held_lock *hlock, *prev_hlock;
@@@ -3084,6 -3088,7 +3089,7 @@@ found_it
        put_lock_stats(stats);
  
        lock->cpu = cpu;
+       lock->ip = ip;
  }
  
  void lock_contended(struct lockdep_map *lock, unsigned long ip)
  }
  EXPORT_SYMBOL_GPL(lock_contended);
  
- void lock_acquired(struct lockdep_map *lock)
+ void lock_acquired(struct lockdep_map *lock, unsigned long ip)
  {
        unsigned long flags;
  
        raw_local_irq_save(flags);
        check_flags(flags);
        current->lockdep_recursion = 1;
-       __lock_acquired(lock);
+       __lock_acquired(lock, ip);
        current->lockdep_recursion = 0;
        raw_local_irq_restore(flags);
  }
@@@ -3442,7 -3447,6 +3448,6 @@@ retry
        if (unlock)
                read_unlock(&tasklist_lock);
  }
  EXPORT_SYMBOL_GPL(debug_show_all_locks);
  
  /*
@@@ -3463,7 -3467,6 +3468,6 @@@ void debug_show_held_locks(struct task_
  {
                __debug_show_held_locks(task);
  }
  EXPORT_SYMBOL_GPL(debug_show_held_locks);
  
  void lockdep_sys_exit(void)
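The kernel/lockdep.c hunks rename __lock_set_subclass()/lock_set_subclass() to __lock_set_class()/lock_set_class(), which now also carry the new name and key, and split the lock statistics into contention_point[] (where a waiter blocked) and contending_point[] (where the holder took the lock, recorded via the new lock->ip). Since kernel/sched.c below still calls lock_set_subclass(), the old entry point presumably survives as a thin wrapper along these lines; this wrapper is an assumption for illustration and is not part of the merge shown here.

	/*
	 * Assumed wrapper (not shown in this merge): keep old call sites
	 * working by re-registering the lock under its existing name and key.
	 */
	static inline void lock_set_subclass(struct lockdep_map *lock,
					     unsigned int subclass,
					     unsigned long ip)
	{
		lock_set_class(lock, lock->name, lock->key, subclass, ip);
	}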
diff --combined kernel/sched.c
index 748ff924a29056e57f5c30058ce39ea7e470b23e,3e70963120a0c9e57daf482f9363fe751f90eacc..22aa9cab3fe5b350f38aa86d596325adf486382c
   */
  #define RUNTIME_INF   ((u64)~0ULL)
  
 +DEFINE_TRACE(sched_wait_task);
 +DEFINE_TRACE(sched_wakeup);
 +DEFINE_TRACE(sched_wakeup_new);
 +DEFINE_TRACE(sched_switch);
 +DEFINE_TRACE(sched_migrate_task);
 +
  #ifdef CONFIG_SMP
  /*
   * Divide a load by a sched group cpu_power : (load / sg->__cpu_power)
@@@ -267,10 -261,6 +267,10 @@@ struct task_group 
        struct cgroup_subsys_state css;
  #endif
  
 +#ifdef CONFIG_USER_SCHED
 +      uid_t uid;
 +#endif
 +
  #ifdef CONFIG_FAIR_GROUP_SCHED
        /* schedulable entities of this group on each cpu */
        struct sched_entity **se;
  
  #ifdef CONFIG_USER_SCHED
  
 +/* Helper function to pass uid information to create_sched_user() */
 +void set_tg_uid(struct user_struct *user)
 +{
 +      user->tg->uid = user->uid;
 +}
 +
  /*
   * Root task group.
   *    Every UID task group (including init_task_group aka UID-0) will
@@@ -361,9 -345,7 +361,9 @@@ static inline struct task_group *task_g
        struct task_group *tg;
  
  #ifdef CONFIG_USER_SCHED
 -      tg = p->user->tg;
 +      rcu_read_lock();
 +      tg = __task_cred(p)->user->tg;
 +      rcu_read_unlock();
  #elif defined(CONFIG_CGROUP_SCHED)
        tg = container_of(task_subsys_state(p, cpu_cgroup_subsys_id),
                                struct task_group, css);
@@@ -604,8 -586,6 +604,8 @@@ struct rq 
  #ifdef CONFIG_SCHEDSTATS
        /* latency stats */
        struct sched_info rq_sched_info;
 +      unsigned long long rq_cpu_time;
 +      /* could above be rq->cfs_rq.exec_clock + rq->rt_rq.rt_runtime ? */
  
        /* sys_sched_yield() stats */
        unsigned int yld_exp_empty;
@@@ -723,18 -703,45 +723,18 @@@ static __read_mostly char *sched_feat_n
  
  #undef SCHED_FEAT
  
 -static int sched_feat_open(struct inode *inode, struct file *filp)
 -{
 -      filp->private_data = inode->i_private;
 -      return 0;
 -}
 -
 -static ssize_t
 -sched_feat_read(struct file *filp, char __user *ubuf,
 -              size_t cnt, loff_t *ppos)
 +static int sched_feat_show(struct seq_file *m, void *v)
  {
 -      char *buf;
 -      int r = 0;
 -      int len = 0;
        int i;
  
        for (i = 0; sched_feat_names[i]; i++) {
 -              len += strlen(sched_feat_names[i]);
 -              len += 4;
 -      }
 -
 -      buf = kmalloc(len + 2, GFP_KERNEL);
 -      if (!buf)
 -              return -ENOMEM;
 -
 -      for (i = 0; sched_feat_names[i]; i++) {
 -              if (sysctl_sched_features & (1UL << i))
 -                      r += sprintf(buf + r, "%s ", sched_feat_names[i]);
 -              else
 -                      r += sprintf(buf + r, "NO_%s ", sched_feat_names[i]);
 +              if (!(sysctl_sched_features & (1UL << i)))
 +                      seq_puts(m, "NO_");
 +              seq_printf(m, "%s ", sched_feat_names[i]);
        }
 +      seq_puts(m, "\n");
  
 -      r += sprintf(buf + r, "\n");
 -      WARN_ON(r >= len + 2);
 -
 -      r = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
 -
 -      kfree(buf);
 -
 -      return r;
 +      return 0;
  }
  
  static ssize_t
@@@ -779,17 -786,10 +779,17 @@@ sched_feat_write(struct file *filp, con
        return cnt;
  }
  
 +static int sched_feat_open(struct inode *inode, struct file *filp)
 +{
 +      return single_open(filp, sched_feat_show, NULL);
 +}
 +
  static struct file_operations sched_feat_fops = {
 -      .open   = sched_feat_open,
 -      .read   = sched_feat_read,
 -      .write  = sched_feat_write,
 +      .open           = sched_feat_open,
 +      .write          = sched_feat_write,
 +      .read           = seq_read,
 +      .llseek         = seq_lseek,
 +      .release        = single_release,
  };
  
  static __init int sched_init_debug(void)
@@@ -1474,13 -1474,27 +1474,13 @@@ static voi
  update_group_shares_cpu(struct task_group *tg, int cpu,
                        unsigned long sd_shares, unsigned long sd_rq_weight)
  {
 -      int boost = 0;
        unsigned long shares;
        unsigned long rq_weight;
  
        if (!tg->se[cpu])
                return;
  
 -      rq_weight = tg->cfs_rq[cpu]->load.weight;
 -
 -      /*
 -       * If there are currently no tasks on the cpu pretend there is one of
 -       * average load so that when a new task gets to run here it will not
 -       * get delayed by group starvation.
 -       */
 -      if (!rq_weight) {
 -              boost = 1;
 -              rq_weight = NICE_0_LOAD;
 -      }
 -
 -      if (unlikely(rq_weight > sd_rq_weight))
 -              rq_weight = sd_rq_weight;
 +      rq_weight = tg->cfs_rq[cpu]->rq_weight;
  
        /*
         *           \Sum shares * rq_weight
         *               \Sum rq_weight
         *
         */
 -      shares = (sd_shares * rq_weight) / (sd_rq_weight + 1);
 +      shares = (sd_shares * rq_weight) / sd_rq_weight;
        shares = clamp_t(unsigned long, shares, MIN_SHARES, MAX_SHARES);
  
        if (abs(shares - tg->se[cpu]->load.weight) >
                unsigned long flags;
  
                spin_lock_irqsave(&rq->lock, flags);
 -              /*
 -               * record the actual number of shares, not the boosted amount.
 -               */
 -              tg->cfs_rq[cpu]->shares = boost ? 0 : shares;
 -              tg->cfs_rq[cpu]->rq_weight = rq_weight;
 +              tg->cfs_rq[cpu]->shares = shares;
  
                __set_se_shares(tg->se[cpu], shares);
                spin_unlock_irqrestore(&rq->lock, flags);
   */
  static int tg_shares_up(struct task_group *tg, void *data)
  {
 -      unsigned long rq_weight = 0;
 +      unsigned long weight, rq_weight = 0;
        unsigned long shares = 0;
        struct sched_domain *sd = data;
        int i;
  
        for_each_cpu_mask(i, sd->span) {
 -              rq_weight += tg->cfs_rq[i]->load.weight;
 +              /*
 +               * If there are currently no tasks on the cpu pretend there
 +               * is one of average load so that when a new task gets to
 +               * run here it will not get delayed by group starvation.
 +               */
 +              weight = tg->cfs_rq[i]->load.weight;
 +              if (!weight)
 +                      weight = NICE_0_LOAD;
 +
 +              tg->cfs_rq[i]->rq_weight = weight;
 +              rq_weight += weight;
                shares += tg->cfs_rq[i]->shares;
        }
  
        if (!sd->parent || !(sd->parent->flags & SD_LOAD_BALANCE))
                shares = tg->shares;
  
 -      if (!rq_weight)
 -              rq_weight = cpus_weight(sd->span) * NICE_0_LOAD;
 -
        for_each_cpu_mask(i, sd->span)
                update_group_shares_cpu(tg, i, shares, rq_weight);
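The reworked tg_shares_up()/update_group_shares_cpu() pair above folds the "idle cpu pretends to run one average task" fix-up into the per-cpu weight accumulation (weight becomes NICE_0_LOAD when the cfs_rq is empty), so each cpu's share is simply the group's sd_shares scaled by its weight fraction. A small worked example with invented numbers, following the \Sum-shares formula in the comment above:

	/*
	 * Illustrative helper, not from the patch: one cpu's share of a group.
	 * Example (invented numbers): sd_shares = 1024 over two cpus with rq
	 * weights 3072 and 1024 (total 4096) gives 768 and 256 respectively.
	 * An idle cpu contributes NICE_0_LOAD instead of 0, so it still gets
	 * a non-zero slice and a newly woken task is not starved there.
	 */
	static unsigned long group_share_example(unsigned long sd_shares,
						 unsigned long rq_weight,
						 unsigned long sd_rq_weight)
	{
		unsigned long shares = sd_shares * rq_weight / sd_rq_weight;

		return clamp_t(unsigned long, shares, MIN_SHARES, MAX_SHARES);
	}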
  
@@@ -1601,39 -1612,6 +1601,39 @@@ static inline void update_shares_locked
  
  #endif
  
 +/*
 + * double_lock_balance - lock the busiest runqueue, this_rq is locked already.
 + */
 +static int double_lock_balance(struct rq *this_rq, struct rq *busiest)
 +      __releases(this_rq->lock)
 +      __acquires(busiest->lock)
 +      __acquires(this_rq->lock)
 +{
 +      int ret = 0;
 +
 +      if (unlikely(!irqs_disabled())) {
 +              /* printk() doesn't work good under rq->lock */
 +              spin_unlock(&this_rq->lock);
 +              BUG_ON(1);
 +      }
 +      if (unlikely(!spin_trylock(&busiest->lock))) {
 +              if (busiest < this_rq) {
 +                      spin_unlock(&this_rq->lock);
 +                      spin_lock(&busiest->lock);
 +                      spin_lock_nested(&this_rq->lock, SINGLE_DEPTH_NESTING);
 +                      ret = 1;
 +              } else
 +                      spin_lock_nested(&busiest->lock, SINGLE_DEPTH_NESTING);
 +      }
 +      return ret;
 +}
 +
 +static inline void double_unlock_balance(struct rq *this_rq, struct rq *busiest)
 +      __releases(busiest->lock)
 +{
 +      spin_unlock(&busiest->lock);
 +      lock_set_subclass(&this_rq->lock.dep_map, 0, _RET_IP_);
 +}
  #endif
  
  #ifdef CONFIG_FAIR_GROUP_SCHED
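double_lock_balance(), moved above in this hunk, avoids ABBA deadlock between two runqueue locks by ordering them by address: when the opportunistic trylock fails and busiest has the lower address, this_rq->lock is dropped and both locks are retaken in address order, with the return value 1 telling the caller its runqueue was briefly unlocked. A generic, illustrative sketch of that ordering rule (not from the patch):

	/*
	 * Illustrative only: take two distinct spinlocks in address order so
	 * that two cpus locking the same pair can never wait on each other's
	 * second lock.  The real double_lock_balance() additionally reports
	 * whether the already-held lock had to be dropped, so callers can
	 * revalidate their state afterwards.
	 */
	static void lock_pair_ordered(spinlock_t *a, spinlock_t *b)
	{
		if (a < b) {
			spin_lock(a);
			spin_lock_nested(b, SINGLE_DEPTH_NESTING);
		} else {
			spin_lock(b);
			spin_lock_nested(a, SINGLE_DEPTH_NESTING);
		}
	}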
@@@ -1867,8 -1845,6 +1867,8 @@@ void set_task_cpu(struct task_struct *p
  
        clock_offset = old_rq->clock - new_rq->clock;
  
 +      trace_sched_migrate_task(p, task_cpu(p), new_cpu);
 +
  #ifdef CONFIG_SCHEDSTATS
        if (p->se.wait_start)
                p->se.wait_start -= clock_offset;
@@@ -2278,7 -2254,6 +2278,7 @@@ static int try_to_wake_up(struct task_s
  
        smp_wmb();
        rq = task_rq_lock(p, &flags);
 +      update_rq_clock(rq);
        old_state = p->state;
        if (!(old_state & state))
                goto out;
@@@ -2336,11 -2311,12 +2336,11 @@@ out_activate
                schedstat_inc(p, se.nr_wakeups_local);
        else
                schedstat_inc(p, se.nr_wakeups_remote);
 -      update_rq_clock(rq);
        activate_task(rq, p, 1);
        success = 1;
  
  out_running:
 -      trace_sched_wakeup(rq, p);
 +      trace_sched_wakeup(rq, p, success);
        check_preempt_curr(rq, p, sync);
  
        p->state = TASK_RUNNING;
@@@ -2473,7 -2449,7 +2473,7 @@@ void wake_up_new_task(struct task_struc
                p->sched_class->task_new(rq, p);
                inc_nr_running(rq);
        }
 -      trace_sched_wakeup_new(rq, p);
 +      trace_sched_wakeup_new(rq, p, 1);
        check_preempt_curr(rq, p, 0);
  #ifdef CONFIG_SMP
        if (p->sched_class->task_wake_up)
@@@ -2835,6 -2811,40 +2835,6 @@@ static void double_rq_unlock(struct rq 
                __release(rq2->lock);
  }
  
 -/*
 - * double_lock_balance - lock the busiest runqueue, this_rq is locked already.
 - */
 -static int double_lock_balance(struct rq *this_rq, struct rq *busiest)
 -      __releases(this_rq->lock)
 -      __acquires(busiest->lock)
 -      __acquires(this_rq->lock)
 -{
 -      int ret = 0;
 -
 -      if (unlikely(!irqs_disabled())) {
 -              /* printk() doesn't work good under rq->lock */
 -              spin_unlock(&this_rq->lock);
 -              BUG_ON(1);
 -      }
 -      if (unlikely(!spin_trylock(&busiest->lock))) {
 -              if (busiest < this_rq) {
 -                      spin_unlock(&this_rq->lock);
 -                      spin_lock(&busiest->lock);
 -                      spin_lock_nested(&this_rq->lock, SINGLE_DEPTH_NESTING);
 -                      ret = 1;
 -              } else
 -                      spin_lock_nested(&busiest->lock, SINGLE_DEPTH_NESTING);
 -      }
 -      return ret;
 -}
 -
 -static void double_unlock_balance(struct rq *this_rq, struct rq *busiest)
 -      __releases(busiest->lock)
 -{
 -      spin_unlock(&busiest->lock);
 -      lock_set_subclass(&this_rq->lock.dep_map, 0, _RET_IP_);
 -}
 -
  /*
   * If dest_cpu is allowed for this process, migrate the task to it.
   * This is accomplished by forcing the cpu_allowed mask to only
@@@ -2852,6 -2862,7 +2852,6 @@@ static void sched_migrate_task(struct t
            || unlikely(!cpu_active(dest_cpu)))
                goto out;
  
 -      trace_sched_migrate_task(rq, p, dest_cpu);
        /* force the process onto the specified CPU */
        if (migrate_task(p, dest_cpu, &req)) {
                /* Need to wait for migration thread (might exit: take ref). */
@@@ -3696,7 -3707,7 +3696,7 @@@ out_balanced
  static void idle_balance(int this_cpu, struct rq *this_rq)
  {
        struct sched_domain *sd;
 -      int pulled_task = -1;
 +      int pulled_task = 0;
        unsigned long next_balance = jiffies + HZ;
        cpumask_t tmpmask;
  
@@@ -4192,7 -4203,6 +4192,6 @@@ void account_steal_time(struct task_str
  
        if (p == rq->idle) {
                p->stime = cputime_add(p->stime, steal);
-               account_group_system_time(p, steal);
                if (atomic_read(&rq->nr_iowait) > 0)
                        cpustat->iowait = cputime64_add(cpustat->iowait, tmp);
                else
@@@ -4328,7 -4338,7 +4327,7 @@@ void __kprobes sub_preempt_count(int va
        /*
         * Underflow?
         */
-       if (DEBUG_LOCKS_WARN_ON(val > preempt_count()))
+        if (DEBUG_LOCKS_WARN_ON(val > preempt_count() - (!!kernel_locked())))
                return;
        /*
         * Is the spinlock portion underflowing?
@@@ -5123,22 -5133,6 +5122,22 @@@ __setscheduler(struct rq *rq, struct ta
        set_load_weight(p);
  }
  
 +/*
 + * check the target process has a UID that matches the current process's
 + */
 +static bool check_same_owner(struct task_struct *p)
 +{
 +      const struct cred *cred = current_cred(), *pcred;
 +      bool match;
 +
 +      rcu_read_lock();
 +      pcred = __task_cred(p);
 +      match = (cred->euid == pcred->euid ||
 +               cred->euid == pcred->uid);
 +      rcu_read_unlock();
 +      return match;
 +}
 +
  static int __sched_setscheduler(struct task_struct *p, int policy,
                                struct sched_param *param, bool user)
  {
@@@ -5198,7 -5192,8 +5197,7 @@@ recheck
                        return -EPERM;
  
                /* can't change other user's priorities */
 -              if ((current->euid != p->euid) &&
 -                  (current->euid != p->uid))
 +              if (!check_same_owner(p))
                        return -EPERM;
        }
  
@@@ -5430,7 -5425,8 +5429,7 @@@ long sched_setaffinity(pid_t pid, cons
        read_unlock(&tasklist_lock);
  
        retval = -EPERM;
 -      if ((current->euid != p->euid) && (current->euid != p->uid) &&
 -                      !capable(CAP_SYS_NICE))
 +      if (!check_same_owner(p) && !capable(CAP_SYS_NICE))
                goto out_unlock;
  
        retval = security_task_setscheduler(p, 0, NULL);
@@@ -5899,7 -5895,6 +5898,7 @@@ void __cpuinit init_idle(struct task_st
         * The idle tasks have their own, simple scheduling class:
         */
        idle->sched_class = &idle_sched_class;
 +      ftrace_graph_init_task(idle);
  }
  
  /*
@@@ -6130,6 -6125,7 +6129,6 @@@ static int __migrate_task_irq(struct ta
  
  /*
   * Figure out where task on dead CPU should go, use force if necessary.
 - * NOTE: interrupts should be disabled by the caller
   */
  static void move_task_off_dead_cpu(int dead_cpu, struct task_struct *p)
  {
@@@ -6641,6 -6637,28 +6640,6 @@@ early_initcall(migration_init)
  
  #ifdef CONFIG_SCHED_DEBUG
  
 -static inline const char *sd_level_to_string(enum sched_domain_level lvl)
 -{
 -      switch (lvl) {
 -      case SD_LV_NONE:
 -                      return "NONE";
 -      case SD_LV_SIBLING:
 -                      return "SIBLING";
 -      case SD_LV_MC:
 -                      return "MC";
 -      case SD_LV_CPU:
 -                      return "CPU";
 -      case SD_LV_NODE:
 -                      return "NODE";
 -      case SD_LV_ALLNODES:
 -                      return "ALLNODES";
 -      case SD_LV_MAX:
 -                      return "MAX";
 -
 -      }
 -      return "MAX";
 -}
 -
  static int sched_domain_debug_one(struct sched_domain *sd, int cpu, int level,
                                  cpumask_t *groupmask)
  {
                return -1;
        }
  
 -      printk(KERN_CONT "span %s level %s\n",
 -              str, sd_level_to_string(sd->level));
 +      printk(KERN_CONT "span %s level %s\n", str, sd->name);
  
        if (!cpu_isset(cpu, sd->span)) {
                printk(KERN_ERR "ERROR: domain->span does not contain "
@@@ -6796,8 -6815,6 +6795,8 @@@ sd_parent_degenerate(struct sched_domai
                                SD_BALANCE_EXEC |
                                SD_SHARE_CPUPOWER |
                                SD_SHARE_PKG_RESOURCES);
 +              if (nr_node_ids == 1)
 +                      pflags &= ~SD_SERIALIZE;
        }
        if (~cflags & pflags)
                return 0;
@@@ -7318,21 -7335,13 +7317,21 @@@ struct allmasks 
  };
  
  #if   NR_CPUS > 128
 -#define       SCHED_CPUMASK_ALLOC             1
 -#define       SCHED_CPUMASK_FREE(v)           kfree(v)
 -#define       SCHED_CPUMASK_DECLARE(v)        struct allmasks *v
 +#define SCHED_CPUMASK_DECLARE(v)      struct allmasks *v
 +static inline void sched_cpumask_alloc(struct allmasks **masks)
 +{
 +      *masks = kmalloc(sizeof(**masks), GFP_KERNEL);
 +}
 +static inline void sched_cpumask_free(struct allmasks *masks)
 +{
 +      kfree(masks);
 +}
  #else
 -#define       SCHED_CPUMASK_ALLOC             0
 -#define       SCHED_CPUMASK_FREE(v)
 -#define       SCHED_CPUMASK_DECLARE(v)        struct allmasks _v, *v = &_v
 +#define SCHED_CPUMASK_DECLARE(v)      struct allmasks _v, *v = &_v
 +static inline void sched_cpumask_alloc(struct allmasks **masks)
 +{ }
 +static inline void sched_cpumask_free(struct allmasks *masks)
 +{ }
  #endif
  
  #define       SCHED_CPUMASK_VAR(v, a)         cpumask_t *v = (cpumask_t *) \
@@@ -7408,8 -7417,9 +7407,8 @@@ static int __build_sched_domains(const 
                return -ENOMEM;
        }
  
 -#if SCHED_CPUMASK_ALLOC
        /* get space for all scratch cpumask variables */
 -      allmasks = kmalloc(sizeof(*allmasks), GFP_KERNEL);
 +      sched_cpumask_alloc(&allmasks);
        if (!allmasks) {
                printk(KERN_WARNING "Cannot alloc cpumask array\n");
                kfree(rd);
  #endif
                return -ENOMEM;
        }
 -#endif
 +
        tmpmask = (cpumask_t *)allmasks;
  
  
                cpu_attach_domain(sd, rd, i);
        }
  
 -      SCHED_CPUMASK_FREE((void *)allmasks);
 +      sched_cpumask_free(allmasks);
        return 0;
  
  #ifdef CONFIG_NUMA
  error:
        free_sched_groups(cpu_map, tmpmask);
 -      SCHED_CPUMASK_FREE((void *)allmasks);
 +      sched_cpumask_free(allmasks);
        kfree(rd);
        return -ENOMEM;
  #endif
@@@ -7701,14 -7711,8 +7700,14 @@@ static struct sched_domain_attr *dattr_
   */
  static cpumask_t fallback_doms;
  
 -void __attribute__((weak)) arch_update_cpu_topology(void)
 +/*
 + * arch_update_cpu_topology lets virtualized architectures update the
 + * cpu core maps. It is supposed to return 1 if the topology changed
 + * or 0 if it stayed the same.
 + */
 +int __attribute__((weak)) arch_update_cpu_topology(void)
  {
 +      return 0;
  }
  
  /*
@@@ -7748,6 -7752,8 +7747,6 @@@ static void detach_destroy_domains(cons
        cpumask_t tmpmask;
        int i;
  
 -      unregister_sched_domain_sysctl();
 -
        for_each_cpu_mask_nr(i, *cpu_map)
                cpu_attach_domain(NULL, &def_root_domain, i);
        synchronize_sched();
@@@ -7800,21 -7806,17 +7799,21 @@@ void partition_sched_domains(int ndoms_
                             struct sched_domain_attr *dattr_new)
  {
        int i, j, n;
 +      int new_topology;
  
        mutex_lock(&sched_domains_mutex);
  
        /* always unregister in case we don't destroy any domains */
        unregister_sched_domain_sysctl();
  
 +      /* Let architecture update cpu core mappings. */
 +      new_topology = arch_update_cpu_topology();
 +
        n = doms_new ? ndoms_new : 0;
  
        /* Destroy deleted domains */
        for (i = 0; i < ndoms_cur; i++) {
 -              for (j = 0; j < n; j++) {
 +              for (j = 0; j < n && !new_topology; j++) {
                        if (cpus_equal(doms_cur[i], doms_new[j])
                            && dattrs_equal(dattr_cur, i, dattr_new, j))
                                goto match1;
@@@ -7829,12 -7831,12 +7828,12 @@@ match1
                ndoms_cur = 0;
                doms_new = &fallback_doms;
                cpus_andnot(doms_new[0], cpu_online_map, cpu_isolated_map);
 -              dattr_new = NULL;
 +              WARN_ON_ONCE(dattr_new);
        }
  
        /* Build new domains */
        for (i = 0; i < ndoms_new; i++) {
 -              for (j = 0; j < ndoms_cur; j++) {
 +              for (j = 0; j < ndoms_cur && !new_topology; j++) {
                        if (cpus_equal(doms_new[i], doms_cur[j])
                            && dattrs_equal(dattr_new, i, dattr_cur, j))
                                goto match2;
@@@ -8489,7 -8491,7 +8488,7 @@@ stati
  int alloc_fair_sched_group(struct task_group *tg, struct task_group *parent)
  {
        struct cfs_rq *cfs_rq;
 -      struct sched_entity *se, *parent_se;
 +      struct sched_entity *se;
        struct rq *rq;
        int i;
  
        for_each_possible_cpu(i) {
                rq = cpu_rq(i);
  
 -              cfs_rq = kmalloc_node(sizeof(struct cfs_rq),
 -                              GFP_KERNEL|__GFP_ZERO, cpu_to_node(i));
 +              cfs_rq = kzalloc_node(sizeof(struct cfs_rq),
 +                                    GFP_KERNEL, cpu_to_node(i));
                if (!cfs_rq)
                        goto err;
  
 -              se = kmalloc_node(sizeof(struct sched_entity),
 -                              GFP_KERNEL|__GFP_ZERO, cpu_to_node(i));
 +              se = kzalloc_node(sizeof(struct sched_entity),
 +                                GFP_KERNEL, cpu_to_node(i));
                if (!se)
                        goto err;
  
 -              parent_se = parent ? parent->se[i] : NULL;
 -              init_tg_cfs_entry(tg, cfs_rq, se, i, 0, parent_se);
 +              init_tg_cfs_entry(tg, cfs_rq, se, i, 0, parent->se[i]);
        }
  
        return 1;
@@@ -8576,7 -8579,7 +8575,7 @@@ stati
  int alloc_rt_sched_group(struct task_group *tg, struct task_group *parent)
  {
        struct rt_rq *rt_rq;
 -      struct sched_rt_entity *rt_se, *parent_se;
 +      struct sched_rt_entity *rt_se;
        struct rq *rq;
        int i;
  
        for_each_possible_cpu(i) {
                rq = cpu_rq(i);
  
 -              rt_rq = kmalloc_node(sizeof(struct rt_rq),
 -                              GFP_KERNEL|__GFP_ZERO, cpu_to_node(i));
 +              rt_rq = kzalloc_node(sizeof(struct rt_rq),
 +                                   GFP_KERNEL, cpu_to_node(i));
                if (!rt_rq)
                        goto err;
  
 -              rt_se = kmalloc_node(sizeof(struct sched_rt_entity),
 -                              GFP_KERNEL|__GFP_ZERO, cpu_to_node(i));
 +              rt_se = kzalloc_node(sizeof(struct sched_rt_entity),
 +                                   GFP_KERNEL, cpu_to_node(i));
                if (!rt_se)
                        goto err;
  
 -              parent_se = parent ? parent->rt_se[i] : NULL;
 -              init_tg_rt_entry(tg, rt_rq, rt_se, i, 0, parent_se);
 +              init_tg_rt_entry(tg, rt_rq, rt_se, i, 0, parent->rt_se[i]);
        }
  
        return 1;
@@@ -9246,12 -9250,11 +9245,12 @@@ struct cgroup_subsys cpu_cgroup_subsys 
   * (balbir@in.ibm.com).
   */
  
 -/* track cpu usage of a group of tasks */
 +/* track cpu usage of a group of tasks and its child groups */
  struct cpuacct {
        struct cgroup_subsys_state css;
        /* cpuusage holds pointer to a u64-type object on every cpu */
        u64 *cpuusage;
 +      struct cpuacct *parent;
  };
  
  struct cgroup_subsys cpuacct_subsys;
@@@ -9285,9 -9288,6 +9284,9 @@@ static struct cgroup_subsys_state *cpua
                return ERR_PTR(-ENOMEM);
        }
  
 +      if (cgrp->parent)
 +              ca->parent = cgroup_ca(cgrp->parent);
 +
        return &ca->css;
  }
  
@@@ -9301,41 -9301,6 +9300,41 @@@ cpuacct_destroy(struct cgroup_subsys *s
        kfree(ca);
  }
  
 +static u64 cpuacct_cpuusage_read(struct cpuacct *ca, int cpu)
 +{
 +      u64 *cpuusage = percpu_ptr(ca->cpuusage, cpu);
 +      u64 data;
 +
 +#ifndef CONFIG_64BIT
 +      /*
 +       * Take rq->lock to make 64-bit read safe on 32-bit platforms.
 +       */
 +      spin_lock_irq(&cpu_rq(cpu)->lock);
 +      data = *cpuusage;
 +      spin_unlock_irq(&cpu_rq(cpu)->lock);
 +#else
 +      data = *cpuusage;
 +#endif
 +
 +      return data;
 +}
 +
 +static void cpuacct_cpuusage_write(struct cpuacct *ca, int cpu, u64 val)
 +{
 +      u64 *cpuusage = percpu_ptr(ca->cpuusage, cpu);
 +
 +#ifndef CONFIG_64BIT
 +      /*
 +       * Take rq->lock to make 64-bit write safe on 32-bit platforms.
 +       */
 +      spin_lock_irq(&cpu_rq(cpu)->lock);
 +      *cpuusage = val;
 +      spin_unlock_irq(&cpu_rq(cpu)->lock);
 +#else
 +      *cpuusage = val;
 +#endif
 +}
 +
  /* return total cpu usage (in nanoseconds) of a group */
  static u64 cpuusage_read(struct cgroup *cgrp, struct cftype *cft)
  {
        u64 totalcpuusage = 0;
        int i;
  
 -      for_each_possible_cpu(i) {
 -              u64 *cpuusage = percpu_ptr(ca->cpuusage, i);
 -
 -              /*
 -               * Take rq->lock to make 64-bit addition safe on 32-bit
 -               * platforms.
 -               */
 -              spin_lock_irq(&cpu_rq(i)->lock);
 -              totalcpuusage += *cpuusage;
 -              spin_unlock_irq(&cpu_rq(i)->lock);
 -      }
 +      for_each_present_cpu(i)
 +              totalcpuusage += cpuacct_cpuusage_read(ca, i);
  
        return totalcpuusage;
  }
@@@ -9361,39 -9335,23 +9360,39 @@@ static int cpuusage_write(struct cgrou
                goto out;
        }
  
 -      for_each_possible_cpu(i) {
 -              u64 *cpuusage = percpu_ptr(ca->cpuusage, i);
 +      for_each_present_cpu(i)
 +              cpuacct_cpuusage_write(ca, i, 0);
  
 -              spin_lock_irq(&cpu_rq(i)->lock);
 -              *cpuusage = 0;
 -              spin_unlock_irq(&cpu_rq(i)->lock);
 -      }
  out:
        return err;
  }
  
 +static int cpuacct_percpu_seq_read(struct cgroup *cgroup, struct cftype *cft,
 +                                 struct seq_file *m)
 +{
 +      struct cpuacct *ca = cgroup_ca(cgroup);
 +      u64 percpu;
 +      int i;
 +
 +      for_each_present_cpu(i) {
 +              percpu = cpuacct_cpuusage_read(ca, i);
 +              seq_printf(m, "%llu ", (unsigned long long) percpu);
 +      }
 +      seq_printf(m, "\n");
 +      return 0;
 +}
 +
  static struct cftype files[] = {
        {
                .name = "usage",
                .read_u64 = cpuusage_read,
                .write_u64 = cpuusage_write,
        },
 +      {
 +              .name = "usage_percpu",
 +              .read_seq_string = cpuacct_percpu_seq_read,
 +      },
 +
  };
  
  static int cpuacct_populate(struct cgroup_subsys *ss, struct cgroup *cgrp)
  static void cpuacct_charge(struct task_struct *tsk, u64 cputime)
  {
        struct cpuacct *ca;
 +      int cpu;
  
        if (!cpuacct_subsys.active)
                return;
  
 +      cpu = task_cpu(tsk);
        ca = task_ca(tsk);
 -      if (ca) {
 -              u64 *cpuusage = percpu_ptr(ca->cpuusage, task_cpu(tsk));
  
 +      for (; ca; ca = ca->parent) {
 +              u64 *cpuusage = percpu_ptr(ca->cpuusage, cpu);
                *cpuusage += cputime;
        }
  }
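The cpuacct hunks above make the controller hierarchical: each group records its parent at creation and cpuacct_charge() walks the parent chain so usage is also accounted to every ancestor, while the new cpuacct_cpuusage_read()/write() helpers take rq->lock on 32-bit so the 64-bit counters can be read and reset safely. The charge walk, restated as a stand-alone sketch for clarity (illustrative, not a replacement for the code above):

	/* Restated for clarity: charge tsk's group and every ancestor on tsk's cpu. */
	static void cpuacct_charge_sketch(struct task_struct *tsk, u64 cputime)
	{
		int cpu = task_cpu(tsk);
		struct cpuacct *ca;

		for (ca = task_ca(tsk); ca; ca = ca->parent) {
			u64 *cpuusage = percpu_ptr(ca->cpuusage, cpu);

			*cpuusage += cputime;	/* add to this level's per-cpu counter */
		}
	}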
diff --combined kernel/sys.c
index ebe65c2c9873382a6017c35fb2954ad5bb54029b,5fc3a0cfb9946e01efc3907f06523964f39d59c1..d356d79e84ac5682553e76d1bb7f449e5f808e69
@@@ -112,17 -112,12 +112,17 @@@ EXPORT_SYMBOL(cad_pid)
  
  void (*pm_power_off_prepare)(void);
  
 +/*
 + * set the priority of a task
 + * - the caller must hold the RCU read lock
 + */
  static int set_one_prio(struct task_struct *p, int niceval, int error)
  {
 +      const struct cred *cred = current_cred(), *pcred = __task_cred(p);
        int no_nice;
  
 -      if (p->uid != current->euid &&
 -              p->euid != current->euid && !capable(CAP_SYS_NICE)) {
 +      if (pcred->uid  != cred->euid &&
 +          pcred->euid != cred->euid && !capable(CAP_SYS_NICE)) {
                error = -EPERM;
                goto out;
        }
@@@ -146,7 -141,6 +146,7 @@@ asmlinkage long sys_setpriority(int whi
  {
        struct task_struct *g, *p;
        struct user_struct *user;
 +      const struct cred *cred = current_cred();
        int error = -EINVAL;
        struct pid *pgrp;
  
                        } while_each_pid_thread(pgrp, PIDTYPE_PGID, p);
                        break;
                case PRIO_USER:
 -                      user = current->user;
 +                      user = (struct user_struct *) cred->user;
                        if (!who)
 -                              who = current->uid;
 -                      else
 -                              if ((who != current->uid) && !(user = find_user(who)))
 -                                      goto out_unlock;        /* No processes for this user */
 +                              who = cred->uid;
 +                      else if ((who != cred->uid) &&
 +                               !(user = find_user(who)))
 +                              goto out_unlock;        /* No processes for this user */
  
                        do_each_thread(g, p)
 -                              if (p->uid == who)
 +                              if (__task_cred(p)->uid == who)
                                        error = set_one_prio(p, niceval, error);
                        while_each_thread(g, p);
 -                      if (who != current->uid)
 +                      if (who != cred->uid)
                                free_uid(user);         /* For find_user() */
                        break;
        }
@@@ -211,7 -205,6 +211,7 @@@ asmlinkage long sys_getpriority(int whi
  {
        struct task_struct *g, *p;
        struct user_struct *user;
 +      const struct cred *cred = current_cred();
        long niceval, retval = -ESRCH;
        struct pid *pgrp;
  
                        } while_each_pid_thread(pgrp, PIDTYPE_PGID, p);
                        break;
                case PRIO_USER:
 -                      user = current->user;
 +                      user = (struct user_struct *) cred->user;
                        if (!who)
 -                              who = current->uid;
 -                      else
 -                              if ((who != current->uid) && !(user = find_user(who)))
 -                                      goto out_unlock;        /* No processes for this user */
 +                              who = cred->uid;
 +                      else if ((who != cred->uid) &&
 +                               !(user = find_user(who)))
 +                              goto out_unlock;        /* No processes for this user */
  
                        do_each_thread(g, p)
 -                              if (p->uid == who) {
 +                              if (__task_cred(p)->uid == who) {
                                        niceval = 20 - task_nice(p);
                                        if (niceval > retval)
                                                retval = niceval;
                                }
                        while_each_thread(g, p);
 -                      if (who != current->uid)
 +                      if (who != cred->uid)
                                free_uid(user);         /* for find_user() */
                        break;
        }
@@@ -479,48 -472,46 +479,48 @@@ void ctrl_alt_del(void
   */
  asmlinkage long sys_setregid(gid_t rgid, gid_t egid)
  {
 -      int old_rgid = current->gid;
 -      int old_egid = current->egid;
 -      int new_rgid = old_rgid;
 -      int new_egid = old_egid;
 +      const struct cred *old;
 +      struct cred *new;
        int retval;
  
 +      new = prepare_creds();
 +      if (!new)
 +              return -ENOMEM;
 +      old = current_cred();
 +
        retval = security_task_setgid(rgid, egid, (gid_t)-1, LSM_SETID_RE);
        if (retval)
 -              return retval;
 +              goto error;
  
 +      retval = -EPERM;
        if (rgid != (gid_t) -1) {
 -              if ((old_rgid == rgid) ||
 -                  (current->egid==rgid) ||
 +              if (old->gid == rgid ||
 +                  old->egid == rgid ||
                    capable(CAP_SETGID))
 -                      new_rgid = rgid;
 +                      new->gid = rgid;
                else
 -                      return -EPERM;
 +                      goto error;
        }
        if (egid != (gid_t) -1) {
 -              if ((old_rgid == egid) ||
 -                  (current->egid == egid) ||
 -                  (current->sgid == egid) ||
 +              if (old->gid == egid ||
 +                  old->egid == egid ||
 +                  old->sgid == egid ||
                    capable(CAP_SETGID))
 -                      new_egid = egid;
 +                      new->egid = egid;
                else
 -                      return -EPERM;
 -      }
 -      if (new_egid != old_egid) {
 -              set_dumpable(current->mm, suid_dumpable);
 -              smp_wmb();
 +                      goto error;
        }
 +
        if (rgid != (gid_t) -1 ||
 -          (egid != (gid_t) -1 && egid != old_rgid))
 -              current->sgid = new_egid;
 -      current->fsgid = new_egid;
 -      current->egid = new_egid;
 -      current->gid = new_rgid;
 -      key_fsgid_changed(current);
 -      proc_id_connector(current, PROC_EVENT_GID);
 -      return 0;
 +          (egid != (gid_t) -1 && egid != old->gid))
 +              new->sgid = new->egid;
 +      new->fsgid = new->egid;
 +
 +      return commit_creds(new);
 +
 +error:
 +      abort_creds(new);
 +      return retval;
  }
  
  /*
   */
  asmlinkage long sys_setgid(gid_t gid)
  {
 -      int old_egid = current->egid;
 +      const struct cred *old;
 +      struct cred *new;
        int retval;
  
 +      new = prepare_creds();
 +      if (!new)
 +              return -ENOMEM;
 +      old = current_cred();
 +
        retval = security_task_setgid(gid, (gid_t)-1, (gid_t)-1, LSM_SETID_ID);
        if (retval)
 -              return retval;
 +              goto error;
  
 -      if (capable(CAP_SETGID)) {
 -              if (old_egid != gid) {
 -                      set_dumpable(current->mm, suid_dumpable);
 -                      smp_wmb();
 -              }
 -              current->gid = current->egid = current->sgid = current->fsgid = gid;
 -      } else if ((gid == current->gid) || (gid == current->sgid)) {
 -              if (old_egid != gid) {
 -                      set_dumpable(current->mm, suid_dumpable);
 -                      smp_wmb();
 -              }
 -              current->egid = current->fsgid = gid;
 -      }
 +      retval = -EPERM;
 +      if (capable(CAP_SETGID))
 +              new->gid = new->egid = new->sgid = new->fsgid = gid;
 +      else if (gid == old->gid || gid == old->sgid)
 +              new->egid = new->fsgid = gid;
        else
 -              return -EPERM;
 +              goto error;
  
 -      key_fsgid_changed(current);
 -      proc_id_connector(current, PROC_EVENT_GID);
 -      return 0;
 +      return commit_creds(new);
 +
 +error:
 +      abort_creds(new);
 +      return retval;
  }
    
 -static int set_user(uid_t new_ruid, int dumpclear)
 +/*
 + * change the user struct in a credentials set to match the new UID
 + */
 +static int set_user(struct cred *new)
  {
        struct user_struct *new_user;
  
 -      new_user = alloc_uid(current->nsproxy->user_ns, new_ruid);
 +      new_user = alloc_uid(current_user_ns(), new->uid);
        if (!new_user)
                return -EAGAIN;
  
        if (atomic_read(&new_user->processes) >=
                                current->signal->rlim[RLIMIT_NPROC].rlim_cur &&
 -                      new_user != current->nsproxy->user_ns->root_user) {
 +                      new_user != INIT_USER) {
                free_uid(new_user);
                return -EAGAIN;
        }
  
 -      switch_uid(new_user);
 -
 -      if (dumpclear) {
 -              set_dumpable(current->mm, suid_dumpable);
 -              smp_wmb();
 -      }
 -      current->uid = new_ruid;
 +      free_uid(new->user);
 +      new->user = new_user;
        return 0;
  }
  
   */
  asmlinkage long sys_setreuid(uid_t ruid, uid_t euid)
  {
 -      int old_ruid, old_euid, old_suid, new_ruid, new_euid;
 +      const struct cred *old;
 +      struct cred *new;
        int retval;
  
 +      new = prepare_creds();
 +      if (!new)
 +              return -ENOMEM;
 +      old = current_cred();
 +
        retval = security_task_setuid(ruid, euid, (uid_t)-1, LSM_SETID_RE);
        if (retval)
 -              return retval;
 -
 -      new_ruid = old_ruid = current->uid;
 -      new_euid = old_euid = current->euid;
 -      old_suid = current->suid;
 +              goto error;
  
 +      retval = -EPERM;
        if (ruid != (uid_t) -1) {
 -              new_ruid = ruid;
 -              if ((old_ruid != ruid) &&
 -                  (current->euid != ruid) &&
 +              new->uid = ruid;
 +              if (old->uid != ruid &&
 +                  old->euid != ruid &&
                    !capable(CAP_SETUID))
 -                      return -EPERM;
 +                      goto error;
        }
  
        if (euid != (uid_t) -1) {
 -              new_euid = euid;
 -              if ((old_ruid != euid) &&
 -                  (current->euid != euid) &&
 -                  (current->suid != euid) &&
 +              new->euid = euid;
 +              if (old->uid != euid &&
 +                  old->euid != euid &&
 +                  old->suid != euid &&
                    !capable(CAP_SETUID))
 -                      return -EPERM;
 +                      goto error;
        }
  
 -      if (new_ruid != old_ruid && set_user(new_ruid, new_euid != old_euid) < 0)
 -              return -EAGAIN;
 +      retval = -EAGAIN;
 +      if (new->uid != old->uid && set_user(new) < 0)
 +              goto error;
  
 -      if (new_euid != old_euid) {
 -              set_dumpable(current->mm, suid_dumpable);
 -              smp_wmb();
 -      }
 -      current->fsuid = current->euid = new_euid;
        if (ruid != (uid_t) -1 ||
 -          (euid != (uid_t) -1 && euid != old_ruid))
 -              current->suid = current->euid;
 -      current->fsuid = current->euid;
 +          (euid != (uid_t) -1 && euid != old->uid))
 +              new->suid = new->euid;
 +      new->fsuid = new->euid;
  
 -      key_fsuid_changed(current);
 -      proc_id_connector(current, PROC_EVENT_UID);
 -
 -      return security_task_post_setuid(old_ruid, old_euid, old_suid, LSM_SETID_RE);
 -}
 +      retval = security_task_fix_setuid(new, old, LSM_SETID_RE);
 +      if (retval < 0)
 +              goto error;
  
 +      return commit_creds(new);
  
 +error:
 +      abort_creds(new);
 +      return retval;
 +}
                
  /*
   * setuid() is implemented like SysV with SAVED_IDS 
   */
  asmlinkage long sys_setuid(uid_t uid)
  {
 -      int old_euid = current->euid;
 -      int old_ruid, old_suid, new_suid;
 +      const struct cred *old;
 +      struct cred *new;
        int retval;
  
 +      new = prepare_creds();
 +      if (!new)
 +              return -ENOMEM;
 +      old = current_cred();
 +
        retval = security_task_setuid(uid, (uid_t)-1, (uid_t)-1, LSM_SETID_ID);
        if (retval)
 -              return retval;
 +              goto error;
  
 -      old_ruid = current->uid;
 -      old_suid = current->suid;
 -      new_suid = old_suid;
 -      
 +      retval = -EPERM;
        if (capable(CAP_SETUID)) {
 -              if (uid != old_ruid && set_user(uid, old_euid != uid) < 0)
 -                      return -EAGAIN;
 -              new_suid = uid;
 -      } else if ((uid != current->uid) && (uid != new_suid))
 -              return -EPERM;
 -
 -      if (old_euid != uid) {
 -              set_dumpable(current->mm, suid_dumpable);
 -              smp_wmb();
 +              new->suid = new->uid = uid;
 +              if (uid != old->uid && set_user(new) < 0) {
 +                      retval = -EAGAIN;
 +                      goto error;
 +              }
 +      } else if (uid != old->uid && uid != new->suid) {
 +              goto error;
        }
 -      current->fsuid = current->euid = uid;
 -      current->suid = new_suid;
  
 -      key_fsuid_changed(current);
 -      proc_id_connector(current, PROC_EVENT_UID);
 +      new->fsuid = new->euid = uid;
 +
 +      retval = security_task_fix_setuid(new, old, LSM_SETID_ID);
 +      if (retval < 0)
 +              goto error;
  
 -      return security_task_post_setuid(old_ruid, old_euid, old_suid, LSM_SETID_ID);
 +      return commit_creds(new);
 +
 +error:
 +      abort_creds(new);
 +      return retval;
  }
  
  
   */
  asmlinkage long sys_setresuid(uid_t ruid, uid_t euid, uid_t suid)
  {
 -      int old_ruid = current->uid;
 -      int old_euid = current->euid;
 -      int old_suid = current->suid;
 +      const struct cred *old;
 +      struct cred *new;
        int retval;
  
 +      new = prepare_creds();
 +      if (!new)
 +              return -ENOMEM;
 +
        retval = security_task_setuid(ruid, euid, suid, LSM_SETID_RES);
        if (retval)
 -              return retval;
 +              goto error;
 +      old = current_cred();
  
 +      retval = -EPERM;
        if (!capable(CAP_SETUID)) {
 -              if ((ruid != (uid_t) -1) && (ruid != current->uid) &&
 -                  (ruid != current->euid) && (ruid != current->suid))
 -                      return -EPERM;
 -              if ((euid != (uid_t) -1) && (euid != current->uid) &&
 -                  (euid != current->euid) && (euid != current->suid))
 -                      return -EPERM;
 -              if ((suid != (uid_t) -1) && (suid != current->uid) &&
 -                  (suid != current->euid) && (suid != current->suid))
 -                      return -EPERM;
 +              if (ruid != (uid_t) -1 && ruid != old->uid &&
 +                  ruid != old->euid  && ruid != old->suid)
 +                      goto error;
 +              if (euid != (uid_t) -1 && euid != old->uid &&
 +                  euid != old->euid  && euid != old->suid)
 +                      goto error;
 +              if (suid != (uid_t) -1 && suid != old->uid &&
 +                  suid != old->euid  && suid != old->suid)
 +                      goto error;
        }
 +
 +      retval = -EAGAIN;
        if (ruid != (uid_t) -1) {
 -              if (ruid != current->uid && set_user(ruid, euid != current->euid) < 0)
 -                      return -EAGAIN;
 +              new->uid = ruid;
 +              if (ruid != old->uid && set_user(new) < 0)
 +                      goto error;
        }
 -      if (euid != (uid_t) -1) {
 -              if (euid != current->euid) {
 -                      set_dumpable(current->mm, suid_dumpable);
 -                      smp_wmb();
 -              }
 -              current->euid = euid;
 -      }
 -      current->fsuid = current->euid;
 +      if (euid != (uid_t) -1)
 +              new->euid = euid;
        if (suid != (uid_t) -1)
 -              current->suid = suid;
 +              new->suid = suid;
 +      new->fsuid = new->euid;
 +
 +      retval = security_task_fix_setuid(new, old, LSM_SETID_RES);
 +      if (retval < 0)
 +              goto error;
  
 -      key_fsuid_changed(current);
 -      proc_id_connector(current, PROC_EVENT_UID);
 +      return commit_creds(new);
  
 -      return security_task_post_setuid(old_ruid, old_euid, old_suid, LSM_SETID_RES);
 +error:
 +      abort_creds(new);
 +      return retval;
  }
  
  asmlinkage long sys_getresuid(uid_t __user *ruid, uid_t __user *euid, uid_t __user *suid)
  {
 +      const struct cred *cred = current_cred();
        int retval;
  
 -      if (!(retval = put_user(current->uid, ruid)) &&
 -          !(retval = put_user(current->euid, euid)))
 -              retval = put_user(current->suid, suid);
 +      if (!(retval   = put_user(cred->uid,  ruid)) &&
 +          !(retval   = put_user(cred->euid, euid)))
 +              retval = put_user(cred->suid, suid);
  
        return retval;
  }
   */
  asmlinkage long sys_setresgid(gid_t rgid, gid_t egid, gid_t sgid)
  {
 +      const struct cred *old;
 +      struct cred *new;
        int retval;
  
 +      new = prepare_creds();
 +      if (!new)
 +              return -ENOMEM;
 +      old = current_cred();
 +
        retval = security_task_setgid(rgid, egid, sgid, LSM_SETID_RES);
        if (retval)
 -              return retval;
 +              goto error;
  
 +      retval = -EPERM;
        if (!capable(CAP_SETGID)) {
 -              if ((rgid != (gid_t) -1) && (rgid != current->gid) &&
 -                  (rgid != current->egid) && (rgid != current->sgid))
 -                      return -EPERM;
 -              if ((egid != (gid_t) -1) && (egid != current->gid) &&
 -                  (egid != current->egid) && (egid != current->sgid))
 -                      return -EPERM;
 -              if ((sgid != (gid_t) -1) && (sgid != current->gid) &&
 -                  (sgid != current->egid) && (sgid != current->sgid))
 -                      return -EPERM;
 +              if (rgid != (gid_t) -1 && rgid != old->gid &&
 +                  rgid != old->egid  && rgid != old->sgid)
 +                      goto error;
 +              if (egid != (gid_t) -1 && egid != old->gid &&
 +                  egid != old->egid  && egid != old->sgid)
 +                      goto error;
 +              if (sgid != (gid_t) -1 && sgid != old->gid &&
 +                  sgid != old->egid  && sgid != old->sgid)
 +                      goto error;
        }
 -      if (egid != (gid_t) -1) {
 -              if (egid != current->egid) {
 -                      set_dumpable(current->mm, suid_dumpable);
 -                      smp_wmb();
 -              }
 -              current->egid = egid;
 -      }
 -      current->fsgid = current->egid;
 +
        if (rgid != (gid_t) -1)
 -              current->gid = rgid;
 +              new->gid = rgid;
 +      if (egid != (gid_t) -1)
 +              new->egid = egid;
        if (sgid != (gid_t) -1)
 -              current->sgid = sgid;
 +              new->sgid = sgid;
 +      new->fsgid = new->egid;
  
 -      key_fsgid_changed(current);
 -      proc_id_connector(current, PROC_EVENT_GID);
 -      return 0;
 +      return commit_creds(new);
 +
 +error:
 +      abort_creds(new);
 +      return retval;
  }
  
  asmlinkage long sys_getresgid(gid_t __user *rgid, gid_t __user *egid, gid_t __user *sgid)
  {
 +      const struct cred *cred = current_cred();
        int retval;
  
 -      if (!(retval = put_user(current->gid, rgid)) &&
 -          !(retval = put_user(current->egid, egid)))
 -              retval = put_user(current->sgid, sgid);
 +      if (!(retval   = put_user(cred->gid,  rgid)) &&
 +          !(retval   = put_user(cred->egid, egid)))
 +              retval = put_user(cred->sgid, sgid);
  
        return retval;
  }
   */
  asmlinkage long sys_setfsuid(uid_t uid)
  {
 -      int old_fsuid;
 +      const struct cred *old;
 +      struct cred *new;
 +      uid_t old_fsuid;
  
 -      old_fsuid = current->fsuid;
 -      if (security_task_setuid(uid, (uid_t)-1, (uid_t)-1, LSM_SETID_FS))
 -              return old_fsuid;
 +      new = prepare_creds();
 +      if (!new)
 +              return current_fsuid();
 +      old = current_cred();
 +      old_fsuid = old->fsuid;
  
 -      if (uid == current->uid || uid == current->euid ||
 -          uid == current->suid || uid == current->fsuid || 
 +      if (security_task_setuid(uid, (uid_t)-1, (uid_t)-1, LSM_SETID_FS) < 0)
 +              goto error;
 +
 +      if (uid == old->uid  || uid == old->euid  ||
 +          uid == old->suid || uid == old->fsuid ||
            capable(CAP_SETUID)) {
                if (uid != old_fsuid) {
 -                      set_dumpable(current->mm, suid_dumpable);
 -                      smp_wmb();
 +                      new->fsuid = uid;
 +                      if (security_task_fix_setuid(new, old, LSM_SETID_FS) == 0)
 +                              goto change_okay;
                }
 -              current->fsuid = uid;
        }
  
 -      key_fsuid_changed(current);
 -      proc_id_connector(current, PROC_EVENT_UID);
 -
 -      security_task_post_setuid(old_fsuid, (uid_t)-1, (uid_t)-1, LSM_SETID_FS);
 +error:
 +      abort_creds(new);
 +      return old_fsuid;
  
 +change_okay:
 +      commit_creds(new);
        return old_fsuid;
  }
  
   */
  asmlinkage long sys_setfsgid(gid_t gid)
  {
 -      int old_fsgid;
 +      const struct cred *old;
 +      struct cred *new;
 +      gid_t old_fsgid;
 +
 +      new = prepare_creds();
 +      if (!new)
 +              return current_fsgid();
 +      old = current_cred();
 +      old_fsgid = old->fsgid;
  
 -      old_fsgid = current->fsgid;
        if (security_task_setgid(gid, (gid_t)-1, (gid_t)-1, LSM_SETID_FS))
 -              return old_fsgid;
 +              goto error;
  
 -      if (gid == current->gid || gid == current->egid ||
 -          gid == current->sgid || gid == current->fsgid || 
 +      if (gid == old->gid  || gid == old->egid  ||
 +          gid == old->sgid || gid == old->fsgid ||
            capable(CAP_SETGID)) {
                if (gid != old_fsgid) {
 -                      set_dumpable(current->mm, suid_dumpable);
 -                      smp_wmb();
 +                      new->fsgid = gid;
 +                      goto change_okay;
                }
 -              current->fsgid = gid;
 -              key_fsgid_changed(current);
 -              proc_id_connector(current, PROC_EVENT_GID);
        }
 +
 +error:
 +      abort_creds(new);
 +      return old_fsgid;
 +
 +change_okay:
 +      commit_creds(new);
        return old_fsgid;
  }
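
Note that sys_setfsuid()/sys_setfsgid() keep their historical calling convention: they return the previous fsuid/fsgid whether or not the change was applied, so there is no direct error code. A hedged user-space sketch of that convention (setfsuid() is declared in <sys/fsuid.h> on glibc; setting the fsuid to the caller's own real uid is always permitted):

#include <stdio.h>
#include <unistd.h>
#include <sys/fsuid.h>

int main(void)
{
	/* The return value is the *previous* fsuid, not 0 or -1. */
	int prev = setfsuid(getuid());

	printf("previous fsuid was %d\n", prev);
	return 0;
}
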
  
@@@ -907,8 -858,8 +907,8 @@@ void do_sys_times(struct tms *tms
        struct task_cputime cputime;
        cputime_t cutime, cstime;
  
-       spin_lock_irq(&current->sighand->siglock);
        thread_group_cputime(current, &cputime);
+       spin_lock_irq(&current->sighand->siglock);
        cutime = current->signal->cutime;
        cstime = current->signal->cstime;
        spin_unlock_irq(&current->sighand->siglock);
@@@ -1167,7 -1118,7 +1167,7 @@@ EXPORT_SYMBOL(groups_free)
  
  /* export the group_info to a user-space array */
  static int groups_to_user(gid_t __user *grouplist,
 -    struct group_info *group_info)
 +                        const struct group_info *group_info)
  {
        int i;
        unsigned int count = group_info->ngroups;
@@@ -1235,7 -1186,7 +1235,7 @@@ static void groups_sort(struct group_in
  }
  
  /* a simple bsearch */
 -int groups_search(struct group_info *group_info, gid_t grp)
 +int groups_search(const struct group_info *group_info, gid_t grp)
  {
        unsigned int left, right;
  
        return 0;
  }
  
 -/* validate and set current->group_info */
 -int set_current_groups(struct group_info *group_info)
 +/**
 + * set_groups - Change a group subscription in a set of credentials
 + * @new: The newly prepared set of credentials to alter
 + * @group_info: The group list to install
 + *
 + * Validate a group subscription and, if valid, insert it into a set
 + * of credentials.
 + */
 +int set_groups(struct cred *new, struct group_info *group_info)
  {
        int retval;
 -      struct group_info *old_info;
  
        retval = security_task_setgroups(group_info);
        if (retval)
                return retval;
  
 +      put_group_info(new->group_info);
        groups_sort(group_info);
        get_group_info(group_info);
 +      new->group_info = group_info;
 +      return 0;
 +}
 +
 +EXPORT_SYMBOL(set_groups);
  
 -      task_lock(current);
 -      old_info = current->group_info;
 -      current->group_info = group_info;
 -      task_unlock(current);
 +/**
 + * set_current_groups - Change current's group subscription
 + * @group_info: The group list to impose
 + *
 + * Validate a group subscription and, if valid, impose it upon current's task
 + * security record.
 + */
 +int set_current_groups(struct group_info *group_info)
 +{
 +      struct cred *new;
 +      int ret;
  
 -      put_group_info(old_info);
 +      new = prepare_creds();
 +      if (!new)
 +              return -ENOMEM;
  
 -      return 0;
 +      ret = set_groups(new, group_info);
 +      if (ret < 0) {
 +              abort_creds(new);
 +              return ret;
 +      }
 +
 +      return commit_creds(new);
  }
  
  EXPORT_SYMBOL(set_current_groups);
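
set_groups() is split out so that code which already holds a prepared credential set can splice a group list into it, while set_current_groups() keeps the old one-call interface for existing callers. A hedged in-kernel sketch of the latter (the function name is illustrative; set_current_groups() takes its own reference via set_groups(), so the caller still drops the allocation reference):

static int example_install_single_group(gid_t gid)
{
	struct group_info *gi;
	int ret;

	gi = groups_alloc(1);
	if (!gi)
		return -ENOMEM;
	GROUP_AT(gi, 0) = gid;		/* fill the single slot */

	ret = set_current_groups(gi);	/* sorts, validates and installs */
	put_group_info(gi);		/* drop our allocation reference */
	return ret;
}
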
  
  asmlinkage long sys_getgroups(int gidsetsize, gid_t __user *grouplist)
  {
 -      int i = 0;
 -
 -      /*
 -       *      SMP: Nobody else can change our grouplist. Thus we are
 -       *      safe.
 -       */
 +      const struct cred *cred = current_cred();
 +      int i;
  
        if (gidsetsize < 0)
                return -EINVAL;
  
        /* no need to grab task_lock here; it cannot change */
 -      i = current->group_info->ngroups;
 +      i = cred->group_info->ngroups;
        if (gidsetsize) {
                if (i > gidsetsize) {
                        i = -EINVAL;
                        goto out;
                }
 -              if (groups_to_user(grouplist, current->group_info)) {
 +              if (groups_to_user(grouplist, cred->group_info)) {
                        i = -EFAULT;
                        goto out;
                }
@@@ -1368,11 -1296,9 +1368,11 @@@ asmlinkage long sys_setgroups(int gidse
   */
  int in_group_p(gid_t grp)
  {
 +      const struct cred *cred = current_cred();
        int retval = 1;
 -      if (grp != current->fsgid)
 -              retval = groups_search(current->group_info, grp);
 +
 +      if (grp != cred->fsgid)
 +              retval = groups_search(cred->group_info, grp);
        return retval;
  }
  
@@@ -1380,11 -1306,9 +1380,11 @@@ EXPORT_SYMBOL(in_group_p)
  
  int in_egroup_p(gid_t grp)
  {
 +      const struct cred *cred = current_cred();
        int retval = 1;
 -      if (grp != current->egid)
 -              retval = groups_search(current->group_info, grp);
 +
 +      if (grp != cred->egid)
 +              retval = groups_search(cred->group_info, grp);
        return retval;
  }
  
@@@ -1700,56 -1624,50 +1700,56 @@@ asmlinkage long sys_umask(int mask
  asmlinkage long sys_prctl(int option, unsigned long arg2, unsigned long arg3,
                          unsigned long arg4, unsigned long arg5)
  {
 -      long error = 0;
 +      struct task_struct *me = current;
 +      unsigned char comm[sizeof(me->comm)];
 +      long error;
  
 -      if (security_task_prctl(option, arg2, arg3, arg4, arg5, &error))
 +      error = security_task_prctl(option, arg2, arg3, arg4, arg5);
 +      if (error != -ENOSYS)
                return error;
  
 +      error = 0;
        switch (option) {
                case PR_SET_PDEATHSIG:
                        if (!valid_signal(arg2)) {
                                error = -EINVAL;
                                break;
                        }
 -                      current->pdeath_signal = arg2;
 +                      me->pdeath_signal = arg2;
 +                      error = 0;
                        break;
                case PR_GET_PDEATHSIG:
 -                      error = put_user(current->pdeath_signal, (int __user *)arg2);
 +                      error = put_user(me->pdeath_signal, (int __user *)arg2);
                        break;
                case PR_GET_DUMPABLE:
 -                      error = get_dumpable(current->mm);
 +                      error = get_dumpable(me->mm);
                        break;
                case PR_SET_DUMPABLE:
                        if (arg2 < 0 || arg2 > 1) {
                                error = -EINVAL;
                                break;
                        }
 -                      set_dumpable(current->mm, arg2);
 +                      set_dumpable(me->mm, arg2);
 +                      error = 0;
                        break;
  
                case PR_SET_UNALIGN:
 -                      error = SET_UNALIGN_CTL(current, arg2);
 +                      error = SET_UNALIGN_CTL(me, arg2);
                        break;
                case PR_GET_UNALIGN:
 -                      error = GET_UNALIGN_CTL(current, arg2);
 +                      error = GET_UNALIGN_CTL(me, arg2);
                        break;
                case PR_SET_FPEMU:
 -                      error = SET_FPEMU_CTL(current, arg2);
 +                      error = SET_FPEMU_CTL(me, arg2);
                        break;
                case PR_GET_FPEMU:
 -                      error = GET_FPEMU_CTL(current, arg2);
 +                      error = GET_FPEMU_CTL(me, arg2);
                        break;
                case PR_SET_FPEXC:
 -                      error = SET_FPEXC_CTL(current, arg2);
 +                      error = SET_FPEXC_CTL(me, arg2);
                        break;
                case PR_GET_FPEXC:
 -                      error = GET_FPEXC_CTL(current, arg2);
 +                      error = GET_FPEXC_CTL(me, arg2);
                        break;
                case PR_GET_TIMING:
                        error = PR_TIMING_STATISTICAL;
                case PR_SET_TIMING:
                        if (arg2 != PR_TIMING_STATISTICAL)
                                error = -EINVAL;
 +                      else
 +                              error = 0;
                        break;
  
 -              case PR_SET_NAME: {
 -                      struct task_struct *me = current;
 -                      unsigned char ncomm[sizeof(me->comm)];
 -
 -                      ncomm[sizeof(me->comm)-1] = 0;
 -                      if (strncpy_from_user(ncomm, (char __user *)arg2,
 -                                              sizeof(me->comm)-1) < 0)
 +              case PR_SET_NAME:
 +                      comm[sizeof(me->comm)-1] = 0;
 +                      if (strncpy_from_user(comm, (char __user *)arg2,
 +                                            sizeof(me->comm) - 1) < 0)
                                return -EFAULT;
 -                      set_task_comm(me, ncomm);
 +                      set_task_comm(me, comm);
                        return 0;
 -              }
 -              case PR_GET_NAME: {
 -                      struct task_struct *me = current;
 -                      unsigned char tcomm[sizeof(me->comm)];
 -
 -                      get_task_comm(tcomm, me);
 -                      if (copy_to_user((char __user *)arg2, tcomm, sizeof(tcomm)))
 +              case PR_GET_NAME:
 +                      get_task_comm(comm, me);
 +                      if (copy_to_user((char __user *)arg2, comm,
 +                                       sizeof(comm)))
                                return -EFAULT;
                        return 0;
 -              }
                case PR_GET_ENDIAN:
 -                      error = GET_ENDIAN(current, arg2);
 +                      error = GET_ENDIAN(me, arg2);
                        break;
                case PR_SET_ENDIAN:
 -                      error = SET_ENDIAN(current, arg2);
 +                      error = SET_ENDIAN(me, arg2);
                        break;
  
                case PR_GET_SECCOMP:
                                        current->default_timer_slack_ns;
                        else
                                current->timer_slack_ns = arg2;
 +                      error = 0;
                        break;
                default:
                        error = -EINVAL;
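
The PR_SET_NAME/PR_GET_NAME cases above now share one on-stack comm[] buffer, but the user-visible behaviour is unchanged: a 16-byte, NUL-terminated task name, truncated if longer. A small runnable check:

#include <stdio.h>
#include <sys/prctl.h>

int main(void)
{
	char name[16];		/* TASK_COMM_LEN: 15 characters + NUL */

	if (prctl(PR_SET_NAME, "example-worker", 0, 0, 0) != 0)
		perror("PR_SET_NAME");

	if (prctl(PR_GET_NAME, name, 0, 0, 0) != 0)
		perror("PR_GET_NAME");
	else
		printf("comm is now \"%s\"\n", name);
	return 0;
}
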
diff --combined mm/memory.c
index f01b7eed6e16c4e3f32e039b7f2223ca598f40aa,fc031d68327e5fad33130b15d50a020b7b9b31a8..0a2010a9518c499efe694c9ed0d42f5bd18e459b
@@@ -669,16 -669,6 +669,16 @@@ int copy_page_range(struct mm_struct *d
        if (is_vm_hugetlb_page(vma))
                return copy_hugetlb_page_range(dst_mm, src_mm, vma);
  
 +      if (unlikely(is_pfn_mapping(vma))) {
 +              /*
 +               * We do not free on error cases below as remove_vma
 +               * gets called on error from higher level routine
 +               */
 +              ret = track_pfn_vma_copy(vma);
 +              if (ret)
 +                      return ret;
 +      }
 +
        /*
         * We need to invalidate the secondary MMU mappings only when
         * there could be a permission downgrade on the ptes of the
@@@ -925,9 -915,6 +925,9 @@@ unsigned long unmap_vmas(struct mmu_gat
                if (vma->vm_flags & VM_ACCOUNT)
                        *nr_accounted += (end - start) >> PAGE_SHIFT;
  
 +              if (unlikely(is_pfn_mapping(vma)))
 +                      untrack_pfn_vma(vma, 0, 0);
 +
                while (start != end) {
                        if (!tlb_start_valid) {
                                tlb_start = start;
@@@ -1443,7 -1430,6 +1443,7 @@@ out
  int vm_insert_pfn(struct vm_area_struct *vma, unsigned long addr,
                        unsigned long pfn)
  {
 +      int ret;
        /*
         * Technically, architectures with pte_special can avoid all these
         * restrictions (same for remap_pfn_range).  However we would like
  
        if (addr < vma->vm_start || addr >= vma->vm_end)
                return -EFAULT;
 -      return insert_pfn(vma, addr, pfn, vma->vm_page_prot);
 +      if (track_pfn_vma_new(vma, vma->vm_page_prot, pfn, PAGE_SIZE))
 +              return -EINVAL;
 +
 +      ret = insert_pfn(vma, addr, pfn, vma->vm_page_prot);
 +
 +      if (ret)
 +              untrack_pfn_vma(vma, pfn, PAGE_SIZE);
 +
 +      return ret;
  }
  EXPORT_SYMBOL(vm_insert_pfn);
  
@@@ -1597,17 -1575,14 +1597,17 @@@ int remap_pfn_range(struct vm_area_stru
         * behaviour that some programs depend on. We mark the "original"
         * un-COW'ed pages by matching them up with "vma->vm_pgoff".
         */
 -      if (is_cow_mapping(vma->vm_flags)) {
 -              if (addr != vma->vm_start || end != vma->vm_end)
 -                      return -EINVAL;
 +      if (addr == vma->vm_start && end == vma->vm_end)
                vma->vm_pgoff = pfn;
 -      }
 +      else if (is_cow_mapping(vma->vm_flags))
 +              return -EINVAL;
  
        vma->vm_flags |= VM_IO | VM_RESERVED | VM_PFNMAP;
  
 +      err = track_pfn_vma_new(vma, prot, pfn, PAGE_ALIGN(size));
 +      if (err)
 +              return -EINVAL;
 +
        BUG_ON(addr >= end);
        pfn -= addr >> PAGE_SHIFT;
        pgd = pgd_offset(mm, addr);
                if (err)
                        break;
        } while (pgd++, addr = next, addr != end);
 +
 +      if (err)
 +              untrack_pfn_vma(vma, pfn, PAGE_ALIGN(size));
 +
        return err;
  }
  EXPORT_SYMBOL(remap_pfn_range);
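
remap_pfn_range() is the path most drivers hit, so the track_pfn_vma_new()/untrack_pfn_vma() pairing added above is what gives the PFN tracker a record of their mappings. A hedged sketch of the usual caller, a character-device ->mmap handler (EXAMPLE_PHYS_BASE is a made-up register base, not something from this tree):

static int example_dev_mmap(struct file *file, struct vm_area_struct *vma)
{
	unsigned long size = vma->vm_end - vma->vm_start;
	unsigned long pfn = EXAMPLE_PHYS_BASE >> PAGE_SHIFT;	/* hypothetical */

	/* remap_pfn_range() now registers the range with the PFN tracker
	 * and unwinds the registration if the page-table walk fails. */
	return remap_pfn_range(vma, vma->vm_start, pfn, size,
			       vma->vm_page_prot);
}
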
@@@ -2894,9 -2865,9 +2894,9 @@@ int in_gate_area_no_task(unsigned long 
  #endif        /* __HAVE_ARCH_GATE_AREA */
  
  #ifdef CONFIG_HAVE_IOREMAP_PROT
 -static resource_size_t follow_phys(struct vm_area_struct *vma,
 -                      unsigned long address, unsigned int flags,
 -                      unsigned long *prot)
 +int follow_phys(struct vm_area_struct *vma,
 +              unsigned long address, unsigned int flags,
 +              unsigned long *prot, resource_size_t *phys)
  {
        pgd_t *pgd;
        pud_t *pud;
        spinlock_t *ptl;
        resource_size_t phys_addr = 0;
        struct mm_struct *mm = vma->vm_mm;
 +      int ret = -EINVAL;
  
 -      VM_BUG_ON(!(vma->vm_flags & (VM_IO | VM_PFNMAP)));
 +      if (!(vma->vm_flags & (VM_IO | VM_PFNMAP)))
 +              goto out;
  
        pgd = pgd_offset(mm, address);
        if (pgd_none(*pgd) || unlikely(pgd_bad(*pgd)))
 -              goto no_page_table;
 +              goto out;
  
        pud = pud_offset(pgd, address);
        if (pud_none(*pud) || unlikely(pud_bad(*pud)))
 -              goto no_page_table;
 +              goto out;
  
        pmd = pmd_offset(pud, address);
        if (pmd_none(*pmd) || unlikely(pmd_bad(*pmd)))
 -              goto no_page_table;
 +              goto out;
  
        /* We cannot handle huge page PFN maps. Luckily they don't exist. */
        if (pmd_huge(*pmd))
 -              goto no_page_table;
 +              goto out;
  
        ptep = pte_offset_map_lock(mm, pmd, address, &ptl);
        if (!ptep)
        phys_addr <<= PAGE_SHIFT; /* Shift here to avoid overflow on PAE */
  
        *prot = pgprot_val(pte_pgprot(pte));
 +      *phys = phys_addr;
 +      ret = 0;
  
  unlock:
        pte_unmap_unlock(ptep, ptl);
  out:
 -      return phys_addr;
 -no_page_table:
 -      return 0;
 +      return ret;
  }
  
  int generic_access_phys(struct vm_area_struct *vma, unsigned long addr,
        void *maddr;
        int offset = addr & (PAGE_SIZE-1);
  
 -      if (!(vma->vm_flags & (VM_IO | VM_PFNMAP)))
 -              return -EINVAL;
 -
 -      phys_addr = follow_phys(vma, addr, write, &prot);
 -
 -      if (!phys_addr)
 +      if (follow_phys(vma, addr, write, &prot, &phys_addr))
                return -EINVAL;
  
        maddr = ioremap_prot(phys_addr, PAGE_SIZE, prot);
@@@ -3075,3 -3049,18 +3075,18 @@@ void print_vma_addr(char *prefix, unsig
        }
        up_read(&current->mm->mmap_sem);
  }
+ #ifdef CONFIG_PROVE_LOCKING
+ void might_fault(void)
+ {
+       might_sleep();
+       /*
+        * it would be nicer only to annotate paths which are not under
+        * pagefault_disable, however that requires a larger audit and
+        * providing helpers like get_user_atomic.
+        */
+       if (!in_atomic() && current->mm)
+               might_lock_read(&current->mm->mmap_sem);
+ }
+ EXPORT_SYMBOL(might_fault);
+ #endif
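
might_fault() is the lockdep annotation for any path that may take a page fault on a user address: it asserts that the caller can sleep and, outside of atomic context, records a potential read acquisition of mmap_sem. A hedged sketch of the kind of helper that would carry it; this wrapper is illustrative only, the real annotations belong in the user-copy paths themselves:

static inline unsigned long
example_copy_to_user(void __user *to, const void *from, unsigned long n)
{
	might_fault();		/* may sleep and take mmap_sem for read */
	return copy_to_user(to, from, n);
}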