Merge branch 'x86-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel...

author Linus Torvalds <torvalds@linux-foundation.org>

Fri, 8 Jan 2016 23:21:48 +0000 (15:21 -0800)

committer Linus Torvalds <torvalds@linux-foundation.org>

Fri, 8 Jan 2016 23:21:48 +0000 (15:21 -0800)
author Linus Torvalds <torvalds@linux-foundation.org>
Fri, 8 Jan 2016 23:21:48 +0000 (15:21 -0800)
committer Linus Torvalds <torvalds@linux-foundation.org>
Fri, 8 Jan 2016 23:21:48 +0000 (15:21 -0800)
diff --git a/arch/x86/entry/common.c b/arch/x86/entry/common.c

index a89fdbc1f0beb7e7198c7a625767a2cfe32ca9e3..03663740c86655cabf21504578e97d73d98595be 100644 (file)
--- a/arch/x86/entry/common.c
+++ b/arch/x86/entry/common.c
@@ -421,7 +421,7 @@ __visible long do_fast_syscall_32(struct pt_regs *regs)
         regs->ip = landing_pad;
  
         /*
-        * Fetch ECX from where the vDSO stashed it.
+        * Fetch EBP from where the vDSO stashed it.
          *
          * WARNING: We are in CONTEXT_USER and RCU isn't paying attention!
          */
@@ -432,10 +432,10 @@ __visible long do_fast_syscall_32(struct pt_regs *regs)
                  * Micro-optimization: the pointer we're following is explicitly
                  * 32 bits, so it can't be out of range.
                  */
-               __get_user(*(u32 *)&regs->cx,
+               __get_user(*(u32 *)&regs->bp,
                             (u32 __user __force *)(unsigned long)(u32)regs->sp)
  #else
-               get_user(*(u32 *)&regs->cx,
+               get_user(*(u32 *)&regs->bp,
                          (u32 __user __force *)(unsigned long)(u32)regs->sp)
  #endif
                 ) {
diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S

index 3eb572ed3d7ad438d8dfd1627b5b4121314c9f67..f3b6d54e0042b7f08c25a82283f88e8193c70c4a 100644 (file)
--- a/arch/x86/entry/entry_32.S
+++ b/arch/x86/entry/entry_32.S
@@ -292,7 +292,7 @@ ENTRY(entry_SYSENTER_32)
         movl    TSS_sysenter_sp0(%esp), %esp
  sysenter_past_esp:
         pushl   $__USER_DS              /* pt_regs->ss */
-       pushl   %ecx                    /* pt_regs->cx */
+       pushl   %ebp                    /* pt_regs->sp (stashed in bp) */
         pushfl                          /* pt_regs->flags (except IF = 0) */
         orl     $X86_EFLAGS_IF, (%esp)  /* Fix IF */
         pushl   $__USER_CS              /* pt_regs->cs */
@@ -308,8 +308,9 @@ sysenter_past_esp:
  
         movl    %esp, %eax
         call    do_fast_syscall_32
-       testl   %eax, %eax
-       jz      .Lsyscall_32_done
+       /* XEN PV guests always use IRET path */
+       ALTERNATIVE "testl %eax, %eax; jz .Lsyscall_32_done", \
+                   "jmp .Lsyscall_32_done", X86_FEATURE_XENPV
  
  /* Opportunistic SYSEXIT */
         TRACE_IRQS_ON                   /* User mode traces as IRQs on. */
diff --git a/arch/x86/entry/entry_64_compat.S b/arch/x86/entry/entry_64_compat.S

index c3201830a85ee8dcddabb0cab864545d99d47fd3..6a1ae3751e824d9917e65c136e9f7de3cc5f4f47 100644 (file)
--- a/arch/x86/entry/entry_64_compat.S
+++ b/arch/x86/entry/entry_64_compat.S
@@ -63,7 +63,7 @@ ENTRY(entry_SYSENTER_compat)
  
         /* Construct struct pt_regs on stack */
         pushq   $__USER32_DS            /* pt_regs->ss */
-       pushq   %rcx                    /* pt_regs->sp */
+       pushq   %rbp                    /* pt_regs->sp (stashed in bp) */
  
         /*
          * Push flags.  This is nasty.  First, interrupts are currently
@@ -82,14 +82,14 @@ ENTRY(entry_SYSENTER_compat)
         pushq   %rdi                    /* pt_regs->di */
         pushq   %rsi                    /* pt_regs->si */
         pushq   %rdx                    /* pt_regs->dx */
-       pushq   %rcx                    /* pt_regs->cx (will be overwritten) */
+       pushq   %rcx                    /* pt_regs->cx */
         pushq   $-ENOSYS                /* pt_regs->ax */
         pushq   %r8                     /* pt_regs->r8  = 0 */
         pushq   %r8                     /* pt_regs->r9  = 0 */
         pushq   %r8                     /* pt_regs->r10 = 0 */
         pushq   %r8                     /* pt_regs->r11 = 0 */
         pushq   %rbx                    /* pt_regs->rbx */
-       pushq   %rbp                    /* pt_regs->rbp */
+       pushq   %rbp                    /* pt_regs->rbp (will be overwritten) */
         pushq   %r8                     /* pt_regs->r12 = 0 */
         pushq   %r8                     /* pt_regs->r13 = 0 */
         pushq   %r8                     /* pt_regs->r14 = 0 */
@@ -121,8 +121,9 @@ sysenter_flags_fixed:
  
         movq    %rsp, %rdi
         call    do_fast_syscall_32
-       testl   %eax, %eax
-       jz      .Lsyscall_32_done
+       /* XEN PV guests always use IRET path */
+       ALTERNATIVE "testl %eax, %eax; jz .Lsyscall_32_done", \
+                   "jmp .Lsyscall_32_done", X86_FEATURE_XENPV
         jmp     sysret32_from_system_call
  
  sysenter_fix_flags:
@@ -178,7 +179,7 @@ ENTRY(entry_SYSCALL_compat)
         pushq   %rdi                    /* pt_regs->di */
         pushq   %rsi                    /* pt_regs->si */
         pushq   %rdx                    /* pt_regs->dx */
-       pushq   %rcx                    /* pt_regs->cx (will be overwritten) */
+       pushq   %rbp                    /* pt_regs->cx (stashed in bp) */
         pushq   $-ENOSYS                /* pt_regs->ax */
         xorq    %r8,%r8
         pushq   %r8                     /* pt_regs->r8  = 0 */
@@ -186,7 +187,7 @@ ENTRY(entry_SYSCALL_compat)
         pushq   %r8                     /* pt_regs->r10 = 0 */
         pushq   %r8                     /* pt_regs->r11 = 0 */
         pushq   %rbx                    /* pt_regs->rbx */
-       pushq   %rbp                    /* pt_regs->rbp */
+       pushq   %rbp                    /* pt_regs->rbp (will be overwritten) */
         pushq   %r8                     /* pt_regs->r12 = 0 */
         pushq   %r8                     /* pt_regs->r13 = 0 */
         pushq   %r8                     /* pt_regs->r14 = 0 */
@@ -200,8 +201,9 @@ ENTRY(entry_SYSCALL_compat)
  
         movq    %rsp, %rdi
         call    do_fast_syscall_32
-       testl   %eax, %eax
-       jz      .Lsyscall_32_done
+       /* XEN PV guests always use IRET path */
+       ALTERNATIVE "testl %eax, %eax; jz .Lsyscall_32_done", \
+                   "jmp .Lsyscall_32_done", X86_FEATURE_XENPV
  
         /* Opportunistic SYSRET */
  sysret32_from_system_call:
diff --git a/arch/x86/entry/vdso/vdso32/system_call.S b/arch/x86/entry/vdso/vdso32/system_call.S

index 93bd8452383f8e355fcc5743a54947f46ef1a32e..3a1d9297074bc5e1d2e559735bb5247acffa6164 100644 (file)
--- a/arch/x86/entry/vdso/vdso32/system_call.S
+++ b/arch/x86/entry/vdso/vdso32/system_call.S
@@ -1,5 +1,5 @@
  /*
- * Code for the vDSO.  This version uses the old int $0x80 method.
+ * AT_SYSINFO entry point
  */
  
  #include <asm/dwarf2.h>
@@ -21,35 +21,67 @@ __kernel_vsyscall:
         /*
          * Reshuffle regs so that all of any of the entry instructions
          * will preserve enough state.
+        *
+        * A really nice entry sequence would be:
+        *  pushl %edx
+        *  pushl %ecx
+        *  movl  %esp, %ecx
+        *
+        * Unfortunately, naughty Android versions between July and December
+        * 2015 actually hardcode the traditional Linux SYSENTER entry
+        * sequence.  That is severely broken for a number of reasons (ask
+        * anyone with an AMD CPU, for example).  Nonetheless, we try to keep
+        * it working approximately as well as it ever worked.
+        *
+        * This link may elucidate some of the history:
+        *   https://android-review.googlesource.com/#/q/Iac3295376d61ef83e713ac9b528f3b50aa780cd7
+        * Personally, I find it hard to understand what's going on there.
+        *
+        * Note to future user developers: DO NOT USE SYSENTER IN YOUR CODE.
+        * Execute an indirect call to the address in the AT_SYSINFO auxv
+        * entry.  That is the ONLY correct way to make a fast 32-bit system
+        * call on Linux.  (Open-coding int $0x80 is also fine, but it's
+        * slow.)
          */
+       pushl   %ecx
+       CFI_ADJUST_CFA_OFFSET   4
+       CFI_REL_OFFSET          ecx, 0
         pushl   %edx
         CFI_ADJUST_CFA_OFFSET   4
         CFI_REL_OFFSET          edx, 0
-       pushl   %ecx
+       pushl   %ebp
         CFI_ADJUST_CFA_OFFSET   4
-       CFI_REL_OFFSET          ecx, 0
-       movl    %esp, %ecx
+       CFI_REL_OFFSET          ebp, 0
+
+       #define SYSENTER_SEQUENCE       "movl %esp, %ebp; sysenter"
+       #define SYSCALL_SEQUENCE        "movl %ecx, %ebp; syscall"
  
  #ifdef CONFIG_X86_64
         /* If SYSENTER (Intel) or SYSCALL32 (AMD) is available, use it. */
-       ALTERNATIVE_2 "", "sysenter", X86_FEATURE_SYSENTER32, \
-                         "syscall",  X86_FEATURE_SYSCALL32
+       ALTERNATIVE_2 "", SYSENTER_SEQUENCE, X86_FEATURE_SYSENTER32, \
+                         SYSCALL_SEQUENCE,  X86_FEATURE_SYSCALL32
  #else
-       ALTERNATIVE "", "sysenter", X86_FEATURE_SEP
+       ALTERNATIVE "", SYSENTER_SEQUENCE, X86_FEATURE_SEP
  #endif
  
         /* Enter using int $0x80 */
-       movl    (%esp), %ecx
         int     $0x80
  GLOBAL(int80_landing_pad)
  
-       /* Restore ECX and EDX in case they were clobbered. */
-       popl    %ecx
-       CFI_RESTORE             ecx
+       /*
+        * Restore EDX and ECX in case they were clobbered.  EBP is not
+        * clobbered (the kernel restores it), but it's cleaner and
+        * probably faster to pop it than to adjust ESP using addl.
+        */
+       popl    %ebp
+       CFI_RESTORE             ebp
         CFI_ADJUST_CFA_OFFSET   -4
         popl    %edx
         CFI_RESTORE             edx
         CFI_ADJUST_CFA_OFFSET   -4
+       popl    %ecx
+       CFI_RESTORE             ecx
+       CFI_ADJUST_CFA_OFFSET   -4
         ret
         CFI_ENDPROC
  
diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h

index e4f8010f22e04d2f261bb73bc7745f68f36a392f..f7ba9fbf12eeb8770280823b6030d09d044deb66 100644 (file)
--- a/arch/x86/include/asm/cpufeature.h
+++ b/arch/x86/include/asm/cpufeature.h
@@ -216,6 +216,7 @@
  #define X86_FEATURE_PAUSEFILTER ( 8*32+13) /* AMD filtered pause intercept */
  #define X86_FEATURE_PFTHRESHOLD ( 8*32+14) /* AMD pause filter threshold */
  #define X86_FEATURE_VMMCALL     ( 8*32+15) /* Prefer vmmcall to vmcall */
+#define X86_FEATURE_XENPV       ( 8*32+16) /* "" Xen paravirtual guest */
  
  
  /* Intel-defined CPU features, CPUID level 0x00000007:0 (ebx), word 9 */
diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h

index 10d0596433f89b849f91c164d7252a29bcac921b..c759b3cca66343bda8096f41a462e0b13fc45cd8 100644 (file)
--- a/arch/x86/include/asm/paravirt.h
+++ b/arch/x86/include/asm/paravirt.h
@@ -19,6 +19,12 @@ static inline int paravirt_enabled(void)
         return pv_info.paravirt_enabled;
  }
  
+static inline int paravirt_has_feature(unsigned int feature)
+{
+       WARN_ON_ONCE(!pv_info.paravirt_enabled);
+       return (pv_info.features & feature);
+}
+
  static inline void load_sp0(struct tss_struct *tss,
                              struct thread_struct *thread)
  {
diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h

index 31247b5bff7c8ff86d893851dc9073b72a647cc2..3d44191185f8ca345d4ad3e928101172621d573d 100644 (file)
--- a/arch/x86/include/asm/paravirt_types.h
+++ b/arch/x86/include/asm/paravirt_types.h
@@ -70,9 +70,14 @@ struct pv_info {
  #endif
  
         int paravirt_enabled;
+       unsigned int features;    /* valid only if paravirt_enabled is set */
         const char *name;
  };
  
+#define paravirt_has(x) paravirt_has_feature(PV_SUPPORTED_##x)
+/* Supported features */
+#define PV_SUPPORTED_RTC        (1<<0)
+
  struct pv_init_ops {
         /*
          * Patch may replace one of the defined code sequences with
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h

index 67522256c7ffaf610aa70ef885bd8df584d1bbbd..2d5a50cb61a2d6ad5c68d5563636edcc112ff4f9 100644 (file)
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -472,6 +472,7 @@ static inline unsigned long current_top_of_stack(void)
  #else
  #define __cpuid                        native_cpuid
  #define paravirt_enabled()     0
+#define paravirt_has(x)        0
  
  static inline void load_sp0(struct tss_struct *tss,
                             struct thread_struct *thread)
diff --git a/arch/x86/kernel/apic/apic_numachip.c b/arch/x86/kernel/apic/apic_numachip.c

index 38dd5efdd04c33aa58b2b20b19596bcd12e3e4af..2bd2292a316d474ea917b6a8b247b7cd5726a16a 100644 (file)
--- a/arch/x86/kernel/apic/apic_numachip.c
+++ b/arch/x86/kernel/apic/apic_numachip.c
@@ -193,20 +193,17 @@ static int __init numachip_system_init(void)
         case 1:
                 init_extra_mapping_uc(NUMACHIP_LCSR_BASE, NUMACHIP_LCSR_SIZE);
                 numachip_apic_icr_write = numachip1_apic_icr_write;
-               x86_init.pci.arch_init = pci_numachip_init;
                 break;
         case 2:
                 init_extra_mapping_uc(NUMACHIP2_LCSR_BASE, NUMACHIP2_LCSR_SIZE);
                 numachip_apic_icr_write = numachip2_apic_icr_write;
-
-               /* Use MCFG config cycles rather than locked CF8 cycles */
-               raw_pci_ops = &pci_mmcfg;
                 break;
         default:
                 return 0;
         }
  
         x86_cpuinit.fixup_cpu_id = fixup_cpu_id;
+       x86_init.pci.arch_init = pci_numachip_init;
  
         return 0;
  }
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c

index c5b0d562dbf55064685c78b5d0fa6280748086d1..7e8a736d09db1df950e37a1746a270299f83685f 100644 (file)
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -999,6 +999,17 @@ void do_machine_check(struct pt_regs *regs, long error_code)
         int flags = MF_ACTION_REQUIRED;
         int lmce = 0;
  
+       /* If this CPU is offline, just bail out. */
+       if (cpu_is_offline(smp_processor_id())) {
+               u64 mcgstatus;
+
+               mcgstatus = mce_rdmsrl(MSR_IA32_MCG_STATUS);
+               if (mcgstatus & MCG_STATUS_RIPV) {
+                       mce_wrmsrl(MSR_IA32_MCG_STATUS, 0);
+                       return;
+               }
+       }
+
         ist_enter(regs);
  
         this_cpu_inc(mce_exception_count);
diff --git a/arch/x86/kernel/rtc.c b/arch/x86/kernel/rtc.c

index cd9685235df91b69f5e39885e55850964c67e8fc..4af8d063fb362cd2bf92b97a48fa8c1d5d95fd3b 100644 (file)
--- a/arch/x86/kernel/rtc.c
+++ b/arch/x86/kernel/rtc.c
@@ -200,6 +200,9 @@ static __init int add_rtc_cmos(void)
         }
  #endif
  
+       if (paravirt_enabled() && !paravirt_has(RTC))
+               return -ENODEV;
+
         platform_device_register(&rtc_device);
         dev_info(&rtc_device.dev,
                  "registered platform RTC device (no PNP device found)\n");
diff --git a/arch/x86/lguest/boot.c b/arch/x86/lguest/boot.c

index a0d09f6c65337fc381353783639f45251bf28d78..a43b2eafc466f5c552d6f1b805d6b727ad8371f1 100644 (file)
--- a/arch/x86/lguest/boot.c
+++ b/arch/x86/lguest/boot.c
@@ -1414,6 +1414,7 @@ __init void lguest_init(void)
         pv_info.kernel_rpl = 1;
         /* Everyone except Xen runs with this set. */
         pv_info.shared_kernel_pmd = 1;
+       pv_info.features = 0;
  
         /*
          * We set up all the lguest overrides for sensitive operations.  These
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c

index 5774800ff583ca33916e365e408b057d982a8384..b7de78bdc09c12b3e6cea070e4d35b346d24c8be 100644 (file)
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -1192,7 +1192,7 @@ static const struct pv_info xen_info __initconst = {
  #ifdef CONFIG_X86_64
         .extra_user_64bit_cs = FLAT_USER_CS64,
  #endif
-
+       .features = 0,
         .name = "Xen",
  };
  
@@ -1535,6 +1535,8 @@ asmlinkage __visible void __init xen_start_kernel(void)
  
         /* Install Xen paravirt ops */
         pv_info = xen_info;
+       if (xen_initial_domain())
+               pv_info.features |= PV_SUPPORTED_RTC;
         pv_init_ops = xen_init_ops;
         pv_apic_ops = xen_apic_ops;
         if (!xen_pvh_domain()) {
@@ -1886,8 +1888,10 @@ EXPORT_SYMBOL_GPL(xen_hvm_need_lapic);
  
  static void xen_set_cpu_features(struct cpuinfo_x86 *c)
  {
-       if (xen_pv_domain())
+       if (xen_pv_domain()) {
                 clear_cpu_bug(c, X86_BUG_SYSRET_SS_ATTRS);
+               set_cpu_cap(c, X86_FEATURE_XENPV);
+       }
  }
  
  const struct hypervisor_x86 x86_hyper_xen = {
author	Linus Torvalds <torvalds@linux-foundation.org>
	Fri, 8 Jan 2016 23:21:48 +0000 (15:21 -0800)
committer	Linus Torvalds <torvalds@linux-foundation.org>
	Fri, 8 Jan 2016 23:21:48 +0000 (15:21 -0800)
arch/x86/entry/common.c		patch \| blob \| history
arch/x86/entry/entry_32.S		patch \| blob \| history
arch/x86/entry/entry_64_compat.S		patch \| blob \| history
arch/x86/entry/vdso/vdso32/system_call.S		patch \| blob \| history
arch/x86/include/asm/cpufeature.h		patch \| blob \| history
arch/x86/include/asm/paravirt.h		patch \| blob \| history
arch/x86/include/asm/paravirt_types.h		patch \| blob \| history
arch/x86/include/asm/processor.h		patch \| blob \| history
arch/x86/kernel/apic/apic_numachip.c		patch \| blob \| history
arch/x86/kernel/cpu/mcheck/mce.c		patch \| blob \| history
arch/x86/kernel/rtc.c		patch \| blob \| history
arch/x86/lguest/boot.c		patch \| blob \| history
arch/x86/xen/enlighten.c		patch \| blob \| history