]> git.kernelconcepts.de Git - karo-tx-linux.git/commitdiff
Merge branch 'x86-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel...
authorLinus Torvalds <torvalds@linux-foundation.org>
Fri, 8 Jan 2016 23:21:48 +0000 (15:21 -0800)
committerLinus Torvalds <torvalds@linux-foundation.org>
Fri, 8 Jan 2016 23:21:48 +0000 (15:21 -0800)
Pull x86 fixes from Ingo Molnar:
 "A handful of x86 fixes:

   - a syscall ABI fix, fixing an Android breakage
   - a Xen PV guest fix relating to the RTC device, causing a
     non-working console
   - a Xen guest syscall stack frame fix
   - an MCE hotplug CPU crash fix"

* 'x86-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  x86/numachip: Fix NumaConnect2 MMCFG PCI access
  x86/entry: Restore traditional SYSENTER calling convention
  x86/entry: Fix some comments
  x86/paravirt: Prevent rtc_cmos platform device init on PV guests
  x86/xen: Avoid fast syscall path for Xen PV guests
  x86/mce: Ensure offline CPUs don't participate in rendezvous process

13 files changed:
arch/x86/entry/common.c
arch/x86/entry/entry_32.S
arch/x86/entry/entry_64_compat.S
arch/x86/entry/vdso/vdso32/system_call.S
arch/x86/include/asm/cpufeature.h
arch/x86/include/asm/paravirt.h
arch/x86/include/asm/paravirt_types.h
arch/x86/include/asm/processor.h
arch/x86/kernel/apic/apic_numachip.c
arch/x86/kernel/cpu/mcheck/mce.c
arch/x86/kernel/rtc.c
arch/x86/lguest/boot.c
arch/x86/xen/enlighten.c

index a89fdbc1f0beb7e7198c7a625767a2cfe32ca9e3..03663740c86655cabf21504578e97d73d98595be 100644 (file)
@@ -421,7 +421,7 @@ __visible long do_fast_syscall_32(struct pt_regs *regs)
        regs->ip = landing_pad;
 
        /*
-        * Fetch ECX from where the vDSO stashed it.
+        * Fetch EBP from where the vDSO stashed it.
         *
         * WARNING: We are in CONTEXT_USER and RCU isn't paying attention!
         */
@@ -432,10 +432,10 @@ __visible long do_fast_syscall_32(struct pt_regs *regs)
                 * Micro-optimization: the pointer we're following is explicitly
                 * 32 bits, so it can't be out of range.
                 */
-               __get_user(*(u32 *)&regs->cx,
+               __get_user(*(u32 *)&regs->bp,
                            (u32 __user __force *)(unsigned long)(u32)regs->sp)
 #else
-               get_user(*(u32 *)&regs->cx,
+               get_user(*(u32 *)&regs->bp,
                         (u32 __user __force *)(unsigned long)(u32)regs->sp)
 #endif
                ) {
index 3eb572ed3d7ad438d8dfd1627b5b4121314c9f67..f3b6d54e0042b7f08c25a82283f88e8193c70c4a 100644 (file)
@@ -292,7 +292,7 @@ ENTRY(entry_SYSENTER_32)
        movl    TSS_sysenter_sp0(%esp), %esp
 sysenter_past_esp:
        pushl   $__USER_DS              /* pt_regs->ss */
-       pushl   %ecx                    /* pt_regs->cx */
+       pushl   %ebp                    /* pt_regs->sp (stashed in bp) */
        pushfl                          /* pt_regs->flags (except IF = 0) */
        orl     $X86_EFLAGS_IF, (%esp)  /* Fix IF */
        pushl   $__USER_CS              /* pt_regs->cs */
@@ -308,8 +308,9 @@ sysenter_past_esp:
 
        movl    %esp, %eax
        call    do_fast_syscall_32
-       testl   %eax, %eax
-       jz      .Lsyscall_32_done
+       /* XEN PV guests always use IRET path */
+       ALTERNATIVE "testl %eax, %eax; jz .Lsyscall_32_done", \
+                   "jmp .Lsyscall_32_done", X86_FEATURE_XENPV
 
 /* Opportunistic SYSEXIT */
        TRACE_IRQS_ON                   /* User mode traces as IRQs on. */
index c3201830a85ee8dcddabb0cab864545d99d47fd3..6a1ae3751e824d9917e65c136e9f7de3cc5f4f47 100644 (file)
@@ -63,7 +63,7 @@ ENTRY(entry_SYSENTER_compat)
 
        /* Construct struct pt_regs on stack */
        pushq   $__USER32_DS            /* pt_regs->ss */
-       pushq   %rcx                    /* pt_regs->sp */
+       pushq   %rbp                    /* pt_regs->sp (stashed in bp) */
 
        /*
         * Push flags.  This is nasty.  First, interrupts are currently
@@ -82,14 +82,14 @@ ENTRY(entry_SYSENTER_compat)
        pushq   %rdi                    /* pt_regs->di */
        pushq   %rsi                    /* pt_regs->si */
        pushq   %rdx                    /* pt_regs->dx */
-       pushq   %rcx                    /* pt_regs->cx (will be overwritten) */
+       pushq   %rcx                    /* pt_regs->cx */
        pushq   $-ENOSYS                /* pt_regs->ax */
        pushq   %r8                     /* pt_regs->r8  = 0 */
        pushq   %r8                     /* pt_regs->r9  = 0 */
        pushq   %r8                     /* pt_regs->r10 = 0 */
        pushq   %r8                     /* pt_regs->r11 = 0 */
        pushq   %rbx                    /* pt_regs->rbx */
-       pushq   %rbp                    /* pt_regs->rbp */
+       pushq   %rbp                    /* pt_regs->rbp (will be overwritten) */
        pushq   %r8                     /* pt_regs->r12 = 0 */
        pushq   %r8                     /* pt_regs->r13 = 0 */
        pushq   %r8                     /* pt_regs->r14 = 0 */
@@ -121,8 +121,9 @@ sysenter_flags_fixed:
 
        movq    %rsp, %rdi
        call    do_fast_syscall_32
-       testl   %eax, %eax
-       jz      .Lsyscall_32_done
+       /* XEN PV guests always use IRET path */
+       ALTERNATIVE "testl %eax, %eax; jz .Lsyscall_32_done", \
+                   "jmp .Lsyscall_32_done", X86_FEATURE_XENPV
        jmp     sysret32_from_system_call
 
 sysenter_fix_flags:
@@ -178,7 +179,7 @@ ENTRY(entry_SYSCALL_compat)
        pushq   %rdi                    /* pt_regs->di */
        pushq   %rsi                    /* pt_regs->si */
        pushq   %rdx                    /* pt_regs->dx */
-       pushq   %rcx                    /* pt_regs->cx (will be overwritten) */
+       pushq   %rbp                    /* pt_regs->cx (stashed in bp) */
        pushq   $-ENOSYS                /* pt_regs->ax */
        xorq    %r8,%r8
        pushq   %r8                     /* pt_regs->r8  = 0 */
@@ -186,7 +187,7 @@ ENTRY(entry_SYSCALL_compat)
        pushq   %r8                     /* pt_regs->r10 = 0 */
        pushq   %r8                     /* pt_regs->r11 = 0 */
        pushq   %rbx                    /* pt_regs->rbx */
-       pushq   %rbp                    /* pt_regs->rbp */
+       pushq   %rbp                    /* pt_regs->rbp (will be overwritten) */
        pushq   %r8                     /* pt_regs->r12 = 0 */
        pushq   %r8                     /* pt_regs->r13 = 0 */
        pushq   %r8                     /* pt_regs->r14 = 0 */
@@ -200,8 +201,9 @@ ENTRY(entry_SYSCALL_compat)
 
        movq    %rsp, %rdi
        call    do_fast_syscall_32
-       testl   %eax, %eax
-       jz      .Lsyscall_32_done
+       /* XEN PV guests always use IRET path */
+       ALTERNATIVE "testl %eax, %eax; jz .Lsyscall_32_done", \
+                   "jmp .Lsyscall_32_done", X86_FEATURE_XENPV
 
        /* Opportunistic SYSRET */
 sysret32_from_system_call:
index 93bd8452383f8e355fcc5743a54947f46ef1a32e..3a1d9297074bc5e1d2e559735bb5247acffa6164 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Code for the vDSO.  This version uses the old int $0x80 method.
+ * AT_SYSINFO entry point
 */
 
 #include <asm/dwarf2.h>
@@ -21,35 +21,67 @@ __kernel_vsyscall:
        /*
         * Reshuffle regs so that all of any of the entry instructions
         * will preserve enough state.
+        *
+        * A really nice entry sequence would be:
+        *  pushl %edx
+        *  pushl %ecx
+        *  movl  %esp, %ecx
+        *
+        * Unfortunately, naughty Android versions between July and December
+        * 2015 actually hardcode the traditional Linux SYSENTER entry
+        * sequence.  That is severely broken for a number of reasons (ask
+        * anyone with an AMD CPU, for example).  Nonetheless, we try to keep
+        * it working approximately as well as it ever worked.
+        *
+        * This link may eludicate some of the history:
+        *   https://android-review.googlesource.com/#/q/Iac3295376d61ef83e713ac9b528f3b50aa780cd7
+        * personally, I find it hard to understand what's going on there.
+        *
+        * Note to future user developers: DO NOT USE SYSENTER IN YOUR CODE.
+        * Execute an indirect call to the address in the AT_SYSINFO auxv
+        * entry.  That is the ONLY correct way to make a fast 32-bit system
+        * call on Linux.  (Open-coding int $0x80 is also fine, but it's
+        * slow.)
         */
+       pushl   %ecx
+       CFI_ADJUST_CFA_OFFSET   4
+       CFI_REL_OFFSET          ecx, 0
        pushl   %edx
        CFI_ADJUST_CFA_OFFSET   4
        CFI_REL_OFFSET          edx, 0
-       pushl   %ecx
+       pushl   %ebp
        CFI_ADJUST_CFA_OFFSET   4
-       CFI_REL_OFFSET          ecx, 0
-       movl    %esp, %ecx
+       CFI_REL_OFFSET          ebp, 0
+
+       #define SYSENTER_SEQUENCE       "movl %esp, %ebp; sysenter"
+       #define SYSCALL_SEQUENCE        "movl %ecx, %ebp; syscall"
 
 #ifdef CONFIG_X86_64
        /* If SYSENTER (Intel) or SYSCALL32 (AMD) is available, use it. */
-       ALTERNATIVE_2 "", "sysenter", X86_FEATURE_SYSENTER32, \
-                         "syscall",  X86_FEATURE_SYSCALL32
+       ALTERNATIVE_2 "", SYSENTER_SEQUENCE, X86_FEATURE_SYSENTER32, \
+                         SYSCALL_SEQUENCE,  X86_FEATURE_SYSCALL32
 #else
-       ALTERNATIVE "", "sysenter", X86_FEATURE_SEP
+       ALTERNATIVE "", SYSENTER_SEQUENCE, X86_FEATURE_SEP
 #endif
 
        /* Enter using int $0x80 */
-       movl    (%esp), %ecx
        int     $0x80
 GLOBAL(int80_landing_pad)
 
-       /* Restore ECX and EDX in case they were clobbered. */
-       popl    %ecx
-       CFI_RESTORE             ecx
+       /*
+        * Restore EDX and ECX in case they were clobbered.  EBP is not
+        * clobbered (the kernel restores it), but it's cleaner and
+        * probably faster to pop it than to adjust ESP using addl.
+        */
+       popl    %ebp
+       CFI_RESTORE             ebp
        CFI_ADJUST_CFA_OFFSET   -4
        popl    %edx
        CFI_RESTORE             edx
        CFI_ADJUST_CFA_OFFSET   -4
+       popl    %ecx
+       CFI_RESTORE             ecx
+       CFI_ADJUST_CFA_OFFSET   -4
        ret
        CFI_ENDPROC
 
index e4f8010f22e04d2f261bb73bc7745f68f36a392f..f7ba9fbf12eeb8770280823b6030d09d044deb66 100644 (file)
 #define X86_FEATURE_PAUSEFILTER ( 8*32+13) /* AMD filtered pause intercept */
 #define X86_FEATURE_PFTHRESHOLD ( 8*32+14) /* AMD pause filter threshold */
 #define X86_FEATURE_VMMCALL     ( 8*32+15) /* Prefer vmmcall to vmcall */
+#define X86_FEATURE_XENPV       ( 8*32+16) /* "" Xen paravirtual guest */
 
 
 /* Intel-defined CPU features, CPUID level 0x00000007:0 (ebx), word 9 */
index 10d0596433f89b849f91c164d7252a29bcac921b..c759b3cca66343bda8096f41a462e0b13fc45cd8 100644 (file)
@@ -19,6 +19,12 @@ static inline int paravirt_enabled(void)
        return pv_info.paravirt_enabled;
 }
 
+static inline int paravirt_has_feature(unsigned int feature)
+{
+       WARN_ON_ONCE(!pv_info.paravirt_enabled);
+       return (pv_info.features & feature);
+}
+
 static inline void load_sp0(struct tss_struct *tss,
                             struct thread_struct *thread)
 {
index 31247b5bff7c8ff86d893851dc9073b72a647cc2..3d44191185f8ca345d4ad3e928101172621d573d 100644 (file)
@@ -70,9 +70,14 @@ struct pv_info {
 #endif
 
        int paravirt_enabled;
+       unsigned int features;    /* valid only if paravirt_enabled is set */
        const char *name;
 };
 
+#define paravirt_has(x) paravirt_has_feature(PV_SUPPORTED_##x)
+/* Supported features */
+#define PV_SUPPORTED_RTC        (1<<0)
+
 struct pv_init_ops {
        /*
         * Patch may replace one of the defined code sequences with
index 67522256c7ffaf610aa70ef885bd8df584d1bbbd..2d5a50cb61a2d6ad5c68d5563636edcc112ff4f9 100644 (file)
@@ -472,6 +472,7 @@ static inline unsigned long current_top_of_stack(void)
 #else
 #define __cpuid                        native_cpuid
 #define paravirt_enabled()     0
+#define paravirt_has(x)        0
 
 static inline void load_sp0(struct tss_struct *tss,
                            struct thread_struct *thread)
index 38dd5efdd04c33aa58b2b20b19596bcd12e3e4af..2bd2292a316d474ea917b6a8b247b7cd5726a16a 100644 (file)
@@ -193,20 +193,17 @@ static int __init numachip_system_init(void)
        case 1:
                init_extra_mapping_uc(NUMACHIP_LCSR_BASE, NUMACHIP_LCSR_SIZE);
                numachip_apic_icr_write = numachip1_apic_icr_write;
-               x86_init.pci.arch_init = pci_numachip_init;
                break;
        case 2:
                init_extra_mapping_uc(NUMACHIP2_LCSR_BASE, NUMACHIP2_LCSR_SIZE);
                numachip_apic_icr_write = numachip2_apic_icr_write;
-
-               /* Use MCFG config cycles rather than locked CF8 cycles */
-               raw_pci_ops = &pci_mmcfg;
                break;
        default:
                return 0;
        }
 
        x86_cpuinit.fixup_cpu_id = fixup_cpu_id;
+       x86_init.pci.arch_init = pci_numachip_init;
 
        return 0;
 }
index c5b0d562dbf55064685c78b5d0fa6280748086d1..7e8a736d09db1df950e37a1746a270299f83685f 100644 (file)
@@ -999,6 +999,17 @@ void do_machine_check(struct pt_regs *regs, long error_code)
        int flags = MF_ACTION_REQUIRED;
        int lmce = 0;
 
+       /* If this CPU is offline, just bail out. */
+       if (cpu_is_offline(smp_processor_id())) {
+               u64 mcgstatus;
+
+               mcgstatus = mce_rdmsrl(MSR_IA32_MCG_STATUS);
+               if (mcgstatus & MCG_STATUS_RIPV) {
+                       mce_wrmsrl(MSR_IA32_MCG_STATUS, 0);
+                       return;
+               }
+       }
+
        ist_enter(regs);
 
        this_cpu_inc(mce_exception_count);
index cd9685235df91b69f5e39885e55850964c67e8fc..4af8d063fb362cd2bf92b97a48fa8c1d5d95fd3b 100644 (file)
@@ -200,6 +200,9 @@ static __init int add_rtc_cmos(void)
        }
 #endif
 
+       if (paravirt_enabled() && !paravirt_has(RTC))
+               return -ENODEV;
+
        platform_device_register(&rtc_device);
        dev_info(&rtc_device.dev,
                 "registered platform RTC device (no PNP device found)\n");
index a0d09f6c65337fc381353783639f45251bf28d78..a43b2eafc466f5c552d6f1b805d6b727ad8371f1 100644 (file)
@@ -1414,6 +1414,7 @@ __init void lguest_init(void)
        pv_info.kernel_rpl = 1;
        /* Everyone except Xen runs with this set. */
        pv_info.shared_kernel_pmd = 1;
+       pv_info.features = 0;
 
        /*
         * We set up all the lguest overrides for sensitive operations.  These
index 5774800ff583ca33916e365e408b057d982a8384..b7de78bdc09c12b3e6cea070e4d35b346d24c8be 100644 (file)
@@ -1192,7 +1192,7 @@ static const struct pv_info xen_info __initconst = {
 #ifdef CONFIG_X86_64
        .extra_user_64bit_cs = FLAT_USER_CS64,
 #endif
-
+       .features = 0,
        .name = "Xen",
 };
 
@@ -1535,6 +1535,8 @@ asmlinkage __visible void __init xen_start_kernel(void)
 
        /* Install Xen paravirt ops */
        pv_info = xen_info;
+       if (xen_initial_domain())
+               pv_info.features |= PV_SUPPORTED_RTC;
        pv_init_ops = xen_init_ops;
        pv_apic_ops = xen_apic_ops;
        if (!xen_pvh_domain()) {
@@ -1886,8 +1888,10 @@ EXPORT_SYMBOL_GPL(xen_hvm_need_lapic);
 
 static void xen_set_cpu_features(struct cpuinfo_x86 *c)
 {
-       if (xen_pv_domain())
+       if (xen_pv_domain()) {
                clear_cpu_bug(c, X86_BUG_SYSRET_SS_ATTRS);
+               set_cpu_cap(c, X86_FEATURE_XENPV);
+       }
 }
 
 const struct hypervisor_x86 x86_hyper_xen = {