git.kernelconcepts.de Git - karo-tx-linux.git/commitdiff
Merge branch 'x86-asm-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git...
author Linus Torvalds <torvalds@linux-foundation.org>
Mon, 12 Dec 2016 21:49:57 +0000 (13:49 -0800)
committer Linus Torvalds <torvalds@linux-foundation.org>
Mon, 12 Dec 2016 21:49:57 +0000 (13:49 -0800)
Pull x86 asm updates from Ingo Molnar:
 "The main changes in this development cycle were:

   - a large number of call stack dumping/printing improvements: higher
     robustness, better cross-context dumping, improved output, etc.
     (Josh Poimboeuf)

   - vDSO getcpu() performance improvement for future Intel CPUs with
     the RDPID instruction (Andy Lutomirski)

   - add two new Intel AVX512 features and the CPUID support
     infrastructure for them: AVX512IFMA and AVX512VBMI. (Gayatri Kammela,
     He Chen)

   - more copy-user unification (Borislav Petkov)

   - entry code assembly macro simplifications (Alexander Kuleshov)

   - vDSO C/R support improvements (Dmitry Safonov)

   - misc fixes and cleanups (Borislav Petkov, Paul Bolle)"

* 'x86-asm-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (40 commits)
  scripts/decode_stacktrace.sh: Fix address line detection on x86
  x86/boot/64: Use defines for page size
  x86/dumpstack: Make stack name tags more comprehensible
  selftests/x86: Add test_vdso to test getcpu()
  x86/vdso: Use RDPID in preference to LSL when available
  x86/dumpstack: Handle NULL stack pointer in show_trace_log_lvl()
  x86/cpufeatures: Enable new AVX512 cpu features
  x86/cpuid: Provide get_scattered_cpuid_leaf()
  x86/cpuid: Cleanup cpuid_regs definitions
  x86/copy_user: Unify the code by removing the 64-bit asm _copy_*_user() variants
  x86/unwind: Ensure stack grows down
  x86/vdso: Set vDSO pointer only after success
  x86/prctl/uapi: Remove #ifdef for CHECKPOINT_RESTORE
  x86/unwind: Detect bad stack return address
  x86/dumpstack: Warn on stack recursion
  x86/unwind: Warn on bad frame pointer
  x86/decoder: Use stderr if insn sanity test fails
  x86/decoder: Use stdout if insn decoder test is successful
  mm/page_alloc: Remove kernel address exposure in free_reserved_area()
  x86/dumpstack: Remove raw stack dump
  ...

41 files changed:
Documentation/kernel-parameters.txt
Documentation/sysctl/kernel.txt
Documentation/x86/x86_64/boot-options.txt
arch/x86/entry/calling.h
arch/x86/entry/entry_32.S
arch/x86/entry/entry_64.S
arch/x86/entry/vdso/vma.c
arch/x86/events/intel/pt.c
arch/x86/include/asm/cpufeatures.h
arch/x86/include/asm/kdebug.h
arch/x86/include/asm/processor.h
arch/x86/include/asm/stacktrace.h
arch/x86/include/asm/unwind.h
arch/x86/include/asm/vgtod.h
arch/x86/include/uapi/asm/prctl.h
arch/x86/kernel/cpu/scattered.c
arch/x86/kernel/cpuid.c
arch/x86/kernel/dumpstack.c
arch/x86/kernel/dumpstack_32.c
arch/x86/kernel/dumpstack_64.c
arch/x86/kernel/fpu/xstate.c
arch/x86/kernel/head_32.S
arch/x86/kernel/head_64.S
arch/x86/kernel/process_32.c
arch/x86/kernel/process_64.c
arch/x86/kernel/smpboot.c
arch/x86/kernel/unwind_frame.c
arch/x86/kernel/vmlinux.lds.S
arch/x86/lib/copy_user_64.S
arch/x86/lib/usercopy.c
arch/x86/lib/usercopy_32.c
arch/x86/mm/fault.c
arch/x86/platform/uv/uv_nmi.c
arch/x86/tools/insn_sanity.c
arch/x86/tools/test_get_len.c
kernel/sysctl.c
mm/page_alloc.c
scripts/decode_stacktrace.sh
scripts/faddr2line
tools/testing/selftests/x86/Makefile
tools/testing/selftests/x86/test_vdso.c [new file with mode: 0644]

index 86a31dfc036e5c6d135828068d2e03ddda3f6833..7c817dc45d45b4fc1365aff26681b9cfbe8c4eee 100644 (file)
@@ -1963,9 +1963,6 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
                        kmemcheck=2 (one-shot mode)
                        Default: 2 (one-shot mode)
 
-       kstack=N        [X86] Print N words from the kernel stack
-                       in oops dumps.
-
        kvm.ignore_msrs=[KVM] Ignore guest accesses to unhandled MSRs.
                        Default is 0 (don't ignore, but inject #GP)
 
index ffab8b5caa603ddbf82c5cc694472bd373991b75..065f18478c1c49f8e17ff8dc8eed6ce6ffa707da 100644 (file)
@@ -40,7 +40,6 @@ show up in /proc/sys/kernel:
 - hung_task_warnings
 - kexec_load_disabled
 - kptr_restrict
-- kstack_depth_to_print       [ X86 only ]
 - l2cr                        [ PPC only ]
 - modprobe                    ==> Documentation/debugging-modules.txt
 - modules_disabled
@@ -395,13 +394,6 @@ When kptr_restrict is set to (2), kernel pointers printed using
 
 ==============================================================
 
-kstack_depth_to_print: (X86 only)
-
-Controls the number of words to print when dumping the raw
-kernel stack.
-
-==============================================================
-
 l2cr: (PPC only)
 
 This flag controls the L2 cache of G3 processor boards. If
index 0965a71f994243e70b3ae04fe78f73263666c1af..61b611e9eeafe773cd9b2eaa631f37287ebf112e 100644 (file)
@@ -277,10 +277,6 @@ IOMMU (input/output memory management unit)
     space might stop working. Use this option if you have devices that
     are accessed from userspace directly on some PCI host bridge.
 
-Debugging
-
-  kstack=N     Print N words from the kernel stack in oops dumps.
-
 Miscellaneous
 
        nogbpages
index 9a9e5884066c6581878b56cde27e75f542b21249..05ed3d393da797e648cf2e7d477d5c7a5ba523fe 100644 (file)
@@ -90,8 +90,8 @@ For 32-bit we have the following conventions - kernel is built with
 
 #define SIZEOF_PTREGS  21*8
 
-       .macro ALLOC_PT_GPREGS_ON_STACK addskip=0
-       addq    $-(15*8+\addskip), %rsp
+       .macro ALLOC_PT_GPREGS_ON_STACK
+       addq    $-(15*8), %rsp
        .endm
 
        .macro SAVE_C_REGS_HELPER offset=0 rax=1 rcx=1 r8910=1 r11=1
@@ -147,15 +147,6 @@ For 32-bit we have the following conventions - kernel is built with
        movq 5*8+\offset(%rsp), %rbx
        .endm
 
-       .macro ZERO_EXTRA_REGS
-       xorl    %r15d, %r15d
-       xorl    %r14d, %r14d
-       xorl    %r13d, %r13d
-       xorl    %r12d, %r12d
-       xorl    %ebp, %ebp
-       xorl    %ebx, %ebx
-       .endm
-
        .macro RESTORE_C_REGS_HELPER rstor_rax=1, rstor_rcx=1, rstor_r11=1, rstor_r8910=1, rstor_rdx=1
        .if \rstor_r11
        movq 6*8(%rsp), %r11
@@ -201,6 +192,26 @@ For 32-bit we have the following conventions - kernel is built with
        .byte 0xf1
        .endm
 
+/*
+ * This is a sneaky trick to help the unwinder find pt_regs on the stack.  The
+ * frame pointer is replaced with an encoded pointer to pt_regs.  The encoding
+ * is just setting the LSB, which makes it an invalid stack address and is also
+ * a signal to the unwinder that it's a pt_regs pointer in disguise.
+ *
+ * NOTE: This macro must be used *after* SAVE_EXTRA_REGS because it corrupts
+ * the original rbp.
+ */
+.macro ENCODE_FRAME_POINTER ptregs_offset=0
+#ifdef CONFIG_FRAME_POINTER
+       .if \ptregs_offset
+               leaq \ptregs_offset(%rsp), %rbp
+       .else
+               mov %rsp, %rbp
+       .endif
+       orq     $0x1, %rbp
+#endif
+.endm
+
 #endif /* CONFIG_X86_64 */
 
 /*
index 21b352a11b493f4868b1a091dc416c7e2279ad34..acc0c6f36f3f4c3a66f9ea81b92a409f680f1687 100644 (file)
@@ -45,6 +45,7 @@
 #include <asm/asm.h>
 #include <asm/smap.h>
 #include <asm/export.h>
+#include <asm/frame.h>
 
        .section .entry.text, "ax"
 
        SET_KERNEL_GS %edx
 .endm
 
+/*
+ * This is a sneaky trick to help the unwinder find pt_regs on the stack.  The
+ * frame pointer is replaced with an encoded pointer to pt_regs.  The encoding
+ * is just setting the LSB, which makes it an invalid stack address and is also
+ * a signal to the unwinder that it's a pt_regs pointer in disguise.
+ *
+ * NOTE: This macro must be used *after* SAVE_ALL because it corrupts the
+ * original rbp.
+ */
+.macro ENCODE_FRAME_POINTER
+#ifdef CONFIG_FRAME_POINTER
+       mov %esp, %ebp
+       orl $0x1, %ebp
+#endif
+.endm
+
 .macro RESTORE_INT_REGS
        popl    %ebx
        popl    %ecx
@@ -237,6 +254,23 @@ ENTRY(__switch_to_asm)
        jmp     __switch_to
 END(__switch_to_asm)
 
+/*
+ * The unwinder expects the last frame on the stack to always be at the same
+ * offset from the end of the page, which allows it to validate the stack.
+ * Calling schedule_tail() directly would break that convention because it's an
+ * asmlinkage function so its argument has to be pushed on the stack.  This
+ * wrapper creates a proper "end of stack" frame header before the call.
+ */
+ENTRY(schedule_tail_wrapper)
+       FRAME_BEGIN
+
+       pushl   %eax
+       call    schedule_tail
+       popl    %eax
+
+       FRAME_END
+       ret
+ENDPROC(schedule_tail_wrapper)
 /*
  * A newly forked process directly context switches into this address.
  *
@@ -245,9 +279,7 @@ END(__switch_to_asm)
  * edi: kernel thread arg
  */
 ENTRY(ret_from_fork)
-       pushl   %eax
-       call    schedule_tail
-       popl    %eax
+       call    schedule_tail_wrapper
 
        testl   %ebx, %ebx
        jnz     1f              /* kernel threads are uncommon */
@@ -307,13 +339,13 @@ END(ret_from_exception)
 #ifdef CONFIG_PREEMPT
 ENTRY(resume_kernel)
        DISABLE_INTERRUPTS(CLBR_ANY)
-need_resched:
+.Lneed_resched:
        cmpl    $0, PER_CPU_VAR(__preempt_count)
        jnz     restore_all
        testl   $X86_EFLAGS_IF, PT_EFLAGS(%esp) # interrupts off (exception path) ?
        jz      restore_all
        call    preempt_schedule_irq
-       jmp     need_resched
+       jmp     .Lneed_resched
 END(resume_kernel)
 #endif
 
@@ -334,7 +366,7 @@ GLOBAL(__begin_SYSENTER_singlestep_region)
  */
 ENTRY(xen_sysenter_target)
        addl    $5*4, %esp                      /* remove xen-provided frame */
-       jmp     sysenter_past_esp
+       jmp     .Lsysenter_past_esp
 #endif
 
 /*
@@ -371,7 +403,7 @@ ENTRY(xen_sysenter_target)
  */
 ENTRY(entry_SYSENTER_32)
        movl    TSS_sysenter_sp0(%esp), %esp
-sysenter_past_esp:
+.Lsysenter_past_esp:
        pushl   $__USER_DS              /* pt_regs->ss */
        pushl   %ebp                    /* pt_regs->sp (stashed in bp) */
        pushfl                          /* pt_regs->flags (except IF = 0) */
@@ -504,9 +536,9 @@ ENTRY(entry_INT80_32)
 
 restore_all:
        TRACE_IRQS_IRET
-restore_all_notrace:
+.Lrestore_all_notrace:
 #ifdef CONFIG_X86_ESPFIX32
-       ALTERNATIVE     "jmp restore_nocheck", "", X86_BUG_ESPFIX
+       ALTERNATIVE     "jmp .Lrestore_nocheck", "", X86_BUG_ESPFIX
 
        movl    PT_EFLAGS(%esp), %eax           # mix EFLAGS, SS and CS
        /*
@@ -518,22 +550,23 @@ restore_all_notrace:
        movb    PT_CS(%esp), %al
        andl    $(X86_EFLAGS_VM | (SEGMENT_TI_MASK << 8) | SEGMENT_RPL_MASK), %eax
        cmpl    $((SEGMENT_LDT << 8) | USER_RPL), %eax
-       je ldt_ss                               # returning to user-space with LDT SS
+       je .Lldt_ss                             # returning to user-space with LDT SS
 #endif
-restore_nocheck:
+.Lrestore_nocheck:
        RESTORE_REGS 4                          # skip orig_eax/error_code
-irq_return:
+.Lirq_return:
        INTERRUPT_RETURN
+
 .section .fixup, "ax"
 ENTRY(iret_exc )
        pushl   $0                              # no error code
        pushl   $do_iret_error
-       jmp     error_code
+       jmp     common_exception
 .previous
-       _ASM_EXTABLE(irq_return, iret_exc)
+       _ASM_EXTABLE(.Lirq_return, iret_exc)
 
 #ifdef CONFIG_X86_ESPFIX32
-ldt_ss:
+.Lldt_ss:
 /*
  * Setup and switch to ESPFIX stack
  *
@@ -562,7 +595,7 @@ ldt_ss:
         */
        DISABLE_INTERRUPTS(CLBR_EAX)
        lss     (%esp), %esp                    /* switch to espfix segment */
-       jmp     restore_nocheck
+       jmp     .Lrestore_nocheck
 #endif
 ENDPROC(entry_INT80_32)
 
@@ -624,6 +657,7 @@ common_interrupt:
        ASM_CLAC
        addl    $-0x80, (%esp)                  /* Adjust vector into the [-256, -1] range */
        SAVE_ALL
+       ENCODE_FRAME_POINTER
        TRACE_IRQS_OFF
        movl    %esp, %eax
        call    do_IRQ
@@ -635,6 +669,7 @@ ENTRY(name)                         \
        ASM_CLAC;                       \
        pushl   $~(nr);                 \
        SAVE_ALL;                       \
+       ENCODE_FRAME_POINTER;           \
        TRACE_IRQS_OFF                  \
        movl    %esp, %eax;             \
        call    fn;                     \
@@ -659,7 +694,7 @@ ENTRY(coprocessor_error)
        ASM_CLAC
        pushl   $0
        pushl   $do_coprocessor_error
-       jmp     error_code
+       jmp     common_exception
 END(coprocessor_error)
 
 ENTRY(simd_coprocessor_error)
@@ -673,14 +708,14 @@ ENTRY(simd_coprocessor_error)
 #else
        pushl   $do_simd_coprocessor_error
 #endif
-       jmp     error_code
+       jmp     common_exception
 END(simd_coprocessor_error)
 
 ENTRY(device_not_available)
        ASM_CLAC
        pushl   $-1                             # mark this as an int
        pushl   $do_device_not_available
-       jmp     error_code
+       jmp     common_exception
 END(device_not_available)
 
 #ifdef CONFIG_PARAVIRT
@@ -694,59 +729,59 @@ ENTRY(overflow)
        ASM_CLAC
        pushl   $0
        pushl   $do_overflow
-       jmp     error_code
+       jmp     common_exception
 END(overflow)
 
 ENTRY(bounds)
        ASM_CLAC
        pushl   $0
        pushl   $do_bounds
-       jmp     error_code
+       jmp     common_exception
 END(bounds)
 
 ENTRY(invalid_op)
        ASM_CLAC
        pushl   $0
        pushl   $do_invalid_op
-       jmp     error_code
+       jmp     common_exception
 END(invalid_op)
 
 ENTRY(coprocessor_segment_overrun)
        ASM_CLAC
        pushl   $0
        pushl   $do_coprocessor_segment_overrun
-       jmp     error_code
+       jmp     common_exception
 END(coprocessor_segment_overrun)
 
 ENTRY(invalid_TSS)
        ASM_CLAC
        pushl   $do_invalid_TSS
-       jmp     error_code
+       jmp     common_exception
 END(invalid_TSS)
 
 ENTRY(segment_not_present)
        ASM_CLAC
        pushl   $do_segment_not_present
-       jmp     error_code
+       jmp     common_exception
 END(segment_not_present)
 
 ENTRY(stack_segment)
        ASM_CLAC
        pushl   $do_stack_segment
-       jmp     error_code
+       jmp     common_exception
 END(stack_segment)
 
 ENTRY(alignment_check)
        ASM_CLAC
        pushl   $do_alignment_check
-       jmp     error_code
+       jmp     common_exception
 END(alignment_check)
 
 ENTRY(divide_error)
        ASM_CLAC
        pushl   $0                              # no error code
        pushl   $do_divide_error
-       jmp     error_code
+       jmp     common_exception
 END(divide_error)
 
 #ifdef CONFIG_X86_MCE
@@ -754,7 +789,7 @@ ENTRY(machine_check)
        ASM_CLAC
        pushl   $0
        pushl   machine_check_vector
-       jmp     error_code
+       jmp     common_exception
 END(machine_check)
 #endif
 
@@ -762,13 +797,14 @@ ENTRY(spurious_interrupt_bug)
        ASM_CLAC
        pushl   $0
        pushl   $do_spurious_interrupt_bug
-       jmp     error_code
+       jmp     common_exception
 END(spurious_interrupt_bug)
 
 #ifdef CONFIG_XEN
 ENTRY(xen_hypervisor_callback)
        pushl   $-1                             /* orig_ax = -1 => not a system call */
        SAVE_ALL
+       ENCODE_FRAME_POINTER
        TRACE_IRQS_OFF
 
        /*
@@ -823,6 +859,7 @@ ENTRY(xen_failsafe_callback)
        jmp     iret_exc
 5:     pushl   $-1                             /* orig_ax = -1 => not a system call */
        SAVE_ALL
+       ENCODE_FRAME_POINTER
        jmp     ret_from_exception
 
 .section .fixup, "ax"
@@ -882,7 +919,7 @@ ftrace_call:
        popl    %edx
        popl    %ecx
        popl    %eax
-ftrace_ret:
+.Lftrace_ret:
 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
 .globl ftrace_graph_call
 ftrace_graph_call:
@@ -952,7 +989,7 @@ GLOBAL(ftrace_regs_call)
        popl    %gs
        addl    $8, %esp                        /* Skip orig_ax and ip */
        popf                                    /* Pop flags at end (no addl to corrupt flags) */
-       jmp     ftrace_ret
+       jmp     .Lftrace_ret
 
        popf
        jmp     ftrace_stub
@@ -963,7 +1000,7 @@ ENTRY(mcount)
        jb      ftrace_stub                     /* Paging not enabled yet? */
 
        cmpl    $ftrace_stub, ftrace_trace_function
-       jnz     trace
+       jnz     .Ltrace
 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
        cmpl    $ftrace_stub, ftrace_graph_return
        jnz     ftrace_graph_caller
@@ -976,7 +1013,7 @@ ftrace_stub:
        ret
 
        /* taken from glibc */
-trace:
+.Ltrace:
        pushl   %eax
        pushl   %ecx
        pushl   %edx
@@ -1027,7 +1064,7 @@ return_to_handler:
 ENTRY(trace_page_fault)
        ASM_CLAC
        pushl   $trace_do_page_fault
-       jmp     error_code
+       jmp     common_exception
 END(trace_page_fault)
 #endif
 
@@ -1035,7 +1072,10 @@ ENTRY(page_fault)
        ASM_CLAC
        pushl   $do_page_fault
        ALIGN
-error_code:
+       jmp common_exception
+END(page_fault)
+
+common_exception:
        /* the function address is in %gs's slot on the stack */
        pushl   %fs
        pushl   %es
@@ -1047,6 +1087,7 @@ error_code:
        pushl   %edx
        pushl   %ecx
        pushl   %ebx
+       ENCODE_FRAME_POINTER
        cld
        movl    $(__KERNEL_PERCPU), %ecx
        movl    %ecx, %fs
@@ -1064,7 +1105,7 @@ error_code:
        movl    %esp, %eax                      # pt_regs pointer
        call    *%edi
        jmp     ret_from_exception
-END(page_fault)
+END(common_exception)
 
 ENTRY(debug)
        /*
@@ -1079,6 +1120,7 @@ ENTRY(debug)
        ASM_CLAC
        pushl   $-1                             # mark this as an int
        SAVE_ALL
+       ENCODE_FRAME_POINTER
        xorl    %edx, %edx                      # error code 0
        movl    %esp, %eax                      # pt_regs pointer
 
@@ -1094,11 +1136,11 @@ ENTRY(debug)
 
 .Ldebug_from_sysenter_stack:
        /* We're on the SYSENTER stack.  Switch off. */
-       movl    %esp, %ebp
+       movl    %esp, %ebx
        movl    PER_CPU_VAR(cpu_current_top_of_stack), %esp
        TRACE_IRQS_OFF
        call    do_debug
-       movl    %ebp, %esp
+       movl    %ebx, %esp
        jmp     ret_from_exception
 END(debug)
 
@@ -1116,11 +1158,12 @@ ENTRY(nmi)
        movl    %ss, %eax
        cmpw    $__ESPFIX_SS, %ax
        popl    %eax
-       je      nmi_espfix_stack
+       je      .Lnmi_espfix_stack
 #endif
 
        pushl   %eax                            # pt_regs->orig_ax
        SAVE_ALL
+       ENCODE_FRAME_POINTER
        xorl    %edx, %edx                      # zero error code
        movl    %esp, %eax                      # pt_regs pointer
 
@@ -1132,21 +1175,21 @@ ENTRY(nmi)
 
        /* Not on SYSENTER stack. */
        call    do_nmi
-       jmp     restore_all_notrace
+       jmp     .Lrestore_all_notrace
 
 .Lnmi_from_sysenter_stack:
        /*
         * We're on the SYSENTER stack.  Switch off.  No one (not even debug)
         * is using the thread stack right now, so it's safe for us to use it.
         */
-       movl    %esp, %ebp
+       movl    %esp, %ebx
        movl    PER_CPU_VAR(cpu_current_top_of_stack), %esp
        call    do_nmi
-       movl    %ebp, %esp
-       jmp     restore_all_notrace
+       movl    %ebx, %esp
+       jmp     .Lrestore_all_notrace
 
 #ifdef CONFIG_X86_ESPFIX32
-nmi_espfix_stack:
+.Lnmi_espfix_stack:
        /*
         * create the pointer to lss back
         */
@@ -1159,12 +1202,13 @@ nmi_espfix_stack:
        .endr
        pushl   %eax
        SAVE_ALL
+       ENCODE_FRAME_POINTER
        FIXUP_ESPFIX_STACK                      # %eax == %esp
        xorl    %edx, %edx                      # zero error code
        call    do_nmi
        RESTORE_REGS
        lss     12+4(%esp), %esp                # back to espfix stack
-       jmp     irq_return
+       jmp     .Lirq_return
 #endif
 END(nmi)
 
@@ -1172,6 +1216,7 @@ ENTRY(int3)
        ASM_CLAC
        pushl   $-1                             # mark this as an int
        SAVE_ALL
+       ENCODE_FRAME_POINTER
        TRACE_IRQS_OFF
        xorl    %edx, %edx                      # zero error code
        movl    %esp, %eax                      # pt_regs pointer
@@ -1181,14 +1226,14 @@ END(int3)
 
 ENTRY(general_protection)
        pushl   $do_general_protection
-       jmp     error_code
+       jmp     common_exception
 END(general_protection)
 
 #ifdef CONFIG_KVM_GUEST
 ENTRY(async_page_fault)
        ASM_CLAC
        pushl   $do_async_page_fault
-       jmp     error_code
+       jmp     common_exception
 END(async_page_fault)
 #endif
 
index ef766a358b37dd355e8d5d75c10a61d47a83e04f..5b219707c2f236cfcf1a856cf9a5023dd93775c3 100644 (file)
 #include <asm/export.h>
 #include <linux/err.h>
 
-/* Avoid __ASSEMBLER__'ifying <linux/audit.h> just for this.  */
-#include <linux/elf-em.h>
-#define AUDIT_ARCH_X86_64                      (EM_X86_64|__AUDIT_ARCH_64BIT|__AUDIT_ARCH_LE)
-#define __AUDIT_ARCH_64BIT                     0x80000000
-#define __AUDIT_ARCH_LE                                0x40000000
-
 .code64
 .section .entry.text, "ax"
 
@@ -469,6 +463,7 @@ END(irq_entries_start)
        ALLOC_PT_GPREGS_ON_STACK
        SAVE_C_REGS
        SAVE_EXTRA_REGS
+       ENCODE_FRAME_POINTER
 
        testb   $3, CS(%rsp)
        jz      1f
@@ -985,6 +980,7 @@ ENTRY(xen_failsafe_callback)
        ALLOC_PT_GPREGS_ON_STACK
        SAVE_C_REGS
        SAVE_EXTRA_REGS
+       ENCODE_FRAME_POINTER
        jmp     error_exit
 END(xen_failsafe_callback)
 
@@ -1028,6 +1024,7 @@ ENTRY(paranoid_entry)
        cld
        SAVE_C_REGS 8
        SAVE_EXTRA_REGS 8
+       ENCODE_FRAME_POINTER 8
        movl    $1, %ebx
        movl    $MSR_GS_BASE, %ecx
        rdmsr
@@ -1075,6 +1072,7 @@ ENTRY(error_entry)
        cld
        SAVE_C_REGS 8
        SAVE_EXTRA_REGS 8
+       ENCODE_FRAME_POINTER 8
        xorl    %ebx, %ebx
        testb   $3, CS+8(%rsp)
        jz      .Lerror_kernelspace
@@ -1257,6 +1255,7 @@ ENTRY(nmi)
        pushq   %r13            /* pt_regs->r13 */
        pushq   %r14            /* pt_regs->r14 */
        pushq   %r15            /* pt_regs->r15 */
+       ENCODE_FRAME_POINTER
 
        /*
         * At this point we no longer need to worry about stack damage
@@ -1270,11 +1269,10 @@ ENTRY(nmi)
 
        /*
         * Return back to user mode.  We must *not* do the normal exit
-        * work, because we don't want to enable interrupts.  Fortunately,
-        * do_nmi doesn't modify pt_regs.
+        * work, because we don't want to enable interrupts.
         */
        SWAPGS
-       jmp     restore_c_regs_and_iret
+       jmp     restore_regs_and_iret
 
 .Lnmi_from_kernel:
        /*
index 23c881caabd1ce1d702ef86c0c5e2ba170283516..e739002427ed12fcbd4924d22938676643b6478e 100644 (file)
@@ -161,8 +161,6 @@ static int map_vdso(const struct vdso_image *image, unsigned long addr)
        }
 
        text_start = addr - image->sym_vvar_start;
-       current->mm->context.vdso = (void __user *)text_start;
-       current->mm->context.vdso_image = image;
 
        /*
         * MAYWRITE to allow gdb to COW and set breakpoints
@@ -189,14 +187,12 @@ static int map_vdso(const struct vdso_image *image, unsigned long addr)
        if (IS_ERR(vma)) {
                ret = PTR_ERR(vma);
                do_munmap(mm, text_start, image->size);
+       } else {
+               current->mm->context.vdso = (void __user *)text_start;
+               current->mm->context.vdso_image = image;
        }
 
 up_fail:
-       if (ret) {
-               current->mm->context.vdso = NULL;
-               current->mm->context.vdso_image = NULL;
-       }
-
        up_write(&mm->mmap_sem);
        return ret;
 }
index c5047b8f777b27e918b3c5e0982dd274cf652dd0..1c1b9fe705c84d218cbbf3aeea92edde4bbc40cd 100644 (file)
@@ -36,13 +36,6 @@ static DEFINE_PER_CPU(struct pt, pt_ctx);
 
 static struct pt_pmu pt_pmu;
 
-enum cpuid_regs {
-       CR_EAX = 0,
-       CR_ECX,
-       CR_EDX,
-       CR_EBX
-};
-
 /*
  * Capabilities of Intel PT hardware, such as number of address bits or
  * supported output schemes, are cached and exported to userspace as "caps"
@@ -64,21 +57,21 @@ static struct pt_cap_desc {
        u8              reg;
        u32             mask;
 } pt_caps[] = {
-       PT_CAP(max_subleaf,             0, CR_EAX, 0xffffffff),
-       PT_CAP(cr3_filtering,           0, CR_EBX, BIT(0)),
-       PT_CAP(psb_cyc,                 0, CR_EBX, BIT(1)),
-       PT_CAP(ip_filtering,            0, CR_EBX, BIT(2)),
-       PT_CAP(mtc,                     0, CR_EBX, BIT(3)),
-       PT_CAP(ptwrite,                 0, CR_EBX, BIT(4)),
-       PT_CAP(power_event_trace,       0, CR_EBX, BIT(5)),
-       PT_CAP(topa_output,             0, CR_ECX, BIT(0)),
-       PT_CAP(topa_multiple_entries,   0, CR_ECX, BIT(1)),
-       PT_CAP(single_range_output,     0, CR_ECX, BIT(2)),
-       PT_CAP(payloads_lip,            0, CR_ECX, BIT(31)),
-       PT_CAP(num_address_ranges,      1, CR_EAX, 0x3),
-       PT_CAP(mtc_periods,             1, CR_EAX, 0xffff0000),
-       PT_CAP(cycle_thresholds,        1, CR_EBX, 0xffff),
-       PT_CAP(psb_periods,             1, CR_EBX, 0xffff0000),
+       PT_CAP(max_subleaf,             0, CPUID_EAX, 0xffffffff),
+       PT_CAP(cr3_filtering,           0, CPUID_EBX, BIT(0)),
+       PT_CAP(psb_cyc,                 0, CPUID_EBX, BIT(1)),
+       PT_CAP(ip_filtering,            0, CPUID_EBX, BIT(2)),
+       PT_CAP(mtc,                     0, CPUID_EBX, BIT(3)),
+       PT_CAP(ptwrite,                 0, CPUID_EBX, BIT(4)),
+       PT_CAP(power_event_trace,       0, CPUID_EBX, BIT(5)),
+       PT_CAP(topa_output,             0, CPUID_ECX, BIT(0)),
+       PT_CAP(topa_multiple_entries,   0, CPUID_ECX, BIT(1)),
+       PT_CAP(single_range_output,     0, CPUID_ECX, BIT(2)),
+       PT_CAP(payloads_lip,            0, CPUID_ECX, BIT(31)),
+       PT_CAP(num_address_ranges,      1, CPUID_EAX, 0x3),
+       PT_CAP(mtc_periods,             1, CPUID_EAX, 0xffff0000),
+       PT_CAP(cycle_thresholds,        1, CPUID_EBX, 0xffff),
+       PT_CAP(psb_periods,             1, CPUID_EBX, 0xffff0000),
 };
 
 static u32 pt_cap_get(enum pt_capabilities cap)
@@ -213,10 +206,10 @@ static int __init pt_pmu_hw_init(void)
 
        for (i = 0; i < PT_CPUID_LEAVES; i++) {
                cpuid_count(20, i,
-                           &pt_pmu.caps[CR_EAX + i*PT_CPUID_REGS_NUM],
-                           &pt_pmu.caps[CR_EBX + i*PT_CPUID_REGS_NUM],
-                           &pt_pmu.caps[CR_ECX + i*PT_CPUID_REGS_NUM],
-                           &pt_pmu.caps[CR_EDX + i*PT_CPUID_REGS_NUM]);
+                           &pt_pmu.caps[CPUID_EAX + i*PT_CPUID_REGS_NUM],
+                           &pt_pmu.caps[CPUID_EBX + i*PT_CPUID_REGS_NUM],
+                           &pt_pmu.caps[CPUID_ECX + i*PT_CPUID_REGS_NUM],
+                           &pt_pmu.caps[CPUID_EDX + i*PT_CPUID_REGS_NUM]);
        }
 
        ret = -ENOMEM;
index d625b651e526605dad64ab369b13bbd450fcba3e..4dba597c5807033c3edec86ff9f3efde249c785f 100644 (file)
 #define X86_FEATURE_RDSEED     ( 9*32+18) /* The RDSEED instruction */
 #define X86_FEATURE_ADX                ( 9*32+19) /* The ADCX and ADOX instructions */
 #define X86_FEATURE_SMAP       ( 9*32+20) /* Supervisor Mode Access Prevention */
+#define X86_FEATURE_AVX512IFMA  ( 9*32+21) /* AVX-512 Integer Fused Multiply-Add instructions */
 #define X86_FEATURE_CLFLUSHOPT ( 9*32+23) /* CLFLUSHOPT instruction */
 #define X86_FEATURE_CLWB       ( 9*32+24) /* CLWB instruction */
 #define X86_FEATURE_AVX512PF   ( 9*32+26) /* AVX-512 Prefetch */
 #define X86_FEATURE_AVIC       (15*32+13) /* Virtual Interrupt Controller */
 
 /* Intel-defined CPU features, CPUID level 0x00000007:0 (ecx), word 16 */
+#define X86_FEATURE_AVX512VBMI  (16*32+ 1) /* AVX512 Vector Bit Manipulation instructions*/
 #define X86_FEATURE_PKU                (16*32+ 3) /* Protection Keys for Userspace */
 #define X86_FEATURE_OSPKE      (16*32+ 4) /* OS Protection Keys Enable */
+#define X86_FEATURE_RDPID      (16*32+ 22) /* RDPID instruction */
 
 /* AMD-defined CPU features, CPUID level 0x80000007 (ebx), word 17 */
 #define X86_FEATURE_OVERFLOW_RECOV (17*32+0) /* MCA overflow recovery support */
index d318811884318aa8691cac64a7e34384446ec17e..29a594a3b82aeedccc60f080a5186769cfb1df74 100644 (file)
@@ -21,7 +21,6 @@ enum die_val {
        DIE_NMIUNKNOWN,
 };
 
-extern void printk_address(unsigned long address);
 extern void die(const char *, struct pt_regs *,long);
 extern int __must_check __die(const char *, struct pt_regs *, long);
 extern void show_stack_regs(struct pt_regs *regs);
index c84605bb2a15d089502d0214d5625d463bd69d7b..1f6a92903b09379e4d7152e219e04cef15a60177 100644 (file)
@@ -137,6 +137,17 @@ struct cpuinfo_x86 {
        u32                     microcode;
 };
 
+struct cpuid_regs {
+       u32 eax, ebx, ecx, edx;
+};
+
+enum cpuid_regs_idx {
+       CPUID_EAX = 0,
+       CPUID_EBX,
+       CPUID_ECX,
+       CPUID_EDX,
+};
+
 #define X86_VENDOR_INTEL       0
 #define X86_VENDOR_CYRIX       1
 #define X86_VENDOR_AMD         2
@@ -178,6 +189,9 @@ extern void identify_secondary_cpu(struct cpuinfo_x86 *);
 extern void print_cpu_info(struct cpuinfo_x86 *);
 void print_cpu_msr(struct cpuinfo_x86 *);
 extern void init_scattered_cpuid_features(struct cpuinfo_x86 *c);
+extern u32 get_scattered_cpuid_leaf(unsigned int level,
+                                   unsigned int sub_leaf,
+                                   enum cpuid_regs_idx reg);
 extern unsigned int init_intel_cacheinfo(struct cpuinfo_x86 *c);
 extern void init_amd_cacheinfo(struct cpuinfo_x86 *c);
 
index 37f2e0b377ad9d5d8c7bed16f33f4fe7a3235a3e..a3269c897ec578508fd41c6f7ab9b3622934e623 100644 (file)
@@ -30,8 +30,7 @@ bool in_task_stack(unsigned long *stack, struct task_struct *task,
 int get_stack_info(unsigned long *stack, struct task_struct *task,
                   struct stack_info *info, unsigned long *visit_mask);
 
-void stack_type_str(enum stack_type type, const char **begin,
-                   const char **end);
+const char *stack_type_name(enum stack_type type);
 
 static inline bool on_stack(struct stack_info *info, void *addr, size_t len)
 {
@@ -43,8 +42,6 @@ static inline bool on_stack(struct stack_info *info, void *addr, size_t len)
                addr + len > begin && addr + len <= end);
 }
 
-extern int kstack_depth_to_print;
-
 #ifdef CONFIG_X86_32
 #define STACKSLOTS_PER_LINE 8
 #else
@@ -86,9 +83,6 @@ get_stack_pointer(struct task_struct *task, struct pt_regs *regs)
 void show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,
                        unsigned long *stack, char *log_lvl);
 
-void show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs,
-                       unsigned long *sp, char *log_lvl);
-
 extern unsigned int code_bytes;
 
 /* The form of the top of the frame on the stack */
index 46de9ac4b9905b9e152a937b2b76613b01e0f5ee..c5a7f3a930dd5bf8ae18b0c89059aca7cf23aaac 100644 (file)
@@ -13,6 +13,7 @@ struct unwind_state {
        int graph_idx;
 #ifdef CONFIG_FRAME_POINTER
        unsigned long *bp;
+       struct pt_regs *regs;
 #else
        unsigned long *sp;
 #endif
@@ -47,7 +48,15 @@ unsigned long *unwind_get_return_address_ptr(struct unwind_state *state)
        if (unwind_done(state))
                return NULL;
 
-       return state->bp + 1;
+       return state->regs ? &state->regs->ip : state->bp + 1;
+}
+
+static inline struct pt_regs *unwind_get_entry_regs(struct unwind_state *state)
+{
+       if (unwind_done(state))
+               return NULL;
+
+       return state->regs;
 }
 
 #else /* !CONFIG_FRAME_POINTER */
@@ -58,6 +67,11 @@ unsigned long *unwind_get_return_address_ptr(struct unwind_state *state)
        return NULL;
 }
 
+static inline struct pt_regs *unwind_get_entry_regs(struct unwind_state *state)
+{
+       return NULL;
+}
+
 #endif /* CONFIG_FRAME_POINTER */
 
 #endif /* _ASM_X86_UNWIND_H */
index e728699db7741f0282441a79635a88afd23259c8..3a01996db58fbf7bfa41ca52c86f323d4bb6c937 100644 (file)
@@ -89,8 +89,13 @@ static inline unsigned int __getcpu(void)
         * works on all CPUs.  This is volatile so that it orders
         * correctly wrt barrier() and to keep gcc from cleverly
         * hoisting it out of the calling function.
+        *
+        * If RDPID is available, use it.
         */
-       asm volatile ("lsl %1,%0" : "=r" (p) : "r" (__PER_CPU_SEG));
+       alternative_io ("lsl %[p],%[seg]",
+                       ".byte 0xf3,0x0f,0xc7,0xf8", /* RDPID %eax/rax */
+                       X86_FEATURE_RDPID,
+                       [p] "=a" (p), [seg] "r" (__PER_CPU_SEG));
 
        return p;
 }
index ae135de547f561d5888afcf3da408b128904d64e..835aa51c7f6ebb914592752373f55287ca8cc235 100644 (file)
@@ -6,10 +6,8 @@
 #define ARCH_GET_FS 0x1003
 #define ARCH_GET_GS 0x1004
 
-#ifdef CONFIG_CHECKPOINT_RESTORE
-# define ARCH_MAP_VDSO_X32     0x2001
-# define ARCH_MAP_VDSO_32      0x2002
-# define ARCH_MAP_VDSO_64      0x2003
-#endif
+#define ARCH_MAP_VDSO_X32      0x2001
+#define ARCH_MAP_VDSO_32       0x2002
+#define ARCH_MAP_VDSO_64       0x2003
 
 #endif /* _ASM_X86_PRCTL_H */
index 1db8dc490b665e751f43f3411cc21079eea75ae2..d1316f9c8329846b0d3f7dede754fa0eef256bd3 100644 (file)
@@ -17,11 +17,17 @@ struct cpuid_bit {
        u32 sub_leaf;
 };
 
-enum cpuid_regs {
-       CR_EAX = 0,
-       CR_ECX,
-       CR_EDX,
-       CR_EBX
+/* Please keep the leaf sorted by cpuid_bit.level for faster search. */
+static const struct cpuid_bit cpuid_bits[] = {
+       { X86_FEATURE_APERFMPERF,       CPUID_ECX,  0, 0x00000006, 0 },
+       { X86_FEATURE_EPB,              CPUID_ECX,  3, 0x00000006, 0 },
+       { X86_FEATURE_INTEL_PT,         CPUID_EBX, 25, 0x00000007, 0 },
+       { X86_FEATURE_AVX512_4VNNIW,    CPUID_EDX,  2, 0x00000007, 0 },
+       { X86_FEATURE_AVX512_4FMAPS,    CPUID_EDX,  3, 0x00000007, 0 },
+       { X86_FEATURE_HW_PSTATE,        CPUID_EDX,  7, 0x80000007, 0 },
+       { X86_FEATURE_CPB,              CPUID_EDX,  9, 0x80000007, 0 },
+       { X86_FEATURE_PROC_FEEDBACK,    CPUID_EDX, 11, 0x80000007, 0 },
+       { 0, 0, 0, 0, 0 }
 };
 
 void init_scattered_cpuid_features(struct cpuinfo_x86 *c)
@@ -30,18 +36,6 @@ void init_scattered_cpuid_features(struct cpuinfo_x86 *c)
        u32 regs[4];
        const struct cpuid_bit *cb;
 
-       static const struct cpuid_bit cpuid_bits[] = {
-               { X86_FEATURE_INTEL_PT,         CR_EBX,25, 0x00000007, 0 },
-               { X86_FEATURE_AVX512_4VNNIW,    CR_EDX, 2, 0x00000007, 0 },
-               { X86_FEATURE_AVX512_4FMAPS,    CR_EDX, 3, 0x00000007, 0 },
-               { X86_FEATURE_APERFMPERF,       CR_ECX, 0, 0x00000006, 0 },
-               { X86_FEATURE_EPB,              CR_ECX, 3, 0x00000006, 0 },
-               { X86_FEATURE_HW_PSTATE,        CR_EDX, 7, 0x80000007, 0 },
-               { X86_FEATURE_CPB,              CR_EDX, 9, 0x80000007, 0 },
-               { X86_FEATURE_PROC_FEEDBACK,    CR_EDX,11, 0x80000007, 0 },
-               { 0, 0, 0, 0, 0 }
-       };
-
        for (cb = cpuid_bits; cb->feature; cb++) {
 
                /* Verify that the level is valid */
@@ -50,10 +44,35 @@ void init_scattered_cpuid_features(struct cpuinfo_x86 *c)
                    max_level > (cb->level | 0xffff))
                        continue;
 
-               cpuid_count(cb->level, cb->sub_leaf, &regs[CR_EAX],
-                           &regs[CR_EBX], &regs[CR_ECX], &regs[CR_EDX]);
+               cpuid_count(cb->level, cb->sub_leaf, &regs[CPUID_EAX],
+                           &regs[CPUID_EBX], &regs[CPUID_ECX],
+                           &regs[CPUID_EDX]);
 
                if (regs[cb->reg] & (1 << cb->bit))
                        set_cpu_cap(c, cb->feature);
        }
 }
+
+u32 get_scattered_cpuid_leaf(unsigned int level, unsigned int sub_leaf,
+                            enum cpuid_regs_idx reg)
+{
+       const struct cpuid_bit *cb;
+       u32 cpuid_val = 0;
+
+       for (cb = cpuid_bits; cb->feature; cb++) {
+
+               if (level > cb->level)
+                       continue;
+
+               if (level < cb->level)
+                       break;
+
+               if (reg == cb->reg && sub_leaf == cb->sub_leaf) {
+                       if (cpu_has(&boot_cpu_data, cb->feature))
+                               cpuid_val |= BIT(cb->bit);
+               }
+       }
+
+       return cpuid_val;
+}
+EXPORT_SYMBOL_GPL(get_scattered_cpuid_leaf);
index 2836de390f95cdad34ff11331d9f0afd8df08998..9095c80723d6be01e4fbe378ef4e0a5772f1c6b4 100644 (file)
 
 static struct class *cpuid_class;
 
-struct cpuid_regs {
-       u32 eax, ebx, ecx, edx;
-};
-
 static void cpuid_smp_cpuid(void *cmd_block)
 {
        struct cpuid_regs *cmd = (struct cpuid_regs *)cmd_block;
index 85f854b98a9d24c3e0e6a3d9d83fd6c5b6c57e3f..0cfd01d2754cc9e074c5f44cb79b212e3afa7e68 100644 (file)
@@ -22,7 +22,6 @@
 int panic_on_unrecovered_nmi;
 int panic_on_io_nmi;
 unsigned int code_bytes = 64;
-int kstack_depth_to_print = 3 * STACKSLOTS_PER_LINE;
 static int die_counter;
 
 bool in_task_stack(unsigned long *stack, struct task_struct *task,
@@ -46,14 +45,7 @@ static void printk_stack_address(unsigned long address, int reliable,
                                 char *log_lvl)
 {
        touch_nmi_watchdog();
-       printk("%s [<%p>] %s%pB\n",
-               log_lvl, (void *)address, reliable ? "" : "? ",
-               (void *)address);
-}
-
-void printk_address(unsigned long address)
-{
-       pr_cont(" [<%p>] %pS\n", (void *)address, (void *)address);
+       printk("%s %s%pB\n", log_lvl, reliable ? "" : "? ", (void *)address);
 }
 
 void show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,
@@ -67,6 +59,7 @@ void show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,
        printk("%sCall Trace:\n", log_lvl);
 
        unwind_start(&state, task, regs, stack);
+       stack = stack ? : get_stack_pointer(task, regs);
 
        /*
         * Iterate through the stacks, starting with the current stack pointer.
@@ -82,8 +75,8 @@ void show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,
         * - softirq stack
         * - hardirq stack
         */
-       for (; stack; stack = stack_info.next_sp) {
-               const char *str_begin, *str_end;
+       for (regs = NULL; stack; stack = stack_info.next_sp) {
+               const char *stack_name;
 
                /*
                 * If we overflowed the task stack into a guard page, jump back
@@ -95,9 +88,9 @@ void show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,
                if (get_stack_info(stack, task, &stack_info, &visit_mask))
                        break;
 
-               stack_type_str(stack_info.type, &str_begin, &str_end);
-               if (str_begin)
-                       printk("%s <%s> ", log_lvl, str_begin);
+               stack_name = stack_type_name(stack_info.type);
+               if (stack_name)
+                       printk("%s <%s>\n", log_lvl, stack_name);
 
                /*
                 * Scan the stack, printing any text addresses we find.  At the
@@ -119,6 +112,15 @@ void show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,
                        if (!__kernel_text_address(addr))
                                continue;
 
+                       /*
+                        * Don't print regs->ip again if it was already printed
+                        * by __show_regs() below.
+                        */
+                       if (regs && stack == &regs->ip) {
+                               unwind_next_frame(&state);
+                               continue;
+                       }
+
                        if (stack == ret_addr_p)
                                reliable = 1;
 
@@ -146,10 +148,15 @@ void show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,
                         * of the addresses will just be printed as unreliable.
                         */
                        unwind_next_frame(&state);
+
+                       /* if the frame has entry regs, print them */
+                       regs = unwind_get_entry_regs(&state);
+                       if (regs)
+                               __show_regs(regs, 0);
                }
 
-               if (str_end)
-                       printk("%s <%s> ", log_lvl, str_end);
+               if (stack_name)
+                       printk("%s </%s>\n", log_lvl, stack_name);
        }
 }
 
@@ -164,12 +171,12 @@ void show_stack(struct task_struct *task, unsigned long *sp)
        if (!sp && task == current)
                sp = get_stack_pointer(current, NULL);
 
-       show_stack_log_lvl(task, NULL, sp, "");
+       show_trace_log_lvl(task, NULL, sp, KERN_DEFAULT);
 }
 
 void show_stack_regs(struct pt_regs *regs)
 {
-       show_stack_log_lvl(current, regs, NULL, "");
+       show_trace_log_lvl(current, regs, NULL, KERN_DEFAULT);
 }
 
 static arch_spinlock_t die_lock = __ARCH_SPIN_LOCK_UNLOCKED;
@@ -261,14 +268,11 @@ int __die(const char *str, struct pt_regs *regs, long err)
                sp = kernel_stack_pointer(regs);
                savesegment(ss, ss);
        }
-       printk(KERN_EMERG "EIP: [<%08lx>] ", regs->ip);
-       print_symbol("%s", regs->ip);
-       printk(" SS:ESP %04x:%08lx\n", ss, sp);
+       printk(KERN_EMERG "EIP: %pS SS:ESP: %04x:%08lx\n",
+              (void *)regs->ip, ss, sp);
 #else
        /* Executive summary in case the oops scrolled away */
-       printk(KERN_ALERT "RIP ");
-       printk_address(regs->ip);
-       printk(" RSP <%016lx>\n", regs->sp);
+       printk(KERN_ALERT "RIP: %pS RSP: %016lx\n", (void *)regs->ip, regs->sp);
 #endif
        return 0;
 }
@@ -291,22 +295,6 @@ void die(const char *str, struct pt_regs *regs, long err)
        oops_end(flags, regs, sig);
 }
 
-static int __init kstack_setup(char *s)
-{
-       ssize_t ret;
-       unsigned long val;
-
-       if (!s)
-               return -EINVAL;
-
-       ret = kstrtoul(s, 0, &val);
-       if (ret)
-               return ret;
-       kstack_depth_to_print = val;
-       return 0;
-}
-early_param("kstack", kstack_setup);
-
 static int __init code_bytes_setup(char *s)
 {
        ssize_t ret;
index 06eb322b5f9ff569ac0e6eaccb582711fbe7f1ef..bb3b5b9a6899215ff3ff456367c93002d35eece8 100644 (file)
 
 #include <asm/stacktrace.h>
 
-void stack_type_str(enum stack_type type, const char **begin, const char **end)
+const char *stack_type_name(enum stack_type type)
 {
-       switch (type) {
-       case STACK_TYPE_IRQ:
-       case STACK_TYPE_SOFTIRQ:
-               *begin = "IRQ";
-               *end   = "EOI";
-               break;
-       default:
-               *begin = NULL;
-               *end   = NULL;
-       }
+       if (type == STACK_TYPE_IRQ)
+               return "IRQ";
+
+       if (type == STACK_TYPE_SOFTIRQ)
+               return "SOFTIRQ";
+
+       return NULL;
 }
 
 static bool in_hardirq_stack(unsigned long *stack, struct stack_info *info)
@@ -109,8 +106,10 @@ recursion_check:
         * just break out and report an unknown stack type.
         */
        if (visit_mask) {
-               if (*visit_mask & (1UL << info->type))
+               if (*visit_mask & (1UL << info->type)) {
+                       printk_deferred_once(KERN_WARNING "WARNING: stack recursion on stack type %d\n", info->type);
                        goto unknown;
+               }
                *visit_mask |= 1UL << info->type;
        }
 
@@ -121,36 +120,6 @@ unknown:
        return -EINVAL;
 }
 
-void show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs,
-                       unsigned long *sp, char *log_lvl)
-{
-       unsigned long *stack;
-       int i;
-
-       if (!try_get_task_stack(task))
-               return;
-
-       sp = sp ? : get_stack_pointer(task, regs);
-
-       stack = sp;
-       for (i = 0; i < kstack_depth_to_print; i++) {
-               if (kstack_end(stack))
-                       break;
-               if ((i % STACKSLOTS_PER_LINE) == 0) {
-                       if (i != 0)
-                               pr_cont("\n");
-                       printk("%s %08lx", log_lvl, *stack++);
-               } else
-                       pr_cont(" %08lx", *stack++);
-               touch_nmi_watchdog();
-       }
-       pr_cont("\n");
-       show_trace_log_lvl(task, regs, sp, log_lvl);
-
-       put_task_stack(task);
-}
-
-
 void show_regs(struct pt_regs *regs)
 {
        int i;
@@ -168,8 +137,7 @@ void show_regs(struct pt_regs *regs)
                unsigned char c;
                u8 *ip;
 
-               pr_emerg("Stack:\n");
-               show_stack_log_lvl(current, regs, NULL, KERN_EMERG);
+               show_trace_log_lvl(current, regs, NULL, KERN_EMERG);
 
                pr_emerg("Code:");
 
index 36cf1a49822746da21d2950c9324c1669555e3d4..fac189efcc347e09773b9b42ec341e99b17a70d4 100644 (file)
@@ -28,23 +28,17 @@ static unsigned long exception_stack_sizes[N_EXCEPTION_STACKS] = {
        [DEBUG_STACK - 1]                       = DEBUG_STKSZ
 };
 
-void stack_type_str(enum stack_type type, const char **begin, const char **end)
+const char *stack_type_name(enum stack_type type)
 {
        BUILD_BUG_ON(N_EXCEPTION_STACKS != 4);
 
-       switch (type) {
-       case STACK_TYPE_IRQ:
-               *begin = "IRQ";
-               *end   = "EOI";
-               break;
-       case STACK_TYPE_EXCEPTION ... STACK_TYPE_EXCEPTION_LAST:
-               *begin = exception_stack_names[type - STACK_TYPE_EXCEPTION];
-               *end   = "EOE";
-               break;
-       default:
-               *begin = NULL;
-               *end   = NULL;
-       }
+       if (type == STACK_TYPE_IRQ)
+               return "IRQ";
+
+       if (type >= STACK_TYPE_EXCEPTION && type <= STACK_TYPE_EXCEPTION_LAST)
+               return exception_stack_names[type - STACK_TYPE_EXCEPTION];
+
+       return NULL;
 }
 
 static bool in_exception_stack(unsigned long *stack, struct stack_info *info)
@@ -128,8 +122,10 @@ recursion_check:
         * just break out and report an unknown stack type.
         */
        if (visit_mask) {
-               if (*visit_mask & (1UL << info->type))
+               if (*visit_mask & (1UL << info->type)) {
+                       printk_deferred_once(KERN_WARNING "WARNING: stack recursion on stack type %d\n", info->type);
                        goto unknown;
+               }
                *visit_mask |= 1UL << info->type;
        }
 
@@ -140,56 +136,6 @@ unknown:
        return -EINVAL;
 }
 
-void show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs,
-                       unsigned long *sp, char *log_lvl)
-{
-       unsigned long *irq_stack_end;
-       unsigned long *irq_stack;
-       unsigned long *stack;
-       int i;
-
-       if (!try_get_task_stack(task))
-               return;
-
-       irq_stack_end = (unsigned long *)this_cpu_read(irq_stack_ptr);
-       irq_stack     = irq_stack_end - (IRQ_STACK_SIZE / sizeof(long));
-
-       sp = sp ? : get_stack_pointer(task, regs);
-
-       stack = sp;
-       for (i = 0; i < kstack_depth_to_print; i++) {
-               unsigned long word;
-
-               if (stack >= irq_stack && stack <= irq_stack_end) {
-                       if (stack == irq_stack_end) {
-                               stack = (unsigned long *) (irq_stack_end[-1]);
-                               pr_cont(" <EOI> ");
-                       }
-               } else {
-               if (kstack_end(stack))
-                       break;
-               }
-
-               if (probe_kernel_address(stack, word))
-                       break;
-
-               if ((i % STACKSLOTS_PER_LINE) == 0) {
-                       if (i != 0)
-                               pr_cont("\n");
-                       printk("%s %016lx", log_lvl, word);
-               } else
-                       pr_cont(" %016lx", word);
-
-               stack++;
-               touch_nmi_watchdog();
-       }
-
-       pr_cont("\n");
-       show_trace_log_lvl(task, regs, sp, log_lvl);
-
-       put_task_stack(task);
-}
-
 void show_regs(struct pt_regs *regs)
 {
        int i;
@@ -207,8 +153,7 @@ void show_regs(struct pt_regs *regs)
                unsigned char c;
                u8 *ip;
 
-               printk(KERN_DEFAULT "Stack:\n");
-               show_stack_log_lvl(current, regs, NULL, KERN_DEFAULT);
+               show_trace_log_lvl(current, regs, NULL, KERN_DEFAULT);
 
                printk(KERN_DEFAULT "Code: ");
 
index 095ef7ddd6ae4d1c6d5476d6e63561963786e793..ce47452879fd5e3f01d9f183fb3da2526356b81d 100644 (file)
@@ -65,6 +65,7 @@ void fpu__xstate_clear_all_cpu_caps(void)
        setup_clear_cpu_cap(X86_FEATURE_AVX);
        setup_clear_cpu_cap(X86_FEATURE_AVX2);
        setup_clear_cpu_cap(X86_FEATURE_AVX512F);
+       setup_clear_cpu_cap(X86_FEATURE_AVX512IFMA);
        setup_clear_cpu_cap(X86_FEATURE_AVX512PF);
        setup_clear_cpu_cap(X86_FEATURE_AVX512ER);
        setup_clear_cpu_cap(X86_FEATURE_AVX512CD);
@@ -73,6 +74,7 @@ void fpu__xstate_clear_all_cpu_caps(void)
        setup_clear_cpu_cap(X86_FEATURE_AVX512VL);
        setup_clear_cpu_cap(X86_FEATURE_MPX);
        setup_clear_cpu_cap(X86_FEATURE_XGETBV1);
+       setup_clear_cpu_cap(X86_FEATURE_AVX512VBMI);
        setup_clear_cpu_cap(X86_FEATURE_PKU);
        setup_clear_cpu_cap(X86_FEATURE_AVX512_4VNNIW);
        setup_clear_cpu_cap(X86_FEATURE_AVX512_4FMAPS);
index 2dabea46f03935f435493117c058a297f5011878..4e8577d03372c747c61e254e093a8c2ae7e4298c 100644 (file)
@@ -63,6 +63,8 @@
 #define PAGE_TABLE_SIZE(pages) ((pages) / PTRS_PER_PGD)
 #endif
 
+#define SIZEOF_PTREGS 17*4
+
 /*
  * Number of possible pages in the lowmem region.
  *
@@ -248,19 +250,19 @@ page_pde_offset = (__PAGE_OFFSET >> 20);
 #ifdef CONFIG_PARAVIRT
        /* This is can only trip for a broken bootloader... */
        cmpw $0x207, pa(boot_params + BP_version)
-       jb default_entry
+       jb .Ldefault_entry
 
        /* Paravirt-compatible boot parameters.  Look to see what architecture
                we're booting under. */
        movl pa(boot_params + BP_hardware_subarch), %eax
        cmpl $num_subarch_entries, %eax
-       jae bad_subarch
+       jae .Lbad_subarch
 
        movl pa(subarch_entries)(,%eax,4), %eax
        subl $__PAGE_OFFSET, %eax
        jmp *%eax
 
-bad_subarch:
+.Lbad_subarch:
 WEAK(lguest_entry)
 WEAK(xen_entry)
        /* Unknown implementation; there's really
@@ -270,14 +272,14 @@ WEAK(xen_entry)
        __INITDATA
 
 subarch_entries:
-       .long default_entry             /* normal x86/PC */
+       .long .Ldefault_entry           /* normal x86/PC */
        .long lguest_entry              /* lguest hypervisor */
        .long xen_entry                 /* Xen hypervisor */
-       .long default_entry             /* Moorestown MID */
+       .long .Ldefault_entry           /* Moorestown MID */
 num_subarch_entries = (. - subarch_entries) / 4
 .previous
 #else
-       jmp default_entry
+       jmp .Ldefault_entry
 #endif /* CONFIG_PARAVIRT */
 
 #ifdef CONFIG_HOTPLUG_CPU
@@ -289,7 +291,8 @@ num_subarch_entries = (. - subarch_entries) / 4
 ENTRY(start_cpu0)
        movl initial_stack, %ecx
        movl %ecx, %esp
-       jmp  *(initial_code)
+       call *(initial_code)
+1:     jmp 1b
 ENDPROC(start_cpu0)
 #endif
 
@@ -317,7 +320,7 @@ ENTRY(startup_32_smp)
        call load_ucode_ap
 #endif
 
-default_entry:
+.Ldefault_entry:
 #define CR0_STATE      (X86_CR0_PE | X86_CR0_MP | X86_CR0_ET | \
                         X86_CR0_NE | X86_CR0_WP | X86_CR0_AM | \
                         X86_CR0_PG)
@@ -347,7 +350,7 @@ default_entry:
        pushfl
        popl %eax                       # get EFLAGS
        testl $X86_EFLAGS_ID,%eax       # did EFLAGS.ID remained set?
-       jz enable_paging                # hw disallowed setting of ID bit
+       jz .Lenable_paging              # hw disallowed setting of ID bit
                                        # which means no CPUID and no CR4
 
        xorl %eax,%eax
@@ -357,13 +360,13 @@ default_entry:
        movl $1,%eax
        cpuid
        andl $~1,%edx                   # Ignore CPUID.FPU
-       jz enable_paging                # No flags or only CPUID.FPU = no CR4
+       jz .Lenable_paging              # No flags or only CPUID.FPU = no CR4
 
        movl pa(mmu_cr4_features),%eax
        movl %eax,%cr4
 
        testb $X86_CR4_PAE, %al         # check if PAE is enabled
-       jz enable_paging
+       jz .Lenable_paging
 
        /* Check if extended functions are implemented */
        movl $0x80000000, %eax
@@ -371,7 +374,7 @@ default_entry:
        /* Value must be in the range 0x80000001 to 0x8000ffff */
        subl $0x80000001, %eax
        cmpl $(0x8000ffff-0x80000001), %eax
-       ja enable_paging
+       ja .Lenable_paging
 
        /* Clear bogus XD_DISABLE bits */
        call verify_cpu
@@ -380,7 +383,7 @@ default_entry:
        cpuid
        /* Execute Disable bit supported? */
        btl $(X86_FEATURE_NX & 31), %edx
-       jnc enable_paging
+       jnc .Lenable_paging
 
        /* Setup EFER (Extended Feature Enable Register) */
        movl $MSR_EFER, %ecx
@@ -390,7 +393,7 @@ default_entry:
        /* Make changes effective */
        wrmsr
 
-enable_paging:
+.Lenable_paging:
 
 /*
  * Enable paging
@@ -419,7 +422,7 @@ enable_paging:
  */
        movb $4,X86                     # at least 486
        cmpl $-1,X86_CPUID
-       je is486
+       je .Lis486
 
        /* get vendor info */
        xorl %eax,%eax                  # call CPUID with 0 -> return vendor ID
@@ -430,7 +433,7 @@ enable_paging:
        movl %ecx,X86_VENDOR_ID+8       # last 4 chars
 
        orl %eax,%eax                   # do we have processor info as well?
-       je is486
+       je .Lis486
 
        movl $1,%eax            # Use the CPUID instruction to get CPU type
        cpuid
@@ -444,7 +447,7 @@ enable_paging:
        movb %cl,X86_MASK
        movl %edx,X86_CAPABILITY
 
-is486:
+.Lis486:
        movl $0x50022,%ecx      # set AM, WP, NE and MP
        movl %cr0,%eax
        andl $0x80000011,%eax   # Save PG,PE,ET
@@ -470,8 +473,9 @@ is486:
        xorl %eax,%eax                  # Clear LDT
        lldt %ax
 
-       pushl $0                # fake return address for unwinder
-       jmp *(initial_code)
+       call *(initial_code)
+1:     jmp 1b
+ENDPROC(startup_32_smp)
 
 #include "verify_cpu.S"
 
@@ -709,7 +713,12 @@ ENTRY(initial_page_table)
 .data
 .balign 4
 ENTRY(initial_stack)
-       .long init_thread_union+THREAD_SIZE
+       /*
+        * The SIZEOF_PTREGS gap is a convention which helps the in-kernel
+        * unwinder reliably detect the end of the stack.
+        */
+       .long init_thread_union + THREAD_SIZE - SIZEOF_PTREGS - \
+             TOP_OF_KERNEL_STACK_PADDING;
 
 __INITRODATA
 int_msg:
index b4421cc191b056727f8f8c0def78a750b319a1c4..a15d381e602030dd91be680699099912295475ba 100644 (file)
@@ -66,13 +66,8 @@ startup_64:
         * tables and then reload them.
         */
 
-       /*
-        * Setup stack for verify_cpu(). "-8" because initial_stack is defined
-        * this way, see below. Our best guess is a NULL ptr for stack
-        * termination heuristics and we don't want to break anything which
-        * might depend on it (kgdb, ...).
-        */
-       leaq    (__end_init_task - 8)(%rip), %rsp
+       /* Set up the stack for verify_cpu(), similar to initial_stack below */
+       leaq    (__end_init_task - SIZEOF_PTREGS)(%rip), %rsp
 
        /* Sanitize CPU configuration */
        call verify_cpu
@@ -117,20 +112,20 @@ startup_64:
        movq    %rdi, %rax
        shrq    $PGDIR_SHIFT, %rax
 
-       leaq    (4096 + _KERNPG_TABLE)(%rbx), %rdx
+       leaq    (PAGE_SIZE + _KERNPG_TABLE)(%rbx), %rdx
        movq    %rdx, 0(%rbx,%rax,8)
        movq    %rdx, 8(%rbx,%rax,8)
 
-       addq    $4096, %rdx
+       addq    $PAGE_SIZE, %rdx
        movq    %rdi, %rax
        shrq    $PUD_SHIFT, %rax
        andl    $(PTRS_PER_PUD-1), %eax
-       movq    %rdx, 4096(%rbx,%rax,8)
+       movq    %rdx, PAGE_SIZE(%rbx,%rax,8)
        incl    %eax
        andl    $(PTRS_PER_PUD-1), %eax
-       movq    %rdx, 4096(%rbx,%rax,8)
+       movq    %rdx, PAGE_SIZE(%rbx,%rax,8)
 
-       addq    $8192, %rbx
+       addq    $PAGE_SIZE * 2, %rbx
        movq    %rdi, %rax
        shrq    $PMD_SHIFT, %rdi
        addq    $(__PAGE_KERNEL_LARGE_EXEC & ~_PAGE_GLOBAL), %rax
@@ -265,13 +260,17 @@ ENTRY(secondary_startup_64)
        movl    $MSR_GS_BASE,%ecx
        movl    initial_gs(%rip),%eax
        movl    initial_gs+4(%rip),%edx
-       wrmsr   
+       wrmsr
 
        /* rsi is pointer to real mode structure with interesting info.
           pass it to C */
        movq    %rsi, %rdi
-       
-       /* Finally jump to run C code and to be on real kernel address
+       jmp     start_cpu
+ENDPROC(secondary_startup_64)
+
+ENTRY(start_cpu)
+       /*
+        * Jump to run C code and to be on a real kernel address.
         * Since we are running on identity-mapped space we have to jump
         * to the full 64bit address, this is only possible as indirect
         * jump.  In addition we need to ensure %cs is set so we make this
@@ -295,12 +294,13 @@ ENTRY(secondary_startup_64)
         *      REX.W + FF /5 JMP m16:64 Jump far, absolute indirect,
         *              address given in m16:64.
         */
-       movq    initial_code(%rip),%rax
-       pushq   $0              # fake return address to stop unwinder
+       call    1f              # put return address on stack for unwinder
+1:     xorq    %rbp, %rbp      # clear frame pointer
+       movq    initial_code(%rip), %rax
        pushq   $__KERNEL_CS    # set correct cs
        pushq   %rax            # target address in negative space
        lretq
-ENDPROC(secondary_startup_64)
+ENDPROC(start_cpu)
 
 #include "verify_cpu.S"
 
@@ -308,15 +308,11 @@ ENDPROC(secondary_startup_64)
 /*
  * Boot CPU0 entry point. It's called from play_dead(). Everything has been set
  * up already except stack. We just set up stack here. Then call
- * start_secondary().
+ * start_secondary() via start_cpu().
  */
 ENTRY(start_cpu0)
-       movq initial_stack(%rip),%rsp
-       movq    initial_code(%rip),%rax
-       pushq   $0              # fake return address to stop unwinder
-       pushq   $__KERNEL_CS    # set correct cs
-       pushq   %rax            # target address in negative space
-       lretq
+       movq    initial_stack(%rip), %rsp
+       jmp     start_cpu
 ENDPROC(start_cpu0)
 #endif
 
@@ -328,7 +324,11 @@ ENDPROC(start_cpu0)
        GLOBAL(initial_gs)
        .quad   INIT_PER_CPU_VAR(irq_stack_union)
        GLOBAL(initial_stack)
-       .quad  init_thread_union+THREAD_SIZE-8
+       /*
+        * The SIZEOF_PTREGS gap is a convention which helps the in-kernel
+        * unwinder reliably detect the end of the stack.
+        */
+       .quad  init_thread_union + THREAD_SIZE - SIZEOF_PTREGS
        __FINITDATA
 
 bad_address:
index bd7be8efdc4ce7ae49f155b7f701f6d6d800293f..e3223bc78cb6484600219dcdf943fe3ff278a759 100644 (file)
@@ -72,10 +72,9 @@ void __show_regs(struct pt_regs *regs, int all)
                savesegment(gs, gs);
        }
 
-       printk(KERN_DEFAULT "EIP: %04x:[<%08lx>] EFLAGS: %08lx CPU: %d\n",
-                       (u16)regs->cs, regs->ip, regs->flags,
-                       smp_processor_id());
-       print_symbol("EIP is at %s\n", regs->ip);
+       printk(KERN_DEFAULT "EIP: %pS\n", (void *)regs->ip);
+       printk(KERN_DEFAULT "EFLAGS: %08lx CPU: %d\n", regs->flags,
+               smp_processor_id());
 
        printk(KERN_DEFAULT "EAX: %08lx EBX: %08lx ECX: %08lx EDX: %08lx\n",
                regs->ax, regs->bx, regs->cx, regs->dx);
index b3760b3c1ca09734a4479f63f3787dac42275a24..c99f1ca35eb5841c6d20f9979ff74bd369be9f63 100644 (file)
@@ -61,10 +61,15 @@ void __show_regs(struct pt_regs *regs, int all)
        unsigned int fsindex, gsindex;
        unsigned int ds, cs, es;
 
-       printk(KERN_DEFAULT "RIP: %04lx:[<%016lx>] ", regs->cs & 0xffff, regs->ip);
-       printk_address(regs->ip);
-       printk(KERN_DEFAULT "RSP: %04lx:%016lx  EFLAGS: %08lx\n", regs->ss,
-                       regs->sp, regs->flags);
+       printk(KERN_DEFAULT "RIP: %04lx:%pS\n", regs->cs & 0xffff,
+               (void *)regs->ip);
+       printk(KERN_DEFAULT "RSP: %04lx:%016lx EFLAGS: %08lx", regs->ss,
+               regs->sp, regs->flags);
+       if (regs->orig_ax != -1)
+               pr_cont(" ORIG_RAX: %016lx\n", regs->orig_ax);
+       else
+               pr_cont("\n");
+
        printk(KERN_DEFAULT "RAX: %016lx RBX: %016lx RCX: %016lx\n",
               regs->ax, regs->bx, regs->cx);
        printk(KERN_DEFAULT "RDX: %016lx RSI: %016lx RDI: %016lx\n",
index 118e792a7be62a875d9e8b6026ff91c398e490b4..8add3a5b8635f20120bc68b60e3ea995bf236bd7 100644 (file)
@@ -987,9 +987,7 @@ static int do_boot_cpu(int apicid, int cpu, struct task_struct *idle)
        int cpu0_nmi_registered = 0;
        unsigned long timeout;
 
-       idle->thread.sp = (unsigned long) (((struct pt_regs *)
-                         (THREAD_SIZE +  task_stack_page(idle))) - 1);
-
+       idle->thread.sp = (unsigned long)task_pt_regs(idle);
        early_gdt_descr.address = (unsigned long)get_cpu_gdt_table(cpu);
        initial_code = (unsigned long)start_secondary;
        initial_stack  = idle->thread.sp;
index a2456d4d286adcbccbfef572b3f60d7c59236c39..ea7b7f9a3b9e29c891509429f55238e17dcdc66c 100644 (file)
@@ -14,13 +14,55 @@ unsigned long unwind_get_return_address(struct unwind_state *state)
        if (unwind_done(state))
                return 0;
 
+       if (state->regs && user_mode(state->regs))
+               return 0;
+
        addr = ftrace_graph_ret_addr(state->task, &state->graph_idx, *addr_p,
                                     addr_p);
 
-       return __kernel_text_address(addr) ? addr : 0;
+       if (!__kernel_text_address(addr)) {
+               printk_deferred_once(KERN_WARNING
+                       "WARNING: unrecognized kernel stack return address %p at %p in %s:%d\n",
+                       (void *)addr, addr_p, state->task->comm,
+                       state->task->pid);
+               return 0;
+       }
+
+       return addr;
 }
 EXPORT_SYMBOL_GPL(unwind_get_return_address);
 
+static size_t regs_size(struct pt_regs *regs)
+{
+       /* x86_32 regs from kernel mode are two words shorter: */
+       if (IS_ENABLED(CONFIG_X86_32) && !user_mode(regs))
+               return sizeof(*regs) - 2*sizeof(long);
+
+       return sizeof(*regs);
+}
+
+static bool is_last_task_frame(struct unwind_state *state)
+{
+       unsigned long bp = (unsigned long)state->bp;
+       unsigned long regs = (unsigned long)task_pt_regs(state->task);
+
+       return bp == regs - FRAME_HEADER_SIZE;
+}
+
+/*
+ * This determines if the frame pointer actually contains an encoded pointer to
+ * pt_regs on the stack.  See ENCODE_FRAME_POINTER.
+ */
+static struct pt_regs *decode_frame_pointer(unsigned long *bp)
+{
+       unsigned long regs = (unsigned long)bp;
+
+       if (!(regs & 0x1))
+               return NULL;
+
+       return (struct pt_regs *)(regs & ~0x1);
+}
+
 static bool update_stack_state(struct unwind_state *state, void *addr,
                               size_t len)
 {
@@ -43,26 +85,117 @@ static bool update_stack_state(struct unwind_state *state, void *addr,
 
 bool unwind_next_frame(struct unwind_state *state)
 {
-       unsigned long *next_bp;
+       struct pt_regs *regs;
+       unsigned long *next_bp, *next_frame;
+       size_t next_len;
+       enum stack_type prev_type = state->stack_info.type;
 
        if (unwind_done(state))
                return false;
 
-       next_bp = (unsigned long *)*state->bp;
+       /* have we reached the end? */
+       if (state->regs && user_mode(state->regs))
+               goto the_end;
+
+       if (is_last_task_frame(state)) {
+               regs = task_pt_regs(state->task);
+
+               /*
+                * kthreads (other than the boot CPU's idle thread) have some
+                * partial regs at the end of their stack which were placed
+                * there by copy_thread_tls().  But the regs don't have any
+                * useful information, so we can skip them.
+                *
+                * This user_mode() check is slightly broader than a PF_KTHREAD
+                * check because it also catches the awkward situation where a
+                * newly forked kthread transitions into a user task by calling
+                * do_execve(), which eventually clears PF_KTHREAD.
+                */
+               if (!user_mode(regs))
+                       goto the_end;
+
+               /*
+                * We're almost at the end, but not quite: there's still the
+                * syscall regs frame.  Entry code doesn't encode the regs
+                * pointer for syscalls, so we have to set it manually.
+                */
+               state->regs = regs;
+               state->bp = NULL;
+               return true;
+       }
+
+       /* get the next frame pointer */
+       if (state->regs)
+               next_bp = (unsigned long *)state->regs->bp;
+       else
+               next_bp = (unsigned long *)*state->bp;
+
+       /* is the next frame pointer an encoded pointer to pt_regs? */
+       regs = decode_frame_pointer(next_bp);
+       if (regs) {
+               next_frame = (unsigned long *)regs;
+               next_len = sizeof(*regs);
+       } else {
+               next_frame = next_bp;
+               next_len = FRAME_HEADER_SIZE;
+       }
 
        /* make sure the next frame's data is accessible */
-       if (!update_stack_state(state, next_bp, FRAME_HEADER_SIZE))
-               return false;
+       if (!update_stack_state(state, next_frame, next_len)) {
+               /*
+                * Don't warn on bad regs->bp.  An interrupt in entry code
+                * might cause a false positive warning.
+                */
+               if (state->regs)
+                       goto the_end;
+
+               goto bad_address;
+       }
+
+       /* Make sure it only unwinds up and doesn't overlap the last frame: */
+       if (state->stack_info.type == prev_type) {
+               if (state->regs && (void *)next_frame < (void *)state->regs + regs_size(state->regs))
+                       goto bad_address;
+
+               if (state->bp && (void *)next_frame < (void *)state->bp + FRAME_HEADER_SIZE)
+                       goto bad_address;
+       }
 
        /* move to the next frame */
-       state->bp = next_bp;
+       if (regs) {
+               state->regs = regs;
+               state->bp = NULL;
+       } else {
+               state->bp = next_bp;
+               state->regs = NULL;
+       }
+
        return true;
+
+bad_address:
+       if (state->regs) {
+               printk_deferred_once(KERN_WARNING
+                       "WARNING: kernel stack regs at %p in %s:%d has bad 'bp' value %p\n",
+                       state->regs, state->task->comm,
+                       state->task->pid, next_frame);
+       } else {
+               printk_deferred_once(KERN_WARNING
+                       "WARNING: kernel stack frame pointer at %p in %s:%d has bad value %p\n",
+                       state->bp, state->task->comm,
+                       state->task->pid, next_frame);
+       }
+the_end:
+       state->stack_info.type = STACK_TYPE_UNKNOWN;
+       return false;
 }
 EXPORT_SYMBOL_GPL(unwind_next_frame);
 
 void __unwind_start(struct unwind_state *state, struct task_struct *task,
                    struct pt_regs *regs, unsigned long *first_frame)
 {
+       unsigned long *bp, *frame;
+       size_t len;
+
        memset(state, 0, sizeof(*state));
        state->task = task;
 
@@ -73,12 +206,22 @@ void __unwind_start(struct unwind_state *state, struct task_struct *task,
        }
 
        /* set up the starting stack frame */
-       state->bp = get_frame_pointer(task, regs);
+       bp = get_frame_pointer(task, regs);
+       regs = decode_frame_pointer(bp);
+       if (regs) {
+               state->regs = regs;
+               frame = (unsigned long *)regs;
+               len = sizeof(*regs);
+       } else {
+               state->bp = bp;
+               frame = bp;
+               len = FRAME_HEADER_SIZE;
+       }
 
        /* initialize stack info and make sure the frame data is accessible */
-       get_stack_info(state->bp, state->task, &state->stack_info,
+       get_stack_info(frame, state->task, &state->stack_info,
                       &state->stack_mask);
-       update_stack_state(state, state->bp, FRAME_HEADER_SIZE);
+       update_stack_state(state, frame, len);
 
        /*
         * The caller can provide the address of the first frame directly
index dbf67f64d5ecf76cee128b2c89d43a4267b57223..e79f15f108a8d43d8d5644f7fc967f32c3282b47 100644 (file)
@@ -91,10 +91,10 @@ SECTIONS
        /* Text and read-only data */
        .text :  AT(ADDR(.text) - LOAD_OFFSET) {
                _text = .;
+               _stext = .;
                /* bootstrapping code */
                HEAD_TEXT
                . = ALIGN(8);
-               _stext = .;
                TEXT_TEXT
                SCHED_TEXT
                CPUIDLE_TEXT
index d376e4b48f881b89170802ab7b6aa072c458ed8f..c5959576c315242d74d68ce075d48f8d62301d5c 100644 (file)
 #include <asm/smap.h>
 #include <asm/export.h>
 
-/* Standard copy_to_user with segment limit checking */
-ENTRY(_copy_to_user)
-       mov PER_CPU_VAR(current_task), %rax
-       movq %rdi,%rcx
-       addq %rdx,%rcx
-       jc bad_to_user
-       cmpq TASK_addr_limit(%rax),%rcx
-       ja bad_to_user
-       ALTERNATIVE_2 "jmp copy_user_generic_unrolled",         \
-                     "jmp copy_user_generic_string",           \
-                     X86_FEATURE_REP_GOOD,                     \
-                     "jmp copy_user_enhanced_fast_string",     \
-                     X86_FEATURE_ERMS
-ENDPROC(_copy_to_user)
-EXPORT_SYMBOL(_copy_to_user)
-
-/* Standard copy_from_user with segment limit checking */
-ENTRY(_copy_from_user)
-       mov PER_CPU_VAR(current_task), %rax
-       movq %rsi,%rcx
-       addq %rdx,%rcx
-       jc bad_from_user
-       cmpq TASK_addr_limit(%rax),%rcx
-       ja bad_from_user
-       ALTERNATIVE_2 "jmp copy_user_generic_unrolled",         \
-                     "jmp copy_user_generic_string",           \
-                     X86_FEATURE_REP_GOOD,                     \
-                     "jmp copy_user_enhanced_fast_string",     \
-                     X86_FEATURE_ERMS
-ENDPROC(_copy_from_user)
-EXPORT_SYMBOL(_copy_from_user)
-
-
-       .section .fixup,"ax"
-       /* must zero dest */
-ENTRY(bad_from_user)
-bad_from_user:
-       movl %edx,%ecx
-       xorl %eax,%eax
-       rep
-       stosb
-bad_to_user:
-       movl %edx,%eax
-       ret
-ENDPROC(bad_from_user)
-       .previous
-
 /*
  * copy_user_generic_unrolled - memory copy with exception handling.
  * This version is for CPUs like P4 that don't have efficient micro
index b4908789484e39870f006afb66e43c95bd6281d3..c074799bddae178257b0bfe78766faccac75f737 100644 (file)
@@ -34,3 +34,52 @@ copy_from_user_nmi(void *to, const void __user *from, unsigned long n)
        return ret;
 }
 EXPORT_SYMBOL_GPL(copy_from_user_nmi);
+
+/**
+ * copy_to_user: - Copy a block of data into user space.
+ * @to:   Destination address, in user space.
+ * @from: Source address, in kernel space.
+ * @n:    Number of bytes to copy.
+ *
+ * Context: User context only. This function may sleep if pagefaults are
+ *          enabled.
+ *
+ * Copy data from kernel space to user space.
+ *
+ * Returns number of bytes that could not be copied.
+ * On success, this will be zero.
+ */
+unsigned long _copy_to_user(void __user *to, const void *from, unsigned n)
+{
+       if (access_ok(VERIFY_WRITE, to, n))
+               n = __copy_to_user(to, from, n);
+       return n;
+}
+EXPORT_SYMBOL(_copy_to_user);
+
+/**
+ * copy_from_user: - Copy a block of data from user space.
+ * @to:   Destination address, in kernel space.
+ * @from: Source address, in user space.
+ * @n:    Number of bytes to copy.
+ *
+ * Context: User context only. This function may sleep if pagefaults are
+ *          enabled.
+ *
+ * Copy data from user space to kernel space.
+ *
+ * Returns number of bytes that could not be copied.
+ * On success, this will be zero.
+ *
+ * If some data could not be copied, this function will pad the copied
+ * data to the requested size using zero bytes.
+ */
+unsigned long _copy_from_user(void *to, const void __user *from, unsigned n)
+{
+       if (access_ok(VERIFY_READ, from, n))
+               n = __copy_from_user(to, from, n);
+       else
+               memset(to, 0, n);
+       return n;
+}
+EXPORT_SYMBOL(_copy_from_user);
index 3bc7baf2a711fa3a88066ee3c3527c441216441b..0b281217c890195870c6492351e849a93e6704d0 100644 (file)
@@ -640,52 +640,3 @@ unsigned long __copy_from_user_ll_nocache_nozero(void *to, const void __user *fr
        return n;
 }
 EXPORT_SYMBOL(__copy_from_user_ll_nocache_nozero);
-
-/**
- * copy_to_user: - Copy a block of data into user space.
- * @to:   Destination address, in user space.
- * @from: Source address, in kernel space.
- * @n:    Number of bytes to copy.
- *
- * Context: User context only. This function may sleep if pagefaults are
- *          enabled.
- *
- * Copy data from kernel space to user space.
- *
- * Returns number of bytes that could not be copied.
- * On success, this will be zero.
- */
-unsigned long _copy_to_user(void __user *to, const void *from, unsigned n)
-{
-       if (access_ok(VERIFY_WRITE, to, n))
-               n = __copy_to_user(to, from, n);
-       return n;
-}
-EXPORT_SYMBOL(_copy_to_user);
-
-/**
- * copy_from_user: - Copy a block of data from user space.
- * @to:   Destination address, in kernel space.
- * @from: Source address, in user space.
- * @n:    Number of bytes to copy.
- *
- * Context: User context only. This function may sleep if pagefaults are
- *          enabled.
- *
- * Copy data from user space to kernel space.
- *
- * Returns number of bytes that could not be copied.
- * On success, this will be zero.
- *
- * If some data could not be copied, this function will pad the copied
- * data to the requested size using zero bytes.
- */
-unsigned long _copy_from_user(void *to, const void __user *from, unsigned n)
-{
-       if (access_ok(VERIFY_READ, from, n))
-               n = __copy_from_user(to, from, n);
-       else
-               memset(to, 0, n);
-       return n;
-}
-EXPORT_SYMBOL(_copy_from_user);
index 9f72ca3b2669a62f5d512a01367d7af16a5e1ba3..17c55a536fdd2ce1af8b376a319e43cea3e7d45a 100644 (file)
@@ -679,8 +679,7 @@ show_fault_oops(struct pt_regs *regs, unsigned long error_code,
                printk(KERN_CONT "paging request");
 
        printk(KERN_CONT " at %p\n", (void *) address);
-       printk(KERN_ALERT "IP:");
-       printk_address(regs->ip);
+       printk(KERN_ALERT "IP: %pS\n", (void *)regs->ip);
 
        dump_pagetable(address);
 }
index cd5173a2733f51a8d091334a86027aeda3ef9776..8410e7d0a5b531dcc412a15f297255eb6a409579 100644 (file)
@@ -387,8 +387,8 @@ static void uv_nmi_dump_cpu_ip_hdr(void)
 /* Dump Instruction Pointer info */
 static void uv_nmi_dump_cpu_ip(int cpu, struct pt_regs *regs)
 {
-       pr_info("UV: %4d %6d %-32.32s ", cpu, current->pid, current->comm);
-       printk_address(regs->ip);
+       pr_info("UV: %4d %6d %-32.32s %pS",
+               cpu, current->pid, current->comm, (void *)regs->ip);
 }
 
 /*
index ba70ff2329176f0569847c4cd09053761376ccc1..1972565ab10665725bf01029fb81d3a9d92fcba8 100644 (file)
@@ -269,7 +269,8 @@ int main(int argc, char **argv)
                insns++;
        }
 
-       fprintf(stdout, "%s: %s: decoded and checked %d %s instructions with %d errors (seed:0x%x)\n",
+       fprintf((errors) ? stderr : stdout,
+               "%s: %s: decoded and checked %d %s instructions with %d errors (seed:0x%x)\n",
                prog,
                (errors) ? "Failure" : "Success",
                insns,
index 56f04db0c9c0d8db9c459cc6199af85477fa0883..ecf31e0358c8140f5c0b468a3f3a98b5b877c728 100644 (file)
@@ -167,7 +167,7 @@ int main(int argc, char **argv)
                fprintf(stderr, "Warning: decoded and checked %d"
                        " instructions with %d warnings\n", insns, warnings);
        else
-               fprintf(stderr, "Succeed: decoded and checked %d"
+               fprintf(stdout, "Success: decoded and checked %d"
                        " instructions\n", insns);
        return 0;
 }
index 739fb17371af149a28c1d59ecde507a94217fe25..39b3368f6de67119b830f3bc3db4382b00a54728 100644 (file)
@@ -982,13 +982,6 @@ static struct ctl_table kern_table[] = {
                .mode           = 0444,
                .proc_handler   = proc_dointvec,
        },
-       {
-               .procname       = "kstack_depth_to_print",
-               .data           = &kstack_depth_to_print,
-               .maxlen         = sizeof(int),
-               .mode           = 0644,
-               .proc_handler   = proc_dointvec,
-       },
        {
                .procname       = "io_delay_type",
                .data           = &io_delay_type,
index 6de9440e3ae2d995b28577dc4a000fc23f4182c0..61b0988bba8c9a249b234ec98cb35f737b5e75a6 100644 (file)
@@ -6399,8 +6399,8 @@ unsigned long free_reserved_area(void *start, void *end, int poison, char *s)
        }
 
        if (pages && s)
-               pr_info("Freeing %s memory: %ldK (%p - %p)\n",
-                       s, pages << (PAGE_SHIFT - 10), start, end);
+               pr_info("Freeing %s memory: %ldK\n",
+                       s, pages << (PAGE_SHIFT - 10));
 
        return pages;
 }
index c332684e1b5ace1e4ea8a357ed83b5931e04cdfd..5206d99ddeb876e0456e6aa0d088e168219005a7 100755 (executable)
@@ -139,7 +139,8 @@ handle_line() {
 
 while read line; do
        # Let's see if we have an address in the line
-       if [[ $line =~ \[\<([^]]+)\>\]  ]]; then
+       if [[ $line =~ \[\<([^]]+)\>\] ]] ||
+          [[ $line =~ [^+\ ]+\+0x[0-9a-f]+/0x[0-9a-f]+ ]]; then
                # Translate address to line numbers
                handle_line "$line"
        # Is it a code line?
index 450b332573397ee16ba343673f014ef954b37193..29df825d375c6eb4b5f2ac4d15b5f3df90ae38c4 100755 (executable)
@@ -105,9 +105,18 @@ __faddr2line() {
        # In rare cases there might be duplicates.
        while read symbol; do
                local fields=($symbol)
-               local sym_base=0x${fields[1]}
-               local sym_size=${fields[2]}
-               local sym_type=${fields[3]}
+               local sym_base=0x${fields[0]}
+               local sym_type=${fields[1]}
+               local sym_end=0x${fields[3]}
+
+               # calculate the size
+               local sym_size=$(($sym_end - $sym_base))
+               if [[ -z $sym_size ]] || [[ $sym_size -le 0 ]]; then
+                       warn "bad symbol size: base: $sym_base end: $sym_end"
+                       DONE=1
+                       return
+               fi
+               sym_size=0x$(printf %x $sym_size)
 
                # calculate the address
                local addr=$(($sym_base + $offset))
@@ -116,26 +125,26 @@ __faddr2line() {
                        DONE=1
                        return
                fi
-               local hexaddr=0x$(printf %x $addr)
+               addr=0x$(printf %x $addr)
 
                # weed out non-function symbols
-               if [[ $sym_type != "FUNC" ]]; then
+               if [[ $sym_type != t ]] && [[ $sym_type != T ]]; then
                        [[ $print_warnings = 1 ]] &&
-                               echo "skipping $func address at $hexaddr due to non-function symbol"
+                               echo "skipping $func address at $addr due to non-function symbol of type '$sym_type'"
                        continue
                fi
 
                # if the user provided a size, make sure it matches the symbol's size
                if [[ -n $size ]] && [[ $size -ne $sym_size ]]; then
                        [[ $print_warnings = 1 ]] &&
-                               echo "skipping $func address at $hexaddr due to size mismatch ($size != $sym_size)"
+                               echo "skipping $func address at $addr due to size mismatch ($size != $sym_size)"
                        continue;
                fi
 
                # make sure the provided offset is within the symbol's range
                if [[ $offset -gt $sym_size ]]; then
                        [[ $print_warnings = 1 ]] &&
-                               echo "skipping $func address at $hexaddr due to size mismatch ($offset > $sym_size)"
+                               echo "skipping $func address at $addr due to size mismatch ($offset > $sym_size)"
                        continue
                fi
 
@@ -143,12 +152,12 @@ __faddr2line() {
                [[ $FIRST = 0 ]] && echo
                FIRST=0
 
-               local hexsize=0x$(printf %x $sym_size)
-               echo "$func+$offset/$hexsize:"
-               addr2line -fpie $objfile $hexaddr | sed "s; $dir_prefix\(\./\)*; ;"
+               # pass real address to addr2line
+               echo "$func+$offset/$sym_size:"
+               addr2line -fpie $objfile $addr | sed "s; $dir_prefix\(\./\)*; ;"
                DONE=1
 
-       done < <(readelf -sW $objfile | awk -v f=$func '$8 == f {print}')
+       done < <(nm -n $objfile | awk -v fn=$func '$3 == fn { found=1; line=$0; start=$1; next } found == 1 { found=0; print line, $1 }')
 }
 
 [[ $# -lt 2 ]] && usage
index a89f80a5b711683c6ec4c64acb5729cc8a1c9715..8c1cb423cfe6939addceb2da403bed74cfc97e30 100644 (file)
@@ -6,7 +6,7 @@ include ../lib.mk
 
 TARGETS_C_BOTHBITS := single_step_syscall sysret_ss_attrs syscall_nt ptrace_syscall test_mremap_vdso \
                        check_initial_reg_state sigreturn ldt_gdt iopl \
-                       protection_keys
+                       protection_keys test_vdso
 TARGETS_C_32BIT_ONLY := entry_from_vm86 syscall_arg_fault test_syscall_vdso unwind_vdso \
                        test_FCMOV test_FCOMI test_FISTTP \
                        vdso_restorer
diff --git a/tools/testing/selftests/x86/test_vdso.c b/tools/testing/selftests/x86/test_vdso.c
new file mode 100644 (file)
index 0000000..65d7a2b
--- /dev/null
@@ -0,0 +1,123 @@
+/*
+ * test_vdso.c - Test cases for the vDSO and vsyscall getcpu()
+ * Copyright (c) 2011-2015 Andrew Lutomirski
+ */
+
+#define _GNU_SOURCE
+
+#include <stdio.h>
+#include <sys/time.h>
+#include <time.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <sys/syscall.h>
+#include <dlfcn.h>
+#include <string.h>
+#include <errno.h>
+#include <sched.h>
+#include <stdbool.h>
+
+#ifndef SYS_getcpu
+# ifdef __x86_64__
+#  define SYS_getcpu 309
+# else
+#  define SYS_getcpu 318
+# endif
+#endif
+
+int nerrs = 0;
+
+#ifdef __x86_64__
+# define VSYS(x) (x)
+#else
+# define VSYS(x) 0
+#endif
+
+typedef long (*getcpu_t)(unsigned *, unsigned *, void *);
+
+const getcpu_t vgetcpu = (getcpu_t)VSYS(0xffffffffff600800);
+getcpu_t vdso_getcpu;
+
+void fill_function_pointers()
+{
+       void *vdso = dlopen("linux-vdso.so.1",
+                           RTLD_LAZY | RTLD_LOCAL | RTLD_NOLOAD);
+       if (!vdso)
+               vdso = dlopen("linux-gate.so.1",
+                             RTLD_LAZY | RTLD_LOCAL | RTLD_NOLOAD);
+       if (!vdso) {
+               printf("[WARN]\tfailed to find vDSO\n");
+               return;
+       }
+
+       vdso_getcpu = (getcpu_t)dlsym(vdso, "__vdso_getcpu");
+       if (!vdso_getcpu)
+               printf("Warning: failed to find getcpu in vDSO\n");
+}
+
+static long sys_getcpu(unsigned * cpu, unsigned * node,
+                      void* cache)
+{
+       return syscall(__NR_getcpu, cpu, node, cache);
+}
+
+static void test_getcpu(void)
+{
+       printf("[RUN]\tTesting getcpu...\n");
+
+       for (int cpu = 0; ; cpu++) {
+               cpu_set_t cpuset;
+               CPU_ZERO(&cpuset);
+               CPU_SET(cpu, &cpuset);
+               if (sched_setaffinity(0, sizeof(cpuset), &cpuset) != 0)
+                       return;
+
+               unsigned cpu_sys, cpu_vdso, cpu_vsys,
+                       node_sys, node_vdso, node_vsys;
+               long ret_sys, ret_vdso = 1, ret_vsys = 1;
+               unsigned node;
+
+               ret_sys = sys_getcpu(&cpu_sys, &node_sys, 0);
+               if (vdso_getcpu)
+                       ret_vdso = vdso_getcpu(&cpu_vdso, &node_vdso, 0);
+               if (vgetcpu)
+                       ret_vsys = vgetcpu(&cpu_vsys, &node_vsys, 0);
+
+               if (!ret_sys)
+                       node = node_sys;
+               else if (!ret_vdso)
+                       node = node_vdso;
+               else if (!ret_vsys)
+                       node = node_vsys;
+
+               bool ok = true;
+               if (!ret_sys && (cpu_sys != cpu || node_sys != node))
+                       ok = false;
+               if (!ret_vdso && (cpu_vdso != cpu || node_vdso != node))
+                       ok = false;
+               if (!ret_vsys && (cpu_vsys != cpu || node_vsys != node))
+                       ok = false;
+
+               printf("[%s]\tCPU %u:", ok ? "OK" : "FAIL", cpu);
+               if (!ret_sys)
+                       printf(" syscall: cpu %u, node %u", cpu_sys, node_sys);
+               if (!ret_vdso)
+                       printf(" vdso: cpu %u, node %u", cpu_vdso, node_vdso);
+               if (!ret_vsys)
+                       printf(" vsyscall: cpu %u, node %u", cpu_vsys,
+                              node_vsys);
+               printf("\n");
+
+               if (!ok)
+                       nerrs++;
+       }
+}
+
+int main(int argc, char **argv)
+{
+       fill_function_pointers();
+
+       test_getcpu();
+
+       return nerrs ? 1 : 0;
+}