Merge remote-tracking branch 'arm64/for-next/core'
author     Stephen Rothwell <sfr@canb.auug.org.au>
           Wed, 4 Nov 2015 23:23:01 +0000 (10:23 +1100)
committer  Stephen Rothwell <sfr@canb.auug.org.au>
           Wed, 4 Nov 2015 23:23:01 +0000 (10:23 +1100)
100 files changed:
Documentation/arm/uefi.txt
Documentation/arm64/booting.txt
Documentation/devicetree/bindings/arm/pmu.txt
Documentation/features/debug/KASAN/arch-support.txt
MAINTAINERS
arch/arm64/Kconfig
arch/arm64/Kconfig.debug
arch/arm64/Makefile
arch/arm64/boot/dts/arm/juno-r1.dts
arch/arm64/boot/dts/arm/juno.dts
arch/arm64/configs/defconfig
arch/arm64/include/asm/assembler.h
arch/arm64/include/asm/atomic.h
arch/arm64/include/asm/atomic_ll_sc.h
arch/arm64/include/asm/atomic_lse.h
arch/arm64/include/asm/cache.h
arch/arm64/include/asm/cacheflush.h
arch/arm64/include/asm/cachetype.h
arch/arm64/include/asm/cmpxchg.h
arch/arm64/include/asm/cpu.h
arch/arm64/include/asm/cpufeature.h
arch/arm64/include/asm/cputype.h
arch/arm64/include/asm/fixmap.h
arch/arm64/include/asm/hw_breakpoint.h
arch/arm64/include/asm/hwcap.h
arch/arm64/include/asm/irq.h
arch/arm64/include/asm/kasan.h [new file with mode: 0644]
arch/arm64/include/asm/kernel-pgtable.h [new file with mode: 0644]
arch/arm64/include/asm/memory.h
arch/arm64/include/asm/mmu.h
arch/arm64/include/asm/mmu_context.h
arch/arm64/include/asm/page.h
arch/arm64/include/asm/pgalloc.h
arch/arm64/include/asm/pgtable-hwdef.h
arch/arm64/include/asm/pgtable.h
arch/arm64/include/asm/pmu.h [deleted file]
arch/arm64/include/asm/processor.h
arch/arm64/include/asm/ptrace.h
arch/arm64/include/asm/string.h
arch/arm64/include/asm/sysreg.h
arch/arm64/include/asm/thread_info.h
arch/arm64/include/asm/tlb.h
arch/arm64/include/asm/tlbflush.h
arch/arm64/kernel/Makefile
arch/arm64/kernel/arm64ksyms.c
arch/arm64/kernel/asm-offsets.c
arch/arm64/kernel/cpu_errata.c
arch/arm64/kernel/cpufeature.c
arch/arm64/kernel/cpuinfo.c
arch/arm64/kernel/debug-monitors.c
arch/arm64/kernel/efi-entry.S
arch/arm64/kernel/efi.c
arch/arm64/kernel/entry.S
arch/arm64/kernel/fpsimd.c
arch/arm64/kernel/head.S
arch/arm64/kernel/hw_breakpoint.c
arch/arm64/kernel/image.h
arch/arm64/kernel/irq.c
arch/arm64/kernel/module.c
arch/arm64/kernel/perf_event.c
arch/arm64/kernel/process.c
arch/arm64/kernel/setup.c
arch/arm64/kernel/smp.c
arch/arm64/kernel/suspend.c
arch/arm64/kernel/traps.c
arch/arm64/kernel/vmlinux.lds.S
arch/arm64/kvm/Kconfig
arch/arm64/kvm/reset.c
arch/arm64/kvm/sys_regs.c
arch/arm64/lib/copy_from_user.S
arch/arm64/lib/copy_in_user.S
arch/arm64/lib/copy_template.S [new file with mode: 0644]
arch/arm64/lib/copy_to_user.S
arch/arm64/lib/memchr.S
arch/arm64/lib/memcmp.S
arch/arm64/lib/memcpy.S
arch/arm64/lib/memmove.S
arch/arm64/lib/memset.S
arch/arm64/lib/strcmp.S
arch/arm64/lib/strlen.S
arch/arm64/lib/strncmp.S
arch/arm64/mm/Makefile
arch/arm64/mm/cache.S
arch/arm64/mm/context.c
arch/arm64/mm/dump.c
arch/arm64/mm/fault.c
arch/arm64/mm/init.c
arch/arm64/mm/kasan_init.c [new file with mode: 0644]
arch/arm64/mm/mmu.c
arch/arm64/mm/pageattr.c
arch/arm64/mm/pgd.c
arch/arm64/mm/proc.S
drivers/firmware/efi/Makefile
drivers/firmware/efi/libstub/Makefile
drivers/firmware/efi/libstub/arm64-stub.c [moved from arch/arm64/kernel/efi-stub.c with 100% similarity]
drivers/firmware/efi/libstub/fdt.c
drivers/firmware/efi/libstub/string.c [new file with mode: 0644]
drivers/perf/Kconfig
kernel/irq/cpuhotplug.c
scripts/Makefile.kasan

Documentation/arm/uefi.txt
index 7b3fdfe0f7ba37a7ff6a0e46cfec18e1fcfbe68e..6543a0adea8a9741798570108719c5e968c25088 100644 (file)
@@ -58,5 +58,3 @@ linux,uefi-mmap-desc-size | 32-bit | Size in bytes of each entry in the UEFI
 --------------------------------------------------------------------------------
 linux,uefi-mmap-desc-ver  | 32-bit | Version of the mmap descriptor format.
 --------------------------------------------------------------------------------
-linux,uefi-stub-kern-ver  | string | Copy of linux_banner from build.
---------------------------------------------------------------------------------
Documentation/arm64/booting.txt
index 369a4f48eb0dac3c80d5f22054fc4b02f1006f28..701d39d3171a74d8c2eb670c0b1be2931f326ee8 100644 (file)
@@ -104,7 +104,12 @@ Header notes:
 - The flags field (introduced in v3.17) is a little-endian 64-bit field
   composed as follows:
   Bit 0:       Kernel endianness.  1 if BE, 0 if LE.
-  Bits 1-63:   Reserved.
+  Bit 1-2:     Kernel Page size.
+                       0 - Unspecified.
+                       1 - 4K
+                       2 - 16K
+                       3 - 64K
+  Bits 3-63:   Reserved.
 
 - When image_size is zero, a bootloader should attempt to keep as much
   memory as possible free for use by the kernel immediately after the
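A standalone sketch (not part of this commit; field layout taken only from the text above) of how a boot tool might decode the endianness bit and the new page-size bits from an Image header's flags word:

    #include <stdint.h>
    #include <stdio.h>

    /* Decode bit 0 (endianness) and bits 1-2 (page size) of the arm64
     * Image header "flags" field, as documented above. */
    static const char *image_page_size(uint64_t flags)
    {
            switch ((flags >> 1) & 0x3) {
            case 1: return "4K";
            case 2: return "16K";
            case 3: return "64K";
            default: return "unspecified";
            }
    }

    int main(void)
    {
            uint64_t flags = 0x2;   /* example: LE kernel, 4K pages */

            printf("endianness: %s\n", (flags & 0x1) ? "BE" : "LE");
            printf("page size : %s\n", image_page_size(flags));
            return 0;
    }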
Documentation/devicetree/bindings/arm/pmu.txt
index 80625ae59e08414b22a8c837b1870a31fc3c1f01..97ba45af04fc693f831c00f15f9388df864434a6 100644 (file)
@@ -9,6 +9,8 @@ Required properties:
 - compatible : should be one of
        "apm,potenza-pmu"
        "arm,armv8-pmuv3"
+       "arm,cortex-a57-pmu"
+       "arm,cortex-a53-pmu"
        "arm,cortex-a17-pmu"
        "arm,cortex-a15-pmu"
        "arm,cortex-a12-pmu"
Documentation/features/debug/KASAN/arch-support.txt
index 14531da2fb54eb1761095373bd62583557310163..703f5784bc90d9647d015c086330b59a48b4631d 100644 (file)
@@ -9,7 +9,7 @@
     |       alpha: | TODO |
     |         arc: | TODO |
     |         arm: | TODO |
-    |       arm64: | TODO |
+    |       arm64: |  ok  |
     |       avr32: | TODO |
     |    blackfin: | TODO |
     |         c6x: | TODO |
MAINTAINERS
index b15c48f1117081534e5296a60e22bb1b5af91292..1accdbdf68def6c602d789551e9842a65ea51cdd 100644 (file)
@@ -828,12 +828,13 @@ F:        arch/arm/include/asm/floppy.h
 
 ARM PMU PROFILING AND DEBUGGING
 M:     Will Deacon <will.deacon@arm.com>
+R:     Mark Rutland <mark.rutland@arm.com>
 S:     Maintained
-F:     arch/arm/kernel/perf_*
+F:     arch/arm*/kernel/perf_*
 F:     arch/arm/oprofile/common.c
-F:     arch/arm/kernel/hw_breakpoint.c
-F:     arch/arm/include/asm/hw_breakpoint.h
-F:     arch/arm/include/asm/perf_event.h
+F:     arch/arm*/kernel/hw_breakpoint.c
+F:     arch/arm*/include/asm/hw_breakpoint.h
+F:     arch/arm*/include/asm/perf_event.h
 F:     drivers/perf/arm_pmu.c
 F:     include/linux/perf/arm_pmu.h
 
arch/arm64/Kconfig
index 440d906429deab9c1daf0de497ead020378891af..7b10647cab223b976d56dc101b202f199a3d0e32 100644 (file)
@@ -48,6 +48,7 @@ config ARM64
        select HAVE_ARCH_AUDITSYSCALL
        select HAVE_ARCH_BITREVERSE
        select HAVE_ARCH_JUMP_LABEL
+       select HAVE_ARCH_KASAN if SPARSEMEM_VMEMMAP
        select HAVE_ARCH_KGDB
        select HAVE_ARCH_SECCOMP_FILTER
        select HAVE_ARCH_TRACEHOOK
@@ -169,10 +170,12 @@ config FIX_EARLYCON_MEM
 
 config PGTABLE_LEVELS
        int
+       default 2 if ARM64_16K_PAGES && ARM64_VA_BITS_36
        default 2 if ARM64_64K_PAGES && ARM64_VA_BITS_42
        default 3 if ARM64_64K_PAGES && ARM64_VA_BITS_48
        default 3 if ARM64_4K_PAGES && ARM64_VA_BITS_39
-       default 4 if ARM64_4K_PAGES && ARM64_VA_BITS_48
+       default 3 if ARM64_16K_PAGES && ARM64_VA_BITS_47
+       default 4 if !ARM64_64K_PAGES && ARM64_VA_BITS_48
 
 source "init/Kconfig"
 
@@ -389,25 +392,37 @@ config ARM64_4K_PAGES
        help
          This feature enables 4KB pages support.
 
+config ARM64_16K_PAGES
+       bool "16KB"
+       help
+         The system will use 16KB pages support. AArch32 emulation
+         requires applications compiled with 16K (or a multiple of 16K)
+         aligned segments.
+
 config ARM64_64K_PAGES
        bool "64KB"
        help
          This feature enables 64KB pages support (4KB by default)
          allowing only two levels of page tables and faster TLB
-         look-up. AArch32 emulation is not available when this feature
-         is enabled.
+         look-up. AArch32 emulation requires applications compiled
+         with 64K aligned segments.
 
 endchoice
 
 choice
        prompt "Virtual address space size"
        default ARM64_VA_BITS_39 if ARM64_4K_PAGES
+       default ARM64_VA_BITS_47 if ARM64_16K_PAGES
        default ARM64_VA_BITS_42 if ARM64_64K_PAGES
        help
          Allows choosing one of multiple possible virtual address
          space sizes. The level of translation table is determined by
          a combination of page size and virtual address space size.
 
+config ARM64_VA_BITS_36
+       bool "36-bit" if EXPERT
+       depends on ARM64_16K_PAGES
+
 config ARM64_VA_BITS_39
        bool "39-bit"
        depends on ARM64_4K_PAGES
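The PGTABLE_LEVELS defaults and the VA-size choices above follow from each translation level resolving PAGE_SHIFT - 3 bits of the virtual address (8-byte descriptors per page of table). A standalone sketch of that arithmetic, assuming that formula (it is not code from this commit):

    #include <stdio.h>

    /* Translation levels needed to map va_bits of VA with a given page size:
     * the page offset covers page_shift bits, each level resolves
     * (page_shift - 3) more. */
    static int pgtable_levels(int va_bits, int page_shift)
    {
            int bits_per_level = page_shift - 3;

            return (va_bits - page_shift + bits_per_level - 1) / bits_per_level;
    }

    int main(void)
    {
            printf("16K/36-bit: %d\n", pgtable_levels(36, 14));  /* 2 */
            printf("64K/42-bit: %d\n", pgtable_levels(42, 16));  /* 2 */
            printf("4K/39-bit : %d\n", pgtable_levels(39, 12));  /* 3 */
            printf("16K/47-bit: %d\n", pgtable_levels(47, 14));  /* 3 */
            printf("4K/48-bit : %d\n", pgtable_levels(48, 12));  /* 4 */
            return 0;
    }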
@@ -416,6 +431,10 @@ config ARM64_VA_BITS_42
        bool "42-bit"
        depends on ARM64_64K_PAGES
 
+config ARM64_VA_BITS_47
+       bool "47-bit"
+       depends on ARM64_16K_PAGES
+
 config ARM64_VA_BITS_48
        bool "48-bit"
 
@@ -423,8 +442,10 @@ endchoice
 
 config ARM64_VA_BITS
        int
+       default 36 if ARM64_VA_BITS_36
        default 39 if ARM64_VA_BITS_39
        default 42 if ARM64_VA_BITS_42
+       default 47 if ARM64_VA_BITS_47
        default 48 if ARM64_VA_BITS_48
 
 config CPU_BIG_ENDIAN
@@ -454,15 +475,13 @@ config NR_CPUS
 
 config HOTPLUG_CPU
        bool "Support for hot-pluggable CPUs"
+       select GENERIC_IRQ_MIGRATION
        help
          Say Y here to experiment with turning CPUs off and on.  CPUs
          can be controlled through /sys/devices/system/cpu.
 
 source kernel/Kconfig.preempt
-
-config HZ
-       int
-       default 100
+source kernel/Kconfig.hz
 
 config ARCH_HAS_HOLES_MEMORYMODEL
        def_bool y if SPARSEMEM
@@ -481,12 +500,8 @@ config HAVE_ARCH_PFN_VALID
        def_bool ARCH_HAS_HOLES_MEMORYMODEL || !SPARSEMEM
 
 config HW_PERF_EVENTS
-       bool "Enable hardware performance counter support for perf events"
-       depends on PERF_EVENTS
-       default y
-       help
-         Enable hardware performance counter support for perf events. If
-         disabled, perf events will use software events only.
+       def_bool y
+       depends on ARM_PMU
 
 config SYS_SUPPORTS_HUGETLBFS
        def_bool y
@@ -495,7 +510,7 @@ config ARCH_WANT_GENERAL_HUGETLB
        def_bool y
 
 config ARCH_WANT_HUGE_PMD_SHARE
-       def_bool y if !ARM64_64K_PAGES
+       def_bool y if ARM64_4K_PAGES || (ARM64_16K_PAGES && !ARM64_VA_BITS_36)
 
 config HAVE_ARCH_TRANSPARENT_HUGEPAGE
        def_bool y
@@ -532,7 +547,25 @@ config XEN
 config FORCE_MAX_ZONEORDER
        int
        default "14" if (ARM64_64K_PAGES && TRANSPARENT_HUGEPAGE)
+       default "12" if (ARM64_16K_PAGES && TRANSPARENT_HUGEPAGE)
        default "11"
+       help
+         The kernel memory allocator divides physically contiguous memory
+         blocks into "zones", where each zone is a power of two number of
+         pages.  This option selects the largest power of two that the kernel
+         keeps in the memory allocator.  If you need to allocate very large
+         blocks of physically contiguous memory, then you may need to
+         increase this value.
+
+         This config option is actually maximum order plus one. For example,
+         a value of 11 means that the largest free memory block is 2^10 pages.
+
+         We make sure that we can allocate up to a HugePage size for each configuration.
+         Hence we have:
+               MAX_ORDER = (PMD_SHIFT - PAGE_SHIFT) + 1 => PAGE_SHIFT - 2
+
+         However for 4K, we choose a higher default value, 11 as opposed to 10, giving us
+         4M allocations matching the default size used by generic code.
 
 menuconfig ARMV8_DEPRECATED
        bool "Emulate deprecated/obsolete ARMv8 instructions"
@@ -707,7 +740,7 @@ source "fs/Kconfig.binfmt"
 
 config COMPAT
        bool "Kernel support for 32-bit EL0"
-       depends on !ARM64_64K_PAGES || EXPERT
+       depends on ARM64_4K_PAGES || EXPERT
        select COMPAT_BINFMT_ELF
        select HAVE_UID16
        select OLD_SIGSUSPEND3
@@ -718,9 +751,9 @@ config COMPAT
          the user helper functions, VFP support and the ptrace interface are
          handled appropriately by the kernel.
 
-         If you also enabled CONFIG_ARM64_64K_PAGES, please be aware that you
-         will only be able to execute AArch32 binaries that were compiled with
-         64k aligned segments.
+         If you use a page size other than 4KB (i.e., 16KB or 64KB), please be aware
+         that you will only be able to execute AArch32 binaries that were compiled
+         with page size aligned segments.
 
          If you want to execute 32-bit userspace applications, say Y.
 
arch/arm64/Kconfig.debug
index d6285ef9b5f976639630606b04d7244a05ac6130..c24d6adc0420c78bb0efbf747101c87c0be9e81a 100644 (file)
@@ -77,7 +77,7 @@ config DEBUG_RODATA
           If in doubt, say Y
 
 config DEBUG_ALIGN_RODATA
-       depends on DEBUG_RODATA && !ARM64_64K_PAGES
+       depends on DEBUG_RODATA && ARM64_4K_PAGES
        bool "Align linker sections up to SECTION_SIZE"
        help
          If this option is enabled, sections that may potentially be marked as
arch/arm64/Makefile
index d10b5d483022f5374fa16c7783b7fb62014c3acb..cd822d8454c0536165810004089c3a7ce5d0068d 100644 (file)
@@ -55,6 +55,13 @@ else
 TEXT_OFFSET := 0x00080000
 endif
 
+# KASAN_SHADOW_OFFSET = VA_START + (1 << (VA_BITS - 3)) - (1 << 61)
+# in 32-bit arithmetic
+KASAN_SHADOW_OFFSET := $(shell printf "0x%08x00000000\n" $$(( \
+                       (0xffffffff & (-1 << ($(CONFIG_ARM64_VA_BITS) - 32))) \
+                       + (1 << ($(CONFIG_ARM64_VA_BITS) - 32 - 3)) \
+                       - (1 << (64 - 32 - 3)) )) )
+
 export TEXT_OFFSET GZFLAGS
 
 core-y         += arch/arm64/kernel/ arch/arm64/mm/
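The same shadow-offset formula evaluated in plain 64-bit arithmetic, as a standalone sketch (VA_START taken as ~0 << VA_BITS, per the comment above); for VA_BITS=39 this gives 0xdfffff9000000000:

    #include <inttypes.h>
    #include <stdint.h>
    #include <stdio.h>

    /* KASAN_SHADOW_OFFSET = VA_START + (1 << (VA_BITS - 3)) - (1 << 61),
     * with VA_START assumed to be ~0 << VA_BITS (start of the kernel VA range). */
    static uint64_t kasan_shadow_offset(unsigned va_bits)
    {
            uint64_t va_start = ~UINT64_C(0) << va_bits;

            return va_start + (UINT64_C(1) << (va_bits - 3)) - (UINT64_C(1) << 61);
    }

    int main(void)
    {
            const unsigned va_bits[] = { 36, 39, 42, 47, 48 };

            for (size_t i = 0; i < sizeof(va_bits) / sizeof(va_bits[0]); i++)
                    printf("VA_BITS=%u: KASAN_SHADOW_OFFSET=0x%016" PRIx64 "\n",
                           va_bits[i], kasan_shadow_offset(va_bits[i]));
            return 0;
    }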
arch/arm64/boot/dts/arm/juno-r1.dts
index 62a5a3cf44143d3027943fbc518d8b2b9ff5a9b4..93bc3d7d51c0f32f6894e9210ef9f41df3913e0a 100644 (file)
                };
        };
 
-       pmu {
-               compatible = "arm,armv8-pmuv3";
+       pmu_a57 {
+               compatible = "arm,cortex-a57-pmu";
                interrupts = <GIC_SPI 02 IRQ_TYPE_LEVEL_HIGH>,
-                            <GIC_SPI 06 IRQ_TYPE_LEVEL_HIGH>,
-                            <GIC_SPI 18 IRQ_TYPE_LEVEL_HIGH>,
+                            <GIC_SPI 06 IRQ_TYPE_LEVEL_HIGH>;
+               interrupt-affinity = <&A57_0>,
+                                    <&A57_1>;
+       };
+
+       pmu_a53 {
+               compatible = "arm,cortex-a53-pmu";
+               interrupts = <GIC_SPI 18 IRQ_TYPE_LEVEL_HIGH>,
                             <GIC_SPI 22 IRQ_TYPE_LEVEL_HIGH>,
                             <GIC_SPI 26 IRQ_TYPE_LEVEL_HIGH>,
                             <GIC_SPI 30 IRQ_TYPE_LEVEL_HIGH>;
-               interrupt-affinity = <&A57_0>,
-                                    <&A57_1>,
-                                    <&A53_0>,
+               interrupt-affinity = <&A53_0>,
                                     <&A53_1>,
                                     <&A53_2>,
                                     <&A53_3>;
arch/arm64/boot/dts/arm/juno.dts
index c02f880584e886bb2e67231d17b8c5ac6ccd63ef..53442b5ee4ff99170056ddb15eee296461d167a0 100644 (file)
                };
        };
 
-       pmu {
-               compatible = "arm,armv8-pmuv3";
+       pmu_a57 {
+               compatible = "arm,cortex-a57-pmu";
                interrupts = <GIC_SPI 02 IRQ_TYPE_LEVEL_HIGH>,
-                            <GIC_SPI 06 IRQ_TYPE_LEVEL_HIGH>,
-                            <GIC_SPI 18 IRQ_TYPE_LEVEL_HIGH>,
+                            <GIC_SPI 06 IRQ_TYPE_LEVEL_HIGH>;
+               interrupt-affinity = <&A57_0>,
+                                    <&A57_1>;
+       };
+
+       pmu_a53 {
+               compatible = "arm,cortex-a53-pmu";
+               interrupts = <GIC_SPI 18 IRQ_TYPE_LEVEL_HIGH>,
                             <GIC_SPI 22 IRQ_TYPE_LEVEL_HIGH>,
                             <GIC_SPI 26 IRQ_TYPE_LEVEL_HIGH>,
                             <GIC_SPI 30 IRQ_TYPE_LEVEL_HIGH>;
-               interrupt-affinity = <&A57_0>,
-                                    <&A57_1>,
-                                    <&A53_0>,
+               interrupt-affinity = <&A53_0>,
                                     <&A53_1>,
                                     <&A53_2>,
                                     <&A53_3>;
arch/arm64/configs/defconfig
index e8e1644ea01780a0289d43adcffd179388961b9a..2f71f9cdd39c90be282b09d44bb0e795ab0181c6 100644 (file)
@@ -53,6 +53,7 @@ CONFIG_PCI_MSI=y
 CONFIG_PCI_HOST_GENERIC=y
 CONFIG_PCI_XGENE=y
 CONFIG_SMP=y
+CONFIG_SCHED_MC=y
 CONFIG_PREEMPT=y
 CONFIG_KSM=y
 CONFIG_TRANSPARENT_HUGEPAGE=y
@@ -111,6 +112,10 @@ CONFIG_SERIAL_8250_DW=y
 CONFIG_SERIAL_8250_MT6577=y
 CONFIG_SERIAL_AMBA_PL011=y
 CONFIG_SERIAL_AMBA_PL011_CONSOLE=y
+CONFIG_SERIAL_SAMSUNG=y
+CONFIG_SERIAL_SAMSUNG_UARTS_4=y
+CONFIG_SERIAL_SAMSUNG_UARTS=4
+CONFIG_SERIAL_SAMSUNG_CONSOLE=y
 CONFIG_SERIAL_MSM=y
 CONFIG_SERIAL_MSM_CONSOLE=y
 CONFIG_SERIAL_OF_PLATFORM=y
@@ -151,6 +156,10 @@ CONFIG_MMC_ARMMMCI=y
 CONFIG_MMC_SDHCI=y
 CONFIG_MMC_SDHCI_PLTFM=y
 CONFIG_MMC_SPI=y
+CONFIG_MMC_DW=y
+CONFIG_MMC_DW_IDMAC=y
+CONFIG_MMC_DW_PLTFM=y
+CONFIG_MMC_DW_EXYNOS=y
 CONFIG_NEW_LEDS=y
 CONFIG_LEDS_CLASS=y
 CONFIG_LEDS_SYSCON=y
arch/arm64/include/asm/assembler.h
index b51f2cc22ca99731f2fa5efdced173f0a50b6c92..12eff928ef8b38dd18ae3bd157b12eb918f797a6 100644 (file)
@@ -193,4 +193,15 @@ lr .req    x30             // link register
        str     \src, [\tmp, :lo12:\sym]
        .endm
 
+/*
+ * Annotate a function as position independent, i.e., safe to be called before
+ * the kernel virtual mapping is activated.
+ */
+#define ENDPIPROC(x)                   \
+       .globl  __pi_##x;               \
+       .type   __pi_##x, %function;    \
+       .set    __pi_##x, x;            \
+       .size   __pi_##x, . - x;        \
+       ENDPROC(x)
+
 #endif /* __ASM_ASSEMBLER_H */
arch/arm64/include/asm/atomic.h
index 1e247ac2601af41012823f0a639e2f9393af6d0b..f3a3586a421c8869d6e88219c686491d71e2ecd5 100644 (file)
 
 #define atomic_read(v)                 READ_ONCE((v)->counter)
 #define atomic_set(v, i)               WRITE_ONCE(((v)->counter), (i))
+
+#define atomic_add_return_relaxed      atomic_add_return_relaxed
+#define atomic_add_return_acquire      atomic_add_return_acquire
+#define atomic_add_return_release      atomic_add_return_release
+#define atomic_add_return              atomic_add_return
+
+#define atomic_inc_return_relaxed(v)   atomic_add_return_relaxed(1, (v))
+#define atomic_inc_return_acquire(v)   atomic_add_return_acquire(1, (v))
+#define atomic_inc_return_release(v)   atomic_add_return_release(1, (v))
+#define atomic_inc_return(v)           atomic_add_return(1, (v))
+
+#define atomic_sub_return_relaxed      atomic_sub_return_relaxed
+#define atomic_sub_return_acquire      atomic_sub_return_acquire
+#define atomic_sub_return_release      atomic_sub_return_release
+#define atomic_sub_return              atomic_sub_return
+
+#define atomic_dec_return_relaxed(v)   atomic_sub_return_relaxed(1, (v))
+#define atomic_dec_return_acquire(v)   atomic_sub_return_acquire(1, (v))
+#define atomic_dec_return_release(v)   atomic_sub_return_release(1, (v))
+#define atomic_dec_return(v)           atomic_sub_return(1, (v))
+
+#define atomic_xchg_relaxed(v, new)    xchg_relaxed(&((v)->counter), (new))
+#define atomic_xchg_acquire(v, new)    xchg_acquire(&((v)->counter), (new))
+#define atomic_xchg_release(v, new)    xchg_release(&((v)->counter), (new))
 #define atomic_xchg(v, new)            xchg(&((v)->counter), (new))
+
+#define atomic_cmpxchg_relaxed(v, old, new)                            \
+       cmpxchg_relaxed(&((v)->counter), (old), (new))
+#define atomic_cmpxchg_acquire(v, old, new)                            \
+       cmpxchg_acquire(&((v)->counter), (old), (new))
+#define atomic_cmpxchg_release(v, old, new)                            \
+       cmpxchg_release(&((v)->counter), (old), (new))
 #define atomic_cmpxchg(v, old, new)    cmpxchg(&((v)->counter), (old), (new))
 
 #define atomic_inc(v)                  atomic_add(1, (v))
 #define atomic_dec(v)                  atomic_sub(1, (v))
-#define atomic_inc_return(v)           atomic_add_return(1, (v))
-#define atomic_dec_return(v)           atomic_sub_return(1, (v))
 #define atomic_inc_and_test(v)         (atomic_inc_return(v) == 0)
 #define atomic_dec_and_test(v)         (atomic_dec_return(v) == 0)
 #define atomic_sub_and_test(i, v)      (atomic_sub_return((i), (v)) == 0)
 #define ATOMIC64_INIT                  ATOMIC_INIT
 #define atomic64_read                  atomic_read
 #define atomic64_set                   atomic_set
+
+#define atomic64_add_return_relaxed    atomic64_add_return_relaxed
+#define atomic64_add_return_acquire    atomic64_add_return_acquire
+#define atomic64_add_return_release    atomic64_add_return_release
+#define atomic64_add_return            atomic64_add_return
+
+#define atomic64_inc_return_relaxed(v) atomic64_add_return_relaxed(1, (v))
+#define atomic64_inc_return_acquire(v) atomic64_add_return_acquire(1, (v))
+#define atomic64_inc_return_release(v) atomic64_add_return_release(1, (v))
+#define atomic64_inc_return(v)         atomic64_add_return(1, (v))
+
+#define atomic64_sub_return_relaxed    atomic64_sub_return_relaxed
+#define atomic64_sub_return_acquire    atomic64_sub_return_acquire
+#define atomic64_sub_return_release    atomic64_sub_return_release
+#define atomic64_sub_return            atomic64_sub_return
+
+#define atomic64_dec_return_relaxed(v) atomic64_sub_return_relaxed(1, (v))
+#define atomic64_dec_return_acquire(v) atomic64_sub_return_acquire(1, (v))
+#define atomic64_dec_return_release(v) atomic64_sub_return_release(1, (v))
+#define atomic64_dec_return(v)         atomic64_sub_return(1, (v))
+
+#define atomic64_xchg_relaxed          atomic_xchg_relaxed
+#define atomic64_xchg_acquire          atomic_xchg_acquire
+#define atomic64_xchg_release          atomic_xchg_release
 #define atomic64_xchg                  atomic_xchg
+
+#define atomic64_cmpxchg_relaxed       atomic_cmpxchg_relaxed
+#define atomic64_cmpxchg_acquire       atomic_cmpxchg_acquire
+#define atomic64_cmpxchg_release       atomic_cmpxchg_release
 #define atomic64_cmpxchg               atomic_cmpxchg
 
 #define atomic64_inc(v)                        atomic64_add(1, (v))
 #define atomic64_dec(v)                        atomic64_sub(1, (v))
-#define atomic64_inc_return(v)         atomic64_add_return(1, (v))
-#define atomic64_dec_return(v)         atomic64_sub_return(1, (v))
 #define atomic64_inc_and_test(v)       (atomic64_inc_return(v) == 0)
 #define atomic64_dec_and_test(v)       (atomic64_dec_return(v) == 0)
 #define atomic64_sub_and_test(i, v)    (atomic64_sub_return((i), (v)) == 0)
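A brief kernel-context sketch (illustrative only, not code from this commit) of what the new ordering suffixes above give a caller:

    #include <linux/atomic.h>

    /* Fully ordered form: the RMW also acts as a full memory barrier. */
    static inline int take_ticket(atomic_t *next)
    {
            return atomic_inc_return(next);
    }

    /* Relaxed form: still atomic, but with no ordering guarantees -
     * good enough for plain statistics counters. */
    static inline int count_event(atomic_t *nr_events)
    {
            return atomic_inc_return_relaxed(nr_events);
    }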
arch/arm64/include/asm/atomic_ll_sc.h
index b3b5c4ae3800b061d5ad2fdd911aea677feedc32..74d0b8eb0799cb6635b999f7afb1c7e7c0a361bf 100644 (file)
@@ -55,40 +55,47 @@ __LL_SC_PREFIX(atomic_##op(int i, atomic_t *v))                             \
 }                                                                      \
 __LL_SC_EXPORT(atomic_##op);
 
-#define ATOMIC_OP_RETURN(op, asm_op)                                   \
+#define ATOMIC_OP_RETURN(name, mb, acq, rel, cl, op, asm_op)           \
 __LL_SC_INLINE int                                                     \
-__LL_SC_PREFIX(atomic_##op##_return(int i, atomic_t *v))               \
+__LL_SC_PREFIX(atomic_##op##_return##name(int i, atomic_t *v))         \
 {                                                                      \
        unsigned long tmp;                                              \
        int result;                                                     \
                                                                        \
-       asm volatile("// atomic_" #op "_return\n"                       \
+       asm volatile("// atomic_" #op "_return" #name "\n"              \
 "      prfm    pstl1strm, %2\n"                                        \
-"1:    ldxr    %w0, %2\n"                                              \
+"1:    ld" #acq "xr    %w0, %2\n"                                      \
 "      " #asm_op "     %w0, %w0, %w3\n"                                \
-"      stlxr   %w1, %w0, %2\n"                                         \
-"      cbnz    %w1, 1b"                                                \
+"      st" #rel "xr    %w1, %w0, %2\n"                                 \
+"      cbnz    %w1, 1b\n"                                              \
+"      " #mb                                                           \
        : "=&r" (result), "=&r" (tmp), "+Q" (v->counter)                \
        : "Ir" (i)                                                      \
-       : "memory");                                                    \
+       : cl);                                                          \
                                                                        \
-       smp_mb();                                                       \
        return result;                                                  \
 }                                                                      \
-__LL_SC_EXPORT(atomic_##op##_return);
+__LL_SC_EXPORT(atomic_##op##_return##name);
+
+#define ATOMIC_OPS(...)                                                        \
+       ATOMIC_OP(__VA_ARGS__)                                          \
+       ATOMIC_OP_RETURN(        , dmb ish,  , l, "memory", __VA_ARGS__)
 
-#define ATOMIC_OPS(op, asm_op)                                         \
-       ATOMIC_OP(op, asm_op)                                           \
-       ATOMIC_OP_RETURN(op, asm_op)
+#define ATOMIC_OPS_RLX(...)                                            \
+       ATOMIC_OPS(__VA_ARGS__)                                         \
+       ATOMIC_OP_RETURN(_relaxed,        ,  ,  ,         , __VA_ARGS__)\
+       ATOMIC_OP_RETURN(_acquire,        , a,  , "memory", __VA_ARGS__)\
+       ATOMIC_OP_RETURN(_release,        ,  , l, "memory", __VA_ARGS__)
 
-ATOMIC_OPS(add, add)
-ATOMIC_OPS(sub, sub)
+ATOMIC_OPS_RLX(add, add)
+ATOMIC_OPS_RLX(sub, sub)
 
 ATOMIC_OP(and, and)
 ATOMIC_OP(andnot, bic)
 ATOMIC_OP(or, orr)
 ATOMIC_OP(xor, eor)
 
+#undef ATOMIC_OPS_RLX
 #undef ATOMIC_OPS
 #undef ATOMIC_OP_RETURN
 #undef ATOMIC_OP
@@ -111,40 +118,47 @@ __LL_SC_PREFIX(atomic64_##op(long i, atomic64_t *v))                      \
 }                                                                      \
 __LL_SC_EXPORT(atomic64_##op);
 
-#define ATOMIC64_OP_RETURN(op, asm_op)                                 \
+#define ATOMIC64_OP_RETURN(name, mb, acq, rel, cl, op, asm_op)         \
 __LL_SC_INLINE long                                                    \
-__LL_SC_PREFIX(atomic64_##op##_return(long i, atomic64_t *v))          \
+__LL_SC_PREFIX(atomic64_##op##_return##name(long i, atomic64_t *v))    \
 {                                                                      \
        long result;                                                    \
        unsigned long tmp;                                              \
                                                                        \
-       asm volatile("// atomic64_" #op "_return\n"                     \
+       asm volatile("// atomic64_" #op "_return" #name "\n"            \
 "      prfm    pstl1strm, %2\n"                                        \
-"1:    ldxr    %0, %2\n"                                               \
+"1:    ld" #acq "xr    %0, %2\n"                                       \
 "      " #asm_op "     %0, %0, %3\n"                                   \
-"      stlxr   %w1, %0, %2\n"                                          \
-"      cbnz    %w1, 1b"                                                \
+"      st" #rel "xr    %w1, %0, %2\n"                                  \
+"      cbnz    %w1, 1b\n"                                              \
+"      " #mb                                                           \
        : "=&r" (result), "=&r" (tmp), "+Q" (v->counter)                \
        : "Ir" (i)                                                      \
-       : "memory");                                                    \
+       : cl);                                                          \
                                                                        \
-       smp_mb();                                                       \
        return result;                                                  \
 }                                                                      \
-__LL_SC_EXPORT(atomic64_##op##_return);
+__LL_SC_EXPORT(atomic64_##op##_return##name);
+
+#define ATOMIC64_OPS(...)                                              \
+       ATOMIC64_OP(__VA_ARGS__)                                        \
+       ATOMIC64_OP_RETURN(, dmb ish,  , l, "memory", __VA_ARGS__)
 
-#define ATOMIC64_OPS(op, asm_op)                                       \
-       ATOMIC64_OP(op, asm_op)                                         \
-       ATOMIC64_OP_RETURN(op, asm_op)
+#define ATOMIC64_OPS_RLX(...)                                          \
+       ATOMIC64_OPS(__VA_ARGS__)                                       \
+       ATOMIC64_OP_RETURN(_relaxed,,  ,  ,         , __VA_ARGS__)      \
+       ATOMIC64_OP_RETURN(_acquire,, a,  , "memory", __VA_ARGS__)      \
+       ATOMIC64_OP_RETURN(_release,,  , l, "memory", __VA_ARGS__)
 
-ATOMIC64_OPS(add, add)
-ATOMIC64_OPS(sub, sub)
+ATOMIC64_OPS_RLX(add, add)
+ATOMIC64_OPS_RLX(sub, sub)
 
 ATOMIC64_OP(and, and)
 ATOMIC64_OP(andnot, bic)
 ATOMIC64_OP(or, orr)
 ATOMIC64_OP(xor, eor)
 
+#undef ATOMIC64_OPS_RLX
 #undef ATOMIC64_OPS
 #undef ATOMIC64_OP_RETURN
 #undef ATOMIC64_OP
@@ -172,7 +186,7 @@ __LL_SC_PREFIX(atomic64_dec_if_positive(atomic64_t *v))
 }
 __LL_SC_EXPORT(atomic64_dec_if_positive);
 
-#define __CMPXCHG_CASE(w, sz, name, mb, rel, cl)                       \
+#define __CMPXCHG_CASE(w, sz, name, mb, acq, rel, cl)                  \
 __LL_SC_INLINE unsigned long                                           \
 __LL_SC_PREFIX(__cmpxchg_case_##name(volatile void *ptr,               \
                                     unsigned long old,                 \
@@ -182,7 +196,7 @@ __LL_SC_PREFIX(__cmpxchg_case_##name(volatile void *ptr,            \
                                                                        \
        asm volatile(                                                   \
        "       prfm    pstl1strm, %[v]\n"                              \
-       "1:     ldxr" #sz "\t%" #w "[oldval], %[v]\n"                   \
+       "1:     ld" #acq "xr" #sz "\t%" #w "[oldval], %[v]\n"           \
        "       eor     %" #w "[tmp], %" #w "[oldval], %" #w "[old]\n"  \
        "       cbnz    %" #w "[tmp], 2f\n"                             \
        "       st" #rel "xr" #sz "\t%w[tmp], %" #w "[new], %[v]\n"     \
@@ -199,14 +213,22 @@ __LL_SC_PREFIX(__cmpxchg_case_##name(volatile void *ptr,          \
 }                                                                      \
 __LL_SC_EXPORT(__cmpxchg_case_##name);
 
-__CMPXCHG_CASE(w, b,    1,        ,  ,         )
-__CMPXCHG_CASE(w, h,    2,        ,  ,         )
-__CMPXCHG_CASE(w,  ,    4,        ,  ,         )
-__CMPXCHG_CASE( ,  ,    8,        ,  ,         )
-__CMPXCHG_CASE(w, b, mb_1, dmb ish, l, "memory")
-__CMPXCHG_CASE(w, h, mb_2, dmb ish, l, "memory")
-__CMPXCHG_CASE(w,  , mb_4, dmb ish, l, "memory")
-__CMPXCHG_CASE( ,  , mb_8, dmb ish, l, "memory")
+__CMPXCHG_CASE(w, b,     1,        ,  ,  ,         )
+__CMPXCHG_CASE(w, h,     2,        ,  ,  ,         )
+__CMPXCHG_CASE(w,  ,     4,        ,  ,  ,         )
+__CMPXCHG_CASE( ,  ,     8,        ,  ,  ,         )
+__CMPXCHG_CASE(w, b, acq_1,        , a,  , "memory")
+__CMPXCHG_CASE(w, h, acq_2,        , a,  , "memory")
+__CMPXCHG_CASE(w,  , acq_4,        , a,  , "memory")
+__CMPXCHG_CASE( ,  , acq_8,        , a,  , "memory")
+__CMPXCHG_CASE(w, b, rel_1,        ,  , l, "memory")
+__CMPXCHG_CASE(w, h, rel_2,        ,  , l, "memory")
+__CMPXCHG_CASE(w,  , rel_4,        ,  , l, "memory")
+__CMPXCHG_CASE( ,  , rel_8,        ,  , l, "memory")
+__CMPXCHG_CASE(w, b,  mb_1, dmb ish,  , l, "memory")
+__CMPXCHG_CASE(w, h,  mb_2, dmb ish,  , l, "memory")
+__CMPXCHG_CASE(w,  ,  mb_4, dmb ish,  , l, "memory")
+__CMPXCHG_CASE( ,  ,  mb_8, dmb ish,  , l, "memory")
 
 #undef __CMPXCHG_CASE
 
arch/arm64/include/asm/atomic_lse.h
index 55d740e634596363f6cbfbdeacd77113a00a486e..1fce7908e6904a43791a385b5df76ef080ebefa2 100644 (file)
@@ -75,24 +75,32 @@ static inline void atomic_add(int i, atomic_t *v)
        : "x30");
 }
 
-static inline int atomic_add_return(int i, atomic_t *v)
-{
-       register int w0 asm ("w0") = i;
-       register atomic_t *x1 asm ("x1") = v;
+#define ATOMIC_OP_ADD_RETURN(name, mb, cl...)                          \
+static inline int atomic_add_return##name(int i, atomic_t *v)          \
+{                                                                      \
+       register int w0 asm ("w0") = i;                                 \
+       register atomic_t *x1 asm ("x1") = v;                           \
+                                                                       \
+       asm volatile(ARM64_LSE_ATOMIC_INSN(                             \
+       /* LL/SC */                                                     \
+       "       nop\n"                                                  \
+       __LL_SC_ATOMIC(add_return##name),                               \
+       /* LSE atomics */                                               \
+       "       ldadd" #mb "    %w[i], w30, %[v]\n"                     \
+       "       add     %w[i], %w[i], w30")                             \
+       : [i] "+r" (w0), [v] "+Q" (v->counter)                          \
+       : "r" (x1)                                                      \
+       : "x30" , ##cl);                                                \
+                                                                       \
+       return w0;                                                      \
+}
 
-       asm volatile(ARM64_LSE_ATOMIC_INSN(
-       /* LL/SC */
-       "       nop\n"
-       __LL_SC_ATOMIC(add_return),
-       /* LSE atomics */
-       "       ldaddal %w[i], w30, %[v]\n"
-       "       add     %w[i], %w[i], w30")
-       : [i] "+r" (w0), [v] "+Q" (v->counter)
-       : "r" (x1)
-       : "x30", "memory");
+ATOMIC_OP_ADD_RETURN(_relaxed,   )
+ATOMIC_OP_ADD_RETURN(_acquire,  a, "memory")
+ATOMIC_OP_ADD_RETURN(_release,  l, "memory")
+ATOMIC_OP_ADD_RETURN(        , al, "memory")
 
-       return w0;
-}
+#undef ATOMIC_OP_ADD_RETURN
 
 static inline void atomic_and(int i, atomic_t *v)
 {
@@ -128,27 +136,34 @@ static inline void atomic_sub(int i, atomic_t *v)
        : "x30");
 }
 
-static inline int atomic_sub_return(int i, atomic_t *v)
-{
-       register int w0 asm ("w0") = i;
-       register atomic_t *x1 asm ("x1") = v;
-
-       asm volatile(ARM64_LSE_ATOMIC_INSN(
-       /* LL/SC */
-       "       nop\n"
-       __LL_SC_ATOMIC(sub_return)
-       "       nop",
-       /* LSE atomics */
-       "       neg     %w[i], %w[i]\n"
-       "       ldaddal %w[i], w30, %[v]\n"
-       "       add     %w[i], %w[i], w30")
-       : [i] "+r" (w0), [v] "+Q" (v->counter)
-       : "r" (x1)
-       : "x30", "memory");
-
-       return w0;
+#define ATOMIC_OP_SUB_RETURN(name, mb, cl...)                          \
+static inline int atomic_sub_return##name(int i, atomic_t *v)          \
+{                                                                      \
+       register int w0 asm ("w0") = i;                                 \
+       register atomic_t *x1 asm ("x1") = v;                           \
+                                                                       \
+       asm volatile(ARM64_LSE_ATOMIC_INSN(                             \
+       /* LL/SC */                                                     \
+       "       nop\n"                                                  \
+       __LL_SC_ATOMIC(sub_return##name)                                \
+       "       nop",                                                   \
+       /* LSE atomics */                                               \
+       "       neg     %w[i], %w[i]\n"                                 \
+       "       ldadd" #mb "    %w[i], w30, %[v]\n"                     \
+       "       add     %w[i], %w[i], w30")                             \
+       : [i] "+r" (w0), [v] "+Q" (v->counter)                          \
+       : "r" (x1)                                                      \
+       : "x30" , ##cl);                                                \
+                                                                       \
+       return w0;                                                      \
 }
 
+ATOMIC_OP_SUB_RETURN(_relaxed,   )
+ATOMIC_OP_SUB_RETURN(_acquire,  a, "memory")
+ATOMIC_OP_SUB_RETURN(_release,  l, "memory")
+ATOMIC_OP_SUB_RETURN(        , al, "memory")
+
+#undef ATOMIC_OP_SUB_RETURN
 #undef __LL_SC_ATOMIC
 
 #define __LL_SC_ATOMIC64(op)   __LL_SC_CALL(atomic64_##op)
@@ -201,24 +216,32 @@ static inline void atomic64_add(long i, atomic64_t *v)
        : "x30");
 }
 
-static inline long atomic64_add_return(long i, atomic64_t *v)
-{
-       register long x0 asm ("x0") = i;
-       register atomic64_t *x1 asm ("x1") = v;
+#define ATOMIC64_OP_ADD_RETURN(name, mb, cl...)                                \
+static inline long atomic64_add_return##name(long i, atomic64_t *v)    \
+{                                                                      \
+       register long x0 asm ("x0") = i;                                \
+       register atomic64_t *x1 asm ("x1") = v;                         \
+                                                                       \
+       asm volatile(ARM64_LSE_ATOMIC_INSN(                             \
+       /* LL/SC */                                                     \
+       "       nop\n"                                                  \
+       __LL_SC_ATOMIC64(add_return##name),                             \
+       /* LSE atomics */                                               \
+       "       ldadd" #mb "    %[i], x30, %[v]\n"                      \
+       "       add     %[i], %[i], x30")                               \
+       : [i] "+r" (x0), [v] "+Q" (v->counter)                          \
+       : "r" (x1)                                                      \
+       : "x30" , ##cl);                                                \
+                                                                       \
+       return x0;                                                      \
+}
 
-       asm volatile(ARM64_LSE_ATOMIC_INSN(
-       /* LL/SC */
-       "       nop\n"
-       __LL_SC_ATOMIC64(add_return),
-       /* LSE atomics */
-       "       ldaddal %[i], x30, %[v]\n"
-       "       add     %[i], %[i], x30")
-       : [i] "+r" (x0), [v] "+Q" (v->counter)
-       : "r" (x1)
-       : "x30", "memory");
+ATOMIC64_OP_ADD_RETURN(_relaxed,   )
+ATOMIC64_OP_ADD_RETURN(_acquire,  a, "memory")
+ATOMIC64_OP_ADD_RETURN(_release,  l, "memory")
+ATOMIC64_OP_ADD_RETURN(        , al, "memory")
 
-       return x0;
-}
+#undef ATOMIC64_OP_ADD_RETURN
 
 static inline void atomic64_and(long i, atomic64_t *v)
 {
@@ -254,26 +277,34 @@ static inline void atomic64_sub(long i, atomic64_t *v)
        : "x30");
 }
 
-static inline long atomic64_sub_return(long i, atomic64_t *v)
-{
-       register long x0 asm ("x0") = i;
-       register atomic64_t *x1 asm ("x1") = v;
+#define ATOMIC64_OP_SUB_RETURN(name, mb, cl...)                                \
+static inline long atomic64_sub_return##name(long i, atomic64_t *v)    \
+{                                                                      \
+       register long x0 asm ("x0") = i;                                \
+       register atomic64_t *x1 asm ("x1") = v;                         \
+                                                                       \
+       asm volatile(ARM64_LSE_ATOMIC_INSN(                             \
+       /* LL/SC */                                                     \
+       "       nop\n"                                                  \
+       __LL_SC_ATOMIC64(sub_return##name)                              \
+       "       nop",                                                   \
+       /* LSE atomics */                                               \
+       "       neg     %[i], %[i]\n"                                   \
+       "       ldadd" #mb "    %[i], x30, %[v]\n"                      \
+       "       add     %[i], %[i], x30")                               \
+       : [i] "+r" (x0), [v] "+Q" (v->counter)                          \
+       : "r" (x1)                                                      \
+       : "x30" , ##cl);                                                \
+                                                                       \
+       return x0;                                                      \
+}
 
-       asm volatile(ARM64_LSE_ATOMIC_INSN(
-       /* LL/SC */
-       "       nop\n"
-       __LL_SC_ATOMIC64(sub_return)
-       "       nop",
-       /* LSE atomics */
-       "       neg     %[i], %[i]\n"
-       "       ldaddal %[i], x30, %[v]\n"
-       "       add     %[i], %[i], x30")
-       : [i] "+r" (x0), [v] "+Q" (v->counter)
-       : "r" (x1)
-       : "x30", "memory");
+ATOMIC64_OP_SUB_RETURN(_relaxed,   )
+ATOMIC64_OP_SUB_RETURN(_acquire,  a, "memory")
+ATOMIC64_OP_SUB_RETURN(_release,  l, "memory")
+ATOMIC64_OP_SUB_RETURN(        , al, "memory")
 
-       return x0;
-}
+#undef ATOMIC64_OP_SUB_RETURN
 
 static inline long atomic64_dec_if_positive(atomic64_t *v)
 {
@@ -333,14 +364,22 @@ static inline unsigned long __cmpxchg_case_##name(volatile void *ptr,     \
        return x0;                                                      \
 }
 
-__CMPXCHG_CASE(w, b,    1,   )
-__CMPXCHG_CASE(w, h,    2,   )
-__CMPXCHG_CASE(w,  ,    4,   )
-__CMPXCHG_CASE(x,  ,    8,   )
-__CMPXCHG_CASE(w, b, mb_1, al, "memory")
-__CMPXCHG_CASE(w, h, mb_2, al, "memory")
-__CMPXCHG_CASE(w,  , mb_4, al, "memory")
-__CMPXCHG_CASE(x,  , mb_8, al, "memory")
+__CMPXCHG_CASE(w, b,     1,   )
+__CMPXCHG_CASE(w, h,     2,   )
+__CMPXCHG_CASE(w,  ,     4,   )
+__CMPXCHG_CASE(x,  ,     8,   )
+__CMPXCHG_CASE(w, b, acq_1,  a, "memory")
+__CMPXCHG_CASE(w, h, acq_2,  a, "memory")
+__CMPXCHG_CASE(w,  , acq_4,  a, "memory")
+__CMPXCHG_CASE(x,  , acq_8,  a, "memory")
+__CMPXCHG_CASE(w, b, rel_1,  l, "memory")
+__CMPXCHG_CASE(w, h, rel_2,  l, "memory")
+__CMPXCHG_CASE(w,  , rel_4,  l, "memory")
+__CMPXCHG_CASE(x,  , rel_8,  l, "memory")
+__CMPXCHG_CASE(w, b,  mb_1, al, "memory")
+__CMPXCHG_CASE(w, h,  mb_2, al, "memory")
+__CMPXCHG_CASE(w,  ,  mb_4, al, "memory")
+__CMPXCHG_CASE(x,  ,  mb_8, al, "memory")
 
 #undef __LL_SC_CMPXCHG
 #undef __CMPXCHG_CASE
arch/arm64/include/asm/cache.h
index bde449936e2f07fda4abf1e3c840fe9401620708..5082b30bc2c05fbf7b970141dd1d69800e7ed3b3 100644 (file)
@@ -18,7 +18,7 @@
 
 #include <asm/cachetype.h>
 
-#define L1_CACHE_SHIFT         6
+#define L1_CACHE_SHIFT         7
 #define L1_CACHE_BYTES         (1 << L1_CACHE_SHIFT)
 
 /*
arch/arm64/include/asm/cacheflush.h
index c75b8d027eb1e657efaa2ec219e14af27c9fcfa9..54efedaf331fda55478d001d860d6137be5d08e8 100644 (file)
@@ -115,6 +115,13 @@ extern void copy_to_user_page(struct vm_area_struct *, struct page *,
 #define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE 1
 extern void flush_dcache_page(struct page *);
 
+static inline void __local_flush_icache_all(void)
+{
+       asm("ic iallu");
+       dsb(nsh);
+       isb();
+}
+
 static inline void __flush_icache_all(void)
 {
        asm("ic ialluis");
arch/arm64/include/asm/cachetype.h
index da2fc9e3cedd8960f5cb05b865e6507ba66a1199..f5588692f1d42429d6e245bd62204085aa7cee45 100644 (file)
@@ -34,8 +34,8 @@
 
 #define CTR_L1IP(ctr)  (((ctr) >> CTR_L1IP_SHIFT) & CTR_L1IP_MASK)
 
-#define ICACHEF_ALIASING       BIT(0)
-#define ICACHEF_AIVIVT         BIT(1)
+#define ICACHEF_ALIASING       0
+#define ICACHEF_AIVIVT         1
 
 extern unsigned long __icache_flags;
 
arch/arm64/include/asm/cmpxchg.h
index 899e9f1d19e486defa413d087f6518ef38d35e29..9ea611ea69df739009d0a6d432bbbedcab05284b 100644 (file)
 #include <asm/barrier.h>
 #include <asm/lse.h>
 
-static inline unsigned long __xchg(unsigned long x, volatile void *ptr, int size)
-{
-       unsigned long ret, tmp;
-
-       switch (size) {
-       case 1:
-               asm volatile(ARM64_LSE_ATOMIC_INSN(
-               /* LL/SC */
-               "       prfm    pstl1strm, %2\n"
-               "1:     ldxrb   %w0, %2\n"
-               "       stlxrb  %w1, %w3, %2\n"
-               "       cbnz    %w1, 1b\n"
-               "       dmb     ish",
-               /* LSE atomics */
-               "       nop\n"
-               "       nop\n"
-               "       swpalb  %w3, %w0, %2\n"
-               "       nop\n"
-               "       nop")
-                       : "=&r" (ret), "=&r" (tmp), "+Q" (*(u8 *)ptr)
-                       : "r" (x)
-                       : "memory");
-               break;
-       case 2:
-               asm volatile(ARM64_LSE_ATOMIC_INSN(
-               /* LL/SC */
-               "       prfm    pstl1strm, %2\n"
-               "1:     ldxrh   %w0, %2\n"
-               "       stlxrh  %w1, %w3, %2\n"
-               "       cbnz    %w1, 1b\n"
-               "       dmb     ish",
-               /* LSE atomics */
-               "       nop\n"
-               "       nop\n"
-               "       swpalh  %w3, %w0, %2\n"
-               "       nop\n"
-               "       nop")
-                       : "=&r" (ret), "=&r" (tmp), "+Q" (*(u16 *)ptr)
-                       : "r" (x)
-                       : "memory");
-               break;
-       case 4:
-               asm volatile(ARM64_LSE_ATOMIC_INSN(
-               /* LL/SC */
-               "       prfm    pstl1strm, %2\n"
-               "1:     ldxr    %w0, %2\n"
-               "       stlxr   %w1, %w3, %2\n"
-               "       cbnz    %w1, 1b\n"
-               "       dmb     ish",
-               /* LSE atomics */
-               "       nop\n"
-               "       nop\n"
-               "       swpal   %w3, %w0, %2\n"
-               "       nop\n"
-               "       nop")
-                       : "=&r" (ret), "=&r" (tmp), "+Q" (*(u32 *)ptr)
-                       : "r" (x)
-                       : "memory");
-               break;
-       case 8:
-               asm volatile(ARM64_LSE_ATOMIC_INSN(
-               /* LL/SC */
-               "       prfm    pstl1strm, %2\n"
-               "1:     ldxr    %0, %2\n"
-               "       stlxr   %w1, %3, %2\n"
-               "       cbnz    %w1, 1b\n"
-               "       dmb     ish",
-               /* LSE atomics */
-               "       nop\n"
-               "       nop\n"
-               "       swpal   %3, %0, %2\n"
-               "       nop\n"
-               "       nop")
-                       : "=&r" (ret), "=&r" (tmp), "+Q" (*(u64 *)ptr)
-                       : "r" (x)
-                       : "memory");
-               break;
-       default:
-               BUILD_BUG();
-       }
-
-       return ret;
+/*
+ * We need separate acquire parameters for ll/sc and lse, since the full
+ * barrier case is generated as release+dmb for the former and
+ * acquire+release for the latter.
+ */
+#define __XCHG_CASE(w, sz, name, mb, nop_lse, acq, acq_lse, rel, cl)   \
+static inline unsigned long __xchg_case_##name(unsigned long x,                \
+                                              volatile void *ptr)      \
+{                                                                      \
+       unsigned long ret, tmp;                                         \
+                                                                       \
+       asm volatile(ARM64_LSE_ATOMIC_INSN(                             \
+       /* LL/SC */                                                     \
+       "       prfm    pstl1strm, %2\n"                                \
+       "1:     ld" #acq "xr" #sz "\t%" #w "0, %2\n"                    \
+       "       st" #rel "xr" #sz "\t%w1, %" #w "3, %2\n"               \
+       "       cbnz    %w1, 1b\n"                                      \
+       "       " #mb,                                                  \
+       /* LSE atomics */                                               \
+       "       nop\n"                                                  \
+       "       nop\n"                                                  \
+       "       swp" #acq_lse #rel #sz "\t%" #w "3, %" #w "0, %2\n"     \
+       "       nop\n"                                                  \
+       "       " #nop_lse)                                             \
+       : "=&r" (ret), "=&r" (tmp), "+Q" (*(u8 *)ptr)                   \
+       : "r" (x)                                                       \
+       : cl);                                                          \
+                                                                       \
+       return ret;                                                     \
 }
 
-#define xchg(ptr,x) \
-({ \
-       __typeof__(*(ptr)) __ret; \
-       __ret = (__typeof__(*(ptr))) \
-               __xchg((unsigned long)(x), (ptr), sizeof(*(ptr))); \
-       __ret; \
+__XCHG_CASE(w, b,     1,        ,    ,  ,  ,  ,         )
+__XCHG_CASE(w, h,     2,        ,    ,  ,  ,  ,         )
+__XCHG_CASE(w,  ,     4,        ,    ,  ,  ,  ,         )
+__XCHG_CASE( ,  ,     8,        ,    ,  ,  ,  ,         )
+__XCHG_CASE(w, b, acq_1,        ,    , a, a,  , "memory")
+__XCHG_CASE(w, h, acq_2,        ,    , a, a,  , "memory")
+__XCHG_CASE(w,  , acq_4,        ,    , a, a,  , "memory")
+__XCHG_CASE( ,  , acq_8,        ,    , a, a,  , "memory")
+__XCHG_CASE(w, b, rel_1,        ,    ,  ,  , l, "memory")
+__XCHG_CASE(w, h, rel_2,        ,    ,  ,  , l, "memory")
+__XCHG_CASE(w,  , rel_4,        ,    ,  ,  , l, "memory")
+__XCHG_CASE( ,  , rel_8,        ,    ,  ,  , l, "memory")
+__XCHG_CASE(w, b,  mb_1, dmb ish, nop,  , a, l, "memory")
+__XCHG_CASE(w, h,  mb_2, dmb ish, nop,  , a, l, "memory")
+__XCHG_CASE(w,  ,  mb_4, dmb ish, nop,  , a, l, "memory")
+__XCHG_CASE( ,  ,  mb_8, dmb ish, nop,  , a, l, "memory")
+
+#undef __XCHG_CASE
+
+#define __XCHG_GEN(sfx)                                                        \
+static inline unsigned long __xchg##sfx(unsigned long x,               \
+                                       volatile void *ptr,             \
+                                       int size)                       \
+{                                                                      \
+       switch (size) {                                                 \
+       case 1:                                                         \
+               return __xchg_case##sfx##_1(x, ptr);                    \
+       case 2:                                                         \
+               return __xchg_case##sfx##_2(x, ptr);                    \
+       case 4:                                                         \
+               return __xchg_case##sfx##_4(x, ptr);                    \
+       case 8:                                                         \
+               return __xchg_case##sfx##_8(x, ptr);                    \
+       default:                                                        \
+               BUILD_BUG();                                            \
+       }                                                               \
+                                                                       \
+       unreachable();                                                  \
+}
+
+__XCHG_GEN()
+__XCHG_GEN(_acq)
+__XCHG_GEN(_rel)
+__XCHG_GEN(_mb)
+
+#undef __XCHG_GEN
+
+#define __xchg_wrapper(sfx, ptr, x)                                    \
+({                                                                     \
+       __typeof__(*(ptr)) __ret;                                       \
+       __ret = (__typeof__(*(ptr)))                                    \
+               __xchg##sfx((unsigned long)(x), (ptr), sizeof(*(ptr))); \
+       __ret;                                                          \
 })
 
-static inline unsigned long __cmpxchg(volatile void *ptr, unsigned long old,
-                                     unsigned long new, int size)
-{
-       switch (size) {
-       case 1:
-               return __cmpxchg_case_1(ptr, (u8)old, new);
-       case 2:
-               return __cmpxchg_case_2(ptr, (u16)old, new);
-       case 4:
-               return __cmpxchg_case_4(ptr, old, new);
-       case 8:
-               return __cmpxchg_case_8(ptr, old, new);
-       default:
-               BUILD_BUG();
-       }
-
-       unreachable();
+/* xchg */
+#define xchg_relaxed(...)      __xchg_wrapper(    , __VA_ARGS__)
+#define xchg_acquire(...)      __xchg_wrapper(_acq, __VA_ARGS__)
+#define xchg_release(...)      __xchg_wrapper(_rel, __VA_ARGS__)
+#define xchg(...)              __xchg_wrapper( _mb, __VA_ARGS__)
+
+#define __CMPXCHG_GEN(sfx)                                             \
+static inline unsigned long __cmpxchg##sfx(volatile void *ptr,         \
+                                          unsigned long old,           \
+                                          unsigned long new,           \
+                                          int size)                    \
+{                                                                      \
+       switch (size) {                                                 \
+       case 1:                                                         \
+               return __cmpxchg_case##sfx##_1(ptr, (u8)old, new);      \
+       case 2:                                                         \
+               return __cmpxchg_case##sfx##_2(ptr, (u16)old, new);     \
+       case 4:                                                         \
+               return __cmpxchg_case##sfx##_4(ptr, old, new);          \
+       case 8:                                                         \
+               return __cmpxchg_case##sfx##_8(ptr, old, new);          \
+       default:                                                        \
+               BUILD_BUG();                                            \
+       }                                                               \
+                                                                       \
+       unreachable();                                                  \
 }
 
-static inline unsigned long __cmpxchg_mb(volatile void *ptr, unsigned long old,
-                                        unsigned long new, int size)
-{
-       switch (size) {
-       case 1:
-               return __cmpxchg_case_mb_1(ptr, (u8)old, new);
-       case 2:
-               return __cmpxchg_case_mb_2(ptr, (u16)old, new);
-       case 4:
-               return __cmpxchg_case_mb_4(ptr, old, new);
-       case 8:
-               return __cmpxchg_case_mb_8(ptr, old, new);
-       default:
-               BUILD_BUG();
-       }
-
-       unreachable();
-}
+__CMPXCHG_GEN()
+__CMPXCHG_GEN(_acq)
+__CMPXCHG_GEN(_rel)
+__CMPXCHG_GEN(_mb)
 
-#define cmpxchg(ptr, o, n) \
-({ \
-       __typeof__(*(ptr)) __ret; \
-       __ret = (__typeof__(*(ptr))) \
-               __cmpxchg_mb((ptr), (unsigned long)(o), (unsigned long)(n), \
-                            sizeof(*(ptr))); \
-       __ret; \
-})
+#undef __CMPXCHG_GEN
 
-#define cmpxchg_local(ptr, o, n) \
-({ \
-       __typeof__(*(ptr)) __ret; \
-       __ret = (__typeof__(*(ptr))) \
-               __cmpxchg((ptr), (unsigned long)(o), \
-                         (unsigned long)(n), sizeof(*(ptr))); \
-       __ret; \
+#define __cmpxchg_wrapper(sfx, ptr, o, n)                              \
+({                                                                     \
+       __typeof__(*(ptr)) __ret;                                       \
+       __ret = (__typeof__(*(ptr)))                                    \
+               __cmpxchg##sfx((ptr), (unsigned long)(o),               \
+                               (unsigned long)(n), sizeof(*(ptr)));    \
+       __ret;                                                          \
 })
 
+/* cmpxchg */
+#define cmpxchg_relaxed(...)   __cmpxchg_wrapper(    , __VA_ARGS__)
+#define cmpxchg_acquire(...)   __cmpxchg_wrapper(_acq, __VA_ARGS__)
+#define cmpxchg_release(...)   __cmpxchg_wrapper(_rel, __VA_ARGS__)
+#define cmpxchg(...)           __cmpxchg_wrapper( _mb, __VA_ARGS__)
+#define cmpxchg_local          cmpxchg_relaxed
+
+/* cmpxchg64 */
+#define cmpxchg64_relaxed      cmpxchg_relaxed
+#define cmpxchg64_acquire      cmpxchg_acquire
+#define cmpxchg64_release      cmpxchg_release
+#define cmpxchg64              cmpxchg
+#define cmpxchg64_local                cmpxchg_local
+
+/* cmpxchg_double */
 #define system_has_cmpxchg_double()     1
 
 #define __cmpxchg_double_check(ptr1, ptr2)                                     \
@@ -202,6 +199,7 @@ static inline unsigned long __cmpxchg_mb(volatile void *ptr, unsigned long old,
        __ret; \
 })
 
+/* this_cpu_cmpxchg */
 #define _protect_cmpxchg_local(pcp, o, n)                      \
 ({                                                             \
        typeof(*raw_cpu_ptr(&(pcp))) __ret;                     \
@@ -227,9 +225,4 @@ static inline unsigned long __cmpxchg_mb(volatile void *ptr, unsigned long old,
        __ret;                                                          \
 })
 
-#define cmpxchg64(ptr,o,n)             cmpxchg((ptr),(o),(n))
-#define cmpxchg64_local(ptr,o,n)       cmpxchg_local((ptr),(o),(n))
-
-#define cmpxchg64_relaxed(ptr,o,n)     cmpxchg_local((ptr),(o),(n))
-
 #endif /* __ASM_CMPXCHG_H */
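
The rewritten cmpxchg.h above generates one __cmpxchg_case family per ordering flavour and exposes it through the cmpxchg_relaxed/_acquire/_release/cmpxchg wrappers. A rough, hypothetical kernel-style sketch of a caller picking the relaxed variant when no ordering is required (the helper and its names are illustrative and not part of this patch):

#include <linux/atomic.h>

/*
 * Hypothetical helper: a statistics counter where ordering does not matter,
 * so the relaxed variant avoids the full barriers implied by cmpxchg().
 */
static void stat_add(unsigned long *stat, unsigned long delta)
{
	unsigned long old;

	do {
		old = READ_ONCE(*stat);
	} while (cmpxchg_relaxed(stat, old, old + delta) != old);
}
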
index 8e797b2fcc0186b6f5f505303b51fbea0eff2e94..b5e9cee4b5f81a3498a67b934dc9157e2d4cf45b 100644 (file)
@@ -63,4 +63,8 @@ DECLARE_PER_CPU(struct cpuinfo_arm64, cpu_data);
 void cpuinfo_store_cpu(void);
 void __init cpuinfo_store_boot_cpu(void);
 
+void __init init_cpu_features(struct cpuinfo_arm64 *info);
+void update_cpu_features(int cpu, struct cpuinfo_arm64 *info,
+                                struct cpuinfo_arm64 *boot);
+
 #endif /* __ASM_CPU_H */
index dbc78d2b8cc6bbd43af2bf4781b6fffaa54a4fe1..11d5bb0fdd541052b9cdd49197b2dc90e9ece737 100644 (file)
@@ -10,6 +10,7 @@
 #define __ASM_CPUFEATURE_H
 
 #include <asm/hwcap.h>
+#include <asm/sysreg.h>
 
 /*
  * In the arm64 world (as in the ARM world), elf_hwcap is used both internally
 
 #include <linux/kernel.h>
 
+/* CPU feature register tracking */
+enum ftr_type {
+       FTR_EXACT,      /* Use a predefined safe value */
+       FTR_LOWER_SAFE, /* Smaller value is safe */
+       FTR_HIGHER_SAFE,/* Bigger value is safe */
+};
+
+#define FTR_STRICT     true    /* SANITY check strict matching required */
+#define FTR_NONSTRICT  false   /* SANITY check ignored */
+
+struct arm64_ftr_bits {
+       bool            strict;   /* CPU Sanity check: strict matching required ? */
+       enum ftr_type   type;
+       u8              shift;
+       u8              width;
+       s64             safe_val; /* safe value for discrete features */
+};
+
+/*
+ * @arm64_ftr_reg - Feature register
+ * @strict_mask                Bits which should match across all CPUs for sanity.
+ * @sys_val            Safe value across the CPUs (system view)
+ */
+struct arm64_ftr_reg {
+       u32                     sys_id;
+       const char              *name;
+       u64                     strict_mask;
+       u64                     sys_val;
+       struct arm64_ftr_bits   *ftr_bits;
+};
+
 struct arm64_cpu_capabilities {
        const char *desc;
        u16 capability;
        bool (*matches)(const struct arm64_cpu_capabilities *);
-       void (*enable)(void);
+       void (*enable)(void *);         /* Called on all active CPUs */
        union {
                struct {        /* To be used for erratum handling only */
                        u32 midr_model;
@@ -47,8 +79,11 @@ struct arm64_cpu_capabilities {
                };
 
                struct {        /* Feature register checking */
+                       u32 sys_reg;
                        int field_pos;
                        int min_field_value;
+                       int hwcap_type;
+                       unsigned long hwcap;
                };
        };
 };
@@ -76,19 +111,59 @@ static inline void cpus_set_cap(unsigned int num)
                __set_bit(num, cpu_hwcaps);
 }
 
-static inline int __attribute_const__ cpuid_feature_extract_field(u64 features,
-                                                                 int field)
+static inline int __attribute_const__
+cpuid_feature_extract_field_width(u64 features, int field, int width)
+{
+       return (s64)(features << (64 - width - field)) >> (64 - width);
+}
+
+static inline int __attribute_const__
+cpuid_feature_extract_field(u64 features, int field)
+{
+       return cpuid_feature_extract_field_width(features, field, 4);
+}
+
+static inline u64 arm64_ftr_mask(struct arm64_ftr_bits *ftrp)
+{
+       return (u64)GENMASK(ftrp->shift + ftrp->width - 1, ftrp->shift);
+}
+
+static inline s64 arm64_ftr_value(struct arm64_ftr_bits *ftrp, u64 val)
+{
+       return cpuid_feature_extract_field_width(val, ftrp->shift, ftrp->width);
+}
+
+static inline bool id_aa64mmfr0_mixed_endian_el0(u64 mmfr0)
 {
-       return (s64)(features << (64 - 4 - field)) >> (64 - 4);
+       return cpuid_feature_extract_field(mmfr0, ID_AA64MMFR0_BIGENDEL_SHIFT) == 0x1 ||
+               cpuid_feature_extract_field(mmfr0, ID_AA64MMFR0_BIGENDEL0_SHIFT) == 0x1;
 }
 
+void __init setup_cpu_features(void);
 
-void check_cpu_capabilities(const struct arm64_cpu_capabilities *caps,
+void update_cpu_capabilities(const struct arm64_cpu_capabilities *caps,
                            const char *info);
 void check_local_cpu_errata(void);
-void check_local_cpu_features(void);
-bool cpu_supports_mixed_endian_el0(void);
-bool system_supports_mixed_endian_el0(void);
+
+#ifdef CONFIG_HOTPLUG_CPU
+void verify_local_cpu_capabilities(void);
+#else
+static inline void verify_local_cpu_capabilities(void)
+{
+}
+#endif
+
+u64 read_system_reg(u32 id);
+
+static inline bool cpu_supports_mixed_endian_el0(void)
+{
+       return id_aa64mmfr0_mixed_endian_el0(read_cpuid(ID_AA64MMFR0_EL1));
+}
+
+static inline bool system_supports_mixed_endian_el0(void)
+{
+       return id_aa64mmfr0_mixed_endian_el0(read_system_reg(SYS_ID_AA64MMFR0_EL1));
+}
 
 #endif /* __ASSEMBLY__ */
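
cpuid_feature_extract_field_width() sign-extends the field it pulls out, so an ID register field holding 0xf (commonly "not implemented") compares as negative against any minimum field value. A stand-alone userspace sketch of that arithmetic, assuming a 4-bit field at bit 20 (illustrative only, not part of this patch):

#include <assert.h>
#include <stdint.h>

/* Same arithmetic as cpuid_feature_extract_field_width(), userspace copy. */
static int extract_field(uint64_t features, int field, int width)
{
	return (int64_t)(features << (64 - width - field)) >> (64 - width);
}

int main(void)
{
	assert(extract_field(0x0000000000100000ULL, 20, 4) == 1);	/* 0x1 -> 1  */
	assert(extract_field(0x0000000000f00000ULL, 20, 4) == -1);	/* 0xf -> -1 */
	return 0;
}
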
 
index 100a3d1b17c854d6c1a2c465b56ad8f101ab5efa..1a5949364ed0f43eee2be4b61c3497fe4fdbbb7b 100644 (file)
 
 #define CAVIUM_CPU_PART_THUNDERX       0x0A1
 
-#define ID_AA64MMFR0_BIGENDEL0_SHIFT   16
-#define ID_AA64MMFR0_BIGENDEL0_MASK    (0xf << ID_AA64MMFR0_BIGENDEL0_SHIFT)
-#define ID_AA64MMFR0_BIGENDEL0(mmfr0)  \
-       (((mmfr0) & ID_AA64MMFR0_BIGENDEL0_MASK) >> ID_AA64MMFR0_BIGENDEL0_SHIFT)
-#define ID_AA64MMFR0_BIGEND_SHIFT      8
-#define ID_AA64MMFR0_BIGEND_MASK       (0xf << ID_AA64MMFR0_BIGEND_SHIFT)
-#define ID_AA64MMFR0_BIGEND(mmfr0)     \
-       (((mmfr0) & ID_AA64MMFR0_BIGEND_MASK) >> ID_AA64MMFR0_BIGEND_SHIFT)
-
 #ifndef __ASSEMBLY__
 
 /*
@@ -115,12 +106,6 @@ static inline u32 __attribute_const__ read_cpuid_cachetype(void)
 {
        return read_cpuid(CTR_EL0);
 }
-
-static inline bool id_aa64mmfr0_mixed_endian_el0(u64 mmfr0)
-{
-       return (ID_AA64MMFR0_BIGEND(mmfr0) == 0x1) ||
-               (ID_AA64MMFR0_BIGENDEL0(mmfr0) == 0x1);
-}
 #endif /* __ASSEMBLY__ */
 
 #endif
index 8b9884c726adc78f37eb59e7288298267bec55b4..309704544d22763d6348095814fbdce935172c1b 100644 (file)
@@ -17,6 +17,7 @@
 
 #ifndef __ASSEMBLY__
 #include <linux/kernel.h>
+#include <linux/sizes.h>
 #include <asm/boot.h>
 #include <asm/page.h>
 
@@ -55,11 +56,7 @@ enum fixed_addresses {
         * Temporary boot-time mappings, used by early_ioremap(),
         * before ioremap() is functional.
         */
-#ifdef CONFIG_ARM64_64K_PAGES
-#define NR_FIX_BTMAPS          4
-#else
-#define NR_FIX_BTMAPS          64
-#endif
+#define NR_FIX_BTMAPS          (SZ_256K / PAGE_SIZE)
 #define FIX_BTMAPS_SLOTS       7
 #define TOTAL_FIX_BTMAPS       (NR_FIX_BTMAPS * FIX_BTMAPS_SLOTS)
 
index 4c47cb2fbb526f7ae445e4f1b46178ba99934283..e54415ec693571d1d4195d57b1928e2e58173353 100644 (file)
@@ -17,6 +17,7 @@
 #define __ASM_HW_BREAKPOINT_H
 
 #include <asm/cputype.h>
+#include <asm/cpufeature.h>
 
 #ifdef __KERNEL__
 
@@ -137,13 +138,17 @@ extern struct pmu perf_ops_bp;
 /* Determine number of BRP registers available. */
 static inline int get_num_brps(void)
 {
-       return ((read_cpuid(ID_AA64DFR0_EL1) >> 12) & 0xf) + 1;
+       return 1 +
+               cpuid_feature_extract_field(read_system_reg(SYS_ID_AA64DFR0_EL1),
+                                               ID_AA64DFR0_BRPS_SHIFT);
 }
 
 /* Determine number of WRP registers available. */
 static inline int get_num_wrps(void)
 {
-       return ((read_cpuid(ID_AA64DFR0_EL1) >> 20) & 0xf) + 1;
+       return 1 +
+               cpuid_feature_extract_field(read_system_reg(SYS_ID_AA64DFR0_EL1),
+                                               ID_AA64DFR0_WRPS_SHIFT);
 }
 
 #endif /* __KERNEL__ */
index 0ad735166d9fa7303eaa961d6f07d833c95b84ff..400b80b49595dc147fb5a2110ff347405f58bd0b 100644 (file)
 extern unsigned int compat_elf_hwcap, compat_elf_hwcap2;
 #endif
 
+enum {
+       CAP_HWCAP = 1,
+#ifdef CONFIG_COMPAT
+       CAP_COMPAT_HWCAP,
+       CAP_COMPAT_HWCAP2,
+#endif
+};
+
 extern unsigned long elf_hwcap;
 #endif
 #endif
index bbb251b14746cb7005d1be35d50fdfb453464870..09169296c3cc4c235d10a0b040c68938eb4a4c80 100644 (file)
@@ -7,7 +7,6 @@
 
 struct pt_regs;
 
-extern void migrate_irqs(void);
 extern void set_handle_irq(void (*handle_irq)(struct pt_regs *));
 
 static inline void acpi_irq_init(void)
diff --git a/arch/arm64/include/asm/kasan.h b/arch/arm64/include/asm/kasan.h
new file mode 100644 (file)
index 0000000..2774fa3
--- /dev/null
@@ -0,0 +1,38 @@
+#ifndef __ASM_KASAN_H
+#define __ASM_KASAN_H
+
+#ifndef __ASSEMBLY__
+
+#ifdef CONFIG_KASAN
+
+#include <linux/linkage.h>
+#include <asm/memory.h>
+
+/*
+ * KASAN_SHADOW_START: beginning of the kernel virtual addresses.
+ * KASAN_SHADOW_END: KASAN_SHADOW_START + 1/8 of kernel virtual addresses.
+ */
+#define KASAN_SHADOW_START      (VA_START)
+#define KASAN_SHADOW_END        (KASAN_SHADOW_START + (1UL << (VA_BITS - 3)))
+
+/*
+ * This value is used to map an address to the corresponding shadow
+ * address by the following formula:
+ *     shadow_addr = (address >> 3) + KASAN_SHADOW_OFFSET;
+ *
+ * (1 << 61) shadow addresses - [KASAN_SHADOW_OFFSET,KASAN_SHADOW_END]
+ * cover all 64-bits of virtual addresses. So KASAN_SHADOW_OFFSET
+ * should satisfy the following equation:
+ *      KASAN_SHADOW_OFFSET = KASAN_SHADOW_END - (1ULL << 61)
+ */
+#define KASAN_SHADOW_OFFSET     (KASAN_SHADOW_END - (1ULL << (64 - 3)))
+
+void kasan_init(void);
+asmlinkage void kasan_early_init(void);
+
+#else
+static inline void kasan_init(void) { }
+#endif
+
+#endif
+#endif
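
The shadow layout introduced in kasan.h follows directly from the definitions above. Assuming the common 39-bit VA_BITS configuration (an assumption for illustration, not something fixed by this patch), a stand-alone sketch of the resulting addresses:

#include <stdio.h>
#include <stdint.h>

#define VA_BITS			39	/* assumed configuration */
#define VA_START		(0xffffffffffffffffULL << VA_BITS)
#define KASAN_SHADOW_START	VA_START
#define KASAN_SHADOW_END	(KASAN_SHADOW_START + (1ULL << (VA_BITS - 3)))
#define KASAN_SHADOW_OFFSET	(KASAN_SHADOW_END - (1ULL << (64 - 3)))

int main(void)
{
	uint64_t addr = 0xffffffc000000000ULL;	/* PAGE_OFFSET for 39-bit VA */

	printf("shadow start:  %#llx\n", (unsigned long long)KASAN_SHADOW_START);
	printf("shadow end:    %#llx\n", (unsigned long long)KASAN_SHADOW_END);
	printf("shadow(%#llx) = %#llx\n", (unsigned long long)addr,
	       (unsigned long long)((addr >> 3) + KASAN_SHADOW_OFFSET));
	return 0;
}
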
diff --git a/arch/arm64/include/asm/kernel-pgtable.h b/arch/arm64/include/asm/kernel-pgtable.h
new file mode 100644 (file)
index 0000000..a459714
--- /dev/null
@@ -0,0 +1,83 @@
+/*
+ * Kernel page table mapping
+ *
+ * Copyright (C) 2015 ARM Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef __ASM_KERNEL_PGTABLE_H
+#define __ASM_KERNEL_PGTABLE_H
+
+
+/*
+ * The linear mapping and the start of memory are both 2M aligned (per
+ * the arm64 booting.txt requirements). Hence we can use section mapping
+ * with 4K (section size = 2M) but not with 16K (section size = 32M) or
+ * 64K (section size = 512M).
+ */
+#ifdef CONFIG_ARM64_4K_PAGES
+#define ARM64_SWAPPER_USES_SECTION_MAPS 1
+#else
+#define ARM64_SWAPPER_USES_SECTION_MAPS 0
+#endif
+
+/*
+ * The idmap and swapper page tables need some space reserved in the kernel
+ * image. Both require pgd, pud (4 levels only) and pmd tables to (section)
+ * map the kernel. With the 64K page configuration, swapper and idmap need to
+ * map to pte level. The swapper also maps the FDT (see __create_page_tables
+ * for more information). Note that the number of ID map translation levels
+ * could be increased on the fly if system RAM is out of reach for the default
+ * VA range, so pages required to map highest possible PA are reserved in all
+ * cases.
+ */
+#if ARM64_SWAPPER_USES_SECTION_MAPS
+#define SWAPPER_PGTABLE_LEVELS (CONFIG_PGTABLE_LEVELS - 1)
+#define IDMAP_PGTABLE_LEVELS   (ARM64_HW_PGTABLE_LEVELS(PHYS_MASK_SHIFT) - 1)
+#else
+#define SWAPPER_PGTABLE_LEVELS (CONFIG_PGTABLE_LEVELS)
+#define IDMAP_PGTABLE_LEVELS   (ARM64_HW_PGTABLE_LEVELS(PHYS_MASK_SHIFT))
+#endif
+
+#define SWAPPER_DIR_SIZE       (SWAPPER_PGTABLE_LEVELS * PAGE_SIZE)
+#define IDMAP_DIR_SIZE         (IDMAP_PGTABLE_LEVELS * PAGE_SIZE)
+
+/* Initial memory map size */
+#if ARM64_SWAPPER_USES_SECTION_MAPS
+#define SWAPPER_BLOCK_SHIFT    SECTION_SHIFT
+#define SWAPPER_BLOCK_SIZE     SECTION_SIZE
+#define SWAPPER_TABLE_SHIFT    PUD_SHIFT
+#else
+#define SWAPPER_BLOCK_SHIFT    PAGE_SHIFT
+#define SWAPPER_BLOCK_SIZE     PAGE_SIZE
+#define SWAPPER_TABLE_SHIFT    PMD_SHIFT
+#endif
+
+/* The size of the initial kernel direct mapping */
+#define SWAPPER_INIT_MAP_SIZE  (_AC(1, UL) << SWAPPER_TABLE_SHIFT)
+
+/*
+ * Initial memory map attributes.
+ */
+#define SWAPPER_PTE_FLAGS      (PTE_TYPE_PAGE | PTE_AF | PTE_SHARED)
+#define SWAPPER_PMD_FLAGS      (PMD_TYPE_SECT | PMD_SECT_AF | PMD_SECT_S)
+
+#if ARM64_SWAPPER_USES_SECTION_MAPS
+#define SWAPPER_MM_MMUFLAGS    (PMD_ATTRINDX(MT_NORMAL) | SWAPPER_PMD_FLAGS)
+#else
+#define SWAPPER_MM_MMUFLAGS    (PTE_ATTRINDX(MT_NORMAL) | SWAPPER_PTE_FLAGS)
+#endif
+
+
+#endif /* __ASM_KERNEL_PGTABLE_H */
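
To make the reservation sizes above concrete, here is a sketch of the arithmetic for one assumed configuration (4K pages, 39-bit VA, 48-bit PA, section maps in use), reusing the ARM64_HW_PGTABLE_LEVELS() formula added to pgtable-hwdef.h later in this diff; it is an illustration, not part of the patch:

#include <stdio.h>

/* Assumed configuration: 4K pages, 39-bit VA (3 levels), 48-bit PA. */
#define PAGE_SHIFT			12
#define CONFIG_PGTABLE_LEVELS		3
#define PHYS_MASK_SHIFT			48
#define ARM64_HW_PGTABLE_LEVELS(va_bits)	(((va_bits) - 4) / (PAGE_SHIFT - 3))

int main(void)
{
	int swapper_levels = CONFIG_PGTABLE_LEVELS - 1;			/* section maps */
	int idmap_levels   = ARM64_HW_PGTABLE_LEVELS(PHYS_MASK_SHIFT) - 1;

	printf("SWAPPER_DIR_SIZE = %d pages\n", swapper_levels);	/* 2 */
	printf("IDMAP_DIR_SIZE   = %d pages\n", idmap_levels);		/* 3 */
	return 0;
}
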
index 67027c611dbd51e66b72492943394ff317c3998a..853953cd1f0813fd562b68b7cdddd95582f1e392 100644 (file)
  * PAGE_OFFSET - the virtual address of the start of the kernel image (top
  *              (VA_BITS - 1))
  * VA_BITS - the maximum number of bits for virtual addresses.
+ * VA_START - the first kernel virtual address.
  * TASK_SIZE - the maximum size of a user space task.
  * TASK_UNMAPPED_BASE - the lower boundary of the mmap VM area.
  * The module space lives between the addresses given by TASK_SIZE
  * and PAGE_OFFSET - it must be within 128MB of the kernel text.
  */
 #define VA_BITS                        (CONFIG_ARM64_VA_BITS)
+#define VA_START               (UL(0xffffffffffffffff) << VA_BITS)
 #define PAGE_OFFSET            (UL(0xffffffffffffffff) << (VA_BITS - 1))
 #define MODULES_END            (PAGE_OFFSET)
 #define MODULES_VADDR          (MODULES_END - SZ_64M)
 
 #define TASK_UNMAPPED_BASE     (PAGE_ALIGN(TASK_SIZE / 4))
 
-#if TASK_SIZE_64 > MODULES_VADDR
-#error Top of 64-bit user space clashes with start of module space
-#endif
-
 /*
  * Physical vs virtual RAM address space conversion.  These are
  * private definitions which should NOT be used outside memory.h
index 030208767185bd56edce4b01e9d5aa91c08c3058..990124a67eebd4b10a19ae9509cfd5b6d9ac1712 100644 (file)
 #define __ASM_MMU_H
 
 typedef struct {
-       unsigned int id;
-       raw_spinlock_t id_lock;
-       void *vdso;
+       atomic64_t      id;
+       void            *vdso;
 } mm_context_t;
 
-#define INIT_MM_CONTEXT(name) \
-       .context.id_lock = __RAW_SPIN_LOCK_UNLOCKED(name.context.id_lock),
-
-#define ASID(mm)       ((mm)->context.id & 0xffff)
+/*
+ * This macro is only used by the TLBI code, which cannot race with an
+ * ASID change and therefore doesn't need to reload the counter using
+ * atomic64_read.
+ */
+#define ASID(mm)       ((mm)->context.id.counter & 0xffff)
 
 extern void paging_init(void);
 extern void __iomem *early_io_map(phys_addr_t phys, unsigned long virt);
index 8ec41e5f56f081b9d65d2ed58769a0f22060716d..c0e87898ba96b04f2e5b847a0dacf01f4eb1dff1 100644 (file)
 #include <asm/cputype.h>
 #include <asm/pgtable.h>
 
-#define MAX_ASID_BITS  16
-
-extern unsigned int cpu_last_asid;
-
-void __init_new_context(struct task_struct *tsk, struct mm_struct *mm);
-void __new_context(struct mm_struct *mm);
-
 #ifdef CONFIG_PID_IN_CONTEXTIDR
 static inline void contextidr_thread_switch(struct task_struct *next)
 {
@@ -77,96 +70,38 @@ static inline bool __cpu_uses_extended_idmap(void)
                unlikely(idmap_t0sz != TCR_T0SZ(VA_BITS)));
 }
 
-static inline void __cpu_set_tcr_t0sz(u64 t0sz)
-{
-       unsigned long tcr;
-
-       if (__cpu_uses_extended_idmap())
-               asm volatile (
-               "       mrs     %0, tcr_el1     ;"
-               "       bfi     %0, %1, %2, %3  ;"
-               "       msr     tcr_el1, %0     ;"
-               "       isb"
-               : "=&r" (tcr)
-               : "r"(t0sz), "I"(TCR_T0SZ_OFFSET), "I"(TCR_TxSZ_WIDTH));
-}
-
-/*
- * Set TCR.T0SZ to the value appropriate for activating the identity map.
- */
-static inline void cpu_set_idmap_tcr_t0sz(void)
-{
-       __cpu_set_tcr_t0sz(idmap_t0sz);
-}
-
 /*
  * Set TCR.T0SZ to its default value (based on VA_BITS)
  */
 static inline void cpu_set_default_tcr_t0sz(void)
 {
-       __cpu_set_tcr_t0sz(TCR_T0SZ(VA_BITS));
-}
-
-static inline void switch_new_context(struct mm_struct *mm)
-{
-       unsigned long flags;
-
-       __new_context(mm);
+       unsigned long tcr;
 
-       local_irq_save(flags);
-       cpu_switch_mm(mm->pgd, mm);
-       local_irq_restore(flags);
-}
+       if (!__cpu_uses_extended_idmap())
+               return;
 
-static inline void check_and_switch_context(struct mm_struct *mm,
-                                           struct task_struct *tsk)
-{
-       /*
-        * Required during context switch to avoid speculative page table
-        * walking with the wrong TTBR.
-        */
-       cpu_set_reserved_ttbr0();
-
-       if (!((mm->context.id ^ cpu_last_asid) >> MAX_ASID_BITS))
-               /*
-                * The ASID is from the current generation, just switch to the
-                * new pgd. This condition is only true for calls from
-                * context_switch() and interrupts are already disabled.
-                */
-               cpu_switch_mm(mm->pgd, mm);
-       else if (irqs_disabled())
-               /*
-                * Defer the new ASID allocation until after the context
-                * switch critical region since __new_context() cannot be
-                * called with interrupts disabled.
-                */
-               set_ti_thread_flag(task_thread_info(tsk), TIF_SWITCH_MM);
-       else
-               /*
-                * That is a direct call to switch_mm() or activate_mm() with
-                * interrupts enabled and a new context.
-                */
-               switch_new_context(mm);
+       asm volatile (
+       "       mrs     %0, tcr_el1     ;"
+       "       bfi     %0, %1, %2, %3  ;"
+       "       msr     tcr_el1, %0     ;"
+       "       isb"
+       : "=&r" (tcr)
+       : "r"(TCR_T0SZ(VA_BITS)), "I"(TCR_T0SZ_OFFSET), "I"(TCR_TxSZ_WIDTH));
 }
 
-#define init_new_context(tsk,mm)       (__init_new_context(tsk,mm),0)
+/*
+ * It would be nice to return ASIDs back to the allocator, but unfortunately
+ * that introduces a race with a generation rollover where we could erroneously
+ * free an ASID allocated in a future generation. We could work around this by

+ * freeing the ASID from the context of the dying mm (e.g. in arch_exit_mmap),
+ * but we'd then need to make sure that we didn't dirty any TLBs afterwards.
+ * Setting a reserved TTBR0 or EPD0 would work, but it all gets ugly when you
+ * take CPU migration into account.
+ */
 #define destroy_context(mm)            do { } while(0)
+void check_and_switch_context(struct mm_struct *mm, unsigned int cpu);
 
-#define finish_arch_post_lock_switch \
-       finish_arch_post_lock_switch
-static inline void finish_arch_post_lock_switch(void)
-{
-       if (test_and_clear_thread_flag(TIF_SWITCH_MM)) {
-               struct mm_struct *mm = current->mm;
-               unsigned long flags;
-
-               __new_context(mm);
-
-               local_irq_save(flags);
-               cpu_switch_mm(mm->pgd, mm);
-               local_irq_restore(flags);
-       }
-}
+#define init_new_context(tsk,mm)       ({ atomic64_set(&mm->context.id, 0); 0; })
 
 /*
  * This is called when "tsk" is about to enter lazy TLB mode.
@@ -194,6 +129,9 @@ switch_mm(struct mm_struct *prev, struct mm_struct *next,
 {
        unsigned int cpu = smp_processor_id();
 
+       if (prev == next)
+               return;
+
        /*
         * init_mm.pgd does not contain any user mappings and it is always
         * active for kernel addresses in TTBR1. Just set the reserved TTBR0.
@@ -203,8 +141,7 @@ switch_mm(struct mm_struct *prev, struct mm_struct *next,
                return;
        }
 
-       if (!cpumask_test_and_set_cpu(cpu, mm_cpumask(next)) || prev != next)
-               check_and_switch_context(next, tsk);
+       check_and_switch_context(next, cpu);
 }
 
 #define deactivate_mm(tsk,mm)  do { } while (0)
index 7d9c7e4a424bddd87dfdbd34e2be0d05b2078f58..9b2f5a9d019df493fa6021ee3ca6b4779401d8c4 100644 (file)
 #define __ASM_PAGE_H
 
 /* PAGE_SHIFT determines the page size */
+/* CONT_SHIFT determines the number of pages which can be tracked together  */
 #ifdef CONFIG_ARM64_64K_PAGES
 #define PAGE_SHIFT             16
+#define CONT_SHIFT             5
+#elif defined(CONFIG_ARM64_16K_PAGES)
+#define PAGE_SHIFT             14
+#define CONT_SHIFT             7
 #else
 #define PAGE_SHIFT             12
+#define CONT_SHIFT             4
 #endif
-#define PAGE_SIZE              (_AC(1,UL) << PAGE_SHIFT)
+#define PAGE_SIZE              (_AC(1, UL) << PAGE_SHIFT)
 #define PAGE_MASK              (~(PAGE_SIZE-1))
 
-/*
- * The idmap and swapper page tables need some space reserved in the kernel
- * image. Both require pgd, pud (4 levels only) and pmd tables to (section)
- * map the kernel. With the 64K page configuration, swapper and idmap need to
- * map to pte level. The swapper also maps the FDT (see __create_page_tables
- * for more information). Note that the number of ID map translation levels
- * could be increased on the fly if system RAM is out of reach for the default
- * VA range, so 3 pages are reserved in all cases.
- */
-#ifdef CONFIG_ARM64_64K_PAGES
-#define SWAPPER_PGTABLE_LEVELS (CONFIG_PGTABLE_LEVELS)
-#else
-#define SWAPPER_PGTABLE_LEVELS (CONFIG_PGTABLE_LEVELS - 1)
-#endif
-
-#define SWAPPER_DIR_SIZE       (SWAPPER_PGTABLE_LEVELS * PAGE_SIZE)
-#define IDMAP_DIR_SIZE         (3 * PAGE_SIZE)
+#define CONT_SIZE              (_AC(1, UL) << (CONT_SHIFT + PAGE_SHIFT))
+#define CONT_MASK              (~(CONT_SIZE-1))
 
 #ifndef __ASSEMBLY__
 
index 76420568d66a463d7ba74fdf8a74ba916dd1dbd8..c15053902942e0a3a34ba4882545c5b6d163c23c 100644 (file)
@@ -27,6 +27,7 @@
 #define check_pgt_cache()              do { } while (0)
 
 #define PGALLOC_GFP    (GFP_KERNEL | __GFP_NOTRACK | __GFP_REPEAT | __GFP_ZERO)
+#define PGD_SIZE       (PTRS_PER_PGD * sizeof(pgd_t))
 
 #if CONFIG_PGTABLE_LEVELS > 2
 
index 24154b055835b05469903bb38d8d2f5ddea5f348..d6739e836f7bb919a7e49b7e14fc43d2454f46de 100644 (file)
 #ifndef __ASM_PGTABLE_HWDEF_H
 #define __ASM_PGTABLE_HWDEF_H
 
+/*
+ * Number of page-table levels required to address 'va_bits' wide
+ * address, without section mapping. We resolve the top (va_bits - PAGE_SHIFT)
+ * bits with (PAGE_SHIFT - 3) bits at each page table level. Hence:
+ *
+ *  levels = DIV_ROUND_UP((va_bits - PAGE_SHIFT), (PAGE_SHIFT - 3))
+ *
+ * where DIV_ROUND_UP(n, d) => (((n) + (d) - 1) / (d))
+ *
+ * We cannot include linux/kernel.h which defines DIV_ROUND_UP here
+ * due to build issues. So we open code DIV_ROUND_UP here:
+ *
+ *     ((((va_bits) - PAGE_SHIFT) + (PAGE_SHIFT - 3) - 1) / (PAGE_SHIFT - 3))
+ *
+ * which gets simplified as :
+ */
+#define ARM64_HW_PGTABLE_LEVELS(va_bits) (((va_bits) - 4) / (PAGE_SHIFT - 3))
+
+/*
+ * Size mapped by an entry at level n ( 0 <= n <= 3)
+ * We map (PAGE_SHIFT - 3) at all translation levels and PAGE_SHIFT bits
+ * in the final page. The maximum number of translation levels supported by
+ * the architecture is 4. Hence, starting at level n, we have further
+ * ((4 - n) - 1) levels of translation excluding the offset within the page.
+ * So, the total number of bits mapped by an entry at level n is :
+ *
+ *  ((4 - n) - 1) * (PAGE_SHIFT - 3) + PAGE_SHIFT
+ *
+ * Rearranging it a bit we get :
+ *   (4 - n) * (PAGE_SHIFT - 3) + 3
+ */
+#define ARM64_HW_PGTABLE_LEVEL_SHIFT(n)        ((PAGE_SHIFT - 3) * (4 - (n)) + 3)
+
 #define PTRS_PER_PTE           (1 << (PAGE_SHIFT - 3))
 
 /*
  * PMD_SHIFT determines the size a level 2 page table entry can map.
  */
 #if CONFIG_PGTABLE_LEVELS > 2
-#define PMD_SHIFT              ((PAGE_SHIFT - 3) * 2 + 3)
+#define PMD_SHIFT              ARM64_HW_PGTABLE_LEVEL_SHIFT(2)
 #define PMD_SIZE               (_AC(1, UL) << PMD_SHIFT)
 #define PMD_MASK               (~(PMD_SIZE-1))
 #define PTRS_PER_PMD           PTRS_PER_PTE
@@ -32,7 +65,7 @@
  * PUD_SHIFT determines the size a level 1 page table entry can map.
  */
 #if CONFIG_PGTABLE_LEVELS > 3
-#define PUD_SHIFT              ((PAGE_SHIFT - 3) * 3 + 3)
+#define PUD_SHIFT              ARM64_HW_PGTABLE_LEVEL_SHIFT(1)
 #define PUD_SIZE               (_AC(1, UL) << PUD_SHIFT)
 #define PUD_MASK               (~(PUD_SIZE-1))
 #define PTRS_PER_PUD           PTRS_PER_PTE
@@ -42,7 +75,7 @@
  * PGDIR_SHIFT determines the size a top-level page table entry can map
  * (depending on the configuration, this level can be 0, 1 or 2).
  */
-#define PGDIR_SHIFT            ((PAGE_SHIFT - 3) * CONFIG_PGTABLE_LEVELS + 3)
+#define PGDIR_SHIFT            ARM64_HW_PGTABLE_LEVEL_SHIFT(4 - CONFIG_PGTABLE_LEVELS)
 #define PGDIR_SIZE             (_AC(1, UL) << PGDIR_SHIFT)
 #define PGDIR_MASK             (~(PGDIR_SIZE-1))
 #define PTRS_PER_PGD           (1 << (VA_BITS - PGDIR_SHIFT))
 #define SECTION_SIZE           (_AC(1, UL) << SECTION_SHIFT)
 #define SECTION_MASK           (~(SECTION_SIZE-1))
 
+/*
+ * Contiguous page definitions.
+ */
+#define CONT_PTES              (_AC(1, UL) << CONT_SHIFT)
+/* the numerical offset of the PTE within a range of CONT_PTES */
+#define CONT_RANGE_OFFSET(addr) (((addr)>>PAGE_SHIFT)&(CONT_PTES-1))
+
 /*
  * Hardware page table definitions.
  *
 #define PMD_SECT_S             (_AT(pmdval_t, 3) << 8)
 #define PMD_SECT_AF            (_AT(pmdval_t, 1) << 10)
 #define PMD_SECT_NG            (_AT(pmdval_t, 1) << 11)
+#define PMD_SECT_CONT          (_AT(pmdval_t, 1) << 52)
 #define PMD_SECT_PXN           (_AT(pmdval_t, 1) << 53)
 #define PMD_SECT_UXN           (_AT(pmdval_t, 1) << 54)
 
 #define PTE_AF                 (_AT(pteval_t, 1) << 10)        /* Access Flag */
 #define PTE_NG                 (_AT(pteval_t, 1) << 11)        /* nG */
 #define PTE_DBM                        (_AT(pteval_t, 1) << 51)        /* Dirty Bit Management */
+#define PTE_CONT               (_AT(pteval_t, 1) << 52)        /* Contiguous range */
 #define PTE_PXN                        (_AT(pteval_t, 1) << 53)        /* Privileged XN */
 #define PTE_UXN                        (_AT(pteval_t, 1) << 54)        /* User XN */
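
The ARM64_HW_PGTABLE_LEVELS() and ARM64_HW_PGTABLE_LEVEL_SHIFT() helpers added earlier in this file can be sanity-checked against a few common configurations; the stand-alone sketch below only restates the formulas and is given for illustration:

#include <assert.h>

/* Userspace copies of the two macros, parameterised on PAGE_SHIFT. */
#define LEVELS(page_shift, va_bits)	(((va_bits) - 4) / ((page_shift) - 3))
#define LEVEL_SHIFT(page_shift, n)	(((page_shift) - 3) * (4 - (n)) + 3)

int main(void)
{
	assert(LEVELS(12, 39) == 3);		/* 4K pages,  39-bit VA */
	assert(LEVELS(12, 48) == 4);		/* 4K pages,  48-bit VA */
	assert(LEVELS(16, 42) == 2);		/* 64K pages, 42-bit VA */
	assert(LEVEL_SHIFT(12, 2) == 21);	/* PMD_SHIFT: 2M sections with 4K pages */
	assert(LEVEL_SHIFT(16, 2) == 29);	/* PMD_SHIFT: 512M sections with 64K pages */
	return 0;
}
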
 
index 571ca0ed4f0565378131b25f317ef8b2f819c3b2..f3acf421ded4f55616abd7b68a7dcf83081e6f38 100644 (file)
  *     fixed mappings and modules
  */
 #define VMEMMAP_SIZE           ALIGN((1UL << (VA_BITS - PAGE_SHIFT)) * sizeof(struct page), PUD_SIZE)
-#define VMALLOC_START          (UL(0xffffffffffffffff) << VA_BITS)
+
+#ifndef CONFIG_KASAN
+#define VMALLOC_START          (VA_START)
+#else
+#include <asm/kasan.h>
+#define VMALLOC_START          (KASAN_SHADOW_END + SZ_64K)
+#endif
+
 #define VMALLOC_END            (PAGE_OFFSET - PUD_SIZE - VMEMMAP_SIZE - SZ_64K)
 
 #define vmemmap                        ((struct page *)(VMALLOC_END + SZ_64K))
@@ -74,6 +81,7 @@ extern void __pgd_error(const char *file, int line, unsigned long val);
 
 #define PAGE_KERNEL            __pgprot(_PAGE_DEFAULT | PTE_PXN | PTE_UXN | PTE_DIRTY | PTE_WRITE)
 #define PAGE_KERNEL_EXEC       __pgprot(_PAGE_DEFAULT | PTE_UXN | PTE_DIRTY | PTE_WRITE)
+#define PAGE_KERNEL_EXEC_CONT  __pgprot(_PAGE_DEFAULT | PTE_UXN | PTE_DIRTY | PTE_WRITE | PTE_CONT)
 
 #define PAGE_HYP               __pgprot(_PAGE_DEFAULT | PTE_HYP)
 #define PAGE_HYP_DEVICE                __pgprot(PROT_DEVICE_nGnRE | PTE_HYP)
@@ -142,6 +150,7 @@ extern struct page *empty_zero_page;
 #define pte_special(pte)       (!!(pte_val(pte) & PTE_SPECIAL))
 #define pte_write(pte)         (!!(pte_val(pte) & PTE_WRITE))
 #define pte_exec(pte)          (!(pte_val(pte) & PTE_UXN))
+#define pte_cont(pte)          (!!(pte_val(pte) & PTE_CONT))
 
 #ifdef CONFIG_ARM64_HW_AFDBM
 #define pte_hw_dirty(pte)      (pte_write(pte) && !(pte_val(pte) & PTE_RDONLY))
@@ -204,6 +213,16 @@ static inline pte_t pte_mkspecial(pte_t pte)
        return set_pte_bit(pte, __pgprot(PTE_SPECIAL));
 }
 
+static inline pte_t pte_mkcont(pte_t pte)
+{
+       return set_pte_bit(pte, __pgprot(PTE_CONT));
+}
+
+static inline pte_t pte_mknoncont(pte_t pte)
+{
+       return clear_pte_bit(pte, __pgprot(PTE_CONT));
+}
+
 static inline void set_pte(pte_t *ptep, pte_t pte)
 {
        *ptep = pte;
@@ -648,14 +667,17 @@ static inline void update_mmu_cache(struct vm_area_struct *vma,
                                    unsigned long addr, pte_t *ptep)
 {
        /*
-        * set_pte() does not have a DSB for user mappings, so make sure that
-        * the page table write is visible.
+        * We don't do anything here, so there's a very small chance of
+        * us retaking a user fault which we just fixed up. The alternative
+        * is doing a dsb(ishst), but that penalises the fastpath.
         */
-       dsb(ishst);
 }
 
 #define update_mmu_cache_pmd(vma, address, pmd) do { } while (0)
 
+#define kc_vaddr_to_offset(v)  ((v) & ~VA_START)
+#define kc_offset_to_vaddr(o)  ((o) | VA_START)
+
 #endif /* !__ASSEMBLY__ */
 
 #endif /* __ASM_PGTABLE_H */
diff --git a/arch/arm64/include/asm/pmu.h b/arch/arm64/include/asm/pmu.h
deleted file mode 100644 (file)
index b7710a5..0000000
+++ /dev/null
@@ -1,83 +0,0 @@
-/*
- * Based on arch/arm/include/asm/pmu.h
- *
- * Copyright (C) 2009 picoChip Designs Ltd, Jamie Iles
- * Copyright (C) 2012 ARM Ltd.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program.  If not, see <http://www.gnu.org/licenses/>.
- */
-#ifndef __ASM_PMU_H
-#define __ASM_PMU_H
-
-#ifdef CONFIG_HW_PERF_EVENTS
-
-/* The events for a given PMU register set. */
-struct pmu_hw_events {
-       /*
-        * The events that are active on the PMU for the given index.
-        */
-       struct perf_event       **events;
-
-       /*
-        * A 1 bit for an index indicates that the counter is being used for
-        * an event. A 0 means that the counter can be used.
-        */
-       unsigned long           *used_mask;
-
-       /*
-        * Hardware lock to serialize accesses to PMU registers. Needed for the
-        * read/modify/write sequences.
-        */
-       raw_spinlock_t          pmu_lock;
-};
-
-struct arm_pmu {
-       struct pmu              pmu;
-       cpumask_t               active_irqs;
-       int                     *irq_affinity;
-       const char              *name;
-       irqreturn_t             (*handle_irq)(int irq_num, void *dev);
-       void                    (*enable)(struct hw_perf_event *evt, int idx);
-       void                    (*disable)(struct hw_perf_event *evt, int idx);
-       int                     (*get_event_idx)(struct pmu_hw_events *hw_events,
-                                                struct hw_perf_event *hwc);
-       int                     (*set_event_filter)(struct hw_perf_event *evt,
-                                                   struct perf_event_attr *attr);
-       u32                     (*read_counter)(int idx);
-       void                    (*write_counter)(int idx, u32 val);
-       void                    (*start)(void);
-       void                    (*stop)(void);
-       void                    (*reset)(void *);
-       int                     (*map_event)(struct perf_event *event);
-       int                     num_events;
-       atomic_t                active_events;
-       struct mutex            reserve_mutex;
-       u64                     max_period;
-       struct platform_device  *plat_device;
-       struct pmu_hw_events    *(*get_hw_events)(void);
-};
-
-#define to_arm_pmu(p) (container_of(p, struct arm_pmu, pmu))
-
-int __init armpmu_register(struct arm_pmu *armpmu, char *name, int type);
-
-u64 armpmu_event_update(struct perf_event *event,
-                       struct hw_perf_event *hwc,
-                       int idx);
-
-int armpmu_event_set_period(struct perf_event *event,
-                           struct hw_perf_event *hwc,
-                           int idx);
-
-#endif /* CONFIG_HW_PERF_EVENTS */
-#endif /* __ASM_PMU_H */
index 98f32355dc972817eadbe3809ef92aaae9f9cfba..4acb7ca94fcd9c05569f3103ab09097c19ea72d5 100644 (file)
@@ -186,6 +186,6 @@ static inline void spin_lock_prefetch(const void *x)
 
 #endif
 
-void cpu_enable_pan(void);
+void cpu_enable_pan(void *__unused);
 
 #endif /* __ASM_PROCESSOR_H */
index 536274ed292ea6c40935ab56497ae5bde6d40729..e9e5467e0bf4523a3de2ceb7fc11cdb0f81e8154 100644 (file)
 #define compat_sp      regs[13]
 #define compat_lr      regs[14]
 #define compat_sp_hyp  regs[15]
-#define compat_sp_irq  regs[16]
-#define compat_lr_irq  regs[17]
-#define compat_sp_svc  regs[18]
-#define compat_lr_svc  regs[19]
-#define compat_sp_abt  regs[20]
-#define compat_lr_abt  regs[21]
-#define compat_sp_und  regs[22]
-#define compat_lr_und  regs[23]
+#define compat_lr_irq  regs[16]
+#define compat_sp_irq  regs[17]
+#define compat_lr_svc  regs[18]
+#define compat_sp_svc  regs[19]
+#define compat_lr_abt  regs[20]
+#define compat_sp_abt  regs[21]
+#define compat_lr_und  regs[22]
+#define compat_sp_und  regs[23]
 #define compat_r8_fiq  regs[24]
 #define compat_r9_fiq  regs[25]
 #define compat_r10_fiq regs[26]
index 64d2d4884a9db1e663b1d025eca2a8db3bbdcce4..2eb714c4639f5669b1012aae0c77ca74fdb72881 100644 (file)
@@ -36,17 +36,33 @@ extern __kernel_size_t strnlen(const char *, __kernel_size_t);
 
 #define __HAVE_ARCH_MEMCPY
 extern void *memcpy(void *, const void *, __kernel_size_t);
+extern void *__memcpy(void *, const void *, __kernel_size_t);
 
 #define __HAVE_ARCH_MEMMOVE
 extern void *memmove(void *, const void *, __kernel_size_t);
+extern void *__memmove(void *, const void *, __kernel_size_t);
 
 #define __HAVE_ARCH_MEMCHR
 extern void *memchr(const void *, int, __kernel_size_t);
 
 #define __HAVE_ARCH_MEMSET
 extern void *memset(void *, int, __kernel_size_t);
+extern void *__memset(void *, int, __kernel_size_t);
 
 #define __HAVE_ARCH_MEMCMP
 extern int memcmp(const void *, const void *, size_t);
 
+
+#if defined(CONFIG_KASAN) && !defined(__SANITIZE_ADDRESS__)
+
+/*
+ * For files that are not instrumented (e.g. mm/slub.c) we
+ * should use the non-instrumented versions of the mem* functions.
+ */
+
+#define memcpy(dst, src, len) __memcpy(dst, src, len)
+#define memmove(dst, src, len) __memmove(dst, src, len)
+#define memset(s, c, n) __memset(s, c, n)
+#endif
+
 #endif
index a7f3d4b2514d615e191c86feb3da5da9351f173f..d48ab5b41f521c23819c0b3927a9ea0f73db242c 100644 (file)
@@ -22,9 +22,6 @@
 
 #include <asm/opcodes.h>
 
-#define SCTLR_EL1_CP15BEN      (0x1 << 5)
-#define SCTLR_EL1_SED          (0x1 << 8)
-
 /*
  * ARMv8 ARM reserves the following encoding for system registers:
  * (Ref: ARMv8 ARM, Section: "System instruction class encoding overview",
 #define sys_reg(op0, op1, crn, crm, op2) \
        ((((op0)&3)<<19)|((op1)<<16)|((crn)<<12)|((crm)<<8)|((op2)<<5))
 
-#define REG_PSTATE_PAN_IMM                     sys_reg(0, 0, 4, 0, 4)
-#define SCTLR_EL1_SPAN                         (1 << 23)
+#define SYS_MIDR_EL1                   sys_reg(3, 0, 0, 0, 0)
+#define SYS_MPIDR_EL1                  sys_reg(3, 0, 0, 0, 5)
+#define SYS_REVIDR_EL1                 sys_reg(3, 0, 0, 0, 6)
+
+#define SYS_ID_PFR0_EL1                        sys_reg(3, 0, 0, 1, 0)
+#define SYS_ID_PFR1_EL1                        sys_reg(3, 0, 0, 1, 1)
+#define SYS_ID_DFR0_EL1                        sys_reg(3, 0, 0, 1, 2)
+#define SYS_ID_MMFR0_EL1               sys_reg(3, 0, 0, 1, 4)
+#define SYS_ID_MMFR1_EL1               sys_reg(3, 0, 0, 1, 5)
+#define SYS_ID_MMFR2_EL1               sys_reg(3, 0, 0, 1, 6)
+#define SYS_ID_MMFR3_EL1               sys_reg(3, 0, 0, 1, 7)
+
+#define SYS_ID_ISAR0_EL1               sys_reg(3, 0, 0, 2, 0)
+#define SYS_ID_ISAR1_EL1               sys_reg(3, 0, 0, 2, 1)
+#define SYS_ID_ISAR2_EL1               sys_reg(3, 0, 0, 2, 2)
+#define SYS_ID_ISAR3_EL1               sys_reg(3, 0, 0, 2, 3)
+#define SYS_ID_ISAR4_EL1               sys_reg(3, 0, 0, 2, 4)
+#define SYS_ID_ISAR5_EL1               sys_reg(3, 0, 0, 2, 5)
+#define SYS_ID_MMFR4_EL1               sys_reg(3, 0, 0, 2, 6)
+
+#define SYS_MVFR0_EL1                  sys_reg(3, 0, 0, 3, 0)
+#define SYS_MVFR1_EL1                  sys_reg(3, 0, 0, 3, 1)
+#define SYS_MVFR2_EL1                  sys_reg(3, 0, 0, 3, 2)
+
+#define SYS_ID_AA64PFR0_EL1            sys_reg(3, 0, 0, 4, 0)
+#define SYS_ID_AA64PFR1_EL1            sys_reg(3, 0, 0, 4, 1)
+
+#define SYS_ID_AA64DFR0_EL1            sys_reg(3, 0, 0, 5, 0)
+#define SYS_ID_AA64DFR1_EL1            sys_reg(3, 0, 0, 5, 1)
+
+#define SYS_ID_AA64ISAR0_EL1           sys_reg(3, 0, 0, 6, 0)
+#define SYS_ID_AA64ISAR1_EL1           sys_reg(3, 0, 0, 6, 1)
+
+#define SYS_ID_AA64MMFR0_EL1           sys_reg(3, 0, 0, 7, 0)
+#define SYS_ID_AA64MMFR1_EL1           sys_reg(3, 0, 0, 7, 1)
+
+#define SYS_CNTFRQ_EL0                 sys_reg(3, 3, 14, 0, 0)
+#define SYS_CTR_EL0                    sys_reg(3, 3, 0, 0, 1)
+#define SYS_DCZID_EL0                  sys_reg(3, 3, 0, 0, 7)
+
+#define REG_PSTATE_PAN_IMM             sys_reg(0, 0, 4, 0, 4)
 
 #define SET_PSTATE_PAN(x) __inst_arm(0xd5000000 | REG_PSTATE_PAN_IMM |\
                                     (!!x)<<8 | 0x1f)
 
+/* SCTLR_EL1 */
+#define SCTLR_EL1_CP15BEN      (0x1 << 5)
+#define SCTLR_EL1_SED          (0x1 << 8)
+#define SCTLR_EL1_SPAN         (0x1 << 23)
+
+
+/* id_aa64isar0 */
+#define ID_AA64ISAR0_RDM_SHIFT         28
+#define ID_AA64ISAR0_ATOMICS_SHIFT     20
+#define ID_AA64ISAR0_CRC32_SHIFT       16
+#define ID_AA64ISAR0_SHA2_SHIFT                12
+#define ID_AA64ISAR0_SHA1_SHIFT                8
+#define ID_AA64ISAR0_AES_SHIFT         4
+
+/* id_aa64pfr0 */
+#define ID_AA64PFR0_GIC_SHIFT          24
+#define ID_AA64PFR0_ASIMD_SHIFT                20
+#define ID_AA64PFR0_FP_SHIFT           16
+#define ID_AA64PFR0_EL3_SHIFT          12
+#define ID_AA64PFR0_EL2_SHIFT          8
+#define ID_AA64PFR0_EL1_SHIFT          4
+#define ID_AA64PFR0_EL0_SHIFT          0
+
+#define ID_AA64PFR0_FP_NI              0xf
+#define ID_AA64PFR0_FP_SUPPORTED       0x0
+#define ID_AA64PFR0_ASIMD_NI           0xf
+#define ID_AA64PFR0_ASIMD_SUPPORTED    0x0
+#define ID_AA64PFR0_EL1_64BIT_ONLY     0x1
+#define ID_AA64PFR0_EL0_64BIT_ONLY     0x1
+
+/* id_aa64mmfr0 */
+#define ID_AA64MMFR0_TGRAN4_SHIFT      28
+#define ID_AA64MMFR0_TGRAN64_SHIFT     24
+#define ID_AA64MMFR0_TGRAN16_SHIFT     20
+#define ID_AA64MMFR0_BIGENDEL0_SHIFT   16
+#define ID_AA64MMFR0_SNSMEM_SHIFT      12
+#define ID_AA64MMFR0_BIGENDEL_SHIFT    8
+#define ID_AA64MMFR0_ASID_SHIFT                4
+#define ID_AA64MMFR0_PARANGE_SHIFT     0
+
+#define ID_AA64MMFR0_TGRAN4_NI         0xf
+#define ID_AA64MMFR0_TGRAN4_SUPPORTED  0x0
+#define ID_AA64MMFR0_TGRAN64_NI                0xf
+#define ID_AA64MMFR0_TGRAN64_SUPPORTED 0x0
+#define ID_AA64MMFR0_TGRAN16_NI                0x0
+#define ID_AA64MMFR0_TGRAN16_SUPPORTED 0x1
+
+/* id_aa64mmfr1 */
+#define ID_AA64MMFR1_PAN_SHIFT         20
+#define ID_AA64MMFR1_LOR_SHIFT         16
+#define ID_AA64MMFR1_HPD_SHIFT         12
+#define ID_AA64MMFR1_VHE_SHIFT         8
+#define ID_AA64MMFR1_VMIDBITS_SHIFT    4
+#define ID_AA64MMFR1_HADBS_SHIFT       0
+
+/* id_aa64dfr0 */
+#define ID_AA64DFR0_CTX_CMPS_SHIFT     28
+#define ID_AA64DFR0_WRPS_SHIFT         20
+#define ID_AA64DFR0_BRPS_SHIFT         12
+#define ID_AA64DFR0_PMUVER_SHIFT       8
+#define ID_AA64DFR0_TRACEVER_SHIFT     4
+#define ID_AA64DFR0_DEBUGVER_SHIFT     0
+
+#define ID_ISAR5_RDM_SHIFT             24
+#define ID_ISAR5_CRC32_SHIFT           16
+#define ID_ISAR5_SHA2_SHIFT            12
+#define ID_ISAR5_SHA1_SHIFT            8
+#define ID_ISAR5_AES_SHIFT             4
+#define ID_ISAR5_SEVL_SHIFT            0
+
+#define MVFR0_FPROUND_SHIFT            28
+#define MVFR0_FPSHVEC_SHIFT            24
+#define MVFR0_FPSQRT_SHIFT             20
+#define MVFR0_FPDIVIDE_SHIFT           16
+#define MVFR0_FPTRAP_SHIFT             12
+#define MVFR0_FPDP_SHIFT               8
+#define MVFR0_FPSP_SHIFT               4
+#define MVFR0_SIMD_SHIFT               0
+
+#define MVFR1_SIMDFMAC_SHIFT           28
+#define MVFR1_FPHP_SHIFT               24
+#define MVFR1_SIMDHP_SHIFT             20
+#define MVFR1_SIMDSP_SHIFT             16
+#define MVFR1_SIMDINT_SHIFT            12
+#define MVFR1_SIMDLS_SHIFT             8
+#define MVFR1_FPDNAN_SHIFT             4
+#define MVFR1_FPFTZ_SHIFT              0
+
+
+#define ID_AA64MMFR0_TGRAN4_SHIFT      28
+#define ID_AA64MMFR0_TGRAN64_SHIFT     24
+#define ID_AA64MMFR0_TGRAN16_SHIFT     20
+
+#define ID_AA64MMFR0_TGRAN4_NI         0xf
+#define ID_AA64MMFR0_TGRAN4_SUPPORTED  0x0
+#define ID_AA64MMFR0_TGRAN64_NI                0xf
+#define ID_AA64MMFR0_TGRAN64_SUPPORTED 0x0
+#define ID_AA64MMFR0_TGRAN16_NI                0x0
+#define ID_AA64MMFR0_TGRAN16_SUPPORTED 0x1
+
+#if defined(CONFIG_ARM64_4K_PAGES)
+#define ID_AA64MMFR0_TGRAN_SHIFT       ID_AA64MMFR0_TGRAN4_SHIFT
+#define ID_AA64MMFR0_TGRAN_SUPPORTED   ID_AA64MMFR0_TGRAN4_SUPPORTED
+#elif defined(CONFIG_ARM64_16K_PAGES)
+#define ID_AA64MMFR0_TGRAN_SHIFT       ID_AA64MMFR0_TGRAN16_SHIFT
+#define ID_AA64MMFR0_TGRAN_SUPPORTED   ID_AA64MMFR0_TGRAN16_SUPPORTED
+#elif defined(CONFIG_ARM64_64K_PAGES)
+#define ID_AA64MMFR0_TGRAN_SHIFT       ID_AA64MMFR0_TGRAN64_SHIFT
+#define ID_AA64MMFR0_TGRAN_SUPPORTED   ID_AA64MMFR0_TGRAN64_SUPPORTED
+#endif
+
 #ifdef __ASSEMBLY__
 
        .irp    num,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30
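
The sys_reg() encoding shown above packs op0/op1/CRn/CRm/op2 into a single constant that the new SYS_* definitions rely on; a quick stand-alone check of one encoding (illustrative only):

#include <assert.h>

/* Userspace copy of the sys_reg() encoding from sysreg.h. */
#define sys_reg(op0, op1, crn, crm, op2) \
	((((op0) & 3) << 19) | ((op1) << 16) | ((crn) << 12) | ((crm) << 8) | ((op2) << 5))

int main(void)
{
	/* SYS_ID_AA64MMFR0_EL1 is sys_reg(3, 0, 0, 7, 0). */
	assert(sys_reg(3, 0, 0, 7, 0) == 0x180700);
	return 0;
}
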
index dcd06d18a42a36a4859c34370819d323f7e171d7..90c7ff233735d7691bf3b34b7075dffd07dbf939 100644 (file)
 
 #include <linux/compiler.h>
 
-#ifndef CONFIG_ARM64_64K_PAGES
+#ifdef CONFIG_ARM64_4K_PAGES
 #define THREAD_SIZE_ORDER      2
+#elif defined(CONFIG_ARM64_16K_PAGES)
+#define THREAD_SIZE_ORDER      0
 #endif
 
 #define THREAD_SIZE            16384
@@ -111,7 +113,6 @@ static inline struct thread_info *current_thread_info(void)
 #define TIF_RESTORE_SIGMASK    20
 #define TIF_SINGLESTEP         21
 #define TIF_32BIT              22      /* 32bit process */
-#define TIF_SWITCH_MM          23      /* deferred switch_mm */
 
 #define _TIF_SIGPENDING                (1 << TIF_SIGPENDING)
 #define _TIF_NEED_RESCHED      (1 << TIF_NEED_RESCHED)
index d6e6b666038032e0ff472bea05a744f6515f41c0..ffdaea7954bb620daf19aba8b855d4c04b1a33c1 100644 (file)
@@ -37,17 +37,21 @@ static inline void __tlb_remove_table(void *_table)
 
 static inline void tlb_flush(struct mmu_gather *tlb)
 {
-       if (tlb->fullmm) {
-               flush_tlb_mm(tlb->mm);
-       } else {
-               struct vm_area_struct vma = { .vm_mm = tlb->mm, };
-               /*
-                * The intermediate page table levels are already handled by
-                * the __(pte|pmd|pud)_free_tlb() functions, so last level
-                * TLBI is sufficient here.
-                */
-               __flush_tlb_range(&vma, tlb->start, tlb->end, true);
-       }
+       struct vm_area_struct vma = { .vm_mm = tlb->mm, };
+
+       /*
+        * The ASID allocator will either invalidate the ASID or mark
+        * it as used.
+        */
+       if (tlb->fullmm)
+               return;
+
+       /*
+        * The intermediate page table levels are already handled by
+        * the __(pte|pmd|pud)_free_tlb() functions, so last level
+        * TLBI is sufficient here.
+        */
+       __flush_tlb_range(&vma, tlb->start, tlb->end, true);
 }
 
 static inline void __pte_free_tlb(struct mmu_gather *tlb, pgtable_t pte,
index 7bd2da021658ad765aa6bc93374f443ad0d7b594..b460ae28e3463db46816f678c5f90988c3f059de 100644 (file)
  *             only require the D-TLB to be invalidated.
  *             - kaddr - Kernel virtual memory address
  */
+static inline void local_flush_tlb_all(void)
+{
+       dsb(nshst);
+       asm("tlbi       vmalle1");
+       dsb(nsh);
+       isb();
+}
+
 static inline void flush_tlb_all(void)
 {
        dsb(ishst);
@@ -73,7 +81,7 @@ static inline void flush_tlb_all(void)
 
 static inline void flush_tlb_mm(struct mm_struct *mm)
 {
-       unsigned long asid = (unsigned long)ASID(mm) << 48;
+       unsigned long asid = ASID(mm) << 48;
 
        dsb(ishst);
        asm("tlbi       aside1is, %0" : : "r" (asid));
@@ -83,8 +91,7 @@ static inline void flush_tlb_mm(struct mm_struct *mm)
 static inline void flush_tlb_page(struct vm_area_struct *vma,
                                  unsigned long uaddr)
 {
-       unsigned long addr = uaddr >> 12 |
-               ((unsigned long)ASID(vma->vm_mm) << 48);
+       unsigned long addr = uaddr >> 12 | (ASID(vma->vm_mm) << 48);
 
        dsb(ishst);
        asm("tlbi       vale1is, %0" : : "r" (addr));
@@ -101,7 +108,7 @@ static inline void __flush_tlb_range(struct vm_area_struct *vma,
                                     unsigned long start, unsigned long end,
                                     bool last_level)
 {
-       unsigned long asid = (unsigned long)ASID(vma->vm_mm) << 48;
+       unsigned long asid = ASID(vma->vm_mm) << 48;
        unsigned long addr;
 
        if ((end - start) > MAX_TLB_RANGE) {
@@ -154,9 +161,8 @@ static inline void flush_tlb_kernel_range(unsigned long start, unsigned long end
 static inline void __flush_tlb_pgtable(struct mm_struct *mm,
                                       unsigned long uaddr)
 {
-       unsigned long addr = uaddr >> 12 | ((unsigned long)ASID(mm) << 48);
+       unsigned long addr = uaddr >> 12 | (ASID(mm) << 48);
 
-       dsb(ishst);
        asm("tlbi       vae1is, %0" : : "r" (addr));
        dsb(ish);
 }
index 22dc9bc781be60d34f0285c7e7e295fe62e809dd..474691f8b13ab893cf403b8c1737a91aeff87bc0 100644 (file)
@@ -4,7 +4,6 @@
 
 CPPFLAGS_vmlinux.lds   := -DTEXT_OFFSET=$(TEXT_OFFSET)
 AFLAGS_head.o          := -DTEXT_OFFSET=$(TEXT_OFFSET)
-CFLAGS_efi-stub.o      := -DTEXT_OFFSET=$(TEXT_OFFSET)
 CFLAGS_armv8_deprecated.o := -I$(src)
 
 CFLAGS_REMOVE_ftrace.o = -pg
@@ -20,6 +19,12 @@ arm64-obj-y          := debug-monitors.o entry.o irq.o fpsimd.o              \
                           cpufeature.o alternative.o cacheinfo.o               \
                           smp.o smp_spin_table.o topology.o
 
+extra-$(CONFIG_EFI)                    := efi-entry.o
+
+OBJCOPYFLAGS := --prefix-symbols=__efistub_
+$(obj)/%.stub.o: $(obj)/%.o FORCE
+       $(call if_changed,objcopy)
+
 arm64-obj-$(CONFIG_COMPAT)             += sys32.o kuser32.o signal32.o         \
                                           sys_compat.o entry32.o               \
                                           ../../arm/kernel/opcodes.o
@@ -32,7 +37,7 @@ arm64-obj-$(CONFIG_CPU_PM)            += sleep.o suspend.o
 arm64-obj-$(CONFIG_CPU_IDLE)           += cpuidle.o
 arm64-obj-$(CONFIG_JUMP_LABEL)         += jump_label.o
 arm64-obj-$(CONFIG_KGDB)               += kgdb.o
-arm64-obj-$(CONFIG_EFI)                        += efi.o efi-stub.o efi-entry.o
+arm64-obj-$(CONFIG_EFI)                        += efi.o efi-entry.stub.o
 arm64-obj-$(CONFIG_PCI)                        += pci.o
 arm64-obj-$(CONFIG_ARMV8_DEPRECATED)   += armv8_deprecated.o
 arm64-obj-$(CONFIG_ACPI)               += acpi.o
@@ -40,7 +45,7 @@ arm64-obj-$(CONFIG_ACPI)              += acpi.o
 obj-y                                  += $(arm64-obj-y) vdso/
 obj-m                                  += $(arm64-obj-m)
 head-y                                 := head.o
-extra-y                                        := $(head-y) vmlinux.lds
+extra-y                                        += $(head-y) vmlinux.lds
 
 # vDSO - this must be built first to generate the symbol offsets
 $(call objectify,$(arm64-obj-y)): $(obj)/vdso/vdso-offsets.h
index a85843ddbde8892e456f29636fed7d7a66b03825..3b6d8cc9dfe00ce14b764a3102ad0cd73f546c82 100644 (file)
@@ -51,6 +51,9 @@ EXPORT_SYMBOL(strnlen);
 EXPORT_SYMBOL(memset);
 EXPORT_SYMBOL(memcpy);
 EXPORT_SYMBOL(memmove);
+EXPORT_SYMBOL(__memset);
+EXPORT_SYMBOL(__memcpy);
+EXPORT_SYMBOL(__memmove);
 EXPORT_SYMBOL(memchr);
 EXPORT_SYMBOL(memcmp);
 
index 8d89cf8dae5556851365e2cee335599b4d8eb359..25de8b244961312c4b55c02ffdc1e91e0e765b6c 100644 (file)
@@ -60,7 +60,7 @@ int main(void)
   DEFINE(S_SYSCALLNO,          offsetof(struct pt_regs, syscallno));
   DEFINE(S_FRAME_SIZE,         sizeof(struct pt_regs));
   BLANK();
-  DEFINE(MM_CONTEXT_ID,                offsetof(struct mm_struct, context.id));
+  DEFINE(MM_CONTEXT_ID,                offsetof(struct mm_struct, context.id.counter));
   BLANK();
   DEFINE(VMA_VM_MM,            offsetof(struct vm_area_struct, vm_mm));
   DEFINE(VMA_VM_FLAGS,         offsetof(struct vm_area_struct, vm_flags));
index 574450c257a4d038e17d4eb8a47b954b20b2b9bb..24926f2504f7aaf2e37a6f9ecf1692ce3fff422b 100644 (file)
@@ -97,5 +97,5 @@ const struct arm64_cpu_capabilities arm64_errata[] = {
 
 void check_local_cpu_errata(void)
 {
-       check_cpu_capabilities(arm64_errata, "enabling workaround for");
+       update_cpu_capabilities(arm64_errata, "enabling workaround for");
 }
index 305f30dc9e633fe86947621e54d802744e59df52..369975c3a9956efc14ee2f2f393eea9b2f225bbe 100644 (file)
  * along with this program.  If not, see <http://www.gnu.org/licenses/>.
  */
 
-#define pr_fmt(fmt) "alternatives: " fmt
+#define pr_fmt(fmt) "CPU features: " fmt
 
+#include <linux/bsearch.h>
+#include <linux/sort.h>
 #include <linux/types.h>
 #include <asm/cpu.h>
 #include <asm/cpufeature.h>
+#include <asm/cpu_ops.h>
 #include <asm/processor.h>
+#include <asm/sysreg.h>
+
+unsigned long elf_hwcap __read_mostly;
+EXPORT_SYMBOL_GPL(elf_hwcap);
+
+#ifdef CONFIG_COMPAT
+#define COMPAT_ELF_HWCAP_DEFAULT       \
+                               (COMPAT_HWCAP_HALF|COMPAT_HWCAP_THUMB|\
+                                COMPAT_HWCAP_FAST_MULT|COMPAT_HWCAP_EDSP|\
+                                COMPAT_HWCAP_TLS|COMPAT_HWCAP_VFP|\
+                                COMPAT_HWCAP_VFPv3|COMPAT_HWCAP_VFPv4|\
+                                COMPAT_HWCAP_NEON|COMPAT_HWCAP_IDIV|\
+                                COMPAT_HWCAP_LPAE)
+unsigned int compat_elf_hwcap __read_mostly = COMPAT_ELF_HWCAP_DEFAULT;
+unsigned int compat_elf_hwcap2 __read_mostly;
+#endif
+
+DECLARE_BITMAP(cpu_hwcaps, ARM64_NCAPS);
+
+#define ARM64_FTR_BITS(STRICT, TYPE, SHIFT, WIDTH, SAFE_VAL) \
+       {                                               \
+               .strict = STRICT,                       \
+               .type = TYPE,                           \
+               .shift = SHIFT,                         \
+               .width = WIDTH,                         \
+               .safe_val = SAFE_VAL,                   \
+       }
+
+#define ARM64_FTR_END                                  \
+       {                                               \
+               .width = 0,                             \
+       }
+
+static struct arm64_ftr_bits ftr_id_aa64isar0[] = {
+       ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 32, 32, 0),
+       ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64ISAR0_RDM_SHIFT, 4, 0),
+       ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 24, 4, 0),
+       ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR0_ATOMICS_SHIFT, 4, 0),
+       ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR0_CRC32_SHIFT, 4, 0),
+       ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR0_SHA2_SHIFT, 4, 0),
+       ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR0_SHA1_SHIFT, 4, 0),
+       ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR0_AES_SHIFT, 4, 0),
+       ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 0, 4, 0), /* RAZ */
+       ARM64_FTR_END,
+};
+
+static struct arm64_ftr_bits ftr_id_aa64pfr0[] = {
+       ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 32, 32, 0),
+       ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 28, 4, 0),
+       ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64PFR0_GIC_SHIFT, 4, 0),
+       ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR0_ASIMD_SHIFT, 4, ID_AA64PFR0_ASIMD_NI),
+       ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR0_FP_SHIFT, 4, ID_AA64PFR0_FP_NI),
+       /* Linux doesn't care about the EL3 */
+       ARM64_FTR_BITS(FTR_NONSTRICT, FTR_EXACT, ID_AA64PFR0_EL3_SHIFT, 4, 0),
+       ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64PFR0_EL2_SHIFT, 4, 0),
+       ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64PFR0_EL1_SHIFT, 4, ID_AA64PFR0_EL1_64BIT_ONLY),
+       ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64PFR0_EL0_SHIFT, 4, ID_AA64PFR0_EL0_64BIT_ONLY),
+       ARM64_FTR_END,
+};
+
+static struct arm64_ftr_bits ftr_id_aa64mmfr0[] = {
+       ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 32, 32, 0),
+       ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64MMFR0_TGRAN4_SHIFT, 4, ID_AA64MMFR0_TGRAN4_NI),
+       ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64MMFR0_TGRAN64_SHIFT, 4, ID_AA64MMFR0_TGRAN64_NI),
+       ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64MMFR0_TGRAN16_SHIFT, 4, ID_AA64MMFR0_TGRAN16_NI),
+       ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64MMFR0_BIGENDEL0_SHIFT, 4, 0),
+       /* Linux shouldn't care about secure memory */
+       ARM64_FTR_BITS(FTR_NONSTRICT, FTR_EXACT, ID_AA64MMFR0_SNSMEM_SHIFT, 4, 0),
+       ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64MMFR0_BIGENDEL_SHIFT, 4, 0),
+       ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64MMFR0_ASID_SHIFT, 4, 0),
+       /*
+        * Differing PARange is fine as long as all peripherals and memory are mapped
+        * within the minimum PARange of all CPUs
+        */
+       ARM64_FTR_BITS(FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64MMFR0_PARANGE_SHIFT, 4, 0),
+       ARM64_FTR_END,
+};
+
+static struct arm64_ftr_bits ftr_id_aa64mmfr1[] = {
+       ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 32, 32, 0),
+       ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR1_PAN_SHIFT, 4, 0),
+       ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64MMFR1_LOR_SHIFT, 4, 0),
+       ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64MMFR1_HPD_SHIFT, 4, 0),
+       ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64MMFR1_VHE_SHIFT, 4, 0),
+       ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64MMFR1_VMIDBITS_SHIFT, 4, 0),
+       ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64MMFR1_HADBS_SHIFT, 4, 0),
+       ARM64_FTR_END,
+};
+
+static struct arm64_ftr_bits ftr_ctr[] = {
+       ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 31, 1, 1),        /* RAO */
+       ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 28, 3, 0),
+       ARM64_FTR_BITS(FTR_STRICT, FTR_HIGHER_SAFE, 24, 4, 0),  /* CWG */
+       ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, 20, 4, 0),   /* ERG */
+       ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, 16, 4, 1),   /* DminLine */
+       /*
+        * Linux can handle differing I-cache policies. Userspace JITs will
+        * make use of *minLine
+        */
+       ARM64_FTR_BITS(FTR_NONSTRICT, FTR_EXACT, 14, 2, 0),     /* L1Ip */
+       ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 4, 10, 0),        /* RAZ */
+       ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, 0, 4, 0),    /* IminLine */
+       ARM64_FTR_END,
+};
+
+static struct arm64_ftr_bits ftr_id_mmfr0[] = {
+       ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 28, 4, 0),        /* InnerShr */
+       ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 24, 4, 0),        /* FCSE */
+       ARM64_FTR_BITS(FTR_NONSTRICT, FTR_LOWER_SAFE, 20, 4, 0),        /* AuxReg */
+       ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 16, 4, 0),        /* TCM */
+       ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 12, 4, 0),        /* ShareLvl */
+       ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 8, 4, 0), /* OuterShr */
+       ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 4, 4, 0), /* PMSA */
+       ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 0, 4, 0), /* VMSA */
+       ARM64_FTR_END,
+};
+
+static struct arm64_ftr_bits ftr_id_aa64dfr0[] = {
+       ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 32, 32, 0),
+       ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, ID_AA64DFR0_CTX_CMPS_SHIFT, 4, 0),
+       ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, ID_AA64DFR0_WRPS_SHIFT, 4, 0),
+       ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, ID_AA64DFR0_BRPS_SHIFT, 4, 0),
+       ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64DFR0_PMUVER_SHIFT, 4, 0),
+       ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64DFR0_TRACEVER_SHIFT, 4, 0),
+       ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64DFR0_DEBUGVER_SHIFT, 4, 0x6),
+       ARM64_FTR_END,
+};
+
+static struct arm64_ftr_bits ftr_mvfr2[] = {
+       ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 8, 24, 0),        /* RAZ */
+       ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 4, 4, 0),         /* FPMisc */
+       ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 0, 4, 0),         /* SIMDMisc */
+       ARM64_FTR_END,
+};
+
+static struct arm64_ftr_bits ftr_dczid[] = {
+       ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 5, 27, 0),        /* RAZ */
+       ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 4, 1, 1),         /* DZP */
+       ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, 0, 4, 0),    /* BS */
+       ARM64_FTR_END,
+};
+
+static struct arm64_ftr_bits ftr_id_isar5[] = {
+       ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_ISAR5_RDM_SHIFT, 4, 0),
+       ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 20, 4, 0),        /* RAZ */
+       ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_ISAR5_CRC32_SHIFT, 4, 0),
+       ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_ISAR5_SHA2_SHIFT, 4, 0),
+       ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_ISAR5_SHA1_SHIFT, 4, 0),
+       ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_ISAR5_AES_SHIFT, 4, 0),
+       ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_ISAR5_SEVL_SHIFT, 4, 0),
+       ARM64_FTR_END,
+};
+
+static struct arm64_ftr_bits ftr_id_mmfr4[] = {
+       ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 8, 24, 0),        /* RAZ */
+       ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 4, 4, 0),         /* ac2 */
+       ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 0, 4, 0),         /* RAZ */
+       ARM64_FTR_END,
+};
+
+static struct arm64_ftr_bits ftr_id_pfr0[] = {
+       ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 16, 16, 0),       /* RAZ */
+       ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 12, 4, 0),        /* State3 */
+       ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 8, 4, 0),         /* State2 */
+       ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 4, 4, 0),         /* State1 */
+       ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 0, 4, 0),         /* State0 */
+       ARM64_FTR_END,
+};
+
+/*
+ * Common ftr bits for a 32-bit register with all hidden, strict
+ * attributes, with 4-bit feature fields and a default safe value of
+ * 0. Covers the following 32-bit registers:
+ * id_isar[0-4], id_mmfr[1-3], id_pfr1, mvfr[0-1]
+ */
+static struct arm64_ftr_bits ftr_generic_32bits[] = {
+       ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, 28, 4, 0),
+       ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, 24, 4, 0),
+       ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, 20, 4, 0),
+       ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, 16, 4, 0),
+       ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, 12, 4, 0),
+       ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, 8, 4, 0),
+       ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, 4, 4, 0),
+       ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, 0, 4, 0),
+       ARM64_FTR_END,
+};
+
+static struct arm64_ftr_bits ftr_generic[] = {
+       ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 0, 64, 0),
+       ARM64_FTR_END,
+};
+
+static struct arm64_ftr_bits ftr_generic32[] = {
+       ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 0, 32, 0),
+       ARM64_FTR_END,
+};
+
+static struct arm64_ftr_bits ftr_aa64raz[] = {
+       ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 0, 64, 0),
+       ARM64_FTR_END,
+};
+
+#define ARM64_FTR_REG(id, table)               \
+       {                                       \
+               .sys_id = id,                   \
+               .name = #id,                    \
+               .ftr_bits = &((table)[0]),      \
+       }
+
+static struct arm64_ftr_reg arm64_ftr_regs[] = {
+
+       /* Op1 = 0, CRn = 0, CRm = 1 */
+       ARM64_FTR_REG(SYS_ID_PFR0_EL1, ftr_id_pfr0),
+       ARM64_FTR_REG(SYS_ID_PFR1_EL1, ftr_generic_32bits),
+       ARM64_FTR_REG(SYS_ID_DFR0_EL1, ftr_generic_32bits),
+       ARM64_FTR_REG(SYS_ID_MMFR0_EL1, ftr_id_mmfr0),
+       ARM64_FTR_REG(SYS_ID_MMFR1_EL1, ftr_generic_32bits),
+       ARM64_FTR_REG(SYS_ID_MMFR2_EL1, ftr_generic_32bits),
+       ARM64_FTR_REG(SYS_ID_MMFR3_EL1, ftr_generic_32bits),
+
+       /* Op1 = 0, CRn = 0, CRm = 2 */
+       ARM64_FTR_REG(SYS_ID_ISAR0_EL1, ftr_generic_32bits),
+       ARM64_FTR_REG(SYS_ID_ISAR1_EL1, ftr_generic_32bits),
+       ARM64_FTR_REG(SYS_ID_ISAR2_EL1, ftr_generic_32bits),
+       ARM64_FTR_REG(SYS_ID_ISAR3_EL1, ftr_generic_32bits),
+       ARM64_FTR_REG(SYS_ID_ISAR4_EL1, ftr_generic_32bits),
+       ARM64_FTR_REG(SYS_ID_ISAR5_EL1, ftr_id_isar5),
+       ARM64_FTR_REG(SYS_ID_MMFR4_EL1, ftr_id_mmfr4),
+
+       /* Op1 = 0, CRn = 0, CRm = 3 */
+       ARM64_FTR_REG(SYS_MVFR0_EL1, ftr_generic_32bits),
+       ARM64_FTR_REG(SYS_MVFR1_EL1, ftr_generic_32bits),
+       ARM64_FTR_REG(SYS_MVFR2_EL1, ftr_mvfr2),
+
+       /* Op1 = 0, CRn = 0, CRm = 4 */
+       ARM64_FTR_REG(SYS_ID_AA64PFR0_EL1, ftr_id_aa64pfr0),
+       ARM64_FTR_REG(SYS_ID_AA64PFR1_EL1, ftr_aa64raz),
+
+       /* Op1 = 0, CRn = 0, CRm = 5 */
+       ARM64_FTR_REG(SYS_ID_AA64DFR0_EL1, ftr_id_aa64dfr0),
+       ARM64_FTR_REG(SYS_ID_AA64DFR1_EL1, ftr_generic),
+
+       /* Op1 = 0, CRn = 0, CRm = 6 */
+       ARM64_FTR_REG(SYS_ID_AA64ISAR0_EL1, ftr_id_aa64isar0),
+       ARM64_FTR_REG(SYS_ID_AA64ISAR1_EL1, ftr_aa64raz),
+
+       /* Op1 = 0, CRn = 0, CRm = 7 */
+       ARM64_FTR_REG(SYS_ID_AA64MMFR0_EL1, ftr_id_aa64mmfr0),
+       ARM64_FTR_REG(SYS_ID_AA64MMFR1_EL1, ftr_id_aa64mmfr1),
+
+       /* Op1 = 3, CRn = 0, CRm = 0 */
+       ARM64_FTR_REG(SYS_CTR_EL0, ftr_ctr),
+       ARM64_FTR_REG(SYS_DCZID_EL0, ftr_dczid),
+
+       /* Op1 = 3, CRn = 14, CRm = 0 */
+       ARM64_FTR_REG(SYS_CNTFRQ_EL0, ftr_generic32),
+};
+
+static int search_cmp_ftr_reg(const void *id, const void *regp)
+{
+       return (int)(unsigned long)id - (int)((const struct arm64_ftr_reg *)regp)->sys_id;
+}
+
+/*
+ * get_arm64_ftr_reg - Look up a feature register entry using its
+ * sys_reg() encoding. With the array arm64_ftr_regs sorted in
+ * ascending order of sys_id, we use a binary search to find a matching
+ * entry.
+ *
+ * returns - Upon success, the matching ftr_reg entry for id.
+ *         - NULL on failure. It is up to the caller to decide
+ *           the impact of a failure.
+ */
+static struct arm64_ftr_reg *get_arm64_ftr_reg(u32 sys_id)
+{
+       return bsearch((const void *)(unsigned long)sys_id,
+                       arm64_ftr_regs,
+                       ARRAY_SIZE(arm64_ftr_regs),
+                       sizeof(arm64_ftr_regs[0]),
+                       search_cmp_ftr_reg);
+}
+
+static u64 arm64_ftr_set_value(struct arm64_ftr_bits *ftrp, s64 reg, s64 ftr_val)
+{
+       u64 mask = arm64_ftr_mask(ftrp);
+
+       reg &= ~mask;
+       reg |= (ftr_val << ftrp->shift) & mask;
+       return reg;
+}
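
arm64_ftr_mask() and arm64_ftr_value(), used here and below, are declared in <asm/cpufeature.h> rather than in this hunk. As a rough sketch of what they are assumed to do (the example_* names are hypothetical, not the kernel's definitions):

/* Illustrative only -- not the kernel's helpers. */
static inline u64 example_ftr_mask(const struct arm64_ftr_bits *ftrp)
{
        /* All-ones mask of 'width' bits, placed at 'shift'. */
        u64 field = ftrp->width < 64 ? (1ULL << ftrp->width) - 1 : ~0ULL;

        return field << ftrp->shift;
}

static inline s64 example_ftr_value(const struct arm64_ftr_bits *ftrp, u64 reg)
{
        /* Pull the field described by ftrp out of a raw register value. */
        return (s64)((reg & example_ftr_mask(ftrp)) >> ftrp->shift);
}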
+
+static s64 arm64_ftr_safe_value(struct arm64_ftr_bits *ftrp, s64 new, s64 cur)
+{
+       s64 ret = 0;
+
+       switch (ftrp->type) {
+       case FTR_EXACT:
+               ret = ftrp->safe_val;
+               break;
+       case FTR_LOWER_SAFE:
+               ret = new < cur ? new : cur;
+               break;
+       case FTR_HIGHER_SAFE:
+               ret = new > cur ? new : cur;
+               break;
+       default:
+               BUG();
+       }
+
+       return ret;
+}
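
To make the safe-value policy concrete, here is a purely illustrative mismatch in CTR_EL0 fields, resolved using the ftr_ctr table above (the numbers are hypothetical):

/*
 * Worked example (hypothetical values):
 *
 *   DminLine (FTR_LOWER_SAFE):  boot CPU = 4, new CPU = 3  ->  safe = 3
 *   CWG      (FTR_HIGHER_SAFE): boot CPU = 3, new CPU = 4  ->  safe = 4
 *   L1Ip     (FTR_EXACT):       boot CPU = 2, new CPU = 3  ->  safe = safe_val (0)
 *
 * The sanitised CTR_EL0 then only advertises what every CPU can honour.
 */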
+
+static int __init sort_cmp_ftr_regs(const void *a, const void *b)
+{
+       return ((const struct arm64_ftr_reg *)a)->sys_id -
+                ((const struct arm64_ftr_reg *)b)->sys_id;
+}
+
+static void __init swap_ftr_regs(void *a, void *b, int size)
+{
+       struct arm64_ftr_reg tmp = *(struct arm64_ftr_reg *)a;
+       *(struct arm64_ftr_reg *)a = *(struct arm64_ftr_reg *)b;
+       *(struct arm64_ftr_reg *)b = tmp;
+}
+
+static void __init sort_ftr_regs(void)
+{
+       /* Keep the array sorted so that we can do the binary search */
+       sort(arm64_ftr_regs,
+               ARRAY_SIZE(arm64_ftr_regs),
+               sizeof(arm64_ftr_regs[0]),
+               sort_cmp_ftr_regs,
+               swap_ftr_regs);
+}
+
+/*
+ * Initialise the CPU feature register from the boot CPU values.
+ * Also initialises the strict_mask for the register.
+ */
+static void __init init_cpu_ftr_reg(u32 sys_reg, u64 new)
+{
+       u64 val = 0;
+       u64 strict_mask = ~0x0ULL;
+       struct arm64_ftr_bits *ftrp;
+       struct arm64_ftr_reg *reg = get_arm64_ftr_reg(sys_reg);
+
+       BUG_ON(!reg);
+
+       for (ftrp = reg->ftr_bits; ftrp->width; ftrp++) {
+               s64 ftr_new = arm64_ftr_value(ftrp, new);
+
+               val = arm64_ftr_set_value(ftrp, val, ftr_new);
+               if (!ftrp->strict)
+                       strict_mask &= ~arm64_ftr_mask(ftrp);
+       }
+       reg->sys_val = val;
+       reg->strict_mask = strict_mask;
+}
+
+void __init init_cpu_features(struct cpuinfo_arm64 *info)
+{
+       /* Before we start using the tables, make sure it is sorted */
+       sort_ftr_regs();
+
+       init_cpu_ftr_reg(SYS_CTR_EL0, info->reg_ctr);
+       init_cpu_ftr_reg(SYS_DCZID_EL0, info->reg_dczid);
+       init_cpu_ftr_reg(SYS_CNTFRQ_EL0, info->reg_cntfrq);
+       init_cpu_ftr_reg(SYS_ID_AA64DFR0_EL1, info->reg_id_aa64dfr0);
+       init_cpu_ftr_reg(SYS_ID_AA64DFR1_EL1, info->reg_id_aa64dfr1);
+       init_cpu_ftr_reg(SYS_ID_AA64ISAR0_EL1, info->reg_id_aa64isar0);
+       init_cpu_ftr_reg(SYS_ID_AA64ISAR1_EL1, info->reg_id_aa64isar1);
+       init_cpu_ftr_reg(SYS_ID_AA64MMFR0_EL1, info->reg_id_aa64mmfr0);
+       init_cpu_ftr_reg(SYS_ID_AA64MMFR1_EL1, info->reg_id_aa64mmfr1);
+       init_cpu_ftr_reg(SYS_ID_AA64PFR0_EL1, info->reg_id_aa64pfr0);
+       init_cpu_ftr_reg(SYS_ID_AA64PFR1_EL1, info->reg_id_aa64pfr1);
+       init_cpu_ftr_reg(SYS_ID_DFR0_EL1, info->reg_id_dfr0);
+       init_cpu_ftr_reg(SYS_ID_ISAR0_EL1, info->reg_id_isar0);
+       init_cpu_ftr_reg(SYS_ID_ISAR1_EL1, info->reg_id_isar1);
+       init_cpu_ftr_reg(SYS_ID_ISAR2_EL1, info->reg_id_isar2);
+       init_cpu_ftr_reg(SYS_ID_ISAR3_EL1, info->reg_id_isar3);
+       init_cpu_ftr_reg(SYS_ID_ISAR4_EL1, info->reg_id_isar4);
+       init_cpu_ftr_reg(SYS_ID_ISAR5_EL1, info->reg_id_isar5);
+       init_cpu_ftr_reg(SYS_ID_MMFR0_EL1, info->reg_id_mmfr0);
+       init_cpu_ftr_reg(SYS_ID_MMFR1_EL1, info->reg_id_mmfr1);
+       init_cpu_ftr_reg(SYS_ID_MMFR2_EL1, info->reg_id_mmfr2);
+       init_cpu_ftr_reg(SYS_ID_MMFR3_EL1, info->reg_id_mmfr3);
+       init_cpu_ftr_reg(SYS_ID_PFR0_EL1, info->reg_id_pfr0);
+       init_cpu_ftr_reg(SYS_ID_PFR1_EL1, info->reg_id_pfr1);
+       init_cpu_ftr_reg(SYS_MVFR0_EL1, info->reg_mvfr0);
+       init_cpu_ftr_reg(SYS_MVFR1_EL1, info->reg_mvfr1);
+       init_cpu_ftr_reg(SYS_MVFR2_EL1, info->reg_mvfr2);
+}
+
+static void update_cpu_ftr_reg(struct arm64_ftr_reg *reg, u64 new)
+{
+       struct arm64_ftr_bits *ftrp;
+
+       for (ftrp = reg->ftr_bits; ftrp->width; ftrp++) {
+               s64 ftr_cur = arm64_ftr_value(ftrp, reg->sys_val);
+               s64 ftr_new = arm64_ftr_value(ftrp, new);
+
+               if (ftr_cur == ftr_new)
+                       continue;
+               /* Find a safe value */
+               ftr_new = arm64_ftr_safe_value(ftrp, ftr_new, ftr_cur);
+               reg->sys_val = arm64_ftr_set_value(ftrp, reg->sys_val, ftr_new);
+       }
+
+}
+
+static int check_update_ftr_reg(u32 sys_id, int cpu, u64 val, u64 boot)
+{
+       struct arm64_ftr_reg *regp = get_arm64_ftr_reg(sys_id);
+
+       BUG_ON(!regp);
+       update_cpu_ftr_reg(regp, val);
+       if ((boot & regp->strict_mask) == (val & regp->strict_mask))
+               return 0;
+       pr_warn("SANITY CHECK: Unexpected variation in %s. Boot CPU: %#016llx, CPU%d: %#016llx\n",
+                       regp->name, boot, cpu, val);
+       return 1;
+}
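
An illustration of how strict_mask feeds into this check (not part of the patch):

/*
 * Example: ID_AA64MMFR0_EL1.PARange is FTR_NONSTRICT + FTR_LOWER_SAFE in
 * ftr_id_aa64mmfr0, so init_cpu_ftr_reg() clears its bits from strict_mask.
 * If the boot CPU reports PARange = 2 (40-bit PA) and a secondary reports
 * PARange = 1 (36-bit PA), the sanitised value drops to 1 but no warning or
 * taint is raised. A mismatch in a strict field (e.g. BigEnd) would still
 * hit the pr_warn() above and return 1.
 */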
+
+/*
+ * Update the system-wide CPU feature registers with the values from a
+ * non-boot CPU. Also performs SANITY checks to make sure that there
+ * are no insane variations from those of the boot CPU.
+ */
+void update_cpu_features(int cpu,
+                        struct cpuinfo_arm64 *info,
+                        struct cpuinfo_arm64 *boot)
+{
+       int taint = 0;
+
+       /*
+        * The kernel can handle differing I-cache policies, but otherwise
+        * caches should look identical. Userspace JITs will make use of
+        * *minLine.
+        */
+       taint |= check_update_ftr_reg(SYS_CTR_EL0, cpu,
+                                     info->reg_ctr, boot->reg_ctr);
+
+       /*
+        * Userspace may perform DC ZVA instructions. Mismatched block sizes
+        * could result in too much or too little memory being zeroed if a
+        * process is preempted and migrated between CPUs.
+        */
+       taint |= check_update_ftr_reg(SYS_DCZID_EL0, cpu,
+                                     info->reg_dczid, boot->reg_dczid);
+
+       /* If different, timekeeping will be broken (especially with KVM) */
+       taint |= check_update_ftr_reg(SYS_CNTFRQ_EL0, cpu,
+                                     info->reg_cntfrq, boot->reg_cntfrq);
+
+       /*
+        * The kernel uses self-hosted debug features and expects CPUs to
+        * support identical debug features. We presently need CTX_CMPs, WRPs,
+        * and BRPs to be identical.
+        * ID_AA64DFR1 is currently RES0.
+        */
+       taint |= check_update_ftr_reg(SYS_ID_AA64DFR0_EL1, cpu,
+                                     info->reg_id_aa64dfr0, boot->reg_id_aa64dfr0);
+       taint |= check_update_ftr_reg(SYS_ID_AA64DFR1_EL1, cpu,
+                                     info->reg_id_aa64dfr1, boot->reg_id_aa64dfr1);
+       /*
+        * Even in big.LITTLE, processors should be identical instruction-set
+        * wise.
+        */
+       taint |= check_update_ftr_reg(SYS_ID_AA64ISAR0_EL1, cpu,
+                                     info->reg_id_aa64isar0, boot->reg_id_aa64isar0);
+       taint |= check_update_ftr_reg(SYS_ID_AA64ISAR1_EL1, cpu,
+                                     info->reg_id_aa64isar1, boot->reg_id_aa64isar1);
+
+       /*
+        * Differing PARange support is fine as long as all peripherals and
+        * memory are mapped within the minimum PARange of all CPUs.
+        * Linux should not care about secure memory.
+        */
+       taint |= check_update_ftr_reg(SYS_ID_AA64MMFR0_EL1, cpu,
+                                     info->reg_id_aa64mmfr0, boot->reg_id_aa64mmfr0);
+       taint |= check_update_ftr_reg(SYS_ID_AA64MMFR1_EL1, cpu,
+                                     info->reg_id_aa64mmfr1, boot->reg_id_aa64mmfr1);
+
+       /*
+        * EL3 is not our concern.
+        * ID_AA64PFR1 is currently RES0.
+        */
+       taint |= check_update_ftr_reg(SYS_ID_AA64PFR0_EL1, cpu,
+                                     info->reg_id_aa64pfr0, boot->reg_id_aa64pfr0);
+       taint |= check_update_ftr_reg(SYS_ID_AA64PFR1_EL1, cpu,
+                                     info->reg_id_aa64pfr1, boot->reg_id_aa64pfr1);
+
+       /*
+        * If we have AArch32, we care about 32-bit features for compat. These
+        * registers should be RES0 otherwise.
+        */
+       taint |= check_update_ftr_reg(SYS_ID_DFR0_EL1, cpu,
+                                       info->reg_id_dfr0, boot->reg_id_dfr0);
+       taint |= check_update_ftr_reg(SYS_ID_ISAR0_EL1, cpu,
+                                       info->reg_id_isar0, boot->reg_id_isar0);
+       taint |= check_update_ftr_reg(SYS_ID_ISAR1_EL1, cpu,
+                                       info->reg_id_isar1, boot->reg_id_isar1);
+       taint |= check_update_ftr_reg(SYS_ID_ISAR2_EL1, cpu,
+                                       info->reg_id_isar2, boot->reg_id_isar2);
+       taint |= check_update_ftr_reg(SYS_ID_ISAR3_EL1, cpu,
+                                       info->reg_id_isar3, boot->reg_id_isar3);
+       taint |= check_update_ftr_reg(SYS_ID_ISAR4_EL1, cpu,
+                                       info->reg_id_isar4, boot->reg_id_isar4);
+       taint |= check_update_ftr_reg(SYS_ID_ISAR5_EL1, cpu,
+                                       info->reg_id_isar5, boot->reg_id_isar5);
+
+       /*
+        * Regardless of the value of the AuxReg field, the AIFSR, ADFSR, and
+        * ACTLR formats could differ across CPUs and therefore would have to
+        * be trapped for virtualization anyway.
+        */
+       taint |= check_update_ftr_reg(SYS_ID_MMFR0_EL1, cpu,
+                                       info->reg_id_mmfr0, boot->reg_id_mmfr0);
+       taint |= check_update_ftr_reg(SYS_ID_MMFR1_EL1, cpu,
+                                       info->reg_id_mmfr1, boot->reg_id_mmfr1);
+       taint |= check_update_ftr_reg(SYS_ID_MMFR2_EL1, cpu,
+                                       info->reg_id_mmfr2, boot->reg_id_mmfr2);
+       taint |= check_update_ftr_reg(SYS_ID_MMFR3_EL1, cpu,
+                                       info->reg_id_mmfr3, boot->reg_id_mmfr3);
+       taint |= check_update_ftr_reg(SYS_ID_PFR0_EL1, cpu,
+                                       info->reg_id_pfr0, boot->reg_id_pfr0);
+       taint |= check_update_ftr_reg(SYS_ID_PFR1_EL1, cpu,
+                                       info->reg_id_pfr1, boot->reg_id_pfr1);
+       taint |= check_update_ftr_reg(SYS_MVFR0_EL1, cpu,
+                                       info->reg_mvfr0, boot->reg_mvfr0);
+       taint |= check_update_ftr_reg(SYS_MVFR1_EL1, cpu,
+                                       info->reg_mvfr1, boot->reg_mvfr1);
+       taint |= check_update_ftr_reg(SYS_MVFR2_EL1, cpu,
+                                       info->reg_mvfr2, boot->reg_mvfr2);
+
+       /*
+        * Mismatched CPU features are a recipe for disaster. Don't even
+        * pretend to support them.
+        */
+       WARN_TAINT_ONCE(taint, TAINT_CPU_OUT_OF_SPEC,
+                       "Unsupported CPU feature variation.\n");
+}
+
+u64 read_system_reg(u32 id)
+{
+       struct arm64_ftr_reg *regp = get_arm64_ftr_reg(id);
+
+       /* We shouldn't get a request for an unsupported register */
+       BUG_ON(!regp);
+       return regp->sys_val;
+}
 
 #include <linux/irqchip/arm-gic-v3.h>
 
@@ -33,31 +588,25 @@ feature_matches(u64 reg, const struct arm64_cpu_capabilities *entry)
        return val >= entry->min_field_value;
 }
 
-#define __ID_FEAT_CHK(reg)                                             \
-static bool __maybe_unused                                             \
-has_##reg##_feature(const struct arm64_cpu_capabilities *entry)                \
-{                                                                      \
-       u64 val;                                                        \
-                                                                       \
-       val = read_cpuid(reg##_el1);                                    \
-       return feature_matches(val, entry);                             \
-}
+static bool
+has_cpuid_feature(const struct arm64_cpu_capabilities *entry)
+{
+       u64 val;
 
-__ID_FEAT_CHK(id_aa64pfr0);
-__ID_FEAT_CHK(id_aa64mmfr1);
-__ID_FEAT_CHK(id_aa64isar0);
+       val = read_system_reg(entry->sys_reg);
+       return feature_matches(val, entry);
+}
 
 static bool has_useable_gicv3_cpuif(const struct arm64_cpu_capabilities *entry)
 {
        bool has_sre;
 
-       if (!has_id_aa64pfr0_feature(entry))
+       if (!has_cpuid_feature(entry))
                return false;
-
        has_sre = gic_enable_sre();
        if (!has_sre)
                pr_warn_once("%s present but disabled by higher exception level\n",
-                            entry->desc);
+                               entry->desc);
 
        return has_sre;
 }
@@ -67,15 +616,17 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
                .desc = "GIC system register CPU interface",
                .capability = ARM64_HAS_SYSREG_GIC_CPUIF,
                .matches = has_useable_gicv3_cpuif,
-               .field_pos = 24,
+               .sys_reg = SYS_ID_AA64PFR0_EL1,
+               .field_pos = ID_AA64PFR0_GIC_SHIFT,
                .min_field_value = 1,
        },
 #ifdef CONFIG_ARM64_PAN
        {
                .desc = "Privileged Access Never",
                .capability = ARM64_HAS_PAN,
-               .matches = has_id_aa64mmfr1_feature,
-               .field_pos = 20,
+               .matches = has_cpuid_feature,
+               .sys_reg = SYS_ID_AA64MMFR1_EL1,
+               .field_pos = ID_AA64MMFR1_PAN_SHIFT,
                .min_field_value = 1,
                .enable = cpu_enable_pan,
        },
@@ -84,15 +635,101 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
        {
                .desc = "LSE atomic instructions",
                .capability = ARM64_HAS_LSE_ATOMICS,
-               .matches = has_id_aa64isar0_feature,
-               .field_pos = 20,
+               .matches = has_cpuid_feature,
+               .sys_reg = SYS_ID_AA64ISAR0_EL1,
+               .field_pos = ID_AA64ISAR0_ATOMICS_SHIFT,
                .min_field_value = 2,
        },
 #endif /* CONFIG_AS_LSE && CONFIG_ARM64_LSE_ATOMICS */
        {},
 };
 
-void check_cpu_capabilities(const struct arm64_cpu_capabilities *caps,
+#define HWCAP_CAP(reg, field, min_value, type, cap)            \
+       {                                                       \
+               .desc = #cap,                                   \
+               .matches = has_cpuid_feature,                   \
+               .sys_reg = reg,                                 \
+               .field_pos = field,                             \
+               .min_field_value = min_value,                   \
+               .hwcap_type = type,                             \
+               .hwcap = cap,                                   \
+       }
+
+static const struct arm64_cpu_capabilities arm64_hwcaps[] = {
+       HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_AES_SHIFT, 2, CAP_HWCAP, HWCAP_PMULL),
+       HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_AES_SHIFT, 1, CAP_HWCAP, HWCAP_AES),
+       HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_SHA1_SHIFT, 1, CAP_HWCAP, HWCAP_SHA1),
+       HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_SHA2_SHIFT, 1, CAP_HWCAP, HWCAP_SHA2),
+       HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_CRC32_SHIFT, 1, CAP_HWCAP, HWCAP_CRC32),
+       HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_ATOMICS_SHIFT, 2, CAP_HWCAP, HWCAP_ATOMICS),
+       HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_FP_SHIFT, 0, CAP_HWCAP, HWCAP_FP),
+       HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_ASIMD_SHIFT, 0, CAP_HWCAP, HWCAP_ASIMD),
+#ifdef CONFIG_COMPAT
+       HWCAP_CAP(SYS_ID_ISAR5_EL1, ID_ISAR5_AES_SHIFT, 2, CAP_COMPAT_HWCAP2, COMPAT_HWCAP2_PMULL),
+       HWCAP_CAP(SYS_ID_ISAR5_EL1, ID_ISAR5_AES_SHIFT, 1, CAP_COMPAT_HWCAP2, COMPAT_HWCAP2_AES),
+       HWCAP_CAP(SYS_ID_ISAR5_EL1, ID_ISAR5_SHA1_SHIFT, 1, CAP_COMPAT_HWCAP2, COMPAT_HWCAP2_SHA1),
+       HWCAP_CAP(SYS_ID_ISAR5_EL1, ID_ISAR5_SHA2_SHIFT, 1, CAP_COMPAT_HWCAP2, COMPAT_HWCAP2_SHA2),
+       HWCAP_CAP(SYS_ID_ISAR5_EL1, ID_ISAR5_CRC32_SHIFT, 1, CAP_COMPAT_HWCAP2, COMPAT_HWCAP2_CRC32),
+#endif
+       {},
+};
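
For reference, a mechanical expansion of one of the entries above (the CRC32 one), to show what HWCAP_CAP() produces:

/*
 * HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_CRC32_SHIFT, 1,
 *           CAP_HWCAP, HWCAP_CRC32) expands to roughly:
 *
 *      {
 *              .desc            = "HWCAP_CRC32",
 *              .matches         = has_cpuid_feature,
 *              .sys_reg         = SYS_ID_AA64ISAR0_EL1,
 *              .field_pos       = ID_AA64ISAR0_CRC32_SHIFT,
 *              .min_field_value = 1,
 *              .hwcap_type      = CAP_HWCAP,
 *              .hwcap           = HWCAP_CRC32,
 *      }
 *
 * i.e. the hwcap is exposed once the CRC32 field of the sanitised
 * ID_AA64ISAR0_EL1 value is at least 1.
 */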
+
+static void cap_set_hwcap(const struct arm64_cpu_capabilities *cap)
+{
+       switch (cap->hwcap_type) {
+       case CAP_HWCAP:
+               elf_hwcap |= cap->hwcap;
+               break;
+#ifdef CONFIG_COMPAT
+       case CAP_COMPAT_HWCAP:
+               compat_elf_hwcap |= (u32)cap->hwcap;
+               break;
+       case CAP_COMPAT_HWCAP2:
+               compat_elf_hwcap2 |= (u32)cap->hwcap;
+               break;
+#endif
+       default:
+               WARN_ON(1);
+               break;
+       }
+}
+
+/* Check if we have a particular HWCAP enabled */
+static bool cpus_have_hwcap(const struct arm64_cpu_capabilities *cap)
+{
+       bool rc;
+
+       switch (cap->hwcap_type) {
+       case CAP_HWCAP:
+               rc = (elf_hwcap & cap->hwcap) != 0;
+               break;
+#ifdef CONFIG_COMPAT
+       case CAP_COMPAT_HWCAP:
+               rc = (compat_elf_hwcap & (u32)cap->hwcap) != 0;
+               break;
+       case CAP_COMPAT_HWCAP2:
+               rc = (compat_elf_hwcap2 & (u32)cap->hwcap) != 0;
+               break;
+#endif
+       default:
+               WARN_ON(1);
+               rc = false;
+       }
+
+       return rc;
+}
+
+static void setup_cpu_hwcaps(void)
+{
+       int i;
+       const struct arm64_cpu_capabilities *hwcaps = arm64_hwcaps;
+
+       for (i = 0; hwcaps[i].desc; i++)
+               if (hwcaps[i].matches(&hwcaps[i]))
+                       cap_set_hwcap(&hwcaps[i]);
+}
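
The hwcaps set up above reach userspace through the ELF auxiliary vector. A minimal userspace sketch (not part of this patch; it assumes the uapi <asm/hwcap.h> header is available) of how a program would test for one of them:

#include <stdio.h>
#include <sys/auxv.h>           /* getauxval(), AT_HWCAP */
#include <asm/hwcap.h>          /* HWCAP_CRC32 and friends */

int main(void)
{
        unsigned long hwcaps = getauxval(AT_HWCAP);

        /* Prefer this over parsing /proc/cpuinfo. */
        if (hwcaps & HWCAP_CRC32)
                printf("CRC32 instructions available\n");

        return 0;
}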
+
+void update_cpu_capabilities(const struct arm64_cpu_capabilities *caps,
                            const char *info)
 {
        int i;
@@ -105,15 +742,178 @@ void check_cpu_capabilities(const struct arm64_cpu_capabilities *caps,
                        pr_info("%s %s\n", info, caps[i].desc);
                cpus_set_cap(caps[i].capability);
        }
+}
 
-       /* second pass allows enable() to consider interacting capabilities */
+/*
+ * Run through the enabled capabilities and call their enable()
+ * methods on all active CPUs.
+ */
+static void enable_cpu_capabilities(const struct arm64_cpu_capabilities *caps)
+{
+       int i;
+
+       for (i = 0; caps[i].desc; i++)
+               if (caps[i].enable && cpus_have_cap(caps[i].capability))
+                       on_each_cpu(caps[i].enable, NULL, true);
+}
+
+#ifdef CONFIG_HOTPLUG_CPU
+
+/*
+ * Flag to indicate whether we have computed the system-wide
+ * capabilities based on the CPUs active at boot time. This
+ * will be used to determine whether a new booting CPU should
+ * go through the verification process to make sure that it
+ * supports the system capabilities, without using a hotplug
+ * notifier.
+ */
+static bool sys_caps_initialised;
+
+static inline void set_sys_caps_initialised(void)
+{
+       sys_caps_initialised = true;
+}
+
+/*
+ * __raw_read_system_reg() - Used by a STARTING CPU before cpuinfo is populated.
+ */
+static u64 __raw_read_system_reg(u32 sys_id)
+{
+       switch (sys_id) {
+       case SYS_ID_PFR0_EL1:           return (u64)read_cpuid(ID_PFR0_EL1);
+       case SYS_ID_PFR1_EL1:           return (u64)read_cpuid(ID_PFR1_EL1);
+       case SYS_ID_DFR0_EL1:           return (u64)read_cpuid(ID_DFR0_EL1);
+       case SYS_ID_MMFR0_EL1:          return (u64)read_cpuid(ID_MMFR0_EL1);
+       case SYS_ID_MMFR1_EL1:          return (u64)read_cpuid(ID_MMFR1_EL1);
+       case SYS_ID_MMFR2_EL1:          return (u64)read_cpuid(ID_MMFR2_EL1);
+       case SYS_ID_MMFR3_EL1:          return (u64)read_cpuid(ID_MMFR3_EL1);
+       case SYS_ID_ISAR0_EL1:          return (u64)read_cpuid(ID_ISAR0_EL1);
+       case SYS_ID_ISAR1_EL1:          return (u64)read_cpuid(ID_ISAR1_EL1);
+       case SYS_ID_ISAR2_EL1:          return (u64)read_cpuid(ID_ISAR2_EL1);
+       case SYS_ID_ISAR3_EL1:          return (u64)read_cpuid(ID_ISAR3_EL1);
+       case SYS_ID_ISAR4_EL1:          return (u64)read_cpuid(ID_ISAR4_EL1);
+       case SYS_ID_ISAR5_EL1:          return (u64)read_cpuid(ID_ISAR5_EL1);
+       case SYS_MVFR0_EL1:             return (u64)read_cpuid(MVFR0_EL1);
+       case SYS_MVFR1_EL1:             return (u64)read_cpuid(MVFR1_EL1);
+       case SYS_MVFR2_EL1:             return (u64)read_cpuid(MVFR2_EL1);
+
+       case SYS_ID_AA64PFR0_EL1:       return (u64)read_cpuid(ID_AA64PFR0_EL1);
+       case SYS_ID_AA64PFR1_EL1:       return (u64)read_cpuid(ID_AA64PFR1_EL1);
+       case SYS_ID_AA64DFR0_EL1:       return (u64)read_cpuid(ID_AA64DFR0_EL1);
+       case SYS_ID_AA64DFR1_EL1:       return (u64)read_cpuid(ID_AA64DFR1_EL1);
+       case SYS_ID_AA64MMFR0_EL1:      return (u64)read_cpuid(ID_AA64MMFR0_EL1);
+       case SYS_ID_AA64MMFR1_EL1:      return (u64)read_cpuid(ID_AA64MMFR1_EL1);
+       case SYS_ID_AA64ISAR0_EL1:      return (u64)read_cpuid(ID_AA64ISAR0_EL1);
+       case SYS_ID_AA64ISAR1_EL1:      return (u64)read_cpuid(ID_AA64ISAR1_EL1);
+
+       case SYS_CNTFRQ_EL0:            return (u64)read_cpuid(CNTFRQ_EL0);
+       case SYS_CTR_EL0:               return (u64)read_cpuid(CTR_EL0);
+       case SYS_DCZID_EL0:             return (u64)read_cpuid(DCZID_EL0);
+       default:
+               BUG();
+               return 0;
+       }
+}
+
+/*
+ * Park the CPU which doesn't have the capability as advertised
+ * by the system.
+ */
+static void fail_incapable_cpu(char *cap_type,
+                                const struct arm64_cpu_capabilities *cap)
+{
+       int cpu = smp_processor_id();
+
+       pr_crit("CPU%d: missing %s : %s\n", cpu, cap_type, cap->desc);
+       /* Mark this CPU absent */
+       set_cpu_present(cpu, 0);
+
+       /* Check if we can park ourselves */
+       if (cpu_ops[cpu] && cpu_ops[cpu]->cpu_die)
+               cpu_ops[cpu]->cpu_die(cpu);
+       asm(
+       "1:     wfe\n"
+       "       wfi\n"
+       "       b       1b");
+}
+
+/*
+ * Run through the enabled system capabilities and call their enable()
+ * methods on this CPU. The capabilities were decided based on the CPUs
+ * available at boot time. Any new CPU should match the system-wide status
+ * of each capability. If the new CPU doesn't have a capability which the
+ * system now has enabled, we cannot do anything to fix it up, which could
+ * cause unexpected failures. So we park the CPU.
+ */
+void verify_local_cpu_capabilities(void)
+{
+       int i;
+       const struct arm64_cpu_capabilities *caps;
+
+       /*
+        * If we haven't computed the system capabilities, there is nothing
+        * to verify.
+        */
+       if (!sys_caps_initialised)
+               return;
+
+       caps = arm64_features;
        for (i = 0; caps[i].desc; i++) {
-               if (cpus_have_cap(caps[i].capability) && caps[i].enable)
-                       caps[i].enable();
+               if (!cpus_have_cap(caps[i].capability) || !caps[i].sys_reg)
+                       continue;
+               /*
+                * If the new CPU misses an advertised feature, we cannot
+                * proceed further; park the CPU.
+                */
+               if (!feature_matches(__raw_read_system_reg(caps[i].sys_reg), &caps[i]))
+                       fail_incapable_cpu("arm64_features", &caps[i]);
+               if (caps[i].enable)
+                       caps[i].enable(NULL);
        }
+
+       for (i = 0, caps = arm64_hwcaps; caps[i].desc; i++) {
+               if (!cpus_have_hwcap(&caps[i]))
+                       continue;
+               if (!feature_matches(__raw_read_system_reg(caps[i].sys_reg), &caps[i]))
+                       fail_incapable_cpu("arm64_hwcaps", &caps[i]);
+       }
+}
+
+#else  /* !CONFIG_HOTPLUG_CPU */
+
+static inline void set_sys_caps_initialised(void)
+{
+}
+
+#endif /* CONFIG_HOTPLUG_CPU */
+
+static void setup_feature_capabilities(void)
+{
+       update_cpu_capabilities(arm64_features, "detected feature:");
+       enable_cpu_capabilities(arm64_features);
 }
 
-void check_local_cpu_features(void)
+void __init setup_cpu_features(void)
 {
-       check_cpu_capabilities(arm64_features, "detected feature:");
+       u32 cwg;
+       int cls;
+
+       /* Set the CPU feature capabilities */
+       setup_feature_capabilities();
+       setup_cpu_hwcaps();
+
+       /* Advertise that we have computed the system capabilities */
+       set_sys_caps_initialised();
+
+       /*
+        * Check for sane CTR_EL0.CWG value.
+        */
+       cwg = cache_type_cwg();
+       cls = cache_line_size();
+       if (!cwg)
+               pr_warn("No Cache Writeback Granule information, assuming cache line size %d\n",
+                       cls);
+       if (L1_CACHE_BYTES < cls)
+               pr_warn("L1_CACHE_BYTES smaller than the Cache Writeback Granule (%d < %d)\n",
+                       L1_CACHE_BYTES, cls);
 }
index 75d5a867e7fb1420172ef5e7cb281c9d9aa1f206..706679d0a0b4227c4a7267cd85142192576d7ab3 100644 (file)
 #include <linux/bug.h>
 #include <linux/init.h>
 #include <linux/kernel.h>
+#include <linux/personality.h>
 #include <linux/preempt.h>
 #include <linux/printk.h>
+#include <linux/seq_file.h>
+#include <linux/sched.h>
 #include <linux/smp.h>
 
 /*
@@ -35,7 +38,6 @@
  */
 DEFINE_PER_CPU(struct cpuinfo_arm64, cpu_data);
 static struct cpuinfo_arm64 boot_cpu_data;
-static bool mixed_endian_el0 = true;
 
 static char *icache_policy_str[] = {
        [ICACHE_POLICY_RESERVED] = "RESERVED/UNKNOWN",
@@ -46,157 +48,148 @@ static char *icache_policy_str[] = {
 
 unsigned long __icache_flags;
 
-static void cpuinfo_detect_icache_policy(struct cpuinfo_arm64 *info)
+static const char *const hwcap_str[] = {
+       "fp",
+       "asimd",
+       "evtstrm",
+       "aes",
+       "pmull",
+       "sha1",
+       "sha2",
+       "crc32",
+       "atomics",
+       NULL
+};
+
+#ifdef CONFIG_COMPAT
+static const char *const compat_hwcap_str[] = {
+       "swp",
+       "half",
+       "thumb",
+       "26bit",
+       "fastmult",
+       "fpa",
+       "vfp",
+       "edsp",
+       "java",
+       "iwmmxt",
+       "crunch",
+       "thumbee",
+       "neon",
+       "vfpv3",
+       "vfpv3d16",
+       "tls",
+       "vfpv4",
+       "idiva",
+       "idivt",
+       "vfpd32",
+       "lpae",
+       "evtstrm"
+};
+
+static const char *const compat_hwcap2_str[] = {
+       "aes",
+       "pmull",
+       "sha1",
+       "sha2",
+       "crc32",
+       NULL
+};
+#endif /* CONFIG_COMPAT */
+
+static int c_show(struct seq_file *m, void *v)
 {
-       unsigned int cpu = smp_processor_id();
-       u32 l1ip = CTR_L1IP(info->reg_ctr);
+       int i, j;
+
+       for_each_online_cpu(i) {
+               struct cpuinfo_arm64 *cpuinfo = &per_cpu(cpu_data, i);
+               u32 midr = cpuinfo->reg_midr;
 
-       if (l1ip != ICACHE_POLICY_PIPT) {
                /*
-                * VIPT caches are non-aliasing if the VA always equals the PA
-                * in all bit positions that are covered by the index. This is
-                * the case if the size of a way (# of sets * line size) does
-                * not exceed PAGE_SIZE.
+                * glibc reads /proc/cpuinfo to determine the number of
+                * online processors, looking for lines beginning with
+                * "processor".  Give glibc what it expects.
                 */
-               u32 waysize = icache_get_numsets() * icache_get_linesize();
+               seq_printf(m, "processor\t: %d\n", i);
 
-               if (l1ip != ICACHE_POLICY_VIPT || waysize > PAGE_SIZE)
-                       set_bit(ICACHEF_ALIASING, &__icache_flags);
+               /*
+                * Dump out the common processor features in a single line.
+                * Userspace should read the hwcaps with getauxval(AT_HWCAP)
+                * rather than attempting to parse this, but there's a body of
+                * software which does already (at least for 32-bit).
+                */
+               seq_puts(m, "Features\t:");
+               if (personality(current->personality) == PER_LINUX32) {
+#ifdef CONFIG_COMPAT
+                       for (j = 0; compat_hwcap_str[j]; j++)
+                               if (compat_elf_hwcap & (1 << j))
+                                       seq_printf(m, " %s", compat_hwcap_str[j]);
+
+                       for (j = 0; compat_hwcap2_str[j]; j++)
+                               if (compat_elf_hwcap2 & (1 << j))
+                                       seq_printf(m, " %s", compat_hwcap2_str[j]);
+#endif /* CONFIG_COMPAT */
+               } else {
+                       for (j = 0; hwcap_str[j]; j++)
+                               if (elf_hwcap & (1 << j))
+                                       seq_printf(m, " %s", hwcap_str[j]);
+               }
+               seq_puts(m, "\n");
+
+               seq_printf(m, "CPU implementer\t: 0x%02x\n",
+                          MIDR_IMPLEMENTOR(midr));
+               seq_printf(m, "CPU architecture: 8\n");
+               seq_printf(m, "CPU variant\t: 0x%x\n", MIDR_VARIANT(midr));
+               seq_printf(m, "CPU part\t: 0x%03x\n", MIDR_PARTNUM(midr));
+               seq_printf(m, "CPU revision\t: %d\n\n", MIDR_REVISION(midr));
        }
-       if (l1ip == ICACHE_POLICY_AIVIVT)
-               set_bit(ICACHEF_AIVIVT, &__icache_flags);
 
-       pr_info("Detected %s I-cache on CPU%d\n", icache_policy_str[l1ip], cpu);
-}
-
-bool cpu_supports_mixed_endian_el0(void)
-{
-       return id_aa64mmfr0_mixed_endian_el0(read_cpuid(ID_AA64MMFR0_EL1));
-}
-
-bool system_supports_mixed_endian_el0(void)
-{
-       return mixed_endian_el0;
+       return 0;
 }
 
-static void update_mixed_endian_el0_support(struct cpuinfo_arm64 *info)
+static void *c_start(struct seq_file *m, loff_t *pos)
 {
-       mixed_endian_el0 &= id_aa64mmfr0_mixed_endian_el0(info->reg_id_aa64mmfr0);
+       return *pos < 1 ? (void *)1 : NULL;
 }
 
-static void update_cpu_features(struct cpuinfo_arm64 *info)
+static void *c_next(struct seq_file *m, void *v, loff_t *pos)
 {
-       update_mixed_endian_el0_support(info);
+       ++*pos;
+       return NULL;
 }
 
-static int check_reg_mask(char *name, u64 mask, u64 boot, u64 cur, int cpu)
+static void c_stop(struct seq_file *m, void *v)
 {
-       if ((boot & mask) == (cur & mask))
-               return 0;
-
-       pr_warn("SANITY CHECK: Unexpected variation in %s. Boot CPU: %#016lx, CPU%d: %#016lx\n",
-               name, (unsigned long)boot, cpu, (unsigned long)cur);
-
-       return 1;
 }
 
-#define CHECK_MASK(field, mask, boot, cur, cpu) \
-       check_reg_mask(#field, mask, (boot)->reg_ ## field, (cur)->reg_ ## field, cpu)
-
-#define CHECK(field, boot, cur, cpu) \
-       CHECK_MASK(field, ~0ULL, boot, cur, cpu)
+const struct seq_operations cpuinfo_op = {
+       .start  = c_start,
+       .next   = c_next,
+       .stop   = c_stop,
+       .show   = c_show
+};
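
Purely for illustration, c_show() above renders one block per online CPU roughly like this (implementer/part numbers are hypothetical, shown for a Cortex-A53-class part; the Features line depends on elf_hwcap):

processor       : 0
Features        : fp asimd evtstrm aes pmull sha1 sha2 crc32
CPU implementer : 0x41
CPU architecture: 8
CPU variant     : 0x0
CPU part        : 0xd03
CPU revision    : 0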
 
-/*
- * Verify that CPUs don't have unexpected differences that will cause problems.
- */
-static void cpuinfo_sanity_check(struct cpuinfo_arm64 *cur)
+static void cpuinfo_detect_icache_policy(struct cpuinfo_arm64 *info)
 {
        unsigned int cpu = smp_processor_id();
-       struct cpuinfo_arm64 *boot = &boot_cpu_data;
-       unsigned int diff = 0;
-
-       /*
-        * The kernel can handle differing I-cache policies, but otherwise
-        * caches should look identical. Userspace JITs will make use of
-        * *minLine.
-        */
-       diff |= CHECK_MASK(ctr, 0xffff3fff, boot, cur, cpu);
-
-       /*
-        * Userspace may perform DC ZVA instructions. Mismatched block sizes
-        * could result in too much or too little memory being zeroed if a
-        * process is preempted and migrated between CPUs.
-        */
-       diff |= CHECK(dczid, boot, cur, cpu);
-
-       /* If different, timekeeping will be broken (especially with KVM) */
-       diff |= CHECK(cntfrq, boot, cur, cpu);
-
-       /*
-        * The kernel uses self-hosted debug features and expects CPUs to
-        * support identical debug features. We presently need CTX_CMPs, WRPs,
-        * and BRPs to be identical.
-        * ID_AA64DFR1 is currently RES0.
-        */
-       diff |= CHECK(id_aa64dfr0, boot, cur, cpu);
-       diff |= CHECK(id_aa64dfr1, boot, cur, cpu);
-
-       /*
-        * Even in big.LITTLE, processors should be identical instruction-set
-        * wise.
-        */
-       diff |= CHECK(id_aa64isar0, boot, cur, cpu);
-       diff |= CHECK(id_aa64isar1, boot, cur, cpu);
-
-       /*
-        * Differing PARange support is fine as long as all peripherals and
-        * memory are mapped within the minimum PARange of all CPUs.
-        * Linux should not care about secure memory.
-        * ID_AA64MMFR1 is currently RES0.
-        */
-       diff |= CHECK_MASK(id_aa64mmfr0, 0xffffffffffff0ff0, boot, cur, cpu);
-       diff |= CHECK(id_aa64mmfr1, boot, cur, cpu);
-
-       /*
-        * EL3 is not our concern.
-        * ID_AA64PFR1 is currently RES0.
-        */
-       diff |= CHECK_MASK(id_aa64pfr0, 0xffffffffffff0fff, boot, cur, cpu);
-       diff |= CHECK(id_aa64pfr1, boot, cur, cpu);
+       u32 l1ip = CTR_L1IP(info->reg_ctr);
 
-       /*
-        * If we have AArch32, we care about 32-bit features for compat. These
-        * registers should be RES0 otherwise.
-        */
-       diff |= CHECK(id_dfr0, boot, cur, cpu);
-       diff |= CHECK(id_isar0, boot, cur, cpu);
-       diff |= CHECK(id_isar1, boot, cur, cpu);
-       diff |= CHECK(id_isar2, boot, cur, cpu);
-       diff |= CHECK(id_isar3, boot, cur, cpu);
-       diff |= CHECK(id_isar4, boot, cur, cpu);
-       diff |= CHECK(id_isar5, boot, cur, cpu);
-       /*
-        * Regardless of the value of the AuxReg field, the AIFSR, ADFSR, and
-        * ACTLR formats could differ across CPUs and therefore would have to
-        * be trapped for virtualization anyway.
-        */
-       diff |= CHECK_MASK(id_mmfr0, 0xff0fffff, boot, cur, cpu);
-       diff |= CHECK(id_mmfr1, boot, cur, cpu);
-       diff |= CHECK(id_mmfr2, boot, cur, cpu);
-       diff |= CHECK(id_mmfr3, boot, cur, cpu);
-       diff |= CHECK(id_pfr0, boot, cur, cpu);
-       diff |= CHECK(id_pfr1, boot, cur, cpu);
+       if (l1ip != ICACHE_POLICY_PIPT) {
+               /*
+                * VIPT caches are non-aliasing if the VA always equals the PA
+                * in all bit positions that are covered by the index. This is
+                * the case if the size of a way (# of sets * line size) does
+                * not exceed PAGE_SIZE.
+                */
+               u32 waysize = icache_get_numsets() * icache_get_linesize();
 
-       diff |= CHECK(mvfr0, boot, cur, cpu);
-       diff |= CHECK(mvfr1, boot, cur, cpu);
-       diff |= CHECK(mvfr2, boot, cur, cpu);
+               if (l1ip != ICACHE_POLICY_VIPT || waysize > PAGE_SIZE)
+                       set_bit(ICACHEF_ALIASING, &__icache_flags);
+       }
+       if (l1ip == ICACHE_POLICY_AIVIVT)
+               set_bit(ICACHEF_AIVIVT, &__icache_flags);
 
-       /*
-        * Mismatched CPU features are a recipe for disaster. Don't even
-        * pretend to support them.
-        */
-       WARN_TAINT_ONCE(diff, TAINT_CPU_OUT_OF_SPEC,
-                       "Unsupported CPU feature variation.\n");
+       pr_info("Detected %s I-cache on CPU%d\n", icache_policy_str[l1ip], cpu);
 }
 
 static void __cpuinfo_store_cpu(struct cpuinfo_arm64 *info)
@@ -236,15 +229,13 @@ static void __cpuinfo_store_cpu(struct cpuinfo_arm64 *info)
        cpuinfo_detect_icache_policy(info);
 
        check_local_cpu_errata();
-       check_local_cpu_features();
-       update_cpu_features(info);
 }
 
 void cpuinfo_store_cpu(void)
 {
        struct cpuinfo_arm64 *info = this_cpu_ptr(&cpu_data);
        __cpuinfo_store_cpu(info);
-       cpuinfo_sanity_check(info);
+       update_cpu_features(smp_processor_id(), info, &boot_cpu_data);
 }
 
 void __init cpuinfo_store_boot_cpu(void)
@@ -253,4 +244,5 @@ void __init cpuinfo_store_boot_cpu(void)
        __cpuinfo_store_cpu(info);
 
        boot_cpu_data = *info;
+       init_cpu_features(&boot_cpu_data);
 }
index 253021ef2769078e69793288a8cc067aebb76d34..cd9ea8f078b39e048a466e861d9b17936aa8cdfc 100644 (file)
 #include <linux/stat.h>
 #include <linux/uaccess.h>
 
-#include <asm/debug-monitors.h>
+#include <asm/cpufeature.h>
 #include <asm/cputype.h>
+#include <asm/debug-monitors.h>
 #include <asm/system_misc.h>
 
 /* Determine debug architecture. */
 u8 debug_monitors_arch(void)
 {
-       return read_cpuid(ID_AA64DFR0_EL1) & 0xf;
+       return cpuid_feature_extract_field(read_system_reg(SYS_ID_AA64DFR0_EL1),
+                                               ID_AA64DFR0_DEBUGVER_SHIFT);
 }
 
 /*
index 8ce9b0577442395df912ca934f0fca338948b4a8..a773db92908b03d325c26dba0dc5ea9c287ef49d 100644 (file)
@@ -29,7 +29,7 @@
         * we want to be. The kernel image wants to be placed at TEXT_OFFSET
         * from start of RAM.
         */
-ENTRY(efi_stub_entry)
+ENTRY(entry)
        /*
         * Create a stack frame to save FP/LR with extra space
         * for image_addr variable passed to efi_entry().
@@ -86,8 +86,8 @@ ENTRY(efi_stub_entry)
         * entries for the VA range of the current image, so no maintenance is
         * necessary.
         */
-       adr     x0, efi_stub_entry
-       adr     x1, efi_stub_entry_end
+       adr     x0, entry
+       adr     x1, entry_end
        sub     x1, x1, x0
        bl      __flush_dcache_area
 
@@ -120,5 +120,5 @@ efi_load_fail:
        ldp     x29, x30, [sp], #32
        ret
 
-efi_stub_entry_end:
-ENDPROC(efi_stub_entry)
+entry_end:
+ENDPROC(entry)
index 61eb1d17586a859a1fcc1d0d0fd653028d736b11..de46b50f4cdf952087e77d473314e75728efeec0 100644 (file)
@@ -48,7 +48,6 @@ static struct mm_struct efi_mm = {
        .mmap_sem               = __RWSEM_INITIALIZER(efi_mm.mmap_sem),
        .page_table_lock        = __SPIN_LOCK_UNLOCKED(efi_mm.page_table_lock),
        .mmlist                 = LIST_HEAD_INIT(efi_mm.mmlist),
-       INIT_MM_CONTEXT(efi_mm)
 };
 
 static int __init is_normal_ram(efi_memory_desc_t *md)
@@ -335,9 +334,9 @@ static void efi_set_pgd(struct mm_struct *mm)
        else
                cpu_switch_mm(mm->pgd, mm);
 
-       flush_tlb_all();
+       local_flush_tlb_all();
        if (icache_is_aivivt())
-               __flush_icache_all();
+               __local_flush_icache_all();
 }
 
 void efi_virtmap_load(void)
index 4306c937b1ffc4fb224f88d176d30e8fb65a3c70..7ed3d75f630418b56a1add8c91b308b48cd69774 100644 (file)
@@ -430,6 +430,8 @@ el0_sync_compat:
        b.eq    el0_fpsimd_acc
        cmp     x24, #ESR_ELx_EC_FP_EXC32       // FP/ASIMD exception
        b.eq    el0_fpsimd_exc
+       cmp     x24, #ESR_ELx_EC_PC_ALIGN       // pc alignment exception
+       b.eq    el0_sp_pc
        cmp     x24, #ESR_ELx_EC_UNKNOWN        // unknown exception in EL0
        b.eq    el0_undef
        cmp     x24, #ESR_ELx_EC_CP15_32        // CP15 MRC/MCR trap
index c56956a16d3f0c9875e0bef4ff09e05e3fe2506d..4c46c54a3ad7ad817b8ba410565b8eff47cd3c08 100644 (file)
@@ -332,21 +332,15 @@ static inline void fpsimd_hotplug_init(void) { }
  */
 static int __init fpsimd_init(void)
 {
-       u64 pfr = read_cpuid(ID_AA64PFR0_EL1);
-
-       if (pfr & (0xf << 16)) {
+       if (elf_hwcap & HWCAP_FP) {
+               fpsimd_pm_init();
+               fpsimd_hotplug_init();
+       } else {
                pr_notice("Floating-point is not implemented\n");
-               return 0;
        }
-       elf_hwcap |= HWCAP_FP;
 
-       if (pfr & (0xf << 20))
+       if (!(elf_hwcap & HWCAP_ASIMD))
                pr_notice("Advanced SIMD is not implemented\n");
-       else
-               elf_hwcap |= HWCAP_ASIMD;
-
-       fpsimd_pm_init();
-       fpsimd_hotplug_init();
 
        return 0;
 }
index 351a4de1b1e26e07e3b1920d06b199e778042811..23cfc08fc8ba88683600d7618acb9bdb400ae2b9 100644 (file)
 #include <asm/asm-offsets.h>
 #include <asm/cache.h>
 #include <asm/cputype.h>
+#include <asm/kernel-pgtable.h>
 #include <asm/memory.h>
-#include <asm/thread_info.h>
 #include <asm/pgtable-hwdef.h>
 #include <asm/pgtable.h>
 #include <asm/page.h>
+#include <asm/sysreg.h>
+#include <asm/thread_info.h>
 #include <asm/virt.h>
 
 #define __PHYS_OFFSET  (KERNEL_START - TEXT_OFFSET)
 #error TEXT_OFFSET must be less than 2MB
 #endif
 
-#ifdef CONFIG_ARM64_64K_PAGES
-#define BLOCK_SHIFT    PAGE_SHIFT
-#define BLOCK_SIZE     PAGE_SIZE
-#define TABLE_SHIFT    PMD_SHIFT
-#else
-#define BLOCK_SHIFT    SECTION_SHIFT
-#define BLOCK_SIZE     SECTION_SIZE
-#define TABLE_SHIFT    PUD_SHIFT
-#endif
-
 #define KERNEL_START   _text
 #define KERNEL_END     _end
 
-/*
- * Initial memory map attributes.
- */
-#define PTE_FLAGS      PTE_TYPE_PAGE | PTE_AF | PTE_SHARED
-#define PMD_FLAGS      PMD_TYPE_SECT | PMD_SECT_AF | PMD_SECT_S
-
-#ifdef CONFIG_ARM64_64K_PAGES
-#define MM_MMUFLAGS    PTE_ATTRINDX(MT_NORMAL) | PTE_FLAGS
-#else
-#define MM_MMUFLAGS    PMD_ATTRINDX(MT_NORMAL) | PMD_FLAGS
-#endif
-
 /*
  * Kernel startup entry point.
  * ---------------------------
@@ -120,8 +100,8 @@ efi_head:
 #endif
 
 #ifdef CONFIG_EFI
-       .globl  stext_offset
-       .set    stext_offset, stext - efi_head
+       .globl  __efistub_stext_offset
+       .set    __efistub_stext_offset, stext - efi_head
        .align 3
 pe_header:
        .ascii  "PE"
@@ -144,8 +124,8 @@ optional_header:
        .long   _end - stext                    // SizeOfCode
        .long   0                               // SizeOfInitializedData
        .long   0                               // SizeOfUninitializedData
-       .long   efi_stub_entry - efi_head       // AddressOfEntryPoint
-       .long   stext_offset                    // BaseOfCode
+       .long   __efistub_entry - efi_head      // AddressOfEntryPoint
+       .long   __efistub_stext_offset          // BaseOfCode
 
 extra_header_fields:
        .quad   0                               // ImageBase
@@ -162,7 +142,7 @@ extra_header_fields:
        .long   _end - efi_head                 // SizeOfImage
 
        // Everything before the kernel image is considered part of the header
-       .long   stext_offset                    // SizeOfHeaders
+       .long   __efistub_stext_offset          // SizeOfHeaders
        .long   0                               // CheckSum
        .short  0xa                             // Subsystem (EFI application)
        .short  0                               // DllCharacteristics
@@ -207,9 +187,9 @@ section_table:
        .byte   0
        .byte   0                       // end of 0 padding of section name
        .long   _end - stext            // VirtualSize
-       .long   stext_offset            // VirtualAddress
+       .long   __efistub_stext_offset  // VirtualAddress
        .long   _edata - stext          // SizeOfRawData
-       .long   stext_offset            // PointerToRawData
+       .long   __efistub_stext_offset  // PointerToRawData
 
        .long   0               // PointerToRelocations (0 for executables)
        .long   0               // PointerToLineNumbers (0 for executables)
@@ -292,8 +272,11 @@ ENDPROC(preserve_boot_args)
  */
        .macro  create_pgd_entry, tbl, virt, tmp1, tmp2
        create_table_entry \tbl, \virt, PGDIR_SHIFT, PTRS_PER_PGD, \tmp1, \tmp2
-#if SWAPPER_PGTABLE_LEVELS == 3
-       create_table_entry \tbl, \virt, TABLE_SHIFT, PTRS_PER_PTE, \tmp1, \tmp2
+#if SWAPPER_PGTABLE_LEVELS > 3
+       create_table_entry \tbl, \virt, PUD_SHIFT, PTRS_PER_PUD, \tmp1, \tmp2
+#endif
+#if SWAPPER_PGTABLE_LEVELS > 2
+       create_table_entry \tbl, \virt, SWAPPER_TABLE_SHIFT, PTRS_PER_PTE, \tmp1, \tmp2
 #endif
        .endm
 
@@ -305,15 +288,15 @@ ENDPROC(preserve_boot_args)
  * Corrupts:   phys, start, end, pstate
  */
        .macro  create_block_map, tbl, flags, phys, start, end
-       lsr     \phys, \phys, #BLOCK_SHIFT
-       lsr     \start, \start, #BLOCK_SHIFT
+       lsr     \phys, \phys, #SWAPPER_BLOCK_SHIFT
+       lsr     \start, \start, #SWAPPER_BLOCK_SHIFT
        and     \start, \start, #PTRS_PER_PTE - 1       // table index
-       orr     \phys, \flags, \phys, lsl #BLOCK_SHIFT  // table entry
-       lsr     \end, \end, #BLOCK_SHIFT
+       orr     \phys, \flags, \phys, lsl #SWAPPER_BLOCK_SHIFT  // table entry
+       lsr     \end, \end, #SWAPPER_BLOCK_SHIFT
        and     \end, \end, #PTRS_PER_PTE - 1           // table end index
 9999:  str     \phys, [\tbl, \start, lsl #3]           // store the entry
        add     \start, \start, #1                      // next entry
-       add     \phys, \phys, #BLOCK_SIZE               // next block
+       add     \phys, \phys, #SWAPPER_BLOCK_SIZE               // next block
        cmp     \start, \end
        b.ls    9999b
        .endm
@@ -350,7 +333,7 @@ __create_page_tables:
        cmp     x0, x6
        b.lo    1b
 
-       ldr     x7, =MM_MMUFLAGS
+       ldr     x7, =SWAPPER_MM_MMUFLAGS
 
        /*
         * Create the identity mapping.
@@ -444,6 +427,9 @@ __mmap_switched:
        str_l   x21, __fdt_pointer, x5          // Save FDT pointer
        str_l   x24, memstart_addr, x6          // Save PHYS_OFFSET
        mov     x29, #0
+#ifdef CONFIG_KASAN
+       bl      kasan_early_init
+#endif
        b       start_kernel
 ENDPROC(__mmap_switched)
 
@@ -630,10 +616,17 @@ ENDPROC(__secondary_switched)
  *  x0  = SCTLR_EL1 value for turning on the MMU.
  *  x27 = *virtual* address to jump to upon completion
  *
- * other registers depend on the function called upon completion
+ * Other registers depend on the function called upon completion.
+ *
+ * Checks whether the selected granule size is supported by the CPU.
+ * If it isn't, the CPU is parked.
  */
        .section        ".idmap.text", "ax"
 __enable_mmu:
+       mrs     x1, ID_AA64MMFR0_EL1
+       ubfx    x2, x1, #ID_AA64MMFR0_TGRAN_SHIFT, 4
+       cmp     x2, #ID_AA64MMFR0_TGRAN_SUPPORTED
+       b.ne    __no_granule_support
        ldr     x5, =vectors
        msr     vbar_el1, x5
        msr     ttbr0_el1, x25                  // load TTBR0
@@ -651,3 +644,8 @@ __enable_mmu:
        isb
        br      x27
 ENDPROC(__enable_mmu)
+
+__no_granule_support:
+       wfe
+       b __no_granule_support
+ENDPROC(__no_granule_support)
index bba85c8f80373937ef9fe746e1a2ed4fc39f58ee..b45c95d34b8323e74992e0a4a56e6da0e1257c60 100644 (file)
@@ -28,6 +28,7 @@
 #include <linux/ptrace.h>
 #include <linux/smp.h>
 
+#include <asm/compat.h>
 #include <asm/current.h>
 #include <asm/debug-monitors.h>
 #include <asm/hw_breakpoint.h>
@@ -163,6 +164,20 @@ enum hw_breakpoint_ops {
        HW_BREAKPOINT_RESTORE
 };
 
+static int is_compat_bp(struct perf_event *bp)
+{
+       struct task_struct *tsk = bp->hw.target;
+
+       /*
+        * tsk can be NULL for per-cpu (non-ptrace) breakpoints.
+        * In this case, use the native interface, since we don't have
+        * the notion of a "compat CPU" and could end up relying on
+        * deprecated behaviour if we use unaligned watchpoints in
+        * AArch64 state.
+        */
+       return tsk && is_compat_thread(task_thread_info(tsk));
+}
+
 /**
  * hw_breakpoint_slot_setup - Find and setup a perf slot according to
  *                           operations
@@ -420,7 +435,7 @@ static int arch_build_bp_info(struct perf_event *bp)
         * Watchpoints can be of length 1, 2, 4 or 8 bytes.
         */
        if (info->ctrl.type == ARM_BREAKPOINT_EXECUTE) {
-               if (is_compat_task()) {
+               if (is_compat_bp(bp)) {
                        if (info->ctrl.len != ARM_BREAKPOINT_LEN_2 &&
                            info->ctrl.len != ARM_BREAKPOINT_LEN_4)
                                return -EINVAL;
@@ -477,7 +492,7 @@ int arch_validate_hwbkpt_settings(struct perf_event *bp)
         * AArch32 tasks expect some simple alignment fixups, so emulate
         * that here.
         */
-       if (is_compat_task()) {
+       if (is_compat_bp(bp)) {
                if (info->ctrl.len == ARM_BREAKPOINT_LEN_8)
                        alignment_mask = 0x7;
                else
index 8fae0756e1759bad77406b0e3c76cad64b998ae0..bc2abb8b1599576ae2dec02bce0c46c48fc707dd 100644 (file)
 #define __HEAD_FLAG_BE 0
 #endif
 
-#define __HEAD_FLAGS   (__HEAD_FLAG_BE << 0)
+#define __HEAD_FLAG_PAGE_SIZE ((PAGE_SHIFT - 10) / 2)
+
+#define __HEAD_FLAGS   ((__HEAD_FLAG_BE << 0) |        \
+                        (__HEAD_FLAG_PAGE_SIZE << 1))
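
A quick worked example of the page-size flag arithmetic (assuming the standard arm64 granule sizes):

/*
 *   4K  pages: PAGE_SHIFT = 12  ->  (12 - 10) / 2 = 1
 *   16K pages: PAGE_SHIFT = 14  ->  (14 - 10) / 2 = 2
 *   64K pages: PAGE_SHIFT = 16  ->  (16 - 10) / 2 = 3
 *
 * so bits [2:1] of the Image header flags advertise the kernel's page size.
 */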
 
 /*
  * These will output as part of the Image header, which should be little-endian
        _kernel_offset_le       = DATA_LE64(TEXT_OFFSET);       \
        _kernel_flags_le        = DATA_LE64(__HEAD_FLAGS);
 
+#ifdef CONFIG_EFI
+
+/*
+ * The EFI stub has its own symbol namespace prefixed by __efistub_, to
+ * isolate it from the kernel proper. The following symbols are legally
+ * accessed by the stub, so provide some aliases to make them accessible.
+ * Only include data symbols here, or text symbols of functions that are
+ * guaranteed to be safe when executed at an offset other than the one
+ * they were linked at. The routines below are all implemented in
+ * assembler in a position-independent manner.
+ */
+__efistub_memcmp               = __pi_memcmp;
+__efistub_memchr               = __pi_memchr;
+__efistub_memcpy               = __pi_memcpy;
+__efistub_memmove              = __pi_memmove;
+__efistub_memset               = __pi_memset;
+__efistub_strlen               = __pi_strlen;
+__efistub_strcmp               = __pi_strcmp;
+__efistub_strncmp              = __pi_strncmp;
+__efistub___flush_dcache_area  = __pi___flush_dcache_area;
+
+#ifdef CONFIG_KASAN
+__efistub___memcpy             = __pi_memcpy;
+__efistub___memmove            = __pi_memmove;
+__efistub___memset             = __pi_memset;
+#endif
+
+__efistub__text                        = _text;
+__efistub__end                 = _end;
+__efistub__edata               = _edata;
+
+#endif
+
 #endif /* __ASM_IMAGE_H */
index 11dc3fd478537236387ff0dd795235d45c92e300..9f17ec071ee0e8a8b1380133cf319a9ad1c80de5 100644 (file)
@@ -27,7 +27,6 @@
 #include <linux/init.h>
 #include <linux/irqchip.h>
 #include <linux/seq_file.h>
-#include <linux/ratelimit.h>
 
 unsigned long irq_err_count;
 
@@ -54,64 +53,3 @@ void __init init_IRQ(void)
        if (!handle_arch_irq)
                panic("No interrupt controller found.");
 }
-
-#ifdef CONFIG_HOTPLUG_CPU
-static bool migrate_one_irq(struct irq_desc *desc)
-{
-       struct irq_data *d = irq_desc_get_irq_data(desc);
-       const struct cpumask *affinity = irq_data_get_affinity_mask(d);
-       struct irq_chip *c;
-       bool ret = false;
-
-       /*
-        * If this is a per-CPU interrupt, or the affinity does not
-        * include this CPU, then we have nothing to do.
-        */
-       if (irqd_is_per_cpu(d) || !cpumask_test_cpu(smp_processor_id(), affinity))
-               return false;
-
-       if (cpumask_any_and(affinity, cpu_online_mask) >= nr_cpu_ids) {
-               affinity = cpu_online_mask;
-               ret = true;
-       }
-
-       c = irq_data_get_irq_chip(d);
-       if (!c->irq_set_affinity)
-               pr_debug("IRQ%u: unable to set affinity\n", d->irq);
-       else if (c->irq_set_affinity(d, affinity, false) == IRQ_SET_MASK_OK && ret)
-               cpumask_copy(irq_data_get_affinity_mask(d), affinity);
-
-       return ret;
-}
-
-/*
- * The current CPU has been marked offline.  Migrate IRQs off this CPU.
- * If the affinity settings do not allow other CPUs, force them onto any
- * available CPU.
- *
- * Note: we must iterate over all IRQs, whether they have an attached
- * action structure or not, as we need to get chained interrupts too.
- */
-void migrate_irqs(void)
-{
-       unsigned int i;
-       struct irq_desc *desc;
-       unsigned long flags;
-
-       local_irq_save(flags);
-
-       for_each_irq_desc(i, desc) {
-               bool affinity_broken;
-
-               raw_spin_lock(&desc->lock);
-               affinity_broken = migrate_one_irq(desc);
-               raw_spin_unlock(&desc->lock);
-
-               if (affinity_broken)
-                       pr_warn_ratelimited("IRQ%u no longer affine to CPU%u\n",
-                                           i, smp_processor_id());
-       }
-
-       local_irq_restore(flags);
-}
-#endif /* CONFIG_HOTPLUG_CPU */
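
The block removed above was arm64's private copy of the IRQ-migration-on-hotplug logic; a later hunk in the SMP code switches to the generic irq_migrate_all_off_this_cpu() helper instead. As a standalone illustration of the core decision the removed migrate_one_irq() made, here is a toy bitmask version in plain C with no kernel APIs:

#include <stdio.h>

struct irq { unsigned long affinity; };   /* CPU bitmask, one bit per CPU */

/* Returns 1 if the affinity had to be broken to keep the IRQ serviced. */
static int retarget_irq(struct irq *irq, unsigned long online_mask)
{
        if (irq->affinity & online_mask)
                return 0;                 /* an allowed CPU is still online */

        irq->affinity = online_mask;      /* fall back to any online CPU */
        return 1;
}

int main(void)
{
        struct irq a = { .affinity = 1UL << 1 };        /* bound to CPU1 only */
        unsigned long online = 1UL << 0;                /* CPU1 just went offline */

        printf("broken=%d new_affinity=0x%lx\n",
               retarget_irq(&a, online), a.affinity);
        return 0;
}
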
index 876eb8df50bf3355ac8432a2ddf2a5c46810a5de..f4bc779e62e887547b7a17b7672487f0851e1479 100644
@@ -21,6 +21,7 @@
 #include <linux/bitops.h>
 #include <linux/elf.h>
 #include <linux/gfp.h>
+#include <linux/kasan.h>
 #include <linux/kernel.h>
 #include <linux/mm.h>
 #include <linux/moduleloader.h>
 
 void *module_alloc(unsigned long size)
 {
-       return __vmalloc_node_range(size, 1, MODULES_VADDR, MODULES_END,
-                                   GFP_KERNEL, PAGE_KERNEL_EXEC, 0,
-                                   NUMA_NO_NODE, __builtin_return_address(0));
+       void *p;
+
+       p = __vmalloc_node_range(size, MODULE_ALIGN, MODULES_VADDR, MODULES_END,
+                               GFP_KERNEL, PAGE_KERNEL_EXEC, 0,
+                               NUMA_NO_NODE, __builtin_return_address(0));
+
+       if (p && (kasan_module_alloc(p, size) < 0)) {
+               vfree(p);
+               return NULL;
+       }
+
+       return p;
 }
 
 enum aarch64_reloc_op {
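
module_alloc() now pairs the vmalloc mapping with a KASAN shadow allocation and backs the mapping out cleanly if the shadow cannot be set up. The same allocate-then-attach-else-undo pattern in a self-contained toy form, where malloc() and a fake setup_shadow() stand in for the kernel calls:

#include <stdio.h>
#include <stdlib.h>

/* Stand-in for kasan_module_alloc(): pretend shadow setup can fail. */
static int setup_shadow(void *addr, size_t size)
{
        return size > (4UL << 20) ? -1 : 0;
}

/* Allocate first, then try the shadow; undo the allocation on failure. */
static void *region_alloc(size_t size)
{
        void *p = malloc(size);

        if (p && setup_shadow(p, size) < 0) {
                free(p);
                return NULL;
        }
        return p;
}

int main(void)
{
        void *small = region_alloc(4096);
        void *huge  = region_alloc(8UL << 20);

        printf("small: %s, huge: %s\n",
               small ? "ok" : "failed", huge ? "ok" : "failed");
        free(small);
        return 0;
}
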
index f9a74d4fff3b5ee8f37f1cdfedda00dfb8b5156d..5b1897e8ca2476b20336658c65e3e19665bf0cdd 100644
  * You should have received a copy of the GNU General Public License
  * along with this program.  If not, see <http://www.gnu.org/licenses/>.
  */
-#define pr_fmt(fmt) "hw perfevents: " fmt
-
-#include <linux/bitmap.h>
-#include <linux/interrupt.h>
-#include <linux/irq.h>
-#include <linux/kernel.h>
-#include <linux/export.h>
-#include <linux/of_device.h>
-#include <linux/perf_event.h>
-#include <linux/platform_device.h>
-#include <linux/slab.h>
-#include <linux/spinlock.h>
-#include <linux/uaccess.h>
 
-#include <asm/cputype.h>
-#include <asm/irq.h>
 #include <asm/irq_regs.h>
-#include <asm/pmu.h>
-
-/*
- * ARMv8 supports a maximum of 32 events.
- * The cycle counter is included in this total.
- */
-#define ARMPMU_MAX_HWEVENTS            32
-
-static DEFINE_PER_CPU(struct perf_event * [ARMPMU_MAX_HWEVENTS], hw_events);
-static DEFINE_PER_CPU(unsigned long [BITS_TO_LONGS(ARMPMU_MAX_HWEVENTS)], used_mask);
-static DEFINE_PER_CPU(struct pmu_hw_events, cpu_hw_events);
-
-#define to_arm_pmu(p) (container_of(p, struct arm_pmu, pmu))
-
-/* Set at runtime when we know what CPU type we are. */
-static struct arm_pmu *cpu_pmu;
-
-int
-armpmu_get_max_events(void)
-{
-       int max_events = 0;
-
-       if (cpu_pmu != NULL)
-               max_events = cpu_pmu->num_events;
-
-       return max_events;
-}
-EXPORT_SYMBOL_GPL(armpmu_get_max_events);
-
-int perf_num_counters(void)
-{
-       return armpmu_get_max_events();
-}
-EXPORT_SYMBOL_GPL(perf_num_counters);
-
-#define HW_OP_UNSUPPORTED              0xFFFF
-
-#define C(_x) \
-       PERF_COUNT_HW_CACHE_##_x
-
-#define CACHE_OP_UNSUPPORTED           0xFFFF
-
-#define PERF_MAP_ALL_UNSUPPORTED                                       \
-       [0 ... PERF_COUNT_HW_MAX - 1] = HW_OP_UNSUPPORTED
-
-#define PERF_CACHE_MAP_ALL_UNSUPPORTED                                 \
-[0 ... C(MAX) - 1] = {                                                 \
-       [0 ... C(OP_MAX) - 1] = {                                       \
-               [0 ... C(RESULT_MAX) - 1] = CACHE_OP_UNSUPPORTED,       \
-       },                                                              \
-}
-
-static int
-armpmu_map_cache_event(const unsigned (*cache_map)
-                                     [PERF_COUNT_HW_CACHE_MAX]
-                                     [PERF_COUNT_HW_CACHE_OP_MAX]
-                                     [PERF_COUNT_HW_CACHE_RESULT_MAX],
-                      u64 config)
-{
-       unsigned int cache_type, cache_op, cache_result, ret;
-
-       cache_type = (config >>  0) & 0xff;
-       if (cache_type >= PERF_COUNT_HW_CACHE_MAX)
-               return -EINVAL;
-
-       cache_op = (config >>  8) & 0xff;
-       if (cache_op >= PERF_COUNT_HW_CACHE_OP_MAX)
-               return -EINVAL;
-
-       cache_result = (config >> 16) & 0xff;
-       if (cache_result >= PERF_COUNT_HW_CACHE_RESULT_MAX)
-               return -EINVAL;
-
-       ret = (int)(*cache_map)[cache_type][cache_op][cache_result];
-
-       if (ret == CACHE_OP_UNSUPPORTED)
-               return -ENOENT;
-
-       return ret;
-}
-
-static int
-armpmu_map_event(const unsigned (*event_map)[PERF_COUNT_HW_MAX], u64 config)
-{
-       int mapping;
-
-       if (config >= PERF_COUNT_HW_MAX)
-               return -EINVAL;
-
-       mapping = (*event_map)[config];
-       return mapping == HW_OP_UNSUPPORTED ? -ENOENT : mapping;
-}
-
-static int
-armpmu_map_raw_event(u32 raw_event_mask, u64 config)
-{
-       return (int)(config & raw_event_mask);
-}
-
-static int map_cpu_event(struct perf_event *event,
-                        const unsigned (*event_map)[PERF_COUNT_HW_MAX],
-                        const unsigned (*cache_map)
-                                       [PERF_COUNT_HW_CACHE_MAX]
-                                       [PERF_COUNT_HW_CACHE_OP_MAX]
-                                       [PERF_COUNT_HW_CACHE_RESULT_MAX],
-                        u32 raw_event_mask)
-{
-       u64 config = event->attr.config;
-
-       switch (event->attr.type) {
-       case PERF_TYPE_HARDWARE:
-               return armpmu_map_event(event_map, config);
-       case PERF_TYPE_HW_CACHE:
-               return armpmu_map_cache_event(cache_map, config);
-       case PERF_TYPE_RAW:
-               return armpmu_map_raw_event(raw_event_mask, config);
-       }
-
-       return -ENOENT;
-}
-
-int
-armpmu_event_set_period(struct perf_event *event,
-                       struct hw_perf_event *hwc,
-                       int idx)
-{
-       struct arm_pmu *armpmu = to_arm_pmu(event->pmu);
-       s64 left = local64_read(&hwc->period_left);
-       s64 period = hwc->sample_period;
-       int ret = 0;
-
-       if (unlikely(left <= -period)) {
-               left = period;
-               local64_set(&hwc->period_left, left);
-               hwc->last_period = period;
-               ret = 1;
-       }
-
-       if (unlikely(left <= 0)) {
-               left += period;
-               local64_set(&hwc->period_left, left);
-               hwc->last_period = period;
-               ret = 1;
-       }
-
-       /*
-        * Limit the maximum period to prevent the counter value
-        * from overtaking the one we are about to program. In
-        * effect we are reducing max_period to account for
-        * interrupt latency (and we are being very conservative).
-        */
-       if (left > (armpmu->max_period >> 1))
-               left = armpmu->max_period >> 1;
-
-       local64_set(&hwc->prev_count, (u64)-left);
-
-       armpmu->write_counter(idx, (u64)(-left) & 0xffffffff);
-
-       perf_event_update_userpage(event);
-
-       return ret;
-}
-
-u64
-armpmu_event_update(struct perf_event *event,
-                   struct hw_perf_event *hwc,
-                   int idx)
-{
-       struct arm_pmu *armpmu = to_arm_pmu(event->pmu);
-       u64 delta, prev_raw_count, new_raw_count;
-
-again:
-       prev_raw_count = local64_read(&hwc->prev_count);
-       new_raw_count = armpmu->read_counter(idx);
-
-       if (local64_cmpxchg(&hwc->prev_count, prev_raw_count,
-                            new_raw_count) != prev_raw_count)
-               goto again;
-
-       delta = (new_raw_count - prev_raw_count) & armpmu->max_period;
-
-       local64_add(delta, &event->count);
-       local64_sub(delta, &hwc->period_left);
-
-       return new_raw_count;
-}
-
-static void
-armpmu_read(struct perf_event *event)
-{
-       struct hw_perf_event *hwc = &event->hw;
-
-       /* Don't read disabled counters! */
-       if (hwc->idx < 0)
-               return;
-
-       armpmu_event_update(event, hwc, hwc->idx);
-}
-
-static void
-armpmu_stop(struct perf_event *event, int flags)
-{
-       struct arm_pmu *armpmu = to_arm_pmu(event->pmu);
-       struct hw_perf_event *hwc = &event->hw;
-
-       /*
-        * ARM pmu always has to update the counter, so ignore
-        * PERF_EF_UPDATE, see comments in armpmu_start().
-        */
-       if (!(hwc->state & PERF_HES_STOPPED)) {
-               armpmu->disable(hwc, hwc->idx);
-               barrier(); /* why? */
-               armpmu_event_update(event, hwc, hwc->idx);
-               hwc->state |= PERF_HES_STOPPED | PERF_HES_UPTODATE;
-       }
-}
-
-static void
-armpmu_start(struct perf_event *event, int flags)
-{
-       struct arm_pmu *armpmu = to_arm_pmu(event->pmu);
-       struct hw_perf_event *hwc = &event->hw;
-
-       /*
-        * ARM pmu always has to reprogram the period, so ignore
-        * PERF_EF_RELOAD, see the comment below.
-        */
-       if (flags & PERF_EF_RELOAD)
-               WARN_ON_ONCE(!(hwc->state & PERF_HES_UPTODATE));
-
-       hwc->state = 0;
-       /*
-        * Set the period again. Some counters can't be stopped, so when we
-        * were stopped we simply disabled the IRQ source and the counter
-        * may have been left counting. If we don't do this step then we may
-        * get an interrupt too soon or *way* too late if the overflow has
-        * happened since disabling.
-        */
-       armpmu_event_set_period(event, hwc, hwc->idx);
-       armpmu->enable(hwc, hwc->idx);
-}
-
-static void
-armpmu_del(struct perf_event *event, int flags)
-{
-       struct arm_pmu *armpmu = to_arm_pmu(event->pmu);
-       struct pmu_hw_events *hw_events = armpmu->get_hw_events();
-       struct hw_perf_event *hwc = &event->hw;
-       int idx = hwc->idx;
-
-       WARN_ON(idx < 0);
-
-       armpmu_stop(event, PERF_EF_UPDATE);
-       hw_events->events[idx] = NULL;
-       clear_bit(idx, hw_events->used_mask);
-
-       perf_event_update_userpage(event);
-}
-
-static int
-armpmu_add(struct perf_event *event, int flags)
-{
-       struct arm_pmu *armpmu = to_arm_pmu(event->pmu);
-       struct pmu_hw_events *hw_events = armpmu->get_hw_events();
-       struct hw_perf_event *hwc = &event->hw;
-       int idx;
-       int err = 0;
-
-       perf_pmu_disable(event->pmu);
-
-       /* If we don't have a space for the counter then finish early. */
-       idx = armpmu->get_event_idx(hw_events, hwc);
-       if (idx < 0) {
-               err = idx;
-               goto out;
-       }
-
-       /*
-        * If there is an event in the counter we are going to use then make
-        * sure it is disabled.
-        */
-       event->hw.idx = idx;
-       armpmu->disable(hwc, idx);
-       hw_events->events[idx] = event;
-
-       hwc->state = PERF_HES_STOPPED | PERF_HES_UPTODATE;
-       if (flags & PERF_EF_START)
-               armpmu_start(event, PERF_EF_RELOAD);
-
-       /* Propagate our changes to the userspace mapping. */
-       perf_event_update_userpage(event);
-
-out:
-       perf_pmu_enable(event->pmu);
-       return err;
-}
-
-static int
-validate_event(struct pmu *pmu, struct pmu_hw_events *hw_events,
-                               struct perf_event *event)
-{
-       struct arm_pmu *armpmu;
-       struct hw_perf_event fake_event = event->hw;
-       struct pmu *leader_pmu = event->group_leader->pmu;
-
-       if (is_software_event(event))
-               return 1;
-
-       /*
-        * Reject groups spanning multiple HW PMUs (e.g. CPU + CCI). The
-        * core perf code won't check that the pmu->ctx == leader->ctx
-        * until after pmu->event_init(event).
-        */
-       if (event->pmu != pmu)
-               return 0;
-
-       if (event->pmu != leader_pmu || event->state < PERF_EVENT_STATE_OFF)
-               return 1;
-
-       if (event->state == PERF_EVENT_STATE_OFF && !event->attr.enable_on_exec)
-               return 1;
-
-       armpmu = to_arm_pmu(event->pmu);
-       return armpmu->get_event_idx(hw_events, &fake_event) >= 0;
-}
-
-static int
-validate_group(struct perf_event *event)
-{
-       struct perf_event *sibling, *leader = event->group_leader;
-       struct pmu_hw_events fake_pmu;
-       DECLARE_BITMAP(fake_used_mask, ARMPMU_MAX_HWEVENTS);
-
-       /*
-        * Initialise the fake PMU. We only need to populate the
-        * used_mask for the purposes of validation.
-        */
-       memset(fake_used_mask, 0, sizeof(fake_used_mask));
-       fake_pmu.used_mask = fake_used_mask;
-
-       if (!validate_event(event->pmu, &fake_pmu, leader))
-               return -EINVAL;
-
-       list_for_each_entry(sibling, &leader->sibling_list, group_entry) {
-               if (!validate_event(event->pmu, &fake_pmu, sibling))
-                       return -EINVAL;
-       }
-
-       if (!validate_event(event->pmu, &fake_pmu, event))
-               return -EINVAL;
-
-       return 0;
-}
-
-static void
-armpmu_disable_percpu_irq(void *data)
-{
-       unsigned int irq = *(unsigned int *)data;
-       disable_percpu_irq(irq);
-}
-
-static void
-armpmu_release_hardware(struct arm_pmu *armpmu)
-{
-       int irq;
-       unsigned int i, irqs;
-       struct platform_device *pmu_device = armpmu->plat_device;
-
-       irqs = min(pmu_device->num_resources, num_possible_cpus());
-       if (!irqs)
-               return;
-
-       irq = platform_get_irq(pmu_device, 0);
-       if (irq <= 0)
-               return;
-
-       if (irq_is_percpu(irq)) {
-               on_each_cpu(armpmu_disable_percpu_irq, &irq, 1);
-               free_percpu_irq(irq, &cpu_hw_events);
-       } else {
-               for (i = 0; i < irqs; ++i) {
-                       int cpu = i;
-
-                       if (armpmu->irq_affinity)
-                               cpu = armpmu->irq_affinity[i];
-
-                       if (!cpumask_test_and_clear_cpu(cpu, &armpmu->active_irqs))
-                               continue;
-                       irq = platform_get_irq(pmu_device, i);
-                       if (irq > 0)
-                               free_irq(irq, armpmu);
-               }
-       }
-}
-
-static void
-armpmu_enable_percpu_irq(void *data)
-{
-       unsigned int irq = *(unsigned int *)data;
-       enable_percpu_irq(irq, IRQ_TYPE_NONE);
-}
-
-static int
-armpmu_reserve_hardware(struct arm_pmu *armpmu)
-{
-       int err, irq;
-       unsigned int i, irqs;
-       struct platform_device *pmu_device = armpmu->plat_device;
-
-       if (!pmu_device)
-               return -ENODEV;
-
-       irqs = min(pmu_device->num_resources, num_possible_cpus());
-       if (!irqs) {
-               pr_err("no irqs for PMUs defined\n");
-               return -ENODEV;
-       }
-
-       irq = platform_get_irq(pmu_device, 0);
-       if (irq <= 0) {
-               pr_err("failed to get valid irq for PMU device\n");
-               return -ENODEV;
-       }
-
-       if (irq_is_percpu(irq)) {
-               err = request_percpu_irq(irq, armpmu->handle_irq,
-                               "arm-pmu", &cpu_hw_events);
-
-               if (err) {
-                       pr_err("unable to request percpu IRQ%d for ARM PMU counters\n",
-                                       irq);
-                       armpmu_release_hardware(armpmu);
-                       return err;
-               }
-
-               on_each_cpu(armpmu_enable_percpu_irq, &irq, 1);
-       } else {
-               for (i = 0; i < irqs; ++i) {
-                       int cpu = i;
-
-                       err = 0;
-                       irq = platform_get_irq(pmu_device, i);
-                       if (irq <= 0)
-                               continue;
-
-                       if (armpmu->irq_affinity)
-                               cpu = armpmu->irq_affinity[i];
-
-                       /*
-                        * If we have a single PMU interrupt that we can't shift,
-                        * assume that we're running on a uniprocessor machine and
-                        * continue. Otherwise, continue without this interrupt.
-                        */
-                       if (irq_set_affinity(irq, cpumask_of(cpu)) && irqs > 1) {
-                               pr_warning("unable to set irq affinity (irq=%d, cpu=%u)\n",
-                                               irq, cpu);
-                               continue;
-                       }
-
-                       err = request_irq(irq, armpmu->handle_irq,
-                                       IRQF_NOBALANCING | IRQF_NO_THREAD,
-                                       "arm-pmu", armpmu);
-                       if (err) {
-                               pr_err("unable to request IRQ%d for ARM PMU counters\n",
-                                               irq);
-                               armpmu_release_hardware(armpmu);
-                               return err;
-                       }
-
-                       cpumask_set_cpu(cpu, &armpmu->active_irqs);
-               }
-       }
-
-       return 0;
-}
-
-static void
-hw_perf_event_destroy(struct perf_event *event)
-{
-       struct arm_pmu *armpmu = to_arm_pmu(event->pmu);
-       atomic_t *active_events  = &armpmu->active_events;
-       struct mutex *pmu_reserve_mutex = &armpmu->reserve_mutex;
-
-       if (atomic_dec_and_mutex_lock(active_events, pmu_reserve_mutex)) {
-               armpmu_release_hardware(armpmu);
-               mutex_unlock(pmu_reserve_mutex);
-       }
-}
-
-static int
-event_requires_mode_exclusion(struct perf_event_attr *attr)
-{
-       return attr->exclude_idle || attr->exclude_user ||
-              attr->exclude_kernel || attr->exclude_hv;
-}
-
-static int
-__hw_perf_event_init(struct perf_event *event)
-{
-       struct arm_pmu *armpmu = to_arm_pmu(event->pmu);
-       struct hw_perf_event *hwc = &event->hw;
-       int mapping, err;
-
-       mapping = armpmu->map_event(event);
-
-       if (mapping < 0) {
-               pr_debug("event %x:%llx not supported\n", event->attr.type,
-                        event->attr.config);
-               return mapping;
-       }
-
-       /*
-        * We don't assign an index until we actually place the event onto
-        * hardware. Use -1 to signify that we haven't decided where to put it
-        * yet. For SMP systems, each core has it's own PMU so we can't do any
-        * clever allocation or constraints checking at this point.
-        */
-       hwc->idx                = -1;
-       hwc->config_base        = 0;
-       hwc->config             = 0;
-       hwc->event_base         = 0;
-
-       /*
-        * Check whether we need to exclude the counter from certain modes.
-        */
-       if ((!armpmu->set_event_filter ||
-            armpmu->set_event_filter(hwc, &event->attr)) &&
-            event_requires_mode_exclusion(&event->attr)) {
-               pr_debug("ARM performance counters do not support mode exclusion\n");
-               return -EPERM;
-       }
-
-       /*
-        * Store the event encoding into the config_base field.
-        */
-       hwc->config_base            |= (unsigned long)mapping;
-
-       if (!hwc->sample_period) {
-               /*
-                * For non-sampling runs, limit the sample_period to half
-                * of the counter width. That way, the new counter value
-                * is far less likely to overtake the previous one unless
-                * you have some serious IRQ latency issues.
-                */
-               hwc->sample_period  = armpmu->max_period >> 1;
-               hwc->last_period    = hwc->sample_period;
-               local64_set(&hwc->period_left, hwc->sample_period);
-       }
-
-       err = 0;
-       if (event->group_leader != event) {
-               err = validate_group(event);
-               if (err)
-                       return -EINVAL;
-       }
-
-       return err;
-}
-
-static int armpmu_event_init(struct perf_event *event)
-{
-       struct arm_pmu *armpmu = to_arm_pmu(event->pmu);
-       int err = 0;
-       atomic_t *active_events = &armpmu->active_events;
-
-       if (armpmu->map_event(event) == -ENOENT)
-               return -ENOENT;
-
-       event->destroy = hw_perf_event_destroy;
-
-       if (!atomic_inc_not_zero(active_events)) {
-               mutex_lock(&armpmu->reserve_mutex);
-               if (atomic_read(active_events) == 0)
-                       err = armpmu_reserve_hardware(armpmu);
-
-               if (!err)
-                       atomic_inc(active_events);
-               mutex_unlock(&armpmu->reserve_mutex);
-       }
 
-       if (err)
-               return err;
-
-       err = __hw_perf_event_init(event);
-       if (err)
-               hw_perf_event_destroy(event);
-
-       return err;
-}
-
-static void armpmu_enable(struct pmu *pmu)
-{
-       struct arm_pmu *armpmu = to_arm_pmu(pmu);
-       struct pmu_hw_events *hw_events = armpmu->get_hw_events();
-       int enabled = bitmap_weight(hw_events->used_mask, armpmu->num_events);
-
-       if (enabled)
-               armpmu->start();
-}
-
-static void armpmu_disable(struct pmu *pmu)
-{
-       struct arm_pmu *armpmu = to_arm_pmu(pmu);
-       armpmu->stop();
-}
-
-static void __init armpmu_init(struct arm_pmu *armpmu)
-{
-       atomic_set(&armpmu->active_events, 0);
-       mutex_init(&armpmu->reserve_mutex);
-
-       armpmu->pmu = (struct pmu) {
-               .pmu_enable     = armpmu_enable,
-               .pmu_disable    = armpmu_disable,
-               .event_init     = armpmu_event_init,
-               .add            = armpmu_add,
-               .del            = armpmu_del,
-               .start          = armpmu_start,
-               .stop           = armpmu_stop,
-               .read           = armpmu_read,
-       };
-}
-
-int __init armpmu_register(struct arm_pmu *armpmu, char *name, int type)
-{
-       armpmu_init(armpmu);
-       return perf_pmu_register(&armpmu->pmu, name, type);
-}
+#include <linux/of.h>
+#include <linux/perf/arm_pmu.h>
+#include <linux/platform_device.h>
 
 /*
  * ARMv8 PMUv3 Performance Events handling code.
@@ -708,6 +69,21 @@ enum armv8_pmuv3_perf_types {
        ARMV8_PMUV3_PERFCTR_BUS_CYCLES                          = 0x1D,
 };
 
+/* ARMv8 Cortex-A53 specific event types. */
+enum armv8_a53_pmu_perf_types {
+       ARMV8_A53_PERFCTR_PREFETCH_LINEFILL                     = 0xC2,
+};
+
+/* ARMv8 Cortex-A57 specific event types. */
+enum armv8_a57_perf_types {
+       ARMV8_A57_PERFCTR_L1_DCACHE_ACCESS_LD                   = 0x40,
+       ARMV8_A57_PERFCTR_L1_DCACHE_ACCESS_ST                   = 0x41,
+       ARMV8_A57_PERFCTR_L1_DCACHE_REFILL_LD                   = 0x42,
+       ARMV8_A57_PERFCTR_L1_DCACHE_REFILL_ST                   = 0x43,
+       ARMV8_A57_PERFCTR_DTLB_REFILL_LD                        = 0x4c,
+       ARMV8_A57_PERFCTR_DTLB_REFILL_ST                        = 0x4d,
+};
+
 /* PMUv3 HW events mapping. */
 static const unsigned armv8_pmuv3_perf_map[PERF_COUNT_HW_MAX] = {
        PERF_MAP_ALL_UNSUPPORTED,
@@ -718,6 +94,28 @@ static const unsigned armv8_pmuv3_perf_map[PERF_COUNT_HW_MAX] = {
        [PERF_COUNT_HW_BRANCH_MISSES]           = ARMV8_PMUV3_PERFCTR_PC_BRANCH_MIS_PRED,
 };
 
+/* ARM Cortex-A53 HW events mapping. */
+static const unsigned armv8_a53_perf_map[PERF_COUNT_HW_MAX] = {
+       PERF_MAP_ALL_UNSUPPORTED,
+       [PERF_COUNT_HW_CPU_CYCLES]              = ARMV8_PMUV3_PERFCTR_CLOCK_CYCLES,
+       [PERF_COUNT_HW_INSTRUCTIONS]            = ARMV8_PMUV3_PERFCTR_INSTR_EXECUTED,
+       [PERF_COUNT_HW_CACHE_REFERENCES]        = ARMV8_PMUV3_PERFCTR_L1_DCACHE_ACCESS,
+       [PERF_COUNT_HW_CACHE_MISSES]            = ARMV8_PMUV3_PERFCTR_L1_DCACHE_REFILL,
+       [PERF_COUNT_HW_BRANCH_INSTRUCTIONS]     = ARMV8_PMUV3_PERFCTR_PC_WRITE,
+       [PERF_COUNT_HW_BRANCH_MISSES]           = ARMV8_PMUV3_PERFCTR_PC_BRANCH_MIS_PRED,
+       [PERF_COUNT_HW_BUS_CYCLES]              = ARMV8_PMUV3_PERFCTR_BUS_CYCLES,
+};
+
+static const unsigned armv8_a57_perf_map[PERF_COUNT_HW_MAX] = {
+       PERF_MAP_ALL_UNSUPPORTED,
+       [PERF_COUNT_HW_CPU_CYCLES]              = ARMV8_PMUV3_PERFCTR_CLOCK_CYCLES,
+       [PERF_COUNT_HW_INSTRUCTIONS]            = ARMV8_PMUV3_PERFCTR_INSTR_EXECUTED,
+       [PERF_COUNT_HW_CACHE_REFERENCES]        = ARMV8_PMUV3_PERFCTR_L1_DCACHE_ACCESS,
+       [PERF_COUNT_HW_CACHE_MISSES]            = ARMV8_PMUV3_PERFCTR_L1_DCACHE_REFILL,
+       [PERF_COUNT_HW_BRANCH_MISSES]           = ARMV8_PMUV3_PERFCTR_PC_BRANCH_MIS_PRED,
+       [PERF_COUNT_HW_BUS_CYCLES]              = ARMV8_PMUV3_PERFCTR_BUS_CYCLES,
+};
+
 static const unsigned armv8_pmuv3_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]
                                                [PERF_COUNT_HW_CACHE_OP_MAX]
                                                [PERF_COUNT_HW_CACHE_RESULT_MAX] = {
@@ -734,12 +132,60 @@ static const unsigned armv8_pmuv3_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]
        [C(BPU)][C(OP_WRITE)][C(RESULT_MISS)]   = ARMV8_PMUV3_PERFCTR_PC_BRANCH_MIS_PRED,
 };
 
+static const unsigned armv8_a53_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]
+                                             [PERF_COUNT_HW_CACHE_OP_MAX]
+                                             [PERF_COUNT_HW_CACHE_RESULT_MAX] = {
+       PERF_CACHE_MAP_ALL_UNSUPPORTED,
+
+       [C(L1D)][C(OP_READ)][C(RESULT_ACCESS)]  = ARMV8_PMUV3_PERFCTR_L1_DCACHE_ACCESS,
+       [C(L1D)][C(OP_READ)][C(RESULT_MISS)]    = ARMV8_PMUV3_PERFCTR_L1_DCACHE_REFILL,
+       [C(L1D)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV8_PMUV3_PERFCTR_L1_DCACHE_ACCESS,
+       [C(L1D)][C(OP_WRITE)][C(RESULT_MISS)]   = ARMV8_PMUV3_PERFCTR_L1_DCACHE_REFILL,
+       [C(L1D)][C(OP_PREFETCH)][C(RESULT_MISS)] = ARMV8_A53_PERFCTR_PREFETCH_LINEFILL,
+
+       [C(L1I)][C(OP_READ)][C(RESULT_ACCESS)]  = ARMV8_PMUV3_PERFCTR_L1_ICACHE_ACCESS,
+       [C(L1I)][C(OP_READ)][C(RESULT_MISS)]    = ARMV8_PMUV3_PERFCTR_L1_ICACHE_REFILL,
+
+       [C(ITLB)][C(OP_READ)][C(RESULT_MISS)]   = ARMV8_PMUV3_PERFCTR_ITLB_REFILL,
+
+       [C(BPU)][C(OP_READ)][C(RESULT_ACCESS)]  = ARMV8_PMUV3_PERFCTR_PC_BRANCH_PRED,
+       [C(BPU)][C(OP_READ)][C(RESULT_MISS)]    = ARMV8_PMUV3_PERFCTR_PC_BRANCH_MIS_PRED,
+       [C(BPU)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV8_PMUV3_PERFCTR_PC_BRANCH_PRED,
+       [C(BPU)][C(OP_WRITE)][C(RESULT_MISS)]   = ARMV8_PMUV3_PERFCTR_PC_BRANCH_MIS_PRED,
+};
+
+static const unsigned armv8_a57_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]
+                                             [PERF_COUNT_HW_CACHE_OP_MAX]
+                                             [PERF_COUNT_HW_CACHE_RESULT_MAX] = {
+       PERF_CACHE_MAP_ALL_UNSUPPORTED,
+
+       [C(L1D)][C(OP_READ)][C(RESULT_ACCESS)]  = ARMV8_A57_PERFCTR_L1_DCACHE_ACCESS_LD,
+       [C(L1D)][C(OP_READ)][C(RESULT_MISS)]    = ARMV8_A57_PERFCTR_L1_DCACHE_REFILL_LD,
+       [C(L1D)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV8_A57_PERFCTR_L1_DCACHE_ACCESS_ST,
+       [C(L1D)][C(OP_WRITE)][C(RESULT_MISS)]   = ARMV8_A57_PERFCTR_L1_DCACHE_REFILL_ST,
+
+       [C(L1I)][C(OP_READ)][C(RESULT_ACCESS)]  = ARMV8_PMUV3_PERFCTR_L1_ICACHE_ACCESS,
+       [C(L1I)][C(OP_READ)][C(RESULT_MISS)]    = ARMV8_PMUV3_PERFCTR_L1_ICACHE_REFILL,
+
+       [C(DTLB)][C(OP_READ)][C(RESULT_MISS)]   = ARMV8_A57_PERFCTR_DTLB_REFILL_LD,
+       [C(DTLB)][C(OP_WRITE)][C(RESULT_MISS)]  = ARMV8_A57_PERFCTR_DTLB_REFILL_ST,
+
+       [C(ITLB)][C(OP_READ)][C(RESULT_MISS)]   = ARMV8_PMUV3_PERFCTR_ITLB_REFILL,
+
+       [C(BPU)][C(OP_READ)][C(RESULT_ACCESS)]  = ARMV8_PMUV3_PERFCTR_PC_BRANCH_PRED,
+       [C(BPU)][C(OP_READ)][C(RESULT_MISS)]    = ARMV8_PMUV3_PERFCTR_PC_BRANCH_MIS_PRED,
+       [C(BPU)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV8_PMUV3_PERFCTR_PC_BRANCH_PRED,
+       [C(BPU)][C(OP_WRITE)][C(RESULT_MISS)]   = ARMV8_PMUV3_PERFCTR_PC_BRANCH_MIS_PRED,
+};
+
+
 /*
  * Perf Events' indices
  */
 #define        ARMV8_IDX_CYCLE_COUNTER 0
 #define        ARMV8_IDX_COUNTER0      1
-#define        ARMV8_IDX_COUNTER_LAST  (ARMV8_IDX_CYCLE_COUNTER + cpu_pmu->num_events - 1)
+#define        ARMV8_IDX_COUNTER_LAST(cpu_pmu) \
+       (ARMV8_IDX_CYCLE_COUNTER + cpu_pmu->num_events - 1)
 
 #define        ARMV8_MAX_COUNTERS      32
 #define        ARMV8_COUNTER_MASK      (ARMV8_MAX_COUNTERS - 1)
@@ -805,49 +251,34 @@ static inline int armv8pmu_has_overflowed(u32 pmovsr)
        return pmovsr & ARMV8_OVERFLOWED_MASK;
 }
 
-static inline int armv8pmu_counter_valid(int idx)
+static inline int armv8pmu_counter_valid(struct arm_pmu *cpu_pmu, int idx)
 {
-       return idx >= ARMV8_IDX_CYCLE_COUNTER && idx <= ARMV8_IDX_COUNTER_LAST;
+       return idx >= ARMV8_IDX_CYCLE_COUNTER &&
+               idx <= ARMV8_IDX_COUNTER_LAST(cpu_pmu);
 }
 
 static inline int armv8pmu_counter_has_overflowed(u32 pmnc, int idx)
 {
-       int ret = 0;
-       u32 counter;
-
-       if (!armv8pmu_counter_valid(idx)) {
-               pr_err("CPU%u checking wrong counter %d overflow status\n",
-                       smp_processor_id(), idx);
-       } else {
-               counter = ARMV8_IDX_TO_COUNTER(idx);
-               ret = pmnc & BIT(counter);
-       }
-
-       return ret;
+       return pmnc & BIT(ARMV8_IDX_TO_COUNTER(idx));
 }
 
 static inline int armv8pmu_select_counter(int idx)
 {
-       u32 counter;
-
-       if (!armv8pmu_counter_valid(idx)) {
-               pr_err("CPU%u selecting wrong PMNC counter %d\n",
-                       smp_processor_id(), idx);
-               return -EINVAL;
-       }
-
-       counter = ARMV8_IDX_TO_COUNTER(idx);
+       u32 counter = ARMV8_IDX_TO_COUNTER(idx);
        asm volatile("msr pmselr_el0, %0" :: "r" (counter));
        isb();
 
        return idx;
 }
 
-static inline u32 armv8pmu_read_counter(int idx)
+static inline u32 armv8pmu_read_counter(struct perf_event *event)
 {
+       struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu);
+       struct hw_perf_event *hwc = &event->hw;
+       int idx = hwc->idx;
        u32 value = 0;
 
-       if (!armv8pmu_counter_valid(idx))
+       if (!armv8pmu_counter_valid(cpu_pmu, idx))
                pr_err("CPU%u reading wrong counter %d\n",
                        smp_processor_id(), idx);
        else if (idx == ARMV8_IDX_CYCLE_COUNTER)
@@ -858,9 +289,13 @@ static inline u32 armv8pmu_read_counter(int idx)
        return value;
 }
 
-static inline void armv8pmu_write_counter(int idx, u32 value)
+static inline void armv8pmu_write_counter(struct perf_event *event, u32 value)
 {
-       if (!armv8pmu_counter_valid(idx))
+       struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu);
+       struct hw_perf_event *hwc = &event->hw;
+       int idx = hwc->idx;
+
+       if (!armv8pmu_counter_valid(cpu_pmu, idx))
                pr_err("CPU%u writing wrong counter %d\n",
                        smp_processor_id(), idx);
        else if (idx == ARMV8_IDX_CYCLE_COUNTER)
@@ -879,65 +314,34 @@ static inline void armv8pmu_write_evtype(int idx, u32 val)
 
 static inline int armv8pmu_enable_counter(int idx)
 {
-       u32 counter;
-
-       if (!armv8pmu_counter_valid(idx)) {
-               pr_err("CPU%u enabling wrong PMNC counter %d\n",
-                       smp_processor_id(), idx);
-               return -EINVAL;
-       }
-
-       counter = ARMV8_IDX_TO_COUNTER(idx);
+       u32 counter = ARMV8_IDX_TO_COUNTER(idx);
        asm volatile("msr pmcntenset_el0, %0" :: "r" (BIT(counter)));
        return idx;
 }
 
 static inline int armv8pmu_disable_counter(int idx)
 {
-       u32 counter;
-
-       if (!armv8pmu_counter_valid(idx)) {
-               pr_err("CPU%u disabling wrong PMNC counter %d\n",
-                       smp_processor_id(), idx);
-               return -EINVAL;
-       }
-
-       counter = ARMV8_IDX_TO_COUNTER(idx);
+       u32 counter = ARMV8_IDX_TO_COUNTER(idx);
        asm volatile("msr pmcntenclr_el0, %0" :: "r" (BIT(counter)));
        return idx;
 }
 
 static inline int armv8pmu_enable_intens(int idx)
 {
-       u32 counter;
-
-       if (!armv8pmu_counter_valid(idx)) {
-               pr_err("CPU%u enabling wrong PMNC counter IRQ enable %d\n",
-                       smp_processor_id(), idx);
-               return -EINVAL;
-       }
-
-       counter = ARMV8_IDX_TO_COUNTER(idx);
+       u32 counter = ARMV8_IDX_TO_COUNTER(idx);
        asm volatile("msr pmintenset_el1, %0" :: "r" (BIT(counter)));
        return idx;
 }
 
 static inline int armv8pmu_disable_intens(int idx)
 {
-       u32 counter;
-
-       if (!armv8pmu_counter_valid(idx)) {
-               pr_err("CPU%u disabling wrong PMNC counter IRQ enable %d\n",
-                       smp_processor_id(), idx);
-               return -EINVAL;
-       }
-
-       counter = ARMV8_IDX_TO_COUNTER(idx);
+       u32 counter = ARMV8_IDX_TO_COUNTER(idx);
        asm volatile("msr pmintenclr_el1, %0" :: "r" (BIT(counter)));
        isb();
        /* Clear the overflow flag in case an interrupt is pending. */
        asm volatile("msr pmovsclr_el0, %0" :: "r" (BIT(counter)));
        isb();
+
        return idx;
 }
 
@@ -955,10 +359,13 @@ static inline u32 armv8pmu_getreset_flags(void)
        return value;
 }
 
-static void armv8pmu_enable_event(struct hw_perf_event *hwc, int idx)
+static void armv8pmu_enable_event(struct perf_event *event)
 {
        unsigned long flags;
-       struct pmu_hw_events *events = cpu_pmu->get_hw_events();
+       struct hw_perf_event *hwc = &event->hw;
+       struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu);
+       struct pmu_hw_events *events = this_cpu_ptr(cpu_pmu->hw_events);
+       int idx = hwc->idx;
 
        /*
         * Enable counter and interrupt, and set the counter to count
@@ -989,10 +396,13 @@ static void armv8pmu_enable_event(struct hw_perf_event *hwc, int idx)
        raw_spin_unlock_irqrestore(&events->pmu_lock, flags);
 }
 
-static void armv8pmu_disable_event(struct hw_perf_event *hwc, int idx)
+static void armv8pmu_disable_event(struct perf_event *event)
 {
        unsigned long flags;
-       struct pmu_hw_events *events = cpu_pmu->get_hw_events();
+       struct hw_perf_event *hwc = &event->hw;
+       struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu);
+       struct pmu_hw_events *events = this_cpu_ptr(cpu_pmu->hw_events);
+       int idx = hwc->idx;
 
        /*
         * Disable counter and interrupt
@@ -1016,7 +426,8 @@ static irqreturn_t armv8pmu_handle_irq(int irq_num, void *dev)
 {
        u32 pmovsr;
        struct perf_sample_data data;
-       struct pmu_hw_events *cpuc;
+       struct arm_pmu *cpu_pmu = (struct arm_pmu *)dev;
+       struct pmu_hw_events *cpuc = this_cpu_ptr(cpu_pmu->hw_events);
        struct pt_regs *regs;
        int idx;
 
@@ -1036,7 +447,6 @@ static irqreturn_t armv8pmu_handle_irq(int irq_num, void *dev)
         */
        regs = get_irq_regs();
 
-       cpuc = this_cpu_ptr(&cpu_hw_events);
        for (idx = 0; idx < cpu_pmu->num_events; ++idx) {
                struct perf_event *event = cpuc->events[idx];
                struct hw_perf_event *hwc;
@@ -1053,13 +463,13 @@ static irqreturn_t armv8pmu_handle_irq(int irq_num, void *dev)
                        continue;
 
                hwc = &event->hw;
-               armpmu_event_update(event, hwc, idx);
+               armpmu_event_update(event);
                perf_sample_data_init(&data, 0, hwc->last_period);
-               if (!armpmu_event_set_period(event, hwc, idx))
+               if (!armpmu_event_set_period(event))
                        continue;
 
                if (perf_event_overflow(event, &data, regs))
-                       cpu_pmu->disable(hwc, idx);
+                       cpu_pmu->disable(event);
        }
 
        /*
@@ -1074,10 +484,10 @@ static irqreturn_t armv8pmu_handle_irq(int irq_num, void *dev)
        return IRQ_HANDLED;
 }
 
-static void armv8pmu_start(void)
+static void armv8pmu_start(struct arm_pmu *cpu_pmu)
 {
        unsigned long flags;
-       struct pmu_hw_events *events = cpu_pmu->get_hw_events();
+       struct pmu_hw_events *events = this_cpu_ptr(cpu_pmu->hw_events);
 
        raw_spin_lock_irqsave(&events->pmu_lock, flags);
        /* Enable all counters */
@@ -1085,10 +495,10 @@ static void armv8pmu_start(void)
        raw_spin_unlock_irqrestore(&events->pmu_lock, flags);
 }
 
-static void armv8pmu_stop(void)
+static void armv8pmu_stop(struct arm_pmu *cpu_pmu)
 {
        unsigned long flags;
-       struct pmu_hw_events *events = cpu_pmu->get_hw_events();
+       struct pmu_hw_events *events = this_cpu_ptr(cpu_pmu->hw_events);
 
        raw_spin_lock_irqsave(&events->pmu_lock, flags);
        /* Disable all counters */
@@ -1097,10 +507,12 @@ static void armv8pmu_stop(void)
 }
 
 static int armv8pmu_get_event_idx(struct pmu_hw_events *cpuc,
-                                 struct hw_perf_event *event)
+                                 struct perf_event *event)
 {
        int idx;
-       unsigned long evtype = event->config_base & ARMV8_EVTYPE_EVENT;
+       struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu);
+       struct hw_perf_event *hwc = &event->hw;
+       unsigned long evtype = hwc->config_base & ARMV8_EVTYPE_EVENT;
 
        /* Always place a cycle counter into the cycle counter. */
        if (evtype == ARMV8_PMUV3_PERFCTR_CLOCK_CYCLES) {
@@ -1151,11 +563,14 @@ static int armv8pmu_set_event_filter(struct hw_perf_event *event,
 
 static void armv8pmu_reset(void *info)
 {
+       struct arm_pmu *cpu_pmu = (struct arm_pmu *)info;
        u32 idx, nb_cnt = cpu_pmu->num_events;
 
        /* The counter and interrupt enable registers are unknown at reset. */
-       for (idx = ARMV8_IDX_CYCLE_COUNTER; idx < nb_cnt; ++idx)
-               armv8pmu_disable_event(NULL, idx);
+       for (idx = ARMV8_IDX_CYCLE_COUNTER; idx < nb_cnt; ++idx) {
+               armv8pmu_disable_counter(idx);
+               armv8pmu_disable_intens(idx);
+       }
 
        /* Initialize & Reset PMNC: C and P bits. */
        armv8pmu_pmcr_write(ARMV8_PMCR_P | ARMV8_PMCR_C);
@@ -1166,169 +581,104 @@ static void armv8pmu_reset(void *info)
 
 static int armv8_pmuv3_map_event(struct perf_event *event)
 {
-       return map_cpu_event(event, &armv8_pmuv3_perf_map,
+       return armpmu_map_event(event, &armv8_pmuv3_perf_map,
                                &armv8_pmuv3_perf_cache_map,
                                ARMV8_EVTYPE_EVENT);
 }
 
-static struct arm_pmu armv8pmu = {
-       .handle_irq             = armv8pmu_handle_irq,
-       .enable                 = armv8pmu_enable_event,
-       .disable                = armv8pmu_disable_event,
-       .read_counter           = armv8pmu_read_counter,
-       .write_counter          = armv8pmu_write_counter,
-       .get_event_idx          = armv8pmu_get_event_idx,
-       .start                  = armv8pmu_start,
-       .stop                   = armv8pmu_stop,
-       .reset                  = armv8pmu_reset,
-       .max_period             = (1LLU << 32) - 1,
-};
+static int armv8_a53_map_event(struct perf_event *event)
+{
+       return armpmu_map_event(event, &armv8_a53_perf_map,
+                               &armv8_a53_perf_cache_map,
+                               ARMV8_EVTYPE_EVENT);
+}
 
-static u32 __init armv8pmu_read_num_pmnc_events(void)
+static int armv8_a57_map_event(struct perf_event *event)
 {
-       u32 nb_cnt;
+       return armpmu_map_event(event, &armv8_a57_perf_map,
+                               &armv8_a57_perf_cache_map,
+                               ARMV8_EVTYPE_EVENT);
+}
+
+static void armv8pmu_read_num_pmnc_events(void *info)
+{
+       int *nb_cnt = info;
 
        /* Read the nb of CNTx counters supported from PMNC */
-       nb_cnt = (armv8pmu_pmcr_read() >> ARMV8_PMCR_N_SHIFT) & ARMV8_PMCR_N_MASK;
+       *nb_cnt = (armv8pmu_pmcr_read() >> ARMV8_PMCR_N_SHIFT) & ARMV8_PMCR_N_MASK;
 
-       /* Add the CPU cycles counter and return */
-       return nb_cnt + 1;
+       /* Add the CPU cycles counter */
+       *nb_cnt += 1;
 }
 
-static struct arm_pmu *__init armv8_pmuv3_pmu_init(void)
+static int armv8pmu_probe_num_events(struct arm_pmu *arm_pmu)
 {
-       armv8pmu.name                   = "arm/armv8-pmuv3";
-       armv8pmu.map_event              = armv8_pmuv3_map_event;
-       armv8pmu.num_events             = armv8pmu_read_num_pmnc_events();
-       armv8pmu.set_event_filter       = armv8pmu_set_event_filter;
-       return &armv8pmu;
+       return smp_call_function_any(&arm_pmu->supported_cpus,
+                                   armv8pmu_read_num_pmnc_events,
+                                   &arm_pmu->num_events, 1);
 }
 
-/*
- * Ensure the PMU has sane values out of reset.
- * This requires SMP to be available, so exists as a separate initcall.
- */
-static int __init
-cpu_pmu_reset(void)
+static void armv8_pmu_init(struct arm_pmu *cpu_pmu)
 {
-       if (cpu_pmu && cpu_pmu->reset)
-               return on_each_cpu(cpu_pmu->reset, NULL, 1);
-       return 0;
+       cpu_pmu->handle_irq             = armv8pmu_handle_irq,
+       cpu_pmu->enable                 = armv8pmu_enable_event,
+       cpu_pmu->disable                = armv8pmu_disable_event,
+       cpu_pmu->read_counter           = armv8pmu_read_counter,
+       cpu_pmu->write_counter          = armv8pmu_write_counter,
+       cpu_pmu->get_event_idx          = armv8pmu_get_event_idx,
+       cpu_pmu->start                  = armv8pmu_start,
+       cpu_pmu->stop                   = armv8pmu_stop,
+       cpu_pmu->reset                  = armv8pmu_reset,
+       cpu_pmu->max_period             = (1LLU << 32) - 1,
+       cpu_pmu->set_event_filter       = armv8pmu_set_event_filter;
 }
-arch_initcall(cpu_pmu_reset);
-
-/*
- * PMU platform driver and devicetree bindings.
- */
-static const struct of_device_id armpmu_of_device_ids[] = {
-       {.compatible = "arm,armv8-pmuv3"},
-       {},
-};
 
-static int armpmu_device_probe(struct platform_device *pdev)
+static int armv8_pmuv3_init(struct arm_pmu *cpu_pmu)
 {
-       int i, irq, *irqs;
-
-       if (!cpu_pmu)
-               return -ENODEV;
-
-       /* Don't bother with PPIs; they're already affine */
-       irq = platform_get_irq(pdev, 0);
-       if (irq >= 0 && irq_is_percpu(irq))
-               goto out;
-
-       irqs = kcalloc(pdev->num_resources, sizeof(*irqs), GFP_KERNEL);
-       if (!irqs)
-               return -ENOMEM;
-
-       for (i = 0; i < pdev->num_resources; ++i) {
-               struct device_node *dn;
-               int cpu;
-
-               dn = of_parse_phandle(pdev->dev.of_node, "interrupt-affinity",
-                                     i);
-               if (!dn) {
-                       pr_warn("Failed to parse %s/interrupt-affinity[%d]\n",
-                               of_node_full_name(pdev->dev.of_node), i);
-                       break;
-               }
-
-               for_each_possible_cpu(cpu)
-                       if (dn == of_cpu_device_node_get(cpu))
-                               break;
-
-               if (cpu >= nr_cpu_ids) {
-                       pr_warn("Failed to find logical CPU for %s\n",
-                               dn->name);
-                       of_node_put(dn);
-                       break;
-               }
-               of_node_put(dn);
-
-               irqs[i] = cpu;
-       }
-
-       if (i == pdev->num_resources)
-               cpu_pmu->irq_affinity = irqs;
-       else
-               kfree(irqs);
-
-out:
-       cpu_pmu->plat_device = pdev;
-       return 0;
+       armv8_pmu_init(cpu_pmu);
+       cpu_pmu->name                   = "armv8_pmuv3";
+       cpu_pmu->map_event              = armv8_pmuv3_map_event;
+       return armv8pmu_probe_num_events(cpu_pmu);
 }
 
-static struct platform_driver armpmu_driver = {
-       .driver         = {
-               .name   = "arm-pmu",
-               .of_match_table = armpmu_of_device_ids,
-       },
-       .probe          = armpmu_device_probe,
-};
-
-static int __init register_pmu_driver(void)
+static int armv8_a53_pmu_init(struct arm_pmu *cpu_pmu)
 {
-       return platform_driver_register(&armpmu_driver);
+       armv8_pmu_init(cpu_pmu);
+       cpu_pmu->name                   = "armv8_cortex_a53";
+       cpu_pmu->map_event              = armv8_a53_map_event;
+       return armv8pmu_probe_num_events(cpu_pmu);
 }
-device_initcall(register_pmu_driver);
 
-static struct pmu_hw_events *armpmu_get_cpu_events(void)
+static int armv8_a57_pmu_init(struct arm_pmu *cpu_pmu)
 {
-       return this_cpu_ptr(&cpu_hw_events);
+       armv8_pmu_init(cpu_pmu);
+       cpu_pmu->name                   = "armv8_cortex_a57";
+       cpu_pmu->map_event              = armv8_a57_map_event;
+       return armv8pmu_probe_num_events(cpu_pmu);
 }
 
-static void __init cpu_pmu_init(struct arm_pmu *armpmu)
-{
-       int cpu;
-       for_each_possible_cpu(cpu) {
-               struct pmu_hw_events *events = &per_cpu(cpu_hw_events, cpu);
-               events->events = per_cpu(hw_events, cpu);
-               events->used_mask = per_cpu(used_mask, cpu);
-               raw_spin_lock_init(&events->pmu_lock);
-       }
-       armpmu->get_hw_events = armpmu_get_cpu_events;
-}
+static const struct of_device_id armv8_pmu_of_device_ids[] = {
+       {.compatible = "arm,armv8-pmuv3",       .data = armv8_pmuv3_init},
+       {.compatible = "arm,cortex-a53-pmu",    .data = armv8_a53_pmu_init},
+       {.compatible = "arm,cortex-a57-pmu",    .data = armv8_a57_pmu_init},
+       {},
+};
 
-static int __init init_hw_perf_events(void)
+static int armv8_pmu_device_probe(struct platform_device *pdev)
 {
-       u64 dfr = read_cpuid(ID_AA64DFR0_EL1);
-
-       switch ((dfr >> 8) & 0xf) {
-       case 0x1:       /* PMUv3 */
-               cpu_pmu = armv8_pmuv3_pmu_init();
-               break;
-       }
+       return arm_pmu_device_probe(pdev, armv8_pmu_of_device_ids, NULL);
+}
 
-       if (cpu_pmu) {
-               pr_info("enabled with %s PMU driver, %d counters available\n",
-                       cpu_pmu->name, cpu_pmu->num_events);
-               cpu_pmu_init(cpu_pmu);
-               armpmu_register(cpu_pmu, "cpu", PERF_TYPE_RAW);
-       } else {
-               pr_info("no hardware support available\n");
-       }
+static struct platform_driver armv8_pmu_driver = {
+       .driver         = {
+               .name   = "armv8-pmu",
+               .of_match_table = armv8_pmu_of_device_ids,
+       },
+       .probe          = armv8_pmu_device_probe,
+};
 
-       return 0;
+static int __init register_armv8_pmu_driver(void)
+{
+       return platform_driver_register(&armv8_pmu_driver);
 }
-early_initcall(init_hw_perf_events);
-
+device_initcall(register_armv8_pmu_driver);
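
The rewritten PMU code above drops the open-coded common framework and platform driver, and selects the per-CPU-type init function through the of_device_id .data pointer handed to the shared probe helper. A compact, self-contained sketch of that dispatch pattern, with simplified stand-ins for the device-tree match and arm_pmu structures:

#include <stdio.h>
#include <string.h>

struct pmu_desc { const char *name; };   /* trimmed-down stand-in */

static int pmuv3_init(struct pmu_desc *p) { p->name = "armv8_pmuv3";      return 0; }
static int a53_init(struct pmu_desc *p)   { p->name = "armv8_cortex_a53"; return 0; }
static int a57_init(struct pmu_desc *p)   { p->name = "armv8_cortex_a57"; return 0; }

struct of_match {
        const char *compatible;
        int (*init)(struct pmu_desc *);
};

static const struct of_match match_table[] = {
        { "arm,armv8-pmuv3",    pmuv3_init },
        { "arm,cortex-a53-pmu", a53_init   },
        { "arm,cortex-a57-pmu", a57_init   },
        { NULL, NULL },
};

static int probe(const char *dt_compatible, struct pmu_desc *pmu)
{
        for (const struct of_match *m = match_table; m->compatible; m++)
                if (!strcmp(m->compatible, dt_compatible))
                        return m->init(pmu);
        return -1;
}

int main(void)
{
        struct pmu_desc pmu;

        if (!probe("arm,cortex-a57-pmu", &pmu))
                printf("probed %s\n", pmu.name);
        return 0;
}
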
index 223b093c9440933f46fb2aa4cbc83729be1e6f57..f75b540bc3b4b0daae4a8773cd0eddf1a86a90aa 100644
@@ -44,6 +44,7 @@
 #include <linux/hw_breakpoint.h>
 #include <linux/personality.h>
 #include <linux/notifier.h>
+#include <trace/events/power.h>
 
 #include <asm/compat.h>
 #include <asm/cacheflush.h>
@@ -75,8 +76,10 @@ void arch_cpu_idle(void)
         * This should do all the clock switching and wait for interrupt
         * tricks
         */
+       trace_cpu_idle_rcuidle(1, smp_processor_id());
        cpu_do_idle();
        local_irq_enable();
+       trace_cpu_idle_rcuidle(PWR_EVENT_EXIT, smp_processor_id());
 }
 
 #ifdef CONFIG_HOTPLUG_CPU
index 232247945b1c215c25fbfd708573fe3def5c68c5..8119479147db147c33800f76aa0d07c6072e8559 100644
@@ -28,7 +28,6 @@
 #include <linux/console.h>
 #include <linux/cache.h>
 #include <linux/bootmem.h>
-#include <linux/seq_file.h>
 #include <linux/screen_info.h>
 #include <linux/init.h>
 #include <linux/kexec.h>
@@ -44,7 +43,6 @@
 #include <linux/of_fdt.h>
 #include <linux/of_platform.h>
 #include <linux/efi.h>
-#include <linux/personality.h>
 #include <linux/psci.h>
 
 #include <asm/acpi.h>
@@ -54,6 +52,7 @@
 #include <asm/elf.h>
 #include <asm/cpufeature.h>
 #include <asm/cpu_ops.h>
+#include <asm/kasan.h>
 #include <asm/sections.h>
 #include <asm/setup.h>
 #include <asm/smp_plat.h>
 #include <asm/efi.h>
 #include <asm/xen/hypervisor.h>
 
-unsigned long elf_hwcap __read_mostly;
-EXPORT_SYMBOL_GPL(elf_hwcap);
-
-#ifdef CONFIG_COMPAT
-#define COMPAT_ELF_HWCAP_DEFAULT       \
-                               (COMPAT_HWCAP_HALF|COMPAT_HWCAP_THUMB|\
-                                COMPAT_HWCAP_FAST_MULT|COMPAT_HWCAP_EDSP|\
-                                COMPAT_HWCAP_TLS|COMPAT_HWCAP_VFP|\
-                                COMPAT_HWCAP_VFPv3|COMPAT_HWCAP_VFPv4|\
-                                COMPAT_HWCAP_NEON|COMPAT_HWCAP_IDIV|\
-                                COMPAT_HWCAP_LPAE)
-unsigned int compat_elf_hwcap __read_mostly = COMPAT_ELF_HWCAP_DEFAULT;
-unsigned int compat_elf_hwcap2 __read_mostly;
-#endif
-
-DECLARE_BITMAP(cpu_hwcaps, ARM64_NCAPS);
-
 phys_addr_t __fdt_pointer __initdata;
 
 /*
@@ -195,104 +177,6 @@ static void __init smp_build_mpidr_hash(void)
        __flush_dcache_area(&mpidr_hash, sizeof(struct mpidr_hash));
 }
 
-static void __init setup_processor(void)
-{
-       u64 features;
-       s64 block;
-       u32 cwg;
-       int cls;
-
-       printk("CPU: AArch64 Processor [%08x] revision %d\n",
-              read_cpuid_id(), read_cpuid_id() & 15);
-
-       sprintf(init_utsname()->machine, ELF_PLATFORM);
-       elf_hwcap = 0;
-
-       cpuinfo_store_boot_cpu();
-
-       /*
-        * Check for sane CTR_EL0.CWG value.
-        */
-       cwg = cache_type_cwg();
-       cls = cache_line_size();
-       if (!cwg)
-               pr_warn("No Cache Writeback Granule information, assuming cache line size %d\n",
-                       cls);
-       if (L1_CACHE_BYTES < cls)
-               pr_warn("L1_CACHE_BYTES smaller than the Cache Writeback Granule (%d < %d)\n",
-                       L1_CACHE_BYTES, cls);
-
-       /*
-        * ID_AA64ISAR0_EL1 contains 4-bit wide signed feature blocks.
-        * The blocks we test below represent incremental functionality
-        * for non-negative values. Negative values are reserved.
-        */
-       features = read_cpuid(ID_AA64ISAR0_EL1);
-       block = cpuid_feature_extract_field(features, 4);
-       if (block > 0) {
-               switch (block) {
-               default:
-               case 2:
-                       elf_hwcap |= HWCAP_PMULL;
-               case 1:
-                       elf_hwcap |= HWCAP_AES;
-               case 0:
-                       break;
-               }
-       }
-
-       if (cpuid_feature_extract_field(features, 8) > 0)
-               elf_hwcap |= HWCAP_SHA1;
-
-       if (cpuid_feature_extract_field(features, 12) > 0)
-               elf_hwcap |= HWCAP_SHA2;
-
-       if (cpuid_feature_extract_field(features, 16) > 0)
-               elf_hwcap |= HWCAP_CRC32;
-
-       block = cpuid_feature_extract_field(features, 20);
-       if (block > 0) {
-               switch (block) {
-               default:
-               case 2:
-                       elf_hwcap |= HWCAP_ATOMICS;
-               case 1:
-                       /* RESERVED */
-               case 0:
-                       break;
-               }
-       }
-
-#ifdef CONFIG_COMPAT
-       /*
-        * ID_ISAR5_EL1 carries similar information as above, but pertaining to
-        * the AArch32 32-bit execution state.
-        */
-       features = read_cpuid(ID_ISAR5_EL1);
-       block = cpuid_feature_extract_field(features, 4);
-       if (block > 0) {
-               switch (block) {
-               default:
-               case 2:
-                       compat_elf_hwcap2 |= COMPAT_HWCAP2_PMULL;
-               case 1:
-                       compat_elf_hwcap2 |= COMPAT_HWCAP2_AES;
-               case 0:
-                       break;
-               }
-       }
-
-       if (cpuid_feature_extract_field(features, 8) > 0)
-               compat_elf_hwcap2 |= COMPAT_HWCAP2_SHA1;
-
-       if (cpuid_feature_extract_field(features, 12) > 0)
-               compat_elf_hwcap2 |= COMPAT_HWCAP2_SHA2;
-
-       if (cpuid_feature_extract_field(features, 16) > 0)
-               compat_elf_hwcap2 |= COMPAT_HWCAP2_CRC32;
-#endif
-}
-
 static void __init setup_machine_fdt(phys_addr_t dt_phys)
 {
        void *dt_virt = fixmap_remap_fdt(dt_phys);
@@ -406,8 +290,9 @@ u64 __cpu_logical_map[NR_CPUS] = { [0 ... NR_CPUS-1] = INVALID_HWID };
 
 void __init setup_arch(char **cmdline_p)
 {
-       setup_processor();
+       pr_info("Boot CPU: AArch64 Processor [%08x]\n", read_cpuid_id());
 
+       sprintf(init_utsname()->machine, ELF_PLATFORM);
        init_mm.start_code = (unsigned long) _text;
        init_mm.end_code   = (unsigned long) _etext;
        init_mm.end_data   = (unsigned long) _edata;
@@ -436,6 +321,9 @@ void __init setup_arch(char **cmdline_p)
 
        paging_init();
        relocate_initrd();
+
+       kasan_init();
+
        request_standard_resources();
 
        early_ioremap_reset();
@@ -493,124 +381,3 @@ static int __init topology_init(void)
        return 0;
 }
 subsys_initcall(topology_init);
-
-static const char *hwcap_str[] = {
-       "fp",
-       "asimd",
-       "evtstrm",
-       "aes",
-       "pmull",
-       "sha1",
-       "sha2",
-       "crc32",
-       "atomics",
-       NULL
-};
-
-#ifdef CONFIG_COMPAT
-static const char *compat_hwcap_str[] = {
-       "swp",
-       "half",
-       "thumb",
-       "26bit",
-       "fastmult",
-       "fpa",
-       "vfp",
-       "edsp",
-       "java",
-       "iwmmxt",
-       "crunch",
-       "thumbee",
-       "neon",
-       "vfpv3",
-       "vfpv3d16",
-       "tls",
-       "vfpv4",
-       "idiva",
-       "idivt",
-       "vfpd32",
-       "lpae",
-       "evtstrm"
-};
-
-static const char *compat_hwcap2_str[] = {
-       "aes",
-       "pmull",
-       "sha1",
-       "sha2",
-       "crc32",
-       NULL
-};
-#endif /* CONFIG_COMPAT */
-
-static int c_show(struct seq_file *m, void *v)
-{
-       int i, j;
-
-       for_each_online_cpu(i) {
-               struct cpuinfo_arm64 *cpuinfo = &per_cpu(cpu_data, i);
-               u32 midr = cpuinfo->reg_midr;
-
-               /*
-                * glibc reads /proc/cpuinfo to determine the number of
-                * online processors, looking for lines beginning with
-                * "processor".  Give glibc what it expects.
-                */
-               seq_printf(m, "processor\t: %d\n", i);
-
-               /*
-                * Dump out the common processor features in a single line.
-                * Userspace should read the hwcaps with getauxval(AT_HWCAP)
-                * rather than attempting to parse this, but there's a body of
-                * software which does already (at least for 32-bit).
-                */
-               seq_puts(m, "Features\t:");
-               if (personality(current->personality) == PER_LINUX32) {
-#ifdef CONFIG_COMPAT
-                       for (j = 0; compat_hwcap_str[j]; j++)
-                               if (compat_elf_hwcap & (1 << j))
-                                       seq_printf(m, " %s", compat_hwcap_str[j]);
-
-                       for (j = 0; compat_hwcap2_str[j]; j++)
-                               if (compat_elf_hwcap2 & (1 << j))
-                                       seq_printf(m, " %s", compat_hwcap2_str[j]);
-#endif /* CONFIG_COMPAT */
-               } else {
-                       for (j = 0; hwcap_str[j]; j++)
-                               if (elf_hwcap & (1 << j))
-                                       seq_printf(m, " %s", hwcap_str[j]);
-               }
-               seq_puts(m, "\n");
-
-               seq_printf(m, "CPU implementer\t: 0x%02x\n",
-                          MIDR_IMPLEMENTOR(midr));
-               seq_printf(m, "CPU architecture: 8\n");
-               seq_printf(m, "CPU variant\t: 0x%x\n", MIDR_VARIANT(midr));
-               seq_printf(m, "CPU part\t: 0x%03x\n", MIDR_PARTNUM(midr));
-               seq_printf(m, "CPU revision\t: %d\n\n", MIDR_REVISION(midr));
-       }
-
-       return 0;
-}
-
-static void *c_start(struct seq_file *m, loff_t *pos)
-{
-       return *pos < 1 ? (void *)1 : NULL;
-}
-
-static void *c_next(struct seq_file *m, void *v, loff_t *pos)
-{
-       ++*pos;
-       return NULL;
-}
-
-static void c_stop(struct seq_file *m, void *v)
-{
-}
-
-const struct seq_operations cpuinfo_op = {
-       .start  = c_start,
-       .next   = c_next,
-       .stop   = c_stop,
-       .show   = c_show
-};
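
The c_show() code removed above prints the "Features" line only for legacy parsers; as its own comment says, userspace should query the hwcaps with getauxval(AT_HWCAP) rather than scraping /proc/cpuinfo. A minimal userspace sketch of that approach, assuming an AArch64 build where the uapi <asm/hwcap.h> header provides the HWCAP_* bits named in the string table above:

    #include <stdio.h>
    #include <sys/auxv.h>     /* getauxval(), AT_HWCAP */
    #include <asm/hwcap.h>    /* HWCAP_FP, HWCAP_ASIMD, HWCAP_AES, ... */

    int main(void)
    {
            unsigned long hwcap = getauxval(AT_HWCAP);

            /* Same feature bits the "Features" line spells out as strings. */
            printf("fp     : %s\n", (hwcap & HWCAP_FP)      ? "yes" : "no");
            printf("asimd  : %s\n", (hwcap & HWCAP_ASIMD)   ? "yes" : "no");
            printf("aes    : %s\n", (hwcap & HWCAP_AES)     ? "yes" : "no");
            printf("crc32  : %s\n", (hwcap & HWCAP_CRC32)   ? "yes" : "no");
            printf("atomics: %s\n", (hwcap & HWCAP_ATOMICS) ? "yes" : "no");
            return 0;
    }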
index dbdaacddd9a562bc709193284dbe7fdaf41aed24..2bbdc0e4fd140581706d17b54b8a9089b77e7630 100644 (file)
@@ -142,22 +142,27 @@ asmlinkage void secondary_start_kernel(void)
         */
        atomic_inc(&mm->mm_count);
        current->active_mm = mm;
-       cpumask_set_cpu(cpu, mm_cpumask(mm));
 
        set_my_cpu_offset(per_cpu_offset(smp_processor_id()));
-       printk("CPU%u: Booted secondary processor\n", cpu);
 
        /*
         * TTBR0 is only used for the identity mapping at this stage. Make it
         * point to zero page to avoid speculatively fetching new entries.
         */
        cpu_set_reserved_ttbr0();
-       flush_tlb_all();
+       local_flush_tlb_all();
        cpu_set_default_tcr_t0sz();
 
        preempt_disable();
        trace_hardirqs_off();
 
+       /*
+        * If the system has established the capabilities, make sure
+        * this CPU ticks all of those. If it doesn't, the CPU will
+        * fail to come online.
+        */
+       verify_local_cpu_capabilities();
+
        if (cpu_ops[cpu]->cpu_postboot)
                cpu_ops[cpu]->cpu_postboot();
 
@@ -178,6 +183,8 @@ asmlinkage void secondary_start_kernel(void)
         * the CPU migration code to notice that the CPU is online
         * before we continue.
         */
+       pr_info("CPU%u: Booted secondary processor [%08x]\n",
+                                        cpu, read_cpuid_id());
        set_cpu_online(cpu, true);
        complete(&cpu_running);
 
@@ -232,12 +239,7 @@ int __cpu_disable(void)
        /*
         * OK - migrate IRQs away from this CPU
         */
-       migrate_irqs();
-
-       /*
-        * Remove this CPU from the vm mask set of all processes.
-        */
-       clear_tasks_mm_cpumask(cpu);
+       irq_migrate_all_off_this_cpu();
 
        return 0;
 }
@@ -325,12 +327,14 @@ static void __init hyp_mode_check(void)
 void __init smp_cpus_done(unsigned int max_cpus)
 {
        pr_info("SMP: Total of %d processors activated.\n", num_online_cpus());
+       setup_cpu_features();
        hyp_mode_check();
        apply_alternatives_all();
 }
 
 void __init smp_prepare_boot_cpu(void)
 {
+       cpuinfo_store_boot_cpu();
        set_my_cpu_offset(per_cpu_offset(smp_processor_id()));
 }
 
index 44ca4143b013227ebf9eb40eafd721454ab45074..40f7b33a22dafce27c3491d181170760916aec04 100644 (file)
@@ -90,7 +90,7 @@ int cpu_suspend(unsigned long arg, int (*fn)(unsigned long))
                 * restoration before returning.
                 */
                cpu_set_reserved_ttbr0();
-               flush_tlb_all();
+               local_flush_tlb_all();
                cpu_set_default_tcr_t0sz();
 
                if (mm != &init_mm)
index f93aae5e43075ccd4e6db45c2198965e566330a5..e9b9b53643936a121e8c73db99373d7e7cab9b48 100644 (file)
@@ -103,12 +103,12 @@ static void dump_mem(const char *lvl, const char *str, unsigned long bottom,
        set_fs(fs);
 }
 
-static void dump_backtrace_entry(unsigned long where, unsigned long stack)
+static void dump_backtrace_entry(unsigned long where)
 {
+       /*
+        * Note that 'where' can have a physical address, but it's not handled.
+        */
        print_ip_sym(where);
-       if (in_exception_text(where))
-               dump_mem("", "Exception stack", stack,
-                        stack + sizeof(struct pt_regs), false);
 }
 
 static void dump_instr(const char *lvl, struct pt_regs *regs)
@@ -172,12 +172,17 @@ static void dump_backtrace(struct pt_regs *regs, struct task_struct *tsk)
        pr_emerg("Call trace:\n");
        while (1) {
                unsigned long where = frame.pc;
+               unsigned long stack;
                int ret;
 
+               dump_backtrace_entry(where);
                ret = unwind_frame(&frame);
                if (ret < 0)
                        break;
-               dump_backtrace_entry(where, frame.sp);
+               stack = frame.sp;
+               if (in_exception_text(where))
+                       dump_mem("", "Exception stack", stack,
+                                stack + sizeof(struct pt_regs), false);
        }
 }
 
index 98073332e2d05b62c12be5c9f4172ddcc3363736..1ee2c3937d4e8badf3ccec9a816ad23f9814506f 100644 (file)
@@ -5,6 +5,7 @@
  */
 
 #include <asm-generic/vmlinux.lds.h>
+#include <asm/kernel-pgtable.h>
 #include <asm/thread_info.h>
 #include <asm/memory.h>
 #include <asm/page.h>
@@ -60,9 +61,12 @@ PECOFF_FILE_ALIGNMENT = 0x200;
 #define PECOFF_EDATA_PADDING
 #endif
 
-#ifdef CONFIG_DEBUG_ALIGN_RODATA
+#if defined(CONFIG_DEBUG_ALIGN_RODATA)
 #define ALIGN_DEBUG_RO                 . = ALIGN(1<<SECTION_SHIFT);
 #define ALIGN_DEBUG_RO_MIN(min)                ALIGN_DEBUG_RO
+#elif defined(CONFIG_DEBUG_RODATA)
+#define ALIGN_DEBUG_RO                 . = ALIGN(1<<PAGE_SHIFT);
+#define ALIGN_DEBUG_RO_MIN(min)                ALIGN_DEBUG_RO
 #else
 #define ALIGN_DEBUG_RO
 #define ALIGN_DEBUG_RO_MIN(min)                . = ALIGN(min);
index ff5292c6277c4764734a1a1769af06347ccb8c4b..c9d1f34daab152028d8bcd795504ac4ed2919cc8 100644 (file)
@@ -22,6 +22,7 @@ config KVM_ARM_VGIC_V3
 config KVM
        bool "Kernel-based Virtual Machine (KVM) support"
        depends on OF
+       depends on !ARM64_16K_PAGES
        select MMU_NOTIFIER
        select PREEMPT_NOTIFIERS
        select ANON_INODES
@@ -37,6 +38,8 @@ config KVM
        select KVM_ARM_VGIC_V3
        ---help---
          Support hosting virtualized guest machines.
+         We don't support KVM with 16K page tables yet, due to the multiple
+         levels of fake page tables.
 
          If unsure, say N.
 
index 91cf5350b3283232cd6d88aca9af669ce972c697..f34745cb3d236fe0a4731f8d02031f8ff764d69c 100644 (file)
@@ -53,7 +53,7 @@ static bool cpu_has_32bit_el1(void)
 {
        u64 pfr0;
 
-       pfr0 = read_cpuid(ID_AA64PFR0_EL1);
+       pfr0 = read_system_reg(SYS_ID_AA64PFR0_EL1);
        return !!(pfr0 & 0x20);
 }
 
index d03d3af17e7eef784d528479e2f4fed305ee5f75..87a64e8db04c4dac07a5d289ad0c2dd22860f0e2 100644 (file)
@@ -693,13 +693,13 @@ static bool trap_dbgidr(struct kvm_vcpu *vcpu,
        if (p->is_write) {
                return ignore_write(vcpu, p);
        } else {
-               u64 dfr = read_cpuid(ID_AA64DFR0_EL1);
-               u64 pfr = read_cpuid(ID_AA64PFR0_EL1);
-               u32 el3 = !!((pfr >> 12) & 0xf);
+               u64 dfr = read_system_reg(SYS_ID_AA64DFR0_EL1);
+               u64 pfr = read_system_reg(SYS_ID_AA64PFR0_EL1);
+               u32 el3 = !!cpuid_feature_extract_field(pfr, ID_AA64PFR0_EL3_SHIFT);
 
-               *vcpu_reg(vcpu, p->Rt) = ((((dfr >> 20) & 0xf) << 28) |
-                                         (((dfr >> 12) & 0xf) << 24) |
-                                         (((dfr >> 28) & 0xf) << 20) |
+               *vcpu_reg(vcpu, p->Rt) = ((((dfr >> ID_AA64DFR0_WRPS_SHIFT) & 0xf) << 28) |
+                                         (((dfr >> ID_AA64DFR0_BRPS_SHIFT) & 0xf) << 24) |
+                                         (((dfr >> ID_AA64DFR0_CTX_CMPS_SHIFT) & 0xf) << 20) |
                                          (6 << 16) | (el3 << 14) | (el3 << 12));
                return true;
        }
index 1be9ef27be9704b2ae99bade58d36356a894e78b..4699cd74f87e4af7bf69da8ce48a88a7f4f69b74 100644 (file)
@@ -18,6 +18,7 @@
 
 #include <asm/alternative.h>
 #include <asm/assembler.h>
+#include <asm/cache.h>
 #include <asm/cpufeature.h>
 #include <asm/sysreg.h>
 
  * Returns:
  *     x0 - bytes not copied
  */
+
+       .macro ldrb1 ptr, regB, val
+       USER(9998f, ldrb  \ptr, [\regB], \val)
+       .endm
+
+       .macro strb1 ptr, regB, val
+       strb \ptr, [\regB], \val
+       .endm
+
+       .macro ldrh1 ptr, regB, val
+       USER(9998f, ldrh  \ptr, [\regB], \val)
+       .endm
+
+       .macro strh1 ptr, regB, val
+       strh \ptr, [\regB], \val
+       .endm
+
+       .macro ldr1 ptr, regB, val
+       USER(9998f, ldr \ptr, [\regB], \val)
+       .endm
+
+       .macro str1 ptr, regB, val
+       str \ptr, [\regB], \val
+       .endm
+
+       .macro ldp1 ptr, regB, regC, val
+       USER(9998f, ldp \ptr, \regB, [\regC], \val)
+       .endm
+
+       .macro stp1 ptr, regB, regC, val
+       stp \ptr, \regB, [\regC], \val
+       .endm
+
+end    .req    x5
 ENTRY(__copy_from_user)
 ALTERNATIVE("nop", __stringify(SET_PSTATE_PAN(0)), ARM64_HAS_PAN, \
            CONFIG_ARM64_PAN)
-       add     x5, x1, x2                      // upper user buffer boundary
-       subs    x2, x2, #16
-       b.mi    1f
-0:
-USER(9f, ldp   x3, x4, [x1], #16)
-       subs    x2, x2, #16
-       stp     x3, x4, [x0], #16
-       b.pl    0b
-1:     adds    x2, x2, #8
-       b.mi    2f
-USER(9f, ldr   x3, [x1], #8    )
-       sub     x2, x2, #8
-       str     x3, [x0], #8
-2:     adds    x2, x2, #4
-       b.mi    3f
-USER(9f, ldr   w3, [x1], #4    )
-       sub     x2, x2, #4
-       str     w3, [x0], #4
-3:     adds    x2, x2, #2
-       b.mi    4f
-USER(9f, ldrh  w3, [x1], #2    )
-       sub     x2, x2, #2
-       strh    w3, [x0], #2
-4:     adds    x2, x2, #1
-       b.mi    5f
-USER(9f, ldrb  w3, [x1]        )
-       strb    w3, [x0]
-5:     mov     x0, #0
+       add     end, x0, x2
+#include "copy_template.S"
 ALTERNATIVE("nop", __stringify(SET_PSTATE_PAN(1)), ARM64_HAS_PAN, \
            CONFIG_ARM64_PAN)
+       mov     x0, #0                          // Nothing to copy
        ret
 ENDPROC(__copy_from_user)
 
        .section .fixup,"ax"
        .align  2
-9:     sub     x2, x5, x1
-       mov     x3, x2
-10:    strb    wzr, [x0], #1                   // zero remaining buffer space
-       subs    x3, x3, #1
-       b.ne    10b
-       mov     x0, x2                          // bytes not copied
+9998:
+       sub     x0, end, dst
+9999:
+       strb    wzr, [dst], #1                  // zero remaining buffer space
+       cmp     dst, end
+       b.lo    9999b
        ret
        .previous
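
With the shared template, the only per-routine logic left in __copy_from_user is the fixup at label 9998: when a user load faults, it zeroes the remaining destination bytes and returns how many bytes were not copied. A rough C model of that contract, not the kernel implementation; the 'fault_after' parameter is an illustrative stand-in for the fault point, which the real routine discovers through the exception-table entries emitted by the USER() macro:

    #include <stddef.h>
    #include <string.h>

    /* Illustrative model of the __copy_from_user return/zeroing semantics. */
    static size_t model_copy_from_user(void *to, const void *from, size_t n,
                                       size_t fault_after)
    {
            size_t copied = (n < fault_after) ? n : fault_after;

            memcpy(to, from, copied);                    /* bytes that succeeded  */
            memset((char *)to + copied, 0, n - copied);  /* zero the rest of 'to' */
            return n - copied;                           /* bytes not copied      */
    }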
index 1b94661e22b3f4dc3cf131f1afb6487333daee0b..81c8fc93c100b7be7da17ebf96b1edeeb806671f 100644 (file)
@@ -20,6 +20,7 @@
 
 #include <asm/alternative.h>
 #include <asm/assembler.h>
+#include <asm/cache.h>
 #include <asm/cpufeature.h>
 #include <asm/sysreg.h>
 
  * Returns:
  *     x0 - bytes not copied
  */
+       .macro ldrb1 ptr, regB, val
+       USER(9998f, ldrb  \ptr, [\regB], \val)
+       .endm
+
+       .macro strb1 ptr, regB, val
+       USER(9998f, strb \ptr, [\regB], \val)
+       .endm
+
+       .macro ldrh1 ptr, regB, val
+       USER(9998f, ldrh  \ptr, [\regB], \val)
+       .endm
+
+       .macro strh1 ptr, regB, val
+       USER(9998f, strh \ptr, [\regB], \val)
+       .endm
+
+       .macro ldr1 ptr, regB, val
+       USER(9998f, ldr \ptr, [\regB], \val)
+       .endm
+
+       .macro str1 ptr, regB, val
+       USER(9998f, str \ptr, [\regB], \val)
+       .endm
+
+       .macro ldp1 ptr, regB, regC, val
+       USER(9998f, ldp \ptr, \regB, [\regC], \val)
+       .endm
+
+       .macro stp1 ptr, regB, regC, val
+       USER(9998f, stp \ptr, \regB, [\regC], \val)
+       .endm
+
+end    .req    x5
 ENTRY(__copy_in_user)
 ALTERNATIVE("nop", __stringify(SET_PSTATE_PAN(0)), ARM64_HAS_PAN, \
            CONFIG_ARM64_PAN)
-       add     x5, x0, x2                      // upper user buffer boundary
-       subs    x2, x2, #16
-       b.mi    1f
-0:
-USER(9f, ldp   x3, x4, [x1], #16)
-       subs    x2, x2, #16
-USER(9f, stp   x3, x4, [x0], #16)
-       b.pl    0b
-1:     adds    x2, x2, #8
-       b.mi    2f
-USER(9f, ldr   x3, [x1], #8    )
-       sub     x2, x2, #8
-USER(9f, str   x3, [x0], #8    )
-2:     adds    x2, x2, #4
-       b.mi    3f
-USER(9f, ldr   w3, [x1], #4    )
-       sub     x2, x2, #4
-USER(9f, str   w3, [x0], #4    )
-3:     adds    x2, x2, #2
-       b.mi    4f
-USER(9f, ldrh  w3, [x1], #2    )
-       sub     x2, x2, #2
-USER(9f, strh  w3, [x0], #2    )
-4:     adds    x2, x2, #1
-       b.mi    5f
-USER(9f, ldrb  w3, [x1]        )
-USER(9f, strb  w3, [x0]        )
-5:     mov     x0, #0
+       add     end, x0, x2
+#include "copy_template.S"
 ALTERNATIVE("nop", __stringify(SET_PSTATE_PAN(1)), ARM64_HAS_PAN, \
            CONFIG_ARM64_PAN)
+       mov     x0, #0
        ret
 ENDPROC(__copy_in_user)
 
        .section .fixup,"ax"
        .align  2
-9:     sub     x0, x5, x0                      // bytes not copied
+9998:  sub     x0, end, dst                    // bytes not copied
        ret
        .previous
diff --git a/arch/arm64/lib/copy_template.S b/arch/arm64/lib/copy_template.S
new file mode 100644 (file)
index 0000000..410fbdb
--- /dev/null
@@ -0,0 +1,193 @@
+/*
+ * Copyright (C) 2013 ARM Ltd.
+ * Copyright (C) 2013 Linaro.
+ *
+ * This code is based on glibc cortex strings work originally authored by Linaro
+ * and re-licensed under GPLv2 for the Linux kernel. The original code can
+ * be found @
+ *
+ * http://bazaar.launchpad.net/~linaro-toolchain-dev/cortex-strings/trunk/
+ * files/head:/src/aarch64/
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+
+/*
+ * Copy a buffer from src to dest (alignment handled by the hardware)
+ *
+ * Parameters:
+ *     x0 - dest
+ *     x1 - src
+ *     x2 - n
+ * Returns:
+ *     x0 - dest
+ */
+dstin  .req    x0
+src    .req    x1
+count  .req    x2
+tmp1   .req    x3
+tmp1w  .req    w3
+tmp2   .req    x4
+tmp2w  .req    w4
+dst    .req    x6
+
+A_l    .req    x7
+A_h    .req    x8
+B_l    .req    x9
+B_h    .req    x10
+C_l    .req    x11
+C_h    .req    x12
+D_l    .req    x13
+D_h    .req    x14
+
+       mov     dst, dstin
+       cmp     count, #16
+       /* When the length is less than 16, the accesses are not aligned. */
+       b.lo    .Ltiny15
+
+       neg     tmp2, src
+       ands    tmp2, tmp2, #15/* Bytes to reach alignment. */
+       b.eq    .LSrcAligned
+       sub     count, count, tmp2
+       /*
+       * Copy the leading data from src to dst in increasing address
+       * order. This removes the risk of overwriting the source data
+       * when the distance between src and dst is less than 16. The
+       * memory accesses here are aligned.
+       */
+       tbz     tmp2, #0, 1f
+       ldrb1   tmp1w, src, #1
+       strb1   tmp1w, dst, #1
+1:
+       tbz     tmp2, #1, 2f
+       ldrh1   tmp1w, src, #2
+       strh1   tmp1w, dst, #2
+2:
+       tbz     tmp2, #2, 3f
+       ldr1    tmp1w, src, #4
+       str1    tmp1w, dst, #4
+3:
+       tbz     tmp2, #3, .LSrcAligned
+       ldr1    tmp1, src, #8
+       str1    tmp1, dst, #8
+
+.LSrcAligned:
+       cmp     count, #64
+       b.ge    .Lcpy_over64
+       /*
+       * Deal with small copies quickly by dropping straight into the
+       * exit block.
+       */
+.Ltail63:
+       /*
+       * Copy up to 48 bytes of data. At this point we only need the
+       * bottom 6 bits of count to be accurate.
+       */
+       ands    tmp1, count, #0x30
+       b.eq    .Ltiny15
+       cmp     tmp1w, #0x20
+       b.eq    1f
+       b.lt    2f
+       ldp1    A_l, A_h, src, #16
+       stp1    A_l, A_h, dst, #16
+1:
+       ldp1    A_l, A_h, src, #16
+       stp1    A_l, A_h, dst, #16
+2:
+       ldp1    A_l, A_h, src, #16
+       stp1    A_l, A_h, dst, #16
+.Ltiny15:
+       /*
+       * Prefer to break one ldp/stp into several loads/stores that access
+       * memory in increasing address order, rather than loading/storing 16
+       * bytes from (src-16) to (dst-16) and winding src back to an aligned
+       * address, as the original cortex memcpy does. If that scheme were
+       * kept here, memmove would have to satisfy the precondition that the
+       * src address is at least 16 bytes above the dst address, otherwise
+       * some source data would be overwritten when memmove calls memcpy
+       * directly. To keep memmove simple and decouple memcpy from memmove,
+       * the original scheme was dropped.
+       */
+       tbz     count, #3, 1f
+       ldr1    tmp1, src, #8
+       str1    tmp1, dst, #8
+1:
+       tbz     count, #2, 2f
+       ldr1    tmp1w, src, #4
+       str1    tmp1w, dst, #4
+2:
+       tbz     count, #1, 3f
+       ldrh1   tmp1w, src, #2
+       strh1   tmp1w, dst, #2
+3:
+       tbz     count, #0, .Lexitfunc
+       ldrb1   tmp1w, src, #1
+       strb1   tmp1w, dst, #1
+
+       b       .Lexitfunc
+
+.Lcpy_over64:
+       subs    count, count, #128
+       b.ge    .Lcpy_body_large
+       /*
+       * Less than 128 bytes to copy, so handle 64 here and then jump
+       * to the tail.
+       */
+       ldp1    A_l, A_h, src, #16
+       stp1    A_l, A_h, dst, #16
+       ldp1    B_l, B_h, src, #16
+       ldp1    C_l, C_h, src, #16
+       stp1    B_l, B_h, dst, #16
+       stp1    C_l, C_h, dst, #16
+       ldp1    D_l, D_h, src, #16
+       stp1    D_l, D_h, dst, #16
+
+       tst     count, #0x3f
+       b.ne    .Ltail63
+       b       .Lexitfunc
+
+       /*
+       * Critical loop.  Start at a new cache line boundary.  Assuming
+       * 64 bytes per line this ensures the entire loop is in one line.
+       */
+       .p2align        L1_CACHE_SHIFT
+.Lcpy_body_large:
+       /* pre-get 64 bytes data. */
+       ldp1    A_l, A_h, src, #16
+       ldp1    B_l, B_h, src, #16
+       ldp1    C_l, C_h, src, #16
+       ldp1    D_l, D_h, src, #16
+1:
+       /*
+       * Interleave the load of the next 64-byte block with the store of
+       * the previously loaded 64 bytes.
+       */
+       stp1    A_l, A_h, dst, #16
+       ldp1    A_l, A_h, src, #16
+       stp1    B_l, B_h, dst, #16
+       ldp1    B_l, B_h, src, #16
+       stp1    C_l, C_h, dst, #16
+       ldp1    C_l, C_h, src, #16
+       stp1    D_l, D_h, dst, #16
+       ldp1    D_l, D_h, src, #16
+       subs    count, count, #64
+       b.ge    1b
+       stp1    A_l, A_h, dst, #16
+       stp1    B_l, B_h, dst, #16
+       stp1    C_l, C_h, dst, #16
+       stp1    D_l, D_h, dst, #16
+
+       tst     count, #0x3f
+       b.ne    .Ltail63
+.Lexitfunc:
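
The template above is shared by memcpy and the uaccess copies: it aligns the source to 16 bytes with 1/2/4/8-byte steps, handles short copies in a tail of up to 63 bytes, and runs a 64-bytes-per-iteration main loop with interleaved loads and stores. A hedged C sketch of that overall shape (scalar, no overlap handling, purely to show the control flow; sketch_memcpy is an illustrative name):

    #include <stddef.h>
    #include <stdint.h>
    #include <string.h>

    static void *sketch_memcpy(void *dstin, const void *srcin, size_t count)
    {
            uint8_t *dst = dstin;
            const uint8_t *src = srcin;

            /* Head: advance src to a 16-byte boundary, as the tbz-selected
             * 1/2/4/8-byte moves do in the template. */
            while (count && ((uintptr_t)src & 15)) {
                    *dst++ = *src++;
                    count--;
            }

            /* Main loop: 64 bytes per iteration, like the ldp/stp quads. */
            while (count >= 64) {
                    memcpy(dst, src, 64);
                    dst += 64;
                    src += 64;
                    count -= 64;
            }

            /* Tail: whatever is left, up to 63 bytes. */
            memcpy(dst, src, count);
            return dstin;
    }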
index a257b47e2dc4934f0d37b3586e6a2ef5b29f3453..7512bbbc07ac39dbe8c963745281f25c2d60efa4 100644 (file)
@@ -18,6 +18,7 @@
 
 #include <asm/alternative.h>
 #include <asm/assembler.h>
+#include <asm/cache.h>
 #include <asm/cpufeature.h>
 #include <asm/sysreg.h>
 
  * Returns:
  *     x0 - bytes not copied
  */
+       .macro ldrb1 ptr, regB, val
+       ldrb  \ptr, [\regB], \val
+       .endm
+
+       .macro strb1 ptr, regB, val
+       USER(9998f, strb \ptr, [\regB], \val)
+       .endm
+
+       .macro ldrh1 ptr, regB, val
+       ldrh  \ptr, [\regB], \val
+       .endm
+
+       .macro strh1 ptr, regB, val
+       USER(9998f, strh \ptr, [\regB], \val)
+       .endm
+
+       .macro ldr1 ptr, regB, val
+       ldr \ptr, [\regB], \val
+       .endm
+
+       .macro str1 ptr, regB, val
+       USER(9998f, str \ptr, [\regB], \val)
+       .endm
+
+       .macro ldp1 ptr, regB, regC, val
+       ldp \ptr, \regB, [\regC], \val
+       .endm
+
+       .macro stp1 ptr, regB, regC, val
+       USER(9998f, stp \ptr, \regB, [\regC], \val)
+       .endm
+
+end    .req    x5
 ENTRY(__copy_to_user)
 ALTERNATIVE("nop", __stringify(SET_PSTATE_PAN(0)), ARM64_HAS_PAN, \
            CONFIG_ARM64_PAN)
-       add     x5, x0, x2                      // upper user buffer boundary
-       subs    x2, x2, #16
-       b.mi    1f
-0:
-       ldp     x3, x4, [x1], #16
-       subs    x2, x2, #16
-USER(9f, stp   x3, x4, [x0], #16)
-       b.pl    0b
-1:     adds    x2, x2, #8
-       b.mi    2f
-       ldr     x3, [x1], #8
-       sub     x2, x2, #8
-USER(9f, str   x3, [x0], #8    )
-2:     adds    x2, x2, #4
-       b.mi    3f
-       ldr     w3, [x1], #4
-       sub     x2, x2, #4
-USER(9f, str   w3, [x0], #4    )
-3:     adds    x2, x2, #2
-       b.mi    4f
-       ldrh    w3, [x1], #2
-       sub     x2, x2, #2
-USER(9f, strh  w3, [x0], #2    )
-4:     adds    x2, x2, #1
-       b.mi    5f
-       ldrb    w3, [x1]
-USER(9f, strb  w3, [x0]        )
-5:     mov     x0, #0
+       add     end, x0, x2
+#include "copy_template.S"
 ALTERNATIVE("nop", __stringify(SET_PSTATE_PAN(1)), ARM64_HAS_PAN, \
            CONFIG_ARM64_PAN)
+       mov     x0, #0
        ret
 ENDPROC(__copy_to_user)
 
        .section .fixup,"ax"
        .align  2
-9:     sub     x0, x5, x0                      // bytes not copied
+9998:  sub     x0, end, dst                    // bytes not copied
        ret
        .previous
index 8636b7549163a20078734f26ad984e54a804d1c3..4444c1d25f4bb7217f540715e8cde1b27d28913d 100644 (file)
@@ -41,4 +41,4 @@ ENTRY(memchr)
        ret
 2:     mov     x0, #0
        ret
-ENDPROC(memchr)
+ENDPIPROC(memchr)
index 6ea0776ba6de1014c049c740fc041e30f33a5b2c..ffbdec00327d0463ca0bd608b7612e471a08347d 100644 (file)
@@ -255,4 +255,4 @@ CPU_LE( rev data2, data2 )
 .Lret0:
        mov     result, #0
        ret
-ENDPROC(memcmp)
+ENDPIPROC(memcmp)
index 8a9a96d3ddae04331828c9744b4d94368ef70620..67613937711f10d209b3be73ce6a322f674581b5 100644 (file)
  * Returns:
  *     x0 - dest
  */
-dstin  .req    x0
-src    .req    x1
-count  .req    x2
-tmp1   .req    x3
-tmp1w  .req    w3
-tmp2   .req    x4
-tmp2w  .req    w4
-tmp3   .req    x5
-tmp3w  .req    w5
-dst    .req    x6
+       .macro ldrb1 ptr, regB, val
+       ldrb  \ptr, [\regB], \val
+       .endm
 
-A_l    .req    x7
-A_h    .req    x8
-B_l    .req    x9
-B_h    .req    x10
-C_l    .req    x11
-C_h    .req    x12
-D_l    .req    x13
-D_h    .req    x14
+       .macro strb1 ptr, regB, val
+       strb \ptr, [\regB], \val
+       .endm
 
-ENTRY(memcpy)
-       mov     dst, dstin
-       cmp     count, #16
-       /*When memory length is less than 16, the accessed are not aligned.*/
-       b.lo    .Ltiny15
+       .macro ldrh1 ptr, regB, val
+       ldrh  \ptr, [\regB], \val
+       .endm
 
-       neg     tmp2, src
-       ands    tmp2, tmp2, #15/* Bytes to reach alignment. */
-       b.eq    .LSrcAligned
-       sub     count, count, tmp2
-       /*
-       * Copy the leading memory data from src to dst in an increasing
-       * address order.By this way,the risk of overwritting the source
-       * memory data is eliminated when the distance between src and
-       * dst is less than 16. The memory accesses here are alignment.
-       */
-       tbz     tmp2, #0, 1f
-       ldrb    tmp1w, [src], #1
-       strb    tmp1w, [dst], #1
-1:
-       tbz     tmp2, #1, 2f
-       ldrh    tmp1w, [src], #2
-       strh    tmp1w, [dst], #2
-2:
-       tbz     tmp2, #2, 3f
-       ldr     tmp1w, [src], #4
-       str     tmp1w, [dst], #4
-3:
-       tbz     tmp2, #3, .LSrcAligned
-       ldr     tmp1, [src],#8
-       str     tmp1, [dst],#8
+       .macro strh1 ptr, regB, val
+       strh \ptr, [\regB], \val
+       .endm
 
-.LSrcAligned:
-       cmp     count, #64
-       b.ge    .Lcpy_over64
-       /*
-       * Deal with small copies quickly by dropping straight into the
-       * exit block.
-       */
-.Ltail63:
-       /*
-       * Copy up to 48 bytes of data. At this point we only need the
-       * bottom 6 bits of count to be accurate.
-       */
-       ands    tmp1, count, #0x30
-       b.eq    .Ltiny15
-       cmp     tmp1w, #0x20
-       b.eq    1f
-       b.lt    2f
-       ldp     A_l, A_h, [src], #16
-       stp     A_l, A_h, [dst], #16
-1:
-       ldp     A_l, A_h, [src], #16
-       stp     A_l, A_h, [dst], #16
-2:
-       ldp     A_l, A_h, [src], #16
-       stp     A_l, A_h, [dst], #16
-.Ltiny15:
-       /*
-       * Prefer to break one ldp/stp into several load/store to access
-       * memory in an increasing address order,rather than to load/store 16
-       * bytes from (src-16) to (dst-16) and to backward the src to aligned
-       * address,which way is used in original cortex memcpy. If keeping
-       * the original memcpy process here, memmove need to satisfy the
-       * precondition that src address is at least 16 bytes bigger than dst
-       * address,otherwise some source data will be overwritten when memove
-       * call memcpy directly. To make memmove simpler and decouple the
-       * memcpy's dependency on memmove, withdrew the original process.
-       */
-       tbz     count, #3, 1f
-       ldr     tmp1, [src], #8
-       str     tmp1, [dst], #8
-1:
-       tbz     count, #2, 2f
-       ldr     tmp1w, [src], #4
-       str     tmp1w, [dst], #4
-2:
-       tbz     count, #1, 3f
-       ldrh    tmp1w, [src], #2
-       strh    tmp1w, [dst], #2
-3:
-       tbz     count, #0, .Lexitfunc
-       ldrb    tmp1w, [src]
-       strb    tmp1w, [dst]
+       .macro ldr1 ptr, regB, val
+       ldr \ptr, [\regB], \val
+       .endm
 
-.Lexitfunc:
-       ret
+       .macro str1 ptr, regB, val
+       str \ptr, [\regB], \val
+       .endm
 
-.Lcpy_over64:
-       subs    count, count, #128
-       b.ge    .Lcpy_body_large
-       /*
-       * Less than 128 bytes to copy, so handle 64 here and then jump
-       * to the tail.
-       */
-       ldp     A_l, A_h, [src],#16
-       stp     A_l, A_h, [dst],#16
-       ldp     B_l, B_h, [src],#16
-       ldp     C_l, C_h, [src],#16
-       stp     B_l, B_h, [dst],#16
-       stp     C_l, C_h, [dst],#16
-       ldp     D_l, D_h, [src],#16
-       stp     D_l, D_h, [dst],#16
+       .macro ldp1 ptr, regB, regC, val
+       ldp \ptr, \regB, [\regC], \val
+       .endm
 
-       tst     count, #0x3f
-       b.ne    .Ltail63
-       ret
+       .macro stp1 ptr, regB, regC, val
+       stp \ptr, \regB, [\regC], \val
+       .endm
 
-       /*
-       * Critical loop.  Start at a new cache line boundary.  Assuming
-       * 64 bytes per line this ensures the entire loop is in one line.
-       */
-       .p2align        L1_CACHE_SHIFT
-.Lcpy_body_large:
-       /* pre-get 64 bytes data. */
-       ldp     A_l, A_h, [src],#16
-       ldp     B_l, B_h, [src],#16
-       ldp     C_l, C_h, [src],#16
-       ldp     D_l, D_h, [src],#16
-1:
-       /*
-       * interlace the load of next 64 bytes data block with store of the last
-       * loaded 64 bytes data.
-       */
-       stp     A_l, A_h, [dst],#16
-       ldp     A_l, A_h, [src],#16
-       stp     B_l, B_h, [dst],#16
-       ldp     B_l, B_h, [src],#16
-       stp     C_l, C_h, [dst],#16
-       ldp     C_l, C_h, [src],#16
-       stp     D_l, D_h, [dst],#16
-       ldp     D_l, D_h, [src],#16
-       subs    count, count, #64
-       b.ge    1b
-       stp     A_l, A_h, [dst],#16
-       stp     B_l, B_h, [dst],#16
-       stp     C_l, C_h, [dst],#16
-       stp     D_l, D_h, [dst],#16
-
-       tst     count, #0x3f
-       b.ne    .Ltail63
+       .weak memcpy
+ENTRY(__memcpy)
+ENTRY(memcpy)
+#include "copy_template.S"
        ret
-ENDPROC(memcpy)
+ENDPIPROC(memcpy)
+ENDPROC(__memcpy)
index 57b19ea2dad467d885f09991b902d0a52bd6f747..a5a4459013b1a59d5a54b2a70fd1ad8e0f256fd9 100644 (file)
@@ -57,12 +57,14 @@ C_h .req    x12
 D_l    .req    x13
 D_h    .req    x14
 
+       .weak memmove
+ENTRY(__memmove)
 ENTRY(memmove)
        cmp     dstin, src
-       b.lo    memcpy
+       b.lo    __memcpy
        add     tmp1, src, count
        cmp     dstin, tmp1
-       b.hs    memcpy          /* No overlap.  */
+       b.hs    __memcpy                /* No overlap.  */
 
        add     dst, dstin, count
        add     src, src, count
@@ -194,4 +196,5 @@ ENTRY(memmove)
        tst     count, #0x3f
        b.ne    .Ltail63
        ret
-ENDPROC(memmove)
+ENDPIPROC(memmove)
+ENDPROC(__memmove)
index 7c72dfd36b6396a921b7d2b7d66e5880f8314d72..f2670a9f218c919ff68a1ffcfdbce468e693ae60 100644 (file)
@@ -54,6 +54,8 @@ dst           .req    x8
 tmp3w          .req    w9
 tmp3           .req    x9
 
+       .weak memset
+ENTRY(__memset)
 ENTRY(memset)
        mov     dst, dstin      /* Preserve return value.  */
        and     A_lw, val, #255
@@ -213,4 +215,5 @@ ENTRY(memset)
        ands    count, count, zva_bits_x
        b.ne    .Ltail_maybe_long
        ret
-ENDPROC(memset)
+ENDPIPROC(memset)
+ENDPROC(__memset)
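
memcpy, memmove and memset are now weak symbols with uninstrumented __memcpy/__memmove/__memset aliases, so an instrumented C definition can override the weak assembly symbol while the plain routine stays reachable. A hedged sketch of such an interceptor, along the lines of KASAN's generic string-op hooks; check_memory_region() is KASAN's internal checker and its signature is approximated here:

    #include <stdbool.h>
    #include <stddef.h>

    void *__memcpy(void *dst, const void *src, size_t n);
    void check_memory_region(unsigned long addr, size_t n, bool write);

    /* Overrides the weak assembly symbol; callers that must not be
     * instrumented keep using __memcpy directly. */
    void *memcpy(void *dst, const void *src, size_t n)
    {
            check_memory_region((unsigned long)src, n, false);   /* validate read  */
            check_memory_region((unsigned long)dst, n, true);    /* validate write */
            return __memcpy(dst, src, n);
    }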
index 42f828b06c59a4daab35562e03aa70516a5d864a..471fe61760ef661213007542df37c7c8fdcb1a18 100644 (file)
@@ -231,4 +231,4 @@ CPU_BE(     orr     syndrome, diff, has_nul )
        lsr     data1, data1, #56
        sub     result, data1, data2, lsr #56
        ret
-ENDPROC(strcmp)
+ENDPIPROC(strcmp)
index 987b68b9ce4474bb9cfd44e77ef61d3a871d40f8..55ccc8e24c08440399034d41bf8aa04699e09812 100644 (file)
@@ -123,4 +123,4 @@ CPU_LE( lsr tmp2, tmp2, tmp1 )      /* Shift (tmp1 & 63).  */
        csinv   data1, data1, xzr, le
        csel    data2, data2, data2a, le
        b       .Lrealigned
-ENDPROC(strlen)
+ENDPIPROC(strlen)
index 0224cf5a55334a297c1cc079f08644fc09707156..e267044761c6f2c1b4cadcba729e8d0dbe79f766 100644 (file)
@@ -307,4 +307,4 @@ CPU_BE( orr syndrome, diff, has_nul )
 .Lret0:
        mov     result, #0
        ret
-ENDPROC(strncmp)
+ENDPIPROC(strncmp)
index 773d37a14039d9bb456f0896e022c4a3e1415773..57f57fde5722a99075f257012c059aff9f836078 100644 (file)
@@ -4,3 +4,6 @@ obj-y                           := dma-mapping.o extable.o fault.o init.o \
                                   context.o proc.o pageattr.o
 obj-$(CONFIG_HUGETLB_PAGE)     += hugetlbpage.o
 obj-$(CONFIG_ARM64_PTDUMP)     += dump.o
+
+obj-$(CONFIG_KASAN)            += kasan_init.o
+KASAN_SANITIZE_kasan_init.o    := n
index eb48d5df4a0f7252462bd34b6209f27960d95e93..cfa44a6adc0ad5ec29f78228196b7e834b65df40 100644 (file)
@@ -98,7 +98,7 @@ ENTRY(__flush_dcache_area)
        b.lo    1b
        dsb     sy
        ret
-ENDPROC(__flush_dcache_area)
+ENDPIPROC(__flush_dcache_area)
 
 /*
  *     __inval_cache_range(start, end)
@@ -131,7 +131,7 @@ __dma_inv_range:
        b.lo    2b
        dsb     sy
        ret
-ENDPROC(__inval_cache_range)
+ENDPIPROC(__inval_cache_range)
 ENDPROC(__dma_inv_range)
 
 /*
@@ -171,7 +171,7 @@ ENTRY(__dma_flush_range)
        b.lo    1b
        dsb     sy
        ret
-ENDPROC(__dma_flush_range)
+ENDPIPROC(__dma_flush_range)
 
 /*
  *     __dma_map_area(start, size, dir)
@@ -184,7 +184,7 @@ ENTRY(__dma_map_area)
        cmp     w2, #DMA_FROM_DEVICE
        b.eq    __dma_inv_range
        b       __dma_clean_range
-ENDPROC(__dma_map_area)
+ENDPIPROC(__dma_map_area)
 
 /*
  *     __dma_unmap_area(start, size, dir)
@@ -197,4 +197,4 @@ ENTRY(__dma_unmap_area)
        cmp     w2, #DMA_TO_DEVICE
        b.ne    __dma_inv_range
        ret
-ENDPROC(__dma_unmap_area)
+ENDPIPROC(__dma_unmap_area)
index d70ff14dbdbdd33ef4ed65b75caa4fa575c6497b..f636a2639f031dd03d0b5058ccf378721d67dbf4 100644 (file)
  * along with this program.  If not, see <http://www.gnu.org/licenses/>.
  */
 
-#include <linux/init.h>
+#include <linux/bitops.h>
 #include <linux/sched.h>
+#include <linux/slab.h>
 #include <linux/mm.h>
-#include <linux/smp.h>
-#include <linux/percpu.h>
 
+#include <asm/cpufeature.h>
 #include <asm/mmu_context.h>
 #include <asm/tlbflush.h>
-#include <asm/cachetype.h>
 
-#define asid_bits(reg) \
-       (((read_cpuid(ID_AA64MMFR0_EL1) & 0xf0) >> 2) + 8)
+static u32 asid_bits;
+static DEFINE_RAW_SPINLOCK(cpu_asid_lock);
 
-#define ASID_FIRST_VERSION     (1 << MAX_ASID_BITS)
+static atomic64_t asid_generation;
+static unsigned long *asid_map;
 
-static DEFINE_RAW_SPINLOCK(cpu_asid_lock);
-unsigned int cpu_last_asid = ASID_FIRST_VERSION;
+static DEFINE_PER_CPU(atomic64_t, active_asids);
+static DEFINE_PER_CPU(u64, reserved_asids);
+static cpumask_t tlb_flush_pending;
 
-/*
- * We fork()ed a process, and we need a new context for the child to run in.
- */
-void __init_new_context(struct task_struct *tsk, struct mm_struct *mm)
-{
-       mm->context.id = 0;
-       raw_spin_lock_init(&mm->context.id_lock);
-}
+#define ASID_MASK              (~GENMASK(asid_bits - 1, 0))
+#define ASID_FIRST_VERSION     (1UL << asid_bits)
+#define NUM_USER_ASIDS         ASID_FIRST_VERSION
 
-static void flush_context(void)
+static void flush_context(unsigned int cpu)
 {
-       /* set the reserved TTBR0 before flushing the TLB */
-       cpu_set_reserved_ttbr0();
-       flush_tlb_all();
-       if (icache_is_aivivt())
-               __flush_icache_all();
-}
+       int i;
+       u64 asid;
 
-static void set_mm_context(struct mm_struct *mm, unsigned int asid)
-{
-       unsigned long flags;
+       /* Update the list of reserved ASIDs and the ASID bitmap. */
+       bitmap_clear(asid_map, 0, NUM_USER_ASIDS);
 
        /*
-        * Locking needed for multi-threaded applications where the same
-        * mm->context.id could be set from different CPUs during the
-        * broadcast. This function is also called via IPI so the
-        * mm->context.id_lock has to be IRQ-safe.
+        * Ensure the generation bump is observed before we xchg the
+        * active_asids.
         */
-       raw_spin_lock_irqsave(&mm->context.id_lock, flags);
-       if (likely((mm->context.id ^ cpu_last_asid) >> MAX_ASID_BITS)) {
+       smp_wmb();
+
+       for_each_possible_cpu(i) {
+               asid = atomic64_xchg_relaxed(&per_cpu(active_asids, i), 0);
                /*
-                * Old version of ASID found. Set the new one and reset
-                * mm_cpumask(mm).
+                * If this CPU has already been through a
+                * rollover, but hasn't run another task in
+                * the meantime, we must preserve its reserved
+                * ASID, as this is the only trace we have of
+                * the process it is still running.
                 */
-               mm->context.id = asid;
-               cpumask_clear(mm_cpumask(mm));
+               if (asid == 0)
+                       asid = per_cpu(reserved_asids, i);
+               __set_bit(asid & ~ASID_MASK, asid_map);
+               per_cpu(reserved_asids, i) = asid;
        }
-       raw_spin_unlock_irqrestore(&mm->context.id_lock, flags);
 
-       /*
-        * Set the mm_cpumask(mm) bit for the current CPU.
-        */
-       cpumask_set_cpu(smp_processor_id(), mm_cpumask(mm));
+       /* Queue a TLB invalidate and flush the I-cache if necessary. */
+       cpumask_setall(&tlb_flush_pending);
+
+       if (icache_is_aivivt())
+               __flush_icache_all();
 }
 
-/*
- * Reset the ASID on the current CPU. This function call is broadcast from the
- * CPU handling the ASID rollover and holding cpu_asid_lock.
- */
-static void reset_context(void *info)
+static int is_reserved_asid(u64 asid)
+{
+       int cpu;
+       for_each_possible_cpu(cpu)
+               if (per_cpu(reserved_asids, cpu) == asid)
+                       return 1;
+       return 0;
+}
+
+static u64 new_context(struct mm_struct *mm, unsigned int cpu)
 {
-       unsigned int asid;
-       unsigned int cpu = smp_processor_id();
-       struct mm_struct *mm = current->active_mm;
+       static u32 cur_idx = 1;
+       u64 asid = atomic64_read(&mm->context.id);
+       u64 generation = atomic64_read(&asid_generation);
+
+       if (asid != 0) {
+               /*
+                * If our current ASID was active during a rollover, we
+                * can continue to use it and this was just a false alarm.
+                */
+               if (is_reserved_asid(asid))
+                       return generation | (asid & ~ASID_MASK);
+
+               /*
+                * We had a valid ASID in a previous life, so try to re-use
+                * it if possible.
+                */
+               asid &= ~ASID_MASK;
+               if (!__test_and_set_bit(asid, asid_map))
+                       goto bump_gen;
+       }
 
        /*
-        * current->active_mm could be init_mm for the idle thread immediately
-        * after secondary CPU boot or hotplug. TTBR0_EL1 is already set to
-        * the reserved value, so no need to reset any context.
+        * Allocate a free ASID. If we can't find one, take a note of the
+        * currently active ASIDs and mark the TLBs as requiring flushes.
+        * We always count from ASID #1, as we use ASID #0 when setting a
+        * reserved TTBR0 for the init_mm.
         */
-       if (mm == &init_mm)
-               return;
+       asid = find_next_zero_bit(asid_map, NUM_USER_ASIDS, cur_idx);
+       if (asid != NUM_USER_ASIDS)
+               goto set_asid;
 
-       smp_rmb();
-       asid = cpu_last_asid + cpu;
+       /* We're out of ASIDs, so increment the global generation count */
+       generation = atomic64_add_return_relaxed(ASID_FIRST_VERSION,
+                                                &asid_generation);
+       flush_context(cpu);
 
-       flush_context();
-       set_mm_context(mm, asid);
+       /* We have at least 1 ASID per CPU, so this will always succeed */
+       asid = find_next_zero_bit(asid_map, NUM_USER_ASIDS, 1);
 
-       /* set the new ASID */
-       cpu_switch_mm(mm->pgd, mm);
+set_asid:
+       __set_bit(asid, asid_map);
+       cur_idx = asid;
+
+bump_gen:
+       asid |= generation;
+       return asid;
 }
 
-void __new_context(struct mm_struct *mm)
+void check_and_switch_context(struct mm_struct *mm, unsigned int cpu)
 {
-       unsigned int asid;
-       unsigned int bits = asid_bits();
+       unsigned long flags;
+       u64 asid;
+
+       asid = atomic64_read(&mm->context.id);
 
-       raw_spin_lock(&cpu_asid_lock);
        /*
-        * Check the ASID again, in case the change was broadcast from another
-        * CPU before we acquired the lock.
+        * The memory ordering here is subtle. We rely on the control
+        * dependency between the generation read and the update of
+        * active_asids to ensure that we are synchronised with a
+        * parallel rollover (i.e. this pairs with the smp_wmb() in
+        * flush_context).
         */
-       if (!unlikely((mm->context.id ^ cpu_last_asid) >> MAX_ASID_BITS)) {
-               cpumask_set_cpu(smp_processor_id(), mm_cpumask(mm));
-               raw_spin_unlock(&cpu_asid_lock);
-               return;
+       if (!((asid ^ atomic64_read(&asid_generation)) >> asid_bits)
+           && atomic64_xchg_relaxed(&per_cpu(active_asids, cpu), asid))
+               goto switch_mm_fastpath;
+
+       raw_spin_lock_irqsave(&cpu_asid_lock, flags);
+       /* Check that our ASID belongs to the current generation. */
+       asid = atomic64_read(&mm->context.id);
+       if ((asid ^ atomic64_read(&asid_generation)) >> asid_bits) {
+               asid = new_context(mm, cpu);
+               atomic64_set(&mm->context.id, asid);
        }
-       /*
-        * At this point, it is guaranteed that the current mm (with an old
-        * ASID) isn't active on any other CPU since the ASIDs are changed
-        * simultaneously via IPI.
-        */
-       asid = ++cpu_last_asid;
 
-       /*
-        * If we've used up all our ASIDs, we need to start a new version and
-        * flush the TLB.
-        */
-       if (unlikely((asid & ((1 << bits) - 1)) == 0)) {
-               /* increment the ASID version */
-               cpu_last_asid += (1 << MAX_ASID_BITS) - (1 << bits);
-               if (cpu_last_asid == 0)
-                       cpu_last_asid = ASID_FIRST_VERSION;
-               asid = cpu_last_asid + smp_processor_id();
-               flush_context();
-               smp_wmb();
-               smp_call_function(reset_context, NULL, 1);
-               cpu_last_asid += NR_CPUS - 1;
+       if (cpumask_test_and_clear_cpu(cpu, &tlb_flush_pending))
+               local_flush_tlb_all();
+
+       atomic64_set(&per_cpu(active_asids, cpu), asid);
+       raw_spin_unlock_irqrestore(&cpu_asid_lock, flags);
+
+switch_mm_fastpath:
+       cpu_switch_mm(mm->pgd, mm);
+}
+
+static int asids_init(void)
+{
+       int fld = cpuid_feature_extract_field(read_cpuid(ID_AA64MMFR0_EL1), 4);
+
+       switch (fld) {
+       default:
+               pr_warn("Unknown ASID size (%d); assuming 8-bit\n", fld);
+               /* Fallthrough */
+       case 0:
+               asid_bits = 8;
+               break;
+       case 2:
+               asid_bits = 16;
        }
 
-       set_mm_context(mm, asid);
-       raw_spin_unlock(&cpu_asid_lock);
+       /* If we end up with more CPUs than ASIDs, expect things to crash */
+       WARN_ON(NUM_USER_ASIDS < num_possible_cpus());
+       atomic64_set(&asid_generation, ASID_FIRST_VERSION);
+       asid_map = kzalloc(BITS_TO_LONGS(NUM_USER_ASIDS) * sizeof(*asid_map),
+                          GFP_KERNEL);
+       if (!asid_map)
+               panic("Failed to allocate bitmap for %lu ASIDs\n",
+                     NUM_USER_ASIDS);
+
+       pr_info("ASID allocator initialised with %lu entries\n", NUM_USER_ASIDS);
+       return 0;
 }
+early_initcall(asids_init);
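
The allocator above packs a generation count into the high bits of mm->context.id and keeps the low asid_bits as the hardware ASID; check_and_switch_context() only takes the slow path when the stored generation is stale. A simplified model of that layout, with the ASID width fixed at 16 bits for illustration (the real value comes from ID_AA64MMFR0_EL1):

    #include <stdint.h>
    #include <stdbool.h>

    #define ASID_BITS           16                      /* illustrative: 8 or 16 on real CPUs */
    #define ASID_FIRST_VERSION  (1ULL << ASID_BITS)     /* generation increment */

    /* context id = generation (high bits) | hardware ASID (low ASID_BITS) */
    static inline bool generation_is_current(uint64_t ctx_id, uint64_t generation)
    {
            return ((ctx_id ^ generation) >> ASID_BITS) == 0;
    }

    static inline uint16_t hw_asid(uint64_t ctx_id)
    {
            return (uint16_t)(ctx_id & (ASID_FIRST_VERSION - 1));
    }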
index f3d6221cd5bdd4c7bf59fd99c71415d3922e2572..5a22a119a74c87b4b5b54e114701b3c6eed233e6 100644 (file)
@@ -67,6 +67,12 @@ static struct addr_marker address_markers[] = {
        { -1,                   NULL },
 };
 
+/*
+ * The page dumper groups page table entries of the same type into a single
+ * description. It uses pg_state to track the range information while
+ * iterating over the pte entries. When the continuity is broken it then
+ * dumps out a description of the range.
+ */
 struct pg_state {
        struct seq_file *seq;
        const struct addr_marker *marker;
@@ -113,6 +119,16 @@ static const struct prot_bits pte_bits[] = {
                .val    = PTE_NG,
                .set    = "NG",
                .clear  = "  ",
+       }, {
+               .mask   = PTE_CONT,
+               .val    = PTE_CONT,
+               .set    = "CON",
+               .clear  = "   ",
+       }, {
+               .mask   = PTE_TABLE_BIT,
+               .val    = PTE_TABLE_BIT,
+               .set    = "   ",
+               .clear  = "BLK",
        }, {
                .mask   = PTE_UXN,
                .val    = PTE_UXN,
@@ -198,7 +214,7 @@ static void note_page(struct pg_state *st, unsigned long addr, unsigned level,
                unsigned long delta;
 
                if (st->current_prot) {
-                       seq_printf(st->seq, "0x%16lx-0x%16lx   ",
+                       seq_printf(st->seq, "0x%016lx-0x%016lx   ",
                                   st->start_address, addr);
 
                        delta = (addr - st->start_address) >> 10;
index 9fadf6d7039b721b072379b5af51abce726f5b92..19211c4a891111cee301552a234821646f522ea6 100644 (file)
@@ -556,7 +556,7 @@ asmlinkage int __exception do_debug_exception(unsigned long addr,
 }
 
 #ifdef CONFIG_ARM64_PAN
-void cpu_enable_pan(void)
+void cpu_enable_pan(void *__unused)
 {
        config_sctlr_el1(SCTLR_EL1_SPAN, 0);
 }
index f5c0680d17d9efd701f7c261fd3a8b0210ad9d7a..17bf39ac83ba073109118817c3ab72346ae3824b 100644 (file)
@@ -86,10 +86,10 @@ static void __init zone_sizes_init(unsigned long min, unsigned long max)
        memset(zone_size, 0, sizeof(zone_size));
 
        /* 4GB maximum for 32-bit only capable devices */
-       if (IS_ENABLED(CONFIG_ZONE_DMA)) {
-               max_dma = PFN_DOWN(arm64_dma_phys_limit);
-               zone_size[ZONE_DMA] = max_dma - min;
-       }
+#ifdef CONFIG_ZONE_DMA
+       max_dma = PFN_DOWN(arm64_dma_phys_limit);
+       zone_size[ZONE_DMA] = max_dma - min;
+#endif
        zone_size[ZONE_NORMAL] = max - max_dma;
 
        memcpy(zhole_size, zone_size, sizeof(zhole_size));
@@ -101,11 +101,12 @@ static void __init zone_sizes_init(unsigned long min, unsigned long max)
                if (start >= max)
                        continue;
 
-               if (IS_ENABLED(CONFIG_ZONE_DMA) && start < max_dma) {
+#ifdef CONFIG_ZONE_DMA
+               if (start < max_dma) {
                        unsigned long dma_end = min(end, max_dma);
                        zhole_size[ZONE_DMA] -= dma_end - start;
                }
-
+#endif
                if (end > max_dma) {
                        unsigned long normal_end = min(end, max);
                        unsigned long normal_start = max(start, max_dma);
@@ -298,6 +299,9 @@ void __init mem_init(void)
 #define MLK_ROUNDUP(b, t) b, t, DIV_ROUND_UP(((t) - (b)), SZ_1K)
 
        pr_notice("Virtual kernel memory layout:\n"
+#ifdef CONFIG_KASAN
+                 "    kasan   : 0x%16lx - 0x%16lx   (%6ld GB)\n"
+#endif
                  "    vmalloc : 0x%16lx - 0x%16lx   (%6ld GB)\n"
 #ifdef CONFIG_SPARSEMEM_VMEMMAP
                  "    vmemmap : 0x%16lx - 0x%16lx   (%6ld GB maximum)\n"
@@ -310,6 +314,9 @@ void __init mem_init(void)
                  "      .init : 0x%p" " - 0x%p" "   (%6ld KB)\n"
                  "      .text : 0x%p" " - 0x%p" "   (%6ld KB)\n"
                  "      .data : 0x%p" " - 0x%p" "   (%6ld KB)\n",
+#ifdef CONFIG_KASAN
+                 MLG(KASAN_SHADOW_START, KASAN_SHADOW_END),
+#endif
                  MLG(VMALLOC_START, VMALLOC_END),
 #ifdef CONFIG_SPARSEMEM_VMEMMAP
                  MLG((unsigned long)vmemmap,
diff --git a/arch/arm64/mm/kasan_init.c b/arch/arm64/mm/kasan_init.c
new file mode 100644 (file)
index 0000000..cf038c7
--- /dev/null
@@ -0,0 +1,165 @@
+/*
+ * This file contains kasan initialization code for ARM64.
+ *
+ * Copyright (c) 2015 Samsung Electronics Co., Ltd.
+ * Author: Andrey Ryabinin <ryabinin.a.a@gmail.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ */
+
+#define pr_fmt(fmt) "kasan: " fmt
+#include <linux/kasan.h>
+#include <linux/kernel.h>
+#include <linux/memblock.h>
+#include <linux/start_kernel.h>
+
+#include <asm/page.h>
+#include <asm/pgalloc.h>
+#include <asm/pgtable.h>
+#include <asm/tlbflush.h>
+
+static pgd_t tmp_pg_dir[PTRS_PER_PGD] __initdata __aligned(PGD_SIZE);
+
+static void __init kasan_early_pte_populate(pmd_t *pmd, unsigned long addr,
+                                       unsigned long end)
+{
+       pte_t *pte;
+       unsigned long next;
+
+       if (pmd_none(*pmd))
+               pmd_populate_kernel(&init_mm, pmd, kasan_zero_pte);
+
+       pte = pte_offset_kernel(pmd, addr);
+       do {
+               next = addr + PAGE_SIZE;
+               set_pte(pte, pfn_pte(virt_to_pfn(kasan_zero_page),
+                                       PAGE_KERNEL));
+       } while (pte++, addr = next, addr != end && pte_none(*pte));
+}
+
+static void __init kasan_early_pmd_populate(pud_t *pud,
+                                       unsigned long addr,
+                                       unsigned long end)
+{
+       pmd_t *pmd;
+       unsigned long next;
+
+       if (pud_none(*pud))
+               pud_populate(&init_mm, pud, kasan_zero_pmd);
+
+       pmd = pmd_offset(pud, addr);
+       do {
+               next = pmd_addr_end(addr, end);
+               kasan_early_pte_populate(pmd, addr, next);
+       } while (pmd++, addr = next, addr != end && pmd_none(*pmd));
+}
+
+static void __init kasan_early_pud_populate(pgd_t *pgd,
+                                       unsigned long addr,
+                                       unsigned long end)
+{
+       pud_t *pud;
+       unsigned long next;
+
+       if (pgd_none(*pgd))
+               pgd_populate(&init_mm, pgd, kasan_zero_pud);
+
+       pud = pud_offset(pgd, addr);
+       do {
+               next = pud_addr_end(addr, end);
+               kasan_early_pmd_populate(pud, addr, next);
+       } while (pud++, addr = next, addr != end && pud_none(*pud));
+}
+
+static void __init kasan_map_early_shadow(void)
+{
+       unsigned long addr = KASAN_SHADOW_START;
+       unsigned long end = KASAN_SHADOW_END;
+       unsigned long next;
+       pgd_t *pgd;
+
+       pgd = pgd_offset_k(addr);
+       do {
+               next = pgd_addr_end(addr, end);
+               kasan_early_pud_populate(pgd, addr, next);
+       } while (pgd++, addr = next, addr != end);
+}
+
+asmlinkage void __init kasan_early_init(void)
+{
+       BUILD_BUG_ON(KASAN_SHADOW_OFFSET != KASAN_SHADOW_END - (1UL << 61));
+       BUILD_BUG_ON(!IS_ALIGNED(KASAN_SHADOW_START, PGDIR_SIZE));
+       BUILD_BUG_ON(!IS_ALIGNED(KASAN_SHADOW_END, PGDIR_SIZE));
+       kasan_map_early_shadow();
+}
+
+static void __init clear_pgds(unsigned long start,
+                       unsigned long end)
+{
+       /*
+        * Remove references to the kasan page tables from
+        * swapper_pg_dir. pgd_clear() can't be used
+        * here because it's a no-op on 2- and 3-level page table setups.
+        */
+       for (; start < end; start += PGDIR_SIZE)
+               set_pgd(pgd_offset_k(start), __pgd(0));
+}
+
+static void __init cpu_set_ttbr1(unsigned long ttbr1)
+{
+       asm(
+       "       msr     ttbr1_el1, %0\n"
+       "       isb"
+       :
+       : "r" (ttbr1));
+}
+
+void __init kasan_init(void)
+{
+       struct memblock_region *reg;
+
+       /*
+        * We are going to perform proper setup of shadow memory.
+        * First we should unmap the early shadow (the clear_pgds() call
+        * below). However, instrumented code cannot execute without shadow
+        * memory, so tmp_pg_dir keeps the early shadow mapped until the
+        * full shadow setup is finished.
+        */
+       memcpy(tmp_pg_dir, swapper_pg_dir, sizeof(tmp_pg_dir));
+       cpu_set_ttbr1(__pa(tmp_pg_dir));
+       flush_tlb_all();
+
+       clear_pgds(KASAN_SHADOW_START, KASAN_SHADOW_END);
+
+       kasan_populate_zero_shadow((void *)KASAN_SHADOW_START,
+                       kasan_mem_to_shadow((void *)MODULES_VADDR));
+
+       for_each_memblock(memory, reg) {
+               void *start = (void *)__phys_to_virt(reg->base);
+               void *end = (void *)__phys_to_virt(reg->base + reg->size);
+
+               if (start >= end)
+                       break;
+
+               /*
+                * end + 1 here is intentional. We check several shadow bytes
+                * in advance to slightly speed up the fast path. In some rare
+                * cases we could cross the boundary of the mapped shadow, so
+                * we just map some more here.
+                */
+               vmemmap_populate((unsigned long)kasan_mem_to_shadow(start),
+                               (unsigned long)kasan_mem_to_shadow(end) + 1,
+                               pfn_to_nid(virt_to_pfn(start)));
+       }
+
+       memset(kasan_zero_page, 0, PAGE_SIZE);
+       cpu_set_ttbr1(__pa(swapper_pg_dir));
+       flush_tlb_all();
+
+       /* At this point kasan is fully initialized. Enable error messages */
+       init_task.kasan_depth = 0;
+       pr_info("KernelAddressSanitizer initialized\n");
+}
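
kasan_init() above replaces the early zero-page shadow with real shadow for the linear map. The address translation it relies on is the generic KASAN one: each shadow byte covers 8 bytes of kernel memory (KASAN_SHADOW_SCALE_SHIFT == 3), offset by a compile-time constant. A small sketch of that mapping; the SHADOW_OFFSET value here is a placeholder, the real one is set per configuration:

    #include <stdint.h>

    #define SHADOW_SCALE_SHIFT  3                        /* 1 shadow byte per 8 bytes   */
    #define SHADOW_OFFSET       0xdfff200000000000UL     /* illustrative value only     */

    /* Mirrors the generic kasan_mem_to_shadow() translation. */
    static inline uint8_t *mem_to_shadow(const void *addr)
    {
            return (uint8_t *)(((uintptr_t)addr >> SHADOW_SCALE_SHIFT)
                               + SHADOW_OFFSET);
    }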
index 9211b8527f2580aeb561b8c7cc3cdc75f728571f..c2fa6b56613c23ba111c94310df522454e2ea732 100644 (file)
@@ -32,6 +32,7 @@
 
 #include <asm/cputype.h>
 #include <asm/fixmap.h>
+#include <asm/kernel-pgtable.h>
 #include <asm/sections.h>
 #include <asm/setup.h>
 #include <asm/sizes.h>
@@ -80,19 +81,55 @@ static void split_pmd(pmd_t *pmd, pte_t *pte)
        do {
                /*
                 * Need to have the least restrictive permissions available
-                * permissions will be fixed up later
+                * permissions will be fixed up later. Default the new page
+                * range as contiguous ptes.
                 */
-               set_pte(pte, pfn_pte(pfn, PAGE_KERNEL_EXEC));
+               set_pte(pte, pfn_pte(pfn, PAGE_KERNEL_EXEC_CONT));
                pfn++;
        } while (pte++, i++, i < PTRS_PER_PTE);
 }
 
+/*
+ * Given a PTE with the CONT bit set, determine where the CONT range
+ * starts, and clear the entire range of PTE CONT bits.
+ */
+static void clear_cont_pte_range(pte_t *pte, unsigned long addr)
+{
+       int i;
+
+       pte -= CONT_RANGE_OFFSET(addr);
+       for (i = 0; i < CONT_PTES; i++) {
+               set_pte(pte, pte_mknoncont(*pte));
+               pte++;
+       }
+       flush_tlb_all();
+}
+
+/*
+ * Given a range of PTEs set the pfn and provided page protection flags
+ */
+static void __populate_init_pte(pte_t *pte, unsigned long addr,
+                               unsigned long end, phys_addr_t phys,
+                               pgprot_t prot)
+{
+       unsigned long pfn = __phys_to_pfn(phys);
+
+       do {
+               /* clear all the bits except the pfn, then apply the prot */
+               set_pte(pte, pfn_pte(pfn, prot));
+               pte++;
+               pfn++;
+               addr += PAGE_SIZE;
+       } while (addr != end);
+}
+
 static void alloc_init_pte(pmd_t *pmd, unsigned long addr,
-                                 unsigned long end, unsigned long pfn,
+                                 unsigned long end, phys_addr_t phys,
                                  pgprot_t prot,
                                  void *(*alloc)(unsigned long size))
 {
        pte_t *pte;
+       unsigned long next;
 
        if (pmd_none(*pmd) || pmd_sect(*pmd)) {
                pte = alloc(PTRS_PER_PTE * sizeof(pte_t));
@@ -105,9 +142,27 @@ static void alloc_init_pte(pmd_t *pmd, unsigned long addr,
 
        pte = pte_offset_kernel(pmd, addr);
        do {
-               set_pte(pte, pfn_pte(pfn, prot));
-               pfn++;
-       } while (pte++, addr += PAGE_SIZE, addr != end);
+               next = min(end, (addr + CONT_SIZE) & CONT_MASK);
+               if (((addr | next | phys) & ~CONT_MASK) == 0) {
+                       /* a block of CONT_PTES  */
+                       __populate_init_pte(pte, addr, next, phys,
+                                           prot | __pgprot(PTE_CONT));
+               } else {
+                       /*
+                        * If the range being split is already inside a
+                        * contiguous range but this PTE isn't going to be
+                        * contiguous, then we want to unmark the adjacent
+                        * ranges, then update the portion of the range we
+                        * are interested in.
+                        */
+                        clear_cont_pte_range(pte, addr);
+                        __populate_init_pte(pte, addr, next, phys, prot);
+               }
+
+               pte += (next - addr) >> PAGE_SHIFT;
+               phys += next - addr;
+               addr = next;
+       } while (addr != end);
 }
 
 void split_pud(pud_t *old_pud, pmd_t *pmd)
@@ -168,8 +223,7 @@ static void alloc_init_pmd(struct mm_struct *mm, pud_t *pud,
                                }
                        }
                } else {
-                       alloc_init_pte(pmd, addr, next, __phys_to_pfn(phys),
-                                      prot, alloc);
+                       alloc_init_pte(pmd, addr, next, phys, prot, alloc);
                }
                phys += next - addr;
        } while (pmd++, addr = next, addr != end);
@@ -353,14 +407,11 @@ static void __init map_mem(void)
         * memory addressable from the initial direct kernel mapping.
         *
         * The initial direct kernel mapping, located at swapper_pg_dir, gives
-        * us PUD_SIZE (4K pages) or PMD_SIZE (64K pages) memory starting from
-        * PHYS_OFFSET (which must be aligned to 2MB as per
-        * Documentation/arm64/booting.txt).
+        * us PUD_SIZE (with SECTION maps) or PMD_SIZE (without SECTION maps)
+        * of memory starting from PHYS_OFFSET (which must be aligned to 2MB as
+        * per Documentation/arm64/booting.txt).
         */
-       if (IS_ENABLED(CONFIG_ARM64_64K_PAGES))
-               limit = PHYS_OFFSET + PMD_SIZE;
-       else
-               limit = PHYS_OFFSET + PUD_SIZE;
+       limit = PHYS_OFFSET + SWAPPER_INIT_MAP_SIZE;
        memblock_set_current_limit(limit);
 
        /* map all the memory banks */
@@ -371,21 +422,24 @@ static void __init map_mem(void)
                if (start >= end)
                        break;
 
-#ifndef CONFIG_ARM64_64K_PAGES
-               /*
-                * For the first memory bank align the start address and
-                * current memblock limit to prevent create_mapping() from
-                * allocating pte page tables from unmapped memory.
-                * When 64K pages are enabled, the pte page table for the
-                * first PGDIR_SIZE is already present in swapper_pg_dir.
-                */
-               if (start < limit)
-                       start = ALIGN(start, PMD_SIZE);
-               if (end < limit) {
-                       limit = end & PMD_MASK;
-                       memblock_set_current_limit(limit);
+               if (ARM64_SWAPPER_USES_SECTION_MAPS) {
+                       /*
+                        * For the first memory bank, align the start address and
+                        * the current memblock limit to prevent create_mapping()
+                        * from allocating pte page tables from unmapped memory.
+                        * With section maps, if the first block doesn't end on a
+                        * section-size boundary, create_mapping() will try to
+                        * allocate a pte page, which may come from a still
+                        * unmapped area. Without section maps, the pte page table
+                        * for the current limit is already present in
+                        * swapper_pg_dir.
+                        */
+                       if (start < limit)
+                               start = ALIGN(start, SECTION_SIZE);
+                       if (end < limit) {
+                               limit = end & SECTION_MASK;
+                               memblock_set_current_limit(limit);
+                       }
                }
-#endif
                __map_memblock(start, end);
        }
 
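
Concretely, the clipping amounts to rounding the first bank down to a section boundary before it is mapped. A rough worked example with made-up addresses (2M sections, as with 4K granules):

    #include <inttypes.h>
    #include <stdio.h>

    #define SECTION_SIZE    (1UL << 21)             /* 2M sections, 4K granules (assumed) */
    #define SECTION_MASK    (~(SECTION_SIZE - 1))
    #define ALIGN(x, a)     (((x) + (a) - 1) & ~((a) - 1))

    int main(void)
    {
        uint64_t limit = 0x80000000UL + 0x40000000UL;   /* made-up initial mapping limit */
        uint64_t start = 0x80000000UL;                  /* first bank, below the limit */
        uint64_t end   = 0x80300000UL;

        start = ALIGN(start, SECTION_SIZE);             /* stays 0x80000000 */
        if (end < limit)
            limit = end & SECTION_MASK;                 /* clipped to 0x80200000 */

        printf("start=%#" PRIx64 " limit=%#" PRIx64 "\n", start, limit);
        return 0;
    }
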
@@ -456,7 +510,7 @@ void __init paging_init(void)
         * point to zero page to avoid speculatively fetching new entries.
         */
        cpu_set_reserved_ttbr0();
-       flush_tlb_all();
+       local_flush_tlb_all();
        cpu_set_default_tcr_t0sz();
 }
 
@@ -498,12 +552,12 @@ int kern_addr_valid(unsigned long addr)
        return pfn_valid(pte_pfn(*pte));
 }
 #ifdef CONFIG_SPARSEMEM_VMEMMAP
-#ifdef CONFIG_ARM64_64K_PAGES
+#if !ARM64_SWAPPER_USES_SECTION_MAPS
 int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node)
 {
        return vmemmap_populate_basepages(start, end, node);
 }
-#else  /* !CONFIG_ARM64_64K_PAGES */
+#else  /* !ARM64_SWAPPER_USES_SECTION_MAPS */
 int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node)
 {
        unsigned long addr = start;
@@ -638,7 +692,7 @@ void *__init fixmap_remap_fdt(phys_addr_t dt_phys)
 {
        const u64 dt_virt_base = __fix_to_virt(FIX_FDT);
        pgprot_t prot = PAGE_KERNEL | PTE_RDONLY;
-       int granularity, size, offset;
+       int size, offset;
        void *dt_virt;
 
        /*
@@ -664,24 +718,15 @@ void *__init fixmap_remap_fdt(phys_addr_t dt_phys)
         */
        BUILD_BUG_ON(dt_virt_base % SZ_2M);
 
-       if (IS_ENABLED(CONFIG_ARM64_64K_PAGES)) {
-               BUILD_BUG_ON(__fix_to_virt(FIX_FDT_END) >> PMD_SHIFT !=
-                            __fix_to_virt(FIX_BTMAP_BEGIN) >> PMD_SHIFT);
-
-               granularity = PAGE_SIZE;
-       } else {
-               BUILD_BUG_ON(__fix_to_virt(FIX_FDT_END) >> PUD_SHIFT !=
-                            __fix_to_virt(FIX_BTMAP_BEGIN) >> PUD_SHIFT);
-
-               granularity = PMD_SIZE;
-       }
+       BUILD_BUG_ON(__fix_to_virt(FIX_FDT_END) >> SWAPPER_TABLE_SHIFT !=
+                    __fix_to_virt(FIX_BTMAP_BEGIN) >> SWAPPER_TABLE_SHIFT);
 
-       offset = dt_phys % granularity;
+       offset = dt_phys % SWAPPER_BLOCK_SIZE;
        dt_virt = (void *)dt_virt_base + offset;
 
        /* map the first chunk so we can read the size from the header */
-       create_mapping(round_down(dt_phys, granularity), dt_virt_base,
-                      granularity, prot);
+       create_mapping(round_down(dt_phys, SWAPPER_BLOCK_SIZE), dt_virt_base,
+                      SWAPPER_BLOCK_SIZE, prot);
 
        if (fdt_check_header(dt_virt) != 0)
                return NULL;
@@ -690,9 +735,9 @@ void *__init fixmap_remap_fdt(phys_addr_t dt_phys)
        if (size > MAX_FDT_SIZE)
                return NULL;
 
-       if (offset + size > granularity)
-               create_mapping(round_down(dt_phys, granularity), dt_virt_base,
-                              round_up(offset + size, granularity), prot);
+       if (offset + size > SWAPPER_BLOCK_SIZE)
+               create_mapping(round_down(dt_phys, SWAPPER_BLOCK_SIZE), dt_virt_base,
+                              round_up(offset + size, SWAPPER_BLOCK_SIZE), prot);
 
        memblock_reserve(dt_phys, size);
 
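
The FDT is mapped in two steps: one SWAPPER_BLOCK_SIZE chunk is mapped blindly so fdt_check_header() and the totalsize field can be read, and the mapping is extended only if the blob spills past that first block. A rough sketch of the arithmetic, with made-up addresses and 2M blocks (the section-mapped case):

    #include <stdio.h>

    #define SWAPPER_BLOCK_SIZE  (2ULL << 20)        /* assumed: 2M blocks (4K granules, section maps) */
    #define round_down(x, a)    ((x) & ~((a) - 1))
    #define round_up(x, a)      ((((x) - 1) | ((a) - 1)) + 1)

    int main(void)
    {
        unsigned long long dt_phys = 0x8fe71000ULL;                 /* made-up FDT location */
        unsigned long long offset  = dt_phys % SWAPPER_BLOCK_SIZE;  /* 0x71000 into the block */
        unsigned long long size    = 0x190000ULL;                   /* pretend fdt_totalsize() ~1.6M */

        /* step 1: map one block so the header can be read */
        printf("map %#llx + %#llx\n",
               round_down(dt_phys, SWAPPER_BLOCK_SIZE), SWAPPER_BLOCK_SIZE);

        /* step 2: extend the mapping only if the blob crosses the first block */
        if (offset + size > SWAPPER_BLOCK_SIZE)
            printf("remap %#llx + %#llx\n",
                   round_down(dt_phys, SWAPPER_BLOCK_SIZE),
                   round_up(offset + size, SWAPPER_BLOCK_SIZE));
        return 0;
    }
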
index e47ed1c5dce1bbe50c22094b17cc12e82dde6cdf..3571c7309c5e79f0d2d3e20986a581ffb6454dae 100644 (file)
@@ -45,7 +45,7 @@ static int change_memory_common(unsigned long addr, int numpages,
        int ret;
        struct page_change_data data;
 
-       if (!IS_ALIGNED(addr, PAGE_SIZE)) {
+       if (!PAGE_ALIGNED(addr)) {
                start &= PAGE_MASK;
                end = start + size;
                WARN_ON_ONCE(1);
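
PAGE_ALIGNED() is just the generic shorthand for the open-coded check it replaces; in include/linux/mm.h it is, to the best of my recollection, defined along these lines:

    /* sketch of the generic helper this hunk switches to */
    #define PAGE_ALIGNED(addr)  IS_ALIGNED((unsigned long)(addr), PAGE_SIZE)
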
index 71ca104f97bde3a7b9f11e2aa4e443357e2e29b9..cb3ba1b812e74dcd1acbc167756d60da331d105f 100644 (file)
@@ -28,8 +28,6 @@
 
 #include "mm.h"
 
-#define PGD_SIZE       (PTRS_PER_PGD * sizeof(pgd_t))
-
 static struct kmem_cache *pgd_cache;
 
 pgd_t *pgd_alloc(struct mm_struct *mm)
index 7783ff05f74cc262c643b1a96932a717e47445c5..cacecc4ad3e5bafc07ed0f6de5b2eda8da84a00c 100644 (file)
@@ -30,7 +30,9 @@
 
 #ifdef CONFIG_ARM64_64K_PAGES
 #define TCR_TG_FLAGS   TCR_TG0_64K | TCR_TG1_64K
-#else
+#elif defined(CONFIG_ARM64_16K_PAGES)
+#define TCR_TG_FLAGS   TCR_TG0_16K | TCR_TG1_16K
+#else /* CONFIG_ARM64_4K_PAGES */
 #define TCR_TG_FLAGS   TCR_TG0_4K | TCR_TG1_4K
 #endif
 
@@ -130,7 +132,7 @@ ENDPROC(cpu_do_resume)
  *     - pgd_phys - physical address of new TTB
  */
 ENTRY(cpu_do_switch_mm)
-       mmid    w1, x1                          // get mm->context.id
+       mmid    x1, x1                          // get mm->context.id
        bfi     x0, x1, #48, #16                // set the ASID
        msr     ttbr0_el1, x0                   // set TTBR0
        isb
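
The mmid macro now loads the full 64-bit context id, and the bfi copies its low 16 bits into TTBR0 bits 63:48. In C terms the value written to TTBR0_EL1 is roughly the following (a sketch of what the two instructions compute, not a kernel helper):

    #include <stdint.h>

    /* Pack a 16-bit ASID into the top of TTBR0 alongside the pgd address. */
    static inline uint64_t make_ttbr0(uint64_t pgd_phys, uint64_t context_id)
    {
        uint64_t asid = context_id & 0xffff;    /* bfi x0, x1, #48, #16 keeps bits 15:0 */

        return (pgd_phys & ((1ULL << 48) - 1)) | (asid << 48);
    }
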
@@ -146,8 +148,8 @@ ENDPROC(cpu_do_switch_mm)
  *     value of the SCTLR_EL1 register.
  */
 ENTRY(__cpu_setup)
-       tlbi    vmalle1is                       // invalidate I + D TLBs
-       dsb     ish
+       tlbi    vmalle1                         // Invalidate local TLB
+       dsb     nsh
 
        mov     x0, #3 << 20
        msr     cpacr_el1, x0                   // Enable FP/ASIMD
index c24f00569acb3b92c5e0a3a91cc04f257336f96f..ec379a4164cc07474fd5f2c193cbe6437df0e6fd 100644 (file)
@@ -1,6 +1,14 @@
 #
 # Makefile for linux kernel
 #
+
+#
+# ARM64 maps EFI runtime services at userspace addresses,
+# which don't have a KASAN shadow. So dereferencing these addresses
+# in efi_call_virt() will crash if this code is instrumented.
+#
+KASAN_SANITIZE_runtime-wrappers.o      := n
+
 obj-$(CONFIG_EFI)                      += efi.o vars.o reboot.o
 obj-$(CONFIG_EFI_VARS)                 += efivars.o
 obj-$(CONFIG_EFI_ESRT)                 += esrt.o
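
The reason the instrumentation has to be disabled here: every instrumented load or store first reads a shadow byte at a fixed linear transformation of the accessed address, and the EFI runtime mappings live at (userspace) addresses for which no shadow memory exists. Conceptually the compiler-inserted check behaves like this sketch (the offset value is illustrative, not arm64's real one):

    #include <stdint.h>

    #define KASAN_SHADOW_SCALE_SHIFT    3
    #define KASAN_SHADOW_OFFSET         0xdfff200000000000ULL  /* illustrative only */

    /* What an instrumented access does, conceptually, before touching *addr. */
    static inline int8_t *kasan_shadow_of(const void *addr)
    {
        return (int8_t *)(((uint64_t)addr >> KASAN_SHADOW_SCALE_SHIFT)
                          + KASAN_SHADOW_OFFSET);
    }

    /*
     * For a kernel address the result lands in the mapped shadow region; for
     * the userspace addresses used by the EFI runtime mappings it points at
     * unmapped memory, so reading the shadow byte itself faults. Hence
     * KASAN_SANITIZE_runtime-wrappers.o := n above.
     */
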
index 816dbe9f4b82e68314f5c5408b52649e4b9b4383..3c0467d3688cff14df877fea66d61c3fbb3279bd 100644 (file)
@@ -14,6 +14,8 @@ cflags-$(CONFIG_ARM64)                := $(subst -pg,,$(KBUILD_CFLAGS))
 cflags-$(CONFIG_ARM)           := $(subst -pg,,$(KBUILD_CFLAGS)) \
                                   -fno-builtin -fpic -mno-single-pic-base
 
+cflags-$(CONFIG_EFI_ARMSTUB)   += -I$(srctree)/scripts/dtc/libfdt
+
 KBUILD_CFLAGS                  := $(cflags-y) \
                                   $(call cc-option,-ffreestanding) \
                                   $(call cc-option,-fno-stack-protector)
@@ -22,7 +24,18 @@ GCOV_PROFILE                 := n
 KASAN_SANITIZE                 := n
 
 lib-y                          := efi-stub-helper.o
-lib-$(CONFIG_EFI_ARMSTUB)      += arm-stub.o fdt.o
+
+# include the stub's generic dependencies from lib/ when building for ARM/arm64
+arm-deps := fdt_rw.c fdt_ro.c fdt_wip.c fdt.c fdt_empty_tree.c fdt_sw.c sort.c
+
+$(obj)/lib-%.o: $(srctree)/lib/%.c FORCE
+       $(call if_changed_rule,cc_o_c)
+
+lib-$(CONFIG_EFI_ARMSTUB)      += arm-stub.o fdt.o string.o \
+                                  $(patsubst %.c,lib-%.o,$(arm-deps))
+
+lib-$(CONFIG_ARM64)            += arm64-stub.o
+CFLAGS_arm64-stub.o            := -DTEXT_OFFSET=$(TEXT_OFFSET)
 
 #
 # arm64 puts the stub in the kernel proper, which will unnecessarily retain all
@@ -30,10 +43,27 @@ lib-$(CONFIG_EFI_ARMSTUB)   += arm-stub.o fdt.o
 # So let's apply the __init annotations at the section level, by prefixing
 # the section names directly. This will ensure that even all the inline string
 # literals are covered.
+# The fact that the stub and the kernel proper are essentially the same binary
+# also means that we need to be extra careful to make sure that the stub does
+# not rely on any absolute symbol references, considering that the virtual
+# kernel mapping that the linker uses is not active yet when the stub is
+# executing. So build all C dependencies of the EFI stub into libstub, and do
+# a verification pass to see if any absolute relocations exist in any of the
+# object files.
 #
-extra-$(CONFIG_ARM64)          := $(lib-y)
-lib-$(CONFIG_ARM64)            := $(patsubst %.o,%.init.o,$(lib-y))
+extra-$(CONFIG_EFI_ARMSTUB)    := $(lib-y)
+lib-$(CONFIG_EFI_ARMSTUB)      := $(patsubst %.o,%.stub.o,$(lib-y))
+
+STUBCOPY_FLAGS-y               := -R .debug* -R *ksymtab* -R *kcrctab*
+STUBCOPY_FLAGS-$(CONFIG_ARM64) += --prefix-alloc-sections=.init \
+                                  --prefix-symbols=__efistub_
+STUBCOPY_RELOC-$(CONFIG_ARM64) := R_AARCH64_ABS
+
+$(obj)/%.stub.o: $(obj)/%.o FORCE
+       $(call if_changed,stubcopy)
 
-OBJCOPYFLAGS := --prefix-alloc-sections=.init
-$(obj)/%.init.o: $(obj)/%.o FORCE
-       $(call if_changed,objcopy)
+quiet_cmd_stubcopy = STUBCPY $@
+      cmd_stubcopy = if $(OBJCOPY) $(STUBCOPY_FLAGS-y) $< $@; then     \
+                    $(OBJDUMP) -r $@ | grep $(STUBCOPY_RELOC-y)        \
+                    && (echo >&2 "$@: absolute symbol references not allowed in the EFI stub"; \
+                        rm -f $@; /bin/false); else /bin/false; fi
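
The objdump | grep pass exists because any code or data that bakes in an absolute kernel virtual address would be wrong while the stub still runs from the UEFI 1:1 physical mapping. A tiny example of the kind of construct that would trip the R_AARCH64_ABS check (hypothetical, not code from this series):

    /*
     * The statically initialised pointer below needs an R_AARCH64_ABS64
     * relocation: the linker bakes in the kernel virtual address of 'banner',
     * which is not mapped yet when the stub executes.
     */
    static const char banner[] = "hello";
    static const char *banner_ptr = banner;

    const char *get_banner(void)
    {
        return banner;      /* accessed PC-relatively (adrp/add): fine in the stub */
    }

    const char *get_banner_ptr(void)
    {
        return banner_ptr;  /* loads the absolute address baked in above: not fine */
    }
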
index ef5d764e2a27ea506775e7c117dc291b9749ded1..b62e2f5dcab3b2d95074b534de803915145dc20c 100644 (file)
@@ -147,15 +147,6 @@ efi_status_t update_fdt(efi_system_table_t *sys_table, void *orig_fdt,
        if (status)
                goto fdt_set_fail;
 
-       /*
-        * Add kernel version banner so stub/kernel match can be
-        * verified.
-        */
-       status = fdt_setprop_string(fdt, node, "linux,uefi-stub-kern-ver",
-                            linux_banner);
-       if (status)
-               goto fdt_set_fail;
-
        return EFI_SUCCESS;
 
 fdt_set_fail:
diff --git a/drivers/firmware/efi/libstub/string.c b/drivers/firmware/efi/libstub/string.c
new file mode 100644 (file)
index 0000000..09d5a08
--- /dev/null
@@ -0,0 +1,57 @@
+/*
+ * Taken from:
+ *  linux/lib/string.c
+ *
+ *  Copyright (C) 1991, 1992  Linus Torvalds
+ */
+
+#include <linux/types.h>
+#include <linux/string.h>
+
+#ifndef __HAVE_ARCH_STRSTR
+/**
+ * strstr - Find the first substring in a %NUL terminated string
+ * @s1: The string to be searched
+ * @s2: The string to search for
+ */
+char *strstr(const char *s1, const char *s2)
+{
+       size_t l1, l2;
+
+       l2 = strlen(s2);
+       if (!l2)
+               return (char *)s1;
+       l1 = strlen(s1);
+       while (l1 >= l2) {
+               l1--;
+               if (!memcmp(s1, s2, l2))
+                       return (char *)s1;
+               s1++;
+       }
+       return NULL;
+}
+#endif
+
+#ifndef __HAVE_ARCH_STRNCMP
+/**
+ * strncmp - Compare two length-limited strings
+ * @cs: One string
+ * @ct: Another string
+ * @count: The maximum number of bytes to compare
+ */
+int strncmp(const char *cs, const char *ct, size_t count)
+{
+       unsigned char c1, c2;
+
+       while (count) {
+               c1 = *cs++;
+               c2 = *ct++;
+               if (c1 != c2)
+                       return c1 < c2 ? -1 : 1;
+               if (!c1)
+                       break;
+               count--;
+       }
+       return 0;
+}
+#endif
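
These generic C fallbacks appear to be here because the freestanding stub (and the libfdt objects now pulled into it) cannot simply reuse the kernel's arch-optimised string routines. Their contract is the standard one, as this made-up usage (run against the host C library, which implements the same semantics) illustrates:

    #include <stdio.h>
    #include <string.h>

    int main(void)
    {
        const char *cmdline = "console=ttyAMA0 dtb=board.dtb";     /* made-up command line */

        printf("%s\n", strstr(cmdline, "dtb="));                   /* -> "dtb=board.dtb" */
        printf("%d\n", strncmp("dtb=board.dtb", "dtb=", 4));       /* -> 0, prefix matches */
        return 0;
    }
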
index d9de36ee165de119904936c7ba5bec20408ad447..04e2653bb8c02cd9aede61bca14c9638da62c432 100644 (file)
@@ -5,7 +5,7 @@
 menu "Performance monitor support"
 
 config ARM_PMU
-       depends on PERF_EVENTS && ARM
+       depends on PERF_EVENTS && (ARM || ARM64)
        bool "ARM PMU framework"
        default y
        help
index 80f4f4e56fed173be5732099efb96d190c29c312..011f8c4c63da056cad5bb7ac575ff740192dd76e 100644 (file)
@@ -36,7 +36,7 @@ static bool migrate_one_irq(struct irq_desc *desc)
 
        c = irq_data_get_irq_chip(d);
        if (!c->irq_set_affinity) {
-               pr_warn_ratelimited("IRQ%u: unable to set affinity\n", d->irq);
+               pr_debug("IRQ%u: unable to set affinity\n", d->irq);
        } else {
                int r = irq_do_set_affinity(d, affinity, false);
                if (r)
index 3f874d24234f6b4c2e40ac6d2bf0390326b8a8bf..37323b0df374b1a89fb256a4077165bc05376421 100644 (file)
@@ -5,10 +5,12 @@ else
        call_threshold := 0
 endif
 
+KASAN_SHADOW_OFFSET ?= $(CONFIG_KASAN_SHADOW_OFFSET)
+
 CFLAGS_KASAN_MINIMAL := -fsanitize=kernel-address
 
 CFLAGS_KASAN := $(call cc-option, -fsanitize=kernel-address \
-               -fasan-shadow-offset=$(CONFIG_KASAN_SHADOW_OFFSET) \
+               -fasan-shadow-offset=$(KASAN_SHADOW_OFFSET) \
                --param asan-stack=1 --param asan-globals=1 \
                --param asan-instrumentation-with-call-threshold=$(call_threshold))