Merge tag 'arm64-upstream' of git://git.kernel.org/pub/scm/linux/kernel/git/arm64/linux

Pull arm64 updates from Catalin Marinas: - kdump support, including two necessary memblock additions: memblock_clear_nomap() and memblock_cap_memory_range() - ARMv8.3 HWCAP bits for JavaScript conversion instructions, complex numbers and weaker release consistency - arm64 ACPI platform MSI support - arm perf updates: ACPI PMU support, L3 cache PMU in some Qualcomm SoCs, Cortex-A53 L2 cache events and DTLB refills, MAINTAINERS update for DT perf bindings - architected timer errata framework (the arch/arm64 changes only) - support for DMA_ATTR_FORCE_CONTIGUOUS in the arm64 iommu DMA API - arm64 KVM refactoring to use common system register definitions - remove support for ASID-tagged VIVT I-cache (no ARMv8 implementation using it and deprecated in the architecture) together with some I-cache handling clean-up - PE/COFF EFI header clean-up/hardening - define BUG() instruction without CONFIG_BUG * tag 'arm64-upstream' of git://git.kernel.org/pub/scm/linux/kernel/git/arm64/linux: (92 commits) arm64: Fix the DMA mmap and get_sgtable API with DMA_ATTR_FORCE_CONTIGUOUS arm64: Print DT machine model in setup_machine_fdt() arm64: pmu: Wire-up Cortex A53 L2 cache events and DTLB refills arm64: module: split core and init PLT sections arm64: pmuv3: handle pmuv3+ arm64: Add CNTFRQ_EL0 trap handler arm64: Silence spurious kbuild warning on menuconfig arm64: pmuv3: use arm_pmu ACPI framework arm64: pmuv3: handle !PMUv3 when probing drivers/perf: arm_pmu: add ACPI framework arm64: add function to get a cpu's MADT GICC table drivers/perf: arm_pmu: split out platform device probe logic drivers/perf: arm_pmu: move irq request/free into probe drivers/perf: arm_pmu: split cpu-local irq request/free drivers/perf: arm_pmu: rename irq request/free functions drivers/perf: arm_pmu: handle no platform_device drivers/perf: arm_pmu: simplify cpu_pmu_request_irqs() drivers/perf: arm_pmu: factor out pmu registration drivers/perf: arm_pmu: fold init into alloc drivers/perf: arm_pmu: define armpmu_init_fn ...

Merge tag 'arm64-upstream' of git://git.kernel.org/pub/scm/linux/kernel/git/arm64/linux
Pull arm64 updates from Catalin Marinas: - kdump support, including two necessary memblock additions: memblock_clear_nomap() and memblock_cap_memory_range() - ARMv8.3 HWCAP bits for JavaScript conversion instructions, complex numbers and weaker release consistency - arm64 ACPI platform MSI support - arm perf updates: ACPI PMU support, L3 cache PMU in some Qualcomm SoCs, Cortex-A53 L2 cache events and DTLB refills, MAINTAINERS update for DT perf bindings - architected timer errata framework (the arch/arm64 changes only) - support for DMA_ATTR_FORCE_CONTIGUOUS in the arm64 iommu DMA API - arm64 KVM refactoring to use common system register definitions - remove support for ASID-tagged VIVT I-cache (no ARMv8 implementation using it and deprecated in the architecture) together with some I-cache handling clean-up - PE/COFF EFI header clean-up/hardening - define BUG() instruction without CONFIG_BUG * tag 'arm64-upstream' of git://git.kernel.org/pub/scm/linux/kernel/git/arm64/linux: (92 commits) arm64: Fix the DMA mmap and get_sgtable API with DMA_ATTR_FORCE_CONTIGUOUS arm64: Print DT machine model in setup_machine_fdt() arm64: pmu: Wire-up Cortex A53 L2 cache events and DTLB refills arm64: module: split core and init PLT sections arm64: pmuv3: handle pmuv3+ arm64: Add CNTFRQ_EL0 trap handler arm64: Silence spurious kbuild warning on menuconfig arm64: pmuv3: use arm_pmu ACPI framework arm64: pmuv3: handle !PMUv3 when probing drivers/perf: arm_pmu: add ACPI framework arm64: add function to get a cpu's MADT GICC table drivers/perf: arm_pmu: split out platform device probe logic drivers/perf: arm_pmu: move irq request/free into probe drivers/perf: arm_pmu: split cpu-local irq request/free drivers/perf: arm_pmu: rename irq request/free functions drivers/perf: arm_pmu: handle no platform_device drivers/perf: arm_pmu: simplify cpu_pmu_request_irqs() drivers/perf: arm_pmu: factor out pmu registration drivers/perf: arm_pmu: fold init into alloc drivers/perf: arm_pmu: define armpmu_init_fn ...
ab182e67 · Linus Torvalds · 7246f600 · 92f66f84 · ab182e67 · ab182e67
Commit ab182e67 authored May 05, 2017 by Linus Torvalds
77 changed files
--- a/Documentation/arm64/cpu-feature-registers.txt
+++ b/Documentation/arm64/cpu-feature-registers.txt
@@ -169,6 +169,18 @@ infrastructure:
   as available on the CPU where it is fetched and is not a system
   wide safe value.

+  4) ID_AA64ISAR1_EL1 - Instruction set attribute register 1
+
+     x--------------------------------------------------x
+     | Name                         |  bits   | visible |
+     |--------------------------------------------------|
+     | LRCPC                        | [23-20] |    y    |
+     |--------------------------------------------------|
+     | FCMA                         | [19-16] |    y    |
+     |--------------------------------------------------|
+     | JSCVT                        | [15-12] |    y    |
+     x--------------------------------------------------x
+
 Appendix I: Example
 ---------------------------


--- a/Documentation/devicetree/bindings/chosen.txt
+++ b/Documentation/devicetree/bindings/chosen.txt
@@ -52,3 +52,48 @@ This property is set (currently only on PowerPC, and only needed on
 book3e) by some versions of kexec-tools to tell the new kernel that it
 is being booted by kexec, as the booting environment may differ (e.g.
 a different secondary CPU release mechanism)
+
+linux,usable-memory-range
+-------------------------
+
+This property (arm64 only) holds a base address and size, describing a
+limited region in which memory may be considered available for use by
+the kernel. Memory outside of this range is not available for use.
+
+This property describes a limitation: memory within this range is only
+valid when also described through another mechanism that the kernel
+would otherwise use to determine available memory (e.g. memory nodes
+or the EFI memory map). Valid memory may be sparse within the range.
+e.g.
+
+/ {
+	chosen {
+		linux,usable-memory-range = <0x9 0xf0000000 0x0 0x10000000>;
+	};
+};
+
+The main usage is for crash dump kernel to identify its own usable
+memory and exclude, at its boot time, any other memory areas that are
+part of the panicked kernel's memory.
+
+While this property does not represent a real hardware, the address
+and the size are expressed in #address-cells and #size-cells,
+respectively, of the root node.
+
+linux,elfcorehdr
+----------------
+
+This property (currently used only on arm64) holds the memory range,
+the address and the size, of the elf core header which mainly describes
+the panicked kernel's memory layout as PT_LOAD segments of elf format.
+e.g.
+
+/ {
+	chosen {
+		linux,elfcorehdr = <0x9 0xfffff000 0x0 0x800>;
+	};
+};
+
+While this property does not represent a real hardware, the address
+and the size are expressed in #address-cells and #size-cells,
+respectively, of the root node.
--- a/Documentation/kdump/kdump.txt
+++ b/Documentation/kdump/kdump.txt
@@ -18,7 +18,7 @@ memory image to a dump file on the local disk, or across the network to
 a remote system.

 Kdump and kexec are currently supported on the x86, x86_64, ppc64, ia64,
-s390x and arm architectures.
+s390x, arm and arm64 architectures.

 When the system kernel boots, it reserves a small section of memory for
 the dump-capture kernel. This ensures that ongoing Direct Memory Access
@@ -249,6 +249,13 @@ Dump-capture kernel config options (Arch Dependent, arm)

    AUTO_ZRELADDR=y

+Dump-capture kernel config options (Arch Dependent, arm64)
+----------------------------------------------------------
+
+- Please note that kvm of the dump-capture kernel will not be enabled
+  on non-VHE systems even if it is configured. This is because the CPU
+  will not be reset to EL2 on panic.
+
 Extended crashkernel syntax
 ===========================

@@ -305,6 +312,8 @@ Boot into System Kernel
   kernel will automatically locate the crash kernel image within the
   first 512MB of RAM if X is not given.

+   On arm64, use "crashkernel=Y[@X]".  Note that the start address of
+   the kernel, X if explicitly specified, must be aligned to 2MiB (0x200000).

 Load the Dump-capture Kernel
 ============================
@@ -327,6 +336,8 @@ For s390x:
 	- Use image or bzImage
 For arm:
 	- Use zImage
+For arm64:
+	- Use vmlinux or Image

 If you are using a uncompressed vmlinux image then use following command
 to load dump-capture kernel.
@@ -370,6 +381,9 @@ For s390x:
 For arm:
 	"1 maxcpus=1 reset_devices"

+For arm64:
+	"1 maxcpus=1 reset_devices"
+
 Notes on loading the dump-capture kernel:

 * By default, the ELF headers are stored in ELF64 format to support

--- a/Documentation/perf/qcom_l3_pmu.txt
+++ b/Documentation/perf/qcom_l3_pmu.txt
+Qualcomm Datacenter Technologies L3 Cache Performance Monitoring Unit (PMU)
+===========================================================================
+
+This driver supports the L3 cache PMUs found in Qualcomm Datacenter Technologies
+Centriq SoCs. The L3 cache on these SOCs is composed of multiple slices, shared
+by all cores within a socket. Each slice is exposed as a separate uncore perf
+PMU with device name l3cache_<socket>_<instance>. User space is responsible
+for aggregating across slices.
+
+The driver provides a description of its available events and configuration
+options in sysfs, see /sys/devices/l3cache*. Given that these are uncore PMUs
+the driver also exposes a "cpumask" sysfs attribute which contains a mask
+consisting of one CPU per socket which will be used to handle all the PMU
+events on that socket.
+
+The hardware implements 32bit event counters and has a flat 8bit event space
+exposed via the "event" format attribute. In addition to the 32bit physical
+counters the driver supports virtual 64bit hardware counters by using hardware
+counter chaining. This feature is exposed via the "lc" (long counter) format
+flag. E.g.:
+
+  perf stat -e l3cache_0_0/read-miss,lc/
+
+Given that these are uncore PMUs the driver does not support sampling, therefore
+"perf record" will not work. Per-task perf sessions are not supported.
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -983,6 +983,7 @@ F:	arch/arm*/include/asm/perf_event.h
 F:	drivers/perf/*
 F:	include/linux/perf/arm_pmu.h
 F:	Documentation/devicetree/bindings/arm/pmu.txt
+F:	Documentation/devicetree/bindings/perf/

 ARM PORT
 M:	Russell King <linux@armlinux.org.uk>

--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -736,6 +736,17 @@ config KEXEC
 	  but it is independent of the system firmware.   And like a reboot
 	  you can start any kernel with it, not just Linux.

+config CRASH_DUMP
+	bool "Build kdump crash kernel"
+	help
+	  Generate crash dump after being started by kexec. This should
+	  be normally only set in special crash dump kernels which are
+	  loaded in the main kernel with kexec-tools into a specially
+	  reserved region and then later executed after a crash by
+	  kdump/kexec.
+
+	  For more details see Documentation/kdump/kdump.txt
+
 config XEN_DOM0
 	def_bool y
 	depends on XEN

--- a/arch/arm64/Kconfig.debug
+++ b/arch/arm64/Kconfig.debug
@@ -92,6 +92,10 @@ config DEBUG_EFI
 	  the kernel that are only useful when using a debug build of the
 	  UEFI firmware

+config ARM64_RELOC_TEST
+	depends on m
+	tristate "Relocation testing module"
+
 source "drivers/hwtracing/coresight/Kconfig"

 endmenu
--- a/arch/arm64/Makefile
+++ b/arch/arm64/Makefile
@@ -37,10 +37,12 @@ $(warning LSE atomics not supported by binutils)
  endif
 endif

+ifeq ($(CONFIG_ARM64), y)
 brokengasinst := $(call as-instr,1:\n.inst 0\n.rept . - 1b\n\nnop\n.endr\n,,-DCONFIG_BROKEN_GAS_INST=1)

-ifneq ($(brokengasinst),)
+  ifneq ($(brokengasinst),)
 $(warning Detected assembler with broken .inst; disassembly will be unreliable)
+  endif
 endif

 KBUILD_CFLAGS	+= -mgeneral-regs-only $(lseinstr) $(brokengasinst)

--- a/arch/arm64/configs/defconfig
+++ b/arch/arm64/configs/defconfig
@@ -82,6 +82,7 @@ CONFIG_CMA=y
 CONFIG_SECCOMP=y
 CONFIG_XEN=y
 CONFIG_KEXEC=y
+CONFIG_CRASH_DUMP=y
 # CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set
 CONFIG_COMPAT=y
 CONFIG_CPU_IDLE=y

--- a/arch/arm64/include/asm/acpi.h
+++ b/arch/arm64/include/asm/acpi.h
@@ -85,6 +85,8 @@ static inline bool acpi_has_cpu_in_madt(void)
 	return true;
 }

+struct acpi_madt_generic_interrupt *acpi_cpu_get_madt_gicc(int cpu);
+
 static inline void arch_fix_phys_package_id(int num, u32 slot) { }
 void __init acpi_init_cpus(void);


--- a/arch/arm64/include/asm/arch_gicv3.h
+++ b/arch/arm64/include/asm/arch_gicv3.h
@@ -20,69 +20,14 @@

 #include <asm/sysreg.h>

-#define ICC_EOIR1_EL1			sys_reg(3, 0, 12, 12, 1)
-#define ICC_DIR_EL1			sys_reg(3, 0, 12, 11, 1)
-#define ICC_IAR1_EL1			sys_reg(3, 0, 12, 12, 0)
-#define ICC_SGI1R_EL1			sys_reg(3, 0, 12, 11, 5)
-#define ICC_PMR_EL1			sys_reg(3, 0, 4, 6, 0)
-#define ICC_CTLR_EL1			sys_reg(3, 0, 12, 12, 4)
-#define ICC_SRE_EL1			sys_reg(3, 0, 12, 12, 5)
-#define ICC_GRPEN1_EL1			sys_reg(3, 0, 12, 12, 7)
-#define ICC_BPR1_EL1			sys_reg(3, 0, 12, 12, 3)
-
-#define ICC_SRE_EL2			sys_reg(3, 4, 12, 9, 5)
-
-/*
- * System register definitions
- */
-#define ICH_VSEIR_EL2			sys_reg(3, 4, 12, 9, 4)
-#define ICH_HCR_EL2			sys_reg(3, 4, 12, 11, 0)
-#define ICH_VTR_EL2			sys_reg(3, 4, 12, 11, 1)
-#define ICH_MISR_EL2			sys_reg(3, 4, 12, 11, 2)
-#define ICH_EISR_EL2			sys_reg(3, 4, 12, 11, 3)
-#define ICH_ELSR_EL2			sys_reg(3, 4, 12, 11, 5)
-#define ICH_VMCR_EL2			sys_reg(3, 4, 12, 11, 7)
-
-#define __LR0_EL2(x)			sys_reg(3, 4, 12, 12, x)
-#define __LR8_EL2(x)			sys_reg(3, 4, 12, 13, x)
-
-#define ICH_LR0_EL2			__LR0_EL2(0)
-#define ICH_LR1_EL2			__LR0_EL2(1)
-#define ICH_LR2_EL2			__LR0_EL2(2)
-#define ICH_LR3_EL2			__LR0_EL2(3)
-#define ICH_LR4_EL2			__LR0_EL2(4)
-#define ICH_LR5_EL2			__LR0_EL2(5)
-#define ICH_LR6_EL2			__LR0_EL2(6)
-#define ICH_LR7_EL2			__LR0_EL2(7)
-#define ICH_LR8_EL2			__LR8_EL2(0)
-#define ICH_LR9_EL2			__LR8_EL2(1)
-#define ICH_LR10_EL2			__LR8_EL2(2)
-#define ICH_LR11_EL2			__LR8_EL2(3)
-#define ICH_LR12_EL2			__LR8_EL2(4)
-#define ICH_LR13_EL2			__LR8_EL2(5)
-#define ICH_LR14_EL2			__LR8_EL2(6)
-#define ICH_LR15_EL2			__LR8_EL2(7)
-
-#define __AP0Rx_EL2(x)			sys_reg(3, 4, 12, 8, x)
-#define ICH_AP0R0_EL2			__AP0Rx_EL2(0)
-#define ICH_AP0R1_EL2			__AP0Rx_EL2(1)
-#define ICH_AP0R2_EL2			__AP0Rx_EL2(2)
-#define ICH_AP0R3_EL2			__AP0Rx_EL2(3)
-
-#define __AP1Rx_EL2(x)			sys_reg(3, 4, 12, 9, x)
-#define ICH_AP1R0_EL2			__AP1Rx_EL2(0)
-#define ICH_AP1R1_EL2			__AP1Rx_EL2(1)
-#define ICH_AP1R2_EL2			__AP1Rx_EL2(2)
-#define ICH_AP1R3_EL2			__AP1Rx_EL2(3)
-
 #ifndef __ASSEMBLY__

 #include <linux/stringify.h>
 #include <asm/barrier.h>
 #include <asm/cacheflush.h>

-#define read_gicreg			read_sysreg_s
-#define write_gicreg			write_sysreg_s
+#define read_gicreg(r)			read_sysreg_s(SYS_ ## r)
+#define write_gicreg(v, r)		write_sysreg_s(v, SYS_ ## r)

 /*
 * Low-level accessors
@@ -93,13 +38,13 @@

 static inline void gic_write_eoir(u32 irq)
 {
-	write_sysreg_s(irq, ICC_EOIR1_EL1);
+	write_sysreg_s(irq, SYS_ICC_EOIR1_EL1);
 	isb();
 }

 static inline void gic_write_dir(u32 irq)
 {
-	write_sysreg_s(irq, ICC_DIR_EL1);
+	write_sysreg_s(irq, SYS_ICC_DIR_EL1);
 	isb();
 }

@@ -107,7 +52,7 @@ static inline u64 gic_read_iar_common(void)
 {
 	u64 irqstat;

-	irqstat = read_sysreg_s(ICC_IAR1_EL1);
+	irqstat = read_sysreg_s(SYS_ICC_IAR1_EL1);
 	dsb(sy);
 	return irqstat;
 }
@@ -124,7 +69,7 @@ static inline u64 gic_read_iar_cavium_thunderx(void)
 	u64 irqstat;

 	nops(8);
-	irqstat = read_sysreg_s(ICC_IAR1_EL1);
+	irqstat = read_sysreg_s(SYS_ICC_IAR1_EL1);
 	nops(4);
 	mb();

@@ -133,40 +78,40 @@ static inline u64 gic_read_iar_cavium_thunderx(void)

 static inline void gic_write_pmr(u32 val)
 {
-	write_sysreg_s(val, ICC_PMR_EL1);
+	write_sysreg_s(val, SYS_ICC_PMR_EL1);
 }

 static inline void gic_write_ctlr(u32 val)
 {
-	write_sysreg_s(val, ICC_CTLR_EL1);
+	write_sysreg_s(val, SYS_ICC_CTLR_EL1);
 	isb();
 }

 static inline void gic_write_grpen1(u32 val)
 {
-	write_sysreg_s(val, ICC_GRPEN1_EL1);
+	write_sysreg_s(val, SYS_ICC_GRPEN1_EL1);
 	isb();
 }

 static inline void gic_write_sgi1r(u64 val)
 {
-	write_sysreg_s(val, ICC_SGI1R_EL1);
+	write_sysreg_s(val, SYS_ICC_SGI1R_EL1);
 }

 static inline u32 gic_read_sre(void)
 {
-	return read_sysreg_s(ICC_SRE_EL1);
+	return read_sysreg_s(SYS_ICC_SRE_EL1);
 }

 static inline void gic_write_sre(u32 val)
 {
-	write_sysreg_s(val, ICC_SRE_EL1);
+	write_sysreg_s(val, SYS_ICC_SRE_EL1);
 	isb();
 }

 static inline void gic_write_bpr1(u32 val)
 {
-	asm volatile("msr_s " __stringify(ICC_BPR1_EL1) ", %0" : : "r" (val));
+	write_sysreg_s(val, SYS_ICC_BPR1_EL1);
 }

 #define gic_read_typer(c)		readq_relaxed(c)

--- a/arch/arm64/include/asm/bug.h
+++ b/arch/arm64/include/asm/bug.h
@@ -20,9 +20,6 @@

 #include <asm/brk-imm.h>

-#ifdef CONFIG_GENERIC_BUG
-#define HAVE_ARCH_BUG
-
 #ifdef CONFIG_DEBUG_BUGVERBOSE
 #define _BUGVERBOSE_LOCATION(file, line) __BUGVERBOSE_LOCATION(file, line)
 #define __BUGVERBOSE_LOCATION(file, line)				\
@@ -36,28 +33,35 @@
 #define _BUGVERBOSE_LOCATION(file, line)
 #endif

-#define _BUG_FLAGS(flags) __BUG_FLAGS(flags)
+#ifdef CONFIG_GENERIC_BUG

-#define __BUG_FLAGS(flags) asm volatile (		\
+#define __BUG_ENTRY(flags) 				\
 		".pushsection __bug_table,\"a\"\n\t"	\
 		".align 2\n\t"				\
 	"0:	.long 1f - 0b\n\t"			\
 _BUGVERBOSE_LOCATION(__FILE__, __LINE__)		\
 		".short " #flags "\n\t"			\
 		".popsection\n"				\
-							\
-	"1:	brk %[imm]"				\
-		:: [imm] "i" (BUG_BRK_IMM)		\
-)
+	"1:	"
+#else
+#define __BUG_ENTRY(flags) ""
+#endif
+
+#define __BUG_FLAGS(flags)				\
+	asm volatile (					\
+		__BUG_ENTRY(flags)			\
+		"brk %[imm]" :: [imm] "i" (BUG_BRK_IMM)	\
+	);

-#define BUG() do {				\
-	_BUG_FLAGS(0);				\
-	unreachable();				\
+
+#define BUG() do {					\
+	__BUG_FLAGS(0);					\
+	unreachable();					\
 } while (0)

-#define __WARN_FLAGS(flags) _BUG_FLAGS(BUGFLAG_WARNING|(flags))
+#define __WARN_FLAGS(flags) __BUG_FLAGS(BUGFLAG_WARNING|(flags))

-#endif /* ! CONFIG_GENERIC_BUG */
+#define HAVE_ARCH_BUG

 #include <asm-generic/bug.h>


--- a/arch/arm64/include/asm/cache.h
+++ b/arch/arm64/include/asm/cache.h
@@ -16,7 +16,18 @@
 #ifndef __ASM_CACHE_H
 #define __ASM_CACHE_H

-#include <asm/cachetype.h>
+#include <asm/cputype.h>
+
+#define CTR_L1IP_SHIFT		14
+#define CTR_L1IP_MASK		3
+#define CTR_CWG_SHIFT		24
+#define CTR_CWG_MASK		15
+
+#define CTR_L1IP(ctr)		(((ctr) >> CTR_L1IP_SHIFT) & CTR_L1IP_MASK)
+
+#define ICACHE_POLICY_VPIPT	0
+#define ICACHE_POLICY_VIPT	2
+#define ICACHE_POLICY_PIPT	3

 #define L1_CACHE_SHIFT		7
 #define L1_CACHE_BYTES		(1 << L1_CACHE_SHIFT)
@@ -32,6 +43,31 @@

 #ifndef __ASSEMBLY__

+#include <linux/bitops.h>
+
+#define ICACHEF_ALIASING	0
+#define ICACHEF_VPIPT		1
+extern unsigned long __icache_flags;
+
+/*
+ * Whilst the D-side always behaves as PIPT on AArch64, aliasing is
+ * permitted in the I-cache.
+ */
+static inline int icache_is_aliasing(void)
+{
+	return test_bit(ICACHEF_ALIASING, &__icache_flags);
+}
+
+static inline int icache_is_vpipt(void)
+{
+	return test_bit(ICACHEF_VPIPT, &__icache_flags);
+}
+
+static inline u32 cache_type_cwg(void)
+{
+	return (read_cpuid_cachetype() >> CTR_CWG_SHIFT) & CTR_CWG_MASK;
+}
+
 #define __read_mostly __attribute__((__section__(".data..read_mostly")))

 static inline int cache_line_size(void)

--- a/arch/arm64/include/asm/cacheflush.h
+++ b/arch/arm64/include/asm/cacheflush.h
@@ -154,5 +154,6 @@ int set_memory_ro(unsigned long addr, int numpages);
 int set_memory_rw(unsigned long addr, int numpages);
 int set_memory_x(unsigned long addr, int numpages);
 int set_memory_nx(unsigned long addr, int numpages);
+int set_memory_valid(unsigned long addr, unsigned long size, int enable);

 #endif
--- a/arch/arm64/include/asm/cachetype.h
+++ b/arch/arm64/include/asm/cachetype.h
-/*
- * Copyright (C) 2012 ARM Ltd.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program.  If not, see <http://www.gnu.org/licenses/>.
- */
-#ifndef __ASM_CACHETYPE_H
-#define __ASM_CACHETYPE_H
-
-#include <asm/cputype.h>
-
-#define CTR_L1IP_SHIFT		14
-#define CTR_L1IP_MASK		3
-#define CTR_CWG_SHIFT		24
-#define CTR_CWG_MASK		15
-
-#define ICACHE_POLICY_RESERVED	0
-#define ICACHE_POLICY_AIVIVT	1
-#define ICACHE_POLICY_VIPT	2
-#define ICACHE_POLICY_PIPT	3
-
-#ifndef __ASSEMBLY__
-
-#include <linux/bitops.h>
-
-#define CTR_L1IP(ctr)	(((ctr) >> CTR_L1IP_SHIFT) & CTR_L1IP_MASK)
-
-#define ICACHEF_ALIASING	0
-#define ICACHEF_AIVIVT		1
-
-extern unsigned long __icache_flags;
-
-/*
- * NumSets, bits[27:13] - (Number of sets in cache) - 1
- * Associativity, bits[12:3] - (Associativity of cache) - 1
- * LineSize, bits[2:0] - (Log2(Number of words in cache line)) - 2
- */
-#define CCSIDR_EL1_WRITE_THROUGH	BIT(31)
-#define CCSIDR_EL1_WRITE_BACK		BIT(30)
-#define CCSIDR_EL1_READ_ALLOCATE	BIT(29)
-#define CCSIDR_EL1_WRITE_ALLOCATE	BIT(28)
-#define CCSIDR_EL1_LINESIZE_MASK	0x7
-#define CCSIDR_EL1_LINESIZE(x)		((x) & CCSIDR_EL1_LINESIZE_MASK)
-#define CCSIDR_EL1_ASSOCIATIVITY_SHIFT	3
-#define CCSIDR_EL1_ASSOCIATIVITY_MASK	0x3ff
-#define CCSIDR_EL1_ASSOCIATIVITY(x)	\
-	(((x) >> CCSIDR_EL1_ASSOCIATIVITY_SHIFT) & CCSIDR_EL1_ASSOCIATIVITY_MASK)
-#define CCSIDR_EL1_NUMSETS_SHIFT	13
-#define CCSIDR_EL1_NUMSETS_MASK		0x7fff
-#define CCSIDR_EL1_NUMSETS(x) \
-	(((x) >> CCSIDR_EL1_NUMSETS_SHIFT) & CCSIDR_EL1_NUMSETS_MASK)
-
-#define CACHE_LINESIZE(x)	(16 << CCSIDR_EL1_LINESIZE(x))
-#define CACHE_NUMSETS(x)	(CCSIDR_EL1_NUMSETS(x) + 1)
-#define CACHE_ASSOCIATIVITY(x)	(CCSIDR_EL1_ASSOCIATIVITY(x) + 1)
-
-extern u64 __attribute_const__ cache_get_ccsidr(u64 csselr);
-
-/* Helpers for Level 1 Instruction cache csselr = 1L */
-static inline int icache_get_linesize(void)
-{
-	return CACHE_LINESIZE(cache_get_ccsidr(1L));
-}
-
-static inline int icache_get_numsets(void)
-{
-	return CACHE_NUMSETS(cache_get_ccsidr(1L));
-}
-
-/*
- * Whilst the D-side always behaves as PIPT on AArch64, aliasing is
- * permitted in the I-cache.
- */
-static inline int icache_is_aliasing(void)
-{
-	return test_bit(ICACHEF_ALIASING, &__icache_flags);
-}
-
-static inline int icache_is_aivivt(void)
-{
-	return test_bit(ICACHEF_AIVIVT, &__icache_flags);
-}
-
-static inline u32 cache_type_cwg(void)
-{
-	return (read_cpuid_cachetype() >> CTR_CWG_SHIFT) & CTR_CWG_MASK;
-}
-
-#endif	/* __ASSEMBLY__ */
-
-#endif	/* __ASM_CACHETYPE_H */
--- a/arch/arm64/include/asm/cpufeature.h
+++ b/arch/arm64/include/asm/cpufeature.h
@@ -226,7 +226,7 @@ void update_cpu_errata_workarounds(void);
 void __init enable_errata_workarounds(void);
 void verify_local_cpu_errata_workarounds(void);

-u64 read_system_reg(u32 id);
+u64 read_sanitised_ftr_reg(u32 id);

 static inline bool cpu_supports_mixed_endian_el0(void)
 {
@@ -240,7 +240,7 @@ static inline bool system_supports_32bit_el0(void)

 static inline bool system_supports_mixed_endian_el0(void)
 {
-	return id_aa64mmfr0_mixed_endian_el0(read_system_reg(SYS_ID_AA64MMFR0_EL1));
+	return id_aa64mmfr0_mixed_endian_el0(read_sanitised_ftr_reg(SYS_ID_AA64MMFR0_EL1));
 }

 static inline bool system_supports_fpsimd(void)

--- a/arch/arm64/include/asm/esr.h
+++ b/arch/arm64/include/asm/esr.h
@@ -177,6 +177,10 @@

 #define ESR_ELx_SYS64_ISS_SYS_CNTVCT	(ESR_ELx_SYS64_ISS_SYS_VAL(3, 3, 2, 14, 0) | \
 					 ESR_ELx_SYS64_ISS_DIR_READ)
+
+#define ESR_ELx_SYS64_ISS_SYS_CNTFRQ	(ESR_ELx_SYS64_ISS_SYS_VAL(3, 3, 0, 14, 0) | \
+					 ESR_ELx_SYS64_ISS_DIR_READ)
+
 #ifndef __ASSEMBLY__
 #include <asm/types.h>


--- a/arch/arm64/include/asm/hardirq.h
+++ b/arch/arm64/include/asm/hardirq.h
@@ -20,7 +20,7 @@
 #include <linux/threads.h>
 #include <asm/irq.h>

-#define NR_IPI	6
+#define NR_IPI	7

 typedef struct {
 	unsigned int __softirq_pending;

--- a/arch/arm64/include/asm/hw_breakpoint.h
+++ b/arch/arm64/include/asm/hw_breakpoint.h
@@ -149,7 +149,7 @@ static inline void ptrace_hw_copy_thread(struct task_struct *task)
 /* Determine number of BRP registers available. */
 static inline int get_num_brps(void)
 {
-	u64 dfr0 = read_system_reg(SYS_ID_AA64DFR0_EL1);
+	u64 dfr0 = read_sanitised_ftr_reg(SYS_ID_AA64DFR0_EL1);
 	return 1 +
 		cpuid_feature_extract_unsigned_field(dfr0,
 						ID_AA64DFR0_BRPS_SHIFT);
@@ -158,7 +158,7 @@ static inline int get_num_brps(void)
 /* Determine number of WRP registers available. */
 static inline int get_num_wrps(void)
 {
-	u64 dfr0 = read_system_reg(SYS_ID_AA64DFR0_EL1);
+	u64 dfr0 = read_sanitised_ftr_reg(SYS_ID_AA64DFR0_EL1);
 	return 1 +
 		cpuid_feature_extract_unsigned_field(dfr0,
 						ID_AA64DFR0_WRPS_SHIFT);

--- a/arch/arm64/include/asm/kexec.h
+++ b/arch/arm64/include/asm/kexec.h
@@ -40,9 +40,59 @@
 static inline void crash_setup_regs(struct pt_regs *newregs,
 				    struct pt_regs *oldregs)
 {
-	/* Empty routine needed to avoid build errors. */
+	if (oldregs) {
+		memcpy(newregs, oldregs, sizeof(*newregs));
+	} else {
+		u64 tmp1, tmp2;
+
+		__asm__ __volatile__ (
+			"stp	 x0,   x1, [%2, #16 *  0]\n"
+			"stp	 x2,   x3, [%2, #16 *  1]\n"
+			"stp	 x4,   x5, [%2, #16 *  2]\n"
+			"stp	 x6,   x7, [%2, #16 *  3]\n"
+			"stp	 x8,   x9, [%2, #16 *  4]\n"
+			"stp	x10,  x11, [%2, #16 *  5]\n"
+			"stp	x12,  x13, [%2, #16 *  6]\n"
+			"stp	x14,  x15, [%2, #16 *  7]\n"
+			"stp	x16,  x17, [%2, #16 *  8]\n"
+			"stp	x18,  x19, [%2, #16 *  9]\n"
+			"stp	x20,  x21, [%2, #16 * 10]\n"
+			"stp	x22,  x23, [%2, #16 * 11]\n"
+			"stp	x24,  x25, [%2, #16 * 12]\n"
+			"stp	x26,  x27, [%2, #16 * 13]\n"
+			"stp	x28,  x29, [%2, #16 * 14]\n"
+			"mov	 %0,  sp\n"
+			"stp	x30,  %0,  [%2, #16 * 15]\n"
+
+			"/* faked current PSTATE */\n"
+			"mrs	 %0, CurrentEL\n"
+			"mrs	 %1, SPSEL\n"
+			"orr	 %0, %0, %1\n"
+			"mrs	 %1, DAIF\n"
+			"orr	 %0, %0, %1\n"
+			"mrs	 %1, NZCV\n"
+			"orr	 %0, %0, %1\n"
+			/* pc */
+			"adr	 %1, 1f\n"
+		"1:\n"
+			"stp	 %1, %0,   [%2, #16 * 16]\n"
+			: "=&r" (tmp1), "=&r" (tmp2)
+			: "r" (newregs)
+			: "memory"
+		);
+	}
 }

+#if defined(CONFIG_KEXEC_CORE) && defined(CONFIG_HIBERNATION)
+extern bool crash_is_nosave(unsigned long pfn);
+extern void crash_prepare_suspend(void);
+extern void crash_post_resume(void);
+#else
+static inline bool crash_is_nosave(unsigned long pfn) {return false; }
+static inline void crash_prepare_suspend(void) {}
+static inline void crash_post_resume(void) {}
+#endif
+
 #endif /* __ASSEMBLY__ */

 #endif
--- a/arch/arm64/include/asm/kvm_mmu.h
+++ b/arch/arm64/include/asm/kvm_mmu.h
@@ -108,7 +108,7 @@ alternative_else_nop_endif
 #else

 #include <asm/pgalloc.h>
-#include <asm/cachetype.h>
+#include <asm/cache.h>
 #include <asm/cacheflush.h>
 #include <asm/mmu_context.h>
 #include <asm/pgtable.h>
@@ -242,12 +242,13 @@ static inline void __coherent_cache_guest_page(struct kvm_vcpu *vcpu,

 	kvm_flush_dcache_to_poc(va, size);

-	if (!icache_is_aliasing()) {		/* PIPT */
-		flush_icache_range((unsigned long)va,
-				   (unsigned long)va + size);
-	} else if (!icache_is_aivivt()) {	/* non ASID-tagged VIVT */
+	if (icache_is_aliasing()) {
 		/* any kind of VIPT cache */
 		__flush_icache_all();
+	} else if (is_kernel_in_hyp_mode() || !icache_is_vpipt()) {
+		/* PIPT or VPIPT at EL2 (see comment in __kvm_tlb_flush_vmid_ipa) */
+		flush_icache_range((unsigned long)va,
+				   (unsigned long)va + size);
 	}
 }

@@ -307,7 +308,7 @@ static inline void __kvm_extend_hypmap(pgd_t *boot_hyp_pgd,

 static inline unsigned int kvm_get_vmid_bits(void)
 {
-	int reg = read_system_reg(SYS_ID_AA64MMFR1_EL1);
+	int reg = read_sanitised_ftr_reg(SYS_ID_AA64MMFR1_EL1);

 	return (cpuid_feature_extract_unsigned_field(reg, ID_AA64MMFR1_VMIDBITS_SHIFT) == 2) ? 16 : 8;
 }

--- a/arch/arm64/include/asm/mmu.h
+++ b/arch/arm64/include/asm/mmu.h
@@ -37,5 +37,6 @@ extern void create_pgd_mapping(struct mm_struct *mm, phys_addr_t phys,
 			       unsigned long virt, phys_addr_t size,
 			       pgprot_t prot, bool page_mappings_only);
 extern void *fixmap_remap_fdt(phys_addr_t dt_phys);
+extern void mark_linear_text_alias_ro(void);

 #endif
--- a/arch/arm64/include/asm/module.h
+++ b/arch/arm64/include/asm/module.h
@@ -17,26 +17,26 @@
 #define __ASM_MODULE_H

 #include <asm-generic/module.h>
-#include <asm/memory.h>

 #define MODULE_ARCH_VERMAGIC	"aarch64"

 #ifdef CONFIG_ARM64_MODULE_PLTS
-struct mod_arch_specific {
+struct mod_plt_sec {
 	struct elf64_shdr	*plt;
 	int			plt_num_entries;
 	int			plt_max_entries;
 };
+
+struct mod_arch_specific {
+	struct mod_plt_sec	core;
+	struct mod_plt_sec	init;
+};
 #endif

-u64 module_emit_plt_entry(struct module *mod, const Elf64_Rela *rela,
+u64 module_emit_plt_entry(struct module *mod, void *loc, const Elf64_Rela *rela,
 			  Elf64_Sym *sym);

 #ifdef CONFIG_RANDOMIZE_BASE
-#ifdef CONFIG_MODVERSIONS
-#define ARCH_RELOCATES_KCRCTAB
-#define reloc_start 		(kimage_vaddr - KIMAGE_VADDR)
-#endif
 extern u64 module_alloc_base;
 #else
 #define module_alloc_base	((u64)_etext - MODULES_VSIZE)

--- a/arch/arm64/include/asm/pgtable.h
+++ b/arch/arm64/include/asm/pgtable.h
@@ -74,6 +74,16 @@ extern unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)];
 #define pte_user_exec(pte)	(!(pte_val(pte) & PTE_UXN))
 #define pte_cont(pte)		(!!(pte_val(pte) & PTE_CONT))

+#define pte_cont_addr_end(addr, end)						\
+({	unsigned long __boundary = ((addr) + CONT_PTE_SIZE) & CONT_PTE_MASK;	\
+	(__boundary - 1 < (end) - 1) ? __boundary : (end);			\
+})
+
+#define pmd_cont_addr_end(addr, end)						\
+({	unsigned long __boundary = ((addr) + CONT_PMD_SIZE) & CONT_PMD_MASK;	\
+	(__boundary - 1 < (end) - 1) ? __boundary : (end);			\
+})
+
 #ifdef CONFIG_ARM64_HW_AFDBM
 #define pte_hw_dirty(pte)	(pte_write(pte) && !(pte_val(pte) & PTE_RDONLY))
 #else

--- a/arch/arm64/include/asm/processor.h
+++ b/arch/arm64/include/asm/processor.h
@@ -50,6 +50,7 @@ extern phys_addr_t arm64_dma_phys_limit;
 #define ARCH_LOW_ADDRESS_LIMIT	(arm64_dma_phys_limit - 1)

 struct debug_info {
+#ifdef CONFIG_HAVE_HW_BREAKPOINT
 	/* Have we suspended stepping by a debugger? */
 	int			suspended_step;
 	/* Allow breakpoints and watchpoints to be disabled for this thread. */
@@ -58,6 +59,7 @@ struct debug_info {
 	/* Hardware breakpoints pinned to this task. */
 	struct perf_event	*hbp_break[ARM_MAX_BRP];
 	struct perf_event	*hbp_watch[ARM_MAX_WRP];
+#endif
 };

 struct cpu_context {

--- a/arch/arm64/include/asm/sections.h
+++ b/arch/arm64/include/asm/sections.h
@@ -24,6 +24,8 @@ extern char __hibernate_exit_text_start[], __hibernate_exit_text_end[];
 extern char __hyp_idmap_text_start[], __hyp_idmap_text_end[];
 extern char __hyp_text_start[], __hyp_text_end[];
 extern char __idmap_text_start[], __idmap_text_end[];
+extern char __initdata_begin[], __initdata_end[];
+extern char __inittext_begin[], __inittext_end[];
 extern char __irqentry_text_start[], __irqentry_text_end[];
 extern char __mmuoff_data_start[], __mmuoff_data_end[];


--- a/arch/arm64/include/asm/smp.h
+++ b/arch/arm64/include/asm/smp.h
@@ -148,6 +148,9 @@ static inline void cpu_panic_kernel(void)
 */
 bool cpus_are_stuck_in_kernel(void);

+extern void smp_send_crash_stop(void);
+extern bool smp_crash_stop_failed(void);
+
 #endif /* ifndef __ASSEMBLY__ */

 #endif /* ifndef __ASM_SMP_H */
--- a/arch/arm64/include/asm/sysreg.h
+++ b/arch/arm64/include/asm/sysreg.h
@@ -48,6 +48,8 @@
 	 ((crn) << CRn_shift) | ((crm) << CRm_shift) | \
 	 ((op2) << Op2_shift))

+#define sys_insn	sys_reg
+
 #define sys_reg_Op0(id)	(((id) >> Op0_shift) & Op0_mask)
 #define sys_reg_Op1(id)	(((id) >> Op1_shift) & Op1_mask)
 #define sys_reg_CRn(id)	(((id) >> CRn_shift) & CRn_mask)
@@ -81,6 +83,41 @@

 #endif	/* CONFIG_BROKEN_GAS_INST */

+#define REG_PSTATE_PAN_IMM		sys_reg(0, 0, 4, 0, 4)
+#define REG_PSTATE_UAO_IMM		sys_reg(0, 0, 4, 0, 3)
+
+#define SET_PSTATE_PAN(x) __emit_inst(0xd5000000 | REG_PSTATE_PAN_IMM |	\
+				      (!!x)<<8 | 0x1f)
+#define SET_PSTATE_UAO(x) __emit_inst(0xd5000000 | REG_PSTATE_UAO_IMM |	\
+				      (!!x)<<8 | 0x1f)
+
+#define SYS_DC_ISW			sys_insn(1, 0, 7, 6, 2)
+#define SYS_DC_CSW			sys_insn(1, 0, 7, 10, 2)
+#define SYS_DC_CISW			sys_insn(1, 0, 7, 14, 2)
+
+#define SYS_OSDTRRX_EL1			sys_reg(2, 0, 0, 0, 2)
+#define SYS_MDCCINT_EL1			sys_reg(2, 0, 0, 2, 0)
+#define SYS_MDSCR_EL1			sys_reg(2, 0, 0, 2, 2)
+#define SYS_OSDTRTX_EL1			sys_reg(2, 0, 0, 3, 2)
+#define SYS_OSECCR_EL1			sys_reg(2, 0, 0, 6, 2)
+#define SYS_DBGBVRn_EL1(n)		sys_reg(2, 0, 0, n, 4)
+#define SYS_DBGBCRn_EL1(n)		sys_reg(2, 0, 0, n, 5)
+#define SYS_DBGWVRn_EL1(n)		sys_reg(2, 0, 0, n, 6)
+#define SYS_DBGWCRn_EL1(n)		sys_reg(2, 0, 0, n, 7)
+#define SYS_MDRAR_EL1			sys_reg(2, 0, 1, 0, 0)
+#define SYS_OSLAR_EL1			sys_reg(2, 0, 1, 0, 4)
+#define SYS_OSLSR_EL1			sys_reg(2, 0, 1, 1, 4)
+#define SYS_OSDLR_EL1			sys_reg(2, 0, 1, 3, 4)
+#define SYS_DBGPRCR_EL1			sys_reg(2, 0, 1, 4, 4)
+#define SYS_DBGCLAIMSET_EL1		sys_reg(2, 0, 7, 8, 6)
+#define SYS_DBGCLAIMCLR_EL1		sys_reg(2, 0, 7, 9, 6)
+#define SYS_DBGAUTHSTATUS_EL1		sys_reg(2, 0, 7, 14, 6)
+#define SYS_MDCCSR_EL0			sys_reg(2, 3, 0, 1, 0)
+#define SYS_DBGDTR_EL0			sys_reg(2, 3, 0, 4, 0)
+#define SYS_DBGDTRRX_EL0		sys_reg(2, 3, 0, 5, 0)
+#define SYS_DBGDTRTX_EL0		sys_reg(2, 3, 0, 5, 0)
+#define SYS_DBGVCR32_EL2		sys_reg(2, 4, 0, 7, 0)
+
 #define SYS_MIDR_EL1			sys_reg(3, 0, 0, 0, 0)
 #define SYS_MPIDR_EL1			sys_reg(3, 0, 0, 0, 5)
 #define SYS_REVIDR_EL1			sys_reg(3, 0, 0, 0, 6)
@@ -88,6 +125,7 @@
 #define SYS_ID_PFR0_EL1			sys_reg(3, 0, 0, 1, 0)
 #define SYS_ID_PFR1_EL1			sys_reg(3, 0, 0, 1, 1)
 #define SYS_ID_DFR0_EL1			sys_reg(3, 0, 0, 1, 2)
+#define SYS_ID_AFR0_EL1			sys_reg(3, 0, 0, 1, 3)
 #define SYS_ID_MMFR0_EL1		sys_reg(3, 0, 0, 1, 4)
 #define SYS_ID_MMFR1_EL1		sys_reg(3, 0, 0, 1, 5)
 #define SYS_ID_MMFR2_EL1		sys_reg(3, 0, 0, 1, 6)
@@ -118,17 +156,127 @@
 #define SYS_ID_AA64MMFR1_EL1		sys_reg(3, 0, 0, 7, 1)
 #define SYS_ID_AA64MMFR2_EL1		sys_reg(3, 0, 0, 7, 2)

-#define SYS_CNTFRQ_EL0			sys_reg(3, 3, 14, 0, 0)
+#define SYS_SCTLR_EL1			sys_reg(3, 0, 1, 0, 0)
+#define SYS_ACTLR_EL1			sys_reg(3, 0, 1, 0, 1)
+#define SYS_CPACR_EL1			sys_reg(3, 0, 1, 0, 2)
+
+#define SYS_TTBR0_EL1			sys_reg(3, 0, 2, 0, 0)
+#define SYS_TTBR1_EL1			sys_reg(3, 0, 2, 0, 1)
+#define SYS_TCR_EL1			sys_reg(3, 0, 2, 0, 2)
+
+#define SYS_ICC_PMR_EL1			sys_reg(3, 0, 4, 6, 0)
+
+#define SYS_AFSR0_EL1			sys_reg(3, 0, 5, 1, 0)
+#define SYS_AFSR1_EL1			sys_reg(3, 0, 5, 1, 1)
+#define SYS_ESR_EL1			sys_reg(3, 0, 5, 2, 0)
+#define SYS_FAR_EL1			sys_reg(3, 0, 6, 0, 0)
+#define SYS_PAR_EL1			sys_reg(3, 0, 7, 4, 0)
+
+#define SYS_PMINTENSET_EL1		sys_reg(3, 0, 9, 14, 1)
+#define SYS_PMINTENCLR_EL1		sys_reg(3, 0, 9, 14, 2)
+
+#define SYS_MAIR_EL1			sys_reg(3, 0, 10, 2, 0)
+#define SYS_AMAIR_EL1			sys_reg(3, 0, 10, 3, 0)
+
+#define SYS_VBAR_EL1			sys_reg(3, 0, 12, 0, 0)
+
+#define SYS_ICC_DIR_EL1			sys_reg(3, 0, 12, 11, 1)
+#define SYS_ICC_SGI1R_EL1		sys_reg(3, 0, 12, 11, 5)
+#define SYS_ICC_IAR1_EL1		sys_reg(3, 0, 12, 12, 0)
+#define SYS_ICC_EOIR1_EL1		sys_reg(3, 0, 12, 12, 1)
+#define SYS_ICC_BPR1_EL1		sys_reg(3, 0, 12, 12, 3)
+#define SYS_ICC_CTLR_EL1		sys_reg(3, 0, 12, 12, 4)
+#define SYS_ICC_SRE_EL1			sys_reg(3, 0, 12, 12, 5)
+#define SYS_ICC_GRPEN1_EL1		sys_reg(3, 0, 12, 12, 7)
+
+#define SYS_CONTEXTIDR_EL1		sys_reg(3, 0, 13, 0, 1)
+#define SYS_TPIDR_EL1			sys_reg(3, 0, 13, 0, 4)
+
+#define SYS_CNTKCTL_EL1			sys_reg(3, 0, 14, 1, 0)
+
+#define SYS_CLIDR_EL1			sys_reg(3, 1, 0, 0, 1)
+#define SYS_AIDR_EL1			sys_reg(3, 1, 0, 0, 7)
+
+#define SYS_CSSELR_EL1			sys_reg(3, 2, 0, 0, 0)
+
 #define SYS_CTR_EL0			sys_reg(3, 3, 0, 0, 1)
 #define SYS_DCZID_EL0			sys_reg(3, 3, 0, 0, 7)

-#define REG_PSTATE_PAN_IMM		sys_reg(0, 0, 4, 0, 4)
-#define REG_PSTATE_UAO_IMM		sys_reg(0, 0, 4, 0, 3)
+#define SYS_PMCR_EL0			sys_reg(3, 3, 9, 12, 0)
+#define SYS_PMCNTENSET_EL0		sys_reg(3, 3, 9, 12, 1)
+#define SYS_PMCNTENCLR_EL0		sys_reg(3, 3, 9, 12, 2)
+#define SYS_PMOVSCLR_EL0		sys_reg(3, 3, 9, 12, 3)
+#define SYS_PMSWINC_EL0			sys_reg(3, 3, 9, 12, 4)
+#define SYS_PMSELR_EL0			sys_reg(3, 3, 9, 12, 5)
+#define SYS_PMCEID0_EL0			sys_reg(3, 3, 9, 12, 6)
+#define SYS_PMCEID1_EL0			sys_reg(3, 3, 9, 12, 7)
+#define SYS_PMCCNTR_EL0			sys_reg(3, 3, 9, 13, 0)
+#define SYS_PMXEVTYPER_EL0		sys_reg(3, 3, 9, 13, 1)
+#define SYS_PMXEVCNTR_EL0		sys_reg(3, 3, 9, 13, 2)
+#define SYS_PMUSERENR_EL0		sys_reg(3, 3, 9, 14, 0)
+#define SYS_PMOVSSET_EL0		sys_reg(3, 3, 9, 14, 3)
+
+#define SYS_TPIDR_EL0			sys_reg(3, 3, 13, 0, 2)
+#define SYS_TPIDRRO_EL0			sys_reg(3, 3, 13, 0, 3)

-#define SET_PSTATE_PAN(x) __emit_inst(0xd5000000 | REG_PSTATE_PAN_IMM |	\
-				      (!!x)<<8 | 0x1f)
-#define SET_PSTATE_UAO(x) __emit_inst(0xd5000000 | REG_PSTATE_UAO_IMM |	\
-				      (!!x)<<8 | 0x1f)
+#define SYS_CNTFRQ_EL0			sys_reg(3, 3, 14, 0, 0)
+
+#define SYS_CNTP_TVAL_EL0		sys_reg(3, 3, 14, 2, 0)
+#define SYS_CNTP_CTL_EL0		sys_reg(3, 3, 14, 2, 1)
+#define SYS_CNTP_CVAL_EL0		sys_reg(3, 3, 14, 2, 2)
+
+#define __PMEV_op2(n)			((n) & 0x7)
+#define __CNTR_CRm(n)			(0x8 | (((n) >> 3) & 0x3))
+#define SYS_PMEVCNTRn_EL0(n)		sys_reg(3, 3, 14, __CNTR_CRm(n), __PMEV_op2(n))
+#define __TYPER_CRm(n)			(0xc | (((n) >> 3) & 0x3))
+#define SYS_PMEVTYPERn_EL0(n)		sys_reg(3, 3, 14, __TYPER_CRm(n), __PMEV_op2(n))
+
+#define SYS_PMCCFILTR_EL0		sys_reg (3, 3, 14, 15, 7)
+
+#define SYS_DACR32_EL2			sys_reg(3, 4, 3, 0, 0)
+#define SYS_IFSR32_EL2			sys_reg(3, 4, 5, 0, 1)
+#define SYS_FPEXC32_EL2			sys_reg(3, 4, 5, 3, 0)
+
+#define __SYS__AP0Rx_EL2(x)		sys_reg(3, 4, 12, 8, x)
+#define SYS_ICH_AP0R0_EL2		__SYS__AP0Rx_EL2(0)
+#define SYS_ICH_AP0R1_EL2		__SYS__AP0Rx_EL2(1)
+#define SYS_ICH_AP0R2_EL2		__SYS__AP0Rx_EL2(2)
+#define SYS_ICH_AP0R3_EL2		__SYS__AP0Rx_EL2(3)
+
+#define __SYS__AP1Rx_EL2(x)		sys_reg(3, 4, 12, 9, x)
+#define SYS_ICH_AP1R0_EL2		__SYS__AP1Rx_EL2(0)
+#define SYS_ICH_AP1R1_EL2		__SYS__AP1Rx_EL2(1)
+#define SYS_ICH_AP1R2_EL2		__SYS__AP1Rx_EL2(2)
+#define SYS_ICH_AP1R3_EL2		__SYS__AP1Rx_EL2(3)
+
+#define SYS_ICH_VSEIR_EL2		sys_reg(3, 4, 12, 9, 4)
+#define SYS_ICC_SRE_EL2			sys_reg(3, 4, 12, 9, 5)
+#define SYS_ICH_HCR_EL2			sys_reg(3, 4, 12, 11, 0)
+#define SYS_ICH_VTR_EL2			sys_reg(3, 4, 12, 11, 1)
+#define SYS_ICH_MISR_EL2		sys_reg(3, 4, 12, 11, 2)
+#define SYS_ICH_EISR_EL2		sys_reg(3, 4, 12, 11, 3)
+#define SYS_ICH_ELSR_EL2		sys_reg(3, 4, 12, 11, 5)
+#define SYS_ICH_VMCR_EL2		sys_reg(3, 4, 12, 11, 7)
+
+#define __SYS__LR0_EL2(x)		sys_reg(3, 4, 12, 12, x)
+#define SYS_ICH_LR0_EL2			__SYS__LR0_EL2(0)
+#define SYS_ICH_LR1_EL2			__SYS__LR0_EL2(1)
+#define SYS_ICH_LR2_EL2			__SYS__LR0_EL2(2)
+#define SYS_ICH_LR3_EL2			__SYS__LR0_EL2(3)
+#define SYS_ICH_LR4_EL2			__SYS__LR0_EL2(4)
+#define SYS_ICH_LR5_EL2			__SYS__LR0_EL2(5)
+#define SYS_ICH_LR6_EL2			__SYS__LR0_EL2(6)
+#define SYS_ICH_LR7_EL2			__SYS__LR0_EL2(7)
+
+#define __SYS__LR8_EL2(x)		sys_reg(3, 4, 12, 13, x)
+#define SYS_ICH_LR8_EL2			__SYS__LR8_EL2(0)
+#define SYS_ICH_LR9_EL2			__SYS__LR8_EL2(1)
+#define SYS_ICH_LR10_EL2		__SYS__LR8_EL2(2)
+#define SYS_ICH_LR11_EL2		__SYS__LR8_EL2(3)
+#define SYS_ICH_LR12_EL2		__SYS__LR8_EL2(4)
+#define SYS_ICH_LR13_EL2		__SYS__LR8_EL2(5)
+#define SYS_ICH_LR14_EL2		__SYS__LR8_EL2(6)
+#define SYS_ICH_LR15_EL2		__SYS__LR8_EL2(7)

 /* Common SCTLR_ELx flags. */
 #define SCTLR_ELx_EE    (1 << 25)
@@ -156,6 +304,11 @@
 #define ID_AA64ISAR0_SHA1_SHIFT		8
 #define ID_AA64ISAR0_AES_SHIFT		4

+/* id_aa64isar1 */
+#define ID_AA64ISAR1_LRCPC_SHIFT	20
+#define ID_AA64ISAR1_FCMA_SHIFT		16
+#define ID_AA64ISAR1_JSCVT_SHIFT	12
+
 /* id_aa64pfr0 */
 #define ID_AA64PFR0_GIC_SHIFT		24
 #define ID_AA64PFR0_ASIMD_SHIFT		20

--- a/arch/arm64/include/uapi/asm/hwcap.h
+++ b/arch/arm64/include/uapi/asm/hwcap.h
@@ -32,5 +32,8 @@
 #define HWCAP_ASIMDHP		(1 << 10)
 #define HWCAP_CPUID		(1 << 11)
 #define HWCAP_ASIMDRDM		(1 << 12)
+#define HWCAP_JSCVT		(1 << 13)
+#define HWCAP_FCMA		(1 << 14)
+#define HWCAP_LRCPC		(1 << 15)

 #endif /* _UAPI__ASM_HWCAP_H */
--- a/arch/arm64/kernel/Makefile
+++ b/arch/arm64/kernel/Makefile
@@ -50,6 +50,9 @@ arm64-obj-$(CONFIG_RANDOMIZE_BASE)	+= kaslr.o
 arm64-obj-$(CONFIG_HIBERNATION)		+= hibernate.o hibernate-asm.o
 arm64-obj-$(CONFIG_KEXEC)		+= machine_kexec.o relocate_kernel.o	\
 					   cpu-reset.o
+arm64-obj-$(CONFIG_ARM64_RELOC_TEST)	+= arm64-reloc-test.o
+arm64-reloc-test-y := reloc_test_core.o reloc_test_syms.o
+arm64-obj-$(CONFIG_CRASH_DUMP)		+= crash_dump.o

 obj-y					+= $(arm64-obj-y) vdso/ probes/
 obj-m					+= $(arm64-obj-m)

--- a/arch/arm64/kernel/alternative.c
+++ b/arch/arm64/kernel/alternative.c
@@ -105,11 +105,11 @@ static u32 get_alt_insn(struct alt_instr *alt, u32 *insnptr, u32 *altinsnptr)
 	return insn;
 }

-static void __apply_alternatives(void *alt_region)
+static void __apply_alternatives(void *alt_region, bool use_linear_alias)
 {
 	struct alt_instr *alt;
 	struct alt_region *region = alt_region;
-	u32 *origptr, *replptr;
+	u32 *origptr, *replptr, *updptr;

 	for (alt = region->begin; alt < region->end; alt++) {
 		u32 insn;
@@ -124,11 +124,12 @@ static void __apply_alternatives(void *alt_region)

 		origptr = ALT_ORIG_PTR(alt);
 		replptr = ALT_REPL_PTR(alt);
+		updptr = use_linear_alias ? (u32 *)lm_alias(origptr) : origptr;
 		nr_inst = alt->alt_len / sizeof(insn);

 		for (i = 0; i < nr_inst; i++) {
 			insn = get_alt_insn(alt, origptr + i, replptr + i);
-			*(origptr + i) = cpu_to_le32(insn);
+			updptr[i] = cpu_to_le32(insn);
 		}

 		flush_icache_range((uintptr_t)origptr,
@@ -155,7 +156,7 @@ static int __apply_alternatives_multi_stop(void *unused)
 		isb();
 	} else {
 		BUG_ON(patched);
-		__apply_alternatives(&region);
+		__apply_alternatives(&region, true);
 		/* Barriers provided by the cache flushing */
 		WRITE_ONCE(patched, 1);
 	}
@@ -176,5 +177,5 @@ void apply_alternatives(void *start, size_t length)
 		.end	= start + length,
 	};

-	__apply_alternatives(&region);
+	__apply_alternatives(&region, false);
 }
--- a/arch/arm64/kernel/cacheinfo.c
+++ b/arch/arm64/kernel/cacheinfo.c
@@ -17,15 +17,9 @@
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */

-#include <linux/bitops.h>
 #include <linux/cacheinfo.h>
-#include <linux/cpu.h>
-#include <linux/compiler.h>
 #include <linux/of.h>

-#include <asm/cachetype.h>
-#include <asm/processor.h>
-
 #define MAX_CACHE_LEVEL			7	/* Max 7 level supported */
 /* Ctypen, bits[3(n - 1) + 2 : 3(n - 1)], for n = 1 to 7 */
 #define CLIDR_CTYPE_SHIFT(level)	(3 * (level - 1))
@@ -43,43 +37,11 @@ static inline enum cache_type get_cache_type(int level)
 	return CLIDR_CTYPE(clidr, level);
 }

-/*
- * Cache Size Selection Register(CSSELR) selects which Cache Size ID
- * Register(CCSIDR) is accessible by specifying the required cache
- * level and the cache type. We need to ensure that no one else changes
- * CSSELR by calling this in non-preemtible context
- */
-u64 __attribute_const__ cache_get_ccsidr(u64 csselr)
-{
-	u64 ccsidr;
-
-	WARN_ON(preemptible());
-
-	write_sysreg(csselr, csselr_el1);
-	isb();
-	ccsidr = read_sysreg(ccsidr_el1);
-
-	return ccsidr;
-}
-
 static void ci_leaf_init(struct cacheinfo *this_leaf,
 			 enum cache_type type, unsigned int level)
 {
-	bool is_icache = type & CACHE_TYPE_INST;
-	u64 tmp = cache_get_ccsidr((level - 1) << 1 | is_icache);
-
 	this_leaf->level = level;
 	this_leaf->type = type;
-	this_leaf->coherency_line_size = CACHE_LINESIZE(tmp);
-	this_leaf->number_of_sets = CACHE_NUMSETS(tmp);
-	this_leaf->ways_of_associativity = CACHE_ASSOCIATIVITY(tmp);
-	this_leaf->size = this_leaf->number_of_sets *
-	    this_leaf->coherency_line_size * this_leaf->ways_of_associativity;
-	this_leaf->attributes =
-		((tmp & CCSIDR_EL1_WRITE_THROUGH) ? CACHE_WRITE_THROUGH : 0) |
-		((tmp & CCSIDR_EL1_WRITE_BACK) ? CACHE_WRITE_BACK : 0) |
-		((tmp & CCSIDR_EL1_READ_ALLOCATE) ? CACHE_READ_ALLOCATE : 0) |
-		((tmp & CCSIDR_EL1_WRITE_ALLOCATE) ? CACHE_WRITE_ALLOCATE : 0);
 }

 static int __init_cache_level(unsigned int cpu)

--- a/arch/arm64/kernel/cpufeature.c
+++ b/arch/arm64/kernel/cpufeature.c
@@ -97,6 +97,13 @@ static const struct arm64_ftr_bits ftr_id_aa64isar0[] = {
 	ARM64_FTR_END,
 };

+static const struct arm64_ftr_bits ftr_id_aa64isar1[] = {
+	ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_EXACT, ID_AA64ISAR1_LRCPC_SHIFT, 4, 0),
+	ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_EXACT, ID_AA64ISAR1_FCMA_SHIFT, 4, 0),
+	ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_EXACT, ID_AA64ISAR1_JSCVT_SHIFT, 4, 0),
+	ARM64_FTR_END,
+};
+
 static const struct arm64_ftr_bits ftr_id_aa64pfr0[] = {
 	ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_EXACT, ID_AA64PFR0_GIC_SHIFT, 4, 0),
 	S_ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR0_ASIMD_SHIFT, 4, ID_AA64PFR0_ASIMD_NI),
@@ -153,9 +160,9 @@ static const struct arm64_ftr_bits ftr_ctr[] = {
 	/*
 	 * Linux can handle differing I-cache policies. Userspace JITs will
 	 * make use of *minLine.
-	 * If we have differing I-cache policies, report it as the weakest - AIVIVT.
+	 * If we have differing I-cache policies, report it as the weakest - VIPT.
 	 */
-	ARM64_FTR_BITS(FTR_VISIBLE, FTR_NONSTRICT, FTR_EXACT, 14, 2, ICACHE_POLICY_AIVIVT),	/* L1Ip */
+	ARM64_FTR_BITS(FTR_VISIBLE, FTR_NONSTRICT, FTR_EXACT, 14, 2, ICACHE_POLICY_VIPT),	/* L1Ip */
 	ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, 0, 4, 0),	/* IminLine */
 	ARM64_FTR_END,
 };
@@ -314,7 +321,7 @@ static const struct __ftr_reg_entry {

 	/* Op1 = 0, CRn = 0, CRm = 6 */
 	ARM64_FTR_REG(SYS_ID_AA64ISAR0_EL1, ftr_id_aa64isar0),
-	ARM64_FTR_REG(SYS_ID_AA64ISAR1_EL1, ftr_raz),
+	ARM64_FTR_REG(SYS_ID_AA64ISAR1_EL1, ftr_id_aa64isar1),

 	/* Op1 = 0, CRn = 0, CRm = 7 */
 	ARM64_FTR_REG(SYS_ID_AA64MMFR0_EL1, ftr_id_aa64mmfr0),
@@ -585,7 +592,7 @@ void update_cpu_features(int cpu,
 	 * If we have AArch32, we care about 32-bit features for compat.
 	 * If the system doesn't support AArch32, don't update them.
 	 */
-	if (id_aa64pfr0_32bit_el0(read_system_reg(SYS_ID_AA64PFR0_EL1)) &&
+	if (id_aa64pfr0_32bit_el0(read_sanitised_ftr_reg(SYS_ID_AA64PFR0_EL1)) &&
 		id_aa64pfr0_32bit_el0(info->reg_id_aa64pfr0)) {

 		taint |= check_update_ftr_reg(SYS_ID_DFR0_EL1, cpu,
@@ -636,7 +643,7 @@ void update_cpu_features(int cpu,
 			"Unsupported CPU feature variation.\n");
 }

-u64 read_system_reg(u32 id)
+u64 read_sanitised_ftr_reg(u32 id)
 {
 	struct arm64_ftr_reg *regp = get_arm64_ftr_reg(id);

@@ -649,10 +656,10 @@ u64 read_system_reg(u32 id)
 	case r:		return read_sysreg_s(r)

 /*
- * __raw_read_system_reg() - Used by a STARTING cpu before cpuinfo is populated.
+ * __read_sysreg_by_encoding() - Used by a STARTING cpu before cpuinfo is populated.
 * Read the system register on the current CPU
 */
-static u64 __raw_read_system_reg(u32 sys_id)
+static u64 __read_sysreg_by_encoding(u32 sys_id)
 {
 	switch (sys_id) {
 	read_sysreg_case(SYS_ID_PFR0_EL1);
@@ -709,9 +716,9 @@ has_cpuid_feature(const struct arm64_cpu_capabilities *entry, int scope)

 	WARN_ON(scope == SCOPE_LOCAL_CPU && preemptible());
 	if (scope == SCOPE_SYSTEM)
-		val = read_system_reg(entry->sys_reg);
+		val = read_sanitised_ftr_reg(entry->sys_reg);
 	else
-		val = __raw_read_system_reg(entry->sys_reg);
+		val = __read_sysreg_by_encoding(entry->sys_reg);

 	return feature_matches(val, entry);
 }
@@ -761,7 +768,7 @@ static bool hyp_offset_low(const struct arm64_cpu_capabilities *entry,

 static bool has_no_fpsimd(const struct arm64_cpu_capabilities *entry, int __unused)
 {
-	u64 pfr0 = read_system_reg(SYS_ID_AA64PFR0_EL1);
+	u64 pfr0 = read_sanitised_ftr_reg(SYS_ID_AA64PFR0_EL1);

 	return cpuid_feature_extract_signed_field(pfr0,
 					ID_AA64PFR0_FP_SHIFT) < 0;
@@ -888,6 +895,9 @@ static const struct arm64_cpu_capabilities arm64_elf_hwcaps[] = {
 	HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_FP_SHIFT, FTR_SIGNED, 1, CAP_HWCAP, HWCAP_FPHP),
 	HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_ASIMD_SHIFT, FTR_SIGNED, 0, CAP_HWCAP, HWCAP_ASIMD),
 	HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_ASIMD_SHIFT, FTR_SIGNED, 1, CAP_HWCAP, HWCAP_ASIMDHP),
+	HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_JSCVT_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, HWCAP_JSCVT),
+	HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_FCMA_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, HWCAP_FCMA),
+	HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_LRCPC_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, HWCAP_LRCPC),
 	{},
 };


--- a/arch/arm64/kernel/cpuinfo.c
+++ b/arch/arm64/kernel/cpuinfo.c
@@ -15,7 +15,7 @@
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */
 #include <asm/arch_timer.h>
-#include <asm/cachetype.h>
+#include <asm/cache.h>
 #include <asm/cpu.h>
 #include <asm/cputype.h>
 #include <asm/cpufeature.h>
@@ -43,10 +43,10 @@ DEFINE_PER_CPU(struct cpuinfo_arm64, cpu_data);
 static struct cpuinfo_arm64 boot_cpu_data;

 static char *icache_policy_str[] = {
-	[ICACHE_POLICY_RESERVED] = "RESERVED/UNKNOWN",
-	[ICACHE_POLICY_AIVIVT] = "AIVIVT",
-	[ICACHE_POLICY_VIPT] = "VIPT",
-	[ICACHE_POLICY_PIPT] = "PIPT",
+	[0 ... ICACHE_POLICY_PIPT]	= "RESERVED/UNKNOWN",
+	[ICACHE_POLICY_VIPT]		= "VIPT",
+	[ICACHE_POLICY_PIPT]		= "PIPT",
+	[ICACHE_POLICY_VPIPT]		= "VPIPT",
 };

 unsigned long __icache_flags;
@@ -65,6 +65,9 @@ static const char *const hwcap_str[] = {
 	"asimdhp",
 	"cpuid",
 	"asimdrdm",
+	"jscvt",
+	"fcma",
+	"lrcpc",
 	NULL
 };

@@ -289,20 +292,18 @@ static void cpuinfo_detect_icache_policy(struct cpuinfo_arm64 *info)
 	unsigned int cpu = smp_processor_id();
 	u32 l1ip = CTR_L1IP(info->reg_ctr);

-	if (l1ip != ICACHE_POLICY_PIPT) {
-		/*
-		 * VIPT caches are non-aliasing if the VA always equals the PA
-		 * in all bit positions that are covered by the index. This is
-		 * the case if the size of a way (# of sets * line size) does
-		 * not exceed PAGE_SIZE.
-		 */
-		u32 waysize = icache_get_numsets() * icache_get_linesize();
-
-		if (l1ip != ICACHE_POLICY_VIPT || waysize > PAGE_SIZE)
-			set_bit(ICACHEF_ALIASING, &__icache_flags);
+	switch (l1ip) {
+	case ICACHE_POLICY_PIPT:
+		break;
+	case ICACHE_POLICY_VPIPT:
+		set_bit(ICACHEF_VPIPT, &__icache_flags);
+		break;
+	default:
+		/* Fallthrough */
+	case ICACHE_POLICY_VIPT:
+		/* Assume aliasing */
+		set_bit(ICACHEF_ALIASING, &__icache_flags);
 	}
-	if (l1ip == ICACHE_POLICY_AIVIVT)
-		set_bit(ICACHEF_AIVIVT, &__icache_flags);

 	pr_info("Detected %s I-cache on CPU%d\n", icache_policy_str[l1ip], cpu);
 }

--- a/arch/arm64/kernel/crash_dump.c
+++ b/arch/arm64/kernel/crash_dump.c
+/*
+ * Routines for doing kexec-based kdump
+ *
+ * Copyright (C) 2017 Linaro Limited
+ * Author: AKASHI Takahiro <takahiro.akashi@linaro.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/crash_dump.h>
+#include <linux/errno.h>
+#include <linux/io.h>
+#include <linux/memblock.h>
+#include <linux/uaccess.h>
+#include <asm/memory.h>
+
+/**
+ * copy_oldmem_page() - copy one page from old kernel memory
+ * @pfn: page frame number to be copied
+ * @buf: buffer where the copied page is placed
+ * @csize: number of bytes to copy
+ * @offset: offset in bytes into the page
+ * @userbuf: if set, @buf is in a user address space
+ *
+ * This function copies one page from old kernel memory into buffer pointed by
+ * @buf. If @buf is in userspace, set @userbuf to %1. Returns number of bytes
+ * copied or negative error in case of failure.
+ */
+ssize_t copy_oldmem_page(unsigned long pfn, char *buf,
+			 size_t csize, unsigned long offset,
+			 int userbuf)
+{
+	void *vaddr;
+
+	if (!csize)
+		return 0;
+
+	vaddr = memremap(__pfn_to_phys(pfn), PAGE_SIZE, MEMREMAP_WB);
+	if (!vaddr)
+		return -ENOMEM;
+
+	if (userbuf) {
+		if (copy_to_user((char __user *)buf, vaddr + offset, csize)) {
+			memunmap(vaddr);
+			return -EFAULT;
+		}
+	} else {
+		memcpy(buf, vaddr + offset, csize);
+	}
+
+	memunmap(vaddr);
+
+	return csize;
+}
+
+/**
+ * elfcorehdr_read - read from ELF core header
+ * @buf: buffer where the data is placed
+ * @csize: number of bytes to read
+ * @ppos: address in the memory
+ *
+ * This function reads @count bytes from elf core header which exists
+ * on crash dump kernel's memory.
+ */
+ssize_t elfcorehdr_read(char *buf, size_t count, u64 *ppos)
+{
+	memcpy(buf, phys_to_virt((phys_addr_t)*ppos), count);
+	return count;
+}
--- a/arch/arm64/kernel/debug-monitors.c
+++ b/arch/arm64/kernel/debug-monitors.c
@@ -36,7 +36,7 @@
 /* Determine debug architecture. */
 u8 debug_monitors_arch(void)
 {
-	return cpuid_feature_extract_unsigned_field(read_system_reg(SYS_ID_AA64DFR0_EL1),
+	return cpuid_feature_extract_unsigned_field(read_sanitised_ftr_reg(SYS_ID_AA64DFR0_EL1),
 						ID_AA64DFR0_DEBUGVER_SHIFT);
 }


--- a/arch/arm64/kernel/efi-header.S
+++ b/arch/arm64/kernel/efi-header.S
+/*
+ * Copyright (C) 2013 - 2017 Linaro, Ltd.
+ * Copyright (C) 2013, 2014 Red Hat, Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/pe.h>
+#include <linux/sizes.h>
+
+	.macro	__EFI_PE_HEADER
+	.long	PE_MAGIC
+coff_header:
+	.short	IMAGE_FILE_MACHINE_ARM64		// Machine
+	.short	section_count				// NumberOfSections
+	.long	0 					// TimeDateStamp
+	.long	0					// PointerToSymbolTable
+	.long	0					// NumberOfSymbols
+	.short	section_table - optional_header		// SizeOfOptionalHeader
+	.short	IMAGE_FILE_DEBUG_STRIPPED | \
+		IMAGE_FILE_EXECUTABLE_IMAGE | \
+		IMAGE_FILE_LINE_NUMS_STRIPPED		// Characteristics
+
+optional_header:
+	.short	PE_OPT_MAGIC_PE32PLUS			// PE32+ format
+	.byte	0x02					// MajorLinkerVersion
+	.byte	0x14					// MinorLinkerVersion
+	.long	__initdata_begin - efi_header_end	// SizeOfCode
+	.long	__pecoff_data_size			// SizeOfInitializedData
+	.long	0					// SizeOfUninitializedData
+	.long	__efistub_entry - _head			// AddressOfEntryPoint
+	.long	efi_header_end - _head			// BaseOfCode
+
+extra_header_fields:
+	.quad	0					// ImageBase
+	.long	SZ_4K					// SectionAlignment
+	.long	PECOFF_FILE_ALIGNMENT			// FileAlignment
+	.short	0					// MajorOperatingSystemVersion
+	.short	0					// MinorOperatingSystemVersion
+	.short	0					// MajorImageVersion
+	.short	0					// MinorImageVersion
+	.short	0					// MajorSubsystemVersion
+	.short	0					// MinorSubsystemVersion
+	.long	0					// Win32VersionValue
+
+	.long	_end - _head				// SizeOfImage
+
+	// Everything before the kernel image is considered part of the header
+	.long	efi_header_end - _head			// SizeOfHeaders
+	.long	0					// CheckSum
+	.short	IMAGE_SUBSYSTEM_EFI_APPLICATION		// Subsystem
+	.short	0					// DllCharacteristics
+	.quad	0					// SizeOfStackReserve
+	.quad	0					// SizeOfStackCommit
+	.quad	0					// SizeOfHeapReserve
+	.quad	0					// SizeOfHeapCommit
+	.long	0					// LoaderFlags
+	.long	(section_table - .) / 8			// NumberOfRvaAndSizes
+
+	.quad	0					// ExportTable
+	.quad	0					// ImportTable
+	.quad	0					// ResourceTable
+	.quad	0					// ExceptionTable
+	.quad	0					// CertificationTable
+	.quad	0					// BaseRelocationTable
+
+#ifdef CONFIG_DEBUG_EFI
+	.long	efi_debug_table - _head			// DebugTable
+	.long	efi_debug_table_size
+#endif
+
+	// Section table
+section_table:
+	.ascii	".text\0\0\0"
+	.long	__initdata_begin - efi_header_end	// VirtualSize
+	.long	efi_header_end - _head			// VirtualAddress
+	.long	__initdata_begin - efi_header_end	// SizeOfRawData
+	.long	efi_header_end - _head			// PointerToRawData
+
+	.long	0					// PointerToRelocations
+	.long	0					// PointerToLineNumbers
+	.short	0					// NumberOfRelocations
+	.short	0					// NumberOfLineNumbers
+	.long	IMAGE_SCN_CNT_CODE | \
+		IMAGE_SCN_MEM_READ | \
+		IMAGE_SCN_MEM_EXECUTE			// Characteristics
+
+	.ascii	".data\0\0\0"
+	.long	__pecoff_data_size			// VirtualSize
+	.long	__initdata_begin - _head		// VirtualAddress
+	.long	__pecoff_data_rawsize			// SizeOfRawData
+	.long	__initdata_begin - _head		// PointerToRawData
+
+	.long	0					// PointerToRelocations
+	.long	0					// PointerToLineNumbers
+	.short	0					// NumberOfRelocations
+	.short	0					// NumberOfLineNumbers
+	.long	IMAGE_SCN_CNT_INITIALIZED_DATA | \
+		IMAGE_SCN_MEM_READ | \
+		IMAGE_SCN_MEM_WRITE			// Characteristics
+
+	.set	section_count, (. - section_table) / 40
+
+#ifdef CONFIG_DEBUG_EFI
+	/*
+	 * The debug table is referenced via its Relative Virtual Address (RVA),
+	 * which is only defined for those parts of the image that are covered
+	 * by a section declaration. Since this header is not covered by any
+	 * section, the debug table must be emitted elsewhere. So stick it in
+	 * the .init.rodata section instead.
+	 *
+	 * Note that the EFI debug entry itself may legally have a zero RVA,
+	 * which means we can simply put it right after the section headers.
+	 */
+	__INITRODATA
+
+	.align	2
+efi_debug_table:
+	// EFI_IMAGE_DEBUG_DIRECTORY_ENTRY
+	.long	0					// Characteristics
+	.long	0					// TimeDateStamp
+	.short	0					// MajorVersion
+	.short	0					// MinorVersion
+	.long	IMAGE_DEBUG_TYPE_CODEVIEW		// Type
+	.long	efi_debug_entry_size			// SizeOfData
+	.long	0					// RVA
+	.long	efi_debug_entry - _head			// FileOffset
+
+	.set	efi_debug_table_size, . - efi_debug_table
+	.previous
+
+efi_debug_entry:
+	// EFI_IMAGE_DEBUG_CODEVIEW_NB10_ENTRY
+	.ascii	"NB10"					// Signature
+	.long	0					// Unknown
+	.long	0					// Unknown2
+	.long	0					// Unknown3
+
+	.asciz	VMLINUX_PATH
+
+	.set	efi_debug_entry_size, . - efi_debug_entry
+#endif
+
+	/*
+	 * EFI will load .text onwards at the 4k section alignment
+	 * described in the PE/COFF header. To ensure that instruction
+	 * sequences using an adrp and a :lo12: immediate will function
+	 * correctly at this alignment, we must ensure that .text is
+	 * placed at a 4k boundary in the Image to begin with.
+	 */
+	.align 12
+efi_header_end:
+	.endm
--- a/arch/arm64/kernel/head.S
+++ b/arch/arm64/kernel/head.S
@@ -42,6 +42,8 @@
 #include <asm/thread_info.h>
 #include <asm/virt.h>

+#include "efi-header.S"
+
 #define __PHYS_OFFSET	(KERNEL_START - TEXT_OFFSET)

 #if (TEXT_OFFSET & 0xfff) != 0
@@ -89,166 +91,14 @@ _head:
 	.quad	0				// reserved
 	.quad	0				// reserved
 	.quad	0				// reserved
-	.byte	0x41				// Magic number, "ARM\x64"
-	.byte	0x52
-	.byte	0x4d
-	.byte	0x64
+	.ascii	"ARM\x64"			// Magic number
 #ifdef CONFIG_EFI
 	.long	pe_header - _head		// Offset to the PE header.
-#else
-	.word	0				// reserved
-#endif

-#ifdef CONFIG_EFI
-	.align 3
 pe_header:
-	.ascii	"PE"
-	.short 	0
-coff_header:
-	.short	0xaa64				// AArch64
-	.short	2				// nr_sections
-	.long	0 				// TimeDateStamp
-	.long	0				// PointerToSymbolTable
-	.long	1				// NumberOfSymbols
-	.short	section_table - optional_header	// SizeOfOptionalHeader
-	.short	0x206				// Characteristics.
-						// IMAGE_FILE_DEBUG_STRIPPED |
-						// IMAGE_FILE_EXECUTABLE_IMAGE |
-						// IMAGE_FILE_LINE_NUMS_STRIPPED
-optional_header:
-	.short	0x20b				// PE32+ format
-	.byte	0x02				// MajorLinkerVersion
-	.byte	0x14				// MinorLinkerVersion
-	.long	_end - efi_header_end		// SizeOfCode
-	.long	0				// SizeOfInitializedData
-	.long	0				// SizeOfUninitializedData
-	.long	__efistub_entry - _head		// AddressOfEntryPoint
-	.long	efi_header_end - _head		// BaseOfCode
-
-extra_header_fields:
-	.quad	0				// ImageBase
-	.long	0x1000				// SectionAlignment
-	.long	PECOFF_FILE_ALIGNMENT		// FileAlignment
-	.short	0				// MajorOperatingSystemVersion
-	.short	0				// MinorOperatingSystemVersion
-	.short	0				// MajorImageVersion
-	.short	0				// MinorImageVersion
-	.short	0				// MajorSubsystemVersion
-	.short	0				// MinorSubsystemVersion
-	.long	0				// Win32VersionValue
-
-	.long	_end - _head			// SizeOfImage
-
-	// Everything before the kernel image is considered part of the header
-	.long	efi_header_end - _head		// SizeOfHeaders
-	.long	0				// CheckSum
-	.short	0xa				// Subsystem (EFI application)
-	.short	0				// DllCharacteristics
-	.quad	0				// SizeOfStackReserve
-	.quad	0				// SizeOfStackCommit
-	.quad	0				// SizeOfHeapReserve
-	.quad	0				// SizeOfHeapCommit
-	.long	0				// LoaderFlags
-	.long	(section_table - .) / 8		// NumberOfRvaAndSizes
-
-	.quad	0				// ExportTable
-	.quad	0				// ImportTable
-	.quad	0				// ResourceTable
-	.quad	0				// ExceptionTable
-	.quad	0				// CertificationTable
-	.quad	0				// BaseRelocationTable
-
-#ifdef CONFIG_DEBUG_EFI
-	.long	efi_debug_table - _head		// DebugTable
-	.long	efi_debug_table_size
-#endif
-
-	// Section table
-section_table:
-
-	/*
-	 * The EFI application loader requires a relocation section
-	 * because EFI applications must be relocatable.  This is a
-	 * dummy section as far as we are concerned.
-	 */
-	.ascii	".reloc"
-	.byte	0
-	.byte	0			// end of 0 padding of section name
-	.long	0
-	.long	0
-	.long	0			// SizeOfRawData
-	.long	0			// PointerToRawData
-	.long	0			// PointerToRelocations
-	.long	0			// PointerToLineNumbers
-	.short	0			// NumberOfRelocations
-	.short	0			// NumberOfLineNumbers
-	.long	0x42100040		// Characteristics (section flags)
-
-
-	.ascii	".text"
-	.byte	0
-	.byte	0
-	.byte	0        		// end of 0 padding of section name
-	.long	_end - efi_header_end	// VirtualSize
-	.long	efi_header_end - _head	// VirtualAddress
-	.long	_edata - efi_header_end	// SizeOfRawData
-	.long	efi_header_end - _head	// PointerToRawData
-
-	.long	0		// PointerToRelocations (0 for executables)
-	.long	0		// PointerToLineNumbers (0 for executables)
-	.short	0		// NumberOfRelocations  (0 for executables)
-	.short	0		// NumberOfLineNumbers  (0 for executables)
-	.long	0xe0500020	// Characteristics (section flags)
-
-#ifdef CONFIG_DEBUG_EFI
-	/*
-	 * The debug table is referenced via its Relative Virtual Address (RVA),
-	 * which is only defined for those parts of the image that are covered
-	 * by a section declaration. Since this header is not covered by any
-	 * section, the debug table must be emitted elsewhere. So stick it in
-	 * the .init.rodata section instead.
-	 *
-	 * Note that the EFI debug entry itself may legally have a zero RVA,
-	 * which means we can simply put it right after the section headers.
-	 */
-	__INITRODATA
-
-	.align	2
-efi_debug_table:
-	// EFI_IMAGE_DEBUG_DIRECTORY_ENTRY
-	.long	0			// Characteristics
-	.long	0			// TimeDateStamp
-	.short	0			// MajorVersion
-	.short	0			// MinorVersion
-	.long	2			// Type == EFI_IMAGE_DEBUG_TYPE_CODEVIEW
-	.long	efi_debug_entry_size	// SizeOfData
-	.long	0			// RVA
-	.long	efi_debug_entry - _head	// FileOffset
-
-	.set	efi_debug_table_size, . - efi_debug_table
-	.previous
-
-efi_debug_entry:
-	// EFI_IMAGE_DEBUG_CODEVIEW_NB10_ENTRY
-	.ascii	"NB10"			// Signature
-	.long	0			// Unknown
-	.long	0			// Unknown2
-	.long	0			// Unknown3
-
-	.asciz	VMLINUX_PATH
-
-	.set	efi_debug_entry_size, . - efi_debug_entry
-#endif
-
-	/*
-	 * EFI will load .text onwards at the 4k section alignment
-	 * described in the PE/COFF header. To ensure that instruction
-	 * sequences using an adrp and a :lo12: immediate will function
-	 * correctly at this alignment, we must ensure that .text is
-	 * placed at a 4k boundary in the Image to begin with.
-	 */
-	.align 12
-efi_header_end:
+	__EFI_PE_HEADER
+#else
+	.long	0				// reserved
 #endif

 	__INIT
@@ -534,13 +384,8 @@ ENTRY(kimage_vaddr)
 ENTRY(el2_setup)
 	mrs	x0, CurrentEL
 	cmp	x0, #CurrentEL_EL2
-	b.ne	1f
-	mrs	x0, sctlr_el2
-CPU_BE(	orr	x0, x0, #(1 << 25)	)	// Set the EE bit for EL2
-CPU_LE(	bic	x0, x0, #(1 << 25)	)	// Clear the EE bit for EL2
-	msr	sctlr_el2, x0
-	b	2f
-1:	mrs	x0, sctlr_el1
+	b.eq	1f
+	mrs	x0, sctlr_el1
 CPU_BE(	orr	x0, x0, #(3 << 24)	)	// Set the EE and E0E bits for EL1
 CPU_LE(	bic	x0, x0, #(3 << 24)	)	// Clear the EE and E0E bits for EL1
 	msr	sctlr_el1, x0
@@ -548,7 +393,11 @@ CPU_LE(	bic	x0, x0, #(3 << 24)	)	// Clear the EE and E0E bits for EL1
 	isb
 	ret

-2:
+1:	mrs	x0, sctlr_el2
+CPU_BE(	orr	x0, x0, #(1 << 25)	)	// Set the EE bit for EL2
+CPU_LE(	bic	x0, x0, #(1 << 25)	)	// Clear the EE bit for EL2
+	msr	sctlr_el2, x0
+
 #ifdef CONFIG_ARM64_VHE
 	/*
 	 * Check for VHE being present. For the rest of the EL2 setup,
@@ -594,14 +443,14 @@ set_hcr:
 	cmp	x0, #1
 	b.ne	3f

-	mrs_s	x0, ICC_SRE_EL2
+	mrs_s	x0, SYS_ICC_SRE_EL2
 	orr	x0, x0, #ICC_SRE_EL2_SRE	// Set ICC_SRE_EL2.SRE==1
 	orr	x0, x0, #ICC_SRE_EL2_ENABLE	// Set ICC_SRE_EL2.Enable==1
-	msr_s	ICC_SRE_EL2, x0
+	msr_s	SYS_ICC_SRE_EL2, x0
 	isb					// Make sure SRE is now set
-	mrs_s	x0, ICC_SRE_EL2			// Read SRE back,
+	mrs_s	x0, SYS_ICC_SRE_EL2		// Read SRE back,
 	tbz	x0, #0, 3f			// and check that it sticks
-	msr_s	ICH_HCR_EL2, xzr		// Reset ICC_HCR_EL2 to defaults
+	msr_s	SYS_ICH_HCR_EL2, xzr		// Reset ICC_HCR_EL2 to defaults

 3:
 #endif
@@ -612,26 +461,6 @@ set_hcr:
 	msr	vpidr_el2, x0
 	msr	vmpidr_el2, x1

-	/*
-	 * When VHE is not in use, early init of EL2 and EL1 needs to be
-	 * done here.
-	 * When VHE _is_ in use, EL1 will not be used in the host and
-	 * requires no configuration, and all non-hyp-specific EL2 setup
-	 * will be done via the _EL1 system register aliases in __cpu_setup.
-	 */
-	cbnz	x2, 1f
-
-	/* sctlr_el1 */
-	mov	x0, #0x0800			// Set/clear RES{1,0} bits
-CPU_BE(	movk	x0, #0x33d0, lsl #16	)	// Set EE and E0E on BE systems
-CPU_LE(	movk	x0, #0x30d0, lsl #16	)	// Clear EE and E0E on LE systems
-	msr	sctlr_el1, x0
-
-	/* Coprocessor traps. */
-	mov	x0, #0x33ff
-	msr	cptr_el2, x0			// Disable copro. traps to EL2
-1:
-
 #ifdef CONFIG_COMPAT
 	msr	hstr_el2, xzr			// Disable CP15 traps to EL2
 #endif
@@ -668,6 +497,23 @@ CPU_LE(	movk	x0, #0x30d0, lsl #16	)	// Clear EE and E0E on LE systems
 	ret

 install_el2_stub:
+	/*
+	 * When VHE is not in use, early init of EL2 and EL1 needs to be
+	 * done here.
+	 * When VHE _is_ in use, EL1 will not be used in the host and
+	 * requires no configuration, and all non-hyp-specific EL2 setup
+	 * will be done via the _EL1 system register aliases in __cpu_setup.
+	 */
+	/* sctlr_el1 */
+	mov	x0, #0x0800			// Set/clear RES{1,0} bits
+CPU_BE(	movk	x0, #0x33d0, lsl #16	)	// Set EE and E0E on BE systems
+CPU_LE(	movk	x0, #0x30d0, lsl #16	)	// Clear EE and E0E on LE systems
+	msr	sctlr_el1, x0
+
+	/* Coprocessor traps. */
+	mov	x0, #0x33ff
+	msr	cptr_el2, x0			// Disable copro. traps to EL2
+
 	/* Hypervisor stub */
 	adr_l	x0, __hyp_stub_vectors
 	msr	vbar_el2, x0

--- a/arch/arm64/kernel/hibernate.c
+++ b/arch/arm64/kernel/hibernate.c
@@ -28,6 +28,7 @@
 #include <asm/cacheflush.h>
 #include <asm/cputype.h>
 #include <asm/irqflags.h>
+#include <asm/kexec.h>
 #include <asm/memory.h>
 #include <asm/mmu_context.h>
 #include <asm/pgalloc.h>
@@ -102,7 +103,8 @@ int pfn_is_nosave(unsigned long pfn)
 	unsigned long nosave_begin_pfn = sym_to_pfn(&__nosave_begin);
 	unsigned long nosave_end_pfn = sym_to_pfn(&__nosave_end - 1);

-	return (pfn >= nosave_begin_pfn) && (pfn <= nosave_end_pfn);
+	return ((pfn >= nosave_begin_pfn) && (pfn <= nosave_end_pfn)) ||
+		crash_is_nosave(pfn);
 }

 void notrace save_processor_state(void)
@@ -286,6 +288,9 @@ int swsusp_arch_suspend(void)
 	local_dbg_save(flags);

 	if (__cpu_suspend_enter(&state)) {
+		/* make the crash dump kernel image visible/saveable */
+		crash_prepare_suspend();
+
 		sleep_cpu = smp_processor_id();
 		ret = swsusp_save();
 	} else {
@@ -297,6 +302,9 @@ int swsusp_arch_suspend(void)
 		if (el2_reset_needed())
 			dcache_clean_range(__hyp_idmap_text_start, __hyp_idmap_text_end);

+		/* make the crash dump kernel image protected again */
+		crash_post_resume();
+
 		/*
 		 * Tell the hibernation core that we've just restored
 		 * the memory

--- a/arch/arm64/kernel/machine_kexec.c
+++ b/arch/arm64/kernel/machine_kexec.c
@@ -9,12 +9,19 @@
 * published by the Free Software Foundation.
 */

+#include <linux/interrupt.h>
+#include <linux/irq.h>
+#include <linux/kernel.h>
 #include <linux/kexec.h>
+#include <linux/page-flags.h>
 #include <linux/smp.h>

 #include <asm/cacheflush.h>
 #include <asm/cpu_ops.h>
+#include <asm/memory.h>
+#include <asm/mmu.h>
 #include <asm/mmu_context.h>
+#include <asm/page.h>

 #include "cpu-reset.h"

@@ -22,8 +29,6 @@
 extern const unsigned char arm64_relocate_new_kernel[];
 extern const unsigned long arm64_relocate_new_kernel_size;

-static unsigned long kimage_start;
-
 /**
 * kexec_image_info - For debugging output.
 */
@@ -64,8 +69,6 @@ void machine_kexec_cleanup(struct kimage *kimage)
 */
 int machine_kexec_prepare(struct kimage *kimage)
 {
-	kimage_start = kimage->start;
-
 	kexec_image_info(kimage);

 	if (kimage->type != KEXEC_TYPE_CRASH && cpus_are_stuck_in_kernel()) {
@@ -144,11 +147,15 @@ void machine_kexec(struct kimage *kimage)
 {
 	phys_addr_t reboot_code_buffer_phys;
 	void *reboot_code_buffer;
+	bool in_kexec_crash = (kimage == kexec_crash_image);
+	bool stuck_cpus = cpus_are_stuck_in_kernel();

 	/*
 	 * New cpus may have become stuck_in_kernel after we loaded the image.
 	 */
-	BUG_ON(cpus_are_stuck_in_kernel() || (num_online_cpus() > 1));
+	BUG_ON(!in_kexec_crash && (stuck_cpus || (num_online_cpus() > 1)));
+	WARN(in_kexec_crash && (stuck_cpus || smp_crash_stop_failed()),
+		"Some CPUs may be stale, kdump will be unreliable.\n");

 	reboot_code_buffer_phys = page_to_phys(kimage->control_code_page);
 	reboot_code_buffer = phys_to_virt(reboot_code_buffer_phys);
@@ -183,7 +190,7 @@ void machine_kexec(struct kimage *kimage)
 	kexec_list_flush(kimage);

 	/* Flush the new image if already in place. */
-	if (kimage->head & IND_DONE)
+	if ((kimage != kexec_crash_image) && (kimage->head & IND_DONE))
 		kexec_segment_flush(kimage);

 	pr_info("Bye!\n");
@@ -200,13 +207,158 @@ void machine_kexec(struct kimage *kimage)
 	 * relocation is complete.
 	 */

-	cpu_soft_restart(1, reboot_code_buffer_phys, kimage->head,
-		kimage_start, 0);
+	cpu_soft_restart(kimage != kexec_crash_image,
+		reboot_code_buffer_phys, kimage->head, kimage->start, 0);

 	BUG(); /* Should never get here. */
 }

+static void machine_kexec_mask_interrupts(void)
+{
+	unsigned int i;
+	struct irq_desc *desc;
+
+	for_each_irq_desc(i, desc) {
+		struct irq_chip *chip;
+		int ret;
+
+		chip = irq_desc_get_chip(desc);
+		if (!chip)
+			continue;
+
+		/*
+		 * First try to remove the active state. If this
+		 * fails, try to EOI the interrupt.
+		 */
+		ret = irq_set_irqchip_state(i, IRQCHIP_STATE_ACTIVE, false);
+
+		if (ret && irqd_irq_inprogress(&desc->irq_data) &&
+		    chip->irq_eoi)
+			chip->irq_eoi(&desc->irq_data);
+
+		if (chip->irq_mask)
+			chip->irq_mask(&desc->irq_data);
+
+		if (chip->irq_disable && !irqd_irq_disabled(&desc->irq_data))
+			chip->irq_disable(&desc->irq_data);
+	}
+}
+
+/**
+ * machine_crash_shutdown - shutdown non-crashing cpus and save registers
+ */
 void machine_crash_shutdown(struct pt_regs *regs)
 {
-	/* Empty routine needed to avoid build errors. */
+	local_irq_disable();
+
+	/* shutdown non-crashing cpus */
+	smp_send_crash_stop();
+
+	/* for crashing cpu */
+	crash_save_cpu(regs, smp_processor_id());
+	machine_kexec_mask_interrupts();
+
+	pr_info("Starting crashdump kernel...\n");
+}
+
+void arch_kexec_protect_crashkres(void)
+{
+	int i;
+
+	kexec_segment_flush(kexec_crash_image);
+
+	for (i = 0; i < kexec_crash_image->nr_segments; i++)
+		set_memory_valid(
+			__phys_to_virt(kexec_crash_image->segment[i].mem),
+			kexec_crash_image->segment[i].memsz >> PAGE_SHIFT, 0);
+}
+
+void arch_kexec_unprotect_crashkres(void)
+{
+	int i;
+
+	for (i = 0; i < kexec_crash_image->nr_segments; i++)
+		set_memory_valid(
+			__phys_to_virt(kexec_crash_image->segment[i].mem),
+			kexec_crash_image->segment[i].memsz >> PAGE_SHIFT, 1);
+}
+
+#ifdef CONFIG_HIBERNATION
+/*
+ * To preserve the crash dump kernel image, the relevant memory segments
+ * should be mapped again around the hibernation.
+ */
+void crash_prepare_suspend(void)
+{
+	if (kexec_crash_image)
+		arch_kexec_unprotect_crashkres();
+}
+
+void crash_post_resume(void)
+{
+	if (kexec_crash_image)
+		arch_kexec_protect_crashkres();
+}
+
+/*
+ * crash_is_nosave
+ *
+ * Return true only if a page is part of reserved memory for crash dump kernel,
+ * but does not hold any data of loaded kernel image.
+ *
+ * Note that all the pages in crash dump kernel memory have been initially
+ * marked as Reserved in kexec_reserve_crashkres_pages().
+ *
+ * In hibernation, the pages which are Reserved and yet "nosave" are excluded
+ * from the hibernation iamge. crash_is_nosave() does thich check for crash
+ * dump kernel and will reduce the total size of hibernation image.
+ */
+
+bool crash_is_nosave(unsigned long pfn)
+{
+	int i;
+	phys_addr_t addr;
+
+	if (!crashk_res.end)
+		return false;
+
+	/* in reserved memory? */
+	addr = __pfn_to_phys(pfn);
+	if ((addr < crashk_res.start) || (crashk_res.end < addr))
+		return false;
+
+	if (!kexec_crash_image)
+		return true;
+
+	/* not part of loaded kernel image? */
+	for (i = 0; i < kexec_crash_image->nr_segments; i++)
+		if (addr >= kexec_crash_image->segment[i].mem &&
+				addr < (kexec_crash_image->segment[i].mem +
+					kexec_crash_image->segment[i].memsz))
+			return false;
+
+	return true;
+}
+
+void crash_free_reserved_phys_range(unsigned long begin, unsigned long end)
+{
+	unsigned long addr;
+	struct page *page;
+
+	for (addr = begin; addr < end; addr += PAGE_SIZE) {
+		page = phys_to_page(addr);
+		ClearPageReserved(page);
+		free_reserved_page(page);
+	}
+}
+#endif /* CONFIG_HIBERNATION */
+
+void arch_crash_save_vmcoreinfo(void)
+{
+	VMCOREINFO_NUMBER(VA_BITS);
+	/* Please note VMCOREINFO_NUMBER() uses "%d", not "%x" */
+	vmcoreinfo_append_str("NUMBER(kimage_voffset)=0x%llx\n",
+						kimage_voffset);
+	vmcoreinfo_append_str("NUMBER(PHYS_OFFSET)=0x%llx\n",
+						PHYS_OFFSET);
 }
--- a/arch/arm64/kernel/module-plts.c
+++ b/arch/arm64/kernel/module-plts.c
 /*
- * Copyright (C) 2014-2016 Linaro Ltd. <ard.biesheuvel@linaro.org>
+ * Copyright (C) 2014-2017 Linaro Ltd. <ard.biesheuvel@linaro.org>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
@@ -26,34 +26,20 @@ struct plt_entry {
 	__le32	br;	/* br	x16				*/
 };

-u64 module_emit_plt_entry(struct module *mod, const Elf64_Rela *rela,
+static bool in_init(const struct module *mod, void *loc)
+{
+	return (u64)loc - (u64)mod->init_layout.base < mod->init_layout.size;
+}
+
+u64 module_emit_plt_entry(struct module *mod, void *loc, const Elf64_Rela *rela,
 			  Elf64_Sym *sym)
 {
-	struct plt_entry *plt = (struct plt_entry *)mod->arch.plt->sh_addr;
-	int i = mod->arch.plt_num_entries;
+	struct mod_plt_sec *pltsec = !in_init(mod, loc) ? &mod->arch.core :
+							  &mod->arch.init;
+	struct plt_entry *plt = (struct plt_entry *)pltsec->plt->sh_addr;
+	int i = pltsec->plt_num_entries;
 	u64 val = sym->st_value + rela->r_addend;

-	/*
-	 * We only emit PLT entries against undefined (SHN_UNDEF) symbols,
-	 * which are listed in the ELF symtab section, but without a type
-	 * or a size.
-	 * So, similar to how the module loader uses the Elf64_Sym::st_value
-	 * field to store the resolved addresses of undefined symbols, let's
-	 * borrow the Elf64_Sym::st_size field (whose value is never used by
-	 * the module loader, even for symbols that are defined) to record
-	 * the address of a symbol's associated PLT entry as we emit it for a
-	 * zero addend relocation (which is the only kind we have to deal with
-	 * in practice). This allows us to find duplicates without having to
-	 * go through the table every time.
-	 */
-	if (rela->r_addend == 0 && sym->st_size != 0) {
-		BUG_ON(sym->st_size < (u64)plt || sym->st_size >= (u64)&plt[i]);
-		return sym->st_size;
-	}
-
-	mod->arch.plt_num_entries++;
-	BUG_ON(mod->arch.plt_num_entries > mod->arch.plt_max_entries);
-
 	/*
 	 * MOVK/MOVN/MOVZ opcode:
 	 * +--------+------------+--------+-----------+-------------+---------+
@@ -72,8 +58,19 @@ u64 module_emit_plt_entry(struct module *mod, const Elf64_Rela *rela,
 		cpu_to_le32(0xd61f0200)
 	};

-	if (rela->r_addend == 0)
-		sym->st_size = (u64)&plt[i];
+	/*
+	 * Check if the entry we just created is a duplicate. Given that the
+	 * relocations are sorted, this will be the last entry we allocated.
+	 * (if one exists).
+	 */
+	if (i > 0 &&
+	    plt[i].mov0 == plt[i - 1].mov0 &&
+	    plt[i].mov1 == plt[i - 1].mov1 &&
+	    plt[i].mov2 == plt[i - 1].mov2)
+		return (u64)&plt[i - 1];
+
+	pltsec->plt_num_entries++;
+	BUG_ON(pltsec->plt_num_entries > pltsec->plt_max_entries);

 	return (u64)&plt[i];
 }
@@ -104,7 +101,8 @@ static bool duplicate_rel(const Elf64_Rela *rela, int num)
 	return num > 0 && cmp_rela(rela + num, rela + num - 1) == 0;
 }

-static unsigned int count_plts(Elf64_Sym *syms, Elf64_Rela *rela, int num)
+static unsigned int count_plts(Elf64_Sym *syms, Elf64_Rela *rela, int num,
+			       Elf64_Word dstidx)
 {
 	unsigned int ret = 0;
 	Elf64_Sym *s;
@@ -116,13 +114,17 @@ static unsigned int count_plts(Elf64_Sym *syms, Elf64_Rela *rela, int num)
 		case R_AARCH64_CALL26:
 			/*
 			 * We only have to consider branch targets that resolve
-			 * to undefined symbols. This is not simply a heuristic,
-			 * it is a fundamental limitation, since the PLT itself
-			 * is part of the module, and needs to be within 128 MB
-			 * as well, so modules can never grow beyond that limit.
+			 * to symbols that are defined in a different section.
+			 * This is not simply a heuristic, it is a fundamental
+			 * limitation, since there is no guaranteed way to emit
+			 * PLT entries sufficiently close to the branch if the
+			 * section size exceeds the range of a branch
+			 * instruction. So ignore relocations against defined
+			 * symbols if they live in the same section as the
+			 * relocation target.
 			 */
 			s = syms + ELF64_R_SYM(rela[i].r_info);
-			if (s->st_shndx != SHN_UNDEF)
+			if (s->st_shndx == dstidx)
 				break;

 			/*
@@ -149,7 +151,8 @@ static unsigned int count_plts(Elf64_Sym *syms, Elf64_Rela *rela, int num)
 int module_frob_arch_sections(Elf_Ehdr *ehdr, Elf_Shdr *sechdrs,
 			      char *secstrings, struct module *mod)
 {
-	unsigned long plt_max_entries = 0;
+	unsigned long core_plts = 0;
+	unsigned long init_plts = 0;
 	Elf64_Sym *syms = NULL;
 	int i;

@@ -158,14 +161,16 @@ int module_frob_arch_sections(Elf_Ehdr *ehdr, Elf_Shdr *sechdrs,
 	 * entries. Record the symtab address as well.
 	 */
 	for (i = 0; i < ehdr->e_shnum; i++) {
-		if (strcmp(".plt", secstrings + sechdrs[i].sh_name) == 0)
-			mod->arch.plt = sechdrs + i;
+		if (!strcmp(secstrings + sechdrs[i].sh_name, ".plt"))
+			mod->arch.core.plt = sechdrs + i;
+		else if (!strcmp(secstrings + sechdrs[i].sh_name, ".init.plt"))
+			mod->arch.init.plt = sechdrs + i;
 		else if (sechdrs[i].sh_type == SHT_SYMTAB)
 			syms = (Elf64_Sym *)sechdrs[i].sh_addr;
 	}

-	if (!mod->arch.plt) {
-		pr_err("%s: module PLT section missing\n", mod->name);
+	if (!mod->arch.core.plt || !mod->arch.init.plt) {
+		pr_err("%s: module PLT section(s) missing\n", mod->name);
 		return -ENOEXEC;
 	}
 	if (!syms) {
@@ -188,14 +193,27 @@ int module_frob_arch_sections(Elf_Ehdr *ehdr, Elf_Shdr *sechdrs,
 		/* sort by type, symbol index and addend */
 		sort(rels, numrels, sizeof(Elf64_Rela), cmp_rela, NULL);

-		plt_max_entries += count_plts(syms, rels, numrels);
+		if (strncmp(secstrings + dstsec->sh_name, ".init", 5) != 0)
+			core_plts += count_plts(syms, rels, numrels,
+						sechdrs[i].sh_info);
+		else
+			init_plts += count_plts(syms, rels, numrels,
+						sechdrs[i].sh_info);
 	}

-	mod->arch.plt->sh_type = SHT_NOBITS;
-	mod->arch.plt->sh_flags = SHF_EXECINSTR | SHF_ALLOC;
-	mod->arch.plt->sh_addralign = L1_CACHE_BYTES;
-	mod->arch.plt->sh_size = plt_max_entries * sizeof(struct plt_entry);
-	mod->arch.plt_num_entries = 0;
-	mod->arch.plt_max_entries = plt_max_entries;
+	mod->arch.core.plt->sh_type = SHT_NOBITS;
+	mod->arch.core.plt->sh_flags = SHF_EXECINSTR | SHF_ALLOC;
+	mod->arch.core.plt->sh_addralign = L1_CACHE_BYTES;
+	mod->arch.core.plt->sh_size = (core_plts  + 1) * sizeof(struct plt_entry);
+	mod->arch.core.plt_num_entries = 0;
+	mod->arch.core.plt_max_entries = core_plts;
+
+	mod->arch.init.plt->sh_type = SHT_NOBITS;
+	mod->arch.init.plt->sh_flags = SHF_EXECINSTR | SHF_ALLOC;
+	mod->arch.init.plt->sh_addralign = L1_CACHE_BYTES;
+	mod->arch.init.plt->sh_size = (init_plts + 1) * sizeof(struct plt_entry);
+	mod->arch.init.plt_num_entries = 0;
+	mod->arch.init.plt_max_entries = init_plts;
+
 	return 0;
 }
--- a/arch/arm64/kernel/module.c
+++ b/arch/arm64/kernel/module.c
@@ -380,7 +380,7 @@ int apply_relocate_add(Elf64_Shdr *sechdrs,

 			if (IS_ENABLED(CONFIG_ARM64_MODULE_PLTS) &&
 			    ovf == -ERANGE) {
-				val = module_emit_plt_entry(me, &rel[i], sym);
+				val = module_emit_plt_entry(me, loc, &rel[i], sym);
 				ovf = reloc_insn_imm(RELOC_OP_PREL, loc, val, 2,
 						     26, AARCH64_INSN_IMM_26);
 			}

--- a/arch/arm64/kernel/module.lds
+++ b/arch/arm64/kernel/module.lds
 SECTIONS {
 	.plt (NOLOAD) : { BYTE(0) }
+	.init.plt (NOLOAD) : { BYTE(0) }
 }
--- a/arch/arm64/kernel/perf_event.c
+++ b/arch/arm64/kernel/perf_event.c
@@ -290,6 +290,12 @@ static const unsigned armv8_a53_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]
 	[C(L1I)][C(OP_READ)][C(RESULT_ACCESS)]	= ARMV8_PMUV3_PERFCTR_L1I_CACHE,
 	[C(L1I)][C(OP_READ)][C(RESULT_MISS)]	= ARMV8_PMUV3_PERFCTR_L1I_CACHE_REFILL,

+	[C(LL)][C(OP_READ)][C(RESULT_ACCESS)]	= ARMV8_PMUV3_PERFCTR_L2D_CACHE,
+	[C(LL)][C(OP_READ)][C(RESULT_MISS)]	= ARMV8_PMUV3_PERFCTR_L2D_CACHE_REFILL,
+	[C(LL)][C(OP_WRITE)][C(RESULT_ACCESS)]	= ARMV8_PMUV3_PERFCTR_L2D_CACHE,
+	[C(LL)][C(OP_WRITE)][C(RESULT_MISS)]	= ARMV8_PMUV3_PERFCTR_L2D_CACHE_REFILL,
+
+	[C(DTLB)][C(OP_READ)][C(RESULT_MISS)]	= ARMV8_PMUV3_PERFCTR_L1D_TLB_REFILL,
 	[C(ITLB)][C(OP_READ)][C(RESULT_MISS)]	= ARMV8_PMUV3_PERFCTR_L1I_TLB_REFILL,

 	[C(BPU)][C(OP_READ)][C(RESULT_ACCESS)]	= ARMV8_PMUV3_PERFCTR_BR_PRED,
@@ -957,10 +963,26 @@ static int armv8_vulcan_map_event(struct perf_event *event)
 				ARMV8_PMU_EVTYPE_EVENT);
 }

+struct armv8pmu_probe_info {
+	struct arm_pmu *pmu;
+	bool present;
+};
+
 static void __armv8pmu_probe_pmu(void *info)
 {
-	struct arm_pmu *cpu_pmu = info;
+	struct armv8pmu_probe_info *probe = info;
+	struct arm_pmu *cpu_pmu = probe->pmu;
+	u64 dfr0;
 	u32 pmceid[2];
+	int pmuver;
+
+	dfr0 = read_sysreg(id_aa64dfr0_el1);
+	pmuver = cpuid_feature_extract_signed_field(dfr0,
+			ID_AA64DFR0_PMUVER_SHIFT);
+	if (pmuver < 1)
+		return;
+
+	probe->present = true;

 	/* Read the nb of CNTx counters supported from PMNC */
 	cpu_pmu->num_events = (armv8pmu_pmcr_read() >> ARMV8_PMU_PMCR_N_SHIFT)
@@ -979,13 +1001,27 @@ static void __armv8pmu_probe_pmu(void *info)

 static int armv8pmu_probe_pmu(struct arm_pmu *cpu_pmu)
 {
-	return smp_call_function_any(&cpu_pmu->supported_cpus,
+	struct armv8pmu_probe_info probe = {
+		.pmu = cpu_pmu,
+		.present = false,
+	};
+	int ret;
+
+	ret = smp_call_function_any(&cpu_pmu->supported_cpus,
 				    __armv8pmu_probe_pmu,
-				    cpu_pmu, 1);
+				    &probe, 1);
+	if (ret)
+		return ret;
+
+	return probe.present ? 0 : -ENODEV;
 }

-static void armv8_pmu_init(struct arm_pmu *cpu_pmu)
+static int armv8_pmu_init(struct arm_pmu *cpu_pmu)
 {
+	int ret = armv8pmu_probe_pmu(cpu_pmu);
+	if (ret)
+		return ret;
+
 	cpu_pmu->handle_irq		= armv8pmu_handle_irq,
 	cpu_pmu->enable			= armv8pmu_enable_event,
 	cpu_pmu->disable		= armv8pmu_disable_event,
@@ -997,78 +1033,104 @@ static void armv8_pmu_init(struct arm_pmu *cpu_pmu)
 	cpu_pmu->reset			= armv8pmu_reset,
 	cpu_pmu->max_period		= (1LLU << 32) - 1,
 	cpu_pmu->set_event_filter	= armv8pmu_set_event_filter;
+
+	return 0;
 }

 static int armv8_pmuv3_init(struct arm_pmu *cpu_pmu)
 {
-	armv8_pmu_init(cpu_pmu);
+	int ret = armv8_pmu_init(cpu_pmu);
+	if (ret)
+		return ret;
+
 	cpu_pmu->name			= "armv8_pmuv3";
 	cpu_pmu->map_event		= armv8_pmuv3_map_event;
 	cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_EVENTS] =
 		&armv8_pmuv3_events_attr_group;
 	cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_FORMATS] =
 		&armv8_pmuv3_format_attr_group;
-	return armv8pmu_probe_pmu(cpu_pmu);
+
+	return 0;
 }

 static int armv8_a53_pmu_init(struct arm_pmu *cpu_pmu)
 {
-	armv8_pmu_init(cpu_pmu);
+	int ret = armv8_pmu_init(cpu_pmu);
+	if (ret)
+		return ret;
+
 	cpu_pmu->name			= "armv8_cortex_a53";
 	cpu_pmu->map_event		= armv8_a53_map_event;
 	cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_EVENTS] =
 		&armv8_pmuv3_events_attr_group;
 	cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_FORMATS] =
 		&armv8_pmuv3_format_attr_group;
-	return armv8pmu_probe_pmu(cpu_pmu);
+
+	return 0;
 }

 static int armv8_a57_pmu_init(struct arm_pmu *cpu_pmu)
 {
-	armv8_pmu_init(cpu_pmu);
+	int ret = armv8_pmu_init(cpu_pmu);
+	if (ret)
+		return ret;
+
 	cpu_pmu->name			= "armv8_cortex_a57";
 	cpu_pmu->map_event		= armv8_a57_map_event;
 	cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_EVENTS] =
 		&armv8_pmuv3_events_attr_group;
 	cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_FORMATS] =
 		&armv8_pmuv3_format_attr_group;
-	return armv8pmu_probe_pmu(cpu_pmu);
+
+	return 0;
 }

 static int armv8_a72_pmu_init(struct arm_pmu *cpu_pmu)
 {
-	armv8_pmu_init(cpu_pmu);
+	int ret = armv8_pmu_init(cpu_pmu);
+	if (ret)
+		return ret;
+
 	cpu_pmu->name			= "armv8_cortex_a72";
 	cpu_pmu->map_event		= armv8_a57_map_event;
 	cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_EVENTS] =
 		&armv8_pmuv3_events_attr_group;
 	cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_FORMATS] =
 		&armv8_pmuv3_format_attr_group;
-	return armv8pmu_probe_pmu(cpu_pmu);
+
+	return 0;
 }

 static int armv8_thunder_pmu_init(struct arm_pmu *cpu_pmu)
 {
-	armv8_pmu_init(cpu_pmu);
+	int ret = armv8_pmu_init(cpu_pmu);
+	if (ret)
+		return ret;
+
 	cpu_pmu->name			= "armv8_cavium_thunder";
 	cpu_pmu->map_event		= armv8_thunder_map_event;
 	cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_EVENTS] =
 		&armv8_pmuv3_events_attr_group;
 	cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_FORMATS] =
 		&armv8_pmuv3_format_attr_group;
-	return armv8pmu_probe_pmu(cpu_pmu);
+
+	return 0;
 }

 static int armv8_vulcan_pmu_init(struct arm_pmu *cpu_pmu)
 {
-	armv8_pmu_init(cpu_pmu);
+	int ret = armv8_pmu_init(cpu_pmu);
+	if (ret)
+		return ret;
+
 	cpu_pmu->name			= "armv8_brcm_vulcan";
 	cpu_pmu->map_event		= armv8_vulcan_map_event;
 	cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_EVENTS] =
 		&armv8_pmuv3_events_attr_group;
 	cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_FORMATS] =
 		&armv8_pmuv3_format_attr_group;
-	return armv8pmu_probe_pmu(cpu_pmu);
+
+	return 0;
 }

 static const struct of_device_id armv8_pmu_of_device_ids[] = {
@@ -1081,24 +1143,9 @@ static const struct of_device_id armv8_pmu_of_device_ids[] = {
 	{},
 };

-/*
- * Non DT systems have their micro/arch events probed at run-time.
- * A fairly complete list of generic events are provided and ones that
- * aren't supported by the current PMU are disabled.
- */
-static const struct pmu_probe_info armv8_pmu_probe_table[] = {
-	PMU_PROBE(0, 0, armv8_pmuv3_init), /* enable all defined counters */
-	{ /* sentinel value */ }
-};
-
 static int armv8_pmu_device_probe(struct platform_device *pdev)
 {
-	if (acpi_disabled)
-		return arm_pmu_device_probe(pdev, armv8_pmu_of_device_ids,
-					    NULL);
-
-	return arm_pmu_device_probe(pdev, armv8_pmu_of_device_ids,
-				    armv8_pmu_probe_table);
+	return arm_pmu_device_probe(pdev, armv8_pmu_of_device_ids, NULL);
 }

 static struct platform_driver armv8_pmu_driver = {
@@ -1109,4 +1156,11 @@ static struct platform_driver armv8_pmu_driver = {
 	.probe		= armv8_pmu_device_probe,
 };

-builtin_platform_driver(armv8_pmu_driver);
+static int __init armv8_pmu_driver_init(void)
+{
+	if (acpi_disabled)
+		return platform_driver_register(&armv8_pmu_driver);
+	else
+		return arm_pmu_acpi_probe(armv8_pmuv3_init);
+}
+device_initcall(armv8_pmu_driver_init)
--- a/arch/arm64/kernel/process.c
+++ b/arch/arm64/kernel/process.c
@@ -205,12 +205,10 @@ void __show_regs(struct pt_regs *regs)

 		pr_cont("\n");
 	}
-	printk("\n");
 }

 void show_regs(struct pt_regs * regs)
 {
-	printk("\n");
 	__show_regs(regs);
 }


--- a/arch/arm64/kernel/reloc_test_core.c
+++ b/arch/arm64/kernel/reloc_test_core.c
+/*
+ * Copyright (C) 2017 Linaro, Ltd. <ard.biesheuvel@linaro.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ */
+
+#include <linux/module.h>
+
+int sym64_rel;
+
+#define SYM64_ABS_VAL		0xffff880000cccccc
+#define SYM32_ABS_VAL		0xf800cccc
+#define SYM16_ABS_VAL		0xf8cc
+
+#define __SET_ABS(name, val)	asm(".globl " #name "; .set "#name ", " #val)
+#define SET_ABS(name, val)	__SET_ABS(name, val)
+
+SET_ABS(sym64_abs, SYM64_ABS_VAL);
+SET_ABS(sym32_abs, SYM32_ABS_VAL);
+SET_ABS(sym16_abs, SYM16_ABS_VAL);
+
+asmlinkage u64 absolute_data64(void);
+asmlinkage u64 absolute_data32(void);
+asmlinkage u64 absolute_data16(void);
+asmlinkage u64 signed_movw(void);
+asmlinkage u64 unsigned_movw(void);
+asmlinkage u64 relative_adrp(void);
+asmlinkage u64 relative_adr(void);
+asmlinkage u64 relative_data64(void);
+asmlinkage u64 relative_data32(void);
+asmlinkage u64 relative_data16(void);
+
+static struct {
+	char	name[32];
+	u64	(*f)(void);
+	u64	expect;
+} const funcs[] = {
+	{ "R_AARCH64_ABS64",		absolute_data64, UL(SYM64_ABS_VAL) },
+	{ "R_AARCH64_ABS32",		absolute_data32, UL(SYM32_ABS_VAL) },
+	{ "R_AARCH64_ABS16",		absolute_data16, UL(SYM16_ABS_VAL) },
+	{ "R_AARCH64_MOVW_SABS_Gn",	signed_movw, UL(SYM64_ABS_VAL) },
+	{ "R_AARCH64_MOVW_UABS_Gn",	unsigned_movw, UL(SYM64_ABS_VAL) },
+#ifndef CONFIG_ARM64_ERRATUM_843419
+	{ "R_AARCH64_ADR_PREL_PG_HI21",	relative_adrp, (u64)&sym64_rel },
+#endif
+	{ "R_AARCH64_ADR_PREL_LO21",	relative_adr, (u64)&sym64_rel },
+	{ "R_AARCH64_PREL64",		relative_data64, (u64)&sym64_rel },
+	{ "R_AARCH64_PREL32",		relative_data32, (u64)&sym64_rel },
+	{ "R_AARCH64_PREL16",		relative_data16, (u64)&sym64_rel },
+};
+
+static int reloc_test_init(void)
+{
+	int i;
+
+	pr_info("Relocation test:\n");
+	pr_info("-------------------------------------------------------\n");
+
+	for (i = 0; i < ARRAY_SIZE(funcs); i++) {
+		u64 ret = funcs[i].f();
+
+		pr_info("%-31s 0x%016llx %s\n", funcs[i].name, ret,
+			ret == funcs[i].expect ? "pass" : "fail");
+		if (ret != funcs[i].expect)
+			pr_err("Relocation failed, expected 0x%016llx, not 0x%016llx\n",
+			       funcs[i].expect, ret);
+	}
+	return 0;
+}
+
+static void reloc_test_exit(void)
+{
+}
+
+module_init(reloc_test_init);
+module_exit(reloc_test_exit);
+
+MODULE_LICENSE("GPL v2");
--- a/arch/arm64/kernel/reloc_test_syms.S
+++ b/arch/arm64/kernel/reloc_test_syms.S
+/*
+ * Copyright (C) 2017 Linaro, Ltd. <ard.biesheuvel@linaro.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ */
+
+#include <linux/linkage.h>
+
+ENTRY(absolute_data64)
+	ldr	x0, 0f
+	ret
+0:	.quad	sym64_abs
+ENDPROC(absolute_data64)
+
+ENTRY(absolute_data32)
+	ldr	w0, 0f
+	ret
+0:	.long	sym32_abs
+ENDPROC(absolute_data32)
+
+ENTRY(absolute_data16)
+	adr	x0, 0f
+	ldrh	w0, [x0]
+	ret
+0:	.short	sym16_abs, 0
+ENDPROC(absolute_data16)
+
+ENTRY(signed_movw)
+	movz	x0, #:abs_g2_s:sym64_abs
+	movk	x0, #:abs_g1_nc:sym64_abs
+	movk	x0, #:abs_g0_nc:sym64_abs
+	ret
+ENDPROC(signed_movw)
+
+ENTRY(unsigned_movw)
+	movz	x0, #:abs_g3:sym64_abs
+	movk	x0, #:abs_g2_nc:sym64_abs
+	movk	x0, #:abs_g1_nc:sym64_abs
+	movk	x0, #:abs_g0_nc:sym64_abs
+	ret
+ENDPROC(unsigned_movw)
+
+#ifndef CONFIG_ARM64_ERRATUM_843419
+
+ENTRY(relative_adrp)
+	adrp	x0, sym64_rel
+	add	x0, x0, #:lo12:sym64_rel
+	ret
+ENDPROC(relative_adrp)
+
+#endif
+
+ENTRY(relative_adr)
+	adr	x0, sym64_rel
+	ret
+ENDPROC(relative_adr)
+
+ENTRY(relative_data64)
+	adr	x1, 0f
+	ldr	x0, [x1]
+	add	x0, x0, x1
+	ret
+0:	.quad	sym64_rel - .
+ENDPROC(relative_data64)
+
+ENTRY(relative_data32)
+	adr	x1, 0f
+	ldr	w0, [x1]
+	add	x0, x0, x1
+	ret
+0:	.long	sym64_rel - .
+ENDPROC(relative_data32)
+
+ENTRY(relative_data16)
+	adr	x1, 0f
+	ldrsh	w0, [x1]
+	add	x0, x0, x1
+	ret
+0:	.short	sym64_rel - ., 0
+ENDPROC(relative_data16)
--- a/arch/arm64/kernel/setup.c
+++ b/arch/arm64/kernel/setup.c
@@ -31,7 +31,6 @@
 #include <linux/screen_info.h>
 #include <linux/init.h>
 #include <linux/kexec.h>
-#include <linux/crash_dump.h>
 #include <linux/root_dev.h>
 #include <linux/cpu.h>
 #include <linux/interrupt.h>
@@ -181,6 +180,7 @@ static void __init smp_build_mpidr_hash(void)
 static void __init setup_machine_fdt(phys_addr_t dt_phys)
 {
 	void *dt_virt = fixmap_remap_fdt(dt_phys);
+	const char *name;

 	if (!dt_virt || !early_init_dt_scan(dt_virt)) {
 		pr_crit("\n"
@@ -193,7 +193,9 @@ static void __init setup_machine_fdt(phys_addr_t dt_phys)
 			cpu_relax();
 	}

-	dump_stack_set_arch_desc("%s (DT)", of_flat_dt_get_machine_name());
+	name = of_flat_dt_get_machine_name();
+	pr_info("Machine model: %s\n", name);
+	dump_stack_set_arch_desc("%s (DT)", name);
 }

 static void __init request_standard_resources(void)
@@ -226,6 +228,12 @@ static void __init request_standard_resources(void)
 		if (kernel_data.start >= res->start &&
 		    kernel_data.end <= res->end)
 			request_resource(res, &kernel_data);
+#ifdef CONFIG_KEXEC_CORE
+		/* Userspace will find "Crash kernel" region in /proc/iomem. */
+		if (crashk_res.end && crashk_res.start >= res->start &&
+		    crashk_res.end <= res->end)
+			request_resource(res, &crashk_res);
+#endif
 	}
 }


--- a/arch/arm64/kernel/smp.c
+++ b/arch/arm64/kernel/smp.c
@@ -39,6 +39,7 @@
 #include <linux/completion.h>
 #include <linux/of.h>
 #include <linux/irq_work.h>
+#include <linux/kexec.h>

 #include <asm/alternative.h>
 #include <asm/atomic.h>
@@ -76,6 +77,7 @@ enum ipi_msg_type {
 	IPI_RESCHEDULE,
 	IPI_CALL_FUNC,
 	IPI_CPU_STOP,
+	IPI_CPU_CRASH_STOP,
 	IPI_TIMER,
 	IPI_IRQ_WORK,
 	IPI_WAKEUP
@@ -434,6 +436,7 @@ void __init smp_cpus_done(unsigned int max_cpus)
 	setup_cpu_features();
 	hyp_mode_check();
 	apply_alternatives_all();
+	mark_linear_text_alias_ro();
 }

 void __init smp_prepare_boot_cpu(void)
@@ -518,6 +521,13 @@ static bool bootcpu_valid __initdata;
 static unsigned int cpu_count = 1;

 #ifdef CONFIG_ACPI
+static struct acpi_madt_generic_interrupt cpu_madt_gicc[NR_CPUS];
+
+struct acpi_madt_generic_interrupt *acpi_cpu_get_madt_gicc(int cpu)
+{
+	return &cpu_madt_gicc[cpu];
+}
+
 /*
 * acpi_map_gic_cpu_interface - parse processor MADT entry
 *
@@ -552,6 +562,7 @@ acpi_map_gic_cpu_interface(struct acpi_madt_generic_interrupt *processor)
 			return;
 		}
 		bootcpu_valid = true;
+		cpu_madt_gicc[0] = *processor;
 		early_map_cpu_to_node(0, acpi_numa_get_nid(0, hwid));
 		return;
 	}
@@ -562,6 +573,8 @@ acpi_map_gic_cpu_interface(struct acpi_madt_generic_interrupt *processor)
 	/* map the logical cpu id to cpu MPIDR */
 	cpu_logical_map(cpu_count) = hwid;

+	cpu_madt_gicc[cpu_count] = *processor;
+
 	/*
 	 * Set-up the ACPI parking protocol cpu entries
 	 * while initializing the cpu_logical_map to
@@ -755,6 +768,7 @@ static const char *ipi_types[NR_IPI] __tracepoint_string = {
 	S(IPI_RESCHEDULE, "Rescheduling interrupts"),
 	S(IPI_CALL_FUNC, "Function call interrupts"),
 	S(IPI_CPU_STOP, "CPU stop interrupts"),
+	S(IPI_CPU_CRASH_STOP, "CPU stop (for crash dump) interrupts"),
 	S(IPI_TIMER, "Timer broadcast interrupts"),
 	S(IPI_IRQ_WORK, "IRQ work interrupts"),
 	S(IPI_WAKEUP, "CPU wake-up interrupts"),
@@ -829,6 +843,29 @@ static void ipi_cpu_stop(unsigned int cpu)
 		cpu_relax();
 }

+#ifdef CONFIG_KEXEC_CORE
+static atomic_t waiting_for_crash_ipi = ATOMIC_INIT(0);
+#endif
+
+static void ipi_cpu_crash_stop(unsigned int cpu, struct pt_regs *regs)
+{
+#ifdef CONFIG_KEXEC_CORE
+	crash_save_cpu(regs, cpu);
+
+	atomic_dec(&waiting_for_crash_ipi);
+
+	local_irq_disable();
+
+#ifdef CONFIG_HOTPLUG_CPU
+	if (cpu_ops[cpu]->cpu_die)
+		cpu_ops[cpu]->cpu_die(cpu);
+#endif
+
+	/* just in case */
+	cpu_park_loop();
+#endif
+}
+
 /*
 * Main handler for inter-processor interrupts
 */
@@ -859,6 +896,15 @@ void handle_IPI(int ipinr, struct pt_regs *regs)
 		irq_exit();
 		break;

+	case IPI_CPU_CRASH_STOP:
+		if (IS_ENABLED(CONFIG_KEXEC_CORE)) {
+			irq_enter();
+			ipi_cpu_crash_stop(cpu, regs);
+
+			unreachable();
+		}
+		break;
+
 #ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST
 	case IPI_TIMER:
 		irq_enter();
@@ -931,6 +977,39 @@ void smp_send_stop(void)
 			   cpumask_pr_args(cpu_online_mask));
 }

+#ifdef CONFIG_KEXEC_CORE
+void smp_send_crash_stop(void)
+{
+	cpumask_t mask;
+	unsigned long timeout;
+
+	if (num_online_cpus() == 1)
+		return;
+
+	cpumask_copy(&mask, cpu_online_mask);
+	cpumask_clear_cpu(smp_processor_id(), &mask);
+
+	atomic_set(&waiting_for_crash_ipi, num_online_cpus() - 1);
+
+	pr_crit("SMP: stopping secondary CPUs\n");
+	smp_cross_call(&mask, IPI_CPU_CRASH_STOP);
+
+	/* Wait up to one second for other CPUs to stop */
+	timeout = USEC_PER_SEC;
+	while ((atomic_read(&waiting_for_crash_ipi) > 0) && timeout--)
+		udelay(1);
+
+	if (atomic_read(&waiting_for_crash_ipi) > 0)
+		pr_warning("SMP: failed to stop secondary CPUs %*pbl\n",
+			   cpumask_pr_args(&mask));
+}
+
+bool smp_crash_stop_failed(void)
+{
+	return (atomic_read(&waiting_for_crash_ipi) > 0);
+}
+#endif
+
 /*
 * not supported here
 */

--- a/arch/arm64/kernel/traps.c
+++ b/arch/arm64/kernel/traps.c
@@ -513,6 +513,14 @@ static void cntvct_read_handler(unsigned int esr, struct pt_regs *regs)
 	regs->pc += 4;
 }

+static void cntfrq_read_handler(unsigned int esr, struct pt_regs *regs)
+{
+	int rt = (esr & ESR_ELx_SYS64_ISS_RT_MASK) >> ESR_ELx_SYS64_ISS_RT_SHIFT;
+
+	pt_regs_write_reg(regs, rt, read_sysreg(cntfrq_el0));
+	regs->pc += 4;
+}
+
 struct sys64_hook {
 	unsigned int esr_mask;
 	unsigned int esr_val;
@@ -537,6 +545,12 @@ static struct sys64_hook sys64_hooks[] = {
 		.esr_val = ESR_ELx_SYS64_ISS_SYS_CNTVCT,
 		.handler = cntvct_read_handler,
 	},
+	{
+		/* Trap read access to CNTFRQ_EL0 */
+		.esr_mask = ESR_ELx_SYS64_ISS_SYS_OP_MASK,
+		.esr_val = ESR_ELx_SYS64_ISS_SYS_CNTFRQ,
+		.handler = cntfrq_read_handler,
+	},
 	{},
 };


--- a/arch/arm64/kernel/vmlinux.lds.S
+++ b/arch/arm64/kernel/vmlinux.lds.S
@@ -143,12 +143,27 @@ SECTIONS

 	. = ALIGN(SEGMENT_ALIGN);
 	__init_begin = .;
+	__inittext_begin = .;

 	INIT_TEXT_SECTION(8)
 	.exit.text : {
 		ARM_EXIT_KEEP(EXIT_TEXT)
 	}

+	. = ALIGN(4);
+	.altinstructions : {
+		__alt_instructions = .;
+		*(.altinstructions)
+		__alt_instructions_end = .;
+	}
+	.altinstr_replacement : {
+		*(.altinstr_replacement)
+	}
+
+	. = ALIGN(PAGE_SIZE);
+	__inittext_end = .;
+	__initdata_begin = .;
+
 	.init.data : {
 		INIT_DATA
 		INIT_SETUP(16)
@@ -164,15 +179,6 @@ SECTIONS

 	PERCPU_SECTION(L1_CACHE_BYTES)

-	. = ALIGN(4);
-	.altinstructions : {
-		__alt_instructions = .;
-		*(.altinstructions)
-		__alt_instructions_end = .;
-	}
-	.altinstr_replacement : {
-		*(.altinstr_replacement)
-	}
 	.rela : ALIGN(8) {
 		*(.rela .rela*)
 	}
@@ -181,6 +187,7 @@ SECTIONS
 	__rela_size	= SIZEOF(.rela);

 	. = ALIGN(SEGMENT_ALIGN);
+	__initdata_end = .;
 	__init_end = .;

 	_data = .;
@@ -206,6 +213,7 @@ SECTIONS
 	}

 	PECOFF_EDATA_PADDING
+	__pecoff_data_rawsize = ABSOLUTE(. - __initdata_begin);
 	_edata = .;

 	BSS_SECTION(0, 0, 0)
@@ -221,6 +229,7 @@ SECTIONS
 	. += RESERVED_TTBR0_SIZE;
 #endif

+	__pecoff_data_size = ABSOLUTE(. - __initdata_begin);
 	_end = .;

 	STABS_DEBUG

--- a/arch/arm64/kvm/hyp/tlb.c
+++ b/arch/arm64/kvm/hyp/tlb.c
@@ -94,6 +94,28 @@ void __hyp_text __kvm_tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa)
 	dsb(ish);
 	isb();

+	/*
+	 * If the host is running at EL1 and we have a VPIPT I-cache,
+	 * then we must perform I-cache maintenance at EL2 in order for
+	 * it to have an effect on the guest. Since the guest cannot hit
+	 * I-cache lines allocated with a different VMID, we don't need
+	 * to worry about junk out of guest reset (we nuke the I-cache on
+	 * VMID rollover), but we do need to be careful when remapping
+	 * executable pages for the same guest. This can happen when KSM
+	 * takes a CoW fault on an executable page, copies the page into
+	 * a page that was previously mapped in the guest and then needs
+	 * to invalidate the guest view of the I-cache for that page
+	 * from EL1. To solve this, we invalidate the entire I-cache when
+	 * unmapping a page from a guest if we have a VPIPT I-cache but
+	 * the host is running at EL1. As above, we could do better if
+	 * we had the VA.
+	 *
+	 * The moral of this story is: if you have a VPIPT I-cache, then
+	 * you should be running with VHE enabled.
+	 */
+	if (!has_vhe() && icache_is_vpipt())
+		__flush_icache_all();
+
 	__tlb_switch_to_host()(kvm);
 }


--- a/arch/arm64/kvm/reset.c
+++ b/arch/arm64/kvm/reset.c
@@ -60,7 +60,7 @@ static bool cpu_has_32bit_el1(void)
 {
 	u64 pfr0;

-	pfr0 = read_system_reg(SYS_ID_AA64PFR0_EL1);
+	pfr0 = read_sanitised_ftr_reg(SYS_ID_AA64PFR0_EL1);
 	return !!(pfr0 & 0x20);
 }


--- a/arch/arm64/kvm/sys_regs.c
+++ b/arch/arm64/kvm/sys_regs.c
@@ -1183,8 +1183,8 @@ static bool trap_dbgidr(struct kvm_vcpu *vcpu,
 	if (p->is_write) {
 		return ignore_write(vcpu, p);
 	} else {
-		u64 dfr = read_system_reg(SYS_ID_AA64DFR0_EL1);
-		u64 pfr = read_system_reg(SYS_ID_AA64PFR0_EL1);
+		u64 dfr = read_sanitised_ftr_reg(SYS_ID_AA64DFR0_EL1);
+		u64 pfr = read_sanitised_ftr_reg(SYS_ID_AA64PFR0_EL1);
 		u32 el3 = !!cpuid_feature_extract_unsigned_field(pfr, ID_AA64PFR0_EL3_SHIFT);

 		p->regval = ((((dfr >> ID_AA64DFR0_WRPS_SHIFT) & 0xf) << 28) |

--- a/arch/arm64/mm/context.c
+++ b/arch/arm64/mm/context.c
@@ -119,9 +119,6 @@ static void flush_context(unsigned int cpu)

 	/* Queue a TLB invalidate and flush the I-cache if necessary. */
 	cpumask_setall(&tlb_flush_pending);
-
-	if (icache_is_aivivt())
-		__flush_icache_all();
 }

 static bool check_update_reserved_asid(u64 asid, u64 newasid)

--- a/arch/arm64/mm/dma-mapping.c
+++ b/arch/arm64/mm/dma-mapping.c
@@ -308,24 +308,15 @@ static void __swiotlb_sync_sg_for_device(struct device *dev,
 				       sg->length, dir);
 }

-static int __swiotlb_mmap(struct device *dev,
-			  struct vm_area_struct *vma,
-			  void *cpu_addr, dma_addr_t dma_addr, size_t size,
-			  unsigned long attrs)
+static int __swiotlb_mmap_pfn(struct vm_area_struct *vma,
+			      unsigned long pfn, size_t size)
 {
 	int ret = -ENXIO;
 	unsigned long nr_vma_pages = (vma->vm_end - vma->vm_start) >>
 					PAGE_SHIFT;
 	unsigned long nr_pages = PAGE_ALIGN(size) >> PAGE_SHIFT;
-	unsigned long pfn = dma_to_phys(dev, dma_addr) >> PAGE_SHIFT;
 	unsigned long off = vma->vm_pgoff;

-	vma->vm_page_prot = __get_dma_pgprot(attrs, vma->vm_page_prot,
-					     is_device_dma_coherent(dev));
-
-	if (dma_mmap_from_coherent(dev, vma, cpu_addr, size, &ret))
-		return ret;
-
 	if (off < nr_pages && nr_vma_pages <= (nr_pages - off)) {
 		ret = remap_pfn_range(vma, vma->vm_start,
 				      pfn + off,
@@ -336,19 +327,43 @@ static int __swiotlb_mmap(struct device *dev,
 	return ret;
 }

-static int __swiotlb_get_sgtable(struct device *dev, struct sg_table *sgt,
-				 void *cpu_addr, dma_addr_t handle, size_t size,
-				 unsigned long attrs)
+static int __swiotlb_mmap(struct device *dev,
+			  struct vm_area_struct *vma,
+			  void *cpu_addr, dma_addr_t dma_addr, size_t size,
+			  unsigned long attrs)
+{
+	int ret;
+	unsigned long pfn = dma_to_phys(dev, dma_addr) >> PAGE_SHIFT;
+
+	vma->vm_page_prot = __get_dma_pgprot(attrs, vma->vm_page_prot,
+					     is_device_dma_coherent(dev));
+
+	if (dma_mmap_from_coherent(dev, vma, cpu_addr, size, &ret))
+		return ret;
+
+	return __swiotlb_mmap_pfn(vma, pfn, size);
+}
+
+static int __swiotlb_get_sgtable_page(struct sg_table *sgt,
+				      struct page *page, size_t size)
 {
 	int ret = sg_alloc_table(sgt, 1, GFP_KERNEL);

 	if (!ret)
-		sg_set_page(sgt->sgl, phys_to_page(dma_to_phys(dev, handle)),
-			    PAGE_ALIGN(size), 0);
+		sg_set_page(sgt->sgl, page, PAGE_ALIGN(size), 0);

 	return ret;
 }

+static int __swiotlb_get_sgtable(struct device *dev, struct sg_table *sgt,
+				 void *cpu_addr, dma_addr_t handle, size_t size,
+				 unsigned long attrs)
+{
+	struct page *page = phys_to_page(dma_to_phys(dev, handle));
+
+	return __swiotlb_get_sgtable_page(sgt, page, size);
+}
+
 static int __swiotlb_dma_supported(struct device *hwdev, u64 mask)
 {
 	if (swiotlb)
@@ -584,20 +599,7 @@ static void *__iommu_alloc_attrs(struct device *dev, size_t size,
 	 */
 	gfp |= __GFP_ZERO;

-	if (gfpflags_allow_blocking(gfp)) {
-		struct page **pages;
-		pgprot_t prot = __get_dma_pgprot(attrs, PAGE_KERNEL, coherent);
-
-		pages = iommu_dma_alloc(dev, iosize, gfp, attrs, ioprot,
-					handle, flush_page);
-		if (!pages)
-			return NULL;
-
-		addr = dma_common_pages_remap(pages, size, VM_USERMAP, prot,
-					      __builtin_return_address(0));
-		if (!addr)
-			iommu_dma_free(dev, pages, iosize, handle);
-	} else {
+	if (!gfpflags_allow_blocking(gfp)) {
 		struct page *page;
 		/*
 		 * In atomic context we can't remap anything, so we'll only
@@ -621,6 +623,45 @@ static void *__iommu_alloc_attrs(struct device *dev, size_t size,
 				__free_from_pool(addr, size);
 			addr = NULL;
 		}
+	} else if (attrs & DMA_ATTR_FORCE_CONTIGUOUS) {
+		pgprot_t prot = __get_dma_pgprot(attrs, PAGE_KERNEL, coherent);
+		struct page *page;
+
+		page = dma_alloc_from_contiguous(dev, size >> PAGE_SHIFT,
+						 get_order(size), gfp);
+		if (!page)
+			return NULL;
+
+		*handle = iommu_dma_map_page(dev, page, 0, iosize, ioprot);
+		if (iommu_dma_mapping_error(dev, *handle)) {
+			dma_release_from_contiguous(dev, page,
+						    size >> PAGE_SHIFT);
+			return NULL;
+		}
+		if (!coherent)
+			__dma_flush_area(page_to_virt(page), iosize);
+
+		addr = dma_common_contiguous_remap(page, size, VM_USERMAP,
+						   prot,
+						   __builtin_return_address(0));
+		if (!addr) {
+			iommu_dma_unmap_page(dev, *handle, iosize, 0, attrs);
+			dma_release_from_contiguous(dev, page,
+						    size >> PAGE_SHIFT);
+		}
+	} else {
+		pgprot_t prot = __get_dma_pgprot(attrs, PAGE_KERNEL, coherent);
+		struct page **pages;
+
+		pages = iommu_dma_alloc(dev, iosize, gfp, attrs, ioprot,
+					handle, flush_page);
+		if (!pages)
+			return NULL;
+
+		addr = dma_common_pages_remap(pages, size, VM_USERMAP, prot,
+					      __builtin_return_address(0));
+		if (!addr)
+			iommu_dma_free(dev, pages, iosize, handle);
 	}
 	return addr;
 }
@@ -632,7 +673,8 @@ static void __iommu_free_attrs(struct device *dev, size_t size, void *cpu_addr,

 	size = PAGE_ALIGN(size);
 	/*
-	 * @cpu_addr will be one of 3 things depending on how it was allocated:
+	 * @cpu_addr will be one of 4 things depending on how it was allocated:
+	 * - A remapped array of pages for contiguous allocations.
 	 * - A remapped array of pages from iommu_dma_alloc(), for all
 	 *   non-atomic allocations.
 	 * - A non-cacheable alias from the atomic pool, for atomic
@@ -644,6 +686,12 @@ static void __iommu_free_attrs(struct device *dev, size_t size, void *cpu_addr,
 	if (__in_atomic_pool(cpu_addr, size)) {
 		iommu_dma_unmap_page(dev, handle, iosize, 0, 0);
 		__free_from_pool(cpu_addr, size);
+	} else if (attrs & DMA_ATTR_FORCE_CONTIGUOUS) {
+		struct page *page = vmalloc_to_page(cpu_addr);
+
+		iommu_dma_unmap_page(dev, handle, iosize, 0, attrs);
+		dma_release_from_contiguous(dev, page, size >> PAGE_SHIFT);
+		dma_common_free_remap(cpu_addr, size, VM_USERMAP);
 	} else if (is_vmalloc_addr(cpu_addr)){
 		struct vm_struct *area = find_vm_area(cpu_addr);

@@ -670,6 +718,15 @@ static int __iommu_mmap_attrs(struct device *dev, struct vm_area_struct *vma,
 	if (dma_mmap_from_coherent(dev, vma, cpu_addr, size, &ret))
 		return ret;

+	if (attrs & DMA_ATTR_FORCE_CONTIGUOUS) {
+		/*
+		 * DMA_ATTR_FORCE_CONTIGUOUS allocations are always remapped,
+		 * hence in the vmalloc space.
+		 */
+		unsigned long pfn = vmalloc_to_pfn(cpu_addr);
+		return __swiotlb_mmap_pfn(vma, pfn, size);
+	}
+
 	area = find_vm_area(cpu_addr);
 	if (WARN_ON(!area || !area->pages))
 		return -ENXIO;
@@ -684,6 +741,15 @@ static int __iommu_get_sgtable(struct device *dev, struct sg_table *sgt,
 	unsigned int count = PAGE_ALIGN(size) >> PAGE_SHIFT;
 	struct vm_struct *area = find_vm_area(cpu_addr);

+	if (attrs & DMA_ATTR_FORCE_CONTIGUOUS) {
+		/*
+		 * DMA_ATTR_FORCE_CONTIGUOUS allocations are always remapped,
+		 * hence in the vmalloc space.
+		 */
+		struct page *page = vmalloc_to_page(cpu_addr);
+		return __swiotlb_get_sgtable_page(sgt, page, size);
+	}
+
 	if (WARN_ON(!area || !area->pages))
 		return -ENXIO;


--- a/arch/arm64/mm/fault.c
+++ b/arch/arm64/mm/fault.c
@@ -174,12 +174,33 @@ static bool is_el1_instruction_abort(unsigned int esr)
 	return ESR_ELx_EC(esr) == ESR_ELx_EC_IABT_CUR;
 }

+static inline bool is_permission_fault(unsigned int esr, struct pt_regs *regs,
+				       unsigned long addr)
+{
+	unsigned int ec       = ESR_ELx_EC(esr);
+	unsigned int fsc_type = esr & ESR_ELx_FSC_TYPE;
+
+	if (ec != ESR_ELx_EC_DABT_CUR && ec != ESR_ELx_EC_IABT_CUR)
+		return false;
+
+	if (fsc_type == ESR_ELx_FSC_PERM)
+		return true;
+
+	if (addr < USER_DS && system_uses_ttbr0_pan())
+		return fsc_type == ESR_ELx_FSC_FAULT &&
+			(regs->pstate & PSR_PAN_BIT);
+
+	return false;
+}
+
 /*
 * The kernel tried to access some page that wasn't present.
 */
 static void __do_kernel_fault(struct mm_struct *mm, unsigned long addr,
 			      unsigned int esr, struct pt_regs *regs)
 {
+	const char *msg;
+
 	/*
 	 * Are we prepared to handle this kernel fault?
 	 * We are almost certainly not prepared to handle instruction faults.
@@ -191,9 +212,20 @@ static void __do_kernel_fault(struct mm_struct *mm, unsigned long addr,
 	 * No handler, we'll have to terminate things with extreme prejudice.
 	 */
 	bust_spinlocks(1);
-	pr_alert("Unable to handle kernel %s at virtual address %08lx\n",
-		 (addr < PAGE_SIZE) ? "NULL pointer dereference" :
-		 "paging request", addr);
+
+	if (is_permission_fault(esr, regs, addr)) {
+		if (esr & ESR_ELx_WNR)
+			msg = "write to read-only memory";
+		else
+			msg = "read from unreadable memory";
+	} else if (addr < PAGE_SIZE) {
+		msg = "NULL pointer dereference";
+	} else {
+		msg = "paging request";
+	}
+
+	pr_alert("Unable to handle kernel %s at virtual address %08lx\n", msg,
+		 addr);

 	show_pte(mm, addr);
 	die("Oops", regs, esr);
@@ -287,21 +319,6 @@ static int __do_page_fault(struct mm_struct *mm, unsigned long addr,
 	return fault;
 }

-static inline bool is_permission_fault(unsigned int esr, struct pt_regs *regs)
-{
-	unsigned int ec       = ESR_ELx_EC(esr);
-	unsigned int fsc_type = esr & ESR_ELx_FSC_TYPE;
-
-	if (ec != ESR_ELx_EC_DABT_CUR && ec != ESR_ELx_EC_IABT_CUR)
-		return false;
-
-	if (system_uses_ttbr0_pan())
-		return fsc_type == ESR_ELx_FSC_FAULT &&
-			(regs->pstate & PSR_PAN_BIT);
-	else
-		return fsc_type == ESR_ELx_FSC_PERM;
-}
-
 static bool is_el0_instruction_abort(unsigned int esr)
 {
 	return ESR_ELx_EC(esr) == ESR_ELx_EC_IABT_LOW;
@@ -339,7 +356,7 @@ static int __kprobes do_page_fault(unsigned long addr, unsigned int esr,
 		mm_flags |= FAULT_FLAG_WRITE;
 	}

-	if (addr < USER_DS && is_permission_fault(esr, regs)) {
+	if (addr < USER_DS && is_permission_fault(esr, regs, addr)) {
 		/* regs->orig_addr_limit may be 0 if we entered from EL0 */
 		if (regs->orig_addr_limit == KERNEL_DS)
 			die("Accessing user space memory with fs=KERNEL_DS", regs, esr);

--- a/arch/arm64/mm/flush.c
+++ b/arch/arm64/mm/flush.c
@@ -22,7 +22,7 @@
 #include <linux/pagemap.h>

 #include <asm/cacheflush.h>
-#include <asm/cachetype.h>
+#include <asm/cache.h>
 #include <asm/tlbflush.h>

 void sync_icache_aliases(void *kaddr, unsigned long len)
@@ -65,8 +65,6 @@ void __sync_icache_dcache(pte_t pte, unsigned long addr)
 	if (!test_and_set_bit(PG_dcache_clean, &page->flags))
 		sync_icache_aliases(page_address(page),
 				    PAGE_SIZE << compound_order(page));
-	else if (icache_is_aivivt())
-		__flush_icache_all();
 }

 /*

--- a/arch/arm64/mm/init.c
+++ b/arch/arm64/mm/init.c
@@ -30,6 +30,7 @@
 #include <linux/gfp.h>
 #include <linux/memblock.h>
 #include <linux/sort.h>
+#include <linux/of.h>
 #include <linux/of_fdt.h>
 #include <linux/dma-mapping.h>
 #include <linux/dma-contiguous.h>
@@ -37,6 +38,8 @@
 #include <linux/swiotlb.h>
 #include <linux/vmalloc.h>
 #include <linux/mm.h>
+#include <linux/kexec.h>
+#include <linux/crash_dump.h>

 #include <asm/boot.h>
 #include <asm/fixmap.h>
@@ -77,6 +80,142 @@ static int __init early_initrd(char *p)
 early_param("initrd", early_initrd);
 #endif

+#ifdef CONFIG_KEXEC_CORE
+/*
+ * reserve_crashkernel() - reserves memory for crash kernel
+ *
+ * This function reserves memory area given in "crashkernel=" kernel command
+ * line parameter. The memory reserved is used by dump capture kernel when
+ * primary kernel is crashing.
+ */
+static void __init reserve_crashkernel(void)
+{
+	unsigned long long crash_base, crash_size;
+	int ret;
+
+	ret = parse_crashkernel(boot_command_line, memblock_phys_mem_size(),
+				&crash_size, &crash_base);
+	/* no crashkernel= or invalid value specified */
+	if (ret || !crash_size)
+		return;
+
+	crash_size = PAGE_ALIGN(crash_size);
+
+	if (crash_base == 0) {
+		/* Current arm64 boot protocol requires 2MB alignment */
+		crash_base = memblock_find_in_range(0, ARCH_LOW_ADDRESS_LIMIT,
+				crash_size, SZ_2M);
+		if (crash_base == 0) {
+			pr_warn("cannot allocate crashkernel (size:0x%llx)\n",
+				crash_size);
+			return;
+		}
+	} else {
+		/* User specifies base address explicitly. */
+		if (!memblock_is_region_memory(crash_base, crash_size)) {
+			pr_warn("cannot reserve crashkernel: region is not memory\n");
+			return;
+		}
+
+		if (memblock_is_region_reserved(crash_base, crash_size)) {
+			pr_warn("cannot reserve crashkernel: region overlaps reserved memory\n");
+			return;
+		}
+
+		if (!IS_ALIGNED(crash_base, SZ_2M)) {
+			pr_warn("cannot reserve crashkernel: base address is not 2MB aligned\n");
+			return;
+		}
+	}
+	memblock_reserve(crash_base, crash_size);
+
+	pr_info("crashkernel reserved: 0x%016llx - 0x%016llx (%lld MB)\n",
+		crash_base, crash_base + crash_size, crash_size >> 20);
+
+	crashk_res.start = crash_base;
+	crashk_res.end = crash_base + crash_size - 1;
+}
+
+static void __init kexec_reserve_crashkres_pages(void)
+{
+#ifdef CONFIG_HIBERNATION
+	phys_addr_t addr;
+	struct page *page;
+
+	if (!crashk_res.end)
+		return;
+
+	/*
+	 * To reduce the size of hibernation image, all the pages are
+	 * marked as Reserved initially.
+	 */
+	for (addr = crashk_res.start; addr < (crashk_res.end + 1);
+			addr += PAGE_SIZE) {
+		page = phys_to_page(addr);
+		SetPageReserved(page);
+	}
+#endif
+}
+#else
+static void __init reserve_crashkernel(void)
+{
+}
+
+static void __init kexec_reserve_crashkres_pages(void)
+{
+}
+#endif /* CONFIG_KEXEC_CORE */
+
+#ifdef CONFIG_CRASH_DUMP
+static int __init early_init_dt_scan_elfcorehdr(unsigned long node,
+		const char *uname, int depth, void *data)
+{
+	const __be32 *reg;
+	int len;
+
+	if (depth != 1 || strcmp(uname, "chosen") != 0)
+		return 0;
+
+	reg = of_get_flat_dt_prop(node, "linux,elfcorehdr", &len);
+	if (!reg || (len < (dt_root_addr_cells + dt_root_size_cells)))
+		return 1;
+
+	elfcorehdr_addr = dt_mem_next_cell(dt_root_addr_cells, &reg);
+	elfcorehdr_size = dt_mem_next_cell(dt_root_size_cells, &reg);
+
+	return 1;
+}
+
+/*
+ * reserve_elfcorehdr() - reserves memory for elf core header
+ *
+ * This function reserves the memory occupied by an elf core header
+ * described in the device tree. This region contains all the
+ * information about primary kernel's core image and is used by a dump
+ * capture kernel to access the system memory on primary kernel.
+ */
+static void __init reserve_elfcorehdr(void)
+{
+	of_scan_flat_dt(early_init_dt_scan_elfcorehdr, NULL);
+
+	if (!elfcorehdr_size)
+		return;
+
+	if (memblock_is_region_reserved(elfcorehdr_addr, elfcorehdr_size)) {
+		pr_warn("elfcorehdr is overlapped\n");
+		return;
+	}
+
+	memblock_reserve(elfcorehdr_addr, elfcorehdr_size);
+
+	pr_info("Reserving %lldKB of memory at 0x%llx for elfcorehdr\n",
+		elfcorehdr_size >> 10, elfcorehdr_addr);
+}
+#else
+static void __init reserve_elfcorehdr(void)
+{
+}
+#endif /* CONFIG_CRASH_DUMP */
 /*
 * Return the maximum physical address for ZONE_DMA (DMA_BIT_MASK(32)). It
 * currently assumes that for memory starting above 4G, 32-bit devices will
@@ -188,10 +327,45 @@ static int __init early_mem(char *p)
 }
 early_param("mem", early_mem);

+static int __init early_init_dt_scan_usablemem(unsigned long node,
+		const char *uname, int depth, void *data)
+{
+	struct memblock_region *usablemem = data;
+	const __be32 *reg;
+	int len;
+
+	if (depth != 1 || strcmp(uname, "chosen") != 0)
+		return 0;
+
+	reg = of_get_flat_dt_prop(node, "linux,usable-memory-range", &len);
+	if (!reg || (len < (dt_root_addr_cells + dt_root_size_cells)))
+		return 1;
+
+	usablemem->base = dt_mem_next_cell(dt_root_addr_cells, &reg);
+	usablemem->size = dt_mem_next_cell(dt_root_size_cells, &reg);
+
+	return 1;
+}
+
+static void __init fdt_enforce_memory_region(void)
+{
+	struct memblock_region reg = {
+		.size = 0,
+	};
+
+	of_scan_flat_dt(early_init_dt_scan_usablemem, &reg);
+
+	if (reg.size)
+		memblock_cap_memory_range(reg.base, reg.size);
+}
+
 void __init arm64_memblock_init(void)
 {
 	const s64 linear_region_size = -(s64)PAGE_OFFSET;

+	/* Handle linux,usable-memory-range property */
+	fdt_enforce_memory_region();
+
 	/*
 	 * Ensure that the linear region takes up exactly half of the kernel
 	 * virtual address space. This way, we can distinguish a linear address
@@ -297,6 +471,11 @@ void __init arm64_memblock_init(void)
 		arm64_dma_phys_limit = max_zone_dma_phys();
 	else
 		arm64_dma_phys_limit = PHYS_MASK + 1;
+
+	reserve_crashkernel();
+
+	reserve_elfcorehdr();
+
 	dma_contiguous_reserve(arm64_dma_phys_limit);

 	memblock_allow_resize();
@@ -416,6 +595,8 @@ void __init mem_init(void)
 	/* this will put all unused low memory onto the freelists */
 	free_all_bootmem();

+	kexec_reserve_crashkres_pages();
+
 	mem_init_print_info(NULL);

 #define MLK(b, t) b, t, ((t) - (b)) >> 10

--- a/arch/arm64/mm/mmu.c
+++ b/arch/arm64/mm/mmu.c
@@ -22,6 +22,8 @@
 #include <linux/kernel.h>
 #include <linux/errno.h>
 #include <linux/init.h>
+#include <linux/ioport.h>
+#include <linux/kexec.h>
 #include <linux/libfdt.h>
 #include <linux/mman.h>
 #include <linux/nodemask.h>
@@ -43,6 +45,9 @@
 #include <asm/mmu_context.h>
 #include <asm/ptdump.h>

+#define NO_BLOCK_MAPPINGS	BIT(0)
+#define NO_CONT_MAPPINGS	BIT(1)
+
 u64 idmap_t0sz = TCR_T0SZ(VA_BITS);

 u64 kimage_voffset __ro_after_init;
@@ -103,33 +108,27 @@ static bool pgattr_change_is_safe(u64 old, u64 new)
 	 */
 	static const pteval_t mask = PTE_PXN | PTE_RDONLY | PTE_WRITE;

-	return old  == 0 || new  == 0 || ((old ^ new) & ~mask) == 0;
+	/* creating or taking down mappings is always safe */
+	if (old == 0 || new == 0)
+		return true;
+
+	/* live contiguous mappings may not be manipulated at all */
+	if ((old | new) & PTE_CONT)
+		return false;
+
+	return ((old ^ new) & ~mask) == 0;
 }

-static void alloc_init_pte(pmd_t *pmd, unsigned long addr,
-				  unsigned long end, unsigned long pfn,
-				  pgprot_t prot,
-				  phys_addr_t (*pgtable_alloc)(void))
+static void init_pte(pmd_t *pmd, unsigned long addr, unsigned long end,
+		     phys_addr_t phys, pgprot_t prot)
 {
 	pte_t *pte;

-	BUG_ON(pmd_sect(*pmd));
-	if (pmd_none(*pmd)) {
-		phys_addr_t pte_phys;
-		BUG_ON(!pgtable_alloc);
-		pte_phys = pgtable_alloc();
-		pte = pte_set_fixmap(pte_phys);
-		__pmd_populate(pmd, pte_phys, PMD_TYPE_TABLE);
-		pte_clear_fixmap();
-	}
-	BUG_ON(pmd_bad(*pmd));
-
 	pte = pte_set_fixmap_offset(pmd, addr);
 	do {
 		pte_t old_pte = *pte;

-		set_pte(pte, pfn_pte(pfn, prot));
-		pfn++;
+		set_pte(pte, pfn_pte(__phys_to_pfn(phys), prot));

 		/*
 		 * After the PTE entry has been populated once, we
@@ -137,32 +136,51 @@ static void alloc_init_pte(pmd_t *pmd, unsigned long addr,
 		 */
 		BUG_ON(!pgattr_change_is_safe(pte_val(old_pte), pte_val(*pte)));

+		phys += PAGE_SIZE;
 	} while (pte++, addr += PAGE_SIZE, addr != end);

 	pte_clear_fixmap();
 }

-static void alloc_init_pmd(pud_t *pud, unsigned long addr, unsigned long end,
-				  phys_addr_t phys, pgprot_t prot,
-				  phys_addr_t (*pgtable_alloc)(void),
-				  bool page_mappings_only)
+static void alloc_init_cont_pte(pmd_t *pmd, unsigned long addr,
+				unsigned long end, phys_addr_t phys,
+				pgprot_t prot,
+				phys_addr_t (*pgtable_alloc)(void),
+				int flags)
 {
-	pmd_t *pmd;
 	unsigned long next;

-	/*
-	 * Check for initial section mappings in the pgd/pud and remove them.
-	 */
-	BUG_ON(pud_sect(*pud));
-	if (pud_none(*pud)) {
-		phys_addr_t pmd_phys;
+	BUG_ON(pmd_sect(*pmd));
+	if (pmd_none(*pmd)) {
+		phys_addr_t pte_phys;
 		BUG_ON(!pgtable_alloc);
-		pmd_phys = pgtable_alloc();
-		pmd = pmd_set_fixmap(pmd_phys);
-		__pud_populate(pud, pmd_phys, PUD_TYPE_TABLE);
-		pmd_clear_fixmap();
+		pte_phys = pgtable_alloc();
+		__pmd_populate(pmd, pte_phys, PMD_TYPE_TABLE);
 	}
-	BUG_ON(pud_bad(*pud));
+	BUG_ON(pmd_bad(*pmd));
+
+	do {
+		pgprot_t __prot = prot;
+
+		next = pte_cont_addr_end(addr, end);
+
+		/* use a contiguous mapping if the range is suitably aligned */
+		if ((((addr | next | phys) & ~CONT_PTE_MASK) == 0) &&
+		    (flags & NO_CONT_MAPPINGS) == 0)
+			__prot = __pgprot(pgprot_val(prot) | PTE_CONT);
+
+		init_pte(pmd, addr, next, phys, __prot);
+
+		phys += next - addr;
+	} while (addr = next, addr != end);
+}
+
+static void init_pmd(pud_t *pud, unsigned long addr, unsigned long end,
+		     phys_addr_t phys, pgprot_t prot,
+		     phys_addr_t (*pgtable_alloc)(void), int flags)
+{
+	unsigned long next;
+	pmd_t *pmd;

 	pmd = pmd_set_fixmap_offset(pud, addr);
 	do {
@@ -172,7 +190,7 @@ static void alloc_init_pmd(pud_t *pud, unsigned long addr, unsigned long end,

 		/* try section mapping first */
 		if (((addr | next | phys) & ~SECTION_MASK) == 0 &&
-		      !page_mappings_only) {
+		    (flags & NO_BLOCK_MAPPINGS) == 0) {
 			pmd_set_huge(pmd, phys, prot);

 			/*
@@ -182,8 +200,8 @@ static void alloc_init_pmd(pud_t *pud, unsigned long addr, unsigned long end,
 			BUG_ON(!pgattr_change_is_safe(pmd_val(old_pmd),
 						      pmd_val(*pmd)));
 		} else {
-			alloc_init_pte(pmd, addr, next, __phys_to_pfn(phys),
-				       prot, pgtable_alloc);
+			alloc_init_cont_pte(pmd, addr, next, phys, prot,
+					    pgtable_alloc, flags);

 			BUG_ON(pmd_val(old_pmd) != 0 &&
 			       pmd_val(old_pmd) != pmd_val(*pmd));
@@ -194,6 +212,41 @@ static void alloc_init_pmd(pud_t *pud, unsigned long addr, unsigned long end,
 	pmd_clear_fixmap();
 }

+static void alloc_init_cont_pmd(pud_t *pud, unsigned long addr,
+				unsigned long end, phys_addr_t phys,
+				pgprot_t prot,
+				phys_addr_t (*pgtable_alloc)(void), int flags)
+{
+	unsigned long next;
+
+	/*
+	 * Check for initial section mappings in the pgd/pud.
+	 */
+	BUG_ON(pud_sect(*pud));
+	if (pud_none(*pud)) {
+		phys_addr_t pmd_phys;
+		BUG_ON(!pgtable_alloc);
+		pmd_phys = pgtable_alloc();
+		__pud_populate(pud, pmd_phys, PUD_TYPE_TABLE);
+	}
+	BUG_ON(pud_bad(*pud));
+
+	do {
+		pgprot_t __prot = prot;
+
+		next = pmd_cont_addr_end(addr, end);
+
+		/* use a contiguous mapping if the range is suitably aligned */
+		if ((((addr | next | phys) & ~CONT_PMD_MASK) == 0) &&
+		    (flags & NO_CONT_MAPPINGS) == 0)
+			__prot = __pgprot(pgprot_val(prot) | PTE_CONT);
+
+		init_pmd(pud, addr, next, phys, __prot, pgtable_alloc, flags);
+
+		phys += next - addr;
+	} while (addr = next, addr != end);
+}
+
 static inline bool use_1G_block(unsigned long addr, unsigned long next,
 			unsigned long phys)
 {
@@ -209,7 +262,7 @@ static inline bool use_1G_block(unsigned long addr, unsigned long next,
 static void alloc_init_pud(pgd_t *pgd, unsigned long addr, unsigned long end,
 				  phys_addr_t phys, pgprot_t prot,
 				  phys_addr_t (*pgtable_alloc)(void),
-				  bool page_mappings_only)
+				  int flags)
 {
 	pud_t *pud;
 	unsigned long next;
@@ -231,7 +284,8 @@ static void alloc_init_pud(pgd_t *pgd, unsigned long addr, unsigned long end,
 		/*
 		 * For 4K granule only, attempt to put down a 1GB block
 		 */
-		if (use_1G_block(addr, next, phys) && !page_mappings_only) {
+		if (use_1G_block(addr, next, phys) &&
+		    (flags & NO_BLOCK_MAPPINGS) == 0) {
 			pud_set_huge(pud, phys, prot);

 			/*
@@ -241,8 +295,8 @@ static void alloc_init_pud(pgd_t *pgd, unsigned long addr, unsigned long end,
 			BUG_ON(!pgattr_change_is_safe(pud_val(old_pud),
 						      pud_val(*pud)));
 		} else {
-			alloc_init_pmd(pud, addr, next, phys, prot,
-				       pgtable_alloc, page_mappings_only);
+			alloc_init_cont_pmd(pud, addr, next, phys, prot,
+					    pgtable_alloc, flags);

 			BUG_ON(pud_val(old_pud) != 0 &&
 			       pud_val(old_pud) != pud_val(*pud));
@@ -257,7 +311,7 @@ static void __create_pgd_mapping(pgd_t *pgdir, phys_addr_t phys,
 				 unsigned long virt, phys_addr_t size,
 				 pgprot_t prot,
 				 phys_addr_t (*pgtable_alloc)(void),
-				 bool page_mappings_only)
+				 int flags)
 {
 	unsigned long addr, length, end, next;
 	pgd_t *pgd = pgd_offset_raw(pgdir, virt);
@@ -277,7 +331,7 @@ static void __create_pgd_mapping(pgd_t *pgdir, phys_addr_t phys,
 	do {
 		next = pgd_addr_end(addr, end);
 		alloc_init_pud(pgd, addr, next, phys, prot, pgtable_alloc,
-			       page_mappings_only);
+			       flags);
 		phys += next - addr;
 	} while (pgd++, addr = next, addr != end);
 }
@@ -306,82 +360,80 @@ static void __init create_mapping_noalloc(phys_addr_t phys, unsigned long virt,
 			&phys, virt);
 		return;
 	}
-	__create_pgd_mapping(init_mm.pgd, phys, virt, size, prot, NULL, false);
+	__create_pgd_mapping(init_mm.pgd, phys, virt, size, prot, NULL,
+			     NO_CONT_MAPPINGS);
 }

 void __init create_pgd_mapping(struct mm_struct *mm, phys_addr_t phys,
 			       unsigned long virt, phys_addr_t size,
 			       pgprot_t prot, bool page_mappings_only)
 {
+	int flags = 0;
+
 	BUG_ON(mm == &init_mm);

+	if (page_mappings_only)
+		flags = NO_BLOCK_MAPPINGS | NO_CONT_MAPPINGS;
+
 	__create_pgd_mapping(mm->pgd, phys, virt, size, prot,
-			     pgd_pgtable_alloc, page_mappings_only);
+			     pgd_pgtable_alloc, flags);
 }

-static void create_mapping_late(phys_addr_t phys, unsigned long virt,
-				  phys_addr_t size, pgprot_t prot)
+static void update_mapping_prot(phys_addr_t phys, unsigned long virt,
+				phys_addr_t size, pgprot_t prot)
 {
 	if (virt < VMALLOC_START) {
-		pr_warn("BUG: not creating mapping for %pa at 0x%016lx - outside kernel range\n",
+		pr_warn("BUG: not updating mapping for %pa at 0x%016lx - outside kernel range\n",
 			&phys, virt);
 		return;
 	}

-	__create_pgd_mapping(init_mm.pgd, phys, virt, size, prot,
-			     NULL, debug_pagealloc_enabled());
+	__create_pgd_mapping(init_mm.pgd, phys, virt, size, prot, NULL,
+			     NO_CONT_MAPPINGS);
+
+	/* flush the TLBs after updating live kernel mappings */
+	flush_tlb_kernel_range(virt, virt + size);
 }

-static void __init __map_memblock(pgd_t *pgd, phys_addr_t start, phys_addr_t end)
+static void __init __map_memblock(pgd_t *pgd, phys_addr_t start,
+				  phys_addr_t end, pgprot_t prot, int flags)
 {
-	phys_addr_t kernel_start = __pa_symbol(_text);
-	phys_addr_t kernel_end = __pa_symbol(__init_begin);
-
-	/*
-	 * Take care not to create a writable alias for the
-	 * read-only text and rodata sections of the kernel image.
-	 */
-
-	/* No overlap with the kernel text/rodata */
-	if (end < kernel_start || start >= kernel_end) {
-		__create_pgd_mapping(pgd, start, __phys_to_virt(start),
-				     end - start, PAGE_KERNEL,
-				     early_pgtable_alloc,
-				     debug_pagealloc_enabled());
-		return;
-	}
-
-	/*
-	 * This block overlaps the kernel text/rodata mappings.
-	 * Map the portion(s) which don't overlap.
-	 */
-	if (start < kernel_start)
-		__create_pgd_mapping(pgd, start,
-				     __phys_to_virt(start),
-				     kernel_start - start, PAGE_KERNEL,
-				     early_pgtable_alloc,
-				     debug_pagealloc_enabled());
-	if (kernel_end < end)
-		__create_pgd_mapping(pgd, kernel_end,
-				     __phys_to_virt(kernel_end),
-				     end - kernel_end, PAGE_KERNEL,
-				     early_pgtable_alloc,
-				     debug_pagealloc_enabled());
+	__create_pgd_mapping(pgd, start, __phys_to_virt(start), end - start,
+			     prot, early_pgtable_alloc, flags);
+}

+void __init mark_linear_text_alias_ro(void)
+{
 	/*
-	 * Map the linear alias of the [_text, __init_begin) interval as
-	 * read-only/non-executable. This makes the contents of the
-	 * region accessible to subsystems such as hibernate, but
-	 * protects it from inadvertent modification or execution.
+	 * Remove the write permissions from the linear alias of .text/.rodata
 	 */
-	__create_pgd_mapping(pgd, kernel_start, __phys_to_virt(kernel_start),
-			     kernel_end - kernel_start, PAGE_KERNEL_RO,
-			     early_pgtable_alloc, debug_pagealloc_enabled());
+	update_mapping_prot(__pa_symbol(_text), (unsigned long)lm_alias(_text),
+			    (unsigned long)__init_begin - (unsigned long)_text,
+			    PAGE_KERNEL_RO);
 }

 static void __init map_mem(pgd_t *pgd)
 {
+	phys_addr_t kernel_start = __pa_symbol(_text);
+	phys_addr_t kernel_end = __pa_symbol(__init_begin);
 	struct memblock_region *reg;
+	int flags = 0;
+
+	if (debug_pagealloc_enabled())
+		flags = NO_BLOCK_MAPPINGS | NO_CONT_MAPPINGS;
+
+	/*
+	 * Take care not to create a writable alias for the
+	 * read-only text and rodata sections of the kernel image.
+	 * So temporarily mark them as NOMAP to skip mappings in
+	 * the following for-loop
+	 */
+	memblock_mark_nomap(kernel_start, kernel_end - kernel_start);
+#ifdef CONFIG_KEXEC_CORE
+	if (crashk_res.end)
+		memblock_mark_nomap(crashk_res.start,
+				    resource_size(&crashk_res));
+#endif

 	/* map all the memory banks */
 	for_each_memblock(memory, reg) {
@@ -393,33 +445,57 @@ static void __init map_mem(pgd_t *pgd)
 		if (memblock_is_nomap(reg))
 			continue;

-		__map_memblock(pgd, start, end);
+		__map_memblock(pgd, start, end, PAGE_KERNEL, flags);
+	}
+
+	/*
+	 * Map the linear alias of the [_text, __init_begin) interval
+	 * as non-executable now, and remove the write permission in
+	 * mark_linear_text_alias_ro() below (which will be called after
+	 * alternative patching has completed). This makes the contents
+	 * of the region accessible to subsystems such as hibernate,
+	 * but protects it from inadvertent modification or execution.
+	 * Note that contiguous mappings cannot be remapped in this way,
+	 * so we should avoid them here.
+	 */
+	__map_memblock(pgd, kernel_start, kernel_end,
+		       PAGE_KERNEL, NO_CONT_MAPPINGS);
+	memblock_clear_nomap(kernel_start, kernel_end - kernel_start);
+
+#ifdef CONFIG_KEXEC_CORE
+	/*
+	 * Use page-level mappings here so that we can shrink the region
+	 * in page granularity and put back unused memory to buddy system
+	 * through /sys/kernel/kexec_crash_size interface.
+	 */
+	if (crashk_res.end) {
+		__map_memblock(pgd, crashk_res.start, crashk_res.end + 1,
+			       PAGE_KERNEL,
+			       NO_BLOCK_MAPPINGS | NO_CONT_MAPPINGS);
+		memblock_clear_nomap(crashk_res.start,
+				     resource_size(&crashk_res));
 	}
+#endif
 }

 void mark_rodata_ro(void)
 {
 	unsigned long section_size;

-	section_size = (unsigned long)_etext - (unsigned long)_text;
-	create_mapping_late(__pa_symbol(_text), (unsigned long)_text,
-			    section_size, PAGE_KERNEL_ROX);
 	/*
 	 * mark .rodata as read only. Use __init_begin rather than __end_rodata
 	 * to cover NOTES and EXCEPTION_TABLE.
 	 */
 	section_size = (unsigned long)__init_begin - (unsigned long)__start_rodata;
-	create_mapping_late(__pa_symbol(__start_rodata), (unsigned long)__start_rodata,
+	update_mapping_prot(__pa_symbol(__start_rodata), (unsigned long)__start_rodata,
 			    section_size, PAGE_KERNEL_RO);

-	/* flush the TLBs after updating live kernel mappings */
-	flush_tlb_all();
-
 	debug_checkwx();
 }

 static void __init map_kernel_segment(pgd_t *pgd, void *va_start, void *va_end,
-				      pgprot_t prot, struct vm_struct *vma)
+				      pgprot_t prot, struct vm_struct *vma,
+				      int flags)
 {
 	phys_addr_t pa_start = __pa_symbol(va_start);
 	unsigned long size = va_end - va_start;
@@ -428,7 +504,7 @@ static void __init map_kernel_segment(pgd_t *pgd, void *va_start, void *va_end,
 	BUG_ON(!PAGE_ALIGNED(size));

 	__create_pgd_mapping(pgd, pa_start, (unsigned long)va_start, size, prot,
-			     early_pgtable_alloc, debug_pagealloc_enabled());
+			     early_pgtable_alloc, flags);

 	vma->addr	= va_start;
 	vma->phys_addr	= pa_start;
@@ -439,18 +515,39 @@ static void __init map_kernel_segment(pgd_t *pgd, void *va_start, void *va_end,
 	vm_area_add_early(vma);
 }

+static int __init parse_rodata(char *arg)
+{
+	return strtobool(arg, &rodata_enabled);
+}
+early_param("rodata", parse_rodata);
+
 /*
 * Create fine-grained mappings for the kernel.
 */
 static void __init map_kernel(pgd_t *pgd)
 {
-	static struct vm_struct vmlinux_text, vmlinux_rodata, vmlinux_init, vmlinux_data;
+	static struct vm_struct vmlinux_text, vmlinux_rodata, vmlinux_inittext,
+				vmlinux_initdata, vmlinux_data;

-	map_kernel_segment(pgd, _text, _etext, PAGE_KERNEL_EXEC, &vmlinux_text);
-	map_kernel_segment(pgd, __start_rodata, __init_begin, PAGE_KERNEL, &vmlinux_rodata);
-	map_kernel_segment(pgd, __init_begin, __init_end, PAGE_KERNEL_EXEC,
-			   &vmlinux_init);
-	map_kernel_segment(pgd, _data, _end, PAGE_KERNEL, &vmlinux_data);
+	/*
+	 * External debuggers may need to write directly to the text
+	 * mapping to install SW breakpoints. Allow this (only) when
+	 * explicitly requested with rodata=off.
+	 */
+	pgprot_t text_prot = rodata_enabled ? PAGE_KERNEL_ROX : PAGE_KERNEL_EXEC;
+
+	/*
+	 * Only rodata will be remapped with different permissions later on,
+	 * all other segments are allowed to use contiguous mappings.
+	 */
+	map_kernel_segment(pgd, _text, _etext, text_prot, &vmlinux_text, 0);
+	map_kernel_segment(pgd, __start_rodata, __inittext_begin, PAGE_KERNEL,
+			   &vmlinux_rodata, NO_CONT_MAPPINGS);
+	map_kernel_segment(pgd, __inittext_begin, __inittext_end, text_prot,
+			   &vmlinux_inittext, 0);
+	map_kernel_segment(pgd, __initdata_begin, __initdata_end, PAGE_KERNEL,
+			   &vmlinux_initdata, 0);
+	map_kernel_segment(pgd, _data, _end, PAGE_KERNEL, &vmlinux_data, 0);

 	if (!pgd_val(*pgd_offset_raw(pgd, FIXADDR_START))) {
 		/*

--- a/arch/arm64/mm/pageattr.c
+++ b/arch/arm64/mm/pageattr.c
@@ -125,20 +125,23 @@ int set_memory_x(unsigned long addr, int numpages)
 }
 EXPORT_SYMBOL_GPL(set_memory_x);

-#ifdef CONFIG_DEBUG_PAGEALLOC
-void __kernel_map_pages(struct page *page, int numpages, int enable)
+int set_memory_valid(unsigned long addr, int numpages, int enable)
 {
-	unsigned long addr = (unsigned long) page_address(page);
-
 	if (enable)
-		__change_memory_common(addr, PAGE_SIZE * numpages,
+		return __change_memory_common(addr, PAGE_SIZE * numpages,
 					__pgprot(PTE_VALID),
 					__pgprot(0));
 	else
-		__change_memory_common(addr, PAGE_SIZE * numpages,
+		return __change_memory_common(addr, PAGE_SIZE * numpages,
 					__pgprot(0),
 					__pgprot(PTE_VALID));
 }
+
+#ifdef CONFIG_DEBUG_PAGEALLOC
+void __kernel_map_pages(struct page *page, int numpages, int enable)
+{
+	set_memory_valid((unsigned long)page_address(page), numpages, enable);
+}
 #ifdef CONFIG_HIBERNATION
 /*
 * When built with CONFIG_DEBUG_PAGEALLOC and CONFIG_HIBERNATION, this function

--- a/drivers/acpi/arm64/iort.c
+++ b/drivers/acpi/arm64/iort.c
@@ -225,7 +225,7 @@ static struct acpi_iort_node *iort_scan_node(enum acpi_iort_node_type type,

 		if (iort_node->type == type &&
 		    ACPI_SUCCESS(callback(iort_node, context)))
-				return iort_node;
+			return iort_node;

 		iort_node = ACPI_ADD_PTR(struct acpi_iort_node, iort_node,
 					 iort_node->length);
@@ -253,17 +253,15 @@ static acpi_status iort_match_node_callback(struct acpi_iort_node *node,
 					    void *context)
 {
 	struct device *dev = context;
-	acpi_status status;
+	acpi_status status = AE_NOT_FOUND;

 	if (node->type == ACPI_IORT_NODE_NAMED_COMPONENT) {
 		struct acpi_buffer buf = { ACPI_ALLOCATE_BUFFER, NULL };
 		struct acpi_device *adev = to_acpi_device_node(dev->fwnode);
 		struct acpi_iort_named_component *ncomp;

-		if (!adev) {
-			status = AE_NOT_FOUND;
+		if (!adev)
 			goto out;
-		}

 		status = acpi_get_name(adev->handle, ACPI_FULL_PATHNAME, &buf);
 		if (ACPI_FAILURE(status)) {
@@ -289,8 +287,6 @@ static acpi_status iort_match_node_callback(struct acpi_iort_node *node,
 		 */
 		status = pci_rc->pci_segment_number == pci_domain_nr(bus) ?
 							AE_OK : AE_NOT_FOUND;
-	} else {
-		status = AE_NOT_FOUND;
 	}
 out:
 	return status;
@@ -322,8 +318,7 @@ static int iort_id_map(struct acpi_iort_id_mapping *map, u8 type, u32 rid_in,

 static
 struct acpi_iort_node *iort_node_get_id(struct acpi_iort_node *node,
-					u32 *id_out, u8 type_mask,
-					int index)
+					u32 *id_out, int index)
 {
 	struct acpi_iort_node *parent;
 	struct acpi_iort_id_mapping *map;
@@ -345,9 +340,6 @@ struct acpi_iort_node *iort_node_get_id(struct acpi_iort_node *node,
 	parent = ACPI_ADD_PTR(struct acpi_iort_node, iort_table,
 			       map->output_reference);

-	if (!(IORT_TYPE_MASK(parent->type) & type_mask))
-		return NULL;
-
 	if (map->flags & ACPI_IORT_ID_SINGLE_MAPPING) {
 		if (node->type == ACPI_IORT_NODE_NAMED_COMPONENT ||
 		    node->type == ACPI_IORT_NODE_PCI_ROOT_COMPLEX) {
@@ -359,11 +351,11 @@ struct acpi_iort_node *iort_node_get_id(struct acpi_iort_node *node,
 	return NULL;
 }

-static struct acpi_iort_node *iort_node_map_rid(struct acpi_iort_node *node,
-						u32 rid_in, u32 *rid_out,
-						u8 type_mask)
+static struct acpi_iort_node *iort_node_map_id(struct acpi_iort_node *node,
+					       u32 id_in, u32 *id_out,
+					       u8 type_mask)
 {
-	u32 rid = rid_in;
+	u32 id = id_in;

 	/* Parse the ID mapping tree to find specified node type */
 	while (node) {
@@ -371,8 +363,8 @@ static struct acpi_iort_node *iort_node_map_rid(struct acpi_iort_node *node,
 		int i;

 		if (IORT_TYPE_MASK(node->type) & type_mask) {
-			if (rid_out)
-				*rid_out = rid;
+			if (id_out)
+				*id_out = id;
 			return node;
 		}

@@ -389,9 +381,9 @@ static struct acpi_iort_node *iort_node_map_rid(struct acpi_iort_node *node,
 			goto fail_map;
 		}

-		/* Do the RID translation */
+		/* Do the ID translation */
 		for (i = 0; i < node->mapping_count; i++, map++) {
-			if (!iort_id_map(map, node->type, rid, &rid))
+			if (!iort_id_map(map, node->type, id, &id))
 				break;
 		}

@@ -403,13 +395,41 @@ static struct acpi_iort_node *iort_node_map_rid(struct acpi_iort_node *node,
 	}

 fail_map:
-	/* Map input RID to output RID unchanged on mapping failure*/
-	if (rid_out)
-		*rid_out = rid_in;
+	/* Map input ID to output ID unchanged on mapping failure */
+	if (id_out)
+		*id_out = id_in;

 	return NULL;
 }

+static
+struct acpi_iort_node *iort_node_map_platform_id(struct acpi_iort_node *node,
+						 u32 *id_out, u8 type_mask,
+						 int index)
+{
+	struct acpi_iort_node *parent;
+	u32 id;
+
+	/* step 1: retrieve the initial dev id */
+	parent = iort_node_get_id(node, &id, index);
+	if (!parent)
+		return NULL;
+
+	/*
+	 * optional step 2: map the initial dev id if its parent is not
+	 * the target type we want, map it again for the use cases such
+	 * as NC (named component) -> SMMU -> ITS. If the type is matched,
+	 * return the initial dev id and its parent pointer directly.
+	 */
+	if (!(IORT_TYPE_MASK(parent->type) & type_mask))
+		parent = iort_node_map_id(parent, id, id_out, type_mask);
+	else
+		if (id_out)
+			*id_out = id;
+
+	return parent;
+}
+
 static struct acpi_iort_node *iort_find_dev_node(struct device *dev)
 {
 	struct pci_bus *pbus;
@@ -443,13 +463,38 @@ u32 iort_msi_map_rid(struct device *dev, u32 req_id)
 	if (!node)
 		return req_id;

-	iort_node_map_rid(node, req_id, &dev_id, IORT_MSI_TYPE);
+	iort_node_map_id(node, req_id, &dev_id, IORT_MSI_TYPE);
 	return dev_id;
 }

+/**
+ * iort_pmsi_get_dev_id() - Get the device id for a device
+ * @dev: The device for which the mapping is to be done.
+ * @dev_id: The device ID found.
+ *
+ * Returns: 0 for successful find a dev id, -ENODEV on error
+ */
+int iort_pmsi_get_dev_id(struct device *dev, u32 *dev_id)
+{
+	int i;
+	struct acpi_iort_node *node;
+
+	node = iort_find_dev_node(dev);
+	if (!node)
+		return -ENODEV;
+
+	for (i = 0; i < node->mapping_count; i++) {
+		if (iort_node_map_platform_id(node, dev_id, IORT_MSI_TYPE, i))
+			return 0;
+	}
+
+	return -ENODEV;
+}
+
 /**
 * iort_dev_find_its_id() - Find the ITS identifier for a device
 * @dev: The device.
+ * @req_id: Device's requester ID
 * @idx: Index of the ITS identifier list.
 * @its_id: ITS identifier.
 *
@@ -465,7 +510,7 @@ static int iort_dev_find_its_id(struct device *dev, u32 req_id,
 	if (!node)
 		return -ENXIO;

-	node = iort_node_map_rid(node, req_id, NULL, IORT_MSI_TYPE);
+	node = iort_node_map_id(node, req_id, NULL, IORT_MSI_TYPE);
 	if (!node)
 		return -ENXIO;

@@ -503,6 +548,56 @@ struct irq_domain *iort_get_device_domain(struct device *dev, u32 req_id)
 	return irq_find_matching_fwnode(handle, DOMAIN_BUS_PCI_MSI);
 }

+/**
+ * iort_get_platform_device_domain() - Find MSI domain related to a
+ * platform device
+ * @dev: the dev pointer associated with the platform device
+ *
+ * Returns: the MSI domain for this device, NULL otherwise
+ */
+static struct irq_domain *iort_get_platform_device_domain(struct device *dev)
+{
+	struct acpi_iort_node *node, *msi_parent;
+	struct fwnode_handle *iort_fwnode;
+	struct acpi_iort_its_group *its;
+	int i;
+
+	/* find its associated iort node */
+	node = iort_scan_node(ACPI_IORT_NODE_NAMED_COMPONENT,
+			      iort_match_node_callback, dev);
+	if (!node)
+		return NULL;
+
+	/* then find its msi parent node */
+	for (i = 0; i < node->mapping_count; i++) {
+		msi_parent = iort_node_map_platform_id(node, NULL,
+						       IORT_MSI_TYPE, i);
+		if (msi_parent)
+			break;
+	}
+
+	if (!msi_parent)
+		return NULL;
+
+	/* Move to ITS specific data */
+	its = (struct acpi_iort_its_group *)msi_parent->node_data;
+
+	iort_fwnode = iort_find_domain_token(its->identifiers[0]);
+	if (!iort_fwnode)
+		return NULL;
+
+	return irq_find_matching_fwnode(iort_fwnode, DOMAIN_BUS_PLATFORM_MSI);
+}
+
+void acpi_configure_pmsi_domain(struct device *dev)
+{
+	struct irq_domain *msi_domain;
+
+	msi_domain = iort_get_platform_device_domain(dev);
+	if (msi_domain)
+		dev_set_msi_domain(dev, msi_domain);
+}
+
 static int __get_pci_rid(struct pci_dev *pdev, u16 alias, void *data)
 {
 	u32 *rid = data;
@@ -594,8 +689,8 @@ const struct iommu_ops *iort_iommu_configure(struct device *dev)
 		if (!node)
 			return NULL;

-		parent = iort_node_map_rid(node, rid, &streamid,
-					   IORT_IOMMU_TYPE);
+		parent = iort_node_map_id(node, rid, &streamid,
+					  IORT_IOMMU_TYPE);

 		ops = iort_iommu_xlate(dev, parent, streamid);

@@ -607,14 +702,15 @@ const struct iommu_ops *iort_iommu_configure(struct device *dev)
 		if (!node)
 			return NULL;

-		parent = iort_node_get_id(node, &streamid,
-					  IORT_IOMMU_TYPE, i++);
+		parent = iort_node_map_platform_id(node, &streamid,
+						   IORT_IOMMU_TYPE, i++);

 		while (parent) {
 			ops = iort_iommu_xlate(dev, parent, streamid);

-			parent = iort_node_get_id(node, &streamid,
-						  IORT_IOMMU_TYPE, i++);
+			parent = iort_node_map_platform_id(node, &streamid,
+							   IORT_IOMMU_TYPE,
+							   i++);
 		}
 	}


--- a/drivers/acpi/glue.c
+++ b/drivers/acpi/glue.c
@@ -6,6 +6,8 @@
 *
 * This file is released under the GPLv2.
 */
+
+#include <linux/acpi_iort.h>
 #include <linux/export.h>
 #include <linux/init.h>
 #include <linux/list.h>
@@ -14,6 +16,7 @@
 #include <linux/rwsem.h>
 #include <linux/acpi.h>
 #include <linux/dma-mapping.h>
+#include <linux/platform_device.h>

 #include "internal.h"

@@ -322,6 +325,9 @@ static int acpi_platform_notify(struct device *dev)
 	if (!adev)
 		goto out;

+	if (dev->bus == &platform_bus_type)
+		acpi_configure_pmsi_domain(dev);
+
 	if (type && type->setup)
 		type->setup(dev);
 	else if (adev->handler && adev->handler->bind)

--- a/drivers/firmware/efi/libstub/fdt.c
+++ b/drivers/firmware/efi/libstub/fdt.c
@@ -16,6 +16,22 @@

 #include "efistub.h"

+#define EFI_DT_ADDR_CELLS_DEFAULT 2
+#define EFI_DT_SIZE_CELLS_DEFAULT 2
+
+static void fdt_update_cell_size(efi_system_table_t *sys_table, void *fdt)
+{
+	int offset;
+
+	offset = fdt_path_offset(fdt, "/");
+	/* Set the #address-cells and #size-cells values for an empty tree */
+
+	fdt_setprop_u32(fdt, offset, "#address-cells",
+			EFI_DT_ADDR_CELLS_DEFAULT);
+
+	fdt_setprop_u32(fdt, offset, "#size-cells", EFI_DT_SIZE_CELLS_DEFAULT);
+}
+
 static efi_status_t update_fdt(efi_system_table_t *sys_table, void *orig_fdt,
 			       unsigned long orig_fdt_size,
 			       void *fdt, int new_fdt_size, char *cmdline_ptr,
@@ -42,10 +58,18 @@ static efi_status_t update_fdt(efi_system_table_t *sys_table, void *orig_fdt,
 		}
 	}

-	if (orig_fdt)
+	if (orig_fdt) {
 		status = fdt_open_into(orig_fdt, fdt, new_fdt_size);
-	else
+	} else {
 		status = fdt_create_empty_tree(fdt, new_fdt_size);
+		if (status == 0) {
+			/*
+			 * Any failure from the following function is non
+			 * critical
+			 */
+			fdt_update_cell_size(sys_table, fdt);
+		}
+	}

 	if (status != 0)
 		goto fdt_set_fail;

--- a/drivers/perf/Kconfig
+++ b/drivers/perf/Kconfig
@@ -12,6 +12,10 @@ config ARM_PMU
 	  Say y if you want to use CPU performance monitors on ARM-based
 	  systems.

+config ARM_PMU_ACPI
+	depends on ARM_PMU && ACPI
+	def_bool y
+
 config QCOM_L2_PMU
 	bool "Qualcomm Technologies L2-cache PMU"
 	depends on ARCH_QCOM && ARM64 && PERF_EVENTS && ACPI
@@ -21,6 +25,16 @@ config QCOM_L2_PMU
 	  Adds the L2 cache PMU into the perf events subsystem for
 	  monitoring L2 cache events.

+config QCOM_L3_PMU
+	bool "Qualcomm Technologies L3-cache PMU"
+	depends on ARCH_QCOM && ARM64 && PERF_EVENTS && ACPI
+	select QCOM_IRQ_COMBINER
+	help
+	   Provides support for the L3 cache performance monitor unit (PMU)
+	   in Qualcomm Technologies processors.
+	   Adds the L3 cache PMU into the perf events subsystem for
+	   monitoring L3 cache events.
+
 config XGENE_PMU
        depends on PERF_EVENTS && ARCH_XGENE
        bool "APM X-Gene SoC PMU"

--- a/drivers/perf/Makefile
+++ b/drivers/perf/Makefile
-obj-$(CONFIG_ARM_PMU) += arm_pmu.o
+obj-$(CONFIG_ARM_PMU) += arm_pmu.o arm_pmu_platform.o
+obj-$(CONFIG_ARM_PMU_ACPI) += arm_pmu_acpi.o
 obj-$(CONFIG_QCOM_L2_PMU)	+= qcom_l2_pmu.o
+obj-$(CONFIG_QCOM_L3_PMU) += qcom_l3_pmu.o
 obj-$(CONFIG_XGENE_PMU) += xgene_pmu.o
--- a/drivers/perf/arm_pmu.c
+++ b/drivers/perf/arm_pmu.c
@@ -16,7 +16,6 @@
 #include <linux/cpu_pm.h>
 #include <linux/export.h>
 #include <linux/kernel.h>
-#include <linux/of_device.h>
 #include <linux/perf/arm_pmu.h>
 #include <linux/platform_device.h>
 #include <linux/slab.h>
@@ -25,7 +24,6 @@
 #include <linux/irq.h>
 #include <linux/irqdesc.h>

-#include <asm/cputype.h>
 #include <asm/irq_regs.h>

 static int
@@ -235,20 +233,15 @@ armpmu_add(struct perf_event *event, int flags)
 	struct pmu_hw_events *hw_events = this_cpu_ptr(armpmu->hw_events);
 	struct hw_perf_event *hwc = &event->hw;
 	int idx;
-	int err = 0;

 	/* An event following a process won't be stopped earlier */
 	if (!cpumask_test_cpu(smp_processor_id(), &armpmu->supported_cpus))
 		return -ENOENT;

-	perf_pmu_disable(event->pmu);
-
 	/* If we don't have a space for the counter then finish early. */
 	idx = armpmu->get_event_idx(hw_events, event);
-	if (idx < 0) {
-		err = idx;
-		goto out;
-	}
+	if (idx < 0)
+		return idx;

 	/*
 	 * If there is an event in the counter we are going to use then make
@@ -265,9 +258,7 @@ armpmu_add(struct perf_event *event, int flags)
 	/* Propagate our changes to the userspace mapping. */
 	perf_event_update_userpage(event);

-out:
-	perf_pmu_enable(event->pmu);
-	return err;
+	return 0;
 }

 static int
@@ -323,10 +314,16 @@ validate_group(struct perf_event *event)
 	return 0;
 }

+static struct arm_pmu_platdata *armpmu_get_platdata(struct arm_pmu *armpmu)
+{
+	struct platform_device *pdev = armpmu->plat_device;
+
+	return pdev ? dev_get_platdata(&pdev->dev) : NULL;
+}
+
 static irqreturn_t armpmu_dispatch_irq(int irq, void *dev)
 {
 	struct arm_pmu *armpmu;
-	struct platform_device *plat_device;
 	struct arm_pmu_platdata *plat;
 	int ret;
 	u64 start_clock, finish_clock;
@@ -338,8 +335,8 @@ static irqreturn_t armpmu_dispatch_irq(int irq, void *dev)
 	 * dereference.
 	 */
 	armpmu = *(void **)dev;
-	plat_device = armpmu->plat_device;
-	plat = dev_get_platdata(&plat_device->dev);
+
+	plat = armpmu_get_platdata(armpmu);

 	start_clock = sched_clock();
 	if (plat && plat->handle_irq)
@@ -352,37 +349,6 @@ static irqreturn_t armpmu_dispatch_irq(int irq, void *dev)
 	return ret;
 }

-static void
-armpmu_release_hardware(struct arm_pmu *armpmu)
-{
-	armpmu->free_irq(armpmu);
-}
-
-static int
-armpmu_reserve_hardware(struct arm_pmu *armpmu)
-{
-	int err = armpmu->request_irq(armpmu, armpmu_dispatch_irq);
-	if (err) {
-		armpmu_release_hardware(armpmu);
-		return err;
-	}
-
-	return 0;
-}
-
-static void
-hw_perf_event_destroy(struct perf_event *event)
-{
-	struct arm_pmu *armpmu = to_arm_pmu(event->pmu);
-	atomic_t *active_events	 = &armpmu->active_events;
-	struct mutex *pmu_reserve_mutex = &armpmu->reserve_mutex;
-
-	if (atomic_dec_and_mutex_lock(active_events, pmu_reserve_mutex)) {
-		armpmu_release_hardware(armpmu);
-		mutex_unlock(pmu_reserve_mutex);
-	}
-}
-
 static int
 event_requires_mode_exclusion(struct perf_event_attr *attr)
 {
@@ -455,8 +421,6 @@ __hw_perf_event_init(struct perf_event *event)
 static int armpmu_event_init(struct perf_event *event)
 {
 	struct arm_pmu *armpmu = to_arm_pmu(event->pmu);
-	int err = 0;
-	atomic_t *active_events = &armpmu->active_events;

 	/*
 	 * Reject CPU-affine events for CPUs that are of a different class to
@@ -476,26 +440,7 @@ static int armpmu_event_init(struct perf_event *event)
 	if (armpmu->map_event(event) == -ENOENT)
 		return -ENOENT;

-	event->destroy = hw_perf_event_destroy;
-
-	if (!atomic_inc_not_zero(active_events)) {
-		mutex_lock(&armpmu->reserve_mutex);
-		if (atomic_read(active_events) == 0)
-			err = armpmu_reserve_hardware(armpmu);
-
-		if (!err)
-			atomic_inc(active_events);
-		mutex_unlock(&armpmu->reserve_mutex);
-	}
-
-	if (err)
-		return err;
-
-	err = __hw_perf_event_init(event);
-	if (err)
-		hw_perf_event_destroy(event);
-
-	return err;
+	return __hw_perf_event_init(event);
 }

 static void armpmu_enable(struct pmu *pmu)
@@ -553,27 +498,6 @@ static struct attribute_group armpmu_common_attr_group = {
 	.attrs = armpmu_common_attrs,
 };

-static void armpmu_init(struct arm_pmu *armpmu)
-{
-	atomic_set(&armpmu->active_events, 0);
-	mutex_init(&armpmu->reserve_mutex);
-
-	armpmu->pmu = (struct pmu) {
-		.pmu_enable	= armpmu_enable,
-		.pmu_disable	= armpmu_disable,
-		.event_init	= armpmu_event_init,
-		.add		= armpmu_add,
-		.del		= armpmu_del,
-		.start		= armpmu_start,
-		.stop		= armpmu_stop,
-		.read		= armpmu_read,
-		.filter_match	= armpmu_filter_match,
-		.attr_groups	= armpmu->attr_groups,
-	};
-	armpmu->attr_groups[ARMPMU_ATTR_GROUP_COMMON] =
-		&armpmu_common_attr_group;
-}
-
 /* Set at runtime when we know what CPU type we are. */
 static struct arm_pmu *__oprofile_cpu_pmu;

@@ -601,113 +525,85 @@ int perf_num_counters(void)
 }
 EXPORT_SYMBOL_GPL(perf_num_counters);

-static void cpu_pmu_enable_percpu_irq(void *data)
+void armpmu_free_irq(struct arm_pmu *armpmu, int cpu)
 {
-	int irq = *(int *)data;
+	struct pmu_hw_events __percpu *hw_events = armpmu->hw_events;
+	int irq = per_cpu(hw_events->irq, cpu);

-	enable_percpu_irq(irq, IRQ_TYPE_NONE);
-}
+	if (!cpumask_test_and_clear_cpu(cpu, &armpmu->active_irqs))
+		return;

-static void cpu_pmu_disable_percpu_irq(void *data)
-{
-	int irq = *(int *)data;
+	if (irq_is_percpu(irq)) {
+		free_percpu_irq(irq, &hw_events->percpu_pmu);
+		cpumask_clear(&armpmu->active_irqs);
+		return;
+	}

-	disable_percpu_irq(irq);
+	free_irq(irq, per_cpu_ptr(&hw_events->percpu_pmu, cpu));
 }

-static void cpu_pmu_free_irq(struct arm_pmu *cpu_pmu)
+void armpmu_free_irqs(struct arm_pmu *armpmu)
 {
-	int i, irq, irqs;
-	struct platform_device *pmu_device = cpu_pmu->plat_device;
-	struct pmu_hw_events __percpu *hw_events = cpu_pmu->hw_events;
-
-	irqs = min(pmu_device->num_resources, num_possible_cpus());
-
-	irq = platform_get_irq(pmu_device, 0);
-	if (irq > 0 && irq_is_percpu(irq)) {
-		on_each_cpu_mask(&cpu_pmu->supported_cpus,
-				 cpu_pmu_disable_percpu_irq, &irq, 1);
-		free_percpu_irq(irq, &hw_events->percpu_pmu);
-	} else {
-		for (i = 0; i < irqs; ++i) {
-			int cpu = i;
-
-			if (cpu_pmu->irq_affinity)
-				cpu = cpu_pmu->irq_affinity[i];
+	int cpu;

-			if (!cpumask_test_and_clear_cpu(cpu, &cpu_pmu->active_irqs))
-				continue;
-			irq = platform_get_irq(pmu_device, i);
-			if (irq > 0)
-				free_irq(irq, per_cpu_ptr(&hw_events->percpu_pmu, cpu));
-		}
-	}
+	for_each_cpu(cpu, &armpmu->supported_cpus)
+		armpmu_free_irq(armpmu, cpu);
 }

-static int cpu_pmu_request_irq(struct arm_pmu *cpu_pmu, irq_handler_t handler)
+int armpmu_request_irq(struct arm_pmu *armpmu, int cpu)
 {
-	int i, err, irq, irqs;
-	struct platform_device *pmu_device = cpu_pmu->plat_device;
-	struct pmu_hw_events __percpu *hw_events = cpu_pmu->hw_events;
-
-	if (!pmu_device)
-		return -ENODEV;
-
-	irqs = min(pmu_device->num_resources, num_possible_cpus());
-	if (irqs < 1) {
-		pr_warn_once("perf/ARM: No irqs for PMU defined, sampling events not supported\n");
+	int err = 0;
+	struct pmu_hw_events __percpu *hw_events = armpmu->hw_events;
+	const irq_handler_t handler = armpmu_dispatch_irq;
+	int irq = per_cpu(hw_events->irq, cpu);
+	if (!irq)
 		return 0;
-	}

-	irq = platform_get_irq(pmu_device, 0);
-	if (irq > 0 && irq_is_percpu(irq)) {
+	if (irq_is_percpu(irq) && cpumask_empty(&armpmu->active_irqs)) {
 		err = request_percpu_irq(irq, handler, "arm-pmu",
 					 &hw_events->percpu_pmu);
-		if (err) {
-			pr_err("unable to request IRQ%d for ARM PMU counters\n",
-				irq);
-			return err;
-		}
+	} else if (irq_is_percpu(irq)) {
+		int other_cpu = cpumask_first(&armpmu->active_irqs);
+		int other_irq = per_cpu(hw_events->irq, other_cpu);

-		on_each_cpu_mask(&cpu_pmu->supported_cpus,
-				 cpu_pmu_enable_percpu_irq, &irq, 1);
+		if (irq != other_irq) {
+			pr_warn("mismatched PPIs detected.\n");
+			err = -EINVAL;
+		}
 	} else {
-		for (i = 0; i < irqs; ++i) {
-			int cpu = i;
+		err = request_irq(irq, handler,
+				  IRQF_NOBALANCING | IRQF_NO_THREAD, "arm-pmu",
+				  per_cpu_ptr(&hw_events->percpu_pmu, cpu));
+	}

-			err = 0;
-			irq = platform_get_irq(pmu_device, i);
-			if (irq < 0)
-				continue;
+	if (err) {
+		pr_err("unable to request IRQ%d for ARM PMU counters\n",
+			irq);
+		return err;
+	}

-			if (cpu_pmu->irq_affinity)
-				cpu = cpu_pmu->irq_affinity[i];
+	cpumask_set_cpu(cpu, &armpmu->active_irqs);

-			/*
-			 * If we have a single PMU interrupt that we can't shift,
-			 * assume that we're running on a uniprocessor machine and
-			 * continue. Otherwise, continue without this interrupt.
-			 */
-			if (irq_set_affinity(irq, cpumask_of(cpu)) && irqs > 1) {
-				pr_warn("unable to set irq affinity (irq=%d, cpu=%u)\n",
-					irq, cpu);
-				continue;
-			}
-
-			err = request_irq(irq, handler,
-					  IRQF_NOBALANCING | IRQF_NO_THREAD, "arm-pmu",
-					  per_cpu_ptr(&hw_events->percpu_pmu, cpu));
-			if (err) {
-				pr_err("unable to request IRQ%d for ARM PMU counters\n",
-					irq);
-				return err;
-			}
-
-			cpumask_set_cpu(cpu, &cpu_pmu->active_irqs);
-		}
+	return 0;
+}
+
+int armpmu_request_irqs(struct arm_pmu *armpmu)
+{
+	int cpu, err;
+
+	for_each_cpu(cpu, &armpmu->supported_cpus) {
+		err = armpmu_request_irq(armpmu, cpu);
+		if (err)
+			break;
 	}

-	return 0;
+	return err;
+}
+
+static int armpmu_get_cpu_irq(struct arm_pmu *pmu, int cpu)
+{
+	struct pmu_hw_events __percpu *hw_events = pmu->hw_events;
+	return per_cpu(hw_events->irq, cpu);
 }

 /*
@@ -719,11 +615,42 @@ static int cpu_pmu_request_irq(struct arm_pmu *cpu_pmu, irq_handler_t handler)
 static int arm_perf_starting_cpu(unsigned int cpu, struct hlist_node *node)
 {
 	struct arm_pmu *pmu = hlist_entry_safe(node, struct arm_pmu, node);
+	int irq;

 	if (!cpumask_test_cpu(cpu, &pmu->supported_cpus))
 		return 0;
 	if (pmu->reset)
 		pmu->reset(pmu);
+
+	irq = armpmu_get_cpu_irq(pmu, cpu);
+	if (irq) {
+		if (irq_is_percpu(irq)) {
+			enable_percpu_irq(irq, IRQ_TYPE_NONE);
+			return 0;
+		}
+
+		if (irq_force_affinity(irq, cpumask_of(cpu)) &&
+		    num_possible_cpus() > 1) {
+			pr_warn("unable to set irq affinity (irq=%d, cpu=%u)\n",
+				irq, cpu);
+		}
+	}
+
+	return 0;
+}
+
+static int arm_perf_teardown_cpu(unsigned int cpu, struct hlist_node *node)
+{
+	struct arm_pmu *pmu = hlist_entry_safe(node, struct arm_pmu, node);
+	int irq;
+
+	if (!cpumask_test_cpu(cpu, &pmu->supported_cpus))
+		return 0;
+
+	irq = armpmu_get_cpu_irq(pmu, cpu);
+	if (irq && irq_is_percpu(irq))
+		disable_percpu_irq(irq);
+
 	return 0;
 }

@@ -828,56 +755,22 @@ static inline void cpu_pm_pmu_unregister(struct arm_pmu *cpu_pmu) { }
 static int cpu_pmu_init(struct arm_pmu *cpu_pmu)
 {
 	int err;
-	int cpu;
-	struct pmu_hw_events __percpu *cpu_hw_events;
-
-	cpu_hw_events = alloc_percpu(struct pmu_hw_events);
-	if (!cpu_hw_events)
-		return -ENOMEM;

-	err = cpuhp_state_add_instance_nocalls(CPUHP_AP_PERF_ARM_STARTING,
-					       &cpu_pmu->node);
+	err = cpuhp_state_add_instance(CPUHP_AP_PERF_ARM_STARTING,
+				       &cpu_pmu->node);
 	if (err)
-		goto out_free;
+		goto out;

 	err = cpu_pm_pmu_register(cpu_pmu);
 	if (err)
 		goto out_unregister;

-	for_each_possible_cpu(cpu) {
-		struct pmu_hw_events *events = per_cpu_ptr(cpu_hw_events, cpu);
-		raw_spin_lock_init(&events->pmu_lock);
-		events->percpu_pmu = cpu_pmu;
-	}
-
-	cpu_pmu->hw_events	= cpu_hw_events;
-	cpu_pmu->request_irq	= cpu_pmu_request_irq;
-	cpu_pmu->free_irq	= cpu_pmu_free_irq;
-
-	/* Ensure the PMU has sane values out of reset. */
-	if (cpu_pmu->reset)
-		on_each_cpu_mask(&cpu_pmu->supported_cpus, cpu_pmu->reset,
-			 cpu_pmu, 1);
-
-	/* If no interrupts available, set the corresponding capability flag */
-	if (!platform_get_irq(cpu_pmu->plat_device, 0))
-		cpu_pmu->pmu.capabilities |= PERF_PMU_CAP_NO_INTERRUPT;
-
-	/*
-	 * This is a CPU PMU potentially in a heterogeneous configuration (e.g.
-	 * big.LITTLE). This is not an uncore PMU, and we have taken ctx
-	 * sharing into account (e.g. with our pmu::filter_match callback and
-	 * pmu::event_init group validation).
-	 */
-	cpu_pmu->pmu.capabilities |= PERF_PMU_CAP_HETEROGENEOUS_CPUS;
-
 	return 0;

 out_unregister:
 	cpuhp_state_remove_instance_nocalls(CPUHP_AP_PERF_ARM_STARTING,
 					    &cpu_pmu->node);
-out_free:
-	free_percpu(cpu_hw_events);
+out:
 	return err;
 }

@@ -886,177 +779,78 @@ static void cpu_pmu_destroy(struct arm_pmu *cpu_pmu)
 	cpu_pm_pmu_unregister(cpu_pmu);
 	cpuhp_state_remove_instance_nocalls(CPUHP_AP_PERF_ARM_STARTING,
 					    &cpu_pmu->node);
-	free_percpu(cpu_pmu->hw_events);
 }

-/*
- * CPU PMU identification and probing.
- */
-static int probe_current_pmu(struct arm_pmu *pmu,
-			     const struct pmu_probe_info *info)
+struct arm_pmu *armpmu_alloc(void)
 {
-	int cpu = get_cpu();
-	unsigned int cpuid = read_cpuid_id();
-	int ret = -ENODEV;
-
-	pr_info("probing PMU on CPU %d\n", cpu);
+	struct arm_pmu *pmu;
+	int cpu;

-	for (; info->init != NULL; info++) {
-		if ((cpuid & info->mask) != info->cpuid)
-			continue;
-		ret = info->init(pmu);
-		break;
+	pmu = kzalloc(sizeof(*pmu), GFP_KERNEL);
+	if (!pmu) {
+		pr_info("failed to allocate PMU device!\n");
+		goto out;
 	}

-	put_cpu();
-	return ret;
-}
-
-static int of_pmu_irq_cfg(struct arm_pmu *pmu)
-{
-	int *irqs, i = 0;
-	bool using_spi = false;
-	struct platform_device *pdev = pmu->plat_device;
-
-	irqs = kcalloc(pdev->num_resources, sizeof(*irqs), GFP_KERNEL);
-	if (!irqs)
-		return -ENOMEM;
-
-	do {
-		struct device_node *dn;
-		int cpu, irq;
-
-		/* See if we have an affinity entry */
-		dn = of_parse_phandle(pdev->dev.of_node, "interrupt-affinity", i);
-		if (!dn)
-			break;
-
-		/* Check the IRQ type and prohibit a mix of PPIs and SPIs */
-		irq = platform_get_irq(pdev, i);
-		if (irq > 0) {
-			bool spi = !irq_is_percpu(irq);
-
-			if (i > 0 && spi != using_spi) {
-				pr_err("PPI/SPI IRQ type mismatch for %s!\n",
-					dn->name);
-				of_node_put(dn);
-				kfree(irqs);
-				return -EINVAL;
-			}
-
-			using_spi = spi;
-		}
-
-		/* Now look up the logical CPU number */
-		for_each_possible_cpu(cpu) {
-			struct device_node *cpu_dn;
-
-			cpu_dn = of_cpu_device_node_get(cpu);
-			of_node_put(cpu_dn);
-
-			if (dn == cpu_dn)
-				break;
-		}
+	pmu->hw_events = alloc_percpu(struct pmu_hw_events);
+	if (!pmu->hw_events) {
+		pr_info("failed to allocate per-cpu PMU data.\n");
+		goto out_free_pmu;
+	}

-		if (cpu >= nr_cpu_ids) {
-			pr_warn("Failed to find logical CPU for %s\n",
-				dn->name);
-			of_node_put(dn);
-			cpumask_setall(&pmu->supported_cpus);
-			break;
-		}
-		of_node_put(dn);
+	pmu->pmu = (struct pmu) {
+		.pmu_enable	= armpmu_enable,
+		.pmu_disable	= armpmu_disable,
+		.event_init	= armpmu_event_init,
+		.add		= armpmu_add,
+		.del		= armpmu_del,
+		.start		= armpmu_start,
+		.stop		= armpmu_stop,
+		.read		= armpmu_read,
+		.filter_match	= armpmu_filter_match,
+		.attr_groups	= pmu->attr_groups,
+		/*
+		 * This is a CPU PMU potentially in a heterogeneous
+		 * configuration (e.g. big.LITTLE). This is not an uncore PMU,
+		 * and we have taken ctx sharing into account (e.g. with our
+		 * pmu::filter_match callback and pmu::event_init group
+		 * validation).
+		 */
+		.capabilities	= PERF_PMU_CAP_HETEROGENEOUS_CPUS,
+	};

-		/* For SPIs, we need to track the affinity per IRQ */
-		if (using_spi) {
-			if (i >= pdev->num_resources)
-				break;
+	pmu->attr_groups[ARMPMU_ATTR_GROUP_COMMON] =
+		&armpmu_common_attr_group;

-			irqs[i] = cpu;
-		}
+	for_each_possible_cpu(cpu) {
+		struct pmu_hw_events *events;

-		/* Keep track of the CPUs containing this PMU type */
-		cpumask_set_cpu(cpu, &pmu->supported_cpus);
-		i++;
-	} while (1);
-
-	/* If we didn't manage to parse anything, try the interrupt affinity */
-	if (cpumask_weight(&pmu->supported_cpus) == 0) {
-		int irq = platform_get_irq(pdev, 0);
-
-		if (irq > 0 && irq_is_percpu(irq)) {
-			/* If using PPIs, check the affinity of the partition */
-			int ret;
-
-			ret = irq_get_percpu_devid_partition(irq, &pmu->supported_cpus);
-			if (ret) {
-				kfree(irqs);
-				return ret;
-			}
-		} else {
-			/* Otherwise default to all CPUs */
-			cpumask_setall(&pmu->supported_cpus);
-		}
+		events = per_cpu_ptr(pmu->hw_events, cpu);
+		raw_spin_lock_init(&events->pmu_lock);
+		events->percpu_pmu = pmu;
 	}

-	/* If we matched up the IRQ affinities, use them to route the SPIs */
-	if (using_spi && i == pdev->num_resources)
-		pmu->irq_affinity = irqs;
-	else
-		kfree(irqs);
+	return pmu;

-	return 0;
+out_free_pmu:
+	kfree(pmu);
+out:
+	return NULL;
 }

-int arm_pmu_device_probe(struct platform_device *pdev,
-			 const struct of_device_id *of_table,
-			 const struct pmu_probe_info *probe_table)
+void armpmu_free(struct arm_pmu *pmu)
 {
-	const struct of_device_id *of_id;
-	const int (*init_fn)(struct arm_pmu *);
-	struct device_node *node = pdev->dev.of_node;
-	struct arm_pmu *pmu;
-	int ret = -ENODEV;
-
-	pmu = kzalloc(sizeof(struct arm_pmu), GFP_KERNEL);
-	if (!pmu) {
-		pr_info("failed to allocate PMU device!\n");
-		return -ENOMEM;
-	}
-
-	armpmu_init(pmu);
-
-	pmu->plat_device = pdev;
-
-	if (node && (of_id = of_match_node(of_table, pdev->dev.of_node))) {
-		init_fn = of_id->data;
-
-		pmu->secure_access = of_property_read_bool(pdev->dev.of_node,
-							   "secure-reg-access");
-
-		/* arm64 systems boot only as non-secure */
-		if (IS_ENABLED(CONFIG_ARM64) && pmu->secure_access) {
-			pr_warn("ignoring \"secure-reg-access\" property for arm64\n");
-			pmu->secure_access = false;
-		}
-
-		ret = of_pmu_irq_cfg(pmu);
-		if (!ret)
-			ret = init_fn(pmu);
-	} else if (probe_table) {
-		cpumask_setall(&pmu->supported_cpus);
-		ret = probe_current_pmu(pmu, probe_table);
-	}
-
-	if (ret) {
-		pr_info("%s: failed to probe PMU!\n", of_node_full_name(node));
-		goto out_free;
-	}
+	free_percpu(pmu->hw_events);
+	kfree(pmu);
+}

+int armpmu_register(struct arm_pmu *pmu)
+{
+	int ret;

 	ret = cpu_pmu_init(pmu);
 	if (ret)
-		goto out_free;
+		return ret;

 	ret = perf_pmu_register(&pmu->pmu, pmu->name, -1);
 	if (ret)
@@ -1066,17 +860,12 @@ int arm_pmu_device_probe(struct platform_device *pdev,
 		__oprofile_cpu_pmu = pmu;

 	pr_info("enabled with %s PMU driver, %d counters available\n",
-			pmu->name, pmu->num_events);
+		pmu->name, pmu->num_events);

 	return 0;

 out_destroy:
 	cpu_pmu_destroy(pmu);
-out_free:
-	pr_info("%s: failed to register PMU devices!\n",
-		of_node_full_name(node));
-	kfree(pmu->irq_affinity);
-	kfree(pmu);
 	return ret;
 }

@@ -1086,7 +875,8 @@ static int arm_pmu_hp_init(void)

 	ret = cpuhp_setup_state_multi(CPUHP_AP_PERF_ARM_STARTING,
 				      "perf/arm/pmu:starting",
-				      arm_perf_starting_cpu, NULL);
+				      arm_perf_starting_cpu,
+				      arm_perf_teardown_cpu);
 	if (ret)
 		pr_err("CPU hotplug notifier for ARM PMU could not be registered: %d\n",
 		       ret);

--- a/drivers/perf/arm_pmu_acpi.c
+++ b/drivers/perf/arm_pmu_acpi.c
+/*
+ * ACPI probing code for ARM performance counters.
+ *
+ * Copyright (C) 2017 ARM Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/acpi.h>
+#include <linux/cpumask.h>
+#include <linux/init.h>
+#include <linux/percpu.h>
+#include <linux/perf/arm_pmu.h>
+
+#include <asm/cputype.h>
+
+static DEFINE_PER_CPU(struct arm_pmu *, probed_pmus);
+static DEFINE_PER_CPU(int, pmu_irqs);
+
+static int arm_pmu_acpi_register_irq(int cpu)
+{
+	struct acpi_madt_generic_interrupt *gicc;
+	int gsi, trigger;
+
+	gicc = acpi_cpu_get_madt_gicc(cpu);
+	if (WARN_ON(!gicc))
+		return -EINVAL;
+
+	gsi = gicc->performance_interrupt;
+	if (gicc->flags & ACPI_MADT_PERFORMANCE_IRQ_MODE)
+		trigger = ACPI_EDGE_SENSITIVE;
+	else
+		trigger = ACPI_LEVEL_SENSITIVE;
+
+	/*
+	 * Helpfully, the MADT GICC doesn't have a polarity flag for the
+	 * "performance interrupt". Luckily, on compliant GICs the polarity is
+	 * a fixed value in HW (for both SPIs and PPIs) that we cannot change
+	 * from SW.
+	 *
+	 * Here we pass in ACPI_ACTIVE_HIGH to keep the core code happy. This
+	 * may not match the real polarity, but that should not matter.
+	 *
+	 * Other interrupt controllers are not supported with ACPI.
+	 */
+	return acpi_register_gsi(NULL, gsi, trigger, ACPI_ACTIVE_HIGH);
+}
+
+static void arm_pmu_acpi_unregister_irq(int cpu)
+{
+	struct acpi_madt_generic_interrupt *gicc;
+	int gsi;
+
+	gicc = acpi_cpu_get_madt_gicc(cpu);
+	if (!gicc)
+		return;
+
+	gsi = gicc->performance_interrupt;
+	acpi_unregister_gsi(gsi);
+}
+
+static int arm_pmu_acpi_parse_irqs(void)
+{
+	int irq, cpu, irq_cpu, err;
+
+	for_each_possible_cpu(cpu) {
+		irq = arm_pmu_acpi_register_irq(cpu);
+		if (irq < 0) {
+			err = irq;
+			pr_warn("Unable to parse ACPI PMU IRQ for CPU%d: %d\n",
+				cpu, err);
+			goto out_err;
+		} else if (irq == 0) {
+			pr_warn("No ACPI PMU IRQ for CPU%d\n", cpu);
+		}
+
+		per_cpu(pmu_irqs, cpu) = irq;
+	}
+
+	return 0;
+
+out_err:
+	for_each_possible_cpu(cpu) {
+		irq = per_cpu(pmu_irqs, cpu);
+		if (!irq)
+			continue;
+
+		arm_pmu_acpi_unregister_irq(cpu);
+
+		/*
+		 * Blat all copies of the IRQ so that we only unregister the
+		 * corresponding GSI once (e.g. when we have PPIs).
+		 */
+		for_each_possible_cpu(irq_cpu) {
+			if (per_cpu(pmu_irqs, irq_cpu) == irq)
+				per_cpu(pmu_irqs, irq_cpu) = 0;
+		}
+	}
+
+	return err;
+}
+
+static struct arm_pmu *arm_pmu_acpi_find_alloc_pmu(void)
+{
+	unsigned long cpuid = read_cpuid_id();
+	struct arm_pmu *pmu;
+	int cpu;
+
+	for_each_possible_cpu(cpu) {
+		pmu = per_cpu(probed_pmus, cpu);
+		if (!pmu || pmu->acpi_cpuid != cpuid)
+			continue;
+
+		return pmu;
+	}
+
+	pmu = armpmu_alloc();
+	if (!pmu) {
+		pr_warn("Unable to allocate PMU for CPU%d\n",
+			smp_processor_id());
+		return NULL;
+	}
+
+	pmu->acpi_cpuid = cpuid;
+
+	return pmu;
+}
+
+/*
+ * This must run before the common arm_pmu hotplug logic, so that we can
+ * associate a CPU and its interrupt before the common code tries to manage the
+ * affinity and so on.
+ *
+ * Note that hotplug events are serialized, so we cannot race with another CPU
+ * coming up. The perf core won't open events while a hotplug event is in
+ * progress.
+ */
+static int arm_pmu_acpi_cpu_starting(unsigned int cpu)
+{
+	struct arm_pmu *pmu;
+	struct pmu_hw_events __percpu *hw_events;
+	int irq;
+
+	/* If we've already probed this CPU, we have nothing to do */
+	if (per_cpu(probed_pmus, cpu))
+		return 0;
+
+	irq = per_cpu(pmu_irqs, cpu);
+
+	pmu = arm_pmu_acpi_find_alloc_pmu();
+	if (!pmu)
+		return -ENOMEM;
+
+	cpumask_set_cpu(cpu, &pmu->supported_cpus);
+
+	per_cpu(probed_pmus, cpu) = pmu;
+
+	/*
+	 * Log and request the IRQ so the core arm_pmu code can manage it.  In
+	 * some situations (e.g. mismatched PPIs), we may fail to request the
+	 * IRQ. However, it may be too late for us to do anything about it.
+	 * The common ARM PMU code will log a warning in this case.
+	 */
+	hw_events = pmu->hw_events;
+	per_cpu(hw_events->irq, cpu) = irq;
+	armpmu_request_irq(pmu, cpu);
+
+	/*
+	 * Ideally, we'd probe the PMU here when we find the first matching
+	 * CPU. We can't do that for several reasons; see the comment in
+	 * arm_pmu_acpi_init().
+	 *
+	 * So for the time being, we're done.
+	 */
+	return 0;
+}
+
+int arm_pmu_acpi_probe(armpmu_init_fn init_fn)
+{
+	int pmu_idx = 0;
+	int cpu, ret;
+
+	if (acpi_disabled)
+		return 0;
+
+	/*
+	 * Initialise and register the set of PMUs which we know about right
+	 * now. Ideally we'd do this in arm_pmu_acpi_cpu_starting() so that we
+	 * could handle late hotplug, but this may lead to deadlock since we
+	 * might try to register a hotplug notifier instance from within a
+	 * hotplug notifier.
+	 *
+	 * There's also the problem of having access to the right init_fn,
+	 * without tying this too deeply into the "real" PMU driver.
+	 *
+	 * For the moment, as with the platform/DT case, we need at least one
+	 * of a PMU's CPUs to be online at probe time.
+	 */
+	for_each_possible_cpu(cpu) {
+		struct arm_pmu *pmu = per_cpu(probed_pmus, cpu);
+		char *base_name;
+
+		if (!pmu || pmu->name)
+			continue;
+
+		ret = init_fn(pmu);
+		if (ret == -ENODEV) {
+			/* PMU not handled by this driver, or not present */
+			continue;
+		} else if (ret) {
+			pr_warn("Unable to initialise PMU for CPU%d\n", cpu);
+			return ret;
+		}
+
+		base_name = pmu->name;
+		pmu->name = kasprintf(GFP_KERNEL, "%s_%d", base_name, pmu_idx++);
+		if (!pmu->name) {
+			pr_warn("Unable to allocate PMU name for CPU%d\n", cpu);
+			return -ENOMEM;
+		}
+
+		ret = armpmu_register(pmu);
+		if (ret) {
+			pr_warn("Failed to register PMU for CPU%d\n", cpu);
+			return ret;
+		}
+	}
+
+	return 0;
+}
+
+static int arm_pmu_acpi_init(void)
+{
+	int ret;
+
+	if (acpi_disabled)
+		return 0;
+
+	/*
+	 * We can't request IRQs yet, since we don't know the cookie value
+	 * until we know which CPUs share the same logical PMU. We'll handle
+	 * that in arm_pmu_acpi_cpu_starting().
+	 */
+	ret = arm_pmu_acpi_parse_irqs();
+	if (ret)
+		return ret;
+
+	ret = cpuhp_setup_state(CPUHP_AP_PERF_ARM_ACPI_STARTING,
+				"perf/arm/pmu_acpi:starting",
+				arm_pmu_acpi_cpu_starting, NULL);
+
+	return ret;
+}
+subsys_initcall(arm_pmu_acpi_init)
--- a/drivers/perf/arm_pmu_platform.c
+++ b/drivers/perf/arm_pmu_platform.c
+/*
+ * platform_device probing code for ARM performance counters.
+ *
+ * Copyright (C) 2009 picoChip Designs, Ltd., Jamie Iles
+ * Copyright (C) 2010 ARM Ltd., Will Deacon <will.deacon@arm.com>
+ */
+#define pr_fmt(fmt) "hw perfevents: " fmt
+
+#include <linux/bug.h>
+#include <linux/cpumask.h>
+#include <linux/device.h>
+#include <linux/errno.h>
+#include <linux/irq.h>
+#include <linux/irqdesc.h>
+#include <linux/kconfig.h>
+#include <linux/of.h>
+#include <linux/of_device.h>
+#include <linux/percpu.h>
+#include <linux/perf/arm_pmu.h>
+#include <linux/platform_device.h>
+#include <linux/printk.h>
+#include <linux/smp.h>
+
+static int probe_current_pmu(struct arm_pmu *pmu,
+			     const struct pmu_probe_info *info)
+{
+	int cpu = get_cpu();
+	unsigned int cpuid = read_cpuid_id();
+	int ret = -ENODEV;
+
+	pr_info("probing PMU on CPU %d\n", cpu);
+
+	for (; info->init != NULL; info++) {
+		if ((cpuid & info->mask) != info->cpuid)
+			continue;
+		ret = info->init(pmu);
+		break;
+	}
+
+	put_cpu();
+	return ret;
+}
+
+static int pmu_parse_percpu_irq(struct arm_pmu *pmu, int irq)
+{
+	int cpu, ret;
+	struct pmu_hw_events __percpu *hw_events = pmu->hw_events;
+
+	ret = irq_get_percpu_devid_partition(irq, &pmu->supported_cpus);
+	if (ret)
+		return ret;
+
+	for_each_cpu(cpu, &pmu->supported_cpus)
+		per_cpu(hw_events->irq, cpu) = irq;
+
+	return 0;
+}
+
+static bool pmu_has_irq_affinity(struct device_node *node)
+{
+	return !!of_find_property(node, "interrupt-affinity", NULL);
+}
+
+static int pmu_parse_irq_affinity(struct device_node *node, int i)
+{
+	struct device_node *dn;
+	int cpu;
+
+	/*
+	 * If we don't have an interrupt-affinity property, we guess irq
+	 * affinity matches our logical CPU order, as we used to assume.
+	 * This is fragile, so we'll warn in pmu_parse_irqs().
+	 */
+	if (!pmu_has_irq_affinity(node))
+		return i;
+
+	dn = of_parse_phandle(node, "interrupt-affinity", i);
+	if (!dn) {
+		pr_warn("failed to parse interrupt-affinity[%d] for %s\n",
+			i, node->name);
+		return -EINVAL;
+	}
+
+	/* Now look up the logical CPU number */
+	for_each_possible_cpu(cpu) {
+		struct device_node *cpu_dn;
+
+		cpu_dn = of_cpu_device_node_get(cpu);
+		of_node_put(cpu_dn);
+
+		if (dn == cpu_dn)
+			break;
+	}
+
+	if (cpu >= nr_cpu_ids) {
+		pr_warn("failed to find logical CPU for %s\n", dn->name);
+	}
+
+	of_node_put(dn);
+
+	return cpu;
+}
+
+static int pmu_parse_irqs(struct arm_pmu *pmu)
+{
+	int i = 0, num_irqs;
+	struct platform_device *pdev = pmu->plat_device;
+	struct pmu_hw_events __percpu *hw_events = pmu->hw_events;
+
+	num_irqs = platform_irq_count(pdev);
+	if (num_irqs < 0) {
+		pr_err("unable to count PMU IRQs\n");
+		return num_irqs;
+	}
+
+	/*
+	 * In this case we have no idea which CPUs are covered by the PMU.
+	 * To match our prior behaviour, we assume all CPUs in this case.
+	 */
+	if (num_irqs == 0) {
+		pr_warn("no irqs for PMU, sampling events not supported\n");
+		pmu->pmu.capabilities |= PERF_PMU_CAP_NO_INTERRUPT;
+		cpumask_setall(&pmu->supported_cpus);
+		return 0;
+	}
+
+	if (num_irqs == 1) {
+		int irq = platform_get_irq(pdev, 0);
+		if (irq && irq_is_percpu(irq))
+			return pmu_parse_percpu_irq(pmu, irq);
+	}
+
+	if (!pmu_has_irq_affinity(pdev->dev.of_node)) {
+		pr_warn("no interrupt-affinity property for %s, guessing.\n",
+			of_node_full_name(pdev->dev.of_node));
+	}
+
+	/*
+	 * Some platforms have all PMU IRQs OR'd into a single IRQ, with a
+	 * special platdata function that attempts to demux them.
+	 */
+	if (dev_get_platdata(&pdev->dev))
+		cpumask_setall(&pmu->supported_cpus);
+
+	for (i = 0; i < num_irqs; i++) {
+		int cpu, irq;
+
+		irq = platform_get_irq(pdev, i);
+		if (WARN_ON(irq <= 0))
+			continue;
+
+		if (irq_is_percpu(irq)) {
+			pr_warn("multiple PPIs or mismatched SPI/PPI detected\n");
+			return -EINVAL;
+		}
+
+		cpu = pmu_parse_irq_affinity(pdev->dev.of_node, i);
+		if (cpu < 0)
+			return cpu;
+		if (cpu >= nr_cpu_ids)
+			continue;
+
+		if (per_cpu(hw_events->irq, cpu)) {
+			pr_warn("multiple PMU IRQs for the same CPU detected\n");
+			return -EINVAL;
+		}
+
+		per_cpu(hw_events->irq, cpu) = irq;
+		cpumask_set_cpu(cpu, &pmu->supported_cpus);
+	}
+
+	return 0;
+}
+
+int arm_pmu_device_probe(struct platform_device *pdev,
+			 const struct of_device_id *of_table,
+			 const struct pmu_probe_info *probe_table)
+{
+	const struct of_device_id *of_id;
+	armpmu_init_fn init_fn;
+	struct device_node *node = pdev->dev.of_node;
+	struct arm_pmu *pmu;
+	int ret = -ENODEV;
+
+	pmu = armpmu_alloc();
+	if (!pmu)
+		return -ENOMEM;
+
+	pmu->plat_device = pdev;
+
+	ret = pmu_parse_irqs(pmu);
+	if (ret)
+		goto out_free;
+
+	if (node && (of_id = of_match_node(of_table, pdev->dev.of_node))) {
+		init_fn = of_id->data;
+
+		pmu->secure_access = of_property_read_bool(pdev->dev.of_node,
+							   "secure-reg-access");
+
+		/* arm64 systems boot only as non-secure */
+		if (IS_ENABLED(CONFIG_ARM64) && pmu->secure_access) {
+			pr_warn("ignoring \"secure-reg-access\" property for arm64\n");
+			pmu->secure_access = false;
+		}
+
+		ret = init_fn(pmu);
+	} else if (probe_table) {
+		cpumask_setall(&pmu->supported_cpus);
+		ret = probe_current_pmu(pmu, probe_table);
+	}
+
+	if (ret) {
+		pr_info("%s: failed to probe PMU!\n", of_node_full_name(node));
+		goto out_free;
+	}
+
+	ret = armpmu_request_irqs(pmu);
+	if (ret)
+		goto out_free_irqs;
+
+	ret = armpmu_register(pmu);
+	if (ret)
+		goto out_free;
+
+	return 0;
+
+out_free_irqs:
+	armpmu_free_irqs(pmu);
+out_free:
+	pr_info("%s: failed to register PMU devices!\n",
+		of_node_full_name(node));
+	armpmu_free(pmu);
+	return ret;
+}
--- a/drivers/perf/qcom_l3_pmu.c
+++ b/drivers/perf/qcom_l3_pmu.c
+/*
+ * Driver for the L3 cache PMUs in Qualcomm Technologies chips.
+ *
+ * The driver supports a distributed cache architecture where the overall
+ * cache for a socket is comprised of multiple slices each with its own PMU.
+ * Access to each individual PMU is provided even though all CPUs share all
+ * the slices. User space needs to aggregate to individual counts to provide
+ * a global picture.
+ *
+ * See Documentation/perf/qcom_l3_pmu.txt for more details.
+ *
+ * Copyright (c) 2015-2017, The Linux Foundation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#include <linux/acpi.h>
+#include <linux/bitops.h>
+#include <linux/interrupt.h>
+#include <linux/io.h>
+#include <linux/list.h>
+#include <linux/module.h>
+#include <linux/perf_event.h>
+#include <linux/platform_device.h>
+
+/*
+ * General constants
+ */
+
+/* Number of counters on each PMU */
+#define L3_NUM_COUNTERS  8
+/* Mask for the event type field within perf_event_attr.config and EVTYPE reg */
+#define L3_EVTYPE_MASK   0xFF
+/*
+ * Bit position of the 'long counter' flag within perf_event_attr.config.
+ * Reserve some space between the event type and this flag to allow expansion
+ * in the event type field.
+ */
+#define L3_EVENT_LC_BIT  32
+
+/*
+ * Register offsets
+ */
+
+/* Perfmon registers */
+#define L3_HML3_PM_CR       0x000
+#define L3_HML3_PM_EVCNTR(__cntr) (0x420 + ((__cntr) & 0x7) * 8)
+#define L3_HML3_PM_CNTCTL(__cntr) (0x120 + ((__cntr) & 0x7) * 8)
+#define L3_HML3_PM_EVTYPE(__cntr) (0x220 + ((__cntr) & 0x7) * 8)
+#define L3_HML3_PM_FILTRA   0x300
+#define L3_HML3_PM_FILTRB   0x308
+#define L3_HML3_PM_FILTRC   0x310
+#define L3_HML3_PM_FILTRAM  0x304
+#define L3_HML3_PM_FILTRBM  0x30C
+#define L3_HML3_PM_FILTRCM  0x314
+
+/* Basic counter registers */
+#define L3_M_BC_CR         0x500
+#define L3_M_BC_SATROLL_CR 0x504
+#define L3_M_BC_CNTENSET   0x508
+#define L3_M_BC_CNTENCLR   0x50C
+#define L3_M_BC_INTENSET   0x510
+#define L3_M_BC_INTENCLR   0x514
+#define L3_M_BC_GANG       0x718
+#define L3_M_BC_OVSR       0x740
+#define L3_M_BC_IRQCTL     0x96C
+
+/*
+ * Bit field definitions
+ */
+
+/* L3_HML3_PM_CR */
+#define PM_CR_RESET           (0)
+
+/* L3_HML3_PM_XCNTCTL/L3_HML3_PM_CNTCTLx */
+#define PMCNT_RESET           (0)
+
+/* L3_HML3_PM_EVTYPEx */
+#define EVSEL(__val)          ((__val) & L3_EVTYPE_MASK)
+
+/* Reset value for all the filter registers */
+#define PM_FLTR_RESET         (0)
+
+/* L3_M_BC_CR */
+#define BC_RESET              (1UL << 1)
+#define BC_ENABLE             (1UL << 0)
+
+/* L3_M_BC_SATROLL_CR */
+#define BC_SATROLL_CR_RESET   (0)
+
+/* L3_M_BC_CNTENSET */
+#define PMCNTENSET(__cntr)    (1UL << ((__cntr) & 0x7))
+
+/* L3_M_BC_CNTENCLR */
+#define PMCNTENCLR(__cntr)    (1UL << ((__cntr) & 0x7))
+#define BC_CNTENCLR_RESET     (0xFF)
+
+/* L3_M_BC_INTENSET */
+#define PMINTENSET(__cntr)    (1UL << ((__cntr) & 0x7))
+
+/* L3_M_BC_INTENCLR */
+#define PMINTENCLR(__cntr)    (1UL << ((__cntr) & 0x7))
+#define BC_INTENCLR_RESET     (0xFF)
+
+/* L3_M_BC_GANG */
+#define GANG_EN(__cntr)       (1UL << ((__cntr) & 0x7))
+#define BC_GANG_RESET         (0)
+
+/* L3_M_BC_OVSR */
+#define PMOVSRCLR(__cntr)     (1UL << ((__cntr) & 0x7))
+#define PMOVSRCLR_RESET       (0xFF)
+
+/* L3_M_BC_IRQCTL */
+#define PMIRQONMSBEN(__cntr)  (1UL << ((__cntr) & 0x7))
+#define BC_IRQCTL_RESET       (0x0)
+
+/*
+ * Events
+ */
+
+#define L3_EVENT_CYCLES		0x01
+#define L3_EVENT_READ_HIT		0x20
+#define L3_EVENT_READ_MISS		0x21
+#define L3_EVENT_READ_HIT_D		0x22
+#define L3_EVENT_READ_MISS_D		0x23
+#define L3_EVENT_WRITE_HIT		0x24
+#define L3_EVENT_WRITE_MISS		0x25
+
+/*
+ * Decoding of settings from perf_event_attr
+ *
+ * The config format for perf events is:
+ * - config: bits 0-7: event type
+ *           bit  32:  HW counter size requested, 0: 32 bits, 1: 64 bits
+ */
+
+static inline u32 get_event_type(struct perf_event *event)
+{
+	return (event->attr.config) & L3_EVTYPE_MASK;
+}
+
+static inline bool event_uses_long_counter(struct perf_event *event)
+{
+	return !!(event->attr.config & BIT_ULL(L3_EVENT_LC_BIT));
+}
+
+static inline int event_num_counters(struct perf_event *event)
+{
+	return event_uses_long_counter(event) ? 2 : 1;
+}
+
+/*
+ * Main PMU, inherits from the core perf PMU type
+ */
+struct l3cache_pmu {
+	struct pmu		pmu;
+	struct hlist_node	node;
+	void __iomem		*regs;
+	struct perf_event	*events[L3_NUM_COUNTERS];
+	unsigned long		used_mask[BITS_TO_LONGS(L3_NUM_COUNTERS)];
+	cpumask_t		cpumask;
+};
+
+#define to_l3cache_pmu(p) (container_of(p, struct l3cache_pmu, pmu))
+
+/*
+ * Type used to group hardware counter operations
+ *
+ * Used to implement two types of hardware counters, standard (32bits) and
+ * long (64bits). The hardware supports counter chaining which we use to
+ * implement long counters. This support is exposed via the 'lc' flag field
+ * in perf_event_attr.config.
+ */
+struct l3cache_event_ops {
+	/* Called to start event monitoring */
+	void (*start)(struct perf_event *event);
+	/* Called to stop event monitoring */
+	void (*stop)(struct perf_event *event, int flags);
+	/* Called to update the perf_event */
+	void (*update)(struct perf_event *event);
+};
+
+/*
+ * Implementation of long counter operations
+ *
+ * 64bit counters are implemented by chaining two of the 32bit physical
+ * counters. The PMU only supports chaining of adjacent even/odd pairs
+ * and for simplicity the driver always configures the odd counter to
+ * count the overflows of the lower-numbered even counter. Note that since
+ * the resulting hardware counter is 64bits no IRQs are required to maintain
+ * the software counter which is also 64bits.
+ */
+
+static void qcom_l3_cache__64bit_counter_start(struct perf_event *event)
+{
+	struct l3cache_pmu *l3pmu = to_l3cache_pmu(event->pmu);
+	int idx = event->hw.idx;
+	u32 evsel = get_event_type(event);
+	u32 gang;
+
+	/* Set the odd counter to count the overflows of the even counter */
+	gang = readl_relaxed(l3pmu->regs + L3_M_BC_GANG);
+	gang |= GANG_EN(idx + 1);
+	writel_relaxed(gang, l3pmu->regs + L3_M_BC_GANG);
+
+	/* Initialize the hardware counters and reset prev_count*/
+	local64_set(&event->hw.prev_count, 0);
+	writel_relaxed(0, l3pmu->regs + L3_HML3_PM_EVCNTR(idx + 1));
+	writel_relaxed(0, l3pmu->regs + L3_HML3_PM_EVCNTR(idx));
+
+	/*
+	 * Set the event types, the upper half must use zero and the lower
+	 * half the actual event type
+	 */
+	writel_relaxed(EVSEL(0), l3pmu->regs + L3_HML3_PM_EVTYPE(idx + 1));
+	writel_relaxed(EVSEL(evsel), l3pmu->regs + L3_HML3_PM_EVTYPE(idx));
+
+	/* Finally, enable the counters */
+	writel_relaxed(PMCNT_RESET, l3pmu->regs + L3_HML3_PM_CNTCTL(idx + 1));
+	writel_relaxed(PMCNTENSET(idx + 1), l3pmu->regs + L3_M_BC_CNTENSET);
+	writel_relaxed(PMCNT_RESET, l3pmu->regs + L3_HML3_PM_CNTCTL(idx));
+	writel_relaxed(PMCNTENSET(idx), l3pmu->regs + L3_M_BC_CNTENSET);
+}
+
+static void qcom_l3_cache__64bit_counter_stop(struct perf_event *event,
+					      int flags)
+{
+	struct l3cache_pmu *l3pmu = to_l3cache_pmu(event->pmu);
+	int idx = event->hw.idx;
+	u32 gang = readl_relaxed(l3pmu->regs + L3_M_BC_GANG);
+
+	/* Disable the counters */
+	writel_relaxed(PMCNTENCLR(idx), l3pmu->regs + L3_M_BC_CNTENCLR);
+	writel_relaxed(PMCNTENCLR(idx + 1), l3pmu->regs + L3_M_BC_CNTENCLR);
+
+	/* Disable chaining */
+	writel_relaxed(gang & ~GANG_EN(idx + 1), l3pmu->regs + L3_M_BC_GANG);
+}
+
+static void qcom_l3_cache__64bit_counter_update(struct perf_event *event)
+{
+	struct l3cache_pmu *l3pmu = to_l3cache_pmu(event->pmu);
+	int idx = event->hw.idx;
+	u32 hi, lo;
+	u64 prev, new;
+
+	do {
+		prev = local64_read(&event->hw.prev_count);
+		do {
+			hi = readl_relaxed(l3pmu->regs + L3_HML3_PM_EVCNTR(idx + 1));
+			lo = readl_relaxed(l3pmu->regs + L3_HML3_PM_EVCNTR(idx));
+		} while (hi != readl_relaxed(l3pmu->regs + L3_HML3_PM_EVCNTR(idx + 1)));
+		new = ((u64)hi << 32) | lo;
+	} while (local64_cmpxchg(&event->hw.prev_count, prev, new) != prev);
+
+	local64_add(new - prev, &event->count);
+}
+
+static const struct l3cache_event_ops event_ops_long = {
+	.start = qcom_l3_cache__64bit_counter_start,
+	.stop = qcom_l3_cache__64bit_counter_stop,
+	.update = qcom_l3_cache__64bit_counter_update,
+};
+
+/*
+ * Implementation of standard counter operations
+ *
+ * 32bit counters use a single physical counter and a hardware feature that
+ * asserts the overflow IRQ on the toggling of the most significant bit in
+ * the counter. This feature allows the counters to be left free-running
+ * without needing the usual reprogramming required to properly handle races
+ * during concurrent calls to update.
+ */
+
+static void qcom_l3_cache__32bit_counter_start(struct perf_event *event)
+{
+	struct l3cache_pmu *l3pmu = to_l3cache_pmu(event->pmu);
+	int idx = event->hw.idx;
+	u32 evsel = get_event_type(event);
+	u32 irqctl = readl_relaxed(l3pmu->regs + L3_M_BC_IRQCTL);
+
+	/* Set the counter to assert the overflow IRQ on MSB toggling */
+	writel_relaxed(irqctl | PMIRQONMSBEN(idx), l3pmu->regs + L3_M_BC_IRQCTL);
+
+	/* Initialize the hardware counter and reset prev_count*/
+	local64_set(&event->hw.prev_count, 0);
+	writel_relaxed(0, l3pmu->regs + L3_HML3_PM_EVCNTR(idx));
+
+	/* Set the event type */
+	writel_relaxed(EVSEL(evsel), l3pmu->regs + L3_HML3_PM_EVTYPE(idx));
+
+	/* Enable interrupt generation by this counter */
+	writel_relaxed(PMINTENSET(idx), l3pmu->regs + L3_M_BC_INTENSET);
+
+	/* Finally, enable the counter */
+	writel_relaxed(PMCNT_RESET, l3pmu->regs + L3_HML3_PM_CNTCTL(idx));
+	writel_relaxed(PMCNTENSET(idx), l3pmu->regs + L3_M_BC_CNTENSET);
+}
+
+static void qcom_l3_cache__32bit_counter_stop(struct perf_event *event,
+					      int flags)
+{
+	struct l3cache_pmu *l3pmu = to_l3cache_pmu(event->pmu);
+	int idx = event->hw.idx;
+	u32 irqctl = readl_relaxed(l3pmu->regs + L3_M_BC_IRQCTL);
+
+	/* Disable the counter */
+	writel_relaxed(PMCNTENCLR(idx), l3pmu->regs + L3_M_BC_CNTENCLR);
+
+	/* Disable interrupt generation by this counter */
+	writel_relaxed(PMINTENCLR(idx), l3pmu->regs + L3_M_BC_INTENCLR);
+
+	/* Set the counter to not assert the overflow IRQ on MSB toggling */
+	writel_relaxed(irqctl & ~PMIRQONMSBEN(idx), l3pmu->regs + L3_M_BC_IRQCTL);
+}
+
+static void qcom_l3_cache__32bit_counter_update(struct perf_event *event)
+{
+	struct l3cache_pmu *l3pmu = to_l3cache_pmu(event->pmu);
+	int idx = event->hw.idx;
+	u32 prev, new;
+
+	do {
+		prev = local64_read(&event->hw.prev_count);
+		new = readl_relaxed(l3pmu->regs + L3_HML3_PM_EVCNTR(idx));
+	} while (local64_cmpxchg(&event->hw.prev_count, prev, new) != prev);
+
+	local64_add(new - prev, &event->count);
+}
+
+static const struct l3cache_event_ops event_ops_std = {
+	.start = qcom_l3_cache__32bit_counter_start,
+	.stop = qcom_l3_cache__32bit_counter_stop,
+	.update = qcom_l3_cache__32bit_counter_update,
+};
+
+/* Retrieve the appropriate operations for the given event */
+static
+const struct l3cache_event_ops *l3cache_event_get_ops(struct perf_event *event)
+{
+	if (event_uses_long_counter(event))
+		return &event_ops_long;
+	else
+		return &event_ops_std;
+}
+
+/*
+ * Top level PMU functions.
+ */
+
+static inline void qcom_l3_cache__init(struct l3cache_pmu *l3pmu)
+{
+	int i;
+
+	writel_relaxed(BC_RESET, l3pmu->regs + L3_M_BC_CR);
+
+	/*
+	 * Use writel for the first programming command to ensure the basic
+	 * counter unit is stopped before proceeding
+	 */
+	writel(BC_SATROLL_CR_RESET, l3pmu->regs + L3_M_BC_SATROLL_CR);
+
+	writel_relaxed(BC_CNTENCLR_RESET, l3pmu->regs + L3_M_BC_CNTENCLR);
+	writel_relaxed(BC_INTENCLR_RESET, l3pmu->regs + L3_M_BC_INTENCLR);
+	writel_relaxed(PMOVSRCLR_RESET, l3pmu->regs + L3_M_BC_OVSR);
+	writel_relaxed(BC_GANG_RESET, l3pmu->regs + L3_M_BC_GANG);
+	writel_relaxed(BC_IRQCTL_RESET, l3pmu->regs + L3_M_BC_IRQCTL);
+	writel_relaxed(PM_CR_RESET, l3pmu->regs + L3_HML3_PM_CR);
+
+	for (i = 0; i < L3_NUM_COUNTERS; ++i) {
+		writel_relaxed(PMCNT_RESET, l3pmu->regs + L3_HML3_PM_CNTCTL(i));
+		writel_relaxed(EVSEL(0), l3pmu->regs + L3_HML3_PM_EVTYPE(i));
+	}
+
+	writel_relaxed(PM_FLTR_RESET, l3pmu->regs + L3_HML3_PM_FILTRA);
+	writel_relaxed(PM_FLTR_RESET, l3pmu->regs + L3_HML3_PM_FILTRAM);
+	writel_relaxed(PM_FLTR_RESET, l3pmu->regs + L3_HML3_PM_FILTRB);
+	writel_relaxed(PM_FLTR_RESET, l3pmu->regs + L3_HML3_PM_FILTRBM);
+	writel_relaxed(PM_FLTR_RESET, l3pmu->regs + L3_HML3_PM_FILTRC);
+	writel_relaxed(PM_FLTR_RESET, l3pmu->regs + L3_HML3_PM_FILTRCM);
+
+	/*
+	 * Use writel here to ensure all programming commands are done
+	 *  before proceeding
+	 */
+	writel(BC_ENABLE, l3pmu->regs + L3_M_BC_CR);
+}
+
+static irqreturn_t qcom_l3_cache__handle_irq(int irq_num, void *data)
+{
+	struct l3cache_pmu *l3pmu = data;
+	/* Read the overflow status register */
+	long status = readl_relaxed(l3pmu->regs + L3_M_BC_OVSR);
+	int idx;
+
+	if (status == 0)
+		return IRQ_NONE;
+
+	/* Clear the bits we read on the overflow status register */
+	writel_relaxed(status, l3pmu->regs + L3_M_BC_OVSR);
+
+	for_each_set_bit(idx, &status, L3_NUM_COUNTERS) {
+		struct perf_event *event;
+		const struct l3cache_event_ops *ops;
+
+		event = l3pmu->events[idx];
+		if (!event)
+			continue;
+
+		/*
+		 * Since the IRQ is not enabled for events using long counters
+		 * we should never see one of those here, however, be consistent
+		 * and use the ops indirections like in the other operations.
+		 */
+
+		ops = l3cache_event_get_ops(event);
+		ops->update(event);
+	}
+
+	return IRQ_HANDLED;
+}
+
+/*
+ * Implementation of abstract pmu functionality required by
+ * the core perf events code.
+ */
+
+static void qcom_l3_cache__pmu_enable(struct pmu *pmu)
+{
+	struct l3cache_pmu *l3pmu = to_l3cache_pmu(pmu);
+
+	/* Ensure the other programming commands are observed before enabling */
+	wmb();
+
+	writel_relaxed(BC_ENABLE, l3pmu->regs + L3_M_BC_CR);
+}
+
+static void qcom_l3_cache__pmu_disable(struct pmu *pmu)
+{
+	struct l3cache_pmu *l3pmu = to_l3cache_pmu(pmu);
+
+	writel_relaxed(0, l3pmu->regs + L3_M_BC_CR);
+
+	/* Ensure the basic counter unit is stopped before proceeding */
+	wmb();
+}
+
+/*
+ * We must NOT create groups containing events from multiple hardware PMUs,
+ * although mixing different software and hardware PMUs is allowed.
+ */
+static bool qcom_l3_cache__validate_event_group(struct perf_event *event)
+{
+	struct perf_event *leader = event->group_leader;
+	struct perf_event *sibling;
+	int counters = 0;
+
+	if (leader->pmu != event->pmu && !is_software_event(leader))
+		return false;
+
+	counters = event_num_counters(event);
+	counters += event_num_counters(leader);
+
+	list_for_each_entry(sibling, &leader->sibling_list, group_entry) {
+		if (is_software_event(sibling))
+			continue;
+		if (sibling->pmu != event->pmu)
+			return false;
+		counters += event_num_counters(sibling);
+	}
+
+	/*
+	 * If the group requires more counters than the HW has, it
+	 * cannot ever be scheduled.
+	 */
+	return counters <= L3_NUM_COUNTERS;
+}
+
+static int qcom_l3_cache__event_init(struct perf_event *event)
+{
+	struct l3cache_pmu *l3pmu = to_l3cache_pmu(event->pmu);
+	struct hw_perf_event *hwc = &event->hw;
+
+	/*
+	 * Is the event for this PMU?
+	 */
+	if (event->attr.type != event->pmu->type)
+		return -ENOENT;
+
+	/*
+	 * There are no per-counter mode filters in the PMU.
+	 */
+	if (event->attr.exclude_user || event->attr.exclude_kernel ||
+	    event->attr.exclude_hv || event->attr.exclude_idle)
+		return -EINVAL;
+
+	/*
+	 * Sampling not supported since these events are not core-attributable.
+	 */
+	if (hwc->sample_period)
+		return -EINVAL;
+
+	/*
+	 * Task mode not available, we run the counters as socket counters,
+	 * not attributable to any CPU and therefore cannot attribute per-task.
+	 */
+	if (event->cpu < 0)
+		return -EINVAL;
+
+	/* Validate the group */
+	if (!qcom_l3_cache__validate_event_group(event))
+		return -EINVAL;
+
+	hwc->idx = -1;
+
+	/*
+	 * Many perf core operations (eg. events rotation) operate on a
+	 * single CPU context. This is obvious for CPU PMUs, where one
+	 * expects the same sets of events being observed on all CPUs,
+	 * but can lead to issues for off-core PMUs, like this one, where
+	 * each event could be theoretically assigned to a different CPU.
+	 * To mitigate this, we enforce CPU assignment to one designated
+	 * processor (the one described in the "cpumask" attribute exported
+	 * by the PMU device). perf user space tools honor this and avoid
+	 * opening more than one copy of the events.
+	 */
+	event->cpu = cpumask_first(&l3pmu->cpumask);
+
+	return 0;
+}
+
+static void qcom_l3_cache__event_start(struct perf_event *event, int flags)
+{
+	struct hw_perf_event *hwc = &event->hw;
+	const struct l3cache_event_ops *ops = l3cache_event_get_ops(event);
+
+	hwc->state = 0;
+	ops->start(event);
+}
+
+static void qcom_l3_cache__event_stop(struct perf_event *event, int flags)
+{
+	struct hw_perf_event *hwc = &event->hw;
+	const struct l3cache_event_ops *ops = l3cache_event_get_ops(event);
+
+	if (hwc->state & PERF_HES_STOPPED)
+		return;
+
+	ops->stop(event, flags);
+	if (flags & PERF_EF_UPDATE)
+		ops->update(event);
+	hwc->state |= PERF_HES_STOPPED | PERF_HES_UPTODATE;
+}
+
+static int qcom_l3_cache__event_add(struct perf_event *event, int flags)
+{
+	struct l3cache_pmu *l3pmu = to_l3cache_pmu(event->pmu);
+	struct hw_perf_event *hwc = &event->hw;
+	int order = event_uses_long_counter(event) ? 1 : 0;
+	int idx;
+
+	/*
+	 * Try to allocate a counter.
+	 */
+	idx = bitmap_find_free_region(l3pmu->used_mask, L3_NUM_COUNTERS, order);
+	if (idx < 0)
+		/* The counters are all in use. */
+		return -EAGAIN;
+
+	hwc->idx = idx;
+	hwc->state = PERF_HES_STOPPED | PERF_HES_UPTODATE;
+	l3pmu->events[idx] = event;
+
+	if (flags & PERF_EF_START)
+		qcom_l3_cache__event_start(event, 0);
+
+	/* Propagate changes to the userspace mapping. */
+	perf_event_update_userpage(event);
+
+	return 0;
+}
+
+static void qcom_l3_cache__event_del(struct perf_event *event, int flags)
+{
+	struct l3cache_pmu *l3pmu = to_l3cache_pmu(event->pmu);
+	struct hw_perf_event *hwc = &event->hw;
+	int order = event_uses_long_counter(event) ? 1 : 0;
+
+	/* Stop and clean up */
+	qcom_l3_cache__event_stop(event,  flags | PERF_EF_UPDATE);
+	l3pmu->events[hwc->idx] = NULL;
+	bitmap_release_region(l3pmu->used_mask, hwc->idx, order);
+
+	/* Propagate changes to the userspace mapping. */
+	perf_event_update_userpage(event);
+}
+
+static void qcom_l3_cache__event_read(struct perf_event *event)
+{
+	const struct l3cache_event_ops *ops = l3cache_event_get_ops(event);
+
+	ops->update(event);
+}
+
+/*
+ * Add sysfs attributes
+ *
+ * We export:
+ * - formats, used by perf user space and other tools to configure events
+ * - events, used by perf user space and other tools to create events
+ *   symbolically, e.g.:
+ *     perf stat -a -e l3cache_0_0/event=read-miss/ ls
+ *     perf stat -a -e l3cache_0_0/event=0x21/ ls
+ * - cpumask, used by perf user space and other tools to know on which CPUs
+ *   to open the events
+ */
+
+/* formats */
+
+static ssize_t l3cache_pmu_format_show(struct device *dev,
+				       struct device_attribute *attr, char *buf)
+{
+	struct dev_ext_attribute *eattr;
+
+	eattr = container_of(attr, struct dev_ext_attribute, attr);
+	return sprintf(buf, "%s\n", (char *) eattr->var);
+}
+
+#define L3CACHE_PMU_FORMAT_ATTR(_name, _config)				      \
+	(&((struct dev_ext_attribute[]) {				      \
+		{ .attr = __ATTR(_name, 0444, l3cache_pmu_format_show, NULL), \
+		  .var = (void *) _config, }				      \
+	})[0].attr.attr)
+
+static struct attribute *qcom_l3_cache_pmu_formats[] = {
+	L3CACHE_PMU_FORMAT_ATTR(event, "config:0-7"),
+	L3CACHE_PMU_FORMAT_ATTR(lc, "config:" __stringify(L3_EVENT_LC_BIT)),
+	NULL,
+};
+
+static struct attribute_group qcom_l3_cache_pmu_format_group = {
+	.name = "format",
+	.attrs = qcom_l3_cache_pmu_formats,
+};
+
+/* events */
+
+static ssize_t l3cache_pmu_event_show(struct device *dev,
+				     struct device_attribute *attr, char *page)
+{
+	struct perf_pmu_events_attr *pmu_attr;
+
+	pmu_attr = container_of(attr, struct perf_pmu_events_attr, attr);
+	return sprintf(page, "event=0x%02llx\n", pmu_attr->id);
+}
+
+#define L3CACHE_EVENT_ATTR(_name, _id)					     \
+	(&((struct perf_pmu_events_attr[]) {				     \
+		{ .attr = __ATTR(_name, 0444, l3cache_pmu_event_show, NULL), \
+		  .id = _id, }						     \
+	})[0].attr.attr)
+
+static struct attribute *qcom_l3_cache_pmu_events[] = {
+	L3CACHE_EVENT_ATTR(cycles, L3_EVENT_CYCLES),
+	L3CACHE_EVENT_ATTR(read-hit, L3_EVENT_READ_HIT),
+	L3CACHE_EVENT_ATTR(read-miss, L3_EVENT_READ_MISS),
+	L3CACHE_EVENT_ATTR(read-hit-d-side, L3_EVENT_READ_HIT_D),
+	L3CACHE_EVENT_ATTR(read-miss-d-side, L3_EVENT_READ_MISS_D),
+	L3CACHE_EVENT_ATTR(write-hit, L3_EVENT_WRITE_HIT),
+	L3CACHE_EVENT_ATTR(write-miss, L3_EVENT_WRITE_MISS),
+	NULL
+};
+
+static struct attribute_group qcom_l3_cache_pmu_events_group = {
+	.name = "events",
+	.attrs = qcom_l3_cache_pmu_events,
+};
+
+/* cpumask */
+
+static ssize_t qcom_l3_cache_pmu_cpumask_show(struct device *dev,
+				     struct device_attribute *attr, char *buf)
+{
+	struct l3cache_pmu *l3pmu = to_l3cache_pmu(dev_get_drvdata(dev));
+
+	return cpumap_print_to_pagebuf(true, buf, &l3pmu->cpumask);
+}
+
+static DEVICE_ATTR(cpumask, 0444, qcom_l3_cache_pmu_cpumask_show, NULL);
+
+static struct attribute *qcom_l3_cache_pmu_cpumask_attrs[] = {
+	&dev_attr_cpumask.attr,
+	NULL,
+};
+
+static struct attribute_group qcom_l3_cache_pmu_cpumask_attr_group = {
+	.attrs = qcom_l3_cache_pmu_cpumask_attrs,
+};
+
+/*
+ * Per PMU device attribute groups
+ */
+static const struct attribute_group *qcom_l3_cache_pmu_attr_grps[] = {
+	&qcom_l3_cache_pmu_format_group,
+	&qcom_l3_cache_pmu_events_group,
+	&qcom_l3_cache_pmu_cpumask_attr_group,
+	NULL,
+};
+
+/*
+ * Probing functions and data.
+ */
+
+static int qcom_l3_cache_pmu_online_cpu(unsigned int cpu, struct hlist_node *node)
+{
+	struct l3cache_pmu *l3pmu = hlist_entry_safe(node, struct l3cache_pmu, node);
+
+	/* If there is not a CPU/PMU association pick this CPU */
+	if (cpumask_empty(&l3pmu->cpumask))
+		cpumask_set_cpu(cpu, &l3pmu->cpumask);
+
+	return 0;
+}
+
+static int qcom_l3_cache_pmu_offline_cpu(unsigned int cpu, struct hlist_node *node)
+{
+	struct l3cache_pmu *l3pmu = hlist_entry_safe(node, struct l3cache_pmu, node);
+	unsigned int target;
+
+	if (!cpumask_test_and_clear_cpu(cpu, &l3pmu->cpumask))
+		return 0;
+	target = cpumask_any_but(cpu_online_mask, cpu);
+	if (target >= nr_cpu_ids)
+		return 0;
+	perf_pmu_migrate_context(&l3pmu->pmu, cpu, target);
+	cpumask_set_cpu(target, &l3pmu->cpumask);
+	return 0;
+}
+
+static int qcom_l3_cache_pmu_probe(struct platform_device *pdev)
+{
+	struct l3cache_pmu *l3pmu;
+	struct acpi_device *acpi_dev;
+	struct resource *memrc;
+	int ret;
+	char *name;
+
+	/* Initialize the PMU data structures */
+
+	acpi_dev = ACPI_COMPANION(&pdev->dev);
+	if (!acpi_dev)
+		return -ENODEV;
+
+	l3pmu = devm_kzalloc(&pdev->dev, sizeof(*l3pmu), GFP_KERNEL);
+	name = devm_kasprintf(&pdev->dev, GFP_KERNEL, "l3cache_%s_%s",
+		      acpi_dev->parent->pnp.unique_id, acpi_dev->pnp.unique_id);
+	if (!l3pmu || !name)
+		return -ENOMEM;
+
+	l3pmu->pmu = (struct pmu) {
+		.task_ctx_nr	= perf_invalid_context,
+
+		.pmu_enable	= qcom_l3_cache__pmu_enable,
+		.pmu_disable	= qcom_l3_cache__pmu_disable,
+		.event_init	= qcom_l3_cache__event_init,
+		.add		= qcom_l3_cache__event_add,
+		.del		= qcom_l3_cache__event_del,
+		.start		= qcom_l3_cache__event_start,
+		.stop		= qcom_l3_cache__event_stop,
+		.read		= qcom_l3_cache__event_read,
+
+		.attr_groups	= qcom_l3_cache_pmu_attr_grps,
+	};
+
+	memrc = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	l3pmu->regs = devm_ioremap_resource(&pdev->dev, memrc);
+	if (IS_ERR(l3pmu->regs)) {
+		dev_err(&pdev->dev, "Can't map PMU @%pa\n", &memrc->start);
+		return PTR_ERR(l3pmu->regs);
+	}
+
+	qcom_l3_cache__init(l3pmu);
+
+	ret = platform_get_irq(pdev, 0);
+	if (ret <= 0)
+		return ret;
+
+	ret = devm_request_irq(&pdev->dev, ret, qcom_l3_cache__handle_irq, 0,
+			       name, l3pmu);
+	if (ret) {
+		dev_err(&pdev->dev, "Request for IRQ failed for slice @%pa\n",
+			&memrc->start);
+		return ret;
+	}
+
+	/* Add this instance to the list used by the offline callback */
+	ret = cpuhp_state_add_instance(CPUHP_AP_PERF_ARM_QCOM_L3_ONLINE, &l3pmu->node);
+	if (ret) {
+		dev_err(&pdev->dev, "Error %d registering hotplug", ret);
+		return ret;
+	}
+
+	ret = perf_pmu_register(&l3pmu->pmu, name, -1);
+	if (ret < 0) {
+		dev_err(&pdev->dev, "Failed to register L3 cache PMU (%d)\n", ret);
+		return ret;
+	}
+
+	dev_info(&pdev->dev, "Registered %s, type: %d\n", name, l3pmu->pmu.type);
+
+	return 0;
+}
+
+static const struct acpi_device_id qcom_l3_cache_pmu_acpi_match[] = {
+	{ "QCOM8081", },
+	{ }
+};
+MODULE_DEVICE_TABLE(acpi, qcom_l3_cache_pmu_acpi_match);
+
+static struct platform_driver qcom_l3_cache_pmu_driver = {
+	.driver = {
+		.name = "qcom-l3cache-pmu",
+		.acpi_match_table = ACPI_PTR(qcom_l3_cache_pmu_acpi_match),
+	},
+	.probe = qcom_l3_cache_pmu_probe,
+};
+
+static int __init register_qcom_l3_cache_pmu_driver(void)
+{
+	int ret;
+
+	/* Install a hook to update the reader CPU in case it goes offline */
+	ret = cpuhp_setup_state_multi(CPUHP_AP_PERF_ARM_QCOM_L3_ONLINE,
+				      "perf/qcom/l3cache:online",
+				      qcom_l3_cache_pmu_online_cpu,
+				      qcom_l3_cache_pmu_offline_cpu);
+	if (ret)
+		return ret;
+
+	return platform_driver_register(&qcom_l3_cache_pmu_driver);
+}
+device_initcall(register_qcom_l3_cache_pmu_driver);
--- a/include/linux/acpi_iort.h
+++ b/include/linux/acpi_iort.h
@@ -34,6 +34,8 @@ void acpi_iort_init(void);
 bool iort_node_match(u8 type);
 u32 iort_msi_map_rid(struct device *dev, u32 req_id);
 struct irq_domain *iort_get_device_domain(struct device *dev, u32 req_id);
+void acpi_configure_pmsi_domain(struct device *dev);
+int iort_pmsi_get_dev_id(struct device *dev, u32 *dev_id);
 /* IOMMU interface */
 void iort_set_dma_mask(struct device *dev);
 const struct iommu_ops *iort_iommu_configure(struct device *dev);
@@ -45,6 +47,7 @@ static inline u32 iort_msi_map_rid(struct device *dev, u32 req_id)
 static inline struct irq_domain *iort_get_device_domain(struct device *dev,
 							u32 req_id)
 { return NULL; }
+static inline void acpi_configure_pmsi_domain(struct device *dev) { }
 /* IOMMU interface */
 static inline void iort_set_dma_mask(struct device *dev) { }
 static inline

--- a/include/linux/cpuhotplug.h
+++ b/include/linux/cpuhotplug.h
@@ -94,6 +94,7 @@ enum cpuhp_state {
 	CPUHP_AP_ARM_VFP_STARTING,
 	CPUHP_AP_ARM64_DEBUG_MONITORS_STARTING,
 	CPUHP_AP_PERF_ARM_HW_BREAKPOINT_STARTING,
+	CPUHP_AP_PERF_ARM_ACPI_STARTING,
 	CPUHP_AP_PERF_ARM_STARTING,
 	CPUHP_AP_ARM_L2X0_STARTING,
 	CPUHP_AP_ARM_ARCH_TIMER_STARTING,
@@ -137,6 +138,7 @@ enum cpuhp_state {
 	CPUHP_AP_PERF_ARM_CCN_ONLINE,
 	CPUHP_AP_PERF_ARM_L2X0_ONLINE,
 	CPUHP_AP_PERF_ARM_QCOM_L2_ONLINE,
+	CPUHP_AP_PERF_ARM_QCOM_L3_ONLINE,
 	CPUHP_AP_WORKQUEUE_ONLINE,
 	CPUHP_AP_RCUTREE_ONLINE,
 	CPUHP_AP_ONLINE_DYN,

--- a/include/linux/memblock.h
+++ b/include/linux/memblock.h
@@ -93,6 +93,7 @@ int memblock_mark_hotplug(phys_addr_t base, phys_addr_t size);
 int memblock_clear_hotplug(phys_addr_t base, phys_addr_t size);
 int memblock_mark_mirror(phys_addr_t base, phys_addr_t size);
 int memblock_mark_nomap(phys_addr_t base, phys_addr_t size);
+int memblock_clear_nomap(phys_addr_t base, phys_addr_t size);
 ulong choose_memblock_flags(void);

 /* Low level functions */
@@ -335,6 +336,7 @@ phys_addr_t memblock_mem_size(unsigned long limit_pfn);
 phys_addr_t memblock_start_of_DRAM(void);
 phys_addr_t memblock_end_of_DRAM(void);
 void memblock_enforce_memory_limit(phys_addr_t memory_limit);
+void memblock_cap_memory_range(phys_addr_t base, phys_addr_t size);
 void memblock_mem_limit_remove_map(phys_addr_t limit);
 bool memblock_is_memory(phys_addr_t addr);
 int memblock_is_map_memory(phys_addr_t addr);

--- a/include/linux/pe.h
+++ b/include/linux/pe.h
@@ -23,34 +23,6 @@

 #define MZ_MAGIC	0x5a4d	/* "MZ" */

-struct mz_hdr {
-	uint16_t magic;		/* MZ_MAGIC */
-	uint16_t lbsize;	/* size of last used block */
-	uint16_t blocks;	/* pages in file, 0x3 */
-	uint16_t relocs;	/* relocations */
-	uint16_t hdrsize;	/* header size in "paragraphs" */
-	uint16_t min_extra_pps;	/* .bss */
-	uint16_t max_extra_pps;	/* runtime limit for the arena size */
-	uint16_t ss;		/* relative stack segment */
-	uint16_t sp;		/* initial %sp register */
-	uint16_t checksum;	/* word checksum */
-	uint16_t ip;		/* initial %ip register */
-	uint16_t cs;		/* initial %cs relative to load segment */
-	uint16_t reloc_table_offset;	/* offset of the first relocation */
-	uint16_t overlay_num;	/* overlay number.  set to 0. */
-	uint16_t reserved0[4];	/* reserved */
-	uint16_t oem_id;	/* oem identifier */
-	uint16_t oem_info;	/* oem specific */
-	uint16_t reserved1[10];	/* reserved */
-	uint32_t peaddr;	/* address of pe header */
-	char     message[64];	/* message to print */
-};
-
-struct mz_reloc {
-	uint16_t offset;
-	uint16_t segment;
-};
-
 #define PE_MAGIC		0x00004550	/* "PE\0\0" */
 #define PE_OPT_MAGIC_PE32	0x010b
 #define PE_OPT_MAGIC_PE32_ROM	0x0107
@@ -62,6 +34,7 @@ struct mz_reloc {
 #define	IMAGE_FILE_MACHINE_AMD64	0x8664
 #define	IMAGE_FILE_MACHINE_ARM		0x01c0
 #define	IMAGE_FILE_MACHINE_ARMV7	0x01c4
+#define	IMAGE_FILE_MACHINE_ARM64	0xaa64
 #define	IMAGE_FILE_MACHINE_EBC		0x0ebc
 #define	IMAGE_FILE_MACHINE_I386		0x014c
 #define	IMAGE_FILE_MACHINE_IA64		0x0200
@@ -98,17 +71,6 @@ struct mz_reloc {
 #define IMAGE_FILE_UP_SYSTEM_ONLY            0x4000
 #define IMAGE_FILE_BYTES_REVERSED_HI         0x8000

-struct pe_hdr {
-	uint32_t magic;		/* PE magic */
-	uint16_t machine;	/* machine type */
-	uint16_t sections;	/* number of sections */
-	uint32_t timestamp;	/* time_t */
-	uint32_t symbol_table;	/* symbol table offset */
-	uint32_t symbols;	/* number of symbols */
-	uint16_t opt_hdr_size;	/* size of optional header */
-	uint16_t flags;		/* flags */
-};
-
 #define IMAGE_FILE_OPT_ROM_MAGIC	0x107
 #define IMAGE_FILE_OPT_PE32_MAGIC	0x10b
 #define IMAGE_FILE_OPT_PE32_PLUS_MAGIC	0x20b
@@ -134,6 +96,95 @@ struct pe_hdr {
 #define IMAGE_DLLCHARACTERISTICS_WDM_DRIVER             0x2000
 #define IMAGE_DLLCHARACTERISTICS_TERMINAL_SERVER_AWARE  0x8000

+/* they actually defined 0x00000000 as well, but I think we'll skip that one. */
+#define IMAGE_SCN_RESERVED_0	0x00000001
+#define IMAGE_SCN_RESERVED_1	0x00000002
+#define IMAGE_SCN_RESERVED_2	0x00000004
+#define IMAGE_SCN_TYPE_NO_PAD	0x00000008 /* don't pad - obsolete */
+#define IMAGE_SCN_RESERVED_3	0x00000010
+#define IMAGE_SCN_CNT_CODE	0x00000020 /* .text */
+#define IMAGE_SCN_CNT_INITIALIZED_DATA 0x00000040 /* .data */
+#define IMAGE_SCN_CNT_UNINITIALIZED_DATA 0x00000080 /* .bss */
+#define IMAGE_SCN_LNK_OTHER	0x00000100 /* reserved */
+#define IMAGE_SCN_LNK_INFO	0x00000200 /* .drectve comments */
+#define IMAGE_SCN_RESERVED_4	0x00000400
+#define IMAGE_SCN_LNK_REMOVE	0x00000800 /* .o only - scn to be rm'd*/
+#define IMAGE_SCN_LNK_COMDAT	0x00001000 /* .o only - COMDAT data */
+#define IMAGE_SCN_RESERVED_5	0x00002000 /* spec omits this */
+#define IMAGE_SCN_RESERVED_6	0x00004000 /* spec omits this */
+#define IMAGE_SCN_GPREL		0x00008000 /* global pointer referenced data */
+/* spec lists 0x20000 twice, I suspect they meant 0x10000 for one of them */
+#define IMAGE_SCN_MEM_PURGEABLE	0x00010000 /* reserved for "future" use */
+#define IMAGE_SCN_16BIT		0x00020000 /* reserved for "future" use */
+#define IMAGE_SCN_LOCKED	0x00040000 /* reserved for "future" use */
+#define IMAGE_SCN_PRELOAD	0x00080000 /* reserved for "future" use */
+/* and here they just stuck a 1-byte integer in the middle of a bitfield */
+#define IMAGE_SCN_ALIGN_1BYTES	0x00100000 /* it does what it says on the box */
+#define IMAGE_SCN_ALIGN_2BYTES	0x00200000
+#define IMAGE_SCN_ALIGN_4BYTES	0x00300000
+#define IMAGE_SCN_ALIGN_8BYTES	0x00400000
+#define IMAGE_SCN_ALIGN_16BYTES	0x00500000
+#define IMAGE_SCN_ALIGN_32BYTES	0x00600000
+#define IMAGE_SCN_ALIGN_64BYTES	0x00700000
+#define IMAGE_SCN_ALIGN_128BYTES 0x00800000
+#define IMAGE_SCN_ALIGN_256BYTES 0x00900000
+#define IMAGE_SCN_ALIGN_512BYTES 0x00a00000
+#define IMAGE_SCN_ALIGN_1024BYTES 0x00b00000
+#define IMAGE_SCN_ALIGN_2048BYTES 0x00c00000
+#define IMAGE_SCN_ALIGN_4096BYTES 0x00d00000
+#define IMAGE_SCN_ALIGN_8192BYTES 0x00e00000
+#define IMAGE_SCN_LNK_NRELOC_OVFL 0x01000000 /* extended relocations */
+#define IMAGE_SCN_MEM_DISCARDABLE 0x02000000 /* scn can be discarded */
+#define IMAGE_SCN_MEM_NOT_CACHED 0x04000000 /* cannot be cached */
+#define IMAGE_SCN_MEM_NOT_PAGED	0x08000000 /* not pageable */
+#define IMAGE_SCN_MEM_SHARED	0x10000000 /* can be shared */
+#define IMAGE_SCN_MEM_EXECUTE	0x20000000 /* can be executed as code */
+#define IMAGE_SCN_MEM_READ	0x40000000 /* readable */
+#define IMAGE_SCN_MEM_WRITE	0x80000000 /* writeable */
+
+#define IMAGE_DEBUG_TYPE_CODEVIEW	2
+
+#ifndef __ASSEMBLY__
+
+struct mz_hdr {
+	uint16_t magic;		/* MZ_MAGIC */
+	uint16_t lbsize;	/* size of last used block */
+	uint16_t blocks;	/* pages in file, 0x3 */
+	uint16_t relocs;	/* relocations */
+	uint16_t hdrsize;	/* header size in "paragraphs" */
+	uint16_t min_extra_pps;	/* .bss */
+	uint16_t max_extra_pps;	/* runtime limit for the arena size */
+	uint16_t ss;		/* relative stack segment */
+	uint16_t sp;		/* initial %sp register */
+	uint16_t checksum;	/* word checksum */
+	uint16_t ip;		/* initial %ip register */
+	uint16_t cs;		/* initial %cs relative to load segment */
+	uint16_t reloc_table_offset;	/* offset of the first relocation */
+	uint16_t overlay_num;	/* overlay number.  set to 0. */
+	uint16_t reserved0[4];	/* reserved */
+	uint16_t oem_id;	/* oem identifier */
+	uint16_t oem_info;	/* oem specific */
+	uint16_t reserved1[10];	/* reserved */
+	uint32_t peaddr;	/* address of pe header */
+	char     message[64];	/* message to print */
+};
+
+struct mz_reloc {
+	uint16_t offset;
+	uint16_t segment;
+};
+
+struct pe_hdr {
+	uint32_t magic;		/* PE magic */
+	uint16_t machine;	/* machine type */
+	uint16_t sections;	/* number of sections */
+	uint32_t timestamp;	/* time_t */
+	uint32_t symbol_table;	/* symbol table offset */
+	uint32_t symbols;	/* number of symbols */
+	uint16_t opt_hdr_size;	/* size of optional header */
+	uint16_t flags;		/* flags */
+};
+
 /* the fact that pe32 isn't padded where pe32+ is 64-bit means union won't
 * work right.  vomit. */
 struct pe32_opt_hdr {
@@ -243,52 +294,6 @@ struct section_header {
 	uint32_t flags;
 };

-/* they actually defined 0x00000000 as well, but I think we'll skip that one. */
-#define IMAGE_SCN_RESERVED_0	0x00000001
-#define IMAGE_SCN_RESERVED_1	0x00000002
-#define IMAGE_SCN_RESERVED_2	0x00000004
-#define IMAGE_SCN_TYPE_NO_PAD	0x00000008 /* don't pad - obsolete */
-#define IMAGE_SCN_RESERVED_3	0x00000010
-#define IMAGE_SCN_CNT_CODE	0x00000020 /* .text */
-#define IMAGE_SCN_CNT_INITIALIZED_DATA 0x00000040 /* .data */
-#define IMAGE_SCN_CNT_UNINITIALIZED_DATA 0x00000080 /* .bss */
-#define IMAGE_SCN_LNK_OTHER	0x00000100 /* reserved */
-#define IMAGE_SCN_LNK_INFO	0x00000200 /* .drectve comments */
-#define IMAGE_SCN_RESERVED_4	0x00000400
-#define IMAGE_SCN_LNK_REMOVE	0x00000800 /* .o only - scn to be rm'd*/
-#define IMAGE_SCN_LNK_COMDAT	0x00001000 /* .o only - COMDAT data */
-#define IMAGE_SCN_RESERVED_5	0x00002000 /* spec omits this */
-#define IMAGE_SCN_RESERVED_6	0x00004000 /* spec omits this */
-#define IMAGE_SCN_GPREL		0x00008000 /* global pointer referenced data */
-/* spec lists 0x20000 twice, I suspect they meant 0x10000 for one of them */
-#define IMAGE_SCN_MEM_PURGEABLE	0x00010000 /* reserved for "future" use */
-#define IMAGE_SCN_16BIT		0x00020000 /* reserved for "future" use */
-#define IMAGE_SCN_LOCKED	0x00040000 /* reserved for "future" use */
-#define IMAGE_SCN_PRELOAD	0x00080000 /* reserved for "future" use */
-/* and here they just stuck a 1-byte integer in the middle of a bitfield */
-#define IMAGE_SCN_ALIGN_1BYTES	0x00100000 /* it does what it says on the box */
-#define IMAGE_SCN_ALIGN_2BYTES	0x00200000
-#define IMAGE_SCN_ALIGN_4BYTES	0x00300000
-#define IMAGE_SCN_ALIGN_8BYTES	0x00400000
-#define IMAGE_SCN_ALIGN_16BYTES	0x00500000
-#define IMAGE_SCN_ALIGN_32BYTES	0x00600000
-#define IMAGE_SCN_ALIGN_64BYTES	0x00700000
-#define IMAGE_SCN_ALIGN_128BYTES 0x00800000
-#define IMAGE_SCN_ALIGN_256BYTES 0x00900000
-#define IMAGE_SCN_ALIGN_512BYTES 0x00a00000
-#define IMAGE_SCN_ALIGN_1024BYTES 0x00b00000
-#define IMAGE_SCN_ALIGN_2048BYTES 0x00c00000
-#define IMAGE_SCN_ALIGN_4096BYTES 0x00d00000
-#define IMAGE_SCN_ALIGN_8192BYTES 0x00e00000
-#define IMAGE_SCN_LNK_NRELOC_OVFL 0x01000000 /* extended relocations */
-#define IMAGE_SCN_MEM_DISCARDABLE 0x02000000 /* scn can be discarded */
-#define IMAGE_SCN_MEM_NOT_CACHED 0x04000000 /* cannot be cached */
-#define IMAGE_SCN_MEM_NOT_PAGED	0x08000000 /* not pageable */
-#define IMAGE_SCN_MEM_SHARED	0x10000000 /* can be shared */
-#define IMAGE_SCN_MEM_EXECUTE	0x20000000 /* can be executed as code */
-#define IMAGE_SCN_MEM_READ	0x40000000 /* readable */
-#define IMAGE_SCN_MEM_WRITE	0x80000000 /* writeable */
-
 enum x64_coff_reloc_type {
 	IMAGE_REL_AMD64_ABSOLUTE = 0,
 	IMAGE_REL_AMD64_ADDR64,
@@ -445,4 +450,6 @@ struct win_certificate {
 	uint16_t cert_type;
 };

+#endif /* !__ASSEMBLY__ */
+
 #endif /* __LINUX_PE_H */
--- a/include/linux/perf/arm_pmu.h
+++ b/include/linux/perf/arm_pmu.h
@@ -75,6 +75,8 @@ struct pmu_hw_events {
 	 * already have to allocate this struct per cpu.
 	 */
 	struct arm_pmu		*percpu_pmu;
+
+	int irq;
 };

 enum armpmu_attr_groups {
@@ -88,7 +90,6 @@ struct arm_pmu {
 	struct pmu	pmu;
 	cpumask_t	active_irqs;
 	cpumask_t	supported_cpus;
-	int		*irq_affinity;
 	char		*name;
 	irqreturn_t	(*handle_irq)(int irq_num, void *dev);
 	void		(*enable)(struct perf_event *event);
@@ -104,12 +105,8 @@ struct arm_pmu {
 	void		(*start)(struct arm_pmu *);
 	void		(*stop)(struct arm_pmu *);
 	void		(*reset)(void *);
-	int		(*request_irq)(struct arm_pmu *, irq_handler_t handler);
-	void		(*free_irq)(struct arm_pmu *);
 	int		(*map_event)(struct perf_event *event);
 	int		num_events;
-	atomic_t	active_events;
-	struct mutex	reserve_mutex;
 	u64		max_period;
 	bool		secure_access; /* 32-bit ARM only */
 #define ARMV8_PMUV3_MAX_COMMON_EVENTS 0x40
@@ -120,6 +117,9 @@ struct arm_pmu {
 	struct notifier_block	cpu_pm_nb;
 	/* the attr_groups array must be NULL-terminated */
 	const struct attribute_group *attr_groups[ARMPMU_NR_ATTR_GROUPS + 1];
+
+	/* Only to be used by ACPI probing code */
+	unsigned long acpi_cpuid;
 };

 #define to_arm_pmu(p) (container_of(p, struct arm_pmu, pmu))
@@ -135,10 +135,12 @@ int armpmu_map_event(struct perf_event *event,
 						[PERF_COUNT_HW_CACHE_RESULT_MAX],
 		     u32 raw_event_mask);

+typedef int (*armpmu_init_fn)(struct arm_pmu *);
+
 struct pmu_probe_info {
 	unsigned int cpuid;
 	unsigned int mask;
-	int (*init)(struct arm_pmu *);
+	armpmu_init_fn init;
 };

 #define PMU_PROBE(_cpuid, _mask, _fn)	\
@@ -160,6 +162,21 @@ int arm_pmu_device_probe(struct platform_device *pdev,
 			 const struct of_device_id *of_table,
 			 const struct pmu_probe_info *probe_table);

+#ifdef CONFIG_ACPI
+int arm_pmu_acpi_probe(armpmu_init_fn init_fn);
+#else
+static inline int arm_pmu_acpi_probe(armpmu_init_fn init_fn) { return 0; }
+#endif
+
+/* Internal functions only for core arm_pmu code */
+struct arm_pmu *armpmu_alloc(void);
+void armpmu_free(struct arm_pmu *pmu);
+int armpmu_register(struct arm_pmu *pmu);
+int armpmu_request_irqs(struct arm_pmu *armpmu);
+void armpmu_free_irqs(struct arm_pmu *armpmu);
+int armpmu_request_irq(struct arm_pmu *armpmu, int cpu);
+void armpmu_free_irq(struct arm_pmu *armpmu, int cpu);
+
 #define ARMV8_PMU_PDEV_NAME "armv8-pmu"

 #endif /* CONFIG_ARM_PMU */

--- a/mm/memblock.c
+++ b/mm/memblock.c
@@ -804,6 +804,18 @@ int __init_memblock memblock_mark_nomap(phys_addr_t base, phys_addr_t size)
 	return memblock_setclr_flag(base, size, 1, MEMBLOCK_NOMAP);
 }

+/**
+ * memblock_clear_nomap - Clear flag MEMBLOCK_NOMAP for a specified region.
+ * @base: the base phys addr of the region
+ * @size: the size of the region
+ *
+ * Return 0 on success, -errno on failure.
+ */
+int __init_memblock memblock_clear_nomap(phys_addr_t base, phys_addr_t size)
+{
+	return memblock_setclr_flag(base, size, 0, MEMBLOCK_NOMAP);
+}
+
 /**
 * __next_reserved_mem_region - next function for for_each_reserved_region()
 * @idx: pointer to u64 loop variable
@@ -1531,11 +1543,37 @@ void __init memblock_enforce_memory_limit(phys_addr_t limit)
 			      (phys_addr_t)ULLONG_MAX);
 }

+void __init memblock_cap_memory_range(phys_addr_t base, phys_addr_t size)
+{
+	int start_rgn, end_rgn;
+	int i, ret;
+
+	if (!size)
+		return;
+
+	ret = memblock_isolate_range(&memblock.memory, base, size,
+						&start_rgn, &end_rgn);
+	if (ret)
+		return;
+
+	/* remove all the MAP regions */
+	for (i = memblock.memory.cnt - 1; i >= end_rgn; i--)
+		if (!memblock_is_nomap(&memblock.memory.regions[i]))
+			memblock_remove_region(&memblock.memory, i);
+
+	for (i = start_rgn - 1; i >= 0; i--)
+		if (!memblock_is_nomap(&memblock.memory.regions[i]))
+			memblock_remove_region(&memblock.memory, i);
+
+	/* truncate the reserved regions */
+	memblock_remove_range(&memblock.reserved, 0, base);
+	memblock_remove_range(&memblock.reserved,
+			base + size, (phys_addr_t)ULLONG_MAX);
+}
+
 void __init memblock_mem_limit_remove_map(phys_addr_t limit)
 {
-	struct memblock_type *type = &memblock.memory;
 	phys_addr_t max_addr;
-	int i, ret, start_rgn, end_rgn;

 	if (!limit)
 		return;
@@ -1546,19 +1584,7 @@ void __init memblock_mem_limit_remove_map(phys_addr_t limit)
 	if (max_addr == (phys_addr_t)ULLONG_MAX)
 		return;

-	ret = memblock_isolate_range(type, max_addr, (phys_addr_t)ULLONG_MAX,
-				&start_rgn, &end_rgn);
-	if (ret)
-		return;
-
-	/* remove all the MAP regions above the limit */
-	for (i = end_rgn - 1; i >= start_rgn; i--) {
-		if (!memblock_is_nomap(&type->regions[i]))
-			memblock_remove_region(type, i);
-	}
-	/* truncate the reserved regions */
-	memblock_remove_range(&memblock.reserved, max_addr,
-			      (phys_addr_t)ULLONG_MAX);
+	memblock_cap_memory_range(0, max_addr);
 }

 static int __init_memblock memblock_search(struct memblock_type *type, phys_addr_t addr)

--- a/virt/kvm/arm/vgic/vgic.c
+++ b/virt/kvm/arm/vgic/vgic.c
@@ -29,7 +29,9 @@
 #define DEBUG_SPINLOCK_BUG_ON(p)
 #endif

-struct vgic_global __section(.hyp.text) kvm_vgic_global_state = {.gicv3_cpuif = STATIC_KEY_FALSE_INIT,};
+struct vgic_global kvm_vgic_global_state __ro_after_init = {
+	.gicv3_cpuif = STATIC_KEY_FALSE_INIT,
+};

 /*
 * Locking order is always: