Merge pull request #34 from gibsson/4.9-2.0.x-imx

Merge v4.9.126 into 4.9-2.0.x-imx
Otavio Salvador 2018-09-12 10:15:37 -03:00 committed by GitHub
commit 5051a0ca27
873 changed files with 9860 additions and 3646 deletions


@ -356,6 +356,7 @@ What: /sys/devices/system/cpu/vulnerabilities
/sys/devices/system/cpu/vulnerabilities/spectre_v1
/sys/devices/system/cpu/vulnerabilities/spectre_v2
/sys/devices/system/cpu/vulnerabilities/spec_store_bypass
/sys/devices/system/cpu/vulnerabilities/l1tf
Date: January 2018
Contact: Linux kernel mailing list <linux-kernel@vger.kernel.org>
Description: Information about CPU vulnerabilities
@ -367,3 +368,26 @@ Description: Information about CPU vulnerabilities
"Not affected" CPU is not affected by the vulnerability
"Vulnerable" CPU is affected and no mitigation in effect
"Mitigation: $M" CPU is affected and mitigation $M is in effect
Details about the l1tf file can be found in
Documentation/admin-guide/l1tf.rst
What: /sys/devices/system/cpu/smt
/sys/devices/system/cpu/smt/active
/sys/devices/system/cpu/smt/control
Date: June 2018
Contact: Linux kernel mailing list <linux-kernel@vger.kernel.org>
Description: Control Symmetric Multi Threading (SMT)
active: Tells whether SMT is active (enabled and siblings online)
control: Read/write interface to control SMT. Possible
values:
"on" SMT is enabled
"off" SMT is disabled
"forceoff" SMT is force disabled. Cannot be changed.
"notsupported" SMT is not supported by the CPU
If control status is "forceoff" or "notsupported" writes
are rejected.
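
For illustration only (not part of this change), a minimal user-space C sketch that reads the two files described above could look like this:

  /*
   * Minimal sketch: read the SMT state exposed by the two sysfs files
   * described above.
   */
  #include <stdio.h>
  #include <string.h>

  static int read_sysfs(const char *path, char *buf, size_t len)
  {
          FILE *f = fopen(path, "r");

          if (!f || !fgets(buf, len, f)) {
                  if (f)
                          fclose(f);
                  return -1;
          }
          fclose(f);
          buf[strcspn(buf, "\n")] = '\0';
          return 0;
  }

  int main(void)
  {
          char active[8], control[16];

          if (read_sysfs("/sys/devices/system/cpu/smt/active", active, sizeof(active)) ||
              read_sysfs("/sys/devices/system/cpu/smt/control", control, sizeof(control)))
                  return 1;
          printf("SMT active: %s, control: %s\n", active, control);
          return 0;
  }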


@ -33,7 +33,7 @@ GNU C 3.2 gcc --version
GNU make 3.80 make --version
binutils 2.12 ld -v
util-linux 2.10o fdformat --version
module-init-tools 0.9.10 depmod -V
kmod 13 depmod -V
e2fsprogs 1.41.4 e2fsck -V
jfsutils 1.1.3 fsck.jfs -V
reiserfsprogs 3.6.3 reiserfsck -V
@ -143,12 +143,6 @@ is not built with ``CONFIG_KALLSYMS`` and you have no way to rebuild and
reproduce the Oops with that option, then you can still decode that Oops
with ksymoops.
Module-Init-Tools
-----------------
A new module loader is now in the kernel that requires ``module-init-tools``
to use. It is backward compatible with the 2.4.x series kernels.
Mkinitrd
--------
@ -363,16 +357,17 @@ Util-linux
- <ftp://ftp.kernel.org/pub/linux/utils/util-linux/>
Kmod
----
- <https://www.kernel.org/pub/linux/utils/kernel/kmod/>
- <https://git.kernel.org/pub/scm/utils/kernel/kmod/kmod.git>
Ksymoops
--------
- <ftp://ftp.kernel.org/pub/linux/utils/kernel/ksymoops/v2.4/>
Module-Init-Tools
-----------------
- <ftp://ftp.kernel.org/pub/linux/kernel/people/rusty/modules/>
Mkinitrd
--------


@ -2,7 +2,10 @@
Required properties:
- compatible: should be "qca,qca8337"
- compatible: should be one of:
"qca,qca8334"
"qca,qca8337"
- #size-cells: must be 0
- #address-cells: must be 1
@ -14,6 +17,20 @@ port and PHY id, each subnode describing a port needs to have a valid phandle
referencing the internal PHY connected to it. The CPU port of this switch is
always port 0.
A CPU port node has the following optional node:
- fixed-link : Fixed-link subnode describing a link to a non-MDIO
managed entity. See
Documentation/devicetree/bindings/net/fixed-link.txt
for details.
For QCA8K the 'fixed-link' sub-node supports only the following properties:
- 'speed' (integer, mandatory), to indicate the link speed. Accepted
values are 10, 100 and 1000
- 'full-duplex' (boolean, optional), to indicate that full duplex is
used. When absent, half duplex is assumed.
Example:
@ -53,6 +70,10 @@ Example:
label = "cpu";
ethernet = <&gmac1>;
phy-mode = "rgmii";
fixed-link {
speed = 1000;
full-duplex;
};
};
port@1 {


@ -10,6 +10,7 @@ Required properties on all platforms:
- "amlogic,meson6-dwmac"
- "amlogic,meson8b-dwmac"
- "amlogic,meson-gxbb-dwmac"
- "amlogic,meson-axg-dwmac"
Additionally "snps,dwmac" and any applicable more
detailed version number described in net/stmmac.txt
should be used.


@ -3,8 +3,10 @@
Required properties for the root node:
- compatible: one of "amlogic,meson8-cbus-pinctrl"
"amlogic,meson8b-cbus-pinctrl"
"amlogic,meson8m2-cbus-pinctrl"
"amlogic,meson8-aobus-pinctrl"
"amlogic,meson8b-aobus-pinctrl"
"amlogic,meson8m2-aobus-pinctrl"
"amlogic,meson-gxbb-periphs-pinctrl"
"amlogic,meson-gxbb-aobus-pinctrl"
- reg: address and size of registers controlling irq functionality


@ -12,6 +12,7 @@ Contents:
:maxdepth: 2
kernel-documentation
l1tf
development-process/index
dev-tools/tools
driver-api/index


@ -2010,10 +2010,84 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
(virtualized real and unpaged mode) on capable
Intel chips. Default is 1 (enabled)
kvm-intel.vmentry_l1d_flush=[KVM,Intel] Mitigation for L1 Terminal Fault
CVE-2018-3620.
Valid arguments: never, cond, always
always: L1D cache flush on every VMENTER.
cond: Flush L1D on VMENTER only when the code between
VMEXIT and VMENTER can leak host memory.
never: Disables the mitigation
Default is cond (do L1 cache flush in specific instances)
kvm-intel.vpid= [KVM,Intel] Disable Virtual Processor Identification
feature (tagged TLBs) on capable Intel chips.
Default is 1 (enabled)
l1tf= [X86] Control mitigation of the L1TF vulnerability on
affected CPUs
The kernel PTE inversion protection is unconditionally
enabled and cannot be disabled.
full
Provides all available mitigations for the
L1TF vulnerability. Disables SMT and
enables all mitigations in the
hypervisors, i.e. unconditional L1D flush.
SMT control and L1D flush control via the
sysfs interface is still possible after
boot. Hypervisors will issue a warning
when the first VM is started in a
potentially insecure configuration,
i.e. SMT enabled or L1D flush disabled.
full,force
Same as 'full', but disables SMT and L1D
flush runtime control. Implies the
'nosmt=force' command line option.
(i.e. sysfs control of SMT is disabled.)
flush
Leaves SMT enabled and enables the default
hypervisor mitigation, i.e. conditional
L1D flush.
SMT control and L1D flush control via the
sysfs interface is still possible after
boot. Hypervisors will issue a warning
when the first VM is started in a
potentially insecure configuration,
i.e. SMT enabled or L1D flush disabled.
flush,nosmt
Disables SMT and enables the default
hypervisor mitigation.
SMT control and L1D flush control via the
sysfs interface is still possible after
boot. Hypervisors will issue a warning
when the first VM is started in a
potentially insecure configuration,
i.e. SMT enabled or L1D flush disabled.
flush,nowarn
Same as 'flush', but hypervisors will not
warn when a VM is started in a potentially
insecure configuration.
off
Disables hypervisor mitigations and doesn't
emit any warnings.
Default is 'flush'.
For details see: Documentation/admin-guide/l1tf.rst
l2cr= [PPC]
l3cr= [PPC]
@ -2694,6 +2768,10 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
nosmt [KNL,S390] Disable symmetric multithreading (SMT).
Equivalent to smt=1.
[KNL,x86] Disable symmetric multithreading (SMT).
nosmt=force: Force disable SMT, cannot be undone
via the sysfs control file.
nospectre_v2 [X86] Disable all mitigations for the Spectre variant 2
(indirect branch prediction) vulnerability. System may
allow data leaks with this option, which is equivalent
@ -4023,6 +4101,23 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
spia_pedr=
spia_peddr=
ssbd= [ARM64,HW]
Speculative Store Bypass Disable control
On CPUs that are vulnerable to the Speculative
Store Bypass vulnerability and offer a
firmware based mitigation, this parameter
indicates how the mitigation should be used:
force-on: Unconditionally enable mitigation for
both kernel and userspace
force-off: Unconditionally disable mitigation for
both kernel and userspace
kernel: Always enable mitigation in the
kernel, and offer a prctl interface
to allow userspace to register its
interest in being mitigated too.
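
As a hedged illustration of the prctl interface mentioned above (the constants come from <linux/prctl.h> on kernels that provide the feature; this sketch is not part of the patch), a task could opt in to the mitigation roughly like this:

  #include <stdio.h>
  #include <sys/prctl.h>
  #include <linux/prctl.h>

  int main(void)
  {
          /* Ask the kernel to disable speculative store bypass for this task. */
          if (prctl(PR_SET_SPECULATION_CTRL, PR_SPEC_STORE_BYPASS,
                    PR_SPEC_DISABLE, 0, 0))
                  perror("PR_SET_SPECULATION_CTRL");

          /* Read back the current per-task state. */
          printf("SSB state: 0x%x\n",
                 prctl(PR_GET_SPECULATION_CTRL, PR_SPEC_STORE_BYPASS, 0, 0, 0));
          return 0;
  }
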
stack_guard_gap= [MM]
override the default stack gap protection. The value
is in page units and it defines how many pages prior

Documentation/l1tf.rst (new file, 610 lines)

@ -0,0 +1,610 @@
L1TF - L1 Terminal Fault
========================
L1 Terminal Fault is a hardware vulnerability which allows unprivileged
speculative access to data which is available in the Level 1 Data Cache
when the page table entry controlling the virtual address, which is used
for the access, has the Present bit cleared or other reserved bits set.
Affected processors
-------------------
This vulnerability affects a wide range of Intel processors. The
vulnerability is not present on:
- Processors from AMD, Centaur and other non Intel vendors
- Older processor models, where the CPU family is < 6
- A range of Intel ATOM processors (Cedarview, Cloverview, Lincroft,
Penwell, Pineview, Silvermont, Airmont, Merrifield)
- The Intel XEON PHI family
- Intel processors which have the ARCH_CAP_RDCL_NO bit set in the
IA32_ARCH_CAPABILITIES MSR. If the bit is set the CPU is not affected
by the Meltdown vulnerability either. These CPUs should become
available by end of 2018.
Whether a processor is affected or not can be read out from the L1TF
vulnerability file in sysfs. See :ref:`l1tf_sys_info`.
Related CVEs
------------
The following CVE entries are related to the L1TF vulnerability:
============= ================= ==============================
CVE-2018-3615 L1 Terminal Fault SGX related aspects
CVE-2018-3620 L1 Terminal Fault OS, SMM related aspects
CVE-2018-3646 L1 Terminal Fault Virtualization related aspects
============= ================= ==============================
Problem
-------
If an instruction accesses a virtual address for which the relevant page
table entry (PTE) has the Present bit cleared or other reserved bits set,
then speculative execution ignores the invalid PTE and loads the referenced
data if it is present in the Level 1 Data Cache, as if the page referenced
by the address bits in the PTE was still present and accessible.
While this is a purely speculative mechanism and the instruction will raise
a page fault when it is retired eventually, the pure act of loading the
data and making it available to other speculative instructions opens up the
opportunity for side channel attacks to unprivileged malicious code,
similar to the Meltdown attack.
While Meltdown breaks the user space to kernel space protection, L1TF
allows attacking any physical memory address in the system, and the attack
works across all protection domains. It allows an attack on SGX and also
works from inside virtual machines because the speculation bypasses the
extended page table (EPT) protection mechanism.
Attack scenarios
----------------
1. Malicious user space
^^^^^^^^^^^^^^^^^^^^^^^
Operating Systems store arbitrary information in the address bits of a
PTE which is marked non present. This allows a malicious user space
application to attack the physical memory to which these PTEs resolve.
In some cases user-space can maliciously influence the information
encoded in the address bits of the PTE, thus making attacks more
deterministic and more practical.
The Linux kernel contains a mitigation for this attack vector, PTE
inversion, which is permanently enabled and has no performance
impact. The kernel ensures that the address bits of PTEs, which are not
marked present, never point to cacheable physical memory space.
A system with an up to date kernel is protected against attacks from
malicious user space applications.
2. Malicious guest in a virtual machine
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
The fact that L1TF breaks all domain protections allows malicious guest
OSes, which can control the PTEs directly, and malicious guest user
space applications, which run on an unprotected guest kernel lacking the
PTE inversion mitigation for L1TF, to attack physical host memory.
A special aspect of L1TF in the context of virtualization is symmetric
multi threading (SMT). The Intel implementation of SMT is called
HyperThreading. The fact that Hyperthreads on the affected processors
share the L1 Data Cache (L1D) is important for this. As the flaw allows
only to attack data which is present in L1D, a malicious guest running
on one Hyperthread can attack the data which is brought into the L1D by
the context which runs on the sibling Hyperthread of the same physical
core. This context can be host OS, host user space or a different guest.
If the processor does not support Extended Page Tables, the attack is
only possible, when the hypervisor does not sanitize the content of the
effective (shadow) page tables.
While solutions exist to mitigate these attack vectors fully, these
mitigations are not enabled by default in the Linux kernel because they
can affect performance significantly. The kernel provides several
mechanisms which can be utilized to address the problem depending on the
deployment scenario. The mitigations, their protection scope and impact
are described in the next sections.
The default mitigations and the rationale for choosing them are explained
at the end of this document. See :ref:`default_mitigations`.
.. _l1tf_sys_info:
L1TF system information
-----------------------
The Linux kernel provides a sysfs interface to enumerate the current L1TF
status of the system: whether the system is vulnerable, and which
mitigations are active. The relevant sysfs file is:
/sys/devices/system/cpu/vulnerabilities/l1tf
The possible values in this file are:
=========================== ===============================
'Not affected' The processor is not vulnerable
'Mitigation: PTE Inversion' The host protection is active
=========================== ===============================
If KVM/VMX is enabled and the processor is vulnerable then the following
information is appended to the 'Mitigation: PTE Inversion' part:
- SMT status:
===================== ================
'VMX: SMT vulnerable' SMT is enabled
'VMX: SMT disabled' SMT is disabled
===================== ================
- L1D Flush mode:
================================ ====================================
'L1D vulnerable' L1D flushing is disabled
'L1D conditional cache flushes' L1D flush is conditionally enabled
'L1D cache flushes' L1D flush is unconditionally enabled
================================ ====================================
The resulting grade of protection is discussed in the following sections.
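
For illustration only (not part of the patch, and assuming the sysfs path above exists on the running kernel), a small C sketch that reads this status could be:

  #include <stdio.h>

  int main(void)
  {
          char line[128];
          FILE *f = fopen("/sys/devices/system/cpu/vulnerabilities/l1tf", "r");

          if (!f) {
                  perror("l1tf sysfs file");  /* e.g. kernel predates the L1TF reporting */
                  return 1;
          }
          if (fgets(line, sizeof(line), f))
                  printf("L1TF status: %s", line);
          fclose(f);
          return 0;
  }
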
Host mitigation mechanism
-------------------------
The kernel is unconditionally protected against L1TF attacks from malicious
user space running on the host.
Guest mitigation mechanisms
---------------------------
.. _l1d_flush:
1. L1D flush on VMENTER
^^^^^^^^^^^^^^^^^^^^^^^
To make sure that a guest cannot attack data which is present in the L1D
the hypervisor flushes the L1D before entering the guest.
Flushing the L1D evicts not only the data which should not be accessed
by a potentially malicious guest, but also the guest
data. Flushing the L1D has a performance impact as the processor has to
bring the flushed guest data back into the L1D. Depending on the
frequency of VMEXIT/VMENTER and the type of computations in the guest
performance degradation in the range of 1% to 50% has been observed. For
scenarios where guest VMEXIT/VMENTER are rare the performance impact is
minimal. Virtio and mechanisms like posted interrupts are designed to
confine the VMEXITs to a bare minimum, but specific configurations and
application scenarios might still suffer from a high VMEXIT rate.
The kernel provides two L1D flush modes:
- conditional ('cond')
- unconditional ('always')
The conditional mode avoids L1D flushing after VMEXITs which execute
only audited code paths before the corresponding VMENTER. These code
paths have been verified not to expose secrets or other
interesting data to an attacker, but they can leak information about the
address space layout of the hypervisor.
Unconditional mode flushes L1D on all VMENTER invocations and provides
maximum protection. It has a higher overhead than the conditional
mode. The overhead cannot be quantified correctly as it depends on the
workload scenario and the resulting number of VMEXITs.
The general recommendation is to enable L1D flush on VMENTER. The kernel
defaults to conditional mode on affected processors.
**Note**, that L1D flush does not prevent the SMT problem because the
sibling thread will also bring back its data into the L1D which makes it
attackable again.
L1D flush can be controlled by the administrator via the kernel command
line and sysfs control files. See :ref:`mitigation_control_command_line`
and :ref:`mitigation_control_kvm`.
.. _guest_confinement:
2. Guest VCPU confinement to dedicated physical cores
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
To address the SMT problem, it is possible to make a guest or a group of
guests affine to one or more physical cores. The proper mechanism for
that is to utilize exclusive cpusets to ensure that no other guest or
host tasks can run on these cores.
If only a single guest or related guests run on sibling SMT threads on
the same physical core then they can only attack their own memory and
restricted parts of the host memory.
Host memory is attackable, when one of the sibling SMT threads runs in
host OS (hypervisor) context and the other in guest context. The amount
of valuable information from the host OS context depends on the context
which the host OS executes, i.e. interrupts, soft interrupts and kernel
threads. The amount of valuable data from these contexts cannot be
declared as non-interesting for an attacker without deep inspection of
the code.
**Note**, that assigning guests to a fixed set of physical cores affects
the ability of the scheduler to do load balancing and might have
negative effects on CPU utilization depending on the hosting
scenario. Disabling SMT might be a viable alternative for particular
scenarios.
For further information about confining guests to a single or to a group
of cores consult the cpusets documentation:
https://www.kernel.org/doc/Documentation/cgroup-v1/cpusets.txt
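
As an illustration only (not part of this change, and assuming the cgroup v1 cpuset controller is mounted at /sys/fs/cgroup/cpuset; the CPU list and PID below are placeholders), a minimal C sketch that carves out an exclusive cpuset for guest VCPU threads could look like this:

  #include <stdio.h>
  #include <sys/stat.h>

  /* Sketch only: mount point, CPU list and PID are placeholders. */
  static int write_str(const char *path, const char *val)
  {
          FILE *f = fopen(path, "w");

          if (!f)
                  return -1;
          fprintf(f, "%s", val);
          return fclose(f);
  }

  int main(void)
  {
          mkdir("/sys/fs/cgroup/cpuset/guests", 0755);
          /* One physical core and its SMT sibling, reserved for the guest. */
          write_str("/sys/fs/cgroup/cpuset/guests/cpuset.cpus", "2-3");
          write_str("/sys/fs/cgroup/cpuset/guests/cpuset.mems", "0");
          write_str("/sys/fs/cgroup/cpuset/guests/cpuset.cpu_exclusive", "1");
          /* Move a guest VCPU thread (placeholder PID) into the cpuset. */
          write_str("/sys/fs/cgroup/cpuset/guests/tasks", "12345");
          return 0;
  }
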
.. _interrupt_isolation:
3. Interrupt affinity
^^^^^^^^^^^^^^^^^^^^^
Interrupts can be made affine to logical CPUs. This is not universally
true because there are types of interrupts which are truly per CPU
interrupts, e.g. the local timer interrupt. Aside of that multi queue
devices affine their interrupts to single CPUs or groups of CPUs per
queue without allowing the administrator to control the affinities.
Moving the interrupts, which can be affinity controlled, away from CPUs
which run untrusted guests, reduces the attack vector space.
Whether the interrupts which are affine to CPUs running untrusted
guests provide interesting data for an attacker depends on the system
configuration and the scenarios which run on the system. While for some
of the interrupts it can be assumed that they won't expose interesting
information beyond exposing hints about the host OS memory layout, there
is no way to make general assumptions.
Interrupt affinity can be controlled by the administrator via the
/proc/irq/$NR/smp_affinity[_list] files. Limited documentation is
available at:
https://www.kernel.org/doc/Documentation/IRQ-affinity.txt
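
For illustration only (the IRQ number 42 and the CPU lists are placeholders, not part of the patch), moving an interrupt away from CPUs that run untrusted guests could be sketched in C as:

  #include <stdio.h>

  int main(void)
  {
          /* Placeholder IRQ: keep it on the trusted housekeeping CPUs 0-1. */
          FILE *f = fopen("/proc/irq/42/smp_affinity_list", "w");

          if (!f) {
                  perror("smp_affinity_list");
                  return 1;
          }
          fprintf(f, "0-1\n");
          return fclose(f) ? 1 : 0;
  }
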
.. _smt_control:
4. SMT control
^^^^^^^^^^^^^^
To prevent the SMT issues of L1TF it might be necessary to disable SMT
completely. Disabling SMT can have a significant performance impact, but
the impact depends on the hosting scenario and the type of workloads.
The impact of disabling SMT also needs to be weighed against the impact
of other mitigation solutions like confining guests to dedicated cores.
The kernel provides a sysfs interface to retrieve the status of SMT and
to control it. It also provides a kernel command line interface to
control SMT.
The kernel command line interface consists of the following options:
=========== ==========================================================
nosmt Affects the bring up of the secondary CPUs during boot. The
kernel tries to bring all present CPUs online during the
boot process. "nosmt" makes sure that from each physical
core only one - the so called primary (hyper) thread is
activated. Due to a design flaw of Intel processors related
to Machine Check Exceptions the non primary siblings have
to be brought up at least partially and are then shut down
again. "nosmt" can be undone via the sysfs interface.
nosmt=force Has the same effect as "nosmt" but it does not allow to
undo the SMT disable via the sysfs interface.
=========== ==========================================================
The sysfs interface provides two files:
- /sys/devices/system/cpu/smt/control
- /sys/devices/system/cpu/smt/active
/sys/devices/system/cpu/smt/control:
This file allows to read out the SMT control state and provides the
ability to disable or (re)enable SMT. The possible states are:
============== ===================================================
on SMT is supported by the CPU and enabled. All
logical CPUs can be onlined and offlined without
restrictions.
off SMT is supported by the CPU and disabled. Only
the so called primary SMT threads can be onlined
and offlined without restrictions. An attempt to
online a non-primary sibling is rejected
forceoff Same as 'off' but the state cannot be controlled.
Attempts to write to the control file are rejected.
notsupported The processor does not support SMT. It's therefore
not affected by the SMT implications of L1TF.
Attempts to write to the control file are rejected.
============== ===================================================
The possible states which can be written into this file to control SMT
state are:
- on
- off
- forceoff
/sys/devices/system/cpu/smt/active:
This file reports whether SMT is enabled and active, i.e. if on any
physical core two or more sibling threads are online.
SMT control is also possible at boot time via the l1tf kernel command
line parameter in combination with L1D flush control. See
:ref:`mitigation_control_command_line`.
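
A minimal sketch (illustration only, requires root, and assumes the sysfs control file described above) of disabling SMT at runtime from C:

  #include <stdio.h>

  int main(void)
  {
          FILE *f = fopen("/sys/devices/system/cpu/smt/control", "w");

          if (!f) {
                  perror("smt/control");
                  return 1;
          }
          /* Rejected when the current state is "forceoff" or "notsupported". */
          if (fprintf(f, "off\n") < 0 || fclose(f) != 0) {
                  perror("write smt/control");
                  return 1;
          }
          return 0;
  }
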
5. Disabling EPT
^^^^^^^^^^^^^^^^
Disabling EPT for virtual machines provides full mitigation for L1TF even
with SMT enabled, because the effective page tables for guests are
managed and sanitized by the hypervisor. However, disabling EPT has a
significant performance impact, especially when the Meltdown mitigation
KPTI is enabled.
EPT can be disabled in the hypervisor via the 'kvm-intel.ept' parameter.
There is ongoing research and development for new mitigation mechanisms to
address the performance impact of disabling SMT or EPT.
.. _mitigation_control_command_line:
Mitigation control on the kernel command line
---------------------------------------------
The kernel command line allows to control the L1TF mitigations at boot
time with the option "l1tf=". The valid arguments for this option are:
============ =============================================================
full Provides all available mitigations for the L1TF
vulnerability. Disables SMT and enables all mitigations in
the hypervisors, i.e. unconditional L1D flushing
SMT control and L1D flush control via the sysfs interface
is still possible after boot. Hypervisors will issue a
warning when the first VM is started in a potentially
insecure configuration, i.e. SMT enabled or L1D flush
disabled.
full,force Same as 'full', but disables SMT and L1D flush runtime
control. Implies the 'nosmt=force' command line option.
(i.e. sysfs control of SMT is disabled.)
flush Leaves SMT enabled and enables the default hypervisor
mitigation, i.e. conditional L1D flushing
SMT control and L1D flush control via the sysfs interface
is still possible after boot. Hypervisors will issue a
warning when the first VM is started in a potentially
insecure configuration, i.e. SMT enabled or L1D flush
disabled.
flush,nosmt Disables SMT and enables the default hypervisor mitigation,
i.e. conditional L1D flushing.
SMT control and L1D flush control via the sysfs interface
is still possible after boot. Hypervisors will issue a
warning when the first VM is started in a potentially
insecure configuration, i.e. SMT enabled or L1D flush
disabled.
flush,nowarn Same as 'flush', but hypervisors will not warn when a VM is
started in a potentially insecure configuration.
off Disables hypervisor mitigations and doesn't emit any
warnings.
============ =============================================================
The default is 'flush'. For details about L1D flushing see :ref:`l1d_flush`.
.. _mitigation_control_kvm:
Mitigation control for KVM - module parameter
-------------------------------------------------------------
The KVM hypervisor mitigation mechanism, flushing the L1D cache when
entering a guest, can be controlled with a module parameter.
The option/parameter is "kvm-intel.vmentry_l1d_flush=". It takes the
following arguments:
============ ==============================================================
always L1D cache flush on every VMENTER.
cond Flush L1D on VMENTER only when the code between VMEXIT and
VMENTER can leak host memory which is considered
interesting for an attacker. This still can leak host memory
which allows e.g. to determine the host's address space layout.
never Disables the mitigation
============ ==============================================================
The parameter can be provided on the kernel command line, as a module
parameter when loading the modules, and modified at runtime via the sysfs
file:
/sys/module/kvm_intel/parameters/vmentry_l1d_flush
The default is 'cond'. If 'l1tf=full,force' is given on the kernel command
line, then 'always' is enforced and the kvm-intel.vmentry_l1d_flush
module parameter is ignored and writes to the sysfs file are rejected.
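
For illustration only (not part of the patch, and assuming kvm_intel is loaded so the sysfs file above exists), the current flush mode can be queried from C like this:

  #include <stdio.h>

  int main(void)
  {
          char mode[32];
          FILE *f = fopen("/sys/module/kvm_intel/parameters/vmentry_l1d_flush", "r");

          if (!f) {
                  perror("vmentry_l1d_flush");  /* kvm_intel not loaded? */
                  return 1;
          }
          /* Writing "always", "cond" or "never" to the same file changes
           * the mode at runtime, unless 'l1tf=full,force' was given. */
          if (fgets(mode, sizeof(mode), f))
                  printf("vmentry_l1d_flush: %s", mode);
          fclose(f);
          return 0;
  }
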
Mitigation selection guide
--------------------------
1. No virtualization in use
^^^^^^^^^^^^^^^^^^^^^^^^^^^
The system is protected by the kernel unconditionally and no further
action is required.
2. Virtualization with trusted guests
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
If the guest comes from a trusted source and the guest OS kernel is
guaranteed to have the L1TF mitigations in place the system is fully
protected against L1TF and no further action is required.
To avoid the overhead of the default L1D flushing on VMENTER the
administrator can disable the flushing via the kernel command line and
sysfs control files. See :ref:`mitigation_control_command_line` and
:ref:`mitigation_control_kvm`.
3. Virtualization with untrusted guests
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
3.1. SMT not supported or disabled
""""""""""""""""""""""""""""""""""
If SMT is not supported by the processor or disabled in the BIOS or by
the kernel, it's only required to enforce L1D flushing on VMENTER.
Conditional L1D flushing is the default behaviour and can be tuned. See
:ref:`mitigation_control_command_line` and :ref:`mitigation_control_kvm`.
3.2. EPT not supported or disabled
""""""""""""""""""""""""""""""""""
If EPT is not supported by the processor or disabled in the hypervisor,
the system is fully protected. SMT can stay enabled and L1D flushing on
VMENTER is not required.
EPT can be disabled in the hypervisor via the 'kvm-intel.ept' parameter.
3.3. SMT and EPT supported and active
"""""""""""""""""""""""""""""""""""""
If SMT and EPT are supported and active then various degrees of
mitigations can be employed:
- L1D flushing on VMENTER:
L1D flushing on VMENTER is the minimal protection requirement, but it
is only potent in combination with other mitigation methods.
Conditional L1D flushing is the default behaviour and can be tuned. See
:ref:`mitigation_control_command_line` and :ref:`mitigation_control_kvm`.
- Guest confinement:
Confinement of guests to a single or a group of physical cores which
are not running any other processes, can reduce the attack surface
significantly, but interrupts, soft interrupts and kernel threads can
still expose valuable data to a potential attacker. See
:ref:`guest_confinement`.
- Interrupt isolation:
Isolating the guest CPUs from interrupts can reduce the attack surface
further, but still allows a malicious guest to explore a limited amount
of host physical memory. This can at least be used to gain knowledge
about the host address space layout. The interrupts which have a fixed
affinity to the CPUs which run the untrusted guests can, depending on
the scenario, still trigger soft interrupts and schedule kernel threads
which might expose valuable information. See
:ref:`interrupt_isolation`.
The above three mitigation methods combined can provide protection to a
certain degree, but the risk of the remaining attack surface has to be
carefully analyzed. For full protection the following methods are
available:
- Disabling SMT:
Disabling SMT and enforcing the L1D flushing provides the maximum
amount of protection. This mitigation does not depend on any of the
above mitigation methods.
SMT control and L1D flushing can be tuned by the command line
parameters 'nosmt', 'l1tf', 'kvm-intel.vmentry_l1d_flush' and at run
time with the matching sysfs control files. See :ref:`smt_control`,
:ref:`mitigation_control_command_line` and
:ref:`mitigation_control_kvm`.
- Disabling EPT:
Disabling EPT provides the maximum amount of protection as well. It does
not depend on any of the above mitigation methods. SMT can stay
enabled and L1D flushing is not required, but the performance impact is
significant.
EPT can be disabled in the hypervisor via the 'kvm-intel.ept'
parameter.
3.4. Nested virtual machines
""""""""""""""""""""""""""""
When nested virtualization is in use, three operating systems are involved:
the bare metal hypervisor, the nested hypervisor and the nested virtual
machine. VMENTER operations from the nested hypervisor into the nested
guest will always be processed by the bare metal hypervisor. If KVM is the
bare metal hypervisor it will:
- Flush the L1D cache on every switch from the nested hypervisor to the
nested virtual machine, so that the nested hypervisor's secrets are not
exposed to the nested virtual machine;
- Flush the L1D cache on every switch from the nested virtual machine to
the nested hypervisor; this is a complex operation, and flushing the L1D
cache prevents the bare metal hypervisor's secrets from being exposed to the
nested virtual machine;
- Instruct the nested hypervisor to not perform any L1D cache flush. This
is an optimization to avoid double L1D flushing.
.. _default_mitigations:
Default mitigations
-------------------
The kernel default mitigations for vulnerable processors are:
- PTE inversion to protect against malicious user space. This is done
unconditionally and cannot be controlled.
- L1D conditional flushing on VMENTER when EPT is enabled for
a guest.
The kernel does not by default enforce the disabling of SMT, which leaves
SMT systems vulnerable when running untrusted guests with EPT enabled.
The rationale for this choice is:
- Force disabling SMT can break existing setups, especially with
unattended updates.
- If regular users run untrusted guests on their machine, then L1TF is
just an add-on to other malware which might be embedded in an untrusted
guest, e.g. spam-bots or attacks on the local network.
There is no technical way to prevent a user from running untrusted code
on their machines blindly.
- It's technically extremely unlikely and from today's knowledge even
impossible that L1TF can be exploited via the most popular attack
mechanisms like JavaScript because these mechanisms have no way to
control PTEs. If this were possible and no other mitigation were
available, then the default might be different.
- The administrators of cloud and hosting setups have to carefully
analyze the risk for their scenarios and make the appropriate
mitigation choices, which might even vary across their deployed
machines and also result in other changes of their overall setup.
There is no way for the kernel to provide a sensible default for this
kind of scenario.


@ -279,11 +279,10 @@ struct clk:
%pC pll1
%pCn pll1
%pCr 1560000000
For printing struct clk structures. '%pC' and '%pCn' print the name
(Common Clock Framework) or address (legacy clock framework) of the
structure; '%pCr' prints the current clock rate.
structure.
Passed by reference.


@ -122,14 +122,15 @@ KVM_CAP_S390_UCONTROL and use the flag KVM_VM_S390_UCONTROL as
privileged user (CAP_SYS_ADMIN).
4.3 KVM_GET_MSR_INDEX_LIST
4.3 KVM_GET_MSR_INDEX_LIST, KVM_GET_MSR_FEATURE_INDEX_LIST
Capability: basic
Capability: basic, KVM_CAP_GET_MSR_FEATURES for KVM_GET_MSR_FEATURE_INDEX_LIST
Architectures: x86
Type: system
Type: system ioctl
Parameters: struct kvm_msr_list (in/out)
Returns: 0 on success; -1 on error
Errors:
EFAULT: the msr index list cannot be read from or written to
E2BIG: the msr index list is too big to fit in the array specified by
the user.
@ -138,16 +139,23 @@ struct kvm_msr_list {
__u32 indices[0];
};
This ioctl returns the guest msrs that are supported. The list varies
by kvm version and host processor, but does not change otherwise. The
user fills in the size of the indices array in nmsrs, and in return
kvm adjusts nmsrs to reflect the actual number of msrs and fills in
the indices array with their numbers.
The user fills in the size of the indices array in nmsrs, and in return
kvm adjusts nmsrs to reflect the actual number of msrs and fills in the
indices array with their numbers.
KVM_GET_MSR_INDEX_LIST returns the guest msrs that are supported. The list
varies by kvm version and host processor, but does not change otherwise.
Note: if kvm indicates support for MCE (KVM_CAP_MCE), then the MCE bank MSRs are
not returned in the MSR list, as different vcpus can have a different number
of banks, as set via the KVM_X86_SETUP_MCE ioctl.
KVM_GET_MSR_FEATURE_INDEX_LIST returns the list of MSRs that can be passed
to the KVM_GET_MSRS system ioctl. This lets userspace probe host capabilities
and processor features that are exposed via MSRs (e.g., VMX capabilities).
This list also varies by kvm version and host processor, but does not change
otherwise.
4.4 KVM_CHECK_EXTENSION
@ -474,14 +482,22 @@ Support for this has been removed. Use KVM_SET_GUEST_DEBUG instead.
4.18 KVM_GET_MSRS
Capability: basic
Capability: basic (vcpu), KVM_CAP_GET_MSR_FEATURES (system)
Architectures: x86
Type: vcpu ioctl
Type: system ioctl, vcpu ioctl
Parameters: struct kvm_msrs (in/out)
Returns: 0 on success, -1 on error
Returns: number of msrs successfully returned;
-1 on error
When used as a system ioctl:
Reads the values of MSR-based features that are available for the VM. This
is similar to KVM_GET_SUPPORTED_CPUID, but it returns MSR indices and values.
The list of msr-based features can be obtained using KVM_GET_MSR_FEATURE_INDEX_LIST
in a system ioctl.
When used as a vcpu ioctl:
Reads model-specific registers from the vcpu. Supported msr indices can
be obtained using KVM_GET_MSR_INDEX_LIST.
be obtained using KVM_GET_MSR_INDEX_LIST in a system ioctl.
struct kvm_msrs {
__u32 nmsrs; /* number of msrs in entries */


@ -1,6 +1,6 @@
VERSION = 4
PATCHLEVEL = 9
SUBLEVEL = 110
SUBLEVEL = 126
EXTRAVERSION =
NAME = Roaring Lionus
@ -417,7 +417,8 @@ export MAKE AWK GENKSYMS INSTALLKERNEL PERL PYTHON UTS_MACHINE
export HOSTCXX HOSTCXXFLAGS LDFLAGS_MODULE CHECK CHECKFLAGS
export KBUILD_CPPFLAGS NOSTDINC_FLAGS LINUXINCLUDE OBJCOPYFLAGS LDFLAGS
export KBUILD_CFLAGS CFLAGS_KERNEL CFLAGS_MODULE CFLAGS_KASAN CFLAGS_UBSAN
export KBUILD_CFLAGS CFLAGS_KERNEL CFLAGS_MODULE
export CFLAGS_KASAN CFLAGS_KASAN_NOSANITIZE CFLAGS_UBSAN
export KBUILD_AFLAGS AFLAGS_KERNEL AFLAGS_MODULE
export KBUILD_AFLAGS_MODULE KBUILD_CFLAGS_MODULE KBUILD_LDFLAGS_MODULE
export KBUILD_AFLAGS_KERNEL KBUILD_CFLAGS_KERNEL
@ -635,6 +636,7 @@ KBUILD_CFLAGS += $(call cc-disable-warning,frame-address,)
KBUILD_CFLAGS += $(call cc-disable-warning, format-truncation)
KBUILD_CFLAGS += $(call cc-disable-warning, format-overflow)
KBUILD_CFLAGS += $(call cc-disable-warning, int-in-bool-context)
KBUILD_CFLAGS += $(call cc-disable-warning, attribute-alias)
ifdef CONFIG_LD_DEAD_CODE_DATA_ELIMINATION
KBUILD_CFLAGS += $(call cc-option,-ffunction-sections,)


@ -5,6 +5,9 @@
config KEXEC_CORE
bool
config HOTPLUG_SMT
bool
config OPROFILE
tristate "OProfile system profiling"
depends on PROFILING


@ -526,24 +526,19 @@ SYSCALL_DEFINE4(osf_mount, unsigned long, typenr, const char __user *, path,
SYSCALL_DEFINE1(osf_utsname, char __user *, name)
{
int error;
char tmp[5 * 32];
down_read(&uts_sem);
error = -EFAULT;
if (copy_to_user(name + 0, utsname()->sysname, 32))
goto out;
if (copy_to_user(name + 32, utsname()->nodename, 32))
goto out;
if (copy_to_user(name + 64, utsname()->release, 32))
goto out;
if (copy_to_user(name + 96, utsname()->version, 32))
goto out;
if (copy_to_user(name + 128, utsname()->machine, 32))
goto out;
memcpy(tmp + 0 * 32, utsname()->sysname, 32);
memcpy(tmp + 1 * 32, utsname()->nodename, 32);
memcpy(tmp + 2 * 32, utsname()->release, 32);
memcpy(tmp + 3 * 32, utsname()->version, 32);
memcpy(tmp + 4 * 32, utsname()->machine, 32);
up_read(&uts_sem);
error = 0;
out:
up_read(&uts_sem);
return error;
if (copy_to_user(name, tmp, sizeof(tmp)))
return -EFAULT;
return 0;
}
SYSCALL_DEFINE0(getpagesize)
@ -561,24 +556,22 @@ SYSCALL_DEFINE0(getdtablesize)
*/
SYSCALL_DEFINE2(osf_getdomainname, char __user *, name, int, namelen)
{
unsigned len;
int i;
int len, err = 0;
char *kname;
char tmp[32];
if (!access_ok(VERIFY_WRITE, name, namelen))
return -EFAULT;
len = namelen;
if (len > 32)
len = 32;
if (namelen < 0 || namelen > 32)
namelen = 32;
down_read(&uts_sem);
for (i = 0; i < len; ++i) {
__put_user(utsname()->domainname[i], name + i);
if (utsname()->domainname[i] == '\0')
break;
}
kname = utsname()->domainname;
len = strnlen(kname, namelen);
len = min(len + 1, namelen);
memcpy(tmp, kname, len);
up_read(&uts_sem);
if (copy_to_user(name, tmp, len))
return -EFAULT;
return 0;
}
@ -741,13 +734,14 @@ SYSCALL_DEFINE3(osf_sysinfo, int, command, char __user *, buf, long, count)
};
unsigned long offset;
const char *res;
long len, err = -EINVAL;
long len;
char tmp[__NEW_UTS_LEN + 1];
offset = command-1;
if (offset >= ARRAY_SIZE(sysinfo_table)) {
/* Digital UNIX has a few unpublished interfaces here */
printk("sysinfo(%d)", command);
goto out;
return -EINVAL;
}
down_read(&uts_sem);
@ -755,13 +749,11 @@ SYSCALL_DEFINE3(osf_sysinfo, int, command, char __user *, buf, long, count)
len = strlen(res)+1;
if ((unsigned long)len > (unsigned long)count)
len = count;
if (copy_to_user(buf, res, len))
err = -EFAULT;
else
err = 0;
memcpy(tmp, res, len);
up_read(&uts_sem);
out:
return err;
if (copy_to_user(buf, tmp, len))
return -EFAULT;
return 0;
}
SYSCALL_DEFINE5(osf_getsysinfo, unsigned long, op, void __user *, buffer,


@ -18,7 +18,7 @@ endif
KBUILD_DEFCONFIG := nsim_700_defconfig
cflags-y += -fno-common -pipe -fno-builtin -D__linux__
cflags-y += -fno-common -pipe -fno-builtin -mmedium-calls -D__linux__
cflags-$(CONFIG_ISA_ARCOMPACT) += -mA7
cflags-$(CONFIG_ISA_ARCV2) += -mcpu=archs
@ -141,16 +141,3 @@ dtbs: scripts
archclean:
$(Q)$(MAKE) $(clean)=$(boot)
# Hacks to enable final link due to absence of link-time branch relexation
# and gcc choosing optimal(shorter) branches at -O3
#
# vineetg Feb 2010: -mlong-calls switched off for overall kernel build
# However lib/decompress_inflate.o (.init.text) calls
# zlib_inflate_workspacesize (.text) causing relocation errors.
# Thus forcing all exten calls in this file to be long calls
export CFLAGS_decompress_inflate.o = -mmedium-calls
export CFLAGS_initramfs.o = -mmedium-calls
ifdef CONFIG_SMP
export CFLAGS_core.o = -mmedium-calls
endif


@ -11,7 +11,6 @@ CONFIG_NAMESPACES=y
# CONFIG_UTS_NS is not set
# CONFIG_PID_NS is not set
CONFIG_BLK_DEV_INITRD=y
CONFIG_INITRAMFS_SOURCE="../arc_initramfs/"
CONFIG_EMBEDDED=y
CONFIG_PERF_EVENTS=y
# CONFIG_VM_EVENT_COUNTERS is not set


@ -11,7 +11,6 @@ CONFIG_NAMESPACES=y
# CONFIG_UTS_NS is not set
# CONFIG_PID_NS is not set
CONFIG_BLK_DEV_INITRD=y
CONFIG_INITRAMFS_SOURCE="../../arc_initramfs_hs/"
CONFIG_EMBEDDED=y
CONFIG_PERF_EVENTS=y
# CONFIG_VM_EVENT_COUNTERS is not set


@ -11,7 +11,6 @@ CONFIG_NAMESPACES=y
# CONFIG_UTS_NS is not set
# CONFIG_PID_NS is not set
CONFIG_BLK_DEV_INITRD=y
CONFIG_INITRAMFS_SOURCE="../../arc_initramfs_hs/"
CONFIG_EMBEDDED=y
CONFIG_PERF_EVENTS=y
# CONFIG_VM_EVENT_COUNTERS is not set


@ -11,7 +11,6 @@ CONFIG_NAMESPACES=y
# CONFIG_UTS_NS is not set
# CONFIG_PID_NS is not set
CONFIG_BLK_DEV_INITRD=y
CONFIG_INITRAMFS_SOURCE="../arc_initramfs/"
CONFIG_KALLSYMS_ALL=y
CONFIG_EMBEDDED=y
CONFIG_PERF_EVENTS=y


@ -11,7 +11,6 @@ CONFIG_NAMESPACES=y
# CONFIG_UTS_NS is not set
# CONFIG_PID_NS is not set
CONFIG_BLK_DEV_INITRD=y
CONFIG_INITRAMFS_SOURCE="../../arc_initramfs_hs/"
CONFIG_KALLSYMS_ALL=y
CONFIG_EMBEDDED=y
CONFIG_PERF_EVENTS=y


@ -9,7 +9,6 @@ CONFIG_NAMESPACES=y
# CONFIG_UTS_NS is not set
# CONFIG_PID_NS is not set
CONFIG_BLK_DEV_INITRD=y
CONFIG_INITRAMFS_SOURCE="../arc_initramfs_hs/"
CONFIG_KALLSYMS_ALL=y
CONFIG_EMBEDDED=y
CONFIG_PERF_EVENTS=y


@ -11,7 +11,6 @@ CONFIG_NAMESPACES=y
# CONFIG_UTS_NS is not set
# CONFIG_PID_NS is not set
CONFIG_BLK_DEV_INITRD=y
CONFIG_INITRAMFS_SOURCE="../arc_initramfs/"
CONFIG_KALLSYMS_ALL=y
CONFIG_EMBEDDED=y
CONFIG_PERF_EVENTS=y


@ -11,7 +11,6 @@ CONFIG_NAMESPACES=y
# CONFIG_UTS_NS is not set
# CONFIG_PID_NS is not set
CONFIG_BLK_DEV_INITRD=y
CONFIG_INITRAMFS_SOURCE="../arc_initramfs_hs/"
CONFIG_KALLSYMS_ALL=y
CONFIG_EMBEDDED=y
CONFIG_PERF_EVENTS=y


@ -9,7 +9,6 @@ CONFIG_IKCONFIG_PROC=y
# CONFIG_UTS_NS is not set
# CONFIG_PID_NS is not set
CONFIG_BLK_DEV_INITRD=y
CONFIG_INITRAMFS_SOURCE="../arc_initramfs_hs/"
CONFIG_PERF_EVENTS=y
# CONFIG_COMPAT_BRK is not set
CONFIG_KPROBES=y


@ -17,8 +17,11 @@
#ifndef __ASM_ARC_UDELAY_H
#define __ASM_ARC_UDELAY_H
#include <asm-generic/types.h>
#include <asm/param.h> /* HZ */
extern unsigned long loops_per_jiffy;
static inline void __delay(unsigned long loops)
{
__asm__ __volatile__(


@ -34,9 +34,7 @@ struct machine_desc {
const char *name;
const char **dt_compat;
void (*init_early)(void);
#ifdef CONFIG_SMP
void (*init_per_cpu)(unsigned int);
#endif
void (*init_machine)(void);
void (*init_late)(void);


@ -105,7 +105,7 @@ typedef pte_t * pgtable_t;
#define virt_addr_valid(kaddr) pfn_valid(virt_to_pfn(kaddr))
/* Default Permissions for stack/heaps pages (Non Executable) */
#define VM_DATA_DEFAULT_FLAGS (VM_READ | VM_WRITE | VM_MAYREAD | VM_MAYWRITE)
#define VM_DATA_DEFAULT_FLAGS (VM_READ | VM_WRITE | VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC)
#define WANT_PAGE_VIRTUAL 1


@ -378,7 +378,7 @@ void update_mmu_cache(struct vm_area_struct *vma, unsigned long address,
/* Decode a PTE containing swap "identifier "into constituents */
#define __swp_type(pte_lookalike) (((pte_lookalike).val) & 0x1f)
#define __swp_offset(pte_lookalike) ((pte_lookalike).val << 13)
#define __swp_offset(pte_lookalike) ((pte_lookalike).val >> 13)
/* NOPs, to keep generic kernel happy */
#define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val(pte) })


@ -31,10 +31,10 @@ void __init init_IRQ(void)
/* a SMP H/w block could do IPI IRQ request here */
if (plat_smp_ops.init_per_cpu)
plat_smp_ops.init_per_cpu(smp_processor_id());
#endif
if (machine_desc->init_per_cpu)
machine_desc->init_per_cpu(smp_processor_id());
#endif
}
/*


@ -44,7 +44,8 @@ SYSCALL_DEFINE0(arc_gettls)
SYSCALL_DEFINE3(arc_usr_cmpxchg, int *, uaddr, int, expected, int, new)
{
struct pt_regs *regs = current_pt_regs();
int uval = -EFAULT;
u32 uval;
int ret;
/*
* This is only for old cores lacking LLOCK/SCOND, which by defintion
@ -57,23 +58,47 @@ SYSCALL_DEFINE3(arc_usr_cmpxchg, int *, uaddr, int, expected, int, new)
/* Z indicates to userspace if operation succeded */
regs->status32 &= ~STATUS_Z_MASK;
if (!access_ok(VERIFY_WRITE, uaddr, sizeof(int)))
return -EFAULT;
ret = access_ok(VERIFY_WRITE, uaddr, sizeof(*uaddr));
if (!ret)
goto fail;
again:
preempt_disable();
if (__get_user(uval, uaddr))
goto done;
ret = __get_user(uval, uaddr);
if (ret)
goto fault;
if (uval == expected) {
if (!__put_user(new, uaddr))
regs->status32 |= STATUS_Z_MASK;
}
if (uval != expected)
goto out;
done:
ret = __put_user(new, uaddr);
if (ret)
goto fault;
regs->status32 |= STATUS_Z_MASK;
out:
preempt_enable();
return uval;
fault:
preempt_enable();
return uval;
if (unlikely(ret != -EFAULT))
goto fail;
down_read(&current->mm->mmap_sem);
ret = fixup_user_fault(current, current->mm, (unsigned long) uaddr,
FAULT_FLAG_WRITE, NULL);
up_read(&current->mm->mmap_sem);
if (likely(!ret))
goto again;
fail:
force_sig(SIGSEGV, current);
return ret;
}
void arch_cpu_idle(void)


@ -840,7 +840,7 @@ void flush_cache_mm(struct mm_struct *mm)
void flush_cache_page(struct vm_area_struct *vma, unsigned long u_vaddr,
unsigned long pfn)
{
unsigned int paddr = pfn << PAGE_SHIFT;
phys_addr_t paddr = pfn << PAGE_SHIFT;
u_vaddr &= PAGE_MASK;
@ -860,8 +860,9 @@ void flush_anon_page(struct vm_area_struct *vma, struct page *page,
unsigned long u_vaddr)
{
/* TBD: do we really need to clear the kernel mapping */
__flush_dcache_page(page_address(page), u_vaddr);
__flush_dcache_page(page_address(page), page_address(page));
__flush_dcache_page((phys_addr_t)page_address(page), u_vaddr);
__flush_dcache_page((phys_addr_t)page_address(page),
(phys_addr_t)page_address(page));
}


@ -21,6 +21,7 @@
#error "Incorrect ctop.h include"
#endif
#include <linux/types.h>
#include <soc/nps/common.h>
/* core auxiliary registers */


@ -74,6 +74,11 @@
};
};
/* Table Table 5-79 of the TRM shows 480ab000 is reserved */
&usb_otg_hs {
status = "disabled";
};
&iva {
status = "disabled";
};


@ -533,6 +533,8 @@
touchscreen-size-x = <480>;
touchscreen-size-y = <272>;
wakeup-source;
};
tlv320aic3106: tlv320aic3106@1b {


@ -128,7 +128,7 @@
reg = <0x18008000 0x100>;
#address-cells = <1>;
#size-cells = <0>;
interrupts = <GIC_SPI 85 IRQ_TYPE_NONE>;
interrupts = <GIC_SPI 85 IRQ_TYPE_LEVEL_HIGH>;
clock-frequency = <100000>;
status = "disabled";
};
@ -157,7 +157,7 @@
reg = <0x1800b000 0x100>;
#address-cells = <1>;
#size-cells = <0>;
interrupts = <GIC_SPI 86 IRQ_TYPE_NONE>;
interrupts = <GIC_SPI 86 IRQ_TYPE_LEVEL_HIGH>;
clock-frequency = <100000>;
status = "disabled";
};
@ -168,7 +168,7 @@
#interrupt-cells = <1>;
interrupt-map-mask = <0 0 0 0>;
interrupt-map = <0 0 0 0 &gic GIC_SPI 100 IRQ_TYPE_NONE>;
interrupt-map = <0 0 0 0 &gic GIC_SPI 100 IRQ_TYPE_LEVEL_HIGH>;
linux,pci-domain = <0>;
@ -190,10 +190,10 @@
compatible = "brcm,iproc-msi";
msi-controller;
interrupt-parent = <&gic>;
interrupts = <GIC_SPI 96 IRQ_TYPE_NONE>,
<GIC_SPI 97 IRQ_TYPE_NONE>,
<GIC_SPI 98 IRQ_TYPE_NONE>,
<GIC_SPI 99 IRQ_TYPE_NONE>;
interrupts = <GIC_SPI 96 IRQ_TYPE_LEVEL_HIGH>,
<GIC_SPI 97 IRQ_TYPE_LEVEL_HIGH>,
<GIC_SPI 98 IRQ_TYPE_LEVEL_HIGH>,
<GIC_SPI 99 IRQ_TYPE_LEVEL_HIGH>;
};
};
@ -203,7 +203,7 @@
#interrupt-cells = <1>;
interrupt-map-mask = <0 0 0 0>;
interrupt-map = <0 0 0 0 &gic GIC_SPI 106 IRQ_TYPE_NONE>;
interrupt-map = <0 0 0 0 &gic GIC_SPI 106 IRQ_TYPE_LEVEL_HIGH>;
linux,pci-domain = <1>;
@ -225,10 +225,10 @@
compatible = "brcm,iproc-msi";
msi-controller;
interrupt-parent = <&gic>;
interrupts = <GIC_SPI 102 IRQ_TYPE_NONE>,
<GIC_SPI 103 IRQ_TYPE_NONE>,
<GIC_SPI 104 IRQ_TYPE_NONE>,
<GIC_SPI 105 IRQ_TYPE_NONE>;
interrupts = <GIC_SPI 102 IRQ_TYPE_LEVEL_HIGH>,
<GIC_SPI 103 IRQ_TYPE_LEVEL_HIGH>,
<GIC_SPI 104 IRQ_TYPE_LEVEL_HIGH>,
<GIC_SPI 105 IRQ_TYPE_LEVEL_HIGH>;
};
};


@ -288,7 +288,7 @@
reg = <0x38000 0x50>;
#address-cells = <1>;
#size-cells = <0>;
interrupts = <GIC_SPI 89 IRQ_TYPE_NONE>;
interrupts = <GIC_SPI 89 IRQ_TYPE_LEVEL_HIGH>;
clock-frequency = <100000>;
};
@ -375,7 +375,7 @@
#interrupt-cells = <1>;
interrupt-map-mask = <0 0 0 0>;
interrupt-map = <0 0 0 0 &gic GIC_SPI 131 IRQ_TYPE_NONE>;
interrupt-map = <0 0 0 0 &gic GIC_SPI 131 IRQ_TYPE_LEVEL_HIGH>;
linux,pci-domain = <0>;
@ -397,10 +397,10 @@
compatible = "brcm,iproc-msi";
msi-controller;
interrupt-parent = <&gic>;
interrupts = <GIC_SPI 127 IRQ_TYPE_NONE>,
<GIC_SPI 128 IRQ_TYPE_NONE>,
<GIC_SPI 129 IRQ_TYPE_NONE>,
<GIC_SPI 130 IRQ_TYPE_NONE>;
interrupts = <GIC_SPI 127 IRQ_TYPE_LEVEL_HIGH>,
<GIC_SPI 128 IRQ_TYPE_LEVEL_HIGH>,
<GIC_SPI 129 IRQ_TYPE_LEVEL_HIGH>,
<GIC_SPI 130 IRQ_TYPE_LEVEL_HIGH>;
brcm,pcie-msi-inten;
};
};
@ -411,7 +411,7 @@
#interrupt-cells = <1>;
interrupt-map-mask = <0 0 0 0>;
interrupt-map = <0 0 0 0 &gic GIC_SPI 137 IRQ_TYPE_NONE>;
interrupt-map = <0 0 0 0 &gic GIC_SPI 137 IRQ_TYPE_LEVEL_HIGH>;
linux,pci-domain = <1>;
@ -433,10 +433,10 @@
compatible = "brcm,iproc-msi";
msi-controller;
interrupt-parent = <&gic>;
interrupts = <GIC_SPI 133 IRQ_TYPE_NONE>,
<GIC_SPI 134 IRQ_TYPE_NONE>,
<GIC_SPI 135 IRQ_TYPE_NONE>,
<GIC_SPI 136 IRQ_TYPE_NONE>;
interrupts = <GIC_SPI 133 IRQ_TYPE_LEVEL_HIGH>,
<GIC_SPI 134 IRQ_TYPE_LEVEL_HIGH>,
<GIC_SPI 135 IRQ_TYPE_LEVEL_HIGH>,
<GIC_SPI 136 IRQ_TYPE_LEVEL_HIGH>;
brcm,pcie-msi-inten;
};
};
@ -447,7 +447,7 @@
#interrupt-cells = <1>;
interrupt-map-mask = <0 0 0 0>;
interrupt-map = <0 0 0 0 &gic GIC_SPI 143 IRQ_TYPE_NONE>;
interrupt-map = <0 0 0 0 &gic GIC_SPI 143 IRQ_TYPE_LEVEL_HIGH>;
linux,pci-domain = <2>;
@ -469,10 +469,10 @@
compatible = "brcm,iproc-msi";
msi-controller;
interrupt-parent = <&gic>;
interrupts = <GIC_SPI 139 IRQ_TYPE_NONE>,
<GIC_SPI 140 IRQ_TYPE_NONE>,
<GIC_SPI 141 IRQ_TYPE_NONE>,
<GIC_SPI 142 IRQ_TYPE_NONE>;
interrupts = <GIC_SPI 139 IRQ_TYPE_LEVEL_HIGH>,
<GIC_SPI 140 IRQ_TYPE_LEVEL_HIGH>,
<GIC_SPI 141 IRQ_TYPE_LEVEL_HIGH>,
<GIC_SPI 142 IRQ_TYPE_LEVEL_HIGH>;
brcm,pcie-msi-inten;
};
};


@ -377,11 +377,7 @@
gpio-controller;
#gpio-cells = <2>;
reg = <0x226000 0x1000>;
interrupts = <42 IRQ_TYPE_EDGE_BOTH
43 IRQ_TYPE_EDGE_BOTH 44 IRQ_TYPE_EDGE_BOTH
45 IRQ_TYPE_EDGE_BOTH 46 IRQ_TYPE_EDGE_BOTH
47 IRQ_TYPE_EDGE_BOTH 48 IRQ_TYPE_EDGE_BOTH
49 IRQ_TYPE_EDGE_BOTH 50 IRQ_TYPE_EDGE_BOTH>;
interrupts = <42 43 44 45 46 47 48 49 50>;
ti,ngpio = <144>;
ti,davinci-gpio-unbanked = <0>;
status = "disabled";


@ -30,13 +30,13 @@
#address-cells = <1>;
#size-cells = <0>;
cpu@0 {
cpu0: cpu@0 {
device_type = "cpu";
compatible = "arm,cortex-a9";
reg = <0>;
clock-frequency = <533000000>;
};
cpu@1 {
cpu1: cpu@1 {
device_type = "cpu";
compatible = "arm,cortex-a9";
reg = <1>;
@ -56,6 +56,7 @@
compatible = "arm,cortex-a9-pmu";
interrupts = <GIC_SPI 120 IRQ_TYPE_LEVEL_HIGH>,
<GIC_SPI 121 IRQ_TYPE_LEVEL_HIGH>;
interrupt-affinity = <&cpu0>, <&cpu1>;
};
clocks@e0110000 {


@ -157,7 +157,7 @@
clocks = <&clks IMX6Q_CLK_ECSPI5>,
<&clks IMX6Q_CLK_ECSPI5>;
clock-names = "ipg", "per";
dmas = <&sdma 11 7 1>, <&sdma 12 7 2>;
dmas = <&sdma 11 8 1>, <&sdma 12 8 2>;
dma-names = "rx", "tx";
status = "disabled";
};


@ -22,7 +22,7 @@
#address-cells = <1>;
#size-cells = <0>;
cpu@0 {
cpu0: cpu@0 {
device_type = "cpu";
compatible = "arm,cortex-a9";
reg = <0>;
@ -30,7 +30,7 @@
power-domains = <&pd_a2sl>;
next-level-cache = <&L2>;
};
cpu@1 {
cpu1: cpu@1 {
device_type = "cpu";
compatible = "arm,cortex-a9";
reg = <1>;
@ -89,6 +89,7 @@
compatible = "arm,cortex-a9-pmu";
interrupts = <GIC_SPI 55 IRQ_TYPE_LEVEL_HIGH>,
<GIC_SPI 56 IRQ_TYPE_LEVEL_HIGH>;
interrupt-affinity = <&cpu0>, <&cpu1>;
};
cmt1: timer@e6138000 {


@ -205,6 +205,7 @@
#address-cells = <1>;
#size-cells = <0>;
reg = <0x70>;
reset-gpio = <&gpio TEGRA_GPIO(BB, 0) GPIO_ACTIVE_LOW>;
};
};


@ -145,9 +145,11 @@ CONFIG_USB_STORAGE=y
CONFIG_USB_CHIPIDEA=y
CONFIG_USB_CHIPIDEA_UDC=y
CONFIG_USB_CHIPIDEA_HOST=y
CONFIG_USB_CHIPIDEA_ULPI=y
CONFIG_NOP_USB_XCEIV=y
CONFIG_USB_GADGET=y
CONFIG_USB_ETH=m
CONFIG_USB_ULPI_BUS=y
CONFIG_MMC=y
CONFIG_MMC_SDHCI=y
CONFIG_MMC_SDHCI_PLTFM=y


@ -271,6 +271,7 @@ CONFIG_USB_STORAGE=y
CONFIG_USB_CHIPIDEA=y
CONFIG_USB_CHIPIDEA_UDC=y
CONFIG_USB_CHIPIDEA_HOST=y
CONFIG_USB_CHIPIDEA_ULPI=y
CONFIG_USB_SERIAL=m
CONFIG_USB_SERIAL_GENERIC=y
CONFIG_USB_SERIAL_FTDI_SIO=m
@ -307,6 +308,7 @@ CONFIG_USB_GADGETFS=m
CONFIG_USB_FUNCTIONFS=m
CONFIG_USB_MASS_STORAGE=m
CONFIG_USB_G_SERIAL=m
CONFIG_USB_ULPI_BUS=y
CONFIG_MMC=y
CONFIG_MMC_SDHCI=y
CONFIG_MMC_SDHCI_PLTFM=y


@ -76,7 +76,7 @@ extern int kgdb_fault_expected;
#define KGDB_MAX_NO_CPUS 1
#define BUFMAX 400
#define NUMREGBYTES (DBG_MAX_REG_NUM << 2)
#define NUMREGBYTES (GDB_MAX_REGS << 2)
#define NUMCRITREGBYTES (32 << 2)
#define _R0 0


@ -327,4 +327,16 @@ static inline bool kvm_arm_harden_branch_predictor(void)
return false;
}
#define KVM_SSBD_UNKNOWN -1
#define KVM_SSBD_FORCE_DISABLE 0
#define KVM_SSBD_KERNEL 1
#define KVM_SSBD_FORCE_ENABLE 2
#define KVM_SSBD_MITIGATED 3
static inline int kvm_arm_have_ssbd(void)
{
/* No way to detect it yet, pretend it is not there. */
return KVM_SSBD_UNKNOWN;
}
#endif /* __ARM_KVM_HOST_H__ */


@ -28,6 +28,13 @@
*/
#define kern_hyp_va(kva) (kva)
/* Contrary to arm64, there is no need to generate a PC-relative address */
#define hyp_symbol_addr(s) \
({ \
typeof(s) *addr = &(s); \
addr; \
})
/*
* KVM_MMU_CACHE_MIN_PAGES is the number of stage2 page table translation levels.
*/
@ -249,6 +256,11 @@ static inline int kvm_map_vectors(void)
return 0;
}
static inline int hyp_map_aux_data(void)
{
return 0;
}
#endif /* !__ASSEMBLY__ */
#endif /* __ARM_KVM_MMU_H__ */


@ -51,8 +51,8 @@
__asm__(".arch_extension virt");
#endif
DEFINE_PER_CPU(kvm_cpu_context_t, kvm_host_cpu_state);
static DEFINE_PER_CPU(unsigned long, kvm_arm_hyp_stack_page);
static kvm_cpu_context_t __percpu *kvm_host_cpu_state;
static unsigned long hyp_default_vectors;
/* Per-CPU variable containing the currently running vcpu. */
@ -338,7 +338,7 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
}
vcpu->cpu = cpu;
vcpu->arch.host_cpu_context = this_cpu_ptr(kvm_host_cpu_state);
vcpu->arch.host_cpu_context = this_cpu_ptr(&kvm_host_cpu_state);
kvm_arm_set_running_vcpu(vcpu);
}
@ -1199,19 +1199,8 @@ static inline void hyp_cpu_pm_exit(void)
}
#endif
static void teardown_common_resources(void)
{
free_percpu(kvm_host_cpu_state);
}
static int init_common_resources(void)
{
kvm_host_cpu_state = alloc_percpu(kvm_cpu_context_t);
if (!kvm_host_cpu_state) {
kvm_err("Cannot allocate host CPU state\n");
return -ENOMEM;
}
/* set size of VMID supported by CPU */
kvm_vmid_bits = kvm_get_vmid_bits();
kvm_info("%d-bit VMID\n", kvm_vmid_bits);
@ -1369,7 +1358,7 @@ static int init_hyp_mode(void)
for_each_possible_cpu(cpu) {
kvm_cpu_context_t *cpu_ctxt;
cpu_ctxt = per_cpu_ptr(kvm_host_cpu_state, cpu);
cpu_ctxt = per_cpu_ptr(&kvm_host_cpu_state, cpu);
err = create_hyp_mappings(cpu_ctxt, cpu_ctxt + 1, PAGE_HYP);
if (err) {
@ -1378,6 +1367,12 @@ static int init_hyp_mode(void)
}
}
err = hyp_map_aux_data();
if (err) {
kvm_err("Cannot map host auxilary data: %d\n", err);
goto out_err;
}
kvm_info("Hyp mode initialized successfully\n");
return 0;
@ -1447,7 +1442,6 @@ int kvm_arch_init(void *opaque)
out_hyp:
teardown_hyp_mode();
out_err:
teardown_common_resources();
return err;
}


@ -894,19 +894,35 @@ static int stage2_set_pmd_huge(struct kvm *kvm, struct kvm_mmu_memory_cache
pmd = stage2_get_pmd(kvm, cache, addr);
VM_BUG_ON(!pmd);
/*
* Mapping in huge pages should only happen through a fault. If a
* page is merged into a transparent huge page, the individual
* subpages of that huge page should be unmapped through MMU
* notifiers before we get here.
*
* Merging of CompoundPages is not supported; they should become
* splitting first, unmapped, merged, and mapped back in on-demand.
*/
VM_BUG_ON(pmd_present(*pmd) && pmd_pfn(*pmd) != pmd_pfn(*new_pmd));
old_pmd = *pmd;
if (pmd_present(old_pmd)) {
/*
* Multiple vcpus faulting on the same PMD entry, can
* lead to them sequentially updating the PMD with the
* same value. Following the break-before-make
* (pmd_clear() followed by tlb_flush()) process can
* hinder forward progress due to refaults generated
* on missing translations.
*
* Skip updating the page table if the entry is
* unchanged.
*/
if (pmd_val(old_pmd) == pmd_val(*new_pmd))
return 0;
/*
* Mapping in huge pages should only happen through a
* fault. If a page is merged into a transparent huge
* page, the individual subpages of that huge page
* should be unmapped through MMU notifiers before we
* get here.
*
* Merging of CompoundPages is not supported; they
* should become splitting first, unmapped, merged,
* and mapped back in on-demand.
*/
VM_BUG_ON(pmd_pfn(old_pmd) != pmd_pfn(*new_pmd));
pmd_clear(pmd);
kvm_tlb_flush_vmid_ipa(kvm, addr);
} else {
@ -962,6 +978,10 @@ static int stage2_set_pte(struct kvm *kvm, struct kvm_mmu_memory_cache *cache,
/* Create 2nd stage page table mapping - Level 3 */
old_pte = *pte;
if (pte_present(old_pte)) {
/* Skip page table update if there is no change */
if (pte_val(old_pte) == pte_val(*new_pte))
return 0;
kvm_set_pte(pte, __pte(0));
kvm_tlb_flush_vmid_ipa(kvm, addr);
} else {


@ -403,7 +403,7 @@ static int kvm_psci_call(struct kvm_vcpu *vcpu)
int kvm_hvc_call_handler(struct kvm_vcpu *vcpu)
{
u32 func_id = smccc_get_function(vcpu);
u32 val = PSCI_RET_NOT_SUPPORTED;
u32 val = SMCCC_RET_NOT_SUPPORTED;
u32 feature;
switch (func_id) {
@ -415,7 +415,21 @@ int kvm_hvc_call_handler(struct kvm_vcpu *vcpu)
switch(feature) {
case ARM_SMCCC_ARCH_WORKAROUND_1:
if (kvm_arm_harden_branch_predictor())
val = 0;
val = SMCCC_RET_SUCCESS;
break;
case ARM_SMCCC_ARCH_WORKAROUND_2:
switch (kvm_arm_have_ssbd()) {
case KVM_SSBD_FORCE_DISABLE:
case KVM_SSBD_UNKNOWN:
break;
case KVM_SSBD_KERNEL:
val = SMCCC_RET_SUCCESS;
break;
case KVM_SSBD_FORCE_ENABLE:
case KVM_SSBD_MITIGATED:
val = SMCCC_RET_NOT_REQUIRED;
break;
}
break;
}
break;
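A hedged illustration of what this enables on the guest side (not part of the patch itself): a guest kernel can probe for the workaround through the SMCCC 1.1 features query, mirroring the host-side has_ssbd_mitigation() probe further down in this diff. The helper name is hypothetical.

/*
 * Sketch only: guest-side discovery of ARM_SMCCC_ARCH_WORKAROUND_2.
 * SMCCC_RET_NOT_REQUIRED would mean the host is already mitigated or
 * unaffected, so no runtime toggling is needed.
 */
static bool guest_has_dynamic_ssbd(void)
{
	struct arm_smccc_res res;

	arm_smccc_1_1_hvc(ARM_SMCCC_ARCH_FEATURES_FUNC_ID,
			  ARM_SMCCC_ARCH_WORKAROUND_2, &res);
	return (int)res.a0 == SMCCC_RET_SUCCESS;
}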

View File

@ -104,6 +104,45 @@ void omap5_erratum_workaround_801819(void)
static inline void omap5_erratum_workaround_801819(void) { }
#endif
#ifdef CONFIG_HARDEN_BRANCH_PREDICTOR
/*
* Configure ACR and enable ACTLR[0] (Enable invalidates of BTB with
* ICIALLU) to activate the workaround for secondary Core.
* NOTE: it is assumed that the primary core's configuration is done
* by the boot loader (kernel will detect a misconfiguration and complain
* if this is not done).
*
* In General Purpose (GP) devices, ACR bit settings can only be done
* by ROM code in "secure world" using the smc call and there is no
* option to update the "firmware" on such devices. This also works for
* High Security (HS) devices, as a backup option in case the
* "update" is not done in the "security firmware".
*/
static void omap5_secondary_harden_predictor(void)
{
u32 acr, acr_mask;
asm volatile ("mrc p15, 0, %0, c1, c0, 1" : "=r" (acr));
/*
* ACTLR[0] (Enable invalidates of BTB with ICIALLU)
*/
acr_mask = BIT(0);
/* Do we already have it done.. if yes, skip expensive smc */
if ((acr & acr_mask) == acr_mask)
return;
acr |= acr_mask;
omap_smc1(OMAP5_DRA7_MON_SET_ACR_INDEX, acr);
pr_debug("%s: ARM ACR setup for CVE_2017_5715 applied on CPU%d\n",
__func__, smp_processor_id());
}
#else
static inline void omap5_secondary_harden_predictor(void) { }
#endif
static void omap4_secondary_init(unsigned int cpu)
{
/*
@ -126,6 +165,8 @@ static void omap4_secondary_init(unsigned int cpu)
set_cntfreq();
/* Configure ACR to disable streaming WA for 801819 */
omap5_erratum_workaround_801819();
/* Enable ACR to allow for ICIALLU workaround */
omap5_secondary_harden_predictor();
}
/*

View File

@ -185,7 +185,7 @@ static int pxa_irq_suspend(void)
{
int i;
for (i = 0; i < pxa_internal_irq_nr / 32; i++) {
for (i = 0; i < DIV_ROUND_UP(pxa_internal_irq_nr, 32); i++) {
void __iomem *base = irq_base(i);
saved_icmr[i] = __raw_readl(base + ICMR);
@ -204,7 +204,7 @@ static void pxa_irq_resume(void)
{
int i;
for (i = 0; i < pxa_internal_irq_nr / 32; i++) {
for (i = 0; i < DIV_ROUND_UP(pxa_internal_irq_nr, 32); i++) {
void __iomem *base = irq_base(i);
__raw_writel(saved_icmr[i], base + ICMR);
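The rounding matters whenever the number of internal IRQs is not a multiple of 32: plain integer division skips the partially used ICMR/ICLR bank on suspend/resume. A small hedged sketch (the count of 34 is purely illustrative and not taken from this diff):

#include <stdio.h>

#define DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d))

int main(void)
{
	int nr_irqs = 34;	/* hypothetical: not a multiple of 32 */

	/* old bound: only bank 0 is visited, IRQs 32..33 are never saved */
	printf("old banks: %d\n", nr_irqs / 32);
	/* new bound: rounds up and covers the partial second bank */
	printf("new banks: %d\n", DIV_ROUND_UP(nr_irqs, 32));
	return 0;
}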

View File

@ -722,19 +722,28 @@ int __mark_rodata_ro(void *unused)
return 0;
}
static int kernel_set_to_readonly __read_mostly;
void mark_rodata_ro(void)
{
kernel_set_to_readonly = 1;
stop_machine(__mark_rodata_ro, NULL, NULL);
}
void set_kernel_text_rw(void)
{
if (!kernel_set_to_readonly)
return;
set_section_perms(ro_perms, ARRAY_SIZE(ro_perms), false,
current->active_mm);
}
void set_kernel_text_ro(void)
{
if (!kernel_set_to_readonly)
return;
set_section_perms(ro_perms, ARRAY_SIZE(ro_perms), true,
current->active_mm);
}

View File

@ -777,6 +777,15 @@ config HARDEN_BRANCH_PREDICTOR
If unsure, say Y.
config ARM64_SSBD
bool "Speculative Store Bypass Disable" if EXPERT
default y
help
This enables mitigation of the bypassing of previous stores
by speculative loads.
If unsure, say Y.
menuconfig ARMV8_DEPRECATED
bool "Emulate deprecated/obsolete ARMv8 instructions"
depends on COMPAT

View File

@ -393,7 +393,7 @@
reg = <0x66080000 0x100>;
#address-cells = <1>;
#size-cells = <0>;
interrupts = <GIC_SPI 394 IRQ_TYPE_NONE>;
interrupts = <GIC_SPI 394 IRQ_TYPE_LEVEL_HIGH>;
clock-frequency = <100000>;
status = "disabled";
};
@ -421,7 +421,7 @@
reg = <0x660b0000 0x100>;
#address-cells = <1>;
#size-cells = <0>;
interrupts = <GIC_SPI 395 IRQ_TYPE_NONE>;
interrupts = <GIC_SPI 395 IRQ_TYPE_LEVEL_HIGH>;
clock-frequency = <100000>;
status = "disabled";
};

View File

@ -319,6 +319,8 @@ CONFIG_GPIO_MAX732X=y
CONFIG_GPIO_PCA953X=y
CONFIG_GPIO_PCA953X_IRQ=y
CONFIG_GPIO_MAX77620=y
CONFIG_POWER_AVS=y
CONFIG_ROCKCHIP_IODOMAIN=y
CONFIG_POWER_RESET_MSM=y
CONFIG_POWER_RESET_XGENE=y
CONFIG_POWER_RESET_SYSCON=y

View File

@ -4,6 +4,8 @@
#include <asm/cpucaps.h>
#include <asm/insn.h>
#define ARM64_CB_PATCH ARM64_NCAPS
#ifndef __ASSEMBLY__
#include <linux/init.h>
@ -11,6 +13,8 @@
#include <linux/stddef.h>
#include <linux/stringify.h>
extern int alternatives_applied;
struct alt_instr {
s32 orig_offset; /* offset to original instruction */
s32 alt_offset; /* offset to replacement instruction */
@ -19,12 +23,19 @@ struct alt_instr {
u8 alt_len; /* size of new instruction(s), <= orig_len */
};
typedef void (*alternative_cb_t)(struct alt_instr *alt,
__le32 *origptr, __le32 *updptr, int nr_inst);
void __init apply_alternatives_all(void);
void apply_alternatives(void *start, size_t length);
#define ALTINSTR_ENTRY(feature) \
#define ALTINSTR_ENTRY(feature,cb) \
" .word 661b - .\n" /* label */ \
" .if " __stringify(cb) " == 0\n" \
" .word 663f - .\n" /* new instruction */ \
" .else\n" \
" .word " __stringify(cb) "- .\n" /* callback */ \
" .endif\n" \
" .hword " __stringify(feature) "\n" /* feature bit */ \
" .byte 662b-661b\n" /* source len */ \
" .byte 664f-663f\n" /* replacement len */
@ -42,15 +53,18 @@ void apply_alternatives(void *start, size_t length);
* but most assemblers die if insn1 or insn2 have a .inst. This should
* be fixed in a binutils release posterior to 2.25.51.0.2 (anything
* containing commit 4e4d08cf7399b606 or c1baaddf8861).
*
* Alternatives with callbacks do not generate replacement instructions.
*/
#define __ALTERNATIVE_CFG(oldinstr, newinstr, feature, cfg_enabled) \
#define __ALTERNATIVE_CFG(oldinstr, newinstr, feature, cfg_enabled, cb) \
".if "__stringify(cfg_enabled)" == 1\n" \
"661:\n\t" \
oldinstr "\n" \
"662:\n" \
".pushsection .altinstructions,\"a\"\n" \
ALTINSTR_ENTRY(feature) \
ALTINSTR_ENTRY(feature,cb) \
".popsection\n" \
" .if " __stringify(cb) " == 0\n" \
".pushsection .altinstr_replacement, \"a\"\n" \
"663:\n\t" \
newinstr "\n" \
@ -58,11 +72,17 @@ void apply_alternatives(void *start, size_t length);
".popsection\n\t" \
".org . - (664b-663b) + (662b-661b)\n\t" \
".org . - (662b-661b) + (664b-663b)\n" \
".else\n\t" \
"663:\n\t" \
"664:\n\t" \
".endif\n" \
".endif\n"
#define _ALTERNATIVE_CFG(oldinstr, newinstr, feature, cfg, ...) \
__ALTERNATIVE_CFG(oldinstr, newinstr, feature, IS_ENABLED(cfg))
__ALTERNATIVE_CFG(oldinstr, newinstr, feature, IS_ENABLED(cfg), 0)
#define ALTERNATIVE_CB(oldinstr, cb) \
__ALTERNATIVE_CFG(oldinstr, "NOT_AN_INSTRUCTION", ARM64_CB_PATCH, 1, cb)
#else
#include <asm/assembler.h>
@ -129,6 +149,14 @@ void apply_alternatives(void *start, size_t length);
661:
.endm
.macro alternative_cb cb
.set .Lasm_alt_mode, 0
.pushsection .altinstructions, "a"
altinstruction_entry 661f, \cb, ARM64_CB_PATCH, 662f-661f, 0
.popsection
661:
.endm
/*
* Provide the other half of the alternative code sequence.
*/
@ -154,6 +182,13 @@ void apply_alternatives(void *start, size_t length);
.org . - (662b-661b) + (664b-663b)
.endm
/*
* Callback-based alternative epilogue
*/
.macro alternative_cb_end
662:
.endm
/*
* Provides a trivial alternative or default sequence consisting solely
* of NOPs. The number of NOPs is chosen automatically to match the

View File

@ -239,14 +239,33 @@ lr .req x30 // link register
.endm
/*
* @dst: Result of per_cpu(sym, smp_processor_id())
* @sym: The name of the per-cpu variable
* @reg: Result of per_cpu(sym, smp_processor_id())
* @tmp: scratch register
*/
.macro this_cpu_ptr, sym, reg, tmp
adr_l \reg, \sym
.macro adr_this_cpu, dst, sym, tmp
adr_l \dst, \sym
alternative_if_not ARM64_HAS_VIRT_HOST_EXTN
mrs \tmp, tpidr_el1
add \reg, \reg, \tmp
alternative_else
mrs \tmp, tpidr_el2
alternative_endif
add \dst, \dst, \tmp
.endm
/*
* @dst: Result of READ_ONCE(per_cpu(sym, smp_processor_id()))
* @sym: The name of the per-cpu variable
* @tmp: scratch register
*/
.macro ldr_this_cpu dst, sym, tmp
adr_l \dst, \sym
alternative_if_not ARM64_HAS_VIRT_HOST_EXTN
mrs \tmp, tpidr_el1
alternative_else
mrs \tmp, tpidr_el2
alternative_endif
ldr \dst, [\dst, \tmp]
.endm
/*

View File

@ -229,7 +229,9 @@ static inline void __cmpwait_case_##name(volatile void *ptr, \
unsigned long tmp; \
\
asm volatile( \
" ldxr" #sz "\t%" #w "[tmp], %[v]\n" \
" sevl\n" \
" wfe\n" \
" ldxr" #sz "\t%" #w "[tmp], %[v]\n" \
" eor %" #w "[tmp], %" #w "[tmp], %" #w "[val]\n" \
" cbnz %" #w "[tmp], 1f\n" \
" wfe\n" \

View File

@ -36,7 +36,8 @@
#define ARM64_MISMATCHED_CACHE_LINE_SIZE 15
#define ARM64_UNMAP_KERNEL_AT_EL0 16
#define ARM64_HARDEN_BRANCH_PREDICTOR 17
#define ARM64_SSBD 18
#define ARM64_NCAPS 18
#define ARM64_NCAPS 19
#endif /* __ASM_CPUCAPS_H */

View File

@ -221,6 +221,28 @@ static inline bool system_supports_mixed_endian_el0(void)
return id_aa64mmfr0_mixed_endian_el0(read_system_reg(SYS_ID_AA64MMFR0_EL1));
}
#define ARM64_SSBD_UNKNOWN -1
#define ARM64_SSBD_FORCE_DISABLE 0
#define ARM64_SSBD_KERNEL 1
#define ARM64_SSBD_FORCE_ENABLE 2
#define ARM64_SSBD_MITIGATED 3
static inline int arm64_get_ssbd_state(void)
{
#ifdef CONFIG_ARM64_SSBD
extern int ssbd_state;
return ssbd_state;
#else
return ARM64_SSBD_UNKNOWN;
#endif
}
#ifdef CONFIG_ARM64_SSBD
void arm64_set_ssbd_mitigation(bool state);
#else
static inline void arm64_set_ssbd_mitigation(bool state) {}
#endif
#endif /* __ASSEMBLY__ */
#endif

View File

@ -33,6 +33,10 @@
#define KVM_ARM64_DEBUG_DIRTY_SHIFT 0
#define KVM_ARM64_DEBUG_DIRTY (1 << KVM_ARM64_DEBUG_DIRTY_SHIFT)
#define VCPU_WORKAROUND_2_FLAG_SHIFT 0
#define VCPU_WORKAROUND_2_FLAG (_AC(1, UL) << VCPU_WORKAROUND_2_FLAG_SHIFT)
/* Translate a kernel address of @sym into its equivalent linear mapping */
#define kvm_ksym_ref(sym) \
({ \
void *val = &sym; \
@ -65,6 +69,43 @@ extern u32 __kvm_get_mdcr_el2(void);
extern u32 __init_stage2_translation(void);
/* Home-grown __this_cpu_{ptr,read} variants that always work at HYP */
#define __hyp_this_cpu_ptr(sym) \
({ \
void *__ptr = hyp_symbol_addr(sym); \
__ptr += read_sysreg(tpidr_el2); \
(typeof(&sym))__ptr; \
})
#define __hyp_this_cpu_read(sym) \
({ \
*__hyp_this_cpu_ptr(sym); \
})
#else /* __ASSEMBLY__ */
.macro hyp_adr_this_cpu reg, sym, tmp
adr_l \reg, \sym
mrs \tmp, tpidr_el2
add \reg, \reg, \tmp
.endm
.macro hyp_ldr_this_cpu reg, sym, tmp
adr_l \reg, \sym
mrs \tmp, tpidr_el2
ldr \reg, [\reg, \tmp]
.endm
.macro get_host_ctxt reg, tmp
hyp_adr_this_cpu \reg, kvm_host_cpu_state, \tmp
.endm
.macro get_vcpu_ptr vcpu, ctxt
get_host_ctxt \ctxt, \vcpu
ldr \vcpu, [\ctxt, #HOST_CONTEXT_VCPU]
kern_hyp_va \vcpu
.endm
#endif
#endif /* __ARM_KVM_ASM_H__ */
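For orientation, a hedged sketch (not additional code from the patch) of how the C-side helper is intended to be used from HYP, mirroring the arm64_ssbd_callback_required access that appears later in this series in hyp/switch.c; the function name is hypothetical.

/*
 * Sketch only: read a per-cpu variable from HYP context, where the
 * normal this_cpu accessors (tpidr_el1 plus kernel VAs) cannot be
 * used.  hyp_symbol_addr() yields a PC-relative address, and
 * tpidr_el2 holds the per-cpu offset installed by __kvm_set_tpidr_el2().
 */
static bool __hyp_text ssbd_callback_needed(void)
{
	return __hyp_this_cpu_read(arm64_ssbd_callback_required);
}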

View File

@ -197,6 +197,8 @@ struct kvm_cpu_context {
u64 sys_regs[NR_SYS_REGS];
u32 copro[NR_COPRO_REGS];
};
struct kvm_vcpu *__hyp_running_vcpu;
};
typedef struct kvm_cpu_context kvm_cpu_context_t;
@ -211,6 +213,9 @@ struct kvm_vcpu_arch {
/* Exception Information */
struct kvm_vcpu_fault_info fault;
/* State of various workarounds, see kvm_asm.h for bit assignment */
u64 workaround_flags;
/* Guest debug state */
u64 debug_flags;
@ -354,10 +359,15 @@ int kvm_perf_teardown(void);
struct kvm_vcpu *kvm_mpidr_to_vcpu(struct kvm *kvm, unsigned long mpidr);
void __kvm_set_tpidr_el2(u64 tpidr_el2);
DECLARE_PER_CPU(kvm_cpu_context_t, kvm_host_cpu_state);
static inline void __cpu_init_hyp_mode(phys_addr_t pgd_ptr,
unsigned long hyp_stack_ptr,
unsigned long vector_ptr)
{
u64 tpidr_el2;
/*
* Call initialization code, and switch to the full blown HYP code.
* If the cpucaps haven't been finalized yet, something has gone very
@ -366,6 +376,16 @@ static inline void __cpu_init_hyp_mode(phys_addr_t pgd_ptr,
*/
BUG_ON(!static_branch_likely(&arm64_const_caps_ready));
__kvm_call_hyp((void *)pgd_ptr, hyp_stack_ptr, vector_ptr);
/*
* Calculate the raw per-cpu offset without a translation from the
* kernel's mapping to the linear mapping, and store it in tpidr_el2
* so that we can use adr_l to access per-cpu variables in EL2.
*/
tpidr_el2 = (u64)this_cpu_ptr(&kvm_host_cpu_state)
- (u64)kvm_ksym_ref(kvm_host_cpu_state);
kvm_call_hyp(__kvm_set_tpidr_el2, tpidr_el2);
}
void __kvm_hyp_teardown(void);
@ -405,4 +425,27 @@ static inline bool kvm_arm_harden_branch_predictor(void)
return cpus_have_const_cap(ARM64_HARDEN_BRANCH_PREDICTOR);
}
#define KVM_SSBD_UNKNOWN -1
#define KVM_SSBD_FORCE_DISABLE 0
#define KVM_SSBD_KERNEL 1
#define KVM_SSBD_FORCE_ENABLE 2
#define KVM_SSBD_MITIGATED 3
static inline int kvm_arm_have_ssbd(void)
{
switch (arm64_get_ssbd_state()) {
case ARM64_SSBD_FORCE_DISABLE:
return KVM_SSBD_FORCE_DISABLE;
case ARM64_SSBD_KERNEL:
return KVM_SSBD_KERNEL;
case ARM64_SSBD_FORCE_ENABLE:
return KVM_SSBD_FORCE_ENABLE;
case ARM64_SSBD_MITIGATED:
return KVM_SSBD_MITIGATED;
case ARM64_SSBD_UNKNOWN:
default:
return KVM_SSBD_UNKNOWN;
}
}
#endif /* __ARM64_KVM_HOST_H__ */

View File

@ -130,6 +130,26 @@ static inline unsigned long __kern_hyp_va(unsigned long v)
#define kern_hyp_va(v) ((typeof(v))(__kern_hyp_va((unsigned long)(v))))
/*
* Obtain the PC-relative address of a kernel symbol
* s: symbol
*
* The goal of this macro is to return a symbol's address based on a
* PC-relative computation, as opposed to loading the VA from a
* constant pool or something similar. This works well for HYP, as an
* absolute VA is guaranteed to be wrong. Only use this if trying to
* obtain the address of a symbol (i.e. not something you obtained by
* following a pointer).
*/
#define hyp_symbol_addr(s) \
({ \
typeof(s) *addr; \
asm("adrp %0, %1\n" \
"add %0, %0, :lo12:%1\n" \
: "=r" (addr) : "S" (&s)); \
addr; \
})
/*
* We currently only support a 40bit IPA.
*/
@ -367,5 +387,29 @@ static inline int kvm_map_vectors(void)
}
#endif
#ifdef CONFIG_ARM64_SSBD
DECLARE_PER_CPU_READ_MOSTLY(u64, arm64_ssbd_callback_required);
static inline int hyp_map_aux_data(void)
{
int cpu, err;
for_each_possible_cpu(cpu) {
u64 *ptr;
ptr = per_cpu_ptr(&arm64_ssbd_callback_required, cpu);
err = create_hyp_mappings(ptr, ptr + 1, PAGE_HYP);
if (err)
return err;
}
return 0;
}
#else
static inline int hyp_map_aux_data(void)
{
return 0;
}
#endif
#endif /* __ASSEMBLY__ */
#endif /* __ARM64_KVM_MMU_H__ */

View File

@ -16,9 +16,14 @@
#ifndef __ASM_PERCPU_H
#define __ASM_PERCPU_H
#include <asm/alternative.h>
static inline void set_my_cpu_offset(unsigned long off)
{
asm volatile("msr tpidr_el1, %0" :: "r" (off) : "memory");
asm volatile(ALTERNATIVE("msr tpidr_el1, %0",
"msr tpidr_el2, %0",
ARM64_HAS_VIRT_HOST_EXTN)
:: "r" (off) : "memory");
}
static inline unsigned long __my_cpu_offset(void)
@ -29,7 +34,10 @@ static inline unsigned long __my_cpu_offset(void)
* We want to allow caching the value, so avoid using volatile and
* instead use a fake stack read to hazard against barrier().
*/
asm("mrs %0, tpidr_el1" : "=r" (off) :
asm(ALTERNATIVE("mrs %0, tpidr_el1",
"mrs %0, tpidr_el2",
ARM64_HAS_VIRT_HOST_EXTN)
: "=r" (off) :
"Q" (*(const unsigned long *)current_stack_pointer));
return off;

View File

@ -122,6 +122,7 @@ static inline struct thread_info *current_thread_info(void)
#define TIF_RESTORE_SIGMASK 20
#define TIF_SINGLESTEP 21
#define TIF_32BIT 22 /* 32bit process */
#define TIF_SSBD 23 /* Wants SSB mitigation */
#define _TIF_SIGPENDING (1 << TIF_SIGPENDING)
#define _TIF_NEED_RESCHED (1 << TIF_NEED_RESCHED)

View File

@ -50,6 +50,7 @@ arm64-obj-$(CONFIG_RANDOMIZE_BASE) += kaslr.o
arm64-obj-$(CONFIG_HIBERNATION) += hibernate.o hibernate-asm.o
arm64-obj-$(CONFIG_KEXEC) += machine_kexec.o relocate_kernel.o \
cpu-reset.o
arm64-obj-$(CONFIG_ARM64_SSBD) += ssbd.o
ifeq ($(CONFIG_KVM),y)
arm64-obj-$(CONFIG_HARDEN_BRANCH_PREDICTOR) += bpi.o

View File

@ -28,10 +28,12 @@
#include <asm/sections.h>
#include <linux/stop_machine.h>
#define __ALT_PTR(a,f) (u32 *)((void *)&(a)->f + (a)->f)
#define __ALT_PTR(a,f) ((void *)&(a)->f + (a)->f)
#define ALT_ORIG_PTR(a) __ALT_PTR(a, orig_offset)
#define ALT_REPL_PTR(a) __ALT_PTR(a, alt_offset)
int alternatives_applied;
struct alt_region {
struct alt_instr *begin;
struct alt_instr *end;
@ -105,32 +107,52 @@ static u32 get_alt_insn(struct alt_instr *alt, u32 *insnptr, u32 *altinsnptr)
return insn;
}
static void __apply_alternatives(void *alt_region, bool use_linear_alias)
static void patch_alternative(struct alt_instr *alt,
__le32 *origptr, __le32 *updptr, int nr_inst)
{
__le32 *replptr;
int i;
replptr = ALT_REPL_PTR(alt);
for (i = 0; i < nr_inst; i++) {
u32 insn;
insn = get_alt_insn(alt, origptr + i, replptr + i);
updptr[i] = cpu_to_le32(insn);
}
}
static void __apply_alternatives(void *alt_region)
{
struct alt_instr *alt;
struct alt_region *region = alt_region;
u32 *origptr, *replptr, *updptr;
__le32 *origptr;
alternative_cb_t alt_cb;
for (alt = region->begin; alt < region->end; alt++) {
u32 insn;
int i, nr_inst;
int nr_inst;
if (!cpus_have_cap(alt->cpufeature))
/* Use ARM64_CB_PATCH as an unconditional patch */
if (alt->cpufeature < ARM64_CB_PATCH &&
!cpus_have_cap(alt->cpufeature))
continue;
BUG_ON(alt->alt_len != alt->orig_len);
if (alt->cpufeature == ARM64_CB_PATCH)
BUG_ON(alt->alt_len != 0);
else
BUG_ON(alt->alt_len != alt->orig_len);
pr_info_once("patching kernel code\n");
origptr = ALT_ORIG_PTR(alt);
replptr = ALT_REPL_PTR(alt);
updptr = use_linear_alias ? (u32 *)lm_alias(origptr) : origptr;
nr_inst = alt->alt_len / sizeof(insn);
nr_inst = alt->orig_len / AARCH64_INSN_SIZE;
for (i = 0; i < nr_inst; i++) {
insn = get_alt_insn(alt, origptr + i, replptr + i);
updptr[i] = cpu_to_le32(insn);
}
if (alt->cpufeature < ARM64_CB_PATCH)
alt_cb = patch_alternative;
else
alt_cb = ALT_REPL_PTR(alt);
alt_cb(alt, origptr, origptr, nr_inst);
flush_icache_range((uintptr_t)origptr,
(uintptr_t)(origptr + nr_inst));
@ -143,7 +165,6 @@ static void __apply_alternatives(void *alt_region, bool use_linear_alias)
*/
static int __apply_alternatives_multi_stop(void *unused)
{
static int patched = 0;
struct alt_region region = {
.begin = (struct alt_instr *)__alt_instructions,
.end = (struct alt_instr *)__alt_instructions_end,
@ -151,14 +172,14 @@ static int __apply_alternatives_multi_stop(void *unused)
/* We always have a CPU 0 at this point (__init) */
if (smp_processor_id()) {
while (!READ_ONCE(patched))
while (!READ_ONCE(alternatives_applied))
cpu_relax();
isb();
} else {
BUG_ON(patched);
__apply_alternatives(&region, true);
BUG_ON(alternatives_applied);
__apply_alternatives(&region);
/* Barriers provided by the cache flushing */
WRITE_ONCE(patched, 1);
WRITE_ONCE(alternatives_applied, 1);
}
return 0;
@ -177,5 +198,5 @@ void apply_alternatives(void *start, size_t length)
.end = start + length,
};
__apply_alternatives(&region, false);
__apply_alternatives(&region);
}

View File

@ -127,11 +127,13 @@ int main(void)
BLANK();
#ifdef CONFIG_KVM_ARM_HOST
DEFINE(VCPU_CONTEXT, offsetof(struct kvm_vcpu, arch.ctxt));
DEFINE(VCPU_WORKAROUND_FLAGS, offsetof(struct kvm_vcpu, arch.workaround_flags));
DEFINE(CPU_GP_REGS, offsetof(struct kvm_cpu_context, gp_regs));
DEFINE(CPU_USER_PT_REGS, offsetof(struct kvm_regs, regs));
DEFINE(CPU_FP_REGS, offsetof(struct kvm_regs, fp_regs));
DEFINE(VCPU_FPEXC32_EL2, offsetof(struct kvm_vcpu, arch.ctxt.sys_regs[FPEXC32_EL2]));
DEFINE(VCPU_HOST_CONTEXT, offsetof(struct kvm_vcpu, arch.host_cpu_context));
DEFINE(HOST_CONTEXT_VCPU, offsetof(struct kvm_cpu_context, __hyp_running_vcpu));
#endif
#ifdef CONFIG_CPU_PM
DEFINE(CPU_SUSPEND_SZ, sizeof(struct cpu_suspend_ctx));

View File

@ -187,6 +187,178 @@ static int enable_smccc_arch_workaround_1(void *data)
}
#endif /* CONFIG_HARDEN_BRANCH_PREDICTOR */
#ifdef CONFIG_ARM64_SSBD
DEFINE_PER_CPU_READ_MOSTLY(u64, arm64_ssbd_callback_required);
int ssbd_state __read_mostly = ARM64_SSBD_KERNEL;
static const struct ssbd_options {
const char *str;
int state;
} ssbd_options[] = {
{ "force-on", ARM64_SSBD_FORCE_ENABLE, },
{ "force-off", ARM64_SSBD_FORCE_DISABLE, },
{ "kernel", ARM64_SSBD_KERNEL, },
};
static int __init ssbd_cfg(char *buf)
{
int i;
if (!buf || !buf[0])
return -EINVAL;
for (i = 0; i < ARRAY_SIZE(ssbd_options); i++) {
int len = strlen(ssbd_options[i].str);
if (strncmp(buf, ssbd_options[i].str, len))
continue;
ssbd_state = ssbd_options[i].state;
return 0;
}
return -EINVAL;
}
early_param("ssbd", ssbd_cfg);
void __init arm64_update_smccc_conduit(struct alt_instr *alt,
__le32 *origptr, __le32 *updptr,
int nr_inst)
{
u32 insn;
BUG_ON(nr_inst != 1);
switch (psci_ops.conduit) {
case PSCI_CONDUIT_HVC:
insn = aarch64_insn_get_hvc_value();
break;
case PSCI_CONDUIT_SMC:
insn = aarch64_insn_get_smc_value();
break;
default:
return;
}
*updptr = cpu_to_le32(insn);
}
void __init arm64_enable_wa2_handling(struct alt_instr *alt,
__le32 *origptr, __le32 *updptr,
int nr_inst)
{
BUG_ON(nr_inst != 1);
/*
* Only allow mitigation on EL1 entry/exit and guest
* ARCH_WORKAROUND_2 handling if the SSBD state allows it to
* be flipped.
*/
if (arm64_get_ssbd_state() == ARM64_SSBD_KERNEL)
*updptr = cpu_to_le32(aarch64_insn_gen_nop());
}
void arm64_set_ssbd_mitigation(bool state)
{
switch (psci_ops.conduit) {
case PSCI_CONDUIT_HVC:
arm_smccc_1_1_hvc(ARM_SMCCC_ARCH_WORKAROUND_2, state, NULL);
break;
case PSCI_CONDUIT_SMC:
arm_smccc_1_1_smc(ARM_SMCCC_ARCH_WORKAROUND_2, state, NULL);
break;
default:
WARN_ON_ONCE(1);
break;
}
}
static bool has_ssbd_mitigation(const struct arm64_cpu_capabilities *entry,
int scope)
{
struct arm_smccc_res res;
bool required = true;
s32 val;
WARN_ON(scope != SCOPE_LOCAL_CPU || preemptible());
if (psci_ops.smccc_version == SMCCC_VERSION_1_0) {
ssbd_state = ARM64_SSBD_UNKNOWN;
return false;
}
switch (psci_ops.conduit) {
case PSCI_CONDUIT_HVC:
arm_smccc_1_1_hvc(ARM_SMCCC_ARCH_FEATURES_FUNC_ID,
ARM_SMCCC_ARCH_WORKAROUND_2, &res);
break;
case PSCI_CONDUIT_SMC:
arm_smccc_1_1_smc(ARM_SMCCC_ARCH_FEATURES_FUNC_ID,
ARM_SMCCC_ARCH_WORKAROUND_2, &res);
break;
default:
ssbd_state = ARM64_SSBD_UNKNOWN;
return false;
}
val = (s32)res.a0;
switch (val) {
case SMCCC_RET_NOT_SUPPORTED:
ssbd_state = ARM64_SSBD_UNKNOWN;
return false;
case SMCCC_RET_NOT_REQUIRED:
pr_info_once("%s mitigation not required\n", entry->desc);
ssbd_state = ARM64_SSBD_MITIGATED;
return false;
case SMCCC_RET_SUCCESS:
required = true;
break;
case 1: /* Mitigation not required on this CPU */
required = false;
break;
default:
WARN_ON(1);
return false;
}
switch (ssbd_state) {
case ARM64_SSBD_FORCE_DISABLE:
pr_info_once("%s disabled from command-line\n", entry->desc);
arm64_set_ssbd_mitigation(false);
required = false;
break;
case ARM64_SSBD_KERNEL:
if (required) {
__this_cpu_write(arm64_ssbd_callback_required, 1);
arm64_set_ssbd_mitigation(true);
}
break;
case ARM64_SSBD_FORCE_ENABLE:
pr_info_once("%s forced from command-line\n", entry->desc);
arm64_set_ssbd_mitigation(true);
required = true;
break;
default:
WARN_ON(1);
break;
}
return required;
}
#endif /* CONFIG_ARM64_SSBD */
#define MIDR_RANGE(model, min, max) \
.def_scope = SCOPE_LOCAL_CPU, \
.matches = is_affected_midr_range, \
@ -309,6 +481,14 @@ const struct arm64_cpu_capabilities arm64_errata[] = {
MIDR_ALL_VERSIONS(MIDR_CAVIUM_THUNDERX2),
.enable = enable_smccc_arch_workaround_1,
},
#endif
#ifdef CONFIG_ARM64_SSBD
{
.desc = "Speculative Store Bypass Disable",
.def_scope = SCOPE_LOCAL_CPU,
.capability = ARM64_SSBD,
.matches = has_ssbd_mitigation,
},
#endif
{
}

View File

@ -828,9 +828,25 @@ static int __init parse_kpti(char *str)
__kpti_forced = enabled ? 1 : -1;
return 0;
}
__setup("kpti=", parse_kpti);
early_param("kpti", parse_kpti);
#endif /* CONFIG_UNMAP_KERNEL_AT_EL0 */
static int cpu_copy_el2regs(void *__unused)
{
/*
* Copy register values that aren't redirected by hardware.
*
* Before code patching, we only set tpidr_el1; all CPUs need to copy
* this value to tpidr_el2 before we patch the code. Once we've done
* that, freshly-onlined CPUs will set tpidr_el2, so we don't need to
* do anything here.
*/
if (!alternatives_applied)
write_sysreg(read_sysreg(tpidr_el1), tpidr_el2);
return 0;
}
static const struct arm64_cpu_capabilities arm64_features[] = {
{
.desc = "GIC system register CPU interface",
@ -897,6 +913,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
.capability = ARM64_HAS_VIRT_HOST_EXTN,
.def_scope = SCOPE_SYSTEM,
.matches = runs_at_el2,
.enable = cpu_copy_el2regs,
},
{
.desc = "32-bit EL0 Support",

View File

@ -18,6 +18,7 @@
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include <linux/arm-smccc.h>
#include <linux/init.h>
#include <linux/linkage.h>
@ -95,6 +96,25 @@ alternative_else_nop_endif
add \dst, \dst, #(\sym - .entry.tramp.text)
.endm
// This macro corrupts x0-x3. It is the caller's duty
// to save/restore them if required.
.macro apply_ssbd, state, targ, tmp1, tmp2
#ifdef CONFIG_ARM64_SSBD
alternative_cb arm64_enable_wa2_handling
b \targ
alternative_cb_end
ldr_this_cpu \tmp2, arm64_ssbd_callback_required, \tmp1
cbz \tmp2, \targ
ldr \tmp2, [tsk, #TI_FLAGS]
tbnz \tmp2, #TIF_SSBD, \targ
mov w0, #ARM_SMCCC_ARCH_WORKAROUND_2
mov w1, #\state
alternative_cb arm64_update_smccc_conduit
nop // Patched to SMC/HVC #0
alternative_cb_end
#endif
.endm
.macro kernel_entry, el, regsize = 64
.if \regsize == 32
mov w0, w0 // zero upper 32 bits of x0
@ -122,6 +142,14 @@ alternative_else_nop_endif
ldr x19, [tsk, #TI_FLAGS] // since we can unmask debug
disable_step_tsk x19, x20 // exceptions when scheduling.
apply_ssbd 1, 1f, x22, x23
#ifdef CONFIG_ARM64_SSBD
ldp x0, x1, [sp, #16 * 0]
ldp x2, x3, [sp, #16 * 1]
#endif
1:
mov x29, xzr // fp pointed to user-space
.else
add x21, sp, #S_FRAME_SIZE
@ -190,6 +218,8 @@ alternative_if ARM64_WORKAROUND_845719
alternative_else_nop_endif
#endif
3:
apply_ssbd 0, 5f, x0, x1
5:
.endif
msr elr_el1, x21 // set up the return data
msr spsr_el1, x22
@ -243,7 +273,7 @@ alternative_insn eret, nop, ARM64_UNMAP_KERNEL_AT_EL0
cmp x25, tsk
b.ne 9998f
this_cpu_ptr irq_stack, x25, x26
adr_this_cpu x25, irq_stack, x26
mov x26, #IRQ_STACK_START_SP
add x26, x25, x26

View File

@ -308,6 +308,17 @@ int swsusp_arch_suspend(void)
sleep_cpu = -EINVAL;
__cpu_suspend_exit();
/*
* Just in case the boot kernel did turn the SSBD
* mitigation off behind our back, let's set the state
* to what we expect it to be.
*/
switch (arm64_get_ssbd_state()) {
case ARM64_SSBD_FORCE_ENABLE:
case ARM64_SSBD_KERNEL:
arm64_set_ssbd_mitigation(true);
}
}
local_dbg_restore(flags);

View File

@ -274,7 +274,7 @@ static int __kprobes reenter_kprobe(struct kprobe *p,
break;
case KPROBE_HIT_SS:
case KPROBE_REENTER:
pr_warn("Unrecoverable kprobe detected at %p.\n", p->addr);
pr_warn("Unrecoverable kprobe detected.\n");
dump_kprobe(p);
BUG();
break;

View File

@ -205,7 +205,7 @@ int __cpu_up(unsigned int cpu, struct task_struct *idle)
* This is the secondary CPU boot entry. We're using this CPUs
* idle thread stack, but a set of temporary page tables.
*/
asmlinkage void secondary_start_kernel(void)
asmlinkage notrace void secondary_start_kernel(void)
{
struct mm_struct *mm = &init_mm;
unsigned int cpu = smp_processor_id();

arch/arm64/kernel/ssbd.c (new file, 108 lines)
View File

@ -0,0 +1,108 @@
// SPDX-License-Identifier: GPL-2.0
/*
* Copyright (C) 2018 ARM Ltd, All Rights Reserved.
*/
#include <linux/errno.h>
#include <linux/prctl.h>
#include <linux/sched.h>
#include <linux/thread_info.h>
#include <asm/cpufeature.h>
/*
* prctl interface for SSBD
*/
static int ssbd_prctl_set(struct task_struct *task, unsigned long ctrl)
{
int state = arm64_get_ssbd_state();
/* Unsupported */
if (state == ARM64_SSBD_UNKNOWN)
return -EINVAL;
/* Treat the unaffected/mitigated state separately */
if (state == ARM64_SSBD_MITIGATED) {
switch (ctrl) {
case PR_SPEC_ENABLE:
return -EPERM;
case PR_SPEC_DISABLE:
case PR_SPEC_FORCE_DISABLE:
return 0;
}
}
/*
* Things are a bit backward here: the arm64 internal API
* *enables the mitigation* when the userspace API *disables
* speculation*. So much fun.
*/
switch (ctrl) {
case PR_SPEC_ENABLE:
/* If speculation is force disabled, enable is not allowed */
if (state == ARM64_SSBD_FORCE_ENABLE ||
task_spec_ssb_force_disable(task))
return -EPERM;
task_clear_spec_ssb_disable(task);
clear_tsk_thread_flag(task, TIF_SSBD);
break;
case PR_SPEC_DISABLE:
if (state == ARM64_SSBD_FORCE_DISABLE)
return -EPERM;
task_set_spec_ssb_disable(task);
set_tsk_thread_flag(task, TIF_SSBD);
break;
case PR_SPEC_FORCE_DISABLE:
if (state == ARM64_SSBD_FORCE_DISABLE)
return -EPERM;
task_set_spec_ssb_disable(task);
task_set_spec_ssb_force_disable(task);
set_tsk_thread_flag(task, TIF_SSBD);
break;
default:
return -ERANGE;
}
return 0;
}
int arch_prctl_spec_ctrl_set(struct task_struct *task, unsigned long which,
unsigned long ctrl)
{
switch (which) {
case PR_SPEC_STORE_BYPASS:
return ssbd_prctl_set(task, ctrl);
default:
return -ENODEV;
}
}
static int ssbd_prctl_get(struct task_struct *task)
{
switch (arm64_get_ssbd_state()) {
case ARM64_SSBD_UNKNOWN:
return -EINVAL;
case ARM64_SSBD_FORCE_ENABLE:
return PR_SPEC_DISABLE;
case ARM64_SSBD_KERNEL:
if (task_spec_ssb_force_disable(task))
return PR_SPEC_PRCTL | PR_SPEC_FORCE_DISABLE;
if (task_spec_ssb_disable(task))
return PR_SPEC_PRCTL | PR_SPEC_DISABLE;
return PR_SPEC_PRCTL | PR_SPEC_ENABLE;
case ARM64_SSBD_FORCE_DISABLE:
return PR_SPEC_ENABLE;
default:
return PR_SPEC_NOT_AFFECTED;
}
}
int arch_prctl_spec_ctrl_get(struct task_struct *task, unsigned long which)
{
switch (which) {
case PR_SPEC_STORE_BYPASS:
return ssbd_prctl_get(task);
default:
return -ENODEV;
}
}
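Tying the two prctl handlers together, a hedged userspace sketch exercising this interface; it assumes only the generic speculation-control prctl constants (the fallback values below match mainline <linux/prctl.h> and are not defined by this patch).

#include <stdio.h>
#include <sys/prctl.h>

/* Fallbacks for older userspace headers; values match mainline. */
#ifndef PR_SET_SPECULATION_CTRL
#define PR_GET_SPECULATION_CTRL	52
#define PR_SET_SPECULATION_CTRL	53
#define PR_SPEC_STORE_BYPASS	0
#define PR_SPEC_DISABLE		(1UL << 2)
#endif

int main(void)
{
	/* "Disable speculation" == enable the SSBD mitigation for this task. */
	if (prctl(PR_SET_SPECULATION_CTRL, PR_SPEC_STORE_BYPASS,
		  PR_SPEC_DISABLE, 0, 0))
		perror("PR_SET_SPECULATION_CTRL");

	/* Reports the PR_SPEC_* bits assembled by ssbd_prctl_get() above. */
	printf("SSB state: 0x%x\n",
	       prctl(PR_GET_SPECULATION_CTRL, PR_SPEC_STORE_BYPASS, 0, 0, 0));
	return 0;
}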

View File

@ -67,6 +67,14 @@ void notrace __cpu_suspend_exit(void)
*/
if (hw_breakpoint_restore)
hw_breakpoint_restore(cpu);
/*
* On resume, firmware implementing dynamic mitigation will
* have turned the mitigation on. If the user has forcefully
* disabled it, make sure their wishes are obeyed.
*/
if (arm64_get_ssbd_state() == ARM64_SSBD_FORCE_DISABLE)
arm64_set_ssbd_mitigation(false);
}
/*

View File

@ -118,6 +118,10 @@ CPU_BE( orr x4, x4, #SCTLR_ELx_EE)
kern_hyp_va x2
msr vbar_el2, x2
/* copy tpidr_el1 into tpidr_el2 for use by HYP */
mrs x1, tpidr_el1
msr tpidr_el2, x1
/* Hello, World! */
eret
ENDPROC(__kvm_hyp_init)

View File

@ -62,9 +62,6 @@ ENTRY(__guest_enter)
// Store the host regs
save_callee_saved_regs x1
// Store the host_ctxt for use at exit time
str x1, [sp, #-16]!
add x18, x0, #VCPU_CONTEXT
// Restore guest regs x0-x17
@ -118,8 +115,7 @@ ENTRY(__guest_exit)
// Store the guest regs x19-x29, lr
save_callee_saved_regs x1
// Restore the host_ctxt from the stack
ldr x2, [sp], #16
get_host_ctxt x2, x3
// Now restore the host regs
restore_callee_saved_regs x2
@ -159,6 +155,10 @@ abort_guest_exit_end:
ENDPROC(__guest_exit)
ENTRY(__fpsimd_guest_restore)
// x0: esr
// x1: vcpu
// x2-x29,lr: vcpu regs
// vcpu x0-x1 on the stack
stp x2, x3, [sp, #-16]!
stp x4, lr, [sp, #-16]!
@ -173,7 +173,7 @@ alternative_else
alternative_endif
isb
mrs x3, tpidr_el2
mov x3, x1
ldr x0, [x3, #VCPU_HOST_CONTEXT]
kern_hyp_va x0

View File

@ -72,13 +72,8 @@ ENDPROC(__kvm_hyp_teardown)
el1_sync: // Guest trapped into EL2
stp x0, x1, [sp, #-16]!
alternative_if_not ARM64_HAS_VIRT_HOST_EXTN
mrs x1, esr_el2
alternative_else
mrs x1, esr_el1
alternative_endif
lsr x0, x1, #ESR_ELx_EC_SHIFT
mrs x0, esr_el2
lsr x0, x0, #ESR_ELx_EC_SHIFT
cmp x0, #ESR_ELx_EC_HVC64
ccmp x0, #ESR_ELx_EC_HVC32, #4, ne
b.ne el1_trap
@ -112,33 +107,73 @@ el1_hvc_guest:
*/
ldr x1, [sp] // Guest's x0
eor w1, w1, #ARM_SMCCC_ARCH_WORKAROUND_1
cbz w1, wa_epilogue
/* ARM_SMCCC_ARCH_WORKAROUND_2 handling */
eor w1, w1, #(ARM_SMCCC_ARCH_WORKAROUND_1 ^ \
ARM_SMCCC_ARCH_WORKAROUND_2)
cbnz w1, el1_trap
mov x0, x1
#ifdef CONFIG_ARM64_SSBD
alternative_cb arm64_enable_wa2_handling
b wa2_end
alternative_cb_end
get_vcpu_ptr x2, x0
ldr x0, [x2, #VCPU_WORKAROUND_FLAGS]
// Sanitize the argument and update the guest flags
ldr x1, [sp, #8] // Guest's x1
clz w1, w1 // Murphy's device:
lsr w1, w1, #5 // w1 = !!w1 without using
eor w1, w1, #1 // the flags...
bfi x0, x1, #VCPU_WORKAROUND_2_FLAG_SHIFT, #1
str x0, [x2, #VCPU_WORKAROUND_FLAGS]
/* Check that we actually need to perform the call */
hyp_ldr_this_cpu x0, arm64_ssbd_callback_required, x2
cbz x0, wa2_end
mov w0, #ARM_SMCCC_ARCH_WORKAROUND_2
smc #0
/* Don't leak data from the SMC call */
mov x3, xzr
wa2_end:
mov x2, xzr
mov x1, xzr
#endif
wa_epilogue:
mov x0, xzr
add sp, sp, #16
eret
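For readers decoding the clz/lsr/eor sequence above: it normalizes the guest's argument to 0 or 1 without touching the condition flags. An illustrative C equivalent, not taken from the patch:

/*
 * Illustration only: CLZ of zero is 32 on AArch64, so (clz >> 5) is 1
 * exactly when x is zero; XOR with 1 turns that into !!x.
 */
static inline unsigned int nonzero_without_flags(unsigned int x)
{
	unsigned int clz = x ? (unsigned int)__builtin_clz(x) : 32;

	return (clz >> 5) ^ 1;
}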
el1_trap:
get_vcpu_ptr x1, x0
mrs x0, esr_el2
lsr x0, x0, #ESR_ELx_EC_SHIFT
/*
* x0: ESR_EC
* x1: vcpu pointer
*/
/* Guest accessed VFP/SIMD registers, save host, restore Guest */
cmp x0, #ESR_ELx_EC_FP_ASIMD
b.eq __fpsimd_guest_restore
mrs x1, tpidr_el2
mov x0, #ARM_EXCEPTION_TRAP
b __guest_exit
el1_irq:
stp x0, x1, [sp, #-16]!
mrs x1, tpidr_el2
get_vcpu_ptr x1, x0
mov x0, #ARM_EXCEPTION_IRQ
b __guest_exit
el1_error:
stp x0, x1, [sp, #-16]!
mrs x1, tpidr_el2
get_vcpu_ptr x1, x0
mov x0, #ARM_EXCEPTION_EL1_SERROR
b __guest_exit
@ -173,6 +208,11 @@ ENTRY(__hyp_do_panic)
eret
ENDPROC(__hyp_do_panic)
ENTRY(__hyp_panic)
get_host_ctxt x0, x1
b hyp_panic
ENDPROC(__hyp_panic)
.macro invalid_vector label, target = __hyp_panic
.align 2
\label:

View File

@ -15,6 +15,7 @@
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include <linux/arm-smccc.h>
#include <linux/types.h>
#include <linux/jump_label.h>
#include <uapi/linux/psci.h>
@ -267,6 +268,39 @@ static void __hyp_text __skip_instr(struct kvm_vcpu *vcpu)
write_sysreg_el2(*vcpu_pc(vcpu), elr);
}
static inline bool __hyp_text __needs_ssbd_off(struct kvm_vcpu *vcpu)
{
if (!cpus_have_cap(ARM64_SSBD))
return false;
return !(vcpu->arch.workaround_flags & VCPU_WORKAROUND_2_FLAG);
}
static void __hyp_text __set_guest_arch_workaround_state(struct kvm_vcpu *vcpu)
{
#ifdef CONFIG_ARM64_SSBD
/*
* The host runs with the workaround always present. If the
* guest wants it disabled, so be it...
*/
if (__needs_ssbd_off(vcpu) &&
__hyp_this_cpu_read(arm64_ssbd_callback_required))
arm_smccc_1_1_smc(ARM_SMCCC_ARCH_WORKAROUND_2, 0, NULL);
#endif
}
static void __hyp_text __set_host_arch_workaround_state(struct kvm_vcpu *vcpu)
{
#ifdef CONFIG_ARM64_SSBD
/*
* If the guest has disabled the workaround, bring it back on.
*/
if (__needs_ssbd_off(vcpu) &&
__hyp_this_cpu_read(arm64_ssbd_callback_required))
arm_smccc_1_1_smc(ARM_SMCCC_ARCH_WORKAROUND_2, 1, NULL);
#endif
}
int __hyp_text __kvm_vcpu_run(struct kvm_vcpu *vcpu)
{
struct kvm_cpu_context *host_ctxt;
@ -275,9 +309,9 @@ int __hyp_text __kvm_vcpu_run(struct kvm_vcpu *vcpu)
u64 exit_code;
vcpu = kern_hyp_va(vcpu);
write_sysreg(vcpu, tpidr_el2);
host_ctxt = kern_hyp_va(vcpu->arch.host_cpu_context);
host_ctxt->__hyp_running_vcpu = vcpu;
guest_ctxt = &vcpu->arch.ctxt;
__sysreg_save_host_state(host_ctxt);
@ -297,6 +331,8 @@ int __hyp_text __kvm_vcpu_run(struct kvm_vcpu *vcpu)
__sysreg_restore_guest_state(guest_ctxt);
__debug_restore_state(vcpu, kern_hyp_va(vcpu->arch.debug_ptr), guest_ctxt);
__set_guest_arch_workaround_state(vcpu);
/* Jump in the fire! */
again:
exit_code = __guest_enter(vcpu, host_ctxt);
@ -339,6 +375,8 @@ again:
}
}
__set_host_arch_workaround_state(vcpu);
fp_enabled = __fpsimd_enabled();
__sysreg_save_guest_state(guest_ctxt);
@ -364,7 +402,8 @@ again:
static const char __hyp_panic_string[] = "HYP panic:\nPS:%08llx PC:%016llx ESR:%08llx\nFAR:%016llx HPFAR:%016llx PAR:%016llx\nVCPU:%p\n";
static void __hyp_text __hyp_call_panic_nvhe(u64 spsr, u64 elr, u64 par)
static void __hyp_text __hyp_call_panic_nvhe(u64 spsr, u64 elr, u64 par,
struct kvm_vcpu *vcpu)
{
unsigned long str_va;
@ -378,35 +417,32 @@ static void __hyp_text __hyp_call_panic_nvhe(u64 spsr, u64 elr, u64 par)
__hyp_do_panic(str_va,
spsr, elr,
read_sysreg(esr_el2), read_sysreg_el2(far),
read_sysreg(hpfar_el2), par,
(void *)read_sysreg(tpidr_el2));
read_sysreg(hpfar_el2), par, vcpu);
}
static void __hyp_text __hyp_call_panic_vhe(u64 spsr, u64 elr, u64 par)
static void __hyp_text __hyp_call_panic_vhe(u64 spsr, u64 elr, u64 par,
struct kvm_vcpu *vcpu)
{
panic(__hyp_panic_string,
spsr, elr,
read_sysreg_el2(esr), read_sysreg_el2(far),
read_sysreg(hpfar_el2), par,
(void *)read_sysreg(tpidr_el2));
read_sysreg(hpfar_el2), par, vcpu);
}
static hyp_alternate_select(__hyp_call_panic,
__hyp_call_panic_nvhe, __hyp_call_panic_vhe,
ARM64_HAS_VIRT_HOST_EXTN);
void __hyp_text __noreturn __hyp_panic(void)
void __hyp_text __noreturn hyp_panic(struct kvm_cpu_context *host_ctxt)
{
struct kvm_vcpu *vcpu = NULL;
u64 spsr = read_sysreg_el2(spsr);
u64 elr = read_sysreg_el2(elr);
u64 par = read_sysreg(par_el1);
if (read_sysreg(vttbr_el2)) {
struct kvm_vcpu *vcpu;
struct kvm_cpu_context *host_ctxt;
vcpu = (struct kvm_vcpu *)read_sysreg(tpidr_el2);
host_ctxt = kern_hyp_va(vcpu->arch.host_cpu_context);
vcpu = host_ctxt->__hyp_running_vcpu;
__timer_save_state(vcpu);
__deactivate_traps(vcpu);
__deactivate_vm(vcpu);
@ -414,7 +450,7 @@ void __hyp_text __noreturn __hyp_panic(void)
}
/* Call panic for real */
__hyp_call_panic()(spsr, elr, par);
__hyp_call_panic()(spsr, elr, par, vcpu);
unreachable();
}

View File

@ -27,8 +27,8 @@ static void __hyp_text __sysreg_do_nothing(struct kvm_cpu_context *ctxt) { }
/*
* Non-VHE: Both host and guest must save everything.
*
* VHE: Host must save tpidr*_el[01], actlr_el1, mdscr_el1, sp0, pc,
* pstate, and guest must save everything.
* VHE: Host must save tpidr*_el0, actlr_el1, mdscr_el1, sp_el0,
* and guest must save everything.
*/
static void __hyp_text __sysreg_save_common_state(struct kvm_cpu_context *ctxt)
@ -36,11 +36,8 @@ static void __hyp_text __sysreg_save_common_state(struct kvm_cpu_context *ctxt)
ctxt->sys_regs[ACTLR_EL1] = read_sysreg(actlr_el1);
ctxt->sys_regs[TPIDR_EL0] = read_sysreg(tpidr_el0);
ctxt->sys_regs[TPIDRRO_EL0] = read_sysreg(tpidrro_el0);
ctxt->sys_regs[TPIDR_EL1] = read_sysreg(tpidr_el1);
ctxt->sys_regs[MDSCR_EL1] = read_sysreg(mdscr_el1);
ctxt->gp_regs.regs.sp = read_sysreg(sp_el0);
ctxt->gp_regs.regs.pc = read_sysreg_el2(elr);
ctxt->gp_regs.regs.pstate = read_sysreg_el2(spsr);
}
static void __hyp_text __sysreg_save_state(struct kvm_cpu_context *ctxt)
@ -62,10 +59,13 @@ static void __hyp_text __sysreg_save_state(struct kvm_cpu_context *ctxt)
ctxt->sys_regs[AMAIR_EL1] = read_sysreg_el1(amair);
ctxt->sys_regs[CNTKCTL_EL1] = read_sysreg_el1(cntkctl);
ctxt->sys_regs[PAR_EL1] = read_sysreg(par_el1);
ctxt->sys_regs[TPIDR_EL1] = read_sysreg(tpidr_el1);
ctxt->gp_regs.sp_el1 = read_sysreg(sp_el1);
ctxt->gp_regs.elr_el1 = read_sysreg_el1(elr);
ctxt->gp_regs.spsr[KVM_SPSR_EL1]= read_sysreg_el1(spsr);
ctxt->gp_regs.regs.pc = read_sysreg_el2(elr);
ctxt->gp_regs.regs.pstate = read_sysreg_el2(spsr);
}
static hyp_alternate_select(__sysreg_call_save_host_state,
@ -89,11 +89,8 @@ static void __hyp_text __sysreg_restore_common_state(struct kvm_cpu_context *ctx
write_sysreg(ctxt->sys_regs[ACTLR_EL1], actlr_el1);
write_sysreg(ctxt->sys_regs[TPIDR_EL0], tpidr_el0);
write_sysreg(ctxt->sys_regs[TPIDRRO_EL0], tpidrro_el0);
write_sysreg(ctxt->sys_regs[TPIDR_EL1], tpidr_el1);
write_sysreg(ctxt->sys_regs[MDSCR_EL1], mdscr_el1);
write_sysreg(ctxt->gp_regs.regs.sp, sp_el0);
write_sysreg_el2(ctxt->gp_regs.regs.pc, elr);
write_sysreg_el2(ctxt->gp_regs.regs.pstate, spsr);
}
static void __hyp_text __sysreg_restore_state(struct kvm_cpu_context *ctxt)
@ -115,10 +112,13 @@ static void __hyp_text __sysreg_restore_state(struct kvm_cpu_context *ctxt)
write_sysreg_el1(ctxt->sys_regs[AMAIR_EL1], amair);
write_sysreg_el1(ctxt->sys_regs[CNTKCTL_EL1], cntkctl);
write_sysreg(ctxt->sys_regs[PAR_EL1], par_el1);
write_sysreg(ctxt->sys_regs[TPIDR_EL1], tpidr_el1);
write_sysreg(ctxt->gp_regs.sp_el1, sp_el1);
write_sysreg_el1(ctxt->gp_regs.elr_el1, elr);
write_sysreg_el1(ctxt->gp_regs.spsr[KVM_SPSR_EL1],spsr);
write_sysreg_el2(ctxt->gp_regs.regs.pc, elr);
write_sysreg_el2(ctxt->gp_regs.regs.pstate, spsr);
}
static hyp_alternate_select(__sysreg_call_restore_host_state,
@ -183,3 +183,8 @@ void __hyp_text __sysreg32_restore_state(struct kvm_vcpu *vcpu)
if (vcpu->arch.debug_flags & KVM_ARM64_DEBUG_DIRTY)
write_sysreg(sysreg[DBGVCR32_EL2], dbgvcr32_el2);
}
void __hyp_text __kvm_set_tpidr_el2(u64 tpidr_el2)
{
asm("msr tpidr_el2, %0": : "r" (tpidr_el2));
}

View File

@ -135,6 +135,10 @@ int kvm_reset_vcpu(struct kvm_vcpu *vcpu)
/* Reset PMU */
kvm_pmu_vcpu_reset(vcpu);
/* Default workaround setup is enabled (if supported) */
if (kvm_arm_have_ssbd() == KVM_SSBD_KERNEL)
vcpu->arch.workaround_flags |= VCPU_WORKAROUND_2_FLAG;
/* Reset timer */
return kvm_timer_vcpu_reset(vcpu, cpu_vtimer_irq);
}

View File

@ -147,7 +147,11 @@ static void __init zone_sizes_init(unsigned long min, unsigned long max)
#ifdef CONFIG_HAVE_ARCH_PFN_VALID
int pfn_valid(unsigned long pfn)
{
return memblock_is_map_memory(pfn << PAGE_SHIFT);
phys_addr_t addr = pfn << PAGE_SHIFT;
if ((addr >> PAGE_SHIFT) != pfn)
return 0;
return memblock_is_map_memory(addr);
}
EXPORT_SYMBOL(pfn_valid);
#endif
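The new round-trip check guards against a pfn whose upper bits would be shifted out of phys_addr_t. A hedged worked example (the pfn value is purely illustrative):

/*
 * Illustration only: with PAGE_SHIFT == 12, a bogus pfn with bit 52
 * set wraps to 0 when shifted, so the comparison fails and pfn_valid()
 * returns 0 instead of querying memblock with a nonsense address.
 */
static int pfn_overflow_demo(void)
{
	unsigned long pfn  = 1UL << 52;	/* hypothetical out-of-range pfn */
	unsigned long addr = pfn << 12;	/* top bits lost: addr == 0 */

	return (addr >> 12) != pfn;	/* 1: the new check rejects it */
}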
@ -468,11 +472,13 @@ void __init mem_init(void)
BUILD_BUG_ON(TASK_SIZE_32 > TASK_SIZE_64);
#endif
#ifdef CONFIG_SPARSEMEM_VMEMMAP
/*
* Make sure we chose the upper bound of sizeof(struct page)
* correctly.
* correctly when sizing the VMEMMAP array.
*/
BUILD_BUG_ON(sizeof(struct page) > (1 << STRUCT_PAGE_MAX_SHIFT));
#endif
if (PAGE_SIZE >= 16384 && get_num_physpages() <= 128) {
extern int sysctl_overcommit_memory;

View File

@ -816,12 +816,12 @@ int pmd_clear_huge(pmd_t *pmd)
return 1;
}
int pud_free_pmd_page(pud_t *pud)
int pud_free_pmd_page(pud_t *pud, unsigned long addr)
{
return pud_none(*pud);
}
int pmd_free_pte_page(pmd_t *pmd)
int pmd_free_pte_page(pmd_t *pmd, unsigned long addr)
{
return pmd_none(*pmd);
}

View File

@ -186,8 +186,9 @@ ENDPROC(idmap_cpu_replace_ttbr1)
.macro __idmap_kpti_put_pgtable_ent_ng, type
orr \type, \type, #PTE_NG // Same bit for blocks and pages
str \type, [cur_\()\type\()p] // Update the entry and ensure it
dc civac, cur_\()\type\()p // is visible to all CPUs.
str \type, [cur_\()\type\()p] // Update the entry and ensure
dmb sy // that it is visible to all
dc civac, cur_\()\type\()p // CPUs.
.endm
/*

View File

@ -43,6 +43,7 @@ extern inline pmd_t *pmd_alloc_kernel(pgd_t *pgd, unsigned long address)
static inline void __pte_free_tlb(struct mmu_gather *tlb, pgtable_t page,
unsigned long address)
{
pgtable_page_dtor(page);
__free_page(page);
}
@ -73,8 +74,9 @@ static inline struct page *pte_alloc_one(struct mm_struct *mm,
return page;
}
extern inline void pte_free(struct mm_struct *mm, struct page *page)
static inline void pte_free(struct mm_struct *mm, struct page *page)
{
pgtable_page_dtor(page);
__free_page(page);
}

View File

@ -88,7 +88,8 @@ static inline void free_io_area(void *addr)
for (p = &iolist ; (tmp = *p) ; p = &tmp->next) {
if (tmp->addr == addr) {
*p = tmp->next;
__iounmap(tmp->addr, tmp->size);
/* remove gap added in get_io_area() */
__iounmap(tmp->addr, tmp->size - IO_SIZE);
kfree(tmp);
return;
}

View File

@ -21,17 +21,19 @@ $(obj)/linux.bin.gz: $(obj)/linux.bin FORCE
quiet_cmd_cp = CP $< $@$2
cmd_cp = cat $< >$@$2 || (rm -f $@ && echo false)
quiet_cmd_strip = STRIP $@
quiet_cmd_strip = STRIP $< $@$2
cmd_strip = $(STRIP) -K microblaze_start -K _end -K __log_buf \
-K _fdt_start vmlinux -o $@
-K _fdt_start $< -o $@$2
UIMAGE_LOADADDR = $(CONFIG_KERNEL_BASE_ADDR)
UIMAGE_IN = $@
UIMAGE_OUT = $@.ub
$(obj)/simpleImage.%: vmlinux FORCE
$(call if_changed,cp,.unstrip)
$(call if_changed,objcopy)
$(call if_changed,uimage)
$(call if_changed,strip)
@echo 'Kernel: $@ is ready' ' (#'`cat .version`')'
$(call if_changed,strip,.strip)
@echo 'Kernel: $(UIMAGE_OUT) is ready' ' (#'`cat .version`')'
clean-files += simpleImage.*.unstrip linux.bin.ub dts/*.dtb

View File

@ -58,7 +58,7 @@ EXPORT_SYMBOL_GPL(ath79_ddr_ctrl_init);
void ath79_ddr_wb_flush(u32 reg)
{
void __iomem *flush_reg = ath79_ddr_wb_flush_base + reg;
void __iomem *flush_reg = ath79_ddr_wb_flush_base + (reg * 4);
/* Flush the DDR write buffer. */
__raw_writel(0x1, flush_reg);

View File

@ -412,6 +412,8 @@ static inline type pfx##in##bwlq##p(unsigned long port) \
__val = *__addr; \
slow; \
\
/* prevent prefetching of coherent DMA data prematurely */ \
rmb(); \
return pfx##ioswab##bwlq(__addr, __val); \
}

View File

@ -141,7 +141,7 @@ struct mips_fpu_struct {
#define NUM_DSP_REGS 6
typedef __u32 dspreg_t;
typedef unsigned long dspreg_t;
struct mips_dsp_state {
dspreg_t dspr[NUM_DSP_REGS];

View File

@ -116,10 +116,20 @@ ftrace_stub:
NESTED(_mcount, PT_SIZE, ra)
PTR_LA t1, ftrace_stub
PTR_L t2, ftrace_trace_function /* Prepare t2 for (1) */
bne t1, t2, static_trace
beq t1, t2, fgraph_trace
nop
MCOUNT_SAVE_REGS
move a0, ra /* arg1: self return address */
jalr t2 /* (1) call *ftrace_trace_function */
move a1, AT /* arg2: parent's return address */
MCOUNT_RESTORE_REGS
fgraph_trace:
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
PTR_LA t1, ftrace_stub
PTR_L t3, ftrace_graph_return
bne t1, t3, ftrace_graph_caller
nop
@ -128,24 +138,11 @@ NESTED(_mcount, PT_SIZE, ra)
bne t1, t3, ftrace_graph_caller
nop
#endif
b ftrace_stub
#ifdef CONFIG_32BIT
addiu sp, sp, 8
#else
nop
#endif
static_trace:
MCOUNT_SAVE_REGS
move a0, ra /* arg1: self return address */
jalr t2 /* (1) call *ftrace_trace_function */
move a1, AT /* arg2: parent's return address */
MCOUNT_RESTORE_REGS
#ifdef CONFIG_32BIT
addiu sp, sp, 8
#endif
.globl ftrace_stub
ftrace_stub:
RETURN_BACK

View File

@ -26,6 +26,7 @@
#include <linux/kallsyms.h>
#include <linux/random.h>
#include <linux/prctl.h>
#include <linux/nmi.h>
#include <asm/asm.h>
#include <asm/bootinfo.h>
@ -633,28 +634,42 @@ unsigned long arch_align_stack(unsigned long sp)
return sp & ALMASK;
}
static void arch_dump_stack(void *info)
static DEFINE_PER_CPU(struct call_single_data, backtrace_csd);
static struct cpumask backtrace_csd_busy;
static void handle_backtrace(void *info)
{
struct pt_regs *regs;
nmi_cpu_backtrace(get_irq_regs());
cpumask_clear_cpu(smp_processor_id(), &backtrace_csd_busy);
}
regs = get_irq_regs();
static void raise_backtrace(cpumask_t *mask)
{
struct call_single_data *csd;
int cpu;
if (regs)
show_regs(regs);
for_each_cpu(cpu, mask) {
/*
* If we previously sent an IPI to the target CPU & it hasn't
* cleared its bit in the busy cpumask then it didn't handle
* our previous IPI & it's not safe for us to reuse the
* call_single_data_t.
*/
if (cpumask_test_and_set_cpu(cpu, &backtrace_csd_busy)) {
pr_warn("Unable to send backtrace IPI to CPU%u - perhaps it hung?\n",
cpu);
continue;
}
dump_stack();
csd = &per_cpu(backtrace_csd, cpu);
csd->func = handle_backtrace;
smp_call_function_single_async(cpu, csd);
}
}
void arch_trigger_cpumask_backtrace(const cpumask_t *mask, bool exclude_self)
{
long this_cpu = get_cpu();
if (cpumask_test_cpu(this_cpu, mask) && !exclude_self)
dump_stack();
smp_call_function_many(mask, arch_dump_stack, NULL, 1);
put_cpu();
nmi_trigger_cpumask_backtrace(mask, exclude_self, raise_backtrace);
}
int mips_get_process_fp_mode(struct task_struct *task)

View File

@ -876,7 +876,7 @@ long arch_ptrace(struct task_struct *child, long request,
goto out;
}
dregs = __get_dsp_regs(child);
tmp = (unsigned long) (dregs[addr - DSP_BASE]);
tmp = dregs[addr - DSP_BASE];
break;
}
case DSP_CONTROL:

View File

@ -140,7 +140,7 @@ long compat_arch_ptrace(struct task_struct *child, compat_long_t request,
goto out;
}
dregs = __get_dsp_regs(child);
tmp = (unsigned long) (dregs[addr - DSP_BASE]);
tmp = dregs[addr - DSP_BASE];
break;
}
case DSP_CONTROL:

View File

@ -351,6 +351,7 @@ static void __show_regs(const struct pt_regs *regs)
void show_regs(struct pt_regs *regs)
{
__show_regs((struct pt_regs *)regs);
dump_stack();
}
void show_registers(struct pt_regs *regs)

View File

@ -4,12 +4,12 @@
#include "libgcc.h"
/*
* GCC 7 suboptimally generates __multi3 calls for mips64r6, so for that
* specific case only we'll implement it here.
* GCC 7 & older can suboptimally generate __multi3 calls for mips64r6, so for
* that specific case only we implement that intrinsic here.
*
* See https://gcc.gnu.org/bugzilla/show_bug.cgi?id=82981
*/
#if defined(CONFIG_64BIT) && defined(CONFIG_CPU_MIPSR6) && (__GNUC__ == 7)
#if defined(CONFIG_64BIT) && defined(CONFIG_CPU_MIPSR6) && (__GNUC__ < 8)
/* multiply 64-bit values, low 64-bits returned */
static inline long long notrace dmulu(long long a, long long b)

View File

@ -9,6 +9,7 @@
#include <linux/export.h>
#include <asm/addrspace.h>
#include <asm/byteorder.h>
#include <linux/ioport.h>
#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>
@ -97,6 +98,20 @@ static int remap_area_pages(unsigned long address, phys_addr_t phys_addr,
return error;
}
static int __ioremap_check_ram(unsigned long start_pfn, unsigned long nr_pages,
void *arg)
{
unsigned long i;
for (i = 0; i < nr_pages; i++) {
if (pfn_valid(start_pfn + i) &&
!PageReserved(pfn_to_page(start_pfn + i)))
return 1;
}
return 0;
}
/*
* Generic mapping function (not visible outside):
*/
@ -115,8 +130,8 @@ static int remap_area_pages(unsigned long address, phys_addr_t phys_addr,
void __iomem * __ioremap(phys_addr_t phys_addr, phys_addr_t size, unsigned long flags)
{
unsigned long offset, pfn, last_pfn;
struct vm_struct * area;
unsigned long offset;
phys_addr_t last_addr;
void * addr;
@ -136,18 +151,16 @@ void __iomem * __ioremap(phys_addr_t phys_addr, phys_addr_t size, unsigned long
return (void __iomem *) CKSEG1ADDR(phys_addr);
/*
* Don't allow anybody to remap normal RAM that we're using..
* Don't allow anybody to remap RAM that may be allocated by the page
* allocator, since that could lead to races & data clobbering.
*/
if (phys_addr < virt_to_phys(high_memory)) {
char *t_addr, *t_end;
struct page *page;
t_addr = __va(phys_addr);
t_end = t_addr + (size - 1);
for(page = virt_to_page(t_addr); page <= virt_to_page(t_end); page++)
if(!PageReserved(page))
return NULL;
pfn = PFN_DOWN(phys_addr);
last_pfn = PFN_DOWN(last_addr);
if (walk_system_ram_range(pfn, last_pfn - pfn + 1, NULL,
__ioremap_check_ram) == 1) {
WARN_ONCE(1, "ioremap on RAM at %pa - %pa\n",
&phys_addr, &last_addr);
return NULL;
}
/*

View File

@ -55,7 +55,7 @@ void pci_resource_to_user(const struct pci_dev *dev, int bar,
phys_addr_t size = resource_size(rsrc);
*start = fixup_bigphys_addr(rsrc->start, size);
*end = rsrc->start + size;
*end = rsrc->start + size - 1;
}
int pci_mmap_page_range(struct pci_dev *dev, struct vm_area_struct *vma,

View File

@ -184,7 +184,7 @@ config PREFETCH
config MLONGCALLS
bool "Enable the -mlong-calls compiler option for big kernels"
def_bool y if (!MODULES)
default y
depends on PA8X00
help
If you configure the kernel to include many drivers built-in instead

View File

@ -0,0 +1,32 @@
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef __ASM_BARRIER_H
#define __ASM_BARRIER_H
#ifndef __ASSEMBLY__
/* The synchronize caches instruction executes as a nop on systems in
which all memory references are performed in order. */
#define synchronize_caches() __asm__ __volatile__ ("sync" : : : "memory")
#if defined(CONFIG_SMP)
#define mb() do { synchronize_caches(); } while (0)
#define rmb() mb()
#define wmb() mb()
#define dma_rmb() mb()
#define dma_wmb() mb()
#else
#define mb() barrier()
#define rmb() barrier()
#define wmb() barrier()
#define dma_rmb() barrier()
#define dma_wmb() barrier()
#endif
#define __smp_mb() mb()
#define __smp_rmb() mb()
#define __smp_wmb() mb()
#include <asm-generic/barrier.h>
#endif /* !__ASSEMBLY__ */
#endif /* __ASM_BARRIER_H */

Some files were not shown because too many files have changed in this diff.