mirror of
https://github.com/brain-hackers/linux-brain.git
synced 2024-06-09 15:26:21 +09:00
This is the 4.14.16 stable release
-----BEGIN PGP SIGNATURE----- iQIzBAABCAAdFiEEZH8oZUiU471FcZm+ONu9yGCSaT4FAlpxv2AACgkQONu9yGCS aT4U/w//T4qrABtOm4N7gTp5hIYnAPon8W3hOVZ1DUofH99cDDlB8UCtl5+4317b HCr0Yh/vs9iQzjc9Zs1apBjTOZVAAfFWUUVj5bjqerFdBgHWzJGMpyi8jFN582LY JCgk2Z3S8akRXmDt5o+sLeFxHKtFlmyaFkpfy2TqCNVxW8cayPh6cCg8+IiCL7qg jPxvaSvw+gRnDUWrCoJGYTvRY2SlMxXir225vL6eAuiS1E5h/WkWdlVQItAtwHqi qDYX7AB5UIhF5tUl7w/DYuXnLaY2gsqsUnPELaUPXVR1N55dVRkUCnMTPT5s2wWD cX0CJKS7dsVWpvPR1o0TNMWMGX6KXcIl/CqxAp1C/iSweAKwxnpX293oQ5xv6WlS xkDAXcrkGVFtQiMclQIw4E+4v29S2WbBxq1VjBCg/hkW9AGyDCLfKLaeCMUbTFe3 hX7PldzXzJ+CDtrKBZM19k8w0wqSP+U8p7FU/dwya5qNd0RWMb+QVdktDRt7gXit +RTJK2ZiJof4ByUzla/IrHGNEPjb9opR3GtqIewDe+deE/7SVxw2OkwWoXBcHALa 8UtgKVft4M6qUW15e5awlKXMlPYJFU9YrXAyrZNxlaMwJ7JytFAa/siYZZJS8t1w uRly6MSna3zCox4XFxP9Wp6MF/sraFVzIF/ntvj9VciuorUL4vo= =MtGW -----END PGP SIGNATURE----- Merge tag 'v4.14.16' into 4.14.x+fslc This is the 4.14.16 stable release * tag 'v4.14.16': (282 commits) Linux 4.14.16 nfsd: auth: Fix gid sorting when rootsquash enabled cpufreq: governor: Ensure sufficiently large sampling intervals bpf, arm64: fix stack_depth tracking in combination with tail calls bpf: reject stores into ctx via st and xadd bpf: fix 32-bit divide by zero bpf: fix divides by zero bpf: avoid false sharing of map refcount with max_entries bpf: introduce BPF_JIT_ALWAYS_ON config hrtimer: Reset hrtimer cpu base proper on CPU hotplug x86/mm/64: Fix vmapped stack syncing on very-large-memory 4-level systems x86/microcode: Fix again accessing initrd after having been freed x86/microcode/intel: Extend BDW late-loading further with LLC size check perf/x86/amd/power: Do not load AMD power module on !AMD platforms vmxnet3: repair memory leak net: ipv4: Make "ip route get" match iif lo rules again. tls: reset crypto_info when do_tls_setsockopt_tx fails tls: return -EBUSY if crypto_info is already set tls: fix sw_ctx leak net/tls: Only attach to sockets in ESTABLISHED state ...
This commit is contained in:
commit
ebef0b63a5
|
@ -373,3 +373,19 @@ Contact: Linux kernel mailing list <linux-kernel@vger.kernel.org>
|
||||||
Description: information about CPUs heterogeneity.
|
Description: information about CPUs heterogeneity.
|
||||||
|
|
||||||
cpu_capacity: capacity of cpu#.
|
cpu_capacity: capacity of cpu#.
|
||||||
|
|
||||||
|
What: /sys/devices/system/cpu/vulnerabilities
|
||||||
|
/sys/devices/system/cpu/vulnerabilities/meltdown
|
||||||
|
/sys/devices/system/cpu/vulnerabilities/spectre_v1
|
||||||
|
/sys/devices/system/cpu/vulnerabilities/spectre_v2
|
||||||
|
Date: January 2018
|
||||||
|
Contact: Linux kernel mailing list <linux-kernel@vger.kernel.org>
|
||||||
|
Description: Information about CPU vulnerabilities
|
||||||
|
|
||||||
|
The files are named after the code names of CPU
|
||||||
|
vulnerabilities. The output of those files reflects the
|
||||||
|
state of the CPUs in the system. Possible output values:
|
||||||
|
|
||||||
|
"Not affected" CPU is not affected by the vulnerability
|
||||||
|
"Vulnerable" CPU is affected and no mitigation in effect
|
||||||
|
"Mitigation: $M" CPU is affected and mitigation $M is in effect
|
||||||
|
|
|
@ -2599,6 +2599,11 @@
|
||||||
nosmt [KNL,S390] Disable symmetric multithreading (SMT).
|
nosmt [KNL,S390] Disable symmetric multithreading (SMT).
|
||||||
Equivalent to smt=1.
|
Equivalent to smt=1.
|
||||||
|
|
||||||
|
nospectre_v2 [X86] Disable all mitigations for the Spectre variant 2
|
||||||
|
(indirect branch prediction) vulnerability. System may
|
||||||
|
allow data leaks with this option, which is equivalent
|
||||||
|
to spectre_v2=off.
|
||||||
|
|
||||||
noxsave [BUGS=X86] Disables x86 extended register state save
|
noxsave [BUGS=X86] Disables x86 extended register state save
|
||||||
and restore using xsave. The kernel will fall back to
|
and restore using xsave. The kernel will fall back to
|
||||||
enabling legacy floating-point and sse state.
|
enabling legacy floating-point and sse state.
|
||||||
|
@ -2685,8 +2690,6 @@
|
||||||
steal time is computed, but won't influence scheduler
|
steal time is computed, but won't influence scheduler
|
||||||
behaviour
|
behaviour
|
||||||
|
|
||||||
nopti [X86-64] Disable kernel page table isolation
|
|
||||||
|
|
||||||
nolapic [X86-32,APIC] Do not enable or use the local APIC.
|
nolapic [X86-32,APIC] Do not enable or use the local APIC.
|
||||||
|
|
||||||
nolapic_timer [X86-32,APIC] Do not use the local APIC timer.
|
nolapic_timer [X86-32,APIC] Do not use the local APIC timer.
|
||||||
|
@ -3255,11 +3258,20 @@
|
||||||
pt. [PARIDE]
|
pt. [PARIDE]
|
||||||
See Documentation/blockdev/paride.txt.
|
See Documentation/blockdev/paride.txt.
|
||||||
|
|
||||||
pti= [X86_64]
|
pti= [X86_64] Control Page Table Isolation of user and
|
||||||
Control user/kernel address space isolation:
|
kernel address spaces. Disabling this feature
|
||||||
on - enable
|
removes hardening, but improves performance of
|
||||||
off - disable
|
system calls and interrupts.
|
||||||
auto - default setting
|
|
||||||
|
on - unconditionally enable
|
||||||
|
off - unconditionally disable
|
||||||
|
auto - kernel detects whether your CPU model is
|
||||||
|
vulnerable to issues that PTI mitigates
|
||||||
|
|
||||||
|
Not specifying this option is equivalent to pti=auto.
|
||||||
|
|
||||||
|
nopti [X86_64]
|
||||||
|
Equivalent to pti=off
|
||||||
|
|
||||||
pty.legacy_count=
|
pty.legacy_count=
|
||||||
[KNL] Number of legacy pty's. Overwrites compiled-in
|
[KNL] Number of legacy pty's. Overwrites compiled-in
|
||||||
|
@ -3901,6 +3913,29 @@
|
||||||
sonypi.*= [HW] Sony Programmable I/O Control Device driver
|
sonypi.*= [HW] Sony Programmable I/O Control Device driver
|
||||||
See Documentation/laptops/sonypi.txt
|
See Documentation/laptops/sonypi.txt
|
||||||
|
|
||||||
|
spectre_v2= [X86] Control mitigation of Spectre variant 2
|
||||||
|
(indirect branch speculation) vulnerability.
|
||||||
|
|
||||||
|
on - unconditionally enable
|
||||||
|
off - unconditionally disable
|
||||||
|
auto - kernel detects whether your CPU model is
|
||||||
|
vulnerable
|
||||||
|
|
||||||
|
Selecting 'on' will, and 'auto' may, choose a
|
||||||
|
mitigation method at run time according to the
|
||||||
|
CPU, the available microcode, the setting of the
|
||||||
|
CONFIG_RETPOLINE configuration option, and the
|
||||||
|
compiler with which the kernel was built.
|
||||||
|
|
||||||
|
Specific mitigations can also be selected manually:
|
||||||
|
|
||||||
|
retpoline - replace indirect branches
|
||||||
|
retpoline,generic - Google's original retpoline
|
||||||
|
retpoline,amd - AMD-specific minimal thunk
|
||||||
|
|
||||||
|
Not specifying this option is equivalent to
|
||||||
|
spectre_v2=auto.
|
||||||
|
|
||||||
spia_io_base= [HW,MTD]
|
spia_io_base= [HW,MTD]
|
||||||
spia_fio_base=
|
spia_fio_base=
|
||||||
spia_pedr=
|
spia_pedr=
|
||||||
|
|
186
Documentation/x86/pti.txt
Normal file
186
Documentation/x86/pti.txt
Normal file
|
@ -0,0 +1,186 @@
|
||||||
|
Overview
|
||||||
|
========
|
||||||
|
|
||||||
|
Page Table Isolation (pti, previously known as KAISER[1]) is a
|
||||||
|
countermeasure against attacks on the shared user/kernel address
|
||||||
|
space such as the "Meltdown" approach[2].
|
||||||
|
|
||||||
|
To mitigate this class of attacks, we create an independent set of
|
||||||
|
page tables for use only when running userspace applications. When
|
||||||
|
the kernel is entered via syscalls, interrupts or exceptions, the
|
||||||
|
page tables are switched to the full "kernel" copy. When the system
|
||||||
|
switches back to user mode, the user copy is used again.
|
||||||
|
|
||||||
|
The userspace page tables contain only a minimal amount of kernel
|
||||||
|
data: only what is needed to enter/exit the kernel such as the
|
||||||
|
entry/exit functions themselves and the interrupt descriptor table
|
||||||
|
(IDT). There are a few strictly unnecessary things that get mapped
|
||||||
|
such as the first C function when entering an interrupt (see
|
||||||
|
comments in pti.c).
|
||||||
|
|
||||||
|
This approach helps to ensure that side-channel attacks leveraging
|
||||||
|
the paging structures do not function when PTI is enabled. It can be
|
||||||
|
enabled by setting CONFIG_PAGE_TABLE_ISOLATION=y at compile time.
|
||||||
|
Once enabled at compile-time, it can be disabled at boot with the
|
||||||
|
'nopti' or 'pti=' kernel parameters (see kernel-parameters.txt).
|
||||||
|
|
||||||
|
Page Table Management
|
||||||
|
=====================
|
||||||
|
|
||||||
|
When PTI is enabled, the kernel manages two sets of page tables.
|
||||||
|
The first set is very similar to the single set which is present in
|
||||||
|
kernels without PTI. This includes a complete mapping of userspace
|
||||||
|
that the kernel can use for things like copy_to_user().
|
||||||
|
|
||||||
|
Although _complete_, the user portion of the kernel page tables is
|
||||||
|
crippled by setting the NX bit in the top level. This ensures
|
||||||
|
that any missed kernel->user CR3 switch will immediately crash
|
||||||
|
userspace upon executing its first instruction.
|
||||||
|
|
||||||
|
The userspace page tables map only the kernel data needed to enter
|
||||||
|
and exit the kernel. This data is entirely contained in the 'struct
|
||||||
|
cpu_entry_area' structure which is placed in the fixmap which gives
|
||||||
|
each CPU's copy of the area a compile-time-fixed virtual address.
|
||||||
|
|
||||||
|
For new userspace mappings, the kernel makes the entries in its
|
||||||
|
page tables like normal. The only difference is when the kernel
|
||||||
|
makes entries in the top (PGD) level. In addition to setting the
|
||||||
|
entry in the main kernel PGD, a copy of the entry is made in the
|
||||||
|
userspace page tables' PGD.
|
||||||
|
|
||||||
|
This sharing at the PGD level also inherently shares all the lower
|
||||||
|
layers of the page tables. This leaves a single, shared set of
|
||||||
|
userspace page tables to manage. One PTE to lock, one set of
|
||||||
|
accessed bits, dirty bits, etc...
|
||||||
|
|
||||||
|
Overhead
|
||||||
|
========
|
||||||
|
|
||||||
|
Protection against side-channel attacks is important. But,
|
||||||
|
this protection comes at a cost:
|
||||||
|
|
||||||
|
1. Increased Memory Use
|
||||||
|
a. Each process now needs an order-1 PGD instead of order-0.
|
||||||
|
(Consumes an additional 4k per process).
|
||||||
|
b. The 'cpu_entry_area' structure must be 2MB in size and 2MB
|
||||||
|
aligned so that it can be mapped by setting a single PMD
|
||||||
|
entry. This consumes nearly 2MB of RAM once the kernel
|
||||||
|
is decompressed, but no space in the kernel image itself.
|
||||||
|
|
||||||
|
2. Runtime Cost
|
||||||
|
a. CR3 manipulation to switch between the page table copies
|
||||||
|
must be done at interrupt, syscall, and exception entry
|
||||||
|
and exit (it can be skipped when the kernel is interrupted,
|
||||||
|
though.) Moves to CR3 are on the order of a hundred
|
||||||
|
cycles, and are required at every entry and exit.
|
||||||
|
b. A "trampoline" must be used for SYSCALL entry. This
|
||||||
|
trampoline depends on a smaller set of resources than the
|
||||||
|
non-PTI SYSCALL entry code, so requires mapping fewer
|
||||||
|
things into the userspace page tables. The downside is
|
||||||
|
that stacks must be switched at entry time.
|
||||||
|
c. Global pages are disabled for all kernel structures not
|
||||||
|
mapped into both kernel and userspace page tables. This
|
||||||
|
feature of the MMU allows different processes to share TLB
|
||||||
|
entries mapping the kernel. Losing the feature means more
|
||||||
|
TLB misses after a context switch. The actual loss of
|
||||||
|
performance is very small, however, never exceeding 1%.
|
||||||
|
d. Process Context IDentifiers (PCID) is a CPU feature that
|
||||||
|
allows us to skip flushing the entire TLB when switching page
|
||||||
|
tables by setting a special bit in CR3 when the page tables
|
||||||
|
are changed. This makes switching the page tables (at context
|
||||||
|
switch, or kernel entry/exit) cheaper. But, on systems with
|
||||||
|
PCID support, the context switch code must flush both the user
|
||||||
|
and kernel entries out of the TLB. The user PCID TLB flush is
|
||||||
|
deferred until the exit to userspace, minimizing the cost.
|
||||||
|
See intel.com/sdm for the gory PCID/INVPCID details.
|
||||||
|
e. The userspace page tables must be populated for each new
|
||||||
|
process. Even without PTI, the shared kernel mappings
|
||||||
|
are created by copying top-level (PGD) entries into each
|
||||||
|
new process. But, with PTI, there are now *two* kernel
|
||||||
|
mappings: one in the kernel page tables that maps everything
|
||||||
|
and one for the entry/exit structures. At fork(), we need to
|
||||||
|
copy both.
|
||||||
|
f. In addition to the fork()-time copying, there must also
|
||||||
|
be an update to the userspace PGD any time a set_pgd() is done
|
||||||
|
on a PGD used to map userspace. This ensures that the kernel
|
||||||
|
and userspace copies always map the same userspace
|
||||||
|
memory.
|
||||||
|
g. On systems without PCID support, each CR3 write flushes
|
||||||
|
the entire TLB. That means that each syscall, interrupt
|
||||||
|
or exception flushes the TLB.
|
||||||
|
h. INVPCID is a TLB-flushing instruction which allows flushing
|
||||||
|
of TLB entries for non-current PCIDs. Some systems support
|
||||||
|
PCIDs, but do not support INVPCID. On these systems, addresses
|
||||||
|
can only be flushed from the TLB for the current PCID. When
|
||||||
|
flushing a kernel address, we need to flush all PCIDs, so a
|
||||||
|
single kernel address flush will require a TLB-flushing CR3
|
||||||
|
write upon the next use of every PCID.
|
||||||
|
|
||||||
|
Possible Future Work
|
||||||
|
====================
|
||||||
|
1. We can be more careful about not actually writing to CR3
|
||||||
|
unless its value is actually changed.
|
||||||
|
2. Allow PTI to be enabled/disabled at runtime in addition to the
|
||||||
|
boot-time switching.
|
||||||
|
|
||||||
|
Testing
|
||||||
|
========
|
||||||
|
|
||||||
|
To test stability of PTI, the following test procedure is recommended,
|
||||||
|
ideally doing all of these in parallel:
|
||||||
|
|
||||||
|
1. Set CONFIG_DEBUG_ENTRY=y
|
||||||
|
2. Run several copies of all of the tools/testing/selftests/x86/ tests
|
||||||
|
(excluding MPX and protection_keys) in a loop on multiple CPUs for
|
||||||
|
several minutes. These tests frequently uncover corner cases in the
|
||||||
|
kernel entry code. In general, old kernels might cause these tests
|
||||||
|
themselves to crash, but they should never crash the kernel.
|
||||||
|
3. Run the 'perf' tool in a mode (top or record) that generates many
|
||||||
|
frequent performance monitoring non-maskable interrupts (see "NMI"
|
||||||
|
in /proc/interrupts). This exercises the NMI entry/exit code which
|
||||||
|
is known to trigger bugs in code paths that did not expect to be
|
||||||
|
interrupted, including nested NMIs. Using "-c" boosts the rate of
|
||||||
|
NMIs, and using two -c with separate counters encourages nested NMIs
|
||||||
|
and less deterministic behavior.
|
||||||
|
|
||||||
|
while true; do perf record -c 10000 -e instructions,cycles -a sleep 10; done
|
||||||
|
|
||||||
|
4. Launch a KVM virtual machine.
|
||||||
|
5. Run 32-bit binaries on systems supporting the SYSCALL instruction.
|
||||||
|
This has been a lightly-tested code path and needs extra scrutiny.
|
||||||
|
|
||||||
|
Debugging
|
||||||
|
=========
|
||||||
|
|
||||||
|
Bugs in PTI cause a few different signatures of crashes
|
||||||
|
that are worth noting here.
|
||||||
|
|
||||||
|
* Failures of the selftests/x86 code. Usually a bug in one of the
|
||||||
|
more obscure corners of entry_64.S
|
||||||
|
* Crashes in early boot, especially around CPU bringup. Bugs
|
||||||
|
in the trampoline code or mappings cause these.
|
||||||
|
* Crashes at the first interrupt. Caused by bugs in entry_64.S,
|
||||||
|
like screwing up a page table switch. Also caused by
|
||||||
|
incorrectly mapping the IRQ handler entry code.
|
||||||
|
* Crashes at the first NMI. The NMI code is separate from main
|
||||||
|
interrupt handlers and can have bugs that do not affect
|
||||||
|
normal interrupts. Also caused by incorrectly mapping NMI
|
||||||
|
code. NMIs that interrupt the entry code must be very
|
||||||
|
careful and can be the cause of crashes that show up when
|
||||||
|
running perf.
|
||||||
|
* Kernel crashes at the first exit to userspace. entry_64.S
|
||||||
|
bugs, or failing to map some of the exit code.
|
||||||
|
* Crashes at first interrupt that interrupts userspace. The paths
|
||||||
|
in entry_64.S that return to userspace are sometimes separate
|
||||||
|
from the ones that return to the kernel.
|
||||||
|
* Double faults: overflowing the kernel stack because of page
|
||||||
|
faults upon page faults. Caused by touching non-pti-mapped
|
||||||
|
data in the entry code, or forgetting to switch to kernel
|
||||||
|
CR3 before calling into C functions which are not pti-mapped.
|
||||||
|
* Userspace segfaults early in boot, sometimes manifesting
|
||||||
|
as mount(8) failing to mount the rootfs. These have
|
||||||
|
tended to be TLB invalidation issues. Usually invalidating
|
||||||
|
the wrong PCID, or otherwise missing an invalidation.
|
||||||
|
|
||||||
|
1. https://gruss.cc/files/kaiser.pdf
|
||||||
|
2. https://meltdownattack.com/meltdown.pdf
|
2
Makefile
2
Makefile
|
@ -1,7 +1,7 @@
|
||||||
# SPDX-License-Identifier: GPL-2.0
|
# SPDX-License-Identifier: GPL-2.0
|
||||||
VERSION = 4
|
VERSION = 4
|
||||||
PATCHLEVEL = 14
|
PATCHLEVEL = 14
|
||||||
SUBLEVEL = 13
|
SUBLEVEL = 16
|
||||||
EXTRAVERSION =
|
EXTRAVERSION =
|
||||||
NAME = Petit Gorille
|
NAME = Petit Gorille
|
||||||
|
|
||||||
|
|
|
@ -102,6 +102,15 @@ sio_pci_route(void)
|
||||||
alpha_mv.sys.sio.route_tab);
|
alpha_mv.sys.sio.route_tab);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static bool sio_pci_dev_irq_needs_level(const struct pci_dev *dev)
|
||||||
|
{
|
||||||
|
if ((dev->class >> 16 == PCI_BASE_CLASS_BRIDGE) &&
|
||||||
|
(dev->class >> 8 != PCI_CLASS_BRIDGE_PCMCIA))
|
||||||
|
return false;
|
||||||
|
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
static unsigned int __init
|
static unsigned int __init
|
||||||
sio_collect_irq_levels(void)
|
sio_collect_irq_levels(void)
|
||||||
{
|
{
|
||||||
|
@ -110,8 +119,7 @@ sio_collect_irq_levels(void)
|
||||||
|
|
||||||
/* Iterate through the devices, collecting IRQ levels. */
|
/* Iterate through the devices, collecting IRQ levels. */
|
||||||
for_each_pci_dev(dev) {
|
for_each_pci_dev(dev) {
|
||||||
if ((dev->class >> 16 == PCI_BASE_CLASS_BRIDGE) &&
|
if (!sio_pci_dev_irq_needs_level(dev))
|
||||||
(dev->class >> 8 != PCI_CLASS_BRIDGE_PCMCIA))
|
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
if (dev->irq)
|
if (dev->irq)
|
||||||
|
@ -120,8 +128,7 @@ sio_collect_irq_levels(void)
|
||||||
return level_bits;
|
return level_bits;
|
||||||
}
|
}
|
||||||
|
|
||||||
static void __init
|
static void __sio_fixup_irq_levels(unsigned int level_bits, bool reset)
|
||||||
sio_fixup_irq_levels(unsigned int level_bits)
|
|
||||||
{
|
{
|
||||||
unsigned int old_level_bits;
|
unsigned int old_level_bits;
|
||||||
|
|
||||||
|
@ -139,12 +146,21 @@ sio_fixup_irq_levels(unsigned int level_bits)
|
||||||
*/
|
*/
|
||||||
old_level_bits = inb(0x4d0) | (inb(0x4d1) << 8);
|
old_level_bits = inb(0x4d0) | (inb(0x4d1) << 8);
|
||||||
|
|
||||||
level_bits |= (old_level_bits & 0x71ff);
|
if (reset)
|
||||||
|
old_level_bits &= 0x71ff;
|
||||||
|
|
||||||
|
level_bits |= old_level_bits;
|
||||||
|
|
||||||
outb((level_bits >> 0) & 0xff, 0x4d0);
|
outb((level_bits >> 0) & 0xff, 0x4d0);
|
||||||
outb((level_bits >> 8) & 0xff, 0x4d1);
|
outb((level_bits >> 8) & 0xff, 0x4d1);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static inline void
|
||||||
|
sio_fixup_irq_levels(unsigned int level_bits)
|
||||||
|
{
|
||||||
|
__sio_fixup_irq_levels(level_bits, true);
|
||||||
|
}
|
||||||
|
|
||||||
static inline int
|
static inline int
|
||||||
noname_map_irq(const struct pci_dev *dev, u8 slot, u8 pin)
|
noname_map_irq(const struct pci_dev *dev, u8 slot, u8 pin)
|
||||||
{
|
{
|
||||||
|
@ -181,7 +197,14 @@ noname_map_irq(const struct pci_dev *dev, u8 slot, u8 pin)
|
||||||
const long min_idsel = 6, max_idsel = 14, irqs_per_slot = 5;
|
const long min_idsel = 6, max_idsel = 14, irqs_per_slot = 5;
|
||||||
int irq = COMMON_TABLE_LOOKUP, tmp;
|
int irq = COMMON_TABLE_LOOKUP, tmp;
|
||||||
tmp = __kernel_extbl(alpha_mv.sys.sio.route_tab, irq);
|
tmp = __kernel_extbl(alpha_mv.sys.sio.route_tab, irq);
|
||||||
return irq >= 0 ? tmp : -1;
|
|
||||||
|
irq = irq >= 0 ? tmp : -1;
|
||||||
|
|
||||||
|
/* Fixup IRQ level if an actual IRQ mapping is detected */
|
||||||
|
if (sio_pci_dev_irq_needs_level(dev) && irq >= 0)
|
||||||
|
__sio_fixup_irq_levels(1 << irq, false);
|
||||||
|
|
||||||
|
return irq;
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline int
|
static inline int
|
||||||
|
|
|
@ -53,7 +53,8 @@
|
||||||
};
|
};
|
||||||
|
|
||||||
pinctrl: pin-controller@10000 {
|
pinctrl: pin-controller@10000 {
|
||||||
pinctrl-0 = <&pmx_dip_switches &pmx_gpio_header>;
|
pinctrl-0 = <&pmx_dip_switches &pmx_gpio_header
|
||||||
|
&pmx_gpio_header_gpo>;
|
||||||
pinctrl-names = "default";
|
pinctrl-names = "default";
|
||||||
|
|
||||||
pmx_uart0: pmx-uart0 {
|
pmx_uart0: pmx-uart0 {
|
||||||
|
@ -85,11 +86,16 @@
|
||||||
* ground.
|
* ground.
|
||||||
*/
|
*/
|
||||||
pmx_gpio_header: pmx-gpio-header {
|
pmx_gpio_header: pmx-gpio-header {
|
||||||
marvell,pins = "mpp17", "mpp7", "mpp29", "mpp28",
|
marvell,pins = "mpp17", "mpp29", "mpp28",
|
||||||
"mpp35", "mpp34", "mpp40";
|
"mpp35", "mpp34", "mpp40";
|
||||||
marvell,function = "gpio";
|
marvell,function = "gpio";
|
||||||
};
|
};
|
||||||
|
|
||||||
|
pmx_gpio_header_gpo: pxm-gpio-header-gpo {
|
||||||
|
marvell,pins = "mpp7";
|
||||||
|
marvell,function = "gpo";
|
||||||
|
};
|
||||||
|
|
||||||
pmx_gpio_init: pmx-init {
|
pmx_gpio_init: pmx-init {
|
||||||
marvell,pins = "mpp38";
|
marvell,pins = "mpp38";
|
||||||
marvell,function = "gpio";
|
marvell,function = "gpio";
|
||||||
|
|
|
@ -10,6 +10,7 @@ CONFIG_SMP=y
|
||||||
CONFIG_NR_CPUS=8
|
CONFIG_NR_CPUS=8
|
||||||
CONFIG_AEABI=y
|
CONFIG_AEABI=y
|
||||||
CONFIG_HIGHMEM=y
|
CONFIG_HIGHMEM=y
|
||||||
|
CONFIG_CMA=y
|
||||||
CONFIG_ARM_APPENDED_DTB=y
|
CONFIG_ARM_APPENDED_DTB=y
|
||||||
CONFIG_ARM_ATAG_DTB_COMPAT=y
|
CONFIG_ARM_ATAG_DTB_COMPAT=y
|
||||||
CONFIG_CPU_FREQ=y
|
CONFIG_CPU_FREQ=y
|
||||||
|
@ -33,6 +34,7 @@ CONFIG_CAN_SUN4I=y
|
||||||
# CONFIG_WIRELESS is not set
|
# CONFIG_WIRELESS is not set
|
||||||
CONFIG_DEVTMPFS=y
|
CONFIG_DEVTMPFS=y
|
||||||
CONFIG_DEVTMPFS_MOUNT=y
|
CONFIG_DEVTMPFS_MOUNT=y
|
||||||
|
CONFIG_DMA_CMA=y
|
||||||
CONFIG_BLK_DEV_SD=y
|
CONFIG_BLK_DEV_SD=y
|
||||||
CONFIG_ATA=y
|
CONFIG_ATA=y
|
||||||
CONFIG_AHCI_SUNXI=y
|
CONFIG_AHCI_SUNXI=y
|
||||||
|
|
|
@ -1656,6 +1656,7 @@ static struct omap_hwmod omap3xxx_mmc3_hwmod = {
|
||||||
.main_clk = "mmchs3_fck",
|
.main_clk = "mmchs3_fck",
|
||||||
.prcm = {
|
.prcm = {
|
||||||
.omap2 = {
|
.omap2 = {
|
||||||
|
.module_offs = CORE_MOD,
|
||||||
.prcm_reg_id = 1,
|
.prcm_reg_id = 1,
|
||||||
.module_bit = OMAP3430_EN_MMC3_SHIFT,
|
.module_bit = OMAP3430_EN_MMC3_SHIFT,
|
||||||
.idlest_reg_id = 1,
|
.idlest_reg_id = 1,
|
||||||
|
|
|
@ -27,14 +27,58 @@
|
||||||
|
|
||||||
int bpf_jit_enable __read_mostly;
|
int bpf_jit_enable __read_mostly;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* eBPF prog stack layout:
|
||||||
|
*
|
||||||
|
* high
|
||||||
|
* original ARM_SP => +-----+
|
||||||
|
* | | callee saved registers
|
||||||
|
* +-----+ <= (BPF_FP + SCRATCH_SIZE)
|
||||||
|
* | ... | eBPF JIT scratch space
|
||||||
|
* eBPF fp register => +-----+
|
||||||
|
* (BPF_FP) | ... | eBPF prog stack
|
||||||
|
* +-----+
|
||||||
|
* |RSVD | JIT scratchpad
|
||||||
|
* current ARM_SP => +-----+ <= (BPF_FP - STACK_SIZE + SCRATCH_SIZE)
|
||||||
|
* | |
|
||||||
|
* | ... | Function call stack
|
||||||
|
* | |
|
||||||
|
* +-----+
|
||||||
|
* low
|
||||||
|
*
|
||||||
|
* The callee-saved registers depend on whether frame pointers are enabled.
|
||||||
|
* With frame pointers (to be compliant with the ABI):
|
||||||
|
*
|
||||||
|
* high
|
||||||
|
* original ARM_SP => +------------------+ \
|
||||||
|
* | pc | |
|
||||||
|
* current ARM_FP => +------------------+ } callee saved registers
|
||||||
|
* |r4-r8,r10,fp,ip,lr| |
|
||||||
|
* +------------------+ /
|
||||||
|
* low
|
||||||
|
*
|
||||||
|
* Without frame pointers:
|
||||||
|
*
|
||||||
|
* high
|
||||||
|
* original ARM_SP => +------------------+
|
||||||
|
* | r4-r8,r10,fp,lr | callee saved registers
|
||||||
|
* current ARM_FP => +------------------+
|
||||||
|
* low
|
||||||
|
*
|
||||||
|
* When popping registers off the stack at the end of a BPF function, we
|
||||||
|
* reference them via the current ARM_FP register.
|
||||||
|
*/
|
||||||
|
#define CALLEE_MASK (1 << ARM_R4 | 1 << ARM_R5 | 1 << ARM_R6 | \
|
||||||
|
1 << ARM_R7 | 1 << ARM_R8 | 1 << ARM_R10 | \
|
||||||
|
1 << ARM_FP)
|
||||||
|
#define CALLEE_PUSH_MASK (CALLEE_MASK | 1 << ARM_LR)
|
||||||
|
#define CALLEE_POP_MASK (CALLEE_MASK | 1 << ARM_PC)
|
||||||
|
|
||||||
#define STACK_OFFSET(k) (k)
|
#define STACK_OFFSET(k) (k)
|
||||||
#define TMP_REG_1 (MAX_BPF_JIT_REG + 0) /* TEMP Register 1 */
|
#define TMP_REG_1 (MAX_BPF_JIT_REG + 0) /* TEMP Register 1 */
|
||||||
#define TMP_REG_2 (MAX_BPF_JIT_REG + 1) /* TEMP Register 2 */
|
#define TMP_REG_2 (MAX_BPF_JIT_REG + 1) /* TEMP Register 2 */
|
||||||
#define TCALL_CNT (MAX_BPF_JIT_REG + 2) /* Tail Call Count */
|
#define TCALL_CNT (MAX_BPF_JIT_REG + 2) /* Tail Call Count */
|
||||||
|
|
||||||
/* Flags used for JIT optimization */
|
|
||||||
#define SEEN_CALL (1 << 0)
|
|
||||||
|
|
||||||
#define FLAG_IMM_OVERFLOW (1 << 0)
|
#define FLAG_IMM_OVERFLOW (1 << 0)
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
@ -95,7 +139,6 @@ static const u8 bpf2a32[][2] = {
|
||||||
* idx : index of current last JITed instruction.
|
* idx : index of current last JITed instruction.
|
||||||
* prologue_bytes : bytes used in prologue.
|
* prologue_bytes : bytes used in prologue.
|
||||||
* epilogue_offset : offset of epilogue starting.
|
* epilogue_offset : offset of epilogue starting.
|
||||||
* seen : bit mask used for JIT optimization.
|
|
||||||
* offsets : array of eBPF instruction offsets in
|
* offsets : array of eBPF instruction offsets in
|
||||||
* JITed code.
|
* JITed code.
|
||||||
* target : final JITed code.
|
* target : final JITed code.
|
||||||
|
@ -110,7 +153,6 @@ struct jit_ctx {
|
||||||
unsigned int idx;
|
unsigned int idx;
|
||||||
unsigned int prologue_bytes;
|
unsigned int prologue_bytes;
|
||||||
unsigned int epilogue_offset;
|
unsigned int epilogue_offset;
|
||||||
u32 seen;
|
|
||||||
u32 flags;
|
u32 flags;
|
||||||
u32 *offsets;
|
u32 *offsets;
|
||||||
u32 *target;
|
u32 *target;
|
||||||
|
@ -179,8 +221,13 @@ static void jit_fill_hole(void *area, unsigned int size)
|
||||||
*ptr++ = __opcode_to_mem_arm(ARM_INST_UDF);
|
*ptr++ = __opcode_to_mem_arm(ARM_INST_UDF);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Stack must be multiples of 16 Bytes */
|
#if defined(CONFIG_AEABI) && (__LINUX_ARM_ARCH__ >= 5)
|
||||||
#define STACK_ALIGN(sz) (((sz) + 3) & ~3)
|
/* EABI requires the stack to be aligned to 64-bit boundaries */
|
||||||
|
#define STACK_ALIGNMENT 8
|
||||||
|
#else
|
||||||
|
/* Stack must be aligned to 32-bit boundaries */
|
||||||
|
#define STACK_ALIGNMENT 4
|
||||||
|
#endif
|
||||||
|
|
||||||
/* Stack space for BPF_REG_2, BPF_REG_3, BPF_REG_4,
|
/* Stack space for BPF_REG_2, BPF_REG_3, BPF_REG_4,
|
||||||
* BPF_REG_5, BPF_REG_7, BPF_REG_8, BPF_REG_9,
|
* BPF_REG_5, BPF_REG_7, BPF_REG_8, BPF_REG_9,
|
||||||
|
@ -194,7 +241,7 @@ static void jit_fill_hole(void *area, unsigned int size)
|
||||||
+ SCRATCH_SIZE + \
|
+ SCRATCH_SIZE + \
|
||||||
+ 4 /* extra for skb_copy_bits buffer */)
|
+ 4 /* extra for skb_copy_bits buffer */)
|
||||||
|
|
||||||
#define STACK_SIZE STACK_ALIGN(_STACK_SIZE)
|
#define STACK_SIZE ALIGN(_STACK_SIZE, STACK_ALIGNMENT)
|
||||||
|
|
||||||
/* Get the offset of eBPF REGISTERs stored on scratch space. */
|
/* Get the offset of eBPF REGISTERs stored on scratch space. */
|
||||||
#define STACK_VAR(off) (STACK_SIZE-off-4)
|
#define STACK_VAR(off) (STACK_SIZE-off-4)
|
||||||
|
@ -285,16 +332,19 @@ static inline void emit_mov_i(const u8 rd, u32 val, struct jit_ctx *ctx)
|
||||||
emit_mov_i_no8m(rd, val, ctx);
|
emit_mov_i_no8m(rd, val, ctx);
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline void emit_blx_r(u8 tgt_reg, struct jit_ctx *ctx)
|
static void emit_bx_r(u8 tgt_reg, struct jit_ctx *ctx)
|
||||||
{
|
{
|
||||||
ctx->seen |= SEEN_CALL;
|
|
||||||
#if __LINUX_ARM_ARCH__ < 5
|
|
||||||
emit(ARM_MOV_R(ARM_LR, ARM_PC), ctx);
|
|
||||||
|
|
||||||
if (elf_hwcap & HWCAP_THUMB)
|
if (elf_hwcap & HWCAP_THUMB)
|
||||||
emit(ARM_BX(tgt_reg), ctx);
|
emit(ARM_BX(tgt_reg), ctx);
|
||||||
else
|
else
|
||||||
emit(ARM_MOV_R(ARM_PC, tgt_reg), ctx);
|
emit(ARM_MOV_R(ARM_PC, tgt_reg), ctx);
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline void emit_blx_r(u8 tgt_reg, struct jit_ctx *ctx)
|
||||||
|
{
|
||||||
|
#if __LINUX_ARM_ARCH__ < 5
|
||||||
|
emit(ARM_MOV_R(ARM_LR, ARM_PC), ctx);
|
||||||
|
emit_bx_r(tgt_reg, ctx);
|
||||||
#else
|
#else
|
||||||
emit(ARM_BLX_R(tgt_reg), ctx);
|
emit(ARM_BLX_R(tgt_reg), ctx);
|
||||||
#endif
|
#endif
|
||||||
|
@ -354,7 +404,6 @@ static inline void emit_udivmod(u8 rd, u8 rm, u8 rn, struct jit_ctx *ctx, u8 op)
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Call appropriate function */
|
/* Call appropriate function */
|
||||||
ctx->seen |= SEEN_CALL;
|
|
||||||
emit_mov_i(ARM_IP, op == BPF_DIV ?
|
emit_mov_i(ARM_IP, op == BPF_DIV ?
|
||||||
(u32)jit_udiv32 : (u32)jit_mod32, ctx);
|
(u32)jit_udiv32 : (u32)jit_mod32, ctx);
|
||||||
emit_blx_r(ARM_IP, ctx);
|
emit_blx_r(ARM_IP, ctx);
|
||||||
|
@ -620,8 +669,6 @@ static inline void emit_a32_lsh_r64(const u8 dst[], const u8 src[], bool dstk,
|
||||||
/* Do LSH operation */
|
/* Do LSH operation */
|
||||||
emit(ARM_SUB_I(ARM_IP, rt, 32), ctx);
|
emit(ARM_SUB_I(ARM_IP, rt, 32), ctx);
|
||||||
emit(ARM_RSB_I(tmp2[0], rt, 32), ctx);
|
emit(ARM_RSB_I(tmp2[0], rt, 32), ctx);
|
||||||
/* As we are using ARM_LR */
|
|
||||||
ctx->seen |= SEEN_CALL;
|
|
||||||
emit(ARM_MOV_SR(ARM_LR, rm, SRTYPE_ASL, rt), ctx);
|
emit(ARM_MOV_SR(ARM_LR, rm, SRTYPE_ASL, rt), ctx);
|
||||||
emit(ARM_ORR_SR(ARM_LR, ARM_LR, rd, SRTYPE_ASL, ARM_IP), ctx);
|
emit(ARM_ORR_SR(ARM_LR, ARM_LR, rd, SRTYPE_ASL, ARM_IP), ctx);
|
||||||
emit(ARM_ORR_SR(ARM_IP, ARM_LR, rd, SRTYPE_LSR, tmp2[0]), ctx);
|
emit(ARM_ORR_SR(ARM_IP, ARM_LR, rd, SRTYPE_LSR, tmp2[0]), ctx);
|
||||||
|
@ -656,8 +703,6 @@ static inline void emit_a32_arsh_r64(const u8 dst[], const u8 src[], bool dstk,
|
||||||
/* Do the ARSH operation */
|
/* Do the ARSH operation */
|
||||||
emit(ARM_RSB_I(ARM_IP, rt, 32), ctx);
|
emit(ARM_RSB_I(ARM_IP, rt, 32), ctx);
|
||||||
emit(ARM_SUBS_I(tmp2[0], rt, 32), ctx);
|
emit(ARM_SUBS_I(tmp2[0], rt, 32), ctx);
|
||||||
/* As we are using ARM_LR */
|
|
||||||
ctx->seen |= SEEN_CALL;
|
|
||||||
emit(ARM_MOV_SR(ARM_LR, rd, SRTYPE_LSR, rt), ctx);
|
emit(ARM_MOV_SR(ARM_LR, rd, SRTYPE_LSR, rt), ctx);
|
||||||
emit(ARM_ORR_SR(ARM_LR, ARM_LR, rm, SRTYPE_ASL, ARM_IP), ctx);
|
emit(ARM_ORR_SR(ARM_LR, ARM_LR, rm, SRTYPE_ASL, ARM_IP), ctx);
|
||||||
_emit(ARM_COND_MI, ARM_B(0), ctx);
|
_emit(ARM_COND_MI, ARM_B(0), ctx);
|
||||||
|
@ -692,8 +737,6 @@ static inline void emit_a32_lsr_r64(const u8 dst[], const u8 src[], bool dstk,
|
||||||
/* Do LSH operation */
|
/* Do LSH operation */
|
||||||
emit(ARM_RSB_I(ARM_IP, rt, 32), ctx);
|
emit(ARM_RSB_I(ARM_IP, rt, 32), ctx);
|
||||||
emit(ARM_SUBS_I(tmp2[0], rt, 32), ctx);
|
emit(ARM_SUBS_I(tmp2[0], rt, 32), ctx);
|
||||||
/* As we are using ARM_LR */
|
|
||||||
ctx->seen |= SEEN_CALL;
|
|
||||||
emit(ARM_MOV_SR(ARM_LR, rd, SRTYPE_LSR, rt), ctx);
|
emit(ARM_MOV_SR(ARM_LR, rd, SRTYPE_LSR, rt), ctx);
|
||||||
emit(ARM_ORR_SR(ARM_LR, ARM_LR, rm, SRTYPE_ASL, ARM_IP), ctx);
|
emit(ARM_ORR_SR(ARM_LR, ARM_LR, rm, SRTYPE_ASL, ARM_IP), ctx);
|
||||||
emit(ARM_ORR_SR(ARM_LR, ARM_LR, rm, SRTYPE_LSR, tmp2[0]), ctx);
|
emit(ARM_ORR_SR(ARM_LR, ARM_LR, rm, SRTYPE_LSR, tmp2[0]), ctx);
|
||||||
|
@ -828,8 +871,6 @@ static inline void emit_a32_mul_r64(const u8 dst[], const u8 src[], bool dstk,
|
||||||
/* Do Multiplication */
|
/* Do Multiplication */
|
||||||
emit(ARM_MUL(ARM_IP, rd, rn), ctx);
|
emit(ARM_MUL(ARM_IP, rd, rn), ctx);
|
||||||
emit(ARM_MUL(ARM_LR, rm, rt), ctx);
|
emit(ARM_MUL(ARM_LR, rm, rt), ctx);
|
||||||
/* As we are using ARM_LR */
|
|
||||||
ctx->seen |= SEEN_CALL;
|
|
||||||
emit(ARM_ADD_R(ARM_LR, ARM_IP, ARM_LR), ctx);
|
emit(ARM_ADD_R(ARM_LR, ARM_IP, ARM_LR), ctx);
|
||||||
|
|
||||||
emit(ARM_UMULL(ARM_IP, rm, rd, rt), ctx);
|
emit(ARM_UMULL(ARM_IP, rm, rd, rt), ctx);
|
||||||
|
@ -872,33 +913,53 @@ static inline void emit_str_r(const u8 dst, const u8 src, bool dstk,
|
||||||
}
|
}
|
||||||
|
|
||||||
/* dst = *(size*)(src + off) */
|
/* dst = *(size*)(src + off) */
|
||||||
static inline void emit_ldx_r(const u8 dst, const u8 src, bool dstk,
|
static inline void emit_ldx_r(const u8 dst[], const u8 src, bool dstk,
|
||||||
const s32 off, struct jit_ctx *ctx, const u8 sz){
|
s32 off, struct jit_ctx *ctx, const u8 sz){
|
||||||
const u8 *tmp = bpf2a32[TMP_REG_1];
|
const u8 *tmp = bpf2a32[TMP_REG_1];
|
||||||
u8 rd = dstk ? tmp[1] : dst;
|
const u8 *rd = dstk ? tmp : dst;
|
||||||
u8 rm = src;
|
u8 rm = src;
|
||||||
|
s32 off_max;
|
||||||
|
|
||||||
if (off) {
|
if (sz == BPF_H)
|
||||||
|
off_max = 0xff;
|
||||||
|
else
|
||||||
|
off_max = 0xfff;
|
||||||
|
|
||||||
|
if (off < 0 || off > off_max) {
|
||||||
emit_a32_mov_i(tmp[0], off, false, ctx);
|
emit_a32_mov_i(tmp[0], off, false, ctx);
|
||||||
emit(ARM_ADD_R(tmp[0], tmp[0], src), ctx);
|
emit(ARM_ADD_R(tmp[0], tmp[0], src), ctx);
|
||||||
rm = tmp[0];
|
rm = tmp[0];
|
||||||
|
off = 0;
|
||||||
|
} else if (rd[1] == rm) {
|
||||||
|
emit(ARM_MOV_R(tmp[0], rm), ctx);
|
||||||
|
rm = tmp[0];
|
||||||
}
|
}
|
||||||
switch (sz) {
|
switch (sz) {
|
||||||
case BPF_W:
|
case BPF_B:
|
||||||
/* Load a Word */
|
/* Load a Byte */
|
||||||
emit(ARM_LDR_I(rd, rm, 0), ctx);
|
emit(ARM_LDRB_I(rd[1], rm, off), ctx);
|
||||||
|
emit_a32_mov_i(dst[0], 0, dstk, ctx);
|
||||||
break;
|
break;
|
||||||
case BPF_H:
|
case BPF_H:
|
||||||
/* Load a HalfWord */
|
/* Load a HalfWord */
|
||||||
emit(ARM_LDRH_I(rd, rm, 0), ctx);
|
emit(ARM_LDRH_I(rd[1], rm, off), ctx);
|
||||||
|
emit_a32_mov_i(dst[0], 0, dstk, ctx);
|
||||||
break;
|
break;
|
||||||
case BPF_B:
|
case BPF_W:
|
||||||
/* Load a Byte */
|
/* Load a Word */
|
||||||
emit(ARM_LDRB_I(rd, rm, 0), ctx);
|
emit(ARM_LDR_I(rd[1], rm, off), ctx);
|
||||||
|
emit_a32_mov_i(dst[0], 0, dstk, ctx);
|
||||||
|
break;
|
||||||
|
case BPF_DW:
|
||||||
|
/* Load a Double Word */
|
||||||
|
emit(ARM_LDR_I(rd[1], rm, off), ctx);
|
||||||
|
emit(ARM_LDR_I(rd[0], rm, off + 4), ctx);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
if (dstk)
|
if (dstk)
|
||||||
emit(ARM_STR_I(rd, ARM_SP, STACK_VAR(dst)), ctx);
|
emit(ARM_STR_I(rd[1], ARM_SP, STACK_VAR(dst[1])), ctx);
|
||||||
|
if (dstk && sz == BPF_DW)
|
||||||
|
emit(ARM_STR_I(rd[0], ARM_SP, STACK_VAR(dst[0])), ctx);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Arithmatic Operation */
|
/* Arithmatic Operation */
|
||||||
|
@ -906,7 +967,6 @@ static inline void emit_ar_r(const u8 rd, const u8 rt, const u8 rm,
|
||||||
const u8 rn, struct jit_ctx *ctx, u8 op) {
|
const u8 rn, struct jit_ctx *ctx, u8 op) {
|
||||||
switch (op) {
|
switch (op) {
|
||||||
case BPF_JSET:
|
case BPF_JSET:
|
||||||
ctx->seen |= SEEN_CALL;
|
|
||||||
emit(ARM_AND_R(ARM_IP, rt, rn), ctx);
|
emit(ARM_AND_R(ARM_IP, rt, rn), ctx);
|
||||||
emit(ARM_AND_R(ARM_LR, rd, rm), ctx);
|
emit(ARM_AND_R(ARM_LR, rd, rm), ctx);
|
||||||
emit(ARM_ORRS_R(ARM_IP, ARM_LR, ARM_IP), ctx);
|
emit(ARM_ORRS_R(ARM_IP, ARM_LR, ARM_IP), ctx);
|
||||||
|
@ -945,7 +1005,7 @@ static int emit_bpf_tail_call(struct jit_ctx *ctx)
|
||||||
const u8 *tcc = bpf2a32[TCALL_CNT];
|
const u8 *tcc = bpf2a32[TCALL_CNT];
|
||||||
const int idx0 = ctx->idx;
|
const int idx0 = ctx->idx;
|
||||||
#define cur_offset (ctx->idx - idx0)
|
#define cur_offset (ctx->idx - idx0)
|
||||||
#define jmp_offset (out_offset - (cur_offset))
|
#define jmp_offset (out_offset - (cur_offset) - 2)
|
||||||
u32 off, lo, hi;
|
u32 off, lo, hi;
|
||||||
|
|
||||||
/* if (index >= array->map.max_entries)
|
/* if (index >= array->map.max_entries)
|
||||||
|
@ -956,7 +1016,7 @@ static int emit_bpf_tail_call(struct jit_ctx *ctx)
|
||||||
emit_a32_mov_i(tmp[1], off, false, ctx);
|
emit_a32_mov_i(tmp[1], off, false, ctx);
|
||||||
emit(ARM_LDR_I(tmp2[1], ARM_SP, STACK_VAR(r2[1])), ctx);
|
emit(ARM_LDR_I(tmp2[1], ARM_SP, STACK_VAR(r2[1])), ctx);
|
||||||
emit(ARM_LDR_R(tmp[1], tmp2[1], tmp[1]), ctx);
|
emit(ARM_LDR_R(tmp[1], tmp2[1], tmp[1]), ctx);
|
||||||
/* index (64 bit) */
|
/* index is 32-bit for arrays */
|
||||||
emit(ARM_LDR_I(tmp2[1], ARM_SP, STACK_VAR(r3[1])), ctx);
|
emit(ARM_LDR_I(tmp2[1], ARM_SP, STACK_VAR(r3[1])), ctx);
|
||||||
/* index >= array->map.max_entries */
|
/* index >= array->map.max_entries */
|
||||||
emit(ARM_CMP_R(tmp2[1], tmp[1]), ctx);
|
emit(ARM_CMP_R(tmp2[1], tmp[1]), ctx);
|
||||||
|
@ -997,7 +1057,7 @@ static int emit_bpf_tail_call(struct jit_ctx *ctx)
|
||||||
emit_a32_mov_i(tmp2[1], off, false, ctx);
|
emit_a32_mov_i(tmp2[1], off, false, ctx);
|
||||||
emit(ARM_LDR_R(tmp[1], tmp[1], tmp2[1]), ctx);
|
emit(ARM_LDR_R(tmp[1], tmp[1], tmp2[1]), ctx);
|
||||||
emit(ARM_ADD_I(tmp[1], tmp[1], ctx->prologue_bytes), ctx);
|
emit(ARM_ADD_I(tmp[1], tmp[1], ctx->prologue_bytes), ctx);
|
||||||
emit(ARM_BX(tmp[1]), ctx);
|
emit_bx_r(tmp[1], ctx);
|
||||||
|
|
||||||
/* out: */
|
/* out: */
|
||||||
if (out_offset == -1)
|
if (out_offset == -1)
|
||||||
|
@ -1070,54 +1130,22 @@ static void build_prologue(struct jit_ctx *ctx)
|
||||||
const u8 r2 = bpf2a32[BPF_REG_1][1];
|
const u8 r2 = bpf2a32[BPF_REG_1][1];
|
||||||
const u8 r3 = bpf2a32[BPF_REG_1][0];
|
const u8 r3 = bpf2a32[BPF_REG_1][0];
|
||||||
const u8 r4 = bpf2a32[BPF_REG_6][1];
|
const u8 r4 = bpf2a32[BPF_REG_6][1];
|
||||||
const u8 r5 = bpf2a32[BPF_REG_6][0];
|
|
||||||
const u8 r6 = bpf2a32[TMP_REG_1][1];
|
|
||||||
const u8 r7 = bpf2a32[TMP_REG_1][0];
|
|
||||||
const u8 r8 = bpf2a32[TMP_REG_2][1];
|
|
||||||
const u8 r10 = bpf2a32[TMP_REG_2][0];
|
|
||||||
const u8 fplo = bpf2a32[BPF_REG_FP][1];
|
const u8 fplo = bpf2a32[BPF_REG_FP][1];
|
||||||
const u8 fphi = bpf2a32[BPF_REG_FP][0];
|
const u8 fphi = bpf2a32[BPF_REG_FP][0];
|
||||||
const u8 sp = ARM_SP;
|
|
||||||
const u8 *tcc = bpf2a32[TCALL_CNT];
|
const u8 *tcc = bpf2a32[TCALL_CNT];
|
||||||
|
|
||||||
u16 reg_set = 0;
|
|
||||||
|
|
||||||
/*
|
|
||||||
* eBPF prog stack layout
|
|
||||||
*
|
|
||||||
* high
|
|
||||||
* original ARM_SP => +-----+ eBPF prologue
|
|
||||||
* |FP/LR|
|
|
||||||
* current ARM_FP => +-----+
|
|
||||||
* | ... | callee saved registers
|
|
||||||
* eBPF fp register => +-----+ <= (BPF_FP)
|
|
||||||
* | ... | eBPF JIT scratch space
|
|
||||||
* | | eBPF prog stack
|
|
||||||
* +-----+
|
|
||||||
* |RSVD | JIT scratchpad
|
|
||||||
* current A64_SP => +-----+ <= (BPF_FP - STACK_SIZE)
|
|
||||||
* | |
|
|
||||||
* | ... | Function call stack
|
|
||||||
* | |
|
|
||||||
* +-----+
|
|
||||||
* low
|
|
||||||
*/
|
|
||||||
|
|
||||||
/* Save callee saved registers. */
|
/* Save callee saved registers. */
|
||||||
reg_set |= (1<<r4) | (1<<r5) | (1<<r6) | (1<<r7) | (1<<r8) | (1<<r10);
|
|
||||||
#ifdef CONFIG_FRAME_POINTER
|
#ifdef CONFIG_FRAME_POINTER
|
||||||
reg_set |= (1<<ARM_FP) | (1<<ARM_IP) | (1<<ARM_LR) | (1<<ARM_PC);
|
u16 reg_set = CALLEE_PUSH_MASK | 1 << ARM_IP | 1 << ARM_PC;
|
||||||
emit(ARM_MOV_R(ARM_IP, sp), ctx);
|
emit(ARM_MOV_R(ARM_IP, ARM_SP), ctx);
|
||||||
emit(ARM_PUSH(reg_set), ctx);
|
emit(ARM_PUSH(reg_set), ctx);
|
||||||
emit(ARM_SUB_I(ARM_FP, ARM_IP, 4), ctx);
|
emit(ARM_SUB_I(ARM_FP, ARM_IP, 4), ctx);
|
||||||
#else
|
#else
|
||||||
/* Check if call instruction exists in BPF body */
|
emit(ARM_PUSH(CALLEE_PUSH_MASK), ctx);
|
||||||
if (ctx->seen & SEEN_CALL)
|
emit(ARM_MOV_R(ARM_FP, ARM_SP), ctx);
|
||||||
reg_set |= (1<<ARM_LR);
|
|
||||||
emit(ARM_PUSH(reg_set), ctx);
|
|
||||||
#endif
|
#endif
|
||||||
/* Save frame pointer for later */
|
/* Save frame pointer for later */
|
||||||
emit(ARM_SUB_I(ARM_IP, sp, SCRATCH_SIZE), ctx);
|
emit(ARM_SUB_I(ARM_IP, ARM_SP, SCRATCH_SIZE), ctx);
|
||||||
|
|
||||||
ctx->stack_size = imm8m(STACK_SIZE);
|
ctx->stack_size = imm8m(STACK_SIZE);
|
||||||
|
|
||||||
|
@ -1140,33 +1168,19 @@ static void build_prologue(struct jit_ctx *ctx)
|
||||||
/* end of prologue */
|
/* end of prologue */
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* restore callee saved registers. */
|
||||||
static void build_epilogue(struct jit_ctx *ctx)
|
static void build_epilogue(struct jit_ctx *ctx)
|
||||||
{
|
{
|
||||||
const u8 r4 = bpf2a32[BPF_REG_6][1];
|
|
||||||
const u8 r5 = bpf2a32[BPF_REG_6][0];
|
|
||||||
const u8 r6 = bpf2a32[TMP_REG_1][1];
|
|
||||||
const u8 r7 = bpf2a32[TMP_REG_1][0];
|
|
||||||
const u8 r8 = bpf2a32[TMP_REG_2][1];
|
|
||||||
const u8 r10 = bpf2a32[TMP_REG_2][0];
|
|
||||||
u16 reg_set = 0;
|
|
||||||
|
|
||||||
/* unwind function call stack */
|
|
||||||
emit(ARM_ADD_I(ARM_SP, ARM_SP, ctx->stack_size), ctx);
|
|
||||||
|
|
||||||
/* restore callee saved registers. */
|
|
||||||
reg_set |= (1<<r4) | (1<<r5) | (1<<r6) | (1<<r7) | (1<<r8) | (1<<r10);
|
|
||||||
#ifdef CONFIG_FRAME_POINTER
|
#ifdef CONFIG_FRAME_POINTER
|
||||||
/* the first instruction of the prologue was: mov ip, sp */
|
/* When using frame pointers, some additional registers need to
|
||||||
reg_set |= (1<<ARM_FP) | (1<<ARM_SP) | (1<<ARM_PC);
|
* be loaded. */
|
||||||
|
u16 reg_set = CALLEE_POP_MASK | 1 << ARM_SP;
|
||||||
|
emit(ARM_SUB_I(ARM_SP, ARM_FP, hweight16(reg_set) * 4), ctx);
|
||||||
emit(ARM_LDM(ARM_SP, reg_set), ctx);
|
emit(ARM_LDM(ARM_SP, reg_set), ctx);
|
||||||
#else
|
#else
|
||||||
if (ctx->seen & SEEN_CALL)
|
|
||||||
reg_set |= (1<<ARM_PC);
|
|
||||||
/* Restore callee saved registers. */
|
/* Restore callee saved registers. */
|
||||||
emit(ARM_POP(reg_set), ctx);
|
emit(ARM_MOV_R(ARM_SP, ARM_FP), ctx);
|
||||||
/* Return back to the callee function */
|
emit(ARM_POP(CALLEE_POP_MASK), ctx);
|
||||||
if (!(ctx->seen & SEEN_CALL))
|
|
||||||
emit(ARM_BX(ARM_LR), ctx);
|
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1394,8 +1408,6 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx)
|
||||||
emit_rev32(rt, rt, ctx);
|
emit_rev32(rt, rt, ctx);
|
||||||
goto emit_bswap_uxt;
|
goto emit_bswap_uxt;
|
||||||
case 64:
|
case 64:
|
||||||
/* Because of the usage of ARM_LR */
|
|
||||||
ctx->seen |= SEEN_CALL;
|
|
||||||
emit_rev32(ARM_LR, rt, ctx);
|
emit_rev32(ARM_LR, rt, ctx);
|
||||||
emit_rev32(rt, rd, ctx);
|
emit_rev32(rt, rd, ctx);
|
||||||
emit(ARM_MOV_R(rd, ARM_LR), ctx);
|
emit(ARM_MOV_R(rd, ARM_LR), ctx);
|
||||||
|
@ -1448,22 +1460,7 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx)
|
||||||
rn = sstk ? tmp2[1] : src_lo;
|
rn = sstk ? tmp2[1] : src_lo;
|
||||||
if (sstk)
|
if (sstk)
|
||||||
emit(ARM_LDR_I(rn, ARM_SP, STACK_VAR(src_lo)), ctx);
|
emit(ARM_LDR_I(rn, ARM_SP, STACK_VAR(src_lo)), ctx);
|
||||||
switch (BPF_SIZE(code)) {
|
emit_ldx_r(dst, rn, dstk, off, ctx, BPF_SIZE(code));
|
||||||
case BPF_W:
|
|
||||||
/* Load a Word */
|
|
||||||
case BPF_H:
|
|
||||||
/* Load a Half-Word */
|
|
||||||
case BPF_B:
|
|
||||||
/* Load a Byte */
|
|
||||||
emit_ldx_r(dst_lo, rn, dstk, off, ctx, BPF_SIZE(code));
|
|
||||||
emit_a32_mov_i(dst_hi, 0, dstk, ctx);
|
|
||||||
break;
|
|
||||||
case BPF_DW:
|
|
||||||
/* Load a double word */
|
|
||||||
emit_ldx_r(dst_lo, rn, dstk, off, ctx, BPF_W);
|
|
||||||
emit_ldx_r(dst_hi, rn, dstk, off+4, ctx, BPF_W);
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
break;
|
break;
|
||||||
/* R0 = ntohx(*(size *)(((struct sk_buff *)R6)->data + imm)) */
|
/* R0 = ntohx(*(size *)(((struct sk_buff *)R6)->data + imm)) */
|
||||||
case BPF_LD | BPF_ABS | BPF_W:
|
case BPF_LD | BPF_ABS | BPF_W:
|
||||||
|
|
|
@ -63,8 +63,10 @@
|
||||||
cpm_ethernet: ethernet@0 {
|
cpm_ethernet: ethernet@0 {
|
||||||
compatible = "marvell,armada-7k-pp22";
|
compatible = "marvell,armada-7k-pp22";
|
||||||
reg = <0x0 0x100000>, <0x129000 0xb000>;
|
reg = <0x0 0x100000>, <0x129000 0xb000>;
|
||||||
clocks = <&cpm_clk 1 3>, <&cpm_clk 1 9>, <&cpm_clk 1 5>;
|
clocks = <&cpm_clk 1 3>, <&cpm_clk 1 9>,
|
||||||
clock-names = "pp_clk", "gop_clk", "mg_clk";
|
<&cpm_clk 1 5>, <&cpm_clk 1 18>;
|
||||||
|
clock-names = "pp_clk", "gop_clk",
|
||||||
|
"mg_clk","axi_clk";
|
||||||
marvell,system-controller = <&cpm_syscon0>;
|
marvell,system-controller = <&cpm_syscon0>;
|
||||||
status = "disabled";
|
status = "disabled";
|
||||||
dma-coherent;
|
dma-coherent;
|
||||||
|
@ -114,7 +116,8 @@
|
||||||
#size-cells = <0>;
|
#size-cells = <0>;
|
||||||
compatible = "marvell,orion-mdio";
|
compatible = "marvell,orion-mdio";
|
||||||
reg = <0x12a200 0x10>;
|
reg = <0x12a200 0x10>;
|
||||||
clocks = <&cpm_clk 1 9>, <&cpm_clk 1 5>;
|
clocks = <&cpm_clk 1 9>, <&cpm_clk 1 5>,
|
||||||
|
<&cpm_clk 1 6>, <&cpm_clk 1 18>;
|
||||||
status = "disabled";
|
status = "disabled";
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -295,8 +298,8 @@
|
||||||
compatible = "marvell,armada-cp110-sdhci";
|
compatible = "marvell,armada-cp110-sdhci";
|
||||||
reg = <0x780000 0x300>;
|
reg = <0x780000 0x300>;
|
||||||
interrupts = <ICU_GRP_NSR 27 IRQ_TYPE_LEVEL_HIGH>;
|
interrupts = <ICU_GRP_NSR 27 IRQ_TYPE_LEVEL_HIGH>;
|
||||||
clock-names = "core";
|
clock-names = "core","axi";
|
||||||
clocks = <&cpm_clk 1 4>;
|
clocks = <&cpm_clk 1 4>, <&cpm_clk 1 18>;
|
||||||
dma-coherent;
|
dma-coherent;
|
||||||
status = "disabled";
|
status = "disabled";
|
||||||
};
|
};
|
||||||
|
|
|
@ -63,8 +63,10 @@
|
||||||
cps_ethernet: ethernet@0 {
|
cps_ethernet: ethernet@0 {
|
||||||
compatible = "marvell,armada-7k-pp22";
|
compatible = "marvell,armada-7k-pp22";
|
||||||
reg = <0x0 0x100000>, <0x129000 0xb000>;
|
reg = <0x0 0x100000>, <0x129000 0xb000>;
|
||||||
clocks = <&cps_clk 1 3>, <&cps_clk 1 9>, <&cps_clk 1 5>;
|
clocks = <&cps_clk 1 3>, <&cps_clk 1 9>,
|
||||||
clock-names = "pp_clk", "gop_clk", "mg_clk";
|
<&cps_clk 1 5>, <&cps_clk 1 18>;
|
||||||
|
clock-names = "pp_clk", "gop_clk",
|
||||||
|
"mg_clk", "axi_clk";
|
||||||
marvell,system-controller = <&cps_syscon0>;
|
marvell,system-controller = <&cps_syscon0>;
|
||||||
status = "disabled";
|
status = "disabled";
|
||||||
dma-coherent;
|
dma-coherent;
|
||||||
|
@ -114,7 +116,8 @@
|
||||||
#size-cells = <0>;
|
#size-cells = <0>;
|
||||||
compatible = "marvell,orion-mdio";
|
compatible = "marvell,orion-mdio";
|
||||||
reg = <0x12a200 0x10>;
|
reg = <0x12a200 0x10>;
|
||||||
clocks = <&cps_clk 1 9>, <&cps_clk 1 5>;
|
clocks = <&cps_clk 1 9>, <&cps_clk 1 5>,
|
||||||
|
<&cps_clk 1 6>, <&cps_clk 1 18>;
|
||||||
status = "disabled";
|
status = "disabled";
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
|
@ -44,7 +44,7 @@ static int handle_hvc(struct kvm_vcpu *vcpu, struct kvm_run *run)
|
||||||
|
|
||||||
ret = kvm_psci_call(vcpu);
|
ret = kvm_psci_call(vcpu);
|
||||||
if (ret < 0) {
|
if (ret < 0) {
|
||||||
kvm_inject_undefined(vcpu);
|
vcpu_set_reg(vcpu, 0, ~0UL);
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -53,7 +53,7 @@ static int handle_hvc(struct kvm_vcpu *vcpu, struct kvm_run *run)
|
||||||
|
|
||||||
static int handle_smc(struct kvm_vcpu *vcpu, struct kvm_run *run)
|
static int handle_smc(struct kvm_vcpu *vcpu, struct kvm_run *run)
|
||||||
{
|
{
|
||||||
kvm_inject_undefined(vcpu);
|
vcpu_set_reg(vcpu, 0, ~0UL);
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -148,7 +148,8 @@ static inline int epilogue_offset(const struct jit_ctx *ctx)
|
||||||
/* Stack must be multiples of 16B */
|
/* Stack must be multiples of 16B */
|
||||||
#define STACK_ALIGN(sz) (((sz) + 15) & ~15)
|
#define STACK_ALIGN(sz) (((sz) + 15) & ~15)
|
||||||
|
|
||||||
#define PROLOGUE_OFFSET 8
|
/* Tail call offset to jump into */
|
||||||
|
#define PROLOGUE_OFFSET 7
|
||||||
|
|
||||||
static int build_prologue(struct jit_ctx *ctx)
|
static int build_prologue(struct jit_ctx *ctx)
|
||||||
{
|
{
|
||||||
|
@ -200,19 +201,19 @@ static int build_prologue(struct jit_ctx *ctx)
|
||||||
/* Initialize tail_call_cnt */
|
/* Initialize tail_call_cnt */
|
||||||
emit(A64_MOVZ(1, tcc, 0, 0), ctx);
|
emit(A64_MOVZ(1, tcc, 0, 0), ctx);
|
||||||
|
|
||||||
/* 4 byte extra for skb_copy_bits buffer */
|
|
||||||
ctx->stack_size = prog->aux->stack_depth + 4;
|
|
||||||
ctx->stack_size = STACK_ALIGN(ctx->stack_size);
|
|
||||||
|
|
||||||
/* Set up function call stack */
|
|
||||||
emit(A64_SUB_I(1, A64_SP, A64_SP, ctx->stack_size), ctx);
|
|
||||||
|
|
||||||
cur_offset = ctx->idx - idx0;
|
cur_offset = ctx->idx - idx0;
|
||||||
if (cur_offset != PROLOGUE_OFFSET) {
|
if (cur_offset != PROLOGUE_OFFSET) {
|
||||||
pr_err_once("PROLOGUE_OFFSET = %d, expected %d!\n",
|
pr_err_once("PROLOGUE_OFFSET = %d, expected %d!\n",
|
||||||
cur_offset, PROLOGUE_OFFSET);
|
cur_offset, PROLOGUE_OFFSET);
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* 4 byte extra for skb_copy_bits buffer */
|
||||||
|
ctx->stack_size = prog->aux->stack_depth + 4;
|
||||||
|
ctx->stack_size = STACK_ALIGN(ctx->stack_size);
|
||||||
|
|
||||||
|
/* Set up function call stack */
|
||||||
|
emit(A64_SUB_I(1, A64_SP, A64_SP, ctx->stack_size), ctx);
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -260,11 +261,12 @@ static int emit_bpf_tail_call(struct jit_ctx *ctx)
|
||||||
emit(A64_LDR64(prg, tmp, prg), ctx);
|
emit(A64_LDR64(prg, tmp, prg), ctx);
|
||||||
emit(A64_CBZ(1, prg, jmp_offset), ctx);
|
emit(A64_CBZ(1, prg, jmp_offset), ctx);
|
||||||
|
|
||||||
/* goto *(prog->bpf_func + prologue_size); */
|
/* goto *(prog->bpf_func + prologue_offset); */
|
||||||
off = offsetof(struct bpf_prog, bpf_func);
|
off = offsetof(struct bpf_prog, bpf_func);
|
||||||
emit_a64_mov_i64(tmp, off, ctx);
|
emit_a64_mov_i64(tmp, off, ctx);
|
||||||
emit(A64_LDR64(tmp, prg, tmp), ctx);
|
emit(A64_LDR64(tmp, prg, tmp), ctx);
|
||||||
emit(A64_ADD_I(1, tmp, tmp, sizeof(u32) * PROLOGUE_OFFSET), ctx);
|
emit(A64_ADD_I(1, tmp, tmp, sizeof(u32) * PROLOGUE_OFFSET), ctx);
|
||||||
|
emit(A64_ADD_I(1, A64_SP, A64_SP, ctx->stack_size), ctx);
|
||||||
emit(A64_BR(tmp), ctx);
|
emit(A64_BR(tmp), ctx);
|
||||||
|
|
||||||
/* out: */
|
/* out: */
|
||||||
|
|
|
@ -575,7 +575,7 @@ static int __init ar7_register_uarts(void)
|
||||||
uart_port.type = PORT_AR7;
|
uart_port.type = PORT_AR7;
|
||||||
uart_port.uartclk = clk_get_rate(bus_clk) / 2;
|
uart_port.uartclk = clk_get_rate(bus_clk) / 2;
|
||||||
uart_port.iotype = UPIO_MEM32;
|
uart_port.iotype = UPIO_MEM32;
|
||||||
uart_port.flags = UPF_FIXED_TYPE;
|
uart_port.flags = UPF_FIXED_TYPE | UPF_BOOT_AUTOCONF;
|
||||||
uart_port.regshift = 2;
|
uart_port.regshift = 2;
|
||||||
|
|
||||||
uart_port.line = 0;
|
uart_port.line = 0;
|
||||||
|
|
|
@ -292,7 +292,6 @@ void mips_cm_lock_other(unsigned int cluster, unsigned int core,
|
||||||
*this_cpu_ptr(&cm_core_lock_flags));
|
*this_cpu_ptr(&cm_core_lock_flags));
|
||||||
} else {
|
} else {
|
||||||
WARN_ON(cluster != 0);
|
WARN_ON(cluster != 0);
|
||||||
WARN_ON(vp != 0);
|
|
||||||
WARN_ON(block != CM_GCR_Cx_OTHER_BLOCK_LOCAL);
|
WARN_ON(block != CM_GCR_Cx_OTHER_BLOCK_LOCAL);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
|
|
@ -705,6 +705,18 @@ int mips_set_process_fp_mode(struct task_struct *task, unsigned int value)
|
||||||
struct task_struct *t;
|
struct task_struct *t;
|
||||||
int max_users;
|
int max_users;
|
||||||
|
|
||||||
|
/* If nothing to change, return right away, successfully. */
|
||||||
|
if (value == mips_get_process_fp_mode(task))
|
||||||
|
return 0;
|
||||||
|
|
||||||
|
/* Only accept a mode change if 64-bit FP enabled for o32. */
|
||||||
|
if (!IS_ENABLED(CONFIG_MIPS_O32_FP64_SUPPORT))
|
||||||
|
return -EOPNOTSUPP;
|
||||||
|
|
||||||
|
/* And only for o32 tasks. */
|
||||||
|
if (IS_ENABLED(CONFIG_64BIT) && !test_thread_flag(TIF_32BIT_REGS))
|
||||||
|
return -EOPNOTSUPP;
|
||||||
|
|
||||||
/* Check the value is valid */
|
/* Check the value is valid */
|
||||||
if (value & ~known_bits)
|
if (value & ~known_bits)
|
||||||
return -EOPNOTSUPP;
|
return -EOPNOTSUPP;
|
||||||
|
|
|
@ -410,25 +410,38 @@ static int gpr64_set(struct task_struct *target,
|
||||||
|
|
||||||
#endif /* CONFIG_64BIT */
|
#endif /* CONFIG_64BIT */
|
||||||
|
|
||||||
static int fpr_get(struct task_struct *target,
|
/*
|
||||||
const struct user_regset *regset,
|
* Copy the floating-point context to the supplied NT_PRFPREG buffer,
|
||||||
unsigned int pos, unsigned int count,
|
* !CONFIG_CPU_HAS_MSA variant. FP context's general register slots
|
||||||
void *kbuf, void __user *ubuf)
|
* correspond 1:1 to buffer slots. Only general registers are copied.
|
||||||
|
*/
|
||||||
|
static int fpr_get_fpa(struct task_struct *target,
|
||||||
|
unsigned int *pos, unsigned int *count,
|
||||||
|
void **kbuf, void __user **ubuf)
|
||||||
{
|
{
|
||||||
unsigned i;
|
return user_regset_copyout(pos, count, kbuf, ubuf,
|
||||||
int err;
|
&target->thread.fpu,
|
||||||
|
0, NUM_FPU_REGS * sizeof(elf_fpreg_t));
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Copy the floating-point context to the supplied NT_PRFPREG buffer,
|
||||||
|
* CONFIG_CPU_HAS_MSA variant. Only lower 64 bits of FP context's
|
||||||
|
* general register slots are copied to buffer slots. Only general
|
||||||
|
* registers are copied.
|
||||||
|
*/
|
||||||
|
static int fpr_get_msa(struct task_struct *target,
|
||||||
|
unsigned int *pos, unsigned int *count,
|
||||||
|
void **kbuf, void __user **ubuf)
|
||||||
|
{
|
||||||
|
unsigned int i;
|
||||||
u64 fpr_val;
|
u64 fpr_val;
|
||||||
|
int err;
|
||||||
|
|
||||||
/* XXX fcr31 */
|
BUILD_BUG_ON(sizeof(fpr_val) != sizeof(elf_fpreg_t));
|
||||||
|
|
||||||
if (sizeof(target->thread.fpu.fpr[i]) == sizeof(elf_fpreg_t))
|
|
||||||
return user_regset_copyout(&pos, &count, &kbuf, &ubuf,
|
|
||||||
&target->thread.fpu,
|
|
||||||
0, sizeof(elf_fpregset_t));
|
|
||||||
|
|
||||||
for (i = 0; i < NUM_FPU_REGS; i++) {
|
for (i = 0; i < NUM_FPU_REGS; i++) {
|
||||||
fpr_val = get_fpr64(&target->thread.fpu.fpr[i], 0);
|
fpr_val = get_fpr64(&target->thread.fpu.fpr[i], 0);
|
||||||
err = user_regset_copyout(&pos, &count, &kbuf, &ubuf,
|
err = user_regset_copyout(pos, count, kbuf, ubuf,
|
||||||
&fpr_val, i * sizeof(elf_fpreg_t),
|
&fpr_val, i * sizeof(elf_fpreg_t),
|
||||||
(i + 1) * sizeof(elf_fpreg_t));
|
(i + 1) * sizeof(elf_fpreg_t));
|
||||||
if (err)
|
if (err)
|
||||||
|
@ -438,27 +451,64 @@ static int fpr_get(struct task_struct *target,
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
static int fpr_set(struct task_struct *target,
|
/*
|
||||||
|
* Copy the floating-point context to the supplied NT_PRFPREG buffer.
|
||||||
|
* Choose the appropriate helper for general registers, and then copy
|
||||||
|
* the FCSR register separately.
|
||||||
|
*/
|
||||||
|
static int fpr_get(struct task_struct *target,
|
||||||
const struct user_regset *regset,
|
const struct user_regset *regset,
|
||||||
unsigned int pos, unsigned int count,
|
unsigned int pos, unsigned int count,
|
||||||
const void *kbuf, const void __user *ubuf)
|
void *kbuf, void __user *ubuf)
|
||||||
{
|
{
|
||||||
unsigned i;
|
const int fcr31_pos = NUM_FPU_REGS * sizeof(elf_fpreg_t);
|
||||||
int err;
|
int err;
|
||||||
|
|
||||||
|
if (sizeof(target->thread.fpu.fpr[0]) == sizeof(elf_fpreg_t))
|
||||||
|
err = fpr_get_fpa(target, &pos, &count, &kbuf, &ubuf);
|
||||||
|
else
|
||||||
|
err = fpr_get_msa(target, &pos, &count, &kbuf, &ubuf);
|
||||||
|
if (err)
|
||||||
|
return err;
|
||||||
|
|
||||||
|
err = user_regset_copyout(&pos, &count, &kbuf, &ubuf,
|
||||||
|
&target->thread.fpu.fcr31,
|
||||||
|
fcr31_pos, fcr31_pos + sizeof(u32));
|
||||||
|
|
||||||
|
return err;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Copy the supplied NT_PRFPREG buffer to the floating-point context,
|
||||||
|
* !CONFIG_CPU_HAS_MSA variant. Buffer slots correspond 1:1 to FP
|
||||||
|
* context's general register slots. Only general registers are copied.
|
||||||
|
*/
|
||||||
|
static int fpr_set_fpa(struct task_struct *target,
|
||||||
|
unsigned int *pos, unsigned int *count,
|
||||||
|
const void **kbuf, const void __user **ubuf)
|
||||||
|
{
|
||||||
|
return user_regset_copyin(pos, count, kbuf, ubuf,
|
||||||
|
&target->thread.fpu,
|
||||||
|
0, NUM_FPU_REGS * sizeof(elf_fpreg_t));
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Copy the supplied NT_PRFPREG buffer to the floating-point context,
|
||||||
|
* CONFIG_CPU_HAS_MSA variant. Buffer slots are copied to lower 64
|
||||||
|
* bits only of FP context's general register slots. Only general
|
||||||
|
* registers are copied.
|
||||||
|
*/
|
||||||
|
static int fpr_set_msa(struct task_struct *target,
|
||||||
|
unsigned int *pos, unsigned int *count,
|
||||||
|
const void **kbuf, const void __user **ubuf)
|
||||||
|
{
|
||||||
|
unsigned int i;
|
||||||
u64 fpr_val;
|
u64 fpr_val;
|
||||||
|
int err;
|
||||||
/* XXX fcr31 */
|
|
||||||
|
|
||||||
init_fp_ctx(target);
|
|
||||||
|
|
||||||
if (sizeof(target->thread.fpu.fpr[i]) == sizeof(elf_fpreg_t))
|
|
||||||
return user_regset_copyin(&pos, &count, &kbuf, &ubuf,
|
|
||||||
&target->thread.fpu,
|
|
||||||
0, sizeof(elf_fpregset_t));
|
|
||||||
|
|
||||||
BUILD_BUG_ON(sizeof(fpr_val) != sizeof(elf_fpreg_t));
|
BUILD_BUG_ON(sizeof(fpr_val) != sizeof(elf_fpreg_t));
|
||||||
for (i = 0; i < NUM_FPU_REGS && count >= sizeof(elf_fpreg_t); i++) {
|
for (i = 0; i < NUM_FPU_REGS && *count > 0; i++) {
|
||||||
err = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
|
err = user_regset_copyin(pos, count, kbuf, ubuf,
|
||||||
&fpr_val, i * sizeof(elf_fpreg_t),
|
&fpr_val, i * sizeof(elf_fpreg_t),
|
||||||
(i + 1) * sizeof(elf_fpreg_t));
|
(i + 1) * sizeof(elf_fpreg_t));
|
||||||
if (err)
|
if (err)
|
||||||
|
@ -469,6 +519,53 @@ static int fpr_set(struct task_struct *target,
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Copy the supplied NT_PRFPREG buffer to the floating-point context.
|
||||||
|
* Choose the appropriate helper for general registers, and then copy
|
||||||
|
* the FCSR register separately.
|
||||||
|
*
|
||||||
|
* We optimize for the case where `count % sizeof(elf_fpreg_t) == 0',
|
||||||
|
* which is supposed to have been guaranteed by the kernel before
|
||||||
|
* calling us, e.g. in `ptrace_regset'. We enforce that requirement,
|
||||||
|
* so that we can safely avoid preinitializing temporaries for
|
||||||
|
* partial register writes.
|
||||||
|
*/
|
||||||
|
static int fpr_set(struct task_struct *target,
|
||||||
|
const struct user_regset *regset,
|
||||||
|
unsigned int pos, unsigned int count,
|
||||||
|
const void *kbuf, const void __user *ubuf)
|
||||||
|
{
|
||||||
|
const int fcr31_pos = NUM_FPU_REGS * sizeof(elf_fpreg_t);
|
||||||
|
u32 fcr31;
|
||||||
|
int err;
|
||||||
|
|
||||||
|
BUG_ON(count % sizeof(elf_fpreg_t));
|
||||||
|
|
||||||
|
if (pos + count > sizeof(elf_fpregset_t))
|
||||||
|
return -EIO;
|
||||||
|
|
||||||
|
init_fp_ctx(target);
|
||||||
|
|
||||||
|
if (sizeof(target->thread.fpu.fpr[0]) == sizeof(elf_fpreg_t))
|
||||||
|
err = fpr_set_fpa(target, &pos, &count, &kbuf, &ubuf);
|
||||||
|
else
|
||||||
|
err = fpr_set_msa(target, &pos, &count, &kbuf, &ubuf);
|
||||||
|
if (err)
|
||||||
|
return err;
|
||||||
|
|
||||||
|
if (count > 0) {
|
||||||
|
err = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
|
||||||
|
&fcr31,
|
||||||
|
fcr31_pos, fcr31_pos + sizeof(u32));
|
||||||
|
if (err)
|
||||||
|
return err;
|
||||||
|
|
||||||
|
ptrace_setfcr31(target, fcr31);
|
||||||
|
}
|
||||||
|
|
||||||
|
return err;
|
||||||
|
}
|
||||||
|
|
||||||
enum mips_regset {
|
enum mips_regset {
|
||||||
REGSET_GPR,
|
REGSET_GPR,
|
||||||
REGSET_FPR,
|
REGSET_FPR,
|
||||||
|
|
|
@ -209,5 +209,11 @@ exc_##label##_book3e:
|
||||||
ori r3,r3,vector_offset@l; \
|
ori r3,r3,vector_offset@l; \
|
||||||
mtspr SPRN_IVOR##vector_number,r3;
|
mtspr SPRN_IVOR##vector_number,r3;
|
||||||
|
|
||||||
|
#define RFI_TO_KERNEL \
|
||||||
|
rfi
|
||||||
|
|
||||||
|
#define RFI_TO_USER \
|
||||||
|
rfi
|
||||||
|
|
||||||
#endif /* _ASM_POWERPC_EXCEPTION_64E_H */
|
#endif /* _ASM_POWERPC_EXCEPTION_64E_H */
|
||||||
|
|
||||||
|
|
|
@ -69,6 +69,59 @@
|
||||||
*/
|
*/
|
||||||
#define EX_R3 EX_DAR
|
#define EX_R3 EX_DAR
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Macros for annotating the expected destination of (h)rfid
|
||||||
|
*
|
||||||
|
* The nop instructions allow us to insert one or more instructions to flush the
|
||||||
|
* L1-D cache when returning to userspace or a guest.
|
||||||
|
*/
|
||||||
|
#define RFI_FLUSH_SLOT \
|
||||||
|
RFI_FLUSH_FIXUP_SECTION; \
|
||||||
|
nop; \
|
||||||
|
nop; \
|
||||||
|
nop
|
||||||
|
|
||||||
|
#define RFI_TO_KERNEL \
|
||||||
|
rfid
|
||||||
|
|
||||||
|
#define RFI_TO_USER \
|
||||||
|
RFI_FLUSH_SLOT; \
|
||||||
|
rfid; \
|
||||||
|
b rfi_flush_fallback
|
||||||
|
|
||||||
|
#define RFI_TO_USER_OR_KERNEL \
|
||||||
|
RFI_FLUSH_SLOT; \
|
||||||
|
rfid; \
|
||||||
|
b rfi_flush_fallback
|
||||||
|
|
||||||
|
#define RFI_TO_GUEST \
|
||||||
|
RFI_FLUSH_SLOT; \
|
||||||
|
rfid; \
|
||||||
|
b rfi_flush_fallback
|
||||||
|
|
||||||
|
#define HRFI_TO_KERNEL \
|
||||||
|
hrfid
|
||||||
|
|
||||||
|
#define HRFI_TO_USER \
|
||||||
|
RFI_FLUSH_SLOT; \
|
||||||
|
hrfid; \
|
||||||
|
b hrfi_flush_fallback
|
||||||
|
|
||||||
|
#define HRFI_TO_USER_OR_KERNEL \
|
||||||
|
RFI_FLUSH_SLOT; \
|
||||||
|
hrfid; \
|
||||||
|
b hrfi_flush_fallback
|
||||||
|
|
||||||
|
#define HRFI_TO_GUEST \
|
||||||
|
RFI_FLUSH_SLOT; \
|
||||||
|
hrfid; \
|
||||||
|
b hrfi_flush_fallback
|
||||||
|
|
||||||
|
#define HRFI_TO_UNKNOWN \
|
||||||
|
RFI_FLUSH_SLOT; \
|
||||||
|
hrfid; \
|
||||||
|
b hrfi_flush_fallback
|
||||||
|
|
||||||
#ifdef CONFIG_RELOCATABLE
|
#ifdef CONFIG_RELOCATABLE
|
||||||
#define __EXCEPTION_RELON_PROLOG_PSERIES_1(label, h) \
|
#define __EXCEPTION_RELON_PROLOG_PSERIES_1(label, h) \
|
||||||
mfspr r11,SPRN_##h##SRR0; /* save SRR0 */ \
|
mfspr r11,SPRN_##h##SRR0; /* save SRR0 */ \
|
||||||
|
@ -213,7 +266,7 @@ END_FTR_SECTION_NESTED(ftr,ftr,943)
|
||||||
mtspr SPRN_##h##SRR0,r12; \
|
mtspr SPRN_##h##SRR0,r12; \
|
||||||
mfspr r12,SPRN_##h##SRR1; /* and SRR1 */ \
|
mfspr r12,SPRN_##h##SRR1; /* and SRR1 */ \
|
||||||
mtspr SPRN_##h##SRR1,r10; \
|
mtspr SPRN_##h##SRR1,r10; \
|
||||||
h##rfid; \
|
h##RFI_TO_KERNEL; \
|
||||||
b . /* prevent speculative execution */
|
b . /* prevent speculative execution */
|
||||||
#define EXCEPTION_PROLOG_PSERIES_1(label, h) \
|
#define EXCEPTION_PROLOG_PSERIES_1(label, h) \
|
||||||
__EXCEPTION_PROLOG_PSERIES_1(label, h)
|
__EXCEPTION_PROLOG_PSERIES_1(label, h)
|
||||||
|
@ -227,7 +280,7 @@ END_FTR_SECTION_NESTED(ftr,ftr,943)
|
||||||
mtspr SPRN_##h##SRR0,r12; \
|
mtspr SPRN_##h##SRR0,r12; \
|
||||||
mfspr r12,SPRN_##h##SRR1; /* and SRR1 */ \
|
mfspr r12,SPRN_##h##SRR1; /* and SRR1 */ \
|
||||||
mtspr SPRN_##h##SRR1,r10; \
|
mtspr SPRN_##h##SRR1,r10; \
|
||||||
h##rfid; \
|
h##RFI_TO_KERNEL; \
|
||||||
b . /* prevent speculative execution */
|
b . /* prevent speculative execution */
|
||||||
|
|
||||||
#define EXCEPTION_PROLOG_PSERIES_1_NORI(label, h) \
|
#define EXCEPTION_PROLOG_PSERIES_1_NORI(label, h) \
|
||||||
|
|
|
@ -187,7 +187,20 @@ label##3: \
|
||||||
FTR_ENTRY_OFFSET label##1b-label##3b; \
|
FTR_ENTRY_OFFSET label##1b-label##3b; \
|
||||||
.popsection;
|
.popsection;
|
||||||
|
|
||||||
|
#define RFI_FLUSH_FIXUP_SECTION \
|
||||||
|
951: \
|
||||||
|
.pushsection __rfi_flush_fixup,"a"; \
|
||||||
|
.align 2; \
|
||||||
|
952: \
|
||||||
|
FTR_ENTRY_OFFSET 951b-952b; \
|
||||||
|
.popsection;
|
||||||
|
|
||||||
|
|
||||||
#ifndef __ASSEMBLY__
|
#ifndef __ASSEMBLY__
|
||||||
|
#include <linux/types.h>
|
||||||
|
|
||||||
|
extern long __start___rfi_flush_fixup, __stop___rfi_flush_fixup;
|
||||||
|
|
||||||
void apply_feature_fixups(void);
|
void apply_feature_fixups(void);
|
||||||
void setup_feature_keys(void);
|
void setup_feature_keys(void);
|
||||||
#endif
|
#endif
|
||||||
|
|
|
@ -241,6 +241,7 @@
|
||||||
#define H_GET_HCA_INFO 0x1B8
|
#define H_GET_HCA_INFO 0x1B8
|
||||||
#define H_GET_PERF_COUNT 0x1BC
|
#define H_GET_PERF_COUNT 0x1BC
|
||||||
#define H_MANAGE_TRACE 0x1C0
|
#define H_MANAGE_TRACE 0x1C0
|
||||||
|
#define H_GET_CPU_CHARACTERISTICS 0x1C8
|
||||||
#define H_FREE_LOGICAL_LAN_BUFFER 0x1D4
|
#define H_FREE_LOGICAL_LAN_BUFFER 0x1D4
|
||||||
#define H_QUERY_INT_STATE 0x1E4
|
#define H_QUERY_INT_STATE 0x1E4
|
||||||
#define H_POLL_PENDING 0x1D8
|
#define H_POLL_PENDING 0x1D8
|
||||||
|
@ -330,6 +331,17 @@
|
||||||
#define H_SIGNAL_SYS_RESET_ALL_OTHERS -2
|
#define H_SIGNAL_SYS_RESET_ALL_OTHERS -2
|
||||||
/* >= 0 values are CPU number */
|
/* >= 0 values are CPU number */
|
||||||
|
|
||||||
|
/* H_GET_CPU_CHARACTERISTICS return values */
|
||||||
|
#define H_CPU_CHAR_SPEC_BAR_ORI31 (1ull << 63) // IBM bit 0
|
||||||
|
#define H_CPU_CHAR_BCCTRL_SERIALISED (1ull << 62) // IBM bit 1
|
||||||
|
#define H_CPU_CHAR_L1D_FLUSH_ORI30 (1ull << 61) // IBM bit 2
|
||||||
|
#define H_CPU_CHAR_L1D_FLUSH_TRIG2 (1ull << 60) // IBM bit 3
|
||||||
|
#define H_CPU_CHAR_L1D_THREAD_PRIV (1ull << 59) // IBM bit 4
|
||||||
|
|
||||||
|
#define H_CPU_BEHAV_FAVOUR_SECURITY (1ull << 63) // IBM bit 0
|
||||||
|
#define H_CPU_BEHAV_L1D_FLUSH_PR (1ull << 62) // IBM bit 1
|
||||||
|
#define H_CPU_BEHAV_BNDS_CHK_SPEC_BAR (1ull << 61) // IBM bit 2
|
||||||
|
|
||||||
/* Flag values used in H_REGISTER_PROC_TBL hcall */
|
/* Flag values used in H_REGISTER_PROC_TBL hcall */
|
||||||
#define PROC_TABLE_OP_MASK 0x18
|
#define PROC_TABLE_OP_MASK 0x18
|
||||||
#define PROC_TABLE_DEREG 0x10
|
#define PROC_TABLE_DEREG 0x10
|
||||||
|
@ -436,6 +448,11 @@ static inline unsigned int get_longbusy_msecs(int longbusy_rc)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
struct h_cpu_char_result {
|
||||||
|
u64 character;
|
||||||
|
u64 behaviour;
|
||||||
|
};
|
||||||
|
|
||||||
#endif /* __ASSEMBLY__ */
|
#endif /* __ASSEMBLY__ */
|
||||||
#endif /* __KERNEL__ */
|
#endif /* __KERNEL__ */
|
||||||
#endif /* _ASM_POWERPC_HVCALL_H */
|
#endif /* _ASM_POWERPC_HVCALL_H */
|
||||||
|
|
|
@ -231,6 +231,16 @@ struct paca_struct {
|
||||||
struct sibling_subcore_state *sibling_subcore_state;
|
struct sibling_subcore_state *sibling_subcore_state;
|
||||||
#endif
|
#endif
|
||||||
#endif
|
#endif
|
||||||
|
#ifdef CONFIG_PPC_BOOK3S_64
|
||||||
|
/*
|
||||||
|
* rfi fallback flush must be in its own cacheline to prevent
|
||||||
|
* other paca data leaking into the L1d
|
||||||
|
*/
|
||||||
|
u64 exrfi[EX_SIZE] __aligned(0x80);
|
||||||
|
void *rfi_flush_fallback_area;
|
||||||
|
u64 l1d_flush_congruence;
|
||||||
|
u64 l1d_flush_sets;
|
||||||
|
#endif
|
||||||
};
|
};
|
||||||
|
|
||||||
extern void copy_mm_to_paca(struct mm_struct *mm);
|
extern void copy_mm_to_paca(struct mm_struct *mm);
|
||||||
|
|
|
@ -326,4 +326,18 @@ static inline long plapr_signal_sys_reset(long cpu)
|
||||||
return plpar_hcall_norets(H_SIGNAL_SYS_RESET, cpu);
|
return plpar_hcall_norets(H_SIGNAL_SYS_RESET, cpu);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static inline long plpar_get_cpu_characteristics(struct h_cpu_char_result *p)
|
||||||
|
{
|
||||||
|
unsigned long retbuf[PLPAR_HCALL_BUFSIZE];
|
||||||
|
long rc;
|
||||||
|
|
||||||
|
rc = plpar_hcall(H_GET_CPU_CHARACTERISTICS, retbuf);
|
||||||
|
if (rc == H_SUCCESS) {
|
||||||
|
p->character = retbuf[0];
|
||||||
|
p->behaviour = retbuf[1];
|
||||||
|
}
|
||||||
|
|
||||||
|
return rc;
|
||||||
|
}
|
||||||
|
|
||||||
#endif /* _ASM_POWERPC_PLPAR_WRAPPERS_H */
|
#endif /* _ASM_POWERPC_PLPAR_WRAPPERS_H */
|
||||||
|
|
|
@ -39,6 +39,19 @@ static inline void pseries_big_endian_exceptions(void) {}
|
||||||
static inline void pseries_little_endian_exceptions(void) {}
|
static inline void pseries_little_endian_exceptions(void) {}
|
||||||
#endif /* CONFIG_PPC_PSERIES */
|
#endif /* CONFIG_PPC_PSERIES */
|
||||||
|
|
||||||
|
void rfi_flush_enable(bool enable);
|
||||||
|
|
||||||
|
/* These are bit flags */
|
||||||
|
enum l1d_flush_type {
|
||||||
|
L1D_FLUSH_NONE = 0x1,
|
||||||
|
L1D_FLUSH_FALLBACK = 0x2,
|
||||||
|
L1D_FLUSH_ORI = 0x4,
|
||||||
|
L1D_FLUSH_MTTRIG = 0x8,
|
||||||
|
};
|
||||||
|
|
||||||
|
void __init setup_rfi_flush(enum l1d_flush_type, bool enable);
|
||||||
|
void do_rfi_flush_fixups(enum l1d_flush_type types);
|
||||||
|
|
||||||
#endif /* !__ASSEMBLY__ */
|
#endif /* !__ASSEMBLY__ */
|
||||||
|
|
||||||
#endif /* _ASM_POWERPC_SETUP_H */
|
#endif /* _ASM_POWERPC_SETUP_H */
|
||||||
|
|
|
@ -237,6 +237,11 @@ int main(void)
|
||||||
OFFSET(PACA_NMI_EMERG_SP, paca_struct, nmi_emergency_sp);
|
OFFSET(PACA_NMI_EMERG_SP, paca_struct, nmi_emergency_sp);
|
||||||
OFFSET(PACA_IN_MCE, paca_struct, in_mce);
|
OFFSET(PACA_IN_MCE, paca_struct, in_mce);
|
||||||
OFFSET(PACA_IN_NMI, paca_struct, in_nmi);
|
OFFSET(PACA_IN_NMI, paca_struct, in_nmi);
|
||||||
|
OFFSET(PACA_RFI_FLUSH_FALLBACK_AREA, paca_struct, rfi_flush_fallback_area);
|
||||||
|
OFFSET(PACA_EXRFI, paca_struct, exrfi);
|
||||||
|
OFFSET(PACA_L1D_FLUSH_CONGRUENCE, paca_struct, l1d_flush_congruence);
|
||||||
|
OFFSET(PACA_L1D_FLUSH_SETS, paca_struct, l1d_flush_sets);
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
OFFSET(PACAHWCPUID, paca_struct, hw_cpu_id);
|
OFFSET(PACAHWCPUID, paca_struct, hw_cpu_id);
|
||||||
OFFSET(PACAKEXECSTATE, paca_struct, kexec_state);
|
OFFSET(PACAKEXECSTATE, paca_struct, kexec_state);
|
||||||
|
|
|
@ -37,6 +37,11 @@
|
||||||
#include <asm/tm.h>
|
#include <asm/tm.h>
|
||||||
#include <asm/ppc-opcode.h>
|
#include <asm/ppc-opcode.h>
|
||||||
#include <asm/export.h>
|
#include <asm/export.h>
|
||||||
|
#ifdef CONFIG_PPC_BOOK3S
|
||||||
|
#include <asm/exception-64s.h>
|
||||||
|
#else
|
||||||
|
#include <asm/exception-64e.h>
|
||||||
|
#endif
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* System calls.
|
* System calls.
|
||||||
|
@ -262,13 +267,23 @@ BEGIN_FTR_SECTION
|
||||||
END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
|
END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
|
||||||
|
|
||||||
ld r13,GPR13(r1) /* only restore r13 if returning to usermode */
|
ld r13,GPR13(r1) /* only restore r13 if returning to usermode */
|
||||||
|
ld r2,GPR2(r1)
|
||||||
|
ld r1,GPR1(r1)
|
||||||
|
mtlr r4
|
||||||
|
mtcr r5
|
||||||
|
mtspr SPRN_SRR0,r7
|
||||||
|
mtspr SPRN_SRR1,r8
|
||||||
|
RFI_TO_USER
|
||||||
|
b . /* prevent speculative execution */
|
||||||
|
|
||||||
|
/* exit to kernel */
|
||||||
1: ld r2,GPR2(r1)
|
1: ld r2,GPR2(r1)
|
||||||
ld r1,GPR1(r1)
|
ld r1,GPR1(r1)
|
||||||
mtlr r4
|
mtlr r4
|
||||||
mtcr r5
|
mtcr r5
|
||||||
mtspr SPRN_SRR0,r7
|
mtspr SPRN_SRR0,r7
|
||||||
mtspr SPRN_SRR1,r8
|
mtspr SPRN_SRR1,r8
|
||||||
RFI
|
RFI_TO_KERNEL
|
||||||
b . /* prevent speculative execution */
|
b . /* prevent speculative execution */
|
||||||
|
|
||||||
.Lsyscall_error:
|
.Lsyscall_error:
|
||||||
|
@ -397,8 +412,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
|
||||||
mtmsrd r10, 1
|
mtmsrd r10, 1
|
||||||
mtspr SPRN_SRR0, r11
|
mtspr SPRN_SRR0, r11
|
||||||
mtspr SPRN_SRR1, r12
|
mtspr SPRN_SRR1, r12
|
||||||
|
RFI_TO_USER
|
||||||
rfid
|
|
||||||
b . /* prevent speculative execution */
|
b . /* prevent speculative execution */
|
||||||
#endif
|
#endif
|
||||||
_ASM_NOKPROBE_SYMBOL(system_call_common);
|
_ASM_NOKPROBE_SYMBOL(system_call_common);
|
||||||
|
@ -878,7 +892,7 @@ BEGIN_FTR_SECTION
|
||||||
END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
|
END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
|
||||||
ACCOUNT_CPU_USER_EXIT(r13, r2, r4)
|
ACCOUNT_CPU_USER_EXIT(r13, r2, r4)
|
||||||
REST_GPR(13, r1)
|
REST_GPR(13, r1)
|
||||||
1:
|
|
||||||
mtspr SPRN_SRR1,r3
|
mtspr SPRN_SRR1,r3
|
||||||
|
|
||||||
ld r2,_CCR(r1)
|
ld r2,_CCR(r1)
|
||||||
|
@ -891,8 +905,22 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
|
||||||
ld r3,GPR3(r1)
|
ld r3,GPR3(r1)
|
||||||
ld r4,GPR4(r1)
|
ld r4,GPR4(r1)
|
||||||
ld r1,GPR1(r1)
|
ld r1,GPR1(r1)
|
||||||
|
RFI_TO_USER
|
||||||
|
b . /* prevent speculative execution */
|
||||||
|
|
||||||
rfid
|
1: mtspr SPRN_SRR1,r3
|
||||||
|
|
||||||
|
ld r2,_CCR(r1)
|
||||||
|
mtcrf 0xFF,r2
|
||||||
|
ld r2,_NIP(r1)
|
||||||
|
mtspr SPRN_SRR0,r2
|
||||||
|
|
||||||
|
ld r0,GPR0(r1)
|
||||||
|
ld r2,GPR2(r1)
|
||||||
|
ld r3,GPR3(r1)
|
||||||
|
ld r4,GPR4(r1)
|
||||||
|
ld r1,GPR1(r1)
|
||||||
|
RFI_TO_KERNEL
|
||||||
b . /* prevent speculative execution */
|
b . /* prevent speculative execution */
|
||||||
|
|
||||||
#endif /* CONFIG_PPC_BOOK3E */
|
#endif /* CONFIG_PPC_BOOK3E */
|
||||||
|
@ -1073,7 +1101,7 @@ __enter_rtas:
|
||||||
|
|
||||||
mtspr SPRN_SRR0,r5
|
mtspr SPRN_SRR0,r5
|
||||||
mtspr SPRN_SRR1,r6
|
mtspr SPRN_SRR1,r6
|
||||||
rfid
|
RFI_TO_KERNEL
|
||||||
b . /* prevent speculative execution */
|
b . /* prevent speculative execution */
|
||||||
|
|
||||||
rtas_return_loc:
|
rtas_return_loc:
|
||||||
|
@ -1098,7 +1126,7 @@ rtas_return_loc:
|
||||||
|
|
||||||
mtspr SPRN_SRR0,r3
|
mtspr SPRN_SRR0,r3
|
||||||
mtspr SPRN_SRR1,r4
|
mtspr SPRN_SRR1,r4
|
||||||
rfid
|
RFI_TO_KERNEL
|
||||||
b . /* prevent speculative execution */
|
b . /* prevent speculative execution */
|
||||||
_ASM_NOKPROBE_SYMBOL(__enter_rtas)
|
_ASM_NOKPROBE_SYMBOL(__enter_rtas)
|
||||||
_ASM_NOKPROBE_SYMBOL(rtas_return_loc)
|
_ASM_NOKPROBE_SYMBOL(rtas_return_loc)
|
||||||
|
@ -1171,7 +1199,7 @@ _GLOBAL(enter_prom)
|
||||||
LOAD_REG_IMMEDIATE(r12, MSR_SF | MSR_ISF | MSR_LE)
|
LOAD_REG_IMMEDIATE(r12, MSR_SF | MSR_ISF | MSR_LE)
|
||||||
andc r11,r11,r12
|
andc r11,r11,r12
|
||||||
mtsrr1 r11
|
mtsrr1 r11
|
||||||
rfid
|
RFI_TO_KERNEL
|
||||||
#endif /* CONFIG_PPC_BOOK3E */
|
#endif /* CONFIG_PPC_BOOK3E */
|
||||||
|
|
||||||
1: /* Return from OF */
|
1: /* Return from OF */
|
||||||
|
|
|
@ -254,7 +254,7 @@ BEGIN_FTR_SECTION
|
||||||
LOAD_HANDLER(r12, machine_check_handle_early)
|
LOAD_HANDLER(r12, machine_check_handle_early)
|
||||||
1: mtspr SPRN_SRR0,r12
|
1: mtspr SPRN_SRR0,r12
|
||||||
mtspr SPRN_SRR1,r11
|
mtspr SPRN_SRR1,r11
|
||||||
rfid
|
RFI_TO_KERNEL
|
||||||
b . /* prevent speculative execution */
|
b . /* prevent speculative execution */
|
||||||
2:
|
2:
|
||||||
/* Stack overflow. Stay on emergency stack and panic.
|
/* Stack overflow. Stay on emergency stack and panic.
|
||||||
|
@ -443,7 +443,7 @@ EXC_COMMON_BEGIN(machine_check_handle_early)
|
||||||
li r3,MSR_ME
|
li r3,MSR_ME
|
||||||
andc r10,r10,r3 /* Turn off MSR_ME */
|
andc r10,r10,r3 /* Turn off MSR_ME */
|
||||||
mtspr SPRN_SRR1,r10
|
mtspr SPRN_SRR1,r10
|
||||||
rfid
|
RFI_TO_KERNEL
|
||||||
b .
|
b .
|
||||||
2:
|
2:
|
||||||
/*
|
/*
|
||||||
|
@ -461,7 +461,7 @@ EXC_COMMON_BEGIN(machine_check_handle_early)
|
||||||
*/
|
*/
|
||||||
bl machine_check_queue_event
|
bl machine_check_queue_event
|
||||||
MACHINE_CHECK_HANDLER_WINDUP
|
MACHINE_CHECK_HANDLER_WINDUP
|
||||||
rfid
|
RFI_TO_USER_OR_KERNEL
|
||||||
9:
|
9:
|
||||||
/* Deliver the machine check to host kernel in V mode. */
|
/* Deliver the machine check to host kernel in V mode. */
|
||||||
MACHINE_CHECK_HANDLER_WINDUP
|
MACHINE_CHECK_HANDLER_WINDUP
|
||||||
|
@ -596,6 +596,9 @@ EXC_COMMON_BEGIN(slb_miss_common)
|
||||||
stw r9,PACA_EXSLB+EX_CCR(r13) /* save CR in exc. frame */
|
stw r9,PACA_EXSLB+EX_CCR(r13) /* save CR in exc. frame */
|
||||||
std r10,PACA_EXSLB+EX_LR(r13) /* save LR */
|
std r10,PACA_EXSLB+EX_LR(r13) /* save LR */
|
||||||
|
|
||||||
|
andi. r9,r11,MSR_PR // Check for exception from userspace
|
||||||
|
cmpdi cr4,r9,MSR_PR // And save the result in CR4 for later
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Test MSR_RI before calling slb_allocate_realmode, because the
|
* Test MSR_RI before calling slb_allocate_realmode, because the
|
||||||
* MSR in r11 gets clobbered. However we still want to allocate
|
* MSR in r11 gets clobbered. However we still want to allocate
|
||||||
|
@ -622,9 +625,12 @@ END_MMU_FTR_SECTION_IFCLR(MMU_FTR_TYPE_RADIX)
|
||||||
|
|
||||||
/* All done -- return from exception. */
|
/* All done -- return from exception. */
|
||||||
|
|
||||||
|
bne cr4,1f /* returning to kernel */
|
||||||
|
|
||||||
.machine push
|
.machine push
|
||||||
.machine "power4"
|
.machine "power4"
|
||||||
mtcrf 0x80,r9
|
mtcrf 0x80,r9
|
||||||
|
mtcrf 0x08,r9 /* MSR[PR] indication is in cr4 */
|
||||||
mtcrf 0x04,r9 /* MSR[RI] indication is in cr5 */
|
mtcrf 0x04,r9 /* MSR[RI] indication is in cr5 */
|
||||||
mtcrf 0x02,r9 /* I/D indication is in cr6 */
|
mtcrf 0x02,r9 /* I/D indication is in cr6 */
|
||||||
mtcrf 0x01,r9 /* slb_allocate uses cr0 and cr7 */
|
mtcrf 0x01,r9 /* slb_allocate uses cr0 and cr7 */
|
||||||
|
@ -638,8 +644,29 @@ END_MMU_FTR_SECTION_IFCLR(MMU_FTR_TYPE_RADIX)
|
||||||
ld r11,PACA_EXSLB+EX_R11(r13)
|
ld r11,PACA_EXSLB+EX_R11(r13)
|
||||||
ld r12,PACA_EXSLB+EX_R12(r13)
|
ld r12,PACA_EXSLB+EX_R12(r13)
|
||||||
ld r13,PACA_EXSLB+EX_R13(r13)
|
ld r13,PACA_EXSLB+EX_R13(r13)
|
||||||
rfid
|
RFI_TO_USER
|
||||||
b . /* prevent speculative execution */
|
b . /* prevent speculative execution */
|
||||||
|
1:
|
||||||
|
.machine push
|
||||||
|
.machine "power4"
|
||||||
|
mtcrf 0x80,r9
|
||||||
|
mtcrf 0x08,r9 /* MSR[PR] indication is in cr4 */
|
||||||
|
mtcrf 0x04,r9 /* MSR[RI] indication is in cr5 */
|
||||||
|
mtcrf 0x02,r9 /* I/D indication is in cr6 */
|
||||||
|
mtcrf 0x01,r9 /* slb_allocate uses cr0 and cr7 */
|
||||||
|
.machine pop
|
||||||
|
|
||||||
|
RESTORE_CTR(r9, PACA_EXSLB)
|
||||||
|
RESTORE_PPR_PACA(PACA_EXSLB, r9)
|
||||||
|
mr r3,r12
|
||||||
|
ld r9,PACA_EXSLB+EX_R9(r13)
|
||||||
|
ld r10,PACA_EXSLB+EX_R10(r13)
|
||||||
|
ld r11,PACA_EXSLB+EX_R11(r13)
|
||||||
|
ld r12,PACA_EXSLB+EX_R12(r13)
|
||||||
|
ld r13,PACA_EXSLB+EX_R13(r13)
|
||||||
|
RFI_TO_KERNEL
|
||||||
|
b . /* prevent speculative execution */
|
||||||
|
|
||||||
|
|
||||||
2: std r3,PACA_EXSLB+EX_DAR(r13)
|
2: std r3,PACA_EXSLB+EX_DAR(r13)
|
||||||
mr r3,r12
|
mr r3,r12
|
||||||
|
@ -649,7 +676,7 @@ END_MMU_FTR_SECTION_IFCLR(MMU_FTR_TYPE_RADIX)
|
||||||
mtspr SPRN_SRR0,r10
|
mtspr SPRN_SRR0,r10
|
||||||
ld r10,PACAKMSR(r13)
|
ld r10,PACAKMSR(r13)
|
||||||
mtspr SPRN_SRR1,r10
|
mtspr SPRN_SRR1,r10
|
||||||
rfid
|
RFI_TO_KERNEL
|
||||||
b .
|
b .
|
||||||
|
|
||||||
8: std r3,PACA_EXSLB+EX_DAR(r13)
|
8: std r3,PACA_EXSLB+EX_DAR(r13)
|
||||||
|
@ -660,7 +687,7 @@ END_MMU_FTR_SECTION_IFCLR(MMU_FTR_TYPE_RADIX)
|
||||||
mtspr SPRN_SRR0,r10
|
mtspr SPRN_SRR0,r10
|
||||||
ld r10,PACAKMSR(r13)
|
ld r10,PACAKMSR(r13)
|
||||||
mtspr SPRN_SRR1,r10
|
mtspr SPRN_SRR1,r10
|
||||||
rfid
|
RFI_TO_KERNEL
|
||||||
b .
|
b .
|
||||||
|
|
||||||
EXC_COMMON_BEGIN(unrecov_slb)
|
EXC_COMMON_BEGIN(unrecov_slb)
|
||||||
|
@ -905,7 +932,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_REAL_LE) \
|
||||||
mtspr SPRN_SRR0,r10 ; \
|
mtspr SPRN_SRR0,r10 ; \
|
||||||
ld r10,PACAKMSR(r13) ; \
|
ld r10,PACAKMSR(r13) ; \
|
||||||
mtspr SPRN_SRR1,r10 ; \
|
mtspr SPRN_SRR1,r10 ; \
|
||||||
rfid ; \
|
RFI_TO_KERNEL ; \
|
||||||
b . ; /* prevent speculative execution */
|
b . ; /* prevent speculative execution */
|
||||||
|
|
||||||
#define SYSCALL_FASTENDIAN \
|
#define SYSCALL_FASTENDIAN \
|
||||||
|
@ -914,7 +941,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_REAL_LE) \
|
||||||
xori r12,r12,MSR_LE ; \
|
xori r12,r12,MSR_LE ; \
|
||||||
mtspr SPRN_SRR1,r12 ; \
|
mtspr SPRN_SRR1,r12 ; \
|
||||||
mr r13,r9 ; \
|
mr r13,r9 ; \
|
||||||
rfid ; /* return to userspace */ \
|
RFI_TO_USER ; /* return to userspace */ \
|
||||||
b . ; /* prevent speculative execution */
|
b . ; /* prevent speculative execution */
|
||||||
|
|
||||||
#if defined(CONFIG_RELOCATABLE)
|
#if defined(CONFIG_RELOCATABLE)
|
||||||
|
@ -1299,7 +1326,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_CFAR)
|
||||||
ld r11,PACA_EXGEN+EX_R11(r13)
|
ld r11,PACA_EXGEN+EX_R11(r13)
|
||||||
ld r12,PACA_EXGEN+EX_R12(r13)
|
ld r12,PACA_EXGEN+EX_R12(r13)
|
||||||
ld r13,PACA_EXGEN+EX_R13(r13)
|
ld r13,PACA_EXGEN+EX_R13(r13)
|
||||||
HRFID
|
HRFI_TO_UNKNOWN
|
||||||
b .
|
b .
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
@ -1403,10 +1430,94 @@ masked_##_H##interrupt: \
|
||||||
ld r10,PACA_EXGEN+EX_R10(r13); \
|
ld r10,PACA_EXGEN+EX_R10(r13); \
|
||||||
ld r11,PACA_EXGEN+EX_R11(r13); \
|
ld r11,PACA_EXGEN+EX_R11(r13); \
|
||||||
/* returns to kernel where r13 must be set up, so don't restore it */ \
|
/* returns to kernel where r13 must be set up, so don't restore it */ \
|
||||||
##_H##rfid; \
|
##_H##RFI_TO_KERNEL; \
|
||||||
b .; \
|
b .; \
|
||||||
MASKED_DEC_HANDLER(_H)
|
MASKED_DEC_HANDLER(_H)
|
||||||
|
|
||||||
|
TRAMP_REAL_BEGIN(rfi_flush_fallback)
|
||||||
|
SET_SCRATCH0(r13);
|
||||||
|
GET_PACA(r13);
|
||||||
|
std r9,PACA_EXRFI+EX_R9(r13)
|
||||||
|
std r10,PACA_EXRFI+EX_R10(r13)
|
||||||
|
std r11,PACA_EXRFI+EX_R11(r13)
|
||||||
|
std r12,PACA_EXRFI+EX_R12(r13)
|
||||||
|
std r8,PACA_EXRFI+EX_R13(r13)
|
||||||
|
mfctr r9
|
||||||
|
ld r10,PACA_RFI_FLUSH_FALLBACK_AREA(r13)
|
||||||
|
ld r11,PACA_L1D_FLUSH_SETS(r13)
|
||||||
|
ld r12,PACA_L1D_FLUSH_CONGRUENCE(r13)
|
||||||
|
/*
|
||||||
|
* The load addresses are at staggered offsets within cachelines,
|
||||||
|
* which suits some pipelines better (on others it should not
|
||||||
|
* hurt).
|
||||||
|
*/
|
||||||
|
addi r12,r12,8
|
||||||
|
mtctr r11
|
||||||
|
DCBT_STOP_ALL_STREAM_IDS(r11) /* Stop prefetch streams */
|
||||||
|
|
||||||
|
/* order ld/st prior to dcbt stop all streams with flushing */
|
||||||
|
sync
|
||||||
|
1: li r8,0
|
||||||
|
.rept 8 /* 8-way set associative */
|
||||||
|
ldx r11,r10,r8
|
||||||
|
add r8,r8,r12
|
||||||
|
xor r11,r11,r11 // Ensure r11 is 0 even if fallback area is not
|
||||||
|
add r8,r8,r11 // Add 0, this creates a dependency on the ldx
|
||||||
|
.endr
|
||||||
|
addi r10,r10,128 /* 128 byte cache line */
|
||||||
|
bdnz 1b
|
||||||
|
|
||||||
|
mtctr r9
|
||||||
|
ld r9,PACA_EXRFI+EX_R9(r13)
|
||||||
|
ld r10,PACA_EXRFI+EX_R10(r13)
|
||||||
|
ld r11,PACA_EXRFI+EX_R11(r13)
|
||||||
|
ld r12,PACA_EXRFI+EX_R12(r13)
|
||||||
|
ld r8,PACA_EXRFI+EX_R13(r13)
|
||||||
|
GET_SCRATCH0(r13);
|
||||||
|
rfid
|
||||||
|
|
||||||
|
TRAMP_REAL_BEGIN(hrfi_flush_fallback)
|
||||||
|
SET_SCRATCH0(r13);
|
||||||
|
GET_PACA(r13);
|
||||||
|
std r9,PACA_EXRFI+EX_R9(r13)
|
||||||
|
std r10,PACA_EXRFI+EX_R10(r13)
|
||||||
|
std r11,PACA_EXRFI+EX_R11(r13)
|
||||||
|
std r12,PACA_EXRFI+EX_R12(r13)
|
||||||
|
std r8,PACA_EXRFI+EX_R13(r13)
|
||||||
|
mfctr r9
|
||||||
|
ld r10,PACA_RFI_FLUSH_FALLBACK_AREA(r13)
|
||||||
|
ld r11,PACA_L1D_FLUSH_SETS(r13)
|
||||||
|
ld r12,PACA_L1D_FLUSH_CONGRUENCE(r13)
|
||||||
|
/*
|
||||||
|
* The load addresses are at staggered offsets within cachelines,
|
||||||
|
* which suits some pipelines better (on others it should not
|
||||||
|
* hurt).
|
||||||
|
*/
|
||||||
|
addi r12,r12,8
|
||||||
|
mtctr r11
|
||||||
|
DCBT_STOP_ALL_STREAM_IDS(r11) /* Stop prefetch streams */
|
||||||
|
|
||||||
|
/* order ld/st prior to dcbt stop all streams with flushing */
|
||||||
|
sync
|
||||||
|
1: li r8,0
|
||||||
|
.rept 8 /* 8-way set associative */
|
||||||
|
ldx r11,r10,r8
|
||||||
|
add r8,r8,r12
|
||||||
|
xor r11,r11,r11 // Ensure r11 is 0 even if fallback area is not
|
||||||
|
add r8,r8,r11 // Add 0, this creates a dependency on the ldx
|
||||||
|
.endr
|
||||||
|
addi r10,r10,128 /* 128 byte cache line */
|
||||||
|
bdnz 1b
|
||||||
|
|
||||||
|
mtctr r9
|
||||||
|
ld r9,PACA_EXRFI+EX_R9(r13)
|
||||||
|
ld r10,PACA_EXRFI+EX_R10(r13)
|
||||||
|
ld r11,PACA_EXRFI+EX_R11(r13)
|
||||||
|
ld r12,PACA_EXRFI+EX_R12(r13)
|
||||||
|
ld r8,PACA_EXRFI+EX_R13(r13)
|
||||||
|
GET_SCRATCH0(r13);
|
||||||
|
hrfid
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Real mode exceptions actually use this too, but alternate
|
* Real mode exceptions actually use this too, but alternate
|
||||||
* instruction code patches (which end up in the common .text area)
|
* instruction code patches (which end up in the common .text area)
|
||||||
|
@ -1426,7 +1537,7 @@ TRAMP_REAL_BEGIN(kvmppc_skip_interrupt)
|
||||||
addi r13, r13, 4
|
addi r13, r13, 4
|
||||||
mtspr SPRN_SRR0, r13
|
mtspr SPRN_SRR0, r13
|
||||||
GET_SCRATCH0(r13)
|
GET_SCRATCH0(r13)
|
||||||
rfid
|
RFI_TO_KERNEL
|
||||||
b .
|
b .
|
||||||
|
|
||||||
TRAMP_REAL_BEGIN(kvmppc_skip_Hinterrupt)
|
TRAMP_REAL_BEGIN(kvmppc_skip_Hinterrupt)
|
||||||
|
@ -1438,7 +1549,7 @@ TRAMP_REAL_BEGIN(kvmppc_skip_Hinterrupt)
|
||||||
addi r13, r13, 4
|
addi r13, r13, 4
|
||||||
mtspr SPRN_HSRR0, r13
|
mtspr SPRN_HSRR0, r13
|
||||||
GET_SCRATCH0(r13)
|
GET_SCRATCH0(r13)
|
||||||
hrfid
|
HRFI_TO_KERNEL
|
||||||
b .
|
b .
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
|
|
@ -784,3 +784,104 @@ static int __init disable_hardlockup_detector(void)
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
early_initcall(disable_hardlockup_detector);
|
early_initcall(disable_hardlockup_detector);
|
||||||
|
|
||||||
|
#ifdef CONFIG_PPC_BOOK3S_64
|
||||||
|
static enum l1d_flush_type enabled_flush_types;
|
||||||
|
static void *l1d_flush_fallback_area;
|
||||||
|
static bool no_rfi_flush;
|
||||||
|
bool rfi_flush;
|
||||||
|
|
||||||
|
static int __init handle_no_rfi_flush(char *p)
|
||||||
|
{
|
||||||
|
pr_info("rfi-flush: disabled on command line.");
|
||||||
|
no_rfi_flush = true;
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
early_param("no_rfi_flush", handle_no_rfi_flush);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* The RFI flush is not KPTI, but because users will see documentation that says to use
|
||||||
|
* nopti we hijack that option here to also disable the RFI flush.
|
||||||
|
*/
|
||||||
|
static int __init handle_no_pti(char *p)
|
||||||
|
{
|
||||||
|
pr_info("rfi-flush: disabling due to 'nopti' on command line.\n");
|
||||||
|
handle_no_rfi_flush(NULL);
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
early_param("nopti", handle_no_pti);
|
||||||
|
|
||||||
|
static void do_nothing(void *unused)
|
||||||
|
{
|
||||||
|
/*
|
||||||
|
* We don't need to do the flush explicitly, just enter+exit kernel is
|
||||||
|
* sufficient, the RFI exit handlers will do the right thing.
|
||||||
|
*/
|
||||||
|
}
|
||||||
|
|
||||||
|
void rfi_flush_enable(bool enable)
|
||||||
|
{
|
||||||
|
if (rfi_flush == enable)
|
||||||
|
return;
|
||||||
|
|
||||||
|
if (enable) {
|
||||||
|
do_rfi_flush_fixups(enabled_flush_types);
|
||||||
|
on_each_cpu(do_nothing, NULL, 1);
|
||||||
|
} else
|
||||||
|
do_rfi_flush_fixups(L1D_FLUSH_NONE);
|
||||||
|
|
||||||
|
rfi_flush = enable;
|
||||||
|
}
|
||||||
|
|
||||||
|
static void init_fallback_flush(void)
|
||||||
|
{
|
||||||
|
u64 l1d_size, limit;
|
||||||
|
int cpu;
|
||||||
|
|
||||||
|
l1d_size = ppc64_caches.l1d.size;
|
||||||
|
limit = min(safe_stack_limit(), ppc64_rma_size);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Align to L1d size, and size it at 2x L1d size, to catch possible
|
||||||
|
* hardware prefetch runoff. We don't have a recipe for load patterns to
|
||||||
|
* reliably avoid the prefetcher.
|
||||||
|
*/
|
||||||
|
l1d_flush_fallback_area = __va(memblock_alloc_base(l1d_size * 2, l1d_size, limit));
|
||||||
|
memset(l1d_flush_fallback_area, 0, l1d_size * 2);
|
||||||
|
|
||||||
|
for_each_possible_cpu(cpu) {
|
||||||
|
/*
|
||||||
|
* The fallback flush is currently coded for 8-way
|
||||||
|
* associativity. Different associativity is possible, but it
|
||||||
|
* will be treated as 8-way and may not evict the lines as
|
||||||
|
* effectively.
|
||||||
|
*
|
||||||
|
* 128 byte lines are mandatory.
|
||||||
|
*/
|
||||||
|
u64 c = l1d_size / 8;
|
||||||
|
|
||||||
|
paca[cpu].rfi_flush_fallback_area = l1d_flush_fallback_area;
|
||||||
|
paca[cpu].l1d_flush_congruence = c;
|
||||||
|
paca[cpu].l1d_flush_sets = c / 128;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void __init setup_rfi_flush(enum l1d_flush_type types, bool enable)
|
||||||
|
{
|
||||||
|
if (types & L1D_FLUSH_FALLBACK) {
|
||||||
|
pr_info("rfi-flush: Using fallback displacement flush\n");
|
||||||
|
init_fallback_flush();
|
||||||
|
}
|
||||||
|
|
||||||
|
if (types & L1D_FLUSH_ORI)
|
||||||
|
pr_info("rfi-flush: Using ori type flush\n");
|
||||||
|
|
||||||
|
if (types & L1D_FLUSH_MTTRIG)
|
||||||
|
pr_info("rfi-flush: Using mttrig type flush\n");
|
||||||
|
|
||||||
|
enabled_flush_types = types;
|
||||||
|
|
||||||
|
if (!no_rfi_flush)
|
||||||
|
rfi_flush_enable(enable);
|
||||||
|
}
|
||||||
|
#endif /* CONFIG_PPC_BOOK3S_64 */
|
||||||
|
|
|
@ -132,6 +132,15 @@ SECTIONS
|
||||||
/* Read-only data */
|
/* Read-only data */
|
||||||
RO_DATA(PAGE_SIZE)
|
RO_DATA(PAGE_SIZE)
|
||||||
|
|
||||||
|
#ifdef CONFIG_PPC64
|
||||||
|
. = ALIGN(8);
|
||||||
|
__rfi_flush_fixup : AT(ADDR(__rfi_flush_fixup) - LOAD_OFFSET) {
|
||||||
|
__start___rfi_flush_fixup = .;
|
||||||
|
*(__rfi_flush_fixup)
|
||||||
|
__stop___rfi_flush_fixup = .;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
EXCEPTION_TABLE(0)
|
EXCEPTION_TABLE(0)
|
||||||
|
|
||||||
NOTES :kernel :notes
|
NOTES :kernel :notes
|
||||||
|
|
|
@ -235,6 +235,7 @@ static int kvmppc_mmu_book3s_64_xlate(struct kvm_vcpu *vcpu, gva_t eaddr,
|
||||||
gpte->may_read = true;
|
gpte->may_read = true;
|
||||||
gpte->may_write = true;
|
gpte->may_write = true;
|
||||||
gpte->page_size = MMU_PAGE_4K;
|
gpte->page_size = MMU_PAGE_4K;
|
||||||
|
gpte->wimg = HPTE_R_M;
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
|
@ -65,11 +65,17 @@ struct kvm_resize_hpt {
|
||||||
u32 order;
|
u32 order;
|
||||||
|
|
||||||
/* These fields protected by kvm->lock */
|
/* These fields protected by kvm->lock */
|
||||||
int error;
|
|
||||||
bool prepare_done;
|
|
||||||
|
|
||||||
/* Private to the work thread, until prepare_done is true,
|
/* Possible values and their usage:
|
||||||
* then protected by kvm->resize_hpt_sem */
|
* <0 an error occurred during allocation,
|
||||||
|
* -EBUSY allocation is in progress,
|
||||||
|
* 0 allocation made successfully.
|
||||||
|
*/
|
||||||
|
int error;
|
||||||
|
|
||||||
|
/* Private to the work thread, until error != -EBUSY,
|
||||||
|
* then protected by kvm->lock.
|
||||||
|
*/
|
||||||
struct kvm_hpt_info hpt;
|
struct kvm_hpt_info hpt;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -159,8 +165,6 @@ long kvmppc_alloc_reset_hpt(struct kvm *kvm, int order)
|
||||||
* Reset all the reverse-mapping chains for all memslots
|
* Reset all the reverse-mapping chains for all memslots
|
||||||
*/
|
*/
|
||||||
kvmppc_rmap_reset(kvm);
|
kvmppc_rmap_reset(kvm);
|
||||||
/* Ensure that each vcpu will flush its TLB on next entry. */
|
|
||||||
cpumask_setall(&kvm->arch.need_tlb_flush);
|
|
||||||
err = 0;
|
err = 0;
|
||||||
goto out;
|
goto out;
|
||||||
}
|
}
|
||||||
|
@ -176,6 +180,10 @@ long kvmppc_alloc_reset_hpt(struct kvm *kvm, int order)
|
||||||
kvmppc_set_hpt(kvm, &info);
|
kvmppc_set_hpt(kvm, &info);
|
||||||
|
|
||||||
out:
|
out:
|
||||||
|
if (err == 0)
|
||||||
|
/* Ensure that each vcpu will flush its TLB on next entry. */
|
||||||
|
cpumask_setall(&kvm->arch.need_tlb_flush);
|
||||||
|
|
||||||
mutex_unlock(&kvm->lock);
|
mutex_unlock(&kvm->lock);
|
||||||
return err;
|
return err;
|
||||||
}
|
}
|
||||||
|
@ -1424,16 +1432,20 @@ static void resize_hpt_pivot(struct kvm_resize_hpt *resize)
|
||||||
|
|
||||||
static void resize_hpt_release(struct kvm *kvm, struct kvm_resize_hpt *resize)
|
static void resize_hpt_release(struct kvm *kvm, struct kvm_resize_hpt *resize)
|
||||||
{
|
{
|
||||||
BUG_ON(kvm->arch.resize_hpt != resize);
|
if (WARN_ON(!mutex_is_locked(&kvm->lock)))
|
||||||
|
return;
|
||||||
|
|
||||||
if (!resize)
|
if (!resize)
|
||||||
return;
|
return;
|
||||||
|
|
||||||
if (resize->hpt.virt)
|
if (resize->error != -EBUSY) {
|
||||||
kvmppc_free_hpt(&resize->hpt);
|
if (resize->hpt.virt)
|
||||||
|
kvmppc_free_hpt(&resize->hpt);
|
||||||
|
kfree(resize);
|
||||||
|
}
|
||||||
|
|
||||||
kvm->arch.resize_hpt = NULL;
|
if (kvm->arch.resize_hpt == resize)
|
||||||
kfree(resize);
|
kvm->arch.resize_hpt = NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
static void resize_hpt_prepare_work(struct work_struct *work)
|
static void resize_hpt_prepare_work(struct work_struct *work)
|
||||||
|
@ -1442,17 +1454,41 @@ static void resize_hpt_prepare_work(struct work_struct *work)
|
||||||
struct kvm_resize_hpt,
|
struct kvm_resize_hpt,
|
||||||
work);
|
work);
|
||||||
struct kvm *kvm = resize->kvm;
|
struct kvm *kvm = resize->kvm;
|
||||||
int err;
|
int err = 0;
|
||||||
|
|
||||||
resize_hpt_debug(resize, "resize_hpt_prepare_work(): order = %d\n",
|
if (WARN_ON(resize->error != -EBUSY))
|
||||||
resize->order);
|
return;
|
||||||
|
|
||||||
err = resize_hpt_allocate(resize);
|
|
||||||
|
|
||||||
mutex_lock(&kvm->lock);
|
mutex_lock(&kvm->lock);
|
||||||
|
|
||||||
|
/* Request is still current? */
|
||||||
|
if (kvm->arch.resize_hpt == resize) {
|
||||||
|
/* We may request large allocations here:
|
||||||
|
* do not sleep with kvm->lock held for a while.
|
||||||
|
*/
|
||||||
|
mutex_unlock(&kvm->lock);
|
||||||
|
|
||||||
|
resize_hpt_debug(resize, "resize_hpt_prepare_work(): order = %d\n",
|
||||||
|
resize->order);
|
||||||
|
|
||||||
|
err = resize_hpt_allocate(resize);
|
||||||
|
|
||||||
|
/* We have a strict assumption about -EBUSY
|
||||||
|
* when preparing for HPT resize.
|
||||||
|
*/
|
||||||
|
if (WARN_ON(err == -EBUSY))
|
||||||
|
err = -EINPROGRESS;
|
||||||
|
|
||||||
|
mutex_lock(&kvm->lock);
|
||||||
|
/* It is possible that kvm->arch.resize_hpt != resize
|
||||||
|
* after we grab kvm->lock again.
|
||||||
|
*/
|
||||||
|
}
|
||||||
|
|
||||||
resize->error = err;
|
resize->error = err;
|
||||||
resize->prepare_done = true;
|
|
||||||
|
if (kvm->arch.resize_hpt != resize)
|
||||||
|
resize_hpt_release(kvm, resize);
|
||||||
|
|
||||||
mutex_unlock(&kvm->lock);
|
mutex_unlock(&kvm->lock);
|
||||||
}
|
}
|
||||||
|
@ -1477,14 +1513,12 @@ long kvm_vm_ioctl_resize_hpt_prepare(struct kvm *kvm,
|
||||||
|
|
||||||
if (resize) {
|
if (resize) {
|
||||||
if (resize->order == shift) {
|
if (resize->order == shift) {
|
||||||
/* Suitable resize in progress */
|
/* Suitable resize in progress? */
|
||||||
if (resize->prepare_done) {
|
ret = resize->error;
|
||||||
ret = resize->error;
|
if (ret == -EBUSY)
|
||||||
if (ret != 0)
|
|
||||||
resize_hpt_release(kvm, resize);
|
|
||||||
} else {
|
|
||||||
ret = 100; /* estimated time in ms */
|
ret = 100; /* estimated time in ms */
|
||||||
}
|
else if (ret)
|
||||||
|
resize_hpt_release(kvm, resize);
|
||||||
|
|
||||||
goto out;
|
goto out;
|
||||||
}
|
}
|
||||||
|
@ -1504,6 +1538,8 @@ long kvm_vm_ioctl_resize_hpt_prepare(struct kvm *kvm,
|
||||||
ret = -ENOMEM;
|
ret = -ENOMEM;
|
||||||
goto out;
|
goto out;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
resize->error = -EBUSY;
|
||||||
resize->order = shift;
|
resize->order = shift;
|
||||||
resize->kvm = kvm;
|
resize->kvm = kvm;
|
||||||
INIT_WORK(&resize->work, resize_hpt_prepare_work);
|
INIT_WORK(&resize->work, resize_hpt_prepare_work);
|
||||||
|
@ -1558,16 +1594,12 @@ long kvm_vm_ioctl_resize_hpt_commit(struct kvm *kvm,
|
||||||
if (!resize || (resize->order != shift))
|
if (!resize || (resize->order != shift))
|
||||||
goto out;
|
goto out;
|
||||||
|
|
||||||
ret = -EBUSY;
|
|
||||||
if (!resize->prepare_done)
|
|
||||||
goto out;
|
|
||||||
|
|
||||||
ret = resize->error;
|
ret = resize->error;
|
||||||
if (ret != 0)
|
if (ret)
|
||||||
goto out;
|
goto out;
|
||||||
|
|
||||||
ret = resize_hpt_rehash(resize);
|
ret = resize_hpt_rehash(resize);
|
||||||
if (ret != 0)
|
if (ret)
|
||||||
goto out;
|
goto out;
|
||||||
|
|
||||||
resize_hpt_pivot(resize);
|
resize_hpt_pivot(resize);
|
||||||
|
|
|
@ -78,7 +78,7 @@ _GLOBAL_TOC(kvmppc_hv_entry_trampoline)
|
||||||
mtmsrd r0,1 /* clear RI in MSR */
|
mtmsrd r0,1 /* clear RI in MSR */
|
||||||
mtsrr0 r5
|
mtsrr0 r5
|
||||||
mtsrr1 r6
|
mtsrr1 r6
|
||||||
RFI
|
RFI_TO_KERNEL
|
||||||
|
|
||||||
kvmppc_call_hv_entry:
|
kvmppc_call_hv_entry:
|
||||||
ld r4, HSTATE_KVM_VCPU(r13)
|
ld r4, HSTATE_KVM_VCPU(r13)
|
||||||
|
@ -187,7 +187,7 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300)
|
||||||
mtmsrd r6, 1 /* Clear RI in MSR */
|
mtmsrd r6, 1 /* Clear RI in MSR */
|
||||||
mtsrr0 r8
|
mtsrr0 r8
|
||||||
mtsrr1 r7
|
mtsrr1 r7
|
||||||
RFI
|
RFI_TO_KERNEL
|
||||||
|
|
||||||
/* Virtual-mode return */
|
/* Virtual-mode return */
|
||||||
.Lvirt_return:
|
.Lvirt_return:
|
||||||
|
@ -1131,8 +1131,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
|
||||||
|
|
||||||
ld r0, VCPU_GPR(R0)(r4)
|
ld r0, VCPU_GPR(R0)(r4)
|
||||||
ld r4, VCPU_GPR(R4)(r4)
|
ld r4, VCPU_GPR(R4)(r4)
|
||||||
|
HRFI_TO_GUEST
|
||||||
hrfid
|
|
||||||
b .
|
b .
|
||||||
|
|
||||||
secondary_too_late:
|
secondary_too_late:
|
||||||
|
|
|
@ -60,6 +60,7 @@ static void kvmppc_giveup_fac(struct kvm_vcpu *vcpu, ulong fac);
|
||||||
#define MSR_USER32 MSR_USER
|
#define MSR_USER32 MSR_USER
|
||||||
#define MSR_USER64 MSR_USER
|
#define MSR_USER64 MSR_USER
|
||||||
#define HW_PAGE_SIZE PAGE_SIZE
|
#define HW_PAGE_SIZE PAGE_SIZE
|
||||||
|
#define HPTE_R_M _PAGE_COHERENT
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
static bool kvmppc_is_split_real(struct kvm_vcpu *vcpu)
|
static bool kvmppc_is_split_real(struct kvm_vcpu *vcpu)
|
||||||
|
@ -557,6 +558,7 @@ int kvmppc_handle_pagefault(struct kvm_run *run, struct kvm_vcpu *vcpu,
|
||||||
pte.eaddr = eaddr;
|
pte.eaddr = eaddr;
|
||||||
pte.vpage = eaddr >> 12;
|
pte.vpage = eaddr >> 12;
|
||||||
pte.page_size = MMU_PAGE_64K;
|
pte.page_size = MMU_PAGE_64K;
|
||||||
|
pte.wimg = HPTE_R_M;
|
||||||
}
|
}
|
||||||
|
|
||||||
switch (kvmppc_get_msr(vcpu) & (MSR_DR|MSR_IR)) {
|
switch (kvmppc_get_msr(vcpu) & (MSR_DR|MSR_IR)) {
|
||||||
|
|
|
@ -46,6 +46,9 @@
|
||||||
|
|
||||||
#define FUNC(name) name
|
#define FUNC(name) name
|
||||||
|
|
||||||
|
#define RFI_TO_KERNEL RFI
|
||||||
|
#define RFI_TO_GUEST RFI
|
||||||
|
|
||||||
.macro INTERRUPT_TRAMPOLINE intno
|
.macro INTERRUPT_TRAMPOLINE intno
|
||||||
|
|
||||||
.global kvmppc_trampoline_\intno
|
.global kvmppc_trampoline_\intno
|
||||||
|
@ -141,7 +144,7 @@ kvmppc_handler_skip_ins:
|
||||||
GET_SCRATCH0(r13)
|
GET_SCRATCH0(r13)
|
||||||
|
|
||||||
/* And get back into the code */
|
/* And get back into the code */
|
||||||
RFI
|
RFI_TO_KERNEL
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
@ -164,6 +167,6 @@ _GLOBAL_TOC(kvmppc_entry_trampoline)
|
||||||
ori r5, r5, MSR_EE
|
ori r5, r5, MSR_EE
|
||||||
mtsrr0 r7
|
mtsrr0 r7
|
||||||
mtsrr1 r6
|
mtsrr1 r6
|
||||||
RFI
|
RFI_TO_KERNEL
|
||||||
|
|
||||||
#include "book3s_segment.S"
|
#include "book3s_segment.S"
|
||||||
|
|
|
@ -156,7 +156,7 @@ no_dcbz32_on:
|
||||||
PPC_LL r9, SVCPU_R9(r3)
|
PPC_LL r9, SVCPU_R9(r3)
|
||||||
PPC_LL r3, (SVCPU_R3)(r3)
|
PPC_LL r3, (SVCPU_R3)(r3)
|
||||||
|
|
||||||
RFI
|
RFI_TO_GUEST
|
||||||
kvmppc_handler_trampoline_enter_end:
|
kvmppc_handler_trampoline_enter_end:
|
||||||
|
|
||||||
|
|
||||||
|
@ -407,5 +407,5 @@ END_FTR_SECTION_IFSET(CPU_FTR_HVMODE)
|
||||||
cmpwi r12, BOOK3S_INTERRUPT_DOORBELL
|
cmpwi r12, BOOK3S_INTERRUPT_DOORBELL
|
||||||
beqa BOOK3S_INTERRUPT_DOORBELL
|
beqa BOOK3S_INTERRUPT_DOORBELL
|
||||||
|
|
||||||
RFI
|
RFI_TO_KERNEL
|
||||||
kvmppc_handler_trampoline_exit_end:
|
kvmppc_handler_trampoline_exit_end:
|
||||||
|
|
|
@ -116,6 +116,47 @@ void do_feature_fixups(unsigned long value, void *fixup_start, void *fixup_end)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#ifdef CONFIG_PPC_BOOK3S_64
|
||||||
|
void do_rfi_flush_fixups(enum l1d_flush_type types)
|
||||||
|
{
|
||||||
|
unsigned int instrs[3], *dest;
|
||||||
|
long *start, *end;
|
||||||
|
int i;
|
||||||
|
|
||||||
|
start = PTRRELOC(&__start___rfi_flush_fixup),
|
||||||
|
end = PTRRELOC(&__stop___rfi_flush_fixup);
|
||||||
|
|
||||||
|
instrs[0] = 0x60000000; /* nop */
|
||||||
|
instrs[1] = 0x60000000; /* nop */
|
||||||
|
instrs[2] = 0x60000000; /* nop */
|
||||||
|
|
||||||
|
if (types & L1D_FLUSH_FALLBACK)
|
||||||
|
/* b .+16 to fallback flush */
|
||||||
|
instrs[0] = 0x48000010;
|
||||||
|
|
||||||
|
i = 0;
|
||||||
|
if (types & L1D_FLUSH_ORI) {
|
||||||
|
instrs[i++] = 0x63ff0000; /* ori 31,31,0 speculation barrier */
|
||||||
|
instrs[i++] = 0x63de0000; /* ori 30,30,0 L1d flush*/
|
||||||
|
}
|
||||||
|
|
||||||
|
if (types & L1D_FLUSH_MTTRIG)
|
||||||
|
instrs[i++] = 0x7c12dba6; /* mtspr TRIG2,r0 (SPR #882) */
|
||||||
|
|
||||||
|
for (i = 0; start < end; start++, i++) {
|
||||||
|
dest = (void *)start + *start;
|
||||||
|
|
||||||
|
pr_devel("patching dest %lx\n", (unsigned long)dest);
|
||||||
|
|
||||||
|
patch_instruction(dest, instrs[0]);
|
||||||
|
patch_instruction(dest + 1, instrs[1]);
|
||||||
|
patch_instruction(dest + 2, instrs[2]);
|
||||||
|
}
|
||||||
|
|
||||||
|
printk(KERN_DEBUG "rfi-flush: patched %d locations\n", i);
|
||||||
|
}
|
||||||
|
#endif /* CONFIG_PPC_BOOK3S_64 */
|
||||||
|
|
||||||
void do_lwsync_fixups(unsigned long value, void *fixup_start, void *fixup_end)
|
void do_lwsync_fixups(unsigned long value, void *fixup_start, void *fixup_end)
|
||||||
{
|
{
|
||||||
long *start, *end;
|
long *start, *end;
|
||||||
|
|
|
@ -36,13 +36,62 @@
|
||||||
#include <asm/opal.h>
|
#include <asm/opal.h>
|
||||||
#include <asm/kexec.h>
|
#include <asm/kexec.h>
|
||||||
#include <asm/smp.h>
|
#include <asm/smp.h>
|
||||||
|
#include <asm/setup.h>
|
||||||
|
|
||||||
#include "powernv.h"
|
#include "powernv.h"
|
||||||
|
|
||||||
|
static void pnv_setup_rfi_flush(void)
|
||||||
|
{
|
||||||
|
struct device_node *np, *fw_features;
|
||||||
|
enum l1d_flush_type type;
|
||||||
|
int enable;
|
||||||
|
|
||||||
|
/* Default to fallback in case fw-features are not available */
|
||||||
|
type = L1D_FLUSH_FALLBACK;
|
||||||
|
enable = 1;
|
||||||
|
|
||||||
|
np = of_find_node_by_name(NULL, "ibm,opal");
|
||||||
|
fw_features = of_get_child_by_name(np, "fw-features");
|
||||||
|
of_node_put(np);
|
||||||
|
|
||||||
|
if (fw_features) {
|
||||||
|
np = of_get_child_by_name(fw_features, "inst-l1d-flush-trig2");
|
||||||
|
if (np && of_property_read_bool(np, "enabled"))
|
||||||
|
type = L1D_FLUSH_MTTRIG;
|
||||||
|
|
||||||
|
of_node_put(np);
|
||||||
|
|
||||||
|
np = of_get_child_by_name(fw_features, "inst-l1d-flush-ori30,30,0");
|
||||||
|
if (np && of_property_read_bool(np, "enabled"))
|
||||||
|
type = L1D_FLUSH_ORI;
|
||||||
|
|
||||||
|
of_node_put(np);
|
||||||
|
|
||||||
|
/* Enable unless firmware says NOT to */
|
||||||
|
enable = 2;
|
||||||
|
np = of_get_child_by_name(fw_features, "needs-l1d-flush-msr-hv-1-to-0");
|
||||||
|
if (np && of_property_read_bool(np, "disabled"))
|
||||||
|
enable--;
|
||||||
|
|
||||||
|
of_node_put(np);
|
||||||
|
|
||||||
|
np = of_get_child_by_name(fw_features, "needs-l1d-flush-msr-pr-0-to-1");
|
||||||
|
if (np && of_property_read_bool(np, "disabled"))
|
||||||
|
enable--;
|
||||||
|
|
||||||
|
of_node_put(np);
|
||||||
|
of_node_put(fw_features);
|
||||||
|
}
|
||||||
|
|
||||||
|
setup_rfi_flush(type, enable > 0);
|
||||||
|
}
|
||||||
|
|
||||||
static void __init pnv_setup_arch(void)
|
static void __init pnv_setup_arch(void)
|
||||||
{
|
{
|
||||||
set_arch_panic_timeout(10, ARCH_PANIC_TIMEOUT);
|
set_arch_panic_timeout(10, ARCH_PANIC_TIMEOUT);
|
||||||
|
|
||||||
|
pnv_setup_rfi_flush();
|
||||||
|
|
||||||
/* Initialize SMP */
|
/* Initialize SMP */
|
||||||
pnv_smp_init();
|
pnv_smp_init();
|
||||||
|
|
||||||
|
|
|
@ -459,6 +459,39 @@ static void __init find_and_init_phbs(void)
|
||||||
of_pci_check_probe_only();
|
of_pci_check_probe_only();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static void pseries_setup_rfi_flush(void)
|
||||||
|
{
|
||||||
|
struct h_cpu_char_result result;
|
||||||
|
enum l1d_flush_type types;
|
||||||
|
bool enable;
|
||||||
|
long rc;
|
||||||
|
|
||||||
|
/* Enable by default */
|
||||||
|
enable = true;
|
||||||
|
|
||||||
|
rc = plpar_get_cpu_characteristics(&result);
|
||||||
|
if (rc == H_SUCCESS) {
|
||||||
|
types = L1D_FLUSH_NONE;
|
||||||
|
|
||||||
|
if (result.character & H_CPU_CHAR_L1D_FLUSH_TRIG2)
|
||||||
|
types |= L1D_FLUSH_MTTRIG;
|
||||||
|
if (result.character & H_CPU_CHAR_L1D_FLUSH_ORI30)
|
||||||
|
types |= L1D_FLUSH_ORI;
|
||||||
|
|
||||||
|
/* Use fallback if nothing set in hcall */
|
||||||
|
if (types == L1D_FLUSH_NONE)
|
||||||
|
types = L1D_FLUSH_FALLBACK;
|
||||||
|
|
||||||
|
if (!(result.behaviour & H_CPU_BEHAV_L1D_FLUSH_PR))
|
||||||
|
enable = false;
|
||||||
|
} else {
|
||||||
|
/* Default to fallback in case hcall is not available */
|
||||||
|
types = L1D_FLUSH_FALLBACK;
|
||||||
|
}
|
||||||
|
|
||||||
|
setup_rfi_flush(types, enable);
|
||||||
|
}
|
||||||
|
|
||||||
static void __init pSeries_setup_arch(void)
|
static void __init pSeries_setup_arch(void)
|
||||||
{
|
{
|
||||||
set_arch_panic_timeout(10, ARCH_PANIC_TIMEOUT);
|
set_arch_panic_timeout(10, ARCH_PANIC_TIMEOUT);
|
||||||
|
@ -476,6 +509,8 @@ static void __init pSeries_setup_arch(void)
|
||||||
|
|
||||||
fwnmi_init();
|
fwnmi_init();
|
||||||
|
|
||||||
|
pseries_setup_rfi_flush();
|
||||||
|
|
||||||
/* By default, only probe PCI (can be overridden by rtas_pci) */
|
/* By default, only probe PCI (can be overridden by rtas_pci) */
|
||||||
pci_add_flags(PCI_PROBE_ONLY);
|
pci_add_flags(PCI_PROBE_ONLY);
|
||||||
|
|
||||||
|
|
|
@ -768,7 +768,7 @@ static void kvm_s390_sync_request_broadcast(struct kvm *kvm, int req)
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Must be called with kvm->srcu held to avoid races on memslots, and with
|
* Must be called with kvm->srcu held to avoid races on memslots, and with
|
||||||
* kvm->lock to avoid races with ourselves and kvm_s390_vm_stop_migration.
|
* kvm->slots_lock to avoid races with ourselves and kvm_s390_vm_stop_migration.
|
||||||
*/
|
*/
|
||||||
static int kvm_s390_vm_start_migration(struct kvm *kvm)
|
static int kvm_s390_vm_start_migration(struct kvm *kvm)
|
||||||
{
|
{
|
||||||
|
@ -824,7 +824,7 @@ static int kvm_s390_vm_start_migration(struct kvm *kvm)
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Must be called with kvm->lock to avoid races with ourselves and
|
* Must be called with kvm->slots_lock to avoid races with ourselves and
|
||||||
* kvm_s390_vm_start_migration.
|
* kvm_s390_vm_start_migration.
|
||||||
*/
|
*/
|
||||||
static int kvm_s390_vm_stop_migration(struct kvm *kvm)
|
static int kvm_s390_vm_stop_migration(struct kvm *kvm)
|
||||||
|
@ -839,6 +839,8 @@ static int kvm_s390_vm_stop_migration(struct kvm *kvm)
|
||||||
|
|
||||||
if (kvm->arch.use_cmma) {
|
if (kvm->arch.use_cmma) {
|
||||||
kvm_s390_sync_request_broadcast(kvm, KVM_REQ_STOP_MIGRATION);
|
kvm_s390_sync_request_broadcast(kvm, KVM_REQ_STOP_MIGRATION);
|
||||||
|
/* We have to wait for the essa emulation to finish */
|
||||||
|
synchronize_srcu(&kvm->srcu);
|
||||||
vfree(mgs->pgste_bitmap);
|
vfree(mgs->pgste_bitmap);
|
||||||
}
|
}
|
||||||
kfree(mgs);
|
kfree(mgs);
|
||||||
|
@ -848,14 +850,12 @@ static int kvm_s390_vm_stop_migration(struct kvm *kvm)
|
||||||
static int kvm_s390_vm_set_migration(struct kvm *kvm,
|
static int kvm_s390_vm_set_migration(struct kvm *kvm,
|
||||||
struct kvm_device_attr *attr)
|
struct kvm_device_attr *attr)
|
||||||
{
|
{
|
||||||
int idx, res = -ENXIO;
|
int res = -ENXIO;
|
||||||
|
|
||||||
mutex_lock(&kvm->lock);
|
mutex_lock(&kvm->slots_lock);
|
||||||
switch (attr->attr) {
|
switch (attr->attr) {
|
||||||
case KVM_S390_VM_MIGRATION_START:
|
case KVM_S390_VM_MIGRATION_START:
|
||||||
idx = srcu_read_lock(&kvm->srcu);
|
|
||||||
res = kvm_s390_vm_start_migration(kvm);
|
res = kvm_s390_vm_start_migration(kvm);
|
||||||
srcu_read_unlock(&kvm->srcu, idx);
|
|
||||||
break;
|
break;
|
||||||
case KVM_S390_VM_MIGRATION_STOP:
|
case KVM_S390_VM_MIGRATION_STOP:
|
||||||
res = kvm_s390_vm_stop_migration(kvm);
|
res = kvm_s390_vm_stop_migration(kvm);
|
||||||
|
@ -863,7 +863,7 @@ static int kvm_s390_vm_set_migration(struct kvm *kvm,
|
||||||
default:
|
default:
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
mutex_unlock(&kvm->lock);
|
mutex_unlock(&kvm->slots_lock);
|
||||||
|
|
||||||
return res;
|
return res;
|
||||||
}
|
}
|
||||||
|
@ -1753,7 +1753,9 @@ long kvm_arch_vm_ioctl(struct file *filp,
|
||||||
r = -EFAULT;
|
r = -EFAULT;
|
||||||
if (copy_from_user(&args, argp, sizeof(args)))
|
if (copy_from_user(&args, argp, sizeof(args)))
|
||||||
break;
|
break;
|
||||||
|
mutex_lock(&kvm->slots_lock);
|
||||||
r = kvm_s390_get_cmma_bits(kvm, &args);
|
r = kvm_s390_get_cmma_bits(kvm, &args);
|
||||||
|
mutex_unlock(&kvm->slots_lock);
|
||||||
if (!r) {
|
if (!r) {
|
||||||
r = copy_to_user(argp, &args, sizeof(args));
|
r = copy_to_user(argp, &args, sizeof(args));
|
||||||
if (r)
|
if (r)
|
||||||
|
@ -1767,7 +1769,9 @@ long kvm_arch_vm_ioctl(struct file *filp,
|
||||||
r = -EFAULT;
|
r = -EFAULT;
|
||||||
if (copy_from_user(&args, argp, sizeof(args)))
|
if (copy_from_user(&args, argp, sizeof(args)))
|
||||||
break;
|
break;
|
||||||
|
mutex_lock(&kvm->slots_lock);
|
||||||
r = kvm_s390_set_cmma_bits(kvm, &args);
|
r = kvm_s390_set_cmma_bits(kvm, &args);
|
||||||
|
mutex_unlock(&kvm->slots_lock);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
default:
|
default:
|
||||||
|
|
|
@ -89,6 +89,7 @@ config X86
|
||||||
select GENERIC_CLOCKEVENTS_MIN_ADJUST
|
select GENERIC_CLOCKEVENTS_MIN_ADJUST
|
||||||
select GENERIC_CMOS_UPDATE
|
select GENERIC_CMOS_UPDATE
|
||||||
select GENERIC_CPU_AUTOPROBE
|
select GENERIC_CPU_AUTOPROBE
|
||||||
|
select GENERIC_CPU_VULNERABILITIES
|
||||||
select GENERIC_EARLY_IOREMAP
|
select GENERIC_EARLY_IOREMAP
|
||||||
select GENERIC_FIND_FIRST_BIT
|
select GENERIC_FIND_FIRST_BIT
|
||||||
select GENERIC_IOMAP
|
select GENERIC_IOMAP
|
||||||
|
@ -428,6 +429,19 @@ config GOLDFISH
|
||||||
def_bool y
|
def_bool y
|
||||||
depends on X86_GOLDFISH
|
depends on X86_GOLDFISH
|
||||||
|
|
||||||
|
config RETPOLINE
|
||||||
|
bool "Avoid speculative indirect branches in kernel"
|
||||||
|
default y
|
||||||
|
help
|
||||||
|
Compile kernel with the retpoline compiler options to guard against
|
||||||
|
kernel-to-user data leaks by avoiding speculative indirect
|
||||||
|
branches. Requires a compiler with -mindirect-branch=thunk-extern
|
||||||
|
support for full protection. The kernel may run slower.
|
||||||
|
|
||||||
|
Without compiler support, at least indirect branches in assembler
|
||||||
|
code are eliminated. Since this includes the syscall entry path,
|
||||||
|
it is not entirely pointless.
|
||||||
|
|
||||||
config INTEL_RDT
|
config INTEL_RDT
|
||||||
bool "Intel Resource Director Technology support"
|
bool "Intel Resource Director Technology support"
|
||||||
default n
|
default n
|
||||||
|
|
|
@ -235,6 +235,14 @@ KBUILD_CFLAGS += -Wno-sign-compare
|
||||||
#
|
#
|
||||||
KBUILD_CFLAGS += -fno-asynchronous-unwind-tables
|
KBUILD_CFLAGS += -fno-asynchronous-unwind-tables
|
||||||
|
|
||||||
|
# Avoid indirect branches in kernel to deal with Spectre
|
||||||
|
ifdef CONFIG_RETPOLINE
|
||||||
|
RETPOLINE_CFLAGS += $(call cc-option,-mindirect-branch=thunk-extern -mindirect-branch-register)
|
||||||
|
ifneq ($(RETPOLINE_CFLAGS),)
|
||||||
|
KBUILD_CFLAGS += $(RETPOLINE_CFLAGS) -DRETPOLINE
|
||||||
|
endif
|
||||||
|
endif
|
||||||
|
|
||||||
archscripts: scripts_basic
|
archscripts: scripts_basic
|
||||||
$(Q)$(MAKE) $(build)=arch/x86/tools relocs
|
$(Q)$(MAKE) $(build)=arch/x86/tools relocs
|
||||||
|
|
||||||
|
|
|
@ -32,6 +32,7 @@
|
||||||
#include <linux/linkage.h>
|
#include <linux/linkage.h>
|
||||||
#include <asm/inst.h>
|
#include <asm/inst.h>
|
||||||
#include <asm/frame.h>
|
#include <asm/frame.h>
|
||||||
|
#include <asm/nospec-branch.h>
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* The following macros are used to move an (un)aligned 16 byte value to/from
|
* The following macros are used to move an (un)aligned 16 byte value to/from
|
||||||
|
@ -2884,7 +2885,7 @@ ENTRY(aesni_xts_crypt8)
|
||||||
pxor INC, STATE4
|
pxor INC, STATE4
|
||||||
movdqu IV, 0x30(OUTP)
|
movdqu IV, 0x30(OUTP)
|
||||||
|
|
||||||
call *%r11
|
CALL_NOSPEC %r11
|
||||||
|
|
||||||
movdqu 0x00(OUTP), INC
|
movdqu 0x00(OUTP), INC
|
||||||
pxor INC, STATE1
|
pxor INC, STATE1
|
||||||
|
@ -2929,7 +2930,7 @@ ENTRY(aesni_xts_crypt8)
|
||||||
_aesni_gf128mul_x_ble()
|
_aesni_gf128mul_x_ble()
|
||||||
movups IV, (IVP)
|
movups IV, (IVP)
|
||||||
|
|
||||||
call *%r11
|
CALL_NOSPEC %r11
|
||||||
|
|
||||||
movdqu 0x40(OUTP), INC
|
movdqu 0x40(OUTP), INC
|
||||||
pxor INC, STATE1
|
pxor INC, STATE1
|
||||||
|
|
|
@ -17,6 +17,7 @@
|
||||||
|
|
||||||
#include <linux/linkage.h>
|
#include <linux/linkage.h>
|
||||||
#include <asm/frame.h>
|
#include <asm/frame.h>
|
||||||
|
#include <asm/nospec-branch.h>
|
||||||
|
|
||||||
#define CAMELLIA_TABLE_BYTE_LEN 272
|
#define CAMELLIA_TABLE_BYTE_LEN 272
|
||||||
|
|
||||||
|
@ -1227,7 +1228,7 @@ camellia_xts_crypt_16way:
|
||||||
vpxor 14 * 16(%rax), %xmm15, %xmm14;
|
vpxor 14 * 16(%rax), %xmm15, %xmm14;
|
||||||
vpxor 15 * 16(%rax), %xmm15, %xmm15;
|
vpxor 15 * 16(%rax), %xmm15, %xmm15;
|
||||||
|
|
||||||
call *%r9;
|
CALL_NOSPEC %r9;
|
||||||
|
|
||||||
addq $(16 * 16), %rsp;
|
addq $(16 * 16), %rsp;
|
||||||
|
|
||||||
|
|
|
@ -12,6 +12,7 @@
|
||||||
|
|
||||||
#include <linux/linkage.h>
|
#include <linux/linkage.h>
|
||||||
#include <asm/frame.h>
|
#include <asm/frame.h>
|
||||||
|
#include <asm/nospec-branch.h>
|
||||||
|
|
||||||
#define CAMELLIA_TABLE_BYTE_LEN 272
|
#define CAMELLIA_TABLE_BYTE_LEN 272
|
||||||
|
|
||||||
|
@ -1343,7 +1344,7 @@ camellia_xts_crypt_32way:
|
||||||
vpxor 14 * 32(%rax), %ymm15, %ymm14;
|
vpxor 14 * 32(%rax), %ymm15, %ymm14;
|
||||||
vpxor 15 * 32(%rax), %ymm15, %ymm15;
|
vpxor 15 * 32(%rax), %ymm15, %ymm15;
|
||||||
|
|
||||||
call *%r9;
|
CALL_NOSPEC %r9;
|
||||||
|
|
||||||
addq $(16 * 32), %rsp;
|
addq $(16 * 32), %rsp;
|
||||||
|
|
||||||
|
|
|
@ -45,6 +45,7 @@
|
||||||
|
|
||||||
#include <asm/inst.h>
|
#include <asm/inst.h>
|
||||||
#include <linux/linkage.h>
|
#include <linux/linkage.h>
|
||||||
|
#include <asm/nospec-branch.h>
|
||||||
|
|
||||||
## ISCSI CRC 32 Implementation with crc32 and pclmulqdq Instruction
|
## ISCSI CRC 32 Implementation with crc32 and pclmulqdq Instruction
|
||||||
|
|
||||||
|
@ -172,7 +173,7 @@ continue_block:
|
||||||
movzxw (bufp, %rax, 2), len
|
movzxw (bufp, %rax, 2), len
|
||||||
lea crc_array(%rip), bufp
|
lea crc_array(%rip), bufp
|
||||||
lea (bufp, len, 1), bufp
|
lea (bufp, len, 1), bufp
|
||||||
jmp *bufp
|
JMP_NOSPEC bufp
|
||||||
|
|
||||||
################################################################
|
################################################################
|
||||||
## 2a) PROCESS FULL BLOCKS:
|
## 2a) PROCESS FULL BLOCKS:
|
||||||
|
|
|
@ -198,8 +198,11 @@ For 32-bit we have the following conventions - kernel is built with
|
||||||
* PAGE_TABLE_ISOLATION PGDs are 8k. Flip bit 12 to switch between the two
|
* PAGE_TABLE_ISOLATION PGDs are 8k. Flip bit 12 to switch between the two
|
||||||
* halves:
|
* halves:
|
||||||
*/
|
*/
|
||||||
#define PTI_SWITCH_PGTABLES_MASK (1<<PAGE_SHIFT)
|
#define PTI_USER_PGTABLE_BIT PAGE_SHIFT
|
||||||
#define PTI_SWITCH_MASK (PTI_SWITCH_PGTABLES_MASK|(1<<X86_CR3_PTI_SWITCH_BIT))
|
#define PTI_USER_PGTABLE_MASK (1 << PTI_USER_PGTABLE_BIT)
|
||||||
|
#define PTI_USER_PCID_BIT X86_CR3_PTI_PCID_USER_BIT
|
||||||
|
#define PTI_USER_PCID_MASK (1 << PTI_USER_PCID_BIT)
|
||||||
|
#define PTI_USER_PGTABLE_AND_PCID_MASK (PTI_USER_PCID_MASK | PTI_USER_PGTABLE_MASK)
|
||||||
|
|
||||||
.macro SET_NOFLUSH_BIT reg:req
|
.macro SET_NOFLUSH_BIT reg:req
|
||||||
bts $X86_CR3_PCID_NOFLUSH_BIT, \reg
|
bts $X86_CR3_PCID_NOFLUSH_BIT, \reg
|
||||||
|
@ -208,7 +211,7 @@ For 32-bit we have the following conventions - kernel is built with
|
||||||
.macro ADJUST_KERNEL_CR3 reg:req
|
.macro ADJUST_KERNEL_CR3 reg:req
|
||||||
ALTERNATIVE "", "SET_NOFLUSH_BIT \reg", X86_FEATURE_PCID
|
ALTERNATIVE "", "SET_NOFLUSH_BIT \reg", X86_FEATURE_PCID
|
||||||
/* Clear PCID and "PAGE_TABLE_ISOLATION bit", point CR3 at kernel pagetables: */
|
/* Clear PCID and "PAGE_TABLE_ISOLATION bit", point CR3 at kernel pagetables: */
|
||||||
andq $(~PTI_SWITCH_MASK), \reg
|
andq $(~PTI_USER_PGTABLE_AND_PCID_MASK), \reg
|
||||||
.endm
|
.endm
|
||||||
|
|
||||||
.macro SWITCH_TO_KERNEL_CR3 scratch_reg:req
|
.macro SWITCH_TO_KERNEL_CR3 scratch_reg:req
|
||||||
|
@ -239,15 +242,19 @@ For 32-bit we have the following conventions - kernel is built with
|
||||||
/* Flush needed, clear the bit */
|
/* Flush needed, clear the bit */
|
||||||
btr \scratch_reg, THIS_CPU_user_pcid_flush_mask
|
btr \scratch_reg, THIS_CPU_user_pcid_flush_mask
|
||||||
movq \scratch_reg2, \scratch_reg
|
movq \scratch_reg2, \scratch_reg
|
||||||
jmp .Lwrcr3_\@
|
jmp .Lwrcr3_pcid_\@
|
||||||
|
|
||||||
.Lnoflush_\@:
|
.Lnoflush_\@:
|
||||||
movq \scratch_reg2, \scratch_reg
|
movq \scratch_reg2, \scratch_reg
|
||||||
SET_NOFLUSH_BIT \scratch_reg
|
SET_NOFLUSH_BIT \scratch_reg
|
||||||
|
|
||||||
|
.Lwrcr3_pcid_\@:
|
||||||
|
/* Flip the ASID to the user version */
|
||||||
|
orq $(PTI_USER_PCID_MASK), \scratch_reg
|
||||||
|
|
||||||
.Lwrcr3_\@:
|
.Lwrcr3_\@:
|
||||||
/* Flip the PGD and ASID to the user version */
|
/* Flip the PGD to the user version */
|
||||||
orq $(PTI_SWITCH_MASK), \scratch_reg
|
orq $(PTI_USER_PGTABLE_MASK), \scratch_reg
|
||||||
mov \scratch_reg, %cr3
|
mov \scratch_reg, %cr3
|
||||||
.Lend_\@:
|
.Lend_\@:
|
||||||
.endm
|
.endm
|
||||||
|
@ -263,17 +270,12 @@ For 32-bit we have the following conventions - kernel is built with
|
||||||
movq %cr3, \scratch_reg
|
movq %cr3, \scratch_reg
|
||||||
movq \scratch_reg, \save_reg
|
movq \scratch_reg, \save_reg
|
||||||
/*
|
/*
|
||||||
* Is the "switch mask" all zero? That means that both of
|
* Test the user pagetable bit. If set, then the user page tables
|
||||||
* these are zero:
|
* are active. If clear CR3 already has the kernel page table
|
||||||
*
|
* active.
|
||||||
* 1. The user/kernel PCID bit, and
|
|
||||||
* 2. The user/kernel "bit" that points CR3 to the
|
|
||||||
* bottom half of the 8k PGD
|
|
||||||
*
|
|
||||||
* That indicates a kernel CR3 value, not a user CR3.
|
|
||||||
*/
|
*/
|
||||||
testq $(PTI_SWITCH_MASK), \scratch_reg
|
bt $PTI_USER_PGTABLE_BIT, \scratch_reg
|
||||||
jz .Ldone_\@
|
jnc .Ldone_\@
|
||||||
|
|
||||||
ADJUST_KERNEL_CR3 \scratch_reg
|
ADJUST_KERNEL_CR3 \scratch_reg
|
||||||
movq \scratch_reg, %cr3
|
movq \scratch_reg, %cr3
|
||||||
|
@ -290,7 +292,7 @@ For 32-bit we have the following conventions - kernel is built with
|
||||||
* KERNEL pages can always resume with NOFLUSH as we do
|
* KERNEL pages can always resume with NOFLUSH as we do
|
||||||
* explicit flushes.
|
* explicit flushes.
|
||||||
*/
|
*/
|
||||||
bt $X86_CR3_PTI_SWITCH_BIT, \save_reg
|
bt $PTI_USER_PGTABLE_BIT, \save_reg
|
||||||
jnc .Lnoflush_\@
|
jnc .Lnoflush_\@
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
|
|
@ -44,6 +44,7 @@
|
||||||
#include <asm/asm.h>
|
#include <asm/asm.h>
|
||||||
#include <asm/smap.h>
|
#include <asm/smap.h>
|
||||||
#include <asm/frame.h>
|
#include <asm/frame.h>
|
||||||
|
#include <asm/nospec-branch.h>
|
||||||
|
|
||||||
.section .entry.text, "ax"
|
.section .entry.text, "ax"
|
||||||
|
|
||||||
|
@ -243,6 +244,17 @@ ENTRY(__switch_to_asm)
|
||||||
movl %ebx, PER_CPU_VAR(stack_canary)+stack_canary_offset
|
movl %ebx, PER_CPU_VAR(stack_canary)+stack_canary_offset
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#ifdef CONFIG_RETPOLINE
|
||||||
|
/*
|
||||||
|
* When switching from a shallower to a deeper call stack
|
||||||
|
* the RSB may either underflow or use entries populated
|
||||||
|
* with userspace addresses. On CPUs where those concerns
|
||||||
|
* exist, overwrite the RSB with entries which capture
|
||||||
|
* speculative execution to prevent attack.
|
||||||
|
*/
|
||||||
|
FILL_RETURN_BUFFER %ebx, RSB_CLEAR_LOOPS, X86_FEATURE_RSB_CTXSW
|
||||||
|
#endif
|
||||||
|
|
||||||
/* restore callee-saved registers */
|
/* restore callee-saved registers */
|
||||||
popl %esi
|
popl %esi
|
||||||
popl %edi
|
popl %edi
|
||||||
|
@ -290,7 +302,7 @@ ENTRY(ret_from_fork)
|
||||||
|
|
||||||
/* kernel thread */
|
/* kernel thread */
|
||||||
1: movl %edi, %eax
|
1: movl %edi, %eax
|
||||||
call *%ebx
|
CALL_NOSPEC %ebx
|
||||||
/*
|
/*
|
||||||
* A kernel thread is allowed to return here after successfully
|
* A kernel thread is allowed to return here after successfully
|
||||||
* calling do_execve(). Exit to userspace to complete the execve()
|
* calling do_execve(). Exit to userspace to complete the execve()
|
||||||
|
@ -919,7 +931,7 @@ common_exception:
|
||||||
movl %ecx, %es
|
movl %ecx, %es
|
||||||
TRACE_IRQS_OFF
|
TRACE_IRQS_OFF
|
||||||
movl %esp, %eax # pt_regs pointer
|
movl %esp, %eax # pt_regs pointer
|
||||||
call *%edi
|
CALL_NOSPEC %edi
|
||||||
jmp ret_from_exception
|
jmp ret_from_exception
|
||||||
END(common_exception)
|
END(common_exception)
|
||||||
|
|
||||||
|
|
|
@ -37,6 +37,7 @@
|
||||||
#include <asm/pgtable_types.h>
|
#include <asm/pgtable_types.h>
|
||||||
#include <asm/export.h>
|
#include <asm/export.h>
|
||||||
#include <asm/frame.h>
|
#include <asm/frame.h>
|
||||||
|
#include <asm/nospec-branch.h>
|
||||||
#include <linux/err.h>
|
#include <linux/err.h>
|
||||||
|
|
||||||
#include "calling.h"
|
#include "calling.h"
|
||||||
|
@ -187,7 +188,7 @@ ENTRY(entry_SYSCALL_64_trampoline)
|
||||||
*/
|
*/
|
||||||
pushq %rdi
|
pushq %rdi
|
||||||
movq $entry_SYSCALL_64_stage2, %rdi
|
movq $entry_SYSCALL_64_stage2, %rdi
|
||||||
jmp *%rdi
|
JMP_NOSPEC %rdi
|
||||||
END(entry_SYSCALL_64_trampoline)
|
END(entry_SYSCALL_64_trampoline)
|
||||||
|
|
||||||
.popsection
|
.popsection
|
||||||
|
@ -266,7 +267,12 @@ entry_SYSCALL_64_fastpath:
|
||||||
* It might end up jumping to the slow path. If it jumps, RAX
|
* It might end up jumping to the slow path. If it jumps, RAX
|
||||||
* and all argument registers are clobbered.
|
* and all argument registers are clobbered.
|
||||||
*/
|
*/
|
||||||
|
#ifdef CONFIG_RETPOLINE
|
||||||
|
movq sys_call_table(, %rax, 8), %rax
|
||||||
|
call __x86_indirect_thunk_rax
|
||||||
|
#else
|
||||||
call *sys_call_table(, %rax, 8)
|
call *sys_call_table(, %rax, 8)
|
||||||
|
#endif
|
||||||
.Lentry_SYSCALL_64_after_fastpath_call:
|
.Lentry_SYSCALL_64_after_fastpath_call:
|
||||||
|
|
||||||
movq %rax, RAX(%rsp)
|
movq %rax, RAX(%rsp)
|
||||||
|
@ -438,7 +444,7 @@ ENTRY(stub_ptregs_64)
|
||||||
jmp entry_SYSCALL64_slow_path
|
jmp entry_SYSCALL64_slow_path
|
||||||
|
|
||||||
1:
|
1:
|
||||||
jmp *%rax /* Called from C */
|
JMP_NOSPEC %rax /* Called from C */
|
||||||
END(stub_ptregs_64)
|
END(stub_ptregs_64)
|
||||||
|
|
||||||
.macro ptregs_stub func
|
.macro ptregs_stub func
|
||||||
|
@ -481,6 +487,17 @@ ENTRY(__switch_to_asm)
|
||||||
movq %rbx, PER_CPU_VAR(irq_stack_union)+stack_canary_offset
|
movq %rbx, PER_CPU_VAR(irq_stack_union)+stack_canary_offset
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#ifdef CONFIG_RETPOLINE
|
||||||
|
/*
|
||||||
|
* When switching from a shallower to a deeper call stack
|
||||||
|
* the RSB may either underflow or use entries populated
|
||||||
|
* with userspace addresses. On CPUs where those concerns
|
||||||
|
* exist, overwrite the RSB with entries which capture
|
||||||
|
* speculative execution to prevent attack.
|
||||||
|
*/
|
||||||
|
FILL_RETURN_BUFFER %r12, RSB_CLEAR_LOOPS, X86_FEATURE_RSB_CTXSW
|
||||||
|
#endif
|
||||||
|
|
||||||
/* restore callee-saved registers */
|
/* restore callee-saved registers */
|
||||||
popq %r15
|
popq %r15
|
||||||
popq %r14
|
popq %r14
|
||||||
|
@ -517,7 +534,7 @@ ENTRY(ret_from_fork)
|
||||||
1:
|
1:
|
||||||
/* kernel thread */
|
/* kernel thread */
|
||||||
movq %r12, %rdi
|
movq %r12, %rdi
|
||||||
call *%rbx
|
CALL_NOSPEC %rbx
|
||||||
/*
|
/*
|
||||||
* A kernel thread is allowed to return here after successfully
|
* A kernel thread is allowed to return here after successfully
|
||||||
* calling do_execve(). Exit to userspace to complete the execve()
|
* calling do_execve(). Exit to userspace to complete the execve()
|
||||||
|
@ -1241,7 +1258,7 @@ idtentry async_page_fault do_async_page_fault has_error_code=1
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifdef CONFIG_X86_MCE
|
#ifdef CONFIG_X86_MCE
|
||||||
idtentry machine_check has_error_code=0 paranoid=1 do_sym=*machine_check_vector(%rip)
|
idtentry machine_check do_mce has_error_code=0 paranoid=1
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
|
|
@ -277,7 +277,7 @@ static int __init amd_power_pmu_init(void)
|
||||||
int ret;
|
int ret;
|
||||||
|
|
||||||
if (!x86_match_cpu(cpu_match))
|
if (!x86_match_cpu(cpu_match))
|
||||||
return 0;
|
return -ENODEV;
|
||||||
|
|
||||||
if (!boot_cpu_has(X86_FEATURE_ACC_POWER))
|
if (!boot_cpu_has(X86_FEATURE_ACC_POWER))
|
||||||
return -ENODEV;
|
return -ENODEV;
|
||||||
|
|
|
@ -582,6 +582,24 @@ static __init int bts_init(void)
|
||||||
if (!boot_cpu_has(X86_FEATURE_DTES64) || !x86_pmu.bts)
|
if (!boot_cpu_has(X86_FEATURE_DTES64) || !x86_pmu.bts)
|
||||||
return -ENODEV;
|
return -ENODEV;
|
||||||
|
|
||||||
|
if (boot_cpu_has(X86_FEATURE_PTI)) {
|
||||||
|
/*
|
||||||
|
* BTS hardware writes through a virtual memory map; we must
|
||||||
|
* either use the kernel physical map, or the user mapping of
|
||||||
|
* the AUX buffer.
|
||||||
|
*
|
||||||
|
* However, since this driver supports per-CPU and per-task inherit
|
||||||
|
* we cannot use the user mapping since it will not be available
|
||||||
|
* if we're not running the owning process.
|
||||||
|
*
|
||||||
|
* With PTI we can't use the kernel map either, because it's not
|
||||||
|
* there when we run userspace.
|
||||||
|
*
|
||||||
|
* For now, disable this driver when using PTI.
|
||||||
|
*/
|
||||||
|
return -ENODEV;
|
||||||
|
}
|
||||||
|
|
||||||
bts_pmu.capabilities = PERF_PMU_CAP_AUX_NO_SG | PERF_PMU_CAP_ITRACE |
|
bts_pmu.capabilities = PERF_PMU_CAP_AUX_NO_SG | PERF_PMU_CAP_ITRACE |
|
||||||
PERF_PMU_CAP_EXCLUSIVE;
|
PERF_PMU_CAP_EXCLUSIVE;
|
||||||
bts_pmu.task_ctx_nr = perf_sw_context;
|
bts_pmu.task_ctx_nr = perf_sw_context;
|
||||||
|
|
|
@ -11,7 +11,32 @@
|
||||||
#include <asm/pgtable.h>
|
#include <asm/pgtable.h>
|
||||||
#include <asm/special_insns.h>
|
#include <asm/special_insns.h>
|
||||||
#include <asm/preempt.h>
|
#include <asm/preempt.h>
|
||||||
|
#include <asm/asm.h>
|
||||||
|
|
||||||
#ifndef CONFIG_X86_CMPXCHG64
|
#ifndef CONFIG_X86_CMPXCHG64
|
||||||
extern void cmpxchg8b_emu(void);
|
extern void cmpxchg8b_emu(void);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#ifdef CONFIG_RETPOLINE
|
||||||
|
#ifdef CONFIG_X86_32
|
||||||
|
#define INDIRECT_THUNK(reg) extern asmlinkage void __x86_indirect_thunk_e ## reg(void);
|
||||||
|
#else
|
||||||
|
#define INDIRECT_THUNK(reg) extern asmlinkage void __x86_indirect_thunk_r ## reg(void);
|
||||||
|
INDIRECT_THUNK(8)
|
||||||
|
INDIRECT_THUNK(9)
|
||||||
|
INDIRECT_THUNK(10)
|
||||||
|
INDIRECT_THUNK(11)
|
||||||
|
INDIRECT_THUNK(12)
|
||||||
|
INDIRECT_THUNK(13)
|
||||||
|
INDIRECT_THUNK(14)
|
||||||
|
INDIRECT_THUNK(15)
|
||||||
|
#endif
|
||||||
|
INDIRECT_THUNK(ax)
|
||||||
|
INDIRECT_THUNK(bx)
|
||||||
|
INDIRECT_THUNK(cx)
|
||||||
|
INDIRECT_THUNK(dx)
|
||||||
|
INDIRECT_THUNK(si)
|
||||||
|
INDIRECT_THUNK(di)
|
||||||
|
INDIRECT_THUNK(bp)
|
||||||
|
INDIRECT_THUNK(sp)
|
||||||
|
#endif /* CONFIG_RETPOLINE */
|
||||||
|
|
|
@ -203,12 +203,14 @@
|
||||||
#define X86_FEATURE_PROC_FEEDBACK ( 7*32+ 9) /* AMD ProcFeedbackInterface */
|
#define X86_FEATURE_PROC_FEEDBACK ( 7*32+ 9) /* AMD ProcFeedbackInterface */
|
||||||
#define X86_FEATURE_SME ( 7*32+10) /* AMD Secure Memory Encryption */
|
#define X86_FEATURE_SME ( 7*32+10) /* AMD Secure Memory Encryption */
|
||||||
#define X86_FEATURE_PTI ( 7*32+11) /* Kernel Page Table Isolation enabled */
|
#define X86_FEATURE_PTI ( 7*32+11) /* Kernel Page Table Isolation enabled */
|
||||||
|
#define X86_FEATURE_RETPOLINE ( 7*32+12) /* Generic Retpoline mitigation for Spectre variant 2 */
|
||||||
|
#define X86_FEATURE_RETPOLINE_AMD ( 7*32+13) /* AMD Retpoline mitigation for Spectre variant 2 */
|
||||||
#define X86_FEATURE_INTEL_PPIN ( 7*32+14) /* Intel Processor Inventory Number */
|
#define X86_FEATURE_INTEL_PPIN ( 7*32+14) /* Intel Processor Inventory Number */
|
||||||
#define X86_FEATURE_INTEL_PT ( 7*32+15) /* Intel Processor Trace */
|
|
||||||
#define X86_FEATURE_AVX512_4VNNIW ( 7*32+16) /* AVX-512 Neural Network Instructions */
|
#define X86_FEATURE_AVX512_4VNNIW ( 7*32+16) /* AVX-512 Neural Network Instructions */
|
||||||
#define X86_FEATURE_AVX512_4FMAPS ( 7*32+17) /* AVX-512 Multiply Accumulation Single precision */
|
#define X86_FEATURE_AVX512_4FMAPS ( 7*32+17) /* AVX-512 Multiply Accumulation Single precision */
|
||||||
|
|
||||||
#define X86_FEATURE_MBA ( 7*32+18) /* Memory Bandwidth Allocation */
|
#define X86_FEATURE_MBA ( 7*32+18) /* Memory Bandwidth Allocation */
|
||||||
|
#define X86_FEATURE_RSB_CTXSW ( 7*32+19) /* Fill RSB on context switches */
|
||||||
|
|
||||||
/* Virtualization flags: Linux defined, word 8 */
|
/* Virtualization flags: Linux defined, word 8 */
|
||||||
#define X86_FEATURE_TPR_SHADOW ( 8*32+ 0) /* Intel TPR Shadow */
|
#define X86_FEATURE_TPR_SHADOW ( 8*32+ 0) /* Intel TPR Shadow */
|
||||||
|
@ -243,6 +245,7 @@
|
||||||
#define X86_FEATURE_AVX512IFMA ( 9*32+21) /* AVX-512 Integer Fused Multiply-Add instructions */
|
#define X86_FEATURE_AVX512IFMA ( 9*32+21) /* AVX-512 Integer Fused Multiply-Add instructions */
|
||||||
#define X86_FEATURE_CLFLUSHOPT ( 9*32+23) /* CLFLUSHOPT instruction */
|
#define X86_FEATURE_CLFLUSHOPT ( 9*32+23) /* CLFLUSHOPT instruction */
|
||||||
#define X86_FEATURE_CLWB ( 9*32+24) /* CLWB instruction */
|
#define X86_FEATURE_CLWB ( 9*32+24) /* CLWB instruction */
|
||||||
|
#define X86_FEATURE_INTEL_PT ( 9*32+25) /* Intel Processor Trace */
|
||||||
#define X86_FEATURE_AVX512PF ( 9*32+26) /* AVX-512 Prefetch */
|
#define X86_FEATURE_AVX512PF ( 9*32+26) /* AVX-512 Prefetch */
|
||||||
#define X86_FEATURE_AVX512ER ( 9*32+27) /* AVX-512 Exponential and Reciprocal */
|
#define X86_FEATURE_AVX512ER ( 9*32+27) /* AVX-512 Exponential and Reciprocal */
|
||||||
#define X86_FEATURE_AVX512CD ( 9*32+28) /* AVX-512 Conflict Detection */
|
#define X86_FEATURE_AVX512CD ( 9*32+28) /* AVX-512 Conflict Detection */
|
||||||
|
@ -342,5 +345,7 @@
|
||||||
#define X86_BUG_MONITOR X86_BUG(12) /* IPI required to wake up remote CPU */
|
#define X86_BUG_MONITOR X86_BUG(12) /* IPI required to wake up remote CPU */
|
||||||
#define X86_BUG_AMD_E400 X86_BUG(13) /* CPU is among the affected by Erratum 400 */
|
#define X86_BUG_AMD_E400 X86_BUG(13) /* CPU is among the affected by Erratum 400 */
|
||||||
#define X86_BUG_CPU_MELTDOWN X86_BUG(14) /* CPU is affected by meltdown attack and needs kernel page table isolation */
|
#define X86_BUG_CPU_MELTDOWN X86_BUG(14) /* CPU is affected by meltdown attack and needs kernel page table isolation */
|
||||||
|
#define X86_BUG_SPECTRE_V1 X86_BUG(15) /* CPU is affected by Spectre variant 1 attack with conditional branches */
|
||||||
|
#define X86_BUG_SPECTRE_V2 X86_BUG(16) /* CPU is affected by Spectre variant 2 attack with indirect branches */
|
||||||
|
|
||||||
#endif /* _ASM_X86_CPUFEATURES_H */
|
#endif /* _ASM_X86_CPUFEATURES_H */
|
||||||
|
|
|
@ -39,7 +39,7 @@ void __init sme_unmap_bootdata(char *real_mode_data);
|
||||||
|
|
||||||
void __init sme_early_init(void);
|
void __init sme_early_init(void);
|
||||||
|
|
||||||
void __init sme_encrypt_kernel(void);
|
void __init sme_encrypt_kernel(struct boot_params *bp);
|
||||||
void __init sme_enable(struct boot_params *bp);
|
void __init sme_enable(struct boot_params *bp);
|
||||||
|
|
||||||
/* Architecture __weak replacement functions */
|
/* Architecture __weak replacement functions */
|
||||||
|
@ -61,7 +61,7 @@ static inline void __init sme_unmap_bootdata(char *real_mode_data) { }
|
||||||
|
|
||||||
static inline void __init sme_early_init(void) { }
|
static inline void __init sme_early_init(void) { }
|
||||||
|
|
||||||
static inline void __init sme_encrypt_kernel(void) { }
|
static inline void __init sme_encrypt_kernel(struct boot_params *bp) { }
|
||||||
static inline void __init sme_enable(struct boot_params *bp) { }
|
static inline void __init sme_enable(struct boot_params *bp) { }
|
||||||
|
|
||||||
#endif /* CONFIG_AMD_MEM_ENCRYPT */
|
#endif /* CONFIG_AMD_MEM_ENCRYPT */
|
||||||
|
|
|
@ -7,6 +7,7 @@
|
||||||
#include <linux/nmi.h>
|
#include <linux/nmi.h>
|
||||||
#include <asm/io.h>
|
#include <asm/io.h>
|
||||||
#include <asm/hyperv.h>
|
#include <asm/hyperv.h>
|
||||||
|
#include <asm/nospec-branch.h>
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* The below CPUID leaves are present if VersionAndFeatures.HypervisorPresent
|
* The below CPUID leaves are present if VersionAndFeatures.HypervisorPresent
|
||||||
|
@ -186,10 +187,11 @@ static inline u64 hv_do_hypercall(u64 control, void *input, void *output)
|
||||||
return U64_MAX;
|
return U64_MAX;
|
||||||
|
|
||||||
__asm__ __volatile__("mov %4, %%r8\n"
|
__asm__ __volatile__("mov %4, %%r8\n"
|
||||||
"call *%5"
|
CALL_NOSPEC
|
||||||
: "=a" (hv_status), ASM_CALL_CONSTRAINT,
|
: "=a" (hv_status), ASM_CALL_CONSTRAINT,
|
||||||
"+c" (control), "+d" (input_address)
|
"+c" (control), "+d" (input_address)
|
||||||
: "r" (output_address), "m" (hv_hypercall_pg)
|
: "r" (output_address),
|
||||||
|
THUNK_TARGET(hv_hypercall_pg)
|
||||||
: "cc", "memory", "r8", "r9", "r10", "r11");
|
: "cc", "memory", "r8", "r9", "r10", "r11");
|
||||||
#else
|
#else
|
||||||
u32 input_address_hi = upper_32_bits(input_address);
|
u32 input_address_hi = upper_32_bits(input_address);
|
||||||
|
@ -200,13 +202,13 @@ static inline u64 hv_do_hypercall(u64 control, void *input, void *output)
|
||||||
if (!hv_hypercall_pg)
|
if (!hv_hypercall_pg)
|
||||||
return U64_MAX;
|
return U64_MAX;
|
||||||
|
|
||||||
__asm__ __volatile__("call *%7"
|
__asm__ __volatile__(CALL_NOSPEC
|
||||||
: "=A" (hv_status),
|
: "=A" (hv_status),
|
||||||
"+c" (input_address_lo), ASM_CALL_CONSTRAINT
|
"+c" (input_address_lo), ASM_CALL_CONSTRAINT
|
||||||
: "A" (control),
|
: "A" (control),
|
||||||
"b" (input_address_hi),
|
"b" (input_address_hi),
|
||||||
"D"(output_address_hi), "S"(output_address_lo),
|
"D"(output_address_hi), "S"(output_address_lo),
|
||||||
"m" (hv_hypercall_pg)
|
THUNK_TARGET(hv_hypercall_pg)
|
||||||
: "cc", "memory");
|
: "cc", "memory");
|
||||||
#endif /* !x86_64 */
|
#endif /* !x86_64 */
|
||||||
return hv_status;
|
return hv_status;
|
||||||
|
@ -227,10 +229,10 @@ static inline u64 hv_do_fast_hypercall8(u16 code, u64 input1)
|
||||||
|
|
||||||
#ifdef CONFIG_X86_64
|
#ifdef CONFIG_X86_64
|
||||||
{
|
{
|
||||||
__asm__ __volatile__("call *%4"
|
__asm__ __volatile__(CALL_NOSPEC
|
||||||
: "=a" (hv_status), ASM_CALL_CONSTRAINT,
|
: "=a" (hv_status), ASM_CALL_CONSTRAINT,
|
||||||
"+c" (control), "+d" (input1)
|
"+c" (control), "+d" (input1)
|
||||||
: "m" (hv_hypercall_pg)
|
: THUNK_TARGET(hv_hypercall_pg)
|
||||||
: "cc", "r8", "r9", "r10", "r11");
|
: "cc", "r8", "r9", "r10", "r11");
|
||||||
}
|
}
|
||||||
#else
|
#else
|
||||||
|
@ -238,13 +240,13 @@ static inline u64 hv_do_fast_hypercall8(u16 code, u64 input1)
|
||||||
u32 input1_hi = upper_32_bits(input1);
|
u32 input1_hi = upper_32_bits(input1);
|
||||||
u32 input1_lo = lower_32_bits(input1);
|
u32 input1_lo = lower_32_bits(input1);
|
||||||
|
|
||||||
__asm__ __volatile__ ("call *%5"
|
__asm__ __volatile__ (CALL_NOSPEC
|
||||||
: "=A"(hv_status),
|
: "=A"(hv_status),
|
||||||
"+c"(input1_lo),
|
"+c"(input1_lo),
|
||||||
ASM_CALL_CONSTRAINT
|
ASM_CALL_CONSTRAINT
|
||||||
: "A" (control),
|
: "A" (control),
|
||||||
"b" (input1_hi),
|
"b" (input1_hi),
|
||||||
"m" (hv_hypercall_pg)
|
THUNK_TARGET(hv_hypercall_pg)
|
||||||
: "cc", "edi", "esi");
|
: "cc", "edi", "esi");
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
|
@ -352,6 +352,9 @@
|
||||||
#define FAM10H_MMIO_CONF_BASE_MASK 0xfffffffULL
|
#define FAM10H_MMIO_CONF_BASE_MASK 0xfffffffULL
|
||||||
#define FAM10H_MMIO_CONF_BASE_SHIFT 20
|
#define FAM10H_MMIO_CONF_BASE_SHIFT 20
|
||||||
#define MSR_FAM10H_NODE_ID 0xc001100c
|
#define MSR_FAM10H_NODE_ID 0xc001100c
|
||||||
|
#define MSR_F10H_DECFG 0xc0011029
|
||||||
|
#define MSR_F10H_DECFG_LFENCE_SERIALIZE_BIT 1
|
||||||
|
#define MSR_F10H_DECFG_LFENCE_SERIALIZE BIT_ULL(MSR_F10H_DECFG_LFENCE_SERIALIZE_BIT)
|
||||||
|
|
||||||
/* K8 MSRs */
|
/* K8 MSRs */
|
||||||
#define MSR_K8_TOP_MEM1 0xc001001a
|
#define MSR_K8_TOP_MEM1 0xc001001a
|
||||||
|
|
222
arch/x86/include/asm/nospec-branch.h
Normal file
222
arch/x86/include/asm/nospec-branch.h
Normal file
|
@ -0,0 +1,222 @@
|
||||||
|
/* SPDX-License-Identifier: GPL-2.0 */
|
||||||
|
|
||||||
|
#ifndef __NOSPEC_BRANCH_H__
|
||||||
|
#define __NOSPEC_BRANCH_H__
|
||||||
|
|
||||||
|
#include <asm/alternative.h>
|
||||||
|
#include <asm/alternative-asm.h>
|
||||||
|
#include <asm/cpufeatures.h>
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Fill the CPU return stack buffer.
|
||||||
|
*
|
||||||
|
* Each entry in the RSB, if used for a speculative 'ret', contains an
|
||||||
|
* infinite 'pause; lfence; jmp' loop to capture speculative execution.
|
||||||
|
*
|
||||||
|
* This is required in various cases for retpoline and IBRS-based
|
||||||
|
* mitigations for the Spectre variant 2 vulnerability. Sometimes to
|
||||||
|
* eliminate potentially bogus entries from the RSB, and sometimes
|
||||||
|
* purely to ensure that it doesn't get empty, which on some CPUs would
|
||||||
|
* allow predictions from other (unwanted!) sources to be used.
|
||||||
|
*
|
||||||
|
* We define a CPP macro such that it can be used from both .S files and
|
||||||
|
* inline assembly. It's possible to do a .macro and then include that
|
||||||
|
* from C via asm(".include <asm/nospec-branch.h>") but let's not go there.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#define RSB_CLEAR_LOOPS 32 /* To forcibly overwrite all entries */
|
||||||
|
#define RSB_FILL_LOOPS 16 /* To avoid underflow */
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Google experimented with loop-unrolling and this turned out to be
|
||||||
|
* the optimal version — two calls, each with their own speculation
|
||||||
|
* trap should their return address end up getting used, in a loop.
|
||||||
|
*/
|
||||||
|
#define __FILL_RETURN_BUFFER(reg, nr, sp) \
|
||||||
|
mov $(nr/2), reg; \
|
||||||
|
771: \
|
||||||
|
call 772f; \
|
||||||
|
773: /* speculation trap */ \
|
||||||
|
pause; \
|
||||||
|
lfence; \
|
||||||
|
jmp 773b; \
|
||||||
|
772: \
|
||||||
|
call 774f; \
|
||||||
|
775: /* speculation trap */ \
|
||||||
|
pause; \
|
||||||
|
lfence; \
|
||||||
|
jmp 775b; \
|
||||||
|
774: \
|
||||||
|
dec reg; \
|
||||||
|
jnz 771b; \
|
||||||
|
add $(BITS_PER_LONG/8) * nr, sp;
|
||||||
|
|
||||||
|
#ifdef __ASSEMBLY__
|
||||||
|
|
||||||
|
/*
|
||||||
|
* This should be used immediately before a retpoline alternative. It tells
|
||||||
|
* objtool where the retpolines are so that it can make sense of the control
|
||||||
|
* flow by just reading the original instruction(s) and ignoring the
|
||||||
|
* alternatives.
|
||||||
|
*/
|
||||||
|
.macro ANNOTATE_NOSPEC_ALTERNATIVE
|
||||||
|
.Lannotate_\@:
|
||||||
|
.pushsection .discard.nospec
|
||||||
|
.long .Lannotate_\@ - .
|
||||||
|
.popsection
|
||||||
|
.endm
|
||||||
|
|
||||||
|
/*
|
||||||
|
* These are the bare retpoline primitives for indirect jmp and call.
|
||||||
|
* Do not use these directly; they only exist to make the ALTERNATIVE
|
||||||
|
* invocation below less ugly.
|
||||||
|
*/
|
||||||
|
.macro RETPOLINE_JMP reg:req
|
||||||
|
call .Ldo_rop_\@
|
||||||
|
.Lspec_trap_\@:
|
||||||
|
pause
|
||||||
|
lfence
|
||||||
|
jmp .Lspec_trap_\@
|
||||||
|
.Ldo_rop_\@:
|
||||||
|
mov \reg, (%_ASM_SP)
|
||||||
|
ret
|
||||||
|
.endm
|
||||||
|
|
||||||
|
/*
|
||||||
|
* This is a wrapper around RETPOLINE_JMP so the called function in reg
|
||||||
|
* returns to the instruction after the macro.
|
||||||
|
*/
|
||||||
|
.macro RETPOLINE_CALL reg:req
|
||||||
|
jmp .Ldo_call_\@
|
||||||
|
.Ldo_retpoline_jmp_\@:
|
||||||
|
RETPOLINE_JMP \reg
|
||||||
|
.Ldo_call_\@:
|
||||||
|
call .Ldo_retpoline_jmp_\@
|
||||||
|
.endm
|
||||||
|
|
||||||
|
/*
|
||||||
|
* JMP_NOSPEC and CALL_NOSPEC macros can be used instead of a simple
|
||||||
|
* indirect jmp/call which may be susceptible to the Spectre variant 2
|
||||||
|
* attack.
|
||||||
|
*/
|
||||||
|
.macro JMP_NOSPEC reg:req
|
||||||
|
#ifdef CONFIG_RETPOLINE
|
||||||
|
ANNOTATE_NOSPEC_ALTERNATIVE
|
||||||
|
ALTERNATIVE_2 __stringify(jmp *\reg), \
|
||||||
|
__stringify(RETPOLINE_JMP \reg), X86_FEATURE_RETPOLINE, \
|
||||||
|
__stringify(lfence; jmp *\reg), X86_FEATURE_RETPOLINE_AMD
|
||||||
|
#else
|
||||||
|
jmp *\reg
|
||||||
|
#endif
|
||||||
|
.endm
|
||||||
|
|
||||||
|
.macro CALL_NOSPEC reg:req
|
||||||
|
#ifdef CONFIG_RETPOLINE
|
||||||
|
ANNOTATE_NOSPEC_ALTERNATIVE
|
||||||
|
ALTERNATIVE_2 __stringify(call *\reg), \
|
||||||
|
__stringify(RETPOLINE_CALL \reg), X86_FEATURE_RETPOLINE,\
|
||||||
|
__stringify(lfence; call *\reg), X86_FEATURE_RETPOLINE_AMD
|
||||||
|
#else
|
||||||
|
call *\reg
|
||||||
|
#endif
|
||||||
|
.endm
|
||||||
|
|
||||||
|
/*
|
||||||
|
* A simpler FILL_RETURN_BUFFER macro. Don't make people use the CPP
|
||||||
|
* monstrosity above, manually.
|
||||||
|
*/
|
||||||
|
.macro FILL_RETURN_BUFFER reg:req nr:req ftr:req
|
||||||
|
#ifdef CONFIG_RETPOLINE
|
||||||
|
ANNOTATE_NOSPEC_ALTERNATIVE
|
||||||
|
ALTERNATIVE "jmp .Lskip_rsb_\@", \
|
||||||
|
__stringify(__FILL_RETURN_BUFFER(\reg,\nr,%_ASM_SP)) \
|
||||||
|
\ftr
|
||||||
|
.Lskip_rsb_\@:
|
||||||
|
#endif
|
||||||
|
.endm
|
||||||
|
|
||||||
|
#else /* __ASSEMBLY__ */
|
||||||
|
|
||||||
|
#define ANNOTATE_NOSPEC_ALTERNATIVE \
|
||||||
|
"999:\n\t" \
|
||||||
|
".pushsection .discard.nospec\n\t" \
|
||||||
|
".long 999b - .\n\t" \
|
||||||
|
".popsection\n\t"
|
||||||
|
|
||||||
|
#if defined(CONFIG_X86_64) && defined(RETPOLINE)
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Since the inline asm uses the %V modifier which is only in newer GCC,
|
||||||
|
* the 64-bit one is dependent on RETPOLINE not CONFIG_RETPOLINE.
|
||||||
|
*/
|
||||||
|
# define CALL_NOSPEC \
|
||||||
|
ANNOTATE_NOSPEC_ALTERNATIVE \
|
||||||
|
ALTERNATIVE( \
|
||||||
|
"call *%[thunk_target]\n", \
|
||||||
|
"call __x86_indirect_thunk_%V[thunk_target]\n", \
|
||||||
|
X86_FEATURE_RETPOLINE)
|
||||||
|
# define THUNK_TARGET(addr) [thunk_target] "r" (addr)
|
||||||
|
|
||||||
|
#elif defined(CONFIG_X86_32) && defined(CONFIG_RETPOLINE)
|
||||||
|
/*
|
||||||
|
* For i386 we use the original ret-equivalent retpoline, because
|
||||||
|
* otherwise we'll run out of registers. We don't care about CET
|
||||||
|
* here, anyway.
|
||||||
|
*/
|
||||||
|
# define CALL_NOSPEC ALTERNATIVE("call *%[thunk_target]\n", \
|
||||||
|
" jmp 904f;\n" \
|
||||||
|
" .align 16\n" \
|
||||||
|
"901: call 903f;\n" \
|
||||||
|
"902: pause;\n" \
|
||||||
|
" lfence;\n" \
|
||||||
|
" jmp 902b;\n" \
|
||||||
|
" .align 16\n" \
|
||||||
|
"903: addl $4, %%esp;\n" \
|
||||||
|
" pushl %[thunk_target];\n" \
|
||||||
|
" ret;\n" \
|
||||||
|
" .align 16\n" \
|
||||||
|
"904: call 901b;\n", \
|
||||||
|
X86_FEATURE_RETPOLINE)
|
||||||
|
|
||||||
|
# define THUNK_TARGET(addr) [thunk_target] "rm" (addr)
|
||||||
|
#else /* No retpoline for C / inline asm */
|
||||||
|
# define CALL_NOSPEC "call *%[thunk_target]\n"
|
||||||
|
# define THUNK_TARGET(addr) [thunk_target] "rm" (addr)
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/* The Spectre V2 mitigation variants */
|
||||||
|
enum spectre_v2_mitigation {
|
||||||
|
SPECTRE_V2_NONE,
|
||||||
|
SPECTRE_V2_RETPOLINE_MINIMAL,
|
||||||
|
SPECTRE_V2_RETPOLINE_MINIMAL_AMD,
|
||||||
|
SPECTRE_V2_RETPOLINE_GENERIC,
|
||||||
|
SPECTRE_V2_RETPOLINE_AMD,
|
||||||
|
SPECTRE_V2_IBRS,
|
||||||
|
};
|
||||||
|
|
||||||
|
extern char __indirect_thunk_start[];
|
||||||
|
extern char __indirect_thunk_end[];
|
||||||
|
|
||||||
|
/*
|
||||||
|
* On VMEXIT we must ensure that no RSB predictions learned in the guest
|
||||||
|
* can be followed in the host, by overwriting the RSB completely. Both
|
||||||
|
* retpoline and IBRS mitigations for Spectre v2 need this; only on future
|
||||||
|
* CPUs with IBRS_ATT *might* it be avoided.
|
||||||
|
*/
|
||||||
|
static inline void vmexit_fill_RSB(void)
|
||||||
|
{
|
||||||
|
#ifdef CONFIG_RETPOLINE
|
||||||
|
unsigned long loops;
|
||||||
|
|
||||||
|
asm volatile (ANNOTATE_NOSPEC_ALTERNATIVE
|
||||||
|
ALTERNATIVE("jmp 910f",
|
||||||
|
__stringify(__FILL_RETURN_BUFFER(%0, RSB_CLEAR_LOOPS, %1)),
|
||||||
|
X86_FEATURE_RETPOLINE)
|
||||||
|
"910:"
|
||||||
|
: "=r" (loops), ASM_CALL_CONSTRAINT
|
||||||
|
: : "memory" );
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
|
#endif /* __ASSEMBLY__ */
|
||||||
|
#endif /* __NOSPEC_BRANCH_H__ */
|
|
@ -40,7 +40,7 @@
|
||||||
#define CR3_NOFLUSH BIT_ULL(63)
|
#define CR3_NOFLUSH BIT_ULL(63)
|
||||||
|
|
||||||
#ifdef CONFIG_PAGE_TABLE_ISOLATION
|
#ifdef CONFIG_PAGE_TABLE_ISOLATION
|
||||||
# define X86_CR3_PTI_SWITCH_BIT 11
|
# define X86_CR3_PTI_PCID_USER_BIT 11
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#else
|
#else
|
||||||
|
|
|
@ -81,13 +81,13 @@ static inline u16 kern_pcid(u16 asid)
|
||||||
* Make sure that the dynamic ASID space does not conflict with the
|
* Make sure that the dynamic ASID space does not conflict with the
|
||||||
* bit we are using to switch between user and kernel ASIDs.
|
* bit we are using to switch between user and kernel ASIDs.
|
||||||
*/
|
*/
|
||||||
BUILD_BUG_ON(TLB_NR_DYN_ASIDS >= (1 << X86_CR3_PTI_SWITCH_BIT));
|
BUILD_BUG_ON(TLB_NR_DYN_ASIDS >= (1 << X86_CR3_PTI_PCID_USER_BIT));
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* The ASID being passed in here should have respected the
|
* The ASID being passed in here should have respected the
|
||||||
* MAX_ASID_AVAILABLE and thus never have the switch bit set.
|
* MAX_ASID_AVAILABLE and thus never have the switch bit set.
|
||||||
*/
|
*/
|
||||||
VM_WARN_ON_ONCE(asid & (1 << X86_CR3_PTI_SWITCH_BIT));
|
VM_WARN_ON_ONCE(asid & (1 << X86_CR3_PTI_PCID_USER_BIT));
|
||||||
#endif
|
#endif
|
||||||
/*
|
/*
|
||||||
* The dynamically-assigned ASIDs that get passed in are small
|
* The dynamically-assigned ASIDs that get passed in are small
|
||||||
|
@ -112,7 +112,7 @@ static inline u16 user_pcid(u16 asid)
|
||||||
{
|
{
|
||||||
u16 ret = kern_pcid(asid);
|
u16 ret = kern_pcid(asid);
|
||||||
#ifdef CONFIG_PAGE_TABLE_ISOLATION
|
#ifdef CONFIG_PAGE_TABLE_ISOLATION
|
||||||
ret |= 1 << X86_CR3_PTI_SWITCH_BIT;
|
ret |= 1 << X86_CR3_PTI_PCID_USER_BIT;
|
||||||
#endif
|
#endif
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
|
@ -88,6 +88,7 @@ dotraplinkage void do_simd_coprocessor_error(struct pt_regs *, long);
|
||||||
#ifdef CONFIG_X86_32
|
#ifdef CONFIG_X86_32
|
||||||
dotraplinkage void do_iret_error(struct pt_regs *, long);
|
dotraplinkage void do_iret_error(struct pt_regs *, long);
|
||||||
#endif
|
#endif
|
||||||
|
dotraplinkage void do_mce(struct pt_regs *, long);
|
||||||
|
|
||||||
static inline int get_si_code(unsigned long condition)
|
static inline int get_si_code(unsigned long condition)
|
||||||
{
|
{
|
||||||
|
|
|
@ -44,6 +44,7 @@
|
||||||
#include <asm/page.h>
|
#include <asm/page.h>
|
||||||
#include <asm/pgtable.h>
|
#include <asm/pgtable.h>
|
||||||
#include <asm/smap.h>
|
#include <asm/smap.h>
|
||||||
|
#include <asm/nospec-branch.h>
|
||||||
|
|
||||||
#include <xen/interface/xen.h>
|
#include <xen/interface/xen.h>
|
||||||
#include <xen/interface/sched.h>
|
#include <xen/interface/sched.h>
|
||||||
|
@ -217,9 +218,9 @@ privcmd_call(unsigned call,
|
||||||
__HYPERCALL_5ARG(a1, a2, a3, a4, a5);
|
__HYPERCALL_5ARG(a1, a2, a3, a4, a5);
|
||||||
|
|
||||||
stac();
|
stac();
|
||||||
asm volatile("call *%[call]"
|
asm volatile(CALL_NOSPEC
|
||||||
: __HYPERCALL_5PARAM
|
: __HYPERCALL_5PARAM
|
||||||
: [call] "a" (&hypercall_page[call])
|
: [thunk_target] "a" (&hypercall_page[call])
|
||||||
: __HYPERCALL_CLOBBER5);
|
: __HYPERCALL_CLOBBER5);
|
||||||
clac();
|
clac();
|
||||||
|
|
||||||
|
|
|
@ -342,13 +342,12 @@ acpi_parse_lapic_nmi(struct acpi_subtable_header * header, const unsigned long e
|
||||||
#ifdef CONFIG_X86_IO_APIC
|
#ifdef CONFIG_X86_IO_APIC
|
||||||
#define MP_ISA_BUS 0
|
#define MP_ISA_BUS 0
|
||||||
|
|
||||||
|
static int __init mp_register_ioapic_irq(u8 bus_irq, u8 polarity,
|
||||||
|
u8 trigger, u32 gsi);
|
||||||
|
|
||||||
static void __init mp_override_legacy_irq(u8 bus_irq, u8 polarity, u8 trigger,
|
static void __init mp_override_legacy_irq(u8 bus_irq, u8 polarity, u8 trigger,
|
||||||
u32 gsi)
|
u32 gsi)
|
||||||
{
|
{
|
||||||
int ioapic;
|
|
||||||
int pin;
|
|
||||||
struct mpc_intsrc mp_irq;
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Check bus_irq boundary.
|
* Check bus_irq boundary.
|
||||||
*/
|
*/
|
||||||
|
@ -357,14 +356,6 @@ static void __init mp_override_legacy_irq(u8 bus_irq, u8 polarity, u8 trigger,
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
|
||||||
* Convert 'gsi' to 'ioapic.pin'.
|
|
||||||
*/
|
|
||||||
ioapic = mp_find_ioapic(gsi);
|
|
||||||
if (ioapic < 0)
|
|
||||||
return;
|
|
||||||
pin = mp_find_ioapic_pin(ioapic, gsi);
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* TBD: This check is for faulty timer entries, where the override
|
* TBD: This check is for faulty timer entries, where the override
|
||||||
* erroneously sets the trigger to level, resulting in a HUGE
|
* erroneously sets the trigger to level, resulting in a HUGE
|
||||||
|
@ -373,16 +364,8 @@ static void __init mp_override_legacy_irq(u8 bus_irq, u8 polarity, u8 trigger,
|
||||||
if ((bus_irq == 0) && (trigger == 3))
|
if ((bus_irq == 0) && (trigger == 3))
|
||||||
trigger = 1;
|
trigger = 1;
|
||||||
|
|
||||||
mp_irq.type = MP_INTSRC;
|
if (mp_register_ioapic_irq(bus_irq, polarity, trigger, gsi) < 0)
|
||||||
mp_irq.irqtype = mp_INT;
|
return;
|
||||||
mp_irq.irqflag = (trigger << 2) | polarity;
|
|
||||||
mp_irq.srcbus = MP_ISA_BUS;
|
|
||||||
mp_irq.srcbusirq = bus_irq; /* IRQ */
|
|
||||||
mp_irq.dstapic = mpc_ioapic_id(ioapic); /* APIC ID */
|
|
||||||
mp_irq.dstirq = pin; /* INTIN# */
|
|
||||||
|
|
||||||
mp_save_irq(&mp_irq);
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Reset default identity mapping if gsi is also a legacy IRQ,
|
* Reset default identity mapping if gsi is also a legacy IRQ,
|
||||||
* otherwise there will be more than one entry with the same GSI
|
* otherwise there will be more than one entry with the same GSI
|
||||||
|
@ -429,6 +412,34 @@ static int mp_config_acpi_gsi(struct device *dev, u32 gsi, int trigger,
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static int __init mp_register_ioapic_irq(u8 bus_irq, u8 polarity,
|
||||||
|
u8 trigger, u32 gsi)
|
||||||
|
{
|
||||||
|
struct mpc_intsrc mp_irq;
|
||||||
|
int ioapic, pin;
|
||||||
|
|
||||||
|
/* Convert 'gsi' to 'ioapic.pin'(INTIN#) */
|
||||||
|
ioapic = mp_find_ioapic(gsi);
|
||||||
|
if (ioapic < 0) {
|
||||||
|
pr_warn("Failed to find ioapic for gsi : %u\n", gsi);
|
||||||
|
return ioapic;
|
||||||
|
}
|
||||||
|
|
||||||
|
pin = mp_find_ioapic_pin(ioapic, gsi);
|
||||||
|
|
||||||
|
mp_irq.type = MP_INTSRC;
|
||||||
|
mp_irq.irqtype = mp_INT;
|
||||||
|
mp_irq.irqflag = (trigger << 2) | polarity;
|
||||||
|
mp_irq.srcbus = MP_ISA_BUS;
|
||||||
|
mp_irq.srcbusirq = bus_irq;
|
||||||
|
mp_irq.dstapic = mpc_ioapic_id(ioapic);
|
||||||
|
mp_irq.dstirq = pin;
|
||||||
|
|
||||||
|
mp_save_irq(&mp_irq);
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
static int __init
|
static int __init
|
||||||
acpi_parse_ioapic(struct acpi_subtable_header * header, const unsigned long end)
|
acpi_parse_ioapic(struct acpi_subtable_header * header, const unsigned long end)
|
||||||
{
|
{
|
||||||
|
@ -473,7 +484,11 @@ static void __init acpi_sci_ioapic_setup(u8 bus_irq, u16 polarity, u16 trigger,
|
||||||
if (acpi_sci_flags & ACPI_MADT_POLARITY_MASK)
|
if (acpi_sci_flags & ACPI_MADT_POLARITY_MASK)
|
||||||
polarity = acpi_sci_flags & ACPI_MADT_POLARITY_MASK;
|
polarity = acpi_sci_flags & ACPI_MADT_POLARITY_MASK;
|
||||||
|
|
||||||
mp_override_legacy_irq(bus_irq, polarity, trigger, gsi);
|
if (bus_irq < NR_IRQS_LEGACY)
|
||||||
|
mp_override_legacy_irq(bus_irq, polarity, trigger, gsi);
|
||||||
|
else
|
||||||
|
mp_register_ioapic_irq(bus_irq, polarity, trigger, gsi);
|
||||||
|
|
||||||
acpi_penalize_sci_irq(bus_irq, trigger, polarity);
|
acpi_penalize_sci_irq(bus_irq, trigger, polarity);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
|
|
@ -344,9 +344,12 @@ recompute_jump(struct alt_instr *a, u8 *orig_insn, u8 *repl_insn, u8 *insnbuf)
|
||||||
static void __init_or_module noinline optimize_nops(struct alt_instr *a, u8 *instr)
|
static void __init_or_module noinline optimize_nops(struct alt_instr *a, u8 *instr)
|
||||||
{
|
{
|
||||||
unsigned long flags;
|
unsigned long flags;
|
||||||
|
int i;
|
||||||
|
|
||||||
if (instr[0] != 0x90)
|
for (i = 0; i < a->padlen; i++) {
|
||||||
return;
|
if (instr[i] != 0x90)
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
local_irq_save(flags);
|
local_irq_save(flags);
|
||||||
add_nops(instr + (a->instrlen - a->padlen), a->padlen);
|
add_nops(instr + (a->instrlen - a->padlen), a->padlen);
|
||||||
|
|
|
@ -369,8 +369,11 @@ static int x86_vector_alloc_irqs(struct irq_domain *domain, unsigned int virq,
|
||||||
irq_data->hwirq = virq + i;
|
irq_data->hwirq = virq + i;
|
||||||
err = assign_irq_vector_policy(virq + i, node, data, info,
|
err = assign_irq_vector_policy(virq + i, node, data, info,
|
||||||
irq_data);
|
irq_data);
|
||||||
if (err)
|
if (err) {
|
||||||
|
irq_data->chip_data = NULL;
|
||||||
|
free_apic_chip_data(data);
|
||||||
goto error;
|
goto error;
|
||||||
|
}
|
||||||
/*
|
/*
|
||||||
* If the apic destination mode is physical, then the
|
* If the apic destination mode is physical, then the
|
||||||
* effective affinity is restricted to a single target
|
* effective affinity is restricted to a single target
|
||||||
|
@ -383,7 +386,7 @@ static int x86_vector_alloc_irqs(struct irq_domain *domain, unsigned int virq,
|
||||||
return 0;
|
return 0;
|
||||||
|
|
||||||
error:
|
error:
|
||||||
x86_vector_free_irqs(domain, virq, i + 1);
|
x86_vector_free_irqs(domain, virq, i);
|
||||||
return err;
|
return err;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -829,8 +829,32 @@ static void init_amd(struct cpuinfo_x86 *c)
|
||||||
set_cpu_cap(c, X86_FEATURE_K8);
|
set_cpu_cap(c, X86_FEATURE_K8);
|
||||||
|
|
||||||
if (cpu_has(c, X86_FEATURE_XMM2)) {
|
if (cpu_has(c, X86_FEATURE_XMM2)) {
|
||||||
/* MFENCE stops RDTSC speculation */
|
unsigned long long val;
|
||||||
set_cpu_cap(c, X86_FEATURE_MFENCE_RDTSC);
|
int ret;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* A serializing LFENCE has less overhead than MFENCE, so
|
||||||
|
* use it for execution serialization. On families which
|
||||||
|
* don't have that MSR, LFENCE is already serializing.
|
||||||
|
* msr_set_bit() uses the safe accessors, too, even if the MSR
|
||||||
|
* is not present.
|
||||||
|
*/
|
||||||
|
msr_set_bit(MSR_F10H_DECFG,
|
||||||
|
MSR_F10H_DECFG_LFENCE_SERIALIZE_BIT);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Verify that the MSR write was successful (could be running
|
||||||
|
* under a hypervisor) and only then assume that LFENCE is
|
||||||
|
* serializing.
|
||||||
|
*/
|
||||||
|
ret = rdmsrl_safe(MSR_F10H_DECFG, &val);
|
||||||
|
if (!ret && (val & MSR_F10H_DECFG_LFENCE_SERIALIZE)) {
|
||||||
|
/* A serializing LFENCE stops RDTSC speculation */
|
||||||
|
set_cpu_cap(c, X86_FEATURE_LFENCE_RDTSC);
|
||||||
|
} else {
|
||||||
|
/* MFENCE stops RDTSC speculation */
|
||||||
|
set_cpu_cap(c, X86_FEATURE_MFENCE_RDTSC);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
|
|
@ -10,6 +10,10 @@
|
||||||
*/
|
*/
|
||||||
#include <linux/init.h>
|
#include <linux/init.h>
|
||||||
#include <linux/utsname.h>
|
#include <linux/utsname.h>
|
||||||
|
#include <linux/cpu.h>
|
||||||
|
|
||||||
|
#include <asm/nospec-branch.h>
|
||||||
|
#include <asm/cmdline.h>
|
||||||
#include <asm/bugs.h>
|
#include <asm/bugs.h>
|
||||||
#include <asm/processor.h>
|
#include <asm/processor.h>
|
||||||
#include <asm/processor-flags.h>
|
#include <asm/processor-flags.h>
|
||||||
|
@ -19,6 +23,9 @@
|
||||||
#include <asm/alternative.h>
|
#include <asm/alternative.h>
|
||||||
#include <asm/pgtable.h>
|
#include <asm/pgtable.h>
|
||||||
#include <asm/set_memory.h>
|
#include <asm/set_memory.h>
|
||||||
|
#include <asm/intel-family.h>
|
||||||
|
|
||||||
|
static void __init spectre_v2_select_mitigation(void);
|
||||||
|
|
||||||
void __init check_bugs(void)
|
void __init check_bugs(void)
|
||||||
{
|
{
|
||||||
|
@ -29,6 +36,9 @@ void __init check_bugs(void)
|
||||||
print_cpu_info(&boot_cpu_data);
|
print_cpu_info(&boot_cpu_data);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Select the proper spectre mitigation before patching alternatives */
|
||||||
|
spectre_v2_select_mitigation();
|
||||||
|
|
||||||
#ifdef CONFIG_X86_32
|
#ifdef CONFIG_X86_32
|
||||||
/*
|
/*
|
||||||
* Check whether we are able to run this kernel safely on SMP.
|
* Check whether we are able to run this kernel safely on SMP.
|
||||||
|
@ -60,3 +70,214 @@ void __init check_bugs(void)
|
||||||
set_memory_4k((unsigned long)__va(0), 1);
|
set_memory_4k((unsigned long)__va(0), 1);
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* The kernel command line selection */
|
||||||
|
enum spectre_v2_mitigation_cmd {
|
||||||
|
SPECTRE_V2_CMD_NONE,
|
||||||
|
SPECTRE_V2_CMD_AUTO,
|
||||||
|
SPECTRE_V2_CMD_FORCE,
|
||||||
|
SPECTRE_V2_CMD_RETPOLINE,
|
||||||
|
SPECTRE_V2_CMD_RETPOLINE_GENERIC,
|
||||||
|
SPECTRE_V2_CMD_RETPOLINE_AMD,
|
||||||
|
};
|
||||||
|
|
||||||
|
static const char *spectre_v2_strings[] = {
|
||||||
|
[SPECTRE_V2_NONE] = "Vulnerable",
|
||||||
|
[SPECTRE_V2_RETPOLINE_MINIMAL] = "Vulnerable: Minimal generic ASM retpoline",
|
||||||
|
[SPECTRE_V2_RETPOLINE_MINIMAL_AMD] = "Vulnerable: Minimal AMD ASM retpoline",
|
||||||
|
[SPECTRE_V2_RETPOLINE_GENERIC] = "Mitigation: Full generic retpoline",
|
||||||
|
[SPECTRE_V2_RETPOLINE_AMD] = "Mitigation: Full AMD retpoline",
|
||||||
|
};
|
||||||
|
|
||||||
|
#undef pr_fmt
|
||||||
|
#define pr_fmt(fmt) "Spectre V2 mitigation: " fmt
|
||||||
|
|
||||||
|
static enum spectre_v2_mitigation spectre_v2_enabled = SPECTRE_V2_NONE;
|
||||||
|
|
||||||
|
static void __init spec2_print_if_insecure(const char *reason)
|
||||||
|
{
|
||||||
|
if (boot_cpu_has_bug(X86_BUG_SPECTRE_V2))
|
||||||
|
pr_info("%s\n", reason);
|
||||||
|
}
|
||||||
|
|
||||||
|
static void __init spec2_print_if_secure(const char *reason)
|
||||||
|
{
|
||||||
|
if (!boot_cpu_has_bug(X86_BUG_SPECTRE_V2))
|
||||||
|
pr_info("%s\n", reason);
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline bool retp_compiler(void)
|
||||||
|
{
|
||||||
|
return __is_defined(RETPOLINE);
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline bool match_option(const char *arg, int arglen, const char *opt)
|
||||||
|
{
|
||||||
|
int len = strlen(opt);
|
||||||
|
|
||||||
|
return len == arglen && !strncmp(arg, opt, len);
|
||||||
|
}
|
||||||
|
|
||||||
|
static enum spectre_v2_mitigation_cmd __init spectre_v2_parse_cmdline(void)
|
||||||
|
{
|
||||||
|
char arg[20];
|
||||||
|
int ret;
|
||||||
|
|
||||||
|
ret = cmdline_find_option(boot_command_line, "spectre_v2", arg,
|
||||||
|
sizeof(arg));
|
||||||
|
if (ret > 0) {
|
||||||
|
if (match_option(arg, ret, "off")) {
|
||||||
|
goto disable;
|
||||||
|
} else if (match_option(arg, ret, "on")) {
|
||||||
|
spec2_print_if_secure("force enabled on command line.");
|
||||||
|
return SPECTRE_V2_CMD_FORCE;
|
||||||
|
} else if (match_option(arg, ret, "retpoline")) {
|
||||||
|
spec2_print_if_insecure("retpoline selected on command line.");
|
||||||
|
return SPECTRE_V2_CMD_RETPOLINE;
|
||||||
|
} else if (match_option(arg, ret, "retpoline,amd")) {
|
||||||
|
if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD) {
|
||||||
|
pr_err("retpoline,amd selected but CPU is not AMD. Switching to AUTO select\n");
|
||||||
|
return SPECTRE_V2_CMD_AUTO;
|
||||||
|
}
|
||||||
|
spec2_print_if_insecure("AMD retpoline selected on command line.");
|
||||||
|
return SPECTRE_V2_CMD_RETPOLINE_AMD;
|
||||||
|
} else if (match_option(arg, ret, "retpoline,generic")) {
|
||||||
|
spec2_print_if_insecure("generic retpoline selected on command line.");
|
||||||
|
return SPECTRE_V2_CMD_RETPOLINE_GENERIC;
|
||||||
|
} else if (match_option(arg, ret, "auto")) {
|
||||||
|
return SPECTRE_V2_CMD_AUTO;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!cmdline_find_option_bool(boot_command_line, "nospectre_v2"))
|
||||||
|
return SPECTRE_V2_CMD_AUTO;
|
||||||
|
disable:
|
||||||
|
spec2_print_if_insecure("disabled on command line.");
|
||||||
|
return SPECTRE_V2_CMD_NONE;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Check for Skylake-like CPUs (for RSB handling) */
|
||||||
|
static bool __init is_skylake_era(void)
|
||||||
|
{
|
||||||
|
if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL &&
|
||||||
|
boot_cpu_data.x86 == 6) {
|
||||||
|
switch (boot_cpu_data.x86_model) {
|
||||||
|
case INTEL_FAM6_SKYLAKE_MOBILE:
|
||||||
|
case INTEL_FAM6_SKYLAKE_DESKTOP:
|
||||||
|
case INTEL_FAM6_SKYLAKE_X:
|
||||||
|
case INTEL_FAM6_KABYLAKE_MOBILE:
|
||||||
|
case INTEL_FAM6_KABYLAKE_DESKTOP:
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
static void __init spectre_v2_select_mitigation(void)
|
||||||
|
{
|
||||||
|
enum spectre_v2_mitigation_cmd cmd = spectre_v2_parse_cmdline();
|
||||||
|
enum spectre_v2_mitigation mode = SPECTRE_V2_NONE;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* If the CPU is not affected and the command line mode is NONE or AUTO
|
||||||
|
* then nothing to do.
|
||||||
|
*/
|
||||||
|
if (!boot_cpu_has_bug(X86_BUG_SPECTRE_V2) &&
|
||||||
|
(cmd == SPECTRE_V2_CMD_NONE || cmd == SPECTRE_V2_CMD_AUTO))
|
||||||
|
return;
|
||||||
|
|
||||||
|
switch (cmd) {
|
||||||
|
case SPECTRE_V2_CMD_NONE:
|
||||||
|
return;
|
||||||
|
|
||||||
|
case SPECTRE_V2_CMD_FORCE:
|
||||||
|
/* FALLTRHU */
|
||||||
|
case SPECTRE_V2_CMD_AUTO:
|
||||||
|
goto retpoline_auto;
|
||||||
|
|
||||||
|
case SPECTRE_V2_CMD_RETPOLINE_AMD:
|
||||||
|
if (IS_ENABLED(CONFIG_RETPOLINE))
|
||||||
|
goto retpoline_amd;
|
||||||
|
break;
|
||||||
|
case SPECTRE_V2_CMD_RETPOLINE_GENERIC:
|
||||||
|
if (IS_ENABLED(CONFIG_RETPOLINE))
|
||||||
|
goto retpoline_generic;
|
||||||
|
break;
|
||||||
|
case SPECTRE_V2_CMD_RETPOLINE:
|
||||||
|
if (IS_ENABLED(CONFIG_RETPOLINE))
|
||||||
|
goto retpoline_auto;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
pr_err("kernel not compiled with retpoline; no mitigation available!");
|
||||||
|
return;
|
||||||
|
|
||||||
|
retpoline_auto:
|
||||||
|
if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) {
|
||||||
|
retpoline_amd:
|
||||||
|
if (!boot_cpu_has(X86_FEATURE_LFENCE_RDTSC)) {
|
||||||
|
pr_err("LFENCE not serializing. Switching to generic retpoline\n");
|
||||||
|
goto retpoline_generic;
|
||||||
|
}
|
||||||
|
mode = retp_compiler() ? SPECTRE_V2_RETPOLINE_AMD :
|
||||||
|
SPECTRE_V2_RETPOLINE_MINIMAL_AMD;
|
||||||
|
setup_force_cpu_cap(X86_FEATURE_RETPOLINE_AMD);
|
||||||
|
setup_force_cpu_cap(X86_FEATURE_RETPOLINE);
|
||||||
|
} else {
|
||||||
|
retpoline_generic:
|
||||||
|
mode = retp_compiler() ? SPECTRE_V2_RETPOLINE_GENERIC :
|
||||||
|
SPECTRE_V2_RETPOLINE_MINIMAL;
|
||||||
|
setup_force_cpu_cap(X86_FEATURE_RETPOLINE);
|
||||||
|
}
|
||||||
|
|
||||||
|
spectre_v2_enabled = mode;
|
||||||
|
pr_info("%s\n", spectre_v2_strings[mode]);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* If neither SMEP or KPTI are available, there is a risk of
|
||||||
|
* hitting userspace addresses in the RSB after a context switch
|
||||||
|
* from a shallow call stack to a deeper one. To prevent this fill
|
||||||
|
* the entire RSB, even when using IBRS.
|
||||||
|
*
|
||||||
|
* Skylake era CPUs have a separate issue with *underflow* of the
|
||||||
|
* RSB, when they will predict 'ret' targets from the generic BTB.
|
||||||
|
* The proper mitigation for this is IBRS. If IBRS is not supported
|
||||||
|
* or deactivated in favour of retpolines the RSB fill on context
|
||||||
|
* switch is required.
|
||||||
|
*/
|
||||||
|
if ((!boot_cpu_has(X86_FEATURE_PTI) &&
|
||||||
|
!boot_cpu_has(X86_FEATURE_SMEP)) || is_skylake_era()) {
|
||||||
|
setup_force_cpu_cap(X86_FEATURE_RSB_CTXSW);
|
||||||
|
pr_info("Filling RSB on context switch\n");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#undef pr_fmt
|
||||||
|
|
||||||
|
#ifdef CONFIG_SYSFS
|
||||||
|
ssize_t cpu_show_meltdown(struct device *dev,
|
||||||
|
struct device_attribute *attr, char *buf)
|
||||||
|
{
|
||||||
|
if (!boot_cpu_has_bug(X86_BUG_CPU_MELTDOWN))
|
||||||
|
return sprintf(buf, "Not affected\n");
|
||||||
|
if (boot_cpu_has(X86_FEATURE_PTI))
|
||||||
|
return sprintf(buf, "Mitigation: PTI\n");
|
||||||
|
return sprintf(buf, "Vulnerable\n");
|
||||||
|
}
|
||||||
|
|
||||||
|
ssize_t cpu_show_spectre_v1(struct device *dev,
|
||||||
|
struct device_attribute *attr, char *buf)
|
||||||
|
{
|
||||||
|
if (!boot_cpu_has_bug(X86_BUG_SPECTRE_V1))
|
||||||
|
return sprintf(buf, "Not affected\n");
|
||||||
|
return sprintf(buf, "Vulnerable\n");
|
||||||
|
}
|
||||||
|
|
||||||
|
ssize_t cpu_show_spectre_v2(struct device *dev,
|
||||||
|
struct device_attribute *attr, char *buf)
|
||||||
|
{
|
||||||
|
if (!boot_cpu_has_bug(X86_BUG_SPECTRE_V2))
|
||||||
|
return sprintf(buf, "Not affected\n");
|
||||||
|
|
||||||
|
return sprintf(buf, "%s\n", spectre_v2_strings[spectre_v2_enabled]);
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
|
@ -902,6 +902,9 @@ static void __init early_identify_cpu(struct cpuinfo_x86 *c)
|
||||||
if (c->x86_vendor != X86_VENDOR_AMD)
|
if (c->x86_vendor != X86_VENDOR_AMD)
|
||||||
setup_force_cpu_bug(X86_BUG_CPU_MELTDOWN);
|
setup_force_cpu_bug(X86_BUG_CPU_MELTDOWN);
|
||||||
|
|
||||||
|
setup_force_cpu_bug(X86_BUG_SPECTRE_V1);
|
||||||
|
setup_force_cpu_bug(X86_BUG_SPECTRE_V2);
|
||||||
|
|
||||||
fpu__init_system(c);
|
fpu__init_system(c);
|
||||||
|
|
||||||
#ifdef CONFIG_X86_32
|
#ifdef CONFIG_X86_32
|
||||||
|
|
|
@ -525,10 +525,6 @@ static void domain_remove_cpu(int cpu, struct rdt_resource *r)
|
||||||
*/
|
*/
|
||||||
if (static_branch_unlikely(&rdt_mon_enable_key))
|
if (static_branch_unlikely(&rdt_mon_enable_key))
|
||||||
rmdir_mondata_subdir_allrdtgrp(r, d->id);
|
rmdir_mondata_subdir_allrdtgrp(r, d->id);
|
||||||
kfree(d->ctrl_val);
|
|
||||||
kfree(d->rmid_busy_llc);
|
|
||||||
kfree(d->mbm_total);
|
|
||||||
kfree(d->mbm_local);
|
|
||||||
list_del(&d->list);
|
list_del(&d->list);
|
||||||
if (is_mbm_enabled())
|
if (is_mbm_enabled())
|
||||||
cancel_delayed_work(&d->mbm_over);
|
cancel_delayed_work(&d->mbm_over);
|
||||||
|
@ -545,6 +541,10 @@ static void domain_remove_cpu(int cpu, struct rdt_resource *r)
|
||||||
cancel_delayed_work(&d->cqm_limbo);
|
cancel_delayed_work(&d->cqm_limbo);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
kfree(d->ctrl_val);
|
||||||
|
kfree(d->rmid_busy_llc);
|
||||||
|
kfree(d->mbm_total);
|
||||||
|
kfree(d->mbm_local);
|
||||||
kfree(d);
|
kfree(d);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
|
@ -1788,6 +1788,11 @@ static void unexpected_machine_check(struct pt_regs *regs, long error_code)
|
||||||
void (*machine_check_vector)(struct pt_regs *, long error_code) =
|
void (*machine_check_vector)(struct pt_regs *, long error_code) =
|
||||||
unexpected_machine_check;
|
unexpected_machine_check;
|
||||||
|
|
||||||
|
dotraplinkage void do_mce(struct pt_regs *regs, long error_code)
|
||||||
|
{
|
||||||
|
machine_check_vector(regs, error_code);
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Called for each booted CPU to set up machine checks.
|
* Called for each booted CPU to set up machine checks.
|
||||||
* Must be called with preempt off:
|
* Must be called with preempt off:
|
||||||
|
|
|
@ -239,7 +239,7 @@ static int __init save_microcode_in_initrd(void)
|
||||||
break;
|
break;
|
||||||
case X86_VENDOR_AMD:
|
case X86_VENDOR_AMD:
|
||||||
if (c->x86 >= 0x10)
|
if (c->x86 >= 0x10)
|
||||||
return save_microcode_in_initrd_amd(cpuid_eax(1));
|
ret = save_microcode_in_initrd_amd(cpuid_eax(1));
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
break;
|
break;
|
||||||
|
|
|
@ -45,6 +45,9 @@ static const char ucode_path[] = "kernel/x86/microcode/GenuineIntel.bin";
|
||||||
/* Current microcode patch used in early patching on the APs. */
|
/* Current microcode patch used in early patching on the APs. */
|
||||||
static struct microcode_intel *intel_ucode_patch;
|
static struct microcode_intel *intel_ucode_patch;
|
||||||
|
|
||||||
|
/* last level cache size per core */
|
||||||
|
static int llc_size_per_core;
|
||||||
|
|
||||||
static inline bool cpu_signatures_match(unsigned int s1, unsigned int p1,
|
static inline bool cpu_signatures_match(unsigned int s1, unsigned int p1,
|
||||||
unsigned int s2, unsigned int p2)
|
unsigned int s2, unsigned int p2)
|
||||||
{
|
{
|
||||||
|
@ -910,8 +913,19 @@ static bool is_blacklisted(unsigned int cpu)
|
||||||
{
|
{
|
||||||
struct cpuinfo_x86 *c = &cpu_data(cpu);
|
struct cpuinfo_x86 *c = &cpu_data(cpu);
|
||||||
|
|
||||||
if (c->x86 == 6 && c->x86_model == INTEL_FAM6_BROADWELL_X) {
|
/*
|
||||||
pr_err_once("late loading on model 79 is disabled.\n");
|
* Late loading on model 79 with microcode revision less than 0x0b000021
|
||||||
|
* and LLC size per core bigger than 2.5MB may result in a system hang.
|
||||||
|
* This behavior is documented in item BDF90, #334165 (Intel Xeon
|
||||||
|
* Processor E7-8800/4800 v4 Product Family).
|
||||||
|
*/
|
||||||
|
if (c->x86 == 6 &&
|
||||||
|
c->x86_model == INTEL_FAM6_BROADWELL_X &&
|
||||||
|
c->x86_mask == 0x01 &&
|
||||||
|
llc_size_per_core > 2621440 &&
|
||||||
|
c->microcode < 0x0b000021) {
|
||||||
|
pr_err_once("Erratum BDF90: late loading with revision < 0x0b000021 (0x%x) disabled.\n", c->microcode);
|
||||||
|
pr_err_once("Please consider either early loading through initrd/built-in or a potential BIOS update.\n");
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -966,6 +980,15 @@ static struct microcode_ops microcode_intel_ops = {
|
||||||
.apply_microcode = apply_microcode_intel,
|
.apply_microcode = apply_microcode_intel,
|
||||||
};
|
};
|
||||||
|
|
||||||
|
static int __init calc_llc_size_per_core(struct cpuinfo_x86 *c)
|
||||||
|
{
|
||||||
|
u64 llc_size = c->x86_cache_size * 1024;
|
||||||
|
|
||||||
|
do_div(llc_size, c->x86_max_cores);
|
||||||
|
|
||||||
|
return (int)llc_size;
|
||||||
|
}
|
||||||
|
|
||||||
struct microcode_ops * __init init_intel_microcode(void)
|
struct microcode_ops * __init init_intel_microcode(void)
|
||||||
{
|
{
|
||||||
struct cpuinfo_x86 *c = &boot_cpu_data;
|
struct cpuinfo_x86 *c = &boot_cpu_data;
|
||||||
|
@ -976,5 +999,7 @@ struct microcode_ops * __init init_intel_microcode(void)
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
llc_size_per_core = calc_llc_size_per_core(c);
|
||||||
|
|
||||||
return &microcode_intel_ops;
|
return &microcode_intel_ops;
|
||||||
}
|
}
|
||||||
|
|
|
@ -21,7 +21,6 @@ struct cpuid_bit {
|
||||||
static const struct cpuid_bit cpuid_bits[] = {
|
static const struct cpuid_bit cpuid_bits[] = {
|
||||||
{ X86_FEATURE_APERFMPERF, CPUID_ECX, 0, 0x00000006, 0 },
|
{ X86_FEATURE_APERFMPERF, CPUID_ECX, 0, 0x00000006, 0 },
|
||||||
{ X86_FEATURE_EPB, CPUID_ECX, 3, 0x00000006, 0 },
|
{ X86_FEATURE_EPB, CPUID_ECX, 3, 0x00000006, 0 },
|
||||||
{ X86_FEATURE_INTEL_PT, CPUID_EBX, 25, 0x00000007, 0 },
|
|
||||||
{ X86_FEATURE_AVX512_4VNNIW, CPUID_EDX, 2, 0x00000007, 0 },
|
{ X86_FEATURE_AVX512_4VNNIW, CPUID_EDX, 2, 0x00000007, 0 },
|
||||||
{ X86_FEATURE_AVX512_4FMAPS, CPUID_EDX, 3, 0x00000007, 0 },
|
{ X86_FEATURE_AVX512_4FMAPS, CPUID_EDX, 3, 0x00000007, 0 },
|
||||||
{ X86_FEATURE_CAT_L3, CPUID_EBX, 1, 0x00000010, 0 },
|
{ X86_FEATURE_CAT_L3, CPUID_EBX, 1, 0x00000010, 0 },
|
||||||
|
|
|
@ -8,6 +8,7 @@
|
||||||
#include <asm/segment.h>
|
#include <asm/segment.h>
|
||||||
#include <asm/export.h>
|
#include <asm/export.h>
|
||||||
#include <asm/ftrace.h>
|
#include <asm/ftrace.h>
|
||||||
|
#include <asm/nospec-branch.h>
|
||||||
|
|
||||||
#ifdef CC_USING_FENTRY
|
#ifdef CC_USING_FENTRY
|
||||||
# define function_hook __fentry__
|
# define function_hook __fentry__
|
||||||
|
@ -197,7 +198,8 @@ ftrace_stub:
|
||||||
movl 0x4(%ebp), %edx
|
movl 0x4(%ebp), %edx
|
||||||
subl $MCOUNT_INSN_SIZE, %eax
|
subl $MCOUNT_INSN_SIZE, %eax
|
||||||
|
|
||||||
call *ftrace_trace_function
|
movl ftrace_trace_function, %ecx
|
||||||
|
CALL_NOSPEC %ecx
|
||||||
|
|
||||||
popl %edx
|
popl %edx
|
||||||
popl %ecx
|
popl %ecx
|
||||||
|
@ -241,5 +243,5 @@ return_to_handler:
|
||||||
movl %eax, %ecx
|
movl %eax, %ecx
|
||||||
popl %edx
|
popl %edx
|
||||||
popl %eax
|
popl %eax
|
||||||
jmp *%ecx
|
JMP_NOSPEC %ecx
|
||||||
#endif
|
#endif
|
||||||
|
|
|
@ -7,7 +7,7 @@
|
||||||
#include <asm/ptrace.h>
|
#include <asm/ptrace.h>
|
||||||
#include <asm/ftrace.h>
|
#include <asm/ftrace.h>
|
||||||
#include <asm/export.h>
|
#include <asm/export.h>
|
||||||
|
#include <asm/nospec-branch.h>
|
||||||
|
|
||||||
.code64
|
.code64
|
||||||
.section .entry.text, "ax"
|
.section .entry.text, "ax"
|
||||||
|
@ -286,8 +286,8 @@ trace:
|
||||||
* ip and parent ip are used and the list function is called when
|
* ip and parent ip are used and the list function is called when
|
||||||
* function tracing is enabled.
|
* function tracing is enabled.
|
||||||
*/
|
*/
|
||||||
call *ftrace_trace_function
|
movq ftrace_trace_function, %r8
|
||||||
|
CALL_NOSPEC %r8
|
||||||
restore_mcount_regs
|
restore_mcount_regs
|
||||||
|
|
||||||
jmp fgraph_trace
|
jmp fgraph_trace
|
||||||
|
@ -329,5 +329,5 @@ GLOBAL(return_to_handler)
|
||||||
movq 8(%rsp), %rdx
|
movq 8(%rsp), %rdx
|
||||||
movq (%rsp), %rax
|
movq (%rsp), %rax
|
||||||
addq $24, %rsp
|
addq $24, %rsp
|
||||||
jmp *%rdi
|
JMP_NOSPEC %rdi
|
||||||
#endif
|
#endif
|
||||||
|
|
|
@ -157,8 +157,8 @@ unsigned long __head __startup_64(unsigned long physaddr,
|
||||||
p = fixup_pointer(&phys_base, physaddr);
|
p = fixup_pointer(&phys_base, physaddr);
|
||||||
*p += load_delta - sme_get_me_mask();
|
*p += load_delta - sme_get_me_mask();
|
||||||
|
|
||||||
/* Encrypt the kernel (if SME is active) */
|
/* Encrypt the kernel and related (if SME is active) */
|
||||||
sme_encrypt_kernel();
|
sme_encrypt_kernel(bp);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Return the SME encryption mask (if SME is active) to be used as a
|
* Return the SME encryption mask (if SME is active) to be used as a
|
||||||
|
|
|
@ -56,7 +56,7 @@ struct idt_data {
|
||||||
* Early traps running on the DEFAULT_STACK because the other interrupt
|
* Early traps running on the DEFAULT_STACK because the other interrupt
|
||||||
* stacks work only after cpu_init().
|
* stacks work only after cpu_init().
|
||||||
*/
|
*/
|
||||||
static const __initdata struct idt_data early_idts[] = {
|
static const __initconst struct idt_data early_idts[] = {
|
||||||
INTG(X86_TRAP_DB, debug),
|
INTG(X86_TRAP_DB, debug),
|
||||||
SYSG(X86_TRAP_BP, int3),
|
SYSG(X86_TRAP_BP, int3),
|
||||||
#ifdef CONFIG_X86_32
|
#ifdef CONFIG_X86_32
|
||||||
|
@ -70,7 +70,7 @@ static const __initdata struct idt_data early_idts[] = {
|
||||||
* the traps which use them are reinitialized with IST after cpu_init() has
|
* the traps which use them are reinitialized with IST after cpu_init() has
|
||||||
* set up TSS.
|
* set up TSS.
|
||||||
*/
|
*/
|
||||||
static const __initdata struct idt_data def_idts[] = {
|
static const __initconst struct idt_data def_idts[] = {
|
||||||
INTG(X86_TRAP_DE, divide_error),
|
INTG(X86_TRAP_DE, divide_error),
|
||||||
INTG(X86_TRAP_NMI, nmi),
|
INTG(X86_TRAP_NMI, nmi),
|
||||||
INTG(X86_TRAP_BR, bounds),
|
INTG(X86_TRAP_BR, bounds),
|
||||||
|
@ -108,7 +108,7 @@ static const __initdata struct idt_data def_idts[] = {
|
||||||
/*
|
/*
|
||||||
* The APIC and SMP idt entries
|
* The APIC and SMP idt entries
|
||||||
*/
|
*/
|
||||||
static const __initdata struct idt_data apic_idts[] = {
|
static const __initconst struct idt_data apic_idts[] = {
|
||||||
#ifdef CONFIG_SMP
|
#ifdef CONFIG_SMP
|
||||||
INTG(RESCHEDULE_VECTOR, reschedule_interrupt),
|
INTG(RESCHEDULE_VECTOR, reschedule_interrupt),
|
||||||
INTG(CALL_FUNCTION_VECTOR, call_function_interrupt),
|
INTG(CALL_FUNCTION_VECTOR, call_function_interrupt),
|
||||||
|
@ -150,7 +150,7 @@ static const __initdata struct idt_data apic_idts[] = {
|
||||||
* Early traps running on the DEFAULT_STACK because the other interrupt
|
* Early traps running on the DEFAULT_STACK because the other interrupt
|
||||||
* stacks work only after cpu_init().
|
* stacks work only after cpu_init().
|
||||||
*/
|
*/
|
||||||
static const __initdata struct idt_data early_pf_idts[] = {
|
static const __initconst struct idt_data early_pf_idts[] = {
|
||||||
INTG(X86_TRAP_PF, page_fault),
|
INTG(X86_TRAP_PF, page_fault),
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -158,7 +158,7 @@ static const __initdata struct idt_data early_pf_idts[] = {
|
||||||
* Override for the debug_idt. Same as the default, but with interrupt
|
* Override for the debug_idt. Same as the default, but with interrupt
|
||||||
* stack set to DEFAULT_STACK (0). Required for NMI trap handling.
|
* stack set to DEFAULT_STACK (0). Required for NMI trap handling.
|
||||||
*/
|
*/
|
||||||
static const __initdata struct idt_data dbg_idts[] = {
|
static const __initconst struct idt_data dbg_idts[] = {
|
||||||
INTG(X86_TRAP_DB, debug),
|
INTG(X86_TRAP_DB, debug),
|
||||||
INTG(X86_TRAP_BP, int3),
|
INTG(X86_TRAP_BP, int3),
|
||||||
};
|
};
|
||||||
|
@ -180,7 +180,7 @@ gate_desc debug_idt_table[IDT_ENTRIES] __page_aligned_bss;
|
||||||
* The exceptions which use Interrupt stacks. They are setup after
|
* The exceptions which use Interrupt stacks. They are setup after
|
||||||
* cpu_init() when the TSS has been initialized.
|
* cpu_init() when the TSS has been initialized.
|
||||||
*/
|
*/
|
||||||
static const __initdata struct idt_data ist_idts[] = {
|
static const __initconst struct idt_data ist_idts[] = {
|
||||||
ISTG(X86_TRAP_DB, debug, DEBUG_STACK),
|
ISTG(X86_TRAP_DB, debug, DEBUG_STACK),
|
||||||
ISTG(X86_TRAP_NMI, nmi, NMI_STACK),
|
ISTG(X86_TRAP_NMI, nmi, NMI_STACK),
|
||||||
SISTG(X86_TRAP_BP, int3, DEBUG_STACK),
|
SISTG(X86_TRAP_BP, int3, DEBUG_STACK),
|
||||||
|
|
|
@ -20,6 +20,7 @@
|
||||||
#include <linux/mm.h>
|
#include <linux/mm.h>
|
||||||
|
|
||||||
#include <asm/apic.h>
|
#include <asm/apic.h>
|
||||||
|
#include <asm/nospec-branch.h>
|
||||||
|
|
||||||
#ifdef CONFIG_DEBUG_STACKOVERFLOW
|
#ifdef CONFIG_DEBUG_STACKOVERFLOW
|
||||||
|
|
||||||
|
@ -55,11 +56,11 @@ DEFINE_PER_CPU(struct irq_stack *, softirq_stack);
|
||||||
static void call_on_stack(void *func, void *stack)
|
static void call_on_stack(void *func, void *stack)
|
||||||
{
|
{
|
||||||
asm volatile("xchgl %%ebx,%%esp \n"
|
asm volatile("xchgl %%ebx,%%esp \n"
|
||||||
"call *%%edi \n"
|
CALL_NOSPEC
|
||||||
"movl %%ebx,%%esp \n"
|
"movl %%ebx,%%esp \n"
|
||||||
: "=b" (stack)
|
: "=b" (stack)
|
||||||
: "0" (stack),
|
: "0" (stack),
|
||||||
"D"(func)
|
[thunk_target] "D"(func)
|
||||||
: "memory", "cc", "edx", "ecx", "eax");
|
: "memory", "cc", "edx", "ecx", "eax");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -95,11 +96,11 @@ static inline int execute_on_irq_stack(int overflow, struct irq_desc *desc)
|
||||||
call_on_stack(print_stack_overflow, isp);
|
call_on_stack(print_stack_overflow, isp);
|
||||||
|
|
||||||
asm volatile("xchgl %%ebx,%%esp \n"
|
asm volatile("xchgl %%ebx,%%esp \n"
|
||||||
"call *%%edi \n"
|
CALL_NOSPEC
|
||||||
"movl %%ebx,%%esp \n"
|
"movl %%ebx,%%esp \n"
|
||||||
: "=a" (arg1), "=b" (isp)
|
: "=a" (arg1), "=b" (isp)
|
||||||
: "0" (desc), "1" (isp),
|
: "0" (desc), "1" (isp),
|
||||||
"D" (desc->handle_irq)
|
[thunk_target] "D" (desc->handle_irq)
|
||||||
: "memory", "cc", "ecx");
|
: "memory", "cc", "ecx");
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
|
|
|
@ -40,6 +40,7 @@
|
||||||
#include <asm/debugreg.h>
|
#include <asm/debugreg.h>
|
||||||
#include <asm/set_memory.h>
|
#include <asm/set_memory.h>
|
||||||
#include <asm/sections.h>
|
#include <asm/sections.h>
|
||||||
|
#include <asm/nospec-branch.h>
|
||||||
|
|
||||||
#include "common.h"
|
#include "common.h"
|
||||||
|
|
||||||
|
@ -205,7 +206,7 @@ static int copy_optimized_instructions(u8 *dest, u8 *src)
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Check whether insn is indirect jump */
|
/* Check whether insn is indirect jump */
|
||||||
static int insn_is_indirect_jump(struct insn *insn)
|
static int __insn_is_indirect_jump(struct insn *insn)
|
||||||
{
|
{
|
||||||
return ((insn->opcode.bytes[0] == 0xff &&
|
return ((insn->opcode.bytes[0] == 0xff &&
|
||||||
(X86_MODRM_REG(insn->modrm.value) & 6) == 4) || /* Jump */
|
(X86_MODRM_REG(insn->modrm.value) & 6) == 4) || /* Jump */
|
||||||
|
@ -239,6 +240,26 @@ static int insn_jump_into_range(struct insn *insn, unsigned long start, int len)
|
||||||
return (start <= target && target <= start + len);
|
return (start <= target && target <= start + len);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static int insn_is_indirect_jump(struct insn *insn)
|
||||||
|
{
|
||||||
|
int ret = __insn_is_indirect_jump(insn);
|
||||||
|
|
||||||
|
#ifdef CONFIG_RETPOLINE
|
||||||
|
/*
|
||||||
|
* Jump to x86_indirect_thunk_* is treated as an indirect jump.
|
||||||
|
* Note that even with CONFIG_RETPOLINE=y, the kernel compiled with
|
||||||
|
* older gcc may use indirect jump. So we add this check instead of
|
||||||
|
* replace indirect-jump check.
|
||||||
|
*/
|
||||||
|
if (!ret)
|
||||||
|
ret = insn_jump_into_range(insn,
|
||||||
|
(unsigned long)__indirect_thunk_start,
|
||||||
|
(unsigned long)__indirect_thunk_end -
|
||||||
|
(unsigned long)__indirect_thunk_start);
|
||||||
|
#endif
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
|
||||||
/* Decode whole function to ensure any instructions don't jump into target */
|
/* Decode whole function to ensure any instructions don't jump into target */
|
||||||
static int can_optimize(unsigned long paddr)
|
static int can_optimize(unsigned long paddr)
|
||||||
{
|
{
|
||||||
|
|
|
@ -380,19 +380,24 @@ void stop_this_cpu(void *dummy)
|
||||||
disable_local_APIC();
|
disable_local_APIC();
|
||||||
mcheck_cpu_clear(this_cpu_ptr(&cpu_info));
|
mcheck_cpu_clear(this_cpu_ptr(&cpu_info));
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Use wbinvd on processors that support SME. This provides support
|
||||||
|
* for performing a successful kexec when going from SME inactive
|
||||||
|
* to SME active (or vice-versa). The cache must be cleared so that
|
||||||
|
* if there are entries with the same physical address, both with and
|
||||||
|
* without the encryption bit, they don't race each other when flushed
|
||||||
|
* and potentially end up with the wrong entry being committed to
|
||||||
|
* memory.
|
||||||
|
*/
|
||||||
|
if (boot_cpu_has(X86_FEATURE_SME))
|
||||||
|
native_wbinvd();
|
||||||
for (;;) {
|
for (;;) {
|
||||||
/*
|
/*
|
||||||
* Use wbinvd followed by hlt to stop the processor. This
|
* Use native_halt() so that memory contents don't change
|
||||||
* provides support for kexec on a processor that supports
|
* (stack usage and variables) after possibly issuing the
|
||||||
* SME. With kexec, going from SME inactive to SME active
|
* native_wbinvd() above.
|
||||||
* requires clearing cache entries so that addresses without
|
|
||||||
* the encryption bit set don't corrupt the same physical
|
|
||||||
* address that has the encryption bit set when caches are
|
|
||||||
* flushed. To achieve this a wbinvd is performed followed by
|
|
||||||
* a hlt. Even if the processor is not in the kexec/SME
|
|
||||||
* scenario this only adds a wbinvd to a halting processor.
|
|
||||||
*/
|
*/
|
||||||
asm volatile("wbinvd; hlt" : : : "memory");
|
native_halt();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -376,14 +376,6 @@ static void __init reserve_initrd(void)
|
||||||
!ramdisk_image || !ramdisk_size)
|
!ramdisk_image || !ramdisk_size)
|
||||||
return; /* No initrd provided by bootloader */
|
return; /* No initrd provided by bootloader */
|
||||||
|
|
||||||
/*
|
|
||||||
* If SME is active, this memory will be marked encrypted by the
|
|
||||||
* kernel when it is accessed (including relocation). However, the
|
|
||||||
* ramdisk image was loaded decrypted by the bootloader, so make
|
|
||||||
* sure that it is encrypted before accessing it.
|
|
||||||
*/
|
|
||||||
sme_early_encrypt(ramdisk_image, ramdisk_end - ramdisk_image);
|
|
||||||
|
|
||||||
initrd_start = 0;
|
initrd_start = 0;
|
||||||
|
|
||||||
mapped_size = memblock_mem_size(max_pfn_mapped);
|
mapped_size = memblock_mem_size(max_pfn_mapped);
|
||||||
|
|
|
@ -138,6 +138,17 @@ static int map_tboot_page(unsigned long vaddr, unsigned long pfn,
|
||||||
return -1;
|
return -1;
|
||||||
set_pte_at(&tboot_mm, vaddr, pte, pfn_pte(pfn, prot));
|
set_pte_at(&tboot_mm, vaddr, pte, pfn_pte(pfn, prot));
|
||||||
pte_unmap(pte);
|
pte_unmap(pte);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* PTI poisons low addresses in the kernel page tables in the
|
||||||
|
* name of making them unusable for userspace. To execute
|
||||||
|
* code at such a low address, the poison must be cleared.
|
||||||
|
*
|
||||||
|
* Note: 'pgd' actually gets set in p4d_alloc() _or_
|
||||||
|
* pud_alloc() depending on 4/5-level paging.
|
||||||
|
*/
|
||||||
|
pgd->pgd &= ~_PAGE_NX;
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -602,7 +602,6 @@ unsigned long native_calibrate_tsc(void)
|
||||||
case INTEL_FAM6_KABYLAKE_DESKTOP:
|
case INTEL_FAM6_KABYLAKE_DESKTOP:
|
||||||
crystal_khz = 24000; /* 24.0 MHz */
|
crystal_khz = 24000; /* 24.0 MHz */
|
||||||
break;
|
break;
|
||||||
case INTEL_FAM6_SKYLAKE_X:
|
|
||||||
case INTEL_FAM6_ATOM_DENVERTON:
|
case INTEL_FAM6_ATOM_DENVERTON:
|
||||||
crystal_khz = 25000; /* 25.0 MHz */
|
crystal_khz = 25000; /* 25.0 MHz */
|
||||||
break;
|
break;
|
||||||
|
@ -612,6 +611,8 @@ unsigned long native_calibrate_tsc(void)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (crystal_khz == 0)
|
||||||
|
return 0;
|
||||||
/*
|
/*
|
||||||
* TSC frequency determined by CPUID is a "hardware reported"
|
* TSC frequency determined by CPUID is a "hardware reported"
|
||||||
* frequency and is the most accurate one so far we have. This
|
* frequency and is the most accurate one so far we have. This
|
||||||
|
|
|
@ -124,6 +124,12 @@ SECTIONS
|
||||||
ASSERT(. - _entry_trampoline == PAGE_SIZE, "entry trampoline is too big");
|
ASSERT(. - _entry_trampoline == PAGE_SIZE, "entry trampoline is too big");
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#ifdef CONFIG_RETPOLINE
|
||||||
|
__indirect_thunk_start = .;
|
||||||
|
*(.text.__x86.indirect_thunk)
|
||||||
|
__indirect_thunk_end = .;
|
||||||
|
#endif
|
||||||
|
|
||||||
/* End of text section */
|
/* End of text section */
|
||||||
_etext = .;
|
_etext = .;
|
||||||
} :text = 0x9090
|
} :text = 0x9090
|
||||||
|
|
|
@ -45,6 +45,7 @@
|
||||||
#include <asm/debugreg.h>
|
#include <asm/debugreg.h>
|
||||||
#include <asm/kvm_para.h>
|
#include <asm/kvm_para.h>
|
||||||
#include <asm/irq_remapping.h>
|
#include <asm/irq_remapping.h>
|
||||||
|
#include <asm/nospec-branch.h>
|
||||||
|
|
||||||
#include <asm/virtext.h>
|
#include <asm/virtext.h>
|
||||||
#include "trace.h"
|
#include "trace.h"
|
||||||
|
@ -4964,6 +4965,25 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu)
|
||||||
"mov %%r13, %c[r13](%[svm]) \n\t"
|
"mov %%r13, %c[r13](%[svm]) \n\t"
|
||||||
"mov %%r14, %c[r14](%[svm]) \n\t"
|
"mov %%r14, %c[r14](%[svm]) \n\t"
|
||||||
"mov %%r15, %c[r15](%[svm]) \n\t"
|
"mov %%r15, %c[r15](%[svm]) \n\t"
|
||||||
|
#endif
|
||||||
|
/*
|
||||||
|
* Clear host registers marked as clobbered to prevent
|
||||||
|
* speculative use.
|
||||||
|
*/
|
||||||
|
"xor %%" _ASM_BX ", %%" _ASM_BX " \n\t"
|
||||||
|
"xor %%" _ASM_CX ", %%" _ASM_CX " \n\t"
|
||||||
|
"xor %%" _ASM_DX ", %%" _ASM_DX " \n\t"
|
||||||
|
"xor %%" _ASM_SI ", %%" _ASM_SI " \n\t"
|
||||||
|
"xor %%" _ASM_DI ", %%" _ASM_DI " \n\t"
|
||||||
|
#ifdef CONFIG_X86_64
|
||||||
|
"xor %%r8, %%r8 \n\t"
|
||||||
|
"xor %%r9, %%r9 \n\t"
|
||||||
|
"xor %%r10, %%r10 \n\t"
|
||||||
|
"xor %%r11, %%r11 \n\t"
|
||||||
|
"xor %%r12, %%r12 \n\t"
|
||||||
|
"xor %%r13, %%r13 \n\t"
|
||||||
|
"xor %%r14, %%r14 \n\t"
|
||||||
|
"xor %%r15, %%r15 \n\t"
|
||||||
#endif
|
#endif
|
||||||
"pop %%" _ASM_BP
|
"pop %%" _ASM_BP
|
||||||
:
|
:
|
||||||
|
@ -4994,6 +5014,9 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu)
|
||||||
#endif
|
#endif
|
||||||
);
|
);
|
||||||
|
|
||||||
|
/* Eliminate branch target predictions from guest mode */
|
||||||
|
vmexit_fill_RSB();
|
||||||
|
|
||||||
#ifdef CONFIG_X86_64
|
#ifdef CONFIG_X86_64
|
||||||
wrmsrl(MSR_GS_BASE, svm->host.gs_base);
|
wrmsrl(MSR_GS_BASE, svm->host.gs_base);
|
||||||
#else
|
#else
|
||||||
|
|
|
@ -50,6 +50,7 @@
|
||||||
#include <asm/apic.h>
|
#include <asm/apic.h>
|
||||||
#include <asm/irq_remapping.h>
|
#include <asm/irq_remapping.h>
|
||||||
#include <asm/mmu_context.h>
|
#include <asm/mmu_context.h>
|
||||||
|
#include <asm/nospec-branch.h>
|
||||||
|
|
||||||
#include "trace.h"
|
#include "trace.h"
|
||||||
#include "pmu.h"
|
#include "pmu.h"
|
||||||
|
@ -888,8 +889,16 @@ static inline short vmcs_field_to_offset(unsigned long field)
|
||||||
{
|
{
|
||||||
BUILD_BUG_ON(ARRAY_SIZE(vmcs_field_to_offset_table) > SHRT_MAX);
|
BUILD_BUG_ON(ARRAY_SIZE(vmcs_field_to_offset_table) > SHRT_MAX);
|
||||||
|
|
||||||
if (field >= ARRAY_SIZE(vmcs_field_to_offset_table) ||
|
if (field >= ARRAY_SIZE(vmcs_field_to_offset_table))
|
||||||
vmcs_field_to_offset_table[field] == 0)
|
return -ENOENT;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* FIXME: Mitigation for CVE-2017-5753. To be replaced with a
|
||||||
|
* generic mechanism.
|
||||||
|
*/
|
||||||
|
asm("lfence");
|
||||||
|
|
||||||
|
if (vmcs_field_to_offset_table[field] == 0)
|
||||||
return -ENOENT;
|
return -ENOENT;
|
||||||
|
|
||||||
return vmcs_field_to_offset_table[field];
|
return vmcs_field_to_offset_table[field];
|
||||||
|
@ -9405,6 +9414,7 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
|
||||||
/* Save guest registers, load host registers, keep flags */
|
/* Save guest registers, load host registers, keep flags */
|
||||||
"mov %0, %c[wordsize](%%" _ASM_SP ") \n\t"
|
"mov %0, %c[wordsize](%%" _ASM_SP ") \n\t"
|
||||||
"pop %0 \n\t"
|
"pop %0 \n\t"
|
||||||
|
"setbe %c[fail](%0)\n\t"
|
||||||
"mov %%" _ASM_AX ", %c[rax](%0) \n\t"
|
"mov %%" _ASM_AX ", %c[rax](%0) \n\t"
|
||||||
"mov %%" _ASM_BX ", %c[rbx](%0) \n\t"
|
"mov %%" _ASM_BX ", %c[rbx](%0) \n\t"
|
||||||
__ASM_SIZE(pop) " %c[rcx](%0) \n\t"
|
__ASM_SIZE(pop) " %c[rcx](%0) \n\t"
|
||||||
|
@ -9421,12 +9431,23 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
|
||||||
"mov %%r13, %c[r13](%0) \n\t"
|
"mov %%r13, %c[r13](%0) \n\t"
|
||||||
"mov %%r14, %c[r14](%0) \n\t"
|
"mov %%r14, %c[r14](%0) \n\t"
|
||||||
"mov %%r15, %c[r15](%0) \n\t"
|
"mov %%r15, %c[r15](%0) \n\t"
|
||||||
|
"xor %%r8d, %%r8d \n\t"
|
||||||
|
"xor %%r9d, %%r9d \n\t"
|
||||||
|
"xor %%r10d, %%r10d \n\t"
|
||||||
|
"xor %%r11d, %%r11d \n\t"
|
||||||
|
"xor %%r12d, %%r12d \n\t"
|
||||||
|
"xor %%r13d, %%r13d \n\t"
|
||||||
|
"xor %%r14d, %%r14d \n\t"
|
||||||
|
"xor %%r15d, %%r15d \n\t"
|
||||||
#endif
|
#endif
|
||||||
"mov %%cr2, %%" _ASM_AX " \n\t"
|
"mov %%cr2, %%" _ASM_AX " \n\t"
|
||||||
"mov %%" _ASM_AX ", %c[cr2](%0) \n\t"
|
"mov %%" _ASM_AX ", %c[cr2](%0) \n\t"
|
||||||
|
|
||||||
|
"xor %%eax, %%eax \n\t"
|
||||||
|
"xor %%ebx, %%ebx \n\t"
|
||||||
|
"xor %%esi, %%esi \n\t"
|
||||||
|
"xor %%edi, %%edi \n\t"
|
||||||
"pop %%" _ASM_BP "; pop %%" _ASM_DX " \n\t"
|
"pop %%" _ASM_BP "; pop %%" _ASM_DX " \n\t"
|
||||||
"setbe %c[fail](%0) \n\t"
|
|
||||||
".pushsection .rodata \n\t"
|
".pushsection .rodata \n\t"
|
||||||
".global vmx_return \n\t"
|
".global vmx_return \n\t"
|
||||||
"vmx_return: " _ASM_PTR " 2b \n\t"
|
"vmx_return: " _ASM_PTR " 2b \n\t"
|
||||||
|
@ -9463,6 +9484,9 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
|
||||||
#endif
|
#endif
|
||||||
);
|
);
|
||||||
|
|
||||||
|
/* Eliminate branch target predictions from guest mode */
|
||||||
|
vmexit_fill_RSB();
|
||||||
|
|
||||||
/* MSR_IA32_DEBUGCTLMSR is zeroed on vmexit. Restore it if needed */
|
/* MSR_IA32_DEBUGCTLMSR is zeroed on vmexit. Restore it if needed */
|
||||||
if (debugctlmsr)
|
if (debugctlmsr)
|
||||||
update_debugctlmsr(debugctlmsr);
|
update_debugctlmsr(debugctlmsr);
|
||||||
|
|
|
@ -4362,7 +4362,7 @@ static int vcpu_mmio_read(struct kvm_vcpu *vcpu, gpa_t addr, int len, void *v)
|
||||||
addr, n, v))
|
addr, n, v))
|
||||||
&& kvm_io_bus_read(vcpu, KVM_MMIO_BUS, addr, n, v))
|
&& kvm_io_bus_read(vcpu, KVM_MMIO_BUS, addr, n, v))
|
||||||
break;
|
break;
|
||||||
trace_kvm_mmio(KVM_TRACE_MMIO_READ, n, addr, *(u64 *)v);
|
trace_kvm_mmio(KVM_TRACE_MMIO_READ, n, addr, v);
|
||||||
handled += n;
|
handled += n;
|
||||||
addr += n;
|
addr += n;
|
||||||
len -= n;
|
len -= n;
|
||||||
|
@ -4621,7 +4621,7 @@ static int read_prepare(struct kvm_vcpu *vcpu, void *val, int bytes)
|
||||||
{
|
{
|
||||||
if (vcpu->mmio_read_completed) {
|
if (vcpu->mmio_read_completed) {
|
||||||
trace_kvm_mmio(KVM_TRACE_MMIO_READ, bytes,
|
trace_kvm_mmio(KVM_TRACE_MMIO_READ, bytes,
|
||||||
vcpu->mmio_fragments[0].gpa, *(u64 *)val);
|
vcpu->mmio_fragments[0].gpa, val);
|
||||||
vcpu->mmio_read_completed = 0;
|
vcpu->mmio_read_completed = 0;
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
|
@ -4643,14 +4643,14 @@ static int write_emulate(struct kvm_vcpu *vcpu, gpa_t gpa,
|
||||||
|
|
||||||
static int write_mmio(struct kvm_vcpu *vcpu, gpa_t gpa, int bytes, void *val)
|
static int write_mmio(struct kvm_vcpu *vcpu, gpa_t gpa, int bytes, void *val)
|
||||||
{
|
{
|
||||||
trace_kvm_mmio(KVM_TRACE_MMIO_WRITE, bytes, gpa, *(u64 *)val);
|
trace_kvm_mmio(KVM_TRACE_MMIO_WRITE, bytes, gpa, val);
|
||||||
return vcpu_mmio_write(vcpu, gpa, bytes, val);
|
return vcpu_mmio_write(vcpu, gpa, bytes, val);
|
||||||
}
|
}
|
||||||
|
|
||||||
static int read_exit_mmio(struct kvm_vcpu *vcpu, gpa_t gpa,
|
static int read_exit_mmio(struct kvm_vcpu *vcpu, gpa_t gpa,
|
||||||
void *val, int bytes)
|
void *val, int bytes)
|
||||||
{
|
{
|
||||||
trace_kvm_mmio(KVM_TRACE_MMIO_READ_UNSATISFIED, bytes, gpa, 0);
|
trace_kvm_mmio(KVM_TRACE_MMIO_READ_UNSATISFIED, bytes, gpa, NULL);
|
||||||
return X86EMUL_IO_NEEDED;
|
return X86EMUL_IO_NEEDED;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -26,6 +26,7 @@ lib-y += memcpy_$(BITS).o
|
||||||
lib-$(CONFIG_RWSEM_XCHGADD_ALGORITHM) += rwsem.o
|
lib-$(CONFIG_RWSEM_XCHGADD_ALGORITHM) += rwsem.o
|
||||||
lib-$(CONFIG_INSTRUCTION_DECODER) += insn.o inat.o
|
lib-$(CONFIG_INSTRUCTION_DECODER) += insn.o inat.o
|
||||||
lib-$(CONFIG_RANDOMIZE_BASE) += kaslr.o
|
lib-$(CONFIG_RANDOMIZE_BASE) += kaslr.o
|
||||||
|
lib-$(CONFIG_RETPOLINE) += retpoline.o
|
||||||
|
|
||||||
obj-y += msr.o msr-reg.o msr-reg-export.o hweight.o
|
obj-y += msr.o msr-reg.o msr-reg-export.o hweight.o
|
||||||
|
|
||||||
|
|
|
@ -29,7 +29,8 @@
|
||||||
#include <asm/errno.h>
|
#include <asm/errno.h>
|
||||||
#include <asm/asm.h>
|
#include <asm/asm.h>
|
||||||
#include <asm/export.h>
|
#include <asm/export.h>
|
||||||
|
#include <asm/nospec-branch.h>
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* computes a partial checksum, e.g. for TCP/UDP fragments
|
* computes a partial checksum, e.g. for TCP/UDP fragments
|
||||||
*/
|
*/
|
||||||
|
@ -156,7 +157,7 @@ ENTRY(csum_partial)
|
||||||
negl %ebx
|
negl %ebx
|
||||||
lea 45f(%ebx,%ebx,2), %ebx
|
lea 45f(%ebx,%ebx,2), %ebx
|
||||||
testl %esi, %esi
|
testl %esi, %esi
|
||||||
jmp *%ebx
|
JMP_NOSPEC %ebx
|
||||||
|
|
||||||
# Handle 2-byte-aligned regions
|
# Handle 2-byte-aligned regions
|
||||||
20: addw (%esi), %ax
|
20: addw (%esi), %ax
|
||||||
|
@ -439,7 +440,7 @@ ENTRY(csum_partial_copy_generic)
|
||||||
andl $-32,%edx
|
andl $-32,%edx
|
||||||
lea 3f(%ebx,%ebx), %ebx
|
lea 3f(%ebx,%ebx), %ebx
|
||||||
testl %esi, %esi
|
testl %esi, %esi
|
||||||
jmp *%ebx
|
JMP_NOSPEC %ebx
|
||||||
1: addl $64,%esi
|
1: addl $64,%esi
|
||||||
addl $64,%edi
|
addl $64,%edi
|
||||||
SRC(movb -32(%edx),%bl) ; SRC(movb (%edx),%bl)
|
SRC(movb -32(%edx),%bl) ; SRC(movb (%edx),%bl)
|
||||||
|
|
49
arch/x86/lib/retpoline.S
Normal file
49
arch/x86/lib/retpoline.S
Normal file
|
@ -0,0 +1,49 @@
|
||||||
|
/* SPDX-License-Identifier: GPL-2.0 */
|
||||||
|
|
||||||
|
#include <linux/stringify.h>
|
||||||
|
#include <linux/linkage.h>
|
||||||
|
#include <asm/dwarf2.h>
|
||||||
|
#include <asm/cpufeatures.h>
|
||||||
|
#include <asm/alternative-asm.h>
|
||||||
|
#include <asm/export.h>
|
||||||
|
#include <asm/nospec-branch.h>
|
||||||
|
|
||||||
|
.macro THUNK reg
|
||||||
|
.section .text.__x86.indirect_thunk
|
||||||
|
|
||||||
|
ENTRY(__x86_indirect_thunk_\reg)
|
||||||
|
CFI_STARTPROC
|
||||||
|
JMP_NOSPEC %\reg
|
||||||
|
CFI_ENDPROC
|
||||||
|
ENDPROC(__x86_indirect_thunk_\reg)
|
||||||
|
.endm
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Despite being an assembler file we can't just use .irp here
|
||||||
|
* because __KSYM_DEPS__ only uses the C preprocessor and would
|
||||||
|
* only see one instance of "__x86_indirect_thunk_\reg" rather
|
||||||
|
* than one per register with the correct names. So we do it
|
||||||
|
* the simple and nasty way...
|
||||||
|
*/
|
||||||
|
#define __EXPORT_THUNK(sym) _ASM_NOKPROBE(sym); EXPORT_SYMBOL(sym)
|
||||||
|
#define EXPORT_THUNK(reg) __EXPORT_THUNK(__x86_indirect_thunk_ ## reg)
|
||||||
|
#define GENERATE_THUNK(reg) THUNK reg ; EXPORT_THUNK(reg)
|
||||||
|
|
||||||
|
GENERATE_THUNK(_ASM_AX)
|
||||||
|
GENERATE_THUNK(_ASM_BX)
|
||||||
|
GENERATE_THUNK(_ASM_CX)
|
||||||
|
GENERATE_THUNK(_ASM_DX)
|
||||||
|
GENERATE_THUNK(_ASM_SI)
|
||||||
|
GENERATE_THUNK(_ASM_DI)
|
||||||
|
GENERATE_THUNK(_ASM_BP)
|
||||||
|
GENERATE_THUNK(_ASM_SP)
|
||||||
|
#ifdef CONFIG_64BIT
|
||||||
|
GENERATE_THUNK(r8)
|
||||||
|
GENERATE_THUNK(r9)
|
||||||
|
GENERATE_THUNK(r10)
|
||||||
|
GENERATE_THUNK(r11)
|
||||||
|
GENERATE_THUNK(r12)
|
||||||
|
GENERATE_THUNK(r13)
|
||||||
|
GENERATE_THUNK(r14)
|
||||||
|
GENERATE_THUNK(r15)
|
||||||
|
#endif
|
|
@ -173,14 +173,15 @@ is_prefetch(struct pt_regs *regs, unsigned long error_code, unsigned long addr)
|
||||||
* 6. T1 : reaches here, sees vma_pkey(vma)=5, when we really
|
* 6. T1 : reaches here, sees vma_pkey(vma)=5, when we really
|
||||||
* faulted on a pte with its pkey=4.
|
* faulted on a pte with its pkey=4.
|
||||||
*/
|
*/
|
||||||
static void fill_sig_info_pkey(int si_code, siginfo_t *info, u32 *pkey)
|
static void fill_sig_info_pkey(int si_signo, int si_code, siginfo_t *info,
|
||||||
|
u32 *pkey)
|
||||||
{
|
{
|
||||||
/* This is effectively an #ifdef */
|
/* This is effectively an #ifdef */
|
||||||
if (!boot_cpu_has(X86_FEATURE_OSPKE))
|
if (!boot_cpu_has(X86_FEATURE_OSPKE))
|
||||||
return;
|
return;
|
||||||
|
|
||||||
/* Fault not from Protection Keys: nothing to do */
|
/* Fault not from Protection Keys: nothing to do */
|
||||||
if (si_code != SEGV_PKUERR)
|
if ((si_code != SEGV_PKUERR) || (si_signo != SIGSEGV))
|
||||||
return;
|
return;
|
||||||
/*
|
/*
|
||||||
* force_sig_info_fault() is called from a number of
|
* force_sig_info_fault() is called from a number of
|
||||||
|
@ -219,7 +220,7 @@ force_sig_info_fault(int si_signo, int si_code, unsigned long address,
|
||||||
lsb = PAGE_SHIFT;
|
lsb = PAGE_SHIFT;
|
||||||
info.si_addr_lsb = lsb;
|
info.si_addr_lsb = lsb;
|
||||||
|
|
||||||
fill_sig_info_pkey(si_code, &info, pkey);
|
fill_sig_info_pkey(si_signo, si_code, &info, pkey);
|
||||||
|
|
||||||
force_sig_info(si_signo, &info, tsk);
|
force_sig_info(si_signo, &info, tsk);
|
||||||
}
|
}
|
||||||
|
|
|
@ -21,10 +21,14 @@ extern struct range pfn_mapped[E820_MAX_ENTRIES];
|
||||||
|
|
||||||
static p4d_t tmp_p4d_table[PTRS_PER_P4D] __initdata __aligned(PAGE_SIZE);
|
static p4d_t tmp_p4d_table[PTRS_PER_P4D] __initdata __aligned(PAGE_SIZE);
|
||||||
|
|
||||||
static __init void *early_alloc(size_t size, int nid)
|
static __init void *early_alloc(size_t size, int nid, bool panic)
|
||||||
{
|
{
|
||||||
return memblock_virt_alloc_try_nid_nopanic(size, size,
|
if (panic)
|
||||||
__pa(MAX_DMA_ADDRESS), BOOTMEM_ALLOC_ACCESSIBLE, nid);
|
return memblock_virt_alloc_try_nid(size, size,
|
||||||
|
__pa(MAX_DMA_ADDRESS), BOOTMEM_ALLOC_ACCESSIBLE, nid);
|
||||||
|
else
|
||||||
|
return memblock_virt_alloc_try_nid_nopanic(size, size,
|
||||||
|
__pa(MAX_DMA_ADDRESS), BOOTMEM_ALLOC_ACCESSIBLE, nid);
|
||||||
}
|
}
|
||||||
|
|
||||||
static void __init kasan_populate_pmd(pmd_t *pmd, unsigned long addr,
|
static void __init kasan_populate_pmd(pmd_t *pmd, unsigned long addr,
|
||||||
|
@ -38,14 +42,14 @@ static void __init kasan_populate_pmd(pmd_t *pmd, unsigned long addr,
|
||||||
if (boot_cpu_has(X86_FEATURE_PSE) &&
|
if (boot_cpu_has(X86_FEATURE_PSE) &&
|
||||||
((end - addr) == PMD_SIZE) &&
|
((end - addr) == PMD_SIZE) &&
|
||||||
IS_ALIGNED(addr, PMD_SIZE)) {
|
IS_ALIGNED(addr, PMD_SIZE)) {
|
||||||
p = early_alloc(PMD_SIZE, nid);
|
p = early_alloc(PMD_SIZE, nid, false);
|
||||||
if (p && pmd_set_huge(pmd, __pa(p), PAGE_KERNEL))
|
if (p && pmd_set_huge(pmd, __pa(p), PAGE_KERNEL))
|
||||||
return;
|
return;
|
||||||
else if (p)
|
else if (p)
|
||||||
memblock_free(__pa(p), PMD_SIZE);
|
memblock_free(__pa(p), PMD_SIZE);
|
||||||
}
|
}
|
||||||
|
|
||||||
p = early_alloc(PAGE_SIZE, nid);
|
p = early_alloc(PAGE_SIZE, nid, true);
|
||||||
pmd_populate_kernel(&init_mm, pmd, p);
|
pmd_populate_kernel(&init_mm, pmd, p);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -57,7 +61,7 @@ static void __init kasan_populate_pmd(pmd_t *pmd, unsigned long addr,
|
||||||
if (!pte_none(*pte))
|
if (!pte_none(*pte))
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
p = early_alloc(PAGE_SIZE, nid);
|
p = early_alloc(PAGE_SIZE, nid, true);
|
||||||
entry = pfn_pte(PFN_DOWN(__pa(p)), PAGE_KERNEL);
|
entry = pfn_pte(PFN_DOWN(__pa(p)), PAGE_KERNEL);
|
||||||
set_pte_at(&init_mm, addr, pte, entry);
|
set_pte_at(&init_mm, addr, pte, entry);
|
||||||
} while (pte++, addr += PAGE_SIZE, addr != end);
|
} while (pte++, addr += PAGE_SIZE, addr != end);
|
||||||
|
@ -75,14 +79,14 @@ static void __init kasan_populate_pud(pud_t *pud, unsigned long addr,
|
||||||
if (boot_cpu_has(X86_FEATURE_GBPAGES) &&
|
if (boot_cpu_has(X86_FEATURE_GBPAGES) &&
|
||||||
((end - addr) == PUD_SIZE) &&
|
((end - addr) == PUD_SIZE) &&
|
||||||
IS_ALIGNED(addr, PUD_SIZE)) {
|
IS_ALIGNED(addr, PUD_SIZE)) {
|
||||||
p = early_alloc(PUD_SIZE, nid);
|
p = early_alloc(PUD_SIZE, nid, false);
|
||||||
if (p && pud_set_huge(pud, __pa(p), PAGE_KERNEL))
|
if (p && pud_set_huge(pud, __pa(p), PAGE_KERNEL))
|
||||||
return;
|
return;
|
||||||
else if (p)
|
else if (p)
|
||||||
memblock_free(__pa(p), PUD_SIZE);
|
memblock_free(__pa(p), PUD_SIZE);
|
||||||
}
|
}
|
||||||
|
|
||||||
p = early_alloc(PAGE_SIZE, nid);
|
p = early_alloc(PAGE_SIZE, nid, true);
|
||||||
pud_populate(&init_mm, pud, p);
|
pud_populate(&init_mm, pud, p);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -101,7 +105,7 @@ static void __init kasan_populate_p4d(p4d_t *p4d, unsigned long addr,
|
||||||
unsigned long next;
|
unsigned long next;
|
||||||
|
|
||||||
if (p4d_none(*p4d)) {
|
if (p4d_none(*p4d)) {
|
||||||
void *p = early_alloc(PAGE_SIZE, nid);
|
void *p = early_alloc(PAGE_SIZE, nid, true);
|
||||||
|
|
||||||
p4d_populate(&init_mm, p4d, p);
|
p4d_populate(&init_mm, p4d, p);
|
||||||
}
|
}
|
||||||
|
@ -122,7 +126,7 @@ static void __init kasan_populate_pgd(pgd_t *pgd, unsigned long addr,
|
||||||
unsigned long next;
|
unsigned long next;
|
||||||
|
|
||||||
if (pgd_none(*pgd)) {
|
if (pgd_none(*pgd)) {
|
||||||
p = early_alloc(PAGE_SIZE, nid);
|
p = early_alloc(PAGE_SIZE, nid, true);
|
||||||
pgd_populate(&init_mm, pgd, p);
|
pgd_populate(&init_mm, pgd, p);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -213,37 +213,62 @@ void swiotlb_set_mem_attributes(void *vaddr, unsigned long size)
|
||||||
set_memory_decrypted((unsigned long)vaddr, size >> PAGE_SHIFT);
|
set_memory_decrypted((unsigned long)vaddr, size >> PAGE_SHIFT);
|
||||||
}
|
}
|
||||||
|
|
||||||
static void __init sme_clear_pgd(pgd_t *pgd_base, unsigned long start,
|
struct sme_populate_pgd_data {
|
||||||
unsigned long end)
|
void *pgtable_area;
|
||||||
|
pgd_t *pgd;
|
||||||
|
|
||||||
|
pmdval_t pmd_flags;
|
||||||
|
pteval_t pte_flags;
|
||||||
|
unsigned long paddr;
|
||||||
|
|
||||||
|
unsigned long vaddr;
|
||||||
|
unsigned long vaddr_end;
|
||||||
|
};
|
||||||
|
|
||||||
|
static void __init sme_clear_pgd(struct sme_populate_pgd_data *ppd)
|
||||||
{
|
{
|
||||||
unsigned long pgd_start, pgd_end, pgd_size;
|
unsigned long pgd_start, pgd_end, pgd_size;
|
||||||
pgd_t *pgd_p;
|
pgd_t *pgd_p;
|
||||||
|
|
||||||
pgd_start = start & PGDIR_MASK;
|
pgd_start = ppd->vaddr & PGDIR_MASK;
|
||||||
pgd_end = end & PGDIR_MASK;
|
pgd_end = ppd->vaddr_end & PGDIR_MASK;
|
||||||
|
|
||||||
pgd_size = (((pgd_end - pgd_start) / PGDIR_SIZE) + 1);
|
pgd_size = (((pgd_end - pgd_start) / PGDIR_SIZE) + 1) * sizeof(pgd_t);
|
||||||
pgd_size *= sizeof(pgd_t);
|
|
||||||
|
|
||||||
pgd_p = pgd_base + pgd_index(start);
|
pgd_p = ppd->pgd + pgd_index(ppd->vaddr);
|
||||||
|
|
||||||
memset(pgd_p, 0, pgd_size);
|
memset(pgd_p, 0, pgd_size);
|
||||||
}
|
}
|
||||||
|
|
||||||
#define PGD_FLAGS _KERNPG_TABLE_NOENC
|
#define PGD_FLAGS _KERNPG_TABLE_NOENC
|
||||||
#define P4D_FLAGS _KERNPG_TABLE_NOENC
|
#define P4D_FLAGS _KERNPG_TABLE_NOENC
|
||||||
#define PUD_FLAGS _KERNPG_TABLE_NOENC
|
#define PUD_FLAGS _KERNPG_TABLE_NOENC
|
||||||
#define PMD_FLAGS (__PAGE_KERNEL_LARGE_EXEC & ~_PAGE_GLOBAL)
|
#define PMD_FLAGS _KERNPG_TABLE_NOENC
|
||||||
|
|
||||||
static void __init *sme_populate_pgd(pgd_t *pgd_base, void *pgtable_area,
|
#define PMD_FLAGS_LARGE (__PAGE_KERNEL_LARGE_EXEC & ~_PAGE_GLOBAL)
|
||||||
unsigned long vaddr, pmdval_t pmd_val)
|
|
||||||
|
#define PMD_FLAGS_DEC PMD_FLAGS_LARGE
|
||||||
|
#define PMD_FLAGS_DEC_WP ((PMD_FLAGS_DEC & ~_PAGE_CACHE_MASK) | \
|
||||||
|
(_PAGE_PAT | _PAGE_PWT))
|
||||||
|
|
||||||
|
#define PMD_FLAGS_ENC (PMD_FLAGS_LARGE | _PAGE_ENC)
|
||||||
|
|
||||||
|
#define PTE_FLAGS (__PAGE_KERNEL_EXEC & ~_PAGE_GLOBAL)
|
||||||
|
|
||||||
|
#define PTE_FLAGS_DEC PTE_FLAGS
|
||||||
|
#define PTE_FLAGS_DEC_WP ((PTE_FLAGS_DEC & ~_PAGE_CACHE_MASK) | \
|
||||||
|
(_PAGE_PAT | _PAGE_PWT))
|
||||||
|
|
||||||
|
#define PTE_FLAGS_ENC (PTE_FLAGS | _PAGE_ENC)
|
||||||
|
|
||||||
|
static pmd_t __init *sme_prepare_pgd(struct sme_populate_pgd_data *ppd)
|
||||||
{
|
{
|
||||||
pgd_t *pgd_p;
|
pgd_t *pgd_p;
|
||||||
p4d_t *p4d_p;
|
p4d_t *p4d_p;
|
||||||
pud_t *pud_p;
|
pud_t *pud_p;
|
||||||
pmd_t *pmd_p;
|
pmd_t *pmd_p;
|
||||||
|
|
||||||
pgd_p = pgd_base + pgd_index(vaddr);
|
pgd_p = ppd->pgd + pgd_index(ppd->vaddr);
|
||||||
if (native_pgd_val(*pgd_p)) {
|
if (native_pgd_val(*pgd_p)) {
|
||||||
if (IS_ENABLED(CONFIG_X86_5LEVEL))
|
if (IS_ENABLED(CONFIG_X86_5LEVEL))
|
||||||
p4d_p = (p4d_t *)(native_pgd_val(*pgd_p) & ~PTE_FLAGS_MASK);
|
p4d_p = (p4d_t *)(native_pgd_val(*pgd_p) & ~PTE_FLAGS_MASK);
|
||||||
|
@ -253,15 +278,15 @@ static void __init *sme_populate_pgd(pgd_t *pgd_base, void *pgtable_area,
|
||||||
pgd_t pgd;
|
pgd_t pgd;
|
||||||
|
|
||||||
if (IS_ENABLED(CONFIG_X86_5LEVEL)) {
|
if (IS_ENABLED(CONFIG_X86_5LEVEL)) {
|
||||||
p4d_p = pgtable_area;
|
p4d_p = ppd->pgtable_area;
|
||||||
memset(p4d_p, 0, sizeof(*p4d_p) * PTRS_PER_P4D);
|
memset(p4d_p, 0, sizeof(*p4d_p) * PTRS_PER_P4D);
|
||||||
pgtable_area += sizeof(*p4d_p) * PTRS_PER_P4D;
|
ppd->pgtable_area += sizeof(*p4d_p) * PTRS_PER_P4D;
|
||||||
|
|
||||||
pgd = native_make_pgd((pgdval_t)p4d_p + PGD_FLAGS);
|
pgd = native_make_pgd((pgdval_t)p4d_p + PGD_FLAGS);
|
||||||
} else {
|
} else {
|
||||||
pud_p = pgtable_area;
|
pud_p = ppd->pgtable_area;
|
||||||
memset(pud_p, 0, sizeof(*pud_p) * PTRS_PER_PUD);
|
memset(pud_p, 0, sizeof(*pud_p) * PTRS_PER_PUD);
|
||||||
pgtable_area += sizeof(*pud_p) * PTRS_PER_PUD;
|
ppd->pgtable_area += sizeof(*pud_p) * PTRS_PER_PUD;
|
||||||
|
|
||||||
pgd = native_make_pgd((pgdval_t)pud_p + PGD_FLAGS);
|
pgd = native_make_pgd((pgdval_t)pud_p + PGD_FLAGS);
|
||||||
}
|
}
|
||||||
|
@ -269,58 +294,160 @@ static void __init *sme_populate_pgd(pgd_t *pgd_base, void *pgtable_area,
|
||||||
}
|
}
|
||||||
|
|
||||||
if (IS_ENABLED(CONFIG_X86_5LEVEL)) {
|
if (IS_ENABLED(CONFIG_X86_5LEVEL)) {
|
||||||
p4d_p += p4d_index(vaddr);
|
p4d_p += p4d_index(ppd->vaddr);
|
||||||
if (native_p4d_val(*p4d_p)) {
|
if (native_p4d_val(*p4d_p)) {
|
||||||
pud_p = (pud_t *)(native_p4d_val(*p4d_p) & ~PTE_FLAGS_MASK);
|
pud_p = (pud_t *)(native_p4d_val(*p4d_p) & ~PTE_FLAGS_MASK);
|
||||||
} else {
|
} else {
|
||||||
p4d_t p4d;
|
p4d_t p4d;
|
||||||
|
|
||||||
pud_p = pgtable_area;
|
pud_p = ppd->pgtable_area;
|
||||||
memset(pud_p, 0, sizeof(*pud_p) * PTRS_PER_PUD);
|
memset(pud_p, 0, sizeof(*pud_p) * PTRS_PER_PUD);
|
||||||
pgtable_area += sizeof(*pud_p) * PTRS_PER_PUD;
|
ppd->pgtable_area += sizeof(*pud_p) * PTRS_PER_PUD;
|
||||||
|
|
||||||
p4d = native_make_p4d((pudval_t)pud_p + P4D_FLAGS);
|
p4d = native_make_p4d((pudval_t)pud_p + P4D_FLAGS);
|
||||||
native_set_p4d(p4d_p, p4d);
|
native_set_p4d(p4d_p, p4d);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
pud_p += pud_index(vaddr);
|
pud_p += pud_index(ppd->vaddr);
|
||||||
if (native_pud_val(*pud_p)) {
|
if (native_pud_val(*pud_p)) {
|
||||||
if (native_pud_val(*pud_p) & _PAGE_PSE)
|
if (native_pud_val(*pud_p) & _PAGE_PSE)
|
||||||
goto out;
|
return NULL;
|
||||||
|
|
||||||
pmd_p = (pmd_t *)(native_pud_val(*pud_p) & ~PTE_FLAGS_MASK);
|
pmd_p = (pmd_t *)(native_pud_val(*pud_p) & ~PTE_FLAGS_MASK);
|
||||||
} else {
|
} else {
|
||||||
pud_t pud;
|
pud_t pud;
|
||||||
|
|
||||||
pmd_p = pgtable_area;
|
pmd_p = ppd->pgtable_area;
|
||||||
memset(pmd_p, 0, sizeof(*pmd_p) * PTRS_PER_PMD);
|
memset(pmd_p, 0, sizeof(*pmd_p) * PTRS_PER_PMD);
|
||||||
pgtable_area += sizeof(*pmd_p) * PTRS_PER_PMD;
|
ppd->pgtable_area += sizeof(*pmd_p) * PTRS_PER_PMD;
|
||||||
|
|
||||||
pud = native_make_pud((pmdval_t)pmd_p + PUD_FLAGS);
|
pud = native_make_pud((pmdval_t)pmd_p + PUD_FLAGS);
|
||||||
native_set_pud(pud_p, pud);
|
native_set_pud(pud_p, pud);
|
||||||
}
|
}
|
||||||
|
|
||||||
pmd_p += pmd_index(vaddr);
|
return pmd_p;
|
||||||
if (!native_pmd_val(*pmd_p) || !(native_pmd_val(*pmd_p) & _PAGE_PSE))
|
}
|
||||||
native_set_pmd(pmd_p, native_make_pmd(pmd_val));
|
|
||||||
|
|
||||||
out:
|
static void __init sme_populate_pgd_large(struct sme_populate_pgd_data *ppd)
|
||||||
return pgtable_area;
|
{
|
||||||
|
pmd_t *pmd_p;
|
||||||
|
|
||||||
|
pmd_p = sme_prepare_pgd(ppd);
|
||||||
|
if (!pmd_p)
|
||||||
|
return;
|
||||||
|
|
||||||
|
pmd_p += pmd_index(ppd->vaddr);
|
||||||
|
if (!native_pmd_val(*pmd_p) || !(native_pmd_val(*pmd_p) & _PAGE_PSE))
|
||||||
|
native_set_pmd(pmd_p, native_make_pmd(ppd->paddr | ppd->pmd_flags));
|
||||||
|
}
|
||||||
|
|
||||||
|
static void __init sme_populate_pgd(struct sme_populate_pgd_data *ppd)
|
||||||
|
{
|
||||||
|
pmd_t *pmd_p;
|
||||||
|
pte_t *pte_p;
|
||||||
|
|
||||||
|
pmd_p = sme_prepare_pgd(ppd);
|
||||||
|
if (!pmd_p)
|
||||||
|
return;
|
||||||
|
|
||||||
|
pmd_p += pmd_index(ppd->vaddr);
|
||||||
|
if (native_pmd_val(*pmd_p)) {
|
||||||
|
if (native_pmd_val(*pmd_p) & _PAGE_PSE)
|
||||||
|
return;
|
||||||
|
|
||||||
|
pte_p = (pte_t *)(native_pmd_val(*pmd_p) & ~PTE_FLAGS_MASK);
|
||||||
|
} else {
|
||||||
|
pmd_t pmd;
|
||||||
|
|
||||||
|
pte_p = ppd->pgtable_area;
|
||||||
|
memset(pte_p, 0, sizeof(*pte_p) * PTRS_PER_PTE);
|
||||||
|
ppd->pgtable_area += sizeof(*pte_p) * PTRS_PER_PTE;
|
||||||
|
|
||||||
|
pmd = native_make_pmd((pteval_t)pte_p + PMD_FLAGS);
|
||||||
|
native_set_pmd(pmd_p, pmd);
|
||||||
|
}
|
||||||
|
|
||||||
|
pte_p += pte_index(ppd->vaddr);
|
||||||
|
if (!native_pte_val(*pte_p))
|
||||||
|
native_set_pte(pte_p, native_make_pte(ppd->paddr | ppd->pte_flags));
|
||||||
|
}
|
||||||
|
|
||||||
|
static void __init __sme_map_range_pmd(struct sme_populate_pgd_data *ppd)
|
||||||
|
{
|
||||||
|
while (ppd->vaddr < ppd->vaddr_end) {
|
||||||
|
sme_populate_pgd_large(ppd);
|
||||||
|
|
||||||
|
ppd->vaddr += PMD_PAGE_SIZE;
|
||||||
|
ppd->paddr += PMD_PAGE_SIZE;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
static void __init __sme_map_range_pte(struct sme_populate_pgd_data *ppd)
|
||||||
|
{
|
||||||
|
while (ppd->vaddr < ppd->vaddr_end) {
|
||||||
|
sme_populate_pgd(ppd);
|
||||||
|
|
||||||
|
ppd->vaddr += PAGE_SIZE;
|
||||||
|
ppd->paddr += PAGE_SIZE;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
static void __init __sme_map_range(struct sme_populate_pgd_data *ppd,
|
||||||
|
pmdval_t pmd_flags, pteval_t pte_flags)
|
||||||
|
{
|
||||||
|
unsigned long vaddr_end;
|
||||||
|
|
||||||
|
ppd->pmd_flags = pmd_flags;
|
||||||
|
ppd->pte_flags = pte_flags;
|
||||||
|
|
||||||
|
/* Save original end value since we modify the struct value */
|
||||||
|
vaddr_end = ppd->vaddr_end;
|
||||||
|
|
||||||
|
/* If start is not 2MB aligned, create PTE entries */
|
||||||
|
ppd->vaddr_end = ALIGN(ppd->vaddr, PMD_PAGE_SIZE);
|
||||||
|
__sme_map_range_pte(ppd);
|
||||||
|
|
||||||
|
/* Create PMD entries */
|
||||||
|
ppd->vaddr_end = vaddr_end & PMD_PAGE_MASK;
|
||||||
|
__sme_map_range_pmd(ppd);
|
||||||
|
|
||||||
|
/* If end is not 2MB aligned, create PTE entries */
|
||||||
|
ppd->vaddr_end = vaddr_end;
|
||||||
|
__sme_map_range_pte(ppd);
|
||||||
|
}
|
||||||
|
|
||||||
|
static void __init sme_map_range_encrypted(struct sme_populate_pgd_data *ppd)
|
||||||
|
{
|
||||||
|
__sme_map_range(ppd, PMD_FLAGS_ENC, PTE_FLAGS_ENC);
|
||||||
|
}
|
||||||
|
|
||||||
|
static void __init sme_map_range_decrypted(struct sme_populate_pgd_data *ppd)
|
||||||
|
{
|
||||||
|
__sme_map_range(ppd, PMD_FLAGS_DEC, PTE_FLAGS_DEC);
|
||||||
|
}
|
||||||
|
|
||||||
|
static void __init sme_map_range_decrypted_wp(struct sme_populate_pgd_data *ppd)
|
||||||
|
{
|
||||||
|
__sme_map_range(ppd, PMD_FLAGS_DEC_WP, PTE_FLAGS_DEC_WP);
|
||||||
}
|
}
|
||||||
|
|
||||||
static unsigned long __init sme_pgtable_calc(unsigned long len)
|
static unsigned long __init sme_pgtable_calc(unsigned long len)
|
||||||
{
|
{
|
||||||
unsigned long p4d_size, pud_size, pmd_size;
|
unsigned long p4d_size, pud_size, pmd_size, pte_size;
|
||||||
unsigned long total;
|
unsigned long total;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Perform a relatively simplistic calculation of the pagetable
|
* Perform a relatively simplistic calculation of the pagetable
|
||||||
* entries that are needed. That mappings will be covered by 2MB
|
* entries that are needed. Those mappings will be covered mostly
|
||||||
* PMD entries so we can conservatively calculate the required
|
* by 2MB PMD entries so we can conservatively calculate the required
|
||||||
* number of P4D, PUD and PMD structures needed to perform the
|
* number of P4D, PUD and PMD structures needed to perform the
|
||||||
* mappings. Incrementing the count for each covers the case where
|
* mappings. For mappings that are not 2MB aligned, PTE mappings
|
||||||
* the addresses cross entries.
|
* would be needed for the start and end portion of the address range
|
||||||
|
* that fall outside of the 2MB alignment. This results in, at most,
|
||||||
|
* two extra pages to hold PTE entries for each range that is mapped.
|
||||||
|
* Incrementing the count for each covers the case where the addresses
|
||||||
|
* cross entries.
|
||||||
*/
|
*/
|
||||||
if (IS_ENABLED(CONFIG_X86_5LEVEL)) {
|
if (IS_ENABLED(CONFIG_X86_5LEVEL)) {
|
||||||
p4d_size = (ALIGN(len, PGDIR_SIZE) / PGDIR_SIZE) + 1;
|
p4d_size = (ALIGN(len, PGDIR_SIZE) / PGDIR_SIZE) + 1;
|
||||||
|
@ -334,8 +461,9 @@ static unsigned long __init sme_pgtable_calc(unsigned long len)
|
||||||
}
|
}
|
||||||
pmd_size = (ALIGN(len, PUD_SIZE) / PUD_SIZE) + 1;
|
pmd_size = (ALIGN(len, PUD_SIZE) / PUD_SIZE) + 1;
|
||||||
pmd_size *= sizeof(pmd_t) * PTRS_PER_PMD;
|
pmd_size *= sizeof(pmd_t) * PTRS_PER_PMD;
|
||||||
|
pte_size = 2 * sizeof(pte_t) * PTRS_PER_PTE;
|
||||||
|
|
||||||
total = p4d_size + pud_size + pmd_size;
|
total = p4d_size + pud_size + pmd_size + pte_size;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Now calculate the added pagetable structures needed to populate
|
* Now calculate the added pagetable structures needed to populate
|
||||||
|
@ -359,29 +487,29 @@ static unsigned long __init sme_pgtable_calc(unsigned long len)
|
||||||
return total;
|
return total;
|
||||||
}
|
}
|
||||||
|
|
||||||
void __init sme_encrypt_kernel(void)
|
void __init __nostackprotector sme_encrypt_kernel(struct boot_params *bp)
|
||||||
{
|
{
|
||||||
unsigned long workarea_start, workarea_end, workarea_len;
|
unsigned long workarea_start, workarea_end, workarea_len;
|
||||||
unsigned long execute_start, execute_end, execute_len;
|
unsigned long execute_start, execute_end, execute_len;
|
||||||
unsigned long kernel_start, kernel_end, kernel_len;
|
unsigned long kernel_start, kernel_end, kernel_len;
|
||||||
|
unsigned long initrd_start, initrd_end, initrd_len;
|
||||||
|
struct sme_populate_pgd_data ppd;
|
||||||
unsigned long pgtable_area_len;
|
unsigned long pgtable_area_len;
|
||||||
unsigned long paddr, pmd_flags;
|
|
||||||
unsigned long decrypted_base;
|
unsigned long decrypted_base;
|
||||||
void *pgtable_area;
|
|
||||||
pgd_t *pgd;
|
|
||||||
|
|
||||||
if (!sme_active())
|
if (!sme_active())
|
||||||
return;
|
return;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Prepare for encrypting the kernel by building new pagetables with
|
* Prepare for encrypting the kernel and initrd by building new
|
||||||
* the necessary attributes needed to encrypt the kernel in place.
|
* pagetables with the necessary attributes needed to encrypt the
|
||||||
|
* kernel in place.
|
||||||
*
|
*
|
||||||
* One range of virtual addresses will map the memory occupied
|
* One range of virtual addresses will map the memory occupied
|
||||||
* by the kernel as encrypted.
|
* by the kernel and initrd as encrypted.
|
||||||
*
|
*
|
||||||
* Another range of virtual addresses will map the memory occupied
|
* Another range of virtual addresses will map the memory occupied
|
||||||
* by the kernel as decrypted and write-protected.
|
* by the kernel and initrd as decrypted and write-protected.
|
||||||
*
|
*
|
||||||
* The use of write-protect attribute will prevent any of the
|
* The use of write-protect attribute will prevent any of the
|
||||||
* memory from being cached.
|
* memory from being cached.
|
||||||
|
@ -392,6 +520,20 @@ void __init sme_encrypt_kernel(void)
|
||||||
kernel_end = ALIGN(__pa_symbol(_end), PMD_PAGE_SIZE);
|
kernel_end = ALIGN(__pa_symbol(_end), PMD_PAGE_SIZE);
|
||||||
kernel_len = kernel_end - kernel_start;
|
kernel_len = kernel_end - kernel_start;
|
||||||
|
|
||||||
|
initrd_start = 0;
|
||||||
|
initrd_end = 0;
|
||||||
|
initrd_len = 0;
|
||||||
|
#ifdef CONFIG_BLK_DEV_INITRD
|
||||||
|
initrd_len = (unsigned long)bp->hdr.ramdisk_size |
|
||||||
|
((unsigned long)bp->ext_ramdisk_size << 32);
|
||||||
|
if (initrd_len) {
|
||||||
|
initrd_start = (unsigned long)bp->hdr.ramdisk_image |
|
||||||
|
((unsigned long)bp->ext_ramdisk_image << 32);
|
||||||
|
initrd_end = PAGE_ALIGN(initrd_start + initrd_len);
|
||||||
|
initrd_len = initrd_end - initrd_start;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
/* Set the encryption workarea to be immediately after the kernel */
|
/* Set the encryption workarea to be immediately after the kernel */
|
||||||
workarea_start = kernel_end;
|
workarea_start = kernel_end;
|
||||||
|
|
||||||
|
@ -414,16 +556,21 @@ void __init sme_encrypt_kernel(void)
|
||||||
*/
|
*/
|
||||||
pgtable_area_len = sizeof(pgd_t) * PTRS_PER_PGD;
|
pgtable_area_len = sizeof(pgd_t) * PTRS_PER_PGD;
|
||||||
pgtable_area_len += sme_pgtable_calc(execute_end - kernel_start) * 2;
|
pgtable_area_len += sme_pgtable_calc(execute_end - kernel_start) * 2;
|
||||||
|
if (initrd_len)
|
||||||
|
pgtable_area_len += sme_pgtable_calc(initrd_len) * 2;
|
||||||
|
|
||||||
/* PUDs and PMDs needed in the current pagetables for the workarea */
|
/* PUDs and PMDs needed in the current pagetables for the workarea */
|
||||||
pgtable_area_len += sme_pgtable_calc(execute_len + pgtable_area_len);
|
pgtable_area_len += sme_pgtable_calc(execute_len + pgtable_area_len);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* The total workarea includes the executable encryption area and
|
* The total workarea includes the executable encryption area and
|
||||||
* the pagetable area.
|
* the pagetable area. The start of the workarea is already 2MB
|
||||||
|
* aligned, align the end of the workarea on a 2MB boundary so that
|
||||||
|
* we don't try to create/allocate PTE entries from the workarea
|
||||||
|
* before it is mapped.
|
||||||
*/
|
*/
|
||||||
workarea_len = execute_len + pgtable_area_len;
|
workarea_len = execute_len + pgtable_area_len;
|
||||||
workarea_end = workarea_start + workarea_len;
|
workarea_end = ALIGN(workarea_start + workarea_len, PMD_PAGE_SIZE);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Set the address to the start of where newly created pagetable
|
* Set the address to the start of where newly created pagetable
|
||||||
|
@ -432,45 +579,30 @@ void __init sme_encrypt_kernel(void)
|
||||||
* pagetables and when the new encrypted and decrypted kernel
|
* pagetables and when the new encrypted and decrypted kernel
|
||||||
* mappings are populated.
|
* mappings are populated.
|
||||||
*/
|
*/
|
||||||
pgtable_area = (void *)execute_end;
|
ppd.pgtable_area = (void *)execute_end;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Make sure the current pagetable structure has entries for
|
* Make sure the current pagetable structure has entries for
|
||||||
* addressing the workarea.
|
* addressing the workarea.
|
||||||
*/
|
*/
|
||||||
pgd = (pgd_t *)native_read_cr3_pa();
|
ppd.pgd = (pgd_t *)native_read_cr3_pa();
|
||||||
paddr = workarea_start;
|
ppd.paddr = workarea_start;
|
||||||
while (paddr < workarea_end) {
|
ppd.vaddr = workarea_start;
|
||||||
pgtable_area = sme_populate_pgd(pgd, pgtable_area,
|
ppd.vaddr_end = workarea_end;
|
||||||
paddr,
|
sme_map_range_decrypted(&ppd);
|
||||||
paddr + PMD_FLAGS);
|
|
||||||
|
|
||||||
paddr += PMD_PAGE_SIZE;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Flush the TLB - no globals so cr3 is enough */
|
/* Flush the TLB - no globals so cr3 is enough */
|
||||||
native_write_cr3(__native_read_cr3());
|
native_write_cr3(__native_read_cr3());
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* A new pagetable structure is being built to allow for the kernel
|
* A new pagetable structure is being built to allow for the kernel
|
||||||
* to be encrypted. It starts with an empty PGD that will then be
|
* and initrd to be encrypted. It starts with an empty PGD that will
|
||||||
* populated with new PUDs and PMDs as the encrypted and decrypted
|
* then be populated with new PUDs and PMDs as the encrypted and
|
||||||
* kernel mappings are created.
|
* decrypted kernel mappings are created.
|
||||||
*/
|
*/
|
||||||
pgd = pgtable_area;
|
ppd.pgd = ppd.pgtable_area;
|
||||||
memset(pgd, 0, sizeof(*pgd) * PTRS_PER_PGD);
|
memset(ppd.pgd, 0, sizeof(pgd_t) * PTRS_PER_PGD);
|
||||||
pgtable_area += sizeof(*pgd) * PTRS_PER_PGD;
|
ppd.pgtable_area += sizeof(pgd_t) * PTRS_PER_PGD;
|
||||||
|
|
||||||
/* Add encrypted kernel (identity) mappings */
|
|
||||||
pmd_flags = PMD_FLAGS | _PAGE_ENC;
|
|
||||||
paddr = kernel_start;
|
|
||||||
while (paddr < kernel_end) {
|
|
||||||
pgtable_area = sme_populate_pgd(pgd, pgtable_area,
|
|
||||||
paddr,
|
|
||||||
paddr + pmd_flags);
|
|
||||||
|
|
||||||
paddr += PMD_PAGE_SIZE;
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* A different PGD index/entry must be used to get different
|
* A different PGD index/entry must be used to get different
|
||||||
|
@ -479,47 +611,79 @@ void __init sme_encrypt_kernel(void)
|
||||||
* the base of the mapping.
|
* the base of the mapping.
|
||||||
*/
|
*/
|
||||||
decrypted_base = (pgd_index(workarea_end) + 1) & (PTRS_PER_PGD - 1);
|
decrypted_base = (pgd_index(workarea_end) + 1) & (PTRS_PER_PGD - 1);
|
||||||
|
if (initrd_len) {
|
||||||
|
unsigned long check_base;
|
||||||
|
|
||||||
|
check_base = (pgd_index(initrd_end) + 1) & (PTRS_PER_PGD - 1);
|
||||||
|
decrypted_base = max(decrypted_base, check_base);
|
||||||
|
}
|
||||||
decrypted_base <<= PGDIR_SHIFT;
|
decrypted_base <<= PGDIR_SHIFT;
|
||||||
|
|
||||||
/* Add decrypted, write-protected kernel (non-identity) mappings */
|
/* Add encrypted kernel (identity) mappings */
|
||||||
pmd_flags = (PMD_FLAGS & ~_PAGE_CACHE_MASK) | (_PAGE_PAT | _PAGE_PWT);
|
ppd.paddr = kernel_start;
|
||||||
paddr = kernel_start;
|
ppd.vaddr = kernel_start;
|
||||||
while (paddr < kernel_end) {
|
ppd.vaddr_end = kernel_end;
|
||||||
pgtable_area = sme_populate_pgd(pgd, pgtable_area,
|
sme_map_range_encrypted(&ppd);
|
||||||
paddr + decrypted_base,
|
|
||||||
paddr + pmd_flags);
|
|
||||||
|
|
||||||
paddr += PMD_PAGE_SIZE;
|
/* Add decrypted, write-protected kernel (non-identity) mappings */
|
||||||
|
ppd.paddr = kernel_start;
|
||||||
|
ppd.vaddr = kernel_start + decrypted_base;
|
||||||
|
ppd.vaddr_end = kernel_end + decrypted_base;
|
||||||
|
sme_map_range_decrypted_wp(&ppd);
|
||||||
|
|
||||||
|
if (initrd_len) {
|
||||||
|
/* Add encrypted initrd (identity) mappings */
|
||||||
|
ppd.paddr = initrd_start;
|
||||||
|
ppd.vaddr = initrd_start;
|
||||||
|
ppd.vaddr_end = initrd_end;
|
||||||
|
sme_map_range_encrypted(&ppd);
|
||||||
|
/*
|
||||||
|
* Add decrypted, write-protected initrd (non-identity) mappings
|
||||||
|
*/
|
||||||
|
ppd.paddr = initrd_start;
|
||||||
|
ppd.vaddr = initrd_start + decrypted_base;
|
||||||
|
ppd.vaddr_end = initrd_end + decrypted_base;
|
||||||
|
sme_map_range_decrypted_wp(&ppd);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Add decrypted workarea mappings to both kernel mappings */
|
/* Add decrypted workarea mappings to both kernel mappings */
|
||||||
paddr = workarea_start;
|
ppd.paddr = workarea_start;
|
||||||
while (paddr < workarea_end) {
|
ppd.vaddr = workarea_start;
|
||||||
pgtable_area = sme_populate_pgd(pgd, pgtable_area,
|
ppd.vaddr_end = workarea_end;
|
||||||
paddr,
|
sme_map_range_decrypted(&ppd);
|
||||||
paddr + PMD_FLAGS);
|
|
||||||
|
|
||||||
pgtable_area = sme_populate_pgd(pgd, pgtable_area,
|
ppd.paddr = workarea_start;
|
||||||
paddr + decrypted_base,
|
ppd.vaddr = workarea_start + decrypted_base;
|
||||||
paddr + PMD_FLAGS);
|
ppd.vaddr_end = workarea_end + decrypted_base;
|
||||||
|
sme_map_range_decrypted(&ppd);
|
||||||
paddr += PMD_PAGE_SIZE;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Perform the encryption */
|
/* Perform the encryption */
|
||||||
sme_encrypt_execute(kernel_start, kernel_start + decrypted_base,
|
sme_encrypt_execute(kernel_start, kernel_start + decrypted_base,
|
||||||
kernel_len, workarea_start, (unsigned long)pgd);
|
kernel_len, workarea_start, (unsigned long)ppd.pgd);
|
||||||
|
|
||||||
|
if (initrd_len)
|
||||||
|
sme_encrypt_execute(initrd_start, initrd_start + decrypted_base,
|
||||||
|
initrd_len, workarea_start,
|
||||||
|
(unsigned long)ppd.pgd);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* At this point we are running encrypted. Remove the mappings for
|
* At this point we are running encrypted. Remove the mappings for
|
||||||
* the decrypted areas - all that is needed for this is to remove
|
* the decrypted areas - all that is needed for this is to remove
|
||||||
* the PGD entry/entries.
|
* the PGD entry/entries.
|
||||||
*/
|
*/
|
||||||
sme_clear_pgd(pgd, kernel_start + decrypted_base,
|
ppd.vaddr = kernel_start + decrypted_base;
|
||||||
kernel_end + decrypted_base);
|
ppd.vaddr_end = kernel_end + decrypted_base;
|
||||||
|
sme_clear_pgd(&ppd);
|
||||||
|
|
||||||
sme_clear_pgd(pgd, workarea_start + decrypted_base,
|
if (initrd_len) {
|
||||||
workarea_end + decrypted_base);
|
ppd.vaddr = initrd_start + decrypted_base;
|
||||||
|
ppd.vaddr_end = initrd_end + decrypted_base;
|
||||||
|
sme_clear_pgd(&ppd);
|
||||||
|
}
|
||||||
|
|
||||||
|
ppd.vaddr = workarea_start + decrypted_base;
|
||||||
|
ppd.vaddr_end = workarea_end + decrypted_base;
|
||||||
|
sme_clear_pgd(&ppd);
|
||||||
|
|
||||||
/* Flush the TLB - no globals so cr3 is enough */
|
/* Flush the TLB - no globals so cr3 is enough */
|
||||||
native_write_cr3(__native_read_cr3());
|
native_write_cr3(__native_read_cr3());
|
||||||
|
|
|
@ -22,9 +22,9 @@ ENTRY(sme_encrypt_execute)
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Entry parameters:
|
* Entry parameters:
|
||||||
* RDI - virtual address for the encrypted kernel mapping
|
* RDI - virtual address for the encrypted mapping
|
||||||
* RSI - virtual address for the decrypted kernel mapping
|
* RSI - virtual address for the decrypted mapping
|
||||||
* RDX - length of kernel
|
* RDX - length to encrypt
|
||||||
* RCX - virtual address of the encryption workarea, including:
|
* RCX - virtual address of the encryption workarea, including:
|
||||||
* - stack page (PAGE_SIZE)
|
* - stack page (PAGE_SIZE)
|
||||||
* - encryption routine page (PAGE_SIZE)
|
* - encryption routine page (PAGE_SIZE)
|
||||||
|
@ -41,9 +41,9 @@ ENTRY(sme_encrypt_execute)
|
||||||
addq $PAGE_SIZE, %rax /* Workarea encryption routine */
|
addq $PAGE_SIZE, %rax /* Workarea encryption routine */
|
||||||
|
|
||||||
push %r12
|
push %r12
|
||||||
movq %rdi, %r10 /* Encrypted kernel */
|
movq %rdi, %r10 /* Encrypted area */
|
||||||
movq %rsi, %r11 /* Decrypted kernel */
|
movq %rsi, %r11 /* Decrypted area */
|
||||||
movq %rdx, %r12 /* Kernel length */
|
movq %rdx, %r12 /* Area length */
|
||||||
|
|
||||||
/* Copy encryption routine into the workarea */
|
/* Copy encryption routine into the workarea */
|
||||||
movq %rax, %rdi /* Workarea encryption routine */
|
movq %rax, %rdi /* Workarea encryption routine */
|
||||||
|
@ -52,10 +52,10 @@ ENTRY(sme_encrypt_execute)
|
||||||
rep movsb
|
rep movsb
|
||||||
|
|
||||||
/* Setup registers for call */
|
/* Setup registers for call */
|
||||||
movq %r10, %rdi /* Encrypted kernel */
|
movq %r10, %rdi /* Encrypted area */
|
||||||
movq %r11, %rsi /* Decrypted kernel */
|
movq %r11, %rsi /* Decrypted area */
|
||||||
movq %r8, %rdx /* Pagetables used for encryption */
|
movq %r8, %rdx /* Pagetables used for encryption */
|
||||||
movq %r12, %rcx /* Kernel length */
|
movq %r12, %rcx /* Area length */
|
||||||
movq %rax, %r8 /* Workarea encryption routine */
|
movq %rax, %r8 /* Workarea encryption routine */
|
||||||
addq $PAGE_SIZE, %r8 /* Workarea intermediate copy buffer */
|
addq $PAGE_SIZE, %r8 /* Workarea intermediate copy buffer */
|
||||||
|
|
||||||
|
@ -71,7 +71,7 @@ ENDPROC(sme_encrypt_execute)
|
||||||
|
|
||||||
ENTRY(__enc_copy)
|
ENTRY(__enc_copy)
|
||||||
/*
|
/*
|
||||||
* Routine used to encrypt kernel.
|
* Routine used to encrypt memory in place.
|
||||||
* This routine must be run outside of the kernel proper since
|
* This routine must be run outside of the kernel proper since
|
||||||
* the kernel will be encrypted during the process. So this
|
* the kernel will be encrypted during the process. So this
|
||||||
* routine is defined here and then copied to an area outside
|
* routine is defined here and then copied to an area outside
|
||||||
|
@ -79,19 +79,19 @@ ENTRY(__enc_copy)
|
||||||
* during execution.
|
* during execution.
|
||||||
*
|
*
|
||||||
* On entry the registers must be:
|
* On entry the registers must be:
|
||||||
* RDI - virtual address for the encrypted kernel mapping
|
* RDI - virtual address for the encrypted mapping
|
||||||
* RSI - virtual address for the decrypted kernel mapping
|
* RSI - virtual address for the decrypted mapping
|
||||||
* RDX - address of the pagetables to use for encryption
|
* RDX - address of the pagetables to use for encryption
|
||||||
* RCX - length of kernel
|
* RCX - length of area
|
||||||
* R8 - intermediate copy buffer
|
* R8 - intermediate copy buffer
|
||||||
*
|
*
|
||||||
* RAX - points to this routine
|
* RAX - points to this routine
|
||||||
*
|
*
|
||||||
* The kernel will be encrypted by copying from the non-encrypted
|
* The area will be encrypted by copying from the non-encrypted
|
||||||
* kernel space to an intermediate buffer and then copying from the
|
* memory space to an intermediate buffer and then copying from the
|
||||||
* intermediate buffer back to the encrypted kernel space. The physical
|
* intermediate buffer back to the encrypted memory space. The physical
|
||||||
* addresses of the two kernel space mappings are the same which
|
* addresses of the two mappings are the same which results in the area
|
||||||
* results in the kernel being encrypted "in place".
|
* being encrypted "in place".
|
||||||
*/
|
*/
|
||||||
/* Enable the new page tables */
|
/* Enable the new page tables */
|
||||||
mov %rdx, %cr3
|
mov %rdx, %cr3
|
||||||
|
@ -103,47 +103,55 @@ ENTRY(__enc_copy)
|
||||||
orq $X86_CR4_PGE, %rdx
|
orq $X86_CR4_PGE, %rdx
|
||||||
mov %rdx, %cr4
|
mov %rdx, %cr4
|
||||||
|
|
||||||
|
push %r15
|
||||||
|
push %r12
|
||||||
|
|
||||||
|
movq %rcx, %r9 /* Save area length */
|
||||||
|
movq %rdi, %r10 /* Save encrypted area address */
|
||||||
|
movq %rsi, %r11 /* Save decrypted area address */
|
||||||
|
|
||||||
/* Set the PAT register PA5 entry to write-protect */
|
/* Set the PAT register PA5 entry to write-protect */
|
||||||
push %rcx
|
|
||||||
movl $MSR_IA32_CR_PAT, %ecx
|
movl $MSR_IA32_CR_PAT, %ecx
|
||||||
rdmsr
|
rdmsr
|
||||||
push %rdx /* Save original PAT value */
|
mov %rdx, %r15 /* Save original PAT value */
|
||||||
andl $0xffff00ff, %edx /* Clear PA5 */
|
andl $0xffff00ff, %edx /* Clear PA5 */
|
||||||
orl $0x00000500, %edx /* Set PA5 to WP */
|
orl $0x00000500, %edx /* Set PA5 to WP */
|
||||||
wrmsr
|
wrmsr
|
||||||
pop %rdx /* RDX contains original PAT value */
|
|
||||||
pop %rcx
|
|
||||||
|
|
||||||
movq %rcx, %r9 /* Save kernel length */
|
|
||||||
movq %rdi, %r10 /* Save encrypted kernel address */
|
|
||||||
movq %rsi, %r11 /* Save decrypted kernel address */
|
|
||||||
|
|
||||||
wbinvd /* Invalidate any cache entries */
|
wbinvd /* Invalidate any cache entries */
|
||||||
|
|
||||||
/* Copy/encrypt 2MB at a time */
|
/* Copy/encrypt up to 2MB at a time */
|
||||||
|
movq $PMD_PAGE_SIZE, %r12
|
||||||
1:
|
1:
|
||||||
movq %r11, %rsi /* Source - decrypted kernel */
|
cmpq %r12, %r9
|
||||||
|
jnb 2f
|
||||||
|
movq %r9, %r12
|
||||||
|
|
||||||
|
2:
|
||||||
|
movq %r11, %rsi /* Source - decrypted area */
|
||||||
movq %r8, %rdi /* Dest - intermediate copy buffer */
|
movq %r8, %rdi /* Dest - intermediate copy buffer */
|
||||||
movq $PMD_PAGE_SIZE, %rcx /* 2MB length */
|
movq %r12, %rcx
|
||||||
rep movsb
|
rep movsb
|
||||||
|
|
||||||
movq %r8, %rsi /* Source - intermediate copy buffer */
|
movq %r8, %rsi /* Source - intermediate copy buffer */
|
||||||
movq %r10, %rdi /* Dest - encrypted kernel */
|
movq %r10, %rdi /* Dest - encrypted area */
|
||||||
movq $PMD_PAGE_SIZE, %rcx /* 2MB length */
|
movq %r12, %rcx
|
||||||
rep movsb
|
rep movsb
|
||||||
|
|
||||||
addq $PMD_PAGE_SIZE, %r11
|
addq %r12, %r11
|
||||||
addq $PMD_PAGE_SIZE, %r10
|
addq %r12, %r10
|
||||||
subq $PMD_PAGE_SIZE, %r9 /* Kernel length decrement */
|
subq %r12, %r9 /* Kernel length decrement */
|
||||||
jnz 1b /* Kernel length not zero? */
|
jnz 1b /* Kernel length not zero? */
|
||||||
|
|
||||||
/* Restore PAT register */
|
/* Restore PAT register */
|
||||||
push %rdx /* Save original PAT value */
|
|
||||||
movl $MSR_IA32_CR_PAT, %ecx
|
movl $MSR_IA32_CR_PAT, %ecx
|
||||||
rdmsr
|
rdmsr
|
||||||
pop %rdx /* Restore original PAT value */
|
mov %r15, %rdx /* Restore original PAT value */
|
||||||
wrmsr
|
wrmsr
|
||||||
|
|
||||||
|
pop %r12
|
||||||
|
pop %r15
|
||||||
|
|
||||||
ret
|
ret
|
||||||
.L__enc_copy_end:
|
.L__enc_copy_end:
|
||||||
ENDPROC(__enc_copy)
|
ENDPROC(__enc_copy)
|
||||||
|
|
|
@ -149,7 +149,7 @@ pgd_t __pti_set_user_pgd(pgd_t *pgdp, pgd_t pgd)
|
||||||
*
|
*
|
||||||
* Returns a pointer to a P4D on success, or NULL on failure.
|
* Returns a pointer to a P4D on success, or NULL on failure.
|
||||||
*/
|
*/
|
||||||
static p4d_t *pti_user_pagetable_walk_p4d(unsigned long address)
|
static __init p4d_t *pti_user_pagetable_walk_p4d(unsigned long address)
|
||||||
{
|
{
|
||||||
pgd_t *pgd = kernel_to_user_pgdp(pgd_offset_k(address));
|
pgd_t *pgd = kernel_to_user_pgdp(pgd_offset_k(address));
|
||||||
gfp_t gfp = (GFP_KERNEL | __GFP_NOTRACK | __GFP_ZERO);
|
gfp_t gfp = (GFP_KERNEL | __GFP_NOTRACK | __GFP_ZERO);
|
||||||
|
@ -164,12 +164,7 @@ static p4d_t *pti_user_pagetable_walk_p4d(unsigned long address)
|
||||||
if (!new_p4d_page)
|
if (!new_p4d_page)
|
||||||
return NULL;
|
return NULL;
|
||||||
|
|
||||||
if (pgd_none(*pgd)) {
|
set_pgd(pgd, __pgd(_KERNPG_TABLE | __pa(new_p4d_page)));
|
||||||
set_pgd(pgd, __pgd(_KERNPG_TABLE | __pa(new_p4d_page)));
|
|
||||||
new_p4d_page = 0;
|
|
||||||
}
|
|
||||||
if (new_p4d_page)
|
|
||||||
free_page(new_p4d_page);
|
|
||||||
}
|
}
|
||||||
BUILD_BUG_ON(pgd_large(*pgd) != 0);
|
BUILD_BUG_ON(pgd_large(*pgd) != 0);
|
||||||
|
|
||||||
|
@ -182,7 +177,7 @@ static p4d_t *pti_user_pagetable_walk_p4d(unsigned long address)
|
||||||
*
|
*
|
||||||
* Returns a pointer to a PMD on success, or NULL on failure.
|
* Returns a pointer to a PMD on success, or NULL on failure.
|
||||||
*/
|
*/
|
||||||
static pmd_t *pti_user_pagetable_walk_pmd(unsigned long address)
|
static __init pmd_t *pti_user_pagetable_walk_pmd(unsigned long address)
|
||||||
{
|
{
|
||||||
gfp_t gfp = (GFP_KERNEL | __GFP_NOTRACK | __GFP_ZERO);
|
gfp_t gfp = (GFP_KERNEL | __GFP_NOTRACK | __GFP_ZERO);
|
||||||
p4d_t *p4d = pti_user_pagetable_walk_p4d(address);
|
p4d_t *p4d = pti_user_pagetable_walk_p4d(address);
|
||||||
|
@ -194,12 +189,7 @@ static pmd_t *pti_user_pagetable_walk_pmd(unsigned long address)
|
||||||
if (!new_pud_page)
|
if (!new_pud_page)
|
||||||
return NULL;
|
return NULL;
|
||||||
|
|
||||||
if (p4d_none(*p4d)) {
|
set_p4d(p4d, __p4d(_KERNPG_TABLE | __pa(new_pud_page)));
|
||||||
set_p4d(p4d, __p4d(_KERNPG_TABLE | __pa(new_pud_page)));
|
|
||||||
new_pud_page = 0;
|
|
||||||
}
|
|
||||||
if (new_pud_page)
|
|
||||||
free_page(new_pud_page);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
pud = pud_offset(p4d, address);
|
pud = pud_offset(p4d, address);
|
||||||
|
@ -213,12 +203,7 @@ static pmd_t *pti_user_pagetable_walk_pmd(unsigned long address)
|
||||||
if (!new_pmd_page)
|
if (!new_pmd_page)
|
||||||
return NULL;
|
return NULL;
|
||||||
|
|
||||||
if (pud_none(*pud)) {
|
set_pud(pud, __pud(_KERNPG_TABLE | __pa(new_pmd_page)));
|
||||||
set_pud(pud, __pud(_KERNPG_TABLE | __pa(new_pmd_page)));
|
|
||||||
new_pmd_page = 0;
|
|
||||||
}
|
|
||||||
if (new_pmd_page)
|
|
||||||
free_page(new_pmd_page);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
return pmd_offset(pud, address);
|
return pmd_offset(pud, address);
|
||||||
|
@ -251,12 +236,7 @@ static __init pte_t *pti_user_pagetable_walk_pte(unsigned long address)
|
||||||
if (!new_pte_page)
|
if (!new_pte_page)
|
||||||
return NULL;
|
return NULL;
|
||||||
|
|
||||||
if (pmd_none(*pmd)) {
|
set_pmd(pmd, __pmd(_KERNPG_TABLE | __pa(new_pte_page)));
|
||||||
set_pmd(pmd, __pmd(_KERNPG_TABLE | __pa(new_pte_page)));
|
|
||||||
new_pte_page = 0;
|
|
||||||
}
|
|
||||||
if (new_pte_page)
|
|
||||||
free_page(new_pte_page);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
pte = pte_offset_kernel(pmd, address);
|
pte = pte_offset_kernel(pmd, address);
|
||||||
|
|
|
@ -151,6 +151,34 @@ void switch_mm(struct mm_struct *prev, struct mm_struct *next,
|
||||||
local_irq_restore(flags);
|
local_irq_restore(flags);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static void sync_current_stack_to_mm(struct mm_struct *mm)
|
||||||
|
{
|
||||||
|
unsigned long sp = current_stack_pointer;
|
||||||
|
pgd_t *pgd = pgd_offset(mm, sp);
|
||||||
|
|
||||||
|
if (CONFIG_PGTABLE_LEVELS > 4) {
|
||||||
|
if (unlikely(pgd_none(*pgd))) {
|
||||||
|
pgd_t *pgd_ref = pgd_offset_k(sp);
|
||||||
|
|
||||||
|
set_pgd(pgd, *pgd_ref);
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
/*
|
||||||
|
* "pgd" is faked. The top level entries are "p4d"s, so sync
|
||||||
|
* the p4d. This compiles to approximately the same code as
|
||||||
|
* the 5-level case.
|
||||||
|
*/
|
||||||
|
p4d_t *p4d = p4d_offset(pgd, sp);
|
||||||
|
|
||||||
|
if (unlikely(p4d_none(*p4d))) {
|
||||||
|
pgd_t *pgd_ref = pgd_offset_k(sp);
|
||||||
|
p4d_t *p4d_ref = p4d_offset(pgd_ref, sp);
|
||||||
|
|
||||||
|
set_p4d(p4d, *p4d_ref);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
|
void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
|
||||||
struct task_struct *tsk)
|
struct task_struct *tsk)
|
||||||
{
|
{
|
||||||
|
@ -226,11 +254,7 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
|
||||||
* mapped in the new pgd, we'll double-fault. Forcibly
|
* mapped in the new pgd, we'll double-fault. Forcibly
|
||||||
* map it.
|
* map it.
|
||||||
*/
|
*/
|
||||||
unsigned int index = pgd_index(current_stack_pointer);
|
sync_current_stack_to_mm(next);
|
||||||
pgd_t *pgd = next->pgd + index;
|
|
||||||
|
|
||||||
if (unlikely(pgd_none(*pgd)))
|
|
||||||
set_pgd(pgd, init_mm.pgd[index]);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Stop remote flushes for the previous mm */
|
/* Stop remote flushes for the previous mm */
|
||||||
|
|
|
@ -134,7 +134,9 @@ pgd_t * __init efi_call_phys_prolog(void)
|
||||||
pud[j] = *pud_offset(p4d_k, vaddr);
|
pud[j] = *pud_offset(p4d_k, vaddr);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
pgd_offset_k(pgd * PGDIR_SIZE)->pgd &= ~_PAGE_NX;
|
||||||
}
|
}
|
||||||
|
|
||||||
out:
|
out:
|
||||||
__flush_tlb_all();
|
__flush_tlb_all();
|
||||||
|
|
||||||
|
|
|
@ -167,6 +167,18 @@ void crypto_remove_spawns(struct crypto_alg *alg, struct list_head *list,
|
||||||
|
|
||||||
spawn->alg = NULL;
|
spawn->alg = NULL;
|
||||||
spawns = &inst->alg.cra_users;
|
spawns = &inst->alg.cra_users;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* We may encounter an unregistered instance here, since
|
||||||
|
* an instance's spawns are set up prior to the instance
|
||||||
|
* being registered. An unregistered instance will have
|
||||||
|
* NULL ->cra_users.next, since ->cra_users isn't
|
||||||
|
* properly initialized until registration. But an
|
||||||
|
* unregistered instance cannot have any users, so treat
|
||||||
|
* it the same as ->cra_users being empty.
|
||||||
|
*/
|
||||||
|
if (spawns->next == NULL)
|
||||||
|
break;
|
||||||
}
|
}
|
||||||
} while ((spawns = crypto_more_spawns(alg, &stack, &top,
|
} while ((spawns = crypto_more_spawns(alg, &stack, &top,
|
||||||
&secondary_spawns)));
|
&secondary_spawns)));
|
||||||
|
|
|
@ -4439,6 +4439,7 @@ static const struct ata_blacklist_entry ata_device_blacklist [] = {
|
||||||
* https://bugzilla.kernel.org/show_bug.cgi?id=121671
|
* https://bugzilla.kernel.org/show_bug.cgi?id=121671
|
||||||
*/
|
*/
|
||||||
{ "LITEON CX1-JB*-HP", NULL, ATA_HORKAGE_MAX_SEC_1024 },
|
{ "LITEON CX1-JB*-HP", NULL, ATA_HORKAGE_MAX_SEC_1024 },
|
||||||
|
{ "LITEON EP1-*", NULL, ATA_HORKAGE_MAX_SEC_1024 },
|
||||||
|
|
||||||
/* Devices we expect to fail diagnostics */
|
/* Devices we expect to fail diagnostics */
|
||||||
|
|
||||||
|
|
|
@ -236,6 +236,9 @@ config GENERIC_CPU_DEVICES
|
||||||
config GENERIC_CPU_AUTOPROBE
|
config GENERIC_CPU_AUTOPROBE
|
||||||
bool
|
bool
|
||||||
|
|
||||||
|
config GENERIC_CPU_VULNERABILITIES
|
||||||
|
bool
|
||||||
|
|
||||||
config SOC_BUS
|
config SOC_BUS
|
||||||
bool
|
bool
|
||||||
select GLOB
|
select GLOB
|
||||||
|
|
|
@ -501,10 +501,58 @@ static void __init cpu_dev_register_generic(void)
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#ifdef CONFIG_GENERIC_CPU_VULNERABILITIES
|
||||||
|
|
||||||
|
ssize_t __weak cpu_show_meltdown(struct device *dev,
|
||||||
|
struct device_attribute *attr, char *buf)
|
||||||
|
{
|
||||||
|
return sprintf(buf, "Not affected\n");
|
||||||
|
}
|
||||||
|
|
||||||
|
ssize_t __weak cpu_show_spectre_v1(struct device *dev,
|
||||||
|
struct device_attribute *attr, char *buf)
|
||||||
|
{
|
||||||
|
return sprintf(buf, "Not affected\n");
|
||||||
|
}
|
||||||
|
|
||||||
|
ssize_t __weak cpu_show_spectre_v2(struct device *dev,
|
||||||
|
struct device_attribute *attr, char *buf)
|
||||||
|
{
|
||||||
|
return sprintf(buf, "Not affected\n");
|
||||||
|
}
|
||||||
|
|
||||||
|
static DEVICE_ATTR(meltdown, 0444, cpu_show_meltdown, NULL);
|
||||||
|
static DEVICE_ATTR(spectre_v1, 0444, cpu_show_spectre_v1, NULL);
|
||||||
|
static DEVICE_ATTR(spectre_v2, 0444, cpu_show_spectre_v2, NULL);
|
||||||
|
|
||||||
|
static struct attribute *cpu_root_vulnerabilities_attrs[] = {
|
||||||
|
&dev_attr_meltdown.attr,
|
||||||
|
&dev_attr_spectre_v1.attr,
|
||||||
|
&dev_attr_spectre_v2.attr,
|
||||||
|
NULL
|
||||||
|
};
|
||||||
|
|
||||||
|
static const struct attribute_group cpu_root_vulnerabilities_group = {
|
||||||
|
.name = "vulnerabilities",
|
||||||
|
.attrs = cpu_root_vulnerabilities_attrs,
|
||||||
|
};
|
||||||
|
|
||||||
|
static void __init cpu_register_vulnerabilities(void)
|
||||||
|
{
|
||||||
|
if (sysfs_create_group(&cpu_subsys.dev_root->kobj,
|
||||||
|
&cpu_root_vulnerabilities_group))
|
||||||
|
pr_err("Unable to register CPU vulnerabilities\n");
|
||||||
|
}
|
||||||
|
|
||||||
|
#else
|
||||||
|
static inline void cpu_register_vulnerabilities(void) { }
|
||||||
|
#endif
|
||||||
|
|
||||||
void __init cpu_dev_init(void)
|
void __init cpu_dev_init(void)
|
||||||
{
|
{
|
||||||
if (subsys_system_register(&cpu_subsys, cpu_root_attr_groups))
|
if (subsys_system_register(&cpu_subsys, cpu_root_attr_groups))
|
||||||
panic("Failed to register CPU subsystem");
|
panic("Failed to register CPU subsystem");
|
||||||
|
|
||||||
cpu_dev_register_generic();
|
cpu_dev_register_generic();
|
||||||
|
cpu_register_vulnerabilities();
|
||||||
}
|
}
|
||||||
|
|
|
@ -3074,13 +3074,21 @@ static void format_lock_cookie(struct rbd_device *rbd_dev, char *buf)
|
||||||
mutex_unlock(&rbd_dev->watch_mutex);
|
mutex_unlock(&rbd_dev->watch_mutex);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static void __rbd_lock(struct rbd_device *rbd_dev, const char *cookie)
|
||||||
|
{
|
||||||
|
struct rbd_client_id cid = rbd_get_cid(rbd_dev);
|
||||||
|
|
||||||
|
strcpy(rbd_dev->lock_cookie, cookie);
|
||||||
|
rbd_set_owner_cid(rbd_dev, &cid);
|
||||||
|
queue_work(rbd_dev->task_wq, &rbd_dev->acquired_lock_work);
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* lock_rwsem must be held for write
|
* lock_rwsem must be held for write
|
||||||
*/
|
*/
|
||||||
static int rbd_lock(struct rbd_device *rbd_dev)
|
static int rbd_lock(struct rbd_device *rbd_dev)
|
||||||
{
|
{
|
||||||
struct ceph_osd_client *osdc = &rbd_dev->rbd_client->client->osdc;
|
struct ceph_osd_client *osdc = &rbd_dev->rbd_client->client->osdc;
|
||||||
struct rbd_client_id cid = rbd_get_cid(rbd_dev);
|
|
||||||
char cookie[32];
|
char cookie[32];
|
||||||
int ret;
|
int ret;
|
||||||
|
|
||||||
|
@ -3095,9 +3103,7 @@ static int rbd_lock(struct rbd_device *rbd_dev)
|
||||||
return ret;
|
return ret;
|
||||||
|
|
||||||
rbd_dev->lock_state = RBD_LOCK_STATE_LOCKED;
|
rbd_dev->lock_state = RBD_LOCK_STATE_LOCKED;
|
||||||
strcpy(rbd_dev->lock_cookie, cookie);
|
__rbd_lock(rbd_dev, cookie);
|
||||||
rbd_set_owner_cid(rbd_dev, &cid);
|
|
||||||
queue_work(rbd_dev->task_wq, &rbd_dev->acquired_lock_work);
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -3883,7 +3889,7 @@ static void rbd_reacquire_lock(struct rbd_device *rbd_dev)
|
||||||
queue_delayed_work(rbd_dev->task_wq,
|
queue_delayed_work(rbd_dev->task_wq,
|
||||||
&rbd_dev->lock_dwork, 0);
|
&rbd_dev->lock_dwork, 0);
|
||||||
} else {
|
} else {
|
||||||
strcpy(rbd_dev->lock_cookie, cookie);
|
__rbd_lock(rbd_dev, cookie);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -4415,7 +4421,7 @@ static int rbd_init_disk(struct rbd_device *rbd_dev)
|
||||||
segment_size = rbd_obj_bytes(&rbd_dev->header);
|
segment_size = rbd_obj_bytes(&rbd_dev->header);
|
||||||
blk_queue_max_hw_sectors(q, segment_size / SECTOR_SIZE);
|
blk_queue_max_hw_sectors(q, segment_size / SECTOR_SIZE);
|
||||||
q->limits.max_sectors = queue_max_hw_sectors(q);
|
q->limits.max_sectors = queue_max_hw_sectors(q);
|
||||||
blk_queue_max_segments(q, segment_size / SECTOR_SIZE);
|
blk_queue_max_segments(q, USHRT_MAX);
|
||||||
blk_queue_max_segment_size(q, segment_size);
|
blk_queue_max_segment_size(q, segment_size);
|
||||||
blk_queue_io_min(q, segment_size);
|
blk_queue_io_min(q, segment_size);
|
||||||
blk_queue_io_opt(q, segment_size);
|
blk_queue_io_opt(q, segment_size);
|
||||||
|
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue
Block a user