Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net

David S. Miller 2018-02-19 18:46:11 -05:00
commit f5c0c6f429
333 changed files with 4400 additions and 2798 deletions


@ -0,0 +1,39 @@
What: /sys/devices/platform/dock.N/docked
Date: Dec, 2006
KernelVersion: 2.6.19
Contact: linux-acpi@vger.kernel.org
Description:
(RO) Value 1 or 0 indicates whether the software believes the
laptop is docked in a docking station.
What: /sys/devices/platform/dock.N/undock
Date: Dec, 2006
KernelVersion: 2.6.19
Contact: linux-acpi@vger.kernel.org
Description:
(WO) Writing to this file causes the software to initiate an
undock request to the firmware.
What: /sys/devices/platform/dock.N/uid
Date: Feb, 2007
KernelVersion: v2.6.21
Contact: linux-acpi@vger.kernel.org
Description:
(RO) Displays the docking station the laptop is docked to.
What: /sys/devices/platform/dock.N/flags
Date: May, 2007
KernelVersion: v2.6.21
Contact: linux-acpi@vger.kernel.org
Description:
(RO) Show dock station flags, useful for checking if an undock
request has been made by the user (from the immediate_undock
option).
What: /sys/devices/platform/dock.N/type
Date: Aug, 2008
KernelVersion: v2.6.27
Contact: linux-acpi@vger.kernel.org
Description:
(RO) Display the dock station type - dock_station, ata_bay or
battery_bay.
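
As a usage sketch (not part of the ABI text above), a minimal user-space
program that reads docked and, if docked, writes undock to request release.
The dock.0 instance name is an assumption and error handling is trimmed:

#include <fcntl.h>
#include <unistd.h>

int main(void)
{
	char buf[4] = "";
	ssize_t n;
	int fd = open("/sys/devices/platform/dock.0/docked", O_RDONLY);

	if (fd < 0)
		return 1;
	n = read(fd, buf, sizeof(buf) - 1);	/* "1\n" when docked, "0\n" otherwise */
	close(fd);

	if (n > 0 && buf[0] == '1') {
		/* (WO) attribute: any write initiates the undock request */
		fd = open("/sys/devices/platform/dock.0/undock", O_WRONLY);
		if (fd < 0)
			return 1;
		write(fd, "1", 1);
		close(fd);
	}
	return 0;
}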


@ -108,6 +108,8 @@ Description: CPU topology files that describe a logical CPU's relationship
What: /sys/devices/system/cpu/cpuidle/current_driver
/sys/devices/system/cpu/cpuidle/current_governor_ro
/sys/devices/system/cpu/cpuidle/available_governors
/sys/devices/system/cpu/cpuidle/current_governor
Date: September 2007
Contact: Linux kernel mailing list <linux-kernel@vger.kernel.org>
Description: Discover cpuidle policy and mechanism
@ -119,13 +121,84 @@ Description: Discover cpuidle policy and mechanism
Idle policy (governor) is differentiated from idle mechanism
(driver)
current_driver: displays current idle mechanism
current_driver: (RO) displays current idle mechanism
current_governor_ro: displays current idle policy
current_governor_ro: (RO) displays current idle policy
With the cpuidle_sysfs_switch boot option enabled (meant for
developer testing), the following three attributes are visible
instead:
current_driver: same as described above
available_governors: (RO) displays a space separated list of
available governors
current_governor: (RW) displays current idle policy. Users can
switch the governor at runtime by writing to this file.
See files in Documentation/cpuidle/ for more information.
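
As an illustration of the RO/RW split above, a small sketch that prints the
idle mechanism and switches the policy; it assumes the kernel was booted with
cpuidle_sysfs_switch so that the writable current_governor attribute is
visible:

#include <stdio.h>

int main(void)
{
	char drv[64] = "";
	FILE *f = fopen("/sys/devices/system/cpu/cpuidle/current_driver", "r");

	if (!f || !fgets(drv, sizeof(drv), f))
		return 1;
	fclose(f);
	printf("idle mechanism: %s", drv);	/* e.g. "intel_idle" */

	/* switch the idle policy; "menu" and "ladder" are the in-tree governors */
	f = fopen("/sys/devices/system/cpu/cpuidle/current_governor", "w");
	if (!f)
		return 1;
	fputs("ladder", f);
	return fclose(f) ? 1 : 0;
}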
What: /sys/devices/system/cpu/cpuX/cpuidle/stateN/name
/sys/devices/system/cpu/cpuX/cpuidle/stateN/latency
/sys/devices/system/cpu/cpuX/cpuidle/stateN/power
/sys/devices/system/cpu/cpuX/cpuidle/stateN/time
/sys/devices/system/cpu/cpuX/cpuidle/stateN/usage
Date: September 2007
KernelVersion: v2.6.24
Contact: Linux power management list <linux-pm@vger.kernel.org>
Description:
The directory /sys/devices/system/cpu/cpuX/cpuidle contains
per-logical-CPU cpuidle information for each online cpu X.
The processor idle states which are available for use have the
following attributes:
name: (RO) Name of the idle state (string).
latency: (RO) The latency to exit out of this idle state (in
microseconds).
power: (RO) The power consumed while in this idle state (in
milliwatts).
time: (RO) The total time spent in this idle state (in microseconds).
usage: (RO) Number of times this state was entered (a count).
What: /sys/devices/system/cpu/cpuX/cpuidle/stateN/desc
Date: February 2008
KernelVersion: v2.6.25
Contact: Linux power management list <linux-pm@vger.kernel.org>
Description:
(RO) A small description of the idle state (string).
What: /sys/devices/system/cpu/cpuX/cpuidle/stateN/disable
Date: March 2012
KernelVersion: v3.10
Contact: Linux power management list <linux-pm@vger.kernel.org>
Description:
(RW) Option to disable this idle state (bool). The behavior and
the effect of the disable variable depends on the implementation
of a particular governor. In the ladder governor, for example,
it is not coherent, i.e. if one disables a light state, then all
deeper states are disabled as well, but the disable variable does
not reflect it. Likewise, if one enables a deep state while a
lighter state is still disabled, then this has no effect.
What: /sys/devices/system/cpu/cpuX/cpuidle/stateN/residency
Date: March 2014
KernelVersion: v3.15
Contact: Linux power management list <linux-pm@vger.kernel.org>
Description:
(RO) Display the target residency, i.e. the minimum amount of
time (in microseconds) this cpu should spend in this idle state
to make the transition worth the effort.
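
Putting the per-state attributes together, a sketch that dumps one state's
name, latency and residency and then disables it; cpu0/state2 is an arbitrary
choice here, and writing disable needs sufficient privilege:

#include <stdio.h>

#define STATE_DIR "/sys/devices/system/cpu/cpu0/cpuidle/state2/"

static void show(const char *attr)
{
	char path[128], buf[64];
	FILE *f;

	snprintf(path, sizeof(path), STATE_DIR "%s", attr);
	f = fopen(path, "r");
	if (f && fgets(buf, sizeof(buf), f))
		printf("%s: %s", attr, buf);
	if (f)
		fclose(f);
}

int main(void)
{
	FILE *f;

	show("name");
	show("latency");	/* exit latency, microseconds */
	show("residency");	/* minimum worthwhile stay, microseconds */

	f = fopen(STATE_DIR "disable", "w");
	if (!f)
		return 1;
	fputs("1", f);	/* (RW) 1 disables this state, 0 re-enables it */
	return fclose(f) ? 1 : 0;
}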
What: /sys/devices/system/cpu/cpu#/cpufreq/*
Date: pre-git history
Contact: linux-pm@vger.kernel.org


@ -0,0 +1,40 @@
What: /sys/bus/platform/devices/INT3407:00/dptf_power/charger_type
Date: Jul, 2016
KernelVersion: v4.10
Contact: linux-acpi@vger.kernel.org
Description:
(RO) The charger type - Traditional, Hybrid or NVDC.
What: /sys/bus/platform/devices/INT3407:00/dptf_power/adapter_rating_mw
Date: Jul, 2016
KernelVersion: v4.10
Contact: linux-acpi@vger.kernel.org
Description:
(RO) Adapter rating in milliwatts (the maximum adapter power).
Must be 0 if no AC adapter is plugged in.
What: /sys/bus/platform/devices/INT3407:00/dptf_power/max_platform_power_mw
Date: Jul, 2016
KernelVersion: v4.10
Contact: linux-acpi@vger.kernel.org
Description:
(RO) Maximum platform power that can be supported by the battery
in milliwatts.
What: /sys/bus/platform/devices/INT3407:00/dptf_power/platform_power_source
Date: Jul, 2016
KernelVersion: v4.10
Contact: linux-acpi@vger.kernel.org
Description:
(RO) Display the platform power source:
0x00 = DC
0x01 = AC
0x02 = USB
0x03 = Wireless Charger
What: /sys/bus/platform/devices/INT3407:00/dptf_power/battery_steady_power
Date: Jul, 2016
KernelVersion: v4.10
Contact: linux-acpi@vger.kernel.org
Description:
(RO) The maximum sustained power for the battery, in milliwatts.
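
Tying these entries together, a sketch that reads platform_power_source and
decodes it using the table above (parsing with base 0 so either "2" or "0x02"
output works):

#include <stdio.h>
#include <stdlib.h>

int main(void)
{
	static const char * const src[] = {
		"DC", "AC", "USB", "Wireless Charger"
	};
	char buf[16] = "";
	unsigned long v;
	FILE *f = fopen("/sys/bus/platform/devices/INT3407:00"
			"/dptf_power/platform_power_source", "r");

	if (!f || !fgets(buf, sizeof(buf), f))
		return 1;
	fclose(f);
	v = strtoul(buf, NULL, 0);
	printf("power source: %s\n", v < 4 ? src[v] : "unknown");
	return 0;
}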


@ -58,7 +58,12 @@ Like with atomic_t, the rule of thumb is:
- RMW operations that have a return value are fully ordered.
Except for test_and_set_bit_lock() which has ACQUIRE semantics and
- RMW operations that are conditional are unordered on FAILURE,
otherwise the above rules apply. In the case of test_and_{}_bit() operations,
if the bit in memory is unchanged by the operation then it is deemed to have
failed.
Except for a successful test_and_set_bit_lock() which has ACQUIRE semantics and
clear_bit_unlock() which has RELEASE semantics.
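
To make the ACQUIRE/RELEASE pairing concrete, here is the canonical bit-lock
pattern these two operations exist for; a minimal sketch around a hypothetical
flags word, not taken from any driver:

#include <linux/bitops.h>
#include <asm/processor.h>	/* cpu_relax() */

/* bit 0 of 'flags' acts as a simple lock */
static unsigned long flags;

static void my_lock(void)
{
	/* successful RMW with ACQUIRE: later accesses stay after it */
	while (test_and_set_bit_lock(0, &flags))
		cpu_relax();	/* failed RMW: unordered, just spin */
}

static void my_unlock(void)
{
	/* RELEASE: earlier accesses complete before the bit clears */
	clear_bit_unlock(0, &flags);
}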
Since a platform only has a single means of achieving atomic operations


@ -0,0 +1,8 @@
Binding for MIPS Cluster Power Controller (CPC).
This binding allows a system to specify where the CPC registers are
located.
Required properties:
compatible: Should be "mti,mips-cpc".
reg: Should describe the address & size of the CPC register region.
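
An example node (the base address and size here are invented for
illustration; consult the SoC's memory map for real values):

	cpc: cpc@1bde0000 {
		compatible = "mti,mips-cpc";
		reg = <0x1bde0000 0x8000>;
	};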


@ -0,0 +1,62 @@
#
# Feature name: membarrier-sync-core
# Kconfig: ARCH_HAS_MEMBARRIER_SYNC_CORE
# description: arch supports core serializing membarrier
#
# Architecture requirements
#
# * arm64
#
# Rely on eret context synchronization when returning from IPI handler, and
# when returning to user-space.
#
# * x86
#
# x86-32 uses IRET as return from interrupt, which takes care of the IPI.
# However, it uses both IRET and SYSEXIT to go back to user-space. The IRET
# instruction is core serializing, but not SYSEXIT.
#
# x86-64 uses IRET as return from interrupt, which takes care of the IPI.
# However, it can return to user-space through either SYSRETL (compat code),
# SYSRETQ, or IRET.
#
# Given that neither SYSRET{L,Q} nor SYSEXIT is core serializing, we rely
# instead on write_cr3() performed by switch_mm() to provide core serialization
# after changing the current mm, and deal with the special case of kthread ->
# uthread (temporarily keeping the current mm in active_mm) by issuing a
# sync_core_before_usermode() in that specific case.
#
-----------------------
| arch |status|
-----------------------
| alpha: | TODO |
| arc: | TODO |
| arm: | TODO |
| arm64: | ok |
| blackfin: | TODO |
| c6x: | TODO |
| cris: | TODO |
| frv: | TODO |
| h8300: | TODO |
| hexagon: | TODO |
| ia64: | TODO |
| m32r: | TODO |
| m68k: | TODO |
| metag: | TODO |
| microblaze: | TODO |
| mips: | TODO |
| mn10300: | TODO |
| nios2: | TODO |
| openrisc: | TODO |
| parisc: | TODO |
| powerpc: | TODO |
| s390: | TODO |
| score: | TODO |
| sh: | TODO |
| sparc: | TODO |
| tile: | TODO |
| um: | TODO |
| unicore32: | TODO |
| x86: | ok |
| xtensa: | TODO |
-----------------------
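
From user-space this feature is reached through the membarrier() system call;
a sketch using the v4.16 UAPI constants from linux/membarrier.h (a failing
registration means the kernel or architecture lacks support):

#include <linux/membarrier.h>
#include <sys/syscall.h>
#include <unistd.h>

static int membarrier(int cmd, int flags)
{
	return syscall(__NR_membarrier, cmd, flags);
}

int main(void)
{
	/* register once per process ... */
	if (membarrier(MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED_SYNC_CORE, 0))
		return 1;

	/* ... then each call core-serializes all threads of this mm */
	return membarrier(MEMBARRIER_CMD_PRIVATE_EXPEDITED_SYNC_CORE, 0) ? 1 : 0;
}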


@ -21,37 +21,23 @@ Implementation
--------------
Mutexes are represented by 'struct mutex', defined in include/linux/mutex.h
and implemented in kernel/locking/mutex.c. These locks use a three
state atomic counter (->count) to represent the different possible
transitions that can occur during the lifetime of a lock:
1: unlocked
0: locked, no waiters
negative: locked, with potential waiters
In its most basic form it also includes a wait-queue and a spinlock
that serializes access to it. CONFIG_SMP systems can also include
a pointer to the lock task owner (->owner) as well as a spinner MCS
lock (->osq), both described below in (ii).
and implemented in kernel/locking/mutex.c. These locks use an atomic variable
(->owner) to keep track of the lock state during its lifetime. The owner
field actually contains a 'struct task_struct *' pointing to the current lock
owner, and is therefore NULL if the lock is not currently owned. Since
task_struct pointers are aligned to at least L1_CACHE_BYTES, the low three
bits are used to store extra state (e.g., whether the waiter list is
non-empty). In its most basic form it also includes a
wait-queue and a spinlock that serializes access to it. Furthermore,
CONFIG_MUTEX_SPIN_ON_OWNER=y systems use a spinner MCS lock (->osq), described
below in (ii).
When acquiring a mutex, there are three possible paths that can be
taken, depending on the state of the lock:
(i) fastpath: tries to atomically acquire the lock by decrementing the
counter. If it was already taken by another task it goes to the next
possible path. This logic is architecture specific. On x86-64, the
locking fastpath is 2 instructions:
0000000000000e10 <mutex_lock>:
e21: f0 ff 0b lock decl (%rbx)
e24: 79 08 jns e2e <mutex_lock+0x1e>
the unlocking fastpath is equally tight:
0000000000000bc0 <mutex_unlock>:
bc8: f0 ff 07 lock incl (%rdi)
bcb: 7f 0a jg bd7 <mutex_unlock+0x17>
(i) fastpath: tries to atomically acquire the lock by cmpxchg()ing the owner with
the current task. This only works in the uncontended case (cmpxchg() checks
against 0UL, so all 3 state bits above have to be 0). If the lock is
contended it goes to the next possible path.
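
Conceptually the new fastpath is a single compare-and-swap on ->owner rather
than per-architecture assembly; a simplified sketch of that logic (not the
literal mutex.c code):

#include <linux/atomic.h>
#include <linux/mutex.h>
#include <linux/sched.h>	/* current */

static inline bool mutex_trylock_fast(struct mutex *lock)
{
	unsigned long curr = (unsigned long)current;

	/* only succeeds when owner and all 3 low state bits are zero */
	return atomic_long_cmpxchg_acquire(&lock->owner, 0UL, curr) == 0UL;
}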
(ii) midpath: aka optimistic spinning, tries to spin for acquisition
while the lock owner is running and there are no other tasks ready
@ -143,11 +129,10 @@ Test if the mutex is taken:
Disadvantages
-------------
Unlike its original design and purpose, 'struct mutex' is larger than
most locks in the kernel. E.g: on x86-64 it is 40 bytes, almost twice
as large as 'struct semaphore' (24 bytes) and tied, along with rwsems,
for the largest lock in the kernel. Larger structure sizes mean more
CPU cache and memory footprint.
Unlike its original design and purpose, 'struct mutex' is among the largest
locks in the kernel. E.g: on x86-64 it is 32 bytes, where 'struct semaphore'
is 24 bytes and rw_semaphore is 40 bytes. Larger structure sizes mean more CPU
cache and memory footprint.
When to use mutexes
-------------------


@ -13,6 +13,7 @@ The following technologies are described:
* Generic Segmentation Offload - GSO
* Generic Receive Offload - GRO
* Partial Generic Segmentation Offload - GSO_PARTIAL
* SCTP acceleration with GSO - GSO_BY_FRAGS
TCP Segmentation Offload
========================
@ -49,6 +50,10 @@ datagram into multiple IPv4 fragments. Many of the requirements for UDP
fragmentation offload are the same as TSO. However the IPv4 ID for
fragments should not increment as a single IPv4 datagram is fragmented.
UFO is deprecated: modern kernels will no longer generate UFO skbs, but can
still receive them from tuntap and similar devices. Offload of UDP-based
tunnel protocols is still supported.
IPIP, SIT, GRE, UDP Tunnel, and Remote Checksum Offloads
========================================================
@ -83,10 +88,10 @@ SKB_GSO_UDP_TUNNEL_CSUM. These two additional tunnel types reflect the
fact that the outer header also requests to have a non-zero checksum
included in the outer header.
Finally there is SKB_GSO_REMCSUM which indicates that a given tunnel header
has requested a remote checksum offload. In this case the inner headers
will be left with a partial checksum and only the outer header checksum
will be computed.
Finally there is SKB_GSO_TUNNEL_REMCSUM which indicates that a given tunnel
header has requested a remote checksum offload. In this case the inner
headers will be left with a partial checksum and only the outer header
checksum will be computed.
Generic Segmentation Offload
============================
@ -128,3 +133,28 @@ values for if the header was simply duplicated. The one exception to this
is the outer IPv4 ID field. It is up to the device drivers to guarantee
that the IPv4 ID field is incremented in the case that a given header does
not have the DF bit set.
SCTP acceleration with GSO
==========================
SCTP - despite the lack of hardware support - can still take advantage of
GSO to pass one large packet through the network stack, rather than
multiple small packets.
This requires a different approach from other offloads, as SCTP packets
cannot be just segmented to (P)MTU. Rather, the chunks must be contained in
IP segments, padding respected. So unlike regular GSO, SCTP can't just
generate a big skb, set gso_size to the fragmentation point and deliver it
to the IP layer.
Instead, the SCTP protocol layer builds an skb with the segments correctly
padded and stored as chained skbs, and skb_segment() splits based on those.
To signal this, gso_size is set to the special value GSO_BY_FRAGS.
Therefore, any code in the core networking stack must be aware of the
possibility that gso_size will be GSO_BY_FRAGS and handle that case
appropriately. (For size checks, the skb_gso_validate_*_len family of
helpers do this automatically.)
This also affects drivers with the NETIF_F_FRAGLIST & NETIF_F_GSO_SCTP bits
set. Note also that NETIF_F_GSO_SCTP is included in NETIF_F_GSO_SOFTWARE.
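
As a sketch of what being "aware of GSO_BY_FRAGS" means, a hypothetical
length check in a driver; my_dev_gso_ok() is invented, while
skb_gso_validate_mac_len() is one of the helpers named above:

#include <linux/skbuff.h>

static bool my_dev_gso_ok(const struct sk_buff *skb, unsigned int max_len)
{
	/*
	 * Do not compare gso_size against a length here: for SCTP it
	 * holds the magic GSO_BY_FRAGS value, not a segment size. The
	 * skb_gso_validate_*_len helpers walk the chained skbs instead.
	 */
	if (skb_is_gso(skb))
		return skb_gso_validate_mac_len(skb, max_len);

	return skb->len <= max_len;
}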


@ -9206,6 +9206,7 @@ MIPS GENERIC PLATFORM
M: Paul Burton <paul.burton@mips.com>
L: linux-mips@linux-mips.org
S: Supported
F: Documentation/devicetree/bindings/power/mti,mips-cpc.txt
F: arch/mips/generic/
F: arch/mips/tools/generic-board-config.sh
@ -9945,6 +9946,7 @@ F: drivers/nfc/nxp-nci
OBJTOOL
M: Josh Poimboeuf <jpoimboe@redhat.com>
M: Peter Zijlstra <peterz@infradead.org>
S: Supported
F: tools/objtool/


@ -2,7 +2,7 @@
VERSION = 4
PATCHLEVEL = 16
SUBLEVEL = 0
EXTRAVERSION = -rc1
EXTRAVERSION = -rc2
NAME = Fearless Coyote
# *DOCUMENTATION*


@ -20,7 +20,7 @@
#define MPIDR_UP_BITMASK (0x1 << 30)
#define MPIDR_MT_BITMASK (0x1 << 24)
#define MPIDR_HWID_BITMASK 0xff00ffffff
#define MPIDR_HWID_BITMASK 0xff00ffffffUL
#define MPIDR_LEVEL_BITS_SHIFT 3
#define MPIDR_LEVEL_BITS (1 << MPIDR_LEVEL_BITS_SHIFT)


@ -22,7 +22,7 @@
static inline pte_t huge_ptep_get(pte_t *ptep)
{
return *ptep;
return READ_ONCE(*ptep);
}


@ -185,42 +185,42 @@ static inline pmd_t kvm_s2pmd_mkexec(pmd_t pmd)
return pmd;
}
static inline void kvm_set_s2pte_readonly(pte_t *pte)
static inline void kvm_set_s2pte_readonly(pte_t *ptep)
{
pteval_t old_pteval, pteval;
pteval = READ_ONCE(pte_val(*pte));
pteval = READ_ONCE(pte_val(*ptep));
do {
old_pteval = pteval;
pteval &= ~PTE_S2_RDWR;
pteval |= PTE_S2_RDONLY;
pteval = cmpxchg_relaxed(&pte_val(*pte), old_pteval, pteval);
pteval = cmpxchg_relaxed(&pte_val(*ptep), old_pteval, pteval);
} while (pteval != old_pteval);
}
static inline bool kvm_s2pte_readonly(pte_t *pte)
static inline bool kvm_s2pte_readonly(pte_t *ptep)
{
return (pte_val(*pte) & PTE_S2_RDWR) == PTE_S2_RDONLY;
return (READ_ONCE(pte_val(*ptep)) & PTE_S2_RDWR) == PTE_S2_RDONLY;
}
static inline bool kvm_s2pte_exec(pte_t *pte)
static inline bool kvm_s2pte_exec(pte_t *ptep)
{
return !(pte_val(*pte) & PTE_S2_XN);
return !(READ_ONCE(pte_val(*ptep)) & PTE_S2_XN);
}
static inline void kvm_set_s2pmd_readonly(pmd_t *pmd)
static inline void kvm_set_s2pmd_readonly(pmd_t *pmdp)
{
kvm_set_s2pte_readonly((pte_t *)pmd);
kvm_set_s2pte_readonly((pte_t *)pmdp);
}
static inline bool kvm_s2pmd_readonly(pmd_t *pmd)
static inline bool kvm_s2pmd_readonly(pmd_t *pmdp)
{
return kvm_s2pte_readonly((pte_t *)pmd);
return kvm_s2pte_readonly((pte_t *)pmdp);
}
static inline bool kvm_s2pmd_exec(pmd_t *pmd)
static inline bool kvm_s2pmd_exec(pmd_t *pmdp)
{
return !(pmd_val(*pmd) & PMD_S2_XN);
return !(READ_ONCE(pmd_val(*pmdp)) & PMD_S2_XN);
}
static inline bool kvm_page_empty(void *ptr)


@ -141,13 +141,13 @@ static inline void cpu_install_idmap(void)
* Atomically replaces the active TTBR1_EL1 PGD with a new VA-compatible PGD,
* avoiding the possibility of conflicting TLB entries being allocated.
*/
static inline void cpu_replace_ttbr1(pgd_t *pgd)
static inline void cpu_replace_ttbr1(pgd_t *pgdp)
{
typedef void (ttbr_replace_func)(phys_addr_t);
extern ttbr_replace_func idmap_cpu_replace_ttbr1;
ttbr_replace_func *replace_phys;
phys_addr_t pgd_phys = virt_to_phys(pgd);
phys_addr_t pgd_phys = virt_to_phys(pgdp);
replace_phys = (void *)__pa_symbol(idmap_cpu_replace_ttbr1);


@ -36,23 +36,23 @@ static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long addr)
return (pmd_t *)__get_free_page(PGALLOC_GFP);
}
static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd)
static inline void pmd_free(struct mm_struct *mm, pmd_t *pmdp)
{
BUG_ON((unsigned long)pmd & (PAGE_SIZE-1));
free_page((unsigned long)pmd);
BUG_ON((unsigned long)pmdp & (PAGE_SIZE-1));
free_page((unsigned long)pmdp);
}
static inline void __pud_populate(pud_t *pud, phys_addr_t pmd, pudval_t prot)
static inline void __pud_populate(pud_t *pudp, phys_addr_t pmdp, pudval_t prot)
{
set_pud(pud, __pud(__phys_to_pud_val(pmd) | prot));
set_pud(pudp, __pud(__phys_to_pud_val(pmdp) | prot));
}
static inline void pud_populate(struct mm_struct *mm, pud_t *pud, pmd_t *pmd)
static inline void pud_populate(struct mm_struct *mm, pud_t *pudp, pmd_t *pmdp)
{
__pud_populate(pud, __pa(pmd), PMD_TYPE_TABLE);
__pud_populate(pudp, __pa(pmdp), PMD_TYPE_TABLE);
}
#else
static inline void __pud_populate(pud_t *pud, phys_addr_t pmd, pudval_t prot)
static inline void __pud_populate(pud_t *pudp, phys_addr_t pmdp, pudval_t prot)
{
BUILD_BUG();
}
@ -65,30 +65,30 @@ static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long addr)
return (pud_t *)__get_free_page(PGALLOC_GFP);
}
static inline void pud_free(struct mm_struct *mm, pud_t *pud)
static inline void pud_free(struct mm_struct *mm, pud_t *pudp)
{
BUG_ON((unsigned long)pud & (PAGE_SIZE-1));
free_page((unsigned long)pud);
BUG_ON((unsigned long)pudp & (PAGE_SIZE-1));
free_page((unsigned long)pudp);
}
static inline void __pgd_populate(pgd_t *pgdp, phys_addr_t pud, pgdval_t prot)
static inline void __pgd_populate(pgd_t *pgdp, phys_addr_t pudp, pgdval_t prot)
{
set_pgd(pgdp, __pgd(__phys_to_pgd_val(pud) | prot));
set_pgd(pgdp, __pgd(__phys_to_pgd_val(pudp) | prot));
}
static inline void pgd_populate(struct mm_struct *mm, pgd_t *pgd, pud_t *pud)
static inline void pgd_populate(struct mm_struct *mm, pgd_t *pgdp, pud_t *pudp)
{
__pgd_populate(pgd, __pa(pud), PUD_TYPE_TABLE);
__pgd_populate(pgdp, __pa(pudp), PUD_TYPE_TABLE);
}
#else
static inline void __pgd_populate(pgd_t *pgdp, phys_addr_t pud, pgdval_t prot)
static inline void __pgd_populate(pgd_t *pgdp, phys_addr_t pudp, pgdval_t prot)
{
BUILD_BUG();
}
#endif /* CONFIG_PGTABLE_LEVELS > 3 */
extern pgd_t *pgd_alloc(struct mm_struct *mm);
extern void pgd_free(struct mm_struct *mm, pgd_t *pgd);
extern void pgd_free(struct mm_struct *mm, pgd_t *pgdp);
static inline pte_t *
pte_alloc_one_kernel(struct mm_struct *mm, unsigned long addr)
@ -114,10 +114,10 @@ pte_alloc_one(struct mm_struct *mm, unsigned long addr)
/*
* Free a PTE table.
*/
static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte)
static inline void pte_free_kernel(struct mm_struct *mm, pte_t *ptep)
{
if (pte)
free_page((unsigned long)pte);
if (ptep)
free_page((unsigned long)ptep);
}
static inline void pte_free(struct mm_struct *mm, pgtable_t pte)
@ -126,10 +126,10 @@ static inline void pte_free(struct mm_struct *mm, pgtable_t pte)
__free_page(pte);
}
static inline void __pmd_populate(pmd_t *pmdp, phys_addr_t pte,
static inline void __pmd_populate(pmd_t *pmdp, phys_addr_t ptep,
pmdval_t prot)
{
set_pmd(pmdp, __pmd(__phys_to_pmd_val(pte) | prot));
set_pmd(pmdp, __pmd(__phys_to_pmd_val(ptep) | prot));
}
/*


@ -218,7 +218,7 @@ static inline pmd_t pmd_mkcont(pmd_t pmd)
static inline void set_pte(pte_t *ptep, pte_t pte)
{
*ptep = pte;
WRITE_ONCE(*ptep, pte);
/*
* Only if the new pte is valid and kernel, otherwise TLB maintenance
@ -250,6 +250,8 @@ extern void __sync_icache_dcache(pte_t pteval, unsigned long addr);
static inline void set_pte_at(struct mm_struct *mm, unsigned long addr,
pte_t *ptep, pte_t pte)
{
pte_t old_pte;
if (pte_present(pte) && pte_user_exec(pte) && !pte_special(pte))
__sync_icache_dcache(pte, addr);
@ -258,14 +260,15 @@ static inline void set_pte_at(struct mm_struct *mm, unsigned long addr,
* hardware updates of the pte (ptep_set_access_flags safely changes
* valid ptes without going through an invalid entry).
*/
if (IS_ENABLED(CONFIG_DEBUG_VM) && pte_valid(*ptep) && pte_valid(pte) &&
old_pte = READ_ONCE(*ptep);
if (IS_ENABLED(CONFIG_DEBUG_VM) && pte_valid(old_pte) && pte_valid(pte) &&
(mm == current->active_mm || atomic_read(&mm->mm_users) > 1)) {
VM_WARN_ONCE(!pte_young(pte),
"%s: racy access flag clearing: 0x%016llx -> 0x%016llx",
__func__, pte_val(*ptep), pte_val(pte));
VM_WARN_ONCE(pte_write(*ptep) && !pte_dirty(pte),
__func__, pte_val(old_pte), pte_val(pte));
VM_WARN_ONCE(pte_write(old_pte) && !pte_dirty(pte),
"%s: racy dirty state clearing: 0x%016llx -> 0x%016llx",
__func__, pte_val(*ptep), pte_val(pte));
__func__, pte_val(old_pte), pte_val(pte));
}
set_pte(ptep, pte);
@ -431,7 +434,7 @@ extern pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn,
static inline void set_pmd(pmd_t *pmdp, pmd_t pmd)
{
*pmdp = pmd;
WRITE_ONCE(*pmdp, pmd);
dsb(ishst);
isb();
}
@ -482,7 +485,7 @@ static inline phys_addr_t pmd_page_paddr(pmd_t pmd)
static inline void set_pud(pud_t *pudp, pud_t pud)
{
*pudp = pud;
WRITE_ONCE(*pudp, pud);
dsb(ishst);
isb();
}
@ -500,7 +503,7 @@ static inline phys_addr_t pud_page_paddr(pud_t pud)
/* Find an entry in the second-level page table. */
#define pmd_index(addr) (((addr) >> PMD_SHIFT) & (PTRS_PER_PMD - 1))
#define pmd_offset_phys(dir, addr) (pud_page_paddr(*(dir)) + pmd_index(addr) * sizeof(pmd_t))
#define pmd_offset_phys(dir, addr) (pud_page_paddr(READ_ONCE(*(dir))) + pmd_index(addr) * sizeof(pmd_t))
#define pmd_offset(dir, addr) ((pmd_t *)__va(pmd_offset_phys((dir), (addr))))
#define pmd_set_fixmap(addr) ((pmd_t *)set_fixmap_offset(FIX_PMD, addr))
@ -535,7 +538,7 @@ static inline phys_addr_t pud_page_paddr(pud_t pud)
static inline void set_pgd(pgd_t *pgdp, pgd_t pgd)
{
*pgdp = pgd;
WRITE_ONCE(*pgdp, pgd);
dsb(ishst);
}
@ -552,7 +555,7 @@ static inline phys_addr_t pgd_page_paddr(pgd_t pgd)
/* Find an entry in the first-level page table. */
#define pud_index(addr) (((addr) >> PUD_SHIFT) & (PTRS_PER_PUD - 1))
#define pud_offset_phys(dir, addr) (pgd_page_paddr(*(dir)) + pud_index(addr) * sizeof(pud_t))
#define pud_offset_phys(dir, addr) (pgd_page_paddr(READ_ONCE(*(dir))) + pud_index(addr) * sizeof(pud_t))
#define pud_offset(dir, addr) ((pud_t *)__va(pud_offset_phys((dir), (addr))))
#define pud_set_fixmap(addr) ((pud_t *)set_fixmap_offset(FIX_PUD, addr))


@ -406,6 +406,15 @@ const struct arm64_cpu_capabilities arm64_errata[] = {
.capability = ARM64_HARDEN_BP_POST_GUEST_EXIT,
MIDR_ALL_VERSIONS(MIDR_QCOM_FALKOR_V1),
},
{
.capability = ARM64_HARDEN_BRANCH_PREDICTOR,
MIDR_ALL_VERSIONS(MIDR_QCOM_FALKOR),
.enable = qcom_enable_link_stack_sanitization,
},
{
.capability = ARM64_HARDEN_BP_POST_GUEST_EXIT,
MIDR_ALL_VERSIONS(MIDR_QCOM_FALKOR),
},
{
.capability = ARM64_HARDEN_BRANCH_PREDICTOR,
MIDR_ALL_VERSIONS(MIDR_BRCM_VULCAN),


@ -90,7 +90,7 @@ static int __init set_permissions(pte_t *ptep, pgtable_t token,
unsigned long addr, void *data)
{
efi_memory_desc_t *md = data;
pte_t pte = *ptep;
pte_t pte = READ_ONCE(*ptep);
if (md->attribute & EFI_MEMORY_RO)
pte = set_pte_bit(pte, __pgprot(PTE_RDONLY));


@ -202,10 +202,10 @@ static int create_safe_exec_page(void *src_start, size_t length,
gfp_t mask)
{
int rc = 0;
pgd_t *pgd;
pud_t *pud;
pmd_t *pmd;
pte_t *pte;
pgd_t *pgdp;
pud_t *pudp;
pmd_t *pmdp;
pte_t *ptep;
unsigned long dst = (unsigned long)allocator(mask);
if (!dst) {
@ -216,38 +216,38 @@ static int create_safe_exec_page(void *src_start, size_t length,
memcpy((void *)dst, src_start, length);
flush_icache_range(dst, dst + length);
pgd = pgd_offset_raw(allocator(mask), dst_addr);
if (pgd_none(*pgd)) {
pud = allocator(mask);
if (!pud) {
pgdp = pgd_offset_raw(allocator(mask), dst_addr);
if (pgd_none(READ_ONCE(*pgdp))) {
pudp = allocator(mask);
if (!pudp) {
rc = -ENOMEM;
goto out;
}
pgd_populate(&init_mm, pgd, pud);
pgd_populate(&init_mm, pgdp, pudp);
}
pud = pud_offset(pgd, dst_addr);
if (pud_none(*pud)) {
pmd = allocator(mask);
if (!pmd) {
pudp = pud_offset(pgdp, dst_addr);
if (pud_none(READ_ONCE(*pudp))) {
pmdp = allocator(mask);
if (!pmdp) {
rc = -ENOMEM;
goto out;
}
pud_populate(&init_mm, pud, pmd);
pud_populate(&init_mm, pudp, pmdp);
}
pmd = pmd_offset(pud, dst_addr);
if (pmd_none(*pmd)) {
pte = allocator(mask);
if (!pte) {
pmdp = pmd_offset(pudp, dst_addr);
if (pmd_none(READ_ONCE(*pmdp))) {
ptep = allocator(mask);
if (!ptep) {
rc = -ENOMEM;
goto out;
}
pmd_populate_kernel(&init_mm, pmd, pte);
pmd_populate_kernel(&init_mm, pmdp, ptep);
}
pte = pte_offset_kernel(pmd, dst_addr);
set_pte(pte, pfn_pte(virt_to_pfn(dst), PAGE_KERNEL_EXEC));
ptep = pte_offset_kernel(pmdp, dst_addr);
set_pte(ptep, pfn_pte(virt_to_pfn(dst), PAGE_KERNEL_EXEC));
/*
* Load our new page tables. A strict BBM approach requires that we
@ -263,7 +263,7 @@ static int create_safe_exec_page(void *src_start, size_t length,
*/
cpu_set_reserved_ttbr0();
local_flush_tlb_all();
write_sysreg(phys_to_ttbr(virt_to_phys(pgd)), ttbr0_el1);
write_sysreg(phys_to_ttbr(virt_to_phys(pgdp)), ttbr0_el1);
isb();
*phys_dst_addr = virt_to_phys((void *)dst);
@ -320,9 +320,9 @@ int swsusp_arch_suspend(void)
return ret;
}
static void _copy_pte(pte_t *dst_pte, pte_t *src_pte, unsigned long addr)
static void _copy_pte(pte_t *dst_ptep, pte_t *src_ptep, unsigned long addr)
{
pte_t pte = *src_pte;
pte_t pte = READ_ONCE(*src_ptep);
if (pte_valid(pte)) {
/*
@ -330,7 +330,7 @@ static void _copy_pte(pte_t *dst_pte, pte_t *src_pte, unsigned long addr)
* read only (code, rodata). Clear the RDONLY bit from
* the temporary mappings we use during restore.
*/
set_pte(dst_pte, pte_mkwrite(pte));
set_pte(dst_ptep, pte_mkwrite(pte));
} else if (debug_pagealloc_enabled() && !pte_none(pte)) {
/*
* debug_pagealloc will have removed the PTE_VALID bit if
@ -343,112 +343,116 @@ static void _copy_pte(pte_t *dst_pte, pte_t *src_pte, unsigned long addr)
*/
BUG_ON(!pfn_valid(pte_pfn(pte)));
set_pte(dst_pte, pte_mkpresent(pte_mkwrite(pte)));
set_pte(dst_ptep, pte_mkpresent(pte_mkwrite(pte)));
}
}
static int copy_pte(pmd_t *dst_pmd, pmd_t *src_pmd, unsigned long start,
static int copy_pte(pmd_t *dst_pmdp, pmd_t *src_pmdp, unsigned long start,
unsigned long end)
{
pte_t *src_pte;
pte_t *dst_pte;
pte_t *src_ptep;
pte_t *dst_ptep;
unsigned long addr = start;
dst_pte = (pte_t *)get_safe_page(GFP_ATOMIC);
if (!dst_pte)
dst_ptep = (pte_t *)get_safe_page(GFP_ATOMIC);
if (!dst_ptep)
return -ENOMEM;
pmd_populate_kernel(&init_mm, dst_pmd, dst_pte);
dst_pte = pte_offset_kernel(dst_pmd, start);
pmd_populate_kernel(&init_mm, dst_pmdp, dst_ptep);
dst_ptep = pte_offset_kernel(dst_pmdp, start);
src_pte = pte_offset_kernel(src_pmd, start);
src_ptep = pte_offset_kernel(src_pmdp, start);
do {
_copy_pte(dst_pte, src_pte, addr);
} while (dst_pte++, src_pte++, addr += PAGE_SIZE, addr != end);
_copy_pte(dst_ptep, src_ptep, addr);
} while (dst_ptep++, src_ptep++, addr += PAGE_SIZE, addr != end);
return 0;
}
static int copy_pmd(pud_t *dst_pud, pud_t *src_pud, unsigned long start,
static int copy_pmd(pud_t *dst_pudp, pud_t *src_pudp, unsigned long start,
unsigned long end)
{
pmd_t *src_pmd;
pmd_t *dst_pmd;
pmd_t *src_pmdp;
pmd_t *dst_pmdp;
unsigned long next;
unsigned long addr = start;
if (pud_none(*dst_pud)) {
dst_pmd = (pmd_t *)get_safe_page(GFP_ATOMIC);
if (!dst_pmd)
if (pud_none(READ_ONCE(*dst_pudp))) {
dst_pmdp = (pmd_t *)get_safe_page(GFP_ATOMIC);
if (!dst_pmdp)
return -ENOMEM;
pud_populate(&init_mm, dst_pud, dst_pmd);
pud_populate(&init_mm, dst_pudp, dst_pmdp);
}
dst_pmd = pmd_offset(dst_pud, start);
dst_pmdp = pmd_offset(dst_pudp, start);
src_pmd = pmd_offset(src_pud, start);
src_pmdp = pmd_offset(src_pudp, start);
do {
pmd_t pmd = READ_ONCE(*src_pmdp);
next = pmd_addr_end(addr, end);
if (pmd_none(*src_pmd))
if (pmd_none(pmd))
continue;
if (pmd_table(*src_pmd)) {
if (copy_pte(dst_pmd, src_pmd, addr, next))
if (pmd_table(pmd)) {
if (copy_pte(dst_pmdp, src_pmdp, addr, next))
return -ENOMEM;
} else {
set_pmd(dst_pmd,
__pmd(pmd_val(*src_pmd) & ~PMD_SECT_RDONLY));
set_pmd(dst_pmdp,
__pmd(pmd_val(pmd) & ~PMD_SECT_RDONLY));
}
} while (dst_pmd++, src_pmd++, addr = next, addr != end);
} while (dst_pmdp++, src_pmdp++, addr = next, addr != end);
return 0;
}
static int copy_pud(pgd_t *dst_pgd, pgd_t *src_pgd, unsigned long start,
static int copy_pud(pgd_t *dst_pgdp, pgd_t *src_pgdp, unsigned long start,
unsigned long end)
{
pud_t *dst_pud;
pud_t *src_pud;
pud_t *dst_pudp;
pud_t *src_pudp;
unsigned long next;
unsigned long addr = start;
if (pgd_none(*dst_pgd)) {
dst_pud = (pud_t *)get_safe_page(GFP_ATOMIC);
if (!dst_pud)
if (pgd_none(READ_ONCE(*dst_pgdp))) {
dst_pudp = (pud_t *)get_safe_page(GFP_ATOMIC);
if (!dst_pudp)
return -ENOMEM;
pgd_populate(&init_mm, dst_pgd, dst_pud);
pgd_populate(&init_mm, dst_pgdp, dst_pudp);
}
dst_pud = pud_offset(dst_pgd, start);
dst_pudp = pud_offset(dst_pgdp, start);
src_pud = pud_offset(src_pgd, start);
src_pudp = pud_offset(src_pgdp, start);
do {
pud_t pud = READ_ONCE(*src_pudp);
next = pud_addr_end(addr, end);
if (pud_none(*src_pud))
if (pud_none(pud))
continue;
if (pud_table(*(src_pud))) {
if (copy_pmd(dst_pud, src_pud, addr, next))
if (pud_table(pud)) {
if (copy_pmd(dst_pudp, src_pudp, addr, next))
return -ENOMEM;
} else {
set_pud(dst_pud,
__pud(pud_val(*src_pud) & ~PMD_SECT_RDONLY));
set_pud(dst_pudp,
__pud(pud_val(pud) & ~PMD_SECT_RDONLY));
}
} while (dst_pud++, src_pud++, addr = next, addr != end);
} while (dst_pudp++, src_pudp++, addr = next, addr != end);
return 0;
}
static int copy_page_tables(pgd_t *dst_pgd, unsigned long start,
static int copy_page_tables(pgd_t *dst_pgdp, unsigned long start,
unsigned long end)
{
unsigned long next;
unsigned long addr = start;
pgd_t *src_pgd = pgd_offset_k(start);
pgd_t *src_pgdp = pgd_offset_k(start);
dst_pgd = pgd_offset_raw(dst_pgd, start);
dst_pgdp = pgd_offset_raw(dst_pgdp, start);
do {
next = pgd_addr_end(addr, end);
if (pgd_none(*src_pgd))
if (pgd_none(READ_ONCE(*src_pgdp)))
continue;
if (copy_pud(dst_pgd, src_pgd, addr, next))
if (copy_pud(dst_pgdp, src_pgdp, addr, next))
return -ENOMEM;
} while (dst_pgd++, src_pgd++, addr = next, addr != end);
} while (dst_pgdp++, src_pgdp++, addr = next, addr != end);
return 0;
}


@ -407,8 +407,10 @@ int __hyp_text __kvm_vcpu_run(struct kvm_vcpu *vcpu)
u32 midr = read_cpuid_id();
/* Apply BTAC predictors mitigation to all Falkor chips */
if ((midr & MIDR_CPU_MODEL_MASK) == MIDR_QCOM_FALKOR_V1)
if (((midr & MIDR_CPU_MODEL_MASK) == MIDR_QCOM_FALKOR) ||
((midr & MIDR_CPU_MODEL_MASK) == MIDR_QCOM_FALKOR_V1)) {
__qcom_hyp_sanitize_btac_predictors();
}
}
fp_enabled = __fpsimd_enabled();


@ -286,48 +286,52 @@ static void note_page(struct pg_state *st, unsigned long addr, unsigned level,
}
static void walk_pte(struct pg_state *st, pmd_t *pmd, unsigned long start)
static void walk_pte(struct pg_state *st, pmd_t *pmdp, unsigned long start)
{
pte_t *pte = pte_offset_kernel(pmd, 0UL);
pte_t *ptep = pte_offset_kernel(pmdp, 0UL);
unsigned long addr;
unsigned i;
for (i = 0; i < PTRS_PER_PTE; i++, pte++) {
for (i = 0; i < PTRS_PER_PTE; i++, ptep++) {
addr = start + i * PAGE_SIZE;
note_page(st, addr, 4, pte_val(*pte));
note_page(st, addr, 4, READ_ONCE(pte_val(*ptep)));
}
}
static void walk_pmd(struct pg_state *st, pud_t *pud, unsigned long start)
static void walk_pmd(struct pg_state *st, pud_t *pudp, unsigned long start)
{
pmd_t *pmd = pmd_offset(pud, 0UL);
pmd_t *pmdp = pmd_offset(pudp, 0UL);
unsigned long addr;
unsigned i;
for (i = 0; i < PTRS_PER_PMD; i++, pmd++) {
for (i = 0; i < PTRS_PER_PMD; i++, pmdp++) {
pmd_t pmd = READ_ONCE(*pmdp);
addr = start + i * PMD_SIZE;
if (pmd_none(*pmd) || pmd_sect(*pmd)) {
note_page(st, addr, 3, pmd_val(*pmd));
if (pmd_none(pmd) || pmd_sect(pmd)) {
note_page(st, addr, 3, pmd_val(pmd));
} else {
BUG_ON(pmd_bad(*pmd));
walk_pte(st, pmd, addr);
BUG_ON(pmd_bad(pmd));
walk_pte(st, pmdp, addr);
}
}
}
static void walk_pud(struct pg_state *st, pgd_t *pgd, unsigned long start)
static void walk_pud(struct pg_state *st, pgd_t *pgdp, unsigned long start)
{
pud_t *pud = pud_offset(pgd, 0UL);
pud_t *pudp = pud_offset(pgdp, 0UL);
unsigned long addr;
unsigned i;
for (i = 0; i < PTRS_PER_PUD; i++, pud++) {
for (i = 0; i < PTRS_PER_PUD; i++, pudp++) {
pud_t pud = READ_ONCE(*pudp);
addr = start + i * PUD_SIZE;
if (pud_none(*pud) || pud_sect(*pud)) {
note_page(st, addr, 2, pud_val(*pud));
if (pud_none(pud) || pud_sect(pud)) {
note_page(st, addr, 2, pud_val(pud));
} else {
BUG_ON(pud_bad(*pud));
walk_pmd(st, pud, addr);
BUG_ON(pud_bad(pud));
walk_pmd(st, pudp, addr);
}
}
}
@ -335,17 +339,19 @@ static void walk_pud(struct pg_state *st, pgd_t *pgd, unsigned long start)
static void walk_pgd(struct pg_state *st, struct mm_struct *mm,
unsigned long start)
{
pgd_t *pgd = pgd_offset(mm, 0UL);
pgd_t *pgdp = pgd_offset(mm, 0UL);
unsigned i;
unsigned long addr;
for (i = 0; i < PTRS_PER_PGD; i++, pgd++) {
for (i = 0; i < PTRS_PER_PGD; i++, pgdp++) {
pgd_t pgd = READ_ONCE(*pgdp);
addr = start + i * PGDIR_SIZE;
if (pgd_none(*pgd)) {
note_page(st, addr, 1, pgd_val(*pgd));
if (pgd_none(pgd)) {
note_page(st, addr, 1, pgd_val(pgd));
} else {
BUG_ON(pgd_bad(*pgd));
walk_pud(st, pgd, addr);
BUG_ON(pgd_bad(pgd));
walk_pud(st, pgdp, addr);
}
}
}


@ -130,7 +130,8 @@ static void mem_abort_decode(unsigned int esr)
void show_pte(unsigned long addr)
{
struct mm_struct *mm;
pgd_t *pgd;
pgd_t *pgdp;
pgd_t pgd;
if (addr < TASK_SIZE) {
/* TTBR0 */
@ -149,33 +150,37 @@ void show_pte(unsigned long addr)
return;
}
pr_alert("%s pgtable: %luk pages, %u-bit VAs, pgd = %p\n",
pr_alert("%s pgtable: %luk pages, %u-bit VAs, pgdp = %p\n",
mm == &init_mm ? "swapper" : "user", PAGE_SIZE / SZ_1K,
VA_BITS, mm->pgd);
pgd = pgd_offset(mm, addr);
pr_alert("[%016lx] *pgd=%016llx", addr, pgd_val(*pgd));
pgdp = pgd_offset(mm, addr);
pgd = READ_ONCE(*pgdp);
pr_alert("[%016lx] pgd=%016llx", addr, pgd_val(pgd));
do {
pud_t *pud;
pmd_t *pmd;
pte_t *pte;
pud_t *pudp, pud;
pmd_t *pmdp, pmd;
pte_t *ptep, pte;
if (pgd_none(*pgd) || pgd_bad(*pgd))
if (pgd_none(pgd) || pgd_bad(pgd))
break;
pud = pud_offset(pgd, addr);
pr_cont(", *pud=%016llx", pud_val(*pud));
if (pud_none(*pud) || pud_bad(*pud))
pudp = pud_offset(pgdp, addr);
pud = READ_ONCE(*pudp);
pr_cont(", pud=%016llx", pud_val(pud));
if (pud_none(pud) || pud_bad(pud))
break;
pmd = pmd_offset(pud, addr);
pr_cont(", *pmd=%016llx", pmd_val(*pmd));
if (pmd_none(*pmd) || pmd_bad(*pmd))
pmdp = pmd_offset(pudp, addr);
pmd = READ_ONCE(*pmdp);
pr_cont(", pmd=%016llx", pmd_val(pmd));
if (pmd_none(pmd) || pmd_bad(pmd))
break;
pte = pte_offset_map(pmd, addr);
pr_cont(", *pte=%016llx", pte_val(*pte));
pte_unmap(pte);
ptep = pte_offset_map(pmdp, addr);
pte = READ_ONCE(*ptep);
pr_cont(", pte=%016llx", pte_val(pte));
pte_unmap(ptep);
} while(0);
pr_cont("\n");
@ -196,8 +201,9 @@ int ptep_set_access_flags(struct vm_area_struct *vma,
pte_t entry, int dirty)
{
pteval_t old_pteval, pteval;
pte_t pte = READ_ONCE(*ptep);
if (pte_same(*ptep, entry))
if (pte_same(pte, entry))
return 0;
/* only preserve the access flags and write permission */
@ -210,7 +216,7 @@ int ptep_set_access_flags(struct vm_area_struct *vma,
* (calculated as: a & b == ~(~a | ~b)).
*/
pte_val(entry) ^= PTE_RDONLY;
pteval = READ_ONCE(pte_val(*ptep));
pteval = pte_val(pte);
do {
old_pteval = pteval;
pteval ^= PTE_RDONLY;


@ -54,14 +54,14 @@ static inline pgprot_t pte_pgprot(pte_t pte)
static int find_num_contig(struct mm_struct *mm, unsigned long addr,
pte_t *ptep, size_t *pgsize)
{
pgd_t *pgd = pgd_offset(mm, addr);
pud_t *pud;
pmd_t *pmd;
pgd_t *pgdp = pgd_offset(mm, addr);
pud_t *pudp;
pmd_t *pmdp;
*pgsize = PAGE_SIZE;
pud = pud_offset(pgd, addr);
pmd = pmd_offset(pud, addr);
if ((pte_t *)pmd == ptep) {
pudp = pud_offset(pgdp, addr);
pmdp = pmd_offset(pudp, addr);
if ((pte_t *)pmdp == ptep) {
*pgsize = PMD_SIZE;
return CONT_PMDS;
}
@ -181,11 +181,8 @@ void set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
clear_flush(mm, addr, ptep, pgsize, ncontig);
for (i = 0; i < ncontig; i++, ptep++, addr += pgsize, pfn += dpfn) {
pr_debug("%s: set pte %p to 0x%llx\n", __func__, ptep,
pte_val(pfn_pte(pfn, hugeprot)));
for (i = 0; i < ncontig; i++, ptep++, addr += pgsize, pfn += dpfn)
set_pte_at(mm, addr, ptep, pfn_pte(pfn, hugeprot));
}
}
void set_huge_swap_pte_at(struct mm_struct *mm, unsigned long addr,
@ -203,20 +200,20 @@ void set_huge_swap_pte_at(struct mm_struct *mm, unsigned long addr,
pte_t *huge_pte_alloc(struct mm_struct *mm,
unsigned long addr, unsigned long sz)
{
pgd_t *pgd;
pud_t *pud;
pte_t *pte = NULL;
pgd_t *pgdp;
pud_t *pudp;
pmd_t *pmdp;
pte_t *ptep = NULL;
pr_debug("%s: addr:0x%lx sz:0x%lx\n", __func__, addr, sz);
pgd = pgd_offset(mm, addr);
pud = pud_alloc(mm, pgd, addr);
if (!pud)
pgdp = pgd_offset(mm, addr);
pudp = pud_alloc(mm, pgdp, addr);
if (!pudp)
return NULL;
if (sz == PUD_SIZE) {
pte = (pte_t *)pud;
ptep = (pte_t *)pudp;
} else if (sz == (PAGE_SIZE * CONT_PTES)) {
pmd_t *pmd = pmd_alloc(mm, pud, addr);
pmdp = pmd_alloc(mm, pudp, addr);
WARN_ON(addr & (sz - 1));
/*
@ -226,60 +223,55 @@ pte_t *huge_pte_alloc(struct mm_struct *mm,
* will be no pte_unmap() to correspond with this
* pte_alloc_map().
*/
pte = pte_alloc_map(mm, pmd, addr);
ptep = pte_alloc_map(mm, pmdp, addr);
} else if (sz == PMD_SIZE) {
if (IS_ENABLED(CONFIG_ARCH_WANT_HUGE_PMD_SHARE) &&
pud_none(*pud))
pte = huge_pmd_share(mm, addr, pud);
pud_none(READ_ONCE(*pudp)))
ptep = huge_pmd_share(mm, addr, pudp);
else
pte = (pte_t *)pmd_alloc(mm, pud, addr);
ptep = (pte_t *)pmd_alloc(mm, pudp, addr);
} else if (sz == (PMD_SIZE * CONT_PMDS)) {
pmd_t *pmd;
pmd = pmd_alloc(mm, pud, addr);
pmdp = pmd_alloc(mm, pudp, addr);
WARN_ON(addr & (sz - 1));
return (pte_t *)pmd;
return (pte_t *)pmdp;
}
pr_debug("%s: addr:0x%lx sz:0x%lx ret pte=%p/0x%llx\n", __func__, addr,
sz, pte, pte_val(*pte));
return pte;
return ptep;
}
pte_t *huge_pte_offset(struct mm_struct *mm,
unsigned long addr, unsigned long sz)
{
pgd_t *pgd;
pud_t *pud;
pmd_t *pmd;
pgd_t *pgdp;
pud_t *pudp, pud;
pmd_t *pmdp, pmd;
pgd = pgd_offset(mm, addr);
pr_debug("%s: addr:0x%lx pgd:%p\n", __func__, addr, pgd);
if (!pgd_present(*pgd))
pgdp = pgd_offset(mm, addr);
if (!pgd_present(READ_ONCE(*pgdp)))
return NULL;
pud = pud_offset(pgd, addr);
if (sz != PUD_SIZE && pud_none(*pud))
pudp = pud_offset(pgdp, addr);
pud = READ_ONCE(*pudp);
if (sz != PUD_SIZE && pud_none(pud))
return NULL;
/* hugepage or swap? */
if (pud_huge(*pud) || !pud_present(*pud))
return (pte_t *)pud;
if (pud_huge(pud) || !pud_present(pud))
return (pte_t *)pudp;
/* table; check the next level */
if (sz == CONT_PMD_SIZE)
addr &= CONT_PMD_MASK;
pmd = pmd_offset(pud, addr);
pmdp = pmd_offset(pudp, addr);
pmd = READ_ONCE(*pmdp);
if (!(sz == PMD_SIZE || sz == CONT_PMD_SIZE) &&
pmd_none(*pmd))
pmd_none(pmd))
return NULL;
if (pmd_huge(*pmd) || !pmd_present(*pmd))
return (pte_t *)pmd;
if (pmd_huge(pmd) || !pmd_present(pmd))
return (pte_t *)pmdp;
if (sz == CONT_PTE_SIZE) {
pte_t *pte = pte_offset_kernel(pmd, (addr & CONT_PTE_MASK));
return pte;
}
if (sz == CONT_PTE_SIZE)
return pte_offset_kernel(pmdp, (addr & CONT_PTE_MASK));
return NULL;
}
@ -367,7 +359,7 @@ void huge_ptep_set_wrprotect(struct mm_struct *mm,
size_t pgsize;
pte_t pte;
if (!pte_cont(*ptep)) {
if (!pte_cont(READ_ONCE(*ptep))) {
ptep_set_wrprotect(mm, addr, ptep);
return;
}
@ -391,7 +383,7 @@ void huge_ptep_clear_flush(struct vm_area_struct *vma,
size_t pgsize;
int ncontig;
if (!pte_cont(*ptep)) {
if (!pte_cont(READ_ONCE(*ptep))) {
ptep_clear_flush(vma, addr, ptep);
return;
}


@ -44,92 +44,92 @@ static phys_addr_t __init kasan_alloc_zeroed_page(int node)
return __pa(p);
}
static pte_t *__init kasan_pte_offset(pmd_t *pmd, unsigned long addr, int node,
static pte_t *__init kasan_pte_offset(pmd_t *pmdp, unsigned long addr, int node,
bool early)
{
if (pmd_none(*pmd)) {
if (pmd_none(READ_ONCE(*pmdp))) {
phys_addr_t pte_phys = early ? __pa_symbol(kasan_zero_pte)
: kasan_alloc_zeroed_page(node);
__pmd_populate(pmd, pte_phys, PMD_TYPE_TABLE);
__pmd_populate(pmdp, pte_phys, PMD_TYPE_TABLE);
}
return early ? pte_offset_kimg(pmd, addr)
: pte_offset_kernel(pmd, addr);
return early ? pte_offset_kimg(pmdp, addr)
: pte_offset_kernel(pmdp, addr);
}
static pmd_t *__init kasan_pmd_offset(pud_t *pud, unsigned long addr, int node,
static pmd_t *__init kasan_pmd_offset(pud_t *pudp, unsigned long addr, int node,
bool early)
{
if (pud_none(*pud)) {
if (pud_none(READ_ONCE(*pudp))) {
phys_addr_t pmd_phys = early ? __pa_symbol(kasan_zero_pmd)
: kasan_alloc_zeroed_page(node);
__pud_populate(pud, pmd_phys, PMD_TYPE_TABLE);
__pud_populate(pudp, pmd_phys, PMD_TYPE_TABLE);
}
return early ? pmd_offset_kimg(pud, addr) : pmd_offset(pud, addr);
return early ? pmd_offset_kimg(pudp, addr) : pmd_offset(pudp, addr);
}
static pud_t *__init kasan_pud_offset(pgd_t *pgd, unsigned long addr, int node,
static pud_t *__init kasan_pud_offset(pgd_t *pgdp, unsigned long addr, int node,
bool early)
{
if (pgd_none(*pgd)) {
if (pgd_none(READ_ONCE(*pgdp))) {
phys_addr_t pud_phys = early ? __pa_symbol(kasan_zero_pud)
: kasan_alloc_zeroed_page(node);
__pgd_populate(pgd, pud_phys, PMD_TYPE_TABLE);
__pgd_populate(pgdp, pud_phys, PMD_TYPE_TABLE);
}
return early ? pud_offset_kimg(pgd, addr) : pud_offset(pgd, addr);
return early ? pud_offset_kimg(pgdp, addr) : pud_offset(pgdp, addr);
}
static void __init kasan_pte_populate(pmd_t *pmd, unsigned long addr,
static void __init kasan_pte_populate(pmd_t *pmdp, unsigned long addr,
unsigned long end, int node, bool early)
{
unsigned long next;
pte_t *pte = kasan_pte_offset(pmd, addr, node, early);
pte_t *ptep = kasan_pte_offset(pmdp, addr, node, early);
do {
phys_addr_t page_phys = early ? __pa_symbol(kasan_zero_page)
: kasan_alloc_zeroed_page(node);
next = addr + PAGE_SIZE;
set_pte(pte, pfn_pte(__phys_to_pfn(page_phys), PAGE_KERNEL));
} while (pte++, addr = next, addr != end && pte_none(*pte));
set_pte(ptep, pfn_pte(__phys_to_pfn(page_phys), PAGE_KERNEL));
} while (ptep++, addr = next, addr != end && pte_none(READ_ONCE(*ptep)));
}
static void __init kasan_pmd_populate(pud_t *pud, unsigned long addr,
static void __init kasan_pmd_populate(pud_t *pudp, unsigned long addr,
unsigned long end, int node, bool early)
{
unsigned long next;
pmd_t *pmd = kasan_pmd_offset(pud, addr, node, early);
pmd_t *pmdp = kasan_pmd_offset(pudp, addr, node, early);
do {
next = pmd_addr_end(addr, end);
kasan_pte_populate(pmd, addr, next, node, early);
} while (pmd++, addr = next, addr != end && pmd_none(*pmd));
kasan_pte_populate(pmdp, addr, next, node, early);
} while (pmdp++, addr = next, addr != end && pmd_none(READ_ONCE(*pmdp)));
}
static void __init kasan_pud_populate(pgd_t *pgd, unsigned long addr,
static void __init kasan_pud_populate(pgd_t *pgdp, unsigned long addr,
unsigned long end, int node, bool early)
{
unsigned long next;
pud_t *pud = kasan_pud_offset(pgd, addr, node, early);
pud_t *pudp = kasan_pud_offset(pgdp, addr, node, early);
do {
next = pud_addr_end(addr, end);
kasan_pmd_populate(pud, addr, next, node, early);
} while (pud++, addr = next, addr != end && pud_none(*pud));
kasan_pmd_populate(pudp, addr, next, node, early);
} while (pudp++, addr = next, addr != end && pud_none(READ_ONCE(*pudp)));
}
static void __init kasan_pgd_populate(unsigned long addr, unsigned long end,
int node, bool early)
{
unsigned long next;
pgd_t *pgd;
pgd_t *pgdp;
pgd = pgd_offset_k(addr);
pgdp = pgd_offset_k(addr);
do {
next = pgd_addr_end(addr, end);
kasan_pud_populate(pgd, addr, next, node, early);
} while (pgd++, addr = next, addr != end);
kasan_pud_populate(pgdp, addr, next, node, early);
} while (pgdp++, addr = next, addr != end);
}
/* The early shadow maps everything to a single page of zeroes */
@ -155,14 +155,14 @@ static void __init kasan_map_populate(unsigned long start, unsigned long end,
*/
void __init kasan_copy_shadow(pgd_t *pgdir)
{
pgd_t *pgd, *pgd_new, *pgd_end;
pgd_t *pgdp, *pgdp_new, *pgdp_end;
pgd = pgd_offset_k(KASAN_SHADOW_START);
pgd_end = pgd_offset_k(KASAN_SHADOW_END);
pgd_new = pgd_offset_raw(pgdir, KASAN_SHADOW_START);
pgdp = pgd_offset_k(KASAN_SHADOW_START);
pgdp_end = pgd_offset_k(KASAN_SHADOW_END);
pgdp_new = pgd_offset_raw(pgdir, KASAN_SHADOW_START);
do {
set_pgd(pgd_new, *pgd);
} while (pgd++, pgd_new++, pgd != pgd_end);
set_pgd(pgdp_new, READ_ONCE(*pgdp));
} while (pgdp++, pgdp_new++, pgdp != pgdp_end);
}
static void __init clear_pgds(unsigned long start,


@ -125,45 +125,48 @@ static bool pgattr_change_is_safe(u64 old, u64 new)
return ((old ^ new) & ~mask) == 0;
}
static void init_pte(pmd_t *pmd, unsigned long addr, unsigned long end,
static void init_pte(pmd_t *pmdp, unsigned long addr, unsigned long end,
phys_addr_t phys, pgprot_t prot)
{
pte_t *pte;
pte_t *ptep;
pte = pte_set_fixmap_offset(pmd, addr);
ptep = pte_set_fixmap_offset(pmdp, addr);
do {
pte_t old_pte = *pte;
pte_t old_pte = READ_ONCE(*ptep);
set_pte(pte, pfn_pte(__phys_to_pfn(phys), prot));
set_pte(ptep, pfn_pte(__phys_to_pfn(phys), prot));
/*
* After the PTE entry has been populated once, we
* only allow updates to the permission attributes.
*/
BUG_ON(!pgattr_change_is_safe(pte_val(old_pte), pte_val(*pte)));
BUG_ON(!pgattr_change_is_safe(pte_val(old_pte),
READ_ONCE(pte_val(*ptep))));
phys += PAGE_SIZE;
} while (pte++, addr += PAGE_SIZE, addr != end);
} while (ptep++, addr += PAGE_SIZE, addr != end);
pte_clear_fixmap();
}
static void alloc_init_cont_pte(pmd_t *pmd, unsigned long addr,
static void alloc_init_cont_pte(pmd_t *pmdp, unsigned long addr,
unsigned long end, phys_addr_t phys,
pgprot_t prot,
phys_addr_t (*pgtable_alloc)(void),
int flags)
{
unsigned long next;
pmd_t pmd = READ_ONCE(*pmdp);
BUG_ON(pmd_sect(*pmd));
if (pmd_none(*pmd)) {
BUG_ON(pmd_sect(pmd));
if (pmd_none(pmd)) {
phys_addr_t pte_phys;
BUG_ON(!pgtable_alloc);
pte_phys = pgtable_alloc();
__pmd_populate(pmd, pte_phys, PMD_TYPE_TABLE);
__pmd_populate(pmdp, pte_phys, PMD_TYPE_TABLE);
pmd = READ_ONCE(*pmdp);
}
BUG_ON(pmd_bad(*pmd));
BUG_ON(pmd_bad(pmd));
do {
pgprot_t __prot = prot;
@ -175,67 +178,69 @@ static void alloc_init_cont_pte(pmd_t *pmd, unsigned long addr,
(flags & NO_CONT_MAPPINGS) == 0)
__prot = __pgprot(pgprot_val(prot) | PTE_CONT);
init_pte(pmd, addr, next, phys, __prot);
init_pte(pmdp, addr, next, phys, __prot);
phys += next - addr;
} while (addr = next, addr != end);
}
static void init_pmd(pud_t *pud, unsigned long addr, unsigned long end,
static void init_pmd(pud_t *pudp, unsigned long addr, unsigned long end,
phys_addr_t phys, pgprot_t prot,
phys_addr_t (*pgtable_alloc)(void), int flags)
{
unsigned long next;
pmd_t *pmd;
pmd_t *pmdp;
pmd = pmd_set_fixmap_offset(pud, addr);
pmdp = pmd_set_fixmap_offset(pudp, addr);
do {
pmd_t old_pmd = *pmd;
pmd_t old_pmd = READ_ONCE(*pmdp);
next = pmd_addr_end(addr, end);
/* try section mapping first */
if (((addr | next | phys) & ~SECTION_MASK) == 0 &&
(flags & NO_BLOCK_MAPPINGS) == 0) {
pmd_set_huge(pmd, phys, prot);
pmd_set_huge(pmdp, phys, prot);
/*
* After the PMD entry has been populated once, we
* only allow updates to the permission attributes.
*/
BUG_ON(!pgattr_change_is_safe(pmd_val(old_pmd),
pmd_val(*pmd)));
READ_ONCE(pmd_val(*pmdp))));
} else {
alloc_init_cont_pte(pmd, addr, next, phys, prot,
alloc_init_cont_pte(pmdp, addr, next, phys, prot,
pgtable_alloc, flags);
BUG_ON(pmd_val(old_pmd) != 0 &&
pmd_val(old_pmd) != pmd_val(*pmd));
pmd_val(old_pmd) != READ_ONCE(pmd_val(*pmdp)));
}
phys += next - addr;
} while (pmd++, addr = next, addr != end);
} while (pmdp++, addr = next, addr != end);
pmd_clear_fixmap();
}
static void alloc_init_cont_pmd(pud_t *pud, unsigned long addr,
static void alloc_init_cont_pmd(pud_t *pudp, unsigned long addr,
unsigned long end, phys_addr_t phys,
pgprot_t prot,
phys_addr_t (*pgtable_alloc)(void), int flags)
{
unsigned long next;
pud_t pud = READ_ONCE(*pudp);
/*
* Check for initial section mappings in the pgd/pud.
*/
BUG_ON(pud_sect(*pud));
if (pud_none(*pud)) {
BUG_ON(pud_sect(pud));
if (pud_none(pud)) {
phys_addr_t pmd_phys;
BUG_ON(!pgtable_alloc);
pmd_phys = pgtable_alloc();
__pud_populate(pud, pmd_phys, PUD_TYPE_TABLE);
__pud_populate(pudp, pmd_phys, PUD_TYPE_TABLE);
pud = READ_ONCE(*pudp);
}
BUG_ON(pud_bad(*pud));
BUG_ON(pud_bad(pud));
do {
pgprot_t __prot = prot;
@ -247,7 +252,7 @@ static void alloc_init_cont_pmd(pud_t *pud, unsigned long addr,
(flags & NO_CONT_MAPPINGS) == 0)
__prot = __pgprot(pgprot_val(prot) | PTE_CONT);
init_pmd(pud, addr, next, phys, __prot, pgtable_alloc, flags);
init_pmd(pudp, addr, next, phys, __prot, pgtable_alloc, flags);
phys += next - addr;
} while (addr = next, addr != end);
@ -265,25 +270,27 @@ static inline bool use_1G_block(unsigned long addr, unsigned long next,
return true;
}
static void alloc_init_pud(pgd_t *pgd, unsigned long addr, unsigned long end,
phys_addr_t phys, pgprot_t prot,
phys_addr_t (*pgtable_alloc)(void),
int flags)
static void alloc_init_pud(pgd_t *pgdp, unsigned long addr, unsigned long end,
phys_addr_t phys, pgprot_t prot,
phys_addr_t (*pgtable_alloc)(void),
int flags)
{
pud_t *pud;
unsigned long next;
pud_t *pudp;
pgd_t pgd = READ_ONCE(*pgdp);
if (pgd_none(*pgd)) {
if (pgd_none(pgd)) {
phys_addr_t pud_phys;
BUG_ON(!pgtable_alloc);
pud_phys = pgtable_alloc();
__pgd_populate(pgd, pud_phys, PUD_TYPE_TABLE);
__pgd_populate(pgdp, pud_phys, PUD_TYPE_TABLE);
pgd = READ_ONCE(*pgdp);
}
BUG_ON(pgd_bad(*pgd));
BUG_ON(pgd_bad(pgd));
pud = pud_set_fixmap_offset(pgd, addr);
pudp = pud_set_fixmap_offset(pgdp, addr);
do {
pud_t old_pud = *pud;
pud_t old_pud = READ_ONCE(*pudp);
next = pud_addr_end(addr, end);
@ -292,23 +299,23 @@ static void alloc_init_pud(pgd_t *pgd, unsigned long addr, unsigned long end,
*/
if (use_1G_block(addr, next, phys) &&
(flags & NO_BLOCK_MAPPINGS) == 0) {
pud_set_huge(pud, phys, prot);
pud_set_huge(pudp, phys, prot);
/*
* After the PUD entry has been populated once, we
* only allow updates to the permission attributes.
*/
BUG_ON(!pgattr_change_is_safe(pud_val(old_pud),
pud_val(*pud)));
READ_ONCE(pud_val(*pudp))));
} else {
alloc_init_cont_pmd(pud, addr, next, phys, prot,
alloc_init_cont_pmd(pudp, addr, next, phys, prot,
pgtable_alloc, flags);
BUG_ON(pud_val(old_pud) != 0 &&
pud_val(old_pud) != pud_val(*pud));
pud_val(old_pud) != READ_ONCE(pud_val(*pudp)));
}
phys += next - addr;
} while (pud++, addr = next, addr != end);
} while (pudp++, addr = next, addr != end);
pud_clear_fixmap();
}
@ -320,7 +327,7 @@ static void __create_pgd_mapping(pgd_t *pgdir, phys_addr_t phys,
int flags)
{
unsigned long addr, length, end, next;
pgd_t *pgd = pgd_offset_raw(pgdir, virt);
pgd_t *pgdp = pgd_offset_raw(pgdir, virt);
/*
* If the virtual and physical address don't have the same offset
@ -336,10 +343,10 @@ static void __create_pgd_mapping(pgd_t *pgdir, phys_addr_t phys,
end = addr + length;
do {
next = pgd_addr_end(addr, end);
alloc_init_pud(pgd, addr, next, phys, prot, pgtable_alloc,
alloc_init_pud(pgdp, addr, next, phys, prot, pgtable_alloc,
flags);
phys += next - addr;
} while (pgd++, addr = next, addr != end);
} while (pgdp++, addr = next, addr != end);
}
static phys_addr_t pgd_pgtable_alloc(void)
@ -401,10 +408,10 @@ static void update_mapping_prot(phys_addr_t phys, unsigned long virt,
flush_tlb_kernel_range(virt, virt + size);
}
static void __init __map_memblock(pgd_t *pgd, phys_addr_t start,
static void __init __map_memblock(pgd_t *pgdp, phys_addr_t start,
phys_addr_t end, pgprot_t prot, int flags)
{
__create_pgd_mapping(pgd, start, __phys_to_virt(start), end - start,
__create_pgd_mapping(pgdp, start, __phys_to_virt(start), end - start,
prot, early_pgtable_alloc, flags);
}
@ -418,7 +425,7 @@ void __init mark_linear_text_alias_ro(void)
PAGE_KERNEL_RO);
}
static void __init map_mem(pgd_t *pgd)
static void __init map_mem(pgd_t *pgdp)
{
phys_addr_t kernel_start = __pa_symbol(_text);
phys_addr_t kernel_end = __pa_symbol(__init_begin);
@ -451,7 +458,7 @@ static void __init map_mem(pgd_t *pgd)
if (memblock_is_nomap(reg))
continue;
__map_memblock(pgd, start, end, PAGE_KERNEL, flags);
__map_memblock(pgdp, start, end, PAGE_KERNEL, flags);
}
/*
@ -464,7 +471,7 @@ static void __init map_mem(pgd_t *pgd)
* Note that contiguous mappings cannot be remapped in this way,
* so we should avoid them here.
*/
__map_memblock(pgd, kernel_start, kernel_end,
__map_memblock(pgdp, kernel_start, kernel_end,
PAGE_KERNEL, NO_CONT_MAPPINGS);
memblock_clear_nomap(kernel_start, kernel_end - kernel_start);
@ -475,7 +482,7 @@ static void __init map_mem(pgd_t *pgd)
* through /sys/kernel/kexec_crash_size interface.
*/
if (crashk_res.end) {
__map_memblock(pgd, crashk_res.start, crashk_res.end + 1,
__map_memblock(pgdp, crashk_res.start, crashk_res.end + 1,
PAGE_KERNEL,
NO_BLOCK_MAPPINGS | NO_CONT_MAPPINGS);
memblock_clear_nomap(crashk_res.start,
@ -499,7 +506,7 @@ void mark_rodata_ro(void)
debug_checkwx();
}
static void __init map_kernel_segment(pgd_t *pgd, void *va_start, void *va_end,
static void __init map_kernel_segment(pgd_t *pgdp, void *va_start, void *va_end,
pgprot_t prot, struct vm_struct *vma,
int flags, unsigned long vm_flags)
{
@ -509,7 +516,7 @@ static void __init map_kernel_segment(pgd_t *pgd, void *va_start, void *va_end,
BUG_ON(!PAGE_ALIGNED(pa_start));
BUG_ON(!PAGE_ALIGNED(size));
__create_pgd_mapping(pgd, pa_start, (unsigned long)va_start, size, prot,
__create_pgd_mapping(pgdp, pa_start, (unsigned long)va_start, size, prot,
early_pgtable_alloc, flags);
if (!(vm_flags & VM_NO_GUARD))
@ -562,7 +569,7 @@ core_initcall(map_entry_trampoline);
/*
* Create fine-grained mappings for the kernel.
*/
static void __init map_kernel(pgd_t *pgd)
static void __init map_kernel(pgd_t *pgdp)
{
static struct vm_struct vmlinux_text, vmlinux_rodata, vmlinux_inittext,
vmlinux_initdata, vmlinux_data;
@ -578,24 +585,24 @@ static void __init map_kernel(pgd_t *pgd)
* Only rodata will be remapped with different permissions later on,
* all other segments are allowed to use contiguous mappings.
*/
map_kernel_segment(pgd, _text, _etext, text_prot, &vmlinux_text, 0,
map_kernel_segment(pgdp, _text, _etext, text_prot, &vmlinux_text, 0,
VM_NO_GUARD);
map_kernel_segment(pgd, __start_rodata, __inittext_begin, PAGE_KERNEL,
map_kernel_segment(pgdp, __start_rodata, __inittext_begin, PAGE_KERNEL,
&vmlinux_rodata, NO_CONT_MAPPINGS, VM_NO_GUARD);
map_kernel_segment(pgd, __inittext_begin, __inittext_end, text_prot,
map_kernel_segment(pgdp, __inittext_begin, __inittext_end, text_prot,
&vmlinux_inittext, 0, VM_NO_GUARD);
map_kernel_segment(pgd, __initdata_begin, __initdata_end, PAGE_KERNEL,
map_kernel_segment(pgdp, __initdata_begin, __initdata_end, PAGE_KERNEL,
&vmlinux_initdata, 0, VM_NO_GUARD);
map_kernel_segment(pgd, _data, _end, PAGE_KERNEL, &vmlinux_data, 0, 0);
map_kernel_segment(pgdp, _data, _end, PAGE_KERNEL, &vmlinux_data, 0, 0);
if (!pgd_val(*pgd_offset_raw(pgd, FIXADDR_START))) {
if (!READ_ONCE(pgd_val(*pgd_offset_raw(pgdp, FIXADDR_START)))) {
/*
* The fixmap falls in a separate pgd to the kernel, and doesn't
* live in the carveout for the swapper_pg_dir. We can simply
* re-use the existing dir for the fixmap.
*/
set_pgd(pgd_offset_raw(pgd, FIXADDR_START),
*pgd_offset_k(FIXADDR_START));
set_pgd(pgd_offset_raw(pgdp, FIXADDR_START),
READ_ONCE(*pgd_offset_k(FIXADDR_START)));
} else if (CONFIG_PGTABLE_LEVELS > 3) {
/*
* The fixmap shares its top level pgd entry with the kernel
@ -604,14 +611,15 @@ static void __init map_kernel(pgd_t *pgd)
* entry instead.
*/
BUG_ON(!IS_ENABLED(CONFIG_ARM64_16K_PAGES));
pud_populate(&init_mm, pud_set_fixmap_offset(pgd, FIXADDR_START),
pud_populate(&init_mm,
pud_set_fixmap_offset(pgdp, FIXADDR_START),
lm_alias(bm_pmd));
pud_clear_fixmap();
} else {
BUG();
}
kasan_copy_shadow(pgd);
kasan_copy_shadow(pgdp);
}
/*
@ -621,10 +629,10 @@ static void __init map_kernel(pgd_t *pgd)
void __init paging_init(void)
{
phys_addr_t pgd_phys = early_pgtable_alloc();
pgd_t *pgd = pgd_set_fixmap(pgd_phys);
pgd_t *pgdp = pgd_set_fixmap(pgd_phys);
map_kernel(pgd);
map_mem(pgd);
map_kernel(pgdp);
map_mem(pgdp);
/*
* We want to reuse the original swapper_pg_dir so we don't have to
@ -635,7 +643,7 @@ void __init paging_init(void)
* To do this we need to go via a temporary pgd.
*/
cpu_replace_ttbr1(__va(pgd_phys));
memcpy(swapper_pg_dir, pgd, PGD_SIZE);
memcpy(swapper_pg_dir, pgdp, PGD_SIZE);
cpu_replace_ttbr1(lm_alias(swapper_pg_dir));
pgd_clear_fixmap();
@ -655,37 +663,40 @@ void __init paging_init(void)
*/
int kern_addr_valid(unsigned long addr)
{
pgd_t *pgd;
pud_t *pud;
pmd_t *pmd;
pte_t *pte;
pgd_t *pgdp;
pud_t *pudp, pud;
pmd_t *pmdp, pmd;
pte_t *ptep, pte;
if ((((long)addr) >> VA_BITS) != -1UL)
return 0;
pgd = pgd_offset_k(addr);
if (pgd_none(*pgd))
pgdp = pgd_offset_k(addr);
if (pgd_none(READ_ONCE(*pgdp)))
return 0;
pud = pud_offset(pgd, addr);
if (pud_none(*pud))
pudp = pud_offset(pgdp, addr);
pud = READ_ONCE(*pudp);
if (pud_none(pud))
return 0;
if (pud_sect(*pud))
return pfn_valid(pud_pfn(*pud));
if (pud_sect(pud))
return pfn_valid(pud_pfn(pud));
pmd = pmd_offset(pud, addr);
if (pmd_none(*pmd))
pmdp = pmd_offset(pudp, addr);
pmd = READ_ONCE(*pmdp);
if (pmd_none(pmd))
return 0;
if (pmd_sect(*pmd))
return pfn_valid(pmd_pfn(*pmd));
if (pmd_sect(pmd))
return pfn_valid(pmd_pfn(pmd));
pte = pte_offset_kernel(pmd, addr);
if (pte_none(*pte))
ptep = pte_offset_kernel(pmdp, addr);
pte = READ_ONCE(*ptep);
if (pte_none(pte))
return 0;
return pfn_valid(pte_pfn(*pte));
return pfn_valid(pte_pfn(pte));
}
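The walk above is the template for the whole patch: each live table entry is loaded exactly once with READ_ONCE() into a local snapshot, and only the snapshot is tested. A minimal sketch of the pattern (hypothetical walker, not from this file):

	/*
	 * Sketch: one level of a READ_ONCE() page-table walk. The single
	 * tearing-free load means a concurrent update of the live entry
	 * cannot be observed half-way through the checks below.
	 */
	static bool walk_one_level(pmd_t *pmdp, unsigned long addr)
	{
		pmd_t pmd = READ_ONCE(*pmdp);	/* snapshot the entry once */

		if (pmd_none(pmd))		/* test the snapshot, not *pmdp */
			return false;
		if (pmd_sect(pmd))		/* section mapping ends the walk */
			return true;
		return pte_valid(READ_ONCE(*pte_offset_kernel(pmdp, addr)));
	}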
#ifdef CONFIG_SPARSEMEM_VMEMMAP
#if !ARM64_SWAPPER_USES_SECTION_MAPS
@ -700,32 +711,32 @@ int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node,
{
unsigned long addr = start;
unsigned long next;
pgd_t *pgd;
pud_t *pud;
pmd_t *pmd;
pgd_t *pgdp;
pud_t *pudp;
pmd_t *pmdp;
do {
next = pmd_addr_end(addr, end);
pgd = vmemmap_pgd_populate(addr, node);
if (!pgd)
pgdp = vmemmap_pgd_populate(addr, node);
if (!pgdp)
return -ENOMEM;
pud = vmemmap_pud_populate(pgd, addr, node);
if (!pud)
pudp = vmemmap_pud_populate(pgdp, addr, node);
if (!pudp)
return -ENOMEM;
pmd = pmd_offset(pud, addr);
if (pmd_none(*pmd)) {
pmdp = pmd_offset(pudp, addr);
if (pmd_none(READ_ONCE(*pmdp))) {
void *p = NULL;
p = vmemmap_alloc_block_buf(PMD_SIZE, node);
if (!p)
return -ENOMEM;
pmd_set_huge(pmd, __pa(p), __pgprot(PROT_SECT_NORMAL));
pmd_set_huge(pmdp, __pa(p), __pgprot(PROT_SECT_NORMAL));
} else
vmemmap_verify((pte_t *)pmd, node, addr, next);
vmemmap_verify((pte_t *)pmdp, node, addr, next);
} while (addr = next, addr != end);
return 0;
@ -739,20 +750,22 @@ void vmemmap_free(unsigned long start, unsigned long end,
static inline pud_t * fixmap_pud(unsigned long addr)
{
pgd_t *pgd = pgd_offset_k(addr);
pgd_t *pgdp = pgd_offset_k(addr);
pgd_t pgd = READ_ONCE(*pgdp);
BUG_ON(pgd_none(*pgd) || pgd_bad(*pgd));
BUG_ON(pgd_none(pgd) || pgd_bad(pgd));
return pud_offset_kimg(pgd, addr);
return pud_offset_kimg(pgdp, addr);
}
static inline pmd_t * fixmap_pmd(unsigned long addr)
{
pud_t *pud = fixmap_pud(addr);
pud_t *pudp = fixmap_pud(addr);
pud_t pud = READ_ONCE(*pudp);
BUG_ON(pud_none(*pud) || pud_bad(*pud));
BUG_ON(pud_none(pud) || pud_bad(pud));
return pmd_offset_kimg(pud, addr);
return pmd_offset_kimg(pudp, addr);
}
static inline pte_t * fixmap_pte(unsigned long addr)
@ -768,30 +781,31 @@ static inline pte_t * fixmap_pte(unsigned long addr)
*/
void __init early_fixmap_init(void)
{
pgd_t *pgd;
pud_t *pud;
pmd_t *pmd;
pgd_t *pgdp, pgd;
pud_t *pudp;
pmd_t *pmdp;
unsigned long addr = FIXADDR_START;
pgd = pgd_offset_k(addr);
pgdp = pgd_offset_k(addr);
pgd = READ_ONCE(*pgdp);
if (CONFIG_PGTABLE_LEVELS > 3 &&
!(pgd_none(*pgd) || pgd_page_paddr(*pgd) == __pa_symbol(bm_pud))) {
!(pgd_none(pgd) || pgd_page_paddr(pgd) == __pa_symbol(bm_pud))) {
/*
* We only end up here if the kernel mapping and the fixmap
* share the top level pgd entry, which should only happen on
* 16k/4 levels configurations.
*/
BUG_ON(!IS_ENABLED(CONFIG_ARM64_16K_PAGES));
pud = pud_offset_kimg(pgd, addr);
pudp = pud_offset_kimg(pgdp, addr);
} else {
if (pgd_none(*pgd))
__pgd_populate(pgd, __pa_symbol(bm_pud), PUD_TYPE_TABLE);
pud = fixmap_pud(addr);
if (pgd_none(pgd))
__pgd_populate(pgdp, __pa_symbol(bm_pud), PUD_TYPE_TABLE);
pudp = fixmap_pud(addr);
}
if (pud_none(*pud))
__pud_populate(pud, __pa_symbol(bm_pmd), PMD_TYPE_TABLE);
pmd = fixmap_pmd(addr);
__pmd_populate(pmd, __pa_symbol(bm_pte), PMD_TYPE_TABLE);
if (pud_none(READ_ONCE(*pudp)))
__pud_populate(pudp, __pa_symbol(bm_pmd), PMD_TYPE_TABLE);
pmdp = fixmap_pmd(addr);
__pmd_populate(pmdp, __pa_symbol(bm_pte), PMD_TYPE_TABLE);
/*
* The boot-ioremap range spans multiple pmds, for which
@ -800,11 +814,11 @@ void __init early_fixmap_init(void)
BUILD_BUG_ON((__fix_to_virt(FIX_BTMAP_BEGIN) >> PMD_SHIFT)
!= (__fix_to_virt(FIX_BTMAP_END) >> PMD_SHIFT));
if ((pmd != fixmap_pmd(fix_to_virt(FIX_BTMAP_BEGIN)))
|| pmd != fixmap_pmd(fix_to_virt(FIX_BTMAP_END))) {
if ((pmdp != fixmap_pmd(fix_to_virt(FIX_BTMAP_BEGIN)))
|| pmdp != fixmap_pmd(fix_to_virt(FIX_BTMAP_END))) {
WARN_ON(1);
pr_warn("pmd %p != %p, %p\n",
pmd, fixmap_pmd(fix_to_virt(FIX_BTMAP_BEGIN)),
pr_warn("pmdp %p != %p, %p\n",
pmdp, fixmap_pmd(fix_to_virt(FIX_BTMAP_BEGIN)),
fixmap_pmd(fix_to_virt(FIX_BTMAP_END)));
pr_warn("fix_to_virt(FIX_BTMAP_BEGIN): %08lx\n",
fix_to_virt(FIX_BTMAP_BEGIN));
@ -824,16 +838,16 @@ void __set_fixmap(enum fixed_addresses idx,
phys_addr_t phys, pgprot_t flags)
{
unsigned long addr = __fix_to_virt(idx);
pte_t *pte;
pte_t *ptep;
BUG_ON(idx <= FIX_HOLE || idx >= __end_of_fixed_addresses);
pte = fixmap_pte(addr);
ptep = fixmap_pte(addr);
if (pgprot_val(flags)) {
set_pte(pte, pfn_pte(phys >> PAGE_SHIFT, flags));
set_pte(ptep, pfn_pte(phys >> PAGE_SHIFT, flags));
} else {
pte_clear(&init_mm, addr, pte);
pte_clear(&init_mm, addr, ptep);
flush_tlb_kernel_range(addr, addr+PAGE_SIZE);
}
}
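For reference, a hedged usage sketch of __set_fixmap() (the fixmap slot and uart_phys are assumptions, not taken from this patch):

	/* Map one device page at a fixed virtual slot, then tear it down. */
	__set_fixmap(FIX_EARLYCON_MEM_BASE, uart_phys, FIXMAP_PAGE_IO);
	/* ... access it via fix_to_virt(FIX_EARLYCON_MEM_BASE) ... */
	__set_fixmap(FIX_EARLYCON_MEM_BASE, 0, __pgprot(0));	/* clear + TLB flush */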
@ -915,36 +929,36 @@ int __init arch_ioremap_pmd_supported(void)
return 1;
}
int pud_set_huge(pud_t *pud, phys_addr_t phys, pgprot_t prot)
int pud_set_huge(pud_t *pudp, phys_addr_t phys, pgprot_t prot)
{
pgprot_t sect_prot = __pgprot(PUD_TYPE_SECT |
pgprot_val(mk_sect_prot(prot)));
BUG_ON(phys & ~PUD_MASK);
set_pud(pud, pfn_pud(__phys_to_pfn(phys), sect_prot));
set_pud(pudp, pfn_pud(__phys_to_pfn(phys), sect_prot));
return 1;
}
int pmd_set_huge(pmd_t *pmd, phys_addr_t phys, pgprot_t prot)
int pmd_set_huge(pmd_t *pmdp, phys_addr_t phys, pgprot_t prot)
{
pgprot_t sect_prot = __pgprot(PMD_TYPE_SECT |
pgprot_val(mk_sect_prot(prot)));
BUG_ON(phys & ~PMD_MASK);
set_pmd(pmd, pfn_pmd(__phys_to_pfn(phys), sect_prot));
set_pmd(pmdp, pfn_pmd(__phys_to_pfn(phys), sect_prot));
return 1;
}
int pud_clear_huge(pud_t *pud)
int pud_clear_huge(pud_t *pudp)
{
if (!pud_sect(*pud))
if (!pud_sect(READ_ONCE(*pudp)))
return 0;
pud_clear(pud);
pud_clear(pudp);
return 1;
}
int pmd_clear_huge(pmd_t *pmd)
int pmd_clear_huge(pmd_t *pmdp)
{
if (!pmd_sect(*pmd))
if (!pmd_sect(READ_ONCE(*pmdp)))
return 0;
pmd_clear(pmd);
pmd_clear(pmdp);
return 1;
}
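These hooks are consumed by the generic huge-ioremap path; roughly, as a sketch modeled on lib/ioremap.c (simplified, not part of this patch):

	/* Try to map a whole block at once; fall back to PTEs otherwise. */
	if (ioremap_pmd_enabled() &&
	    ((next - addr) == PMD_SIZE) &&
	    IS_ALIGNED(phys, PMD_SIZE) &&
	    pmd_set_huge(pmdp, phys, prot))
		continue;	/* mapped PMD_SIZE in one entry, skip PTE loop */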

View File

@ -29,7 +29,7 @@ static int change_page_range(pte_t *ptep, pgtable_t token, unsigned long addr,
void *data)
{
struct page_change_data *cdata = data;
pte_t pte = *ptep;
pte_t pte = READ_ONCE(*ptep);
pte = clear_pte_bit(pte, cdata->clear_mask);
pte = set_pte_bit(pte, cdata->set_mask);
@ -156,30 +156,32 @@ void __kernel_map_pages(struct page *page, int numpages, int enable)
*/
bool kernel_page_present(struct page *page)
{
pgd_t *pgd;
pud_t *pud;
pmd_t *pmd;
pte_t *pte;
pgd_t *pgdp;
pud_t *pudp, pud;
pmd_t *pmdp, pmd;
pte_t *ptep;
unsigned long addr = (unsigned long)page_address(page);
pgd = pgd_offset_k(addr);
if (pgd_none(*pgd))
pgdp = pgd_offset_k(addr);
if (pgd_none(READ_ONCE(*pgdp)))
return false;
pud = pud_offset(pgd, addr);
if (pud_none(*pud))
pudp = pud_offset(pgdp, addr);
pud = READ_ONCE(*pudp);
if (pud_none(pud))
return false;
if (pud_sect(*pud))
if (pud_sect(pud))
return true;
pmd = pmd_offset(pud, addr);
if (pmd_none(*pmd))
pmdp = pmd_offset(pudp, addr);
pmd = READ_ONCE(*pmdp);
if (pmd_none(pmd))
return false;
if (pmd_sect(*pmd))
if (pmd_sect(pmd))
return true;
pte = pte_offset_kernel(pmd, addr);
return pte_valid(*pte);
ptep = pte_offset_kernel(pmdp, addr);
return pte_valid(READ_ONCE(*ptep));
}
#endif /* CONFIG_HIBERNATION */
#endif /* CONFIG_DEBUG_PAGEALLOC */

View File

@ -205,7 +205,8 @@ ENDPROC(idmap_cpu_replace_ttbr1)
dc cvac, cur_\()\type\()p // Ensure any existing dirty
dmb sy // lines are written back before
ldr \type, [cur_\()\type\()p] // loading the entry
tbz \type, #0, next_\()\type // Skip invalid entries
tbz \type, #0, skip_\()\type // Skip invalid and
tbnz \type, #11, skip_\()\type // non-global entries
.endm
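In C terms, the new pair of tests reads as follows (bit positions as in arm64 stage-1 descriptors: bit 0 = valid, bit 11 = nG; the helper is illustrative only):

	/* Sketch of the tbz/tbnz logic above. */
	static bool kpti_entry_needs_ng(unsigned long entry)
	{
		if (!(entry & BIT(0)))	/* tbz #0: invalid entry, skip */
			return false;
		if (entry & BIT(11))	/* tbnz #11: already non-global, skip */
			return false;
		return true;		/* valid global entry: rewrite with nG */
	}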
.macro __idmap_kpti_put_pgtable_ent_ng, type
@ -265,8 +266,9 @@ ENTRY(idmap_kpti_install_ng_mappings)
add end_pgdp, cur_pgdp, #(PTRS_PER_PGD * 8)
do_pgd: __idmap_kpti_get_pgtable_ent pgd
tbnz pgd, #1, walk_puds
__idmap_kpti_put_pgtable_ent_ng pgd
next_pgd:
__idmap_kpti_put_pgtable_ent_ng pgd
skip_pgd:
add cur_pgdp, cur_pgdp, #8
cmp cur_pgdp, end_pgdp
b.ne do_pgd
@ -294,8 +296,9 @@ walk_puds:
add end_pudp, cur_pudp, #(PTRS_PER_PUD * 8)
do_pud: __idmap_kpti_get_pgtable_ent pud
tbnz pud, #1, walk_pmds
__idmap_kpti_put_pgtable_ent_ng pud
next_pud:
__idmap_kpti_put_pgtable_ent_ng pud
skip_pud:
add cur_pudp, cur_pudp, 8
cmp cur_pudp, end_pudp
b.ne do_pud
@ -314,8 +317,9 @@ walk_pmds:
add end_pmdp, cur_pmdp, #(PTRS_PER_PMD * 8)
do_pmd: __idmap_kpti_get_pgtable_ent pmd
tbnz pmd, #1, walk_ptes
__idmap_kpti_put_pgtable_ent_ng pmd
next_pmd:
__idmap_kpti_put_pgtable_ent_ng pmd
skip_pmd:
add cur_pmdp, cur_pmdp, #8
cmp cur_pmdp, end_pmdp
b.ne do_pmd
@ -333,7 +337,7 @@ walk_ptes:
add end_ptep, cur_ptep, #(PTRS_PER_PTE * 8)
do_pte: __idmap_kpti_get_pgtable_ent pte
__idmap_kpti_put_pgtable_ent_ng pte
next_pte:
skip_pte:
add cur_ptep, cur_ptep, #8
cmp cur_ptep, end_ptep
b.ne do_pte

View File

@ -41,7 +41,6 @@ ifneq ($(CONFIG_IA64_ESI),)
obj-y += esi_stub.o # must be in kernel proper
endif
obj-$(CONFIG_INTEL_IOMMU) += pci-dma.o
obj-$(CONFIG_SWIOTLB) += pci-swiotlb.o
obj-$(CONFIG_BINFMT_ELF) += elfcore.o

View File

@ -10,6 +10,8 @@
#include <linux/errno.h>
#include <linux/percpu.h>
#include <linux/of.h>
#include <linux/of_address.h>
#include <linux/spinlock.h>
#include <asm/mips-cps.h>
@ -22,6 +24,17 @@ static DEFINE_PER_CPU_ALIGNED(unsigned long, cpc_core_lock_flags);
phys_addr_t __weak mips_cpc_default_phys_base(void)
{
struct device_node *cpc_node;
struct resource res;
int err;
cpc_node = of_find_compatible_node(of_root, NULL, "mti,mips-cpc");
if (cpc_node) {
err = of_address_to_resource(cpc_node, 0, &res);
if (!err)
return res.start;
}
return 0;
}
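The lookup follows the usual device-tree pattern; a generic sketch (hypothetical compatible string, with the of_node_put() that such lookups normally pair with):

	/* Read the physical base of a block from its first "reg" entry. */
	struct device_node *np = of_find_compatible_node(NULL, NULL, "vendor,block");
	struct resource res;

	if (np) {
		int err = of_address_to_resource(np, 0, &res);

		of_node_put(np);	/* balance the lookup's reference */
		if (!err)
			return res.start;
	}
	return 0;			/* fall back to a platform default */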

View File

@ -375,6 +375,7 @@ static void __init bootmem_init(void)
unsigned long reserved_end;
unsigned long mapstart = ~0UL;
unsigned long bootmap_size;
phys_addr_t ramstart = (phys_addr_t)ULLONG_MAX;
bool bootmap_valid = false;
int i;
@ -395,7 +396,8 @@ static void __init bootmem_init(void)
max_low_pfn = 0;
/*
* Find the highest page frame number we have available.
* Find the highest page frame number we have available
* and the lowest used RAM address
*/
for (i = 0; i < boot_mem_map.nr_map; i++) {
unsigned long start, end;
@ -407,6 +409,8 @@ static void __init bootmem_init(void)
end = PFN_DOWN(boot_mem_map.map[i].addr
+ boot_mem_map.map[i].size);
ramstart = min(ramstart, boot_mem_map.map[i].addr);
#ifndef CONFIG_HIGHMEM
/*
* Skip highmem here so we get an accurate max_low_pfn if low
@ -436,6 +440,13 @@ static void __init bootmem_init(void)
mapstart = max(reserved_end, start);
}
/*
* Reserve any memory between the start of RAM and PHYS_OFFSET
*/
if (ramstart > PHYS_OFFSET)
add_memory_region(PHYS_OFFSET, ramstart - PHYS_OFFSET,
BOOT_MEM_RESERVED);
if (min_low_pfn >= max_low_pfn)
panic("Incorrect memory mapping !!!");
if (min_low_pfn > ARCH_PFN_OFFSET) {
@ -664,9 +675,6 @@ static int __init early_parse_mem(char *p)
add_memory_region(start, size, BOOT_MEM_RAM);
if (start && start > PHYS_OFFSET)
add_memory_region(PHYS_OFFSET, start - PHYS_OFFSET,
BOOT_MEM_RESERVED);
return 0;
}
early_param("mem", early_parse_mem);

View File

@ -572,7 +572,7 @@ asmlinkage void __weak plat_wired_tlb_setup(void)
*/
}
void __init bmips_cpu_setup(void)
void bmips_cpu_setup(void)
{
void __iomem __maybe_unused *cbr = BMIPS_GET_CBR();
u32 __maybe_unused cfg;

View File

@ -16,6 +16,7 @@
#define PGD_INDEX_SIZE (32 - PGDIR_SHIFT)
#define PMD_CACHE_INDEX PMD_INDEX_SIZE
#define PUD_CACHE_INDEX PUD_INDEX_SIZE
#ifndef __ASSEMBLY__
#define PTE_TABLE_SIZE (sizeof(pte_t) << PTE_INDEX_SIZE)

View File

@ -63,7 +63,8 @@ static inline int hash__hugepd_ok(hugepd_t hpd)
* keeping the prototype consistent across the two formats.
*/
static inline unsigned long pte_set_hidx(pte_t *ptep, real_pte_t rpte,
unsigned int subpg_index, unsigned long hidx)
unsigned int subpg_index, unsigned long hidx,
int offset)
{
return (hidx << H_PAGE_F_GIX_SHIFT) &
(H_PAGE_F_SECOND | H_PAGE_F_GIX);

View File

@ -45,7 +45,7 @@
* generic accessors and iterators here
*/
#define __real_pte __real_pte
static inline real_pte_t __real_pte(pte_t pte, pte_t *ptep)
static inline real_pte_t __real_pte(pte_t pte, pte_t *ptep, int offset)
{
real_pte_t rpte;
unsigned long *hidxp;
@ -59,7 +59,7 @@ static inline real_pte_t __real_pte(pte_t pte, pte_t *ptep)
*/
smp_rmb();
hidxp = (unsigned long *)(ptep + PTRS_PER_PTE);
hidxp = (unsigned long *)(ptep + offset);
rpte.hidx = *hidxp;
return rpte;
}
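Callers now pick the offset according to the level that holds the PTE. Written as a hypothetical helper (the patch open-codes this at each call site):

	static inline int hidx_offset(int mmu_psize, bool is_hugepage)
	{
		if (!is_hugepage)
			return PTRS_PER_PTE;	/* ordinary page table */
		if (mmu_psize == MMU_PAGE_16G)
			return PTRS_PER_PUD;	/* 16G PTEs live at PUD level */
		return PTRS_PER_PMD;		/* other huge PTEs at PMD level */
	}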
@ -86,9 +86,10 @@ static inline unsigned long __rpte_to_hidx(real_pte_t rpte, unsigned long index)
* expected to modify the PTE bits accordingly and commit the PTE to memory.
*/
static inline unsigned long pte_set_hidx(pte_t *ptep, real_pte_t rpte,
unsigned int subpg_index, unsigned long hidx)
unsigned int subpg_index,
unsigned long hidx, int offset)
{
unsigned long *hidxp = (unsigned long *)(ptep + PTRS_PER_PTE);
unsigned long *hidxp = (unsigned long *)(ptep + offset);
rpte.hidx &= ~HIDX_BITS(0xfUL, subpg_index);
*hidxp = rpte.hidx | HIDX_BITS(HIDX_SHIFT_BY_ONE(hidx), subpg_index);
@ -140,13 +141,18 @@ static inline int hash__remap_4k_pfn(struct vm_area_struct *vma, unsigned long a
}
#define H_PTE_TABLE_SIZE PTE_FRAG_SIZE
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
#if defined(CONFIG_TRANSPARENT_HUGEPAGE) || defined (CONFIG_HUGETLB_PAGE)
#define H_PMD_TABLE_SIZE ((sizeof(pmd_t) << PMD_INDEX_SIZE) + \
(sizeof(unsigned long) << PMD_INDEX_SIZE))
#else
#define H_PMD_TABLE_SIZE (sizeof(pmd_t) << PMD_INDEX_SIZE)
#endif
#ifdef CONFIG_HUGETLB_PAGE
#define H_PUD_TABLE_SIZE ((sizeof(pud_t) << PUD_INDEX_SIZE) + \
(sizeof(unsigned long) << PUD_INDEX_SIZE))
#else
#define H_PUD_TABLE_SIZE (sizeof(pud_t) << PUD_INDEX_SIZE)
#endif
#define H_PGD_TABLE_SIZE (sizeof(pgd_t) << PGD_INDEX_SIZE)
#ifdef CONFIG_TRANSPARENT_HUGEPAGE

View File

@ -23,7 +23,8 @@
H_PUD_INDEX_SIZE + H_PGD_INDEX_SIZE + PAGE_SHIFT)
#define H_PGTABLE_RANGE (ASM_CONST(1) << H_PGTABLE_EADDR_SIZE)
#if defined(CONFIG_TRANSPARENT_HUGEPAGE) && defined(CONFIG_PPC_64K_PAGES)
#if (defined(CONFIG_TRANSPARENT_HUGEPAGE) || defined(CONFIG_HUGETLB_PAGE)) && \
defined(CONFIG_PPC_64K_PAGES)
/*
* only with hash 64k we need to use the second half of pmd page table
* to store pointer to deposited pgtable_t
@ -32,6 +33,16 @@
#else
#define H_PMD_CACHE_INDEX H_PMD_INDEX_SIZE
#endif
/*
* We store the slot details in the second half of page table.
* Increase the pud level table so that hugetlb ptes can be stored
* at pud level.
*/
#if defined(CONFIG_HUGETLB_PAGE) && defined(CONFIG_PPC_64K_PAGES)
#define H_PUD_CACHE_INDEX (H_PUD_INDEX_SIZE + 1)
#else
#define H_PUD_CACHE_INDEX (H_PUD_INDEX_SIZE)
#endif
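The extra index bit simply doubles the allocation so the slot array fits behind the PUD entries; as size arithmetic (illustrative only; sizeof(pud_t) == sizeof(unsigned long) on book3s64):

	size_t pud_half  = sizeof(pud_t)         << H_PUD_INDEX_SIZE;
	size_t slot_half = sizeof(unsigned long) << H_PUD_INDEX_SIZE;
	/* (sizeof(pud_t) << (H_PUD_INDEX_SIZE + 1)) == pud_half + slot_half */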
/*
* Define the address range of the kernel non-linear virtual area
*/

View File

@ -73,10 +73,16 @@ static inline void radix__pgd_free(struct mm_struct *mm, pgd_t *pgd)
static inline pgd_t *pgd_alloc(struct mm_struct *mm)
{
pgd_t *pgd;
if (radix_enabled())
return radix__pgd_alloc(mm);
return kmem_cache_alloc(PGT_CACHE(PGD_INDEX_SIZE),
pgtable_gfp_flags(mm, GFP_KERNEL));
pgd = kmem_cache_alloc(PGT_CACHE(PGD_INDEX_SIZE),
pgtable_gfp_flags(mm, GFP_KERNEL));
memset(pgd, 0, PGD_TABLE_SIZE);
return pgd;
}
static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd)
@ -93,13 +99,13 @@ static inline void pgd_populate(struct mm_struct *mm, pgd_t *pgd, pud_t *pud)
static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long addr)
{
return kmem_cache_alloc(PGT_CACHE(PUD_INDEX_SIZE),
return kmem_cache_alloc(PGT_CACHE(PUD_CACHE_INDEX),
pgtable_gfp_flags(mm, GFP_KERNEL));
}
static inline void pud_free(struct mm_struct *mm, pud_t *pud)
{
kmem_cache_free(PGT_CACHE(PUD_INDEX_SIZE), pud);
kmem_cache_free(PGT_CACHE(PUD_CACHE_INDEX), pud);
}
static inline void pud_populate(struct mm_struct *mm, pud_t *pud, pmd_t *pmd)
@ -115,7 +121,7 @@ static inline void __pud_free_tlb(struct mmu_gather *tlb, pud_t *pud,
* ahead and flush the page walk cache
*/
flush_tlb_pgtable(tlb, address);
pgtable_free_tlb(tlb, pud, PUD_INDEX_SIZE);
pgtable_free_tlb(tlb, pud, PUD_CACHE_INDEX);
}
static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long addr)

View File

@ -232,11 +232,13 @@ extern unsigned long __pmd_index_size;
extern unsigned long __pud_index_size;
extern unsigned long __pgd_index_size;
extern unsigned long __pmd_cache_index;
extern unsigned long __pud_cache_index;
#define PTE_INDEX_SIZE __pte_index_size
#define PMD_INDEX_SIZE __pmd_index_size
#define PUD_INDEX_SIZE __pud_index_size
#define PGD_INDEX_SIZE __pgd_index_size
#define PMD_CACHE_INDEX __pmd_cache_index
#define PUD_CACHE_INDEX __pud_cache_index
/*
* Because of the use of PTE fragments and THP, the sizes of the page
* tables are not always derived from the index sizes above.
@ -348,7 +350,7 @@ extern unsigned long pci_io_base;
*/
#ifndef __real_pte
#define __real_pte(e,p) ((real_pte_t){(e)})
#define __real_pte(e, p, o) ((real_pte_t){(e)})
#define __rpte_to_pte(r) ((r).pte)
#define __rpte_to_hidx(r,index) (pte_val(__rpte_to_pte(r)) >> H_PAGE_F_GIX_SHIFT)

View File

@ -645,7 +645,7 @@ END_FTR_SECTION_NESTED(ftr,ftr,943)
EXC_HV, SOFTEN_TEST_HV, bitmask)
#define MASKABLE_RELON_EXCEPTION_HV_OOL(vec, label, bitmask) \
MASKABLE_EXCEPTION_PROLOG_1(PACA_EXGEN, SOFTEN_NOTEST_HV, vec, bitmask);\
MASKABLE_EXCEPTION_PROLOG_1(PACA_EXGEN, SOFTEN_TEST_HV, vec, bitmask);\
EXCEPTION_RELON_PROLOG_PSERIES_1(label, EXC_HV)
/*

View File

@ -29,6 +29,16 @@
#define PACA_IRQ_HMI 0x20
#define PACA_IRQ_PMI 0x40
/*
* Some soft-masked interrupts must be hard masked until they are replayed
* (e.g., because the soft-masked handler does not clear the exception).
*/
#ifdef CONFIG_PPC_BOOK3S
#define PACA_IRQ_MUST_HARD_MASK (PACA_IRQ_EE|PACA_IRQ_PMI)
#else
#define PACA_IRQ_MUST_HARD_MASK (PACA_IRQ_EE)
#endif
/*
* flags for paca->irq_soft_mask
*/
@ -244,7 +254,7 @@ static inline bool lazy_irq_pending(void)
static inline void may_hard_irq_enable(void)
{
get_paca()->irq_happened &= ~PACA_IRQ_HARD_DIS;
if (!(get_paca()->irq_happened & PACA_IRQ_EE))
if (!(get_paca()->irq_happened & PACA_IRQ_MUST_HARD_MASK))
__hard_irq_enable();
}

View File

@ -140,6 +140,12 @@ static inline bool kdump_in_progress(void)
return false;
}
static inline void crash_ipi_callback(struct pt_regs *regs) { }
static inline void crash_send_ipi(void (*crash_ipi_callback)(struct pt_regs *))
{
}
#endif /* CONFIG_KEXEC_CORE */
#endif /* ! __ASSEMBLY__ */
#endif /* __KERNEL__ */

View File

@ -24,6 +24,7 @@ extern int icache_44x_need_flush;
#define PGD_INDEX_SIZE (32 - PGDIR_SHIFT)
#define PMD_CACHE_INDEX PMD_INDEX_SIZE
#define PUD_CACHE_INDEX PUD_INDEX_SIZE
#ifndef __ASSEMBLY__
#define PTE_TABLE_SIZE (sizeof(pte_t) << PTE_INDEX_SIZE)

View File

@ -27,6 +27,7 @@
#else
#define PMD_CACHE_INDEX PMD_INDEX_SIZE
#endif
#define PUD_CACHE_INDEX PUD_INDEX_SIZE
/*
* Define the address range of the kernel non-linear virtual area

View File

@ -44,6 +44,11 @@ extern int sysfs_add_device_to_node(struct device *dev, int nid);
extern void sysfs_remove_device_from_node(struct device *dev, int nid);
extern int numa_update_cpu_topology(bool cpus_locked);
static inline void update_numa_cpu_lookup_table(unsigned int cpu, int node)
{
numa_cpu_lookup_table[cpu] = node;
}
static inline int early_cpu_to_node(int cpu)
{
int nid;
@ -76,12 +81,16 @@ static inline int numa_update_cpu_topology(bool cpus_locked)
{
return 0;
}
static inline void update_numa_cpu_lookup_table(unsigned int cpu, int node) {}
#endif /* CONFIG_NUMA */
#if defined(CONFIG_NUMA) && defined(CONFIG_PPC_SPLPAR)
extern int start_topology_update(void);
extern int stop_topology_update(void);
extern int prrn_is_enabled(void);
extern int find_and_online_cpu_nid(int cpu);
#else
static inline int start_topology_update(void)
{
@ -95,6 +104,10 @@ static inline int prrn_is_enabled(void)
{
return 0;
}
static inline int find_and_online_cpu_nid(int cpu)
{
return 0;
}
#endif /* CONFIG_NUMA && CONFIG_PPC_SPLPAR */
#if defined(CONFIG_HOTPLUG_CPU) || defined(CONFIG_NEED_MULTIPLE_NODES)

View File

@ -943,6 +943,8 @@ kernel_dbg_exc:
/*
* An interrupt came in while soft-disabled; we mark paca->irq_happened
* accordingly and if the interrupt is level sensitive, we hard disable.
* Hard disable (full_mask) corresponds to PACA_IRQ_MUST_HARD_MASK, so
* keep these in sync.
*/
.macro masked_interrupt_book3e paca_irq full_mask

View File

@ -1426,7 +1426,7 @@ EXC_COMMON_BEGIN(soft_nmi_common)
* triggered and won't automatically refire.
* - If it was a HMI we return immediately since we handled it in realmode
* and it won't refire.
* - else we hard disable and return.
* - Else it is one of PACA_IRQ_MUST_HARD_MASK, so hard disable and return.
* This is called with r10 containing the value to OR to the paca field.
*/
#define MASKED_INTERRUPT(_H) \
@ -1441,8 +1441,8 @@ masked_##_H##interrupt: \
ori r10,r10,0xffff; \
mtspr SPRN_DEC,r10; \
b MASKED_DEC_HANDLER_LABEL; \
1: andi. r10,r10,(PACA_IRQ_DBELL|PACA_IRQ_HMI); \
bne 2f; \
1: andi. r10,r10,PACA_IRQ_MUST_HARD_MASK; \
beq 2f; \
mfspr r10,SPRN_##_H##SRR1; \
xori r10,r10,MSR_EE; /* clear MSR_EE */ \
mtspr SPRN_##_H##SRR1,r10; \

View File

@ -788,7 +788,8 @@ static int register_cpu_online(unsigned int cpu)
if (cpu_has_feature(CPU_FTR_PPCAS_ARCH_V2))
device_create_file(s, &dev_attr_pir);
if (cpu_has_feature(CPU_FTR_ARCH_206))
if (cpu_has_feature(CPU_FTR_ARCH_206) &&
!firmware_has_feature(FW_FEATURE_LPAR))
device_create_file(s, &dev_attr_tscr);
#endif /* CONFIG_PPC64 */
@ -873,7 +874,8 @@ static int unregister_cpu_online(unsigned int cpu)
if (cpu_has_feature(CPU_FTR_PPCAS_ARCH_V2))
device_remove_file(s, &dev_attr_pir);
if (cpu_has_feature(CPU_FTR_ARCH_206))
if (cpu_has_feature(CPU_FTR_ARCH_206) &&
!firmware_has_feature(FW_FEATURE_LPAR))
device_remove_file(s, &dev_attr_tscr);
#endif /* CONFIG_PPC64 */

View File

@ -216,6 +216,8 @@ static void __init __walk_drmem_v1_lmbs(const __be32 *prop, const __be32 *usm,
u32 i, n_lmbs;
n_lmbs = of_read_number(prop++, 1);
if (n_lmbs == 0)
return;
for (i = 0; i < n_lmbs; i++) {
read_drconf_v1_cell(&lmb, &prop);
@ -245,6 +247,8 @@ static void __init __walk_drmem_v2_lmbs(const __be32 *prop, const __be32 *usm,
u32 i, j, lmb_sets;
lmb_sets = of_read_number(prop++, 1);
if (lmb_sets == 0)
return;
for (i = 0; i < lmb_sets; i++) {
read_drconf_v2_cell(&dr_cell, &prop);
@ -354,6 +358,8 @@ static void __init init_drmem_v1_lmbs(const __be32 *prop)
struct drmem_lmb *lmb;
drmem_info->n_lmbs = of_read_number(prop++, 1);
if (drmem_info->n_lmbs == 0)
return;
drmem_info->lmbs = kcalloc(drmem_info->n_lmbs, sizeof(*lmb),
GFP_KERNEL);
@ -373,6 +379,8 @@ static void __init init_drmem_v2_lmbs(const __be32 *prop)
int lmb_index;
lmb_sets = of_read_number(prop++, 1);
if (lmb_sets == 0)
return;
/* first pass, calculate the number of LMBs */
p = prop;

View File

@ -55,7 +55,7 @@ int __hash_page_4K(unsigned long ea, unsigned long access, unsigned long vsid,
* need to add in 0x1 if it's a read-only user page
*/
rflags = htab_convert_pte_flags(new_pte);
rpte = __real_pte(__pte(old_pte), ptep);
rpte = __real_pte(__pte(old_pte), ptep, PTRS_PER_PTE);
if (cpu_has_feature(CPU_FTR_NOEXECUTE) &&
!cpu_has_feature(CPU_FTR_COHERENT_ICACHE))
@ -117,7 +117,7 @@ int __hash_page_4K(unsigned long ea, unsigned long access, unsigned long vsid,
return -1;
}
new_pte = (new_pte & ~_PAGE_HPTEFLAGS) | H_PAGE_HASHPTE;
new_pte |= pte_set_hidx(ptep, rpte, 0, slot);
new_pte |= pte_set_hidx(ptep, rpte, 0, slot, PTRS_PER_PTE);
}
*ptep = __pte(new_pte & ~H_PAGE_BUSY);
return 0;

View File

@ -86,7 +86,7 @@ int __hash_page_4K(unsigned long ea, unsigned long access, unsigned long vsid,
subpg_index = (ea & (PAGE_SIZE - 1)) >> shift;
vpn = hpt_vpn(ea, vsid, ssize);
rpte = __real_pte(__pte(old_pte), ptep);
rpte = __real_pte(__pte(old_pte), ptep, PTRS_PER_PTE);
/*
* None of the sub-4k pages is hashed
*/
@ -214,7 +214,7 @@ int __hash_page_4K(unsigned long ea, unsigned long access, unsigned long vsid,
return -1;
}
new_pte |= pte_set_hidx(ptep, rpte, subpg_index, slot);
new_pte |= pte_set_hidx(ptep, rpte, subpg_index, slot, PTRS_PER_PTE);
new_pte |= H_PAGE_HASHPTE;
*ptep = __pte(new_pte & ~H_PAGE_BUSY);
@ -262,7 +262,7 @@ int __hash_page_64K(unsigned long ea, unsigned long access,
} while (!pte_xchg(ptep, __pte(old_pte), __pte(new_pte)));
rflags = htab_convert_pte_flags(new_pte);
rpte = __real_pte(__pte(old_pte), ptep);
rpte = __real_pte(__pte(old_pte), ptep, PTRS_PER_PTE);
if (cpu_has_feature(CPU_FTR_NOEXECUTE) &&
!cpu_has_feature(CPU_FTR_COHERENT_ICACHE))
@ -327,7 +327,7 @@ int __hash_page_64K(unsigned long ea, unsigned long access,
}
new_pte = (new_pte & ~_PAGE_HPTEFLAGS) | H_PAGE_HASHPTE;
new_pte |= pte_set_hidx(ptep, rpte, 0, slot);
new_pte |= pte_set_hidx(ptep, rpte, 0, slot, PTRS_PER_PTE);
}
*ptep = __pte(new_pte & ~H_PAGE_BUSY);
return 0;

View File

@ -1008,6 +1008,7 @@ void __init hash__early_init_mmu(void)
__pmd_index_size = H_PMD_INDEX_SIZE;
__pud_index_size = H_PUD_INDEX_SIZE;
__pgd_index_size = H_PGD_INDEX_SIZE;
__pud_cache_index = H_PUD_CACHE_INDEX;
__pmd_cache_index = H_PMD_CACHE_INDEX;
__pte_table_size = H_PTE_TABLE_SIZE;
__pmd_table_size = H_PMD_TABLE_SIZE;

View File

@ -27,7 +27,7 @@ int __hash_page_huge(unsigned long ea, unsigned long access, unsigned long vsid,
unsigned long vpn;
unsigned long old_pte, new_pte;
unsigned long rflags, pa, sz;
long slot;
long slot, offset;
BUG_ON(shift != mmu_psize_defs[mmu_psize].shift);
@ -63,7 +63,11 @@ int __hash_page_huge(unsigned long ea, unsigned long access, unsigned long vsid,
} while(!pte_xchg(ptep, __pte(old_pte), __pte(new_pte)));
rflags = htab_convert_pte_flags(new_pte);
rpte = __real_pte(__pte(old_pte), ptep);
if (unlikely(mmu_psize == MMU_PAGE_16G))
offset = PTRS_PER_PUD;
else
offset = PTRS_PER_PMD;
rpte = __real_pte(__pte(old_pte), ptep, offset);
sz = ((1UL) << shift);
if (!cpu_has_feature(CPU_FTR_COHERENT_ICACHE))
@ -104,7 +108,7 @@ int __hash_page_huge(unsigned long ea, unsigned long access, unsigned long vsid,
return -1;
}
new_pte |= pte_set_hidx(ptep, rpte, 0, slot);
new_pte |= pte_set_hidx(ptep, rpte, 0, slot, offset);
}
/*

View File

@ -100,6 +100,6 @@ void pgtable_cache_init(void)
* same size as either the pgd or pmd index except with THP enabled
* on book3s 64
*/
if (PUD_INDEX_SIZE && !PGT_CACHE(PUD_INDEX_SIZE))
pgtable_cache_add(PUD_INDEX_SIZE, pud_ctor);
if (PUD_CACHE_INDEX && !PGT_CACHE(PUD_CACHE_INDEX))
pgtable_cache_add(PUD_CACHE_INDEX, pud_ctor);
}

View File

@ -143,11 +143,6 @@ static void reset_numa_cpu_lookup_table(void)
numa_cpu_lookup_table[cpu] = -1;
}
static void update_numa_cpu_lookup_table(unsigned int cpu, int node)
{
numa_cpu_lookup_table[cpu] = node;
}
static void map_cpu_to_node(int cpu, int node)
{
update_numa_cpu_lookup_table(cpu, node);

View File

@ -17,9 +17,11 @@
#include <linux/of_fdt.h>
#include <linux/mm.h>
#include <linux/string_helpers.h>
#include <linux/stop_machine.h>
#include <asm/pgtable.h>
#include <asm/pgalloc.h>
#include <asm/mmu_context.h>
#include <asm/dma.h>
#include <asm/machdep.h>
#include <asm/mmu.h>
@ -333,6 +335,22 @@ static void __init radix_init_pgtable(void)
"r" (TLBIEL_INVAL_SET_LPID), "r" (0));
asm volatile("eieio; tlbsync; ptesync" : : : "memory");
trace_tlbie(0, 0, TLBIEL_INVAL_SET_LPID, 0, 2, 1, 1);
/*
* The init_mm context is given the first available (non-zero) PID,
* which is the "guard PID" and contains no page table. PIDR should
* never be set to zero because that duplicates the kernel address
* space at the 0x0... offset (quadrant 0)!
*
* An arbitrary PID that may later be allocated by the PID allocator
* for userspace processes must not be used either, because that
* would cause stale user mappings for that PID on CPUs outside of
* the TLB invalidation scheme (because it won't be in mm_cpumask).
*
* So permanently carve out one PID for the purpose of a guard PID.
*/
init_mm.context.id = mmu_base_pid;
mmu_base_pid++;
}
static void __init radix_init_partition_table(void)
@ -535,6 +553,7 @@ void __init radix__early_init_mmu(void)
__pmd_index_size = RADIX_PMD_INDEX_SIZE;
__pud_index_size = RADIX_PUD_INDEX_SIZE;
__pgd_index_size = RADIX_PGD_INDEX_SIZE;
__pud_cache_index = RADIX_PUD_INDEX_SIZE;
__pmd_cache_index = RADIX_PMD_INDEX_SIZE;
__pte_table_size = RADIX_PTE_TABLE_SIZE;
__pmd_table_size = RADIX_PMD_TABLE_SIZE;
@ -579,7 +598,8 @@ void __init radix__early_init_mmu(void)
radix_init_iamr();
radix_init_pgtable();
/* Switch to the guard PID before turning on MMU */
radix__switch_mmu_context(NULL, &init_mm);
if (cpu_has_feature(CPU_FTR_HVMODE))
tlbiel_all();
}
@ -604,6 +624,7 @@ void radix__early_init_mmu_secondary(void)
}
radix_init_iamr();
radix__switch_mmu_context(NULL, &init_mm);
if (cpu_has_feature(CPU_FTR_HVMODE))
tlbiel_all();
}
@ -666,6 +687,30 @@ static void free_pmd_table(pmd_t *pmd_start, pud_t *pud)
pud_clear(pud);
}
struct change_mapping_params {
pte_t *pte;
unsigned long start;
unsigned long end;
unsigned long aligned_start;
unsigned long aligned_end;
};
static int stop_machine_change_mapping(void *data)
{
struct change_mapping_params *params =
(struct change_mapping_params *)data;
if (!data)
return -1;
spin_unlock(&init_mm.page_table_lock);
pte_clear(&init_mm, params->aligned_start, params->pte);
create_physical_mapping(params->aligned_start, params->start);
create_physical_mapping(params->end, params->aligned_end);
spin_lock(&init_mm.page_table_lock);
return 0;
}
static void remove_pte_table(pte_t *pte_start, unsigned long addr,
unsigned long end)
{
@ -694,6 +739,52 @@ static void remove_pte_table(pte_t *pte_start, unsigned long addr,
}
}
/*
* Helper to clear the PTE and potentially split the mapping
*/
static void split_kernel_mapping(unsigned long addr, unsigned long end,
unsigned long size, pte_t *pte)
{
unsigned long mask = ~(size - 1);
unsigned long aligned_start = addr & mask;
unsigned long aligned_end = addr + size;
struct change_mapping_params params;
bool split_region = false;
if ((end - addr) < size) {
/*
* We're going to clear the PTE without having flushed
* the mapping yet, so we must remap and flush. If the
* effects are visible outside the processor, or if we
* are running in code close to the mapping we cleared,
* we are in trouble.
*/
if (overlaps_kernel_text(aligned_start, addr) ||
overlaps_kernel_text(end, aligned_end)) {
/*
* Hack, just return, don't pte_clear
*/
WARN_ONCE(1, "Linear mapping %lx->%lx overlaps kernel "
"text, not splitting\n", addr, end);
return;
}
split_region = true;
}
if (split_region) {
params.pte = pte;
params.start = addr;
params.end = end;
params.aligned_start = addr & ~(size - 1);
params.aligned_end = min_t(unsigned long, aligned_end,
(unsigned long)__va(memblock_end_of_DRAM()));
stop_machine(stop_machine_change_mapping, &params, NULL);
return;
}
pte_clear(&init_mm, addr, pte);
}
static void remove_pmd_table(pmd_t *pmd_start, unsigned long addr,
unsigned long end)
{
@ -709,13 +800,7 @@ static void remove_pmd_table(pmd_t *pmd_start, unsigned long addr,
continue;
if (pmd_huge(*pmd)) {
if (!IS_ALIGNED(addr, PMD_SIZE) ||
!IS_ALIGNED(next, PMD_SIZE)) {
WARN_ONCE(1, "%s: unaligned range\n", __func__);
continue;
}
pte_clear(&init_mm, addr, (pte_t *)pmd);
split_kernel_mapping(addr, end, PMD_SIZE, (pte_t *)pmd);
continue;
}
@ -740,13 +825,7 @@ static void remove_pud_table(pud_t *pud_start, unsigned long addr,
continue;
if (pud_huge(*pud)) {
if (!IS_ALIGNED(addr, PUD_SIZE) ||
!IS_ALIGNED(next, PUD_SIZE)) {
WARN_ONCE(1, "%s: unaligned range\n", __func__);
continue;
}
pte_clear(&init_mm, addr, (pte_t *)pud);
split_kernel_mapping(addr, end, PUD_SIZE, (pte_t *)pud);
continue;
}
@ -772,13 +851,7 @@ static void remove_pagetable(unsigned long start, unsigned long end)
continue;
if (pgd_huge(*pgd)) {
if (!IS_ALIGNED(addr, PGDIR_SIZE) ||
!IS_ALIGNED(next, PGDIR_SIZE)) {
WARN_ONCE(1, "%s: unaligned range\n", __func__);
continue;
}
pte_clear(&init_mm, addr, (pte_t *)pgd);
split_kernel_mapping(addr, end, PGDIR_SIZE, (pte_t *)pgd);
continue;
}

View File

@ -82,6 +82,8 @@ unsigned long __pgd_index_size;
EXPORT_SYMBOL(__pgd_index_size);
unsigned long __pmd_cache_index;
EXPORT_SYMBOL(__pmd_cache_index);
unsigned long __pud_cache_index;
EXPORT_SYMBOL(__pud_cache_index);
unsigned long __pte_table_size;
EXPORT_SYMBOL(__pte_table_size);
unsigned long __pmd_table_size;
@ -471,6 +473,8 @@ void mmu_partition_table_set_entry(unsigned int lpid, unsigned long dw0,
if (old & PATB_HR) {
asm volatile(PPC_TLBIE_5(%0,%1,2,0,1) : :
"r" (TLBIEL_INVAL_SET_LPID), "r" (lpid));
asm volatile(PPC_TLBIE_5(%0,%1,2,1,1) : :
"r" (TLBIEL_INVAL_SET_LPID), "r" (lpid));
trace_tlbie(lpid, 0, TLBIEL_INVAL_SET_LPID, lpid, 2, 0, 1);
} else {
asm volatile(PPC_TLBIE_5(%0,%1,2,0,0) : :

View File

@ -51,7 +51,7 @@ void hpte_need_flush(struct mm_struct *mm, unsigned long addr,
unsigned int psize;
int ssize;
real_pte_t rpte;
int i;
int i, offset;
i = batch->index;
@ -67,6 +67,10 @@ void hpte_need_flush(struct mm_struct *mm, unsigned long addr,
psize = get_slice_psize(mm, addr);
/* Mask the address for the correct page size */
addr &= ~((1UL << mmu_psize_defs[psize].shift) - 1);
if (unlikely(psize == MMU_PAGE_16G))
offset = PTRS_PER_PUD;
else
offset = PTRS_PER_PMD;
#else
BUG();
psize = pte_pagesize_index(mm, addr, pte); /* shutup gcc */
@ -78,6 +82,7 @@ void hpte_need_flush(struct mm_struct *mm, unsigned long addr,
* support 64k pages, this might be different from the
* hardware page size encoded in the slice table. */
addr &= PAGE_MASK;
offset = PTRS_PER_PTE;
}
@ -91,7 +96,7 @@ void hpte_need_flush(struct mm_struct *mm, unsigned long addr,
}
WARN_ON(vsid == 0);
vpn = hpt_vpn(addr, vsid, ssize);
rpte = __real_pte(__pte(pte), ptep);
rpte = __real_pte(__pte(pte), ptep, offset);
/*
* Check if we have an active batch on this CPU. If not, just

View File

@ -199,9 +199,11 @@ static void disable_nest_pmu_counters(void)
const struct cpumask *l_cpumask;
get_online_cpus();
for_each_online_node(nid) {
for_each_node_with_cpus(nid) {
l_cpumask = cpumask_of_node(nid);
cpu = cpumask_first(l_cpumask);
cpu = cpumask_first_and(l_cpumask, cpu_online_mask);
if (cpu >= nr_cpu_ids)
continue;
opal_imc_counters_stop(OPAL_IMC_COUNTERS_NEST,
get_hard_smp_processor_id(cpu));
}

View File

@ -1063,16 +1063,16 @@ struct vas_window *vas_tx_win_open(int vasid, enum vas_cop_type cop,
rc = PTR_ERR(txwin->paste_kaddr);
goto free_window;
}
} else {
/*
* A user mapping must ensure that context switch issues
* CP_ABORT for this thread.
*/
rc = set_thread_uses_vas();
if (rc)
goto free_window;
}
/*
* Now that we have a send window, ensure context switch issues
* CP_ABORT for this thread.
*/
rc = -EINVAL;
if (set_thread_uses_vas() < 0)
goto free_window;
set_vinst_win(vinst, txwin);
return txwin;

View File

@ -36,6 +36,7 @@
#include <asm/xics.h>
#include <asm/xive.h>
#include <asm/plpar_wrappers.h>
#include <asm/topology.h>
#include "pseries.h"
#include "offline_states.h"
@ -331,6 +332,7 @@ static void pseries_remove_processor(struct device_node *np)
BUG_ON(cpu_online(cpu));
set_cpu_present(cpu, false);
set_hard_smp_processor_id(cpu, -1);
update_numa_cpu_lookup_table(cpu, -1);
break;
}
if (cpu >= nr_cpu_ids)
@ -340,8 +342,6 @@ static void pseries_remove_processor(struct device_node *np)
cpu_maps_update_done();
}
extern int find_and_online_cpu_nid(int cpu);
static int dlpar_online_cpu(struct device_node *dn)
{
int rc = 0;

View File

@ -48,6 +48,28 @@ static irqreturn_t ras_epow_interrupt(int irq, void *dev_id);
static irqreturn_t ras_error_interrupt(int irq, void *dev_id);
/*
* Enable the hotplug interrupts late because processing them may touch other
* devices or systems (e.g. hugepages) that have not been initialized at the
* subsys stage.
*/
int __init init_ras_hotplug_IRQ(void)
{
struct device_node *np;
/* Hotplug Events */
np = of_find_node_by_path("/event-sources/hot-plug-events");
if (np != NULL) {
if (dlpar_workqueue_init() == 0)
request_event_sources_irqs(np, ras_hotplug_interrupt,
"RAS_HOTPLUG");
of_node_put(np);
}
return 0;
}
machine_late_initcall(pseries, init_ras_hotplug_IRQ);
/*
* Initialize handlers for the set of interrupts caused by hardware errors
* and power system events.
@ -66,15 +88,6 @@ static int __init init_ras_IRQ(void)
of_node_put(np);
}
/* Hotplug Events */
np = of_find_node_by_path("/event-sources/hot-plug-events");
if (np != NULL) {
if (dlpar_workqueue_init() == 0)
request_event_sources_irqs(np, ras_hotplug_interrupt,
"RAS_HOTPLUG");
of_node_put(np);
}
/* EPOW Events */
np = of_find_node_by_path("/event-sources/epow-events");
if (np != NULL) {

View File

@ -356,7 +356,8 @@ static int xive_spapr_configure_queue(u32 target, struct xive_q *q, u8 prio,
rc = plpar_int_get_queue_info(0, target, prio, &esn_page, &esn_size);
if (rc) {
pr_err("Error %lld getting queue info prio %d\n", rc, prio);
pr_err("Error %lld getting queue info CPU %d prio %d\n", rc,
target, prio);
rc = -EIO;
goto fail;
}
@ -370,7 +371,8 @@ static int xive_spapr_configure_queue(u32 target, struct xive_q *q, u8 prio,
/* Configure and enable the queue in HW */
rc = plpar_int_set_queue_config(flags, target, prio, qpage_phys, order);
if (rc) {
pr_err("Error %lld setting queue for prio %d\n", rc, prio);
pr_err("Error %lld setting queue for CPU %d prio %d\n", rc,
target, prio);
rc = -EIO;
} else {
q->qpage = qpage;
@ -389,8 +391,8 @@ static int xive_spapr_setup_queue(unsigned int cpu, struct xive_cpu *xc,
if (IS_ERR(qpage))
return PTR_ERR(qpage);
return xive_spapr_configure_queue(cpu, q, prio, qpage,
xive_queue_shift);
return xive_spapr_configure_queue(get_hard_smp_processor_id(cpu),
q, prio, qpage, xive_queue_shift);
}
static void xive_spapr_cleanup_queue(unsigned int cpu, struct xive_cpu *xc,
@ -399,10 +401,12 @@ static void xive_spapr_cleanup_queue(unsigned int cpu, struct xive_cpu *xc,
struct xive_q *q = &xc->queue[prio];
unsigned int alloc_order;
long rc;
int hw_cpu = get_hard_smp_processor_id(cpu);
rc = plpar_int_set_queue_config(0, cpu, prio, 0, 0);
rc = plpar_int_set_queue_config(0, hw_cpu, prio, 0, 0);
if (rc)
pr_err("Error %ld setting queue for prio %d\n", rc, prio);
pr_err("Error %ld setting queue for CPU %d prio %d\n", rc,
hw_cpu, prio);
alloc_order = xive_alloc_order(xive_queue_shift);
free_pages((unsigned long)q->qpage, alloc_order);

View File

@ -430,6 +430,8 @@ config SPARC_LEON
depends on SPARC32
select USB_EHCI_BIG_ENDIAN_MMIO
select USB_EHCI_BIG_ENDIAN_DESC
select USB_UHCI_BIG_ENDIAN_MMIO
select USB_UHCI_BIG_ENDIAN_DESC
---help---
Say Y here if you are running on a SPARC-LEON processor.
The LEON processor is a synthesizable VHDL model of the

arch/x86/.gitignore vendored
View File

@ -1,6 +1,7 @@
boot/compressed/vmlinux
tools/test_get_len
tools/insn_sanity
tools/insn_decoder_test
purgatory/kexec-purgatory.c
purgatory/purgatory.ro

View File

@ -423,12 +423,6 @@ config X86_MPPARSE
For old smp systems that do not have proper acpi support. Newer systems
(esp with 64bit cpus) with acpi support, MADT and DSDT will override it
config X86_BIGSMP
bool "Support for big SMP systems with more than 8 CPUs"
depends on X86_32 && SMP
---help---
This option is needed for the systems that have more than 8 CPUs
config GOLDFISH
def_bool y
depends on X86_GOLDFISH
@ -460,6 +454,12 @@ config INTEL_RDT
Say N if unsure.
if X86_32
config X86_BIGSMP
bool "Support for big SMP systems with more than 8 CPUs"
depends on SMP
---help---
This option is needed for the systems that have more than 8 CPUs
config X86_EXTENDED_PLATFORM
bool "Support for extended (non-PC) x86 platforms"
default y
@ -949,25 +949,66 @@ config MAXSMP
Enable maximum number of CPUS and NUMA Nodes for this architecture.
If unsure, say N.
#
# The maximum number of CPUs supported:
#
# The main config value is NR_CPUS, which defaults to NR_CPUS_DEFAULT,
# and which can be configured interactively in the
# [NR_CPUS_RANGE_BEGIN ... NR_CPUS_RANGE_END] range.
#
# The ranges are different on 32-bit and 64-bit kernels, depending on
# hardware capabilities and scalability features of the kernel.
#
# ( If MAXSMP is enabled we just use the highest possible value and disable
# interactive configuration. )
#
config NR_CPUS_RANGE_BEGIN
int
default NR_CPUS_RANGE_END if MAXSMP
default 1 if !SMP
default 2
config NR_CPUS_RANGE_END
int
depends on X86_32
default 64 if SMP && X86_BIGSMP
default 8 if SMP && !X86_BIGSMP
default 1 if !SMP
config NR_CPUS_RANGE_END
int
depends on X86_64
default 8192 if SMP && ( MAXSMP || CPUMASK_OFFSTACK)
default 512 if SMP && (!MAXSMP && !CPUMASK_OFFSTACK)
default 1 if !SMP
config NR_CPUS_DEFAULT
int
depends on X86_32
default 32 if X86_BIGSMP
default 8 if SMP
default 1 if !SMP
config NR_CPUS_DEFAULT
int
depends on X86_64
default 8192 if MAXSMP
default 64 if SMP
default 1 if !SMP
config NR_CPUS
int "Maximum number of CPUs" if SMP && !MAXSMP
range 2 8 if SMP && X86_32 && !X86_BIGSMP
range 2 64 if SMP && X86_32 && X86_BIGSMP
range 2 512 if SMP && !MAXSMP && !CPUMASK_OFFSTACK && X86_64
range 2 8192 if SMP && !MAXSMP && CPUMASK_OFFSTACK && X86_64
default "1" if !SMP
default "8192" if MAXSMP
default "32" if SMP && X86_BIGSMP
default "8" if SMP && X86_32
default "64" if SMP
range NR_CPUS_RANGE_BEGIN NR_CPUS_RANGE_END
default NR_CPUS_DEFAULT
---help---
This allows you to specify the maximum number of CPUs which this
kernel will support. If CPUMASK_OFFSTACK is enabled, the maximum
supported value is 8192, otherwise the maximum value is 512. The
minimum value which makes sense is 2.
This is purely to save memory - each supported CPU adds
approximately eight kilobytes to the kernel image.
This is purely to save memory: each supported CPU adds about 8KB
to the kernel image.
config SCHED_SMT
bool "SMT (Hyperthreading) scheduler support"
@ -1363,7 +1404,7 @@ config HIGHMEM4G
config HIGHMEM64G
bool "64GB"
depends on !M486
depends on !M486 && !M586 && !M586TSC && !M586MMX && !MGEODE_LX && !MGEODEGX1 && !MCYRIXIII && !MELAN && !MWINCHIPC6 && !MWINCHIP3D && !MK6
select X86_PAE
---help---
Select this if you have a 32-bit processor and more than 4

View File

@ -374,7 +374,7 @@ config X86_TSC
config X86_CMPXCHG64
def_bool y
depends on X86_PAE || X86_64 || MCORE2 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MATOM
depends on X86_PAE || X86_64 || MCORE2 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || M586TSC || M586MMX || MATOM || MGEODE_LX || MGEODEGX1 || MK6 || MK7 || MK8
# this should be set for all -march=.. options where the compiler
# generates cmov.
@ -385,7 +385,7 @@ config X86_CMOV
config X86_MINIMUM_CPU_FAMILY
int
default "64" if X86_64
default "6" if X86_32 && X86_P6_NOP
default "6" if X86_32 && (MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MVIAC3_2 || MVIAC7 || MEFFICEON || MATOM || MCRUSOE || MCORE2 || MK7 || MK8)
default "5" if X86_32 && X86_CMPXCHG64
default "4"

View File

@ -57,10 +57,12 @@ void sha512_mb_mgr_init_avx2(struct sha512_mb_mgr *state)
{
unsigned int j;
state->lens[0] = 0;
state->lens[1] = 1;
state->lens[2] = 2;
state->lens[3] = 3;
/* initially all lanes are unused */
state->lens[0] = 0xFFFFFFFF00000000;
state->lens[1] = 0xFFFFFFFF00000001;
state->lens[2] = 0xFFFFFFFF00000002;
state->lens[3] = 0xFFFFFFFF00000003;
state->unused_lanes = 0xFF03020100;
for (j = 0; j < 4; j++)
state->ldata[j].job_in_lane = NULL;
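The encoding this relies on (as assumed here, not spelled out in the patch): the low 32 bits of lens[] carry the lane number and the high 32 bits the job length, so 0xFFFFFFFF in the high half sorts an unused lane behind every real job:

	/* Hypothetical helper naming the lane encoding used above. */
	#define SHA512_MB_LANE_UNUSED(lane)	(0xFFFFFFFF00000000ULL | (lane))
	/* state->lens[2] == SHA512_MB_LANE_UNUSED(2) == 0xFFFFFFFF00000002 */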

View File

@ -97,80 +97,69 @@ For 32-bit we have the following conventions - kernel is built with
#define SIZEOF_PTREGS 21*8
.macro ALLOC_PT_GPREGS_ON_STACK
addq $-(15*8), %rsp
.endm
.macro PUSH_AND_CLEAR_REGS rdx=%rdx rax=%rax
/*
* Push registers and sanitize registers of values that a
* speculation attack might otherwise want to exploit. The
* lower registers are likely clobbered well before they
* could be put to use in a speculative execution gadget.
* Interleave XOR with PUSH for better uop scheduling:
*/
pushq %rdi /* pt_regs->di */
pushq %rsi /* pt_regs->si */
pushq \rdx /* pt_regs->dx */
pushq %rcx /* pt_regs->cx */
pushq \rax /* pt_regs->ax */
pushq %r8 /* pt_regs->r8 */
xorq %r8, %r8 /* nospec r8 */
pushq %r9 /* pt_regs->r9 */
xorq %r9, %r9 /* nospec r9 */
pushq %r10 /* pt_regs->r10 */
xorq %r10, %r10 /* nospec r10 */
pushq %r11 /* pt_regs->r11 */
xorq %r11, %r11 /* nospec r11*/
pushq %rbx /* pt_regs->rbx */
xorl %ebx, %ebx /* nospec rbx*/
pushq %rbp /* pt_regs->rbp */
xorl %ebp, %ebp /* nospec rbp*/
pushq %r12 /* pt_regs->r12 */
xorq %r12, %r12 /* nospec r12*/
pushq %r13 /* pt_regs->r13 */
xorq %r13, %r13 /* nospec r13*/
pushq %r14 /* pt_regs->r14 */
xorq %r14, %r14 /* nospec r14*/
pushq %r15 /* pt_regs->r15 */
xorq %r15, %r15 /* nospec r15*/
UNWIND_HINT_REGS
.endm
.macro SAVE_C_REGS_HELPER offset=0 rax=1 rcx=1 r8910=1 r11=1
.if \r11
movq %r11, 6*8+\offset(%rsp)
.endif
.if \r8910
movq %r10, 7*8+\offset(%rsp)
movq %r9, 8*8+\offset(%rsp)
movq %r8, 9*8+\offset(%rsp)
.endif
.if \rax
movq %rax, 10*8+\offset(%rsp)
.endif
.if \rcx
movq %rcx, 11*8+\offset(%rsp)
.endif
movq %rdx, 12*8+\offset(%rsp)
movq %rsi, 13*8+\offset(%rsp)
movq %rdi, 14*8+\offset(%rsp)
UNWIND_HINT_REGS offset=\offset extra=0
.endm
.macro SAVE_C_REGS offset=0
SAVE_C_REGS_HELPER \offset, 1, 1, 1, 1
.endm
.macro SAVE_C_REGS_EXCEPT_RAX_RCX offset=0
SAVE_C_REGS_HELPER \offset, 0, 0, 1, 1
.endm
.macro SAVE_C_REGS_EXCEPT_R891011
SAVE_C_REGS_HELPER 0, 1, 1, 0, 0
.endm
.macro SAVE_C_REGS_EXCEPT_RCX_R891011
SAVE_C_REGS_HELPER 0, 1, 0, 0, 0
.endm
.macro SAVE_C_REGS_EXCEPT_RAX_RCX_R11
SAVE_C_REGS_HELPER 0, 0, 0, 1, 0
.endm
.macro SAVE_EXTRA_REGS offset=0
movq %r15, 0*8+\offset(%rsp)
movq %r14, 1*8+\offset(%rsp)
movq %r13, 2*8+\offset(%rsp)
movq %r12, 3*8+\offset(%rsp)
movq %rbp, 4*8+\offset(%rsp)
movq %rbx, 5*8+\offset(%rsp)
UNWIND_HINT_REGS offset=\offset
.endm
.macro POP_EXTRA_REGS
.macro POP_REGS pop_rdi=1 skip_r11rcx=0
popq %r15
popq %r14
popq %r13
popq %r12
popq %rbp
popq %rbx
.endm
.macro POP_C_REGS
.if \skip_r11rcx
popq %rsi
.else
popq %r11
.endif
popq %r10
popq %r9
popq %r8
popq %rax
.if \skip_r11rcx
popq %rsi
.else
popq %rcx
.endif
popq %rdx
popq %rsi
.if \pop_rdi
popq %rdi
.endm
.macro icebp
.byte 0xf1
.endm
.endif
.endm
/*
* This is a sneaky trick to help the unwinder find pt_regs on the stack. The
@ -178,7 +167,7 @@ For 32-bit we have the following conventions - kernel is built with
* is just setting the LSB, which makes it an invalid stack address and is also
* a signal to the unwinder that it's a pt_regs pointer in disguise.
*
* NOTE: This macro must be used *after* SAVE_EXTRA_REGS because it corrupts
* NOTE: This macro must be used *after* PUSH_AND_CLEAR_REGS because it corrupts
* the original rbp.
*/
.macro ENCODE_FRAME_POINTER ptregs_offset=0

View File

@ -213,7 +213,7 @@ ENTRY(entry_SYSCALL_64)
swapgs
/*
* This path is not taken when PAGE_TABLE_ISOLATION is disabled so it
* This path is only taken when PAGE_TABLE_ISOLATION is disabled so it
* is not required to switch CR3.
*/
movq %rsp, PER_CPU_VAR(rsp_scratch)
@ -227,22 +227,8 @@ ENTRY(entry_SYSCALL_64)
pushq %rcx /* pt_regs->ip */
GLOBAL(entry_SYSCALL_64_after_hwframe)
pushq %rax /* pt_regs->orig_ax */
pushq %rdi /* pt_regs->di */
pushq %rsi /* pt_regs->si */
pushq %rdx /* pt_regs->dx */
pushq %rcx /* pt_regs->cx */
pushq $-ENOSYS /* pt_regs->ax */
pushq %r8 /* pt_regs->r8 */
pushq %r9 /* pt_regs->r9 */
pushq %r10 /* pt_regs->r10 */
pushq %r11 /* pt_regs->r11 */
pushq %rbx /* pt_regs->rbx */
pushq %rbp /* pt_regs->rbp */
pushq %r12 /* pt_regs->r12 */
pushq %r13 /* pt_regs->r13 */
pushq %r14 /* pt_regs->r14 */
pushq %r15 /* pt_regs->r15 */
UNWIND_HINT_REGS
PUSH_AND_CLEAR_REGS rax=$-ENOSYS
TRACE_IRQS_OFF
@ -321,15 +307,7 @@ GLOBAL(entry_SYSCALL_64_after_hwframe)
syscall_return_via_sysret:
/* rcx and r11 are already restored (see code above) */
UNWIND_HINT_EMPTY
POP_EXTRA_REGS
popq %rsi /* skip r11 */
popq %r10
popq %r9
popq %r8
popq %rax
popq %rsi /* skip rcx */
popq %rdx
popq %rsi
POP_REGS pop_rdi=0 skip_r11rcx=1
/*
* Now all regs are restored except RSP and RDI.
@ -559,9 +537,7 @@ END(irq_entries_start)
call switch_to_thread_stack
1:
ALLOC_PT_GPREGS_ON_STACK
SAVE_C_REGS
SAVE_EXTRA_REGS
PUSH_AND_CLEAR_REGS
ENCODE_FRAME_POINTER
testb $3, CS(%rsp)
@ -622,15 +598,7 @@ GLOBAL(swapgs_restore_regs_and_return_to_usermode)
ud2
1:
#endif
POP_EXTRA_REGS
popq %r11
popq %r10
popq %r9
popq %r8
popq %rax
popq %rcx
popq %rdx
popq %rsi
POP_REGS pop_rdi=0
/*
* The stack is now user RDI, orig_ax, RIP, CS, EFLAGS, RSP, SS.
@ -688,8 +656,7 @@ GLOBAL(restore_regs_and_return_to_kernel)
ud2
1:
#endif
POP_EXTRA_REGS
POP_C_REGS
POP_REGS
addq $8, %rsp /* skip regs->orig_ax */
/*
* ARCH_HAS_MEMBARRIER_SYNC_CORE rely on IRET core serialization
@ -908,7 +875,9 @@ ENTRY(\sym)
pushq $-1 /* ORIG_RAX: no syscall to restart */
.endif
ALLOC_PT_GPREGS_ON_STACK
/* Save all registers in pt_regs */
PUSH_AND_CLEAR_REGS
ENCODE_FRAME_POINTER
.if \paranoid < 2
testb $3, CS(%rsp) /* If coming from userspace, switch stacks */
@ -1121,9 +1090,7 @@ ENTRY(xen_failsafe_callback)
addq $0x30, %rsp
UNWIND_HINT_IRET_REGS
pushq $-1 /* orig_ax = -1 => not a system call */
ALLOC_PT_GPREGS_ON_STACK
SAVE_C_REGS
SAVE_EXTRA_REGS
PUSH_AND_CLEAR_REGS
ENCODE_FRAME_POINTER
jmp error_exit
END(xen_failsafe_callback)
@ -1163,16 +1130,13 @@ idtentry machine_check do_mce has_error_code=0 paranoid=1
#endif
/*
* Save all registers in pt_regs, and switch gs if needed.
* Switch gs if needed.
* Use slow, but surefire "are we in kernel?" check.
* Return: ebx=0: need swapgs on exit, ebx=1: otherwise
*/
ENTRY(paranoid_entry)
UNWIND_HINT_FUNC
cld
SAVE_C_REGS 8
SAVE_EXTRA_REGS 8
ENCODE_FRAME_POINTER 8
movl $1, %ebx
movl $MSR_GS_BASE, %ecx
rdmsr
@ -1211,21 +1175,18 @@ ENTRY(paranoid_exit)
jmp .Lparanoid_exit_restore
.Lparanoid_exit_no_swapgs:
TRACE_IRQS_IRETQ_DEBUG
RESTORE_CR3 scratch_reg=%rbx save_reg=%r14
.Lparanoid_exit_restore:
jmp restore_regs_and_return_to_kernel
END(paranoid_exit)
/*
* Save all registers in pt_regs, and switch gs if needed.
* Switch gs if needed.
* Return: EBX=0: came from user mode; EBX=1: otherwise
*/
ENTRY(error_entry)
UNWIND_HINT_FUNC
UNWIND_HINT_REGS offset=8
cld
SAVE_C_REGS 8
SAVE_EXTRA_REGS 8
ENCODE_FRAME_POINTER 8
xorl %ebx, %ebx
testb $3, CS+8(%rsp)
jz .Lerror_kernelspace
@ -1406,22 +1367,7 @@ ENTRY(nmi)
pushq 1*8(%rdx) /* pt_regs->rip */
UNWIND_HINT_IRET_REGS
pushq $-1 /* pt_regs->orig_ax */
pushq %rdi /* pt_regs->di */
pushq %rsi /* pt_regs->si */
pushq (%rdx) /* pt_regs->dx */
pushq %rcx /* pt_regs->cx */
pushq %rax /* pt_regs->ax */
pushq %r8 /* pt_regs->r8 */
pushq %r9 /* pt_regs->r9 */
pushq %r10 /* pt_regs->r10 */
pushq %r11 /* pt_regs->r11 */
pushq %rbx /* pt_regs->rbx */
pushq %rbp /* pt_regs->rbp */
pushq %r12 /* pt_regs->r12 */
pushq %r13 /* pt_regs->r13 */
pushq %r14 /* pt_regs->r14 */
pushq %r15 /* pt_regs->r15 */
UNWIND_HINT_REGS
PUSH_AND_CLEAR_REGS rdx=(%rdx)
ENCODE_FRAME_POINTER
/*
@ -1631,7 +1577,8 @@ end_repeat_nmi:
* frame to point back to repeat_nmi.
*/
pushq $-1 /* ORIG_RAX: no syscall to restart */
ALLOC_PT_GPREGS_ON_STACK
PUSH_AND_CLEAR_REGS
ENCODE_FRAME_POINTER
/*
* Use paranoid_entry to handle SWAPGS, but no need to use paranoid_exit
@ -1655,8 +1602,7 @@ end_repeat_nmi:
nmi_swapgs:
SWAPGS_UNSAFE_STACK
nmi_restore:
POP_EXTRA_REGS
POP_C_REGS
POP_REGS
/*
* Skip orig_ax and the "outermost" frame to point RSP at the "iret"

View File

@ -85,15 +85,25 @@ ENTRY(entry_SYSENTER_compat)
pushq %rcx /* pt_regs->cx */
pushq $-ENOSYS /* pt_regs->ax */
pushq $0 /* pt_regs->r8 = 0 */
xorq %r8, %r8 /* nospec r8 */
pushq $0 /* pt_regs->r9 = 0 */
xorq %r9, %r9 /* nospec r9 */
pushq $0 /* pt_regs->r10 = 0 */
xorq %r10, %r10 /* nospec r10 */
pushq $0 /* pt_regs->r11 = 0 */
xorq %r11, %r11 /* nospec r11 */
pushq %rbx /* pt_regs->rbx */
xorl %ebx, %ebx /* nospec rbx */
pushq %rbp /* pt_regs->rbp (will be overwritten) */
xorl %ebp, %ebp /* nospec rbp */
pushq $0 /* pt_regs->r12 = 0 */
xorq %r12, %r12 /* nospec r12 */
pushq $0 /* pt_regs->r13 = 0 */
xorq %r13, %r13 /* nospec r13 */
pushq $0 /* pt_regs->r14 = 0 */
xorq %r14, %r14 /* nospec r14 */
pushq $0 /* pt_regs->r15 = 0 */
xorq %r15, %r15 /* nospec r15 */
cld
/*
@ -214,15 +224,25 @@ GLOBAL(entry_SYSCALL_compat_after_hwframe)
pushq %rbp /* pt_regs->cx (stashed in bp) */
pushq $-ENOSYS /* pt_regs->ax */
pushq $0 /* pt_regs->r8 = 0 */
xorq %r8, %r8 /* nospec r8 */
pushq $0 /* pt_regs->r9 = 0 */
xorq %r9, %r9 /* nospec r9 */
pushq $0 /* pt_regs->r10 = 0 */
xorq %r10, %r10 /* nospec r10 */
pushq $0 /* pt_regs->r11 = 0 */
xorq %r11, %r11 /* nospec r11 */
pushq %rbx /* pt_regs->rbx */
xorl %ebx, %ebx /* nospec rbx */
pushq %rbp /* pt_regs->rbp (will be overwritten) */
xorl %ebp, %ebp /* nospec rbp */
pushq $0 /* pt_regs->r12 = 0 */
xorq %r12, %r12 /* nospec r12 */
pushq $0 /* pt_regs->r13 = 0 */
xorq %r13, %r13 /* nospec r13 */
pushq $0 /* pt_regs->r14 = 0 */
xorq %r14, %r14 /* nospec r14 */
pushq $0 /* pt_regs->r15 = 0 */
xorq %r15, %r15 /* nospec r15 */
/*
* User mode is traced as though IRQs are on, and SYSENTER
@ -338,15 +358,25 @@ ENTRY(entry_INT80_compat)
pushq %rcx /* pt_regs->cx */
pushq $-ENOSYS /* pt_regs->ax */
pushq $0 /* pt_regs->r8 = 0 */
xorq %r8, %r8 /* nospec r8 */
pushq $0 /* pt_regs->r9 = 0 */
xorq %r9, %r9 /* nospec r9 */
pushq $0 /* pt_regs->r10 = 0 */
xorq %r10, %r10 /* nospec r10 */
pushq $0 /* pt_regs->r11 = 0 */
xorq %r11, %r11 /* nospec r11 */
pushq %rbx /* pt_regs->rbx */
xorl %ebx, %ebx /* nospec rbx */
pushq %rbp /* pt_regs->rbp */
xorl %ebp, %ebp /* nospec rbp */
pushq %r12 /* pt_regs->r12 */
xorq %r12, %r12 /* nospec r12 */
pushq %r13 /* pt_regs->r13 */
xorq %r13, %r13 /* nospec r13 */
pushq %r14 /* pt_regs->r14 */
xorq %r14, %r14 /* nospec r14 */
pushq %r15 /* pt_regs->r15 */
xorq %r15, %r15 /* nospec r15 */
cld
/*

View File

@ -3559,7 +3559,7 @@ static int intel_snb_pebs_broken(int cpu)
break;
case INTEL_FAM6_SANDYBRIDGE_X:
switch (cpu_data(cpu).x86_mask) {
switch (cpu_data(cpu).x86_stepping) {
case 6: rev = 0x618; break;
case 7: rev = 0x70c; break;
}

View File

@ -1186,7 +1186,7 @@ void __init intel_pmu_lbr_init_atom(void)
* on PMU interrupt
*/
if (boot_cpu_data.x86_model == 28
&& boot_cpu_data.x86_mask < 10) {
&& boot_cpu_data.x86_stepping < 10) {
pr_cont("LBR disabled due to erratum");
return;
}

View File

@ -234,7 +234,7 @@ static __initconst const struct x86_pmu p6_pmu = {
static __init void p6_pmu_rdpmc_quirk(void)
{
if (boot_cpu_data.x86_mask < 9) {
if (boot_cpu_data.x86_stepping < 9) {
/*
* PPro erratum 26; fixed in stepping 9 and above.
*/

View File

@ -94,7 +94,7 @@ static inline unsigned int acpi_processor_cstate_check(unsigned int max_cstate)
if (boot_cpu_data.x86 == 0x0F &&
boot_cpu_data.x86_vendor == X86_VENDOR_AMD &&
boot_cpu_data.x86_model <= 0x05 &&
boot_cpu_data.x86_mask < 0x0A)
boot_cpu_data.x86_stepping < 0x0A)
return 1;
else if (boot_cpu_has(X86_BUG_AMD_APIC_C1E))
return 1;

View File

@ -40,7 +40,7 @@ static inline unsigned long array_index_mask_nospec(unsigned long index,
asm ("cmp %1,%2; sbb %0,%0;"
:"=r" (mask)
:"r"(size),"r" (index)
:"g"(size),"r" (index)
:"cc");
return mask;
}
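Typical use goes through the array_index_nospec() wrapper from include/linux/nospec.h, which applies the mask computed above (idx, table and val are hypothetical):

	#include <linux/nospec.h>

	idx = array_index_nospec(idx, ARRAY_SIZE(table));
	val = table[idx];	/* safe even under branch misprediction */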

View File

@ -5,23 +5,20 @@
#include <linux/stringify.h>
/*
* Since some emulators terminate on UD2, we cannot use it for WARN.
* Since various instruction decoders disagree on the length of UD1,
* we cannot use it either. So use UD0 for WARN.
* Despite that some emulators terminate on UD2, we use it for WARN().
*
* (binutils knows about "ud1" but {en,de}codes it as 2 bytes, whereas
* our kernel decoder thinks it takes a ModRM byte, which seems consistent
* with various things like the Intel SDM instruction encoding rules)
* Since various instruction decoders/specs disagree on the encoding of
* UD0/UD1.
*/
#define ASM_UD0 ".byte 0x0f, 0xff"
#define ASM_UD0 ".byte 0x0f, 0xff" /* + ModRM (for Intel) */
#define ASM_UD1 ".byte 0x0f, 0xb9" /* + ModRM */
#define ASM_UD2 ".byte 0x0f, 0x0b"
#define INSN_UD0 0xff0f
#define INSN_UD2 0x0b0f
#define LEN_UD0 2
#define LEN_UD2 2
#ifdef CONFIG_GENERIC_BUG
@ -77,7 +74,11 @@ do { \
unreachable(); \
} while (0)
#define __WARN_FLAGS(flags) _BUG_FLAGS(ASM_UD0, BUGFLAG_WARNING|(flags))
#define __WARN_FLAGS(flags) \
do { \
_BUG_FLAGS(ASM_UD2, BUGFLAG_WARNING|(flags)); \
annotate_reachable(); \
} while (0)
#include <asm-generic/bug.h>
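The INSN_UD* constants above are simply the opcode bytes read as a little-endian u16, which is how the #UD handler recognizes a WARN/BUG site: it reads two bytes at the faulting IP and compares. A sketch of that classification, assuming some fault-tolerant kernel read primitive (read_kernel_u16_safe() is a hypothetical stand-in for a helper like probe_kernel_address()):

	/* Sketch: does the faulting IP point at a UD0/UD2 instruction? */
	static int looks_like_bug_insn(unsigned long ip)
	{
		unsigned short insn;

		if (read_kernel_u16_safe(ip, &insn))	/* hypothetical helper */
			return 0;

		return insn == 0x0b0f /* INSN_UD2 */ || insn == 0xff0f /* INSN_UD0 */;
	}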

View File

@ -148,45 +148,46 @@ extern void clear_cpu_cap(struct cpuinfo_x86 *c, unsigned int bit);
*/
static __always_inline __pure bool _static_cpu_has(u16 bit)
{
asm_volatile_goto("1: jmp 6f\n"
"2:\n"
".skip -(((5f-4f) - (2b-1b)) > 0) * "
"((5f-4f) - (2b-1b)),0x90\n"
"3:\n"
".section .altinstructions,\"a\"\n"
" .long 1b - .\n" /* src offset */
" .long 4f - .\n" /* repl offset */
" .word %P1\n" /* always replace */
" .byte 3b - 1b\n" /* src len */
" .byte 5f - 4f\n" /* repl len */
" .byte 3b - 2b\n" /* pad len */
".previous\n"
".section .altinstr_replacement,\"ax\"\n"
"4: jmp %l[t_no]\n"
"5:\n"
".previous\n"
".section .altinstructions,\"a\"\n"
" .long 1b - .\n" /* src offset */
" .long 0\n" /* no replacement */
" .word %P0\n" /* feature bit */
" .byte 3b - 1b\n" /* src len */
" .byte 0\n" /* repl len */
" .byte 0\n" /* pad len */
".previous\n"
".section .altinstr_aux,\"ax\"\n"
"6:\n"
" testb %[bitnum],%[cap_byte]\n"
" jnz %l[t_yes]\n"
" jmp %l[t_no]\n"
".previous\n"
: : "i" (bit), "i" (X86_FEATURE_ALWAYS),
[bitnum] "i" (1 << (bit & 7)),
[cap_byte] "m" (((const char *)boot_cpu_data.x86_capability)[bit >> 3])
: : t_yes, t_no);
t_yes:
return true;
t_no:
return false;
asm_volatile_goto("1: jmp 6f\n"
"2:\n"
".skip -(((5f-4f) - (2b-1b)) > 0) * "
"((5f-4f) - (2b-1b)),0x90\n"
"3:\n"
".section .altinstructions,\"a\"\n"
" .long 1b - .\n" /* src offset */
" .long 4f - .\n" /* repl offset */
" .word %P[always]\n" /* always replace */
" .byte 3b - 1b\n" /* src len */
" .byte 5f - 4f\n" /* repl len */
" .byte 3b - 2b\n" /* pad len */
".previous\n"
".section .altinstr_replacement,\"ax\"\n"
"4: jmp %l[t_no]\n"
"5:\n"
".previous\n"
".section .altinstructions,\"a\"\n"
" .long 1b - .\n" /* src offset */
" .long 0\n" /* no replacement */
" .word %P[feature]\n" /* feature bit */
" .byte 3b - 1b\n" /* src len */
" .byte 0\n" /* repl len */
" .byte 0\n" /* pad len */
".previous\n"
".section .altinstr_aux,\"ax\"\n"
"6:\n"
" testb %[bitnum],%[cap_byte]\n"
" jnz %l[t_yes]\n"
" jmp %l[t_no]\n"
".previous\n"
: : [feature] "i" (bit),
[always] "i" (X86_FEATURE_ALWAYS),
[bitnum] "i" (1 << (bit & 7)),
[cap_byte] "m" (((const char *)boot_cpu_data.x86_capability)[bit >> 3])
: : t_yes, t_no);
t_yes:
return true;
t_no:
return false;
}
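The only functional change in the block above is robustness: positional operands (%P0, %P1) become named ones (%P[feature], %P[always]), so the long template no longer silently breaks if operands are added or reordered. The GCC named-operand syntax, reduced to a toy sketch:

	/* Toy example of named asm operands: no positional %0/%1 to miscount. */
	static inline int add_const_sketch(int x)
	{
		asm("addl %[delta], %[val]"
		    : [val] "+r" (x)
		    : [delta] "i" (42));
		return x;
	}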
#define static_cpu_has(bit) \

View File

@ -6,6 +6,7 @@
#include <asm/alternative.h>
#include <asm/alternative-asm.h>
#include <asm/cpufeatures.h>
#include <asm/msr-index.h>
#ifdef __ASSEMBLY__
@ -164,10 +165,15 @@ static inline void vmexit_fill_RSB(void)
static inline void indirect_branch_prediction_barrier(void)
{
alternative_input("",
"call __ibp_barrier",
X86_FEATURE_USE_IBPB,
ASM_NO_INPUT_CLOBBER("eax", "ecx", "edx", "memory"));
asm volatile(ALTERNATIVE("",
"movl %[msr], %%ecx\n\t"
"movl %[val], %%eax\n\t"
"movl $0, %%edx\n\t"
"wrmsr",
X86_FEATURE_USE_IBPB)
: : [msr] "i" (MSR_IA32_PRED_CMD),
[val] "i" (PRED_CMD_IBPB)
: "eax", "ecx", "edx", "memory");
}
#endif /* __ASSEMBLY__ */
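When X86_FEATURE_USE_IBPB is set, the alternative above patches in a WRMSR that writes PRED_CMD_IBPB (with EDX cleared) to MSR_IA32_PRED_CMD, replacing the out-of-line __ibp_barrier() call that later hunks remove. Stripped of the alternatives machinery, the barrier is roughly this sketch:

	/* Sketch: what the patched-in instructions do once the feature
	 * check has been resolved at boot. */
	static inline void ibpb_sketch(void)
	{
		wrmsrl(MSR_IA32_PRED_CMD, PRED_CMD_IBPB);
	}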

View File

@ -52,10 +52,6 @@ static inline void clear_page(void *page)
void copy_page(void *to, void *from);
#ifdef CONFIG_X86_MCE
#define arch_unmap_kpfn arch_unmap_kpfn
#endif
#endif /* !__ASSEMBLY__ */
#ifdef CONFIG_X86_VSYSCALL_EMULATION

View File

@ -297,9 +297,9 @@ static inline void __flush_tlb_global(void)
{
PVOP_VCALL0(pv_mmu_ops.flush_tlb_kernel);
}
static inline void __flush_tlb_single(unsigned long addr)
static inline void __flush_tlb_one_user(unsigned long addr)
{
PVOP_VCALL1(pv_mmu_ops.flush_tlb_single, addr);
PVOP_VCALL1(pv_mmu_ops.flush_tlb_one_user, addr);
}
static inline void flush_tlb_others(const struct cpumask *cpumask,

View File

@ -217,7 +217,7 @@ struct pv_mmu_ops {
/* TLB operations */
void (*flush_tlb_user)(void);
void (*flush_tlb_kernel)(void);
void (*flush_tlb_single)(unsigned long addr);
void (*flush_tlb_one_user)(unsigned long addr);
void (*flush_tlb_others)(const struct cpumask *cpus,
const struct flush_tlb_info *info);

View File

@ -61,7 +61,7 @@ void paging_init(void);
#define kpte_clear_flush(ptep, vaddr) \
do { \
pte_clear(&init_mm, (vaddr), (ptep)); \
__flush_tlb_one((vaddr)); \
__flush_tlb_one_kernel((vaddr)); \
} while (0)
#endif /* !__ASSEMBLY__ */

View File

@ -91,7 +91,7 @@ struct cpuinfo_x86 {
__u8 x86; /* CPU family */
__u8 x86_vendor; /* CPU vendor */
__u8 x86_model;
__u8 x86_mask;
__u8 x86_stepping;
#ifdef CONFIG_X86_64
/* Number of 4K pages in DTLB/ITLB combined(in pages): */
int x86_tlbsize;
@ -109,7 +109,7 @@ struct cpuinfo_x86 {
char x86_vendor_id[16];
char x86_model_id[64];
/* in KB - valid for CPUS which support this call: */
int x86_cache_size;
unsigned int x86_cache_size;
int x86_cache_alignment; /* In bytes */
/* Cache QoS architectural values: */
int x86_cache_max_rmid; /* max index */
@ -977,7 +977,4 @@ bool xen_set_default_idle(void);
void stop_this_cpu(void *dummy);
void df_debug(struct pt_regs *regs, long error_code);
void __ibp_barrier(void);
#endif /* _ASM_X86_PROCESSOR_H */

View File

@ -129,6 +129,7 @@ static inline void arch_send_call_function_ipi_mask(const struct cpumask *mask)
void cpu_disable_common(void);
void native_smp_prepare_boot_cpu(void);
void native_smp_prepare_cpus(unsigned int max_cpus);
void calculate_max_logical_packages(void);
void native_smp_cpus_done(unsigned int max_cpus);
void common_cpu_up(unsigned int cpunum, struct task_struct *tidle);
int native_cpu_up(unsigned int cpunum, struct task_struct *tidle);

View File

@ -140,7 +140,7 @@ static inline unsigned long build_cr3_noflush(pgd_t *pgd, u16 asid)
#else
#define __flush_tlb() __native_flush_tlb()
#define __flush_tlb_global() __native_flush_tlb_global()
#define __flush_tlb_single(addr) __native_flush_tlb_single(addr)
#define __flush_tlb_one_user(addr) __native_flush_tlb_one_user(addr)
#endif
static inline bool tlb_defer_switch_to_init_mm(void)
@ -400,7 +400,7 @@ static inline void __native_flush_tlb_global(void)
/*
* flush one page in the user mapping
*/
static inline void __native_flush_tlb_single(unsigned long addr)
static inline void __native_flush_tlb_one_user(unsigned long addr)
{
u32 loaded_mm_asid = this_cpu_read(cpu_tlbstate.loaded_mm_asid);
@ -437,18 +437,31 @@ static inline void __flush_tlb_all(void)
/*
* flush one page in the kernel mapping
*/
static inline void __flush_tlb_one(unsigned long addr)
static inline void __flush_tlb_one_kernel(unsigned long addr)
{
count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_ONE);
__flush_tlb_single(addr);
/*
* If PTI is off, then __flush_tlb_one_user() is just INVLPG or its
* paravirt equivalent. Even with PCID, this is sufficient: we only
* use PCID if we also use global PTEs for the kernel mapping, and
* INVLPG flushes global translations across all address spaces.
*
* If PTI is on, then the kernel is mapped with non-global PTEs, and
* __flush_tlb_one_user() will flush the given address for the current
* kernel address space and for its usermode counterpart, but it does
* not flush it for other address spaces.
*/
__flush_tlb_one_user(addr);
if (!static_cpu_has(X86_FEATURE_PTI))
return;
/*
* __flush_tlb_single() will have cleared the TLB entry for this ASID,
* but since kernel space is replicated across all, we must also
* invalidate all others.
* See above. We need to propagate the flush to all other address
* spaces. In principle, we only need to propagate it to kernelmode
* address spaces, but the extra bookkeeping we would need is not
* worth it.
*/
invalidate_other_asid();
}
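The comments above spell out the contract; on bare metal the primitive underneath __flush_tlb_one_user() is a single INVLPG. A sketch of that instruction wrapper (this matches the shape of the native helper, minus the PTI/PCID bookkeeping handled in the surrounding functions):

	/* Sketch: flush one linear address from the current TLB context. */
	static inline void invlpg_sketch(unsigned long addr)
	{
		asm volatile("invlpg (%0)" : : "r" (addr) : "memory");
	}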

View File

@ -235,7 +235,7 @@ int amd_cache_northbridges(void)
if (boot_cpu_data.x86 == 0x10 &&
boot_cpu_data.x86_model >= 0x8 &&
(boot_cpu_data.x86_model > 0x9 ||
boot_cpu_data.x86_mask >= 0x1))
boot_cpu_data.x86_stepping >= 0x1))
amd_northbridges.flags |= AMD_NB_L3_INDEX_DISABLE;
if (boot_cpu_data.x86 == 0x15)

View File

@ -546,7 +546,7 @@ static DEFINE_PER_CPU(struct clock_event_device, lapic_events);
static u32 hsx_deadline_rev(void)
{
switch (boot_cpu_data.x86_mask) {
switch (boot_cpu_data.x86_stepping) {
case 0x02: return 0x3a; /* EP */
case 0x04: return 0x0f; /* EX */
}
@ -556,7 +556,7 @@ static u32 hsx_deadline_rev(void)
static u32 bdx_deadline_rev(void)
{
switch (boot_cpu_data.x86_mask) {
switch (boot_cpu_data.x86_stepping) {
case 0x02: return 0x00000011;
case 0x03: return 0x0700000e;
case 0x04: return 0x0f00000c;
@ -568,7 +568,7 @@ static u32 bdx_deadline_rev(void)
static u32 skx_deadline_rev(void)
{
switch (boot_cpu_data.x86_mask) {
switch (boot_cpu_data.x86_stepping) {
case 0x03: return 0x01000136;
case 0x04: return 0x02000014;
}

View File

@ -1176,16 +1176,25 @@ static void __init decode_gam_rng_tbl(unsigned long ptr)
uv_gre_table = gre;
for (; gre->type != UV_GAM_RANGE_TYPE_UNUSED; gre++) {
unsigned long size = ((unsigned long)(gre->limit - lgre)
<< UV_GAM_RANGE_SHFT);
int order = 0;
char suffix[] = " KMGTPE";
while (size > 9999 && order < sizeof(suffix)) {
size /= 1024;
order++;
}
if (!index) {
pr_info("UV: GAM Range Table...\n");
pr_info("UV: # %20s %14s %5s %4s %5s %3s %2s\n", "Range", "", "Size", "Type", "NASID", "SID", "PN");
}
pr_info("UV: %2d: 0x%014lx-0x%014lx %5luG %3d %04x %02x %02x\n",
pr_info("UV: %2d: 0x%014lx-0x%014lx %5lu%c %3d %04x %02x %02x\n",
index++,
(unsigned long)lgre << UV_GAM_RANGE_SHFT,
(unsigned long)gre->limit << UV_GAM_RANGE_SHFT,
((unsigned long)(gre->limit - lgre)) >>
(30 - UV_GAM_RANGE_SHFT), /* 64M -> 1G */
size, suffix[order],
gre->type, gre->nasid, gre->sockid, gre->pnode);
lgre = gre->limit;
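The new loop above replaces the fixed ">> (30 - UV_GAM_RANGE_SHFT)" gigabyte conversion with generic scaling: divide by 1024 until the value fits in four digits, then index into " KMGTPE" for the unit. The same trick as a standalone sketch (with the loop bound tightened to the string length):

	#include <stdio.h>

	/* Sketch: print a size scaled to <= 4 digits with a unit suffix. */
	static void print_scaled(unsigned long size)
	{
		static const char suffix[] = " KMGTPE";
		int order = 0;

		while (size > 9999 && suffix[order + 1] != '\0') {
			size /= 1024;
			order++;
		}
		printf("%lu%c\n", size, suffix[order]);
	}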

View File

@ -18,7 +18,7 @@ void foo(void)
OFFSET(CPUINFO_x86, cpuinfo_x86, x86);
OFFSET(CPUINFO_x86_vendor, cpuinfo_x86, x86_vendor);
OFFSET(CPUINFO_x86_model, cpuinfo_x86, x86_model);
OFFSET(CPUINFO_x86_mask, cpuinfo_x86, x86_mask);
OFFSET(CPUINFO_x86_stepping, cpuinfo_x86, x86_stepping);
OFFSET(CPUINFO_cpuid_level, cpuinfo_x86, cpuid_level);
OFFSET(CPUINFO_x86_capability, cpuinfo_x86, x86_capability);
OFFSET(CPUINFO_x86_vendor_id, cpuinfo_x86, x86_vendor_id);

View File

@ -119,7 +119,7 @@ static void init_amd_k6(struct cpuinfo_x86 *c)
return;
}
if (c->x86_model == 6 && c->x86_mask == 1) {
if (c->x86_model == 6 && c->x86_stepping == 1) {
const int K6_BUG_LOOP = 1000000;
int n;
void (*f_vide)(void);
@ -149,7 +149,7 @@ static void init_amd_k6(struct cpuinfo_x86 *c)
/* K6 with old style WHCR */
if (c->x86_model < 8 ||
(c->x86_model == 8 && c->x86_mask < 8)) {
(c->x86_model == 8 && c->x86_stepping < 8)) {
/* We can only write allocate on the low 508Mb */
if (mbytes > 508)
mbytes = 508;
@ -168,7 +168,7 @@ static void init_amd_k6(struct cpuinfo_x86 *c)
return;
}
if ((c->x86_model == 8 && c->x86_mask > 7) ||
if ((c->x86_model == 8 && c->x86_stepping > 7) ||
c->x86_model == 9 || c->x86_model == 13) {
/* The more serious chips .. */
@ -221,7 +221,7 @@ static void init_amd_k7(struct cpuinfo_x86 *c)
* are more robust with CLK_CTL set to 200xxxxx instead of 600xxxxx
* As per AMD technical note 27212 0.2
*/
if ((c->x86_model == 8 && c->x86_mask >= 1) || (c->x86_model > 8)) {
if ((c->x86_model == 8 && c->x86_stepping >= 1) || (c->x86_model > 8)) {
rdmsr(MSR_K7_CLK_CTL, l, h);
if ((l & 0xfff00000) != 0x20000000) {
pr_info("CPU: CLK_CTL MSR was %x. Reprogramming to %x\n",
@ -241,12 +241,12 @@ static void init_amd_k7(struct cpuinfo_x86 *c)
* but they are not certified as MP capable.
*/
/* Athlon 660/661 is valid. */
if ((c->x86_model == 6) && ((c->x86_mask == 0) ||
(c->x86_mask == 1)))
if ((c->x86_model == 6) && ((c->x86_stepping == 0) ||
(c->x86_stepping == 1)))
return;
/* Duron 670 is valid */
if ((c->x86_model == 7) && (c->x86_mask == 0))
if ((c->x86_model == 7) && (c->x86_stepping == 0))
return;
/*
@ -256,8 +256,8 @@ static void init_amd_k7(struct cpuinfo_x86 *c)
* See http://www.heise.de/newsticker/data/jow-18.10.01-000 for
* more.
*/
if (((c->x86_model == 6) && (c->x86_mask >= 2)) ||
((c->x86_model == 7) && (c->x86_mask >= 1)) ||
if (((c->x86_model == 6) && (c->x86_stepping >= 2)) ||
((c->x86_model == 7) && (c->x86_stepping >= 1)) ||
(c->x86_model > 7))
if (cpu_has(c, X86_FEATURE_MP))
return;
@ -628,7 +628,7 @@ static void early_init_amd(struct cpuinfo_x86 *c)
/* Set MTRR capability flag if appropriate */
if (c->x86 == 5)
if (c->x86_model == 13 || c->x86_model == 9 ||
(c->x86_model == 8 && c->x86_mask >= 8))
(c->x86_model == 8 && c->x86_stepping >= 8))
set_cpu_cap(c, X86_FEATURE_K6_MTRR);
#endif
#if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_PCI)
@ -795,7 +795,7 @@ static void init_amd_zn(struct cpuinfo_x86 *c)
* Fix erratum 1076: CPB feature bit not being set in CPUID. It affects
* all up to and including B1.
*/
if (c->x86_model <= 1 && c->x86_mask <= 1)
if (c->x86_model <= 1 && c->x86_stepping <= 1)
set_cpu_cap(c, X86_FEATURE_CPB);
}
@ -906,11 +906,11 @@ static unsigned int amd_size_cache(struct cpuinfo_x86 *c, unsigned int size)
/* AMD errata T13 (order #21922) */
if ((c->x86 == 6)) {
/* Duron Rev A0 */
if (c->x86_model == 3 && c->x86_mask == 0)
if (c->x86_model == 3 && c->x86_stepping == 0)
size = 64;
/* Tbird rev A1/A2 */
if (c->x86_model == 4 &&
(c->x86_mask == 0 || c->x86_mask == 1))
(c->x86_stepping == 0 || c->x86_stepping == 1))
size = 256;
}
return size;
@ -1047,7 +1047,7 @@ static bool cpu_has_amd_erratum(struct cpuinfo_x86 *cpu, const int *erratum)
}
/* OSVW unavailable or ID unknown, match family-model-stepping range */
ms = (cpu->x86_model << 4) | cpu->x86_mask;
ms = (cpu->x86_model << 4) | cpu->x86_stepping;
while ((range = *erratum++))
if ((cpu->x86 == AMD_MODEL_RANGE_FAMILY(range)) &&
(ms >= AMD_MODEL_RANGE_START(range)) &&
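The ms value packs model and stepping into 12 bits, (model << 4) | stepping, so an entire family/model/stepping window collapses to two integer comparisons against a packed range word. A sketch of that encoding (macro and function names here are illustrative; amd.c's AMD_MODEL_RANGE_* macros follow the same layout):

	/* Sketch: pack an erratum window and test a CPU against it. */
	#define RANGE(f, m0, s0, m1, s1) \
		(((f) << 24) | ((m0) << 16) | ((s0) << 12) | ((m1) << 4) | (s1))
	#define RANGE_FAMILY(r)	(((r) >> 24) & 0xff)
	#define RANGE_START(r)	(((r) >> 12) & 0xfff)	/* (m0 << 4) | s0 */
	#define RANGE_END(r)	((r) & 0xfff)		/* (m1 << 4) | s1 */

	static int in_range_sketch(unsigned int family, unsigned int model,
				   unsigned int stepping, unsigned int range)
	{
		unsigned int ms = (model << 4) | stepping;

		return family == RANGE_FAMILY(range) &&
		       ms >= RANGE_START(range) && ms <= RANGE_END(range);
	}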

View File

@ -162,8 +162,7 @@ static enum spectre_v2_mitigation_cmd __init spectre_v2_parse_cmdline(void)
if (cmdline_find_option_bool(boot_command_line, "nospectre_v2"))
return SPECTRE_V2_CMD_NONE;
else {
ret = cmdline_find_option(boot_command_line, "spectre_v2", arg,
sizeof(arg));
ret = cmdline_find_option(boot_command_line, "spectre_v2", arg, sizeof(arg));
if (ret < 0)
return SPECTRE_V2_CMD_AUTO;
@ -175,8 +174,7 @@ static enum spectre_v2_mitigation_cmd __init spectre_v2_parse_cmdline(void)
}
if (i >= ARRAY_SIZE(mitigation_options)) {
pr_err("unknown option (%s). Switching to AUTO select\n",
mitigation_options[i].option);
pr_err("unknown option (%s). Switching to AUTO select\n", arg);
return SPECTRE_V2_CMD_AUTO;
}
}
@ -185,8 +183,7 @@ static enum spectre_v2_mitigation_cmd __init spectre_v2_parse_cmdline(void)
cmd == SPECTRE_V2_CMD_RETPOLINE_AMD ||
cmd == SPECTRE_V2_CMD_RETPOLINE_GENERIC) &&
!IS_ENABLED(CONFIG_RETPOLINE)) {
pr_err("%s selected but not compiled in. Switching to AUTO select\n",
mitigation_options[i].option);
pr_err("%s selected but not compiled in. Switching to AUTO select\n", mitigation_options[i].option);
return SPECTRE_V2_CMD_AUTO;
}
@ -256,14 +253,14 @@ static void __init spectre_v2_select_mitigation(void)
goto retpoline_auto;
break;
}
pr_err("kernel not compiled with retpoline; no mitigation available!");
pr_err("Spectre mitigation: kernel not compiled with retpoline; no mitigation available!");
return;
retpoline_auto:
if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) {
retpoline_amd:
if (!boot_cpu_has(X86_FEATURE_LFENCE_RDTSC)) {
pr_err("LFENCE not serializing. Switching to generic retpoline\n");
pr_err("Spectre mitigation: LFENCE not serializing, switching to generic retpoline\n");
goto retpoline_generic;
}
mode = retp_compiler() ? SPECTRE_V2_RETPOLINE_AMD :
@ -281,7 +278,7 @@ static void __init spectre_v2_select_mitigation(void)
pr_info("%s\n", spectre_v2_strings[mode]);
/*
* If neither SMEP or KPTI are available, there is a risk of
* If neither SMEP nor PTI are available, there is a risk of
* hitting userspace addresses in the RSB after a context switch
* from a shallow call stack to a deeper one. To prevent this fill
* the entire RSB, even when using IBRS.
@ -295,21 +292,20 @@ static void __init spectre_v2_select_mitigation(void)
if ((!boot_cpu_has(X86_FEATURE_PTI) &&
!boot_cpu_has(X86_FEATURE_SMEP)) || is_skylake_era()) {
setup_force_cpu_cap(X86_FEATURE_RSB_CTXSW);
pr_info("Filling RSB on context switch\n");
pr_info("Spectre v2 mitigation: Filling RSB on context switch\n");
}
/* Initialize Indirect Branch Prediction Barrier if supported */
if (boot_cpu_has(X86_FEATURE_IBPB)) {
setup_force_cpu_cap(X86_FEATURE_USE_IBPB);
pr_info("Enabling Indirect Branch Prediction Barrier\n");
pr_info("Spectre v2 mitigation: Enabling Indirect Branch Prediction Barrier\n");
}
}
#undef pr_fmt
#ifdef CONFIG_SYSFS
ssize_t cpu_show_meltdown(struct device *dev,
struct device_attribute *attr, char *buf)
ssize_t cpu_show_meltdown(struct device *dev, struct device_attribute *attr, char *buf)
{
if (!boot_cpu_has_bug(X86_BUG_CPU_MELTDOWN))
return sprintf(buf, "Not affected\n");
@ -318,16 +314,14 @@ ssize_t cpu_show_meltdown(struct device *dev,
return sprintf(buf, "Vulnerable\n");
}
ssize_t cpu_show_spectre_v1(struct device *dev,
struct device_attribute *attr, char *buf)
ssize_t cpu_show_spectre_v1(struct device *dev, struct device_attribute *attr, char *buf)
{
if (!boot_cpu_has_bug(X86_BUG_SPECTRE_V1))
return sprintf(buf, "Not affected\n");
return sprintf(buf, "Mitigation: __user pointer sanitization\n");
}
ssize_t cpu_show_spectre_v2(struct device *dev,
struct device_attribute *attr, char *buf)
ssize_t cpu_show_spectre_v2(struct device *dev, struct device_attribute *attr, char *buf)
{
if (!boot_cpu_has_bug(X86_BUG_SPECTRE_V2))
return sprintf(buf, "Not affected\n");
@ -337,9 +331,3 @@ ssize_t cpu_show_spectre_v2(struct device *dev,
spectre_v2_module_string());
}
#endif
void __ibp_barrier(void)
{
__wrmsr(MSR_IA32_PRED_CMD, PRED_CMD_IBPB, 0);
}
EXPORT_SYMBOL_GPL(__ibp_barrier);

View File

@ -140,7 +140,7 @@ static void init_centaur(struct cpuinfo_x86 *c)
clear_cpu_cap(c, X86_FEATURE_TSC);
break;
case 8:
switch (c->x86_mask) {
switch (c->x86_stepping) {
default:
name = "2";
break;
@ -215,7 +215,7 @@ centaur_size_cache(struct cpuinfo_x86 *c, unsigned int size)
* - Note, it seems this may only be in engineering samples.
*/
if ((c->x86 == 6) && (c->x86_model == 9) &&
(c->x86_mask == 1) && (size == 65))
(c->x86_stepping == 1) && (size == 65))
size -= 1;
return size;
}

View File

@ -731,7 +731,7 @@ void cpu_detect(struct cpuinfo_x86 *c)
cpuid(0x00000001, &tfms, &misc, &junk, &cap0);
c->x86 = x86_family(tfms);
c->x86_model = x86_model(tfms);
c->x86_mask = x86_stepping(tfms);
c->x86_stepping = x86_stepping(tfms);
if (cap0 & (1<<19)) {
c->x86_clflush_size = ((misc >> 8) & 0xff) * 8;
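cpu_detect() above leans on the x86_family()/x86_model()/x86_stepping() helpers to split the CPUID leaf 1 EAX signature, which is what makes the rename mechanical: the stepping was never a mask, just the low nibble of the signature. A sketch of the decode, following the SDM's extended-family/extended-model rules (function names here are illustrative):

	/* Sketch: split a CPUID.1:EAX signature into family/model/stepping. */
	static unsigned int sig_family(unsigned int sig)
	{
		unsigned int family = (sig >> 8) & 0xf;

		if (family == 0xf)		/* extended family applies */
			family += (sig >> 20) & 0xff;
		return family;
	}

	static unsigned int sig_model(unsigned int sig)
	{
		unsigned int family = (sig >> 8) & 0xf;
		unsigned int model = (sig >> 4) & 0xf;

		if (family >= 0x6)		/* extended model applies */
			model += ((sig >> 16) & 0xf) << 4;
		return model;
	}

	static unsigned int sig_stepping(unsigned int sig)
	{
		return sig & 0xf;
	}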
@ -1184,9 +1184,9 @@ static void identify_cpu(struct cpuinfo_x86 *c)
int i;
c->loops_per_jiffy = loops_per_jiffy;
c->x86_cache_size = -1;
c->x86_cache_size = 0;
c->x86_vendor = X86_VENDOR_UNKNOWN;
c->x86_model = c->x86_mask = 0; /* So far unknown... */
c->x86_model = c->x86_stepping = 0; /* So far unknown... */
c->x86_vendor_id[0] = '\0'; /* Unset */
c->x86_model_id[0] = '\0'; /* Unset */
c->x86_max_cores = 1;
@ -1378,8 +1378,8 @@ void print_cpu_info(struct cpuinfo_x86 *c)
pr_cont(" (family: 0x%x, model: 0x%x", c->x86, c->x86_model);
if (c->x86_mask || c->cpuid_level >= 0)
pr_cont(", stepping: 0x%x)\n", c->x86_mask);
if (c->x86_stepping || c->cpuid_level >= 0)
pr_cont(", stepping: 0x%x)\n", c->x86_stepping);
else
pr_cont(")\n");
}

View File

@ -215,7 +215,7 @@ static void init_cyrix(struct cpuinfo_x86 *c)
/* common case step number/rev -- exceptions handled below */
c->x86_model = (dir1 >> 4) + 1;
c->x86_mask = dir1 & 0xf;
c->x86_stepping = dir1 & 0xf;
/* Now cook; the original recipe is by Channing Corn, from Cyrix.
* We do the same thing for each generation: we work out

View File

@ -116,14 +116,13 @@ struct sku_microcode {
u32 microcode;
};
static const struct sku_microcode spectre_bad_microcodes[] = {
{ INTEL_FAM6_KABYLAKE_DESKTOP, 0x0B, 0x84 },
{ INTEL_FAM6_KABYLAKE_DESKTOP, 0x0A, 0x84 },
{ INTEL_FAM6_KABYLAKE_DESKTOP, 0x09, 0x84 },
{ INTEL_FAM6_KABYLAKE_MOBILE, 0x0A, 0x84 },
{ INTEL_FAM6_KABYLAKE_MOBILE, 0x09, 0x84 },
{ INTEL_FAM6_KABYLAKE_DESKTOP, 0x0B, 0x80 },
{ INTEL_FAM6_KABYLAKE_DESKTOP, 0x0A, 0x80 },
{ INTEL_FAM6_KABYLAKE_DESKTOP, 0x09, 0x80 },
{ INTEL_FAM6_KABYLAKE_MOBILE, 0x0A, 0x80 },
{ INTEL_FAM6_KABYLAKE_MOBILE, 0x09, 0x80 },
{ INTEL_FAM6_SKYLAKE_X, 0x03, 0x0100013e },
{ INTEL_FAM6_SKYLAKE_X, 0x04, 0x0200003c },
{ INTEL_FAM6_SKYLAKE_MOBILE, 0x03, 0xc2 },
{ INTEL_FAM6_SKYLAKE_DESKTOP, 0x03, 0xc2 },
{ INTEL_FAM6_BROADWELL_CORE, 0x04, 0x28 },
{ INTEL_FAM6_BROADWELL_GT3E, 0x01, 0x1b },
@ -136,8 +135,6 @@ static const struct sku_microcode spectre_bad_microcodes[] = {
{ INTEL_FAM6_HASWELL_X, 0x02, 0x3b },
{ INTEL_FAM6_HASWELL_X, 0x04, 0x10 },
{ INTEL_FAM6_IVYBRIDGE_X, 0x04, 0x42a },
/* Updated in the 20180108 release; blacklist until we know otherwise */
{ INTEL_FAM6_ATOM_GEMINI_LAKE, 0x01, 0x22 },
/* Observed in the wild */
{ INTEL_FAM6_SANDYBRIDGE_X, 0x06, 0x61b },
{ INTEL_FAM6_SANDYBRIDGE_X, 0x07, 0x712 },
@ -149,7 +146,7 @@ static bool bad_spectre_microcode(struct cpuinfo_x86 *c)
for (i = 0; i < ARRAY_SIZE(spectre_bad_microcodes); i++) {
if (c->x86_model == spectre_bad_microcodes[i].model &&
c->x86_mask == spectre_bad_microcodes[i].stepping)
c->x86_stepping == spectre_bad_microcodes[i].stepping)
return (c->microcode <= spectre_bad_microcodes[i].microcode);
}
return false;
@ -196,7 +193,7 @@ static void early_init_intel(struct cpuinfo_x86 *c)
* need the microcode to have already been loaded... so if it is
* not, recommend a BIOS update and disable large pages.
*/
if (c->x86 == 6 && c->x86_model == 0x1c && c->x86_mask <= 2 &&
if (c->x86 == 6 && c->x86_model == 0x1c && c->x86_stepping <= 2 &&
c->microcode < 0x20e) {
pr_warn("Atom PSE erratum detected, BIOS microcode update recommended\n");
clear_cpu_cap(c, X86_FEATURE_PSE);
@ -212,7 +209,7 @@ static void early_init_intel(struct cpuinfo_x86 *c)
/* CPUID workaround for 0F33/0F34 CPU */
if (c->x86 == 0xF && c->x86_model == 0x3
&& (c->x86_mask == 0x3 || c->x86_mask == 0x4))
&& (c->x86_stepping == 0x3 || c->x86_stepping == 0x4))
c->x86_phys_bits = 36;
/*
@ -310,7 +307,7 @@ int ppro_with_ram_bug(void)
if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL &&
boot_cpu_data.x86 == 6 &&
boot_cpu_data.x86_model == 1 &&
boot_cpu_data.x86_mask < 8) {
boot_cpu_data.x86_stepping < 8) {
pr_info("Pentium Pro with Errata#50 detected. Taking evasive action.\n");
return 1;
}
@ -327,7 +324,7 @@ static void intel_smp_check(struct cpuinfo_x86 *c)
* Mask B, Pentium, but not Pentium MMX
*/
if (c->x86 == 5 &&
c->x86_mask >= 1 && c->x86_mask <= 4 &&
c->x86_stepping >= 1 && c->x86_stepping <= 4 &&
c->x86_model <= 3) {
/*
* Remember we have B step Pentia with bugs
@ -370,7 +367,7 @@ static void intel_workarounds(struct cpuinfo_x86 *c)
* SEP CPUID bug: Pentium Pro reports SEP but doesn't have it until
* model 3 mask 3
*/
if ((c->x86<<8 | c->x86_model<<4 | c->x86_mask) < 0x633)
if ((c->x86<<8 | c->x86_model<<4 | c->x86_stepping) < 0x633)
clear_cpu_cap(c, X86_FEATURE_SEP);
/*
@ -388,7 +385,7 @@ static void intel_workarounds(struct cpuinfo_x86 *c)
* P4 Xeon erratum 037 workaround.
* Hardware prefetcher may cause stale data to be loaded into the cache.
*/
if ((c->x86 == 15) && (c->x86_model == 1) && (c->x86_mask == 1)) {
if ((c->x86 == 15) && (c->x86_model == 1) && (c->x86_stepping == 1)) {
if (msr_set_bit(MSR_IA32_MISC_ENABLE,
MSR_IA32_MISC_ENABLE_PREFETCH_DISABLE_BIT) > 0) {
pr_info("CPU: C0 stepping P4 Xeon detected.\n");
@ -403,7 +400,7 @@ static void intel_workarounds(struct cpuinfo_x86 *c)
* Specification Update").
*/
if (boot_cpu_has(X86_FEATURE_APIC) && (c->x86<<8 | c->x86_model<<4) == 0x520 &&
(c->x86_mask < 0x6 || c->x86_mask == 0xb))
(c->x86_stepping < 0x6 || c->x86_stepping == 0xb))
set_cpu_bug(c, X86_BUG_11AP);
@ -650,7 +647,7 @@ static void init_intel(struct cpuinfo_x86 *c)
case 6:
if (l2 == 128)
p = "Celeron (Mendocino)";
else if (c->x86_mask == 0 || c->x86_mask == 5)
else if (c->x86_stepping == 0 || c->x86_stepping == 5)
p = "Celeron-A";
break;

View File

@ -819,7 +819,7 @@ static __init void rdt_quirks(void)
cache_alloc_hsw_probe();
break;
case INTEL_FAM6_SKYLAKE_X:
if (boot_cpu_data.x86_mask <= 4)
if (boot_cpu_data.x86_stepping <= 4)
set_rdt_options("!cmt,!mbmtotal,!mbmlocal,!l3cat");
}
}

View File

@ -115,4 +115,19 @@ static inline void mce_unregister_injector_chain(struct notifier_block *nb) { }
extern struct mca_config mca_cfg;
#ifndef CONFIG_X86_64
/*
* On 32-bit systems it would be difficult to safely unmap a poison page
* from the kernel 1:1 map because there are no non-canonical addresses that
* we can use to refer to the address without risking a speculative access.
* However, this isn't much of an issue because:
* 1) Few unmappable pages are in the 1:1 map. Most are in HIGHMEM which
* are only mapped into the kernel as needed
* 2) Few people would run a 32-bit kernel on a machine that supports
* recoverable errors because they have too much memory to boot 32-bit.
*/
static inline void mce_unmap_kpfn(unsigned long pfn) {}
#define mce_unmap_kpfn mce_unmap_kpfn
#endif
#endif /* __X86_MCE_INTERNAL_H__ */

View File

@ -105,6 +105,10 @@ static struct irq_work mce_irq_work;
static void (*quirk_no_way_out)(int bank, struct mce *m, struct pt_regs *regs);
#ifndef mce_unmap_kpfn
static void mce_unmap_kpfn(unsigned long pfn);
#endif
/*
* CPU/chipset specific EDAC code can register a notifier call here to print
* MCE errors in a human-readable form.
@ -234,7 +238,7 @@ static void __print_mce(struct mce *m)
m->cs, m->ip);
if (m->cs == __KERNEL_CS)
pr_cont("{%pS}", (void *)m->ip);
pr_cont("{%pS}", (void *)(unsigned long)m->ip);
pr_cont("\n");
}
@ -590,7 +594,8 @@ static int srao_decode_notifier(struct notifier_block *nb, unsigned long val,
if (mce_usable_address(mce) && (mce->severity == MCE_AO_SEVERITY)) {
pfn = mce->addr >> PAGE_SHIFT;
memory_failure(pfn, 0);
if (!memory_failure(pfn, 0))
mce_unmap_kpfn(pfn);
}
return NOTIFY_OK;
@ -1057,12 +1062,13 @@ static int do_memory_failure(struct mce *m)
ret = memory_failure(m->addr >> PAGE_SHIFT, flags);
if (ret)
pr_err("Memory error not recovered");
else
mce_unmap_kpfn(m->addr >> PAGE_SHIFT);
return ret;
}
#if defined(arch_unmap_kpfn) && defined(CONFIG_MEMORY_FAILURE)
void arch_unmap_kpfn(unsigned long pfn)
#ifndef mce_unmap_kpfn
static void mce_unmap_kpfn(unsigned long pfn)
{
unsigned long decoy_addr;
@ -1073,7 +1079,7 @@ void arch_unmap_kpfn(unsigned long pfn)
* We would like to just call:
* set_memory_np((unsigned long)pfn_to_kaddr(pfn), 1);
* but doing that would radically increase the odds of a
* speculative access to the posion page because we'd have
* speculative access to the poison page because we'd have
* the virtual address of the kernel 1:1 mapping sitting
* around in registers.
* Instead we get tricky. We create a non-canonical address
@ -1098,7 +1104,6 @@ void arch_unmap_kpfn(unsigned long pfn)
if (set_memory_np(decoy_addr, 1))
pr_warn("Could not invalidate pfn=0x%lx from 1:1 map\n", pfn);
}
#endif
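The "tricky" alias the comment above describes is the direct-map address with bit 63 flipped: non-canonical, so a stray speculative dereference cannot hit the poison page, yet still carrying the low bits set_memory_np() needs to walk to the right page-table entry. A sketch of the construction (illustrative only; the real code folds the flip into a single address computation so the canonical address never materializes in a register, which is the entire point):

	/* Sketch: a non-canonical decoy for the 1:1 mapping of pfn.
	 * Clearing bit 63 of an 0xffff... direct-map address makes it
	 * non-canonical while preserving the page-identifying low bits. */
	static unsigned long decoy_of_sketch(unsigned long pfn)
	{
		return (unsigned long)pfn_to_kaddr(pfn) & ~(1UL << 63);
	}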

View File

@ -921,7 +921,7 @@ static bool is_blacklisted(unsigned int cpu)
*/
if (c->x86 == 6 &&
c->x86_model == INTEL_FAM6_BROADWELL_X &&
c->x86_mask == 0x01 &&
c->x86_stepping == 0x01 &&
llc_size_per_core > 2621440 &&
c->microcode < 0x0b000021) {
pr_err_once("Erratum BDF90: late loading with revision < 0x0b000021 (0x%x) disabled.\n", c->microcode);
@ -944,7 +944,7 @@ static enum ucode_state request_microcode_fw(int cpu, struct device *device,
return UCODE_NFOUND;
sprintf(name, "intel-ucode/%02x-%02x-%02x",
c->x86, c->x86_model, c->x86_mask);
c->x86, c->x86_model, c->x86_stepping);
if (request_firmware_direct(&firmware, name, device)) {
pr_debug("data file %s load failed\n", name);
@ -982,7 +982,7 @@ static struct microcode_ops microcode_intel_ops = {
static int __init calc_llc_size_per_core(struct cpuinfo_x86 *c)
{
u64 llc_size = c->x86_cache_size * 1024;
u64 llc_size = c->x86_cache_size * 1024ULL;
do_div(llc_size, c->x86_max_cores);
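With x86_cache_size now an unsigned int holding kilobytes, the 1024ULL literal matters: it promotes the multiplication to 64 bits before it can wrap, and do_div() then divides the 64-bit product in place. The difference in one line (hypothetical oversized value):

	/* Sketch: force the KB-to-bytes multiply into 64-bit arithmetic. */
	static u64 llc_bytes_sketch(unsigned int cache_kb)
	{
		return cache_kb * 1024ULL;	/* "cache_kb * 1024" would wrap
						 * at 32 bits for cache_kb >= 4M */
	}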

View File

@ -859,7 +859,7 @@ int generic_validate_add_page(unsigned long base, unsigned long size,
*/
if (is_cpu(INTEL) && boot_cpu_data.x86 == 6 &&
boot_cpu_data.x86_model == 1 &&
boot_cpu_data.x86_mask <= 7) {
boot_cpu_data.x86_stepping <= 7) {
if (base & ((1 << (22 - PAGE_SHIFT)) - 1)) {
pr_warn("mtrr: base(0x%lx000) is not 4 MiB aligned\n", base);
return -EINVAL;

View File

@ -711,8 +711,8 @@ void __init mtrr_bp_init(void)
if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL &&
boot_cpu_data.x86 == 0xF &&
boot_cpu_data.x86_model == 0x3 &&
(boot_cpu_data.x86_mask == 0x3 ||
boot_cpu_data.x86_mask == 0x4))
(boot_cpu_data.x86_stepping == 0x3 ||
boot_cpu_data.x86_stepping == 0x4))
phys_addr = 36;
size_or_mask = SIZE_OR_MASK_BITS(phys_addr);
