From bcf7f2d3fcec8a47ddfee6d8801ab57162922480 Mon Sep 17 00:00:00 2001 From: Russell Currey Date: Fri, 20 Nov 2020 10:35:12 +1100 Subject: [PATCH 01/18] selftests/powerpc: rfi_flush: disable entry flush if present commit fcb48454c23c5679d1a2e252f127642e91b05cbe upstream. We are about to add an entry flush. The rfi (exit) flush test measures the number of L1D flushes over a syscall with the RFI flush enabled and disabled. But if the entry flush is also enabled, the effect of enabling and disabling the RFI flush is masked. If there is a debugfs entry for the entry flush, disable it during the RFI flush and restore it later. Reported-by: Spoorthy S Signed-off-by: Russell Currey Signed-off-by: Daniel Axtens Signed-off-by: Greg Kroah-Hartman --- .../selftests/powerpc/security/rfi_flush.c | 35 +++++++++++++++---- 1 file changed, 29 insertions(+), 6 deletions(-) diff --git a/tools/testing/selftests/powerpc/security/rfi_flush.c b/tools/testing/selftests/powerpc/security/rfi_flush.c index 0a7d0afb26b8..533315e68133 100644 --- a/tools/testing/selftests/powerpc/security/rfi_flush.c +++ b/tools/testing/selftests/powerpc/security/rfi_flush.c @@ -50,16 +50,30 @@ int rfi_flush_test(void) __u64 l1d_misses_total = 0; unsigned long iterations = 100000, zero_size = 24 * 1024; unsigned long l1d_misses_expected; - int rfi_flush_org, rfi_flush; + int rfi_flush_orig, rfi_flush; + int have_entry_flush, entry_flush_orig; SKIP_IF(geteuid() != 0); - if (read_debugfs_file("powerpc/rfi_flush", &rfi_flush_org)) { + if (read_debugfs_file("powerpc/rfi_flush", &rfi_flush_orig) < 0) { perror("Unable to read powerpc/rfi_flush debugfs file"); SKIP_IF(1); } - rfi_flush = rfi_flush_org; + if (read_debugfs_file("powerpc/entry_flush", &entry_flush_orig) < 0) { + have_entry_flush = 0; + } else { + have_entry_flush = 1; + + if (entry_flush_orig != 0) { + if (write_debugfs_file("powerpc/entry_flush", 0) < 0) { + perror("error writing to powerpc/entry_flush debugfs file"); + return 1; + } + } + } + + 
rfi_flush = rfi_flush_orig; fd = perf_event_open_counter(PERF_TYPE_RAW, /* L1d miss */ 0x400f0, -1); FAIL_IF(fd < 0); @@ -68,6 +82,7 @@ int rfi_flush_test(void) FAIL_IF(perf_event_enable(fd)); + // disable L1 prefetching set_dscr(1); iter = repetitions; @@ -109,8 +124,8 @@ int rfi_flush_test(void) repetitions * l1d_misses_expected / 2, passes, repetitions); - if (rfi_flush == rfi_flush_org) { - rfi_flush = !rfi_flush_org; + if (rfi_flush == rfi_flush_orig) { + rfi_flush = !rfi_flush_orig; if (write_debugfs_file("powerpc/rfi_flush", rfi_flush) < 0) { perror("error writing to powerpc/rfi_flush debugfs file"); return 1; @@ -126,11 +141,19 @@ int rfi_flush_test(void) set_dscr(0); - if (write_debugfs_file("powerpc/rfi_flush", rfi_flush_org) < 0) { + if (write_debugfs_file("powerpc/rfi_flush", rfi_flush_orig) < 0) { perror("unable to restore original value of powerpc/rfi_flush debugfs file"); return 1; } + if (have_entry_flush) { + if (write_debugfs_file("powerpc/entry_flush", entry_flush_orig) < 0) { + perror("unable to restore original value of powerpc/entry_flush " + "debugfs file"); + return 1; + } + } + return rc; } From b65458b6be8032c5179d4f562038575d7b3a6be3 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Fri, 20 Nov 2020 10:35:13 +1100 Subject: [PATCH 02/18] powerpc/64s: flush L1D on kernel entry commit f79643787e0a0762d2409b7b8334e83f22d85695 upstream. [backporting note: we need to mark some exception handlers as out-of-line because the flushing makes them take too much space -- dja] IBM Power9 processors can speculatively operate on data in the L1 cache before it has been completely validated, via a way-prediction mechanism. It is not possible for an attacker to determine the contents of impermissible memory using this method, since these systems implement a combination of hardware and software security measures to prevent scenarios where protected data could be leaked. 
However these measures don't address the scenario where an attacker induces the operating system to speculatively execute instructions using data that the attacker controls. This can be used for example to speculatively bypass "kernel user access prevention" techniques, as discovered by Anthony Steinhauser of Google's Safeside Project. This is not an attack by itself, but there is a possibility it could be used in conjunction with side-channels or other weaknesses in the privileged code to construct an attack. This issue can be mitigated by flushing the L1 cache between privilege boundaries of concern. This patch flushes the L1 cache on kernel entry. This is part of the fix for CVE-2020-4788. Signed-off-by: Nicholas Piggin Signed-off-by: Daniel Axtens Signed-off-by: Greg Kroah-Hartman --- .../admin-guide/kernel-parameters.txt | 3 + arch/powerpc/include/asm/exception-64s.h | 9 ++- arch/powerpc/include/asm/feature-fixups.h | 10 ++++ arch/powerpc/include/asm/security_features.h | 4 ++ arch/powerpc/include/asm/setup.h | 3 + arch/powerpc/kernel/exceptions-64s.S | 49 +++++++++++++-- arch/powerpc/kernel/setup_64.c | 60 ++++++++++++++++++- arch/powerpc/kernel/vmlinux.lds.S | 7 +++ arch/powerpc/lib/feature-fixups.c | 54 +++++++++++++++++ arch/powerpc/platforms/powernv/setup.c | 11 ++++ arch/powerpc/platforms/pseries/setup.c | 4 ++ 11 files changed, 206 insertions(+), 8 deletions(-) diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index 5b4753e602de..e1036ff037e6 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -2667,6 +2667,7 @@ mds=off [X86] tsx_async_abort=off [X86] kvm.nx_huge_pages=off [X86] + no_entry_flush [PPC] Exceptions: This does not have any effect on @@ -2989,6 +2990,8 @@ noefi Disable EFI runtime services support. + no_entry_flush [PPC] Don't flush the L1-D cache when entering the kernel. 
+ noexec [IA-64] noexec [X86] diff --git a/arch/powerpc/include/asm/exception-64s.h b/arch/powerpc/include/asm/exception-64s.h index 33f4f72eb035..82fc12ae3278 100644 --- a/arch/powerpc/include/asm/exception-64s.h +++ b/arch/powerpc/include/asm/exception-64s.h @@ -61,11 +61,18 @@ nop; \ nop +#define ENTRY_FLUSH_SLOT \ + ENTRY_FLUSH_FIXUP_SECTION; \ + nop; \ + nop; \ + nop; + /* * r10 must be free to use, r13 must be paca */ #define INTERRUPT_TO_KERNEL \ - STF_ENTRY_BARRIER_SLOT + STF_ENTRY_BARRIER_SLOT; \ + ENTRY_FLUSH_SLOT /* * Macros for annotating the expected destination of (h)rfid diff --git a/arch/powerpc/include/asm/feature-fixups.h b/arch/powerpc/include/asm/feature-fixups.h index b0af97add751..06a48219bbf2 100644 --- a/arch/powerpc/include/asm/feature-fixups.h +++ b/arch/powerpc/include/asm/feature-fixups.h @@ -205,6 +205,14 @@ label##3: \ FTR_ENTRY_OFFSET 955b-956b; \ .popsection; +#define ENTRY_FLUSH_FIXUP_SECTION \ +957: \ + .pushsection __entry_flush_fixup,"a"; \ + .align 2; \ +958: \ + FTR_ENTRY_OFFSET 957b-958b; \ + .popsection; + #define RFI_FLUSH_FIXUP_SECTION \ 951: \ .pushsection __rfi_flush_fixup,"a"; \ @@ -237,8 +245,10 @@ label##3: \ #include extern long stf_barrier_fallback; +extern long entry_flush_fallback; extern long __start___stf_entry_barrier_fixup, __stop___stf_entry_barrier_fixup; extern long __start___stf_exit_barrier_fixup, __stop___stf_exit_barrier_fixup; +extern long __start___entry_flush_fixup, __stop___entry_flush_fixup; extern long __start___rfi_flush_fixup, __stop___rfi_flush_fixup; extern long __start___barrier_nospec_fixup, __stop___barrier_nospec_fixup; extern long __start__btb_flush_fixup, __stop__btb_flush_fixup; diff --git a/arch/powerpc/include/asm/security_features.h b/arch/powerpc/include/asm/security_features.h index 7c05e95a5c44..8c99b651a83e 100644 --- a/arch/powerpc/include/asm/security_features.h +++ b/arch/powerpc/include/asm/security_features.h @@ -84,12 +84,16 @@ static inline bool security_ftr_enabled(u64 
feature) // Software required to flush link stack on context switch #define SEC_FTR_FLUSH_LINK_STACK 0x0000000000001000ull +// The L1-D cache should be flushed when entering the kernel +#define SEC_FTR_L1D_FLUSH_ENTRY 0x0000000000004000ull + // Features enabled by default #define SEC_FTR_DEFAULT \ (SEC_FTR_L1D_FLUSH_HV | \ SEC_FTR_L1D_FLUSH_PR | \ SEC_FTR_BNDS_CHK_SPEC_BAR | \ + SEC_FTR_L1D_FLUSH_ENTRY | \ SEC_FTR_FAVOUR_SECURITY) #endif /* _ASM_POWERPC_SECURITY_FEATURES_H */ diff --git a/arch/powerpc/include/asm/setup.h b/arch/powerpc/include/asm/setup.h index 65676e2325b8..556635217e5c 100644 --- a/arch/powerpc/include/asm/setup.h +++ b/arch/powerpc/include/asm/setup.h @@ -52,12 +52,15 @@ enum l1d_flush_type { }; void setup_rfi_flush(enum l1d_flush_type, bool enable); +void setup_entry_flush(bool enable); +void setup_uaccess_flush(bool enable); void do_rfi_flush_fixups(enum l1d_flush_type types); #ifdef CONFIG_PPC_BARRIER_NOSPEC void setup_barrier_nospec(void); #else static inline void setup_barrier_nospec(void) { }; #endif +void do_entry_flush_fixups(enum l1d_flush_type types); void do_barrier_nospec_fixups(bool enable); extern bool barrier_nospec_enabled; diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index 70ac8a6ba0c1..a31a8b39f234 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -1150,7 +1150,7 @@ EXC_REAL_BEGIN(data_access, 0x300, 0x80) INT_HANDLER data_access, 0x300, ool=1, dar=1, dsisr=1, kvm=1 EXC_REAL_END(data_access, 0x300, 0x80) EXC_VIRT_BEGIN(data_access, 0x4300, 0x80) - INT_HANDLER data_access, 0x300, virt=1, dar=1, dsisr=1 + INT_HANDLER data_access, 0x300, ool=1, virt=1, dar=1, dsisr=1 EXC_VIRT_END(data_access, 0x4300, 0x80) INT_KVM_HANDLER data_access, 0x300, EXC_STD, PACA_EXGEN, 1 EXC_COMMON_BEGIN(data_access_common) @@ -1205,7 +1205,7 @@ ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_TYPE_RADIX) EXC_REAL_BEGIN(instruction_access, 0x400, 0x80) - INT_HANDLER 
instruction_access, 0x400, kvm=1 + INT_HANDLER instruction_access, 0x400, ool=1, kvm=1 EXC_REAL_END(instruction_access, 0x400, 0x80) EXC_VIRT_BEGIN(instruction_access, 0x4400, 0x80) INT_HANDLER instruction_access, 0x400, virt=1 @@ -1225,7 +1225,7 @@ ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_TYPE_RADIX) EXC_REAL_BEGIN(instruction_access_slb, 0x480, 0x80) - INT_HANDLER instruction_access_slb, 0x480, area=PACA_EXSLB, kvm=1 + INT_HANDLER instruction_access_slb, 0x480, ool=1, area=PACA_EXSLB, kvm=1 EXC_REAL_END(instruction_access_slb, 0x480, 0x80) EXC_VIRT_BEGIN(instruction_access_slb, 0x4480, 0x80) INT_HANDLER instruction_access_slb, 0x480, virt=1, area=PACA_EXSLB @@ -1365,17 +1365,17 @@ EXC_REAL_BEGIN(decrementer, 0x900, 0x80) INT_HANDLER decrementer, 0x900, ool=1, bitmask=IRQS_DISABLED, kvm=1 EXC_REAL_END(decrementer, 0x900, 0x80) EXC_VIRT_BEGIN(decrementer, 0x4900, 0x80) - INT_HANDLER decrementer, 0x900, virt=1, bitmask=IRQS_DISABLED + INT_HANDLER decrementer, 0x900, ool=1, virt=1, bitmask=IRQS_DISABLED EXC_VIRT_END(decrementer, 0x4900, 0x80) INT_KVM_HANDLER decrementer, 0x900, EXC_STD, PACA_EXGEN, 0 EXC_COMMON_ASYNC(decrementer_common, 0x900, timer_interrupt) EXC_REAL_BEGIN(hdecrementer, 0x980, 0x80) - INT_HANDLER hdecrementer, 0x980, hsrr=EXC_HV, kvm=1 + INT_HANDLER hdecrementer, 0x980, ool=1, hsrr=EXC_HV, kvm=1 EXC_REAL_END(hdecrementer, 0x980, 0x80) EXC_VIRT_BEGIN(hdecrementer, 0x4980, 0x80) - INT_HANDLER hdecrementer, 0x980, virt=1, hsrr=EXC_HV, kvm=1 + INT_HANDLER hdecrementer, 0x980, ool=1, virt=1, hsrr=EXC_HV, kvm=1 EXC_VIRT_END(hdecrementer, 0x4980, 0x80) INT_KVM_HANDLER hdecrementer, 0x980, EXC_HV, PACA_EXGEN, 0 EXC_COMMON(hdecrementer_common, 0x980, hdec_interrupt) @@ -2046,6 +2046,43 @@ TRAMP_REAL_BEGIN(stf_barrier_fallback) .endr blr +TRAMP_REAL_BEGIN(entry_flush_fallback) + std r9,PACA_EXRFI+EX_R9(r13) + std r10,PACA_EXRFI+EX_R10(r13) + std r11,PACA_EXRFI+EX_R11(r13) + mfctr r9 + ld r10,PACA_RFI_FLUSH_FALLBACK_AREA(r13) + ld r11,PACA_L1D_FLUSH_SIZE(r13) + 
srdi r11,r11,(7 + 3) /* 128 byte lines, unrolled 8x */ + mtctr r11 + DCBT_BOOK3S_STOP_ALL_STREAM_IDS(r11) /* Stop prefetch streams */ + + /* order ld/st prior to dcbt stop all streams with flushing */ + sync + + /* + * The load addresses are at staggered offsets within cachelines, + * which suits some pipelines better (on others it should not + * hurt). + */ +1: + ld r11,(0x80 + 8)*0(r10) + ld r11,(0x80 + 8)*1(r10) + ld r11,(0x80 + 8)*2(r10) + ld r11,(0x80 + 8)*3(r10) + ld r11,(0x80 + 8)*4(r10) + ld r11,(0x80 + 8)*5(r10) + ld r11,(0x80 + 8)*6(r10) + ld r11,(0x80 + 8)*7(r10) + addi r10,r10,0x80*8 + bdnz 1b + + mtctr r9 + ld r9,PACA_EXRFI+EX_R9(r13) + ld r10,PACA_EXRFI+EX_R10(r13) + ld r11,PACA_EXRFI+EX_R11(r13) + blr + TRAMP_REAL_BEGIN(rfi_flush_fallback) SET_SCRATCH0(r13); GET_PACA(r13); diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c index e50fbed36651..fc0ec8cf3a7e 100644 --- a/arch/powerpc/kernel/setup_64.c +++ b/arch/powerpc/kernel/setup_64.c @@ -859,7 +859,9 @@ early_initcall(disable_hardlockup_detector); static enum l1d_flush_type enabled_flush_types; static void *l1d_flush_fallback_area; static bool no_rfi_flush; +static bool no_entry_flush; bool rfi_flush; +bool entry_flush; static int __init handle_no_rfi_flush(char *p) { @@ -869,6 +871,14 @@ static int __init handle_no_rfi_flush(char *p) } early_param("no_rfi_flush", handle_no_rfi_flush); +static int __init handle_no_entry_flush(char *p) +{ + pr_info("entry-flush: disabled on command line."); + no_entry_flush = true; + return 0; +} +early_param("no_entry_flush", handle_no_entry_flush); + /* * The RFI flush is not KPTI, but because users will see doco that says to use * nopti we hijack that option here to also disable the RFI flush. 
@@ -900,6 +910,18 @@ void rfi_flush_enable(bool enable) rfi_flush = enable; } +void entry_flush_enable(bool enable) +{ + if (enable) { + do_entry_flush_fixups(enabled_flush_types); + on_each_cpu(do_nothing, NULL, 1); + } else { + do_entry_flush_fixups(L1D_FLUSH_NONE); + } + + entry_flush = enable; +} + static void __ref init_fallback_flush(void) { u64 l1d_size, limit; @@ -958,10 +980,19 @@ void setup_rfi_flush(enum l1d_flush_type types, bool enable) enabled_flush_types = types; - if (!no_rfi_flush && !cpu_mitigations_off()) + if (!cpu_mitigations_off() && !no_rfi_flush) rfi_flush_enable(enable); } +void setup_entry_flush(bool enable) +{ + if (cpu_mitigations_off()) + return; + + if (!no_entry_flush) + entry_flush_enable(enable); +} + #ifdef CONFIG_DEBUG_FS static int rfi_flush_set(void *data, u64 val) { @@ -989,9 +1020,36 @@ static int rfi_flush_get(void *data, u64 *val) DEFINE_SIMPLE_ATTRIBUTE(fops_rfi_flush, rfi_flush_get, rfi_flush_set, "%llu\n"); +static int entry_flush_set(void *data, u64 val) +{ + bool enable; + + if (val == 1) + enable = true; + else if (val == 0) + enable = false; + else + return -EINVAL; + + /* Only do anything if we're changing state */ + if (enable != entry_flush) + entry_flush_enable(enable); + + return 0; +} + +static int entry_flush_get(void *data, u64 *val) +{ + *val = entry_flush ? 
1 : 0; + return 0; +} + +DEFINE_SIMPLE_ATTRIBUTE(fops_entry_flush, entry_flush_get, entry_flush_set, "%llu\n"); + static __init int rfi_flush_debugfs_init(void) { debugfs_create_file("rfi_flush", 0600, powerpc_debugfs_root, NULL, &fops_rfi_flush); + debugfs_create_file("entry_flush", 0600, powerpc_debugfs_root, NULL, &fops_entry_flush); return 0; } device_initcall(rfi_flush_debugfs_init); diff --git a/arch/powerpc/kernel/vmlinux.lds.S b/arch/powerpc/kernel/vmlinux.lds.S index 060a1acd7c6d..752bf5910283 100644 --- a/arch/powerpc/kernel/vmlinux.lds.S +++ b/arch/powerpc/kernel/vmlinux.lds.S @@ -143,6 +143,13 @@ SECTIONS __stop___stf_entry_barrier_fixup = .; } + . = ALIGN(8); + __entry_flush_fixup : AT(ADDR(__entry_flush_fixup) - LOAD_OFFSET) { + __start___entry_flush_fixup = .; + *(__entry_flush_fixup) + __stop___entry_flush_fixup = .; + } + . = ALIGN(8); __stf_exit_barrier_fixup : AT(ADDR(__stf_exit_barrier_fixup) - LOAD_OFFSET) { __start___stf_exit_barrier_fixup = .; diff --git a/arch/powerpc/lib/feature-fixups.c b/arch/powerpc/lib/feature-fixups.c index 4ba634b89ce5..8050f074b346 100644 --- a/arch/powerpc/lib/feature-fixups.c +++ b/arch/powerpc/lib/feature-fixups.c @@ -228,6 +228,60 @@ void do_stf_barrier_fixups(enum stf_barrier_type types) do_stf_exit_barrier_fixups(types); } +void do_entry_flush_fixups(enum l1d_flush_type types) +{ + unsigned int instrs[3], *dest; + long *start, *end; + int i; + + start = PTRRELOC(&__start___entry_flush_fixup); + end = PTRRELOC(&__stop___entry_flush_fixup); + + instrs[0] = 0x60000000; /* nop */ + instrs[1] = 0x60000000; /* nop */ + instrs[2] = 0x60000000; /* nop */ + + i = 0; + if (types == L1D_FLUSH_FALLBACK) { + instrs[i++] = 0x7d4802a6; /* mflr r10 */ + instrs[i++] = 0x60000000; /* branch patched below */ + instrs[i++] = 0x7d4803a6; /* mtlr r10 */ + } + + if (types & L1D_FLUSH_ORI) { + instrs[i++] = 0x63ff0000; /* ori 31,31,0 speculation barrier */ + instrs[i++] = 0x63de0000; /* ori 30,30,0 L1d flush*/ + } + + if (types & 
L1D_FLUSH_MTTRIG) + instrs[i++] = 0x7c12dba6; /* mtspr TRIG2,r0 (SPR #882) */ + + for (i = 0; start < end; start++, i++) { + dest = (void *)start + *start; + + pr_devel("patching dest %lx\n", (unsigned long)dest); + + patch_instruction(dest, instrs[0]); + + if (types == L1D_FLUSH_FALLBACK) + patch_branch((dest + 1), (unsigned long)&entry_flush_fallback, + BRANCH_SET_LINK); + else + patch_instruction((dest + 1), instrs[1]); + + patch_instruction((dest + 2), instrs[2]); + } + + printk(KERN_DEBUG "entry-flush: patched %d locations (%s flush)\n", i, + (types == L1D_FLUSH_NONE) ? "no" : + (types == L1D_FLUSH_FALLBACK) ? "fallback displacement" : + (types & L1D_FLUSH_ORI) ? (types & L1D_FLUSH_MTTRIG) + ? "ori+mttrig type" + : "ori type" : + (types & L1D_FLUSH_MTTRIG) ? "mttrig type" + : "unknown"); +} + void do_rfi_flush_fixups(enum l1d_flush_type types) { unsigned int instrs[3], *dest; diff --git a/arch/powerpc/platforms/powernv/setup.c b/arch/powerpc/platforms/powernv/setup.c index 83498604d322..36d60bc2c5e4 100644 --- a/arch/powerpc/platforms/powernv/setup.c +++ b/arch/powerpc/platforms/powernv/setup.c @@ -122,12 +122,23 @@ static void pnv_setup_rfi_flush(void) type = L1D_FLUSH_ORI; } + /* + * If we are non-Power9 bare metal, we don't need to flush on kernel + * entry: it fixes a P9 specific vulnerability. 
+ */ + if (!pvr_version_is(PVR_POWER9)) + security_ftr_clear(SEC_FTR_L1D_FLUSH_ENTRY); + enable = security_ftr_enabled(SEC_FTR_FAVOUR_SECURITY) && \ (security_ftr_enabled(SEC_FTR_L1D_FLUSH_PR) || \ security_ftr_enabled(SEC_FTR_L1D_FLUSH_HV)); setup_rfi_flush(type, enable); setup_count_cache_flush(); + + enable = security_ftr_enabled(SEC_FTR_FAVOUR_SECURITY) && + security_ftr_enabled(SEC_FTR_L1D_FLUSH_ENTRY); + setup_entry_flush(enable); } static void __init pnv_setup_arch(void) diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c index 0c8421dd01ab..0597bff44788 100644 --- a/arch/powerpc/platforms/pseries/setup.c +++ b/arch/powerpc/platforms/pseries/setup.c @@ -561,6 +561,10 @@ void pseries_setup_rfi_flush(void) setup_rfi_flush(types, enable); setup_count_cache_flush(); + + enable = security_ftr_enabled(SEC_FTR_FAVOUR_SECURITY) && + security_ftr_enabled(SEC_FTR_L1D_FLUSH_ENTRY); + setup_entry_flush(enable); } #ifdef CONFIG_PCI_IOV From 09495b5f7aab84cf41ef54259cfea4da86a7df98 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Fri, 20 Nov 2020 10:35:14 +1100 Subject: [PATCH 03/18] powerpc/64s: flush L1D after user accesses commit 9a32a7e78bd0cd9a9b6332cbdc345ee5ffd0c5de upstream. IBM Power9 processors can speculatively operate on data in the L1 cache before it has been completely validated, via a way-prediction mechanism. It is not possible for an attacker to determine the contents of impermissible memory using this method, since these systems implement a combination of hardware and software security measures to prevent scenarios where protected data could be leaked. However these measures don't address the scenario where an attacker induces the operating system to speculatively execute instructions using data that the attacker controls. This can be used for example to speculatively bypass "kernel user access prevention" techniques, as discovered by Anthony Steinhauser of Google's Safeside Project. 
This is not an attack by itself, but there is a possibility it could be used in conjunction with side-channels or other weaknesses in the privileged code to construct an attack. This issue can be mitigated by flushing the L1 cache between privilege boundaries of concern. This patch flushes the L1 cache after user accesses. This is part of the fix for CVE-2020-4788. Signed-off-by: Nicholas Piggin Signed-off-by: Daniel Axtens Signed-off-by: Greg Kroah-Hartman --- .../admin-guide/kernel-parameters.txt | 4 + .../powerpc/include/asm/book3s/64/kup-radix.h | 24 +++--- arch/powerpc/include/asm/exception-64s.h | 3 + arch/powerpc/include/asm/feature-fixups.h | 9 +++ arch/powerpc/include/asm/kup.h | 17 +++- arch/powerpc/include/asm/security_features.h | 3 + arch/powerpc/include/asm/setup.h | 1 + arch/powerpc/kernel/exceptions-64s.S | 81 ++++++------------- arch/powerpc/kernel/setup_64.c | 62 ++++++++++++++ arch/powerpc/kernel/vmlinux.lds.S | 7 ++ arch/powerpc/lib/feature-fixups.c | 50 ++++++++++++ arch/powerpc/platforms/powernv/setup.c | 4 + arch/powerpc/platforms/pseries/setup.c | 4 + 13 files changed, 199 insertions(+), 70 deletions(-) diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index e1036ff037e6..fea15cd49fbc 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -2668,6 +2668,7 @@ tsx_async_abort=off [X86] kvm.nx_huge_pages=off [X86] no_entry_flush [PPC] + no_uaccess_flush [PPC] Exceptions: This does not have any effect on @@ -3041,6 +3042,9 @@ nospec_store_bypass_disable [HW] Disable all mitigations for the Speculative Store Bypass vulnerability + no_uaccess_flush + [PPC] Don't flush the L1-D cache after accessing user data. + noxsave [BUGS=X86] Disables x86 extended register state save and restore using xsave. The kernel will fallback to enabling legacy floating-point and sse state. 
diff --git a/arch/powerpc/include/asm/book3s/64/kup-radix.h b/arch/powerpc/include/asm/book3s/64/kup-radix.h index c8d1076e0ebb..394931798550 100644 --- a/arch/powerpc/include/asm/book3s/64/kup-radix.h +++ b/arch/powerpc/include/asm/book3s/64/kup-radix.h @@ -54,6 +54,8 @@ #else /* !__ASSEMBLY__ */ +DECLARE_STATIC_KEY_FALSE(uaccess_flush_key); + #ifdef CONFIG_PPC_KUAP #include @@ -77,6 +79,17 @@ static inline void set_kuap(unsigned long value) isync(); } +static inline bool +bad_kuap_fault(struct pt_regs *regs, unsigned long address, bool is_write) +{ + return WARN(mmu_has_feature(MMU_FTR_RADIX_KUAP) && + (regs->kuap & (is_write ? AMR_KUAP_BLOCK_WRITE : AMR_KUAP_BLOCK_READ)), + "Bug: %s fault blocked by AMR!", is_write ? "Write" : "Read"); +} +#else /* CONFIG_PPC_KUAP */ +static inline void set_kuap(unsigned long value) { } +#endif /* !CONFIG_PPC_KUAP */ + static __always_inline void allow_user_access(void __user *to, const void __user *from, unsigned long size, unsigned long dir) { @@ -94,17 +107,10 @@ static inline void prevent_user_access(void __user *to, const void __user *from, unsigned long size, unsigned long dir) { set_kuap(AMR_KUAP_BLOCKED); + if (static_branch_unlikely(&uaccess_flush_key)) + do_uaccess_flush(); } -static inline bool -bad_kuap_fault(struct pt_regs *regs, unsigned long address, bool is_write) -{ - return WARN(mmu_has_feature(MMU_FTR_RADIX_KUAP) && - (regs->kuap & (is_write ? AMR_KUAP_BLOCK_WRITE : AMR_KUAP_BLOCK_READ)), - "Bug: %s fault blocked by AMR!", is_write ? 
"Write" : "Read"); -} -#endif /* CONFIG_PPC_KUAP */ - #endif /* __ASSEMBLY__ */ #endif /* _ASM_POWERPC_BOOK3S_64_KUP_RADIX_H */ diff --git a/arch/powerpc/include/asm/exception-64s.h b/arch/powerpc/include/asm/exception-64s.h index 82fc12ae3278..6d0795d7b89c 100644 --- a/arch/powerpc/include/asm/exception-64s.h +++ b/arch/powerpc/include/asm/exception-64s.h @@ -134,6 +134,9 @@ hrfid; \ b hrfi_flush_fallback +#else /* __ASSEMBLY__ */ +/* Prototype for function defined in exceptions-64s.S */ +void do_uaccess_flush(void); #endif /* __ASSEMBLY__ */ #endif /* _ASM_POWERPC_EXCEPTION_H */ diff --git a/arch/powerpc/include/asm/feature-fixups.h b/arch/powerpc/include/asm/feature-fixups.h index 06a48219bbf2..fbd406cd6916 100644 --- a/arch/powerpc/include/asm/feature-fixups.h +++ b/arch/powerpc/include/asm/feature-fixups.h @@ -205,6 +205,14 @@ label##3: \ FTR_ENTRY_OFFSET 955b-956b; \ .popsection; +#define UACCESS_FLUSH_FIXUP_SECTION \ +959: \ + .pushsection __uaccess_flush_fixup,"a"; \ + .align 2; \ +960: \ + FTR_ENTRY_OFFSET 959b-960b; \ + .popsection; + #define ENTRY_FLUSH_FIXUP_SECTION \ 957: \ .pushsection __entry_flush_fixup,"a"; \ @@ -248,6 +256,7 @@ extern long stf_barrier_fallback; extern long entry_flush_fallback; extern long __start___stf_entry_barrier_fixup, __stop___stf_entry_barrier_fixup; extern long __start___stf_exit_barrier_fixup, __stop___stf_exit_barrier_fixup; +extern long __start___uaccess_flush_fixup, __stop___uaccess_flush_fixup; extern long __start___entry_flush_fixup, __stop___entry_flush_fixup; extern long __start___rfi_flush_fixup, __stop___rfi_flush_fixup; extern long __start___barrier_nospec_fixup, __stop___barrier_nospec_fixup; diff --git a/arch/powerpc/include/asm/kup.h b/arch/powerpc/include/asm/kup.h index 94f24928916a..8f4d27980003 100644 --- a/arch/powerpc/include/asm/kup.h +++ b/arch/powerpc/include/asm/kup.h @@ -45,15 +45,24 @@ static inline void setup_kuep(bool disabled) { } void setup_kuap(bool disabled); #else static inline void 
setup_kuap(bool disabled) { } -static inline void allow_user_access(void __user *to, const void __user *from, - unsigned long size, unsigned long dir) { } -static inline void prevent_user_access(void __user *to, const void __user *from, - unsigned long size, unsigned long dir) { } + static inline bool bad_kuap_fault(struct pt_regs *regs, unsigned long address, bool is_write) { return false; } + +/* + * book3s/64/kup-radix.h defines these functions for the !KUAP case to flush + * the L1D cache after user accesses. Only include the empty stubs for other + * platforms. + */ +#ifndef CONFIG_PPC64 +static inline void allow_user_access(void __user *to, const void __user *from, + unsigned long size, unsigned long dir) { } +static inline void prevent_user_access(void __user *to, const void __user *from, + unsigned long size, unsigned long dir) { } +#endif /* CONFIG_PPC64 */ #endif /* CONFIG_PPC_KUAP */ static inline void allow_read_from_user(const void __user *from, unsigned long size) diff --git a/arch/powerpc/include/asm/security_features.h b/arch/powerpc/include/asm/security_features.h index 8c99b651a83e..e9e3f85134e5 100644 --- a/arch/powerpc/include/asm/security_features.h +++ b/arch/powerpc/include/asm/security_features.h @@ -87,6 +87,8 @@ static inline bool security_ftr_enabled(u64 feature) // The L1-D cache should be flushed when entering the kernel #define SEC_FTR_L1D_FLUSH_ENTRY 0x0000000000004000ull +// The L1-D cache should be flushed after user accesses from the kernel +#define SEC_FTR_L1D_FLUSH_UACCESS 0x0000000000008000ull // Features enabled by default #define SEC_FTR_DEFAULT \ @@ -94,6 +96,7 @@ static inline bool security_ftr_enabled(u64 feature) SEC_FTR_L1D_FLUSH_PR | \ SEC_FTR_BNDS_CHK_SPEC_BAR | \ SEC_FTR_L1D_FLUSH_ENTRY | \ + SEC_FTR_L1D_FLUSH_UACCESS | \ SEC_FTR_FAVOUR_SECURITY) #endif /* _ASM_POWERPC_SECURITY_FEATURES_H */ diff --git a/arch/powerpc/include/asm/setup.h b/arch/powerpc/include/asm/setup.h index 556635217e5c..6f2f4497e13b 100644 --- 
a/arch/powerpc/include/asm/setup.h +++ b/arch/powerpc/include/asm/setup.h @@ -60,6 +60,7 @@ void setup_barrier_nospec(void); #else static inline void setup_barrier_nospec(void) { }; #endif +void do_uaccess_flush_fixups(enum l1d_flush_type types); void do_entry_flush_fixups(enum l1d_flush_type types); void do_barrier_nospec_fixups(bool enable); extern bool barrier_nospec_enabled; diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index a31a8b39f234..88bba0a931d6 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -2046,11 +2046,8 @@ TRAMP_REAL_BEGIN(stf_barrier_fallback) .endr blr -TRAMP_REAL_BEGIN(entry_flush_fallback) - std r9,PACA_EXRFI+EX_R9(r13) - std r10,PACA_EXRFI+EX_R10(r13) - std r11,PACA_EXRFI+EX_R11(r13) - mfctr r9 +/* Clobbers r10, r11, ctr */ +.macro L1D_DISPLACEMENT_FLUSH ld r10,PACA_RFI_FLUSH_FALLBACK_AREA(r13) ld r11,PACA_L1D_FLUSH_SIZE(r13) srdi r11,r11,(7 + 3) /* 128 byte lines, unrolled 8x */ @@ -2076,7 +2073,14 @@ TRAMP_REAL_BEGIN(entry_flush_fallback) ld r11,(0x80 + 8)*7(r10) addi r10,r10,0x80*8 bdnz 1b +.endm +TRAMP_REAL_BEGIN(entry_flush_fallback) + std r9,PACA_EXRFI+EX_R9(r13) + std r10,PACA_EXRFI+EX_R10(r13) + std r11,PACA_EXRFI+EX_R11(r13) + mfctr r9 + L1D_DISPLACEMENT_FLUSH mtctr r9 ld r9,PACA_EXRFI+EX_R9(r13) ld r10,PACA_EXRFI+EX_R10(r13) @@ -2092,32 +2096,7 @@ TRAMP_REAL_BEGIN(rfi_flush_fallback) std r10,PACA_EXRFI+EX_R10(r13) std r11,PACA_EXRFI+EX_R11(r13) mfctr r9 - ld r10,PACA_RFI_FLUSH_FALLBACK_AREA(r13) - ld r11,PACA_L1D_FLUSH_SIZE(r13) - srdi r11,r11,(7 + 3) /* 128 byte lines, unrolled 8x */ - mtctr r11 - DCBT_BOOK3S_STOP_ALL_STREAM_IDS(r11) /* Stop prefetch streams */ - - /* order ld/st prior to dcbt stop all streams with flushing */ - sync - - /* - * The load adresses are at staggered offsets within cachelines, - * which suits some pipelines better (on others it should not - * hurt). 
- */ -1: - ld r11,(0x80 + 8)*0(r10) - ld r11,(0x80 + 8)*1(r10) - ld r11,(0x80 + 8)*2(r10) - ld r11,(0x80 + 8)*3(r10) - ld r11,(0x80 + 8)*4(r10) - ld r11,(0x80 + 8)*5(r10) - ld r11,(0x80 + 8)*6(r10) - ld r11,(0x80 + 8)*7(r10) - addi r10,r10,0x80*8 - bdnz 1b - + L1D_DISPLACEMENT_FLUSH mtctr r9 ld r9,PACA_EXRFI+EX_R9(r13) ld r10,PACA_EXRFI+EX_R10(r13) @@ -2135,32 +2114,7 @@ TRAMP_REAL_BEGIN(hrfi_flush_fallback) std r10,PACA_EXRFI+EX_R10(r13) std r11,PACA_EXRFI+EX_R11(r13) mfctr r9 - ld r10,PACA_RFI_FLUSH_FALLBACK_AREA(r13) - ld r11,PACA_L1D_FLUSH_SIZE(r13) - srdi r11,r11,(7 + 3) /* 128 byte lines, unrolled 8x */ - mtctr r11 - DCBT_BOOK3S_STOP_ALL_STREAM_IDS(r11) /* Stop prefetch streams */ - - /* order ld/st prior to dcbt stop all streams with flushing */ - sync - - /* - * The load adresses are at staggered offsets within cachelines, - * which suits some pipelines better (on others it should not - * hurt). - */ -1: - ld r11,(0x80 + 8)*0(r10) - ld r11,(0x80 + 8)*1(r10) - ld r11,(0x80 + 8)*2(r10) - ld r11,(0x80 + 8)*3(r10) - ld r11,(0x80 + 8)*4(r10) - ld r11,(0x80 + 8)*5(r10) - ld r11,(0x80 + 8)*6(r10) - ld r11,(0x80 + 8)*7(r10) - addi r10,r10,0x80*8 - bdnz 1b - + L1D_DISPLACEMENT_FLUSH mtctr r9 ld r9,PACA_EXRFI+EX_R9(r13) ld r10,PACA_EXRFI+EX_R10(r13) @@ -2169,6 +2123,19 @@ TRAMP_REAL_BEGIN(hrfi_flush_fallback) GET_SCRATCH0(r13); hrfid +USE_TEXT_SECTION() + +_GLOBAL(do_uaccess_flush) + UACCESS_FLUSH_FIXUP_SECTION + nop + nop + nop + blr + L1D_DISPLACEMENT_FLUSH + blr +_ASM_NOKPROBE_SYMBOL(do_uaccess_flush) +EXPORT_SYMBOL(do_uaccess_flush) + /* * Real mode exceptions actually use this too, but alternate * instruction code patches (which end up in the common .text area) diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c index fc0ec8cf3a7e..a6b72dd431a4 100644 --- a/arch/powerpc/kernel/setup_64.c +++ b/arch/powerpc/kernel/setup_64.c @@ -860,8 +860,12 @@ static enum l1d_flush_type enabled_flush_types; static void *l1d_flush_fallback_area; static 
bool no_rfi_flush; static bool no_entry_flush; +static bool no_uaccess_flush; bool rfi_flush; bool entry_flush; +bool uaccess_flush; +DEFINE_STATIC_KEY_FALSE(uaccess_flush_key); +EXPORT_SYMBOL(uaccess_flush_key); static int __init handle_no_rfi_flush(char *p) { @@ -879,6 +883,14 @@ static int __init handle_no_entry_flush(char *p) } early_param("no_entry_flush", handle_no_entry_flush); +static int __init handle_no_uaccess_flush(char *p) +{ + pr_info("uaccess-flush: disabled on command line."); + no_uaccess_flush = true; + return 0; +} +early_param("no_uaccess_flush", handle_no_uaccess_flush); + /* * The RFI flush is not KPTI, but because users will see doco that says to use * nopti we hijack that option here to also disable the RFI flush. @@ -922,6 +934,20 @@ void entry_flush_enable(bool enable) entry_flush = enable; } +void uaccess_flush_enable(bool enable) +{ + if (enable) { + do_uaccess_flush_fixups(enabled_flush_types); + static_branch_enable(&uaccess_flush_key); + on_each_cpu(do_nothing, NULL, 1); + } else { + static_branch_disable(&uaccess_flush_key); + do_uaccess_flush_fixups(L1D_FLUSH_NONE); + } + + uaccess_flush = enable; +} + static void __ref init_fallback_flush(void) { u64 l1d_size, limit; @@ -993,6 +1019,15 @@ void setup_entry_flush(bool enable) entry_flush_enable(enable); } +void setup_uaccess_flush(bool enable) +{ + if (cpu_mitigations_off()) + return; + + if (!no_uaccess_flush) + uaccess_flush_enable(true); +} + #ifdef CONFIG_DEBUG_FS static int rfi_flush_set(void *data, u64 val) { @@ -1046,10 +1081,37 @@ static int entry_flush_get(void *data, u64 *val) DEFINE_SIMPLE_ATTRIBUTE(fops_entry_flush, entry_flush_get, entry_flush_set, "%llu\n"); +static int uaccess_flush_set(void *data, u64 val) +{ + bool enable; + + if (val == 1) + enable = true; + else if (val == 0) + enable = false; + else + return -EINVAL; + + /* Only do anything if we're changing state */ + if (enable != uaccess_flush) + uaccess_flush_enable(enable); + + return 0; +} + +static int 
uaccess_flush_get(void *data, u64 *val) +{ + *val = uaccess_flush ? 1 : 0; + return 0; +} + +DEFINE_SIMPLE_ATTRIBUTE(fops_uaccess_flush, uaccess_flush_get, uaccess_flush_set, "%llu\n"); + static __init int rfi_flush_debugfs_init(void) { debugfs_create_file("rfi_flush", 0600, powerpc_debugfs_root, NULL, &fops_rfi_flush); debugfs_create_file("entry_flush", 0600, powerpc_debugfs_root, NULL, &fops_entry_flush); + debugfs_create_file("uaccess_flush", 0600, powerpc_debugfs_root, NULL, &fops_uaccess_flush); return 0; } device_initcall(rfi_flush_debugfs_init); diff --git a/arch/powerpc/kernel/vmlinux.lds.S b/arch/powerpc/kernel/vmlinux.lds.S index 752bf5910283..5229eeac8946 100644 --- a/arch/powerpc/kernel/vmlinux.lds.S +++ b/arch/powerpc/kernel/vmlinux.lds.S @@ -143,6 +143,13 @@ SECTIONS __stop___stf_entry_barrier_fixup = .; } + . = ALIGN(8); + __uaccess_flush_fixup : AT(ADDR(__uaccess_flush_fixup) - LOAD_OFFSET) { + __start___uaccess_flush_fixup = .; + *(__uaccess_flush_fixup) + __stop___uaccess_flush_fixup = .; + } + . 
= ALIGN(8); __entry_flush_fixup : AT(ADDR(__entry_flush_fixup) - LOAD_OFFSET) { __start___entry_flush_fixup = .; diff --git a/arch/powerpc/lib/feature-fixups.c b/arch/powerpc/lib/feature-fixups.c index 8050f074b346..e8b25f74454d 100644 --- a/arch/powerpc/lib/feature-fixups.c +++ b/arch/powerpc/lib/feature-fixups.c @@ -228,6 +228,56 @@ void do_stf_barrier_fixups(enum stf_barrier_type types) do_stf_exit_barrier_fixups(types); } +void do_uaccess_flush_fixups(enum l1d_flush_type types) +{ + unsigned int instrs[4], *dest; + long *start, *end; + int i; + + start = PTRRELOC(&__start___uaccess_flush_fixup); + end = PTRRELOC(&__stop___uaccess_flush_fixup); + + instrs[0] = 0x60000000; /* nop */ + instrs[1] = 0x60000000; /* nop */ + instrs[2] = 0x60000000; /* nop */ + instrs[3] = 0x4e800020; /* blr */ + + i = 0; + if (types == L1D_FLUSH_FALLBACK) { + instrs[3] = 0x60000000; /* nop */ + /* fallthrough to fallback flush */ + } + + if (types & L1D_FLUSH_ORI) { + instrs[i++] = 0x63ff0000; /* ori 31,31,0 speculation barrier */ + instrs[i++] = 0x63de0000; /* ori 30,30,0 L1d flush*/ + } + + if (types & L1D_FLUSH_MTTRIG) + instrs[i++] = 0x7c12dba6; /* mtspr TRIG2,r0 (SPR #882) */ + + for (i = 0; start < end; start++, i++) { + dest = (void *)start + *start; + + pr_devel("patching dest %lx\n", (unsigned long)dest); + + patch_instruction(dest, instrs[0]); + + patch_instruction((dest + 1), instrs[1]); + patch_instruction((dest + 2), instrs[2]); + patch_instruction((dest + 3), instrs[3]); + } + + printk(KERN_DEBUG "uaccess-flush: patched %d locations (%s flush)\n", i, + (types == L1D_FLUSH_NONE) ? "no" : + (types == L1D_FLUSH_FALLBACK) ? "fallback displacement" : + (types & L1D_FLUSH_ORI) ? (types & L1D_FLUSH_MTTRIG) + ? "ori+mttrig type" + : "ori type" : + (types & L1D_FLUSH_MTTRIG) ? 
"mttrig type" + : "unknown"); +} + void do_entry_flush_fixups(enum l1d_flush_type types) { unsigned int instrs[3], *dest; diff --git a/arch/powerpc/platforms/powernv/setup.c b/arch/powerpc/platforms/powernv/setup.c index 36d60bc2c5e4..ef7b4c09e7d6 100644 --- a/arch/powerpc/platforms/powernv/setup.c +++ b/arch/powerpc/platforms/powernv/setup.c @@ -139,6 +139,10 @@ static void pnv_setup_rfi_flush(void) enable = security_ftr_enabled(SEC_FTR_FAVOUR_SECURITY) && security_ftr_enabled(SEC_FTR_L1D_FLUSH_ENTRY); setup_entry_flush(enable); + + enable = security_ftr_enabled(SEC_FTR_FAVOUR_SECURITY) && + security_ftr_enabled(SEC_FTR_L1D_FLUSH_UACCESS); + setup_uaccess_flush(enable); } static void __init pnv_setup_arch(void) diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c index 0597bff44788..ce71235c8b81 100644 --- a/arch/powerpc/platforms/pseries/setup.c +++ b/arch/powerpc/platforms/pseries/setup.c @@ -565,6 +565,10 @@ void pseries_setup_rfi_flush(void) enable = security_ftr_enabled(SEC_FTR_FAVOUR_SECURITY) && security_ftr_enabled(SEC_FTR_L1D_FLUSH_ENTRY); setup_entry_flush(enable); + + enable = security_ftr_enabled(SEC_FTR_FAVOUR_SECURITY) && + security_ftr_enabled(SEC_FTR_L1D_FLUSH_UACCESS); + setup_uaccess_flush(enable); } #ifdef CONFIG_PCI_IOV From eb37345ed224c579b0a1b0e9bd72b788122ae91d Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Fri, 20 Nov 2020 10:35:15 +1100 Subject: [PATCH 04/18] powerpc: Only include kup-radix.h for 64-bit Book3S commit 178d52c6e89c38d0553b0ac8b99927b11eb995b0 upstream. In kup.h we currently include kup-radix.h for all 64-bit builds, which includes Book3S and Book3E. The latter doesn't make sense, Book3E never uses the Radix MMU. This has worked up until now, but almost by accident, and the recent uaccess flush changes introduced a build breakage on Book3E because of the bad structure of the code. So disentangle things so that we only use kup-radix.h for Book3S. 
This requires some more stubs in kup.h. Signed-off-by: Michael Ellerman Signed-off-by: Daniel Axtens Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/include/asm/book3s/64/kup-radix.h | 5 +++-- arch/powerpc/include/asm/kup.h | 14 +++++++++++--- 2 files changed, 14 insertions(+), 5 deletions(-) diff --git a/arch/powerpc/include/asm/book3s/64/kup-radix.h b/arch/powerpc/include/asm/book3s/64/kup-radix.h index 394931798550..c1e45f510591 100644 --- a/arch/powerpc/include/asm/book3s/64/kup-radix.h +++ b/arch/powerpc/include/asm/book3s/64/kup-radix.h @@ -11,13 +11,12 @@ #ifdef __ASSEMBLY__ -.macro kuap_restore_amr gpr #ifdef CONFIG_PPC_KUAP +.macro kuap_restore_amr gpr BEGIN_MMU_FTR_SECTION_NESTED(67) ld \gpr, STACK_REGS_KUAP(r1) mtspr SPRN_AMR, \gpr END_MMU_FTR_SECTION_NESTED_IFSET(MMU_FTR_RADIX_KUAP, 67) -#endif .endm .macro kuap_check_amr gpr1, gpr2 @@ -31,6 +30,7 @@ END_MMU_FTR_SECTION_NESTED_IFSET(MMU_FTR_RADIX_KUAP, 67) #endif .endm +#endif .macro kuap_save_amr_and_lock gpr1, gpr2, use_cr, msr_pr_cr #ifdef CONFIG_PPC_KUAP @@ -87,6 +87,7 @@ bad_kuap_fault(struct pt_regs *regs, unsigned long address, bool is_write) "Bug: %s fault blocked by AMR!", is_write ? 
"Write" : "Read"); } #else /* CONFIG_PPC_KUAP */ +static inline void kuap_restore_amr(struct pt_regs *regs, unsigned long amr) { } static inline void set_kuap(unsigned long value) { } #endif /* !CONFIG_PPC_KUAP */ diff --git a/arch/powerpc/include/asm/kup.h b/arch/powerpc/include/asm/kup.h index 8f4d27980003..ed4f5f536fc1 100644 --- a/arch/powerpc/include/asm/kup.h +++ b/arch/powerpc/include/asm/kup.h @@ -6,7 +6,7 @@ #define KUAP_WRITE 2 #define KUAP_READ_WRITE (KUAP_READ | KUAP_WRITE) -#ifdef CONFIG_PPC64 +#ifdef CONFIG_PPC_BOOK3S_64 #include #endif #ifdef CONFIG_PPC_8xx @@ -24,9 +24,15 @@ .macro kuap_restore sp, current, gpr1, gpr2, gpr3 .endm +.macro kuap_restore_amr gpr +.endm + .macro kuap_check current, gpr .endm +.macro kuap_check_amr gpr1, gpr2 +.endm + #endif #else /* !__ASSEMBLY__ */ @@ -52,17 +58,19 @@ bad_kuap_fault(struct pt_regs *regs, unsigned long address, bool is_write) return false; } +static inline void kuap_check_amr(void) { } + /* * book3s/64/kup-radix.h defines these functions for the !KUAP case to flush * the L1D cache after user accesses. Only include the empty stubs for other * platforms. */ -#ifndef CONFIG_PPC64 +#ifndef CONFIG_PPC_BOOK3S_64 static inline void allow_user_access(void __user *to, const void __user *from, unsigned long size, unsigned long dir) { } static inline void prevent_user_access(void __user *to, const void __user *from, unsigned long size, unsigned long dir) { } -#endif /* CONFIG_PPC64 */ +#endif /* CONFIG_PPC_BOOK3S_64 */ #endif /* CONFIG_PPC_KUAP */ static inline void allow_read_from_user(const void __user *from, unsigned long size) From 01474e8cc3421cc55f55c5a0c6e1aef40efa43ab Mon Sep 17 00:00:00 2001 From: Daniel Axtens Date: Fri, 20 Nov 2020 10:35:16 +1100 Subject: [PATCH 05/18] selftests/powerpc: entry flush test commit 89a83a0c69c81a25ce91002b90ca27ed86132a0a upstream. Add a test modelled on the RFI flush test which counts the number of L1D misses doing a simple syscall with the entry flush on and off. 
For simplicity of backporting, this test duplicates a lot of code from the upstream rfi_flush. This is cleaned up upstream, but we don't clean it up here because it would involve bringing in even more commits. Signed-off-by: Daniel Axtens Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/kernel/setup_64.c | 2 +- arch/powerpc/platforms/powernv/setup.c | 6 +- .../selftests/powerpc/security/.gitignore | 1 + .../selftests/powerpc/security/Makefile | 2 +- .../selftests/powerpc/security/entry_flush.c | 163 ++++++++++++++++++ 5 files changed, 170 insertions(+), 4 deletions(-) create mode 100644 tools/testing/selftests/powerpc/security/entry_flush.c diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c index a6b72dd431a4..480c236724da 100644 --- a/arch/powerpc/kernel/setup_64.c +++ b/arch/powerpc/kernel/setup_64.c @@ -1025,7 +1025,7 @@ void setup_uaccess_flush(bool enable) return; if (!no_uaccess_flush) - uaccess_flush_enable(true); + uaccess_flush_enable(enable); } #ifdef CONFIG_DEBUG_FS diff --git a/arch/powerpc/platforms/powernv/setup.c b/arch/powerpc/platforms/powernv/setup.c index ef7b4c09e7d6..3a9f79d18f6b 100644 --- a/arch/powerpc/platforms/powernv/setup.c +++ b/arch/powerpc/platforms/powernv/setup.c @@ -124,10 +124,12 @@ static void pnv_setup_rfi_flush(void) /* * If we are non-Power9 bare metal, we don't need to flush on kernel - * entry: it fixes a P9 specific vulnerability. + * entry or after user access: they fix a P9 specific vulnerability. 
*/ - if (!pvr_version_is(PVR_POWER9)) + if (!pvr_version_is(PVR_POWER9)) { security_ftr_clear(SEC_FTR_L1D_FLUSH_ENTRY); + security_ftr_clear(SEC_FTR_L1D_FLUSH_UACCESS); + } enable = security_ftr_enabled(SEC_FTR_FAVOUR_SECURITY) && \ (security_ftr_enabled(SEC_FTR_L1D_FLUSH_PR) || \ diff --git a/tools/testing/selftests/powerpc/security/.gitignore b/tools/testing/selftests/powerpc/security/.gitignore index 0b969fba3beb..b8afb4f2481e 100644 --- a/tools/testing/selftests/powerpc/security/.gitignore +++ b/tools/testing/selftests/powerpc/security/.gitignore @@ -1 +1,2 @@ rfi_flush +entry_flush diff --git a/tools/testing/selftests/powerpc/security/Makefile b/tools/testing/selftests/powerpc/security/Makefile index 85861c46b445..e550a287768f 100644 --- a/tools/testing/selftests/powerpc/security/Makefile +++ b/tools/testing/selftests/powerpc/security/Makefile @@ -1,6 +1,6 @@ # SPDX-License-Identifier: GPL-2.0+ -TEST_GEN_PROGS := rfi_flush +TEST_GEN_PROGS := rfi_flush entry_flush top_srcdir = ../../../../.. CFLAGS += -I../../../../../usr/include diff --git a/tools/testing/selftests/powerpc/security/entry_flush.c b/tools/testing/selftests/powerpc/security/entry_flush.c new file mode 100644 index 000000000000..e8d24f9a5d3e --- /dev/null +++ b/tools/testing/selftests/powerpc/security/entry_flush.c @@ -0,0 +1,163 @@ +// SPDX-License-Identifier: GPL-2.0+ + +/* + * Copyright 2018 IBM Corporation. 
+ */ + +#define __SANE_USERSPACE_TYPES__ + +#include +#include +#include +#include +#include +#include +#include +#include +#include "utils.h" + +#define CACHELINE_SIZE 128 + +struct perf_event_read { + __u64 nr; + __u64 l1d_misses; +}; + +static inline __u64 load(void *addr) +{ + __u64 tmp; + + asm volatile("ld %0,0(%1)" : "=r"(tmp) : "b"(addr)); + + return tmp; +} + +static void syscall_loop(char *p, unsigned long iterations, + unsigned long zero_size) +{ + for (unsigned long i = 0; i < iterations; i++) { + for (unsigned long j = 0; j < zero_size; j += CACHELINE_SIZE) + load(p + j); + getppid(); + } +} + +int entry_flush_test(void) +{ + char *p; + int repetitions = 10; + int fd, passes = 0, iter, rc = 0; + struct perf_event_read v; + __u64 l1d_misses_total = 0; + unsigned long iterations = 100000, zero_size = 24 * 1024; + unsigned long l1d_misses_expected; + int rfi_flush_orig; + int entry_flush, entry_flush_orig; + + SKIP_IF(geteuid() != 0); + + // The PMU event we use only works on Power7 or later + SKIP_IF(!have_hwcap(PPC_FEATURE_ARCH_2_06)); + + if (read_debugfs_file("powerpc/rfi_flush", &rfi_flush_orig) < 0) { + perror("Unable to read powerpc/rfi_flush debugfs file"); + SKIP_IF(1); + } + + if (read_debugfs_file("powerpc/entry_flush", &entry_flush_orig) < 0) { + perror("Unable to read powerpc/entry_flush debugfs file"); + SKIP_IF(1); + } + + if (rfi_flush_orig != 0) { + if (write_debugfs_file("powerpc/rfi_flush", 0) < 0) { + perror("error writing to powerpc/rfi_flush debugfs file"); + FAIL_IF(1); + } + } + + entry_flush = entry_flush_orig; + + fd = perf_event_open_counter(PERF_TYPE_RAW, /* L1d miss */ 0x400f0, -1); + FAIL_IF(fd < 0); + + p = (char *)memalign(zero_size, CACHELINE_SIZE); + + FAIL_IF(perf_event_enable(fd)); + + // disable L1 prefetching + set_dscr(1); + + iter = repetitions; + + /* + * We expect to see l1d miss for each cacheline access when entry_flush + * is set. Allow a small variation on this. 
+ */ + l1d_misses_expected = iterations * (zero_size / CACHELINE_SIZE - 2); + +again: + FAIL_IF(perf_event_reset(fd)); + + syscall_loop(p, iterations, zero_size); + + FAIL_IF(read(fd, &v, sizeof(v)) != sizeof(v)); + + if (entry_flush && v.l1d_misses >= l1d_misses_expected) + passes++; + else if (!entry_flush && v.l1d_misses < (l1d_misses_expected / 2)) + passes++; + + l1d_misses_total += v.l1d_misses; + + while (--iter) + goto again; + + if (passes < repetitions) { + printf("FAIL (L1D misses with entry_flush=%d: %llu %c %lu) [%d/%d failures]\n", + entry_flush, l1d_misses_total, entry_flush ? '<' : '>', + entry_flush ? repetitions * l1d_misses_expected : + repetitions * l1d_misses_expected / 2, + repetitions - passes, repetitions); + rc = 1; + } else + printf("PASS (L1D misses with entry_flush=%d: %llu %c %lu) [%d/%d pass]\n", + entry_flush, l1d_misses_total, entry_flush ? '>' : '<', + entry_flush ? repetitions * l1d_misses_expected : + repetitions * l1d_misses_expected / 2, + passes, repetitions); + + if (entry_flush == entry_flush_orig) { + entry_flush = !entry_flush_orig; + if (write_debugfs_file("powerpc/entry_flush", entry_flush) < 0) { + perror("error writing to powerpc/entry_flush debugfs file"); + return 1; + } + iter = repetitions; + l1d_misses_total = 0; + passes = 0; + goto again; + } + + perf_event_disable(fd); + close(fd); + + set_dscr(0); + + if (write_debugfs_file("powerpc/rfi_flush", rfi_flush_orig) < 0) { + perror("unable to restore original value of powerpc/rfi_flush debugfs file"); + return 1; + } + + if (write_debugfs_file("powerpc/entry_flush", entry_flush_orig) < 0) { + perror("unable to restore original value of powerpc/entry_flush debugfs file"); + return 1; + } + + return rc; +} + +int main(int argc, char *argv[]) +{ + return test_harness(entry_flush_test, "entry_flush_test"); +} From b33905dc1ce55e5af610ba83d122a5b1da22c0b9 Mon Sep 17 00:00:00 2001 From: Sudip Mukherjee Date: Thu, 19 Nov 2020 10:26:33 +0000 Subject: [PATCH 06/18] MIPS: PCI: 
Fix MIPS build While backporting 37640adbefd6 ("MIPS: PCI: remember nasid changed by set interrupt affinity") something went wrong and an extra 'n' was added. So 'data->nasid' became 'data->nnasid' and the MIPS builds started failing. This is only needed for 5.4-stable tree. Fixes: 957978aa56f1 ("MIPS: PCI: remember nasid changed by set interrupt affinity") Signed-off-by: Sudip Mukherjee Signed-off-by: Greg Kroah-Hartman --- arch/mips/pci/pci-xtalk-bridge.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/mips/pci/pci-xtalk-bridge.c b/arch/mips/pci/pci-xtalk-bridge.c index c4b1c6cf2660..adc9f83b2c44 100644 --- a/arch/mips/pci/pci-xtalk-bridge.c +++ b/arch/mips/pci/pci-xtalk-bridge.c @@ -284,7 +284,7 @@ static int bridge_set_affinity(struct irq_data *d, const struct cpumask *mask, ret = irq_chip_set_affinity_parent(d, mask, force); if (ret >= 0) { cpu = cpumask_first_and(mask, cpu_online_mask); - data->nnasid = COMPACT_TO_NASID_NODEID(cpu_to_node(cpu)); + data->nasid = COMPACT_TO_NASID_NODEID(cpu_to_node(cpu)); bridge_write(data->bc, b_int_addr[pin].addr, (((data->bc->intr_addr >> 30) & 0x30000) | bit | (data->nasid << 8))); From 42bb7b7b9654ef45f5e6611714f724fdeb8adf5f Mon Sep 17 00:00:00 2001 From: Parav Pandit Date: Fri, 6 Dec 2019 15:13:41 -0600 Subject: [PATCH 07/18] net/mlx5: Use async EQ setup cleanup helpers for multiple EQs commit 3ed879965cc4ea13fe0908468b653c4ff2cb1309 upstream. Use helper routines to setup and teardown multiple EQs and reuse the code in setup, cleanup and error unwinding flows. 
Signed-off-by: Parav Pandit Signed-off-by: Saeed Mahameed Cc: Timo Rothenpieler Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/mellanox/mlx5/core/eq.c | 114 ++++++++----------- 1 file changed, 49 insertions(+), 65 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eq.c b/drivers/net/ethernet/mellanox/mlx5/core/eq.c index 580c71cb9dfa..aff1737ef1ee 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eq.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eq.c @@ -563,6 +563,39 @@ static void gather_async_events_mask(struct mlx5_core_dev *dev, u64 mask[4]) gather_user_async_events(dev, mask); } +static int +setup_async_eq(struct mlx5_core_dev *dev, struct mlx5_eq_async *eq, + struct mlx5_eq_param *param, const char *name) +{ + int err; + + eq->irq_nb.notifier_call = mlx5_eq_async_int; + + err = create_async_eq(dev, &eq->core, param); + if (err) { + mlx5_core_warn(dev, "failed to create %s EQ %d\n", name, err); + return err; + } + err = mlx5_eq_enable(dev, &eq->core, &eq->irq_nb); + if (err) { + mlx5_core_warn(dev, "failed to enable %s EQ %d\n", name, err); + destroy_async_eq(dev, &eq->core); + } + return err; +} + +static void cleanup_async_eq(struct mlx5_core_dev *dev, + struct mlx5_eq_async *eq, const char *name) +{ + int err; + + mlx5_eq_disable(dev, &eq->core, &eq->irq_nb); + err = destroy_async_eq(dev, &eq->core); + if (err) + mlx5_core_err(dev, "failed to destroy %s eq, err(%d)\n", + name, err); +} + static int create_async_eqs(struct mlx5_core_dev *dev) { struct mlx5_eq_table *table = dev->priv.eq_table; @@ -572,77 +605,45 @@ static int create_async_eqs(struct mlx5_core_dev *dev) MLX5_NB_INIT(&table->cq_err_nb, cq_err_event_notifier, CQ_ERROR); mlx5_eq_notifier_register(dev, &table->cq_err_nb); - table->cmd_eq.irq_nb.notifier_call = mlx5_eq_async_int; param = (struct mlx5_eq_param) { .irq_index = 0, .nent = MLX5_NUM_CMD_EQE, + .mask[0] = 1ull << MLX5_EVENT_TYPE_CMD, }; - - param.mask[0] = 1ull << MLX5_EVENT_TYPE_CMD; - err = 
create_async_eq(dev, &table->cmd_eq.core, &param); - if (err) { - mlx5_core_warn(dev, "failed to create cmd EQ %d\n", err); - goto err0; - } - err = mlx5_eq_enable(dev, &table->cmd_eq.core, &table->cmd_eq.irq_nb); - if (err) { - mlx5_core_warn(dev, "failed to enable cmd EQ %d\n", err); + err = setup_async_eq(dev, &table->cmd_eq, &param, "cmd"); + if (err) goto err1; - } + mlx5_cmd_use_events(dev); - table->async_eq.irq_nb.notifier_call = mlx5_eq_async_int; param = (struct mlx5_eq_param) { .irq_index = 0, .nent = MLX5_NUM_ASYNC_EQE, }; gather_async_events_mask(dev, param.mask); - err = create_async_eq(dev, &table->async_eq.core, &param); - if (err) { - mlx5_core_warn(dev, "failed to create async EQ %d\n", err); + err = setup_async_eq(dev, &table->async_eq, &param, "async"); + if (err) goto err2; - } - err = mlx5_eq_enable(dev, &table->async_eq.core, - &table->async_eq.irq_nb); - if (err) { - mlx5_core_warn(dev, "failed to enable async EQ %d\n", err); - goto err3; - } - table->pages_eq.irq_nb.notifier_call = mlx5_eq_async_int; param = (struct mlx5_eq_param) { .irq_index = 0, .nent = /* TODO: sriov max_vf + */ 1, + .mask[0] = 1ull << MLX5_EVENT_TYPE_PAGE_REQUEST, }; - param.mask[0] = 1ull << MLX5_EVENT_TYPE_PAGE_REQUEST; - err = create_async_eq(dev, &table->pages_eq.core, &param); - if (err) { - mlx5_core_warn(dev, "failed to create pages EQ %d\n", err); - goto err4; - } - err = mlx5_eq_enable(dev, &table->pages_eq.core, - &table->pages_eq.irq_nb); - if (err) { - mlx5_core_warn(dev, "failed to enable pages EQ %d\n", err); - goto err5; - } + err = setup_async_eq(dev, &table->pages_eq, &param, "pages"); + if (err) + goto err3; - return err; + return 0; -err5: - destroy_async_eq(dev, &table->pages_eq.core); -err4: - mlx5_eq_disable(dev, &table->async_eq.core, &table->async_eq.irq_nb); err3: - destroy_async_eq(dev, &table->async_eq.core); + cleanup_async_eq(dev, &table->async_eq, "async"); err2: mlx5_cmd_use_polling(dev); - mlx5_eq_disable(dev, &table->cmd_eq.core, &table->cmd_eq.irq_nb); +
cleanup_async_eq(dev, &table->cmd_eq, "cmd"); err1: - destroy_async_eq(dev, &table->cmd_eq.core); -err0: mlx5_eq_notifier_unregister(dev, &table->cq_err_nb); return err; } @@ -650,28 +651,11 @@ static int create_async_eqs(struct mlx5_core_dev *dev) static void destroy_async_eqs(struct mlx5_core_dev *dev) { struct mlx5_eq_table *table = dev->priv.eq_table; - int err; - - mlx5_eq_disable(dev, &table->pages_eq.core, &table->pages_eq.irq_nb); - err = destroy_async_eq(dev, &table->pages_eq.core); - if (err) - mlx5_core_err(dev, "failed to destroy pages eq, err(%d)\n", - err); - - mlx5_eq_disable(dev, &table->async_eq.core, &table->async_eq.irq_nb); - err = destroy_async_eq(dev, &table->async_eq.core); - if (err) - mlx5_core_err(dev, "failed to destroy async eq, err(%d)\n", - err); + cleanup_async_eq(dev, &table->pages_eq, "pages"); + cleanup_async_eq(dev, &table->async_eq, "async"); mlx5_cmd_use_polling(dev); - - mlx5_eq_disable(dev, &table->cmd_eq.core, &table->cmd_eq.irq_nb); - err = destroy_async_eq(dev, &table->cmd_eq.core); - if (err) - mlx5_core_err(dev, "failed to destroy command eq, err(%d)\n", - err); - + cleanup_async_eq(dev, &table->cmd_eq, "cmd"); mlx5_eq_notifier_unregister(dev, &table->cq_err_nb); } From 3fa9daaccce8c83444a5610f3b2c55c4cebd5ecb Mon Sep 17 00:00:00 2001 From: Eran Ben Elisha Date: Tue, 21 Jul 2020 10:25:52 +0300 Subject: [PATCH 08/18] net/mlx5: poll cmd EQ in case of command timeout commit 1d5558b1f0de81f54ddee05f3793acc5260d107f upstream. Once driver detects a command interface command timeout, it warns the user and returns timeout error to the caller. In such case, the entry of the command is not evacuated (because only real event interrupt is allowed to clear command interface entry). If the HW event interrupt of this entry will never arrive, this entry will be left unused forever. Command interface entries are limited and eventually we can end up without the ability to post a new command. 
In addition, if driver will not consume the EQE of the lost interrupt and rearm the EQ, no new interrupts will arrive for other commands. Add a resiliency mechanism for manually polling the command EQ in case of a command timeout. In case resiliency mechanism will find non-handled EQE, it will consume it, and the command interface will be fully functional again. Once the resiliency flow finished, wait another 5 seconds for the command interface to complete for this command entry. Define mlx5_cmd_eq_recover() to manage the cmd EQ polling resiliency flow. Add an async EQ spinlock to avoid races between resiliency flows and real interrupts that might run simultaneously. Fixes: e126ba97dba9 ("mlx5: Add driver for Mellanox Connect-IB adapters") Signed-off-by: Eran Ben Elisha Signed-off-by: Saeed Mahameed Cc: Timo Rothenpieler Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/mellanox/mlx5/core/cmd.c | 53 ++++++++++++++++--- drivers/net/ethernet/mellanox/mlx5/core/eq.c | 40 +++++++++++++- .../net/ethernet/mellanox/mlx5/core/lib/eq.h | 2 + 3 files changed, 86 insertions(+), 9 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c index 7089ffcc4e51..93c4d251b3e2 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c @@ -853,11 +853,21 @@ static void cb_timeout_handler(struct work_struct *work) struct mlx5_core_dev *dev = container_of(ent->cmd, struct mlx5_core_dev, cmd); + mlx5_cmd_eq_recover(dev); + + /* Maybe got handled by eq recover ? */ + if (!test_bit(MLX5_CMD_ENT_STATE_PENDING_COMP, &ent->state)) { + mlx5_core_warn(dev, "cmd[%d]: %s(0x%x) Async, recovered after timeout\n", ent->idx, + mlx5_command_str(msg_to_opcode(ent->in)), msg_to_opcode(ent->in)); + goto out; /* phew, already handled */ + } + ent->ret = -ETIMEDOUT; - mlx5_core_warn(dev, "%s(0x%x) timeout. 
Will cause a leak of a command resource\n", - mlx5_command_str(msg_to_opcode(ent->in)), - msg_to_opcode(ent->in)); + mlx5_core_warn(dev, "cmd[%d]: %s(0x%x) Async, timeout. Will cause a leak of a command resource\n", + ent->idx, mlx5_command_str(msg_to_opcode(ent->in)), msg_to_opcode(ent->in)); mlx5_cmd_comp_handler(dev, 1UL << ent->idx, true); + +out: cmd_ent_put(ent); /* for the cmd_ent_get() took on schedule delayed work */ } @@ -987,6 +997,35 @@ static const char *deliv_status_to_str(u8 status) } } +enum { + MLX5_CMD_TIMEOUT_RECOVER_MSEC = 5 * 1000, +}; + +static void wait_func_handle_exec_timeout(struct mlx5_core_dev *dev, + struct mlx5_cmd_work_ent *ent) +{ + unsigned long timeout = msecs_to_jiffies(MLX5_CMD_TIMEOUT_RECOVER_MSEC); + + mlx5_cmd_eq_recover(dev); + + /* Re-wait on the ent->done after executing the recovery flow. If the + * recovery flow (or any other recovery flow running simultaneously) + * has recovered an EQE, it should cause the entry to be completed by + * the command interface. 
+ */ + if (wait_for_completion_timeout(&ent->done, timeout)) { + mlx5_core_warn(dev, "cmd[%d]: %s(0x%x) recovered after timeout\n", ent->idx, + mlx5_command_str(msg_to_opcode(ent->in)), msg_to_opcode(ent->in)); + return; + } + + mlx5_core_warn(dev, "cmd[%d]: %s(0x%x) No done completion\n", ent->idx, + mlx5_command_str(msg_to_opcode(ent->in)), msg_to_opcode(ent->in)); + + ent->ret = -ETIMEDOUT; + mlx5_cmd_comp_handler(dev, 1UL << ent->idx, true); +} + static int wait_func(struct mlx5_core_dev *dev, struct mlx5_cmd_work_ent *ent) { unsigned long timeout = msecs_to_jiffies(MLX5_CMD_TIMEOUT_MSEC); @@ -998,12 +1037,10 @@ static int wait_func(struct mlx5_core_dev *dev, struct mlx5_cmd_work_ent *ent) ent->ret = -ECANCELED; goto out_err; } - if (cmd->mode == CMD_MODE_POLLING || ent->polling) { + if (cmd->mode == CMD_MODE_POLLING || ent->polling) wait_for_completion(&ent->done); - } else if (!wait_for_completion_timeout(&ent->done, timeout)) { - ent->ret = -ETIMEDOUT; - mlx5_cmd_comp_handler(dev, 1UL << ent->idx, true); - } + else if (!wait_for_completion_timeout(&ent->done, timeout)) + wait_func_handle_exec_timeout(dev, ent); out_err: err = ent->ret; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eq.c b/drivers/net/ethernet/mellanox/mlx5/core/eq.c index aff1737ef1ee..851fd81508e1 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eq.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eq.c @@ -190,6 +190,29 @@ u32 mlx5_eq_poll_irq_disabled(struct mlx5_eq_comp *eq) return count_eqe; } +static void mlx5_eq_async_int_lock(struct mlx5_eq_async *eq, unsigned long *flags) + __acquires(&eq->lock) +{ + if (in_irq()) + spin_lock(&eq->lock); + else + spin_lock_irqsave(&eq->lock, *flags); +} + +static void mlx5_eq_async_int_unlock(struct mlx5_eq_async *eq, unsigned long *flags) + __releases(&eq->lock) +{ + if (in_irq()) + spin_unlock(&eq->lock); + else + spin_unlock_irqrestore(&eq->lock, *flags); +} + +enum async_eq_nb_action { + ASYNC_EQ_IRQ_HANDLER = 0, + ASYNC_EQ_RECOVER = 1, 
+}; + static int mlx5_eq_async_int(struct notifier_block *nb, unsigned long action, void *data) { @@ -199,11 +222,14 @@ static int mlx5_eq_async_int(struct notifier_block *nb, struct mlx5_eq_table *eqt; struct mlx5_core_dev *dev; struct mlx5_eqe *eqe; + unsigned long flags; int num_eqes = 0; dev = eq->dev; eqt = dev->priv.eq_table; + mlx5_eq_async_int_lock(eq_async, &flags); + eqe = next_eqe_sw(eq); if (!eqe) goto out; @@ -224,8 +250,19 @@ static int mlx5_eq_async_int(struct notifier_block *nb, out: eq_update_ci(eq, 1); + mlx5_eq_async_int_unlock(eq_async, &flags); - return 0; + return unlikely(action == ASYNC_EQ_RECOVER) ? num_eqes : 0; +} + +void mlx5_cmd_eq_recover(struct mlx5_core_dev *dev) +{ + struct mlx5_eq_async *eq = &dev->priv.eq_table->cmd_eq; + int eqes; + + eqes = mlx5_eq_async_int(&eq->irq_nb, ASYNC_EQ_RECOVER, NULL); + if (eqes) + mlx5_core_warn(dev, "Recovered %d EQEs on cmd_eq\n", eqes); } static void init_eq_buf(struct mlx5_eq *eq) @@ -570,6 +607,7 @@ setup_async_eq(struct mlx5_core_dev *dev, struct mlx5_eq_async *eq, int err; eq->irq_nb.notifier_call = mlx5_eq_async_int; + spin_lock_init(&eq->lock); err = create_async_eq(dev, &eq->core, param); if (err) { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/eq.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/eq.h index 4be4d2d36218..9aaf0eab7c2e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lib/eq.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/eq.h @@ -38,6 +38,7 @@ struct mlx5_eq { struct mlx5_eq_async { struct mlx5_eq core; struct notifier_block irq_nb; + spinlock_t lock; /* To avoid irq EQ handle races with resiliency flows */ }; struct mlx5_eq_comp { @@ -82,6 +83,7 @@ void mlx5_cq_tasklet_cb(unsigned long data); struct cpumask *mlx5_eq_comp_cpumask(struct mlx5_core_dev *dev, int ix); u32 mlx5_eq_poll_irq_disabled(struct mlx5_eq_comp *eq); +void mlx5_cmd_eq_recover(struct mlx5_core_dev *dev); void mlx5_eq_synchronize_async_irq(struct mlx5_core_dev *dev); void 
mlx5_eq_synchronize_cmd_irq(struct mlx5_core_dev *dev); From 7db82a5a4c15b89fa800ce2cf5d04cc0ae30279c Mon Sep 17 00:00:00 2001 From: Eran Ben Elisha Date: Wed, 18 Mar 2020 21:44:32 +0200 Subject: [PATCH 09/18] net/mlx5: Fix a race when moving command interface to events mode commit d43b7007dbd1195a5b6b83213e49b1516aaf6f5e upstream. After driver creates (via FW command) an EQ for commands, the driver will be informed on new commands completion by EQE. However, due to a race in driver's internal command mode metadata update, some new commands will still be miss-handled by driver as if we are in polling mode. Such commands can get two non forced completion, leading to already freed command entry access. CREATE_EQ command, that maps EQ to the command queue must be posted to the command queue while it is empty and no other command should be posted. Add SW mechanism that once the CREATE_EQ command is about to be executed, all other commands will return error without being sent to the FW. Allow sending other commands only after successfully changing the driver's internal command mode metadata. We can safely return error to all other commands while creating the command EQ, as all other commands might be sent from the user/application during driver load. Application can rerun them later after driver's load was finished. 
Fixes: e126ba97dba9 ("mlx5: Add driver for Mellanox Connect-IB adapters") Signed-off-by: Eran Ben Elisha Signed-off-by: Moshe Shemesh Signed-off-by: Saeed Mahameed Cc: Timo Rothenpieler Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/mellanox/mlx5/core/cmd.c | 35 ++++++++++++++++--- drivers/net/ethernet/mellanox/mlx5/core/eq.c | 3 ++ include/linux/mlx5/driver.h | 6 ++++ 3 files changed, 40 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c index 93c4d251b3e2..b00e72b86863 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c @@ -875,6 +875,14 @@ static void free_msg(struct mlx5_core_dev *dev, struct mlx5_cmd_msg *msg); static void mlx5_free_cmd_msg(struct mlx5_core_dev *dev, struct mlx5_cmd_msg *msg); +static bool opcode_allowed(struct mlx5_cmd *cmd, u16 opcode) +{ + if (cmd->allowed_opcode == CMD_ALLOWED_OPCODE_ALL) + return true; + + return cmd->allowed_opcode == opcode; +} + static void cmd_work_handler(struct work_struct *work) { struct mlx5_cmd_work_ent *ent = container_of(work, struct mlx5_cmd_work_ent, work); @@ -941,7 +949,8 @@ static void cmd_work_handler(struct work_struct *work) /* Skip sending command to fw if internal error */ if (pci_channel_offline(dev->pdev) || - dev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR) { + dev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR || + !opcode_allowed(&dev->cmd, ent->op)) { u8 status = 0; u32 drv_synd; @@ -1459,6 +1468,22 @@ static void create_debugfs_files(struct mlx5_core_dev *dev) mlx5_cmdif_debugfs_init(dev); } +void mlx5_cmd_allowed_opcode(struct mlx5_core_dev *dev, u16 opcode) +{ + struct mlx5_cmd *cmd = &dev->cmd; + int i; + + for (i = 0; i < cmd->max_reg_cmds; i++) + down(&cmd->sem); + down(&cmd->pages_sem); + + cmd->allowed_opcode = opcode; + + up(&cmd->pages_sem); + for (i = 0; i < cmd->max_reg_cmds; i++) + up(&cmd->sem); +} + static void 
mlx5_cmd_change_mod(struct mlx5_core_dev *dev, int mode) { struct mlx5_cmd *cmd = &dev->cmd; @@ -1751,12 +1776,13 @@ static int cmd_exec(struct mlx5_core_dev *dev, void *in, int in_size, void *out, int err; u8 status = 0; u32 drv_synd; + u16 opcode; u8 token; + opcode = MLX5_GET(mbox_in, in, opcode); if (pci_channel_offline(dev->pdev) || - dev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR) { - u16 opcode = MLX5_GET(mbox_in, in, opcode); - + dev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR || + !opcode_allowed(&dev->cmd, opcode)) { err = mlx5_internal_err_ret_value(dev, opcode, &drv_synd, &status); MLX5_SET(mbox_out, out, status, status); MLX5_SET(mbox_out, out, syndrome, drv_synd); @@ -2058,6 +2084,7 @@ int mlx5_cmd_init(struct mlx5_core_dev *dev) mlx5_core_dbg(dev, "descriptor at dma 0x%llx\n", (unsigned long long)(cmd->dma)); cmd->mode = CMD_MODE_POLLING; + cmd->allowed_opcode = CMD_ALLOWED_OPCODE_ALL; create_msg_cache(dev); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eq.c b/drivers/net/ethernet/mellanox/mlx5/core/eq.c index 851fd81508e1..0a20938b4aad 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eq.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eq.c @@ -648,11 +648,13 @@ static int create_async_eqs(struct mlx5_core_dev *dev) .nent = MLX5_NUM_CMD_EQE, .mask[0] = 1ull << MLX5_EVENT_TYPE_CMD, }; + mlx5_cmd_allowed_opcode(dev, MLX5_CMD_OP_CREATE_EQ); err = setup_async_eq(dev, &table->cmd_eq, ¶m, "cmd"); if (err) goto err1; mlx5_cmd_use_events(dev); + mlx5_cmd_allowed_opcode(dev, CMD_ALLOWED_OPCODE_ALL); param = (struct mlx5_eq_param) { .irq_index = 0, @@ -682,6 +684,7 @@ static int create_async_eqs(struct mlx5_core_dev *dev) mlx5_cmd_use_polling(dev); cleanup_async_eq(dev, &table->cmd_eq, "cmd"); err1: + mlx5_cmd_allowed_opcode(dev, CMD_ALLOWED_OPCODE_ALL); mlx5_eq_notifier_unregister(dev, &table->cq_err_nb); return err; } diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 6b4f86dfca38..2b65ffb3bd76 100644 --- 
a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -299,6 +299,7 @@ struct mlx5_cmd { struct semaphore sem; struct semaphore pages_sem; int mode; + u16 allowed_opcode; struct mlx5_cmd_work_ent *ent_arr[MLX5_MAX_COMMANDS]; struct dma_pool *pool; struct mlx5_cmd_debug dbg; @@ -890,10 +891,15 @@ mlx5_frag_buf_get_idx_last_contig_stride(struct mlx5_frag_buf_ctrl *fbc, u32 ix) return min_t(u32, last_frag_stride_idx - fbc->strides_offset, fbc->sz_m1); } +enum { + CMD_ALLOWED_OPCODE_ALL, +}; + int mlx5_cmd_init(struct mlx5_core_dev *dev); void mlx5_cmd_cleanup(struct mlx5_core_dev *dev); void mlx5_cmd_use_events(struct mlx5_core_dev *dev); void mlx5_cmd_use_polling(struct mlx5_core_dev *dev); +void mlx5_cmd_allowed_opcode(struct mlx5_core_dev *dev, u16 opcode); struct mlx5_async_ctx { struct mlx5_core_dev *dev; From b57c75956e79e6fb526c3e71ea63f2ffb6eb8435 Mon Sep 17 00:00:00 2001 From: Eran Ben Elisha Date: Mon, 31 Aug 2020 15:04:35 +0300 Subject: [PATCH 10/18] net/mlx5: Add retry mechanism to the command entry index allocation commit 410bd754cd73c4a2ac3856d9a03d7b08f9c906bf upstream. It is possible that new command entry index allocation will temporarily fail. The new command holds the semaphore, so it means that a free entry should be ready soon. Add one second retry mechanism before returning an error. Patch "net/mlx5: Avoid possible free of command entry while timeout comp handler" increase the possibility to bump into this temporarily failure as it delays the entry index release for non-callback commands. 
Fixes: e126ba97dba9 ("mlx5: Add driver for Mellanox Connect-IB adapters") Signed-off-by: Eran Ben Elisha Reviewed-by: Moshe Shemesh Signed-off-by: Saeed Mahameed Cc: Timo Rothenpieler Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/mellanox/mlx5/core/cmd.c | 21 ++++++++++++++++++- 1 file changed, 20 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c index b00e72b86863..76547d35cd0e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c @@ -883,6 +883,25 @@ static bool opcode_allowed(struct mlx5_cmd *cmd, u16 opcode) return cmd->allowed_opcode == opcode; } +static int cmd_alloc_index_retry(struct mlx5_cmd *cmd) +{ + unsigned long alloc_end = jiffies + msecs_to_jiffies(1000); + int idx; + +retry: + idx = cmd_alloc_index(cmd); + if (idx < 0 && time_before(jiffies, alloc_end)) { + /* Index allocation can fail on heavy load of commands. This is a temporary + * situation as the current command already holds the semaphore, meaning that + * another command completion is being handled and it is expected to release + * the entry index soon. + */ + cpu_relax(); + goto retry; + } + return idx; +} + static void cmd_work_handler(struct work_struct *work) { struct mlx5_cmd_work_ent *ent = container_of(work, struct mlx5_cmd_work_ent, work); @@ -900,7 +919,7 @@ static void cmd_work_handler(struct work_struct *work) sem = ent->page_queue ? 
&cmd->pages_sem : &cmd->sem; down(sem); if (!ent->page_queue) { - alloc_ret = cmd_alloc_index(cmd); + alloc_ret = cmd_alloc_index_retry(cmd); if (alloc_ret < 0) { mlx5_core_err(dev, "failed to allocate command entry\n"); if (ent->callback) { From 8cad37eb129f9374bfcc5552a84a28db4e9daccc Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Mon, 12 Oct 2020 08:54:31 +0000 Subject: [PATCH 11/18] powerpc/8xx: Always fault when _PAGE_ACCESSED is not set commit 29daf869cbab69088fe1755d9dd224e99ba78b56 upstream. The kernel expects pte_young() to work regardless of CONFIG_SWAP. Make sure a minor fault is taken to set _PAGE_ACCESSED when it is not already set, regardless of the selection of CONFIG_SWAP. This adds at least 3 instructions to the TLB miss exception handlers fast path. Following patch will reduce this overhead. Also update the rotation instruction to the correct number of bits to reflect all changes done to _PAGE_ACCESSED over time. Fixes: d069cb4373fe ("powerpc/8xx: Don't touch ACCESSED when no SWAP.") Fixes: 5f356497c384 ("powerpc/8xx: remove unused _PAGE_WRITETHRU") Fixes: e0a8e0d90a9f ("powerpc/8xx: Handle PAGE_USER via APG bits") Fixes: 5b2753fc3e8a ("powerpc/8xx: Implementation of PAGE_EXEC") Fixes: a891c43b97d3 ("powerpc/8xx: Prepare handlers for _PAGE_HUGE for 512k pages.") Cc: stable@vger.kernel.org Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/af834e8a0f1fa97bfae65664950f0984a70c4750.1602492856.git.christophe.leroy@csgroup.eu Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/kernel/head_8xx.S | 14 ++------------ 1 file changed, 2 insertions(+), 12 deletions(-) diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S index 98d8b6832fcb..f6428b90a6c7 100644 --- a/arch/powerpc/kernel/head_8xx.S +++ b/arch/powerpc/kernel/head_8xx.S @@ -229,9 +229,7 @@ SystemCall: InstructionTLBMiss: mtspr SPRN_SPRG_SCRATCH0, r10 -#if defined(ITLB_MISS_KERNEL) || defined(CONFIG_SWAP) mtspr 
SPRN_SPRG_SCRATCH1, r11 -#endif /* If we are faulting a kernel address, we have to use the * kernel page tables. @@ -278,11 +276,9 @@ InstructionTLBMiss: #ifdef ITLB_MISS_KERNEL mtcr r11 #endif -#ifdef CONFIG_SWAP - rlwinm r11, r10, 32-5, _PAGE_PRESENT + rlwinm r11, r10, 32-7, _PAGE_PRESENT and r11, r11, r10 rlwimi r10, r11, 0, _PAGE_PRESENT -#endif /* The Linux PTE won't go exactly into the MMU TLB. * Software indicator bits 20 and 23 must be clear. * Software indicator bits 22, 24, 25, 26, and 27 must be @@ -296,9 +292,7 @@ InstructionTLBMiss: /* Restore registers */ 0: mfspr r10, SPRN_SPRG_SCRATCH0 -#if defined(ITLB_MISS_KERNEL) || defined(CONFIG_SWAP) mfspr r11, SPRN_SPRG_SCRATCH1 -#endif rfi patch_site 0b, patch__itlbmiss_exit_1 @@ -308,9 +302,7 @@ InstructionTLBMiss: addi r10, r10, 1 stw r10, (itlb_miss_counter - PAGE_OFFSET)@l(0) mfspr r10, SPRN_SPRG_SCRATCH0 -#if defined(ITLB_MISS_KERNEL) || defined(CONFIG_SWAP) mfspr r11, SPRN_SPRG_SCRATCH1 -#endif rfi #endif @@ -394,11 +386,9 @@ DataStoreTLBMiss: * r11 = ((r10 & PRESENT) & ((r10 & ACCESSED) >> 5)); * r10 = (r10 & ~PRESENT) | r11; */ -#ifdef CONFIG_SWAP - rlwinm r11, r10, 32-5, _PAGE_PRESENT + rlwinm r11, r10, 32-7, _PAGE_PRESENT and r11, r11, r10 rlwimi r10, r11, 0, _PAGE_PRESENT -#endif /* The Linux PTE won't go exactly into the MMU TLB. * Software indicator bits 24, 25, 26, and 27 must be * set. All other Linux PTE bits control the behavior From cd61f14592df1054eda1dfcee6dafd248d07c1b8 Mon Sep 17 00:00:00 2001 From: Hauke Mehrtens Date: Tue, 22 Sep 2020 23:41:12 +0200 Subject: [PATCH 12/18] net: lantiq: Add locking for TX DMA channel commit f9317ae5523f99999fb54c513ebabbb2bc887ddf upstream. The TX DMA channel data is accessed by the xrx200_start_xmit() and the xrx200_tx_housekeeping() function from different threads. Make sure the accesses are synchronized by acquiring the netif_tx_lock() in the xrx200_tx_housekeeping() function too. 
This lock is acquired by the kernel before calling xrx200_start_xmit(). Signed-off-by: Hauke Mehrtens Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/lantiq_xrx200.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/ethernet/lantiq_xrx200.c b/drivers/net/ethernet/lantiq_xrx200.c index 96948276b2bc..4e44a39267eb 100644 --- a/drivers/net/ethernet/lantiq_xrx200.c +++ b/drivers/net/ethernet/lantiq_xrx200.c @@ -245,6 +245,7 @@ static int xrx200_tx_housekeeping(struct napi_struct *napi, int budget) int pkts = 0; int bytes = 0; + netif_tx_lock(net_dev); while (pkts < budget) { struct ltq_dma_desc *desc = &ch->dma.desc_base[ch->tx_free]; @@ -268,6 +269,7 @@ static int xrx200_tx_housekeeping(struct napi_struct *napi, int budget) net_dev->stats.tx_bytes += bytes; netdev_completed_queue(ch->priv->net_dev, pkts, bytes); + netif_tx_unlock(net_dev); if (netif_queue_stopped(net_dev)) netif_wake_queue(net_dev); From df33054114475477b5e7810aa0efb26916220474 Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Mon, 26 Oct 2020 13:36:17 -0700 Subject: [PATCH 13/18] Input: sunkbd - avoid use-after-free in teardown paths commit 77e70d351db7de07a46ac49b87a6c3c7a60fca7e upstream. We need to make sure we cancel the reinit work before we tear down the driver structures. 
Reported-by: Bodong Zhao Tested-by: Bodong Zhao Cc: stable@vger.kernel.org Signed-off-by: Dmitry Torokhov Signed-off-by: Greg Kroah-Hartman --- drivers/input/keyboard/sunkbd.c | 41 ++++++++++++++++++++++++++------- 1 file changed, 33 insertions(+), 8 deletions(-) diff --git a/drivers/input/keyboard/sunkbd.c b/drivers/input/keyboard/sunkbd.c index 27126e621eb6..d450f11b98a7 100644 --- a/drivers/input/keyboard/sunkbd.c +++ b/drivers/input/keyboard/sunkbd.c @@ -99,7 +99,8 @@ static irqreturn_t sunkbd_interrupt(struct serio *serio, switch (data) { case SUNKBD_RET_RESET: - schedule_work(&sunkbd->tq); + if (sunkbd->enabled) + schedule_work(&sunkbd->tq); sunkbd->reset = -1; break; @@ -200,16 +201,12 @@ static int sunkbd_initialize(struct sunkbd *sunkbd) } /* - * sunkbd_reinit() sets leds and beeps to a state the computer remembers they - * were in. + * sunkbd_set_leds_beeps() sets leds and beeps to a state the computer remembers + * they were in. */ -static void sunkbd_reinit(struct work_struct *work) +static void sunkbd_set_leds_beeps(struct sunkbd *sunkbd) { - struct sunkbd *sunkbd = container_of(work, struct sunkbd, tq); - - wait_event_interruptible_timeout(sunkbd->wait, sunkbd->reset >= 0, HZ); - serio_write(sunkbd->serio, SUNKBD_CMD_SETLED); serio_write(sunkbd->serio, (!!test_bit(LED_CAPSL, sunkbd->dev->led) << 3) | @@ -222,11 +219,39 @@ static void sunkbd_reinit(struct work_struct *work) SUNKBD_CMD_BELLOFF - !!test_bit(SND_BELL, sunkbd->dev->snd)); } + +/* + * sunkbd_reinit() wait for the keyboard reset to complete and restores state + * of leds and beeps. + */ + +static void sunkbd_reinit(struct work_struct *work) +{ + struct sunkbd *sunkbd = container_of(work, struct sunkbd, tq); + + /* + * It is OK that we check sunkbd->enabled without pausing serio, + * as we only want to catch true->false transition that will + * happen once and we will be woken up for it. 
+ */ + wait_event_interruptible_timeout(sunkbd->wait, + sunkbd->reset >= 0 || !sunkbd->enabled, + HZ); + + if (sunkbd->reset >= 0 && sunkbd->enabled) + sunkbd_set_leds_beeps(sunkbd); +} + static void sunkbd_enable(struct sunkbd *sunkbd, bool enable) { serio_pause_rx(sunkbd->serio); sunkbd->enabled = enable; serio_continue_rx(sunkbd->serio); + + if (!enable) { + wake_up_interruptible(&sunkbd->wait); + cancel_work_sync(&sunkbd->tq); + } } /* From 1527ab7859b2b3231f33eaf594e7aa28eda148ff Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 9 Oct 2020 14:17:11 +0200 Subject: [PATCH 14/18] mac80211: always wind down STA state commit dcd479e10a0510522a5d88b29b8f79ea3467d501 upstream. When (for example) an IBSS station is pre-moved to AUTHORIZED before it's inserted, and then the insertion fails, we don't clean up the fast RX/TX states that might already have been created, since we don't go through all the state transitions again on the way down. Do that, if it hasn't been done already, when the station is freed. I considered only freeing the fast TX/RX state there, but we might add more state so it's more robust to wind down the state properly. Note that we warn if the station was ever inserted, it should have been properly cleaned up in that case, and the driver will probably not like things happening out of order. 
Reported-by: syzbot+2e293dbd67de2836ba42@syzkaller.appspotmail.com Link: https://lore.kernel.org/r/20201009141710.7223b322a955.I95bd08b9ad0e039c034927cce0b75beea38e059b@changeid Signed-off-by: Johannes Berg Signed-off-by: Greg Kroah-Hartman --- net/mac80211/sta_info.c | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c index 4f14d8a06915..38bb6d512b36 100644 --- a/net/mac80211/sta_info.c +++ b/net/mac80211/sta_info.c @@ -244,6 +244,24 @@ struct sta_info *sta_info_get_by_idx(struct ieee80211_sub_if_data *sdata, */ void sta_info_free(struct ieee80211_local *local, struct sta_info *sta) { + /* + * If we had used sta_info_pre_move_state() then we might not + * have gone through the state transitions down again, so do + * it here now (and warn if it's inserted). + * + * This will clear state such as fast TX/RX that may have been + * allocated during state transitions. + */ + while (sta->sta_state > IEEE80211_STA_NONE) { + int ret; + + WARN_ON_ONCE(test_sta_flag(sta, WLAN_STA_INSERTED)); + + ret = sta_info_move_state(sta, sta->sta_state - 1); + if (WARN_ONCE(ret, "sta_info_move_state() returned %d\n", ret)) + break; + } + if (sta->rate_ctrl) rate_control_free_sta(sta); From 7ae6f2df438d71e30ecbedabd055aa2bfba2ba4d Mon Sep 17 00:00:00 2001 From: Zhang Changzhong Date: Tue, 14 Jul 2020 14:44:50 +0800 Subject: [PATCH 15/18] can: proc: can_remove_proc(): silence remove_proc_entry warning commit 3accbfdc36130282f5ae9e6eecfdf820169fedce upstream. If can_init_proc() fail to create /proc/net/can directory, can_remove_proc() will trigger a warning: WARNING: CPU: 6 PID: 7133 at fs/proc/generic.c:672 remove_proc_entry+0x17b0 Kernel panic - not syncing: panic_on_warn set ... Fix to return early from can_remove_proc() if can proc_dir does not exists. 
Signed-off-by: Zhang Changzhong Link: https://lore.kernel.org/r/1594709090-3203-1-git-send-email-zhangchangzhong@huawei.com Fixes: 8e8cda6d737d ("can: initial support for network namespaces") Acked-by: Oliver Hartkopp Signed-off-by: Marc Kleine-Budde Signed-off-by: Greg Kroah-Hartman --- net/can/proc.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/net/can/proc.c b/net/can/proc.c index e6881bfc3ed1..077af42c26ba 100644 --- a/net/can/proc.c +++ b/net/can/proc.c @@ -471,6 +471,9 @@ void can_init_proc(struct net *net) */ void can_remove_proc(struct net *net) { + if (!net->can.proc_dir) + return; + if (net->can.pde_version) remove_proc_entry(CAN_PROC_VERSION, net->can.proc_dir); @@ -498,6 +501,5 @@ void can_remove_proc(struct net *net) if (net->can.pde_rcvlist_sff) remove_proc_entry(CAN_PROC_RCVLIST_SFF, net->can.proc_dir); - if (net->can.proc_dir) - remove_proc_entry("can", net->proc_net); + remove_proc_entry("can", net->proc_net); } From 087c857e0131c2a045b29401b9f99d9265f8bcd5 Mon Sep 17 00:00:00 2001 From: David Edmondson Date: Tue, 3 Nov 2020 12:04:00 +0000 Subject: [PATCH 16/18] KVM: x86: clflushopt should be treated as a no-op by emulation commit 51b958e5aeb1e18c00332e0b37c5d4e95a3eff84 upstream. The instruction emulator ignores clflush instructions, yet fails to support clflushopt. Treat both similarly. 
Fixes: 13e457e0eebf ("KVM: x86: Emulator does not decode clflush well") Signed-off-by: David Edmondson Message-Id: <20201103120400.240882-1-david.edmondson@oracle.com> Reviewed-by: Joao Martins Signed-off-by: Paolo Bonzini Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/emulate.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index 484c32b7f79f..39265b55929d 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -4050,6 +4050,12 @@ static int em_clflush(struct x86_emulate_ctxt *ctxt) return X86EMUL_CONTINUE; } +static int em_clflushopt(struct x86_emulate_ctxt *ctxt) +{ + /* emulating clflushopt regardless of cpuid */ + return X86EMUL_CONTINUE; +} + static int em_movsxd(struct x86_emulate_ctxt *ctxt) { ctxt->dst.val = (s32) ctxt->src.val; @@ -4592,7 +4598,7 @@ static const struct opcode group11[] = { }; static const struct gprefix pfx_0f_ae_7 = { - I(SrcMem | ByteOp, em_clflush), N, N, N, + I(SrcMem | ByteOp, em_clflush), I(SrcMem | ByteOp, em_clflushopt), N, N, }; static const struct group_dual group15 = { { From 26c7d2883851c70c854a98934ae27379286de8bb Mon Sep 17 00:00:00 2001 From: Nick Desaulniers Date: Sat, 7 Nov 2020 00:49:39 -0800 Subject: [PATCH 17/18] ACPI: GED: fix -Wformat commit 9debfb81e7654fe7388a49f45bc4d789b94c1103 upstream. Clang is more aggressive about -Wformat warnings when the format flag specifies a type smaller than the parameter. It turns out that gsi is an int. Fixes: drivers/acpi/evged.c:105:48: warning: format specifies type 'unsigned char' but the argument has type 'unsigned int' [-Wformat] trigger == ACPI_EDGE_SENSITIVE ? 'E' : 'L', gsi); ^~~ Link: https://github.com/ClangBuiltLinux/linux/issues/378 Fixes: ea6f3af4c5e6 ("ACPI: GED: add support for _Exx / _Lxx handler methods") Acked-by: Ard Biesheuvel Signed-off-by: Nick Desaulniers Signed-off-by: Rafael J. 
Wysocki Signed-off-by: Greg Kroah-Hartman --- drivers/acpi/evged.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/acpi/evged.c b/drivers/acpi/evged.c index ccd900690b6f..9df6991635c2 100644 --- a/drivers/acpi/evged.c +++ b/drivers/acpi/evged.c @@ -101,7 +101,7 @@ static acpi_status acpi_ged_request_interrupt(struct acpi_resource *ares, switch (gsi) { case 0 ... 255: - sprintf(ev_name, "_%c%02hhX", + sprintf(ev_name, "_%c%02X", trigger == ACPI_EDGE_SENSITIVE ? 'E' : 'L', gsi); if (ACPI_SUCCESS(acpi_get_handle(handle, ev_name, &evt_handle))) From fc8334619167ce90b6d3f76e3dce9284dbe14fa2 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Sun, 22 Nov 2020 10:14:12 +0100 Subject: [PATCH 18/18] Linux 5.4.79 Tested-by: Jon Hunter Tested-by: Shuah Khan Tested-by: Linux Kernel Functional Testing Tested-by: Guenter Roeck Link: https://lore.kernel.org/r/20201120104541.058449969@linuxfoundation.org Signed-off-by: Greg Kroah-Hartman --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 5725b07aaddf..f02539be5e07 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 VERSION = 5 PATCHLEVEL = 4 -SUBLEVEL = 78 +SUBLEVEL = 79 EXTRAVERSION = NAME = Kleptomaniac Octopus