From bded4de4a5e13036a1589e9d5939b4d420479c0d Mon Sep 17 00:00:00 2001 From: Saeed Mirzamohammadi Date: Tue, 20 Oct 2020 13:41:36 +0200 Subject: [PATCH 01/49] netfilter: nftables_offload: KASAN slab-out-of-bounds Read in nft_flow_rule_create commit 31cc578ae2de19c748af06d859019dced68e325d upstream. This patch fixes the issue due to: BUG: KASAN: slab-out-of-bounds in nft_flow_rule_create+0x622/0x6a2 net/netfilter/nf_tables_offload.c:40 Read of size 8 at addr ffff888103910b58 by task syz-executor227/16244 The error happens when expr->ops is accessed early on before performing the boundary check and after nft_expr_next() moves the expr to go out-of-bounds. This patch checks the boundary condition before expr->ops that fixes the slab-out-of-bounds Read issue. Add nft_expr_more() and use it to fix this problem. Signed-off-by: Saeed Mirzamohammadi Signed-off-by: Pablo Neira Ayuso Signed-off-by: Greg Kroah-Hartman --- include/net/netfilter/nf_tables.h | 6 ++++++ net/netfilter/nf_tables_api.c | 6 +++--- net/netfilter/nf_tables_offload.c | 4 ++-- 3 files changed, 11 insertions(+), 5 deletions(-) diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index bc2c73f54962..a576bcbba2fc 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -872,6 +872,12 @@ static inline struct nft_expr *nft_expr_last(const struct nft_rule *rule) return (struct nft_expr *)&rule->data[rule->dlen]; } +static inline bool nft_expr_more(const struct nft_rule *rule, + const struct nft_expr *expr) +{ + return expr != nft_expr_last(rule) && expr->ops; +} + static inline struct nft_userdata *nft_userdata(const struct nft_rule *rule) { return (void *)&rule->data[rule->dlen]; diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index ff2d2b514506..5a77b7a17722 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -254,7 +254,7 @@ static void nft_rule_expr_activate(const struct nft_ctx *ctx, struct nft_expr *expr; expr = nft_expr_first(rule); - while (expr != nft_expr_last(rule) && expr->ops) { + while (nft_expr_more(rule, expr)) { if (expr->ops->activate) expr->ops->activate(ctx, expr); @@ -269,7 +269,7 @@ static void nft_rule_expr_deactivate(const struct nft_ctx *ctx, struct nft_expr *expr; expr = nft_expr_first(rule); - while (expr != nft_expr_last(rule) && expr->ops) { + while (nft_expr_more(rule, expr)) { if (expr->ops->deactivate) expr->ops->deactivate(ctx, expr, phase); @@ -2642,7 +2642,7 @@ static void nf_tables_rule_destroy(const struct nft_ctx *ctx, * is called on error from nf_tables_newrule(). */ expr = nft_expr_first(rule); - while (expr != nft_expr_last(rule) && expr->ops) { + while (nft_expr_more(rule, expr)) { next = nft_expr_next(expr); nf_tables_expr_destroy(ctx, expr); expr = next; diff --git a/net/netfilter/nf_tables_offload.c b/net/netfilter/nf_tables_offload.c index 914cd0618d5a..c480549a7f94 100644 --- a/net/netfilter/nf_tables_offload.c +++ b/net/netfilter/nf_tables_offload.c @@ -37,7 +37,7 @@ struct nft_flow_rule *nft_flow_rule_create(struct net *net, struct nft_expr *expr; expr = nft_expr_first(rule); - while (expr->ops && expr != nft_expr_last(rule)) { + while (nft_expr_more(rule, expr)) { if (expr->ops->offload_flags & NFT_OFFLOAD_F_ACTION) num_actions++; @@ -61,7 +61,7 @@ struct nft_flow_rule *nft_flow_rule_create(struct net *net, ctx->net = net; ctx->dep.type = NFT_OFFLOAD_DEP_UNSPEC; - while (expr->ops && expr != nft_expr_last(rule)) { + while (nft_expr_more(rule, expr)) { if (!expr->ops->offload) { err = -EOPNOTSUPP; goto err_out; From a45c8c0a31a7ff7fc7d6c540ecea017116d3386e Mon Sep 17 00:00:00 2001 From: Christian Eggers Date: Mon, 12 Oct 2020 11:35:42 +0200 Subject: [PATCH 02/49] socket: don't clear SOCK_TSTAMP_NEW when SO_TIMESTAMPNS is disabled commit 4e3bbb33e6f36e4b05be1b1b9b02e3dd5aaa3e69 upstream. SOCK_TSTAMP_NEW (timespec64 instead of timespec) is also used for hardware time stamps (configured via SO_TIMESTAMPING_NEW). User space (ptp4l) first configures hardware time stamping via SO_TIMESTAMPING_NEW which sets SOCK_TSTAMP_NEW. In the next step, ptp4l disables SO_TIMESTAMPNS(_NEW) (software time stamps), but this must not switch hardware time stamps back to "32 bit mode". This problem happens on 32 bit platforms were the libc has already switched to struct timespec64 (from SO_TIMExxx_OLD to SO_TIMExxx_NEW socket options). ptp4l complains with "missing timestamp on transmitted peer delay request" because the wrong format is received (and discarded). Fixes: 887feae36aee ("socket: Add SO_TIMESTAMP[NS]_NEW") Fixes: 783da70e8396 ("net: add sock_enable_timestamps") Signed-off-by: Christian Eggers Acked-by: Willem de Bruijn Acked-by: Deepa Dinamani Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman --- net/core/sock.c | 1 - 1 file changed, 1 deletion(-) diff --git a/net/core/sock.c b/net/core/sock.c index 1b765e62658f..33543d59a10d 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -923,7 +923,6 @@ set_rcvbuf: } else { sock_reset_flag(sk, SOCK_RCVTSTAMP); sock_reset_flag(sk, SOCK_RCVTSTAMPNS); - sock_reset_flag(sk, SOCK_TSTAMP_NEW); } break; From c8a5496bc747650a538b291ad3feee94747ff539 Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Wed, 1 Apr 2020 13:23:27 -0500 Subject: [PATCH 03/49] objtool: Support Clang non-section symbols in ORC generation commit e81e0724432542af8d8c702c31e9d82f57b1ff31 upstream. When compiling the kernel with AS=clang, objtool produces a lot of warnings: warning: objtool: missing symbol for section .text warning: objtool: missing symbol for section .init.text warning: objtool: missing symbol for section .ref.text It then fails to generate the ORC table. The problem is that objtool assumes text section symbols always exist. But the Clang assembler is aggressive about removing them. When generating relocations for the ORC table, objtool always tries to reference instructions by their section symbol offset. If the section symbol doesn't exist, it bails. Do a fallback: when a section symbol isn't available, reference a function symbol instead. Reported-by: Dmitry Golovin Signed-off-by: Josh Poimboeuf Signed-off-by: Borislav Petkov Tested-by: Nathan Chancellor Reviewed-by: Miroslav Benes Acked-by: Peter Zijlstra (Intel) Link: https://github.com/ClangBuiltLinux/linux/issues/669 Link: https://lkml.kernel.org/r/9a9cae7fcf628843aabe5a086b1a3c5bf50f42e8.1585761021.git.jpoimboe@redhat.com Cc: Nick Desaulniers Signed-off-by: Greg Kroah-Hartman --- tools/objtool/orc_gen.c | 33 ++++++++++++++++++++++++++------- 1 file changed, 26 insertions(+), 7 deletions(-) diff --git a/tools/objtool/orc_gen.c b/tools/objtool/orc_gen.c index 27a4112848c2..e8c34b0a8b00 100644 --- a/tools/objtool/orc_gen.c +++ b/tools/objtool/orc_gen.c @@ -88,11 +88,6 @@ static int create_orc_entry(struct section *u_sec, struct section *ip_relasec, struct orc_entry *orc; struct rela *rela; - if (!insn_sec->sym) { - WARN("missing symbol for section %s", insn_sec->name); - return -1; - } - /* populate ORC data */ orc = (struct orc_entry *)u_sec->data->d_buf + idx; memcpy(orc, o, sizeof(*orc)); @@ -105,8 +100,32 @@ static int create_orc_entry(struct section *u_sec, struct section *ip_relasec, } memset(rela, 0, sizeof(*rela)); - rela->sym = insn_sec->sym; - rela->addend = insn_off; + if (insn_sec->sym) { + rela->sym = insn_sec->sym; + rela->addend = insn_off; + } else { + /* + * The Clang assembler doesn't produce section symbols, so we + * have to reference the function symbol instead: + */ + rela->sym = find_symbol_containing(insn_sec, insn_off); + if (!rela->sym) { + /* + * Hack alert. This happens when we need to reference + * the NOP pad insn immediately after the function. + */ + rela->sym = find_symbol_containing(insn_sec, + insn_off - 1); + } + if (!rela->sym) { + WARN("missing symbol for insn at offset 0x%lx\n", + insn_off); + return -1; + } + + rela->addend = insn_off - rela->sym->offset; + } + rela->type = R_X86_64_PC32; rela->offset = idx * sizeof(int); From 2dcb0c6c381817013b126fa141b05474b7a7cff0 Mon Sep 17 00:00:00 2001 From: Rasmus Villemoes Date: Thu, 17 Sep 2020 08:56:11 +0200 Subject: [PATCH 04/49] scripts/setlocalversion: make git describe output more reliable commit 548b8b5168c90c42e88f70fcf041b4ce0b8e7aa8 upstream. When building for an embedded target using Yocto, we're sometimes observing that the version string that gets built into vmlinux (and thus what uname -a reports) differs from the path under /lib/modules/ where modules get installed in the rootfs, but only in the length of the -gabc123def suffix. Hence modprobe always fails. The problem is that Yocto has the concept of "sstate" (shared state), which allows different developers/buildbots/etc. to share build artifacts, based on a hash of all the metadata that went into building that artifact - and that metadata includes all dependencies (e.g. the compiler used etc.). That normally works quite well; usually a clean build (without using any sstate cache) done by one developer ends up being binary identical to a build done on another host. However, one thing that can cause two developers to end up with different builds [and thus make one's vmlinux package incompatible with the other's kernel-dev package], which is not captured by the metadata hashing, is this `git describe`: The output of that can be affected by (1) git version: before 2.11 git defaulted to a minimum of 7, since 2.11 (git.git commit e6c587) the default is dynamic based on the number of objects in the repo (2) hence even if both run the same git version, the output can differ based on how many remotes are being tracked (or just lots of local development branches or plain old garbage) (3) and of course somebody could have a core.abbrev config setting in ~/.gitconfig So in order to avoid `uname -a` output relying on such random details of the build environment which are rather hard to ensure are consistent between developers and buildbots, make sure the abbreviated sha1 always consists of exactly 12 hex characters. That is consistent with the current rule for -stable patches, and is almost always enough to identify the head commit unambigously - in the few cases where it does not, the v5.4.3-00021- prefix would certainly nail it down. [Adapt to `` vs $() differences between 5.4 and upstream.] Signed-off-by: Rasmus Villemoes Signed-off-by: Masahiro Yamada Signed-off-by: Greg Kroah-Hartman --- scripts/setlocalversion | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) diff --git a/scripts/setlocalversion b/scripts/setlocalversion index a2998b118ef9..45609dba7d72 100755 --- a/scripts/setlocalversion +++ b/scripts/setlocalversion @@ -45,7 +45,7 @@ scm_version() # Check for git and a git repo. if test -z "$(git rev-parse --show-cdup 2>/dev/null)" && - head=`git rev-parse --verify --short HEAD 2>/dev/null`; then + head=$(git rev-parse --verify HEAD 2>/dev/null); then # If we are at a tagged commit (like "v2.6.30-rc6"), we ignore # it, because this version is defined in the top level Makefile. @@ -59,11 +59,22 @@ scm_version() fi # If we are past a tagged commit (like # "v2.6.30-rc5-302-g72357d5"), we pretty print it. - if atag="`git describe 2>/dev/null`"; then - echo "$atag" | awk -F- '{printf("-%05d-%s", $(NF-1),$(NF))}' + # + # Ensure the abbreviated sha1 has exactly 12 + # hex characters, to make the output + # independent of git version, local + # core.abbrev settings and/or total number of + # objects in the current repository - passing + # --abbrev=12 ensures a minimum of 12, and the + # awk substr() then picks the 'g' and first 12 + # hex chars. + if atag="$(git describe --abbrev=12 2>/dev/null)"; then + echo "$atag" | awk -F- '{printf("-%05d-%s", $(NF-1),substr($(NF),0,13))}' - # If we don't have a tag at all we print -g{commitish}. + # If we don't have a tag at all we print -g{commitish}, + # again using exactly 12 hex chars. else + head="$(echo $head | cut -c1-12)" printf '%s%s' -g $head fi fi From 114c6930b3514f743bcb0da7258d5d862106a6a7 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Thu, 16 Jul 2020 17:11:09 +0100 Subject: [PATCH 05/49] arm64: Run ARCH_WORKAROUND_1 enabling code on all CPUs commit 18fce56134c987e5b4eceddafdbe4b00c07e2ae1 upstream. Commit 73f381660959 ("arm64: Advertise mitigation of Spectre-v2, or lack thereof") changed the way we deal with ARCH_WORKAROUND_1, by moving most of the enabling code to the .matches() callback. This has the unfortunate effect that the workaround gets only enabled on the first affected CPU, and no other. In order to address this, forcefully call the .matches() callback from a .cpu_enable() callback, which brings us back to the original behaviour. Fixes: 73f381660959 ("arm64: Advertise mitigation of Spectre-v2, or lack thereof") Cc: Reviewed-by: Suzuki K Poulose Signed-off-by: Marc Zyngier Signed-off-by: Will Deacon Signed-off-by: Greg Kroah-Hartman --- arch/arm64/kernel/cpu_errata.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c index d2e738c45556..d4af16d5ab6c 100644 --- a/arch/arm64/kernel/cpu_errata.c +++ b/arch/arm64/kernel/cpu_errata.c @@ -635,6 +635,12 @@ check_branch_predictor(const struct arm64_cpu_capabilities *entry, int scope) return (need_wa > 0); } +static void +cpu_enable_branch_predictor_hardening(const struct arm64_cpu_capabilities *cap) +{ + cap->matches(cap, SCOPE_LOCAL_CPU); +} + static const __maybe_unused struct midr_range tx2_family_cpus[] = { MIDR_ALL_VERSIONS(MIDR_BRCM_VULCAN), MIDR_ALL_VERSIONS(MIDR_CAVIUM_THUNDERX2), @@ -894,9 +900,11 @@ const struct arm64_cpu_capabilities arm64_errata[] = { }, #endif { + .desc = "Branch predictor hardening", .capability = ARM64_HARDEN_BRANCH_PREDICTOR, .type = ARM64_CPUCAP_LOCAL_CPU_ERRATUM, .matches = check_branch_predictor, + .cpu_enable = cpu_enable_branch_predictor_hardening, }, #ifdef CONFIG_HARDEN_EL2_VECTORS { From 7736c61080f19160d306bdce07553b8a8c797e57 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Thu, 16 Jul 2020 17:11:10 +0100 Subject: [PATCH 06/49] arm64: Run ARCH_WORKAROUND_2 enabling code on all CPUs commit 39533e12063be7f55e3d6ae21ffe067799d542a4 upstream. Commit 606f8e7b27bf ("arm64: capabilities: Use linear array for detection and verification") changed the way we deal with per-CPU errata by only calling the .matches() callback until one CPU is found to be affected. At this point, .matches() stop being called, and .cpu_enable() will be called on all CPUs. This breaks the ARCH_WORKAROUND_2 handling, as only a single CPU will be mitigated. In order to address this, forcefully call the .matches() callback from a .cpu_enable() callback, which brings us back to the original behaviour. Fixes: 606f8e7b27bf ("arm64: capabilities: Use linear array for detection and verification") Cc: Reviewed-by: Suzuki K Poulose Signed-off-by: Marc Zyngier Signed-off-by: Will Deacon Signed-off-by: Greg Kroah-Hartman --- arch/arm64/kernel/cpu_errata.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c index d4af16d5ab6c..1e16c4e00e77 100644 --- a/arch/arm64/kernel/cpu_errata.c +++ b/arch/arm64/kernel/cpu_errata.c @@ -492,6 +492,12 @@ out_printmsg: return required; } +static void cpu_enable_ssbd_mitigation(const struct arm64_cpu_capabilities *cap) +{ + if (ssbd_state != ARM64_SSBD_FORCE_DISABLE) + cap->matches(cap, SCOPE_LOCAL_CPU); +} + /* known invulnerable cores */ static const struct midr_range arm64_ssb_cpus[] = { MIDR_ALL_VERSIONS(MIDR_CORTEX_A35), @@ -918,6 +924,7 @@ const struct arm64_cpu_capabilities arm64_errata[] = { .capability = ARM64_SSBD, .type = ARM64_CPUCAP_LOCAL_CPU_ERRATUM, .matches = has_ssbd_mitigation, + .cpu_enable = cpu_enable_ssbd_mitigation, .midr_range_list = arm64_ssb_cpus, }, #ifdef CONFIG_ARM64_ERRATUM_1418040 From 57a88e44b51205bffcadd48294f966e402b006b8 Mon Sep 17 00:00:00 2001 From: Nick Desaulniers Date: Fri, 16 Oct 2020 10:53:39 -0700 Subject: [PATCH 07/49] arm64: link with -z norelro regardless of CONFIG_RELOCATABLE commit 3b92fa7485eba16b05166fddf38ab42f2ff6ab95 upstream. With CONFIG_EXPERT=y, CONFIG_KASAN=y, CONFIG_RANDOMIZE_BASE=n, CONFIG_RELOCATABLE=n, we observe the following failure when trying to link the kernel image with LD=ld.lld: error: section: .exit.data is not contiguous with other relro sections ld.lld defaults to -z relro while ld.bfd defaults to -z norelro. This was previously fixed, but only for CONFIG_RELOCATABLE=y. Fixes: 3bbd3db86470 ("arm64: relocatable: fix inconsistencies in linker script and options") Signed-off-by: Nick Desaulniers Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20201016175339.2429280-1-ndesaulniers@google.com Signed-off-by: Will Deacon Signed-off-by: Greg Kroah-Hartman --- arch/arm64/Makefile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm64/Makefile b/arch/arm64/Makefile index 96abe558aea8..cd8f3cdabfd0 100644 --- a/arch/arm64/Makefile +++ b/arch/arm64/Makefile @@ -10,7 +10,7 @@ # # Copyright (C) 1995-2001 by Russell King -LDFLAGS_vmlinux :=--no-undefined -X +LDFLAGS_vmlinux :=--no-undefined -X -z norelro CPPFLAGS_vmlinux.lds = -DTEXT_OFFSET=$(TEXT_OFFSET) GZFLAGS :=-9 @@ -18,7 +18,7 @@ ifeq ($(CONFIG_RELOCATABLE), y) # Pass --no-apply-dynamic-relocs to restore pre-binutils-2.27 behaviour # for relative relocs, since this leads to better Image compression # with the relocation offsets always being zero. -LDFLAGS_vmlinux += -shared -Bsymbolic -z notext -z norelro \ +LDFLAGS_vmlinux += -shared -Bsymbolic -z notext \ $(call ld-option, --no-apply-dynamic-relocs) endif From c3019695f1d8e0b88a1dc188f5554ab550dd7320 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Fri, 21 Aug 2020 17:10:27 -0700 Subject: [PATCH 08/49] x86/PCI: Fix intel_mid_pci.c build error when ACPI is not enabled MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 035fff1f7aab43e420e0098f0854470a5286fb83 upstream. Fix build error when CONFIG_ACPI is not set/enabled by adding the header file which contains a stub for the function in the build error. ../arch/x86/pci/intel_mid_pci.c: In function ‘intel_mid_pci_init’: ../arch/x86/pci/intel_mid_pci.c:303:2: error: implicit declaration of function ‘acpi_noirq_set’; did you mean ‘acpi_irq_get’? [-Werror=implicit-function-declaration] acpi_noirq_set(); Fixes: a912a7584ec3 ("x86/platform/intel-mid: Move PCI initialization to arch_init()") Link: https://lore.kernel.org/r/ea903917-e51b-4cc9-2680-bc1e36efa026@infradead.org Signed-off-by: Randy Dunlap Signed-off-by: Bjorn Helgaas Reviewed-by: Andy Shevchenko Reviewed-by: Jesse Barnes Acked-by: Thomas Gleixner Cc: stable@vger.kernel.org # v4.16+ Cc: Jacob Pan Cc: Len Brown Cc: Jesse Barnes Cc: Arjan van de Ven Signed-off-by: Greg Kroah-Hartman --- arch/x86/pci/intel_mid_pci.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/x86/pci/intel_mid_pci.c b/arch/x86/pci/intel_mid_pci.c index 43867bc85368..eea5a0f3b959 100644 --- a/arch/x86/pci/intel_mid_pci.c +++ b/arch/x86/pci/intel_mid_pci.c @@ -33,6 +33,7 @@ #include #include #include +#include #define PCIE_CAP_OFFSET 0x100 From 8e1b40e57dca742575b975b5e8a595021e78f3b3 Mon Sep 17 00:00:00 2001 From: Michael Schaller Date: Fri, 25 Sep 2020 09:45:02 +0200 Subject: [PATCH 09/49] efivarfs: Replace invalid slashes with exclamation marks in dentries. commit 336af6a4686d885a067ecea8c3c3dd129ba4fc75 upstream. Without this patch efivarfs_alloc_dentry creates dentries with slashes in their name if the respective EFI variable has slashes in its name. This in turn causes EIO on getdents64, which prevents a complete directory listing of /sys/firmware/efi/efivars/. This patch replaces the invalid shlashes with exclamation marks like kobject_set_name_vargs does for /sys/firmware/efi/vars/ to have consistently named dentries under /sys/firmware/efi/vars/ and /sys/firmware/efi/efivars/. Signed-off-by: Michael Schaller Link: https://lore.kernel.org/r/20200925074502.150448-1-misch@google.com Signed-off-by: Ard Biesheuvel Signed-off-by: dann frazier Signed-off-by: Greg Kroah-Hartman --- fs/efivarfs/super.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/fs/efivarfs/super.c b/fs/efivarfs/super.c index fa4f6447ddad..9760a52800b4 100644 --- a/fs/efivarfs/super.c +++ b/fs/efivarfs/super.c @@ -141,6 +141,9 @@ static int efivarfs_callback(efi_char16_t *name16, efi_guid_t vendor, name[len + EFI_VARIABLE_GUID_LEN+1] = '\0'; + /* replace invalid slashes like kobject_set_name_vargs does for /sys/firmware/efi/vars. */ + strreplace(name, '/', '!'); + inode = efivarfs_get_inode(sb, d_inode(root), S_IFREG | 0644, 0, is_removable); if (!inode) From 72c17fadf3f8a9481ef6202e11a2281afb3b2d00 Mon Sep 17 00:00:00 2001 From: Michael Chan Date: Mon, 26 Oct 2020 00:18:20 -0400 Subject: [PATCH 10/49] bnxt_en: Check abort error state in bnxt_open_nic(). [ Upstream commit a1301f08c5acf992d9c1fafddc84c3a822844b04 ] bnxt_open_nic() is called during configuration changes that require the NIC to be closed and then opened. This call is protected by rtnl_lock. Firmware reset can be happening at the same time. Only critical portions of the entire firmware reset sequence are protected by the rtnl_lock. It is possible that bnxt_open_nic() can be called when the firmware reset sequence is aborting. In that case, bnxt_open_nic() needs to check if the ABORT_ERR flag is set and abort if it is. The configuration change that resulted in the bnxt_open_nic() call will fail but the NIC will be brought to a consistent IF_DOWN state. Without this patch, if bnxt_open_nic() were to continue in this error state, it may crash like this: [ 1648.659736] BUG: unable to handle kernel NULL pointer dereference at (null) [ 1648.659768] IP: [] bnxt_alloc_mem+0x50a/0x1140 [bnxt_en] [ 1648.659796] PGD 101e1b3067 PUD 101e1b2067 PMD 0 [ 1648.659813] Oops: 0000 [#1] SMP [ 1648.659825] Modules linked in: xt_CHECKSUM iptable_mangle ipt_MASQUERADE nf_nat_masquerade_ipv4 iptable_nat nf_nat_ipv4 nf_nat nf_conntrack_ipv4 nf_defrag_ipv4 xt_conntrack nf_conntrack ipt_REJECT nf_reject_ipv4 tun bridge stp llc ebtable_filter ebtables ip6table_filter ip6_tables iptable_filter sunrpc dell_smbios dell_wmi_descriptor dcdbas amd64_edac_mod edac_mce_amd kvm_amd kvm irqbypass crc32_pclmul ghash_clmulni_intel aesni_intel lrw gf128mul glue_helper ablk_helper vfat cryptd fat pcspkr ipmi_ssif sg k10temp i2c_piix4 wmi ipmi_si ipmi_devintf ipmi_msghandler tpm_crb acpi_power_meter sch_fq_codel ip_tables xfs libcrc32c sd_mod crc_t10dif crct10dif_generic mgag200 i2c_algo_bit drm_kms_helper syscopyarea sysfillrect sysimgblt fb_sys_fops ttm ahci drm libahci megaraid_sas crct10dif_pclmul crct10dif_common [ 1648.660063] tg3 libata crc32c_intel bnxt_en(OE) drm_panel_orientation_quirks devlink ptp pps_core dm_mirror dm_region_hash dm_log dm_mod fuse [ 1648.660105] CPU: 13 PID: 3867 Comm: ethtool Kdump: loaded Tainted: G OE ------------ 3.10.0-1152.el7.x86_64 #1 [ 1648.660911] Hardware name: Dell Inc. PowerEdge R7515/0R4CNN, BIOS 1.2.14 01/28/2020 [ 1648.661662] task: ffff94e64cbc9080 ti: ffff94f55df1c000 task.ti: ffff94f55df1c000 [ 1648.662409] RIP: 0010:[] [] bnxt_alloc_mem+0x50a/0x1140 [bnxt_en] [ 1648.663171] RSP: 0018:ffff94f55df1fba8 EFLAGS: 00010202 [ 1648.663927] RAX: 0000000000000000 RBX: ffff94e6827e0000 RCX: 0000000000000000 [ 1648.664684] RDX: 0000000000000000 RSI: 0000000000000000 RDI: ffff94e6827e08c0 [ 1648.665433] RBP: ffff94f55df1fc20 R08: 00000000000001ff R09: 0000000000000008 [ 1648.666184] R10: 0000000000000d53 R11: ffff94f55df1f7ce R12: ffff94e6827e08c0 [ 1648.666940] R13: ffff94e6827e08c0 R14: ffff94e6827e08c0 R15: ffffffffb9115e40 [ 1648.667695] FS: 00007f8aadba5740(0000) GS:ffff94f57eb40000(0000) knlGS:0000000000000000 [ 1648.668447] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 1648.669202] CR2: 0000000000000000 CR3: 0000001022772000 CR4: 0000000000340fe0 [ 1648.669966] Call Trace: [ 1648.670730] [] ? bnxt_need_reserve_rings+0x9d/0x170 [bnxt_en] [ 1648.671496] [] __bnxt_open_nic+0x8a/0x9a0 [bnxt_en] [ 1648.672263] [] ? bnxt_close_nic+0x59/0x1b0 [bnxt_en] [ 1648.673031] [] bnxt_open_nic+0x1b/0x50 [bnxt_en] [ 1648.673793] [] bnxt_set_ringparam+0x6c/0xa0 [bnxt_en] [ 1648.674550] [] dev_ethtool+0x1334/0x21a0 [ 1648.675306] [] dev_ioctl+0x1ef/0x5f0 [ 1648.676061] [] sock_do_ioctl+0x4d/0x60 [ 1648.676810] [] sock_ioctl+0x1eb/0x2d0 [ 1648.677548] [] do_vfs_ioctl+0x3a0/0x5b0 [ 1648.678282] [] ? __do_page_fault+0x238/0x500 [ 1648.679016] [] SyS_ioctl+0xa1/0xc0 [ 1648.679745] [] system_call_fastpath+0x25/0x2a [ 1648.680461] Code: 9e 60 01 00 00 0f 1f 40 00 45 8b 8e 48 01 00 00 31 c9 45 85 c9 0f 8e 73 01 00 00 66 0f 1f 44 00 00 49 8b 86 a8 00 00 00 48 63 d1 <48> 8b 14 d0 48 85 d2 0f 84 46 01 00 00 41 8b 86 44 01 00 00 c7 [ 1648.681986] RIP [] bnxt_alloc_mem+0x50a/0x1140 [bnxt_en] [ 1648.682724] RSP [ 1648.683451] CR2: 0000000000000000 Fixes: ec5d31e3c15d ("bnxt_en: Handle firmware reset status during IF_UP.") Reviewed-by: Vasundhara Volam Reviewed-by: Pavan Chebbi Signed-off-by: Michael Chan Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 4f4fd8076261..e25a29d35026 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -9193,7 +9193,10 @@ int bnxt_open_nic(struct bnxt *bp, bool irq_re_init, bool link_re_init) { int rc = 0; - rc = __bnxt_open_nic(bp, irq_re_init, link_re_init); + if (test_bit(BNXT_STATE_ABORT_ERR, &bp->state)) + rc = -EIO; + if (!rc) + rc = __bnxt_open_nic(bp, irq_re_init, link_re_init); if (rc) { netdev_err(bp->dev, "nic open fail (rc: %x)\n", rc); dev_close(bp->dev); From c17d5aea33950b2356038ed4b30dcfad42300d92 Mon Sep 17 00:00:00 2001 From: Vasundhara Volam Date: Mon, 26 Oct 2020 00:18:21 -0400 Subject: [PATCH 11/49] bnxt_en: Send HWRM_FUNC_RESET fw command unconditionally. [ Upstream commit 825741b071722f1c8ad692cead562c4b5f5eaa93 ] In the AER or firmware reset flow, if we are in fatal error state or if pci_channel_offline() is true, we don't send any commands to the firmware because the commands will likely not reach the firmware and most commands don't matter much because the firmware is likely to be reset imminently. However, the HWRM_FUNC_RESET command is different and we should always attempt to send it. In the AER flow for example, the .slot_reset() call will trigger this fw command and we need to try to send it to effect the proper reset. Fixes: b340dc680ed4 ("bnxt_en: Avoid sending firmware messages when AER error is detected.") Reviewed-by: Edwin Peer Signed-off-by: Vasundhara Volam Signed-off-by: Michael Chan Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index e25a29d35026..9acef18e04a7 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -4204,7 +4204,8 @@ static int bnxt_hwrm_do_send_msg(struct bnxt *bp, void *msg, u32 msg_len, u32 bar_offset = BNXT_GRCPF_REG_CHIMP_COMM; u16 dst = BNXT_HWRM_CHNL_CHIMP; - if (BNXT_NO_FW_ACCESS(bp)) + if (BNXT_NO_FW_ACCESS(bp) && + le16_to_cpu(req->req_type) != HWRM_FUNC_RESET) return -EBUSY; if (msg_len > BNXT_HWRM_MAX_REQ_LEN) { From a5b9b28b22baaab47e23fe1cac7300f04fb7b9f6 Mon Sep 17 00:00:00 2001 From: Vinay Kumar Yadav Date: Mon, 26 Oct 2020 01:05:39 +0530 Subject: [PATCH 12/49] chelsio/chtls: fix deadlock issue [ Upstream commit 28e9dcd9172028263c8225c15c4e329e08475e89 ] In chtls_pass_establish() we hold child socket lock using bh_lock_sock and we are again trying bh_lock_sock in add_to_reap_list, causing deadlock. Remove bh_lock_sock in add_to_reap_list() as lock is already held. Fixes: cc35c88ae4db ("crypto : chtls - CPL handler definition") Signed-off-by: Vinay Kumar Yadav Link: https://lore.kernel.org/r/20201025193538.31112-1-vinay.yadav@chelsio.com Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman --- drivers/crypto/chelsio/chtls/chtls_cm.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/crypto/chelsio/chtls/chtls_cm.c b/drivers/crypto/chelsio/chtls/chtls_cm.c index 9b410ffafc4d..4c35164f92cc 100644 --- a/drivers/crypto/chelsio/chtls/chtls_cm.c +++ b/drivers/crypto/chelsio/chtls/chtls_cm.c @@ -1369,7 +1369,6 @@ static void add_to_reap_list(struct sock *sk) struct chtls_sock *csk = sk->sk_user_data; local_bh_disable(); - bh_lock_sock(sk); release_tcp_port(sk); /* release the port immediately */ spin_lock(&reap_list_lock); @@ -1378,7 +1377,6 @@ static void add_to_reap_list(struct sock *sk) if (!csk->passive_reap_next) schedule_work(&reap_task); spin_unlock(&reap_list_lock); - bh_unlock_sock(sk); local_bh_enable(); } From c5db8069776f4ca44edc77f3c3dff140a886762d Mon Sep 17 00:00:00 2001 From: Vinay Kumar Yadav Date: Mon, 26 Oct 2020 01:12:29 +0530 Subject: [PATCH 13/49] chelsio/chtls: fix memory leaks in CPL handlers [ Upstream commit 6daa1da4e262b0cd52ef0acc1989ff22b5540264 ] CPL handler functions chtls_pass_open_rpl() and chtls_close_listsrv_rpl() should return CPL_RET_BUF_DONE so that caller function will do skb free to avoid leak. Fixes: cc35c88ae4db ("crypto : chtls - CPL handler definition") Signed-off-by: Vinay Kumar Yadav Link: https://lore.kernel.org/r/20201025194228.31271-1-vinay.yadav@chelsio.com Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman --- drivers/crypto/chelsio/chtls/chtls_cm.c | 27 +++++++++++-------------- 1 file changed, 12 insertions(+), 15 deletions(-) diff --git a/drivers/crypto/chelsio/chtls/chtls_cm.c b/drivers/crypto/chelsio/chtls/chtls_cm.c index 4c35164f92cc..e2c64c943cfc 100644 --- a/drivers/crypto/chelsio/chtls/chtls_cm.c +++ b/drivers/crypto/chelsio/chtls/chtls_cm.c @@ -692,14 +692,13 @@ static int chtls_pass_open_rpl(struct chtls_dev *cdev, struct sk_buff *skb) if (rpl->status != CPL_ERR_NONE) { pr_info("Unexpected PASS_OPEN_RPL status %u for STID %u\n", rpl->status, stid); - return CPL_RET_BUF_DONE; + } else { + cxgb4_free_stid(cdev->tids, stid, listen_ctx->lsk->sk_family); + sock_put(listen_ctx->lsk); + kfree(listen_ctx); + module_put(THIS_MODULE); } - cxgb4_free_stid(cdev->tids, stid, listen_ctx->lsk->sk_family); - sock_put(listen_ctx->lsk); - kfree(listen_ctx); - module_put(THIS_MODULE); - - return 0; + return CPL_RET_BUF_DONE; } static int chtls_close_listsrv_rpl(struct chtls_dev *cdev, struct sk_buff *skb) @@ -716,15 +715,13 @@ static int chtls_close_listsrv_rpl(struct chtls_dev *cdev, struct sk_buff *skb) if (rpl->status != CPL_ERR_NONE) { pr_info("Unexpected CLOSE_LISTSRV_RPL status %u for STID %u\n", rpl->status, stid); - return CPL_RET_BUF_DONE; + } else { + cxgb4_free_stid(cdev->tids, stid, listen_ctx->lsk->sk_family); + sock_put(listen_ctx->lsk); + kfree(listen_ctx); + module_put(THIS_MODULE); } - - cxgb4_free_stid(cdev->tids, stid, listen_ctx->lsk->sk_family); - sock_put(listen_ctx->lsk); - kfree(listen_ctx); - module_put(THIS_MODULE); - - return 0; + return CPL_RET_BUF_DONE; } static void chtls_purge_wr_queue(struct sock *sk) From 3a0d5b5358d19d5340cca8fce34fbe29067df074 Mon Sep 17 00:00:00 2001 From: Vinay Kumar Yadav Date: Fri, 23 Oct 2020 00:35:57 +0530 Subject: [PATCH 14/49] chelsio/chtls: fix tls record info to user [ Upstream commit 4f3391ce8f5a69e7e6d66d0a3fc654eb6dbdc919 ] chtls_pt_recvmsg() receives a skb with tls header and subsequent skb with data, need to finalize the data copy whenever next skb with tls header is available. but here current tls header is overwritten by next available tls header, ends up corrupting user buffer data. fixing it by finalizing current record whenever next skb contains tls header. v1->v2: - Improved commit message. Fixes: 17a7d24aa89d ("crypto: chtls - generic handling of data and hdr") Signed-off-by: Vinay Kumar Yadav Link: https://lore.kernel.org/r/20201022190556.21308-1-vinay.yadav@chelsio.com Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman --- drivers/crypto/chelsio/chtls/chtls_io.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/crypto/chelsio/chtls/chtls_io.c b/drivers/crypto/chelsio/chtls/chtls_io.c index a5903cfc8352..4c44d7c0c2c1 100644 --- a/drivers/crypto/chelsio/chtls/chtls_io.c +++ b/drivers/crypto/chelsio/chtls/chtls_io.c @@ -1537,6 +1537,7 @@ skip_copy: tp->urg_data = 0; if ((avail + offset) >= skb->len) { + struct sk_buff *next_skb; if (ULP_SKB_CB(skb)->flags & ULPCB_FLAG_TLS_HDR) { tp->copied_seq += skb->len; hws->rcvpld = skb->hdr_len; @@ -1546,8 +1547,10 @@ skip_copy: chtls_free_skb(sk, skb); buffers_freed++; hws->copied_seq = 0; - if (copied >= target && - !skb_peek(&sk->sk_receive_queue)) + next_skb = skb_peek(&sk->sk_receive_queue); + if (copied >= target && !next_skb) + break; + if (ULP_SKB_CB(next_skb)->flags & ULPCB_FLAG_TLS_HDR) break; } } while (len > 0); From 0ea202010b4080827f7ebd3064e32cf843c27284 Mon Sep 17 00:00:00 2001 From: Raju Rangoju Date: Fri, 23 Oct 2020 17:28:52 +0530 Subject: [PATCH 15/49] cxgb4: set up filter action after rewrites [ Upstream commit 937d8420588421eaa5c7aa5c79b26b42abb288ef ] The current code sets up the filter action field before rewrites are set up. When the action 'switch' is used with rewrites, this may result in initial few packets that get switched out don't have rewrites applied on them. So, make sure filter action is set up along with rewrites or only after everything else is set up for rewrites. Fixes: 12b276fbf6e0 ("cxgb4: add support to create hash filters") Signed-off-by: Raju Rangoju Link: https://lore.kernel.org/r/20201023115852.18262-1-rajur@chelsio.com Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman --- .../net/ethernet/chelsio/cxgb4/cxgb4_filter.c | 56 +++++++++---------- drivers/net/ethernet/chelsio/cxgb4/t4_tcb.h | 4 ++ 2 files changed, 31 insertions(+), 29 deletions(-) diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c index 137ff00605d9..202af8dc7966 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c @@ -145,13 +145,13 @@ static int configure_filter_smac(struct adapter *adap, struct filter_entry *f) int err; /* do a set-tcb for smac-sel and CWR bit.. */ - err = set_tcb_tflag(adap, f, f->tid, TF_CCTRL_CWR_S, 1, 1); - if (err) - goto smac_err; - err = set_tcb_field(adap, f, f->tid, TCB_SMAC_SEL_W, TCB_SMAC_SEL_V(TCB_SMAC_SEL_M), TCB_SMAC_SEL_V(f->smt->idx), 1); + if (err) + goto smac_err; + + err = set_tcb_tflag(adap, f, f->tid, TF_CCTRL_CWR_S, 1, 1); if (!err) return 0; @@ -612,6 +612,7 @@ int set_filter_wr(struct adapter *adapter, int fidx) FW_FILTER_WR_DIRSTEERHASH_V(f->fs.dirsteerhash) | FW_FILTER_WR_LPBK_V(f->fs.action == FILTER_SWITCH) | FW_FILTER_WR_DMAC_V(f->fs.newdmac) | + FW_FILTER_WR_SMAC_V(f->fs.newsmac) | FW_FILTER_WR_INSVLAN_V(f->fs.newvlan == VLAN_INSERT || f->fs.newvlan == VLAN_REWRITE) | FW_FILTER_WR_RMVLAN_V(f->fs.newvlan == VLAN_REMOVE || @@ -629,7 +630,7 @@ int set_filter_wr(struct adapter *adapter, int fidx) FW_FILTER_WR_OVLAN_VLD_V(f->fs.val.ovlan_vld) | FW_FILTER_WR_IVLAN_VLDM_V(f->fs.mask.ivlan_vld) | FW_FILTER_WR_OVLAN_VLDM_V(f->fs.mask.ovlan_vld)); - fwr->smac_sel = 0; + fwr->smac_sel = f->smt->idx; fwr->rx_chan_rx_rpl_iq = htons(FW_FILTER_WR_RX_CHAN_V(0) | FW_FILTER_WR_RX_RPL_IQ_V(adapter->sge.fw_evtq.abs_id)); @@ -1048,11 +1049,8 @@ static void mk_act_open_req6(struct filter_entry *f, struct sk_buff *skb, TX_QUEUE_V(f->fs.nat_mode) | T5_OPT_2_VALID_F | RX_CHANNEL_V(cxgb4_port_e2cchan(f->dev)) | - CONG_CNTRL_V((f->fs.action == FILTER_DROP) | - (f->fs.dirsteer << 1)) | PACE_V((f->fs.maskhash) | - ((f->fs.dirsteerhash) << 1)) | - CCTRL_ECN_V(f->fs.action == FILTER_SWITCH)); + ((f->fs.dirsteerhash) << 1))); } static void mk_act_open_req(struct filter_entry *f, struct sk_buff *skb, @@ -1088,11 +1086,8 @@ static void mk_act_open_req(struct filter_entry *f, struct sk_buff *skb, TX_QUEUE_V(f->fs.nat_mode) | T5_OPT_2_VALID_F | RX_CHANNEL_V(cxgb4_port_e2cchan(f->dev)) | - CONG_CNTRL_V((f->fs.action == FILTER_DROP) | - (f->fs.dirsteer << 1)) | PACE_V((f->fs.maskhash) | - ((f->fs.dirsteerhash) << 1)) | - CCTRL_ECN_V(f->fs.action == FILTER_SWITCH)); + ((f->fs.dirsteerhash) << 1))); } static int cxgb4_set_hash_filter(struct net_device *dev, @@ -1748,6 +1743,20 @@ void hash_filter_rpl(struct adapter *adap, const struct cpl_act_open_rpl *rpl) } return; } + switch (f->fs.action) { + case FILTER_PASS: + if (f->fs.dirsteer) + set_tcb_tflag(adap, f, tid, + TF_DIRECT_STEER_S, 1, 1); + break; + case FILTER_DROP: + set_tcb_tflag(adap, f, tid, TF_DROP_S, 1, 1); + break; + case FILTER_SWITCH: + set_tcb_tflag(adap, f, tid, TF_LPBK_S, 1, 1); + break; + } + break; default: @@ -1808,22 +1817,11 @@ void filter_rpl(struct adapter *adap, const struct cpl_set_tcb_rpl *rpl) if (ctx) ctx->result = 0; } else if (ret == FW_FILTER_WR_FLT_ADDED) { - int err = 0; - - if (f->fs.newsmac) - err = configure_filter_smac(adap, f); - - if (!err) { - f->pending = 0; /* async setup completed */ - f->valid = 1; - if (ctx) { - ctx->result = 0; - ctx->tid = idx; - } - } else { - clear_filter(adap, f); - if (ctx) - ctx->result = err; + f->pending = 0; /* async setup completed */ + f->valid = 1; + if (ctx) { + ctx->result = 0; + ctx->tid = idx; } } else { /* Something went wrong. Issue a warning about the diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4_tcb.h b/drivers/net/ethernet/chelsio/cxgb4/t4_tcb.h index 1b9afb192f7f..358f0fe40270 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/t4_tcb.h +++ b/drivers/net/ethernet/chelsio/cxgb4/t4_tcb.h @@ -50,6 +50,10 @@ #define TCB_RQ_START_M 0x3ffffffULL #define TCB_RQ_START_V(x) ((x) << TCB_RQ_START_S) +#define TF_DROP_S 22 +#define TF_DIRECT_STEER_S 23 +#define TF_LPBK_S 59 + #define TF_CCTRL_ECE_S 60 #define TF_CCTRL_CWR_S 61 #define TF_CCTRL_RFR_S 62 From e781c67629ed9923ba364372bd97183650f518cf Mon Sep 17 00:00:00 2001 From: Masahiro Fujiwara Date: Tue, 27 Oct 2020 20:48:46 +0900 Subject: [PATCH 16/49] gtp: fix an use-before-init in gtp_newlink() [ Upstream commit 51467431200b91682b89d31317e35dcbca1469ce ] *_pdp_find() from gtp_encap_recv() would trigger a crash when a peer sends GTP packets while creating new GTP device. RIP: 0010:gtp1_pdp_find.isra.0+0x68/0x90 [gtp] Call Trace: gtp_encap_recv+0xc2/0x2e0 [gtp] ? gtp1_pdp_find.isra.0+0x90/0x90 [gtp] udp_queue_rcv_one_skb+0x1fe/0x530 udp_queue_rcv_skb+0x40/0x1b0 udp_unicast_rcv_skb.isra.0+0x78/0x90 __udp4_lib_rcv+0x5af/0xc70 udp_rcv+0x1a/0x20 ip_protocol_deliver_rcu+0xc5/0x1b0 ip_local_deliver_finish+0x48/0x50 ip_local_deliver+0xe5/0xf0 ? ip_protocol_deliver_rcu+0x1b0/0x1b0 gtp_encap_enable() should be called after gtp_hastable_new() otherwise *_pdp_find() will access the uninitialized hash table. Fixes: 1e3a3abd8b28 ("gtp: make GTP sockets in gtp_newlink optional") Signed-off-by: Masahiro Fujiwara Link: https://lore.kernel.org/r/20201027114846.3924-1-fujiwara.masahiro@gmail.com Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman --- drivers/net/gtp.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/drivers/net/gtp.c b/drivers/net/gtp.c index 634bdea38ecb..4e19c3149848 100644 --- a/drivers/net/gtp.c +++ b/drivers/net/gtp.c @@ -663,10 +663,6 @@ static int gtp_newlink(struct net *src_net, struct net_device *dev, gtp = netdev_priv(dev); - err = gtp_encap_enable(gtp, data); - if (err < 0) - return err; - if (!data[IFLA_GTP_PDP_HASHSIZE]) { hashsize = 1024; } else { @@ -677,12 +673,16 @@ static int gtp_newlink(struct net *src_net, struct net_device *dev, err = gtp_hashtable_new(gtp, hashsize); if (err < 0) - goto out_encap; + return err; + + err = gtp_encap_enable(gtp, data); + if (err < 0) + goto out_hashtable; err = register_netdevice(dev); if (err < 0) { netdev_dbg(dev, "failed to register new netdev %d\n", err); - goto out_hashtable; + goto out_encap; } gn = net_generic(dev_net(dev), gtp_net_id); @@ -693,11 +693,11 @@ static int gtp_newlink(struct net *src_net, struct net_device *dev, return 0; +out_encap: + gtp_encap_disable(gtp); out_hashtable: kfree(gtp->addr_hash); kfree(gtp->tid_hash); -out_encap: - gtp_encap_disable(gtp); return err; } From c90459593f55a7b1a2fe91bc9e418cbd1cd5ac5d Mon Sep 17 00:00:00 2001 From: Lijun Pan Date: Tue, 27 Oct 2020 17:04:56 -0500 Subject: [PATCH 17/49] ibmvnic: fix ibmvnic_set_mac [ Upstream commit 8fc3672a8ad3e782bac80e979bc2a2c10960cbe9 ] Jakub Kicinski brought up a concern in ibmvnic_set_mac(). ibmvnic_set_mac() does this: ether_addr_copy(adapter->mac_addr, addr->sa_data); if (adapter->state != VNIC_PROBED) rc = __ibmvnic_set_mac(netdev, addr->sa_data); So if state == VNIC_PROBED, the user can assign an invalid address to adapter->mac_addr, and ibmvnic_set_mac() will still return 0. The fix is to validate ethernet address at the beginning of ibmvnic_set_mac(), and move the ether_addr_copy to the case of "adapter->state != VNIC_PROBED". Fixes: c26eba03e407 ("ibmvnic: Update reset infrastructure to support tunable parameters") Signed-off-by: Lijun Pan Link: https://lore.kernel.org/r/20201027220456.71450-1-ljp@linux.ibm.com Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/ibm/ibmvnic.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c index 48105a2eebe4..91559a52c7ad 100644 --- a/drivers/net/ethernet/ibm/ibmvnic.c +++ b/drivers/net/ethernet/ibm/ibmvnic.c @@ -1735,9 +1735,13 @@ static int ibmvnic_set_mac(struct net_device *netdev, void *p) int rc; rc = 0; - ether_addr_copy(adapter->mac_addr, addr->sa_data); - if (adapter->state != VNIC_PROBED) + if (!is_valid_ether_addr(addr->sa_data)) + return -EADDRNOTAVAIL; + + if (adapter->state != VNIC_PROBED) { + ether_addr_copy(adapter->mac_addr, addr->sa_data); rc = __ibmvnic_set_mac(netdev, addr->sa_data); + } return rc; } From 13a4843d3938a9bd49eac6d441e3dc0ace87e894 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Sat, 24 Oct 2020 16:37:32 +0300 Subject: [PATCH 18/49] mlxsw: core: Fix memory leak on module removal [ Upstream commit adc80b6cfedff6dad8b93d46a5ea2775fd5af9ec ] Free the devlink instance during the teardown sequence in the non-reload case to avoid the following memory leak. unreferenced object 0xffff888232895000 (size 2048): comm "modprobe", pid 1073, jiffies 4295568857 (age 164.871s) hex dump (first 32 bytes): 00 01 00 00 00 00 ad de 22 01 00 00 00 00 ad de ........"....... 10 50 89 32 82 88 ff ff 10 50 89 32 82 88 ff ff .P.2.....P.2.... backtrace: [<00000000c704e9a6>] __kmalloc+0x13a/0x2a0 [<00000000ee30129d>] devlink_alloc+0xff/0x760 [<0000000092ab3e5d>] 0xffffffffa042e5b0 [<000000004f3f8a31>] 0xffffffffa042f6ad [<0000000092800b4b>] 0xffffffffa0491df3 [<00000000c4843903>] local_pci_probe+0xcb/0x170 [<000000006993ded7>] pci_device_probe+0x2c2/0x4e0 [<00000000a8e0de75>] really_probe+0x2c5/0xf90 [<00000000d42ba75d>] driver_probe_device+0x1eb/0x340 [<00000000bcc95e05>] device_driver_attach+0x294/0x300 [<000000000e2bc177>] __driver_attach+0x167/0x2f0 [<000000007d44cd6e>] bus_for_each_dev+0x148/0x1f0 [<000000003cd5a91e>] driver_attach+0x45/0x60 [<000000000041ce51>] bus_add_driver+0x3b8/0x720 [<00000000f5215476>] driver_register+0x230/0x4e0 [<00000000d79356f5>] __pci_register_driver+0x190/0x200 Fixes: a22712a96291 ("mlxsw: core: Fix devlink unregister flow") Signed-off-by: Ido Schimmel Reported-by: Vadim Pasternak Tested-by: Oleksandr Shamray Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/mellanox/mlxsw/core.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlxsw/core.c b/drivers/net/ethernet/mellanox/mlxsw/core.c index c7c3fc7d1126..7277706847b1 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/core.c +++ b/drivers/net/ethernet/mellanox/mlxsw/core.c @@ -1281,6 +1281,8 @@ void mlxsw_core_bus_device_unregister(struct mlxsw_core *mlxsw_core, if (!reload) devlink_resources_unregister(devlink, NULL); mlxsw_core->bus->fini(mlxsw_core->bus_priv); + if (!reload) + devlink_free(devlink); return; From 4bffc9618caf2106e54cbe60a91cce92e0c29b2b Mon Sep 17 00:00:00 2001 From: Aleksandr Nogikh Date: Wed, 28 Oct 2020 17:07:31 +0000 Subject: [PATCH 19/49] netem: fix zero division in tabledist [ Upstream commit eadd1befdd778a1eca57fad058782bd22b4db804 ] Currently it is possible to craft a special netlink RTM_NEWQDISC command that can result in jitter being equal to 0x80000000. It is enough to set the 32 bit jitter to 0x02000000 (it will later be multiplied by 2^6) or just set the 64 bit jitter via TCA_NETEM_JITTER64. This causes an overflow during the generation of uniformly distributed numbers in tabledist(), which in turn leads to division by zero (sigma != 0, but sigma * 2 is 0). The related fragment of code needs 32-bit division - see commit 9b0ed89 ("netem: remove unnecessary 64 bit modulus"), so switching to 64 bit is not an option. Fix the issue by keeping the value of jitter within the range that can be adequately handled by tabledist() - [0;INT_MAX]. As negative std deviation makes no sense, take the absolute value of the passed value and cap it at INT_MAX. Inside tabledist(), switch to unsigned 32 bit arithmetic in order to prevent overflows. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Aleksandr Nogikh Reported-by: syzbot+ec762a6342ad0d3c0d8f@syzkaller.appspotmail.com Acked-by: Stephen Hemminger Link: https://lore.kernel.org/r/20201028170731.1383332-1-aleksandrnogikh@gmail.com Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman --- net/sched/sch_netem.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c index 42e557d48e4e..f4101a920d1f 100644 --- a/net/sched/sch_netem.c +++ b/net/sched/sch_netem.c @@ -330,7 +330,7 @@ static s64 tabledist(s64 mu, s32 sigma, /* default uniform distribution */ if (dist == NULL) - return ((rnd % (2 * sigma)) + mu) - sigma; + return ((rnd % (2 * (u32)sigma)) + mu) - sigma; t = dist->table[rnd % dist->size]; x = (sigma % NETEM_DIST_SCALE) * t; @@ -812,6 +812,10 @@ static void get_slot(struct netem_sched_data *q, const struct nlattr *attr) q->slot_config.max_packets = INT_MAX; if (q->slot_config.max_bytes == 0) q->slot_config.max_bytes = INT_MAX; + + /* capping dist_jitter to the range acceptable by tabledist() */ + q->slot_config.dist_jitter = min_t(__s64, INT_MAX, abs(q->slot_config.dist_jitter)); + q->slot.packets_left = q->slot_config.max_packets; q->slot.bytes_left = q->slot_config.max_bytes; if (q->slot_config.min_delay | q->slot_config.max_delay | @@ -1037,6 +1041,9 @@ static int netem_change(struct Qdisc *sch, struct nlattr *opt, if (tb[TCA_NETEM_SLOT]) get_slot(q, tb[TCA_NETEM_SLOT]); + /* capping jitter to the range acceptable by tabledist() */ + q->jitter = min_t(s64, abs(q->jitter), INT_MAX); + return ret; get_table_failure: From f1493ab336799809e7b483d713c24b3eb1f28c0b Mon Sep 17 00:00:00 2001 From: Guillaume Nault Date: Mon, 26 Oct 2020 11:29:45 +0100 Subject: [PATCH 20/49] net/sched: act_mpls: Add softdep on mpls_gso.ko TCA_MPLS_ACT_PUSH and TCA_MPLS_ACT_MAC_PUSH might be used on gso packets. Such packets will thus require mpls_gso.ko for segmentation. v2: Drop dependency on CONFIG_NET_MPLS_GSO in Kconfig (from Jakub and David). Fixes: 2a2ea50870ba ("net: sched: add mpls manipulation actions to TC") Signed-off-by: Guillaume Nault Link: https://lore.kernel.org/r/1f6cab15bbd15666795061c55563aaf6a386e90e.1603708007.git.gnault@redhat.com Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman --- net/sched/act_mpls.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/sched/act_mpls.c b/net/sched/act_mpls.c index f496c99d9826..7954021ade33 100644 --- a/net/sched/act_mpls.c +++ b/net/sched/act_mpls.c @@ -408,6 +408,7 @@ static void __exit mpls_cleanup_module(void) module_init(mpls_init_module); module_exit(mpls_cleanup_module); +MODULE_SOFTDEP("post: mpls_gso"); MODULE_AUTHOR("Netronome Systems "); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("MPLS manipulation actions"); From 4939183bb28c209bdb81508c18994a8459103569 Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Thu, 29 Oct 2020 10:18:53 +0100 Subject: [PATCH 21/49] r8169: fix issue with forced threading in combination with shared interrupts [ Upstream commit 2734a24e6e5d18522fbf599135c59b82ec9b2c9e ] As reported by Serge flag IRQF_NO_THREAD causes an error if the interrupt is actually shared and the other driver(s) don't have this flag set. This situation can occur if a PCI(e) legacy interrupt is used in combination with forced threading. There's no good way to deal with this properly, therefore we have to remove flag IRQF_NO_THREAD. For fixing the original forced threading issue switch to napi_schedule(). Fixes: 424a646e072a ("r8169: fix operation under forced interrupt threading") Link: https://www.spinics.net/lists/netdev/msg694960.html Reported-by: Serge Belyshev Signed-off-by: Heiner Kallweit Tested-by: Serge Belyshev Link: https://lore.kernel.org/r/b5b53bfe-35ac-3768-85bf-74d1290cf394@gmail.com Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/realtek/r8169_main.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/realtek/r8169_main.c b/drivers/net/ethernet/realtek/r8169_main.c index f838abdb35e1..d8881ba773de 100644 --- a/drivers/net/ethernet/realtek/r8169_main.c +++ b/drivers/net/ethernet/realtek/r8169_main.c @@ -6264,7 +6264,7 @@ static irqreturn_t rtl8169_interrupt(int irq, void *dev_instance) } rtl_irq_disable(tp); - napi_schedule_irqoff(&tp->napi); + napi_schedule(&tp->napi); out: rtl_ack_events(tp, status); @@ -6470,7 +6470,7 @@ static int rtl_open(struct net_device *dev) rtl_request_firmware(tp); retval = request_irq(pci_irq_vector(pdev, 0), rtl8169_interrupt, - IRQF_NO_THREAD | IRQF_SHARED, dev->name, tp); + IRQF_SHARED, dev->name, tp); if (retval < 0) goto err_release_fw_2; From 7740774940fcea82132c0188bd9ce2e8c613deac Mon Sep 17 00:00:00 2001 From: Andrew Gabbasov Date: Mon, 26 Oct 2020 05:21:30 -0500 Subject: [PATCH 22/49] ravb: Fix bit fields checking in ravb_hwtstamp_get() [ Upstream commit 68b9f0865b1ef545da180c57d54b82c94cb464a4 ] In the function ravb_hwtstamp_get() in ravb_main.c with the existing values for RAVB_RXTSTAMP_TYPE_V2_L2_EVENT (0x2) and RAVB_RXTSTAMP_TYPE_ALL (0x6) if (priv->tstamp_rx_ctrl & RAVB_RXTSTAMP_TYPE_V2_L2_EVENT) config.rx_filter = HWTSTAMP_FILTER_PTP_V2_L2_EVENT; else if (priv->tstamp_rx_ctrl & RAVB_RXTSTAMP_TYPE_ALL) config.rx_filter = HWTSTAMP_FILTER_ALL; if the test on RAVB_RXTSTAMP_TYPE_ALL should be true, it will never be reached. This issue can be verified with 'hwtstamp_config' testing program (tools/testing/selftests/net/hwtstamp_config.c). Setting filter type to ALL and subsequent retrieving it gives incorrect value: $ hwtstamp_config eth0 OFF ALL flags = 0 tx_type = OFF rx_filter = ALL $ hwtstamp_config eth0 flags = 0 tx_type = OFF rx_filter = PTP_V2_L2_EVENT Correct this by converting if-else's to switch. Fixes: c156633f1353 ("Renesas Ethernet AVB driver proper") Reported-by: Julia Lawall Signed-off-by: Andrew Gabbasov Reviewed-by: Sergei Shtylyov Link: https://lore.kernel.org/r/20201026102130.29368-1-andrew_gabbasov@mentor.com Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/renesas/ravb_main.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/renesas/ravb_main.c b/drivers/net/ethernet/renesas/ravb_main.c index 30cdabf64ccc..551799fb3842 100644 --- a/drivers/net/ethernet/renesas/ravb_main.c +++ b/drivers/net/ethernet/renesas/ravb_main.c @@ -1741,12 +1741,16 @@ static int ravb_hwtstamp_get(struct net_device *ndev, struct ifreq *req) config.flags = 0; config.tx_type = priv->tstamp_tx_ctrl ? HWTSTAMP_TX_ON : HWTSTAMP_TX_OFF; - if (priv->tstamp_rx_ctrl & RAVB_RXTSTAMP_TYPE_V2_L2_EVENT) + switch (priv->tstamp_rx_ctrl & RAVB_RXTSTAMP_TYPE) { + case RAVB_RXTSTAMP_TYPE_V2_L2_EVENT: config.rx_filter = HWTSTAMP_FILTER_PTP_V2_L2_EVENT; - else if (priv->tstamp_rx_ctrl & RAVB_RXTSTAMP_TYPE_ALL) + break; + case RAVB_RXTSTAMP_TYPE_ALL: config.rx_filter = HWTSTAMP_FILTER_ALL; - else + break; + default: config.rx_filter = HWTSTAMP_FILTER_NONE; + } return copy_to_user(req->ifr_data, &config, sizeof(config)) ? -EFAULT : 0; From 8cc351a3d44462b81caf3d654e7498898a95af60 Mon Sep 17 00:00:00 2001 From: Arjun Roy Date: Fri, 23 Oct 2020 11:47:09 -0700 Subject: [PATCH 23/49] tcp: Prevent low rmem stalls with SO_RCVLOWAT. [ Upstream commit 435ccfa894e35e3d4a1799e6ac030e48a7b69ef5 ] With SO_RCVLOWAT, under memory pressure, it is possible to enter a state where: 1. We have not received enough bytes to satisfy SO_RCVLOWAT. 2. We have not entered buffer pressure (see tcp_rmem_pressure()). 3. But, we do not have enough buffer space to accept more packets. In this case, we advertise 0 rwnd (due to #3) but the application does not drain the receive queue (no wakeup because of #1 and #2) so the flow stalls. Modify the heuristic for SO_RCVLOWAT so that, if we are advertising rwnd<=rcv_mss, force a wakeup to prevent a stall. Without this patch, setting tcp_rmem to 6143 and disabling TCP autotune causes a stalled flow. With this patch, no stall occurs. This is with RPC-style traffic with large messages. Fixes: 03f45c883c6f ("tcp: avoid extra wakeups for SO_RCVLOWAT users") Signed-off-by: Arjun Roy Acked-by: Soheil Hassas Yeganeh Acked-by: Neal Cardwell Signed-off-by: Eric Dumazet Link: https://lore.kernel.org/r/20201023184709.217614-1-arjunroy.kdev@gmail.com Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman --- net/ipv4/tcp.c | 2 ++ net/ipv4/tcp_input.c | 3 ++- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 97f2b11ce203..d3ced6a39bcf 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -484,6 +484,8 @@ static inline bool tcp_stream_is_readable(const struct tcp_sock *tp, return true; if (tcp_rmem_pressure(sk)) return true; + if (tcp_receive_window(tp) <= inet_csk(sk)->icsk_ack.rcv_mss) + return true; } if (sk->sk_prot->stream_memory_read) return sk->sk_prot->stream_memory_read(sk); diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 62f8ba404818..54fd6bc5adcc 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -4774,7 +4774,8 @@ void tcp_data_ready(struct sock *sk) int avail = tp->rcv_nxt - tp->copied_seq; if (avail < sk->sk_rcvlowat && !tcp_rmem_pressure(sk) && - !sock_flag(sk, SOCK_DONE)) + !sock_flag(sk, SOCK_DONE) && + tcp_receive_window(tp) > inet_csk(sk)->icsk_ack.rcv_mss) return; sk->sk_data_ready(sk); From 30d628ede582f33e8ff4e206af35be70c1e77f7f Mon Sep 17 00:00:00 2001 From: Tung Nguyen Date: Tue, 27 Oct 2020 10:24:03 +0700 Subject: [PATCH 24/49] tipc: fix memory leak caused by tipc_buf_append() [ Upstream commit ceb1eb2fb609c88363e06618b8d4bbf7815a4e03 ] Commit ed42989eab57 ("tipc: fix the skb_unshare() in tipc_buf_append()") replaced skb_unshare() with skb_copy() to not reduce the data reference counter of the original skb intentionally. This is not the correct way to handle the cloned skb because it causes memory leak in 2 following cases: 1/ Sending multicast messages via broadcast link The original skb list is cloned to the local skb list for local destination. After that, the data reference counter of each skb in the original list has the value of 2. This causes each skb not to be freed after receiving ACK: tipc_link_advance_transmq() { ... /* release skb */ __skb_unlink(skb, &l->transmq); kfree_skb(skb); <-- memory exists after being freed } 2/ Sending multicast messages via replicast link Similar to the above case, each skb cannot be freed after purging the skb list: tipc_mcast_xmit() { ... __skb_queue_purge(pkts); <-- memory exists after being freed } This commit fixes this issue by using skb_unshare() instead. Besides, to avoid use-after-free error reported by KASAN, the pointer to the fragment is set to NULL before calling skb_unshare() to make sure that the original skb is not freed after freeing the fragment 2 times in case skb_unshare() returns NULL. Fixes: ed42989eab57 ("tipc: fix the skb_unshare() in tipc_buf_append()") Acked-by: Jon Maloy Reported-by: Thang Hoang Ngo Signed-off-by: Tung Nguyen Reviewed-by: Xin Long Acked-by: Cong Wang Link: https://lore.kernel.org/r/20201027032403.1823-1-tung.q.nguyen@dektech.com.au Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman --- net/tipc/msg.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/net/tipc/msg.c b/net/tipc/msg.c index b0ed3c944b2d..46e89c992c2d 100644 --- a/net/tipc/msg.c +++ b/net/tipc/msg.c @@ -140,12 +140,11 @@ int tipc_buf_append(struct sk_buff **headbuf, struct sk_buff **buf) if (fragid == FIRST_FRAGMENT) { if (unlikely(head)) goto err; - if (skb_cloned(frag)) - frag = skb_copy(frag, GFP_ATOMIC); + *buf = NULL; + frag = skb_unshare(frag, GFP_ATOMIC); if (unlikely(!frag)) goto err; head = *headbuf = frag; - *buf = NULL; TIPC_SKB_CB(head)->tail = NULL; if (skb_is_nonlinear(head)) { skb_walk_frags(head, tail) { From 5c86cda6a529081083ab38795ad4e516a6e13a3b Mon Sep 17 00:00:00 2001 From: Zenghui Yu Date: Fri, 23 Oct 2020 13:15:50 +0800 Subject: [PATCH 25/49] net: hns3: Clear the CMDQ registers before unmapping BAR region [ Upstream commit e3364c5ff3ff975b943a7bf47e21a2a4bf20f3fe ] When unbinding the hns3 driver with the HNS3 VF, I got the following kernel panic: [ 265.709989] Unable to handle kernel paging request at virtual address ffff800054627000 [ 265.717928] Mem abort info: [ 265.720740] ESR = 0x96000047 [ 265.723810] EC = 0x25: DABT (current EL), IL = 32 bits [ 265.729126] SET = 0, FnV = 0 [ 265.732195] EA = 0, S1PTW = 0 [ 265.735351] Data abort info: [ 265.738227] ISV = 0, ISS = 0x00000047 [ 265.742071] CM = 0, WnR = 1 [ 265.745055] swapper pgtable: 4k pages, 48-bit VAs, pgdp=0000000009b54000 [ 265.751753] [ffff800054627000] pgd=0000202ffffff003, p4d=0000202ffffff003, pud=00002020020eb003, pmd=00000020a0dfc003, pte=0000000000000000 [ 265.764314] Internal error: Oops: 96000047 [#1] SMP [ 265.830357] CPU: 61 PID: 20319 Comm: bash Not tainted 5.9.0+ #206 [ 265.836423] Hardware name: Huawei TaiShan 2280 V2/BC82AMDDA, BIOS 1.05 09/18/2019 [ 265.843873] pstate: 80400009 (Nzcv daif +PAN -UAO -TCO BTYPE=--) [ 265.843890] pc : hclgevf_cmd_uninit+0xbc/0x300 [ 265.861988] lr : hclgevf_cmd_uninit+0xb0/0x300 [ 265.861992] sp : ffff80004c983b50 [ 265.881411] pmr_save: 000000e0 [ 265.884453] x29: ffff80004c983b50 x28: ffff20280bbce500 [ 265.889744] x27: 0000000000000000 x26: 0000000000000000 [ 265.895034] x25: ffff800011a1f000 x24: ffff800011a1fe90 [ 265.900325] x23: ffff0020ce9b00d8 x22: ffff0020ce9b0150 [ 265.905616] x21: ffff800010d70e90 x20: ffff800010d70e90 [ 265.910906] x19: ffff0020ce9b0080 x18: 0000000000000004 [ 265.916198] x17: 0000000000000000 x16: ffff800011ae32e8 [ 265.916201] x15: 0000000000000028 x14: 0000000000000002 [ 265.916204] x13: ffff800011ae32e8 x12: 0000000000012ad8 [ 265.946619] x11: ffff80004c983b50 x10: 0000000000000000 [ 265.951911] x9 : ffff8000115d0888 x8 : 0000000000000000 [ 265.951914] x7 : ffff800011890b20 x6 : c0000000ffff7fff [ 265.951917] x5 : ffff80004c983930 x4 : 0000000000000001 [ 265.951919] x3 : ffffa027eec1b000 x2 : 2b78ccbbff369100 [ 265.964487] x1 : 0000000000000000 x0 : ffff800054627000 [ 265.964491] Call trace: [ 265.964494] hclgevf_cmd_uninit+0xbc/0x300 [ 265.964496] hclgevf_uninit_ae_dev+0x9c/0xe8 [ 265.964501] hnae3_unregister_ae_dev+0xb0/0x130 [ 265.964516] hns3_remove+0x34/0x88 [hns3] [ 266.009683] pci_device_remove+0x48/0xf0 [ 266.009692] device_release_driver_internal+0x114/0x1e8 [ 266.030058] device_driver_detach+0x28/0x38 [ 266.034224] unbind_store+0xd4/0x108 [ 266.037784] drv_attr_store+0x40/0x58 [ 266.041435] sysfs_kf_write+0x54/0x80 [ 266.045081] kernfs_fop_write+0x12c/0x250 [ 266.049076] vfs_write+0xc4/0x248 [ 266.052378] ksys_write+0x74/0xf8 [ 266.055677] __arm64_sys_write+0x24/0x30 [ 266.059584] el0_svc_common.constprop.3+0x84/0x270 [ 266.064354] do_el0_svc+0x34/0xa0 [ 266.067658] el0_svc+0x38/0x40 [ 266.070700] el0_sync_handler+0x8c/0xb0 [ 266.074519] el0_sync+0x140/0x180 It looks like the BAR memory region had already been unmapped before we start clearing CMDQ registers in it, which is pretty bad and the kernel happily kills itself because of a Current EL Data Abort (on arm64). Moving the CMDQ uninitialization a bit early fixes the issue for me. Fixes: 862d969a3a4d ("net: hns3: do VF's pci re-initialization while PF doing FLR") Signed-off-by: Zenghui Yu Link: https://lore.kernel.org/r/20201023051550.793-1-yuzenghui@huawei.com Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c index 7d7e712691b9..9b09dd95e878 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c @@ -2782,8 +2782,8 @@ static void hclgevf_uninit_hdev(struct hclgevf_dev *hdev) hclgevf_uninit_msi(hdev); } - hclgevf_pci_uninit(hdev); hclgevf_cmd_uninit(hdev); + hclgevf_pci_uninit(hdev); } static int hclgevf_init_ae_dev(struct hnae3_ae_dev *ae_dev) From aa4dba4e222693fc4c44ce18643f640e6e30bd9f Mon Sep 17 00:00:00 2001 From: Vasundhara Volam Date: Mon, 26 Oct 2020 00:18:19 -0400 Subject: [PATCH 26/49] bnxt_en: Re-write PCI BARs after PCI fatal error. [ Upstream commit f75d9a0aa96721d20011cd5f8c7a24eb32728589 ] When a PCIe fatal error occurs, the internal latched BAR addresses in the chip get reset even though the BAR register values in config space are retained. pci_restore_state() will not rewrite the BAR addresses if the BAR address values are valid, causing the chip's internal BAR addresses to stay invalid. So we need to zero the BAR registers during PCIe fatal error to force pci_restore_state() to restore the BAR addresses. These write cycles to the BAR registers will cause the proper BAR addresses to latch internally. Fixes: 6316ea6db93d ("bnxt_en: Enable AER support.") Signed-off-by: Vasundhara Volam Signed-off-by: Michael Chan Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 19 ++++++++++++++++++- drivers/net/ethernet/broadcom/bnxt/bnxt.h | 1 + 2 files changed, 19 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 9acef18e04a7..8903ab7e0006 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -12049,6 +12049,9 @@ static pci_ers_result_t bnxt_io_error_detected(struct pci_dev *pdev, return PCI_ERS_RESULT_DISCONNECT; } + if (state == pci_channel_io_frozen) + set_bit(BNXT_STATE_PCI_CHANNEL_IO_FROZEN, &bp->state); + if (netif_running(netdev)) bnxt_close(netdev); @@ -12072,7 +12075,7 @@ static pci_ers_result_t bnxt_io_slot_reset(struct pci_dev *pdev) { struct net_device *netdev = pci_get_drvdata(pdev); struct bnxt *bp = netdev_priv(netdev); - int err = 0; + int err = 0, off; pci_ers_result_t result = PCI_ERS_RESULT_DISCONNECT; netdev_info(bp->dev, "PCI Slot Reset\n"); @@ -12084,6 +12087,20 @@ static pci_ers_result_t bnxt_io_slot_reset(struct pci_dev *pdev) "Cannot re-enable PCI device after reset.\n"); } else { pci_set_master(pdev); + /* Upon fatal error, our device internal logic that latches to + * BAR value is getting reset and will restore only upon + * rewritting the BARs. + * + * As pci_restore_state() does not re-write the BARs if the + * value is same as saved value earlier, driver needs to + * write the BARs to 0 to force restore, in case of fatal error. + */ + if (test_and_clear_bit(BNXT_STATE_PCI_CHANNEL_IO_FROZEN, + &bp->state)) { + for (off = PCI_BASE_ADDRESS_0; + off <= PCI_BASE_ADDRESS_5; off += 4) + pci_write_config_dword(bp->pdev, off, 0); + } pci_restore_state(pdev); pci_save_state(pdev); diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.h b/drivers/net/ethernet/broadcom/bnxt/bnxt.h index d2dd852d27da..510ff01bdad8 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.h +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.h @@ -1627,6 +1627,7 @@ struct bnxt { #define BNXT_STATE_IN_FW_RESET 4 #define BNXT_STATE_ABORT_ERR 5 #define BNXT_STATE_FW_FATAL_COND 6 +#define BNXT_STATE_PCI_CHANNEL_IO_FROZEN 8 #define BNXT_NO_FW_ACCESS(bp) \ (test_bit(BNXT_STATE_FW_FATAL_COND, &(bp)->state) || \ From b1b5efe574cddc2aebf7bd0fc6bd2fc286df3098 Mon Sep 17 00:00:00 2001 From: Vasundhara Volam Date: Mon, 26 Oct 2020 00:18:17 -0400 Subject: [PATCH 27/49] bnxt_en: Fix regression in workqueue cleanup logic in bnxt_remove_one(). [ Upstream commit 21d6a11e2cadfb8446265a3efff0e2aad206e15e ] A recent patch has moved the workqueue cleanup logic before calling unregister_netdev() in bnxt_remove_one(). This caused a regression because the workqueue can be restarted if the device is still open. Workqueue cleanup must be done after unregister_netdev(). The workqueue will not restart itself after the device is closed. Call bnxt_cancel_sp_work() after unregister_netdev() and call bnxt_dl_fw_reporters_destroy() after that. This fixes the regession and the original NULL ptr dereference issue. Fixes: b16939b59cc0 ("bnxt_en: Fix NULL ptr dereference crash in bnxt_fw_reset_task()") Signed-off-by: Vasundhara Volam Signed-off-by: Michael Chan Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 8903ab7e0006..d9a2c61371a2 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -11396,13 +11396,14 @@ static void bnxt_remove_one(struct pci_dev *pdev) if (BNXT_PF(bp)) bnxt_sriov_disable(bp); + pci_disable_pcie_error_reporting(pdev); + unregister_netdev(dev); clear_bit(BNXT_STATE_IN_FW_RESET, &bp->state); + /* Flush any pending tasks */ bnxt_cancel_sp_work(bp); bp->sp_event = 0; bnxt_dl_fw_reporters_destroy(bp, true); - pci_disable_pcie_error_reporting(pdev); - unregister_netdev(dev); bnxt_dl_unregister(bp); bnxt_shutdown_tc(bp); From b8321829036ff93e98de67eb6391e4aa6cccfef8 Mon Sep 17 00:00:00 2001 From: Vasundhara Volam Date: Mon, 26 Oct 2020 00:18:18 -0400 Subject: [PATCH 28/49] bnxt_en: Invoke cancel_delayed_work_sync() for PFs also. [ Upstream commit 631ce27a3006fc0b732bfd589c6df505f62eadd9 ] As part of the commit b148bb238c02 ("bnxt_en: Fix possible crash in bnxt_fw_reset_task()."), cancel_delayed_work_sync() is called only for VFs to fix a possible crash by cancelling any pending delayed work items. It was assumed by mistake that the flush_workqueue() call on the PF would flush delayed work items as well. As flush_workqueue() does not cancel the delayed workqueue, extend the fix for PFs. This fix will avoid the system crash, if there are any pending delayed work items in fw_reset_task() during driver's .remove() call. Unify the workqueue cleanup logic for both PF and VF by calling cancel_work_sync() and cancel_delayed_work_sync() directly in bnxt_remove_one(). Fixes: b148bb238c02 ("bnxt_en: Fix possible crash in bnxt_fw_reset_task().") Reviewed-by: Pavan Chebbi Reviewed-by: Andy Gospodarek Signed-off-by: Vasundhara Volam Signed-off-by: Michael Chan Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 13 ++----------- 1 file changed, 2 insertions(+), 11 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index d9a2c61371a2..cdd3764760ed 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -1160,16 +1160,6 @@ static void bnxt_queue_sp_work(struct bnxt *bp) schedule_work(&bp->sp_task); } -static void bnxt_cancel_sp_work(struct bnxt *bp) -{ - if (BNXT_PF(bp)) { - flush_workqueue(bnxt_pf_wq); - } else { - cancel_work_sync(&bp->sp_task); - cancel_delayed_work_sync(&bp->fw_reset_task); - } -} - static void bnxt_sched_reset(struct bnxt *bp, struct bnxt_rx_ring_info *rxr) { if (!rxr->bnapi->in_reset) { @@ -11400,7 +11390,8 @@ static void bnxt_remove_one(struct pci_dev *pdev) unregister_netdev(dev); clear_bit(BNXT_STATE_IN_FW_RESET, &bp->state); /* Flush any pending tasks */ - bnxt_cancel_sp_work(bp); + cancel_work_sync(&bp->sp_task); + cancel_delayed_work_sync(&bp->fw_reset_task); bp->sp_event = 0; bnxt_dl_fw_reporters_destroy(bp, true); From 95daf621291c1203d9d2cebb119d0cd2d53898f9 Mon Sep 17 00:00:00 2001 From: Gao Xiang Date: Tue, 11 Aug 2020 15:00:20 +0800 Subject: [PATCH 29/49] erofs: avoid duplicated permission check for "trusted." xattrs commit d578b46db69d125a654f509bdc9091d84e924dc8 upstream. Don't recheck it since xattr_permission() already checks CAP_SYS_ADMIN capability. Just follow 5d3ce4f70172 ("f2fs: avoid duplicated permission check for "trusted." xattrs") Reported-by: Hongyu Jin [ Gao Xiang: since it could cause some complex Android overlay permission issue as well on android-5.4+, it'd be better to backport to 5.4+ rather than pure cleanup on mainline. ] Cc: # 5.4+ Link: https://lore.kernel.org/r/20200811070020.6339-1-hsiangkao@redhat.com Reviewed-by: Chao Yu Signed-off-by: Gao Xiang Signed-off-by: Greg Kroah-Hartman --- fs/erofs/xattr.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/fs/erofs/xattr.c b/fs/erofs/xattr.c index b766c3ee5fa8..503bea20cde2 100644 --- a/fs/erofs/xattr.c +++ b/fs/erofs/xattr.c @@ -473,8 +473,6 @@ static int erofs_xattr_generic_get(const struct xattr_handler *handler, return -EOPNOTSUPP; break; case EROFS_XATTR_INDEX_TRUSTED: - if (!capable(CAP_SYS_ADMIN)) - return -EPERM; break; case EROFS_XATTR_INDEX_SECURITY: break; From 47a4d5406389c819f01c4f84bfeacf697626954d Mon Sep 17 00:00:00 2001 From: Kim Phillips Date: Tue, 8 Sep 2020 16:47:36 -0500 Subject: [PATCH 30/49] arch/x86/amd/ibs: Fix re-arming IBS Fetch commit 221bfce5ebbdf72ff08b3bf2510ae81058ee568b upstream. Stephane Eranian found a bug in that IBS' current Fetch counter was not being reset when the driver would write the new value to clear it along with the enable bit set, and found that adding an MSR write that would first disable IBS Fetch would make IBS Fetch reset its current count. Indeed, the PPR for AMD Family 17h Model 31h B0 55803 Rev 0.54 - Sep 12, 2019 states "The periodic fetch counter is set to IbsFetchCnt [...] when IbsFetchEn is changed from 0 to 1." Explicitly set IbsFetchEn to 0 and then to 1 when re-enabling IBS Fetch, so the driver properly resets the internal counter to 0 and IBS Fetch starts counting again. A family 15h machine tested does not have this problem, and the extra wrmsr is also not needed on Family 19h, so only do the extra wrmsr on families 16h through 18h. Reported-by: Stephane Eranian Signed-off-by: Kim Phillips [peterz: optimized] Signed-off-by: Peter Zijlstra (Intel) Cc: stable@vger.kernel.org Link: https://bugzilla.kernel.org/show_bug.cgi?id=206537 Signed-off-by: Greg Kroah-Hartman --- arch/x86/events/amd/ibs.c | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/arch/x86/events/amd/ibs.c b/arch/x86/events/amd/ibs.c index 26c36357c4c9..a023cbe21230 100644 --- a/arch/x86/events/amd/ibs.c +++ b/arch/x86/events/amd/ibs.c @@ -89,6 +89,7 @@ struct perf_ibs { u64 max_period; unsigned long offset_mask[1]; int offset_max; + unsigned int fetch_count_reset_broken : 1; struct cpu_perf_ibs __percpu *pcpu; struct attribute **format_attrs; @@ -363,7 +364,12 @@ perf_ibs_event_update(struct perf_ibs *perf_ibs, struct perf_event *event, static inline void perf_ibs_enable_event(struct perf_ibs *perf_ibs, struct hw_perf_event *hwc, u64 config) { - wrmsrl(hwc->config_base, hwc->config | config | perf_ibs->enable_mask); + u64 tmp = hwc->config | config; + + if (perf_ibs->fetch_count_reset_broken) + wrmsrl(hwc->config_base, tmp & ~perf_ibs->enable_mask); + + wrmsrl(hwc->config_base, tmp | perf_ibs->enable_mask); } /* @@ -733,6 +739,13 @@ static __init void perf_event_ibs_init(void) { struct attribute **attr = ibs_op_format_attrs; + /* + * Some chips fail to reset the fetch count when it is written; instead + * they need a 0-1 transition of IbsFetchEn. + */ + if (boot_cpu_data.x86 >= 0x16 && boot_cpu_data.x86 <= 0x18) + perf_ibs_fetch.fetch_count_reset_broken = 1; + perf_ibs_pmu_init(&perf_ibs_fetch, "ibs_fetch"); if (ibs_caps & IBS_CAPS_OPCNT) { From 7aae7466f5dbb310cbef7d6fbb0073b4753b78e8 Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Fri, 25 Sep 2020 16:07:51 +0200 Subject: [PATCH 31/49] x86/xen: disable Firmware First mode for correctable memory errors commit d759af38572f97321112a0852353613d18126038 upstream. When running as Xen dom0 the kernel isn't responsible for selecting the error handling mode, this should be handled by the hypervisor. So disable setting FF mode when running as Xen pv guest. Not doing so might result in boot splats like: [ 7.509696] HEST: Enabling Firmware First mode for corrected errors. [ 7.510382] mce: [Firmware Bug]: Ignoring request to disable invalid MCA bank 2. [ 7.510383] mce: [Firmware Bug]: Ignoring request to disable invalid MCA bank 3. [ 7.510384] mce: [Firmware Bug]: Ignoring request to disable invalid MCA bank 4. [ 7.510384] mce: [Firmware Bug]: Ignoring request to disable invalid MCA bank 5. [ 7.510385] mce: [Firmware Bug]: Ignoring request to disable invalid MCA bank 6. [ 7.510386] mce: [Firmware Bug]: Ignoring request to disable invalid MCA bank 7. [ 7.510386] mce: [Firmware Bug]: Ignoring request to disable invalid MCA bank 8. Reason is that the HEST ACPI table contains the real number of MCA banks, while the hypervisor is emulating only 2 banks for guests. Cc: stable@vger.kernel.org Signed-off-by: Juergen Gross Reviewed-by: Boris Ostrovsky Link: https://lore.kernel.org/r/20200925140751.31381-1-jgross@suse.com Signed-off-by: Boris Ostrovsky Signed-off-by: Greg Kroah-Hartman --- arch/x86/xen/enlighten_pv.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c index 6d4d8a5700b7..3e66feff524a 100644 --- a/arch/x86/xen/enlighten_pv.c +++ b/arch/x86/xen/enlighten_pv.c @@ -1382,6 +1382,15 @@ asmlinkage __visible void __init xen_start_kernel(void) x86_init.mpparse.get_smp_config = x86_init_uint_noop; xen_boot_params_init_edd(); + +#ifdef CONFIG_ACPI + /* + * Disable selecting "Firmware First mode" for correctable + * memory errors, as this is the duty of the hypervisor to + * decide. + */ + acpi_disable_cmcff = 1; +#endif } if (!boot_params.screen_info.orig_video_isVGA) From 4e1a23779bdec8099735ff3a6a69347e09f8660c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pali=20Roh=C3=A1r?= Date: Fri, 9 Oct 2020 10:42:44 +0200 Subject: [PATCH 32/49] ata: ahci: mvebu: Make SATA PHY optional for Armada 3720 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 45aefe3d2251e4e229d7662052739f96ad1d08d9 upstream. Older ATF does not provide SMC call for SATA phy power on functionality and therefore initialization of ahci_mvebu is failing when older version of ATF is using. In this case phy_power_on() function returns -EOPNOTSUPP. This patch adds a new hflag AHCI_HFLAG_IGN_NOTSUPP_POWER_ON which cause that ahci_platform_enable_phys() would ignore -EOPNOTSUPP errors from phy_power_on() call. It fixes initialization of ahci_mvebu on Espressobin boards where is older Marvell's Arm Trusted Firmware without SMC call for SATA phy power. This is regression introduced in commit 8e18c8e58da64 ("arm64: dts: marvell: armada-3720-espressobin: declare SATA PHY property") where SATA phy was defined and therefore ahci_platform_enable_phys() on Espressobin started failing. Fixes: 8e18c8e58da64 ("arm64: dts: marvell: armada-3720-espressobin: declare SATA PHY property") Signed-off-by: Pali Rohár Tested-by: Tomasz Maciej Nowak Cc: # 5.1+: ea17a0f153af: phy: marvell: comphy: Convert internal SMCC firmware return codes to errno Signed-off-by: Jens Axboe Signed-off-by: Greg Kroah-Hartman --- drivers/ata/ahci.h | 2 ++ drivers/ata/ahci_mvebu.c | 2 +- drivers/ata/libahci_platform.c | 2 +- 3 files changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/ata/ahci.h b/drivers/ata/ahci.h index 3dbf398c92ea..9ef62e647cd2 100644 --- a/drivers/ata/ahci.h +++ b/drivers/ata/ahci.h @@ -240,6 +240,8 @@ enum { as default lpm_policy */ AHCI_HFLAG_SUSPEND_PHYS = (1 << 26), /* handle PHYs during suspend/resume */ + AHCI_HFLAG_IGN_NOTSUPP_POWER_ON = (1 << 27), /* ignore -EOPNOTSUPP + from phy_power_on() */ /* ap->flags bits */ diff --git a/drivers/ata/ahci_mvebu.c b/drivers/ata/ahci_mvebu.c index d4bba3ace45d..3ad46d26d9d5 100644 --- a/drivers/ata/ahci_mvebu.c +++ b/drivers/ata/ahci_mvebu.c @@ -227,7 +227,7 @@ static const struct ahci_mvebu_plat_data ahci_mvebu_armada_380_plat_data = { static const struct ahci_mvebu_plat_data ahci_mvebu_armada_3700_plat_data = { .plat_config = ahci_mvebu_armada_3700_config, - .flags = AHCI_HFLAG_SUSPEND_PHYS, + .flags = AHCI_HFLAG_SUSPEND_PHYS | AHCI_HFLAG_IGN_NOTSUPP_POWER_ON, }; static const struct of_device_id ahci_mvebu_of_match[] = { diff --git a/drivers/ata/libahci_platform.c b/drivers/ata/libahci_platform.c index 129556fcf6be..a1cbb894e5f0 100644 --- a/drivers/ata/libahci_platform.c +++ b/drivers/ata/libahci_platform.c @@ -59,7 +59,7 @@ int ahci_platform_enable_phys(struct ahci_host_priv *hpriv) } rc = phy_power_on(hpriv->phys[i]); - if (rc) { + if (rc && !(rc == -EOPNOTSUPP && (hpriv->flags & AHCI_HFLAG_IGN_NOTSUPP_POWER_ON))) { phy_exit(hpriv->phys[i]); goto disable_phys; } From 860448e73ba2831ec90570520ed7f39943f444f5 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Fri, 18 Sep 2020 10:36:50 +0200 Subject: [PATCH 33/49] fuse: fix page dereference after free commit d78092e4937de9ce55edcb4ee4c5e3c707be0190 upstream. After unlock_request() pages from the ap->pages[] array may be put (e.g. by aborting the connection) and the pages can be freed. Prevent use after free by grabbing a reference to the page before calling unlock_request(). The original patch was created by Pradeep P V K. Reported-by: Pradeep P V K Cc: Signed-off-by: Miklos Szeredi Signed-off-by: Greg Kroah-Hartman --- fs/fuse/dev.c | 28 ++++++++++++++++++---------- 1 file changed, 18 insertions(+), 10 deletions(-) diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c index f9022b702875..40c262b3f9ff 100644 --- a/fs/fuse/dev.c +++ b/fs/fuse/dev.c @@ -786,15 +786,16 @@ static int fuse_try_move_page(struct fuse_copy_state *cs, struct page **pagep) struct page *newpage; struct pipe_buffer *buf = cs->pipebufs; + get_page(oldpage); err = unlock_request(cs->req); if (err) - return err; + goto out_put_old; fuse_copy_finish(cs); err = pipe_buf_confirm(cs->pipe, buf); if (err) - return err; + goto out_put_old; BUG_ON(!cs->nr_segs); cs->currbuf = buf; @@ -834,7 +835,7 @@ static int fuse_try_move_page(struct fuse_copy_state *cs, struct page **pagep) err = replace_page_cache_page(oldpage, newpage, GFP_KERNEL); if (err) { unlock_page(newpage); - return err; + goto out_put_old; } get_page(newpage); @@ -853,14 +854,19 @@ static int fuse_try_move_page(struct fuse_copy_state *cs, struct page **pagep) if (err) { unlock_page(newpage); put_page(newpage); - return err; + goto out_put_old; } unlock_page(oldpage); + /* Drop ref for ap->pages[] array */ put_page(oldpage); cs->len = 0; - return 0; + err = 0; +out_put_old: + /* Drop ref obtained in this function */ + put_page(oldpage); + return err; out_fallback_unlock: unlock_page(newpage); @@ -869,10 +875,10 @@ out_fallback: cs->offset = buf->offset; err = lock_request(cs->req); - if (err) - return err; + if (!err) + err = 1; - return 1; + goto out_put_old; } static int fuse_ref_page(struct fuse_copy_state *cs, struct page *page, @@ -884,14 +890,16 @@ static int fuse_ref_page(struct fuse_copy_state *cs, struct page *page, if (cs->nr_segs == cs->pipe->buffers) return -EIO; + get_page(page); err = unlock_request(cs->req); - if (err) + if (err) { + put_page(page); return err; + } fuse_copy_finish(cs); buf = cs->pipebufs; - get_page(page); buf->page = page; buf->offset = offset; buf->len = count; From dd2f800e907493c6075ed97b1b9be42dd1df6052 Mon Sep 17 00:00:00 2001 From: Song Liu Date: Thu, 10 Sep 2020 13:33:14 -0700 Subject: [PATCH 34/49] bpf: Fix comment for helper bpf_current_task_under_cgroup() commit 1aef5b4391f0c75c0a1523706a7b0311846ee12f upstream. This should be "current" not "skb". Fixes: c6b5fb8690fa ("bpf: add documentation for eBPF helpers (42-50)") Signed-off-by: Song Liu Signed-off-by: Alexei Starovoitov Cc: Link: https://lore.kernel.org/bpf/20200910203314.70018-1-songliubraving@fb.com Signed-off-by: Greg Kroah-Hartman --- include/uapi/linux/bpf.h | 4 ++-- tools/include/uapi/linux/bpf.h | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 77c6be96d676..8649422e760c 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -1294,8 +1294,8 @@ union bpf_attr { * Return * The return value depends on the result of the test, and can be: * - * * 0, if the *skb* task belongs to the cgroup2. - * * 1, if the *skb* task does not belong to the cgroup2. + * * 0, if current task belongs to the cgroup2. + * * 1, if current task does not belong to the cgroup2. * * A negative error code, if an error occurred. * * int bpf_skb_change_tail(struct sk_buff *skb, u32 len, u64 flags) diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 77c6be96d676..8649422e760c 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -1294,8 +1294,8 @@ union bpf_attr { * Return * The return value depends on the result of the test, and can be: * - * * 0, if the *skb* task belongs to the cgroup2. - * * 1, if the *skb* task does not belong to the cgroup2. + * * 0, if current task belongs to the cgroup2. + * * 1, if current task does not belong to the cgroup2. * * A negative error code, if an error occurred. * * int bpf_skb_change_tail(struct sk_buff *skb, u32 len, u64 flags) From 801863f634c4d0614d5d107876a7bf577a5cfb66 Mon Sep 17 00:00:00 2001 From: Roberto Sassu Date: Fri, 4 Sep 2020 11:23:30 +0200 Subject: [PATCH 35/49] evm: Check size of security.evm before using it commit 455b6c9112eff8d249e32ba165742085678a80a4 upstream. This patch checks the size for the EVM_IMA_XATTR_DIGSIG and EVM_XATTR_PORTABLE_DIGSIG types to ensure that the algorithm is read from the buffer returned by vfs_getxattr_alloc(). Cc: stable@vger.kernel.org # 4.19.x Fixes: 5feeb61183dde ("evm: Allow non-SHA1 digital signatures") Signed-off-by: Roberto Sassu Signed-off-by: Mimi Zohar Signed-off-by: Greg Kroah-Hartman --- security/integrity/evm/evm_main.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/security/integrity/evm/evm_main.c b/security/integrity/evm/evm_main.c index a2c393385db0..615094eda36d 100644 --- a/security/integrity/evm/evm_main.c +++ b/security/integrity/evm/evm_main.c @@ -183,6 +183,12 @@ static enum integrity_status evm_verify_hmac(struct dentry *dentry, break; case EVM_IMA_XATTR_DIGSIG: case EVM_XATTR_PORTABLE_DIGSIG: + /* accept xattr with non-empty signature field */ + if (xattr_len <= sizeof(struct signature_v2_hdr)) { + evm_status = INTEGRITY_FAIL; + goto out; + } + hdr = (struct signature_v2_hdr *)xattr_data; digest.hdr.algo = hdr->hash_algo; rc = evm_calc_hash(dentry, xattr_name, xattr_value, From 125a229e52e7ef265b2760c7050ebf81e6570775 Mon Sep 17 00:00:00 2001 From: Jia-Ju Bai Date: Sun, 2 Aug 2020 21:29:49 +0800 Subject: [PATCH 36/49] p54: avoid accessing the data mapped to streaming DMA commit 478762855b5ae9f68fa6ead1edf7abada70fcd5f upstream. In p54p_tx(), skb->data is mapped to streaming DMA on line 337: mapping = pci_map_single(..., skb->data, ...); Then skb->data is accessed on line 349: desc->device_addr = ((struct p54_hdr *)skb->data)->req_id; This access may cause data inconsistency between CPU cache and hardware. To fix this problem, ((struct p54_hdr *)skb->data)->req_id is stored in a local variable before DMA mapping, and then the driver accesses this local variable instead of skb->data. Cc: Signed-off-by: Jia-Ju Bai Acked-by: Christian Lamparter Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/20200802132949.26788-1-baijiaju@tsinghua.edu.cn Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/intersil/p54/p54pci.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/wireless/intersil/p54/p54pci.c b/drivers/net/wireless/intersil/p54/p54pci.c index 80ad0b7eaef4..f8c6027cab6b 100644 --- a/drivers/net/wireless/intersil/p54/p54pci.c +++ b/drivers/net/wireless/intersil/p54/p54pci.c @@ -329,10 +329,12 @@ static void p54p_tx(struct ieee80211_hw *dev, struct sk_buff *skb) struct p54p_desc *desc; dma_addr_t mapping; u32 idx, i; + __le32 device_addr; spin_lock_irqsave(&priv->lock, flags); idx = le32_to_cpu(ring_control->host_idx[1]); i = idx % ARRAY_SIZE(ring_control->tx_data); + device_addr = ((struct p54_hdr *)skb->data)->req_id; mapping = pci_map_single(priv->pdev, skb->data, skb->len, PCI_DMA_TODEVICE); @@ -346,7 +348,7 @@ static void p54p_tx(struct ieee80211_hw *dev, struct sk_buff *skb) desc = &ring_control->tx_data[i]; desc->host_addr = cpu_to_le32(mapping); - desc->device_addr = ((struct p54_hdr *)skb->data)->req_id; + desc->device_addr = device_addr; desc->len = cpu_to_le16(skb->len); desc->flags = 0; From ebb0adcfbb1fc1eeb924bb0512d6a7e3509083ee Mon Sep 17 00:00:00 2001 From: Frederic Barrat Date: Tue, 7 Apr 2020 13:56:01 +0200 Subject: [PATCH 37/49] cxl: Rework error message for incompatible slots commit 40ac790d99c6dd16b367d5c2339e446a5f1b0593 upstream. Improve the error message shown if a capi adapter is plugged on a capi-incompatible slot directly under the PHB (no intermediate switch). Fixes: 5632874311db ("cxl: Add support for POWER9 DD2") Cc: stable@vger.kernel.org # 4.14+ Signed-off-by: Frederic Barrat Reviewed-by: Andrew Donnellan Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200407115601.25453-1-fbarrat@linux.ibm.com Signed-off-by: Greg Kroah-Hartman --- drivers/misc/cxl/pci.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/misc/cxl/pci.c b/drivers/misc/cxl/pci.c index 25a9dd9c0c1b..2ba899f5659f 100644 --- a/drivers/misc/cxl/pci.c +++ b/drivers/misc/cxl/pci.c @@ -393,8 +393,8 @@ int cxl_calc_capp_routing(struct pci_dev *dev, u64 *chipid, *capp_unit_id = get_capp_unit_id(np, *phb_index); of_node_put(np); if (!*capp_unit_id) { - pr_err("cxl: invalid capp unit id (phb_index: %d)\n", - *phb_index); + pr_err("cxl: No capp unit found for PHB[%lld,%d]. Make sure the adapter is on a capi-compatible slot\n", + *chipid, *phb_index); return -ENODEV; } From bc67eeb9781bce842a9fcf4bc9c72d17956d468a Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Wed, 30 Sep 2020 10:20:07 +0300 Subject: [PATCH 38/49] RDMA/addr: Fix race with netevent_callback()/rdma_addr_cancel() commit 2ee9bf346fbfd1dad0933b9eb3a4c2c0979b633e upstream. This three thread race can result in the work being run once the callback becomes NULL: CPU1 CPU2 CPU3 netevent_callback() process_one_req() rdma_addr_cancel() [..] spin_lock_bh() set_timeout() spin_unlock_bh() spin_lock_bh() list_del_init(&req->list); spin_unlock_bh() req->callback = NULL spin_lock_bh() if (!list_empty(&req->list)) // Skipped! // cancel_delayed_work(&req->work); spin_unlock_bh() process_one_req() // again req->callback() // BOOM cancel_delayed_work_sync() The solution is to always cancel the work once it is completed so any in between set_timeout() does not result in it running again. Cc: stable@vger.kernel.org Fixes: 44e75052bc2a ("RDMA/rdma_cm: Make rdma_addr_cancel into a fence") Link: https://lore.kernel.org/r/20200930072007.1009692-1-leon@kernel.org Reported-by: Dan Aloni Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe Signed-off-by: Greg Kroah-Hartman --- drivers/infiniband/core/addr.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/drivers/infiniband/core/addr.c b/drivers/infiniband/core/addr.c index 1753a9801b70..8beed4197e73 100644 --- a/drivers/infiniband/core/addr.c +++ b/drivers/infiniband/core/addr.c @@ -645,13 +645,12 @@ static void process_one_req(struct work_struct *_work) req->callback = NULL; spin_lock_bh(&lock); + /* + * Although the work will normally have been canceled by the workqueue, + * it can still be requeued as long as it is on the req_list. + */ + cancel_delayed_work(&req->work); if (!list_empty(&req->list)) { - /* - * Although the work will normally have been canceled by the - * workqueue, it can still be requeued as long as it is on the - * req_list. - */ - cancel_delayed_work(&req->work); list_del_init(&req->list); kfree(req); } From c274d1f8baafedd565b14717fa209f8eb8e65c6a Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Mon, 27 Apr 2020 14:50:37 -0500 Subject: [PATCH 39/49] mtd: lpddr: Fix bad logic in print_drs_error commit 1c9c02bb22684f6949d2e7ddc0a3ff364fd5a6fc upstream. Update logic for broken test. Use a more common logging style. It appears the logic in this function is broken for the consecutive tests of if (prog_status & 0x3) ... else if (prog_status & 0x2) ... else (prog_status & 0x1) ... Likely the first test should be if ((prog_status & 0x3) == 0x3) Found by inspection of include files using printk. Fixes: eb3db27507f7 ("[MTD] LPDDR PFOW definition") Cc: stable@vger.kernel.org Reported-by: Joe Perches Signed-off-by: Gustavo A. R. Silva Acked-by: Miquel Raynal Signed-off-by: Miquel Raynal Link: https://lore.kernel.org/linux-mtd/3fb0e29f5b601db8be2938a01d974b00c8788501.1588016644.git.gustavo@embeddedor.com Signed-off-by: Greg Kroah-Hartman --- include/linux/mtd/pfow.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/mtd/pfow.h b/include/linux/mtd/pfow.h index 122f3439e1af..c65d7a3be3c6 100644 --- a/include/linux/mtd/pfow.h +++ b/include/linux/mtd/pfow.h @@ -128,7 +128,7 @@ static inline void print_drs_error(unsigned dsr) if (!(dsr & DSR_AVAILABLE)) printk(KERN_NOTICE"DSR.15: (0) Device not Available\n"); - if (prog_status & 0x03) + if ((prog_status & 0x03) == 0x03) printk(KERN_NOTICE"DSR.9,8: (11) Attempt to program invalid " "half with 41h command\n"); else if (prog_status & 0x02) From 44ef3b63c788a72266a124022569fff785d2b7c4 Mon Sep 17 00:00:00 2001 From: Paras Sharma Date: Wed, 30 Sep 2020 11:35:26 +0530 Subject: [PATCH 40/49] serial: qcom_geni_serial: To correct QUP Version detection logic commit c9ca43d42ed8d5fd635d327a664ed1d8579eb2af upstream. For QUP IP versions 2.5 and above the oversampling rate is halved from 32 to 16. Commit ce734600545f ("tty: serial: qcom_geni_serial: Update the oversampling rate") is pushed to handle this scenario. But the existing logic is failing to classify QUP Version 3.0 into the correct group ( 2.5 and above). As result Serial Engine clocks are not configured properly for baud rate and garbage data is sampled to FIFOs from the line. So, fix the logic to detect QUP with versions 2.5 and above. Fixes: ce734600545f ("tty: serial: qcom_geni_serial: Update the oversampling rate") Cc: stable Signed-off-by: Paras Sharma Reviewed-by: Akash Asthana Link: https://lore.kernel.org/r/1601445926-23673-1-git-send-email-parashar@codeaurora.org Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/qcom_geni_serial.c | 2 +- include/linux/qcom-geni-se.h | 3 +++ 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/tty/serial/qcom_geni_serial.c b/drivers/tty/serial/qcom_geni_serial.c index 0b184256034f..9b148f78323f 100644 --- a/drivers/tty/serial/qcom_geni_serial.c +++ b/drivers/tty/serial/qcom_geni_serial.c @@ -970,7 +970,7 @@ static void qcom_geni_serial_set_termios(struct uart_port *uport, sampling_rate = UART_OVERSAMPLING; /* Sampling rate is halved for IP versions >= 2.5 */ ver = geni_se_get_qup_hw_version(&port->se); - if (GENI_SE_VERSION_MAJOR(ver) >= 2 && GENI_SE_VERSION_MINOR(ver) >= 5) + if (ver >= QUP_SE_VERSION_2_5) sampling_rate /= 2; clk_rate = get_clk_div_rate(baud, sampling_rate, &clk_div); diff --git a/include/linux/qcom-geni-se.h b/include/linux/qcom-geni-se.h index dd464943f717..5b90eff50bf6 100644 --- a/include/linux/qcom-geni-se.h +++ b/include/linux/qcom-geni-se.h @@ -229,6 +229,9 @@ struct geni_se { #define GENI_SE_VERSION_MINOR(ver) ((ver & HW_VER_MINOR_MASK) >> HW_VER_MINOR_SHFT) #define GENI_SE_VERSION_STEP(ver) (ver & HW_VER_STEP_MASK) +/* QUP SE VERSION value for major number 2 and minor number 5 */ +#define QUP_SE_VERSION_2_5 0x20050000 + #if IS_ENABLED(CONFIG_QCOM_GENI_SE) u32 geni_se_get_qup_hw_version(struct geni_se *se); From 870d910e1afb196fa7558e6de41ea749e7605638 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 30 Sep 2020 13:04:32 +0100 Subject: [PATCH 41/49] serial: pl011: Fix lockdep splat when handling magic-sysrq interrupt commit 534cf755d9df99e214ddbe26b91cd4d81d2603e2 upstream. Issuing a magic-sysrq via the PL011 causes the following lockdep splat, which is easily reproducible under QEMU: | sysrq: Changing Loglevel | sysrq: Loglevel set to 9 | | ====================================================== | WARNING: possible circular locking dependency detected | 5.9.0-rc7 #1 Not tainted | ------------------------------------------------------ | systemd-journal/138 is trying to acquire lock: | ffffab133ad950c0 (console_owner){-.-.}-{0:0}, at: console_lock_spinning_enable+0x34/0x70 | | but task is already holding lock: | ffff0001fd47b098 (&port_lock_key){-.-.}-{2:2}, at: pl011_int+0x40/0x488 | | which lock already depends on the new lock. [...] | Possible unsafe locking scenario: | | CPU0 CPU1 | ---- ---- | lock(&port_lock_key); | lock(console_owner); | lock(&port_lock_key); | lock(console_owner); | | *** DEADLOCK *** The issue being that CPU0 takes 'port_lock' on the irq path in pl011_int() before taking 'console_owner' on the printk() path, whereas CPU1 takes the two locks in the opposite order on the printk() path due to setting the "console_owner" prior to calling into into the actual console driver. Fix this in the same way as the msm-serial driver by dropping 'port_lock' before handling the sysrq. Cc: # 4.19+ Cc: Russell King Cc: Greg Kroah-Hartman Cc: Jiri Slaby Link: https://lore.kernel.org/r/20200811101313.GA6970@willie-the-truck Signed-off-by: Peter Zijlstra Tested-by: Will Deacon Signed-off-by: Will Deacon Link: https://lore.kernel.org/r/20200930120432.16551-1-will@kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/amba-pl011.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/drivers/tty/serial/amba-pl011.c b/drivers/tty/serial/amba-pl011.c index a8a538b34b53..16720c97a4dd 100644 --- a/drivers/tty/serial/amba-pl011.c +++ b/drivers/tty/serial/amba-pl011.c @@ -313,8 +313,9 @@ static void pl011_write(unsigned int val, const struct uart_amba_port *uap, */ static int pl011_fifo_to_tty(struct uart_amba_port *uap) { - u16 status; unsigned int ch, flag, fifotaken; + int sysrq; + u16 status; for (fifotaken = 0; fifotaken != 256; fifotaken++) { status = pl011_read(uap, REG_FR); @@ -349,10 +350,12 @@ static int pl011_fifo_to_tty(struct uart_amba_port *uap) flag = TTY_FRAME; } - if (uart_handle_sysrq_char(&uap->port, ch & 255)) - continue; + spin_unlock(&uap->port.lock); + sysrq = uart_handle_sysrq_char(&uap->port, ch & 255); + spin_lock(&uap->port.lock); - uart_insert_char(&uap->port, ch, UART011_DR_OE, ch, flag); + if (!sysrq) + uart_insert_char(&uap->port, ch, UART011_DR_OE, ch, flag); } return fifotaken; From 9f531583c1f06c1b374e79654be65a511e669a25 Mon Sep 17 00:00:00 2001 From: Grygorii Strashko Date: Fri, 18 Sep 2020 19:55:18 +0300 Subject: [PATCH 42/49] PM: runtime: Fix timer_expires data type on 32-bit arches commit 6b61d49a55796dbbc479eeb4465e59fd656c719c upstream. Commit 8234f6734c5d ("PM-runtime: Switch autosuspend over to using hrtimers") switched PM runtime autosuspend to use hrtimers and all related time accounting in ns, but missed to update the timer_expires data type in struct dev_pm_info to u64. This causes the timer_expires value to be truncated on 32-bit architectures when assignment is done from u64 values: rpm_suspend() |- dev->power.timer_expires = expires; Fix it by changing the timer_expires type to u64. Fixes: 8234f6734c5d ("PM-runtime: Switch autosuspend over to using hrtimers") Signed-off-by: Grygorii Strashko Acked-by: Pavel Machek Acked-by: Vincent Guittot Cc: 5.0+ # 5.0+ [ rjw: Subject and changelog edits ] Signed-off-by: Rafael J. Wysocki Signed-off-by: Greg Kroah-Hartman --- include/linux/pm.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/pm.h b/include/linux/pm.h index 4c441be03079..c1d21e9a864f 100644 --- a/include/linux/pm.h +++ b/include/linux/pm.h @@ -598,7 +598,7 @@ struct dev_pm_info { #endif #ifdef CONFIG_PM struct hrtimer suspend_timer; - unsigned long timer_expires; + u64 timer_expires; struct work_struct work; wait_queue_head_t wait_queue; struct wake_irq *wakeirq; From 8f640cd8ee60591821224e3dcba5439d1166b48e Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Thu, 17 Sep 2020 15:09:20 +0200 Subject: [PATCH 43/49] ata: sata_rcar: Fix DMA boundary mask commit df9c590986fdb6db9d5636d6cd93bc919c01b451 upstream. Before commit 9495b7e92f716ab2 ("driver core: platform: Initialize dma_parms for platform devices"), the R-Car SATA device didn't have DMA parameters. Hence the DMA boundary mask supplied by its driver was silently ignored, as __scsi_init_queue() doesn't check the return value of dma_set_seg_boundary(), and the default value of 0xffffffff was used. Now the device has gained DMA parameters, the driver-supplied value is used, and the following warning is printed on Salvator-XS: DMA-API: sata_rcar ee300000.sata: mapping sg segment across boundary [start=0x00000000ffffe000] [end=0x00000000ffffefff] [boundary=0x000000001ffffffe] WARNING: CPU: 5 PID: 38 at kernel/dma/debug.c:1233 debug_dma_map_sg+0x298/0x300 (the range of start/end values depend on whether IOMMU support is enabled or not) The issue here is that SATA_RCAR_DMA_BOUNDARY doesn't have bit 0 set, so any typical end value, which is odd, will trigger the check. Fix this by increasing the DMA boundary value by 1. This also fixes the following WRITE DMA EXT timeout issue: # dd if=/dev/urandom of=/mnt/de1/file1-1024M bs=1M count=1024 ata1.00: exception Emask 0x0 SAct 0x0 SErr 0x0 action 0x6 frozen ata1.00: failed command: WRITE DMA EXT ata1.00: cmd 35/00:00:00:e6:0c/00:0a:00:00:00/e0 tag 0 dma 1310720 out res 40/00:01:00:00:00/00:00:00:00:00/00 Emask 0x4 (timeout) ata1.00: status: { DRDY } as seen by Shimoda-san since commit 429120f3df2dba2b ("block: fix splitting segments on boundary masks"). Fixes: 8bfbeed58665dbbf ("sata_rcar: correct 'sata_rcar_sht'") Fixes: 9495b7e92f716ab2 ("driver core: platform: Initialize dma_parms for platform devices") Fixes: 429120f3df2dba2b ("block: fix splitting segments on boundary masks") Signed-off-by: Geert Uytterhoeven Tested-by: Lad Prabhakar Tested-by: Yoshihiro Shimoda Reviewed-by: Christoph Hellwig Reviewed-by: Greg Kroah-Hartman Reviewed-by: Sergei Shtylyov Reviewed-by: Ulf Hansson Cc: stable Signed-off-by: Jens Axboe Signed-off-by: Greg Kroah-Hartman --- drivers/ata/sata_rcar.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/ata/sata_rcar.c b/drivers/ata/sata_rcar.c index 141ac600b64c..44b0ed8f6bb8 100644 --- a/drivers/ata/sata_rcar.c +++ b/drivers/ata/sata_rcar.c @@ -120,7 +120,7 @@ /* Descriptor table word 0 bit (when DTA32M = 1) */ #define SATA_RCAR_DTEND BIT(0) -#define SATA_RCAR_DMA_BOUNDARY 0x1FFFFFFEUL +#define SATA_RCAR_DMA_BOUNDARY 0x1FFFFFFFUL /* Gen2 Physical Layer Control Registers */ #define RCAR_GEN2_PHY_CTL1_REG 0x1704 From 29bbc9cb0b27d066e3dc98df62e118ccdf413de9 Mon Sep 17 00:00:00 2001 From: Souptick Joarder Date: Sun, 6 Sep 2020 12:21:53 +0530 Subject: [PATCH 44/49] xen/gntdev.c: Mark pages as dirty commit 779055842da5b2e508f3ccf9a8153cb1f704f566 upstream. There seems to be a bug in the original code when gntdev_get_page() is called with writeable=true then the page needs to be marked dirty before being put. To address this, a bool writeable is added in gnt_dev_copy_batch, set it in gntdev_grant_copy_seg() (and drop `writeable` argument to gntdev_get_page()) and then, based on batch->writeable, use set_page_dirty_lock(). Fixes: a4cdb556cae0 (xen/gntdev: add ioctl for grant copy) Suggested-by: Boris Ostrovsky Signed-off-by: Souptick Joarder Cc: John Hubbard Cc: Boris Ostrovsky Cc: Juergen Gross Cc: David Vrabel Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/1599375114-32360-1-git-send-email-jrdr.linux@gmail.com Reviewed-by: Boris Ostrovsky Signed-off-by: Boris Ostrovsky Signed-off-by: Greg Kroah-Hartman --- drivers/xen/gntdev.c | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/drivers/xen/gntdev.c b/drivers/xen/gntdev.c index 81401f386c9c..246864bbb3e4 100644 --- a/drivers/xen/gntdev.c +++ b/drivers/xen/gntdev.c @@ -831,17 +831,18 @@ struct gntdev_copy_batch { s16 __user *status[GNTDEV_COPY_BATCH]; unsigned int nr_ops; unsigned int nr_pages; + bool writeable; }; static int gntdev_get_page(struct gntdev_copy_batch *batch, void __user *virt, - bool writeable, unsigned long *gfn) + unsigned long *gfn) { unsigned long addr = (unsigned long)virt; struct page *page; unsigned long xen_pfn; int ret; - ret = get_user_pages_fast(addr, 1, writeable ? FOLL_WRITE : 0, &page); + ret = get_user_pages_fast(addr, 1, batch->writeable ? FOLL_WRITE : 0, &page); if (ret < 0) return ret; @@ -857,9 +858,13 @@ static void gntdev_put_pages(struct gntdev_copy_batch *batch) { unsigned int i; - for (i = 0; i < batch->nr_pages; i++) + for (i = 0; i < batch->nr_pages; i++) { + if (batch->writeable && !PageDirty(batch->pages[i])) + set_page_dirty_lock(batch->pages[i]); put_page(batch->pages[i]); + } batch->nr_pages = 0; + batch->writeable = false; } static int gntdev_copy(struct gntdev_copy_batch *batch) @@ -948,8 +953,9 @@ static int gntdev_grant_copy_seg(struct gntdev_copy_batch *batch, virt = seg->source.virt + copied; off = (unsigned long)virt & ~XEN_PAGE_MASK; len = min(len, (size_t)XEN_PAGE_SIZE - off); + batch->writeable = false; - ret = gntdev_get_page(batch, virt, false, &gfn); + ret = gntdev_get_page(batch, virt, &gfn); if (ret < 0) return ret; @@ -967,8 +973,9 @@ static int gntdev_grant_copy_seg(struct gntdev_copy_batch *batch, virt = seg->dest.virt + copied; off = (unsigned long)virt & ~XEN_PAGE_MASK; len = min(len, (size_t)XEN_PAGE_SIZE - off); + batch->writeable = true; - ret = gntdev_get_page(batch, virt, true, &gfn); + ret = gntdev_get_page(batch, virt, &gfn); if (ret < 0) return ret; From f73328c3192eb4b93d62673cdfb6c0da502a9d71 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 27 May 2020 16:17:40 +0200 Subject: [PATCH 45/49] crypto: x86/crc32c - fix building with clang ias commit 44623b2818f4a442726639572f44fd9b6d0ef68c upstream. The clang integrated assembler complains about movzxw: arch/x86/crypto/crc32c-pcl-intel-asm_64.S:173:2: error: invalid instruction mnemonic 'movzxw' It seems that movzwq is the mnemonic that it expects instead, and this is what objdump prints when disassembling the file. Fixes: 6a8ce1ef3940 ("crypto: crc32c - Optimize CRC32C calculation with PCLMULQDQ instruction") Signed-off-by: Arnd Bergmann Reviewed-by: Nathan Chancellor Signed-off-by: Herbert Xu [jc: Fixed conflicts due to lack of 34fdce6981b9 ("x86: Change {JMP,CALL}_NOSPEC argument")] Signed-off-by: Jian Cai Signed-off-by: Greg Kroah-Hartman --- arch/x86/crypto/crc32c-pcl-intel-asm_64.S | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/crypto/crc32c-pcl-intel-asm_64.S b/arch/x86/crypto/crc32c-pcl-intel-asm_64.S index d9b734d0c8cc..3c6e01520a97 100644 --- a/arch/x86/crypto/crc32c-pcl-intel-asm_64.S +++ b/arch/x86/crypto/crc32c-pcl-intel-asm_64.S @@ -170,7 +170,7 @@ continue_block: ## branch into array lea jump_table(%rip), bufp - movzxw (bufp, %rax, 2), len + movzwq (bufp, %rax, 2), len lea crc_array(%rip), bufp lea (bufp, len, 1), bufp JMP_NOSPEC bufp From a6db3aab9c408e1b788c43f9fb179382f5793ea2 Mon Sep 17 00:00:00 2001 From: Stafford Horne Date: Thu, 3 Sep 2020 05:54:40 +0900 Subject: [PATCH 46/49] openrisc: Fix issue with get_user for 64-bit values commit d877322bc1adcab9850732275670409e8bcca4c4 upstream. A build failure was raised by kbuild with the following error. drivers/android/binder.c: Assembler messages: drivers/android/binder.c:3861: Error: unrecognized keyword/register name `l.lwz ?ap,4(r24)' drivers/android/binder.c:3866: Error: unrecognized keyword/register name `l.addi ?ap,r0,0' The issue is with 64-bit get_user() calls on openrisc. I traced this to a problem where in the internally in the get_user macros there is a cast to long __gu_val this causes GCC to think the get_user call is 32-bit. This binder code is really long and GCC allocates register r30, which triggers the issue. The 64-bit get_user asm tries to get the 64-bit pair register, which for r30 overflows the general register names and returns the dummy register ?ap. The fix here is to move the temporary variables into the asm macros. We use a 32-bit __gu_tmp for 32-bit and smaller macro and a 64-bit tmp in the 64-bit macro. The cast in the 64-bit macro has a trick of casting through __typeof__((x)-(x)) which avoids the below warning. This was barrowed from riscv. arch/openrisc/include/asm/uaccess.h:240:8: warning: cast to pointer from integer of different size I tested this in a small unit test to check reading between 64-bit and 32-bit pointers to 64-bit and 32-bit values in all combinations. Also I ran make C=1 to confirm no new sparse warnings came up. It all looks clean to me. Link: https://lore.kernel.org/lkml/202008200453.ohnhqkjQ%25lkp@intel.com/ Signed-off-by: Stafford Horne Reviewed-by: Luc Van Oostenryck Signed-off-by: Greg Kroah-Hartman --- arch/openrisc/include/asm/uaccess.h | 35 ++++++++++++++++++----------- 1 file changed, 22 insertions(+), 13 deletions(-) diff --git a/arch/openrisc/include/asm/uaccess.h b/arch/openrisc/include/asm/uaccess.h index 17c24f14615f..6839f8fcf76b 100644 --- a/arch/openrisc/include/asm/uaccess.h +++ b/arch/openrisc/include/asm/uaccess.h @@ -164,19 +164,19 @@ struct __large_struct { #define __get_user_nocheck(x, ptr, size) \ ({ \ - long __gu_err, __gu_val; \ - __get_user_size(__gu_val, (ptr), (size), __gu_err); \ - (x) = (__force __typeof__(*(ptr)))__gu_val; \ + long __gu_err; \ + __get_user_size((x), (ptr), (size), __gu_err); \ __gu_err; \ }) #define __get_user_check(x, ptr, size) \ ({ \ - long __gu_err = -EFAULT, __gu_val = 0; \ - const __typeof__(*(ptr)) * __gu_addr = (ptr); \ - if (access_ok(__gu_addr, size)) \ - __get_user_size(__gu_val, __gu_addr, (size), __gu_err); \ - (x) = (__force __typeof__(*(ptr)))__gu_val; \ + long __gu_err = -EFAULT; \ + const __typeof__(*(ptr)) *__gu_addr = (ptr); \ + if (access_ok(__gu_addr, size)) \ + __get_user_size((x), __gu_addr, (size), __gu_err); \ + else \ + (x) = (__typeof__(*(ptr))) 0; \ __gu_err; \ }) @@ -190,11 +190,13 @@ do { \ case 2: __get_user_asm(x, ptr, retval, "l.lhz"); break; \ case 4: __get_user_asm(x, ptr, retval, "l.lwz"); break; \ case 8: __get_user_asm2(x, ptr, retval); break; \ - default: (x) = __get_user_bad(); \ + default: (x) = (__typeof__(*(ptr)))__get_user_bad(); \ } \ } while (0) #define __get_user_asm(x, addr, err, op) \ +{ \ + unsigned long __gu_tmp; \ __asm__ __volatile__( \ "1: "op" %1,0(%2)\n" \ "2:\n" \ @@ -208,10 +210,14 @@ do { \ " .align 2\n" \ " .long 1b,3b\n" \ ".previous" \ - : "=r"(err), "=r"(x) \ - : "r"(addr), "i"(-EFAULT), "0"(err)) + : "=r"(err), "=r"(__gu_tmp) \ + : "r"(addr), "i"(-EFAULT), "0"(err)); \ + (x) = (__typeof__(*(addr)))__gu_tmp; \ +} #define __get_user_asm2(x, addr, err) \ +{ \ + unsigned long long __gu_tmp; \ __asm__ __volatile__( \ "1: l.lwz %1,0(%2)\n" \ "2: l.lwz %H1,4(%2)\n" \ @@ -228,8 +234,11 @@ do { \ " .long 1b,4b\n" \ " .long 2b,4b\n" \ ".previous" \ - : "=r"(err), "=&r"(x) \ - : "r"(addr), "i"(-EFAULT), "0"(err)) + : "=r"(err), "=&r"(__gu_tmp) \ + : "r"(addr), "i"(-EFAULT), "0"(err)); \ + (x) = (__typeof__(*(addr)))( \ + (__typeof__((x)-(x)))__gu_tmp); \ +} /* more complex routines */ From aa3410cc232cec0c10a15fe04ae4353eb3c1a43b Mon Sep 17 00:00:00 2001 From: Ricky Wu Date: Mon, 24 Aug 2020 11:00:06 +0800 Subject: [PATCH 47/49] misc: rtsx: do not setting OC_POWER_DOWN reg in rtsx_pci_init_ocp() commit 551b6729578a8981c46af964c10bf7d5d9ddca83 upstream. this power saving action in rtsx_pci_init_ocp() cause INTEL-NUC6 platform missing card reader Signed-off-by: Ricky Wu Link: https://lore.kernel.org/r/20200824030006.30033-1-ricky_wu@realtek.com Cc: Chris Clayton Signed-off-by: Greg Kroah-Hartman --- drivers/misc/cardreader/rtsx_pcr.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/drivers/misc/cardreader/rtsx_pcr.c b/drivers/misc/cardreader/rtsx_pcr.c index 4fd57052ddd3..4c707d8dc3eb 100644 --- a/drivers/misc/cardreader/rtsx_pcr.c +++ b/drivers/misc/cardreader/rtsx_pcr.c @@ -1189,10 +1189,6 @@ void rtsx_pci_init_ocp(struct rtsx_pcr *pcr) rtsx_pci_write_register(pcr, REG_OCPGLITCH, SD_OCP_GLITCH_MASK, pcr->hw_param.ocp_glitch); rtsx_pci_enable_ocp(pcr); - } else { - /* OC power down */ - rtsx_pci_write_register(pcr, FPDCTL, OC_POWER_DOWN, - OC_POWER_DOWN); } } } From 847c86d7f1d5d374f0c8483f7dfe167b89dca301 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pali=20Roh=C3=A1r?= Date: Wed, 2 Sep 2020 16:43:43 +0200 Subject: [PATCH 48/49] phy: marvell: comphy: Convert internal SMCC firmware return codes to errno MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit ea17a0f153af2cd890e4ce517130dcccaa428c13 upstream. Driver ->power_on and ->power_off callbacks leaks internal SMCC firmware return codes to phy caller. This patch converts SMCC error codes to standard linux errno codes. Include file linux/arm-smccc.h already provides defines for SMCC error codes, so use them instead of custom driver defines. Note that return value is signed 32bit, but stored in unsigned long type with zero padding. Tested-by: Tomasz Maciej Nowak Link: https://lore.kernel.org/r/20200902144344.16684-2-pali@kernel.org Signed-off-by: Pali Rohár Signed-off-by: Lorenzo Pieralisi Reviewed-by: Rob Herring Signed-off-by: Greg Kroah-Hartman --- drivers/phy/marvell/phy-mvebu-a3700-comphy.c | 14 +++++++++++--- drivers/phy/marvell/phy-mvebu-cp110-comphy.c | 14 +++++++++++--- 2 files changed, 22 insertions(+), 6 deletions(-) diff --git a/drivers/phy/marvell/phy-mvebu-a3700-comphy.c b/drivers/phy/marvell/phy-mvebu-a3700-comphy.c index 1a138be8bd6a..810f25a47632 100644 --- a/drivers/phy/marvell/phy-mvebu-a3700-comphy.c +++ b/drivers/phy/marvell/phy-mvebu-a3700-comphy.c @@ -26,7 +26,6 @@ #define COMPHY_SIP_POWER_ON 0x82000001 #define COMPHY_SIP_POWER_OFF 0x82000002 #define COMPHY_SIP_PLL_LOCK 0x82000003 -#define COMPHY_FW_NOT_SUPPORTED (-1) #define COMPHY_FW_MODE_SATA 0x1 #define COMPHY_FW_MODE_SGMII 0x2 @@ -112,10 +111,19 @@ static int mvebu_a3700_comphy_smc(unsigned long function, unsigned long lane, unsigned long mode) { struct arm_smccc_res res; + s32 ret; arm_smccc_smc(function, lane, mode, 0, 0, 0, 0, 0, &res); + ret = res.a0; - return res.a0; + switch (ret) { + case SMCCC_RET_SUCCESS: + return 0; + case SMCCC_RET_NOT_SUPPORTED: + return -EOPNOTSUPP; + default: + return -EINVAL; + } } static int mvebu_a3700_comphy_get_fw_mode(int lane, int port, @@ -220,7 +228,7 @@ static int mvebu_a3700_comphy_power_on(struct phy *phy) } ret = mvebu_a3700_comphy_smc(COMPHY_SIP_POWER_ON, lane->id, fw_param); - if (ret == COMPHY_FW_NOT_SUPPORTED) + if (ret == -EOPNOTSUPP) dev_err(lane->dev, "unsupported SMC call, try updating your firmware\n"); diff --git a/drivers/phy/marvell/phy-mvebu-cp110-comphy.c b/drivers/phy/marvell/phy-mvebu-cp110-comphy.c index e3b87c94aaf6..849351b4805f 100644 --- a/drivers/phy/marvell/phy-mvebu-cp110-comphy.c +++ b/drivers/phy/marvell/phy-mvebu-cp110-comphy.c @@ -123,7 +123,6 @@ #define COMPHY_SIP_POWER_ON 0x82000001 #define COMPHY_SIP_POWER_OFF 0x82000002 -#define COMPHY_FW_NOT_SUPPORTED (-1) /* * A lane is described by the following bitfields: @@ -273,10 +272,19 @@ static int mvebu_comphy_smc(unsigned long function, unsigned long phys, unsigned long lane, unsigned long mode) { struct arm_smccc_res res; + s32 ret; arm_smccc_smc(function, phys, lane, mode, 0, 0, 0, 0, &res); + ret = res.a0; - return res.a0; + switch (ret) { + case SMCCC_RET_SUCCESS: + return 0; + case SMCCC_RET_NOT_SUPPORTED: + return -EOPNOTSUPP; + default: + return -EINVAL; + } } static int mvebu_comphy_get_mode(bool fw_mode, int lane, int port, @@ -819,7 +827,7 @@ static int mvebu_comphy_power_on(struct phy *phy) if (!ret) return ret; - if (ret == COMPHY_FW_NOT_SUPPORTED) + if (ret == -EOPNOTSUPP) dev_err(priv->dev, "unsupported SMC call, try updating your firmware\n"); From b300b28b78145b832f1112d77035111e35112cec Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Sun, 1 Nov 2020 12:01:07 +0100 Subject: [PATCH 49/49] Linux 5.4.74 Tested-by: Guenter Roeck Tested-by: Linux Kernel Functional Testing Link: https://lore.kernel.org/r/CA+G9fYtw23F_PuCjiyVXz4464PsjcTSsL1jgvPP6D9xoZWZU7A@mail.gmail.com Signed-off-by: Greg Kroah-Hartman --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index f9a8d76a693e..3be5a9c352b9 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 VERSION = 5 PATCHLEVEL = 4 -SUBLEVEL = 73 +SUBLEVEL = 74 EXTRAVERSION = NAME = Kleptomaniac Octopus