From 81d152c8daf835af0cf55b3ce3dd1449b4fcf88e Mon Sep 17 00:00:00 2001 From: Ritesh Harjani Date: Tue, 5 May 2020 17:43:14 +0200 Subject: [PATCH 01/61] ext4: fix EXT4_MAX_LOGICAL_BLOCK macro commit 175efa81feb8405676e0136d97b10380179c92e0 upstream. ext4 supports max number of logical blocks in a file to be 0xffffffff. (This is because ext4_extent's ee_block is __le32). This means that EXT4_MAX_LOGICAL_BLOCK should be 0xfffffffe (starting from 0 logical offset). This patch fixes this. The issue was seen when ext4 moved to iomap_fiemap API and when overlayfs was mounted on top of ext4. Since overlayfs was missing fiemap_check_ranges(), it could pass an arbitrarily huge length, which led to an overflow in the map.m_len logic. This patch fixes that. Fixes: d3b6f23f7167 ("ext4: move ext4_fiemap to use iomap framework") Reported-by: syzbot+77fa5bdb65cc39711820@syzkaller.appspotmail.com Signed-off-by: Ritesh Harjani Reviewed-by: Jan Kara Signed-off-by: Christoph Hellwig Link: https://lore.kernel.org/r/20200505154324.3226743-2-hch@lst.de Signed-off-by: Theodore Ts'o Signed-off-by: George Kennedy Signed-off-by: Greg Kroah-Hartman --- fs/ext4/ext4.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index bf3eaa903033..ae2cb15d9540 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -718,7 +718,7 @@ enum { #define EXT4_MAX_BLOCK_FILE_PHYS 0xFFFFFFFF /* Max logical block we can support */ -#define EXT4_MAX_LOGICAL_BLOCK 0xFFFFFFFF +#define EXT4_MAX_LOGICAL_BLOCK 0xFFFFFFFE /* * Structure of an inode on the disk From 172b91bbbb49f180e60e206eb85a45b8462e0dc0 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 18 Jun 2021 16:18:25 +0200 Subject: [PATCH 02/61] x86/fpu: Make init_fpstate correct with optimized XSAVE commit f9dfb5e390fab2df9f7944bb91e7705aba14cd26 upstream. The XSAVE init code initializes all enabled and supported components with XRSTOR(S) to init state. 
Then it XSAVEs the state of the components back into init_fpstate which is used in several places to fill in the init state of components. This works correctly with XSAVE, but not with XSAVEOPT and XSAVES because those use the init optimization and skip writing state of components which are in init state. So init_fpstate.xsave still contains all zeroes after this operation. There are two ways to solve that: 1) Use XSAVE unconditionally, but that requires to reshuffle the buffer when XSAVES is enabled because XSAVES uses compacted format. 2) Save the components which are known to have a non-zero init state by other means. Looking deeper, #2 is the right thing to do because all components the kernel supports have all-zeroes init state except the legacy features (FP, SSE). Those cannot be hard coded because the states are not identical on all CPUs, but they can be saved with FXSAVE which avoids all conditionals. Use FXSAVE to save the legacy FP/SSE components in init_fpstate along with a BUILD_BUG_ON() which reminds developers to validate that a newly added component has all zeroes init state. As a bonus remove the now unused copy_xregs_to_kernel_booting() crutch. The XSAVE and reshuffle method can still be implemented in the unlikely case that components are added which have a non-zero init state and no other means to save them. For now, FXSAVE is just simple and good enough. [ bp: Fix a typo or two in the text. 
] Fixes: 6bad06b76892 ("x86, xsave: Use xsaveopt in context-switch path when supported") Signed-off-by: Thomas Gleixner Signed-off-by: Borislav Petkov Reviewed-by: Borislav Petkov Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/20210618143444.587311343@linutronix.de Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/fpu/internal.h | 30 ++++++----------------- arch/x86/kernel/fpu/xstate.c | 38 ++++++++++++++++++++++++++--- 2 files changed, 43 insertions(+), 25 deletions(-) diff --git a/arch/x86/include/asm/fpu/internal.h b/arch/x86/include/asm/fpu/internal.h index a9d1dd82d820..03b3de491b5e 100644 --- a/arch/x86/include/asm/fpu/internal.h +++ b/arch/x86/include/asm/fpu/internal.h @@ -204,6 +204,14 @@ static inline void copy_fxregs_to_kernel(struct fpu *fpu) asm volatile("fxsaveq %[fx]" : [fx] "=m" (fpu->state.fxsave)); } +static inline void fxsave(struct fxregs_state *fx) +{ + if (IS_ENABLED(CONFIG_X86_32)) + asm volatile( "fxsave %[fx]" : [fx] "=m" (*fx)); + else + asm volatile("fxsaveq %[fx]" : [fx] "=m" (*fx)); +} + /* These macros all use (%edi)/(%rdi) as the single memory argument. */ #define XSAVE ".byte " REX_PREFIX "0x0f,0xae,0x27" #define XSAVEOPT ".byte " REX_PREFIX "0x0f,0xae,0x37" @@ -272,28 +280,6 @@ static inline void copy_fxregs_to_kernel(struct fpu *fpu) : "D" (st), "m" (*st), "a" (lmask), "d" (hmask) \ : "memory") -/* - * This function is called only during boot time when x86 caps are not set - * up and alternative can not be used yet. 
- */ -static inline void copy_xregs_to_kernel_booting(struct xregs_state *xstate) -{ - u64 mask = -1; - u32 lmask = mask; - u32 hmask = mask >> 32; - int err; - - WARN_ON(system_state != SYSTEM_BOOTING); - - if (boot_cpu_has(X86_FEATURE_XSAVES)) - XSTATE_OP(XSAVES, xstate, lmask, hmask, err); - else - XSTATE_OP(XSAVE, xstate, lmask, hmask, err); - - /* We should never fault when copying to a kernel buffer: */ - WARN_ON_FPU(err); -} - /* * This function is called only during boot time when x86 caps are not set * up and alternative can not be used yet. diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c index 735d1f1bbabc..046782df37a6 100644 --- a/arch/x86/kernel/fpu/xstate.c +++ b/arch/x86/kernel/fpu/xstate.c @@ -398,6 +398,24 @@ static void __init print_xstate_offset_size(void) } } +/* + * All supported features have either init state all zeros or are + * handled in setup_init_fpu() individually. This is an explicit + * feature list and does not use XFEATURE_MASK*SUPPORTED to catch + * newly added supported features at build time and make people + * actually look at the init state for the new feature. + */ +#define XFEATURES_INIT_FPSTATE_HANDLED \ + (XFEATURE_MASK_FP | \ + XFEATURE_MASK_SSE | \ + XFEATURE_MASK_YMM | \ + XFEATURE_MASK_OPMASK | \ + XFEATURE_MASK_ZMM_Hi256 | \ + XFEATURE_MASK_Hi16_ZMM | \ + XFEATURE_MASK_PKRU | \ + XFEATURE_MASK_BNDREGS | \ + XFEATURE_MASK_BNDCSR) + /* * setup the xstate image representing the init state */ @@ -405,6 +423,8 @@ static void __init setup_init_fpu_buf(void) { static int on_boot_cpu __initdata = 1; + BUILD_BUG_ON(XCNTXT_MASK != XFEATURES_INIT_FPSTATE_HANDLED); + WARN_ON_FPU(!on_boot_cpu); on_boot_cpu = 0; @@ -423,10 +443,22 @@ static void __init setup_init_fpu_buf(void) copy_kernel_to_xregs_booting(&init_fpstate.xsave); /* - * Dump the init state again. This is to identify the init state - * of any feature which is not represented by all zero's. + * All components are now in init state. 
Read the state back so + * that init_fpstate contains all non-zero init state. This only + * works with XSAVE, but not with XSAVEOPT and XSAVES because + * those use the init optimization which skips writing data for + * components in init state. + * + * XSAVE could be used, but that would require to reshuffle the + * data when XSAVES is available because XSAVES uses xstate + * compaction. But doing so is a pointless exercise because most + * components have an all zeros init state except for the legacy + * ones (FP and SSE). Those can be saved with FXSAVE into the + * legacy area. Adding new features requires to ensure that init + * state is all zeroes or if not to add the necessary handling + * here. */ - copy_xregs_to_kernel_booting(&init_fpstate.xsave); + fxsave(&init_fpstate.fxsave); } static int xfeature_uncompacted_offset(int xfeature_nr) From 0c049ce432b37a51a0da005314ac32e5d9324ccf Mon Sep 17 00:00:00 2001 From: Jouni Malinen Date: Mon, 14 Dec 2020 19:21:14 +0200 Subject: [PATCH 03/61] ath: Use safer key clearing with key cache entries MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 56c5485c9e444c2e85e11694b6c44f1338fc20fd upstream. It is possible for there to be pending frames in TXQs with a reference to the key cache entry that is being deleted. If such a key cache entry is cleared, those pending frames in the TXQs might get transmitted without proper encryption. It is safer to leave the previously used key in the key cache in such cases. Instead, only clear the MAC address to prevent RX processing from using this key cache entry. This is needed in particular in AP mode, where the TXQs cannot be flushed on station disconnection. 
This change alone may not be able to address all cases where the key cache entry might get reused for other purposes immediately (the key cache entry should be released for reuse only once the TXQs do not have any remaining references to them), but this makes it less likely to get unprotected frames and the more complete changes may end up being significantly more complex. Signed-off-by: Jouni Malinen Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/20201214172118.18100-2-jouni@codeaurora.org Cc: Pali Rohár Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/ath/key.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/drivers/net/wireless/ath/key.c b/drivers/net/wireless/ath/key.c index 1816b4e7dc26..59618bb41f6c 100644 --- a/drivers/net/wireless/ath/key.c +++ b/drivers/net/wireless/ath/key.c @@ -583,7 +583,16 @@ EXPORT_SYMBOL(ath_key_config); */ void ath_key_delete(struct ath_common *common, struct ieee80211_key_conf *key) { - ath_hw_keyreset(common, key->hw_key_idx); + /* Leave CCMP and TKIP (main key) configured to avoid disabling + * encryption for potentially pending frames already in a TXQ with the + * keyix pointing to this key entry. Instead, only clear the MAC address + * to prevent RX processing from using this key cache entry. + */ + if (test_bit(key->hw_key_idx, common->ccmp_keymap) || + test_bit(key->hw_key_idx, common->tkip_keymap)) + ath_hw_keysetmac(common, key->hw_key_idx, NULL); + else + ath_hw_keyreset(common, key->hw_key_idx); if (key->hw_key_idx < IEEE80211_WEP_NKID) return; From add283e2517a90468ce223465e0f4360128bb650 Mon Sep 17 00:00:00 2001 From: Jouni Malinen Date: Mon, 14 Dec 2020 19:21:15 +0200 Subject: [PATCH 04/61] ath9k: Clear key cache explicitly on disabling hardware MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 73488cb2fa3bb1ef9f6cf0d757f76958bd4deaca upstream. 
Now that ath/key.c may not be explicitly clearing keys from the key cache, clear all key cache entries when disabling hardware to make sure no keys are left behind beyond this point. Signed-off-by: Jouni Malinen Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/20201214172118.18100-3-jouni@codeaurora.org Cc: Pali Rohár Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/ath/ath9k/main.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/net/wireless/ath/ath9k/main.c b/drivers/net/wireless/ath/ath9k/main.c index d14e01da3c31..eae7b7e58429 100644 --- a/drivers/net/wireless/ath/ath9k/main.c +++ b/drivers/net/wireless/ath/ath9k/main.c @@ -896,6 +896,11 @@ static void ath9k_stop(struct ieee80211_hw *hw) spin_unlock_bh(&sc->sc_pcu_lock); + /* Clear key cache entries explicitly to get rid of any potentially + * remaining keys. + */ + ath9k_cmn_init_crypto(sc->sc_ah); + ath9k_ps_restore(sc); sc->ps_idle = prev_idle; From b7d593705eb4f0655a70f0207f573fb1edb80bda Mon Sep 17 00:00:00 2001 From: Jouni Malinen Date: Mon, 14 Dec 2020 19:21:16 +0200 Subject: [PATCH 05/61] ath: Export ath_hw_keysetmac() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit d2d3e36498dd8e0c83ea99861fac5cf9e8671226 upstream. ath9k is going to use this for safer management of key cache entries. 
Signed-off-by: Jouni Malinen Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/20201214172118.18100-4-jouni@codeaurora.org Cc: Pali Rohár Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/ath/ath.h | 1 + drivers/net/wireless/ath/key.c | 4 ++-- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/net/wireless/ath/ath.h b/drivers/net/wireless/ath/ath.h index 7a364eca46d6..9d18105c449f 100644 --- a/drivers/net/wireless/ath/ath.h +++ b/drivers/net/wireless/ath/ath.h @@ -203,6 +203,7 @@ int ath_key_config(struct ath_common *common, struct ieee80211_sta *sta, struct ieee80211_key_conf *key); bool ath_hw_keyreset(struct ath_common *common, u16 entry); +bool ath_hw_keysetmac(struct ath_common *common, u16 entry, const u8 *mac); void ath_hw_cycle_counters_update(struct ath_common *common); int32_t ath_hw_get_listen_time(struct ath_common *common); diff --git a/drivers/net/wireless/ath/key.c b/drivers/net/wireless/ath/key.c index 59618bb41f6c..cb266cf3c77c 100644 --- a/drivers/net/wireless/ath/key.c +++ b/drivers/net/wireless/ath/key.c @@ -84,8 +84,7 @@ bool ath_hw_keyreset(struct ath_common *common, u16 entry) } EXPORT_SYMBOL(ath_hw_keyreset); -static bool ath_hw_keysetmac(struct ath_common *common, - u16 entry, const u8 *mac) +bool ath_hw_keysetmac(struct ath_common *common, u16 entry, const u8 *mac) { u32 macHi, macLo; u32 unicast_flag = AR_KEYTABLE_VALID; @@ -125,6 +124,7 @@ static bool ath_hw_keysetmac(struct ath_common *common, return true; } +EXPORT_SYMBOL(ath_hw_keysetmac); static bool ath_hw_set_keycache_entry(struct ath_common *common, u16 entry, const struct ath_keyval *k, From c6feaf806da6a0deecc2fe41adb3443cdecba347 Mon Sep 17 00:00:00 2001 From: Jouni Malinen Date: Mon, 14 Dec 2020 19:21:17 +0200 Subject: [PATCH 06/61] ath: Modify ath_key_delete() to not need full key entry MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 144cd24dbc36650a51f7fe3bf1424a1432f1f480 upstream. 
tkip_keymap can be used internally to avoid the reference to key->cipher and with this, only the key index value itself is needed. This allows ath_key_delete() call to be postponed to be handled after the upper layer STA and key entry have already been removed. This is needed to make ath9k key cache management safer. Signed-off-by: Jouni Malinen Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/20201214172118.18100-5-jouni@codeaurora.org Cc: Pali Rohár Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/ath/ath.h | 2 +- drivers/net/wireless/ath/ath5k/mac80211-ops.c | 2 +- drivers/net/wireless/ath/ath9k/htc_drv_main.c | 2 +- drivers/net/wireless/ath/ath9k/main.c | 5 ++- drivers/net/wireless/ath/key.c | 34 +++++++++---------- 5 files changed, 22 insertions(+), 23 deletions(-) diff --git a/drivers/net/wireless/ath/ath.h b/drivers/net/wireless/ath/ath.h index 9d18105c449f..f083fb9038c3 100644 --- a/drivers/net/wireless/ath/ath.h +++ b/drivers/net/wireless/ath/ath.h @@ -197,7 +197,7 @@ struct sk_buff *ath_rxbuf_alloc(struct ath_common *common, bool ath_is_mybeacon(struct ath_common *common, struct ieee80211_hdr *hdr); void ath_hw_setbssidmask(struct ath_common *common); -void ath_key_delete(struct ath_common *common, struct ieee80211_key_conf *key); +void ath_key_delete(struct ath_common *common, u8 hw_key_idx); int ath_key_config(struct ath_common *common, struct ieee80211_vif *vif, struct ieee80211_sta *sta, diff --git a/drivers/net/wireless/ath/ath5k/mac80211-ops.c b/drivers/net/wireless/ath/ath5k/mac80211-ops.c index 5e866a193ed0..d065600791c1 100644 --- a/drivers/net/wireless/ath/ath5k/mac80211-ops.c +++ b/drivers/net/wireless/ath/ath5k/mac80211-ops.c @@ -521,7 +521,7 @@ ath5k_set_key(struct ieee80211_hw *hw, enum set_key_cmd cmd, } break; case DISABLE_KEY: - ath_key_delete(common, key); + ath_key_delete(common, key->hw_key_idx); break; default: ret = -EINVAL; diff --git a/drivers/net/wireless/ath/ath9k/htc_drv_main.c 
b/drivers/net/wireless/ath/ath9k/htc_drv_main.c index a82ad739ab80..16a7bae62b7d 100644 --- a/drivers/net/wireless/ath/ath9k/htc_drv_main.c +++ b/drivers/net/wireless/ath/ath9k/htc_drv_main.c @@ -1460,7 +1460,7 @@ static int ath9k_htc_set_key(struct ieee80211_hw *hw, } break; case DISABLE_KEY: - ath_key_delete(common, key); + ath_key_delete(common, key->hw_key_idx); break; default: ret = -EINVAL; diff --git a/drivers/net/wireless/ath/ath9k/main.c b/drivers/net/wireless/ath/ath9k/main.c index eae7b7e58429..c08a7202dca2 100644 --- a/drivers/net/wireless/ath/ath9k/main.c +++ b/drivers/net/wireless/ath/ath9k/main.c @@ -1546,12 +1546,11 @@ static void ath9k_del_ps_key(struct ath_softc *sc, { struct ath_common *common = ath9k_hw_common(sc->sc_ah); struct ath_node *an = (struct ath_node *) sta->drv_priv; - struct ieee80211_key_conf ps_key = { .hw_key_idx = an->ps_key }; if (!an->ps_key) return; - ath_key_delete(common, &ps_key); + ath_key_delete(common, an->ps_key); an->ps_key = 0; an->key_idx[0] = 0; } @@ -1742,7 +1741,7 @@ static int ath9k_set_key(struct ieee80211_hw *hw, } break; case DISABLE_KEY: - ath_key_delete(common, key); + ath_key_delete(common, key->hw_key_idx); if (an) { for (i = 0; i < ARRAY_SIZE(an->key_idx); i++) { if (an->key_idx[i] != key->hw_key_idx) diff --git a/drivers/net/wireless/ath/key.c b/drivers/net/wireless/ath/key.c index cb266cf3c77c..61b59a804e30 100644 --- a/drivers/net/wireless/ath/key.c +++ b/drivers/net/wireless/ath/key.c @@ -581,38 +581,38 @@ EXPORT_SYMBOL(ath_key_config); /* * Delete Key. */ -void ath_key_delete(struct ath_common *common, struct ieee80211_key_conf *key) +void ath_key_delete(struct ath_common *common, u8 hw_key_idx) { /* Leave CCMP and TKIP (main key) configured to avoid disabling * encryption for potentially pending frames already in a TXQ with the * keyix pointing to this key entry. Instead, only clear the MAC address * to prevent RX processing from using this key cache entry. 
*/ - if (test_bit(key->hw_key_idx, common->ccmp_keymap) || - test_bit(key->hw_key_idx, common->tkip_keymap)) - ath_hw_keysetmac(common, key->hw_key_idx, NULL); + if (test_bit(hw_key_idx, common->ccmp_keymap) || + test_bit(hw_key_idx, common->tkip_keymap)) + ath_hw_keysetmac(common, hw_key_idx, NULL); else - ath_hw_keyreset(common, key->hw_key_idx); - if (key->hw_key_idx < IEEE80211_WEP_NKID) + ath_hw_keyreset(common, hw_key_idx); + if (hw_key_idx < IEEE80211_WEP_NKID) return; - clear_bit(key->hw_key_idx, common->keymap); - clear_bit(key->hw_key_idx, common->ccmp_keymap); - if (key->cipher != WLAN_CIPHER_SUITE_TKIP) + clear_bit(hw_key_idx, common->keymap); + clear_bit(hw_key_idx, common->ccmp_keymap); + if (!test_bit(hw_key_idx, common->tkip_keymap)) return; - clear_bit(key->hw_key_idx + 64, common->keymap); + clear_bit(hw_key_idx + 64, common->keymap); - clear_bit(key->hw_key_idx, common->tkip_keymap); - clear_bit(key->hw_key_idx + 64, common->tkip_keymap); + clear_bit(hw_key_idx, common->tkip_keymap); + clear_bit(hw_key_idx + 64, common->tkip_keymap); if (!(common->crypt_caps & ATH_CRYPT_CAP_MIC_COMBINED)) { - ath_hw_keyreset(common, key->hw_key_idx + 32); - clear_bit(key->hw_key_idx + 32, common->keymap); - clear_bit(key->hw_key_idx + 64 + 32, common->keymap); + ath_hw_keyreset(common, hw_key_idx + 32); + clear_bit(hw_key_idx + 32, common->keymap); + clear_bit(hw_key_idx + 64 + 32, common->keymap); - clear_bit(key->hw_key_idx + 32, common->tkip_keymap); - clear_bit(key->hw_key_idx + 64 + 32, common->tkip_keymap); + clear_bit(hw_key_idx + 32, common->tkip_keymap); + clear_bit(hw_key_idx + 64 + 32, common->tkip_keymap); } } EXPORT_SYMBOL(ath_key_delete); From 23f77ad13f8176314b7c51f71b9ac7c5c6d10b7b Mon Sep 17 00:00:00 2001 From: Jouni Malinen Date: Mon, 14 Dec 2020 19:21:18 +0200 Subject: [PATCH 07/61] ath9k: Postpone key cache entry deletion for TXQ frames reference it MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 
commit ca2848022c12789685d3fab3227df02b863f9696 upstream. Do not delete a key cache entry that is still being referenced by pending frames in TXQs. This avoids reuse of the key cache entry while a frame might still be transmitted using it. To avoid having to do any additional operations during the main TX path operations, track pending key cache entries in a new bitmap and check whether any pending entries can be deleted before every new key add/remove operation. Also clear any remaining entries when stopping the interface. Signed-off-by: Jouni Malinen Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/20201214172118.18100-6-jouni@codeaurora.org Cc: Pali Rohár Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/ath/ath9k/hw.h | 1 + drivers/net/wireless/ath/ath9k/main.c | 87 ++++++++++++++++++++++++++- 2 files changed, 87 insertions(+), 1 deletion(-) diff --git a/drivers/net/wireless/ath/ath9k/hw.h b/drivers/net/wireless/ath/ath9k/hw.h index 2e4489700a85..2842ca205a0a 100644 --- a/drivers/net/wireless/ath/ath9k/hw.h +++ b/drivers/net/wireless/ath/ath9k/hw.h @@ -819,6 +819,7 @@ struct ath_hw { struct ath9k_pacal_info pacal_info; struct ar5416Stats stats; struct ath9k_tx_queue_info txq[ATH9K_NUM_TX_QUEUES]; + DECLARE_BITMAP(pending_del_keymap, ATH_KEYMAX); enum ath9k_int imask; u32 imrs2_reg; diff --git a/drivers/net/wireless/ath/ath9k/main.c b/drivers/net/wireless/ath/ath9k/main.c index c08a7202dca2..28ccdcb197de 100644 --- a/drivers/net/wireless/ath/ath9k/main.c +++ b/drivers/net/wireless/ath/ath9k/main.c @@ -823,12 +823,80 @@ exit: ieee80211_free_txskb(hw, skb); } +static bool ath9k_txq_list_has_key(struct list_head *txq_list, u32 keyix) +{ + struct ath_buf *bf; + struct ieee80211_tx_info *txinfo; + struct ath_frame_info *fi; + + list_for_each_entry(bf, txq_list, list) { + if (bf->bf_state.stale || !bf->bf_mpdu) + continue; + + txinfo = IEEE80211_SKB_CB(bf->bf_mpdu); + fi = (struct ath_frame_info *)&txinfo->rate_driver_data[0]; + if (fi->keyix == 
keyix) + return true; + } + + return false; +} + +static bool ath9k_txq_has_key(struct ath_softc *sc, u32 keyix) +{ + struct ath_hw *ah = sc->sc_ah; + int i; + struct ath_txq *txq; + bool key_in_use = false; + + for (i = 0; !key_in_use && i < ATH9K_NUM_TX_QUEUES; i++) { + if (!ATH_TXQ_SETUP(sc, i)) + continue; + txq = &sc->tx.txq[i]; + if (!txq->axq_depth) + continue; + if (!ath9k_hw_numtxpending(ah, txq->axq_qnum)) + continue; + + ath_txq_lock(sc, txq); + key_in_use = ath9k_txq_list_has_key(&txq->axq_q, keyix); + if (sc->sc_ah->caps.hw_caps & ATH9K_HW_CAP_EDMA) { + int idx = txq->txq_tailidx; + + while (!key_in_use && + !list_empty(&txq->txq_fifo[idx])) { + key_in_use = ath9k_txq_list_has_key( + &txq->txq_fifo[idx], keyix); + INCR(idx, ATH_TXFIFO_DEPTH); + } + } + ath_txq_unlock(sc, txq); + } + + return key_in_use; +} + +static void ath9k_pending_key_del(struct ath_softc *sc, u8 keyix) +{ + struct ath_hw *ah = sc->sc_ah; + struct ath_common *common = ath9k_hw_common(ah); + + if (!test_bit(keyix, ah->pending_del_keymap) || + ath9k_txq_has_key(sc, keyix)) + return; + + /* No more TXQ frames point to this key cache entry, so delete it. */ + clear_bit(keyix, ah->pending_del_keymap); + ath_key_delete(common, keyix); +} + static void ath9k_stop(struct ieee80211_hw *hw) { struct ath_softc *sc = hw->priv; struct ath_hw *ah = sc->sc_ah; struct ath_common *common = ath9k_hw_common(ah); bool prev_idle; + int i; ath9k_deinit_channel_context(sc); @@ -896,6 +964,9 @@ static void ath9k_stop(struct ieee80211_hw *hw) spin_unlock_bh(&sc->sc_pcu_lock); + for (i = 0; i < ATH_KEYMAX; i++) + ath9k_pending_key_del(sc, i); + /* Clear key cache entries explicitly to get rid of any potentially * remaining keys. */ @@ -1712,6 +1783,12 @@ static int ath9k_set_key(struct ieee80211_hw *hw, if (sta) an = (struct ath_node *)sta->drv_priv; + /* Delete pending key cache entries if no more frames are pointing to + * them in TXQs. 
+ */ + for (i = 0; i < ATH_KEYMAX; i++) + ath9k_pending_key_del(sc, i); + switch (cmd) { case SET_KEY: if (sta) @@ -1741,7 +1818,15 @@ static int ath9k_set_key(struct ieee80211_hw *hw, } break; case DISABLE_KEY: - ath_key_delete(common, key->hw_key_idx); + if (ath9k_txq_has_key(sc, key->hw_key_idx)) { + /* Delay key cache entry deletion until there are no + * remaining TXQ frames pointing to this entry. + */ + set_bit(key->hw_key_idx, sc->sc_ah->pending_del_keymap); + ath_hw_keysetmac(common, key->hw_key_idx, NULL); + } else { + ath_key_delete(common, key->hw_key_idx); + } if (an) { for (i = 0; i < ARRAY_SIZE(an->key_idx); i++) { if (an->key_idx[i] != key->hw_key_idx) From 7305d6d4078f8a74935457bc9fbea71c733261ff Mon Sep 17 00:00:00 2001 From: Andreas Persson Date: Mon, 12 Jul 2021 09:54:52 +0200 Subject: [PATCH 08/61] mtd: cfi_cmdset_0002: fix crash when erasing/writing AMD cards commit 2394e628738933aa014093d93093030f6232946d upstream. Erasing an AMD linear flash card (AM29F016D) crashes after the first sector has been erased. Likewise, writing to it crashes after two bytes have been written. The reason is a missing check for a null pointer - the cmdset_priv field is not set for this type of card. 
Fixes: 4844ef80305d ("mtd: cfi_cmdset_0002: Add support for polling status register") Signed-off-by: Andreas Persson Signed-off-by: Miquel Raynal Link: https://lore.kernel.org/linux-mtd/DB6P189MB05830B3530B8087476C5CFE4C1159@DB6P189MB0583.EURP189.PROD.OUTLOOK.COM Signed-off-by: Greg Kroah-Hartman --- drivers/mtd/chips/cfi_cmdset_0002.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/mtd/chips/cfi_cmdset_0002.c b/drivers/mtd/chips/cfi_cmdset_0002.c index c8b9ab40a102..9c98ddef0097 100644 --- a/drivers/mtd/chips/cfi_cmdset_0002.c +++ b/drivers/mtd/chips/cfi_cmdset_0002.c @@ -119,7 +119,7 @@ static int cfi_use_status_reg(struct cfi_private *cfi) struct cfi_pri_amdstd *extp = cfi->cmdset_priv; u8 poll_mask = CFI_POLL_STATUS_REG | CFI_POLL_DQ; - return extp->MinorVersion >= '5' && + return extp && extp->MinorVersion >= '5' && (extp->SoftwareFeatures & poll_mask) == CFI_POLL_STATUS_REG; } From 79dff2a3f41aa15f31d3307b04049a798f784a1c Mon Sep 17 00:00:00 2001 From: Evgeny Novikov Date: Tue, 6 Oct 2020 19:21:22 +0200 Subject: [PATCH 09/61] media: zr364xx: propagate errors from zr364xx_start_readpipe() [ Upstream commit af0321a5be3e5647441eb6b79355beaa592df97a ] zr364xx_start_readpipe() can fail but callers do not care about that. This can result in various negative consequences. The patch adds missed error handling. Found by Linux Driver Verification project (linuxtesting.org). 
Signed-off-by: Evgeny Novikov Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Sasha Levin --- drivers/media/usb/zr364xx/zr364xx.c | 31 ++++++++++++++++++++++------- 1 file changed, 24 insertions(+), 7 deletions(-) diff --git a/drivers/media/usb/zr364xx/zr364xx.c b/drivers/media/usb/zr364xx/zr364xx.c index 02458c9cb5dc..22b34690a016 100644 --- a/drivers/media/usb/zr364xx/zr364xx.c +++ b/drivers/media/usb/zr364xx/zr364xx.c @@ -1331,6 +1331,7 @@ static int zr364xx_board_init(struct zr364xx_camera *cam) { struct zr364xx_pipeinfo *pipe = cam->pipe; unsigned long i; + int err; DBG("board init: %p\n", cam); memset(pipe, 0, sizeof(*pipe)); @@ -1363,9 +1364,8 @@ static int zr364xx_board_init(struct zr364xx_camera *cam) if (i == 0) { printk(KERN_INFO KBUILD_MODNAME ": out of memory. Aborting\n"); - kfree(cam->pipe->transfer_buffer); - cam->pipe->transfer_buffer = NULL; - return -ENOMEM; + err = -ENOMEM; + goto err_free; } else cam->buffer.dwFrames = i; @@ -1380,9 +1380,17 @@ static int zr364xx_board_init(struct zr364xx_camera *cam) /*** end create system buffers ***/ /* start read pipe */ - zr364xx_start_readpipe(cam); + err = zr364xx_start_readpipe(cam); + if (err) + goto err_free; + DBG(": board initialized\n"); return 0; + +err_free: + kfree(cam->pipe->transfer_buffer); + cam->pipe->transfer_buffer = NULL; + return err; } static int zr364xx_probe(struct usb_interface *intf, @@ -1579,10 +1587,19 @@ static int zr364xx_resume(struct usb_interface *intf) if (!cam->was_streaming) return 0; - zr364xx_start_readpipe(cam); + res = zr364xx_start_readpipe(cam); + if (res) + return res; + res = zr364xx_prepare(cam); - if (!res) - zr364xx_start_acquire(cam); + if (res) + goto err_prepare; + + zr364xx_start_acquire(cam); + return 0; + +err_prepare: + zr364xx_stop_readpipe(cam); return res; } #endif From 705660a6d98d22051addf7b849f6a15bec889967 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Thu, 21 Jan 2021 07:44:00 +0100 Subject: [PATCH 10/61] 
media: zr364xx: fix memory leaks in probe() [ Upstream commit ea354b6ddd6f09be29424f41fa75a3e637fea234 ] Syzbot discovered that the probe error handling doesn't clean up the resources allocated in zr364xx_board_init(). There are several related bugs in this code so I have re-written the error handling. 1) Introduce a new function zr364xx_board_uninit() which cleans up the resources in zr364xx_board_init(). 2) In zr364xx_board_init() if the call to zr364xx_start_readpipe() fails then release the "cam->buffer.frame[i].lpvbits" memory before returning. This way every function either allocates everything successfully or it cleans up after itself. 3) Re-write the probe function so that each failure path goto frees the most recent allocation. That way we don't free anything before it has been allocated and we can also verify that everything is freed. 4) Originally, in the probe function the "cam->v4l2_dev.release" pointer was set to "zr364xx_release" near the start but I moved that assignment to the end, after everything had succeeded. The release function was never actually called during the probe cleanup process, but with this change I wanted to make it clear that we don't want to call zr364xx_release() until everything is allocated successfully. Next I re-wrote the zr364xx_release() function. Ideally this would have been a simple matter of copying and pasting the cleanup code from probe and adding an additional call to video_unregister_device(). But there are a couple of quirks to note. 1) The probe function does not call videobuf_mmap_free() and I don't know where the videobuf_mmap is allocated. I left the code as-is to avoid introducing a bug in code I don't understand. 2) The zr364xx_board_uninit() has a call to zr364xx_stop_readpipe() which is a change from the original behavior with regard to unloading the driver. Calling zr364xx_stop_readpipe() on a stopped pipe is not a problem so this is safe and is potentially a bugfix. 
Reported-by: syzbot+b4d54814b339b5c6bbd4@syzkaller.appspotmail.com Signed-off-by: Dan Carpenter Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Sasha Levin --- drivers/media/usb/zr364xx/zr364xx.c | 49 ++++++++++++++++++----------- 1 file changed, 31 insertions(+), 18 deletions(-) diff --git a/drivers/media/usb/zr364xx/zr364xx.c b/drivers/media/usb/zr364xx/zr364xx.c index 22b34690a016..b3f01de9cf37 100644 --- a/drivers/media/usb/zr364xx/zr364xx.c +++ b/drivers/media/usb/zr364xx/zr364xx.c @@ -1187,15 +1187,11 @@ out: return err; } -static void zr364xx_release(struct v4l2_device *v4l2_dev) +static void zr364xx_board_uninit(struct zr364xx_camera *cam) { - struct zr364xx_camera *cam = - container_of(v4l2_dev, struct zr364xx_camera, v4l2_dev); unsigned long i; - v4l2_device_unregister(&cam->v4l2_dev); - - videobuf_mmap_free(&cam->vb_vidq); + zr364xx_stop_readpipe(cam); /* release sys buffers */ for (i = 0; i < FRAMES; i++) { @@ -1206,9 +1202,19 @@ static void zr364xx_release(struct v4l2_device *v4l2_dev) cam->buffer.frame[i].lpvbits = NULL; } - v4l2_ctrl_handler_free(&cam->ctrl_handler); /* release transfer buffer */ kfree(cam->pipe->transfer_buffer); +} + +static void zr364xx_release(struct v4l2_device *v4l2_dev) +{ + struct zr364xx_camera *cam = + container_of(v4l2_dev, struct zr364xx_camera, v4l2_dev); + + videobuf_mmap_free(&cam->vb_vidq); + v4l2_ctrl_handler_free(&cam->ctrl_handler); + zr364xx_board_uninit(cam); + v4l2_device_unregister(&cam->v4l2_dev); kfree(cam); } @@ -1382,11 +1388,14 @@ static int zr364xx_board_init(struct zr364xx_camera *cam) /* start read pipe */ err = zr364xx_start_readpipe(cam); if (err) - goto err_free; + goto err_free_frames; DBG(": board initialized\n"); return 0; +err_free_frames: + for (i = 0; i < FRAMES; i++) + vfree(cam->buffer.frame[i].lpvbits); err_free: kfree(cam->pipe->transfer_buffer); cam->pipe->transfer_buffer = NULL; @@ -1415,12 +1424,10 @@ static int zr364xx_probe(struct usb_interface *intf, if 
(!cam) return -ENOMEM; - cam->v4l2_dev.release = zr364xx_release; err = v4l2_device_register(&intf->dev, &cam->v4l2_dev); if (err < 0) { dev_err(&udev->dev, "couldn't register v4l2_device\n"); - kfree(cam); - return err; + goto free_cam; } hdl = &cam->ctrl_handler; v4l2_ctrl_handler_init(hdl, 1); @@ -1429,7 +1436,7 @@ static int zr364xx_probe(struct usb_interface *intf, if (hdl->error) { err = hdl->error; dev_err(&udev->dev, "couldn't register control\n"); - goto fail; + goto unregister; } /* save the init method used by this camera */ cam->method = id->driver_info; @@ -1502,7 +1509,7 @@ static int zr364xx_probe(struct usb_interface *intf, if (!cam->read_endpoint) { err = -ENOMEM; dev_err(&intf->dev, "Could not find bulk-in endpoint\n"); - goto fail; + goto unregister; } /* v4l */ @@ -1513,10 +1520,11 @@ static int zr364xx_probe(struct usb_interface *intf, /* load zr364xx board specific */ err = zr364xx_board_init(cam); - if (!err) - err = v4l2_ctrl_handler_setup(hdl); if (err) - goto fail; + goto unregister; + err = v4l2_ctrl_handler_setup(hdl); + if (err) + goto board_uninit; spin_lock_init(&cam->slock); @@ -1531,16 +1539,21 @@ static int zr364xx_probe(struct usb_interface *intf, err = video_register_device(&cam->vdev, VFL_TYPE_GRABBER, -1); if (err) { dev_err(&udev->dev, "video_register_device failed\n"); - goto fail; + goto free_handler; } + cam->v4l2_dev.release = zr364xx_release; dev_info(&udev->dev, DRIVER_DESC " controlling device %s\n", video_device_node_name(&cam->vdev)); return 0; -fail: +free_handler: v4l2_ctrl_handler_free(hdl); +board_uninit: + zr364xx_board_uninit(cam); +unregister: v4l2_device_unregister(&cam->v4l2_dev); +free_cam: kfree(cam); return err; } From 1bd505c814ccca797f291a3f788b4f695ee1b95b Mon Sep 17 00:00:00 2001 From: Pavel Skripkin Date: Mon, 1 Mar 2021 21:38:26 +0100 Subject: [PATCH 11/61] media: drivers/media/usb: fix memory leak in zr364xx_probe [ Upstream commit 9c39be40c0155c43343f53e3a439290c0fec5542 ] syzbot reported memory 
leak in zr364xx_probe()[1]. The problem was in invalid error handling order. All error conditions right after v4l2_ctrl_handler_init() must call v4l2_ctrl_handler_free(). Reported-by: syzbot+efe9aefc31ae1e6f7675@syzkaller.appspotmail.com Signed-off-by: Pavel Skripkin Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Sasha Levin --- drivers/media/usb/zr364xx/zr364xx.c | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/drivers/media/usb/zr364xx/zr364xx.c b/drivers/media/usb/zr364xx/zr364xx.c index b3f01de9cf37..25f16ff6dcc7 100644 --- a/drivers/media/usb/zr364xx/zr364xx.c +++ b/drivers/media/usb/zr364xx/zr364xx.c @@ -1436,7 +1436,7 @@ static int zr364xx_probe(struct usb_interface *intf, if (hdl->error) { err = hdl->error; dev_err(&udev->dev, "couldn't register control\n"); - goto unregister; + goto free_hdlr_and_unreg_dev; } /* save the init method used by this camera */ cam->method = id->driver_info; @@ -1509,7 +1509,7 @@ static int zr364xx_probe(struct usb_interface *intf, if (!cam->read_endpoint) { err = -ENOMEM; dev_err(&intf->dev, "Could not find bulk-in endpoint\n"); - goto unregister; + goto free_hdlr_and_unreg_dev; } /* v4l */ @@ -1521,7 +1521,7 @@ static int zr364xx_probe(struct usb_interface *intf, /* load zr364xx board specific */ err = zr364xx_board_init(cam); if (err) - goto unregister; + goto free_hdlr_and_unreg_dev; err = v4l2_ctrl_handler_setup(hdl); if (err) goto board_uninit; @@ -1539,7 +1539,7 @@ static int zr364xx_probe(struct usb_interface *intf, err = video_register_device(&cam->vdev, VFL_TYPE_GRABBER, -1); if (err) { dev_err(&udev->dev, "video_register_device failed\n"); - goto free_handler; + goto board_uninit; } cam->v4l2_dev.release = zr364xx_release; @@ -1547,11 +1547,10 @@ static int zr364xx_probe(struct usb_interface *intf, video_device_node_name(&cam->vdev)); return 0; -free_handler: - v4l2_ctrl_handler_free(hdl); board_uninit: zr364xx_board_uninit(cam); -unregister: 
+free_hdlr_and_unreg_dev: + v4l2_ctrl_handler_free(hdl); v4l2_device_unregister(&cam->v4l2_dev); free_cam: kfree(cam); From fc566b5a21f5bcdc8355906bd4c43bf45dd4fa52 Mon Sep 17 00:00:00 2001 From: Alan Stern Date: Mon, 7 Jun 2021 11:23:07 -0400 Subject: [PATCH 12/61] USB: core: Avoid WARNings for 0-length descriptor requests [ Upstream commit 60dfe484cef45293e631b3a6e8995f1689818172 ] The USB core has utility routines to retrieve various types of descriptors. These routines will now provoke a WARN if they are asked to retrieve 0 bytes (USB "receive" requests must not have zero length), so avert this by checking the size argument at the start. CC: Johan Hovold Reported-and-tested-by: syzbot+7dbcd9ff34dc4ed45240@syzkaller.appspotmail.com Reviewed-by: Johan Hovold Signed-off-by: Alan Stern Link: https://lore.kernel.org/r/20210607152307.GD1768031@rowland.harvard.edu Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin --- drivers/usb/core/message.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/usb/core/message.c b/drivers/usb/core/message.c index 041c68ea329f..7ca908704777 100644 --- a/drivers/usb/core/message.c +++ b/drivers/usb/core/message.c @@ -647,6 +647,9 @@ int usb_get_descriptor(struct usb_device *dev, unsigned char type, int i; int result; + if (size <= 0) /* No point in asking for no data */ + return -EINVAL; + memset(buf, 0, size); /* Make sure we parse really received data */ for (i = 0; i < 3; ++i) { @@ -695,6 +698,9 @@ static int usb_get_string(struct usb_device *dev, unsigned short langid, int i; int result; + if (size <= 0) /* No point in asking for no data */ + return -EINVAL; + for (i = 0; i < 3; ++i) { /* retry on length 0 or stall; some devices are flakey */ result = usb_control_msg(dev, usb_rcvctrlpipe(dev, 0), From 9c97a0539288d29be2fc49465fc1f5d782aa318a Mon Sep 17 00:00:00 2001 From: Adrian Larumbe Date: Wed, 7 Jul 2021 00:43:38 +0100 Subject: [PATCH 13/61] dmaengine: xilinx_dma: Fix read-after-free bug when terminating 
transfers [ Upstream commit 7dd2dd4ff9f3abda601f22b9d01441a0869d20d7 ] When user calls dmaengine_terminate_sync, the driver will clean up any remaining descriptors for all the pending or active transfers that had previously been submitted. However, this might happen whilst the tasklet is invoking the DMA callback for the last finished transfer, so by the time it returns and takes over the channel's spinlock, the list of completed descriptors it was traversing is no longer valid. This leads to a read-after-free situation. Fix it by signalling whether a user-triggered termination has happened by means of a boolean variable. Signed-off-by: Adrian Larumbe Link: https://lore.kernel.org/r/20210706234338.7696-3-adrian.martinezlarumbe@imgtec.com Signed-off-by: Vinod Koul Signed-off-by: Sasha Levin --- drivers/dma/xilinx/xilinx_dma.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/drivers/dma/xilinx/xilinx_dma.c b/drivers/dma/xilinx/xilinx_dma.c index 1b5f3e9f43d7..ce18bca45ff2 100644 --- a/drivers/dma/xilinx/xilinx_dma.c +++ b/drivers/dma/xilinx/xilinx_dma.c @@ -333,6 +333,7 @@ struct xilinx_dma_tx_descriptor { * @genlock: Support genlock mode * @err: Channel has errors * @idle: Check for channel idle + * @terminating: Check for channel being synchronized by user * @tasklet: Cleanup work after irq * @config: Device configuration info * @flush_on_fsync: Flush on Frame sync @@ -370,6 +371,7 @@ struct xilinx_dma_chan { bool genlock; bool err; bool idle; + bool terminating; struct tasklet_struct tasklet; struct xilinx_vdma_config config; bool flush_on_fsync; @@ -844,6 +846,13 @@ static void xilinx_dma_chan_desc_cleanup(struct xilinx_dma_chan *chan) /* Run any dependencies, then free the descriptor */ dma_run_dependencies(&desc->async_tx); xilinx_dma_free_tx_descriptor(chan, desc); + + /* + * While we ran a callback the user called a terminate function, + * which takes care of cleaning up any remaining descriptors + */ + if (chan->terminating) + break; } 
spin_unlock_irqrestore(&chan->lock, flags); @@ -1618,6 +1627,8 @@ static dma_cookie_t xilinx_dma_tx_submit(struct dma_async_tx_descriptor *tx) if (desc->cyclic) chan->cyclic = true; + chan->terminating = false; + spin_unlock_irqrestore(&chan->lock, flags); return cookie; @@ -2074,6 +2085,7 @@ static int xilinx_dma_terminate_all(struct dma_chan *dchan) } /* Remove and free all of the descriptors in the lists */ + chan->terminating = true; xilinx_dma_free_descriptors(chan); chan->idle = true; From 11145efd295b0a65fcdadae398f4043300b83567 Mon Sep 17 00:00:00 2001 From: Yu Kuai Date: Tue, 6 Jul 2021 20:45:21 +0800 Subject: [PATCH 14/61] dmaengine: usb-dmac: Fix PM reference leak in usb_dmac_probe() [ Upstream commit 1da569fa7ec8cb0591c74aa3050d4ea1397778b4 ] pm_runtime_get_sync will increment pm usage counter even it failed. Forgetting to putting operation will result in reference leak here. Fix it by moving the error_pm label above the pm_runtime_put() in the error path. Reported-by: Hulk Robot Signed-off-by: Yu Kuai Link: https://lore.kernel.org/r/20210706124521.1371901-1-yukuai3@huawei.com Signed-off-by: Vinod Koul Signed-off-by: Sasha Levin --- drivers/dma/sh/usb-dmac.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/dma/sh/usb-dmac.c b/drivers/dma/sh/usb-dmac.c index 8f7ceb698226..1cc06900153e 100644 --- a/drivers/dma/sh/usb-dmac.c +++ b/drivers/dma/sh/usb-dmac.c @@ -855,8 +855,8 @@ static int usb_dmac_probe(struct platform_device *pdev) error: of_dma_controller_free(pdev->dev.of_node); - pm_runtime_put(&pdev->dev); error_pm: + pm_runtime_put(&pdev->dev); pm_runtime_disable(&pdev->dev); return ret; } From 12d1322d93a6346483f5dbdd4095d0b64fb1313e Mon Sep 17 00:00:00 2001 From: Dave Gerlach Date: Fri, 16 Jul 2021 09:07:30 -0700 Subject: [PATCH 15/61] ARM: dts: am43x-epos-evm: Reduce i2c0 bus speed for tps65218 [ Upstream commit 20a6b3fd8e2e2c063b25fbf2ee74d86b898e5087 ] Based on the latest timing specifications for the TPS65218 from the 
data sheet, http://www.ti.com/lit/ds/symlink/tps65218.pdf, document SLDS206 from November 2014, we must change the i2c bus speed to better fit within the minimum high SCL time required for proper i2c transfer. When running at 400khz, measurements show that SCL spends 0.8125 uS/1.666 uS high/low which violates the requirement for minimum high period of SCL provided in datasheet Table 7.6 which is 1 uS. Switching to 100khz gives us 5 uS/5 uS high/low which both fall above the minimum given values for 100 khz, 4.0 uS/4.7 uS high/low. Without this patch occasionally a voltage set operation from the kernel will appear to have worked but the actual voltage reflected on the PMIC will not have updated, causing problems especially with cpufreq that may update to a higher OPP without actually raising the voltage on DCDC2, leading to a hang. Signed-off-by: Dave Gerlach Signed-off-by: Kevin Hilman Signed-off-by: Tony Lindgren Signed-off-by: Sasha Levin --- arch/arm/boot/dts/am43x-epos-evm.dts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/boot/dts/am43x-epos-evm.dts b/arch/arm/boot/dts/am43x-epos-evm.dts index a9f191d78b54..d0ea95830d45 100644 --- a/arch/arm/boot/dts/am43x-epos-evm.dts +++ b/arch/arm/boot/dts/am43x-epos-evm.dts @@ -589,7 +589,7 @@ status = "okay"; pinctrl-names = "default"; pinctrl-0 = <&i2c0_pins>; - clock-frequency = <400000>; + clock-frequency = <100000>; tps65218: tps65218@24 { reg = <0x24>; From e37cf26bd56d80f1296db73572af630f1717a538 Mon Sep 17 00:00:00 2001 From: Peter Ujfalusi Date: Sat, 17 Jul 2021 22:00:21 +0300 Subject: [PATCH 16/61] dmaengine: of-dma: router_xlate to return -EPROBE_DEFER if controller is not yet available [ Upstream commit eda97cb095f2958bbad55684a6ca3e7d7af0176a ] If the router_xlate can not find the controller in the available DMA devices then it should return with -EPROBE_DEFER in the same way as the of_dma_request_slave_channel() does. 
The issue can be reproduced if the event router is registered before the DMA controller itself and a driver would request for a channel before the controller is registered. In of_dma_request_slave_channel(): 1. of_dma_find_controller() would find the dma_router 2. ofdma->of_dma_xlate() would fail and returned NULL 3. -ENODEV is returned as error code with this patch we would return in this case the correct -EPROBE_DEFER and the client can try to request the channel later. Signed-off-by: Peter Ujfalusi Link: https://lore.kernel.org/r/20210717190021.21897-1-peter.ujfalusi@gmail.com Signed-off-by: Vinod Koul Signed-off-by: Sasha Levin --- drivers/dma/of-dma.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/drivers/dma/of-dma.c b/drivers/dma/of-dma.c index 4bbf4172b9bf..e3f1d4ab8e4f 100644 --- a/drivers/dma/of-dma.c +++ b/drivers/dma/of-dma.c @@ -65,8 +65,12 @@ static struct dma_chan *of_dma_router_xlate(struct of_phandle_args *dma_spec, return NULL; ofdma_target = of_dma_find_controller(&dma_spec_target); - if (!ofdma_target) - return NULL; + if (!ofdma_target) { + ofdma->dma_router->route_free(ofdma->dma_router->dev, + route_data); + chan = ERR_PTR(-EPROBE_DEFER); + goto err; + } chan = ofdma_target->of_dma_xlate(&dma_spec_target, ofdma_target); if (IS_ERR_OR_NULL(chan)) { @@ -77,6 +81,7 @@ static struct dma_chan *of_dma_router_xlate(struct of_phandle_args *dma_spec, chan->route_data = route_data; } +err: /* * Need to put the node back since the ofdma->of_dma_route_allocate * has taken it for generating the new, translated dma_spec From 9900e06ae6e696209f17b4162ed521e3db45e919 Mon Sep 17 00:00:00 2001 From: Harshvardhan Jha Date: Thu, 8 Jul 2021 13:16:42 +0530 Subject: [PATCH 17/61] scsi: megaraid_mm: Fix end of loop tests for list_for_each_entry() [ Upstream commit 77541f78eadfe9fdb018a7b8b69f0f2af2cf4b82 ] The list_for_each_entry() iterator, "adapter" in this code, can never be NULL. 
If we exit the loop without finding the correct adapter then "adapter" points invalid memory that is an offset from the list head. This will eventually lead to memory corruption and presumably a kernel crash. Link: https://lore.kernel.org/r/20210708074642.23599-1-harshvardhan.jha@oracle.com Acked-by: Sumit Saxena Signed-off-by: Harshvardhan Jha Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin --- drivers/scsi/megaraid/megaraid_mm.c | 21 +++++++++++++++------ 1 file changed, 15 insertions(+), 6 deletions(-) diff --git a/drivers/scsi/megaraid/megaraid_mm.c b/drivers/scsi/megaraid/megaraid_mm.c index 59cca898f088..fcfbf3343b64 100644 --- a/drivers/scsi/megaraid/megaraid_mm.c +++ b/drivers/scsi/megaraid/megaraid_mm.c @@ -246,7 +246,7 @@ mraid_mm_get_adapter(mimd_t __user *umimd, int *rval) mimd_t mimd; uint32_t adapno; int iterator; - + bool is_found; if (copy_from_user(&mimd, umimd, sizeof(mimd_t))) { *rval = -EFAULT; @@ -262,12 +262,16 @@ mraid_mm_get_adapter(mimd_t __user *umimd, int *rval) adapter = NULL; iterator = 0; + is_found = false; list_for_each_entry(adapter, &adapters_list_g, list) { - if (iterator++ == adapno) break; + if (iterator++ == adapno) { + is_found = true; + break; + } } - if (!adapter) { + if (!is_found) { *rval = -ENODEV; return NULL; } @@ -733,6 +737,7 @@ ioctl_done(uioc_t *kioc) uint32_t adapno; int iterator; mraid_mmadp_t* adapter; + bool is_found; /* * When the kioc returns from driver, make sure it still doesn't @@ -755,19 +760,23 @@ ioctl_done(uioc_t *kioc) iterator = 0; adapter = NULL; adapno = kioc->adapno; + is_found = false; con_log(CL_ANN, ( KERN_WARNING "megaraid cmm: completed " "ioctl that was timedout before\n")); list_for_each_entry(adapter, &adapters_list_g, list) { - if (iterator++ == adapno) break; + if (iterator++ == adapno) { + is_found = true; + break; + } } kioc->timedout = 0; - if (adapter) { + if (is_found) mraid_mm_dealloc_kioc( adapter, kioc ); - } + } else { wake_up(&wait_q); From 
7a721a1e18854a0b2ca4335aea1b898abccc7ea5 Mon Sep 17 00:00:00 2001 From: Ye Bin Date: Wed, 13 Jan 2021 14:31:03 +0800 Subject: [PATCH 18/61] scsi: scsi_dh_rdac: Avoid crash during rdac_bus_attach() [ Upstream commit bc546c0c9abb3bb2fb46866b3d1e6ade9695a5f6 ] The following BUG_ON() was observed during RDAC scan: [595952.944297] kernel BUG at drivers/scsi/device_handler/scsi_dh_rdac.c:427! [595952.951143] Internal error: Oops - BUG: 0 [#1] SMP ...... [595953.251065] Call trace: [595953.259054] check_ownership+0xb0/0x118 [595953.269794] rdac_bus_attach+0x1f0/0x4b0 [595953.273787] scsi_dh_handler_attach+0x3c/0xe8 [595953.278211] scsi_dh_add_device+0xc4/0xe8 [595953.282291] scsi_sysfs_add_sdev+0x8c/0x2a8 [595953.286544] scsi_probe_and_add_lun+0x9fc/0xd00 [595953.291142] __scsi_scan_target+0x598/0x630 [595953.295395] scsi_scan_target+0x120/0x130 [595953.299481] fc_user_scan+0x1a0/0x1c0 [scsi_transport_fc] [595953.304944] store_scan+0xb0/0x108 [595953.308420] dev_attr_store+0x44/0x60 [595953.312160] sysfs_kf_write+0x58/0x80 [595953.315893] kernfs_fop_write+0xe8/0x1f0 [595953.319888] __vfs_write+0x60/0x190 [595953.323448] vfs_write+0xac/0x1c0 [595953.326836] ksys_write+0x74/0xf0 [595953.330221] __arm64_sys_write+0x24/0x30 Code is in check_ownership: list_for_each_entry_rcu(tmp, &h->ctlr->dh_list, node) { /* h->sdev should always be valid */ BUG_ON(!tmp->sdev); tmp->sdev->access_state = access_state; } rdac_bus_attach initialize_controller list_add_rcu(&h->node, &h->ctlr->dh_list); h->sdev = sdev; rdac_bus_detach list_del_rcu(&h->node); h->sdev = NULL; Fix the race between rdac_bus_attach() and rdac_bus_detach() where h->sdev is NULL when processing the RDAC attach. Link: https://lore.kernel.org/r/20210113063103.2698953-1-yebin10@huawei.com Reviewed-by: Bart Van Assche Signed-off-by: Ye Bin Signed-off-by: Martin K. 
Petersen Signed-off-by: Sasha Levin --- drivers/scsi/device_handler/scsi_dh_rdac.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/scsi/device_handler/scsi_dh_rdac.c b/drivers/scsi/device_handler/scsi_dh_rdac.c index 5efc959493ec..85a71bafaea7 100644 --- a/drivers/scsi/device_handler/scsi_dh_rdac.c +++ b/drivers/scsi/device_handler/scsi_dh_rdac.c @@ -453,8 +453,8 @@ static int initialize_controller(struct scsi_device *sdev, if (!h->ctlr) err = SCSI_DH_RES_TEMP_UNAVAIL; else { - list_add_rcu(&h->node, &h->ctlr->dh_list); h->sdev = sdev; + list_add_rcu(&h->node, &h->ctlr->dh_list); } spin_unlock(&list_lock); err = SCSI_DH_OK; @@ -778,11 +778,11 @@ static void rdac_bus_detach( struct scsi_device *sdev ) spin_lock(&list_lock); if (h->ctlr) { list_del_rcu(&h->node); - h->sdev = NULL; kref_put(&h->ctlr->kref, release_controller); } spin_unlock(&list_lock); sdev->handler_data = NULL; + synchronize_rcu(); kfree(h); } From 935de7ec7a4d31cb275083de11d5ef52749524e1 Mon Sep 17 00:00:00 2001 From: Sreekanth Reddy Date: Mon, 26 Jul 2021 17:24:02 +0530 Subject: [PATCH 19/61] scsi: core: Avoid printing an error if target_alloc() returns -ENXIO [ Upstream commit 70edd2e6f652f67d854981fd67f9ad0f1deaea92 ] Avoid printing a 'target allocation failed' error if the driver target_alloc() callback function returns -ENXIO. This return value indicates that the corresponding H:C:T:L entry is empty. Removing this error reduces the scan time if the user issues SCAN_WILD_CARD scan operation through sysfs parameter on a host with a lot of empty H:C:T:L entries. Avoiding the printk on -ENXIO matches the behavior of the other callback functions during scanning. Link: https://lore.kernel.org/r/20210726115402.1936-1-sreekanth.reddy@broadcom.com Signed-off-by: Sreekanth Reddy Signed-off-by: Martin K. 
Petersen Signed-off-by: Sasha Levin --- drivers/scsi/scsi_scan.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/scsi/scsi_scan.c b/drivers/scsi/scsi_scan.c index 79232cef1af1..3fd109fd9335 100644 --- a/drivers/scsi/scsi_scan.c +++ b/drivers/scsi/scsi_scan.c @@ -454,7 +454,8 @@ static struct scsi_target *scsi_alloc_target(struct device *parent, error = shost->hostt->target_alloc(starget); if(error) { - dev_printk(KERN_ERR, dev, "target allocation failed, error %d\n", error); + if (error != -ENXIO) + dev_err(dev, "target allocation failed, error %d\n", error); /* don't want scsi_target_reap to do the final * put because it will be under the host lock */ scsi_target_destroy(starget); From 69aa1a1a569f5c6d554b59352130ef363342ed4c Mon Sep 17 00:00:00 2001 From: lijinlin Date: Tue, 27 Jul 2021 11:44:55 +0800 Subject: [PATCH 20/61] scsi: core: Fix capacity set to zero after offlinining device [ Upstream commit f0f82e2476f6adb9c7a0135cfab8091456990c99 ] After adding physical volumes to a volume group through vgextend, the kernel will rescan the partitions. This in turn will cause the device capacity to be queried. If the device status is set to offline through sysfs at this time, READ CAPACITY command will return a result which the host byte is DID_NO_CONNECT, and the capacity of the device will be set to zero in read_capacity_error(). After setting device status back to running, the capacity of the device will remain stuck at zero. Fix this issue by rescanning device when the device state changes to SDEV_RUNNING. Link: https://lore.kernel.org/r/20210727034455.1494960-1-lijinlin3@huawei.com Reviewed-by: Bart Van Assche Signed-off-by: lijinlin Signed-off-by: Wu Bo Signed-off-by: Martin K. 
Petersen Signed-off-by: Sasha Levin --- drivers/scsi/scsi_sysfs.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/scsi/scsi_sysfs.c b/drivers/scsi/scsi_sysfs.c index 6d7362e7367e..11592ec7b23e 100644 --- a/drivers/scsi/scsi_sysfs.c +++ b/drivers/scsi/scsi_sysfs.c @@ -787,11 +787,14 @@ store_state_field(struct device *dev, struct device_attribute *attr, mutex_lock(&sdev->state_mutex); ret = scsi_device_set_state(sdev, state); /* - * If the device state changes to SDEV_RUNNING, we need to run - * the queue to avoid I/O hang. + * If the device state changes to SDEV_RUNNING, we need to + * rescan the device to revalidate it, and run the queue to + * avoid I/O hang. */ - if (ret == 0 && state == SDEV_RUNNING) + if (ret == 0 && state == SDEV_RUNNING) { + scsi_rescan_device(dev); blk_mq_run_hw_queues(sdev->request_queue, true); + } mutex_unlock(&sdev->state_mutex); return ret == 0 ? count : -EINVAL; From be7043679967516f41c576f36f9fafeffd737484 Mon Sep 17 00:00:00 2001 From: Sudeep Holla Date: Sat, 26 Jun 2021 02:01:03 +0200 Subject: [PATCH 21/61] ARM: dts: nomadik: Fix up interrupt controller node names [ Upstream commit 47091f473b364c98207c4def197a0ae386fc9af1 ] Once the new schema interrupt-controller/arm,vic.yaml is added, we get the below warnings: arch/arm/boot/dts/ste-nomadik-nhk15.dt.yaml: intc@10140000: $nodename:0: 'intc@10140000' does not match '^interrupt-controller(@[0-9a-f,]+)*$' Fix the node names for the interrupt controller to conform to the standard node name interrupt-controller@.. 
Signed-off-by: Sudeep Holla Signed-off-by: Linus Walleij Cc: Linus Walleij Link: https://lore.kernel.org/r/20210617210825.3064367-2-sudeep.holla@arm.com Link: https://lore.kernel.org/r/20210626000103.830184-1-linus.walleij@linaro.org' Signed-off-by: Arnd Bergmann Signed-off-by: Sasha Levin --- arch/arm/boot/dts/ste-nomadik-stn8815.dtsi | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm/boot/dts/ste-nomadik-stn8815.dtsi b/arch/arm/boot/dts/ste-nomadik-stn8815.dtsi index f78b4eabd68c..e7178a6db6be 100644 --- a/arch/arm/boot/dts/ste-nomadik-stn8815.dtsi +++ b/arch/arm/boot/dts/ste-nomadik-stn8815.dtsi @@ -755,14 +755,14 @@ status = "disabled"; }; - vica: intc@10140000 { + vica: interrupt-controller@10140000 { compatible = "arm,versatile-vic"; interrupt-controller; #interrupt-cells = <1>; reg = <0x10140000 0x20>; }; - vicb: intc@10140020 { + vicb: interrupt-controller@10140020 { compatible = "arm,versatile-vic"; interrupt-controller; #interrupt-cells = <1>; From f92dc3a89dd8d7b526c741c3df428cecf568b3a1 Mon Sep 17 00:00:00 2001 From: "Ivan T. Ivanov" Date: Wed, 4 Aug 2021 11:13:39 +0300 Subject: [PATCH 22/61] net: usb: lan78xx: don't modify phy_device state concurrently [ Upstream commit 6b67d4d63edece1033972214704c04f36c5be89a ] Currently phy_device state could be left in inconsistent state shown by following alert message[1]. This is because phy_read_status could be called concurrently from lan78xx_delayedwork, phy_state_machine and __ethtool_get_link. Fix this by making sure that phy_device state is updated atomically. [1] lan78xx 1-1.1.1:1.0 eth0: No phy led trigger registered for speed(-1) Signed-off-by: Ivan T. Ivanov Signed-off-by: David S. 
Miller Signed-off-by: Sasha Levin --- drivers/net/usb/lan78xx.c | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/drivers/net/usb/lan78xx.c b/drivers/net/usb/lan78xx.c index 71cc5b63d8ce..92d9d3407b79 100644 --- a/drivers/net/usb/lan78xx.c +++ b/drivers/net/usb/lan78xx.c @@ -1159,7 +1159,7 @@ static int lan78xx_link_reset(struct lan78xx_net *dev) { struct phy_device *phydev = dev->net->phydev; struct ethtool_link_ksettings ecmd; - int ladv, radv, ret; + int ladv, radv, ret, link; u32 buf; /* clear LAN78xx interrupt status */ @@ -1167,9 +1167,12 @@ static int lan78xx_link_reset(struct lan78xx_net *dev) if (unlikely(ret < 0)) return -EIO; + mutex_lock(&phydev->lock); phy_read_status(phydev); + link = phydev->link; + mutex_unlock(&phydev->lock); - if (!phydev->link && dev->link_on) { + if (!link && dev->link_on) { dev->link_on = false; /* reset MAC */ @@ -1182,7 +1185,7 @@ static int lan78xx_link_reset(struct lan78xx_net *dev) return -EIO; del_timer(&dev->stat_monitor); - } else if (phydev->link && !dev->link_on) { + } else if (link && !dev->link_on) { dev->link_on = true; phy_ethtool_ksettings_get(phydev, &ecmd); @@ -1471,9 +1474,14 @@ static int lan78xx_set_eee(struct net_device *net, struct ethtool_eee *edata) static u32 lan78xx_get_link(struct net_device *net) { - phy_read_status(net->phydev); + u32 link; - return net->phydev->link; + mutex_lock(&net->phydev->lock); + phy_read_status(net->phydev); + link = net->phydev->link; + mutex_unlock(&net->phydev->lock); + + return link; } static void lan78xx_get_drvinfo(struct net_device *net, From 5b14c1f16e2d11b093ccc7c74aabed8199d05cda Mon Sep 17 00:00:00 2001 From: Bing Guo Date: Mon, 19 Jul 2021 18:24:06 -0400 Subject: [PATCH 23/61] drm/amd/display: Fix Dynamic bpp issue with 8K30 with Navi 1X [ Upstream commit 06050a0f01dbac2ca33145ef19a72041206ea983 ] Why: In DCN2x, HW doesn't automatically divide MASTER_UPDATE_LOCK_DB_X by the number of pipes ODM Combined. 
How: Set MASTER_UPDATE_LOCK_DB_X to the value that is adjusted by the number of pipes ODM Combined. Reviewed-by: Martin Leung Acked-by: Aurabindo Pillai Signed-off-by: Bing Guo Signed-off-by: Alex Deucher Signed-off-by: Sasha Levin --- drivers/gpu/drm/amd/display/dc/dcn20/dcn20_optc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_optc.c b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_optc.c index 8d5cfd5357c7..03e207333953 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_optc.c +++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_optc.c @@ -362,7 +362,7 @@ void optc2_lock_doublebuffer_enable(struct timing_generator *optc) REG_UPDATE_2(OTG_GLOBAL_CONTROL1, MASTER_UPDATE_LOCK_DB_X, - h_blank_start - 200 - 1, + (h_blank_start - 200 - 1) / optc1->opp_count, MASTER_UPDATE_LOCK_DB_Y, v_blank_start - 1); } From 911a8141efddab3cde50a00d38aa4e1427c2da6f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ole=20Bj=C3=B8rn=20Midtb=C3=B8?= Date: Sat, 17 Oct 2020 13:15:44 +0200 Subject: [PATCH 24/61] Bluetooth: hidp: use correct wait queue when removing ctrl_wait MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit cca342d98bef68151a80b024f7bf5f388d1fbdea ] A different wait queue was used when removing ctrl_wait than when adding it. This effectively made the remove operation without locking compared to other operations on the wait queue ctrl_wait was part of. This caused issues like below where dead000000000100 is LIST_POISON1 and dead000000000200 is LIST_POISON2. list_add corruption. next->prev should be prev (ffffffc1b0a33a08), \ but was dead000000000200. (next=ffffffc03ac77de0). ------------[ cut here ]------------ CPU: 3 PID: 2138 Comm: bluetoothd Tainted: G O 4.4.238+ #9 ... 
---[ end trace 0adc2158f0646eac ]--- Call trace: [] __list_add+0x38/0xb0 [] add_wait_queue+0x4c/0x68 [] __pollwait+0xec/0x100 [] bt_sock_poll+0x74/0x200 [] sock_poll+0x110/0x128 [] do_sys_poll+0x220/0x480 [] SyS_poll+0x80/0x138 [] __sys_trace_return+0x0/0x4 Unable to handle kernel paging request at virtual address dead000000000100 ... CPU: 4 PID: 5387 Comm: kworker/u15:3 Tainted: G W O 4.4.238+ #9 ... Call trace: [] __wake_up_common+0x7c/0xa8 [] __wake_up+0x50/0x70 [] sock_def_wakeup+0x58/0x60 [] l2cap_sock_teardown_cb+0x200/0x224 [] l2cap_chan_del+0xa4/0x298 [] l2cap_conn_del+0x118/0x198 [] l2cap_disconn_cfm+0x6c/0x78 [] hci_event_packet+0x564/0x2e30 [] hci_rx_work+0x10c/0x360 [] process_one_work+0x268/0x460 [] worker_thread+0x268/0x480 [] kthread+0x118/0x128 [] ret_from_fork+0x10/0x20 ---[ end trace 0adc2158f0646ead ]--- Signed-off-by: Ole Bjørn Midtbø Signed-off-by: Marcel Holtmann Signed-off-by: Sasha Levin --- net/bluetooth/hidp/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/bluetooth/hidp/core.c b/net/bluetooth/hidp/core.c index bef84b95e2c4..ac98e3b37ab4 100644 --- a/net/bluetooth/hidp/core.c +++ b/net/bluetooth/hidp/core.c @@ -1290,7 +1290,7 @@ static int hidp_session_thread(void *arg) /* cleanup runtime environment */ remove_wait_queue(sk_sleep(session->intr_sock->sk), &intr_wait); - remove_wait_queue(sk_sleep(session->intr_sock->sk), &ctrl_wait); + remove_wait_queue(sk_sleep(session->ctrl_sock->sk), &ctrl_wait); wake_up_interruptible(&session->report_queue); hidp_del_timer(session); From cb9a9d5fe636492d6b26fc0de267f8ad50724ed6 Mon Sep 17 00:00:00 2001 From: Frank Wunderlich Date: Sat, 31 Jul 2021 09:47:37 +0200 Subject: [PATCH 25/61] iommu: Check if group is NULL before remove device [ Upstream commit 5aa95d8834e07907e64937d792c12ffef7fb271f ] If probe_device is failing, iommu_group is not initialized because iommu_group_add_device is not reached, so freeing it will result in NULL pointer access. 
iommu_bus_init ->bus_iommu_probe ->probe_iommu_group in for each:/* return -22 in fail case */ ->iommu_probe_device ->__iommu_probe_device /* return -22 here.*/ -> ops->probe_device /* return -22 here.*/ -> iommu_group_get_for_dev -> ops->device_group -> iommu_group_add_device //good case ->remove_iommu_group //in fail case, it will remove group ->iommu_release_device ->iommu_group_remove_device // here we don't have group In my case ops->probe_device (mtk_iommu_probe_device from mtk_iommu_v1.c) is due to failing fwspec->ops mismatch. Fixes: d72e31c93746 ("iommu: IOMMU Groups") Signed-off-by: Frank Wunderlich Link: https://lore.kernel.org/r/20210731074737.4573-1-linux@fw-web.de Signed-off-by: Joerg Roedel Signed-off-by: Sasha Levin --- drivers/iommu/iommu.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c index 9d7232e26ecf..c5758fb696cc 100644 --- a/drivers/iommu/iommu.c +++ b/drivers/iommu/iommu.c @@ -775,6 +775,9 @@ void iommu_group_remove_device(struct device *dev) struct iommu_group *group = dev->iommu_group; struct group_device *tmp_device, *device = NULL; + if (!group) + return; + dev_info(dev, "Removing from iommu group %d\n", group->id); /* Pre-notify listeners that a device is being removed. */ From 9112ebc2990af22ee80c3f8bcd911a2e5e9414f0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Beh=C3=BAn?= Date: Thu, 1 Jul 2021 00:56:01 +0200 Subject: [PATCH 26/61] cpufreq: armada-37xx: forbid cpufreq for 1.2 GHz variant MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 484f2b7c61b9ae58cc00c5127bcbcd9177af8dfe ] The 1.2 GHz variant of the Armada 3720 SOC is unstable with DVFS: when the SOC boots, the WTMI firmware sets clocks and AVS values that work correctly with 1.2 GHz CPU frequency, but random crashes occur once cpufreq driver starts scaling. 
We do not know currently what is the reason: - it may be that the voltage value for L0 for 1.2 GHz variant provided by the vendor in the OTP is simply incorrect when scaling is used, - it may be that some delay is needed somewhere, - it may be something else. The most sane solution now seems to be to simply forbid the cpufreq driver on 1.2 GHz variant. Signed-off-by: Marek Behún Fixes: 92ce45fb875d ("cpufreq: Add DVFS support for Armada 37xx") Signed-off-by: Viresh Kumar Signed-off-by: Sasha Levin --- drivers/cpufreq/armada-37xx-cpufreq.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/cpufreq/armada-37xx-cpufreq.c b/drivers/cpufreq/armada-37xx-cpufreq.c index e4782f562e7a..2de7fd18f66a 100644 --- a/drivers/cpufreq/armada-37xx-cpufreq.c +++ b/drivers/cpufreq/armada-37xx-cpufreq.c @@ -102,7 +102,11 @@ struct armada_37xx_dvfs { }; static struct armada_37xx_dvfs armada_37xx_dvfs[] = { - {.cpu_freq_max = 1200*1000*1000, .divider = {1, 2, 4, 6} }, + /* + * The cpufreq scaling for 1.2 GHz variant of the SOC is currently + * unstable because we do not know how to configure it properly. + */ + /* {.cpu_freq_max = 1200*1000*1000, .divider = {1, 2, 4, 6} }, */ {.cpu_freq_max = 1000*1000*1000, .divider = {1, 2, 4, 5} }, {.cpu_freq_max = 800*1000*1000, .divider = {1, 2, 3, 4} }, {.cpu_freq_max = 600*1000*1000, .divider = {2, 4, 5, 6} }, From b264e37b3517c64fa52ef6bc789abb98198b5b25 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Sun, 8 Aug 2021 16:04:40 -0700 Subject: [PATCH 27/61] dccp: add do-while-0 stubs for dccp_pr_debug macros [ Upstream commit 86aab09a4870bb8346c9579864588c3d7f555299 ] GCC complains about empty macros in an 'if' statement, so convert them to 'do {} while (0)' macros. 
Fixes these build warnings: net/dccp/output.c: In function 'dccp_xmit_packet': ../net/dccp/output.c:283:71: warning: suggest braces around empty body in an 'if' statement [-Wempty-body] 283 | dccp_pr_debug("transmit_skb() returned err=%d\n", err); net/dccp/ackvec.c: In function 'dccp_ackvec_update_old': ../net/dccp/ackvec.c:163:80: warning: suggest braces around empty body in an 'else' statement [-Wempty-body] 163 | (unsigned long long)seqno, state); Fixes: dc841e30eaea ("dccp: Extend CCID packet dequeueing interface") Fixes: 380240864451 ("dccp ccid-2: Update code for the Ack Vector input/registration routine") Signed-off-by: Randy Dunlap Cc: dccp@vger.kernel.org Cc: "David S. Miller" Cc: Jakub Kicinski Cc: Gerrit Renker Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- net/dccp/dccp.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/net/dccp/dccp.h b/net/dccp/dccp.h index 9c3b27c257bb..cb818617699c 100644 --- a/net/dccp/dccp.h +++ b/net/dccp/dccp.h @@ -41,9 +41,9 @@ extern bool dccp_debug; #define dccp_pr_debug_cat(format, a...) DCCP_PRINTK(dccp_debug, format, ##a) #define dccp_debug(fmt, a...) dccp_pr_debug_cat(KERN_DEBUG fmt, ##a) #else -#define dccp_pr_debug(format, a...) -#define dccp_pr_debug_cat(format, a...) -#define dccp_debug(format, a...) +#define dccp_pr_debug(format, a...) do {} while (0) +#define dccp_pr_debug_cat(format, a...) do {} while (0) +#define dccp_debug(format, a...) do {} while (0) #endif extern struct inet_hashinfo dccp_hashinfo; From b9a59636c4bfc5f8324c8580cb46c4430ed2c4a6 Mon Sep 17 00:00:00 2001 From: Parav Pandit Date: Wed, 21 Jul 2021 17:26:47 +0300 Subject: [PATCH 28/61] virtio: Protect vqs list access [ Upstream commit 0e566c8f0f2e8325e35f6f97e13cde5356b41814 ] VQs may be accessed to mark the device broken while they are created/destroyed. Hence protect the access to the vqs list. 
Fixes: e2dcdfe95c0b ("virtio: virtio_break_device() to mark all virtqueues broken.") Signed-off-by: Parav Pandit Link: https://lore.kernel.org/r/20210721142648.1525924-4-parav@nvidia.com Signed-off-by: Michael S. Tsirkin Signed-off-by: Sasha Levin --- drivers/virtio/virtio.c | 1 + drivers/virtio/virtio_ring.c | 8 ++++++++ include/linux/virtio.h | 1 + 3 files changed, 10 insertions(+) diff --git a/drivers/virtio/virtio.c b/drivers/virtio/virtio.c index a977e32a88f2..59a05f1b8105 100644 --- a/drivers/virtio/virtio.c +++ b/drivers/virtio/virtio.c @@ -342,6 +342,7 @@ int register_virtio_device(struct virtio_device *dev) virtio_add_status(dev, VIRTIO_CONFIG_S_ACKNOWLEDGE); INIT_LIST_HEAD(&dev->vqs); + spin_lock_init(&dev->vqs_list_lock); /* * device_add() causes the bus infrastructure to look for a matching diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c index 97e8a195e18f..f6011c9ed32f 100644 --- a/drivers/virtio/virtio_ring.c +++ b/drivers/virtio/virtio_ring.c @@ -1668,7 +1668,9 @@ static struct virtqueue *vring_create_virtqueue_packed( cpu_to_le16(vq->packed.event_flags_shadow); } + spin_lock(&vdev->vqs_list_lock); list_add_tail(&vq->vq.list, &vdev->vqs); + spin_unlock(&vdev->vqs_list_lock); return &vq->vq; err_desc_extra: @@ -2126,7 +2128,9 @@ struct virtqueue *__vring_new_virtqueue(unsigned int index, memset(vq->split.desc_state, 0, vring.num * sizeof(struct vring_desc_state_split)); + spin_lock(&vdev->vqs_list_lock); list_add_tail(&vq->vq.list, &vdev->vqs); + spin_unlock(&vdev->vqs_list_lock); return &vq->vq; } EXPORT_SYMBOL_GPL(__vring_new_virtqueue); @@ -2210,7 +2214,9 @@ void vring_del_virtqueue(struct virtqueue *_vq) } if (!vq->packed_ring) kfree(vq->split.desc_state); + spin_lock(&vq->vq.vdev->vqs_list_lock); list_del(&_vq->list); + spin_unlock(&vq->vq.vdev->vqs_list_lock); kfree(vq); } EXPORT_SYMBOL_GPL(vring_del_virtqueue); @@ -2274,10 +2280,12 @@ void virtio_break_device(struct virtio_device *dev) { struct virtqueue *_vq; + 
spin_lock(&dev->vqs_list_lock); list_for_each_entry(_vq, &dev->vqs, list) { struct vring_virtqueue *vq = to_vvq(_vq); vq->broken = true; } + spin_unlock(&dev->vqs_list_lock); } EXPORT_SYMBOL_GPL(virtio_break_device); diff --git a/include/linux/virtio.h b/include/linux/virtio.h index 15f906e4a748..7c075463c7f2 100644 --- a/include/linux/virtio.h +++ b/include/linux/virtio.h @@ -111,6 +111,7 @@ struct virtio_device { bool config_enabled; bool config_change_pending; spinlock_t config_lock; + spinlock_t vqs_list_lock; /* Protects VQs list access */ struct device dev; struct virtio_device_id id; const struct virtio_config_ops *config; From 73a45f75a07b0abe92079cace5cb3caa185b5caf Mon Sep 17 00:00:00 2001 From: Xie Yongji Date: Wed, 28 Jul 2021 21:07:56 +0800 Subject: [PATCH 29/61] vhost: Fix the calculation in vhost_overflow() [ Upstream commit f7ad318ea0ad58ebe0e595e59aed270bb643b29b ] This fixes the incorrect calculation for integer overflow when the last address of iova range is 0xffffffff. Fixes: ec33d031a14b ("vhost: detect 32 bit integer wrap around") Reported-by: Jason Wang Signed-off-by: Xie Yongji Acked-by: Jason Wang Link: https://lore.kernel.org/r/20210728130756.97-2-xieyongji@bytedance.com Signed-off-by: Michael S. Tsirkin Signed-off-by: Sasha Levin --- drivers/vhost/vhost.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c index a279ecacbf60..97be299f0a8d 100644 --- a/drivers/vhost/vhost.c +++ b/drivers/vhost/vhost.c @@ -702,10 +702,16 @@ static bool log_access_ok(void __user *log_base, u64 addr, unsigned long sz) (sz + VHOST_PAGE_SIZE * 8 - 1) / VHOST_PAGE_SIZE / 8); } +/* Make sure 64 bit math will not overflow. */ static bool vhost_overflow(u64 uaddr, u64 size) { - /* Make sure 64 bit math will not overflow. 
*/ - return uaddr > ULONG_MAX || size > ULONG_MAX || uaddr > ULONG_MAX - size; + if (uaddr > ULONG_MAX || size > ULONG_MAX) + return true; + + if (!size) + return false; + + return uaddr > ULONG_MAX - size + 1; } /* Caller should have vq mutex and device mutex. */ From 1fe038030cc8b38a656514a3e50ae57ba8bfeb63 Mon Sep 17 00:00:00 2001 From: Ilya Leoshkevich Date: Thu, 12 Aug 2021 17:18:10 +0200 Subject: [PATCH 30/61] bpf: Clear zext_dst of dead insns [ Upstream commit 45c709f8c71b525b51988e782febe84ce933e7e0 ] "access skb fields ok" verifier test fails on s390 with the "verifier bug. zext_dst is set, but no reg is defined" message. The first insns of the test prog are ... 0: 61 01 00 00 00 00 00 00 ldxw %r0,[%r1+0] 8: 35 00 00 01 00 00 00 00 jge %r0,0,1 10: 61 01 00 08 00 00 00 00 ldxw %r0,[%r1+8] ... and the 3rd one is dead (this does not look intentional to me, but this is a separate topic). sanitize_dead_code() converts dead insns into "ja -1", but keeps zext_dst. When opt_subreg_zext_lo32_rnd_hi32() tries to parse such an insn, it sees this discrepancy and bails. This problem can be seen only with JITs whose bpf_jit_needs_zext() returns true. Fix by clearing dead insns' zext_dst. The commits that contributed to this problem are: 1. 5aa5bd14c5f8 ("bpf: add initial suite for selftests"), which introduced the test with the dead code. 2. 5327ed3d44b7 ("bpf: verifier: mark verified-insn with sub-register zext flag"), which introduced the zext_dst flag. 3. 83a2881903f3 ("bpf: Account for BPF_FETCH in insn_has_def32()"), which introduced the sanity check. 4. 9183671af6db ("bpf: Fix leakage under speculation on mispredicted branches"), which bisect points to. It's best to fix this on stable branches that contain the second one, since that's the point where the inconsistency was introduced.
Fixes: 5327ed3d44b7 ("bpf: verifier: mark verified-insn with sub-register zext flag") Signed-off-by: Ilya Leoshkevich Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20210812151811.184086-2-iii@linux.ibm.com Signed-off-by: Sasha Levin --- kernel/bpf/verifier.c | 1 + 1 file changed, 1 insertion(+) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 52c2b11a0b47..0b5a446ee59c 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -8586,6 +8586,7 @@ static void sanitize_dead_code(struct bpf_verifier_env *env) if (aux_data[i].seen) continue; memcpy(insn + i, &trap, sizeof(trap)); + aux_data[i].zext_dst = false; } } From a9fb0f1559804a07b44fc82dae6a9cb3c3b4de08 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Thu, 12 Aug 2021 14:42:39 -0700 Subject: [PATCH 31/61] bnxt: don't lock the tx queue from napi poll [ Upstream commit 3c603136c9f82833813af77185618de5af67676c ] We can't take the tx lock from the napi poll routine, because netpoll can poll napi at any moment, including with the tx lock already held. The tx lock is protecting against two paths - the disable path, and (as Michael points out) the NETDEV_TX_BUSY case which may occur if NAPI completions race with start_xmit and both decide to re-enable the queue. For the disable/ifdown path use synchronize_net() to make sure closing the device does not race with restarting the queues. Annotate accesses to dev_state against data races. For the NAPI cleanup vs start_xmit path - appropriate barriers are already in place in the main spot where Tx queue is stopped but we need to do the same careful dance in the TX_BUSY case.
Fixes: c0c050c58d84 ("bnxt_en: New Broadcom ethernet driver.") Reviewed-by: Michael Chan Reviewed-by: Edwin Peer Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 54 ++++++++++++++--------- 1 file changed, 32 insertions(+), 22 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 287ea792922a..8111aefb2411 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -360,6 +360,26 @@ static u16 bnxt_xmit_get_cfa_action(struct sk_buff *skb) return md_dst->u.port_info.port_id; } +static bool bnxt_txr_netif_try_stop_queue(struct bnxt *bp, + struct bnxt_tx_ring_info *txr, + struct netdev_queue *txq) +{ + netif_tx_stop_queue(txq); + + /* netif_tx_stop_queue() must be done before checking + * tx index in bnxt_tx_avail() below, because in + * bnxt_tx_int(), we update tx index before checking for + * netif_tx_queue_stopped(). + */ + smp_mb(); + if (bnxt_tx_avail(bp, txr) > bp->tx_wake_thresh) { + netif_tx_wake_queue(txq); + return false; + } + + return true; +} + static netdev_tx_t bnxt_start_xmit(struct sk_buff *skb, struct net_device *dev) { struct bnxt *bp = netdev_priv(dev); @@ -387,8 +407,8 @@ static netdev_tx_t bnxt_start_xmit(struct sk_buff *skb, struct net_device *dev) free_size = bnxt_tx_avail(bp, txr); if (unlikely(free_size < skb_shinfo(skb)->nr_frags + 2)) { - netif_tx_stop_queue(txq); - return NETDEV_TX_BUSY; + if (bnxt_txr_netif_try_stop_queue(bp, txr, txq)) + return NETDEV_TX_BUSY; } length = skb->len; @@ -597,16 +617,7 @@ tx_done: if (netdev_xmit_more() && !tx_buf->is_push) bnxt_db_write(bp, &txr->tx_db, prod); - netif_tx_stop_queue(txq); - - /* netif_tx_stop_queue() must be done before checking - * tx index in bnxt_tx_avail() below, because in - * bnxt_tx_int(), we update tx index before checking for - * netif_tx_queue_stopped(). 
- */ - smp_mb(); - if (bnxt_tx_avail(bp, txr) > bp->tx_wake_thresh) - netif_tx_wake_queue(txq); + bnxt_txr_netif_try_stop_queue(bp, txr, txq); } return NETDEV_TX_OK; @@ -690,14 +701,9 @@ next_tx_int: smp_mb(); if (unlikely(netif_tx_queue_stopped(txq)) && - (bnxt_tx_avail(bp, txr) > bp->tx_wake_thresh)) { - __netif_tx_lock(txq, smp_processor_id()); - if (netif_tx_queue_stopped(txq) && - bnxt_tx_avail(bp, txr) > bp->tx_wake_thresh && - txr->dev_state != BNXT_DEV_STATE_CLOSING) - netif_tx_wake_queue(txq); - __netif_tx_unlock(txq); - } + bnxt_tx_avail(bp, txr) > bp->tx_wake_thresh && + READ_ONCE(txr->dev_state) != BNXT_DEV_STATE_CLOSING) + netif_tx_wake_queue(txq); } static struct page *__bnxt_alloc_rx_page(struct bnxt *bp, dma_addr_t *mapping, @@ -8371,9 +8377,11 @@ void bnxt_tx_disable(struct bnxt *bp) if (bp->tx_ring) { for (i = 0; i < bp->tx_nr_rings; i++) { txr = &bp->tx_ring[i]; - txr->dev_state = BNXT_DEV_STATE_CLOSING; + WRITE_ONCE(txr->dev_state, BNXT_DEV_STATE_CLOSING); } } + /* Make sure napi polls see @dev_state change */ + synchronize_net(); /* Drop carrier first to prevent TX timeout */ netif_carrier_off(bp->dev); /* Stop all TX queues */ @@ -8387,8 +8395,10 @@ void bnxt_tx_enable(struct bnxt *bp) for (i = 0; i < bp->tx_nr_rings; i++) { txr = &bp->tx_ring[i]; - txr->dev_state = 0; + WRITE_ONCE(txr->dev_state, 0); } + /* Make sure napi polls see @dev_state change */ + synchronize_net(); netif_tx_wake_all_queues(bp->dev); if (bp->link_info.link_up) netif_carrier_on(bp->dev); From 2bc75713434b548dbe84ba3e4c64c053046b31c5 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Thu, 12 Aug 2021 14:42:40 -0700 Subject: [PATCH 32/61] bnxt: disable napi before canceling DIM [ Upstream commit 01cca6b9330ac7460de44eeeb3a0607f8aae69ff ] napi schedules DIM, napi has to be disabled first, then DIM canceled. Noticed while reading the code. 
Fixes: 0bc0b97fca73 ("bnxt_en: cleanup DIM work on device shutdown") Fixes: 6a8788f25625 ("bnxt_en: add support for software dynamic interrupt moderation") Reviewed-by: Michael Chan Reviewed-by: Edwin Peer Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 8111aefb2411..1b5839ad97b6 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -8346,10 +8346,9 @@ static void bnxt_disable_napi(struct bnxt *bp) for (i = 0; i < bp->cp_nr_rings; i++) { struct bnxt_cp_ring_info *cpr = &bp->bnapi[i]->cp_ring; + napi_disable(&bp->bnapi[i]->napi); if (bp->bnapi[i]->rx_ring) cancel_work_sync(&cpr->dim.work); - - napi_disable(&bp->bnapi[i]->napi); } } From a73b9aa142691c2ae313980a8734997a78f74b22 Mon Sep 17 00:00:00 2001 From: Pavel Skripkin Date: Fri, 13 Aug 2021 18:14:33 +0300 Subject: [PATCH 33/61] net: 6pack: fix slab-out-of-bounds in decode_data [ Upstream commit 19d1532a187669ce86d5a2696eb7275310070793 ] Syzbot reported slab-out-of-bounds write in decode_data(). The problem was in missing validation checks. Syzbot's reproducer generated malicious input, which caused decode_data() to be called a lot in sixpack_decode(). Since rx_count_cooked is only 400 bytes and no one has reported before that 400 bytes is not enough, let's just check if input is malicious and complain about buffer overrun. Fail log: ================================================================== BUG: KASAN: slab-out-of-bounds in drivers/net/hamradio/6pack.c:843 Write of size 1 at addr ffff888087c5544e by task kworker/u4:0/7 CPU: 0 PID: 7 Comm: kworker/u4:0 Not tainted 5.6.0-rc3-syzkaller #0 ...
Workqueue: events_unbound flush_to_ldisc Call Trace: __dump_stack lib/dump_stack.c:77 [inline] dump_stack+0x197/0x210 lib/dump_stack.c:118 print_address_description.constprop.0.cold+0xd4/0x30b mm/kasan/report.c:374 __kasan_report.cold+0x1b/0x32 mm/kasan/report.c:506 kasan_report+0x12/0x20 mm/kasan/common.c:641 __asan_report_store1_noabort+0x17/0x20 mm/kasan/generic_report.c:137 decode_data.part.0+0x23b/0x270 drivers/net/hamradio/6pack.c:843 decode_data drivers/net/hamradio/6pack.c:965 [inline] sixpack_decode drivers/net/hamradio/6pack.c:968 [inline] Reported-and-tested-by: syzbot+fc8cd9a673d4577fb2e4@syzkaller.appspotmail.com Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Pavel Skripkin Reviewed-by: Dan Carpenter Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/hamradio/6pack.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/net/hamradio/6pack.c b/drivers/net/hamradio/6pack.c index 71d6629e65c9..da13683d52d1 100644 --- a/drivers/net/hamradio/6pack.c +++ b/drivers/net/hamradio/6pack.c @@ -839,6 +839,12 @@ static void decode_data(struct sixpack *sp, unsigned char inbyte) return; } + if (sp->rx_count_cooked + 2 >= sizeof(sp->cooked_buf)) { + pr_err("6pack: cooked buffer overrun, data loss\n"); + sp->rx_count = 0; + return; + } + buf = sp->raw_buf; sp->cooked_buf[sp->rx_count_cooked++] = buf[0] | ((buf[1] << 2) & 0xc0); From c9566df334d0c3356c80b9ba79161ebb433c16c6 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Fri, 13 Aug 2021 20:33:27 +0300 Subject: [PATCH 34/61] ptp_pch: Restore dependency on PCI [ Upstream commit 55c8fca1dae1fb0d11deaa21b65a647dedb1bc50 ] During the swap dependency on PCH_GBE to selection PTP_1588_CLOCK_PCH incidentally dropped the implicit dependency on the PCI. Restore it. Fixes: 18d359ceb044 ("pch_gbe, ptp_pch: Fix the dependency direction between these drivers") Reported-by: kernel test robot Signed-off-by: Andy Shevchenko Signed-off-by: David S. 
Miller Signed-off-by: Sasha Levin --- drivers/ptp/Kconfig | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/ptp/Kconfig b/drivers/ptp/Kconfig index 0517272a268e..9fb6f7643ea9 100644 --- a/drivers/ptp/Kconfig +++ b/drivers/ptp/Kconfig @@ -92,7 +92,8 @@ config DP83640_PHY config PTP_1588_CLOCK_PCH tristate "Intel PCH EG20T as PTP clock" depends on X86_32 || COMPILE_TEST - depends on HAS_IOMEM && NET + depends on HAS_IOMEM && PCI + depends on NET imply PTP_1588_CLOCK help This driver adds support for using the PCH EG20T as a PTP From 447b160289560f90e5fe25d040f6a94b82257d6b Mon Sep 17 00:00:00 2001 From: Michael Chan Date: Sun, 15 Aug 2021 16:15:37 -0400 Subject: [PATCH 35/61] bnxt_en: Add missing DMA memory barriers [ Upstream commit 828affc27ed43441bd1efdaf4e07e96dd43a0362 ] Each completion ring entry has a valid bit to indicate that the entry contains a valid completion event. The driver's main poll loop __bnxt_poll_work() has the proper dma_rmb() to make sure the valid bit of the next entry has been checked before proceeding further. But when we call bnxt_rx_pkt() to process the RX event, the RX completion event consists of two completion entries and only the first entry has been checked to be valid. We need the same barrier after checking the next completion entry. Add missing dma_rmb() barriers in bnxt_rx_pkt() and other similar locations. Fixes: 67a95e2022c7 ("bnxt_en: Need memory barrier when processing the completion ring.") Reported-by: Lance Richardson Reviewed-by: Andy Gospodarek Reviewed-by: Lance Richardson Signed-off-by: Michael Chan Signed-off-by: David S. 
Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 1b5839ad97b6..e67f07faca78 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -1724,6 +1724,10 @@ static int bnxt_rx_pkt(struct bnxt *bp, struct bnxt_cp_ring_info *cpr, if (!RX_CMP_VALID(rxcmp1, tmp_raw_cons)) return -EBUSY; + /* The valid test of the entry must be done first before + * reading any further. + */ + dma_rmb(); prod = rxr->rx_prod; if (cmp_type == CMP_TYPE_RX_L2_TPA_START_CMP) { @@ -1918,6 +1922,10 @@ static int bnxt_force_rx_discard(struct bnxt *bp, if (!RX_CMP_VALID(rxcmp1, tmp_raw_cons)) return -EBUSY; + /* The valid test of the entry must be done first before + * reading any further. + */ + dma_rmb(); cmp_type = RX_CMP_TYPE(rxcmp); if (cmp_type == CMP_TYPE_RX_L2_CMP) { rxcmp1->rx_cmp_cfa_code_errors_v2 |= @@ -2314,6 +2322,10 @@ static int bnxt_poll_nitroa0(struct napi_struct *napi, int budget) if (!TX_CMP_VALID(txcmp, raw_cons)) break; + /* The valid test of the entry must be done first before + * reading any further. + */ + dma_rmb(); if ((TX_CMP_TYPE(txcmp) & 0x30) == 0x10) { tmp_raw_cons = NEXT_RAW_CMP(raw_cons); cp_cons = RING_CMP(tmp_raw_cons); From 3ed7cf8386c9eec0ad8485bc7f6cf28dfbe2a939 Mon Sep 17 00:00:00 2001 From: Lahav Schlesinger Date: Sun, 15 Aug 2021 12:00:02 +0000 Subject: [PATCH 36/61] vrf: Reset skb conntrack connection on VRF rcv [ Upstream commit 09e856d54bda5f288ef8437a90ab2b9b3eab83d1 ] To fix the "reverse-NAT" for replies. When a packet is sent over a VRF, the POST_ROUTING hooks are called twice: Once from the VRF interface, and once from the "actual" interface the packet will be sent from: 1) First SNAT: l3mdev_l3_out() -> vrf_l3_out() -> .. -> vrf_output_direct() This causes the POST_ROUTING hooks to run. 
2) Second SNAT: 'ip_output()' calls POST_ROUTING hooks again. Similarly for replies, first ip_rcv() calls PRE_ROUTING hooks, and second vrf_l3_rcv() calls them again. As an example, consider the following SNAT rule: > iptables -t nat -A POSTROUTING -p udp -m udp --dport 53 -j SNAT --to-source 2.2.2.2 -o vrf_1 In this case sending over a VRF will create 2 conntrack entries. The first is from the VRF interface, which performs the IP SNAT. The second will run the SNAT, but since the "expected reply" will remain the same, conntrack randomizes the source port of the packet: e.g. with a socket bound to 1.1.1.1:10000, sending to 3.3.3.3:53, the conntrack rules are: udp 17 29 src=2.2.2.2 dst=3.3.3.3 sport=10000 dport=53 packets=1 bytes=68 [UNREPLIED] src=3.3.3.3 dst=2.2.2.2 sport=53 dport=61033 packets=0 bytes=0 mark=0 use=1 udp 17 29 src=1.1.1.1 dst=3.3.3.3 sport=10000 dport=53 packets=1 bytes=68 [UNREPLIED] src=3.3.3.3 dst=2.2.2.2 sport=53 dport=10000 packets=0 bytes=0 mark=0 use=1 i.e. First SNAT IP from 1.1.1.1 --> 2.2.2.2, and second the src port is SNAT-ed from 10000 --> 61033. But when a reply is sent (3.3.3.3:53 -> 2.2.2.2:61033) only the later conntrack entry is matched: udp 17 29 src=2.2.2.2 dst=3.3.3.3 sport=10000 dport=53 packets=1 bytes=68 src=3.3.3.3 dst=2.2.2.2 sport=53 dport=61033 packets=1 bytes=49 mark=0 use=1 udp 17 28 src=1.1.1.1 dst=3.3.3.3 sport=10000 dport=53 packets=1 bytes=68 [UNREPLIED] src=3.3.3.3 dst=2.2.2.2 sport=53 dport=10000 packets=0 bytes=0 mark=0 use=1 And a "port 61033 unreachable" ICMP packet is sent back. The issue is that when PRE_ROUTING hooks are called from vrf_l3_rcv(), the skb already has a conntrack flow attached to it, which means nf_conntrack_in() will not resolve the flow again. This means only the dest port is "reverse-NATed" (61033 -> 10000) but the dest IP remains 2.2.2.2, and since the socket is bound to 1.1.1.1 it's not received. This can be verified by logging the 4-tuple of the packet in '__udp4_lib_rcv()'.
The fix is then to reset the flow when skb is received on a VRF, to let conntrack resolve the flow again (which now will hit the earlier flow). To reproduce: (Without the fix "Got pkt_to_nat_port" will not be printed by running 'bash ./repro'): $ cat run_in_A1.py import logging logging.getLogger("scapy.runtime").setLevel(logging.ERROR) from scapy.all import * import argparse def get_packet_to_send(udp_dst_port, msg_name): return Ether(src='11:22:33:44:55:66', dst=iface_mac)/ \ IP(src='3.3.3.3', dst='2.2.2.2')/ \ UDP(sport=53, dport=udp_dst_port)/ \ Raw(f'{msg_name}\x0012345678901234567890') parser = argparse.ArgumentParser() parser.add_argument('-iface_mac', dest="iface_mac", type=str, required=True, help="From run_in_A3.py") parser.add_argument('-socket_port', dest="socket_port", type=str, required=True, help="From run_in_A3.py") parser.add_argument('-v1_mac', dest="v1_mac", type=str, required=True, help="From script") args, _ = parser.parse_known_args() iface_mac = args.iface_mac socket_port = int(args.socket_port) v1_mac = args.v1_mac print(f'Source port before NAT: {socket_port}') while True: pkts = sniff(iface='_v0', store=True, count=1, timeout=10) if 0 == len(pkts): print('Something failed, rerun the script :(', flush=True) break pkt = pkts[0] if not pkt.haslayer('UDP'): continue pkt_sport = pkt.getlayer('UDP').sport print(f'Source port after NAT: {pkt_sport}', flush=True) pkt_to_send = get_packet_to_send(pkt_sport, 'pkt_to_nat_port') sendp(pkt_to_send, '_v0', verbose=False) # Will not be received pkt_to_send = get_packet_to_send(socket_port, 'pkt_to_socket_port') sendp(pkt_to_send, '_v0', verbose=False) break $ cat run_in_A2.py import socket import netifaces print(f"{netifaces.ifaddresses('e00000')[netifaces.AF_LINK][0]['addr']}", flush=True) s = socket.socket(socket.AF_INET, socket.SOCK_DGRAM) s.setsockopt(socket.SOL_SOCKET, socket.SO_BINDTODEVICE, str('vrf_1' + '\0').encode('utf-8')) s.connect(('3.3.3.3', 53)) print(f'{s. 
getsockname()[1]}', flush=True) s.settimeout(5) while True: try: # Periodically send in order to keep the conntrack entry alive. s.send(b'a'*40) resp = s.recvfrom(1024) msg_name = resp[0].decode('utf-8').split('\0')[0] print(f"Got {msg_name}", flush=True) except Exception as e: pass $ cat repro.sh ip netns del A1 2> /dev/null ip netns del A2 2> /dev/null ip netns add A1 ip netns add A2 ip -n A1 link add _v0 type veth peer name _v1 netns A2 ip -n A1 link set _v0 up ip -n A2 link add e00000 type bond ip -n A2 link add lo0 type dummy ip -n A2 link add vrf_1 type vrf table 10001 ip -n A2 link set vrf_1 up ip -n A2 link set e00000 master vrf_1 ip -n A2 addr add 1.1.1.1/24 dev e00000 ip -n A2 link set e00000 up ip -n A2 link set _v1 master e00000 ip -n A2 link set _v1 up ip -n A2 link set lo0 up ip -n A2 addr add 2.2.2.2/32 dev lo0 ip -n A2 neigh add 1.1.1.10 lladdr 77:77:77:77:77:77 dev e00000 ip -n A2 route add 3.3.3.3/32 via 1.1.1.10 dev e00000 table 10001 ip netns exec A2 iptables -t nat -A POSTROUTING -p udp -m udp --dport 53 -j \ SNAT --to-source 2.2.2.2 -o vrf_1 sleep 5 ip netns exec A2 python3 run_in_A2.py > x & XPID=$! 
sleep 5 IFACE_MAC=`sed -n 1p x` SOCKET_PORT=`sed -n 2p x` V1_MAC=`ip -n A2 link show _v1 | sed -n 2p | awk '{print $2'}` ip netns exec A1 python3 run_in_A1.py -iface_mac ${IFACE_MAC} -socket_port \ ${SOCKET_PORT} -v1_mac ${SOCKET_PORT} sleep 5 kill -9 $XPID wait $XPID 2> /dev/null ip netns del A1 ip netns del A2 tail x -n 2 rm x set +x Fixes: 73e20b761acf ("net: vrf: Add support for PREROUTING rules on vrf device") Signed-off-by: Lahav Schlesinger Reviewed-by: David Ahern Link: https://lore.kernel.org/r/20210815120002.2787653-1-lschlesinger@drivenets.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/vrf.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/net/vrf.c b/drivers/net/vrf.c index f08ed52d51f3..9b626c169554 100644 --- a/drivers/net/vrf.c +++ b/drivers/net/vrf.c @@ -1036,6 +1036,8 @@ static struct sk_buff *vrf_ip6_rcv(struct net_device *vrf_dev, bool need_strict = rt6_need_strict(&ipv6_hdr(skb)->daddr); bool is_ndisc = ipv6_ndisc_frame(skb); + nf_reset_ct(skb); + /* loopback, multicast & non-ND link-local traffic; do not push through * packet taps again. Reset pkt_type for upper layers to process skb. * For strict packets with a source LLA, determine the dst using the @@ -1092,6 +1094,8 @@ static struct sk_buff *vrf_ip_rcv(struct net_device *vrf_dev, skb->skb_iif = vrf_dev->ifindex; IPCB(skb)->flags |= IPSKB_L3SLAVE; + nf_reset_ct(skb); + if (ipv4_is_multicast(ip_hdr(skb)->daddr)) goto out; From 9aeadce8e33bf200656013bc6ff2ebac0604752a Mon Sep 17 00:00:00 2001 From: Xuan Zhuo Date: Wed, 10 Mar 2021 10:24:45 +0800 Subject: [PATCH 37/61] virtio-net: support XDP when not more queues [ Upstream commit 97c2c69e1926260c78c7f1c0b2c987934f1dc7a1 ] The number of queues implemented by many virtio backends is limited, especially some machines have a large number of CPUs. 
In this case, it is often impossible to allocate a separate queue for XDP_TX/XDP_REDIRECT, then xdp cannot be loaded to work, even xdp does not use the XDP_TX/XDP_REDIRECT. This patch allows XDP_TX/XDP_REDIRECT to run by reuse the existing SQ with __netif_tx_lock() hold when there are not enough queues. Signed-off-by: Xuan Zhuo Reviewed-by: Dust Li Acked-by: Jason Wang Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/virtio_net.c | 60 ++++++++++++++++++++++++++++++++-------- 1 file changed, 48 insertions(+), 12 deletions(-) diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index 15453d6fcc23..36f8aeb113a8 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -195,6 +195,9 @@ struct virtnet_info { /* # of XDP queue pairs currently used by the driver */ u16 xdp_queue_pairs; + /* xdp_queue_pairs may be 0, when xdp is already loaded. So add this. */ + bool xdp_enabled; + /* I like... big packets and I cannot lie! */ bool big_packets; @@ -485,12 +488,41 @@ static int __virtnet_xdp_xmit_one(struct virtnet_info *vi, return 0; } -static struct send_queue *virtnet_xdp_sq(struct virtnet_info *vi) -{ - unsigned int qp; +/* when vi->curr_queue_pairs > nr_cpu_ids, the txq/sq is only used for xdp tx on + * the current cpu, so it does not need to be locked. + * + * Here we use marco instead of inline functions because we have to deal with + * three issues at the same time: 1. the choice of sq. 2. judge and execute the + * lock/unlock of txq 3. make sparse happy. It is difficult for two inline + * functions to perfectly solve these three problems at the same time. 
+ */ +#define virtnet_xdp_get_sq(vi) ({ \ + struct netdev_queue *txq; \ + typeof(vi) v = (vi); \ + unsigned int qp; \ + \ + if (v->curr_queue_pairs > nr_cpu_ids) { \ + qp = v->curr_queue_pairs - v->xdp_queue_pairs; \ + qp += smp_processor_id(); \ + txq = netdev_get_tx_queue(v->dev, qp); \ + __netif_tx_acquire(txq); \ + } else { \ + qp = smp_processor_id() % v->curr_queue_pairs; \ + txq = netdev_get_tx_queue(v->dev, qp); \ + __netif_tx_lock(txq, raw_smp_processor_id()); \ + } \ + v->sq + qp; \ +}) - qp = vi->curr_queue_pairs - vi->xdp_queue_pairs + smp_processor_id(); - return &vi->sq[qp]; +#define virtnet_xdp_put_sq(vi, q) { \ + struct netdev_queue *txq; \ + typeof(vi) v = (vi); \ + \ + txq = netdev_get_tx_queue(v->dev, (q) - v->sq); \ + if (v->curr_queue_pairs > nr_cpu_ids) \ + __netif_tx_release(txq); \ + else \ + __netif_tx_unlock(txq); \ } static int virtnet_xdp_xmit(struct net_device *dev, @@ -516,7 +548,7 @@ static int virtnet_xdp_xmit(struct net_device *dev, if (!xdp_prog) return -ENXIO; - sq = virtnet_xdp_sq(vi); + sq = virtnet_xdp_get_sq(vi); if (unlikely(flags & ~XDP_XMIT_FLAGS_MASK)) { ret = -EINVAL; @@ -564,12 +596,13 @@ out: sq->stats.kicks += kicks; u64_stats_update_end(&sq->stats.syncp); + virtnet_xdp_put_sq(vi, sq); return ret; } static unsigned int virtnet_get_headroom(struct virtnet_info *vi) { - return vi->xdp_queue_pairs ? VIRTIO_XDP_HEADROOM : 0; + return vi->xdp_enabled ? 
VIRTIO_XDP_HEADROOM : 0; } /* We copy the packet for XDP in the following cases: @@ -1458,12 +1491,13 @@ static int virtnet_poll(struct napi_struct *napi, int budget) xdp_do_flush_map(); if (xdp_xmit & VIRTIO_XDP_TX) { - sq = virtnet_xdp_sq(vi); + sq = virtnet_xdp_get_sq(vi); if (virtqueue_kick_prepare(sq->vq) && virtqueue_notify(sq->vq)) { u64_stats_update_begin(&sq->stats.syncp); sq->stats.kicks++; u64_stats_update_end(&sq->stats.syncp); } + virtnet_xdp_put_sq(vi, sq); } return received; @@ -2480,10 +2514,9 @@ static int virtnet_xdp_set(struct net_device *dev, struct bpf_prog *prog, /* XDP requires extra queues for XDP_TX */ if (curr_qp + xdp_qp > vi->max_queue_pairs) { - NL_SET_ERR_MSG_MOD(extack, "Too few free TX rings available"); - netdev_warn(dev, "request %i queues but max is %i\n", + netdev_warn(dev, "XDP request %i queues but max is %i. XDP_TX and XDP_REDIRECT will operate in a slower locked tx mode.\n", curr_qp + xdp_qp, vi->max_queue_pairs); - return -ENOMEM; + xdp_qp = 0; } old_prog = rtnl_dereference(vi->rq[0].xdp_prog); @@ -2520,11 +2553,14 @@ static int virtnet_xdp_set(struct net_device *dev, struct bpf_prog *prog, vi->xdp_queue_pairs = xdp_qp; if (prog) { + vi->xdp_enabled = true; for (i = 0; i < vi->max_queue_pairs; i++) { rcu_assign_pointer(vi->rq[i].xdp_prog, prog); if (i == 0 && !old_prog) virtnet_clear_guest_offloads(vi); } + } else { + vi->xdp_enabled = false; } for (i = 0; i < vi->max_queue_pairs; i++) { @@ -2609,7 +2645,7 @@ static int virtnet_set_features(struct net_device *dev, int err; if ((dev->features ^ features) & NETIF_F_LRO) { - if (vi->xdp_queue_pairs) + if (vi->xdp_enabled) return -EBUSY; if (features & NETIF_F_LRO) From da92ce364595ba081fb5da052a657dafd5e81c70 Mon Sep 17 00:00:00 2001 From: Jason Wang Date: Tue, 17 Aug 2021 16:06:59 +0800 Subject: [PATCH 38/61] virtio-net: use NETIF_F_GRO_HW instead of NETIF_F_LRO [ Upstream commit dbcf24d153884439dad30484a0e3f02350692e4c ] Commit a02e8964eaf92 ("virtio-net: ethtool configurable 
LRO") maps LRO to virtio guest offloading features and allows the administrator to enable and disable those features via ethtool. This leads to several issues: - For a device that doesn't support control guest offloads, the "LRO" can't be disabled triggering WARN in dev_disable_lro() when turning off LRO or when enabling forwarding bridging etc. - For a device that supports control guest offloads, the guest offloads are disabled in cases of bridging, forwarding etc slowing down the traffic. Fix this by using NETIF_F_GRO_HW instead. Though the spec does not guarantee packets to be re-segmented as the original ones, we can add that to the spec, possibly with a flag for devices to differentiate between GRO and LRO. Further, we never advertised LRO historically before a02e8964eaf92 ("virtio-net: ethtool configurable LRO") and so bridged/forwarded configs effectively always relied on virtio receive offloads behaving like GRO - thus even if this breaks any configs it is at least not a regression. Fixes: a02e8964eaf92 ("virtio-net: ethtool configurable LRO") Acked-by: Michael S. Tsirkin Reported-by: Ivan Tested-by: Ivan Signed-off-by: Jason Wang Signed-off-by: David S. 
Miller Signed-off-by: Sasha Levin --- drivers/net/virtio_net.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index 36f8aeb113a8..37c2cecd1e50 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -63,7 +63,7 @@ static const unsigned long guest_offloads[] = { VIRTIO_NET_F_GUEST_CSUM }; -#define GUEST_OFFLOAD_LRO_MASK ((1ULL << VIRTIO_NET_F_GUEST_TSO4) | \ +#define GUEST_OFFLOAD_GRO_HW_MASK ((1ULL << VIRTIO_NET_F_GUEST_TSO4) | \ (1ULL << VIRTIO_NET_F_GUEST_TSO6) | \ (1ULL << VIRTIO_NET_F_GUEST_ECN) | \ (1ULL << VIRTIO_NET_F_GUEST_UFO)) @@ -2493,7 +2493,7 @@ static int virtnet_xdp_set(struct net_device *dev, struct bpf_prog *prog, virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_ECN) || virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_UFO) || virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_CSUM))) { - NL_SET_ERR_MSG_MOD(extack, "Can't set XDP while host is implementing LRO/CSUM, disable LRO/CSUM first"); + NL_SET_ERR_MSG_MOD(extack, "Can't set XDP while host is implementing GRO_HW/CSUM, disable GRO_HW/CSUM first"); return -EOPNOTSUPP; } @@ -2644,15 +2644,15 @@ static int virtnet_set_features(struct net_device *dev, u64 offloads; int err; - if ((dev->features ^ features) & NETIF_F_LRO) { + if ((dev->features ^ features) & NETIF_F_GRO_HW) { if (vi->xdp_enabled) return -EBUSY; - if (features & NETIF_F_LRO) + if (features & NETIF_F_GRO_HW) offloads = vi->guest_offloads_capable; else offloads = vi->guest_offloads_capable & - ~GUEST_OFFLOAD_LRO_MASK; + ~GUEST_OFFLOAD_GRO_HW_MASK; err = virtnet_set_guest_offloads(vi, offloads); if (err) @@ -3128,9 +3128,9 @@ static int virtnet_probe(struct virtio_device *vdev) dev->features |= NETIF_F_RXCSUM; if (virtio_has_feature(vdev, VIRTIO_NET_F_GUEST_TSO4) || virtio_has_feature(vdev, VIRTIO_NET_F_GUEST_TSO6)) - dev->features |= NETIF_F_LRO; + dev->features |= NETIF_F_GRO_HW; if (virtio_has_feature(vdev, 
VIRTIO_NET_F_CTRL_GUEST_OFFLOADS)) - dev->hw_features |= NETIF_F_LRO; + dev->hw_features |= NETIF_F_GRO_HW; dev->vlan_features = dev->features; From 6b70c67849bb0b5b7d81d2265d2aa35150a2a72c Mon Sep 17 00:00:00 2001 From: Dinghao Liu Date: Mon, 16 Aug 2021 21:14:04 +0800 Subject: [PATCH 39/61] net: qlcnic: add missed unlock in qlcnic_83xx_flash_read32 [ Upstream commit 0a298d133893c72c96e2156ed7cb0f0c4a306a3e ] qlcnic_83xx_unlock_flash() is called on all paths after we call qlcnic_83xx_lock_flash(), except for one error path on failure of QLCRD32(), which may cause a deadlock. This bug is suggested by a static analysis tool, please advise. Fixes: 81d0aeb0a4fff ("qlcnic: flash template based firmware reset recovery") Signed-off-by: Dinghao Liu Link: https://lore.kernel.org/r/20210816131405.24024-1-dinghao.liu@zju.edu.cn Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_hw.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_hw.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_hw.c index 29b9c728a65e..f2014c10f7c9 100644 --- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_hw.c +++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_hw.c @@ -3158,8 +3158,10 @@ int qlcnic_83xx_flash_read32(struct qlcnic_adapter *adapter, u32 flash_addr, indirect_addr = QLC_83XX_FLASH_DIRECT_DATA(addr); ret = QLCRD32(adapter, indirect_addr, &err); - if (err == -EIO) + if (err == -EIO) { + qlcnic_83xx_unlock_flash(adapter); return err; + } word = ret; *(u32 *)p_data = word; From 453486e79ed2ecf6aaf82e5802e90917b2f1606d Mon Sep 17 00:00:00 2001 From: Saravana Kannan Date: Tue, 17 Aug 2021 20:38:02 -0700 Subject: [PATCH 40/61] net: mdio-mux: Don't ignore memory allocation errors [ Upstream commit 99d81e942474cc7677d12f673f42a7ea699e2589 ] If we are seeing memory allocation errors, don't try to continue registering child mdiobus devices. It's unlikely they'll succeed. 
Fixes: 342fa1964439 ("mdio: mux: make child bus walking more permissive and errors more verbose") Signed-off-by: Saravana Kannan Reviewed-by: Andrew Lunn Acked-by: Marc Zyngier Tested-by: Marc Zyngier Acked-by: Kevin Hilman Tested-by: Kevin Hilman Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/phy/mdio-mux.c | 28 ++++++++++++++++++---------- 1 file changed, 18 insertions(+), 10 deletions(-) diff --git a/drivers/net/phy/mdio-mux.c b/drivers/net/phy/mdio-mux.c index 6a1d3540210b..c96ef3b3fa3a 100644 --- a/drivers/net/phy/mdio-mux.c +++ b/drivers/net/phy/mdio-mux.c @@ -82,6 +82,17 @@ out: static int parent_count; +static void mdio_mux_uninit_children(struct mdio_mux_parent_bus *pb) +{ + struct mdio_mux_child_bus *cb = pb->children; + + while (cb) { + mdiobus_unregister(cb->mii_bus); + mdiobus_free(cb->mii_bus); + cb = cb->next; + } +} + int mdio_mux_init(struct device *dev, struct device_node *mux_node, int (*switch_fn)(int cur, int desired, void *data), @@ -144,7 +155,7 @@ int mdio_mux_init(struct device *dev, cb = devm_kzalloc(dev, sizeof(*cb), GFP_KERNEL); if (!cb) { ret_val = -ENOMEM; - continue; + goto err_loop; } cb->bus_number = v; cb->parent = pb; @@ -152,8 +163,7 @@ int mdio_mux_init(struct device *dev, cb->mii_bus = mdiobus_alloc(); if (!cb->mii_bus) { ret_val = -ENOMEM; - devm_kfree(dev, cb); - continue; + goto err_loop; } cb->mii_bus->priv = cb; @@ -182,6 +192,10 @@ int mdio_mux_init(struct device *dev, dev_err(dev, "Error: No acceptable child buses found\n"); devm_kfree(dev, pb); + +err_loop: + mdio_mux_uninit_children(pb); + of_node_put(child_bus_node); err_pb_kz: put_device(&parent_bus->dev); err_parent_bus: @@ -193,14 +207,8 @@ EXPORT_SYMBOL_GPL(mdio_mux_init); void mdio_mux_uninit(void *mux_handle) { struct mdio_mux_parent_bus *pb = mux_handle; - struct mdio_mux_child_bus *cb = pb->children; - - while (cb) { - mdiobus_unregister(cb->mii_bus); - mdiobus_free(cb->mii_bus); - cb = cb->next; - } + mdio_mux_uninit_children(pb); 
put_device(&pb->mii_bus->dev); } EXPORT_SYMBOL_GPL(mdio_mux_uninit); From 84dbbf5482e3e5f5b386d473ce03e999c5d0413b Mon Sep 17 00:00:00 2001 From: Saravana Kannan Date: Tue, 17 Aug 2021 20:38:03 -0700 Subject: [PATCH 41/61] net: mdio-mux: Handle -EPROBE_DEFER correctly [ Upstream commit 7bd0cef5dac685f09ef8b0b2a7748ff42d284dc7 ] When registering mdiobus children, if we get an -EPROBE_DEFER, we shouldn't ignore it and continue registering the rest of the mdiobus children. This would permanently prevent the deferring child mdiobus from working instead of reattempting it in the future. So, if a child mdiobus needs to be reattempted in the future, defer the entire mdio-mux initialization. This fixes the issue where PHYs sitting under the mdio-mux aren't initialized correctly if the PHY's interrupt controller is not yet ready when the mdio-mux is being probed. Additional context in the link below. Fixes: 0ca2997d1452 ("netdev/of/phy: Add MDIO bus multiplexer support.") Link: https://lore.kernel.org/lkml/CAGETcx95kHrv8wA-O+-JtfH7H9biJEGJtijuPVN0V5dUKUAB3A@mail.gmail.com/#t Signed-off-by: Saravana Kannan Reviewed-by: Andrew Lunn Acked-by: Marc Zyngier Tested-by: Marc Zyngier Acked-by: Kevin Hilman Tested-by: Kevin Hilman Signed-off-by: David S. 
Miller Signed-off-by: Sasha Levin --- drivers/net/phy/mdio-mux.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/net/phy/mdio-mux.c b/drivers/net/phy/mdio-mux.c index c96ef3b3fa3a..ccb3ee704eb1 100644 --- a/drivers/net/phy/mdio-mux.c +++ b/drivers/net/phy/mdio-mux.c @@ -175,11 +175,15 @@ int mdio_mux_init(struct device *dev, cb->mii_bus->write = mdio_mux_write; r = of_mdiobus_register(cb->mii_bus, child_bus_node); if (r) { + mdiobus_free(cb->mii_bus); + if (r == -EPROBE_DEFER) { + ret_val = r; + goto err_loop; + } + devm_kfree(dev, cb); dev_err(dev, "Error: Failed to register MDIO bus for child %pOF\n", child_bus_node); - mdiobus_free(cb->mii_bus); - devm_kfree(dev, cb); } else { cb->next = pb->children; pb->children = cb; From 1b8a8fba7853905dff29dd7dfe59d1c4e8a33bde Mon Sep 17 00:00:00 2001 From: "kaixi.fan" Date: Wed, 18 Aug 2021 10:22:15 +0800 Subject: [PATCH 42/61] ovs: clear skb->tstamp in forwarding path [ Upstream commit 01634047bf0d5c2d9b7d8095bb4de1663dbeedeb ] fq qdisc requires tstamp to be cleared in the forwarding path. Now ovs doesn't clear skb->tstamp. We encountered a problem with linux version 5.4.56 and ovs version 2.14.1, and packets failed to dequeue from qdisc when fq qdisc was attached to ovs port. Fixes: fb420d5d91c1 ("tcp/fq: move back to CLOCK_MONOTONIC") Signed-off-by: kaixi.fan Signed-off-by: xiexiaohui Reviewed-by: Cong Wang Signed-off-by: David S. 
Miller Signed-off-by: Sasha Levin --- net/openvswitch/vport.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/openvswitch/vport.c b/net/openvswitch/vport.c index 3fc38d16c456..19af0efeb8dc 100644 --- a/net/openvswitch/vport.c +++ b/net/openvswitch/vport.c @@ -499,6 +499,7 @@ void ovs_vport_send(struct vport *vport, struct sk_buff *skb, u8 mac_proto) } skb->dev = vport->dev; + skb->tstamp = 0; vport->ops->send(skb); return; From a498115dcd9c3d321379500abe3bc66a04259067 Mon Sep 17 00:00:00 2001 From: Arkadiusz Kubalewski Date: Wed, 18 Aug 2021 10:42:16 -0700 Subject: [PATCH 43/61] i40e: Fix ATR queue selection [ Upstream commit a222be597e316389f9f8c26033352c124ce93056 ] Without this patch, ATR does not work. Receive/transmit uses queue selection based on SW DCB hashing method. If traffic classes are not configured for PF, then use netdev_pick_tx function for selecting queue for packet transmission. Instead of calling i40e_swdcb_skb_tx_hash, call netdev_pick_tx, which ensures that packet is transmitted/received from CPU that is running the application. Reproduction steps: 1. Load i40e driver 2. Map each MSI interrupt of i40e port for each CPU 3. Disable ntuple, enable ATR i.e.: ethtool -K $interface ntuple off ethtool --set-priv-flags $interface flow-director-atr 4. Run application that is generating traffic and is bound to a single CPU, i.e.: taskset -c 9 netperf -H 1.1.1.1 -t TCP_RR -l 10 5. Observe behavior: Application's traffic should be restricted to the CPU provided in taskset. 
Fixes: 89ec1f0886c1 ("i40e: Fix queue-to-TC mapping on Tx") Signed-off-by: Przemyslaw Patynowski Signed-off-by: Arkadiusz Kubalewski Tested-by: Dave Switzer Signed-off-by: Tony Nguyen Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/ethernet/intel/i40e/i40e_txrx.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.c b/drivers/net/ethernet/intel/i40e/i40e_txrx.c index 8e38c547b53f..06987913837a 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_txrx.c +++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.c @@ -3553,8 +3553,7 @@ u16 i40e_lan_select_queue(struct net_device *netdev, /* is DCB enabled at all? */ if (vsi->tc_config.numtc == 1) - return i40e_swdcb_skb_tx_hash(netdev, skb, - netdev->real_num_tx_queues); + return netdev_pick_tx(netdev, skb, sb_dev); prio = skb->priority; hw = &vsi->back->hw; From 85813f1f9e86c4a620db7b2f3eedd2679af2c4be Mon Sep 17 00:00:00 2001 From: Sylwester Dziedziuch Date: Wed, 18 Aug 2021 10:42:17 -0700 Subject: [PATCH 44/61] iavf: Fix ping is lost after untrusted VF had tried to change MAC [ Upstream commit 8da80c9d50220a8e4190a4eaa0dd6aeefcbbb5bf ] Make changes to MAC address dependent on the response of PF. Disallow changes to HW MAC address and MAC filter from untrusted VF, thanks to that ping is not lost if VF tries to change MAC. Add a new field in iavf_mac_filter, to indicate whether there was response from PF for given filter. Based on this field pass or discard the filter. If untrusted VF tried to change its address, it's not changed. Still filter was changed, because of that ping couldn't go through. 
Fixes: c5c922b3e09b ("iavf: fix MAC address setting for VFs when filter is rejected") Signed-off-by: Przemyslaw Patynowski Signed-off-by: Sylwester Dziedziuch Signed-off-by: Mateusz Palczewski Tested-by: Gurucharan G Signed-off-by: Tony Nguyen Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/ethernet/intel/iavf/iavf.h | 1 + drivers/net/ethernet/intel/iavf/iavf_main.c | 1 + .../net/ethernet/intel/iavf/iavf_virtchnl.c | 47 ++++++++++++++++++- 3 files changed, 47 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/intel/iavf/iavf.h b/drivers/net/ethernet/intel/iavf/iavf.h index 6b9117a350fa..81ca6472937d 100644 --- a/drivers/net/ethernet/intel/iavf/iavf.h +++ b/drivers/net/ethernet/intel/iavf/iavf.h @@ -134,6 +134,7 @@ struct iavf_q_vector { struct iavf_mac_filter { struct list_head list; u8 macaddr[ETH_ALEN]; + bool is_new_mac; /* filter is new, wait for PF decision */ bool remove; /* filter needs to be removed */ bool add; /* filter needs to be added */ }; diff --git a/drivers/net/ethernet/intel/iavf/iavf_main.c b/drivers/net/ethernet/intel/iavf/iavf_main.c index dc902e371c2c..94a3f000e999 100644 --- a/drivers/net/ethernet/intel/iavf/iavf_main.c +++ b/drivers/net/ethernet/intel/iavf/iavf_main.c @@ -761,6 +761,7 @@ struct iavf_mac_filter *iavf_add_filter(struct iavf_adapter *adapter, list_add_tail(&f->list, &adapter->mac_filter_list); f->add = true; + f->is_new_mac = true; adapter->aq_required |= IAVF_FLAG_AQ_ADD_MAC_FILTER; } else { f->remove = false; diff --git a/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c b/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c index 9655318803b7..4d471a6f2946 100644 --- a/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c +++ b/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c @@ -564,6 +564,47 @@ void iavf_del_ether_addrs(struct iavf_adapter *adapter) kfree(veal); } +/** + * iavf_mac_add_ok + * @adapter: adapter structure + * + * Submit list of filters based on PF response. 
+ **/ +static void iavf_mac_add_ok(struct iavf_adapter *adapter) +{ + struct iavf_mac_filter *f, *ftmp; + + spin_lock_bh(&adapter->mac_vlan_list_lock); + list_for_each_entry_safe(f, ftmp, &adapter->mac_filter_list, list) { + f->is_new_mac = false; + } + spin_unlock_bh(&adapter->mac_vlan_list_lock); +} + +/** + * iavf_mac_add_reject + * @adapter: adapter structure + * + * Remove filters from list based on PF response. + **/ +static void iavf_mac_add_reject(struct iavf_adapter *adapter) +{ + struct net_device *netdev = adapter->netdev; + struct iavf_mac_filter *f, *ftmp; + + spin_lock_bh(&adapter->mac_vlan_list_lock); + list_for_each_entry_safe(f, ftmp, &adapter->mac_filter_list, list) { + if (f->remove && ether_addr_equal(f->macaddr, netdev->dev_addr)) + f->remove = false; + + if (f->is_new_mac) { + list_del(&f->list); + kfree(f); + } + } + spin_unlock_bh(&adapter->mac_vlan_list_lock); +} + /** * iavf_add_vlans * @adapter: adapter structure @@ -1316,6 +1357,7 @@ void iavf_virtchnl_completion(struct iavf_adapter *adapter, case VIRTCHNL_OP_ADD_ETH_ADDR: dev_err(&adapter->pdev->dev, "Failed to add MAC filter, error %s\n", iavf_stat_str(&adapter->hw, v_retval)); + iavf_mac_add_reject(adapter); /* restore administratively set MAC address */ ether_addr_copy(adapter->hw.mac.addr, netdev->dev_addr); break; @@ -1385,10 +1427,11 @@ void iavf_virtchnl_completion(struct iavf_adapter *adapter, } } switch (v_opcode) { - case VIRTCHNL_OP_ADD_ETH_ADDR: { + case VIRTCHNL_OP_ADD_ETH_ADDR: + if (!v_retval) + iavf_mac_add_ok(adapter); if (!ether_addr_equal(netdev->dev_addr, adapter->hw.mac.addr)) ether_addr_copy(netdev->dev_addr, adapter->hw.mac.addr); - } break; case VIRTCHNL_OP_GET_STATS: { struct iavf_eth_stats *stats = From 4f6c9caf7b6c51404bb0cef6480ced31da4ad530 Mon Sep 17 00:00:00 2001 From: Murphy Zhou Date: Fri, 17 Jan 2020 20:49:29 +0800 Subject: [PATCH 45/61] ovl: add splice file read write helper [ Upstream commit 1a980b8cbf0059a5308eea61522f232fd03002e2 ] Now overlayfs 
falls back to use default file splice read and write, which is not compatible with overlayfs, returning EFAULT. xfstests generic/591 can reproduce part of this. Tested this patch with xfstests auto group tests. Signed-off-by: Murphy Zhou Signed-off-by: Miklos Szeredi Signed-off-by: Sasha Levin --- fs/overlayfs/file.c | 47 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 47 insertions(+) diff --git a/fs/overlayfs/file.c b/fs/overlayfs/file.c index 7a08a576f7b2..ab5e92897270 100644 --- a/fs/overlayfs/file.c +++ b/fs/overlayfs/file.c @@ -9,6 +9,9 @@ #include #include #include +#include +#include +#include #include "overlayfs.h" static char ovl_whatisit(struct inode *inode, struct inode *realinode) @@ -293,6 +296,48 @@ out_unlock: return ret; } +static ssize_t ovl_splice_read(struct file *in, loff_t *ppos, + struct pipe_inode_info *pipe, size_t len, + unsigned int flags) +{ + ssize_t ret; + struct fd real; + const struct cred *old_cred; + + ret = ovl_real_fdget(in, &real); + if (ret) + return ret; + + old_cred = ovl_override_creds(file_inode(in)->i_sb); + ret = generic_file_splice_read(real.file, ppos, pipe, len, flags); + revert_creds(old_cred); + + ovl_file_accessed(in); + fdput(real); + return ret; +} + +static ssize_t +ovl_splice_write(struct pipe_inode_info *pipe, struct file *out, + loff_t *ppos, size_t len, unsigned int flags) +{ + struct fd real; + const struct cred *old_cred; + ssize_t ret; + + ret = ovl_real_fdget(out, &real); + if (ret) + return ret; + + old_cred = ovl_override_creds(file_inode(out)->i_sb); + ret = iter_file_splice_write(pipe, real.file, ppos, len, flags); + revert_creds(old_cred); + + ovl_file_accessed(out); + fdput(real); + return ret; +} + static int ovl_fsync(struct file *file, loff_t start, loff_t end, int datasync) { struct fd real; @@ -649,6 +694,8 @@ const struct file_operations ovl_file_operations = { .fadvise = ovl_fadvise, .unlocked_ioctl = ovl_ioctl, .compat_ioctl = ovl_compat_ioctl, + .splice_read = ovl_splice_read, 
+ .splice_write = ovl_splice_write, .copy_file_range = ovl_copy_file_range, .remap_file_range = ovl_remap_file_range, From 85e60614d1f60ef1de5bae426174452fbe5c617b Mon Sep 17 00:00:00 2001 From: Vincent Whitchurch Date: Wed, 30 Jun 2021 12:22:32 +0200 Subject: [PATCH 46/61] mmc: dw_mmc: Fix hang on data CRC error [ Upstream commit 25f8203b4be1937c4939bb98623e67dcfd7da4d1 ] When a Data CRC interrupt is received, the driver disables the DMA, then sends the stop/abort command and then waits for Data Transfer Over. However, sometimes, when a data CRC error is received in the middle of a multi-block write transfer, the Data Transfer Over interrupt is never received, and the driver hangs and never completes the request. The driver sets the BMOD.SWR bit (SDMMC_IDMAC_SWRESET) when stopping the DMA, but according to the manual CMD.STOP_ABORT_CMD should be programmed "before assertion of SWR". Do these operations in the recommended order. With this change the Data Transfer Over is always received correctly in my tests. 
Signed-off-by: Vincent Whitchurch Reviewed-by: Jaehoon Chung Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20210630102232.16011-1-vincent.whitchurch@axis.com Signed-off-by: Ulf Hansson Signed-off-by: Sasha Levin --- drivers/mmc/host/dw_mmc.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/mmc/host/dw_mmc.c b/drivers/mmc/host/dw_mmc.c index 79c55c7b4afd..6ace82028667 100644 --- a/drivers/mmc/host/dw_mmc.c +++ b/drivers/mmc/host/dw_mmc.c @@ -2017,8 +2017,8 @@ static void dw_mci_tasklet_func(unsigned long priv) continue; } - dw_mci_stop_dma(host); send_stop_abort(host, data); + dw_mci_stop_dma(host); state = STATE_SENDING_STOP; break; } @@ -2042,10 +2042,10 @@ static void dw_mci_tasklet_func(unsigned long priv) */ if (test_and_clear_bit(EVENT_DATA_ERROR, &host->pending_events)) { - dw_mci_stop_dma(host); if (!(host->data_status & (SDMMC_INT_DRTO | SDMMC_INT_EBE))) send_stop_abort(host, data); + dw_mci_stop_dma(host); state = STATE_DATA_ERROR; break; } @@ -2078,10 +2078,10 @@ static void dw_mci_tasklet_func(unsigned long priv) */ if (test_and_clear_bit(EVENT_DATA_ERROR, &host->pending_events)) { - dw_mci_stop_dma(host); if (!(host->data_status & (SDMMC_INT_DRTO | SDMMC_INT_EBE))) send_stop_abort(host, data); + dw_mci_stop_dma(host); state = STATE_DATA_ERROR; break; } From 8fbfebe188c020fbcce0c739a5936d025c08b43d Mon Sep 17 00:00:00 2001 From: Jaroslav Kysela Date: Wed, 11 Aug 2021 18:14:41 +0200 Subject: [PATCH 47/61] ALSA: hda - fix the 'Capture Switch' value change notifications [ Upstream commit a2befe9380dd04ee76c871568deca00eedf89134 ] The original code in the cap_put_caller() function does not handle correctly the positive values returned from the passed function for multiple iterations. It means that the change notifications may be lost. 
Fixes: 352f7f914ebb ("ALSA: hda - Merge Realtek parser code to generic parser") BugLink: https://bugzilla.kernel.org/show_bug.cgi?id=213851 Cc: Signed-off-by: Jaroslav Kysela Link: https://lore.kernel.org/r/20210811161441.1325250-1-perex@perex.cz Signed-off-by: Takashi Iwai Signed-off-by: Sasha Levin --- sound/pci/hda/hda_generic.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/sound/pci/hda/hda_generic.c b/sound/pci/hda/hda_generic.c index 7ac3f04ca8c0..e92fcb150e57 100644 --- a/sound/pci/hda/hda_generic.c +++ b/sound/pci/hda/hda_generic.c @@ -3458,7 +3458,7 @@ static int cap_put_caller(struct snd_kcontrol *kcontrol, struct hda_gen_spec *spec = codec->spec; const struct hda_input_mux *imux; struct nid_path *path; - int i, adc_idx, err = 0; + int i, adc_idx, ret, err = 0; imux = &spec->input_mux; adc_idx = kcontrol->id.index; @@ -3468,9 +3468,13 @@ static int cap_put_caller(struct snd_kcontrol *kcontrol, if (!path || !path->ctls[type]) continue; kcontrol->private_value = path->ctls[type]; - err = func(kcontrol, ucontrol); - if (err < 0) + ret = func(kcontrol, ucontrol); + if (ret < 0) { + err = ret; break; + } + if (ret > 0) + err = 1; } mutex_unlock(&codec->control_mutex); if (err >= 0 && spec->cap_sync_hook) From 20c2f141b1e58cdc07cd33c84a5a01f2ce5ec3eb Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Sun, 8 Aug 2021 00:30:11 -0400 Subject: [PATCH 48/61] tracing / histogram: Fix NULL pointer dereference on strcmp() on NULL event name [ Upstream commit 5acce0bff2a0420ce87d4591daeb867f47d552c2 ] The following commands: # echo 'read_max u64 size;' > synthetic_events # echo 'hist:keys=common_pid:count=count:onmax($count).trace(read_max,count)' > events/syscalls/sys_enter_read/trigger Causes: BUG: kernel NULL pointer dereference, address: 0000000000000000 #PF: supervisor read access in kernel mode #PF: error_code(0x0000) - not-present page PGD 0 P4D 0 Oops: 0000 [#1] PREEMPT SMP CPU: 4 PID: 1763 Comm: bash Not tainted 
5.14.0-rc2-test+ #155 Hardware name: Hewlett-Packard HP Compaq Pro 6300 SFF/339A, BIOS K01 v03.03 07/14/2016 RIP: 0010:strcmp+0xc/0x20 Code: 75 f7 31 c0 0f b6 0c 06 88 0c 02 48 83 c0 01 84 c9 75 f1 4c 89 c0 c3 0f 1f 80 00 00 00 00 31 c0 eb 08 48 83 c0 01 84 d2 74 0f <0f> b6 14 07 3a 14 06 74 ef 19 c0 83 c8 01 c3 31 c0 c3 66 90 48 89 RSP: 0018:ffffb5fdc0963ca8 EFLAGS: 00010246 RAX: 0000000000000000 RBX: ffffffffb3a4e040 RCX: 0000000000000000 RDX: 0000000000000000 RSI: ffff9714c0d0b640 RDI: 0000000000000000 RBP: 0000000000000000 R08: 00000022986b7cde R09: ffffffffb3a4dff8 R10: 0000000000000000 R11: 0000000000000000 R12: ffff9714c50603c8 R13: 0000000000000000 R14: ffff97143fdf9e48 R15: ffff9714c01a2210 FS: 00007f1fa6785740(0000) GS:ffff9714da400000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000000000000 CR3: 000000002d863004 CR4: 00000000001706e0 Call Trace: __find_event_file+0x4e/0x80 action_create+0x6b7/0xeb0 ? kstrdup+0x44/0x60 event_hist_trigger_func+0x1a07/0x2130 trigger_process_regex+0xbd/0x110 event_trigger_write+0x71/0xd0 vfs_write+0xe9/0x310 ksys_write+0x68/0xe0 do_syscall_64+0x3b/0x90 entry_SYSCALL_64_after_hwframe+0x44/0xae RIP: 0033:0x7f1fa6879e87 The problem was the "trace(read_max,count)" where the "count" should be "$count" as "onmax()" only handles variables (although it really should be able to figure out that "count" is a field of sys_enter_read). But there's a path that does not find the variable and ends up passing a NULL for the event, which ends up getting passed to "strcmp()". 
Add a check for NULL to return an error on the command with: # cat error_log hist:syscalls:sys_enter_read: error: Couldn't create or find variable Command: hist:keys=common_pid:count=count:onmax($count).trace(read_max,count) ^ Link: https://lkml.kernel.org/r/20210808003011.4037f8d0@oasis.local.home Cc: Masami Hiramatsu Cc: stable@vger.kernel.org Fixes: 50450603ec9cb tracing: Add 'onmax' hist trigger action support Reviewed-by: Tom Zanussi Signed-off-by: Steven Rostedt (VMware) Signed-off-by: Sasha Levin --- kernel/trace/trace_events_hist.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/kernel/trace/trace_events_hist.c b/kernel/trace/trace_events_hist.c index e40712abe089..f63766366e23 100644 --- a/kernel/trace/trace_events_hist.c +++ b/kernel/trace/trace_events_hist.c @@ -4291,6 +4291,8 @@ trace_action_create_field_var(struct hist_trigger_data *hist_data, event = data->match_data.event; } + if (!event) + goto free; /* * At this point, we're looking at a field on another * event. Because we can't modify a hist trigger on From 0786d315f55c3b5a6cf3e34065360bf1f2e4dada Mon Sep 17 00:00:00 2001 From: Srinivas Kandagatla Date: Mon, 9 Aug 2021 09:24:25 +0100 Subject: [PATCH 49/61] slimbus: messaging: start transaction ids from 1 instead of zero [ Upstream commit 9659281ce78de0f15a4aa124da8f7450b1399c09 ] As tid is unsigned it's hard to figure out if the tid is valid or invalid. So start the transaction ids from 1 instead of zero so that we could differentiate between a valid tid and invalid tids. This is useful in cases where controller would add a tid for controller specific transfers. 
Fixes: d3062a210930 ("slimbus: messaging: add slim_alloc/free_txn_tid()") Cc: Signed-off-by: Srinivas Kandagatla Link: https://lore.kernel.org/r/20210809082428.11236-2-srinivas.kandagatla@linaro.org Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin --- drivers/slimbus/messaging.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/slimbus/messaging.c b/drivers/slimbus/messaging.c index d5879142dbef..3b77713f1e3f 100644 --- a/drivers/slimbus/messaging.c +++ b/drivers/slimbus/messaging.c @@ -66,7 +66,7 @@ int slim_alloc_txn_tid(struct slim_controller *ctrl, struct slim_msg_txn *txn) int ret = 0; spin_lock_irqsave(&ctrl->txn_lock, flags); - ret = idr_alloc_cyclic(&ctrl->tid_idr, txn, 0, + ret = idr_alloc_cyclic(&ctrl->tid_idr, txn, 1, SLIM_MAX_TIDS, GFP_ATOMIC); if (ret < 0) { spin_unlock_irqrestore(&ctrl->txn_lock, flags); From abce32d0f7f4f416e2069b76a72cc9962581b2bb Mon Sep 17 00:00:00 2001 From: Srinivas Kandagatla Date: Mon, 9 Aug 2021 09:24:26 +0100 Subject: [PATCH 50/61] slimbus: messaging: check for valid transaction id [ Upstream commit a263c1ff6abe0e66712f40d595bbddc7a35907f8 ] In some usecases transaction ids are dynamically allocated inside the controller driver after sending the messages which have generic acknowledge responses. So check for this before refcounting pm_runtime. 
Without this we would end up imbalancing runtime pm count by doing pm_runtime_put() in both slim_do_transfer() and slim_msg_response() for a single pm_runtime_get() in slim_do_transfer() Fixes: d3062a210930 ("slimbus: messaging: add slim_alloc/free_txn_tid()") Cc: Signed-off-by: Srinivas Kandagatla Link: https://lore.kernel.org/r/20210809082428.11236-3-srinivas.kandagatla@linaro.org Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin --- drivers/slimbus/messaging.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/slimbus/messaging.c b/drivers/slimbus/messaging.c index 3b77713f1e3f..ddf0371ad52b 100644 --- a/drivers/slimbus/messaging.c +++ b/drivers/slimbus/messaging.c @@ -131,7 +131,8 @@ int slim_do_transfer(struct slim_controller *ctrl, struct slim_msg_txn *txn) goto slim_xfer_err; } } - + /* Initialize tid to invalid value */ + txn->tid = 0; need_tid = slim_tid_txn(txn->mt, txn->mc); if (need_tid) { @@ -163,7 +164,7 @@ int slim_do_transfer(struct slim_controller *ctrl, struct slim_msg_txn *txn) txn->mt, txn->mc, txn->la, ret); slim_xfer_err: - if (!clk_pause_msg && (!need_tid || ret == -ETIMEDOUT)) { + if (!clk_pause_msg && (txn->tid == 0 || ret == -ETIMEDOUT)) { /* * remove runtime-pm vote if this was TX only, or * if there was error during this transaction From cb7aa5103146f88074a09ba9c96bde26d8614def Mon Sep 17 00:00:00 2001 From: Srinivas Kandagatla Date: Mon, 9 Aug 2021 09:24:28 +0100 Subject: [PATCH 51/61] slimbus: ngd: reset dma setup during runtime pm [ Upstream commit d77772538f00b7265deace6e77e555ee18365ad0 ] During suspend/resume NGD remote instance is power cycled along with remotely controlled bam dma engine. So Reset the dma configuration during this suspend resume path so that we are not dealing with any stale dma setup. Without this transactions timeout after first suspend resume path. 
Fixes: 917809e2280b ("slimbus: ngd: Add qcom SLIMBus NGD driver") Cc: Signed-off-by: Srinivas Kandagatla Link: https://lore.kernel.org/r/20210809082428.11236-5-srinivas.kandagatla@linaro.org Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin --- drivers/slimbus/qcom-ngd-ctrl.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/slimbus/qcom-ngd-ctrl.c b/drivers/slimbus/qcom-ngd-ctrl.c index b60541c3f72d..09ecd1fb24ae 100644 --- a/drivers/slimbus/qcom-ngd-ctrl.c +++ b/drivers/slimbus/qcom-ngd-ctrl.c @@ -1061,7 +1061,8 @@ static void qcom_slim_ngd_setup(struct qcom_slim_ngd_ctrl *ctrl) { u32 cfg = readl_relaxed(ctrl->ngd->base); - if (ctrl->state == QCOM_SLIM_NGD_CTRL_DOWN) + if (ctrl->state == QCOM_SLIM_NGD_CTRL_DOWN || + ctrl->state == QCOM_SLIM_NGD_CTRL_ASLEEP) qcom_slim_ngd_init_dma(ctrl); /* By default enable message queues */ @@ -1112,6 +1113,7 @@ static int qcom_slim_ngd_power_up(struct qcom_slim_ngd_ctrl *ctrl) dev_info(ctrl->dev, "Subsys restart: ADSP active framer\n"); return 0; } + qcom_slim_ngd_setup(ctrl); return 0; } @@ -1500,6 +1502,7 @@ static int __maybe_unused qcom_slim_ngd_runtime_suspend(struct device *dev) struct qcom_slim_ngd_ctrl *ctrl = dev_get_drvdata(dev); int ret = 0; + qcom_slim_ngd_exit_dma(ctrl); if (!ctrl->qmi.handle) return 0; From 280d66b317976458cb653725ea522b852805030f Mon Sep 17 00:00:00 2001 From: Dongliang Mu Date: Tue, 10 Aug 2021 18:03:18 +0800 Subject: [PATCH 52/61] ipack: tpci200: fix many double free issues in tpci200_pci_probe [ Upstream commit 57a1681095f912239c7fb4d66683ab0425973838 ] The function tpci200_register called by tpci200_install and tpci200_unregister called by tpci200_uninstall are in pair. However, tpci200_unregister has some cleanup operations not in the tpci200_register. So the error handling code of tpci200_pci_probe has many different double free issues. 
Fix this problem by moving those cleanup operations out of tpci200_unregister, into tpci200_pci_remove and reverting the previous commit 9272e5d0028d ("ipack/carriers/tpci200: Fix a double free in tpci200_pci_probe"). Fixes: 9272e5d0028d ("ipack/carriers/tpci200: Fix a double free in tpci200_pci_probe") Cc: stable@vger.kernel.org Reported-by: Dongliang Mu Signed-off-by: Dongliang Mu Link: https://lore.kernel.org/r/20210810100323.3938492-1-mudongliangabcd@gmail.com Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin --- drivers/ipack/carriers/tpci200.c | 36 ++++++++++++++++---------------- 1 file changed, 18 insertions(+), 18 deletions(-) diff --git a/drivers/ipack/carriers/tpci200.c b/drivers/ipack/carriers/tpci200.c index b05d6125c787..80cd0150f592 100644 --- a/drivers/ipack/carriers/tpci200.c +++ b/drivers/ipack/carriers/tpci200.c @@ -91,16 +91,13 @@ static void tpci200_unregister(struct tpci200_board *tpci200) free_irq(tpci200->info->pdev->irq, (void *) tpci200); pci_iounmap(tpci200->info->pdev, tpci200->info->interface_regs); - pci_iounmap(tpci200->info->pdev, tpci200->info->cfg_regs); pci_release_region(tpci200->info->pdev, TPCI200_IP_INTERFACE_BAR); pci_release_region(tpci200->info->pdev, TPCI200_IO_ID_INT_SPACES_BAR); pci_release_region(tpci200->info->pdev, TPCI200_MEM16_SPACE_BAR); pci_release_region(tpci200->info->pdev, TPCI200_MEM8_SPACE_BAR); - pci_release_region(tpci200->info->pdev, TPCI200_CFG_MEM_BAR); pci_disable_device(tpci200->info->pdev); - pci_dev_put(tpci200->info->pdev); } static void tpci200_enable_irq(struct tpci200_board *tpci200, @@ -529,7 +526,7 @@ static int tpci200_pci_probe(struct pci_dev *pdev, tpci200->info = kzalloc(sizeof(struct tpci200_infos), GFP_KERNEL); if (!tpci200->info) { ret = -ENOMEM; - goto out_err_info; + goto err_tpci200; } pci_dev_get(pdev); @@ -540,7 +537,7 @@ static int tpci200_pci_probe(struct pci_dev *pdev, if (ret) { dev_err(&pdev->dev, "Failed to allocate PCI Configuration Memory"); ret = -EBUSY; - goto 
out_err_pci_request; + goto err_tpci200_info; } tpci200->info->cfg_regs = ioremap_nocache( pci_resource_start(pdev, TPCI200_CFG_MEM_BAR), @@ -548,7 +545,7 @@ static int tpci200_pci_probe(struct pci_dev *pdev, if (!tpci200->info->cfg_regs) { dev_err(&pdev->dev, "Failed to map PCI Configuration Memory"); ret = -EFAULT; - goto out_err_ioremap; + goto err_request_region; } /* Disable byte swapping for 16 bit IP module access. This will ensure @@ -571,7 +568,7 @@ static int tpci200_pci_probe(struct pci_dev *pdev, if (ret) { dev_err(&pdev->dev, "error during tpci200 install\n"); ret = -ENODEV; - goto out_err_install; + goto err_cfg_regs; } /* Register the carrier in the industry pack bus driver */ @@ -583,7 +580,7 @@ static int tpci200_pci_probe(struct pci_dev *pdev, dev_err(&pdev->dev, "error registering the carrier on ipack driver\n"); ret = -EFAULT; - goto out_err_bus_register; + goto err_tpci200_install; } /* save the bus number given by ipack to logging purpose */ @@ -594,19 +591,16 @@ static int tpci200_pci_probe(struct pci_dev *pdev, tpci200_create_device(tpci200, i); return 0; -out_err_bus_register: +err_tpci200_install: tpci200_uninstall(tpci200); - /* tpci200->info->cfg_regs is unmapped in tpci200_uninstall */ - tpci200->info->cfg_regs = NULL; -out_err_install: - if (tpci200->info->cfg_regs) - iounmap(tpci200->info->cfg_regs); -out_err_ioremap: +err_cfg_regs: + pci_iounmap(tpci200->info->pdev, tpci200->info->cfg_regs); +err_request_region: pci_release_region(pdev, TPCI200_CFG_MEM_BAR); -out_err_pci_request: - pci_dev_put(pdev); +err_tpci200_info: kfree(tpci200->info); -out_err_info: + pci_dev_put(pdev); +err_tpci200: kfree(tpci200); return ret; } @@ -616,6 +610,12 @@ static void __tpci200_pci_remove(struct tpci200_board *tpci200) ipack_bus_unregister(tpci200->info->ipack_bus); tpci200_uninstall(tpci200); + pci_iounmap(tpci200->info->pdev, tpci200->info->cfg_regs); + + pci_release_region(tpci200->info->pdev, TPCI200_CFG_MEM_BAR); + + 
pci_dev_put(tpci200->info->pdev); + kfree(tpci200->info); kfree(tpci200); } From 0fc6a9c2025b4238c7a3e86c6931003153483684 Mon Sep 17 00:00:00 2001 From: Dongliang Mu Date: Tue, 10 Aug 2021 18:03:19 +0800 Subject: [PATCH 53/61] ipack: tpci200: fix memory leak in the tpci200_register [ Upstream commit 50f05bd114a46a74726e432bf81079d3f13a55b7 ] The error handling code in tpci200_register does not free interface_regs allocated by ioremap and the current version of error handling code is problematic. Fix this by refactoring the error handling code and free interface_regs when necessary. Fixes: 43986798fd50 ("ipack: add error handling for ioremap_nocache") Cc: stable@vger.kernel.org Reported-by: Dongliang Mu Signed-off-by: Dongliang Mu Link: https://lore.kernel.org/r/20210810100323.3938492-2-mudongliangabcd@gmail.com Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin --- drivers/ipack/carriers/tpci200.c | 24 ++++++++++++++---------- 1 file changed, 14 insertions(+), 10 deletions(-) diff --git a/drivers/ipack/carriers/tpci200.c b/drivers/ipack/carriers/tpci200.c index 80cd0150f592..46665b1cef85 100644 --- a/drivers/ipack/carriers/tpci200.c +++ b/drivers/ipack/carriers/tpci200.c @@ -256,7 +256,7 @@ static int tpci200_register(struct tpci200_board *tpci200) "(bn 0x%X, sn 0x%X) failed to allocate PCI resource for BAR 2 !", tpci200->info->pdev->bus->number, tpci200->info->pdev->devfn); - goto out_disable_pci; + goto err_disable_device; } /* Request IO ID INT space (Bar 3) */ @@ -268,7 +268,7 @@ static int tpci200_register(struct tpci200_board *tpci200) "(bn 0x%X, sn 0x%X) failed to allocate PCI resource for BAR 3 !", tpci200->info->pdev->bus->number, tpci200->info->pdev->devfn); - goto out_release_ip_space; + goto err_ip_interface_bar; } /* Request MEM8 space (Bar 5) */ @@ -279,7 +279,7 @@ static int tpci200_register(struct tpci200_board *tpci200) "(bn 0x%X, sn 0x%X) failed to allocate PCI resource for BAR 5!", tpci200->info->pdev->bus->number, 
tpci200->info->pdev->devfn); - goto out_release_ioid_int_space; + goto err_io_id_int_spaces_bar; } /* Request MEM16 space (Bar 4) */ @@ -290,7 +290,7 @@ static int tpci200_register(struct tpci200_board *tpci200) "(bn 0x%X, sn 0x%X) failed to allocate PCI resource for BAR 4!", tpci200->info->pdev->bus->number, tpci200->info->pdev->devfn); - goto out_release_mem8_space; + goto err_mem8_space_bar; } /* Map internal tpci200 driver user space */ @@ -304,7 +304,7 @@ static int tpci200_register(struct tpci200_board *tpci200) tpci200->info->pdev->bus->number, tpci200->info->pdev->devfn); res = -ENOMEM; - goto out_release_mem8_space; + goto err_mem16_space_bar; } /* Initialize lock that protects interface_regs */ @@ -343,18 +343,22 @@ static int tpci200_register(struct tpci200_board *tpci200) "(bn 0x%X, sn 0x%X) unable to register IRQ !", tpci200->info->pdev->bus->number, tpci200->info->pdev->devfn); - goto out_release_ioid_int_space; + goto err_interface_regs; } return 0; -out_release_mem8_space: +err_interface_regs: + pci_iounmap(tpci200->info->pdev, tpci200->info->interface_regs); +err_mem16_space_bar: + pci_release_region(tpci200->info->pdev, TPCI200_MEM16_SPACE_BAR); +err_mem8_space_bar: pci_release_region(tpci200->info->pdev, TPCI200_MEM8_SPACE_BAR); -out_release_ioid_int_space: +err_io_id_int_spaces_bar: pci_release_region(tpci200->info->pdev, TPCI200_IO_ID_INT_SPACES_BAR); -out_release_ip_space: +err_ip_interface_bar: pci_release_region(tpci200->info->pdev, TPCI200_IP_INTERFACE_BAR); -out_disable_pci: +err_disable_device: pci_disable_device(tpci200->info->pdev); return res; } From 548b75f4905eea41cfa2e9d373236c7cea1a18c5 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Fri, 6 Aug 2021 14:26:24 +1000 Subject: [PATCH 54/61] btrfs: prevent rename2 from exchanging a subvol with a directory from different parents [ Upstream commit 3f79f6f6247c83f448c8026c3ee16d4636ef8d4f ] Cross-rename lacks a check when that would prevent exchanging a directory and subvolume from 
different parent subvolume. This causes data inconsistencies and is caught before commit by tree-checker, turning the filesystem to read-only. Calling the renameat2 with RENAME_EXCHANGE flags like renameat2(AT_FDCWD, namesrc, AT_FDCWD, namedest, (1 << 1)) on two paths: namesrc = dir1/subvol1/dir2 namedest = subvol2/subvol3 will cause key order problem with following write time tree-checker report: [1194842.307890] BTRFS critical (device loop1): corrupt leaf: root=5 block=27574272 slot=10 ino=258, invalid previous key objectid, have 257 expect 258 [1194842.322221] BTRFS info (device loop1): leaf 27574272 gen 8 total ptrs 11 free space 15444 owner 5 [1194842.331562] BTRFS info (device loop1): refs 2 lock_owner 0 current 26561 [1194842.338772] item 0 key (256 1 0) itemoff 16123 itemsize 160 [1194842.338793] inode generation 3 size 16 mode 40755 [1194842.338801] item 1 key (256 12 256) itemoff 16111 itemsize 12 [1194842.338809] item 2 key (256 84 2248503653) itemoff 16077 itemsize 34 [1194842.338817] dir oid 258 type 2 [1194842.338823] item 3 key (256 84 2363071922) itemoff 16043 itemsize 34 [1194842.338830] dir oid 257 type 2 [1194842.338836] item 4 key (256 96 2) itemoff 16009 itemsize 34 [1194842.338843] item 5 key (256 96 3) itemoff 15975 itemsize 34 [1194842.338852] item 6 key (257 1 0) itemoff 15815 itemsize 160 [1194842.338863] inode generation 6 size 8 mode 40755 [1194842.338869] item 7 key (257 12 256) itemoff 15801 itemsize 14 [1194842.338876] item 8 key (257 84 2505409169) itemoff 15767 itemsize 34 [1194842.338883] dir oid 256 type 2 [1194842.338888] item 9 key (257 96 2) itemoff 15733 itemsize 34 [1194842.338895] item 10 key (258 12 256) itemoff 15719 itemsize 14 [1194842.339163] BTRFS error (device loop1): block=27574272 write time tree block corruption detected [1194842.339245] ------------[ cut here ]------------ [1194842.443422] WARNING: CPU: 6 PID: 26561 at fs/btrfs/disk-io.c:449 csum_one_extent_buffer+0xed/0x100 [btrfs] [1194842.511863] CPU: 6 PID: 
26561 Comm: kworker/u17:2 Not tainted 5.14.0-rc3-git+ #793 [1194842.511870] Hardware name: empty empty/S3993, BIOS PAQEX0-3 02/24/2008 [1194842.511876] Workqueue: btrfs-worker-high btrfs_work_helper [btrfs] [1194842.511976] RIP: 0010:csum_one_extent_buffer+0xed/0x100 [btrfs] [1194842.512068] RSP: 0018:ffffa2c284d77da0 EFLAGS: 00010282 [1194842.512074] RAX: 0000000000000000 RBX: 0000000000001000 RCX: ffff928867bd9978 [1194842.512078] RDX: 0000000000000000 RSI: 0000000000000027 RDI: ffff928867bd9970 [1194842.512081] RBP: ffff92876b958000 R08: 0000000000000001 R09: 00000000000c0003 [1194842.512085] R10: 0000000000000000 R11: 0000000000000001 R12: 0000000000000000 [1194842.512088] R13: ffff92875f989f98 R14: 0000000000000000 R15: 0000000000000000 [1194842.512092] FS: 0000000000000000(0000) GS:ffff928867a00000(0000) knlGS:0000000000000000 [1194842.512095] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [1194842.512099] CR2: 000055f5384da1f0 CR3: 0000000102fe4000 CR4: 00000000000006e0 [1194842.512103] Call Trace: [1194842.512128] ? run_one_async_free+0x10/0x10 [btrfs] [1194842.631729] btree_csum_one_bio+0x1ac/0x1d0 [btrfs] [1194842.631837] run_one_async_start+0x18/0x30 [btrfs] [1194842.631938] btrfs_work_helper+0xd5/0x1d0 [btrfs] [1194842.647482] process_one_work+0x262/0x5e0 [1194842.647520] worker_thread+0x4c/0x320 [1194842.655935] ? process_one_work+0x5e0/0x5e0 [1194842.655946] kthread+0x135/0x160 [1194842.655953] ? 
set_kthread_struct+0x40/0x40 [1194842.655965] ret_from_fork+0x1f/0x30 [1194842.672465] irq event stamp: 1729 [1194842.672469] hardirqs last enabled at (1735): [] console_trylock_spinning+0x185/0x1a0 [1194842.672477] hardirqs last disabled at (1740): [] console_trylock_spinning+0x15c/0x1a0 [1194842.672482] softirqs last enabled at (1666): [] __do_softirq+0x2e1/0x50a [1194842.672491] softirqs last disabled at (1651): [] __irq_exit_rcu+0xa7/0xd0 The corrupted data will not be written, and filesystem can be unmounted and mounted again (all changes since the last commit will be lost). Add the missing check for new_ino so that all non-subvolumes must reside under the same parent subvolume. There's an exception allowing to exchange two subvolumes from any parents as the directory representing a subvolume is only a logical link and does not have any other structures related to the parent subvolume, unlike files, directories etc, that are always in the inode namespace of the parent subvolume. Fixes: cdd1fedf8261 ("btrfs: add support for RENAME_EXCHANGE and RENAME_WHITEOUT") CC: stable@vger.kernel.org # 4.7+ Reviewed-by: Nikolay Borisov Signed-off-by: NeilBrown Reviewed-by: David Sterba Signed-off-by: David Sterba Signed-off-by: Sasha Levin --- fs/btrfs/inode.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index b044b1d910de..54b607a3cc3f 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -9702,8 +9702,14 @@ static int btrfs_rename_exchange(struct inode *old_dir, bool root_log_pinned = false; bool dest_log_pinned = false; - /* we only allow rename subvolume link between subvolumes */ - if (old_ino != BTRFS_FIRST_FREE_OBJECTID && root != dest) + /* + * For non-subvolumes allow exchange only within one subvolume, in the + * same inode namespace. Two subvolumes (represented as directory) can + * be exchanged as they're a logical link and have a fixed inode number. 
+ */ + if (root != dest && + (old_ino != BTRFS_FIRST_FREE_OBJECTID || + new_ino != BTRFS_FIRST_FREE_OBJECTID)) return -EXDEV; /* close the race window with snapshot create/destroy ioctl */ From 846ba58a7c068903b64da3c9800d6be42670aee4 Mon Sep 17 00:00:00 2001 From: Marcin Bachry Date: Wed, 21 Jul 2021 22:58:58 -0400 Subject: [PATCH 55/61] PCI: Increase D3 delay for AMD Renoir/Cezanne XHCI [ Upstream commit e0bff43220925b7e527f9d3bc9f5c624177c959e ] The Renoir XHCI controller apparently doesn't resume reliably with the standard D3hot-to-D0 delay. Increase it to 20ms. [Alex: I talked to the AMD USB hardware team and the AMD Windows team and they are not aware of any HW errata or specific issues. The HW works fine in Windows. I was told Windows uses a rather generous default delay of 100ms for PCI state transitions.] Link: https://lore.kernel.org/r/20210722025858.220064-1-alexander.deucher@amd.com Signed-off-by: Marcin Bachry Signed-off-by: Alex Deucher Signed-off-by: Bjorn Helgaas Cc: stable@vger.kernel.org Cc: Mario Limonciello Cc: Prike Liang Cc: Shyam Sundar S K Signed-off-by: Sasha Levin --- drivers/pci/quirks.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c index 6c1b936a94fa..0241f0dcc093 100644 --- a/drivers/pci/quirks.c +++ b/drivers/pci/quirks.c @@ -1905,6 +1905,7 @@ static void quirk_ryzen_xhci_d3hot(struct pci_dev *dev) } DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_AMD, 0x15e0, quirk_ryzen_xhci_d3hot); DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_AMD, 0x15e1, quirk_ryzen_xhci_d3hot); +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_AMD, 0x1639, quirk_ryzen_xhci_d3hot); #ifdef CONFIG_X86_IO_APIC static int dmi_disable_ioapicreroute(const struct dmi_system_id *d) From 9c1c449dcca09274ec5bc6efc53a03ec95746d2e Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Thu, 19 Aug 2021 17:29:45 +0200 Subject: [PATCH 56/61] ASoC: intel: atom: Fix breakage for PCM buffer address setup [ Upstream commit 65ca89c2b12cca0d473f3dd54267568ad3af55cc ] The 
commit 2e6b836312a4 ("ASoC: intel: atom: Fix reference to PCM buffer address") changed the reference of PCM buffer address to substream->runtime->dma_addr as the buffer address may change dynamically. However, I forgot that the dma_addr field is still not set up for the CONTINUOUS buffer type (that this driver uses) yet in 5.14 and earlier kernels, and it resulted in garbage I/O. The problem will be fixed in 5.15, but we need to address it quickly for now. The fix is to deduce the address again from the DMA pointer with virt_to_phys(), but from the right one, substream->runtime->dma_area. Fixes: 2e6b836312a4 ("ASoC: intel: atom: Fix reference to PCM buffer address") Reported-and-tested-by: Hans de Goede Cc: Acked-by: Mark Brown Link: https://lore.kernel.org/r/2048c6aa-2187-46bd-6772-36a4fb3c5aeb@redhat.com Link: https://lore.kernel.org/r/20210819152945.8510-1-tiwai@suse.de Signed-off-by: Takashi Iwai Signed-off-by: Sasha Levin --- sound/soc/intel/atom/sst-mfld-platform-pcm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/soc/intel/atom/sst-mfld-platform-pcm.c b/sound/soc/intel/atom/sst-mfld-platform-pcm.c index 216e88624c5f..7d59846808b5 100644 --- a/sound/soc/intel/atom/sst-mfld-platform-pcm.c +++ b/sound/soc/intel/atom/sst-mfld-platform-pcm.c @@ -127,7 +127,7 @@ static void sst_fill_alloc_params(struct snd_pcm_substream *substream, snd_pcm_uframes_t period_size; ssize_t periodbytes; ssize_t buffer_bytes = snd_pcm_lib_buffer_bytes(substream); - u32 buffer_addr = substream->runtime->dma_addr; + u32 buffer_addr = virt_to_phys(substream->runtime->dma_area); channels = substream->runtime->channels; period_size = substream->runtime->period_size; From 1a3aa81444d3e137fe953daaf9aebe0fac270af8 Mon Sep 17 00:00:00 2001 From: Yafang Shao Date: Thu, 6 Aug 2020 23:22:01 -0700 Subject: [PATCH 57/61] mm, memcg: avoid stale protection values when cgroup is above protection [ Upstream commit 22f7496f0b901249f23c5251eb8a10aae126b909 ] Patch series "mm, 
memcg: memory.{low,min} reclaim fix & cleanup", v4. This series contains a fix for a edge case in my earlier protection calculation patches, and a patch to make the area overall a little more robust to hopefully help avoid this in future. This patch (of 2): A cgroup can have both memory protection and a memory limit to isolate it from its siblings in both directions - for example, to prevent it from being shrunk below 2G under high pressure from outside, but also from growing beyond 4G under low pressure. Commit 9783aa9917f8 ("mm, memcg: proportional memory.{low,min} reclaim") implemented proportional scan pressure so that multiple siblings in excess of their protection settings don't get reclaimed equally but instead in accordance to their unprotected portion. During limit reclaim, this proportionality shouldn't apply of course: there is no competition, all pressure is from within the cgroup and should be applied as such. Reclaim should operate at full efficiency. However, mem_cgroup_protected() never expected anybody to look at the effective protection values when it indicated that the cgroup is above its protection. As a result, a query during limit reclaim may return stale protection values that were calculated by a previous reclaim cycle in which the cgroup did have siblings. When this happens, reclaim is unnecessarily hesitant and potentially slow to meet the desired limit. In theory this could lead to premature OOM kills, although it's not obvious this has occurred in practice. Workaround the problem by special casing reclaim roots in mem_cgroup_protection. These memcgs are never participating in the reclaim protection because the reclaim is internal. We have to ignore effective protection values for reclaim roots because mem_cgroup_protected might be called from racing reclaim contexts with different roots. Calculation is relying on root -> leaf tree traversal therefore top-down reclaim protection invariants should hold. 
The only exception is the reclaim root which should have effective protection set to 0 but that would be problematic for the following setup: Let's have global and A's reclaim in parallel: | A (low=2G, usage = 3G, max = 3G, children_low_usage = 1.5G) |\ | C (low = 1G, usage = 2.5G) B (low = 1G, usage = 0.5G) for A reclaim we have B.elow = B.low C.elow = C.low For the global reclaim A.elow = A.low B.elow = min(B.usage, B.low) because children_low_usage <= A.elow C.elow = min(C.usage, C.low) With the effective values resetting we have A reclaim A.elow = 0 B.elow = B.low C.elow = C.low and global reclaim could see the above and then B.elow = C.elow = 0 because children_low_usage > A.elow Which means that protected memcgs would get reclaimed. In future we would like to make mem_cgroup_protected more robust against racing reclaim contexts but that is likely more complex solution than this simple workaround. [hannes@cmpxchg.org - large part of the changelog] [mhocko@suse.com - workaround explanation] [chris@chrisdown.name - retitle] Fixes: 9783aa9917f8 ("mm, memcg: proportional memory.{low,min} reclaim") Signed-off-by: Yafang Shao Signed-off-by: Chris Down Signed-off-by: Andrew Morton Acked-by: Michal Hocko Acked-by: Johannes Weiner Acked-by: Chris Down Acked-by: Roman Gushchin Link: http://lkml.kernel.org/r/cover.1594638158.git.chris@chrisdown.name Link: http://lkml.kernel.org/r/044fb8ecffd001c7905d27c0c2ad998069fdc396.1594638158.git.chris@chrisdown.name Signed-off-by: Linus Torvalds Signed-off-by: Sasha Levin --- include/linux/memcontrol.h | 42 ++++++++++++++++++++++++++++++++++++-- mm/memcontrol.c | 8 ++++++++ mm/vmscan.c | 3 ++- 3 files changed, 50 insertions(+), 3 deletions(-) diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index fb5b2a41bd45..059f55841cc8 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -356,12 +356,49 @@ static inline bool mem_cgroup_disabled(void) return !cgroup_subsys_enabled(memory_cgrp_subsys); } 
-static inline unsigned long mem_cgroup_protection(struct mem_cgroup *memcg, +static inline unsigned long mem_cgroup_protection(struct mem_cgroup *root, + struct mem_cgroup *memcg, bool in_low_reclaim) { if (mem_cgroup_disabled()) return 0; + /* + * There is no reclaim protection applied to a targeted reclaim. + * We are special casing this specific case here because + * mem_cgroup_protected calculation is not robust enough to keep + * the protection invariant for calculated effective values for + * parallel reclaimers with different reclaim target. This is + * especially a problem for tail memcgs (as they have pages on LRU) + * which would want to have effective values 0 for targeted reclaim + * but a different value for external reclaim. + * + * Example + * Let's have global and A's reclaim in parallel: + * | + * A (low=2G, usage = 3G, max = 3G, children_low_usage = 1.5G) + * |\ + * | C (low = 1G, usage = 2.5G) + * B (low = 1G, usage = 0.5G) + * + * For the global reclaim + * A.elow = A.low + * B.elow = min(B.usage, B.low) because children_low_usage <= A.elow + * C.elow = min(C.usage, C.low) + * + * With the effective values resetting we have A reclaim + * A.elow = 0 + * B.elow = B.low + * C.elow = C.low + * + * If the global reclaim races with A's reclaim then + * B.elow = C.elow = 0 because children_low_usage > A.elow) + * is possible and reclaiming B would be violating the protection. 
+ * + */ + if (root == memcg) + return 0; + if (in_low_reclaim) return READ_ONCE(memcg->memory.emin); @@ -847,7 +884,8 @@ static inline void memcg_memory_event_mm(struct mm_struct *mm, { } -static inline unsigned long mem_cgroup_protection(struct mem_cgroup *memcg, +static inline unsigned long mem_cgroup_protection(struct mem_cgroup *root, + struct mem_cgroup *memcg, bool in_low_reclaim) { return 0; diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 2701497edda5..6d7fe3589e4a 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -6446,6 +6446,14 @@ enum mem_cgroup_protection mem_cgroup_protected(struct mem_cgroup *root, if (!root) root = root_mem_cgroup; + + /* + * Effective values of the reclaim targets are ignored so they + * can be stale. Have a look at mem_cgroup_protection for more + * details. + * TODO: calculation should be more robust so that we do not need + * that special casing. + */ if (memcg == root) return MEMCG_PROT_NONE; diff --git a/mm/vmscan.c b/mm/vmscan.c index 10feb872d9a4..dc44da27673d 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -2462,7 +2462,8 @@ out: unsigned long protection; lruvec_size = lruvec_lru_size(lruvec, lru, sc->reclaim_idx); - protection = mem_cgroup_protection(memcg, + protection = mem_cgroup_protection(sc->target_mem_cgroup, + memcg, sc->memcg_low_reclaim); if (protection) { From 41c7f46c89f64a5729d145dedd09c7cba9119972 Mon Sep 17 00:00:00 2001 From: Johannes Weiner Date: Thu, 19 Aug 2021 19:04:21 -0700 Subject: [PATCH 58/61] mm: memcontrol: fix occasional OOMs due to proportional memory.low reclaim [ Upstream commit f56ce412a59d7d938b81de8878faef128812482c ] We've noticed occasional OOM killing when memory.low settings are in effect for cgroups. This is unexpected and undesirable as memory.low is supposed to express non-OOMing memory priorities between cgroups. The reason for this is proportional memory.low reclaim. 
When cgroups are below their memory.low threshold, reclaim passes them over in the first round, and then retries if it couldn't find pages anywhere else. But when cgroups are slightly above their memory.low setting, page scan force is scaled down and diminished in proportion to the overage, to the point where it can cause reclaim to fail as well - only in that case we currently don't retry, and instead trigger OOM. To fix this, hook proportional reclaim into the same retry logic we have in place for when cgroups are skipped entirely. This way if reclaim fails and some cgroups were scanned with diminished pressure, we'll try another full-force cycle before giving up and OOMing. [akpm@linux-foundation.org: coding-style fixes] Link: https://lkml.kernel.org/r/20210817180506.220056-1-hannes@cmpxchg.org Fixes: 9783aa9917f8 ("mm, memcg: proportional memory.{low,min} reclaim") Signed-off-by: Johannes Weiner Reported-by: Leon Yang Reviewed-by: Rik van Riel Reviewed-by: Shakeel Butt Acked-by: Roman Gushchin Acked-by: Chris Down Acked-by: Michal Hocko Cc: [5.4+] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Sasha Levin --- include/linux/memcontrol.h | 29 +++++++++++++++-------------- mm/vmscan.c | 27 +++++++++++++++++++-------- 2 files changed, 34 insertions(+), 22 deletions(-) diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 059f55841cc8..b6d0b68f5503 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -356,12 +356,15 @@ static inline bool mem_cgroup_disabled(void) return !cgroup_subsys_enabled(memory_cgrp_subsys); } -static inline unsigned long mem_cgroup_protection(struct mem_cgroup *root, - struct mem_cgroup *memcg, - bool in_low_reclaim) +static inline void mem_cgroup_protection(struct mem_cgroup *root, + struct mem_cgroup *memcg, + unsigned long *min, + unsigned long *low) { + *min = *low = 0; + if (mem_cgroup_disabled()) - return 0; + return; /* * There is no reclaim protection applied to a 
targeted reclaim. @@ -397,13 +400,10 @@ static inline unsigned long mem_cgroup_protection(struct mem_cgroup *root, * */ if (root == memcg) - return 0; + return; - if (in_low_reclaim) - return READ_ONCE(memcg->memory.emin); - - return max(READ_ONCE(memcg->memory.emin), - READ_ONCE(memcg->memory.elow)); + *min = READ_ONCE(memcg->memory.emin); + *low = READ_ONCE(memcg->memory.elow); } enum mem_cgroup_protection mem_cgroup_protected(struct mem_cgroup *root, @@ -884,11 +884,12 @@ static inline void memcg_memory_event_mm(struct mm_struct *mm, { } -static inline unsigned long mem_cgroup_protection(struct mem_cgroup *root, - struct mem_cgroup *memcg, - bool in_low_reclaim) +static inline void mem_cgroup_protection(struct mem_cgroup *root, + struct mem_cgroup *memcg, + unsigned long *min, + unsigned long *low) { - return 0; + *min = *low = 0; } static inline enum mem_cgroup_protection mem_cgroup_protected( diff --git a/mm/vmscan.c b/mm/vmscan.c index dc44da27673d..fad9be4703ec 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -89,9 +89,12 @@ struct scan_control { unsigned int may_swap:1; /* - * Cgroups are not reclaimed below their configured memory.low, - * unless we threaten to OOM. If any cgroups are skipped due to - * memory.low and nothing was reclaimed, go back for memory.low. + * Cgroup memory below memory.low is protected as long as we + * don't threaten to OOM. If any cgroup is reclaimed at + * reduced force or passed over entirely due to its memory.low + * setting (memcg_low_skipped), and nothing is reclaimed as a + * result, then go back for one more cycle that reclaims the protected + * memory (memcg_low_reclaim) to avert OOM. 
*/ unsigned int memcg_low_reclaim:1; unsigned int memcg_low_skipped:1; @@ -2458,15 +2461,14 @@ out: for_each_evictable_lru(lru) { int file = is_file_lru(lru); unsigned long lruvec_size; + unsigned long low, min; unsigned long scan; - unsigned long protection; lruvec_size = lruvec_lru_size(lruvec, lru, sc->reclaim_idx); - protection = mem_cgroup_protection(sc->target_mem_cgroup, - memcg, - sc->memcg_low_reclaim); + mem_cgroup_protection(sc->target_mem_cgroup, memcg, + &min, &low); - if (protection) { + if (min || low) { /* * Scale a cgroup's reclaim pressure by proportioning * its current usage to its memory.low or memory.min @@ -2497,6 +2499,15 @@ out: * hard protection. */ unsigned long cgroup_size = mem_cgroup_size(memcg); + unsigned long protection; + + /* memory.low scaling, make sure we retry before OOM */ + if (!sc->memcg_low_reclaim && low > min) { + protection = low; + sc->memcg_low_skipped = 1; + } else { + protection = min; + } /* Avoid TOCTOU with earlier protection check */ cgroup_size = max(cgroup_size, protection); From e4fd994f02c5c23ef8978d0e631869a0de1e44ff Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Fri, 20 Aug 2021 09:29:50 -0400 Subject: [PATCH 59/61] fs: warn about impending deprecation of mandatory locks [ Upstream commit fdd92b64d15bc4aec973caa25899afd782402e68 ] We've had CONFIG_MANDATORY_FILE_LOCKING since 2015 and a lot of distros have disabled it. Warn the stragglers that still use "-o mand" that we'll be dropping support for that mount option. 
Cc: stable@vger.kernel.org Signed-off-by: Jeff Layton Signed-off-by: Sasha Levin --- fs/namespace.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/fs/namespace.c b/fs/namespace.c index a092611d89e7..5782cd55dfdb 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -1647,8 +1647,12 @@ static inline bool may_mount(void) } #ifdef CONFIG_MANDATORY_FILE_LOCKING -static inline bool may_mandlock(void) +static bool may_mandlock(void) { + pr_warn_once("======================================================\n" + "WARNING: the mand mount option is being deprecated and\n" + " will be removed in v5.15!\n" + "======================================================\n"); return capable(CAP_SYS_ADMIN); } #else From 4bf19415810298bb0562c8923dfadbd3ee29c486 Mon Sep 17 00:00:00 2001 From: Sergey Marinkevich Date: Sun, 29 Mar 2020 19:19:14 +0700 Subject: [PATCH 60/61] netfilter: nft_exthdr: fix endianness of tcp option cast MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 2e34328b396a69b73661ba38d47d92b7cf21c2c4 ] I got a problem on MIPS with Big-Endian turned on: every time NF tries to change the TCP MSS it returns because new.v16 was greater than old.v16. But the real MSS was 1460 and my rule was like this: add rule table chain tcp option maxseg size set 1400 And 1400 is less than 1460, not greater. Later I found that the main cause is the cast from u32 to __be16. Debugging: In this example MSS = 1400 (HEX: 0x578). Here is the representation of each byte as it is in memory, by addresses from left to right (e.g. [0x0 0x1 0x2 0x3]). LE — Little-Endian system, BE — Big-Endian, left column is type. LE BE u32: [78 05 00 00] [00 00 05 78] As you can see, the u32 representation will be cast to u16 from a different half of the 4-byte address range. But actually nf_tables uses registers and stores data of various sizes. Actually the TCP MSS is stored in 2 bytes.
But registers are still u32 in definition: struct nft_regs { union { u32 data[20]; struct nft_verdict verdict; }; }; So, an access like regs->data[priv->sreg] is exactly u32. So, according to the table presented above, the per-byte representation of the stored TCP MSS in the register will be: LE BE (u32)regs->data[]: [78 05 00 00] [05 78 00 00] ^^ ^^ We see that the register uses just half of the u32 and the other 2 bytes may be used for some other data. But in nft_exthdr_tcp_set_eval() it is cast just like u32 -> __be16: new.v16 = src But u32 overflows __be16, so it gets the 2 low bytes. For clarity draw one more table (<xx xx> means that these bytes will be used for the cast). LE BE u32: [<78 05> 00 00] [00 00 <05 78>] (u32)regs->data[]: [<78 05> 00 00] [05 78 <00 00>] As you can see, for Little-Endian nothing changes, but for Big-Endian we take the wrong half. In my case there is some other data instead of zeros, so the new MSS was wrongly greater. To fix this bug I used the solution already used for port ranges. Applying this patch does not affect Little-Endian systems. 
Signed-off-by: Sergey Marinkevich Acked-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso Signed-off-by: Sasha Levin --- net/netfilter/nft_exthdr.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/net/netfilter/nft_exthdr.c b/net/netfilter/nft_exthdr.c index 00f4323cfeb8..faa0844c01fb 100644 --- a/net/netfilter/nft_exthdr.c +++ b/net/netfilter/nft_exthdr.c @@ -231,7 +231,6 @@ static void nft_exthdr_tcp_set_eval(const struct nft_expr *expr, unsigned int i, optl, tcphdr_len, offset; struct tcphdr *tcph; u8 *opt; - u32 src; tcph = nft_tcp_header_pointer(pkt, sizeof(buff), buff, &tcphdr_len); if (!tcph) @@ -240,7 +239,6 @@ static void nft_exthdr_tcp_set_eval(const struct nft_expr *expr, opt = (u8 *)tcph; for (i = sizeof(*tcph); i < tcphdr_len - 1; i += optl) { union { - u8 octet; __be16 v16; __be32 v32; } old, new; @@ -262,13 +260,13 @@ static void nft_exthdr_tcp_set_eval(const struct nft_expr *expr, if (!tcph) return; - src = regs->data[priv->sreg]; offset = i + priv->offset; switch (priv->len) { case 2: old.v16 = get_unaligned((u16 *)(opt + offset)); - new.v16 = src; + new.v16 = (__force __be16)nft_reg_load16( + &regs->data[priv->sreg]); switch (priv->type) { case TCPOPT_MSS: @@ -286,7 +284,7 @@ static void nft_exthdr_tcp_set_eval(const struct nft_expr *expr, old.v16, new.v16, false); break; case 4: - new.v32 = src; + new.v32 = regs->data[priv->sreg]; old.v32 = get_unaligned((u32 *)(opt + offset)); if (old.v32 == new.v32) From fd80923202c6bfd723742fc32426a7aa3632abaa Mon Sep 17 00:00:00 2001 From: Sasha Levin Date: Thu, 26 Aug 2021 08:55:22 -0400 Subject: [PATCH 61/61] Linux 5.4.143 Tested-by: Sudip Mukherjee Tested-by: Hulk Robot Tested-by: Linux Kernel Functional Testing Tested-by: Guenter Roeck Tested-by: Shuah Khan Signed-off-by: Sasha Levin --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index ef3adc6ccb87..e99fabc4dfc8 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ # 
SPDX-License-Identifier: GPL-2.0 VERSION = 5 PATCHLEVEL = 4 -SUBLEVEL = 142 +SUBLEVEL = 143 EXTRAVERSION = NAME = Kleptomaniac Octopus